def addAllLinks(url):
    """Register every full-text article link found on the page at *url*.

    The page is fetched with ``reader.readURL`` and parsed with
    BeautifulSoup. For each ``<a rel="full-text">`` anchor, the ``href`` is
    cut at the first ``"?sid"``, prefixed with ``baseURL`` and passed to
    ``addMetaURL``. ``removeMetaDuplicates`` is called once after all
    anchors have been processed.
    """
    page = BeautifulSoup(reader.readURL(url))
    for anchor in page.findAll("a", {"rel": "full-text"}):
        # Keep only the part before "?sid" (gets rid of the search id).
        href = anchor['href'].split("?sid")[0]
        addMetaURL(baseURL + href)
    removeMetaDuplicates()
def readMeta():
    """Fetch every URL listed in the ``metaList`` file and store its source.

    Each line of the file named by ``metaList`` is treated as one link
    (only the trailing newline is stripped). Every link is downloaded with
    ``reader.readURL`` and the result is passed, together with the link,
    to ``writeSource``.
    """
    # Read the whole list up front inside a context manager so the file
    # handle is closed before any URL is fetched.
    with open(metaList) as meta_file:
        links = [line.rstrip('\n') for line in meta_file]

    for link in links:
        writeSource(link, reader.readURL(link))