Esempio n. 1
0
def addAllLinks(url):
    """Register every full-text article link found on the page at ``url``.

    The page is fetched with ``reader.readURL`` and parsed with
    BeautifulSoup. For each ``<a>`` tag whose ``rel`` attribute is
    ``"full-text"``, the ``href`` is cut at ``"?sid"``, prefixed with
    ``baseURL`` and passed to ``addMetaURL``. After all links have been
    handled, ``removeMetaDuplicates`` is called once.
    """
    page = BeautifulSoup(reader.readURL(url))
    for anchor in page.findAll("a", {"rel": "full-text"}):
        # Keep only the part before "?sid" so the search id is dropped.
        article_path = anchor['href'].partition("?sid")[0]
        addMetaURL(baseURL + article_path)
    removeMetaDuplicates()
Esempio n. 2
0
def readMeta():
    """Fetch and store the source of every URL listed in ``metaList``.

    ``metaList`` is opened as a text file with one link per line. Each
    link is fetched with ``reader.readURL`` and the result is handed to
    ``writeSource`` together with the link.
    """
    # Read the whole list first and close the file before fetching, so the
    # handle is not held open (or leaked) while the downloads run.
    with open(metaList) as meta_file:
        links = [line.rstrip('\n') for line in meta_file]

    for link in links:
        # An empty line (e.g. a trailing newline in the list) is not a URL.
        if not link:
            continue
        source = reader.readURL(link)
        writeSource(link, source)