Beispiel #1
0
def parse_citmap(doc):
    """Extract the two citation lists from a citation-map HTML document.

    The markup is parsed with ``BeautifulSoup`` and two containers are
    inspected: ``<div id="colFirst">`` (reported under ``"citing"``) and
    ``<div id="colSecond">`` (reported under ``"citedby"``).  Inside each
    container, every ``<a>`` whose ``href`` contains ``"articleDetails"`` is
    converted with ``urltoarnumber`` and collected.

    Args:
        doc: HTML markup in any form ``BeautifulSoup`` accepts.

    Returns:
        A dict with the keys ``"citing"`` and ``"citedby"``.  Each value is a
        list of the ``urltoarnumber`` results in document order with
        duplicates removed; a container that is absent from the page yields
        an empty list.
    """

    def get_docs(container):
        """Return the de-duplicated article references linked in *container*."""
        found = []
        if container is None:
            return found
        for anchor in container.findAll("a"):
            # Anchors without an href (e.g. named anchors) are simply skipped.
            href = anchor.get("href")
            if not href or "articleDetails" not in href:
                continue
            try:
                number = urltoarnumber(href)
            except Exception:
                # Best-effort scraping: one link that cannot be converted
                # must not abort the page.  Unlike a bare ``except``, this
                # still lets KeyboardInterrupt/SystemExit propagate.
                continue
            # A list (not a set) keeps first-seen order of the links.
            if number not in found:
                found.append(number)
        return found

    soup = BeautifulSoup(doc)
    citing = get_docs(soup.find("div", {"id": "colFirst"}))
    citedby = get_docs(soup.find("div", {"id": "colSecond"}))

    return dict(citing=citing, citedby=citedby)