Exemple #1
0
def ukpmc(ids=None):
    """
    Load text-mined entities from UKPMC into publication records.

    For each PubMed id the UKPMC abstract page is downloaded and every
    annotated span (disease, protein, geneOntology, species, chemical) is
    extracted from the HTML.  Entities are de-duplicated by lower-cased
    name (the last occurrence wins), assigned to ``pub.entities`` and the
    publication is saved only when at least one entity was found.

    ids -- comma-separated string of PubMed ids; when empty or None the
           module-level ``demo_pubmeds`` is used instead.

    Returns None; progress is reported with print.
    """
    # Imports stay function-local as in the original, but are done once
    # instead of on every loop iteration.
    import re
    import requests
    from django.utils.encoding import smart_str

    pubmed_ids = ids.split(",") if ids else demo_pubmeds

    base_url = 'http://ukpmc.ac.uk/abstract/MED/'
    # Group 1 is the annotation class, group 2 the link text of the entity.
    entity_re = re.compile(
        r'<span class="(disease|protein|geneOntology|species|chemical)".*?_blank">(.*?)</a></span>'
    )
    col = mongo.getCollection('publication')

    for pubmed_id in pubmed_ids:
        # The single-argument call form prints the same text whether print
        # is a statement (Python 2) or a function (Python 3).
        print("#### proceesing %s" % pubmed_id)

        # NOTE(review): find_one presumably yields None when no document
        # matches; confirm that Publication accepts that.
        record = col.find_one({'_id': 'publ%s' % pubmed_id})
        pub = Publication(record)

        # NOTE(review): no timeout is passed, so a stalled server blocks
        # this call indefinitely; consider adding one.
        response = requests.get("%s%s" % (base_url, pubmed_id))
        if response.status_code != 200:
            continue

        content = smart_str(response.text)

        # Keyed by name so repeated mentions collapse into one entry.
        entities = {}
        for match in entity_re.finditer(content):
            group = match.group(1)
            group = 'go' if group == 'geneOntology' else group.lower()
            name = match.group(2).lower()
            entities[name] = {'name': name, 'group': group}

        pub.entities = list(entities.values())
        if pub.entities:
            pub.save()
            print("Saved %d items" % len(entities))