Example #1
0
def ukpmc(ids=None):
    """
    Load text-mined entities from UKPMC abstract pages into the DB.

    For every PubMed id the ``publ<id>`` document is looked up in the
    ``publication`` collection and wrapped in a ``Publication``. The UKPMC
    abstract page for that id is then fetched and scanned for annotated
    entity spans (disease, protein, geneOntology, species, chemical). The
    entities found replace ``pub.entities`` and the publication is saved,
    but only when at least one entity was found.

    Args:
        ids: Comma-separated string of PubMed ids. When empty or ``None``
            the module-level ``demo_pubmeds`` is used instead. Whitespace
            around ids and empty segments (e.g. a trailing comma) are
            ignored.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched
            (including a timeout); the error is not caught here, so it
            aborts the remaining ids.
    """
    # Function-scope imports are kept (the file's import block is not
    # visible from here) but hoisted out of the per-id loop.
    import re

    import requests
    from django.utils.encoding import smart_str

    url = 'http://ukpmc.ac.uk/abstract/MED/'
    # Seconds to wait on the remote server; without a timeout a stalled
    # connection would block the whole batch forever.
    request_timeout = 30
    # Group 1 is the entity class, group 2 the linked entity text.
    entity_re = re.compile(
        r'<span class="(disease|protein|geneOntology|species|chemical)".*?_blank">(.*?)</a></span>')

    if ids:
        pubmed_ids = [part.strip() for part in ids.split(",") if part.strip()]
    else:
        pubmed_ids = demo_pubmeds

    col = mongo.getCollection('publication')
    for pubmed_id in pubmed_ids:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("#### proceesing %s" % pubmed_id)
        # NOTE(review): the find_one result is handed to Publication as-is;
        # what happens when no such document exists depends on Publication.
        p = col.find_one({'_id': 'publ%s' % pubmed_id})
        pub = Publication(p)
        r = requests.get("%s%s" % (url, pubmed_id), timeout=request_timeout)
        if r.status_code != 200:
            # Non-200 responses are skipped without touching the publication.
            continue

        content = smart_str(r.text)
        # Keyed by lower-cased name so repeated mentions of an entity
        # collapse into one entry (the last occurrence wins).
        entities = {}
        for m in entity_re.finditer(content):
            group = m.group(1)
            group = 'go' if group == 'geneOntology' else group.lower()
            name = m.group(2).lower()
            entities[name] = {'name': name, 'group': group}

        pub.entities = list(entities.values())
        if pub.entities:
            pub.save()
            print("Saved %d items" % len(entities))
Example #2
0
def ukpmc(ids=None):
    """
    Load text-mined entities from UKPMC abstract pages into the DB.

    For every PubMed id the ``publ<id>`` document is looked up in the
    ``publication`` collection and wrapped in a ``Publication``. The UKPMC
    abstract page for that id is then fetched and scanned for annotated
    entity spans (disease, protein, geneOntology, species, chemical). The
    entities found replace ``pub.entities`` and the publication is saved,
    but only when at least one entity was found.

    Args:
        ids: Comma-separated string of PubMed ids. When empty or ``None``
            the module-level ``demo_pubmeds`` is used instead. Whitespace
            around ids and empty segments (e.g. a trailing comma) are
            ignored.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched
            (including a timeout); the error is not caught here, so it
            aborts the remaining ids.
    """
    # Function-scope imports are kept (the file's import block is not
    # visible from here) but hoisted out of the per-id loop.
    import re

    import requests
    from django.utils.encoding import smart_str

    url = 'http://ukpmc.ac.uk/abstract/MED/'
    # Seconds to wait on the remote server; without a timeout a stalled
    # connection would block the whole batch forever.
    request_timeout = 30
    # Group 1 is the entity class, group 2 the linked entity text.
    entity_re = re.compile(
        r'<span class="(disease|protein|geneOntology|species|chemical)".*?_blank">(.*?)</a></span>')

    if ids:
        pubmed_ids = [part.strip() for part in ids.split(",") if part.strip()]
    else:
        pubmed_ids = demo_pubmeds

    col = mongo.getCollection('publication')
    for pubmed_id in pubmed_ids:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("#### proceesing %s" % pubmed_id)
        # NOTE(review): the find_one result is handed to Publication as-is;
        # what happens when no such document exists depends on Publication.
        p = col.find_one({'_id': 'publ%s' % pubmed_id})
        pub = Publication(p)
        r = requests.get("%s%s" % (url, pubmed_id), timeout=request_timeout)
        if r.status_code != 200:
            # Non-200 responses are skipped without touching the publication.
            continue

        content = smart_str(r.text)
        # Keyed by lower-cased name so repeated mentions of an entity
        # collapse into one entry (the last occurrence wins).
        entities = {}
        for m in entity_re.finditer(content):
            group = m.group(1)
            group = 'go' if group == 'geneOntology' else group.lower()
            name = m.group(2).lower()
            entities[name] = {'name': name, 'group': group}

        pub.entities = list(entities.values())
        if pub.entities:
            pub.save()
            print("Saved %d items" % len(entities))