def ukpmc(ids=None):
    """Load text-mined entities from UKPMC abstract pages into the DB.

    For each PubMed id, the ``publ<id>`` document is read from the
    ``publication`` collection and wrapped in ``Publication``. The page
    ``http://ukpmc.ac.uk/abstract/MED/<id>`` is then fetched and every
    annotated span (disease, protein, geneOntology, species, chemical)
    found in it is stored on ``pub.entities`` as a
    ``{'name': ..., 'group': ...}`` dict. Names are lower-cased and
    de-duplicated by name (the last occurrence wins); the ``geneOntology``
    class is recorded as group ``'go'``.

    Args:
        ids: Comma-separated string of PubMed ids. Surrounding whitespace
            is stripped and empty items are ignored. When ``ids`` is empty
            or ``None``, the ``demo_pubmeds`` iterable defined outside
            this function is used instead.

    Returns:
        None. Progress is printed to stdout. A publication is saved only
        when at least one entity was found; responses whose status code is
        not 200 are skipped without a message.
    """
    # NOTE(review): a second definition of `ukpmc` appears later in this
    # file and rebinds the name, so that later one is what callers get.
    import re

    import requests
    from django.utils.encoding import smart_str

    if ids:
        # Tolerate input such as "1, 2" or "1,,2".
        pubmed_ids = [part.strip() for part in ids.split(",") if part.strip()]
    else:
        pubmed_ids = demo_pubmeds

    url = 'http://ukpmc.ac.uk/abstract/MED/'
    # Compiled once: the pattern does not depend on the id being processed.
    entity_re = re.compile(
        r'<span class="(disease|protein|geneOntology|species|chemical)"'
        r'.*?_blank">(.*?)</a></span>')
    col = mongo.getCollection('publication')

    for pubmed_id in pubmed_ids:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("#### proceesing %s" % pubmed_id)
        doc = col.find_one({'_id': 'publ%s' % pubmed_id})
        pub = Publication(doc)
        response = requests.get("%s%s" % (url, pubmed_id))
        if response.status_code != 200:
            continue

        content = smart_str(response.text)
        # Keyed by lower-cased name so repeated mentions collapse to one.
        entities = {}
        for match in entity_re.finditer(content):
            group = match.group(1)
            group = 'go' if group == 'geneOntology' else group.lower()
            name = match.group(2).lower()
            entities[name] = {'name': name, 'group': group}

        pub.entities = list(entities.values())
        if pub.entities:
            pub.save()
            print("Saved %d items" % len(entities))
def ukpmc(ids=None):
    """Load text-mined entities from UKPMC abstract pages into the DB.

    For each PubMed id, the ``publ<id>`` document is read from the
    ``publication`` collection and wrapped in ``Publication``. The page
    ``http://ukpmc.ac.uk/abstract/MED/<id>`` is then fetched and every
    annotated span (disease, protein, geneOntology, species, chemical)
    found in it is stored on ``pub.entities`` as a
    ``{'name': ..., 'group': ...}`` dict. Names are lower-cased and
    de-duplicated by name (the last occurrence wins); the ``geneOntology``
    class is recorded as group ``'go'``.

    Args:
        ids: Comma-separated string of PubMed ids. Surrounding whitespace
            is stripped and empty items are ignored. When ``ids`` is empty
            or ``None``, the ``demo_pubmeds`` iterable defined outside
            this function is used instead.

    Returns:
        None. Progress is printed to stdout. A publication is saved only
        when at least one entity was found; responses whose status code is
        not 200 are skipped without a message.
    """
    # NOTE(review): an earlier definition of `ukpmc` exists in this file;
    # this later one rebinds the name and is what callers get.
    import re

    import requests
    from django.utils.encoding import smart_str

    if ids:
        # Tolerate input such as "1, 2" or "1,,2".
        pubmed_ids = [part.strip() for part in ids.split(",") if part.strip()]
    else:
        pubmed_ids = demo_pubmeds

    url = 'http://ukpmc.ac.uk/abstract/MED/'
    # Compiled once: the pattern does not depend on the id being processed.
    entity_re = re.compile(
        r'<span class="(disease|protein|geneOntology|species|chemical)"'
        r'.*?_blank">(.*?)</a></span>')
    col = mongo.getCollection('publication')

    for pubmed_id in pubmed_ids:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("#### proceesing %s" % pubmed_id)
        doc = col.find_one({'_id': 'publ%s' % pubmed_id})
        pub = Publication(doc)
        response = requests.get("%s%s" % (url, pubmed_id))
        if response.status_code != 200:
            continue

        content = smart_str(response.text)
        # Keyed by lower-cased name so repeated mentions collapse to one.
        entities = {}
        for match in entity_re.finditer(content):
            group = match.group(1)
            group = 'go' if group == 'geneOntology' else group.lower()
            name = match.group(2).lower()
            entities[name] = {'name': name, 'group': group}

        pub.entities = list(entities.values())
        if pub.entities:
            pub.save()
            print("Saved %d items" % len(entities))