def on_activate_event(self, document):
    """Add GPCRDB mention annotations to *document*, at most once.

    Nothing is done when ``document.annotations('GPCRDB cache')`` is already
    non-empty. Otherwise the document text, together with the PubMed id (or
    ``None`` when no id is found), is handed to ``self.getMentions``. Every
    returned mention whose ``mentionType`` is not ``'SPECIES'`` is grouped by
    its ``html`` attribute, and one annotation per group is built with
    ``self.createAnnotation`` and added to the document.

    Finally an empty annotation is added under the name 'GPCRDB cache';
    presumably this is what makes the guard above skip later activations.
    """
    if len(document.annotations('GPCRDB cache')) != 0:
        return

    print('annotating stuff . . .')

    pubmedId = utopialib.utils.metadata(document, 'identifiers[pubmed]')
    if pubmedId is not None:
        print('found pubmed id: ' + pubmedId)
    else:
        print('did not find pubmed id')

    textMentions = self.getMentions(document.text(), pubmedId)

    # Mentions that render to the same HTML share a single annotation.
    mention_cache = {}
    for mention in textMentions:
        if mention.mentionType != 'SPECIES':
            mention_cache.setdefault(mention.html, []).append(mention)

    for html, mentions in mention_cache.iteritems():
        annotation = self.createAnnotation(document, html, mentions)
        annotation['displayRelevance'] = '2000'
        annotation['displayRank'] = '2000'
        document.addAnnotation(annotation)

    # Marker annotation checked by the guard at the top of this method.
    document.addAnnotation(spineapi.Annotation(), 'GPCRDB cache')
def on_activate_event(self, document):
    """Add NucleaRDB mention annotations to *document*, at most once.

    Nothing is done when ``document.annotations('NucleaRDB cache')`` is
    already non-empty. Otherwise the document text, together with the PubMed
    id (or ``None`` when no id is found), is handed to ``self.getMentions``.
    Every returned mention whose ``mentionType`` is not ``'SPECIES'`` is
    grouped by its ``html`` attribute, and one annotation per group is built
    with ``self.createAnnotation`` and added to the document.

    Finally an empty annotation is added under the name 'NucleaRDB cache';
    presumably this is what makes the guard above skip later activations.
    """
    if len(document.annotations('NucleaRDB cache')) != 0:
        return

    print('annotating stuff . . .')

    pubmedId = common.utils.metadata(document, 'pmid')
    if pubmedId is not None:
        print('found pubmed id: ' + pubmedId)
    else:
        print('did not find pubmed id')

    textMentions = self.getMentions(document.text(), pubmedId)

    # Mentions that render to the same HTML share a single annotation.
    mention_cache = {}
    for mention in textMentions:
        if mention.mentionType != 'SPECIES':
            mention_cache.setdefault(mention.html, []).append(mention)

    for html, mentions in mention_cache.iteritems():
        annotation = self.createAnnotation(document, html, mentions)
        annotation['displayRelevance'] = '2000'
        annotation['displayRank'] = '2000'
        document.addAnnotation(annotation)

    # Marker annotation checked by the guard at the top of this method.
    document.addAnnotation(spineapi.Annotation(), 'NucleaRDB cache')
def on_activate_event(self, document, data=None):
    """Annotate *document* with 3DM mentions for the requested domain.

    ``data`` is a mapping from which the keys 'action' and 'domain' are
    read; it defaults to an empty mapping. ``self.annotatedDomains`` is
    initialised to an empty list if it is ``None``. Annotation only happens
    when the action is 'annotate'.

    In that case the document text is sent to ``self.getMentions`` for the
    domain. Every returned mention whose 'mentionType' is neither 'SPECIES'
    nor 'PDB' gets UTF-8 encoded 'html', 'css' and 'js' entries produced by
    ``self.buildHtml``. Mentions are grouped by that HTML, and one annotation
    per group is built with ``self.createAnnotation`` and added to the
    document, followed by an empty annotation added under the domain name.
    """
    # A fresh mapping per call avoids sharing one default dict between calls.
    if data is None:
        data = {}
    action = data.get('action')
    domain = data.get('domain')

    if self.annotatedDomains is None:
        self.annotatedDomains = []

    if action != 'annotate':
        return

    print('starting 3DM annotation . . .')

    # The PubMed id is only used in the progress message below.
    pubmedId = utopia.tools.utils.metadata(document, 'identifiers[pubmed]')
    if pubmedId is None:
        pubmedId = '0'

    print('sending text to remote server (' + pubmedId + '). . .')
    textMentions = self.getMentions(domain, document.text())
    print('received response, adding annotations for domain ' + domain + ' . . .')

    # Mentions that render to the same HTML share a single annotation.
    mention_cache = {}
    for mention in textMentions:
        if mention['mentionType'] not in ('SPECIES', 'PDB'):
            html, css, js = self.buildHtml(domain, mention)
            mention['html'] = html.encode('utf-8')
            mention['css'] = css.encode('utf-8')
            mention['js'] = js.encode('utf-8')
            mention_cache.setdefault(mention['html'], []).append(mention)

    for html, mentions in mention_cache.iteritems():
        annotation = self.createAnnotation(domain, document, html, mentions)
        annotation['displayRelevance'] = '2000'
        annotation['displayRank'] = '2000'
        document.addAnnotation(annotation)

    document.addAnnotation(Annotation(), domain)
    print('done adding annotations.')
def on_activate_event(self, document):
    """Annotate *document* with ScienceWISE ontology definitions.

    The UTF-8 encoded document text and its MD5 hex digest are POSTed to the
    ScienceWISE service, and the JSON response is iterated as a sequence of
    results. For each result that has a non-empty 'value' (the term) and a
    'link', the term is located with ``document.findInContext`` using the
    result's 'before'/'after' context. When at least one match is found, a
    'ScienceWISE' annotation covering all matches is created, carrying one
    'property:definitions' entry per definition in the result.

    All annotations are added to the document in a single
    ``addAnnotations`` call; nothing is added when there are none.

    Errors raised by ``urllib2.urlopen`` or ``json.loads`` propagate to the
    caller.
    """
    text = document.text().encode('utf8')
    payload = urllib.urlencode({
        'text': text,
        'chksum': hashlib.md5(text).hexdigest(),
    })

    connection = urllib2.urlopen(
        'http://beta.sciencewise.info/api/utopia', payload, timeout=8)
    try:
        results = json.loads(connection.read())
    finally:
        # Release the socket even if reading or decoding fails.
        connection.close()

    annotations = []
    for result in results:
        # ``or`` fallbacks also cover keys that are present but JSON null.
        context = result.get('context') or {}
        before = context.get('before') or ''
        after = context.get('after') or ''
        term = result.get('value') or ''
        link = result.get('link')
        definitions = [
            (definition.get('url'), definition.get('title'))
            for definition in result.get('definitions') or []
        ]

        if not term or link is None:
            continue

        matches = document.findInContext(before, term, after)
        if len(matches) == 0:
            continue

        annotation = spineapi.Annotation()
        annotation['concept'] = 'ScienceWISE'
        annotation['property:webpageUrl'] = link
        annotation['property:term'] = term
        annotation['property:name'] = 'Definitions of {0}'.format(term)
        annotation['property:description'] = 'ScienceWISE ontology definitions'
        annotation['property:sourceDatabase'] = 'sciencewise'
        annotation['property:sourceDescription'] = '<p><a href="http://sciencewise.info/">ScienceWISE</a> provides physicists with article annotation and scientific bookmarking.</p>'
        for definition_url, title in definitions:
            annotation.insertProperty(
                'property:definitions',
                '{0} {1}'.format(definition_url, title))
        for match in matches:
            annotation.addExtent(match)
        annotations.append(annotation)

    if len(annotations) > 0:
        document.addAnnotations(annotations)
def on_activate_event(self, document):
    """Annotate *document* with ScienceWISE ontology definitions.

    The UTF-8 encoded document text and its MD5 hex digest are POSTed to the
    ScienceWISE service, and the JSON response is iterated as a sequence of
    results. For each result that has a non-empty 'value' (the term) and a
    'link', the term is located with ``document.findInContext`` using the
    result's 'before'/'after' context. When at least one match is found, a
    'ScienceWISE' annotation covering all matches is created, carrying one
    'property:definitions' entry per definition in the result.

    All annotations are added to the document in a single
    ``addAnnotations`` call; nothing is added when there are none.

    Errors raised by ``urllib2.urlopen`` or ``json.loads`` propagate to the
    caller.
    """
    text = document.text().encode('utf8')
    payload = urllib.urlencode({
        'text': text,
        'chksum': hashlib.md5(text).hexdigest(),
    })

    connection = urllib2.urlopen(
        'http://beta.sciencewise.info/api/utopia', payload, timeout=8)
    try:
        results = json.loads(connection.read())
    finally:
        # Release the socket even if reading or decoding fails.
        connection.close()

    annotations = []
    for result in results:
        # ``or`` fallbacks also cover keys that are present but JSON null.
        context = result.get('context') or {}
        before = context.get('before') or ''
        after = context.get('after') or ''
        term = result.get('value') or ''
        link = result.get('link')
        definitions = [
            (definition.get('url'), definition.get('title'))
            for definition in result.get('definitions') or []
        ]

        if not term or link is None:
            continue

        matches = document.findInContext(before, term, after)
        if len(matches) == 0:
            continue

        annotation = spineapi.Annotation()
        annotation['concept'] = 'ScienceWISE'
        annotation['property:webpageUrl'] = link
        annotation['property:term'] = term
        annotation['property:name'] = 'Definitions of {0}'.format(term)
        annotation['property:description'] = 'ScienceWISE ontology definitions'
        annotation['property:sourceDatabase'] = 'sciencewise'
        annotation['property:sourceDescription'] = '<p><a href="http://sciencewise.info/">ScienceWISE</a> provides physicists with article annotation and scientific bookmarking.</p>'
        for definition_url, title in definitions:
            annotation.insertProperty(
                'property:definitions',
                '{0} {1}'.format(definition_url, title))
        for match in matches:
            annotation.addExtent(match)
        annotations.append(annotation)

    if len(annotations) > 0:
        document.addAnnotations(annotations)
def on_activate_event(self, document, data=None):
    """Annotate *document* with 3DM mentions for the requested domain.

    ``data`` is a mapping from which the keys "action" and "domain" are
    read; it defaults to an empty mapping. ``self.annotatedDomains`` is
    initialised to an empty list if it is ``None``. Annotation only happens
    when the action is "annotate".

    In that case the document text and PubMed id ("0" when none is found)
    are sent to ``self.getMentions`` for the domain. Every returned mention
    whose ``mentionType`` is neither "SPECIES" nor "PDB" has its ``data``
    replaced by ``self.rewriteData(mention)`` and receives UTF-8 encoded
    ``html``, ``css`` and ``js`` attributes from ``self.buildHtml``.
    Mentions are grouped by that HTML, and one annotation per group is built
    with ``self.createAnnotation`` and added to the document, followed by an
    empty annotation added under the domain name.
    """
    # A fresh mapping per call avoids sharing one default dict between calls.
    if data is None:
        data = {}
    action = data.get("action")
    domain = data.get("domain")

    if self.annotatedDomains is None:
        self.annotatedDomains = []

    if action != "annotate":
        return

    print("starting 3DM annotation . . .")

    pubmedId = common.utils.metadata(document, "pmid")
    if pubmedId is None:
        pubmedId = "0"

    print("sending text to remote server (" + pubmedId + "). . .")
    textMentions = self.getMentions(domain, document.text(), pubmedId)
    print("received response, adding annotations for domain " + domain + " . . .")

    # Mentions that render to the same HTML share a single annotation.
    mention_cache = {}
    for mention in textMentions:
        if mention.mentionType not in ("SPECIES", "PDB"):
            # The data must be rewritten before the HTML is built from it.
            mention.data = self.rewriteData(mention)
            html, css, js = self.buildHtml(domain, mention)
            mention.html = html.encode("utf-8")
            mention.css = css.encode("utf-8")
            mention.js = js.encode("utf-8")
            mention_cache.setdefault(mention.html, []).append(mention)

    for html, mentions in mention_cache.iteritems():
        annotation = self.createAnnotation(domain, document, html, mentions)
        annotation["displayRelevance"] = "2000"
        annotation["displayRank"] = "2000"
        document.addAnnotation(annotation)

    document.addAnnotation(Annotation(), domain)
    print("done adding annotations.")
def after_ready_event(self, document):
    """Hyperlink RSC compound and ontology mentions found in *document*.

    The first document annotation whose concept is 'RSCMetadataLink'
    supplies a DOI. That DOI is sent to the RSC federated search service and
    the XML response is scanned for an article with the same DOI (compared
    case-insensitively). If one is found:

    * every ``compname`` in the article body that refers to a ``compound``
      in the article back matter is linked to ChemSpider when the compound
      has a CSID link with text, and to the RSC compound landing page
      otherwise;
    * every ``annref`` in the article body that refers to an ``annotation``
      in the back matter with a ``link`` is linked to the IUPAC Gold Book
      when the link starts with 'AU:', and to the RSC ontology landing page
      otherwise.

    Each link becomes one 'Hyperlink' annotation whose extents are the
    matches of the mention text in the document. Only the first
    'RSCMetadataLink' annotation is processed.

    Errors raised by ``urllib2.urlopen`` or by XML parsing propagate to the
    caller.
    """
    ns = '{http://www.rsc.org/schema/rscart38}'

    def path(*steps):
        # Relative ElementPath in which every step is namespace-qualified.
        return './' + '/'.join(ns + step for step in steps)

    def mention_texts(article, ref_tag, nested_tag):
        # Map each body reference's 'idrefs' attribute to its text content.
        texts = {}
        for ref in article.findall(path('art-body', ref_tag)):
            # Nested definition elements are stripped first so that their
            # text does not end up in the mention text.
            etree.strip_elements(ref, ns + nested_tag, with_tail=False)
            texts[ref.attrib['idrefs']] = etree.tounicode(ref, method='text')
        return texts

    def add_hyperlink(url, phrase):
        # One 'Hyperlink' annotation spanning every match of *phrase*.
        options = spineapi.IgnoreCase + spineapi.WholeWordsOnly
        hyperlink = spineapi.Annotation()
        hyperlink['concept'] = 'Hyperlink'
        hyperlink['property:webpageUrl'] = url
        for match in document.search(phrase, options):
            hyperlink.addExtent(match)
        document.addAnnotation(hyperlink)

    for metadata_link in document.annotations():
        if metadata_link.get('concept') != 'RSCMetadataLink':
            continue

        doi = metadata_link['property:doi'].upper()
        xmlquery = (
            '<SearchCriteria><SearchTerm>'
            '<Category>Journal</Category><ContentType>All</ContentType>'
            '<Criterias><NameValue><Name>FreeText</Name>'
            '<Value>"%s"</Value></NameValue></Criterias>'
            '<Source>Utopia</Source></SearchTerm>'
            '<PageNo>1</PageNo><PageSize>10</PageSize>'
            '<SortBy>Relevance</SortBy></SearchCriteria>'
        ) % doi
        params = {'federatedsearchname': 'Utopia', 'inputxml': xmlquery}
        url = 'http://pubs.rsc.org/en/federated/search' + '?%s' % urllib.urlencode(params)

        response = urllib2.urlopen(url, timeout=14)
        try:
            root = etree.fromstring(response.read())
        finally:
            # Release the socket even if reading or parsing fails.
            response.close()

        # The search can return more than one article, so keep only the one
        # whose DOI matches.
        article = None
        article_id = None
        for candidate in root.findall(path('article')):
            found_doi = candidate.findtext(
                path('metainfo', "meta[@field='doi']"))
            if found_doi is None:
                found_doi = candidate.findtext(path('art-admin', 'doi'))
            if found_doi is not None and found_doi.upper() == doi:
                article = candidate
                docid = candidate.find(
                    path('metainfo', "meta[@field='docid']"))
                if docid is not None:
                    article_id = docid.text
                break

        if article is not None:
            # Text of the compounds / ontology terms mentioned in the body,
            # keyed by the id of the back-matter element they refer to.
            compound_text = mention_texts(article, 'compname', 'compound')
            annotation_text = mention_texts(article, 'annref', 'annotation')

            for compound in article.findall(path('art-back', 'compound')):
                compound_id = compound.attrib['id']
                if compound_id not in compound_text:
                    continue
                phrase = compound_text[compound_id]
                csid = compound.find(path("link[@type='CSID']"))
                if csid is not None and csid.text is not None:
                    # Drops the first five characters of the link text --
                    # presumably a 'CSID:' prefix; confirm against the schema.
                    url = 'http://www.chemspider.com/Chemical-Structure.%s.html' % csid.text[5:]
                else:
                    # Otherwise use the RSC landing page. The compound name
                    # is free text, so it is percent-encoded to keep the
                    # query string well formed. ``compound_id[4:]`` drops
                    # what is presumably a fixed id prefix.
                    url = 'http://www.rsc.org/publishing/journals/prospect/cheminfo.asp?XMLID=%s&compoundtext=%s&MSID=%s' % (
                        compound_id[4:],
                        urllib.quote(phrase.encode('utf-8'), ''),
                        article_id)
                add_hyperlink(url, phrase)

            for definition in article.findall(path('art-back', 'annotation')):
                definition_id = definition.attrib['id']
                if definition_id not in annotation_text:
                    continue
                link = definition.findtext(path('link'))
                if link is None:
                    continue
                if link[:3] == 'AU:':
                    url = 'http://goldbook.iupac.org/%s.html' % link[3:]
                else:
                    url = 'http://www.rsc.org/publishing/journals/prospect/ontology.asp?id=%s&MSID=%s' % (
                        link, article_id)
                add_hyperlink(url, annotation_text[definition_id])

        # Only the first RSCMetadataLink annotation is handled.
        break
def after_ready_event(self, document):
    """Hyperlink RSC compound and ontology mentions found in *document*.

    The first document annotation whose concept is 'RSCMetadataLink'
    supplies a DOI. That DOI is sent to the RSC federated search service and
    the XML response is scanned for an article with the same DOI (compared
    case-insensitively). If one is found:

    * every ``compname`` in the article body that refers to a ``compound``
      in the article back matter is linked to ChemSpider when the compound
      has a CSID link with text, and to the RSC compound landing page
      otherwise;
    * every ``annref`` in the article body that refers to an ``annotation``
      in the back matter with a ``link`` is linked to the IUPAC Gold Book
      when the link starts with 'AU:', and to the RSC ontology landing page
      otherwise.

    Each link becomes one 'Hyperlink' annotation whose extents are the
    matches of the mention text in the document. Only the first
    'RSCMetadataLink' annotation is processed.

    Errors raised by ``urllib2.urlopen`` or by XML parsing propagate to the
    caller.
    """
    ns = '{http://www.rsc.org/schema/rscart38}'

    def path(*steps):
        # Relative ElementPath in which every step is namespace-qualified.
        return './' + '/'.join(ns + step for step in steps)

    def mention_texts(article, ref_tag, nested_tag):
        # Map each body reference's 'idrefs' attribute to its text content.
        texts = {}
        for ref in article.findall(path('art-body', ref_tag)):
            # Nested definition elements are stripped first so that their
            # text does not end up in the mention text.
            etree.strip_elements(ref, ns + nested_tag, with_tail=False)
            texts[ref.attrib['idrefs']] = etree.tounicode(ref, method='text')
        return texts

    def add_hyperlink(url, phrase):
        # One 'Hyperlink' annotation spanning every match of *phrase*.
        options = spineapi.IgnoreCase + spineapi.WholeWordsOnly
        hyperlink = spineapi.Annotation()
        hyperlink['concept'] = 'Hyperlink'
        hyperlink['property:webpageUrl'] = url
        for match in document.search(phrase, options):
            hyperlink.addExtent(match)
        document.addAnnotation(hyperlink)

    for metadata_link in document.annotations():
        if metadata_link.get('concept') != 'RSCMetadataLink':
            continue

        doi = metadata_link['property:doi'].upper()
        xmlquery = (
            '<SearchCriteria><SearchTerm>'
            '<Category>Journal</Category><ContentType>All</ContentType>'
            '<Criterias><NameValue><Name>FreeText</Name>'
            '<Value>"%s"</Value></NameValue></Criterias>'
            '<Source>Utopia</Source></SearchTerm>'
            '<PageNo>1</PageNo><PageSize>10</PageSize>'
            '<SortBy>Relevance</SortBy></SearchCriteria>'
        ) % doi
        params = {'federatedsearchname': 'Utopia', 'inputxml': xmlquery}
        url = 'http://pubs.rsc.org/en/federated/search' + '?%s' % urllib.urlencode(params)

        response = urllib2.urlopen(url, timeout=14)
        try:
            root = etree.fromstring(response.read())
        finally:
            # Release the socket even if reading or parsing fails.
            response.close()

        # The search can return more than one article, so keep only the one
        # whose DOI matches.
        article = None
        article_id = None
        for candidate in root.findall(path('article')):
            found_doi = candidate.findtext(
                path('metainfo', "meta[@field='doi']"))
            if found_doi is None:
                found_doi = candidate.findtext(path('art-admin', 'doi'))
            if found_doi is not None and found_doi.upper() == doi:
                article = candidate
                docid = candidate.find(
                    path('metainfo', "meta[@field='docid']"))
                if docid is not None:
                    article_id = docid.text
                break

        if article is not None:
            # Text of the compounds / ontology terms mentioned in the body,
            # keyed by the id of the back-matter element they refer to.
            compound_text = mention_texts(article, 'compname', 'compound')
            annotation_text = mention_texts(article, 'annref', 'annotation')

            for compound in article.findall(path('art-back', 'compound')):
                compound_id = compound.attrib['id']
                if compound_id not in compound_text:
                    continue
                phrase = compound_text[compound_id]
                csid = compound.find(path("link[@type='CSID']"))
                if csid is not None and csid.text is not None:
                    # Drops the first five characters of the link text --
                    # presumably a 'CSID:' prefix; confirm against the schema.
                    url = 'http://www.chemspider.com/Chemical-Structure.%s.html' % csid.text[5:]
                else:
                    # Otherwise use the RSC landing page. The compound name
                    # is free text, so it is percent-encoded to keep the
                    # query string well formed. ``compound_id[4:]`` drops
                    # what is presumably a fixed id prefix.
                    url = 'http://www.rsc.org/publishing/journals/prospect/cheminfo.asp?XMLID=%s&compoundtext=%s&MSID=%s' % (
                        compound_id[4:],
                        urllib.quote(phrase.encode('utf-8'), ''),
                        article_id)
                add_hyperlink(url, phrase)

            for definition in article.findall(path('art-back', 'annotation')):
                definition_id = definition.attrib['id']
                if definition_id not in annotation_text:
                    continue
                link = definition.findtext(path('link'))
                if link is None:
                    continue
                if link[:3] == 'AU:':
                    url = 'http://goldbook.iupac.org/%s.html' % link[3:]
                else:
                    url = 'http://www.rsc.org/publishing/journals/prospect/ontology.asp?id=%s&MSID=%s' % (
                        link, article_id)
                add_hyperlink(url, annotation_text[definition_id])

        # Only the first RSCMetadataLink annotation is handled.
        break