def on_persist_event(self, document):
    """Synchronise queued annotation changes of ``document`` with the server.

    Two scratch lists are processed, and only when the document has an
    'identifiers[utopia]' metadata value:

    * 'PersistQueue': each annotation is converted with
      ``kend.converter.Annotation.spineapi2kend`` and sent through
      ``client.persistAnnotation``.  When the reply is a
      ``kend.model.Annotation`` its server-side fields and media descriptions
      are copied back and the annotation is moved from the queue into the
      document's default list.
    * the list named by ``document.deletedItemsScratchId()``: each annotation
      is deleted on the server and then removed from the document.

    Annotations carrying 'session:volatile' are never sent to the server.
    Exceptions raised by the converter or the client propagate to the caller
    (the previous bare ``except: raise`` wrapper did exactly that).
    """
    client = kend.client.Client()
    document_id = utopia.tools.utils.metadata(document, 'identifiers[utopia]')
    if document_id is None:
        return

    server_fields = ('id', 'created', 'author', 'revision', 'edit',
                     'media_edit')
    media_fields = ['name', 'src', 'type', 'revision', 'size', 'edit']

    for annotation in document.annotations('PersistQueue'):
        if 'session:volatile' in annotation:
            continue
        ka = kend.converter.Annotation.spineapi2kend(annotation, document_id)
        ka.context = self._context_
        updated = client.persistAnnotation(ka, context=self._context_)
        if not isinstance(updated, kend.model.Annotation):
            continue

        # Copy back what the server assigned.
        for key in server_fields:
            annotation[key] = getattr(updated, key)

        # Replace the media descriptions with the server's view of them;
        # each one is stored as a URL-encoded property value.
        annotation.removePropertyAll('session:media')
        for media in updated.media:
            media_dict = {}
            for field in media_fields:
                if hasattr(media, field):
                    media_dict[field] = getattr(media, field)
            annotation.insertProperty('session:media',
                                      urllib.urlencode(media_dict))

        # Persisted: move it out of the queue into the default list.
        document.removeAnnotation(annotation, 'PersistQueue')
        document.addAnnotation(annotation)

    for annotation in document.annotations(document.deletedItemsScratchId()):
        if 'session:volatile' not in annotation:
            ka = kend.converter.Annotation.spineapi2kend(annotation,
                                                         document_id)
            client.deleteAnnotation(ka)
        document.removeAnnotation(annotation,
                                  document.deletedItemsScratchId())
        document.removeAnnotation(annotation)
def on_load_event(self, document):
    """Collate the document's OutlineItem annotations into an 'Outline' panel.

    Every annotation whose concept is 'OutlineItem' is indexed by its
    'property:outlinePosition' (a dotted number such as '1.2', turned into
    the tuple (1, 2)).  The items are rendered, in sorted position order, as
    nested HTML lists of links to 'property:destinationAnchorName', and the
    result is added to the document as a 'Collated' annotation.  Nothing is
    added when there are no usable outline items.
    """
    outline = {}
    for a in document.annotations():
        if a.get('concept') != 'OutlineItem':
            continue
        position = a.get('property:outlinePosition')
        if position is None:
            # An item without a position cannot be placed in the tree.
            continue
        outline[tuple([int(x) for x in position.split('.')])] = a

    if not outline:
        return

    # html='<div><style media="screen" type="text/css">ul { list-style-type: none; }</style><ul>'
    parts = ['<div>']
    # Depths of the lists that are currently open, innermost last.  Tracking
    # them explicitly keeps every <ul>/<li> balanced, including when the
    # outline climbs back more than one level at once.
    open_levels = []
    for position in sorted(outline):
        level = len(position)
        while open_levels and open_levels[-1] > level:
            parts.append('</li></ul>')
            open_levels.pop()
        if open_levels and open_levels[-1] == level:
            parts.append('</li><li>')
        else:
            parts.append('<ul><li>')
            open_levels.append(level)
        item = outline[position]
        parts.append(
            '<a href="#" title="{0}" target="pdf; anchor={0}">{1}</a>'.format(
                item.get('property:destinationAnchorName'),
                cgi.escape(item.get('property:outlineTitle'),
                           quote=True).encode('ascii', 'xmlcharrefreplace')))
    parts.append('</li></ul>' * len(open_levels))
    parts.append('</div>')
    html = ''.join(parts)

    a = spineapi.Annotation()
    a['concept'] = 'Collated'
    a['property:name'] = 'Outline'
    a['property:description'] = 'Document Structure'
    a['session:weight'] = '10000'
    a['property:html'] = html
    document.addAnnotation(a)
def on_activate_event(self, document):
    """Annotate ``document`` with text mentions, once per document.

    The work is skipped when the 'NucleaRDB cache' scratch list already holds
    an annotation.  Otherwise the mentions returned by
    ``self.getMentions(document.text(), pubmedId)`` are grouped by their
    ``html`` attribute (mentions whose ``mentionType`` is 'SPECIES' are
    ignored), one annotation per group is built with
    ``self.createAnnotation`` and added to the document, and finally an empty
    marker annotation is added to 'NucleaRDB cache'.
    """
    if len(document.annotations('NucleaRDB cache')) != 0:
        return

    print('annotating stuff . . .')
    pubmedId = common.utils.metadata(document, 'pmid')
    if pubmedId is not None:
        print('found pubmed id: ' + pubmedId)
    else:
        print('did not find pubmed id')

    # Group the non-species mentions by the HTML they render to.
    mention_cache = {}
    for mention in self.getMentions(document.text(), pubmedId):
        if mention.mentionType != 'SPECIES':
            mention_cache.setdefault(mention.html, []).append(mention)

    for html, mentions in mention_cache.items():
        annotation = self.createAnnotation(document, html, mentions)
        annotation['displayRelevance'] = '2000'
        annotation['displayRank'] = '2000'
        document.addAnnotation(annotation)

    # Marker consulted by the check at the top on the next activation.
    document.addAnnotation(spineapi.Annotation(), 'NucleaRDB cache')
def after_ready_event(self, document):
    """Collate the document's OutlineItem annotations into an 'Outline' panel.

    Every annotation whose concept is 'OutlineItem' is indexed by its
    'property:outlinePosition' (a dotted number such as '1.2', turned into
    the tuple (1, 2)).  The items are rendered, in sorted position order, as
    nested HTML lists of links to 'property:destinationAnchorName', and the
    result is added to the document as a 'Collated' annotation with
    'session:weight' 999.  Nothing is added when there are no usable outline
    items.
    """
    outline = {}
    for a in document.annotations():
        if a.get('concept') != 'OutlineItem':
            continue
        position = a.get('property:outlinePosition')
        if position is None:
            # An item without a position cannot be placed in the tree.
            continue
        outline[tuple([int(x) for x in position.split('.')])] = a

    if not outline:
        return

    # html='<div><style media="screen" type="text/css">ul { list-style-type: none; }</style><ul>'
    fragments = ['<div>']
    # Depths of the currently open lists, innermost last; used to emit
    # exactly one closing '</li></ul>' per list that is left.
    depth_stack = []
    for key in sorted(outline):
        depth = len(key)
        while depth_stack and depth_stack[-1] > depth:
            fragments.append('</li></ul>')
            depth_stack.pop()
        if depth_stack and depth_stack[-1] == depth:
            # Sibling of the previous item.
            fragments.append('</li><li>')
        else:
            # First item overall, or first child of the previous item.
            fragments.append('<ul><li>')
            depth_stack.append(depth)
        entry = outline[key]
        fragments.append(
            '<a href="#" title="{0}" target="pdf; anchor={0}">{1}</a>'.format(
                entry.get('property:destinationAnchorName'),
                cgi.escape(entry.get('property:outlineTitle'),
                           quote=True).encode('ascii', 'xmlcharrefreplace')))
    fragments.append('</li></ul>' * len(depth_stack))
    fragments.append('</div>')
    html = ''.join(fragments)

    a = spineapi.Annotation()
    a['concept'] = 'Collated'
    a['property:name'] = 'Outline'
    a['property:description'] = 'Document Structure'
    a['session:weight'] = '999'
    a['property:html'] = html
    document.addAnnotation(a)
def on_activate_event(self, document):
    """Annotate ``document`` with text mentions, once per document.

    The work is skipped when the 'GPCRDB cache' scratch list already holds an
    annotation.  Otherwise the mentions returned by
    ``self.getMentions(document.text(), pubmedId)`` are grouped by their
    ``html`` attribute (mentions whose ``mentionType`` is 'SPECIES' are
    ignored), one annotation per group is built with
    ``self.createAnnotation`` and added to the document, and finally an empty
    marker annotation is added to 'GPCRDB cache'.
    """
    if len(document.annotations('GPCRDB cache')) != 0:
        return

    print('annotating stuff . . .')
    pubmedId = utopialib.utils.metadata(document, 'identifiers[pubmed]')
    if pubmedId is not None:
        print('found pubmed id: ' + pubmedId)
    else:
        print('did not find pubmed id')

    # Group the non-species mentions by the HTML they render to.
    mentions_by_html = {}
    for mention in self.getMentions(document.text(), pubmedId):
        if mention.mentionType != 'SPECIES':
            mentions_by_html.setdefault(mention.html, []).append(mention)

    for html, mentions in mentions_by_html.items():
        annotation = self.createAnnotation(document, html, mentions)
        annotation['displayRelevance'] = '2000'
        annotation['displayRank'] = '2000'
        document.addAnnotation(annotation)

    # Marker consulted by the check at the top on the next activation.
    document.addAnnotation(spineapi.Annotation(), 'GPCRDB cache')
def on_filter_event(self, document, data=None):
    """Give 'Definition' annotations a default tooltip.

    An annotation is touched only when its concept is 'Definition', it has no
    'displayTooltip' yet, and it carries both 'property:name' and
    'property:description'.  Its 'displayTooltip' is then set to
    '<strong>NAME</strong>: DESCRIPTION'.  ``data`` is accepted but unused.
    """
    for entry in document.annotations():
        if entry.get('concept') != 'Definition':
            continue
        if 'displayTooltip' in entry:
            # Never overwrite a tooltip that is already there.
            continue
        if 'property:name' not in entry:
            continue
        if 'property:description' not in entry:
            continue
        pieces = (entry['property:name'], entry['property:description'])
        entry['displayTooltip'] = '<strong>%s</strong>: %s' % pieces
def on_filter_event(self, document, data=None):
    """Give link annotations a default tooltip describing their target.

    Applies to annotations whose concept is 'Hyperlink' or 'WebPage', that
    have a 'property:webpageUrl' and no 'displayTooltip' yet:

    * 'mailto:' URLs show the address after the scheme;
    * URLs starting with '#' show 'property:destinationAnchorName';
    * every other URL is shown as-is.

    ``data`` is accepted but unused.
    """
    for link in document.annotations():
        if link.get('concept') not in ['Hyperlink', 'WebPage']:
            continue
        if 'displayTooltip' in link:
            continue
        if 'property:webpageUrl' not in link:
            continue

        target = link['property:webpageUrl']
        if target.startswith("mailto:"):
            # Drop the 7-character 'mailto:' prefix.
            template = '<span>Email:</span><br/> <strong>%s</strong>'
            shown = target[7:]
        elif target.startswith("#"):
            template = '<span>Internal Link to:</span><br/> <strong>%s</strong>'
            shown = link['property:destinationAnchorName']
        else:
            template = '<span>Link to:</span><br/> <strong>%s</strong>'
            shown = target
        link['displayTooltip'] = template % shown
def on_filter_event(self, document, data=None):
    """Move the demo-logo flag of annotations onto separate overlays.

    For every annotation that is not itself a 'DemoLogoOverlay' and whose
    'property:demo_logo' is '1', the flag is removed from the annotation and
    a new 'DemoLogoOverlay' annotation carrying the flag and the same extents
    and areas is added to the document.  ``data`` is accepted but unused.
    """
    for flagged in document.annotations():
        if flagged.get('concept') == 'DemoLogoOverlay':
            # Overlays keep the flag themselves; do not process them again.
            continue
        if flagged.get('property:demo_logo') != '1':
            continue

        flagged.removePropertyAll('property:demo_logo')

        logo = spineapi.Annotation()
        logo['concept'] = 'DemoLogoOverlay'
        logo['property:demo_logo'] = '1'
        logo.addExtents(flagged.extents())
        logo.addAreas(flagged.areas())
        document.addAnnotation(logo)
def on_filter_event(self, document, data=None):
    """Replace demo-logo flags with dedicated 'DemoLogoOverlay' annotations.

    Each annotation flagged with 'property:demo_logo' == '1' (other than the
    overlays themselves) loses the flag; an overlay annotation with the same
    extents and areas, carrying the flag, is added to the document instead.
    ``data`` is accepted but unused.
    """
    overlay_concept = 'DemoLogoOverlay'
    flag = 'property:demo_logo'

    for candidate in document.annotations():
        needs_overlay = (candidate.get('concept') != overlay_concept
                         and candidate.get(flag) == '1')
        if not needs_overlay:
            continue

        # Clear the flag first so the source annotation is handled only once.
        candidate.removePropertyAll(flag)

        replacement = spineapi.Annotation()
        replacement['concept'] = overlay_concept
        replacement[flag] = '1'
        replacement.addExtents(candidate.extents())
        replacement.addAreas(candidate.areas())
        document.addAnnotation(replacement)
def on_persist_event(self, document):
    """Synchronise queued annotation changes of ``document`` with the server.

    The document id comes from ``self._resolve(document)``; nothing is done
    when it is ``None``.  Otherwise:

    * every annotation in the 'PersistQueue' scratch list is converted with
      ``kend.converter.Annotation.spineapi2kend`` and sent through
      ``client.persistAnnotation``.  When the reply is a
      ``kend.model.Annotation`` its server-side fields and media descriptions
      are copied back and the annotation is moved from the queue into the
      document's default list;
    * every annotation in the list named by
      ``document.deletedItemsScratchId()`` is deleted on the server and then
      removed from that list.

    Annotations carrying 'session:volatile' are never sent to the server.
    Exceptions raised by the converter or the client propagate to the caller
    (the previous bare ``except: raise`` wrapper did exactly that).
    """
    client = kend.client.Client()
    # The second value returned by _resolve is not needed here.
    document_id, _ = self._resolve(document)
    if document_id is None:
        return

    server_fields = ('id', 'created', 'author', 'revision', 'edit',
                     'media_edit')
    media_fields = ['name', 'src', 'type', 'revision', 'size', 'edit']

    for annotation in document.annotations('PersistQueue'):
        if 'session:volatile' in annotation:
            continue
        ka = kend.converter.Annotation.spineapi2kend(annotation, document_id)
        ka.context = self._context_
        updated = client.persistAnnotation(ka, context=self._context_)
        if not isinstance(updated, kend.model.Annotation):
            continue

        # Copy back what the server assigned.
        for key in server_fields:
            annotation[key] = getattr(updated, key)

        # Replace the media descriptions with the server's view of them;
        # each one is stored as a URL-encoded property value.
        annotation.removePropertyAll('session:media')
        for media in updated.media:
            media_dict = {}
            for field in media_fields:
                if hasattr(media, field):
                    media_dict[field] = getattr(media, field)
            annotation.insertProperty('session:media',
                                      urllib.urlencode(media_dict))

        # Persisted: move it out of the queue into the default list.
        document.removeAnnotation(annotation, 'PersistQueue')
        document.addAnnotation(annotation)

    for annotation in document.annotations(document.deletedItemsScratchId()):
        if 'session:volatile' not in annotation:
            ka = kend.converter.Annotation.spineapi2kend(annotation,
                                                         document_id)
            client.deleteAnnotation(ka)
        document.removeAnnotation(annotation,
                                  document.deletedItemsScratchId())
def after_ready_event(self, document):
    """Hyperlink compounds and ontology terms using RSC article metadata.

    The first annotation whose concept is 'RSCMetadataLink' supplies the DOI.
    The RSC federated search is queried for that DOI and the returned article
    with the same DOI (compared case-insensitively) is selected.  For that
    article:

    * each compound defined in ``art-back`` and mentioned in ``art-body``
      gets a 'Hyperlink' annotation pointing at its ChemSpider page when a
      CSID link is present, otherwise at the RSC compound landing page;
    * each annotation defined in ``art-back``, mentioned in ``art-body`` and
      carrying a link gets a 'Hyperlink' annotation pointing at the IUPAC
      Gold Book for 'AU:' links, otherwise at the RSC ontology landing page.

    The hyperlinks cover every whole-word, case-insensitive match of the
    mention text in the document.  Network and XML parsing errors propagate
    to the caller.
    """
    ns = '{http://www.rsc.org/schema/rscart38}'
    meta_path = './' + ns + 'metainfo/' + ns + 'meta'

    def mention_texts(article, mention_tag, definition_tag):
        # Map 'idrefs' -> mention text for the <mention_tag> children of
        # art-body, after stripping nested <definition_tag> elements.
        texts = {}
        for mention in article.findall(
                './' + ns + 'art-body/' + ns + mention_tag):
            # This removes (erroneous?) elements from inside the XML
            etree.strip_elements(mention, ns + definition_tag,
                                 with_tail=False)
            texts[mention.attrib['idrefs']] = etree.tounicode(mention,
                                                              method='text')
        return texts

    def add_hyperlink(text, url):
        # One annotation spanning every match of ``text``; it is added even
        # when the search finds nothing, as before.
        options = spineapi.WholeWordsOnly + spineapi.IgnoreCase
        matches = document.search(text, options)
        hyperlink = spineapi.Annotation()
        hyperlink['concept'] = 'Hyperlink'
        hyperlink['property:webpageUrl'] = url
        for match in matches:
            hyperlink.addExtent(match)
        document.addAnnotation(hyperlink)

    # Get (if present) the RSCMetadataLink annotation
    for metadata_link in document.annotations():
        if metadata_link.get('concept') != 'RSCMetadataLink':
            continue

        doi = metadata_link['property:doi'].upper()

        xmlquery = (
            '<SearchCriteria><SearchTerm><Category>Journal</Category>'
            '<ContentType>All</ContentType><Criterias><NameValue>'
            '<Name>FreeText</Name><Value>"%s"</Value></NameValue></Criterias>'
            '<Source>Utopia</Source></SearchTerm><PageNo>1</PageNo>'
            '<PageSize>10</PageSize><SortBy>Relevance</SortBy>'
            '</SearchCriteria>' % doi)
        params = {'federatedsearchname': 'Utopia', 'inputxml': xmlquery}
        search_url = ('http://pubs.rsc.org/en/federated/search'
                      + '?%s' % urllib.urlencode(params))
        response = urllib2.urlopen(search_url, timeout=14)
        try:
            searchresult = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        root = etree.fromstring(searchresult)

        # The search can return more than one article, so select only the
        # one with the correct DOI.
        thearticle = None
        articleID = None
        for article in root.findall('./' + ns + 'article'):
            found_doi = article.findtext(meta_path + "[@field='doi']")
            if found_doi is None:
                found_doi = article.findtext(
                    './' + ns + 'art-admin/' + ns + 'doi')
            if found_doi is not None and found_doi.upper() == doi:
                thearticle = article
                article_id_elem = article.find(meta_path + "[@field='docid']")
                if article_id_elem is not None:
                    articleID = article_id_elem.text
                break

        # If we get back a single valid article...
        if thearticle is not None:
            # Text of every compound / annotation mentioned in the body,
            # keyed by the id it refers to.
            compoundText = mention_texts(thearticle, 'compname', 'compound')
            annotationText = mention_texts(thearticle, 'annref', 'annotation')

            # Then for all the compounds that are defined in the article back
            for compound in thearticle.findall(
                    './' + ns + 'art-back/' + ns + 'compound'):
                compound_id = compound.attrib['id']
                if compound_id not in compoundText:
                    continue
                csid = compound.find('./' + ns + "link[@type='CSID']")
                if csid is not None and csid.text is not None:
                    # The first five characters of the CSID link text are
                    # dropped to leave the ChemSpider id.
                    url = ('http://www.chemspider.com/Chemical-Structure.%s.html'
                           % csid.text[5:])
                else:
                    # Otherwise, use the RSC landing page
                    url = ('http://www.rsc.org/publishing/journals/prospect/'
                           'cheminfo.asp?XMLID=%s&compoundtext=%s&MSID=%s'
                           % (compound_id[4:], compoundText[compound_id],
                              articleID))
                add_hyperlink(compoundText[compound_id], url)

            # Similarly, for all the annotations
            for term in thearticle.findall(
                    './' + ns + 'art-back/' + ns + 'annotation'):
                term_id = term.attrib['id']
                if term_id not in annotationText:
                    continue
                link = term.findtext('./' + ns + 'link')
                if link is None:
                    continue
                if link[:3] == 'AU:':
                    url = 'http://goldbook.iupac.org/%s.html' % link[3:]
                else:
                    url = ('http://www.rsc.org/publishing/journals/prospect/'
                           'ontology.asp?id=%s&MSID=%s' % (link, articleID))
                add_hyperlink(annotationText[term_id], url)

        # Only the first RSCMetadataLink annotation is processed.
        break
def on_ready_event(self, document):
    '''Fetch information from the Lazarus service

    When the 'permission' config value is set, the document's fingerprints
    are sent to the Lazarus document URL (the PDF itself is uploaded when the
    server answers 204), and the returned annotations are turned into:

    * outline anchors and OutlineItem annotations (only when the document has
      no outline yet);
    * citations, added to the scratch list of a new 'metadata' list;
    * one 'Collated' annotation per concept hit that has cross references
      and at least one match in the document;
    * one 'Collated' "Lazarus Expressions" annotation when any sentence
      expression could be located in the document.

    Without permission, and unless 'noprompt' is set, a 'Collated' annotation
    rendering 'tpl/denied.html' is added instead.
    '''
    permission = self.get_config('permission', False)
    if permission:
        # If an outline already exists, don't make a new one
        needs_outline = True
        for annotation in document.annotations():
            if annotation.get('concept') == 'OutlineItem':
                needs_outline = False
                break

        # The Lazarus server needs to know what this document is
        # NOTE(review): document_id is not used again in this function.
        document_id = utopia.tools.utils.metadata(document, 'identifiers[utopia]')
        this_doi = utopia.tools.utils.metadata(document, 'identifiers[doi]')
        if this_doi is not None:
            this_doi = u'doi:' + this_doi

        # Speak to server
        params = {'fingerprint': document.fingerprints()}
        url = '{0}?{1}'.format(laz_docUrl, urllib.urlencode(params, doseq=True))
        response = urllib2.urlopen(url, timeout=60)
        if response.getcode() == 204:
            # On a 204 reply the PDF bytes are posted to the same URL and
            # that second response is the one parsed below.
            request = urllib2.Request(
                url,
                data=document.data(),
                headers={'Content-Type': 'application/pdf'})
            response = urllib2.urlopen(request, timeout=60)
        #response = open('/Users/dave/Desktop/ananiadou_tibtech06.pdf-response.xml', 'r')

        # Create Metadata link annotation
        link = document.newAccList('metadata', 50)
        link['property:sourceDatabase'] = 'lazarus'
        link['property:sourceTitle'] = 'Lazarus'
        link['property:sourceDescription'] = self.sourceDescription
        link['property:sourceIcon'] = utopia.get_plugin_data_as_url(
            'images/lazarus-prefs-logo.png', 'image/png')

        headers = []  # NOTE(review): never read in this function
        pos = []  # current outline position, one counter per heading level
        refs = []  # 'Citation' annotations
        annotations = []  # everything converted from the response
        concepts = {}  # 'property:identifier' -> 'LazarusConcept' annotation
        hits = []  # 'LazarusConceptHit' annotations
        expression_annotations = []  # 'LazarusSentenceExpression' annotations

        for kAnnotation in kend.converter.XML.parse(
                response, kend.model.Document):
            #print kend.converter.XML.serialise(kAnnotation)[0]
            try:
                annotations.append(
                    utopia.tools.converters.Annotation.kend2spineapi(
                        kAnnotation, document))
            except:
                # NOTE(review): bare except silently drops any annotation
                # that fails to convert, whatever the error was.
                pass

        # Process the annotations in their 'structure:order'.
        annotations.sort(key=lambda a: int(a.get('structure:order', 0)))

        for sAnnotation in annotations:
            if sAnnotation['concept'] == 'structure_element':
                role, level = self.getHeaderRole(sAnnotation)
                if role is not None and needs_outline:
                    # Grow or shrink the position counters to this heading's
                    # level, then advance the counter of that level.
                    while len(pos) < level:
                        pos.append(0)
                    while len(pos) > level:
                        pos.pop()
                    pos[-1] += 1

                    outline = u'.'.join([unicode(i) for i in pos])
                    anchor_name = '#lazarus.outline.{0}'.format(outline)

                    # Anchor covering the heading itself...
                    anchor = spineapi.Annotation()
                    anchor['concept'] = 'Anchor'
                    anchor['property:anchor'] = anchor_name
                    anchor.addExtents(sAnnotation.extents())
                    anchor.addAreas(sAnnotation.areas())
                    document.addAnnotation(anchor)

                    # ...and the outline item that points at that anchor.
                    header = spineapi.Annotation()
                    header['concept'] = 'OutlineItem'
                    header['property:outlinePosition'] = outline
                    header['property:outlineTitle'] = u' '.join(
                        [e.text() for e in sAnnotation.extents()])
                    header['property:destinationAnchorName'] = anchor_name
                    document.addAnnotation(header)

                    # Prints the position and title of the heading.
                    print((u' ' * level + u'.'.join([unicode(i) for i in pos]) +
                           u' ' + u' '.join([
                               e.text() for e in sAnnotation.extents()
                           ])).encode('utf8'))
                elif 'bibitem' in sAnnotation.getAllProperties(
                        'structure:role'):
                    #refs.append(sAnnotation)
                    pass
            elif sAnnotation['concept'] == 'Citation':
                # Hack to fix a mistake in authors property name
                if 'property:author' in sAnnotation and not 'property:authors' in sAnnotation:
                    sAnnotation[
                        'property:authors'] = sAnnotation.getAllProperties(
                            'property:author')
                refs.append(sAnnotation)
            elif sAnnotation['concept'] == 'LazarusConcept':
                concept_id = sAnnotation.get('property:identifier')
                if concept_id is not None:
                    # Give the concept a fresh local id so hits can refer to it.
                    sAnnotation['id'] = str(uuid.uuid4())
                    concepts[concept_id] = sAnnotation
                    document.addAnnotation(sAnnotation, 'Lazarus Concept')
            elif sAnnotation['concept'] == 'LazarusConceptHit':
                hits.append(sAnnotation)
            elif sAnnotation['concept'] == 'LazarusSentenceExpression':
                expression_annotations.append(sAnnotation)
            else:
                document.addAnnotation(sAnnotation)

        # NOTE(review): this loop and the second one below do nothing; they
        # only hold commented-out debugging output.
        for ref in refs:
            #print(ref.get('structure:order', '0'))
            pass
        refs = sorted(refs, key=lambda ref: int(ref.get('property:order', '0')))
        for ref in refs:
            #print(ref.get('structure:order', '0'))
            pass
        for ref in refs:
            # Create Bibliography annotations
            #citation = {'unstructured': u' '.join([e.text() for e in ref.extents()])}
            #annotation = utopia.tools.utils.citation_to_annotation(citation)
            #annotation['property:order'] = ref.get('structure:order')
            #annotation.addExtents(ref.extents())
            #annotation.addAreas(ref.areas())
            #document.addAnnotation(annotation, link['scratch'])
            document.addAnnotation(ref, link['scratch'])

        # Now link hits to concepts
        for i, hit in enumerate(hits):
            concept_id = hit.get('property:identifier')
            concept = concepts.get(concept_id)
            if concept is not None:
                concept_uuid = concept.get('id')
                hit['property:concept_id'] = concept_uuid

                identifier = concept.get('property:identifier')
                name = concept.get('property:name', '???')
                # External sources are stored as a 'json:'-prefixed string.
                # NOTE(review): a value without that prefix stays a string
                # and the .append() calls below would then fail.
                sources = concept.get('property:externalSources', 'json:[]')
                if sources.startswith('json:'):
                    sources = json.loads(sources[5:])
                # The chemical identifiers are appended as extra sources
                # without a 'uri'.
                if 'property:stdInchiKey' in concept:
                    sources.append({
                        'database': ' InchiKey',
                        'identifier': concept['property:stdInchiKey']
                    })
                if 'property:canonicalSmiles' in concept:
                    sources.append({
                        'database': ' SMILES',
                        'identifier': concept['property:canonicalSmiles']
                    })
                # NOTE(review): kind is computed but not used afterwards.
                kind = concept.get('property:kind')
                kind = self.dbs.get(kind, {}).get('title', kind)

                # Sort the sources into HTML fragments per category:
                # 'definition', 'main', 'xref' and 'seeAlso'.
                links = {}
                for source in sources:
                    uri = source.get('uri')
                    if 'primary' in source.get('relationship', []):
                        links.setdefault('definition', [])
                        links['definition'].append(u'''
                            <a href="{uri}" title="{uri}">{database}</a>
                        '''.format(**source))
                    elif uri is None:
                        if source.get('database') in (' InchiKey', ' SMILES'):
                            links.setdefault('main', [])
                            links['main'].append(u'''
                                <tr><td>{database}:</td><td>{identifier}</td></tr>
                            '''.format(**source))
                    else:
                        identifier = source.get('identifier')
                        links_category = 'xref'
                        # NOTE(review): 'uri is None' cannot be true in this
                        # branch; the elif above already handled that case.
                        if 'seeAlso' in source.get('relationship', []) or uri is None:
                            links_category = 'seeAlso'
                        links.setdefault(links_category, [])
                        if identifier is not None:
                            links[links_category].append(u'''
                                <a href="{uri}" title="{uri}">{name}...</a> ({identifier})
                            '''.format(**source))
                        else:
                            links[links_category].append(u'''
                                <a href="{uri}" title="{uri}">{name}...</a>
                            '''.format(**source))

                style = u'''
                    <style>
                      .lazarus-table tbody {
                        border: none;
                      }
                      .lazarus-table td:first-of-type {
                        text-align: right;
                        font-weight: bold;
                      }
                      .lazarus-table td {
                        vertical-align: top;
                      }
                      .lazarus-table td:first-of-type {
                        white-space: nowrap;
                      }
                      .lazarus-table td:not(:first-of-type) {
                        word-break: break-all;
                      }
                      .lazarus-table tr td {
                        padding-top: 0ex;
                        padding-bottom: 0ex;
                      }
                      .lazarus-table tbody:not(:first-of-type) tr:first-of-type td {
                        padding-top: 1ex;
                      }
                    </style>
                '''
                html = u'''
                    <table class="lazarus-table">
                      <tr><td>Name:</td><td>{name}</td></tr>
                '''.format(**{'name': name})
                categories = {
                    'xref': 'Related:',
                    'seeAlso': 'See also:',
                    'definition': 'Defined in:'
                }
                # One <tbody> per non-empty category; duplicates are removed
                # and the fragments sorted.
                for links_category in ('main', 'xref', 'seeAlso', 'definition'):
                    links_title = categories.get(links_category)
                    these_links = sorted(
                        list(set(links.get(links_category, []))))
                    if len(these_links) > 0:
                        html += '<tbody>'
                        if links_category != 'main':
                            html += u'<tr><td>{0}</td><td>'.format(
                                links_title)
                            html += u'<br>'.join(these_links)
                            html += '</td></tr>'
                        else:
                            # 'main' fragments are already complete rows.
                            html += ''.join(these_links)
                        html += '</tbody>'
                        #pprint('------------------------')
                html += u'''
                    </table>
                '''
                #print(html)

                hasLinks = len(
                    links.get('xref', []) + links.get('seeAlso', [])) > 0

                ann = spineapi.Annotation()
                ann['concept'] = 'Collated'
                ann['property:name'] = u'{0}'.format(name)
                ann['property:description'] = 'Lazarus Concept'
                ann['session:semanticTerm'] = name
                ann['property:html'] = [style, html]
                ann['property:sourceDescription'] = self.sourceDescription
                ann['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                    'images/lazarus-prefs-logo.png', 'image/png')
                ann['session:overlay'] = 'hyperlink'
                ann['session:color'] = '#880000'

                count = 0
                # NOTE(review): looks like leftover debugging output.
                print('====', 7)
                if 'property:hitFragments' in hit:
                    hitFragments = hit.getAllProperties(
                        'property:hitFragments') or []
                    #pprint(hitFragments)
                    for hitFragment in hitFragments:
                        # A fragment has the form 'pre{!match!}post'.
                        pre, _, rest = hitFragment.partition('{!')
                        match, _, post = rest.partition('!}')
                        #pprint((pre, match, post))
                        matches = document.findInContext(pre, match, post,
                                                         fuzzy=True)
                        count += len(matches)
                        ann.addExtents(matches)
                # Only concepts that have cross references and were found in
                # the text are shown.
                if hasLinks and count > 0:
                    document.addAnnotation(ann)

        style = u'''
            <style>
              .lazarus-expression .box {
                background-color: #FFF0E8;
                border-color: #EEE0D8;
              }
              .lazarus-related {
                padding-left: 42px;
                background-image: url(%s);
                background-repeat: no-repeat;
                background-position: top left;
                background-size: 37px 48px;
                min-height: 53px;
              }
              .lazarus-related + .lazarus-related {
                margin-top: 5px;
                border-top: 1px dotted #aaa;
                padding-top: 5px;
                background-position-y: 5px;
                min-height: 58px;
              }
              .lazarus-sentence {
                padding-left: 0.5em;
                color: black;
              }
              .lazarus-sentence.negative {
                border-left: solid 5px #bb0000;
              }
              .lazarus-sentence.positive {
                border-left: solid 5px #008800;
              }
              .lazarus-sentence.negative a {
                color: #bb0000;
              }
              .lazarus-sentence.positive a {
                color: #008800;
              }
            </style>
        ''' % utopia.get_plugin_data_as_url('images/pdf-page-icon.png', 'image/png')

        # Collect the sentence expressions that can be located in the text.
        expressions = []
        for sAnnotation in expression_annotations:
            # NOTE(review): a value without the 'json:' prefix stays a string
            # and the exp.get() calls below would then fail.
            exp = sAnnotation.get('property:expressions', 'json:{}')
            if exp.startswith('json:'):
                exp = json.loads(exp[5:])
            context = sAnnotation.get('property:context')
            if context is not None:
                if exp.get('negative', False):
                    exp['posneg'] = 'negative'
                else:
                    exp['posneg'] = 'positive'

                # NOTE(review): looks like leftover debugging output.
                pprint(context)
                pprint(exp)

                matched_context = exp.get('context')
                matches = []
                if matched_context is not None:
                    # Whitespace runs are collapsed before searching.
                    matches = document.search(
                        re.sub(r'\s+', ' ', matched_context))
                if len(matches) > 0:
                    # NOTE(review): the slice drops the first and last
                    # character of the UUID string - confirm this is intended.
                    anchor_id = str(uuid.uuid4())[1:-1]
                    anchor = spineapi.Annotation()
                    anchor['concept'] = 'Anchor'
                    anchor['property:anchor'] = anchor_id
                    anchor.addExtents(matches)
                    document.addAnnotation(anchor)

                    exp.update({
                        'anchor_id': anchor_id,
                        'sentence': context
                    })
                    expressions.append(exp)

        # The four %s placeholders below are filled with JSON: the located
        # expressions, the document fingerprints, the related-documents URL
        # and this document's DOI (deleted from each result's related map).
        # NOTE(review): the script contains // line comments, so the line
        # breaks inside this literal are significant.
        js = u'''
            <script>
                $(document).on('DOMNodeInserted', function(e) {
                    var element = e.target;
                    $(element).filter('a[target="tab"]').add('a[target="tab"]', element).each(function () {
                        var fragment = $(this).closest('.-papyro-internal-citation').data('citation')['userdef']['first_fragment'];
                        $(this).attr('target', 'pdf; show=highlight; text=[' + encodeURIComponent(fragment) + ']');
                    });
                });

                $(function () {
                    var lazarus = {
                        expressions: %s,
                        fingerprints: %s,
                        relUrl: %s
                    };

                    var more_expressions_link = $('#lazarus-expression > p.more').hide();
                    var more_expressions_spinner = $('#lazarus-expression > div.spinner');

                    Spinners.create(more_expressions_spinner);
                    Spinners.play(more_expressions_spinner);

                    var exp_divs = [];
                    var identifiers = [];
                    for (var e = 0; e < lazarus.expressions.length; e++) {
                        var expression = lazarus.expressions[e];
                        var exp_div = $('<div class="box"></div>');
                        exp_div.data('expression', expression);
                        exp_div.hide();
                        exp_divs.push(exp_div);
                        identifiers.push(expression.identifiers);
                    }
                    var params = {
                        fingerprint: lazarus.fingerprints
                    };
                    var url = lazarus.relUrl + '?' + $.param(params, traditional=true);
                    $.ajax({
                        url: url,
                        type: 'POST',
                        dataType: 'json',
                        data: JSON.stringify(identifiers),
                        contentType: "application/json",
                        error: function (xhr, ajaxOptions, thrownError) {
                            console.log(xhr.statusText);
                            console.log(xhr.responseText);
                            console.log(xhr.status);
                            console.log(thrownError);
                            // FIXME do something here
                            Spinners.remove(more_expressions_spinner);
                        },
                        success: function (related) {
                            // Sort related according to the number of articles found
                            related.results.sort(function (l, r) {
                                var lv = Object.keys(l.related).length;
                                var rv = Object.keys(r.related).length;
                                return (lv > rv) ? -1 : (lv < rv) ? 1 : 0;
                            });
                            $.each(related.results, function (idx, result) {
                                var exp_div = exp_divs[idx];
                                var expression = exp_div.data('expression');
                                expression.related = result.related;
                                delete expression.related[%s];

                                split = expression.sentence.split(expression.context);
                                pre = split[0];
                                pre = pre.replace(/(\w)$/, '$1 ');
                                pre = pre.replace(/^\s*/, '');
                                match = expression.context;
                                post = split[1];
                                post = post.replace(/^(\w)/, ' $1');
                                post = post.replace(/\s*$/, '');
                                expression.pre = pre;
                                expression.match = match;
                                expression.post = post;

                                // Create expression element
                                exp_div.append('<p class="lazarus-sentence ' + expression.posneg + '">“' + expression.pre + '<a target="pdf; show=select; anchor=' + expression.anchor_id + '"><strong>' + expression.match + '</strong></a>' + expression.post + '”</p>');
                                exp_div.data('expression', expression);

                                $('#lazarus-expression > .content').append(exp_div);

                                if (Object.keys(expression.related).length > 0) {
                                    var related_div = $('<div class="expandable" title="Related expressions elsewhere"></div>');
                                    var related_div_content = $('<div></div>').appendTo(related_div);
                                    function on_expand() {
                                        related_div.off('papyro:expandable:expand', on_expand);
                                        $.each(expression.related, function (idx, obj) {
                                            fragments = [];
                                            $.each(obj, function (id, obj) {
                                                fragments.push(obj.context);
                                            });
                                            fragments.join('\\n');
                                            related_div_content.append($('<div class="lazarus-related unprocessed"></div>').append('<p><strong>“…'+fragments+'…”</strong></p>').hide().data('citation', {identifiers:{doi:idx},userdef:{first_fragment:fragments[0]}}));
                                            // .append(utopia.citation.render({identifiers:{doi:idx},first_fragment:fragments[0]}, true, true))
                                        });
                                        expression.related.length = 0; // empty for future

                                        if ($('.lazarus-related.unprocessed', exp_div).length > 0) {
                                            var more = $('<p class="more right"><a class="more">More related articles...</a></p>');
                                            related_div_content.append(more);
                                            function show_five_related(e) {
                                                e.preventDefault();

                                                $('.lazarus-related.unprocessed', exp_div).slice(0, 5).each(function (idx, obj) {
                                                    var citation = $(obj).data('citation');
                                                    $(obj).append(utopia.citation.render(citation, true, true));
                                                    $(obj).show().removeClass('unprocessed');
                                                });
                                                if ($('.lazarus-related.unprocessed', exp_div).length == 0) {
                                                    more.remove();
                                                }
                                            }
                                            more.on('click', show_five_related).click();
                                        }
                                    }
                                    related_div.on('papyro:expandable:expand', on_expand);
                                    exp_div.append(related_div);
                                    utopia.processNewContent(related_div);
                                }
                            });

                            Spinners.remove(more_expressions_spinner);
                            more_expressions_link.show();
                            $('a.more', more_expressions_link).click();
                        }
                    });

                    function append_five(e) {
                        e.preventDefault();

                        // Show the next five
                        $('#lazarus-expression > .content').children().filter(':hidden').slice(0,5).show();

                        // Hide the 'more' link if everything is now visible
                        if ($('#lazarus-expression > .content').children().filter(':hidden').length == 0) {
                            more_expressions_link.hide();
                        }
                    }

                    // Hook up 'more' link
                    $('#lazarus-expression > p.more > a.more').on('click', append_five).click();
                });
            </script>
        ''' % (json.dumps(expressions), json.dumps(
            document.fingerprints()), json.dumps(laz_docRelUrl),
               json.dumps(this_doi))
        #print(js.encode('utf8'))

        html = u'''
            <div id="lazarus-expression"><div class="content"></div><div class="spinner"></div><p class="more"><a class="more">More expressions...</a></p></div>
        '''

        # The expressions panel is only added when something was located.
        if len(expressions) > 0:
            ann = spineapi.Annotation()
            ann['concept'] = 'Collated'
            ann['property:name'] = 'Lazarus Expressions'
            ann['property:description'] = u'Summarizing expression(s)'
            ann['property:html'] = [js, style, html]
            ann['property:sourceDescription'] = self.sourceDescription
            ann['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                'images/lazarus-prefs-logo.png', 'image/png')
            document.addAnnotation(ann)

    else:  # no permission
        noprompt = self.get_config('noprompt', False)
        if not noprompt:
            # Show the 'tpl/denied.html' template, filled in with this
            # plugin's uuid, in place of any results.
            annotation = spineapi.Annotation()
            annotation['concept'] = 'Collated'
            params = {
                'uuid': self.uuid(),
            }
            annotation['property:html'] = utopia.get_plugin_data(
                'tpl/denied.html').format(**params)
            annotation['property:name'] = 'Lazarus'
            annotation[
                'property:description'] = 'Lazarus functionality is turned off'
            annotation[
                'property:sourceDescription'] = self.sourceDescription
            annotation[
                'property:sourceIcon'] = utopia.get_plugin_data_as_url(
                    'images/lazarus-prefs-logo.png', 'image/png')
            annotation['session:default'] = '1'
            document.addAnnotation(annotation)
def on_filter_event(self, document, data=None):
    """Replace legacy annotations with equivalents in the current layout.

    Annotations authored by 'http://utopia.cs.manchester.ac.uk/users/11679'
    whose concept is 'Definition' or 'DatabaseEntry', and that are not marked
    'session:legacy', are removed from the document.  Depending on the prefix
    of 'property:identifier' a volatile replacement marked 'session:legacy'
    is added: a PDB 'DatabaseEntry', a Wikipedia 'Definition' or a Glick
    glossary 'Definition'.  Annotations with any other identifier are removed
    without replacement.  ``data`` is accepted but unused.
    """
    legacy_author = 'http://utopia.cs.manchester.ac.uk/users/11679'

    def start_replacement(old, concept, database, source_html):
        # Fields common to every replacement annotation.
        new = spineapi.Annotation()
        new['concept'] = concept
        new['author'] = old['author']
        new['session:volatile'] = '1'
        new['session:legacy'] = '1'
        new['property:sourceDatabase'] = database
        new['property:sourceDescription'] = source_html
        return new

    def copy_present(old, new, keys):
        # Copy each listed property that the old annotation actually has.
        for key in keys:
            if key in old:
                new[key] = old[key]

    def finish_replacement(old, new):
        # Carry over the page regions and publish the replacement.
        for extent in old.extents():
            new.addExtent(extent)
        for area in old.areas():
            new.addArea(area)
        document.addAnnotation(new)

    for a in document.annotations():
        if a.get('author') != legacy_author:
            continue
        if a.get('concept') not in ('Definition', 'DatabaseEntry'):
            continue
        if 'session:legacy' in a:
            continue

        document.removeAnnotation(a)
        identifier = a.get('property:identifier', '')

        if identifier.startswith('http://bio2rdf.org/pdb:'):
            # PDB entry
            a2 = start_replacement(
                a, 'DatabaseEntry', 'pdb',
                '<p>The <a href="http://www.rcsb.org/">Protein Data Bank</a> of the Research Collaboratory for Structural Bioinformatics (<a href="http://home.rcsb.org/">RCSB</a>).</p>')
            a2['property:identifier'] = identifier
            a2['property:description'] = 'PDB entry {0}'.format(
                identifier[-4:].upper())
            if 'property:name' in a:
                # The last 11 characters of the legacy name are dropped.
                a2['property:name'] = a['property:name'][:-11]
            copy_present(a, a2, ('property:imageUrl',
                                 'property:molecularDescription',
                                 'property:webpageUrl',
                                 'property:embedded'))
            finish_replacement(a, a2)

        if identifier.startswith('http://dbpedia.org/resource/'):
            # Wikipedia entry
            a2 = start_replacement(
                a, 'Definition', 'wikipedia',
                '<p>Structured <a href="http://www.wikipedia.org/">Wikipedia</a> information provided by the <a href="http://DBpedia.org/">DBpedia</a> project.</p>')
            a2['property:description'] = a.get('property:summary',
                                               'Wikipedia entry')
            copy_present(a, a2, ('property:name',
                                 'property:identifier',
                                 'property:imageUrl',
                                 'property:summary',
                                 'property:webpageUrl'))
            finish_replacement(a, a2)

        if identifier.startswith(
                'http://www.portlandpress.com/utopia/glick/'):
            # Glick glossary entry
            a2 = start_replacement(
                a, 'Definition', 'glick',
                '<p>David M. Glick\'s <a href="http://www.portlandpress.com/pp/books/online/glick/search.htm">Glossary of Biochemistry and Molecular Biology</a>.</p><p>Made available by <a href="http://www.portlandpress.com/">Portland Press Limited</a>.</p>')
            a2['property:description'] = a[
                'property:description'] + '<p><em>(Glick Glossary/Portland Press Ltd.)</em></p>'
            a2['property:name'] = a['property:name']
            finish_replacement(a, a2)
def after_ready_event(self, document):
    """Add a headless "About this article" panel describing the document.

    The panel shows the document title, the publisher logo (taken from the
    first complete 'PublisherIdentity' annotation in the 'PublisherMetadata'
    list) and one table row per identifier that
    ``common.utils.metadata`` returns for the document.

    Changes over the previous version:
      * the publisher icon markup no longer carries a stray ``</td>`` (it
        was wrapped in a ``<td>...</td>`` a second time further down);
      * identifier rows are emitted in a fixed order instead of dict order;
      * locals no longer shadow the builtins ``format`` and ``id``;
      * nothing is added when there is no title, logo or identifier.
        NOTE(review): the source was whitespace-collapsed, so whether the
        panel used to depend on identifiers being present could not be
        determined -- confirm this guard matches the intended behaviour.
    """
    # (metadata key, row label, value template), in display order.
    identifier_rows = (
        ("doi", "DOI", u'<a href="http://dx.doi.org/{0}">{0}</a>'),
        ("issn", "ISSN", u"<strong>{0}</strong>"),
        ("pii", "PII", u"<strong>{0}</strong>"),
        ("pmid", "Pubmed",
         u'<a href="http://www.ncbi.nlm.nih.gov/pubmed/{0}">{0}</a>'),
        ("pmcid", "PMC",
         u'<a href="http://www.ncbi.nlm.nih.gov/pmc/articles/{0}">{0}</a>'),
        ("arxivid", "arXiv", u'<a href="http://arxiv.org/abs/{0}">{0}</a>'),
    )
    row_template = (u'<td style="text-align: right; opacity: 0.7">{0}:</td>'
                    u'<td>{1}</td>')

    # Build list of fragments (one table row per known identifier)
    fragments = []
    for key, label, value_template in identifier_rows:
        value = common.utils.metadata(document, key)
        if value is not None:
            fragments.append(
                row_template.format(label, value_template.format(value)))

    # Resolve publisher info: use the first identity that has all three parts
    pub_icon = u""
    for annotation in document.annotations("PublisherMetadata"):
        if annotation.get("concept") != "PublisherIdentity":
            continue
        logo = annotation.get("property:logo")
        publisher = annotation.get("property:title")
        webpage_url = annotation.get("property:webpageUrl")
        if None not in (logo, publisher, webpage_url):
            pub_icon = (u'<a href="{0}" title="{2}">'
                        u'<img src="{1}" alt="{2}" /></a>').format(
                            webpage_url, logo, publisher)
            break

    title = common.utils.metadata(document, "title")
    if title is None and not pub_icon and not fragments:
        # Nothing to show; avoid a panel that holds only the <style> block.
        return

    # Compile fragments
    html = (
        ' <style>'
        ' .fancy_quotes { position: relative; }'
        ' .fancy_quotes:before { content: "\\201C"; }'
        ' .fancy_quotes:after { content: "\\201D"; }'
        ' </style> '
    )
    if title is not None or pub_icon:
        html += u'<table style="border: none; margin: 0 0 1em 0;">'
        html += u"<tr>"
        if title is not None:
            html += (u'<td style="text-align:left; vertical-align: middle;">'
                     u'<strong class="nohyphenate fancy_quotes">{0}</strong>'
                     u'</td>').format(title.strip())
        if pub_icon:
            html += (u'<td style="text-align:right; vertical-align: middle; '
                     u'width: 80px;">{0}</td>').format(pub_icon)
        html += u"</tr>"
        html += u"</table>"

    if fragments:
        html += u'<div class="box">'
        html += u'<table style="border: none">'
        html += u"<tr>"
        html += u"</tr><tr>".join(fragments)
        html += u"</tr>"
        html += u"</table>"
        html += u"</div>"

    annotation = spineapi.Annotation()
    annotation["concept"] = "Collated"
    annotation["property:html"] = html
    annotation["property:name"] = "About this article"
    annotation["session:weight"] = "100"
    annotation["session:default"] = "1"
    annotation["session:headless"] = "1"
    document.addAnnotation(annotation)
def on_filter_event(self, document, data=None):
    """Attach a tooltip to Definition annotations that lack one.

    An annotation receives 'displayTooltip' only when its concept is
    'Definition', it has no tooltip yet, and it carries both
    'property:name' and 'property:description'.  `data` is accepted but
    not used.
    """
    for entry in document.annotations():
        if entry.get('concept') != 'Definition':
            continue
        if 'displayTooltip' in entry:
            # Never overwrite an existing tooltip.
            continue
        if 'property:name' not in entry:
            continue
        if 'property:description' not in entry:
            continue
        name = entry['property:name']
        description = entry['property:description']
        entry['displayTooltip'] = '<strong>%s</strong>: %s' % (name,
                                                               description)
def after_ready_event(self, document):
    """Promote collected metadata into the annotations the UI displays.

    Walks the document's 'metadata' accumulation lists in the order
    ``document.getAccLists`` returns them (presumably best-ranked first --
    TODO confirm) and, for each concept, uses the first list that holds a
    matching annotation:

      * 'Citation': the citations are sorted with ``sortfn``, copied into a
        new 'citation_list' list and rendered into one
        'BibliographyMetadata' annotation;
      * 'ForwardCitation' and 'Table': the matches are added to the
        document as they are;
      * 'DocumentMetadata' (looked up only in the list chosen for
        citations): the last match is added to 'Document Metadata'.

    Changes over the previous version:
      * 'property:sourceTitle' is only read when the list link has it; the
        other source properties were already guarded that way;
      * the three copies of the "first list with matches" loop share one
        helper.
    """
    print('Formatting metadata')

    def first_list_with(concept):
        # Return (list link, matches) for the first metadata list holding an
        # annotation of `concept`, or (None, None) when no list has one.
        for acc_list in document.getAccLists('metadata'):
            found = document.annotationsIf({'concept': concept},
                                           acc_list['scratch'])
            if len(found) > 0:
                print('Selected for [%s] list %s with rank %s' % (
                    concept, acc_list['scratch'],
                    repr(acc_list.get('rank', 0))))
                return acc_list, found
        return None, None

    # References
    source, matches = first_list_with('Citation')
    if source is None:
        print('No metadata found')
    else:
        bibliography = list(matches)
        bibliography.sort(key=sortfn)

        rendered = ''
        for entry in bibliography:
            citation = utopia.tools.utils.citation_from_annotation(entry)
            rendered += utopia.citation.render(citation, links=True)

        if len(bibliography) > 0:
            # Create Metadata link annotation
            link = document.newAccList('citation_list')
            link['property:list_name'] = 'Bibliography'
            document.addAnnotations(bibliography, link['scratch'])

        if len(rendered) > 0:
            references = spineapi.Annotation()
            references['displayBibliography'] = rendered
            references['concept'] = 'BibliographyMetadata'
            references['property:identifier'] = '#bibliography'
            references['property:name'] = 'Bibliography'
            references['displayName'] = 'Bibliography'
            references['displayRelevance'] = '800'
            # Record which source the bibliography came from.
            for suffix in ('sourceIcon', 'sourceTitle',
                           'sourceDescription', 'sourceDatabase'):
                key = 'property:{0}'.format(suffix)
                if key in source:
                    references[key] = source[key]
            if 'property:sourceTitle' in source:
                references['property:description'] = \
                    'From ' + source['property:sourceTitle']
            document.addAnnotation(references)

    # In-text citations, then tables
    for concept in ('ForwardCitation', 'Table'):
        _unused, found = first_list_with(concept)
        if found is not None:
            document.addAnnotations(found)

    # Document-level metadata comes from the list chosen for citations
    if source is not None:
        metadata = None
        for candidate in document.annotations(source['scratch']):
            if candidate.get('concept') == 'DocumentMetadata':
                metadata = candidate
        if metadata:
            metadata['displayName'] = 'Document Information'
            metadata['displayRelevance'] = '1000'
            document.addAnnotation(metadata, 'Document Metadata')
def before_load_event(self, document):
    """Record the areas already covered by Hyperlink annotations.

    Stores in ``self.existing_areas`` a list holding, first, every area of
    every extent of each 'Hyperlink' annotation and, after those, every
    area attached directly to each of those annotations.
    """
    hyperlinks = []
    for candidate in document.annotations():
        if candidate.get('concept') == 'Hyperlink':
            hyperlinks.append(candidate)

    collected = []
    # Areas reached through extents come first...
    for link in hyperlinks:
        for extent in link.extents():
            for area in extent.areas():
                collected.append(area)
    # ...followed by areas attached to the annotations themselves.
    for link in hyperlinks:
        for area in link.areas():
            collected.append(area)

    self.existing_areas = collected
def after_load_event(self, document):
    """Summarise metadata-lookup failures in a collated "Error" panel.

    Reads the annotations in the 'errors.metadata' list and groups their
    (method, message) pairs by component and category.  When at least one
    of them has the concept 'Error', a headless 'Collated' annotation is
    added whose HTML holds a short summary plus one list item per failing
    component/category.

    Fix over the previous version: the generated HTML ended with an
    opening ``<div>`` instead of closing the outer error box.
    """
    # Put errors together in a sensible way:
    # component -> category -> [(method, message), ...]
    errors = {}
    failures = 0
    successes = 0
    for report in document.annotations('errors.metadata'):
        concept = report.get('concept')
        if concept == 'Success':
            successes += 1
        elif concept == 'Error':
            failures += 1
        component = report.get('property:component')
        category = report.get('property:category')
        method = report.get('property:method')
        message = report.get('property:message', '')
        errors.setdefault(component, {}).setdefault(category, []).append(
            (method, message))

    # For each category, the number of components that reported it.
    categories = {}
    for details in errors.values():
        for category in details:
            categories[category] = categories.get(category, 0) + 1

    # If there are errors, provide feedback to the user
    if failures > 0:
        timeouts = categories.get('timeout', 0)
        # Check for likely client problems
        if categories.get('connection', 0) == failures and successes == 0:
            summary = (
                ' Utopia could not reach any of the online services it'
                ' would normally use to identify this document, meaning you'
                ' are likely to see limited or no information below. You'
                ' might wish to check your Internet connection and reload'
                ' the document. ')
        elif timeouts > 1:
            if timeouts == failures and successes == 0:
                many = ''
            else:
                many = 'some of'
            summary = (
                ' Utopia gave up contacting {0} the online services it'
                ' would normally use to identify this document because they'
                ' were taking too long to respond. You are likely to see'
                ' limited or no information below. You might wish to check'
                ' your Internet connection and reload the document. '
            ).format(many)
        else:
            noun = 'An error' if failures == 1 else 'Errors'
            summary = (
                ' {0} occurred when trying to discover the identity of this'
                ' document. You are likely to see limited or no information'
                ' below. ').format(noun)

        html = (
            ' <div class="box error"> <strong>Warning</strong>'
            ' <p> {0} </p>'
            ' <div class="expandable" title="Details..."> <ul> '
        ).format(summary)

        templates = {
            'timeout': '{0} did not respond',
            'connection': 'Could not connect to {0}',
            'server': '{0} behaved unexpectedly',
        }
        for component, details in errors.items():
            for category, methods in details.items():
                if category == 'success':
                    continue
                template = templates.get(category,
                                         'An error occurred accessing {0}')
                methods_html = ', '.join(
                    '<span title="{1}">{0}</span>'.format(method, message)
                    for method, message in methods)
                html += '<li>{0} (when accessing: {1}).</li>'.format(
                    template.format('<strong>' + component + '</strong>'),
                    methods_html)

        # Close the list, the expandable section and the outer error box.
        html += ' </ul> </div> </div> '

        annotation = spineapi.Annotation()
        annotation['concept'] = 'Collated'
        annotation['property:html'] = html
        annotation['property:name'] = 'Error'
        annotation['session:weight'] = '1000'
        annotation['session:default'] = '1'
        annotation['session:headless'] = '1'
        document.addAnnotation(annotation)

    # Debug output of everything that was collected.
    print(errors)
def after_ready_event(self, document):
    """Add a headless "About this article" panel describing the document.

    The panel shows the document title, the publisher logo (taken from the
    first complete 'PublisherIdentity' annotation in the 'PublisherMetadata'
    list), one table row per ``identifiers[...]`` value that
    ``utopialib.utils.metadata`` returns, and an extra ISSN row when
    'publication-issn' is set.

    Changes over the previous version:
      * the publisher icon markup no longer carries a stray ``</td>`` (it
        was wrapped in a ``<td>...</td>`` a second time further down);
      * identifier rows are emitted in a fixed order instead of dict order;
      * locals no longer shadow the builtins ``format`` and ``id``;
      * nothing is added when there is no title, logo or identifier.
        NOTE(review): the source was whitespace-collapsed, so whether the
        panel used to depend on identifiers being present could not be
        determined -- confirm this guard matches the intended behaviour.
    """
    # (identifier key, row label, value template), in display order.
    identifier_rows = (
        ('doi', 'DOI', u'<a href="http://dx.doi.org/{0}">{0}</a>'),
        ('issn', 'ISSN', u'<strong>{0}</strong>'),
        ('pii', 'PII', u'<strong>{0}</strong>'),
        ('pubmed', 'Pubmed',
         u'<a href="http://www.ncbi.nlm.nih.gov/pubmed/{0}">{0}</a>'),
        ('pmc', 'PMC',
         u'<a href="http://www.ncbi.nlm.nih.gov/pmc/articles/{0}">{0}</a>'),
        ('arxiv', 'arXiv', u'<a href="http://arxiv.org/abs/{0}">{0}</a>'),
    )
    row_template = (u'<td style="text-align: right; opacity: 0.7">{0}:</td>'
                    u'<td>{1}</td>')

    # Build list of fragments (one table row per known identifier)
    fragments = []
    for key, label, value_template in identifier_rows:
        value = utopialib.utils.metadata(
            document, 'identifiers[{0}]'.format(key))
        if value is not None:
            fragments.append(
                row_template.format(label, value_template.format(value)))

    # The publication's ISSN is stored outside the identifiers map.
    issn = utopialib.utils.metadata(document, 'publication-issn')
    if issn is not None:
        fragments.append(
            row_template.format('ISSN', u'<strong>{0}</strong>'.format(issn)))

    # Resolve publisher info: use the first identity that has all three parts
    pub_icon = u''
    for annotation in document.annotations('PublisherMetadata'):
        if annotation.get('concept') != 'PublisherIdentity':
            continue
        logo = annotation.get('property:logo')
        publisher = annotation.get('property:title')
        webpage_url = annotation.get('property:webpageUrl')
        if None not in (logo, publisher, webpage_url):
            pub_icon = (u'<a href="{0}" title="{2}">'
                        u'<img src="{1}" alt="{2}" /></a>').format(
                            webpage_url, logo, publisher)
            break

    title = utopialib.utils.metadata(document, 'title')
    if title is None and not pub_icon and not fragments:
        # Nothing to show; avoid a panel that holds only the <style> block.
        return

    # Compile fragments
    html = (
        ' <style>'
        ' .fancy_quotes { position: relative; }'
        ' .fancy_quotes:before { content: "\\201C"; }'
        ' .fancy_quotes:after { content: "\\201D"; }'
        ' </style> '
    )
    if title is not None or pub_icon:
        html += u'<table style="border: none; margin: 0 0 1em 0;">'
        html += u'<tr>'
        if title is not None:
            html += (u'<td style="text-align:left; vertical-align: middle;">'
                     u'<strong class="nohyphenate fancy_quotes">{0}</strong>'
                     u'</td>').format(title.strip())
        if pub_icon:
            html += (u'<td style="text-align:right; vertical-align: middle; '
                     u'width: 80px;">{0}</td>').format(pub_icon)
        html += u'</tr>'
        html += u'</table>'

    if fragments:
        html += u'<div class="box">'
        html += u'<table style="border: none">'
        html += u'<tr>'
        html += u'</tr><tr>'.join(fragments)
        html += u'</tr>'
        html += u'</table>'
        html += u'</div>'

    annotation = spineapi.Annotation()
    annotation['concept'] = 'Collated'
    annotation['property:html'] = html
    annotation['property:name'] = 'About this article'
    annotation['session:weight'] = '1000'
    annotation['session:default'] = '1'
    annotation['session:headless'] = '1'
    document.addAnnotation(annotation)
def on_filter_event(self, document, data=None):
    """Convert legacy curated annotations to the current format.

    Annotations authored by the legacy curator account, with concept
    'Definition' or 'DatabaseEntry' and without a 'session:legacy' flag,
    are removed from the document.  When the identifier of such an
    annotation starts with a known PDB, DBpedia or Glick glossary prefix, a
    volatile replacement is created that inherits the original's extents
    and areas.  `data` is accepted but not used.
    """
    curator = 'http://utopia.cs.manchester.ac.uk/users/11679'

    def start(source, concept, database, blurb):
        # Create the replacement and fill in the fields common to all kinds.
        target = spineapi.Annotation()
        target['concept'] = concept
        target['author'] = source['author']
        target['session:volatile'] = '1'
        target['session:legacy'] = '1'
        target['property:sourceDatabase'] = database
        target['property:sourceDescription'] = blurb
        return target

    def copy_present(source, target, keys):
        # Copy only the properties the legacy annotation actually has.
        for key in keys:
            if key in source:
                target[key] = source[key]

    def finish(source, target):
        # Transfer the document locations and register the replacement.
        for extent in source.extents():
            target.addExtent(extent)
        for area in source.areas():
            target.addArea(area)
        document.addAnnotation(target)

    for a in document.annotations():
        is_legacy = (a.get('author') == curator
                     and a.get('concept') in ('Definition', 'DatabaseEntry')
                     and 'session:legacy' not in a)
        if not is_legacy:
            continue

        document.removeAnnotation(a)
        identifier = a.get('property:identifier', '')

        if identifier.startswith('http://bio2rdf.org/pdb:'):
            # PDB entry
            a2 = start(
                a, 'DatabaseEntry', 'pdb',
                '<p>The <a href="http://www.rcsb.org/">Protein Data Bank</a>'
                ' of the Research Collaboratory for Structural'
                ' Bioinformatics (<a href="http://home.rcsb.org/">RCSB</a>)'
                '.</p>')
            a2['property:identifier'] = identifier
            a2['property:description'] = 'PDB entry {0}'.format(
                identifier[-4:].upper())
            if 'property:name' in a:
                # The trailing 11 characters of the legacy name are cut off.
                a2['property:name'] = a['property:name'][:-11]
            copy_present(a, a2, ('property:imageUrl',
                                 'property:molecularDescription',
                                 'property:webpageUrl',
                                 'property:embedded'))
            finish(a, a2)

        if identifier.startswith('http://dbpedia.org/resource/'):
            # Wikipedia entry
            a2 = start(
                a, 'Definition', 'wikipedia',
                '<p>Structured <a href="http://www.wikipedia.org/">'
                'Wikipedia</a> information provided by the '
                '<a href="http://DBpedia.org/">DBpedia</a> project.</p>')
            a2['property:description'] = a.get('property:summary',
                                               'Wikipedia entry')
            copy_present(a, a2, ('property:name',
                                 'property:identifier',
                                 'property:imageUrl',
                                 'property:summary',
                                 'property:webpageUrl'))
            finish(a, a2)

        if identifier.startswith('http://www.portlandpress.com/utopia/glick/'):
            # Glick glossary entry
            a2 = start(
                a, 'Definition', 'glick',
                '<p>David M. Glick\'s <a href="http://www.portlandpress.com'
                '/pp/books/online/glick/search.htm">Glossary of Biochemistry'
                ' and Molecular Biology</a>.</p><p>Made available by '
                '<a href="http://www.portlandpress.com/">Portland Press'
                ' Limited</a>.</p>')
            # Name and description are read without a presence check here.
            a2['property:description'] = (
                a['property:description']
                + '<p><em>(Glick Glossary/Portland Press Ltd.)</em></p>')
            a2['property:name'] = a['property:name']
            finish(a, a2)
def after_ready_event(self, document):
    """Hyperlink compounds and ontology terms using RSC article metadata.

    For the first annotation whose concept is 'RSCMetadataLink', the RSC
    federated search is queried with that annotation's DOI.  If the result
    contains an article with the same DOI, every compound and annotation
    that is both referenced in the article body and defined in the article
    back matter is turned into a 'Hyperlink' annotation covering the
    whole-word, case-insensitive occurrences of its text in the document.

    Changes over the previous version:
      * the HTTP response is closed once it has been read;
      * ``is not None`` replaces ``!= None``;
      * the outer loop variable and the builtin ``id`` are no longer
        rebound inside the loops;
      * the unused ``document.text()`` call and ``rscId`` local are gone;
      * membership is tested against the text dictionaries directly rather
        than against parallel lists holding the same keys.

    Errors raised by the network request or the XML parser are not caught
    here.
    """
    import contextlib  # function-scope import, only used for closing()

    ns = '{http://www.rsc.org/schema/rscart38}'

    def referenced_text(article, tag, nested_tag):
        # Map the idrefs of each <tag> in the article body to its plain text.
        texts = {}
        for element in article.findall('./' + ns + 'art-body/' + ns + tag):
            # This removes (erroneous?) nested elements from inside the XML
            etree.strip_elements(element, ns + nested_tag, with_tail=False)
            texts[element.attrib['idrefs']] = etree.tounicode(
                element, method='text')
        return texts

    def add_hyperlink(term, url):
        # One Hyperlink annotation spanning every occurrence of `term`.
        options = spineapi.WholeWordsOnly + spineapi.IgnoreCase
        matches = document.search(term, options)
        hyperlink = spineapi.Annotation()
        hyperlink['concept'] = 'Hyperlink'
        hyperlink['property:webpageUrl'] = url
        for match in matches:
            hyperlink.addExtent(match)
        document.addAnnotation(hyperlink)

    # Get (if present) the RSCMetadataLink annotation
    for trigger in document.annotations():
        if trigger.get('concept') != 'RSCMetadataLink':
            continue

        doi = trigger['property:doi'].upper()

        xmlquery = '<SearchCriteria><SearchTerm><Category>Journal</Category><ContentType>All</ContentType><Criterias><NameValue><Name>FreeText</Name><Value>"%s"</Value></NameValue></Criterias><Source>Utopia</Source></SearchTerm><PageNo>1</PageNo><PageSize>10</PageSize><SortBy>Relevance</SortBy></SearchCriteria>' % doi
        baseurl = 'http://pubs.rsc.org/en/federated/search'
        params = {'federatedsearchname': 'Utopia', 'inputxml': xmlquery}
        url = baseurl + '?%s' % urllib.urlencode(params)
        with contextlib.closing(urllib2.urlopen(url, timeout=14)) as response:
            searchresult = response.read()
        root = etree.fromstring(searchresult)

        # The search can return more than one article, so select only the
        # one with the correct DOI.
        thearticle = None
        articleID = None
        for article in root.findall('./' + ns + 'article'):
            found_doi = article.findtext(
                './' + ns + 'metainfo/' + ns + "meta[@field='doi']")
            if found_doi is None:
                found_doi = article.findtext(
                    './' + ns + 'art-admin/' + ns + 'doi')
            if found_doi is not None and found_doi.upper() == doi:
                thearticle = article
                docid = article.find(
                    './' + ns + 'metainfo/' + ns + "meta[@field='docid']")
                if docid is not None:
                    articleID = docid.text
                break

        # If we got back a valid article...
        if thearticle is not None:
            # Text of everything mentioned in the article body, by idrefs.
            compound_text = referenced_text(thearticle, 'compname',
                                            'compound')
            annotation_text = referenced_text(thearticle, 'annref',
                                              'annotation')

            # Compounds defined in the article back matter
            for compound in thearticle.findall(
                    './' + ns + 'art-back/' + ns + 'compound'):
                compound_id = compound.attrib['id']
                if compound_id not in compound_text:
                    continue
                csid = compound.find('./' + ns + "link[@type='CSID']")
                if csid is not None and csid.text is not None:
                    # Compound has a CSID: link to its ChemSpider page.
                    # NOTE(review): [5:] presumably strips a 'CSID:' style
                    # prefix -- confirm against the RSC schema.
                    link_url = 'http://www.chemspider.com/Chemical-Structure.%s.html' % csid.text[5:]
                else:
                    # Otherwise, use the RSC landing page.
                    link_url = 'http://www.rsc.org/publishing/journals/prospect/cheminfo.asp?XMLID=%s&compoundtext=%s&MSID=%s' % (
                        compound_id[4:], compound_text[compound_id],
                        articleID)
                add_hyperlink(compound_text[compound_id], link_url)

            # Similarly, for all the annotations
            for term in thearticle.findall(
                    './' + ns + 'art-back/' + ns + 'annotation'):
                term_id = term.attrib['id']
                if term_id not in annotation_text:
                    continue
                link = term.findtext('./' + ns + 'link')
                if link is None:
                    # Without a link there is nothing to point at.
                    continue
                if link[:3] == 'AU:':
                    link_url = 'http://goldbook.iupac.org/%s.html' % link[3:]
                else:
                    link_url = 'http://www.rsc.org/publishing/journals/prospect/ontology.asp?id=%s&MSID=%s' % (
                        link, articleID)
                add_hyperlink(annotation_text[term_id], link_url)

        # Only the first RSCMetadataLink annotation is processed.
        break
def after_load_event(self, document):
    """Summarise metadata-lookup failures in a collated "Error" panel.

    Reads the annotations in the "errors.metadata" list and groups their
    (method, message) pairs by component and category.  When at least one
    of them has the concept "Error", a headless "Collated" annotation is
    added whose HTML holds a short summary plus one list item per failing
    component/category.

    Fix over the previous version: the generated HTML ended with an
    opening ``<div>`` instead of closing the outer error box.
    """
    # Put errors together in a sensible way:
    # component -> category -> [(method, message), ...]
    errors = {}
    failures = 0
    successes = 0
    for report in document.annotations("errors.metadata"):
        concept = report.get("concept")
        if concept == "Success":
            successes += 1
        elif concept == "Error":
            failures += 1
        component = report.get("property:component")
        category = report.get("property:category")
        method = report.get("property:method")
        message = report.get("property:message", "")
        errors.setdefault(component, {}).setdefault(category, []).append(
            (method, message)
        )

    # For each category, the number of components that reported it.
    categories = {}
    for details in errors.values():
        for category in details:
            categories[category] = categories.get(category, 0) + 1

    # If there are errors, provide feedback to the user
    if failures > 0:
        timeouts = categories.get("timeout", 0)
        # Check for likely client problems
        if categories.get("connection", 0) == failures and successes == 0:
            summary = (
                " Utopia could not reach any of the online services it"
                " would normally use to identify this document, meaning you"
                " are likely to see limited or no information below. You"
                " might wish to check your Internet connection and reload"
                " the document. "
            )
        elif timeouts > 1:
            if timeouts == failures and successes == 0:
                many = ""
            else:
                many = "some of"
            summary = (
                " Utopia gave up contacting {0} the online services it"
                " would normally use to identify this document because they"
                " were taking too long to respond. You are likely to see"
                " limited or no information below. You might wish to check"
                " your Internet connection and reload the document. "
            ).format(many)
        else:
            noun = "An error" if failures == 1 else "Errors"
            summary = (
                " {0} occurred when trying to discover the identity of this"
                " document. You are likely to see limited or no information"
                " below. "
            ).format(noun)

        html = (
            ' <div class="box error"> <strong>Warning</strong>'
            " <p> {0} </p>"
            ' <div class="expandable" title="Details..."> <ul> '
        ).format(summary)

        templates = {
            "timeout": "{0} did not respond",
            "connection": "Could not connect to {0}",
            "server": "{0} behaved unexpectedly",
        }
        for component, details in errors.items():
            for category, methods in details.items():
                if category == "success":
                    continue
                template = templates.get(
                    category, "An error occurred accessing {0}"
                )
                methods_html = ", ".join(
                    '<span title="{1}">{0}</span>'.format(method, message)
                    for method, message in methods
                )
                html += "<li>{0} (when accessing: {1}).</li>".format(
                    template.format("<strong>" + component + "</strong>"),
                    methods_html,
                )

        # Close the list, the expandable section and the outer error box.
        html += " </ul> </div> </div> "

        annotation = spineapi.Annotation()
        annotation["concept"] = "Collated"
        annotation["property:html"] = html
        annotation["property:name"] = "Error"
        annotation["session:weight"] = "1000"
        annotation["session:default"] = "1"
        annotation["session:headless"] = "1"
        document.addAnnotation(annotation)

    # Debug output of everything that was collected.
    print(errors)
def on_load_event(self, document):
    '''Using the document content, try to resolve various bits of metadata.

    Citations already held in the 'Document Metadata' list are passed to
    ``utopia.citation.resolve``.  Every resolved source that is new and
    carries no 'error' entry is stored with
    ``utopia.tools.utils.store_metadata``; sources that do carry an 'error'
    entry are grouped by origin and category and reported to the user in a
    headless 'Collated' annotation.

    Changes over the previous version:
      * the generated HTML ended with an opening ``<div>`` instead of
        closing the outer error box;
      * the loop variable is no longer rebound to its own 'error' entry
        while iterating.
    '''
    # Start by getting any citations already in the document
    input_citations = []
    for annotation in document.annotations('Document Metadata'):
        # Check the kinds of annotations that hold citation information
        if annotation.get('concept') in ('Citation', ):
            # Compile information from annotation
            input_citations.append(
                utopia.tools.utils.citation_from_annotation(annotation))
    citations = input_citations[:]

    # Run the resolution pipeline
    flattened = utopia.citation.resolve(citations=citations,
                                        document=document)

    # Save the resulting citations as annotations in the document
    sources = flattened.get('provenance', {}).get('sources', [])
    for citation in sources:
        if citation not in input_citations and 'error' not in citation:
            utopia.tools.utils.store_metadata(document, **citation)

    # Deal with errors: component -> category -> [(plugin, message), ...]
    errors = {}
    failures = 0
    # NOTE(review): nothing in this handler ever counts a success, so the
    # "no successes" half of the checks below always holds -- confirm
    # whether error-free sources were meant to be counted.
    successes = 0
    for failed in sources:
        if 'error' not in failed:
            continue
        failures += 1
        provenance = failed.get('provenance', {})
        component = provenance.get('whence')
        plugin = provenance.get('plugin')
        detail = failed.get('error', {})
        category = detail.get('category')
        message = detail.get('message', '')
        errors.setdefault(component, {}).setdefault(category, []).append(
            (plugin, message))

    # For each category, the number of components that reported it.
    categories = {}
    for details in errors.values():
        for category in details:
            categories[category] = categories.get(category, 0) + 1

    # If there are errors, provide feedback to the user
    if failures > 0:
        timeouts = categories.get('timeout', 0)
        # Check for likely client problems
        if categories.get('connection', 0) == failures and successes == 0:
            summary = (
                ' Utopia could not reach any of the online services it'
                ' would normally use to identify this document, meaning you'
                ' are likely to see limited or no information below. You'
                ' might wish to check your Internet connection and reload'
                ' the document. ')
        elif timeouts > 1:
            if timeouts == failures and successes == 0:
                many = ''
            else:
                many = 'some of'
            summary = (
                ' Utopia gave up contacting {0} the online services it'
                ' would normally use to identify this document because they'
                ' were taking too long to respond. You are likely to see'
                ' limited or no information below. You might wish to check'
                ' your Internet connection and reload the document. '
            ).format(many)
        else:
            noun = 'An error' if failures == 1 else 'Errors'
            summary = (
                ' {0} occurred when trying to discover the identity of this'
                ' document. You are likely to see limited or no information'
                ' below. ').format(noun)

        html = (
            ' <div class="box error"> <strong>Warning</strong>'
            ' <p> {0} </p>'
            ' <div class="expandable" title="Details..."> <ul> '
        ).format(summary)

        templates = {
            'timeout': '{0} did not respond',
            'connection': 'Could not connect to {0}',
            'server': '{0} behaved unexpectedly',
        }
        for component, details in errors.items():
            for category, methods in details.items():
                if category == 'success':
                    continue
                template = templates.get(category,
                                         'An error occurred accessing {0}')
                methods_html = ', '.join(
                    '<span title="{1}">{0}</span>'.format(method, message)
                    for method, message in methods)
                html += '<li>{0} (when accessing: {1}).</li>'.format(
                    template.format('<strong>' + component + '</strong>'),
                    methods_html)

        # Close the list, the expandable section and the outer error box.
        html += ' </ul> </div> </div> '

        annotation = spineapi.Annotation()
        annotation['concept'] = 'Collated'
        annotation['property:html'] = html
        annotation['property:name'] = 'Error'
        annotation['session:weight'] = '1000'
        annotation['session:default'] = '1'
        annotation['session:headless'] = '1'
        document.addAnnotation(annotation)