def on_persist_event(self, document):
        """Send queued annotation saves and deletions through a kend client.

        A ``kend.client.Client`` is always created. If the document has no
        ``identifiers[utopia]`` metadata value nothing else happens.

        Otherwise, every annotation in the ``'PersistQueue'`` list that is
        not marked ``session:volatile`` is converted and persisted. When the
        client answers with a ``kend.model.Annotation``, its bookkeeping
        fields and media entries are copied back onto the local annotation,
        which is then moved from the queue to the document's default list.

        Afterwards, each annotation in the deleted-items scratch list is
        deleted through the client (unless volatile) and removed from both
        the scratch list and the default list.

        Any exception raised while converting, persisting or deleting
        propagates to the caller unchanged.
        """
        client = kend.client.Client()

        document_id = utopia.tools.utils.metadata(document,
                                                  'identifiers[utopia]')
        if document_id is None:
            return

        for annotation in document.annotations('PersistQueue'):
            if 'session:volatile' in annotation:
                continue

            ka = kend.converter.Annotation.spineapi2kend(annotation,
                                                         document_id)
            ka.context = self._context_
            updated = client.persistAnnotation(ka, context=self._context_)

            # Anything other than an Annotation reply leaves the item queued.
            if not isinstance(updated, kend.model.Annotation):
                continue

            # Copy the fields returned by the client onto the local copy.
            for key in ('id', 'created', 'author', 'revision', 'edit',
                        'media_edit'):
                annotation[key] = getattr(updated, key)

            # Replace the media properties with one url-encoded entry per
            # media object in the reply.
            annotation.removePropertyAll('session:media')
            for media in updated.media:
                media_fields = {
                    field: getattr(media, field)
                    for field in ('name', 'src', 'type', 'revision', 'size',
                                  'edit')
                    if hasattr(media, field)
                }
                annotation.insertProperty('session:media',
                                          urllib.urlencode(media_fields))

            # Move the annotation from the queue to the default list.
            document.removeAnnotation(annotation, 'PersistQueue')
            document.addAnnotation(annotation)

        for annotation in document.annotations(
                document.deletedItemsScratchId()):
            # Volatile annotations are only removed locally.
            if 'session:volatile' not in annotation:
                ka = kend.converter.Annotation.spineapi2kend(annotation,
                                                             document_id)
                client.deleteAnnotation(ka)
            document.removeAnnotation(annotation,
                                      document.deletedItemsScratchId())
            document.removeAnnotation(annotation)
    def on_load_event(self, document):
        """Collate the document's ``OutlineItem`` annotations into one outline.

        Each ``OutlineItem`` is keyed by its ``property:outlinePosition``
        parsed into a tuple of ints (``'2.1'`` -> ``(2, 1)``), so sorting the
        keys orders the items. If at least one item exists, a ``Collated``
        annotation named ``Outline`` is added to the document whose
        ``property:html`` is a nested ``<ul>`` list of links, one per item.

        The list markup is well formed: every ``<li>`` and ``<ul>`` that is
        opened is closed, and a change of several levels between consecutive
        items opens or closes that many lists.
        """
        outline = {}
        for a in document.annotations():
            if a.get('concept') == 'OutlineItem':
                position = tuple(
                    int(x)
                    for x in a.get('property:outlinePosition').split('.'))
                outline[position] = a

        if not outline:
            return

        parts = ['<div><ul>']
        depth = 1          # number of <ul> elements currently open
        item_open = False  # True while an <li> is open at the current depth
        for position in sorted(outline):
            level = len(position)
            if level > depth:
                # Descend: nest each new list inside an <li>, creating an
                # empty holder item when a level has no item of its own.
                while depth < level:
                    if not item_open:
                        parts.append('<li>')
                    parts.append('<ul>')
                    depth += 1
                    item_open = False
            else:
                # Same level or shallower: finish the previous item, then
                # close one list (and the item holding it) per level left.
                if item_open:
                    parts.append('</li>')
                while depth > level:
                    parts.append('</ul></li>')
                    depth -= 1

            item = outline[position]
            parts.append('<li>')
            parts.append(
                '<a href="#" title="{0}" target="pdf; anchor={0}">{1}</a>'.format(
                    item.get('property:destinationAnchorName'),
                    cgi.escape(item.get('property:outlineTitle'),
                               quote=True).encode('ascii',
                                                  'xmlcharrefreplace')))
            item_open = True

        # Close whatever is still open, innermost first.
        if item_open:
            parts.append('</li>')
        parts.append('</ul></li>' * (depth - 1))
        parts.append('</ul></div>')
        html = ''.join(parts)

        a = spineapi.Annotation()
        a['concept'] = 'Collated'
        a['property:name'] = 'Outline'
        a['property:description'] = 'Document Structure'
        a['session:weight'] = '10000'
        a['property:html'] = html
        document.addAnnotation(a)
    def on_activate_event(self, document):
        """Annotate text mentions once per document.

        Does nothing when the ``'NucleaRDB cache'`` annotation list already
        has entries. Otherwise the document text and its ``pmid`` metadata
        value (which may be ``None``) are passed to ``self.getMentions``.
        Mentions whose ``mentionType`` is not ``'SPECIES'`` are grouped by
        their ``html`` attribute, one annotation per group is built with
        ``self.createAnnotation`` and added to the document, and finally an
        empty marker annotation is added to ``'NucleaRDB cache'``.
        """
        if len(document.annotations('NucleaRDB cache')) > 0:
            return

        print('annotating stuff . . .')

        pubmedId = common.utils.metadata(document, 'pmid')
        if pubmedId is None:
            print('did not find pubmed id')
        else:
            print('found pubmed id: ' + pubmedId)

        # Group the non-species mentions that share the same html.
        grouped = {}
        for mention in self.getMentions(document.text(), pubmedId):
            if mention.mentionType == 'SPECIES':
                continue
            grouped.setdefault(mention.html, []).append(mention)

        for html, mentions in grouped.items():
            annotation = self.createAnnotation(document, html, mentions)
            annotation['displayRelevance'] = '2000'
            annotation['displayRank'] = '2000'
            document.addAnnotation(annotation)

        # Marker entry: its presence makes later calls return immediately.
        document.addAnnotation(spineapi.Annotation(), 'NucleaRDB cache')
    def after_ready_event(self, document):
        """Collate the document's ``OutlineItem`` annotations into one outline.

        Each ``OutlineItem`` is keyed by its ``property:outlinePosition``
        parsed into a tuple of ints (``'2.1'`` -> ``(2, 1)``), so sorting the
        keys orders the items. If at least one item exists, a ``Collated``
        annotation named ``Outline`` is added to the document whose
        ``property:html`` is a nested ``<ul>`` list of links, one per item.

        The list markup is well formed: every ``<li>`` and ``<ul>`` that is
        opened is closed, and a change of several levels between consecutive
        items opens or closes that many lists.
        """
        outline = {}
        for a in document.annotations():
            if a.get('concept') == 'OutlineItem':
                position = tuple(
                    int(x)
                    for x in a.get('property:outlinePosition').split('.'))
                outline[position] = a

        if not outline:
            return

        parts = ['<div><ul>']
        depth = 1          # number of <ul> elements currently open
        item_open = False  # True while an <li> is open at the current depth
        for position in sorted(outline):
            level = len(position)
            if level > depth:
                # Descend: nest each new list inside an <li>, creating an
                # empty holder item when a level has no item of its own.
                while depth < level:
                    if not item_open:
                        parts.append('<li>')
                    parts.append('<ul>')
                    depth += 1
                    item_open = False
            else:
                # Same level or shallower: finish the previous item, then
                # close one list (and the item holding it) per level left.
                if item_open:
                    parts.append('</li>')
                while depth > level:
                    parts.append('</ul></li>')
                    depth -= 1

            item = outline[position]
            parts.append('<li>')
            parts.append(
                '<a href="#" title="{0}" target="pdf; anchor={0}">{1}</a>'.format(
                    item.get('property:destinationAnchorName'),
                    cgi.escape(item.get('property:outlineTitle'),
                               quote=True).encode('ascii',
                                                  'xmlcharrefreplace')))
            item_open = True

        # Close whatever is still open, innermost first.
        if item_open:
            parts.append('</li>')
        parts.append('</ul></li>' * (depth - 1))
        parts.append('</ul></div>')
        html = ''.join(parts)

        a = spineapi.Annotation()
        a['concept'] = 'Collated'
        a['property:name'] = 'Outline'
        a['property:description'] = 'Document Structure'
        a['session:weight'] = '999'
        a['property:html'] = html
        document.addAnnotation(a)
    def on_activate_event(self, document):
        """Annotate text mentions once per document.

        Does nothing when the ``'GPCRDB cache'`` annotation list already has
        entries. Otherwise the document text and its ``identifiers[pubmed]``
        metadata value (which may be ``None``) are passed to
        ``self.getMentions``. Mentions whose ``mentionType`` is not
        ``'SPECIES'`` are grouped by their ``html`` attribute, one annotation
        per group is built with ``self.createAnnotation`` and added to the
        document, and finally an empty marker annotation is added to
        ``'GPCRDB cache'``.
        """
        if len(document.annotations('GPCRDB cache')) > 0:
            return

        print('annotating stuff . . .')

        pubmedId = utopialib.utils.metadata(document, 'identifiers[pubmed]')
        if pubmedId is None:
            print('did not find pubmed id')
        else:
            print('found pubmed id: ' + pubmedId)

        # Group the non-species mentions that share the same html.
        grouped = {}
        for mention in self.getMentions(document.text(), pubmedId):
            if mention.mentionType == 'SPECIES':
                continue
            grouped.setdefault(mention.html, []).append(mention)

        for html, mentions in grouped.items():
            annotation = self.createAnnotation(document, html, mentions)
            annotation['displayRelevance'] = '2000'
            annotation['displayRank'] = '2000'
            document.addAnnotation(annotation)

        # Marker entry: its presence makes later calls return immediately.
        document.addAnnotation(spineapi.Annotation(), 'GPCRDB cache')
Example #6
0
 def on_filter_event(self, document, data=None):
     """Add a name/description tooltip to ``Definition`` annotations.

     An annotation is changed only when its concept is ``'Definition'``, it
     has no ``displayTooltip`` yet, and it carries both ``property:name``
     and ``property:description``. ``data`` is not used.
     """
     needed = ('property:name', 'property:description')
     for annotation in document.annotations():
         if annotation.get('concept') != 'Definition':
             continue
         if 'displayTooltip' in annotation:
             continue
         if not all(key in annotation for key in needed):
             continue

         name = annotation['property:name']
         description = annotation['property:description']
         annotation['displayTooltip'] = '<strong>%s</strong>: %s' % (
             name, description)
 def on_filter_event(self, document, data = None):
     """Add a descriptive tooltip to ``Hyperlink`` / ``WebPage`` annotations.

     Only annotations that have a ``property:webpageUrl`` and no
     ``displayTooltip`` yet are changed. The tooltip depends on the URL:

     * ``mailto:`` URLs show the address after the scheme.
     * URLs starting with ``#`` show ``property:destinationAnchorName``;
       when that property is absent the text after ``#`` is shown instead
       of failing on the missing key.
     * Every other URL is shown as is.

     ``data`` is not used.
     """
     template = '<span>%s</span><br/>&nbsp;&nbsp;&nbsp;<strong>%s</strong>'
     for annotation in document.annotations():
         if annotation.get('concept') not in ('Hyperlink', 'WebPage'):
             continue
         if ('displayTooltip' in annotation
                 or 'property:webpageUrl' not in annotation):
             continue

         url = annotation['property:webpageUrl']
         if url.startswith('mailto:'):
             label, target = 'Email:', url[7:]
         elif url.startswith('#'):
             label = 'Internal&nbsp;Link&nbsp;to:'
             # Fall back to the anchor embedded in the URL itself.
             target = annotation.get('property:destinationAnchorName',
                                     url[1:])
         else:
             label, target = 'Link&nbsp;to:', url
         annotation['displayTooltip'] = template % (label, target)
 def on_filter_event(self, document, data = None):
     """Add a descriptive tooltip to ``Hyperlink`` / ``WebPage`` annotations.

     Only annotations that have a ``property:webpageUrl`` and no
     ``displayTooltip`` yet are changed. The tooltip depends on the URL:

     * ``mailto:`` URLs show the address after the scheme.
     * URLs starting with ``#`` show ``property:destinationAnchorName``;
       when that property is absent the text after ``#`` is shown instead
       of failing on the missing key.
     * Every other URL is shown as is.

     ``data`` is not used.
     """
     template = '<span>%s</span><br/>&nbsp;&nbsp;&nbsp;<strong>%s</strong>'
     for annotation in document.annotations():
         if annotation.get('concept') not in ('Hyperlink', 'WebPage'):
             continue
         if ('displayTooltip' in annotation
                 or 'property:webpageUrl' not in annotation):
             continue

         url = annotation['property:webpageUrl']
         if url.startswith('mailto:'):
             label, target = 'Email:', url[7:]
         elif url.startswith('#'):
             label = 'Internal&nbsp;Link&nbsp;to:'
             # Fall back to the anchor embedded in the URL itself.
             target = annotation.get('property:destinationAnchorName',
                                     url[1:])
         else:
             label, target = 'Link&nbsp;to:', url
         annotation['displayTooltip'] = template % (label, target)
    def on_filter_event(self, document, data = None):
        """Replace ``property:demo_logo`` flags with overlay annotations.

        For every annotation whose concept is not ``'DemoLogoOverlay'`` and
        whose ``property:demo_logo`` equals ``'1'``, the flag is removed from
        that annotation and a new ``DemoLogoOverlay`` annotation carrying the
        flag, plus the original's extents and areas, is added to the
        document. ``data`` is not used.
        """
        for flagged in document.annotations():
            # Overlays carry the flag themselves; never re-process them.
            if flagged.get('concept') == 'DemoLogoOverlay':
                continue
            if flagged.get('property:demo_logo') != '1':
                continue

            flagged.removePropertyAll('property:demo_logo')

            logo_overlay = spineapi.Annotation()
            logo_overlay['concept'] = 'DemoLogoOverlay'
            logo_overlay['property:demo_logo'] = '1'
            logo_overlay.addExtents(flagged.extents())
            logo_overlay.addAreas(flagged.areas())
            document.addAnnotation(logo_overlay)
    def on_filter_event(self, document, data=None):
        """Replace ``property:demo_logo`` flags with overlay annotations.

        For every annotation whose concept is not ``'DemoLogoOverlay'`` and
        whose ``property:demo_logo`` equals ``'1'``, the flag is removed from
        that annotation and a new ``DemoLogoOverlay`` annotation carrying the
        flag, plus the original's extents and areas, is added to the
        document. ``data`` is not used.
        """
        for flagged in document.annotations():
            # Overlays carry the flag themselves; never re-process them.
            if flagged.get('concept') == 'DemoLogoOverlay':
                continue
            if flagged.get('property:demo_logo') != '1':
                continue

            flagged.removePropertyAll('property:demo_logo')

            logo_overlay = spineapi.Annotation()
            logo_overlay['concept'] = 'DemoLogoOverlay'
            logo_overlay['property:demo_logo'] = '1'
            logo_overlay.addExtents(flagged.extents())
            logo_overlay.addAreas(flagged.areas())
            document.addAnnotation(logo_overlay)
    def on_persist_event(self, document):
        """Send queued annotation saves and deletions through a kend client.

        A ``kend.client.Client`` is always created and the document id is
        taken from the first element of ``self._resolve(document)``. If that
        id is ``None`` nothing else happens.

        Otherwise, every annotation in the ``'PersistQueue'`` list that is
        not marked ``session:volatile`` is converted and persisted. When the
        client answers with a ``kend.model.Annotation``, its bookkeeping
        fields and media entries are copied back onto the local annotation,
        which is then moved from the queue to the document's default list.

        Afterwards, each annotation in the deleted-items scratch list is
        deleted through the client (unless volatile) and removed from the
        scratch list.

        Any exception raised while converting, persisting or deleting
        propagates to the caller unchanged.
        """
        client = kend.client.Client()

        # The second element of the pair is not needed here.
        document_id, _doi = self._resolve(document)
        if document_id is None:
            return

        for annotation in document.annotations('PersistQueue'):
            if 'session:volatile' in annotation:
                continue

            ka = kend.converter.Annotation.spineapi2kend(annotation,
                                                         document_id)
            ka.context = self._context_
            updated = client.persistAnnotation(ka, context=self._context_)

            # Anything other than an Annotation reply leaves the item queued.
            if not isinstance(updated, kend.model.Annotation):
                continue

            # Copy the fields returned by the client onto the local copy.
            for key in ('id', 'created', 'author', 'revision', 'edit',
                        'media_edit'):
                annotation[key] = getattr(updated, key)

            # Replace the media properties with one url-encoded entry per
            # media object in the reply.
            annotation.removePropertyAll('session:media')
            for media in updated.media:
                media_fields = {
                    field: getattr(media, field)
                    for field in ('name', 'src', 'type', 'revision', 'size',
                                  'edit')
                    if hasattr(media, field)
                }
                annotation.insertProperty('session:media',
                                          urllib.urlencode(media_fields))

            # Move the annotation from the queue to the default list.
            document.removeAnnotation(annotation, 'PersistQueue')
            document.addAnnotation(annotation)

        for annotation in document.annotations(
                document.deletedItemsScratchId()):
            # Volatile annotations are only removed locally.
            if 'session:volatile' not in annotation:
                ka = kend.converter.Annotation.spineapi2kend(annotation,
                                                             document_id)
                client.deleteAnnotation(ka)
            document.removeAnnotation(annotation,
                                      document.deletedItemsScratchId())
    def after_ready_event(self, document):
        """Hyperlink compound and ontology terms using RSC article metadata.

        Uses the first ``RSCMetadataLink`` annotation of the document (and
        returns immediately if there is none). Its DOI is sent to the RSC
        federated search; from the returned articles the one whose DOI
        matches (case-insensitively) is selected. For that article:

        * each ``compound`` in ``art-back`` that is referenced by a
          ``compname`` in ``art-body`` gets a ``Hyperlink`` annotation
          pointing at ChemSpider (when a ``CSID`` link with text exists) or
          at the RSC compound landing page;
        * each ``annotation`` in ``art-back`` that is referenced by an
          ``annref`` in ``art-body`` and has a ``link`` gets a ``Hyperlink``
          pointing at the IUPAC Gold Book (``AU:`` links) or at the RSC
          ontology landing page.

        Every hyperlink covers all whole-word, case-insensitive matches of
        the referenced text in the document. Network and XML parsing errors
        are not handled here and propagate to the caller.
        """

        def add_hyperlink(phrase, options, target_url):
            # One Hyperlink annotation with an extent per search match.
            hits = document.search(phrase, options)
            hyperlink = spineapi.Annotation()
            hyperlink['concept'] = 'Hyperlink'
            hyperlink['property:webpageUrl'] = target_url
            for hit in hits:
                hyperlink.addExtent(hit)
            document.addAnnotation(hyperlink)

        # Only the first RSCMetadataLink annotation is ever considered.
        metadata_link = next(
            (candidate for candidate in document.annotations()
             if candidate.get('concept') == 'RSCMetadataLink'),
            None)
        if metadata_link is None:
            return

        # NOTE(review): neither `text` nor `rsc_id` is read below; both
        # statements are kept so the calls and key lookups still happen.
        text = document.text()

        doi = metadata_link['property:doi'].upper()
        rsc_id = metadata_link['property:rscId'].upper()

        xmlquery = '<SearchCriteria><SearchTerm><Category>Journal</Category><ContentType>All</ContentType><Criterias><NameValue><Name>FreeText</Name><Value>"%s"</Value></NameValue></Criterias><Source>Utopia</Source></SearchTerm><PageNo>1</PageNo><PageSize>10</PageSize><SortBy>Relevance</SortBy></SearchCriteria>' % doi

        query_string = urllib.urlencode({'federatedsearchname': 'Utopia',
                                         'inputxml': xmlquery})
        url = 'http://pubs.rsc.org/en/federated/search' + '?%s' % query_string
        searchresult = urllib2.urlopen(url, timeout=14).read()
        root = etree.fromstring(searchresult)

        # The search can return several articles; keep only the one whose
        # DOI matches, looking in metainfo first and art-admin second.
        matched_article = None
        article_id = None
        for candidate in root.findall('./{http://www.rsc.org/schema/rscart38}article'):
            candidate_doi = candidate.findtext("./{http://www.rsc.org/schema/rscart38}metainfo/{http://www.rsc.org/schema/rscart38}meta[@field='doi']")
            if candidate_doi is None:
                candidate_doi = candidate.findtext("./{http://www.rsc.org/schema/rscart38}art-admin/{http://www.rsc.org/schema/rscart38}doi")
            if candidate_doi is None or candidate_doi.upper() != doi:
                continue
            matched_article = candidate
            docid_element = candidate.find("./{http://www.rsc.org/schema/rscart38}metainfo/{http://www.rsc.org/schema/rscart38}meta[@field='docid']")
            if docid_element is not None:
                article_id = docid_element.text
            break

        if matched_article is None:
            return

        # Text of every compound mention in the article body, keyed by the
        # ids it refers to.
        compound_text = {}
        for compname in matched_article.findall('./{http://www.rsc.org/schema/rscart38}art-body/{http://www.rsc.org/schema/rscart38}compname'):
            # Drop nested compound elements before reading the text.
            etree.strip_elements(compname, '{http://www.rsc.org/schema/rscart38}compound', with_tail=False)
            compound_text[compname.attrib['idrefs']] = etree.tounicode(compname, method='text')

        # Likewise for every annotation reference in the article body.
        annotation_text = {}
        for annref in matched_article.findall('./{http://www.rsc.org/schema/rscart38}art-body/{http://www.rsc.org/schema/rscart38}annref'):
            # Drop nested annotation elements before reading the text.
            etree.strip_elements(annref, '{http://www.rsc.org/schema/rscart38}annotation', with_tail=False)
            annotation_text[annref.attrib['idrefs']] = etree.tounicode(annref, method='text')

        # Compounds defined in the article back matter.
        for compound in matched_article.findall('./{http://www.rsc.org/schema/rscart38}art-back/{http://www.rsc.org/schema/rscart38}compound'):
            compound_id = compound.attrib['id']
            if compound_id not in compound_text:
                continue
            mention = compound_text[compound_id]

            csid = compound.find("./{http://www.rsc.org/schema/rscart38}link[@type='CSID']" )
            if csid is None or csid.text is None:
                # No usable CSID: link to the RSC landing page instead.
                target = 'http://www.rsc.org/publishing/journals/prospect/cheminfo.asp?XMLID=%s&compoundtext=%s&MSID=%s' % (compound_id[4:], mention, article_id)
            else:
                target = 'http://www.chemspider.com/Chemical-Structure.%s.html' % csid.text[5:]

            add_hyperlink(mention,
                          spineapi.WholeWordsOnly + spineapi.IgnoreCase,
                          target)

        # Ontology annotations defined in the article back matter.
        for entry in matched_article.findall('./{http://www.rsc.org/schema/rscart38}art-back/{http://www.rsc.org/schema/rscart38}annotation'):
            entry_id = entry.attrib['id']
            if entry_id not in annotation_text:
                continue

            link = entry.findtext("./{http://www.rsc.org/schema/rscart38}link" )
            if link is None:
                continue

            if link.startswith('AU:'):
                target = 'http://goldbook.iupac.org/%s.html' % link[3:]
            else:
                target = 'http://www.rsc.org/publishing/journals/prospect/ontology.asp?id=%s&MSID=%s' % (link, article_id)

            add_hyperlink(annotation_text[entry_id],
                          spineapi.IgnoreCase + spineapi.WholeWordsOnly,
                          target)
Example #13
0
    def on_ready_event(self, document):
        '''Fetch information from the Lazarus service'''

        permission = self.get_config('permission', False)
        if permission:
            # If an outline already exists, don't make a new one
            needs_outline = True
            for annotation in document.annotations():
                if annotation.get('concept') == 'OutlineItem':
                    needs_outline = False
                    break

            # The Lazarus server needs to know what this document is
            document_id = utopia.tools.utils.metadata(document,
                                                      'identifiers[utopia]')
            this_doi = utopia.tools.utils.metadata(document,
                                                   'identifiers[doi]')
            if this_doi is not None:
                this_doi = u'doi:' + this_doi

            # Speak to server
            params = {'fingerprint': document.fingerprints()}
            url = '{0}?{1}'.format(laz_docUrl,
                                   urllib.urlencode(params, doseq=True))
            response = urllib2.urlopen(url, timeout=60)
            if response.getcode() == 204:
                request = urllib2.Request(
                    url,
                    data=document.data(),
                    headers={'Content-Type': 'application/pdf'})
                response = urllib2.urlopen(request, timeout=60)
            #response = open('/Users/dave/Desktop/ananiadou_tibtech06.pdf-response.xml', 'r')

            # Create Metadata link annotation
            link = document.newAccList('metadata', 50)
            link['property:sourceDatabase'] = 'lazarus'
            link['property:sourceTitle'] = 'Lazarus'
            link['property:sourceDescription'] = self.sourceDescription
            link['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                'images/lazarus-prefs-logo.png', 'image/png')

            headers = []
            pos = []
            refs = []
            annotations = []
            concepts = {}
            hits = []
            expression_annotations = []
            for kAnnotation in kend.converter.XML.parse(
                    response, kend.model.Document):
                #print kend.converter.XML.serialise(kAnnotation)[0]
                try:
                    annotations.append(
                        utopia.tools.converters.Annotation.kend2spineapi(
                            kAnnotation, document))
                except:
                    pass
            annotations.sort(key=lambda a: int(a.get('structure:order', 0)))
            for sAnnotation in annotations:
                if sAnnotation['concept'] == 'structure_element':
                    role, level = self.getHeaderRole(sAnnotation)
                    if role is not None and needs_outline:
                        while len(pos) < level:
                            pos.append(0)
                        while len(pos) > level:
                            pos.pop()
                        pos[-1] += 1

                        outline = u'.'.join([unicode(i) for i in pos])
                        anchor_name = '#lazarus.outline.{0}'.format(outline)

                        anchor = spineapi.Annotation()
                        anchor['concept'] = 'Anchor'
                        anchor['property:anchor'] = anchor_name
                        anchor.addExtents(sAnnotation.extents())
                        anchor.addAreas(sAnnotation.areas())
                        document.addAnnotation(anchor)

                        header = spineapi.Annotation()
                        header['concept'] = 'OutlineItem'
                        header['property:outlinePosition'] = outline
                        header['property:outlineTitle'] = u' '.join(
                            [e.text() for e in sAnnotation.extents()])
                        header['property:destinationAnchorName'] = anchor_name
                        document.addAnnotation(header)

                        print((u'    ' * level +
                               u'.'.join([unicode(i)
                                          for i in pos]) + u' ' + u' '.join([
                                              e.text()
                                              for e in sAnnotation.extents()
                                          ])).encode('utf8'))
                    elif 'bibitem' in sAnnotation.getAllProperties(
                            'structure:role'):
                        #refs.append(sAnnotation)
                        pass
                elif sAnnotation['concept'] == 'Citation':
                    # Hack to fix a mistake in authors property name
                    if 'property:author' in sAnnotation and not 'property:authors' in sAnnotation:
                        sAnnotation[
                            'property:authors'] = sAnnotation.getAllProperties(
                                'property:author')
                    refs.append(sAnnotation)
                elif sAnnotation['concept'] == 'LazarusConcept':
                    concept_id = sAnnotation.get('property:identifier')
                    if concept_id is not None:
                        sAnnotation['id'] = str(uuid.uuid4())
                        concepts[concept_id] = sAnnotation
                        document.addAnnotation(sAnnotation, 'Lazarus Concept')
                elif sAnnotation['concept'] == 'LazarusConceptHit':
                    hits.append(sAnnotation)
                elif sAnnotation['concept'] == 'LazarusSentenceExpression':
                    expression_annotations.append(sAnnotation)
                else:
                    document.addAnnotation(sAnnotation)

            for ref in refs:
                #print(ref.get('structure:order', '0'))
                pass
            refs = sorted(refs,
                          key=lambda ref: int(ref.get('property:order', '0')))

            for ref in refs:
                #print(ref.get('structure:order', '0'))
                pass
            for ref in refs:
                # Create Bibliography annotations
                #citation = {'unstructured': u' '.join([e.text() for e in ref.extents()])}
                #annotation = utopia.tools.utils.citation_to_annotation(citation)
                #annotation['property:order'] = ref.get('structure:order')
                #annotation.addExtents(ref.extents())
                #annotation.addAreas(ref.areas())
                #document.addAnnotation(annotation, link['scratch'])
                document.addAnnotation(ref, link['scratch'])

            # Now link hits to concepts
            for i, hit in enumerate(hits):
                concept_id = hit.get('property:identifier')
                concept = concepts.get(concept_id)
                if concept is not None:
                    concept_uuid = concept.get('id')
                    hit['property:concept_id'] = concept_uuid

                    identifier = concept.get('property:identifier')
                    name = concept.get('property:name', '???')
                    sources = concept.get('property:externalSources',
                                          'json:[]')
                    if sources.startswith('json:'):
                        sources = json.loads(sources[5:])
                    if 'property:stdInchiKey' in concept:
                        sources.append({
                            'database':
                            ' InchiKey',
                            'identifier':
                            concept['property:stdInchiKey']
                        })
                    if 'property:canonicalSmiles' in concept:
                        sources.append({
                            'database':
                            ' SMILES',
                            'identifier':
                            concept['property:canonicalSmiles']
                        })
                    kind = concept.get('property:kind')
                    kind = self.dbs.get(kind, {}).get('title', kind)
                    links = {}
                    for source in sources:
                        uri = source.get('uri')
                        if 'primary' in source.get('relationship', []):
                            links.setdefault('definition', [])
                            links['definition'].append(u'''
                                <a href="{uri}" title="{uri}">{database}</a>
                            '''.format(**source))
                        elif uri is None:
                            if source.get('database') in (' InchiKey',
                                                          ' SMILES'):
                                links.setdefault('main', [])
                                links['main'].append(u'''
                                    <tr><td>{database}:</td><td>{identifier}</td></tr>
                                '''.format(**source))
                        else:
                            identifier = source.get('identifier')
                            links_category = 'xref'
                            if 'seeAlso' in source.get('relationship',
                                                       []) or uri is None:
                                links_category = 'seeAlso'
                            links.setdefault(links_category, [])
                            if identifier is not None:
                                links[links_category].append(u'''
                                    <a href="{uri}" title="{uri}">{name}...</a> ({identifier})
                                '''.format(**source))
                            else:
                                links[links_category].append(u'''
                                    <a href="{uri}" title="{uri}">{name}...</a>
                                '''.format(**source))

                    style = u'''
                        <style>
                          .lazarus-table tbody {
                            border: none;
                          }
                          .lazarus-table td:first-of-type {
                            text-align: right;
                            font-weight: bold;
                          }
                          .lazarus-table td {
                            vertical-align: top;
                          }
                          .lazarus-table td:first-of-type {
                            white-space: nowrap;
                          }
                          .lazarus-table td:not(:first-of-type) {
                            word-break: break-all;
                          }
                          .lazarus-table tr td {
                            padding-top: 0ex;
                            padding-bottom: 0ex;
                          }
                          .lazarus-table tbody:not(:first-of-type) tr:first-of-type td {
                            padding-top: 1ex;
                          }
                        </style>
                    '''
                    html = u'''
                        <table class="lazarus-table">
                          <tr><td>Name:</td><td>{name}</td></tr>
                    '''.format(**{'name': name})
                    categories = {
                        'xref': 'Related:',
                        'seeAlso': 'See also:',
                        'definition': 'Defined in:'
                    }
                    for links_category in ('main', 'xref', 'seeAlso',
                                           'definition'):
                        links_title = categories.get(links_category)
                        these_links = sorted(
                            list(set(links.get(links_category, []))))
                        if len(these_links) > 0:
                            html += '<tbody>'
                            if links_category != 'main':
                                html += u'<tr><td>{0}</td><td>'.format(
                                    links_title)
                                html += u'<br>'.join(these_links)
                                html += '</td></tr>'
                            else:
                                html += ''.join(these_links)
                            html += '</tbody>'
                    #pprint('------------------------')
                    html += u'''
                        </table>
                    '''
                    #print(html)

                    hasLinks = len(
                        links.get('xref', []) + links.get('seeAlso', [])) > 0

                    ann = spineapi.Annotation()
                    ann['concept'] = 'Collated'
                    ann['property:name'] = u'{0}'.format(name)
                    ann['property:description'] = 'Lazarus Concept'
                    ann['session:semanticTerm'] = name
                    ann['property:html'] = [style, html]
                    ann['property:sourceDescription'] = self.sourceDescription
                    ann['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                        'images/lazarus-prefs-logo.png', 'image/png')
                    ann['session:overlay'] = 'hyperlink'
                    ann['session:color'] = '#880000'
                    count = 0
                    print('====', 7)
                    if 'property:hitFragments' in hit:
                        hitFragments = hit.getAllProperties(
                            'property:hitFragments') or []
                        #pprint(hitFragments)
                        for hitFragment in hitFragments:
                            pre, _, rest = hitFragment.partition('{!')
                            match, _, post = rest.partition('!}')
                            #pprint((pre, match, post))
                            matches = document.findInContext(pre,
                                                             match,
                                                             post,
                                                             fuzzy=True)
                            count += len(matches)
                            ann.addExtents(matches)
                    if hasLinks and count > 0:
                        document.addAnnotation(ann)

            style = u'''
                <style>
                    .lazarus-expression .box {
                        background-color: #FFF0E8;
                        border-color: #EEE0D8;
                    }
                    .lazarus-related {
                        padding-left: 42px;
                        background-image: url(%s);
                        background-repeat: no-repeat;
                        background-position: top left;
                        background-size: 37px 48px;
                        min-height: 53px;
                    }
                    .lazarus-related + .lazarus-related {
                        margin-top: 5px;
                        border-top: 1px dotted #aaa;
                        padding-top: 5px;
                        background-position-y: 5px;
                        min-height: 58px;
                    }
                    .lazarus-sentence {
                        padding-left: 0.5em;
                        color: black;
                    }
                    .lazarus-sentence.negative {
                        border-left: solid 5px #bb0000;
                    }
                    .lazarus-sentence.positive {
                        border-left: solid 5px #008800;
                    }
                    .lazarus-sentence.negative a {
                        color: #bb0000;
                    }
                    .lazarus-sentence.positive a {
                        color: #008800;
                    }
                </style>
            ''' % utopia.get_plugin_data_as_url('images/pdf-page-icon.png',
                                                'image/png')

            expressions = []
            for sAnnotation in expression_annotations:
                exp = sAnnotation.get('property:expressions', 'json:{}')
                if exp.startswith('json:'):
                    exp = json.loads(exp[5:])
                context = sAnnotation.get('property:context')
                if context is not None:
                    if exp.get('negative', False):
                        exp['posneg'] = 'negative'
                    else:
                        exp['posneg'] = 'positive'

                    pprint(context)
                    pprint(exp)

                    matched_context = exp.get('context')
                    matches = []
                    if matched_context is not None:
                        matches = document.search(
                            re.sub(r'\s+', ' ', matched_context))
                        if len(matches) > 0:
                            anchor_id = str(uuid.uuid4())[1:-1]
                            anchor = spineapi.Annotation()
                            anchor['concept'] = 'Anchor'
                            anchor['property:anchor'] = anchor_id
                            anchor.addExtents(matches)
                            document.addAnnotation(anchor)

                            exp.update({
                                'anchor_id': anchor_id,
                                'sentence': context
                            })
                            expressions.append(exp)

            js = u'''
                <script>
                    $(document).on('DOMNodeInserted', function(e) {
                        var element = e.target;
                        $(element).filter('a[target="tab"]').add('a[target="tab"]', element).each(function () {
                            var fragment = $(this).closest('.-papyro-internal-citation').data('citation')['userdef']['first_fragment'];
                            $(this).attr('target', 'pdf; show=highlight; text=[' + encodeURIComponent(fragment) + ']');
                        });
                    });

                    $(function () {
                        var lazarus = {
                            expressions: %s,
                            fingerprints: %s,
                            relUrl: %s
                        };

                        var more_expressions_link = $('#lazarus-expression > p.more').hide();
                        var more_expressions_spinner = $('#lazarus-expression > div.spinner');

                        Spinners.create(more_expressions_spinner);
                        Spinners.play(more_expressions_spinner);

                        var exp_divs = [];
                        var identifiers = [];
                        for (var e = 0; e < lazarus.expressions.length; e++) {
                            var expression = lazarus.expressions[e];
                            var exp_div = $('<div class="box"></div>');
                            exp_div.data('expression', expression);
                            exp_div.hide();
                            exp_divs.push(exp_div);
                            identifiers.push(expression.identifiers);
                        }
                        var params = {
                            fingerprint: lazarus.fingerprints
                        };
                        var url = lazarus.relUrl + '?' + $.param(params, traditional=true);
                        $.ajax({
                            url: url,
                            type: 'POST',
                            dataType: 'json',
                            data: JSON.stringify(identifiers),
                            contentType: "application/json",
                            error: function (xhr, ajaxOptions, thrownError) {
                                console.log(xhr.statusText);
                                console.log(xhr.responseText);
                                console.log(xhr.status);
                                console.log(thrownError);

                                // FIXME do something here
                                Spinners.remove(more_expressions_spinner);
                            },
                            success: function (related) {
                                // Sort related according to the number of articles found
                                related.results.sort(function (l, r) {
                                    var lv = Object.keys(l.related).length;
                                    var rv = Object.keys(r.related).length;
                                    return (lv > rv) ? -1 : (lv < rv) ? 1 : 0;
                                });
                                $.each(related.results, function (idx, result) {
                                    var exp_div = exp_divs[idx];
                                    var expression = exp_div.data('expression');
                                    expression.related = result.related;
                                    delete expression.related[%s];

                                    split = expression.sentence.split(expression.context);
                                    pre = split[0];
                                    pre = pre.replace(/(\w)$/, '$1 ');
                                    pre = pre.replace(/^\s*/, '');
                                    match = expression.context;
                                    post = split[1];
                                    post = post.replace(/^(\w)/, ' $1');
                                    post = post.replace(/\s*$/, '');
                                    expression.pre = pre;
                                    expression.match = match;
                                    expression.post = post;

                                    // Create expression element
                                    exp_div.append('<p class="lazarus-sentence ' + expression.posneg + '">&ldquo;' + expression.pre + '<a target="pdf; show=select; anchor=' + expression.anchor_id + '"><strong>' + expression.match + '</strong></a>' + expression.post + '&rdquo;</p>');
                                    exp_div.data('expression', expression);

                                    $('#lazarus-expression > .content').append(exp_div);

                                    if (Object.keys(expression.related).length > 0) {
                                        var related_div = $('<div class="expandable" title="Related expressions elsewhere"></div>');
                                        var related_div_content = $('<div></div>').appendTo(related_div);
                                        function on_expand() {
                                            related_div.off('papyro:expandable:expand', on_expand);
                                            $.each(expression.related, function (idx, obj) {
                                                fragments = [];
                                                $.each(obj, function (id, obj) {
                                                    fragments.push(obj.context);
                                                });
                                                fragments.join('\\n');
                                                related_div_content.append($('<div class="lazarus-related unprocessed"></div>').append('<p><strong>&ldquo;&hellip;'+fragments+'&hellip;&rdquo;</strong></p>').hide().data('citation', {identifiers:{doi:idx},userdef:{first_fragment:fragments[0]}}));
                                                // .append(utopia.citation.render({identifiers:{doi:idx},first_fragment:fragments[0]}, true, true))
                                            });
                                            expression.related.length = 0; // empty for future

                                            if ($('.lazarus-related.unprocessed', exp_div).length > 0) {
                                                var more = $('<p class="more right"><a class="more">More related articles...</a></p>');
                                                related_div_content.append(more);
                                                function show_five_related(e) {
                                                    e.preventDefault();

                                                    $('.lazarus-related.unprocessed', exp_div).slice(0, 5).each(function (idx, obj) {
                                                        var citation = $(obj).data('citation');
                                                        $(obj).append(utopia.citation.render(citation, true, true));
                                                        $(obj).show().removeClass('unprocessed');
                                                    });
                                                    if ($('.lazarus-related.unprocessed', exp_div).length == 0) {
                                                        more.remove();
                                                    }
                                                }
                                                more.on('click', show_five_related).click();
                                            }
                                        }
                                        related_div.on('papyro:expandable:expand', on_expand);
                                        exp_div.append(related_div);
                                        utopia.processNewContent(related_div);
                                    }
                                });

                                Spinners.remove(more_expressions_spinner);
                                more_expressions_link.show();
                                $('a.more', more_expressions_link).click();
                            }
                        });

                        function append_five(e) {
                            e.preventDefault();

                            // Show the next five
                            $('#lazarus-expression > .content').children().filter(':hidden').slice(0,5).show();

                            // Hide the 'more' link if everything is now visible
                            if ($('#lazarus-expression > .content').children().filter(':hidden').length == 0) {
                                more_expressions_link.hide();
                            }
                        }

                        // Hook up 'more' link
                        $('#lazarus-expression > p.more > a.more').on('click', append_five).click();
                    });
                </script>
            ''' % (json.dumps(expressions), json.dumps(
                document.fingerprints()), json.dumps(laz_docRelUrl),
                   json.dumps(this_doi))
            #print(js.encode('utf8'))

            html = u'''
                <div id="lazarus-expression"><div class="content"></div><div class="spinner"></div><p class="more"><a class="more">More expressions...</a></p></div>
            '''

            if len(expressions) > 0:
                ann = spineapi.Annotation()
                ann['concept'] = 'Collated'
                ann['property:name'] = 'Lazarus Expressions'
                ann['property:description'] = u'Summarizing expression(s)'
                ann['property:html'] = [js, style, html]
                ann['property:sourceDescription'] = self.sourceDescription
                ann['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                    'images/lazarus-prefs-logo.png', 'image/png')
                document.addAnnotation(ann)

        else:  # no permission
            noprompt = self.get_config('noprompt', False)
            if not noprompt:
                annotation = spineapi.Annotation()
                annotation['concept'] = 'Collated'
                params = {
                    'uuid': self.uuid(),
                }
                annotation['property:html'] = utopia.get_plugin_data(
                    'tpl/denied.html').format(**params)
                annotation['property:name'] = 'Lazarus'
                annotation[
                    'property:description'] = 'Lazarus functionality is turned off'
                annotation[
                    'property:sourceDescription'] = self.sourceDescription
                annotation[
                    'property:sourceIcon'] = utopia.get_plugin_data_as_url(
                        'images/lazarus-prefs-logo.png', 'image/png')
                annotation['session:default'] = '1'
                document.addAnnotation(annotation)
 def on_filter_event(self, document, data=None):
     """Swap legacy annotations for volatile, session-flagged replacements.

     Every annotation whose author is the hard-coded legacy user, whose
     concept is 'Definition' or 'DatabaseEntry', and which does not already
     carry 'session:legacy' is removed from the document.  If its
     'property:identifier' starts with one of the three known prefixes
     (bio2rdf PDB, DBpedia, Glick glossary), a new annotation with the same
     extents and areas is added in its place; otherwise it is only removed.

     `data` is accepted but not used by this handler.
     """
     legacy_author = 'http://utopia.cs.manchester.ac.uk/users/11679'
     legacy_concepts = ('Definition', 'DatabaseEntry')

     def start_replacement(old, concept, database, source_description):
         # Properties every replacement shares, assigned in a fixed order.
         new = spineapi.Annotation()
         new['concept'] = concept
         new['author'] = old['author']
         new['session:volatile'] = '1'
         new['session:legacy'] = '1'
         new['property:sourceDatabase'] = database
         new['property:sourceDescription'] = source_description
         return new

     def copy_if_present(old, new, keys):
         # Carry over only those optional properties the old annotation has.
         for key in keys:
             if key in old:
                 new[key] = old[key]

     def publish(old, new):
         # Give the replacement the old annotation's geometry, then add it.
         for extent in old.extents():
             new.addExtent(extent)
         for area in old.areas():
             new.addArea(area)
         document.addAnnotation(new)

     for old in document.annotations():
         if old.get('author') != legacy_author:
             continue
         if old.get('concept') not in legacy_concepts:
             continue
         if 'session:legacy' in old:
             continue

         document.removeAnnotation(old)
         identifier = old.get('property:identifier', '')

         if identifier.startswith('http://bio2rdf.org/pdb:'):
             # PDB entry
             new = start_replacement(
                 old, 'DatabaseEntry', 'pdb',
                 '<p>The <a href="http://www.rcsb.org/">Protein Data Bank</a> of the Research Collaboratory for Structural Bioinformatics (<a href="http://home.rcsb.org/">RCSB</a>).</p>')
             new['property:identifier'] = identifier
             new['property:description'] = 'PDB entry {0}'.format(
                 identifier[-4:].upper())
             if 'property:name' in old:
                 # The last 11 characters of the legacy name are dropped.
                 new['property:name'] = old['property:name'][:-11]
             copy_if_present(old, new, ('property:imageUrl',
                                        'property:molecularDescription',
                                        'property:webpageUrl',
                                        'property:embedded'))
             publish(old, new)
         elif identifier.startswith('http://dbpedia.org/resource/'):
             # Wikipedia entry
             new = start_replacement(
                 old, 'Definition', 'wikipedia',
                 '<p>Structured <a href="http://www.wikipedia.org/">Wikipedia</a> information provided by the <a href="http://DBpedia.org/">DBpedia</a> project.</p>')
             new['property:description'] = old.get('property:summary',
                                                   'Wikipedia entry')
             copy_if_present(old, new, ('property:name',
                                        'property:identifier',
                                        'property:imageUrl',
                                        'property:summary',
                                        'property:webpageUrl'))
             publish(old, new)
         elif identifier.startswith(
                 'http://www.portlandpress.com/utopia/glick/'):
             # Glick glossary entry
             new = start_replacement(
                 old, 'Definition', 'glick',
                 '<p>David M. Glick\'s <a href="http://www.portlandpress.com/pp/books/online/glick/search.htm">Glossary of Biochemistry and Molecular Biology</a>.</p><p>Made available by <a href="http://www.portlandpress.com/">Portland Press Limited</a>.</p>')
             # Unlike the other branches, description and name are read
             # unconditionally here.
             new['property:description'] = old[
                 'property:description'] + '<p><em>(Glick Glossary/Portland Press Ltd.)</em></p>'
             new['property:name'] = old['property:name']
             publish(old, new)
    def after_ready_event(self, document):
        """Add a headless "About this article" annotation to the document.

        The generated HTML shows the document title and a linked publisher
        logo (when available) above a table of whichever identifiers (DOI,
        ISSN, PII, PubMed, PMC, arXiv) are present in the document metadata.

        The annotation is only added when at least one identifier was found;
        a title or publisher logo alone does not produce one.

        NOTE(review): metadata values are interpolated into the HTML without
        escaping - confirm upstream metadata is trusted or already escaped.
        """
        # Metadata key -> (display label, HTML template for the value)
        ids = {
            "doi": ("DOI", u'<a href="http://dx.doi.org/{0}">{0}</a>'),
            "issn": ("ISSN", u"<strong>{0}</strong>"),
            "pii": ("PII", u"<strong>{0}</strong>"),
            "pmid": ("Pubmed", u'<a href="http://www.ncbi.nlm.nih.gov/pubmed/{0}">{0}</a>'),
            "pmcid": ("PMC", u'<a href="http://www.ncbi.nlm.nih.gov/pmc/articles/{0}">{0}</a>'),
            "arxivid": ("arXiv", u'<a href="http://arxiv.org/abs/{0}">{0}</a>'),
        }
        # Build list of fragments
        fragments = []
        pub_icon = ""
        html = """
            <style>
              .fancy_quotes {
                position: relative;
              }
              .fancy_quotes:before {
                content: "\\201C";
              }
              .fancy_quotes:after {
                content: "\\201D";
              }
            </style>
        """
        # items() yields the same pairs as iteritems() on Python 2 and also
        # exists on Python 3; the locals avoid shadowing `format` and `id`.
        for key, (label, template) in ids.items():
            value = common.utils.metadata(document, key)
            if value is not None:
                fragments.append(
                    u'<td style="text-align: right; opacity: 0.7">{0}:</td><td>{1}</td>'.format(
                        label, template.format(value))
                )
        # Resolve publisher info: use the first identity that is complete
        for annotation in document.annotations("PublisherMetadata"):
            if annotation.get("concept") != "PublisherIdentity":
                continue
            logo = annotation.get("property:logo")
            publisher_title = annotation.get("property:title")
            webpage_url = annotation.get("property:webpageUrl")
            if None not in (logo, publisher_title, webpage_url):
                # No closing </td> here: the enclosing cell is opened and
                # closed where the icon is inserted below.
                pub_icon = u'<a href="{0}" title="{2}"><img src="{1}" alt="{2}" /></a>'.format(
                    webpage_url, logo, publisher_title
                )
                break
        # Compile fragments
        title = common.utils.metadata(document, "title")
        if title is not None or pub_icon:
            html += u'<table style="border: none; margin: 0 0 1em 0;">'
            html += u"<tr>"
            if title is not None:
                html += u'<td style="text-align:left; vertical-align: middle;"><strong class="nohyphenate fancy_quotes">{0}</strong></td>'.format(
                    title.strip()
                )
            if pub_icon:
                html += u'<td style="text-align:right; vertical-align: middle; width: 80px;">{0}</td>'.format(pub_icon)
            html += u"</tr>"
            html += u"</table>"
        if fragments:
            # One table row per identifier
            html += u'<div class="box">'
            html += u'<table style="border: none">'
            html += u"<tr>"
            html += u"</tr><tr>".join(fragments)
            html += u"</tr>"
            html += u"</table>"
            html += u"</div>"

            annotation = spineapi.Annotation()
            annotation["concept"] = "Collated"
            annotation["property:html"] = html
            annotation["property:name"] = "About this article"
            annotation["session:weight"] = "100"
            annotation["session:default"] = "1"
            annotation["session:headless"] = "1"
            document.addAnnotation(annotation)
 def on_filter_event(self, document, data=None):
     """Give each named, described 'Definition' annotation a tooltip.

     Annotations that already have 'displayTooltip', or that lack either
     'property:name' or 'property:description', are left untouched.
     `data` is accepted but not used.
     """
     for candidate in document.annotations():
         if candidate.get('concept') != 'Definition':
             continue
         if 'displayTooltip' in candidate:
             continue
         if 'property:name' not in candidate:
             continue
         if 'property:description' not in candidate:
             continue
         term = candidate['property:name']
         meaning = candidate['property:description']
         candidate['displayTooltip'] = '<strong>%s</strong>: %s' % (term, meaning)
Example #17
0
    def after_ready_event(self, document):
        """Publish scraped metadata from the accumulation lists.

        Walks the 'metadata' accumulation lists in the order returned by
        document.getAccLists (presumably best-ranked first, judging by the
        original comments - TODO confirm) and, for the first list holding
        each kind of annotation:

        * Citation: sorts them, stores them in a new 'citation_list'
          accumulation list and adds a 'BibliographyMetadata' annotation
          holding the rendered bibliography;
        * ForwardCitation and Table: adds them to the document;
        * DocumentMetadata (taken from the list chosen for citations): adds
          it under 'Document Metadata'.

        Progress is reported with print, using the single-argument
        parenthesised form so the output is the same on Python 2 and the
        code also parses on Python 3.
        """
        print('Formatting metadata')

        def promote(concept):
            # Add to the document the annotations of `concept` from the first
            # metadata accumulation list that contains any.
            for candidate in document.getAccLists('metadata'):
                found = document.annotationsIf({'concept': concept},
                                               candidate['scratch'])
                if len(found) > 0:
                    print('Selected for [%s] list %s with rank %s' % (
                        concept, candidate['scratch'],
                        repr(candidate.get('rank', 0))))
                    document.addAnnotations(found)
                    break

        # Find highest matching metadata accumulation list for references
        source = None
        for accListLink in document.getAccLists('metadata'):
            matches = document.annotationsIf({'concept': 'Citation'},
                                             accListLink['scratch'])
            if len(matches) == 0:
                continue

            print('Selected for [Citation] list %s with rank %s' % (
                accListLink['scratch'], repr(accListLink.get('rank', 0))))
            source = accListLink
            bibliography = sorted(matches, key=sortfn)
            rt = ''.join(
                utopia.citation.render(
                    utopia.tools.utils.citation_from_annotation(annotation),
                    links=True)
                for annotation in bibliography)

            if len(bibliography) > 0:
                # Create Metadata link annotation
                link = document.newAccList('citation_list')
                link['property:list_name'] = 'Bibliography'
                document.addAnnotations(bibliography, link['scratch'])

            if len(rt) > 0:
                references = spineapi.Annotation()
                references['displayBibliography'] = rt
                references['concept'] = 'BibliographyMetadata'
                references['property:identifier'] = '#bibliography'
                references['property:name'] = 'Bibliography'
                references['displayName'] = 'Bibliography'
                references['displayRelevance'] = '800'
                for i in ('sourceIcon', 'sourceTitle', 'sourceDescription',
                          'sourceDatabase'):
                    k = 'property:{0}'.format(i)
                    if k in accListLink:
                        references[k] = accListLink[k]
                # Only describe the source when its title is known; a list
                # without one must not abort the rest of this handler.
                if 'property:sourceTitle' in accListLink:
                    references['property:description'] = (
                        'From ' + accListLink['property:sourceTitle'])
                document.addAnnotation(references)
            break
        if source is None:
            print('No metadata found')

        # Find highest matching metadata accumulation list for in-text citations
        promote('ForwardCitation')

        # Find highest matching metadata accumulation list for tables
        promote('Table')

        # The last DocumentMetadata annotation in the chosen list wins
        metadata = None
        if source is not None:
            for annotation in document.annotations(source['scratch']):
                if annotation.get('concept') == 'DocumentMetadata':
                    metadata = annotation
            if metadata:
                metadata['displayName'] = 'Document Information'
                metadata['displayRelevance'] = '1000'
                document.addAnnotation(metadata, 'Document Metadata')
 def before_load_event(self, document):
     """Record the areas already covered by 'Hyperlink' annotations.

     self.existing_areas is set to a list holding first the areas of every
     extent of every 'Hyperlink' annotation, then the areas attached
     directly to those annotations.
     """
     # Get existing areas
     hyperlinks = []
     for candidate in document.annotations():
         if candidate.get('concept') == 'Hyperlink':
             hyperlinks.append(candidate)

     collected = []
     # First pass: areas reached through each hyperlink's extents
     for hyperlink in hyperlinks:
         for extent in hyperlink.extents():
             for area in extent.areas():
                 collected.append(area)
     # Second pass: areas attached to the hyperlinks themselves
     for hyperlink in hyperlinks:
         for area in hyperlink.areas():
             collected.append(area)

     self.existing_areas = collected
Example #19
0
    def after_load_event(self, document):
        '''Collate metadata-resolution reports into a single warning box.

        Reads every annotation returned by
        ``document.annotations('errors.metadata')``, groups the reports by
        component and category and, when at least one report has the concept
        ``Error``, adds a headless ``Collated`` annotation whose HTML
        summarises what went wrong.  The grouped reports are printed to
        standard output in every case.

        :param document: object providing ``annotations(name)`` and
            ``addAnnotation(annotation)``.
        '''
        # Local import: only this handler needs the escaping helper.
        from xml.sax.saxutils import escape

        def html_text(value):
            # '{0}'.format() renders None as 'None' instead of raising; the
            # extra entity makes the result safe inside a double-quoted
            # attribute as well as in element content.
            return escape('{0}'.format(value), {'"': '&quot;'})

        # errors[component][category] -> [(method, message), ...]
        errors = {}
        failures = 0
        successes = 0
        for error in document.annotations('errors.metadata'):
            concept = error.get('concept')
            if concept == 'Success':
                successes += 1
            elif concept == 'Error':
                failures += 1

            component = error.get('property:component')
            category = error.get('property:category')
            method = error.get('property:method')
            message = error.get('property:message', '')
            errors.setdefault(component, {}).setdefault(category, []).append(
                (method, message))

        # For each category, count how many components reported it
        categories = {}
        for details in errors.values():
            for category in details:
                categories[category] = categories.get(category, 0) + 1

        # If there are errors, provide feedback to the user
        if failures > 0:
            # Check for likely client problems
            if categories.get('connection', 0) == failures and successes == 0:
                summary = '''
                    Utopia could not reach any of the online services it would
                    normally use to identify this document, meaning you are
                    likely to see limited or no information below. You might
                    wish to check your Internet connection and reload the
                    document.
                    '''
            elif categories.get('timeout', 0) > 1:
                if categories.get('timeout', 0) == failures and successes == 0:
                    many = ''
                else:
                    many = 'some of'
                summary = '''
                    Utopia gave up contacting {0} the online services it would
                    normally use to identify this document because they were
                    taking too long to respond. You are likely to see limited
                    or no information below. You might wish to check your
                    Internet connection and reload the document.
                    '''.format(many)
            else:
                if failures == 1:
                    noun = 'An error'
                else:
                    noun = 'Errors'
                summary = '''
                    {0} occurred when trying to discover the identity
                    of this document. You are likely to see limited or no
                    information below.
                    '''.format(noun)
            html = '''
                <div class="box error">
                    <strong>Warning</strong>
                    <p>
                        {0}
                    </p>
                    <div class="expandable" title="Details...">
                    <ul>
            '''.format(summary)

            # One list item per (component, category) pair that is not a
            # success report
            sentences = {
                'timeout': '{0} did not respond',
                'connection': 'Could not connect to {0}',
                'server': '{0} behaved unexpectedly',
            }
            for component, details in errors.items():
                for category, methods in details.items():
                    if category == 'success':
                        continue
                    sentence = sentences.get(
                        category, 'An error occurred accessing {0}')
                    # The message becomes a tooltip on the method name
                    methods_html = ', '.join(
                        '<span title="{1}">{0}</span>'.format(
                            html_text(method), html_text(message))
                        for method, message in methods)
                    html += '<li>{0} (when accessing: {1}).</li>'.format(
                        sentence.format(
                            '<strong>' + html_text(component) + '</strong>'),
                        methods_html)

            # Close the list, the expandable section and the outer box
            html += '''
                    </ul>
                    </div>
                </div>
            '''
            annotation = spineapi.Annotation()
            annotation['concept'] = 'Collated'
            annotation['property:html'] = html
            annotation['property:name'] = 'Error'
            annotation['session:weight'] = '1000'
            annotation['session:default'] = '1'
            annotation['session:headless'] = '1'
            document.addAnnotation(annotation)

        # Single-argument parenthesised form: same output under the Python 2
        # print statement.
        print(errors)
Example #20
0
    def after_ready_event(self, document):
        '''Add an "About this article" box listing the document's identifiers.

        Builds an HTML fragment made of an optional header (document title
        and publisher logo) followed by a table with one row per identifier
        that ``utopialib.utils.metadata`` returns for the document.  The
        ``Collated`` annotation carrying that HTML is only added when at
        least one identifier row was produced.

        :param document: document to read metadata from and annotate.
        '''
        # Identifier key -> (display name, HTML template for the value)
        ids = {
            'doi': ('DOI', u'<a href="http://dx.doi.org/{0}">{0}</a>'),
            'issn': ('ISSN', u'<strong>{0}</strong>'),
            'pii': ('PII', u'<strong>{0}</strong>'),
            'pubmed':
            ('Pubmed',
             u'<a href="http://www.ncbi.nlm.nih.gov/pubmed/{0}">{0}</a>'),
            'pmc':
            ('PMC',
             u'<a href="http://www.ncbi.nlm.nih.gov/pmc/articles/{0}">{0}</a>'
             ),
            'arxiv': ('arXiv', u'<a href="http://arxiv.org/abs/{0}">{0}</a>'),
        }
        # Build list of fragments
        fragments = []
        pub_icon = ''
        html = '''
            <style>
              .fancy_quotes {
                position: relative;
              }
              .fancy_quotes:before {
                content: "\\201C";
              }
              .fancy_quotes:after {
                content: "\\201D";
              }
            </style>
        '''

        row = u'<td style="text-align: right; opacity: 0.7">{0}:</td><td>{1}</td>'
        for key, (name, template) in ids.items():
            value = utopialib.utils.metadata(document,
                                             'identifiers[{0}]'.format(key))
            if value is not None:
                fragments.append(row.format(name, template.format(value)))
        issn = utopialib.utils.metadata(document, 'publication-issn')
        if issn is not None:
            fragments.append(
                u'<td style="text-align: right; opacity: 0.7">{0}:</td><td><strong>{1}</strong></td>'
                .format('ISSN', issn))

        # Resolve publisher info: use the first PublisherIdentity annotation
        # that has a logo, a title and a web page
        for annotation in document.annotations('PublisherMetadata'):
            if annotation.get('concept') != 'PublisherIdentity':
                continue
            logo = annotation.get('property:logo')
            publisher = annotation.get('property:title')
            webpageUrl = annotation.get('property:webpageUrl')
            if None not in (logo, publisher, webpageUrl):
                # No closing </td> here: the cell is opened and closed
                # where the icon is placed in the header row below.
                pub_icon = u'<a href="{0}" title="{2}"><img src="{1}" alt="{2}" /></a>'.format(
                    webpageUrl, logo, publisher)
                break

        # Compile fragments
        title = utopialib.utils.metadata(document, 'title')
        if title is not None or pub_icon:
            html += u'<table style="border: none; margin: 0 0 1em 0;">'
            html += u'<tr>'
            if title is not None:
                html += u'<td style="text-align:left; vertical-align: middle;"><strong class="nohyphenate fancy_quotes">{0}</strong></td>'.format(
                    title.strip())
            if pub_icon:
                html += u'<td style="text-align:right; vertical-align: middle; width: 80px;">{0}</td>'.format(
                    pub_icon)
            html += u'</tr>'
            html += u'</table>'
        if fragments:
            html += u'<div class="box">'
            html += u'<table style="border: none">'
            html += u'<tr>'
            html += u'</tr><tr>'.join(fragments)
            html += u'</tr>'
            html += u'</table>'
            html += u'</div>'

            annotation = spineapi.Annotation()
            annotation['concept'] = 'Collated'
            annotation['property:html'] = html
            annotation['property:name'] = 'About this article'
            annotation['session:weight'] = '1000'
            annotation['session:default'] = '1'
            annotation['session:headless'] = '1'
            document.addAnnotation(annotation)
 def on_filter_event(self, document, data = None):
     '''Swap legacy Definition / DatabaseEntry annotations for rebuilt copies.

     Every annotation written by the legacy author whose concept is
     ``Definition`` or ``DatabaseEntry`` (and that is not already tagged
     ``session:legacy``) is removed from the document.  When its identifier
     points at PDB, DBpedia or the Glick glossary, a volatile replacement
     carrying the same extents and areas is added instead.  ``data`` is
     accepted but not used.
     '''
     legacy_author = 'http://utopia.cs.manchester.ac.uk/users/11679'

     def start_replacement(old, concept, database, blurb):
         # Fields common to all three kinds of replacement
         fresh = spineapi.Annotation()
         fresh['concept'] = concept
         fresh['author'] = old['author']
         fresh['session:volatile'] = '1'
         fresh['session:legacy'] = '1'
         fresh['property:sourceDatabase'] = database
         fresh['property:sourceDescription'] = blurb
         return fresh

     def copy_if_present(old, fresh, keys):
         # Optional properties are carried over verbatim
         for key in keys:
             if key in old:
                 fresh[key] = old[key]

     def publish(old, fresh):
         # Keep the original's position in the document, then add the copy
         for extent in old.extents():
             fresh.addExtent(extent)
         for area in old.areas():
             fresh.addArea(area)
         document.addAnnotation(fresh)

     for old in document.annotations():
         if old.get('author') != legacy_author:
             continue
         if old.get('concept') not in ('Definition', 'DatabaseEntry'):
             continue
         if 'session:legacy' in old:
             continue

         document.removeAnnotation(old)
         identifier = old.get('property:identifier', '')

         if identifier.startswith('http://bio2rdf.org/pdb:'):
             # PDB entry
             fresh = start_replacement(
                 old, 'DatabaseEntry', 'pdb',
                 '<p>The <a href="http://www.rcsb.org/">Protein Data Bank</a> of the Research Collaboratory for Structural Bioinformatics (<a href="http://home.rcsb.org/">RCSB</a>).</p>')
             fresh['property:identifier'] = identifier
             fresh['property:description'] = 'PDB entry {0}'.format(identifier[-4:].upper())
             if 'property:name' in old:
                 # Drops the last 11 characters of the stored name
                 # (presumably a fixed suffix -- TODO confirm)
                 fresh['property:name'] = old['property:name'][:-11]
             copy_if_present(old, fresh, (
                 'property:imageUrl',
                 'property:molecularDescription',
                 'property:webpageUrl',
                 'property:embedded',
             ))
             publish(old, fresh)
         elif identifier.startswith('http://dbpedia.org/resource/'):
             # Wikipedia entry
             fresh = start_replacement(
                 old, 'Definition', 'wikipedia',
                 '<p>Structured <a href="http://www.wikipedia.org/">Wikipedia</a> information provided by the <a href="http://DBpedia.org/">DBpedia</a> project.</p>')
             fresh['property:description'] = old.get('property:summary', 'Wikipedia entry')
             copy_if_present(old, fresh, (
                 'property:name',
                 'property:identifier',
                 'property:imageUrl',
                 'property:summary',
                 'property:webpageUrl',
             ))
             publish(old, fresh)
         elif identifier.startswith('http://www.portlandpress.com/utopia/glick/'):
             # Glick glossary entry
             fresh = start_replacement(
                 old, 'Definition', 'glick',
                 '<p>David M. Glick\'s <a href="http://www.portlandpress.com/pp/books/online/glick/search.htm">Glossary of Biochemistry and Molecular Biology</a>.</p><p>Made available by <a href="http://www.portlandpress.com/">Portland Press Limited</a>.</p>')
             fresh['property:description'] = old['property:description'] + '<p><em>(Glick Glossary/Portland Press Ltd.)</em></p>'
             fresh['property:name'] = old['property:name']
             publish(old, fresh)
Example #22
0
    def after_ready_event(self, document):
        '''Hyperlink RSC compounds and ontology terms mentioned in the document.

        Looks for the first annotation whose concept is ``RSCMetadataLink``,
        sends its DOI to the RSC federated search service (14 second
        timeout) and, when an article with a matching DOI is returned, adds
        one ``Hyperlink`` annotation per compound / annotation that is both
        referenced in the article body and defined in the article back
        matter.  Each hyperlink gets an extent for every case-insensitive,
        whole-word match of the referenced text in the document.

        Exceptions raised by the HTTP request or by XML parsing are not
        caught here.

        :param document: document to search and annotate.
        '''
        ns = '{http://www.rsc.org/schema/rscart38}'

        def path(*steps):
            # './{ns}a/{ns}b' for steps ('a', 'b')
            return './' + '/'.join(ns + step for step in steps)

        def referenced_text(article, ref_tag, nested_tag):
            # Map idrefs -> visible text for each <ref_tag> in the body
            texts = {}
            for ref in article.findall(path('art-body', ref_tag)):
                # This removes (erroneous?) elements from inside the XML
                etree.strip_elements(ref, ns + nested_tag, with_tail=False)
                texts[ref.attrib['idrefs']] = etree.tounicode(
                    ref, method='text')
            return texts

        def add_hyperlink(phrase, url):
            # One annotation per phrase, one extent per match
            matches = document.search(
                phrase, spineapi.IgnoreCase + spineapi.WholeWordsOnly)
            link = spineapi.Annotation()
            link['concept'] = 'Hyperlink'
            link['property:webpageUrl'] = url
            for match in matches:
                link.addExtent(match)
            document.addAnnotation(link)

        # Get (if present) the RSCMetadataLink annotation
        for annotation in document.annotations():
            if annotation.get('concept') != 'RSCMetadataLink':
                continue

            doi = annotation['property:doi'].upper()

            xmlquery = '<SearchCriteria><SearchTerm><Category>Journal</Category><ContentType>All</ContentType><Criterias><NameValue><Name>FreeText</Name><Value>"%s"</Value></NameValue></Criterias><Source>Utopia</Source></SearchTerm><PageNo>1</PageNo><PageSize>10</PageSize><SortBy>Relevance</SortBy></SearchCriteria>' % doi
            params = {
                'federatedsearchname': 'Utopia',
                'inputxml': xmlquery
            }
            url = 'http://pubs.rsc.org/en/federated/search' + '?' + urllib.urlencode(params)
            searchresult = urllib2.urlopen(url, timeout=14).read()
            root = etree.fromstring(searchresult)

            # The search can return more than one article, so select only
            # the one with the correct DOI
            thearticle = None
            articleID = None
            for article in root.findall(path('article')):
                found_doi = article.findtext(
                    path('metainfo', "meta[@field='doi']"))
                if found_doi is None:
                    found_doi = article.findtext(path('art-admin', 'doi'))
                if found_doi is not None and found_doi.upper() == doi:
                    thearticle = article
                    docid = article.find(
                        path('metainfo', "meta[@field='docid']"))
                    if docid is not None:
                        articleID = docid.text
                    break

            # If we get back a valid article...
            if thearticle is not None:
                # Text of everything referenced from the article body
                compound_text = referenced_text(
                    thearticle, 'compname', 'compound')
                term_text = referenced_text(
                    thearticle, 'annref', 'annotation')

                # Compounds defined in the article back matter
                for compound in thearticle.findall(
                        path('art-back', 'compound')):
                    compound_id = compound.attrib['id']
                    if compound_id not in compound_text:
                        continue

                    csid = compound.find(path("link[@type='CSID']"))
                    if csid is not None and csid.text is not None:
                        # Chemspider page; the first five characters of
                        # the link text are dropped (presumably a 'CSID:'
                        # style prefix -- TODO confirm)
                        url = 'http://www.chemspider.com/Chemical-Structure.%s.html' % csid.text[5:]
                    else:
                        # Otherwise, use the RSC landing page
                        url = 'http://www.rsc.org/publishing/journals/prospect/cheminfo.asp?XMLID=%s&compoundtext=%s&MSID=%s' % (
                            compound_id[4:], compound_text[compound_id],
                            articleID)
                    add_hyperlink(compound_text[compound_id], url)

                # Similarly, for all the annotations
                for term in thearticle.findall(
                        path('art-back', 'annotation')):
                    term_id = term.attrib['id']
                    if term_id not in term_text:
                        continue

                    link = term.findtext(path('link'))
                    if link is None:
                        # Nothing to point the hyperlink at
                        continue
                    if link[:3] == 'AU:':
                        url = 'http://goldbook.iupac.org/%s.html' % link[3:]
                    else:
                        url = 'http://www.rsc.org/publishing/journals/prospect/ontology.asp?id=%s&MSID=%s' % (
                            link, articleID)
                    add_hyperlink(term_text[term_id], url)

            # Only the first RSCMetadataLink annotation is processed
            break
    def after_load_event(self, document):
        """Collate metadata-resolution reports into a single warning box.

        Reads every annotation returned by
        ``document.annotations("errors.metadata")``, groups the reports by
        component and category and, when at least one report has the concept
        ``Error``, adds a headless ``Collated`` annotation whose HTML
        summarises what went wrong.  The grouped reports are printed to
        standard output in every case.

        :param document: object providing ``annotations(name)`` and
            ``addAnnotation(annotation)``.
        """
        # Local import: only this handler needs the escaping helper.
        from xml.sax.saxutils import escape

        def html_text(value):
            # "{0}".format() renders None as "None" instead of raising; the
            # extra entity makes the result safe inside a double-quoted
            # attribute as well as in element content.
            return escape("{0}".format(value), {'"': "&quot;"})

        # errors[component][category] -> [(method, message), ...]
        errors = {}
        failures = 0
        successes = 0
        for error in document.annotations("errors.metadata"):
            concept = error.get("concept")
            if concept == "Success":
                successes += 1
            elif concept == "Error":
                failures += 1

            component = error.get("property:component")
            category = error.get("property:category")
            method = error.get("property:method")
            message = error.get("property:message", "")
            errors.setdefault(component, {}).setdefault(category, []).append((method, message))

        # For each category, count how many components reported it
        categories = {}
        for details in errors.values():
            for category in details:
                categories[category] = categories.get(category, 0) + 1

        # If there are errors, provide feedback to the user
        if failures > 0:
            # Check for likely client problems
            if categories.get("connection", 0) == failures and successes == 0:
                summary = """
                    Utopia could not reach any of the online services it would
                    normally use to identify this document, meaning you are
                    likely to see limited or no information below. You might
                    wish to check your Internet connection and reload the
                    document.
                    """
            elif categories.get("timeout", 0) > 1:
                if categories.get("timeout", 0) == failures and successes == 0:
                    many = ""
                else:
                    many = "some of"
                summary = """
                    Utopia gave up contacting {0} the online services it would
                    normally use to identify this document because they were
                    taking too long to respond. You are likely to see limited
                    or no information below. You might wish to check your
                    Internet connection and reload the document.
                    """.format(many)
            else:
                if failures == 1:
                    noun = "An error"
                else:
                    noun = "Errors"
                summary = """
                    {0} occurred when trying to discover the identity
                    of this document. You are likely to see limited or no
                    information below.
                    """.format(noun)
            html = """
                <div class="box error">
                    <strong>Warning</strong>
                    <p>
                        {0}
                    </p>
                    <div class="expandable" title="Details...">
                    <ul>
            """.format(summary)

            # One list item per (component, category) pair that is not a
            # success report
            sentences = {
                "timeout": "{0} did not respond",
                "connection": "Could not connect to {0}",
                "server": "{0} behaved unexpectedly",
            }
            for component, details in errors.items():
                for category, methods in details.items():
                    if category == "success":
                        continue
                    sentence = sentences.get(category, "An error occurred accessing {0}")
                    # The message becomes a tooltip on the method name
                    methods_html = ", ".join(
                        '<span title="{1}">{0}</span>'.format(html_text(method), html_text(message))
                        for method, message in methods
                    )
                    html += "<li>{0} (when accessing: {1}).</li>".format(
                        sentence.format("<strong>" + html_text(component) + "</strong>"), methods_html
                    )

            # Close the list, the expandable section and the outer box
            html += """
                    </ul>
                    </div>
                </div>
            """
            annotation = spineapi.Annotation()
            annotation["concept"] = "Collated"
            annotation["property:html"] = html
            annotation["property:name"] = "Error"
            annotation["session:weight"] = "1000"
            annotation["session:default"] = "1"
            annotation["session:headless"] = "1"
            document.addAnnotation(annotation)

        # Single-argument parenthesised form: same output under the Python 2
        # print statement.
        print(errors)
 def before_load_event(self, document):
     '''Record the areas covered by the document's Hyperlink annotations.

     Stores, in ``self.existing_areas``, the areas of every extent of every
     Hyperlink annotation followed by the areas attached directly to those
     annotations.
     '''
     hyperlinks = []
     for candidate in document.annotations():
         if candidate.get('concept') == 'Hyperlink':
             hyperlinks.append(candidate)

     collected = []
     # Areas reached through extents come first...
     for link in hyperlinks:
         for extent in link.extents():
             collected.extend(extent.areas())
     # ...then the areas held by the annotations themselves
     for link in hyperlinks:
         collected.extend(link.areas())

     self.existing_areas = collected
Example #25
0
    def on_load_event(self, document):
        '''Using the document content, try to resolve various bits of metadata.

        Collects the citations already present as ``Citation`` annotations
        in the ``Document Metadata`` list, runs them through
        ``utopia.citation.resolve`` and stores every newly returned,
        error-free source with ``utopia.tools.utils.store_metadata``.
        Sources that carry an ``error`` entry are grouped by component and
        category and reported to the user in a headless ``Collated``
        annotation.

        :param document: document to read from and annotate.
        '''
        # Local import: only this handler needs the escaping helper.
        from xml.sax.saxutils import escape

        def html_text(value):
            # '{0}'.format() renders None as 'None' instead of raising; the
            # extra entity makes the result safe inside a double-quoted
            # attribute as well as in element content.
            return escape('{0}'.format(value), {'"': '&quot;'})

        # Start by getting any citations already in the document
        input_citations = [
            utopia.tools.utils.citation_from_annotation(annotation)
            for annotation in document.annotations('Document Metadata')
            # Check the kinds of annotations that hold citation information
            if annotation.get('concept') in ('Citation', )
        ]
        citations = input_citations[:]

        # Run the resolution pipeline
        flattened = utopia.citation.resolve(citations=citations,
                                            document=document)

        # Save the resulting citations as annotations in the document
        sources = flattened.get('provenance', {}).get('sources', [])
        for citation in sources:
            if citation not in input_citations and 'error' not in citation:
                utopia.tools.utils.store_metadata(document, **citation)

        # Deal with errors:
        # errors[component][category] -> [(plugin, message), ...]
        errors = {}
        failures = 0
        # Nothing in this handler records a success, so this stays at zero
        successes = 0
        for source in sources:
            if 'error' not in source:
                continue
            failures += 1

            provenance = source.get('provenance', {})
            component = provenance.get('whence')
            plugin = provenance.get('plugin')

            failure = source.get('error', {})
            category = failure.get('category')
            message = failure.get('message', '')
            errors.setdefault(component, {}).setdefault(category, []).append(
                (plugin, message))

        # For each category, count how many components reported it
        categories = {}
        for details in errors.values():
            for category in details:
                categories[category] = categories.get(category, 0) + 1

        # If there are errors, provide feedback to the user
        if failures > 0:
            # Check for likely client problems
            if categories.get('connection', 0) == failures and successes == 0:
                summary = '''
                    Utopia could not reach any of the online services it would
                    normally use to identify this document, meaning you are
                    likely to see limited or no information below. You might
                    wish to check your Internet connection and reload the
                    document.
                    '''
            elif categories.get('timeout', 0) > 1:
                if categories.get('timeout', 0) == failures and successes == 0:
                    many = ''
                else:
                    many = 'some of'
                summary = '''
                    Utopia gave up contacting {0} the online services it would
                    normally use to identify this document because they were
                    taking too long to respond. You are likely to see limited
                    or no information below. You might wish to check your
                    Internet connection and reload the document.
                    '''.format(many)
            else:
                if failures == 1:
                    noun = 'An error'
                else:
                    noun = 'Errors'
                summary = '''
                    {0} occurred when trying to discover the identity
                    of this document. You are likely to see limited or no
                    information below.
                    '''.format(noun)
            html = '''
                <div class="box error">
                    <strong>Warning</strong>
                    <p>
                        {0}
                    </p>
                    <div class="expandable" title="Details...">
                    <ul>
            '''.format(summary)

            # One list item per (component, category) pair that is not a
            # success report
            sentences = {
                'timeout': '{0} did not respond',
                'connection': 'Could not connect to {0}',
                'server': '{0} behaved unexpectedly',
            }
            for component, details in errors.items():
                for category, methods in details.items():
                    if category == 'success':
                        continue
                    sentence = sentences.get(
                        category, 'An error occurred accessing {0}')
                    # The message becomes a tooltip on the plugin name
                    methods_html = ', '.join(
                        '<span title="{1}">{0}</span>'.format(
                            html_text(method), html_text(message))
                        for method, message in methods)
                    html += '<li>{0} (when accessing: {1}).</li>'.format(
                        sentence.format(
                            '<strong>' + html_text(component) + '</strong>'),
                        methods_html)

            # Close the list, the expandable section and the outer box
            html += '''
                    </ul>
                    </div>
                </div>
            '''
            annotation = spineapi.Annotation()
            annotation['concept'] = 'Collated'
            annotation['property:html'] = html
            annotation['property:name'] = 'Error'
            annotation['session:weight'] = '1000'
            annotation['session:default'] = '1'
            annotation['session:headless'] = '1'
            document.addAnnotation(annotation)