Beispiel #1
0
 def __call__(self, rdfDataSource=None):
     '''Ingest RDF from a data source and render a results page.

     ``rdfDataSource`` is the URL to parse.  When it is None, the
     ``rdfDataSource`` attribute of the (acquisition-unwrapped) context is
     used instead.

     Raises RDFIngestException when no data source URL is available.
     Returns the value of ``self.renderResults()``.
     '''
     context = aq_inner(self.context)
     if rdfDataSource is None:
         rdfDataSource = context.rdfDataSource
     if not rdfDataSource:
         raise RDFIngestException(_(u'This folder has no RDF data source URL.'))
     normalizerFunction = queryUtility(IIDNormalizer).normalize
     graph = ConjunctiveGraph()
     graph.parse(URLInputSource(rdfDataSource))
     statements = self._parseRDF(graph)
     # Bind the result list to self.objects *before* the loop.  It used to be
     # assigned only inside the loop body, so a graph without any statements
     # left self.objects unset (or stale) by the time renderResults() ran.
     # The same list object is extended below, so self.objects always mirrors
     # the objects created so far.
     createdObjects = []
     self.objects = createdObjects
     for uri, predicates in statements.items():
          # The first type URI of the subject selects the handler.
          typeURI = predicates[_typeURI][0]
          handler = _handlers[typeURI]
          objectID = handler.generateID(uri, predicates, normalizerFunction)
          # Remove any previous incarnation before creating the new object(s).
          handler.deleteExistingObject(objectID, context, uri, predicates, statements)
          title = handler.generateTitle(uri, predicates)
          created = handler.createObjects(objectID, title, uri, predicates, statements, context)
          for obj in created:
              obj.reindex()
          createdObjects.extend(created)
     return self.renderResults()
Beispiel #2
0
    def __call__(self):
        '''Ingest biomarkers from the folder's RDF sources and render a results page.

        Reads the ``rdfDataSource``, ``bmoDataSource``, ``bmuDataSource``,
        ``bmSumDataSource`` and ``idDataSource`` attributes of the context.
        When a summary source is set, its data is stored on
        ``context.dataSummary``.  The objects providing IBiomarker directly
        inside the folder are deleted and recreated from the parsed RDF,
        connected to their panels, given organ-specific information, and
        finally published or retracted.

        Raises RDFIngestException when any of the ``rdfDataSource``,
        ``bmoDataSource`` or ``bmuDataSource`` URLs is missing.

        Returns the rendered template when ``self.render`` is true, otherwise
        the number of biomarkers created.
        '''
        context = aq_inner(self.context)
        rdfDataSource, bmoDataSource, bmuDataSource, bmSumDataSource, idDataSource = (
            context.rdfDataSource,
            context.bmoDataSource,
            context.bmuDataSource,
            context.bmSumDataSource,
            context.idDataSource,
        )
        if bmSumDataSource:
            context.dataSummary = self.getSummaryData(bmSumDataSource)

        # All three of these sources are required for an ingest.
        if not rdfDataSource or not bmoDataSource or not bmuDataSource:
            raise RDFIngestException(
                _(u'This biomarker folder lacks one or both of its RDF source URLs.'
                  ))
        # Weapons at ready
        catalog = getToolByName(context, 'portal_catalog')
        normalizerFunction = queryUtility(IIDNormalizer).normalize
        graph = ConjunctiveGraph()
        graph.parse(URLInputSource(rdfDataSource))
        statements = self._parseRDF(graph)

        # Parse the mutation-specific statements
        graph = ConjunctiveGraph()
        graph.parse(URLInputSource(bmuDataSource))
        mutationStatements = self._parseRDF(graph)

        # Clean the slate (but not the subfolders): depth=1 restricts the
        # catalog query to direct children of this folder.
        results = catalog(path=dict(query='/'.join(context.getPhysicalPath()),
                                    depth=1),
                          object_provides=IBiomarker.__identifier__)
        context.manage_delObjects([i.id for i in results])
        newBiomarkers = {}
        # Make all the biomarker objects
        for uri, predicates in statements.items():
            # A KeyError anywhere in this body (for instance a missing
            # predicate) skips the statement instead of aborting the ingest.
            try:
                typeURI = predicates[_typeURI][0]
                if typeURI != _biomarkerTypeURI:
                    continue
                isPanel = bool(int(predicates[_isPanelPredicateURI][0]))
                title = unicode(predicates[_bmTitlePredicateURI][0])
                hgnc = predicates[_hgncPredicateURI][
                    0] if _hgncPredicateURI in predicates else None
                if hgnc is not None:
                    hgnc = hgnc.strip()
                # Prefer the (non-empty) HGNC symbol as the object ID; fall
                # back to the normalized title.
                objID = hgnc if hgnc else normalizerFunction(title)
                objType = 'Biomarker Panel' if isPanel else 'Elemental Biomarker'
                try:
                    obj = context[context.invokeFactory(objType, objID)]
                except BadRequest:
                    # The ID was rejected; retry with "-1", "-2", ... appended
                    # until one is accepted or the limit is reached.
                    obj = None
                    for appendedNumber in xrange(
                            1, MAX_NON_UNIQUE_BIOMARKER_IDS + 1):
                        try:
                            obj = context[context.invokeFactory(
                                objType, "%s-%d" % (objID, appendedNumber))]
                            break
                        except BadRequest:
                            pass
                    if obj is None:
                        raise BadRequest(
                            "Something's wrong. Got more than %d biomarkers with the same ID '%s'!"
                            % (MAX_NON_UNIQUE_BIOMARKER_IDS, objID))
                if not isPanel:
                    # Append biomuta's predicates if the gene symbol exists in
                    # biomuta's list as well
                    self.addMutationSpecificInformation(
                        objID, predicates, mutationStatements)
                    # This lookup was once disabled because it made the ingest
                    # take multiple hours instead of just 1 hour; see CA-1434
                    # (kelly 2016-12-06).  It was re-enabled after blob storage
                    # was added to save existing queries.  Only the first
                    # query might take 2 hours.
                    predicates = self.addExternaResourcesInformation(
                        objID, predicates, idDataSource)
                    # Add frequencies for biomarker associated with biomarker
                    # type (Gene, Protein, etc...)
                # Update biomarker; if biomuta was added, biomuta predicates
                # will be updated as well
                self.updateBiomarker(obj, uri, predicates, context, statements)
                newBiomarkers[uri] = obj
                obj.reindexObject()
            except KeyError:
                pass
        # Connect elementals to their panels
        for uri, predicates in statements.items():
            # KeyError here means the biomarker was not created above or is
            # not a member of any panel; either way there is nothing to link.
            try:
                typeURI = predicates[_typeURI][0]
                if typeURI != _biomarkerTypeURI:
                    continue
                biomarkerUID = newBiomarkers[uri].UID()
                panelURIs = predicates[_memberOfPanelPredicateURI]
                panels = self.findObjectsByIdentifiers(catalog, panelURIs)
                for panel in panels:
                    current = [i.UID() for i in panel.members]
                    current.append(biomarkerUID)
                    panel.setMembers(current)

            except KeyError:
                pass
        # Add organ-specific information
        graph = ConjunctiveGraph()
        graph.parse(URLInputSource(bmoDataSource))
        organStatements = self._parseRDF(graph)
        self.addOrganSpecificInformation(newBiomarkers, organStatements,
                                         normalizerFunction, catalog)

        # Update indicated organs:
        for biomarker in newBiomarkers.values():
            biomarker.updatedIndicatedBodySystems()
            biomarker.reindexObject()
        # Publish as necessary
        for uri, predicates in statements.items():
            if uri in newBiomarkers:
                biomarker = newBiomarkers[uri]
                if biomarker.qaState == 'Private':
                    self.retractBiomarker(context, biomarker, predicates)
                else:
                    self.publishBiomarker(context, biomarker, predicates)
        self.objects = [CreatedObject(i) for i in newBiomarkers.values()]
        # A real conditional instead of "render and template() or len(...)":
        # the old form fell through to the object count whenever rendering
        # was requested but the template result happened to be falsy.
        return self.template() if self.render else len(self.objects)
Beispiel #3
0
 def __call__(self, request):
     '''Validate the submitted ``rdfDataSource`` value.

     The value is looked up in ``request.form`` first, falling back to
     ``request.get``.  Returns a dict mapping the field name to an error
     message when a non-empty value does not match ``_protocolsRegex``;
     returns None otherwise, including when no value was submitted.
     '''
     form = request.form
     candidate = form.get('rdfDataSource', request.get('rdfDataSource', None))
     # Nothing submitted, or it starts like an accepted URL: no error.
     if not candidate or _protocolsRegex.match(candidate):
         return None
     return {'rdfDataSource': _(u'Please enter a valid URL to an RDF data source.')}
Beispiel #4
0
from Products.Archetypes.interfaces import IObjectPostValidation
from zope.component import adapts
import re
# Scheme names accepted at the start of an RDF data source URL.  The regular
# expression below additionally allows a trailing "s" after any of them
# (so "http" also covers "https").
_protocols = (
    'http',
    'ftp',
    'irc',
    'news',
    'imap',
    'gopher',
    'jabber',
    'webdav',
    'smb',
    'fish',
    'ldap',
    'pop3',
    'smtp',
    'sftp',
    'ssh',
    'feed',
    'testscheme',
    'file',
)
# scheme, optional "s", colon, optional "//", then at least one
# non-whitespace character.
_protocolsRegex = re.compile(
    r'(%s)s?:(//)?[^\s\r\n]+' % '|'.join(_protocols)
)

# Schema for knowledge folders: a copy of the ATFolder schema extended with a
# single optional string field holding the URL of the folder's RDF source.
KnowledgeFolderSchema = folder.ATFolderSchema.copy() + atapi.Schema((
    atapi.StringField(
        'rdfDataSource',
        storage=atapi.AnnotationStorage(),
        required=False,
        widget=atapi.StringWidget(
            size=60,
            label=_(u'RDF Data Source'),
            description=_(u'URL to a source of Resource Description Format data that mandates the contents of this folder.'),
        ),
    ),
))

# The inherited title and description fields use annotation storage too.
KnowledgeFolderSchema['description'].storage = atapi.AnnotationStorage()
KnowledgeFolderSchema['title'].storage = atapi.AnnotationStorage()

finalizeATCTSchema(
    KnowledgeFolderSchema,
    folderish=True,
    moveDiscussion=False,
)

class KnowledgeFolder(folder.ATFolder):
    '''Knowledge folder which contains knowledge objects.'''
    # Declare that instances of this class provide IKnowledgeFolder.
    implements(IKnowledgeFolder)
    # Portal type name under which this content class is known.
    portal_type               = 'Knowledge Folder'
    # NOTE(review): presumably tells Archetypes to rename the object from its
    # title after creation -- confirm against the Archetypes documentation.
    _at_rename_after_creation = True
Beispiel #5
0
 def __call__(self, request):
     '''Validate the submitted ``identifier`` value.

     The value comes from ``request.form`` when present there and from
     ``request.get`` otherwise.  A non-empty value that does not match
     ``_uriRegex`` yields a dict mapping the field name to an error message;
     in every other case None is returned.
     '''
     form = request.form
     submitted = form.get('identifier', request.get('identifier', None))
     if not submitted:
         # An empty or missing identifier is not this validator's concern.
         return None
     if _uriRegex.match(submitted):
         return None
     return {'identifier': _(u'Please enter a valid URI.')}
Beispiel #6
0
import re
# Scheme names accepted at the start of an identifier URI.  The regular
# expression below additionally allows a trailing "s" after any of them.
_uriSchemes = (
    'http',
    'ftp',
    'irc',
    'news',
    'imap',
    'gopher',
    'jabber',
    'webdav',
    'smb',
    'fish',
    'ldap',
    'pop3',
    'smtp',
    'sftp',
    'ssh',
    'feed',
    'testscheme',
    'urn',
)
# scheme, optional "s", colon, optional "//", then at least one
# non-whitespace character.
_uriRegex = re.compile(
    r'(%s)s?:(//)?[^\s\r\n]+' % '|'.join(_uriSchemes)
)

# Schema for knowledge objects: a copy of the ATContentType schema extended
# with a required "identifier" string field that carries the Dublin Core
# identifier predicate.
KnowledgeObjectSchema = schemata.ATContentTypeSchema.copy() + atapi.Schema((
    atapi.StringField(
        'identifier',
        predicateURI=dublincore.IDENTIFIER_URI,
        storage=atapi.AnnotationStorage(),
        default='http://',
        required=True,
        widget=atapi.StringWidget(
            label=_(u'Identifier'),
            description=_(u'The Uniform Resource Identifier identifying the resource.'),
        ),
    ),
))

# Title: annotation storage plus the predicate it is read from.
KnowledgeObjectSchema['title'].storage = atapi.AnnotationStorage()
# FIXME: the predicate should be the Dublin Core title:
#     KnowledgeObjectSchema['title'].predicateURI = dublincore.TITLE_URI
# but BMDB uses its own "Description" predicate instead:
KnowledgeObjectSchema['title'].predicateURI = 'http://edrn.nci.nih.gov/rdf/rdfs/bmdb-1.0.0#Description'

# Description: annotation storage plus the Dublin Core description predicate.
KnowledgeObjectSchema['description'].storage = atapi.AnnotationStorage()
KnowledgeObjectSchema['description'].predicateURI = dublincore.DESCRIPTION_URI

finalizeATCTSchema(
    KnowledgeObjectSchema,
    folderish=False,
    moveDiscussion=False,
)

class KnowledgeObject(base.ATCTContent):
Beispiel #7
0
from zope.interface import implements, directlyProvides
from zope.schema.interfaces import IVocabularyFactory
from zope.schema.vocabulary import SimpleVocabulary

DiseaseSchema = knowledgeobject.KnowledgeObjectSchema.copy() + atapi.Schema((
    atapi.ReferenceField(
        'affectedOrgans',
        enforceVocabulary=True,
        multiValued=True,
        relationship='affectsOrgan',
        required=False,
        storage=atapi.AnnotationStorage(),
        vocabulary_factory=u'eke.knowledge.BodySystems',
        vocabulary_display_path_bound=-1,
        widget=atapi.ReferenceWidget(
            label=_(u'Affected Body Systems'),
            description=_(u'Body systems for which this disease is an ailment.'),
        ),
    ),
    atapi.StringField(
        'icd9Code',
        required=False,
        storage=atapi.AnnotationStorage(),
        widget=atapi.StringWidget(
            label=_(u'ICD9 Code'),
            description=_(u'International Statistical Classifiction of Disease Code (version 9)'),
            size=10,
        ),
        predicateURI=u'http://edrn.nci.nih.gov/xml/rdf/edrn.rdf#icd9',
    ),
    atapi.StringField(
Beispiel #8
0
    def __call__(self):
        '''Ingest biomarkers from the folder's RDF sources and render a results page.

        Reads the ``rdfDataSource``, ``bmoDataSource``, ``bmuDataSource``,
        ``bmSumDataSource`` and ``idDataSource`` attributes of the context.
        When a summary source is set, its data is stored on
        ``context.dataSummary``.  The objects providing IBiomarker directly
        inside the folder are deleted and recreated from the parsed RDF,
        connected to their panels, given organ-specific information, and
        finally published or retracted.

        Raises RDFIngestException when any of the ``rdfDataSource``,
        ``bmoDataSource`` or ``bmuDataSource`` URLs is missing.

        Returns the rendered template when ``self.render`` is true, otherwise
        the number of biomarkers created.
        '''
        context = aq_inner(self.context)
        rdfDataSource, bmoDataSource, bmuDataSource, bmSumDataSource, idDataSource = context.rdfDataSource, context.bmoDataSource, context.bmuDataSource, context.bmSumDataSource, context.idDataSource
        if bmSumDataSource:
            context.dataSummary = self.getSummaryData(bmSumDataSource)

        # All three of these sources are required for an ingest.
        if not rdfDataSource or not bmoDataSource or not bmuDataSource:
            raise RDFIngestException(_(u'This biomarker folder lacks one or both of its RDF source URLs.'))
        # Weapons at ready
        catalog = getToolByName(context, 'portal_catalog')
        normalizerFunction = queryUtility(IIDNormalizer).normalize
        graph = ConjunctiveGraph()
        graph.parse(URLInputSource(rdfDataSource))
        statements = self._parseRDF(graph)

        # Parse the mutation-specific statements
        graph = ConjunctiveGraph()
        graph.parse(URLInputSource(bmuDataSource))
        mutationStatements = self._parseRDF(graph)

        # Clean the slate (but not the subfolders): depth=1 restricts the
        # catalog query to direct children of this folder.
        results = catalog(path=dict(query='/'.join(context.getPhysicalPath()), depth=1),
            object_provides=IBiomarker.__identifier__)
        context.manage_delObjects([i.id for i in results])
        newBiomarkers = {}
        # Make all the biomarker objects
        for uri, predicates in statements.items():
            # A KeyError anywhere in this body (for instance a missing
            # predicate) skips the statement instead of aborting the ingest.
            try:
                typeURI = predicates[_typeURI][0]
                if typeURI != _biomarkerTypeURI:
                    continue
                isPanel = bool(int(predicates[_isPanelPredicateURI][0]))
                title = unicode(predicates[_bmTitlePredicateURI][0])
                hgnc = predicates[_hgncPredicateURI][0] if _hgncPredicateURI in predicates else None
                if hgnc is not None:
                    hgnc = hgnc.strip()
                # Prefer the (non-empty) HGNC symbol as the object ID; fall
                # back to the normalized title.
                objID = hgnc if hgnc else normalizerFunction(title)
                objType = 'Biomarker Panel' if isPanel else 'Elemental Biomarker'
                try:
                    obj = context[context.invokeFactory(objType, objID)]
                except BadRequest:
                    # The ID was rejected; retry with "-1", "-2", ... appended
                    # until one is accepted or the limit is reached.
                    obj = None
                    for appendedNumber in xrange(1, MAX_NON_UNIQUE_BIOMARKER_IDS+1):
                        try:
                            obj = context[context.invokeFactory(objType, "%s-%d" % (objID, appendedNumber))]
                            break
                        except BadRequest:
                            pass
                    if obj is None:
                        raise BadRequest("Something's wrong. Got more than %d biomarkers with the same ID '%s'!" %
                            (MAX_NON_UNIQUE_BIOMARKER_IDS, objID))
                if not isPanel:
                    # Append biomuta's predicates if the gene symbol exists in biomuta's list as well
                    self.addMutationSpecificInformation(objID, predicates, mutationStatements)
                    # This lookup was once disabled because it made the ingest take multiple hours
                    # instead of just 1 hour; see CA-1434 (kelly 2016-12-06).  It was re-enabled
                    # after blob storage was added to save existing queries.  Only the first query
                    # might take 2 hours.
                    predicates = self.addExternaResourcesInformation(objID, predicates, idDataSource)
                    # Add frequencies for biomarker associated with biomarker type (Gene, Protein, etc...)
                # Update biomarker; if biomuta was added, biomuta predicates will be updated as well
                self.updateBiomarker(obj, uri, predicates, context, statements)
                newBiomarkers[uri] = obj
                obj.reindexObject()
            except KeyError:
                pass
        # Connect elementals to their panels
        for uri, predicates in statements.items():
            # KeyError here means the biomarker was not created above or is
            # not a member of any panel; either way there is nothing to link.
            try:
                typeURI = predicates[_typeURI][0]
                if typeURI != _biomarkerTypeURI:
                    continue
                biomarkerUID = newBiomarkers[uri].UID()
                panelURIs = predicates[_memberOfPanelPredicateURI]
                panels = self.findObjectsByIdentifiers(catalog, panelURIs)
                for panel in panels:
                    current = [i.UID() for i in panel.members]
                    current.append(biomarkerUID)
                    panel.setMembers(current)

            except KeyError:
                pass
        # Add organ-specific information
        graph = ConjunctiveGraph()
        graph.parse(URLInputSource(bmoDataSource))
        organStatements = self._parseRDF(graph)
        self.addOrganSpecificInformation(newBiomarkers, organStatements, normalizerFunction, catalog)

        # Update indicated organs:
        for biomarker in newBiomarkers.values():
            biomarker.updatedIndicatedBodySystems()
            biomarker.reindexObject()
        # Publish as necessary
        for uri, predicates in statements.items():
            if uri in newBiomarkers:
                biomarker = newBiomarkers[uri]
                if biomarker.qaState == 'Private':
                    self.retractBiomarker(context, biomarker, predicates)
                else:
                    self.publishBiomarker(context, biomarker, predicates)
        self.objects = [CreatedObject(i) for i in newBiomarkers.values()]
        # A real conditional instead of "render and template() or len(...)":
        # the old form fell through to the object count whenever rendering
        # was requested but the template result happened to be falsy.
        return self.template() if self.render else len(self.objects)