def __call__(self, rdfDataSource=None):
    '''Ingest RDF into the context folder and render a results page.

    ``rdfDataSource`` is the URL to read; when it is ``None`` the folder's own
    ``rdfDataSource`` attribute is used instead.  Every subject in the parsed
    statements is handed to the handler registered in ``_handlers`` for its type
    URI, which replaces any existing object and creates the new one(s).

    The created objects are stored on ``self.objects`` and the value of
    ``self.renderResults()`` is returned.  Raises ``RDFIngestException`` when no
    data source URL is available.
    '''
    folderContext = aq_inner(self.context)
    if rdfDataSource is None:
        rdfDataSource = folderContext.rdfDataSource
    if not rdfDataSource:
        raise RDFIngestException(_(u'This folder has no RDF data source URL.'))

    normalize = queryUtility(IIDNormalizer).normalize
    rdfGraph = ConjunctiveGraph()
    rdfGraph.parse(URLInputSource(rdfDataSource))
    statements = self._parseRDF(rdfGraph)

    ingested = []
    for subjectURI, predicates in statements.items():
        # The first value of the type predicate selects the handler.
        handler = _handlers[predicates[_typeURI][0]]
        objectID = handler.generateID(subjectURI, predicates, normalize)
        # Remove any previous incarnation before creating the replacement.
        handler.deleteExistingObject(objectID, folderContext, subjectURI, predicates, statements)
        objectTitle = handler.generateTitle(subjectURI, predicates)
        newObjects = handler.createObjects(
            objectID, objectTitle, subjectURI, predicates, statements, folderContext
        )
        for newObject in newObjects:
            newObject.reindex()
        ingested.extend(newObjects)

    self.objects = ingested
    return self.renderResults()
def __call__(self):
    '''Ingest biomarkers from the folder's RDF sources and render a results page.

    Reads the biomarker (``rdfDataSource``), organ (``bmoDataSource``) and mutation
    (``bmuDataSource``) RDF URLs from the context folder, plus the optional summary
    (``bmSumDataSource``) and ID (``idDataSource``) sources.  Biomarkers that the
    catalog finds directly inside the folder are deleted and then recreated from
    the parsed statements, linked to their panels, given organ-specific
    information, and finally published or retracted according to their QA state.

    The created biomarkers are wrapped in ``CreatedObject`` and stored on
    ``self.objects``.  Returns the rendered template when ``self.render`` is true,
    otherwise the number of created objects.

    Raises ``RDFIngestException`` when the biomarker, organ or mutation URL is
    unset, and ``BadRequest`` when no unused object ID can be found for a
    biomarker within ``MAX_NON_UNIQUE_BIOMARKER_IDS`` attempts.
    '''
    context = aq_inner(self.context)
    rdfDataSource, bmoDataSource, bmuDataSource, bmSumDataSource, idDataSource = (
        context.rdfDataSource,
        context.bmoDataSource,
        context.bmuDataSource,
        context.bmSumDataSource,
        context.idDataSource,
    )
    if bmSumDataSource:
        context.dataSummary = self.getSummaryData(bmSumDataSource)
    if not rdfDataSource or not bmoDataSource or not bmuDataSource:
        # NOTE(review): three URLs are required although the message says "one or
        # both"; the text is kept verbatim because it is passed through ``_()``.
        raise RDFIngestException(_(u'This biomarker folder lacks one or both of its RDF source URLs.'))

    # Weapons at ready
    catalog = getToolByName(context, 'portal_catalog')
    normalizerFunction = queryUtility(IIDNormalizer).normalize
    graph = ConjunctiveGraph()
    graph.parse(URLInputSource(rdfDataSource))
    statements = self._parseRDF(graph)

    # Add mutation-specific information
    graph = ConjunctiveGraph()
    graph.parse(URLInputSource(bmuDataSource))
    mutationStatements = self._parseRDF(graph)

    # Clean the slate (but not the subfolders)
    results = catalog(
        path=dict(query='/'.join(context.getPhysicalPath()), depth=1),
        object_provides=IBiomarker.__identifier__
    )
    context.manage_delObjects([i.id for i in results])
    newBiomarkers = {}

    # Make all the biomarker objects
    for uri, predicates in statements.items():
        try:
            typeURI = predicates[_typeURI][0]
            if typeURI != _biomarkerTypeURI:
                continue
            isPanel = bool(int(predicates[_isPanelPredicateURI][0]))
            title = unicode(predicates[_bmTitlePredicateURI][0])
            hgnc = predicates[_hgncPredicateURI][0] if _hgncPredicateURI in predicates else None
            if hgnc is not None:
                hgnc = hgnc.strip()
            # Prefer the HGNC value as the object ID; fall back to the normalized title.
            objID = hgnc if hgnc else normalizerFunction(title)
            objType = 'Biomarker Panel' if isPanel else 'Elemental Biomarker'
            try:
                obj = context[context.invokeFactory(objType, objID)]
            except BadRequest:
                # The ID was refused; retry with "-1", "-2", ... suffixes.
                obj = None
                for appendedNumber in xrange(1, MAX_NON_UNIQUE_BIOMARKER_IDS + 1):
                    try:
                        obj = context[context.invokeFactory(objType, "%s-%d" % (objID, appendedNumber))]
                        break
                    except BadRequest:
                        pass
                if obj is None:
                    raise BadRequest(
                        "Something's wrong. Got more than %d biomarkers with the same ID '%s'!"
                        % (MAX_NON_UNIQUE_BIOMARKER_IDS, objID)
                    )
            if not isPanel:
                # Append biomuta's predicates if gene symbol exists in biomuta's list as well
                self.addMutationSpecificInformation(objID, predicates, mutationStatements)
                # Was disabled because it made the ingest take multiple hours instead of
                # just 1 hour; see CA-1434 (kelly 2016-12-06).  Re-enabled because blob
                # storage was added to save existing queries.  Only the first query
                # might take 2 hours.
                predicates = self.addExternaResourcesInformation(objID, predicates, idDataSource)
                # Add frequencies for biomarker associated with biomarker type (Gene, Protein, etc...)
            # Update biomarker, if biomuta was added, biomuta predicates will be updated as well
            self.updateBiomarker(obj, uri, predicates, context, statements)
            newBiomarkers[uri] = obj
            obj.reindexObject()
        except KeyError:
            # Best effort: a statement that lacks an expected predicate is skipped.
            pass

    # Connect elementals to their panels
    for uri, predicates in statements.items():
        try:
            typeURI = predicates[_typeURI][0]
            if typeURI != _biomarkerTypeURI:
                continue
            biomarkerUID = newBiomarkers[uri].UID()
            panelURIs = predicates[_memberOfPanelPredicateURI]
            panels = self.findObjectsByIdentifiers(catalog, panelURIs)
            for panel in panels:
                current = [i.UID() for i in panel.members]
                current.append(biomarkerUID)
                panel.setMembers(current)
        except KeyError:
            # Not created above, or not a member of any panel: nothing to connect.
            pass

    # Add organ-specific information
    graph = ConjunctiveGraph()
    graph.parse(URLInputSource(bmoDataSource))
    organStatements = self._parseRDF(graph)
    self.addOrganSpecificInformation(newBiomarkers, organStatements, normalizerFunction, catalog)

    # Update indicated organs:
    for biomarker in newBiomarkers.values():
        biomarker.updatedIndicatedBodySystems()
        biomarker.reindexObject()

    # Publish as necessary
    for uri, predicates in statements.items():
        biomarker = newBiomarkers.get(uri)
        if biomarker is None:
            continue
        if biomarker.qaState == 'Private':
            self.retractBiomarker(context, biomarker, predicates)
        else:
            self.publishBiomarker(context, biomarker, predicates)

    self.objects = [CreatedObject(i) for i in newBiomarkers.values()]
    # A conditional expression (not ``and``/``or``) so that a template rendering
    # to a falsy value is still returned instead of the object count.
    return self.template() if self.render else len(self.objects)
def __call__(self, request):
    '''Check the submitted ``rdfDataSource`` value.

    The value is looked up in ``request.form`` first, falling back to
    ``request`` itself.  Returns ``None`` when the value is empty or matches
    ``_protocolsRegex``; otherwise returns a dict mapping the field name to an
    error message.
    '''
    form = request.form
    submitted = form.get('rdfDataSource', request.get('rdfDataSource', None))
    # An empty value is not an error here, and neither is one the pattern accepts.
    if not submitted or _protocolsRegex.match(submitted):
        return None
    return {'rdfDataSource': _(u'Please enter a valid URL to an RDF data source.')}
from Products.Archetypes.interfaces import IObjectPostValidation
from zope.component import adapts
import re

# URL schemes accepted for a folder's RDF data source.  The pattern built from
# them also accepts each scheme followed by an "s" (for example "https").
_protocols = (
    'http',
    'ftp',
    'irc',
    'news',
    'imap',
    'gopher',
    'jabber',
    'webdav',
    'smb',
    'fish',
    'ldap',
    'pop3',
    'smtp',
    'sftp',
    'ssh',
    'feed',
    'testscheme',
    'file'
)
_protocolsRegex = re.compile(r'(%s)s?:(//)?[^\s\r\n]+' % '|'.join(_protocols))

# The stock folder schema extended with the optional RDF data source field.
KnowledgeFolderSchema = folder.ATFolderSchema.copy() + atapi.Schema((
    atapi.StringField(
        'rdfDataSource',
        required=False,
        storage=atapi.AnnotationStorage(),
        widget=atapi.StringWidget(
            label=_(u'RDF Data Source'),
            description=_(u'URL to a source of Resource Description Format data that mandates the contents of this folder.'),
            size=60,
        ),
    ),
))

# Title and description use annotation storage, like the field added above.
KnowledgeFolderSchema['title'].storage = atapi.AnnotationStorage()
KnowledgeFolderSchema['description'].storage = atapi.AnnotationStorage()

finalizeATCTSchema(KnowledgeFolderSchema, folderish=True, moveDiscussion=False)


class KnowledgeFolder(folder.ATFolder):
    '''Folder that holds knowledge objects.'''
    implements(IKnowledgeFolder)
    portal_type = 'Knowledge Folder'
    _at_rename_after_creation = True
def __call__(self, request):
    '''Check the submitted ``identifier`` value.

    The value is looked up in ``request.form`` first, falling back to
    ``request`` itself.  Returns ``None`` when the value is empty or matches
    ``_uriRegex``; otherwise returns a dict mapping the field name to an error
    message.
    '''
    form = request.form
    submitted = form.get('identifier', request.get('identifier', None))
    # An empty value is not an error here, and neither is one the pattern accepts.
    if not submitted or _uriRegex.match(submitted):
        return None
    return {'identifier': _(u'Please enter a valid URI.')}
import re _uriSchemes = ( 'http', 'ftp', 'irc', 'news', 'imap', 'gopher', 'jabber', 'webdav', 'smb', 'fish', 'ldap', 'pop3', 'smtp', 'sftp', 'ssh', 'feed', 'testscheme', 'urn' ) _uriRegex = re.compile(r'(%s)s?:(//)?[^\s\r\n]+' % '|'.join(_uriSchemes)) KnowledgeObjectSchema = schemata.ATContentTypeSchema.copy() + atapi.Schema(( atapi.StringField( 'identifier', required=True, default='http://', storage=atapi.AnnotationStorage(), predicateURI=dublincore.IDENTIFIER_URI, widget=atapi.StringWidget( label=_(u'Identifier'), description=_(u'The Uniform Resource Identifier identifying the resource.'), ), ), )) KnowledgeObjectSchema['title'].storage = atapi.AnnotationStorage() # FIXME: Should be this: # KnowledgeObjectSchema['title'].predicateURI = dublincore.TITLE_URI # But BMDB uses this: KnowledgeObjectSchema['title'].predicateURI = 'http://edrn.nci.nih.gov/rdf/rdfs/bmdb-1.0.0#Description' KnowledgeObjectSchema['description'].storage = atapi.AnnotationStorage() KnowledgeObjectSchema['description'].predicateURI = dublincore.DESCRIPTION_URI finalizeATCTSchema(KnowledgeObjectSchema, folderish=False, moveDiscussion=False) class KnowledgeObject(base.ATCTContent):
from zope.interface import implements, directlyProvides from zope.schema.interfaces import IVocabularyFactory from zope.schema.vocabulary import SimpleVocabulary DiseaseSchema = knowledgeobject.KnowledgeObjectSchema.copy() + atapi.Schema(( atapi.ReferenceField( 'affectedOrgans', enforceVocabulary=True, multiValued=True, relationship='affectsOrgan', required=False, storage=atapi.AnnotationStorage(), vocabulary_factory=u'eke.knowledge.BodySystems', vocabulary_display_path_bound=-1, widget=atapi.ReferenceWidget( label=_(u'Affected Body Systems'), description=_(u'Body systems for which this disease is an ailment.'), ), ), atapi.StringField( 'icd9Code', required=False, storage=atapi.AnnotationStorage(), widget=atapi.StringWidget( label=_(u'ICD9 Code'), description=_(u'International Statistical Classifiction of Disease Code (version 9)'), size=10, ), predicateURI=u'http://edrn.nci.nih.gov/xml/rdf/edrn.rdf#icd9', ), atapi.StringField(
def __call__(self):
    '''Ingest biomarkers from the folder's RDF sources and render a results page.

    Reads the biomarker (``rdfDataSource``), organ (``bmoDataSource``) and mutation
    (``bmuDataSource``) RDF URLs from the context folder, plus the optional summary
    (``bmSumDataSource``) and ID (``idDataSource``) sources.  Biomarkers that the
    catalog finds directly inside the folder are deleted and then recreated from
    the parsed statements, linked to their panels, given organ-specific
    information, and finally published or retracted according to their QA state.

    The created biomarkers are wrapped in ``CreatedObject`` and stored on
    ``self.objects``.  Returns the rendered template when ``self.render`` is true,
    otherwise the number of created objects.

    Raises ``RDFIngestException`` when the biomarker, organ or mutation URL is
    unset, and ``BadRequest`` when no unused object ID can be found for a
    biomarker within ``MAX_NON_UNIQUE_BIOMARKER_IDS`` attempts.
    '''
    context = aq_inner(self.context)
    rdfDataSource, bmoDataSource, bmuDataSource, bmSumDataSource, idDataSource = (
        context.rdfDataSource,
        context.bmoDataSource,
        context.bmuDataSource,
        context.bmSumDataSource,
        context.idDataSource,
    )
    if bmSumDataSource:
        context.dataSummary = self.getSummaryData(bmSumDataSource)
    if not rdfDataSource or not bmoDataSource or not bmuDataSource:
        # NOTE(review): three URLs are required although the message says "one or
        # both"; the text is kept verbatim because it is passed through ``_()``.
        raise RDFIngestException(_(u'This biomarker folder lacks one or both of its RDF source URLs.'))

    # Weapons at ready
    catalog = getToolByName(context, 'portal_catalog')
    normalizerFunction = queryUtility(IIDNormalizer).normalize
    graph = ConjunctiveGraph()
    graph.parse(URLInputSource(rdfDataSource))
    statements = self._parseRDF(graph)

    # Add mutation-specific information
    graph = ConjunctiveGraph()
    graph.parse(URLInputSource(bmuDataSource))
    mutationStatements = self._parseRDF(graph)

    # Clean the slate (but not the subfolders)
    results = catalog(
        path=dict(query='/'.join(context.getPhysicalPath()), depth=1),
        object_provides=IBiomarker.__identifier__
    )
    context.manage_delObjects([i.id for i in results])
    newBiomarkers = {}

    # Make all the biomarker objects
    for uri, predicates in statements.items():
        try:
            typeURI = predicates[_typeURI][0]
            if typeURI != _biomarkerTypeURI:
                continue
            isPanel = bool(int(predicates[_isPanelPredicateURI][0]))
            title = unicode(predicates[_bmTitlePredicateURI][0])
            hgnc = predicates[_hgncPredicateURI][0] if _hgncPredicateURI in predicates else None
            if hgnc is not None:
                hgnc = hgnc.strip()
            # Prefer the HGNC value as the object ID; fall back to the normalized title.
            objID = hgnc if hgnc else normalizerFunction(title)
            objType = 'Biomarker Panel' if isPanel else 'Elemental Biomarker'
            try:
                obj = context[context.invokeFactory(objType, objID)]
            except BadRequest:
                # The ID was refused; retry with "-1", "-2", ... suffixes.
                obj = None
                for appendedNumber in xrange(1, MAX_NON_UNIQUE_BIOMARKER_IDS + 1):
                    try:
                        obj = context[context.invokeFactory(objType, "%s-%d" % (objID, appendedNumber))]
                        break
                    except BadRequest:
                        pass
                if obj is None:
                    raise BadRequest(
                        "Something's wrong. Got more than %d biomarkers with the same ID '%s'!"
                        % (MAX_NON_UNIQUE_BIOMARKER_IDS, objID)
                    )
            if not isPanel:
                # Append biomuta's predicates if gene symbol exists in biomuta's list as well
                self.addMutationSpecificInformation(objID, predicates, mutationStatements)
                # Was disabled because it made the ingest take multiple hours instead of
                # just 1 hour; see CA-1434 (kelly 2016-12-06).  Re-enabled because blob
                # storage was added to save existing queries.  Only the first query
                # might take 2 hours.
                predicates = self.addExternaResourcesInformation(objID, predicates, idDataSource)
                # Add frequencies for biomarker associated with biomarker type (Gene, Protein, etc...)
            # Update biomarker, if biomuta was added, biomuta predicates will be updated as well
            self.updateBiomarker(obj, uri, predicates, context, statements)
            newBiomarkers[uri] = obj
            obj.reindexObject()
        except KeyError:
            # Best effort: a statement that lacks an expected predicate is skipped.
            pass

    # Connect elementals to their panels
    for uri, predicates in statements.items():
        try:
            typeURI = predicates[_typeURI][0]
            if typeURI != _biomarkerTypeURI:
                continue
            biomarkerUID = newBiomarkers[uri].UID()
            panelURIs = predicates[_memberOfPanelPredicateURI]
            panels = self.findObjectsByIdentifiers(catalog, panelURIs)
            for panel in panels:
                current = [i.UID() for i in panel.members]
                current.append(biomarkerUID)
                panel.setMembers(current)
        except KeyError:
            # Not created above, or not a member of any panel: nothing to connect.
            pass

    # Add organ-specific information
    graph = ConjunctiveGraph()
    graph.parse(URLInputSource(bmoDataSource))
    organStatements = self._parseRDF(graph)
    self.addOrganSpecificInformation(newBiomarkers, organStatements, normalizerFunction, catalog)

    # Update indicated organs:
    for biomarker in newBiomarkers.values():
        biomarker.updatedIndicatedBodySystems()
        biomarker.reindexObject()

    # Publish as necessary
    for uri, predicates in statements.items():
        biomarker = newBiomarkers.get(uri)
        if biomarker is None:
            continue
        if biomarker.qaState == 'Private':
            self.retractBiomarker(context, biomarker, predicates)
        else:
            self.publishBiomarker(context, biomarker, predicates)

    self.objects = [CreatedObject(i) for i in newBiomarkers.values()]
    # A conditional expression (not ``and``/``or``) so that a template rendering
    # to a falsy value is still returned instead of the object count.
    return self.template() if self.render else len(self.objects)