def create_gp_statements(self):
    """
    Build the genomic position statements (strand orientation, genomic start,
    genomic end), each carrying a chromosome item as qualifier.

    Only the first entry of ``self.record['genomic_pos']['@value']`` is used.

    :return: list with the strand orientation, genomic start and genomic end
        statements, in that order
    """
    gp_record = self.record['genomic_pos']
    position = gp_record['@value'][0]
    source = gp_record['@source']

    # the reference is keyed on the external ID belonging to the position's source
    id_prop = source_ref_id[source['id']]
    external_id = self.external_ids[id_prop]
    assert isinstance(external_id, str)
    reference = make_ref_source(source, PROPS[id_prop], external_id,
                                login=self.login)

    # chromosome qualifier: the record holds the chromosome's RefSeq ID, which
    # is mapped to its item through self.refseq_qid_chrom
    chrom_qid = self.refseq_qid_chrom[position['chr']]
    chrom_qualifiers = [
        wdi_core.WDItemID(value=chrom_qid, prop_nr=PROPS['chromosome'],
                          is_qualifier=True)
    ]

    # strand == 1 selects one orientation item, anything else the other
    if position['strand'] == 1:
        orientation_qid = 'Q22809680'
    else:
        orientation_qid = 'Q22809711'

    return [
        wdi_core.WDItemID(orientation_qid, PROPS['strand orientation'],
                          references=[reference],
                          qualifiers=chrom_qualifiers),
        wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                          references=[reference],
                          qualifiers=chrom_qualifiers),
        wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                          references=[reference],
                          qualifiers=chrom_qualifiers),
    ]
def create_main_statements(self):
    """
    Populate ``self.s_main`` with the main statements of this term.

    One item statement is added per entry of ``self.relationships`` whose
    predicate and object are both known to ``self.do_graph``; then the exact
    match statements and (except for DOID:4) the "instance of disease"
    statement are added. ``self.create_reference()`` is called first when no
    reference has been built yet.
    """
    if not self.reference:
        self.create_reference()

    self.s_main = []
    statements = self.s_main
    graph = self.do_graph

    for relationship in self.relationships:
        predicate = relationship[0]
        if predicate not in graph.edge_prop:
            # unknown relationship types are skipped without logging
            continue

        target = relationship[1]
        if target not in graph.purl_wdid:
            s = "unknown obj: {}".format(target)
            msg = wdi_helpers.format_msg(self.doid, 'P699', None, s,
                                         msg_type="unknown obj")
            wdi_core.WDItemEngine.log("WARNING", msg)
            continue

        statements.append(
            wdi_core.WDItemID(graph.purl_wdid[target],
                              graph.edge_prop[predicate],
                              references=[self.reference]))

    # add http://purl.obolibrary.org/obo/, exact match
    statements.append(
        wdi_core.WDString(self.id, PROPS['exact match'],
                          references=[self.reference]))

    if self.doid != "DOID:4":
        # instance of disease
        statements.append(
            wdi_core.WDItemID('Q12136', PROPS['instance of'],
                              references=[self.reference]))

    # the identifiers.org exact match carries its own MIRIAM reference
    miriam_ref = [
        wdi_core.WDItemID(value="Q16335166", prop_nr='P248',
                          is_reference=True),
        wdi_core.WDUrl(
            "http://www.ebi.ac.uk/miriam/main/collections/MIR:00000233",
            'P854', is_reference=True),
    ]
    statements.append(
        wdi_core.WDString("http://identifiers.org/doid/{}".format(self.doid),
                          PROPS['exact match'], references=[miriam_ref]))
def to_wikidata(self):
    """
    Build the string statements for this compound.

    The only statement property is P662, whose value is ``self.cid`` with its
    first three characters removed. Every statement shares one reference
    block (stated in, source element, retrieved).

    :return: list of ``wdi_core.WDString`` statements
    """
    reference_block = [
        # stated in
        wdi_core.WDItemID(value='Q278487', prop_nr='P248', is_reference=True),
        # source element
        wdi_core.WDExternalID(value=self.cid, prop_nr='P662',
                              is_reference=True),
        # retrieved
        wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'),
                        prop_nr='P813', is_reference=True),
    ]
    refs = [reference_block]

    elements = {'P662': self.cid[3:]}

    data = []
    for prop_nr, value in elements.items():
        if not value:
            continue
        print('{}:'.format(prop_nr), value)

        # a list or set yields one statement per member, anything else one statement
        values = value if isinstance(value, (list, set)) else [value]
        for single in values:
            data.append(
                wdi_core.WDString(prop_nr=prop_nr, value=single,
                                  references=refs))
    return data
def create_statements(self):
    """
    Create the statements common to all genes.

    Calls ``self.create_ref_sources()`` first when no Entrez reference exists.
    Also stores the record's gene type on ``self.type_of_gene``.

    :return: list of statements (identifiers, instance of, found in taxon)
    """
    if not self.entrez_ref:
        self.create_ref_sources()

    ids = self.external_ids

    ############
    # ID statements (required)
    ############
    s = [
        wdi_core.WDString(ids['Entrez Gene ID'], PROPS['Entrez Gene ID'],
                          references=[self.entrez_ref])
    ]

    # optional ID statements: Ensembl IDs are only added with an Ensembl reference
    if self.ensembl_ref:
        for gene_id in ids['Ensembl Gene ID']:
            s.append(
                wdi_core.WDString(gene_id, PROPS['Ensembl Gene ID'],
                                  references=[self.ensembl_ref]))

        if 'Ensembl Transcript ID' in ids:
            for transcript_id in ids['Ensembl Transcript ID']:
                s.append(
                    wdi_core.WDString(transcript_id,
                                      PROPS['Ensembl Transcript ID'],
                                      references=[self.ensembl_ref]))

    # multi-valued identifier
    key = 'RefSeq RNA ID'
    if key in ids:
        for rna_id in ids[key]:
            s.append(
                wdi_core.WDString(rna_id, PROPS[key],
                                  references=[self.entrez_ref]))

    # single-valued identifiers, added only when present
    single_valued_keys = [
        'NCBI Locus tag',
        'Saccharomyces Genome Database ID',
        'Mouse Genome Informatics ID',
        'MGI Gene Symbol',
        'HomoloGene ID',
        'Rat Genome Database ID',
        'FlyBase Gene ID',
        'Wormbase Gene ID',
        'ZFIN Gene ID',
        'cytogenetic location',
    ]
    for key in single_valued_keys:
        if key in ids:
            s.append(
                wdi_core.WDString(ids[key], PROPS[key],
                                  references=[self.entrez_ref]))

    ############
    # Gene statements
    ############
    # if there is an ensembl ID, this comes from ensembl, otherwise, entrez
    if self.ensembl_ref is not None:
        gene_ref = self.ensembl_ref
    else:
        gene_ref = self.entrez_ref

    # instance of gene, ncRNA.. etc
    type_of_gene = self.record['type_of_gene']['@value']
    assert type_of_gene in type_of_gene_map, "unknown type of gene: {}".format(type_of_gene)
    self.type_of_gene = type_of_gene

    # "protein-coding gene" will be instance of "gene"
    s.append(
        wdi_core.WDItemID(type_of_gene_map[type_of_gene],
                          PROPS['instance of'], references=[gene_ref]))

    if type_of_gene not in {'protein-coding', 'pseudo', 'other', 'unknown'}:
        # make sure we add instance of "gene" as well
        s.append(
            wdi_core.WDItemID("Q7187", PROPS['instance of'],
                              references=[gene_ref]))

    # found in taxon
    s.append(
        wdi_core.WDItemID(self.organism_info['wdid'], PROPS['found in taxon'],
                          references=[gene_ref]))

    return s
def make_go_ref(curator, pmid_map, external_id, uniprot_id, evidence_wdid,
                retrieved, pmid=None):
    """
    Build the reference block for one GO annotation.

    :param curator: key into ``curators_wdids`` (and optionally ``curator_ref``)
    :param pmid_map: mapping from pmid to the item of the article
    :param external_id: mapping from identifier name to value; consulted for
        the curator-specific identifier
    :param uniprot_id: ID inserted into the QuickGO reference URL
    :param evidence_wdid: item used as the determination method (P459)
    :param retrieved: object with ``strftime``, used for the retrieved date
    :param pmid: optional pmid; when given it must be present in ``pmid_map``
    :return: list of reference snaks
    :raises ValueError: if ``pmid`` is given but unknown, or if ``curator`` is
        not in ``curators_wdids``
    """
    # initialize this reference for this evidence code with retrieved
    reference = [
        wdi_core.WDTime(retrieved.strftime('+%Y-%m-%dT00:00:00Z'),
                        prop_nr='P813', is_reference=True)
    ]

    # stated in pmid
    if pmid:
        if pmid not in pmid_map:
            raise ValueError(
                "article item for pmid {} not found. skipping item".format(
                    pmid))
        reference.append(
            wdi_core.WDItemID(pmid_map[pmid], 'P248', is_reference=True))

    # stated in uniprot-GOA Q28018111
    reference.append(
        wdi_core.WDItemID('Q28018111', 'P248', is_reference=True))

    # curator
    if curator not in curators_wdids:
        raise ValueError("curator not found: {}".format(curator))
    reference.append(
        wdi_core.WDItemID(curators_wdids[curator], 'P1640',
                          is_reference=True))

    # curator-specific reference: add the curator's own external ID when the
    # caller supplied one (e.g. for SGD)
    if curator in curator_ref and curator_ref[curator] in external_id:
        id_name = curator_ref[curator]
        reference.append(
            wdi_core.WDString(external_id[id_name], PROPS[id_name],
                              is_reference=True))

    # reference URL
    ref_url = "http://www.ebi.ac.uk/QuickGO/annotations?protein={}&geneProductId=UniProtKB:{}".format(
        uniprot_id, uniprot_id)
    reference.append(wdi_core.WDString(ref_url, 'P854', is_reference=True))

    # ref determination method
    reference.append(
        wdi_core.WDItemID(evidence_wdid, 'P459', is_reference=True))

    return reference
def test_new_item_creation(self):
    """Build a new item from two string statements and require a non-empty JSON representation."""
    statements = [
        wdi_core.WDString(value='test', prop_nr='P716'),
        wdi_core.WDString(value='test1', prop_nr='P76'),
    ]
    item = wdi_core.WDItemEngine(item_name='dae', domain=None,
                                 data=statements)

    # dump the representation for manual inspection
    pprint.pprint(item.get_wd_json_representation())

    # an empty representation counts as a failure
    representation = item.get_wd_json_representation()
    if representation:
        return
    raise ValueError
def create_statements(self):
    """
    Create the statements common to all proteins.

    :return: list of statements: identifiers, instance of protein, found in
        taxon and encoded by
    """
    ids = self.external_ids

    ############
    # ID statements
    # Required: uniprot (1)
    # Optional: OMIM (1?), Ensembl protein (0 or more), refseq protein (0 or more)
    ############
    entrez_gene = ids['Entrez Gene ID']

    uniprot_ref = make_ref_source(self.record['uniprot']['@source'],
                                  PROPS['UniProt ID'], ids['UniProt ID'],
                                  login=self.login)
    entrez_ref = make_ref_source(self.record['entrezgene']['@source'],
                                 PROPS['Entrez Gene ID'],
                                 ids['Entrez Gene ID'], login=self.login)

    s = [
        wdi_core.WDString(ids['UniProt ID'], PROPS['UniProt ID'],
                          references=[uniprot_ref])
    ]

    # single-valued optional identifiers, referenced to Entrez
    for key in ['Saccharomyces Genome Database ID']:
        if key in ids:
            s.append(
                wdi_core.WDString(ids[key], PROPS[key],
                                  references=[entrez_ref]))

    # each Ensembl protein ID gets a reference built from that same ID
    key = 'Ensembl Protein ID'
    if key in ids:
        for protein_id in ids[key]:
            ref = make_ref_source(self.record['ensembl']['@source'],
                                  PROPS[key], protein_id, login=self.login)
            s.append(wdi_core.WDString(protein_id, PROPS[key],
                                       references=[ref]))

    # RefSeq protein IDs are referenced through the Entrez gene ID
    key = 'RefSeq Protein ID'
    if key in ids:
        for protein_id in ids[key]:
            ref = make_ref_source(self.record['refseq']['@source'],
                                  PROPS['Entrez Gene ID'], entrez_gene,
                                  login=self.login)
            s.append(wdi_core.WDString(protein_id, PROPS[key],
                                       references=[ref]))

    ############
    # Protein statements
    ############
    s.extend([
        # instance of protein
        wdi_core.WDItemID("Q8054", PROPS['instance of'],
                          references=[uniprot_ref]),
        # found in taxon
        wdi_core.WDItemID(self.organism_info['wdid'],
                          PROPS['found in taxon'], references=[uniprot_ref]),
        # encoded by
        wdi_core.WDItemID(self.gene_wdid, PROPS['encoded by'],
                          references=[uniprot_ref]),
    ])

    return s
def create_gp_statements_chr(self):
    """
    Build genomic position statements for a gene located on a chromosome.

    Start and end carry the chromosome item as qualifier; strand orientation
    and the chromosome statement itself carry no qualifier.

    :return: list with strand orientation, genomic start, genomic end and
        chromosome statements, in that order
    """
    gp_record = self.record['genomic_pos']
    position = gp_record['@value']
    source = gp_record['@source']

    id_prop = source_ref_id[source['id']]
    reference = make_ref_source(source, PROPS[id_prop],
                                self.external_ids[id_prop],
                                login=self.login)

    # create qualifier for start/stop/orientation
    chrom_wdid = self.chr_num_wdid[position['chr']]
    qualifiers = [
        wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'], is_qualifier=True)
    ]

    # strand == 1 selects one orientation item, anything else the other
    if position['strand'] == 1:
        orientation_qid = 'Q22809680'
    else:
        orientation_qid = 'Q22809711'

    return [
        # strand orientation
        wdi_core.WDItemID(orientation_qid, PROPS['strand orientation'],
                          references=[reference]),
        # genomic start and end
        wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                          references=[reference], qualifiers=qualifiers),
        wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                          references=[reference], qualifiers=qualifiers),
        # chromosome
        wdi_core.WDItemID(chrom_wdid, PROPS['chromosome'],
                          references=[reference]),
    ]
def createNCBITaxReference(ncbiTaxId, retrieved):
    """
    Build a reference block for an NCBI taxonomy record.

    :param ncbiTaxId: taxonomy ID stored in the reference under P685
    :param retrieved: object with ``strftime``; only its date part is used
    :return: list ``[stated in, retrieved, taxonomy ID]`` of reference snaks
    """
    retrieved_stamp = retrieved.strftime("+%Y-%m-%dT00:00:00Z")
    return [
        wdi_core.WDItemID(value="Q13711410", prop_nr="P248",
                          is_reference=True),
        wdi_core.WDTime(retrieved_stamp, prop_nr="P813", is_reference=True),
        wdi_core.WDString(value=ncbiTaxId, prop_nr="P685",
                          is_reference=True),
    ]
def gene_expressed_in_organ_statements(bgee_gene_id: object, wikidata_gene_ids: list, wikidata_organ_ids: list) -> dict:
    """Map every Wikidata gene id to its "expressed in" statements.

    Each organ yields one statement, qualified with its 1-based position in
    ``wikidata_organ_ids`` as series ordinal. All gene ids map to the same
    list object.

    :param bgee_gene_id: the gene id used in Bgee such as an Ensembl identifier
    :param wikidata_gene_ids: the Wikidata gene identifiers that correspond to bgee_gene_id
    :param wikidata_organ_ids: the ordered Wikidata anatomic entity items in which bgee_gene_id is expressed
    :return: a dictionary where key = Wikidata gene id, value = list of statements;
        empty when no gene ids are given
    """
    reference = create_reference(bgee_gene_id)

    statements = []
    # the organ ids are assumed to be already ordered
    for ordinal, organ_id in enumerate(wikidata_organ_ids, start=1):
        order = wdi_core.WDString(str(ordinal), PROPS['series ordinal'],
                                  is_qualifier=True)
        statements.append(
            wdi_core.WDItemID(organ_id, PROPS['expressed in'],
                              references=[reference], qualifiers=[order]))

    return dict.fromkeys(wikidata_gene_ids, statements)
def append_taxon_and_gender(cell_line_object, data_to_add_to_wikidata,
                            list_of_taxons_of_origin,
                            list_of_biological_sexes_of_source):
    """
    Append one P703 statement per taxon of origin, qualified with the sexes.

    A taxon equal to "Unknow value" becomes a ``somevalue`` string statement;
    any other taxon becomes an item statement.

    :param cell_line_object: object providing ``references_in_wdi_format``
    :param data_to_add_to_wikidata: list that is extended in place
    :param list_of_taxons_of_origin: taxa to add; nothing is added when falsy
    :param list_of_biological_sexes_of_source: qualifiers attached to each statement
    :return: ``data_to_add_to_wikidata``
    """
    cell_line_references = cell_line_object.references_in_wdi_format

    if not list_of_taxons_of_origin:
        return data_to_add_to_wikidata

    for taxon in list_of_taxons_of_origin:
        if taxon == "Unknow value":
            statement = wdi_core.WDString(
                value="Unknow value",
                prop_nr="P703",
                qualifiers=list_of_biological_sexes_of_source,
                references=cell_line_references,
                snak_type='somevalue')
        else:
            statement = wdi_core.WDItemID(
                value=taxon,
                prop_nr="P703",
                qualifiers=list_of_biological_sexes_of_source,
                references=cell_line_references)
        data_to_add_to_wikidata.append(statement)

    return data_to_add_to_wikidata
def set_taxon(taxid):
    """
    Build an item engine for the taxon with the given NCBI taxonomy ID.

    Fetches the taxon summary from NCBI eutils, creates instance-of, taxonomy
    ID and scientific name statements, and fills in label, alias and English
    description where the item does not have them yet. Nothing is written.

    :param taxid: NCBI taxonomy ID; also used as key into the eutils result
    :return: the ``wdi_core.WDItemEngine`` instance
    """
    summary_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=taxonomy&id={}&format=json".format(taxid)
    response = requests.get(summary_url)
    ncbiTaxon = json.loads(response.text)

    # NOTE(review): `retrieved` is not defined in this function; presumably a
    # module-level timestamp - confirm
    ncbiTaxref = createNCBITaxReference(taxid, retrieved)

    # every statement gets its own copy of the reference
    taxonitemStatements = [
        ## instance of
        wdi_core.WDItemID(value="Q16521", prop_nr="P31",
                          references=[copy.deepcopy(ncbiTaxref)]),
        ## NCBI tax id
        wdi_core.WDExternalID(value=taxid, prop_nr="P685",
                              references=[copy.deepcopy(ncbiTaxref)]),
    ]

    ## scientificname
    scientificName = ncbiTaxon["result"][taxid]['scientificname']
    taxonitemStatements.append(
        wdi_core.WDString(scientificName, prop_nr="P225",
                          references=[copy.deepcopy(ncbiTaxref)]))

    item = wdi_core.WDItemEngine(data=taxonitemStatements)

    # use the scientific name as label when there is none, as alias when the
    # label is something else
    if item.get_label() == "":
        item.set_label(label=scientificName, lang="en")
    if item.get_label() != scientificName:
        item.set_aliases(aliases=[scientificName])
    if item.get_description(lang="en") == "":
        item.set_description(description="strain of virus", lang="en")

    return item
def test_fastrun_label(self):
    """
    Check label and alias retrieval for Q2 when the item is loaded in fast-run mode.

    Verifies that the English label is "Earth" both through the item and
    through its fast-run container, that ``check_language_data`` is truthy for
    a label that differs, and that "Terra" is among the aliases.
    """
    # NOTE: the original line ended with a dangling, unterminated triple quote
    # that would have swallowed the following definitions; it is removed here.
    data = [
        wdi_core.WDItemID('Q544', 'P361'),
        wdi_core.WDItemID('Q7547', 'P398'),
        wdi_core.WDString('Mars', 'P2572')
    ]

    fast_run_base_filter = {'P361': 'Q544'}
    item = wdi_core.WDItemEngine(wd_item_id="Q2", data=data, fast_run=True,
                                 fast_run_base_filter=fast_run_base_filter)

    # dump the state of the first stored fast-run container for debugging
    fast_run_container = wdi_core.WDItemEngine.fast_run_store[0]
    print(fast_run_container.prop_data)
    print(fast_run_container.statements)
    print(fast_run_container.prop_dt_map)
    print(fast_run_container.rev_lookup)

    assert item.get_label('en') == "Earth"
    assert item.fast_run_container.get_language_data("Q2", 'en', 'label')[0] == "Earth"
    assert item.fast_run_container.check_language_data(
        "Q2", ['not the Earth'], 'en', 'label')
    assert "Terra" in item.get_aliases()
def get_item_statements(i_dict, type):
    """
    Turn one record into a list of statements.

    :param i_dict: mapping from property name to a list of values; the keys
        "Q", "wiki" and "label" are skipped
    :param type: key into ``cfg.object_ids``; when not None a leading P1
        statement with the looked-up item is added
    :return: list of ``wdi_core`` statements

    Item-valued properties whose value has no local Q id are recorded in
    ``props_missed`` instead of producing a statement. String values longer
    than 400 characters are cut to 395 characters plus "...".
    """
    statements = []
    if type is not None:
        statements.append(
            wdi_core.WDItemID(cfg.object_ids.get(type), prop_nr="P1"))

    for prop in i_dict:
        # ignore the wikidata q value and label;
        # wikidata property statements will be imported later
        if prop in ("Q", "wiki", "label"):
            continue
        # this weird case causes an internal server error
        if prop == "is_related_to" and i_dict.get("label")[0] == "Koppel Ted":
            continue

        # get the information about the property
        pid = cfg.property_ids.get(prop)
        is_item_valued = cfg.property_keys.index(prop) in cfg.object_prop

        # make statements for each value of the property
        for value in i_dict.get(prop):
            if is_item_valued:
                # get the q identifier of the object if applicable
                qid = get_local_q(value)
                if qid is None:
                    props_missed.append([i_dict.get("label")[0], pid, value])
                    continue
                statement = wdi_core.WDItemID(qid, prop_nr=pid)
            else:
                if len(value) > 400:
                    value = value[:395] + "..."
                statement = wdi_core.WDString(value, prop_nr=pid)

            # add statement to the list
            statements.append(statement)

    return statements
def remove_deprecated_statements(qid, frc, release_wdid, props, login):
    """
    Remove statements of ``qid`` that are referenced to an older release.

    A statement counts as deprecated when any of its references has a P248
    snak whose value is the numeric id of a known release other than
    ``release_wdid``. Such statements are flagged with a ``remove`` attribute
    and written back; nothing is written when none are found.

    :param qid: item whose statements are inspected
    :param frc: fast-run container used to reconstruct the statements
    :param release_wdid: item id of the current release (kept)
    :param props: properties that must be loaded into ``frc`` first
    :param login: login passed on to ``wdi_helpers.try_write``
    """
    def numeric_id(wdid):
        return int(wdid.replace("Q", ""))

    known_releases = set(INTERPRO_RELEASES.values()) | {'Q3047275'}
    releases = {numeric_id(wdid) for wdid in known_releases}
    # don't count this release
    releases.discard(numeric_id(release_wdid))

    # make sure we have these props in frc
    for prop in props:
        frc.write_required([wdi_core.WDString("fake value", prop)])

    orig_statements = frc.reconstruct_statements(qid)

    def cites_old_release(statement):
        for ref_block in statement.get_references():
            for snak in ref_block:
                if snak.get_prop_nr() == 'P248' and snak.get_value() in releases:
                    return True
        return False

    s_dep = []
    for statement in orig_statements:
        if cites_old_release(statement):
            # the presence of the attribute marks the statement for removal
            statement.remove = ''
            s_dep.append(statement)

    if not s_dep:
        return

    print("-----")
    print(qid)
    print(orig_statements)
    print(s_dep)
    print([(x.get_prop_nr(), x.value) for x in s_dep])
    print([(x.get_references()[0]) for x in s_dep])
    wd_item = wdi_core.WDItemEngine(wd_item_id=qid, domain='none',
                                    data=s_dep, fast_run=False)
    wdi_helpers.try_write(wd_item, '', '', login,
                          edit_summary="remove deprecated statements")
def create_item(self, label, description, ext_id, synonyms=None,
                type_of=None, force=False):
    """
    Create an item identified by ``ext_id`` and remember its id.

    :param label: item label (also used as item name)
    :param description: description; skipped when falsy
    :param ext_id: external identifier, stored under ``self.dbxref_pid``
    :param synonyms: aliases; skipped when falsy
    :param type_of: external id of the type item; must already be in
        ``self.dbxref_qid`` when given
    :param force: create the item even if ``ext_id`` is already known
    :return: None
    """
    already_known = ext_id in self.dbxref_qid
    if not force and already_known:
        print("item already exists: {} {}".format(self.dbxref_qid[ext_id],
                                                  ext_id))
        return None

    statements = [wdi_core.WDString(ext_id, self.dbxref_pid)]
    if type_of:
        rdf_type_pid = self.uri_pid[
            'http://www.w3.org/1999/02/22-rdf-syntax-ns#rdf_type']
        statements.append(
            wdi_core.WDItemID(self.dbxref_qid[type_of], rdf_type_pid))

    item = self.item_engine(item_name=label, domain="foo", data=statements,
                            core_props=[self.dbxref_pid])
    item.set_label(label)
    if description:
        item.set_description(description)
    if synonyms:
        item.set_aliases(synonyms)
    if self.write:
        item.write(self.login)

    # NOTE(review): the id is recorded whether or not the item was written - confirm
    self.dbxref_qid[ext_id] = item.wd_item_id
def createUniprotReference(uniprotId, retrieved):
    """
    Build a reference block for a UniProt record.

    :param uniprotId: ID stored in the reference under P352
    :param retrieved: object with ``strftime``; only its date part is used
    :return: list ``[stated in, retrieved, UniProt ID]`` of reference snaks
    """
    retrieved_stamp = retrieved.strftime("+%Y-%m-%dT00:00:00Z")
    return [
        wdi_core.WDItemID(value="Q905695", prop_nr="P248",
                          is_reference=True),
        wdi_core.WDTime(retrieved_stamp, prop_nr="P813", is_reference=True),
        wdi_core.WDString(value=uniprotId, prop_nr="P352",
                          is_reference=True),
    ]
def create_reference(self):
    """
    Build the InterPro reference and store it on ``self.reference``.

    The same reference is meant to be used for everything, except for a ref
    to the interpro item itself.
    """
    self.reference = [
        # stated in Interpro version XX.X
        wdi_core.WDItemID(self.release_wdid, 'P248', is_reference=True),
        # interpro ID
        wdi_core.WDString(self.id, INTERPRO, is_reference=True),
    ]
def getGeneQid(ncbiId, ncbi_reference):
    """
    Return an item engine built from a single P351 statement.

    :param ncbiId: value of the P351 statement (presumably the NCBI gene ID
        - confirm against callers)
    :param ncbi_reference: reference block; a deep copy is attached so the
        caller's object is not shared with the statement
    :return: ``wdi_core.WDItemEngine`` for that statement
    """
    reference_copy = copy.deepcopy(ncbi_reference)
    id_statement = wdi_core.WDString(value=ncbiId, prop_nr="P351",
                                     references=[reference_copy])
    return wdi_core.WDItemEngine(data=[id_statement])
def createNCBIGeneReference(ncbiGeneId, retrieved):
    """
    Build a reference block for an NCBI gene record.

    :param ncbiGeneId: gene ID stored in the reference under P351
    :param retrieved: object with ``strftime``; only its date part is used
    :return: list ``[stated in, retrieved, gene ID]`` of reference snaks
    """
    retrieved_stamp = retrieved.strftime("+%Y-%m-%dT00:00:00Z")
    return [
        wdi_core.WDItemID(value="Q20641742", prop_nr="P248",
                          is_reference=True),
        wdi_core.WDTime(retrieved_stamp, prop_nr="P813", is_reference=True),
        wdi_core.WDString(value=ncbiGeneId, prop_nr="P351",
                          is_reference=True),
    ]
def add_entity(self, property_list, result):
    """
    Fill ``property_list`` with the statements of a Reactome entity.

    Entities whose ``entitytype`` is not one of COMP, DS, CS or OS are
    ignored and ``property_list`` is left untouched.

    :param property_list: dict of property id -> list of statements, updated in place
    :param result: the data from Reactome; reads ``entitytype`` and ``cportal``
    :return: None
    """
    instance_of_by_type = {
        'COMP': 'Q420927',
        'DS': 'Q47461827',
        'CS': 'Q47461807',
        'OS': 'Q49980450',
    }
    wditem_value = instance_of_by_type.get(result['entitytype'])
    if wditem_value is None:
        return

    # P31 = instance of; a complex portal reference is added when available
    cpref = []
    if result['cportal'] != '':
        cpref = self.create_complex_portal_reference(result['cportal'])

    instance_refs = [copy.deepcopy(self.reference)]
    if cpref:
        instance_refs.append(cpref)
    property_list["P31"] = [
        wdi_core.WDItemID(value=wditem_value, prop_nr="P31",
                          references=instance_refs)
    ]

    # P2888 = exact match
    property_list["P2888"] = [
        wdi_core.WDUrl(self.match_url, prop_nr='P2888',
                       references=[copy.deepcopy(self.reference)])
    ]

    # P703 = found in taxon
    property_list["P703"] = [
        wdi_core.WDItemID(value=self.species, prop_nr='P703',
                          references=[copy.deepcopy(self.reference)])
    ]

    # P3937 = Reactome ID (added without a reference)
    property_list["P3937"] = [
        wdi_core.WDString(value=self.reactome_id, prop_nr='P3937')
    ]

    self.add_entity_parts(property_list, result)
def create_reference(iNaturalist_id, retrieved):
    """
    Build a reference block for an iNaturalist taxon mapping.

    :param iNaturalist_id: taxon ID stored under the iNaturalist Taxon ID property
    :param retrieved: object with ``strftime``; only its date part is used
    :return: list ``[stated in, reference URL, retrieved, taxon ID]`` of
        reference snaks
    """
    retrieved_stamp = retrieved.strftime("+%Y-%m-%dT00:00:00Z")
    mapping_url = "https://www.dropbox.com/s/gdcjmre6v3h9k20/inaturalist_taxon_mapping.tgz?dl=0"

    return [
        wdi_core.WDItemID(value=ITEMS['iNaturalist'],
                          prop_nr=PROPS['stated in'], is_reference=True),
        wdi_core.WDString(value=mapping_url, prop_nr=PROPS['reference URL'],
                          is_reference=True),
        wdi_core.WDTime(retrieved_stamp, prop_nr=PROPS['retrieved'],
                        is_reference=True),
        wdi_core.WDString(value=iNaturalist_id,
                          prop_nr=PROPS['iNaturalist Taxon ID'],
                          is_reference=True),
    ]
def create_reference(spl_url, source_type):
    """
    Build a reference block for a source document.

    :param spl_url: URL stored under P854
    :param source_type: string stored under P958
    :return: list of reference snaks: stated in, retrieved (today), the fixed
        date 2015-09-29 under P2960, the URL and the source type
    """
    stamp_format = "+%Y-%m-%dT00:00:00Z"
    retrieved_stamp = datetime.now().strftime(stamp_format)
    archived_stamp = datetime.strptime('9/29/2015', '%m/%d/%Y').strftime(stamp_format)

    return [
        wdi_core.WDItemID(value="Q73670648", prop_nr="P248",
                          is_reference=True),
        wdi_core.WDTime(retrieved_stamp, prop_nr="P813", is_reference=True),
        wdi_core.WDTime(archived_stamp, prop_nr="P2960", is_reference=True),
        wdi_core.WDUrl(value=spl_url, prop_nr="P854", is_reference=True),
        wdi_core.WDString(value=source_type, prop_nr="P958",
                          is_reference=True),
    ]
def append_identifiers(wikidata_id, doi=None, pmid=None, pmcid=None,
                       nioshtic=None):
    """
    Adds identifiers such as DOI and NIOSHTIC to an existing Wikidata item.
    Reconciliation of identifiers across databases helps us root out
    duplicates.

    The item is written with ``WIKI_SESSION`` and a pipe-separated summary
    line (missing identifiers shown as empty) is printed afterwards.

    @param wikidata_id: the Q-number of the Wikidata item to edit
    @param doi: string; defaults to None
    @param pmid: string; defaults to None
    @param pmcid: string; defaults to None
    @param nioshtic: string; defaults to None
    """
    identifiers = (
        ('P356', doi),
        ('P698', pmid),
        ('P932', pmcid),
        ('P2880', nioshtic),
    )

    # only identifiers that were actually supplied become statements
    data = [
        wdi_core.WDString(value=value, prop_nr=prop_nr)
        for prop_nr, value in identifiers
        if value is not None
    ]

    append_value = [prop_nr for prop_nr, _ in identifiers]
    wikidata_item = wdi_core.WDItemEngine(wd_item_id=wikidata_id, data=data,
                                          append_value=append_value)
    wikidata_item.write(WIKI_SESSION)

    printable = ['' if value is None else value for _, value in identifiers]
    print('|'.join([wikidata_id] + printable))
def to_wikidata(self):
    """
    Build the string statements for this substance.

    One statement is created per non-empty identifier (a list or set yields
    one statement per member). A SMILES longer than 400 characters is left
    out. Every statement shares one reference block (stated in, source
    element, retrieved).

    :return: list of ``wdi_core.WDString`` statements
    """
    reference_block = [
        # stated in
        wdi_core.WDItemID(value='Q6593799', prop_nr='P248',
                          is_reference=True),
        # source element
        wdi_core.WDExternalID(value=self.unii, prop_nr='P652',
                              is_reference=True),
        # retrieved
        wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'),
                        prop_nr='P813', is_reference=True),
    ]
    refs = [reference_block]

    print('UNII Main label is', self.label)

    elements = {
        'P652': self.unii,
        'P2017': self.smiles,
        'P235': self.stdinchikey,
        'P231': self.cas,
        'P232': self.einecs,
        'P1748': self.nci,
        'P3345': self.rxnorm,
    }

    # over-long SMILES strings are not submitted
    if self.smiles and len(self.smiles) > 400:
        elements.pop('P2017')

    data = []
    for prop_nr, value in elements.items():
        if not value:
            continue
        print('{}:'.format(prop_nr), value)

        values = value if isinstance(value, (list, set)) else [value]
        for single in values:
            data.append(
                wdi_core.WDString(prop_nr=prop_nr, value=single,
                                  references=refs))
    return data
def create_gp_statements(self):
    """
    Build genomic position statements when there is no chromosome item.

    Strand orientation, genomic start and genomic end are each qualified with
    the record's ``chr`` value as a P2249 string instead of a chromosome item.

    :return: list with the strand orientation, genomic start and genomic end
        statements, in that order
    """
    gp_record = self.record['genomic_pos']
    position = gp_record['@value']
    source = gp_record['@source']

    id_prop = source_ref_id[source['id']]
    reference = make_ref_source(source, PROPS[id_prop],
                                self.external_ids[id_prop],
                                login=self.login)

    # create qualifier for chromosome REFSEQ ID (not chrom item)
    rs_chrom = wdi_core.WDString(value=position['chr'], prop_nr='P2249',
                                 is_qualifier=True)

    # strand == 1 selects one orientation item, anything else the other
    if position['strand'] == 1:
        orientation_qid = 'Q22809680'
    else:
        orientation_qid = 'Q22809711'

    return [
        # strand orientation
        wdi_core.WDItemID(orientation_qid, PROPS['strand orientation'],
                          references=[reference], qualifiers=[rs_chrom]),
        # genomic start and end
        wdi_core.WDString(str(int(position['start'])), PROPS['genomic start'],
                          references=[reference], qualifiers=[rs_chrom]),
        wdi_core.WDString(str(int(position['end'])), PROPS['genomic end'],
                          references=[reference], qualifiers=[rs_chrom]),
    ]
def test_new_item_creation(self):
    """
    Build new items from one statement of every datatype and require a
    non-empty JSON representation.

    Each statement is tried on its own and all statements together, once with
    the properties declared as core props and once with no core props.
    """
    data = [
        wdi_core.WDString(value='test', prop_nr='P1'),
        wdi_core.WDString(value='test1', prop_nr='P2'),
        wdi_core.WDMath("xxx", prop_nr="P3"),
        wdi_core.WDExternalID("xxx", prop_nr="P4"),
        wdi_core.WDItemID("Q123", prop_nr="P5"),
        wdi_core.WDTime('+%Y-%m-%dT%H:%M:%SZ', "P6"),
        wdi_core.WDUrl("http://www.google.com", "P7"),
        wdi_core.WDMonolingualText("xxx", prop_nr="P8"),
        wdi_core.WDQuantity(5, prop_nr="P9"),
        wdi_core.WDQuantity(5, upper_bound=9, lower_bound=2, prop_nr="P10"),
        wdi_core.WDCommonsMedia("xxx", prop_nr="P11"),
        wdi_core.WDGlobeCoordinate(1.2345, 1.2345, 12, prop_nr="P12"),
        wdi_core.WDGeoShape("xxx", prop_nr="P13"),
        wdi_core.WDProperty("P123", "P14"),
    ]
    core_props = {"P{}".format(number) for number in range(20)}

    def build(statements, props):
        return wdi_core.WDItemEngine(item_name='dae', domain="szadf",
                                     data=statements, core_props=props)

    # every datatype on its own
    for statement in data:
        item = build([statement], core_props)
        assert item.get_wd_json_representation()
        item = build([statement], set())
        assert item.get_wd_json_representation()

    # all datatypes on a single item
    item = build(data, core_props)
    assert item.get_wd_json_representation()
    item = build(data, set())
    assert item.get_wd_json_representation()
def create_gp_statements_chr(self):
    """
    Build genomic position statements for a gene with one or more positions
    on chromosomes.

    Every position yields a genomic start and a genomic end statement
    qualified with its chromosome; each distinct chromosome yields one
    chromosome statement; a strand orientation statement is added only when
    all positions agree on the strand.

    :return: list of statements, or None when the reference for the position's
        source is missing
    :raises ValueError: if the source is neither "entrez" nor "ensembl"
    """
    if not self.entrez_ref:
        self.create_ref_sources()

    gp_record = self.record['genomic_pos']
    positions = gp_record['@value']
    source_id = gp_record['@source']['id']

    if source_id == "entrez":
        reference = self.entrez_ref
    elif source_id == "ensembl":
        reference = self.ensembl_ref
    else:
        raise ValueError()

    if not reference:
        return None

    chromosome_qids = {self.chr_num_wdid[pos['chr']] for pos in positions}
    orientation_qids = {
        'Q22809680' if pos['strand'] == 1 else 'Q22809711'
        for pos in positions
    }

    s = []
    for pos in positions:
        # create qualifier for start/stop/orientation
        qualifiers = [
            wdi_core.WDItemID(self.chr_num_wdid[pos['chr']],
                              PROPS['chromosome'], is_qualifier=True)
        ]
        # genomic start and end
        s.append(
            wdi_core.WDString(str(int(pos['start'])), PROPS['genomic start'],
                              references=[reference], qualifiers=qualifiers))
        s.append(
            wdi_core.WDString(str(int(pos['end'])), PROPS['genomic end'],
                              references=[reference], qualifiers=qualifiers))

    for chromosome_qid in chromosome_qids:
        s.append(
            wdi_core.WDItemID(chromosome_qid, PROPS['chromosome'],
                              references=[reference]))

    if len(orientation_qids) == 1:
        # todo: not sure what to do if you have both orientations on the same chr
        (orientation_qid,) = orientation_qids
        s.append(
            wdi_core.WDItemID(orientation_qid, PROPS['strand orientation'],
                              references=[reference]))

    return s
def get_list_of_biological_sexes(cell_line_object, list_of_taxons_of_origin):
    """
    Build the P21 qualifiers for the sexes listed in a cell line record.

    "Sex unspecified" becomes a ``somevalue`` qualifier. Other values are
    looked up in the human table when "Q15978631" is among the taxa, and in
    the non-human table otherwise; a non-human "Mixed sex" yields two
    qualifiers (Q44148 and Q43445).

    :param cell_line_object: object whose ``cell_line_dump["SX"]`` lists the sexes
    :param list_of_taxons_of_origin: taxon ids of the cell line
    :return: list of qualifier objects
    :raises KeyError: if a sex is missing from the applicable lookup table
    """
    human_sex_ids = {
        "Female": "Q6581072",
        "Male": "Q6581097",
        "Sex ambiguous": "Q1097630",
    }
    non_human_sex_ids = {
        "Female": "Q43445",
        "Male": "Q44148",
        "Sex ambiguous": "Q28873047",
    }
    homo_sapiens_id = "Q15978631"

    qualifiers = []
    for sex in cell_line_object.cell_line_dump["SX"]:
        if sex == "Sex unspecified":
            qualifiers.append(
                wdi_core.WDString(
                    value="Unknow value",
                    prop_nr="P21",
                    is_qualifier=True,
                    snak_type="somevalue",
                ))
            continue

        if homo_sapiens_id in list_of_taxons_of_origin:
            sex_id = human_sex_ids[sex]
        elif sex == "Mixed sex":
            # mixed sex is recorded as both qualifiers: this one first, the
            # other through the common append below
            qualifiers.append(
                wdi_core.WDItemID(value="Q44148", prop_nr="P21",
                                  is_qualifier=True))
            sex_id = "Q43445"
        else:
            sex_id = non_human_sex_ids[sex]

        qualifiers.append(
            wdi_core.WDItemID(value=sex_id, prop_nr="P21",
                              is_qualifier=True))

    return qualifiers
def create_property(label, description, property_datatype, equiv_props,
                    login):
    """
    Create and write a new property.

    :param label: label of the property (also used as item name)
    :param description: description of the property
    :param property_datatype: datatype passed through to ``write``
    :param equiv_props: values; one string statement is added per value under
        the property returned by ``get_quiv_prop_pid()``
    :param login: login used for writing
    :return: the written item engine instance
    """
    statements = []
    for equiv_prop in equiv_props:
        statements.append(wdi_core.WDString(equiv_prop, get_quiv_prop_pid()))

    item = localItemEngine(item_name=label, domain="foo", data=statements)
    item.set_label(label)
    item.set_description(description)
    item.write(login, entity_type="property",
               property_datatype=property_datatype)
    return item