def create_release(self):
    """Get or create the release item for this Disease Ontology version.

    Builds a ``wdi_helpers.Release`` whose title and edition are derived from
    ``self.date`` (formatted as YYYY-MM-DD), calls ``get_or_create`` with
    ``self.login`` and stores the returned value on ``self.release``.

    :raises ValueError: if ``get_or_create`` returns a falsy value
    """
    edition = self.date.strftime('%Y-%m-%d')
    title = 'Disease Ontology release {}'.format(edition)
    release_helper = wdi_helpers.Release(
        title,
        'Release of the Disease Ontology',
        edition,
        archive_url=self.version,
        edition_of_wdid='Q5282129',
        # presumably self.date is a datetime: .date() drops the time part, so
        # the %H:%M:%S fields are expected to render as 00:00:00 -- TODO confirm
        pub_date=self.date.date().strftime('+%Y-%m-%dT%H:%M:%SZ'),
    )

    item_id = release_helper.get_or_create(self.login)
    if not item_id:
        raise ValueError("unable to create release")
    self.release = item_id
Esempio n. 2
0
 def create_release(self, login):
     """Get or create the release item for ``self.NAME`` / ``self.edition``.

     The ``wdi_helpers.Release`` helper is kept on ``self.release``; the value
     returned by its ``get_or_create`` call is kept on ``self.release_qid``.

     :param login: passed straight through to ``Release.get_or_create``
     :raises ValueError: if ``get_or_create`` returns a falsy value
     """
     label = '{} release {}'.format(self.NAME, self.edition)
     summary = 'Release of the {}'.format(self.NAME)
     # The helper is stored on the instance before get_or_create is called,
     # so self.release is set even when the lookup/creation fails afterwards.
     self.release = wdi_helpers.Release(
         label,
         summary,
         self.edition,
         archive_url=self.version,
         edition_of_wdid=self.QID,
         pub_date=self.date.date().strftime('+%Y-%m-%dT%H:%M:%SZ'),
         sparql_endpoint_url=self.sparql_endpoint_url,
         mediawiki_api_url=self.mediawiki_api_url,
     )

     qid = self.release.get_or_create(login)
     if not qid:
         raise ValueError("unable to create release")
     self.release_qid = qid
Esempio n. 3
0
    def create_release(self):
        """Get or create the release item for the ontology ``self.ontology_qid``.

        The ontology label is fetched with ``Graph.get_item_label`` and printed,
        then used to build the title and description of a
        ``wdi_helpers.Release``. The value returned by ``get_or_create`` is
        stored on ``self.release_qid``.

        :raises ValueError: if ``get_or_create`` returns a falsy value
        """
        # the label of the ontology item is needed for the release title/description
        label = Graph.get_item_label(self.ontology_qid)
        print(label)

        day = self.date.strftime('%Y-%m-%d')
        title = '{} release {}'.format(label, day)
        description = 'Release of {}'.format(label)
        release_helper = wdi_helpers.Release(
            title,
            description,
            day,
            archive_url=self.version,
            edition_of_wdid=self.ontology_qid,
            pub_date=self.date.date().strftime('+%Y-%m-%dT%H:%M:%SZ'),
        )

        qid = release_helper.get_or_create(self.login)
        if not qid:
            raise ValueError("unable to create release")
        self.release_qid = qid
Esempio n. 4
0
def make_ref_source(source_doc, id_prop, identifier, login=None):
    """
    Build the list of reference statements for a value taken from a source.

    Reference is made up of:
    stated_in (P248): if the source has a release #:
        the release edition item (looked up or created via wdi_helpers.Release)
        else, the source's own item from ``source_items``
    link to id: link to identifier in source, stored under ``id_prop``
    retrieved (P813): only if source has no release #

    :param source_doc: dict describing the source. This function reads the key
        'id' plus either 'release' or 'timestamp' (parsed as %Y%m%d), e.g.
        {'id': 'uniprot', 'timestamp': '20161006'}
        or {'id': 'ensembl', 'release': '86'}.
        NOTE(review): the previous docstring spelled the key '_id' while the
        code reads 'id' -- confirm against the callers.
        The dict is not modified by this function.
    :param id_prop: property number used for the identifier link; must start with "P"
    :param identifier: identifier in the source; converted with str()
    :param login: handed to Release.get_or_create; must be passed if you want
        to be able to create new release items
    :return: list of reference statements, [stated_in, link_to_id] when a
        release is given, otherwise [stated_in, retrieved, link_to_id]
    :raises ValueError: if the source is not a key of ``source_items``
    """
    source = source_doc['id']
    if source not in source_items:
        raise ValueError("Unknown source for reference creation: {}".format(source))
    # kept as an assert so the exception type seen by callers stays the same
    assert id_prop.startswith("P")

    link_to_id = wdi_core.WDString(value=str(identifier), prop_nr=id_prop, is_reference=True)

    if "release" in source_doc:
        # Use a local string copy: the caller's dict must not be rewritten as a
        # side effect of building a reference.
        release_number = str(source_doc['release'])
        title = "{} Release {}".format(source, release_number)
        description = "Release {} of {}".format(release_number, source)
        release = wdi_helpers.Release(title, description, release_number,
                                      edition_of_wdid=source_items[source]).get_or_create(login)

        stated_in = wdi_core.WDItemID(value=release, prop_nr='P248', is_reference=True)
        return [stated_in, link_to_id]

    # No release number: reference the source item itself plus the retrieval date.
    retrieved_on = datetime.strptime(source_doc['timestamp'], "%Y%m%d")
    stated_in = wdi_core.WDItemID(value=source_items[source], prop_nr='P248', is_reference=True)
    retrieved = wdi_core.WDTime(retrieved_on.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)
    return [stated_in, retrieved, link_to_id]
Esempio n. 5
0
    # NOTE(review): this is the interior of a function whose header (and the
    # definition of `parser`) is outside the visible fragment.
    args = parser.parse_args()
    # When neither args.protein nor args.items is set, enable both.
    if not (args.protein or args.items):
        args.protein = args.items = True

    # Fall back to "./logs" when no log directory was given.
    log_dir = args.log_dir if args.log_dir else "./logs"
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)

    version_date = date_parse(args.interpro_date)
    version_num = args.interpro_version

    # Look up (or create) the release item for this InterPro version; the
    # value returned by get_or_create is handed to the bot below.
    # presumably "Q3047275" is the InterPro database item -- TODO confirm
    release = wdi_helpers.Release(
        title="InterPro Release {}".format(version_num),
        description="Release {} of the InterPro database & software".format(
            version_num),
        edition_of_wdid="Q3047275",
        edition=version_num,
        pub_date=version_date,
        archive_url="ftp://ftp.ebi.ac.uk/pub/databases/interpro/{}/".format(
            version_num))
    release_wdid = release.get_or_create(login)
    print("release_wdid: {}".format(release_wdid))

    # NOTE(review): args.protein is set above but not used in the visible
    # lines; the fragment presumably continues past this point.
    if args.items:
        print("running item bot")
        # write is the inverse of args.dummy
        ItemsBot.main(login,
                      release_wdid,
                      log_dir=log_dir,
                      run_one=args.run_one,
                      write=not args.dummy)
Esempio n. 6
0
def main(metadata, log_dir="./logs", fast_run=True, write=True):
    """
    Main function for creating/updating genes

    Groups protein-coding genes by their HomoloGene orthologs and calls
    ``do_item`` once per Entrez gene, referencing the HomoloGene release item.
    Exceptions raised by ``do_item`` are logged and counted; they do not stop
    the run.

    :param metadata: looks like: {"ensembl" : 84, "cpdb" : 31, "netaffy" : "na35", "ucsc" : "20160620", .. }
        The 'homologene' key is read for the release/build number.
    :type metadata: dict
    :param log_dir: dir to store logs
    :type log_dir: str
    :param fast_run: use fast run mode
        NOTE(review): not referenced anywhere in this function body.
    :type fast_run: bool
    :param write: actually perform write (forwarded to ``do_item``)
    :type write: bool
    :return: None
    """

    # login
    login = wdi_login.WDLogin(user=WDUSER, pwd=WDPASS)
    wdi_core.WDItemEngine.setup_logging(log_dir=log_dir,
                                        logger_name='WD_logger',
                                        log_name=log_name,
                                        header=json.dumps(__metadata__))

    # get all ids mappings
    entrez_wdid = wdi_helpers.id_mapper(PROPS['Entrez Gene ID'])
    wdid_entrez = {v: k for k, v in entrez_wdid.items()}
    homo_wdid = wdi_helpers.id_mapper(PROPS['HomoloGene ID'],
                                      return_as_set=True)
    # invert HomoloGene ID -> {wdids} into wdid -> HomoloGene ID
    wdid_homo = dict()
    for homo, wdids in homo_wdid.items():
        for wdid in wdids:
            wdid_homo[wdid] = homo
    # Entrez Gene ID -> HomoloGene ID, for items that carry both identifiers
    entrez_homo = {
        wdid_entrez[wdid]: homo
        for wdid, homo in wdid_homo.items() if wdid in wdid_entrez
    }
    taxon_wdid = wdi_helpers.id_mapper(PROPS['NCBI Taxonomy ID'])

    # only do certain records
    mgd = MyGeneDownloader(
        q="_exists_:homologene AND type_of_gene:protein-coding",
        fields=','.join(['taxid', 'homologene', 'entrezgene']))
    docs, total = mgd.query()
    docs = list(tqdm(docs, total=total))
    records = HelperBot.tag_mygene_docs(docs, metadata)

    # group together all orthologs
    # d[taxid][entrezgene] = { set of entrezgene ids for orthologs }
    d = defaultdict(lambda: defaultdict(set))
    entrez_taxon = dict()  # keep this for the qualifier on the statements
    for doc in records:
        this_taxid = doc['taxid']['@value']
        this_entrez = doc['entrezgene']['@value']
        entrez_taxon[str(this_entrez)] = str(this_taxid)
        if str(this_entrez) not in entrez_wdid:
            continue
        for taxid, entrez in doc['homologene']['@value']['genes']:
            if taxid == 4932 and this_taxid == 559292:
                # ridiculous workaround because entrez has the taxid for the strain and homologene has it for the species
                # TODO: This needs to be fixed if you want to use other things that may have species/strains
                continue
            if taxid != this_taxid and str(entrez) in entrez_wdid:
                d[str(this_taxid)][str(this_entrez)].add(str(entrez))

    print("taxid: # of genes  : {}".format({k: len(v) for k, v in d.items()}))

    homogene_ver = metadata['homologene']
    release = wdi_helpers.Release(
        "HomoloGene build{}".format(homogene_ver),
        "Version of HomoloGene",
        homogene_ver,
        edition_of_wdid='Q468215',
        archive_url='ftp://ftp.ncbi.nih.gov/pub/HomoloGene/build{}/'.format(
            homogene_ver)).get_or_create(login)

    def reference(homogeneid):
        # reference = stated in the HomoloGene release + the HomoloGene ID itself
        return [
            wdi_core.WDItemID(release, PROPS['stated in'], is_reference=True),
            wdi_core.WDExternalID(
                homogeneid, PROPS['HomoloGene ID'], is_reference=True)
        ]

    ec = 0
    for taxid, subd in tqdm(d.items()):
        for entrezgene, orthologs in tqdm(subd.items(), leave=False):
            try:
                do_item(entrezgene, orthologs, reference, entrez_homo,
                        entrez_taxon, taxon_wdid, entrez_wdid, login, write)
            except Exception as e:
                # best effort: record the failure in the log and keep going,
                # instead of building the message and throwing it away
                msg = wdi_helpers.format_msg(entrezgene, PROPS['Entrez Gene ID'],
                                             None, str(e), type(e))
                wdi_core.WDItemEngine.log("ERROR", msg)
                ec += 1
        # clear the fast run store once we move on to the next taxon
        wdi_core.WDItemEngine.fast_run_store = []
        wdi_core.WDItemEngine.fast_run_container = None

    print("Completed succesfully with {} exceptions".format(ec))