Exemple #1
0
    def create_item(self, login):
        """
        Build the base statements for this term and submit them.

        Two statements are produced, both citing ``self.reference``: the
        identifier statement (``INTERPRO`` property, value ``self.id``) and a
        P279 statement whose value is ``self.type_wdid``. The English label is
        ``self.name``, descriptions are taken from ``self.lang_descr`` and the
        aliases are the short name and the identifier.

        :param login: login object handed through to PBB_Helpers.try_write
        :return: the PBB_Core.WDItemEngine object that was passed to try_write
        """
        id_statement = PBB_Core.WDExternalID(value=self.id,
                                             prop_nr=INTERPRO,
                                             references=[self.reference])
        type_statement = PBB_Core.WDItemID(value=self.type_wdid,
                                           prop_nr="P279",
                                           references=[self.reference])

        engine_kwargs = dict(
            item_name=self.name,
            domain='interpro',
            data=[id_statement, type_statement],
            append_value=["P279"],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter)
        item = PBB_Core.WDItemEngine(**engine_kwargs)

        item.set_label(self.name, lang='en')
        for language in self.lang_descr:
            item.set_description(self.lang_descr[language], lang=language)
        item.set_aliases([self.short_name, self.id])

        PBB_Helpers.try_write(item, self.id, INTERPRO, login)
        return item
Exemple #2
0
    def create_relationships(self, login):
        """
        Add the parent / contains / found-in links of this term to its item.

        The Wikidata IDs are resolved first with ``do_wdid_lookup``; when that
        raises KeyError the error is logged and nothing is written. Nothing
        is written either when the term has no parent, contains or found_in
        entries (only the identifier statement would remain).

        :param login: login object handed through to PBB_Helpers.try_write
        :return: None
        """
        try:
            # the lookup table may lag behind freshly created items
            # (original author's note: "endpoint may not get updated in time?")
            self.do_wdid_lookup()
        except KeyError as err:
            PBB_Core.WDItemEngine.log(
                "ERROR",
                format_msg(self.id, INTERPRO, None, str(err), type(err)))
            return

        def linked(target_wdid, prop_nr):
            # one referenced item statement pointing at target_wdid
            return PBB_Core.WDItemID(value=target_wdid,
                                     prop_nr=prop_nr,
                                     references=[self.reference])

        statements = [
            PBB_Core.WDExternalID(value=self.id,
                                  prop_nr=INTERPRO,
                                  references=[self.reference])
        ]
        if self.parent:
            statements.append(linked(self.parent_wdid, 'P279'))  # subclass of
        if self.contains:
            statements.extend(
                linked(qid, 'P527') for qid in self.contains_wdid)  # has part
        if self.found_in:
            statements.extend(
                linked(qid, 'P361') for qid in self.found_in_wdid)  # part of

        # only the identifier statement is present: nothing to add
        if len(statements) < 2:
            return

        item = PBB_Core.WDItemEngine(
            wd_item_id=self.wdid,
            domain='interpro',
            data=statements,
            append_value=['P279', 'P527', 'P361'],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter)

        PBB_Helpers.try_write(
            item,
            self.id,
            INTERPRO,
            login,
            edit_summary="create/update subclass/has part/part of")
Exemple #3
0
def make_ref_source(source_doc, id_prop, identifier, login=None):
    """
    Build the list of reference statements for one source record.

    Reference is made up of:
    stated_in: if the source has a release #:
        release edition
        else, stated in the source
    link to id: link to identifier in source
    retrieved: only if source has no release #

    ``source_doc`` is read but never modified. Example shapes:
        {'_id': 'uniprot', 'timestamp': '20161006'}
        {'_id': 'ensembl', 'release': 86, 'timestamp': '20161005'}

    :param source_doc: dict with '_id' and either 'release' or 'timestamp'
        ('timestamp' is parsed as YYYYMMDD)
    :param id_prop: key into ``prop_ids`` naming the identifier property
    :param identifier: identifier in the source; stored as ``str(identifier)``
    :param login: passed to ``Release.get_or_create``; must be given if you
        want to be able to create new release items
    :return: list of reference statements,
        [stated_in, link_to_id] when the source has a release, otherwise
        [stated_in, retrieved, link_to_id]
    :raises ValueError: if the source or ``id_prop`` is not known
    """
    source = source_doc['_id']
    if source not in source_items:
        raise ValueError(
            "Unknown source for reference creation: {}".format(source))
    if id_prop not in prop_ids:
        raise ValueError(
            "Unknown id_prop for reference creation: {}".format(id_prop))

    link_to_id = PBB_Core.WDString(value=str(identifier),
                                   prop_nr=prop_ids[id_prop],
                                   is_reference=True)

    if "release" in source_doc:
        # keep the string form local so the caller's dict is left untouched
        release_edition = str(source_doc['release'])
        title = "{} Release {}".format(source, release_edition)
        description = "Release {} of {}".format(release_edition, source)
        release = PBB_Helpers.Release(
            title,
            description,
            release_edition,
            edition_of_wdid=source_items[source]).get_or_create(login)

        stated_in = PBB_Core.WDItemID(value=release,
                                      prop_nr='P248',
                                      is_reference=True)
        return [stated_in, link_to_id]

    # no release number: cite the source itself plus a retrieval date
    retrieved_on = datetime.strptime(source_doc['timestamp'], "%Y%m%d")
    stated_in = PBB_Core.WDItemID(value=source_items[source],
                                  prop_nr='P248',
                                  is_reference=True)
    retrieved = PBB_Core.WDTime(retrieved_on.strftime('+%Y-%m-%dT00:00:00Z'),
                                prop_nr='P813',
                                is_reference=True)
    return [stated_in, retrieved, link_to_id]
Exemple #4
0
def create_uniprot_relationships(login, release_wdid, collection, taxon=None):
    """
    Add InterPro 'subclass of' (P279) and 'has part' (P527) statements to
    protein items that already exist in Wikidata.

    :param login: login object handed through to PBB_Helpers.try_write
    :param release_wdid: item ID used as the 'stated in' (P248) reference value
    :param collection: collection object providing ``find``; each document is
        read for '_id', 'subclass' and 'has_part'
    :param taxon: optional taxon item ID; when given, the ID mapping and the
        fast-run filter are restricted to P703 == taxon
    :raises ValueError: if the item engine reports it would create a new item
    """
    # only do uniprot proteins that are already in wikidata
    if taxon:
        uniprot2wd = PBB_Helpers.id_mapper(UNIPROT, (("P703", taxon),))
        fast_run_base_filter = {UNIPROT: "", "P703": taxon}
    else:
        uniprot2wd = PBB_Helpers.id_mapper(UNIPROT)
        fast_run_base_filter = {UNIPROT: ""}

    cursor = collection.find({'_id': {'$in': list(uniprot2wd.keys())}}, no_cursor_timeout=True)
    try:
        for doc in tqdm(cursor, total=cursor.count()):
            uniprot_id = doc['_id']

            # Without a known item there is nothing to update: log and move on.
            if uniprot_id not in uniprot2wd:
                print("wdid_not_found " + uniprot_id)
                PBB_Core.WDItemEngine.log(
                    "ERROR", PBB_Helpers.format_msg(uniprot_id, UNIPROT, None, "wdid_not_found"))
                continue

            ## References
            # stated in Interpro version XX.X
            ref_stated_in = PBB_Core.WDItemID(release_wdid, 'P248', is_reference=True)
            ref_ipr = PBB_Core.WDString("http://www.ebi.ac.uk/interpro/protein/{}".format(uniprot_id), "P854",
                                        is_reference=True)
            reference = [ref_stated_in, ref_ipr]

            statements = []
            if doc['subclass']:
                for f in doc['subclass']:
                    statements.append(
                        PBB_Core.WDItemID(value=IPRTerm.ipr2wd[f], prop_nr='P279', references=[reference]))
            if doc['has_part']:
                for hp in doc['has_part']:
                    statements.append(
                        PBB_Core.WDItemID(value=IPRTerm.ipr2wd[hp], prop_nr='P527', references=[reference]))

            wd_item = PBB_Core.WDItemEngine(wd_item_id=uniprot2wd[uniprot_id], domain="proteins", data=statements,
                                            fast_run=True, fast_run_base_filter=fast_run_base_filter,
                                            append_value=["P279", "P527", "P361"])

            # an existing item ID was supplied, so a new item must never be created
            if wd_item.create_new_item:
                raise ValueError("something bad happened")
            PBB_Helpers.try_write(wd_item, uniprot_id, INTERPRO, login,
                                  edit_summary="add/update family and/or domains")
    finally:
        # the cursor was opened with no_cursor_timeout, so close it even on error
        cursor.close()
Exemple #5
0
def run_encodes(login, records):
    """
    Call ``gene_encodes_statement`` for every record whose gene and protein
    items can both be found.

    Records whose Entrez gene ID or Ensembl protein ID has no matching item
    are logged as errors and skipped.

    :param login: login object handed through to gene_encodes_statement
    :param records: iterable of record dicts; read for 'entrezgene', 'ensembl'
        and '_id' entries with '@value' / '@source' keys
    """
    organism_filter = (("P703", strain_info['organism_wdid']), )

    # get all entrez gene id -> wdid mappings, where found in taxon is this strain
    gene_wdid_mapping = PBB_Helpers.id_mapper("P351", organism_filter)

    # get all ensembl protein id -> wdid mappings, where found in taxon is this strain
    protein_wdid_mapping = PBB_Helpers.id_mapper("P705", organism_filter)

    for record in tqdm(records, desc=strain_info['organism_name']):
        entrez_gene = str(record['entrezgene']['@value'])
        if entrez_gene not in gene_wdid_mapping:
            # argument order: external id, id property, wdid, message
            PBB_Core.WDItemEngine.log(
                "ERROR",
                format_msg(record['_id']['@value'], ENTREZ_PROP, None,
                           "gene_not_found"))
            continue

        ensembl_protein = record['ensembl']['@value']['protein']
        if ensembl_protein not in protein_wdid_mapping:
            # one missing protein item should not abort the whole run
            PBB_Core.WDItemEngine.log(
                "ERROR",
                format_msg(record['_id']['@value'], ENTREZ_PROP, None,
                           "protein_not_found"))
            continue

        gene_encodes_statement(gene_wdid_mapping[entrez_gene],
                               protein_wdid_mapping[ensembl_protein],
                               'ncbi_gene', entrez_gene,
                               record['ensembl']['@source'], login)
Exemple #6
0
def main(log_dir="./logs", run_id=None):
    """
    Entry point of the gene bot: set up metadata and logging, then call run().

    :param log_dir: directory handed to WDItemEngine.setup_logging
    :param run_id: identifier used in the log file name; defaults to the
        current time formatted as '%Y%m%d_%H:%M'
    """
    if run_id is None:
        run_id = datetime.now().strftime('%Y%m%d_%H:%M')
    __metadata__['run_id'] = run_id
    __metadata__['timestamp'] = str(datetime.now())

    log_name = 'YeastBot_gene-{}.log'.format(run_id)
    __metadata__['log_name'] = log_name
    __metadata__['sources'] = get_source_versions()

    records = get_data_from_mygene()

    login = PBB_login.WDLogin(user=WDUSER, pwd=WDPASS)

    chrom_wdid = PBB_Helpers.id_mapper("P2249", (("P703", "Q27510868"), ))

    # Detach handlers left over from a previous run so records are not
    # written through them again (the attribute is `handlers`; assigning to
    # `handles` had no effect).
    logger = PBB_Core.WDItemEngine.logger
    if logger is not None:
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
            handler.close()
    PBB_Core.WDItemEngine.setup_logging(log_dir=log_dir,
                                        log_name=log_name,
                                        header=json.dumps(__metadata__))
    run(login, records, chrom_wdid)
Exemple #7
0
def main(version_info, log_dir="./logs", run_id=None, mongo_uri="mongodb://localhost:27017",
         mongo_db="wikidata_src", mongo_coll="interpro_protein", taxon=None):
    """
    Entry point: get or create the InterPro release item, set up logging and
    add InterPro relationships to protein items.

    :param version_info: dict parsed from the interpro xml file; 'version' and
        'file_date' are read. Looks like:
        { "_id" : "INTERPRO", "dbname" : "INTERPRO", "file_date" : "03-NOV-16",
          "version" : "60.0", "entry_count" : "29700" }
    :param log_dir: log directory; None falls back to "./logs"
    :param run_id: identifier used in the log file name; defaults to the
        current time formatted as '%Y%m%d_%H:%M'
    :param mongo_uri: URI handed to MongoClient
    :param mongo_db: database name
    :param mongo_coll: collection name
    :param taxon: passed through to create_uniprot_relationships
    :return: path of the log file (log_dir joined with the log name)
    """
    # data sources
    db = MongoClient(mongo_uri)[mongo_db]
    collection = db[mongo_coll]

    if run_id is None:
        run_id = datetime.now().strftime('%Y%m%d_%H:%M')
    if log_dir is None:
        log_dir = "./logs"
    __metadata__['run_id'] = run_id
    __metadata__['timestamp'] = str(datetime.now())

    login = PBB_login.WDLogin(user=WDUSER, pwd=WDPASS)

    version = version_info['version']
    pub_date = date_parse(version_info['file_date'])
    release = PBB_Helpers.Release(title="InterPro Release {}".format(version),
                                  description="Release {} of the InterPro database & software".format(version),
                                  edition_of_wdid="Q3047275",
                                  edition=version,
                                  pub_date=pub_date,
                                  archive_url="ftp://ftp.ebi.ac.uk/pub/databases/interpro/{}/".format(version))
    release_wdid = release.get_or_create(login)
    __metadata__['release'] = {
        'InterPro': {'release': version, '_id': 'InterPro', 'wdid': release_wdid, 'timestamp': str(pub_date)}}

    log_name = '{}-{}.log'.format(__metadata__['name'], __metadata__['run_id'])
    # Detach handlers left over from a previous run so records are not
    # written through them again (the attribute is `handlers`; assigning to
    # `handles` had no effect).
    logger = PBB_Core.WDItemEngine.logger
    if logger is not None:
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
            handler.close()
    PBB_Core.WDItemEngine.setup_logging(log_dir=log_dir, log_name=log_name, header=json.dumps(__metadata__))

    create_uniprot_relationships(login, release_wdid, collection, taxon=taxon)

    return os.path.join(log_dir, log_name)
Exemple #8
0
def wd_item_construction(record, strain_info, chrom_wdid, login):
    """
    Assemble the gene item for one record and submit it with try_write.

    :param record: dict read for 'entrezgene', 'ensembl', 'locus_tag',
        'genomic_pos', 'name', 'symbol' and '_id' ('@value' / '@source' keys)
    :param strain_info: dict read for 'organism_wdid', 'organism_type',
        'organism_name' and 'chrom_genomeid_map'
    :param chrom_wdid: mapping from genome ID to chromosome item ID
    :param login: login object handed through to PBB_Helpers.try_write
    """

    # Maps the '_id' of a source to the key of the identifier used when
    # referencing it, e.g. source "Entrez" -> "entrez_gene".
    # (original note: these are defined in HelperBot)
    ref_key_by_source = {
        'Ensembl': 'ensembl_gene',
        'Entrez': 'entrez_gene',
        'Uniprot': 'uniprot'
    }

    def build_statements():
        """
        construct list of referenced statements to pass to PBB_Core Item engine
        """
        # identifiers, also used when building the references
        ids = {
            'entrez_gene': str(record['entrezgene']['@value']),
            'ensembl_gene': record['ensembl']['@value']['gene'],
            'locus_tag': record['locus_tag']['@value']
        }
        out = []

        # --- external IDs ---
        # entrez gene id
        entrez_ref = make_ref_source(record['entrezgene']['@source'],
                                     'entrez_gene', ids['entrez_gene'])
        out.append(
            PBB_Core.WDString(ids['entrez_gene'],
                              PROPS['Entrez Gene ID'],
                              references=[entrez_ref]))

        # ensembl gene id
        ensembl_ref = make_ref_source(record['ensembl']['@source'],
                                      'ensembl_gene', ids['ensembl_gene'])
        out.append(
            PBB_Core.WDString(ids['ensembl_gene'],
                              PROPS['Ensembl Gene ID'],
                              references=[ensembl_ref]))

        # ncbi locus tag
        out.append(
            PBB_Core.WDString(ids['locus_tag'],
                              PROPS['NCBI Locus tag'],
                              references=[entrez_ref]))

        # --- statements with no referencable sources (made by hand, for now) ---
        # subclass of gene
        out.append(
            PBB_Core.WDItemID('Q7187',
                              PROPS['subclass of'],
                              references=[ensembl_ref]))
        # found in taxon
        out.append(
            PBB_Core.WDItemID(strain_info['organism_wdid'],
                              PROPS['found in taxon'],
                              references=[ensembl_ref]))

        # --- genomic position: start, end, strand orientation, chromosome ---
        position = record['genomic_pos']['@value']
        position_source = record['genomic_pos']['@source']
        position_key = ref_key_by_source[position_source['_id']]
        position_ref = make_ref_source(position_source, position_key,
                                       ids[position_key])

        # genome ID of the chromosome; used as qualifier and for the
        # chromosome item lookup further down
        genome_id = strain_info['chrom_genomeid_map'][position['chr']]
        chrom_qualifier = PBB_Core.WDString(genome_id,
                                            'P2249',
                                            is_qualifier=True)  # Refseq Genome ID

        # strand orientation
        if position['strand'] == 1:
            strand_item = 'Q22809680'
        else:
            strand_item = 'Q22809711'
        out.append(
            PBB_Core.WDItemID(strand_item,
                              PROPS['strand orientation'],
                              references=[position_ref]))

        # genomic start and end
        out.append(
            PBB_Core.WDString(str(int(position['start'])),
                              PROPS['genomic start'],
                              references=[position_ref],
                              qualifiers=[chrom_qualifier]))
        out.append(
            PBB_Core.WDString(str(int(position['end'])),
                              PROPS['genomic end'],
                              references=[position_ref],
                              qualifiers=[chrom_qualifier]))

        # chromosome
        out.append(
            PBB_Core.WDItemID(chrom_wdid[genome_id],
                              PROPS['chromosome'],
                              references=[position_ref]))
        return out

    label = '{} {}'.format(record['name']['@value'],
                           record['ensembl']['@value']['gene'])
    description = '{} gene found in {}'.format(strain_info['organism_type'],
                                               strain_info['organism_name'])

    gene_statements = build_statements()
    append_props = [PROPS['subclass of']]
    base_filter = {
        PROPS['Entrez Gene ID']: '',
        PROPS['found in taxon']: strain_info['organism_wdid']
    }
    gene_item = PBB_Core.WDItemEngine(item_name=label,
                                      domain='genes',
                                      data=gene_statements,
                                      append_value=append_props,
                                      fast_run=True,
                                      fast_run_base_filter=base_filter)
    gene_item.set_label(label)
    gene_item.set_description(description, lang='en')
    gene_item.set_aliases(
        [record['symbol']['@value'], record['locus_tag']['@value']])

    PBB_Helpers.try_write(gene_item, record['_id']['@value'], ENTREZ_PROP,
                          login)
Exemple #9
0
def main(version_info,
         log_dir="./logs",
         run_id=None,
         mongo_uri="mongodb://localhost:27017",
         mongo_db="wikidata_src",
         mongo_coll="interpro",
         debug=False):
    """
    Entry point: get or create the InterPro release item, create/update every
    InterPro item, then add the relationships between them.

    :param version_info: dict parsed from the interpro xml file; 'version' and
        'file_date' are read. Looks like:
        { "_id" : "INTERPRO", "dbname" : "INTERPRO", "file_date" : "03-NOV-16",
          "version" : "60.0", "entry_count" : "29700" }
    :param log_dir: log directory; None falls back to "./logs"
    :param run_id: identifier used in the log file name; defaults to the
        current time formatted as '%Y%m%d_%H:%M'
    :param mongo_uri: URI handed to MongoClient
    :param mongo_db: database name
    :param mongo_coll: collection name
    :param debug: when true, stop creating items once the index exceeds 100
    :return: path of the log file (log_dir joined with the log name)
    """
    # data sources
    db = MongoClient(mongo_uri)[mongo_db]
    interpro_coll = db[mongo_coll]

    if run_id is None:
        run_id = datetime.now().strftime('%Y%m%d_%H:%M')
    if log_dir is None:
        log_dir = "./logs"
    __metadata__['run_id'] = run_id
    __metadata__['timestamp'] = str(datetime.now())

    login = PBB_login.WDLogin(user=WDUSER, pwd=WDPASS)

    version = version_info['version']
    pub_date = date_parse(version_info['file_date'])
    release = PBB_Helpers.Release(
        title="InterPro Release {}".format(version),
        description="Release {} of the InterPro database & software".format(
            version),
        edition_of_wdid="Q3047275",
        edition=version,
        pub_date=pub_date,
        archive_url="ftp://ftp.ebi.ac.uk/pub/databases/interpro/{}/".format(
            version))
    release_wdid = release.get_or_create(login)
    __metadata__['release'] = {
        'InterPro': {
            'release': version,
            '_id': 'InterPro',
            'wdid': release_wdid,
            'timestamp': str(pub_date)
        }
    }

    log_name = '{}-{}.log'.format(__metadata__['name'], __metadata__['run_id'])
    # Detach handlers left over from a previous run so records are not
    # written through them again (the attribute is `handlers`; assigning to
    # `handles` had no effect).
    logger = PBB_Core.WDItemEngine.logger
    if logger is not None:
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
            handler.close()
    PBB_Core.WDItemEngine.setup_logging(log_dir=log_dir,
                                        log_name=log_name,
                                        header=json.dumps(__metadata__))

    # create/update all interpro items
    terms = []
    cursor = interpro_coll.find(no_cursor_timeout=True)
    try:
        for n, doc in tqdm(enumerate(cursor), total=cursor.count()):
            doc['release_wdid'] = release_wdid
            term = IPRTerm(**doc)
            term.create_item(login)
            terms.append(term)
            if debug and n > 100:
                break
    finally:
        # the cursor was opened with no_cursor_timeout, so close it even on error
        cursor.close()

    # create/update interpro item relationships
    # (the ID mapping is refreshed first so newly created items can be found)
    IPRTerm.refresh_ipr_wd()
    for term in tqdm(terms):
        term.create_relationships(login)

    return os.path.join(log_dir, log_name)
Exemple #10
0
def protein_item(record, strain_info, gene_qid, go_wdid_mapping, login,
                 add_pubmed):
    """
    generate pbb_core item object for one protein and submit it with try_write

    :param record: dict read for 'name', 'ensembl', 'entrezgene', 'refseq',
        'uniprot', 'go', 'symbol' and 'locus_tag' ('@value' / '@source' keys)
    :param strain_info: dict read for 'organism_type', 'organism_name' and
        'organism_wdid'
    :param gene_qid: item ID used as the value of the P702 statement
    :param go_wdid_mapping: mapping from GO term id to item ID
    :param login: login object handed to PubmedStub.create and try_write
    :param add_pubmed: when true, every pubmed entry of a GO record is added
        as an extra 'stated in' (P248) reference
    :return: None; an error while building the item is logged and swallowed
    """

    item_name = '{} {}'.format(record['name']['@value'],
                               record['ensembl']['@value']['protein'])
    item_description = '{} protein found in {}'.format(
        strain_info['organism_type'], strain_info['organism_name'])

    s = []

    ############
    # external IDs
    ############
    # will be used for reference statements
    external_ids = {
        'entrez_gene': str(record['entrezgene']['@value']),
        'ensembl_protein': record['ensembl']['@value']['protein'],
        'ensembl_gene': record['ensembl']['@value']['gene'],
        'refseq_protein': record['refseq']['@value']['protein'],
        'uniprot': record['uniprot']['@value']['Swiss-Prot']
    }

    # ensembl protein id
    ensembl_ref = make_ref_source(record['ensembl']['@source'],
                                  'ensembl_protein',
                                  external_ids['ensembl_protein'])
    s.append(
        PBB_Core.WDString(external_ids['ensembl_protein'],
                          'P705',
                          references=[ensembl_ref]))
    # refseq protein id
    refseq_ref = make_ref_source(record['refseq']['@source'], 'refseq_protein',
                                 external_ids['refseq_protein'])
    s.append(
        PBB_Core.WDString(external_ids['refseq_protein'],
                          'P637',
                          references=[refseq_ref]))
    # uniprot id
    uniprot_ref = make_ref_source(record['uniprot']['@source'], 'uniprot',
                                  external_ids['uniprot'])
    s.append(
        PBB_Core.WDString(external_ids['uniprot'],
                          'P352',
                          references=[uniprot_ref]))

    ############
    # GO terms
    # TODO: https://www.wikidata.org/wiki/Q3460832
    ############

    preprocess_go(record)
    # NOTE(review): dumps the whole record to stdout for every protein;
    # looks like leftover debug output - confirm before removing
    print(record)
    go_source = record['go']['@source']
    go_id_prop = source_ref_id[go_source['_id']]
    reference = make_ref_source(go_source, go_id_prop,
                                external_ids[go_id_prop])
    for go_level, go_records in record['go']['@value'].items():
        level_wdid = go_props[go_level]
        for go_record in go_records:
            go_wdid = go_wdid_mapping[go_record['id']]
            evidence_wdid = go_evidence_codes[go_record['evidence']]
            evidence_statement = PBB_Core.WDItemID(value=evidence_wdid,
                                                   prop_nr='P459',
                                                   is_qualifier=True)
            # copy so the per-term pubmed references do not accumulate on the
            # shared base reference
            this_reference = copy.deepcopy(reference)
            if add_pubmed:
                for pubmed in go_record['pubmed']:
                    pmid_wdid = PBB_Helpers.PubmedStub(pubmed).create(login)
                    this_reference.append(
                        PBB_Core.WDItemID(pmid_wdid, 'P248',
                                          is_reference=True))
            s.append(
                PBB_Core.WDItemID(go_wdid,
                                  level_wdid,
                                  references=[this_reference],
                                  qualifiers=[evidence_statement]))

    ############
    # statements with no referencable sources (make by hand, for now...)
    ############
    # subclass of protein
    s.append(PBB_Core.WDItemID('Q8054', 'P279', references=[ensembl_ref]))

    # found in taxon
    s.append(
        PBB_Core.WDItemID(strain_info['organism_wdid'],
                          'P703',
                          references=[ensembl_ref]))

    # P702 statement pointing at the gene item
    s.append(PBB_Core.WDItemID(gene_qid, 'P702', references=[ensembl_ref]))

    try:
        wd_item_protein = PBB_Core.WDItemEngine(
            item_name=item_name,
            domain='proteins',
            data=s,
            append_value=['P279'],
            fast_run=True,
            fast_run_base_filter={
                'P352': '',
                'P703': strain_info['organism_wdid']
            })
        wd_item_protein.set_label(item_name)
        wd_item_protein.set_description(item_description, lang='en')
        wd_item_protein.set_aliases(
            [record['symbol']['@value'], record['locus_tag']['@value']])
    except Exception as e:
        print(e)
        # argument order: external id, id property, wdid, message
        PBB_Core.WDItemEngine.log(
            "ERROR",
            format_msg(record['entrezgene']['@value'], ENTREZ_PROP, None,
                       str(e)))
        return

    try_write(wd_item_protein, record['entrezgene']['@value'], 'P351', login)
Exemple #11
0
 def refresh_ipr_wd(cls):
     """Rebuild ``cls.ipr2wd`` from a fresh PBB_Helpers.id_mapper(INTERPRO) call."""
     fresh_mapping = PBB_Helpers.id_mapper(INTERPRO)
     cls.ipr2wd = fresh_mapping
Exemple #12
0
class IPRTerm:
    """
    Represents one interproscan term/item

    Instances are built from documents shaped like the one below; keys that
    are not constructor parameters are accepted and ignored.

    {'children': ['IPR020635'],
     'contains': ['IPR001824', 'IPR002011', 'IPR008266', 'IPR017441'],
     'description': 'InterPro Domain',
     'found_in': ['IPR009136','IPR012234','IPR020777'],
     'id': 'IPR001245',
     'name': 'Serine-threonine/tyrosine-protein kinase catalytic domain',
     'parent': 'IPR000719',
     'short_name': 'Ser-Thr/Tyr_kinase_cat_dom',
     'type': 'Domain',
     'type_wdid': 'Q898273'}

    """
    # filter handed to the item engine for fast-run mode
    fast_run_base_filter = {INTERPRO: ''}
    # identifier -> item ID mapping; fetched when the class is defined and
    # replaced by refresh_ipr_wd()
    ipr2wd = PBB_Helpers.id_mapper(INTERPRO)

    # default description per term type
    type2desc = {
        "Active_site": "InterPro Active Site",
        "Binding_site": "InterPro Binding Site",
        "Conserved_site": "InterPro Conserved Site",
        "Domain": "InterPro Domain",
        "Family": "InterPro Family",
        "PTM": "InterPro PTM",
        "Repeat": "InterPro Repeat"
    }
    # item used as the 'subclass of' target per term type
    type2wdid = {
        "Active_site": "Q423026",  # Active site
        "Binding_site": "Q616005",  # Binding site
        "Conserved_site": "Q7644128",  # Supersecondary_structure
        "Domain": "Q898273",  # Protein domain
        "Family": "Q417841",  # Protein family
        "PTM": "Q898362",  # Post-translational modification
        "Repeat": "Q3273544"  # Structural motif
    }

    def __init__(self,
                 name=None,
                 short_name=None,
                 id=None,
                 parent=None,
                 children=None,
                 contains=None,
                 found_in=None,
                 type=None,
                 description=None,
                 release_wdid=None,
                 **kwargs):
        """
        Store the term's fields and build its shared reference.

        ``type`` must be a key of ``type2wdid`` (a KeyError is raised
        otherwise). When no description is given, the default for the type
        from ``type2desc`` is used. Extra keyword arguments are ignored.
        """
        # plain fields
        self.name = name
        self.short_name = short_name
        self.id = id
        self.wdid = None

        # relationships; the *_wdid attributes stay None until
        # do_wdid_lookup() fills them in
        self.parent = parent  # subclass of (P279)
        self.parent_wdid = None
        self.children = children  # not added to wd
        self.children_wdid = None
        self.contains = contains  # has part (P527)
        self.contains_wdid = None
        self.found_in = found_in  # part of (P361)
        self.found_in_wdid = None

        # type and its 'subclass of' target (from type2wdid)
        self.type = type
        self.type_wdid = IPRTerm.type2wdid[self.type]

        # description, defaulting to the one for the type
        self.description = description
        if self.type and self.description is None:
            self.description = IPRTerm.type2desc[self.type]
        self.lang_descr = {'en': self.description}

        self.release_wdid = release_wdid
        self.reference = None
        self.create_reference()

    def __repr__(self):
        return '{id}: {name}'.format(id=self.id, name=self.name)

    def __str__(self):
        return '{id}: {name}'.format(id=self.id, name=self.name)

    @classmethod
    def refresh_ipr_wd(cls):
        """Rebuild ``cls.ipr2wd`` from a fresh id_mapper(INTERPRO) call."""
        fresh_mapping = PBB_Helpers.id_mapper(INTERPRO)
        cls.ipr2wd = fresh_mapping

    def do_wdid_lookup(self):
        """
        Resolve the item IDs of this term and of its related terms.

        this can only be done after all items have been created; a KeyError
        is raised for any identifier missing from ``IPRTerm.ipr2wd``.
        """
        lookup = IPRTerm.ipr2wd
        self.wdid = lookup[self.id]
        if self.parent:
            self.parent_wdid = lookup[self.parent]
        # children aren't added (reverse of parent relationship)
        if self.contains:
            self.contains_wdid = [lookup[ipr] for ipr in self.contains]
        if self.found_in:
            self.found_in_wdid = [lookup[ipr] for ipr in self.found_in]

    def create_reference(self):
        """ Create wikidata references for interpro
        This same reference will be used for everything. Except for a ref to the interpro item itself
        """
        # stated in Interpro version XX.X
        stated_in = PBB_Core.WDItemID(self.release_wdid,
                                      'P248',
                                      is_reference=True)
        # interpro ID
        interpro_id = PBB_Core.WDString(self.id, INTERPRO, is_reference=True)
        self.reference = [stated_in, interpro_id]

    def create_item(self, login):
        """
        Build the base statements for this term and submit them.

        Two statements are produced, both citing ``self.reference``: the
        identifier statement (``INTERPRO`` property, value ``self.id``) and a
        P279 statement whose value is ``self.type_wdid``.

        :param login: login object handed through to PBB_Helpers.try_write
        :return: the PBB_Core.WDItemEngine object that was passed to try_write
        """
        id_statement = PBB_Core.WDExternalID(value=self.id,
                                             prop_nr=INTERPRO,
                                             references=[self.reference])
        type_statement = PBB_Core.WDItemID(value=self.type_wdid,
                                           prop_nr="P279",
                                           references=[self.reference])

        engine_kwargs = dict(
            item_name=self.name,
            domain='interpro',
            data=[id_statement, type_statement],
            append_value=["P279"],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter)
        item = PBB_Core.WDItemEngine(**engine_kwargs)

        item.set_label(self.name, lang='en')
        for language in self.lang_descr:
            item.set_description(self.lang_descr[language], lang=language)
        item.set_aliases([self.short_name, self.id])

        PBB_Helpers.try_write(item, self.id, INTERPRO, login)
        return item

    def create_relationships(self, login):
        """
        Add the parent / contains / found-in links of this term to its item.

        When the ID lookup raises KeyError the error is logged and nothing is
        written. Nothing is written either when the term has no parent,
        contains or found_in entries.

        :param login: login object handed through to PBB_Helpers.try_write
        :return: None
        """
        try:
            # the lookup table may lag behind freshly created items
            # (original author's note: "endpoint may not get updated in time?")
            self.do_wdid_lookup()
        except KeyError as err:
            PBB_Core.WDItemEngine.log(
                "ERROR",
                format_msg(self.id, INTERPRO, None, str(err), type(err)))
            return

        def linked(target_wdid, prop_nr):
            # one referenced item statement pointing at target_wdid
            return PBB_Core.WDItemID(value=target_wdid,
                                     prop_nr=prop_nr,
                                     references=[self.reference])

        statements = [
            PBB_Core.WDExternalID(value=self.id,
                                  prop_nr=INTERPRO,
                                  references=[self.reference])
        ]
        if self.parent:
            statements.append(linked(self.parent_wdid, 'P279'))  # subclass of
        if self.contains:
            statements.extend(
                linked(qid, 'P527') for qid in self.contains_wdid)  # has part
        if self.found_in:
            statements.extend(
                linked(qid, 'P361') for qid in self.found_in_wdid)  # part of

        # only the identifier statement is present: nothing to add
        if len(statements) < 2:
            return

        item = PBB_Core.WDItemEngine(
            wd_item_id=self.wdid,
            domain='interpro',
            data=statements,
            append_value=['P279', 'P527', 'P361'],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter)

        PBB_Helpers.try_write(
            item,
            self.id,
            INTERPRO,
            login,
            edit_summary="create/update subclass/has part/part of")