Exemple #1
0
    def create_item(self, login):
        """
        Build the Wikidata item for this InterPro term and attempt to write it.

        The item gets two statements, both referenced with ``self.reference``:
        the InterPro ID as an external ID and a "subclass of" (P279) link to
        ``self.type_wdid``.

        :param login: login object forwarded to ``PBB_Helpers.try_write``
        :return: the ``WDItemEngine`` instance that was built
        """
        interpro_id_statement = PBB_Core.WDExternalID(
            value=self.id, prop_nr=INTERPRO, references=[self.reference])
        subclass_statement = PBB_Core.WDItemID(
            value=self.type_wdid, prop_nr="P279", references=[self.reference])

        wd_item = PBB_Core.WDItemEngine(
            item_name=self.name,
            domain='interpro',
            data=[interpro_id_statement, subclass_statement],
            append_value=["P279"],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter,
        )

        # Label is English only; descriptions are set for every language
        # available in self.lang_descr.
        wd_item.set_label(self.name, lang='en')
        for language_code, text in self.lang_descr.items():
            wd_item.set_description(text, lang=language_code)
        wd_item.set_aliases([self.short_name, self.id])

        PBB_Helpers.try_write(wd_item, self.id, INTERPRO, login)
        return wd_item
Exemple #2
0
def gene_encodes_statement(gene_qid, protein_qid, id_prop, external_id, source,
                           login):
    """
    Add an "encodes" (P688) statement to an existing gene item.

    :param gene_qid: Wikidata ID of the gene item that receives the statement
    :param protein_qid: Wikidata ID of the protein used as the statement value
    :param id_prop: identifier property passed to ``make_ref_source`` and
        ``try_write``
    :param external_id: external identifier passed to ``make_ref_source`` and
        ``try_write``
    :param source: source description passed to ``make_ref_source``
    :param login: login object forwarded to ``try_write``
    :return: None
    :raises ValueError: if the item engine reports it would create a new item
    """
    reference = make_ref_source(source, id_prop, external_id)
    encodes_statement = PBB_Core.WDItemID(value=protein_qid,
                                          prop_nr='P688',
                                          references=[reference])

    # NOTE(review): strain_info is not a parameter of this function; it is
    # presumably a module-level global -- confirm against the full file.
    base_filter = {'P351': '', 'P703': strain_info['organism_wdid']}

    gene_item = PBB_Core.WDItemEngine(wd_item_id=gene_qid,
                                      domain='genes',
                                      data=[encodes_statement],
                                      fast_run=True,
                                      fast_run_base_filter=base_filter)

    # The gene item must already exist; creating one here is an error.
    if gene_item.create_new_item:
        raise ValueError("nooo!!")

    try_write(gene_item, external_id, id_prop, login)
Exemple #3
0
    def create_relationships(self, ipr_wd):
        """
        Write this term's relationship statements to its Wikidata item.

        Emits "subclass of" (P279) for the parent, "has part" (P527) for each
        contained term and "part of" (P361) for each term this one is found
        in. Returns without writing when the term has none of these.

        :param ipr_wd: dict mapping InterPro IDs to Wikidata item IDs
        :return: None
        """
        log_template = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'

        def linked(target_ipr, prop_nr):
            # Statement pointing at the Wikidata item of another InterPro term.
            return PBB_Core.WDItemID(value=ipr_wd[target_ipr],
                                     prop_nr=prop_nr,
                                     references=[self.reference])

        # The first statement is always the InterPro ID itself.
        statements = [
            PBB_Core.WDExternalID(value=self.id,
                                  prop_nr=INTERPRO,
                                  references=[self.reference])
        ]
        if self.parent:
            statements.append(linked(self.parent, 'P279'))  # subclass of
        if self.contains:
            statements += [linked(child, 'P527')
                           for child in self.contains]  # has part
        if self.found_in:
            statements += [linked(container, 'P361')
                           for container in self.found_in]  # part of

        # Only the ID statement is present: nothing relational to write.
        if len(statements) == 1:
            return

        item = PBB_Core.WDItemEngine(item_name=self.name,
                                     domain='interpro',
                                     data=statements,
                                     server=SERVER,
                                     append_value=["P279", "P527", "P361"])
        try:
            item.write(self.login)
        except WDApiError as e:
            print(e)
            PBB_Core.WDItemEngine.log(
                'ERROR',
                log_template.format(main_data_id=self.id,
                                    exception_type=type(e),
                                    message=str(e),
                                    wd_id=self.wd_item_id,
                                    duration=datetime.now()))
            return

        written = [(x.prop_nr, x.value) for x in statements]
        PBB_Core.WDItemEngine.log(
            'INFO',
            log_template.format(
                main_data_id=self.id,
                exception_type='',
                message='created interpro relationships: {}'.format(written),
                wd_id=item.wd_item_id,
                duration=datetime.now()))
Exemple #4
0
    def create_relationships(self, login):
        """
        Write this term's relationship statements to its Wikidata item.

        Emits "subclass of" (P279) for the parent, "has part" (P527) for each
        contained term and "part of" (P361) for each term this one is found
        in, using the Wikidata IDs stored on the instance after
        ``do_wdid_lookup``. Returns without writing when the lookup raises
        ``KeyError`` (the failure is logged) or when there is no relationship
        to add.

        :param login: login object forwarded to ``PBB_Helpers.try_write``
        :return: None
        """
        try:
            # endpoint may not get updated in time?
            self.do_wdid_lookup()
        except KeyError as lookup_error:
            error_msg = format_msg(self.id, INTERPRO, None, str(lookup_error),
                                   type(lookup_error))
            PBB_Core.WDItemEngine.log("ERROR", error_msg)
            return

        def linked(target_wdid, prop_nr):
            # Statement pointing at another item, referenced like the others.
            return PBB_Core.WDItemID(value=target_wdid,
                                     prop_nr=prop_nr,
                                     references=[self.reference])

        # The first statement is always the InterPro ID itself.
        statements = [
            PBB_Core.WDExternalID(value=self.id,
                                  prop_nr=INTERPRO,
                                  references=[self.reference])
        ]
        if self.parent:
            statements.append(linked(self.parent_wdid, 'P279'))  # subclass of
        if self.contains:
            statements += [linked(wdid, 'P527')
                           for wdid in self.contains_wdid]  # has part
        if self.found_in:
            statements += [linked(wdid, 'P361')
                           for wdid in self.found_in_wdid]  # part of

        # Only the ID statement is present: nothing relational to write.
        if len(statements) == 1:
            return

        wd_item = PBB_Core.WDItemEngine(
            wd_item_id=self.wdid,
            domain='interpro',
            data=statements,
            append_value=['P279', 'P527', 'P361'],
            fast_run=True,
            fast_run_base_filter=IPRTerm.fast_run_base_filter,
        )
        PBB_Helpers.try_write(
            wd_item, self.id, INTERPRO, login,
            edit_summary="create/update subclass/has part/part of")
Exemple #5
0
    def do_umls_statement(self, doid, umls_list, dry_run=False):
        """
        Add UMLS identifier statements to the existing item of a disease.

        :param doid: key into ``self.DOID2WD`` selecting the item to update
        :param umls_list: iterable of UMLS identifiers, one statement each
        :param dry_run: when true, only log that the item would be updated
            instead of writing it
        :return: None
        :raises ValueError: if the item engine reports it would create a new item
        """
        statements = [
            PBB_Core.WDExternalID(value=umls_id, prop_nr=self.UMLS_PROP, references=[self.reference])
            for umls_id in umls_list
        ]

        wd_item = PBB_Core.WDItemEngine(
            wd_item_id=self.DOID2WD[doid],
            domain='disease',
            data=statements,
            append_value=[self.UMLS_PROP],
            fast_run=True,
            fast_run_base_filter=self.fast_run_base_filter,
        )

        # no item creation should be done
        if wd_item.create_new_item:
            raise ValueError("something bad happpened")

        # Nothing to log or write when the item is already up to date.
        if not wd_item.require_write:
            return

        if dry_run:
            info_logger.info(" ".join(["item_updated", doid, wd_item.wd_item_id]))
        else:
            self.try_write(wd_item, doid)
Exemple #6
0
    def create_item(self):
        """
        Create or update the Wikidata item for this InterPro entry.

        The item gets the InterPro ID as an external ID plus a "subclass of"
        (P279) statement chosen from ``IPRItem.type2subclass`` by the entry's
        type. On a successful write the resulting item ID is stored in
        ``self.wd_item_id``; a ``WDApiError`` is printed and logged and the
        method returns without updating it.

        :return: None
        """
        log_template = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'

        interpro_id_statement = PBB_Core.WDExternalID(
            value=self.id, prop_nr=INTERPRO, references=[self.reference])
        subclass_statement = PBB_Core.WDItemID(
            value=IPRItem.type2subclass[self.type],
            prop_nr="P279",
            references=[self.reference])

        item = PBB_Core.WDItemEngine(
            item_name=self.name,
            domain='interpro',
            data=[interpro_id_statement, subclass_statement],
            server=SERVER)

        item.set_label(self.name)
        for language_code, text in self.description.items():
            item.set_description(text, lang=language_code)
        item.set_aliases([self.short_name, self.id])

        try:
            item.write(login=self.login)
        except WDApiError as e:
            print(e)
            PBB_Core.WDItemEngine.log(
                'ERROR',
                log_template.format(main_data_id=self.id,
                                    exception_type=type(e),
                                    message=str(e),
                                    wd_id=self.wd_item_id,
                                    duration=datetime.now()))
            return

        # Remember the item ID assigned or confirmed by the write.
        self.wd_item_id = item.wd_item_id
        PBB_Core.WDItemEngine.log(
            'INFO',
            log_template.format(main_data_id=self.id,
                                exception_type='',
                                message='created/updated interpro item',
                                wd_id=item.wd_item_id,
                                duration=datetime.now()))
Exemple #7
0
def create_uniprot_relationships(login, release_wdid, collection, taxon=None):
    """
    Add InterPro "subclass of" / "has part" statements to protein items.

    Only UniProt proteins that already have a Wikidata item (according to
    ``PBB_Helpers.id_mapper``) are processed.

    :param login: login object forwarded to ``PBB_Helpers.try_write``
    :param release_wdid: Wikidata ID used as the "stated in" (P248) reference
        value
    :param collection: document collection queried with
        ``find(..., no_cursor_timeout=True)``; each document is read for its
        '_id' (UniProt ID), 'subclass' and 'has_part' entries
        (presumably a pymongo collection -- confirm against the caller)
    :param taxon: optional taxon Wikidata ID; when given, both the ID mapping
        and the fast-run filter are restricted to that taxon via P703
    :return: None
    :raises ValueError: if the item engine reports it would create a new item
    """
    # only do uniprot proteins that are already in wikidata
    if taxon:
        uniprot2wd = PBB_Helpers.id_mapper(UNIPROT, (("P703", taxon),))
        fast_run_base_filter = {UNIPROT: "", "P703": taxon}
    else:
        uniprot2wd = PBB_Helpers.id_mapper(UNIPROT)
        fast_run_base_filter = {UNIPROT: ""}

    cursor = collection.find({'_id': {'$in': list(uniprot2wd.keys())}}, no_cursor_timeout=True)
    # The cursor was opened with no_cursor_timeout, so make sure it is closed
    # even when an exception escapes the loop.
    try:
        for doc in tqdm(cursor, total=cursor.count()):
            uniprot_id = doc['_id']

            # Without a Wikidata item there is nothing to attach statements
            # to: report it and move on. (The missing key must not be looked
            # up here; doing so would raise KeyError.)
            if uniprot_id not in uniprot2wd:
                print("wdid_not_found " + uniprot_id)
                PBB_Core.WDItemEngine.log("ERROR", PBB_Helpers.format_msg(uniprot_id, UNIPROT, None, "wdid_not_found"))
                continue

            ## References
            # stated in Interpro version XX.X
            ref_stated_in = PBB_Core.WDItemID(release_wdid, 'P248', is_reference=True)
            ref_ipr = PBB_Core.WDString("http://www.ebi.ac.uk/interpro/protein/{}".format(uniprot_id), "P854",
                                        is_reference=True)
            reference = [ref_stated_in, ref_ipr]

            statements = []
            if doc['subclass']:
                for f in doc['subclass']:
                    statements.append(
                        PBB_Core.WDItemID(value=IPRTerm.ipr2wd[f], prop_nr='P279', references=[reference]))
            if doc['has_part']:
                for hp in doc['has_part']:
                    statements.append(
                        PBB_Core.WDItemID(value=IPRTerm.ipr2wd[hp], prop_nr='P527', references=[reference]))

            wd_item = PBB_Core.WDItemEngine(wd_item_id=uniprot2wd[uniprot_id], domain="proteins", data=statements,
                                            fast_run=True, fast_run_base_filter=fast_run_base_filter,
                                            append_value=["P279", "P527", "P361"])

            # The protein item must already exist; creating one here is an error.
            if wd_item.create_new_item:
                raise ValueError("something bad happened")
            PBB_Helpers.try_write(wd_item, uniprot_id, INTERPRO, login,
                                  edit_summary="add/update family and/or domains")
    finally:
        cursor.close()
Exemple #8
0
def wd_item_construction(record, strain_info, chrom_wdid, login):
    """
    Build the Wikidata gene item for one gene record and attempt to write it.

    :param record: gene record; every field read here is a dict with an
        '@value' entry and, for referenced fields, an '@source' entry
    :param strain_info: dict providing the 'organism_wdid', 'organism_type',
        'organism_name' and 'chrom_genomeid_map' entries for the organism
    :param chrom_wdid: mapping from genome ID to the Wikidata ID of the
        chromosome item
    :param login: login object forwarded to ``PBB_Helpers.try_write``
    :return: None
    """

    # Maps a source name to the key of the external ID used in its reference
    # (e.g. source "Entrez" is referenced through "entrez_gene").
    # These are defined in HelperBot
    source_ref_id = {
        'Ensembl': 'ensembl_gene',
        'Entrez': 'entrez_gene',
        'Uniprot': 'uniprot'
    }

    def build_statements():
        """
        Return the list of referenced statements to pass to the item engine.
        """
        statements = []

        # ---- external IDs (also reused when building references) ----
        ids_by_ref_key = {
            'entrez_gene': str(record['entrezgene']['@value']),
            'ensembl_gene': record['ensembl']['@value']['gene'],
            'locus_tag': record['locus_tag']['@value']
        }

        # entrez gene id
        entrez_ref = make_ref_source(record['entrezgene']['@source'],
                                     'entrez_gene',
                                     ids_by_ref_key['entrez_gene'])
        statements.append(
            PBB_Core.WDString(ids_by_ref_key['entrez_gene'],
                              PROPS['Entrez Gene ID'],
                              references=[entrez_ref]))

        # ensembl gene id
        ensembl_ref = make_ref_source(record['ensembl']['@source'],
                                      'ensembl_gene',
                                      ids_by_ref_key['ensembl_gene'])
        statements.append(
            PBB_Core.WDString(ids_by_ref_key['ensembl_gene'],
                              PROPS['Ensembl Gene ID'],
                              references=[ensembl_ref]))

        # ncbi locus tag (referenced through the entrez source)
        statements.append(
            PBB_Core.WDString(ids_by_ref_key['locus_tag'],
                              PROPS['NCBI Locus tag'],
                              references=[entrez_ref]))

        # ---- statements without a referencable source of their own ----
        # (made by hand for now; they borrow the ensembl reference)
        # subclass of gene
        statements.append(
            PBB_Core.WDItemID('Q7187',
                              PROPS['subclass of'],
                              references=[ensembl_ref]))
        # found in taxon
        statements.append(
            PBB_Core.WDItemID(strain_info['organism_wdid'],
                              PROPS['found in taxon'],
                              references=[ensembl_ref]))

        # ---- genomic position: strand, start, end, chromosome ----
        position = record['genomic_pos']['@value']
        position_source = record['genomic_pos']['@source']
        position_ref_key = source_ref_id[position_source['_id']]
        position_ref = make_ref_source(position_source,
                                       position_ref_key,
                                       ids_by_ref_key[position_ref_key])

        # Genome ID of the chromosome: used as the qualifier on start/end and
        # as the key for the chromosome item lookup below.
        chrom_genome_id = strain_info['chrom_genomeid_map'][position['chr']]
        chrom_qualifier = PBB_Core.WDString(chrom_genome_id,
                                            'P2249',
                                            is_qualifier=True)  # Refseq Genome ID

        # strand orientation
        if position['strand'] == 1:
            strand_wdid = 'Q22809680'
        else:
            strand_wdid = 'Q22809711'
        statements.append(
            PBB_Core.WDItemID(strand_wdid,
                              PROPS['strand orientation'],
                              references=[position_ref]))

        # genomic start and end, each qualified with the chromosome genome ID
        statements.append(
            PBB_Core.WDString(str(int(position['start'])),
                              PROPS['genomic start'],
                              references=[position_ref],
                              qualifiers=[chrom_qualifier]))
        statements.append(
            PBB_Core.WDString(str(int(position['end'])),
                              PROPS['genomic end'],
                              references=[position_ref],
                              qualifiers=[chrom_qualifier]))

        # chromosome
        statements.append(
            PBB_Core.WDItemID(chrom_wdid[chrom_genome_id],
                              PROPS['chromosome'],
                              references=[position_ref]))

        return statements

    item_name = '{} {}'.format(record['name']['@value'],
                               record['ensembl']['@value']['gene'])
    item_description = '{} gene found in {}'.format(
        strain_info['organism_type'], strain_info['organism_name'])

    gene_statements = build_statements()
    base_filter = {
        PROPS['Entrez Gene ID']: '',
        PROPS['found in taxon']: strain_info['organism_wdid']
    }
    wd_item_gene = PBB_Core.WDItemEngine(item_name=item_name,
                                         domain='genes',
                                         data=gene_statements,
                                         append_value=[PROPS['subclass of']],
                                         fast_run=True,
                                         fast_run_base_filter=base_filter)
    wd_item_gene.set_label(item_name)
    wd_item_gene.set_description(item_description, lang='en')
    wd_item_gene.set_aliases(
        [record['symbol']['@value'], record['locus_tag']['@value']])

    PBB_Helpers.try_write(wd_item_gene, record['_id']['@value'], ENTREZ_PROP,
                          login)
Exemple #9
0
def protein_item(record, strain_info, gene_qid, go_wdid_mapping, login,
                 add_pubmed):
    """
    Build the Wikidata protein item for one record and attempt to write it.

    :param record: protein record; every field read here is a dict with an
        '@value' entry and, for referenced fields, an '@source' entry
    :param strain_info: dict providing the 'organism_wdid', 'organism_type'
        and 'organism_name' entries for the organism
    :param gene_qid: Wikidata ID of the gene item, used as the P702 value
    :param go_wdid_mapping: mapping from GO term ID to Wikidata ID
    :param login: login object used for writing (and for creating PubMed
        stub items when ``add_pubmed`` is true)
    :param add_pubmed: when true, every PubMed ID of a GO record is added to
        that statement's reference as a "stated in" (P248) entry
    :return: None
    """
    item_name = '{} {}'.format(record['name']['@value'],
                               record['ensembl']['@value']['protein'])
    item_description = '{} protein found in {}'.format(
        strain_info['organism_type'], strain_info['organism_name'])

    statements = []

    # ---- external IDs (also reused when building references) ----
    ids_by_ref_key = {
        'entrez_gene': str(record['entrezgene']['@value']),
        'ensembl_protein': record['ensembl']['@value']['protein'],
        'ensembl_gene': record['ensembl']['@value']['gene'],
        'refseq_protein': record['refseq']['@value']['protein'],
        'uniprot': record['uniprot']['@value']['Swiss-Prot']
    }

    # ensembl protein id
    ensembl_ref = make_ref_source(record['ensembl']['@source'],
                                  'ensembl_protein',
                                  ids_by_ref_key['ensembl_protein'])
    statements.append(
        PBB_Core.WDString(ids_by_ref_key['ensembl_protein'],
                          'P705',
                          references=[ensembl_ref]))

    # refseq protein id
    refseq_ref = make_ref_source(record['refseq']['@source'],
                                 'refseq_protein',
                                 ids_by_ref_key['refseq_protein'])
    statements.append(
        PBB_Core.WDString(ids_by_ref_key['refseq_protein'],
                          'P637',
                          references=[refseq_ref]))

    # uniprot id
    uniprot_ref = make_ref_source(record['uniprot']['@source'], 'uniprot',
                                  ids_by_ref_key['uniprot'])
    statements.append(
        PBB_Core.WDString(ids_by_ref_key['uniprot'],
                          'P352',
                          references=[uniprot_ref]))

    # ---- GO terms ----
    # TODO: https://www.wikidata.org/wiki/Q3460832
    preprocess_go(record)
    print(record)
    go_source = record['go']['@source']
    go_ref_key = source_ref_id[go_source['_id']]
    base_go_reference = make_ref_source(go_source, go_ref_key,
                                        ids_by_ref_key[go_ref_key])
    for go_level, go_records in record['go']['@value'].items():
        level_prop = go_props[go_level]
        for go_record in go_records:
            go_wdid = go_wdid_mapping[go_record['id']]
            evidence_wdid = go_evidence_codes[go_record['evidence']]
            evidence_qualifier = PBB_Core.WDItemID(value=evidence_wdid,
                                                   prop_nr='P459',
                                                   is_qualifier=True)
            # Each statement gets its own copy so PubMed entries added for
            # one GO record do not leak into the others.
            go_reference = copy.deepcopy(base_go_reference)
            if add_pubmed:
                for pmid in go_record['pubmed']:
                    pmid_wdid = PBB_Helpers.PubmedStub(pmid).create(login)
                    go_reference.append(
                        PBB_Core.WDItemID(pmid_wdid, 'P248',
                                          is_reference=True))
            statements.append(
                PBB_Core.WDItemID(go_wdid,
                                  level_prop,
                                  references=[go_reference],
                                  qualifiers=[evidence_qualifier]))

    # ---- statements without a referencable source of their own ----
    # (made by hand for now; they borrow the ensembl reference)
    # subclass of protein
    statements.append(
        PBB_Core.WDItemID('Q8054', 'P279', references=[ensembl_ref]))
    # found in taxon
    statements.append(
        PBB_Core.WDItemID(strain_info['organism_wdid'],
                          'P703',
                          references=[ensembl_ref]))
    # link from the protein to its gene item
    statements.append(
        PBB_Core.WDItemID(gene_qid, 'P702', references=[ensembl_ref]))

    base_filter = {'P352': '', 'P703': strain_info['organism_wdid']}
    try:
        wd_item_protein = PBB_Core.WDItemEngine(item_name=item_name,
                                                domain='proteins',
                                                data=statements,
                                                append_value=['P279'],
                                                fast_run=True,
                                                fast_run_base_filter=base_filter)
        wd_item_protein.set_label(item_name)
        wd_item_protein.set_description(item_description, lang='en')
        wd_item_protein.set_aliases(
            [record['symbol']['@value'], record['locus_tag']['@value']])
    except Exception as e:
        # Any failure while assembling the item is logged and the record is
        # skipped rather than aborting the run.
        print(e)
        PBB_Core.WDItemEngine.log(
            "ERROR",
            format_msg(record['entrezgene']['@value'], str(e), None,
                       ENTREZ_PROP))
    else:
        try_write(wd_item_protein, record['entrezgene']['@value'], 'P351',
                  login)
Exemple #10
0
def create_protein_ipr(uniprot_id, uniprot_wdid, families, has_part,
                       release_info, login):
    """
    Create InterPro relationships on one protein item.

    :param uniprot_id: UniProt ID of the protein to modify
    :type uniprot_id: str
    :param uniprot_wdid: Wikidata ID of the protein
    :param families: list of InterPro Wikidata IDs the protein is a
        subclass of (P279)
    :param has_part: list of InterPro Wikidata IDs the protein has as
        parts (P527)
    :param release_info: dict with the 'date' and 'version' of the release
        being cited; 'date' must provide ``strftime``
    :param login: login object passed to the item's ``write``
    :return: None
    """
    log_template = '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'

    release_date = release_info['date']
    release_version = release_info['version']

    # Reference shared by every statement: stated in / imported from
    # Q3047275, release version and date, plus the protein's InterPro URL.
    reference = [
        PBB_Core.WDItemID("Q3047275", 'P248', is_reference=True),
        PBB_Core.WDItemID("Q3047275", 'P143', is_reference=True),
        PBB_Core.WDString(release_version, 'P348', is_reference=True),
        PBB_Core.WDTime(release_date.strftime("+%Y-%m-%dT00:00:00Z"),
                        'P577',
                        is_reference=True),
        PBB_Core.WDString(
            "http://www.ebi.ac.uk/interpro/protein/{}".format(uniprot_id),
            "P854",
            is_reference=True),
    ]
    for ref_part in reference:
        ref_part.overwrite_references = True

    statements = []
    if families:
        statements += [
            PBB_Core.WDItemID(value=family_wdid,
                              prop_nr='P279',
                              references=[reference])
            for family_wdid in families
        ]
    if has_part:
        statements += [
            PBB_Core.WDItemID(value=part_wdid,
                              prop_nr='P527',
                              references=[reference])
            for part_wdid in has_part
        ]

    item = PBB_Core.WDItemEngine(wd_item_id=uniprot_wdid,
                                 data=statements,
                                 server=SERVER,
                                 append_value=["P279", "P527", "P361"])
    try:
        item.write(login)
    except WDApiError as e:
        print(e)
        PBB_Core.WDItemEngine.log(
            'ERROR',
            log_template.format(main_data_id=uniprot_id,
                                exception_type=type(e),
                                message=str(e),
                                wd_id=uniprot_wdid,
                                duration=datetime.now()))
        return

    written = [(x.prop_nr, x.value) for x in statements]
    PBB_Core.WDItemEngine.log(
        'INFO',
        log_template.format(
            main_data_id=uniprot_id,
            exception_type='',
            message='created protein interpro relationships: {}'.format(
                written),
            wd_id=uniprot_wdid,
            duration=datetime.now()))
Exemple #11
0
def make_chroms(strain_info, retrieved, login):
    """
    Create or update one chromosome item per entry of the organism's
    chromosome map and return their Wikidata IDs.

    Each item gets "subclass of" chromosome (P279 -> Q37748), "found in
    taxon" (P703) and the genome ID (P2249), all with the same reference.

    :param strain_info: dict providing the 'organism_name', 'organism_type',
        'organism_wdid' and 'chrom_genomeid_map' entries for the organism;
        'chrom_genomeid_map' maps a chromosome number to its genome ID
    :param retrieved: value passed to ``make_ref`` together with the genome ID
    :param login: login object used to write the items
    :return: dict mapping chromosome number to the Wikidata ID of its item;
        chromosomes whose write failed are left out
    """
    chrom_wdid = {}
    for chrom_num, genome_id in strain_info['chrom_genomeid_map'].items():

        item_name = '{} chromosome {}'.format(strain_info['organism_name'],
                                              chrom_num)
        item_description = '{} chromosome'.format(strain_info['organism_type'])
        print(item_name)
        print(genome_id)

        reference = make_ref(retrieved, genome_id)
        statements = [
            # subclass of chromosome
            PBB_Core.WDItemID(value='Q37748',
                              prop_nr='P279',
                              references=[reference]),
            # found in taxon
            PBB_Core.WDItemID(value=strain_info['organism_wdid'],
                              prop_nr='P703',
                              references=[reference]),
            # genome id
            PBB_Core.WDString(value=genome_id,
                              prop_nr='P2249',
                              references=[reference]),
        ]

        wd_item = PBB_Core.WDItemEngine(item_name=item_name,
                                        domain='chromosome',
                                        data=statements,
                                        append_value=['P279'],
                                        fast_run=True,
                                        fast_run_base_filter={
                                            'P703':
                                            strain_info['organism_wdid'],
                                            'P2249': ''
                                        })

        if wd_item.require_write:
            print("require write")
            wd_item.set_label(item_name)
            wd_item.set_description(item_description, lang='en')
            try:
                # Decide the log verb before writing; the flag describes the
                # state of the item prior to the write.
                msg = "CREATE" if wd_item.create_new_item else "UPDATE"
                wd_item.write(login=login)
                # Record the item ID for written items too; previously only
                # skipped (already up-to-date) chromosomes ended up in the
                # returned mapping.
                chrom_wdid[chrom_num] = wd_item.wd_item_id
                PBB_Core.WDItemEngine.log(
                    "INFO",
                    format_msg(genome_id,
                               msg,
                               wd_item.wd_item_id,
                               external_id_prop='P2249'))
            except Exception as e:
                # Best effort: report the failure and continue with the next
                # chromosome.
                print(e)
                PBB_Core.WDItemEngine.log(
                    "ERROR",
                    format_msg(genome_id,
                               str(e),
                               wd_item.wd_item_id,
                               external_id_prop='P2249'))
        else:
            chrom_wdid[chrom_num] = wd_item.wd_item_id
            PBB_Core.WDItemEngine.log(
                "INFO",
                format_msg(genome_id,
                           "SKIP",
                           wd_item.wd_item_id,
                           external_id_prop='P2249'))

    return chrom_wdid
Exemple #12
0
    def __init__(self, object):
        """

        :type self: object
        """
        self.start = object["start"]
        self.entrezgene = object["entrezgene"]
        self.uniprotwikidataids = object["uniprotwikidataids"]
        gene_annotations = self.annotate_gene()
        self.genomeInfo = object["speciesInfo"][str(gene_annotations['taxid'])]
        self.content = object
        self.name = gene_annotations["name"]
        self.logincreds = object["logincreds"]
        if "_timestamp" in gene_annotations.keys():
            self.annotationstimestamp = gene_annotations["_timestamp"]
        self.wdid = object["wdid"]

        # symbol:
        self.symbol = gene_annotations["symbol"]
        print(self.symbol)
        # HGNC
        if "HGNC" in gene_annotations:
            if isinstance(gene_annotations["HGNC"], list):
                self.hgnc = gene_annotations["HGNC"]
            else:
                self.hgnc = [gene_annotations["HGNC"]]
        else:
            self.hgnc = None

        # Ensembl Gene & transcript
        if "ensembl" in gene_annotations:
            if "gene" in gene_annotations["ensembl"]:
                if isinstance(gene_annotations["ensembl"]["gene"], list):
                    self.ensembl_gene = gene_annotations["ensembl"]["gene"]
                else:
                    self.ensembl_gene = [gene_annotations["ensembl"]["gene"]]
            else:
                self.ensembl_gene = None

            if "transcript" in gene_annotations["ensembl"]:
                if isinstance(gene_annotations["ensembl"]["transcript"], list):
                    self.ensembl_transcript = gene_annotations["ensembl"]["transcript"]
                else:
                    self.ensembl_transcript = [gene_annotations["ensembl"]["transcript"]]
            else:
                self.ensembl_transcript = None
        # Homologene
        if "homologene" in gene_annotations:
            if isinstance(gene_annotations["homologene"]["id"], list):
                self.homologene = [str(i) for i in gene_annotations["homologene"]["id"]]
            else:
                self.homologene = [str(gene_annotations["homologene"]["id"])]
        else:
            self.homologene = None
        # Refseq 
        if "refseq" in gene_annotations:
            if "rna" in gene_annotations["refseq"]:
                if isinstance(gene_annotations["refseq"]["rna"], list):
                    self.refseq_rna = gene_annotations["refseq"]["rna"]
                else:
                    self.refseq_rna = [gene_annotations["refseq"]["rna"]]
            else:
                self.refseq_rna = None
        else:
            self.refseq_rna = None

            # MGI
        if "MGI" in gene_annotations:
            if isinstance(gene_annotations["MGI"], list):
                self.MGI = gene_annotations["MGI"]
            else:
                self.MGI = [gene_annotations["MGI"]]
        else:
            self.MGI = None

        self.chromosome = None
        self.startpost = None
        self.endpos = None
        if "genomic_pos" in gene_annotations:
            if isinstance(gene_annotations["genomic_pos"], list):
                self.chromosome = []
                self.startpos = []
                self.endpos = []
                for i in range(len(gene_annotations["genomic_pos"])):
                    if gene_annotations["genomic_pos"][i]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                        self.genomeInfo["name"]].keys():
                        self.chromosome.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                   gene_annotations["genomic_pos"][i]["chr"]])
                        self.startpos.append(gene_annotations["genomic_pos"][i]["start"])
                        self.endpos.append(gene_annotations["genomic_pos"][i]["end"])
            else:
                self.chromosome = []
                self.startpos = []
                self.endpos = []
                if gene_annotations["genomic_pos"]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                    self.genomeInfo["name"]].keys():
                    self.chromosome.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                               gene_annotations["genomic_pos"]["chr"]])
                    self.startpos.append(gene_annotations["genomic_pos"]["start"])
                    self.endpos.append(gene_annotations["genomic_pos"]["end"])

        self.encodes = None
        if "uniprot" in gene_annotations.keys():
            if "Swiss-Prot" in gene_annotations["uniprot"].keys():
                if isinstance(gene_annotations["uniprot"]["Swiss-Prot"], list):
                    self.encodes = []
                    for uniprot in gene_annotations["uniprot"]["Swiss-Prot"]:
                        self.encodes.append(uniprot)
                else:
                    self.encodes = [gene_annotations["uniprot"]["Swiss-Prot"]]


        self.chromosomeHg19 = None
        self.startposHg19 = None
        self.endposHg19 = None
        if "genomic_pos_hg19" in gene_annotations:
            if isinstance(gene_annotations["genomic_pos_hg19"], list):
                self.chromosomeHg19 = []
                self.startposHg19 = []
                self.endposHg19 = []
                for i in range(len(gene_annotations["genomic_pos_hg19"])):
                    if gene_annotations["genomic_pos_hg19"][i]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                        self.genomeInfo["name"]].keys():
                        self.chromosomeHg19.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                       gene_annotations["genomic_pos_hg19"][i]["chr"]])
                        self.startposHg19.append(gene_annotations["genomic_pos_hg19"][i]["start"])
                        self.endposHg19.append(gene_annotations["genomic_pos_hg19"][i]["end"])
            else:
                self.chromosomeHg19 = []
                self.startposHg19 = []
                self.endposHg19 = []
                if gene_annotations["genomic_pos_hg19"]["chr"] in ProteinBoxBotKnowledge.chromosomes[
                    self.genomeInfo["name"]].keys():
                    self.chromosomeHg19.append(ProteinBoxBotKnowledge.chromosomes[self.genomeInfo["name"]][
                                                   gene_annotations["genomic_pos_hg19"]["chr"]])
                    self.startposHg19.append(gene_annotations["genomic_pos_hg19"]["start"])
                    self.endposHg19.append(gene_annotations["genomic_pos_hg19"]["end"])

        # type of Gene
        if "type_of_gene" in gene_annotations:
            self.type_of_gene = []
            if gene_annotations["type_of_gene"] == "ncRNA":
                self.type_of_gene.append("Q427087")
            if gene_annotations["type_of_gene"] == "snRNA":
                self.type_of_gene.append("Q284578")
            if gene_annotations["type_of_gene"] == "snoRNA":
                self.type_of_gene.append("Q284416")
            if gene_annotations["type_of_gene"] == "rRNA":
                self.type_of_gene.append("Q215980")
            if gene_annotations["type_of_gene"] == "tRNA":
                self.type_of_gene.append("Q201448")
            if gene_annotations["type_of_gene"] == "pseudo":
                self.type_of_gene.append("Q277338")
            if gene_annotations["type_of_gene"] == "protein-coding":
                self.type_of_gene.append("Q20747295")
        else:
            self.type_of_gene = None
        # Reference section  
        # Prepare references
        refStatedIn = PBB_Core.WDItemID(value=self.genomeInfo["release"], prop_nr='P248', is_reference=True)
        refStatedIn.overwrite_references = True
        refImported = PBB_Core.WDItemID(value='Q20641742', prop_nr='P143', is_reference=True)
        refImported.overwrite_references = True
        timeStringNow = strftime("+%Y-%m-%dT00:00:00Z", gmtime())
        refRetrieved = PBB_Core.WDTime(timeStringNow, prop_nr='P813', is_reference=True)
        refRetrieved.overwrite_references = True
        gene_reference = [refStatedIn, refImported, refRetrieved]

        refStatedInEnsembl = PBB_Core.WDItemID(value= 'Q21996330', prop_nr='P248', is_reference=True)
        refStatedInEnsembl.overwrite_references = True
        refImportedEnsembl = PBB_Core.WDItemID(value='Q1344256', prop_nr='P143', is_reference=True)
        refImportedEnsembl.overwrite_references = True

        ensembl_reference = [refStatedInEnsembl, refImportedEnsembl, refRetrieved]

        genomeBuildQualifier = PBB_Core.WDItemID(value=self.genomeInfo["genome_assembly"], prop_nr='P659',
                                                 is_qualifier=True)
        genomeBuildPreviousQualifier = PBB_Core.WDItemID(value=self.genomeInfo["genome_assembly_previous"],
                                                         prop_nr='P659', is_qualifier=True)

        prep = dict()
        prep['P703'] = [PBB_Core.WDItemID(value=self.genomeInfo['wdid'], prop_nr='P703',
                                          references=[copy.deepcopy(gene_reference)])]
        if self.genomeInfo["name"] == "human":
            prep['P353'] = [
                PBB_Core.WDString(value=self.symbol, prop_nr='P353', references=[copy.deepcopy(gene_reference)])]
        prep['P351'] = [
            PBB_Core.WDString(value=str(self.entrezgene), prop_nr='P351', references=[copy.deepcopy(gene_reference)])]

        prep['P279'] = [PBB_Core.WDItemID(value='Q7187', prop_nr='P279', references=[copy.deepcopy(gene_reference)])]
        if "type_of_gene" in vars(self):
            if self.type_of_gene != None:
                for i in range(len(self.type_of_gene)):
                    prep['P279'].append(PBB_Core.WDItemID(value=self.type_of_gene[i], prop_nr='P279',
                                                          references=[copy.deepcopy(gene_reference)]))

        if "ensembl_gene" in vars(self):
            if self.ensembl_gene != None:
                prep['P594'] = []
                for ensemblg in self.ensembl_gene:
                    prep['P594'].append(
                        PBB_Core.WDString(value=ensemblg, prop_nr='P594', references=[copy.deepcopy(gene_reference)]))

        if "ensembl_transcript" in vars(self):
            if self.ensembl_transcript != None:
                prep['P704'] = []
                for ensemblt in self.ensembl_transcript:
                    prep['P704'].append(
                        PBB_Core.WDString(value=ensemblt, prop_nr='P704', references=[copy.deepcopy(gene_reference)]))

        if "encodes" in vars(self):
            if self.encodes != None:
                prep['P688'] = []
                for uniprot in self.encodes:
                    if uniprot in self.uniprotwikidataids.keys():
                        prep['P688'].append(PBB_Core.WDItemID(value=self.uniprotwikidataids[uniprot], prop_nr='P688', references=[copy.deepcopy(gene_reference)]))

        if "hgnc" in vars(self):
            if self.hgnc != None:
                prep['P354'] = []
                for hugo in self.hgnc:
                    prep['P354'].append(
                        PBB_Core.WDString(value=hugo, prop_nr='P354', references=[copy.deepcopy(gene_reference)]))

        if "homologene" in vars(self):
            if self.homologene != None:
                prep['P593'] = []
                for ortholog in self.homologene:
                    prep['P593'].append(
                        PBB_Core.WDString(value=ortholog, prop_nr='P593', references=[copy.deepcopy(gene_reference)]))

        if "refseq_rna" in vars(self):
            if self.refseq_rna != None:
                prep['P639'] = []
                for refseq in self.refseq_rna:
                    prep['P639'].append(
                        PBB_Core.WDString(value=refseq, prop_nr='P639', references=[copy.deepcopy(gene_reference)]))

        if "chromosome" in vars(self):
            prep['P1057'] = []
            if self.chromosome != None:
                for chrom in list(set(self.chromosome)):
                    prep['P1057'].append(
                        PBB_Core.WDItemID(value=chrom, prop_nr='P1057', references=[copy.deepcopy(gene_reference)]))

        if "startpos" in vars(self):
            if not 'P644' in prep.keys():
                prep['P644'] = []
            if self.startpos != None:
                for pos in self.startpos:
                    prep['P644'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P644', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildQualifier)]))
        if "endpos" in vars(self):
            if not 'P645' in prep.keys():
                prep['P645'] = []
            if self.endpos != None:
                for pos in self.endpos:
                    prep['P645'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P645', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildQualifier)]))

        if "startposHg19" in vars(self):
            if not 'P644' in prep.keys():
                prep['P644'] = []
            if self.startposHg19 != None:
                for pos in self.startposHg19:
                    prep['P644'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P644', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildPreviousQualifier)]))
        if "endposHg19" in vars(self):
            if not 'P644' in prep.keys():
                prep['P645'] = []
            if self.endposHg19 != None:
                for pos in self.endposHg19:
                    prep['P645'].append(
                        PBB_Core.WDString(value=str(pos), prop_nr='P645', references=[copy.deepcopy(ensembl_reference)],
                                          qualifiers=[copy.deepcopy(genomeBuildPreviousQualifier)]))

        if "MGI" in vars(self):
            prep['P671'] = []
            if self.MGI != None:
                for mgi in self.MGI:
                    prep['P671'].append(PBB_Core.WDString(value=mgi, prop_nr='P671',
                                        references=[copy.deepcopy(gene_reference)]))

        if "alias" in gene_annotations.keys():
            if isinstance(gene_annotations["alias"], list):
                self.synonyms = []
                for alias in gene_annotations["alias"]:
                    self.synonyms.append(alias)
            else:
                self.synonyms = [gene_annotations["alias"]]
            self.synonyms.append(self.symbol)
            print(self.synonyms)
        else:
            self.synonyms = None

        data2add = []
        for key in prep.keys():
            for statement in prep[key]:
                data2add.append(statement)
                print(statement.prop_nr, statement.value)

        if self.wdid != None:
          # if self.encodes != None:
            wdPage = PBB_Core.WDItemEngine(self.wdid, item_name=self.name, data=data2add, server="www.wikidata.org",
                                           domain="genes")
            if wdPage.get_description() == "":
                wdPage.set_description(description=self.genomeInfo['name'] + ' gene', lang='en')
            if wdPage.get_description(lang='fr') == "" or wdPage.get_description(lang='fr') == "gène":
                wdPage.set_description(description="Un gène " + self.genomeInfo['fr-name'], lang='fr')
            if wdPage.get_description(lang='nl') == "" or wdPage.get_description(lang='nl') == "gen":
                wdPage.set_description(description="Een "+ self.genomeInfo['nl-name']+ " gen", lang='nl')
            if self.synonyms != None:
                wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
            print(self.wdid)
            self.wd_json_representation = wdPage.get_wd_json_representation()
            PBB_Debug.prettyPrint(self.wd_json_representation)
            PBB_Debug.prettyPrint(data2add)
            # print(self.wd_json_representation)
            wdPage.write(self.logincreds)
            print("aa")
        else:
          #if self.encodes != None:
            wdPage = PBB_Core.WDItemEngine(item_name=self.name, data=data2add, server="www.wikidata.org",
                                           domain="genes")
            if wdPage.get_description() != "":
                wdPage.set_description(description=self.genomeInfo['name'] + ' gene', lang='en')
            if wdPage.get_description(lang='fr') == "" or wdPage.get_description(lang='fr') == "gène":
                wdPage.setdescription(description="Un gène " + self.genomeInfo['fr-name'], lang='fr')
            if wdPage.get_description(lang='nl') == "" or wdPage.get_description(lang='nl') == "gen":
                wdPage.setdescription(description="Een "+ self.genomeInfo['nl-name']+ " gen", lang='nl')
            if self.synonyms != None:
                wdPage.set_aliases(aliases=self.synonyms, lang='en', append=True)
            self.wd_json_representation = wdPage.get_wd_json_representation()
            PBB_Debug.prettyPrint(self.wd_json_representation)
            PBB_Debug.prettyPrint(data2add)
            # print(self.wd_json_representation)
            self.wdid = wdPage.write(self.logincreds)

        PBB_Core.WDItemEngine.log('INFO', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'.format(
                        main_data_id=str(self.entrezgene),
                        exception_type='',
                        message=f.name,
                        wd_id=self.wdid,
                        duration=time.time()-self.start
                    ))