Example 1
    def write_hpo_edge(self, fh: TextIO, subject: str, edge_label: str,
                       object: str, relation: str) -> None:

        # ['subject', 'edge_label', 'object', 'relation', 'publications']
        write_node_edge_item(fh=fh,
                             header=self.edge_header,
                             data=[subject, edge_label, object, relation, ""])
Example 2
 def test_write_node_edge_item(self):
     write_node_edge_item(fh=self.fh,
                          header=self.header,
                          data=self.valid_data)
     self.fh.close()
     self.assertTrue(os.path.exists(self.outfile))
     with open(self.outfile, 'r') as tsvfile:
         lines = tsvfile.read().split('\n')
         self.assertEqual(['id1234', '1234', 'biolink:Gene'],
                          lines[0].split('\t'))
Example 3
 def test_write_node_edge_item_with_tabs_in_data(self):
     write_node_edge_item(
         fh=self.fh,
         header=self.header,
         data=['id1234', '1234', 'biolink:Gene\tbiolink:Gene\t'],
         sanitize_sep_char=True)
     self.fh.close()
     self.assertTrue(os.path.exists(self.outfile))
     with open(self.outfile, 'r') as tsvfile:
         lines = tsvfile.read().split('\n')
         self.assertEqual(
             ['id1234', '1234', 'biolink:Gene0x9biolink:Gene0x9'],
             lines[0].split('\t'))
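All of these examples call write_node_edge_item, whose implementation is not shown here. The following is a minimal sketch inferred from the tests in Examples 2, 3 and 9 (column-count check, optional tab sanitization, tab-joined output); the real helper in the project may differ.

from typing import List, TextIO


def write_node_edge_item(fh: TextIO, header: List[str], data: List[str],
                         sep: str = '\t',
                         sanitize_sep_char: bool = False) -> None:
    """Write one node or edge row as a sep-delimited line (sketch)."""
    if len(header) != len(data):
        raise ValueError(f"Expected {len(header)} columns, got {len(data)}")
    if sanitize_sep_char:
        # replace stray separator characters so the TSV stays rectangular
        data = [item.replace(sep, '0x9') for item in data]
    fh.write(sep.join(data) + "\n")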
Example 4
    def write_hpo_node(self, fh: TextIO, id: str, data: dict,
                       node_type: str) -> None:
        # Try to get comments/def in case this is useful for ML
        try:
            comment_field = get_item_by_priority(data, ['comment'])
        except ItemInDictNotFound:
            comment_field = ""

        try:
            description = get_item_by_priority(data, ['def'])
        except ItemInDictNotFound:
            description = ""

        try:
            name_field = get_item_by_priority(data, ['name'])
        except ItemInDictNotFound:
            name_field = ""

        write_node_edge_item(
            fh=fh,
            header=self.node_header,
            data=[id, name_field, node_type, comment_field, description])
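write_hpo_node relies on get_item_by_priority and ItemInDictNotFound, which are not shown above. Here is a hypothetical sketch consistent with how they are used; the project's actual helpers may differ.

from typing import Any, Dict, List


class ItemInDictNotFound(Exception):
    """Raised when none of the requested keys is present in the dict."""


def get_item_by_priority(d: Dict[str, Any], keys_by_priority: List[str]) -> Any:
    """Return the value of the first key from keys_by_priority found in d."""
    for key in keys_by_priority:
        if key in d:
            return d[key]
    raise ItemInDictNotFound(f"None of {keys_by_priority} found in dict")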
Example 5

    def run(self, data_file: str = None):

        # file housekeeping
        os.makedirs(self.output_dir, exist_ok=True)

        gpi_file = os.path.join(self.input_base_dir, "uniprot_sars-cov-2.gpi")
        gpa_file = os.path.join(self.input_base_dir, "uniprot_sars-cov-2.gpa")

        with open(self.output_node_file, 'w') as node, \
                open(self.output_edge_file, 'w') as edge:

            # write headers
            node.write("\t".join(self.node_header) + "\n")
            edge.write("\t".join(self.edge_header) + "\n")

            with open(gpi_file, 'r') as gpi_fh:
                for rec in _gpi12iterator(gpi_fh):
                    node_data = self.gpi_to_gene_node_data(rec)
                    write_node_edge_item(node, self.node_header, node_data)

            with open(gpa_file, 'r') as gpa_fh:
                for rec in _gpa11iterator(gpa_fh):
                    edge_data = self.gpa_to_edge_data(rec)
                    write_node_edge_item(edge, self.edge_header, edge_data)
Example 6
    def run(self, data_file: str = None):

        # file housekeeping
        os.makedirs(self.output_dir, exist_ok=True)

        gpi_file = os.path.join(self.input_base_dir, "uniprot_sars-cov-2.gpi")
        gpa_file = os.path.join(self.input_base_dir, "uniprot_sars-cov-2.gpa")

        with open(self.output_node_file, 'w') as node, \
                open(self.output_edge_file, 'w') as edge:

            # write headers
            node.write("\t".join(self.node_header) + "\n")
            edge.write("\t".join(self.edge_header) + "\n")
            seen = set()
            with open(gpi_file, 'r') as gpi_fh:
                for rec in _gpi12iterator(gpi_fh):
                    node_data = self.gpi_to_gene_node_data(rec)
                    seen.add(node_data[0])
                    write_node_edge_item(node, self.node_header, node_data)

            with open(gpa_file, 'r') as gpa_fh:
                for rec in _gpa11iterator(gpa_fh):
                    edge_data = self.gpa_to_edge_data(rec)
                    subject_node = edge_data[0]
                    if subject_node not in seen:
                        subject_node_data = [
                            subject_node,
                            guess_bl_category(subject_node)
                        ] + [""] * 5 + [self.source_name]
                        write_node_edge_item(node, self.node_header,
                                             subject_node_data)
                        seen.add(subject_node)
                    object_node = edge_data[2]
                    if object_node not in seen:
                        object_node_data = [
                            object_node,
                            guess_bl_category(object_node)
                        ] + [""] * 5 + [self.source_name]
                        write_node_edge_item(node, self.node_header,
                                             object_node_data)
                        seen.add(object_node)

                    write_node_edge_item(edge, self.edge_header, edge_data)
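Example 6 uses guess_bl_category to assign a Biolink category to nodes that only appear in the GPA file. A hypothetical sketch follows, assuming a simple CURIE-prefix lookup; the prefix-to-category map is an assumption and the project's actual helper may differ.

def guess_bl_category(curie: str) -> str:
    """Guess a Biolink category from a CURIE prefix (assumed mapping)."""
    prefix = curie.split(':', 1)[0]
    prefix_to_category = {
        'UniProtKB': 'biolink:Protein',
        'GO': 'biolink:OntologyClass',
        'NCBITaxon': 'biolink:OrganismTaxon',
    }
    return prefix_to_category.get(prefix, 'biolink:NamedThing')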
Example 7
    def parse_cooccurrence_record(self, node_handle: Any, edge_handle: Any,
                                  record: Dict) -> None:
        """Parse term-cooccurrences.

        Args:
            node_handle: File handle for nodes.csv.
            edge_handle: File handle for edges.csv.
            record: A dictionary corresponding to a row from a table.

        Returns:
             None.

        """
        terms = set()
        paper_id = record['document_id']
        if not pd.isna(record['entity_uris']):
            terms.update(record['entity_uris'].split('|'))
            # add a biolink:Publication for each paper
            if paper_id.endswith('.xml'):
                paper_id = paper_id.replace('.xml', '')
            paper_curie = f"CORD:{paper_id}"
            if paper_id not in self.seen:
                write_node_edge_item(
                    fh=node_handle,
                    header=self.node_header,
                    data=[paper_curie, "", "biolink:Publication", ""])
                self.seen.add(paper_id)

            for t in terms:
                if len(t) == 2:
                    # country code
                    if t in self.country_code_map:
                        mapped_t = self.country_code_map[t][0]
                        name = self.country_code_map[t][1]
                        curie = self.contract_uri(mapped_t)
                    else:
                        name = ""
                        curie = self.contract_uri(t)
                    category = 'biolink:NamedThing'
                else:
                    category = 'biolink:OntologyClass'
                    curie = self.contract_uri(t)
                    # note: no trailing comma here (it would turn name into a tuple)
                    name = self.concept_name_map.get(t, "")

                if t not in self.seen:
                    # add a biolink:OntologyClass node for each term
                    write_node_edge_item(
                        fh=node_handle,
                        header=self.node_header,
                        data=[
                            f"{curie}", name if isinstance(name, str) else "",
                            category, ""
                        ])
                    self.seen.add(curie)

                    # add an edge OntologyClass -> correlated_with -> Publication,
                    # with relation RO:0002610 ('correlated with')
                    write_node_edge_item(
                        fh=edge_handle,
                        header=self.edge_header,
                        data=[
                            curie,
                            "biolink:correlated_with",
                            paper_curie,
                            "RO:0002610",  # 'correlated with'
                            f"{self.source_name} co-occurrences"
                        ])
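Examples 7 and 8 call self.contract_uri to turn full term URIs into CURIEs. Below is a minimal sketch of such a helper, assuming a hard-coded prefix map; the real transform may instead load its prefix map from a context file.

def contract_uri(uri: str) -> str:
    """Contract a full URI to a CURIE using an assumed prefix map (sketch)."""
    prefix_map = {
        'http://purl.obolibrary.org/obo/GO_': 'GO:',
        'http://purl.obolibrary.org/obo/CHEBI_': 'CHEBI:',
        'http://purl.obolibrary.org/obo/NCBITaxon_': 'NCBITaxon:',
    }
    for base, prefix in prefix_map.items():
        if uri.startswith(base):
            return prefix + uri[len(base):]
    return uri  # fall back to the original string if nothing matches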
Example 8
    def parse_annotation_doc(self,
                             node_handle,
                             edge_handle,
                             doc: Dict,
                             subset: str = None) -> None:
        """Parse a JSON document corresponding to a publication.

        Args:
            node_handle: File handle for nodes.csv.
            edge_handle: File handle for edges.csv.
            doc: JSON document as dict.
            subset: The subset name for this dataset.

        Returns:
            None.

        """
        terms = set()
        paper_id = doc['paper_id']
        title = None
        if 'metadata' in doc:
            metadata = doc['metadata']
            title = metadata['title'].replace('\n', ' ')
            # extract hits from metadata
            terms.update(self.extract_termite_hits(metadata))

        if 'abstract' in doc:
            abstract = doc['abstract']
            # extract hits from abstract
            for x in abstract:
                terms.update(self.extract_termite_hits(x))

        if 'body_text' in doc:
            body_text = doc['body_text']
            # extract hits from body text
            for x in body_text:
                terms.update(self.extract_termite_hits(x))

        provided_by = f"{self.source_name}"
        if subset:
            provided_by += f" {subset}"

        # add a biolink:Publication for each paper
        write_node_edge_item(
            fh=node_handle,
            header=self.node_header,
            data=[f"CORD:{paper_id}", f"{title}", "biolink:Publication", ""])
        self.seen.add(paper_id)

        for t in terms:
            if len(t) == 2:
                # country code
                if t in self.country_code_map:
                    mapped_t = self.country_code_map[t][0]
                    name = self.country_code_map[t][1]
                    curie = self.contract_uri(mapped_t)
                else:
                    name = ""
                    curie = self.contract_uri(t)
                category = 'biolink:NamedThing'
            else:
                category = 'biolink:OntologyClass'
                curie = self.contract_uri(t)
                # note: no trailing comma here (it would turn name into a tuple)
                name = self.concept_name_map.get(t, "")

            if t not in self.seen:
                # add a biolink:OntologyClass node for each term
                write_node_edge_item(fh=node_handle,
                                     header=self.node_header,
                                     data=[
                                         f"{curie}",
                                         name if isinstance(name, str) else "",
                                         category, ""
                                     ])
                self.seen.add(curie)

            # add has_annotation edge between OntologyClass and Publication
            write_node_edge_item(fh=edge_handle,
                                 header=self.edge_header,
                                 data=[
                                     f"{curie}", f"biolink:related_to",
                                     f"CORD:{paper_id}", "SIO:000255",
                                     provided_by
                                 ])
Example 9
 def test_write_node_edge_item_bad_fh(self):
     with self.assertRaises(Exception):
         write_node_edge_item(fh='',
                              header=self.header,
                              data=self.valid_data)
Example 10
    def run(self, data_file: Optional[str] = None) -> None:
        """Method is called and performs needed transformations to process
        SARS-CoV-2 subset of ChEMBL.

        http://chembl.blogspot.com/2020/05/chembl27-sars-cov-2-release.html

        Args:
            data_file: data file to parse

        Returns:
            None.

        """
        self.node_header = ['id', 'name', 'category', 'provided_by']
        self.edge_header = [
            'id', 'subject', 'edge_label', 'object', 'relation', 'provided_by',
            'type'
        ]

        # ChEMBL molecules
        data = self.get_chembl_molecules()
        molecule_nodes = self.parse_chembl_molecules(data)

        # ChEMBL assay
        data = self.get_chembl_assays()
        assay_nodes = self.parse_chembl_assay(data)

        # ChEMBL document
        data = self.get_chembl_documents()
        document_nodes = self.parse_chembl_document(data)

        # ChEMBL activity
        data = self.get_chembl_activities()
        activity_edges = self.parse_chembl_activity(data)

        self.node_header.extend(
            [x for x in self._node_header if x not in self.node_header])
        self.edge_header.extend(
            [x for x in self._edge_header if x not in self.edge_header])

        node_handle = open(self.output_node_file, 'w')
        edge_handle = open(self.output_edge_file, 'w')
        node_handle.write("\t".join(sorted(self.node_header)) + "\n")
        edge_handle.write("\t".join(sorted(self.edge_header)) + "\n")

        for n in molecule_nodes:
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     n[x] if x in n else ''
                                     for x in sorted(self.node_header)
                                 ])
        for n in assay_nodes:
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     n[x] if x in n else ''
                                     for x in sorted(self.node_header)
                                 ])

        for n in document_nodes:
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     n[x] if x in n else ''
                                     for x in sorted(self.node_header)
                                 ])

        # write node for organisms in TAXON_MAP
        for org_curie, org_name in {v: k
                                    for k, v in TAXON_MAP.items()}.items():
            o = {
                'id': org_curie,
                'name': org_name,
                'category': 'biolink:OrganismTaxon'
            }
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     o[x] if x in o else ''
                                     for x in sorted(self.node_header)
                                 ])

        for e in activity_edges:
            write_node_edge_item(fh=edge_handle,
                                 header=sorted(self.edge_header),
                                 data=[
                                     e[x] if x in e else ''
                                     for x in sorted(self.edge_header)
                                 ])

        node_handle.close()
        edge_handle.close()
Example 11

    def run(self,
            data_file: Optional[str] = None,
            chembl_data_files: Optional[dict] = None) -> None:
        """Method is called and performs needed transformations to process
        SARS-CoV-2 subset of ChEMBL.

        http://chembl.blogspot.com/2020/05/chembl27-sars-cov-2-release.html

        Args:
            data_file: NOT USED - kept only to satisfy mypy; use
                chembl_data_files instead.
            chembl_data_files: data files to parse.

        Returns:
            None.

        """
        self.node_header = ['id', 'name', 'category', 'provided_by']
        self.edge_header = [
            'id', 'subject', 'predicate', 'object', 'relation', 'provided_by',
            'type'
        ]

        if chembl_data_files is None:
            chembl_data_files = {
                'molecules_data': 'data/raw/chembl_molecule_records.json',
                'assay_data': 'data/raw/chembl_assay_records.json',
                'document_data': 'data/raw/chembl_document_records.json',
                'activity_data': 'data/raw/chembl_activity_records.json'
            }

        # ChEMBL molecules
        molecules_data = self.read_json(chembl_data_files['molecules_data'])
        molecule_nodes = self.parse_chembl_molecules(molecules_data)

        # ChEMBL assay
        assays_data = self.read_json(chembl_data_files['assay_data'])
        assay_nodes = self.parse_chembl_assay(assays_data)

        # ChEMBL document
        documents_data = self.read_json(chembl_data_files['document_data'])
        document_nodes = self.parse_chembl_document(documents_data)

        # ChEMBL activity
        activities_data = self.read_json(chembl_data_files['activity_data'])
        activity_edges = self.parse_chembl_activity(activities_data)

        self.node_header.extend(
            [x for x in self._node_header if x not in self.node_header])
        self.edge_header.extend(
            [x for x in self._edge_header if x not in self.edge_header])

        node_handle = open(self.output_node_file, 'w')
        edge_handle = open(self.output_edge_file, 'w')
        node_handle.write("\t".join(sorted(self.node_header)) + "\n")
        edge_handle.write("\t".join(sorted(self.edge_header)) + "\n")

        for n in molecule_nodes:
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     n[x] if x in n else ''
                                     for x in sorted(self.node_header)
                                 ])
        for n in assay_nodes:
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     n[x] if x in n else ''
                                     for x in sorted(self.node_header)
                                 ])

        for n in document_nodes:
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     n[x] if x in n else ''
                                     for x in sorted(self.node_header)
                                 ])

        # write node for organisms in TAXON_MAP
        for org_curie, org_name in {v: k
                                    for k, v in TAXON_MAP.items()}.items():
            o = {
                'id': org_curie,
                'name': org_name,
                'category': 'biolink:OrganismTaxon'
            }
            write_node_edge_item(fh=node_handle,
                                 header=sorted(self.node_header),
                                 data=[
                                     o[x] if x in o else ''
                                     for x in sorted(self.node_header)
                                 ])

        for e in activity_edges:
            write_node_edge_item(fh=edge_handle,
                                 header=sorted(self.edge_header),
                                 data=[
                                     e[x] if x in e else ''
                                     for x in sorted(self.edge_header)
                                 ])

        node_handle.close()
        edge_handle.close()
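The second run variant reads each ChEMBL table from a local JSON dump via self.read_json. A minimal sketch of that helper, assuming each file holds a JSON array of record dicts; the actual method in the project may differ.

import json
from typing import Any, Dict, List


def read_json(path: str) -> List[Dict[str, Any]]:
    """Load a JSON file assumed to contain a list of record dicts (sketch)."""
    with open(path, 'r') as fh:
        return json.load(fh)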
Example 12
    def parse_annotation_doc(self,
                             node_handle,
                             edge_handle,
                             doc: Dict,
                             subset: str = None) -> None:
        """Parse a JSON document corresponding to a publication.

        Args:
            node_handle: File handle for nodes.csv.
            edge_handle: File handle for edges.csv.
            doc: JSON document as dict.
            subset: The subset name for this dataset.

        Returns:
            None.

        """
        paper_id = doc['paper_id']
        metadata = doc['metadata']
        abstract = doc['abstract']
        body_text = doc['body_text']
        terms = set()
        provided_by = f"{self.source_name}"
        if subset:
            provided_by += f" {subset}"
        # extract hits from metadata
        terms.update(self.extract_termite_hits(metadata))
        # extract hits from abstract
        for x in abstract:
            terms.update(self.extract_termite_hits(x))
        # extract hits from body text
        for x in body_text:
            terms.update(self.extract_termite_hits(x))

        # add a biolink:Publication for each paper
        write_node_edge_item(fh=node_handle,
                             header=self.node_header,
                             data=[
                                 f"CORD:{paper_id}", f"{metadata['title']}",
                                 "biolink:Publication", ""
                             ])
        self.seen.add(paper_id)

        # TODO: use CURIE for terms
        for t in terms:
            if t not in self.seen:
                # add a biolink:OntologyClass node for each term
                write_node_edge_item(fh=node_handle,
                                     header=self.node_header,
                                     data=[
                                         f"{t}",
                                         self.concept_name_map.get(t, ""),
                                         "biolink:OntologyClass" if len(t) != 2
                                         else "biolink:NamedThing", ""
                                     ])
                self.seen.add(t)

            # add has_annotation edge between OntologyClass and Publication
            write_node_edge_item(fh=edge_handle,
                                 header=self.edge_header,
                                 data=[
                                     f"{t}", f"biolink:related_to",
                                     f"CORD:{paper_id}", "SIO:000255",
                                     provided_by
                                 ])
Example 13
    def parse_cooccurrence_record(self, node_handle: Any, edge_handle: Any,
                                  record: Dict) -> None:
        """Parse term-cooccurrences.

        Args:
            node_handle: File handle for nodes.csv.
            edge_handle: File handle for edges.csv.
            record: A dictionary corresponding to a row from a table.

        Returns:
             None.

        """
        terms = set()
        paper_id = record['document_id']
        if not pd.isna(record['entity_uris']):
            terms.update(record['entity_uris'].split('|'))
            # add a biolink:Publication for each paper
            if paper_id not in self.seen:
                write_node_edge_item(
                    fh=node_handle,
                    header=self.node_header,
                    data=[f"CORD:{paper_id}", "", "biolink:Publication", ""])
                self.seen.add(paper_id)

            for t in terms:
                if t not in self.seen:
                    # add a biolink:OntologyClass node for each term
                    write_node_edge_item(
                        fh=node_handle,
                        header=self.node_header,
                        data=[
                            f"{t}", self.concept_name_map[t]
                            if t in self.concept_name_map else "",
                            "biolink:OntologyClass"
                            if len(t) != 2 else "biolink:NamedThing", ""
                        ])
                    self.seen.add(t)

            # one InformationContentEntity node representing this co-occurrence;
            # reuse the same id for the node and for the edges below
            information_entity = uuid.uuid1()
            write_node_edge_item(fh=node_handle,
                                 header=self.node_header,
                                 data=[
                                     f"{information_entity}", "",
                                     "biolink:InformationContentEntity", ""
                                 ])
            # add has_annotation edge between co-occurrence entity and publication
            write_node_edge_item(
                fh=edge_handle,
                header=self.edge_header,
                data=[
                    f"{information_entity}",
                    "biolink:related_to",
                    f"CORD:{paper_id}",  # match the CURIE used for the publication node
                    "SIO:000255",  # 'has annotation'
                    f"{self.source_name}"
                ])
            for t in terms:
                # add has_member edges between co-occurrence entity and each term
                write_node_edge_item(
                    fh=edge_handle,
                    header=self.edge_header,
                    data=[
                        f"{information_entity}",
                        "biolink:related_to",
                        f"{t}",
                        f"SIO:000059",  # 'has member'
                        f"{self.source_name}"
                    ])
Example 14
    def run(self, data_file: Optional[str] = None):
        ttd_file_name = os.path.join(self.input_base_dir,
                                     "P1-01-TTD_target_download.txt")
        ttd_data = self.parse_ttd_file(ttd_file_name)
        gene_node_type = "biolink:Protein"
        drug_id_prefix = "TTD:"
        drug_node_type = "biolink:Drug"
        drug_gene_edge_label = "biolink:interacts_with"
        drug_gene_edge_relation = "RO:0002436"  # molecularly interacts with
        uniprot_curie_prefix = "UniProtKB:"
        self.node_header = ['id', 'name', 'category', 'TTD_ID', 'provided_by']
        self.edge_header = [
            'subject', 'edge_label', 'object', 'relation', 'provided_by',
            'target_type'
        ]

        # make name to id map for uniprot names of human proteins
        dat_gz_id_file = os.path.join(self.input_base_dir,
                                      "HUMAN_9606_idmapping.dat.gz")
        name_2_id_map = uniprot_make_name_to_id_mapping(dat_gz_id_file)

        # transform the data and write nodes/edges
        with open(self.output_node_file, 'w') as node,\
                open(self.output_edge_file, 'w') as edge:

            # write headers (change default node/edge headers if necessary)
            node.write("\t".join(self.node_header) + "\n")
            edge.write("\t".join(self.edge_header) + "\n")

            for target_id, data in ttd_data.items():
                # WRITE NODES

                # skip items that don't refer to UniProt gene targets or don't
                # have drug info
                if 'UNIPROID' not in data:
                    continue
                if 'DRUGINFO' not in data:
                    continue

                #
                # make node for gene(s)
                #
                uniproids: list = self.get_uniproids(data, name_2_id_map,
                                                     uniprot_curie_prefix)
                gene_name = self.get_gene_name(data)

                # gene - ['id', 'name', 'category', 'TTD_ID', 'provided_by']
                for this_id in uniproids:
                    write_node_edge_item(fh=node,
                                         header=self.node_header,
                                         data=[
                                             this_id, gene_name,
                                             gene_node_type, target_id,
                                             self.source_name
                                         ])

                # for each drug in DRUGINFO:
                for this_drug in data['DRUGINFO']:
                    this_drug_curie = drug_id_prefix + this_drug[0]
                    #
                    # make node for drug
                    #
                    write_node_edge_item(fh=node,
                                         header=self.node_header,
                                         data=[
                                             this_drug_curie, this_drug[1],
                                             drug_node_type, this_drug[0],
                                             self.source_name
                                         ])

                    #
                    # make edges for target gene ids <-> drug
                    #
                    targ_type = self.get_targ_type(data)

                    # ['subject', 'edge_label', 'object', 'relation', 'provided_by', 'target_type']
                    for this_id in uniproids:
                        write_node_edge_item(fh=edge,
                                             header=self.edge_header,
                                             data=[
                                                 this_drug_curie,
                                                 drug_gene_edge_label, this_id,
                                                 drug_gene_edge_relation,
                                                 self.source_name, targ_type
                                             ])
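Examples 14 and 15 both build a UniProt name-to-accession map with uniprot_make_name_to_id_mapping. A hypothetical sketch follows, assuming the standard three-column idmapping.dat.gz layout (accession, ID type, value) and that entry names are taken from the UniProtKB-ID rows; the real helper may select other ID types.

import gzip
from typing import Dict


def uniprot_make_name_to_id_mapping(dat_gz_file: str) -> Dict[str, str]:
    """Map UniProtKB entry names (e.g. TP53_HUMAN) to accessions (sketch)."""
    name_to_id: Dict[str, str] = {}
    with gzip.open(dat_gz_file, 'rt') as fh:
        for line in fh:
            parts = line.rstrip('\n').split('\t')
            if len(parts) < 3:
                continue  # skip malformed lines
            accession, id_type, value = parts[:3]
            if id_type == 'UniProtKB-ID':
                name_to_id[value] = accession
    return name_to_id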
Example 15
    def run(self) -> None:
        self.node_header.append(
            "TTD_ID")  # append ttd id for drug targets and drugs
        ttd_file_name = os.path.join(self.input_base_dir,
                                     "P1-01-TTD_target_download.txt")
        ttd_data = self.parse_ttd_file(ttd_file_name)
        gene_node_type = "biolink:Protein"
        drug_node_type = "biolink:Drug"
        drug_gene_edge_label = "biolink:interacts_with"
        drug_gene_edge_relation = "RO:0002436"  # molecularly interacts with
        uniprot_curie_prefix = "UniProtKB:"

        self.edge_header = [
            'subject', 'edge_label', 'object', 'relation', 'target_type'
        ]

        # make name to id map for uniprot names of human proteins
        dat_gz_id_file = os.path.join(self.input_base_dir,
                                      "HUMAN_9606_idmapping.dat.gz")
        name_2_id_map = uniprot_make_name_to_id_mapping(dat_gz_id_file)

        # transform the data and write nodes/edges
        with open(self.output_node_file, 'w') as node,\
                open(self.output_edge_file, 'w') as edge:

            # write headers (change default node/edge headers if necessary)
            node.write("\t".join(self.node_header) + "\n")
            edge.write("\t".join(self.edge_header) + "\n")

            for target_id, data in ttd_data.items():
                # WRITE NODES

                # skip items that don't refer to UniProt gene targets or don't
                # have drug info
                if 'UNIPROID' not in data:
                    logging.info(
                        "Skipping item that doesn't refer to UNIPROT gene")
                    continue
                if 'DRUGINFO' not in data:
                    logging.info(
                        "Skipping item that doesn't have any drug info")
                    continue

                #
                # make node for gene
                #
                uniproid = self.get_uniproid(data, name_2_id_map,
                                             uniprot_curie_prefix)
                gene_name = self.get_gene_name(data)

                # gene - ['id', 'name', 'category', 'ttd id for this target']
                write_node_edge_item(
                    fh=node,
                    header=self.node_header,
                    data=[uniproid, gene_name, gene_node_type, target_id])

                # for each drug in DRUGINFO:
                for this_drug in data['DRUGINFO']:
                    #
                    # make node for drug
                    #
                    write_node_edge_item(fh=node,
                                         header=self.node_header,
                                         data=[
                                             this_drug[0], this_drug[1],
                                             drug_node_type, this_drug[0]
                                         ])

                    #
                    # make edge for target <-> drug
                    #
                    targ_type = self.get_targ_type(data)

                    # ['subject', 'edge_label', 'object', 'relation', 'target_type']
                    # subject is the drug id, so the edge links the drug and
                    # protein nodes written above
                    write_node_edge_item(fh=edge,
                                         header=self.edge_header,
                                         data=[
                                             this_drug[0], drug_gene_edge_label,
                                             uniproid, drug_gene_edge_relation,
                                             targ_type
                                         ])