Example #1
0
    def test_context_set_correctly_on_network(self):
        """Verify the @context network attribute and the node/edge counts
        are identical whether or not extra network attributes are passed
        to the converter."""
        scratch_dir = tempfile.mkdtemp()
        try:
            base_dir = os.path.dirname(__file__)
            tsv_path = os.path.join(base_dir, 'ctd_test.tsv')
            plan_path = os.path.join(
                base_dir,
                'ctd-gene-disease-2019-norm-plan-collapsed.json')

            with open(plan_path, 'r') as plan_fh:
                plan = json.load(plan_fh)

            frame = pd.read_csv(tsv_path, sep='\t')
            net_one = tsv2nicecx2.convert_pandas_to_nice_cx_with_load_plan(
                frame, plan, name='mynetwork', description='mydesc')

            extra_attrs = [{'n': 'hi', 'v': 'data'}]
            net_two = tsv2nicecx2.convert_pandas_to_nice_cx_with_load_plan(
                frame, plan, network_attributes=extra_attrs)

            expected_context = (
                '{"ncbigene": "http://ctdbase.org/detail.go'
                '?type=gene&acc=", "OMIM": "http://ctdbase.'
                'org/detail.go?type=disease&acc=OMIM:", "ME'
                'SH": "http://ctdbase.org/detail.go?type=dis'
                'ease&acc=MESH:", "pubmed": "http://ctdbase.'
                'org/detail.go?type=reference&acc="}')
            for network in (net_one, net_two):
                context_attr = network.get_network_attribute('@context')
                self.assertEqual(expected_context, context_attr['v'])
                self.assertEqual(len(network.edges), 49)
                self.assertEqual(len(network.nodes), 50)
                # total count across every node/edge attribute list
                self.assertEqual(
                    sum(len(v) for v in network.nodeAttributes.values()),
                    50)
                self.assertEqual(
                    sum(len(v) for v in network.edgeAttributes.values()),
                    147)

            self.assertEqual(len(net_one.networkAttributes), 3)
            self.assertEqual(len(net_two.networkAttributes), 2)
            self.assertEqual('data',
                             net_two.get_network_attribute('hi')['v'])

        finally:
            shutil.rmtree(scratch_dir)
    def _generate_CX_file(self, load_plan, network_tsv):
        """Build a NiceCX network from a TSV file using a JSON load plan.

        :param load_plan: path to the JSON load plan file
        :param network_tsv: path to the tab-separated network file
        :return: tuple of (NiceCX network, SUCCESS)
        """
        with open(load_plan, 'r') as plan_fh:
            parsed_plan = json.load(plan_fh)

        # all columns are read as strings; empty cells stay empty strings
        frame = pd.read_csv(network_tsv,
                            dtype=str,
                            na_filter=False,
                            delimiter='\t',
                            engine='python')

        built_network = t2n.convert_pandas_to_nice_cx_with_load_plan(frame,
                                                                     parsed_plan)

        return built_network, SUCCESS
Example #3
0
def get_signor_network(pathway_id, load_plan):
    """Build a NiceCX network for one SIGNOR pathway.

    :param pathway_id: SIGNOR pathway identifier
    :param load_plan: parsed load plan dict for the converter
    :return: NiceCX network, or None when it has no nodes or no edges
    """
    # TODO - add context (normalize?)
    # @CONTEXT is set from the load plan

    frame = get_signor_pathway_relations_df(pathway_id)

    # upcase column names so they match the load plan
    frame = frame.rename(
        columns={name: name.upper() for name in frame.columns})

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(frame, load_plan)

    # Normalize truthy "DIRECT" values: "t" -> "YES", anything else -> "NO"
    for edge_id, _edge in network.get_edges():
        direct = network.get_edge_attribute_value(edge_id, "DIRECT")
        if direct:
            normalized = "YES" if direct == "t" else "NO"
            network.set_edge_attribute(edge_id, "DIRECT", normalized)

    # Set prefixes for represents based on the "DATABASE" attribute
    #
    #   Note that this is a good example of a situation that calls
    #   for custom code and does not justify an extension to the load_plan
    #   Cases of this type are too variable. Custom code is easier.
    #
    for node_id, node in network.get_nodes():
        database = network.get_node_attribute_value(node_id, "DATABASE")
        represents = node.get('r')
        if database == "UNIPROT" and 'uniprot:' not in represents:
            node['r'] = "uniprot:" + represents
        elif database in ["SIGNOR"] and 'signor:' not in represents:
            node['r'] = "signor:" + represents
        # in all other cases, the identifier is already prefixed
        network.remove_node_attribute(node_id, "DATABASE")

    # an empty network is useless to callers; signal with None
    if len(network.edges) < 1 or len(network.nodes) < 1:
        return None
    return network
Example #4
0
    def generate_nice_cx_from_panda_df(self, df, file_name,
                                       network_description, id_to_gene_dict):
        """Convert a pandas DataFrame into a NiceCX network.

        :param df: DataFrame containing NODE_ID / NODE_ID_B columns
        :param file_name: basename (minus '.txt') becomes the network name
        :param network_description: forwarded to _set_network_attributes
        :param id_to_gene_dict: maps unique node ids back to gene names
        :return: the generated NiceCX network
        """
        # replace node names with IDs before transforming Panda dataframe to
        # Nice CX; this is done because as of the moment of writing
        # convert_pandas_to_nice_cx_with_load_plan() cannot handle frames
        # with multiple nodes with the same name, so we use unique IDs instead.
        # BUGFIX: the original assigned into rows yielded by df.iterrows(),
        # but iterrows() returns copies, so those writes were not guaranteed
        # to reach the frame; vectorized column assignment always does.
        df['SOURCE'] = df['NODE_ID']
        df['TARGET'] = df['NODE_ID_B']

        network = t2n.convert_pandas_to_nice_cx_with_load_plan(
            df, self._loadplan)

        # now, replace 'name' and 'represents' in network with names;
        # we only keep represents for simple nodes (genes) whose represents
        # complies with NDExNdextcgaloaderLoader.HGNC_REGEX
        for _, node in network.get_nodes():
            node['n'] = id_to_gene_dict[node['n']]

            node_id = node['@id']

            if network.nodeAttributes and network.nodeAttributes[node_id]:
                node_attributes = network.nodeAttributes[node_id]

                node_resolvable = False

                for attr in node_attributes:

                    if attr['v'] == 'gene':
                        # only simple nodes, i.e. genes, can be resolvable

                        if re.match(NDExNdextcgaloaderLoader.HGNC_REGEX,
                                    id_to_gene_dict[node['r']]):
                            node['r'] = 'hgnc.symbol:' + id_to_gene_dict[
                                node['r']]
                            node_resolvable = True

                        break

                # drop represents for nodes that are not resolvable genes
                if not node_resolvable:
                    del node['r']

        self._add_coordinates_aspect_from_pos_attributes(network)
        network.set_name(os.path.basename(file_name).replace('.txt', ''))

        self._set_network_attributes(network, network_description)

        return network
Example #5
0
def main():
    """Load BioGRID protein-protein interaction networks into NDEx.

    For each organism listed in organism_list.txt: unpack the BioGRID
    tab2 archive, collapse duplicate interactions (merging their pubmed
    ids into one field), convert the resulting TSV to NiceCX via a load
    plan, and either update an existing NDEx network or upload a new one.

    Command line: version username password [-s server] [-t template_id]
    """

    parser = argparse.ArgumentParser(description='Biogrid network loader')

    # positional args are optional (nargs='?') so argparse does not abort
    # when they are omitted
    parser.add_argument('version', action='store', nargs='?')
    parser.add_argument('username', action='store', nargs='?')
    parser.add_argument('password', action='store', nargs='?')

    parser.add_argument('-s',
                        dest='server',
                        action='store',
                        help='NDEx server for the target NDEx account')

    parser.add_argument('-t',
                        dest='template_id',
                        action='store',
                        help='ID for the network to use as a graphic template')

    #parser.add_argument('-target', dest='target_network_id', action='store',
    #                    help='ID for the network to be updated')

    args = parser.parse_args()

    print(vars(args))

    version = args.version
    username = args.username
    password = args.password
    if args.server:
        server = args.server
    else:
        server = 'public.ndexbio.org'

#       print ("Usage load_biogrid.py version user_name password [server]\nFor example: 3.4.158 biogrid mypassword test.ndexbio.org\n")
#       print ("server name is optional, default is public.ndexbio.org\n")

    PROTFILE_NAME = "BIOGRID-ORGANISM-" + version + ".tab2"
    # extracts the UUID from an NDEx URL of the form
    # http://<server>/#/network/<uuid>
    prog = re.compile("http:\/\/.*/\#\/network\/(.*)")

    # organism_list.txt columns (tab-separated):
    #   0: organism token used in file names, 1: quoted organism string,
    #   2: common name, 3: optional target network UUID or URL to update
    with open('organism_list.txt') as orgsh:
        for org_line in orgsh:
            ro = org_line.strip().split("\t")
            organism = ro[0]
            org_str = ro[1].replace('"', '')
            common_name = ro[2]
            target_uuid = ro[3].strip() if len(ro) > 3 else None
            if target_uuid:
                # accept either a bare UUID or a full network URL
                target_uuid = prog.match(target_uuid).group(1) if prog.match(
                    target_uuid) else target_uuid

            print("Processing " + organism)
            #unpack the zip file for this organims
            working_file = 'BIOGRID-ORGANISM-' + organism + '-' + version + '.tab2.txt'
            os.system('unzip -o -p ' + PROTFILE_NAME + '.zip ' + working_file +
                      ' >' + working_file)

            with open(working_file) as fh:

                outFile = organism + str(os.getpid()) + ".txt"
                result = {}
                fho = open(outFile, "w")
                line_cnt = 0
                pubmed_id_idx = 8  # this is the column number in the preprocessed file for pubmed ids.
                for line in fh:
                    if line_cnt == 0:
                        #             0                                1                               2                       3
                        fho.write(
                            "Entrez Gene Interactor A\tEntrez Gene Interactor B\tOfficial Symbol Interactor A\tOfficial Symbol Interactor B\t"
                            +
                            #                   4                    5                      6                    7                    8
                            "Synonyms Interactor A\tSynonyms Interactor B\tExperimental System\tExperimental System Type\tPubmed ID\t"
                            #       9         10     11              12          13
                            +
                            "Throughput\tScore\tModification\tPhenotypes\tQualifications\tOrganism Interactor A\tOrganism Interactor B\n"
                        )
                    else:
                        r = line.split("\t")
                        #                if (r[15] == '9606' and r[16] == '9606'):  # filter on human
                        # add line to hash table
                        # rows that differ only in pubmed id collapse into a
                        # single entry keyed on the remaining columns
                        key = r[1] + "," + r[2] + "," + r[11] + "," + r[
                            12] + "," + r[17] + "," + r[18] + "," + r[
                                19] + "," + r[20] + "," + r[21]
                        entry = result.get(key)
                        if entry:
                            entry[pubmed_id_idx].append(r[14])
                        else:
                            entry = [
                                r[1],
                                r[2],
                                r[7],
                                r[8],
                                cvtfield(r[9]),
                                cvtfield(r[10]),
                                cvtfield(r[11]),
                                cvtfield(r[12]),
                                [r[14]],  # pubmed_ids
                                cvtfield(r[17]),
                                cvtfield(r[18]),
                                cvtfield(r[19]),
                                cvtfield(r[20]),
                                cvtfield(r[21]),
                                r[15],
                                r[16]
                            ]
                            result[key] = entry

                    line_cnt += 1
            # NOTE(review): fh was already closed by the with block above;
            # this extra close is redundant but harmless
            fh.close()

            # write the hash table out
            for key, value in result.items():
                # join the accumulated pubmed ids into one |-separated field
                value[pubmed_id_idx] = '|'.join(value[pubmed_id_idx])
                fho.write('\t'.join(value) + "\n")
            fho.close()

            print(
                str(datetime.now()) + " - preprocess finished. newfile has " +
                str(len(result)) + " lines.\n")
            sys.stdout.flush()
            # drop the (potentially large) hash table before building the frame
            result = None
            path_to_load_plan = 'human_plan.json'
            load_plan = None
            with open(path_to_load_plan, 'r') as lp:
                load_plan = json.load(lp)

                dataframe = pd.read_csv(outFile,
                                        dtype=str,
                                        na_filter=False,
                                        delimiter='\t',
                                        engine='python')

                network = t2n.convert_pandas_to_nice_cx_with_load_plan(
                    dataframe, load_plan)

            # NOTE(review): lp was already closed by the with block; redundant
            lp.close()

            print(str(datetime.now()) + " - network created in memory.\n")
            sys.stdout.flush()

            # post processing.

            network.set_name("BioGRID: Protein-Protein Interactions (" +
                             common_name + ")")
            network.set_network_attribute(
                "description",
                """Proteins are normalized to official gene symbols and NCBI gene identifiers while alternative entity names and identifiers are provided in 
the alias field. Edges with identical properties (except citations) are collapsed to simplify visualization and citations displayed as a 
list of PMIDs. This network is updated periodically with the latest data available on the  <a href=\"https://thebiogrid.org/\">BioGRID</a>.<p><p>
 <b>Edge legend</b><br>
Solid line: High Throughput experiment<br>
Dashed line: Low Throughput experiment<br>
Blue line: physical interaction<br>
Green line: genetic interaction""")

            network.set_network_attribute(
                "reference",
                "Chatr-Aryamontri A et al. <b>The BioGRID interaction database: 2017 update.</b><br>"
                +
                'Nucleic Acids Res. 2016 Dec 14;2017(1)<br><a href="http://doi.org/10.1093/nar/gkw1102">doi:10.1093/nar/gkw1102</a>'
            )

            network.set_network_attribute("version", version)
            network.set_network_attribute("organism", org_str)
            network.set_network_attribute("networkType",
                                          "Protein-Protein Interaction")

            if args.template_id:
                # copy the visual style from the template network on the server
                network.apply_template(username=username,
                                       password=password,
                                       server=server,
                                       uuid=args.template_id)

            if target_uuid:
                print(
                    str(datetime.now()) + " - Updating network " +
                    target_uuid + "...\n")
                sys.stdout.flush()
                network.update_to(target_uuid, server, username, password)
            else:
                print(
                    str(datetime.now()) + " - Creating new network in NDEx\n")
                sys.stdout.flush()
                network.upload_to(server, username, password)

            print(str(datetime.now()) + " - Cleaning up working files...\n")
            os.remove(outFile)
            os.remove(working_file)
            print("Finished processing " + organism + "\n")

    print("Done.\n")
Example #6
0
def get_full_signor_network(load_plan, species):
    """Build the full SIGNOR network for one species.

    Data comes from https://signor.uniroma2.it/getData.php?organism=<species>
    (Human 9606, mouse 10090, rat 10116) via
    get_full_signor_pathway_relations_df().

    :param load_plan: parsed load plan dict for the converter
    :param species: SIGNOR organism taxonomy id string
    :return: NiceCX network with attributes copied from the template network
    """
    df = get_full_signor_pathway_relations_df(species)

    # drop rows missing either an entity name or an identifier
    human_dataframe = df[(df["entitya"] != "") & (df["entityb"] != "") & (df["ida"] != "") & (df["idb"] != "")]

    # upcase column names so they match the load plan
    human_dataframe = human_dataframe.rename(
        columns={name: name.upper() for name in human_dataframe.columns})

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(human_dataframe, load_plan)

    # Fix values for "DIRECT": "t" -> "YES", anything else -> "NO".
    # BUGFIX: pass edge_id (not the edge dict) to set_edge_attribute,
    # consistent with get_signor_network().
    for edge_id, edge in network.get_edges():
        direct = network.get_edge_attribute_value(edge_id, "DIRECT")
        if direct:
            if direct == "t":
                network.set_edge_attribute(edge_id, "DIRECT", "YES")
            else:
                network.set_edge_attribute(edge_id, "DIRECT", "NO")

    # Set prefixes for represents based on the "DATABASE" attribute
    #
    #   Note that this is a good example of a situation that calls
    #   for custom code and does not justify an extension to the load_plan
    #   Cases of this type are too variable. Custom code is easier.
    #
    for node_id, node in network.get_nodes():
        database = network.get_node_attribute_value(node_id, "DATABASE")
        represents = node.get('r')
        if database == "UNIPROT":
            # guard against double-prefixing (matches get_signor_network())
            if 'uniprot:' not in represents:
                node['r'] = "uniprot:" + represents
        elif database == "SIGNOR":
            if 'signor:' not in represents:
                node['r'] = "signor:" + represents
        # in all other cases, the identifier is already prefixed
        network.remove_node_attribute(node_id, "DATABASE")

    # copy descriptive attributes from the visual template network
    template_network = ndex2.create_nice_cx_from_server(server=my_server,
                                                        uuid=cytoscape_visual_properties_template_id,
                                                        username=my_username, password=my_password)

    network.set_network_attribute("labels", template_network.get_network_attribute('labels'))
    network.set_network_attribute("author", template_network.get_network_attribute('author'))

    full_desc = ('This network contains all the ' +
                 species_mapping.get(species) +
                 ' interactions currently available in SIGNOR' +
                 template_network.get_network_attribute('description')['v'])

    network.set_network_attribute('description', full_desc)

    # version is stamped with today's date, e.g. 01-Jan-2020
    network.set_network_attribute("version", f"{datetime.now():%d-%b-%Y}")

    network.set_network_attribute('rightsHolder', template_network.get_network_attribute('rightsHolder')['v'])
    network.set_network_attribute('rights', template_network.get_network_attribute('rights')['v'])
    network.set_network_attribute("reference",
                                  template_network.get_network_attribute('reference')['v'])

    return network
Example #7
0
def main():
    """Load the STRING human protein-links networks into NDEx.

    Builds two networks from 9606.protein.links.v<version>.txt: the full
    set of links and a high-confidence subset (score > 700). Protein
    display names are resolved from the psicquic-mitab file, duplicate
    (reversed) links are removed, and each resulting TSV is converted to
    NiceCX via a load plan and uploaded to / updated on an NDEx server.

    Command line: version username password [-s server] [-t template_id]
                  [-t2 template_id2] [-target target_network_id]
    """

    parser = argparse.ArgumentParser(description='STRING link network loader')

    parser.add_argument('version', action='store', nargs='?')
    parser.add_argument('username', action='store', nargs='?')
    parser.add_argument('password', action='store', nargs='?')

    parser.add_argument('-s', dest='server', action='store', help='NDEx server for the target NDEx account')

    parser.add_argument('-t', dest='template_id', action='store',
                        help='ID for the network to use as a graphic template')

    parser.add_argument('-t2', dest='template_id2', action='store',
                        help='ID for the network to use as a graphic template for the high confidence network')

    parser.add_argument('-target', dest='target_network_id', action='store',
                        help='ID for the network to be updated')

    args = parser.parse_args()

    print(vars(args))

    version = args.version
    username = args.username
    password = args.password
    if args.server:
            server = args.server
    else:
            server = 'public.ndexbio.org'

 #       print ("Usage load_biogrid.py version user_name password [server]\nFor example: 3.4.158 biogrid mypassword test.ndexbio.org\n")
 #       print ("server name is optional, default is public.ndexbio.org\n")

 #   gene_ids = set()

  #  outFile = 'links-59975.txt'

  #  path_to_load_plan = 'human_links_plan.json'
  #  load_plan = None
  #  with open(path_to_load_plan, 'r') as lp:
  #      load_plan = json.load(lp)

  #  print(str(datetime.now()) + " - reading file into panda data frame.\n")

   # dataframe = pd.read_csv(outFile,
   #                         dtype=str,
   #                         na_filter=False,
   #                         delimiter='\t',
   #                         engine='python')

   # print(str(datetime.now()) + " - done reading.\n")

   # network = t2n.convert_pandas_to_nice_cx_with_load_plan(dataframe, load_plan)

   # print(str(datetime.now()) + " in memory cx created from panda dataframe.\n")

    #build the node name table first
    # protein_table maps a STRING protein id to a record whose 'n' key is
    # the display name, as populated by process_id()
    protein_table = {}
    with open ('9606.psicquic-mitab_2.5.v' + version +'.txt') as fh:
        cnt = 0
        for line in fh:
            col = line.split('\t')
            cnt+=1
            if cnt % 1000 == 0:
                print('processing line ' + str(cnt))
            process_id(col[0], col[4], protein_table)
            process_id(col[1], col[5], protein_table)
        # NOTE(review): redundant — the with block closes fh again; harmless
        fh.close()

    print ("Protein id table has " + str(len(protein_table)) + " records.")

            # filter: only keep records for human
    edge_table = {}
    outFile = "links-" + str(os.getpid()) + ".txt"
    outFile2 = "links2-" + str(os.getpid()) + ".txt"

    #===========================
    # GET LINE COUNT FROM FILE
    #===========================
    with open('9606.protein.links.v' + version + '.txt') as f:
        for i, l in enumerate(f):
            pass
    file_line_count = i + 1


    with open('9606.protein.links.v' + version + '.txt') as fh:

        fho = open(outFile, "w")
        fho2 = open (outFile2, "w")
        line_cnt = 0
        for line in fh:
            if line_cnt == 0:
                #             0             1                   2       3       4                   5
                fho.write("protein1\tprotein2\tname1\tname2\tscore\n")
                fho2.write("protein1\tprotein2\tname1\tname2\tscore\n")
            else:
                r = re.split("\s+", line.strip())
                # ids look like '9606.ENSPxxxx'; keep the part after the dot
                node_a = r[0].split('.')[1]
                node_b = r[1].split('.')[1]
                score = int(r[2])
                tmp_key = node_a +"-" +node_b
                if tmp_key not in edge_table:
                    # STRING lists each link in both directions; only emit an
                    # edge when its reverse has not been written already
                    rev_tmp_key = node_b + '-' + node_a
                    rev_score = edge_table.get(rev_tmp_key)
                    if rev_score is None:
                        r_a = protein_table.get(node_a)
                        r_b = protein_table.get(node_b)
                        fho.write(r[0] + '\t' + r[1] + '\t' + (r_a['n'] if r_a is not None else node_a) + '\t'
                                  + (r_b['n'] if r_b is not None else node_b) + '\t' + r[2] +'\n')
                        if score >700:
                            # high-confidence subset for the second network
                            fho2.write(r[0] + '\t' + r[1] + '\t' + (r_a['n'] if r_a is not None else node_a) + '\t'
                                      + (r_b['n'] if r_b is not None else node_b) + '\t' + r[2] + '\n')
                        edge_table[tmp_key] = r[2]
                    else:
                        if rev_score != r[2]:
                            print ("duplicate " + line + " with different reverse score:" + rev_score)
                else:
                    if edge_table[tmp_key] != r[2]:
                        print("duplicate line " + line +' with different score ' + edge_table[tmp_key])
            line_cnt += 1
            if line_cnt % 100000 == 0:
                print('processing line %s of %s' % (line_cnt, file_line_count))
        fho.close()
        fh.close()
        fho2.close()

    path_to_load_plan = 'human_links_plan.json'
    load_plan = None
    with open(path_to_load_plan, 'r') as lp:
        load_plan = json.load(lp)

    print(str(datetime.now()) + " - reading file into panda data frame.\n")

    dataframe = pd.read_csv(outFile,
                            dtype=str,
                            na_filter=False,
                            delimiter='\t',
                            engine='python')

    print(str(datetime.now()) + " - done reading.\n")

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(dataframe, load_plan)

    print(str(datetime.now()) + " in memory cx created from panda dataframe.\n")

    # post processing.

    network.set_name( "STRING-Human Protein Links")
    network.set_network_attribute("description",
    """This network contains human protein links with combined scores. All duplicate 
interactions were removed thus reducing the total number of interactions by 50%. 
Edge color was mapped to the Score value using a gradient from light grey (low Score) to black (high Score).
    """)


    network.set_network_attribute("version", version )
    network.set_network_attribute("organism", "Human, 9606, H**o sapiens" )
    network.set_network_attribute("networkType", "Protein-Protein Interaction")
    network.set_network_attribute("reference",
                                  "Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, Simonovic M, Santos A, Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C." +
                                  '<b>The STRING database in 2017: quality-controlled protein-protein association networks, made broadly accessible.</b>' +
                                  'Nucleic Acids Res. 2017 Jan; 45:D362-68. <a href="https://doi.org/10.1093/nar/gkw937">DOI:10.1093/nar/gkw937</a>')
    if args.template_id :
        network.apply_template(username=username, password=password, server=server,
                           uuid=args.template_id)
    if args.target_network_id:
        network.update_to(args.target_network_id, server, username, password)
        print(str(datetime.now()) + " network updated.\n")
    else:
        network.upload_to(server, username, password)
        print(str(datetime.now()) + " network created on server.\n")

    os.remove(outFile)

    # high confi.
    # second pass: build and upload the high-confidence (score > 700) network
    dataframe = pd.read_csv(outFile2,
                            dtype=str,
                            na_filter=False,
                            delimiter='\t',
                            engine='python')

    print(str(datetime.now()) + " - done reading.\n")

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(dataframe, load_plan)

    print(str(datetime.now()) + " in memory cx created from panda dataframe.\n")

    # post processing.

    network.set_name("STRING-Human Protein Links-High Confidence (Score>0.7)")
    network.set_network_attribute("description",
                                  """This network contains high confidence human protein links. All interactions with Score lower 
than 0.7 were filtered out. All duplicate interactions were also removed. Edge color was mapped to the Score value using 
a gradient from dark grey (lower Score) to black (higher Score).
                                  """)

    network.set_network_attribute("version", version)
    network.set_network_attribute("organism", "Human, 9606, H**o sapiens")
    network.set_network_attribute("networkType", "Protein-Protein Interaction")
    network.set_network_attribute("reference", "Szklarczyk D, Morris JH, Cook H, Kuhn M, Wyder S, Simonovic M, Santos A, Doncheva NT, Roth A, Bork P, Jensen LJ, von Mering C." +
            '<b>The STRING database in 2017: quality-controlled protein-protein association networks, made broadly accessible.</b>' +
             'Nucleic Acids Res. 2017 Jan; 45:D362-68. <a href="https://doi.org/10.1093/nar/gkw937">DOI:10.1093/nar/gkw937</a>')

    if args.template_id:
        network.apply_template(username=username, password=password, server=server,
                               uuid=args.template_id)
    if args.target_network_id:
        network.update_to(args.target_network_id, server, username, password)
        print(str(datetime.now()) + " network updated.\n")
    else:
        network.upload_to(server, username, password)
        print(str(datetime.now()) + " network created on server.\n")

    os.remove(outFile2)
Example #8
0
def run_loading(params):
    """Load a TSV file into NDEx as a NiceCX network.

    :param params: dict of options — tsv_file, delimiter, header, load_plan,
        all_load_plans, template_id, update_uuid, server, username, password,
        net_name, net_description
    :raises Exception: when tsv_file or load_plan is not provided
    """
    #==============================
    # LOAD TSV FILE INTO DATAFRAME
    #==============================
    if params.get('tsv_file') is not None:
        with open(params.get('tsv_file'), 'r', encoding='utf-8', errors='ignore') as tsvfile:
            if params.get('header'):
                header = params.get('header').split(',')
            else:
                # consume the file's first line as the header row
                header = [h.strip() for h in tsvfile.readline().split(params.get('delimiter'))]

            df = pd.read_csv(tsvfile, delimiter=params.get('delimiter'), na_filter=False, engine='python', names=header,
                             dtype=str, error_bad_lines=False, comment='#')
            # synthesized column referenced by the load plan
            df['gene-variant'] = df['gene'] + '-' + df['variant']
    else:
        raise Exception('Please provide a tsv file name')


    #=====================
    # LOAD TSV LOAD PLAN
    #=====================
    if params.get('load_plan') is not None:
        try:
            load_plan_name = params.get('load_plan')
            load_plan = params.get('all_load_plans').get(load_plan_name)
        except jsonschema.ValidationError as e1:
            # NOTE(review): plain dict lookups above cannot raise
            # ValidationError; handler kept for compatibility with earlier
            # versions that validated the plan here
            print("Failed to parse the loading plan: " + e1.message)
            print('at path: ' + str(e1.absolute_path))
            print("in block: ")
            print(e1.instance)
    else:
        raise Exception('Please provide a load plan')


    #====================
    # UPPERCASE COLUMNS
    #====================
    # BUGFIX: the rename map was built but never applied, so columns were
    # never uppercased; apply it as the section header (and every sibling
    # loader in this codebase) intends
    rename = {}
    for column_name in df.columns:
        rename[column_name] = column_name.upper()
    df = df.rename(columns=rename)

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(df, load_plan)

    if params.get('template_id') is not None:
        network.apply_template(username=params.get('username'), password=params.get('password'), server=params.get('server'), uuid=params.get('template_id'))
    elif params.get('update_uuid') is not None:
        # no explicit template: reuse the style of the network being updated
        network.apply_template(username=params.get('username'), password=params.get('password'), server=params.get('server'), uuid=params.get('update_uuid'))

    #===============================
    # UPDATE NETWORK OR CREATE NEW
    #===============================
    if params.get('update_uuid') is not None:
        # carry over the existing network's attributes, allowing name and
        # description to be overridden from params
        network_properties = get_network_properties(params)

        for k, v in network_properties.items():
            if k.upper() == 'NAME':
                # ===================
                # SET NETWORK NAME
                # ===================
                if params.get('net_name') is not None:
                    network.set_name(params.get('net_name'))
                else:
                    network.set_name(v)
            elif k.upper() == 'DESCRIPTION':
                # ==========================
                # SET NETWORK DESCRIPTION
                # ==========================
                if params.get('net_description') is not None:
                    network.set_network_attribute('description', params.get('net_description'))
                else:
                    network.set_network_attribute(name='description', values=v)
            elif k.upper() == 'VERSION':
                # version is always refreshed to today's date on update
                network.set_network_attribute(name='version', values=datetime.datetime.now().strftime("%Y-%m-%d"))
            else:
                network.set_network_attribute(name=k, values=v)

        message = network.update_to(params.get('update_uuid'), params.get('server'), params.get('username'),
                                    params.get('password'))
    else:
        # ===================
        # SET NETWORK NAME
        # ===================
        if params.get('net_name') is not None:
            network.set_name(params.get('net_name'))
        else:
            # default to the TSV file's basename without extension
            network.set_name(path.splitext(path.basename(params.get('tsv_file')))[0])

        # ==========================
        # SET NETWORK DESCRIPTION
        # ==========================
        # BUGFIX: the original condition was inverted (< 1), which set the
        # description only when it was empty after stripping quotes; set it
        # when a non-empty description was supplied, matching the update
        # branch above
        if params.get('net_description') is not None and len(params.get('net_description').replace('"', '')) > 0:
            network.set_network_attribute('description', params.get('net_description'))

        message = network.upload_to(params.get('server'), params.get('username'), params.get('password'))

    network.print_summary()
Example #9
0
    def _using_panda_generate_nice_CX(self,
                                      biogrid_file_path,
                                      organism_entry,
                                      template_network,
                                      type='organism'):
        """Generate an in-memory NiceCX network from a BioGRID file.

        :param biogrid_file_path: path to the raw BioGRID data file
        :param organism_entry: sequence; index 1 is the organism string and
            index 2 the common name used in the network title
        :param template_network: NiceCX network supplying description,
            reference and visual style
        :param type: 'organism' for protein-protein data, anything else
            for protein-chemical data
        :return: tuple of (cx_file_path, network_name); the network itself
            is stored on self._network and not yet written to disk
        """
        is_organism = type == 'organism'

        if is_organism:
            tsv_file_path = self._generate_TSV_from_biogrid_organism_file(
                biogrid_file_path)
        else:
            tsv_file_path = self._generate_TSV_from_biogrid_chemicals_file(
                biogrid_file_path)

        cx_file_path, cx_file_name = self._get_CX_file_path_and_name(
            biogrid_file_path, organism_entry, type)
        print('\n{} - started generating {}...'.format(
            str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")), cx_file_name))

        plan_path = self._organism_load_plan if is_organism \
            else self._chem_load_plan
        with open(plan_path, 'r') as plan_fh:
            plan = json.load(plan_fh)

        frame = pd.read_csv(tsv_file_path,
                            dtype=str,
                            na_filter=False,
                            delimiter='\t',
                            engine='python')

        network = t2n.convert_pandas_to_nice_cx_with_load_plan(frame, plan)

        if is_organism:
            network_name = ("BioGRID: Protein-Protein Interactions (" +
                            organism_entry[2] + ")")
            networkType = ['interactome', 'ppi']
        else:
            network_name = ("BioGRID: Protein-Chemical Interactions (" +
                            organism_entry[2] + ")")
            networkType = ['proteinassociation', 'compoundassociation']

        network.set_name(network_name)

        # description and reference come from the template network
        network.set_network_attribute(
            "description",
            template_network.get_network_attribute('description')['v'])
        network.set_network_attribute(
            "reference",
            template_network.get_network_attribute('reference')['v'])
        network.set_network_attribute("version", self._biogrid_version)
        network.set_network_attribute("organism", organism_entry[1])
        network.set_network_attribute("networkType", networkType,
                                      'list_of_string')
        network.set_network_attribute(
            "__iconurl", "https://home.ndexbio.org/img/biogrid_logo.jpg")

        network.apply_style_from_network(template_network)

        self._network = network

        #with open(cx_file_path, 'w') as f:
        #    json.dump(network.to_cx(), f, indent=4)

        # note, CX file is in memory, but it is not written to file yet
        print('{} - finished generating {}'.format(
            str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")), cx_file_name))

        # return path where to write CX file and the network name
        return cx_file_path, network_name
Example #10
0
def main():
    """Build and upload the BioGRID human protein-chemical interaction network.

    Reads ``BIOGRID-CHEMICALS-<version>.chemtab.txt`` from the working
    directory, keeps only human (taxon 9606) records, collapses rows that
    differ only by citation into one row with '|'-joined PubMed ids, converts
    the result to NiceCX via the ``chem_plan.json`` load plan, sets network
    metadata, and uploads (or updates) the network on an NDEx server.

    Command line: version user_name password [-s server] [-t template_id]
    [-target network_id]. Default server is public.ndexbio.org.
    """
    parser = argparse.ArgumentParser(description='Biogrid network loader')

    parser.add_argument('version', action='store', nargs='?')
    parser.add_argument('username', action='store', nargs='?')
    parser.add_argument('password', action='store', nargs='?')

    parser.add_argument('-s',
                        dest='server',
                        action='store',
                        help='NDEx server for the target NDEx account')

    parser.add_argument('-t',
                        dest='template_id',
                        action='store',
                        help='ID for the network to use as a graphic template')

    parser.add_argument('-target',
                        dest='target_network_id',
                        action='store',
                        help='ID for the network to be updated')

    args = parser.parse_args()

    print(vars(args))

    version = args.version
    username = args.username
    password = args.password
    server = args.server if args.server else 'public.ndexbio.org'

    # Filter the raw chemtab file down to human records and collapse
    # duplicate gene/chemical pairs; write the result to a temp TSV whose
    # name is unique per process.
    out_file = "chem-" + str(os.getpid()) + ".txt"
    result = {}
    # Both files are closed even if parsing fails (the original leaked the
    # output handle on error and redundantly called fh.close() inside 'with').
    with open('BIOGRID-CHEMICALS-' + version + '.chemtab.txt') as fh, \
            open(out_file, "w") as fho:
        for line_cnt, line in enumerate(fh):
            if line_cnt == 0:
                # Emit the normalized header expected by the load plan.
                #             0             1                   2       3       4                   5
                fho.write(
                    "Entrez Gene ID\tOfficial Symbol\tSynonyms\tAction\tInteraction Type\tPubmed ID\t"
                    #       6           7                   8                   9               10
                    +
                    "Chemical Name\tChemical Synonyms\tChemical Source ID\tChemical Type\n"
                )
                continue
            r = line.split("\t")
            if r[6] != '9606':  # keep only human records
                continue
            # Official symbol + chemical name identify a collapsed edge.
            key = r[1] + "," + r[13]
            entry = result.get(key)
            if entry:
                # Pair already seen: just accumulate the extra citation.
                entry[5].append(r[11])
                continue
            # '-' is BioGRID's "no value" marker; normalize it to ''.
            chem_synon = "" if r[15] == '-' else r[15]
            cas = "" if r[22] == '-' else "cas:" + r[22]
            # Alias column: CAS id and synonyms, '|'-separated when both exist.
            chem_alias = cas
            if chem_alias:
                if chem_synon:
                    chem_alias += "|" + chem_synon
            else:
                chem_alias = chem_synon
            result[key] = [
                r[2], r[4], "" if r[5] == '-' else r[5], r[8],
                r[9], [r[11]], r[14], chem_alias, r[18], r[20]
            ]

        # Write the collapsed rows; citation lists become '|'-joined strings.
        for value in result.values():
            value[5] = '|'.join(value[5])
            fho.write('\t'.join(value) + "\n")

    path_to_load_plan = 'chem_plan.json'
    with open(path_to_load_plan, 'r') as lp:
        load_plan = json.load(lp)

    dataframe = pd.read_csv(out_file,
                            dtype=str,
                            na_filter=False,
                            delimiter='\t',
                            engine='python')

    network = t2n.convert_pandas_to_nice_cx_with_load_plan(
        dataframe, load_plan)

    # Post processing: network-level metadata.
    network.set_name("BioGRID: Protein-Chemical Interactions (H. sapiens)")
    network.set_network_attribute(
        "description",
        """This network contains human protein-chemical interactions. Proteins are normalized to 
    official gene symbols and NCBI gene identifiers while alternative entity names and identifiers are 
    provided in the alias field. Edges with identical properties (except citations) are collapsed 
    to simplify visualization and citations displayed as a list of PMIDs. This network is updated 
    periodically with the latest data available on the <a href=\"https://thebiogrid.org/\">BioGRID</a>.<p><p>
    <b>Node legend</b><br>
    White oval: protein<br>
    Yellow diamond: biologic<br>
    Orange rectangle: small molecule
    """)

    network.set_network_attribute(
        "reference",
        "Chatr-Aryamontri A et al. <b>The BioGRID interaction database: 2017 update.</b><br>"
        +
        'Nucleic Acids Res. 2016 Dec 14;2017(1)<br><a href="http://doi.org/10.1093/nar/gkw1102">doi:10.1093/nar/gkw1102</a>'
    )

    network.set_network_attribute("version", version)
    # BUGFIX: the species name was corrupted to "H**o sapiens" (word-filter
    # artifact); restore the correct binomial name.
    network.set_network_attribute("organism", "Human, 9606, Homo sapiens")
    network.set_network_attribute("networkType",
                                  "Protein-Chemical Interaction")
    if args.template_id:
        network.apply_template(username=username,
                               password=password,
                               server=server,
                               uuid=args.template_id)
    if args.target_network_id:
        network.update_to(args.target_network_id, server, username, password)
    else:
        network.upload_to(server, username, password)

    # Clean up the temporary collapsed TSV.
    os.remove(out_file)
Example #11
0
        print(e1.instance)
else:
    raise Exception('Please provide a load plan')

#====================
# UPPERCASE COLUMNS
#====================
# Build a column-name -> UPPERCASE-name mapping for the dataframe.
# NOTE(review): this mapping is never applied — the df.rename call below is
# commented out, so df keeps its original column names. Confirm whether the
# load plan expects uppercase columns before re-enabling.
rename = {}
for column_name in df.columns:
    rename[column_name] = column_name.upper()

#df = df.rename(columns=rename)

#print(df.head())

# Convert the dataframe into a NiceCX network using the JSON load plan
# (df and load_plan are built earlier in this script).
network = t2n.convert_pandas_to_nice_cx_with_load_plan(df, load_plan)

# Optionally copy the visual style from an existing NDEx template network.
if args.template_id is not None:
    network.apply_template(username=my_username,
                           password=my_password,
                           server=my_server,
                           uuid=args.template_id)


#==============
# APPLY LAYOUT
#==============
def cartesian(G, node_id_look_up):
    #print('POS')
    #print(G.pos)
Example #12
0
    def process_file(self,
                     file_name,
                     load_plan_path,
                     name,
                     style_template=None,
                     custom_header=None,
                     delimiter='\t'):
        """Load a delimited file into a NiceCX network stored on ``self.network``.

        :param file_name: path to the input file; if not found, ``data/<file_name>`` is tried
        :param load_plan_path: path to the tsv2nicecx JSON load plan; same ``data/`` fallback
        :param name: name assigned to the resulting network
        :param style_template: optional NDEx network UUID whose style is applied
        :param custom_header: optional list of column names used instead of the file's first line
        :param delimiter: field delimiter (default tab)
        :raises Exception: if no load plan path is given, or custom_header is not a list
        """
        # The load plan is mandatory; fail fast before touching the filesystem.
        # (Previously this check came after path.isfile(load_plan_path), so a
        # None plan raised TypeError instead of this explicit error.)
        if load_plan_path is None:
            raise Exception('Please provide a load plan')

        # ==============================
        # LOAD TSV FILE INTO DATAFRAME
        # ==============================
        if not path.isfile(file_name):
            # If file is not in main directory try the ./data directory
            file_name = path.join('data', file_name)

        if not path.isfile(load_plan_path):
            # If file is not in main directory try the ./data directory
            load_plan_path = path.join('data', load_plan_path)

        with open(file_name, 'r', encoding='utf-8',
                  errors='ignore') as tsvfile:
            if custom_header is None:
                # The file's first line supplies the column names.
                column_names = [
                    h.strip() for h in tsvfile.readline().split(delimiter)
                ]
            elif isinstance(custom_header, list):
                # Caller-supplied header; the file's first line is data.
                column_names = custom_header
            else:
                raise Exception('Custom header provided was not of type list')

            df = pd.read_csv(tsvfile,
                             delimiter=delimiter,
                             na_filter=False,
                             engine='python',
                             names=column_names,
                             dtype=str,
                             error_bad_lines=False,
                             comment='#')

        # =====================
        # LOAD TSV LOAD PLAN
        # =====================
        try:
            with open(load_plan_path, 'r') as lp:
                load_plan = json.load(lp)
        except jsonschema.ValidationError as e1:
            logger.exception("Failed to parse the loading plan: " +
                             e1.message)
            logger.error('at path: ' + str(e1.absolute_path))
            logger.error("in block: ")
            # BUGFIX: was 'logger(e1.instance)', which called the logger
            # object itself and raised TypeError.
            logger.error(e1.instance)
            # Without a load plan the conversion below cannot proceed;
            # re-raise instead of failing later with a NameError.
            raise

        # (Dead "uppercase columns" rename loop removed: the dict it built
        # was never applied to df.)
        network = t2n.convert_pandas_to_nice_cx_with_load_plan(df, load_plan)
        network.set_name(name)
        if style_template is not None:
            logger.debug('Applying style from network: ' + style_template)
            network.apply_template(username=self.username,
                                   password=self.password,
                                   server=self.server,
                                   uuid=style_template)

        self.network = network