def run(self):
        """Export the selected pathways of ``self.path_file`` as XGMML.

        ``self.path_file`` may be a path string, which is loaded through
        ``BioPaxFile`` (a ``.gz`` suffix makes it read through ``gzip``), or
        an already loaded object exposing ``pathways()`` and ``toNet()``.

        A pathway is selected when ``self.pathways`` is None, or when its
        value, its key, or the last ``re_namesplit`` component of its key is
        contained in ``self.pathways``.

        Each resulting sub-network is written to:
          * ``<self.outdir>/<name>.xgmml`` when ``self.outdir`` is set, where
            ``name`` is the last ``re_namesplit`` component of the graph's
            'url' attribute;
          * otherwise ``self.singlepath`` when it is set;
          * otherwise ``sys.stdout``.

        When ``self.singlepath`` is set the method returns after the first
        sub-network has been written.
        """
        path_file = self.path_file
        if isinstance(path_file, basestring):
            t = BioPaxFile()
            if path_file.endswith(".gz"):
                # NOTE(review): the gzip handle is left open on purpose; it is
                # not visible here whether load() consumes it eagerly.
                handle = gzip.GzipFile(path_file)
                t.load(handle)
            else:
                t.load(path_file)
            path_file = t

        wanted = self.pathways
        paths = []
        for key, value in path_file.pathways().iteritems():
            # Match by value first, then by full key, then by the short name
            # at the end of the key.
            if (wanted is None
                    or value in wanted
                    or key in wanted
                    or re_namesplit.split(key)[-1] in wanted):
                paths.append(key)

        for subnet in path_file.toNet(paths):
            gr = networkx.MultiDiGraph()

            # Copy the sub-network metadata onto the graph, except 'type'.
            for k, v in subnet.meta.items():
                if k != 'type':
                    gr.graph[k] = v
            subnet.to_graph(gr)

            name = re_namesplit.split(gr.graph['url'])[-1]
            if self.outdir is not None:
                handle = open(os.path.join(self.outdir, name + ".xgmml"), "w")
            elif self.singlepath is not None:
                handle = open(self.singlepath, "w")
            else:
                handle = sys.stdout

            try:
                convert.write_xgmml(gr, handle)
            finally:
                # Close every file opened here (previously the per-pathway
                # files in outdir were leaked); never close the shared stdout.
                if handle is not sys.stdout:
                    handle.close()

            if self.singlepath is not None:
                return
Beispiel #2
0
    def run(self):
        """Write the selected pathways of ``self.path_file`` out as XGMML.

        ``self.path_file`` is either a path string, loaded via ``BioPaxFile``
        (through ``gzip`` when the name ends in ``.gz``), or an object that
        already provides ``pathways()`` and ``toNet()``.

        With ``self.pathways`` set to None every pathway is exported.
        Otherwise a pathway is exported when its value, its key, or the last
        ``re_namesplit`` component of its key is found in ``self.pathways``.

        Destination of each sub-network: a ``<name>.xgmml`` file inside
        ``self.outdir`` if that is set (``name`` being the last
        ``re_namesplit`` component of the graph's 'url' attribute), else the
        file ``self.singlepath`` if that is set, else ``sys.stdout``.  If
        ``self.singlepath`` is set, only the first sub-network is written.
        """
        path_file = self.path_file
        if isinstance(path_file, basestring):
            t = BioPaxFile()
            if path_file.endswith(".gz"):
                # NOTE(review): gzip handle deliberately not closed; whether
                # load() reads it completely cannot be told from this block.
                handle = gzip.GzipFile(path_file)
                t.load(handle)
            else:
                t.load(path_file)
            path_file = t

        selected = self.pathways
        paths = []
        for key, value in path_file.pathways().iteritems():
            if selected is None or value in selected or key in selected:
                paths.append(key)
            elif re_namesplit.split(key)[-1] in selected:
                # Fall back to the short name at the end of the key.
                paths.append(key)

        for subnet in path_file.toNet(paths):
            gr = networkx.MultiDiGraph()

            # Every metadata entry except 'type' becomes a graph attribute.
            for k, v in subnet.meta.items():
                if k != 'type':
                    gr.graph[k] = v
            subnet.to_graph(gr)

            name = re_namesplit.split(gr.graph['url'])[-1]
            owns_handle = True
            if self.outdir is not None:
                handle = open(os.path.join(self.outdir, name + ".xgmml"), "w")
            elif self.singlepath is not None:
                handle = open(self.singlepath, "w")
            else:
                handle = sys.stdout
                owns_handle = False

            try:
                convert.write_xgmml(gr, handle)
            finally:
                # Files opened for outdir used to stay open until garbage
                # collection; close whatever this method opened, but leave
                # sys.stdout usable for the caller.
                if owns_handle:
                    handle.close()

            if self.singlepath is not None:
                return
Beispiel #3
0
def main_build(args):
    """Merge several pathway graphs into one graph and write it out.

    ``args.pathways`` is a flat list that is consumed two entries at a time;
    each pair is handed to ``pathway_opener``.  Every opened pathway is read,
    skipped when ``args.organism`` is set and the graph's 'organism'
    attribute differs from it, passed through ``GraphBuilder.fix_graph`` and
    merged into a single ``networkx.MultiDiGraph``:

      * Unknown nodes are added with their attributes.  A node already
        present with a different 'type' is reported through ``error``; when
        ``args.rename_nonprotein`` or ``args.all`` is set, an existing
        'protein' type is replaced by the incoming type.
      * An edge is added unless an edge with the same 'interaction' already
        links the same source and target (counted as duplicate).  Self loops
        are dropped when ``args.remove_self`` or ``args.all`` is set.

    Connected components with fewer than ``args.min_subgraph`` nodes are
    removed, summary statistics are logged, and the graph is written to
    ``args.output`` (or ``sys.stdout``) as SPF (``args.spf``), SIF
    (``args.sif``) or XGMML (default).
    """
    gr = networkx.MultiDiGraph()

    # NOTE(review): an odd number of entries raises IndexError on the last one.
    path_pairs = [(args.pathways[i], args.pathways[i + 1])
                  for i in range(0, len(args.pathways), 2)]
    paths = pathway_opener(path_pairs)

    builder = GraphBuilder(args)

    # NOTE(review): nothing ever fills type_count, so the "Node Type" report
    # further down currently logs nothing.
    type_count = {}
    duplicate_edges = 0
    for cur_path in paths:
        log("Scanning: %s" % (cur_path.name))
        cur_gr = cur_path.read()
        # Graphs without an 'organism' attribute are always accepted.
        if args.organism is not None and cur_gr.graph.get('organism', args.organism) != args.organism:
            continue
        fix_gr = builder.fix_graph(cur_gr)

        for node in fix_gr.node:
            if node not in gr.node:
                log("Merging: %s" % node)
                gr.add_node(node, attr_dict=fix_gr.node[node])
                continue
            known = gr.node[node]
            incoming = fix_gr.node[node]
            if 'type' in known and 'type' in incoming and known['type'] != incoming['type']:
                error("%s failure: Mismatch Node Type: %s :%s --> %s" % (cur_path.name, node, known['type'], incoming['type']))
                # 'protein' is a default node type, so a more specific type
                # seen later replaces it.
                if (args.rename_nonprotein or args.all) and known['type'] == 'protein':
                    known['type'] = incoming['type']

        for src, dst, data in fix_gr.edges(data=True):
            interaction = data['interaction']
            if src not in gr.node or dst not in gr.node:
                continue

            has_edge = dst in gr.edge[src] and any(
                attrs['interaction'] == interaction
                for attrs in gr.edge[src][dst].values()
            )
            if has_edge:
                duplicate_edges += 1
            elif (args.remove_self or args.all) and src == dst:
                log("Removing self loop: %s" % (src))
            else:
                gr.add_edge(src, dst, attr_dict=data)

    # Collect the nodes first so the graph is not modified while its
    # components are being enumerated.
    rm_list = []
    for group in networkx.connected_components(networkx.Graph(gr)):
        if len(group) < args.min_subgraph:
            rm_list.extend(group)
    for r in rm_list:
        gr.remove_node(r)

    log("+---------------------------+")
    log("|Node Count: %15d|" % (len(gr.nodes())))
    log("|Edge Count: %15d|" % (len(gr.edges())))
    log("|Duplicate Edges: %10d|" % (duplicate_edges))
    log("|Connected Components: %5d|" % (networkx.number_connected_components(networkx.Graph(gr))))
    log("+---------------------------+")

    if args.output:
        handle = open(args.output, "w")
    else:
        handle = sys.stdout

    try:
        for n_type in type_count:
            log("Node Type %s: %d" % (n_type, type_count[n_type]))
        if args.spf:
            network_convert.write_spf(gr, handle)
        elif args.sif:
            network_convert.write_sif(gr, handle)
        else:
            network_convert.write_xgmml(gr, handle)
    finally:
        # Close only a file opened here; closing sys.stdout would break any
        # later output of the process, so it is just flushed.
        if handle is sys.stdout:
            handle.flush()
        else:
            handle.close()
    if gr is not None:
        if args.out_spf is not None:
            if args.out_spf == "-":
                ohandle = sys.stdout
            else:
                ohandle = open(args.out_spf, "w")
            convert.write_spf(gr, ohandle, 
                node_type_field=args.node_type_field, node_type_default=args.node_type_default, 
                edge_type_field=args.edge_type_field, edge_type_default=args.edge_type_default
            )
        if args.out_xgmml is not None:
            if args.out_xgmml == "-":
                ohandle = sys.stdout
            else:
                ohandle = open(args.out_xgmml, "w")
            convert.write_xgmml(gr, ohandle)
        if args.out_gmt:
            if args.out_gmt == "-":
                ohandle = sys.stdout
            else:
                ohandle = open(args.out_gmt, "w")

            pathmap = {}
            for node in gr.node:
                if 'pathway' in gr.node[node]:
                    for p in gr.node[node]['pathway']:
                        if p not in pathmap:
                            pathmap[p] = [node]
                        else:
                            pathmap[p].append(node)
            for path in pathmap:
        handle.close()

    if args.file_mode:
        for path in pathways:
            handle = open(path)
            gr = convert.read_gpml(handle)
            handle.close()
            if translate_table:
                re_map = {}
                for n in gr.node:
                    if 'db_xref' in gr.node[n]:
                        relabel = find_translation(gr.node[n]['db_xref'])
                        if relabel:
                            re_map[n] = relabel
                gr = nx.relabel_nodes(gr, re_map)
            convert.write_xgmml(gr, sys.stdout)            

    else:
        for path in pathways:
            print "Getting", path
            grs = []
            if args.biopax:
                pathdata_str = server.getPathwayAs(fileType="owl", pwId=path)
                pathdata_xml = base64.b64decode(pathdata_str)
                handle = open( os.path.join(args.outdir, path + ".owl"), "w" )
                handle.write(pathdata_xml)
                handle.close()
                b = biopax.BioPax()
                b.parse(pathdata_xml)
                grs = b.toNet()
            else:
        handle.close()

    if args.file_mode:
        for path in pathways:
            handle = open(path)
            gr = convert.read_gpml(handle)
            handle.close()
            if translate_table:
                re_map = {}
                for n in gr.node:
                    if 'db_xref' in gr.node[n]:
                        relabel = find_translation(gr.node[n]['db_xref'])
                        if relabel:
                            re_map[n] = relabel
                gr = nx.relabel_nodes(gr, re_map)
            convert.write_xgmml(gr, sys.stdout)

    else:
        for path in pathways:
            print "Getting", path
            grs = []
            if args.biopax:
                pathdata_str = server.getPathwayAs(fileType="owl", pwId=path)
                pathdata_xml = base64.b64decode(pathdata_str)
                handle = open(os.path.join(args.outdir, path + ".owl"), "w")
                handle.write(pathdata_xml)
                handle.close()
                b = biopax.BioPax()
                b.parse(pathdata_xml)
                grs = b.toNet()
            else: