def unzip(args):
    """Unzip a GFA graph, or one bubble in it, and write the result as GFA.

    Reads ``args.graph[0]`` into a directed graph. When neither
    ``args.source`` nor ``args.sink`` is given, ``unzip_graph`` is applied to
    the whole graph; otherwise the bubble built from source and sink is
    handed to ``unzip_bubble``. The graph is then written to
    ``args.output + ".gfa"`` or, when no output is given, to the input path
    with its last extension replaced by ``.unzipped.gfa``.

    Args:
        args: Parsed command-line namespace. Reads ``graph``, ``source``,
            ``sink``, ``minunzip`` and ``output``.

    Returns:
        None. Logs a fatal message and returns early when the input name
        does not end in ``.gfa`` or ``.gfa.gz``.
    """
    gfa_path = args.graph[0]
    if not gfa_path.endswith((".gfa", ".gfa.gz")):
        logging.fatal("Invalid gfa file.")
        return

    G = nx.DiGraph()
    utils.read_gfa(gfa_path, None, None, G, remap=False)

    if args.source is None and args.sink is None:
        unzip_graph(G, args, minunzip=args.minunzip)
    else:
        b = bubbles.Bubble(G, args.source, args.sink)
        # idoffset is one past the largest integer node id. max() raises
        # ValueError on an empty list, so fall back to 0 when the graph has
        # no integer ids at all.
        int_ids = [n for n in G.nodes() if isinstance(n, int)]
        idoffset = max(int_ids) + 1 if int_ids else 0
        unzip_bubble(G, b, minunzip=args.minunzip, idoffset=idoffset)

    if args.output is None:
        # NOTE(review): splitext only strips the last extension, so a
        # ".gfa.gz" input yields "<name>.gfa.unzipped.gfa" -- confirm intended.
        of = os.path.splitext(gfa_path)[0] + ".unzipped.gfa"
    else:
        of = args.output + ".gfa"

    utils.write_gfa(G, None, outputfile=of)
def chop_cmd(args):
    """Chop a GFA graph with ``chop`` and write the resulting overlap graph.

    Reads ``args.graph[0]``, runs ``chop(G, k=args.k, extend=args.extend)``
    on it, then replaces every non-string node's ``seq`` attribute with the
    concatenation of its ``prefix``, ``seq`` and ``suffix`` attributes and
    writes the graph as GFA. String-typed nodes (start/end nodes) are
    skipped throughout.

    With ``args.fasta`` the node sequences are also written as FASTA, wrapped
    at ``args.lw`` characters per line. With ``args.check`` every k-length
    substring of every path sequence extracted from the pre-chop graph must
    occur in the chopped node sequences; the process exits with status 1 on
    the first one that does not.

    Output names are ``args.output + ".gfa"`` / ``".fasta"`` or, by default,
    the input path with its extension replaced by ``.chopped.gfa`` /
    ``.chopped.fasta``.

    Args:
        args: Parsed command-line namespace. Reads ``graph``, ``output``,
            ``check``, ``k``, ``extend``, ``fasta`` and ``lw``.

    Returns:
        None. Logs a fatal message and returns early when the input name
        does not end in ``.gfa``.
    """
    gfa_path = args.graph[0]
    if not gfa_path.endswith(".gfa"):
        logging.fatal("Invalid gfa file.")
        return

    G = nx.DiGraph()
    utils.read_gfa(gfa_path, None, None, G, remap=False)

    if args.output is None:
        base = os.path.splitext(gfa_path)[0]
        fof = base + ".chopped.fasta"
        gof = base + ".chopped.gfa"
    else:
        fof = args.output + ".fasta"
        gof = args.output + ".gfa"

    if args.check:
        # Keep the untouched graph; chop() modifies G in place.
        Gorg = G.copy()

    chop(G, k=args.k, extend=args.extend)

    logging.debug("Merging node sequence...")
    for node in G.nodes():
        if isinstance(node, str):  # skip start/end nodes
            continue
        attrs = G.node[node]
        attrs['seq'] = attrs['prefix'] + attrs['seq'] + attrs['suffix']
    logging.debug("Done.")

    logging.debug("Write overlap graph...")
    utils.write_gfa(G, None, outputfile=gof, remap=False)
    logging.debug("Done.")

    if args.fasta:
        logging.debug("Write corresponding fasta file...")
        with open(fof, 'w') as ff:
            for node in G.nodes():
                if isinstance(node, str):  # skip start/end nodes
                    continue
                seq = G.node[node]['seq']
                ff.write(">" + str(node) + "\n")
                # Stepping by the line width emits ceil(len/lw) lines without
                # depending on integer-division semantics of "/".
                for offset in range(0, len(seq), args.lw):
                    ff.write(seq[offset:offset + args.lw] + "\n")
        logging.debug("Done.")

    if args.check:
        logging.debug("Validate if all k-mers from the original graph are contained in overlap graph...")
        import extract
        # Start/end nodes are skipped here as in the loops above; their 'seq'
        # was never merged and they may not carry one at all.
        # The "$" separator presumably keeps matches from spanning two nodes.
        r = "$".join([G.node[node]['seq'] for node in G
                      if not isinstance(node, str)])
        for path in Gorg.graph['paths']:
            logging.debug("Check: %s" % path)
            s = extract.extract(Gorg, path)
            # +1 so the final k-mer, which starts at len(s)-k, is checked too.
            for i in xrange(len(s) - args.k + 1):
                kmer = s[i:i + args.k]
                if kmer not in r:
                    logging.error("Flat representation does not cover all k-length substrings for %s, could not find: %s!" % (path, kmer))
                    sys.exit(1)
        logging.debug("Done.")
def comp_cmd(args):
    """Apply ``comp`` to a GFA graph and write the result as ``*.rc.gfa``.

    Reads ``args.graph[0]`` into a directed graph, replaces it with the graph
    returned by ``comp`` (presumably the reverse complement, judging by the
    ``.rc`` suffix) and writes that graph with metadata enabled
    (``nometa=False``).

    The output name is the input name with its last ``.gfa`` turned into
    ``.rc.gfa``. If the input name contains no ``.gfa``, ``.rc.gfa`` is
    appended instead.

    Args:
        args: Parsed command-line namespace. Reads ``graph``.

    Returns:
        None.
    """
    src = args.graph[0]

    g = nx.DiGraph()
    g.graph['paths'] = []
    utils.read_gfa(src, None, None, g, targetsample=None)
    g = comp(g)

    # str.replace would rewrite every ".gfa" in the path (including directory
    # names) and return the input path unchanged when there is none, which
    # made write_gfa overwrite the input. Only touch the last occurrence and
    # always produce a distinct output name.
    stem, sep, rest = src.rpartition('.gfa')
    if sep:
        outputfile = stem + '.rc.gfa' + rest
    else:
        outputfile = src + '.rc.gfa'

    utils.write_gfa(g, "", outputfile=outputfile, nometa=False)
def chop_cmd(args):
    """Chop a GFA graph with ``chop`` and write the resulting overlap graph.

    Reads ``args.graph[0]``, runs ``chop(G, k=args.k, extend=args.extend)``
    on it, then replaces every node's ``seq`` attribute with the
    concatenation of its ``prefix``, ``seq`` and ``suffix`` attributes and
    writes the graph as GFA.

    With ``args.fasta`` the node sequences are also written as FASTA, wrapped
    at ``args.lw`` characters per line. With ``args.check`` every k-length
    substring of every path sequence extracted from the pre-chop graph must
    occur in the chopped node sequences; the process exits with status 1 on
    the first one that does not.

    Output names are ``args.output + ".gfa"`` / ``".fasta"`` or, by default,
    the input path with its extension replaced by ``.chopped.gfa`` /
    ``.chopped.fasta``.

    Args:
        args: Parsed command-line namespace. Reads ``graph``, ``output``,
            ``check``, ``k``, ``extend``, ``fasta`` and ``lw``.

    Returns:
        None. Logs a fatal message and returns early when the input name
        does not end in ``.gfa``.
    """
    gfa_path = args.graph[0]
    if not gfa_path.endswith(".gfa"):
        logging.fatal("Invalid gfa file.")
        return

    G = nx.DiGraph()
    utils.read_gfa(gfa_path, None, None, G, remap=False)

    if args.output is None:
        base = os.path.splitext(gfa_path)[0]
        fof = base + ".chopped.fasta"
        gof = base + ".chopped.gfa"
    else:
        fof = args.output + ".fasta"
        gof = args.output + ".gfa"

    if args.check:
        # Keep the untouched graph; chop() modifies G in place.
        Gorg = G.copy()

    chop(G, k=args.k, extend=args.extend)

    logging.debug("Merging node sequence...")
    for node in G.nodes():
        attrs = G.node[node]
        attrs['seq'] = attrs['prefix'] + attrs['seq'] + attrs['suffix']
    logging.debug("Done.")

    logging.debug("Write overlap graph...")
    utils.write_gfa(G, None, outputfile=gof, remap=False)
    logging.debug("Done.")

    if args.fasta:
        logging.debug("Write corresponding fasta file...")
        with open(fof, 'w') as ff:
            for node in G.nodes():
                seq = G.node[node]['seq']
                ff.write(">" + str(node) + "\n")
                # Stepping by the line width emits ceil(len/lw) lines without
                # depending on integer-division semantics of "/".
                for offset in range(0, len(seq), args.lw):
                    ff.write(seq[offset:offset + args.lw] + "\n")
        logging.debug("Done.")

    if args.check:
        logging.debug("Validate if all k-mers from the original graph are contained in overlap graph...")
        import extract
        # The "$" separator presumably keeps matches from spanning two nodes.
        r = "$".join([G.node[node]['seq'] for node in G])
        for path in Gorg.graph['paths']:
            logging.debug("Check: %s" % path)
            s = extract.extract(Gorg, path)
            # +1 so the final k-mer, which starts at len(s)-k, is checked too.
            for i in xrange(len(s) - args.k + 1):
                kmer = s[i:i + args.k]
                if kmer not in r:
                    logging.error("Flat representation does not cover all k-length substrings for %s, could not find: %s!" % (path, kmer))
                    sys.exit(1)
        logging.debug("Done.")
def unzip(args):
    """Unzip a GFA graph, or one bubble in it, and write the result as GFA.

    Reads ``args.graph[0]`` into a multi-edge directed graph. When neither
    ``args.source`` nor ``args.sink`` is given, ``unzip_graph`` is applied to
    the whole graph; otherwise the bubble built from source and sink is
    handed to ``unzip_bubble``. The graph is then written to
    ``args.output + ".gfa"`` or, when no output is given, to the input path
    with its extension replaced by ``.unzipped.gfa``.

    Args:
        args: Parsed command-line namespace. Reads ``graph``, ``source``,
            ``sink``, ``minunzip`` and ``output``.

    Returns:
        None. Logs a fatal message and returns early when the input name
        does not end in ``.gfa``.
    """
    gfa_path = args.graph[0]
    if not gfa_path.endswith(".gfa"):
        logging.fatal("Invalid gfa file.")
        return

    G = nx.MultiDiGraph()
    utils.read_gfa(gfa_path, None, None, G, remap=False)

    if args.source is None and args.sink is None:
        unzip_graph(G, minunzip=args.minunzip)
    else:
        b = bubbles.Bubble(G, args.source, args.sink)
        # idoffset is one past the largest integer node id. max() raises
        # ValueError on an empty list, so fall back to 0 when the graph has
        # no integer ids at all.
        int_ids = [n for n in G.nodes() if isinstance(n, int)]
        idoffset = max(int_ids) + 1 if int_ids else 0
        unzip_bubble(G, b, minunzip=args.minunzip, idoffset=idoffset)

    if args.output is None:
        of = os.path.splitext(gfa_path)[0] + ".unzipped.gfa"
    else:
        of = args.output + ".gfa"

    utils.write_gfa(G, None, outputfile=of)
def convert(args):
    """Convert each file in ``args.graphs`` to another graph format.

    For every input a fresh graph is created (``DiGraph`` when
    ``args.nocycles`` is set, ``MultiDiGraph`` otherwise) and then:

    * ``.gfa`` input is read and, depending on ``args.type``, rewritten as
      ``<stem>.rewrite.gfa``, written as GML via ``utils.write_gml`` with
      ``<stem>`` as the output base, or converted with ``graph2maf`` to
      ``<stem>.maf``.
    * ``.fa`` / ``.fasta`` / ``.fna`` input is loaded with one node per FASTA
      record and written to ``<stem>.gfa``.
    * Anything else logs a fatal message and stops processing the remaining
      inputs.

    Args:
        args: Parsed command-line namespace. Reads ``graphs``, ``nocycles``,
            ``minsamples``, ``maxsamples``, ``targetsample``, ``type``,
            ``hwm`` and ``partition``.

    Returns:
        None.
    """
    for graph in args.graphs:
        g = nx.DiGraph() if args.nocycles else nx.MultiDiGraph()
        g.graph['paths'] = []
        g.graph['path2id'] = dict()
        g.graph['id2path'] = dict()

        if graph.endswith(".gfa"):  # gfa to gml/gfa/maf
            utils.read_gfa(graph, None, None, g,
                           minsamples=args.minsamples,
                           maxsamples=args.maxsamples,
                           targetsample=args.targetsample)
            # Strip only the verified suffix; str.replace would also rewrite
            # any other ".gfa" occurring earlier in the path.
            stem = graph[:-len(".gfa")]
            if args.type == "gfa":
                fn = stem + ".rewrite.gfa"
                utils.write_gfa(g, "", outputfile=fn)
                logging.info("gfa graph written to: %s" % fn)
            elif args.type == "gml":
                fn = utils.write_gml(g, "", hwm=args.hwm, outputfile=stem,
                                     partition=args.partition)
                logging.info("gml graph written to: %s" % fn)
            elif args.type == "maf":
                logging.info("Converting graph to maf..")
                graph2maf(g, stem + ".maf")
        elif graph.endswith((".fa", ".fasta", ".fna")):  # assume fasta to gfa
            basename = os.path.basename(graph)
            # NOTE(review): every record is registered under the file's
            # basename with path id 0, so 'paths' gains one duplicate entry
            # per record -- confirm this is intended.
            for i, (name, seq) in enumerate(utils.fasta_reader(graph)):
                g.graph['paths'].append(basename)
                g.graph['path2id'][basename] = 0
                g.graph['id2path'][0] = basename
                g.add_node(i, offsets={0: 0}, seq=seq)
            filename = graph[:graph.rfind(".")] + ".gfa"
            utils.write_gfa(g, "", outputfile=filename)
            logging.info("gfa graph written to: %s" % filename)
        else:
            logging.fatal("Unknown filetype, need gfa or fasta extension.")
            return
def comp_cmd(args):
    """Apply ``comp`` to a GFA graph and write the result as ``*.rc.gfa``.

    Reads ``args.graph[0]`` into a directed graph, replaces it with the graph
    returned by ``comp`` (presumably the reverse complement, judging by the
    ``.rc`` suffix) and writes that graph with metadata enabled
    (``nometa=False``).

    The output name is the input name with its last ``.gfa`` turned into
    ``.rc.gfa``. If the input name contains no ``.gfa``, ``.rc.gfa`` is
    appended instead.

    Args:
        args: Parsed command-line namespace. Reads ``graph``.

    Returns:
        None.
    """
    src = args.graph[0]

    g = nx.DiGraph()
    g.graph['paths'] = []
    utils.read_gfa(src, None, None, g, targetsample=None)
    g = comp(g)

    # str.replace would rewrite every ".gfa" in the path (including directory
    # names) and return the input path unchanged when there is none, which
    # made write_gfa overwrite the input. Only touch the last occurrence and
    # always produce a distinct output name.
    stem, sep, rest = src.rpartition('.gfa')
    if sep:
        outputfile = stem + '.rc.gfa' + rest
    else:
        outputfile = src + '.rc.gfa'

    utils.write_gfa(g, "", outputfile=outputfile, nometa=False)
def convert(args):
    """Convert each file in ``args.graphs`` to another graph format.

    For every input a fresh graph is created (``DiGraph`` when
    ``args.nocycles`` is set, ``MultiDiGraph`` otherwise) and then:

    * ``.gfa`` input is read (without remapping) and, depending on
      ``args.type``, rewritten as ``<stem>.rewrite.gfa``, written as GML via
      ``utils.write_gml`` with ``<stem>`` as the output base, or converted
      with ``graph2maf`` to ``<stem>.maf``.
    * ``.maf`` input is turned into a graph with ``maf2graph`` and written
      both as ``<stem>.gml`` and ``<stem>.gfa``.
    * ``.fa`` / ``.fasta`` / ``.fna`` input is written to ``<stem>.gfa``.
      With ``args.aligned`` the records are read with dashes kept and passed
      to ``utils.aln2graph``. Otherwise each record becomes one node and one
      path, linked between a shared start node and a shared end node whose
      ids are random UUID hex strings.
    * Anything else logs a fatal message and stops processing the remaining
      inputs.

    Args:
        args: Parsed command-line namespace. Reads ``graphs``, ``nocycles``,
            ``minsamples``, ``maxsamples``, ``targetsample``, ``type``,
            ``hwm``, ``partition`` and ``aligned``.

    Returns:
        None.
    """
    for graph in args.graphs:
        g = nx.DiGraph() if args.nocycles else nx.MultiDiGraph()
        g.graph['paths'] = []
        g.graph['path2id'] = dict()
        g.graph['id2path'] = dict()

        if graph.endswith(".gfa"):  # gfa to gml/gfa/maf
            utils.read_gfa(graph, None, None, g,
                           minsamples=args.minsamples,
                           maxsamples=args.maxsamples,
                           targetsample=args.targetsample,
                           remap=False)
            # Strip only the verified suffix; str.replace would also rewrite
            # any other ".gfa" occurring earlier in the path.
            stem = graph[:-len(".gfa")]
            if args.type == "gfa":
                fn = stem + ".rewrite.gfa"
                utils.write_gfa(g, "", outputfile=fn)
                logging.info("gfa graph written to: %s" % fn)
            elif args.type == "gml":
                fn = utils.write_gml(g, "", hwm=args.hwm, outputfile=stem,
                                     partition=args.partition)
                logging.info("gml graph written to: %s" % fn)
            elif args.type == "maf":
                logging.info("Converting graph to maf..")
                graph2maf(g, stem + ".maf")

        elif graph.endswith(".maf"):  # multiple alignment format to graph
            g = maf2graph(graph)
            stem = graph[:graph.rfind(".")]
            utils.write_gml(g, "", outputfile=stem + ".gml")
            filename = stem + ".gfa"
            utils.write_gfa(g, "", outputfile=filename)
            logging.debug("gfa graph written to: %s" % filename)

        elif graph.endswith((".fa", ".fasta", ".fna")):  # assume fasta to gfa
            if args.aligned:
                seqs = []
                names = []
                for name, seq in utils.fasta_reader(graph, keepdash=True):
                    seqs.append(seq)
                    names.append(name)
                # Second return value is not needed here.
                g, _ = utils.aln2graph(seqs, names)
            else:
                start = uuid.uuid4().hex
                end = uuid.uuid4().hex
                g.graph['startnodes'] = [start]
                g.graph['endnodes'] = [end]
                g.add_node(start, offsets=dict())
                g.add_node(end, offsets=dict())
                for i, (name, seq) in enumerate(utils.fasta_reader(graph)):
                    g.graph['paths'].append(name)
                    g.graph['path2id'][name] = i
                    g.graph['id2path'][i] = name
                    # Per path, the start node sits at offset 0 and the end
                    # node at the length of that record's sequence.
                    g.node[start]['offsets'][i] = 0
                    g.node[end]['offsets'][i] = len(seq)
                    g.add_node(i, offsets={i: 0}, seq=seq)
                    g.add_edge(start, i, paths=set([i]))
                    g.add_edge(i, end, paths=set([i]))
            filename = graph[:graph.rfind(".")] + ".gfa"
            utils.write_gfa(g, "", outputfile=filename)
            logging.debug("gfa graph written to: %s" % filename)

        else:
            logging.fatal("Unknown filetype, need gfa or fasta extension.")
            return