def run(self):
    # read the cluster/edge table produced by the SV clustering step
    edges_path = cluster_svs.ClusterSVsStep(self.options).outpaths(final=True)["edges"]
    clusters = pandas.read_table(edges_path)
    clusters["chromx"] = clusters["chromx"].astype("string")
    clusters["chromy"] = clusters["chromy"].astype("string")

    assembled = []
    utilities.ensure_dir(self.outpaths(final=False)["graphs"])

    # walk each cluster's graph, collecting the assembled breakpoints;
    # clusters whose supporting files are missing are skipped
    for cluster_number, cluster in clusters.groupby("cluster"):
        self.logger.log(cluster_number)
        try:
            cur_assembled = self.walk(cluster_number, cluster)
            assembled.append(cur_assembled)
        except IOError:
            print "not found", cluster_number

    # TODO: deal with empty list
    # TODO: normalize coordinates according to reference.compare_chroms()
    assembled = pandas.concat(assembled, ignore_index=True)
    assembled["x"] = assembled["x"].astype(int)
    assembled["y"] = assembled["y"].astype(int)

    print self.options.reference.chroms
    print assembled["chromx"].unique()
    print assembled["chromy"].unique()

    outpath = self.outpaths(final=False)["walk_assemblies"]
    assembled.to_csv(outpath, sep="\t", index=False)
def run(self):
    self.assembly_dir = self.outpaths(final=False)["assembly_dir"]
    utilities.ensure_dir(self.assembly_dir)

    # TODO: how many reads is reasonable here?
    max_reads = 5e6

    fasta_path = self.combine_fastas(max_reads)
    contigs_path = self.run_assembly(fasta_path)
    self.align_contigs(contigs_path)
def visualize_graphs(outdir, graphs, evidence, file_label=""):
    try:
        utilities.ensure_dir(outdir)

        supported = 0
        missing = 0
        breakpoints = 0

        for i, graph in enumerate(graphs):
            print "visualize", i, graph
            graph = graph.copy()

            # annotate each edge with its barcode-sharing statistics and
            # style it according to the edge kind
            for n1, n2, data in graph.edges(data=True):
                data["label"] = "{}/{}={:.2g};{:.2g}".format(
                    int(data["shared"]), int(data["total"]),
                    data["shared"]/float(data["total"]), data["p"])

                if data["kind"] == "facing":
                    data["label"] = "[{}]".format(data["label"])
                    data["style"] = "dashed"
                elif data["kind"] == "breakpoint":
                    breakpoints += 1
                elif data["kind"] == "weak":
                    data["fontsize"] = 11
                    data["color"] = "gray"

                if "assembled" in data:
                    data["color"] = "orange"

            for node in graph.nodes():
                graph.node[node]["label"] = get_node_label(node)

            dot = networkx.nx_agraph.to_agraph(graph)
            # very large graphs are too slow/cluttered to lay out
            if len(dot.edges()) > 1000:
                print(" skipping")
                continue
            dot.draw("{}/temp{}{}.pdf".format(outdir, file_label, i), prog="dot")

        print("Supported:", supported, "Missing:", missing, "Total breakpoints:", breakpoints)
    except:
        # visualization is best-effort; any failure here is silently ignored
        pass
def visualize_frags(outdir, graphs, options):
    from rpy2.robjects import r

    utilities.ensure_dir(outdir)

    for i, graph in enumerate(graphs):
        r.pdf(os.path.join(outdir, "fragments.cluster_{}.pdf".format(i)))

        for component in networkx.connected_components(graph):
            subgraph = graph.subgraph(component)

            # order the breakends by following the simple path between the
            # two degree-1 end nodes of this component
            ends = [node for node, degree in subgraph.degree_iter() if degree == 1]
            breakends = [node for node in
                         list(networkx.shortest_simple_paths(subgraph, ends[0], ends[1]))[0]]
            # breakends = [breakend_from_label(node) for node in breakends]
            breakends = breakends[:-1:2] + breakends[-1:]
            # print ")"*100, breakends

            for sample, dataset in sorted(options.iter_10xdatasets()):
                plot_frags(breakends, options, sample, dataset)
        # plot_frags(breakpoints, options, sample, dataset)
        r["dev.off"]()
def ensure_dirs(self, to_run):
    utilities.ensure_dir(to_run[0].results_dir)
    utilities.ensure_dir(to_run[0].working_dir)
    utilities.ensure_dir(
        os.path.join(self.options.log_dir, to_run[0].__class__.__name__))
def ready_output_dir(options):
    utilities.ensure_dir(options.working_dir)
    utilities.ensure_dir(options.results_dir)
    utilities.ensure_dir(options.log_dir)