def align_pairwise(genomes, new_align, r_root_dir, g_root_dir, dirs, run,
                   max_size, chop_mode, mauve_exec, mtype, segtype, min_size,
                   idpt=None, fct_flags=None, fct_colors=None):
    """Make a pairwise alignment.

    The genome whose 'order' is 1 is used as reference and the one whose
    'order' is 2 as query. The pair is (optionally) aligned, its segment
    file is processed and a pairwise map is drawn.

    Args:
        genomes: iterable of genome dicts; each must have an 'order' key.
        new_align: when truthy, mauve_pw_align() is run before processing.
        r_root_dir: root of the results tree (string prefix).
        g_root_dir: directory handed to Noodle() as the sequence directory.
        dirs: dict providing the 'aln_segs' and 'maps' path fragments.
        run: run identifier, used in directory and file names.
        max_size, chop_mode, mauve_exec, mtype: forwarded to mauve_pw_align().
        segtype: forwarded to process_segdata().
        min_size: forwarded to pairwise_draw().
        idpt, fct_flags, fct_colors: drawing options forwarded to
            pairwise_draw(). Optional for backward compatibility, see below.

    Raises:
        IndexError: if no genome has 'order' 1 or no genome has 'order' 2.
    """
    import sys

    # The original body read idpt / fct_flags / fct_colors as free names
    # although they were never parameters (align_multi takes them
    # explicitly). They are now optional parameters; when omitted, fall back
    # to module-level values of the same names (if any) so existing call
    # sites behave as before.
    if idpt is None:
        idpt = globals().get('idpt')
    if fct_flags is None:
        fct_flags = globals().get('fct_flags')
    if fct_colors is None:
        fct_colors = globals().get('fct_colors')

    out = sys.stdout

    # set up directories
    aln_dir = r_root_dir + run + dirs['aln_segs']
    map_dir = r_root_dir + run + dirs['maps']

    # load inputs and process genomes
    seq_dir = g_root_dir
    ref = [Noodle(genome, seq_dir) for genome in genomes
           if genome['order'] == 1][0]
    query = [Noodle(genome, seq_dir) for genome in genomes
             if genome['order'] == 2][0]

    # align if needed; flush so the message is visible before the work starts
    # (text without a trailing newline otherwise sits in the stdout buffer)
    out.write("Aligning %s and %s ... " % (ref.name, query.name))
    out.flush()
    if new_align:
        mauve_pw_align(ref, query, r_root_dir, g_root_dir, dirs, run,
                       max_size, chop_mode, mauve_exec, mtype)

    # process data segments
    out.write("Processing segments .. ")
    out.flush()
    seg_file = aln_dir + ref.name + "_" + query.name + "_segs.txt"
    pair_data = process_segdata(seg_file, ref, query, segtype)
    out.write("OK\n")

    # map of query aligned to reference
    out.write("Mapping ... ")
    out.flush()
    map_file = map_dir + run + "_" + ref.name + "_vs_" + query.name + ".pdf"
    pairwise_draw(ref, query, pair_data, map_file, 'dual', 'dual', 'm',
                  'fct', 'fct', idpt, fct_flags, fct_colors, min_size)
    out.write("OK\n\n")
    out.flush()
def map_cst_aln(run_ref, ref_gbk, genome, scaff_gbk, segs_root, maps_root,
                segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate map of construct aligned to reference.

    Args:
        run_ref: reference object; its ``name`` attribute and ``log()``
            method are used here.
        ref_gbk: reference GenBank file, forwarded to pairwise_draw().
        genome: dict with a 'name' key and an optional 'offset' pair.
        scaff_gbk: path of the scaffold construct GenBank file.
        segs_root: directory prefix under which segment files are stored.
        maps_root: directory prefix for the output PDF.
        segtype: dtype passed to np.loadtxt() and offset_q2r_coords().
        min_size, fct_flags, fct_colors, idpt: forwarded to pairwise_draw().

    Returns:
        None. If the scaffold file cannot be opened a warning is printed;
        if the segment file cannot be loaded an error is logged and printed.
        In both cases no map is drawn.
    """
    # set inputs and outputs
    g_name = genome['name']
    ref_ctg_n = run_ref.name
    seg_file = (segs_root + g_name + "/" + g_name + "_" + ref_ctg_n
                + "_segs.txt")
    map_file = maps_root + g_name + "_vs_" + ref_ctg_n + ".pdf"

    # Probe that the scaffold file is readable. The handle is only needed
    # for the check, so close it right away instead of leaking it.
    try:
        with open(scaff_gbk):
            pass
    except IOError:
        print("WARNING: No scaffold construct to map")
        return

    # load segments TODO: add idp-based clumping
    try:
        segdata = np.loadtxt(seg_file, skiprows=1, dtype=segtype)
    except IOError:
        msg = "\nERROR: could not load segments data"
        run_ref.log(msg)
        print(msg)
        return
    except StopIteration:
        msg = "\nERROR: could not make map"
        run_ref.log(msg)
        print(msg)
        return

    # Offset coordinates where desired. Only the dict lookup is guarded so
    # that a KeyError raised inside the helpers is not silently mistaken
    # for "no offset configured".
    try:
        g_offset = genome['offset']
    except KeyError:
        g_offset = (0, 0)
        q_invert = False
    else:
        if g_offset[0] != 0 or g_offset[1] != 0:
            q_len = len(load_genbank(scaff_gbk).seq)
            segdata = offset_q2r_coords(segdata, q_len, g_offset, segtype)
        # flip the query sequence when its offset is negative
        q_invert = g_offset[1] < 0

    # generate graphical map
    pairwise_draw(ref_ctg_n, g_name, ref_gbk, scaff_gbk, segdata, map_file,
                  q_invert, g_offset, 'dual', 'dual', 'm', 'fct', 'fct',
                  min_size, fct_flags, fct_colors, idpt)
def map_cst_aln(run_ref, ref_gbk, genome, scaff_gbk, segs_root, maps_root,
                segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate map of construct aligned to reference.

    Loads the segment table for ``genome`` against the reference named by
    ``run_ref.name``, applies the genome's optional 'offset', and draws the
    pairwise map into ``maps_root``.

    Args:
        run_ref: reference object providing ``name`` and ``log(msg)``.
        ref_gbk: reference GenBank file, forwarded to pairwise_draw().
        genome: dict with 'name' and, optionally, an 'offset' pair.
        scaff_gbk: path of the scaffold construct GenBank file.
        segs_root: directory prefix of the segment files.
        maps_root: directory prefix of the output PDF.
        segtype: dtype for np.loadtxt() and offset_q2r_coords().
        min_size, fct_flags, fct_colors, idpt: forwarded to pairwise_draw().

    Returns:
        None. Nothing is drawn when the scaffold file cannot be opened
        (warning printed) or the segment file cannot be loaded (error
        logged and printed).
    """
    # set inputs and outputs
    g_name = genome['name']
    ref_ctg_n = run_ref.name
    seg_file = segs_root+g_name+"/"+g_name+"_"+ref_ctg_n+"_segs.txt"
    map_file = maps_root+g_name+"_vs_"+ref_ctg_n+".pdf"

    # start mapping: check the scaffold can be opened, and release the
    # file handle immediately (it was previously left open)
    try:
        with open(scaff_gbk):
            pass
    except IOError:
        print("WARNING: No scaffold construct to map")
        return

    try:
        # load segments TODO: add idp-based clumping
        segdata = np.loadtxt(seg_file, skiprows=1, dtype=segtype)
    except IOError:
        msg = "\nERROR: could not load segments data"
        run_ref.log(msg)
        print(msg)
        return
    except StopIteration:
        msg = "\nERROR: could not make map"
        run_ref.log(msg)
        print(msg)
        return

    # offset coordinates where desired; the KeyError guard covers only the
    # 'offset' lookup so errors raised by the helpers are not swallowed
    try:
        g_offset = genome['offset']
    except KeyError:
        g_offset = (0,0)
        q_invert = False
    else:
        if g_offset[0] != 0 or g_offset[1] != 0:
            q_len = len(load_genbank(scaff_gbk).seq)
            segdata = offset_q2r_coords(segdata, q_len, g_offset, segtype)
        # determine whether to flip the query sequence (negative offset)
        q_invert = g_offset[1] < 0

    # generate graphical map
    pairwise_draw(ref_ctg_n, g_name, ref_gbk, scaff_gbk, segdata, map_file,
                  q_invert, g_offset, 'dual', 'dual', 'm', 'fct', 'fct',
                  min_size, fct_flags, fct_colors, idpt)
def align_multi(genomes, new_align, r_root_dir, g_root_dir, dirs, run,
                max_size, chop_mode, mauve_exec, mtype, segtype, idpt,
                fct_flags, fct_colors, min_size):
    """Make a multiple alignment.

    Genomes are paired by consecutive 'order' values (1 with 2, 2 with 3,
    ...). Every pair is optionally aligned, then its segment file is
    processed and a pairwise map is drawn; finally one combined map of all
    pairs is drawn.

    Args:
        genomes: sequence of genome dicts; each must have an 'order' key.
        new_align: when truthy, mauve_pw_align() is run for every pair.
        r_root_dir: root of the results tree (string prefix).
        g_root_dir: directory handed to Noodle() as the sequence directory.
        dirs: dict providing the 'aln_segs' and 'maps' path fragments.
        run: run identifier, used in directory and file names.
        max_size, chop_mode, mauve_exec, mtype: forwarded to mauve_pw_align().
        segtype: forwarded to process_segdata().
        idpt, fct_flags, fct_colors, min_size: forwarded to the drawing
            functions.

    Raises:
        IndexError: if an 'order' value in 1..len(genomes) has no genome.
    """
    import sys
    out = sys.stdout

    # set up directories
    aln_dir = r_root_dir + run + dirs['aln_segs']
    map_dir = r_root_dir + run + dirs['maps']

    # load inputs, process and pair up genomes
    seq_dir = g_root_dir
    g_pairs = []
    for order in range(1, len(genomes)):
        ref = [Noodle(genome, seq_dir) for genome in genomes
               if genome['order'] == order][0]
        query = [Noodle(genome, seq_dir) for genome in genomes
                 if genome['order'] == order + 1][0]
        g_pairs.append((ref, query))

    # process pairs: align if needed. Flush after each message so it shows
    # up before the alignment runs (no-newline text is otherwise buffered).
    for ref, query in g_pairs:
        out.write("Aligning %s and %s ... " % (ref.name, query.name))
        out.flush()
        if new_align:
            mauve_pw_align(ref, query, r_root_dir, g_root_dir, dirs, run,
                           max_size, chop_mode, mauve_exec, mtype)

    # traverse genome pairs
    segdata_list = []
    for counter, (ref, query) in enumerate(g_pairs, 1):
        # process data segments
        out.write("Processing pair %s segments ... " % counter)
        out.flush()
        p_seg_file = aln_dir + ref.name + "_" + query.name + "_segs.txt"
        pair_data = process_segdata(p_seg_file, ref, query, segtype)
        segdata_list.append(pair_data)
        out.write("OK\n")

        # make a pairwise map while we're at it
        out.write("Mapping pair %s ... " % counter)
        out.flush()
        p_map_file = (map_dir + run + "_" + ref.name + "_vs_" + query.name
                      + ".pdf")
        pairwise_draw(ref, query, pair_data, p_map_file, 'dual', 'dual',
                      'm', 'fct', 'fct', idpt, fct_flags, fct_colors,
                      min_size)
        out.write("OK\n")

    # combined map of all pairs
    out.write("Mapping multiple alignment... ")
    out.flush()
    map_file = map_dir + run + ".pdf"
    multi_draw(g_pairs, segdata_list, map_file, idpt, fct_flags, fct_colors,
               min_size)
    out.write("OK\n\n")
    out.flush()
def align_multi(genomes, new_align, dirs, run):
    """Make a multiple alignment.

    Genomes are paired by consecutive 'order' values (1 with 2, 2 with 3,
    ...). Every pair is optionally aligned, then its segment file is
    processed and a pairwise map is drawn; finally one combined map of all
    pairs is drawn.

    Args:
        genomes: sequence of genome dicts; each must have an 'order' key.
        new_align: when truthy, mauve_pw_align() is run for every pair.
        dirs: dict providing the 'seqfiles', 'aln_segs' and 'maps' paths.
        run: run identifier, used as a prefix of the map file names.

    Raises:
        IndexError: if an 'order' value in 1..len(genomes) has no genome.
    """
    import sys
    out = sys.stdout

    # load inputs, process and pair up genomes
    seq_dir = dirs['seqfiles']
    g_pairs = []
    for order in range(1, len(genomes)):
        ref = [Noodle(genome, seq_dir) for genome in genomes
               if genome['order'] == order][0]
        query = [Noodle(genome, seq_dir) for genome in genomes
                 if genome['order'] == order + 1][0]
        g_pairs.append((ref, query))

    # process pairs: align if needed. Flush after each message so it shows
    # up before the alignment runs (no-newline text is otherwise buffered).
    for ref, query in g_pairs:
        out.write("Aligning %s and %s ... " % (ref.name, query.name))
        out.flush()
        if new_align:
            mauve_pw_align(ref, query, dirs)

    # traverse genome pairs
    segdata_list = []
    for counter, (ref, query) in enumerate(g_pairs, 1):
        # process data segments
        out.write("Processing pair %s segments ... " % counter)
        out.flush()
        p_seg_file = (dirs['aln_segs'] + "/" + ref.name + "_" + query.name
                      + "_segs.txt")
        pair_data = process_segdata(p_seg_file, ref, query)
        segdata_list.append(pair_data)
        out.write("OK\n")

        # make a pairwise map while we're at it
        out.write("Mapping pair %s ... " % counter)
        out.flush()
        p_map_file = (dirs['maps'] + run + "_" + ref.name + "_vs_"
                      + query.name + ".pdf")
        pairwise_draw(ref, query, pair_data, p_map_file, 'dual', 'dual',
                      'm', 'fct', 'fct')
        out.write("OK\n")

    # combined map of all pairs
    out.write("Mapping multiple alignment... ")
    out.flush()
    map_file = dirs['maps'] + run + ".pdf"
    multi_draw(g_pairs, segdata_list, map_file)
    out.write("OK\n\n")
    out.flush()
def map_ctg_alns(run_ref, ref_gbk, genome, ctg_segs_root, maps_root,
                 fixed_dirs, segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate maps of contigs aligned to reference.

    Every entry found in the genome's segments directory is treated as a
    contig identifier: its segment file is loaded and a pairwise map
    against the reference is drawn. A failure on one contig is logged and
    printed, and the remaining contigs are still processed.

    Args:
        run_ref: reference object providing ``name`` and ``log(msg)``.
        ref_gbk: reference GenBank file, forwarded to pairwise_draw().
        genome: dict with a 'name' key.
        ctg_segs_root: directory prefix of the per-genome segment folders.
        maps_root: directory prefix for the output PDFs.
        fixed_dirs: dict providing the 'gbk_contigs_dir' path.
        segtype: dtype passed to np.loadtxt().
        min_size, fct_flags, fct_colors, idpt: forwarded to pairwise_draw().
    """
    # set inputs and outputs
    genome_name = genome['name']
    ref_name = run_ref.name
    genome_segs_dir = ctg_segs_root+genome_name+"/"
    genome_ctgs_dir = fixed_dirs['gbk_contigs_dir']+genome_name+"/"

    # list the entries of the matches directory; nothing to do without it
    try:
        contig_ids = listdir(genome_segs_dir)
    except OSError:
        warning = "\nWARNING: no matching segments"
        run_ref.log(warning)
        print(warning)
        return

    for contig_id in contig_ids:
        contig_label = genome_name+"_"+contig_id
        contig_gbk = genome_ctgs_dir+contig_label+".gbk"
        segs_path = (genome_segs_dir+contig_id+"/"+contig_id+"_"+ref_name
                     +"_segs.txt")
        map_path = maps_root+contig_label+"_vs_"+ref_name+".pdf"
        try:
            # load segments TODO: add idp-based clumping
            segments = np.loadtxt(segs_path, skiprows=1, dtype=segtype)
            # offsetting is deactivated: no inversion, zero offset
            pairwise_draw(ref_name, contig_label, ref_gbk, contig_gbk,
                          segments, map_path, False, (0,0), 'dual', 'dual',
                          'm', 'fct', 'fct', min_size, fct_flags, fct_colors,
                          idpt)
        except IOError:
            failure = "\nERROR: could not load segments data"
        except StopIteration:
            failure = "\nERROR: could not make map"
        else:
            continue
        run_ref.log(failure)
        print(failure)
def map_ctg_alns(run_ref, ref_gbk, genome, ctg_segs_root, maps_root,
                 fixed_dirs, segtype, min_size, fct_flags, fct_colors, idpt):
    """Generate maps of contigs aligned to reference.

    Walks the entries of ``ctg_segs_root + genome['name'] + "/"``; for each
    one, loads the matching segment file and draws a pairwise map of that
    contig against the reference. Errors on a single contig are logged via
    ``run_ref.log`` and printed; the loop then moves on to the next entry.

    Args:
        run_ref: reference object providing ``name`` and ``log(msg)``.
        ref_gbk: reference GenBank file, forwarded to pairwise_draw().
        genome: dict with a 'name' key.
        ctg_segs_root: directory prefix of the per-genome segment folders.
        maps_root: directory prefix for the output PDFs.
        fixed_dirs: dict providing the 'gbk_contigs_dir' path.
        segtype: dtype passed to np.loadtxt().
        min_size, fct_flags, fct_colors, idpt: forwarded to pairwise_draw().
    """
    # set inputs and outputs
    name = genome['name']
    reference = run_ref.name
    segs_dir = ctg_segs_root + name + "/"
    gbk_dir = fixed_dirs['gbk_contigs_dir'] + name + "/"

    # list entries in matches directory; bail out when it is missing
    try:
        entries = listdir(segs_dir)
    except OSError:
        note = "\nWARNING: no matching segments"
        run_ref.log(note)
        print(note)
        return

    # offsetting is deactivated for contig maps
    no_offset = (0, 0)
    no_invert = False

    for entry in entries:
        query_name = name + "_" + entry
        gbk_path = gbk_dir + query_name + ".gbk"
        seg_path = (segs_dir + entry + "/" + entry + "_" + reference
                    + "_segs.txt")
        pdf_path = maps_root + query_name + "_vs_" + reference + ".pdf"
        problem = None
        try:
            # load segments TODO: add idp-based clumping
            seg_table = np.loadtxt(seg_path, skiprows=1, dtype=segtype)
            # generate graphical map
            pairwise_draw(reference, query_name, ref_gbk, gbk_path,
                          seg_table, pdf_path, no_invert, no_offset, 'dual',
                          'dual', 'm', 'fct', 'fct', min_size, fct_flags,
                          fct_colors, idpt)
        except IOError:
            problem = "\nERROR: could not load segments data"
        except StopIteration:
            problem = "\nERROR: could not make map"
        if problem is not None:
            run_ref.log(problem)
            print(problem)
def map_ref_segs(run_ref, run_id, r_root_dir, run_dirs, min_size, fct_flags,
                 fct_colors, idpt):
    """Generate map of reference contig with segment details.

    This provides a comparison of the original reference and the
    re-annotated version: the re-annotated GenBank file is drawn against
    the original file, linked by one mock segment spanning the full length.

    Args:
        run_ref: reference object providing ``name``, ``file`` and
            ``log(msg)``.
        run_id: run identifier; names the run folder under r_root_dir.
        r_root_dir: root of the results tree (string prefix).
        run_dirs: dict providing 'ref_map_dir' and 'ref_gbk_dir'.
        min_size, fct_flags, fct_colors, idpt: forwarded to pairwise_draw().
    """
    # set inputs and outputs
    ref_name = run_ref.name
    run_root = r_root_dir+run_id+"/"
    original_gbk = run_ref.file
    maps_dir = run_root+run_dirs['ref_map_dir']
    ensure_dir([maps_dir])
    reannot_gbk = run_root+run_dirs['ref_gbk_dir']+ref_name+"_re-annot.gbk"
    map_path = maps_dir+ref_name+"_ref.pdf"

    # start mapping
    try:
        # mock segment: full length on both sides, 100% identity
        seq_len = len(load_genbank(reannot_gbk).seq)
        mock_segs = [[1, seq_len, 1, seq_len, 100]]
        # offsetting is deactivated: no inversion, zero offset
        pairwise_draw(ref_name+"_ra", ref_name+"_ori", reannot_gbk,
                      original_gbk, mock_segs, map_path, False, (0,0),
                      'dual', 'dual', 'm', 'fct', 'product', min_size,
                      fct_flags, fct_colors, idpt)
    except IOError:
        failure = "\nERROR: could not load segments data"
    except StopIteration:
        failure = "\nERROR: could not make map"
    else:
        return
    run_ref.log(failure)
    print(failure)
def map_ref_segs(run_ref, run_id, r_root_dir, run_dirs, min_size, fct_flags,
                 fct_colors, idpt):
    """Generate map of reference contig with segment details.

    Draws the re-annotated reference GenBank file against the original
    reference file so the two annotations can be compared. The two are
    linked by a single mock segment covering the whole sequence.

    Args:
        run_ref: reference object providing ``name``, ``file`` and
            ``log(msg)``.
        run_id: run identifier; names the run folder under r_root_dir.
        r_root_dir: root of the results tree (string prefix).
        run_dirs: dict providing 'ref_map_dir' and 'ref_gbk_dir'.
        min_size, fct_flags, fct_colors, idpt: forwarded to pairwise_draw().
    """
    # set inputs and outputs
    name = run_ref.name
    base_dir = r_root_dir + run_id + "/"
    source_file = run_ref.file
    out_dir = base_dir + run_dirs['ref_map_dir']
    ensure_dir([out_dir])
    annotated_file = base_dir + run_dirs['ref_gbk_dir'] + name + "_re-annot.gbk"
    pdf_file = out_dir + name + "_ref.pdf"

    # offsetting is deactivated for this map
    no_offset = (0, 0)
    no_invert = False

    problem = None
    try:
        # make mock segment, full-length with 100% id
        record = load_genbank(annotated_file)
        n_bases = len(record.seq)
        whole_seq = [[1, n_bases, 1, n_bases, 100]]
        # generate graphical map
        pairwise_draw(name + "_ra", name + "_ori", annotated_file,
                      source_file, whole_seq, pdf_file, no_invert, no_offset,
                      'dual', 'dual', 'm', 'fct', 'product', min_size,
                      fct_flags, fct_colors, idpt)
    except IOError:
        problem = "\nERROR: could not load segments data"
    except StopIteration:
        problem = "\nERROR: could not make map"
    if problem is not None:
        run_ref.log(problem)
        print(problem)
def align_pairwise(genomes, new_align, dirs, run):
    """Make a pairwise alignment.

    The genome whose 'order' is 1 is used as reference and the one whose
    'order' is 2 as query. The pair is (optionally) aligned, its segment
    file is processed and a pairwise map is drawn.

    Args:
        genomes: iterable of genome dicts; each must have an 'order' key.
        new_align: when truthy, mauve_pw_align() is run before processing.
        dirs: dict providing the 'seqfiles', 'aln_segs' and 'maps' paths.
        run: run identifier, used as a prefix of the map file name.

    Raises:
        IndexError: if no genome has 'order' 1 or no genome has 'order' 2.
    """
    import sys
    out = sys.stdout

    # load inputs and process genomes
    seq_dir = dirs['seqfiles']
    ref = [Noodle(genome, seq_dir) for genome in genomes
           if genome['order'] == 1][0]
    query = [Noodle(genome, seq_dir) for genome in genomes
             if genome['order'] == 2][0]

    # align if needed; flush so the message is visible before the work starts
    # (text without a trailing newline otherwise sits in the stdout buffer)
    out.write("Aligning %s and %s ... " % (ref.name, query.name))
    out.flush()
    if new_align:
        mauve_pw_align(ref, query, dirs)

    # process data segments
    out.write("Processing segments .. ")
    out.flush()
    seg_file = (dirs['aln_segs'] + "/" + ref.name + "_" + query.name
                + "_segs.txt")
    pair_data = process_segdata(seg_file, ref, query)
    out.write("OK\n")

    # map of query aligned to reference
    out.write("Mapping ... ")
    out.flush()
    map_file = (dirs['maps'] + run + "_" + ref.name + "_vs_" + query.name
                + ".pdf")
    pairwise_draw(ref, query, pair_data, map_file, 'dual', 'dual', 'm',
                  'fct', 'fct')
    out.write("OK\n\n")
    out.flush()