def create_merged_seq(self):
    """Write the merged, target-ordered query sequences to a FASTA file.

    Reads ``longest_segments.txt`` from ``self.out_dir``, orders the rows
    by ``target_name`` and then ``target_final_start``, and writes
    ``merged_seq.fa`` into the same directory. One FASTA record is
    emitted per distinct target name; each row contributes its query
    sequence (reverse-complemented when ``query_strand`` is ``"-"``)
    followed by a spacer of 1000 ``N`` characters.

    Reads ``self.out_dir`` and ``self.query_fas`` (indexed by query name
    and passed to ``FastaHack``).
    """
    spacer = "N" * 1000
    sort_keys = ["target_name", "target_final_start"]

    align = pd.read_table(os.path.join(self.out_dir, "longest_segments.txt"))
    # DataFrame.sort(columns=...) was removed from pandas; prefer
    # sort_values and only fall back on installations that predate it.
    if hasattr(align, "sort_values"):
        align = align.sort_values(by=sort_keys)
    else:
        align = align.sort(columns=sort_keys)

    curr_target_name = ""
    wrote_header = False
    # Value threaded through futil.format_fasta between calls; presumably
    # the running line offset used for wrapping -- TODO confirm.
    # NOTE(review): unlike main(), this is not reset to 0 when a new
    # target record starts; confirm whether that is intentional.
    start = 0

    with open(os.path.join(self.out_dir, "merged_seq.fa"), "w") as out:
        for _, row in align.iterrows():
            query = row['query_name']
            target = row['target_name']
            fa = FastaHack(self.query_fas[query])

            if curr_target_name != target:
                # Terminate the previous record before starting a new one.
                if wrote_header:
                    out.write("\n")
                wrote_header = True
                out.write(">{0}\n".format(target))
                curr_target_name = target

            fasub = fa.get_sequence(query)
            if row['query_strand'] == "-":
                fasub = futil.reverse_complement(fasub)

            pieces, start = futil.format_fasta(fasub, start)
            out.writelines(pieces)

            # Separate consecutive query segments with a run of Ns.
            pieces, start = futil.format_fasta(spacer, start)
            out.writelines(pieces)

        out.write("\n")
def main(argv):
    """Convert the "s" lines of an alignment file into a FASTA file.

    Lines beginning with ``s`` are consumed in alternating order: the
    first of each pair is treated as the target and the second as the
    query. A FASTA header is written whenever the target name (field 1)
    changes. For each query line, gap characters (``-``) are stripped
    from the sequence (field 6), it is reverse-complemented when the
    strand (field 4) is ``"-"``, and it is appended to the current
    record via ``futil.write_segment``.

    NOTE(review): the field layout looks like MAF "s" lines -- confirm
    against the producer of the input file.

    Args:
        argv: Command-line argument list; ``argv[1]`` is the input path.
            The output is written next to it with the extension
            replaced by ``.fa``.
    """
    in_path = argv[1]
    out_path = os.path.splitext(in_path)[0] + ".fa"

    expecting_target = True
    curr_target_name = ""
    # Value threaded through futil.write_segment between calls and reset
    # for every new record; presumably the running line offset used for
    # wrapping -- TODO confirm.
    start = 0

    with open(in_path) as infile, open(out_path, "w") as outfile:
        for line in infile:
            if not line.startswith("s"):
                continue
            sline = line.split()

            if expecting_target:
                expecting_target = False
                target = sline[1]
                if curr_target_name != target:
                    # Terminate the previous record, if there was one.
                    if curr_target_name != "":
                        outfile.write("\n")
                    start = 0
                    outfile.write(">{0}\n".format(target))
                    curr_target_name = target
            else:
                expecting_target = True
                # str.replace behaves identically on Python 2 and 3,
                # whereas translate(None, "-") only works on Python 2.
                seq = sline[6].replace("-", "")
                if sline[4] == "-":
                    seq = futil.reverse_complement(seq)
                start = futil.write_segment(seq, start, outfile)

        outfile.write("\n")