def main():
    """Write FASTA records for productive, expressed alpha/beta recombinants.

    Command-line entry point. For every sub-directory ``<d>`` of the input
    directory, ``<d>/filtered_TCR_seqs/<d>.pkl`` is unpickled when it exists.
    Cells flagged ``is_empty`` are skipped, as are cells flagged ``is_inkt``
    when ``--ignore_inkt`` is given. Loci A, B, D and G are scanned, but a
    recombinant is only written when it is productive and its TPM exceeds the
    per-locus threshold (A: > 10, B: > 15), so D and G never produce output.

    Output goes to ``<output_prefix>.dna_seq.fa`` (nucleotide sequence) and
    ``<output_prefix>.protein_seq.fa`` (translated sequence). ``--sample`` is
    parsed but not used by this function.
    """
    parser = argparse.ArgumentParser(description="get cell inform from pickle file")
    parser.add_argument('dir', metavar="<DIR>",
                        help='directory containing subdirectories for each cell to be summarised')
    parser.add_argument('--ignore_inkt', '-i', help='ignore iNKT cells ', action="store_true")
    parser.add_argument("--sample", dest="sample_id", type=str, nargs='?', default="SAMPLE",
                        help="sample id")
    parser.add_argument("-o", "--output_prefix", metavar="STR", type=str, dest="output_prefix",
                        default="output_prefix", help="output prefix [default: %(default)s]")
    args = parser.parse_args()

    root_dir = os.path.abspath(args.dir)
    # The builtin next() works on Python 2.6+ and 3; the generator's .next()
    # method only exists on Python 2.
    subdirectories = next(os.walk(root_dir))[1]
    pkl_dir = "filtered_TCR_seqs"
    outdir = "{}/filtered_TCR_summary".format(root_dir)
    # Presumably creates the summary directory; the FASTA files themselves are
    # written relative to --output_prefix, not into outdir.
    tracer.makeOutputDir(outdir)

    # Minimum TPM (exclusive) a productive recombinant needs, per locus.
    # Loci missing from this table are never written.
    min_tpm = {"A": 10, "B": 15}

    # Context managers make sure both FASTA files are flushed and closed even
    # if a pickle or a translation fails part-way through.
    with open("%s.dna_seq.fa" % args.output_prefix, "w") as out1, \
            open("%s.protein_seq.fa" % args.output_prefix, "w") as out2:
        for d in subdirectories:
            cell_pkl = "{root_dir}/{d}/{pkl_dir}/{d}.pkl".format(pkl_dir=pkl_dir, d=d,
                                                                  root_dir=root_dir)
            if not os.path.isfile(cell_pkl):
                continue
            # Pickles must be read in binary mode; only run this on trusted
            # directories, since unpickling can execute arbitrary code.
            with open(cell_pkl, 'rb') as pkl_handle:
                cl = pickle.load(pkl_handle)
            if cl.is_empty or (cl.is_inkt and args.ignore_inkt):
                continue
            for locus in ['A', 'B', 'D', 'G']:
                recombinants = cl.all_recombinants[locus]
                if recombinants is None:
                    continue
                for recombinant in recombinants:
                    aaseq = Seq(str(recombinant.dna_seq), generic_dna).translate()
                    seqAnn = "productive=%s in_frame=%s stop_codon=%s cdr3=%s TPM=%f" % (
                        recombinant.productive, recombinant.in_frame,
                        recombinant.stop_codon, recombinant.cdr3, recombinant.TPM)
                    if (recombinant.productive and locus in min_tpm
                            and recombinant.TPM > min_tpm[locus]):
                        header = "|".join([cl.name, locus, recombinant.contig_name,
                                           recombinant.identifier])
                        print(">%s %s\n%s" % (header, seqAnn, recombinant.dna_seq), file=out1)
                        print(">%s %s\n%s" % (header, seqAnn, str(aaseq)), file=out2)
                        # Blank separator line after each record.
                        print("", file=out1)
                        print("", file=out2)
def main():
    """Write FASTA records for every recombinant of every non-empty cell.

    Command-line entry point. For every sub-directory ``<d>`` of the input
    directory, ``<d>/filtered_TCR_seqs/<d>.pkl`` is unpickled when it exists.
    Cells flagged ``is_empty`` are skipped, as are cells flagged ``is_inkt``
    when ``--ignore_inkt`` is given. Every recombinant found for loci A, B, D
    and G is written, annotated with its productive / in_frame / stop_codon /
    cdr3 attributes.

    Output goes to ``<output_prefix>.dna_seq.fa`` (nucleotide sequence) and
    ``<output_prefix>.protein_seq.fa`` (translated sequence). ``--sample`` is
    parsed but not used by this function.
    """
    parser = argparse.ArgumentParser(description="get cell inform from pickle file")
    parser.add_argument('dir', metavar="<DIR>",
                        help='directory containing subdirectories for each cell to be summarised')
    parser.add_argument('--ignore_inkt', '-i', help='ignore iNKT cells ', action="store_true")
    parser.add_argument("--sample", dest="sample_id", type=str, nargs='?', default="SAMPLE",
                        help="sample id")
    parser.add_argument("-o", "--output_prefix", metavar="STR", type=str, dest="output_prefix",
                        default="output_prefix", help="output prefix [default: %(default)s]")
    args = parser.parse_args()

    root_dir = os.path.abspath(args.dir)
    # The builtin next() works on Python 2.6+ and 3; the generator's .next()
    # method only exists on Python 2.
    subdirectories = next(os.walk(root_dir))[1]
    pkl_dir = "filtered_TCR_seqs"
    outdir = "{}/filtered_TCR_summary".format(root_dir)
    # Presumably creates the summary directory; the FASTA files themselves are
    # written relative to --output_prefix, not into outdir.
    tracer.makeOutputDir(outdir)

    # Context managers make sure both FASTA files are flushed and closed even
    # if a pickle or a translation fails part-way through.
    with open("%s.dna_seq.fa" % args.output_prefix, "w") as out1, \
            open("%s.protein_seq.fa" % args.output_prefix, "w") as out2:
        for d in subdirectories:
            cell_pkl = "{root_dir}/{d}/{pkl_dir}/{d}.pkl".format(pkl_dir=pkl_dir, d=d,
                                                                  root_dir=root_dir)
            if not os.path.isfile(cell_pkl):
                continue
            # Pickles must be read in binary mode; only run this on trusted
            # directories, since unpickling can execute arbitrary code.
            with open(cell_pkl, 'rb') as pkl_handle:
                cl = pickle.load(pkl_handle)
            if cl.is_empty or (cl.is_inkt and args.ignore_inkt):
                continue
            for locus in ['A', 'B', 'D', 'G']:
                recombinants = cl.all_recombinants[locus]
                if recombinants is None:
                    continue
                for recombinant in recombinants:
                    aaseq = Seq(str(recombinant.dna_seq), generic_dna).translate()
                    seqAnn = "productive=%s in_frame=%s stop_codon=%s cdr3=%s" % (
                        recombinant.productive, recombinant.in_frame,
                        recombinant.stop_codon, recombinant.cdr3)
                    header = "|".join([cl.name, locus, recombinant.contig_name,
                                       recombinant.identifier])
                    print(">%s %s\n%s" % (header, seqAnn, recombinant.dna_seq), file=out1)
                    print(">%s %s\n%s" % (header, seqAnn, str(aaseq)), file=out2)
                    # Blank separator line after each record.
                    print("", file=out1)
                    print("", file=out2)
def main():
    """Summarise reconstructed TCR cells and plot clonotype group sizes.

    Command-line entry point. For every sub-directory ``<d>`` of the input
    directory, ``<d>/filtered_TCR_seqs/<d>.pkl`` is unpickled when it exists.
    Cells flagged ``is_empty`` are always dropped; cells flagged ``is_inkt``
    are dropped unless ``--keep_inkt`` is given. The remaining cells are
    passed to ``get_cells_component`` together with the sample id, and the
    sizes returned by ``tracer.get_component_groups_sizes`` are drawn as a bar
    chart saved to ``<DIR>/filtered_TCR_summary/clonotype_sizes_test.pdf``.
    """
    parser = argparse.ArgumentParser(
        description="Summarise set of cells with reconstructed TCR sequences")
    parser.add_argument('dir', metavar="<DIR>",
                        help='directory containing subdirectories for each cell to be summarised')
    # The help text used to say "ignore", which is the opposite of what the
    # flag does: iNKT cells are removed unless this flag is set.
    parser.add_argument('--keep_inkt', '-i',
                        help='keep iNKT cells when constructing networks '
                             '(they are removed by default)',
                        action="store_true")
    parser.add_argument("--sample", dest="sample_id", type=str, nargs='?', default="SAMPLE",
                        help="sample id")
    args = parser.parse_args()

    root_dir = os.path.abspath(args.dir)
    cells = {}
    empty_cells = []
    NKT_cells = {}
    # The builtin next() works on Python 2.6+ and 3; the generator's .next()
    # method only exists on Python 2.
    subdirectories = next(os.walk(root_dir))[1]
    pkl_dir = "filtered_TCR_seqs"
    outdir = "{}/filtered_TCR_summary".format(root_dir)
    tracer.makeOutputDir(outdir)

    for d in subdirectories:
        cell_pkl = "{root_dir}/{d}/{pkl_dir}/{d}.pkl".format(pkl_dir=pkl_dir, d=d,
                                                              root_dir=root_dir)
        if not os.path.isfile(cell_pkl):
            continue
        # Pickles must be read in binary mode; only run this on trusted
        # directories, since unpickling can execute arbitrary code.
        with open(cell_pkl, 'rb') as pkl_handle:
            cl = pickle.load(pkl_handle)
        cells[d] = cl
        if cl.is_empty:
            empty_cells.append(d)
        if cl.is_inkt:
            NKT_cells[d] = (cl.is_inkt, cl.getMainRecombinantIdentifiersForLocus('B'))

    for cell_name in empty_cells:
        del cells[cell_name]

    if not args.keep_inkt:
        for cell_name in NKT_cells:
            # pop() rather than del: a cell flagged both empty and iNKT was
            # already removed by the loop above.
            cells.pop(cell_name, None)

    # output cells' info
    get_cells_component(cells, args.sample_id)

    # plot clonotype sizes
    plt.figure()
    clonotype_sizes = tracer.get_component_groups_sizes(cells)
    w = 0.85
    x_range = range(1, len(clonotype_sizes) + 1)
    plt.bar(x_range, height=clonotype_sizes, width=w, color='black', align='center')
    plt.gca().set_xticks(x_range)
    plt.savefig("{}/clonotype_sizes_test.pdf".format(outdir))