def main():
    """Write productive, sufficiently expressed recombinants of each cell as FASTA.

    Command-line entry point. Every immediate subdirectory ``<d>`` of the
    positional ``dir`` argument is checked for
    ``<d>/filtered_TCR_seqs/<d>.pkl``. Each pickle that exists is loaded and,
    unless the cell is empty (or is an iNKT cell while ``--ignore_inkt`` is
    given), its recombinants are written to two files:

    * ``<output_prefix>.dna_seq.fa``     -- the recombinant DNA sequence
    * ``<output_prefix>.protein_seq.fa`` -- its translation

    Only recombinants flagged ``productive`` are exported, and only for
    locus ``A`` with TPM > 10 or locus ``B`` with TPM > 15.

    Record headers look like
    ``>cell|locus|contig|identifier productive=.. in_frame=.. stop_codon=.. cdr3=.. TPM=..``.

    ``--sample`` is accepted but not used by this function.
    """
    parser = argparse.ArgumentParser(description = "get cell inform from pickle file")
    parser.add_argument('dir', metavar="<DIR>", help='directory containing subdirectories for each cell to be summarised')
    parser.add_argument('--ignore_inkt', '-i', help='ignore iNKT cells ', action="store_true")
    parser.add_argument("--sample", dest="sample_id", type=str, nargs='?', default="SAMPLE", help="sample id")
    parser.add_argument("-o", "--output_prefix", metavar="STR", type=str, dest="output_prefix",
                        default="output_prefix", help="output prefix [default: %(default)s]")
    args = parser.parse_args()

    root_dir = os.path.abspath(args.dir)
    if not os.path.isdir(root_dir):
        # os.walk() yields nothing for a missing directory, which would
        # otherwise surface as a bare StopIteration traceback.
        parser.error("not a directory: %s" % root_dir)

    # The first tuple yielded by os.walk() describes root_dir itself; its
    # second element lists the immediate subdirectories. The next() builtin
    # works on both Python 2 and 3 (the .next() method is Python 2 only).
    subdirectories = next(os.walk(root_dir))[1]

    pkl_dir = "filtered_TCR_seqs"
    outdir = "{}/filtered_TCR_summary".format(root_dir)

    # Nothing is written to outdir here; the call is kept for its side effect
    # (presumably creating the directory -- see tracer.makeOutputDir).
    tracer.makeOutputDir(outdir)

    # Minimum TPM a productive recombinant must exceed, per locus. Loci
    # without an entry are never exported.
    min_tpm = {"A": 10, "B": 15}

    dna_path = "%s.dna_seq.fa" % args.output_prefix
    protein_path = "%s.protein_seq.fa" % args.output_prefix
    with open(dna_path, "w") as out1, open(protein_path, "w") as out2:
        for d in subdirectories:
            cell_pkl = "{root_dir}/{d}/{pkl_dir}/{d}.pkl".format(pkl_dir=pkl_dir, d=d, root_dir=root_dir)
            if os.path.isfile(cell_pkl):
                # NOTE(security): unpickling can execute arbitrary code; only
                # run this on directories produced by a trusted pipeline.
                # Pickles are binary data, hence "rb".
                with open(cell_pkl, "rb") as pkl_handle:
                    cl = pickle.load(pkl_handle)
                if not cl.is_empty and not (cl.is_inkt and args.ignore_inkt):
                    for locus in ['A', 'B', 'D', 'G']:
                        recombinants = cl.all_recombinants[locus]
                        if recombinants is None:
                            continue
                        threshold = min_tpm.get(locus)
                        if threshold is None:
                            continue
                        for recombinant in recombinants:
                            if not (recombinant.productive and recombinant.TPM > threshold):
                                continue
                            # Translate and annotate only the records that are
                            # actually written out.
                            aaseq = Seq(str(recombinant.dna_seq), generic_dna).translate()
                            seqAnn = "productive=%s in_frame=%s stop_codon=%s cdr3=%s TPM=%f" % (
                                recombinant.productive, recombinant.in_frame,
                                recombinant.stop_codon, recombinant.cdr3, recombinant.TPM)
                            header = "|".join([cl.name, locus, recombinant.contig_name, recombinant.identifier])
                            print(">%s %s\n%s" % (header, seqAnn, recombinant.dna_seq), file=out1)
                            print(">%s %s\n%s" % (header, seqAnn, str(aaseq)), file=out2)
            # One blank line is emitted per subdirectory, whether or not it
            # contained a pickle or produced any records.
            print("", file=out1)
            print("", file=out2)
def main():
    """Write every recombinant of each cell as FASTA (DNA and protein).

    Command-line entry point. Every immediate subdirectory ``<d>`` of the
    positional ``dir`` argument is checked for
    ``<d>/filtered_TCR_seqs/<d>.pkl``. Each pickle that exists is loaded and,
    unless the cell is empty (or is an iNKT cell while ``--ignore_inkt`` is
    given), all recombinants of loci A, B, D and G are written to two files:

    * ``<output_prefix>.dna_seq.fa``     -- the recombinant DNA sequence
    * ``<output_prefix>.protein_seq.fa`` -- its translation

    Record headers look like
    ``>cell|locus|contig|identifier productive=.. in_frame=.. stop_codon=.. cdr3=..``.

    ``--sample`` is accepted but not used by this function.
    """
    parser = argparse.ArgumentParser(description = "get cell inform from pickle file")
    parser.add_argument('dir', metavar="<DIR>", help='directory containing subdirectories for each cell to be summarised')
    parser.add_argument('--ignore_inkt', '-i', help='ignore iNKT cells ', action="store_true")
    parser.add_argument("--sample", dest="sample_id", type=str, nargs='?', default="SAMPLE", help="sample id")
    parser.add_argument("-o", "--output_prefix", metavar="STR", type=str, dest="output_prefix",
                        default="output_prefix", help="output prefix [default: %(default)s]")
    args = parser.parse_args()

    root_dir = os.path.abspath(args.dir)
    if not os.path.isdir(root_dir):
        # os.walk() yields nothing for a missing directory, which would
        # otherwise surface as a bare StopIteration traceback.
        parser.error("not a directory: %s" % root_dir)

    # The first tuple yielded by os.walk() describes root_dir itself; its
    # second element lists the immediate subdirectories. The next() builtin
    # works on both Python 2 and 3 (the .next() method is Python 2 only).
    subdirectories = next(os.walk(root_dir))[1]

    pkl_dir = "filtered_TCR_seqs"
    outdir = "{}/filtered_TCR_summary".format(root_dir)

    # Nothing is written to outdir here; the call is kept for its side effect
    # (presumably creating the directory -- see tracer.makeOutputDir).
    tracer.makeOutputDir(outdir)

    dna_path = "%s.dna_seq.fa" % args.output_prefix
    protein_path = "%s.protein_seq.fa" % args.output_prefix
    with open(dna_path, "w") as out1, open(protein_path, "w") as out2:
        for d in subdirectories:
            cell_pkl = "{root_dir}/{d}/{pkl_dir}/{d}.pkl".format(pkl_dir=pkl_dir, d=d, root_dir=root_dir)
            if os.path.isfile(cell_pkl):
                # NOTE(security): unpickling can execute arbitrary code; only
                # run this on directories produced by a trusted pipeline.
                # Pickles are binary data, hence "rb".
                with open(cell_pkl, "rb") as pkl_handle:
                    cl = pickle.load(pkl_handle)
                if not cl.is_empty and not (cl.is_inkt and args.ignore_inkt):
                    for locus in ['A', 'B', 'D', 'G']:
                        recombinants = cl.all_recombinants[locus]
                        if recombinants is None:
                            continue
                        for recombinant in recombinants:
                            aaseq = Seq(str(recombinant.dna_seq), generic_dna).translate()
                            seqAnn = "productive=%s in_frame=%s stop_codon=%s cdr3=%s" % (
                                recombinant.productive, recombinant.in_frame,
                                recombinant.stop_codon, recombinant.cdr3)
                            # Both files share the same header line.
                            header = "|".join([cl.name, locus, recombinant.contig_name, recombinant.identifier])
                            print(">%s %s\n%s" % (header, seqAnn, recombinant.dna_seq), file=out1)
                            print(">%s %s\n%s" % (header, seqAnn, str(aaseq)), file=out2)
            # One blank line is emitted per subdirectory, whether or not it
            # contained a pickle or produced any records.
            print("", file=out1)
            print("", file=out2)
# Example #3
# 0
def main():
    """Load all per-cell pickles, report the cells and plot clonotype sizes.

    Command-line entry point. Every immediate subdirectory ``<d>`` of the
    positional ``dir`` argument is checked for
    ``<d>/filtered_TCR_seqs/<d>.pkl``; each pickle that exists is loaded
    into ``cells`` keyed by subdirectory name. Cells flagged ``is_empty``
    are then dropped, and cells flagged ``is_inkt`` are dropped as well
    unless ``--keep_inkt`` is given.

    The remaining cells are passed to ``get_cells_component`` together with
    the ``--sample`` id, and a bar chart of
    ``tracer.get_component_groups_sizes(cells)`` is saved as
    ``<dir>/filtered_TCR_summary/clonotype_sizes_test.pdf``.
    """
    parser = argparse.ArgumentParser(description = "Summarise set of cells with reconstructed TCR sequences")
    parser.add_argument('dir', metavar="<DIR>", help='directory containing subdirectories for each cell to be summarised')
    # The flag *keeps* iNKT cells (they are removed when it is absent), so
    # the help text says so.
    parser.add_argument('--keep_inkt', '-i', help='keep iNKT cells (they are removed by default)', action="store_true")
    parser.add_argument("--sample", dest="sample_id", type=str, nargs='?', default="SAMPLE", help="sample id")
    args = parser.parse_args()

    root_dir = os.path.abspath(args.dir)
    if not os.path.isdir(root_dir):
        # os.walk() yields nothing for a missing directory, which would
        # otherwise surface as a bare StopIteration traceback.
        parser.error("not a directory: %s" % root_dir)

    cells = {}
    empty_cells = []
    NKT_cells = {}
    # The first tuple yielded by os.walk() describes root_dir itself; its
    # second element lists the immediate subdirectories. The next() builtin
    # works on both Python 2 and 3 (the .next() method is Python 2 only).
    subdirectories = next(os.walk(root_dir))[1]

    pkl_dir = "filtered_TCR_seqs"
    outdir = "{}/filtered_TCR_summary".format(root_dir)

    tracer.makeOutputDir(outdir)

    for d in subdirectories:
        cell_pkl = "{root_dir}/{d}/{pkl_dir}/{d}.pkl".format(pkl_dir=pkl_dir, d=d, root_dir=root_dir)
        if os.path.isfile(cell_pkl):
            # NOTE(security): unpickling can execute arbitrary code; only run
            # this on directories produced by a trusted pipeline.
            # Pickles are binary data, hence "rb".
            with open(cell_pkl, "rb") as pkl_handle:
                cl = pickle.load(pkl_handle)
            cells[d] = cl
            if cl.is_empty:
                empty_cells.append(d)
            if cl.is_inkt:
                NKT_cells[d] = (cl.is_inkt, cl.getMainRecombinantIdentifiersForLocus('B'))

    for cell_name in empty_cells:
        del cells[cell_name]
    if not args.keep_inkt:
        for cell_name in NKT_cells:
            # pop() rather than del: a cell flagged both empty and iNKT has
            # already been removed by the loop above.
            cells.pop(cell_name, None)
    #output cells' info
    get_cells_component(cells, args.sample_id)
    #plot clonotype sizes
    plt.figure()
    clonotype_sizes = tracer.get_component_groups_sizes(cells)
    w = 0.85
    # Bars are placed at x = 1..N, one per entry in clonotype_sizes.
    x_range = range(1, len(clonotype_sizes) + 1)
    plt.bar(x_range, height=clonotype_sizes, width=w, color='black', align='center')
    plt.gca().set_xticks(x_range)
    plt.savefig("{}/clonotype_sizes_test.pdf".format(outdir))