# Post-analysis driver script.
#
# Reads the sequence table found in the input directory, reports how many
# sequences it contains, groups the sequences into clones keyed on
# `keywords_3`, and records the counts both on stdout and in Summary.txt.
#
# Usage (taken from the parser description):
#     python PostAnalysis.py -d path -s species -c chain

# NOTE(review): argparse and os are used below but were not imported in the
# visible source. Importing them here is harmless if they are also imported
# somewhere else in the file.
import argparse
import os

import IsolateClone
import ParseTable
import ClusterClone
import translator
import AnnotateProtein
import WriteFast
import ReadIgBlastn

parser = argparse.ArgumentParser(
    prog='cat all.xls files',
    description="python PostAnalysis.py -d path -s species -c chain",
    epilog='',
)
# NOTE(review): the default directory is a machine-specific absolute path;
# callers on other machines must pass -d explicitly.
parser.add_argument(
    '-d', '--directory',
    help='input file directory',
    default='/home/zhaiqi1/NGS/mycode/Ab_NGS_4/test/results',
    action='store',
)
parser.add_argument('-s', '--species', help='mouse, rabbit or human', default="mouse")
parser.add_argument('-c', '--chain', help="folder", default="H")
args = parser.parse_args()

############### read the table from the Fastq2fastA ################
# ParseTable.ParseTable returns a pair: the retained records and a sequence
# count. NOTE(review): presumably raw_AbDict is a dict of records that passed
# the keyword filter -- confirm against ParseTable.
raw_AbDict, count_seq = ParseTable.ParseTable(args.directory)

# Single-argument, parenthesised print behaves identically on Python 2 and 3.
print("There are total %s sequences in the table." % str(count_seq))

# The same line is echoed to stdout and written to the summary file, so build
# it once to keep the two outputs in sync.
_requirement_msg = (
    "Total number of sequences meets the keywords requirement\t:%s\n"
    % str(len(raw_AbDict))
)
print(_requirement_msg)

# Left open deliberately: the handle is not closed anywhere in the visible
# part of the script, so later code may still write to it.
Outfile_summary = open(os.path.join(args.directory, "Summary.txt"), 'w')
Outfile_summary.write(_requirement_msg)

############### cluster the clone based on the keywords_3, and then correct the pcr error ###############
keywords_3 = ['CDR3-PRO', 'RID', 'DNAlen']
groupDict = IsolateClone.identifyClone(raw_AbDict, keywords_3)
Outfile_keywords3 = os.path.join(args.directory, "uniqueclone.txt")
# This output has not been corrected (it is written before any PCR-error
# correction step).
IsolateClone.writeCount(groupDict, Outfile_keywords3, keywords_3)

# Build the message from keywords_3 itself so the reported grouping keys can
# never drift from the keys actually used (the previous hard-coded text named
# CDR3-DNA and GERMLINE-V, which are not in keywords_3). The message ends at
# the newline so the next summary line does not start with a stray space.
_clone_msg = "There are DNA sequences by same %s : %s \n" % (
    ", ".join(keywords_3),
    str(len(groupDict)),
)
Outfile_summary.write(_clone_msg)
print(_clone_msg)