def optParse(errorflag):
    """Read the command line options for unique substitution counting.

    errorflag selects the mode:
      0 -- check the parsed options and return the tuple
           (input, target_specs, output_suffix).
      1 -- print the usage message and exit.
    Any other value falls through and returns None.

    A missing required option is reported through gwctcore.errorOut, after
    which the function calls itself in mode 1 to print the usage and exit.
    """
    cli = argparse.ArgumentParser(description="Unique substitution counting.")
    cli.add_argument(
        "-i",
        dest="input",
        help="Input directory containing a single run_codeml output directory or codeml_combined directory.")
    cli.add_argument(
        "-t",
        dest="target_specs",
        help='A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: "triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2"')
    cli.add_argument(
        "-o",
        dest="output_suffix",
        help="The SUFFIX of the directory name to be created by the script. Something descriptive for your run.",
        default="")
    opts = cli.parse_args()

    if errorflag == 1:
        cli.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    # (destination, error code, message) for each option that has no default.
    required = (
        ("input", 1, "-i must be defined"),
        ("target_specs", 2, "-t must be defined"),
    )
    for dest, code, message in required:
        if getattr(opts, dest) is None:
            gwctcore.errorOut(code, message)
            optParse(1)

    return opts.input, opts.target_specs, opts.output_suffix
def optParse():
    """Parse and validate the command line options for the codeml runner.

    Returns:
        The tuple (input, paml_path, tree_file, prune_opt, verbosity, output).
        input and paml_path are converted to absolute paths before returning.

    Every failed check ends the script with
    sys.exit(gc.errorOut(code, message)).
    """
    parser = argparse.ArgumentParser(description="Runs codeml on a directory full of .fa files. Files MUST have .fa extension. Dependencies: PAML, newickutils (if you want to prune your tree with --prune)")
    parser.add_argument("-i", dest="input", help="Input. A directory containing many FASTA (.fa) files.")
    parser.add_argument("-p", dest="paml_path", help="You must specify the full path to your PAML DIRECTORY here.")
    parser.add_argument("-t", dest="tree_file", help="A user specified tree for codeml to use. If not specified, codeml will infer the tree.", default="")
    parser.add_argument("--prune", dest="prune_opt", help="If not all species present in the tree will be present in each alignment, set this flag to prune the tree for each file.", action="store_true")
    # parser.add_argument("-seqtype", dest="paml_seqtype", help="Enter either 'codon' or 'aa'. Default value is 'codon'.", default='codon');
    parser.add_argument("-v", dest="verbosity", help="An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1", type=int, default=1)
    parser.add_argument("-o", dest="output", help="Desired output directory. If none is entered, will be determined automatically.", default=False)
    args = parser.parse_args()

    # Both directories are required; the messages name both options because
    # either one can be the cause of the failure.
    if args.input is None or args.paml_path is None:
        sys.exit(gc.errorOut(1, "Both -i and -p must be set."))
    if not os.path.isdir(args.input) or not os.path.isdir(args.paml_path):
        sys.exit(gc.errorOut(2, "Both -i and -p must be valid directory paths!"))
    args.input = os.path.abspath(args.input)
    args.paml_path = os.path.abspath(args.paml_path)

    # NOTE(review): the -t help text says the tree is optional, but this check
    # rejects the default "" as well, so a tree file is effectively required.
    # Left as-is; confirm the intended behaviour before relaxing it.
    if not os.path.isfile(args.tree_file):
        sys.exit(gc.errorOut(3, "-t must be a valid file name."))

    try:
        # The context manager closes the handle even if parsing fails.
        with open(args.tree_file, "r") as tree_handle:
            newick = tree_handle.read().replace("\n", "")
        # The parse is run purely as a validity check; its results are unused.
        td, tree, r = gt.treeParse(newick)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and sys.exit still propagate.
        sys.exit(gc.errorOut(4, "-t does not contain a valid Newick string!"))

    if args.verbosity not in [0, 1]:
        sys.exit(gc.errorOut(6, "-v must take values of either 1 or 0"))

    return args.input, args.paml_path, args.tree_file, args.prune_opt, args.verbosity, args.output
def optParse(errorflag):
    """Handle the command line options for unique substitution counting.

    With errorflag == 0 the options are checked and returned as
    (input, target_specs, output_suffix).  With errorflag == 1 the usage
    message is printed and the script exits.  Other values return None.
    """
    # Flag -> keyword arguments for add_argument, registered in this order.
    option_specs = [
        ("-i", {
            "dest": "input",
            "help": "Input directory containing a single run_codeml output directory or codeml_combined directory.",
        }),
        ("-t", {
            "dest": "target_specs",
            "help": 'A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: "triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2"',
        }),
        ("-o", {
            "dest": "output_suffix",
            "help": "The SUFFIX of the directory name to be created by the script. Something descriptive for your run.",
            "default": "",
        }),
    ]
    parser = argparse.ArgumentParser(description="Unique substitution counting.")
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    parsed = parser.parse_args()

    if errorflag == 0:
        # -i and -t have no defaults, so None means the option was not given.
        if parsed.input is None:
            gwctcore.errorOut(1, "-i must be defined")
            optParse(1)
        if parsed.target_specs is None:
            gwctcore.errorOut(2, "-t must be defined")
            optParse(1)
        return parsed.input, parsed.target_specs, parsed.output_suffix

    if errorflag == 1:
        parser.print_help()
        sys.exit()
def optParse(errorflag):
    """Parse and validate the command line options for the codeml runner.

    errorflag selects the mode:
      0 -- validate the options and return the tuple
           (input, paml_path, tree_file, prune_opt, paml_seqtype,
            branch_site, anc_opt, verbosity, log_opt, logdir_suffix).
      1 -- print the usage message and exit.
    Any other value returns None.

    A failed check is reported with gwctcore.errorOut(code, message), then
    the usage message is printed and the script exits.
    """
    parser = argparse.ArgumentParser(description="Runs codeml on a single .fa file or a directory full of .fa files. Dependencies: PAML, newickutils (if you want to prune your tree)")
    parser.add_argument("-i", dest="input", help="Input. Either a directory containing many FASTA files or a single FASTA file.")
    parser.add_argument("-c", dest="paml_path", help="You must specify the full path to your PAML DIRECTORY here.")
    parser.add_argument("-t", dest="tree_file", help="A user specified tree for codeml to use. If not specified, codeml will infer the tree.", default="")
    parser.add_argument("-p", dest="prune_opt", help="If not all species present in the tree will be present in each alignment, set this to 1 to prune the tree for each file. Default: 0", type=int, default=0)
    parser.add_argument("-s", dest="paml_seqtype", help="The seqtype for codeml to use. 1 (default): Codons; 2: Amino Acids", type=int, default=1)
    parser.add_argument("-b", dest="branch_site", help="Specifies the type of run for PAML's branch site test. 0 (default): Do not do branch site test; 1: Do the null model of the branch site test (model=2, NSsite=2, fix_omega=1, omega=1); 2: Do the alternate model of the branch site test (model=2, NSsite=2, fix_omega=0, omega=1). A branch must be specified in your tree file.", type=int, default=0)
    parser.add_argument("-a", dest="anc_opt", help="Option to tell PAML to do ancestral reconstruction (1) or not (0). Default: 0.", type=int, default=0)
    parser.add_argument("-v", dest="verbosity", help="An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1", type=int, default=1)
    parser.add_argument("-l", dest="log_opt", help="A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1", type=int, default=1)
    parser.add_argument("-x", dest="logdir_suffix", help="A string to add on to the end of the output directory.")
    args = parser.parse_args()

    def show_help_and_exit():
        parser.print_help()
        sys.exit()

    def fail(code, message):
        # Report the problem, then show the usage and stop.  This avoids
        # re-entering optParse (and re-parsing argv) just to print the help.
        gwctcore.errorOut(code, message)
        show_help_and_exit()

    if errorflag == 1:
        show_help_and_exit()
    if errorflag != 0:
        return None

    if args.input is None or args.paml_path is None:
        fail(1, "Both -i and -c must be set")
    # -t is optional (default ""), so only a tree file that was actually given
    # is validated.  Checking unconditionally rejected the default and made
    # the "-p needs a tree" check below unreachable.
    if args.tree_file != "" and not os.path.isfile(args.tree_file):
        fail(2, "-t must be a valid tree file name")
    if args.prune_opt not in [0, 1]:
        fail(3, "-p must take values of either 1 or 0")
    if args.prune_opt == 1 and args.tree_file == "":
        fail(4, "With -p set to 1 a tree file must be specified")
    if args.paml_seqtype not in [1, 2]:
        fail(5, "-s must take values of either 1 or 2")
    if args.branch_site not in [0, 1, 2]:
        fail(6, "-b must take values of 0, 1, or 2")
    if args.anc_opt not in [0, 1]:
        fail(7, "-a must take values of 1 or 0")
    if args.verbosity not in [0, 1]:
        fail(8, "-v must take values of either 1 or 0")
    if args.log_opt not in [0, 1]:
        fail(9, "-l must take values of either 1 or 0")

    # -x has no default; normalise "not given" to an empty suffix.
    if args.logdir_suffix is None:
        args.logdir_suffix = ""

    return (args.input, args.paml_path, args.tree_file, args.prune_opt,
            args.paml_seqtype, args.branch_site, args.anc_opt, args.verbosity,
            args.log_opt, args.logdir_suffix)
#def core.logCheck(lopt, lfilename, outline):
#	if lopt == 1:
#		core.printWrite(lfilename, outline);
#	else:
#		print outline;

############################################
#Main Block
############################################

# Mode 0: parse and validate the command line, returning the option values.
ins, ppath, treefile, prune, seqtype, bsopt, aopt, v, l, outdir_suffix = optParse(0)
starttime = gwctcore.getLogTime()

# The PAML path is checked here rather than inside optParse; mode 1 prints
# the usage message and exits.
if not os.path.isdir(ppath):
    gwctcore.errorOut(10, "-c must be a valid directory path")
    optParse(1)

# fileflag records whether the input is a single file (1) or not (0).
fileflag = 1 if os.path.isfile(ins) else 0

# For a single file the output location is derived from the file's directory;
# otherwise the input path itself is handed to getOutdir.
indir = os.path.dirname(os.path.realpath(ins)) if fileflag == 1 else ins
indir, script_outdir = gwctcore.getOutdir(indir, "run_codeml", starttime, outdir_suffix)
outdir = os.path.join(script_outdir, "codeml_out")

if fileflag == 1:
    if aopt == 1:
        ancdir = os.path.join(script_outdir, "anc_seqs_fa")
    filelist = [ins]
def optParse():
    """Parse and validate the options for convergent substitution counting.

    Returns the tuple
    (input, target_specs, pairwise_opt, prob_thresh, number_threads, output).
    Each failed check ends the script with
    sys.exit(gc.errorOut(code, message)).
    """
    def bail(code, message):
        # Single exit path shared by every validation check below.
        sys.exit(gc.errorOut(code, message))

    cli = argparse.ArgumentParser(
        description="Convergent substitution counting. Dependencies: ancestral sequences from PAML formatted with gwct_codeml.py")
    cli.add_argument(
        "-i", dest="input",
        help="Input directory. This is the output directory of a gwct_codeml.py run and should include the folder 'anc-seqs-fa'. Will count convergent, divergent, and unique substitutions, or make pairwise comparisons if -w is set.")
    cli.add_argument(
        "-t", dest="target_specs", default="",
        help='A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: "triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2". If a target species is missing from a given alignment, it will be removed from the list of target species.')
    cli.add_argument(
        "-w", dest="pairwise_opt", type=int, default=0,
        help="Option to tell the program to simply do all pairwise comparisons of tip branches (1) or pairwise comparisons of all branches (2) and make a C/D graph. With -w set, unique substitutions will not be counted. Default: 0, do not do pairwise comparisons")
    cli.add_argument(
        "-p", dest="prob_thresh", type=float, default=0,
        help="A probability threshold to only retrieve convergent sites with probabilities greater than or equal to. Set to 0 for no threshold. Default: 0")
    cli.add_argument(
        "-n", dest="number_threads", type=int, default=1,
        help="The number of threads on which to run the job. NOTE: One thread will be reserved for the main process, so by entering '4' here, only 3 threads will be utilized on the data. Entering '2' is equivalent to running the data on 1 thread. Default: 1")
    cli.add_argument(
        "-o", dest="output", default=False,
        help="Desired output directory. If none is entered, will be determined automatically.")
    opts = cli.parse_args()

    targets_given = opts.target_specs != ""
    pairwise_requested = opts.pairwise_opt != 0

    # The None test comes first so isdir is never handed a missing value.
    if opts.input is None or not os.path.isdir(opts.input):
        bail(1, "-i must be a valid directory path.")
    if opts.pairwise_opt not in (0, 1, 2):
        bail(2, "-w must take values of 0, 1, or 2.")
    # Exactly one of -t and -w must be in effect.
    if not targets_given and not pairwise_requested:
        bail(3, "With -w set to 0, -t must be defined.")
    if targets_given and pairwise_requested:
        bail(4, "Only one of -t and -w should be set.")
    if opts.prob_thresh < 0.0 or opts.prob_thresh > 1.0:
        bail(5, "-p can only take values between 0.0 and 1.0.")
    if opts.number_threads <= 0:
        bail(8, "-n must be a positive, non-zero integer.")

    return (opts.input, opts.target_specs, opts.pairwise_opt,
            opts.prob_thresh, opts.number_threads, opts.output)
# Main script set-up: read the options, create the output directory, locate
# the ancestral sequence files and build the output file names and headers.
indir, orig_targets, pairwise, prob_thresh, num_threads, output = optParse()

if platform.system() == "Windows" and num_threads != 1:
    # Single-argument print(...) parses identically on Python 2 and Python 3.
    print("\n** Warning! Multi-processing not currently supported on Windows. Switching to serial version (1 process).\n")
    num_threads = 1

outdir = gc.defaultOut(output, indir, "-gwct")
print(gc.getTime() + " | + Creating main output directory:\t" + outdir)
# Create the directory directly instead of shelling out to "mkdir": a path
# containing spaces or shell metacharacters would be split or interpreted by
# the shell.  An already existing directory is left untouched.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# Accept either spelling of the ancestral sequence directory name.
ancdir = os.path.join(indir, "anc-seqs-fa")
if not os.path.isdir(ancdir):
    ancdir = os.path.join(indir, "anc_seqs_fa")
if not os.path.isdir(ancdir):
    sys.exit(
        gc.errorOut(
            9, "Cannot find anc-seqs-fa directory within input directory."))

# Only files whose name contains "ancprobs" are processed.
filelist = [f for f in os.listdir(ancdir) if "ancprobs" in f]
num_files = len(filelist)

# A non-zero probability threshold is embedded in the output file names.
if prob_thresh == 0:
    convfilename = os.path.join(outdir, "conv-sites.txt")
    divfilename = os.path.join(outdir, "div-sites.txt")
else:
    convfilename = os.path.join(outdir, "conv-sites-" + str(prob_thresh) + ".txt")
    divfilename = os.path.join(outdir, "div-sites-" + str(prob_thresh) + ".txt")
uniqfilename = os.path.join(outdir, "unique-sites.txt")

main_header = "# GeneID\tAlignLen\tPosition\tAncAlleles\tTargetAlleles\n"
uniq_header = "# GeneID\tAlignLen\tPosition\tBackgroundAlleles\tTargetAlleles\n"

if pairwise == 0:
    # NOTE(review): the body of this branch is not part of the visible
    # excerpt; "pass" only keeps the statement syntactically complete.
    pass
def optParse(errorflag):
    """Parse and validate the options for convergent substitution counting.

    errorflag selects the mode:
      0 -- validate the options and return the tuple
           (input, target_specs, uniq_subs, pairwise_opt, prob_thresh,
            number_threads, output_suffix).
      1 -- print the usage message and exit.
    Any other value returns None.

    A failed check is reported with gwctcore.errorOut(code, message), then
    the usage message is printed and the script exits.
    """
    parser = argparse.ArgumentParser(description="Convergent substitution counting. Dependencies: ancestral sequences from PAML")
    parser.add_argument("-i", dest="input", help="Input directory containing a single run_codeml output directory or codeml_combined directory.")
    parser.add_argument("-t", dest="target_specs", help="A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: \"triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2\"", default="")
    parser.add_argument("-u", dest="uniq_subs", help="A boolean to output the sites that are unique substitutions in the tip branches of interest (1) or not (0). Default: 0", type=int, default=0)
    parser.add_argument("-w", dest="pairwise_opt", help="Option to tell the program to simply do all pairwise comparisons of tip branches (1) or pairwise comparisons of all branches (2) and make a C/D graph. Default: 0, do not do pairwise comparisons", type=int, default=0)
    parser.add_argument("-p", dest="prob_thresh", help="A probability threshold to only retrieve convergent sites with probabilities greater than or equal to. Set to 0 for no threshold. Default: 0", type=float, default=0)
    parser.add_argument("-n", dest="number_threads", help="The number of threads on which to run the job. NOTE: One thread will be reserved for the main process, so by entering '4' here, only 3 threads will be utilized on the data. Entering '2' is equivalent to running the data on 1 thread. Default: 1", type=int, default=1)
    parser.add_argument("-o", dest="output_suffix", help="The SUFFIX of the directory name to be created by the script. Something descriptive for your run.", default="")
    args = parser.parse_args()

    def show_help_and_exit():
        parser.print_help()
        sys.exit()

    def fail(code, message):
        # Report the problem, then show the usage and stop.  This avoids
        # re-entering optParse (and re-parsing argv) just to print the help.
        gwctcore.errorOut(code, message)
        show_help_and_exit()

    if errorflag == 1:
        show_help_and_exit()
    if errorflag != 0:
        return None

    if args.input is None:
        fail(1, "-i must be defined")
    if args.pairwise_opt not in [0, 1, 2]:
        fail(2, "-w must take values of 0, 1, or 2")
    if args.target_specs == "" and args.pairwise_opt == 0:
        fail(3, "With -w set to 0, -t must be defined")
    if args.target_specs != "" and args.pairwise_opt != 0:
        fail(4, "Only one of -t and -w should be set")
    if args.prob_thresh < 0.0 or args.prob_thresh > 1.0:
        fail(5, "-p can only take values between 0.0 and 1.0")
    if args.uniq_subs not in [0, 1]:
        fail(6, "-u can only take values of 0 or 1")
    if args.uniq_subs == 1 and args.target_specs == "":
        fail(7, "-u can only be set when a set of target species is defined with -t")
    # This check used to compare -t against None, which can never be true
    # because -t defaults to "".  It now tests the -w condition its message
    # describes.  Checks 4 and 7 already cover this combination, so it remains
    # a defensive guard.
    if args.uniq_subs == 1 and args.pairwise_opt != 0:
        fail(8, "With -u set to 1, -t must also be defined and -w must be 0")
    if args.number_threads <= 0:
        fail(9, "-n must be a positive, non-zero integer")

    if args.output_suffix is None:
        args.output_suffix = ""

    return (args.input, args.target_specs, args.uniq_subs, args.pairwise_opt,
            args.prob_thresh, args.number_threads, args.output_suffix)
def optParse(errorflag):
    """Handle the command line options for convergent substitution counting.

    With errorflag == 0 the options are checked and returned as
    (input, target_specs, uniq_subs, pairwise_opt, prob_thresh,
    number_threads, output_suffix).  With errorflag == 1 the usage message is
    printed and the script exits.  Other values return None.

    A failed check is reported through gwctcore.errorOut, after which the
    function calls itself in mode 1 to print the usage and exit.
    """
    cli = argparse.ArgumentParser(
        description="Convergent substitution counting. Dependencies: ancestral sequences from PAML")
    cli.add_argument(
        "-i", dest="input",
        help="Input directory containing a single run_codeml output directory or codeml_combined directory.")
    cli.add_argument(
        "-t", dest="target_specs", default="",
        help='A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: "triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2"')
    cli.add_argument(
        "-u", dest="uniq_subs", type=int, default=0,
        help="A boolean to output the sites that are unique substitutions in the tip branches of interest (1) or not (0). Default: 0")
    cli.add_argument(
        "-w", dest="pairwise_opt", type=int, default=0,
        help="Option to tell the program to simply do all pairwise comparisons of tip branches (1) or pairwise comparisons of all branches (2) and make a C/D graph. Default: 0, do not do pairwise comparisons")
    cli.add_argument(
        "-p", dest="prob_thresh", type=float, default=0,
        help="A probability threshold to only retrieve convergent sites with probabilities greater than or equal to. Set to 0 for no threshold. Default: 0")
    cli.add_argument(
        "-n", dest="number_threads", type=int, default=1,
        help="The number of threads on which to run the job. NOTE: One thread will be reserved for the main process, so by entering '4' here, only 3 threads will be utilized on the data. Entering '2' is equivalent to running the data on 1 thread. Default: 1")
    cli.add_argument(
        "-o", dest="output_suffix", default="",
        help="The SUFFIX of the directory name to be created by the script. Something descriptive for your run.")
    opts = cli.parse_args()

    if errorflag == 1:
        cli.print_help()
        sys.exit()
    if errorflag != 0:
        return None

    # (failed?, error code, message), examined in this order.  The conditions
    # are plain comparisons on already-parsed values, so evaluating them up
    # front has no side effects.
    checks = (
        (opts.input is None,
         1, "-i must be defined"),
        (opts.pairwise_opt not in (0, 1, 2),
         2, "-w must take values of 0, 1, or 2"),
        (opts.target_specs == "" and opts.pairwise_opt == 0,
         3, "With -w set to 0, -t must be defined"),
        (opts.target_specs != "" and opts.pairwise_opt != 0,
         4, "Only one of -t and -w should be set"),
        (opts.prob_thresh < 0.0 or opts.prob_thresh > 1.0,
         5, "-p can only take values between 0.0 and 1.0"),
        (opts.uniq_subs not in (0, 1),
         6, "-u can only take values of 0 or 1"),
        (opts.uniq_subs == 1 and opts.target_specs == "",
         7, "-u can only be set when a set of target species is defined with -t"),
        (opts.uniq_subs == 1 and opts.target_specs is None,
         8, "With -u set to 1, -t must also be defined and -w must be 0"),
        (opts.number_threads <= 0,
         9, "-n must be a positive, non-zero integer"),
    )
    for failed, code, message in checks:
        if failed:
            gwctcore.errorOut(code, message)
            optParse(1)

    if opts.output_suffix is None:
        opts.output_suffix = ""

    return (opts.input, opts.target_specs, opts.uniq_subs, opts.pairwise_opt,
            opts.prob_thresh, opts.number_threads, opts.output_suffix)