コード例 #1
0
ファイル: uniq_subs.py プロジェクト: gwct/gwct
def optParse(errorflag):
    """Read the command line options for the unique substitution counter.

    errorflag == 0: validate that -i and -t were supplied and return the
    tuple (input, target_specs, output_suffix).
    errorflag == 1: print the usage/help text and exit.
    Any other errorflag value falls through and returns None.
    """
    cli = argparse.ArgumentParser(description="Unique substitution counting.")

    # (flag, dest, help text, extra keyword arguments) in display order.
    option_table = (
        ("-i", "input", "Input directory containing a single run_codeml output directory or codeml_combined directory.", {}),
        ("-t", "target_specs", "A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: \"triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2\"", {}),
        ("-o", "output_suffix", "The SUFFIX of the directory name to be created by the script. Something descriptive for your run.", {"default": ""}),
    )
    for flag, dest, help_text, extra in option_table:
        cli.add_argument(flag, dest=dest, help=help_text, **extra)

    opts = cli.parse_args()

    if errorflag == 1:
        cli.print_help()
        sys.exit()

    if errorflag != 0:
        return None

    # Options without a default come back as None when omitted.
    required = (
        (opts.input, 1, "-i must be defined"),
        (opts.target_specs, 2, "-t must be defined"),
    )
    for value, code, message in required:
        if value is None:
            gwctcore.errorOut(code, message)
            # Re-enter in "help" mode: prints usage and terminates.
            optParse(1)

    return opts.input, opts.target_specs, opts.output_suffix
コード例 #2
0
ファイル: gwct_codeml.py プロジェクト: gwct/gwct
def optParse():
    """Parse and validate the command line options for the codeml runner.

    Returns:
        tuple: (input, paml_path, tree_file, prune_opt, verbosity, output).
        ``input`` and ``paml_path`` are converted to absolute paths.

    On any validation failure the process exits through
    ``sys.exit(gc.errorOut(code, message))``.
    """
    parser = argparse.ArgumentParser(description="Runs codeml on a directory full of .fa files. Files MUST have .fa extension. Dependencies: PAML, newickutils (if you want to prune your tree with --prune)")

    parser.add_argument("-i", dest="input", help="Input. A directory containing many FASTA (.fa) files.")
    parser.add_argument("-p", dest="paml_path", help="You must specify the full path to your PAML DIRECTORY here.")
    parser.add_argument("-t", dest="tree_file", help="A user specified tree for codeml to use. If not specified, codeml will infer the tree.", default="")
    parser.add_argument("--prune", dest="prune_opt", help="If not all species present in the tree will be present in each alignment, set this flag to prune the tree for each file.", action="store_true")
    # parser.add_argument("-seqtype", dest="paml_seqtype", help="Enter either 'codon' or 'aa'. Default value is 'codon'.", default='codon');
    parser.add_argument("-v", dest="verbosity", help="An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1", type=int, default=1)
    parser.add_argument("-o", dest="output", help="Desired output directory. If none is entered, will be determined automatically.", default=False)

    args = parser.parse_args()

    # Both the messages below used to mention only -i although -p is checked too.
    if args.input is None or args.paml_path is None:
        sys.exit(gc.errorOut(1, "Both -i and -p must be set."))
    if not os.path.isdir(args.input) or not os.path.isdir(args.paml_path):
        sys.exit(gc.errorOut(2, "Both -i and -p must be valid directory paths!"))
    args.input = os.path.abspath(args.input)
    args.paml_path = os.path.abspath(args.paml_path)

    # NOTE(review): the -t help text describes the tree as optional, but the
    # default "" fails this check, so a tree file is effectively required.
    # Behaviour is kept as-is; confirm which of the two is intended.
    if not os.path.isfile(args.tree_file):
        sys.exit(gc.errorOut(3, "-t must be a valid file name."))

    try:
        # Close the file deterministically instead of leaking the handle.
        with open(args.tree_file, "r") as tree_handle:
            newick = tree_handle.read().replace("\n", "")
        # The parsed result is discarded; the call (and the three-way
        # unpacking) only serves to validate the Newick string.
        _td, _tree, _root = gt.treeParse(newick)
    except Exception:
        # Exception rather than a bare except so that SystemExit and
        # KeyboardInterrupt are not reported as a malformed tree.
        sys.exit(gc.errorOut(4, "-t does not contain a valid Newick string!"))

    if args.verbosity not in [0, 1]:
        sys.exit(gc.errorOut(6, "-v must take values of either 1 or 0"))

    return args.input, args.paml_path, args.tree_file, args.prune_opt, args.verbosity, args.output
コード例 #3
0
def optParse(errorflag):
    """Handle the command line options for unique substitution counting.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the
            help text and exit.

    Returns:
        tuple: (input, target_specs, output_suffix) when errorflag is 0.
        None when errorflag is neither 0 nor 1.

    Exits (after reporting through gwctcore.errorOut and printing the help
    text) when -i or -t is missing.
    """
    parser = argparse.ArgumentParser(
        description="Unique substitution counting.")

    parser.add_argument(
        "-i",
        dest="input",
        help=
        "Input directory containing a single run_codeml output directory or codeml_combined directory."
    )
    parser.add_argument(
        "-t",
        dest="target_specs",
        help=
        "A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: \"triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2\""
    )
    parser.add_argument(
        "-o",
        dest="output_suffix",
        help=
        "The SUFFIX of the directory name to be created by the script. Something descriptive for your run.",
        default="")

    args = parser.parse_args()

    def _help_and_exit():
        # Print usage directly instead of recursing into optParse(1), which
        # rebuilt the parser and re-parsed argv only to reach this point.
        parser.print_help()
        sys.exit()

    if errorflag == 0:
        if args.input is None:
            gwctcore.errorOut(1, "-i must be defined")
            _help_and_exit()

        if args.target_specs is None:
            gwctcore.errorOut(2, "-t must be defined")
            _help_and_exit()

        return args.input, args.target_specs, args.output_suffix

    elif errorflag == 1:
        _help_and_exit()
コード例 #4
0
ファイル: gwct_codeml.py プロジェクト: gwct/gwct
def optParse(errorflag):
    """Handle the command line options for the codeml runner.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the
            help text and exit.

    Returns:
        tuple: (input, paml_path, tree_file, prune_opt, paml_seqtype,
        branch_site, anc_opt, verbosity, log_opt, logdir_suffix) when
        errorflag is 0. None when errorflag is neither 0 nor 1.

    On a validation failure the error is reported through
    gwctcore.errorOut, the help text is printed and the process exits.
    """
    parser = argparse.ArgumentParser(description="Runs codeml on a single .fa file or a directory full of .fa files. Dependencies: PAML, newickutils (if you want to prune your tree)")

    parser.add_argument("-i", dest="input", help="Input. Either a directory containing many FASTA files or a single FASTA file.")
    parser.add_argument("-c", dest="paml_path", help="You must specify the full path to your PAML DIRECTORY here.")
    parser.add_argument("-t", dest="tree_file", help="A user specified tree for codeml to use. If not specified, codeml will infer the tree.", default="")
    parser.add_argument("-p", dest="prune_opt", help="If not all species present in the tree will be present in each alignment, set this to 1 to prune the tree for each file. Default: 0", type=int, default=0)
    parser.add_argument("-s", dest="paml_seqtype", help="The seqtype for codeml to use. 1 (default): Codons; 2: Amino Acids", type=int, default=1)
    parser.add_argument("-b", dest="branch_site", help="Specifies the type of run for PAML's branch site test. 0 (default): Do not do branch site test; 1: Do the null model of the branch site test (model=2, NSsite=2, fix_omega=1, omega=1); 2: Do the alternate model of the branch site test (model=2, NSsite=2, fix_omega=0, omega=1). A branch must be specified in your tree file.", type=int, default=0)
    parser.add_argument("-a", dest="anc_opt", help="Option to tell PAML to do ancestral reconstruction (1) or not (0). Default: 0.", type=int, default=0)
    parser.add_argument("-v", dest="verbosity", help="An option to control the output printed to the screen. 1: print all codeml output, 0: print only a progress bar. Default: 1", type=int, default=1)
    parser.add_argument("-l", dest="log_opt", help="A boolean option to tell the script whether to create a logfile (1) or not (0). Default: 1", type=int, default=1)
    parser.add_argument("-x", dest="logdir_suffix", help="A string to add on to the end of the output directory.")

    args = parser.parse_args()

    def _help_and_exit():
        # Print usage directly rather than recursing into optParse(1).
        parser.print_help()
        sys.exit()

    def _fail(code, message):
        # Report the problem, show the usage text, then terminate.
        gwctcore.errorOut(code, message)
        _help_and_exit()

    if errorflag == 0:

        if args.input is None or args.paml_path is None:
            _fail(1, "Both -i and -c must be set")

        # Only validate the tree path when one was given. The default ""
        # previously failed this check unconditionally, which made the
        # documented-optional -t mandatory and left check 4 unreachable.
        if args.tree_file != "" and not os.path.isfile(args.tree_file):
            _fail(2, "-t must be a valid tree file name")

        if args.prune_opt not in [0, 1]:
            _fail(3, "-p must take values of either 1 or 0")

        if args.prune_opt == 1 and args.tree_file == "":
            _fail(4, "With -p set to 1 a tree file must be specified")

        if args.paml_seqtype not in [1, 2]:
            _fail(5, "-s must take values of either 1 or 2")

        if args.branch_site not in [0, 1, 2]:
            _fail(6, "-b must take values of 0, 1, or 2")

        if args.anc_opt not in [0, 1]:
            _fail(7, "-a must take values of 1 or 0")

        if args.verbosity not in [0, 1]:
            _fail(8, "-v must take values of either 1 or 0")

        if args.log_opt not in [0, 1]:
            _fail(9, "-l must take values of either 1 or 0")

        # -x has no default, so normalise a missing suffix to "".
        if args.logdir_suffix is None:
            args.logdir_suffix = ""

        return args.input, args.paml_path, args.tree_file, args.prune_opt, args.paml_seqtype, args.branch_site, args.anc_opt, args.verbosity, args.log_opt, args.logdir_suffix

    elif errorflag == 1:
        _help_and_exit()
コード例 #5
0
ファイル: gwct_codeml.py プロジェクト: gwct/gwct
#def core.logCheck(lopt, lfilename, outline):
#	if lopt == 1:
#		core.printWrite(lfilename, outline);
#	else:
#		print outline;

############################################
#Main Block
############################################

# Parse the command line; optParse(0) hands back the ten option values.
ins, ppath, treefile, prune, seqtype, bsopt, aopt, v, l, outdir_suffix = optParse(0);

# Timestamp from gwctcore, passed to getOutdir below.
starttime = gwctcore.getLogTime();

# The PAML path must be an existing directory.
if not os.path.isdir(ppath):
	gwctcore.errorOut(10, "-c must be a valid directory path");
	# presumably prints the help text and exits -- verify against optParse
	optParse(1);

if os.path.isfile(ins):
	# Single-file mode: the input is one file.
	fileflag = 1;
	# Use the directory holding the (symlink-resolved) input file as the input directory.
	indir = os.path.dirname(os.path.realpath(ins));
	indir, script_outdir = gwctcore.getOutdir(indir, "run_codeml", starttime, outdir_suffix);
	outdir = os.path.join(script_outdir, "codeml_out");
	# ancdir is only defined when the -a option value (aopt) is 1.
	if aopt == 1:
		ancdir = os.path.join(script_outdir, "anc_seqs_fa");
	filelist = [ins];

else:
	# Input is not a regular file: it is passed to getOutdir as the input directory.
	fileflag = 0;
	indir, script_outdir = gwctcore.getOutdir(ins, "run_codeml", starttime, outdir_suffix);
	outdir = os.path.join(script_outdir, "codeml_out");
コード例 #6
0
ファイル: gwct.py プロジェクト: gwct/gwct
def optParse():
    """Parse and validate the options of the convergent substitution counter.

    Returns the tuple (input, target_specs, pairwise_opt, prob_thresh,
    number_threads, output). The first failing validation rule terminates
    the process through sys.exit(gc.errorOut(code, message)).
    """
    cli = argparse.ArgumentParser(
        description="Convergent substitution counting. Dependencies: ancestral sequences from PAML formatted with gwct_codeml.py")

    # Flag -> add_argument keyword arguments, in the order shown by --help.
    option_specs = [
        ("-i", {
            "dest": "input",
            "help": "Input directory. This is the output directory of a gwct_codeml.py run and should include the folder 'anc-seqs-fa'. Will count convergent, divergent, and unique substitutions, or make pairwise comparisons if -w is set.",
        }),
        ("-t", {
            "dest": "target_specs",
            "help": "A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: \"triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2\". If a target species is missing from a given alignment, it will be removed from the list of target species.",
            "default": "",
        }),
        ("-w", {
            "dest": "pairwise_opt",
            "help": "Option to tell the program to simply do all pairwise comparisons of tip branches (1) or pairwise comparisons of all branches (2) and make a C/D graph. With -w set, unique substitutions will not be counted. Default: 0, do not do pairwise comparisons",
            "type": int,
            "default": 0,
        }),
        ("-p", {
            "dest": "prob_thresh",
            "help": "A probability threshold to only retrieve convergent sites with probabilities greater than or equal to. Set to 0 for no threshold. Default: 0",
            "type": float,
            "default": 0,
        }),
        ("-n", {
            "dest": "number_threads",
            "help": "The number of threads on which to run the job. NOTE: One thread will be reserved for the main process, so by entering '4' here, only 3 threads will be utilized on the data. Entering '2' is equivalent to running the data on 1 thread. Default: 1",
            "type": int,
            "default": 1,
        }),
        ("-o", {
            "dest": "output",
            "help": "Desired output directory. If none is entered, will be determined automatically.",
            "default": False,
        }),
    ]
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)

    opts = cli.parse_args()

    # Ordered rules; each predicate is evaluated lazily so that a later
    # rule never runs once an earlier one has failed.
    rules = (
        (lambda: opts.input is None or not os.path.isdir(opts.input),
         1, "-i must be a valid directory path."),
        (lambda: opts.pairwise_opt not in [0, 1, 2],
         2, "-w must take values of 0, 1, or 2."),
        (lambda: opts.target_specs == "" and opts.pairwise_opt == 0,
         3, "With -w set to 0, -t must be defined."),
        (lambda: opts.target_specs != "" and opts.pairwise_opt != 0,
         4, "Only one of -t and -w should be set."),
        (lambda: opts.prob_thresh < 0.0 or opts.prob_thresh > 1.0,
         5, "-p can only take values between 0.0 and 1.0."),
        (lambda: opts.number_threads <= 0,
         8, "-n must be a positive, non-zero integer."),
    )
    for violated, code, message in rules:
        if violated():
            sys.exit(gc.errorOut(code, message))

    return (opts.input, opts.target_specs, opts.pairwise_opt,
            opts.prob_thresh, opts.number_threads, opts.output)
コード例 #7
0
ファイル: gwct.py プロジェクト: gwct/gwct
# Parse the command line; optParse() hands back the six option values.
indir, orig_targets, pairwise, prob_thresh, num_threads, output = optParse()

# Fall back to a single process on Windows.
if platform.system() == "Windows" and num_threads != 1:
    print "\n** Warning! Multi-processing not currently supported on Windows. Switching to serial version (1 process).\n"
    num_threads = 1

# Output directory name comes from gc.defaultOut.
outdir = gc.defaultOut(output, indir, "-gwct")
print gc.getTime() + " | + Creating main output directory:\t" + outdir
# NOTE(review): the directory is created through a shell command built by
# string concatenation; a path containing spaces or shell metacharacters
# would be mis-handled -- consider os.makedirs.
os.system("mkdir " + outdir)

# Locate the ancestral sequence directory: try the hyphenated name first,
# then the underscore spelling, and give up if neither exists.
ancdir = os.path.join(indir, "anc-seqs-fa")
if not os.path.isdir(ancdir):
    ancdir = os.path.join(indir, "anc_seqs_fa")
if not os.path.isdir(ancdir):
    sys.exit(
        gc.errorOut(
            9, "Cannot find anc-seqs-fa directory within input directory."))
# Keep only the entries whose name contains "ancprobs".
filelist = os.listdir(ancdir)
filelist = [f for f in filelist if "ancprobs" in f]
num_files = len(filelist)
# Output file names; a non-zero probability threshold is embedded in the
# convergent/divergent file names.
if prob_thresh == 0:
    convfilename = os.path.join(outdir, "conv-sites.txt")
    divfilename = os.path.join(outdir, "div-sites.txt")
elif prob_thresh != 0:
    convfilename = os.path.join(outdir,
                                "conv-sites-" + str(prob_thresh) + ".txt")
    divfilename = os.path.join(outdir,
                               "div-sites-" + str(prob_thresh) + ".txt")
uniqfilename = os.path.join(outdir, "unique-sites.txt")
# Tab-separated header lines.
main_header = "# GeneID\tAlignLen\tPosition\tAncAlleles\tTargetAlleles\n"
uniq_header = "# GeneID\tAlignLen\tPosition\tBackgroundAlleles\tTargetAlleles\n"
if pairwise == 0:
コード例 #8
0
ファイル: gwct.py プロジェクト: gwct/gwct
def optParse(errorflag):
    """Handle the command line options for convergent substitution counting.

    Args:
        errorflag: 0 to parse and validate the options; 1 to print the
            help text and exit.

    Returns:
        tuple: (input, target_specs, uniq_subs, pairwise_opt, prob_thresh,
        number_threads, output_suffix) when errorflag is 0. None when
        errorflag is neither 0 nor 1.

    On a validation failure the error is reported through
    gwctcore.errorOut, the help text is printed and the process exits.
    """
    parser = argparse.ArgumentParser(description="Convergent substitution counting. Dependencies: ancestral sequences from PAML")

    parser.add_argument("-i", dest="input", help="Input directory containing a single run_codeml output directory or codeml_combined directory.")
    parser.add_argument("-t", dest="target_specs", help="A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: \"triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2\"", default="")
    parser.add_argument("-u", dest="uniq_subs", help="A boolean to output the sites that are unique substitutions in the tip branches of interest (1) or not (0). Default: 0", type=int, default=0)
    parser.add_argument("-w", dest="pairwise_opt", help="Option to tell the program to simply do all pairwise comparisons of tip branches (1) or pairwise comparisons of all branches (2) and make a C/D graph. Default: 0, do not do pairwise comparisons", type=int, default=0)
    parser.add_argument("-p", dest="prob_thresh", help="A probability threshold to only retrieve convergent sites with probabilities greater than or equal to. Set to 0 for no threshold. Default: 0", type=float, default=0)
    parser.add_argument("-n", dest="number_threads", help="The number of threads on which to run the job. NOTE: One thread will be reserved for the main process, so by entering '4' here, only 3 threads will be utilized on the data. Entering '2' is equivalent to running the data on 1 thread. Default: 1", type=int, default=1)
    parser.add_argument("-o", dest="output_suffix", help="The SUFFIX of the directory name to be created by the script. Something descriptive for your run.", default="")

    args = parser.parse_args()

    def _help_and_exit():
        # Print usage directly rather than recursing into optParse(1).
        parser.print_help()
        sys.exit()

    def _fail(code, message):
        # Report the problem, show the usage text, then terminate.
        gwctcore.errorOut(code, message)
        _help_and_exit()

    if errorflag == 0:
        if args.input is None:
            _fail(1, "-i must be defined")

        if args.pairwise_opt not in [0, 1, 2]:
            _fail(2, "-w must take values of 0, 1, or 2")

        if args.target_specs == "" and args.pairwise_opt == 0:
            _fail(3, "With -w set to 0, -t must be defined")

        if args.target_specs != "" and args.pairwise_opt != 0:
            _fail(4, "Only one of -t and -w should be set")

        if args.prob_thresh < 0.0 or args.prob_thresh > 1.0:
            _fail(5, "-p can only take values between 0.0 and 1.0")

        if args.uniq_subs not in [0, 1]:
            _fail(6, "-u can only take values of 0 or 1")

        if args.uniq_subs == 1 and args.target_specs == "":
            _fail(7, "-u can only be set when a set of target species is defined with -t")

        # The former check 8 (target_specs == None) and the output_suffix
        # None-normalisation were dropped: both options default to "", so
        # neither value can ever be None after parsing.

        if args.number_threads <= 0:
            _fail(9, "-n must be a positive, non-zero integer")

        return args.input, args.target_specs, args.uniq_subs, args.pairwise_opt, args.prob_thresh, args.number_threads, args.output_suffix

    elif errorflag == 1:
        _help_and_exit()
コード例 #9
0
ファイル: gwct.py プロジェクト: gwct/gwct
def optParse(errorflag):
    """Read the options of the convergent substitution counter.

    errorflag == 0: validate the options and return the tuple
    (input, target_specs, uniq_subs, pairwise_opt, prob_thresh,
    number_threads, output_suffix).
    errorflag == 1: print the usage/help text and exit.
    Any other errorflag value falls through and returns None.
    """
    cli = argparse.ArgumentParser(description="Convergent substitution counting. Dependencies: ancestral sequences from PAML")

    cli.add_argument("-i", dest="input", help="Input directory containing a single run_codeml output directory or codeml_combined directory.")
    cli.add_argument("-t", dest="target_specs", default="", help="A list contained in quotes. Spaces separate groups, commas separate species within groups. E.g: \"triMan1 lepWed1,odoRosDiv1 orcOrc1,turTru2\"")
    cli.add_argument("-u", dest="uniq_subs", type=int, default=0, help="A boolean to output the sites that are unique substitutions in the tip branches of interest (1) or not (0). Default: 0")
    cli.add_argument("-w", dest="pairwise_opt", type=int, default=0, help="Option to tell the program to simply do all pairwise comparisons of tip branches (1) or pairwise comparisons of all branches (2) and make a C/D graph. Default: 0, do not do pairwise comparisons")
    cli.add_argument("-p", dest="prob_thresh", type=float, default=0, help="A probability threshold to only retrieve convergent sites with probabilities greater than or equal to. Set to 0 for no threshold. Default: 0")
    cli.add_argument("-n", dest="number_threads", type=int, default=1, help="The number of threads on which to run the job. NOTE: One thread will be reserved for the main process, so by entering '4' here, only 3 threads will be utilized on the data. Entering '2' is equivalent to running the data on 1 thread. Default: 1")
    cli.add_argument("-o", dest="output_suffix", default="", help="The SUFFIX of the directory name to be created by the script. Something descriptive for your run.")

    opts = cli.parse_args()

    if errorflag == 1:
        cli.print_help()
        sys.exit()

    if errorflag != 0:
        return None

    no_targets = opts.target_specs == ""
    wants_unique = opts.uniq_subs == 1

    # (violated?, error code, message) in the order the checks are applied.
    checks = (
        (opts.input is None,
         1, "-i must be defined"),
        (opts.pairwise_opt not in [0, 1, 2],
         2, "-w must take values of 0, 1, or 2"),
        (no_targets and opts.pairwise_opt == 0,
         3, "With -w set to 0, -t must be defined"),
        (not no_targets and opts.pairwise_opt != 0,
         4, "Only one of -t and -w should be set"),
        (opts.prob_thresh < 0.0 or opts.prob_thresh > 1.0,
         5, "-p can only take values between 0.0 and 1.0"),
        (opts.uniq_subs not in [0, 1],
         6, "-u can only take values of 0 or 1"),
        (wants_unique and no_targets,
         7, "-u can only be set when a set of target species is defined with -t"),
        (wants_unique and opts.target_specs is None,
         8, "With -u set to 1, -t must also be defined and -w must be 0"),
        (opts.number_threads <= 0,
         9, "-n must be a positive, non-zero integer"),
    )
    for violated, code, message in checks:
        if violated:
            gwctcore.errorOut(code, message)
            # Re-enter in "help" mode: prints usage and terminates.
            optParse(1)

    suffix = "" if opts.output_suffix is None else opts.output_suffix
    opts.output_suffix = suffix

    return (opts.input, opts.target_specs, opts.uniq_subs, opts.pairwise_opt,
            opts.prob_thresh, opts.number_threads, opts.output_suffix)