Example #1
0
def fit_demography(syn_sfs, args):
    '''
    Second step of the procedure: fit a dadi demographic model to the
    synonymous mutation set via the dadi_pipeline wrapper package.

    The model function (exponential_development), the parameter labels and
    the parameter bounds are hard-coded assumptions about development and
    growth.

    syn_sfs -- spectrum passed to Optimize_Routine as the data to fit.
    args    -- object exposing `samples` (drives the three grid sizes and,
               as a string, the third positional argument) and `model_name`.

    The function has no return statement; whatever Optimize_Routine
    produces is not passed back to the caller.
    '''
    n_samples = args.samples
    grid_sizes = [n_samples + pad for pad in (10, 20, 30)]

    # Keep the optimization in the nu > 1 region only: the initial round of
    # 6k cells is definitely not shrinking.
    lower_bounds = [1, .001]
    upper_bounds = [1000, 1000]

    Optimize_Functions.Optimize_Routine(
        syn_sfs, grid_sizes, str(n_samples), args.model_name,
        exponential_development,
        3,
        2,
        in_lower=lower_bounds,
        in_upper=upper_bounds,
        param_labels="nu, T",
        fs_folded=False)
Example #2
0
#	Below labels and upper and lower bounds of parameter values

# Labels and bounds for the 5 parameters of the IM model (one entry each).
p_labels = "nu1, nu2, T1, m12, m21"
upper = [20, 20, 10, 200, 200]
lower = [1e-3, 1e-3, 1e-3, 1e-5, 1e-5]

# Optimization schedule: 4 rounds for a large dataset and an unfolded
# spectrum. Each list below has four entries, matching the 4 rounds passed
# to Optimize_Routine.

reps = [10, 10, 20, 20]
maxiters = [10, 20, 20, 30]
folds = [3, 3, 2, 1]

# Run 10 independent optimization routines; afterwards use
# "Summarize_Output.py" to keep the best run from each routine.

for i in range(1, 11):
    prefix = infile + "_OPTI_Number_{}".format(i)
    # Positional order is: data, pts, output prefix, model name, model
    # function, rounds, number of parameters. The grid `pts` was missing,
    # which shifted every following positional argument by one slot.
    # `pts` is expected to be defined earlier in this script.
    Optimize_Functions.Optimize_Routine(data,
                                        pts,
                                        prefix,
                                        "IM",
                                        IM,
                                        4,
                                        5,
                                        data_folded=False,
                                        param_labels=p_labels,
                                        in_upper=upper,
                                        in_lower=lower,
                                        reps=reps,
                                        maxiters=maxiters,
                                        folds=folds)
Example #3
0
    Portik, D.M., Leache, A.D., Rivera, D., Blackburn, D.C., Rodel, M.-O.,
    Barej, M.F., Hirschfeld, M., Burger, M., and M.K.Fujita. 2017.
    Evaluating mechanisms of diversification in a Guineo-Congolian forest
    frog using demographic model selection. Molecular Ecology 26: 5245-5263.
    doi: 10.1111/mec.14266

'''

# Two-population split without migration (3 parameters: nu1, nu2, T).
Optimize_Functions.Optimize_Routine(
    fs, pts, prefix, "no_mig", Models_2D.no_mig, rounds, 3,
    param_labels="nu1, nu2, T",
    fs_folded=fs_folded,
    folds=folds,
    maxiters=maxiters,
    reps=reps)

# Split into two populations, with continuous symmetric migration.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "sym_mig",
                                    Models_2D.sym_mig,
                                    rounds,
                                    4,
                                    fs_folded=fs_folded,
# Grid point settings used for extrapolation.
pts = [120, 160, 200]

prefix = "optim_a/priorsize_asym_mig"

# One label, upper bound, lower bound and p0 entry per model parameter (7).
p_labels = "nua, T1, nu1b, nu2b, T2, m12, m21"
upper = [100, 10, 100, 100, 10, 100, 100]
lower = [1e-2, 0, 1e-2, 1e-2, 0, 0, 0]
# In this section p0 is only used through len(p0), as the parameter count.
p0 = [1, 1, 1, 1, 1, 1, 1]

# Per-round settings; len(reps) is passed below as the number of rounds.
reps = [10, 10, 10, 10]
maxiters = [5, 5, 10, 20]
folds = [5, 3, 2, 1]

# First command-line argument, appended to the output prefix so that
# separate invocations write to separate files.
i = sys.argv[1]
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix + "_{}".format(i),
                                    # __name__ exists on functions in both
                                    # Python 2 and 3; func_name is
                                    # Python 2 only.
                                    func.__name__,
                                    func,
                                    len(reps),
                                    len(p0),
                                    fs_folded=True,
                                    param_labels=p_labels,
                                    in_upper=upper,
                                    in_lower=lower,
                                    reps=reps,
                                    maxiters=maxiters,
                                    folds=folds)
Example #5
0
'''
Example 1. Now let's use the function to run an optimization routine for our data and this model.
We need to specify the first seven arguments in this function, but there are other options
we can also use if we wanted more control over the optimization scheme. We'll start with
the basic version here. The argument explanations are above. This would perform three
rounds of optimizations, using a default number of replicates for each round (see documentation
for explanation of default values).
'''
#label used as the prefix of the output files
prefix = "V1"
#extrapolation grid sizes -- always define these
pts = [50, 60, 70]

#Mandatory arguments, in this order:
#Optimize_Routine(fs, pts, outfile, model_name, func, rounds, param_number, fs_folded)
Optimize_Functions.Optimize_Routine(
    fs, pts, prefix, "sym_mig", sym_mig,
    3,  # rounds
    4,  # param_number
    fs_folded=True)




'''
Example 2. It is a good idea to include the labels of the parameters so they can get written to the
output file, otherwise you'll have to go back to the model each time you wanted to see their
order. The optional arguments require using the = sign to assign a variable or value to the argument.
'''
prefix = "V2"
pts = [50,60,70]

p_labels = "nu1, nu2, m, T"

Optimize_Functions.Optimize_Routine(fs, pts, prefix, "sym_mig", sym_mig, 3, 4, fs_folded=True,
'''
Diversification Model Set

This first set of models come from the following publication:

    Portik, D.M., Leache, A.D., Rivera, D., Blackburn, D.C., Rodel, M.-O.,
    Barej, M.F., Hirschfeld, M., Burger, M., and M.K. Fujita. 2017.
    Evaluating mechanisms of diversification in a Guineo-Congolian forest
    frog using demographic model selection. Molecular Ecology 26: 5245-5263.
    doi: 10.1111/mec.14266

'''

# Each call passes: spectrum, grid, output prefix, model name, model function,
# number of rounds, number of parameters, then keyword settings. The number
# of comma-separated names in param_labels should equal the parameter count.

# Split into three populations, no migration.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_nomig", Models_3D.split_nomig, rounds, 6, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, T1, T2")

# Split into three populations, symmetric migration between all populations (1<->2, 2<->3, and 1<->3).
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_all", Models_3D.split_symmig_all, rounds, 10, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, m3, T1, T2")

# Split into three populations, symmetric migration between 'adjacent' populations (1<->2, 2<->3, but not 1<->3).
# 9 parameters: there is no 1<->3 migration, so no m3 label (the previous
# 10-name label string did not match the declared parameter count).
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_adjacent", Models_3D.split_symmig_adjacent, rounds, 9, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, T1, T2")

# Adjacent Secondary contact, longest isolation - Split between pop 1 and (2,3) with no migration, then split between pop 2 and 3 with no migration. Period of symmetric secondary contact occurs between adjacent populations (ie 1<->2, 2<->3, but not 1<->3) after all splits are complete.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_1", Models_3D.refugia_adj_1, rounds, 9, fs_folded=fs_folded,
                                        reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, m1, m2, T1, T2, T3")

# Adjacent Secondary contact, shorter isolation - Split between pop 1 and (2,3), gene flow does not occur. Split between pop 2 and 3 occurs with gene flow. After appearance of 2 and 3, gene flow also occurs between 1 and 2.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_2", Models_3D.refugia_adj_2, rounds, 8, fs_folded=fs_folded,
# 3 values
no_mig_params = [0.357, 0.7876, 0.1456]

#"sym_mig"
#4 Values
sym_mig_params = [0.6832, 1.4416, 0.6072, 2.5373]

#"asym_mig"
#5 Values
asym_mig_params = [0.938, 3.558, 0.5284, 0.1714, 5.8838]

#======================================================================================
# Call the function with the relevant arguments.

# Divergence with no migration
Optimize_Functions.Optimize_Round2(pts, fs, outfile, reps, maxiter, "no_mig",
                                   no_mig_params)

# Split into two populations, with continuous symmetric migration.
Optimize_Functions.Optimize_Round2(pts, fs, outfile, reps, maxiter, "sym_mig",
                                   sym_mig_params)

# Split into two populations, with continuous asymmetric migration.
Optimize_Functions.Optimize_Round2(pts, fs, outfile, reps, maxiter, "asym_mig",
                                   asym_mig_params)

#===========================================================================
#clock the amount of time to complete the script
t_finish = datetime.now()
elapsed = t_finish - t_begin
print '\n', '\n', "-----------------------------------------------------------------------------------------------------"
print "Finished all analyses!"
Example #8
0
pts = [30, 40, 50]
#prefix for output file naming
outfile = "C-O"

#spectrum object name (we defined this above)
fs = fs_1
#integer to control number of replicates per model
reps = int(30)
#max number of iterations per optimization step (though see dadi user group for explanation)
maxiter = int(20)

#======================================================================================
# Call the function with the relevant arguments.

# 1 Standard neutral model, populations never diverge
Optimize_Functions.Optimize_Round1(pts, fs, outfile, reps, maxiter,
                                   "no_divergence")

# 2 Split into two populations, no migration.
Optimize_Functions.Optimize_Round1(pts, fs, outfile, reps, maxiter, "no_mig")

# 3 Split into two populations, with continuous symmetric migration.
Optimize_Functions.Optimize_Round1(pts, fs, outfile, reps, maxiter, "sym_mig")

# 4 Split into two populations, with continuous asymmetric migration.
Optimize_Functions.Optimize_Round1(pts, fs, outfile, reps, maxiter, "asym_mig")

#===========================================================================
#clock the amount of time to complete the script
t_finish = datetime.now()
elapsed = t_finish - t_begin
print '\n', '\n', "-----------------------------------------------------------------------------------------------------"
Example #9
0
#======================================================================================
# Third-round calls, one per model.

# Many models are tested here. Each gets a one-line description below; the
# model functions themselves live in Models_2D.py. The first 15 come from
# Portik et al. 2017 (doi: 10.1111/mec.14266); the following 9 were added
# later for various projects.

# Models are called sequentially, which may finish reasonably fast. If it is
# too slow, make several versions of this script, disable some models in each
# (comment out or delete), and run one version per available core. That
# speeds things up considerably, and because a failed extrapolation can crash
# the script, it also limits how much work a single crash costs.

# Standard neutral model, populations never diverge
Optimize_Functions.Optimize_Round3(
    pts, fs, outfile, reps, maxiter, "no_divergence", no_divergence_params)

# Split into two populations, no migration.
Optimize_Functions.Optimize_Round3(
    pts, fs, outfile, reps, maxiter, "no_mig", no_mig_params)

# Split into two populations, with continuous symmetric migration.
Optimize_Functions.Optimize_Round3(
    pts, fs, outfile, reps, maxiter, "sym_mig", sym_mig_params)

# Split into two populations, with continuous asymmetric migration.
Optimize_Functions.Optimize_Round3(
    pts, fs, outfile, reps, maxiter, "asym_mig", asym_mig_params)

# Split with continuous symmetric migration, followed by isolation.
Optimize_Functions.Optimize_Round3(pts, fs, outfile, reps, maxiter,
Example #10
0
	summary_file = open("%s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type), "a+")
	param_list = '\t'.join(map(str,param_labels[model].replace(" ","").split(",")))
	summary_file.write("sfs_type\trun_num\ttaxa\tmodel\tlog-likelihood\taic\ttheta\t%s\n" % (param_list))
	summary_file.close()

#Show grid points in case they need to be checked in the standard output file
print "\n\nGrid points: {}".format(pts)

print "\n\nBeginning optimization run {}".format(run_num)

#Set filename prefix
prefix = "%s/%s_%s_%s_run%s" % (out_dir,taxa,is_folded,sfs_type,run_num)

#Run optimization run with dadi_pipeline
if is_folded == "fold":
	params = Optimize_Functions.Optimize_Routine(fs, pts, prefix, model, func, len(reps), len(lower), fs_folded = True, param_labels = p_labels, reps = reps, folds = folds, in_params = in_params, in_upper = upper, in_lower = lower, maxiters = maxiters)
elif is_folded == "unfold":
	params = Optimize_Functions.Optimize_Routine(fs, pts, prefix, model, func, len(reps), len(lower), fs_folded = False, param_labels = p_labels, reps = reps, folds = folds, in_params = in_params, in_upper = upper, in_lower = lower, maxiters = maxiters)
else:
	print "\n\nSyntax incorrect for folding argument, please use 'fold' or 'unfold'"

#Print notable results and where they are saved to.
print "\n\nHighest likelihood of run: {}".format(params[0])
print "\nOptimized parameter values: {}".format(params[1])
print "\nAdding run results to analysis summary file: %s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type)

#Once again make sure summary file is there.
if not os.path.isfile("%s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type)):
	summary_file = open("%s/results_summary/%s_%s_%s_%s_results_summary.txt" % (out_dir,taxa,is_folded,model,sfs_type), "a+")
	param_list = '\t'.join(map(str,param_labels[model].replace(" ","").split(",")))
	summary_file.write("sfs_type\trun_num\ttaxa\tmodel\tlog-likelihood\taic\ttheta\t%s\n" % (param_list))
Example #11
0
# Prefix used when naming output files.
outfile = "N_v_S"

# Usually left unchanged, unless more searches are wanted.
# Spectrum object (defined earlier in the script).
fs = fs_1

#===========================================================================
# Single-replicate calls with fixed parameter values.

# Several models are optimized. Each runs one replicate with fixed parameter
# values, and the simulated model is kept in a variable so the plotting
# function can use it later.

# Standard neutral model, populations never diverge
no_divergence = Optimize_Functions.Optimize_Single(
    pts, fs, "no_divergence", no_divergence_params)

# Split into two populations, no migration.
no_mig = Optimize_Functions.Optimize_Single(
    pts, fs, "no_mig", no_mig_params)

# Split into two populations, with continuous symmetric migration.
sym_mig = Optimize_Functions.Optimize_Single(
    pts, fs, "sym_mig", sym_mig_params)

# Split into two populations, with continuous asymmetric migration.
asym_mig = Optimize_Functions.Optimize_Single(
    pts, fs, "asym_mig", asym_mig_params)

# Split with continuous symmetric migration, followed by isolation.
anc_sym_mig = Optimize_Functions.Optimize_Single(
    pts, fs, "anc_sym_mig", anc_sym_mig_params)
Example #12
0
fs_folded = True

#**************
# To change bounds, edit upper and lower.
# To use default, comment out upper and lower and remove "in_upper=upper, in_lower=lower," from Optimize_Routine.

# Split into two populations, no migration.
# One bound per parameter (nu1, nu2, T).
upper = [30, 30, 30]
lower = [1e-05, 1e-05, 1e-05]
# Positional order is: spectrum, pts, output prefix, model name, model
# function, rounds, number of parameters. The grid `pts` was missing, which
# shifted every following positional argument by one slot. `pts` is expected
# to be defined earlier in this script.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "no_mig",
                                    Models_2D.no_mig,
                                    rounds,
                                    3,
                                    fs_folded=fs_folded,
                                    reps=reps,
                                    maxiters=maxiters,
                                    folds=folds,
                                    in_upper=upper,
                                    in_lower=lower,
                                    param_labels="nu1, nu2, T")

# Split into two populations, with continuous symmetric migration.
upper = [30, 30, 30, 30]
lower = [1e-05, 1e-05, 0, 1e-05]
Optimize_Functions.Optimize_Routine(fs,
                                    prefix,
                                    "sym_mig",
                                    Models_2D.sym_mig,
                                    rounds,
Example #13
0
#======================================================================================
# First-round calls, one per model.

# Many models are tested here. Each gets a one-line description below; the
# model functions themselves live in Models_2D.py. The first 15 come from
# Portik et al. 2017 (doi: 10.1111/mec.14266); the following 9 were added
# later for various projects.

# Models are called sequentially, which may finish reasonably fast. If it is
# too slow, make several versions of this script, disable some models in each
# (comment out or delete), and run one version per available core. That
# speeds things up considerably, and because a failed extrapolation can crash
# the script, it also limits how much work a single crash costs.

# Standard neutral model, populations never diverge
Optimize_Functions.Optimize_Round1(
    pts, fs, outfile, reps, maxiter, "no_divergence")

# Split into two populations, no migration.
Optimize_Functions.Optimize_Round1(
    pts, fs, outfile, reps, maxiter, "no_mig")

# Split into two populations, with continuous symmetric migration.
Optimize_Functions.Optimize_Round1(
    pts, fs, outfile, reps, maxiter, "sym_mig")

# Split into two populations, with continuous asymmetric migration.
Optimize_Functions.Optimize_Round1(
    pts, fs, outfile, reps, maxiter, "asym_mig")

# Split with continuous symmetric migration, followed by isolation.
Optimize_Functions.Optimize_Round1(
    pts, fs, outfile, reps, maxiter, "anc_sym_mig")

# Split with continuous asymmetric migration, followed by isolation.
Example #14
0
# Per-round optimization settings (four entries each).
reps = [10, 20, 30, 40]
maxiters = [3, 5, 10, 15]
folds = [3, 2, 2, 1]

fs_folded = True

prefix = "Two_pop_"

# No divergence.
# `reps` is now passed explicitly: it was defined above together with
# maxiters and folds but never handed to the routine.
# NOTE(review): confirm that omitting it was not intentional.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "no_divergence",
                                    Models_2D.no_divergence,
                                    rounds,
                                    1,
                                    fs_folded=fs_folded,
                                    optimizer="log",
                                    reps=reps,
                                    maxiters=maxiters,
                                    folds=folds,
                                    param_labels="nu1")

#Split into two populations, no migration.
Optimize_Functions.Optimize_Routine(fs,
                                    pts,
                                    prefix,
                                    "no_mig",
                                    Models_2D.no_mig,
                                    rounds,
                                    3,
                                    fs_folded=fs_folded,
Example #15
0
'''
Diversification Model Set

This first set of models come from the following publication:

    Portik, D.M., Leache, A.D., Rivera, D., Blackburn, D.C., Rodel, M.-O.,
    Barej, M.F., Hirschfeld, M., Burger, M., and M.K. Fujita. 2017.
    Evaluating mechanisms of diversification in a Guineo-Congolian forest
    frog using demographic model selection. Molecular Ecology 26: 5245-5263.
    doi: 10.1111/mec.14266

'''

# Each call passes: spectrum, grid, output prefix, model name, model function,
# number of rounds, number of parameters, then keyword settings. The number
# of comma-separated names in param_labels should equal the parameter count.

# Split into three populations, no migration.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_nomig", Models_3D.split_nomig, rounds, 6, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, T1, T2")

# Split into three populations, symmetric migration between all populations (1<->2, 2<->3, and 1<->3).
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_all", Models_3D.split_symmig_all, rounds, 10, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, m3, T1, T2")

# Split into three populations, symmetric migration between 'adjacent' populations (1<->2, 2<->3, but not 1<->3).
# 9 parameters: there is no 1<->3 migration, so no m3 label (the previous
# 10-name label string did not match the declared parameter count).
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "split_symmig_adjacent", Models_3D.split_symmig_adjacent, rounds, 9, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, T1, T2")

# Adjacent Secondary contact, longest isolation - Split between pop 1 and (2,3) with no migration, then split between pop 2 and 3 with no migration. Period of symmetric secondary contact occurs between adjacent populations (ie 1<->2, 2<->3, but not 1<->3) after all splits are complete.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_1", Models_3D.refugia_adj_1, rounds, 9, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, m1, m2, T1, T2, T3")

# Adjacent Secondary contact, shorter isolation - Split between pop 1 and (2,3), gene flow does not occur. Split between pop 2 and 3 occurs with gene flow. After appearance of 2 and 3, gene flow also occurs between 1 and 2.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_2", Models_3D.refugia_adj_2, rounds, 8, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, m1, m2, T1, T2")

# Adjacent Secondary contact, shortest isolation - Split between pop 1 and (2,3) with no migration. Split between pop 2 and 3 occurs with gene flow, and gene flow occurs between 1 and 2 as well.
Optimize_Functions.Optimize_Routine(fs, pts, prefix, "refugia_adj_3", Models_3D.refugia_adj_3, rounds, 10, reps=reps, maxiters=maxiters, folds=folds, param_labels = "nu1, nuA, nu2, nu3, mA, m1, m2, T1a, T1b, T2")
Example #16
0
pts = [120, 130, 140]
# Labels and bounds for the 6 parameters of the SC model (one entry each).
p_labels = "nu1, nu2, m12, m21, T1,T2"
upper = [20, 20, 10, 10, 5, 5]
lower = [1e-3, 1e-3, 1e-5, 1e-5, 1e-3, 1e-3]

# Optimization schedule: 3 rounds for the simple IM and SC models; each list
# below has three entries.

reps = [10, 10, 10]
maxiters = [10, 20, 30]
folds = [3, 2, 1]

# Run 3 independent optimization routines; afterwards use
# "Summarize_Output.py" to keep the best run from each routine.

for i in range(1, 4):
    prefix = infile + "_OPTI_Number_{}".format(i)
    Optimize_Functions.Optimize_Routine(
        data, pts, prefix, "SC", SC,
        3,  # rounds
        6,  # number of parameters
        optimizer="log",
        data_folded=False,
        param_labels=p_labels,
        in_lower=lower,
        in_upper=upper,
        folds=folds,
        maxiters=maxiters,
        reps=reps)