Ejemplo n.º 1
0
def cook_constants_SizeChange(fasta_dict,
                              s1=1092,
                              NeC=2e5,
                              Nef=4e5,
                              Grate=1.03,
                              dir_data="./data/",
                              dir_vcf="vcf_data/sims/",
                              slim_dir='./',
                              batch_name=''):
    '''
    Set up one simulation per reference window (cookbook 'sizeChange').

    For every chromosome / start position in `fasta_dict`:
        - create the directory `dir_data + <simulation name>`;
        - call write_fastaEx to write the window's fasta for SLiM;
        - store the simulation constants and pass them to write_args;
        - pass the sample size to write_popIDs.

    arguments:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - s1: sample size, stored under "s1".
        - NeC: initial population eff. size.
        - Nef: effective population size after change.
        - Grate: growth rate during change.
        - dir_data: prefix of every simulation directory. It is concatenated
          as-is with the simulation name, so it should end with '/'.
        - dir_vcf, slim_dir: accepted but not used in this function.
        - batch_name: prefix of every simulation name.

    returns:
        (sim_store, cookID): sim_store maps each simulation name to its
        dictionary of constants; cookID is the string 'sizeChange'.
    '''
    cookID = 'sizeChange'
    sim_store = {}

    for chrom, windows in fasta_dict.items():
        for start, fasta in windows.items():
            # the simulation name is also the name of its output directory
            sim_name = batch_name + 'C{}.{}'.format(chrom, start)
            sim_dir = dir_data + sim_name
            os.makedirs(sim_dir, exist_ok=True)

            vcf_path = '/'.join(
                [sim_dir, sim_name + "_chr{}.vcf".format(chrom)])

            # reference sequence for SLiM; the return value of write_fastaEx
            # is what gets stored under "fasta_file"
            fasta_path = write_fastaEx(fasta,
                                       chrom=chrom,
                                       start=start,
                                       ID=sim_name,
                                       fasta_dir=sim_dir)

            constants = {
                "vcf_file": vcf_path,
                "fasta_file": fasta_path,
                "s1": s1,
                "NeC": NeC,
                "Nef": Nef,
                "Grate": Grate,
            }
            sim_store[sim_name] = constants

            # arguments file, then population identifiers file
            write_args(constants, sim_name, sim_dir)
            write_popIDs([constants["s1"]], file_dir=sim_dir)

    return sim_store, cookID
Ejemplo n.º 2
0
def cook_constants_Gravel2sampleRange(fasta_dict, nrange= [.05,.5], step= 10,
            Nmax= 100, dir_data= "./data/", dir_vcf= "vcf_data/sims/",
            slim_dir= './', batch_name= ''):
    '''
    Set up one simulation per reference window (cookbook 'Gravel2sampleRange').

    Sample sizes follow a grid of `step` values:
        - s1: evenly spaced fractions between nrange[0] and nrange[1],
          multiplied by Nmax and truncated to int;
        - s2= Nmax - s1;
        - s3= 0.
    Successive windows take successive grid entries. When fasta_dict holds
    more windows than the grid has entries, the grid is reused from its
    start (previously this raised IndexError part-way through the batch).

    For every chromosome / start position in `fasta_dict`:
        - create the directory `dir_data + <simulation name>`;
        - call write_fastaEx to write the window's fasta for SLiM;
        - store the simulation constants and pass them to write_args;
        - pass [s1, s2, s3] to write_popIDs.

    arguments:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - nrange: [low, high] proportions of Nmax used for s1.
        - step: number of points in the s1 grid.
        - Nmax: total sample size split between s1 and s2.
        - dir_data: prefix of every simulation directory. It is concatenated
          as-is with the simulation name, so it should end with '/'.
        - dir_vcf, slim_dir: accepted but not used in this function.
        - batch_name: prefix of every simulation name.

    returns:
        (sim_store, cookID): sim_store maps each simulation name to its
        dictionary of constants; cookID is the string 'Gravel2sampleRange'.

    raises:
        IndexError: if the grid is empty (step= 0) while fasta_dict
        contains at least one window.
    '''
    cookID= 'Gravel2sampleRange'
    sim_store= {}

    s1range= np.linspace(nrange[0],nrange[1],step) * Nmax
    s1range= np.array(s1range,dtype=int)
    s2range= Nmax - s1range
    s3= 0
    n_sizes= len(s1range)

    d= 0

    for chrom in fasta_dict.keys():
        for start in fasta_dict[chrom].keys():
            if n_sizes == 0:
                # fail before touching the disk; same exception type the
                # out-of-range lookup would have produced
                raise IndexError(
                    'empty sample size grid (step= {})'.format(step))

            # wrap around so batches larger than the grid reuse its entries
            slot= d % n_sizes

            fasta= fasta_dict[chrom][start]

            ### set up names and directories.
            SIMname= batch_name + 'C{}.{}'.format(chrom,str(start))
            SIM_dir= dir_data + SIMname

            os.makedirs(SIM_dir, exist_ok=True)

            vcf_file= SIM_dir + '/' + SIMname + "_chr{}.vcf".format(chrom)

            ### write fasta file for SLiM.
            fasta_file= write_fastaEx(fasta,chrom=chrom,start= start,
                          ID= SIMname,fasta_dir= SIM_dir)

            sim_store[SIMname]= {
                "vcf_file": vcf_file,
                "fasta_file": fasta_file,
                "s1": s1range[slot],
                "s2": s2range[slot],
                "s3": s3
            }

            ### write arguments to file
            write_args(sim_store[SIMname],SIMname,SIM_dir)
            ### population identifiers file
            sample_sizes= [sim_store[SIMname][x] for x in ["s1","s2","s3"]]
            write_popIDs(sample_sizes,file_dir= SIM_dir)

            d += 1

    return sim_store, cookID
Ejemplo n.º 3
0
def cook_constants_v1(fasta_dict,
                      s1=216,
                      s2=108,
                      s3=206,
                      dir_data="./data/sims/",
                      dir_vcf="vcf_data/",
                      slim_dir='./',
                      batch_name=''):
    '''
    Set up one simulation per reference window (cookbook 'v1').

    For every chromosome / start position in `fasta_dict`:
        - create the directory `dir_data + <simulation name> + '/'`;
        - call write_fastaEx to write the window's fasta for SLiM;
        - store the simulation constants and pass them to write_args;
        - pass [s1, s2, s3] to write_popIDs.

    arguments:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - s1, s2, s3: sample sizes, stored under the same keys.
        - dir_data: prefix of every simulation directory. It is concatenated
          as-is with the simulation name, so it should end with '/'.
        - dir_vcf, slim_dir: accepted but not used in this function.
        - batch_name: prefix of every simulation name.

    returns:
        (sim_store, cookID): sim_store maps each simulation name to its
        dictionary of constants; cookID is the string 'v1'.
    '''
    cookID = 'v1'
    sim_store = {}
    size_keys = ("s1", "s2", "s3")

    for chrom, windows in fasta_dict.items():
        for start, fasta in windows.items():
            sim_name = batch_name + 'C{}.{}'.format(chrom, start)
            # here the directory string carries its own trailing separator
            sim_dir = ''.join([dir_data, sim_name, '/'])
            os.makedirs(sim_dir, exist_ok=True)

            vcf_path = sim_dir + sim_name + "_chr{}.vcf".format(chrom)

            # reference sequence for SLiM
            fasta_path = write_fastaEx(fasta,
                                       chrom=chrom,
                                       start=start,
                                       ID=sim_name,
                                       fasta_dir=sim_dir)

            constants = {
                "vcf_file": vcf_path,
                "fasta_file": fasta_path,
                "s1": s1,
                "s2": s2,
                "s3": s3
            }
            sim_store[sim_name] = constants

            # arguments file, then population identifiers file
            write_args(constants, sim_name, sim_dir)
            write_popIDs([constants[key] for key in size_keys],
                         file_dir=sim_dir)

    return sim_store, cookID
Ejemplo n.º 4
0
def cook_constants_ABC_ratevar(fasta_dict,
                               demo_file='Prado&Martinez2013_M4A.txt',
                               template='template_simple.slim',
                               mut_file='M4AM0_grid.txt',
                               Nsamp=5,
                               anc_r='0',
                               sizes=500,
                               burnin=5e4,
                               mu=1e-8,
                               rec=1e-8,
                               bases='ACGT',
                               rate_change=10,
                               rate_range=[1, 2],
                               Nmat=1,
                               sim_scale=1,
                               dir_data="./data/",
                               dir_vcf="vcf_data/sims/",
                               slim_dir='./',
                               batch_name=''):
    '''
    cookbook ABC_ratevar takes a mutation skew file (arg: mut_file) instead of generating them.

    For every chromosome / start position in `fasta_dict`, demo_to_recipe is
    called once to build recipes from `template`, and Nsamp simulations are
    then set up, one per returned recipe file:
        - create the directory `dir_data + <simulation name>`;
        - call write_fastaEx to write the window's fasta for SLiM;
        - store the simulation constants and pass them to write_args;
        - pass one entry of `sizes` per distinct population to write_popIDs.

    arguments:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - demo_file, template, anc_r, Nsamp, sim_scale: forwarded to
          demo_to_recipe (batch_name is forwarded as `batch`).
        - mut_file: stored unchanged under "mut_file".
        - sizes: sample size used for every population.
        - mu: stored divided by sim_scale.
        - rec: stored divided by sim_scale, except when rec equals 0.5,
          in which case it is stored as-is.
        - dir_data: prefix of every simulation directory. It is concatenated
          as-is with the simulation name, so it should end with '/'.
        - burnin, bases, rate_change, rate_range, Nmat, dir_vcf, slim_dir:
          accepted but not used in this function.
        - batch_name: prefix of every simulation name.

    returns:
        (sim_store, cookID): sim_store maps each simulation name to its
        dictionary of constants; cookID is the string 'ABCdemo'.
    '''
    from tools.ABC_utilities import demo_to_recipe

    cookID = 'ABCdemo'

    # directory part of the template path ('' when it has no '/')
    recipe_dir = template.rpartition('/')[0]

    sim_store = {}

    for chrom, windows in fasta_dict.items():
        for start, fasta in windows.items():
            ## create sim recipes from template
            pops, files = demo_to_recipe(demo_file,
                                         template,
                                         batch=batch_name,
                                         anc_r=anc_r,
                                         Nsamp=Nsamp,
                                         recipe_dir=recipe_dir,
                                         sim_scale=sim_scale)

            for idx in range(Nsamp):
                sim_name = batch_name + 'C{}.{}.{}'.format(chrom, start, idx)
                sim_dir = dir_data + sim_name

                recipe = files[idx]

                os.makedirs(sim_dir, exist_ok=True)

                vcf_path = '/'.join(
                    [sim_dir, sim_name + "_chr{}.vcf".format(chrom)])

                # reference sequence for SLiM
                fasta_path = write_fastaEx(fasta,
                                           chrom=chrom,
                                           start=start,
                                           ID=sim_name,
                                           fasta_dir=sim_dir)

                # rec == 0.5 is left unscaled
                rec_divisor = sim_scale if rec != 0.5 else 1
                rec_here = rec / rec_divisor

                constants = {
                    "vcf_file": vcf_path,
                    "fasta_file": fasta_path,
                    "mu": mu / sim_scale,
                    "mut_file": mut_file,
                    'recipe': recipe,
                    'rec': rec_here
                }
                sim_store[sim_name] = constants

                # one entry per distinct population name, all set to `sizes`
                pop_sizes = {pop: sizes for pop in pops}

                # arguments file, then population identifiers file
                write_args(constants, sim_name, sim_dir)
                write_popIDs(list(pop_sizes.values()), file_dir=sim_dir)

    return sim_store, cookID
Ejemplo n.º 5
0
def cook_constants_ABC(fasta_dict,
                       demo_file='Prado&Martinez2013_M4A.txt',
                       template='template_simple.slim',
                       Nsamp=5,
                       anc_r='0',
                       sizes=500,
                       burnin=5e4,
                       mu=1e-8,
                       rec=1e-8,
                       bases='ACGT',
                       rate_change=10,
                       rate_range=[1, 5],
                       Nmat=0,
                       sim_scale=1,
                       dir_data="./data/",
                       dir_vcf="vcf_data/sims/",
                       slim_dir='./',
                       batch_name=''):
    '''
    Set up ABC demography simulations, generating the mutation grids here.

    Mutation grids: Nmat grids named M1..M<Nmat> are obtained from rate_mods,
    plus an empty grid M0. Each one is written to the file
    `batch_name + <grid name> + '_grid.txt'`, one line per mutation type:
    the type, a tab, and its comma-separated rates. The M0 file is empty.

    For every chromosome / start position in `fasta_dict` and every grid,
    demo_to_recipe is called once to build recipes from `template`, and
    Nsamp simulations are then set up, one per returned recipe file:
        - create the directory `dir_data + <simulation name>`;
        - call write_fastaEx to write the window's fasta for SLiM;
        - store the simulation constants and pass them to write_args;
        - pass one entry of `sizes` per distinct population to write_popIDs.

    arguments:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - demo_file, template, anc_r, Nsamp, sim_scale: forwarded to
          demo_to_recipe (batch_name is forwarded as `batch`).
        - sizes: sample size used for every population.
        - mu: forwarded to rate_mods and stored unchanged under "mu".
        - rec: stored divided by sim_scale, except when rec equals 0.5,
          in which case it is stored as-is.
        - bases: forwarded to mutation_dict_full and rate_mods.
        - rate_change, rate_range: forwarded to rate_mods.
        - Nmat: number of grids generated in addition to M0.
        - dir_data: prefix of every simulation directory. It is concatenated
          as-is with the simulation name, so it should end with '/'.
        - burnin, dir_vcf, slim_dir: accepted but not used in this function.
        - batch_name: prefix of simulation names and grid file names.

    returns:
        (sim_store, cookID): sim_store maps each simulation name to its
        dictionary of constants; cookID is the string 'ABCdemo'.
    '''
    from tools.ABC_utilities import demo_to_recipe

    cookID = 'ABCdemo'

    # directory part of the template path ('' when it has no '/')
    recipe_dir = template.rpartition('/')[0]

    sim_store = {}

    # only the third value returned by mutation_dict_full is needed here
    _, _, mut_org = mutation_dict_full(bases=bases)

    # M1..M<Nmat> first, M0 last: this order drives file writing and the
    # simulation loop below
    var_store = {}
    for mat_idx in range(1, Nmat + 1):
        var_store["M{}".format(mat_idx)] = rate_mods(mut_org,
                                                     rate_range=rate_range,
                                                     rate_change=rate_change,
                                                     bases=bases,
                                                     mu=mu)
    var_store["M0"] = {}

    mat_names = {mat: batch_name + mat + '_grid.txt' for mat in var_store}

    for mat, grid in var_store.items():
        with open(mat_names[mat], 'w') as fp:
            fp.writelines(
                '\t'.join([mut, ','.join(str(x) for x in grid[mut])]) + '\n'
                for mut in grid.keys())

    for chrom, windows in fasta_dict.items():
        for start, fasta in windows.items():
            for mat in var_store:
                ## create sim recipes from template
                pops, files = demo_to_recipe(demo_file,
                                             template,
                                             batch=batch_name,
                                             anc_r=anc_r,
                                             Nsamp=Nsamp,
                                             recipe_dir=recipe_dir,
                                             sim_scale=sim_scale)

                for idx in range(Nsamp):
                    sim_name = ''.join([
                        batch_name + mat, 's', str(idx),
                        'C{}.{}'.format(chrom, start)
                    ])
                    sim_dir = dir_data + sim_name

                    recipe = files[idx]

                    os.makedirs(sim_dir, exist_ok=True)

                    vcf_path = '/'.join(
                        [sim_dir, sim_name + "_chr{}.vcf".format(chrom)])

                    # reference sequence for SLiM
                    fasta_path = write_fastaEx(fasta,
                                               chrom=chrom,
                                               start=start,
                                               ID=sim_name,
                                               fasta_dir=sim_dir)

                    # rec == 0.5 is left unscaled
                    rec_divisor = sim_scale if rec != 0.5 else 1
                    rec_here = rec / rec_divisor

                    # NOTE(review): mu is stored unscaled here, whereas
                    # cook_constants_ABC_ratevar stores mu / sim_scale;
                    # confirm this difference is intended.
                    constants = {
                        "vcf_file": vcf_path,
                        "fasta_file": fasta_path,
                        "mu": mu,
                        "rec": rec_here,
                        "mut_file": mat_names[mat],
                        'recipe': recipe
                    }
                    sim_store[sim_name] = constants

                    # one entry per distinct population name
                    pop_sizes = {pop: sizes for pop in pops}

                    # arguments file, then population identifiers file
                    write_args(constants, sim_name, sim_dir)
                    write_popIDs(list(pop_sizes.values()), file_dir=sim_dir)

    return sim_store, cookID
Ejemplo n.º 6
0
def cook_constants_rateVarMat(fasta_dict,
                              mu=1e-8,
                              bases='ACGT',
                              rate_change=10,
                              rate_range=[1, 5],
                              Nmat=5,
                              s1=2000,
                              NeC=2e5,
                              Nef=4e5,
                              Grate=1.03,
                              dir_data="./data/",
                              dir_vcf="vcf_data/sims/",
                              slim_dir='./',
                              batch_name=''):
    '''
    Set up size-change simulations across mutation grids (cookbook 'rateVarII').

    Mutation grids: Nmat grids named M1..M<Nmat> are obtained from rate_mods,
    plus an empty grid M0. Each one is written to the file
    `batch_name + <grid name> + '_grid.txt'`, one line per mutation type:
    the type, a tab, and its comma-separated rates. The M0 file is empty.

    For every chromosome / start position in `fasta_dict` and every grid:
        - create the directory `dir_data + <simulation name>`;
        - call write_fastaEx to write the window's fasta for SLiM;
        - store the simulation constants and pass them to write_args;
        - pass the sample size to write_popIDs.

    arguments:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - mu: forwarded to rate_mods and stored unchanged under "mu".
        - bases: forwarded to mutation_dict_full and rate_mods.
        - rate_change, rate_range: forwarded to rate_mods.
        - Nmat: number of grids generated in addition to M0.
        - s1: sample size, stored under "s1".
        - NeC: initial population eff. size.
        - Nef: effective population size after change.
        - Grate: growth rate during change.
        - dir_data: prefix of every simulation directory. It is concatenated
          as-is with the simulation name, so it should end with '/'.
        - dir_vcf, slim_dir: accepted but not used in this function.
        - batch_name: prefix of simulation names and grid file names.

    returns:
        (sim_store, cookID): sim_store maps each simulation name to its
        dictionary of constants; cookID is the string 'rateVarII'.
    '''
    cookID = 'rateVarII'
    sim_store = {}

    # only the third value returned by mutation_dict_full is needed here
    _, _, mut_org = mutation_dict_full(bases=bases)

    # M1..M<Nmat> first, M0 last: this order drives file writing and the
    # simulation loop below
    var_store = {}
    for mat_idx in range(1, Nmat + 1):
        var_store["M{}".format(mat_idx)] = rate_mods(mut_org,
                                                     rate_range=rate_range,
                                                     rate_change=rate_change,
                                                     bases=bases,
                                                     mu=mu)
    var_store["M0"] = {}

    mat_names = {mat: batch_name + mat + '_grid.txt' for mat in var_store}

    for mat, grid in var_store.items():
        with open(mat_names[mat], 'w') as fp:
            fp.writelines(
                '\t'.join([mut, ','.join(str(x) for x in grid[mut])]) + '\n'
                for mut in grid.keys())

    for chrom, windows in fasta_dict.items():
        for start, fasta in windows.items():
            for mat in var_store:
                sim_name = batch_name + mat + 'C{}.{}'.format(chrom, start)
                sim_dir = dir_data + sim_name
                os.makedirs(sim_dir, exist_ok=True)

                vcf_path = '/'.join(
                    [sim_dir, sim_name + "_chr{}.vcf".format(chrom)])

                # reference sequence for SLiM
                fasta_path = write_fastaEx(fasta,
                                           chrom=chrom,
                                           start=start,
                                           ID=sim_name,
                                           fasta_dir=sim_dir)

                constants = {
                    "vcf_file": vcf_path,
                    "fasta_file": fasta_path,
                    "s1": s1,
                    "NeC": NeC,
                    "Nef": Nef,
                    "Grate": Grate,
                    "mu": mu,
                    "mut_file": mat_names[mat]
                }
                sim_store[sim_name] = constants

                # arguments file, then population identifiers file
                write_args(constants, sim_name, sim_dir)
                write_popIDs([constants["s1"]], file_dir=sim_dir)

    return sim_store, cookID
Ejemplo n.º 7
0
def cook_constants_Burnin(fasta_dict,
                          bt=50000,
                          sdelay=1000,
                          s1=1092,
                          NeC=2e5,
                          Nef=4e5,
                          Grate=1.03,
                          dir_data="./data/",
                          dir_vcf="vcf_data/sims/",
                          slim_dir='./',
                          batch_name=''):
    '''
    Set up size-change simulations over a range of event times (cookbook 'burnin').

    Event times are integers evenly spaced between 1 and `bt`, one per
    window in `fasta_dict`, assigned in iteration order. Sampling is
    scheduled `sdelay` after the event time.

    For every chromosome / start position in `fasta_dict`:
        - create the directory `dir_data + <simulation name>`;
        - call write_fastaEx to write the window's fasta for SLiM;
        - store the simulation constants and pass them to write_args;
        - pass the sample size to write_popIDs.

    arguments:
        - fasta_dict: nested mapping {chrom: {start: fasta}}.
        - bt: largest event time.
        - sdelay: offset added to the event time to get the sampling time.
        - s1: sample size, stored under "s1".
        - NeC: initial population eff. size.
        - Nef: effective population size after change.
        - Grate: growth rate during change.
        - dir_data: prefix of every simulation directory. It is concatenated
          as-is with the simulation name, so it should end with '/'.
        - dir_vcf, slim_dir: accepted but not used in this function.
        - batch_name: prefix of every simulation name.

    returns:
        (sim_store, cookID): sim_store maps each simulation name to its
        dictionary of constants; cookID is the string 'burnin'.
    '''
    cookID = 'burnin'
    sim_store = {}

    # flatten the nested mapping so each window pairs with one event time
    windows = [(chrom, start, fasta)
               for chrom, per_chrom in fasta_dict.items()
               for start, fasta in per_chrom.items()]
    burnin_list = np.linspace(1, bt, len(windows), dtype=int)

    for evt, (chrom, start, fasta) in zip(burnin_list, windows):
        # sampling happens sdelay after the event
        st = evt + sdelay

        sim_name = ''.join([
            batch_name + 'T{}'.format(evt), 'C{}.{}'.format(chrom, start)
        ])
        sim_dir = dir_data + sim_name
        os.makedirs(sim_dir, exist_ok=True)

        vcf_path = '/'.join([sim_dir, sim_name + "_chr{}.vcf".format(chrom)])

        # reference sequence for SLiM
        fasta_path = write_fastaEx(fasta,
                                   chrom=chrom,
                                   start=start,
                                   ID=sim_name,
                                   fasta_dir=sim_dir)

        # "other" maps '//grow' and '//sample' to text opening a block at
        # the event and sampling times
        # presumably markers substituted in the SLiM recipe - confirm
        # against write_args
        constants = {
            "vcf_file": vcf_path,
            "fasta_file": fasta_path,
            "s1": s1,
            "NeC": NeC,
            "Nef": Nef,
            "Grate": Grate,
            "evt": evt,
            "other": {
                "//grow": "{}: ".format(evt) + "{\n",
                "//sample": "{} late() ".format(st) + "{\n"
            }
        }
        sim_store[sim_name] = constants

        # arguments file, then population identifiers file
        write_args(constants, sim_name, sim_dir)
        write_popIDs([constants["s1"]], file_dir=sim_dir)

    return sim_store, cookID