##======
    # read in simulated sequences
    ##======
    
    # NOTE(review): this is the interior of an enclosing block whose header is
    # outside this excerpt; `i`, `ms_f`, `sim_args`, `predict_args`,
    # `only_poly`, `write_ps` and the output handles are all bound there.
    # `i` is presumably the index of the current simulation -- confirm.

    # presumably the components carried by `sim`:
    # trees, recomb_sites, seg_sites, positions, seqs
    sim = sim_process.read_one_sim(ms_f, sim_args['num_sites'], sim_args['num_samples'])

    ##======
    # predict introgressed/non-introgressed tracts
    ##======
    
    # Called with train=True and method='posterior'; five values come back.
    # Of these, `probs` is not used in the visible part of this excerpt.
    state_seq, probs, hmm, hmm_init, ps = \
        sim_predict.predict_introgressed(sim, sim_args, predict_args, \
                                         train=True, method='posterior', \
                                         only_poly=only_poly)
    # Convert the predicted state sequence to block form, keyed on the
    # states listed in predict_args['states'].
    state_seq_blocks = sim_process.convert_to_blocks(state_seq, \
                                                     predict_args['states'])

    ##======
    # output
    ##======
    
    # Optional: write `ps` to ps_f, labelled with the index as a string and
    # the literal tag 'I'.
    if write_ps:
        predict.write_positions(ps, ps_f, str(i), 'I')

    # The third argument of write_hmm_line is True only when i == 0 --
    # presumably a "write header line" flag; confirm in sim_predict.

    # summary info about HMM (before training)
    sim_predict.write_hmm_line(hmm_init, out_init_f, i==0) 

    # summary info about HMM (after training)
    sim_predict.write_hmm_line(hmm, out_f, i==0) 

    # locations of introgression
    # NOTE(review): the statement this comment introduces is cut off in this
    # excerpt.
Ejemplo n.º 2
0
# Alternative, evenly spaced threshold grid (disabled):
#x = 20
#thresholds = [float(i) / x for i in range(0, x + 2)]

# Cutoffs handed to roc.threshold_probs, one ROC output line per cutoff.
# The grid runs from 0 up to 1.1, i.e. it deliberately includes a value above 1.
thresholds = [0, .00001, .00005, .0001, .0005, .001, .005, .01, .05, .1, .5, .6, .7, .8, .9, .99, 1, 1.1]

# Passed to roc.write_roc_line; True only for the first threshold
# (presumably so the column header is written once -- confirm in roc).
header = True

# try/finally guarantees the output handle is closed even when one of the
# replicates raises part-way through the sweep.
try:
    for threshold in thresholds:
        # Single pre-formatted argument: prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('threshold: %s' % threshold)

        # One stats mapping per simulation replicate, at this threshold.
        all_stats = []
        for rep in range(sim_args['num_reps']):
            # Threshold every individual's probabilities for this replicate.
            paths_t = {
                ind: roc.threshold_probs(probs[rep][ind],
                                         threshold,
                                         sim_args['species_to'],
                                         ps[rep], 0,
                                         sim_args['num_sites']-1)
                for ind in probs[rep]
            }
            predicted = sim_process.convert_to_blocks(paths_t,
                                                      predict_args['states'])
            all_stats.append(roc.get_stats(actual[rep], predicted, sim_args))

        # Average each statistic across replicates; the key set is taken from
        # the first replicate, so every replicate must provide the same keys.
        avg_stats = {}
        for key in all_stats[0].keys():
            avg_stats[key] = mystats.mean(
                [rep_stats[key] for rep_stats in all_stats])

        roc.write_roc_line(f, threshold, avg_stats, header)
        header = False
finally:
    f.close()



Ejemplo n.º 3
0
    # ======
    # read in simulated sequences
    # ======

    # NOTE(review): this is the interior of an enclosing block whose header is
    # outside this excerpt; `i`, `ms_f`, `sim_args`, `predict_args` and the
    # output handles are bound there. `i` is presumably the index of the
    # current simulation -- confirm.

    # presumably the components carried by `sim`:
    # trees, recomb_sites, seg_sites, positions, seqs
    sim = sim_process.read_one_sim(ms_f, sim_args['num_sites'],
                                   sim_args['num_samples'])

    # ======
    # predict introgressed/non-introgressed tracts
    # ======

    # Called with train=True and method="viterbi"; three values come back
    # (state sequence, the HMM, and `hmm_init`).
    state_seq, hmm, hmm_init = sim_predict.predict_introgressed(
        sim, sim_args, predict_args, train=True, method="viterbi")
    # Convert the predicted state sequence to block form, keyed on the
    # states listed in predict_args['states'].
    state_seq_blocks = sim_process.convert_to_blocks(state_seq,
                                                     predict_args['states'])

    # ======
    # output
    # ======

    # The third argument of write_hmm_line is True only when i == 0 --
    # presumably a "write header line" flag; confirm in sim_predict.

    # summary info about HMM (before training)
    sim_predict.write_hmm_line(hmm_init, out_init_f, i == 0)

    # summary info about HMM (after training)
    sim_predict.write_hmm_line(hmm, out_f, i == 0)

    # locations of introgression
    sim_process.write_introgression_blocks(state_seq_blocks, introgression_f,
                                           i, predict_args['states'])
Ejemplo n.º 4
0
# Alternative, evenly spaced threshold grid (disabled):
# x = 20
# thresholds = [float(i) / x for i in range(0, x + 2)]

# Cutoffs handed to roc.threshold_probs, one ROC output line per cutoff.
# The grid runs from 0 up to 1.1, i.e. it deliberately includes a value above 1.
thresholds = [
    0, .00001, .00005, .0001, .0005, .001, .005, .01, .05, .1, .5, .6, .7, .8,
    .9, .99, 1, 1.1
]

# Passed to roc.write_roc_line; True only for the first threshold
# (presumably so the column header is written once -- confirm in roc).
header = True

# try/finally guarantees the output handle is closed even when one of the
# replicates raises part-way through the sweep.
try:
    for threshold in thresholds:
        print('threshold:', threshold)

        # One stats mapping per simulation replicate, at this threshold.
        all_stats = []
        for rep in range(sim_args['num_reps']):
            # Threshold every individual's probabilities for this replicate.
            paths_t = {
                ind: roc.threshold_probs(probs[rep][ind], threshold,
                                         sim_args['species_to'], ps[rep],
                                         0, sim_args['num_sites'] - 1)
                for ind in probs[rep]
            }
            predicted = sim_process.convert_to_blocks(paths_t,
                                                      predict_args['states'])
            all_stats.append(roc.get_stats(actual[rep], predicted, sim_args))

        # Average each statistic across replicates; the key set is taken from
        # the first replicate, so every replicate must provide the same keys.
        avg_stats = {}
        for key in all_stats[0].keys():
            avg_stats[key] = mystats.mean(
                [rep_stats[key] for rep_stats in all_stats])

        roc.write_roc_line(f, threshold, avg_stats, header)
        header = False
finally:
    f.close()
Ejemplo n.º 5
0
    # ======

    # NOTE(review): this is the interior of an enclosing block whose header
    # (and the title of the banner closed just above) is outside this excerpt;
    # `sim`, `args`, `i`, `out_f`, `introgression_f` and `ms_f` are bound
    # there. `i` is presumably the index of the current simulation -- confirm.

    # sim_stats is defined outside this excerpt; its result is only passed on
    # to write_output_line below.
    stats = sim_stats(sim, args)

    # ======
    # calculate frequency of ILS (or of possible ILS...)
    # ======

    concordance_info = calculate_ils(sim, args)

    # ======
    # find introgressed/non-introgressed tracts
    # ======

    # find_introgressed returns a pair: summary statistics and the actual
    # state sequence, which is then converted to block form keyed on
    # args['species'].
    introgression_stats, actual_state_seq = find_introgressed(sim, args)
    actual_state_seq_blocks = sim_process.convert_to_blocks(
        actual_state_seq, args['species'])

    # ======
    # output
    # ======

    # general summary statistics about simulated sequences
    # (last argument is True only when i == 0 -- presumably a "write header
    # line" flag; confirm in write_output_line)
    write_output_line(stats, concordance_info, introgression_stats, out_f,
                      i == 0)

    # specific locations of introgression (for comparing predictions
    # to)
    sim_process.write_introgression_blocks(actual_state_seq_blocks,
                                           introgression_f, i, args['species'])

# Dedented: runs once, after the enclosing indented block has finished.
ms_f.close()
    ##======

    # NOTE(review): this is the interior of an enclosing block whose header
    # (and the title of the banner closed just above) is outside this excerpt;
    # `sim`, `args`, `i`, `out_f`, `introgression_f` and `ms_f` are bound
    # there. `i` is presumably the index of the current simulation -- confirm.

    # sim_stats is defined outside this excerpt; its result is only passed on
    # to write_output_line below.
    stats = sim_stats(sim, args)

    ##======
    # calculate frequency of ILS (or of possible ILS...)
    ##======

    concordance_info = calculate_ils(sim, args)

    ##======
    # find introgressed/non-introgressed tracts
    ##======

    # find_introgressed returns a pair: summary statistics and the actual
    # state sequence, which is then converted to block form keyed on
    # args['species'].
    introgression_stats, actual_state_seq = find_introgressed(sim, args)
    actual_state_seq_blocks = sim_process.convert_to_blocks(actual_state_seq, \
                                                            args['species'])

    ##======
    # output
    ##======

    # general summary statistics about simulated sequences
    # (last argument is True only when i == 0 -- presumably a "write header
    # line" flag; confirm in write_output_line)
    write_output_line(stats, concordance_info, introgression_stats, out_f, i==0) 

    # specific locations of introgression (for comparing predictions
    # to)
    sim_process.write_introgression_blocks(actual_state_seq_blocks, introgression_f, \
                                           i, args['species'])

# Dedented: both handles are closed once, after the enclosing indented block
# has finished.
ms_f.close()
out_f.close()
    # read in simulated sequences
    # ======

    # NOTE(review): this is the interior of an enclosing block whose header
    # (and the opening line of the banner above) is outside this excerpt;
    # `i`, `ms_f`, `sim_args`, `predict_args`, `gp_dir`, `write_ps` and the
    # output handles are bound there. `i` is presumably the index of the
    # current simulation -- confirm.

    # presumably the components carried by `sim`:
    # trees, recomb_sites, seg_sites, positions, seqs
    sim = sim_process.read_one_sim(ms_f, sim_args['num_sites'],
                                   sim_args['num_samples'])

    # ======
    # predict introgressed/non-introgressed tracts
    # ======

    # This variant calls sim_predict_phylohmm and additionally passes `i` and
    # `gp_dir`; six values come back. Of these, `probs`, `init`, `emis` and
    # `trans` are not used in the visible part of this excerpt.
    state_seq, probs, init, emis, trans, ps = \
        sim_predict_phylohmm.predict_introgressed(sim, sim_args,
                                                  predict_args, i, gp_dir)

    # Blocks are keyed on sim_args['species'] here.
    state_seq_blocks = sim_process.convert_to_blocks(state_seq,
                                                     sim_args['species'])

    # ======
    # output
    # ======

    # Optional: write `ps` to ps_f, labelled with the index as a string and
    # the literal tag 'I'.
    if write_ps:
        predict.write_positions(ps, ps_f, str(i), 'I')

    # summary info about HMM
    # NOTE(review): disabled call; no `hmm` is bound in the visible part of
    # this variant, so it could not be re-enabled as written.
    # sim_predict.write_hmm_line(hmm, out_f, i==0)

    # locations of introgression
    sim_process.write_introgression_blocks(state_seq_blocks, introgression_f,
                                           i, sim_args['species'])