Esempi in Python per parse_gene_list

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: parse_file

Metodo/funzione: parse_gene_list

Esempi su hotexamples.com: 6

parse_gene_list in Python: 6 esempi trovati. Questi sono i migliori esempi reali in Python per parse_file.parse_gene_list, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

        axis.set_ylabel('Fixed mutations')

        axis.set_xlim([-1, 22])

        if population_idx == 5:
            axis.set_xlabel('Clones')

########################################
#
# Now do the plotting (focal first, then rest)
#
########################################

# Time grid used for the theory curves: 0, 500, ..., 60000.
theory_times = 500 * numpy.arange(121)

# parse_gene_list() is unpacked into seven values; the first three are
# indexed in parallel below.
(gene_names, start_positions, end_positions, promoter_start_positions,
 promoter_end_positions, gene_sequences,
 strands) = parse_file.parse_gene_list()

# Gene name -> (start, end), pairing entries that share an index.
gene_name_position_map = {
    gene_name: (start_positions[i], end_positions[i])
    for i, gene_name in enumerate(gene_names)
}

# Clade HMM state -> matplotlib colour spec.
state_color_map = {
    parse_file.clade_hmm_states[state_key]: color_spec
    for state_key, color_spec in (
        ('FB', '0.7'),
        ('FM', '#7a0177'),
        ('Fm', '#f768a1'),
    )
}

for metapopulation_idx in xrange(0, 2):
    for population_idx in xrange(0, 6):

Esempio n. 2

Mostra file

File: plot_allele_multiplicity.py Progetto: thomaslabar/LTEE-metagenomic

mutator_axis.text(0.6,
                  1.5e05,
                  figure_utils.get_panel_label('b'),
                  fontsize=6,
                  fontweight='bold')

####
#
# Do calculation
#
####

# Mutation type labels collected for exclusion; the code that consults this
# set is outside this excerpt.
excluded_types = set(['sv', 'indel'])

# Inputs for the annotation map, each read through a parse_file helper.
reference_sequence = parse_file.parse_reference_genome()
gene_data = parse_file.parse_gene_list()
repeat_data = parse_file.parse_repeat_list()
mask_data = parse_file.parse_mask_list()

# create_annotation_map() combines the gene, repeat and mask data and returns
# three values; only effective_gene_lengths is used in the lines shown here.
position_gene_map, effective_gene_lengths, substitution_specific_synonymous_fraction = parse_file.create_annotation_map(
    gene_data, repeat_data, mask_data)

# Ltot is the reference length minus the 'masked' entry of
# effective_gene_lengths (it replaces the hard-coded estimate kept below).
#Ltot = 4.4e06
Ltot = len(reference_sequence) - effective_gene_lengths['masked']
# Report the value on stderr.
sys.stderr.write("Ltot = %d\n" % Ltot)

for population_group in ['nonmutators', 'mutators']:

    if population_group == 'nonmutators':
        populations = parse_file.complete_nonmutator_lines
        color = figure_utils.nonmutator_group_color

Esempio n. 3

Mostra file

File: plot_multiplicity.py Progetto: MURI2/Bacillus_Evol_Timeseries

def plot_within_taxon_paralleliism(taxon, slope_null=1):
    """Draw and save the gene-multiplicity (parallelism) figure for one taxon.

    The figure is a 2x3 grid. Top row: the multiplicity survival curves
    (observed and null) for each treatment, a scatter of multiplicity against
    ``nf_max / n`` per gene, and a three-set Venn diagram of the genes listed
    in each treatment's ``parallel_genes_*.txt`` file. Bottom row: one
    multiplicity-vs-multiplicity scatter per pair of treatments that have
    data. The figure is written to
    ``<pt.get_path()>/figs/multiplicity_<taxon>.jpg``.

    Args:
        taxon: Taxon identifier; concatenated with the treatment to build
            file names and passed to the ``pt`` / ``parse_file`` helpers.
        slope_null: Not used by the current implementation; kept so existing
            callers that pass it keep working.

    Raises:
        KeyError: If any of the treatments '0', '1' or '2' has no
            ``parallel_genes_*.txt`` file, because the Venn diagram indexes
            all three unconditionally.
    """

    fig = plt.figure(figsize=(12, 8))

    gene_data = parse_file.parse_gene_list(taxon)

    # The unpacked names are not used below; the unpacking is kept because it
    # checks that parse_gene_list() still returns ten values.
    gene_names, gene_start_positions, gene_end_positions, promoter_start_positions, promoter_end_positions, gene_sequences, strands, genes, features, protein_ids = gene_data

    ax_multiplicity = plt.subplot2grid((2, 3), (0, 0), colspan=1)
    ax_mult_freq = plt.subplot2grid((2, 3), (0, 1), colspan=1)
    ax_venn = plt.subplot2grid((2, 3), (0, 2), colspan=1)

    ax_multiplicity.set_xscale('log', base=10)
    ax_multiplicity.set_yscale('log', base=10)
    ax_multiplicity.set_xlabel('Gene multiplicity, ' + r'$m$', fontsize=14)
    ax_multiplicity.set_ylabel('Fraction mutations ' + r'$\geq m$',
                               fontsize=14)
    ax_multiplicity.text(-0.1,
                         1.07,
                         pt.sub_plot_labels[0],
                         fontsize=18,
                         fontweight='bold',
                         ha='center',
                         va='center',
                         transform=ax_multiplicity.transAxes)

    ax_multiplicity.set_ylim([0.001, 1.1])
    ax_multiplicity.set_xlim([0.07, 130])

    ax_mult_freq.set_xscale('log', base=10)
    ax_mult_freq.set_yscale('log', base=10)
    ax_mult_freq.set_xlabel('Gene multiplicity, ' + r'$m$', fontsize=14)
    ax_mult_freq.set_ylabel('Mean maximum allele frequency, ' +
                            r'$\overline{f}_{max}$',
                            fontsize=11)
    ax_mult_freq.text(-0.1,
                      1.07,
                      pt.sub_plot_labels[1],
                      fontsize=18,
                      fontweight='bold',
                      ha='center',
                      va='center',
                      transform=ax_mult_freq.transAxes)

    ax_venn.axis('off')
    ax_venn.text(-0.1,
                 1.07,
                 pt.sub_plot_labels[2],
                 fontsize=18,
                 fontweight='bold',
                 ha='center',
                 va='center',
                 transform=ax_venn.transAxes)

    alpha_treatment_dict = {'0': 0.5, '1': 0.5, '2': 0.8}

    # treatment -> list of gene ids read from its parallel_genes file
    significant_multiplicity_dict = {}
    # gene id -> {treatment: second-to-last column of its row, as float}
    significant_multiplicity_values_dict = {}
    # gene name -> {treatment: multiplicity}
    multiplicity_dict = {}

    all_mults = []
    all_freqs = []

    treatments_in_taxon = []

    # Vertical axes-fraction positions of the per-treatment annotation.
    label_y_axes = [0.3, 0.2, 0.1]

    for treatment_idx, treatment in enumerate(pt.treatments):

        significant_genes_path = pt.get_path(
        ) + '/data/timecourse_final/parallel_genes_%s.txt' % (treatment +
                                                              taxon)
        # Treatments without a parallel-genes file are skipped entirely.
        if not os.path.exists(significant_genes_path):
            continue
        treatments_in_taxon.append(treatment)

        significant_genes = []
        # Context manager so the handle is closed even if a row fails to parse.
        with open(significant_genes_path, "r") as significant_genes_file:
            for i, line in enumerate(significant_genes_file):
                # First line is a header.
                if i == 0:
                    continue
                items = line.strip().split(",")
                significant_genes.append(items[0])
                significant_multiplicity_values_dict.setdefault(
                    items[0], {})[treatment] = float(items[-2])

        significant_multiplicity_dict[treatment] = significant_genes

        populations = [
            treatment + taxon + replicate for replicate in pt.replicates
        ]

        # Load convergence matrix
        convergence_matrix = parse_file.parse_convergence_matrix(
            pt.get_path() + '/data/timecourse_final/' +
            ("%s_convergence_matrix.txt" % (treatment + taxon)))
        gene_parallelism_statistics = mutation_spectrum_utils.calculate_parallelism_statistics(
            convergence_matrix, populations, Lmin=100)
        G, pvalue = mutation_spectrum_utils.calculate_total_parallelism(
            gene_parallelism_statistics)

        sys.stdout.write("Total parallelism for %s = %g (p=%g)\n" %
                         (treatment + taxon, G, pvalue))

        predictors = []
        responses = []

        gene_hits = []
        gene_predictors = []

        Ls = []

        ax_mult_freqs_x = []
        ax_mult_freqs_y = []

        for gene_name in convergence_matrix.keys():

            Ls.append(convergence_matrix[gene_name]['length'])
            m = gene_parallelism_statistics[gene_name]['multiplicity']

            multiplicity_dict.setdefault(gene_name, {})[treatment] = m

            n = 0
            nfixed = 0
            nf_max = 0

            for population in populations:
                for t, L, f, f_max in convergence_matrix[gene_name][
                        'mutations'][population]:
                    fixed_weight = timecourse_utils.calculate_fixed_weight(
                        L, f)

                    predictors.append(m)
                    responses.append(fixed_weight)

                    n += 1
                    nfixed += fixed_weight

                    nf_max += timecourse_utils.calculate_fixed_weight(L, f_max)

            # Only genes with at least one mutation contribute points.
            if n > 0.5:
                gene_hits.append(n)
                gene_predictors.append(m)

                if nf_max > 0:
                    ax_mult_freqs_x.append(m)
                    ax_mult_freqs_y.append(nf_max / n)

        Ls = np.asarray(Ls)
        ntot = len(predictors)
        mavg = ntot * 1.0 / len(Ls)

        # Sort mutations by multiplicity so the survival curve below can be
        # drawn as a step function.
        predictors, responses = (np.array(x) for x in zip(
            *sorted(zip(predictors, responses), key=lambda pair: (pair[0]))))

        gene_hits, gene_predictors = (np.array(x) for x in zip(*sorted(
            zip(gene_hits, gene_predictors), key=lambda pair: (pair[0]))))

        rescaled_predictors = np.exp(np.fabs(np.log(predictors / mavg)))

        null_survival_function = mutation_spectrum_utils.NullMultiplicitySurvivalFunction.from_parallelism_statistics(
            gene_parallelism_statistics)

        # default base is 10
        theory_ms = np.logspace(-2, 2, 100)
        theory_survivals = null_survival_function(theory_ms)
        # Normalise so the null curve starts at 1.
        theory_survivals /= theory_survivals[0]

        sys.stderr.write("Done!\n")

        ax_multiplicity.plot(theory_ms,
                             theory_survivals,
                             lw=3,
                             color=pt.get_colors(treatment),
                             alpha=0.8,
                             ls=':',
                             zorder=1)

        # Observed survival: fraction of mutations with multiplicity >= m.
        ax_multiplicity.plot(
            predictors, (len(predictors) - np.arange(0, len(predictors))) *
            1.0 / len(predictors),
            lw=3,
            color=pt.get_colors(treatment),
            alpha=0.8,
            ls='--',
            label=str(int(10**int(treatment))) + '-day',
            drawstyle='steps',
            zorder=2)

        if pvalue < 0.001:
            pretty_pvalue = r'$\ll 0.001$'
        else:
            pretty_pvalue = '=' + str(round(pvalue, 4))

        g_score_p_label = r'$\Delta \ell_{{{}}}=$'.format(
            str(10**int(treatment))) + str(round(
                G, 3)) + ', ' + r'$P$' + pretty_pvalue

        text_color = pt.lighten_color(pt.get_colors(treatment), amount=1.3)

        ax_multiplicity.text(0.26,
                             label_y_axes[treatment_idx],
                             g_score_p_label,
                             fontsize=7,
                             ha='center',
                             va='center',
                             color='k',
                             transform=ax_multiplicity.transAxes)

        ax_mult_freq.scatter(ax_mult_freqs_x,
                             ax_mult_freqs_y,
                             color=pt.get_colors(treatment),
                             edgecolors=pt.get_colors(treatment),
                             marker=pt.plot_species_marker(taxon),
                             alpha=alpha_treatment_dict[treatment])

        all_mults.extend(ax_mult_freqs_x)
        all_freqs.extend(ax_mult_freqs_y)

    # make treatment pairs
    treatments_in_taxon.sort(key=float)

    for i in range(0, len(treatments_in_taxon)):

        for j in range(i + 1, len(treatments_in_taxon)):

            # With three treatments, i + j - 1 maps the pairs (0,1), (0,2),
            # (1,2) onto bottom-row columns 0, 1, 2.
            ax_mult_i_j = plt.subplot2grid((2, 3), (1, i + j - 1), colspan=1)
            ax_mult_i_j.set_xscale('log', base=10)
            ax_mult_i_j.set_yscale('log', base=10)
            ax_mult_i_j.set_xlabel(str(10**int(treatments_in_taxon[i])) +
                                   '-day gene multiplicity, ' + r'$m$',
                                   fontsize=14)
            ax_mult_i_j.set_ylabel(str(10**int(treatments_in_taxon[j])) +
                                   '-day gene multiplicity, ' + r'$m$',
                                   fontsize=14)
            # 1:1 reference line.
            ax_mult_i_j.plot([0.05, 200], [0.05, 200],
                             lw=3,
                             c='grey',
                             ls='--',
                             zorder=1)
            ax_mult_i_j.set_xlim([0.05, 200])
            ax_mult_i_j.set_ylim([0.05, 200])

            ax_mult_i_j.text(-0.1,
                             1.07,
                             pt.sub_plot_labels[2 + i + j],
                             fontsize=18,
                             fontweight='bold',
                             ha='center',
                             va='center',
                             transform=ax_mult_i_j.transAxes)

            # NOTE(review): this indexes both treatments for every gene and
            # raises KeyError if a gene is absent from one treatment's
            # convergence matrix - confirm the matrices share a gene set.
            multiplicity_pair = [
                (multiplicity_dict[gene_name][treatments_in_taxon[i]],
                 multiplicity_dict[gene_name][treatments_in_taxon[j]])
                for gene_name in sorted(multiplicity_dict)
                if (multiplicity_dict[gene_name][treatments_in_taxon[i]] > 0)
                and (multiplicity_dict[gene_name][treatments_in_taxon[j]] > 0)
            ]
            # Genes listed as significant in both treatments of the pair.
            significant_multiplicity_pair = [
                (significant_multiplicity_values_dict[gene_name][
                    treatments_in_taxon[i]],
                 significant_multiplicity_values_dict[gene_name][
                     treatments_in_taxon[j]])
                for gene_name in sorted(significant_multiplicity_values_dict)
                if (treatments_in_taxon[i] in
                    significant_multiplicity_values_dict[gene_name]) and (
                        treatments_in_taxon[j] in
                        significant_multiplicity_values_dict[gene_name])
            ]

            # get mean colors
            ccv = ColorConverter()

            color_1 = np.array(
                ccv.to_rgb(pt.get_colors(treatments_in_taxon[i])))
            color_2 = np.array(
                ccv.to_rgb(pt.get_colors(treatments_in_taxon[j])))

            # Blend the two treatment colours, clipping each channel at 1.0.
            mix_color = 0.7 * (color_1 + color_2)
            mix_color = np.min([mix_color, [1.0, 1.0, 1.0]], 0)

            if (treatments_in_taxon[i] == '0') and (treatments_in_taxon[j]
                                                    == '1'):
                mix_color = 'gold'

            mult_i = [x[0] for x in multiplicity_pair]
            mult_j = [x[1] for x in multiplicity_pair]

            ax_mult_i_j.scatter(mult_i,
                                mult_j,
                                marker=pt.plot_species_marker(taxon),
                                facecolors=mix_color,
                                edgecolors='none',
                                alpha=0.8,
                                s=90,
                                zorder=2)

            mult_significant_i = [x[0] for x in significant_multiplicity_pair]
            mult_significant_j = [x[1] for x in significant_multiplicity_pair]
            ax_mult_i_j.scatter(mult_significant_i,
                                mult_significant_j,
                                marker=pt.plot_species_marker(taxon),
                                facecolors=mix_color,
                                edgecolors='k',
                                lw=1.5,
                                alpha=0.7,
                                s=90,
                                zorder=3)

            # Shared limits so both axes cover every plotted value.
            mult_ij = mult_significant_i + mult_significant_j + mult_i + mult_j

            ax_mult_i_j.set_xlim([min(mult_ij) * 0.5, max(mult_ij) * 1.5])
            ax_mult_i_j.set_ylim([min(mult_ij) * 0.5, max(mult_ij) * 1.5])

    significant_0 = set(significant_multiplicity_dict['0'])
    significant_1 = set(significant_multiplicity_dict['1'])
    significant_2 = set(significant_multiplicity_dict['2'])

    # The last entry is the overlap of all three treatments (it previously
    # intersected '1' with itself, which just repeated the '1' & '2' overlap).
    # NOTE(review): venn3 documents the 7-tuple as region sizes
    # (Abc, aBc, ABc, abC, AbC, aBC, ABC), whereas these are whole-set and
    # whole-intersection sizes - confirm this is the intended input.
    subset_tuple = (len(significant_multiplicity_dict['0']),
                    len(significant_multiplicity_dict['1']),
                    len(significant_0 & significant_1),
                    len(significant_multiplicity_dict['2']),
                    len(significant_0 & significant_2),
                    len(significant_1 & significant_2),
                    len(significant_0 & significant_1 & significant_2))

    venn = venn3(subsets=subset_tuple,
                 ax=ax_venn,
                 set_labels=('', '', ''),
                 set_colors=(pt.get_colors('0'), pt.get_colors('1'),
                             pt.get_colors('2')))
    c = venn3_circles(subsets=subset_tuple, ax=ax_venn, linestyle='dashed')

    ax_mult_freq.set_xlim([min(all_mults) * 0.5, max(all_mults) * 1.5])
    ax_mult_freq.set_ylim([min(all_freqs) * 0.5, max(all_freqs) * 1.5])

    fig.suptitle(pt.latex_dict[taxon], fontsize=30)

    fig.subplots_adjust(wspace=0.3)  #hspace=0.3, wspace=0.5
    fig_name = pt.get_path() + "/figs/multiplicity_%s.jpg" % taxon
    fig.savefig(fig_name,
                format='jpg',
                bbox_inches="tight",
                pad_inches=0.4,
                dpi=600)
    plt.close()

Esempio n. 4

Mostra file

    # mutual information / joint entropy
    joint_entropy =  stats.entropy(array_1,array_2)



# Result container; it is filled by code beyond the end of this excerpt.
standardized_gene_overlap = {}
# One pass per taxon listed in pt.taxa.
for taxon in pt.taxa:

    #if taxon == 'J':
    #    continue

    # Per-taxon accumulators, reset on every iteration.
    gene_dict = {}
    N_significant_genes_dict = {}

    # parse_gene_list(taxon) is unpacked into ten values.
    gene_data = parse_file.parse_gene_list(taxon)
    gene_names, gene_start_positions, gene_end_positions, promoter_start_positions, promoter_end_positions, gene_sequences, strands, genes, features, protein_ids = gene_data

    #locus_tag_to_gene_dict = {}
    #for gene_name_idx, gene_name in enumerate(gene_names):
    #    gene = genes[gene_name_idx]
    #    if gene == '':
    #        continue
    #    locus_tag_to_gene_dict[gene_name] = genes[gene_name_idx]


    # Taxon 'J' is compared across treatments '0' and '1' only; every other
    # taxon also includes treatment '2'.
    if taxon == 'J':
        treatments_convergence = ['0', '1']

    else:
        treatments_convergence = ['0', '1', '2']

Esempio n. 5

Mostra file

import phik

# Fixed seed so NumPy's global random generator is reproducible.
np.random.seed(123456789)


# TODO: redo the analysis for genes enriched in *either* of the treatments
# being compared; read in the non-significant genes and add those counts in.


# Number of permutations (used by code outside this excerpt).
permutations_divergence = 10000

# Every unordered pair of the treatments '0', '1' and '2'.
treatment_pairs = [['0','1'],['0','2'],['1','2']]


# Gene annotation for taxon 'B', unpacked into ten values.
gene_data_B = parse_file.parse_gene_list('B')
gene_names_B, gene_start_positions_B, gene_end_positions_B, promoter_start_positions_B, promoter_end_positions_B, gene_sequences_B, strands_B, genes_B, features_B, protein_ids_B = gene_data_B
# Lookups keyed by the entries of gene_names_B: one to the matching entry of
# genes_B, the other to the matching entry of protein_ids_B.
gene_name_dict = dict(zip(gene_names_B, genes_B ))
protein_id_dict = dict(zip(gene_names_B, protein_ids_B ))

# Top-level containers; the first three receive one empty dict per taxon in
# the loop below.
significant_multiplicity_dict = {}
significant_n_mut_dict = {}
gene_size_dict = {}
gene_mean_size_dict = {}
for taxon in pt.taxa:
    significant_multiplicity_dict[taxon] = {}
    significant_n_mut_dict[taxon] = {}
    gene_size_dict[taxon] = {}

    # Gene annotation for this taxon (consumed beyond this excerpt).
    gene_data = parse_file.parse_gene_list(taxon)

Esempio n. 6

Mostra file

File: make_test_matrix.py Progetto: MURI2/Bacillus_Evol_Timeseries

def process_output():
    """Filter SNP trajectories per sample and write the retained mutations.

    For every combination of the module-level ``strains``, ``treatments`` and
    ``reps`` this reads the sample's bz2-compressed depth and SNP timecourse
    files, drops trajectories that fail the filters below, and writes one
    tab-separated ``<sample>_snp_final.txt`` file under
    ``<pt.get_path()>/data/timecourse_prelim_poly/``.

    A trajectory is kept only if it
      * has more than one timepoint, and at least five once timepoints with
        depth below 5 have been removed,
      * has non-zero values at positions 2 and 4 of its best
        ``get_likelihood`` result,
      * yields no NaN from ``get_test_statistics``,
      * is not above 0.9 frequency at both its first and last timepoint, and
      * exceeds 0.05 frequency at the sample's reference timepoint (the
        second-to-last one for samples '0C1' and '0C4', the last otherwise).

    Raises:
        ValueError: If a depth timecourse file contains no lines. Previously
            this either raised NameError or silently reused the medians of
            the preceding sample.
    """
    for strain in strains:
        gene_data = pf.parse_gene_list(taxon=strain)
        # Integer gene coordinates, converted once per strain instead of once
        # per retained SNP.
        gene_intervals = [
            (gene_name, int(start), int(stop))
            for gene_name, start, stop in zip(gene_data[0], gene_data[1],
                                              gene_data[2])
        ]
        for treatment in treatments:
            for rep in reps:
                sample = '%s%s%s' % (treatment, strain, rep)
                print(sample)
                snp_timecourse_path = (pt.get_path() +
                                       '/data/timecourse_snp/' +
                                       '%s_snp_timecourse.bz' % sample)
                depth_timecourse_path = (pt.get_path() +
                                         '/data/timecourse_depth/' +
                                         '%s_depth_timecourse.bz' % sample)

                # The medians come from the last comma-separated field of the
                # last line of the depth file.
                D_pt_median = None
                with bz2.open(depth_timecourse_path, "rt") as depth_file:
                    for depth in depth_file:
                        depth_split = [x.strip() for x in depth.split(',')]
                        D_pt_median = np.asarray(
                            [float(x) for x in depth_split[-1].split(' ')])
                if D_pt_median is None:
                    raise ValueError('empty depth timecourse file: %s' %
                                     depth_timecourse_path)

                # Index of the timepoint whose frequency decides retention.
                if (sample == '0C1') or (sample == '0C4'):
                    last_timepoint = -2
                else:
                    last_timepoint = -1

                file_mutations = []
                with bz2.open(snp_timecourse_path, "rt") as snp_file:
                    for snp in snp_file:
                        snp_split = [x.strip() for x in snp.split(',')]
                        # Columns 3, 4, 5 are space-separated integer series.
                        t_pm = np.asarray(
                            [int(x) for x in snp_split[3].split(' ')])
                        A_pm = np.asarray(
                            [int(x) for x in snp_split[4].split(' ')])
                        D_pm = np.asarray(
                            [int(x) for x in snp_split[5].split(' ')])
                        if len(t_pm) == 1:
                            continue
                        # remove D_pmt < 5
                        remove_D_pmt = np.flatnonzero(D_pm < 5)
                        # np.delete returns a new array, so the per-sample
                        # medians are never modified.
                        D_pt_median_kept = D_pt_median
                        if len(remove_D_pmt) > 0:
                            t_pm = np.delete(t_pm, remove_D_pmt)
                            A_pm = np.delete(A_pm, remove_D_pmt)
                            D_pm = np.delete(D_pm, remove_D_pmt)
                            D_pt_median_kept = np.delete(
                                D_pt_median, remove_D_pmt)
                        # depth relative to the per-timepoint median
                        d_pm = D_pm / D_pt_median_kept
                        # don't look at trajectories with fewer than four
                        if len(t_pm[1:]) < 4:
                            continue
                        l_list = [
                            get_likelihood(t_, t=t_pm, d_pm=d_pm)
                            for t_ in t_pm[1:-2]
                        ]
                        # Best candidate = largest last element; a stable
                        # sort keeps the original tie-breaking.
                        l_list = sorted(l_list, key=lambda x: x[-1])
                        max_l = l_list[-1]
                        n = len(t_pm)
                        sigma = np.sqrt((1 / n) * sum(d_pm**2) -
                                        (((1 / n) * sum(d_pm))**2))
                        if (max_l[4] == 0) or (max_l[2] == 0):
                            continue
                        # NOTE(review): delta_l is computed but not used as a
                        # filter yet; no upper threshold has been settled on.
                        delta_l = max_l[3] * np.log(
                            sigma / max_l[4]) + max_l[1] * np.log(
                                sigma / max_l[2])

                        C_star_mut, I_mut, T_mut = get_test_statistics(
                            t_pm, A_pm, D_pm)
                        if np.isnan(np.sum([C_star_mut, I_mut, T_mut])):
                            continue

                        f_pm = A_pm / D_pm
                        if (f_pm[0] > 0.9) and (f_pm[-1] > 0.9):
                            continue
                        if f_pm[last_timepoint] > 0.05:

                            allele_split = snp_split[2].split('->')
                            anc = allele_split[0]
                            der = allele_split[1]
                            site = int(snp_split[1])
                            # Every gene whose [start, stop] contains the site.
                            genes_names_site = [
                                gene_name
                                for gene_name, start, stop in gene_intervals
                                if start <= site <= stop
                            ]
                            genes_names_site_merged = ",".join(
                                genes_names_site)

                            file_mutations.append([
                                snp_split[0], genes_names_site_merged,
                                snp_split[1], anc, der,
                                str(f_pm[-1])
                            ])
                # out file
                header = [
                    'Contig', 'Locus_tag', 'Site', 'Ancestral', 'Derived',
                    'Final_freq'
                ]
                mutation_path = (pt.get_path() +
                                 '/data/timecourse_prelim_poly/' +
                                 '%s_snp_final.txt' % sample)
                with open(mutation_path, 'w') as mutation_file:
                    mutation_file.write('\t'.join(header) + '\n')
                    for mutation in file_mutations:
                        mutation_file.write("\t".join(mutation))
                        mutation_file.write("\n")