Exemple #1
0
plt.close()

# Plot one MSA per orthologous group, taking the largest deviations first
OGids = set()  # OGids that already have a figure
outliers = [
    record for record in records
    if record[0] < -1 and record[2] < a * record[0]**2
]  # Use -1 to exclude near-zero floating point rounding errors
outliers.sort()  # Ascending tuple order, so the most negative record[0] comes first
for record in outliers:
    # Fields 3-5 of a record hold the OGid, its MSA, and its regions
    OGid, msa, regions = (record[idx] for idx in (3, 4, 5))
    if OGid in OGids:
        continue  # Only the first record seen for each OGid is plotted
    OGids.add(OGid)

    # Order the sequences by their position in tip_order and drop the headers
    ordered = sorted(msa, key=lambda entry: tip_order[entry[0]])
    seqs = [seq for _, seq in ordered]

    # Set every column indexed by a region to 1
    line = np.zeros(len(seqs[0]))
    for region in regions:
        line[region] = 1

    # The file name prefix is the zero-based count of figures saved so far
    plot_msa_lines(seqs, line, figsize=(16, 6))
    plt.savefig(f'out/{len(OGids)-1}_{OGid}.png', bbox_inches='tight', dpi=400)
    plt.close()
"""
DEPENDENCIES
../../ortho_tree/ctree_WAG/ctree_WAG.py
    ../../ortho_tree/ctree_WAG/out/100red_ni.txt
../realign_hmmer1/realign_hmmer1.py
    ../realign_hmmer1/out/*.mfa
"""
Exemple #2
0
            labels[state].append((int(start), int(stop)))
            OGid2labels[OGid] = labels

# Create the output directory if needed. exist_ok=True replaces the separate
# existence check, so there is no window between checking and creating.
os.makedirs('out/', exist_ok=True)

for OGid, labels in OGid2labels.items():
    # Load the alignment for this OGid and trim its terminals
    raw_msa = read_fasta(f'../../ortho_MSA/realign_hmmer1/out/{OGid}.mfa')
    msa = trim_terminals(raw_msa)

    # When the first '0' segment starts at column 0, shift every other
    # segment left by that segment's stop coordinate.
    # NOTE(review): presumably this matches the columns removed by
    # trim_terminals -- confirm against that helper.
    leading = labels['0']
    offset = leading[0][1] if leading and leading[0][0] == 0 else 0

    # Build one 0/1 line per state, with 1 over each labelled segment
    n_cols = len(msa[0][1])
    lines = {}
    for state in ('1A', '1B', '2', '3'):
        mask = np.zeros(n_cols)
        for start, stop in labels[state]:
            mask[start - offset:stop - offset] = 1
        lines[state] = mask

    # Lines are drawn in the order 1A, 2, 3, 1B
    seqs = [entry[1].upper() for entry in msa]
    tracks = [lines[state] for state in ('1A', '2', '3', '1B')]
    plot_msa_lines(seqs, tracks, figsize=(15, 6))
    plt.savefig(f'out/{OGid}.png', bbox_inches='tight')
    plt.close()

"""
DEPENDENCIES
../../ortho_MSA/realign_hmmer1/realign_hmmer1.py
    ../../ortho_MSA/realign_hmmer1/out/*.mfa
../config/segments.tsv
"""
Exemple #3
0
        emit1 = sum(col)
        emits.append((emit0, emit1))
        col0 = col

    # Build the HMM with one frozen emission distribution per state; each
    # distribution receives the number of sequences minus one.
    n_other = len(msa) - 1
    e_dists_rv = {
        state: bernoulli_betabinom_frozen(p, n_other, a, b)
        for state, (p, a, b) in params['e_dists'].items()
    }
    model = hmm.HMM(params['t_dists'], e_dists_rv, params['start_dist'])

    # Decode states, then save the same plot in a wide and a tall layout
    fbs = model.forward_backward(emits)
    layouts = [
        (f'out/{OGid}_wide.png', (15, 6)),
        (f'out/{OGid}_tall.png', (8, 8)),
    ]
    for out_path, fig_dims in layouts:
        draw.plot_msa_lines([seq.upper() for _, seq in msa],
                            [fbs[key] for key in ('1A', '2', '3', '1B')],
                            figsize=fig_dims)
        plt.savefig(out_path, bbox_inches='tight')
        plt.close()
"""
DEPENDENCIES
../../ortho_MSA/realign_hmmer1/realign_hmmer1.py
    ../../ortho_MSA/realign_hmmer1/out/*.mfa
../config/segments.tsv
./fit.py
Exemple #4
0
            emits.append((j, 1))
        else:
            emits.append((j, 0))

    # Build the two-state HMM: state '0' emits with the per-position
    # probabilities in ps, state '1' with params['e_param'] repeated once
    # per entry of ps.
    e_dists_rv = {
        '0': msaBernoulli(ps),
        '1': msaBernoulli([params['e_param'] for _ in ps]),
    }
    model = hmm.HMM(params['t_dists'], e_dists_rv, params['start_dist'])

    # Decode states, then save the same plot in a wide and a tall layout.
    # Only sequences whose header contains ppid get a (non-empty) label.
    fbs = model.forward_backward(emits)
    layouts = [
        (f'out/{OGid}_{ppid}_wide.png', (15, 6)),
        (f'out/{OGid}_{ppid}_tall.png', (8, 8)),
    ]
    for out_path, fig_dims in layouts:
        draw.plot_msa_lines(
            [seq.upper() for _, seq in msa],
            fbs['1'],
            msa_labels=[ppid if ppid in header else '' for header, _ in msa],
            msa_labelsize=4,
            figsize=fig_dims)
        plt.savefig(out_path, bbox_inches='tight')
        plt.close()
"""
DEPENDENCIES
Exemple #5
0
        # Read the decoded-state table and, for every row, sum the values in
        # the '2' and '3' columns
        posterior = []
        with open(f'../insertion_trim/out/{row.OGid}.tsv') as file:
            columns = file.readline().rstrip('\n').split('\t')
            for raw_line in file:
                values = raw_line.rstrip('\n').split('\t')
                fields = dict(zip(columns, map(float, values)))
                posterior.append(fields['2'] + fields['3'])
        posterior = np.array(posterior)
        gradient = np.gradient(posterior)

        # Set every position indexed by one of the slices to 1
        slices = get_slices(msa, posterior, gradient)
        trims = np.zeros(len(posterior))
        for s in slices:
            trims[s] = 1

        # Order the sequences by tip_order, keep only the upper-cased
        # sequences, and plot them with the posterior and trim lines
        ordered = sorted(msa, key=lambda entry: tip_order[entry[0]])
        msa = [seq.upper() for _, seq in ordered]
        plot_msa_lines(msa, [posterior, trims], figsize=(15, 6))
        plt.savefig(f'out/{label}/{i}_{row.OGid}.png', bbox_inches='tight')
        plt.close()

"""
DEPENDENCIES
../../ortho_MSA/realign_hmmer2/realign_hmmer2.py
    ../../ortho_MSA/realign_hmmer2/out/*
../../ortho_tree/ctree_WAG/ctree_WAG.py
    ../../ortho_tree/ctree_WAG/out/100red_ni.txt
../insertion_trim/decode.py
    ../insertion_trim/out/*.tsv
"""