Example #1
0
def directed_evolution(
    s_wt, num_iterations, T, Model, params
):
    """Run one Metropolis-style directed-evolution trajectory from ``s_wt``.

    An initial mutant is derived from ``s_wt``; then, for ``num_iterations``
    steps, a trial sequence is proposed with ``mutate_sequence``, scored with
    ``Model.predict`` on the first value returned by ``get_reps``, and
    accepted with probability ``min(1, exp((y_new - y) / T))``.

    Args:
        s_wt: Wild-type sequence the trajectory starts from. Must be
            non-empty, because a mutation location is drawn from its indices.
        num_iterations: Number of trial mutation steps to run.
        T: "Temperature" dividing the score difference in the acceptance
            probability.
        Model: Object exposing ``predict``; called on the representation of
            each candidate sequence to obtain its fitness score.
        params: Passed through unchanged to ``get_reps``.

    Returns:
        ``(s_traj, y_traj)``: two lists with one entry per iteration, holding
        the current sequence and its score after that iteration's
        accept/reject decision.

    Raises:
        ValueError: If ``s_wt`` is empty.
    """
    if not s_wt:
        raise ValueError("s_wt must be a non-empty sequence")

    # random.randint includes BOTH endpoints, so randint(0, len(s_wt)) could
    # return len(s_wt), which is one past the last valid 0-based position.
    # randrange(len(s_wt)) draws uniformly from the valid indices only.
    mut_loc_seed = random.randrange(len(s_wt))

    # Initial mutant for this trajectory, with m = Poisson(2) + 1 mutations.
    s, new_mut_loc = mutate_sequence(
        s_wt, np.random.poisson(2) + 1, mut_loc_seed
    )

    # Representation and predicted fitness of the initial mutant.
    x, _, _ = get_reps([s], params=params)
    y = Model.predict(x)

    s_traj = []  # sequence kept after each iteration
    y_traj = []  # fitness score kept after each iteration

    for _ in range(num_iterations):
        # "mu" controls how many mutations to introduce in this trial;
        # m is always at least 1.
        mu = np.random.uniform(1, 2.5)
        m = np.random.poisson(mu - 1) + 1

        # NOTE: new_mut_loc is overwritten here even if the trial ends up
        # rejected, so the next proposal is seeded from the latest trial.
        s_new, new_mut_loc = mutate_sequence(s, m, new_mut_loc)

        x_new, _, _ = get_reps([s_new], params=params)
        y_new = Model.predict(x_new)  # fitness of the trial sequence

        # Metropolis acceptance: improvements are always accepted, worse
        # trials with probability exp((y_new - y) / T).
        p = min(1, np.exp((y_new - y) / T))
        rand_var = random.random()

        if rand_var < p:
            # Report the accepted change using a 1-based position.
            print(f"{new_mut_loc + 1} {s[new_mut_loc]}->{s_new[new_mut_loc]}")
            s, y = s_new, y_new

        s_traj.append(s)
        y_traj.append(y)

    return s_traj, y_traj
def test_get_reps():
    """Check get_reps on a single string, a one-item list, and a batch.

    A bare string and a one-element list holding the same string must yield
    equal arrays, and a batch of three sequences must yield three arrays of
    shape (3, 1900).
    """
    list_first, list_second, list_third = get_reps(["ABC"])
    str_first, str_second, str_third = get_reps("ABC")

    paired_outputs = (
        (list_first, str_first),
        (list_second, str_second),
        (list_third, str_third),
    )
    for from_list, from_string in paired_outputs:
        assert np.array_equal(from_list, from_string)

    # Sequences of different lengths in one batch.
    rep_one, rep_two, rep_three = get_reps(["ABC", "DEFGH", "DEF"])

    expected_shape = (3, 1900)
    for rep in (rep_one, rep_two, rep_three):
        assert rep.shape == expected_shape
Example #3
0
def test_sample_one_chain():
    """Check the keys and per-key lengths of sample_one_chain's result."""

    def sum_of_first_rep(sequence):
        # Score a sequence by summing the first array returned by get_reps.
        return get_reps(sequence)[0].sum()

    total_steps = 10
    chain_data = sample_one_chain("AAC", total_steps, sum_of_first_rep)

    expected_keys = {"sequences", "scores", "accept"}
    assert set(chain_data.keys()) == expected_keys

    # One extra entry because the first step is included too.
    expected_length = total_steps + 1
    for recorded in chain_data.values():
        assert len(recorded) == expected_length
def scoring_func_reverse(sequence: str):
    """Score ``sequence`` as the reciprocal of the top model's prediction.

    The first of the three values returned by ``ju.get_reps`` is fed to
    ``top_model.predict``, and ``1 / prediction`` is returned, so a larger
    prediction gives a smaller score.
    """
    # get_reps receives its own deep copy of the module-level params
    # (presumably so the shared object cannot be altered -- confirm).
    fresh_params = deepcopy(params)
    first_rep, _, _ = ju.get_reps(sequence, params=fresh_params)
    prediction = top_model.predict(first_rep)
    return 1 / prediction
Example #5
0
# Optional input: Training parameters
parser.add_argument('-p',
                    '--parameters',
                    type=str,
                    default=None,
                    help='Parameter directory for mLSTM.')

# Parse arguments
args = parser.parse_args()

sequence_file = args.infile
parameter_dir = args.parameters
outfile = args.outfile

# Load input sequences, one per line, with surrounding whitespace removed.
# The context manager closes the file as soon as the lines are read; the
# previous bare open(...).readlines() left the handle open.
with open(sequence_file) as seq_handle:
    sequences = [line.strip() for line in seq_handle]

# Load UniRep parameters. Only the first element of load_params' result is
# used; with no directory given, params=None is passed to get_reps.
if parameter_dir:
    params = ju.utils.load_params(folderpath=parameter_dir)[0]
else:
    params = None

# Make representations for each input sequence; only the first of the three
# returned values is kept.
h_avg, _, _ = ju.get_reps(sequences, params=params)

# Save representations as tab-separated text.
with open(outfile, 'w') as o:
    np.savetxt(o, h_avg, delimiter="\t")
"""Sequence sampler test.

In this Python script, we start with a sequence
and try to optimize for a better version of it,
as measured by the sum over reps.
It's a silly task,
but I think it gives us the ability to sanity-check
that we have the right thing going.
"""

from jax_unirep import get_reps
from jax_unirep.sampler import is_accepted, propose

starting_sequence = "ASDFGHJKL"

# State of the chain: the currently accepted sequence and its score (the sum
# of the first array returned by get_reps).
current_sequence = starting_sequence
current_score = get_reps(current_sequence)[0].sum()

# Per-step history; index 0 holds the starting state.
sequences = [current_sequence]
scores = [current_score]


for i in range(100):
    new_sequence = propose(current_sequence)
    new_score = get_reps(new_sequence)[0].sum()

    if is_accepted(best=current_score, candidate=new_score, temperature=1):
        current_sequence = new_sequence
        # Keep the score in step with the accepted sequence. Without this,
        # every later proposal is compared against the starting score.
        current_score = new_score

    # Record the state after this step so both histories stay aligned.
    sequences.append(current_sequence)
    scores.append(current_score)
    print(i, new_sequence, new_score)