Exemple #1
0
def get_forward_neighbors(source, current, mutations):
    """List all neighbors that are a single mutation away from ``current``
    and move away from ``source``.

    Parameters
    ----------
    source : str
        source genotype which determines the direction to be moving away.
    current : str
        reference genotype.
    mutations : dict
        sites (keys) mapped to an alphabet list in genotype space (values).
        Sites mapped to ``None`` are skipped.

    Returns
    -------
    neighbors : list
        List of neighbor genotypes (str) whose Hamming distance to ``source``
        is larger than the distance between ``source`` and ``current``.

    Raises
    ------
    ValueError
        If ``source`` and ``current`` do not have the same length.
    """
    if len(source) != len(current):
        raise ValueError(
            "source and current genotypes must have equal length.")

    sites = list(current)
    neighbors = []
    for i, alphabet in mutations.items():
        if alphabet is None:
            continue
        # A single substitution raises the Hamming distance to the source
        # only at a site where current still matches source. At a site that
        # already differs, the distance can only stay equal or decrease.
        if sites[i] != source[i]:
            continue
        for letter in alphabet:
            # Skip the letter already present. The caller's alphabet is only
            # read, never modified.
            if letter == sites[i]:
                continue
            g = sites[:]
            g[i] = letter
            neighbors.append("".join(g))
    return neighbors
Exemple #2
0
def test_hamming_distance():
    """
    Check hamming_distance on known pairs and on invalid input.
    """
    # Each entry is (first sequence, second sequence, expected distance).
    known_cases = (
        ("THIS IS A TEST", "HWIS IT A TEBT", 4),
        ("ROCKING", "ROCKING", 0),
    )
    for first, second, expected in known_cases:
        assert utils.hamming_distance(first, second) == expected

    # Sequences of unequal length and a non-string argument are both
    # expected to be rejected with ValueError.
    for bad_first, bad_second in (("TEST", "NOT"), (0, "NOT")):
        with pytest.raises(ValueError):
            utils.hamming_distance(bad_first, bad_second)
Exemple #3
0
def mean_path_divergence(G, paths):
    """Calculate the divergence of a paths ensemble according to Lobkovsky, 2011 [1].

    For every unordered pair of paths, the Hamming distances between the
    ``"binary"`` attributes of all node pairs (one node from each path) are
    summed in both directions and divided by the combined length of the two
    paths. Each pair distance is then weighted by the product of the
    probabilities of the two paths in that pair.

    Parameters
    ----------
    G : GenotypePhenotypeGraph object.
        Any GenotypePhenotypeGraph object or objects of classes that inherit from one,
        like GenotypePhenotypeMSM.

    paths : dict.
        Dictionary of paths (keys) and probabilities (values).
        Example: {(0,1,3): 0.9, (0,2,3): 0.1}

    Returns
    -------
    divergence : float.
        A measure of divergence published as equation (2) in [1].
        It is 0 when fewer than two paths are given.

    References
    ----------
    [1] A. E. Lobkovsky, Y. I. Wolf, and E. V. Koonin.
    Predictability of evolutionary trajectories in
    fitness landscapes. PLoS Comput. Biol., 7:e1002302, 2011.
    """
    # NOTE(review): `G.node` is kept from the original code. If G is a
    # networkx graph on a release where `G.node` was removed, this needs to
    # become `G.nodes` -- confirm against the installed version.
    divergence = 0

    # Visit every unordered pair of paths exactly once.
    for path_a, path_b in itertools.combinations(paths, 2):
        pair_hdist = 0

        # Accumulate node-to-node distances in both directions (a -> b and
        # b -> a), matching the normalization by the combined length below.
        for first, second in ((path_a, path_b), (path_b, path_a)):
            for node in first:
                for other in second:
                    pair_hdist += hamming_distance(G.node[node]["binary"],
                                                   G.node[other]["binary"])

        # Distance between the two paths, normalized by their combined length.
        combined_length = len(path_a) + len(path_b)
        pair_dist = pair_hdist / combined_length

        # Weight by the probabilities of the two paths in *this* pair, not
        # by the first two probabilities stored in the dictionary.
        divergence += pair_dist * paths[path_a] * paths[path_b]

    return divergence
Exemple #4
0
def forward_paths(paths, msm, source, target):
    """Keep only the paths whose number of steps equals the Hamming distance
    between the first source and the first target.

    Parameters
    ----------
    paths : iterable
        Paths to filter. Each path must support ``len()``.
    msm : object
        Object exposing ``msm.gpm.data.binary``, which is indexed with
        ``source[0]`` and ``target[0]``.
    source : sequence
        Source nodes. Only ``source[0]`` is used.
    target : sequence
        Target nodes. Only ``target[0]`` is used.

    Returns
    -------
    fp : list
        The paths with ``len(path) - 1`` equal to that Hamming distance, in
        their original order.
    """
    binary = msm.gpm.data.binary
    min_dist = hamming_distance(binary[source[0]], binary[target[0]])

    # A path with k nodes takes k - 1 steps.
    return [path for path in paths if len(path) - 1 == min_dist]
Exemple #5
0
 def hamming(self):
     """Hamming distance from reference

     The distances between ``self.wildtype`` and each entry of
     ``self.genotypes`` are computed on first access and cached on
     ``self._hamming``; later calls return the cached array.
     """
     if hasattr(self, "_hamming"):
         return self._hamming

     # First access: allocate self.n integer slots and fill one per genotype.
     distances = np.empty(self.n, dtype=int)
     for idx, genotype in enumerate(self.genotypes):
         distances[idx] = utils.hamming_distance(self.wildtype, genotype)
     self._hamming = distances
     return self._hamming
Exemple #6
0
 def hamming(self):
     """Hamming distances from each peak

     Builds an integer array with one row per entry of ``self.peaks`` and
     one column per entry of ``self.genotypes``. The array is computed on
     first access and cached on ``self._hamming``.
     """
     if hasattr(self, "_hamming"):
         return self._hamming

     n_peaks, n_genotypes = len(self.peaks), len(self.genotypes)
     distances = np.empty([n_peaks, n_genotypes], dtype=int)
     for row, peak in enumerate(self.peaks):
         for col, genotype in enumerate(self.genotypes):
             distances[row][col] = utils.hamming_distance(peak, genotype)
     self._hamming = distances
     return self._hamming
Exemple #7
0
def adaptive_walk(lattice, n_mutations):
    """Given a lattice object, adaptive walk to a sequence n_mutations away.

    Random single-site substitutions are proposed one at a time. A proposal
    is accepted only if it increases ``fracfolded`` and keeps the same
    ``native_conf`` as the starting lattice. Each site is mutated at most
    once. The walk gives up after 100 rejected proposals in total.

    Parameters
    ----------
    lattice : LatticeThermodynamics
        Starting lattice (instances of subclasses are accepted). Lattices
        with more than 10 conformations are rejected.
    n_mutations : int
        Number of accepted substitutions to reach.

    Returns
    -------
    mlattice : LatticeThermodynamics
        Lattice of the final accepted sequence. When ``n_mutations <= 0``
        the input ``lattice`` itself is returned.

    Raises
    ------
    TypeError
        If ``lattice`` is not a LatticeThermodynamics object.
    Exception
        If the lattice has too many conformations, or if the walk could not
        reach ``n_mutations`` accepted substitutions.
    """
    # Sanity check
    if not isinstance(lattice, LatticeThermodynamics):
        raise TypeError("lattice must be a LatticeThermodynamics object")
    if len(lattice.conf_list) > 10:
        raise Exception(
            "too many conformations to compute in a reasonable time.")

    wildtype = lattice.sequence
    mutant = list(wildtype)

    hamming = 0
    # Sites that have not been mutated yet.
    indices = list(range(len(wildtype)))
    fracfolded = lattice.fracfolded
    failed = 0
    # If the loop never runs (n_mutations <= 0) the walk ends where it began.
    mlattice = lattice
    # Stop as well once every site has been used, instead of letting
    # random.choice fail on an empty list.
    while hamming < n_mutations and failed < 100 and indices:
        # Select a site to mutate
        mut = mutant[:]
        index = random.choice(indices)

        # Choose a mutation
        mutation = random.choice(AMINO_ACIDS)
        mut[index] = mutation

        # New lattice
        mlattice = LatticeThermodynamics(
            "".join(mut),
            lattice.conf_list,
            lattice.temperature,
            interaction_energies=lattice.interaction_energies)

        improves = mlattice.fracfolded > fracfolded
        if improves and mlattice.native_conf == lattice.native_conf:
            indices.remove(index)
            mutant[index] = mutation
            hamming = hamming_distance(wildtype, mutant)
            fracfolded = mlattice.fracfolded
        else:
            failed += 1

    # The loop exits early only when proposals or sites ran out.
    if hamming < n_mutations:
        raise Exception("No adaptive paths n_mutations away.")

    return mlattice
Exemple #8
0
 def peaks(self):
     """Find n peaks that meet the max_dist/min_dist requirement

     A non-empty ``self._peaks`` is returned as is. Otherwise the list is
     seeded with ``self.b_state`` and ``self.a_state`` and random genotypes
     are proposed until ``self.peak_n`` peaks have been collected. A
     proposal is kept only when its Hamming distance to every peak found so
     far lies between ``self.min_dist`` and ``self.max_dist`` (inclusive).
     """
     if self._peaks:
         return self._peaks

     self._peaks = [self.b_state, self.a_state]
     while len(self._peaks) < self.peak_n:
         # Propose a new peak.
         candidate = random.choice(self.genotypes)
         # Check dist. requirements against all current peaks; evaluation
         # stops at the first peak that violates them.
         accepted = all(
             self.min_dist
             <= utils.hamming_distance(peak, candidate)
             <= self.max_dist
             for peak in self._peaks
         )
         if accepted:
             self._peaks.append(candidate)
     return self._peaks
Exemple #9
0
def adaptive_walk2(seq, n_mutations, temp=1.0, target=None):
    """Greedy adaptive walk starting from ``seq``.

    At every step each amino acid is tried at each site of the current
    sequence, every single-site variant is scored with ``Fitness.Fitness``,
    and the highest-scoring substitution is applied. The walk stops once
    the sequence is ``n_mutations`` away from ``seq`` or after 100 steps.

    Parameters
    ----------
    seq : str
        Starting (wildtype) sequence.
    n_mutations : int
        Hamming distance from ``seq`` to reach.
    temp : float
        Temperature passed to ``Fitness``.
    target : optional
        Passed to ``Fitness`` as ``targets``.

    Returns
    -------
    path : list
        One entry per step: a copy of the sequence (list of residues) after
        that step's substitution.

    Raises
    ------
    Exception
        If ``n_mutations`` was not reached within 100 steps.
    """
    length = len(seq)
    # NOTE(review): `database` is not defined in this function; presumably a
    # module-level name -- confirm.
    c = Conformations(length, database)
    dGdependence = "fracfolded"

    wildtype = seq
    mutant = list(wildtype)

    hamming = 0
    attempts = 0

    path = []

    fitness = Fitness(temp, c, dGdependence=dGdependence, targets=target)
    # Row i lists every amino acid that can be placed at site i.
    AA_grid = np.array([AMINO_ACIDS] * length)
    while hamming < n_mutations and attempts < 100:
        # Score all amino acids at all sites of the current sequence.
        # NOTE(review): assumes Fitness.Fitness returns a scalar score for
        # the given sequence -- confirm.
        dG = np.zeros(AA_grid.shape, dtype=float)
        for (i, j), AA in np.ndenumerate(AA_grid):
            seq1 = mutant[:]
            seq1[i] = AA
            dG[i, j] = fitness.Fitness(seq1)

        # Site and amino acid of the first maximal score.
        x, y = np.unravel_index(np.argmax(dG), dG.shape)
        mutant[x] = AA_grid[x, y]
        # Store a copy so later substitutions do not alter earlier steps.
        path.append(mutant[:])
        hamming = hamming_distance(wildtype, mutant)
        attempts += 1

    if hamming < n_mutations:
        raise Exception("No adaptive paths n_mutations away.")

    return path