Esempio n. 1
0
    def __init__(self, length, order,
        coeff_range=(-1, 1),
        distribution=None,
        model_type='local',
        neg_coeffs=True
        ):
        """Simulate a binary genotype-phenotype map from a random NK table.

        Parameters
        ----------
        length : int
            number of sites in each binary genotype.
        order : int
            number of characters in each key of the NK table (stored as the
            order of the attached epistasis map).
        coeff_range : 2-tuple
            low and high bounds of the uniform distribution the table values
            are drawn from.
        distribution, model_type, neg_coeffs :
            accepted but not used by this constructor.
        """
        wildtype = "0"*length
        mutations = binary_mutations_map(wildtype, "1"*length)
        # Initialize a genotype-phenotype map
        super(NkSimulation, self).__init__(
            wildtype,
            mutations,
            log_transform=False,
        )
        self.epistasis = EpistasisMap(self)
        # Construct the NK epistasis table: one key for every binary word of
        # `order` characters.
        self.epistasis._order = order
        keys = np.array(["".join(r) for r in it.product('01', repeat=self.epistasis.order)])
        # One uniform draw per key, done in a single vectorized call.
        vals = np.random.uniform(coeff_range[0], coeff_range[1], size=len(keys))
        self.epistasis.keys = keys
        self.epistasis.values = vals

        # Build the genotype-phenotype map.
        self.build()
Esempio n. 2
0
    def X_constructor(self,
                      genotypes=None,
                      coeff_labels=None,
                      mutations=None,
                      **kwargs):
        """A helper method that constructs an X matrix for this model. Attaches
        an `EpistasisMap` object to the `epistasis` attribute of the model.

        The simplest way to construct X is to give a set of binary genotypes and
        epistatic labels. If not given, will try to infer these features from an
        attached genotype-phenotype map. If no genotype-phenotype map is attached,
        raises an exception.

        Parameters
        ----------
        genotypes : list
            list of genotypes.
        coeff_labels: list
            list of lists. Each sublist contains site-indices that represent
            participants in that epistatic interaction.
        mutations : dict
            mutations dictionary mapping sites to alphabet at the site.
        **kwargs :
            accepted but not used by this method.

        Returns
        -------
        The result of `generate_dv_matrix` for the genotypes and the labels of
        the attached epistasis map.

        Raises
        ------
        AttributeError
            if `genotypes` is None and no genotype-phenotype map is attached.
        """
        # First check genotypes are available
        if genotypes is None:
            try:
                genotypes = self.gpm.binary.genotypes
            except AttributeError:
                raise AttributeError(
                    "genotypes must be given, because no GenotypePhenotypeMap is attached to this model."
                )
        # Build epistasis map
        if coeff_labels is None:
            # Only create an epistasis map if none was attached before.
            if not hasattr(self, "epistasis"):
                # Mutations dictionary given? if not, try to infer one.
                if mutations is None:
                    try:
                        mutations = self.gpm.mutations
                    except AttributeError:
                        mutations = extract_mutations_from_genotypes(genotypes)
                # Construct epistasis mapping
                self.epistasis = EpistasisMap.from_mutations(
                    mutations, self.order, model_type=self.model_type)
        else:
            self.epistasis = EpistasisMap.from_labels(
                coeff_labels, model_type=self.model_type)
        # Construct the X matrix (convert to binary if necessary).
        try:
            return generate_dv_matrix(genotypes,
                                      self.epistasis.labels,
                                      model_type=self.model_type)
        except Exception:
            # Fallback: translate the genotypes to their binary form through
            # the attached map and retry. `Exception` (rather than a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            mapping = self.gpm.map("complete_genotypes",
                                   "binary.complete_genotypes")
            binaries = [mapping[g] for g in genotypes]
            return generate_dv_matrix(binaries,
                                      self.epistasis.labels,
                                      model_type=self.model_type)
Esempio n. 3
0
    def __init__(self,
                 wildtype,
                 genotypes,
                 phenotypes,
                 stdeviations=None,
                 log_transform=False,
                 mutations=None,
                 n_replicates=1,
                 logbase=np.log10):
        """Initialize the underlying genotype-phenotype map and attach an
        epistasis map (and, when possible, a plotting object) to the model.

        All arguments are forwarded to the parent constructor. When
        `mutations` is None, a binary mutations map is built from `wildtype`
        and the genotype returned by `farthest_genotype`.
        """
        # Without explicit mutations, fall back to a binary mapping.
        if mutations is None:
            mutations = binary_mutations_map(
                wildtype, farthest_genotype(wildtype, genotypes))

        parent_options = dict(
            stdeviations=stdeviations,
            log_transform=log_transform,
            mutations=mutations,
            n_replicates=n_replicates,
            logbase=logbase,
        )
        super(BaseModel, self).__init__(
            wildtype, genotypes, phenotypes, **parent_options)

        # Epistasis model bound to this map.
        self.epistasis = EpistasisMap(self)

        # Plotting is optional: a Warning raised while creating the plotting
        # object is ignored and `plot` is simply not set.
        try:
            self.plot = EpistasisPlotting(self)
        except Warning:
            pass
Esempio n. 4
0
 def __init__(self,
              wildtype,
              mutations,
              log_transform=False,
              logbase=np.log10,
              **kwargs):
     """Build the genotype-phenotype map for a simulation.

     Genotypes are enumerated from `wildtype` and `mutations`; every
     phenotype starts at 1. Extra keyword arguments are forwarded to the
     parent constructor. An empty EpistasisMap is attached afterwards.
     """
     all_genotypes = np.array(
         utils.mutations_to_genotypes(wildtype, mutations))
     # Placeholder phenotypes, one per genotype.
     initial_phenotypes = np.ones(len(all_genotypes))
     super(BaseSimulation, self).__init__(
         wildtype,
         all_genotypes,
         initial_phenotypes,
         log_transform=log_transform,
         logbase=logbase,
         mutations=mutations,
         **kwargs)
     # Start with an empty epistasis model.
     self.epistasis = EpistasisMap()
Esempio n. 5
0
    def add_gpm(self, gpm):
        """Attach a GenotypePhenotypeMap to the model and rebuild its
        epistasis map.

        Stores `gpm`, empties `Xbuilt`, derives the X-matrix columns from the
        model order and the map's encoding table, and creates an EpistasisMap
        over those columns. Returns the model itself.
        """
        self._gpm = gpm

        # Any previously built X matrices are discarded.
        self.Xbuilt = dict()

        # Columns of the X matrix for this model order.
        sites = encoding_to_sites(self.order, self.gpm.encoding_table)
        self.Xcolumns = sites

        # Epistasis map over those columns.
        self.epistasis = EpistasisMap(sites=self.Xcolumns, gpm=gpm)
        return self
Esempio n. 6
0
    def _fit_additive(self, X=None, y=None):
        """Create the first-order model stored on `self.Additive` and fit it.

        The additive model shares this model's `model_type` and
        genotype-phenotype map. `X` and `y` are passed straight to its `fit`.
        Returns the model itself.
        """
        # First-order linear regression with the same model type.
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)
        additive = self.Additive

        additive.add_gpm(self.gpm)

        # Replace the epistasis map with one built from the additive columns.
        additive.epistasis = EpistasisMap(sites=additive.Xcolumns)

        # Fit the additive model.
        additive.fit(X=X, y=y)
        return self
Esempio n. 7
0
    def _fit_additive(self, X=None, y=None, **kwargs):
        """Fit the additive (first-order) model stored on `self.Additive`.

        Parameters
        ----------
        X : numpy.ndarray, pandas.DataFrame or other
            when an array or DataFrame, only its first
            `self.Additive.epistasis.n` columns are used; any other value is
            passed to the additive model's `fit` unchanged.
        y :
            passed to the additive model's `fit` unchanged.
        **kwargs :
            accepted but not used by this method.

        Returns
        -------
        self
        """
        # Fit with an additive model
        self.Additive.epistasis = EpistasisMap(
            sites=self.Additive.Xcolumns,
        )

        # Use a first order matrix only.
        if isinstance(X, (np.ndarray, pd.DataFrame)):
            n_additive = self.Additive.epistasis.n
            if isinstance(X, pd.DataFrame):
                # DataFrames need positional indexing; X[:, :n] raises.
                Xadd = X.iloc[:, :n_additive]
            else:
                Xadd = X[:, :n_additive]
        else:
            Xadd = X

        # Fit Additive model
        self.Additive.fit(X=Xadd, y=y)
        self.Additive.epistasis.values = self.Additive.coef_
        return self
Esempio n. 8
0
 def __init__(self,
              wildtype,
              mutations,
              order,
              coeff_range=(-1, 1),
              model_type='local'):
     """Simulate a genotype-phenotype map from random multiplicative
     epistatic coefficients.

     The parent constructor is called with `log_transform=True`. The
     coefficients are `self.base` raised to uniform draws between
     `coeff_range[0]` and `coeff_range[1]`, one per epistasis key, after
     which `build` is called.
     """
     super(MultiplicativeSimulation, self).__init__(
         wildtype, mutations, log_transform=True)
     self.model_type = model_type

     # Epistasis map bound to this simulation, at the requested order.
     self.epistasis = EpistasisMap(self)
     self.epistasis.order = order

     # Random coefficient for every epistatic interaction.
     low = coeff_range[0]
     high = coeff_range[1]
     n_coefs = len(self.epistasis.keys)
     self.epistasis.values = self.base**np.random.uniform(
         low, high, size=n_coefs)

     # Phenotypes follow from the epistatic interactions.
     self.build()
Esempio n. 9
0
class NkSimulation(BaseSimulation):
    """ Generate genotype-phenotype map from NK fitness models.

    Every site contributes a value looked up in a random table keyed by the
    states of the site and its neighbors; a phenotype is the sum of the
    contributions of all sites. Neighborhoods wrap around the ends of the
    genotype.
    """
    def __init__(self, length, order,
        coeff_range=(-1, 1),
        distribution=None,
        model_type='local',
        neg_coeffs=True
        ):
        """Simulate a binary genotype-phenotype map from a random NK table.

        Parameters
        ----------
        length : int
            number of sites in each binary genotype.
        order : int
            number of sites read for each table lookup (the site itself plus
            its neighbors).
        coeff_range : 2-tuple
            low and high bounds of the uniform distribution the table values
            are drawn from.
        distribution, model_type, neg_coeffs :
            accepted but not used by this constructor.
        """
        wildtype = "0"*length
        mutations = binary_mutations_map(wildtype, "1"*length)
        # Initialize a genotype-phenotype map
        super(NkSimulation, self).__init__(
            wildtype,
            mutations,
            log_transform=False,
        )
        self.epistasis = EpistasisMap(self)
        # Construct the NK epistasis table: one key for every binary word of
        # `order` characters.
        self.epistasis._order = order
        keys = np.array(["".join(r) for r in it.product('01', repeat=self.epistasis.order)])
        # One uniform draw per key, done in a single vectorized call.
        vals = np.random.uniform(coeff_range[0], coeff_range[1], size=len(keys))
        self.epistasis.keys = keys
        self.epistasis.values = vals

        # Build the genotype-phenotype map.
        self.build()

    @classmethod
    def quick_start(cls, length, order, **kwargs):
        """Construct the genotype-phenotype map"""
        return cls(length, order, **kwargs)

    def build(self):
        """Build phenotypes from NK table

        For every site j the window of `order` consecutive sites around j is
        read (wrapping around the ends of the genotype), looked up in the NK
        table, and the looked-up values are summed over all sites.
        """
        nk_table = self.epistasis.map("keys", "values")
        order = self.epistasis.order
        # Sites after j in the window; an even order has one fewer site
        # before j than after it.
        neighbor = order // 2
        if order % 2 == 0:
            pre_neighbor = neighbor - 1
        else:
            pre_neighbor = neighbor
        offsets = range(-pre_neighbor, neighbor + 1)

        # Use NK table to build phenotypes
        length = self.length
        genotypes = self.genotypes
        phenotypes = np.zeros(self.n, dtype=float)
        for i, genotype in enumerate(genotypes):
            f_total = 0
            for j in range(length):
                # Modular indexing keeps the window contiguous on the
                # circular genotype, whichever end it crosses.
                f = "".join(genotype[(j + d) % length] for d in offsets)
                f_total += nk_table[f]
            phenotypes[i] = f_total
        self.phenotypes = phenotypes
Esempio n. 10
0
class BaseSimulation(GenotypePhenotypeMap):
    """ Base class for simulating genotype-phenotype maps built from epistatic
    interactions.

    Parameters
    ----------
    wildtype : str
        wildtype sequence.
    mutations : dict
        dictionary mapping each site the possible mutations
    log_transform : bool
        forwarded to the parent constructor.
    logbase : callable
        forwarded to the parent constructor.
    **kwargs :
        forwarded to the parent constructor.
    """
    def __init__(self,
                 wildtype,
                 mutations,
                 log_transform=False,
                 logbase=np.log10,
                 **kwargs):
        genotypes = np.array(utils.mutations_to_genotypes(wildtype, mutations))
        # Placeholder phenotypes (all ones), one per genotype.
        phenotypes = np.ones(len(genotypes))
        # Initialize a genotype-phenotype map
        super(BaseSimulation, self).__init__(wildtype,
                                             genotypes,
                                             phenotypes,
                                             log_transform=log_transform,
                                             logbase=logbase,
                                             mutations=mutations,
                                             **kwargs)
        # Attach an epistasis model.
        self.epistasis = EpistasisMap()

    @assert_epistasis
    def set_coefs_order(self, order):
        """Set coefs from an epistatic order."""
        self.epistasis._from_mutations(self.mutations, order)

    @assert_epistasis
    def set_coefs_labels(self, labels):
        """Set coefs from list of coefs labels.
        """
        self.epistasis.labels = labels

    @assert_epistasis
    def set_coefs(self, labels, values):
        """Set the epistatic coefs and rebuild the phenotypes.

        Parameters
        ----------
        labels : List
            List of epistatic coefficient labels.
        values : List
            list of floats representing to epistatic coefficients.
        """
        self.epistasis.labels = labels
        self.epistasis.values = values
        self.build()

    @assert_epistasis
    def set_coefs_values(self, values):
        """Set coefficient values and rebuild the phenotypes.
        """
        self.epistasis.values = values
        self.build()

    @assert_epistasis
    def set_coefs_random(self, coef_range):
        """Set coefs to values drawn from a random, uniform distribution between
        coef_range, then rebuild the phenotypes.

        Parameters
        ----------
        coef_range : 2-tuple
            low and high bounds for coeff values.
        """
        # Add values to epistatic interactions
        self.epistasis.values = np.random.uniform(coef_range[0],
                                                  coef_range[1],
                                                  size=len(
                                                      self.epistasis.labels))
        self.build()

    @classmethod
    def from_length(cls, length, **kwargs):
        """Constructs a simulation over all binary genotypes of a given length.

        Parameters
        ----------
        length : int
            length of the genotypes
        **kwargs :
            forwarded to the class constructor.

        Returns
        -------
        GenotypePhenotypeMap
        """
        wildtype = "0" * length
        mutations = utils.binary_mutations_map(wildtype, "1" * length)
        return cls(wildtype, mutations, **kwargs)

    @classmethod
    def from_coefs(cls,
                   wildtype,
                   mutations,
                   labels,
                   coefs,
                   model_type="local",
                   **kwargs):
        """Construct a genotype-phenotype map from epistatic coefficients.

        Parameters
        ----------
        wildtype : str
            wildtype sequence
        mutations : dict
            dictionary mapping each site to their possible mutations.
        labels : list
            epistatic coefficient labels, one per coefficient.
        coefs : list or array
            epistatic coefficients
        model_type : str
            epistatic model to use in composition matrix. (`'global'` or `'local'`)
            Forwarded to the class constructor.

        Returns
        -------
        GenotypePhenotypeMap

        Raises
        ------
        ValueError
            if the number of coefficients differs from the number of labels.
        """
        # Each label needs exactly one coefficient.
        if len(coefs) != len(labels):
            raise ValueError(
                """Number of coefs does not match number of labels given.""")
        sim = cls(wildtype, mutations, model_type=model_type, **kwargs)
        sim.set_coefs(labels, coefs)
        return sim

    def build(self, values=None, **kwargs):
        """ Method for construction phenotypes from model. """
        raise NotImplementedError("""Must be implemented in subclass. """)

    def set_stdeviations(self, sigma):
        """Add standard deviations to the simulated phenotypes, which can then be
        used for sampling error in the genotype-phenotype map.

        Parameters
        ----------
        sigma : float or array-like
            Adds standard deviations to the phenotypes. If float, all phenotypes
            are given the same stdeviations. Else, array must be same length as
            phenotypes and will be assigned to each phenotype.
        """
        stdeviations = np.ones(len(self.phenotypes)) * sigma
        self.stdeviations = stdeviations
Esempio n. 11
0
    def add_gpm(self,
                gpm,
                genotype_column="genotype",
                phenotype_column=None,
                uncertainty_column=None):
        """
        Add a GenotypePhenotypeMap object to the epistasis model.

        Parameters
        ----------
        gpm : gpmap.GenotypePhenotypeMap
            genotype phenotype map with genotypes and phenotypes
        genotype_column : str
            name of the genotype column in the gpm
        phenotype_column : str
            name of the phenotype column in the gpm. If None, take the first
            numeric column in the gpm that is not a reserved data column
        uncertainty_column : str
            name of column with phenotype uncertainty in gpm. if None, make a
            column `epi_zero_uncertainty` in the gpm data with value
            1e-6*np.min(np.abs(phenotype))

        Returns
        -------
        self

        Raises
        ------
        TypeError
            if `gpm` is not a GenotypePhenotypeMap or `genotype_column` is not
            a string.
        KeyError
            if a requested column is not in the gpm data.
        ValueError
            if no phenotype column can be found, a phenotype/uncertainty column
            is not numeric, or both refer to the same column.
        """

        # Make sure gpm is a GenotypePhenotypeMap and append it
        if not isinstance(gpm, gpmap.GenotypePhenotypeMap):
            err = "gpm must be a gpmap.GenotypePhenotypeMap instance\n"
            raise TypeError(err)

        self._gpm = gpm

        # Make sure attached genotype-phenotype map has the specified genotype
        # column.
        if not isinstance(genotype_column, str):
            err = f"invalid genotype_column {genotype_column}. Should be a\n"
            err += "column name (string)\n"
            raise TypeError(err)

        try:
            self._gpm.data.loc[:, genotype_column]
        except KeyError as exc:
            err = "gpm does not have the specified genotype_column\n"
            err += f"'{genotype_column}'\n"
            raise KeyError(err) from exc

        self._genotype_column = genotype_column

        # If the phenotype_column is not specified, grab the first numeric
        # non-reserved column
        if phenotype_column is None:
            for c in self._gpm.data.columns:
                if c not in gpmap.reserved_data_columns:
                    if np.issubdtype(self._gpm.data.loc[:, c].dtype,
                                     np.number):
                        phenotype_column = c
                        break

        # If no phenotype column was found
        if phenotype_column is None:
            err = "No phenotype column was specified and none was found in\n"
            err += "the GenotypePhenotypeMap.\n"
            raise ValueError(err)

        # Make sure attached genotype-phenotype map has the specified phenotype
        # column and that this column is numeric.
        try:
            self._gpm.data.loc[:, phenotype_column]
        except KeyError as exc:
            err = "gpm does not have the specified phenotype_column\n"
            err += f"'{phenotype_column}'\n"
            raise KeyError(err) from exc
        if not np.issubdtype(self._gpm.data.loc[:, phenotype_column].dtype,
                             np.number):
            err = f"'{phenotype_column}' must be numeric\n"
            raise ValueError(err)

        self._phenotype_column = phenotype_column

        # If uncertainty_column is not specified, add a placeholder uncertainty
        # column to the gpm data: one constant value, a millionth of the
        # smallest absolute phenotype.
        if uncertainty_column is None:
            uncertainty_column = "epi_zero_uncertainty"
            v = np.min(np.abs(self._gpm.data.loc[:, phenotype_column])) * 1e-6
            self._gpm.data.loc[:, "epi_zero_uncertainty"] = v
        else:
            if uncertainty_column == self._phenotype_column:
                err = "phenotype_column and uncertainty_column cannot be the same\n"
                raise ValueError(err)

        # Make sure attached genotype-phenotype map has the specified uncertainty
        # column and that this column is numeric.
        try:
            self._gpm.data.loc[:, uncertainty_column]
        except KeyError as exc:
            err = "gpm does not have the specified uncertainty_column\n"
            err += f"'{uncertainty_column}'\n"
            raise KeyError(err) from exc
        if not np.issubdtype(self._gpm.data.loc[:, uncertainty_column].dtype,
                             np.number):
            err = f"'{uncertainty_column}' must be numeric\n"
            raise ValueError(err)

        self._uncertainty_column = uncertainty_column

        # Construct columns for X matrix
        self.Xcolumns = encoding_to_sites(self.order, self.gpm.encoding_table)

        # Map those columns to epistasis dataframe.
        self.epistasis = EpistasisMap(sites=self.Xcolumns, gpm=gpm)

        # Wipe out previous X (or create empty previous X) because we just
        # added a new gpmap
        self._previous_X = None

        return self