Example #1
0
    def __init__(self, Y, Cn, G, F=None, A=None, rank=1, Cr=None):
        """
        Set up the GP from a Cov2KronSumLR covariance and a MeanKronSum mean.

        Args:
            Y:      [N, P] phenotype matrix (must be a numpy array)
            Cn:     Limix covariance matrix for Cn (dimension P)
            G:      [N, rank_r] numpy matrix for G (must be a numpy array)
            F:      Sample fixed effect design (first dimension must be N)
            A:      Trait fixed effect design (second dimension must be P)
            rank:   rank of column low-rank covariance (default = 1)
            Cr:     forwarded unchanged to Cov2KronSumLR (default = None)
        """
        # Input checks run first, in this order, before anything is built.
        assert_type(Y, NP.ndarray, 'Y')
        assert_subtype(Cn, Covariance, 'Cn')
        assert_type(G, NP.ndarray, 'G')

        # Covariance term is constructed before the mean term.
        lowrank_covar = Cov2KronSumLR(Cn=Cn, G=G, rank=rank, Cr=Cr)
        kron_mean = MeanKronSum(Y=Y, F=F, A=A)

        # Only means with at most one term are handled by this class.
        too_many_terms_msg = ('GP2KronSum supports MeanKronSum'
                              ' means with maximum 1 term!')
        assert kron_mean.n_terms <= 1, too_many_terms_msg

        GP.__init__(self, covar=lowrank_covar, mean=kron_mean)
Example #2
0
    def __init__(self, Y, Cn, G, F=None, A=None, rank=1, Cr=None):
        """
        Set up the GP from a Cov2KronSumLR covariance and a MeanKronSum mean.

        Args:
            Y:      [N, P] phenotype matrix (must be a numpy array)
            Cn:     Limix covariance matrix for Cn (dimension P)
            G:      [N, rank_r] numpy matrix for G (must be a numpy array)
            F:      Sample fixed effect design (first dimension must be N)
            A:      Trait fixed effect design (second dimension must be P)
            rank:   rank of column low-rank covariance (default = 1)
            Cr:     forwarded unchanged to Cov2KronSumLR (default = None)
        """
        # Input checks run first, in this order, before anything is built.
        assert_type(Y, NP.ndarray, 'Y')
        assert_subtype(Cn, Covariance, 'Cn')
        assert_type(G, NP.ndarray, 'G')

        # Covariance term is constructed before the mean term.
        lowrank_covar = Cov2KronSumLR(Cn=Cn, G=G, rank=rank, Cr=Cr)
        kron_mean = MeanKronSum(Y=Y, F=F, A=A)

        # Only means with at most one term are handled by this class.
        too_many_terms_msg = ('GP2KronSum supports MeanKronSum'
                              ' means with maximum 1 term!')
        assert kron_mean.n_terms <= 1, too_many_terms_msg

        GP.__init__(self, covar=lowrank_covar, mean=kron_mean)
Example #3
0
def define_gp(Y, Xr, mean, Ie, type):
    """
    Build a GP whose covariance is a CategoricalLR term of the requested kind.

    Args:
        Y:      not used by this function; kept so existing callers keep working
        Xr:     design handed to CategoricalLR; for type 'null' only its first
                dimension is used, to size an all-ones column
        mean:   mean term handed to GP unchanged
        Ie:     handed to CategoricalLR unchanged
        type:   which covariance to use for the CategoricalLR term:
                'null'  -> FixedCov of ones with scale 1e-9 and act_scale False
                'block' -> FixedCov of ones
                'rank1' -> LowRankCov(P, 1)
                'full'  -> FreeFormCov(P)
                (the name shadows the builtin but is part of the signature)

    Returns:
        GP built from the covariance above and `mean`.

    Raises:
        ValueError: if `type` is not one of the four names above. Previously
            this printed a placeholder message and then failed on an unbound
            local variable.
    """
    P = 2

    # Reject unknown names before building anything.
    if type not in ('null', 'block', 'rank1', 'full'):
        raise ValueError(
            "unknown type %r; expected 'null', 'block', 'rank1' or 'full'"
            % (type,))

    if type == 'null':
        _Cr = FixedCov(sp.ones((P, P)))
        _Cr.scale = 1e-9
        _Cr.act_scale = False
        # The null model replaces Xr by a single column of ones.
        covar = CategoricalLR(_Cr, sp.ones((Xr.shape[0], 1)), Ie)
    else:
        if type == 'block':
            _Cr = FixedCov(sp.ones((P, P)))
        elif type == 'rank1':
            _Cr = LowRankCov(P, 1)
        else:  # 'full'
            _Cr = FreeFormCov(P)
        covar = CategoricalLR(_Cr, Xr, Ie)

    return GP(covar=covar, mean=mean)
    # NOTE(review): these statements follow a `return` at the same indentation,
    # so as laid out they can never run. They look like the tail of a separate
    # interactive debugging script -- TODO confirm where they belong. They use
    # Python 2 `print` statements, and `gp`, `N` and `f` are not defined
    # anywhere in the visible code.
    import ipdb
    # Each step below prints a label, stops in the debugger, then calls `diff`
    # with a setter (and, for setG, a random 0/1 matrix of shape (N, f)).
    print 'Change Params covar:'
    ipdb.set_trace()
    gp.covar.diff(gp.covar.setRandomParams)
    print 'Change Params gp:'
    ipdb.set_trace()
    gp.diff(gp.covar.setRandomParams)
    print 'Change G covar:'
    ipdb.set_trace()
    gp.covar.diff(gp.covar.setG, 1. * (sp.rand(N, f) < 0.2))
    print 'Change G gp:'
    ipdb.set_trace()
    gp.diff(gp.covar.setG, 1. * (sp.rand(N, f) < 0.2))
    ipdb.set_trace()

    # Reference model: a plain GP built from deep copies of the same covariance
    # and mean, used below for comparison.
    gp0 = GP(covar=copy.deepcopy(gp.covar), mean=copy.deepcopy(gp.mean))

    # Time the log marginal likelihood of the model under test.
    t0 = time.time()
    print 'GP2KronSum.LML():', gp.LML()
    print 'Time elapsed:', time.time() - t0

    # compare with normal gp
    # assess compatibility with this GP
    t0 = time.time()
    print 'GP.LML():', gp0.LML()
    print 'Time elapsed:', time.time() - t0

    # Time the gradient of the log marginal likelihood of the model under test.
    t0 = time.time()
    print 'GP2KronSum.LML_grad():', gp.LML_grad()
    print 'Time elapsed:', time.time() - t0
Example #5
0
        # NOTE(review): fragment of a loop body (the `continue` below needs an
        # enclosing loop); `cov`, `environment_cov`, `local_noise_cov`,
        # `N_cells`, `mean` and `phen` are defined outside the visible code.
        # local_noise_cov.scale = 0
        # direct_cov.scale = 0
        # cov = SumCov(noise_cov, local_noise_cov)

        # add the environment term to the covariance built so far
        cov = SumCov(cov, environment_cov)

        # fixing length scale of ZKZ and SE
        # NOTE(review): only the value is assigned here; the line that would
        # presumably freeze it (act_length = False) is commented out below.
        environment_cov.length = N_cells / 50
        # environment_cov.scale=0
        # environment_cov.act_length = False

        # local_noise_cov.length = N_cells/10.0
        # local_noise_cov.act_length = False

        # define and optimise GP
        gp = GP(covar=cov, mean=mean)

        # On any failure of the optimiser, report the phenotype index and move
        # on to the next loop iteration.
        # NOTE(review): the bare `except:` also catches KeyboardInterrupt and
        # SystemExit.
        try:
            gp.optimize()
        except:
            print('optimisation', str(phen), 'failed')
            continue

        # rescale each term to sample variance one
        # direct cov: unnecessary as fixed covariance rescaled before optimisation
        # local noise covariance
        tmp = covar_rescaling_factor(local_noise_cov.K()/local_noise_cov.scale)
        local_noise_cov.scale /= tmp
        # env effect
        # NOTE(review): this term divides by scale**2 whereas the local noise
        # term above divides by scale -- presumably the two covariance classes
        # parameterise their scale differently; TODO confirm.
        tmp = covar_rescaling_factor(environment_cov.K()/environment_cov.scale**2)
        environment_cov.scale = environment_cov.scale**2/tmp
Example #6
0
        # debug covariance: compare the `*_debug` results of Cov2KronSumLR with
        # the regular ones after drawing random parameters
        # NOTE(review): fragment whose enclosing statement is not visible; it
        # uses Python 2 `print` statements, and `Cn` and `X` come from outside.
        cov = Cov2KronSumLR(Cn = Cn, G = X, rank = 1)
        cov.setRandomParams()
        pdb.set_trace()
        # whether the mean squared difference of the two inverses is below 1e-9
        print ((cov.inv_debug()-cov.inv())**2).mean()<1e-9
        # squared difference of the two log-determinants
        print (cov.logdet_debug()-cov.logdet())**2
        # squared difference of the two log-determinant gradients for index 0
        print (cov.logdet_grad_i_debug(0)-cov.logdet_grad_i(0))**2
# Disabled block (`if 0:` never executes): times gp.LML() against a plain GP
# built from deep copies of the same covariance and mean.
# NOTE(review): Python 2 `print` statements; `gp` is defined outside the
# visible code.
if 0:

    t0 = time.time()
    print 'GP2KronSum.LML():', gp.LML()
    print 'Time elapsed:', time.time() - t0

    # compare with normal gp
    # assess compatibility with this GP
    gp0 = GP(covar = copy.deepcopy(gp.covar), mean = copy.deepcopy(gp.mean))
    t0 = time.time()
    print 'GP.LML():', gp0.LML()
    print 'Time elapsed:', time.time() - t0

    # Nested disabled block: print the LML difference and the mean squared
    # difference of the 'covar' gradients, first at the current parameters and
    # then again after drawing random parameters and copying them to gp0.
    if 0:
        pdb.set_trace()
        print gp.LML() - gp0.LML()
        print ((gp.LML_grad()['covar'] - gp0.LML_grad()['covar'])**2).mean()
        pdb.set_trace()
        gp.covar.setRandomParams()
        gp0.covar.setParams(gp.covar.getParams())
        print gp.LML() - gp0.LML()
        print ((gp.LML_grad()['covar'] - gp0.LML_grad()['covar'])**2).mean()

    pdb.set_trace()
Example #7
0
# Script section: fit the complete model (all four covariance terms) and print
# the parameters read back from each term.
# NOTE(review): `direct_cov`, `noise_cov`, `local_noise_cov`, `environment_cov`,
# `N_cells` and `mean` are defined outside the visible code.
print('model 1 : complete model ')
print('...........................................................')
# total cov and mean
# the four terms are summed pairwise, nesting one SumCov inside the next
cov = SumCov(direct_cov, noise_cov)
cov = SumCov(cov, local_noise_cov)
cov = SumCov(cov, environment_cov)

# fixing length scale of ZKZ and SE
# (value set, then act_length switched off -- presumably this excludes the
# length from optimisation; TODO confirm)
environment_cov.length = N_cells / 50
environment_cov.act_length = False

# local_noise_cov.length = N_cells/10.0
# local_noise_cov.act_length = False

# define and optimise GP
gp = GP(covar=cov, mean=mean)
gp.optimize()

# show results
print("inferred parameters ")
print("direct_scale = ", " ", direct_cov.scale)
print("noise_scale = ", " ", noise_cov.scale)
print("local_noise_scale = ", " ", local_noise_cov.scale)
print("local_noise_length = ", " ", local_noise_cov.length)
print("environment_scale = ", " ", environment_cov.scale)
print("environment_length = ", " ", environment_cov.length)

#######################################################################
# MODEL 2: no social effect
#######################################################################
print('...........................................................')
Example #8
0
class Model(object):
    """
    General base class for building and training a spatial variance model.

    Contains the steps that are not specific to a given model. The methods
    below read ``self.Y``, ``self.X``, ``self.norm``, ``self.cv_ix``,
    ``self.n_samples`` and ``self.covar``; none of them is set in this class,
    so they have to be provided elsewhere (presumably by the child classes)
    before the corresponding method is called.
    """

    def __init__(self):
        pass

    ##########################
    # Preprocessing steps
    ##########################
    def preprocess_input(self):
        """
        Normalise the phenotype ``self.Y`` according to ``self.norm``.

        'quantile' uses utils.quantile_normalise_phenotype and 'std' uses
        utils.normalise_phenotype; the result replaces ``self.Y``.

        Raises:
            ValueError: if ``self.norm`` is neither 'quantile' nor 'std'.
        """
        if self.norm == 'quantile':
            self.Y = utils.quantile_normalise_phenotype(self.Y)
        elif self.norm == 'std':
            self.Y = utils.normalise_phenotype(self.Y)
        else:
            raise ValueError('normalisation method not understood')

    def def_training_set(self, oos_predictions):
        """
        Define the training/test split for out-of-sample prediction.

        Sets ``self.train_set`` to a boolean vector of length
        ``self.n_samples`` in which True marks a training sample.

        Args:
            oos_predictions: fraction of samples held out as test set.
                Without a cross-validation index (``self.cv_ix is None``) it
                must lie in [0, 1[ and the test samples are drawn at random.
                With a cross-validation index it is the fold size, and fold
                number ``self.cv_ix`` of a fixed permutation is held out.

        Raises:
            ValueError: if ``self.cv_ix`` is None and ``oos_predictions`` is
                outside [0, 1[.
        """
        # np.ones keeps the mask boolean even when n_samples is 0 (an empty
        # list comprehension would give a float array that `~` cannot invert).
        train_mask = np.ones(self.n_samples, dtype=bool)

        if self.cv_ix is None:
            if not 0. <= oos_predictions < 1.:
                raise ValueError('oos_predictions out of range, should be in [0;1[')
            # Nothing is drawn from the random generator when no sample is
            # held out.
            if oos_predictions > 0.:
                n_test = int(oos_predictions * self.n_samples)
                test_ix = np.random.choice(self.n_samples, n_test, replace=False)
                train_mask[test_ix] = False
        else:
            # set seed and get an index permutation and step size
            # NOTE: this reseeds numpy's global random generator, so every
            # fold sees the same permutation; kept because later code may rely
            # on the generator state.
            # NOTE(review): the permutation is sized by self.X.shape[0] while
            # the mask is sized by self.n_samples -- presumably equal; verify.
            np.random.seed(0)
            permuted_indices = np.random.permutation(self.X.shape[0])
            step_size = len(permuted_indices) * oos_predictions

            # select test set
            first_ix = int(self.cv_ix * step_size)
            last_ix = int(self.cv_ix * step_size + step_size)
            train_mask[permuted_indices[first_ix:last_ix]] = False

        self.train_set = train_mask

    ##########################
    # Building Model
    ##########################
    def init_model(self, cov_terms):
        """
        General way of initialising a model.

        Runs, in order: input preprocessing, kinship, covariance (with
        ``cov_terms``), mean and GP construction.
        """
        self.preprocess_input()   # defined in parent
        self.build_Kinship()
        self.build_cov(cov_terms)
        self.build_mean()
        self.build_gp()

    # The following functions are specific to a given model and have to be
    # implemented in the relevant children class.
    def build_Kinship(self):
        """Model-specific hook; does nothing in the base class."""
        pass

    def build_cov(self, cov_terms=None):
        """
        Model-specific hook; does nothing in the base class.

        Accepts ``cov_terms`` because init_model passes it; the default keeps
        argument-less calls working.
        """
        pass

    def add_cov(self):
        """Model-specific hook; does nothing in the base class."""
        pass

    def rm_cov(self):
        """Model-specific hook; does nothing in the base class."""
        pass

    def build_mean(self):
        """
        General way to build the mean term of a GP model for limix.

        Uses only the training rows of ``self.Y``.
        """
        self.mean = MeanBase(self.Y[self.train_set, :])

    def build_gp(self):
        """Create the limix GP object from ``self.mean`` and ``self.covar``."""
        # Keyword arguments so the binding does not depend on the positional
        # parameter order of GP.
        self.gp = GP(mean=self.mean, covar=self.covar)

    ##########################
    # Train model
    ##########################
    def train_gp(self):
        """
        Model-specific hook; does nothing in the base class.

        The way the model is trained has to be implemented in the relevant
        classes.
        """
        pass

    ##########################
    # Prediction from model
    ##########################
    def predict(self):
        """
        Return ``self.gp.predict()``, or a 1x1 NaN array if prediction fails.

        Only ``Exception`` subclasses are swallowed, so KeyboardInterrupt and
        SystemExit still propagate.
        """
        try:
            return self.gp.predict()
        except Exception:
            return np.array([[np.nan]])

    def r2(self):
        """
        Return 1 - SS_res / SS_tot for the first phenotype column, computed on
        the samples that are not in the training set.
        """
        Y_pred = self.predict()[:, 0]
        Y_true = self.Y[:, 0][~self.train_set]

        res = ((Y_true - Y_pred)**2.).sum()
        var = ((Y_true - Y_true.mean())**2.).sum()

        return 1. - res/var

    def pred(self):
        """
        Return a two-column array [true value, predicted value] for the first
        phenotype column of the samples that are not in the training set.
        """
        Y_pred = self.predict()[:, 0]
        Y_true = self.Y[:, 0][~self.train_set]

        return np.concatenate((Y_true[:, None], Y_pred[:, None]), axis=1)
Example #9
0
 def build_gp(self):
     """Create the GP object from ``self.mean`` and ``self.covar`` and store it in ``self.gp``."""
     # Keyword arguments so the binding does not depend on the positional
     # parameter order of GP.
     self.gp = GP(mean=self.mean, covar=self.covar)
Example #10
0
    # NOTE(review): interior of a function whose header is not visible; `X`,
    # `N` and `v_noise` are defined outside the visible code.
    # noisy sine observations: sin(X) plus Gaussian noise of variance v_noise
    Y = sp.sin(X) + sp.sqrt(v_noise) * sp.randn(N, 1)

    # for out-of-sample predictions: 1000 evenly spaced points in [0, 2],
    # as a column vector
    Xstar = sp.linspace(0,2,1000)[:,sp.newaxis]

    # define mean term
    # W is a random 0/1 design with two columns (entries are 1 with
    # probability 0.2)
    W = 1. * (sp.rand(N, 2) < 0.2)
    mean = lin_mean(Y, W)

    # define covariance matrices
    # squared-exponential term on X (given Xstar for prediction) plus an
    # identity noise term
    sqexp = SQExpCov(X, Xstar = Xstar)
    noise = FixedCov(sp.eye(N))
    covar  = SumCov(sqexp, noise)

    # define gp
    gp = GP(covar=covar,mean=mean)
    # initialize params
    sqexp.scale = 1e-4
    sqexp.length = 1
    noise.scale = Y.var()
    # optimize
    # (calc_ste=True: presumably also computes the standard errors that are
    # printed below -- TODO confirm)
    gp.optimize(calc_ste=True)
    # predict out-of-sample
    Ystar = gp.predict()

    # print optimized values and standard errors
    print('weights of fixed effects')
    print(mean.b[0, 0], '+/-', mean.b_ste[0, 0])
    print(mean.b[1, 0], '+/-', mean.b_ste[1, 0])
    print('scale of sqexp')
    print(sqexp.scale, '+/-', sqexp.scale_ste)
def run_individual_model(model, expression_file, position_file, output_directory,
                         permute_positions=False, random_start_point=False):
    """
    Fit one GP per phenotype column and write the inferred parameters to disk.

    Args:
        model:              'full' (noise + local noise + environment + direct
                            term) or 'env' (same without the direct term; the
                            direct scale is then reported as 0)
        expression_file:    text file whose first line holds the space-separated
                            protein names and whose remaining lines are loaded
                            with np.loadtxt (space-delimited), one row per cell
        position_file:      comma-delimited file loaded with np.genfromtxt, one
                            row per cell
        output_directory:   directory the result files are written to
        permute_positions:  if True, shuffle the rows of the position matrix
                            and add '_permuted' to the output file name
        random_start_point: if True, draw the starting length and scale of the
                            environment term uniformly at random instead of
                            starting from length 200

    Writes:
        <output_directory>/inferred_parameters_<model>[_permuted].txt
            one row per protein: name followed by the six parameters
        the same path with '_loglik' appended
            one log marginal likelihood per protein

    A protein whose optimisation fails is reported on stdout and keeps zeros
    in both outputs.

    Raises:
        ValueError: if `model` is not 'full' or 'env', or if the two input
            files disagree on the number of cells.
    """
    rm_diag = True

    # Compare by value: an identity test (`is not`) against a string literal
    # can reject an equal string that is a different object.
    if model not in ('full', 'env'):
        raise ValueError('model not understood. Please specify a model between full and env')

    # read phenotypes data
    with open(expression_file, 'r') as f:
        header_line = f.readline()
    # Strip only the line terminator, so the last name stays intact when the
    # header line has no trailing newline.
    protein_names = header_line.rstrip('\r\n').split(' ')
    protein_names = np.reshape(protein_names, [len(protein_names), 1])
    phenotypes = np.loadtxt(expression_file, delimiter=' ', skiprows=1)

    # read position data
    X = np.genfromtxt(position_file, delimiter=',')
    if permute_positions:
        X = X[np.random.permutation(X.shape[0]), :]
    if X.shape[0] != phenotypes.shape[0]:
        raise ValueError('cell number inconsistent between position and epression levels ')

    # define output file name
    output_file = output_directory+'/inferred_parameters_' + model
    if permute_positions:
        output_file += '_permuted.txt'
    else:
        output_file += '.txt'

    N_cells, n_phenotypes = phenotypes.shape[0], phenotypes.shape[1]

    # one row per phenotype; rows stay zero when the optimisation fails
    parameters = np.zeros([n_phenotypes, 6])
    log_lik = np.zeros(n_phenotypes)

    for phen in range(n_phenotypes):

        # NOTE: `phenotype` is a view into `phenotypes`, so the in-place
        # standardisation below also rewrites that column of the matrix.
        phenotype = phenotypes[:, phen]
        phenotype -= phenotype.mean()
        phenotype /= phenotype.std()
        phenotype = np.reshape(phenotype, [N_cells, 1])

        # kinship from all the other phenotypes
        phenotypes_tmp = np.delete(phenotypes, phen, axis=1)
        phenotypes_tmp = normalise(phenotypes_tmp)

        Kinship = phenotypes_tmp.dot(phenotypes_tmp.transpose())
        # shift the spectrum so that the smallest eigenvalue becomes zero
        Kinship -= np.linalg.eigvalsh(Kinship).min() * np.eye(N_cells)
        Kinship *= covar_rescaling_factor(Kinship)

        # create different models and print the result including likelihood
        # create all the covariance terms
        direct_cov = FixedCov(Kinship)

        # noise
        noise_cov = FixedCov(np.eye(N_cells))

        # local_noise
        local_noise_cov = SQExpCov(X)
        local_noise_cov.length = 100
        local_noise_cov.act_length = False
        # environment effect
        environment_cov = ZKZCov(X, Kinship, rm_diag)

        # mean term
        mean = MeanBase(phenotype)

        #######################################################################
        # defining model
        #######################################################################
        cov = SumCov(noise_cov, local_noise_cov)
        cov = SumCov(cov, environment_cov)
        if random_start_point:
            environment_cov.length = np.random.uniform(10, 300)
            environment_cov.scale = np.random.uniform(1, 15)
        else:
            environment_cov.length = 200
        # environment_cov.act_length = False

        if model == 'full':
            cov = SumCov(cov, direct_cov)
        else:
            # the direct term is not part of the model; report its scale as 0
            direct_cov.scale = 0

        # define and optimise GP
        gp = GP(covar=cov, mean=mean)

        # Best effort: a failed optimisation skips this phenotype. Only
        # Exception subclasses are caught, so Ctrl-C still stops the run.
        try:
            gp.optimize()
        except Exception:
            print('optimisation', str(phen), 'failed')
            continue

        log_lik[phen] = gp.LML()

        # rescale each term to sample variance one
        # direct cov: unnecessary as fixed covariance rescaled before optimisation
        # local noise covariance
        tmp = covar_rescaling_factor(local_noise_cov.K()/local_noise_cov.scale)
        local_noise_cov.scale /= tmp
        # env effect
        # NOTE(review): divides by scale**2 whereas the local noise term above
        # divides by scale -- presumably the two covariance classes
        # parameterise their scale differently; TODO confirm.
        tmp = covar_rescaling_factor(environment_cov.K()/environment_cov.scale**2)
        environment_cov.scale = environment_cov.scale**2/tmp

        parameters[phen, :] = [direct_cov.scale,
                               noise_cov.scale,
                               local_noise_cov.scale,
                               local_noise_cov.length,
                               environment_cov.scale,
                               environment_cov.length]

    result_header = ' '.join(['direct_scale',
                              'noise_scale',
                              'local_noise_scale',
                              'local_noise_length',
                              'environment_scale',
                              'environment_length'])

    with open(output_file, 'w') as f:
        np.savetxt(f,
                   np.hstack((protein_names, parameters)),
                   delimiter=' ',
                   header=result_header,
                   fmt='%s',
                   comments='')

    log_lik_file = output_file + '_loglik'
    with open(log_lik_file, 'w') as f:
        np.savetxt(f, log_lik)