Esempio n. 1
0
    def work_sequence(self):
        """Yield one zero-argument work item per leave-one-chromosome-out split.

        The data returned by ``load_intersect(self.snp_reader, self.pheno_fn)``
        is loaded once and ``f_regression`` is run once over all SNPs. Then,
        for every (train, test) index pair produced by
        ``LeaveOneChromosomeOut``, a callable is yielded that invokes
        ``self.dowork`` for that split.

        Only the first work item carries the shared result structure
        (placeholder p-values of -1, the linear-regression p-values, the SNP
        names and the SNP positions); every later item gets ``result=None``.

        Raises:
            Exception: if the number of splits differs from
                ``self.chrom_count``. Because this is a generator, the error
                surfaces when the sequence is first iterated, not at call time.
        """
        # NOTE: open question from the original author -- is it OK to do the
        # intersect and the linear regression 23 extra times?
        G, y, snp_name, _ = load_intersect(self.snp_reader, self.pheno_fn)

        # compute linear regression
        _, p_values_lin = f_regression(G, y, center=True)

        # get chr names/id (first column of the position table)
        chr_ids = self.snp_reader.pos[:, 0]
        loco = LeaveOneChromosomeOut(chr_ids, indices=True)

        # Compare by value: `is not` tests object identity and would raise
        # spuriously for equal counts stored in different objects.
        if len(loco) != self.chrom_count:
            raise Exception("The snp reader has {0} chromosome, not {1} as specified".format(len(loco), self.chrom_count))

        for i, (train_snp_idx, test_snp_idx) in enumerate(loco):
            if i == 0:
                result = {"p_values": -np.ones(len(snp_name)),
                          "p_values_lin": p_values_lin,
                          "rs": snp_name,
                          "pos": self.snp_reader.pos}
            else:
                result = None
            # Default arguments bind the current loop values at definition
            # time; a plain closure would see only the last iteration's values.
            yield (lambda i=i, train_snp_idx=train_snp_idx, test_snp_idx=test_snp_idx,
                          result=result, G=G, y=y:
                   self.dowork(i, train_snp_idx, test_snp_idx, result, G, y))
Esempio n. 2
0
    def test_loco(self):
        """
        test leave one chromosome out iterator

        Each distinct name in ``names`` is expected to be held out once; for
        every fold the train/test indices are compared with ``expect``, and
        the total number of folds is checked as well.
        """

        names = ["a", "b", "a", "c", "b", "c", "b"]

        loco = LeaveOneChromosomeOut(names)

        # one [train_indices, test_indices] pair per held-out name
        expect = [[[1, 3, 4, 5, 6], [0, 2]], [[0, 2, 3, 5], [1, 4, 6]],
                  [[0, 1, 2, 4, 6], [3, 5]]]

        n_folds = 0
        for i, (train_idx, test_idx) in enumerate(loco):
            assert (expect[i][0] == train_idx).all()
            assert (expect[i][1] == test_idx).all()
            n_folds += 1

        # Without this check the test would pass vacuously if the iterator
        # produced fewer folds than expected (including none at all).
        assert n_folds == len(expect), \
            "expected {0} folds, got {1}".format(len(expect), n_folds)