class SparseTestCase(unittest.TestCase): def setUp(self): self.file_bed = ["./utest/test1_divided_merged.bed", "./utest/test1_different.bed"] self.file_len = "./utest/test1_divided.len" self.file_test = "./utest/test1_similar.bed" self.file_dhs = "./utest/test1_dhs.bed" self.estimate_extracted = [ "./utest/test1_divided_merged_extracted.vec", "./utest/test1_different_extracted.vec", ] self.bsm = BedNMF(self.file_bed, self.file_len, self.file_dhs, 1) def testDhsExtraction(self): self.bsm.init_dhs_extraction() cnt = 0 for efile in self.estimate_extracted: with open(efile) as ef: vector = read_vector(ef) result = ravel(self.bsm.sparse[:, cnt].todense()) print "estimate extracted", vector print "result extracted", result self.assertTrue(alltrue(vector == result)) cnt += 1 def testNmfClassification(self): self.bsm.init_dhs_extraction() self.bsm.init_factorization() print self.bsm.classification(self.file_bed[0]) print self.bsm.classification(self.file_bed[1]) print self.bsm.classification(self.file_test)
class CtcfTestCase(unittest.TestCase): def setUp(self): self.train_path = glob.glob("./CTCF_train/*.bed") print self.train_path self.len_path = "./static_data/hg19_len" self.dhs_path = "./static_data/DHS_hg19.bed" self.test_path = "./CTCF_test/5011_peaks.bed" self.test2_path = "./CTCF_train/1252_peaks.bed" self.dump_path = "./persist/train.p" self.bin_size = 50000 self.InitNMF() def InitNMF(self): try: self.bsm = cPickle.load(open(self.dump_path,"r")) print "Load from database" except: print "Init database" self.bsm = BedNMF(self.train_path, self.len_path, self.dhs_path, self.bin_size) self.bsm.init_dhs_extraction() self.bsm.init_factorization(rank = 10) cPickle.dump(self.bsm, open(self.dump_path, 'w')) def gen_testClassification(self, test_path): result = self.bsm.classification(test_path) pprint(n_nearest(result, self.train_path)) return result def testClassification1(self): self.gen_testClassification(self.test_path) self.gen_testClassification(self.test2_path)
def InitNMF(self): try: self.bsm = cPickle.load(open(self.dump_path,"r")) print "Load from database" except: print "Init database" self.bsm = BedNMF(self.train_path, self.len_path, self.dhs_path, self.bin_size) self.bsm.init_dhs_extraction() self.bsm.init_factorization(rank = 10) cPickle.dump(self.bsm, open(self.dump_path, 'w'))
def setUp(self):
    """Record the ./utest fixture paths and build a fresh BedNMF in ``self.bsm``."""
    self.file_len = "./utest/test1_divided.len"
    self.file_dhs = "./utest/test1_dhs.bed"
    self.file_test = "./utest/test1_similar.bed"
    self.file_bed = [
        "./utest/test1_divided_merged.bed",
        "./utest/test1_different.bed",
    ]
    # Files holding the expected extracted vectors; the names mirror the
    # entries of file_bed.
    self.estimate_extracted = [
        "./utest/test1_divided_merged_extracted.vec",
        "./utest/test1_different_extracted.vec",
    ]
    # NOTE(review): the trailing 1 is presumably the bin size -- confirm
    # against the BedNMF constructor.
    self.bsm = BedNMF(self.file_bed, self.file_len, self.file_dhs, 1)