def test_transformation(self):
    """Check that transforming already-fused rows reproduces their factors.

    One relation is fused with ``Dfmf``; its first two rows are then passed
    through ``DfmfTransform`` and the resulting factor and reconstruction
    are compared with the fused ones by mean squared difference.
    """
    n_kept = 2
    # NOTE(review): the data comes from numpy's global generator, so it is
    # not reproducible between runs; only the two estimators are seeded.
    data = np.random.rand(5, 3)
    row_type = ObjectType('type1', 2)
    col_type = ObjectType('type2', 2)
    relation = Relation(data, row_type, col_type)

    graph = FusionGraph()
    graph.add_relation(relation)
    model = Dfmf(init_type='random',
                 random_state=np.random.RandomState(0),
                 max_iter=100)
    fuser = model.fuse(graph)

    kept_graph = FusionGraph(
        [Relation(data[:n_kept].copy(), row_type, col_type)])
    transform = DfmfTransform(random_state=np.random.RandomState(0))
    transformer = transform.transform(row_type, kept_graph, fuser)

    new_rows = transformer.factor(row_type)
    fused_rows = fuser.factor(row_type)
    fused_cols = fuser.factor(col_type)
    backbone = fuser.backbone(relation)
    new_hat = np.dot(new_rows, np.dot(backbone, fused_cols.T))
    fused_hat = np.dot(fused_rows, np.dot(backbone, fused_cols.T))

    deltas = (new_rows - fused_rows[:n_kept], new_hat - fused_hat[:n_kept])
    for delta in deltas:
        # Mean squared difference must stay below the tolerance.
        self.assertLess(np.sum(delta ** 2) / delta.size, 1e-5)
def test_pipeline(self):
    """Fuse three relations, then transform new rows of the first type.

    Only the shapes of the returned factors and backbones are asserted.
    """
    rng = np.random.RandomState(0)
    # Draw order is kept as-is: the same generator is afterwards handed to
    # Dfmf and DfmfTransform.
    data_12 = rng.rand(50, 30)
    data_13 = rng.rand(50, 40)
    data_23 = rng.rand(30, 40)

    first = ObjectType('type1', 30)
    second = ObjectType('type2', 40)
    third = ObjectType('type3', 40)
    relations = [
        Relation(data_12, first, second),
        Relation(data_13, first, third),
        Relation(data_23, second, third),
    ]

    graph = FusionGraph()
    graph.add_relations_from(relations)
    fuser = Dfmf(random_state=rng).fuse(graph)

    factor_shapes = ((first, (50, 30)), (second, (30, 40)), (third, (40, 40)))
    for object_type, shape in factor_shapes:
        self.assertEqual(fuser.factor(object_type).shape, shape)

    backbone_shapes = ((30, 40), (30, 40), (40, 40))
    for relation, shape in zip(relations, backbone_shapes):
        self.assertEqual(fuser.backbone(relation).shape, shape)

    new_12 = rng.rand(15, 30)
    new_13 = rng.rand(15, 40)
    new_graph = FusionGraph([
        Relation(new_12, first, second),
        Relation(new_13, first, third),
    ])
    transform = DfmfTransform(random_state=rng)
    transformer = transform.transform(first, new_graph, fuser)
    self.assertEqual(transformer.factor(first).shape, (15, 30))
def test_pipeline(self):
    """Run the fuse-then-transform pipeline and verify result shapes."""
    generator = np.random.RandomState(0)
    # The three training matrices are drawn first, in this order, from the
    # generator that is later passed to both estimators.
    train_12, train_13, train_23 = (
        generator.rand(*shape) for shape in ((50, 30), (50, 40), (30, 40)))

    type_a = ObjectType('type1', 30)
    type_b = ObjectType('type2', 40)
    type_c = ObjectType('type3', 40)
    rel_ab = Relation(train_12, type_a, type_b)
    rel_ac = Relation(train_13, type_a, type_c)
    rel_bc = Relation(train_23, type_b, type_c)

    training_graph = FusionGraph()
    training_graph.add_relations_from([rel_ab, rel_ac, rel_bc])
    fuser = Dfmf(random_state=generator).fuse(training_graph)

    # Latent factors, one per object type.
    self.assertEqual(fuser.factor(type_a).shape, (50, 30))
    self.assertEqual(fuser.factor(type_b).shape, (30, 40))
    self.assertEqual(fuser.factor(type_c).shape, (40, 40))
    # Backbone matrices, one per relation.
    self.assertEqual(fuser.backbone(rel_ab).shape, (30, 40))
    self.assertEqual(fuser.backbone(rel_ac).shape, (30, 40))
    self.assertEqual(fuser.backbone(rel_bc).shape, (40, 40))

    unseen_12 = generator.rand(15, 30)
    unseen_13 = generator.rand(15, 40)
    unseen_graph = FusionGraph([
        Relation(unseen_12, type_a, type_b),
        Relation(unseen_13, type_a, type_c),
    ])
    transformer = DfmfTransform(random_state=generator).transform(
        type_a, unseen_graph, fuser)
    self.assertEqual(transformer.factor(type_a).shape, (15, 30))
def test_dfmf(self):
    """Fuse two relations between the same pair of types plus a third one.

    Checks the relation/object-type counts of the graph, the backbone
    shapes, and that ``fuser.complete`` equals ``G1 . S . G2^T`` for both
    parallel relations.
    """
    rnds = np.random.RandomState(0)
    # Draw all data from the seeded generator rather than numpy's global
    # state, so every run fuses the same matrices. The assertions below do
    # not depend on the actual values.
    R12_1 = rnds.rand(30, 30)
    R12_2 = rnds.rand(30, 30)
    R13 = rnds.rand(30, 20)

    t1 = ObjectType('type1', 30)
    t2 = ObjectType('type2', 30)
    t3 = ObjectType('type3', 20)
    relations = [
        Relation(R12_1, t1, t2),
        Relation(R12_2, t1, t2),
        Relation(R13, t1, t3),
    ]

    fusion_graph = FusionGraph()
    fusion_graph.add_relations_from(relations)
    self.assertEqual(len(fusion_graph.relations), 3)
    self.assertEqual(len(fusion_graph.object_types), 3)

    fuser = Dfmf(init_type='random', random_state=rnds).fuse(fusion_graph)
    self.assertEqual(fuser.backbone(relations[0]).shape, (30, 30))
    self.assertEqual(fuser.backbone(relations[1]).shape, (30, 30))
    self.assertEqual(fuser.backbone(relations[2]).shape, (30, 20))

    # Both t1-t2 relations share the factors G1 and G2 and differ only in
    # their backbone matrix.
    G1 = fuser.factor(t1)
    G2 = fuser.factor(t2)
    S12_1 = fuser.backbone(relations[0])
    S12_2 = fuser.backbone(relations[1])
    R12_1_hat = np.dot(G1, np.dot(S12_1, G2.T))
    R12_2_hat = np.dot(G1, np.dot(S12_2, G2.T))
    np.testing.assert_almost_equal(fuser.complete(relations[0]), R12_1_hat)
    np.testing.assert_almost_equal(fuser.complete(relations[1]), R12_2_hat)
def test_dfmf(self):
    """Fuse with ``n_run=3`` and check per-run factors and completion.

    Without a ``run`` argument, ``factor`` and ``backbone`` are iterated and
    must yield three items each; with ``run=1`` the completed relation must
    match the product of that run's factors and backbone.
    """
    rng = np.random.RandomState(0)
    data_12 = rng.rand(30, 30)
    data_13 = rng.rand(30, 30)

    t1 = ObjectType('type1', 50)
    t2 = ObjectType('type2', 30)
    t3 = ObjectType('type3', 10)
    object_types = [t1, t2, t3]
    relations = [Relation(data_12, t1, t2), Relation(data_13, t1, t3)]

    graph = FusionGraph()
    graph.add_relations_from(relations)
    model = Dfmf(init_type='random', random_state=rng, n_run=3)
    fuser = model.fuse(graph)

    for object_type in object_types:
        self.assertEqual(len(list(fuser.factor(object_type))), 3)
    for relation in relations:
        self.assertEqual(len(list(fuser.backbone(relation))), 3)

    for object_type in object_types:
        expected_shape = (30, object_type.rank)
        for factor in fuser.factor(object_type):
            self.assertEqual(factor.shape, expected_shape)

    rows = fuser.factor(t1, run=1)
    core = fuser.backbone(relations[1], run=1)
    cols = fuser.factor(t3, run=1)
    expected = np.dot(rows, np.dot(core, cols.T))
    np.testing.assert_almost_equal(
        fuser.complete(relations[1], run=1), expected)
def test_transformation(self):
    """Transform the first two fused rows and compare with the fused model.

    The factor returned by ``DfmfTransform`` and the reconstruction built
    from it must match the corresponding rows of the ``Dfmf`` result within
    a mean squared difference of ``1e-5``.
    """
    def mean_squared(values):
        return np.sum(values ** 2) / values.size

    # NOTE(review): this matrix is drawn from numpy's global generator and
    # therefore differs between runs; the estimators below are seeded.
    full = np.random.rand(5, 3)
    rows = ObjectType('type1', 2)
    cols = ObjectType('type2', 2)
    full_relation = Relation(full, rows, cols)

    full_graph = FusionGraph()
    full_graph.add_relation(full_relation)
    fit_seed = np.random.RandomState(0)
    fuser = Dfmf(
        init_type='random', random_state=fit_seed, max_iter=100,
    ).fuse(full_graph)

    subset = full[:2].copy()
    subset_graph = FusionGraph([Relation(subset, rows, cols)])
    transform_seed = np.random.RandomState(0)
    transformer = DfmfTransform(random_state=transform_seed).transform(
        rows, subset_graph, fuser)

    transformed = transformer.factor(rows)
    fitted = fuser.factor(rows)
    col_factor = fuser.factor(cols)
    core = fuser.backbone(full_relation)

    transformed_hat = np.dot(transformed, np.dot(core, col_factor.T))
    fitted_hat = np.dot(fitted, np.dot(core, col_factor.T))

    factor_gap = transformed - fitted[:2]
    reconstruction_gap = transformed_hat - fitted_hat[:2]
    self.assertLess(mean_squared(factor_gap), 1e-5)
    self.assertLess(mean_squared(reconstruction_gap), 1e-5)
def test_dfmf(self):
    """Check that a three-run fusion exposes three factors and backbones.

    Also verifies the factor shapes for every run and that completing the
    second relation for ``run=1`` equals ``G1 . S13 . G3^T`` of that run.
    """
    n_runs = 3
    seeded = np.random.RandomState(0)
    first_data = seeded.rand(30, 30)
    second_data = seeded.rand(30, 30)

    type_1 = ObjectType('type1', 50)
    type_2 = ObjectType('type2', 30)
    type_3 = ObjectType('type3', 10)
    rel_12 = Relation(first_data, type_1, type_2)
    rel_13 = Relation(second_data, type_1, type_3)

    graph = FusionGraph()
    graph.add_relations_from([rel_12, rel_13])
    fuser = Dfmf(
        init_type='random', random_state=seeded, n_run=n_runs,
    ).fuse(graph)

    # Without ``run`` the accessors are iterated, one item per run.
    self.assertEqual(len(list(fuser.factor(type_1))), n_runs)
    self.assertEqual(len(list(fuser.factor(type_2))), n_runs)
    self.assertEqual(len(list(fuser.factor(type_3))), n_runs)
    self.assertEqual(len(list(fuser.backbone(rel_12))), n_runs)
    self.assertEqual(len(list(fuser.backbone(rel_13))), n_runs)

    for current in (type_1, type_2, type_3):
        for run_factor in fuser.factor(current):
            self.assertEqual(run_factor.shape, (30, current.rank))

    picked = 1
    g1 = fuser.factor(type_1, run=picked)
    s13 = fuser.backbone(rel_13, run=picked)
    g3 = fuser.factor(type_3, run=picked)
    reconstruction = np.dot(g1, np.dot(s13, g3.T))
    completed = fuser.complete(rel_13, run=picked)
    np.testing.assert_almost_equal(completed, reconstruction)
def test_dfmf(self):
    """Fuse a graph with two parallel t1-t2 relations and one t1-t3 relation.

    Asserts the number of relations and object types in the graph, the
    shape of each backbone, and that ``fuser.complete`` returns
    ``G1 . S . G2^T`` for each of the two parallel relations.
    """
    rnds = np.random.RandomState(0)
    # Use the seeded generator for the data as well; the global
    # ``np.random`` state would make the inputs differ on every run. None
    # of the assertions depends on the concrete values.
    R12_1 = rnds.rand(30, 30)
    R12_2 = rnds.rand(30, 30)
    R13 = rnds.rand(30, 20)

    t1 = ObjectType('type1', 30)
    t2 = ObjectType('type2', 30)
    t3 = ObjectType('type3', 20)
    relations = [
        Relation(R12_1, t1, t2),
        Relation(R12_2, t1, t2),
        Relation(R13, t1, t3),
    ]

    fusion_graph = FusionGraph()
    fusion_graph.add_relations_from(relations)
    self.assertEqual(len(fusion_graph.relations), 3)
    self.assertEqual(len(fusion_graph.object_types), 3)

    fuser = Dfmf(init_type='random', random_state=rnds).fuse(fusion_graph)
    for relation, shape in zip(relations, [(30, 30), (30, 30), (30, 20)]):
        self.assertEqual(fuser.backbone(relation).shape, shape)

    G1 = fuser.factor(t1)
    G2 = fuser.factor(t2)
    # The parallel relations share G1 and G2; only the backbone differs.
    for relation in relations[:2]:
        backbone = fuser.backbone(relation)
        expected = np.dot(G1, np.dot(backbone, G2.T))
        np.testing.assert_almost_equal(fuser.complete(relation), expected)
def test_retrieval(self):
    """Look up object types and relations through the graph accessors.

    Compares ``get_object_type``, ``get_relations`` and nested indexing
    against the fixture, and checks that two ways of counting the outgoing
    relations of ``self.t4`` agree.
    """
    graph = FusionGraph(self.relations2)
    first_two = self.relations2[:2]

    self.assertEqual(graph.get_object_type('Type 1'), self.t1)
    self.assertEqual(list(graph.get_relations(self.t1, self.t2)), first_two)
    self.assertEqual(graph[self.t1][self.t2], first_two)

    # Out-degree counted from the iterator ...
    counted = sum(1 for _ in graph.out_relations(self.t4))
    # ... and from the per-neighbour relation lists.
    summed = 0
    for neighbour_relations in graph[self.t4].values():
        summed += len(neighbour_relations)
    self.assertEqual(counted, summed)
def test_inspection(self):
    """Compare incoming/outgoing relations of t1 and t4 with the fixture."""
    rels = self.relations2
    graph = FusionGraph(rels)

    incoming_t1 = set(graph.in_relations(self.t1))
    self.assertEqual(incoming_t1, {rels[6]})

    outgoing_t1 = set(graph.out_relations(self.t1))
    self.assertEqual(outgoing_t1, set(rels[:2]))

    expected_out_t4 = {rels[index] for index in (4, 7, 8)}
    self.assertEqual(set(graph.out_relations(self.t4)), expected_out_t4)
def test_removal_of_loops(self):
    """Add and remove the last fixture relation and check the counts.

    After adding, the graph reports one relation and one object type
    (presumably a self-loop relation, going by the test name); after
    removing it, both counts are zero.
    """
    graph = FusionGraph()
    loop = self.relations2[-1]

    def check_counts(n_relations, n_object_types):
        self.assertEqual(graph.n_relations, n_relations)
        self.assertEqual(graph.n_object_types, n_object_types)

    graph.add_relation(loop)
    check_counts(1, 1)

    graph.remove_relation(loop)
    check_counts(0, 0)
def test_removal_single_relation(self):
    """Removing the only relation must also drop both of its object types."""
    graph = FusionGraph()
    only = self.relations1[0]

    def check_counts(n_relations, n_object_types):
        self.assertEqual(graph.n_relations, n_relations)
        self.assertEqual(graph.n_object_types, n_object_types)

    graph.add_relation(only)
    check_counts(1, 2)

    graph.remove_relation(only)
    check_counts(0, 0)
def test_get_names_by_object_type(self):
    """Check ``get_names`` for named and unnamed object types.

    ``self.t1`` and ``self.t2`` receive explicit names through the
    relations; ``self.t3`` gets none, and only the length of its name list
    is asserted.
    """
    data = np.random.RandomState(0).rand(10, 10)
    row_labels = list('ABCDEFGHIJ')
    col_labels = list('KLMNOPQRST')

    named = Relation(
        data, name='Test',
        row_type=self.t1, row_names=row_labels,
        col_type=self.t2, col_names=col_labels)
    half_named = Relation(
        data, name='Test2',
        row_type=self.t2, row_names=col_labels,
        col_type=self.t3)

    graph = FusionGraph()
    for relation in (named, half_named):
        graph.add_relation(relation)

    self.assertEqual(graph.get_names(self.t1), row_labels)
    self.assertEqual(graph.get_names(self.t2), col_labels)
    self.assertEqual(len(graph.get_names(self.t3)), 10)
def test_dfmc(self):
    """Fuse a single relation with ``Dfmc`` and check shapes and completion.

    The completed relation is expected to be almost equal to the input data.
    """
    rng = np.random.RandomState(0)
    data = rng.rand(50, 30)
    row_type = ObjectType('type1', 50)
    col_type = ObjectType('type2', 30)
    relation = Relation(data, row_type, col_type)

    graph = FusionGraph()
    graph.add_relation(relation)
    model = Dfmc(init_type='random', random_state=rng)
    fuser = model.fuse(graph)

    self.assertEqual(fuser.backbone(relation).shape, (50, 30))
    for object_type, shape in ((row_type, (50, 50)), (col_type, (30, 30))):
        self.assertEqual(fuser.factor(object_type).shape, shape)

    completed = fuser.complete(relation)
    np.testing.assert_almost_equal(completed, relation.data)
def load_dicty():
    """Construct fusion graph from molecular biology of Dictyostelium.

    Loads three sources from the ``dicty`` directory via ``load_source`` and
    returns a ``FusionGraph`` holding the relations ``ann`` (gene x GO term),
    ``expr`` (gene x experimental condition, log-transformed) and ``ppi``
    (gene x gene).
    """
    gene = ObjectType('Gene', 50)
    go_term = ObjectType('GO term', 15)
    exprc = ObjectType('Experimental condition', 5)

    # NOTE(review): the 'annnotations' spelling is kept verbatim; it has to
    # match the name of the data file on disk.
    data, rn, cn = load_source(join('dicty', 'dicty.gene_annnotations.csv.gz'))
    ann = Relation(data=data, row_type=gene, col_type=go_term,
                   name='ann', row_names=rn, col_names=cn)

    data, rn, cn = load_source(join('dicty', 'dicty.gene_expression.csv.gz'))
    expr = Relation(data=data, row_type=gene, col_type=exprc,
                    name='expr', row_names=rn, col_names=cn)
    # Clamp to machine epsilon before taking the log so that non-positive
    # values do not produce -inf or NaN. The builtin ``float`` is used
    # because the ``np.float`` alias was deprecated in NumPy 1.20 and
    # removed in 1.24; ``np.finfo(float)`` describes the same double type.
    expr.data = np.log(np.maximum(expr.data, np.finfo(float).eps))

    data, rn, cn = load_source(join('dicty', 'dicty.ppi.csv.gz'))
    ppi = Relation(data=data, row_type=gene, col_type=gene,
                   name='ppi', row_names=rn, col_names=cn)

    return FusionGraph([ann, expr, ppi])
def test_get_object_type_metadata(self):
    """Check how ``get_metadata`` combines metadata across relations.

    ``self.t2`` receives metadata from two relations and the expected
    result is the per-object merge of both dicts; ``self.t3`` receives
    none, and ten falsy entries are expected.
    """
    data = np.random.RandomState(0).rand(10, 10)
    letters = list('ABCDEFGHIJ')
    digits = list('0123456789')

    meta_t1 = [{'a': item} for item in letters]
    meta_t2 = [{'b': item} for item in digits]
    meta_t2_extra = [{'d': item} for item in digits]

    first = Relation(
        data, name='Test',
        row_type=self.t1, row_metadata=meta_t1,
        col_type=self.t2, col_metadata=meta_t2)
    second = Relation(
        data, name='Test2',
        row_type=self.t2, row_metadata=meta_t2_extra,
        col_type=self.t3)

    graph = FusionGraph()
    for relation in (first, second):
        graph.add_relation(relation)

    # Later keys win on collision, as with successive dict.update calls.
    merged_t2 = [
        {**left, **right} for left, right in zip(meta_t2, meta_t2_extra)
    ]
    self.assertEqual(graph.get_metadata(self.t1), meta_t1)
    self.assertEqual(graph.get_metadata(self.t2), merged_t2)

    meta_t3 = graph.get_metadata(self.t3)
    self.assertEqual(len(meta_t3), 10)
    for entry in meta_t3:
        self.assertFalse(entry)
def test_preprocessors(self):
    """Fuse a relation whose preprocessor replaces the data with ones.

    The completed relation is expected to be almost equal to an all-ones
    matrix of the same shape as the relation data.
    """
    rng = np.random.RandomState(0)
    raw = rng.rand(50, 30)
    row_type = ObjectType('type1', 50)
    col_type = ObjectType('type2', 30)

    def to_ones(values):
        return np.ones_like(values)

    relation = Relation(raw, row_type, col_type, preprocessor=to_ones)
    graph = FusionGraph()
    graph.add_relation(relation)
    model = Dfmf(init_type='random', random_state=rng)
    fuser = model.fuse(graph)

    self.assertEqual(fuser.backbone(relation).shape, (50, 30))
    self.assertEqual(fuser.factor(row_type).shape, (50, 50))
    self.assertEqual(fuser.factor(col_type).shape, (30, 30))

    expected = np.ones_like(relation.data)
    np.testing.assert_almost_equal(fuser.complete(relation), expected)
def test_postprocessors(self):
    """Fuse a masked relation with a postprocessor that subtracts ten.

    The completed relation must almost equal ``relation.data - 10``, and
    the relation fetched back from the graph by name must still hold the
    original masked matrix.
    """
    rng = np.random.RandomState(0)
    # Entries above 0.7 are masked.
    masked = np.ma.masked_greater(rng.rand(50, 30), 0.7)
    row_type = ObjectType('type1', 50)
    col_type = ObjectType('type2', 30)

    def shift_down(values):
        return values - 10

    relation = Relation(
        masked, row_type, col_type, name='R', postprocessor=shift_down)
    graph = FusionGraph()
    graph.add_relation(relation)
    model = Dfmc(init_type='random', random_state=rng)
    fuser = model.fuse(graph)

    self.assertEqual(fuser.backbone(relation).shape, (50, 30))
    self.assertEqual(fuser.factor(row_type).shape, (50, 50))
    self.assertEqual(fuser.factor(col_type).shape, (30, 30))

    expected = relation.data - 10
    np.testing.assert_almost_equal(fuser.complete(relation), expected)
    np.testing.assert_equal(graph.get_relation('R').data, masked)
def test_manipulation(self):
    """Add the fixture relations, look two up by name, then remove some.

    The object-type and relation counts are checked after the bulk add,
    after removing a single relation, and after removing three at once.
    """
    rels = self.relations2
    graph = FusionGraph()

    def check_counts(n_object_types, n_relations):
        self.assertEqual(graph.n_object_types, n_object_types)
        self.assertEqual(graph.n_relations, n_relations)

    graph.add_relations_from(rels)
    self.assertEqual(graph['Test2'], rels[0])
    self.assertEqual(graph['Test3'], rels[8])
    check_counts(5, 10)

    graph.remove_relation(rels[6])
    check_counts(5, 9)

    graph.remove_relations_from([rels[9], rels[4], rels[5]])
    check_counts(4, 6)
def load_pharma():
    """Construct fusion graph from the pharmacology domain.

    Six sources are read from the ``pharma`` directory with ``load_source``
    and each is wrapped in a ``Relation`` carrying the loaded row and column
    names. The relations are returned in load order inside a
    ``FusionGraph``.
    """
    def build_relation(filename, row_type, col_type):
        # One source file becomes one relation between the given types.
        data, row_names, col_names = load_source(join('pharma', filename))
        return Relation(data=data, row_type=row_type, col_type=col_type,
                        row_names=row_names, col_names=col_names)

    action = ObjectType('Action', 5)
    pmid = ObjectType('PMID', 5)
    depositor = ObjectType('Depositor', 5)
    fingerprint = ObjectType('Fingerprint', 20)
    depo_cat = ObjectType('Depositor category', 5)
    chemical = ObjectType('Chemical', 10)

    sources = (
        ('pharma.actions.csv.gz', chemical, action),
        ('pharma.pubmed.csv.gz', chemical, pmid),
        ('pharma.depositors.csv.gz', chemical, depositor),
        ('pharma.fingerprints.csv.gz', chemical, fingerprint),
        ('pharma.depo_cats.csv.gz', depositor, depo_cat),
        ('pharma.tanimoto.csv.gz', chemical, chemical),
    )
    relations = [build_relation(*source) for source in sources]
    return FusionGraph(relations)
def test_manipulation(self):
    """Track object-type and relation counts while relations are removed."""
    all_relations = self.relations2
    graph = FusionGraph()
    graph.add_relations_from(all_relations)

    def counts_are(n_object_types, n_relations):
        self.assertEqual(graph.n_object_types, n_object_types)
        self.assertEqual(graph.n_relations, n_relations)

    counts_are(5, 10)

    # Dropping one relation leaves every object type in place.
    graph.remove_relation(all_relations[6])
    counts_are(5, 9)

    # Dropping these three also removes one object type.
    to_remove = [all_relations[index] for index in (9, 4, 5)]
    graph.remove_relations_from(to_remove)
    counts_are(4, 6)
def run_parallel(n_jobs=1):
    """Time one ``Dfmf`` fusion and completion of a random relation.

    A 500 x 500 uniform random matrix is fused with rank 10 for both object
    types, the relation is completed, and the elapsed time is printed.

    Parameters
    ----------
    n_jobs : int, optional
        Forwarded to ``Dfmf(n_jobs=...)`` and echoed in the first message.
    """
    # perf_counter is intended for measuring intervals; unlike time.time()
    # it is not affected by system clock adjustments.
    start_t = time.perf_counter()

    # Also tried with n1, n2 = 10000, 10000.
    n1, n2 = 500, 500
    R12 = np.random.rand(n1, n2)
    print(f"Number of jobs is {n_jobs}")

    # Also tried with r1, r2 = 150, 150.
    r1, r2 = 10, 10
    t1 = ObjectType('type1', r1)
    t2 = ObjectType('type2', r2)
    relations = [Relation(R12, t1, t2)]
    fusion_graph = FusionGraph(relations)
    fuser = Dfmf(init_type='random_vcol', n_jobs=n_jobs).fuse(fusion_graph)

    # The completion is part of the timed work; its result is discarded
    # (an RMSE report against R12 that used it is disabled).
    fuser.complete(relations[0])
    print(f"Done in {time.perf_counter() - start_t} sec.")
def test_infinite(self):
    """Fuse relations containing masked, NaN and infinite entries.

    The first relation is created with ``fill_value='row_mean'`` and the
    second with ``fill_value='col_mean'``. After fusion, every entry of the
    completed first relation must be finite.
    """
    rng = np.random.RandomState(0)
    data_12 = rng.rand(50, 30)
    data_13 = rng.rand(50, 10)

    # Mask entries above 0.7, then overwrite small entries with NaN.
    data_12 = np.ma.masked_greater(data_12, 0.7)
    data_12[data_12 < 0.1] = np.nan
    # Entries below 0.5 in the second matrix become infinite.
    data_13[data_13 < 0.5] = np.inf

    t1 = ObjectType('type1', 50)
    t2 = ObjectType('type2', 30)
    t3 = ObjectType('type3', 10)
    rel_12 = Relation(data_12, t1, t2, fill_value='row_mean')
    rel_13 = Relation(data_13, t1, t3, fill_value='col_mean')

    graph = FusionGraph([rel_12, rel_13])
    fuser = Dfmf(init_type='random', random_state=rng).fuse(graph)

    self.assertEqual(fuser.backbone(rel_12).shape, (50, 30))
    self.assertEqual(fuser.backbone(rel_13).shape, (50, 10))
    self.assertEqual(fuser.factor(t1).shape, (50, 50))
    self.assertEqual(fuser.factor(t2).shape, (30, 30))

    n_finite = np.sum(np.isfinite(fuser.complete(rel_12)))
    np.testing.assert_equal(n_finite, data_12.size)
def test_drawing(self):
    """Build a graph from ``self.relations1``; no assertion is made here."""
    # NOTE(review): nothing is drawn or asserted in the visible body - the
    # test only passes if building the graph raises no exception.
    graph = FusionGraph()
    graph.add_relations_from(self.relations1)