Ejemplo n.º 1
0
def main():
    """Self-test ``MeanFuser`` and then show ``OWMeanFuser`` on a random graph.

    The first part asserts the completion produced by ``MeanFuser`` for the
    three modes 0, 1 and 2 on a tiny masked matrix. The second part builds
    a two-relation fusion graph from random data and opens the widget in a
    Qt event loop (blocks until the window is closed).
    """
    from AnyQt.QtWidgets import QApplication
    t1 = fusion.ObjectType('Users', 10)
    t2 = fusion.ObjectType('Movies', 30)
    t3 = fusion.ObjectType('Actors', 40)

    # test that MeanFuser completes correctly
    R = np.ma.array([[1, 1, 0], [3, 0, 0]],
                    mask=[[0, 0, 1], [0, 1, 1]],
                    dtype=float)
    rel = fusion.Relation(R, t1, t2)
    assert (MeanFuser(0).complete(rel) == [[1, 1, 5 / 3], [3, 1, 5 / 3]]).all()
    assert (MeanFuser(1).complete(rel) == [[1, 1, 1], [3, 3, 3]]).all()
    assert (MeanFuser(2).complete(rel) == [[1, 1, 5 / 3], [3, 5 / 3,
                                                           5 / 3]]).all()

    # Both relations have 'Movies' (t2) on their column axis, so they must
    # agree on the number of movies: 20 columns each.
    n_users, n_movies, n_actors = 20, 20, 40
    R1 = np.ma.array(np.random.random((n_users, n_movies)))
    R2 = np.ma.array(np.random.random((n_actors, n_movies)),
                     mask=np.random.random((n_actors, n_movies)) > .8)
    relations = [
        fusion.Relation(R1, t1, t2, name='like'),
        fusion.Relation(R2, t3, t2, name='feature in'),
    ]
    G = fusion.FusionGraph()
    G.add_relations_from(relations)
    app = QApplication([])
    w = OWMeanFuser()
    w.on_fusion_graph_change(G)
    w.show()
    app.exec()
Ejemplo n.º 2
0
def main():
    """Fit a ``Dfmf`` model on random relations and show it in ``OWChaining``.

    Adapted from the example at https://github.com/marinkaz/scikit-fusion.
    Blocks in the Qt event loop until the window is closed.
    """
    import numpy as np
    from AnyQt.QtWidgets import QApplication

    # Random data matrices, drawn in the same order as the relations need
    # them: 50 users, 100 movies, 150 actors.
    users_movies = np.random.rand(50, 100)
    movies_actors = np.random.rand(100, 150)
    actors_actors = np.random.rand(150, 150)
    users_actors = np.random.rand(50, 150)

    users = fusion.ObjectType('Users', 10)
    movies = fusion.ObjectType('Movies', 30)
    actors = fusion.ObjectType('Actors', 40)

    graph = fusion.FusionGraph()
    for relation in (
            fusion.Relation(users_movies, users, movies, name='like'),
            fusion.Relation(users_actors, users, actors, name='are fans of'),
            fusion.Relation(users_movies, users, movies, name='don\'t like'),
            fusion.Relation(actors_actors, actors, actors, name='married to'),
            fusion.Relation(movies_actors, movies, actors, name='feature'),
    ):
        graph.add_relation(relation)

    model = fusion.Dfmf()
    model.fuse(graph)

    app = QApplication([])
    widget = OWChaining()
    widget.on_fuser_change(FittedFusionGraph(model))
    widget.show()
    app.exec()
Ejemplo n.º 3
0
def main():
    """Estimate gene-annotation completion error with 10-fold cross-validation.

    For every fold the rows listed in the fold's test indices are masked in
    the gene/GO-term relation, the graph is fused with ``Dfmc`` and the
    completed values for those rows are compared with the stored ones.
    Prints the mean of the per-fold mean squared errors.
    """
    n_folds = 10
    go_relation = dicty[gene][go_term][0]
    n_genes = go_relation.data.shape[0]
    folds = cross_validation.KFold(n_genes, n_folds=n_folds)
    fold_mse = np.zeros(n_folds)
    ann_mask = np.zeros_like(go_relation.data).astype('bool')

    fusion_graph = skf.FusionGraph([
        skf.Relation(go_relation.data, gene, go_term),
        skf.Relation(dicty[gene][exp_cond][0].data, gene, exp_cond),
        skf.Relation(dicty[gene][gene][0].data, gene, gene),
    ])
    fuser = skf.Dfmc(max_iter=30, n_run=1, init_type='random', random_state=0)

    for fold, (_train_idx, test_idx) in enumerate(folds):
        # Hide exactly the test rows of this fold; the mask buffer is reused.
        ann_mask[:] = False
        ann_mask[test_idx, :] = True
        fusion_graph[gene][go_term][0].mask = ann_mask

        fuser.fuse(fusion_graph)
        completed = fuser.complete(fuser.fusion_graph[gene][go_term][0])
        pred_ann = completed[test_idx]
        true_ann = go_relation.data[test_idx]
        fold_mse[fold] = metrics.mean_squared_error(pred_ann, true_ann)

    print("MSE: %5.4f" % np.mean(fold_mse))
def main():
    """Fit ``Dfmf`` on random relations and show it in ``OWLatentFactors``.

    Adapted from the example at https://github.com/marinkaz/scikit-fusion.
    Blocks in the Qt event loop until the window is closed.
    """
    import numpy as np

    # 50 users, 100 movies, 150 actors; matrices are drawn in this order.
    users_movies = np.random.rand(50, 100)
    actors_movies = np.random.rand(150, 100)
    actors_actors = np.random.rand(150, 150)

    users = fusion.ObjectType('Users', 10)
    movies = fusion.ObjectType('Movies', 30)
    actors = fusion.ObjectType('Actors', 40)

    graph = fusion.FusionGraph()
    for relation in (
            fusion.Relation(users_movies, users, movies, name='like'),
            fusion.Relation(users_movies, users, movies, name='don\'t like'),
            fusion.Relation(actors_actors, actors, actors, name='married to'),
            fusion.Relation(actors_movies, actors, movies, name='play in'),
    ):
        graph.add_relation(relation)

    model = fusion.Dfmf()
    model.fuse(graph)

    app = QtGui.QApplication([])
    widget = OWLatentFactors()
    widget.on_fuser_change(FittedFusionGraph(model))
    widget.show()
    app.exec()
Ejemplo n.º 5
0
    def commit(self):
        """Wrap the current table as a relation and send it to the output.

        Does nothing when ``self.data`` is falsy. Class variables and metas
        of each data row are collected into one dict per row; that list is
        attached as row metadata, or as column metadata when the matrix is
        sent transposed.
        """
        if not self.data:
            return

        domain = self.data.domain
        metadata_cols = list(domain.class_vars) + list(domain.metas)
        metadata = []
        for values in self.data[:, metadata_cols]:
            metadata.append({
                var: var.to_val(value)
                for var, value in zip(metadata_cols, values.list)
            })

        if self.transpose:
            # Rows and columns trade places, so the per-row metadata of the
            # table describes the columns of the relation.
            row_type = fusion.ObjectType(self.col_type or 'Unknown')
            col_type = fusion.ObjectType(self.row_type or 'Unknown')
            relation = fusion.Relation(
                self.data.X.T,
                name=self.relation_name,
                row_type=row_type,
                row_names=self.col_names,
                col_type=col_type,
                col_names=self.row_names,
                col_metadata=metadata)
        else:
            row_type = fusion.ObjectType(self.row_type or 'Unknown')
            col_type = fusion.ObjectType(self.col_type or 'Unknown')
            relation = fusion.Relation(
                self.data.X,
                name=self.relation_name,
                row_type=row_type,
                row_names=self.row_names,
                row_metadata=metadata,
                col_type=col_type,
                col_names=self.col_names)

        self.Outputs.relation.send(Relation(relation))
    def factorization(self):
        """
        Fuse ratings, image features and user data with Dfmf.

        Stores the completed ratings matrix in self.predictions, the mask
        returned by split_train_test in self.mask and the original ratings
        in self.true_values.
        """
        print('\nDfmf')
        image_features = self.selected_features
        held_out = self.split_train_test(self.users_ratings, 0.2)

        ratings = self.users_ratings
        user_data = self.users

        # Four object types, each created with the same second argument (10).
        type_1 = fusion.ObjectType('Type 1', 10)
        type_2 = fusion.ObjectType('Type 2', 10)
        type_3 = fusion.ObjectType('Type 3', 10)
        type_user_data = fusion.ObjectType('UserData', 10)

        graph = fusion.FusionGraph()
        graph.add_relations_from([
            fusion.Relation(ratings, type_1, type_2, name='User ratings'),
            fusion.Relation(image_features, type_2, type_3, name='Images'),
            fusion.Relation(user_data, type_1, type_user_data, name='Users'),
        ])

        model = fusion.Dfmf(init_type="random_vcol")
        # The mask must be in place before fusing.
        graph['User ratings'].mask = held_out
        fitted = model.fuse(graph)

        self.predictions = fitted.complete(graph['User ratings'])
        self.mask = held_out
        self.true_values = ratings
Ejemplo n.º 7
0
def mf(train_idx, test_idx, term_idx):
    """Predict one GO term for the test genes from fused expression profiles.

    The annotations of the test genes are zeroed out, the three relations
    are fused with ``Dfmf`` and the completed gene/experimental-condition
    matrix is used as the feature matrix of a random forest.

    :param train_idx: row indices of the genes used to fit the classifier
    :param test_idx: row indices of the genes to predict
    :param term_idx: column index of the GO term used as the class
    :returns: column 1 of ``predict_proba`` for the test genes
    """
    ann = dicty[gene][go_term][0].data.copy()
    # Hide the test annotations from the factorization.
    ann[test_idx, :] = 0
    relations = [
        skf.Relation(ann, gene, go_term),
        skf.Relation(dicty[gene][exp_cond][0].data, gene, exp_cond),
        skf.Relation(dicty[gene][gene][0].data, gene, gene)
    ]
    fusion_graph = skf.FusionGraph(relations)

    fuser = skf.Dfmf(max_iter=10,
                     n_run=1,
                     init_type="random_vcol",
                     random_state=0)

    # Each rank is 70 % of the corresponding dimension. Ranks are used as
    # matrix dimensions, so they must be integers; int() truncates.
    p = 0.7
    gene.rank = int(p * dicty[gene][go_term][0].data.shape[0])
    exp_cond.rank = int(p * dicty[gene][exp_cond][0].data.shape[1])
    go_term.rank = int(p * dicty[gene][go_term][0].data.shape[1])
    fuser.fuse(fusion_graph)
    X = fuser.complete(fusion_graph[gene][exp_cond][0])

    X_train = X[train_idx, :]
    y_train = dicty[gene][go_term][0].data[train_idx, term_idx]
    clf = ensemble.RandomForestClassifier(n_estimators=200)
    clf.fit(X_train, y_train)
    X_new = X[test_idx, :]
    y_pred = clf.predict_proba(X_new)[:, 1]
    return y_pred
Ejemplo n.º 8
0
def main():
    """Demo for ``OWCompletionScoring`` on two blob-derived relations.

    Builds two normalised matrices from the same synthetic 3-column blob
    data, masks random entries, fits a ``Dfmf`` and a ``Dfmc`` model and
    feeds both, plus three ``MeanFuser`` variants, to the widget. Blocks in
    the Qt event loop until the window is closed.
    """
    from copy import deepcopy

    from sklearn.datasets import make_blobs
    import numpy as np
    from AnyQt.QtWidgets import QApplication
    from orangecontrib.datafusion.models import FittedFusionGraph
    from orangecontrib.datafusion.widgets.owmeanfuser import MeanFuser

    def unit_scaled(matrix):
        # Min-max scale to [0, 1] and wrap as a masked array.
        low, high = matrix.min(), matrix.max()
        return np.ma.array((matrix - low) / (high - low))

    X, y = make_blobs(100,
                      3,
                      centers=2,
                      center_box=(-100, 100),
                      cluster_std=10)
    X = X.astype(int)
    # Shift so every coordinate is a valid (non-negative) index.
    X += abs(X.min())

    max_0, max_1, max_2 = X.max(0)

    # Columns 0 and 1 are the indices, column 2 is the stored value.
    R1 = np.zeros((max_0 + 1, max_1 + 1))
    R1[X[:, 0], X[:, 1]] = X[:, 2]
    R1 = unit_scaled(R1)

    # Columns 2 and 1 are the indices, column 0 is the stored value.
    R2 = np.zeros((max_2 + 1, max_1 + 1))
    R2[X[:, 2], X[:, 1]] = X[:, 0]
    R2 = unit_scaled(R2)

    users = fusion.ObjectType('Users', 10)
    movies = fusion.ObjectType('Movies', 30)
    actors = fusion.ObjectType('Actors', 40)
    relations = [
        fusion.Relation(R1, users, movies, name='like'),
        fusion.Relation(R2, actors, movies, name='feature in'),
    ]

    G = fusion.FusionGraph()
    for relation in relations:
        relation.data.mask = np.random.rand(*relation.data.shape) > .8
        G.add_relation(relation)
    fuserF = fusion.Dfmf()
    fuserF.fuse(G)

    # The second model is fitted on its own copy of the graph.
    G = deepcopy(G)
    fuserC = fusion.Dfmc()
    fuserC.name = 'My dfmc<3'
    fuserC.fuse(G)

    app = QApplication([])
    w = OWCompletionScoring()
    for fitted in (fuserF, fuserC):
        w.on_fuser_change(FittedFusionGraph(fitted), fitted.__class__.__name__)
    for mode in (0, 1, 2):
        w.on_fuser_change(MeanFuser(mode), 'meanfuser%d' % mode)
    for slot, relation in enumerate(relations, 1):
        w.on_relation_change(Relation(relation), slot)
    w.show()
    app.exec()
Ejemplo n.º 9
0
def transform(fuser, test_idx):
    """Project the test genes onto the factors of an already fitted model.

    :param fuser: fitted model passed on to ``DfmfTransform.transform``
    :param test_idx: indices of the genes to transform
    :returns: the ``DfmfTransform`` instance after ``transform`` was called
    """
    expression = dicty[gene][exp_cond][0].data[test_idx, :]
    # The gene-gene matrix is restricted on both axes.
    gene_gene = dicty[gene][gene][0].data[test_idx, :][:, test_idx]

    graph = skf.FusionGraph([
        skf.Relation(expression, gene, exp_cond),
        skf.Relation(gene_gene, gene, gene),
    ])
    model = skf.DfmfTransform(max_iter=50, init_type="random_vcol")
    model.transform(gene, graph, fuser)
    return model
Ejemplo n.º 10
0
def fuse(train_idx):
    """Fit ``Dfmf`` on the training genes only.

    :param train_idx: indices of the genes kept in every relation
    :returns: tuple ``(fuser, fusion_graph)``
    """
    annotations = dicty[gene][go_term][0].data[train_idx, :]
    expression = dicty[gene][exp_cond][0].data[train_idx, :]
    # The gene-gene matrix is restricted on both axes.
    gene_gene = dicty[gene][gene][0].data[train_idx, :][:, train_idx]

    graph = skf.FusionGraph([
        skf.Relation(annotations, gene, go_term),
        skf.Relation(expression, gene, exp_cond),
        skf.Relation(gene_gene, gene, gene),
    ])

    model = skf.Dfmf(max_iter=50, init_type="random_vcol")
    model.fuse(graph)
    return model, graph
Ejemplo n.º 11
0
    def send_output(self):
        """Send the actor/movie and actor/actor relations for ``self.movies``.

        Does nothing when ``self.movies`` is None.
        """
        if self.movies is None:
            return

        movie_actor_mat, actors = movielens.movie_concept_matrix(
            self.movies, concept="actor", actors=self.percent)
        actor_actor_mat = movielens.actor_matrix(movie_actor_mat)

        actor_type = movielens.ObjectType.Actors
        movie_type = movielens.ObjectType.Movies

        # The matrix is transposed so that actors are on the rows.
        plays_in = fusion.Relation(
            movie_actor_mat.T,
            name='play in',
            row_type=actor_type,
            row_names=actors,
            col_type=movie_type,
            col_names=self.movies)
        self.Outputs.movie_actors.send(Relation(plays_in))

        costars = fusion.Relation(
            actor_actor_mat,
            name='costar with',
            row_type=actor_type,
            row_names=actors,
            col_type=actor_type,
            col_names=actors)
        self.Outputs.actors_actors.send(Relation(costars))
Ejemplo n.º 12
0
 def create(cls, data, row_type, col_type, graph=None):
     """Build a ``Relation`` around ``data``, borrowing names from ``graph``.

     :param data: matrix passed to ``fusion.Relation``
     :param row_type: object type of the rows; when falsy a fresh one is
         taken from ``GENERATE_OTYPE``
     :param col_type: object type of the columns; when falsy a fresh one
         is taken from ``GENERATE_OTYPE``
     :param graph: optional fusion graph queried (``get_names`` /
         ``get_metadata``) for the names and metadata of the given types
     :returns: ``Relation`` wrapping the new ``fusion.Relation``
     """
     row_names = row_metadata = col_names = col_metadata = None
     if row_type:
         if graph:
             row_names = graph.get_names(row_type)
             row_metadata = graph.get_metadata(row_type)
             # Metadata with no non-empty entry is treated as "no metadata".
             if row_metadata is None or not any(row_metadata):
                 row_metadata = None
     else:
         row_type = next(GENERATE_OTYPE)
     if col_type:
         if graph:
             col_names = graph.get_names(col_type)
             # Column metadata belongs to the column type, not the row type.
             col_metadata = graph.get_metadata(col_type)
             if col_metadata is None or not any(col_metadata):
                 col_metadata = None
     else:
         # A bare object type, mirroring the row branch (not a tuple).
         col_type = next(GENERATE_OTYPE)
     return Relation(
         fusion.Relation(data,
                         row_type,
                         col_type,
                         row_names=row_names,
                         row_metadata=row_metadata,
                         col_names=col_names,
                         col_metadata=col_metadata))
 def send_output(self):
     """Send ``self.matrix`` as a relation on the "Genres" output.

     Does nothing when ``self.data`` is None.
     """
     if self.data is None:
         return

     genre_type = fusion.ObjectType("Genres")
     genres_relation = fusion.Relation(
         self.matrix,
         name=self.relation_name,
         row_type=self.row_type,
         row_names=self.row_names,
         col_type=genre_type,
         col_names=self.genres)
     self.send("Genres", Relation(genres_relation))
Ejemplo n.º 14
0
def transform(fuser, test_idx):
    """Project the test chemicals onto the factors of a fitted model.

    :param fuser: fitted model passed on to ``DfmfTransform.transform``
    :param test_idx: indices of the chemicals to transform
    :returns: the ``DfmfTransform`` instance after ``transform`` was called
    """
    by_chemical = pharma[chemical]

    # Relations from chemicals to another type keep only the test rows.
    relations = [
        skf.Relation(by_chemical[target][0].data[test_idx], chemical, target)
        for target in (pmid, depositor, fingerprint)
    ]
    # The chemical-chemical matrix is restricted on both axes.
    chemical_data = by_chemical[chemical][0].data[test_idx, :][:, test_idx]
    relations.append(skf.Relation(chemical_data, chemical, chemical))

    graph = skf.FusionGraph(relations)

    model = skf.DfmfTransform(max_iter=200,
                              init_type="random_vcol",
                              random_state=0)
    model.transform(chemical, graph, fuser)
    return model
Ejemplo n.º 15
0
    def factorization(self, cv_results_file):
        """
        Matrix factorization, saves predictions to self.predictions and mask to self.mask

        Zero entries of the ratings matrix are replaced with NaN, the
        parameters are chosen with self.cross_validation and a single
        'Ratings' relation is then factorized with Dfmf. The NaN-filled
        ratings are stored in self.true_values and the chosen threshold in
        self.t.

        :param cv_results_file: file for saving cv scores
        """
        print('\nDfmf')
        mask = self.split_train_test(self.users_ratings, 0.2)

        # Zeros are turned into NaN (presumably "not rated" -- confirm with
        # the data owner). Work on a float copy so the original is untouched.
        R12 = np.array(self.users_ratings, dtype=float)
        R12[R12 == 0] = np.nan

        # Candidate thresholds, candidate values for both ObjectType
        # arguments, and the number of cv masks.
        t = [6, 7, 8]
        parameters = [10, 50, 100, 200, 400]
        k = 3
        best_p_t1, best_p_t2, best_t = self.cross_validation(
            k, parameters, t, mask, R12, cv_results_file)
        print(str(best_p_t1) + ' ' + str(best_p_t2) + ' ' + str(best_t) + '\n')
        self.t = best_t

        # Predictions
        t1 = fusion.ObjectType('Type 1', best_p_t1)
        t2 = fusion.ObjectType('Type 2', best_p_t2)

        relations = [fusion.Relation(R12, t1, t2, name='Ratings')]
        fusion_graph = fusion.FusionGraph()
        fusion_graph.add_relations_from(relations)

        fuser = fusion.Dfmf(init_type="random_vcol")
        fusion_graph['Ratings'].mask = mask.astype('bool')
        dfmf_mod = fuser.fuse(fusion_graph)

        R12_pred = dfmf_mod.complete(fusion_graph['Ratings'])

        self.predictions = R12_pred
        self.mask = mask
        self.true_values = R12
Ejemplo n.º 16
0
def main():
    """Demo for ``OWFusionGraph``: feed it relations one at a time.

    Adapted from the example at https://github.com/marinkaz/scikit-fusion.
    A 500 ms timer hands the widget one relation per tick; once all have
    been delivered the timer is stopped and relation #4 is removed again.
    Blocks in the Qt event loop until the window is closed.
    """
    import numpy as np
    from AnyQt.QtWidgets import QApplication
    # 50 users, 100 actors, 40 movies, 40 genres.
    R12 = np.random.rand(50, 100)
    R22 = np.random.rand(100, 100)
    R13 = np.random.rand(50, 40)
    R31 = np.random.rand(40, 50)
    R23 = np.random.rand(100, 40)
    R24 = np.random.rand(100, 40)
    R34 = np.random.rand(40, 40)
    t1 = fusion.ObjectType('Users', 10)
    t2 = fusion.ObjectType('Actors', 20)
    t3 = fusion.ObjectType('Movies', 30)
    t4 = fusion.ObjectType('Genres', 40)
    relations = [
        fusion.Relation(R12, t1, t2, name='like'),
        fusion.Relation(R13, t1, t3, name='rated'),
        fusion.Relation(R13, t1, t3, name='mated'),
        fusion.Relation(R23, t2, t3, name='play in'),
        fusion.Relation(R31, t3, t1),
        fusion.Relation(R24, t2, t4, name='prefer'),
        fusion.Relation(R34, t3, t4, name='belong to'),
        fusion.Relation(R22, t2, t2, name='married to')
    ]

    app = QApplication(['asdf'])
    w = OWFusionGraph()
    w.show()

    # The iterators are default arguments on purpose: they are created once
    # and keep their position between timer ticks.
    def _add_next_relation(event,
                           relation_id=iter(range(len(relations))),
                           relation=iter(map(Relation, relations))):
        try:
            w.on_relation_change(next(relation), next(relation_id))
        except StopIteration:
            w.killTimer(w.timer_id)
            w.on_relation_change(None, 4)  # Remove relation #4

    w.timerEvent = _add_next_relation
    w.timer_id = w.startTimer(500)
    app.exec()
Ejemplo n.º 17
0
    def send_output(self):
        """Load the movie/user rating matrix and send it as a relation.

        With ``self.method == 0`` the matrix is requested by percentage,
        otherwise by the ``self.start`` .. ``self.end`` year range. If the
        year range is rejected with ``ValueError``, an error is shown,
        ``None`` is sent and the method returns.
        """
        if self.method == 0:
            matrix, movies, users = movielens.movie_user_matrix(
                percentage=self.percent)
        else:
            try:
                matrix, movies, users = movielens.movie_user_matrix(
                    start_year=self.start, end_year=self.end)
            except ValueError:
                self.error(0, "Invalid starting years")
                self.Outputs.relation.send(None)
                # Nothing was loaded; without this return the code below
                # would fail on the unbound name `matrix`.
                return

        # Transposed so that users are on the rows and movies on the columns.
        relation = fusion.Relation(matrix.T,
                                   name='rate',
                                   row_type=movielens.ObjectType.Users,
                                   row_names=users,
                                   col_type=movielens.ObjectType.Movies,
                                   col_names=movies)
        self.Outputs.relation.send(Relation(relation))
Ejemplo n.º 18
0
    def send_output(self):
        """Load the movie/user rating matrix and send it on "Ratings".

        With ``self.method == 0`` the matrix is requested by percentage,
        otherwise by the ``self.start`` .. ``self.end`` year range. If the
        year range is rejected with ``ValueError``, an error is shown,
        ``None`` is sent and the method returns. The relation is created
        with a min-max scaling preprocessor.
        """
        if self.method == 0:
            matrix, movies, users = movielens.movie_user_matrix(
                percentage=self.percent)
        else:
            try:
                matrix, movies, users = movielens.movie_user_matrix(
                    start_year=self.start, end_year=self.end)
            except ValueError:
                self.error(0, "Invalid starting years")
                self.send("Ratings", None)
                # Nothing was loaded; without this return the code below
                # would fail on the unbound name `matrix`.
                return

        def scale(X):
            # NaN-aware min-max scaling to [0, 1].
            return (X - np.nanmin(X)) / (np.nanmax(X) - np.nanmin(X))

        # Transposed so that users are on the rows and movies on the columns.
        relation = fusion.Relation(matrix.T,
                                   name='rate',
                                   row_type=movielens.ObjectType.Users,
                                   row_names=users,
                                   col_type=movielens.ObjectType.Movies,
                                   col_names=movies,
                                   preprocessor=scale)
        self.send("Ratings", Relation(relation))
Ejemplo n.º 19
0
    def fit(self):
        """Build object types, relations and indices, then fuse with Dfmf.

        Reads ``self.nodes`` (name -> second ``ObjectType`` argument) and
        ``self.relation_definitions`` ((src, dst) -> sequence of data
        frames). Sets ``self.types``, ``self.relations``, ``self.indices``,
        ``self.fusion_graph`` and ``self.fuser``.
        """
        self.types = {
            name: fusion.ObjectType(name, value)
            for name, value in self.nodes.items()
        }
        print(self.types)

        # One relation per data frame, in definition order.
        self.relations = [
            fusion.Relation(frame.values, self.types[src], self.types[dst])
            for (src, dst), frames in self.relation_definitions.items()
            for frame in frames
        ]
        print(self.relations)

        # The first frame that mentions a type decides its labels: the
        # frame's index for a source type, its columns for a destination.
        self.indices = {}
        for (src, dst), frames in self.relation_definitions.items():
            first = frames[0]
            if src not in self.indices:
                self.indices[src] = list(first.index)
            if dst not in self.indices:
                self.indices[dst] = list(first.columns)

        random.seed(self.random_state)
        np.random.seed(self.random_state)

        self.fusion_graph = fusion.FusionGraph(self.relations)

        self.fuser = fusion.Dfmf(init_type=self.init_type,
                                 random_state=self.random_state,
                                 n_jobs=self.n_jobs)

        self.fuser.fuse(self.fusion_graph)
Ejemplo n.º 20
0
def fuse(train_idx):
    """Fit ``Dfmf`` on the training chemicals only.

    :param train_idx: indices of the chemicals kept in the relations
    :returns: the fitted ``Dfmf`` instance
    """
    by_chemical = pharma[chemical]

    # Relations from chemicals to another type keep only the training rows.
    relations = [
        skf.Relation(by_chemical[target][0].data[train_idx], chemical, target)
        for target in (action, pmid, depositor, fingerprint)
    ]
    # The depositor/category relation is used unrestricted.
    relations.append(
        skf.Relation(pharma[depositor][depo_cat][0].data, depositor,
                     depo_cat))
    # The chemical-chemical matrix is restricted on both axes.
    chemical_data = by_chemical[chemical][0].data[train_idx, :][:, train_idx]
    relations.append(skf.Relation(chemical_data, chemical, chemical))

    graph = skf.FusionGraph(relations)

    model = skf.Dfmf(max_iter=200, init_type="random_vcol", random_state=0)
    model.fuse(graph)
    return model
Ejemplo n.º 21
0
    # Demo fragment: build eight random relations over four object types
    # and add them to `widget`, then run the Qt event loop.
    # NOTE(review): the enclosing definition and the creation of `app`,
    # `widget` and `sys` are not visible in this excerpt.
    import numpy as np
    from skfusion import fusion
    # 50 users, 100 actors, 40 movies, 400 genres.
    R12 = np.random.rand(50, 100)
    R22 = np.random.rand(100, 100)
    R13 = np.random.rand(50, 40)
    R31 = np.random.rand(40, 50)
    R23 = np.random.rand(100, 40)
    # NOTE(review): R23 is assigned twice; the first value is discarded.
    R23 = np.random.rand(100, 40)
    R24 = np.random.rand(100, 400)
    R34 = np.random.rand(40, 400)
    t1 = fusion.ObjectType('Users', 10)
    t2 = fusion.ObjectType('Actors', 20)
    t3 = fusion.ObjectType('Movies', 30)
    t4 = fusion.ObjectType('Genres', 40)
    relations = [
        fusion.Relation(R12, t1, t2, name='like'),
        fusion.Relation(R13, t1, t3, name='rated'),
        # Same matrix and types as 'rated', under a second name.
        fusion.Relation(R13, t1, t3, name='mated'),
        fusion.Relation(R23, t2, t3, name='play in'),
        # The only relation created without a name.
        fusion.Relation(R31, t3, t1),
        fusion.Relation(R24, t2, t4, name='prefer'),
        fusion.Relation(R34, t3, t4, name='belong to'),
        fusion.Relation(R22, t2, t2, name='married to')
    ]

    for rel in relations:
        widget.addRelation(rel)

    # Exit the process with the event loop's return code.
    sys.exit(app.exec_())
Ejemplo n.º 22
0
    def cross_validation(self, k, parameters, parameters_t, mask, R12,
                         results_file):
        """
        Grid search over both ObjectType parameters and the class threshold.

        For every combination a single 'Ratings' relation is factorized
        with Dfmf once per cv mask. Held-out true and predicted ratings are
        binarised at the threshold (class 2 if above, else class 1) and
        scored with roc_auc_score; the mean score over the cv masks decides
        the winner (ties go to the later combination).

        :param k: number of cv masks for each parameter combination
        :param parameters: candidate values tried for both 'Type 1' and 'Type 2'
        :param parameters_t: candidate thresholds
        :param mask: mask for primary test and train set
        :param R12: matrix for dfmf
        :param results_file: file for saving cv scores

        :returns: best_p_t1, best_p_t2, best_t; if results_file already
            exists, whatever self.load_results returns for it
        """
        if path.exists(results_file):
            return self.load_results(results_file)

        cv_masks = self.get_cv_masks(self.users_ratings, mask, k)

        best_cv_score = 0
        best_p_t1 = 0
        best_p_t2 = 0
        best_t = 0

        all_p_t1 = []
        all_p_t2 = []
        all_t = []
        all_scores = []
        true_values = np.asarray(R12)
        for p_t1 in parameters:
            for p_t2 in parameters:
                for t in parameters_t:
                    # NOTE(review): the factorization does not depend on t
                    # but is refitted for every threshold; hoisting it would
                    # change which random initialisation each t sees.
                    scores = []
                    for current_cv_mask in cv_masks:
                        t1 = fusion.ObjectType('Type 1', p_t1)
                        t2 = fusion.ObjectType('Type 2', p_t2)

                        relations = [
                            fusion.Relation(R12, t1, t2, name='Ratings')
                        ]
                        fusion_graph = fusion.FusionGraph()
                        fusion_graph.add_relations_from(relations)

                        fuser = fusion.Dfmf(init_type="random_vcol")
                        fusion_graph['Ratings'].mask = current_cv_mask
                        dfmf_mod = fuser.fuse(fusion_graph)

                        R12_pred = dfmf_mod.complete(fusion_graph['Ratings'])

                        # Keep only the masked cells (row-major order) and
                        # binarise at the threshold. NaN compares as "not
                        # above" and therefore falls into class 1.
                        held_out = np.asarray(current_cv_mask, dtype=bool)
                        ratings_true = np.where(
                            true_values[held_out] > t, 2, 1)
                        ratings_predicted = np.where(
                            np.asarray(R12_pred)[held_out] > t, 2, 1)

                        # AUC of the binarised predictions
                        score = roc_auc_score(ratings_true, ratings_predicted)
                        scores.append(score)

                    score = sum(scores) / len(scores)
                    all_p_t1.append(p_t1)
                    all_p_t2.append(p_t2)
                    all_t.append(t)
                    all_scores.append(score)

                    # Remember the best combination seen so far
                    if score >= best_cv_score:
                        best_cv_score = score
                        best_p_t1 = p_t1
                        best_p_t2 = p_t2
                        best_t = t

        # Save cv scores to a csv file
        data = {
            'p_t1': all_p_t1,
            'p_t2': all_p_t2,
            't': all_t,
            'score': all_scores
        }
        df = pd.DataFrame(data, columns=['p_t1', 'p_t2', 't', 'score'])
        df.to_csv(results_file)

        return best_p_t1, best_p_t2, best_t
    def factorization(self, cv_results_file, use_user_data=True):
        """
        Matrix factorization, saves predictions to self.predictions and mask to self.mask

        Zero entries of the ratings matrix are replaced with NaN (and the
        matrix is z-scored per column when self.z_score is set), the
        parameters are chosen with self.cross_validation and the relations
        are then factorized with Dfmf. The processed ratings are stored in
        self.true_values and the chosen threshold in self.t.

        :param cv_results_file: file for saving cv scores
        :param use_user_data: when False the 'Users' relation is left out
            of the final factorization (cross validation always gets it)
        """
        print('\nDfmf')
        selected_features = self.selected_features

        mask = self.split_train_test(self.users_ratings, 0.2)

        R23 = selected_features
        R14 = self.users

        # Zeros are turned into NaN (presumably "not rated" -- confirm with
        # the data owner). Work on a float copy so the original is untouched.
        R12 = np.array(self.users_ratings, dtype=float)
        R12[R12 == 0] = np.nan

        if self.z_score:
            R12 = zscore(R12, axis=0, nan_policy='omit')

        # Parameters choice: candidate values for the four object types,
        # candidate thresholds and the number of cv masks.
        parameters_k1 = [60, 70]
        parameters_k2 = [60, 70]
        parameters_k3 = [8, 10]
        parameters_k4 = [8, 10]
        t = [6, 7]
        k = 3
        best_p_t1, best_p_t2, best_p_t3, best_p_t4, best_t = self.cross_validation(
            k, parameters_k1, parameters_k2, parameters_k3, parameters_k4, t,
            mask, R12, R23, R14, cv_results_file)
        print(
            str(best_p_t1) + ' ' + str(best_p_t2) + ' ' + str(best_p_t3) +
            ' ' + str(best_p_t4) + ' ' + str(best_t) + '\n')

        # Save best threshold for positive and negative class
        self.t = best_t

        # Predictions
        t1 = fusion.ObjectType('Type 1', best_p_t1)
        t2 = fusion.ObjectType('Type 2', best_p_t2)
        t3 = fusion.ObjectType('Type 3', best_p_t3)
        t4 = fusion.ObjectType('UserData', best_p_t4)

        relations = [
            fusion.Relation(R12, t1, t2, name='Ratings'),
            fusion.Relation(R23, t2, t3, name='Images'),
        ]
        if use_user_data:
            relations.append(fusion.Relation(R14, t1, t4, name='Users'))

        fusion_graph = fusion.FusionGraph()
        fusion_graph.add_relations_from(relations)

        fuser = fusion.Dfmf(init_type="random_vcol")
        fusion_graph['Ratings'].mask = mask
        dfmf_mod = fuser.fuse(fusion_graph)

        R12_pred = dfmf_mod.complete(fusion_graph['Ratings'])

        self.predictions = R12_pred
        self.mask = mask
        self.true_values = R12
    def cross_validation(self, k, parameters_k1, parameters_k2, parameters_k3,
                         parameters_k4, parameters_t, mask, R12, R23, R14,
                         results_file):
        """
        Grid search over Dfmf ranks and the rating threshold using cv masks.

        Every combination of the four rank grids and the threshold grid is
        evaluated on each mask returned by ``self.get_cv_masks``. The mean
        ``roc_auc_score`` and ``rmse`` of every combination are written to
        ``results_file`` as csv. If ``results_file`` already exists the
        search is skipped and the values from ``self.load_results`` are
        returned instead.

        :param k: number of cv masks for each parameter combination
        :param parameters_k1: candidate ranks for object type 'Type 1'
        :param parameters_k2: candidate ranks for object type 'Type 2'
        :param parameters_k3: candidate ranks for object type 'Type 3'
        :param parameters_k4: candidate ranks for object type 'UserData'
        :param parameters_t: candidate thresholds used to binarize ratings
        :param mask: mask for primary test and train set
        :param R12: not used; the ratings matrix is rebuilt from
            ``self.users_ratings`` (kept for interface compatibility)
        :param R23: matrix for dfmf (relation 'Images')
        :param R14: matrix for dfmf (relation 'Users')
        :param results_file: file for saving cv scores

        :returns: best_p_t1, best_p_t2, best_p_t3, best_p_t4, best_t
            (parameters of the combination with the highest mean score)
        """
        import itertools

        print('\nCross validation\n')

        if path.exists(results_file):
            p1, p2, p3, p4 = self.load_results(results_file)
            # NOTE(review): load_results yields no threshold, so a fixed
            # value of 7 is returned in its place -- confirm this is wanted.
            return p1, p2, p3, p4, 7

        cv_masks = self.get_cv_masks(self.users_ratings, mask, k)

        # A rating of 0 is replaced by NaN; everything else is copied.
        # (np.nan instead of the np.NaN alias, which NumPy 2.0 removed.)
        ratings = np.asarray(self.users_ratings, dtype=float)
        R12 = np.where(ratings == 0, np.nan, ratings)

        if self.z_score:
            R12 = zscore(R12, axis=0)
        true_values = np.asarray(R12)

        best_cv_score = 0
        best_p_t1 = best_p_t2 = best_p_t3 = best_p_t4 = best_t = 0

        columns = ['p_t1', 'p_t2', 'p_t3', 'p_t4', 't', 'score', 'rmse']
        results = {column: [] for column in columns}

        # product() varies the last grid fastest, i.e. the same order as
        # nesting the five loops k1 > k2 > k3 > k4 > t.
        grid = itertools.product(parameters_k1, parameters_k2, parameters_k3,
                                 parameters_k4, parameters_t)
        for p_t1, p_t2, p_t3, p_t4, t in grid:
            scores = []
            scores_rmse = []
            for current_cv_mask in cv_masks:
                t1 = fusion.ObjectType('Type 1', p_t1)
                t2 = fusion.ObjectType('Type 2', p_t2)
                t3 = fusion.ObjectType('Type 3', p_t3)
                t4 = fusion.ObjectType('UserData', p_t4)

                relations = [
                    fusion.Relation(R12, t1, t2, name='Ratings'),
                    fusion.Relation(R23, t2, t3, name='Images'),
                    fusion.Relation(R14, t1, t4, name='Users'),
                ]
                fusion_graph = fusion.FusionGraph()
                fusion_graph.add_relations_from(relations)

                fuser = fusion.Dfmf(init_type="random_vcol")
                fusion_graph['Ratings'].mask = current_cv_mask
                dfmf_mod = fuser.fuse(fusion_graph)

                predictions = dfmf_mod.complete(fusion_graph['Ratings'])

                if self.z_score:
                    # NOTE(review): the rescaling below uses the column
                    # mean/std of the predictions themselves, not those of
                    # the original ratings -- confirm this is intended.
                    raw = np.asarray(predictions, dtype=float)
                    a = np.where(raw == 0, np.nan, raw)
                    mns = np.nanmean(a, axis=0, keepdims=True)
                    sstd = np.nanstd(a, axis=0, keepdims=True)
                    predictions = (a * sstd) + mns

                # Keep only the entries flagged by the current cv mask and
                # binarize them: 2 if above the threshold, otherwise 1
                # (NaN compares False and therefore maps to 1).
                held_out = np.asarray(current_cv_mask, dtype=bool)
                ratings_true = np.where(true_values[held_out] > t, 2, 1)
                ratings_predicted = np.where(
                    np.asarray(predictions)[held_out] > t, 2, 1)

                # Both metrics are computed on the binarized labels.
                scores.append(roc_auc_score(ratings_true, ratings_predicted))
                scores_rmse.append(rmse(ratings_true, ratings_predicted))

            score = sum(scores) / len(scores)
            score_rmse = sum(scores_rmse) / len(scores_rmse)
            for column, value in zip(
                    columns, (p_t1, p_t2, p_t3, p_t4, t, score, score_rmse)):
                results[column].append(value)

            # ">=" means a later combination wins ties.
            if score >= best_cv_score:
                best_cv_score = score
                best_p_t1 = p_t1
                best_p_t2 = p_t2
                best_p_t3 = p_t3
                best_p_t4 = p_t4
                best_t = t

        # Save cv scores to a csv file
        df = pd.DataFrame(results, columns=columns)
        df.to_csv(results_file)

        return best_p_t1, best_p_t2, best_p_t3, best_p_t4, best_t
Ejemplo n.º 25
0
        for line in fin:
            try:
               yield line.split(delimiter, 1)[1]
            except IndexError:
               continue



# One shared object type plus one object type per input dataset.
t1 = fusion.ObjectType('Type 1', nfactors)
tdata = [fusion.ObjectType(name, nfactors) for name in datasets]

# Each dataset file is streamed through strip_first_col, loaded with its
# first row skipped, transposed, and related to its own object type.
relations = []
for name, dataset_type in zip(datasets, tdata):
    stripped_lines = strip_first_col(os.path.join(source_folder, name))
    table = np.loadtxt(stripped_lines, delimiter=sep, skiprows=1)
    relations.append(fusion.Relation(np.transpose(table), t1, dataset_type))

fusion_graph = fusion.FusionGraph()
fusion_graph.add_relations_from(relations)
print(fusion_graph)

fuser = fusion.Dfmf()
fuser.fuse(fusion_graph)

# Write the factor of the shared type, then one factor file per dataset.
signals_path = os.path.join(output_folder, "signals.txt")
np.savetxt(signals_path, fuser.factor(t1), delimiter='\t')
for name, dataset_type in zip(datasets, tdata):
    projection_path = os.path.join(output_folder, "proj%s" % name)
    np.savetxt(projection_path, fuser.factor(dataset_type), delimiter='\t')
    def factorization(self, cv_results_file, use_user_data=True):
        """
        Matrix factorization, saves predictions to self.predictions and mask to self.mask

        Also stores the rating threshold in self.t and the ratings matrix
        that was factorized (zeros replaced by NaN, z-scored when
        self.z_score is set) in self.true_values.

        :param cv_results_file: file for saving cv scores
        :param use_user_data: when True, the 'Users' relation built from
            self.users is added to the fusion graph
        """
        print('\nDfmf')

        ratings = np.asarray(self.users_ratings)

        # Threshold: element at index round(n / 2) of the sorted non-zero
        # ratings.
        rated = np.sort(ratings[ratings != 0])
        t = rated[round(len(rated) / 2)]
        self.t = t

        mask = self.split_train_test(self.users_ratings, 0.2)

        R23 = self.selected_features
        R14 = self.users

        # A rating of 0 is replaced by NaN; everything else is copied.
        # (np.nan instead of the np.NaN alias, which NumPy 2.0 removed.)
        R12 = np.where(ratings == 0, np.nan, ratings.astype(float))

        if self.z_score:
            R12 = zscore(R12, axis=0)

        # Parameters choice
        print('\nParameters\n')
        parameters = [2, 4, 6, 8, 10, 12, 14, 16, 18]
        k = 3
        # NOTE(review): this call passes a single parameter grid and unpacks
        # four return values -- confirm it matches the cross_validation
        # signature of this class.
        best_p_t1, best_p_t2, best_p_t3, best_p_t4 = self.cross_validation(
            k, parameters, mask, R12, R23, R14, cv_results_file)
        print(' '.join(
            str(p) for p in (best_p_t1, best_p_t2, best_p_t3, best_p_t4)
        ) + '\n')

        # Predictions
        t1 = fusion.ObjectType('Type 1', best_p_t1)
        t2 = fusion.ObjectType('Type 2', best_p_t2)
        t3 = fusion.ObjectType('Type 3', best_p_t3)

        relations = [fusion.Relation(R12, t1, t2, name='Ratings'),
                     fusion.Relation(R23, t2, t3, name='Images')]
        if use_user_data:
            t4 = fusion.ObjectType('UserData', best_p_t4)
            relations.append(fusion.Relation(R14, t1, t4, name='Users'))

        fusion_graph = fusion.FusionGraph()
        fusion_graph.add_relations_from(relations)

        fuser = fusion.Dfmf(init_type="random_vcol")
        fusion_graph['Ratings'].mask = mask
        dfmf_mod = fuser.fuse(fusion_graph)

        R12_pred = dfmf_mod.complete(fusion_graph['Ratings'])

        self.predictions = R12_pred
        self.mask = mask
        self.true_values = R12
Ejemplo n.º 27
0
import numpy as np
from skfusion import fusion
from skfusion import datasets

# Random relation matrices linking the three object types pairwise.
R12 = np.random.rand(50, 100)
R13 = np.random.rand(50, 40)
R23 = np.random.rand(100, 40)

# The three object types the relations connect.
t1, t2, t3 = (
    fusion.ObjectType(label, size)
    for label, size in (('Type 1', 10), ('Type 2', 20), ('Type 3', 30))
)

relations = [
    fusion.Relation(R12, t1, t2),
    fusion.Relation(R13, t1, t3),
    fusion.Relation(R23, t2, t3),
]
fusion_graph = fusion.FusionGraph()
fusion_graph.add_relations_from(relations)

# Fuse the graph and report the shape of the factor obtained for t1.
fuser = fusion.Dfmf()
fuser.fuse(fusion_graph)
t1_factor = fuser.factor(t1)
print(t1_factor.shape)


# New matrices relating t1 to the existing t2 and t3, transformed with the
# already fitted fuser.
new_R12 = np.random.rand(10, 100)
new_R13 = np.random.rand(10, 40)
new_relations = [
    fusion.Relation(new_R12, t1, t2),
    fusion.Relation(new_R13, t1, t3),
]
new_graph = fusion.FusionGraph(new_relations)

transformer = fusion.DfmfTransform()
transformer.transform(t1, new_graph, fuser)
new_t1_factor = transformer.factor(t1)
print(new_t1_factor.shape)