Example #2
 def __init__(self):
     self.dh = DatasetHandler()
     self._utility_mat = None
# Assumed imports for this example (the project-local modules are not shown
# in the original snippet and are listed here as assumptions):
import math
import os
import random
import time

import gudhi
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
from matplotlib.colors import ListedColormap
from sklearn.metrics import accuracy_score

# Project-local helpers assumed by this snippet: DatasetHandler,
# ClassifierEvaluator, kNNClassifier and the constants IRIS, SWISSROLL,
# DIRECT, MAXIMAL, RANDOMIZED, AVERAGE.


class TDABasedClassifier:
    def __init__(self,
                 data_file_name=None,
                 dataset_type=IRIS,
                 algorithm_mode=DIRECT):
        self.data_file_name = data_file_name if data_file_name else "dataset/iris.csv"
        self.algorithm_mode = algorithm_mode
        self.dataset_handler = DatasetHandler(dataset_type, 4)
        self.classifier_evaluator = None
        self.filtrations = None
        self.simplex_tree = None
        self.complex = None
        self.memory = None

    def init_data(self):
        self.dataset_handler.load_dataset()

    def split_dataset(self, k=None, j=None):
        self.dataset_handler.split_dataset(k, fold_position=j)

    def unify_dataset(self):
        return self.dataset_handler.unify_dataset()

    def destroy(self):
        if self.filtrations:
            del self.filtrations
            self.filtrations = None
        if self.simplex_tree:
            del self.simplex_tree
        self.simplex_tree = None
        if self.complex:
            del self.complex
        self.complex = None

        self.dataset_handler.clean()

    def get_link(self, sigma):
        """
        Compute link(sigma) by hand, since gudhi's SimplexTree does not
        expose a link() method: take the star of sigma and strip sigma's
        vertices from every coface (a standalone sketch follows this class).
        :param sigma: a vertex, or a list/tuple of vertices
        :return: the set of vertices in the link of sigma
        """

        if self.simplex_tree is None:
            return set()

        link = set()

        if not isinstance(sigma, (list, tuple)):
            sigma = [sigma]
        try:
            _star = self.simplex_tree.get_star(sigma)

            for simplex, _ in _star:  # _ is the filtration value; not needed here
                simplex = set(simplex).difference(sigma)
                link = link.union(simplex)

            del _star
        except BaseException as e:
            print("ERROR in get_link: {0}".format(e))

        print("link({0}) = {1}".format(sigma, link))
        return link

    '''
    Psi is the assignment function that maps a set of labels t \in P(T)
    to each simplex sigma \in K.
    '''

    def Psi(self, sigma):
        if sigma is None:
            return []
        if not isinstance(sigma, (list, tuple)):
            sigma_key = str([sigma])
        else:
            sigma_key = str(sigma)

        if sigma_key in self.dataset_handler.tags_training:
            t = self.dataset_handler.tags_training[sigma_key]
            # here t != None: either ksimplex is in S, or the computation
            # was already completed earlier
            return t if type(t) in [list, tuple, dict, np.ndarray] else [t]

        card = self.Card(sigma)  # here we need to compute associations
        self.dataset_handler.tags_training.update({sigma_key: []})

        result = []
        if card == 1:  # then ksimplex \in X and t = None
            link = self.get_link(sigma)

            for tau in link:
                psi_val = self.Psi(tau)
                result.extend(psi_val)
        else:
            for tau in sigma:
                psi_val = self.Psi(tau)
                result.extend(psi_val)

        self.dataset_handler.tags_training.update({sigma_key: result})
        return result

    def Card(self, sigma):
        return len(sigma) if isinstance(sigma, (list, tuple)) else 1

    '''
    Gamma returns a vector V where each element v_i \in V is the number of
    occurrences (votes) obtained by the label t_i \in T during the
    computation of Psi(\sigma).
    '''

    def Gamma(self, sigma):
        card = self.Card(sigma)

        size_tags = len(self.dataset_handler.tags_set)
        V = [0] * size_tags
        if card == 1:
            _tags = self.Psi(sigma)
            for t in _tags:  # Psi(sigma) returns a collection, so expand it
                _idx = self.G2(t)
                if _idx > -1:
                    V[_idx] += 1

        elif card > 1:
            for tau in sigma:
                V = list(map(sum, zip(V, self.Gamma(tau))))

        return V

    # Upsilon assigns to sigma the label with the highest number of votes.
    def Upsilon(self, sigma):
        V = self.Gamma(sigma)
        i = self.M(V)

        return self.G(i)

    # G is a function that, given an integer i, returns the label at
    # position i, assuming some lexicographic order over T.
    def G(self, idx):
        if idx is None or idx >= len(self.dataset_handler.tags_set) or idx < 0:
            return None
        # Converting the set to a list lets us index it directly instead of
        # enumerating it looking for position idx.
        return list(self.dataset_handler.tags_set)[idx]

    def G2(self, tag):
        if tag not in self.dataset_handler.tags_position:
            return -1

        return self.dataset_handler.tags_position[tag]

    # M is a function that, given a vector V in R^{|T|}, returns an integer
    # 0 <= i < |T|, where i is the position of the component of V with the
    # maximum value.
    def M(self, vector):
        if len(vector) < 1:
            return 0

        major = vector[0]
        pos = 0
        for idx, element in enumerate(vector):
            if major < element:
                pos = idx
                major = element

        return pos

    # I is a function that, given a condition, returns 1 if it is true and 0 otherwise.
    def I(self, condition):
        return 1 if condition else 0

    def build_filtered_simplicial_complex(self):
        S = self.unify_dataset()

        # An AlphaComplex could be used here instead of the Rips complex.
        self.complex = gudhi.RipsComplex(points=S, max_edge_length=8.0)
        self.simplex_tree = self.complex.create_simplex_tree(max_dimension=3)

        # Persistence must be computed before persistence intervals per
        # dimension can be queried (as get_desired_persistence_interval does).
        diag = self.simplex_tree.persistence()
        return diag

    def get_desired_persistence_interval2(self, choice=MAXIMAL):
        dimension = self.simplex_tree.dimension()
        print("\nDIMENSION := {0}\n".format(dimension))
        dimension -= 1
        pintervals = []
        while len(pintervals) == 0 and dimension > -1:
            pintervals = self.simplex_tree.persistence_intervals_in_dimension(
                dimension)
            dimension -= 1

        # get maximal persistence filtration
        if len(pintervals) == 0:
            return None

        intervals_count = len(pintervals)
        if choice == MAXIMAL:
            major = pintervals[0][1] - pintervals[0][0]
            desired_pos = 0
            for idx, interv in enumerate(pintervals):
                i = interv[1] - interv[0]
                if major < i and not math.isinf(i):
                    major = i
                    desired_pos = idx

            print("el mayor es ", major)
        elif choice == RANDOMIZED:  # get randomized persistence filtration
            desired_pos = random.randint(int(intervals_count / 2),
                                         intervals_count - 1)  # to maximize possibilities

            print("\nThe randomly selected lifetime is {0}\n".format(
                pintervals[desired_pos][1] - pintervals[desired_pos][0]))
        else:  # get average persistence filtration
            Avg = 0
            for interv in pintervals:
                Avg += interv[1] - interv[0]
            Avg /= intervals_count
            desired_pos = 0
            min_d = math.fabs((pintervals[0][1] - pintervals[0][0]) - Avg)
            for idx, interv in enumerate(pintervals):
                i = math.fabs((interv[1] - interv[0]) - Avg)
                if min_d > i and not math.isinf(i):
                    min_d = i
                    desired_pos = idx

        print("el intervalo de persistencia elegido es ",
              pintervals[desired_pos])
        inter = pintervals[desired_pos]
        del pintervals
        return inter

    def get_desired_persistence_interval(self, choice=MAXIMAL):
        dimension = self.simplex_tree.dimension()
        print("\nDIMENSION := {0}\n".format(dimension))
        dimension -= 1
        pintervals = []
        while len(pintervals) == 0 and dimension > -1:
            pintervals = self.simplex_tree.persistence_intervals_in_dimension(
                dimension)
            dimension -= 1

        # get maximal persistence filtration
        if len(pintervals) == 0:
            return None

        # compute the persistence interval with maximal lifetime
        major = pintervals[0][1] - pintervals[0][0]
        desired_pos = 0
        for idx, interv in enumerate(pintervals):
            i = interv[1] - interv[0]
            if major < i and not math.isinf(i):
                major = i
                desired_pos = idx

        print("el mayor es ", major)

        if choice == MAXIMAL:
            return pintervals[desired_pos]
        else:
            # Collect all persistence intervals whose birth is at least the
            # birth of the maximal interval and whose lifetime is comparable.
            high_lifetimes_pi = []
            max_pi = pintervals[desired_pos]
            lifetime = max_pi[1] - max_pi[0]
            for idx, interv in enumerate(pintervals):
                if interv[0] >= max_pi[0] and lifetime < (interv[1] -
                                                          interv[0]) * 1.5:
                    high_lifetimes_pi.append(interv)

            intervals_count = len(pintervals)
            init = 0
            if len(high_lifetimes_pi) == 1:
                high_lifetimes_pi = pintervals
                init = int(intervals_count / 2)

            intervals_count = len(high_lifetimes_pi)

            if choice == RANDOMIZED:  # get randomized persistence filtration
                desired_pos = random.randint(init, intervals_count - 1)  # to maximize possibilities

                print("\nThe randomly selected lifetime is {0}\n".format(
                    high_lifetimes_pi[desired_pos][1] -
                    high_lifetimes_pi[desired_pos][0]))
                return high_lifetimes_pi[desired_pos]
            else:  # get average persistence filtration
                Avg = 0
                for interv in high_lifetimes_pi:
                    Avg += interv[1] - interv[0]
                if intervals_count > 0:
                    Avg /= intervals_count
                else:
                    return None

                desired_pos = 0
                # distance from the first persistence interval's lifetime to the average
                min_d = math.fabs(
                    (high_lifetimes_pi[0][1] - high_lifetimes_pi[0][0]) - Avg)
                for idx, interv in enumerate(high_lifetimes_pi):
                    i = math.fabs((interv[1] - interv[0]) - Avg)
                    if min_d > i and not math.isinf(i):
                        min_d = i
                        desired_pos = idx

        print("el intervalo de persistencia elegido es ",
              high_lifetimes_pi[desired_pos])

        # return pintervals, desired_pos
        return high_lifetimes_pi[desired_pos]

    def execute(self):

        self.init_data()
        persistence_selector = {
            RANDOMIZED: "RANDOMIZED",
            MAXIMAL: "MAXIMAL",
            AVERAGE: "AVERAGE"
        }

        all_data = []
        size_data = len(self.dataset_handler.dataset)

        for selector in persistence_selector:

            self.classifier_evaluator = ClassifierEvaluator(
                "TDABC_{0}".format(persistence_selector[selector]),
                classes=self.dataset_handler.tags_set)
            self.knn_classifier_evaluator = ClassifierEvaluator(
                "kNN_{0}".format(persistence_selector[selector]),
                classes=self.dataset_handler.tags_set)
            for k in [5, 10, 15, 20, 25]:
                print("\n#####################################")
                print("\n#####################################")
                print("\nEXECUTING REPEATED CROSS VALIDATION")
                folds = int((size_data + k - 1) / k)
                for j in range(folds):
                    print("\nEXECUTE K-FOLD k={0}, n={1}".format(k, j))
                    self.split_dataset(k, j)

                    # compute the simplicial complex, its filtration and its
                    # persistence diagram
                    diag = self.build_filtered_simplicial_complex()
                    print("persistence diagrams: ", diag)

                    persistence_interval = self.get_desired_persistence_interval(
                        choice=selector)

                    if persistence_interval is None:  # we ignore the process
                        self.destroy()
                        print("we destroy all simplicial complex information "
                              "because we couldn't find any persistence interval")
                        continue

                    self.simplex_tree.prune_above_filtration(
                        persistence_interval[0])

                    predicted_values = []
                    real_values = []
                    elems = []
                    ttags = [
                        self.dataset_handler.tags_position[
                            self.dataset_handler.tags_training[i]]
                        for i in self.dataset_handler.tags_training
                    ]
                    ttraining = [e for e in self.dataset_handler.training]

                    for idx, x0 in self.dataset_handler.test:
                        idx_key = str([idx])
                        value = self.Upsilon(idx)
                        elems.append(x0)

                        predicted_values.append(value)
                        real_values.append(
                            self.dataset_handler.tags_test[idx_key])

                    acc = accuracy_score(real_values, predicted_values) * 100

                    self.classifier_evaluator.add_metrics(
                        real_values, predicted_values)

                    self.destroy()
                    knn = kNNClassifier(ttraining, ttags)
                    in_values = knn.execute(elems)
                    all_data.extend(elems)
                    acc_knn = "None"
                    if len(in_values) > 0:
                        predicted_values2 = [self.G(i) for i in in_values]
                        self.knn_classifier_evaluator.add_metrics(
                            real_values, predicted_values2)
                        acc_knn = accuracy_score(real_values,
                                                 predicted_values2) * 100

                    print("\nTDABC accuracy = {0}".format(acc))
                    print("\nKNN accuracy = {0}".format(acc_knn))

            self.classifier_evaluator.plot_all()
            self.knn_classifier_evaluator.plot_all()

        plt.show()

    def draw_simplex_tree(self):
        path = "./docs/SIMPLEX_TREES"
        file_name = time.strftime(
            "./docs/SIMPLEX_TREES/simplex_tree_%y.%m.%d__%H.%M.%S.txt")
        if not os.path.exists(path):
            os.makedirs(path)

        filtrations = self.simplex_tree.get_filtration()

        fmt = "%s:(%s):%.2f"
        points = self.unify_dataset()
        with open(file_name, "w") as simplex_tree_file:
            for filtered_value in filtrations:
                qsimplex = str(filtered_value[0])
                filt = filtered_value[1]
                point = ""

                # a 0-simplex has no comma inside its bracketed vertex list
                inner_simplex = qsimplex[1:-1]
                if inner_simplex.find(",") == -1:
                    point = points[int(inner_simplex)]

                line = fmt % (qsimplex, point, filt)
                print(line)
                simplex_tree_file.write(line + "\n")
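
A minimal standalone sketch of the star-difference trick that get_link relies on, using only the public gudhi API on a toy complex (the triangles and vertex numbers below are illustrative, not taken from the original dataset):

import gudhi

st = gudhi.SimplexTree()
st.insert([0, 1, 2])  # triangle {0, 1, 2} and all of its faces
st.insert([1, 2, 3])  # a second triangle glued along the edge {1, 2}

sigma = [1]
link_vertices = set()
for simplex, _ in st.get_star(sigma):  # every coface of sigma
    link_vertices |= set(simplex) - set(sigma)

print(link_vertices)  # {0, 2, 3}: the vertices in the link of vertex 1
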
class DatasetPlotter:
    def __init__(self, data_mgr=None):
        self.data = data_mgr

        if data_mgr is None:
            self.data = DatasetHandler(IRIS)
            self.data.load_dataset()

    def draw_data(self):
        if self.data.is_dataset(IRIS):
            self.draw_iris()
        elif self.data.is_dataset(SWISSROLL):
            self.draw_swiss_roll()
        else:
            self.draw_iris()

    def draw_iris(self):
        data_A_sample = self.data.unify_dataset()

        fig = plt.figure()
        fig.set_size_inches(10, 8)
        ax = fig.add_subplot(111)

        tag = None

        ks = list(self.data.tags_set)

        points = {ks[0]: [[], []]}
        points.update({ks[1]: [[], []]})
        points.update({ks[2]: [[], []]})

        for i in self.data.tags_training:
            idx = int(i[1:-1])
            k = self.data.tags_training[i]

            points[k][0].append(data_A_sample[idx][0])
            points[k][1].append(data_A_sample[idx][1])

        for i in self.data.tags_test:
            idx = int(i[1:-1])
            k = self.data.tags_test[i]

            points[k][0].append(data_A_sample[idx][0])
            points[k][1].append(data_A_sample[idx][1])

        area = (15)**2
        for idx, c in enumerate(['r', 'b', 'g']):
            values = points[ks[idx]]

            l = self.data.labels[ks[idx]].strip()
            if l.find("setosa") != -1:
                l = "Setosa"
            elif l.find("versicolor") != -1:
                l = "Versicolor"
            elif l.find("virginica") != -1:
                l = "Virginica"

            ax.scatter(values[0], values[1], s=area, c=c, marker="o", label=l)

        ax.set_xlabel('Sepal length', size=15)
        ax.set_ylabel('Sepal width', size=15)
        ax.legend(fontsize=20)
        plt.savefig('DATA_GRAPHICS/iris.png')

    def draw_swiss_roll(self):
        fig = plt.figure()
        fig.set_size_inches(10, 8)
        ax = p3.Axes3D(fig)
        ax.view_init(7, -80)
        label = self.data.tags
        X = self.data.dataset
        for l in np.unique(label):
            ax.scatter(X[label == l, 0],
                       X[label == l, 1],
                       X[label == l, 2],
                       color=plt.cm.jet(float(l) / np.max(label + 1)),
                       s=20,
                       edgecolor='k')
        plt.title('Swiss Roll')
        plt.savefig('DATA_GRAPHICS/swissroll.png')
        plt.show()

    def draw_hyperplanes(self, classifiers, names, scores):
        h = .02  # step size in the mesh

        #names = ["Nearest Neighbors", "TDA-Based Classifier (TDABC)"]

        figure = plt.figure(figsize=(27, 9))
        i = 1
        # iterate over datasets
        X = np.array(self.data.dataset)
        X_train = np.array(self.data.training)
        X_test = np.array(self.data.test)
        y_train = [
            self.data.tags_position[self.data.tags_training[i]]
            for i in self.data.tags_training
        ]
        y_test = None
        if len(self.data.tags_test) > 0:
            y_test = [
                self.data.tags_position[self.data.tags_test[i]]
                for i in self.data.tags_test
            ]

        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # just plot the dataset first
        cm = plt.cm.RdBu
        cm_bright = ListedColormap(['#FF0000', '#0000FF'])
        ax = plt.subplot(1, len(classifiers) + 1, i)
        ax.set_title("Input data")
        # Plot the training points
        ax.scatter(X_train[:, 0],
                   X_train[:, 1],
                   c=y_train,
                   cmap=cm_bright,
                   edgecolors='k')
        # Plot the testing points
        if y_test is not None:
            ax.scatter(X_test[:, 0],
                       X_test[:, 1],
                       c=y_test,
                       cmap=cm_bright,
                       alpha=0.6,
                       edgecolors='k')
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        i += 1

        # iterate over classifiers
        for name, clf, score in zip(names, classifiers, scores):
            ax = plt.subplot(1, len(classifiers) + 1, i)

            # Plot the decision boundary. For that, we will assign a color to each
            # point in the mesh [x_min, x_max]x[y_min, y_max].
            Z = np.array(clf)

            # Put the result into a color plot
            Z = Z.reshape(xx.shape)
            ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

            # Plot the training points
            ax.scatter(X_train[:, 0],
                       X_train[:, 1],
                       c=y_train,
                       cmap=cm_bright,
                       edgecolors='k')
            # Plot the testing points
            ax.scatter(X_test[:, 0],
                       X_test[:, 1],
                       c=y_test,
                       cmap=cm_bright,
                       edgecolors='k',
                       alpha=0.6)

            ax.set_xlim(xx.min(), xx.max())
            ax.set_ylim(yy.min(), yy.max())
            ax.set_xticks(())
            ax.set_yticks(())
            ax.set_title(name)
            if score is not None:
                ax.text(xx.max() - .3,
                        yy.min() + .3, ('%.2f' % score).lstrip('0'),
                        size=15,
                        horizontalalignment='right')

        plt.tight_layout()
        plt.show()
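
A minimal usage sketch for the two classes above, assuming the project-local modules and the IRIS/DIRECT constants resolve (a hypothetical driver, not part of the original snippet):

# Hypothetical driver; the import locations are assumptions.
classifier = TDABasedClassifier(dataset_type=IRIS, algorithm_mode=DIRECT)
classifier.execute()  # repeated cross-validation over all persistence selectors

plotter = DatasetPlotter()  # defaults to a freshly loaded Iris dataset
plotter.draw_data()
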
Example #6
                                                    n_file_item=items_per_file)
                                res_storer.upload_and_store(
                                    base_dir, triplestore_url, base_iri, context_path,
                                    temp_dir_for_rdf_loading)

                                prov_storer = Storer(prov,
                                                     context_map={context_path: context_file_path},
                                                     dir_split=dir_split_number,
                                                     n_file_item=items_per_file)
                                prov_storer.store_all(
                                    base_dir, base_iri, context_path,
                                    temp_dir_for_rdf_loading)

                                dset_handler = DatasetHandler(triplestore_url_real,
                                                              context_path,
                                                              context_file_path, base_iri,
                                                              base_dir, info_dir, dataset_home,
                                                              temp_dir_for_rdf_loading)
                                dset_handler.update_dataset_info(result)

                                # If everything went fine, move the input file to the done directory
                                move_file(cur_file_path,
                                          reference_dir_done + os.sep + cur_local_dir_path)

                            # If something in the process went wrong, move the input file
                            # in an appropriate directory
                            else:
                                if crp.reperr.is_empty():  # the resource has already been processed
                                    move_file(cur_file_path,
                                              reference_dir_done + os.sep + cur_local_dir_path)
                                else:
Example #7
                                    context_path, temp_dir_for_rdf_loading)

                                prov_storer = Storer(
                                    prov,
                                    context_map={
                                        context_path: context_file_path
                                    },
                                    dir_split=dir_split_number,
                                    n_file_item=items_per_file)
                                prov_storer.store_all(
                                    base_dir, base_iri, context_path,
                                    temp_dir_for_rdf_loading)

                                dset_handler = DatasetHandler(
                                    triplestore_url_real, context_path,
                                    context_file_path, base_iri, base_dir,
                                    info_dir, dataset_home,
                                    temp_dir_for_rdf_loading)
                                dset_handler.update_dataset_info(result)

                                # If everything went fine, move the input file to the done directory
                                move_file(
                                    cur_file_path, reference_dir_done +
                                    os.sep + cur_local_dir_path)

                            # If something in the process went wrong, move the input file
                            # in an appropriate directory
                            else:
                                if crp.reperr.is_empty():
                                    # the resource has already been processed
                                    move_file(
Example #8
def dataset_generator():
    rospy.init_node('dataset_generator_node')

    moveit_handler = MoveItHandler()
    ring_handler = RingHandler()

    rospy.Subscriber("/vrep_ros_interface/image", Image, image_callback)
    rospy.Subscriber("/ring_current_position", Pose,
                     ring_handler.update_ring_pose)

    pub_ring = rospy.Publisher(conf.get('Ring', 'PositionTopic'),
                               Pose,
                               queue_size=1)
    pub_joint_controller = rospy.Publisher('/dagger/joint_states',
                                           JointState,
                                           queue_size=1)
    pub_delta_controller = rospy.Publisher('/dagger/delta_pose',
                                           PoseStamped,
                                           queue_size=1)

    rospy.sleep(3)
    init_ring = True
    init_panda = True

    for i in range(50):
        print "Iteration: ", i

        dataset_handler = DatasetHandler(i)
        while not rospy.is_shutdown():
            if init_panda:
                print "moving to ready position"
                pub_joint_controller.publish(
                    moveit_handler.target_joint_states)
                init_panda = False
                moveit_handler.wait(moveit_handler.target_joint_states)
                continue
            if init_ring:
                print "setting ring to random pose"
                ring_handler.set_random_valid_pose()
                ring_pose = ring_handler.get_ring_pose()
                pub_ring.publish(ring_pose)
                # delta between vrep and rviz on x of 0.5!!
                ring_handler.ring_coordinate.x += 0.5
                init_ring = False
                rospy.sleep(1)
                continue
            if moveit_handler.get_step_size(
                    ring_handler.ring_coordinate) < conf.getfloat(
                        'Goal', 'MinStep'):
                rospy.sleep(3)
                init_ring = True
                init_panda = True
                dataset_handler.save()
                break
            moveit_handler.compute_master_policy(ring_handler)

            dataset_handler.append((LAST_IMAGE, [
                moveit_handler.delta_pose.pose.position.x,
                moveit_handler.delta_pose.pose.position.y,
                moveit_handler.delta_pose.pose.position.z
            ]))

            pub_delta_controller.publish(moveit_handler.delta_pose)
            moveit_handler.update_target_pose()
            moveit_handler.wait(moveit_handler.target_pose)
Example #9
        print("Prediction and error calculation of SVD with 90% energy took " +
              str(time.time() - next_part) + " secs")
        print("\n\nOverall process: " + str(time.time() - start) + " secs")

    def error(self, A, test_ratings):
        """
        Computes the error of the input ratings vs predicted values from model.

        Args:
            A (np.ndarray): The matrix of predicted ratings, indexed by user and item.
            test_ratings (np.ndarray): An array of <user_id, item_id, true_rating> tuples.

        Returns:
            The Root Mean Square Error and Mean Absolute Error values.
        """
        sq_err, abs_err = 0, 0
        for user_id, item_id, rating in test_ratings:
            predicted = A[user_id - 1][item_id - 1]
            diff = predicted - rating
            abs_err += abs(diff)
            sq_err += diff * diff

        rmse = np.sqrt(sq_err / len(test_ratings))
        mae = abs_err / len(test_ratings)
        return rmse, mae


if __name__ == "__main__":
    s = SVD()
    dh = DatasetHandler()
    s.predict_and_find_error(dh.test_ratings.values)
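
For reference, a tiny self-contained check of the RMSE/MAE computation above (the 2x2 prediction matrix and test ratings are made up for illustration):

import numpy as np

A = np.array([[4.0, 3.0],
              [2.0, 5.0]])     # predicted ratings, users x items
test = [(1, 1, 5), (2, 2, 4)]  # (user_id, item_id, true_rating), 1-based ids

sq_err = sum((A[u - 1][i - 1] - r) ** 2 for u, i, r in test)
abs_err = sum(abs(A[u - 1][i - 1] - r) for u, i, r in test)
print(np.sqrt(sq_err / len(test)), abs_err / len(test))  # 1.0 1.0
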
Example #10
        for user_id, item_id, rating in test:
            predicted = A[user_id - 1][item_id - 1]
            diff = predicted - rating
            abs_err += abs(diff)
            sq_err += diff * diff

        rmse = np.sqrt(sq_err / len(test))
        mae = abs_err / len(test)
        return rmse, mae


if __name__ == "__main__":
    t = CUR()
    path1 = "data/test_ratings.csv"
    M = UtilityMatrix().utility_mat.values
    Test_Set = DatasetHandler().test_ratings.values

    # mean-impute missing entries row by row (assumes each row has at least one rating)
    for i in range(M.shape[0]):
        total = 0
        count = 0
        for j in range(M.shape[1]):
            if not math.isnan(M[i][j]):
                total += M[i][j]
                count += 1
        for j in range(M.shape[1]):
            if math.isnan(M[i][j]):
                M[i][j] = total / count

    C, U, R = t.mycur(M, 1000)

    A = np.dot(C, np.dot(U, R))
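
The row-mean imputation loop above can be written more compactly with NumPy; a sketch under the same assumption that every row has at least one rating:

import numpy as np

M = np.asarray(M, dtype=float)
row_means = np.nanmean(M, axis=1)            # per-user mean, ignoring NaNs
nan_rows, nan_cols = np.where(np.isnan(M))
M[nan_rows, nan_cols] = row_means[nan_rows]  # fill each gap with its row mean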