def test_weighted_distance(self):
     """Check weighted_euclidean() against euclidean() on pre-scaled inputs.

     Every coordinate of self.x1 / self.x2 is multiplied by sqrt(weight)
     and fed to euclidean(); that value is the reference for the result of
     weighted_euclidean() called with the same weights.
     """
     test_weights = [2, 2, 1, 1]
     scaled_x1 = [sqrt(w) * v for w, v in zip(test_weights, self.x1)]
     scaled_x2 = [sqrt(w) * v for w, v in zip(test_weights, self.x2)]
     ref_res = euclidean(scaled_x1, scaled_x2)
     test_res = weighted_euclidean(self.x1, self.x2, test_weights)
     # Both values are floats obtained through different call paths, so
     # compare with a tolerance instead of requiring exact equality.
     self.assertAlmostEqual(test_res, ref_res)
 def test_unweighted_distnace(self):
     """Test unweighted (or equally weighted) distance calculation,
        which is expected to match the standard euclidean distance.
     """
     ref_res = euclidean(self.x1, self.x2)
     test_res = weighted_euclidean(self.x1, self.x2)
     # Float results: compare with a tolerance rather than bit-for-bit.
     self.assertAlmostEqual(test_res, ref_res)
Beispiel #3
0
    def fitted_dist_func(self, x, y):
        """ Compute the distance between x and y with the stored weights

        Parameters:
        ----------
        x, y: the two observations; passed unchanged to weighted_euclidean()

        Returns:
        --------
        The value of weighted_euclidean(x, y, w), where w is the current
        self._transform_matrix

        Raises:
        -------
        ValueError: if self._transform_matrix is None
        """
        w = self._transform_matrix
        if w is None:
            # This path used to fall through to an unbound local name and
            # fail with an UnboundLocalError; fail with a clear message.
            raise ValueError(
                "transform matrix is not set; fit the model before "
                "calling fitted_dist_func()")
        return weighted_euclidean(x, y, w)
Beispiel #4
0
    def fitted_dist_func(self, x, y):
        """ Compute the distance between x and y with the stored weights

        Parameters:
        ----------
        x, y: the two observations; passed unchanged to weighted_euclidean()

        Returns:
        --------
        The value of weighted_euclidean(x, y, w), where w is the current
        self._transform_matrix

        Raises:
        -------
        ValueError: if self._transform_matrix is None
        """
        w = self._transform_matrix
        if w is None:
            # This path used to fall through to an unbound local name and
            # fail with an UnboundLocalError; fail with a clear message.
            raise ValueError(
                "transform matrix is not set; fit the model before "
                "calling fitted_dist_func()")
        return weighted_euclidean(x, y, w)
Beispiel #5
0
    def _fit(self, X, S, D=None):
        """ Fit the model with given information: X, S, D

        Fit the learning distance metrics: (1) if only S is given, all pairs of
        items in X but not in S are considered as in D; (2) if both S and D
        given, items in X but neither in S nor in D will be removed from
        fitting process.

        Parameters:
        ----------
        X: {pandas.DataFrame-like}, shape (n_sample, n_features) matrix of
           observations; the observation IDs are read from X.index
        S: {vector-like, list} a list of tuples which define a pair of data
           points known as similar
        D: {vector-like, list} a list of tuples which define a pair of data
           points known as different (optional)

        Returns:
        --------
        _transform_matrix: the result of vec_normalized() applied to the
                           optimized weights
        _ratio: float, objective value at the optimum divided by the
                objective value at the initial weights
        """
        # The IDs live in the index. (The former `except ValueError` handler
        # only printed a message and left `ids` unbound, so it could never
        # recover; any error from this lookup now propagates directly.)
        ids = X.index.tolist()

        _, n_features = X.shape

        bnds = [(0, None)] * n_features  # weights are bounded below by 0
        init = [1] * n_features  # initial weights

        if D is None:
            all_pairs = list(combinations(ids, 2))
            D = get_exclusive_pairs(all_pairs, S)
        else:
            # if D is provided, keep only the items covered by S or D
            covered_items = get_unique_items(S, D)
            keep_items = [find_index(i, ids) for i in ids
                          if i in covered_items]
            # NOTE(review): assumes find_index() returns the row position of
            # the id inside `ids` -- confirm. Positional selection is used
            # because `.ix` is no longer available in current pandas.
            X = X.iloc[keep_items, :]

        # Convert ids in D and S into row index, in order to provide them to
        # a set of two distance functions, squared_sum_grouped_dist() and
        # sum_grouped_dist()
        # NOTE(review): these indexes are computed against the unfiltered
        # `ids`, while X may have been reduced above -- confirm that the two
        # helpers expect that.
        S_idx = [(find_index(a, ids), find_index(b, ids)) for (a, b) in S]
        D_idx = [(find_index(a, ids), find_index(b, ids)) for (a, b) in D]

        def objective_func(w):
            a = squared_sum_grouped_dist(S_idx, X, w) * 1.0
            b = sum_grouped_dist(D_idx, X, w) * 1.0
            return a - b

        if self._is_debug:
            # Take the samples first so that a failure cannot leave a
            # half-printed report followed by a second, full one.
            try:
                s_view, d_view, x_view = S[:5], D[:5], X[:5]
            except (TypeError, KeyError, IndexError):
                # not sliceable: fall back to showing everything
                s_view, d_view, x_view = S, D, X
            print("Examples of S: %s %s" % (s_view, len(S)))
            print("Examples of D: %s %s" % (d_view, len(D)))
            print("Examples of X: %s %s" % (x_view, X.shape))

        start_time = time.time()
        fitted = minimize(objective_func,
                          init,
                          method="L-BFGS-B",
                          bounds=bnds,
                          options={'maxiter': 10, 'disp': True})
        duration = time.time() - start_time

        if self._report_excution_time:
            print("--- %.2f seconds ---" % duration)

        self._transform_matrix = vec_normalized(fitted['x'])
        # optimized value vs. value of initial setting
        self._ratio = fitted['fun'] / objective_func(init)
        # Capture the weights only after they have been updated; binding them
        # earlier made the distance function use the pre-fit value.
        w = self._transform_matrix
        self._dist_func = lambda x, y: weighted_euclidean(x, y, w)

        return (self._transform_matrix, self._ratio)
Beispiel #6
0
    def _fit(self, X, S, D=None):
        """ Fit the model with given information: X, S, D

        Fit the learning distance metrics: (1) if only S is given, all pairs of
        items in X but not in S are considered as in D; (2) if both S and D
        given, items in X but neither in S nor in D will be removed from
        fitting process.

        Parameters:
        ----------
        X: {pandas.DataFrame-like}, shape (n_sample, n_features) matrix of
           observations; the observation IDs are read from X.index
        S: {vector-like, list} a list of tuples which define a pair of data
           points known as similar
        D: {vector-like, list} a list of tuples which define a pair of data
           points known as different (optional)

        Returns:
        --------
        _transform_matrix: the result of vec_normalized() applied to the
                           optimized weights
        _ratio: float, objective value at the optimum divided by the
                objective value at the initial weights
        """
        # The IDs live in the index. (The former `except ValueError` handler
        # only printed a message and left `ids` unbound, so it could never
        # recover; any error from this lookup now propagates directly.)
        ids = X.index.tolist()

        _, n_features = X.shape

        bnds = [(0, None)] * n_features  # weights are bounded below by 0
        init = [1] * n_features  # initial weights

        if D is None:
            all_pairs = list(combinations(ids, 2))
            D = get_exclusive_pairs(all_pairs, S)
        else:
            # if D is provided, keep only the items covered by S or D
            covered_items = get_unique_items(S, D)
            keep_items = [find_index(i, ids) for i in ids
                          if i in covered_items]
            # NOTE(review): assumes find_index() returns the row position of
            # the id inside `ids` -- confirm. Positional selection is used
            # because `.ix` is no longer available in current pandas.
            X = X.iloc[keep_items, :]

        # Convert ids in D and S into row index, in order to provide them to
        # a set of two distance functions, squared_sum_grouped_dist() and
        # sum_grouped_dist()
        # NOTE(review): these indexes are computed against the unfiltered
        # `ids`, while X may have been reduced above -- confirm that the two
        # helpers expect that.
        S_idx = [(find_index(a, ids), find_index(b, ids)) for (a, b) in S]
        D_idx = [(find_index(a, ids), find_index(b, ids)) for (a, b) in D]

        def objective_func(w):
            a = squared_sum_grouped_dist(S_idx, X, w) * 1.0
            b = sum_grouped_dist(D_idx, X, w) * 1.0
            return a - b

        if self._is_debug:
            # Take the samples first so that a failure cannot leave a
            # half-printed report followed by a second, full one.
            try:
                s_view, d_view, x_view = S[:5], D[:5], X[:5]
            except (TypeError, KeyError, IndexError):
                # not sliceable: fall back to showing everything
                s_view, d_view, x_view = S, D, X
            print("Examples of S: %s %s" % (s_view, len(S)))
            print("Examples of D: %s %s" % (d_view, len(D)))
            print("Examples of X: %s %s" % (x_view, X.shape))

        start_time = time.time()
        fitted = minimize(objective_func,
                          init,
                          method="L-BFGS-B",
                          bounds=bnds,
                          options={
                              'maxiter': 10,
                              'disp': True
                          })
        duration = time.time() - start_time

        if self._report_excution_time:
            print("--- %.2f seconds ---" % duration)

        self._transform_matrix = vec_normalized(fitted['x'])
        # optimized value vs. value of initial setting
        self._ratio = fitted['fun'] / objective_func(init)
        # Capture the weights only after they have been updated; binding them
        # earlier made the distance function use the pre-fit value.
        w = self._transform_matrix
        self._dist_func = lambda x, y: weighted_euclidean(x, y, w)

        return (self._transform_matrix, self._ratio)