import numpy as np
from sklearn.preprocessing import normalize as sknormalize

def normalize(x, copy=False):
    """
    A helper function that wraps the function of the same name in sklearn.
    This helper handles the case of a single 1-D vector.
    """
    if isinstance(x, np.ndarray) and x.ndim == 1:
        return np.squeeze(sknormalize(x.reshape(1, -1), copy=copy))
        # equivalently: np.squeeze(x / np.sqrt((x ** 2).sum(-1))[..., np.newaxis])
    else:
        return sknormalize(x, copy=copy)
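
A quick usage sketch of this helper (assuming the imports above): a 1-D
vector is reshaped, normalized, and squeezed back to 1-D, while a 2-D
array is normalized row-wise as usual.

v = np.array([3.0, 4.0])
print(normalize(v))    # [0.6 0.8] -- still 1-D
M = np.array([[3.0, 4.0], [6.0, 8.0]])
print(normalize(M))    # each row scaled to unit L2 norm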
Example #3
    def transformed(self, data):
        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            nan_num = nans.sum(axis=1, keepdims=True)
            ys = data.X
            if np.any(nan_num > 0):
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                ys = np.nan_to_num(ys)  # edge elements can still be zero
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if np.any(nan_num > 0):
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            norm_data = Integrate(methods=self.int_method,
                                  limits=[[self.lower, self.upper]])(data)
            data.X /= norm_data.X
            replace_infs(data.X)
        elif self.method == Normalize.Attribute:
            if self.attr in data.domain and isinstance(
                    data.domain[self.attr], Orange.data.ContinuousVariable):
                ndom = Orange.data.Domain([data.domain[self.attr]])
                factors = data.transform(ndom)
                data.X /= factors.X
                replace_infs(data.X)
            else:  # invalid attribute for normalization
                data.X *= float("nan")
        return data.X
    def __call__(self, data):
        if data.domain != self.domain:
            data = data.from_table(self.domain, data)

        if data.X.shape[0] == 0:
            return data.X
        data = data.copy()

        if self.method == Normalize.Vector:
            nans = np.isnan(data.X)
            nan_num = nans.sum(axis=1, keepdims=True)
            ys = data.X
            if np.any(nan_num > 0):
                # interpolate nan elements for normalization
                x = getx(data)
                ys = interp1d_with_unknowns_numpy(x, ys, x)
                ys = np.nan_to_num(ys)  # edge elements can still be zero
            data.X = sknormalize(ys, norm='l2', axis=1, copy=False)
            if np.any(nan_num > 0):
                # keep nans where they were
                data.X[nans] = float("nan")
        elif self.method == Normalize.Area:
            norm_data = Integrate(method=self.int_method,
                                  limits=[[self.lower, self.upper]])(data)
            data.X /= norm_data.X
        elif self.method == Normalize.Attribute:
            # attr normalization applies to the entire spectrum, regardless of limits;
            # meta indices are negative and start at -1
            if self.attr not in (None, "None", ""):
                attr_index = -1 - data.domain.index(self.attr)
                factors = data.metas[:, attr_index].astype(float)
                data.X /= factors[:, None]
        return data.X
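
The Normalize.Vector branch above interpolates unknowns before taking the
row norms and then restores them. Where no interpolation helper is
available, a minimal plain-NumPy sketch (an approximation, not the Orange
implementation: it zero-fills NaNs for the norm instead of interpolating):

def vector_normalize_rows(X):
    # L2-normalize each row, keeping NaNs where they were
    nans = np.isnan(X)
    filled = np.where(nans, 0.0, X)
    norms = np.sqrt((filled ** 2).sum(axis=1, keepdims=True))
    norms[norms == 0] = 1.0  # avoid division by zero on all-NaN rows
    out = filled / norms
    out[nans] = np.nan
    return out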
    def spoc_aggregation(self, X):
        """
        Given a tensor of activations, compute the aggregate Spoc feature, weighted
        spatially and channel-wise.

        :param ndarray X:
            3d tensor of activations with dimensions (channels, height, width)
        :returns ndarray:
            Spoc aggregated global image feature    
        """
        # original method
        '''
        X = X * self.kernel
        S = X.sum(axis=(1,2))
        return sknormalize(S.reshape(1,-1))[0]
        '''

        # New method1: spoc + Ch_weight
        ker = self.kernel[:, 1:13, 1:13]
        X = X[:, 1:13, 1:13]
        C = self.compute_crow_channel_weight(X)
        X = X * ker
        X = X.sum(axis=(1, 2))
        X = X * C
        '''

        # New method2: spoc + Ch_weight + rmac
        C = self.compute_crow_channel_weight(X)
        X = X * self.kernel                
        L0 = X.sum(axis=(1,2))
        L1 = X[:,].
        X = X * C
        '''

        return sknormalize(X.reshape(1, -1))[0]
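
SPoC itself is just spatially weighted sum pooling followed by L2
normalization. A self-contained sketch with a centered Gaussian kernel (an
assumption modeled on the SPoC paper, not this class's actual self.kernel):

import numpy as np
from sklearn.preprocessing import normalize as sknormalize

def spoc(X, sigma_frac=3.0):
    # X: (channels, height, width) activation tensor
    _, h, w = X.shape
    cy, cx = (h - 1) / 2.0, (w - 1) / 2.0
    y, x = np.mgrid[0:h, 0:w]
    sigma = min(h, w) / sigma_frac
    kernel = np.exp(-((y - cy) ** 2 + (x - cx) ** 2) / (2 * sigma ** 2))
    pooled = (X * kernel).sum(axis=(1, 2))  # weighted sum pooling
    return sknormalize(pooled.reshape(1, -1))[0]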
Example #7
    def normalize(self, replace=False):
        if not self.normed:
            data_stacked = sknormalize(self.data + self.data_test)
            self.data = data_stacked[:len(self.data)]
            self.data_test = data_stacked[len(self.data):]
            data_stacked = None
            self.normed = True
            # reset models
            self.flush()
        else:
            print("already normalized!")
Example #8
    def normalize(self, datanum, replace=False):
        if not self.normed:
            data_stacked = sknormalize(self.data[datanum] +
                                       self.data_test[datanum])
            self.data[datanum] = data_stacked[:len(self.data[datanum])]
            self.data_test[datanum] = data_stacked[len(self.data[datanum]):]
            data_stacked = None
            self.normed = True
            # reset models
            self.flush()
        else:
            print("already normalized!")
Example #9
def load_nx_W_data(nx_graph, label_file, exist_fea=False):
    label_dataframe = pd.read_pickle(label_file)

    beta_par = 1
    idx2node = {i: node for i, node in enumerate(nx_graph.nodes())}
    #node2idx = {node:idx for idx, node in enumerate(nx_graph.nodes())}
    n_nodes = nx_graph.number_of_nodes()

    labels = get_label(idx2node, label_dataframe)
    # get adjacency matrix
    adj = nx.adjacency_matrix(nx_graph)
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    # see https://github.com/tkipf/pygcn/issues/3 for an explanation
    # get M
    C_p = adj + adj.T * adj - csr_matrix(np.diag((adj.T * adj).diagonal()))
    degrees = [nx_graph.degree(node) for node in nx_graph.nodes()]
    degrees = np.array(degrees, dtype=float)  # np.float is removed in NumPy >= 1.24
    indices = list(range(degrees.size))
    D = csr_matrix((degrees**(-0.5), (indices, indices)),
                   shape=(degrees.size, degrees.size))
    D1 = csr_matrix((degrees**(-beta_par), (indices, indices)),
                    shape=(degrees.size, degrees.size))
    D2 = csr_matrix((degrees**(-beta_par), (indices, indices)),
                    shape=(degrees.size, degrees.size))
    M = D1.T * C_p * D2
    M = D * M * D
    #M = adj

    # get features

    if exist_fea:
        features = get_features(idx2node, exist_fea)
        print('basic features shape', features.shape)
    else:
        features = np.identity(n_nodes)
    features = sknormalize(features, axis=1)

    # get idx
    # idx_train = range(140)
    # idx_val = range(200, 500)
    # idx_test = range(500, 1500)
    idx_train, idx_val, idx_test = get_idx(labels)

    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    M = sparse_mx_to_torch_sparse_tensor(M)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    return M, features, labels, idx_train, idx_val, idx_test, idx2node
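
The heart of this preprocessing is symmetric degree normalization. A toy
check of the basic D^-1/2 * A * D^-1/2 step on a small graph (a sketch;
the function above additionally mixes in the second-order term adj.T * adj):

import networkx as nx
import numpy as np
from scipy.sparse import csr_matrix

g = nx.path_graph(4)
adj = nx.adjacency_matrix(g)
deg = np.array([g.degree(n) for n in g.nodes()], dtype=float)
idx = np.arange(deg.size)
D = csr_matrix((deg ** -0.5, (idx, idx)), shape=(deg.size, deg.size))
M = D @ adj @ D  # symmetrically normalized adjacency
print(M.toarray())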
    def get_cnn_feat(self, image_name):
        image = caffe.io.load_image(image_name)
        self.net.blobs['data'].data[...] = self.transformer.preprocess(
            'data', image)
        self.net.forward()
        if self.model_id == 0:
            #feat = self.net.blobs['pool5'].data[0]
            feat = self.net.blobs['conv5_3'].data[0]
        elif self.model_id == 1:  # HVR
            feat = self.net.blobs['fc6'].data[0]
            feat = sknormalize(np.reshape(feat, (1, -1)))[0]
        elif self.model_id == 2:  # Alexnet
            feat = self.net.blobs['conv5'].data[0]
        return feat
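
For context, a sketch of the setup this method assumes, using standard
Caffe boilerplate (the prototxt and caffemodel paths are placeholders, and
mean subtraction is omitted):

import caffe
net = caffe.Net('deploy.prototxt', 'weights.caffemodel', caffe.TEST)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))     # HWC -> CHW
transformer.set_raw_scale('data', 255)           # [0, 1] -> [0, 255]
transformer.set_channel_swap('data', (2, 1, 0))  # RGB -> BGR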
    def crow_aggregation(self, X):
        """
        Given a tensor of activations, compute the aggregate Spoc feature, weighted
        spatially and channel-wise.

        :param ndarray X:
            3d tensor of activations with dimensions (channels, height, width)
        :returns ndarray:
            Spoc aggregated global image feature    
        """
        S = self.compute_crow_spatial_weight(X)
        C = self.compute_crow_channel_weight(X)
        X = X * S
        X = X.sum(axis=(1, 2))
        X = X * C
        return sknormalize(np.reshape(X, (1, -1)))[0]
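
compute_crow_spatial_weight and compute_crow_channel_weight are not shown
here. A sketch of both following the CroW paper (Kalantidis et al., 2016);
an assumption about the implementation, not this class's actual code:

def compute_crow_spatial_weight(X, a=2, b=2):
    # spatial weight: power-normalized sum of activations across channels
    S = X.sum(axis=0)
    z = (S ** a).sum() ** (1.0 / a)
    return (S / z) ** (1.0 / b)

def compute_crow_channel_weight(X):
    # channel weight: log inverse of each channel's nonzero-response rate
    K, h, w = X.shape
    area = float(h * w)
    nonzeros = np.count_nonzero(X.reshape(K, -1), axis=1) / area
    return np.log(nonzeros.sum() / (nonzeros + 1e-12))  # eps guards empty channels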
Example #12
    def fit_predict(self, mm_fname):
        mm = MatrixMarket(mm_fname)
        self._initialize(mm)

        mm.iter_line = False
        for n_iter in range(1, self.max_iter + 1):
            c1 = np.zeros((self.n_clusters, self.n_words))
            self.labels = [-1] * self.n_docs
            for n_nonempty_doc, (i, j_dict) in enumerate(mm):
                best_c = self._most_similar(j_dict)
                self.labels[i] = best_c
                for j, v in j_dict.items():
                    c1[best_c, j] += v
                if self.verbose and n_nonempty_doc % 100 == 99:
                    print('\r  - iter = {} / {}, {} %'.format(
                        n_iter, self.max_iter, '%.2f' %
                        (100 * n_nonempty_doc / self._n_nonempty_docs)),
                          flush=True,
                          end='')
            self.c0 = sknormalize(c1)
            if self.verbose:
                print('\rIteration = {} was done{}'.format(n_iter, ' ' * 40),
                      flush=True)
        return self.labels
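
The _most_similar helper is not defined in this snippet; since self.c0
holds L2-normalized centroid rows, cosine similarity against a sparse
document reduces to a dot product over its nonzero terms. A hypothetical
sketch:

def _most_similar(self, j_dict):
    # j_dict maps word index -> weight for a single document
    scores = np.zeros(self.n_clusters)
    for j, v in j_dict.items():
        scores += v * self.c0[:, j]  # accumulate dot products per cluster
    return int(scores.argmax())

Because the document's own norm is the same for every centroid, the
unnormalized dot product ranks clusters identically to cosine similarity.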
Example #13
def normalize(x, copy=False):
    if isinstance(x, np.ndarray) and x.ndim == 1:
        return np.squeeze(sknormalize(x.reshape(1, -1), copy=copy))
        # equivalently: np.squeeze(x / np.sqrt((x ** 2).sum(-1))[..., np.newaxis])
    else:
        return sknormalize(x, copy=copy)
Example #15
    def normalize(self, ndarr):
        if self.norm:
            ndarr = sknormalize(ndarr, axis=1)
        return ndarr