def save_new_data(dataset, n_components, iteration):
    """Project a dataset with a Gaussian random projection, plot and save it.

    Plots the transformed data, writes it to
    data/<dataset>-<n_components>-<iteration>rp.csv, and prints the
    reconstruction mean-squared error.
    """
    X, y = load_dataset(dataset)
    data = X
    rp = GaussianRandomProjection(n_components=n_components)
    rp.fit(data)

    matrix = rp.components_  # projection matrix, shape (n_components, n_features)
    new_data = rp.transform(data)

    plot_data('rp',
              new_data,
              y,
              dataset.title() + ': RP',
              filename='-'.join(
                  ['rp', dataset,
                   str(iteration), 'data', 'trans']))

    results = np.array(new_data)
    np.savetxt('data/' + ('-'.join(
        [dataset, str(n_components),
         str(iteration) + 'rp.csv'])),
               results,
               delimiter=",")

    # Approximate inverse mapping: map back through the (non-orthonormal)
    # projection matrix to see how lossy the projection was.
    new_data_inv = np.dot(new_data, matrix)
    loss = metrics.mean_squared_error(data, new_data_inv)
    # BUG FIX: was a Python-2 print statement (`print loss`).
    print(loss)
# Beispiel #2
 def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
     """Estimate per-feature kurtosis of Gaussian random projections.

     Robust-scales X_train, applies 1000 independently drawn random
     projections (same dimensionality as the input), averages the kurtosis
     of each projected feature, and saves a bar plot to self.out_dir.

     NOTE(review): X_test, y_train and y_test are accepted but unused here.
     """
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     
     ks = []
     # 1000 restarts: each GaussianRandomProjection draws a fresh random
     # matrix, so kurtosis is averaged over many projections.
     for i in range(1000):
         ##
         ## Random Projection
         ##
         rp = GaussianRandomProjection(n_components=X_train_scl.shape[1])
         rp.fit(X_train_scl)
         X_train_rp = rp.transform(X_train_scl)
         
         ks.append(kurtosis(X_train_rp))
         
     # Mean kurtosis per projected feature across all restarts.
     mean_k = np.mean(ks, 0)
         
     ##
     ## Plots
     ##
     ph = plot_helper()
     
     title = 'Kurtosis (Randomized Projection) for ' + data_set_name
     name = data_set_name.lower() + '_rp_kurt'
     filename = './' + self.out_dir + '/' + name + '.png'
     
     ph.plot_simple_bar(np.arange(1, len(mean_k)+1, 1),
                        mean_k,
                        np.arange(1, len(mean_k)+1, 1).astype('str'),
                        'Feature Index',
                        'Kurtosis',
                        title,
                        filename)
# Beispiel #3
    def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Average per-feature kurtosis over 1000 Gaussian random projections
        of the robust-scaled training data, and save a bar chart of the
        result to self.out_dir.
        """
        scaled = RobustScaler().fit_transform(X_train)
        n_feats = scaled.shape[1]

        kurt_samples = []
        for _ in range(1000):
            # A fresh random matrix is drawn on every fit.
            projector = GaussianRandomProjection(n_components=n_feats)
            projector.fit(scaled)
            kurt_samples.append(kurtosis(projector.transform(scaled)))

        avg_kurtosis = np.mean(kurt_samples, 0)

        # Bar plot: mean kurtosis per projected feature.
        helper = plot_helper()
        chart_title = 'Kurtosis (Randomized Projection) for ' + data_set_name
        base_name = data_set_name.lower() + '_rp_kurt'
        out_file = './' + self.out_dir + '/' + base_name + '.png'

        feature_index = np.arange(1, len(avg_kurtosis) + 1, 1)
        helper.plot_simple_bar(feature_index,
                               avg_kurtosis,
                               feature_index.astype('str'),
                               'Feature Index',
                               'Kurtosis',
                               chart_title,
                               out_file)
def rp(X, y, n_components='auto', eps=0.1, random_state=None, plot=1, dataset='german'):
    """Apply a Gaussian random projection to X and optionally scatter-plot
    the first two projected components colored by binary label y.

    The plot is saved as '<dataset>-after-Random-Projection.png' for the
    'german' and 'australian' datasets. Returns the projected data X_new.
    """
    rp_model = GaussianRandomProjection(n_components=n_components, eps=eps, random_state=random_state)
    rp_model.fit(X)
    X_new = rp_model.transform(X)
    # The two dataset branches were byte-for-byte duplicates except for the
    # title and filename, so the plotting code is shared here.
    if plot and dataset in ('german', 'australian'):
        plt.scatter(X_new[y == 1, 0], X_new[y == 1, 1], c='red', label='Samples with label 1')
        plt.scatter(X_new[y == 0, 0], X_new[y == 0, 1], c='green', label='Samples with label 0')
        plt.title("%s dataset after Randomized Projection" % dataset.title())
        plt.legend()
        plt.xlabel("Component 1")
        plt.ylabel("Component 2")
        plt.savefig("%s-after-Random-Projection.png" % dataset)
        plt.close()
    return X_new
def comp1(K):
    """For each k in 1..K-1, randomly project the module-level X down to 10
    components, train an MLP classifier on a fresh 80/20 split, and plot
    train vs. test accuracy as a function of k.

    Relies on module-level X and y. Shows the plot and returns None.
    """
    k = []
    accuracy_train = []
    accuracy_test = []
    for i in range(1, K):
        print(i)
        projector = GaussianRandomProjection(n_components=10, eps=0.6)
        projector.fit(X)
        X_reduced = projector.transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X_reduced,
                                                            y,
                                                            test_size=0.20)
        clf = MLPClassifier(solver='lbfgs',
                            alpha=1e-5,
                            hidden_layer_sizes=[8, 8, 8, 8, 8],
                            random_state=1)
        # BUG FIX: the classifier was previously fit a second time on the
        # test split (discarding the training fit and leaking test labels),
        # and the train/test scores were computed on swapped splits.
        clf.fit(X_train, y_train)
        accu_train = clf.score(X_train, y_train)
        accu_test = clf.score(X_test, y_test)
        k.append(i)
        accuracy_train.append(accu_train)
        accuracy_test.append(accu_test)
    k = np.array(k)
    accuracy_train = np.array(accuracy_train)
    accuracy_test = np.asarray(accuracy_test)
    plt.plot(k,
             accuracy_train,
             color='r',
             marker='o',
             label='train_accuracy')
    plt.plot(k,
             accuracy_test,
             color='g',
             marker='o',
             label='test_accuracy')
    plt.xlabel('k')
    plt.legend()
    plt.ylabel('accuracy')
    plt.show()
    return None
def PerformRandomProjections(X, Y, num_components, random_state):
    """Run `random_state` seeded random projections for every size in
    `num_components`, keeping the projection with the smallest
    reconstruction error (computed through the pseudo-inverse of the
    projection matrix).

    Returns a dict keyed by "rp_<n>_" prefixes containing the best projected
    data, all per-seed reconstruction errors, and the best error.
    """
    result = {}
    recons_errs = []
    for n in num_components:
        prefix = "rp_" + str(n) + "_"
        errors_for_n = []
        best_model, best_err = None, np.Infinity
        for seed in np.arange(random_state) + 1:
            model = GaussianRandomProjection(n, random_state=seed)
            model.fit(X)
            projected = model.transform(X)
            # Reconstruct by mapping back through the pseudo-inverse.
            pinv = np.linalg.pinv(model.components_)
            reconstructed = np.dot(pinv, projected.T).T
            err = ComputeReconstructionSSE(X, reconstructed)
            errors_for_n.append(err)
            if best_model is None or err < best_err:
                best_model, best_err = model, err
        result[prefix + "data"] = best_model.transform(X)
        result[prefix + "reconstruction_errors_all"] = errors_for_n
        result[prefix + "reconstruction_error"] = best_err
    return result
def DPPro(pureTrainingData, pureTestingData, k, epsilon, randomProjector=None):
    """Differentially private random projection.

    Projects train/test data to k dimensions with a Gaussian random
    projection (fit on the training data unless a fitted projector is
    supplied), then adds Gaussian noise — calibrated by epsilon, delta and
    the projection matrix's maximum column norm — to the projected training
    data only.

    Returns (noisyProjTrainingData, projTestingData).
    """
    if randomProjector is None:
        print('Initialize random projector')
        randomProjector = GaussianRandomProjection(n_components=k)
        randomProjector.fit(pureTrainingData)

    projTrainingData = randomProjector.transform(pureTrainingData)
    projTestingData = randomProjector.transform(pureTestingData)

    # L2 sensitivity is taken as the largest column norm of the projection
    # matrix.
    projMatrix_norms = np.linalg.norm(randomProjector.components_, axis=0)
    # The dimension of projMatrix_norms should be n_features, pureTrainingData.shape[1];
    #print(projMatrix_norms.shape);
    l2Sensitivity = np.amax(projMatrix_norms)
    # delta = 1/n for n training rows.
    delta = np.divide(1.0, pureTrainingData.shape[0])
    noiseLength = pureTrainingData.shape[0] * k
    oneDimNoise = DiffPrivImpl.OneDimGaussian(epsilon,
                                              delta,
                                              noiseLength,
                                              l2Sensitivity=l2Sensitivity)
    # Reshape the flat noise vector to (n_samples, k) to match the
    # projected training data.
    noiseMatrix = np.reshape(oneDimNoise, (pureTrainingData.shape[0], -1))

    noisyProjTrainingData = projTrainingData + noiseMatrix

    return noisyProjTrainingData, projTestingData
def randomfaces(X_train, X_test, n_components=120):
    """Project face data onto `n_components` Gaussian random directions.

    Fits on X_train only, transforms both splits, prints the elapsed time,
    and returns (X_train_projected, X_test_projected).
    """
    start = time()
    projector = GaussianRandomProjection(n_components=n_components)  # Gaussian projection
    projector.fit(X_train)
    projected_train = projector.transform(X_train)
    projected_test = projector.transform(X_test)
    print("Random projection done in %0.3fs" % (time() - start))
    return projected_train, projected_test
# Beispiel #9
def randproj(tx, ty, rx, ry):
    """Random-project the training/test sets, then run the EM, k-means and
    neural-net experiments on the compressed data (tagged "wRPtr")."""
    reducer = RandomProjection(tx[1].size)
    reducer.fit(tx, y=ty)
    tx_small = reducer.transform(tx)
    rx_small = reducer.transform(rx)
    em(tx_small, ty, rx_small, ry, add="wRPtr", times=10)
    km(tx_small, ty, rx_small, ry, add="wRPtr", times=10)
    nn(tx_small, ty, rx_small, ry, add="wRPtr")
# Beispiel #10
def randproj(tx, ty, rx, ry):
    """Compress tx/rx with a random projection sized to the feature count,
    then evaluate EM, k-means and the neural net on the projected data."""
    compressor = RandomProjection(tx[1].size)
    compressor.fit(tx, y=ty)
    projected_train = compressor.transform(tx)
    projected_test = compressor.transform(rx)
    for experiment in (em, km):
        experiment(projected_train, ty, projected_test, ry, add="wRPtr", times=10)
    nn(projected_train, ty, projected_test, ry, add="wRPtr")
def append_cluster_labels(train, test, bestK):
    """Fit DimRedux with bestK on the training features, transform both
    splits, and return the re-scaled (train, test) pair."""
    reducer = DimRedux(bestK, random_state=42)
    reducer.fit(train.X)

    transformed = [helpers.Data(reducer.transform(split.X), split.y)
                   for split in (train, test)]

    return helpers.scale_test_train(*transformed)
# Beispiel #12
def dump_data(data_path, task, reduce_sizes, trials=10):
    """Reduce X with PCA, ICA, Gaussian random projection and mutual-info
    selection (component counts from reduce_sizes, in that order), print
    each method's reconstruction error, and dump each reduced dataset (with
    the label column appended) to data/<task>/<METHOD>.csv.

    For RP, `trials` random projections are drawn and the one with the
    smallest reconstruction error is kept.
    """
    X, y, _, _ = load_data(data_path, is_shuffle=True, is_split=False)

    pca_components = reduce_sizes[0]
    pca = PCA(n_components=pca_components, random_state=10)
    X_PCA = pca.fit_transform(X)
    X_reconstructed = pca.inverse_transform(X_PCA)
    print("Reconstruction Error for PCA: %.6f" % np.mean(
        (X - X_reconstructed)**2))

    data = np.hstack((X_PCA, np.array([y]).T))
    PCA_path = create_path('data', task, filename='PCA.csv')
    np.savetxt(PCA_path, data, delimiter=",")

    ica_components = reduce_sizes[1]
    ica = FastICA(n_components=ica_components, random_state=10)
    X_ICA = ica.fit_transform(X)
    X_reconstructed = ica.inverse_transform(X_ICA)
    print("Reconstruction Error for ICA: %.6f" % np.mean(
        (X - X_reconstructed)**2))

    data = np.hstack((X_ICA, np.array([y]).T))
    ICA_path = create_path('data', task, filename='ICA.csv')
    np.savetxt(ICA_path, data, delimiter=",")

    rp_components = reduce_sizes[2]
    re_list = []
    min_re_error = float("inf")
    X_RP = None
    for i in range(trials):
        rp = GaussianRandomProjection(n_components=rp_components)
        rp.fit(X)
        X_transformed = rp.transform(X)
        # Approximate reconstruction through the projection matrix.
        # (Removed a dead `c_square` Gram-matrix computation that cost an
        # O(d^2) matmul per trial and was never used.)
        X_reconstructed = np.dot(X_transformed, rp.components_)

        error = np.mean((X - X_reconstructed)**2)
        if error < min_re_error:
            min_re_error = error
            X_RP = X_transformed

        re_list.append(error)

    print(np.mean(re_list))
    print(np.std(re_list))
    print("Reconstruction Error for RP: %.6f" % min_re_error)

    data = np.hstack((X_RP, np.array([y]).T))
    RP_path = create_path('data', task, filename='RP.csv')
    np.savetxt(RP_path, data, delimiter=",")

    mi_components = reduce_sizes[3]
    X_MI = SelectKBest(mutual_info_classif,
                       k=mi_components).fit_transform(X, y)
    data = np.hstack((X_MI, np.array([y]).T))
    MI_path = create_path('data', task, filename='MI.csv')
    np.savetxt(MI_path, data, delimiter=",")
def get_encoder(metas, train_data, target_output_dim):
    """Fit a seeded Gaussian random projection on train_data, pickle it into
    the workspace directory, and return a RandomGaussianEncoder pointing at
    the saved model.
    """
    tmpdir = metas['workspace']
    model_path = os.path.join(tmpdir, 'random_gaussian.model')

    model = GaussianRandomProjection(n_components=target_output_dim,
                                     random_state=42)
    model.fit(train_data)
    # BUG FIX: the file handle from open(...) was never closed; use a
    # context manager so the pickle is flushed and the descriptor released.
    with open(model_path, 'wb') as fh:
        pickle.dump(model, fh)

    return RandomGaussianEncoder(model_path=model_path)
# Beispiel #14
class RandomProjectionSLFN(SLFN):
    """Single-layer feed-forward network whose hidden layer is a fixed
    Gaussian random projection followed by an elementwise nonlinearity."""

    def __init__(self, X, n_neurons, ufunc=np.tanh, random_state=None):
        self.n_neurons = n_neurons
        self.ufunc = ufunc
        self.projection = GaussianRandomProjection(n_components=n_neurons,
                                                   random_state=random_state)
        self.projection.fit(X)

    def transform(self, X):
        """Hidden-layer activations: nonlinearity applied to the projection."""
        projected = self.projection.transform(X)
        return self.ufunc(projected)
def randproj(tx, ty, rx, ry):
    """Random-project tx/rx and run the EM, k-means and NN experiments on
    the compressed data, logging start/finish to stdout."""
    # BUG FIX: both prints were Python-2 print statements.
    print("randproj")
    compressor = RandomProjection(tx[1].size)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wRPtr")
    km(newtx, ty, newrx, ry, add="wRPtr")
    nn(newtx, ty, newrx, ry, add="wRPtr")
    print("randproj done")
# Beispiel #16
    def rp(self):
        """Replace self.X_train / self.X_test with their Gaussian random
        projection (component count chosen automatically via eps=0.5) and
        append 'RandP_' to the output-file prefix."""
        # Experiment identifier used in output filenames.
        self.prefix += 'RandP_'

        projector = GaussianRandomProjection(eps=0.5)
        projector.fit(self.X_train)

        # Apply the same fitted projection to both splits.
        self.X_train = projector.transform(self.X_train)
        self.X_test = projector.transform(self.X_test)
        self.printDataShapes()
# Beispiel #17
def reduce_embedding_dimensions_GRP(vocab_embeddings_full, dimension,
                                    output_path):
    """Reduce vocabulary embeddings to `dimension` dims with a seeded
    Gaussian random projection (fit on the first 10k rows only), save the
    result as <output_path>/vocab_embeddings.npy, and return it."""
    projector = GaussianRandomProjection(n_components=dimension,
                                         eps=0.5,
                                         random_state=2019)
    # Fit on a 10k-row subsample; presumably sufficient since the Gaussian
    # projection matrix depends only on the input dimensionality.
    projector.fit(vocab_embeddings_full[:10000, :])
    reduced = projector.transform(vocab_embeddings_full)
    np.save(os.path.join(output_path, 'vocab_embeddings'), reduced)

    return reduced
class GaussianRandomProjectionImpl:
    """Thin wrapper that delegates fit/transform to a wrapped Op model
    constructed from the given hyperparameters."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; y is forwarded only when provided."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def transform(self, X):
        """Delegate transformation to the wrapped model."""
        return self._wrapped_model.transform(X)
    def rand_guas(self, n_comp, data=None):
        """Fit a Gaussian random projection with n_comp components on the
        training data (or on `data` if given) and project both train and
        test sets with the SAME fitted projection.

        Sets self.rand_guas_train_data, self.rand_guas_test_data and
        self.RAND_GUAS.
        """
        if data is None:
            data = self.train
        else:
            data = pd.DataFrame(data)

        rand_guas = GaussianRandomProjection(n_components=n_comp)
        rand_guas.fit(data)
        self.rand_guas_train_data = rand_guas.transform(data)
        self.RAND_GUAS = rand_guas

        # BUG FIX: a second, independently drawn projection used to be fit
        # on the test set, putting train and test in different random
        # subspaces; reuse the train-fitted projection instead.
        self.rand_guas_test_data = rand_guas.transform(self.test)
# Beispiel #20
    def __call__(self, x, y, train_idx):
        """Project x to self.n_components with a seeded Gaussian random
        projection fit on the training rows, then standardize the projected
        data using statistics computed on the training rows only."""
        from sklearn.random_projection import GaussianRandomProjection
        from sklearn.preprocessing import StandardScaler

        projector = GaussianRandomProjection(n_components=self.n_components,
                                             random_state=42)
        projector.fit(x[train_idx])
        projected = projector.transform(x)

        # Rescale with train-split statistics to avoid test leakage.
        scaler = StandardScaler()
        scaler.fit(projected[train_idx])
        return scaler.transform(projected)
# Beispiel #21
def get_vectors(words, dims):
    """Look up each word's vector and project all of them down to `dims`
    dimensions with one shared Gaussian random projection fit on the full
    set, so every word lands in the same subspace.

    Returns {word: projected_vector_as_list}.
    """
    word_vectors = [get_word_vector(word) for word in words]

    g = GaussianRandomProjection(dims)
    g.fit(np.array(word_vectors))
    # (Removed a dead `random_mat` copy of the transposed projection matrix.)

    vectors = {}
    for word, word_vector in zip(words, word_vectors):
        vectors[word] = g.transform(np.array([word_vector]))[0].tolist()

    return vectors
# Beispiel #22
class Coder(object):
    """Encode vectors into n_sketches discrete sketch codes.

    Pipeline: standard-scale -> Gaussian random projection to
    16 * n_sketches dims -> discretize -> pack bits -> reinterpret byte
    pairs as uint16 -> reduce modulo sketch_dim.

    NOTE(review): relies on self.init_biases and self.discretize, which are
    defined elsewhere and not visible here.
    """

    def __init__(self, n_sketches, sketch_dim):
        self.n_sketches = n_sketches
        self.sketch_dim = sketch_dim
        self.ss = StandardScaler()
        # 16 projected dimensions per sketch: the packbits + uint16 view in
        # transform() consumes 16 bits per output code.
        self.sp = GaussianRandomProjection(n_components=16 * n_sketches)

    def fit(self, v):
        """Fit the scaler and projection on v, then initialize biases from
        the projected training data."""
        self.ss = self.ss.fit(v)
        vv = self.ss.transform(v)
        self.sp = self.sp.fit(vv)
        vvv = self.sp.transform(vv)
        self.init_biases(vvv)

    def transform(self, v):
        """Return integer sketch codes in [0, sketch_dim) for each row of v."""
        v = self.ss.transform(v)
        v = self.sp.transform(v)
        v = self.discretize(v)  # presumably yields 0/1 bits -- TODO confirm
        # Pack 8 bits per byte, then reinterpret pairs of bytes as uint16
        # and fold each 16-bit word into the sketch range.
        v = np.packbits(v, axis=-1)
        v = np.frombuffer(np.ascontiguousarray(v), dtype=np.uint16).reshape(
            v.shape[0], -1) % self.sketch_dim
        return v

    def transform_to_absolute_codes(self, v, labels=None):
        """Offset each sketch's code by i * sketch_dim so codes from
        different sketches occupy disjoint index ranges."""
        codes = self.transform(v)
        pos_index = np.array(
            [i * self.sketch_dim for i in range(self.n_sketches)],
            dtype=np.int_)
        index = codes + pos_index
        return index
# Beispiel #23
def reducer_rand_proj_gauss(data, params):
    """Return a copy of `data` whose X_train and X_valid splits have been
    reduced with a Gaussian random projection fit on X_train.

    params: dict with 'n_components'; defaults to 5 components when None.
    """
    if params is None:
        params = {'n_components': 5}

    reducer = GaussianRandomProjection(n_components=params['n_components'])
    reducer.fit(data['X_train'])

    reduced = deepcopy(data)
    for split in ('X_train', 'X_valid'):
        reduced[split] = reducer.transform(data[split])

    return reduced
# Beispiel #24
def transform(data, alg):
    """Apply the dimensionality reduction named by `alg` ('pca', 'ica',
    'rp' or 'vtresh') to the feature columns of `data` (last column is the
    label), print that method's diagnostics, and return the reduced
    features with the label column re-attached."""
    arr = np.array(data)
    features = arr[:, 0:-1]
    labels = arr[:, -1]
    if alg == 'pca':
        pca = PCA(n_components=6, whiten=True)
        features = pca.fit(features).transform(features)
        print(pca.components_)
        print(pca.explained_variance_ratio_)
    elif alg == 'ica':
        # Kurtosis shift relative to the raw features.
        baseline_kurt = sum(kurtosis(features))
        ica = FastICA(n_components=3, whiten=False, algorithm="parallel").fit(features)
        features = ica.transform(features)
        print("kurtosis: ", sum(kurtosis(features)) - baseline_kurt)
    elif alg == 'rp':
        rp = GaussianRandomProjection(n_components=1).fit(features)
        features = rp.transform(features)
        print(rp.components_)
    elif alg == 'vtresh':
        kb = VarianceThreshold(threshold=.04)
        features = kb.fit_transform(features)
        print(kb.variances_)
    return np.column_stack((features, labels))
def search_best_k(datasets, targets):
    """Search for best K by Mean Classifier Score.

    For every (dataset, target) pair: scale a train/test split, then for
    each candidate component count k fit DimRedux, train two simple
    classifiers on the first 2/3 of the training rows, score them on the
    remaining 1/3, and plot the mean score against k (one subplot per
    dataset). The figure is saved to img/dim-rp-both.png.
    """

    plt.figure(figsize=(8, 4))

    subindex = 0
    for dataset, target in zip(datasets, targets):
        subindex += 1

        logging.info(f"Initializing RP search for {dataset}...")
        data = helpers.load_dataset_df(dataset)

        train, test = helpers.split_test_train(data, target)
        train, test = helpers.scale_test_train(train, test)

        slf = dict()
        # Sweep component counts from 1 up to n_features - 1.
        for k in range(1, train.X.shape[1]):
            dim = DimRedux(k, random_state=42)
            dim.fit(train.X)

            # Hold out the last third of the training rows for validation.
            split_ = train.X.shape[0] // 3 * 2

            clf1, clf2 = SimpleClf1(), SimpleClf2()
            clf1.fit(dim.transform(train.X[:split_, ]), train.y[:split_, ])
            clf2.fit(dim.transform(train.X[:split_, ]), train.y[:split_, ])

            sco1 = clf1.score(dim.transform(train.X[split_:, ]),
                              train.y[split_:, ])
            sco2 = clf2.score(dim.transform(train.X[split_:, ]),
                              train.y[split_:, ])

            # Mean of the two classifiers' validation scores.
            slf[k] = (sco1 + sco2) / 2

        plt.subplot(1, len(datasets), subindex)
        plt.plot(list(slf.keys()), list(slf.values()))
        plt.xlabel("Components")
        plt.ylabel("Classifier Train Score")
        plt.xticks(np.arange(1, train.X.shape[1] + 1, step=1))
        plt.title(f"{dataset}", fontsize=10)

        plt.tight_layout()

        # NOTE(review): savefig runs once per dataset; the final save wins.
        outpath = os.path.join(helpers.BASEDIR, "img", f"dim-rp-both.png")
        plt.savefig(outpath)

    return None
# Beispiel #26
class DReduction:
    """Bundle of five decompositions (PCA, TruncatedSVD, FastICA, Gaussian
    and sparse random projections), all with the same component count, fit
    together and emitted as one wide DataFrame with columns interleaved as
    pca_i, tsvd_i, ica_i, grp_i, srp_i for each component i."""

    _N_COMP = 0            ### Number of decomposition components ###

    _pca    = 0
    _tsvd   = 0
    _ica    = 0
    _grp    = 0
    _srp    = 0

    def __init__(self, nComp):
        self._N_COMP = nComp
        self._pca = PCA(n_components=nComp, random_state=17)
        self._tsvd = TruncatedSVD(n_components=nComp, random_state=17)
        self._ica = FastICA(n_components=nComp, random_state=17)
        self._grp = GaussianRandomProjection(n_components=nComp, eps=0.1, random_state=17)
        self._srp = SparseRandomProjection(n_components=nComp, dense_output=True, random_state=17)

    def _models(self):
        # Internal helper: (column prefix, model) pairs in output order.
        return [('pca', self._pca), ('tsvd', self._tsvd), ('ica', self._ica),
                ('grp', self._grp), ('srp', self._srp)]

    def fit(self, X):
        """Fit all five reducers on X."""
        for _, model in self._models():
            model.fit(X)

    def transform(self, X):
        """Transform X with each reducer and collect the results into one
        DataFrame named <method>_<i> (1-based component index), preserving
        the interleaved column order."""
        results = {prefix: model.transform(X)
                   for prefix, model in self._models()}

        df = pd.DataFrame()
        for i in range(1, self._N_COMP + 1):
            for prefix, _ in self._models():
                df[prefix + '_' + str(i)] = results[prefix][:, i - 1]

        return df
def find_best_state_RCA(X, comp=2, n_state=20):
    """For random seeds 0..n_state-1, project X to `comp` components,
    reconstruct through the projection matrix, and record the cosine
    similarity between reconstruction and X (top-left similarity entry).

    Returns the list of similarities, one per seed.
    """
    similarities = []
    for seed in range(n_state):
        rca = GaussianRandomProjection(n_components=comp, random_state=seed)
        projected = rca.fit(X).transform(X)
        reconstructed = np.matmul(projected, rca.components_)
        similarities.append(cosine_similarity(reconstructed, X)[0][0])
    return similarities
# Beispiel #28
class GaussianRandomProjectionImpl():
    """Wrapper that stores hyperparameters and builds the underlying
    SKLModel lazily at fit time."""

    def __init__(self, n_components='auto', eps=0.1, random_state=None):
        self._hyperparams = {
            'n_components': n_components,
            'eps': eps,
            'random_state': random_state
        }

    def fit(self, X, y=None):
        """Instantiate the underlying model from the stored hyperparameters
        and fit it, forwarding y only when it was supplied."""
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is None:
            self._sklearn_model.fit(X)
        else:
            self._sklearn_model.fit(X, y)
        return self

    def transform(self, X):
        """Delegate transformation to the fitted underlying model."""
        return self._sklearn_model.transform(X)
def random_projection(X,
                      y,
                      components,
                      max_cluster,
                      num_classes,
                      run_nn=False):
    """Split X/y 70/30, Gaussian-random-project both splits, report the
    reconstruction error on the test split, optionally train an MLP on the
    projected data, then run k-means and EM clustering sweeps on the full
    projected dataset.
    """
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        train_size=0.7,
                                                        shuffle=True)
    random_proj = GaussianRandomProjection(n_components=components)
    random_proj.fit(X_train, y=y_train)
    print(random_proj.components_)
    X_train_new = random_proj.transform(X_train)
    X_test_new = random_proj.transform(X_test)
    # Reconstruct the test data through the pseudo-inverse of the
    # projection matrix and measure the mean squared reconstruction error.
    inverse_components = np.linalg.pinv(random_proj.components_)
    reconstructed_instances = utils.extmath.safe_sparse_dot(
        X_test_new, inverse_components.T)
    loss = ((X_test - reconstructed_instances)**2).mean()
    print("Reconstruction Error " + str(loss))
    if run_nn:
        mlp_classifier(X_train_new,
                       y_train,
                       0.3,
                       plot=True,
                       X_test=X_test_new,
                       y_test=y_test)
    # Reassemble the projected dataset (train rows first) for clustering.
    X_new = np.concatenate((X_train_new, X_test_new), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    kmeans(X_new,
           y,
           max_cluster,
           num_classes,
           run_nn=run_nn,
           plot_cluster=True,
           reduction_algo='Random Projection')
    expectation_max(X_new,
                    y,
                    max_cluster,
                    num_classes,
                    run_nn=run_nn,
                    plot_cluster=True,
                    reduction_algo='Random Projection')
# Beispiel #30
def randproj(tx, ty, rx, ry, dataset):
    """Random-project the data, show 2D and 3D scatter plots of the
    projected training set, and run the EM clustering experiments plus a
    neural net on the compressed data.

    BUG FIXES vs. the original: removed a duplicated compressor
    constructor, removed an invalid `projection='2d'` subplot (immediately
    overwritten anyway), turned a bare prose line into a comment (it was a
    syntax error), and used integer division for the component count
    (tx[1].size / 2 is a float under Python 3).
    NOTE(review): the 3-column DataFrame below assumes
    compressor.transform returns exactly 3 components — confirm.
    """
    compressor = RandomProjection(tx[1].size // 2)
    compressor.fit(tx, y=ty)

    # 2D visualization of a 2-component projection.
    pca = RandomProjection(2)
    pca.fit(tx)
    result = pd.DataFrame(pca.transform(tx), columns=['RP%i' % i for i in range(2)])
    my_color = pd.Series(ty).astype('category').cat.codes
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(result['RP0'], result['RP1'], c=my_color, cmap="Dark2_r", s=60)
    ax.set_xlabel("RP1")
    ax.set_ylabel("RP2")
    ax.set_title("RP on the " + dataset + " data set")
    plt.show()

    # Store results of the projection in a data frame (3D view).
    result = pd.DataFrame(compressor.transform(tx), columns=['ICA%i' % i for i in range(3)])
    my_color = pd.Series(ty).astype('category').cat.codes
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(result['ICA0'], result['ICA1'], result['ICA2'], c=my_color, cmap="Dark2_r", s=60)

    xAxisLine = ((min(result['ICA0']), max(result['ICA0'])), (0, 0), (0, 0))
    ax.plot(xAxisLine[0], xAxisLine[1], xAxisLine[2], 'r')
    yAxisLine = ((0, 0), (min(result['ICA1']), max(result['ICA1'])), (0, 0))
    ax.plot(yAxisLine[0], yAxisLine[1], yAxisLine[2], 'r')
    zAxisLine = ((0, 0), (0, 0), (min(result['ICA2']), max(result['ICA2'])))
    ax.plot(zAxisLine[0], zAxisLine[1], zAxisLine[2], 'r')

    ax.set_xlabel("RP1")
    ax.set_ylabel("RP2")
    ax.set_zlabel("RP3")
    ax.set_title("RP on the Car data set")
    plt.show()

    reduced_data = RandomProjection(2).fit_transform(tx)
    em(tx, ty, rx, ry, reduced_data, add="", times=4, dataset=dataset, alg="RP")
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, [], add="", times=4, dataset=dataset, alg="RP")
    em(newtx, ty, newrx, ry, RandomProjection(n_components=2).fit_transform(tx), add="wRPtr", times=9, dataset=dataset, alg="RandProj")
    # nn(newtx, ty, newrx, ry, add="wRPtr")
    myNN(newtx, ty, newrx, ry, "RandProj")
# Beispiel #31
def run_rand(data, target, targets, name):
    """For each seed in the module-level `seeds`, scatter-plot a
    2-component Gaussian random projection of `data` colored by class, one
    subplot per seed (positions from module-level `plots`), then save the
    figure as '<name> random'.
    """
    plt.subplots(figsize=(18, 10))
    for seed_idx in range(len(seeds)):
        transformer = GaussianRandomProjection(n_components=2,
                                               random_state=seeds[seed_idx])
        transformer.fit(data)
        randTrain = transformer.transform(data)
        plt.subplot(plots[seed_idx])
        # BUG FIX: the inner loop reused `i`, shadowing the outer loop
        # index, and zip(targets, targets) only paired each label with
        # itself — iterate the labels directly instead.
        for target_value in targets:
            plt.scatter(randTrain[target == target_value, 0],
                        randTrain[target == target_value, 1],
                        alpha=.8,
                        lw=2,
                        label=target_value)

        plt.legend(loc='best', shadow=False, scatterpoints=1)
        plt.title("Randomized Projection of " + name)
    plt.savefig(name + " random")
    plt.close()
 def test_gaussian_random_projection_float64(self):
     """Round-trip a float64 GaussianRandomProjection through ONNX
     conversion and dump data/model for output verification."""
     rng = np.random.RandomState(42)
     projector = GaussianRandomProjection(n_components=4)
     sample = rng.rand(10, 5).astype(np.float64)
     fitted = projector.fit(sample)
     onnx_model = to_onnx(fitted, sample[:1], dtype=np.float64,
                          target_opset=TARGET_OPSET)
     self.assertIsNotNone(onnx_model)
     dump_data_and_model(sample, fitted, onnx_model,
                         basename="GaussianRandomProjection64")
    def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Random-projection diagnostics, saved as figures to self.out_dir:
        (1) mean per-feature kurtosis over 1000 random projections of the
        robust-scaled training data (bar chart); (2) reconstruction error
        vs. number of components (series plot).

        NOTE(review): X_test, y_train and y_test are accepted but unused.
        """
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)

        ks = []
        # Average kurtosis over many independently drawn projections.
        for i in range(1000):
            ##
            ## Random Projection
            ##
            rp = GaussianRandomProjection(n_components=X_train_scl.shape[1])
            rp.fit(X_train_scl)
            X_train_rp = rp.transform(X_train_scl)

            ks.append(kurtosis(X_train_rp))

        # Mean kurtosis per projected feature across all restarts.
        mean_k = np.mean(ks, 0)

        ##
        ## Plots
        ##
        ph = plot_helper()

        title = 'Kurtosis (Randomized Projection) for ' + data_set_name
        name = data_set_name.lower() + '_rp_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'

        ph.plot_simple_bar(np.arange(1,
                                     len(mean_k) + 1, 1), mean_k,
                           np.arange(1,
                                     len(mean_k) + 1, 1).astype('str'),
                           'Feature Index', 'Kurtosis', title, filename)
        ##
        ## Reconstruction Error
        ##
        # self.reconstruction_error is defined elsewhere in this class;
        # presumably returns per-trial MSEs plus the component range
        # evaluated -- TODO confirm.
        all_mses, rng = self.reconstruction_error(X_train_scl,
                                                  GaussianRandomProjection)

        title = 'Reconstruction Error (RP) for ' + data_set_name
        name = data_set_name.lower() + '_rp_rec_err'
        filename = './' + self.out_dir + '/' + name + '.png'
        ph.plot_series(rng, [all_mses.mean(0)], [all_mses.std(0)], ['mse'],
                       ['red'], ['o'], title, 'Number of Features',
                       'Reconstruction Error', filename)
class ProjClassifier(BaseEstimator, ClassifierMixin):
    """Binary classifier: Gaussian random projection down to n_components,
    followed by a 1-D KNN-style classifier (Knn1dClassifier).

    Classes are fixed to {0, 1}.
    """

    def __init__(self, n_components=1, knn=100):
        self.n_components = n_components
        self.knn = knn

    def fit(self, X, y, sample_weight=None):
        """Fit the projection on X, then the KNN on the projected data;
        sample_weight is forwarded to the KNN fit."""
        self.classes_ = numpy.array([0, 1])
        self.proj = GaussianRandomProjection(n_components=self.n_components)
        # self.knner = KNeighborsClassifier(n_neighbors=self.knn)
        self.knner = Knn1dClassifier(self.knn)
        self.proj.fit(X)
        X_new = self.proj.transform(X)
        # TODO sample weight!!
        self.knner.fit(X_new, y, sample_weight=sample_weight)
        # (Removed a leftover debug print('ok').)
        return self

    def predict_proba(self, X):
        """Class-probability estimates from the projected features."""
        X_new = self.proj.transform(X)
        return self.knner.predict_proba(X_new)

    def predict(self, X):
        """Most probable class (0 or 1) per row."""
        return numpy.argmax(self.predict_proba(X), axis=1)
					champion_items[key['champ']] [(key["patch"], region, tier)] ["second"] [key['second']] += build['value']
					champion_items[key['champ']] [(key["patch"], region, tier)] ["third"] [key['third']] += build['value']

					#update champion games played
					champion_games[key['champ']] [(key['patch'], region, tier)] += build['value']

items_json = []
for champ in champion_builds.keys():

	# perform GaussianRandomProjection
	all_builds = []
	for key in champion_builds[champ]:
		all_builds += champion_builds[champ][key]

	grp = GaussianRandomProjection(2, random_state = 0)
	grp.fit(all_builds)

	for key in champion_builds[champ]:
		builds = champion_builds[champ][key]
		reduction = grp.transform(builds)

		# get top 100 builds
		zipped = zip(list(reduction), build_games[champ][key], build_objects[champ][key])
		sorted_zipped = sorted(zipped, key=lambda x: x[1], reverse=True)
		top_builds = sorted_zipped[0:100]

		builds_json = []
		for i in top_builds:
			x = list(i[0])[0]
			y = list(i[0])[1]
			builds_json.append( {
# Beispiel #36
ap_region_data = { k:v for (k,v) in region_data.items() if ap_champs[k[2]]}
ap_tier_data = { k:v for (k,v) in tier_data.items() if ap_champs[k[2]]}
ap_cross_data = { k:v for (k,v) in cross_data.items() if ap_champs[k[3]]}
ap_patch_data = { k:v for (k,v) in patch_data.items() if ap_champs[k[1]]}

all_ap_data = ap_region_data.values() + (ap_tier_data.values()) + ap_cross_data.values() + ap_patch_data.values()

#print(ap_champs)

#pca = PCA(n_components=2)
#reduction = pca.fit_transform(ap_champs.values())
#print(pca.explained_variance_ratio_)

grp = GaussianRandomProjection(2, random_state = 0)
grp.fit(all_ap_data)
region_reduction = grp.transform(ap_region_data.values())
tier_reduction = grp.transform(ap_tier_data.values())
cross_reduction = grp.transform(ap_cross_data.values())
patch_reduction = grp.transform(ap_patch_data.values())

region_json_data = []
for i in range(0,len(ap_region_data.keys())):
	key = ap_region_data.keys()[i]
	data = list(region_reduction[i])
	num_games = region_games[key]
	region_json_data.append( {
		"patch":key[0],
		"region":key[1],
		#"tier":key[2],
		"champion":key[2],
def select_features_GaussianRandomProjections(train_X, train_y, test_X, k):
    """Reduce train and test features to k dimensions with a Gaussian random projection.

    The projection is fitted on the training data only and then applied to
    both splits. `train_y` is accepted (to match the other selector helpers'
    signatures) but not used by the projection. Returns the transformed
    (train_X, test_X) pair.
    """
    projector = GaussianRandomProjection(n_components=k, random_state=42)
    projector.fit(train_X)
    return projector.transform(train_X), projector.transform(test_X)
Beispiel #38
0
class RCAReducer():
    """Dimensionality reduction via Gaussian random projection (RCA).

    Scales the dataset's features to [0, 1] with MinMaxScaler, projects them
    onto `num_components` random Gaussian axes, and provides clustering
    benchmarks plus plain-text reports of the reduced data.
    """

    def __init__(self, dataset, dataset_name, num_components=10):
        # `dataset` is expected to expose sklearn-Bunch-style .data / .target
        # attributes (e.g. load_digits(), load_iris()).
        self.dataset = dataset
        self.dataset_name = dataset_name
        self.labels = dataset.target
        self.scaler = MinMaxScaler()
        self.data = self.scaler.fit_transform(dataset.data)
        self.n_samples, self.n_features = self.data.shape

        self.reducer = GaussianRandomProjection(n_components=num_components)

    def reduce(self):
        """Fit the projection on the full dataset and return the reduced data.

        The projected data is re-scaled to [0, 1] so downstream consumers see
        the same feature range as the original scaled input.
        """
        self.reducer.fit(self.data)
        self.reduced = self.scaler.fit_transform(self.reducer.transform(self.data))
        return self.reduced

    def benchmark(self, estimator, name, data):
        """Fit a clustering `estimator` on `data` and print standard cluster-quality metrics."""
        t0 = time()
        # Silhouette is computed on a subsample to keep the benchmark fast.
        sample_size = 300
        labels = self.labels

        estimator.fit(data)
        print('% 9s   %.2fs    %i   %.3f   %.3f   %.3f   %.3f   %.3f    %.3f'
              % (name, (time() - t0), estimator.inertia_,
                 metrics.homogeneity_score(labels, estimator.labels_),
                 metrics.completeness_score(labels, estimator.labels_),
                 metrics.v_measure_score(labels, estimator.labels_),
                 metrics.adjusted_rand_score(labels, estimator.labels_),
                 metrics.adjusted_mutual_info_score(labels,  estimator.labels_),
                 metrics.silhouette_score(data, estimator.labels_,
                                          metric='euclidean',
                                          sample_size=sample_size)))

    def _display_reduced(self, out_path):
        # Shared report body for the digits / iris display methods: redirects
        # stdout to `out_path` and prints before/after vector lengths, the
        # projection axes, and per-dimension variances.
        # NOTE(review): sys.stdout is redirected and never restored, matching
        # the original behavior — callers should be aware.
        sys.stdout = open(out_path, 'w')
        print("RCA Reduction of %s:\n" % self.dataset_name)
        print(40 * '-')
        print("Length of 1 input vector before reduction: %d \n" % len(self.data.tolist()[0]))
        print("Length of 1 input vector after reduction: %d \n" % len(self.reduced.tolist()[0]))
        print("\nProjection axes:\n")
        for i, axis in enumerate(self.reducer.components_.tolist()):
            print("Axis %d:\n" % i, axis)
        self.compute_plane_variance()

    def display_reduced_digits(self):
        """Write the reduction report for the digits dataset. Requires reduce() first."""
        self._display_reduced('out/RCAReduceDigitsOutput.txt')

    def display_reduced_iris(self):
        """Write the reduction report for the iris dataset. Requires reduce() first."""
        self._display_reduced('out/RCAReduceIrisOutput.txt')

    def compute_plane_variance(self):
        """Print the variance of each reduced dimension."""
        points_along_dimension = self.reduced.T
        for i, points in enumerate(points_along_dimension):
            print("\nVariance of dimension %d:" % i)
            print(np.var(points), "\n")

    def reduce_crossvalidation_set(self, X_train, X_test):
        """Fit the projection on X_train only and project both splits.

        Bug fix: the original fitted self.reducer on X_train but then returned
        self.scaler.transform(...) for both splits, so the random projection
        was never applied and the data came back un-reduced.
        """
        self.reducer.fit(X_train)
        reduced_X_train = self.reducer.transform(X_train)
        reduced_X_test = self.reducer.transform(X_test)
        return reduced_X_train, reduced_X_test
# Load the 20 newsgroups dataset.
# Select only the sci.crypt category; other categories include
# sci.med, sci.space and soc.religion.christian.
cat = ['sci.crypt']
data = fetch_20newsgroups(categories=cat)

# Create a term-document matrix with term frequencies as the values from
# the above dataset (idf weighting disabled).
vectorizer = TfidfVectorizer(use_idf=False)
vector = vectorizer.fit_transform(data.data)

# Perform the projection. In this case we reduce the dimension to 1000.
gauss_proj = GaussianRandomProjection(n_components=1000)
gauss_proj.fit(vector)
# Transform the original data to the new space.
vector_t = gauss_proj.transform(vector)

# Print the original and transformed matrix shapes.
# (Fixed: Python 2 `print x` statements replaced with print() calls.)
print(vector.shape)
print(vector_t.shape)

# To validate that the transformation has preserved distances, we compare
# the pairwise Euclidean distances between points in the old and new spaces.
org_dist = euclidean_distances(vector)
red_dist = euclidean_distances(vector_t)
diff_dist = abs(org_dist - red_dist)

# We take the difference between these points and plot them as a heatmap (only
# the first 1000 documents).