def ica(self, n_comp, data=None):
        """Fit ICA on the training data and project the train and test sets.

        Args:
            n_comp: Value passed positionally to ``ICA`` (the number of
                components to extract).
            data: Data to fit on.  Defaults to ``self.train``.

        Side effects:
            Sets ``self.ICA`` (the fitted transformer),
            ``self.ica_train_data`` and ``self.ica_test_data``.
        """
        if data is None:
            data = self.train

        ica_features = ICA(n_comp)
        ica_features.fit(data)
        self.ica_train_data = ica_features.transform(data)
        self.ICA = ica_features

        # Project the test set with the transformer learned on the training
        # data.  Fitting a second, independent ICA on the test set would put
        # train and test features in unrelated component spaces (ICA
        # components have no canonical order, sign or scale).
        self.ica_test_data = ica_features.transform(self.test)
Ejemplo n.º 2
0
def ica(tx, ty, rx, ry, dataset):
    """Run the ICA experiment suite on one train/test split.

    Steps, in order: EM on the raw data with a 2-component ICA projection,
    per-feature kurtosis of ``tx``, a 2-D and a 3-D scatter plot of ICA
    projections, per-component kurtosis of the reduced data, and finally EM
    and both neural-network runs on the reduced data.

    Args:
        tx: Training features; indexed as a 2-D array (``tx[:, i]``).
        ty: Training labels, also used to colour the scatter plots.
        rx: Test features.
        ry: Test labels.
        dataset: Forwarded unchanged to ``em``.

    Note:
        The 3-D plot needs at least three ICA components, i.e. at least six
        input features.
    """
    reduced_data = ICA(n_components=2).fit_transform(tx)
    em(tx, ty, rx, ry, reduced_data, add="", times=4, dataset=dataset, alg="ICA")

    _, n_features = tx.shape
    for col in range(n_features):
        print(kurtosis(tx[:, col], fisher=False))

    # Floor division: the component count must be an integer, and ``/``
    # yields a float on Python 3.
    compressor = ICA(n_components=tx[1].size // 2, max_iter=10000, tol=0.001)  # for some people, whiten needs to be off
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    # km(newtx, ty, newrx, ry, [], add="", times=7, dataset=dataset, alg="ICA")

    # One colour code per class label, shared by both scatter plots.
    my_color = pd.Series(ty).astype('category').cat.codes

    # 2-D scatter of a separate two-component ICA fit.  The 'PCA%i' column
    # names are historical; the projection is ICA.
    ica_2d = ICA(n_components=2)
    ica_2d.fit(tx)
    result = pd.DataFrame(ica_2d.transform(tx), columns=['PCA%i' % i for i in range(2)])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(result['PCA0'], result['PCA1'], c=my_color, cmap="Dark2_r", s=60)
    plt.show()

    # 3-D scatter of the first three components of the main compressor.
    # Slicing keeps the frame construction valid when more than three
    # components were extracted.
    result = pd.DataFrame(newtx[:, :3], columns=['ICA%i' % i for i in range(3)])
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(result['ICA0'], result['ICA1'], result['ICA2'], c=my_color, cmap="Dark2_r", s=60)

    # Draw the three coordinate axes through the origin in red.
    xAxisLine = ((min(result['ICA0']), max(result['ICA0'])), (0, 0), (0, 0))
    ax.plot(xAxisLine[0], xAxisLine[1], xAxisLine[2], 'r')
    yAxisLine = ((0, 0), (min(result['ICA1']), max(result['ICA1'])), (0, 0))
    ax.plot(yAxisLine[0], yAxisLine[1], yAxisLine[2], 'r')
    zAxisLine = ((0, 0), (0, 0), (min(result['ICA2']), max(result['ICA2'])))
    ax.plot(zAxisLine[0], zAxisLine[1], zAxisLine[2], 'r')

    ax.set_xlabel("IC1")
    ax.set_ylabel("IC2")
    ax.set_zlabel("IC3")
    ax.set_title("ICA on the Phishing data set")
    plt.show()

    print("-----------")
    _, n_components = newtx.shape
    for col in range(n_components):
        print(kurtosis(newtx[:, col], fisher=False))

    em(newtx, ty, newrx, ry, add="wICAtr", times=21, dataset=dataset, alg="ICA")
    em(newtx, ty, newrx, ry, ICA(n_components=2).fit_transform(tx), add="wICAtr", times=9, dataset=dataset, alg="Ica")
    nn(newtx, ty, newrx, ry, add="wICAtr")
    myNN(newtx, ty, newrx, ry, "ica")
Ejemplo n.º 3
0
def reduce_dim(data, labels, n_components, **kwargs):
    """Reduce ``data`` to ``n_components`` dimensions.

    Args:
        data: Feature matrix to reduce.
        labels: Class labels; only used by the ``'lda'`` method.
        n_components: Target dimensionality, forwarded to the estimator.
        **kwargs: Must contain ``method``, one of ``'pca'``, ``'lda'`` or
            ``'ica'``.

    Returns:
        The transformed data, or ``None`` when ``method`` is not one of the
        three recognised values.

    Raises:
        KeyError: If ``method`` is not supplied.
    """
    method = kwargs['method']

    if method == 'pca':
        pca = PCA(n_components=n_components, svd_solver='full')
        pca.fit(data)
        return pca.transform(data)

    if method == 'lda':
        # LDA is fitted on normalised samples, so the projection must be
        # applied to the same normalised samples, not to the raw matrix.
        normalized = Normalizer().fit_transform(data)
        lda = LDA(n_components=n_components)
        lda.fit(normalized, labels)
        return lda.transform(normalized)

    if method == 'ica':
        ica = ICA(n_components=n_components, random_state=0)
        return ica.fit_transform(data)

    return None
Ejemplo n.º 4
0
def ica_analysis(X, y, plot_path):
    """Project the features onto three ICA components and save a 3-D scatter.

    Args:
        X: Feature frame.  It is modified in place: the columns ``'y'``,
            ``'label'``, ``'ica-one'``, ``'ica-two'`` and ``'ica-three'``
            are added to it.
        y: Labels; converted to ``int`` to colour the points.
        plot_path: Directory prefix; the figure is written to
            ``plot_path + '/ica_3D'``.
    """
    feat_cols = list(X)
    df = X  # same object as the caller's frame, not a copy
    df['y'] = y
    df['label'] = df['y'].apply(str)

    print('Size of the dataframe: {}'.format(df.shape))
    label_list = [int(label) for label in df['y'].tolist()]
    print(label_list)

    ica = ICA(n_components=3)
    ica_result = ica.fit_transform(df[feat_cols].values)
    df['ica-one'] = ica_result[:, 0]
    df['ica-two'] = ica_result[:, 1]
    df['ica-three'] = ica_result[:, 2]

    # ``Figure.gca(projection=...)`` no longer accepts keyword arguments in
    # current Matplotlib; create the 3-D axes explicitly instead.
    fig = plt.figure(figsize=(16, 10))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(xs=df["ica-one"],
               ys=df["ica-two"],
               zs=df["ica-three"],
               c=label_list,
               cmap='tab10')
    ax.set_xlabel('ica-one')
    ax.set_ylabel('ica-two')
    ax.set_zlabel('ica-three')
    plt.savefig(plot_path + '/ica_3D')
def reduce_dim(data, labels, n_components, **kwargs):
    """Reduce ``data`` to ``n_components`` dimensions.

    Args:
        data: Feature matrix to reduce.
        labels: Class labels; only used by the ``'lda'`` method.
        n_components: Target dimensionality, forwarded to the estimator.
        **kwargs: Must contain ``method``, one of ``'pca'``, ``'lda'`` or
            ``'ica'``.

    Returns:
        The transformed data, or ``None`` when ``method`` is not one of the
        three recognised values.

    Raises:
        KeyError: If ``method`` is not supplied.
    """
    method = kwargs['method']

    if method == 'pca':
        pca = PCA(n_components=n_components)
        pca.fit(data)
        return pca.transform(data)

    if method == 'lda':
        lda = LDA(n_components=n_components)
        lda.fit(data, labels)
        return lda.transform(data)

    if method == 'ica':
        ica = ICA(n_components=n_components, random_state=0)
        return ica.fit_transform(data)

    return None
Ejemplo n.º 6
0
def split_ica(combined_data, label_1, label_2):
    """Run ICA on two stacked sample groups and plot the first two components.

    Rows 0-99 of the transformed data are drawn as blue circles labelled
    ``label_1``; rows 100-199 as red triangles labelled ``label_2``.

    Returns:
        The full ICA-transformed array.
    """
    transformed = ICA().fit(combined_data).transform(combined_data)

    # (row range, marker, colour, legend label) for each group.
    groups = (
        (slice(0, 100), 'o', 'blue', label_1),
        (slice(100, 200), '^', 'red', label_2),
    )
    for rows, marker, colour, legend in groups:
        plt.plot(transformed[rows, 0],
                 transformed[rows, 1],
                 marker,
                 markersize=7,
                 color=colour,
                 alpha=0.5,
                 label=legend)

    plt.xlabel('x_values')
    plt.ylabel('y_values')
    plt.xlim([-0.3, 0.3])
    plt.ylim([-0.3, 0.3])
    plt.legend()

    plt.show()

    return transformed
Ejemplo n.º 7
0
def ica(tx, ty, rx, ry):
    """Reduce the data with ICA and run EM, k-means and the NN on the result.

    Args:
        tx: Training features; the ICA model is fitted on these.
        ty: Training labels.
        rx: Test features; projected with the model fitted on ``tx``.
        ry: Test labels.
    """
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    # Only transform the test set: refitting on it would produce components
    # unrelated to the ones the training features were projected onto.
    newrx = compressor.transform(rx)
    em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICAtr")
Ejemplo n.º 8
0
def caller(tx, ty, rx, ry):
    """Run ``nnTable`` on the data reduced by four methods at several sizes.

    For each of PCA, ICA, random projection and k-best selection, and for
    each component count in ``[4, 8, 12, 16]``, the reducer is fitted on the
    training set, applied to both sets, and the result is handed to
    ``nnTable``.

    Args:
        tx: Training features.
        ty: Training labels (passed to ``fit`` as ``y``).
        rx: Test features.
        ry: Test labels.
    """
    nums = [4, 8, 12, 16]
    # (printed banner, label given to nnTable, reducer factory).
    # Random projection and k-best used to be reported under the
    # copy-pasted label "PCA"; they now get their own labels, matching the
    # "EM-RP" / "EM-KB" naming used by graphCallerNN.
    stages = (
        ("PCA", "PCA", lambda n: PCA(n_components=n)),
        ("ICA", "ICA", lambda n: ICA(n_components=n)),
        ("RandProj", "RP", lambda n: RandomProjection(n)),
        ("kbest", "KB", lambda n: best(k=n)),
    )
    for banner, alg, make_compressor in stages:
        for n in nums:
            print(banner)
            print(n)
            compressor = make_compressor(n)
            compressor.fit(tx, y=ty)
            newtx = compressor.transform(tx)
            newrx = compressor.transform(rx)
            nnTable(newtx, ty, newrx, ry, alg=alg)
Ejemplo n.º 9
0
def graphCallerNN(tx, ty, rx, ry):
    """Reduce, post-process with ``oneem`` and train NNs for four reducers.

    Each reducer (PCA, ICA, random projection, k-best) keeps half of the
    input features.  The reduced training set is passed through ``oneem``
    and the result goes to ``nnTable`` (except for PCA) and ``myNN``.

    Args:
        tx: Training features.
        ty: Training labels (passed to ``fit`` as ``y``).
        rx: Test features.
        ry: Test labels.
    """
    # Floor division: the component count must be an integer, and ``/``
    # yields a float on Python 3.
    n = tx[1].size // 2

    compressor = PCA(n_components=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    myNN(newtx, ty, newrx, ry, "EM-PCA")
    # nnTable(newtx, ty, newrx, ry, alg="EM-PCA")

    compressor = ICA(n_components=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-ICA")
    myNN(newtx, ty, newrx, ry, "EM-Ica")

    compressor = RandomProjection(n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-RP")
    myNN(newtx, ty, newrx, ry, "EM-RP")

    compressor = best(k=n)
    compressor.fit(tx, y=ty)
    newtx = compressor.transform(tx)
    newrx = compressor.transform(rx)
    newtx = oneem(newtx, ty, newrx, ry)
    nnTable(newtx, ty, newrx, ry, alg="EM-KB")
    myNN(newtx, ty, newrx, ry, "EM-KB")
Ejemplo n.º 10
0
def ica_orth(A, r=None):
    """Return ``A`` multiplied by the ICA projection of its transpose.

    Args:
        A: Input matrix; ICA is fitted on ``A.T``.
        r: Number of components.  When ``None`` it is obtained from
            ``pca_rank_est(A)``.

    Returns:
        ``A @ P`` where ``P`` is the ICA transform of ``A.T``.
    """
    rank = pca_rank_est(A) if r is None else r
    model = ICA(n_components=rank).fit(A.T)
    projection = model.transform(A.T)
    return A @ projection
Ejemplo n.º 11
0
Archivo: plot.py Proyecto: tedil/lyner
 def apply_ica():
     """Accumulate ICA connectivity over several component counts.

     Reads ``data``, ``num_components`` and ``connectivity`` from the
     enclosing scope.

     Returns:
         A tuple ``(C, X_r)``: ``C`` is one minus the connectivity averaged
         over ``num_components``; ``X_r`` is the ICA projection from the
         last component count tried.
     """
     # ``np.mat`` is gone from NumPy 2.0; ``np.asmatrix`` is the same
     # function under its surviving name.
     cons = np.asmatrix(np.zeros((data.shape[1], data.shape[1])))
     for components in num_components:
         ica = ICA(n_components=components)
         X_r = ica.fit_transform(data.T)
         cons += connectivity(data, X_r.T)
     C = 1 - (cons / len(num_components))
     return C, X_r
Ejemplo n.º 12
0
def credit_risk_data():
    """Run the dimensionality-reduction and clustering experiments on credit data.

    Reads the module-level ``credit_data`` frame, encodes its categorical
    columns, then runs the PCA/ICA/RCA sweeps, k-means and EM on the reduced
    and on the full encoded data.  Nothing is returned; the final
    train/test split is assigned to locals only.
    """
    # Drop the columns that are not used as features.
    # NOTE(review): 'class' is not in the drop list and is listed in
    # features_to_encode below, so the target column appears to stay in the
    # feature matrix - confirm this is intended.
    data_X = credit_data.drop([
        'credit_amount', 'other_parties', 'purpose', 'own_telephone',
        'foreign_worker'
    ],
                              axis=1)
    data_y = credit_data[['class']]

    features_to_encode = [
        'personal_status', 'checking_status', 'credit_history',
        'savings_status', 'employment', 'property_magnitude',
        'other_payment_plans', 'housing', 'job', 'class'
    ]
    enc = my_encoder()
    enc.fit(data_X, features_to_encode)
    # Despite the name, X_train is the whole encoded data set; no split has
    # happened yet.
    X_train = enc.transform(data_X)
    # X_test = enc.transform(X_test)

    # Component-count sweeps for each reduction method.
    run_PCA(X_train, "Credit Data")
    run_ICA(X_train, "Credit Data")
    run_RCA(X_train, "Credit Data")

    # Fixed-size projections used by the clustering runs below.
    pca_credit = PCA(n_components=3, random_state=5).fit_transform(X_train)
    ica_credit = ICA(n_components=2, random_state=5).fit_transform(X_train)
    rca_credit = RCA(n_components=29, random_state=5).fit_transform(X_train)

    # NOTE(review): the second argument here is the full feature matrix,
    # whereas the later calls pass cluster assignments (y_km / y_em) in that
    # position - verify against run_kmeans / run_EM.
    run_kmeans(pca_credit, X_train, "KMEANS")
    run_kmeans(ica_credit, X_train, "KMEANS")
    run_kmeans(rca_credit, X_train, "KMEANS")

    run_EM(pca_credit, X_train, 'PCA Credit Risk Data')
    run_EM(ica_credit, X_train, 'ICA Credit Risk Data')
    run_EM(rca_credit, X_train, 'RCA Credit Risk Data')

    # Baseline k-means on the unreduced data, scored with the silhouette.
    km = KMeans(n_clusters=3, random_state=0)
    y_km = km.fit_predict(X_train)

    score = silhouette_score(X_train, km.labels_, metric='euclidean')
    print('Silhouetter Score: %.3f' % score)

    # kmeans_silhoutte_analysis(X_train)

    elbow_function(X_train)
    run_kmeans(X_train, y_km, "KMEANS")

    # Baseline EM on the unreduced data.
    em = EM(n_components=2, covariance_type='spherical', random_state=100)
    y_em = em.fit_predict(X_train)
    plot_EM(em, X_train)
    run_EM(X_train, y_em, "EM")
    # evaluate_EM(em, X_train, y_em)

    # The split of the un-encoded data is not used after this point within
    # the function.
    X_train, X_test, y_train, y_test = train_test_split(data_X,
                                                        data_y,
                                                        test_size=0.2,
                                                        random_state=0)
Ejemplo n.º 13
0
def ica(tx, ty, rx, ry):
    """Reduce the data with ICA, report component kurtosis and run the NN.

    Args:
        tx: Training features; the ICA model is fitted on these.
        ty: Training labels.
        rx: Test features; projected with the model fitted on ``tx``.
        ry: Test labels.
    """
    compressor = ICA(whiten=True)  # for some people, whiten needs to be off
    newtx = compressor.fit_transform(tx)
    # Only transform the test set: refitting on it would replace the
    # components learned from the training data.
    newrx = compressor.transform(rx)
    kurtS = kurtosis(compressor.components_, axis=1)
    kurtIdx = np.argmax(kurtS)
    # Single-argument print calls behave the same on Python 2 and 3.
    print(kurtS)
    print('Kurtosis: ' + str(kurtS[kurtIdx]))
    # em(newtx, ty, newrx, ry, add="wICAtr", times=10)
    # km(newtx, ty, newrx, ry, add="wICAtr", times=10)
    nn(newtx, ty, newrx, ry, add="wICA")
Ejemplo n.º 14
0
def transform(X, factors, get_model, method, y=None):
    """Project ``X`` onto a latent space with the named method.

    Args:
        X: Input data.  Returned untouched for ``method`` ``None``/"raw".
        factors: Number of latent factors.  A falsy value or ``"full"``
            means the product of ``X.shape[1:]`` (minus one for LDA).
        get_model: When true, return ``(latent, model)`` instead of just
            ``latent``.
        method: ``None`` or one of "raw", "pca", "lda", "ica", "cca",
            "pls" (case-insensitive).
        y: Targets; required for "lda", "cca" and "pls", ignored with a
            warning otherwise.

    Returns:
        ``X`` itself for ``None``/"raw"; otherwise the latent
        representation, optionally paired with the fitted model.

    Raises:
        RuntimeError: If ``method`` is not a string, or ``y`` is missing for
            a method that needs it.
        ValueError: If ``method`` is not a recognised name.
    """
    if method is None:
        return X
    if not isinstance(method, str):
        raise RuntimeError("Please supply a method name (pca, lda, ica, cca, pls)")
    # Normalise the case before any comparison so "RAW" and "LDA" behave
    # like their lower-case spellings.
    method = method.lower()
    if method == "raw":
        return X

    if method not in ("pca", "lda", "ica", "cca", "pls"):
        raise ValueError("Method {} unrecognized!".format(method))

    supervised = method in ("lda", "cca", "pls")
    # Check for y up front: the "pls" branch reads ``y.dtype`` and would
    # otherwise fail with an AttributeError instead of this message.
    if supervised and y is None:
        raise RuntimeError("y must be supplied for {}!".format(method))

    if not factors or factors == "full":
        factors = int(np.prod(X.shape[1:]))
        if method == "lda":
            factors -= 1

    if method == "pca":
        from sklearn.decomposition import PCA
        model = PCA(n_components=factors, whiten=True)
    elif method == "lda":
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
        model = LDA(n_components=factors)
    elif method == "ica":
        from sklearn.decomposition import FastICA as ICA
        model = ICA(n_components=factors)
    elif method == "cca":
        from sklearn.cross_decomposition import CCA
        model = CCA(n_components=factors)
    else:  # "pls"
        from sklearn.cross_decomposition import PLSRegression as PLS
        model = PLS(n_components=factors)
        # Non-numeric targets are dummy-coded before regression.
        if str(y.dtype)[:3] not in ("flo", "int"):
            y = dummycode(y, get_translator=False)

    X = rtm(X)
    if supervised:
        latent = model.fit_transform(X, y)
    else:
        if y is not None:
            warnings.warn("y supplied for {}. Ignoring!".format(method))
        latent = model.fit_transform(X)

    # Some estimators return a tuple; keep only the first element.
    if isinstance(latent, tuple):
        latent = latent[0]
    if get_model:
        return latent, model
    return latent
    def find_ica(self, n_comp, data=None):
        """Plot summed absolute kurtosis of ICA components for 2..n_comp.

        For every component count the kurtosis is averaged over ten random
        80/20 shuffle splits, separately for the training and the held-out
        part, then printed as a table and plotted.

        Args:
            n_comp: Largest number of components to try (inclusive).
            data: Frame to analyse; rows are selected with ``.iloc``.
                Defaults to ``self.train``.
        """
        if data is None:
            data = self.train

        n_splits = 10
        ss = ShuffleSplit(n_splits=n_splits, test_size=0.2)
        rows = []
        for x in range(2, n_comp + 1):
            kurtosis_train = 0
            kurtosis_cv = 0
            # Split the data that is actually analysed; splitting
            # ``self.train`` would yield indices for the wrong frame
            # whenever ``data`` is passed explicitly.
            for train, cv in ss.split(data):
                ica_features = ICA(n_components=x,
                                   algorithm='parallel',
                                   max_iter=500)
                ica_features.fit(data.iloc[train])
                ica_train_data = ica_features.transform(data.iloc[train])
                kurtosis_train += np.sum(np.abs(kurtosis(ica_train_data)))

                # NOTE(review): the model is refitted on the held-out fold
                # rather than reused from the training fold - confirm this
                # is the intended measurement.
                ica_features.fit(data.iloc[cv])
                ica_cv_data = ica_features.transform(data.iloc[cv])
                kurtosis_cv += np.sum(np.abs(kurtosis(ica_cv_data)))

            rows.append({
                "K": x,
                "Kurtosis_Train": kurtosis_train / n_splits,
                "Kurtosis_CV": kurtosis_cv / n_splits
            })

        # Build the frame in one go; ``DataFrame.append`` no longer exists
        # in pandas 2.
        ica_kutosis = pd.DataFrame(
            rows, columns=["K", "Kurtosis_Train", "Kurtosis_CV"])

        print(ica_kutosis)
        plt.plot(ica_kutosis["K"],
                 ica_kutosis["Kurtosis_Train"],
                 color='red',
                 label="Train")
        plt.plot(ica_kutosis["K"],
                 ica_kutosis["Kurtosis_CV"],
                 color='blue',
                 label="CV")
        plt.xlabel("N Components")
        plt.ylabel("Absolute Sum Kurtosis")
        plt.title("ICA: N Components vs Kurtosis")
        plt.grid()
        plt.legend()
        plt.show()
Ejemplo n.º 16
0
def ICA_experiment(X, y, title, folder=""):
    """Save a plot of mean absolute component kurtosis per ICA size.

    Component counts run from 2 up to, but not including, the number of
    columns of ``X``.  The figure is written to ``folder + '/ICA.png'``.
    ``y`` is accepted but not used.
    """
    n_components_range = list(np.arange(2, X.shape[1], 1))
    ica = ICA(random_state=200)

    def mean_abs_kurtosis(n):
        # Refit the shared estimator with n components and score the result.
        ica.set_params(n_components=n)
        components = pd.DataFrame(ica.fit_transform(X))
        return components.kurt(axis=0).abs().mean()

    kurtosis_scores = [mean_abs_kurtosis(n) for n in n_components_range]

    plt.figure()
    plt.title("ICA Kurtosis: " + title)
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(n_components_range, kurtosis_scores)
    plt.savefig(folder + '/ICA.png')
    plt.close()
Ejemplo n.º 17
0
def run_ICA(X, title):
    """Show mean absolute component kurtosis for a range of ICA sizes.

    Sizes start at 2 and increase in steps of 3 while below
    ``X.shape[1] - 1``; the full feature count is always tried last.
    """
    n_features = X.shape[1]
    dims = list(np.arange(2, (n_features - 1), 3)) + [n_features]
    ica = ICA(random_state=5)

    kurt = []
    for n_components in dims:
        ica.set_params(n_components=n_components)
        projected = pd.DataFrame(ica.fit_transform(X))
        kurt.append(projected.kurt(axis=0).abs().mean())

    plt.figure()
    plt.title("ICA Kurtosis: " + title)
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(dims, kurt, 'b-')
    plt.grid(False)
    plt.show()
Ejemplo n.º 18
0
def do_ica(data, class_label):
    """Fit ICA on ``data`` and plot the first two components.

    Points are drawn as blue circles with ``class_label`` as the legend
    entry, on axes fixed to [-0.5, 0.5] in both directions.
    """
    model = ICA()
    model.fit(data)
    projected = model.transform(data)

    first, second = projected[:, 0], projected[:, 1]
    plt.plot(first, second, 'o',
             markersize=7, color='blue', alpha=0.5, label=class_label)

    plt.xlabel('x_values')
    plt.ylabel('y_values')
    axis_limits = [-0.5, 0.5]
    plt.xlim(axis_limits)
    plt.ylim(axis_limits)
    plt.legend()
    plt.title('Transformed samples versus original data')

    plt.show()
Ejemplo n.º 19
0
def run_ICA(X, y, plot_path):
    """Save a plot of mean absolute component kurtosis per ICA size.

    Sizes start at 2 and increase in steps of 3 while below
    ``X.shape[1] - 1``; the full feature count is always tried last.  Each
    size is printed as it is processed.  The figure is written to
    ``plot_path + '/ICA_DR'``.  ``y`` is accepted but not used.
    """
    feature_count = X.shape[1]
    dims = list(np.arange(2, (feature_count - 1), 3))
    dims.append(feature_count)
    ica = ICA(random_state=1, max_iter=10)

    def score(dim):
        # Progress output, then the kurtosis averaged over all components.
        print(dim)
        ica.set_params(n_components=dim)
        return pd.DataFrame(ica.fit_transform(X)).kurt(axis=0).abs().mean()

    kurt = [score(dim) for dim in dims]

    plt.figure()
    plt.title("ICA Kurtosis")
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(dims, kurt, 'b-')
    plt.grid(False)
    plt.savefig(plot_path + '/ICA_DR')
Ejemplo n.º 20
0
def run_ICA(X, y, title):
    """Save a plot of mean absolute component kurtosis per ICA size.

    Sizes start at 2 and increase in steps of 3 while below
    ``X.shape[1] - 1``; the full feature count is always tried last.  The
    figure goes to ``<plotsdir>/<title>/ICA Kurtosis.png``; the directory
    is created when missing.  ``y`` is accepted but not used.
    """
    width = X.shape[1]
    dims = list(np.arange(2, (width - 1), 3)) + [width]
    ica = ICA(random_state=randomSeed, whiten=True)

    kurt = []
    for size in dims:
        ica.set_params(n_components=size)
        component_kurtosis = pd.DataFrame(ica.fit_transform(X)).kurt(axis=0)
        kurt.append(component_kurtosis.abs().mean())

    plt.figure()
    plt.title("ICA Kurtosis: " + title)
    plt.xlabel("Independent Components")
    plt.ylabel("Avg Kurtosis Across IC")
    plt.plot(dims, kurt, 'b-')
    plt.grid(False)

    out_dir = plotsdir + "/" + title
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    plt.savefig(out_dir + "/ICA Kurtosis.png")
Ejemplo n.º 21
0
# Parse the comma-separated input: every field except the last two is a
# feature, the second-to-last field is the integer label.
X = []
y = []

for l in fin:
    fields = l.split(",")  # split once per line instead of twice
    X.append(fields[:-2])
    y.append(int(fields[-2]))

# A single conversion is enough; repeating it was a no-op.
X = np.array(X, dtype=np.float32)

# Standardise the features before ICA.
scaler = StandardScaler()
scaler.fit(X)

X = scaler.transform(X)

# Reduce to four independent components.
ica = ICA(n_components=4)
ica.fit(X)

X = ica.transform(X)


#plot_bic(X)

# Two-component Gaussian mixture on the reduced data.
gmm = mixture.GaussianMixture(n_components=2, covariance_type='tied')
gmm.fit(X)



newX = []

for pt in X:
Ejemplo n.º 22
0

# Component-count sweeps on the diabetes data.
run_PCA(diabetes_X, diabetes_Y, "Diabetes Data")
run_ICA(diabetes_X, diabetes_Y, "Diabetes Data")
run_RCA(diabetes_X, diabetes_Y, "Diabetes Data")


# Component-count sweeps on the training part of the credit data.  The
# labels must come from the same split as the features, so pass y_train
# rather than the full, unsplit creditY.
X_train, X_test, y_train, y_test = train_test_split(np.array(creditX), np.array(creditY), test_size=0.2)
run_PCA(X_train, y_train, "Credit Data")
run_ICA(X_train, y_train, "Credit Data")
run_RCA(X_train, y_train, "Credit Data")


# Fixed-size projections of the diabetes data for the clustering runs.
imp_diabetes, topcols_diabetes = run_RFC(diabetes_X, diabetes_Y, df_diabetes)
pca_diabetes = PCA(n_components=3, random_state=5).fit_transform(diabetes_X)
ica_diabetes = ICA(n_components=5, random_state=5).fit_transform(diabetes_X)
# NOTE(review): named "rca" but built with ICA - looks like a copy-paste
# slip; confirm which estimator is intended before changing it.
rca_diabetes = ICA(n_components=6, random_state=5).fit_transform(diabetes_X)
rfc_diabetes = df_diabetes[topcols_diabetes]
rfc_diabetes = np.array(rfc_diabetes.values, dtype='int64')



run_kmeans(pca_diabetes, diabetes_Y, 'PCA Diabetes Data')
run_kmeans(ica_diabetes, diabetes_Y, 'ICA Diabetes Data')
run_kmeans(rca_diabetes, diabetes_Y, 'RCA Diabetes Data')
run_kmeans(rfc_diabetes, diabetes_Y, 'RFC Diabetes Data')


run_EM(pca_diabetes, diabetes_Y, 'PCA Diabetes Data')
run_EM(ica_diabetes, diabetes_Y, 'ICA Diabetes Data')
run_EM(rca_diabetes, diabetes_Y, 'RCA Diabetes Data')
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import FastICA as ICA
import sklearn.model_selection as k
# Load the data; 'credit_application' is the target column.
r=pd.read_csv('bank_contacts.csv')
x=r.drop('credit_application',axis=1)
y=r['credit_application']
# 80/20 split with a fixed seed.
train_x,test_x,train_y,test_y=k.train_test_split(x,y,test_size=0.2,random_state=42)
# Standardise using statistics from the training split only.
sc=StandardScaler()
train_x=sc.fit_transform(train_x)
test_x=sc.transform(test_x)
# Fit ICA on the training split, then apply the same transform to the test
# split.
ica=ICA(n_components=4,random_state=42)
train_x=ica.fit_transform(train_x,train_y)
test_x=ica.transform(test_x)
from sklearn.ensemble import RandomForestClassifier

# Shallow random forest trained on the four ICA components.
classifier = RandomForestClassifier(max_depth=2, random_state=0)

classifier.fit(train_x,train_y)
pred = classifier.predict(test_x)
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

print(confusion_matrix(test_y,pred))
print('Accuracy:',accuracy_score(test_y,pred))
# Scatter the first three ICA components of the test split against the
# predicted class.
plt.scatter(pred,test_x[:,0],marker='o')
plt.scatter(pred,test_x[:,1],marker='o')
plt.scatter(pred,test_x[:,2],marker='o')
plt.show()
Ejemplo n.º 24
0
        ica = ICA(n_components = i)
        Z = ica.fit_transform(X)
        kur[i-1] = np.mean(kurtosis(Z))
        scr[i-1] = np.amin(kurtosis(Z))



    plt.plot(ks,kur,ks,scr)
    plt.title("Adult Income Data - ICA")
    plt.xlabel("# of components")
    plt.ylabel("Score")
    plt.legend(["kurtosis-avg",'kurtosis-min axis'])
    plt.show()
    """

    pca = ICA(n_components=6)
    Z = pca.fit_transform(X)
    for k in ks:
        clust = KMeans(n_clusters=k).fit(Z)
        W = clust.predict(Z)
        ss[k - 1] = clust.inertia_

    plt.plot(ks, ss)
    plt.title("Wine Quality Data - KM")
    plt.xlabel("# of clusters")
    plt.ylabel("Sum of Squares")
    plt.legend(["kmeans"])
    plt.show()

    for k in ks:
        clust = GaussianMixture(n_components=k).fit(Z)
Ejemplo n.º 25
0
# play(song*3)

plt.plot(np.arange(1000)**2)

# Two sine waves of different frequency, 256 samples each.
signal1 = np.sin(np.arange(256) * 0.15)
signal2 = np.sin(np.arange(256) * 0.06)

# 200 random linear mixtures of the two signals, plus uniform noise.
combinations = np.dot(np.random.random((200, 2)), np.vstack(
    (signal1, signal2)))
combinations += np.random.random(combinations.shape) * .1

# Use PCA to pick the smallest component count whose cumulative explained
# variance ratio exceeds 0.99.
PCA_t = PCA(n_components=10)
PCA_t.fit(combinations)
ideal_n = np.where(np.cumsum(PCA_t.explained_variance_ratio_) > .99)[0][0] + 1

# Unmix with ICA using that component count.
ICA_t = ICA(n_components=ideal_n, tol=1e-8)
scores = ICA_t.fit_transform(combinations)
mixings = ICA_t.mixing_
loadings = ICA_t.components_

# tops = mixings

# 2x2 grid: the two source signals on top, all mixtures bottom-left, the
# ICA component rows bottom-right.
plt.close()
fig, ax = plt.subplots(2, 2)
ax[0, 0].plot(signal1)
ax[0, 1].plot(signal2)
for i in range(combinations.shape[0]):
    ax[1, 0].plot(combinations[i, :])

for i in range(loadings.shape[0]):
    ax[1, 1].plot(loadings[i, :])
Ejemplo n.º 26
0
 def separateComponents(self):
     """Run ICA on ``self.signals`` and store the separated components.

     Sets ``self.ica``, ``self.components`` (the transposed ICA output),
     ``self.amUnits`` (``[max, min]`` over all components) and
     ``self.selectedComponents`` (every component index).
     """
     n_components = len(self.signals[0])
     self.ica = ICA(n_components=n_components, max_iter=300)
     sources = self.ica.fit_transform(self.signals)
     self.components = np.matrix.transpose(sources)
     highest = np.amax(self.components)
     lowest = np.amin(self.components)
     self.amUnits = [highest, lowest]
     self.selectedComponents = list(range(len(self.components)))
Ejemplo n.º 27
0
def get_ica_data(X, components):
    """Return ``X`` projected onto ``components`` ICA components.

    The estimator is seeded with the module-level ``randomSeed``.
    """
    model = ICA(n_components=components, random_state=randomSeed)
    return model.fit_transform(X)
Ejemplo n.º 28
0
def main():
    """Compare an MLP on the wine data under several feature reductions.

    Loads the white-wine CSV, binarises ``quality`` (<= 6 becomes 0, > 6
    becomes 1), standardises the features, builds PCA / ICA / RCA
    projections plus the ``run_RFC`` column selection, and evaluates the
    same MLP configuration on a fresh 70/30 split of each feature set.
    """
    df = pd.read_csv("../Dataset/winequality-white.csv", delimiter=";")
    seed = 200
    np.random.seed(seed)

    # Work out both index sets before overwriting any quality value.
    quality_col = df.columns.get_loc('quality')
    lowquality = df.loc[df['quality'] <= 6].index
    highquality = df.loc[df['quality'] > 6].index
    df.iloc[lowquality, quality_col] = 0
    df.iloc[highquality, quality_col] = 1

    X = np.array(df.iloc[:, 0:-1])
    wine_Y = np.array(df.iloc[:, -1])

    wine_x = StandardScaler().fit_transform(X)

    pca_wine = PCA(n_components=7, random_state=seed).fit_transform(wine_x)
    ica_wine = ICA(n_components=9, random_state=seed).fit_transform(wine_x)
    rca_wine = RCA(n_components=8, random_state=seed).fit_transform(wine_x)
    imp_wine, top_columns_wine = run_RFC(wine_x, wine_Y, df)

    rfc_wine = np.array(df[top_columns_wine].values, dtype='int64')

    # (title passed to evaluate, feature matrix), in evaluation order.
    experiments = (
        ("FullDataset", wine_x),
        ("PCA", pca_wine),
        ("ICA", ica_wine),
        ("RP", rca_wine),
        ("RFC", rfc_wine),
    )
    for title, features in experiments:
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(features), np.array(wine_Y), test_size=0.30)
        learner = MLPClassifier(hidden_layer_sizes=(22, ),
                                activation='relu',
                                learning_rate_init=0.0051,
                                random_state=seed)
        evaluate(learner, X_train, X_test, y_train, y_test, title=title)
# Banking data: PCA sweep on the training part of an 80/20 split.
ttX,ttY,bankX,bankY = import_data()
X_train, X_test, y_train, y_test = train_test_split(np.array(bankX),np.array(bankY), test_size=0.2)
run_PCA(X_train,y_train,"Banking Data")
#run_ICA(X_train,y_train,"Banking Data")
#run_RCA(bankX,bankY,"Banking Data")
#imp_bank, topcols_bank = run_RFC(X_train,y_train,df_bank)
#imp_bank


# In[15]:


# Titanic data: fixed-size projections for the clustering runs below.
ttX,ttY,bankX,bankY = import_data()
imp_tt, topcols_tt = run_RFC(ttX,ttY,df_tt)
pca_tt = PCA(n_components=10,random_state=5).fit_transform(ttX)
ica_tt = ICA(n_components=38,random_state=5).fit_transform(ttX)
# NOTE(review): named "rca" but built with ICA - looks like a copy-paste
# slip; confirm which estimator is intended.
rca_tt = ICA(n_components=29,random_state=5).fit_transform(ttX)
rfc_tt = df_tt[topcols_tt]
rfc_tt = np.array(rfc_tt.values,dtype='int64')


# In[26]:


# k-means on each projection, with a banner printed before each run.
print('pca')
run_kmeans(pca_tt,ttY,'PCA titanic Data')
print('ica')
run_kmeans(ica_tt,ttY,'ICA titanic Data')
print('rca')
run_kmeans(rca_tt,ttY,'RCA titanic Data')
print('rfc')
# Component-count sweeps and feature importances for the loan data.
# NOTE(review): loanX / loanY are used here before the import_data() call
# below - presumably defined by an earlier cell; verify.
run_PCA(loanX, loanY, "Loan Data")
run_ICA(loanX, loanY, "Loan Data")
run_RCA(loanX, loanY, "Loan Data")
imp_loan, topcols_loan = run_RFC(loanX, loanY, df_loan)

# The same sweeps for the telescope data.
loanX, loanY, telescopeX, telescopeY = import_data()
run_PCA(telescopeX, telescopeY, "Telescope Data")
run_ICA(telescopeX, telescopeY, "Telescope Data")
run_RCA(telescopeX, telescopeY, "Telescope Data")
imp_telescope, topcols_telescope = run_RFC(telescopeX, telescopeY,
                                           df_telescope)

# Fixed-size projections of the loan data for the clustering runs.
loanX, loanY, telescopeX, telescopeY = import_data()
imp_loan, topcols_loan = run_RFC(loanX, loanY, df_loan)
pca_loan = PCA(n_components=4, random_state=5).fit_transform(loanX)
ica_loan = ICA(n_components=8, random_state=5).fit_transform(loanX)
# NOTE(review): named "rca" but built with ICA - looks like a copy-paste
# slip; confirm which estimator is intended.
rca_loan = ICA(n_components=6, random_state=5).fit_transform(loanX)
rfc_loan = df_loan[topcols_loan]
rfc_loan = np.array(rfc_loan.values, dtype='int64')

run_kmeans(pca_loan, loanY, 'PCA loan Data')
run_kmeans(ica_loan, loanY, 'ICA loan Data')
run_kmeans(rca_loan, loanY, 'RCA loan Data')
run_kmeans(rfc_loan, loanY, 'RFC loan Data')

# NOTE(review): recent scikit-learn releases no longer accept n_jobs on
# KMeans - confirm the installed version before running these calls.
evaluate_kmeans(KMeans(n_clusters=10, n_init=10, random_state=100, n_jobs=-1),
                pca_loan, loanY)
evaluate_kmeans(KMeans(n_clusters=10, n_init=10, random_state=100, n_jobs=-1),
                ica_loan, loanY)
evaluate_kmeans(KMeans(n_clusters=5, n_init=10, random_state=100, n_jobs=-1),
                rca_loan, loanY)