Exemple #1
0
def Analysis(vector, K=2):
    """Cluster samples with K-means in 2-D PCA space and save a scatter plot.

    The input is standardized, projected onto its first two principal
    components and clustered with K-means. Every sample is drawn in the
    colour of its cluster, the centroids are overlaid as crosses, and the
    figure is written to ``Results.png`` in the current working directory.

    Args:
        vector: Array-like of samples (rows) by features (columns); it is
            passed to ``StandardScaler`` and ``PCA(n_components=2)``.
        K: Number of clusters to fit.

    Returns:
        The K-means label array, one cluster index per input sample.
    """
    arr = np.array(vector)

    # Standardize each feature to zero mean and unit variance before PCA.
    x = StandardScaler().fit_transform(arr)

    # Project onto the first two principal components.
    components = PCA(n_components=2).fit_transform(x)

    # Fit K-means on the projected points. ``n_jobs`` is deliberately not
    # passed: it was deprecated in scikit-learn 0.23 and removed in 1.0, so
    # supplying it raises TypeError on current versions.
    kmeans = KMeans(n_clusters=K)
    kmeans.fit(components)

    # Labels are assigned by the algorithm: with 2 clusters they are 0 or 1.
    labels = kmeans.labels_
    print("labels: ", labels)
    centers = kmeans.cluster_centers_

    # One plot call per cluster instead of one per point. The palette is
    # cycled so that K larger than the number of colours no longer raises
    # IndexError.
    colors = ["r.", "g.", "b.", "y.", "c."]
    for cluster in range(K):
        members = components[labels == cluster]
        plt.plot(members[:, 0],
                 members[:, 1],
                 colors[cluster % len(colors)],
                 markersize=10)

    # Draw the centroids on top of the sample points.
    plt.scatter(centers[:, 0],
                centers[:, 1],
                marker="x",
                s=150,
                linewidths=10,
                zorder=15)
    plt.xlabel("1st Principle Component")
    plt.ylabel("2nd Principle Component")
    plt.title("Styles Clusters")
    plt.savefig("Results" + ".png")
    return labels
Exemple #2
0
# Statistical analysis.
# NOTE(review): `data` is built earlier in the file; presumably 1 is the value
# of the last attribute -- confirm against the code that fills `data`.
data.append(1)

print("the attribute array generated is \n",data)

# Random forest: train on the labelled dataset, then score the new sample.

df = pd.read_csv("D:\\Codes\\Sem 7\\PMMS\\Dataset\\Training_Dataset.csv")

# The first 30 columns are the input attributes; `Result` is the target.
attributes = df.iloc[:, 0:30].values
result = df.Result

attributes_train, attributes_test, result_train, result_test = train_test_split(
    attributes, result, test_size=0.5, random_state=0)

# Fit the scaler on the training half only and reuse it for everything that
# is later fed to the model.
sc = StandardScaler()
attributes_train = sc.fit_transform(attributes_train)
# Fixed: the scaled test set used to be stored under a misspelled name, so
# the prediction below ran on unscaled test data.
attributes_test = sc.transform(attributes_test)

regressor = RandomForestRegressor(n_estimators=1000, random_state=42)
regressor.fit(attributes_train, result_train)
resul_pred = regressor.predict(attributes_test)

# Wrap the single sample in a list so it becomes one row of a 2-D array, and
# scale it with the scaler fitted above: the forest was trained on
# standardized features, so raw values would be compared against the wrong
# thresholds. (The old `array.reshape(-1,1)` discarded its result and is gone.)
new_input = [data]
array = sc.transform(np.array(new_input))
new_output = regressor.predict(array)
print(new_output)

# A negative prediction is reported as a phishing site.
if new_output[0] < 0:
    print ("The website is a phishing website")
Exemple #3
0
# A single stratified 70/30 shuffle split, re-drawn for every feature count.
shuffle_split = StratifiedShuffleSplit(n_splits=1, test_size=0.3, train_size=0.7)
Wi = np.array(list(valori_ts))
Yi = np.array(list(classi))

# Evaluate every classifier on the first 1, 2, ..., NUM_FEATURES_TOT features.
for i in range(NUM_FEATURES_TOT):

    # Keep only the first i + 1 entries of each feature row.
    Xi = np.array([row[:i + 1] for row in valori_features])

    # n_splits is 1, so the splitter yields exactly one (train, test) pair.
    train_index, test_index = next(shuffle_split.split(Xi, Yi))
    X_train, X_test = Xi[train_index], Xi[test_index]
    Y_train, Y_test = Yi[train_index], Yi[test_index]

    # Fit the scaler on the training part only, then apply it to both parts.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Score each classifier on the same split, in a fixed order.
    split_data = (X_train, X_test, Y_train, Y_test)
    accuracyRF = accuracyRandomForest(*split_data)
    accuracyDT = accuracyDecisionTree(*split_data)
    accuracyKN = accuracyKNN(*split_data)
    accuracySVM = accuracySVC(*split_data)
    accuracyNB = accuracyNaiveBayes(*split_data)
    accuracyADA = accuracyAdaBoost(*split_data)

    # One row of accuracies per feature count.
    acc_values = [accuracyRF, accuracyDT, accuracyKN,
                  accuracySVM, accuracyNB, accuracyADA]
    accuracy_finali.append(acc_values)
Exemple #4
0
    ##################################
    for n in range(1, len(df.columns) + 1):
        ica = FastICA(n_components=n)
        X_new = ica.fit_transform(X)  # Reconstruct signals
        acc, clusters = run_clustering(X_new)
        print "average EM score after X modified with ICA", n, "components, clusters =", clusters, "silhouette score =", acc

        
if dralg == 'rp':
    #######################################################
    ######## KMeans after Sparse Random Projection ########
    #######################################################
    for n in range(1, len(df.columns) + 1):
        # create the random projection
        sp = SparseRandomProjection(n_components = n)
        X_new = sp.fit_transform(X)
        acc, clusters = run_clustering(X_new)
        print "average EM score after X modified with Random Projectsion", n, "components, clusters =", clusters, "silhouette score =", acc

        
if dralg == 'lda':
    ##################################
    ######## KMeans after LDA ########
    ##################################
    for n in range(1, len(df.columns) + 1):
        for solver in ['svd', 'eigen']:
        # create the random projection
            lda = LDA(n_components = n, solver = solver)
            X_new = lda.fit_transform(X, y)
            acc, clusters = run_clustering(X_new)
            print "average EM score after X modified with LDA", n, "components, clusters =", clusters, "silhouette score =", acc