def Analysis(vector, K=2):
    """Cluster ``vector`` with k-means in a 2-D PCA projection and plot it.

    The input is standardised with ``StandardScaler``, projected onto its
    first two principal components with ``PCA``, and grouped into ``K``
    clusters with ``KMeans``. Every projected point is drawn in the colour
    of its cluster, the centroids are marked with an "x", and the figure is
    saved as ``Results.png`` in the current working directory.

    Args:
        vector: Array-like accepted by ``np.array``.
            (presumably 2-D, one row per sample -- confirm against callers)
        K: Number of clusters requested from ``KMeans``.

    Returns:
        The ``labels_`` array of the fitted ``KMeans`` model, i.e. one
        cluster index in ``range(K)`` per input row.
    """
    arr = np.array(vector)

    # Standardise each feature to zero mean and unit variance so that no
    # single feature dominates the principal components.
    scaled = StandardScaler().fit_transform(arr)

    # Project onto the first two principal components (the plot is 2-D).
    components = PCA(n_components=2).fit_transform(scaled)

    # ``n_jobs`` is deliberately not passed: KMeans deprecated it in
    # scikit-learn 0.23 and removed it in 1.0, where passing it raises
    # TypeError. Only ``fit`` is needed; the transformed distances that
    # ``fit_transform`` would return were never used.
    kmeans = KMeans(n_clusters=K)
    kmeans.fit(components)

    # Labels are assigned by the algorithm: with 2 clusters they are 0 or 1.
    labels = kmeans.labels_
    print("labels: ", labels)
    centers = kmeans.cluster_centers_

    # Cycle through the palette so that K > len(colors) no longer raises
    # IndexError; for K <= 5 the colours are the same as before.
    colors = ["r.", "g.", "b.", "y.", "c."]
    for point, label in zip(components, labels):
        plt.plot(point[0], point[1], colors[label % len(colors)], markersize=10)

    plt.scatter(centers[:, 0], centers[:, 1], marker="x", s=150, linewidths=10, zorder=15)
    plt.xlabel("1st Principle Component")
    plt.ylabel("2nd Principle Component")
    plt.title("Styles Clusters")
    plt.savefig("Results.png")
    return labels
# Statistical analysis: `data` (built earlier, outside this chunk) receives one
# more attribute value before being classified.
data.append(1)
print("the attribute array generated is \n",data)

# Random forest algorithm: train on the labelled dataset, then score `data`.
df = pd.read_csv("D:\\Codes\\Sem 7\\PMMS\\Dataset\\Training_Dataset.csv")
attributes = df.iloc[:, 0:30].values  # first 30 columns are the features
result = df.Result                    # target column
attributes_train, attributes_test, result_train, result_test = train_test_split(
    attributes, result, test_size=0.5, random_state=0
)

# Fit the scaler on the training split only and reuse it everywhere else.
sc = StandardScaler()
attributes_train = sc.fit_transform(attributes_train)
# Fix: the scaled test set used to be stored under the misspelled name
# `attribites_test`, so the prediction below ran on unscaled data.
attributes_test = sc.transform(attributes_test)
attribites_test = attributes_test  # old misspelled name kept as an alias

regressor = RandomForestRegressor(n_estimators=1000, random_state=42)
regressor.fit(attributes_train, result_train)
resul_pred = regressor.predict(attributes_test)

# Wrap the single sample in a list so it becomes a (1, n_features) array.
# The former `array.reshape(-1,1)` discarded its result and has been removed.
new_input = [data]
array = np.array(new_input)
# Fix: the model was trained on standardised features, so the new sample
# must go through the same fitted scaler before prediction.
new_output = regressor.predict(sc.transform(array))
print(new_output)
# A negative predicted score is reported as phishing.
if new_output[0] < 0:
    print("The website is a phishing website")
# Compare six classifiers on growing prefixes of the feature vectors.
# For every prefix length a single stratified 70/30 split is drawn, the
# features are standardised, and the six accuracies are appended (as one list)
# to `accuracy_finali`, which is defined outside this chunk.
shuffle_split = StratifiedShuffleSplit(n_splits=1, test_size=0.3, train_size=0.7)
Wi = np.array(list(valori_ts))
Yi = np.array(list(classi))

# Evaluation helpers, listed in the order their scores are stored.
evaluators = (
    accuracyRandomForest,
    accuracyDecisionTree,
    accuracyKNN,
    accuracySVC,
    accuracyNaiveBayes,
    accuracyAdaBoost,
)

for i in range(NUM_FEATURES_TOT):
    # Keep only the first i + 1 entries of every feature vector.
    Xi = np.array([f[:i + 1] for f in valori_features])

    for train_index, test_index in shuffle_split.split(Xi, Yi):
        X_train, Y_train = Xi[train_index], Yi[train_index]
        X_test, Y_test = Xi[test_index], Yi[test_index]

        # Scaling statistics come from the training part only.
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)

        acc_values = [
            evaluate(X_train, X_test, Y_train, Y_test) for evaluate in evaluators
        ]
        # Individual names are kept for any code that reads them afterwards.
        (accuracyRF, accuracyDT, accuracyKN,
         accuracySVM, accuracyNB, accuracyADA) = acc_values
        accuracy_finali.append(acc_values)
################################## for n in range(1, len(df.columns) + 1): ica = FastICA(n_components=n) X_new = ica.fit_transform(X) # Reconstruct signals acc, clusters = run_clustering(X_new) print "average EM score after X modified with ICA", n, "components, clusters =", clusters, "silhouette score =", acc if dralg == 'rp': ####################################################### ######## KMeans after Sparse Random Projection ######## ####################################################### for n in range(1, len(df.columns) + 1): # create the random projection sp = SparseRandomProjection(n_components = n) X_new = sp.fit_transform(X) acc, clusters = run_clustering(X_new) print "average EM score after X modified with Random Projectsion", n, "components, clusters =", clusters, "silhouette score =", acc if dralg == 'lda': ################################## ######## KMeans after LDA ######## ################################## for n in range(1, len(df.columns) + 1): for solver in ['svd', 'eigen']: # create the random projection lda = LDA(n_components = n, solver = solver) X_new = lda.fit_transform(X, y) acc, clusters = run_clustering(X_new) print "average EM score after X modified with LDA", n, "components, clusters =", clusters, "silhouette score =", acc