# Fragment of an anomaly-detection script. The original line breaks and
# indentation were lost, so the nesting of the statements below (what sits
# inside a loop or under each `if`) cannot be recovered from this line alone.
#
# In order, the statements:
#   * predict labels for X with `clf` and convert them to a NumPy array;
#   * take the positions where the prediction equals the string 'anomaly';
#   * append the matching rows of `data` to the list `all_anom`;
#   * when `save_models` is set, dump `clf` with joblib to
#     "OneClassSVM_<500+i>.pkl";
#   * delete `clf`;
#   * concatenate `all_anom` and write the result to "anomalies_lsq.csv";
#   * when `view` is set, call dim_red twice (default components with
#     verbose=True, then n_comp=6) and draw a triangle.corner plot of the
#     second result with labels c1..c6 before calling p.show().
#
# NOTE(review): `clf`, `X`, `data`, `all_anom`, `i`, `save_models`, `view` and
#   `p` are defined outside this fragment; `i` presumably comes from an
#   enclosing loop -- confirm.
# NOTE(review): the model file prefix is "OneClassSVM_" although the results go
#   to "anomalies_lsq.csv" -- verify this cannot overwrite models saved by a
#   different run.
# NOTE(review): DataFrame.ix is deprecated in newer pandas releases;
#   `anomalies` holds positions returned by np.where, so .iloc is presumably
#   the intended accessor -- confirm against the index of `data`.
# NOTE(review): the first dim_red result is overwritten immediately, so that
#   call only matters for whatever verbose=True produces as a side effect.
y_pred = clf.predict(X) y_pred = np.asarray(y_pred) anomalies = np.where(y_pred == 'anomaly')[0] all_anom.append(DataFrame(data.ix[anomalies])) if save_models: joblib.dump(clf, "OneClassSVM_"+str(500+i)+".pkl") del clf anom_df = concat(all_anom) anom_df.to_csv("anomalies_lsq.csv") if view: import triangle from dim_red_vis import dim_red # Use PCA to look at a projection of the set. subspace = dim_red(X, verbose=True) # Do it again with a higher dimension, then project that subspace = dim_red(X, n_comp=6, verbose=False) fig = \ triangle.corner(subspace, labels=['c'+str(i) for i in range(1, 7)]) p.show()
# Fragment of a plotting script; line breaks were lost and the text is cut off
# inside the final p.scatter(...) call, so the code is left untouched.
#
# In order, the statements:
#   * import dim_red and execfile() a hist2d.py script from an absolute path;
#   * read "all_spec_data_cleaned.csv" and take every column except the first
#     as the array X;
#   * standardize X column-wise (subtract the mean, divide by the standard
#     deviation along axis 0);
#   * compute `subspace` with dim_red and pass its first two columns to hist2d
#     with bins=10;
#   * read row numbers from the "Unnamed: 0.1" column of "svc_anom_cut.csv"
#     and scatter those rows of `subspace` in red ("SVC Anomalies");
#   * read the unique values of the "Unnamed: 0" column of
#     "anomalies_lsq.csv" and begin a second scatter call in blue.
#
# NOTE(review): execfile is a Python 2 builtin and the path is specific to one
#   machine; it presumably defines `hist2d` -- confirm.
# NOTE(review): the CSV columns are used directly as row indices into
#   `subspace`, which assumes they hold integer row positions -- confirm.
# NOTE(review): a column of X with zero standard deviation would make the
#   standardization divide by zero.
from dim_red_vis import dim_red execfile( "/Users/eric/Dropbox/Phys 595/Phys-595/project_code/Machine Learning/hist2d.py" ) data = read_csv("all_spec_data_cleaned.csv") X = data[data.columns[1:]] X = np.asarray(X) # Standardize the data X = (X - np.mean(X, axis=0)) / np.std(X, axis=0) # Use PCA to look at a projection of the set. subspace = dim_red(X, verbose=False) hist2d(subspace[:, 0], subspace[:, 1], **{'bins': 10}) # Overplot anomalies on the space svc_anoms = np.asarray(read_csv("svc_anom_cut.csv")["Unnamed: 0.1"]) p.scatter(subspace[svc_anoms, 0], subspace[svc_anoms, 1], c='r', s=30, label="SVC Anomalies") lsq_anoms = np.unique(read_csv("anomalies_lsq.csv")["Unnamed: 0"]) p.scatter(subspace[lsq_anoms, 0], subspace[lsq_anoms, 1], c='b',
# Fragment of an anomaly-detection script. The original line breaks and
# indentation were lost, so the nesting of the statements below (what sits
# inside a loop or under each `if`) cannot be recovered from this line alone.
#
# In order, the statements:
#   * predict labels for X with `clf`;
#   * take the positions where the prediction equals -1 (presumably the
#     model's outlier label -- confirm against the estimator in use);
#   * append the matching rows of `data` to the list `all_anom`;
#   * print "Number of anomalies found: " followed by str(anomalies.shape),
#     i.e. a one-element shape tuple such as "(12,)" rather than a bare count;
#   * when `save_models` is set, dump `clf` with joblib to
#     "OneClassSVM_<500+i>.pkl";
#   * delete `clf`;
#   * concatenate `all_anom` and write the result to "anomalies_ocsvm.csv";
#   * when `view` is set, call dim_red twice (default components with
#     verbose=True, then n_comp=6) and draw a triangle.corner plot of the
#     second result with labels c1..c6 before calling p.show().
#
# NOTE(review): `print` is used as a statement, so this is Python 2 code.
# NOTE(review): `clf`, `X`, `data`, `all_anom`, `i`, `save_models`, `view` and
#   `p` are defined outside this fragment; `i` presumably comes from an
#   enclosing loop -- confirm.
# NOTE(review): DataFrame.ix is deprecated in newer pandas releases;
#   `anomalies` holds positions returned by np.where, so .iloc is presumably
#   the intended accessor -- confirm against the index of `data`.
# NOTE(review): the first dim_red result is overwritten immediately, so that
#   call only matters for whatever verbose=True produces as a side effect.
y_pred = clf.predict(X) anomalies = np.where(y_pred == -1)[0] all_anom.append(DataFrame(data.ix[anomalies])) print "Number of anomalies found: " + str(anomalies.shape) if save_models: joblib.dump(clf, "OneClassSVM_" + str(500 + i) + ".pkl") del clf anom_df = concat(all_anom) anom_df.to_csv("anomalies_ocsvm.csv") if view: import triangle from dim_red_vis import dim_red # Use PCA to look at a projection of the set. subspace = dim_red(X, verbose=True) # Do it again with a higher dimension, then project that subspace = dim_red(X, n_comp=6, verbose=False) fig = \ triangle.corner(subspace, labels=['c'+str(i) for i in range(1, 7)]) p.show()
if make_pca:
    # Show the standardized data set in its first two reduced dimensions as a
    # 2D histogram, then overplot the rows flagged as anomalous by the SVC
    # cut, by the LSQ run, and by both.
    import triangle
    from dim_red_vis import dim_red

    # NOTE(review): execfile is a Python 2 builtin and this absolute path is
    # specific to one machine; it presumably defines `hist2d` -- confirm.
    execfile("/Users/eric/Dropbox/Phys 595/Phys-595/project_code/Machine Learning/hist2d.py")

    data = read_csv("all_spec_data_cleaned.csv")

    # Every column except the first is used as a feature.
    X = np.asarray(data[data.columns[1:]])

    # Standardize the data
    # NOTE(review): a constant column has zero standard deviation and would
    # make this division produce NaN/inf.
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    # Use PCA to look at a projection of the set.
    subspace = dim_red(X, verbose=False)

    hist2d(subspace[:, 0], subspace[:, 1], bins=10)

    # Overplot anomalies on the space. The CSV columns are used directly as
    # row indices into `subspace`.
    svc_anoms = np.asarray(read_csv("svc_anom_cut.csv")["Unnamed: 0.1"])
    p.scatter(subspace[svc_anoms, 0], subspace[svc_anoms, 1],
              c='r', s=30, label="SVC Anomalies")

    lsq_anoms = np.unique(read_csv("anomalies_lsq.csv")["Unnamed: 0"])
    p.scatter(subspace[lsq_anoms, 0], subspace[lsq_anoms, 1],
              c='b', s=30, label="LSQ Anomalies")

    # Rows flagged by both methods. np.intersect1d keeps the integer dtype of
    # its inputs even when nothing overlaps; building the array from an empty
    # Python set would give a float array, which cannot be used as an index.
    both_anoms = np.intersect1d(svc_anoms, lsq_anoms)
    p.scatter(subspace[both_anoms, 0], subspace[both_anoms, 1],
              c='g', s=30, label="Both")