y_pred = clf.predict(X)
        y_pred = np.asarray(y_pred)
        anomalies = np.where(y_pred == 'anomaly')[0]

        all_anom.append(DataFrame(data.ix[anomalies]))

        if save_models:
            joblib.dump(clf, "OneClassSVM_"+str(500+i)+".pkl")
        del clf

    anom_df = concat(all_anom)
    anom_df.to_csv("anomalies_lsq.csv")

if view:
    import triangle
    from dim_red_vis import dim_red

    # First reduction runs with verbose output switched on; the projection
    # it returns is replaced by the second call below.
    subspace = dim_red(X, verbose=True)

    # Second reduction keeps six components, which are handed to
    # triangle.corner with one label per component (c1 .. c6).
    subspace = dim_red(X, n_comp=6, verbose=False)

    fig = triangle.corner(subspace,
                          labels=['c%d' % i for i in range(1, 7)])
    p.show()
    from dim_red_vis import dim_red
    # NOTE(review): hist2d is called below without a visible import;
    # presumably this script is what defines it -- confirm.
    execfile("/Users/eric/Dropbox/Phys 595/Phys-595/project_code/Machine Learning/hist2d.py")

    data = read_csv("all_spec_data_cleaned.csv")

    # Every column after the first goes into the feature matrix.
    X = np.asarray(data[data.columns[1:]])

    # Centre each column on its mean and scale it by its standard deviation.
    X = (X - X.mean(axis=0)) / X.std(axis=0)

    # Project the standardized data and histogram the first two components.
    subspace = dim_red(X, verbose=False)
    hist2d(subspace[:, 0], subspace[:, 1], bins=10)

    # Overplot anomalies on the space; the values read from the CSV are used
    # directly as row indices into the projection.
    svc_anoms = np.asarray(read_csv("svc_anom_cut.csv")["Unnamed: 0.1"])
    p.scatter(subspace[svc_anoms, 0], subspace[svc_anoms, 1],
              c='r', s=30, label="SVC Anomalies")

    lsq_anoms = np.unique(read_csv("anomalies_lsq.csv")["Unnamed: 0"])
    p.scatter(subspace[lsq_anoms, 0],
              subspace[lsq_anoms, 1],
              c='b',
Exemple #3
0
        y_pred = clf.predict(X)

        anomalies = np.where(y_pred == -1)[0]

        all_anom.append(DataFrame(data.ix[anomalies]))
        print "Number of anomalies found: " + str(anomalies.shape)
        if save_models:
            joblib.dump(clf, "OneClassSVM_" + str(500 + i) + ".pkl")
        del clf

    anom_df = concat(all_anom)
    anom_df.to_csv("anomalies_ocsvm.csv")

if view:
    import triangle
    from dim_red_vis import dim_red

    # First reduction runs with verbose output switched on; the projection
    # it returns is replaced by the second call below.
    subspace = dim_red(X, verbose=True)

    # Second reduction keeps six components, which are handed to
    # triangle.corner with one label per component (c1 .. c6).
    subspace = dim_red(X, n_comp=6, verbose=False)

    fig = triangle.corner(subspace,
                          labels=['c%d' % i for i in range(1, 7)])
    p.show()
if make_pca:
    import triangle
    from dim_red_vis import dim_red
    # NOTE(review): hist2d is called below without a visible import;
    # presumably this script is what defines it -- confirm. execfile only
    # exists on Python 2.
    execfile("/Users/eric/Dropbox/Phys 595/Phys-595/project_code/Machine Learning/hist2d.py")

    data = read_csv("all_spec_data_cleaned.csv")

    # Every column after the first goes into the feature matrix.
    X = data[data.columns[1:]]
    X = np.asarray(X)

    # Standardize the data: zero mean and unit standard deviation per column.
    X = (X - np.mean(X, axis=0))/np.std(X, axis=0)

    # Use PCA to look at a projection of the set.

    subspace = dim_red(X, verbose=False)

    # 2D histogram of the first two projected components.
    hist2d(subspace[:, 0], subspace[:, 1], **{'bins': 10})

    # Overplot anomalies on the space. The values read from each CSV are used
    # directly as row indices into the projection.
    svc_anoms = np.asarray(read_csv("svc_anom_cut.csv")["Unnamed: 0.1"])
    p.scatter(subspace[svc_anoms, 0], subspace[svc_anoms, 1], c='r', s=30,
              label="SVC Anomalies")

    lsq_anoms = np.unique(read_csv("anomalies_lsq.csv")["Unnamed: 0"])
    p.scatter(subspace[lsq_anoms, 0], subspace[lsq_anoms, 1], c='b', s=30,
              label="LSQ Anomalies")

    # Rows flagged by both methods. np.intersect1d keeps the integer dtype of
    # its inputs even when nothing overlaps; building the array from an empty
    # set would produce a float array, which cannot be used as an index.
    both_anoms = np.intersect1d(svc_anoms, lsq_anoms)
    p.scatter(subspace[both_anoms, 0], subspace[both_anoms, 1], c='g', s=30,
              label="Both")