def __init__(self,
                 X_train,
                 freq_itemset_dict,
                 rule_list,
                 contrast_params,
                 epsilon=5e-5,
                 eps_step=0.1,
                 reduced_rate=None):
        """Store the inputs on the instance and optionally reduce the features.

        ``X_train`` is always kept unchanged as ``original_features``. When
        ``reduced_rate`` is given, ``sample_features`` becomes the
        ``IncrementalPCA`` projection of ``X_train`` onto
        ``int(X_train.shape[1] * reduced_rate)`` components; otherwise it is
        ``X_train`` itself.

        Args:
            X_train: Feature data. Must expose ``.shape[1]`` when
                ``reduced_rate`` is set (presumably a 2-D samples x features
                array -- TODO confirm against callers).
            freq_itemset_dict: Stored as-is on ``self.freq_itemset_dict``.
            rule_list: Stored as-is on ``self.rule_list``.
            contrast_params: Stored as-is on ``self.contrast_params``.
            epsilon: Stored as-is on ``self.epsilon``.
            eps_step: Stored as-is on ``self.eps_step``.
            reduced_rate: ``None`` to skip the reduction; otherwise the factor
                multiplied with the column count of ``X_train`` to obtain the
                number of PCA components.
        """
        # Plain bookkeeping: every argument is kept on the instance untouched.
        self.rule_list, self.freq_itemset_dict = rule_list, freq_itemset_dict
        self.original_features = self.sample_features = X_train
        self.contrast_params = contrast_params
        self.epsilon, self.eps_step = epsilon, eps_step

        # Without a reduction rate the samples stay identical to the input.
        if reduced_rate is None:
            return

        # int() truncates, so e.g. 5 columns at a rate of 0.5 keep 2 components.
        n_kept = int(X_train.shape[1] * reduced_rate)
        reducer = IncrementalPCA(n_components=n_kept)
        self.sample_features = reducer.fit_transform(X_train)
Ejemplo n.º 2
0
                       'cluster'      : ('', 'Path of clusters file'),
                       'output'      : ('', 'Path of output file'),
                       'title'       : ('Dataset', 'Title of charts')
                       })
 
 # Abort with exit status 2 when the command-line arguments are rejected.
 if not config.load(sys.argv):
     print('Argument is not correct. Please try again')
     sys.exit(2)

 # Load the feature matrix together with the rules it was loaded with.
 feature_path = config.get_value('feature')
 X, association_rules = load_feature_vectors(feature_path)

 # Divisor for the dimensionality reduction: the PCA keeps
 # X.shape[1] // m components.
 m = 2
 print('dimensional reduce: ', m, sep='')

 pca = IncrementalPCA(n_components=X.shape[1] // m)
 new_X = pca.fit_transform(X)

 cluster_path = config.get_value('cluster')
 clusters, number_of_clusters = load_clusters(cluster_path)
 print(number_of_clusters)

 # One colour more than there are clusters; every cluster id is shifted by +1
 # before indexing (presumably so that an id of -1 maps to colour 0 --
 # TODO confirm against load_clusters).
 unique_colors = get_N_HexCol(number_of_clusters + 1)
 Y = [unique_colors[clusters[rule] + 1] for rule in association_rules]


 # Plotting is currently disabled; the calls are kept for reference.
 #plt.scatter(new_X[:,0], new_X[:,1], c = np.array(Y), alpha = 0.9, s = 10)
 #plt.title(config.get_value('title'))
 #plt.savefig(config.get_value('output'), format='PNG',bbox_inches='tight')

 #np.array(Y)
Ejemplo n.º 3
0
        print()

    # Show PCA: look up a fixed set of vocabulary entries in ``model.wv``,
    # project their vectors onto two components with IncrementalPCA and draw
    # them as a scatter plot with one text annotation per entry.
    # NOTE(review): ``model`` is defined outside the visible code; it looks
    # like a word-embedding model keyed by "ID:"/"LIT:"-prefixed tokens --
    # confirm.
    # NOTE(review): "ID:list" is listed twice below, so its vector is fitted
    # and annotated twice -- confirm whether that is intentional.
    pca_queries = [
        "ID:wrapper", "ID:container", "ID:msg", "ID:alert", "ID:list",
        "ID:seq", "ID:lst", "ID:list", "LIT:error"
    ]
    pca_vectors = []  # one vector per query, in query order
    pca_labels = []  # display label per query, parallel to pca_vectors
    # The enumerate() index is unused; only the query name matters here.
    for _, name in enumerate(pca_queries):
        # Build the display label: literals lose the "LIT:" marker and are
        # wrapped in double quotes, identifiers just lose the "ID:" marker.
        # str.replace removes every occurrence of the marker, not only the
        # leading one.
        if name.startswith("LIT:"):
            print_name = "\"" + name.replace(
                "LIT:", "") + "\""  # assumes string literals only
        else:
            print_name = name.replace("ID:", "")
        pca_labels.append(print_name)
        # The lookup uses the full prefixed name, not the display label.
        pca_vectors.append(model.wv[name])

    # Reduce the collected vectors to two components for plotting.
    ipca = IncrementalPCA(n_components=2)
    reduced_vectors = ipca.fit_transform(pca_vectors)

    fig, ax = pyplot.subplots()
    # First component on the x axis, second component on the y axis.
    x = reduced_vectors[:, 0]
    y = reduced_vectors[:, 1]
    ax.scatter(x, y)
    # Label each point; pca_labels and the reduced rows share the same order.
    for idx, label in enumerate(pca_labels):
        #escaped_label = re.escape(label)
        ax.annotate(label, (x[idx], y[idx]))

    pyplot.show()