Esempio n. 1
0
def run_prob2a_tree(single_node_cliques=True, n_iterations=5):
    """Construct an IPF object from ``create_prob2a_tree`` and iterate it.

    :param single_node_cliques: forwarded unchanged to
        ``create_prob2a_tree``.
    :param n_iterations: how many times ``ipf.iterate()`` is called. The
        default of 5 is the count that used to be hard-coded; a value <= 0
        performs no iterations.
    :return: the ``IPF`` object after ``n_iterations`` calls to ``iterate``.
    """
    ubg, data = create_prob2a_tree(single_node_cliques)
    ipf = IPF(ubg, data)

    # The loop index itself is never needed, only the repetition count.
    for _ in range(n_iterations):
        ipf.iterate()

    return ipf
Esempio n. 2
0
def run_simple_graph(n_iterations=2):
    """Construct an IPF object from ``create_simple_graph`` and iterate it.

    :param n_iterations: how many times ``ipf.iterate()`` is called. The
        default of 2 matches the two explicit calls this function used to
        make; a value <= 0 performs no iterations.
    :return: the ``IPF`` object after ``n_iterations`` calls to ``iterate``.
    """
    ubg, data = create_simple_graph()
    ipf = IPF(ubg, data)

    for _ in range(n_iterations):
        ipf.iterate()

    return ipf
Esempio n. 3
0
    def generate(self, beta, tol=1e-3):
        """
        Draw one synthetic traffic matrix from ``self.predicted``.

        Every entry is computed as ``(sqrt(predicted[i, j]) + beta * z) ** 2``
        with a fresh ``gauss(0, 1)`` sample ``z`` per entry. The resulting
        array is then handed to ``IPF().run`` together with ``self.row_sums``
        and ``self.col_sums``.

        Example:
        >>> print(sanm.generate(0.1))  # doctest: +SKIP
        [[ 0.19755992  0.40244008]
        [ 0.20244008  0.89755992]]

        :param beta: noise strength parameter
        :param tol: tolerance forwarded to IPF's scaling
        :return: a single instance of a synthetic traffic matrix
        """
        shape = self.predicted.shape
        synthetic = np.zeros(shape)

        # SANM: perturb the square root of each predicted entry, then square.
        # Entries are visited row by row so the random draws are consumed in
        # a fixed order.
        for row in range(shape[0]):
            for col in range(shape[1]):
                root = np.sqrt(self.predicted[row, col])
                perturbed = root + beta * gauss(0, 1)
                synthetic[row, col] = perturbed ** 2

        # NOTE(review): the return value of run() is ignored, so this
        # presumably rescales ``synthetic`` in place -- confirm against IPF.
        IPF().run(synthetic, self.row_sums, self.col_sums, tol=tol)
        return synthetic
Esempio n. 4
0
def Pipeline(X_train, y_train, X_test, n_dims=44):
    """Preprocess the train/test sets and resample/clean the training set.

    Steps, in order:

    1. Split the ``"id"`` column off both feature tables.
    2. Standardise the features with a scaler fitted on the training set
       only, and apply that same scaler to the test set.
    3. Treat columns with more than two distinct training values as
       "numeric" and keep them as they are; project the remaining columns
       with a PCA (fitted on the training set) onto ``n_dims`` components.
    4. Pass the training set through ``IPF`` (project helper; presumably an
       instance filter -- confirm against its definition).
    5. Oversample the training set with SMOTE.
    6. Run ``cleanAnomalies`` separately on each of the three classes and
       concatenate the results.

    :param X_train: training features; must support ``["id"]`` and
        ``.drop(columns=...)`` (e.g. a pandas DataFrame).
    :param y_train: training labels; expected to contain the strings
        ``"functional"``, ``"non functional"`` and
        ``"functional needs repair"``.
    :param X_test: test features, same layout as ``X_train``.
    :param n_dims: number of PCA components for the non-numeric columns.
    :return: ``(X_train, y_train, id_train, X_test, id_test)``. After
        resampling and cleaning, the rows of ``X_train`` no longer
        correspond one-to-one to ``id_train``.
    """
    # Keep the identifiers aside; they must not be used as features.
    id_train = np.array(X_train["id"])
    id_test = np.array(X_test["id"])
    X_train = np.array(X_train.drop(columns=["id"]))
    X_test = np.array(X_test.drop(columns=["id"]))
    y_train = np.array(y_train)

    # Columns with more than two distinct values are considered numeric;
    # everything else is treated as a binary indicator column.
    ind_numeric = [
        col for col in range(X_train.shape[1])
        if len(np.unique(X_train[:, col])) > 2
    ]
    print("Hay " + str(len(ind_numeric)) + " variables numericas")

    # Alternative steps that were tried and disabled (dropping the
    # "functional needs repair" class, autoencoder / denoising-autoencoder
    # reduction, AllKNN, SelectKBest, ADASYN, EditedNearestNeighbours, SSMA,
    # KLMNN metric learning) are not part of this pipeline; they previously
    # lived here as inert string literals.

    print("Scaling data...")
    # Fit the scaler on the training data only and reuse it for the test
    # data. Scaling the test set with its own mean/std would put it in a
    # different feature space than the one the PCA below is fitted on.
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    print("PCA con " + str(n_dims) + " componentes...")
    # Only the non-numeric columns are projected; numeric columns pass
    # through unchanged and are placed first in the output.
    X_train_binary = np.delete(X_train, ind_numeric, axis=1)
    X_test_binary = np.delete(X_test, ind_numeric, axis=1)
    X_train_numeric = X_train[:, ind_numeric]
    X_test_numeric = X_test[:, ind_numeric]
    pca = PCA(n_components=n_dims)
    X_train = np.hstack((X_train_numeric, pca.fit_transform(X_train_binary)))
    X_test = np.hstack((X_test_numeric, pca.transform(X_test_binary)))
    print("Numero de features: " + str(len(X_train[0])))

    print("IPF...")
    X_train, y_train = IPF(X_train, y_train)
    print("Numero de instancias: " + str(len(X_train)))
    print("Instancias por clase:")
    print(np.unique(y_train, return_counts=True))

    print("SMOTE...")
    smote = SMOTE(
        sampling_strategy={
            "functional needs repair": 7500,
            "non functional": 22000,
        },
        random_state=123456789,
        n_jobs=20,
        k_neighbors=7,
    )
    X_train, y_train = smote.fit_resample(X_train, y_train)
    print("Numero de instancias: " + str(len(X_train)))
    print("Instancias por clase:")
    print(np.unique(y_train, return_counts=True))

    print("Cleaning anomalies...")
    # Clean each class on its own, in a fixed order, then stack the results
    # back together in that same order.
    cleaned_X = []
    cleaned_y = []
    for label in ("functional", "non functional", "functional needs repair"):
        members = np.where(y_train == label)[0]
        X_clean, y_clean = cleanAnomalies(X_train[members], y_train[members])
        cleaned_X.append(X_clean)
        cleaned_y.append(y_clean)
    X_train = np.concatenate(cleaned_X, axis=0)
    y_train = np.concatenate(cleaned_y, axis=0)
    print("Instancias por clase:")
    print(np.unique(y_train, return_counts=True))

    return X_train, y_train, id_train, X_test, id_test