Ejemplo n.º 1
0
def preprocess(X_train, X_test, y_train, y_test, fac, std=True):
    """Prepare features and labels for training.

    Steps, in order: optional standardization, class counting, balanced
    class-weight computation from the training labels, label remapping,
    one-hot encoding, and folding of the 3-D inputs by ``fac``.

    Args:
        X_train, X_test: arrays indexed on three axes (``shape[0..2]``).
        y_train, y_test: label arrays.
        fac: integer factor; axis 2 is divided by it and axis 1 multiplied.
        std: when truthy, ``standardize`` is applied to both inputs first.

    Returns:
        ``(X_train, X_test, y_train, y_test, y_true, nb_classes,
        class_weight)`` where ``y_train``/``y_test`` are dense one-hot
        arrays and ``y_true`` is the integer version of the test labels.
    """
    if std:
        X_train, X_test = standardize(X_train, X_test)

    # number of distinct labels over train and test together
    all_labels = np.concatenate((y_train, y_test), axis=0)
    nb_classes = len(np.unique(all_labels))

    # balanced weights: n_samples / (n_classes * samples_in_class),
    # computed on the training labels only
    train_classes = np.unique(y_train)
    label_encoder = LabelEncoder()
    encoded_train = label_encoder.fit_transform(y_train.ravel())
    per_class_count = np.bincount(encoded_train).astype(np.float64)
    inverse_freq = len(y_train) / (len(label_encoder.classes_) * per_class_count)
    class_weight = inverse_freq[label_encoder.transform(train_classes)]

    # remap labels (per the original note: so that the minimum label is zero)
    y_train, y_test = transform_labels(y_train, y_test)

    # integer test labels are kept because y_test becomes one-hot below
    y_true = y_test.astype(np.int64)

    # fit the encoder on every label seen, then encode each split
    enc = sklearn.preprocessing.OneHotEncoder(categories='auto')
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))

    def _one_hot(labels):
        return enc.transform(labels.reshape(-1, 1)).toarray()

    y_train = _one_hot(y_train)
    y_test = _one_hot(y_test)

    def _fold(arr):
        # (n, d1, d2) -> (n, d2 // fac, d1 * fac)
        return arr.reshape((arr.shape[0], arr.shape[2] // fac, arr.shape[1] * fac))

    X_train = _fold(X_train)
    X_test = _fold(X_test)

    return X_train, X_test, y_train, y_test, y_true, nb_classes, class_weight
def prepare_data():
    """Load the current dataset and prepare it for a classifier.

    Reads the splits from ``datasets_dict[dataset_name]``, remaps the labels
    with ``transform_labels``, one-hot encodes them and makes sure the inputs
    are three-dimensional.

    Returns:
        ``(x_train, y_train, x_test, y_test, y_true, nb_classes,
        y_true_train, enc)``; ``y_train``/``y_test`` are dense one-hot arrays,
        ``y_true``/``y_true_train`` are the integer labels, and ``enc`` is the
        fitted ``OneHotEncoder``.
    """
    entry = datasets_dict[dataset_name]
    x_train, y_train, x_test, y_test = entry[0], entry[1], entry[2], entry[3]

    # count the distinct labels across both splits
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # remap labels (per the original note: so that the minimum label is zero)
    y_train, y_test = transform_labels(y_train, y_test)

    # keep integer copies: the label arrays are replaced by one-hot below
    y_true = y_test.astype(np.int64)
    y_true_train = y_train.astype(np.int64)

    # one encoder fitted on all labels, applied to each split
    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

    # 2-D input gets a trailing axis of length one
    if x_train.ndim == 2:
        x_train = x_train.reshape(x_train.shape + (1,))
        x_test = x_test.reshape(x_test.shape + (1,))

    return x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc
Ejemplo n.º 3
0
def read_data_from_dataset(use_init_clusters=True):
    """Load the current dataset and derive prototype settings.

    Args:
        use_init_clusters: when equal to ``True``, initial k-means clusters
            are requested from ``get_random_initial_for_kmeans``; otherwise
            ``init_clusters`` is ``None``.

    Returns:
        ``(x_train, y_train, x_test, y_test, nb_classes, classes,
        max_prototypes, init_clusters)``.
    """
    entry = datasets_dict[dataset_name]
    x_train, y_train, x_test, y_test = entry[0], entry[1], entry[2], entry[3]

    # distinct labels over train and test together
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # remap labels (per the original note: so that the minimum label is zero)
    y_train, y_test = transform_labels(y_train, y_test)

    classes, classes_counts = np.unique(y_train, return_counts=True)

    # univariate (2-D) input gets a trailing axis of length one
    if x_train.ndim == 2:
        x_train = x_train.reshape(x_train.shape + (1,))
        x_test = x_test.reshape(x_test.shape + (1,))

    # upper bound on prototypes: the largest training class count + 1,
    # capped at MAX_PROTOTYPES_PER_CLASS + 1
    prototype_cap = MAX_PROTOTYPES_PER_CLASS + 1
    max_prototypes = min(classes_counts.max() + 1, prototype_cap)

    # explicit comparison with True is kept so that only True-equal values
    # trigger the cluster initialisation
    init_clusters = None
    if use_init_clusters == True:
        init_clusters = get_random_initial_for_kmeans(
            x_train, y_train, max_prototypes, nb_classes)

    return (x_train, y_train, x_test, y_test, nb_classes, classes,
            max_prototypes, init_clusters)
Ejemplo n.º 4
0
def fit_classifier():
    """Prepare the current dataset and fit the configured classifier.

    Reads the splits from ``datasets_dict[dataset_name]``, one-hot encodes the
    labels, ensures 3-D inputs, builds the classifier via
    ``create_classifier`` and calls its ``fit``. Returns ``None``.
    """
    entry = datasets_dict[dataset_name]
    x_train, y_train, x_test, y_test = entry[0], entry[1], entry[2], entry[3]

    # distinct labels over train and test together
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # remap labels (per the original note: so that the minimum label is zero)
    y_train, y_test = transform_labels(y_train, y_test)

    # integer test labels are kept because y_test becomes one-hot below
    y_true = y_test.astype(np.int64)

    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

    # univariate (2-D) input gets a trailing axis of length one
    if x_train.ndim == 2:
        x_train = x_train.reshape(x_train.shape + (1,))
        x_test = x_test.reshape(x_test.shape + (1,))

    # everything except the sample axis
    input_shape = x_train.shape[1:]
    classifier = create_classifier(
        classifier_name, input_shape, nb_classes, output_directory)

    classifier.fit(x_train, y_train, x_test, y_test, y_true)
Ejemplo n.º 5
0
def read_data_from_dataset(use_init_clusters=True):
    """Load the current dataset and derive prototype settings.

    Args:
        use_init_clusters: accepted for signature compatibility; this
            variant does not use it.

    Returns:
        ``(x_train, y_train, x_test, y_test, nb_classes, classes,
        max_prototypes, init_clusters)``; ``init_clusters`` is always
        ``None`` here.
    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    # distinct labels over train and test together
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))
    # remap labels (per the original note: so that the minimum label is zero)
    y_train, y_test = transform_labels(y_train, y_test)

    classes, classes_counts = np.unique(y_train, return_counts=True)

    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    # upper bound on prototypes: the largest training class count + 1,
    # capped at MAX_PROTOTYPES_PER_CLASS + 1
    max_prototypes = min(classes_counts.max() + 1,
                         MAX_PROTOTYPES_PER_CLASS + 1)
    init_clusters = None

    return x_train, y_train, x_test, y_test, nb_classes, classes, max_prototypes, init_clusters
Ejemplo n.º 6
0
def fit_classifier(test_spec=False):
    """Prepare the current dataset and fit the configured classifier.

    With at most two classes the labels are kept as a single integer column
    and ``nb_classes`` is set to 1; otherwise they are one-hot encoded.
    Progress information is printed along the way. Returns ``None``.

    Args:
        test_spec: accepted for compatibility; the classifier is fitted with
            the same arguments whatever its value.
    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    # distinct labels over train and test together
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    print(nb_classes)
    # remap labels (per the original note: so that the minimum label is zero)
    print(y_train.shape)
    y_train, y_test = transform_labels(y_train, y_test)
    print(y_train.shape)
    # keep integer test labels; y_test may be replaced by one-hot below
    y_true = y_test.astype(np.int64)
    print("Y_TREU !!!!!: {} {}".format(y_true, y_true.shape))
    if nb_classes <= 2:
        # binary case: a single output unit, labels as column vectors
        nb_classes = 1
        y_train = y_train.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)
        y_true = y_true.reshape(-1, 1)
    else:
        # multi-class case: transform integer labels to one-hot vectors
        enc = sklearn.preprocessing.OneHotEncoder()
        enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
        y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
        y_test = enc.transform(y_test.reshape(-1, 1)).toarray()
    print(y_train.shape)
    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    print("Data preparationis finished with:",x_train.shape, y_train.shape)
    input_shape = x_train.shape[1:]
    classifier = create_classifier(classifier_name, input_shape, nb_classes, output_directory)

    # both branches of the former `if test_spec` made this exact call
    classifier.fit(x_train, y_train, x_test, y_test, y_true)
Ejemplo n.º 7
0
def fit_classifier():
    """Prepare the current dataset, fit the classifier and time the fit.

    When ``NORMALIZE`` is truthy, each channel (index on the last axis) is
    min-max scaled with a scaler fitted on the training split only.

    Returns:
        ``(res, total_time)``: whatever ``classifier.fit`` returns and the
        elapsed wall-clock seconds of that call.
    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    # Make the data 3-D *before* normalizing: the per-channel loop below
    # indexes shape[2], which does not exist for univariate (2-D) input.
    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    if NORMALIZE:
        for chan in range(x_train.shape[2]):
            # fit on train only, then apply the same scaling to test;
            # NOTE: the arrays are written in place
            scaler = MinMaxScaler()
            scaler.fit(x_train[:, :, chan])
            x_train[:, :, chan] = scaler.transform(x_train[:, :, chan])
            x_test[:, :, chan] = scaler.transform(x_test[:, :, chan])

    # distinct labels over train and test together
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # remap labels (per the original note: so that the minimum label is zero)
    y_train, y_test = transform_labels(y_train, y_test)

    # integer test labels are kept because y_test becomes one-hot below
    y_true = y_test.astype(np.int64)
    # transform the labels from integers to one hot vectors
    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

    input_shape = x_train.shape[1:]
    classifier = create_classifier(classifier_name, input_shape, nb_classes,
                                   output_directory_name)

    start_time = time.time()
    res = classifier.fit(x_train, y_train, x_test, y_test, y_true)
    total_time = time.time() - start_time
    return res, total_time
Ejemplo n.º 8
0
def prepare_data(classification):
    """Load the current dataset and prepare inputs and targets.

    Args:
        classification: when truthy, labels are remapped with
            ``transform_labels`` and ``y_train`` is replaced by a soft
            "triangular" encoding over a fixed number of bins. ``y_test`` is
            returned as (remapped) labels, not encoded.

    Returns:
        ``(x_train, y_train, x_test, y_test, y_true, nb_classes,
        y_true_train, enc)``; ``y_true``/``y_true_train`` are the targets cast
        to ``int64`` and ``enc`` is a ``OneHotEncoder`` fitted on all targets
        (fitted but not applied here).
    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    # distinct target values over train and test together
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # remap labels (per the original note: so that the minimum label is zero)
    if classification:
        y_train, y_test = transform_labels(y_train, y_test)

    # integer copies of the targets, taken before y_train is re-encoded
    y_true = y_test.astype(np.int64)
    y_true_train = y_train.astype(np.int64)

    # the encoder is fitted and returned, but not used to transform here
    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))

    if classification:
        n_bins = 100  # length of each encoded target row
        peak = 1      # height / half-width of the triangular kernel
        print(peak)
        # bin positions are 1..n_bins; row value at bin b is
        # max(0, peak - |b - label|)
        # NOTE(review): if labels start at 0 after transform_labels, label 0
        # maps to an all-zero row because bins start at 1 - confirm intended.
        bins = np.asarray(range(n_bins)) + 1
        y_train = np.array([
            np.maximum(np.zeros(n_bins), peak - np.abs(bins - label))
            for label in y_train
        ])

    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    return x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc
Ejemplo n.º 9
0
def train(pre_model=None):
    """Train a model on the transfer dataset and log its test metrics.

    Builds a model with ``build_model`` (optionally from ``pre_model``), fits
    it using the test split as validation data, reloads the model stored at
    ``file_path``, predicts on the test split and hands the results to
    ``save_logs``. Prints the returned metrics and clears the Keras session.

    Args:
        pre_model: optional model passed through to ``build_model``.
    """
    # read train and test sets (test is taken from the last two entries)
    x_train = datasets_dict[dataset_name_tranfer][0]
    y_train = datasets_dict[dataset_name_tranfer][1]

    # no separate validation predictions are produced in this function
    y_true_val = None
    y_pred_val = None

    x_test = datasets_dict[dataset_name_tranfer][-2]
    y_test = datasets_dict[dataset_name_tranfer][-1]

    # a tenth of the training set, capped at batch_size; clamped to at least 1
    # because fewer than 10 training samples would otherwise give a size of 0
    mini_batch_size = max(1, int(min(x_train.shape[0] / 10, batch_size)))

    # distinct labels over train and test together
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # remap labels (per the original note: so that the minimum label is zero)
    y_train, y_test = transform_labels(y_train, y_test)

    # integer test labels are kept because y_test becomes one-hot below
    y_true = y_test.astype(np.int64)

    # transform the labels from integers to one hot vectors
    y_train = keras.utils.to_categorical(y_train, nb_classes)
    y_test = keras.utils.to_categorical(y_test, nb_classes)

    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    start_time = time.time()
    # the first input dimension is left as None; only the channel count is fixed
    input_shape = (None, x_train.shape[2])
    model = build_model(input_shape, nb_classes, pre_model)

    # drop the local reference to the pre-trained model
    pre_model = None

    if verbose == True:
        model.summary()

    hist = model.fit(x_train,
                     y_train,
                     batch_size=mini_batch_size,
                     epochs=nb_epochs,
                     verbose=verbose,
                     validation_data=(x_test, y_test),
                     callbacks=callbacks)

    # reload from file_path (presumably written by one of the callbacks -
    # TODO confirm) instead of using the in-memory model
    model = keras.models.load_model(file_path)

    y_pred = model.predict(x_test)
    # convert the predicted class scores to integer labels
    y_pred = np.argmax(y_pred, axis=1)

    duration = time.time() - start_time

    df_metrics = save_logs(write_output_dir, hist, y_pred, y_true, duration,
                           y_true_val, y_pred_val)

    print(df_metrics)

    keras.backend.clear_session()
Ejemplo n.º 10
0
            pattern_len = pattern_len_

            nb_classes = len(pattern_len) * len(pattern_pos)

            for ts_len in ts_lens:

                for ts_n in ts_ns:

                    x_train, y_train, x_test, y_test = create_synthetic_dataset(
                        pattern_len=pattern_len,
                        pattern_pos=pattern_pos,
                        ts_len=ts_len,
                        ts_n=ts_n)

                    # make the min to zero of labels
                    y_train, y_test = transform_labels(y_train, y_test)

                    # save the original integer labels because y is one-hot encoded below
                    y_true = y_test.astype(np.int64)
                    y_true_train = y_train.astype(np.int64)
                    # transform the labels from integers to one hot vectors
                    enc = sklearn.preprocessing.OneHotEncoder()
                    enc.fit(
                        np.concatenate((y_train, y_test),
                                       axis=0).reshape(-1, 1))
                    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
                    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

                    if len(x_train.shape) == 2:
                        # if uni-variate add a dimension to make it multivariate with one dimension
                        x_train = x_train.reshape(
Ejemplo n.º 11
0
    csi_train_label = csi_train_label[index]

    # when training the conv network, the data must be reshaped to 4 dimensions
    # csi_train_data = csi_train_data.reshape((sample_count, sequence_max_len, input_feature, 1))

    # split the data into training and test sets
    train, test, train_label, test_label = train_test_split(csi_train_data,
                                                            csi_train_label,
                                                            test_size=0.3)

    # compute the number of classes
    num_class = len(
        np.unique(np.concatenate((train_label, test_label), axis=0)))

    # make the min to zero of labels
    train_label, test_label = transform_labels(train_label, test_label)

    # save the original integer labels because the labels are one-hot encoded below
    y_true = test_label.astype(np.int64)

    # transform the labels from integers to one hot vectors
    train_label = keras.utils.to_categorical(train_label, num_class)
    test_label = keras.utils.to_categorical(test_label, num_class)

    # build model
    # model = bilstm_model(sequence_max_len=sequence_max_len,
    #                      input_feature=input_feature,
    #                      dropout_rate=dropout_rate,
    #                      num_class=num_class,
    #                      hidden_unit_num=hidden_unit_num)
    # model = bilstm_crf_model(sequence_max_len, input_feature, dropout_rate, num_class, hidden_unit_num)