예제 #1
0
        )
        sys.exit(2)

    # Create average vectors for the training and test sets
    print "Creating average feature vecs for training reviews ..."
    trainDataVecs = getAvgFeatureVecs(getCleanReviews(train), model,
                                      embedding_dimension)

    print "Creating average feature vecs for test reviews ..."
    testDataVecs = getAvgFeatureVecs(getCleanReviews(test), model,
                                     embedding_dimension)

    ###################
    # TRAIN THE MODEL #
    ###################
    classifier.fit(trainDataVecs, train["Problematic"])

# Use bag-of-word
else:
    # Append negative and positive examples
    train = train.append(train_pos, ignore_index=True)
    test = test.append(test_pos, ignore_index=True)

    # Create a bag of words from the training set
    print "\nCreating the bag of words...\n"

    # Initialize the "CountVectorizer" object, which is scikit-learn's
    # bag of words tool.
    vectorizer = CountVectorizer(analyzer="word",
                                 tokenizer=None,
                                 preprocessor=None,
from dbn.tensorflow import SupervisedDBNClassification

# Loading dataset
digits = load_digits()
X, Y = digits.data, digits.target

# Data scaling
X = (X / 16).astype(np.float32)

# Splitting data
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=0)

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[256, 256],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='relu',
                                         dropout_p=0.2)
classifier.fit(X_train, Y_train)

# Test
Y_pred = classifier.predict(X_test)
print 'Done.\nAccuracy: %f' % accuracy_score(Y_test, Y_pred)
    mdl+=1
    print("Ukuran data model ",mdl)
    print("Ukuran x_train : ",x_train.shape)
    print("Ukuran y_train",y_train.shape)
    print("Ukuran x_test : ",x_test.shape)
    print("Ukuran y_test",y_test.shape)

    classifier = SupervisedDBNClassification(hidden_layers_structure=[len(np.asarray(X)[train]),len(np.asarray(Y)[train])],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='relu',
                                         dropout_p=0.2)
    tr_loss=classifier.fit(np.asarray(X)[train], np.asarray(Y)[train])
    val_loss = classifier.fit(np.asarray(X)[test], np.asarray(Y)[test])
    train_loss.append(tr_loss)
    validation_loss.append(val_loss)
    predict_train = classifier.predict(np.asarray(X)[train])
    accuracy_train = accuracy_score(np.asarray(Y)[train], predict_train)
    acc_train.append(accuracy_train)    
    pred_train.append(predict_train)
    predict_test = classifier.predict(np.asarray(X)[test])
    pred_test.append(predict_test)
    accuracy_test = accuracy_score(np.asarray(Y)[test], predict_test)
    acc_test.append(accuracy_test)
    #predict=classifier.predict(np.asarray(X)[test])
    no+=1
    print('Pada model {0} Accuracy Train adalah : {1} %'.format (no,accuracy_train))    
    print('Pada model {0} Accuracy Test adalah: {1} %'.format (no,accuracy_test))    
예제 #4
0
import numpy as np

np.random.seed(1337)  # for reproducibility
from sklearn.model_selection import train_test_split
from sklearn.metrics.classification import accuracy_score
from dbn.tensorflow import SupervisedDBNClassification
# use "from dbn import SupervisedDBNClassification" for computations on CPU with numpy
from sklearn.datasets import load_iris

iris = load_iris()
data_x = iris.data
data_y = iris.target

x_train, x_test, y_train, y_test = train_test_split(data_x,
                                                    data_y,
                                                    test_size=0.2,
                                                    random_state=42)

# Training
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[500, 1000, 500],
    learning_rate_rbm=0.05,
    learning_rate=0.1,
    n_epochs_rbm=20,  # RBM training steps
    n_iter_backprop=50,  # ANN training steps
    activation_function='relu',
    dropout_p=0.2)

classifier.fit(x_train, y_train)
        if (os.stat(filename).st_size != 0):
            X_Test = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])
            y_Test = np.transpose(np.asarray([1] * (len(list(sentences)))))

# Truncate and pad input sequences
X_Train = sequence.pad_sequences(X_Train, maxlen = max_review_length)
X_Test = sequence.pad_sequences(X_Test, maxlen = max_review_length)

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[500,250,100],
                                             learning_rate_rbm=0.1,
                                             learning_rate=0.0001,
                                             n_epochs_rbm=50,
                                             n_iter_backprop=500,
                                             batch_size=16,
                                             activation_function='sigmoid',
                                             dropout_p=0.25)
classifier.fit(X_Train, y_Train)

# Test
Y_pred = classifier.predict(X_Test)
Y_p = classifier.predict_proba(X_Test)
Y_n = classifier.predict_proba_dict(X_Test)
print(Y_n)
print(Y_p)
print(Y_p)
print(Y_pred)
print(y_Test)
print('Done.\nAccuracy: %f' % accuracy_score(y_Test, Y_pred))
res = [[Y_p[0, 0], Y_p[0, 1], Y_pred, y_Test]]
writer.writerows(res)
예제 #6
0
    """
    tmp = unpickle("CIFAR-3.pickle")
    labels = []
    for index in range(len(tmp['y'])):
        if tmp['y'][index, 0] == 1:
            #airplane
            labels.append(1)
        elif tmp['y'][index, 1] == 1:
            #dog
            labels.append(2)
        else:
            #boat
            labels.append(3)

    x_train = tmp['x'][:train_ex]
    x_train /= 255
    y_train = labels[:train_ex]
    x_test = tmp['x'][train_ex:]
    x_test /= 255
    y_test = labels[train_ex:]
    return x_train, y_train, x_test, y_test


x_train, y_train, x_test, y_test = get_data()

dbn.fit(x_train, y_train)

predictions = dbn.predict(x_test)
accuracy = accuracy_score(y_test, list(predictions))
print('Accuracy: {0}'.format(accuracy))
예제 #7
0
import numpy as np
np.random.seed(1337)  # for reproducibility
from sklearn.model_selection import train_test_split
from sklearn.metrics.classification import accuracy_score
from dbn.tensorflow import SupervisedDBNClassification
# use "from dbn import SupervisedDBNClassification" for computations on CPU with numpy
from sklearn.datasets import load_iris


iris = load_iris()
data_x = iris.data
data_y = iris.target


x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.2, random_state=42)

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[500, 1000, 500],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=20, # RBM training steps
                                         n_iter_backprop=50, # ANN training steps
                                         activation_function='relu',
                                         dropout_p=0.2)

classifier.fit(x_train, y_train)
예제 #8
0
    'ENIP', 'GVCP', 'NBNS', 'SSDP', 'TCP'
]].copy()
y = data[['Safe']].copy()

train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.2)  #, random_state = 0)
train_x = train_x.values
train_y = train_y.values
train_y = train_y[:, 0]
test_x = test_x.values
test_y = test_y.values
test_y = test_y[:, 0]

classifier = SupervisedDBNClassification(hidden_layers_structure=[256, 256],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='sigmoid',
                                         dropout_p=0.2)

classifier.fit(train_x, train_y)

classifier.save('model.pkl')
# Restore it
classifier = SupervisedDBNClassification.load('model.pkl')

Y_pred = classifier.predict(test_x)
print('Done.\nAccuracy: %f' % accuracy_score(test_y, Y_pred))
from sklearn.cross_validation import train_test_split
from sklearn.metrics.classification import accuracy_score

from dbn.tensorflow import SupervisedDBNClassification


# Loading dataset
digits = load_digits()
X, Y = digits.data, digits.target

# Data scaling
X = (X / 16).astype(np.float32)

# Splitting data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[256, 256],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='relu',
                                         dropout_p=0.2)
classifier.fit(X_train, Y_train)

# Test
Y_pred = classifier.predict(X_test)
print 'Done.\nAccuracy: %f' % accuracy_score(Y_test, Y_pred)
예제 #10
0
    def train(self, training_data, training_z):
        """Trains the classifier
        
        Parameters:
        -----------
        training_data: numpy array, size Ngalaxes x Nbands
          training data, each row is a galaxy, each column is a band as per
          band defined above
        training_z: numpy array, size Ngalaxies
          true redshift for the training sample
        """

        from dbn.tensorflow import SupervisedDBNClassification

        self.training_z = training_z

        # Create value-added data
        print("Creating value-added training data")
        self.training_data = get_valueadded_data(
            training_data, self.bands, self.opt['errors'], self.opt['colors'],
            self.opt['band_triplets'], self.opt['band_triplets_errors'],
            self.opt['heal_undetected'], self.wants_arrays)

        data_scaler = self.opt[
            'data_scaler'] if 'data_scaler' in self.opt else 'MinMaxScaler'
        n_bin = self.opt['bins']
        train_percent = self.opt[
            'train_percent'] if 'train_percent' in self.opt else 1
        n_epochs_rbm = self.opt[
            'n_epochs_rbm'] if 'n_epochs_rbm' in self.opt else 2
        activation = self.opt[
            'activation'] if 'activation' in self.opt else 'relu'
        learning_rate_rbm = self.opt[
            'learning_rate_rbm'] if 'learning_rate_rbm' in self.opt else 0.05
        learning_rate = self.opt[
            'learning_rate'] if 'learning_rate' in self.opt else 0.1
        n_iter_backprop = self.opt[
            'n_iter_backprop'] if 'n_iter_backprop' in self.opt else 25
        batch_size = self.opt['batch_size'] if 'batch_size' in self.opt else 32
        dropout_p = self.opt['dropout_p'] if 'dropout_p' in self.opt else 0.2
        hidden_layers_structure = self.opt[
            'hidden_layers_structure'] if 'hidden_layers_structure' in self.opt else [
                256, 256
            ]

        print("Finding bins for training data")

        # Data rescaling
        self.scaler = getattr(preprocessing, data_scaler)()

        print(f"Using {data_scaler} to rescale data for better results")

        # Fit scaler on data and use the same scaler in the future when needed
        self.scaler.fit(self.training_data)

        # apply transform to get rescaled values
        self.training_data = self.scaler.transform(
            self.training_data
        )  # inverse: data_original = scaler.inverse_transform(data_rescaled)

        # Now put the training data into redshift bins.
        # Use zero so that the one object with minimum
        # z in the whole survey will be in the lowest bin
        training_bin = np.zeros(self.training_z.size)

        # Find the edges that split the redshifts into n_z bins of
        # equal number counts in each
        p = np.linspace(0, 100, n_bin + 1)
        z_edges = np.percentile(self.training_z, p)

        # Now find all the objects in each of these bins
        for i in range(n_bin):
            z_low = z_edges[i]
            z_high = z_edges[i + 1]
            training_bin[(self.training_z > z_low)
                         & (self.training_z < z_high)] = i

        if 0 < train_percent < 100:
            # for speed, cut down to ?% of original size
            print(
                f'Cutting down to {train_percent}% of original training sample size for speed.'
            )
            cut = np.random.uniform(0, 1,
                                    self.training_z.size) < train_percent / 100
            training_bin = training_bin[cut]
            self.training_data = self.training_data[cut]
        elif train_percent == 100:
            pass
        else:
            raise ValueError('train_percent is not valid')

        print('Setting up the layers for DBN')
        # Set up the layers
        classifier = SupervisedDBNClassification(
            hidden_layers_structure=hidden_layers_structure,
            learning_rate_rbm=learning_rate_rbm,
            learning_rate=learning_rate,
            n_epochs_rbm=n_epochs_rbm,
            n_iter_backprop=n_iter_backprop,
            batch_size=batch_size,
            activation_function=activation,
            dropout_p=dropout_p)

        # Train the model
        print("Fitting classifier")
        classifier.fit(self.training_data, training_bin)

        self.classifier = classifier
        self.z_edges = z_edges
y_test = y_test.tolist()
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

mlp = SupervisedDBNClassification(hidden_layers_structure=[19, 30, 19],
                                  learning_rate_rbm=0.05,
                                  learning_rate=0.1,
                                  n_epochs_rbm=10,
                                  n_iter_backprop=50,
                                  batch_size=32,
                                  activation_function='relu',
                                  dropout_p=0.2)

mlp.fit(X_train, y_train)
# Save the model
mlp.save('model.pkl')
# Restoreit
mlp = SupervisedDBNClassification.load('model.pkl')

predictions = mlp.predict(X_test)

RMSE_sum = 0

list = []
for x in range(0, len(X_test)):

    RMSE_sum = RMSE_sum + ((y_test[x] - predictions[x])**2)
    list.append(abs(y_test[x] - predictions[x]))
예제 #12
0
def run(params):

    # ##################### get parameters and define logger ################

    # device
    os.environ['CUDA_VISIBLE_DEVICES'] = str(params.gpu)

    # get parameters
    data_name = params.data.data_name
    data_dir = params.data.data_dir
    target_dir = params.data.target_dir
    train_prop = params.data.train_prop
    val_prop = params.data.val_prop

    train_params = params.train
    method_name = params.method_name
    result_dir = params.result_dir
    folder_level = params.folder_level

    train_prop = train_prop if train_prop < 1 else int(train_prop)
    val_prop = val_prop if val_prop < 1 else int(val_prop)

    result_root = result_dir
    local_v = locals()
    for s in folder_level:
        result_dir = check_path(os.path.join(result_dir, str(local_v[s])))

    # define output dirs
    acc_dir = os.path.join(result_root, 'accuracy.csv')
    log_dir = os.path.join(result_dir, 'train.log')
    model_dir = os.path.join(result_dir, 'weights.pkl')
    # soft_dir = os.path.join(result_dir, 'soft_label.mat')
    # loss_dir = os.path.join(result_dir, 'loss_curve.png')

    # define logger
    logger = define_logger(log_dir)

    # print parameters
    num1 = 25
    num2 = 100
    logger.info('%s begin a new training: %s %s' %
                ('#' * num1, method_name, '#' * num1))
    params_str = recur_str_dict_for_show(params, total_space=num2)
    logger.info('show parameters ... \n%s' % params_str)

    # ########################### get data, train ############################

    logger.info('get data ...')
    mask_dir = os.path.dirname(data_dir)
    data, target = read_data(data_dir, target_dir)
    train_mask, val_mask, test_mask = load_masks(mask_dir, target, train_prop,
                                                 val_prop)
    x_train, y_train = get_vector_samples(data, target, train_mask)

    logger.info('get model ...')
    from dbn.tensorflow import SupervisedDBNClassification
    classifier = SupervisedDBNClassification(**train_params)

    logger.info('begin to train ...')
    s = time.time()
    classifier.fit(x_train, y_train)
    e = time.time()
    train_time = e - s
    logger.info('training time: %.4fs' % train_time)

    logger.info('save model ...')
    classifier.save(model_dir)

    # ########################### predict, output ###########################

    all_data = data.reshape(-1, data.shape[1] * data.shape[2]).T

    classifier = SupervisedDBNClassification.load(model_dir)

    logger.info('begin to predict ...')
    s = time.time()
    pred = classifier.predict(all_data)
    pred = np.array(pred)
    pred = pred.reshape(target.shape) + 1
    e = time.time()
    pred_time = (e - s)
    logger.info('predicted time: %.4fs' % pred_time)

    # output predicted map(png/mat), accuracy table and other records
    logger.info('save classification maps etc. ...')
    train_records = {
        'train_time': '%.4f' % train_time,
        'pred_time': '%.4f' % pred_time
    }

    ro = ResultOutput(pred,
                      data,
                      target,
                      train_mask,
                      val_mask,
                      test_mask,
                      result_dir,
                      acc_dir,
                      hyper_params=params,
                      train_records=train_records)
    ro.output()
예제 #13
0
        dropout_p=0.2)
    # Split Data
    X, Y = get_dataset(tz)
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)
    print('Size of training set == {}, Size of testing set == {}\n'.format(
        len(X_train), len(X_test)))

    start_time = timer()
    tot_start = start_time
    Matt_Net.pre_train(X_train)
    print('Time to pretrain == {:5.3f} seconds\n'.format(timer() - start_time))

    start_time = timer()
    Matt_Net.fit(X_train, Y_train, False)
    print('Time to fit == {:5.3f} seconds\n'.format(timer() - start_time))
    print('Total time == {:5.3f} seconds\n'.format(timer() - tot_start))

    Matt_Net.save('train/Matt_Net_Zone_{}.pkl'.format(tz))

    Y_pred = Matt_Net.predict(X_test)
    start_time = timer()
    score = accuracy_score(Y_test, Y_pred)
    print(
        'Done, time to predict == {:5.3}\nAccuracy == {} for zone {}\n'.format(
            timer() - start_time, score, tz))

    del Matt_Net