    sys.exit(2)

# Create average vectors for the training and test sets
print("Creating average feature vecs for training reviews ...")
trainDataVecs = getAvgFeatureVecs(getCleanReviews(train), model, embedding_dimension)

print("Creating average feature vecs for test reviews ...")
testDataVecs = getAvgFeatureVecs(getCleanReviews(test), model, embedding_dimension)

###################
# TRAIN THE MODEL #
###################
classifier.fit(trainDataVecs, train["Problematic"])

# Use bag-of-words features
else:
    # Append negative and positive examples
    train = train.append(train_pos, ignore_index=True)
    test = test.append(test_pos, ignore_index=True)

    # Create a bag of words from the training set
    print("\nCreating the bag of words...\n")

    # Initialize the "CountVectorizer" object, which is scikit-learn's
    # bag-of-words tool.
    vectorizer = CountVectorizer(analyzer="word",
                                 tokenizer=None,
                                 preprocessor=None,
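The two helpers above are not defined in the excerpt. A minimal sketch of what a word2vec-averaging pipeline typically uses, assuming `model` is a trained gensim (4.x) Word2Vec model and the DataFrames have a "review" text column; the column name and the tokenization are assumptions, not the original code:

import numpy as np

def getCleanReviews(reviews):
    # Assumed tokenization: lower-case and whitespace-split each review.
    return [str(review).lower().split() for review in reviews["review"]]

def getAvgFeatureVecs(reviews, model, num_features):
    # Average the vectors of all in-vocabulary words in each review.
    feature_vecs = np.zeros((len(reviews), num_features), dtype="float32")
    vocab = set(model.wv.index_to_key)
    for i, words in enumerate(reviews):
        in_vocab = [w for w in words if w in vocab]
        if in_vocab:
            feature_vecs[i] = np.mean([model.wv[w] for w in in_vocab], axis=0)
    return feature_vecs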
mdl += 1
print("Model data size ", mdl)
print("x_train shape: ", x_train.shape)
print("y_train shape: ", y_train.shape)
print("x_test shape: ", x_test.shape)
print("y_test shape: ", y_test.shape)
classifier = SupervisedDBNClassification(hidden_layers_structure=[len(np.asarray(X)[train]),
                                                                  len(np.asarray(Y)[train])],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='relu',
                                         dropout_p=0.2)
# Fit on the training fold only; fit() returns the fitted classifier rather
# than a loss value, and refitting on the test fold would leak test data.
classifier.fit(np.asarray(X)[train], np.asarray(Y)[train])
predict_train = classifier.predict(np.asarray(X)[train])
accuracy_train = accuracy_score(np.asarray(Y)[train], predict_train)
acc_train.append(accuracy_train)
pred_train.append(predict_train)
predict_test = classifier.predict(np.asarray(X)[test])
pred_test.append(predict_test)
accuracy_test = accuracy_score(np.asarray(Y)[test], predict_test)
acc_test.append(accuracy_test)
no += 1
print('Model {0} train accuracy: {1}'.format(no, accuracy_train))
print('Model {0} test accuracy: {1}'.format(no, accuracy_test))
import numpy as np

np.random.seed(1337)  # for reproducibility

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from dbn.tensorflow import SupervisedDBNClassification
# use "from dbn import SupervisedDBNClassification" for computations on CPU with numpy

iris = load_iris()
data_x = iris.data
data_y = iris.target
x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.2, random_state=42)

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[500, 1000, 500],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=20,     # RBM training steps
                                         n_iter_backprop=50,  # ANN training steps
                                         activation_function='relu',
                                         dropout_p=0.2)
classifier.fit(x_train, y_train)
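The excerpt stops after fit(); a short evaluation step in the same style as the digits example below, using the same scikit-learn accuracy call:

from sklearn.metrics import accuracy_score

Y_pred = classifier.predict(x_test)
print('Done.\nAccuracy: %f' % accuracy_score(y_test, Y_pred))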
if os.stat(filename).st_size != 0:
    X_Test = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])
    y_Test = np.transpose(np.asarray([1] * len(list(sentences))))

# Truncate and pad input sequences
X_Train = sequence.pad_sequences(X_Train, maxlen=max_review_length)
X_Test = sequence.pad_sequences(X_Test, maxlen=max_review_length)

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[500, 250, 100],
                                         learning_rate_rbm=0.1,
                                         learning_rate=0.0001,
                                         n_epochs_rbm=50,
                                         n_iter_backprop=500,
                                         batch_size=16,
                                         activation_function='sigmoid',
                                         dropout_p=0.25)
classifier.fit(X_Train, y_Train)

# Test
Y_pred = classifier.predict(X_Test)
Y_p = classifier.predict_proba(X_Test)
Y_n = classifier.predict_proba_dict(X_Test)
print(Y_n)
print(Y_p)
print(Y_pred)
print(y_Test)
print('Done.\nAccuracy: %f' % accuracy_score(y_Test, Y_pred))
res = [[Y_p[0, 0], Y_p[0, 1], Y_pred, y_Test]]
writer.writerows(res)
""" tmp = unpickle("CIFAR-3.pickle") labels = [] for index in range(len(tmp['y'])): if tmp['y'][index, 0] == 1: #airplane labels.append(1) elif tmp['y'][index, 1] == 1: #dog labels.append(2) else: #boat labels.append(3) x_train = tmp['x'][:train_ex] x_train /= 255 y_train = labels[:train_ex] x_test = tmp['x'][train_ex:] x_test /= 255 y_test = labels[train_ex:] return x_train, y_train, x_test, y_test x_train, y_train, x_test, y_test = get_data() dbn.fit(x_train, y_train) predictions = dbn.predict(x_test) accuracy = accuracy_score(y_test, list(predictions)) print('Accuracy: {0}'.format(accuracy))
          'ENIP', 'GVCP', 'NBNS', 'SSDP', 'TCP']].copy()
y = data[['Safe']].copy()
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.2)  # , random_state=0
train_x = train_x.values
train_y = train_y.values
train_y = train_y[:, 0]
test_x = test_x.values
test_y = test_y.values
test_y = test_y[:, 0]
classifier = SupervisedDBNClassification(hidden_layers_structure=[256, 256],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='sigmoid',
                                         dropout_p=0.2)
classifier.fit(train_x, train_y)
classifier.save('model.pkl')

# Restore it
classifier = SupervisedDBNClassification.load('model.pkl')
Y_pred = classifier.predict(test_x)
print('Done.\nAccuracy: %f' % accuracy_score(test_y, Y_pred))
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits
from dbn.tensorflow import SupervisedDBNClassification

# Loading dataset
digits = load_digits()
X, Y = digits.data, digits.target

# Data scaling
X = (X / 16).astype(np.float32)

# Splitting data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[256, 256],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.1,
                                         n_epochs_rbm=10,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='relu',
                                         dropout_p=0.2)
classifier.fit(X_train, Y_train)

# Test
Y_pred = classifier.predict(X_test)
print('Done.\nAccuracy: %f' % accuracy_score(Y_test, Y_pred))
def train(self, training_data, training_z):
    """Trains the classifier

    Parameters:
    -----------
    training_data: numpy array, size Ngalaxies x Nbands
        training data, each row is a galaxy, each column is a band as per
        band defined above
    training_z: numpy array, size Ngalaxies
        true redshift for the training sample
    """
    from dbn.tensorflow import SupervisedDBNClassification

    self.training_z = training_z

    # Create value-added data
    print("Creating value-added training data")
    self.training_data = get_valueadded_data(
        training_data, self.bands, self.opt['errors'], self.opt['colors'],
        self.opt['band_triplets'], self.opt['band_triplets_errors'],
        self.opt['heal_undetected'], self.wants_arrays)

    # Read hyperparameters from self.opt, falling back to defaults
    data_scaler = self.opt.get('data_scaler', 'MinMaxScaler')
    n_bin = self.opt['bins']
    train_percent = self.opt.get('train_percent', 1)
    n_epochs_rbm = self.opt.get('n_epochs_rbm', 2)
    activation = self.opt.get('activation', 'relu')
    learning_rate_rbm = self.opt.get('learning_rate_rbm', 0.05)
    learning_rate = self.opt.get('learning_rate', 0.1)
    n_iter_backprop = self.opt.get('n_iter_backprop', 25)
    batch_size = self.opt.get('batch_size', 32)
    dropout_p = self.opt.get('dropout_p', 0.2)
    hidden_layers_structure = self.opt.get('hidden_layers_structure', [256, 256])

    print("Finding bins for training data")

    # Data rescaling
    self.scaler = getattr(preprocessing, data_scaler)()
    print(f"Using {data_scaler} to rescale data for better results")
    # Fit scaler on data and use the same scaler in the future when needed
    self.scaler.fit(self.training_data)
    # Apply transform to get rescaled values
    # (inverse: data_original = scaler.inverse_transform(data_rescaled))
    self.training_data = self.scaler.transform(self.training_data)

    # Now put the training data into redshift bins.
    # Use zero so that the one object with minimum z in the whole survey
    # will be in the lowest bin
    training_bin = np.zeros(self.training_z.size)

    # Find the edges that split the redshifts into n_bin bins with
    # equal number counts in each
    p = np.linspace(0, 100, n_bin + 1)
    z_edges = np.percentile(self.training_z, p)

    # Now find all the objects in each of these bins
    for i in range(n_bin):
        z_low = z_edges[i]
        z_high = z_edges[i + 1]
        training_bin[(self.training_z > z_low) & (self.training_z < z_high)] = i

    if 0 < train_percent < 100:
        # For speed, cut down to train_percent% of the original size
        print(f'Cutting down to {train_percent}% of original training sample size for speed.')
        cut = np.random.uniform(0, 1, self.training_z.size) < train_percent / 100
        training_bin = training_bin[cut]
        self.training_data = self.training_data[cut]
    elif train_percent == 100:
        pass
    else:
        raise ValueError('train_percent is not valid')

    print('Setting up the layers for DBN')
    # Set up the layers
    classifier = SupervisedDBNClassification(
        hidden_layers_structure=hidden_layers_structure,
        learning_rate_rbm=learning_rate_rbm,
        learning_rate=learning_rate,
        n_epochs_rbm=n_epochs_rbm,
        n_iter_backprop=n_iter_backprop,
        batch_size=batch_size,
        activation_function=activation,
        dropout_p=dropout_p)

    # Train the model
    print("Fitting classifier")
    classifier.fit(self.training_data, training_bin)
    self.classifier = classifier
    self.z_edges = z_edges
y_test = y_test.tolist()

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

mlp = SupervisedDBNClassification(hidden_layers_structure=[19, 30, 19],
                                  learning_rate_rbm=0.05,
                                  learning_rate=0.1,
                                  n_epochs_rbm=10,
                                  n_iter_backprop=50,
                                  batch_size=32,
                                  activation_function='relu',
                                  dropout_p=0.2)
mlp.fit(X_train, y_train)

# Save the model
mlp.save('model.pkl')

# Restore it
mlp = SupervisedDBNClassification.load('model.pkl')

predictions = mlp.predict(X_test)
RMSE_sum = 0
abs_errors = []  # absolute error per test sample
for x in range(len(X_test)):
    RMSE_sum = RMSE_sum + (y_test[x] - predictions[x]) ** 2
    abs_errors.append(abs(y_test[x] - predictions[x]))
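The loop above accumulates squared and absolute errors, but the excerpt ends before reporting them; a small completion, assuming the usual RMSE and MAE definitions:

import numpy as np

rmse = (RMSE_sum / len(X_test)) ** 0.5  # root-mean-square error
mae = np.mean(abs_errors)               # mean absolute error
print('RMSE: {:.4f}, MAE: {:.4f}'.format(rmse, mae))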
def run(params):
    # #################### get parameters and define logger ####################
    # device
    os.environ['CUDA_VISIBLE_DEVICES'] = str(params.gpu)

    # get parameters
    data_name = params.data.data_name
    data_dir = params.data.data_dir
    target_dir = params.data.target_dir
    train_prop = params.data.train_prop
    val_prop = params.data.val_prop
    train_params = params.train
    method_name = params.method_name
    result_dir = params.result_dir
    folder_level = params.folder_level

    train_prop = train_prop if train_prop < 1 else int(train_prop)
    val_prop = val_prop if val_prop < 1 else int(val_prop)
    result_root = result_dir
    local_v = locals()
    for s in folder_level:
        result_dir = check_path(os.path.join(result_dir, str(local_v[s])))

    # define output dirs
    acc_dir = os.path.join(result_root, 'accuracy.csv')
    log_dir = os.path.join(result_dir, 'train.log')
    model_dir = os.path.join(result_dir, 'weights.pkl')
    # soft_dir = os.path.join(result_dir, 'soft_label.mat')
    # loss_dir = os.path.join(result_dir, 'loss_curve.png')

    # define logger
    logger = define_logger(log_dir)

    # print parameters
    num1 = 25
    num2 = 100
    logger.info('%s begin a new training: %s %s' % ('#' * num1, method_name, '#' * num1))
    params_str = recur_str_dict_for_show(params, total_space=num2)
    logger.info('show parameters ... \n%s' % params_str)

    # ########################## get data, train ##############################
    logger.info('get data ...')
    mask_dir = os.path.dirname(data_dir)
    data, target = read_data(data_dir, target_dir)
    train_mask, val_mask, test_mask = load_masks(mask_dir, target, train_prop, val_prop)
    x_train, y_train = get_vector_samples(data, target, train_mask)

    logger.info('get model ...')
    from dbn.tensorflow import SupervisedDBNClassification
    classifier = SupervisedDBNClassification(**train_params)

    logger.info('begin to train ...')
    s = time.time()
    classifier.fit(x_train, y_train)
    e = time.time()
    train_time = e - s
    logger.info('training time: %.4fs' % train_time)

    logger.info('save model ...')
    classifier.save(model_dir)

    # ########################## predict, output ##############################
    all_data = data.reshape(-1, data.shape[1] * data.shape[2]).T
    classifier = SupervisedDBNClassification.load(model_dir)

    logger.info('begin to predict ...')
    s = time.time()
    pred = classifier.predict(all_data)
    pred = np.array(pred)
    pred = pred.reshape(target.shape) + 1
    e = time.time()
    pred_time = e - s
    logger.info('predicted time: %.4fs' % pred_time)

    # output predicted map (png/mat), accuracy table and other records
    logger.info('save classification maps etc. ...')
    train_records = {
        'train_time': '%.4f' % train_time,
        'pred_time': '%.4f' % pred_time
    }
    ro = ResultOutput(pred, data, target, train_mask, val_mask, test_mask,
                      result_dir, acc_dir, hyper_params=params,
                      train_records=train_records)
    ro.output()
                                         dropout_p=0.2)

# Split Data
X, Y = get_dataset(tz)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
print('Size of training set == {}, Size of testing set == {}\n'.format(len(X_train), len(X_test)))

start_time = timer()
tot_start = start_time
Matt_Net.pre_train(X_train)
print('Time to pretrain == {:5.3f} seconds\n'.format(timer() - start_time))

start_time = timer()
Matt_Net.fit(X_train, Y_train, False)
print('Time to fit == {:5.3f} seconds\n'.format(timer() - start_time))
print('Total time == {:5.3f} seconds\n'.format(timer() - tot_start))

Matt_Net.save('train/Matt_Net_Zone_{}.pkl'.format(tz))

# Start the timer before predicting so the reported time covers predict()
start_time = timer()
Y_pred = Matt_Net.predict(X_test)
score = accuracy_score(Y_test, Y_pred)
print('Done, time to predict == {:5.3f}\nAccuracy == {} for zone {}\n'.format(
    timer() - start_time, score, tz))

del Matt_Net