def experiment_with_parameters(ser_filename,
                               batch_sizes=[64],
                               learning_rates=[0.05],
                               optimizers=['Ftrl', 'RMSProp', 'Adam', 'Adagrad', 'SGD'],
                               class_weights=[[0.4, 0.6], [0.6, 0.4]]):
    '''
    Calculate and print accuracies for different combinations of hyper-parameters.
    '''
    # Load dataset
    train_data, train_targets, test_data, expected = Helper.unserialize(ser_filename)

    # Build a classifier for every combination of hyper-parameters
    for b_size in batch_sizes:
        for l_rate in learning_rates:
            for optimizer in optimizers:
                for class_weight in class_weights:
                    classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model,
                                                            n_classes=2, steps=500,
                                                            learning_rate=l_rate,
                                                            batch_size=b_size,
                                                            optimizer=optimizer,
                                                            class_weight=class_weight)
                    classifier.fit(train_data, train_targets)

                    # Assess
                    predictions = classifier.predict(test_data)
                    accuracy = metrics.accuracy_score(expected, predictions)
                    confusion_matrix = metrics.confusion_matrix(expected, predictions)
                    print('Accuracy for batch_size %.2d learn_rate %.3f optimizer %s: %f'
                          % (b_size, l_rate, optimizer, accuracy))
                    print("Confusion matrix:\n%s" % confusion_matrix)
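# Hypothetical usage sketch (not part of the original snippet): the dataset path below
# reuses the file name serialized by run() further down and is otherwise an assumption;
# any file that Helper.unserialize() returns as (train_data, train_targets, test_data,
# expected) would work.
if __name__ == '__main__':
    experiment_with_parameters("../Datasets/grain_glcm_d1_a4_2_new.data",
                               batch_sizes=[64, 128],
                               learning_rates=[0.05, 0.01],
                               optimizers=['Adam', 'SGD'])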
def run(featureRepresentation='image', glcm_distance=1, glcm_isMultidirectional=False):
    '''
    Apply a CNN on the grain_images dataset and print test accuracies.
    That is, train it on training data and test it on test data.
    '''
    train_data, train_targets, test_data, expected = Helper.extract_features_from_new_data(
        featureRepresentation, glcm_distance, glcm_isMultidirectional, train_size=0.5)
    Helper.serialize("../Datasets/grain_glcm_d1_a4_2_new.data",
                     (train_data, train_targets, test_data, expected))

    # Build Classifier
    classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model, n_classes=2,
                                            steps=500, learning_rate=0.05, batch_size=128)
    classifier.fit(train_data, train_targets)

    # Assess
    predictions = classifier.predict(test_data)
    accuracy = metrics.accuracy_score(expected, predictions)
    confusion_matrix = metrics.confusion_matrix(expected, predictions)
    print("Confusion matrix:\n%s" % confusion_matrix)
    print('Accuracy: %f' % accuracy)
def instantiateModel(hyperparams):
    # We'll copy the same model from above.
    def custom_model(X, y):
        # X = learn.ops.batch_normalize(X, scale_after_normalization=True)  # TODO: possibly include this
        layers = learn.ops.dnn(X, hyperparams['HIDDEN_UNITS'],
                               activation=hyperparams['ACTIVATION_FUNCTION'],
                               dropout=hyperparams['KEEP_PROB'])
        return learn.models.logistic_regression(layers, y)

    classifier = learn.TensorFlowEstimator(
        model_fn=custom_model,
        n_classes=y_classes,
        batch_size=hyperparams['BATCH_SIZE'],
        steps=hyperparams['STEPS'],
        optimizer=hyperparams['OPTIMIZER'],
        learning_rate=hyperparams['LEARNING_RATE'],
    )

    # We'll make a monitor so that we can implement early stopping based on our train
    # accuracy. This will prevent overfitting.
    # monitor = learn.monitors.BaseMonitor(early_stopping_rounds=int(hyperparams['MAX_BAD_COUNT']))  # , print_steps=100)
    return classifier  # , monitor
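# Hypothetical usage sketch (not part of the original snippet): an example hyper-parameter
# dictionary with the keys instantiateModel() expects; the concrete values are assumptions,
# and `tf` is assumed to be imported as in the other snippets.
example_hyperparams = {
    'HIDDEN_UNITS': [64, 32],           # layer sizes passed to learn.ops.dnn
    'ACTIVATION_FUNCTION': tf.nn.relu,  # activation for the hidden layers
    'KEEP_PROB': 0.8,                   # dropout keep probability
    'BATCH_SIZE': 128,
    'STEPS': 1000,
    'OPTIMIZER': 'Adagrad',
    'LEARNING_RATE': 0.05,
    'MAX_BAD_COUNT': 50,                # only used by the commented-out early-stopping monitor
}
# classifier = instantiateModel(example_hyperparams)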
def dnn(nn_lr=0.1, nn_steps=5000, hidden_units=[30, 30]):
    def tanh_dnn(X, y):
        features = skflow.ops.dnn(X, hidden_units=hidden_units, activation=tf.tanh)
        return skflow.models.linear_regression(features, y)

    regressor = skflow.TensorFlowEstimator(model_fn=tanh_dnn, n_classes=0,
                                           steps=nn_steps, learning_rate=nn_lr,
                                           batch_size=100)
    return regressor
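# Hypothetical usage sketch (not part of the original snippet), assuming the skflow and
# TensorFlow imports used elsewhere in these snippets: fit the tanh-DNN regressor returned
# by dnn() on a toy sine-wave regression problem and report its mean squared error.
import numpy as np
from sklearn import metrics

X_toy = np.linspace(0, 10, 500).reshape(-1, 1).astype(np.float32)  # toy inputs
y_toy = np.sin(X_toy).ravel()                                      # toy regression targets

toy_regressor = dnn(nn_lr=0.05, nn_steps=1000, hidden_units=[20, 20])
toy_regressor.fit(X_toy, y_toy)
toy_mse = metrics.mean_squared_error(y_toy, toy_regressor.predict(X_toy).ravel())
print("Toy MSE: %f" % toy_mse)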
def get_classifier(self, n_classes, batch_size=128, learning_rate=0.1, training_steps=10):
    self.classifier = learn.TensorFlowEstimator(
        model_fn=self.rnn_model,
        n_classes=n_classes,
        batch_size=batch_size,
        steps=training_steps,
        optimizer='SGD',
        learning_rate=learning_rate)
def main():
    images, labels, pokemon = load_images()
    pokemon_test = []
    print(labels)

    # Label encoder
    le = preprocessing.LabelEncoder()
    le.fit(labels)
    print(le.classes_)
    transformed_labels = le.transform(labels)
    print(transformed_labels)

    msk = np.random.rand(714) < 0.8
    print(msk)
    true_indexes = []
    false_indexes = []
    training_labels = []
    test_labels = []
    for idx, val in enumerate(msk):
        if val == 1:
            true_indexes.append(idx)
            training_labels.append(transformed_labels[idx])
        else:
            false_indexes.append(idx)
            test_labels.append(transformed_labels[idx])
            pokemon_test.append(pokemon[idx])
    training_set = np.delete(images, false_indexes, 0)
    test_set = np.delete(images, true_indexes, 0)
    reshaped_dataset = training_set.reshape(len(training_labels), 3072)
    reshaped_testset = test_set.reshape(len(test_labels), 3072)

    # Training and predicting.
    classifier = learn.TensorFlowEstimator(
        model_fn=conv_model, n_classes=17, batch_size=100, steps=20000,
        learning_rate=0.001, verbose=2)
    classifier.fit(reshaped_dataset, training_labels,
                   logdir=os.getcwd() + '/model_20000b_logs')
    classifier.save(os.getcwd() + '/model_20000b')
    score = metrics.accuracy_score(
        test_labels, classifier.predict(reshaped_testset))
    print('Accuracy: {0:f}'.format(score))

    prediction_labels = classifier.predict(reshaped_testset)
    target_names = ['Bug', 'Dark', 'Dragon', 'Electric', 'Fairy', 'Fighting',
                    'Fire', 'Ghost', 'Grass', 'Ground', 'Ice', 'Normal',
                    'Poison', 'Psychic', 'Rock', 'Steel', 'Water']
    print(metrics.classification_report(test_labels, prediction_labels,
                                        target_names=target_names))
    print(test_labels)
    print(prediction_labels)
    print(pokemon_test)
def get_model(filename=CLASSIFIER_FILE):
    ''' Get CNN classifier object from file or create one if none exists on file.'''
    if filename is None:
        # Load dataset
        train_data, train_targets, test_data, expected = Helper.unserialize(
            "../Datasets/raw_new_80.data")
        train_data2, train_targets2, test_data2, expected2 = Helper.unserialize(
            "../Datasets/raw.data")
        train_data = np.concatenate((train_data, train_data2), axis=0)
        train_targets = np.concatenate((train_targets, train_targets2), axis=0)
        test_data = np.concatenate((test_data, test_data2), axis=0)
        expected = np.concatenate((expected, expected2), axis=0)
        print(train_data.shape)

        raw_train_data = np.zeros((train_data.shape[0], 20, 20))
        i = 0
        for item in train_data:
            raw_train_data[i] = item.reshape((20, 20))
            # Display.show_image(raw_train_data[i])
            i = i + 1

        raw_test_data = np.zeros((test_data.shape[0], 20, 20))
        i = 0
        for item in test_data:
            raw_test_data[i] = item.reshape((20, 20))
            # Display.show_image(raw_test_data[i])
            i = i + 1

        # Build Classifier
        # classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model, n_classes=2,
        #                                         steps=500, learning_rate=0.05, batch_size=128)
        classifier = skflow.TensorFlowEstimator(model_fn=conv_model, n_classes=2, steps=500,
                                                learning_rate=0.05, batch_size=128,
                                                optimizer='Ftrl')
        classifier.fit(raw_train_data, train_targets)

        # Assess built classifier
        predictions = classifier.predict(raw_test_data)
        accuracy = metrics.accuracy_score(expected, predictions)
        confusion_matrix = metrics.confusion_matrix(expected, predictions)
        print("Confusion matrix:\n%s" % confusion_matrix)
        print('Accuracy: %f' % accuracy)

        return classifier
    else:
        serialized_classifier = Helper.unserialize(filename)
        return serialized_classifier
def predict_nn(hist_data, data_to_predict, cl_data):
    np_hist_data, np_prd_data, np_classes_data = \
        prepare_data_for_nn(hist_data, data_to_predict, cl_data)
    nn = skflow.TensorFlowEstimator(model_fn=nn_model, n_classes=3)
    nn.fit(np_hist_data, np_classes_data, logdir='./log')
    score = metrics.accuracy_score(np_classes_data, nn.predict(np_hist_data))
    print("Accuracy NN: %f" % score)
    prd = nn.predict_proba(np_prd_data)
    return link_perc_to_cl(prd, CLASSES_PRE)
def run_with_dataset(ser_filename):
    '''
    Apply a CNN on a dataset and print test accuracies.
    That is, train it on training data and test it on test data.
    '''
    # Load dataset
    train_data, train_targets, test_data, expected = Helper.unserialize(ser_filename)

    # Build Classifier
    classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model, n_classes=2,
                                            steps=500, learning_rate=0.05, batch_size=128)
    classifier.fit(train_data, train_targets)

    # Assess
    predictions = classifier.predict(test_data)
    accuracy = metrics.accuracy_score(expected, predictions)
    confusion_matrix = metrics.confusion_matrix(expected, predictions)
    print("Confusion matrix:\n%s" % confusion_matrix)
    print('Accuracy: %f' % accuracy)
def main(fx, scale):
    logdir = '../data/fx/ann/tensorboard_models/%s%s%s' % (
        scale, fx, time.strftime(time_format, time.localtime()))
    # Load dataset
    path_f_final = ['%s/%s_%s_f.npy' % (FILE_PREX, fx, scale),
                    '%s/%s_%s_t.pkl.npy' % (FILE_PREX, fx, scale)]
    path_f_in = '%s/%s_H.pkl' % (FILE_PREX, fx)
    pd_data = pd.read_pickle(path_f_in)['close']
    fx_max = max(pd_data)
    fx_min = min(pd_data)
    data = np.load(path_f_final[0])
    data_s = np.load(path_f_final[1])
    data_train = data[:data.shape[0] - num_test]
    data_test = data[data.shape[0] - num_test:]
    data_s_train = data_s[:data.shape[0] - num_test]
    data_s_test = data_s[data.shape[0] - num_test:]
    regressor = learn.TensorFlowEstimator(
        model_fn=my_model, n_classes=0, optimizer='SGD',
        batch_size=len(data_train), steps=20000, learning_rate=0.2)
    # Fit
    regressor.fit(data_train, data_s_train, logdir=logdir)
    # Predict and score
    prediction = regressor.predict(data_test)
    data = {'close_price': [i * (fx_max - fx_min) + fx_min for i in data_s_test],
            'predict': [i * (fx_max - fx_min) + fx_min for i in prediction]}
    frame = pd.DataFrame(data)
    frame.to_pickle('%s/%sprediction.pkl' % (logdir, fx))
    score1 = metrics.explained_variance_score(data_s_test, prediction)
    score2 = metrics.mean_absolute_error(data_s_test, prediction)
    print(score1, score2)
    return score1, score2
def get_fund_classifier():
    sample_x, sample_y = load_training_data()
    MAX_DOCUMENT_LENGTH = 50
    EMBEDDING_SIZE = 200
    vocab_processor = learn.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
    sample_x = np.array(list(vocab_processor.fit_transform(sample_x)))
    n_words = len(vocab_processor.vocabulary_)
    logger_fund.info('Size of data')
    logger_fund.info(sample_x.shape)
    logger_fund.info('Total words: %d' % n_words)

    def average_model(X, y):
        word_vectors = learn.ops.categorical_variable(
            X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        features = tf.reduce_max(word_vectors, reduction_indices=1)
        return learn.models.logistic_regression(features, y)

    def rnn_model(X, y):
        word_vectors = learn.ops.categorical_variable(
            X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        word_list = tf.unpack(word_vectors, axis=1)
        cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)
        _, encoding = tf.nn.rnn(cell, word_list, dtype=tf.float32)
        return learn.models.logistic_regression(encoding, y)

    classifier = learn.TensorFlowEstimator(model_fn=rnn_model, n_classes=2,
                                           continue_training=True, steps=1000,
                                           learning_rate=0.1, optimizer='Adagrad')
    classifier.fit(sample_x, sample_y)
    return vocab_processor, classifier
    save = pickle.load(f)
    X_valid = save['X']
    y_valid = save['y']
    X_valid = X_valid.reshape(-1, X_valid.shape[1], X_valid.shape[2], 1)
    print 'valid: X => ', X_valid.shape, 'y => ', y_valid.shape

# Restore model if graph is saved into a folder.
if os.path.exists("%s/graph.pbtxt" % model_dir):
    classifier = learn.TensorFlowEstimator.restore(model_dir)
    pred_valid = classifier.predict_proba(X_valid, batch_size=64)
    print pred_valid
else:
    # Create a new resnet classifier.
    classifier = learn.TensorFlowEstimator(model_fn=get_image_feature_small, n_classes=0,
                                           batch_size=100, steps=100,
                                           learning_rate=0.001, continue_training=True)
    while True:
        # Train model and save summaries into logdir.
        classifier.fit(X, y, logdir=model_dir)
        # Save model graph and checkpoints.
        classifier.save(model_dir)
        # Calculate accuracy.
        pred_valid = classifier.predict_proba(X_valid, batch_size=5)
        print pred_valid
        print y_valid
        # print('Accuracy: {0:f}'.format(score))
# clf.fit(trainx, trainy)
# joblib.dump(clf, os.path.join(os.path.split(os.path.realpath(__file__))[0], 'models/category.rnn.model'))
# scores = cross_validation.cross_val_score(clf, trainx, trainy, scoring='precision', cv=5)
# print MAX_DOCUMENT_LENGTH, EMBEDDING_SIZE, scores.mean(), scores
# score = metrics.accuracy_score(trainy, clf.predict(trainx))
# score = metrics.accuracy_score(testy, clf.predict(testx))
# print('Accuracy: {0:f}'.format(score))

parameters = {
    'learning_rate': [0.05, 0.1, 0.15],
    'steps': [500, 1000, 1200, 1500, 2000],
    'optimizer': ["Adam", "Adagrad"]
}
keys, values = parameters.keys(), parameters.values()
cvscores = []
for parameter in itertools.product(*values):
    ps = {keys[i]: parameter[i] for i in xrange(3)}
    clf = learn.TensorFlowEstimator(model_fn=rnn_model, n_classes=2,
                                    continue_training=True, **ps)
    clf.fit(trainx, trainy)
    score = metrics.accuracy_score(testy, clf.predict(testx))
    cvscores.append((ps, score))
for cvscore in cvscores:
    print cvscore[0], cvscore[1]
print 'best score'
print sorted(cvscores, key=lambda x: x[1], reverse=True)[0]
def train_cnn():
    steps = 1
    for i in SCALE:
        result_tmp0 = np.empty(0)
        result_tmp1 = np.empty(0)
        result_tmp2 = np.empty(0)
        # df = pd.DataFrame()
        for fx in FX_LIST:
            result_tmp3 = np.empty(0)
            # fs_t_path = ['%s/NFs/%s_%i.npy' % (FILE_PREX, fx, i),
            #              '%s/T/%s_%i.pkl' % (FILE_PREX, fx, i)]
            # fs = np.load(fs_t_path[0])
            # t = pd.read_pickle(fs_t_path[1])
            f_train = np.load('%s/NFs/%s_train_%i.npy' % (FILE_PREX, fx, i))
            f_test = np.load('%s/NFs/%s_test_%i.npy' % (FILE_PREX, fx, i))
            t_train = pd.read_pickle('%s/T/%s_train_%i.pkl' % (FILE_PREX, fx, i))
            t_test = pd.read_pickle('%s/T/%s_test_%i.pkl' % (FILE_PREX, fx, i))
            for optimizer in optimizers:
                start = time.strftime(time_format, time.localtime())
                print('%s start at %s.' % (fx, start))
                model = learn.TensorFlowEstimator(model_fn=conv_model, n_classes=0,
                                                  batch_size=80, steps=steps,
                                                  optimizer=optimizer, learning_rate=0.001)
                logdir = '%s/tensorboard_models/exam/%s/%s' % (FILE_PREX, optimizer, fx)
                # model.fit(fs[:-num_test],
                #           t['change'][:-num_test],
                #           logdir=logdir)
                model.fit(f_train, t_train['change'].values, logdir=logdir)
                model.save('%s/saves/exam/%s/%s' % (FILE_PREX, optimizer, fx))
                # prediction1 = model.predict(fs[-num_test:])
                # prediction2 = (prediction1 / 100 + 1) * \
                #     t['target_open'][-num_test:]
                # score0 = mean_absolute_percentage_error(
                #     t['real_target'][-num_test:].values, prediction2)
                # score1 = metrics.explained_variance_score(
                #     t['change'][-num_test:].values, prediction1)
                # score2 = metrics.mean_squared_error(
                #     t['real_target'][-num_test:].values, prediction2)
                prediction1 = model.predict(f_test)
                prediction2 = (prediction1 / 100 + 1) * \
                    t_test['target_open'].values
                score0 = mean_absolute_percentage_error(
                    t_test['real_target'].values, prediction2)
                score1 = metrics.explained_variance_score(
                    t_test['change'].values, prediction1)
                score2 = metrics.mean_squared_error(
                    t_test['real_target'].values, prediction2)
                result_tmp0 = np.append(result_tmp0, score0)
                print(result_tmp0)
                result_tmp1 = np.append(result_tmp1, score1)
                print(result_tmp1)
                result_tmp2 = np.append(result_tmp2, score2)
                print(result_tmp2)
                result_tmp3 = np.append(result_tmp3, prediction2)
                end = time.strftime(time_format, time.localtime())
                print('%s end at %s.' % (fx, end))
            result_tmp3 = pd.DataFrame(result_tmp3.reshape(-1, len(optimizers)),
                                       columns=optimizers)
            result_tmp3['real'] = t_test['real_target'].values
            result_tmp3.to_pickle('%s/pre_result/%s_%i.pkl' % (FILE_PREX, fx, i))
        result0 = pd.DataFrame(result_tmp0.reshape(-1, len(optimizers)),
                               index=FX_LIST, columns=optimizers)
        print(result0)
        result1 = pd.DataFrame(result_tmp1.reshape(-1, len(optimizers)),
                               index=FX_LIST, columns=optimizers)
        print(result1)
        result2 = pd.DataFrame(result_tmp2.reshape(-1, len(optimizers)),
                               index=FX_LIST, columns=optimizers)
        print(result2)
        result0.to_pickle('%s/exam_mape_%i.pkl' % (FILE_PREX, i))
        result1.to_pickle('%s/exam_evs_%i.pkl' % (FILE_PREX, i))
        result2.to_pickle('%s/exam_mse_%i.pkl' % (FILE_PREX, i))
    net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])
    return learn.models.logistic_regression(net, y)


# Download and load MNIST data.
mnist = input_data.read_data_sets('MNIST_data')

# Restore model if graph is saved into a folder.
if os.path.exists("models/resnet/graph.pbtxt"):
    classifier = learn.TensorFlowEstimator.restore("models/resnet/")
else:
    # Create a new resnet classifier.
    classifier = learn.TensorFlowEstimator(model_fn=res_net, n_classes=10,
                                           batch_size=100, steps=100,
                                           learning_rate=0.001, continue_training=True)

while True:
    # Train model and save summaries into logdir.
    classifier.fit(mnist.train.images, mnist.train.labels, logdir="models/resnet/")

    # Calculate accuracy.
    score = metrics.accuracy_score(
        mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64))
    print('Accuracy: {0:f}'.format(score))
def rnn_model(X, y):
    """Recurrent neural network model to predict from sequence of words to a class."""
    # Convert indexes of words into embeddings.
    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
    # maps word indexes of the sequence into [batch_size, sequence_length,
    # EMBEDDING_SIZE].
    word_vectors = learn.ops.categorical_variable(X, n_classes=n_words,
                                                  embedding_size=EMBEDDING_SIZE,
                                                  name='words')
    # Split into list of embedding per word, while removing doc length dim.
    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
    word_list = learn.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
    cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)
    # Create an unrolled Recurrent Neural Network to length of
    # MAX_DOCUMENT_LENGTH and passes word_list as inputs for each unit.
    _, encoding = tf.nn.rnn(cell, word_list, dtype=tf.float32)
    # Given encoding of RNN, take encoding of last step (e.g hidden size of the
    # neural network of last step) and pass it as features for logistic
    # regression over output classes.
    return learn.models.logistic_regression(encoding, y)


classifier = learn.TensorFlowEstimator(model_fn=rnn_model, n_classes=15, steps=1000,
                                       optimizer='Adam', learning_rate=0.01,
                                       continue_training=True)

# Continuously train for 1000 steps & predict on test set.
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    print('Accuracy: {0:f}'.format(score))
def train_all_models():
    columns = np.empty(0)
    steps = 20000  # !
    batch_size = 30
    learning_rate = 0.001
    result_tmp0 = np.empty(0)
    for i in SCALE:
        for fx in FX_LIST:
            result_tmp1 = pd.DataFrame()
            result_tmp2 = pd.DataFrame()
            for name in models:
                if i == SCALE[-1]:
                    columns = np.append(columns, '%s%s_MSE' % (fx, name))
                    columns = np.append(columns, '%s%s_MAPE' % (fx, name))
                    # columns = np.append(columns, '%s%s_EVS' % (fx, name))
                    columns = np.append(columns, '%s%s_R2' % (fx, name))
                    columns = np.append(columns, '%s%s_R2_R' % (fx, name))
                    # columns = np.append(columns, '%s%s_MAPE_C' % (fx, name))
                start = time.strftime(time_format, time.localtime())
                print('%s with %s for h=%i start at %s.' % (fx, name, i, start))
                logdir = '%s/tensorboard_models/exam/%s/%s_%i' % (FILE_PREX, name, fx, i)
                if name == 'CNN':
                    f_train = np.load('%s/NFs/%s_train_%i.npy' % (FILE_PREX, fx, i))
                    f_test = np.load('%s/NFs/%s_test_%i.npy' % (FILE_PREX, fx, i))
                    f_plot = np.load('%s/NFs/%s_plot_%i.npy' % (FILE_PREX, fx, i))
                    t_train = pd.read_pickle('%s/T/%s_train_%i.pkl' % (FILE_PREX, fx, i))
                    t_test = pd.read_pickle('%s/T/%s_test_%i.pkl' % (FILE_PREX, fx, i))
                    t_plot = pd.read_pickle('%s/T/%s_plot_%i.pkl' % (FILE_PREX, fx, i))
                    model = learn.TensorFlowEstimator(
                        model_fn=conv_model, n_classes=0, batch_size=batch_size,
                        steps=steps, optimizer='Adagrad', learning_rate=learning_rate)
                    model.fit(f_train, t_train['change'].values, logdir=logdir)
                    model.save('%s/saves/exam/%s/%s_%i' % (FILE_PREX, name, fx, i))
                else:
                    f_train = np.load('%s/NFs/%s_train_5_%i.npy' % (FILE_PREX, fx, i))
                    f_test = np.load('%s/NFs/%s_test_5_%i.npy' % (FILE_PREX, fx, i))
                    f_plot = np.load('%s/NFs/%s_plot_5_%i.npy' % (FILE_PREX, fx, i))
                    t_train = pd.read_pickle('%s/T/%s_train_5_%i.pkl' % (FILE_PREX, fx, i))
                    t_test = pd.read_pickle('%s/T/%s_test_5_%i.pkl' % (FILE_PREX, fx, i))
                    t_plot = pd.read_pickle('%s/T/%s_plot_5_%i.pkl' % (FILE_PREX, fx, i))
                    if name == 'ANN-10':
                        model = learn.TensorFlowEstimator(
                            model_fn=model10, n_classes=0, optimizer='Adagrad',
                            batch_size=batch_size, steps=steps, learning_rate=learning_rate)
                        model.fit(f_train, t_train['change'].values, logdir=logdir)
                        model.save('%s/saves/exam/%s/%s_%i' % (FILE_PREX, name, fx, i))
                    elif name == 'ANN-15':
                        model = learn.TensorFlowEstimator(
                            model_fn=model15, n_classes=0, optimizer='Adagrad',
                            batch_size=batch_size, steps=steps, learning_rate=learning_rate)
                        model.fit(f_train, t_train['change'].values, logdir=logdir)
                        model.save('%s/saves/exam/%s/%s_%i' % (FILE_PREX, name, fx, i))
                    elif name == 'ANN-20':
                        model = learn.TensorFlowEstimator(
                            model_fn=model20, n_classes=0, optimizer='Adagrad',
                            batch_size=batch_size, steps=steps, learning_rate=learning_rate)
                        model.fit(f_train, t_train['change'].values, logdir=logdir)
                        model.save('%s/saves/exam/%s/%s_%i' % (FILE_PREX, name, fx, i))
                    else:
                        model = svm.SVR()
                        model.fit(f_train, t_train['change'].values)
                prediction1 = model.predict(f_test)
                prediction2 = (prediction1 / 100 + 1) * \
                    t_test['target_open'].values
                prediction3 = model.predict(f_plot)
                prediction4 = (prediction3 / 100 + 1) * \
                    t_plot['target_open'].values
                score0 = metrics.mean_squared_error(
                    t_test['real_target'].values, prediction2)
                score1 = mean_absolute_percentage_error(
                    t_test['real_target'].values, prediction2)
                # score2 = metrics.explained_variance_score(
                #     t_test['real_target'].values, prediction2)
                score2 = metrics.r2_score(t_test['change'].values, prediction1)
                score3 = metrics.r2_score(t_test['real_target'].values, prediction2)
                result_tmp0 = np.append(result_tmp0, score0)
                result_tmp0 = np.append(result_tmp0, score1)
                result_tmp0 = np.append(result_tmp0, score2)
                result_tmp0 = np.append(result_tmp0, score3)
                result_tmp1['%s' % name] = prediction2
                result_tmp2['%s' % name] = prediction4
                end = time.strftime(time_format, time.localtime())
                print('%s with %s for h=%i end at %s.\n'
                      'MSE: %f\nMAPE: %f\nR2: %f\nR2_R: %f'
                      % (fx, name, i, end, score0, score1, score2, score3))
            # result_tmp1 = pd.DataFrame(
            #     result_tmp1.reshape(len(models), -1), columns=models)
            result_tmp1['real'] = t_test['real_target'].values
            result_tmp1.to_pickle('%s/pre_result/%s_all_%i.pkl' % (FILE_PREX, fx, i))
            # result_tmp2 = pd.DataFrame(
            #     result_tmp2.reshape(-1, len(models)), columns=models)
            result_tmp2['real'] = t_plot['real_target'].values
            result_tmp2.to_pickle('%s/pre_result/%s_plot_%i.pkl' % (FILE_PREX, fx, i))
    result0 = pd.DataFrame(result_tmp0.reshape(-1, len(columns)),
                           index=SCALE, columns=columns)
    result0.to_pickle('%s/exam_all.pkl' % FILE_PREX)
    decoding, _, sampling_decoding, _ = learn.ops.rnn_seq2seq(
        in_X, in_y, encoder_cell, decoder_cell)
    return learn.ops.sequence_classifier(decoding, out_y, sampling_decoding)


def get_language_model(hidden_size):
    """Returns a language model with given hidden size."""

    def language_model(X, y):
        inputs = learn.ops.one_hot_matrix(X, 256)
        inputs = learn.ops.split_squeeze(1, MAX_DOC_LENGTH, inputs)
        target = learn.ops.split_squeeze(1, MAX_DOC_LENGTH, y)
        encoder_cell = tf.nn.rnn_cell.OutputProjectionWrapper(
            tf.nn.rnn_cell.GRUCell(hidden_size), 256)
        output, _ = tf.nn.rnn(encoder_cell, inputs, dtype=tf.float32)
        return learn.ops.sequence_classifier(output, target)

    return language_model


### Training model.

estimator = learn.TensorFlowEstimator(model_fn=get_language_model(HIDDEN_SIZE),
                                      n_classes=256,
                                      optimizer='Adam', learning_rate=0.01,
                                      steps=1000, batch_size=64,
                                      continue_training=True)

estimator.fit(X, y)
X_test = np.array(list(char_processor.transform(X_test)))

### Models

HIDDEN_SIZE = 20


def char_rnn_model(X, y):
    byte_list = learn.ops.one_hot_matrix(X, loader.num_alpha)
    byte_list = learn.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list)
    cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
    _, encoding = tf.nn.rnn(cell, byte_list, dtype=tf.float32)
    return learn.models.logistic_regression(encoding, y)


classifier = learn.TensorFlowEstimator(model_fn=char_rnn_model, n_classes=loader.num_hash,
                                       steps=100, optimizer='Adam', learning_rate=0.01,
                                       continue_training=True)

# Continuously train for 100 steps & predict on test set.
print("TRAIN")
while True:
    classifier.fit(X_train, y_train)
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    classifier.save('save/')
    print("Accuracy: %f" % score)
# hyperparameters: these are adjustable and lead to different results
LOG_DIR = os.path.join(os.getcwd(),
                       datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
TIMESTEPS = 8
RNN_LAYERS = [150]
DENSE_LAYERS = None
TRAINING_STEPS = 5000
BATCH_SIZE = 100
PRINT_STEPS = TRAINING_STEPS / 10
LEARNING_RATE = 0.05

# TensorFlowEstimator does all the training work
regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                                      n_classes=0, verbose=1,
                                      steps=TRAINING_STEPS, optimizer='SGD',
                                      learning_rate=LEARNING_RATE, batch_size=BATCH_SIZE,
                                      continue_training=True)

# read the data
print("Reading CSV file...")
with open('pub.csv') as f:
    data = list(reader(f.read().splitlines()))

# get output
# for 'data.csv', standardized impressions are in column 5
adOps = [float(i[5]) for i in data[1::]]
tf.to_float(adOps, name='ToFloat')
vocab_processor = learn.preprocessing.ByteProcessor(
    max_document_length=MAX_DOCUMENT_LENGTH)

x_iter = vocab_processor.transform(X_train)
y_iter = vocab_processor.transform(y_train)
xpred = np.array(list(vocab_processor.transform(X_test))[:20])
ygold = list(y_test)[:20]

PATH = '/tmp/tf_examples/ntm/'

if os.path.exists(PATH):
    translator = learn.TensorFlowEstimator.restore(PATH)
else:
    translator = learn.TensorFlowEstimator(model_fn=translate_model, n_classes=256,
                                           optimizer='Adam', learning_rate=0.01,
                                           batch_size=128, continue_training=True)

while True:
    translator.fit(x_iter, y_iter, logdir=PATH)
    translator.save(PATH)

    predictions = translator.predict(xpred, axis=2)
    xpred_inp = vocab_processor.reverse(xpred)
    text_outputs = vocab_processor.reverse(predictions)
    for inp_data, input_text, pred, output_text, gold in zip(
            xpred, xpred_inp, predictions, text_outputs, ygold):
        print('English: %s. French (pred): %s, French (gold): %s' %
              (input_text, output_text, gold.decode('utf-8')))
        print(inp_data, pred)
                                   bias=True, activation=tf.nn.relu)
        h_pool1 = max_pool_2x2(h_conv1)
    # second conv layer will compute 64 features for each 5x5 patch
    with tf.variable_scope('conv_layer2'):
        h_conv2 = learn.ops.conv2d(h_pool1, n_filters=64, filter_shape=[5, 5],
                                   bias=True, activation=tf.nn.relu)
        h_pool2 = max_pool_2x2(h_conv2)
        # reshape tensor into a batch of vectors
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    # densely connected layer with 1024 neurons
    h_fc1 = learn.ops.dnn(h_pool2_flat, [1024], activation=tf.nn.relu, dropout=0.5)
    return learn.models.logistic_regression(h_fc1, y)


# Training and predicting
classifier = learn.TensorFlowEstimator(model_fn=conv_model, n_classes=10,
                                       batch_size=100, steps=20000,
                                       learning_rate=0.001)
classifier.fit(mnist.train.images, mnist.train.labels)
score = metrics.accuracy_score(mnist.test.labels,
                               classifier.predict(mnist.test.images))
print('Accuracy: {0:f}'.format(score))
LOG_DIR = os.path.join(os.getcwd(),
                       datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
TIMESTEPS = 80
RNN_LAYERS = [80]
DENSE_LAYERS = None
TRAINING_STEPS = 30000
BATCH_SIZE = 100
PRINT_STEPS = TRAINING_STEPS / 100
my_dir = os.sep.join([os.path.expanduser('~'), 'Desktop', 'sine'])

regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                                      n_classes=0, verbose=2,
                                      steps=TRAINING_STEPS, optimizer='SGD',
                                      learning_rate=0.001, batch_size=BATCH_SIZE,
                                      class_weight=[1])

# generate SINE WAVE data
X, y = generate_data(np.sin, np.linspace(0, 100, 5000), TIMESTEPS, seperate=False)

# create a lstm instance and validation monitor
validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'],
                                                      every_n_steps=PRINT_STEPS,
                                                      early_stopping_rounds=100000)
regressor.fit(X['train'], y['train'], monitors=[validation_monitor], logdir=LOG_DIR)

# based off training, get the predictions
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from sklearn import datasets, metrics, cross_validation
from tensorflow.contrib import learn

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)


def my_model(X, y):
    """This is DNN with 10, 20, 10 hidden layers, and dropout of 0.1 probability."""
    layers = learn.ops.dnn(X, [10, 20, 10], dropout=0.1)
    return learn.models.logistic_regression(layers, y)


classifier = learn.TensorFlowEstimator(model_fn=my_model, n_classes=3, steps=1000)
classifier.fit(X_train, y_train)
score = metrics.accuracy_score(y_test, classifier.predict(X_test))
print('Accuracy: {0:f}'.format(score))
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics, preprocessing
import numpy as np
import pandas as pd

df = pd.read_csv("data/CHD.csv", header=0)
print df.describe()


def my_model(X, y):
    return skflow.models.logistic_regression(X, y)


a = preprocessing.StandardScaler()
X = a.fit_transform(df['age'].astype(float))
print a.get_params()

classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=1)
classifier.fit(X, df['chd'].astype(float), logdir='/tmp/logistic')
print(classifier.get_tensor_value('logistic_regression/bias:0'))
print(classifier.get_tensor_value('logistic_regression/weight:0'))
score = metrics.accuracy_score(df['chd'].astype(float), classifier.predict(X))
print("Accuracy: %f" % score)
### Embeddings

EMBEDDING_SIZE = 3


def categorical_model(X, y):
    features = skflow.ops.categorical_variable(
        X, n_classes, embedding_size=EMBEDDING_SIZE, name='embarked')
    return skflow.models.logistic_regression(tf.squeeze(features, [1]), y)  # features has shape (712, 1, 3)


classifier = skflow.TensorFlowEstimator(model_fn=categorical_model, n_classes=2)
classifier.fit(X_train, y_train)
print("Accuracy: {0}".format(
    metrics.accuracy_score(classifier.predict(X_test), y_test)))
print("ROC: {0}".format(
    metrics.roc_auc_score(classifier.predict(X_test), y_test)))

### One Hot


def one_hot_categorical_model(X, y):
    features = skflow.ops.one_hot_matrix(X, n_classes)
    return skflow.models.logistic_regression(tf.squeeze(features, [1]), y)
                          dropout=0.7)
    with tf.variable_scope('LR_Layer'):
        o_linear = learn.models.linear_regression(h_fc1, y)
    return o_linear


time_format = '%Y%m%d%H%M'
result_tmp = np.empty(0)
num_test = 8496

if __name__ == '__main__':
    for fx in FX_LIST:
        for optimizer in optimizers:
            re = learn.TensorFlowEstimator(model_fn=conv_model, n_classes=0,
                                           batch_size=200, steps=20000,
                                           optimizer=optimizer, learning_rate=0.001)
            path_f_final = [
                '%s/%s_FINAL_M_new100.npy' % (FILE_PREX, fx),
                '%s/%s_FINAL_S_new100.pkl' % (FILE_PREX, fx)
            ]
            data = np.load(path_f_final[0])
            data_s = pd.read_pickle(path_f_final[1])
            range_price = data_s['max_price'] - data_s['min_price']
            data = np.array([
                (data[i] - data_s['min_price'][i]) / range_price[i]
                for i in range(data.shape[0])
            ])
            data_train = data[:data.shape[0] - num_test]
            data_test = data[data.shape[0] - num_test:]
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
# The canned DNN classifier takes hidden_units directly; TensorFlowEstimator would need a model_fn.
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
print("Accuracy: %f" % score)
# ## Parameter definitions
#
# - LOG_DIR: log file
# - TIMESTEPS: RNN time steps
# - RNN_LAYERS: RNN layer configuration
# - DENSE_LAYERS: DNN sizes; [10, 10] means two dense layers with 10 hidden units each
# - TRAINING_STEPS: training steps
# - BATCH_SIZE: training batch size
# - PRINT_STEPS: interval for printing intermediate training progress (every 1% of the total steps)

# In[15]:

regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                                      n_classes=0,
                                      verbose=1,
                                      steps=TRAINING_STEPS,
                                      optimizer='Adagrad',
                                      learning_rate=0.03,
                                      batch_size=BATCH_SIZE)

# ## Create a regressor with TF Learn
#
# Build the model used for prediction, using the TensorFlowEstimator provided by the TF Learn library.
#
# **Parameters**:
#
# - model_fn: model used for training and prediction
# - n_classes: number of label classes (0: prediction/regression, 1 or more: classification); needs verification
# - verbose: print progress
# - steps: training steps
# - optimizer: optimization method ("SGD", "Adam", "Adagrad")
# Create random dataset.
rng = np.random.RandomState(1)
X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T

# Fit regression DNN models.
regressors = []
options = [[2], [10, 10], [20, 20]]
for hidden_units in options:
    def tanh_dnn(X, y):
        features = learn.ops.dnn(X, hidden_units=hidden_units,
                                 activation=learn.tf.tanh)
        return learn.models.linear_regression(features, y)

    regressor = learn.TensorFlowEstimator(model_fn=tanh_dnn, n_classes=0,
                                          steps=500, learning_rate=0.1, batch_size=100)
    regressor.fit(X, y)
    score = mean_squared_error(regressor.predict(X), y)
    print("Mean Squared Error for {0}: {1:f}".format(str(hidden_units), score))
    regressors.append(regressor)

# Predict on new random Xs.
X_test = np.arange(-100.0, 100.0, 0.1)[:, np.newaxis]
y_1 = regressors[0].predict(X_test)
y_2 = regressors[1].predict(X_test)
y_3 = regressors[2].predict(X_test)

# Plot the results
plt.figure()
plt.scatter(y[:, 0], y[:, 1], c="k", label="data")
plt.scatter(y_1[:, 0], y_1[:, 1], c="g",