def split_dataset(dataset, train_percent=None):
    '''Splits the dataset into train and test'''
    if not train_percent or int(train_percent) > 100:
        print("Train percent invalid, using default")
        train_percent = 80

    # Shuffle / randomize the indices. np.random.shuffle works in place and
    # returns None, so shuffle first and then use the list.
    data_indices = [i for i in range(dataset.num_records)]
    np.random.shuffle(data_indices)

    # How many training records do we need?
    num_train_records = int(train_percent) * dataset.num_records // 100

    # Init train and test
    train_text, train_labels = [], []
    test_text, test_labels = [], []
    # Split on the position in the shuffled order, not on the index value
    # itself, so that the split is actually random.
    for position, index in enumerate(data_indices):
        if position < num_train_records:
            train_labels.append(dataset.labels[index])
            train_text.append(dataset.text[index])
        else:
            test_labels.append(dataset.labels[index])
            test_text.append(dataset.text[index])

    train_dataset = DataSet(None, train_text, train_labels, dataset.isVectorized)
    test_dataset = DataSet(None, test_text, test_labels, dataset.isVectorized)
    return train_dataset, test_dataset
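# Usage sketch for the shuffled-index split above, with plain lists standing
# in for the DataSet fields (the real class also carries num_records and
# isVectorized); names here are illustrative, standard library only.
import random

def split_lists(text, labels, train_percent=80):
    indices = list(range(len(text)))
    random.shuffle(indices)
    cut = train_percent * len(text) // 100
    train_idx, test_idx = indices[:cut], indices[cut:]
    return ([text[i] for i in train_idx], [labels[i] for i in train_idx]), \
           ([text[i] for i in test_idx], [labels[i] for i in test_idx])

# split_lists(['a', 'b', 'c', 'd', 'e'], [0, 1, 0, 1, 0]) keeps four
# examples for training and one for testing.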
def plot_stations(filename):
    """ Read in a saved stations file and plot it. """
    dataset = DataSet()
    dataset.read(filename)
    dataset.plot()
def __init__(self, target_file):
    '''
    Extends data_set.DataSet with methods for reading / writing
    from / to CSV files

    :param target_file: CSV file to read or write
    '''
    DataSet.__init__(self)
    self.target_file = target_file
def input_fn(data_set: DataSet, size):
    input_dict = {}

    # labeled data
    data_set = data_set.get_batch(size)
    input_dict['inputs'] = tf.constant(np.array(data_set.inputs()))
    input_dict['sequence_length'] = tf.constant(data_set.lengths())
    input_dict['mask'] = tf.constant(data_set.masks())
    labels = tf.constant(data_set.labels())

    return input_dict, labels
def create_data_set(key_items, value_items, name=""):
    data_items = []
    for key_counter in range(0, len(key_items)):
        key = key_items[key_counter]
        if key_counter < len(value_items):
            value = value_items[key_counter]
        else:
            value = None
        data_items.append(DataItem(key, value))
    ds = DataSet(name)
    ds.add_data_items(data_items)
    return ds
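# The padding loop in create_data_set can also be written with
# itertools.zip_longest, which pairs keys with values and fills missing
# values with None, provided key_items is at least as long as value_items.
# A sketch with plain tuples standing in for DataItem.
from itertools import zip_longest

keys = ['a', 'b', 'c']
values = [1, 2]
pairs = list(zip_longest(keys, values))  # [('a', 1), ('b', 2), ('c', None)]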
def get_dataset(self, is_train=True):
    ld = LoadedData()
    if is_train:
        ld.load_data()
        ld.label_normalize()
    else:
        # load source speaker data
        ld.load_data(filename=config.src_data_dir,
                     test_set_size=config.src_test_size,
                     vali_set_size=config.src_vali_size)
    # sort the data
    # ld.sort_data()
    ld.print_info()

    dataset = DataSet(ld)
    self.ds = dataset
    self.loaded_data = ld
    self.train_dataset_iter = dataset.train_iterator
    self.vali_dataset_iter = dataset.vali_iterator
    self.test_dataset_iter = dataset.test_iterator
    self.dataset_iter = tf.data.Iterator.from_string_handle(
        self.dataset_handle,
        dataset.train_set.output_types,
        dataset.train_set.output_shapes)
    with tf.name_scope('batch_data'):
        self.batch_features, \
        self.batch_labels, \
        self.batch_lengths, \
        self.batch_uttids = self.dataset_iter.get_next()
def load(self, dataPath, numTrain, numValid, numTest):
    """Load the data."""
    print("Loading data from " + dataPath + "...")
    data = np.genfromtxt(dataPath, delimiter=",", dtype="uint8")

    # The last numTest instances ALWAYS comprise the test set.
    train, test = data[:numTrain + numValid], data[numTrain + numValid:]
    shuffle(train)
    train, valid = train[:numTrain], train[numTrain:]

    self.trainingSet = DataSet(train)
    self.validationSet = DataSet(valid)
    self.testSet = DataSet(test)
    print("Data loaded.")
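# The slicing discipline in load, shown in miniature with illustrative
# numbers: the tail rows become the test set before any shuffling, so the
# test split stays stable across runs while train/valid are re-drawn.
import numpy as np

data = np.arange(20).reshape(10, 2)
numTrain, numValid = 6, 2
train_valid, test = data[:numTrain + numValid], data[numTrain + numValid:]
np.random.shuffle(train_valid)  # shuffles rows in place
train, valid = train_valid[:numTrain], train_valid[numTrain:]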
def split_dataset(dataset, ratio=None):
    size = dataset.size
    if ratio is None:
        ratio = _choose_optimal_train_ratio(size)

    mask = np.zeros(size, dtype=np.bool_)
    train_size = int(size * ratio)
    mask[:train_size] = True
    np.random.shuffle(mask)

    train_x = dataset.x[mask, :]
    train_y = dataset.y[mask]
    mask = np.invert(mask)
    test_x = dataset.x[mask, :]
    test_y = dataset.y[mask]

    return DataSet(train_x, train_y), DataSet(test_x, test_y)
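# The boolean-mask split above in isolation: build a mask with exactly
# train_size True entries, shuffle it in place (np.random.shuffle returns
# None), then index with the mask and its inverse. NumPy only,
# illustrative sizes.
import numpy as np

x = np.arange(10).reshape(5, 2)
y = np.arange(5)
mask = np.zeros(5, dtype=np.bool_)
mask[:3] = True          # 3 of 5 rows go to train
np.random.shuffle(mask)
train_x, train_y = x[mask], y[mask]
test_x, test_y = x[~mask], y[~mask]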
def main():
    from data_set import DataSet
    from collections import namedtuple

    hps = {
        'encode_step': 5,          # number of historical data points
        'train_data_num': 100000,  # size of the training set
    }
    hps = namedtuple("HParams", hps.keys())(**hps)
    data_set = DataSet(hps)
    obs = Observations(0, 0, 0, 0)
    print(obs.values(data_set.history_data, hps.encode_step).shape)
    return
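# The dict-to-namedtuple trick above, standalone: it turns a plain dict of
# hyperparameters into an immutable object with attribute access.
from collections import namedtuple

hps_dict = {'encode_step': 5, 'train_data_num': 100000}
hps = namedtuple('HParams', hps_dict.keys())(**hps_dict)
assert hps.encode_step == 5  # attribute access instead of hps['encode_step']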
def input_fn(labeled: DataSet, unlabeled: DataSet, labeled_size,
             unlabeled_size):
    input_dict = {}

    # labeled data
    labeled = labeled.get_batch(labeled_size)
    input_dict['labeled_inputs'] = tf.constant(np.array(labeled.inputs()))
    input_dict['labeled_sequence_length'] = tf.constant(labeled.lengths())
    input_dict['labeled_mask'] = tf.constant(labeled.masks())
    labels = tf.constant(labeled.labels())

    # unlabeled data
    unlabeled = unlabeled.get_batch(unlabeled_size)
    input_dict['unlabeled_inputs'] = tf.constant(np.array(unlabeled.inputs()))
    input_dict['unlabeled_sequence_length'] = tf.constant(unlabeled.lengths())
    input_dict['unlabeled_mask'] = tf.constant(unlabeled.masks())

    return input_dict, labels
def predict_img(self, image_path):
    """
    :param image_path: path to the image
    :return: json
    """
    # resize the image
    ori_path = image_path
    image_path = resize(image_path)

    # run the prediction
    res = []
    predict = tf.reshape(self.output, [-1, CATEGORY_COUNT])
    pred = self.sess.run(predict, feed_dict={
        INPUT: [DataSet.read_image(image_path)],
        DROPOUT_RATE: 0.
    })
    with open(class_path, "r") as f:
        contents = f.readlines()
    for i, content in enumerate(contents):
        res.append({
            "name": content.split()[0],
            "prob": int(pred[0][i] * 10000)
        })
    res = sorted(res, key=lambda res: float(res['prob']), reverse=True)

    # pick two drawings made by other people
    nums = random.sample(range(0, 10), 2)
    otherpics = []
    for num in nums:
        otherpics.append(
            rf"..\static\dist\img\sp\{res[0]['name']}-{num}.png")

    file_name = image_path.split(r"\received")[1]
    ori_img = cv2.imread(ori_path, cv2.IMREAD_GRAYSCALE)
    ori_img = cv2.resize(ori_img, (200, 200))
    if not os.path.exists(rf"{BUTING_PATH}\code\static\dist\img\sh"):
        os.makedirs(rf"{BUTING_PATH}\code\static\dist\img\sh")
    plt.imsave(rf"{BUTING_PATH}\code\static\dist\img\sh" + file_name,
               ori_img, cmap='gray')
    oripics = [r"..\static\dist\img\sh" + file_name]

    return {
        "size": len(pred[0]),
        "res": res,
        "otherpic": otherpics,
        "oripic": oripics
    }
def to_dataset(df, k, target_column, with_bias):
    df = df[1:].reset_index(drop=True)
    df = df.drop(['date'], axis=1)
    target = df[target_column]
    n, cols = df.shape
    windows_num = n - k  # effective window size, including the label, is k + 1
    x = np.empty([windows_num, k * cols + int(with_bias)])
    y = np.empty([windows_num])
    for i in range(windows_num):
        window = df[i:i + k]
        row = window.values.reshape((-1, ))
        if with_bias:
            row = np.insert(row, 0, 1)
        x[i] = row
        y[i] = target[i + k]
    debug('data set: x=%s y=%s' % (x.shape, y.shape))
    return DataSet(x, y)
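# Window arithmetic used in to_dataset: with n rows, window length k, and
# the label taken from k steps ahead, there are n - k usable windows, each
# flattened to k * cols features. A tiny NumPy-only check with illustrative
# shapes.
import numpy as np

n, k, cols = 6, 2, 3
data = np.arange(n * cols, dtype=float).reshape(n, cols)
target = data[:, 0]
x = np.stack([data[i:i + k].reshape(-1) for i in range(n - k)])
y = np.array([target[i + k] for i in range(n - k)])
assert x.shape == (n - k, k * cols) and y.shape == (n - k,)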
def get_data_set(self, name, directory='/data_sets'):
    data_set = self.get_saved_data_set(name)[0]
    if not data_set:
        if not self.reset:
            print('Could not find dataset. Creating new one')
            self.captains_log.error(
                'Could not find saved dataset. Creating new one')
        data = DataSet(name, self.save, directory)
        for message in data.set_up():
            self._log_message(message[0], message[1])
        if self.save:
            self.save_data_set(data)
        return data
    else:
        if self.reset:
            data = DataSet(name, self.save, directory)
            for message in data.set_up():
                self._log_message(message[0], message[1])
            if self.save:
                self.save_data_set(data)
            return data
        return data_set
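# The branching in get_data_set reduces to a get-or-create pattern with an
# optional reset; a compact sketch using a plain dict as the saved-data-set
# store (illustrative names only).
def get_or_create(cache, name, reset=False):
    if not reset and name in cache:
        return cache[name]
    value = {'name': name}  # stand-in for DataSet(name, ...)
    cache[name] = value
    return value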
def main():
    hps = get_hps()
    data_set = DataSet(hps)
    env = Env(hps, data_set)
    model = Model(hps, env.observations_dim, env.actions_dim)
    obs = env.reset()
    data_set.add_data(obs, 0, 0)

    data_size = hps.train_data_num
    for i in range(data_size):
        print('\r{}/{}'.format(i, data_size), end='')
        obs, reward, _ = env.step(obs, Actions([0.3, 0.3, 0.4]))
        data_set.add_data(obs, 0, 0)

    n = hps.train_iter
    for i in range(n):
        print('\n\n{}/{}'.format(i, n))
        model.price_train(1, data_set)
        if i % hps.eval_interval == 0:
            print('-' * 50)
            model.price_test(1, data_set)
            print('-' * 50)
    return
def aggregate_demo(args):
    """
    python3 aggregate_demo.py pong --range-start=0 --range-end=5
    """
    if args.demo_memory_folder is not None:
        demo_memory_folder = args.demo_memory_folder
    else:
        demo_memory_folder = "{}_demo_samples".format(args.env)

    game_state = game.GameState(game=args.env)
    D = DataSet(args.resized_height, args.resized_width, RandomState(),
                args.replay_memory, args.phi_len, game_state.n_actions)

    data_file = '{}-dqn.pkl'.format(args.env)
    img_file = '{}-dqn-images.h5'.format(args.env)
    for index in range(args.range_start, args.range_end):
        print("Demonstration sample #{num:03d}".format(num=index + 1))
        try:
            data = pickle.load(
                open(
                    demo_memory_folder + '/{0:03d}/'.format(index + 1) +
                    data_file, 'rb'))
        except Exception:
            print("Check that the demo folder exists!")
            return
        actions = data['D.actions']
        rewards = data['D.rewards']
        terminal = data['D.terminal']
        imgs = get_compressed_images(
            demo_memory_folder + '/{0:03d}/'.format(index + 1) +
            img_file + '.gz')
        print("\tMemory size: {}".format(data['D.size']))
        for mem_index in range(data['D.size']):
            D.add_sample(imgs[mem_index], actions[mem_index],
                         rewards[mem_index], terminal[mem_index])
        # h5file.close()

    print("\tTotal Memory size: {}".format(D.size))
    D.resize()
    D.create_validation_set(percent=args.validation_set_percent)
    data = {
        'D.width': D.width,
        'D.height': D.height,
        'D.max_steps': D.max_steps,
        'D.phi_length': D.phi_length,
        'D.num_actions': D.num_actions,
        'D.actions': D.actions,
        'D.rewards': D.rewards,
        'D.terminal': D.terminal,
        'D.bottom': D.bottom,
        'D.top': D.top,
        'D.size': D.size,
        'D.validation_set_markers': D.validation_set_markers,
        'D.validation_indices': D.validation_indices,
        'epsilon': args.init_epsilon,
        't': 0
    }
    images = D.imgs

    pickle.dump(
        data,
        open(demo_memory_folder + '/' + args.env + '-dqn-all.pkl', 'wb'),
        pickle.HIGHEST_PROTOCOL)
    print("Saving and compressing replay memory...")
    save_compressed_images(
        demo_memory_folder + '/' + args.env + '-dqn-images-all.h5', images)
    print("Saved and compressed replay memory")
            minibatch_index, inputs, outputs)
        self.test_eval_function = self.compiled_test_function(
            self.classifier, minibatch_index, inputs, outputs)


from data_set import DataSet

if __name__ == '__main__':
    dataset = DataSet()
    dataset.load()
    dbn = DeepBeliefNetworkTrainer(dataset)
    dbn.initialize()

    start_time = time.clock()
    layer_epoch_costs = dbn.pretrain()
    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    start_time = time.clock()
    epoch_losses, best_validation_loss, best_iter, test_score = dbn.train()
    end_time = time.clock()
    print >> sys.stderr, ('The fine tuning code for file ' +
common = {
    'dev': './data/atis.pkl.dev',
    'test': './data/atis.pkl.test',
    'slot': './data/atis.pkl.slot',
}

if __name__ == '__main__':
    config = config_plain
    # experiments = experiments[5:6]

    if not os.path.exists('./out'):
        os.mkdir('./out')

    # for vocab size
    DataSet('./data/atis.pkl.slot', './data/atis.pkl.train')
    DataSet('./data/atis.pos.slot', './data/atis.pos.train')

    slot = common['slot']
    validation_set = DataSet(slot, common['dev'])
    test_set = DataSet(slot, common['test'])

    print('# Experiments (%d)' % len(experiments))
    print('# validation_set (%d)' % validation_set.size())
    print('# test_set (%d)' % test_set.size())

    pos_model = None
    if 'pos_model' in config:
        pos_set = DataSet('./data/atis.pos.slot', './data/atis.pos.train')
        print('# Pre-training')
        print('# POS training set (%d)' % pos_set.size())
    # Set up axes
    ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'],
                       rotation=90)
    ax.set_yticklabels([''] + output_words)

    # Show label at every tick
    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()


if __name__ == "__main__":
    # train()
    data_set = DataSet(opt.filename, opt.max_len, opt.min_count, device)
    model = build_model(data_set.english_vocab, data_set.french_vocab)
    # evaluate()

    # optim
    optimizer = build_optimizer(model)
    # loss function
    criterion = nn.NLLLoss(ignore_index=PAD_id, reduction='elementwise_mean')

    # Loading checkpoint
    checkpoint = None
    if opt.checkpoint:
        checkpoint = load_checkpoint(opt.checkpoint)
        model.load_state_dict(checkpoint['state_dict'])
def run(cls, dev, test, labeled_slot, labeled_train, unlabeled_slot,
        unlabeled_train, steps, gpu_memory):
    training_set = DataSet(labeled_slot, labeled_train)
    validation_set = DataSet(labeled_slot, dev)
    test_set = DataSet(labeled_slot, test)
    unlabeled_set = DataSet(unlabeled_slot, unlabeled_train)

    print('# training_set (%d)' % training_set.size())
    print('# validation_set (%d)' % validation_set.size())
    print('# test_set (%d)' % test_set.size())
    print('# unlabeled_set (%d)' % unlabeled_set.size())

    classifier = tf.contrib.learn.Estimator(
        model_fn=SlotFilling.rnn_model_fn,
        params={
            'num_slot': training_set.num_classes(),
            'num_pos': unlabeled_set.num_classes(),
            'drop_out': DROP_OUT,
            'embedding_dimension': EMBEDDING_DIMENSION,
            'vocab_size': DataSet.vocab_size(),
            'unlabeled': unlabeled_set.size() > 0
        },
        config=tf.contrib.learn.RunConfig(
            gpu_memory_fraction=gpu_memory,
            save_checkpoints_secs=30,
        ),
        model_dir='./model')

    validation_metrics = {
        "accuracy": tf.contrib.learn.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_accuracy,
            prediction_key='predictions',
            weight_key='labeled_mask')
    }

    monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=lambda: SlotFilling.input_fn(
            validation_set, unlabeled_set, validation_set.size(), 1),
        eval_steps=1,
        every_n_steps=50,
        metrics=validation_metrics,
        early_stopping_metric="loss",
        early_stopping_metric_minimize=True,
        early_stopping_rounds=300)

    classifier.fit(input_fn=lambda: SlotFilling.input_fn(
        training_set, unlabeled_set, training_set.size(), 500),
        monitors=[monitor],
        steps=steps)

    predictions = classifier.predict(input_fn=lambda: SlotFilling.input_fn(
        test_set, unlabeled_set, test_set.size(), 1))

    slot_correct = 0
    slot_no_match = 0
    slot_mismatch = 0
    slot_over_match = 0
    for i, p in enumerate(predictions):
        target = test_set.labels()[i][:test_set.lengths()[i]]
        prediction = list(p['predictions'])[:test_set.lengths()[i]]
        for expected, actual in zip(target, prediction):
            actual = int(actual)
            # compare with ==, not `is`: identity checks on ints and strings
            # only coincidentally succeed for small interned values
            if expected == actual:
                slot_correct += 1
            elif test_set.get_slot(actual) == 'o':
                slot_no_match += 1
            elif test_set.get_slot(expected) == 'o':
                slot_over_match += 1
            else:
                slot_mismatch += 1

    return {
        'accuracy': slot_correct / sum(test_set.lengths()),
        'correct': slot_correct,
        'no_match': slot_no_match,
        'mismatch': slot_mismatch,
        'over_match': slot_over_match,
    }
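# Why the comparison fixes above matter: `is` tests object identity, and it
# only appears to work as equality on small CPython-interned ints and
# strings. A minimal demonstration.
a = int('1000')
b = int('1000')
assert a == b   # equal values
print(a is b)   # typically False: two distinct int objects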
        self.classifier, minibatch_index, inputs, outputs, learning_rate)


from data_set import DataSet

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(
        description='Demonstrate Multilayer Perceptron')
    argparser.add_argument(
        '--training-epochs', dest='epochs', type=int, default='1000',
        help='number of epochs to run the training (default: 1000)')

    dataset = DataSet()
    dataset.load()
    trainer = MultilayerPerceptronTrainer(
        dataset, n_epochs=argparser.parse_args().epochs)
    trainer.initialize()

    state = trainer.start_training(patience=10000, patience_increase=2,
                                   improvement_threshold=0.995)
    start_time = time.clock()
    while (trainer.continue_training(state)):
        print('epoch %d, validation error %f%%' %
              (state.epoch, state.epoch_losses[-1][0] * 100.0))
    end_time = time.clock()

    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    print(('Optimization complete. Best validation score of %f%% '
def main(_):
    # Import data
    ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    print("starting to load data...")
    x2 = pickle.load(open("T1_GD_all_x_no_normalization.p", "rb"))
    print("x2 loaded.")
    y2 = pickle.load(open("T1_GD_all_y_no_normalization.p", "rb"))
    print("y2 loaded.")
    validate_x2 = pickle.load(
        open("T1_GD_validation_x_no_normalization_aggregated_.p", "rb"))
    print("validate_x2 loaded.")
    validate_y2 = pickle.load(
        open("T1_GD_validation_y_no_normalization_aggregated.p", "rb"))
    print("validate_y2 loaded.")
    validate_x2_nonaggregated = pickle.load(
        open("T1_GD_all__validation_x_no_normalization.p", "rb"))
    print("validate_x2_nonaggregated loaded.")
    validate_y2_nonaggregated = pickle.load(
        open("T1_GD_all__validation_y_no_normalization.p", "rb"))
    print("validate_y2_nonaggregated loaded.")

    # sys.argv[0] is the script name, so real arguments start at index 1;
    # assuming the CLI order here is: epochs, batch size, kernel size.
    number_epochs = sys.argv[1]
    kernal_size = sys.argv[3]

    data_set_all = DataSet(x2, y2, fake_data=False)
    validation_set_all = DataSet(validate_x2_nonaggregated,
                                 validate_y2_nonaggregated, fake_data=False)

    # Create the convolutional model
    x = tf.placeholder(tf.float32, [None, 65536])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 3])

    # Build the graph for the deep net
    # with tf.device('/gpu:2'):
    y_conv, keep_prob, saver = deepnn(x)
    print(keep_prob)
    #plt.imshow(mnist.test.images[0].reshape(28,28))
    #print(type(mnist.test.images))
    #print(mnist.test.images.shape)
    #plt.show()

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #grads = new_optimizer.compute_gradients(cross_entropy)

    data_points = []
    avg_loss = 0
    total_loss = 0
    avg_validation_loss = 0
    total_validation_loss = 0
    batch_size = int(sys.argv[2])  # next_batch needs an int, not a string
    batches_completed = 0
    validation_batches_completed = 0

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    output_file = open("T1_GD_validation_loss_file_no_normalization_" +
                       number_epochs + "_epochs_" + kernal_size +
                       "_kernalsize_" + str(batch_size) + "_batchsize.txt",
                       "w+")

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # sess.graph.finalize()
        for i in range(5000):
            batch_x, batch_y = data_set_all.next_batch(batch_size)
            # write each altered slice back into the batch; rebinding the
            # loop variable alone would discard the augmentation
            for j in range(len(batch_x)):
                altered = numpy.reshape(batch_x[j], (256, 256))
                altered = random_alteration(altered)
                batch_x[j] = numpy.reshape(altered, 65536)
            batches_completed += 1
            loss = sess.run(cross_entropy, feed_dict={
                x: batch_x, y_: batch_y, keep_prob: 0.5})
            total_loss += loss
            new_avg_loss = total_loss / batches_completed
            if (new_avg_loss > avg_loss and batches_completed != 1):
                avg_loss = new_avg_loss
                # break
            avg_loss = new_avg_loss
            data_points.append(loss)
            if i % 1000 == 0:
                validation_batch_x, validation_batch_y = \
                    validation_set_all.next_batch(batch_size)
                validation_batches_completed += 1
                train_accuracy = accuracy.eval(feed_dict={
                    x: validation_batch_x, y_: validation_batch_y,
                    keep_prob: 1.0})
                validation_loss = cross_entropy.eval(feed_dict={
                    x: validation_batch_x, y_: validation_batch_y,
                    keep_prob: 1.0})
                total_validation_loss += validation_loss
                new_avg_validation_loss = \
                    total_validation_loss / validation_batches_completed
                if (new_avg_validation_loss > avg_validation_loss and
                        batches_completed != 1):
                    avg_validation_loss = new_avg_validation_loss
                avg_validation_loss = new_avg_validation_loss
                output_file.write("Validation loss at i = %d is %g\n" %
                                  (i, avg_validation_loss))
                total_times = 0.0
                total_accuracy = 0.0
                prediction = tf.argmax(y_conv, 1)
                probabilities = tf.nn.softmax(y_conv)
                probs_array = []
                condensed_y = []
                for j in range(len(validate_x2)):
                    #print(test_x2[i])
                    #print(test_y2[i])
                    temp3 = accuracy.eval(feed_dict={
                        x: validate_x2[j], y_: validate_y2[j],
                        keep_prob: 1.0})
                    print('test accuracy %g' % temp3)
                    total_accuracy = total_accuracy + temp3
                    total_times = total_times + 1
                    temp4 = prediction.eval(feed_dict={
                        x: validate_x2[j], keep_prob: 1.0}, session=sess)
                    print("predictions", temp4)
                    probability = probabilities.eval(feed_dict={
                        x: validate_x2[j], keep_prob: 1.0}, session=sess)
                    print(probability)
                    if j == 0:
                        probs_array = probability.mean(axis=0)
                        condensed_y = validate_y2[j].mean(axis=0)
                        continue
                    probs_array = numpy.vstack([probs_array,
                                                probability.mean(axis=0)])
                    condensed_y = numpy.vstack([condensed_y,
                                                validate_y2[j].mean(axis=0)])

                fpr = dict()
                tpr = dict()
                roc_auc = dict()
                for j in range(3):
                    fpr[j], tpr[j], _ = roc_curve(condensed_y[:, j],
                                                  probs_array[:, j])
                    roc_auc[j] = auc(fpr[j], tpr[j])
                # Compute micro-average ROC curve and ROC area
                fpr["micro"], tpr["micro"], _ = roc_curve(
                    condensed_y.ravel(), probs_array.ravel())
                roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

                output_file.write("ROCs at i = %d are " % i)
                for j in range(3):
                    plt.plot(fpr[j], tpr[j],
                             label='ROC curve of class {0} '
                                   '(area = {1:0.2f})'.format(j, roc_auc[j]))
                    output_file.write(str(roc_auc[j]) + ", ")
                output_file.write("\n")
                output_file.flush()
                print('step %d, training accuracy %g' % (i, train_accuracy))
                name = ('T1_GD_testing_with_intermediateROC_no_normalization'
                        '_epoch_' + str(i))
                save_path = saver.save(sess, name)
            train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})

        #testing
        print(avg_loss)
        output_file.close()
        save_path = saver.save(
            sess, 'T1_GD_testing_with_intermediateROC_no_normalization_final')
def setUpClass(self):
    self.dataset = DataSet()
    self.dataset.load(100)
class TestTutorials(unittest.TestCase):
    """docstring for TestTutorials"""

    @classmethod
    def setUpClass(self):
        self.dataset = DataSet()
        self.dataset.load(100)

    def test_convolutional_multilayer_perceptron(self):
        lenet5 = ConvolutionalMultilayerPerceptronTrainer(self.dataset,
                                                          n_epochs=1,
                                                          batch_size=2)
        lenet5.initialize(nkerns=[2, 5])
        epoch_losses, best_validation_loss, best_iter, test_score = lenet5.train(
            patience=10000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.52000000000000002, 49]])
        self.assertEqual(test_score, 0.45000000000000001)

    def test_convolutional_multilayer_perceptron_incremental(self):
        lenet5 = ConvolutionalMultilayerPerceptronTrainer(self.dataset,
                                                          n_epochs=1,
                                                          batch_size=2)
        lenet5.initialize(nkerns=[2, 5])
        state = lenet5.start_training(patience=10000, patience_increase=2,
                                      improvement_threshold=0.995)
        while lenet5.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.52000000000000002, 49]])
        self.assertEqual(state.test_score, 0.45000000000000001)

    def test_deep_belief_network(self):
        dbn = DeepBeliefNetworkTrainer(self.dataset, batch_size=2,
                                       pretraining_epochs=1,
                                       training_epochs=1)
        dbn.initialize()
        layer_epoch_costs = dbn.pretrain()
        self.assertTrue(layer_epoch_costs[0][0] > -229.574659742916 and
                        layer_epoch_costs[0][0] < -229.574659742915)
        self.assertTrue(layer_epoch_costs[1][0] > -724.564076667859 and
                        layer_epoch_costs[1][0] < -724.564076667856)
        self.assertTrue(layer_epoch_costs[2][0] > -237.068920458976 and
                        layer_epoch_costs[2][0] < -237.068920458975)
        epoch_losses, best_validation_loss, best_iter, test_score = dbn.train()
        self.assertEqual(best_validation_loss, 0.79)
        self.assertEqual(best_iter, 49)
        self.assertEqual(test_score, 0.76)

    def test_deep_belief_network_incremental(self):
        dbn = DeepBeliefNetworkTrainer(self.dataset, batch_size=2,
                                       pretraining_epochs=1,
                                       training_epochs=1)
        dbn.initialize()
        state = dbn.start_pretraining()
        while dbn.continue_pretraining(state):
            pass
        self.assertTrue(state.layer_epoch_costs[0] > -229.574659742916 and
                        state.layer_epoch_costs[0] < -229.574659742915)
        self.assertTrue(state.layer_epoch_costs[1] > -724.564076667859 and
                        state.layer_epoch_costs[1] < -724.564076667856)
        self.assertTrue(state.layer_epoch_costs[2] > -237.068920458976 and
                        state.layer_epoch_costs[2] < -237.068920458975)
        state = dbn.start_training()
        while dbn.continue_training(state):
            pass
        self.assertEqual(state.best_validation_loss, 0.79)
        self.assertEqual(state.best_iter, 49)
        self.assertEqual(state.test_score, 0.76)

    def test_denoising_autoencoder(self):
        da = DenoisingAutoencoderTrainer(self.dataset, training_epochs=1,
                                         batch_size=2)
        da.initialize()
        uncorrupt_costs = da.train()
        self.assertEqual(uncorrupt_costs, [149.16503228187111])
        da.initialize(corruption_level=0.3)
        corrupt_costs = da.train()
        self.assertTrue(corrupt_costs[0] > 173.6649940882978 and
                        corrupt_costs[0] < 173.6649940882979)

    def test_denoising_autoencoder_incremental(self):
        da = DenoisingAutoencoderTrainer(self.dataset, training_epochs=1,
                                         batch_size=2)
        da.initialize()
        state = da.start_training()
        while da.continue_training(state):
            pass
        self.assertEqual(state.costs, [149.16503228187111])
        da.initialize(corruption_level=0.3)
        state = da.start_training()
        while da.continue_training(state):
            pass
        self.assertTrue(state.costs[0] > 173.6649940882978 and
                        state.costs[0] < 173.6649940882979)

    def test_logistic(self):
        lc = LogisticTrainer(self.dataset, batch_size=2, n_epochs=1)
        lc.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = lc.train(
            patience=5000,
            patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.40000000000000002, 49]])
        self.assertEqual(test_score, 0.30)

    def test_logistic_incremental(self):
        lc = LogisticTrainer(self.dataset, batch_size=2, n_epochs=1)
        lc.initialize()
        state = lc.start_training(patience=5000, patience_increase=2,
                                  improvement_threshold=0.995)
        while lc.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.40000000000000002, 49]])
        self.assertEqual(state.test_score, 0.30)

    def test_multilayer_perceptron(self):
        mp = MultilayerPerceptronTrainer(self.dataset, n_epochs=1,
                                         batch_size=2)
        mp.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = mp.train(
            patience=10000, patience_increase=2, improvement_threshold=0.995)
        self.assertEqual(epoch_losses, [[0.54, 49]])
        self.assertEqual(test_score, 0.52)

    def test_multilayer_perceptron_incremental(self):
        mp = MultilayerPerceptronTrainer(self.dataset, n_epochs=1,
                                         batch_size=2)
        mp.initialize()
        state = mp.start_training(patience=10000, patience_increase=2,
                                  improvement_threshold=0.995)
        while mp.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.54, 49]])
        self.assertEqual(state.test_score, 0.52)

    def test_restricted_boltzmann_machine(self):
        rbm = RestrictedBoltzmannMachineTrainer(self.dataset,
                                                training_epochs=1,
                                                batch_size=2)
        rbm.initialize(n_chains=2, n_samples=2, n_hidden=5)
        epoch_costs = rbm.train()
        self.assertEqual(epoch_costs, [-174.86070176730175])

    def test_restricted_boltzmann_machine_incremental(self):
        rbm = RestrictedBoltzmannMachineTrainer(self.dataset,
                                                training_epochs=1,
                                                batch_size=2)
        rbm.initialize(n_chains=2, n_samples=2, n_hidden=5)
        state = rbm.start_training()
        while rbm.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [-174.86070176730175])

    def test_stacked_denoising_autoencoder(self):
        sda = StackedDenoisingAutoencoderTrainer(self.dataset,
                                                 pretraining_epochs=1,
                                                 n_epochs=1, batch_size=2)
        sda.preinitialize()
        layer_epoch_costs = sda.pretrain()
        self.assertEqual(
            layer_epoch_costs,
            [[328.15852933515004], [771.56755018914123], [661.65193991637716]])
        sda.initialize()
        epoch_losses, best_validation_loss, best_iter, test_score = sda.train(
            None)
        self.assertEqual(epoch_losses, [[0.73, 49]])
        self.assertEqual(best_validation_loss, 0.73)
        self.assertEqual(best_iter, 49)
        self.assertEqual(test_score, 0.67)

    def test_stacked_denoising_autoencoder_incremental(self):
        sda = StackedDenoisingAutoencoderTrainer(self.dataset,
                                                 pretraining_epochs=1,
                                                 n_epochs=1, batch_size=2)
        sda.preinitialize()
        state = sda.start_pretraining()
        while sda.continue_pretraining(state):
            pass
        self.assertEqual(
            state.layer_epoch_costs,
            [[328.15852933515004], [771.56755018914123], [661.65193991637716]])
        sda.initialize()
        state = sda.start_training()
        while sda.continue_training(state):
            pass
        self.assertEqual(state.epoch_losses, [[0.73, 49]])
        self.assertEqual(state.best_validation_loss, 0.73)
        self.assertEqual(state.best_iter, 49)
        self.assertEqual(state.test_score, 0.67)
def input_fn(labeled: DataSet, unlabeled: DataSet = None,
             size: int = BATCH_SIZE):
    input_dict = {}

    if unlabeled is not None and unlabeled.size() == 0:
        unlabeled = None

    # labeled data
    labeled = labeled.get_batch(size)
    input_dict['labeled_inputs'] = tf.constant(np.array(labeled.inputs()))
    input_dict['labeled_sequence_length'] = tf.constant(labeled.lengths())
    input_dict['labeled_mask'] = tf.constant(labeled.masks())
    labels = tf.constant(labeled.labels())

    # unlabeled data; fall back to the labeled batch when there is none
    unlabeled = labeled if unlabeled is None else unlabeled.get_batch(labeled.size())
    input_dict['unlabeled_inputs'] = tf.constant(np.array(unlabeled.inputs()))
    input_dict['unlabeled_sequence_length'] = tf.constant(unlabeled.lengths())
    input_dict['unlabeled_mask'] = tf.constant(unlabeled.masks())
    input_dict['unlabeled_size'] = tf.constant(unlabeled.size())
    input_dict['unlabeled_target'] = tf.constant(unlabeled.labels())

    return input_dict, labels
def load_data(self, imu_file_name: str, att_file_name: str):
    '''read pixhawk log file'''
    self._data_set = DataSet(imu_file_name, att_file_name)
    self._data_set.load_imu_data()
class attitude():
    """docstring for attitude"""

    def __init__(self):
        self._strategy = "none"
        self._data_set = None
        self._att = []

    def load_data(self, imu_file_name: str, att_file_name: str):
        '''read pixhawk log file'''
        self._data_set = DataSet(imu_file_name, att_file_name)
        self._data_set.load_imu_data()
        # self._data_set.load_px4_att()
        # self._data_set.load_open_imu_data()

    def remove_allresults(self):
        self._att.clear()

    def calculate_att(self):
        '''implement in subclass'''
        self.remove_allresults()

    def add_pitch_roll_yaw(self, pitch: float, roll: float, yaw: float):
        ''' '''
        self._att.append([pitch, roll, yaw])

    def show_fig(self):
        '''show fig of calculated attitude and pixhawk attitude'''
        if not self._att:
            print('no result')
            return
        # ekf_times, ekf_pitchs, ekf_rolls, ekf_yaws = self._data_set.get_ekf_attitude()
        imu_times = self._data_set.get_imu_times()

        plt.figure(self._strategy)
        plt.subplot(311)
        pitch_deg = [c[0] * 57.2957795 for c in self._att]
        plt.plot(imu_times, pitch_deg, label="pitch")
        plt.ylabel('pitch(deg)')
        plt.title(self._strategy)
        plt.legend()
        plt.grid(True)
        plt.grid(linestyle='--')

        plt.subplot(312)
        roll_deg = [c[1] * 57.2957795 for c in self._att]
        plt.plot(imu_times, roll_deg, label="roll")
        plt.ylabel('roll(deg)')
        plt.xlabel('time(s)')
        plt.legend()
        plt.grid(True)
        plt.grid(linestyle='--')

        plt.subplot(313)
        yaw_deg = [c[2] * 57.2957795 for c in self._att]
        plt.plot(imu_times, yaw_deg, label="yaw")
        plt.ylabel('yaw(deg)')
        plt.xlabel('time(s)')
        plt.legend()
        plt.grid(True)
        plt.grid(linestyle='--')

        plt.show()

    def test(self):
        '''main test'''
        sensorfile = r'test\09_26_14_sensor_combined_0.csv'
        attfile = r'test\09_26_14_vehicle_attitude_0.csv'
        self.load_data(sensorfile, attfile)
        self.calculate_att()
        self.show_fig()
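# The constant 57.2957795 used in show_fig is 180 / pi; math.degrees
# performs the same radians-to-degrees conversion without a magic number.
import math

assert abs(math.degrees(1.0) - 57.29577951308232) < 1e-9
pitch_deg = [math.degrees(p) for p in [0.1, 0.2]]  # illustrative values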
def main():
    sys.setrecursionlimit(2000)
    config = Configuration()
    with open(config.DATA_FOLDER + '/config.txt', 'r') as f:
        configFile = f.read().split(',')
    print('Parameters', configFile)
    config.EPSILON_START = float(configFile[0])
    config.LOAD_NET_NUMBER = int(float(configFile[1]))

    agentTF = AgentTF(config.STATE_SIZE, config.PHI_LENGTH,
                      config.ACTION_SIZE, config.HIDDEN_LAYERS,
                      config.BATCH_SIZE, config.TAU, config.GAMMA)

    if config.LOAD_NET_NUMBER > 0:
        dataSet = loadDataSet(config.DATA_FOLDER, config.LOAD_NET_NUMBER)
        agentTF.restore_model(config.DATA_FOLDER)
        countTotalSteps = config.LOAD_NET_NUMBER
    else:
        # Initialize DataSet
        dataSet = DataSet(config.STATE_SIZE, config.REPLAY_MEMORY_SIZE,
                          config.PHI_LENGTH, config.RNG)
        countTotalSteps = 0
        openLearningFile(config.DATA_FOLDER)

    eC = environmentControl(config.PATH_ROBOT, config.PATH_GOAL,
                            config.PATH_LAUNCHFILE)
    eC.spawn(config.ROBOT_NAME)
    eC.spawnGoal()
    eC.setRandomModelState(config.ROBOT_NAME)
    #eC.pause()

    dP = dataProcessor(eC, config.ROBOT_NAME, config.PHI_LENGTH,
                       config.STATE_SIZE, config.NUM_SENSOR_VAL,
                       config.SENSOR_RANGE_MAX, config.SENSOR_RANGE_MIN,
                       config.VEL, config.VEL_CURVE, config.UPDATE_TIME,
                       config.SPEED_UP)

    lastState = np.zeros((1, config.STATE_SIZE))
    lastReward = 0
    lastAction = 0

    countSteps = 0
    batchCount = 0
    lossAverages = np.empty([0])
    epochCount = 0

    epsilon = max(config.EPSILON_START, config.EPSILON_MIN)
    epsilonRate = config.EPSILON_DECAY
    quit = False

    try:
        for i in range(4):
            action = np.random.randint(config.ACTION_SIZE)
            dP.action(action)
            state, reward = dP.getStateReward()
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)
            countTotalSteps += 1
            countSteps += 1
            lastState = state

        if config.EPSILON_START < -0:
            quit = True

        while not quit:
            if countTotalSteps % 1000 == 0:
                updateLearningFile(config.DATA_FOLDER, lossAverages,
                                   countTotalSteps)
                lossAverages = np.empty([0])
                print(countTotalSteps)

            phi = dataSet.phi(lastState)
            action = agentTF.getAction(phi, epsilon)
            #action = userAction()
            eC.unpause()
            dP.action(action)
            state, reward = dP.getStateReward()
            eC.pause()

            if dP.isGoal:
                print('The goal was reached in ', countSteps, ' steps')
                countSteps = 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                dP.isGoal = False

            if dP.flipped:
                eC.setRandomModelState(config.ROBOT_NAME)
                dP.flipped = False

            # After NUM_STEPS the chance is over
            if countSteps % config.NUM_STEPS == 0:
                countSteps = 1
                reward -= 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                print('Your chance is over! Try again ...')
            #print(reward)
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)

            # Training
            if countTotalSteps > config.REPLAY_START_SIZE and \
                    countTotalSteps % 5 == 0:
                batchStates, batchActions, batchRewards, batchNextStates, \
                    batchTerminals = dataSet.randomBatch(config.BATCH_SIZE)
                loss = agentTF.train(batchStates, batchActions, batchRewards,
                                     batchNextStates, batchTerminals)
                #print('Loss', loss)
                # count how many trainings have been done
                batchCount += 1
                # add loss to lossAverages
                lossAverages = np.append(lossAverages, loss)

            # Update epsilon; save dataSet and network
            if countTotalSteps % config.SIZE_EPOCH == 0:
                # Number of epochs
                epochCount += 1

                # Update epsilon
                if (epsilon - epsilonRate) < config.EPSILON_MIN - 0.01:
                    quit = True
                epsilon = max(epsilon - epsilonRate, config.EPSILON_MIN)
                print('Epsilon updated to: ', epsilon)

                agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
                saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)

            lastState = state
            countTotalSteps += 1
            countSteps += 1

    except rospy.exceptions.ROSException:
        agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
        saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)

    agentTF.close()
    eC.close()
    with open(config.DATA_FOLDER + '/config.txt', 'w') as f:
        out = "{},{}".format(epsilon, countTotalSteps)
        f.write(out)
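# The epsilon schedule above in miniature: linear decay with a floor,
# stepped once per epoch. Constants here are illustrative, not the values
# read from Configuration.
EPSILON_MIN, EPSILON_DECAY = 0.1, 0.05
epsilon = 1.0
for epoch in range(25):
    epsilon = max(epsilon - EPSILON_DECAY, EPSILON_MIN)
assert abs(epsilon - EPSILON_MIN) < 1e-9  # clamped at the floor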
def main(_):
    # Import data
    ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    print("starting to load data...")
    x2 = pickle.load(open("all_x_l2normalization.p", "rb"))
    print("x2 loaded.")
    y2 = pickle.load(open("all_y_l2normalization.p", "rb"))
    print("y2 loaded.")
    validate_x2 = pickle.load(open("all__validation_x_l2normalization.p", "rb"))
    print("validate_x2 loaded.")
    validate_y2 = pickle.load(open("all__validation_y_l2normalization.p", "rb"))
    print("validate_y2 loaded.")

    data_set_all = DataSet(x2, y2, fake_data=False)
    validation_set_all = DataSet(validate_x2, validate_y2, fake_data=False)

    # Create the convolutional model
    x = tf.placeholder(tf.float32, [None, 65536])

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 3])

    # Build the graph for the deep net
    y_conv, keep_prob, saver = deepnn(x)
    print(keep_prob)
    #plt.imshow(mnist.test.images[0].reshape(28,28))
    #print(type(mnist.test.images))
    #print(mnist.test.images.shape)
    #plt.show()

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #grads = new_optimizer.compute_gradients(cross_entropy)

    data_points = []
    avg_loss = 0
    total_loss = 0
    avg_validation_loss = 0
    total_validation_loss = 0
    batch_size = 10
    batches_completed = 0
    validation_batches_completed = 0

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    output_file = open("validation_loss_file_l2normalization.txt", "w+")

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # sess.graph.finalize()
        for i in range(1000000):
            batch_x, batch_y = data_set_all.next_batch(batch_size)
            # write each altered slice back into the batch; rebinding the
            # loop variable alone would discard the augmentation
            for j in range(len(batch_x)):
                altered = numpy.reshape(batch_x[j], (256, 256))
                altered = random_alteration(altered)
                batch_x[j] = numpy.reshape(altered, 65536)
            batches_completed += 1
            loss = sess.run(cross_entropy, feed_dict={
                x: batch_x,
                y_: batch_y,
                keep_prob: 0.5
            })
            total_loss += loss
            new_avg_loss = total_loss / batches_completed
            if (new_avg_loss > avg_loss and batches_completed != 1):
                avg_loss = new_avg_loss
                # break
            avg_loss = new_avg_loss
            data_points.append(loss)
            if i % 10000 == 0:
                validation_batch_x, validation_batch_y = \
                    validation_set_all.next_batch(batch_size)
                validation_batches_completed += 1
                train_accuracy = accuracy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                validation_loss = cross_entropy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                total_validation_loss += validation_loss
                new_avg_validation_loss = \
                    total_validation_loss / validation_batches_completed
                if (new_avg_validation_loss > avg_validation_loss and
                        batches_completed != 1):
                    avg_validation_loss = new_avg_validation_loss
                avg_validation_loss = new_avg_validation_loss
                output_file.write("Validation loss at i = %d is %g\n" %
                                  (i, avg_validation_loss))
                output_file.flush()
                print('step %d, training accuracy %g' % (i, train_accuracy))
                name = 'my-model_testing_l2normalization_epoch_' + str(i)
                save_path = saver.save(sess, name)
            train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})

        #testing
        print(avg_loss)
        output_file.close()
        save_path = saver.save(sess, 'my-model_testing_l2normalization_final')
def to_dataset(self, data):
    ds = DataSet(data, self.header)
    ds = ds.split(self.friend_index)
    return ds.X, ds.Y
def main(path):
    settings = Settings(path)

    # mask seg set net los vis
    if 'mask' in settings.stages:  # need v,h raw and frames
        square = 'square' in settings.flags
        max_seg = settings.max_seg
        print(square)
        print(max_seg)
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        mask = segment.vert_horiz_seg(v[:, settings.frames, :],
                                      h[:, settings.frames, :],
                                      square=square, max_seg=max_seg)
        data_io.save_to(mask, settings.files['mask'], 'mask')

    if 'seg' in settings.stages:
        mask = data_io.read_from_file(settings.files['mask'], 'mask')
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        seg_v = segment.divide_data_to_segments(mask, v[:, settings.frames, :])
        seg_h = segment.divide_data_to_segments(mask, h[:, settings.frames, :])
        data_io.save_to([seg_v, seg_h], settings.files['seg'], 'seg')

    if 'set' in settings.stages:
        [seg_v, seg_h] = data_io.read_from_file(settings.files['seg'], 'seg')
        cv = 'cv' in settings.flags
        normalize = 'norm' in settings.flags
        sizes = settings.sizes
        data_sets = create_data_set.get_data(seg_v, seg_h,
                                             n_new_train=sizes['train'],
                                             normalize=normalize)
        data_io.save_to(data_sets, settings.files['set'], 'set')

    if 'net' in settings.stages:
        cv = 'cv' in settings.flags
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        data_sets = data_io.read_from_file(settings.files['set'], 'set')
        mask = data_io.read_from_file(settings.files['mask'], 'mask')
        tx, ty, vx, vy = data_sets
        D_in = vx.shape[1]
        ty = ty.astype(np.float64)
        vy = vy.astype(np.float64)
        n_data_sets = len(tx)
        n_frames = len(settings.frames)
        mask_numbers = np.unique(mask)
        n_seg = len(mask_numbers) - 1 if mask_numbers[0] == 0 \
            else len(mask_numbers)
        frames_loss_maps = np.zeros([n_data_sets, n_frames])
        seg_loss_maps = np.zeros([n_data_sets, n_seg])
        all_train_losses = []
        all_test_losses = []
        all_acc = []
        for idx, (one_tx, one_ty, one_vx, one_vy) in enumerate(
                zip(tx, ty, vx, vy)):
            one_train = DataSet(torch.from_numpy(one_tx),
                                torch.from_numpy(one_ty))
            one_test = DataSet(torch.from_numpy(one_vx.reshape([1, -1])),
                               torch.from_numpy(one_vy.reshape([1, ])))
            mean_t, std_t = one_train.calc_mean_std()
            one_train = one_train.normalize(mean_t, std_t)
            one_test = one_test.normalize(mean_t, std_t)
            print(idx)
            net = dense_net.get_model(D_in)
            training_parameters = run_nn.get_train_params(net)
            net, train_losses, valid_losses, valid_accuracies = run_nn.train(
                net, [one_train, one_test], training_parameters)
            all_acc.append(valid_accuracies[-1])
            if valid_losses[-1] > 0.6:
                print('\n{}\n'.format(idx))
            all_train_losses.append(train_losses)
            all_test_losses.append(valid_losses)
            frames_loss_maps[idx, :] = np.asarray(
                run_nn.run_with_missing_parts(net, mask, one_test, False,
                                              len(settings.frames),
                                              part_type='frames',
                                              zero_all=zero_all,
                                              value_type=value_type))
            seg_loss_maps[idx, :] = run_nn.run_with_missing_parts(
                net, mask, one_test, False, len(settings.frames),
                part_type='segments', zero_all=zero_all,
                value_type=value_type)
        print('acc: {}'.format(np.mean(np.asarray(all_acc))))
        frame_loss = np.mean(frames_loss_maps, axis=0)
        seg_loss = segment.recreate_image(mask, np.mean(seg_loss_maps, axis=0))
        data_io.save_to(frame_loss, settings.files['vis_frame'], 'vis')
        data_io.save_to(seg_loss, settings.files['vis_seg'], 'vis')
        visualize_res.plot_losses(all_train_losses, all_test_losses, [],
                                  n_data_sets)

    if 'show_vis' in settings.stages:
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        zero_all_str = 'Present' if zero_all else 'Missing'
        value_type_str = 'Accuracy' if value_type == 'acc' else 'Loss'
        title_seg = 'Average {} per {} Segment'.format(value_type_str,
                                                       zero_all_str)
        title_frame = 'Average {} per {} Frame'.format(value_type_str,
                                                       zero_all_str)
        # images = data_io.read_from_file(settings.files['vis_both'], 'vis')
        # visualize_res.plot_spatial(images, settings.frame_groups_string, n_frames=len(images))
        loss_map = data_io.read_from_file(settings.files['vis_frame'], 'vis')
        visualize_res.plot_temporal(
            loss_map, [x + 1 for x in settings.frames], title=title_frame,
            ylabel=value_type
        )  # counting starts from 0, so the relevant frames are +1
        image = data_io.read_from_file(settings.files['vis_seg'], 'vis')
        visualize_res.plot_spatial(image, title=title_seg)
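# Normalization discipline used in the 'net' stage above: the mean and std
# come from the training split only and are applied unchanged to the test
# split. A NumPy sketch with synthetic data.
import numpy as np

train = np.random.randn(100, 4) * 3 + 1
test = np.random.randn(20, 4) * 3 + 1
mean_t, std_t = train.mean(axis=0), train.std(axis=0)
train_n = (train - mean_t) / std_t
test_n = (test - mean_t) / std_t  # same train-derived statistics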
SIC_PATH = 'sic_day_GFDL-CM3_historical*'
SIT_PATH = 'sit_day_GFDL-CM3_historical*'
TAS_PATH = 'tas_3hr_GFDL-CM3_historical_r1i1p1_*.nc'
CLT_PATH = 'tcdc.eatm.gauss.19[89]*.nc'
BEGIN_DATE = datetime(1979, 1, 1, 0, 0, 0)
NUM_YEARS = 20
DELTA_T = 150

if __name__ == '__main__':
    print('Creating DataSet')
    data_set = DataSet(
        sic_path=SIC_PATH,
        sit_path=SIT_PATH,
        tas_path=TAS_PATH,
        clt_path=CLT_PATH,
        sic_scale=.01,
        clt_scale=.01,
    )
    print('Getting Albedos')
    albedos = Albedos()

    year = dateutil.relativedelta.relativedelta(years=1)
    rad_start_dates = [BEGIN_DATE + year * n for n in range(NUM_YEARS)]

    forcings = []
    for rad_start_date in rad_start_dates:
        forcing = get_radiative_forcing(
            start_date=rad_start_date,
            delta_t=DELTA_T,
            data_set=data_set,
            albedos=albedos,
        )
        probability = float(E**epsilon) / float(1 + (E**epsilon))
        for i in range(run_times):
            result.append(self.do_randomize(dataset, probability))
        return result

    def get_qD(self, D):
        count = 0
        for i in D.records:
            if i[4] > 13 and i[14] == '<=50K':
                count += 1
        return float(count) / float(len(D.records))

    def compute_accuracy(self, D, N=100, e=0.5, beta=0.05):
        p = (1 + math.exp(e)) / (math.exp(e) - 1)
        q = 1 / (1 + math.exp(e))
        alpha = p * math.sqrt(math.log(2 / beta) / (2 * N))
        qD = self.get_qD(D)
        data_list = self.do_randomized_mechenism(D, N, e)
        errors = [(p * (d - q)) - qD for d in data_list]
        return errors, alpha


D0 = DataSet()
D0.create_from_csv('./adult.csv')
rr = RandomizedResponse()
errors, alpha = rr.compute_accuracy(D0)
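# The correction p * (d - q) in compute_accuracy is the standard unbiased
# estimator for randomized response: each record keeps its true bit with
# probability exp(e) / (1 + exp(e)). A self-contained simulation with an
# illustrative epsilon and true proportion.
import math
import random

def randomized_response(truths, epsilon):
    keep = math.exp(epsilon) / (1 + math.exp(epsilon))
    return [t if random.random() < keep else 1 - t for t in truths]

epsilon = 0.5
truths = [1] * 300 + [0] * 700               # true proportion is 0.3
noisy = randomized_response(truths, epsilon)
d = sum(noisy) / len(noisy)
p = (1 + math.exp(epsilon)) / (math.exp(epsilon) - 1)
q = 1 / (1 + math.exp(epsilon))
estimate = p * (d - q)                        # close to 0.3 in expectation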
        self.input_x: xs,
        self.label_y: labels,
        self.diag_x: diags,
        self.keep_prob: 0.4
    })

def predict(self, dataset):
    return self.sess.run(self.predict_y_label, feed_dict={
        self.input_x: dataset.xs,
        self.diag_x: dataset.diags,
        self.label_y: dataset.labels,
        self.keep_prob: 0.4
    })


if __name__ == "__main__":
    # accumulate metrics across the ten folds
    accuracies, precisions, recalls, F1s = [], [], [], []
    for i in range(10):
        print("start with dataset: ", i)
        net = NaiveNet()
        test_set = DataSet(i, prefix="test")
        net.train(1000, DataSet(i), test_set)
        accuracies.append(net.best_accu)
        precisions.append(net.best_precision)
        recalls.append(net.best_recall)
        F1s.append(net.best_f1)
    print("Final Average Accuracy: ", np.average(accuracies))
    print("Final Average Precisions: ", np.average(precisions))
    print("Final Average Recalls: ", np.average(recalls))
    print("Final Average F1s: ", np.average(F1s))
def estimate_initial_conditions(self, max_comp=128, max_iter=5000):
    # now run bem on the combined data set to get initial conditions
    max_log_like = None  # the highest value for all runs
    converged = False
    component_count = max_comp
    iteration_count = max_iter
    results = []  # will be a list of dicts to convert to a DataFrame

    cpu_count = multiprocessing.cpu_count()
    bem_pool = multiprocessing.Pool(processes=cpu_count)

    data = np.vstack(
        [np.vstack(ds.blobs.values()) for ds in self.data_sets])

    while not converged:
        print component_count
        new_comp_counts = []

        # set of dictionaries for this comp run, one for each seed
        input_dicts = [
            {
                'data': data,
                'component_count': component_count,
                'iteration_count': iteration_count,
                'random_seed': seed
            } for seed in range(1, 17)
        ]

        tmp_results_list = bem_pool.map(bem_cluster, input_dicts)

        for r in tmp_results_list:
            if r['log_like'] > max_log_like:
                max_log_like = r['log_like']

        for r in tmp_results_list:
            # if the new log_like is close to the max (within 1%),
            # see if there are any empty components (pi < 0.0001)
            if abs(max_log_like - r['log_like']) < abs(max_log_like * 0.01):
                new_comp_counts.append(r['true_comp'])
                # save good run to our results
                results.append(r)

        if len(new_comp_counts) > 0:
            if int(np.mean(new_comp_counts)) < component_count:
                component_count = int(np.min(new_comp_counts))
            else:
                converged = True
        else:
            converged = True

    results_df = pd.DataFrame(
        results, columns=['comp', 'true_comp', 'seed', 'log_like'])
    min_comp = results_df.comp.min()
    best_index = results_df[results_df.comp == min_comp].log_like.argmax()
    best_run = results[best_index]

    # create a data set that's the combination of all data sets
    prelim_ds = DataSet(parameter_count=self._parameter_count)
    for i, ds in enumerate(self.data_sets):
        # start blob labels at 1 (i + 1)
        prelim_ds.add_blob(i + 1, np.vstack(ds.blobs.values()))

    prelim_ds.cluster(
        component_count=best_run['comp'],
        burn_in=0,
        iteration_count=iteration_count,
        random_seed=best_run['seed'],
        model='bem')

    log_like = prelim_ds.get_log_likelihood_trace()[0]
    print log_like

    # get classifications to calculate weights for each data set
    pis = []
    for label in sorted(prelim_ds.labels):
        label_classes = prelim_ds.get_classifications(0, [label])
        ds_pis = []
        for c in range(best_run['comp']):
            ds_pis.append(np.sum(label_classes == c) /
                          float(len(label_classes)))
        pis.append(ds_pis)  # list of lists

    # convert LoL pis to numpy array
    pis = np.array(pis)

    prelim_ds.plot_classifications(0)

    # Re-run a chain using the initial conditions from the last iteration
    last_iter = prelim_ds.raw_results.get_iteration(0)
    initial_conditions = {
        'pis': pis,
        'mus': last_iter.mus,
        'sigmas': last_iter.sigmas
    }

    return best_run['comp'], initial_conditions