def split_dataset(dataset, train_percent=None):
    """Randomly split *dataset* into train and test DataSets.

    Args:
        dataset: object exposing ``num_records``, ``text``, ``labels`` and
            ``isVectorized``.
        train_percent: percentage (1-100) of records used for training;
            falls back to 80 when missing or out of range.

    Returns:
        Tuple ``(train_dataset, test_dataset)`` of DataSet objects.
    """
    if not train_percent or int(train_percent) > 100:
        print("Train percent Invalid, using default")
        train_percent = 80

    # Shuffle the indices in place.  (The original called the misspelled
    # `rage`/`np.shuffe` and assigned the result of the in-place shuffle,
    # which would have been None.)
    data_indices = list(range(dataset.num_records))
    np.random.shuffle(data_indices)

    # How many training records do we need?
    num_train_records = int(train_percent) * dataset.num_records // 100

    train_text, train_labels = [], []
    test_text, test_labels = [], []
    # Membership is decided by *position* in the shuffled order, not by the
    # raw index value, so the split is genuinely random.
    for position, index in enumerate(data_indices):
        if position < num_train_records:
            train_labels.append(dataset.labels[index])
            train_text.append(dataset.text[index])
        else:
            test_labels.append(dataset.labels[index])
            test_text.append(dataset.text[index])

    train_dataset = DataSet(None, train_text, train_labels, dataset.isVectorized)
    test_dataset = DataSet(None, test_text, test_labels, dataset.isVectorized)
    return train_dataset, test_dataset
def get_dataset(self, is_train=True):
    """Load data, wrap it in a DataSet, and wire up the tf.data iterators.

    Side effects: sets ``self.ds``, ``self.loaded_data``, the three
    per-split iterator handles, ``self.dataset_iter`` and the
    ``batch_*`` tensors consumed by the graph.

    Args:
        is_train: when True, load the default training data and normalize
            labels; otherwise load the source-speaker data from config.
    """
    ld = LoadedData()
    if is_train:
        ld.load_data()
        ld.label_normalize()
    else:
        # load source speaker data
        ld.load_data(filename=config.src_data_dir,
                     test_set_size=config.src_test_size,
                     vali_set_size=config.src_vali_size)
    # sort the data
    # ld.sort_data()
    ld.print_info()
    dataset = DataSet(ld)
    self.ds = dataset
    self.loaded_data = ld
    # Keep one initializable iterator per split; the feedable handle below
    # switches between them at session run time.
    self.train_dataset_iter = dataset.train_iterator
    self.vali_dataset_iter = dataset.vali_iterator
    self.test_dataset_iter = dataset.test_iterator
    self.dataset_iter = tf.data.Iterator.from_string_handle(
        self.dataset_handle,
        dataset.train_set.output_types,
        dataset.train_set.output_shapes)
    with tf.name_scope('batch_data'):
        self.batch_features, \
        self.batch_labels, \
        self.batch_lengths, \
        self.batch_uttids = self.dataset_iter.get_next()
def load(self, dataPath, numTrain, numValid, numTest):
    """Read the CSV at *dataPath* and populate train/validation/test sets."""
    print("Loading data from " + dataPath + "...")
    raw = np.genfromtxt(dataPath, delimiter=",", dtype="uint8")
    cut = numTrain + numValid
    # The last numTest instances ALWAYS comprise the test set.
    held_out = raw[cut:]
    pool = raw[:cut]
    # Only the train/valid pool is shuffled; the test tail stays fixed.
    shuffle(pool)
    self.trainingSet = DataSet(pool[:numTrain])
    self.validationSet = DataSet(pool[numTrain:])
    self.testSet = DataSet(held_out)
    print("Data loaded.")
def split_dataset(dataset, ratio=None):
    """Randomly partition *dataset* into train/test DataSets by *ratio*."""
    n = dataset.size
    ratio = _choose_optimal_train_ratio(n) if ratio is None else ratio
    # Boolean selector: exactly int(n * ratio) True entries, then shuffled
    # so the True positions are random.
    selector = np.zeros(n, dtype=np.bool_)
    selector[:int(n * ratio)] = True
    np.random.shuffle(selector)
    train = DataSet(dataset.x[selector, :], dataset.y[selector])
    complement = np.invert(selector)
    test = DataSet(dataset.x[complement, :], dataset.y[complement])
    return train, test
def get_data_set(self, name, directory='/data_sets'):
    """Return the saved DataSet *name*, or build (and optionally save) a new one.

    A new DataSet is created when no saved one exists or when ``self.reset``
    is set.  The "not found" message is only printed/logged when the rebuild
    was *not* explicitly requested via reset (matching the original
    behavior, which the duplicated branches obscured).

    Args:
        name: dataset identifier passed to the DataSet constructor.
        directory: storage directory for the DataSet.

    Returns:
        The existing saved DataSet, or the freshly built one.
    """
    data_set = self.get_saved_data_set(name)[0]
    if data_set and not self.reset:
        return data_set

    if not data_set and not self.reset:
        # Unexpected rebuild: announce it.
        print('Could not find dataset. Creating new one')
        self.captains_log.error(
            'Could not find saved dataset. Creating new one')

    data = DataSet(name, self.save, directory)
    for message in data.set_up():
        self._log_message(message[0], message[1])
    if self.save:
        self.save_data_set(data)
    return data
def create_data_set(key_items, value_items, name=""):
    """Build a DataSet pairing each key with the value at the same index.

    Keys without a matching value (when ``value_items`` is shorter) are
    paired with ``None``; surplus values are ignored — same behavior as the
    original manual index loop, but expressed with zip.

    Args:
        key_items: sequence of keys.
        value_items: sequence of values, possibly shorter/longer than keys.
        name: name passed to the DataSet constructor.

    Returns:
        A DataSet populated with one DataItem per key.
    """
    # Truncate surplus values, then right-pad with None up to the key count.
    values = list(value_items[:len(key_items)])
    values += [None] * (len(key_items) - len(values))
    data_items = [DataItem(key, value) for key, value in zip(key_items, values)]
    ds = DataSet(name)
    ds.add_data_items(data_items)
    return ds
def main():
    """Smoke-test Observations.values against a freshly built DataSet."""
    from data_set import DataSet
    from collections import namedtuple

    params = {
        'encode_step': 5,          # number of historical data points
        'train_data_num': 100000,  # size of the training set
    }
    # Freeze the dict into an immutable HParams namedtuple.
    hps = namedtuple("HParams", params.keys())(**params)
    data_set = DataSet(hps)
    obs = Observations(0, 0, 0, 0)
    print(obs.values(data_set.history_data, hps.encode_step).shape)
    return
def to_dataset(df, k, target_column, with_bias):
    """Convert a time-ordered DataFrame into a sliding-window DataSet.

    Each sample is a flattened window of *k* consecutive rows (optionally
    prefixed with a bias feature of 1); its label is the value of
    *target_column* immediately after the window.

    Args:
        df: DataFrame with a 'date' column (dropped before windowing).
        k: window length in rows.
        target_column: column name used as the regression target.
        with_bias: when True, prepend a constant 1 to each feature row.

    Returns:
        DataSet(x, y) with x of shape (n-k, k*cols [+1]) and y of shape (n-k,).
    """
    df = df[1:].reset_index(drop=True)
    df = df.drop(['date'], axis=1)
    target = df[target_column]
    n, cols = df.shape
    windows_num = n - k  # effective window size, including the label, is k + 1
    x = np.empty([windows_num, k * cols + int(with_bias)])
    y = np.empty([windows_num])
    # `range` / `.values` replace the Python-2-only `xrange` and the
    # long-removed `DataFrame.as_matrix()`, keeping this runnable on
    # Python 3 / modern pandas (both also work on the old stack).
    for i in range(windows_num):
        window = df[i:i + k]
        row = window.values.reshape((-1, ))
        if with_bias:
            row = np.insert(row, 0, 1)
        x[i] = row
        y[i] = target[i + k]
    debug('data set: x=%s y=%s' % (x.shape, y.shape))
    return DataSet(x, y)
def main():
    """Collect rollout data with a fixed action, then train/evaluate the price model."""
    hps = get_hps()
    data_set = DataSet(hps)
    env = Env(hps, data_set)
    model = Model(hps, env.observations_dim, env.actions_dim)

    # Seed the replay data with the initial observation.
    obs = env.reset()
    data_set.add_data(obs, 0, 0)

    total = hps.train_data_num
    for step in range(total):
        print('\r{}/{}'.format(step, total), end='')
        obs, reward, _ = env.step(obs, Actions([0.3, 0.3, 0.4]))
        data_set.add_data(obs, 0, 0)

    iters = hps.train_iter
    for it in range(iters):
        print('\n\n{}/{}'.format(it, iters))
        model.price_train(1, data_set)
        # Periodic evaluation, visually separated in the console output.
        if it % hps.eval_interval == 0:
            separator = '-' * 50
            print(separator)
            model.price_test(1, data_set)
            print(separator)
    return
# Input glob patterns for the GFDL-CM3 historical run and script constants.
SIC_PATH = 'sic_day_GFDL-CM3_historical*'              # sea-ice concentration files
SIT_PATH = 'sit_day_GFDL-CM3_historical*'              # sea-ice thickness files
TAS_PATH = 'tas_3hr_GFDL-CM3_historical_r1i1p1_*.nc'   # near-surface air temperature files
CLT_PATH = 'tcdc.eatm.gauss.19[89]*.nc'                # total cloud cover files
BEGIN_DATE = datetime(1979, 1, 1, 0, 0, 0)  # first forcing-window start date
NUM_YEARS = 20                              # one forcing window per year
DELTA_T = 150                               # window length passed to get_radiative_forcing -- units per that function

if __name__ == '__main__':
    print('Creating DataSet')
    # sic/clt scale factors of .01 -- presumably converting percent values
    # to fractions; confirm against the DataSet implementation.
    data_set = DataSet(
        sic_path=SIC_PATH,
        sit_path=SIT_PATH,
        tas_path=TAS_PATH,
        clt_path=CLT_PATH,
        sic_scale=.01,
        clt_scale=.01,
    )
    print('Getting Albedos')
    albedos = Albedos()
    year = dateutil.relativedelta.relativedelta(years=1)
    # One start date per year, offset from BEGIN_DATE.
    rad_start_dates = [BEGIN_DATE + year * n for n in range(NUM_YEARS)]
    forcings = []
    for rad_start_date in rad_start_dates:
        # NOTE(review): only the computation of `forcing` is visible here;
        # the append into `forcings` presumably follows beyond this chunk.
        forcing = get_radiative_forcing(
            start_date=rad_start_date,
            delta_t=DELTA_T,
            data_set=data_set,
            albedos=albedos,
        )
            # (continuation of a feed_dict begun above this chunk)
            self.input_x: xs,
            self.label_y: labels,
            self.diag_x: diags,
            self.keep_prob: 0.4
        })

    def predict(self, dataset):
        # Run the prediction op over the whole dataset.
        # NOTE(review): keep_prob is 0.4 at inference time too -- dropout is
        # normally disabled (1.0) for prediction; confirm this is intended.
        return self.sess.run(self.predict_y_label, feed_dict={
            self.input_x: dataset.xs,
            self.diag_x: dataset.diags,
            self.label_y: dataset.labels,
            self.keep_prob: 0.4
        })


if __name__ == "__main__":
    # Train and evaluate one NaiveNet per dataset split 0..9, then report
    # averages.  accuracies/precisions/recalls/F1s are defined outside this
    # visible chunk.
    for i in range(10):
        print("start with dataset: ", i)
        net = NaiveNet()
        test_set = DataSet(i, prefix="test")
        net.train(1000, DataSet(i), test_set)
        accuracies.append(net.best_accu)
        precisions.append(net.best_precision)
        recalls.append(net.best_recall)
        F1s.append(net.best_f1)
    print("Final Average Accuracy: ", np.average(accuracies))
    print("Final Average Precisions: ", np.average(precisions))
    print("Final Average Recalls: ", np.average(recalls))
    print("Final Average F1s: ", np.average(F1s))
def aggregate_demo(args):
    """Merge per-episode demonstration samples into one replay memory.

    Loads episodes ``range_start``..``range_end`` from the demo folder,
    appends every transition to a single DataSet, then saves the combined
    pickle and compressed image archive.

    Usage: python3 aggregate_demo.py pong --range-start=0 --range-end=5
    """
    if args.demo_memory_folder is not None:
        demo_memory_folder = args.demo_memory_folder
    else:
        demo_memory_folder = "{}_demo_samples".format(args.env)

    game_state = game.GameState(game=args.env)
    D = DataSet(args.resized_height, args.resized_width,
                RandomState(), args.replay_memory,
                args.phi_len, game_state.n_actions)

    data_file = '{}-dqn.pkl'.format(args.env)
    img_file = '{}-dqn-images.h5'.format(args.env)

    for index in range(args.range_start, args.range_end):
        print("Demonstration sample #{num:03d}".format(num=index + 1))
        episode_dir = demo_memory_folder + '/{0:03d}/'.format(index + 1)
        # Narrow except: only a missing/unreadable pickle aborts the merge.
        # (The original bare `except:` swallowed everything, including
        # KeyboardInterrupt, and leaked the file handle.)
        try:
            with open(episode_dir + data_file, 'rb') as pkl_file:
                data = pickle.load(pkl_file)
        except (OSError, IOError, pickle.UnpicklingError, EOFError):
            print("Check demo folder if it exist!")
            return

        actions = data['D.actions']
        rewards = data['D.rewards']
        terminal = data['D.terminal']
        imgs = get_compressed_images(episode_dir + img_file + '.gz')
        print("\tMemory size: {}".format(data['D.size']))
        for mem_index in range(data['D.size']):
            D.add_sample(imgs[mem_index], actions[mem_index],
                         rewards[mem_index], terminal[mem_index])
        # h5file.close()
        print("\tTotal Memory size: {}".format(D.size))

    D.resize()
    D.create_validation_set(percent=args.validation_set_percent)

    # Snapshot of the aggregated replay memory (images saved separately).
    data = {
        'D.width': D.width,
        'D.height': D.height,
        'D.max_steps': D.max_steps,
        'D.phi_length': D.phi_length,
        'D.num_actions': D.num_actions,
        'D.actions': D.actions,
        'D.rewards': D.rewards,
        'D.terminal': D.terminal,
        'D.bottom': D.bottom,
        'D.top': D.top,
        'D.size': D.size,
        'D.validation_set_markers': D.validation_set_markers,
        'D.validation_indices': D.validation_indices,
        'epsilon': args.init_epsilon,
        't': 0
    }
    images = D.imgs
    with open(demo_memory_folder + '/' + args.env + '-dqn-all.pkl', 'wb') as out_file:
        pickle.dump(data, out_file, pickle.HIGHEST_PROTOCOL)
    print("Saving and compressing replay memory...")
    save_compressed_images(
        demo_memory_folder + '/' + args.env + '-dqn-images-all.h5', images)
    print("Saved and compressed replay memory")
def to_dataset(self, data):
    """Wrap *data* in a DataSet, split at friend_index, and return (X, Y)."""
    split_ds = DataSet(data, self.header).split(self.friend_index)
    return split_ds.X, split_ds.Y
def get_demo(args):
    """Collect a demonstration episode (human-, random- or model-controlled).

    Human:  python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=0 --file-num=1
    Random: python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=1 --file-num=1
    Model:  python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=2 --file-num=1
            python3 run_experiment.py pong --demo-time-limit=5 --collect-demo --demo-type=2 --model-folder=pong_networks_rms_1 --file-num=1
    """
    if args.demo_type == 2:
        # Model-driven demo: force TensorFlow onto the CPU.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''
        import tensorflow as tf
        from dqn_net import DqnNet
    from collect_demo import CollectDemonstration

    # Output folder name depends on who produced the demo.
    if args.folder is not None:
        folder = '{}_{}'.format(args.env, args.folder)
    else:
        folder = '{}_demo_samples'.format(args.env)
        if args.demo_type == 1:
            folder = '{}_demo_samples_random'.format(args.env)
        elif args.demo_type == 2:
            folder = '{}_demo_samples_model'.format(args.env)

    game_state = game.GameState(
        human_demo=True if args.demo_type == 0 else False,
        frame_skip=1, game=args.env)

    if False:  # Deterministic
        rng = RandomState(123456)
    else:
        rng = RandomState()

    # Replay memory sized from the time limit (demo_time_limit * 5000 steps).
    D = DataSet(
        args.resized_height, args.resized_width,
        rng, (args.demo_time_limit * 5000),
        args.phi_len, game_state.n_actions)

    model_net = None
    if args.demo_type == 2:  # From model
        if args.model_folder is not None:
            model_folder = args.model_folder
        else:
            model_folder = '{}_networks_{}'.format(args.env, args.optimizer.lower())
        sess = tf.Session()
        with tf.device('/cpu:0'):
            model_net = DqnNet(
                sess, args.resized_height, args.resized_width, args.phi_len,
                game_state.n_actions, args.env, gamma=args.gamma,
                copy_interval=args.c_freq, optimizer=args.optimizer,
                learning_rate=args.lr, epsilon=args.epsilon, decay=args.decay,
                momentum=args.momentum, verbose=args.verbose, path=None,
                folder=None, slow=args.use_slow, tau=args.tau)
        model_net.load(folder=model_folder)

    collect_demo = CollectDemonstration(
        game_state, args.resized_height, args.resized_width, args.phi_len,
        args.env, D,
        terminate_loss_of_life=args.terminate_life_loss,
        folder=folder, sample_num=args.file_num
    )
    collect_demo.run(
        minutes_limit=args.demo_time_limit,
        demo_type=args.demo_type,
        model_net=model_net)
@classmethod
def setUpClass(cls):
    """Load the shared DataSet once for the whole test class.

    The original declared ``setUpClass(self)`` without ``@classmethod``;
    unittest invokes setUpClass on the *class*, so the override must be a
    classmethod or the call fails / never receives an instance.  Storing
    the dataset on the class keeps it visible to tests via ``self.dataset``.
    """
    cls.dataset = DataSet()
    cls.dataset.load(100)
network=DeepQLearner(config.STATE_SIZE, config.ACTION_SIZE, config.PHI_LENGTH, config.BATCH_SIZE, config.DISCOUNT, config.RHO, config.MOMENTUM, config.LEARNING_RATE, config.RMS_EPSILON, config.RNG, config.UPDATE_RULE, config.BATCH_ACCUMULATOR, config.FREEZE_INTERVAL) # Initialize DataSet dataSet=DataSet(config.STATE_SIZE, config.REPLAY_MEMORY_SIZE, config.PHI_LENGTH, config.RNG) eC=environmentControl(config.PATH_ROBOT, config.PATH_GOAL, config.PATH_LAUNCHFILE) #eC.spawn(config.ROBOT_NAME) eC.spawnGoal() eC.setRandomModelState(config.ROBOT_NAME) eC.setRandomModelState('goal') #eC.pause() dP=dataProcessor(eC, config.ROBOT_NAME, config.UPDATES_PER_STEP, config.PHI_LENGTH,
        # (continuation of a method begun above this chunk)
        # Probability of reporting the true answer, derived from epsilon.
        probability = float(E**epsilon) / float(1 + (E**epsilon))
        for i in range(run_times):
            result.append(self.do_randomize(dataset, probability))
        return result

    def get_qD(self, D):
        # True query value: fraction of records with field[4] > 13 and
        # income label '<=50K'.
        count = 0
        for i in D.records:
            if i[4] > 13 and i[14] == '<=50K':
                count += 1
        return float(count) / float(len(D.records))

    def compute_accuacy(self, D, N=100, e=0.5, beta=0.05):
        # NOTE: misspelled name ("accuacy") kept -- module-level code below
        # (and possibly other callers) invokes it under this name.
        p = (1 + math.exp(e)) / (math.exp(e) - 1)
        q = 1 / (1 + math.exp(e))
        # alpha: error bound at confidence 1 - beta over N runs.
        alpha = p * math.sqrt(math.log(2 / beta) / (2 * N))
        qD = self.get_qD(D)
        data_list = self.do_randomized_mechenism(D, N, e)
        # Debias each randomized estimate, then subtract the true value.
        errors = [(p * (d - q)) - qD for d in data_list]
        return errors, alpha


# Driver: measure randomized-response error on the adult dataset.
D0 = DataSet()
D0.create_from_csv('./adult.csv')
rr = RandomizedResponse()
errors, alpha = rr.compute_accuacy(D0)
# Corpus paths shared by every experiment configuration.
common = {
    'dev': './data/atis.pkl.dev',
    'test': './data/atis.pkl.test',
    'slot': './data/atis.pkl.slot',
}

if __name__ == '__main__':
    config = config_plain
    # experiments = experiments[5:6]
    if not os.path.exists('./out'):
        os.mkdir('./out')
    # for vocab size
    # NOTE(review): these two DataSet calls discard their result --
    # presumably constructing them has a vocabulary-building side effect;
    # confirm against the DataSet implementation.
    DataSet('./data/atis.pkl.slot', './data/atis.pkl.train')
    DataSet('./data/atis.pos.slot', './data/atis.pos.train')
    slot = common['slot']
    validation_set = DataSet(slot, common['dev'])
    test_set = DataSet(slot, common['test'])
    print('# Experiments (%d)' % len(experiments))
    print('# validation_set (%d)' % validation_set.size())
    print('# test_set (%d)' % test_set.size())
    pos_model = None
    if 'pos_model' in config:
        # Optional POS pre-training set; chunk continues beyond this view.
        pos_set = DataSet('./data/atis.pos.slot', './data/atis.pos.train')
        print('# Pre-training')
        print('# POS training set (%d)' % pos_set.size())
def update_model(super_params, url, id, flag, model_name, start_index):
    """Fine-tune (or resume) the CNN checkpoint under ./<model_name>/.

    Restores existing weights when a checkpoint is present (dropping the
    fc3 layer if the output width changed), trains for the configured
    number of epochs, periodically evaluates on the test set, and writes
    checkpoints plus a textual log.

    Args:
        super_params: dict of hyper-parameters (out_length, paths, epoch,
            batch_size, keep_prob, input_width/height, ...).
        url, id, flag: when *flag* is truthy, per-step progress is pushed
            to *url*/*id* via status_handler.
        model_name: checkpoint directory name.
        start_index: unused in the visible code.

    Returns:
        The accumulated list of log strings.
    """
    # Probe the existing checkpoint for its fc3 output width so we know
    # whether the final layer can be restored.
    ckpt = tf.train.get_checkpoint_state('./' + model_name + '/')
    out_length = 0
    if ckpt:
        tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta')
        graph = tf.get_default_graph()
        out_length = graph.get_tensor_by_name("fc3/out:0").shape[1]
    # Reset the graph before every training run.
    tf.reset_default_graph()
    log = []
    y_ = tf.placeholder(tf.float32, shape=[None, super_params['out_length']], name="y_")
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    out = define(super_params['out_length'], keep_prob)
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
    # Training-data accuracy.
    correct_prediction = tf.equal(tf.argmax(out, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
    loss_temp = tf.losses.softmax_cross_entropy(onehot_labels=y_, logits=out)
    # Mean loss.
    cross_entropy_loss = tf.reduce_mean(loss_temp, name='cross_entropy_loss')
    # Back-propagation / parameter updates.
    train_step = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999,
                                        epsilon=1e-08).minimize(cross_entropy_loss)
    saver = tf.train.Saver(max_to_keep=2)
    train_set = DataSet(super_params['train_set_path'], 1,
                        (super_params['input_width'], super_params['input_height'], 3),
                        super_params['batch_size'])
    test_set = DataSet(super_params['test_set_path'], 1,
                       (super_params['input_width'], super_params['input_height'], 3),
                       super_params['batch_size'])
    tf.add_to_collection("predict", out)
    with tf.Session() as sess:
        writer = tf.summary.FileWriter('logs/2', sess.graph)  # write training summaries under logs/
        train_accuracy_scalar = tf.summary.scalar('train_accuracy', accuracy)
        train_loss_scalar = tf.summary.scalar('train_loss', cross_entropy_loss)
        ckpt = tf.train.get_checkpoint_state('./' + model_name + '/')
        sess.run(tf.global_variables_initializer())
        # When the output width changed, restore everything except fc3.
        if out_length != super_params['out_length']:
            loader = tf.train.Saver(
                var_list=[var for var in tf.trainable_variables()
                          if not var.name.startswith("fc3")],
                max_to_keep=2)
        else:
            loader = tf.train.Saver(var_list=[var for var in tf.trainable_variables()],
                                    max_to_keep=2)
        print(ckpt)
        if ckpt:
            if os.path.exists(ckpt.model_checkpoint_path + '.meta'):
                print("restore")
                loader.restore(sess, ckpt.model_checkpoint_path)
                print('restored')
        for epoch in range(super_params['epoch']):
            train_set.reset()
            test_set.reset()
            step = 0
            train_loss = 0
            train_accuracy = 0
            # NOTE(review): `while train_set.is_end():` reads inverted --
            # presumably is_end() returns True while data remains; confirm
            # against the DataSet implementation (same below for test_set).
            while train_set.is_end():
                input_x, input_y, _ = train_set.next_bath()
                input_y = input_y.astype(int)
                # One-hot encode the integer labels.
                input_y = np.eye(super_params['out_length'])[input_y]
                feed_dict = {images_placeholder: input_x, y_: input_y,
                             keep_prob: super_params['keep_prob'],
                             phase_train_placeholder: False}
                train_accuracy = accuracy.eval(feed_dict=feed_dict)
                train_loss = cross_entropy_loss.eval(feed_dict=feed_dict)
                train_step.run(feed_dict=feed_dict)
                step_info = "epoch:{} step:{} loss: {:.5f} train_accuracy:{:.5f}".format(epoch, step, train_loss, train_accuracy)
                step += 1
                print(step_info)
                log.append(step_info)
                if flag:
                    status_handler.handleTrainStep(url, id, step_info)
            # Summaries use the feed_dict of the last training batch.
            accuracy_scalar, loss_scalar = sess.run([train_accuracy_scalar, train_loss_scalar], feed_dict=feed_dict)
            writer.add_summary(accuracy_scalar, epoch)
            writer.add_summary(loss_scalar, epoch)
            # Evaluate and checkpoint every 5 epochs.
            if epoch % 5 == 0:
                total_accuracy = 0
                total_loss = 0
                test_step = 0
                while test_set.is_end():
                    test_x, test_y, _ = test_set.next_bath()
                    test_y = test_y.astype(int)
                    test_y = np.eye(super_params['out_length'])[test_y]
                    feed_dict = {images_placeholder: test_x, y_: test_y,
                                 keep_prob: super_params['keep_prob'],
                                 phase_train_placeholder: False}
                    test_accuracy = accuracy.eval(feed_dict=feed_dict)
                    test_loss = cross_entropy_loss.eval(feed_dict=feed_dict)
                    total_accuracy += test_accuracy
                    total_loss += test_loss
                    test_step += 1
                test_info = "TEST: epoch:{} loss: {:.5f} test_accuracy:{:.5f}".format(epoch, total_loss / test_step, total_accuracy / test_step)
                log.append(test_info)
                print(test_info)
                saver.save(sess, './' + model_name + '/my-model', global_step=epoch)
                # Early stop once test loss/accuracy cross fixed thresholds.
                if ((total_loss / test_step) < 0.001) & ((total_accuracy / test_step) > 0.99):
                    break
        saver.save(sess, './' + model_name + '/my-model', global_step=epoch)
    write_log(log, './' + model_name + '/log.txt')
    return log
# if TEST_LIM > 0: # X_test = X_test[0:TEST_LIM] # y_test = y_test[0:TEST_LIM] print('X_train.shape: %s' % str(X_train.shape)) print('y_train.shape: %s' % str(y_train.shape)) print('X_val.shape: %s' % str(X_val.shape)) print('y_val.shape: %s' % str(y_val.shape)) # print('X_test.shape: %s' % str(X_test.shape)) # print('y_test.shape: %s' % str(y_test.shape)) assert X_train.shape[0] == y_train.shape[0] assert X_val.shape[0] == y_val.shape[0] # assert X_test.shape[0] == y_test.shape[0] data_train_ = DataSet(X=X_train, y=y_train, batch_size=BATCH_SIZE_INT) data_val_ = DataSet(X=X_val, y=y_val, batch_size=BATCH_SIZE_INT) # data_test_ = DataSet(X=X_test, y=y_test, batch_size=BATCH_SIZE_INT) if TRAIN: history, best_train_acc, best_val_acc = \ vgg.train(data_train=data_train_, data_val=data_val_, save_path=SAVE_PATH, weights_path=WEIGHTS_PATH, restore_path=RESTORE_PATH, save_summaries_every=SAVE_SUMMARIES_EVERY, display_every=DISPLAY_EVERY, display=DISPLAY, nb_to_display=NB_TO_DISPLAY, nb_epochs=NB_EPOCHS, save_best_only=SAVE_BEST_ONLY)
# (continuation of a two-panel error plot begun above this chunk)
axarr[0].legend([alpha_line_lp], ['alpha = {:.6f}'.format(laplace_alpha)])
axarr[0].plot(laplace_errors, 'go')

# Right panel: randomized-response errors with +/- alpha bound lines.
axarr[1].set_title('Randomized Response')
axarr[1].axhline(0, color='g')
alpha_line_rr = axarr[1].axhline(rr_alpha, color='r')
axarr[1].axhline(-rr_alpha, color='r')
axarr[1].set_xlabel('Nth run')
axarr[1].set_ylabel('Error')
axarr[1].legend([alpha_line_rr], ['alpha = ' + '{:.6f}'.format(rr_alpha)])
axarr[1].plot(rr_errors, 'go')
plt.title('BETA = ' + str(beta) + 'Epsilon = ' + str(e) + 'N = ' + str(N))
plt.show()

# Build two neighboring datasets: D1 is a copy of D0 with one record
# removed (the classic differing-in-one-row pair for privacy-loss plots).
D0 = DataSet()
D0.create_from_csv('adult.csv')
D1 = DataSet()
D1.copy_from_dataset(D0)
D1.records.pop()  # eliminate one element
laplace = Laplace()
D0_histagram_data, qD0 = laplace.do_mechanism(D0, 1000)
D1_histagram_data, qD1 = laplace.do_mechanism(D1, 1000)
# parameters are all laplace's parameter
draw_privacy_loss(D0_histagram_data, D1_histagram_data, 1000, e=0.5)
draw_accuracy(D0_histagram_data, qD0)
if __name__ == "__main__":
    # CLI: data_path class_col_name test_size prune_size runs
    if len(sys.argv) != 6:
        print("\nUsage: python3 program_name data_path class_col_name test_size prune_size runs")
        print("\ntest_size and prune_size are in range (0, 1)")
        print("\ndata_set is divided into temp_set and test_set")
        print("temp_set is then divided into training_set and prune_set\n")
        sys.exit()
    data_path = sys.argv[1]
    class_name = sys.argv[2]
    test_size = float(sys.argv[3])
    prune_size = float(sys.argv[4])
    runs = int(sys.argv[5])
    data = DataSet(data_path, class_name)
    class_range = len(id3.get_attr_values(data.data_set)[class_name])
    # Benchmark file name encodes the split sizes.
    results_path = f'./benchmarks/t{sys.argv[3]}_p{sys.argv[4]}.csv'
    print("\ndata set: " + data_path)
    print("set size: " + str(len(data.data_set.index)))
    print("------------------ ")
    print("training set size: " + str(len(data.train_set.index)))
    print("prune set size: " + str(len(data.prune_set.index)))
    print("test set size: " + str(len(data.test_set.index)))
    with open(results_path, 'w') as file:
        for i in range(runs):
            # Re-randomize the split each run, then rebuild the ID3 tree and
            # its C4.5-pruned variant.  (Loop body continues beyond this chunk.)
            data.resplit_dataset(test_size=test_size, prune_size=prune_size)
            id3_tree = id3.build_id3(data.train_set, data.data_set)
            c45_tree = id3.build_c45(data.prune_set, data.data_set, id3_tree)
# Set up axes ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'], rotation=90) ax.set_yticklabels([''] + output_words) # Show label at every tick ax.xaxis.set_major_locator(ticker.MultipleLocator(1)) ax.yaxis.set_major_locator(ticker.MultipleLocator(1)) plt.show() if __name__ == "__main__": # train() data_set = DataSet(opt.filename, opt.max_len, opt.min_count, device) model = build_model(data_set.english_vocab, data_set.french_vocab) # evaluate() # optim optimizer = build_optimizer(model) # loss function criterion = nn.NLLLoss(ignore_index=PAD_id, reduction='elementwise_mean') # Loading checkpoint checkpoint = None if opt.checkpoint: checkpoint = load_checkpoint(opt.checkpoint) model.load_state_dict(checkpoint['state_dict'])
def main(path):
    """Run the pipeline stages selected in the settings file at *path*.

    Stages (each optional, selected via settings.stages):
      mask     -- segment raw v/h data into a spatial mask.
      seg      -- divide raw data into segments using the mask.
      set      -- build train/validation data sets from the segments.
      net      -- train one dense net per data set and collect loss maps.
      show_vis -- plot the saved per-frame / per-segment visualizations.
    """
    settings = Settings(path)
    # mask seg set net los vis
    if 'mask' in settings.stages:
        # need v,h raw and frames
        square = 'square' in settings.flags
        max_seg = settings.max_seg
        print(square)
        print(max_seg)
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        mask = segment.vert_horiz_seg(v[:, settings.frames, :],
                                      h[:, settings.frames, :],
                                      square=square, max_seg=max_seg)
        data_io.save_to(mask, settings.files['mask'], 'mask')
    if 'seg' in settings.stages:
        mask = data_io.read_from_file(settings.files['mask'], 'mask')
        v, h = data_io.read_from_file(settings.files['raw'], 'raw')
        seg_v = segment.divide_data_to_segments(mask, v[:, settings.frames, :])
        seg_h = segment.divide_data_to_segments(mask, h[:, settings.frames, :])
        data_io.save_to([seg_v, seg_h], settings.files['seg'], 'seg')
    if 'set' in settings.stages:
        [seg_v, seg_h] = data_io.read_from_file(settings.files['seg'], 'seg')
        cv = 'cv' in settings.flags
        normalize = 'norm' in settings.flags
        sizes = settings.sizes
        data_sets = create_data_set.get_data(seg_v, seg_h,
                                             n_new_train=sizes['train'],
                                             normalize=normalize)
        data_io.save_to(data_sets, settings.files['set'], 'set')
    if 'net' in settings.stages:
        cv = 'cv' in settings.flags
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        data_sets = data_io.read_from_file(settings.files['set'], 'set')
        mask = data_io.read_from_file(settings.files['mask'], 'mask')
        tx, ty, vx, vy = data_sets
        D_in = vx.shape[1]
        ty = ty.astype(np.float64)
        vy = vy.astype(np.float64)
        n_data_sets = len(tx)
        n_frames = len(settings.frames)
        # Segment count excludes the 0 label when present (background).
        mask_nubmers = np.unique(mask)
        n_seg = len(mask_nubmers) - 1 if mask_nubmers[0] == 0 else len(
            mask_nubmers)
        frames_loss_maps = np.zeros([n_data_sets, n_frames])
        seg_loss_maps = np.zeros([n_data_sets, n_seg])
        all_train_losses = []
        all_test_losses = []
        all_acc = []
        # Train one network per (train, validation) pair.
        for idx, (one_tx, one_ty, one_vx, one_vy) in enumerate(zip(tx, ty, vx, vy)):
            one_train = DataSet(torch.from_numpy(one_tx),
                                torch.from_numpy(one_ty))
            one_test = DataSet(torch.from_numpy(one_vx.reshape([1, -1])),
                               torch.from_numpy(one_vy.reshape([
                                   1,
                               ])))
            # Normalize validation data with the *training* statistics.
            mean_t, std_t = one_train.calc_mean_std()
            one_train = one_train.normalize(mean_t, std_t)
            one_test = one_test.normalize(mean_t, std_t)
            print(idx)
            net = dense_net.get_model(D_in)
            training_parameters = run_nn.get_train_params(net)
            net, train_losses, valid_losses, valid_accuracies = run_nn.train(
                net, [one_train, one_test], training_parameters)
            all_acc.append(valid_accuracies[-1])
            # Flag poorly-fit runs on the console.
            if valid_losses[-1] > 0.6:
                print('\n{}\n'.format(idx))
            all_train_losses.append(train_losses)
            all_test_losses.append(valid_losses)
            # Ablation maps: loss/accuracy when frames or segments are removed.
            frames_loss_maps[idx, :] = np.asarray(
                run_nn.run_with_missing_parts(net, mask, one_test, False,
                                              len(settings.frames),
                                              part_type='frames',
                                              zero_all=zero_all,
                                              value_type=value_type))
            seg_loss_maps[idx, :] = run_nn.run_with_missing_parts(
                net, mask, one_test, False, len(settings.frames),
                part_type='segments', zero_all=zero_all,
                value_type=value_type)
        print('acc: {}'.format(np.mean(np.asarray(all_acc))))
        frame_loss = np.mean(frames_loss_maps, axis=0)
        seg_loss = segment.recreate_image(mask, np.mean(seg_loss_maps, axis=0))
        data_io.save_to(frame_loss, settings.files['vis_frame'], 'vis')
        data_io.save_to(seg_loss, settings.files['vis_seg'], 'vis')
        visualize_res.plot_losses(all_train_losses, all_test_losses, [],
                                  n_data_sets)
    if 'show_vis' in settings.stages:
        zero_all = 'zero_all' in settings.flags
        value_type = 'acc' if 'acc' in settings.flags else 'loss'
        zero_all_str = 'Present' if zero_all else 'Missing'
        value_type_str = 'Accuracy' if value_type == 'acc' else 'Loss'
        title_seg = 'Average {} per {} Segment'.format(value_type_str,
                                                       zero_all_str)
        title_frame = 'Average {} per {} Frame'.format(value_type_str,
                                                       zero_all_str)
        # images = data_io.read_from_file(settings.files['vis_both'], 'vis')
        # visualize_res.plot_spatial(images, settings.frame_groups_string, n_frames=len(images))
        loss_map = data_io.read_from_file(settings.files['vis_frame'], 'vis')
        visualize_res.plot_temporal(
            loss_map, [x + 1 for x in settings.frames],
            title=title_frame,
            ylabel=value_type
        )  # counting starts from 0, so the relevant frames are +1
        image = data_io.read_from_file(settings.files['vis_seg'], 'vis')
        visualize_res.plot_spatial(image, title=title_seg)
def main(_):
    """Train/evaluate the T1-GD CNN; writes validation losses and ROC AUCs."""
    # Import data
    ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    print("starting to load data...")
    x2 = pickle.load(open("T1_GD_all_x_no_normalization.p", "rb"))
    print("x2 loaded.")
    y2 = pickle.load(open("T1_GD_all_y_no_normalization.p", "rb"))
    print("y2 loaded.")
    validate_x2 = pickle.load(open("T1_GD_validation_x_no_normalization_aggregated_.p", "rb"))
    print("validate_x2 loaded.")
    validate_y2 = pickle.load(open("T1_GD_validation_y_no_normalization_aggregated.p", "rb"))
    print("validate_y2 loaded.")
    validate_x2_nonaggregated = pickle.load(open("T1_GD_all__validation_x_no_normalization.p", "rb"))
    print("validate_x2 loaded.")
    validate_y2_nonaggregated = pickle.load(open("T1_GD_all__validation_y_no_normalization.p", "rb"))
    print("validate_y2 loaded.")
    # NOTE(review): sys.argv[0] is the *script name*, not an argument --
    # number_epochs is almost certainly reading the wrong index.
    number_epochs = sys.argv[0]
    kernal_size = sys.argv[2]
    data_set_all = DataSet(x2, y2, fake_data=False)
    validation_set_all = DataSet(validate_x2_nonaggregated, validate_y2_nonaggregated, fake_data=False)
    # Create the convolutional model
    x = tf.placeholder(tf.float32, [None, 65536])
    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 3])
    # Build the graph for the deep net
    # with tf.device('/gpu:2'):
    y_conv, keep_prob, saver = deepnn(x)
    print(keep_prob)
    #plt.imshow(mnist.test.images[0].reshape(28,28))
    #print(type(mnist.test.images))
    #print(mnist.test.images.shape)
    #plt.show()
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #grads = new_optimizer.compute_gradients(cross_entropy)
    data_points = []
    avg_loss = 0
    total_loss = 0
    avg_validation_loss = 0
    total_validation_loss = 0
    # NOTE(review): batch_size stays a raw argv *string*; confirm
    # DataSet.next_batch tolerates a non-int batch size.
    batch_size = sys.argv[1]
    batches_completed = 0
    validation_batches_completed = 0
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    # NOTE(review): `epoch_size` is never assigned in this function (only
    # number_epochs is) -- this line will raise NameError at runtime.
    output_file = open("T1_GD_validation_loss_file_no_normalization_" + epoch_size + "_epochs_" + kernal_size + "_kernalsize_" + batch_size + "_batchsize.txt", "w+")
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # sess.graph.finalize()
        for i in range(5000):
            batch_x, batch_y = data_set_all.next_batch(batch_size)
            # NOTE(review): this augmentation loop only rebinds the loop
            # variable -- the altered slices are never written back into
            # batch_x, so the random alteration has no effect.
            for batch_slice in batch_x:
                batch_slice = numpy.reshape(batch_slice, (256, 256))
                batch_slice = random_alteration(batch_slice)
                batch_slice = numpy.reshape(batch_slice, 65536)
            batches_completed += 1
            loss = sess.run(cross_entropy, feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
            total_loss += loss
            new_avg_loss = total_loss / batches_completed
            if (new_avg_loss > avg_loss and batches_completed != 1):
                avg_loss = new_avg_loss
                # break
            avg_loss = new_avg_loss
            data_points.append(loss)
            # Periodic validation, ROC computation and checkpointing.
            if i % 1000 == 0:
                validation_batch_x, validation_batch_y = validation_set_all.next_batch(batch_size)
                validation_batches_completed += 1
                train_accuracy = accuracy.eval(feed_dict={x: validation_batch_x, y_: validation_batch_y, keep_prob: 1.0})
                validation_loss = cross_entropy.eval(feed_dict={x: validation_batch_x, y_: validation_batch_y, keep_prob: 1.0})
                total_validation_loss += validation_loss
                new_avg_validation_loss = total_validation_loss / validation_batches_completed
                if (new_avg_validation_loss > avg_validation_loss and batches_completed != 1):
                    avg_validation_loss = new_avg_validation_loss
                avg_validation_loss = new_avg_validation_loss
                output_file.write("Validation loss at i = %d is %g\n" % (i, avg_validation_loss))
                total_times = 0.0
                total_accuracy = 0.0
                prediction = tf.argmax(y_conv, 1)
                probabilities = tf.nn.softmax(y_conv)
                probs_array = []
                condensed_y = []
                # Evaluate every aggregated validation group; mean the
                # per-slice probabilities/labels into one row per group.
                for j in range(len(validate_x2)):
                    #print(test_x2[i])
                    #print(test_y2[i])
                    temp3 = accuracy.eval(feed_dict={x: validate_x2[j], y_: validate_y2[j], keep_prob: 1.0})
                    print('test accuracy %g' % temp3)
                    total_accuracy = total_accuracy + temp3
                    total_times = total_times + 1
                    temp4 = prediction.eval(feed_dict={x: validate_x2[j], keep_prob: 1.0}, session=sess)
                    print("predictions", temp4)
                    probability = probabilities.eval(feed_dict={x: validate_x2[j], keep_prob: 1.0}, session=sess)
                    print(probability)
                    if j == 0:
                        probs_array = probability.mean(axis=0)
                        condensed_y = validate_y2[j].mean(axis=0)
                        continue
                    probs_array = numpy.vstack([probs_array, probability.mean(axis=0)])
                    condensed_y = numpy.vstack([condensed_y, validate_y2[j].mean(axis=0)])
                # Per-class ROC curves over the 3 output classes.
                fpr = dict()
                tpr = dict()
                roc_auc = dict()
                for j in range(3):
                    fpr[j], tpr[j], _ = roc_curve(condensed_y[:, j], probs_array[:, j])
                    roc_auc[j] = auc(fpr[j], tpr[j])
                # Compute micro-average ROC curve and ROC area
                fpr["micro"], tpr["micro"], _ = roc_curve(condensed_y.ravel(), probs_array.ravel())
                roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
                output_file.write("ROCs at i = %d are " % i)
                for j in range(3):
                    plt.plot(fpr[j], tpr[j], label='ROC curve of class {0} (area = {1:0.2f})'''.format(j, roc_auc[j]))
                    output_file.write(str(roc_auc[j]) + ", ")
                output_file.write("\n")
                output_file.flush()
                print('step %d, training accuracy %g' % (i, train_accuracy))
                name = 'T1_GD_testing_with_intermediateROC_no_normalization_epoch_' + str(i)
                save_path = saver.save(sess, name)
            train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
        #testing
        print(avg_loss)
        output_file.close()
        save_path = saver.save(sess, 'T1_GD_testing_with_intermediateROC_no_normalization_final')
def main(_):
    """Build, train, and periodically validate/checkpoint the CNN on the
    l2-normalized pickled data set.

    Fixes versus the previous revision:
      * The random-alteration augmentation loop only rebound its loop
        variable, so every altered slice was discarded and the network
        trained on un-augmented data; the altered slice is now written
        back into the batch.
      * The running-average bookkeeping unconditionally overwrote
        ``avg_loss`` / ``avg_validation_loss`` immediately after the
        guarded assignment, making the guard dead code; collapsed to the
        single effective assignment (numeric behavior unchanged).
      * ``pickle.load(open(...))`` leaked file descriptors; loads now use
        a context manager.
    """

    def _load_pickle(path):
        # Load one pickled object, closing the handle deterministically.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    # Import data
    ###mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    print("starting to load data...")
    x2 = _load_pickle("all_x_l2normalization.p")
    print("x2 loaded.")
    y2 = _load_pickle("all_y_l2normalization.p")
    print("y2 loaded.")
    validate_x2 = _load_pickle("all__validation_x_l2normalization.p")
    print("validate_x2 loaded.")
    validate_y2 = _load_pickle("all__validation_y_l2normalization.p")
    print("validate_y2 loaded.")
    data_set_all = DataSet(x2, y2, fake_data=False)
    validation_set_all = DataSet(validate_x2, validate_y2, fake_data=False)

    # Create the convolutional model.
    # assumes each sample is a flattened 256x256 image -- TODO confirm
    x = tf.placeholder(tf.float32, [None, 65536])
    # Define loss and optimizer; labels are one-hot over 3 classes.
    y_ = tf.placeholder(tf.float32, [None, 3])

    # Build the graph for the deep net.
    y_conv, keep_prob, saver = deepnn(x)
    print(keep_prob)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Bookkeeping for running loss averages and checkpointing.
    data_points = []
    avg_loss = 0
    total_loss = 0
    avg_validation_loss = 0
    total_validation_loss = 0
    batch_size = 10
    batches_completed = 0
    validation_batches_completed = 0

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with open("validation_loss_file_l2normalization.txt", "w+") as output_file, \
            tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # sess.graph.finalize()
        for i in range(1000000):
            batch_x, batch_y = data_set_all.next_batch(batch_size)

            # Data augmentation: alter each slice and write the result BACK
            # into the batch (previously the altered copy was discarded).
            for k in range(len(batch_x)):
                altered = numpy.reshape(batch_x[k], (256, 256))
                altered = random_alteration(altered)
                batch_x[k] = numpy.reshape(altered, 65536)

            batches_completed += 1
            loss = sess.run(cross_entropy,
                            feed_dict={
                                x: batch_x,
                                y_: batch_y,
                                keep_prob: 0.5
                            })
            total_loss += loss
            # Running mean of the training loss over all batches so far.
            avg_loss = total_loss / batches_completed
            data_points.append(loss)

            # Periodic validation + checkpoint every 10000 steps.
            if i % 10000 == 0:
                validation_batch_x, validation_batch_y = \
                    validation_set_all.next_batch(batch_size)
                validation_batches_completed += 1
                train_accuracy = accuracy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                validation_loss = cross_entropy.eval(feed_dict={
                    x: validation_batch_x,
                    y_: validation_batch_y,
                    keep_prob: 1.0
                })
                total_validation_loss += validation_loss
                # Running mean of the validation loss.
                avg_validation_loss = (total_validation_loss /
                                       validation_batches_completed)
                output_file.write("Validation loss at i = %d is %g\n" %
                                  (i, avg_validation_loss))
                output_file.flush()
                print('step %d, training accuracy %g' % (i, train_accuracy))
                name = 'my-model_testing_l2normalization_epoch_' + str(i)
                save_path = saver.save(sess, name)

            # One optimizer step on the (now augmented) batch.
            train_step.run(feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})

        #testing
        print(avg_loss)
        save_path = saver.save(sess,
                               'my-model_testing_l2normalization_final')
self.classifier, minibatch_index, inputs, outputs, learning_rate) from data_set import DataSet if __name__ == '__main__': argparser = argparse.ArgumentParser( description='Demonstrate Multilayer Perceptron') argparser.add_argument( '--training-epochs', dest='epochs', type=int, default='1000', help='number of epochs to run the training (default: 1000)') dataset = DataSet() dataset.load() trainer = MultilayerPerceptronTrainer( dataset, n_epochs=argparser.parse_args().epochs) trainer.initialize() state = trainer.start_training(patience=10000, patience_increase=2, improvement_threshold=0.995) start_time = time.clock() while (trainer.continue_training(state)): print('epoch %d, validation error %f%%' % (state.epoch, state.epoch_losses[-1][0] * 100.0)) end_time = time.clock() print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) print(('Optimization complete. Best validation score of %f%% '
def main():
    """Entry point: run the DQN training loop for the simulated robot.

    Reads ``<DATA_FOLDER>/config.txt`` (format: ``<epsilon_start>,<load_net_number>``),
    (re)builds the TF agent and replay memory, drives the environment,
    trains on sampled mini-batches every 5 steps once the replay buffer is
    warm, and periodically checkpoints the network and the data set.  On
    exit the current epsilon and total step count are written back to
    ``config.txt`` so a later run can resume.
    """
    sys.setrecursionlimit(2000)
    config = Configuration()
    # config.txt persists "<epsilon_start>,<load_net_number>" from a prior run.
    with open(config.DATA_FOLDER + '/config.txt', 'r') as f:
        configFile = f.read().split(',')
    print('Parameters', configFile)
    config.EPSILON_START = float(configFile[0])
    config.LOAD_NET_NUMBER = int(float(configFile[1]))

    agentTF = AgentTF(config.STATE_SIZE, config.PHI_LENGTH,
                      config.ACTION_SIZE, config.HIDDEN_LAYERS,
                      config.BATCH_SIZE, config.TAU, config.GAMMA)

    # LOAD_NET_NUMBER > 0 means "resume": restore replay memory and weights,
    # and continue the global step counter from the saved value.
    if config.LOAD_NET_NUMBER > 0:
        dataSet = loadDataSet(config.DATA_FOLDER, config.LOAD_NET_NUMBER)
        agentTF.restore_model(config.DATA_FOLDER)
        countTotalSteps = config.LOAD_NET_NUMBER
    else:
        # Initialize DataSet (fresh replay memory) and a new learning log.
        dataSet = DataSet(config.STATE_SIZE, config.REPLAY_MEMORY_SIZE,
                          config.PHI_LENGTH, config.RNG)
        countTotalSteps = 0
        openLearningFile(config.DATA_FOLDER)

    # Bring up the simulation: robot, goal marker, randomized start pose.
    eC = environmentControl(config.PATH_ROBOT, config.PATH_GOAL,
                            config.PATH_LAUNCHFILE)
    eC.spawn(config.ROBOT_NAME)
    eC.spawnGoal()
    eC.setRandomModelState(config.ROBOT_NAME)
    #eC.pause()

    dP = dataProcessor(eC, config.ROBOT_NAME, config.PHI_LENGTH,
                       config.STATE_SIZE, config.NUM_SENSOR_VAL,
                       config.SENSOR_RANGE_MAX, config.SENSOR_RANGE_MIN,
                       config.VEL, config.VEL_CURVE, config.UPDATE_TIME,
                       config.SPEED_UP)

    # Per-episode / per-epoch bookkeeping.
    lastState = np.zeros((1, config.STATE_SIZE))
    lastReward = 0
    lastAction = 0
    countSteps = 0
    batchCount = 0
    lossAverages = np.empty([0])
    epochCount = 0
    epsilon = max(config.EPSILON_START, config.EPSILON_MIN)
    epsilonRate = config.EPSILON_DECAY
    quit = False

    try:
        # Seed the replay memory with a few random-action transitions so
        # dataSet.phi() has enough history before the main loop starts.
        for i in range(4):
            action = np.random.randint(config.ACTION_SIZE)
            dP.action(action)
            state, reward = dP.getStateReward()
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)
            countTotalSteps += 1
            countSteps += 1
            lastState = state

        # NOTE(review): "< -0" is just "< 0" -- a negative epsilon in
        # config.txt appears to be a sentinel meaning "quit immediately";
        # confirm intended semantics.
        if config.EPSILON_START < -0:
            quit = True

        while not quit:
            # Flush accumulated losses to the learning log every 1000 steps.
            if countTotalSteps % 1000 == 0:
                updateLearningFile(config.DATA_FOLDER, lossAverages,
                                   countTotalSteps)
                lossAverages = np.empty([0])
                print(countTotalSteps)

            # Epsilon-greedy action from the agent on the current history phi.
            phi = dataSet.phi(lastState)
            action = agentTF.getAction(phi, epsilon)
            #action=userAction()

            # Step the simulation only while unpaused.
            eC.unpause()
            dP.action(action)
            state, reward = dP.getStateReward()
            eC.pause()

            # Episode end: goal reached -> re-randomize robot and goal.
            if dP.isGoal:
                print('The goal was reached in ', countSteps, ' steps')
                countSteps = 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                dP.isGoal = False

            # Robot flipped over -> reset its pose.
            if dP.flipped:
                eC.setRandomModelState(config.ROBOT_NAME)
                dP.flipped = False

            # After NUM_STEPS the chance is over: penalize and restart episode.
            if countSteps % config.NUM_STEPS == 0:
                countSteps = 1
                reward -= 1
                eC.setRandomModelState(config.ROBOT_NAME)
                eC.setRandomModelState('goal')
                print('Your chance is over! Try again ...')

            #print(reward)
            dataSet.addSample(lastState, action, reward, state, dP.isGoal)

            # Training: one mini-batch every 5 steps once the buffer is warm.
            if countTotalSteps > config.REPLAY_START_SIZE and countTotalSteps % 5 == 0:
                batchStates, batchActions, batchRewards, batchNextStates, batchTerminals = \
                    dataSet.randomBatch(config.BATCH_SIZE)
                loss = agentTF.train(batchStates, batchActions, batchRewards,
                                     batchNextStates, batchTerminals)
                #print('Loss', loss)
                # count How many trainings had been done
                batchCount += 1
                # add loss to lossAverages
                lossAverages = np.append(lossAverages, loss)

            #Update Epsilon save dataSet, network
            if countTotalSteps % config.SIZE_EPOCH == 0:
                # Number of Epochs
                epochCount += 1
                # Update Epsilon; once it would decay below the floor, stop.
                if (epsilon - epsilonRate) < config.EPSILON_MIN - 0.01:
                    quit = True
                epsilon = max(epsilon - epsilonRate, config.EPSILON_MIN)
                print('Epsilon updated to: ', epsilon)
                agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
                saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)

            lastState = state
            countTotalSteps += 1
            countSteps += 1

    except rospy.exceptions.ROSException:
        # ROS shutdown mid-run: checkpoint before tearing everything down.
        agentTF.save_model(countTotalSteps, config.DATA_FOLDER)
        saveDataSet(config.DATA_FOLDER, countTotalSteps, dataSet)

    agentTF.close()
    eC.close()

    # Persist epsilon and step count so the next run can resume from here.
    with open(config.DATA_FOLDER + '/config.txt', 'w') as f:
        out = "{},{}".format(epsilon, countTotalSteps)
        f.write(out)
def run(cls, dev, test, labeled_slot, labeled_train, unlabeled_slot,
        unlabeled_train, steps, gpu_memory):
    """Train the slot-filling RNN estimator and score it on the test set.

    Builds labeled train/validation/test DataSets plus an unlabeled set,
    fits a tf.contrib.learn Estimator with early stopping on validation
    loss, then tallies per-token results on the test set.

    Returns a dict with 'accuracy' and the four tally counters
    ('correct', 'no_match', 'mismatch', 'over_match').

    Bug fix: the tally comparisons used identity (``is``) instead of
    equality (``==``).  ``expected is actual`` on ints only works for
    CPython's small-int cache (-5..256), and ``get_slot(...) is 'o'``
    relies on string interning -- both are implementation-dependent and
    silently miscount.  All three comparisons now use ``==``.
    """
    training_set = DataSet(labeled_slot, labeled_train)
    validation_set = DataSet(labeled_slot, dev)
    test_set = DataSet(labeled_slot, test)
    unlabeled_set = DataSet(unlabeled_slot, unlabeled_train)

    print('# training_set (%d)' % training_set.size())
    print('# validation_set (%d)' % validation_set.size())
    print('# test_set (%d)' % test_set.size())
    print('# unlabeled_set (%d)' % unlabeled_set.size())

    classifier = tf.contrib.learn.Estimator(
        model_fn=SlotFilling.rnn_model_fn,
        params={
            'num_slot': training_set.num_classes(),
            'num_pos': unlabeled_set.num_classes(),
            'drop_out': DROP_OUT,
            'embedding_dimension': EMBEDDING_DIMENSION,
            'vocab_size': DataSet.vocab_size(),
            # Semi-supervised branch is active only when unlabeled data exists.
            'unlabeled': unlabeled_set.size() > 0
        },
        config=tf.contrib.learn.RunConfig(
            gpu_memory_fraction=gpu_memory,
            save_checkpoints_secs=30,
        ),
        model_dir='./model')

    # Masked accuracy over labeled tokens only.
    validation_metrics = {
        "accuracy": tf.contrib.learn.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_accuracy,
            prediction_key='predictions',
            weight_key='labeled_mask')
    }

    # Early stopping: evaluate every 50 steps, stop after 300 rounds
    # without validation-loss improvement.
    monitor = tf.contrib.learn.monitors.ValidationMonitor(
        input_fn=lambda: SlotFilling.input_fn(
            validation_set, unlabeled_set, validation_set.size(), 1),
        eval_steps=1,
        every_n_steps=50,
        metrics=validation_metrics,
        early_stopping_metric="loss",
        early_stopping_metric_minimize=True,
        early_stopping_rounds=300)

    classifier.fit(input_fn=lambda: SlotFilling.input_fn(
        training_set, unlabeled_set, training_set.size(), 500),
        monitors=[monitor],
        steps=steps)

    predictions = classifier.predict(input_fn=lambda: SlotFilling.input_fn(
        test_set, unlabeled_set, test_set.size(), 1))

    slot_correct = 0
    slot_no_match = 0
    slot_mismatch = 0
    slot_over_match = 0

    for i, p in enumerate(predictions):
        # Truncate both sequences to the utterance's true length (ignore padding).
        target = test_set.labels()[i][:test_set.lengths()[i]]
        prediction = list(p['predictions'])[:test_set.lengths()[i]]
        for expected, actual in zip(target, prediction):
            actual = int(actual)
            if expected == actual:
                slot_correct += 1
            elif test_set.get_slot(actual) == 'o':
                # Predicted "outside" where a slot was expected.
                slot_no_match += 1
            elif test_set.get_slot(expected) == 'o':
                # Predicted a slot where "outside" was expected.
                slot_over_match += 1
            else:
                # Predicted the wrong slot type.
                slot_mismatch += 1

    return {
        'accuracy': slot_correct / sum(test_set.lengths()),
        'correct': slot_correct,
        'no_match': slot_no_match,
        'mismatch': slot_mismatch,
        'over_match': slot_over_match,
    }
def load_data(self, imu_file_name: str, att_file_name: str):
    """Read a pixhawk log: build the data set from the given IMU and
    attitude files, then load the IMU records into it."""
    dataset = DataSet(imu_file_name, att_file_name)
    self._data_set = dataset
    dataset.load_imu_data()