def main():
    """Build the MIL_LRRE model, then train it or evaluate it.

    Everything is driven by the module-level ``FLAGS``:

    * Seeds TensorFlow, NumPy and ``random`` with ``FLAGS.random_seed``.
    * Creates a dedicated graph/session, a ``DataGenerator`` and the
      ``MIL_LRRE`` model.
    * When ``FLAGS.train`` is set, wires training and validation input
      tensors into the model and calls ``train``; otherwise runs the
      'reach' or 'push' evaluation selected by ``FLAGS.experiment``.
    * When ``FLAGS.resume`` is set, restores weights from the latest
      checkpoint (or from ``model_<FLAGS.restore_iter>`` next to it when
      ``FLAGS.restore_iter > 0``).

    Raises:
        IOError: ``FLAGS.restore_iter > 0`` was requested but no checkpoint
            exists in the directory that was searched.
        NotImplementedError: evaluation was requested for an experiment
            that is neither 'reach' nor 'push'.
    """

    def _checkpoint_for_iter(latest, itr, searched_dir):
        """Return the path of ``model_<itr>`` next to checkpoint ``latest``."""
        if not latest:
            # tf.train.latest_checkpoint returned nothing; fail with a clear
            # message instead of an AttributeError on None.
            raise IOError('Cannot restore iteration %s: no checkpoint found in %s'
                          % (itr, searched_dir))
        # rindex: only the checkpoint file name should match, not a parent
        # directory that happens to contain 'model'.
        return latest[:latest.rindex('model')] + 'model_' + str(itr)

    tf.set_random_seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    random.seed(FLAGS.random_seed)

    # Build up environment to prevent segfault
    if not FLAGS.train:
        if 'reach' in FLAGS.experiment:
            env = gym.make('ReacherMILTest-v1')
            env.reset()

    # setup session
    graph = tf.Graph()
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(graph=graph, config=tf_config)

    num_unstrided = FLAGS.num_conv_layers - FLAGS.num_strides
    network_config = {
        'num_filters': [FLAGS.num_filters] * FLAGS.num_conv_layers,
        'strides': [[1, 2, 2, 1]] * FLAGS.num_strides + [[1, 1, 1, 1]] * num_unstrided,
        'filter_size': FLAGS.filter_size,
        'image_width': FLAGS.im_width,
        'image_height': FLAGS.im_height,
        'image_channels': FLAGS.num_channels,
        'n_layers': FLAGS.num_fc_layers,
        'layer_size': FLAGS.layer_size,
        'initialization': FLAGS.init,
    }

    # generate expert demonstrations
    data_generator = DataGenerator()
    state_idx = data_generator.state_idx
    # Image indices directly follow the state indices.
    num_pixels = FLAGS.im_height * FLAGS.im_width * FLAGS.num_channels
    img_idx = range(len(state_idx), len(state_idx) + num_pixels)
    # need to compute x_idx and img_idx from data_generator
    if FLAGS.memory_size is None:
        memory_size = FLAGS.T * FLAGS.meta_batch_size * FLAGS.num_updates
    else:
        memory_size = FLAGS.memory_size
    # dim input = sum of idxs (summed lengths; no throwaway concatenation)
    vocab_size = (len(state_idx) + len(img_idx)) * FLAGS.update_batch_size
    model = MIL_LRRE(data_generator._dU, FLAGS.rep_dim, memory_size, vocab_size,
                     use_lsh=FLAGS.use_lsh, state_idx=state_idx, img_idx=img_idx,
                     network_config=network_config, graph=graph)

    # TODO: figure out how to save summaries and checkpoints
    exp_string = (
        FLAGS.experiment + '.' + FLAGS.init + '_init.'
        + str(FLAGS.num_conv_layers) + '_conv' + '.'
        + str(FLAGS.num_strides) + '_strides' + '.'
        + str(FLAGS.num_filters) + '_filters' + '.'
        + str(FLAGS.num_fc_layers) + '_fc' + '.'
        + str(FLAGS.layer_size) + '_dim'
        + '.bt_dim_' + str(FLAGS.bt_dim)
        + '.mbs_' + str(FLAGS.meta_batch_size)
        + '.ubs_' + str(FLAGS.update_batch_size)
        + '.numstep_' + str(FLAGS.num_updates)
        + '.updatelr_' + str(FLAGS.train_update_lr)
    )
    if FLAGS.clip:
        exp_string += '.clip_' + str(int(FLAGS.clip_max))
    optional_suffixes = (
        (FLAGS.conv_bt, '.conv_bt'),
        (FLAGS.all_fc_bt, '.all_fc_bt'),
        (FLAGS.fp, '.fp'),
        (FLAGS.learn_final_eept, '.learn_ee_pos'),
        (FLAGS.no_action, '.no_action'),
        (FLAGS.zero_state, '.zero_state'),
        (FLAGS.two_head, '.two_heads'),
        (FLAGS.two_arms, '.two_arms'),
    )
    for enabled, suffix in optional_suffixes:
        if enabled:
            exp_string += suffix
    if FLAGS.training_set_size != -1:
        exp_string += '.' + str(FLAGS.training_set_size) + '_trials'

    date_time = datetime.today().strftime('%Y%m%d_%H%M%S')
    log_dir = FLAGS.log_dir + '/' + str(date_time) + '_' + exp_string + '_lrre'

    # put here for now
    if FLAGS.train:
        data_generator.generate_batches(noisy=FLAGS.use_noisy_demos)
        # The first update_batch_size * T entries along axis 1 become
        # 'inputa'; the remainder becomes 'inputb'.
        split_at = FLAGS.update_batch_size * FLAGS.T
        with graph.as_default():
            train_image_tensors = data_generator.make_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter)
            train_input_tensors = {
                'inputa': train_image_tensors[:, :split_at, :],
                'inputb': train_image_tensors[:, split_at:, :],
            }
            val_image_tensors = data_generator.make_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter, train=False)
            val_input_tensors = {
                'inputa': val_image_tensors[:, :split_at, :],
                'inputb': val_image_tensors[:, split_at:, :],
            }
        model.init_network(graph, input_tensors=train_input_tensors,
                           restore_iter=FLAGS.restore_iter)
        model.init_network(graph, input_tensors=val_input_tensors,
                           restore_iter=FLAGS.restore_iter, prefix='Validation_')
    else:
        model.init_network(graph, prefix='Testing')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=10)
        # Initialize variables.
        sess.run(tf.global_variables_initializer())
        # Start queue runners (used for loading videos on the fly)
        tf.train.start_queue_runners(sess=sess)

    if FLAGS.resume:
        # Prefer the explicitly given LRRE log directory when it exists.
        # log_dir is timestamped above, so it normally has no checkpoints yet.
        if os.path.exists(FLAGS.lrre_log_dir):
            ckpt_dir = FLAGS.lrre_log_dir
        else:
            ckpt_dir = log_dir
        model_file = tf.train.latest_checkpoint(ckpt_dir)
        if FLAGS.restore_iter > 0:
            model_file = _checkpoint_for_iter(model_file, FLAGS.restore_iter, ckpt_dir)
        if model_file:
            print("Restoring model weights from " + model_file)
            with graph.as_default():
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(graph, model, saver, sess, data_generator, log_dir,
              restore_itr=FLAGS.restore_iter)
    elif 'reach' in FLAGS.experiment:
        generate_test_demos(data_generator)
        evaluate_vision_reach(env, graph, model, data_generator, sess,
                              exp_string, FLAGS.record_gifs, log_dir)
    elif 'push' in FLAGS.experiment:
        evaluate_push(sess, graph, model, data_generator, exp_string, log_dir,
                      FLAGS.demo_file + '/', save_video=FLAGS.record_gifs)
    else:
        raise NotImplementedError
def main():
    """Build the MIL model on compare-batch inputs, then train or evaluate.

    Everything is driven by the module-level ``FLAGS``:

    * Seeds TensorFlow, NumPy and ``random`` with ``FLAGS.random_seed``.
    * Creates a dedicated graph/session (GPU memory growth enabled), a
      ``DataGenerator`` and the ``MIL`` model.
    * When ``FLAGS.train`` is set, splits the compare batch tensor into
      'inputa' / 'inputb' / 'inputc' and calls ``train``.
    * Otherwise evaluates: a sweep over checkpoints from
      ``FLAGS.begin_restore_iter`` to ``FLAGS.end_restore_iter`` (inclusive,
      in steps of 100) when those two flags differ, else a single
      checkpoint.

    Raises:
        IOError: a specific checkpoint iteration was requested but no
            checkpoint exists in ``log_dir`` to derive its path from.
    """

    def _checkpoint_for_iter(latest, itr, searched_dir):
        """Return the path of ``model_<itr>`` next to checkpoint ``latest``."""
        if not latest:
            # tf.train.latest_checkpoint returned nothing; fail with a clear
            # message instead of an AttributeError on None.
            raise IOError('Cannot restore iteration %s: no checkpoint found in %s'
                          % (itr, searched_dir))
        # rindex: only the checkpoint file name should match, not a parent
        # directory that happens to contain 'model'.
        return latest[:latest.rindex('model')] + 'model_' + str(itr)

    tf.set_random_seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    random.seed(FLAGS.random_seed)

    # Build up environment to prevent segfault
    if not FLAGS.train:
        if 'reach' in FLAGS.experiment:
            env = gym.make('ReacherMILTest-v1')
            env.reset()

    graph = tf.Graph()
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(graph=graph, config=tf_config)

    num_unstrided = FLAGS.num_conv_layers - FLAGS.num_strides
    network_config = {
        'num_filters': [FLAGS.num_filters] * FLAGS.num_conv_layers,
        'strides': [[1, 2, 2, 1]] * FLAGS.num_strides + [[1, 1, 1, 1]] * num_unstrided,
        'filter_size': FLAGS.filter_size,
        'image_width': FLAGS.im_width,
        'image_height': FLAGS.im_height,
        'image_channels': FLAGS.num_channels,
        'n_layers': FLAGS.num_fc_layers,
        'layer_size': FLAGS.layer_size,
        'initialization': FLAGS.init,
    }

    data_generator = DataGenerator()
    state_idx = data_generator.state_idx
    # Image indices directly follow the state indices.
    num_pixels = FLAGS.im_height * FLAGS.im_width * FLAGS.num_channels
    img_idx = range(len(state_idx), len(state_idx) + num_pixels)
    # need to compute x_idx and img_idx from data_generator
    model = MIL(data_generator._dU, state_idx=state_idx, img_idx=img_idx,
                network_config=network_config)

    # TODO: figure out how to save summaries and checkpoints
    exp_string = (
        FLAGS.experiment + '.' + FLAGS.init + '_init.'
        + str(FLAGS.num_conv_layers) + '_conv' + '.'
        + str(FLAGS.num_strides) + '_strides' + '.'
        + str(FLAGS.num_filters) + '_filters' + '.'
        + str(FLAGS.num_fc_layers) + '_fc' + '.'
        + str(FLAGS.layer_size) + '_dim'
        + '.bt_dim_' + str(FLAGS.bt_dim)
        + '.mbs_' + str(FLAGS.meta_batch_size)
        + '.ubs_' + str(FLAGS.update_batch_size)
        + '.numstep_' + str(FLAGS.num_updates)
        + '.updatelr_' + str(FLAGS.train_update_lr)
    )
    if FLAGS.clip:
        exp_string += '.clip_' + str(int(FLAGS.clip_max))
    optional_suffixes = (
        (FLAGS.conv_bt, '.conv_bt'),
        (FLAGS.all_fc_bt, '.all_fc_bt'),
        (FLAGS.fp, '.fp'),
        (FLAGS.learn_final_eept, '.learn_ee_pos'),
        (FLAGS.no_action, '.no_action'),
        (FLAGS.zero_state, '.zero_state'),
        (FLAGS.two_head, '.two_heads'),
        (FLAGS.two_arms, '.two_arms'),
    )
    for enabled, suffix in optional_suffixes:
        if enabled:
            exp_string += suffix
    if FLAGS.temporal_conv_2_head:
        exp_string += ('.1d_conv_act_' + str(FLAGS.temporal_num_layers)
                       + '_' + str(FLAGS.temporal_num_filters))
        if FLAGS.temporal_conv_2_head_ee:
            exp_string += ('_ee_' + str(FLAGS.temporal_num_layers_ee)
                           + '_' + str(FLAGS.temporal_num_filters_ee))
        exp_string += '_' + str(FLAGS.temporal_filter_size) + 'x1_filters'
    if FLAGS.training_set_size != -1:
        exp_string += '.' + str(FLAGS.training_set_size) + '_trials'

    # NOTE(review): this reads FLAGS.log_dirs (plural) -- confirm the flag is
    # really defined under that name.
    log_dir = FLAGS.log_dirs + '/' + exp_string

    # put here for now
    if FLAGS.train:
        data_generator.generate_batches(noisy=FLAGS.use_noisy_demos)
        # Axis 1 is cut into three pieces: the first update_batch_size * T
        # entries ('inputa'), the next T entries ('inputb'), and the rest
        # ('inputc').
        a_end = FLAGS.update_batch_size * FLAGS.T
        b_end = (FLAGS.update_batch_size + 1) * FLAGS.T
        with graph.as_default():
            train_image_tensors = data_generator.make_compare_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter)
            train_input_tensors = {
                'inputa': train_image_tensors[:, :a_end, :],
                'inputb': train_image_tensors[:, a_end:b_end, :],
                'inputc': train_image_tensors[:, b_end:, :],
            }
        # Only the training network is built in this variant.
        model.init_network(graph, input_tensors=train_input_tensors,
                           restore_iter=FLAGS.restore_iter)
    else:
        model.init_network(graph, prefix='Testing')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=10)
        # Initialize variables.
        sess.run(tf.global_variables_initializer())
        # Start queue runners (used for loading videos on the fly)
        tf.train.start_queue_runners(sess=sess)

    if FLAGS.resume:
        model_file = tf.train.latest_checkpoint(log_dir)
        if FLAGS.restore_iter > 0:
            model_file = _checkpoint_for_iter(model_file, FLAGS.restore_iter, log_dir)
        if model_file:
            print("Restoring model weights from " + model_file)
            with graph.as_default():
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(graph, model, saver, sess, data_generator, log_dir,
              restore_itr=FLAGS.restore_iter)
        return

    def _restore(checkpoint):
        """Load ``checkpoint`` into the session; do nothing when it is empty."""
        if checkpoint:
            print("Restoring model weights from " + checkpoint)
            saver.restore(sess, checkpoint)

    def _evaluate():
        """Run the evaluation routine selected by ``FLAGS.experiment``."""
        if 'reach' in FLAGS.experiment:
            # A fresh environment is created for every evaluation run.
            eval_env = gym.make('ReacherMILTest-v1')
            eval_env.reset()
            generate_test_demos(data_generator)
            evaluate_vision_reach(eval_env, graph, model, data_generator, sess,
                                  exp_string, FLAGS.record_gifs, log_dir)
        elif 'push' in FLAGS.experiment:
            evaluate_push(sess, graph, model, data_generator, exp_string,
                          log_dir, FLAGS.demo_file + '/',
                          save_video=FLAGS.record_gifs)

    model_file = tf.train.latest_checkpoint(log_dir)
    if FLAGS.begin_restore_iter != FLAGS.end_restore_iter:
        # Sweep over a range of checkpoints, one every 100 iterations.
        sweep_stride = 100
        iter_index = FLAGS.begin_restore_iter
        while iter_index <= FLAGS.end_restore_iter:
            print('iter_index', iter_index)
            if FLAGS.restore_iter >= 0:
                model_file = _checkpoint_for_iter(model_file, iter_index, log_dir)
            _restore(model_file)
            _evaluate()
            iter_index += sweep_stride
    else:
        if FLAGS.restore_iter > 0:
            model_file = _checkpoint_for_iter(model_file, FLAGS.restore_iter, log_dir)
        _restore(model_file)
        _evaluate()
def main():
    """Build the MIL model for the Reptile setup, then train or evaluate.

    Everything is driven by the module-level ``FLAGS``:

    * Seeds TensorFlow, NumPy and ``random`` with ``FLAGS.random_seed``.
    * Creates a dedicated graph/session and a ``MIL`` model with fixed
      index ranges (20 state entries followed by 125 * 125 * 3 image
      entries) and 7 as its first argument.
    * When ``FLAGS.train`` is set, calls ``train``; otherwise creates a
      ``DataGenerator`` and runs the 'reach' or 'push' evaluation selected
      by ``FLAGS.experiment``.
    * When ``FLAGS.resume`` is set, restores weights from the latest
      checkpoint (or from ``model_<FLAGS.restore_iter>`` next to it when
      ``FLAGS.restore_iter > 0``).

    Raises:
        IOError: ``FLAGS.restore_iter > 0`` was requested but no checkpoint
            exists in the directory that was searched.
        NotImplementedError: evaluation was requested for an experiment
            that is neither 'reach' nor 'push'.
    """

    def _checkpoint_for_iter(latest, itr, searched_dir):
        """Return the path of ``model_<itr>`` next to checkpoint ``latest``."""
        if not latest:
            # tf.train.latest_checkpoint returned nothing; fail with a clear
            # message instead of an AttributeError on None.
            raise IOError('Cannot restore iteration %s: no checkpoint found in %s'
                          % (itr, searched_dir))
        # rindex: only the checkpoint file name should match, not a parent
        # directory that happens to contain 'model'.
        return latest[:latest.rindex('model')] + 'model_' + str(itr)

    print('STARTING MAIN')
    tf.set_random_seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    random.seed(FLAGS.random_seed)

    # Build up environment to prevent segfault
    if not FLAGS.train:
        if 'reach' in FLAGS.experiment:
            env = gym.make('ReacherMILTest-v1')
            env.reset()

    # setup session
    print('MAKING SESS')
    graph = tf.Graph()
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(graph=graph, config=tf_config)
    print('MADE SESS')

    num_unstrided = FLAGS.num_conv_layers - FLAGS.num_strides
    network_config = {
        'num_filters': [FLAGS.num_filters] * FLAGS.num_conv_layers,
        'strides': [[1, 2, 2, 1]] * FLAGS.num_strides + [[1, 1, 1, 1]] * num_unstrided,
        'filter_size': FLAGS.filter_size,
        'image_width': FLAGS.im_width,
        'image_height': FLAGS.im_height,
        'image_channels': FLAGS.num_channels,
        'n_layers': FLAGS.num_fc_layers,
        'layer_size': FLAGS.layer_size,
        'initialization': FLAGS.init,
    }

    # Index ranges are hard-coded here instead of being read from a
    # DataGenerator.
    # need to compute x_idx and img_idx from data_generator
    img_idx = range(20, 20 + 125 * 125 * 3)
    model = MIL(7, state_idx=range(20), img_idx=img_idx,
                network_config=network_config)

    # TODO: figure out how to save summaries and checkpoints
    exp_string = (
        FLAGS.experiment + '.' + FLAGS.init + '_init.'
        + str(FLAGS.num_conv_layers) + '_conv' + '.'
        + str(FLAGS.num_strides) + '_strides' + '.'
        + str(FLAGS.num_filters) + '_filters' + '.'
        + str(FLAGS.num_fc_layers) + '_fc' + '.'
        + str(FLAGS.layer_size) + '_dim'
        + '.bt_dim_' + str(FLAGS.bt_dim)
        + '.mbs_' + str(FLAGS.meta_batch_size)
        + '.ubs_' + str(FLAGS.update_batch_size)
        + '.numstep_' + str(FLAGS.num_updates)
        + '.updatelr_' + str(FLAGS.train_update_lr)
    )
    if FLAGS.clip:
        exp_string += '.clip_' + str(int(FLAGS.clip_max))
    optional_suffixes = (
        (FLAGS.conv_bt, '.conv_bt'),
        (FLAGS.all_fc_bt, '.all_fc_bt'),
        (FLAGS.fp, '.fp'),
        (FLAGS.learn_final_eept, '.learn_ee_pos'),
        (FLAGS.no_action, '.no_action'),
        (FLAGS.zero_state, '.zero_state'),
        (FLAGS.two_head, '.two_heads'),
        (FLAGS.two_arms, '.two_arms'),
    )
    for enabled, suffix in optional_suffixes:
        if enabled:
            exp_string += suffix
    if FLAGS.training_set_size != -1:
        exp_string += '.' + str(FLAGS.training_set_size) + '_trials'

    # The log directory is named after the Reptile hyper-parameters;
    # exp_string is only passed to the evaluation routines.
    reptile_exp_string = (
        FLAGS.experiment + '.'
        + '_num_shots.' + str(FLAGS.num_shots_reptile)
        + '_inner_iters.' + str(FLAGS.inner_iters_reptile)
        + '_meta_batch_size.' + str(FLAGS.meta_batch_size_reptile)
        + '_meta_step_size.' + str(FLAGS.meta_step_size_reptile)
        + '_num_classes.' + str(FLAGS.num_classes_reptile)
        + '_reptile_iterations.' + str(FLAGS.reptile_iterations)
    )
    date_time = datetime.today().strftime('%Y%m%d_%H%M%S')
    log_dir = (FLAGS.log_dir + '/' + str(date_time) + '_' + reptile_exp_string
               + '_reptile_new_data')

    # put here for now
    if FLAGS.train:
        print('FLAGS.use_noisy_demos:', FLAGS.use_noisy_demos)
        model.init_network(graph, restore_iter=FLAGS.restore_iter)
    else:
        model.init_network(graph, prefix='Testing')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=3)  # TODO: change later
        # Initialize variables.
        sess.run(tf.global_variables_initializer())

    if FLAGS.resume:
        # Prefer the explicitly given Reptile log directory when it exists.
        # log_dir is timestamped above, so it normally has no checkpoints yet.
        if os.path.exists(FLAGS.reptile_log_dir):
            ckpt_dir = FLAGS.reptile_log_dir
        else:
            ckpt_dir = log_dir
        model_file = tf.train.latest_checkpoint(ckpt_dir)
        if FLAGS.restore_iter > 0:
            model_file = _checkpoint_for_iter(model_file, FLAGS.restore_iter, ckpt_dir)
        if model_file:
            print("Restoring model weights from " + model_file)
            with graph.as_default():
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(graph, model, saver, sess, log_dir,
              restore_itr=FLAGS.restore_iter, network_config=network_config)
    else:
        data_generator = DataGenerator()
        if 'reach' in FLAGS.experiment:
            generate_test_demos(data_generator)
            evaluate_vision_reach(env, graph, model, data_generator, sess,
                                  exp_string, FLAGS.record_gifs, log_dir)
        elif 'push' in FLAGS.experiment:
            evaluate_push(sess, graph, model, data_generator, exp_string,
                          log_dir, FLAGS.demo_file + '/',
                          save_video=FLAGS.record_gifs)
        else:
            raise NotImplementedError