Example #1
0
def main():
    """Build the MIL_LRRE model, then train or evaluate it according to FLAGS.

    Steps, in order:
      1. Seed TensorFlow, NumPy and ``random`` from ``FLAGS.random_seed``.
      2. In evaluation mode for a 'reach' experiment, create the gym
         environment before the TensorFlow session is created.
      3. Create a session on a fresh graph and construct ``MIL_LRRE``.
      4. Derive ``exp_string`` and a timestamped ``log_dir`` from FLAGS.
      5. Initialise the network (train + validation inputs, or 'Testing').
      6. Optionally restore a checkpoint when ``FLAGS.resume`` is set.
      7. Call ``train`` or the evaluation routine chosen by
         ``FLAGS.experiment``.

    Raises:
        NotImplementedError: in evaluation mode when ``FLAGS.experiment``
            contains neither 'reach' nor 'push'.
    """
    tf.set_random_seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    random.seed(FLAGS.random_seed)
    # Build up environment to prevent segfault
    if not FLAGS.train and 'reach' in FLAGS.experiment:
        env = gym.make('ReacherMILTest-v1')
        env.reset()

    # setup session
    graph = tf.Graph()
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(graph=graph, config=tf_config)
    network_config = {
        'num_filters': [FLAGS.num_filters] * FLAGS.num_conv_layers,
        # The first `num_strides` conv layers use stride 2, the rest stride 1.
        'strides': ([[1, 2, 2, 1]] * FLAGS.num_strides +
                    [[1, 1, 1, 1]] * (FLAGS.num_conv_layers - FLAGS.num_strides)),
        'filter_size': FLAGS.filter_size,
        'image_width': FLAGS.im_width,
        'image_height': FLAGS.im_height,
        'image_channels': FLAGS.num_channels,
        'n_layers': FLAGS.num_fc_layers,
        'layer_size': FLAGS.layer_size,
        'initialization': FLAGS.init,
    }

    # generate expert demonstrations
    data_generator = DataGenerator()
    state_idx = data_generator.state_idx
    # Image indices directly follow the state indices.
    num_pixels = FLAGS.im_height * FLAGS.im_width * FLAGS.num_channels
    img_idx = range(len(state_idx), len(state_idx) + num_pixels)
    # need to compute x_idx and img_idx from data_generator

    if FLAGS.memory_size is None:
        memory_size = FLAGS.T * FLAGS.meta_batch_size * FLAGS.num_updates
    else:
        memory_size = FLAGS.memory_size
    # dim input = sum of idxs.  The lengths are added separately because a
    # `range` cannot be concatenated with `+` on Python 3.
    vocab_size = (len(state_idx) + len(img_idx)) * FLAGS.update_batch_size
    model = MIL_LRRE(data_generator._dU, FLAGS.rep_dim, memory_size, vocab_size,
                     use_lsh=FLAGS.use_lsh, state_idx=state_idx,
                     img_idx=img_idx, network_config=network_config,
                     graph=graph)

    # TODO: figure out how to save summaries and checkpoints
    exp_string = (
        FLAGS.experiment + '.' + FLAGS.init + '_init.' +
        str(FLAGS.num_conv_layers) + '_conv' + '.' +
        str(FLAGS.num_strides) + '_strides' + '.' +
        str(FLAGS.num_filters) + '_filters' + '.' +
        str(FLAGS.num_fc_layers) + '_fc' + '.' +
        str(FLAGS.layer_size) + '_dim' +
        '.bt_dim_' + str(FLAGS.bt_dim) +
        '.mbs_' + str(FLAGS.meta_batch_size) +
        '.ubs_' + str(FLAGS.update_batch_size) +
        '.numstep_' + str(FLAGS.num_updates) +
        '.updatelr_' + str(FLAGS.train_update_lr))

    if FLAGS.clip:
        exp_string += '.clip_' + str(int(FLAGS.clip_max))
    if FLAGS.conv_bt:
        exp_string += '.conv_bt'
    if FLAGS.all_fc_bt:
        exp_string += '.all_fc_bt'
    if FLAGS.fp:
        exp_string += '.fp'
    if FLAGS.learn_final_eept:
        exp_string += '.learn_ee_pos'
    if FLAGS.no_action:
        exp_string += '.no_action'
    if FLAGS.zero_state:
        exp_string += '.zero_state'
    if FLAGS.two_head:
        exp_string += '.two_heads'
    if FLAGS.two_arms:
        exp_string += '.two_arms'
    if FLAGS.training_set_size != -1:
        exp_string += '.' + str(FLAGS.training_set_size) + '_trials'
    # The timestamp makes log_dir unique per run.
    date_time = datetime.today().strftime('%Y%m%d_%H%M%S')
    log_dir = FLAGS.log_dir + '/' + str(date_time) + '_' + exp_string + '_lrre'

    # put here for now
    if FLAGS.train:
        data_generator.generate_batches(noisy=FLAGS.use_noisy_demos)
        with graph.as_default():
            # Axis 1 is split at update_batch_size * T: the leading part
            # becomes 'inputa', the remainder 'inputb'.
            split = FLAGS.update_batch_size * FLAGS.T
            train_image_tensors = data_generator.make_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter)
            train_input_tensors = {
                'inputa': train_image_tensors[:, :split, :],
                'inputb': train_image_tensors[:, split:, :],
            }
            val_image_tensors = data_generator.make_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter, train=False)
            val_input_tensors = {
                'inputa': val_image_tensors[:, :split, :],
                'inputb': val_image_tensors[:, split:, :],
            }
        model.init_network(graph, input_tensors=train_input_tensors,
                           restore_iter=FLAGS.restore_iter)
        model.init_network(graph, input_tensors=val_input_tensors,
                           restore_iter=FLAGS.restore_iter,
                           prefix='Validation_')
    else:
        model.init_network(graph, prefix='Testing')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=10)
        # Initialize variables.
        init_op = tf.global_variables_initializer()
        sess.run(init_op, feed_dict=None)
        # Start queue runners (used for loading videos on the fly)
        tf.train.start_queue_runners(sess=sess)

    if FLAGS.resume:
        # Use FLAGS.lrre_log_dir when that path exists, else this run's log_dir.
        if os.path.exists(FLAGS.lrre_log_dir):
            ckpt_dir = FLAGS.lrre_log_dir
        else:
            ckpt_dir = log_dir
        model_file = tf.train.latest_checkpoint(ckpt_dir)

        if FLAGS.restore_iter > 0:
            # Swap the iteration suffix of the latest checkpoint for the
            # requested one.  `rindex` targets the file name even when a
            # directory in the path contains "model".  latest_checkpoint may
            # return None; fall back to the directory so that case no longer
            # fails with an AttributeError here.
            if model_file:
                prefix = model_file[:model_file.rindex('model')]
            else:
                prefix = ckpt_dir + '/'
            model_file = prefix + 'model_' + str(FLAGS.restore_iter)
        if model_file:
            print("Restoring model weights from " + model_file)
            with graph.as_default():
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(graph, model, saver, sess, data_generator, log_dir,
              restore_itr=FLAGS.restore_iter)
    elif 'reach' in FLAGS.experiment:
        generate_test_demos(data_generator)
        evaluate_vision_reach(env, graph, model, data_generator, sess,
                              exp_string, FLAGS.record_gifs, log_dir)
    elif 'push' in FLAGS.experiment:
        evaluate_push(sess, graph, model, data_generator, exp_string, log_dir,
                      FLAGS.demo_file + '/', save_video=FLAGS.record_gifs)
    else:
        raise NotImplementedError
def main():
    """Build the MIL model on compare-batch inputs, then train or evaluate it.

    Steps, in order:
      1. Seed TensorFlow, NumPy and ``random`` from ``FLAGS.random_seed``.
      2. In evaluation mode for a 'reach' experiment, create the gym
         environment before the TensorFlow session is created.
      3. Create a session (GPU memory growth enabled) on a fresh graph and
         construct ``MIL``.
      4. Derive ``exp_string`` and ``log_dir`` from FLAGS.
      5. Initialise the network: for training with the 'inputa' / 'inputb' /
         'inputc' tensors, otherwise with prefix 'Testing'.
      6. Optionally restore a checkpoint when ``FLAGS.resume`` is set.
      7. Call ``train``, or evaluate either a sweep of checkpoints
         (``FLAGS.begin_restore_iter`` .. ``FLAGS.end_restore_iter`` in steps
         of 100) or a single checkpoint.
    """
    tf.set_random_seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    random.seed(FLAGS.random_seed)
    # Build up environment to prevent segfault
    if not FLAGS.train and 'reach' in FLAGS.experiment:
        env = gym.make('ReacherMILTest-v1')
        env.reset()

    graph = tf.Graph()
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(graph=graph, config=tf_config)
    network_config = {
        'num_filters': [FLAGS.num_filters] * FLAGS.num_conv_layers,
        # The first `num_strides` conv layers use stride 2, the rest stride 1.
        'strides': ([[1, 2, 2, 1]] * FLAGS.num_strides +
                    [[1, 1, 1, 1]] * (FLAGS.num_conv_layers - FLAGS.num_strides)),
        'filter_size': FLAGS.filter_size,
        'image_width': FLAGS.im_width,
        'image_height': FLAGS.im_height,
        'image_channels': FLAGS.num_channels,
        'n_layers': FLAGS.num_fc_layers,
        'layer_size': FLAGS.layer_size,
        'initialization': FLAGS.init,
    }

    data_generator = DataGenerator()
    state_idx = data_generator.state_idx
    # Image indices directly follow the state indices.
    num_pixels = FLAGS.im_height * FLAGS.im_width * FLAGS.num_channels
    img_idx = range(len(state_idx), len(state_idx) + num_pixels)
    # need to compute x_idx and img_idx from data_generator
    model = MIL(data_generator._dU,
                state_idx=state_idx,
                img_idx=img_idx,
                network_config=network_config)

    # TODO: figure out how to save summaries and checkpoints
    exp_string = (
        FLAGS.experiment + '.' + FLAGS.init + '_init.' +
        str(FLAGS.num_conv_layers) + '_conv' + '.' +
        str(FLAGS.num_strides) + '_strides' + '.' +
        str(FLAGS.num_filters) + '_filters' + '.' +
        str(FLAGS.num_fc_layers) + '_fc' + '.' +
        str(FLAGS.layer_size) + '_dim' +
        '.bt_dim_' + str(FLAGS.bt_dim) +
        '.mbs_' + str(FLAGS.meta_batch_size) +
        '.ubs_' + str(FLAGS.update_batch_size) +
        '.numstep_' + str(FLAGS.num_updates) +
        '.updatelr_' + str(FLAGS.train_update_lr))

    if FLAGS.clip:
        exp_string += '.clip_' + str(int(FLAGS.clip_max))
    if FLAGS.conv_bt:
        exp_string += '.conv_bt'
    if FLAGS.all_fc_bt:
        exp_string += '.all_fc_bt'
    if FLAGS.fp:
        exp_string += '.fp'
    if FLAGS.learn_final_eept:
        exp_string += '.learn_ee_pos'
    if FLAGS.no_action:
        exp_string += '.no_action'
    if FLAGS.zero_state:
        exp_string += '.zero_state'
    if FLAGS.two_head:
        exp_string += '.two_heads'
    if FLAGS.two_arms:
        exp_string += '.two_arms'
    if FLAGS.temporal_conv_2_head:
        exp_string += ('.1d_conv_act_' + str(FLAGS.temporal_num_layers) +
                       '_' + str(FLAGS.temporal_num_filters))
        if FLAGS.temporal_conv_2_head_ee:
            exp_string += ('_ee_' + str(FLAGS.temporal_num_layers_ee) +
                           '_' + str(FLAGS.temporal_num_filters_ee))
        exp_string += '_' + str(FLAGS.temporal_filter_size) + 'x1_filters'
    if FLAGS.training_set_size != -1:
        exp_string += '.' + str(FLAGS.training_set_size) + '_trials'

    # NOTE(review): this reads FLAGS.log_dirs (plural) -- confirm the flag is
    # defined under that name.
    log_dir = FLAGS.log_dirs + '/' + exp_string

    def checkpoint_for_iter(latest, iteration):
        """Return the path of checkpoint 'model_<iteration>'.

        The directory prefix is taken from `latest` (a path returned by
        tf.train.latest_checkpoint).  `rindex` targets the file name even when
        a directory in the path contains "model".  When `latest` is None
        (no checkpoint found) the prefix falls back to `log_dir`, so the
        lookup no longer fails with an AttributeError.
        """
        if latest:
            prefix = latest[:latest.rindex('model')]
        else:
            prefix = log_dir + '/'
        return prefix + 'model_' + str(iteration)

    # put here for now
    if FLAGS.train:
        data_generator.generate_batches(noisy=FLAGS.use_noisy_demos)
        with graph.as_default():
            train_image_tensors = data_generator.make_compare_batch_tensor(
                network_config, restore_iter=FLAGS.restore_iter)

            # Axis 1 is cut into three consecutive pieces:
            #   inputa: [0, update_batch_size * T)
            #   inputb: [update_batch_size * T, (update_batch_size + 1) * T)
            #   inputc: everything after that.
            first_cut = FLAGS.update_batch_size * FLAGS.T
            second_cut = (FLAGS.update_batch_size + 1) * FLAGS.T
            train_input_tensors = {
                'inputa': train_image_tensors[:, :first_cut, :],
                'inputb': train_image_tensors[:, first_cut:second_cut, :],
                'inputc': train_image_tensors[:, second_cut:, :],
            }
        # No validation network is built in this variant.
        model.init_network(graph,
                           input_tensors=train_input_tensors,
                           restore_iter=FLAGS.restore_iter)
    else:
        model.init_network(graph, prefix='Testing')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=10)
        # Initialize variables.
        init_op = tf.global_variables_initializer()
        sess.run(init_op, feed_dict=None)
        # Start queue runners (used for loading videos on the fly)
        tf.train.start_queue_runners(sess=sess)

    if FLAGS.resume:
        model_file = tf.train.latest_checkpoint(log_dir)
        if FLAGS.restore_iter > 0:
            model_file = checkpoint_for_iter(model_file, FLAGS.restore_iter)
        if model_file:
            print("Restoring model weights from " + model_file)
            with graph.as_default():
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(graph,
              model,
              saver,
              sess,
              data_generator,
              log_dir,
              restore_itr=FLAGS.restore_iter)
    else:
        model_file = tf.train.latest_checkpoint(log_dir)
        if FLAGS.begin_restore_iter != FLAGS.end_restore_iter:
            # Sweep: evaluate every 100th checkpoint in the inclusive range.
            iter_index = FLAGS.begin_restore_iter
            while iter_index <= FLAGS.end_restore_iter:
                print('iter_index', iter_index)
                if FLAGS.restore_iter >= 0:
                    model_file = checkpoint_for_iter(model_file, iter_index)
                if model_file:
                    print("Restoring model weights from " + model_file)
                    saver.restore(sess, model_file)
                if 'reach' in FLAGS.experiment:
                    # A new environment is created for each checkpoint.
                    env = gym.make('ReacherMILTest-v1')
                    env.reset()
                    generate_test_demos(data_generator)
                    evaluate_vision_reach(env, graph, model, data_generator,
                                          sess, exp_string, FLAGS.record_gifs,
                                          log_dir)
                elif 'push' in FLAGS.experiment:
                    evaluate_push(sess,
                                  graph,
                                  model,
                                  data_generator,
                                  exp_string,
                                  log_dir,
                                  FLAGS.demo_file + '/',
                                  save_video=FLAGS.record_gifs)
                iter_index += 100
        else:
            # Single checkpoint: FLAGS.restore_iter if positive, else latest.
            if FLAGS.restore_iter > 0:
                model_file = checkpoint_for_iter(model_file,
                                                 FLAGS.restore_iter)
            if model_file:
                print("Restoring model weights from " + model_file)
                saver.restore(sess, model_file)
            if 'reach' in FLAGS.experiment:
                env = gym.make('ReacherMILTest-v1')
                env.reset()
                generate_test_demos(data_generator)
                evaluate_vision_reach(env, graph, model, data_generator, sess,
                                      exp_string, FLAGS.record_gifs, log_dir)
            elif 'push' in FLAGS.experiment:
                evaluate_push(sess,
                              graph,
                              model,
                              data_generator,
                              exp_string,
                              log_dir,
                              FLAGS.demo_file + '/',
                              save_video=FLAGS.record_gifs)
Example #3
0
def main():
    """Build the MIL model for the reptile run, then train or evaluate it.

    Steps, in order:
      1. Seed TensorFlow, NumPy and ``random`` from ``FLAGS.random_seed``.
      2. In evaluation mode for a 'reach' experiment, create the gym
         environment before the TensorFlow session is created.
      3. Create a session on a fresh graph and construct ``MIL``.
      4. Derive ``exp_string`` (passed to the evaluation routines) and a
         timestamped ``log_dir`` named after the reptile FLAGS.
      5. Initialise the network without input tensors.
      6. Optionally restore a checkpoint when ``FLAGS.resume`` is set.
      7. Call ``train``, or build a ``DataGenerator`` and run the evaluation
         routine chosen by ``FLAGS.experiment``.

    Raises:
        NotImplementedError: in evaluation mode when ``FLAGS.experiment``
            contains neither 'reach' nor 'push'.
    """
    print('STARTING MAIN')
    tf.set_random_seed(FLAGS.random_seed)
    np.random.seed(FLAGS.random_seed)
    random.seed(FLAGS.random_seed)
    # Build up environment to prevent segfault
    if not FLAGS.train and 'reach' in FLAGS.experiment:
        env = gym.make('ReacherMILTest-v1')
        env.reset()

    # setup session
    print('MAKING SESS')
    graph = tf.Graph()
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
    tf_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(graph=graph, config=tf_config)
    print('MADE SESS')
    network_config = {
        'num_filters': [FLAGS.num_filters] * FLAGS.num_conv_layers,
        # The first `num_strides` conv layers use stride 2, the rest stride 1.
        'strides': ([[1, 2, 2, 1]] * FLAGS.num_strides +
                    [[1, 1, 1, 1]] * (FLAGS.num_conv_layers - FLAGS.num_strides)),
        'filter_size': FLAGS.filter_size,
        'image_width': FLAGS.im_width,
        'image_height': FLAGS.im_height,
        'image_channels': FLAGS.num_channels,
        'n_layers': FLAGS.num_fc_layers,
        'layer_size': FLAGS.layer_size,
        'initialization': FLAGS.init,
    }

    # NOTE(review): the index layout is hard-coded here (20 state indices,
    # followed by 125 * 125 * 3 image indices, and 7 as the first MIL
    # argument) instead of being read from FLAGS or a DataGenerator --
    # confirm these match the data being used.
    img_idx = range(20, 20 + 125 * 125 * 3)
    # need to compute x_idx and img_idx from data_generator
    model = MIL(7,
                state_idx=range(20),
                img_idx=img_idx,
                network_config=network_config)

    # TODO: figure out how to save summaries and checkpoints
    exp_string = (
        FLAGS.experiment + '.' + FLAGS.init + '_init.' +
        str(FLAGS.num_conv_layers) + '_conv' + '.' +
        str(FLAGS.num_strides) + '_strides' + '.' +
        str(FLAGS.num_filters) + '_filters' + '.' +
        str(FLAGS.num_fc_layers) + '_fc' + '.' +
        str(FLAGS.layer_size) + '_dim' +
        '.bt_dim_' + str(FLAGS.bt_dim) +
        '.mbs_' + str(FLAGS.meta_batch_size) +
        '.ubs_' + str(FLAGS.update_batch_size) +
        '.numstep_' + str(FLAGS.num_updates) +
        '.updatelr_' + str(FLAGS.train_update_lr))

    if FLAGS.clip:
        exp_string += '.clip_' + str(int(FLAGS.clip_max))
    if FLAGS.conv_bt:
        exp_string += '.conv_bt'
    if FLAGS.all_fc_bt:
        exp_string += '.all_fc_bt'
    if FLAGS.fp:
        exp_string += '.fp'
    if FLAGS.learn_final_eept:
        exp_string += '.learn_ee_pos'
    if FLAGS.no_action:
        exp_string += '.no_action'
    if FLAGS.zero_state:
        exp_string += '.zero_state'
    if FLAGS.two_head:
        exp_string += '.two_heads'
    if FLAGS.two_arms:
        exp_string += '.two_arms'
    if FLAGS.training_set_size != -1:
        exp_string += '.' + str(FLAGS.training_set_size) + '_trials'

    # The log directory is named after the reptile settings, not exp_string.
    reptile_exp_string = (
        FLAGS.experiment + '.' +
        '_num_shots.' + str(FLAGS.num_shots_reptile) +
        '_inner_iters.' + str(FLAGS.inner_iters_reptile) +
        '_meta_batch_size.' + str(FLAGS.meta_batch_size_reptile) +
        '_meta_step_size.' + str(FLAGS.meta_step_size_reptile) +
        '_num_classes.' + str(FLAGS.num_classes_reptile) +
        '_reptile_iterations.' + str(FLAGS.reptile_iterations))
    # The timestamp makes log_dir unique per run.
    date_time = datetime.today().strftime('%Y%m%d_%H%M%S')
    log_dir = (FLAGS.log_dir + '/' + str(date_time) + '_' +
               reptile_exp_string + '_reptile_new_data')

    # put here for now
    if FLAGS.train:
        print('FLAGS.use_noisy_demos:', FLAGS.use_noisy_demos)
        model.init_network(graph, restore_iter=FLAGS.restore_iter)
    else:
        model.init_network(graph, prefix='Testing')

    with graph.as_default():
        # Set up saver.
        saver = tf.train.Saver(max_to_keep=3)  # TODO: change later
        # Initialize variables.
        init_op = tf.global_variables_initializer()
        sess.run(init_op, feed_dict=None)

    if FLAGS.resume:
        # Use FLAGS.reptile_log_dir when that path exists, else this run's
        # log_dir.
        if os.path.exists(FLAGS.reptile_log_dir):
            ckpt_dir = FLAGS.reptile_log_dir
        else:
            ckpt_dir = log_dir
        model_file = tf.train.latest_checkpoint(ckpt_dir)

        if FLAGS.restore_iter > 0:
            # Swap the iteration suffix of the latest checkpoint for the
            # requested one.  `rindex` targets the file name even when a
            # directory in the path contains "model".  latest_checkpoint may
            # return None; fall back to the directory so that case no longer
            # fails with an AttributeError here.
            if model_file:
                prefix = model_file[:model_file.rindex('model')]
            else:
                prefix = ckpt_dir + '/'
            model_file = prefix + 'model_' + str(FLAGS.restore_iter)
        if model_file:
            print("Restoring model weights from " + model_file)
            with graph.as_default():
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(graph,
              model,
              saver,
              sess,
              log_dir,
              restore_itr=FLAGS.restore_iter,
              network_config=network_config)
    else:
        # The data generator is only constructed for evaluation.
        data_generator = DataGenerator()
        if 'reach' in FLAGS.experiment:
            generate_test_demos(data_generator)
            evaluate_vision_reach(env, graph, model, data_generator, sess,
                                  exp_string, FLAGS.record_gifs, log_dir)
        elif 'push' in FLAGS.experiment:
            evaluate_push(sess,
                          graph,
                          model,
                          data_generator,
                          exp_string,
                          log_dir,
                          FLAGS.demo_file + '/',
                          save_video=FLAGS.record_gifs)
        else:
            raise NotImplementedError