Example 1
    def __init__(self):
        # Load the parameters
        args = EvaluatePointConfig()
        json_path = os.path.join(args.model_dir, 'params.json')
        assert os.path.isfile(
            json_path), "No json configuration file found at {}".format(
                json_path)
        params = Params(json_path)
        if params.mlp_sizes is None or len(params.mlp_sizes) == 0:
            logging.error(
                'mlp_sizes are not set correctly, at least one MLP layer is required'
            )
        params.dict['loss_fn'] = args.loss_fn

        # Load the parameters from the dataset (sizes etc.) into params
        json_path = os.path.join(args.data_dir, 'dataset_params.json')
        assert os.path.isfile(
            json_path), "No json file found at {}, run build.py".format(
                json_path)
        params.update(json_path)
        # Set the logger
        set_logger(os.path.join(args.model_dir, 'evaluate.log'))
        # Get paths for tfrecords
        path_eval_tfrecords = os.path.join(args.data_dir,
                                           'test_' + args.tfrecords_filename)
        # Create the input data pipeline
        logging.info("Creating the dataset...")
        eval_dataset = load_dataset_from_tfrecords(path_eval_tfrecords)
        # Create iterator over the test set
        # eval_inputs = input_fn('test', eval_dataset, params)
        eval_inputs = online_input_fn()
        logging.info("- done.")
        # print(type(eval_inputs))

        # Define the model
        logging.info("Creating the model...")
        weak_learner_id = load_best_ndcgs(
            os.path.join(args.model_dir, args.restore_from, 'learner.json'))[0]
        self.model_spec = model_fn('test',
                                   eval_inputs,
                                   params,
                                   reuse=False,
                                   weak_learner_id=int(weak_learner_id))
        # node_names = [n.name for n in tf.get_default_graph().as_graph_def().node]
        # print(node_names)
        logging.info("- done.")
        logging.info("Starting evaluation")
        logging.info("Optimized using {} learners".format(weak_learner_id))
        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.params = params
        self.sess.run(self.model_spec['variable_init_op'])
        save_path = os.path.join(args.model_dir, args.restore_from)
        if os.path.isdir(save_path):
            save_path = tf.train.latest_checkpoint(save_path)
        self.saver.restore(self.sess, save_path)
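
All of these examples lean on a Params helper that wraps the hyperparameters stored in params.json. The class itself is not part of the snippets; a minimal sketch consistent with how it is called above (Params(json_path), params.update(json_path), params.dict[...] assignment, attribute access such as params.mlp_sizes) might look like this — the implementation details are an assumption, not taken from the source:

    import json

    class Params:
        """Sketch (assumed): hyperparameters loaded from a JSON file,
        exposed both as attributes and through a plain dict."""

        def __init__(self, json_path):
            with open(json_path) as f:
                self.__dict__.update(json.load(f))

        def update(self, json_path):
            # Merge extra keys (e.g. sizes from dataset_params.json) into the same namespace
            with open(json_path) as f:
                self.__dict__.update(json.load(f))

        @property
        def dict(self):
            # Allows dict-style writes such as params.dict['loss_fn'] = args.loss_fn
            return self.__dict__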
Example 2
    path_train_tfrecords = os.path.join(args.data_dir,
                                        'train_' + args.tfrecords_filename)
    path_eval_tfrecords = os.path.join(args.data_dir,
                                       'eval_' + args.tfrecords_filename)

    # Create the input data pipeline
    logging.info("Creating the datasets...")
    train_dataset = load_dataset_from_tfrecords(path_train_tfrecords)
    eval_dataset = load_dataset_from_tfrecords(path_eval_tfrecords)

    # Specify other parameters for the dataset and the model

    # Create the two iterators over the two datasets
    train_inputs = input_fn('train', train_dataset, params)
    eval_inputs = input_fn('vali', eval_dataset, params)
    logging.info("- done.")

    # Define the models (2 different sets of nodes that share weights for train and validation)
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('vali', eval_inputs, params, reuse=True)
    logging.info("- done.")

    # Train the model
    # log time
    # start_time = time.time()
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_dir)
    # print("--- %s seconds ---" % (time.time() - start_time))
    params = Params(json_path)
    params.dict['loss_fn'] = args.loss_fn
    params.dict['collect'] = False
    params.dict['use_kfac'] = args.use_kfac
    params.dict['finetune'] = args.finetune    
    params.dict['training_keep_prob'] = 1.0
    # Load the parameters from the dataset (sizes etc.) into params
    json_path = os.path.join(args.data_dir, 'dataset_params.json')
    assert os.path.isfile(json_path), "No json file found at {}, run build.py".format(json_path)
    params.update(json_path)
    # Set the logger
    set_logger(os.path.join(args.model_dir, 'test{}.log'.format(args.log)))
    # Get paths for tfrecords
    dataset = 'test'
    path_eval_tfrecords = os.path.join(args.data_dir, dataset + args.tfrecords_filename)
    # Create the input data pipeline
    logging.info("Creating the dataset...")
    eval_dataset = load_dataset_from_tfrecords(path_eval_tfrecords)
    # Create iterator over the test set
    eval_inputs = input_fn('test', eval_dataset, params)
    logging.info("- done.")
    # Define the model
    logging.info("Creating the model...")
    # weak_learner_id = load_learner_id(os.path.join(args.model_dir, args.restore_from, 'learner.json'))[0]
    eval_model_spec = model_fn('test', eval_inputs, params, reuse=False)
    # node_names = [n.name for n in tf.get_default_graph().as_graph_def().node]
    # print(node_names)
    logging.info("- done.")
    logging.info("Starting evaluation")
    evaluate(eval_model_spec, args.model_dir, params, args.restore_from)
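
Example 2 (like the others) assumes an args namespace produced by a command-line parser with fields such as model_dir, data_dir, tfrecords_filename, restore_dir, restore_from, loss_fn, use_kfac, finetune and log. The parser is not part of the excerpt; a hypothetical argparse front-end that would supply those fields (flag names and defaults are assumptions) could be:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', default='experiments/base_model',
                        help='Directory containing params.json')
    parser.add_argument('--data_dir', default='data',
                        help='Directory containing the *.tfrecords files')
    parser.add_argument('--tfrecords_filename', default='dataset.tfrecords')
    parser.add_argument('--restore_dir', default=None,
                        help='Subdirectory of model_dir with weights to restore before training')
    parser.add_argument('--restore_from', default='best_weights',
                        help='Subdirectory of model_dir with weights to evaluate')
    parser.add_argument('--loss_fn', default='mse')
    parser.add_argument('--use_kfac', action='store_true')
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--log', default='')
    args = parser.parse_args()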
Example 4
            path_eval_tfrecords = os.path.join(
                args.data_dir, 'validation' + args.tfrecords_filename)
            print(path_train_tfrecords)
            # Create the input data pipeline
            logging.info("Creating the datasets...")
            train_dataset = load_dataset_from_tfrecords(
                glob.glob(path_train_tfrecords))
            eval_dataset = load_dataset_from_tfrecords(path_eval_tfrecords)
        # Specify other parameters for the dataset and the model
        # Create the two iterators over the two datasets
        train_inputs = input_fn('train', train_dataset, params)
        eval_inputs = input_fn('vali', eval_dataset, params)
        logging.info("- done.")
        # Define the models (2 different sets of nodes that share weights for train and validation)
        logging.info("Creating the model...")
        train_model_spec = model_fn('train', train_inputs, params)
        eval_model_spec = model_fn('vali', eval_inputs, params, reuse=True)
        logging.info("- done.")

        logging.info(
            "Starting training for at most {} epoch(s) for the initial learner"
            .format(params.num_epochs))
        start_time = time.time()
        global_epoch = train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params, \
            learner_id=0, restore_from=args.restore_dir)
        logging.info(
            "global_epoch: {} epoch(s) at learner 0".format(global_epoch))
        logging.info("total time: %s seconds ---" % (time.time() - start_time))
        # start gradient boosting
        last_global_epoch = global_epoch
    if (params.num_learners > 1):
        # gradient-boosting branch (the body of this block is cut off in the excerpt)
        pass
    # print(params.dict)
    global_epoch = 0
    args.restore_dir = 'best_weights'
    path_train_tfrecords = os.path.join(
        args.data_dir,
        'train-{}'.format(args.train_range) + args.tfrecords_filename)
    # Create the input data pipeline
    logging.info("Creating the datasets...")
    train_dataset = load_dataset_from_tfrecords(
        glob.glob(path_train_tfrecords))
    #########################################################
    params.dict['training_keep_prob'] = 1.0
    start_time = time.time()
    train_dataset = load_dataset_from_tfrecords(
        glob.glob(path_train_tfrecords))
    # Specify other parameters for the dataset and the model
    # Create the two iterators over the two datasets
    train_inputs = input_fn('vali', train_dataset, params)
    evaluate_on_train_model_spec = model_fn('vali',
                                            train_inputs,
                                            params,
                                            reuse=True)
    logging.info("- done.")
    args.restore_dir = 'best_weights'
    global_epoch = evaluate_on_train(evaluate_on_train_model_spec,
        args.model_dir, params, restore_from=args.restore_dir,\
        global_epoch=global_epoch)
    logging.info("global_epoch: {} epoch(s)".format(global_epoch))
    logging.info("total time: %s seconds ---" % (time.time() - start_time))
Example 6
        params.dict['test_size'] = params.dict['test_size'] * 2
        print('USING both Tests')
        # logging.info("test size: {}".format(params.test_size))
        dataset += '*'
        path_eval_tfrecords = os.path.join(args.data_dir, dataset + args.tfrecords_filename) 
        # Create the input data pipeline
        logging.info("Creating the dataset...")
        eval_dataset = load_dataset_from_tfrecords(glob.glob(path_eval_tfrecords))          
    else:
        if args.aug:
            print('USING augmented TEST')
            dataset += '_aug'    
        path_eval_tfrecords = os.path.join(args.data_dir, dataset + args.tfrecords_filename)
        # Create the input data pipeline
        logging.info("Creating the dataset...")
        eval_dataset = load_dataset_from_tfrecords(path_eval_tfrecords)
    # Create iterator over the test set
    eval_inputs = input_fn('test', eval_dataset, params)
    logging.info("- done.")
    # Define the model
    logging.info("Creating the model...")
    weak_learner_id = load_learner_id(os.path.join(args.model_dir, args.restore_from, 'learner.json'))[0]
    eval_model_spec = model_fn('test', eval_inputs, params, reuse=False, \
        weak_learner_id=int(weak_learner_id))
    # node_names = [n.name for n in tf.get_default_graph().as_graph_def().node]
    # print(node_names)
    logging.info("- done.")
    logging.info("Starting evaluation")
    logging.info("Optimized using {} learners".format(weak_learner_id))
    evaluate(eval_model_spec, args.model_dir, params, args.restore_from)
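
Two more helpers are taken for granted throughout. load_dataset_from_tfrecords is called both with a single path (Examples 1 and 2) and with a glob.glob() list (Examples 4 and 6), which fits tf.data.TFRecordDataset since it accepts either one filename or a list of filenames; load_learner_id / load_best_ndcgs apparently just read learner.json back. Hypothetical sketches follow (the real loader presumably also maps a record-parsing function onto the dataset):

    import json
    import tensorflow as tf

    def load_dataset_from_tfrecords(tfrecords_paths):
        # A string or a list of strings is accepted here, matching both call styles above.
        return tf.data.TFRecordDataset(tfrecords_paths)

    def load_learner_id(json_path):
        # learner.json is assumed to hold a list whose first entry is the id
        # of the last weak learner that was trained.
        with open(json_path) as f:
            return json.load(f)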