Ejemplo n.º 1
0
def _run(args, seed, print):
    """Evaluate a pre-trained oracle model across all active-learning
    iterations for one seed.

    Restores the checkpoint saved under ORACLE_CKPT_DIR and, for the
    'svgp' method, re-initialises and retrains the GP variables on each
    iteration's training set; for 'gp' it only evaluates.

    NOTE: the ``print`` parameter shadows the builtin — it is an
    injected logger callable.

    Returns:
        (test_rmses, test_llds): per-iteration test metrics (lists).
    """
    MODEL_PATH = osp.join(ORACLE_CKPT_DIR, args.dataset, 'seed%d' % seed)
    test_rmses, test_llds = [], []
    # Requested sizes; clamped per iteration to the training-set size.
    BSZ, NBS = args.batch_size, args.n_base
    y_mean, y_std = None, None
    for i_point in range(0, args.active_iterations + 1): #TODO
        tf.reset_default_graph()
        raw_dataset = load_cached_data(args, seed, i_point)
        # Normalisation statistics are frozen at iteration 0 so that all
        # later iterations are scored on the same target scale.
        if i_point == 0:
            y_std = np.std(raw_dataset.train_y, 0, keepdims=True)
            y_std[y_std == 0] = 1  # avoid division by zero for constant targets
            y_mean = np.mean(raw_dataset.train_y, 0, keepdims=True)
        train_x, test_x = raw_dataset.train_x, raw_dataset.test_x
        train_y, test_y = raw_dataset.train_y, raw_dataset.test_y
        train_y, test_y = (train_y - y_mean) / y_std, (test_y - y_mean) / y_std
        optimizer_dataset = edict({
            'train_x': train_x, 'train_y': train_y,
            'test_x': test_x, 'test_y': test_y,
            'std_y_train': y_std,
        })
        N, input_dim = train_x.shape
        args.batch_size, args.n_base = min(BSZ, N), min(NBS, BSZ, N - 1)

        # oracle_n exposes the current training-set size inside the graph.
        oracle_n = tf.Variable(N, trainable=False)
        model, print_values, train_op, obs_var, _, _ = get_model(
            args, train_x, train_y, test_x, None, input_dim, print, oracle_N=oracle_n)
        global_step = model.global_step
        train_summary = tf.no_op()
        saver = tf.train.Saver(max_to_keep=1)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        restore_model(args, print, saver, sess, MODEL_PATH)
        if args.method == 'svgp':
            # Reset only the GP-specific variables so they are retrained
            # from scratch on this iteration's data; everything else keeps
            # its restored values.
            retrain_vars = tf.trainable_variables(model.GP.name)
            sess.run(tf.variables_initializer(retrain_vars))
            sess.run(tf.assign(oracle_n, N))

        test_rmse, test_lld = None, None
        if args.method == 'svgp':
            for epoch in range(1, args.epochs+1):
                regression_train(args, model, model.infer_only_Z, optimizer_dataset, epoch, print_values,
                                 global_step, train_summary, None, sess, logger=print)
                if epoch % args.test_interval == 0 or epoch == args.epochs:
                    test_rmse, test_lld = regression_test(args, model, optimizer_dataset, epoch, sess, obs_var, global_step,
                                                          None, seed, logger=print)
        elif args.method == 'gp':
            test_rmse, test_lld = regression_test(args, model, optimizer_dataset, 0, sess, obs_var, global_step,
                                                  None, seed, logger=print)
        else:
            raise NotImplementedError

        test_rmses.append(test_rmse[0])
        test_llds.append(test_lld[0])

    return test_rmses, test_llds
Ejemplo n.º 2
0
def predict(config):
    """Run the restored model over a prediction dataset, batch by batch.

    Loads batches from ``config.predict_datafile`` (falling back to
    ``config.datafile``), prints each batch's predictions and, when
    ``config.mse_outfile`` is set, writes per-date mean MSE plus an
    overall mean to that file; otherwise the process exits.
    """
    datafile = config.datafile
    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    # Prediction always runs with batch size 1 so every batch maps to a
    # single (date, record) pair.
    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        perfs = dict()

        for i in range(batches.num_batches):
            batch = batches.next_batch()

            (mse, preds) = model.step(session, batch)

            # Skip nan MSE (e.g. batches without targets) when
            # accumulating per-date statistics.  (Was the anti-idiom
            # `math.isnan(mse) is False`.)
            if not math.isnan(mse):
                date = batch_to_date(batch)
                perfs.setdefault(date, []).append(mse)

            if config.pretty_print_preds:
                pretty_print_predictions(batches, batch, preds, mse)
            else:
                print_predictions(batches, batch, preds)

        if config.mse_outfile is not None:
            # The `with` block closes the file; the original trailing
            # no-op `f.closed` statement was removed.
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % (total_mean), file=f)
        else:
            exit()
Ejemplo n.º 3
0
def init():
    """Scoring-service entry point: download and deserialize the
    registered AML model into the module-level ``model`` global.

    Raises:
        Exception: re-raised after logging so the hosting runtime marks
        initialization as failed, instead of serving requests with an
        undefined ``model`` (the original swallowed the exception).
    """
    global model
    try:
        print('Loading Model')
        model_params = parse_args()
        aml_model = get_model(model_name=model_params[0],
                              model_version=model_params[1],
                              tag_name=model_params[2],
                              tag_value=model_params[3])
        model_path = Model.get_model_path(model_name=aml_model.name,
                                          version=aml_model.version)
        model = joblib.load(model_path)
        print(f'model:{aml_model.name} downloading is successful')
    except Exception as ex:
        # Log for diagnostics, then propagate so startup failure is visible.
        print(ex)
        raise
Ejemplo n.º 4
0
def calculate():
    """Flask endpoint: sample the posted graph and run the requested
    algorithm on the sampled graph.

    Expects a JSON body with ``graph`` (node-link format), ``sampler``,
    ``algorithm`` and ``params``.  ``get_sampler`` signals failure by
    returning an error string, which is forwarded as an ``alert``.
    """
    request_dict = request.json
    graph = nx.node_link_graph(request_dict['graph'])
    sampler = get_sampler(request_dict['sampler'], graph)

    # isinstance (not `type(...) == str`) also accepts str subclasses.
    if isinstance(sampler, str):
        res_type = 'alert'
        res = sampler
    else:
        model = get_model(request_dict['algorithm'])
        res, res_type = model(graph,
                              sampler=sampler,
                              params=request_dict['params'])

    return jsonify({'type': res_type, 'result': res})
def _run(args, seed, print):
    """Train and evaluate the model for every active-learning iteration
    of one seed.

    Each iteration rebuilds the TF graph, re-normalises the cached
    dataset, trains for ``args.epochs`` epochs and records the final
    test RMSE / log-likelihood.  (``print`` is an injected logger
    callable that shadows the builtin.)
    """
    rmse_per_point, lld_per_point = [], []
    requested_bsz, requested_nbase = args.batch_size, args.n_base
    for point_idx in range(args.active_iterations + 1):
        tf.reset_default_graph()
        data = load_cached_data(args, seed, point_idx)

        # Per-iteration target normalisation (zero std clamped to 1).
        y_std = np.std(data.train_y, 0, keepdims=True)
        y_std[y_std == 0] = 1
        y_mean = np.mean(data.train_y, 0, keepdims=True)

        train_x, test_x = data.train_x, data.test_x
        train_y = (data.train_y - y_mean) / y_std
        test_y = (data.test_y - y_mean) / y_std
        optimizer_dataset = edict({
            'train_x': train_x, 'train_y': train_y,
            'test_x': test_x, 'test_y': test_y,
            'std_y_train': y_std,
        })

        N, input_dim = train_x.shape
        # Clamp batch / inducing-point counts to the training-set size.
        args.batch_size = min(requested_bsz, N)
        args.n_base = min(requested_nbase, requested_bsz, N - 1)

        model, print_values, train_op, obs_var, _, _ = get_model(
            args, train_x, train_y, test_x, None, input_dim, print)
        global_step = model.global_step
        train_summary = tf.no_op()
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        test_rmse = test_lld = None
        for epoch in range(1, args.epochs + 1):
            regression_train(args, model, train_op, optimizer_dataset, epoch,
                             print_values, global_step, train_summary, None,
                             sess, logger=print)
            if epoch % args.test_interval == 0 or epoch == args.epochs:
                test_rmse, test_lld = regression_test(
                    args, model, optimizer_dataset, epoch, sess, obs_var,
                    global_step, None, seed, logger=print)

        rmse_per_point.append(test_rmse[0])
        lld_per_point.append(test_lld[0])

    return rmse_per_point, lld_per_point
Ejemplo n.º 6
0
def generate_embeddings(dataset_params):
    """For every supported model, extract intermediate-layer features of
    the dataset and write similarity scores for each distance metric.
    """
    metrics = feat_utils.list_distance_metrics()
    for detail in m_utils.get_supported_models():
        model_name = detail["name"]
        tf.logging.info(" >>   >>> " + model_name + " >>   >>> ")
        dataset_input_path = os.path.join(base_path_public,
                                          dataset_params["path"])
        model_params = {"name": model_name}

        embeddings_output_params = {
            "path": os.path.join(base_path_local, "embeddings",
                                 dataset_params["name"], model_name)
        }

        model, preprocess_input = m_utils.get_model(model_name)
        layers = m_utils.get_model_layer_names(model_name)

        for sub_model in m_utils.get_intermediate_models(model, layers):
            features = feat_utils.extract_features(
                dataset_params, sub_model["model"], preprocess_input)

            for metric in metrics:
                tf.logging.info(" >> >> Generating similarity scores for " +
                                metric)
                similarity_dir = os.path.join(base_path_public, "similarity",
                                              dataset_params["name"],
                                              model_name, metric)
                similarity_params = {
                    "output_path": similarity_dir,
                    "layer_name": sub_model["name"],
                    "similarity_metric": metric,
                }
                feat_utils.generate_similarity_scores(similarity_params,
                                                      features)
    curr_device = torch.cuda.current_device()
    cfg['fixed_size'] = get_max_batchsize(curr_device)

    dataset = TractDataset(trk_fn,
                           transform=TestSampling(cfg['fixed_size']),
                           return_edges=True,
                           split_obj=True,
                           same_size=cfg['same_size'])

    dataloader = gDataLoader(dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=0,
                             pin_memory=True)

    classifier = get_model(cfg)

    if DEVICE == 'cuda':
        torch.cuda.set_device(DEVICE)
        torch.cuda.current_device()

    if cfg['weights_path'] == '':
        cfg['weights_path'] = glob.glob(cfg['exp_path'] + '/models/best*')[0]
    state = torch.load(cfg['weights_path'], map_location=DEVICE)

    classifier.load_state_dict(state)
    classifier.to(DEVICE)
    classifier.eval()

    preds = []
    probas = []
Ejemplo n.º 8
0
def train_model(config):
    """Train the model described by ``config`` with optional early stop.

    Loads train/validation data (sharing the training set as validation
    when no split is configured), builds the model in a fresh TF graph,
    then runs up to ``config.max_epoch`` epochs, saving the model when
    the validation MSE reaches a new minimum.
    """
    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # BUG FIX: this previously re-tested config.start_date (copy-paste),
    # so the end-date line keyed off the wrong field.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    print("Loading training data from %s ..." % config.datafile)
    train_data = None
    valid_data = None

    if (config.validation_size > 0.0) or (config.split_date is not None):
        train_data, valid_data = data_utils.load_train_valid_data(config)
    else:
        # No validation split configured: validate on the training data.
        train_data = data_utils.load_all_data(config, is_training_only=True)
        valid_data = train_data

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        params = model_utils.get_scaling_params(config,
                                                train_data,
                                                verbose=True)
        model.set_scaling_params(session, **params)

        noise_model = None
        if config.training_noise is not None:
            print("Training noise level: %.2f * 1-stdev" %
                  config.training_noise)
            noise_model = NoiseModel(seed=config.seed,
                                     scaling_params=params,
                                     degree=config.training_noise)

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)
        sys.stdout.flush()

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):

            (train_mse, valid_mse) = run_epoch(session,
                                               model,
                                               train_data,
                                               valid_data,
                                               keep_prob=config.keep_prob,
                                               passes=config.passes,
                                               noise_model=noise_model,
                                               verbose=True)
            print((
                'Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning rate: %.4f'
            ) % (i + 1, train_mse, valid_mse, lr))
            sys.stdout.flush()

            train_history.append(train_mse)
            valid_history.append(valid_mse)

            # Only SGD-family optimizers use the manual decay schedule.
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model, lr,
                                                      config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                # makedirs(exist_ok=True) handles missing parents and
                # the create-after-check race that os.mkdir had.
                os.makedirs(config.model_dir, exist_ok=True)

            if math.isnan(valid_mse):
                print("Training failed due to nan.")
                quit()
            elif stop_training(config, valid_history):
                print("Training stopped.")
                quit()
            else:
                # Save only when validation MSE ties the best seen so far
                # (min includes the latest entry, so <= means "new best").
                if ((config.early_stop is None)
                        or (valid_history[-1] <= min(valid_history))):
                    model_utils.save_model(session, config, i)
Ejemplo n.º 9
0
def train_model(config):
    """Train an uncertainty-quantification model (MVE or PIE variant).

    MVE epochs track MSE with variance; PIE epochs track MPIW/PICP and
    a combined loss.  Training stops on nan metrics, on the project's
    ``stop_training`` criterion, and checkpoints on new validation
    minima.
    """
    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # BUG FIX: previously re-tested config.start_date (copy-paste),
    # so the end-date line keyed off the wrong field.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    print("Loading training data from %s ..." % config.datafile)
    train_data = None
    valid_data = None

    if (config.validation_size > 0.0) or (config.split_date is not None):
        train_data, valid_data = data_utils.load_train_valid_data(config)
    else:
        # No validation split configured: validate on the training data.
        train_data = data_utils.load_all_data(config, is_training_only=True)
        valid_data = train_data

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    tf_config.gpu_options.allow_growth = True

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("Constructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        if config.data_scaler is not None:
            start_time = time.time()
            print("Calculating scaling parameters ...", end=' ')
            sys.stdout.flush()
            scaling_params = train_data.get_scaling_params(config.data_scaler)
            model.set_scaling_params(session, **scaling_params)
            print("done in %.2f seconds." % (time.time() - start_time))
            print("%-10s %-6s %-6s" % ('feature', 'mean', 'std'))
            for i in range(len(train_data.feature_names)):
                center = "%.4f" % scaling_params['center'][i]
                scale = "%.4f" % scaling_params['scale'][i]
                print("%-10s %-6s %-6s" % (train_data.feature_names[i],
                                           center, scale))
            sys.stdout.flush()

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        for i in range(config.max_epoch):
            # BUG FIX: is_metric_nan was only assigned inside the MVE/PIE
            # branches; an unrecognised UQ_model_type previously crashed
            # later with a confusing NameError.
            is_metric_nan = False

            # MVE Epoch
            if config.UQ_model_type == 'MVE':
                (train_mse, train_mse_var, valid_mse, valid_mse_var) = \
                    run_epoch_mve(session, model, train_data, valid_data,
                                  keep_prob=config.keep_prob,
                                  passes=config.passes,
                                  verbose=True)
                # Nan validation metric is used to stop training below.
                is_metric_nan = math.isnan(valid_mse)
                print('Epoch: %d Train MSE: %.8f Valid MSE: %.8f Learning rate: %.4f' %
                      (i + 1, train_mse, valid_mse, lr))
                print('Epoch: %d Train MSE_w_variance: %.8f Valid MSE_w_variance: %.8f Learning rate: %.4f' %
                      (i + 1, train_mse_var, valid_mse_var, lr))
                sys.stdout.flush()

                train_history.append(train_mse_var)
                valid_history.append(valid_mse_var)

            # PIE Epoch
            elif config.UQ_model_type == 'PIE':
                (train_mpiw, train_picp, train_picp_loss,
                 valid_mpiw, valid_picp, valid_picp_loss) = \
                    run_epoch_pie(session, model, train_data, valid_data,
                                  keep_prob=config.keep_prob,
                                  passes=config.passes,
                                  verbose=True)

                # Combined loss = interval width + coverage penalty.
                train_loss = train_mpiw + config.picp_lambda * train_picp_loss
                valid_loss = valid_mpiw + config.picp_lambda * valid_picp_loss
                is_metric_nan = math.isnan(valid_loss)

                print('Epoch: %d Train MPIW: %.8f Valid MPIW: %.8f Learning rate: %.4f' %
                      (i + 1, train_mpiw, valid_mpiw, lr))
                print('Epoch: %d Train PICP: %.8f Valid PICP: %.8f' %
                      (i + 1, train_picp, valid_picp))
                print('Epoch: %d Train LOSS: %.8f Valid LOSS: %.8f' %
                      (i + 1, train_loss, valid_loss))

                sys.stdout.flush()

                train_history.append(train_loss)
                valid_history.append(valid_loss)

            else:
                raise ValueError(
                    "Unsupported UQ_model_type: %r" % config.UQ_model_type)

            # Only SGD-family optimizers use the manual decay schedule.
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model,
                                                      lr, config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                # makedirs(exist_ok=True) handles missing parents and
                # the create-after-check race that os.mkdir had.
                os.makedirs(config.model_dir, exist_ok=True)

            if is_metric_nan:
                print("Training failed due to nan.")
                quit()
            elif stop_training(config, valid_history):
                print("Training stopped.")
                quit()
            else:
                # Save only on a new validation minimum (min includes the
                # latest entry, so <= means "new best").
                if ((config.early_stop is None) or
                        (valid_history[-1] <= min(valid_history))):
                    model_utils.save_model(session, config, i)
Ejemplo n.º 10
0
def train_model(config):
    """Train the configured model, checkpointing on new validation minima.

    Loads the train/validation split, builds the model in a fresh TF
    graph, optionally sets data-scaling parameters, and runs up to
    ``config.max_epoch`` epochs with the project's stop criterion.
    """
    print("\nLoading training data ...")
    train_data, valid_data = data_utils.load_train_valid_data(config)

    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # BUG FIX: previously re-tested config.start_date (copy-paste),
    # so the end-date line keyed off the wrong field.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("\nConstructing model ...")
        model = model_utils.get_model(session, config, verbose=True)

        if config.data_scaler is not None:
            start_time = time.time()
            print("Calculating scaling parameters ...", end=' ')
            sys.stdout.flush()
            scaling_params = train_data.get_scaling_params(config.data_scaler)
            model.set_scaling_params(session, **scaling_params)
            print("done in %.2f seconds." % (time.time() - start_time))

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=True)
        valid_data.cache(verbose=True)

        # Create the Saver once: building a new Saver every epoch (as the
        # original did) keeps adding save ops to the graph.
        saver = tf.train.Saver()

        for i in range(config.max_epoch):

            (train_mse, valid_mse) = run_epoch(session,
                                               model,
                                               train_data,
                                               valid_data,
                                               keep_prob=config.keep_prob,
                                               passes=config.passes,
                                               verbose=True)
            print((
                'Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning rate: %.4f'
            ) % (i + 1, train_mse, valid_mse, lr))
            sys.stdout.flush()

            train_history.append(train_mse)
            valid_history.append(valid_mse)

            # Only SGD-family optimizers use the manual decay schedule.
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model, lr,
                                                      config.lr_decay,
                                                      train_history)

            if not os.path.exists(config.model_dir):
                print("Creating directory %s" % config.model_dir)
                # makedirs(exist_ok=True) handles missing parents and
                # the create-after-check race that os.mkdir had.
                os.makedirs(config.model_dir, exist_ok=True)

            chkpt_file_prefix = "training.ckpt"
            if model_utils.stop_training(config, valid_history,
                                         chkpt_file_prefix):
                print("Training stopped.")
                quit()
            else:
                checkpoint_path = os.path.join(config.model_dir,
                                               chkpt_file_prefix)
                # Checkpoint only on a strict new validation minimum.
                if (valid_history[-1] == min(valid_history)):
                    saver.save(session,
                               checkpoint_path,
                               global_step=i)
Ejemplo n.º 11
0
def test(cfg):
    """Evaluate a regression classifier on the test split.

    Loads the dataset described by ``cfg``, restores the best (or the
    explicitly requested) checkpoint, predicts one scalar per sample,
    and — when ground truth is available — reports per-subject MSE /
    MAE / Spearman rho plus aggregate scores written under the
    TensorBoard logdir.  Objects split across chunks (``split_obj``)
    are re-assembled before scoring.
    """
    num_classes = int(cfg['n_classes'])
    sample_size = int(cfg['fixed_size'])
    cfg['loss'] = cfg['loss'].split(' ')
    batch_size = 1
    cfg['batch_size'] = batch_size
    # NOTE(review): eval() on a config value executes arbitrary code —
    # assumes cfg comes from a trusted file.
    epoch = eval(str(cfg['n_epochs']))
    #n_gf = int(cfg['num_gf'])
    input_size = int(cfg['data_dim'])

    # Build validation-time transforms according to the config flags.
    trans_val = []
    if cfg['rnd_sampling']:
        trans_val.append(TestSampling(sample_size))
    if cfg['standardization']:
        trans_val.append(SampleStandardization())

    if cfg['dataset'] == 'hcp20_graph':
        dataset = ds.HCP20Dataset(
            cfg['sub_list_test'],
            cfg['dataset_dir'],
            transform=transforms.Compose(trans_val),
            with_gt=cfg['with_gt'],
            #distance=T.Distance(norm=True,cat=False),
            return_edges=True,
            split_obj=True,
            train=False,
            load_one_full_subj=False,
            labels_dir=cfg['labels_dir'])

    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)
    print("Validation dataset loaded, found %d samples" % (len(dataset)))

    # Pick the first unused test_<n> directory under the experiment path.
    for ext in range(100):
        logdir = '%s/test_%d' % (cfg['exp_path'], ext)
        if not os.path.exists(logdir):
            break
    writer = SummaryWriter(logdir)
    # Resolve the checkpoint: default to the best model; recover the
    # epoch number from the filename when it is encoded there.
    if cfg['weights_path'] == '':
        cfg['weights_path'] = glob.glob(cfg['exp_path'] + '/models/best*')[0]
        epoch = int(cfg['weights_path'].rsplit('-', 1)[1].split('.')[0])
    elif 'ep-' in cfg['weights_path']:
        epoch = int(cfg['weights_path'].rsplit('-', 1)[1].split('.')[0])

    # Mirror the TensorBoard event file into tb_logs/ via a relative symlink.
    tb_log_name = glob.glob('%s/events*' % writer.logdir)[0].rsplit('/', 1)[1]
    tb_log_dir = 'tb_logs/%s' % logdir.split('/', 1)[1]
    os.system('mkdir -p %s' % tb_log_dir)
    os.system('ln -sr %s/%s %s/%s ' %
              (writer.logdir, tb_log_name, tb_log_dir, tb_log_name))

    #### BUILD THE MODEL
    classifier = get_model(cfg)

    classifier.cuda()
    classifier.load_state_dict(torch.load(cfg['weights_path']))
    classifier.eval()

    with torch.no_grad():
        pred_buffer = {}
        sm_buffer = {}
        sm2_buffer = {}
        gf_buffer = {}
        emb_buffer = {}
        print('\n\n')
        #mean_val_acc = torch.tensor([])
        #mean_val_iou = torch.tensor([])
        #mean_val_prec = torch.tensor([])
        #mean_val_recall = torch.tensor([])
        mean_val_mse = torch.tensor([])
        mean_val_mae = torch.tensor([])
        mean_val_rho = torch.tensor([])

        # split_obj datasets deliver one object in several chunks; we
        # accumulate predictions until the object is fully consumed.
        if 'split_obj' in dir(dataset) and dataset.split_obj:
            split_obj = True
        else:
            split_obj = False
            dataset.transform = []

        if split_obj:
            consumed = False
        else:
            consumed = True
        j = 0
        visualized = 0
        new_obj_read = True
        sls_count = 1
        while j < len(dataset):
            #while sls_count <= len(dataset):
            data = dataset[j]

            if split_obj:
                if new_obj_read:
                    # Allocate full-size accumulators for the new object.
                    #obj_pred_choice = torch.zeros(data['obj_full_size'], dtype=torch.int).cuda()
                    #obj_target = torch.zeros(data['obj_full_size'], dtype=torch.int).cuda()
                    obj_pred_choice = torch.zeros(data['obj_full_size'],
                                                  dtype=torch.float32).cuda()
                    obj_target = torch.zeros(data['obj_full_size'],
                                             dtype=torch.float32).cuda()
                    new_obj_read = False

                if len(dataset.remaining[j]) == 0:
                    consumed = True

            sample_name = data['name'] if type(
                data['name']) == str else data['name'][0]
            print(sample_name)

            #print(points)
            #if len(points.shape()) == 2:
            #points = points.unsqueeze(0)
            #print(data)
            points = gBatch().from_data_list([data['points']])
            #points = data['points']
            if 'bvec' in points.keys:
                points.batch = points.bvec.clone()
                del points.bvec
            if cfg['with_gt']:
                target = points['y']
                target = target.to('cuda')
            if cfg['same_size']:
                points['lengths'] = points['lengths'][0].item()
            #if cfg['model'] == 'pointnet_cls':
            #points = points.view(len(data['obj_idxs']), -1, input_size)
            points = points.to('cuda')

            pred = classifier(points)

            #logits = classifier(points)
            #logits = logits.view(-1, num_classes)

            #pred = F.log_softmax(logits, dim=-1).view(-1, num_classes)
            #pred_choice = pred.data.max(1)[1].int()

            if split_obj:
                # Scatter this chunk's predictions/targets into the
                # full-object accumulators.
                obj_pred_choice[data['obj_idxs']] = pred.view(-1)
                #obj_pred_choice[data['obj_idxs']] = pred_choice
                obj_target[data['obj_idxs']] = target.float()
                #print(obj_pred_choice)
                #print(obj_target)
                #obj_target[data['obj_idxs']] = target.int()
                #if cfg['save_embedding']:
                #    obj_embedding[data['obj_idxs']] = classifier.embedding.squeeze()
            else:
                # NOTE(review): pred_choice is undefined in this branch —
                # only the commented-out code above ever defined it, so
                # this path raises NameError when split_obj is False;
                # presumably it should be pred.view(-1) — confirm.
                obj_data = points
                obj_pred_choice = pred_choice
                obj_target = target
                if cfg['save_embedding']:
                    obj_embedding = classifier.embedding.squeeze()

            if cfg['with_gt'] and consumed:
                print('val max class pred ', obj_pred_choice.max().item())
                print('val min class pred ', obj_pred_choice.min().item())
                print('val max class target ', obj_target.max().item())
                print('val min class target ', obj_target.min().item())
                #obj_pred_choice = obj_pred_choice.view(-1,1)
                #obj_target = obj_target.view(-1,1)

                mae = torch.mean(
                    abs(obj_target.data.cpu() -
                        obj_pred_choice.data.cpu())).item()
                mse = torch.mean((obj_target.data.cpu() -
                                  obj_pred_choice.data.cpu())**2).item()
                rho, pval = spearmanr(obj_target.data.cpu().numpy(),
                                      obj_pred_choice.data.cpu().numpy())
                np.save(writer.logdir + '/predictions_' + sample_name + '.npy',
                        obj_pred_choice.data.cpu().numpy())
                #correct = obj_pred_choice.eq(obj_target.data.int()).cpu().sum()
                #acc = correct.item()/float(obj_target.size(0))
                #tp = torch.mul(obj_pred_choice.data, obj_target.data.int()).cpu().sum().item()+0.00001
                #fp = obj_pred_choice.gt(obj_target.data.int()).cpu().sum().item()
                #fn = obj_pred_choice.lt(obj_target.data.int()).cpu().sum().item()
                #tn = correct.item() - tp
                #iou = torch.tensor([float(tp)/(tp+fp+fn)])
                #prec = torch.tensor([float(tp)/(tp+fp)])
                #recall = torch.tensor([float(tp)/(tp+fn)])

                mean_val_mae = torch.cat((mean_val_mae, torch.tensor([mae])),
                                         0)
                mean_val_mse = torch.cat((mean_val_mse, torch.tensor([mse])),
                                         0)
                mean_val_rho = torch.cat((mean_val_rho, torch.tensor([rho])),
                                         0)
                #mean_val_prec = torch.cat((mean_val_prec, prec), 0)
                #mean_val_recall = torch.cat((mean_val_recall, recall), 0)
                #mean_val_iou = torch.cat((mean_val_iou, iou), 0)
                #mean_val_acc = torch.cat((mean_val_acc, torch.tensor([acc])), 0)
                print('VALIDATION [%d: %d/%d] val mse: %f val mae: %f val rho: %f' \
                        % (epoch, j, len(dataset), mse, mae, rho))

            if cfg['save_pred'] and consumed:
                print('buffering prediction %s' % sample_name)
                #sl_idx = np.where(obj_pred.data.cpu().view(-1).numpy() == 1)[0]
                #pred_buffer[sample_name] = sl_idx.tolist()

            if consumed:
                # Advance to the next object only once fully consumed.
                print(j)
                j += 1
                if split_obj:
                    consumed = False
                    new_obj_read = True

        #macro_iou = torch.mean(mean_val_iou)
        #macro_prec = torch.mean(mean_val_prec)
        #macro_recall = torch.mean(mean_val_recall)

        #epoch_iou = macro_iou.item()

    #if cfg['save_pred']:
    #os.system('rm -r %s/predictions_test*' % writer.logdir)
    #   pred_dir = writer.logdir + '/predictions_test_%d' % epoch
    #  if not os.path.exists(pred_dir):
    #     os.makedirs(pred_dir)
    #print('saving files')
    #for filename, value in pred_buffer.items():
    #    with open(os.path.join(pred_dir, filename) + '.pkl', 'wb') as f:
    #        pickle.dump(
    #            value, f, protocol=pickle.HIGHEST_PROTOCOL)

    if cfg['with_gt']:
        print('TEST MSE: %f' % torch.mean(mean_val_mse).item())
        print('TEST MAE: %f' % torch.mean(mean_val_mae).item())
        print('TEST RHO: %f' % torch.mean(mean_val_rho).item())
        #print('TEST ACCURACY: %f' % torch.mean(mean_val_acc).item())
        #print('TEST PRECISION: %f' % macro_prec.item())
        #print('TEST RECALL: %f' % macro_recall.item())
        #print('TEST IOU: %f' % macro_iou.item())
        #mean_val_dsc = mean_val_prec * mean_val_recall * 2 / (mean_val_prec + mean_val_recall)
        final_scores_file = writer.logdir + '/final_scores_test_%d.txt' % epoch
        scores_file = writer.logdir + '/scores_test_%d.txt' % epoch
        print('saving scores')
        # Per-subject scores first, then mean/std aggregates.
        with open(scores_file, 'w') as f:
            f.write('mse\n')
            f.writelines('%f\n' % v for v in mean_val_mse.tolist())
            f.write('mae\n')
            f.writelines('%f\n' % v for v in mean_val_mae.tolist())
            f.write('rho\n')
            f.writelines('%f\n' % v for v in mean_val_rho.tolist())
            #f.write('acc\n')
            #f.writelines('%f\n' % v for v in  mean_val_acc.tolist())
            #f.write('prec\n')
            #f.writelines('%f\n' % v for v in  mean_val_prec.tolist())
            #f.write('recall\n')
            #f.writelines('%f\n' % v for v in  mean_val_recall.tolist())
            #f.write('dsc\n')
            #f.writelines('%f\n' % v for v in  mean_val_dsc.tolist())
            #f.write('iou\n')
            #f.writelines('%f\n' % v for v in  mean_val_iou.tolist())
        with open(final_scores_file, 'w') as f:
            f.write('mse\n')
            f.write('%f\n' % mean_val_mse.mean())
            f.write('%f\n' % mean_val_mse.std())
            f.write('mae\n')
            f.write('%f\n' % mean_val_mae.mean())
            f.write('%f\n' % mean_val_mae.std())
            f.write('rho\n')
            f.write('%f\n' % mean_val_rho.mean())
            f.write('%f\n' % mean_val_rho.std())
            #f.write('acc\n')
            #f.write('%f\n' % mean_val_acc.mean())
            #f.write('%f\n' % mean_val_acc.std())
            #f.write('prec\n')
            #f.write('%f\n' % mean_val_prec.mean())
            #f.write('%f\n' % mean_val_prec.std())
            #f.write('recall\n')
            #f.write('%f\n' % mean_val_recall.mean())
            #f.write('%f\n' % mean_val_recall.std())
            #f.write('dsc\n')
            #f.write('%f\n' % mean_val_dsc.mean())
            #f.write('%f\n' % mean_val_dsc.std())
            #f.write('iou\n')
            #f.write('%f\n' % mean_val_iou.mean())
            #f.write('%f\n' % mean_val_iou.std())

    print('\n\n')
Ejemplo n.º 12
0
def _run(args, seed, lprint):
    """Train a regression model for one seed and report validation/test metrics.

    Trains for ``args.epochs`` epochs with step learning-rate decay.  Every
    ``args.test_interval`` epochs (and at the final epoch) the model is
    evaluated on the validation split (the pool set) and on the test split.
    The best epoch is chosen by validation LLD; at that epoch the test
    predictive mean/covariance are saved to disk for later XLL(R)
    computation.

    Args:
        args: experiment configuration (batch_size, n_base, epochs,
            learning_rate, lr_ann_epochs, lr_ann_ratio, test_interval,
            test_samples, method, dataset, ...); ``batch_size``/``n_base``
            are clamped in place to fit the dataset.
        seed: integer seed identifying the data split.
        lprint: logging function with a ``print``-compatible signature.

    Returns:
        tuple: ``(best_rmse, best_lld, best_test_rmse, best_test_lld)`` --
        validation RMSE/LLD at the best epoch, then test RMSE/LLD at that
        same epoch.
    """
    # init graph
    tf.reset_default_graph()
    summary_writer = None

    dataset = fetch_active_learning_dataset(args, seed)
    dataset = normalize_active_learning_data(dataset)
    # the pool split doubles as the validation set here
    train_x, test_x, valid_x = dataset.train_x, dataset.test_x, dataset.pool_x
    train_y, test_y, valid_y = dataset.train_y, dataset.test_y, dataset.pool_y
    std_y_train = dataset.std_y_train[0]

    N, input_dim = train_x.shape
    # batch size cannot exceed N; n_base must fit inside one batch and
    # leave at least one training point out
    args.batch_size = min(args.batch_size, N)
    args.n_base = min(args.n_base, args.batch_size, N - 1)

    # setup model
    model, print_values, train_op, obs_var, _, _ = get_model(
        args, train_x, train_y, test_x, None, input_dim, lprint)
    test_obs_var = obs_var

    # setup summary
    train_summary = tf.no_op()
    global_step = model.global_step

    # training
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    best_epoch, best_rmse, best_lld = 0, np.inf, -np.inf  # for validation
    best_test_rmse, best_test_lld = np.inf, -np.inf  # for test
    learning_rate = args.learning_rate

    for epoch in range(1, args.epochs + 1):
        # step learning-rate decay
        if epoch % args.lr_ann_epochs == 0:
            learning_rate = learning_rate * args.lr_ann_ratio
        regression_train(args, model, train_op, dataset, epoch, print_values,
                         global_step, train_summary, summary_writer, sess,
                         learning_rate, lprint)

        # ================================== valid ==================================
        if epoch % args.test_interval == 0 or epoch == args.epochs:
            feed_dict = {
                model.x: train_x,
                model.y: train_y,
                model.x_pred: valid_x,
                model.y_pred: valid_y,
                model.n_particles: args.test_samples
            }
            rmse, lld, ov, gs = sess.run(
                [model.eval_rmse, model.eval_lld, obs_var, global_step],
                feed_dict=feed_dict)
            # undo target normalization so metrics are on the original scale
            rmse, lld = rmse * std_y_train, lld - np.log(std_y_train)

            lprint(
                '>>> VALID: Seed {:5d} >>> Epoch {:5d}/{:5d} | rmse={:.5f} | lld={:.5f} | obs_var={:.5f}'
                .format(seed, epoch, args.epochs, rmse, lld, np.mean(ov)))
            # model selection criterion: validation LLD
            if lld > best_lld:
                best_epoch, best_rmse, best_lld = epoch, rmse, lld

        # ================================== testing ==================================
        if epoch % args.test_interval == 0 or epoch == args.epochs:
            feed_dict = {
                model.x_pred: test_x,
                model.y_pred: test_y,
                model.x: train_x,
                model.y: train_y,
                model.n_particles: args.test_samples
            }
            rmse, lld, ov, gs = sess.run(
                [model.eval_rmse, model.eval_lld, test_obs_var, global_step],
                feed_dict=feed_dict)
            rmse, lld = rmse * std_y_train, lld - np.log(std_y_train)
            lprint(
                '>>> TEST: Seed {:5d} >>> Epoch {:5d}/{:5d} | rmse={:.5f} | lld={:.5f} | obs_var={:.5f}'
                .format(seed, epoch, args.epochs, rmse, lld, np.mean(ov)))
            if best_epoch == epoch:
                # save mean and covariance for computing XLL(R)
                mean, cov = sess.run(
                    [model.func_x_pred_mean, model.y_x_pred_cov],
                    feed_dict=feed_dict)
                method = 'gp' if args.method == 'svgp' else args.method
                result_dir = osp.join(RESULT_REG_PATH,
                                      '%s_%s' % (args.dataset, method))
                if not osp.exists(result_dir):
                    makedirs(osp.join(result_dir, 'a.py'))
                with open(
                        osp.join(result_dir,
                                 'test_mean_cov_seed%d.npz' % seed),
                        'wb') as file:
                    np.savez(file, mean=mean, cov=cov)

                best_test_lld, best_test_rmse = lld, rmse
                lprint('BEST EPOCH !!!')
            if epoch == args.epochs:
                # BUG FIX: both branches of the old `if args.return_val:`
                # check returned the identical tuple; collapsed into a
                # single return and the session is now closed first.
                sess.close()
                return best_rmse, best_lld, best_test_rmse, best_test_lld
def _run(args, seed, print):
    """Run the full active-learning loop for one seed.

    Fetches the initial train/pool/test split, then for each iteration:
    trains a fresh model on a normalized copy of the current split, records
    final-epoch test RMSE/LLD, selects new pool point(s) with
    ``args.criteria``, moves them into the training set, and snapshots the
    updated split under ``RESULT_AL_SELECTION_PATH``.

    NOTE(review): the ``print`` parameter shadows the builtin; presumably
    the driver passes a logging function with a print-compatible signature.

    Returns:
        tuple: ``(test_rmses, test_llds)`` -- per-iteration test metrics.
    """
    # ================================== load and normalize data ==================================
    raw_dataset = fetch_active_learning_dataset(args, seed)
    if not osp.exists(
            osp.join(RESULT_AL_SELECTION_PATH, args.criteria, '%s_seed%d' %
                     (args.dataset, seed))):
        makedirs(
            osp.join(RESULT_AL_SELECTION_PATH, args.criteria,
                     '%s_seed%d' % (args.dataset, seed), 'a.py'))
    # snapshot the initial (iteration-0) split before any selection happens
    np.save(
        osp.join(RESULT_AL_SELECTION_PATH, args.criteria,
                 '%s_seed%d' % (args.dataset, seed),
                 '%s_itr%d.npy' % (args.method, 0)), {
                     'test_x': raw_dataset.test_x,
                     'test_y': raw_dataset.test_y,
                     'train_x': raw_dataset.train_x,
                     'train_y': raw_dataset.train_y,
                     'pool_x': raw_dataset.pool_x,
                     'pool_y': raw_dataset.pool_y
                 })
    print("Dataset: %s, train_r: %.2f, test_r: %.2f" %
          (args.dataset, args.train_ratio, args.test_ratio))

    test_rmses, test_llds = [], []
    # remember the configured sizes; they get re-clamped each iteration
    # as the training set grows
    BSZ, NBS = args.batch_size, args.n_base
    for i_point in range(1, args.active_iterations + 2):
        # fresh graph per iteration: the model is rebuilt from scratch
        tf.reset_default_graph()
        # normalize a deep copy so raw_dataset keeps the original scale
        dataset = normalize_active_learning_data(copy.deepcopy(raw_dataset))
        train_x, test_x, pool_x = dataset.train_x, dataset.test_x, dataset.pool_x
        train_y = dataset.train_y
        N, input_dim = train_x.shape
        # batch size cannot exceed N; n_base must fit inside one batch
        args.batch_size, args.n_base = min(BSZ, N), min(NBS, BSZ, N - 1)
        # ================================== setup model ==================================
        model, print_values, train_op, obs_var, corr_op, covar_op = get_model(
            args, train_x, train_y, test_x, pool_x, input_dim, print)
        global_step = model.global_step
        train_summary = tf.no_op()

        # ==================================  training and testing ==================================
        if args.method in ['hmc', 'emhmc']:
            print("HMC reset the config proto of Session.\n" * 10)
            # NOTE(review): arithmetic optimization is disabled for HMC --
            # presumably the graph rewriter breaks the sampler; confirm
            config_proto = tf.ConfigProto()
            off = rewriter_config_pb2.RewriterConfig.OFF
            config_proto.graph_options.rewrite_options.arithmetic_optimization = off
            sess = tf.Session(config=config_proto)
        else:
            sess = tf.Session()

        sess.run(tf.global_variables_initializer())
        test_rmse, test_lld = None, None
        learning_rate = args.learning_rate
        for epoch in range(1, args.epochs + 1):
            # step learning-rate decay
            if epoch % args.lr_ann_epochs == 0:
                learning_rate = learning_rate * args.lr_ann_ratio
            regression_train(args,
                             model,
                             train_op,
                             dataset,
                             epoch,
                             print_values,
                             global_step,
                             train_summary,
                             None,
                             sess,
                             learning_rate=learning_rate,
                             logger=print)
            if epoch == args.epochs:
                # evaluate only after the final epoch
                test_rmse, test_lld = regression_test(args, model, dataset,
                                                      epoch, sess, obs_var,
                                                      global_step, None, seed,
                                                      print)
        test_rmses.append(test_rmse[0])
        test_llds.append(test_lld[0])

        # ================== select the new data point(s). ===========================================
        data_idxs = get_selected_data_idxs(train_x,
                                           test_x,
                                           pool_x,
                                           dataset,
                                           args,
                                           covar_op,
                                           corr_op,
                                           model,
                                           sess,
                                           n_particles=args.eval_cov_samples)
        # move the selected pool points into the (un-normalized) training set
        raw_dataset = update_dataset(raw_dataset, data_idxs)
        np.save(
            osp.join(RESULT_AL_SELECTION_PATH, args.criteria,
                     '%s_seed%d' % (args.dataset, seed),
                     '%s_itr%d.npy' % (args.method, i_point)), {
                         'test_x': raw_dataset.test_x,
                         'test_y': raw_dataset.test_y,
                         'train_x': raw_dataset.train_x,
                         'train_y': raw_dataset.train_y
                     })
        print("Iteration %d done. Selected data point %d/%d. (%d/%d/%d)" %
              (i_point, i_point, args.active_iterations,
               raw_dataset.train_x.shape[0], raw_dataset.pool_x.shape[0],
               raw_dataset.test_x.shape[0]))
        sess.close()

    return test_rmses, test_llds
Ejemplo n.º 14
0
def train_model(config, verbose=True):
    """Train the model described by ``config`` on CPU.

    Loads the train/validation data, builds the model in a fresh graph,
    optionally computes data-scaling parameters, then runs up to
    ``config.max_epoch`` epochs.  For gradient-descent/momentum optimizers
    the learning rate is adjusted from the training-MSE history.

    Args:
        config: experiment configuration object (``learning_rate``,
            ``max_epoch``, ``optimizer``, ``keep_prob``, ``passes``, ...).
        verbose: when True, print per-epoch progress.

    Returns:
        tuple: ``(train_history, valid_history)`` -- per-epoch MSE lists.
    """
    print("\nLoading training data ...")
    train_data, valid_data = load_train_valid_data(config, verbose)

    if config.start_date is not None:
        print("Training start date: ", config.start_date)
    # BUG FIX: this previously re-tested config.start_date, so the end
    # date was printed (possibly as None) whenever a start date was set.
    if config.end_date is not None:
        print("Training end date: ", config.end_date)

    # force CPU execution (device_count={'GPU': 0})
    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False,
                               device_count={'GPU': 0})

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        if config.seed is not None:
            tf.set_random_seed(config.seed)

        print("\nConstructing model ...")
        model = model_utils.get_model(session, config, verbose=verbose)

        if config.data_scaler is not None:
            start_time = time.time()
            print("Calculating scaling parameters ...", end=' ')
            sys.stdout.flush()
            scaling_params = train_data.get_scaling_params(config.data_scaler)
            model.set_scaling_params(session, **scaling_params)
            print("done in %.2f seconds." % (time.time() - start_time))

        if config.early_stop is not None:
            print("Training will early stop without "
                  "improvement after %d epochs." % config.early_stop)

        train_history = list()
        valid_history = list()

        lr = model.set_learning_rate(session, config.learning_rate)

        train_data.cache(verbose=verbose)
        valid_data.cache(verbose=verbose)

        for i in range(config.max_epoch):
            (train_mse, valid_mse) = run_epoch(session,
                                               model,
                                               train_data,
                                               valid_data,
                                               keep_prob=config.keep_prob,
                                               passes=config.passes,
                                               verbose=verbose)
            if verbose:
                # BUG FIX: the implicit string concatenation was missing a
                # space and printed "Learningrate:".
                print(('Epoch: %d Train MSE: %.6f Valid MSE: %.6f Learning '
                       'rate: %.4f') % (i + 1, train_mse, valid_mse, lr))
                sys.stdout.flush()

            train_history.append(train_mse)
            valid_history.append(valid_mse)

            # only SGD/momentum optimizers use manual LR decay here
            if re.match("Gradient|Momentum", config.optimizer):
                lr = model_utils.adjust_learning_rate(session, model, lr,
                                                      config.lr_decay,
                                                      train_history)

    return train_history, valid_history
def _run(args, seed, print):
    """Pretrain an 'oracle' model on the merged train+pool data and checkpoint it.

    The oracle gets to see every candidate point: the pool split is merged
    into the training split before training.  The model is trained for
    ``args.epochs`` epochs and periodically evaluated and checkpointed
    under ``ORACLE_CKPT_DIR/<dataset>/seed<seed>``.

    NOTE(review): the ``print`` parameter shadows the builtin; presumably
    the driver passes a logging function with a print-compatible signature.
    """
    tf.reset_default_graph()
    MODEL_PATH = osp.join(ORACLE_CKPT_DIR, args.dataset, 'seed%d' % seed)

    ######## merge train+pool for pretraining the ORACLE. #########
    dataset = fetch_active_learning_dataset(args, seed)
    dataset = normalize_active_learning_data(dataset, has_pool=True)
    dataset = edict({
        'train_x':
        np.concatenate([dataset.train_x, dataset.pool_x], 0),
        'train_y':
        np.concatenate([dataset.train_y, dataset.pool_y], 0),
        'test_x':
        dataset.test_x,
        'test_y':
        dataset.test_y,
        'mean_y_train':
        dataset.mean_y_train,
        'std_y_train':
        dataset.std_y_train,
    })
    N, input_dim = dataset.train_x.shape

    ############ setup the MODEL ##############
    # NOTE(review): oracle_n holds the merged training-set size; presumably
    # get_model uses it for minibatch rescaling -- confirm against get_model
    oracle_n = tf.Variable(N, trainable=False)
    model, print_values, train_op, obs_var, corr_op, covar_op = get_model(
        args,
        dataset.train_x,
        dataset.train_y,
        None,
        None,
        input_dim,
        print,
        oracle_N=oracle_n)
    global_step = model.global_step
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    train_summary = tf.no_op()
    # keep only the most recent checkpoint
    saver = tf.train.Saver(max_to_keep=1)

    for epoch in range(1, args.epochs + 1):
        regression_train(args,
                         model,
                         train_op,
                         dataset,
                         epoch,
                         print_values,
                         global_step,
                         train_summary,
                         None,
                         sess,
                         logger=print)
        # evaluate and checkpoint periodically and at the final epoch
        if epoch % args.test_interval == 0 or epoch == args.epochs:
            regression_test(args,
                            model,
                            dataset,
                            epoch,
                            sess,
                            obs_var,
                            global_step,
                            None,
                            seed,
                            logger=print)
            save_model(args, print, saver, sess, MODEL_PATH, epoch)
Ejemplo n.º 16
0
def predict(config):
    """Run single-example prediction over a dataset and report MSE.

    Loads batches of size 1 from ``config.datafile`` (or
    ``config.predict_datafile`` when set), steps the model over every
    batch, optionally prints predictions, accumulates per-date MSE, and
    can dump target/output/mse DataFrames (``config.df_dirname``) and an
    MSE summary file (``config.mse_outfile``).

    Args:
        config: experiment configuration (``data_dir``, ``datafile``,
            ``keep_prob_pred``, ``df_dirname``, ``mse_outfile``, ...).
            ``config.batch_size`` is forced to 1.
    """
    datafile = config.datafile

    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    # prediction is always done one example at a time
    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # Initialize DataFrames
    df_target = pd.DataFrame()
    df_output = pd.DataFrame()
    df_mse = pd.DataFrame()

    df_list = [df_target, df_output, df_mse]

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        # per-date lists of MSE values
        perfs = dict()

        for i in range(batches.num_batches):
            batch = batches.next_batch()

            (mse, preds) = model.step(session,
                                      batch,
                                      keep_prob=config.keep_prob_pred)
            # (mse, preds) = model.debug_step(session, batch)

            # FIX: idiomatic form of the old `math.isnan(mse) is False`.
            # A NaN MSE means the batch had no valid target; skip it.
            if not math.isnan(mse):
                date = batch_to_date(batch)
                key = batch_to_key(batch)
                if date not in perfs:
                    perfs[date] = list()
                perfs[date].append(mse)

            if config.pretty_print_preds:
                pretty_print_predictions(batches, batch, preds, mse)
            elif config.print_preds:
                print_predictions(config, batches, batch, preds, mse)

            # Get values and update DataFrames if df_dirname is provided in config
            if config.df_dirname is not None:
                if not math.isnan(mse):
                    # Get all values
                    target_val = get_value(batches, batch, 'target')
                    output_val = get_value(batches, batch, 'output', preds)
                    mse_val = mse
                    values_list = [target_val, output_val, mse_val]

                    # Update DataFrames
                    for j in range(len(df_list)):
                        assert (len(df_list) == len(values_list))
                        df_list[j] = update_df(df_list[j], date, key,
                                               values_list[j])

        # Save the DataFrames
        if config.df_dirname:
            if not os.path.isdir(config.df_dirname):
                os.makedirs(config.df_dirname)
            save_names = ['target-df.pkl', 'output-df.pkl', 'mse-df.pkl']

            for j in range(len(df_list)):
                assert (len(df_list) == len(save_names))
                df_list[j].to_pickle(
                    os.path.join(config.df_dirname, save_names[j]))

        # MSE Outfile
        if config.mse_outfile is not None:
            with open(config.mse_outfile, "w") as f:
                for date in sorted(perfs):
                    mean = np.mean(perfs[date])
                    print("%s %.6f %d" % (date, mean, len(perfs[date])),
                          file=f)
                total_mean = np.mean([x for v in perfs.values() for x in v])
                print("Total %.6f" % (total_mean), file=f)
            # FIX: removed the no-op `f.closed` expression statement; the
            # `with` block already closes the file.
        else:
            # NOTE(review): terminates the whole process when no outfile is
            # configured -- preserved as-is, but surprising for a library call.
            exit()
Ejemplo n.º 17
0
def predict_pie(config):
    """Run prediction-interval (PIE) inference and dump DataFrames.

    Doesn't use the print options -- only outputs target / lower-bound /
    upper-bound DataFrames to ``config.df_dirname``.  Batches of size 1
    are read from ``config.datafile`` (or ``config.predict_datafile`` when
    set) and the model is stepped in PIE mode to obtain interval bounds.
    """
    datafile = config.datafile

    if config.predict_datafile is not None:
        datafile = config.predict_datafile

    print("Loading data from %s ..." % datafile)
    path = utils.data_utils.get_data_path(config.data_dir, datafile)

    # prediction is always done one example at a time
    config.batch_size = 1
    batches = BatchGenerator(path,
                             config,
                             require_targets=config.require_targets,
                             verbose=True)
    batches.cache(verbose=True)

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)

    # Initialize DataFrames
    df_target = pd.DataFrame()
    df_output_lb = pd.DataFrame()
    df_output_ub = pd.DataFrame()

    df_list = [df_target, df_output_lb, df_output_ub]

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        model = model_utils.get_model(session, config, verbose=True)

        for i in range(batches.num_batches):
            batch = batches.next_batch()

            (mpiw, _, _, preds_lb,
             preds_ub) = model.step(session,
                                    batch,
                                    keep_prob=config.keep_prob_pred,
                                    uq=config.UQ,
                                    UQ_model_type='PIE')
            # (mse, preds) = model.debug_step(session, batch)

            date = batch_to_date(batch)
            key = batch_to_key(batch)

            # Dummy input to be consistent with the rest of the predictions printing options. MSE = 0.0. It is not
            # evaluated in PIE case
            mse_dummy = mse_var_dummy = 0.0

            # Print every n iterations to check the progress for monitoring
            if i % 10000 == 0:
                pretty_print_predictions(batches, batch, preds_lb, preds_ub,
                                         mse_dummy, mse_var_dummy)

            # Get values and update DataFrames if df_dirname is provided in config
            if config.df_dirname is not None:
                # Get all values
                target_val = get_value(batches, batch, 'target')
                output_lb_val = get_value(batches, batch, 'output_lb',
                                          preds_lb)
                output_ub_val = get_value(batches, batch, 'output_ub',
                                          preds_ub)
                values_list = [target_val, output_lb_val, output_ub_val]

                # Update DataFrames
                for j in range(len(df_list)):
                    assert (len(df_list) == len(values_list))
                    df_list[j] = update_df(df_list[j], date, key,
                                           values_list[j])

        # Save the DataFrames
        # BUG FIX: guard on df_dirname like predict() does; previously
        # os.path.isdir(None) raised TypeError when df_dirname was unset.
        if config.df_dirname:
            if not os.path.isdir(config.df_dirname):
                os.makedirs(config.df_dirname)
            save_names = ['target-df.pkl', 'output-lb-df.pkl',
                          'output-ub-df.pkl']

            for j in range(len(df_list)):
                assert (len(df_list) == len(save_names))
                df_list[j].to_pickle(os.path.join(config.df_dirname,
                                                  save_names[j]))
    return
Ejemplo n.º 18
0
def train(cfg):
    """Train a classifier according to the settings in ``cfg``.

    Builds train/validation datasets (optionally with random fixed-size
    sampling transforms), the model, optimizer and LR scheduler, then runs
    ``n_epochs + 1`` epochs of training on GPU with periodic validation
    and TensorBoard logging of the learning rate.
    """
    batch_size = int(cfg['batch_size'])
    n_epochs = int(cfg['n_epochs'])
    sample_size = int(cfg['fixed_size'])

    #### DATA LOADING
    train_transforms, val_transforms = [], []
    if cfg['rnd_sampling']:
        # subsample every example down to a fixed number of points
        train_transforms.append(RndSampling(sample_size, maintain_prop=False))
        #prop_vector=[1, 1]))
        val_transforms.append(RndSampling(sample_size, maintain_prop=False))

    dataset, dataloader = get_dataset(cfg, trans=train_transforms)
    val_dataset, val_dataloader = get_dataset(cfg, trans=val_transforms,
                                              train=False)

    # summary for tensorboard
    writer = create_tb_logger(cfg)
    dump_code(cfg, writer.logdir)

    #### BUILD THE MODEL
    net = get_model(cfg)
    if cfg['verbose']:
        print(net)

    #### SET THE TRAINING
    optimizer = get_optimizer(cfg, net)
    lr_scheduler = get_lr_scheduler(cfg, optimizer)
    net.cuda()

    num_batch = len(dataset) / batch_size
    print('num of batches per epoch: %d' % num_batch)
    cfg['num_batch'] = num_batch

    n_iter = 0
    #best_pred = 0
    best_pred = 10  # validation metric is a loss-like value; lower is better
    best_epoch = 0
    current_lr = float(cfg['learning_rate'])

    for epoch in range(n_epochs + 1):

        # update bn decay
        if cfg['bn_decay'] and epoch != 0:
            if epoch % int(cfg['bn_decay_step']) == 0:
                update_bn_decay(cfg, net, epoch)

        loss, n_iter = train_ep(cfg, dataloader, net, optimizer, writer,
                                epoch, n_iter)

        ### validation during training
        if epoch % int(cfg['val_freq']) == 0 and cfg['val_in_train']:
            best_epoch, best_pred = val_ep(cfg, val_dataloader, net, writer,
                                           epoch, best_epoch, best_pred)

        # update lr
        lr_type = cfg['lr_type']
        if lr_type == 'step' and current_lr >= float(cfg['min_lr']):
            lr_scheduler.step()
        if lr_type == 'plateau':
            lr_scheduler.step(loss)

        current_lr = get_lr(optimizer)
        writer.add_scalar('train/lr', current_lr, epoch)

    writer.close()
Ejemplo n.º 19
0
run = Run.get_context()
exp = run.experiment
workspace = exp.workspace
run_id = 'amlcompute'

if args.run_id is not None:
    run_id = args.run_id
if args.run_id == 'amlcompute':
    run_id = run.parent.run_id
model_name = args.model_name
metric_eval = 'mse'
allow_run_cancel = args.allow_run_cancel

try:
    model = get_model(model_name=model_name,
                      workspace=workspace,
                      tag_name='experiment_name',
                      tag_value=exp.name)
    if model is not None:
        model_mse = np.inf
        if metric_eval in model.tags:
            model_mse = float(model.tags[metric_eval])
        print(model_mse,run.parent.get_metrics())
        new_run_mse = float(run.parent.get_metrics().get(metric_eval))
        if model_mse is None or new_run_mse is None:
            if allow_run_cancel == 'true':
                run.parent.cancel()
        else:
            print(f'model mse: {model_mse}\nnew run mse: {new_run_mse}')
        if new_run_mse < model_mse:
            print('current run has better result than previous one,therfore you can continue')
        else:
Ejemplo n.º 20
0
def _run(args, seed, print):
    """Evaluate covariance/correlation quality on a synthetic toy dataset.

    Trains the model on a precomputed ReLU-network toy dataset, compares
    its predictive covariance/correlation over pool+test points against
    the stored ground-truth posterior (Pearson/Spearman), runs one round
    of active-learning selection with each of the batchMIG / MIG / TIG
    criteria, scores each selection with the oracle, and dumps all
    metrics plus the test predictive mean/covariance to
    ``RESULT_TOY_METRIC_PATH``.

    NOTE(review): the ``print`` parameter shadows the builtin; presumably
    the driver passes a logging function with a print-compatible signature.
    """
    tf.reset_default_graph()
    dataset = edict(dict(np.load(os.path.join(
        root_path, TOY_DATA_PATH, 'reluLarge', 'dim%d_seed%s.npz' % (args.input_dim, seed)))))

    # ================================== setup model ==================================
    BSZ, NBS = args.batch_size, args.n_base
    train_x, test_x, pool_x = dataset.train_x, dataset.test_x, dataset.pool_x
    train_y = dataset.train_y
    N, input_dim = train_x.shape
    # batch size cannot exceed N; n_base must fit inside one batch
    args.batch_size, args.n_base = min(BSZ, N), min(NBS, BSZ, N - 1)
    given_obs_var = dataset.obs_var
    model, print_values, train_op, obs_var, corr_op, covar_op = get_model(
        args, train_x, train_y, test_x, pool_x, input_dim, print,
        mini_particles=1, given_obs_var=given_obs_var)
    corr_f_op, covar_f_op = gen_f_correlation(model)
    train_summary = tf.no_op()
    global_step = model.global_step

    # ==================================  training ==================================
    if args.method in ['hmc', 'emhmc']:
        print("HMC reset the config proto of Session.\n" * 10)
        # NOTE(review): arithmetic optimization is disabled for HMC --
        # presumably the graph rewriter breaks the sampler; confirm
        config_proto = tf.ConfigProto()
        off = rewriter_config_pb2.RewriterConfig.OFF
        config_proto.graph_options.rewrite_options.arithmetic_optimization = off
        sess = tf.Session(config=config_proto)
    else:
        sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    learning_rate = args.learning_rate

    for epoch in range(1, args.epochs + 1):
        # step learning-rate decay
        if epoch % args.lr_ann_epochs == 0:
            learning_rate = learning_rate * args.lr_ann_ratio
        regression_train(args, model, train_op, dataset, epoch, print_values, global_step,
                         train_summary, None, sess, learning_rate=learning_rate, logger=print)

    # ==================================  testing ==================================
    pool_test_x = np.concatenate([dataset.pool_x, dataset.test_x], 0)
    feed_dict = {
        model.x_pred: pool_test_x, model.x: dataset.train_x, model.y: dataset.train_y,
        model.n_particles: args.eval_cov_samples
    }
    while True:
        # loop until the results are not nan (sampling-based estimates can
        # occasionally produce NaNs).
        covariance_f, correlation_f, model_mu, model_var, model_obs_var = sess.run(
            [covar_f_op, corr_f_op, model.func_x_pred_mean, model.func_x_pred_var, model.obs_var],
            feed_dict=feed_dict)

        if not (np.isnan(covariance_f).any() or np.isnan(correlation_f).any()
                or np.isnan(model_mu).any() or np.isnan(model_var).any()):
            break

    # ========== compute Pearson and Spearman correlations with respect to the Oracle ============

    Kff = dataset.posterior_Kff
    reci_sqrt_diag_Kff = 1.0 / np.diag(Kff) ** 0.5
    # normalize the oracle posterior covariance into a correlation matrix
    gt_corr_f = Kff * (np.reshape(reci_sqrt_diag_Kff, [-1, 1]) @ np.reshape(reci_sqrt_diag_Kff, [1, -1]))
    var_pr_f, _, var_spr_f, _, _, _, _, _ = compute_pearson_and_spearman_r(covariance_f,
                                                                           Kff,
                                                                           pool_x.shape[0],
                                                                           test_x.shape[0])

    _, corr_pr_f, _, corr_spr_f, _, _, _, _ = compute_pearson_and_spearman_r(correlation_f,
                                                                             gt_corr_f,
                                                                             pool_x.shape[0],
                                                                             test_x.shape[0])

    print('*' * 20)
    print('[F][Var] Pearson: %.4f, Spearman: %.4f' % (var_pr_f, var_spr_f))
    print('[F][Cor] Pearson: %.4f, Spearman: %.4f' % (corr_pr_f, corr_spr_f))

    # ======== select the new data points (BMIG, MIG, TIG) and compute the Test LLD afterwards . ============
    # REFACTOR: the three criteria shared an identical copy-pasted selection
    # block; folded into one loop.  Execution order is preserved: all
    # selections run first (in the original batchMIG/mig/tig order), then
    # args.criteria is restored, then the oracle scores are computed.
    org_crit = args.criteria
    selected_datasets = {}
    for criteria in ('batchMIG', 'mig', 'tig'):
        args.criteria = criteria
        data_idxs = get_selected_data_idxs(train_x, test_x, pool_x,
                                           dataset, args, covar_op, corr_op,
                                           model, sess, n_particles=args.eval_cov_samples)
        crit_dataset = update_dataset(dataset, data_idxs)
        # the oracle scoring needs the true observation variance
        crit_dataset.obs_var = given_obs_var
        selected_datasets[criteria] = crit_dataset

    args.criteria = org_crit

    BMIG_lld, BMIG_rmse = compute_oracle_lld_rmse(selected_datasets['batchMIG'])
    MIG_lld, MIG_rmse = compute_oracle_lld_rmse(selected_datasets['mig'])
    TIG_lld, TIG_rmse = compute_oracle_lld_rmse(selected_datasets['tig'])
    print('>>> BatchMIG RMSE={:5f}, LLD={:5f}. | MIG  RMSE={:5f}, LLD={:5f}. | TIG  RMSE={:5f}, LLD={:5f}.'.format(
        BMIG_rmse, BMIG_lld, MIG_rmse, MIG_lld, TIG_rmse, TIG_lld))
    print('*' * 20)

    # =================== compute predictive covariances ===================
    pred_mean, pred_cov = sess.run(
        [model.func_x_pred_mean, model.y_x_pred_cov],
        feed_dict={ model.x_pred: test_x, model.x: train_x, model.y: train_y,
                    model.n_particles: args.eval_cov_samples}
    )

    # ====================================== save results ====================================== #
    method = 'gp' if args.method == 'svgp' else args.method
    np.savez(os.path.join(root_path, RESULT_TOY_METRIC_PATH, '%s_seed%d_%s.npz' % (method, seed, args.note)),
             **{'corr_pr_f': corr_pr_f,
                'corr_spr_f': corr_spr_f,
                'var_pr_f': var_pr_f,
                'var_spr_f': var_spr_f,
                'BMIG_test_rmse': BMIG_rmse,
                'BMIG_test_lld': BMIG_lld,
                'TIG_test_rmse': TIG_rmse,
                'TIG_test_lld': TIG_lld,
                'MIG_test_lld': MIG_lld,
                'MIG_test_rmse': MIG_rmse,
                'test_pred_mean': pred_mean,
                'test_pred_cov': pred_cov
                })
Ejemplo n.º 21
0
def _run(args, seed, print):
    """Run the oracle active-learning loop for a single random seed.

    Each iteration rebuilds the TF graph on the current training set,
    restores the oracle checkpoint, (for SVGP) re-fits the variational
    parameters, evaluates test RMSE/LLD, selects new pool point(s) with
    the configured acquisition criteria, and saves the updated dataset
    split to disk.

    Args:
        args: experiment configuration namespace (expects at least
            dataset, method, criteria, batch_size, n_base, epochs,
            test_interval, active_iterations). NOTE: args.batch_size and
            args.n_base are clamped in place each iteration to the
            current training-set size.
        seed: integer seed identifying the data split / checkpoint.
        print: logger callable shadowing the builtin (project convention,
            matching the other `_run` variants in this file).

    Returns:
        (test_rmses, test_llds): lists of per-iteration test metrics.

    Raises:
        NotImplementedError: if args.method is neither 'svgp' nor 'gp'.
    """
    MODEL_PATH = osp.join(ORACLE_CKPT_DIR, args.dataset, 'seed%d' % seed)

    # Ensure the per-criteria output directory exists.
    # NOTE(review): `makedirs` is handed a path ending in 'a.py' --
    # presumably a helper that creates the parent directories of a file
    # path rather than os.makedirs; confirm against its definition.
    if not osp.exists(
            osp.join(RESULT_AL_SELECTION_PATH, args.criteria, '%s_seed%d' %
                     (args.dataset, seed))):
        makedirs(
            osp.join(RESULT_AL_SELECTION_PATH, args.criteria,
                     '%s_seed%d' % (args.dataset, seed), 'a.py'))
    raw_dataset = fetch_active_learning_dataset(args, seed)
    # Persist the initial (iteration-0) split, including the pool.
    np.save(
        osp.join(RESULT_AL_SELECTION_PATH, args.criteria,
                 '%s_seed%d' % (args.dataset, seed),
                 '%s_itr%d.npy' % ("oracle", 0)), {
                     'test_x': raw_dataset.test_x,
                     'test_y': raw_dataset.test_y,
                     'train_x': raw_dataset.train_x,
                     'train_y': raw_dataset.train_y,
                     'pool_x': raw_dataset.pool_x,
                     'pool_y': raw_dataset.pool_y
                 })
    # Normalization statistics are computed once from the initial training
    # targets and reused for every iteration (zero stds clamped to 1 to
    # avoid division by zero).
    y_std = np.std(raw_dataset.train_y, 0, keepdims=True)
    y_std[y_std == 0] = 1
    y_mean = np.mean(raw_dataset.train_y, 0, keepdims=True)

    test_rmses, test_llds = [], []
    # Remember the configured sizes; they get clamped per iteration below.
    BSZ, NBS = args.batch_size, args.n_base
    for i_point in range(1, args.active_iterations + 2):
        tf.reset_default_graph()
        train_x, test_x, pool_x = raw_dataset.train_x, raw_dataset.test_x, raw_dataset.pool_x
        train_y, test_y, pool_y = raw_dataset.train_y, raw_dataset.test_y, raw_dataset.pool_y
        # Standardize all targets with the fixed iteration-0 statistics.
        train_y, test_y, pool_y = (train_y - y_mean) / y_std, (
            test_y - y_mean) / y_std, (pool_y - y_mean) / y_std
        optimizer_dataset = edict({
            'train_x': train_x,
            'train_y': train_y,
            'test_x': test_x,
            'test_y': test_y,
            'pool_x': pool_x,
            'pool_y': pool_y,
            'std_y_train': y_std,
        })
        N, input_dim = train_x.shape
        # Clamp batch size and inducing-base count to the current data size.
        args.batch_size, args.n_base = min(BSZ, N), min(NBS, BSZ, N - 1)

        oracle_n = tf.Variable(N, trainable=False)
        model, print_values, train_op, obs_var, corr_op, covar_op = get_model(
            args,
            train_x,
            train_y,
            test_x,
            pool_x,
            input_dim,
            print,
            oracle_N=oracle_n)
        global_step = model.global_step
        train_summary = tf.no_op()
        saver = tf.train.Saver(max_to_keep=1)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        restore_model(args, print, saver, sess, MODEL_PATH)
        if args.method == 'svgp':
            # Re-initialize (only) the GP's trainable variables so the
            # variational parameters are re-fit to the enlarged training
            # set, and record the new training-set size.
            retrain_vars = tf.trainable_variables(model.GP.name)
            sess.run(tf.variables_initializer(retrain_vars))
            sess.run(tf.assign(oracle_n, N))

        test_rmse, test_lld = None, None
        if args.method == 'svgp':
            # Re-fit only the inducing points / variational params;
            # evaluate periodically and always on the final epoch.
            for epoch in range(1, args.epochs + 1):
                regression_train(args,
                                 model,
                                 model.infer_only_Z,
                                 optimizer_dataset,
                                 epoch,
                                 print_values,
                                 global_step,
                                 train_summary,
                                 None,
                                 sess,
                                 logger=print)
                if epoch % args.test_interval == 0 or epoch == args.epochs:
                    test_rmse, test_lld = regression_test(args,
                                                          model,
                                                          optimizer_dataset,
                                                          epoch,
                                                          sess,
                                                          obs_var,
                                                          global_step,
                                                          None,
                                                          seed,
                                                          logger=print)
        elif args.method == 'gp':
            # Exact GP: no refit needed, evaluate the restored model once.
            test_rmse, test_lld = regression_test(args,
                                                  model,
                                                  optimizer_dataset,
                                                  0,
                                                  sess,
                                                  obs_var,
                                                  global_step,
                                                  None,
                                                  seed,
                                                  logger=print)
        else:
            raise NotImplementedError

        test_rmses.append(test_rmse[0])
        test_llds.append(test_lld[0])

        # ================== select the new data point(s). ===========================================
        data_idxs = get_selected_data_idxs(train_x, test_x, pool_x,
                                           optimizer_dataset, args, covar_op,
                                           corr_op, model, sess)
        raw_dataset = update_dataset(raw_dataset, data_idxs)
        np.save(
            osp.join(RESULT_AL_SELECTION_PATH, args.criteria,
                     '%s_seed%d' % (args.dataset, seed),
                     '%s_itr%d.npy' % ("oracle", i_point)), {
                         'test_x': raw_dataset.test_x,
                         'test_y': raw_dataset.test_y,
                         'train_x': raw_dataset.train_x,
                         'train_y': raw_dataset.train_y
                     })

        # Release the session's resources; the graph is discarded at the
        # top of the next iteration anyway, so leaving sessions open leaks
        # memory across active-learning iterations.
        sess.close()

        print("Iteration %d done. Selected data point %d/%d. (%d/%d)" %
              (i_point, i_point, args.active_iterations,
               raw_dataset.train_x.shape[0], raw_dataset.test_x.shape[0]))

    return test_rmses, test_llds