Example #1
def main(args):
    # things with paths
    expdname = args['dirs']['exp_dir']
    expname = args['exp_name']
    resdirname = os.path.join(expdname, expname)
    logdirname = resdirname  # logs are written into the results directory
    make_dir_if_not_exist(logdirname, remove=True)
    npzfile = os.path.join(args['dirs']['data_dir'],
                           args['data']['name'],
                           get_npz_basename(**args['data']))

    # write params (including all overrides) to experiment directory
    with open(os.path.join(resdirname, 'opt.json'), 'w') as f:
        opt_dumps = dumps(args, indent=4, sort_keys=True)  # dumps: presumably json.dumps
        f.write(opt_dumps)

    if args['data']['use_attr']:
        args['model'].update(xdim=args['model']['xdim']+1)

    #get dataset
    data = Dataset(npzfile=npzfile, **args['data'], batch_size=args['train']['batch_size'])

    # get model
    if 'Weighted' in args['model']['class']:
        A_weights = [1. / x for x in data.get_A_proportions()]
        Y_weights = [1. / x for x in data.get_Y_proportions()]
        AY_weights = [[1. / x for x in L] for L in data.get_AY_proportions()]
        if 'Eqopp' in args['model']['class']:
            # for equalized opportunity we only care about people with Y = 0
            # (those who didn't get sick), so zero out the Y = 1 group weights
            AY_weights[0][1] = 0.
            AY_weights[1][1] = 0.
        args['model'].update(A_weights=A_weights, Y_weights=Y_weights, AY_weights=AY_weights)
    model_class = getattr(models, args['model'].pop('class'))
    print(args['model'])
    model = model_class(**args['model'], batch_size=args['train']['batch_size'])

    with tf.Session() as sess:
        reslogger = ResultLogger(resdirname)

        #create Trainer
        trainer = Trainer(model, data, sess=sess, expdir=resdirname, logs_path=logdirname,
                          **args['optim'], **args['train'])

        # training
        trainer.train(**args['train'])

        # test the trained model
        tester = Tester(model, data, sess, reslogger)
        tester.evaluate(args['train']['batch_size'])

    # flush
    tf.reset_default_graph()

    # all done
    with open(os.path.join(resdirname, 'done.txt'), 'w') as f:
        f.write('done')
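Every example here relies on a make_dir_if_not_exist helper that is not shown.
A minimal sketch of its assumed behavior, including the remove flag used in
Example #1 (the original repo's implementation may differ):

import os
import shutil

def make_dir_if_not_exist(dname, remove=False):
    # with remove=True, wipe any existing directory first (used above to
    # clear stale logs between runs)
    if remove and os.path.exists(dname):
        shutil.rmtree(dname)
    if not os.path.exists(dname):
        os.makedirs(dname)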
Example #2
def load_dirs_config(args, sweep_name=None):
    dirs_config_path = os.path.join(CONFIG_DIR, 'dirs', args['dirconf'])
    with open(dirs_config_path, 'r') as f:
        dirs = json.load(f)
    if sweep_name is not None:
        dirs['exp'] = os.path.join(dirs['exp'], sweep_name)
        dirs['log'] = os.path.join(dirs['log'], sweep_name)
    for d in dirs.values():
        make_dir_if_not_exist(d)
    return dirs
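A hypothetical config and call for load_dirs_config; the 'exp' and 'log' keys
match the accesses above, while CONFIG_DIR's value, the 'data' key, and the
concrete paths are illustrative:

# <CONFIG_DIR>/dirs/madras-vector.json (illustrative contents):
#   {"exp": "/scratch/experiments", "log": "/scratch/logs", "data": "/scratch/data"}
dirs = load_dirs_config({'dirconf': 'madras-vector.json'}, sweep_name='lr_sweep')
print(dirs['exp'])  # /scratch/experiments/lr_sweep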
Example #3
def main(fnames, seed, save_dict):
    np.random.seed(seed)
    make_dir_if_not_exist(fnames['data_dir_out'])
    npzname_out = os.path.join(fnames['data_dir_out'], fnames['data_fout_name'])
    n = save_dict['X'].shape[0]
    train_inds, valid_inds, test_inds = get_split_inds(n, test_pct=0.3, valid_pct=0.2)
    split_save_dict = {}
    # suffix each tensor's key with its phase, e.g. 'X' -> 'X_train'
    for inds, phase in [(train_inds, 'train'), (valid_inds, 'valid'), (test_inds, 'test')]:
        for t in save_dict:
            split_save_dict['{}_{}'.format(t, phase)] = save_dict[t][inds]
    save_tensors(split_save_dict, npzname_out)
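get_split_inds is not shown; a minimal sketch of its assumed behavior (shuffle,
then carve off test and validation fractions), which is also why the seed is
fixed at the top of main:

import numpy as np

def get_split_inds(n, test_pct=0.3, valid_pct=0.2):
    inds = np.random.permutation(n)  # reproducible via np.random.seed in the caller
    n_test, n_valid = int(n * test_pct), int(n * valid_pct)
    test_inds = inds[:n_test]
    valid_inds = inds[n_test:n_test + n_valid]
    train_inds = inds[n_test + n_valid:]
    return train_inds, valid_inds, test_inds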
Example #4
def main(args, dirs, data_kwargs, model_kwargs):
    #get dataset
    data = Dataset(**data_kwargs)
    print('Dataset loaded from {}.'.format(dirs['data']))

    #adaptively set xdim in model
    model_kwargs['xdim'] = data.get_data_dim()

    #get model
    if args['model'] == 'BinaryCFMLP':
        model = BinaryCFMLP(**model_kwargs)
    elif args['model'] == 'BinaryCFDoubleMLP':
        model = BinaryCFDoubleMLP(**model_kwargs)
    else:
        raise ValueError('Unknown model name: {}'.format(args['model']))
    print('Model loaded.')

    with tf.Session() as sess:
        print('Session created.')
        resdirname = os.path.join(dirs['exp'], args['name'])
        logdirname = os.path.join(dirs['log'], args['name'], 'tb_log')
        ckptdirname = os.path.join(resdirname, 'checkpoints')
        for d in [resdirname, logdirname, ckptdirname]:
            make_dir_if_not_exist(d)

        #create Trainer
        trainer = Trainer(model, data, batch_size=args['batch_size'], sess=sess,
                          logs_path=logdirname, checkpoint_path=ckptdirname,
                          results_path=resdirname)
        save_path = trainer.train(n_epochs=args['num_epochs'],
                                  patience=args['patience'])
        trainer.restore(save_path)
        trainer.test()

    # save args
    args['resdir'] = resdirname
    args_path = os.path.join(resdirname, 'args.json')
    with open(args_path, 'w') as f:
        json.dump(args, f)
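A hypothetical invocation of this entry point; the key names mirror the lookups
inside main(), but the concrete values and kwargs are illustrative only:

args = {'model': 'BinaryCFMLP', 'name': 'cf_run0', 'batch_size': 64,
        'num_epochs': 50, 'patience': 5, 'dirconf': 'madras-vector.json'}
dirs = load_dirs_config(args)                      # as in Example #2
data_kwargs = {'npzfile': 'data/adult/adult.npz'}  # forwarded verbatim to Dataset
model_kwargs = {}                                  # main() fills in 'xdim' from the data
main(args, dirs, data_kwargs, model_kwargs)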
Example #5
def __init__(self, dname, saver=None):
    self.dname = dname
    make_dir_if_not_exist(self.dname)
    self.ckptdir = os.path.join(self.dname, 'checkpoints')
    make_dir_if_not_exist(self.ckptdir)
    self.npzdir = os.path.join(self.dname, 'npz')
    make_dir_if_not_exist(self.npzdir)
    self.saver = saver if saver is not None else tf.train.Saver()
    self.testcsv_name = os.path.join(self.dname, 'test_metrics.csv')
    self.testcsv = open(self.testcsv_name, 'w')
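This __init__ appears to belong to the logger/checkpoint helper instantiated as
ResultLogger(resdirname) in the other examples; that class name is an assumption
here. Note that tf.train.Saver() raises if the graph contains no variables, so
the model must be built first:

_ = tf.Variable(0.0, name='dummy')         # ensure the graph has a variable to save
logger = ResultLogger('experiments/run0')  # creates checkpoints/ and npz/ subdirs
logger.testcsv.write('metric,value\n')
logger.testcsv.close()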
Example #6
parser = argparse.ArgumentParser(
    description='Plot decompositions through training')
parser.add_argument('-n', '--name', help='experiment name', default='')
parser.add_argument('-dirs',
                    '--dirconf',
                    help='config file for dirs',
                    default='madras-vector.json')
parser.add_argument('-fd', '--figdir', help='dir for figs', default='figs')
args = vars(parser.parse_args())

# get params
dirs = load_dirs_config(args)
expdir = os.path.join(dirs['exp'], args['name'])
figdir = os.path.join(args['figdir'], args['name'])
make_dir_if_not_exist(figdir)

base_groups = {
    'treatment-shift': ['L', 'L_do', 'L_t_shift'],
    'value': ['V', 'V_sample', 'V_treat', 'V_star'],
    'regret': [
        'V_regret_databias', 'V_regret_fnlearn', 'V_regret_fnlearn_unbiased',
        'V_regret_databias_optfn'
    ]
}
a0_groups = {
    'A0-{}'.format(k): ['A0_{}'.format(m) for m in base_groups[k]]
    for k in base_groups
}
a1_groups = {
    'A1-{}'.format(k): ['A1_{}'.format(m) for m in base_groups[k]]
    for k in base_groups
}
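The metric groups above presumably feed a per-group plotting loop; a minimal
sketch under that assumption (matplotlib and the empty 'history' placeholder
are hypothetical, standing in for per-epoch values loaded from expdir):

import matplotlib.pyplot as plt

all_groups = {**base_groups, **a0_groups, **a1_groups}
history = {m: [] for ms in all_groups.values() for m in ms}  # placeholder series
for gname, metrics in all_groups.items():
    fig, ax = plt.subplots()
    for m in metrics:
        ax.plot(history[m], label=m)
    ax.set_title(gname)
    ax.legend()
    fig.savefig(os.path.join(figdir, '{}.png'.format(gname)))
    plt.close(fig)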
Example #7
def main(args):
    args = format_transfer_args_as_train_args(args)
    # things with paths
    expdname = args['dirs']['exp_dir']
    expname = args['exp_name']
    logdname = args['dirs']['log_dir']
    resdirname = os.path.join(expdname, expname)
    make_dir_if_not_exist(resdirname)
    logdirname = os.path.join(logdname, expname)
    make_dir_if_not_exist(logdirname)
    npzfile = os.path.join(args['dirs']['data_dir'], args['data']['name'],
                           get_npz_basename(**args['data']))

    # write params (including all overrides) to experiment directory
    with open(os.path.join(resdirname, 'opt.json'), 'w') as f:
        opt_dumps = dumps(args, indent=4, sort_keys=True)
        f.write(opt_dumps)

    npzdname, _ = get_repr_filename(args)
    repr_phase = args['transfer']['repr_phase']  # "Test" or "Valid"
    y_indices = args['transfer']['y_indices']
    if not hasattr(y_indices, '__iter__'):  # accept a single index or an iterable
        y_indices = [y_indices]

    if args['transfer']['repr_name'] == 'default':
        base_data = Dataset(npzfile=npzfile,
                            **args['data'],
                            batch_size=args['train']['batch_size'])
        if repr_phase == 'Test':
            reprs = base_data.x_test
            attrs = base_data.attr_test
            y = base_data.y_test
        elif repr_phase == 'Valid':
            reprs = base_data.x_valid
            attrs = base_data.attr_valid
            y = base_data.y_valid
        else:
            raise ValueError('Unknown repr_phase: {}'.format(repr_phase))
    else:
        # load reprs from LAFTR training as input
        repr_fname = os.path.join(npzdname, 'Z.npz')
        repr_dat = np.load(repr_fname)
        reprs = repr_dat['X']

        attr_fname = os.path.join(npzdname, 'A.npz')
        attr_dat = np.load(attr_fname)
        attrs = attr_dat['X']

        y_fname = os.path.join(npzdname, 'Y.npz')
        y_dat = np.load(y_fname)
        y = y_dat['X']
    print('shapes', reprs.shape, attrs.shape, y.shape)

    for label_index in y_indices:  # y_indices: a list of ints, or the string 'a'
        data = TransferDataset(reprs,
                               attrs,
                               label_index,
                               npzfile=npzfile,
                               Y_loaded=y,
                               phase=repr_phase,
                               **args['data'],
                               batch_size=args['train']['batch_size'])
        model = RegularizedFairClassifier(**args['model'])

        with tf.Session() as sess:
            reslogger = ResultLogger(resdirname)

            # create Trainer
            trainer = Trainer(model,
                              data,
                              sess=sess,
                              expdir=resdirname,
                              logs_path=logdirname,
                              **args['optim'],
                              **args['train'])
            trainer.train(**args['train'])

            # test model
            tester = Tester(model, data, sess, reslogger)
            tester.evaluate(args['train']['batch_size'])

        # flush
        tf.reset_default_graph()

    # all done
    with open(os.path.join(resdirname, 'done.txt'), 'w') as f:
        f.write('done')
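For orientation, a minimal sketch of how the Z/A/Y files loaded above could have
been produced during LAFTR training; the directory layout and the 'X' key are
taken from the np.load calls in this example:

np.savez(os.path.join(npzdname, 'Z.npz'), X=reprs)  # learned representations
np.savez(os.path.join(npzdname, 'A.npz'), X=attrs)  # sensitive attributes
np.savez(os.path.join(npzdname, 'Y.npz'), X=y)      # labels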