Example #1
0
def main(args):
    """Run one experiment: build the dataset and model, train, then evaluate.

    `args` is a nested mapping; the `dirs`, `exp_name`, `data`, `model`,
    `train` and `optim` entries are read here.

    Side effects visible in this function:
      * the directory <exp_dir>/<exp_name> is (re)created and used both for
        results and for logs;
      * `opt.json` (the serialized `args`) is written there before training
        and `done.txt` after evaluation;
      * `args['model']` is modified in place: `xdim` is incremented when
        `args['data']['use_attr']` is truthy, weight lists are added for
        "Weighted" model classes, and the `class` key is popped.
    """
    # --- paths ---------------------------------------------------------
    dirs = args['dirs']
    exp_root = dirs['exp_dir']
    run_name = args['exp_name']
    log_root = dirs['log_dir']  # read but not used: logs share the results dir
    out_dir = os.path.join(exp_root, run_name)
    log_dir = out_dir
    make_dir_if_not_exist(log_dir, remove=True)
    make_dir_if_not_exist(out_dir)
    dataset_path = os.path.join(
        dirs['data_dir'],
        args['data']['name'],
        get_npz_basename(**args['data']),
    )

    # --- record the full parameter set next to the results --------------
    with open(os.path.join(out_dir, 'opt.json'), 'w') as opt_file:
        opt_file.write(dumps(args, indent=4, sort_keys=True))

    model_cfg = args['model']

    # one extra input dimension when the attribute is used as a feature
    if args['data']['use_attr']:
        model_cfg['xdim'] = model_cfg['xdim'] + 1

    # --- dataset ---------------------------------------------------------
    batch_size = args['train']['batch_size']
    data = Dataset(npzfile=dataset_path, **args['data'], batch_size=batch_size)

    # --- model -----------------------------------------------------------
    if 'Weighted' in model_cfg['class']:

        def inverse(proportions):
            # weight each group by the reciprocal of its proportion
            return [1. / p for p in proportions]

        a_weights = inverse(data.get_A_proportions())
        y_weights = inverse(data.get_Y_proportions())
        ay_weights = [inverse(row) for row in data.get_AY_proportions()]
        if 'Eqopp' in model_cfg['class']:
            # we only care about people with Y = 0 (those who didn't get
            # sick), so the Y = 1 column is zeroed for both groups
            for a_idx in (0, 1):
                ay_weights[a_idx][1] = 0.
        model_cfg.update(A_weights=a_weights,
                         Y_weights=y_weights,
                         AY_weights=ay_weights)

    # `class` is removed so the remaining entries can be passed as kwargs
    model_cls = getattr(models, model_cfg.pop('class'))
    print(model_cfg)
    model = model_cls(**model_cfg, batch_size=batch_size)

    # --- train and evaluate inside a single session ------------------------
    with tf.Session() as sess:
        result_logger = ResultLogger(out_dir)

        trainer = Trainer(model, data,
                          sess=sess,
                          expdir=out_dir,
                          logs_path=log_dir,
                          **args['optim'],
                          **args['train'])
        trainer.train(**args['train'])

        # test the trained model
        tester = Tester(model, data, sess, result_logger)
        tester.evaluate(batch_size)

    # flush
    tf.reset_default_graph()

    # completion marker
    with open(os.path.join(out_dir, 'done.txt'), 'w') as done_file:
        done_file.write('done')
def main(args):
    """Train and evaluate a RegularizedFairClassifier for each label index.

    `args` is first converted with `format_transfer_args_as_train_args`; the
    `dirs`, `exp_name`, `data`, `model`, `train`, `optim` and `transfer`
    entries of the result are read here.

    Inputs to the classifier are either the raw features of the base dataset
    (`args['transfer']['repr_name'] == 'default'`) or arrays loaded from the
    `Z.npz`, `A.npz` and `Y.npz` files in the directory returned by
    `get_repr_filename`.

    Files written to <exp_dir>/<exp_name>:
      * `opt.json` -- the serialized `args`, written before training;
      * `done.txt` -- completion marker, written only after every label
        index has been trained and evaluated.

    Raises:
        ValueError: if the default representation is requested and
            `repr_phase` is neither 'Test' nor 'Valid'.
    """
    args = format_transfer_args_as_train_args(args)

    # things with paths
    expdname = args['dirs']['exp_dir']
    expname = args['exp_name']
    logdname = args['dirs']['log_dir']
    resdirname = os.path.join(expdname, expname)
    make_dir_if_not_exist(resdirname)
    logdirname = os.path.join(logdname, expname)
    make_dir_if_not_exist(logdirname)
    npzfile = os.path.join(args['dirs']['data_dir'], args['data']['name'],
                           get_npz_basename(**args['data']))

    # write params (including all overrides) to experiment directory
    with open(os.path.join(resdirname, 'opt.json'), 'w') as f:
        f.write(dumps(args, indent=4, sort_keys=True))

    npzdname, _ = get_repr_filename(args)
    repr_phase = args['transfer']['repr_phase']  # "Test" or "Valid"

    # Accept a single label index or an iterable of them. Strings are
    # iterable in Python 3 but denote one index (e.g. 'a'), so they are
    # wrapped rather than iterated character by character.
    y_indices = args['transfer']['y_indices']
    if isinstance(y_indices, str) or not hasattr(y_indices, '__iter__'):
        y_indices = [y_indices]

    if args['transfer']['repr_name'] == 'default':
        # use the base dataset's own features as the "representation"
        base_data = Dataset(npzfile=npzfile,
                            **args['data'],
                            batch_size=args['train']['batch_size'])
        if repr_phase == 'Test':
            reprs = base_data.x_test
            attrs = base_data.attr_test
            y = base_data.y_test
        elif repr_phase == 'Valid':
            reprs = base_data.x_valid
            attrs = base_data.attr_valid
            y = base_data.y_valid
        else:
            # fail here instead of with a NameError further down
            raise ValueError(
                "repr_phase must be 'Test' or 'Valid', got {!r}".format(
                    repr_phase))
    else:
        # load reprs from LAFTR training as input
        reprs = _load_npz_x(os.path.join(npzdname, 'Z.npz'))
        attrs = _load_npz_x(os.path.join(npzdname, 'A.npz'))
        y = _load_npz_x(os.path.join(npzdname, 'Y.npz'))
    print('shapes', reprs.shape, attrs.shape, y.shape)

    for label_index in y_indices:
        data = TransferDataset(reprs,
                               attrs,
                               label_index,
                               npzfile=npzfile,
                               Y_loaded=y,
                               phase=repr_phase,
                               **args['data'],
                               batch_size=args['train']['batch_size'])
        model = RegularizedFairClassifier(**args['model'])

        with tf.Session() as sess:
            reslogger = ResultLogger(resdirname)

            # create Trainer
            trainer = Trainer(model,
                              data,
                              sess=sess,
                              expdir=resdirname,
                              logs_path=logdirname,
                              **args['optim'],
                              **args['train'])
            trainer.train(**args['train'])

            # test model
            tester = Tester(model, data, sess, reslogger)
            tester.evaluate(args['train']['batch_size'])

        # flush: drop this label's graph before building the next model
        tf.reset_default_graph()

    # all done -- the marker is written last so that its presence means the
    # whole loop above finished
    with open(os.path.join(resdirname, 'done.txt'), 'w') as f:
        f.write('done')


def _load_npz_x(path):
    """Return the array stored under key 'X' in the .npz archive at `path`."""
    # the context manager closes the underlying archive once the array is read
    with np.load(path) as archive:
        return archive['X']