Example No. 1
def setup(wns=None, zns=None):
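    """
    Build a dict mapping (wn, zn) grid sizes to parameter dicts, each with
    its own wage grid and truncated shock grid attached.
    """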
    if wns is None:
        wns = [100]
    if zns is None:
        zns = [100]

    dic = {}
    for wn, zn in it.product(wns, zns):
        params = load_params()
        wl = params['wl'][0]
        wu = params['wu'][0]
        w_grid = np.linspace(wl, wu, wn)
        params['w_grid'] = w_grid, 'Wage support.'

        ln_dist_lb, _ = params['ln_dist_lb']
        ln_dist_ub, _ = params['ln_dist_ub']
        ln_dist, _ = params['full_ln_dist']

        z_grid = np.linspace(ln_dist_lb, ln_dist_ub, zn)
        params['z_grid'] = z_grid, "Truncated support of shocks."
        params['wn'] = wn, ' '
        params['zn'] = zn, ' '
        dic[(wn, zn)] = params

    return dic
Example No. 2
def make_and_save(pi,
                  lambda_,
                  wses=None,
                  nperiods=4,
                  log=True,
                  figkwargs=None,
                  axkwargs=None):
    """
    Wrapper for all the plotting functionality.

    Returns the figure and axes and saves the figure to disk.
    """
    # params is needed by get_df below, so load it even when wses is given.
    params = load_params()
    if wses is None:
        all_files = ar.get_all_files(params)
        wses = ar.read_output(all_files, kind='ws')

    df = get_df(pi, lambda_, wses[pi, lambda_], params)
    fig, ax = plot_wage_change_dist(df,
                                    pi,
                                    lambda_,
                                    nperiods=nperiods,
                                    log=log,
                                    figkwargs=figkwargs,
                                    axkwargs=axkwargs)
    savefig_(fig, pi, lambda_, nperiods=nperiods, log=log)
    return fig, ax
Example No. 3
def get_all_files(params=None):
    """Get the files from the results path"""
    if params is None:
        params = load_params()

    pth = params['results_path'][0]
    all_files = os.listdir(pth)
    return all_files
Example No. 4
def main():
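    # Regenerate and save the wage-change plot for every (pi, lambda_)
    # pair found in the saved results.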
    params = load_params()
    all_files = ar.get_all_files(params)
    wses = ar.read_output(all_files, kind='ws')
    keys = wses.keys()
    pis, lambdas = zip(*keys)  # FTW
    pis_u, lambdas_u = sorted(set(pis)), sorted(set(lambdas))  # unique

    for pi, lambda_ in keys:
        make_and_save(pi, lambda_)
        print('Saved {}, {}'.format(pi, lambda_))
Example No. 5
def run():
    """
    This method is the main entry point for this processing block
    """
    # pylint: disable=E1121
    ensure_data_directories_exist()
    params = load_params()  # type: dict
    input_metadata = load_metadata()  # type: FeatureCollection
    lcc = KMeansClustering.from_dict(params)
    result = lcc.process_feature(input_metadata)  # type: FeatureCollection
    save_metadata(result)
Example No. 6
def main():
    params = load_params()
    all_files = ar.get_all_files(params)
    wses = ar.read_output(all_files, kind='ws')
    keys = wses.keys()
    pis, lambdas = zip(*keys)  # FTW
    pis_u, lambdas_u = sorted(set(pis)), sorted(set(lambdas))  # unique

    for pi, lambda_ in keys:
        make_and_save(pi, lambda_)
        print('Saved {}, {}'.format(pi, lambda_))
Example No. 7
    def __init__(self, hyperparams):
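        # hyperparams is a (pi, lambda_) pair; the output name joins the two
        # values with decimal points stripped, e.g. (0.5, 0.25) -> '05_025'.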

        self.pi, self.lambda_ = hyperparams
        self.piname = str(self.pi).replace('.', '')
        self.lname = str(self.lambda_).replace('.', '')
        self.out_name = '_'.join([self.piname, self.lname])
        params = load_params()
        params['pi'] = self.pi, 'inflation target'
        params['lambda_'] = self.lambda_, 'rigidity'

        params['call_dir'] = os.getcwd(), 'Path from which the script was called.'
        self.params = params

        self.res_by_run = []
        self.res_by_cat = defaultdict(list)
Example No. 8
    def __init__(self, hyperparams):

        self.pi, self.lambda_ = hyperparams
        self.piname = str(self.pi).replace('.', '')
        self.lname = str(self.lambda_).replace('.', '')
        self.out_name = '_'.join([self.piname, self.lname])
        params = load_params()
        params['pi'] = self.pi, 'inflation target'
        params['lambda_'] = self.lambda_, 'rigidity'

        params['call_dir'] = (os.getcwd(),
                              'Path from which the script was called.')
        self.params = params

        self.res_by_run = []
        self.res_by_cat = defaultdict(list)
Example No. 9
def make_and_save(pi, lambda_, wses=None, nperiods=4, log=True, figkwargs=None,
                  axkwargs=None):
    """
    Wrapper for all the plotting functionality.

    Returns the figure and axes and saves the figure to disk.
    """
    # params is needed by get_df below, so load it even when wses is given.
    params = load_params()
    if wses is None:
        all_files = ar.get_all_files(params)
        wses = ar.read_output(all_files, kind='ws')

    df = get_df(pi, lambda_, wses[pi, lambda_], params)
    fig, ax = plot_wage_change_dist(df, pi, lambda_, nperiods=nperiods,
                                    log=log, figkwargs=figkwargs,
                                    axkwargs=axkwargs)
    savefig_(fig, pi, lambda_, nperiods=nperiods, log=log)
    return fig, ax
Example No. 10
def get_g(pi, lambda_, period=28):
    """
    Helper function to get a wage distribution.

    Warning: Will not touch the params in your global state.
    If you go on to do more things, make sure to adjust those params.
    """
    import analyze_run as ar
    params = load_params()
    params['pi'] = pi, 'a'
    params['lambda_'] = lambda_, 'b'

    all_files = ar.get_all_files(params)
    wses = ar.read_output(all_files, kind='ws')
    ws = wses[(pi, lambda_)]
    pths, shks = sample_path(ws, params, nseries=1000, nperiods=30, seed=42)

    # Cross-section of wages and shocks at the requested period.
    pth, shocks = pths[period], shks[period]
    shocks = np.sort(shocks)
    g = ecdf(np.sort(pth))
    return g, shocks
Example No. 11
def get_g(pi, lambda_, period=28):
    """
    Helper function to get a wage distribution.

    Warning: Will not touch the params in your global state.
    If you go on to do more things, make sure to adjust those params.
    """
    import analyze_run as ar
    params = load_params()
    params['pi'] = pi, 'a'
    params['lambda_'] = lambda_, 'b'

    all_files = ar.get_all_files(params)
    wses = ar.read_output(all_files, kind='ws')
    ws = wses[(pi, lambda_)]
    pths, shks = sample_path(ws, params, nseries=1000, nperiods=30, seed=42)

    # Cross-section of wages and shocks at the requested period.
    pth, shocks = pths[period], shks[period]
    shocks = np.sort(shocks)
    g = ecdf(np.sort(pth))
    return g, shocks
Example No. 12
def wage_dist_ecdf_refactor():
    """
    This implements the refactor of gps to ecdfs.

    Also takes care of the output.
    """
    with open('results/fixup_notice.txt', 'a') as f:
        t = str(datetime.datetime.now())
        f.write("FIXED gps AT {}\n".format(t))

    params = load_params()
    params['results_path'] = 'results/', 'a'
    all_files = ar.get_all_files(params)
    gps = ar.read_output(all_files, kind='gp')
    wses = ar.read_output(all_files, kind='ws')
    z_grid = params['z_grid'][0]
    flex_ws = Interp(z_grid, ss_wage_flexible(params, shock=z_grid))
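    # For each (pi, lambda_) result: back up the old pickle, recompute the
    # wage distribution as an ECDF, overwrite it, and rewrite rigid output.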

    for key in gps.iterkeys():
        piname, lambda_ = [str(x).replace('.', '') for x in key]
        out_name = 'results/gp_' + piname + '_' + lambda_ + '.pkl'
        shutil.copy2(out_name, 'results/replaced_results/')
        ws = wses[key]
        params['pi'] = key[0], 'you'
        params['lambda_'] = key[1], 'idiot'
        new_g, shocks = get_new_g(ws, params)

        with open(out_name, 'w') as f:
            cPickle.dump(new_g, f)

        print("Fixed wage distribution for {}.".format(key))

        new_rigid_out = get_rigid_output(ws, params, flex_ws, new_g, shocks)

        out_name = 'results/rigid_output_' + piname + '_' + lambda_ + '_.txt'
        with open(out_name, 'w') as f:
            f.write(str(new_rigid_out))

        with open('results/fixup_notice.txt', 'a') as f:
            f.write("Fixed {}\n".format(key))
Example No. 13
def train_lstm(
    dim_proj=128,  # word embedding dimension and LSTM number of hidden units.
    patience=10,  # Number of epochs to wait before early stop if no progress
    max_epochs=5000,  # The maximum number of epochs to run
    dispFreq=10,  # Display to stdout the training progress every N updates
    decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_words=10000,  # Vocabulary size
    optimizer=adadelta,  # sgd, adadelta and rmsprop available; sgd is very hard to use, not recommended (probably needs momentum and a decaying learning rate).
    encoder='lstm',  # TODO: can be removed; must be lstm.
    saveto='lstm_model.npz',  # The best model will be saved there
    validFreq=370,  # Compute the validation error after this number of updates.
    saveFreq=1110,  # Save the parameters after every saveFreq updates
    maxlen=100,  # Sequences longer than this get ignored
    batch_size=16,  # The batch size during training.
    valid_batch_size=64,  # The batch size used for validation/test set.
    dataset='imdb',

    # Parameters for extra options
    noise_std=0.,
    use_dropout=True,  # if False slightly faster, but worse test error.
                       # This frequently needs a bigger model.
    reload_model=None,  # Path to a saved model we want to start from.
    test_size=-1,  # If >0, we keep only this number of test examples.
):

    # Model options
    model_options = locals().copy()
    print "model options", model_options

    load_data, prepare_data = get_dataset(dataset)

    print 'Loading data'
    train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                   maxlen=maxlen)
    if test_size > 0:
        # The test set is sorted by size, but we want to keep random-size
        # examples, so we must select a random subset of the examples.
        idx = numpy.arange(len(test[0]))
        numpy.random.shuffle(idx)
        idx = idx[:test_size]
        test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

    ydim = numpy.max(train[1]) + 1

    model_options['ydim'] = ydim

    print 'Building model'
    # This creates the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params('lstm_model.npz', params)

    # This creates Theano shared variables from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copies of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, mask,
     y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U'] ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, mask, y], cost, name='f_cost')

    grads = T.grad(cost, wrt=tparams.values())
    f_grad = theano.function([x, mask, y], grads, name='f_grad')

    lr = T.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads,
                                        x, mask, y, cost)

    print 'Optimization'

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    print "%d train examples" % len(train[0])
    print "%d valid examples" % len(valid[0])
    print "%d test examples" % len(test[0])

    history_errs = []
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    uidx = 0  # the number of updates done
    estop = False  # early stop
    start_time = time.time()
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray format
                # This swaps the axes!
                # Returns something of shape (minibatch maxlen, n samples)
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost

                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print 'Saving...',

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                    print 'Done'

                if numpy.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    train_err = pred_error(f_pred, prepare_data, train, kf)
                    valid_err = pred_error(f_pred, prepare_data, valid,
                                           kf_valid)
                    test_err = pred_error(f_pred, prepare_data, test, kf_test)

                    history_errs.append([valid_err, test_err])

                    if (uidx == 0 or
                        valid_err <= numpy.array(history_errs)[:, 0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    print ('Train ', train_err, 'Valid ', valid_err,
                           'Test ', test_err)

                    if (len(history_errs) > patience and
                        valid_err >= numpy.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print 'Early Stop!'
                            estop = True
                            break

            print 'Seen %d samples' % n_samples

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)

    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
    if saveto:
        numpy.savez(saveto, train_err=train_err,
                    valid_err=valid_err, test_err=test_err,
                    history_errs=history_errs, **best_p)
    print 'The code ran for %d epochs, with %f sec/epoch' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
    print >> sys.stderr, ('Training took %.1fs' %
                          (end_time - start_time))
    return train_err, valid_err, test_err
Example No. 14
    saver = tf.train.Saver()
    saver.save(sess, save_dir)
    print('Model Trained and Saved')


# In[ ]:


helpers.save_params((seq_length, save_dir))


# In[ ]:


_, vocab_to_int, int_to_vocab, token_dict = helpers.load_preprocess()
seq_length, load_dir = helpers.load_params()


# In[ ]:


def get_tensors(loaded_graph):
    """
    Get input, initial state, final state, and probabilities tensor from <loaded_graph>
    :param loaded_graph: TensorFlow graph loaded from file
    :return: Tuple (InputTensor, InitialStateTensor, FinalStateTensor, ProbsTensor)
    """
    InputTensor = loaded_graph.get_tensor_by_name("input:0")
    InitialStateTensor = loaded_graph.get_tensor_by_name("initial_state:0")
Example No. 15
        new_dir = 'previous' + str(new_num)
    except ValueError:
        new_dir = 'previous0'

    os.makedirs(os.path.join('results', new_dir))

    for f in old_files:
        shutil.copy2(os.path.join('.', f), os.path.join('.', new_dir, f))


if __name__ == '__main__':
    import sys
    params_path = sys.argv[1]
    # keep load_params outside so that each fork has the same random seed.
    np.random.seed(42)
    params = load_params(params_path)

    try:
        os.makedirs('./results/intermediate')
    except OSError:
        pass

    move_prior_runs()
    write_metadeta(params)

    pi_low = params['pi_low'][0]
    pi_high = params['pi_high'][0]
    pi_n = params['pi_n'][0]
    pi_grid = np.linspace(pi_low, pi_high, pi_n)
    # Parallel(n_jobs=-1)(delayed(iter_bellman_wrapper)(unique_params)
    # for unique_params in unique_param_generator(params))
Example No. 16
        new_num = np.max(old_nums) + 1
        new_dir = 'previous' + str(new_num)
    except ValueError:
        new_dir = 'previous0'

    os.makedirs(os.path.join('results', new_dir))

    for f in old_files:
        shutil.copy2(os.path.join('.', f), os.path.join('.', new_dir, f))

if __name__ == '__main__':
    import sys
    params_path = sys.argv[1]
    # keep load_params outside so that each fork has the same random seed.
    np.random.seed(42)
    params = load_params(params_path)

    try:
        os.makedirs('./results/intermediate')
    except OSError:
        pass

    move_prior_runs()
    write_metadeta(params)

    pi_low = params['pi_low'][0]
    pi_high = params['pi_high'][0]
    pi_n = params['pi_n'][0]
    pi_grid = np.linspace(pi_low, pi_high, pi_n)
    # Parallel(n_jobs=-1)(delayed(iter_bellman_wrapper)(unique_params)
    # for unique_params in unique_param_generator(params))