Example #1
def main():
    args = parse_args()

    model = Model()
    with open(args.model, 'rb') as file:
        model.load(file)

    generator = Generator(model)

    if args.output is not None:
        file = open(args.output, 'a', encoding='utf-8')
    else:
        file = stdout  # requires: from sys import stdout

    for _ in range(args.count):
        text = generator.generate(args.length,
                                  seed=args.seed,
                                  min_n=args.min_n,
                                  n=args.n,
                                  break_on_end=args.break_on_end)
        if args.wrap:
            text = '\n'.join(wrap(text))  # textwrap.wrap
        print(text, end='\n\n', file=file)  # blank line between generated samples

    if args.output is not None:
        file.close()
Example #2
def test_cl_rsd():
    ell = np.arange(2, 500)    
    
    # reference C_ell computed by Anna Porredon
    cl_anna = np.loadtxt('C_ells_bin1_1_linear.txt')

    # FFTlog-based model (Mehdi Rezaie)
    cosmo = init_cosmology()    
    z, b, dNdz = init_sample('mock', verb=False)


    th = Model(cosmo)
    th.add_tracer(z, b, dNdz)   
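    # baseline C_ell without RSD, and C_ell including redshift-space distortions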
    cl_null = th.compute_cl(ell, fnl=0.0, has_rsd=False, has_fnl=False)
    cl_rsd = th.compute_cl(ell, fnl=0.0, has_rsd=True, has_fnl=False)
    
    ## additional runs
    #cl_fnlp = th.compute_cl(ell, fnl=100.0, has_rsd=True, has_fnl=True)
    #cl_fnln = th.compute_cl(ell, fnl=-100.0, has_rsd=True, has_fnl=True)
    #cls_ccl = run_ccl(cosmo, (z, dNdz), (z, b), ell)
    #cls_ccl_rsd = run_ccl(cosmo, (z, dNdz), (z, b), ell, has_rsd=True)
    
    # benchmark: the RSD spectrum must match the reference C_ell to within 1e-6 (absolute)
    assert (abs(cl_rsd - cl_anna[2:, 1]) < 1.0e-6).all()
    
    fig, ax = plt.subplots()
    ax.plot(ell, cl_null, 'C0--', alpha=0.5, label='FFTlog')
    ax.plot(ell, cl_rsd, 'C0-', lw=1, alpha=0.5, label='FFTlog+RSD')
    ax.plot(cl_anna[2:, 0], cl_anna[2:, 1], 'r:', label='Anna')
    
    ## additional curves
    #ax.plot(ell, cl_fnlp, 'C0-.', lw=1, label='FFTlog+RSD (fnl=100)')   
    #ax.plot(ell, cl_fnln, 'C1-.', lw=1, label='FFTlog+RSD (fnl=-100)')    
    #ax.plot(ell, a*cls_ccl, 'C1-', alpha=0.8, label='CCL (Limber)')
    #ax.plot(ell, a*cls_ccl_rsd,'C1--', lw=1, label='CCL+RSD')

    ax.legend(frameon=False, ncol=2, loc='lower left', fontsize=10)
    ax.set(xscale='log', yscale='log', xlabel=r'$\ell$', ylabel=r'C$_{\ell}$')
    ax.tick_params(direction='in', which='both', axis='both', right=True, top=True)
    ax.grid(True, ls=':', color='grey', which='both', lw=0.2)

    # ax0 = fig.add_axes([0.2, 0.2, 0.4, 0.3])
    # add_plot(ax0)
    # ax0.set(xlim=(1.9, 5), ylim=(0.9e-6, 2.1e-6))
    # ax0.set_xticks([2, 3, 4])
    ax.set_ylim(8.0e-8, 2.0e-5)
    fig.savefig('cl_fftlog_ccl_benchmark.png', 
                dpi=300, bbox_inches='tight', facecolor='w')    
Example #3
def train(hidden_dim_sweep=(5, 10, 25),
          n_epochs=20,
          out_dir='out',
          data_dir='data',
          device=util.device(),
          Optimizer=optim.Adam,
          seed=42):
    out_dir, data_dir = map(Path, (out_dir, data_dir))
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    tracess = []
    best_trainer = None
    best_loss = util.INF
    vocab = util.Vocab.load(data_dir / 'vocab.txt')
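    # sweep over hidden sizes and keep the trainer whose final loss from train_loop is lowest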
    for hidden_dim in hidden_dim_sweep:
        model = Model(hidden_dim=hidden_dim, vocab=vocab, out_dim=2)
        loss_fn = nn.CrossEntropyLoss()
        optimizer = Optimizer(model.parameters(), lr=1e-4)
        trainer = Trainer(model, loss_fn, vocab, device)
        traces, loss_cur = trainer.train_loop(data_dir=data_dir,
                                              n_epochs=n_epochs,
                                              optimizer=optimizer,
                                              scheduler=None)
        if loss_cur < best_loss:
            best_trainer = trainer
            best_loss = loss_cur
        tracess.append((hidden_dim, traces))

    out_dir.mkdir(exist_ok=True)
    for h, traces in tracess:
        plotting.plot_traces(traces,
                             out=out_dir / f'traces_{h}.png',
                             title=f'Loss, hidden_dim={h}')
        util.jsondump(traces, out_dir / f'traces.dim_{h}.seed_{seed}.json')

    L.info('Best model loss: %s', best_loss)

    model_file = out_dir / 'model.pt'
    L.info('Saving best model to %s', model_file)
    torch.save(best_trainer.model.state_dict(), model_file)
Example #4
def test(hidden_dim=25, out_dir='out', data_dir='data', device=util.device()):
    out_dir, data_dir = map(Path, (out_dir, data_dir))
    vocab = util.Vocab.load(data_dir / 'vocab.txt')
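    # note: the Model arguments here must match those used in train() (e.g. out_dim) for load_state_dict to succeed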
    model = Model(hidden_dim=hidden_dim, vocab=vocab)
    model.load_state_dict(torch.load(out_dir / 'model.pt', map_location=device))
    loss_fn = nn.CrossEntropyLoss()
    trainer = Trainer(model, loss_fn, vocab, device)

    with data.SentPairStream(data_dir / 'dev.tsv') as dev_data:
        dev_loader = torchdata.DataLoader(dev_data,
                                          shuffle=False,
                                          batch_size=8)
        dev_metrics = trainer.eval_(dev_loader)
        L.info('Dev performance: %s', dev_metrics)

    with data.SentPairStream(data_dir / 'test.tsv') as test_data:
        test_loader = torchdata.DataLoader(test_data,
                                           shuffle=False,
                                           batch_size=8)
        test_metrics = trainer.eval_(test_loader)
        L.info('Test performance: %s', test_metrics)
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', type=str, required=True)
    parser.add_argument('--vocabulary_file', type=str, required=True)
    parser.add_argument('--output_file', type=str, required=True)

    parser.add_argument('--seed', type=str, default="Once upon a time, ")
    parser.add_argument('--sample_length', type=int, default=1500)
    parser.add_argument('--log_frequency', type=int, default=100)
    args = parser.parse_args()

    model_name = args.model_name
    vocabulary_file = args.vocabulary_file
    output_file = args.output_file
    seed = args.seed  # argparse already returns a str under Python 3
    sample_length = args.sample_length
    log_frequency = args.log_frequency

    model = Model(model_name)
    model.restore()
    classifier = model.get_classifier()

    vocabulary = Vocabulary()
    vocabulary.retrieve(vocabulary_file)

    sample_file = codecs.open(output_file, 'w', 'utf_8')

    stack = deque()
    for _ in range(model.sequence_length - len(seed)):
        stack.append(u' ')

    for char in seed:
        if char not in vocabulary.vocabulary:
            print(char, "is not in vocabulary file")
            char = u' '
        stack.append(char)
        sample_file.write(char)

    with tf.Session() as sess:
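        # TensorFlow 1.x graph/session API: restore the latest checkpoint, then sample one character per step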
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables())
        ckpt = tf.train.get_checkpoint_state(model_name)

        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

            for i in range(sample_length):
                vector = []
                for char in stack:
                    vector.append(vocabulary.binary_vocabulary[char])
                vector = np.array([vector])
                prediction = sess.run(classifier, feed_dict={model.x: vector})
                predicted_char = vocabulary.char_lookup[np.argmax(prediction)]

                stack.popleft()
                stack.append(predicted_char)
                sample_file.write(predicted_char)

                if i % log_frequency == 0:
                    print "Progress: {}%".format((i * 100) / sample_length)

            sample_file.close()
            print "Sample saved in {}".format(output_file)
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--training_file', type=str, required=True)
    parser.add_argument('--vocabulary_file', type=str, required=True)
    parser.add_argument('--model_name', type=str, required=True)

    parser.add_argument('--epoch', type=int, default=200)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--sequence_length', type=int, default=50)
    parser.add_argument('--log_frequency', type=int, default=100)
    parser.add_argument('--learning_rate', type=float, default=0.002)
    parser.add_argument('--units_number', type=int, default=128)
    parser.add_argument('--layers_number', type=int, default=2)
    args = parser.parse_args()

    training_file = args.training_file
    vocabulary_file = args.vocabulary_file
    model_name = args.model_name

    epoch = args.epoch
    batch_size = args.batch_size
    sequence_length = args.sequence_length
    log_frequency = args.log_frequency
    learning_rate = args.learning_rate

    batch = Batch(training_file, vocabulary_file, batch_size, sequence_length)

    input_number = batch.vocabulary.size
    classes_number = batch.vocabulary.size
    units_number = args.units_number
    layers_number = args.layers_number

    print "Start training with epoch: {}, batch_size: {}, log_frequency: {}," \
          "learning_rate: {}".format(epoch, batch_size, log_frequency, learning_rate)

    if not os.path.exists(model_name):
        os.makedirs(model_name)

    model = Model(model_name)
    model.build(input_number, sequence_length, layers_number, units_number, classes_number)
    classifier = model.get_classifier()

    cost = tf.reduce_mean(tf.square(classifier - model.y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    expected_prediction = tf.equal(tf.argmax(classifier, 1), tf.argmax(model.y, 1))
    accuracy = tf.reduce_mean(tf.cast(expected_prediction, tf.float32))

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        iteration = 0

        # run until the batcher has completed the requested number of full passes over the data
        while batch.dataset_full_passes < epoch:
            iteration += 1
            batch_x, batch_y = batch.get_next_batch()
            batch_x = batch_x.reshape((batch_size, sequence_length, input_number))

            sess.run(optimizer, feed_dict={model.x: batch_x, model.y: batch_y})
            if iteration % log_frequency == 0:
                acc = sess.run(accuracy, feed_dict={model.x: batch_x, model.y: batch_y})
                loss = sess.run(cost, feed_dict={model.x: batch_x, model.y: batch_y})
                print("Iteration {}, batch loss: {:.6f}, training accuracy: {:.5f}".format(iteration * batch_size,
                                                                                           loss, acc))
        batch.clean()
        print("Optimization done")

        saver = tf.train.Saver(tf.global_variables())
        checkpoint_path = "{}/{}.ckpt".format(model_name, model_name)
        saver.save(sess, checkpoint_path, global_step=iteration * batch_size)
        print("Model saved in {}".format(model_name))
Example #7
t_dataset = MelanomaDataset(df=df, imfolder=test,
                            train=False, transforms=test_transform, meta_features=meta_features)

print('Length of test set is {}'.format(len(t_dataset)))

testloader = DataLoader(t_dataset, batch_size=8, shuffle=False, num_workers=8)

"""Testing"""
# model = ResNetModel()()
# model = EfficientModel()
# model = EfficientModel(n_meta_features=len(meta_features))
model = Model(arch='efficientnet-b1')
# model.load_state_dict(torch.load("../checkpoint/fold_1/efficient_256/efficientb0_256_14_0.9212.pth", map_location=torch.device(device)))
model.load_state_dict(torch.load("..//checkpoint/fold_1/efficient_320/efficientb1_320_14_0.9293.pth", map_location=torch.device(device)))
model.to(device)

model.eval()
test_prob_stack = []
img_ids = []
with torch.no_grad():
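    # several full passes over the test loader, presumably to average randomized test-time augmentations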
    for i in range(15):
        test_prob = []
        for img, meta, img_id in tqdm(testloader):
            if train_on_gpu:
                img, meta = img.to(device), meta.to(device)

            logits = model.forward(img)
Example #8
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Math
import zeus
from modules import init_cosmology, init_sample, Model
import sys
sys.path.append('/Users/rezaie/github/LSSutils')
from lssutils.utils import histogram_cell

# --- theory
cosmo = init_cosmology()
z, b, dNdz = init_sample()
model = Model(cosmo)
model.add_tracer(z, b, dNdz)

# --- data
cl_mocks = np.load('cl_mocks_1k.npz', allow_pickle=True)
cl_full = cl_mocks['full']  # select full sky mocks
# bin measurements
bins = np.arange(1, 501, 20)

cl_fullb = []
for i in range(cl_full.shape[0]):
    x, clb_ = histogram_cell(cl_full[i, :], bins=bins)
    cl_fullb.append(clb_)
    #print('.', end='')
cl_fullb = np.array(cl_fullb)
y = cl_fullb.mean(axis=0)

nmocks, nbins = cl_fullb.shape
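# correction factor for a covariance estimated from a finite number of mocks (Hartlap et al. 2007)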
hf = (nmocks - 1.0) / (nmocks - nbins - 2.0)