Example #1
import pickle

from sklearn.model_selection import train_test_split

# FileHandler, randomly_pick_from_sdf, optimize_rf, train_rf and test_rf are
# helpers defined elsewhere in this project.


def dict_to_pickle(word_dict, py_dict, num_sum, path):
    # Serialize each object to its own pickle file.  Files must be opened in
    # binary write mode ('wb'); the with-block closes them automatically.
    with open(path['word_path'], 'wb') as outfile:
        pickle.dump(word_dict, outfile)
    with open(path['py_path'], 'wb') as outfile:
        pickle.dump(py_dict, outfile)
    with open(path['num_path'], 'wb') as outfile:
        pickle.dump(num_sum, outfile)
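# Usage sketch (not from the original source): the dictionaries and output
# file names below are placeholders; only the word_path / py_path / num_path
# keys come from dict_to_pickle itself.
paths = {'word_path': 'word_dict.pkl',
         'py_path': 'py_dict.pkl',
         'num_path': 'num_sum.pkl'}
dict_to_pickle({'cat': 3, 'dog': 1}, {'mao': 3, 'gou': 1}, 4, paths)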
def main(sa):
    # sa: argument list [sln_filename, sdf_filename].
    sln_filename = sa[0]
    sdf_filename = sa[1]
    # Build fingerprints for the positive (pain) set and a 400-molecule control set.
    sln_fps = FileHandler.SlnFile(sln_filename).get_fingerprint_list()
    sdf_fps = randomly_pick_from_sdf(sdf_filename, 400)
    pain_train, pain_test = train_test_split(sln_fps,
                                             test_size=0.2,
                                             random_state=24)
    control_train, control_test = train_test_split(sdf_fps,
                                                   test_size=0.2,
                                                   random_state=24)
    #rf = train_rf(pain_train + pain_test,
    #              control_train + control_test,
    #              n_est=300, rand_state=1986)
    #pickle.dump(rf, open("rf_n300.p", "wb"))
    #test_rf(pain_test, control_test, rf)
    #control_train = randomly_pick_from_sdf(sdf_filename, 400)
    #pain_test = sln_fps
    optimize_rf(pain_train, control_train)
    rf = train_rf(pain_train, control_train, n_est=300, rand_state=1986)
    with open("rf_n300.p", "wb") as model_file:
        pickle.dump(rf, model_file)
    test_rf(pain_test, control_test, rf)
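A minimal sketch of how main could be driven from the command line; the script and data file names below are placeholder assumptions, with sa taken from sys.argv after the script name:

import sys

if __name__ == '__main__':
    # Hypothetical invocation: python build_rf.py ligands.sln decoys.sdf
    main(sys.argv[1:])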
Example #3
import os
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# ModuleIDBasedRNN, load_training, tunable_parameters, input_target,
# modules and device are defined elsewhere in the source module.


def train(train_mode):
    train_data = load_training()
    print("Total events:", len(train_data))
    print("Total truth particles:", sum([y.shape[0] for x, y in train_data]))

    model = ModuleIDBasedRNN(input_dim=modules + 1,
                             hidden_dim=20,
                             output_dim=modules + 1,
                             batch_size=1,
                             device=device)

    model.to(device)
    print("total parameters:", tunable_parameters(model))

    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

    loss_file_name = os.path.join('output', 'loss', 'ModuleRNN.pkg.gz')
    # Respect the train_mode argument instead of a hard-coded flag.
    if train_mode:
        nepochs = 50
        all_losses = []

        for epoch in tqdm(range(nepochs)):
            # Iterate over the first half of the training data.
            loss_evt = []
            for ievt in tqdm(range(int(len(train_data) / 2))):
                event, truth = train_data[ievt]
                total_loss = 0
                for pID in truth.values:
                    hits = event[event['particle_id'] == pID]['uID'].values
                    input_, target_ = input_target(hits)
                    input_ = input_.to(device)
                    target_ = target_.to(device)

                    model.hidden = model.init_hidden()
                    model.zero_grad()
                    output = model(input_)
                    loss = criterion(output, target_)
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item() / input_.size(1)

                loss_evt.append(total_loss / truth.shape[0])
            all_losses.append(loss_evt)
            torch.save(
                model.state_dict(),
                os.path.join('output', 'model', 'RNNModule_' + str(epoch)))
            with open(
                    os.path.join('output', 'loss', 'RNNModule_' + str(epoch)),
                    'wb') as fp:
                pickle.dump(loss_evt, fp)

        with open(loss_file_name, 'wb') as fp:
            pickle.dump(all_losses, fp)
    else:
        # in testing mode
        with open(loss_file_name, 'rb') as fp:
            all_losses = pickle.load(fp)

        print(all_losses)
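The training loop above relies on an input_target helper that is not shown. Below is a minimal sketch of one plausible implementation, assuming next-module prediction with one-hot inputs over modules + 1 classes and targets shifted by one hit; it is an illustration, not the original helper:

import torch
import torch.nn.functional as F

def input_target(hits):
    # Hypothetical encoding of one particle's hit sequence.
    # hits: 1-D array of module indices; `modules` is the same module count
    # used to size the RNN above.
    seq = torch.as_tensor(hits, dtype=torch.long)
    # Input: every hit except the last, one-hot encoded and shaped
    # (batch=1, seq_len, modules + 1) for a batch-first RNN.
    input_ = F.one_hot(seq[:-1], num_classes=modules + 1).float().unsqueeze(0)
    # Target: the index of the next hit at each step, shaped (seq_len,)
    # so it matches nn.NLLLoss applied to log-probabilities.
    target_ = seq[1:]
    return input_, target_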
Example #4
    # (Inside the per-epoch training loop: evaluate on the validation set.)
    val_loss = evaluate(val_data)
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
            'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                       val_loss, math.exp(val_loss)))
    print('-' * 89)
    # Anneal the learning rate.
    if prev_val_loss and val_loss > prev_val_loss:
        lr /= 4
    prev_val_loss = val_loss


# Run on test data and save the model.
test_loss = evaluate(test_data)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
print('=' * 89)
if args.save != '':
    with open(args.save, 'wb') as f:
        torch.save(model, f)

## Saving embeddings ##
embeddings = model.encoder
#embeddings_to_plot = embeddings.weight.data.numpy()
embeddings_to_plot = embeddings.weight.data.cpu().numpy()
if args.saveembed != '':
    with open(args.saveembed, 'wb') as f_2:
        pickle.dump(embeddings_to_plot, f_2)
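As a follow-up, a sketch of reloading the pickled embedding matrix and projecting it to 2-D with t-SNE for plotting; the file name and perplexity below are placeholder assumptions:

import pickle
from sklearn.manifold import TSNE

# Hypothetical reload of the saved (vocab_size x emsize) embedding matrix.
with open('embeddings.pkl', 'rb') as f:
    embedding_matrix = pickle.load(f)

# 2-D projection of every word vector for visualization.
projected = TSNE(n_components=2, perplexity=30).fit_transform(embedding_matrix)
print(projected.shape)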