Example #1
from collections import Counter

# `process_file`, `get_embedding`, `save`, and `load` are project-local helpers.
def pre_process(args):
    # Process training set and use it to decide on the word/character vocabularies
    word_counter, char_counter = Counter(), Counter()
    train_examples, train_eval = process_file(args.train_file, "train", word_counter, char_counter)

    if args.short_test:
        word_emb_mat = load(args.word_emb_file)
        word2idx_dict = load(args.word2idx_file)
        char2idx_dict = load(args.char2idx_file)
        # these are not used in these models (even for the actual BiDAF)
        #char_emb_mat = None
        #char_emb_mat = load(args.char_emb_file, char_emb_mat)
    else:
        word_emb_mat, word2idx_dict = get_embedding(
            word_counter, 'word', emb_file=args.glove_file, vec_size=args.glove_dim, num_vectors=args.glove_num_vecs)
        char_emb_mat, char2idx_dict = get_embedding(char_counter, 'char', emb_file=None, vec_size=args.char_dim)

    # Process dev and test sets
    dev_examples, dev_eval = process_file(args.dev_file, "dev", word_counter, char_counter)
    build_features(args, train_examples, "train", args.train_record_file, word2idx_dict, char2idx_dict)

    # dev_examples used in build_features, which writes the npz file used to eval
    dev_meta = build_features(args, dev_examples, "dev", args.dev_record_file, word2idx_dict, char2idx_dict)

    if args.include_test_examples:
        test_examples, test_eval = process_file(args.test_file, "test", word_counter, char_counter)
        save(args.test_eval_file, test_eval, message="test eval")
        test_meta = build_features(args, test_examples, "test",
                                   args.test_record_file, word2idx_dict, char2idx_dict, is_test=True)
        save(args.test_meta_file, test_meta, message="test meta")

    save(args.word2idx_file, word2idx_dict, message="word dictionary")  # word2idx.json (seems not to be loaded by test)
    save(args.word_emb_file, word_emb_mat, message="word embedding")    # word_emb.json
    save(args.train_eval_file, train_eval, message="train eval")        # train_eval.json
    save(args.dev_eval_file, dev_eval, message="dev eval")              # dev_eval.json

    # new for paraphrase reverse lookup
    idx2word_dict = {value: key for key, value in word2idx_dict.items()}
    save(args.idx2word_file, idx2word_dict, message="NEW idx to word dictionary")
    # these are not used in these models (even for the actual BiDAF)
    #save(args.char_emb_file, char_emb_mat, message="char embedding")    # char_emb.json
    #save(args.char2idx_file, char2idx_dict, message="char dictionary")
    save(args.dev_meta_file, dev_meta, message="dev meta")              # dev_meta.json (not important)
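
The `save` and `load` helpers (like `process_file` and `get_embedding`) are project-local and not shown. Given the call signature `save(filename, obj, message=...)` and the `.json` filenames in the comments, a minimal sketch of what they presumably do (an assumption, not the project's actual code):

import json

def save(filename, obj, message=None):
    # Hypothetical reconstruction: announce what is being written, then JSON-dump it.
    if message is not None:
        print(f"Saving {message}...")
    with open(filename, "w") as fh:
        json.dump(obj, fh)

def load(filename):
    # Hypothetical counterpart used by the short_test branch above.
    with open(filename) as fh:
        return json.load(fh)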
Example #2
import pygame
import setup
import pygame.surfarray as surfarray
from pygame.locals import *
from itertools import cycle

FPS = 30
SCREENWIDTH = 288
SCREENHEIGHT = 512

pygame.init()
FPSCLOCK = pygame.time.Clock()
SCREEN = pygame.display.set_mode((SCREENWIDTH, SCREENHEIGHT))
pygame.display.set_caption('Flappy Bird')

IMAGES, SOUNDS, HITMASKS = setup.load()
PIPEGAPSIZE = 150  # gap between upper and lower part of pipe
BASEY = SCREENHEIGHT * 0.79

PLAYER_WIDTH = IMAGES['player'][0].get_width()
PLAYER_HEIGHT = IMAGES['player'][0].get_height()
PIPE_WIDTH = IMAGES['pipe'][0].get_width()
PIPE_HEIGHT = IMAGES['pipe'][0].get_height()
BACKGROUND_WIDTH = IMAGES['background'].get_width()

PLAYER_INDEX_GEN = cycle([0, 1, 2, 1])


class GameState:
    def __init__(self):
        self.score = self.playerIndex = self.loopIter = 0
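
`setup.load()` is not shown, but the lookups above pin down its shape: `IMAGES['player']` and `IMAGES['pipe']` are indexable frame lists, `IMAGES['background']` is a single surface, and `HITMASKS` backs pixel-level collision checks. A minimal sketch under those assumptions (asset paths and file names are hypothetical):

import pygame

def load():
    # Hypothetical asset loader consistent with the lookups in the game code.
    pipe = pygame.image.load('assets/pipe-green.png').convert_alpha()
    images = {
        'background': pygame.image.load('assets/background-day.png').convert(),
        # Three flap frames; PLAYER_INDEX_GEN cycles through them.
        'player': [pygame.image.load(f'assets/bird-{i}.png').convert_alpha()
                   for i in range(3)],
        # The upper pipe is the lower pipe flipped vertically.
        'pipe': [pygame.transform.flip(pipe, False, True), pipe],
    }
    sounds = {}  # pygame.mixer.Sound objects, omitted here

    def hitmask(img):
        # Per-pixel alpha mask used for pixel-accurate collision detection.
        return [[bool(img.get_at((x, y))[3]) for y in range(img.get_height())]
                for x in range(img.get_width())]

    hitmasks = {'player': [hitmask(f) for f in images['player']],
                'pipe': [hitmask(p) for p in images['pipe']]}
    return images, sounds, hitmasks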
Example #3
import setup
setup.load()

import reachrAPI
import manageFiles

import processor
import score
import recommender

processor.parameters['index'] = "codigo"
processor.parameters['properties'] = ["desc", "titulo", "area_atuacao"]

processor.parameters['new_path'] = reachrAPI.getJobToRecommend()
processor.parameters['current_path'] = reachrAPI.getAllJobs()

print(processor.parameters['new_path'])
print(processor.parameters['current_path'])

score.parameters['properties'] = ["desc", "titulo", "area_atuacao"]
score.parameters['weights'] = {"desc": 0.3, "titulo": 0.6, "area_atuacao": 0.1}
score.parameters['input'] = processor.run()

recommender.parameters['threshold'] = 0.2
recommender.parameters['selection-step'] = 2
recommender.parameters['key'] = processor.parameters['index']
recommender.parameters['inner-list'] = 'processo_seletivo'
recommender.parameters['candidates-list'] = 'candidatos'

recommender.parameters['input-jobs'] = manageFiles.read(processor.parameters['current_path'])
recommender.parameters['input-similars'] = score.run()
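
`processor`, `score`, and `recommender` are not shown, but they visibly share one convention: each module exposes a `parameters` dict that the caller fills in, and a `run()` that consumes it. (The listing appears to end before a final `recommender.run()` call.) A minimal sketch of that pattern follows; the scoring rule inside `run()` is a placeholder, not the real logic:

# Hypothetical skeleton of the shared parameters/run() module pattern (e.g. score.py).
parameters = {}

def run():
    # Consume the configuration the caller injected before this call.
    weights = parameters['weights']
    properties = parameters['properties']
    items = parameters['input']
    # Placeholder rule: score each item by the total weight of the
    # configured properties it actually carries.
    return [sum(weights[p] for p in properties if p in item) for item in items]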
Example #4
def main(args):
    # Set up logging and devices (unchanged from train.py)
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)                  # train only, not in test
    device, args.gpu_ids = util.get_available_devices() # todo(small): should this be args (compare test_para)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))        # args.py: default size is 64

    # Set random seed (unchanged) - train only
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Prepare BiDAF model (must already be trained)
    log.info('Building BiDAF model (should be pretrained)')
    bidaf_model = BiDAF(word_vectors=word_vectors,        # todo: these word vectors shouldn't matter,
                        hidden_size=args.hidden_size)     # since they are loaded in during load_model?
                        # drop_prob=args.drop_prob)       # no drop probability since we are not training
    bidaf_model = nn.DataParallel(bidaf_model, args.gpu_ids)

    if args.short_test:
        args.hidden_size = 5
    elif not args.load_path:
        log.info("Trying to trian paraphraser withou bidaf model. "
                 "First train BiDAF and then specify the load path. Exiting")
        exit(1)
    else:
        log.info(f'Loading checkpoint from {args.load_path}...')
        bidaf_model = util.load_model(bidaf_model, args.load_path, args.gpu_ids, return_step=False) # don't need step since we aren't training
        bidaf_model = bidaf_model.to(device)
        bidaf_model.eval()                  # we eval only (vs train)

    # todo: Set up the Paraphraser model
    paraphraser_model = Paraphraser(word_vectors=word_vectors,
                                    hidden_size=args.hidden_size,
                                    drop_prob=args.drop_prob)

    # Get data loader
    log.info('Building dataset...')
    # New for paraphrase: squad_paraphrase has extra fields
    train_dataset = SQuAD_paraphrase(args.train_record_file, args.use_squad_v2)    # train.npz (from setup.py, build_features())
    train_loader = data.DataLoader(train_dataset,                       # this dataloader used for all epoch iteration
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn_para)
    dev_dataset = SQuAD_paraphrase(args.dev_record_file, args.use_squad_v2)        # dev.npz (same as above)
    dev_loader = data.DataLoader(dev_dataset,                           # dev.npz used in evaluate() fcn
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn_para)

    # todo: this is just for looking at the paraphrases
    idx2word_dict = load(args.idx2word_file)

    # Get saver
    # saver = util.CheckpointSaver(args.save_dir,
    #                              max_checkpoints=args.max_checkpoints,
    #                              metric_name=args.metric_name,
    #                              maximize_metric=args.maximize_metric,
    #                              log=log)

    # Get optimizer and scheduler
    # ema = util.EMA(paraphraser_model, args.ema_decay)
    # optimizer = optim.Adadelta(paraphraser_model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)
    # scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    # Train
    step = 0
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)

    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, cphr_idxs, qphr_idxs, qphr_types, ids in train_loader:
                # Setup for forward
                # note that cc_idxs, qc_idxs are not used! (character indices)
                cw_idxs = cw_idxs.to(device)        # .to(device) returns the tensor on the compute device (GPU if available)
                qw_idxs = qw_idxs.to(device)

                cphr_idxs = cphr_idxs.to(device)
                qphr_idxs = qphr_idxs.to(device)
                qphr_types = qphr_types.to(device)

                batch_size = cw_idxs.size(0)
                # if args.short_test:
                #     print(f'batch size: {batch_size}')
                #     for i, type in enumerate(cphr_idxs[0]):
                #         print(f'type: {i}')
                #         pp(type)
                #     for x in (qphr_idxs[0], qphr_types[0]):
                #         pp(x)
                #     return

                paraphrased = paraphraser_model(qphr_idxs, qphr_types, cphr_idxs)
                for idx, p in enumerate(paraphrased):   # iterate over the batch
                    non_zeros = p[p.nonzero()].squeeze()
                    #paraphrased[idx] = non_zeros
                    sentence_as_list = [idx2word_dict[str(w.item())] for w in non_zeros]
                    pp(" ".join(sentence_as_list))
                    #pp([idx2word_dict[w] for w in non_zeros])

                if args.short_test:
                    return

                # NOTE: the remainder of the loop still references `model`, `optimizer`,
                # `scheduler`, `ema`, and `saver` from the original train.py; restore the
                # commented-out setup above before running past the short_test return.
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)      # // is floor division
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,    # call eval with dev_loader
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
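
`collate_fn_para` is not shown. Its job here is to batch variable-length index sequences, which means padding each field to the longest example in the batch; below is a minimal sketch of that padding step (`pad_batch` is a hypothetical helper, not the project's actual function):

import torch

def pad_batch(seqs, pad_value=0):
    # Pad a list of 1-D LongTensors to the length of the longest one.
    max_len = max(len(s) for s in seqs)
    out = torch.full((len(seqs), max_len), pad_value, dtype=torch.int64)
    for i, s in enumerate(seqs):
        out[i, :len(s)] = s
    return out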
Example #5
from Crypto.Util.number import bytes_to_long, long_to_bytes
from base64 import b64encode, b64decode
from json import dumps, loads

from secret import USERNAME, PASSWORD, FLAG
import setup

key = setup.load()
encrypt = lambda x: b64encode(
    long_to_bytes(pow(bytes_to_long(x), key['e'], key['n'])))
decrypt = lambda x: long_to_bytes(
    pow(bytes_to_long(b64decode(x)), key['d'], key['n']))


def generate_token(username, password):
    credential = {'username': username, 'password': password, 'role': 'user'}
    token = encrypt(dumps(credential).encode())  # dumps() returns str; bytes_to_long expects bytes
    return token
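
# Usage sketch (hypothetical values): the token is just the RSA-encrypted JSON
# blob, so it round-trips through decrypt:
#   token = generate_token('alice', 'hunter2')
#   loads(decrypt(token))  # -> {'username': 'alice', 'password': 'hunter2', 'role': 'user'}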


def validate_token(token):
    try:
        credential = loads(decrypt(token))
        # `and` binds tighter than `or`, so a credential whose role is 'admin'
        # passes this check regardless of username/password.
        if credential['username'] == USERNAME and credential[
                'password'] == PASSWORD or credential['role'] == 'admin':
            return FLAG
        elif credential[
                'username'] == USERNAME and credential['password'] != PASSWORD:
            return 'incorrect password'
        else:
            return 'dear ' + credential['username']