def pre_process(args):
    """Build vocabularies and embeddings from the raw data files and write
    every artifact (feature records, eval dicts, lookup tables) that the
    training and evaluation scripts later read.

    Args:
        args: Namespace of paths/hyperparameters (train_file, glove_file,
            *_record_file, *_eval_file, short_test, include_test_examples, ...).
    """
    # Counters are filled in-place by process_file and decide the vocabularies.
    wcounter, ccounter = Counter(), Counter()
    train_examples, train_eval = process_file(args.train_file, "train",
                                              wcounter, ccounter)

    if not args.short_test:
        # Full run: derive embedding matrices + vocab dicts from the counters.
        word_emb_mat, word2idx_dict = get_embedding(
            wcounter, 'word', emb_file=args.glove_file,
            vec_size=args.glove_dim, num_vectors=args.glove_num_vecs)
        char_emb_mat, char2idx_dict = get_embedding(
            ccounter, 'char', emb_file=None, vec_size=args.char_dim)
    else:
        # Short test: reuse artifacts saved by a previous full run.
        word_emb_mat = load(args.word_emb_file)
        word2idx_dict = load(args.word2idx_file)
        char2idx_dict = load(args.char2idx_file)
        # Char embeddings are not used in these models (even for the actual
        # BiDAF), so no char_emb matrix is loaded here.

    # Dev set processed with the same (already populated) counters.
    dev_examples, dev_eval = process_file(args.dev_file, "dev",
                                          wcounter, ccounter)

    build_features(args, train_examples, "train", args.train_record_file,
                   word2idx_dict, char2idx_dict)
    # build_features writes the npz file that evaluation later reads.
    dev_meta = build_features(args, dev_examples, "dev", args.dev_record_file,
                              word2idx_dict, char2idx_dict)

    if args.include_test_examples:
        test_examples, test_eval = process_file(args.test_file, "test",
                                                wcounter, ccounter)
        save(args.test_eval_file, test_eval, message="test eval")
        test_meta = build_features(args, test_examples, "test",
                                   args.test_record_file, word2idx_dict,
                                   char2idx_dict, is_test=True)
        save(args.test_meta_file, test_meta, message="test meta")

    save(args.word2idx_file, word2idx_dict, message="word dictionary")
    save(args.word_emb_file, word_emb_mat, message="word embedding")
    save(args.train_eval_file, train_eval, message="train eval")
    save(args.dev_eval_file, dev_eval, message="dev eval")

    # Reverse lookup (index -> word), used to inspect generated paraphrases.
    idx2word_dict = {idx: word for word, idx in word2idx_dict.items()}
    save(args.idx2word_file, idx2word_dict, message="NEW idx to word dictionary")

    # Char embedding/dictionary saves are intentionally omitted: the models
    # never load them.
    save(args.dev_meta_file, dev_meta, message="dev meta")
# Flappy Bird game environment (pygame). Importing this module has side
# effects: it initializes the display and loads all game assets.
import pygame
import setup
import pygame.surfarray as surfarray
from pygame.locals import *
from itertools import cycle

FPS = 30            # frames per second cap for the game clock
SCREENWIDTH = 288   # window width in pixels
SCREENHEIGHT = 512  # window height in pixels

pygame.init()
FPSCLOCK = pygame.time.Clock()
SCREEN = pygame.display.set_mode((SCREENWIDTH, SCREENHEIGHT))
pygame.display.set_caption('Flappy Bird')

# setup.load() supplies the sprite images, sound effects and hitmasks.
IMAGES, SOUNDS, HITMASKS = setup.load()
PIPEGAPSIZE = 150 # gap between upper and lower part of pipe
BASEY = SCREENHEIGHT * 0.79  # vertical position of the base strip (79% down)

# Sprite dimensions derived from the loaded assets.
PLAYER_WIDTH = IMAGES['player'][0].get_width()
PLAYER_HEIGHT = IMAGES['player'][0].get_height()
PIPE_WIDTH = IMAGES['pipe'][0].get_width()
PIPE_HEIGHT = IMAGES['pipe'][0].get_height()
BACKGROUND_WIDTH = IMAGES['background'].get_width()

# Endless cycle of sprite indices used to animate the bird's wing flap.
PLAYER_INDEX_GEN = cycle([0, 1, 2, 1])


class GameState:
    # NOTE(review): this class continues beyond the visible portion of the
    # file; only the first statement of __init__ is shown here.
    def __init__(self):
        self.score = self.playerIndex = self.loopIter = 0
# Recommendation pipeline driver: wires the processor -> score -> recommender
# stages together through each module's shared `parameters` dict, in order.
import setup

setup.load()  # NOTE(review): presumably loads global config/resources -- confirm

import reachrAPI
import manageFiles
import processor
import score
import recommender

# --- processor stage: key field and text fields to process, plus the paths
#     to the job-to-recommend and the full job list fetched from the API.
processor.parameters['index'] = "codigo"
processor.parameters['properties'] = ["desc","titulo","area_atuacao"]
processor.parameters['new_path'] = reachrAPI.getJobToRecommend()
processor.parameters['current_path'] = reachrAPI.getAllJobs()
print(processor.parameters['new_path'])
print(processor.parameters['current_path'])

# --- score stage: same fields, weighted; 'titulo' dominates (0.6).
score.parameters['properties'] = ["desc","titulo","area_atuacao"]
score.parameters['weights'] = {"desc":0.3, "titulo":0.6, "area_atuacao":0.1}
score.parameters['input'] = processor.run()  # processor must run before scoring

# --- recommender stage: similarity threshold, selection step, and the inputs
#     produced by the previous stages.
recommender.parameters['threshold'] = 0.2
recommender.parameters['selection-step'] = 2
recommender.parameters['key'] = processor.parameters['index']
recommender.parameters['inner-list'] = 'processo_seletivo'
recommender.parameters['candidates-list'] = 'candidatos'
recommender.parameters['input-jobs'] = manageFiles.read(processor.parameters['current_path'])
recommender.parameters['input-similars'] = score.run()  # scorer runs last
def main(args):
    """Train/inspect loop for the paraphraser built on a pretrained BiDAF.

    NOTE(review): several names used in the training section below
    (`optimizer`, `scheduler`, `ema`, `saver`, `model`) are never defined --
    their setup is commented out -- so any run that gets past the
    `args.short_test` early return will raise NameError. This reads as
    work-in-progress code; confirm intent before relying on it.
    """
    # Set up logging and devices (unchanged from train.py)
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)  # train only, not in test
    device, args.gpu_ids = util.get_available_devices()  # todo(small): should this be args (compare test_para)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))  # args.py: default size is 64

    # Set random seed (unchanged) - train only
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Prepare BiDAF model (must already trained)
    log.info('Building BiDAF model (should be pretrained)')
    bidaf_model = BiDAF(word_vectors=word_vectors,  # todo: these word vectors shouldn't matter?
                        hidden_size=args.hidden_size)  # since they will be loaded in during load_model?
                        #drop_prob=args.drop_prob)  # no drop probability since we are not training
    bidaf_model = nn.DataParallel(bidaf_model, args.gpu_ids)
    if args.short_test:
        # NOTE(review): shrinking hidden_size happens AFTER bidaf_model was
        # built with the original value -- only the Paraphraser below sees 5.
        args.hidden_size = 5
    elif not args.load_path:
        # NOTE(review): typos in the message ("trian", "withou") -- left as-is.
        log.info("Trying to trian paraphraser withou bidaf model. "
                 "First train BiDAF and then specify the load path. Exiting")
        exit(1)
    else:
        log.info(f'Loading checkpoint from {args.load_path}...')
        bidaf_model = util.load_model(bidaf_model, args.load_path, args.gpu_ids,
                                      return_step=False)  # don't need step since we aren't training
    bidaf_model = bidaf_model.to(device)
    bidaf_model.eval()  # we eval only (vs train)

    # todo: Setup the Paraphraser model
    paraphaser_model = Paraphraser(word_vectors=word_vectors,
                                   hidden_size=args.hidden_size,
                                   drop_prob=args.drop_prob)

    # Get data loader
    log.info('Building dataset...')
    # New for paraphrase: squad_paraphrase has extra fields
    train_dataset = SQuAD_paraphrase(args.train_record_file, args.use_squad_v2)  # train.npz (from setup.py, build_features())
    train_loader = data.DataLoader(train_dataset,  # this dataloader used for all epoch iteration
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn_para)
    dev_dataset = SQuAD_paraphrase(args.dev_record_file, args.use_squad_v2)  # dev.npz (same as above)
    dev_loader = data.DataLoader(dev_dataset,  # dev.npz used in evaluate() fcn
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn_para)

    # todo: this is just for looking at the paraphrases
    idx2word_dict = load(args.idx2word_file)

    # Get saver
    # saver = util.CheckpointSaver(args.save_dir,
    #                              max_checkpoints=args.max_checkpoints,
    #                              metric_name=args.metric_name,
    #                              maximize_metric=args.maximize_metric,
    #                              log=log)

    # Get optimizer and scheduler
    # ema = util.EMA(paraphaser_model, args.ema_decay)
    # optimizer = optim.Adadelta(paraphaser_model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)
    # scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Train
    step = 0
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)  # step == 0 here, so epoch starts at 0
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, cphr_idxs, qphr_idxs, qphr_types, ids in train_loader:
                # Setup for forward
                # note that cc_idxs, qc_idxs are not used! (character indices)
                cw_idxs = cw_idxs.to(device)  # todo what does this actually do
                qw_idxs = qw_idxs.to(device)
                cphr_idxs = cphr_idxs.to(device)
                qphr_idxs = qphr_idxs.to(device)
                qphr_types = qphr_types.to(device)
                batch_size = cw_idxs.size(0)
                # if args.short_test:
                #     print(f'batch size: {batch_size}')
                #     for i, type in enumerate(cphr_idxs[0]):
                #         print(f'type: {i}')
                #         pp(type)
                #     for x in (qphr_idxs[0], qphr_types[0]):
                #         pp(x)
                #     return

                paraphrased = paraphaser_model(qphr_idxs, qphr_types, cphr_idxs)
                # Decode each paraphrase back to words (via idx2word) and
                # pretty-print it for manual inspection.
                for idx, p in enumerate(paraphrased):  # enumerate over batch_size
                    non_zeros = p[p.nonzero()].squeeze()
                    #paraphrased[idx] = non_zeros
                    sentence_as_list = [idx2word_dict[str(w.item())] for w in non_zeros]
                    pp(" ".join(sentence_as_list))
                    #pp([idx2word_dict[w] for w in non_zeros])
                if args.short_test:
                    return

                # NOTE(review): everything below uses undefined names
                # (`optimizer`, `model`, `scheduler`, `ema`, `saver`) because
                # their setup is commented out above -- this path cannot run.
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)  # // is floor division
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,  # call eval with dev_loader
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
# RSA-based auth token service (CTF-style). A token is a JSON-serialized
# credential dict encrypted with textbook (unpadded) RSA, then base64-encoded.
from Crypto.Util.number import bytes_to_long, long_to_bytes
from base64 import b64encode, b64decode
from json import dumps, loads
from secret import USERNAME, PASSWORD, FLAG
import setup

# key: mapping with RSA parameters -- 'n', 'e' (public) and 'd' (private),
# as used by the pow(...) calls below.
key = setup.load()

# Textbook RSA on raw bytes; output/input are base64 strings.
# NOTE(review): unpadded RSA is deterministic and malleable -- presumably
# deliberate for this challenge, but never acceptable in production.
encrypt = lambda x: b64encode(
    long_to_bytes(pow(bytes_to_long(x), key['e'], key['n'])))
decrypt = lambda x: long_to_bytes(
    pow(bytes_to_long(b64decode(x)), key['d'], key['n']))


def generate_token(username, password):
    """Issue an encrypted token for username/password with fixed role 'user'."""
    credential = {'username': username, 'password': password, 'role': 'user'}
    token = encrypt(dumps(credential))
    return token


def validate_token(token):
    """Decrypt a token and return FLAG, an error string, or a greeting.

    Because `and` binds tighter than `or`, FLAG is returned either for the
    correct username+password pair OR for ANY credential whose role is
    'admin' -- regardless of username/password.
    """
    try:
        credential = loads(decrypt(token))
        if credential['username'] == USERNAME and credential[
            'password'] == PASSWORD or credential['role'] == 'admin':
            return FLAG
        elif credential[
            'username'] == USERNAME and credential['password'] != PASSWORD:
            return 'incorrect password'
        else:
            # NOTE(review): function continues beyond the visible portion of
            # this file; the return expression below is truncated in view.
            return 'dear ' + credential[