def best_arch_search():
    """Sample ``search_arch_num`` architectures, score each on validation
    batches, and write genotypes sorted by reward to ``search_result.csv``.

    Reward per architecture is ``architect.reward_c / exp(val_loss)`` — higher
    is better. Uses module-level globals: ``model``, ``parallel_model``,
    ``corpus``, ``architect``, ``val_data``, ``search_data``, ``args``,
    ``search_arch_num``, ``eval_batch_size``, ``get_batch``,
    ``repackage_hidden``.
    """
    model.eval()
    # Collect rows and build the DataFrame once at the end: DataFrame.append
    # was deprecated in pandas 1.4 and removed in 2.0, and growing a frame
    # row-by-row is quadratic anyway.
    rows = []
    i = 0
    hidden = model.init_hidden(eval_batch_size)
    for _ in range(search_arch_num):
        parallel_model.sample_new_architecture()
        data, targets = get_batch(val_data, i, args)
        targets = targets.view(-1)
        hidden = repackage_hidden(hidden)
        loss, hidden = parallel_model._loss(hidden, data, targets)
        # Lower validation loss -> larger reward.
        reward = architect.reward_c / torch.exp(loss)
        gene = parallel_model.genotype()
        rows.append([gene, reward.item()])
        i += args.bptt
        # NOTE(review): batches are drawn from val_data but the wrap-around
        # bound uses search_data — confirm both splits have the same length.
        if i >= search_data.size(0) - 2:
            i = 0
    result_df = pd.DataFrame(rows, columns=['Genotype', 'Val_reward'])
    result_df = result_df.sort_values(by='Val_reward', ascending=False)
    result_df.to_csv('search_result.csv')
def evaluate(data_source, batch_size=10):
    """Return the average per-token NLL of ``parallel_model`` on ``data_source``.

    Puts the model in eval mode (disabling dropout) and runs without gradient
    tracking. Uses module-level globals: ``model``, ``parallel_model``,
    ``corpus``, ``args``, ``get_batch``, ``repackage_hidden``.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for offset in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, offset, args, evaluation=True)
            flat_targets = targets.view(-1)
            log_prob, hidden = parallel_model(data, hidden)
            # Flatten (seq, batch, vocab) logits to match the flattened targets.
            step_loss = nn.functional.nll_loss(
                log_prob.view(-1, log_prob.size(2)), flat_targets).data
            # Weight each chunk's loss by its token count before averaging.
            total_loss += step_loss * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
def evaluate(data_source, batch_size=10, data_name='dev'):
    """Evaluate the relation classifier on the dev set and log P/R/F1.

    Returns total NLL divided by the number of batches.

    NOTE(review): the ``data_source`` argument is immediately overwritten with
    a fresh DataLoader over dev.json, so the caller's value is ignored — the
    ``data_name`` parameter suggests other splits were planned; confirm intent.
    NOTE(review): this redefines ``evaluate`` — an earlier ``evaluate`` in the
    same file is shadowed if both are at module level; verify which is wanted.
    """
    data_source = DataLoader(args.data_dir + '/dev.json', batch_size, opt, vocab, evaluation=True)
    print('Evaluating Model!')
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    # ntokens = len(corpus.dictionary)
    # ntokens = len(vocab.word2id)
    # for i in range(0, data_source.size(0) - 1, args.bptt):
    predictions = []  # label strings accumulated across all batches, in order
    for i in range(len(data_source)):
        batch = data_source.next_batch()
        # Rebuild hidden state per batch since the last batch may be smaller.
        batch_size = len(batch['relation'])
        hidden = model.init_hidden(batch_size)[0]
        # data, targets = get_batch(data_source, i, args, evaluation=True)
        data = batch
        targets = batch['relation']
        targets = targets.view(-1)
        # print('tokens: {} | hidden: {}'.format(batch['tokens'].shape, hidden.shape))
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(
            log_prob, targets).data  # log_prob.view(-1, log_prob.size(2))
        # NOTE(review): `data` is a dict here, so len(data) is the number of
        # keys, not the number of examples — confirm this weighting is intended
        # (batch_size looks like the natural factor).
        total_loss += loss * len(data)
        # Map argmax class ids back to label strings for the scorer.
        batch_predictions = torch.argmax(log_prob, dim=-1).cpu().data.numpy()
        batch_predictions = [
            id2label[prediction] for prediction in batch_predictions
        ]
        predictions += batch_predictions
        # hidden = repackage_hidden(hidden)
    precision, recall, f1 = scorer.score(dev_data.gold(), predictions)
    logging.info('{} set | Precision: {} | Recall: {} | F1: {}'.format(
        data_name, precision, recall, f1))
    print('total loss: {}'.format(total_loss))
    # Average over number of batches (not examples) — presumably intentional.
    return total_loss / len(data_source)
def train_arch():
    """One epoch of architecture-parameter search over ``search_data``.

    Walks the search (validation) queue in bptt-sized chunks, splitting each
    chunk into ``small_batch_size`` sub-batches, sampling a new architecture
    per sub-batch and letting ``architect.step`` accumulate/apply gradients
    every ``arch_opt_step`` batches. Returns mean loss over the search set.

    Uses module-level globals: ``model``, ``parallel_model``, ``architect``,
    ``args``, ``corpus``, ``search_data``, ``arch_opt_step``, ``optimizer``,
    ``epoch``, ``get_batch``, ``repackage_hidden``.
    """
    assert args.batch_size % args.small_batch_size == 0, 'batch_size must be divisible by small_batch_size'
    # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    # One hidden state per small sub-batch slice of the full batch.
    hidden_valid = [
        model.init_hidden(args.small_batch_size)
        for _ in range(args.batch_size // args.small_batch_size)
    ]
    batch, i = 0, 0
    ep_loss = 0
    model.eval()
    while i < search_data.size(0) - 1 - 1:
        # Occasionally halve the window (5% of the time) for length variety.
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        # seq_len = max(5, int(np.random.normal(bptt, 5)))
        # # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)
        seq_len = int(bptt)
        data_valid, targets_valid = get_batch(search_data, i, args)
        start, end, s_id = 0, args.small_batch_size, 0
        # Sweep the full batch in small_batch_size-wide column slices.
        while start < args.batch_size:
            cur_data_valid = data_valid[:, start:end]
            cur_targets_valid = targets_valid[:, start:end].contiguous().view(-1)
            hidden_valid[s_id] = repackage_hidden(hidden_valid[s_id])
            parallel_model.sample_new_architecture()
            # Debug dump of the current architecture distribution at epoch start.
            if i == 0:
                for e in model.edge_weights:
                    print(F.softmax(e, dim=-1))
                print(F.softmax(model.weights, dim=-1))
                print(model.baseline)
            # Apply an optimizer step only every `arch_opt_step` batches;
            # otherwise architect.step presumably just accumulates gradients.
            if (batch + 1) % arch_opt_step == 0:
                is_opt_step = True
            else:
                is_opt_step = False
            if i == 0:
                architect.optimizer.zero_grad()
            hidden_valid[s_id], raw_loss = architect.step(
                hidden_valid[s_id], cur_data_valid, cur_targets_valid, is_opt_step)
            # NOTE(review): raw_loss from architect.step is immediately
            # overwritten by a second forward pass below — confirm the double
            # evaluation is intended (it doubles compute per sub-batch).
            raw_loss, hidden_valid[s_id] = model._loss(hidden_valid[s_id], cur_data_valid, cur_targets_valid)
            raw_loss = raw_loss.detach()
            loss = raw_loss
            # Scale so the sub-batch losses sum to one full-batch average.
            total_loss += raw_loss.data * args.small_batch_size / args.batch_size
            ep_loss += raw_loss * len(cur_data_valid)
            s_id += 1
            start = end
            end = start + args.small_batch_size
            gc.collect()
        # total_loss += raw_loss.data
        if batch % args.log_interval == 0 and batch > 0:
            logging.info(parallel_model.genotype())
            print(F.softmax(parallel_model.weights, dim=-1))
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            logging.info(
                '| arch_epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch, len(search_data) // args.bptt,
                    optimizer.param_groups[0]['lr'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        batch += 1
        i += seq_len
    #Optimizer step for residual of valid queue
    # NOTE(review): if the while loop never executes, `is_opt_step` is unbound
    # here and this raises NameError — guard or initialize before the loop.
    if not is_opt_step:
        architect.optimizer.step()
    return ep_loss.item() / len(search_data)