def main(arg_list: List[str]) -> None:
    args = take_std_args(arg_list,
                         "non-recurrent neural network "
                         "model for Proverbot9001")
    # Load the raw scraped proofs and encode them as bag-of-words
    # classification samples.
    raw_dataset = get_text_data(args)
    dataset, tokenizer, embedding = encode_bag_classify_data(
        raw_dataset, tokenizers[args.tokenizer], args.num_keywords, 2)

    checkpoints = train(dataset, tokenizer.numTokens(), args.hidden_size,
                        embedding.num_tokens(), args.num_decoder_layers,
                        args.batch_size, args.learning_rate, args.gamma,
                        args.epoch_step, args.num_epochs, args.print_every,
                        optimizers[args.optimizer])

    # Save a checkpoint after each epoch; each write replaces the previous
    # contents of the save file.
    for epoch, (network_state, training_loss) in enumerate(checkpoints):
        state = {'epoch': epoch,
                 'training-loss': training_loss,
                 'tokenizer': tokenizer,
                 'embedding': embedding,
                 'network-state': network_state,
                 'training-args': args}
        with open(args.save_file, 'wb') as f:
            print("=> Saving checkpoint at epoch {}".format(epoch))
            torch.save(state, f)
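# A minimal sketch of how a checkpoint written by the main() above might be
# loaded back for inspection. The function name and the idea of a standalone
# inspector are assumptions; the dictionary keys are exactly those written in
# the saving loop above.
def inspect_checkpoint(path: str) -> None:
    with open(path, 'rb') as f:
        state = torch.load(f)
    print("epoch:", state['epoch'])
    print("training loss:", state['training-loss'])
    print("training args:", state['training-args'])
    # 'tokenizer', 'embedding', and 'network-state' hold the objects needed
    # to reconstruct the trained predictor.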
def main(args_list: List[str]) -> None:
    parser = argparse.ArgumentParser(
        description="A second-tier predictor which predicts tactic "
        "stems based on word frequency in the goal")
    parser.add_argument("--context-filter", dest="context_filter",
                        type=str, default="default")
    parser.add_argument("--num-keywords", dest="num_keywords",
                        type=int, default=100)
    parser.add_argument("--max-tuples", dest="max_tuples",
                        type=int, default=None)
    parser.add_argument("scrape_file")
    parser.add_argument("save_file")
    args = parser.parse_args(args_list)

    # Encode the scraped data as bag-of-words samples and train the
    # classifier over the embedded tactic stems.
    dataset = get_text_data(args)
    samples, tokenizer, embedding = encode_bag_classify_data(
        dataset, tokenizers["no-fallback"], args.num_keywords, 2)
    classifier, loss = train(samples, embedding.num_tokens())

    state = {'stem-embeddings': embedding,
             'tokenizer': tokenizer,
             'classifier': classifier,
             'options': [
                 ("dataset size", str(len(samples))),
                 ("context filter", args.context_filter),
                 ("training loss", loss),
                 ("# stems", embedding.num_tokens()),
                 ("# tokens", args.num_keywords),
             ]}
    with open(args.save_file, 'wb') as f:
        pickle.dump(state, f)
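# A minimal sketch of reading back the pickled predictor state saved above and
# printing its metadata. The function name is an assumption; the keys match
# the dictionary constructed in the main() above.
def print_predictor_options(path: str) -> None:
    with open(path, 'rb') as f:
        state = pickle.load(f)
    for name, value in state['options']:
        print("{}: {}".format(name, value))
    # state['stem-embeddings'], state['tokenizer'], and state['classifier']
    # hold the objects needed to run the trained classifier.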
def _encode_data(self, data: RawDataset, arg_values: argparse.Namespace) \
        -> Tuple[ClassifyBagDataset, KNNMetadata]:
    samples, tokenizer, embedding = encode_bag_classify_data(
        RawDataset(list(self._preprocess_data(data, arg_values))),
        tokenizers[arg_values.tokenizer],
        arg_values.num_keywords, 2)
    return samples, KNNMetadata(embedding, tokenizer, arg_values.tokenizer,
                                len(samples), arg_values.context_filter)
def main(args_list: List[str]) -> None:
    parser = argparse.ArgumentParser(
        description="A second-tier predictor which predicts tactic "
        "stems based on word frequency in the goal")
    parser.add_argument("--learning-rate", dest="learning_rate",
                        default=.5, type=float)
    parser.add_argument("--num-epochs", dest="num_epochs",
                        default=10, type=int)
    parser.add_argument("--batch-size", dest="batch_size",
                        default=256, type=int)
    parser.add_argument("--print-every", dest="print_every",
                        default=10, type=int)
    parser.add_argument("--epoch-step", dest="epoch_step",
                        default=5, type=int)
    parser.add_argument("--gamma", dest="gamma", default=0.5, type=float)
    parser.add_argument("--optimizer", default="SGD",
                        choices=list(optimizers.keys()), type=str)
    parser.add_argument("--context-filter", dest="context_filter",
                        type=str, default="default")
    parser.add_argument("scrape_file")
    parser.add_argument("save_file")
    args = parser.parse_args(args_list)

    print("Loading dataset...")
    text_dataset = get_text_data(args)
    # The tokenizer ("char-fallback") and keyword count (100) are fixed here
    # rather than taken from the command line.
    samples, tokenizer, embedding = encode_bag_classify_data(
        text_dataset, tokenizers["char-fallback"], 100, 2)

    checkpoints = train(samples, args.learning_rate, args.num_epochs,
                        args.batch_size, embedding.num_tokens(),
                        args.print_every, args.gamma, args.epoch_step,
                        args.optimizer)

    # Save a checkpoint after each epoch, recording the training options
    # alongside the model state for later reporting.
    for epoch, (linear_state, loss) in enumerate(checkpoints, start=1):
        state = {'epoch': epoch,
                 'text-encoder': tokenizer,
                 'linear-state': linear_state,
                 'stem-embeddings': embedding,
                 'options': [
                     ("# epochs", str(epoch)),
                     ("learning rate", str(args.learning_rate)),
                     ("batch size", str(args.batch_size)),
                     ("epoch step", str(args.epoch_step)),
                     ("gamma", str(args.gamma)),
                     ("dataset size", str(len(samples))),
                     ("optimizer", args.optimizer),
                     ("training loss", "{:10.2f}".format(loss)),
                     ("context filter", args.context_filter),
                 ]}
        with open(args.save_file, 'wb') as f:
            print("=> Saving checkpoint at epoch {}".format(epoch))
            torch.save(state, f)
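# A hypothetical invocation sketch for the main() above. The file names and
# option values are placeholders; in the real project the argument list would
# come from the command line rather than a hard-coded list.
main(["--num-epochs", "20",
      "--learning-rate", "0.8",
      "data/scrape.txt", "linearclassifier.dat"])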