def __init__(self, state_shape, action_shape, max_action=1.,
             device=torch.device('cpu'), seed=0, batch_size=64, lr=3e-4,
             discount=0.9, horizon=2048, n_epoch=10, clip_eps=0.2,
             lam=0.95, coef_ent=0., max_grad_norm=10.):
    fix_seed(seed)

    self.actor = GaussianActor(state_shape, action_shape).to(device)
    self.optim_actor = torch.optim.Adam(self.actor.parameters(), lr=lr)

    self.critic = Critic(state_shape).to(device)
    self.optim_critic = torch.optim.Adam(self.critic.parameters(), lr=lr)

    self.max_action = max_action
    self.device = device
    self.batch_size = batch_size
    self.discount = discount
    self.horizon = horizon
    self.n_epoch = n_epoch
    self.clip_eps = clip_eps
    self.lam = lam
    self.coef_ent = coef_ent
    self.max_grad_norm = max_grad_norm
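# A minimal, hypothetical sketch of the shared fix_seed helper these snippets
# assume: it seeds Python's, NumPy's, and PyTorch's RNGs and returns the seed,
# matching call sites such as `seed = fix_seed(config.seed)` further below.
import random

import numpy as np
import torch


def fix_seed(seed: int = 0) -> int:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)           # seeds CPU (and, in recent PyTorch, CUDA) RNGs
    torch.cuda.manual_seed_all(seed)  # explicit, for multi-GPU runs
    return seed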
def main(args):
    fix_seed(args.seed)
    model = ResNet18(num_classes=args.num_classes, pretrained=args.pretrain)
    trainer = Trainer(config=vars(args), model=model)

    if args.mode == "train":
        t = time.time()
        trainer.train()
        train_time = time.time() - t
        m, s = divmod(train_time, 60)
        h, m = divmod(m, 60)
        print()
        print("Training Finished.")
        print("** Total Time: {}-hour {}-minute".format(int(h), int(m)))
    else:
        test_model = ResNet18(num_classes=args.num_classes,
                              pretrained=args.pretrain)
        state_dict = torch.load(
            glob.glob(os.path.join(args.ckpt_path, "*.pt"))[0])

        # strip the "module." prefix that nn.DataParallel adds to keys
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            if "module" in k:
                k = k.replace("module.", "")
            new_state_dict[k] = v

        test_model.load_state_dict(new_state_dict)
        trainer.test(test_model)
def main_worker(rank, ngpus_per_node, hparams):
    print(f"Use GPU {rank} for training")
    fix_seed(hparams.seed)

    # global rank = node rank * GPUs per node + local rank
    hparams.rank = hparams.rank * ngpus_per_node + rank
    dist.init_process_group(
        backend=hparams.dist_backend,
        init_method=hparams.dist_url,
        world_size=hparams.world_size,
        rank=hparams.rank,
    )
    scaler = torch.cuda.amp.GradScaler() if hparams.amp else None
    model = ResNet18()

    # training phase
    trainer = Trainer(hparams, model, scaler, rank, ngpus_per_node)
    version = trainer.fit()

    # testing phase
    if rank == 0 and hparams.contain_test:
        state_dict = torch.load(
            glob.glob(
                os.path.join(hparams.ckpt_path,
                             f"version-{version}/best_model_*.pt"))[0])
        trainer.test(state_dict)
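# A hypothetical launcher for main_worker above, using the standard
# torch.multiprocessing.spawn API (one process per local GPU). parse_args and
# the hparams fields are assumptions matching those referenced in main_worker.
import torch
import torch.multiprocessing as mp

if __name__ == "__main__":
    hparams = parse_args()                      # assumed CLI parser
    ngpus_per_node = torch.cuda.device_count()
    # total processes = GPUs per node * number of nodes
    hparams.world_size = ngpus_per_node * hparams.world_size
    mp.spawn(main_worker, nprocs=ngpus_per_node,
             args=(ngpus_per_node, hparams))    # local rank is passed as the first arg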
def __init__(self, flags):
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # fix the random seed or not
    fix_seed()

    self.setup_path(flags)

    self.network = mlp.MLPNet(num_classes=flags.num_classes)
    self.network = self.network.cuda()
    print(self.network)
    print('flags:', flags)

    if not os.path.exists(flags.logs):
        os.mkdir(flags.logs)
    flags_log = os.path.join(flags.logs, 'flags_log.txt')
    write_log(flags, flags_log)

    self.load_state_dict(flags.state_dict)
    self.configure(flags)
def main(hparams):
    fix_seed(hparams.seed)
    scaler = torch.cuda.amp.GradScaler() if hparams.amp else None
    model = GloveModel(1, hparams.emb_dimension)

    # training phase
    trainer = Trainer(hparams, model, scaler)
    version = trainer.fit()
def main(args: Namespace):
    fix_seed(args.seed)
    model_generator = SeisModel(interp_signal=args.interp_signal,
                                min_thickness=args.min_thickness,
                                max_thickness=args.max_thickness,
                                smoothness=args.smoothness,
                                height=args.height,
                                width=args.width,
                                filename_signal=args.filename_signal)
    for k in range(args.num_data):
        model_and_picking = model_generator.get_random_model()
        np.save(args.data_dir / f'model_{k}.npy', model_and_picking)
def main(hparams):
    fix_seed(hparams.seed)
    scaler = torch.cuda.amp.GradScaler() if hparams.amp else None
    model = ResNet18()

    # training phase
    trainer = Trainer(hparams, model, scaler)
    version = trainer.fit()

    # testing phase
    if hparams.contain_test:
        state_dict = torch.load(
            glob.glob(
                os.path.join(hparams.ckpt_path,
                             f"version-{version}/best_model_*.pt"))[0])
        trainer.test(state_dict)
def main(args: Namespace):
    args.freeze = parse_to_dict(args.freeze)
    args.aug_degree = parse_to_dict(args.aug_degree)

    results_path = args.log_dir / str(datetime.now())
    results_path.mkdir(exist_ok=True, parents=True)
    write_args(results_path, vars(args))

    fix_seed(args.seed)

    (train_frame, test_frame), labels_num2txt = prepare_dataframes(args.data_root)
    train_set = TinyImagenetDataset(train_frame)
    test_set = TinyImagenetDataset(test_frame)

    model = resnet18_num_classes(pretrained=True, num_classes=200,
                                 p_drop=args.prob_drop, type_net=args.arch)
    classifier = Classifier(net=model)
    stopper = Stopper(args.n_wrongs, args.delta_wrongs)

    trainer = Trainer(classifier=classifier,
                      train_set=train_set,
                      test_set=test_set,
                      results_path=results_path,
                      device=args.device,
                      batch_size=args.batch_size,
                      num_workers=args.num_workers,
                      num_visual=args.num_visual,
                      aug_degree=args.aug_degree,
                      lr=args.lr,
                      lr_min=args.lr_min,
                      stopper=stopper,
                      labels_num2txt=labels_num2txt,
                      freeze=args.freeze,
                      weight_decay=args.weight_decay,
                      label_smooth=args.label_smooth,
                      period_cosine=args.period_cosine)
    trainer.train(num_epoch=args.num_epoch)
def main(args: Namespace):
    results_path = args.log_dir / str(datetime.now())
    results_path.mkdir(exist_ok=True, parents=True)
    write_args(results_path, vars(args))

    fix_seed(args.seed)

    height_model = 1000
    width_model = 24
    filenames_train, filenames_valid, filenames_test = split_dataset(
        args.data_root, args.fracs_dataset)

    train_set = SeisDataset(filenames_train,
                            height_model=height_model,
                            width_model=width_model,
                            prob_aug=args.prob_aug)
    valid_set = SeisDataset(filenames_valid,
                            height_model=height_model,
                            width_model=width_model,
                            prob_aug=args.prob_aug)
    test_set = SeisDataset(filenames_test,
                           height_model=height_model,
                           width_model=width_model,
                           prob_aug=args.prob_aug)

    net = UNetFB()
    picker = Picker(net)
    stopper = Stopper(args.n_wrongs, args.delta_wrongs)

    trainer = Trainer(picker=picker,
                      results_path=results_path,
                      train_set=train_set,
                      valid_set=valid_set,
                      test_set=test_set,
                      device=args.device,
                      batch_size=args.batch_size,
                      lr=args.lr,
                      freq_valid=args.freq_valid,
                      num_workers=args.num_workers,
                      dt_ms=args.dt_ms,
                      height_model=height_model,
                      width_model=width_model,
                      visual=args.visual,
                      stopper=stopper,
                      weights=torch.tensor(args.weights))
    trainer.train(num_epoch=args.num_epoch)
def __init__(self):
    self.batch_size = 1
    self.num_classes = 2
    self.unseen_index = 3
    self.lr = 0.001
    self.inner_loops = 1200
    self.step_size = 20
    self.weight_decay = 0.00005
    self.momentum = 0.9
    self.state_dict = ''
    self.logs = 'logs'
    self.patch_size = 64
    self.test_every = 20
    self.test_unseen = 20
    self.epochs = 15
    self.writer = SummaryWriter(comment='fewshot-baseline')

    torch.set_default_tensor_type('torch.cuda.DoubleTensor')

    self.TrainMetaData = []
    self.ValidMetaData = []
    self.TestMetaData = []
    self.FewShotData = []
    self.count1 = 0
    self.count2 = 0
    self.count3 = 0

    # fix the random seed or not
    fix_seed()

    self.setup_path()

    self.network = Unet3D_meta_learning.Unet3D()  # load the vanilla 3D-Unet
    self.network = self.network.cuda()
    # device = torch.device('cuda:0')
    # self.network = self.network.to(device)
    # self.network = torch.nn.DataParallel(self.network)

    self.configure()
def __init__(self, flags):
    self.batch_size = flags.batch_size
    self.TrainMetaData = []
    self.ValidMetaData = []
    self.TestMetaData = []
    # torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # fix the random seed or not
    fix_seed()

    self.setup_path(flags)

    self.network = mlp.Net(num_classes=flags.num_classes)  # was mlp.MLPNet before
    # self.network = self.network.cuda()

    if not os.path.exists(flags.logs):
        os.makedirs(flags.logs)
    flags_log = os.path.join(flags.logs, 'flags_log.txt')
    write_log(flags, flags_log)

    # self.load_state_dict(flags.state_dict)
    self.configure(flags)
def main(hparams):
    fix_seed(hparams.seed)
    resultwriter = ResultWriter(hparams.result_path)
    scaler = torch.cuda.amp.GradScaler() if hparams.amp else None
    model = ResNet18()
    # model = ResNet34()
    # model = ResNet50()

    # training phase
    trainer = Trainer(hparams, model, scaler, resultwriter)
    best_result = trainer.fit()

    # testing phase
    if hparams.contain_test:
        version = best_result["version"]
        state_dict = torch.load(
            glob.glob(f"checkpoints/version-{version}/best_model_*.pt")[0])
        test_result = trainer.test(state_dict)

        # save result
        best_result.update(test_result)
        resultwriter.update(hparams, **best_result)
def main(rank, hparams, ngpus_per_node: int):
    fix_seed(hparams.seed)
    resultwriter = ResultWriter(hparams.result_path)

    if hparams.distributed:
        hparams.rank = hparams.rank * ngpus_per_node + rank
        print(f"Use GPU {hparams.rank} for training")
        dist.init_process_group(
            backend=hparams.dist_backend,
            init_method=hparams.dist_url,
            world_size=hparams.world_size,
            rank=hparams.rank,
        )

    # get shared tokenizer and vocab (rank 0 downloads first, the others wait
    # at the barrier and then read it from the cache)
    if hparams.distributed:
        if rank != 0:
            dist.barrier()
        tok = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
        if rank == 0:
            dist.barrier()
    else:
        tok = BertTokenizer.from_pretrained("bert-base-multilingual-cased")

    # get dataloaders
    loaders = [
        get_loader(
            tok=tok,
            batch_size=hparams.batch_size,
            root_path=hparams.root_path,
            workers=hparams.workers,
            max_len=hparams.max_len,
            mode=mode,
            distributed=hparams.distributed,
        )
        for mode in ["train", "valid"]
    ]

    # get model and initialize weights
    model = Transformer(
        vocab_size=len(tok.vocab),
        num_enc_block=hparams.n_enc_block,
        num_dec_block=hparams.n_dec_block,
        num_head=hparams.num_head,
        hidden=hparams.hidden,
        fc_hidden=hparams.fc_hidden,
        dropout=hparams.dropout,
    )
    for param in model.parameters():
        if param.dim() > 1:
            nn.init.xavier_uniform_(param)

    # training phase
    trainer = Trainer(hparams, loaders, model, resultwriter,
                      pad_idx=tok.pad_token_id)
    best_result = trainer.fit()

    # testing phase
    if rank in [-1, 0]:
        version = best_result["version"]
        state_dict = torch.load(
            glob.glob(
                os.path.join(hparams.ckpt_path,
                             f"version-{version}/best_model_*.pt"))[0])
        test_loader = get_loader(
            tok=tok,
            batch_size=hparams.batch_size,
            root_path=hparams.root_path,
            workers=hparams.workers,
            max_len=hparams.max_len,
            mode="test",
        )
        test_result = trainer.test(test_loader, state_dict)

        # save result
        best_result.update(test_result)
        resultwriter.update(hparams, **best_result)
def main():
    root = logging.getLogger()
    if not torch.cuda.is_available():
        root.info('no gpu device available')
        sys.exit(1)

    # Fix seed
    utils.fix_seed(args.seed)

    root.info('gpu device = %d' % args.gpu)
    root.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    args.greedy, args.l2)
    model = model.cuda()
    root.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Data loading code
    train_queue, train_sampler, valid_queue = utils.get_train_validation_loader(args)
    test_queue = utils.get_test_loader(args)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    best_acc = 0
    for epoch in range(args.epochs):
        lr = scheduler.get_lr()[0]
        log_value("lr", lr, epoch)
        root.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        root.info('genotype = %s', genotype)

        # training
        architect.alpha_forward = 0
        architect.alpha_backward = 0
        start_time = time.time()
        train_acc, train_obj, alphas_time, forward_time, backward_time = \
            train(train_queue, valid_queue, model, architect, criterion,
                  optimizer, lr, epoch)
        end_time = time.time()
        root.info("train time %f", end_time - start_time)
        root.info("alphas_time %f", alphas_time)
        root.info("forward_time %f", forward_time)
        root.info("backward_time %f", backward_time)
        root.info("alpha_forward %f", architect.alpha_forward)
        root.info("alpha_backward %f", architect.alpha_backward)
        log_value('train_acc', train_acc, epoch)
        root.info('train_acc %f', train_acc)

        # validation
        start_time2 = time.time()
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        end_time2 = time.time()
        root.info("inference time %f", end_time2 - start_time2)
        log_value('valid_acc', valid_acc, epoch)
        root.info('valid_acc %f', valid_acc)

        # test
        start = time.time()
        test_acc, test_obj = infer(test_queue, model, criterion)
        end = time.time()
        root.info("inference time %f", end - start)
        log_value('test_acc', test_acc, epoch)
        root.info('test_acc %f, test_obj %f', test_acc, test_obj)

        # update learning rate
        scheduler.step()

        is_best = valid_acc > best_acc
        best_acc = max(valid_acc, best_acc)
        if is_best:
            root.info('best valid_acc: {} at epoch: {}, test_acc: {}'.format(
                best_acc, epoch, test_acc))
            root.info('Current best genotype = {}'.format(model.genotype()))
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
        imgs = batch.to(device)
        if config.model_type == 'fcn':
            imgs = torch.flatten(imgs, 1)

        output = model(imgs)
        if config.model_type == 'vae':
            # the VAE returns a tuple; its first element is the reconstruction
            output = output[0]

        if config.model_type in ['fcn']:
            loss = eval_loss(output, imgs).sum(-1)
        else:
            loss = eval_loss(output, imgs).sum([1, 2, 3])
        result.append(loss)

    anomality = torch.cat(result, axis=0).cpu()
    anomality = torch.sqrt(anomality).reshape(len(dataset), 1).numpy()

    df = pd.DataFrame(anomality, columns=['Predicted'])
    df.to_csv(config.out_file, index_label='Id')
    logger.info("Testing Completed!")


if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    utils.fix_seed(19530615)
    train(device)
    test(device)
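# The score above is the L2 norm of each sample's reconstruction error (sum of
# squared errors over all pixels, then a square root). A tiny self-contained
# check of that equivalence, assuming eval_loss is nn.MSELoss(reduction='none');
# the shapes are illustrative only.
import torch

x = torch.randn(4, 3, 8, 8)   # a batch of inputs
y = torch.randn(4, 3, 8, 8)   # their "reconstructions"
eval_loss = torch.nn.MSELoss(reduction='none')
score = torch.sqrt(eval_loss(y, x).sum([1, 2, 3]))
assert torch.allclose(score, (y - x).flatten(1).norm(dim=1))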
import importlib

import torch

import utils
from config import Configuration
from memoryreplay import MemoryReplay
from models import BranchingDQN
from trainer import Trainer

if __name__ == '__main__':
    args = utils.parse_arguments()

    # Get configuration
    config = Configuration(args.configuration)
    seed = utils.fix_seed(config.seed)

    # Prepare environment (resolve the env class by name from the envs module)
    env_module = importlib.import_module('envs')
    env_class = getattr(env_module, config.env_class)
    env = env_class(config.env_name, config.action_bins)
    env.set_seed(seed)

    # Global initialization
    torch.cuda.init()
    device = torch.device(
        config.device if torch.cuda.is_available() else "cpu")

    # Information about environments
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.shape[0]
def run(args):
    print(torch.backends.cudnn.benchmark)
    torch.backends.cudnn.deterministic = True

    # Get configuration
    config = exh.load_json(args.CONFIG)

    # Global initialization
    torch.cuda.init()
    device = torch.device(config['cuda']['device'] if (
        torch.cuda.is_available() and config['cuda']['ngpu'] > 0) else "cpu")
    seed = fix_seed(config['seed'])

    # Load vocabulary
    vocab = exh.load_json(config['data']['vocab'])

    # Prepare references
    references = exh.read_file(config['data']['test']['captions'])
    references = prepare_references(references)

    beam_dataset = CaptioningDataset(config['data']['test'], "beam", vocab,
                                     config['sampler']['test'])
    beam_iterator = DataLoader(
        beam_dataset,
        batch_sampler=beam_dataset.sampler,
        collate_fn=beam_dataset.collate_fn,
        pin_memory=config['iterator']['test']['pin_memory'],
        num_workers=config['iterator']['test']['num_workers'])

    # Prepare model
    weights = None
    if len(config['model']['embeddings']) > 0:
        weights = uvoc.init_weights(vocab, config['model']['emb_dim'])
        uvoc.glove_weights(weights, config['model']['embeddings'], vocab)

    model = WGAN(len(vocab['token_list']), config['model'], weights)
    model.reset_parameters()
    print("The state dict keys: \n\n", model.state_dict().keys())

    # Load the checkpoint once; load_state_dict already copies every tensor,
    # so no manual per-tensor copy is needed afterwards.
    checkpoint = torch.load(config['load_dict'])
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False

    model.to(device)
    fix_seed(config['seed'] + 1)

    # inference mode
    model.eval()
    torch.set_grad_enabled(False)

    # Greedy (argmax) decoding
    generated_sentences = max_search(model,
                                     beam_iterator,
                                     vocab,
                                     max_len=config['beam_search']['max_len'],
                                     device=device)
    exh.write_text('\n'.join(generated_sentences), 'output_argmax')
    score = bleu_score(references, generated_sentences, 4)
    print(score)

    # Beam search decoding
    generated_sentences = beam_search(
        [model],
        beam_iterator,
        vocab,
        beam_size=config['beam_search']['beam_size'],
        max_len=config['beam_search']['max_len'],
        device=device)
    exh.write_text('\n'.join(generated_sentences), 'output_beam')
    score = bleu_score(references, generated_sentences, 4)
    print(score)
    mlflow_client.log_artifact(run_id, train_log_file_path)
    if exist_error:
        mlflow_client.log_artifact(run_id, error_file_path)
    rmtree(cur_tmp_results_dir, ignore_errors=True)
    return checkpoint_callback.best_model_score


def main(args, tmp_results_dir: str) -> None:
    study = optuna.create_study(direction='maximize')
    study.optimize(
        lambda trial: objective(trial, args, tmp_results_dir),
        n_trials=args.OPTUNA.N_TRIALS,
        timeout=args.OPTUNA.TIMEOUT,
    )

    trial = study.best_trial
    print(f'Best trial value: {trial.value}')
    print('Params')
    for k, v in trial.params.items():
        print(f'{k}: {v}')


if __name__ == '__main__':
    option = parse_console()
    args = update_args(cfg_file=option.cfg_file_path)
    fix_seed(args.SEED)
    main(
        args=args,
        tmp_results_dir=option.tmp_results_dir,
    )
                    default=0,
                    type=int,
                    help='warmup epochs for learning rate scheduler')
parser.add_argument('--random_seed',
                    default=12345,
                    type=int,
                    help='global random seed')
args = parser.parse_args()

project_path = '/'.join(os.path.abspath(__file__).split('/')[:-3])
config = configparser.ConfigParser()
config.read(os.path.join(project_path, 'config.ini'))

# set global random seed
fix_seed(args.random_seed)


# construct save path name
def make_dataset_name():
    dataset_name = args.dataset + '_' + args.tagscheme
    return dataset_name


def make_model_name():
    model_name = ''
    if args.use_lstm:
        model_name += '_bilstm' if model_name != '' else 'bilstm'
    if args.use_crf:
        model_name += '_crf' if model_name != '' else 'crf'
    return model_name
def run(args):
    # Get configuration
    config = exh.load_json(args.CONFIG)

    # Prepare folders for logging
    logging = config['logging']['activate']
    if logging:
        exh.create_directory("output")
        output = os.path.join("output", config['logging']['output_folder'])
        exh.create_directory(output)

    # Global initialization
    torch.cuda.init()
    device = torch.device(config['cuda']['device'] if (
        torch.cuda.is_available() and config['cuda']['ngpu'] > 0) else "cpu")
    seed = fix_seed(config['seed'])

    # Load vocabulary
    vocab = exh.load_json(config['data']['vocab'])

    # Prepare references
    references = exh.read_file(config['data']['beam']['captions'])
    references = prepare_references(references)

    # Prepare datasets and dataloaders
    training_dataset = CaptioningDataset(config['data']['train'], "train",
                                         vocab, config['sampler']['train'])
    train_iterator = DataLoader(
        training_dataset,
        batch_sampler=training_dataset.sampler,
        collate_fn=training_dataset.collate_fn,
        pin_memory=config['iterator']['train']['pin_memory'],
        num_workers=config['iterator']['train']['num_workers'])

    beam_dataset = CaptioningDataset(config['data']['beam'], "beam", vocab,
                                     config['sampler']['beam'])
    beam_iterator = DataLoader(
        beam_dataset,
        batch_sampler=beam_dataset.sampler,
        collate_fn=beam_dataset.collate_fn,
        pin_memory=config['iterator']['beam']['pin_memory'],
        num_workers=config['iterator']['beam']['num_workers'])

    # Prepare model
    weights = None
    if len(config['model']['embeddings']) > 0:
        weights = uvoc.init_weights(vocab, config['model']['emb_dim'])
        uvoc.glove_weights(weights, config['model']['embeddings'], vocab)

    model = WGAN(len(vocab['token_list']), config['model'], weights)
    # model = WGANBase(len(vocab['token_list']), config['model'], weights)
    # model = WGANBaseGP(len(vocab['token_list']), config['model'], weights)
    # model = WGANBaseLip(len(vocab['token_list']), config['model'], weights)
    # model = RelativisticGAN(len(vocab['token_list']), config['model'], weights)
    model.reset_parameters()

    lr = config['model']['optimizers']['lr']
    betas = (config['model']['optimizers']['betas']['min'],
             config['model']['optimizers']['betas']['max'])
    weight_decay = config['model']['optimizers']['weight_decay']
    optim_D = optim.Adam(model.D.parameters(), lr=lr, betas=betas,
                         weight_decay=weight_decay)
    optim_G = optim.Adam(model.G.parameters(), lr=lr, betas=betas,
                         weight_decay=weight_decay)

    model.to(device)
    fix_seed(config['seed'] + 1)

    generator_trained = config['model']['generator']['train_iteration']

    scores = {"BLEU": [], "G_loss_train": [], "D_loss_train": []}
    max_bleu = config['BLEU']['max_bleu']
    # one list per BLEU order; note [[]] * max_bleu would alias a single list
    bleus = [[] for _ in range(max_bleu)]
    best_bleu = (0, 1)

    # torch.autograd.set_detect_anomaly(True)
    model.train(True)
    torch.set_grad_enabled(True)

    # for epoch in range(config['max_epoch']):
    epoch = 1
    cpt = 0
    while True:
        secs = time.time()
        print("Starting Epoch {}".format(epoch))

        iteration = 1
        d_batch = 0
        g_batch = 0
        d_loss = 0
        g_loss = 0
        for batch in train_iterator:
            batch.device(device)
            out = model(batch, optim_G, optim_D, epoch, iteration)
            d_loss += out['D_loss']
            d_batch += 1
            g_loss += out['G_loss']
            g_batch += 1
            iteration += 1

        print("Training : Mean G loss : {} / Mean D loss : {} ({} seconds elapsed)"
              .format(g_loss / g_batch, d_loss / d_batch, time.time() - secs))
        scores['G_loss_train'].append(g_loss / g_batch)
        scores['D_loss_train'].append(d_loss / d_batch)

        # Validation
        model.train(False)
        torch.set_grad_enabled(False)

        # Beam search
        print("Beam search...")
        # generated_sentences = beam_search(model.G, beam_iterator, vocab, config['beam_search'], device)
        # generated_sentences = beam_search(
        #     [model], beam_iterator, vocab,
        #     beam_size=config['beam_search']['beam_size'],
        #     max_len=config['beam_search']['max_len'],
        #     device=device)
        generated_sentences = max_search(
            model, beam_iterator, vocab,
            max_len=config['beam_search']['max_len'],
            device=device)

        # BLEU score
        # for n in range(3, max_bleu):
        #     score = bleu_score(references, generated_sentences, n + 1)
        #     bleus[n].append(score)
        #     print("BLEU-{} score : {}".format(n + 1, score))
        score = bleu_score(references, generated_sentences, max_bleu)
        bleus[max_bleu - 1].append(score)
        print("BLEU-{} score : {}".format(max_bleu, score))

        if score > best_bleu[0]:
            best_bleu = (score, epoch)
            filename = 'output_epoch{}_bleu{}'.format(epoch, score)
            out_file = os.path.join(output, filename)
            torch.save(model.state_dict(), out_file)
        print("Best BLEU so far : {} (Epoch {})".format(best_bleu[0],
                                                        best_bleu[1]))

        if logging:
            output_file = 'output_{}'.format(epoch)
            output_sentences = os.path.join(output, output_file)
            exh.write_text('\n'.join(generated_sentences), output_sentences)

        model.train(True)
        torch.set_grad_enabled(True)

        print("Epoch finished in {} seconds".format(time.time() - secs))

        # stop after three epochs without a new best BLEU
        if epoch - best_bleu[1] == 3:
            break
        epoch += 1

    if logging:
        scores['BLEU'] = bleus
        output_scores = os.path.join(output, 'scores.json')
        exh.write_json(scores, output_scores)
        print("Scores saved in {}".format(output_scores))
import os
import time

from sklearn.model_selection import KFold

from preprocess import dataframe_preprocess
from dataset import SegmentationDataset
from pytorch_lightning_module import ClassifyModel
from models import kaeru_classify_model
from configs import Configs
from utils import fix_seed

# sys.path.append(os.environ.get("TOGURO_LIB_PATH"))
# from slack import Slack
# from sheet import Sheet

start = time.time()
config = Configs()
fix_seed(config.SEED)

if __name__ == "__main__":
    df = dataframe_preprocess(os.path.join(config.input_path, "train.csv"))

    # pick the train/valid split for the configured fold
    kf = KFold(n_splits=5, shuffle=True, random_state=config.SEED)
    for i, (train, test) in enumerate(kf.split(df)):
        if i == config.fold:
            train_loc, test_loc = train, test

    df_train = df.iloc[train_loc]
    df_valid = df.iloc[test_loc]

    train_dataset = SegmentationDataset(
        df_train,
        image_folder=os.path.join(config.input_path, "train_images"))
        # trg    = [trg sent len, batch size]
        # output = [trg sent len, batch size, output dim]
        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)
        # trg    = [(trg sent len - 1) * batch size]
        # output = [(trg sent len - 1) * batch size, output dim]

        loss = criterion(output, trg)
        epoch_loss += loss.item()

    return epoch_loss / len(iterator), epoch_acc / total


if __name__ == '__main__':
    fix_seed(args.seed)

    # tokenize character by character
    Q_TEXT = Field(tokenize=lambda sen: list(sen),
                   init_token="<sos>", eos_token="<eos>")
    A_TEXT = Field(tokenize=lambda sen: list(sen),
                   init_token="<sos>", eos_token="<eos>")

    # associate the text in the 'Question' column with the Q_TEXT field,
    # and 'Answer' with the A_TEXT field
    data_fields = [('Question', Q_TEXT), ('Answer', A_TEXT)]
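# A hypothetical continuation using the classic torchtext data API the Field
# objects above come from: build a TabularDataset over those fields, then the
# vocabularies. The file name 'qa.csv' and the import path (legacy vs.
# pre-0.9 torchtext) are assumptions.
from torchtext.legacy.data import TabularDataset

train_data = TabularDataset(path='qa.csv', format='csv',
                            fields=data_fields, skip_header=True)
Q_TEXT.build_vocab(train_data)
A_TEXT.build_vocab(train_data)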