session_val_ids = session_val_ids[:args.n_base] model = MLP(input_dim=len(input_attrs), hidden_dims=args.hidden_dims, output_dim=len(output_attrs), n_heads=args.n_base + n_inc) model.to(device) # restore base model if (args.save_models): # load model stored in statistics of the base training model.load_state_dict(base_stats[-1]) else: if (args.offload_aux_models): # load model from file model.load(model_path_base) else: # load model from internal copy model.load_state_dict(model_base_state_dict) for inc_id in range(n_inc): # update session new_task = sequence.pop(0) session.append(new_task) # update training, testing and valudation sets train_ids, test_ids, val_ids = parts[new_task] # scale training and validation sets to the subset fraction (if specified, defaults to 1.0) train_ids = train_ids[:max(
def run():
    """Train an MLP classifier on the TED talks dataset and print test accuracy.

    Pipeline: load + split the corpus, build datasets that share the training
    vocabulary, fit the model with early-checkpointing via the logger, reload
    the saved checkpoint, and evaluate on the held-out test split.
    """
    # Hyper-parameters; keys are consumed by the model/trainer below.
    cfg = {
        'model_folder': 'tmp',
        'embedding_size': 50,
        'hidden_size': 25,
        'batch_size': 50,
        'epochs': 100
    }

    # ---- Data ----------------------------------------------------------
    tokens_ted, labels = load_ted_data('ted_en-20160408.xml')
    tokens_train, tokens_dev, tokens_test = split_dataset(tokens_ted)
    labels_train, labels_dev, labels_test = split_dataset(labels)

    # Dev/test reuse the training vocabulary so token indices stay aligned.
    train_dataset = TedDataset(tokens_train, labels_train, min_frequency=10)
    dev_dataset = TedDataset(tokens_dev, labels_dev,
                             vocabulary=train_dataset.vocabulary,
                             raw_output=True)
    test_dataset = TedDataset(tokens_test, labels_test,
                              vocabulary=train_dataset.vocabulary,
                              raw_output=True)

    # NOTE(review): all three loaders (dev/test included) use the *train*
    # dataset's collate_fn, and the train loader is not shuffled — both look
    # intentional (collate depends on the shared vocabulary) but are worth
    # confirming against the TedDataset/Trainer implementations.
    def _make_loader(dataset):
        # Single place to build a DataLoader with the shared collate function.
        return DataLoader(dataset,
                          collate_fn=train_dataset.collate_fn,
                          batch_size=cfg['batch_size'],
                          num_workers=4)

    train_dataloader = _make_loader(train_dataset)
    dev_dataloader = _make_loader(dev_dataset)
    test_dataloader = _make_loader(test_dataset)

    # ---- Model ---------------------------------------------------------
    model = MLP(cfg)
    model.initialize_features(data=train_dataset)
    model.build_model()

    # ---- Training ------------------------------------------------------
    logger = BasicLogger(metric=accuracy_score, score_optimization='max')
    trainer = Trainer(model=model, logger=logger)
    trainer.fit(train_dataloader, dev_dataloader, epochs=cfg['epochs'])

    # Restore the checkpoint written during fitting before evaluating.
    model.load('{}/{}.torch'.format(model.config['model_folder'],
                                    type(model).__name__.lower()))

    # ---- Evaluation ----------------------------------------------------
    # Gather gold labels batch by batch, then compare against predictions.
    target = [label
              for batch in test_dataloader
              for label in batch['output'].tolist()]
    predictions = trainer.test(test_dataloader)
    print("Test Accuracy:", accuracy_score(target, predictions))