def test_tfidf(self):
    df_train, df_dev, df_test, metadata = get_fake_dataset(
        with_text_col=True)
    text_config = Mapping()
    text_config.mode = 'tfidf'
    text_config.max_words = 20
    encoder = Encoder(metadata, text_config)
    y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
    y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
    y_test, X_test_struc, X_test_text = encoder.transform(df_test)
    model_config = get_fake_modelconfig('./outputs_test')
    model_config.output_dir = os.path.join(model_config.output_dir,
                                           'tfidf_text_only')
    if not os.path.exists(model_config.output_dir):
        os.makedirs(model_config.output_dir)
    model = Model(text_config, model_config)
    # Validate on the training set itself: the tiny fake dataset should be
    # perfectly fit, so validation accuracy is expected to reach 1.0.
    hist = model.train(y_train, X_train_struc, X_train_text,
                       y_train, X_train_struc, X_train_text)
    val_acc_true = 1.0
    self.assertTrue(np.isclose(val_acc_true, hist.history['val_acc'][-1]))
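# For reference, a minimal sketch of the `Mapping` helper these tests assume:
# a dict that exposes its keys as attributes (text_config.mode, etc.). This is
# a hypothetical reconstruction; the project's real Mapping class may differ.
class _MappingSketch(dict):
    """Dict with attribute-style access, e.g. m.mode == m['mode']."""

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        self[key] = value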
def test_lstm(self):
    df_train, df_dev, df_test, metadata = get_fake_dataset(
        with_text_col=True)
    # Change this to wherever the pre-trained GloVe file is stored.
    glove_file_path = 'glove/glove.6B.50d.txt'
    text_config = Mapping()
    text_config.mode = 'glove'
    text_config.max_words = 20
    text_config.maxlen = 5
    text_config.embedding_dim = 50
    text_config.embeddings_index = open_glove(glove_file_path)
    encoder = Encoder(metadata, text_config=text_config)
    y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
    y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
    y_test, X_test_struc, X_test_text = encoder.transform(df_test)
    text_config.embedding_matrix = encoder.embedding_matrix
    model_config = get_fake_modelconfig('./outputs_test')
    model_config.output_dir = os.path.join(model_config.output_dir, 'lstm')
    if not os.path.exists(model_config.output_dir):
        os.makedirs(model_config.output_dir)
    model = Model(text_config, model_config)
    # Validate on the training set itself; see test_tfidf.
    hist = model.train(y_train, X_train_struc, X_train_text,
                       y_train, X_train_struc, X_train_text)
    val_acc_true = 1.0
    self.assertTrue(np.isclose(val_acc_true, hist.history['val_acc'][-1]))
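# A minimal sketch of what `open_glove` is assumed to do: parse a GloVe text
# file into a {word: vector} embeddings index. Hypothetical reconstruction;
# the project's real helper may differ.
def _open_glove_sketch(glove_file_path):
    embeddings_index = {}
    with open(glove_file_path, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # First token is the word, the rest are the embedding weights.
            embeddings_index[values[0]] = np.asarray(values[1:],
                                                     dtype='float32')
    return embeddings_index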
def test_strucdata_only(self):
    df_train, df_dev, df_test, metadata = get_fake_dataset(
        with_text_col=False)
    encoder = Encoder(metadata, text_config=None)
    y_train, X_train_struc, X_train_text = encoder.fit_transform(df_train)
    y_dev, X_dev_struc, X_dev_text = encoder.transform(df_dev)
    y_test, X_test_struc, X_test_text = encoder.transform(df_test)
    # Without a text column there should be no encoded text features.
    self.assertIsNone(X_train_text)
    self.assertIsNone(X_dev_text)
    self.assertIsNone(X_test_text)
    model_config = get_fake_modelconfig('./outputs_test')
    model_config.output_dir = os.path.join(model_config.output_dir,
                                           'dense_mlp')
    if not os.path.exists(model_config.output_dir):
        os.makedirs(model_config.output_dir)
    model = Model(text_config=None, model_config=model_config)
    hist = model.train(y_train, X_train_struc, X_train_text,
                       y_train, X_train_struc, X_train_text)
    val_acc_true = 1.0
    self.assertTrue(np.isclose(val_acc_true, hist.history['val_acc'][-1]))
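# These methods take `self` and use unittest-style asserts, so they are
# assumed to live in a unittest.TestCase subclass (the class header is not
# shown above); a standard entry point would then be:
#
#     if __name__ == '__main__':
#         unittest.main()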
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--encoded_data_dir', type=str,
                        help='directory to load the encoded data.')
    # data_name is optional.
    parser.add_argument('--data_name', type=str,
                        help='which data will be used? (kickstarter or indiegogo?)')
    parser.add_argument('--search_space_filepath', type=str,
                        help='where to load the search space file?')
    parser.add_argument('--output_dir', type=str,
                        help='directory to save the trained model and related model_config.')
    parser.add_argument('--task_type', type=str, default='classification',
                        help='what is the type of this task? (classification or regression?)')
    parser.add_argument('--num_classes', type=int,
                        help='number of classes (classification) or outputs (regression)?')
    parser.add_argument('--model_type', type=str, default='mlp',
                        help='what type of NN model to try? (mlp or skip_connections?)')
    parser.add_argument('--num_trials', type=int, default=1,
                        help='how many trials to run the model?')
    args = parser.parse_args()

    if args.data_name is not None and args.encoded_data_dir is not None:
        path_to_data = os.path.join(args.encoded_data_dir, args.data_name)
        path_to_save = os.path.join(args.output_dir, args.data_name)
        if not os.path.exists(path_to_save):
            os.makedirs(path_to_save)
    elif args.data_name is None and args.encoded_data_dir is not None:
        path_to_data = args.encoded_data_dir
        path_to_save = args.output_dir
    else:
        raise argparse.ArgumentTypeError(
            '{} or {} cannot be recognized.'.format(args.data_name,
                                                    args.encoded_data_dir))

    ###########################################
    ## load encoded training set and dev set ##
    ###########################################
    y_train_path = os.path.join(path_to_data, 'y_train.npy')
    if os.path.exists(y_train_path):
        y_train = np.load(y_train_path, mmap_mode='r')
    else:
        raise ValueError('y_train is not found!')

    X_train_struc_path = os.path.join(path_to_data, 'X_train_struc.npy')
    if os.path.exists(X_train_struc_path):
        X_train_struc = np.load(X_train_struc_path, mmap_mode='r')
    else:
        X_train_struc = None

    X_train_text_path = os.path.join(path_to_data, 'X_train_text.npy')
    if os.path.exists(X_train_text_path):
        X_train_text = np.load(X_train_text_path, mmap_mode='r')
    else:
        X_train_text = None

    y_dev_path = os.path.join(path_to_data, 'y_dev.npy')
    if os.path.exists(y_dev_path):
        y_dev = np.load(y_dev_path, mmap_mode='r')
    else:
        raise ValueError('y_dev is not found!')

    X_dev_struc_path = os.path.join(path_to_data, 'X_dev_struc.npy')
    if os.path.exists(X_dev_struc_path):
        X_dev_struc = np.load(X_dev_struc_path, mmap_mode='r')
    else:
        X_dev_struc = None

    X_dev_text_path = os.path.join(path_to_data, 'X_dev_text.npy')
    if os.path.exists(X_dev_text_path):
        X_dev_text = np.load(X_dev_text_path, mmap_mode='r')
    else:
        X_dev_text = None

    text_config_path = os.path.join(path_to_data, 'text_config.json')
    if os.path.exists(text_config_path):
        with open(text_config_path, 'r') as f:
            text_config = Mapping(json.load(f))
    else:
        text_config = None

    if text_config is not None and text_config.mode == 'glove':
        embedding_matrix_path = text_config.embedding_matrix_path
        if os.path.exists(embedding_matrix_path):
            text_config.embedding_matrix = np.load(embedding_matrix_path,
                                                   mmap_mode='r')
        else:
            raise ValueError('embedding_matrix is not found!')

    ############################################
    ## sample model config from search space  ##
    ############################################
    if args.task_type is not None and args.num_classes is not None:
        print('you are choosing ' + args.model_type + ' as the model type!')
        default_model_config = create_default_modelconfig(
            args.task_type, args.num_classes, args.model_type, path_to_save)
    else:
        raise ValueError('You are missing task_type or num_classes or both!')

    ## load the search space file provided by the user ##
    with open(args.search_space_filepath, 'r') as f:
        search_space = Mapping(json.load(f))

    #######################################################################
    ## update default model_config based on search_space and train model ##
    #######################################################################
    for i in range(args.num_trials):
        model_config = Mapping(sample_modelconfig(search_space,
                                                  default_model_config))
        model_name = 'model_{}'.format(i)
        print('*' * 20)
        print('model_config: ' + model_config.output_dir)
        model_config.output_dir = os.path.join(default_model_config.output_dir,
                                               model_name)
        if not os.path.exists(model_config.output_dir):
            os.makedirs(model_config.output_dir)
        model = Model(text_config, model_config)
        # Train on the training set and validate on the dev set.
        hist = model.train(y_train, X_train_struc, X_train_text,
                           y_dev, X_dev_struc, X_dev_text)

        ## save hist.history and model_config ##
        history_path = os.path.join(model_config.output_dir, 'history.json')
        with open(history_path, 'w') as hf:
            json.dump(hist.history, hf)
        model_config_savepath = os.path.join(model_config.output_dir,
                                             'model_config.json')
        with open(model_config_savepath, 'w') as mf:
            json.dump(model_config, mf)
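# Standard entry point; example invocation (the script name and all paths
# below are illustrative, not taken from the project):
#   python train.py --encoded_data_dir ./encoded --data_name KICK \
#       --search_space_filepath ./search_space.json --output_dir ./outputs \
#       --task_type classification --num_classes 2 --num_trials 5
if __name__ == '__main__':
    main()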
class Trainer(object):
    def __init__(self, cfg):
        self.storage = {}
        self.device = cfg.SOLVER.DEVICE
        self.max_iter = cfg.SOLVER.MAX_ITERS
        self.log_dir = cfg.SOLVER.TENSORBOARD_WRITER.LOG_DIR
        self.base_lr = cfg.SOLVER.LR.BASE_LR
        optimizer_name = cfg.SOLVER.OPTIMIZER
        self.weight_decay = cfg.SOLVER.WEIGHT_DECAY
        self.weights = cfg.SOLVER.WEIGHTS
        self.image_period = cfg.SOLVER.TENSORBOARD_WRITER.IMAGE_PERIOD
        self.scalar_period = cfg.SOLVER.TENSORBOARD_WRITER.SCALAR_PERIOD
        self.save_period = cfg.SOLVER.CHECKPOINT_PERIOD
        self.save_model_dir = cfg.SOLVER.SAVE_DIR
        self.model_name = cfg.SOLVER.CHECKPOINT_NAME
        data_loader = build_train_data_loader(cfg)
        self._data_loader_iter = iter(data_loader)
        self.model = Model(cfg, True).train().to(self.device)
        self.optimizer = self.build_optimizer(optimizer_name, self.model)
        self.lr_scheduler = build_LRscheduler(self.optimizer, cfg)
        self.iter = 0
        self.writer = None
        self.tic = 0
        self.toc = 0

    def build_optimizer(self, name: str,
                        model: torch.nn.Module) -> torch.optim.Optimizer:
        """No bias decay: Bag of Tricks for Image Classification with
        Convolutional Neural Networks (https://arxiv.org/pdf/1812.01187.pdf)"""
        weight_p, bias_p = [], []
        for p_name, p in model.named_parameters():
            if 'bias' in p_name:
                bias_p += [p]
            else:
                weight_p += [p]
        # Apply weight decay to weights only; biases are exempt.
        parameters = [{
            'params': weight_p,
            'weight_decay': self.weight_decay
        }, {
            'params': bias_p,
            'weight_decay': 0
        }]
        if name == 'Adam':
            return torch.optim.Adam(parameters, lr=self.base_lr)
        if name == 'SGD':
            return torch.optim.SGD(parameters, lr=self.base_lr)
        if name == 'SWA':
            # Stochastic Weight Averaging: Averaging Weights Leads to Wider
            # Optima and Better Generalization
            # (https://arxiv.org/pdf/1803.05407.pdf)
            base_opt = torch.optim.SGD(parameters, lr=self.base_lr)
            return SWA(base_opt, swa_start=10, swa_freq=5, swa_lr=self.base_lr)
        raise ValueError('unknown optimizer: ' + name)

    def before_train(self):
        if self.weights != '':
            checkpoint = torch.load(self.weights)
            self.model.load_state_dict(checkpoint)
        if not os.path.exists(self.save_model_dir):
            os.makedirs(self.save_model_dir)
        self.writer = SummaryWriter(self.log_dir)
        self.model.train()

    def after_train(self):
        model_name = self.model_name + '_' + str(self.iter) + '.pth'
        torch.save(self.model.state_dict(),
                   os.path.join(self.save_model_dir, model_name))

    def before_step(self):
        self.tic = time.time()

    def after_step(self):
        # Record iteration time.
        self.toc = time.time()
        iter_time = self.toc - self.tic
        self.storage['iter_time'] = iter_time

        # Write everything in storage to tensorboard; nested dicts are
        # flattened into 'key/sub_key' tags.
        for key in self.storage:
            if isinstance(self.storage[key], dict):
                sub_dict = self.storage[key]
                for sub_key in sub_dict:
                    value = sub_dict[sub_key]
                    self._write_tensorboard(key + '/' + sub_key, value)
            else:
                value = self.storage[key]
                self._write_tensorboard(key, value)

        # Save a checkpoint periodically.
        if self.iter % self.save_period == 0:
            model_name = self.model_name + '_' + str(self.iter) + '.pth'
            torch.save(self.model.state_dict(),
                       os.path.join(self.save_model_dir, model_name))

    def _write_tensorboard(self, key: str,
                           value: Union[torch.Tensor, int, float]):
        # 4-D tensors (NCHW batches) are logged as images, everything else
        # as scalars, each at its own period.
        if isinstance(value, torch.Tensor) and len(value.shape) == 4:
            if self.iter % self.image_period == 0:
                self.writer.add_images(key, value, self.iter)
        elif self.iter % self.scalar_period == 0:
            self.writer.add_scalar(key, value, self.iter)

    def train(self, start_iter=0):
        try:
            self.before_train()
            for self.iter in range(start_iter, self.max_iter):
                self.before_step()
                self.run_step()
                self.after_step()
        finally:
            # Always save a final checkpoint, even if training is interrupted.
            self.after_train()

    def run_step(self):
        data = next(self._data_loader_iter)
        total_loss, losses, metrics = self.model(data)
        self.storage['total_loss'] = total_loss
        self.storage['losses'] = losses
        self.storage['image'] = data['image']
        self.storage['training_mask'] = data['training_mask']
        self.storage['metrics'] = metrics
        grads = {}
        self.storage['grads'] = grads

        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        self.storage['lr'] = self.lr_scheduler.get_lr()[0]
        self.lr_scheduler.step()

        # Record the mean absolute gradient per parameter for monitoring.
        for name, parm in self.model.named_parameters():
            if parm.grad is not None:
                grads[name] = torch.mean(torch.abs(parm.grad))
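# Minimal usage sketch. `load_config` is a hypothetical loader; any config
# object exposing the cfg.SOLVER.* keys read in Trainer.__init__ would work:
#
#     cfg = load_config('configs/train.yaml')
#     trainer = Trainer(cfg)
#     trainer.train(start_iter=0)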