# 5.1.1 Encoders for the new model.
logging.info('creating and loading data loaders')
data_encoder = OrderedDict()
data_encoder[WordEncoder.FEATURE_NAME] = WordEncoder(
    os.path.join(args.data_dir, 'feats'), dim=new_params.embedding_dim)
label_encoder = OrderedDict()
label_encoder[ClassEncoder.FEATURE_NAME] = ClassEncoder(
    os.path.join(args.data_dir, 'feats'))
# Record the feature names each encoder provides. Iterating an OrderedDict
# yields its keys, so list(...) replaces the original append loops verbatim.
new_params.data_feats = list(data_encoder)
new_params.label_feats = list(label_encoder)

# 5.1.2 Encoders for the c1 model, restored from its training run.
# NOTE(review): load_obj presumably unpickles these .pkl files — confirm the
# model dirs are trusted, since unpickling untrusted data is unsafe.
c1_data_encoder = utils.load_obj(
    os.path.join(c1_model_dir, 'data_encoder.pkl'))
c1_label_encoder = utils.load_obj(
    os.path.join(c1_model_dir, 'label_encoder.pkl'))
c1_params.data_feats = list(c1_data_encoder)
c1_params.label_feats = list(c1_label_encoder)

# 5.1.3 Encoders for the c2 model.
c2_data_encoder = utils.load_obj(
    os.path.join(c2_model_dir, 'data_encoder.pkl'))
c2_label_encoder = utils.load_obj(
    os.path.join(c2_model_dir, 'label_encoder.pkl'))
c2_params.data_feats = list(c2_data_encoder)
# label_feats is initialized empty here; it is populated past this chunk.
c2_params.label_feats = []
params.cuda = torch.cuda.is_available() # 3. Set the random seed for reproducible experiments torch.manual_seed(230) if params.cuda: torch.cuda.manual_seed(230) np.random.seed(0) # 4. Set the logger utils.set_logger(os.path.join(args.model_dir, 'train.log')) # 5. Create the input data pipeline logging.info("Loading the datasets...") # 5.1 specify features from collections import OrderedDict data_encoder = utils.load_obj( os.path.join(pretrained_model_dir, 'data_encoder.pkl')) pretrained_label_encoder = utils.load_obj( os.path.join(pretrained_model_dir, 'label_encoder.pkl')) label_encoder = OrderedDict() label_encoder[ClassEncoder.FEATURE_NAME] = ClassEncoder( os.path.join(args.data_dir, 'feats')) # 5.2 load data k_fold = None combine_train_dev = False train_on_dev = False data_loader = DataLoader(params, args.data_dir, data_encoder, label_encoder) if k_fold:
# Flag GPU availability so downstream code can move tensors accordingly.
params.cuda = torch.cuda.is_available()

# 3. Set the random seed for reproducible experiments
torch.manual_seed(230)
if params.cuda:
    torch.cuda.manual_seed(230)
np.random.seed(0)

# 4. Set the logger
utils.set_logger(os.path.join(args.model_dir, 'train.log'))

# 5. Create the input data pipeline
logging.info("Loading the datasets...")

# 5.1 specify features
# NOTE(review): mid-script import — prefer moving to the top-of-file imports.
from collections import OrderedDict

# Restore the exact encoders this model was trained with so test features
# are encoded identically.
data_encoder = utils.load_obj(
    os.path.join(args.model_dir, 'data_encoder.pkl'))
label_encoder = utils.load_obj(
    os.path.join(args.model_dir, 'label_encoder.pkl'))

# 5.2 load data
data_loader = DataLoader(params, args.data_dir, data_encoder, label_encoder)
data = data_loader.load_data(['test'])
test_data = data['test']

# 5.3 specify the train and val dataset sizes
params.test_size = test_data['size']
# Evaluation must not shuffle, so batches align with the stored examples.
test_data_iterator = data_loader.batch_iterator(
    test_data, params, shuffle=False)
logging.info("- done.")

# 6. Modeling