def __init__(self, params, data_dir, data_encoder, label_encoder=None,
             device=torch.device('cpu')):
    """Initialise the instance from the dataset parameters in `data_dir`.

    Expects `<data_dir>/dataset_params.json` to exist; the assertion
    message names `build_vocab.py` as the script to run when it does not.

    Args:
        params: object exposing `update(path)`; it is called with the path
            of the dataset-params json file (presumably merging those
            values into the hyperparameters -- confirm against
            `utils.Params`).
        data_dir: (string) directory holding `dataset_params.json`.
        data_encoder: stored unchanged as `self.data_encoder`.
        label_encoder: stored unchanged as `self.label_encoder`;
            defaults to None.
        device: stored as `self.device`; defaults to the CPU device.

    Raises:
        AssertionError: if `dataset_params.json` is not a file in
            `data_dir`.
    """
    # Locate the dataset parameters and fail early when they are missing.
    dataset_params_file = os.path.join(data_dir, 'dataset_params.json')
    assert os.path.isfile(dataset_params_file), (
        "No json file found at {}, run build_vocab.py".format(
            dataset_params_file))

    # Plain references handed in by the caller.
    self.data_dir = data_dir
    self.data_encoder = data_encoder
    self.label_encoder = label_encoder

    # The same json file is passed to the caller's params object and is
    # also loaded into a dedicated Params instance kept on this object.
    params.update(dataset_params_file)
    self.dataset_params = utils.Params(dataset_params_file)

    # Id <-> index lookups start out unset (presumably filled in later;
    # nothing in this method populates them).
    self.id_to_idx = self.idx_to_id = None
    self.device = device
def __init__(self, params, data_dir, data_encoder, label_encoder):
    """Load the dataset parameters found in `data_dir` and seed torch.

    Run `build_vocab.py` on `data_dir` before using this class so that
    `<data_dir>/dataset_params.json` exists.

    Args:
        params: (Params) hyperparameters of the training process. It is
            modified in place through `params.update(json_path)`, which
            appends the dataset parameters (such as vocab size, number of
            tags etc.) to it. Its `cuda` attribute decides whether the
            CUDA random generator is seeded as well.
        data_dir: (string) directory containing the dataset.
        data_encoder: stored unchanged as `self.data_encoder`.
        label_encoder: stored unchanged as `self.label_encoder`.

    Raises:
        AssertionError: if `dataset_params.json` is not a file in
            `data_dir`.
    """
    # Locate the dataset parameters and fail early when they are missing.
    dataset_params_file = os.path.join(data_dir, 'dataset_params.json')
    assert os.path.isfile(dataset_params_file), (
        "No json file found at {}, run build_vocab.py".format(
            dataset_params_file))

    # Plain references handed in by the caller.
    self.data_dir = data_dir
    self.data_encoder = data_encoder
    self.label_encoder = label_encoder

    # Extend the caller's params with the dataset parameters and keep a
    # separate Params instance loaded from the same file.
    params.update(dataset_params_file)
    self.dataset_params = utils.Params(dataset_params_file)

    # Fixed seed for the torch generators (CUDA only when params.cuda is set).
    seed = 230
    torch.manual_seed(seed)
    if params.cuda:
        torch.cuda.manual_seed(seed)
# 8. Tune word embeddings: NO

# 0. Directory of the pretrained model
pretrained_model_dir = 'experiments/st_fracs/kitchen_housewares/st_100_save'
all_layer = True

# 1. Choose the training device: CUDA when available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# 2. Read the hyperparameters from <model_dir>/params.json
args = parser.parse_args()
network_params = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(network_params), (
    "No json configuration file found at {}".format(network_params))
params = utils.Params(network_params)

# Record GPU availability on the params object
params.cuda = torch.cuda.is_available()

# 3. Fix the random seeds so that experiments are reproducible
#    (NumPy and torch keep independent generators)
np.random.seed(0)
torch.manual_seed(230)
if params.cuda:
    torch.cuda.manual_seed(230)

# 4. Configure the logger; the log path is <model_dir>/train.log
utils.set_logger(os.path.join(args.model_dir, 'train.log'))

# 5. Build the input data pipeline
logging.info("Loading the datasets...")

# 5.1 Specify features
from collections import OrderedDict
# 0. Directory of the pretrained model
pretrained_model_dir = '../ner/experiments/disease/st_fracs/germeval/st_germeval_100'
# Use this when the target column is loaded with best model
target_model_dir = '../ner/experiments/disease/st/st_bc5cdr_all'

# 1. Choose the training device: CUDA when available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# 2. Read the hyperparameters from the json files
args = parser.parse_args()

# These should be loaded from the pretrained model
pretrained_network_params = os.path.join(pretrained_model_dir, 'params.json')
assert os.path.isfile(pretrained_network_params), (
    "No json configuration file found at {}".format(pretrained_network_params))

# Parameters of the new model live in <model_dir>/params.json
new_network_params = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(new_network_params), (
    "No json configuration file found at {}".format(new_network_params))

pre_params = utils.Params(pretrained_network_params)
new_params = utils.Params(new_network_params)

# Record GPU availability on both params objects
pre_params.cuda = torch.cuda.is_available()
new_params.cuda = torch.cuda.is_available()

# 3. Fix the random seeds so that experiments are reproducible
#    (NumPy and torch keep independent generators)
np.random.seed(0)
torch.manual_seed(230)
if new_params.cuda:
    torch.cuda.manual_seed(230)

# 4. Configure the logger; the log path is <model_dir>/train.log
utils.set_logger(os.path.join(args.model_dir, 'train.log'))

# 5. Create the input data pipeline
# 1. Choose the training device: CUDA when available, otherwise CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# 2. Read the hyperparameters from the json files
args = parser.parse_args()

# These should be loaded from the pretrained model
c1_network_params = os.path.join(c1_model_dir, 'params.json')
assert os.path.isfile(c1_network_params), (
    "No json configuration file found at {}".format(c1_network_params))

# These should be loaded from the pretrained model
c2_network_params = os.path.join(c2_model_dir, 'params.json')
assert os.path.isfile(c2_network_params), (
    "No json configuration file found at {}".format(c2_network_params))

# Parameters of the new model live in <model_dir>/params.json
new_network_params = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(new_network_params), (
    "No json configuration file found at {}".format(new_network_params))

c1_params = utils.Params(c1_network_params)
c2_params = utils.Params(c2_network_params)
new_params = utils.Params(new_network_params)

# Record GPU availability on every params object
c1_params.cuda = torch.cuda.is_available()
c2_params.cuda = torch.cuda.is_available()
new_params.cuda = torch.cuda.is_available()

# 3. Fix the random seeds so that experiments are reproducible
#    (NumPy and torch keep independent generators)
np.random.seed(0)
torch.manual_seed(230)
if new_params.cuda:
    torch.cuda.manual_seed(230)

# 4. Configure the logger; the log path is <model_dir>/train.log
utils.set_logger(os.path.join(args.model_dir, 'train.log'))