def __init__(self,
             params,
             data_dir,
             data_encoder,
             label_encoder=None,
             device=torch.device('cpu')):
        """
        Set up the loader from the `dataset_params.json` file found in data_dir.

        Args:
            params: object exposing `update(path)`; it is updated in place from the dataset_params json file.
            data_dir: (string) directory that must contain `dataset_params.json`
            data_encoder: kept on the instance as `data_encoder`
            label_encoder: kept on the instance as `label_encoder` (defaults to None)
            device: (torch.device) kept on the instance as `device` (defaults to the CPU device)

        Raises:
            AssertionError: if `dataset_params.json` is not a file inside data_dir
        """
        # the dataset parameters file has to exist before anything is stored on the instance
        dataset_params_file = os.path.join(data_dir, 'dataset_params.json')
        missing_msg = "No json file found at {}, run build_vocab.py".format(
            dataset_params_file)
        assert os.path.isfile(dataset_params_file), missing_msg

        self.data_dir = data_dir
        self.data_encoder = data_encoder
        self.label_encoder = label_encoder

        # the caller's params are updated from the json file, and a separate
        # Params object built from the same file is kept on the instance
        params.update(dataset_params_file)
        self.dataset_params = utils.Params(dataset_params_file)

        # both id <-> index lookups start out unset
        self.id_to_idx = self.idx_to_id = None
        self.device = device
    def __init__(self, params, data_dir, data_encoder, label_encoder):
        """
        Read `dataset_params.json` from data_dir and keep the encoders on the instance. `build_vocab.py` must have
        been run on data_dir beforehand so that the json file exists.

        Args:
            params: (Params) hyperparameters of the training process. It is updated in place from the
                    dataset_params json file (such as vocab size, num_of_tags etc.).
            data_dir: (string) directory containing the dataset
            data_encoder: kept on the instance as `data_encoder`
            label_encoder: kept on the instance as `label_encoder`

        Raises:
            AssertionError: if `dataset_params.json` is not a file inside data_dir
        """
        # the dataset parameters file has to exist before anything is stored on the instance
        dataset_params_file = os.path.join(data_dir, 'dataset_params.json')
        missing_msg = "No json file found at {}, run build_vocab.py".format(dataset_params_file)
        assert os.path.isfile(dataset_params_file), missing_msg

        self.data_dir = data_dir
        self.data_encoder = data_encoder
        self.label_encoder = label_encoder

        # the caller's params are updated from the json file, and a separate
        # Params object built from the same file is kept on the instance
        params.update(dataset_params_file)
        self.dataset_params = utils.Params(dataset_params_file)

        # fixed seed for torch, and for CUDA as well when params.cuda is truthy
        seed = 230
        torch.manual_seed(seed)
        if params.cuda:
            torch.cuda.manual_seed(seed)
# Example no. 3
# 0
    # 8. Tune word embeddings: NO

    # 0. Directory of the pretrained model (hard-coded relative path)
    pretrained_model_dir = 'experiments/st_fracs/kitchen_housewares/st_100_save'
    # NOTE(review): all_layer is not read in the visible lines -- confirm how it is consumed further down
    all_layer = True

    # 1. Pick the device to train on: CUDA when torch reports it as available, CPU otherwise
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 2. Load the parameters from <model_dir>/params.json, where model_dir comes from the
    #    command-line arguments; the assert fails with the offending path if the file is missing
    args = parser.parse_args()
    network_params = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        network_params), "No json configuration file found at {}".format(
            network_params)
    params = utils.Params(network_params)
    # record CUDA availability on params (read below when seeding)
    params.cuda = torch.cuda.is_available()

    # 3. Set the random seeds for reproducible experiments:
    #    torch (and CUDA when params.cuda is set) use 230, numpy uses 0
    torch.manual_seed(230)
    if params.cuda: torch.cuda.manual_seed(230)
    np.random.seed(0)

    # 4. Set the logger, passing it the path <model_dir>/train.log
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # 5. Create the input data pipeline
    logging.info("Loading the datasets...")
    # 5.1 specify features
    # local import; the code that uses OrderedDict lies outside the visible lines
    from collections import OrderedDict
    # 0. Directories of the pretrained model and of the target model (hard-coded relative paths)
    pretrained_model_dir = '../ner/experiments/disease/st_fracs/germeval/st_germeval_100'
    target_model_dir = '../ner/experiments/disease/st/st_bc5cdr_all'  # Use this when the target column is loaded with best model

    # 1. Pick the device to train on: CUDA when torch reports it as available, CPU otherwise
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 2. Load two parameter files: one from the pretrained model directory and one from the
    #    model_dir given on the command line; each assert fails with the offending path if
    #    the corresponding params.json is missing
    args = parser.parse_args()
    pretrained_network_params = os.path.join(pretrained_model_dir, 'params.json') # these should be loaded from the pretrained model
    assert os.path.isfile(pretrained_network_params), "No json configuration file found at {}".format(pretrained_network_params)
    new_network_params = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(new_network_params), "No json configuration file found at {}".format(new_network_params)

    pre_params = utils.Params(pretrained_network_params)
    new_params = utils.Params(new_network_params)

    # record CUDA availability on both parameter objects
    pre_params.cuda = torch.cuda.is_available()
    new_params.cuda = torch.cuda.is_available()

    # 3. Set the random seeds for reproducible experiments:
    #    torch (and CUDA when new_params.cuda is set) use 230, numpy uses 0
    torch.manual_seed(230)
    if new_params.cuda: torch.cuda.manual_seed(230)
    np.random.seed(0)

    # 4. Set the logger, passing it the path <model_dir>/train.log
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # 5. Create the input data pipeline
# Example no. 5
# 0
    # 1. Pick the device to train on: CUDA when torch reports it as available, CPU otherwise
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 2. Load three parameter files: one per pretrained model directory (c1_model_dir and
    #    c2_model_dir, both defined above the visible lines) and one from the model_dir given
    #    on the command line; each assert fails with the offending path if params.json is missing
    args = parser.parse_args()
    c1_network_params = os.path.join(c1_model_dir, 'params.json') # these should be loaded from the pretrained model
    assert os.path.isfile(c1_network_params), "No json configuration file found at {}".format(c1_network_params)

    c2_network_params = os.path.join(c2_model_dir, 'params.json')  # these should be loaded from the pretrained model
    assert os.path.isfile(c2_network_params), "No json configuration file found at {}".format(c2_network_params)

    new_network_params = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(new_network_params), "No json configuration file found at {}".format(new_network_params)

    c1_params = utils.Params(c1_network_params)
    c2_params = utils.Params(c2_network_params)
    new_params = utils.Params(new_network_params)

    # record CUDA availability on all three parameter objects
    c1_params.cuda = torch.cuda.is_available()
    c2_params.cuda = torch.cuda.is_available()
    new_params.cuda = torch.cuda.is_available()

    # 3. Set the random seeds for reproducible experiments:
    #    torch (and CUDA when new_params.cuda is set) use 230, numpy uses 0
    torch.manual_seed(230)
    if new_params.cuda: torch.cuda.manual_seed(230)
    np.random.seed(0)

    # 4. Set the logger, passing it the path <model_dir>/train.log
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))