def __init__(self, filename: str = None, **kwargs):
    """Create an XConfig object from a configuration file or a plain dict.

    The data source is chosen in this order: ``plain_dict`` when it is not
    None, otherwise ``filename`` when it is not None, otherwise the object
    is left without loaded content.

    :param filename: configuration file [yaml, json, toml], defaults to None
    :type filename: str, optional
    :param replace_environment_variables: TRUE to auto replace environment
        variables placeholders while deep parsing, defaults to True
    :type replace_environment_variables: bool, optional
    :param plain_dict: if not None will be used as data source instead of
        filename, defaults to None
    :type plain_dict: dict, optional
    :param no_deep_parse: TRUE to skip the ``deep_parse`` call at the end of
        construction, defaults to False
    :type no_deep_parse: bool, optional
    """
    # options
    replace_env_variables = kwargs.get("replace_environment_variables", True)
    plain_dict = kwargs.get("plain_dict")
    no_deep_parse = kwargs.get("no_deep_parse", False)

    # ``_filename`` stays None unless the content really came from a file.
    self._filename = None
    if plain_dict is not None:
        # An explicit dict (even an empty one) wins over ``filename``.
        self.update(plain_dict)
    elif filename is not None:
        self._filename = Path(filename)
        self.update(box_from_file(file=self._filename))

    self._schema = None

    if not no_deep_parse:
        self.deep_parse(replace_environment_variables=replace_env_variables)
def main(run_name, word2vec_path):
    """Expand the CPC vocabulary with word2vec vectors and save the result.

    Loads ``config_cpc.yaml``, restores the best checkpoint of ``run_name``
    on the CPU, extracts the model's word-embedding table, and hands it,
    together with the original vocabulary and the word2vec vectors, to
    ``_expand_vocabulary``. The resulting mapping is passed to
    ``save_expansion``.

    :param run_name: name of the training run; the checkpoint is read from
        ``<config.training.logging_dir>/<run_name>-model_best.pth``
    :param word2vec_path: path to the word2vec vectors, read in binary
        word2vec format
    """
    config = box_from_file(Path('config_cpc.yaml'), file_type='yaml')

    use_cuda = False  # use CPU
    device = torch.device("cuda" if use_cuda else "cpu")
    print('use_cuda is', use_cuda)

    # load pretrained model
    print("Loading pretrained CPC model: {}".format(run_name))
    cpc_model = CPCv1(config=config)
    checkpoint_path = '{}/{}-{}'.format(
        config.training.logging_dir, run_name, 'model_best.pth')
    # map_location keeps a checkpoint that was saved on a GPU loadable on a
    # CPU-only machine; without it torch tries to restore tensors onto the
    # device they were saved from.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    cpc_model.load_state_dict(checkpoint['state_dict'])
    cpc_model.to(device)

    # get lookup table
    cpc_model.eval()
    # Inference only: no autograd graph is needed for the embedding lookup.
    with torch.no_grad():
        output = cpc_model.get_word_embedding(
            torch.arange(config.dataset.vocab_size).to(device))
    skip_thoughts_emb = output.detach().cpu().numpy()

    # load original vocab dictionary
    print("Loading CPC dictionary")
    skip_thoughts_vocab = load_dictionary(loc='vocab.pkl')
    # The embedding table has one row per vocabulary id, so the sizes must agree.
    assert len(skip_thoughts_vocab) == config.dataset.vocab_size

    # Load the Word2Vec model
    print('Loading word2vec vectors at {}'.format(word2vec_path))
    word2vec = gensim.models.KeyedVectors.load_word2vec_format(
        word2vec_path, binary=True)

    # Run vocabulary expansion
    embedding_map = _expand_vocabulary(
        skip_thoughts_emb, skip_thoughts_vocab, word2vec)

    # Save expanded embeddings and dictionary
    print("Saving expanded embeddings and vocabulary")
    save_expansion(embedding_map, config)
def test_from_all(self):
    """Check the container type box_from_file returns for each fixture.

    Every fixture under ``<test_root>/data`` is loaded once, either without
    a ``file_type`` or with an explicit one, and the result must be an
    instance of the expected class.
    """
    # (fixture name, extra keyword arguments, expected result type)
    cases = (
        ("json_file.json", {}, Box),
        ("toml_file.tml", {}, Box),
        ("yaml_file.yaml", {}, Box),
        ("json_file.json", {"file_type": 'json'}, Box),
        ("toml_file.tml", {"file_type": 'toml'}, Box),
        ("yaml_file.yaml", {"file_type": 'yaml'}, Box),
        ("json_list.json", {}, BoxList),
        ("yaml_list.yaml", {}, BoxList),
    )
    for fixture_name, extra_kwargs, expected_type in cases:
        loaded = box_from_file(
            Path(test_root, "data", fixture_name), **extra_kwargs)
        assert isinstance(loaded, expected_type)
def test_bad_file(self):
    """Check that box_from_file raises BoxError for unusable input.

    ``bad_file.txt`` is tried with several explicit file types and once
    without any, and a path that does not exist is tried last; each call
    must raise ``BoxError``.
    """
    explicit_types = ("json", "toml", "yaml", "msgpack", "unknown")
    for explicit_type in explicit_types:
        with pytest.raises(BoxError):
            box_from_file(
                Path(test_root, "data", "bad_file.txt"),
                file_type=explicit_type,
            )

    # No file_type given at all.
    with pytest.raises(BoxError):
        box_from_file(Path(test_root, "data", "bad_file.txt"))

    # A plain string naming a file that is not there.
    with pytest.raises(BoxError):
        box_from_file("does not exist")
def test_from_all(self):
    """Check the container type box_from_file returns for each fixture.

    Every fixture under ``<test_root>/data`` is loaded once, either without
    a ``file_type`` or with an explicit one, and the result must be an
    instance of the expected class.
    """
    # (fixture name, extra keyword arguments, expected result type)
    cases = (
        ("json_file.json", {}, Box),
        ("toml_file.tml", {}, Box),
        ("yaml_file.yaml", {}, Box),
        ("json_file.json", {"file_type": "json"}, Box),
        ("toml_file.tml", {"file_type": "toml"}, Box),
        ("yaml_file.yaml", {"file_type": "yaml"}, Box),
        ("json_list.json", {}, BoxList),
        ("yaml_list.yaml", {}, BoxList),
        ("msgpack_file.msgpack", {}, Box),
        ("msgpack_list.msgpack", {}, BoxList),
        ("csv_file.csv", {}, BoxList),
    )
    for fixture_name, extra_kwargs, expected_type in cases:
        loaded = box_from_file(
            Path(test_root, "data", fixture_name), **extra_kwargs)
        assert isinstance(loaded, expected_type)
def test_bad_file(self):
    """Check that box_from_file raises BoxError for unusable input.

    ``bad_file.txt`` is tried with several explicit file types and once
    without any, and a path that does not exist is tried last; each call
    must raise ``BoxError``.
    """
    explicit_types = ('json', 'toml', 'yaml', 'unknown')
    for explicit_type in explicit_types:
        with pytest.raises(BoxError):
            box_from_file(
                Path(test_root, "data", "bad_file.txt"),
                file_type=explicit_type,
            )

    # No file_type given at all.
    with pytest.raises(BoxError):
        box_from_file(Path(test_root, "data", "bad_file.txt"))

    # A plain string naming a file that is not there.
    with pytest.raises(BoxError):
        box_from_file('does not exist')
## Torch
import torch
import torch.nn as nn
from torch.utils import data
import torch.optim as optim

## Custom Imports
from utils.logger import setup_logs
from utils.seed import set_seed
from utils.train import train, snapshot
from utils.validation import validation
from utils.dataset import BookCorpus
from model.models import CPCv1

############ Control Center and Hyperparameter ###############
# All hyperparameters come from the YAML configuration file.
config = box_from_file(Path('config_cpc.yaml'), file_type='yaml')

# Reuse the configured run name when one is set; otherwise create a
# timestamped name for this run.
run_name = (
    config.training.resume_name
    or "cpc" + time.strftime("-%Y-%m-%d_%H_%M_%S")
)

# setup logger
global_timer = timer()  # global timer
logger = setup_logs(config.training.logging_dir, run_name)  # setup logs
logger.info('### Experiment {} ###'.format(run_name))
logger.info('### Hyperparameter summary below ###\n {}'.format(config))

# setup of comet_ml
if has_comet:
    logger.info('### Logging with comet_ml ###')
    if config.comet.previous_experiment:
        logger.info(
            '===> using existing experiment: {}'.format(
                config.comet.previous_experiment
            )
        )
"""Learning rate scheduling per step""" self.n_current_steps += self.delta new_lr = np.power(self.d_model, -0.5) * np.min([ np.power(self.n_current_steps, -0.5), np.power(self.n_warmup_steps, -1.5) * self.n_current_steps ]) for param_group in self.optimizer.param_groups: param_group['lr'] = new_lr return new_lr ############ Control Center and Hyperparameter ############### run_name = "cpc-clf" + time.strftime("-%Y-%m-%d_%H_%M_%S") config_encoder = box_from_file(Path('config_cpc.yaml'), file_type='yaml') config = box_from_file(Path('config_clf.yaml'), file_type='yaml') global_timer = timer() # global timer logger = setup_logs(config.training.logging_dir, run_name) # setup logs logger.info('### Experiment {} ###'.format(run_name)) logger.info('### Hyperparameter summary below ###\n {}\n'.format(config)) use_cuda = not config.training.no_cuda and torch.cuda.is_available() print('use_cuda is', use_cuda) device = torch.device("cuda" if use_cuda else "cpu") # set seed for reproducibility set_seed(config.training.seed, use_cuda) # Load pretrained CPC model cpc_model = CPCv1(config=config_encoder) checkpoint = torch.load(config.txt_classifier.cpc_path) cpc_model.load_state_dict(checkpoint['state_dict'])