def __init__(self, opt: Opt, shared: TShared = None):
    """
    Set up a HuggingFace ``ByteLevelBPETokenizer`` from pretrained files.

    :param opt:
        options. Reads ``bpe_add_prefix_space``, ``dict_loaded``,
        ``dict_file``, ``bpe_vocab`` and ``bpe_merge``; when ``dict_loaded``
        is set, ``bpe_merge``/``bpe_vocab`` are overwritten in place with
        files found next to ``dict_file``.
    :param shared:
        forwarded unchanged to the parent constructor.

    :raises ImportError: if the ``tokenizers`` package is not installed.
    :raises NotImplementedError: if ``self.bpe_dropout`` is set.
    :raises ValueError:
        if vocabulary filtering is requested or a vocab/merge option is absent.
    :raises IOError: if the vocab/merge paths are empty or do not exist.
    """
    super().__init__(opt, shared)
    self.special_tok_map = {}  # map from HF

    # Default true for HF: a missing option and an explicit None both mean True.
    prefix_space = opt.get('bpe_add_prefix_space')
    self.add_prefix_space = True if prefix_space is None else prefix_space

    if opt.get('dict_loaded'):
        # Prefer tokenizer files stored alongside the loaded dictionary.
        dict_prefix = opt['dict_file']
        for opt_key, suffix in (
            ('bpe_merge', '-merges.txt'),
            ('bpe_vocab', '-vocab.json'),
        ):
            candidate = f'{dict_prefix}{suffix}'
            if PathManager.exists(candidate):
                opt[opt_key] = candidate

    try:
        from tokenizers import ByteLevelBPETokenizer
    except ImportError:
        raise ImportError(
            'Please install HuggingFace tokenizer with: pip install tokenizers'
        )

    # Reject configurations this tokenizer cannot honour.
    if self.bpe_dropout:
        raise NotImplementedError(
            '--bpe-dropout is not supported with ByteLevelBPE because tokenizers '
            'library does not allow dynamically turning BPE on/off. You can use '
            '--dict-tokenizer slow_bytelevel_bpe to gain this feature.'
        )
    if self.lower:
        warn_once('Are you sure you want to lower case your BPE dictionary?')
    if self.maxtokens > 0 or self.minfreq > 0:
        raise ValueError(
            'You should not filter vocabulary with using --dict-tokenizer bytelevelbpe'
            ' (no --dict-minfreq or --dict-maxtokens).'
        )

    # Both pretrained files must be named, non-empty and present on disk.
    if 'bpe_vocab' not in opt:
        raise ValueError('--bpe-vocab is required for loading pretrained tokenizer')
    if 'bpe_merge' not in opt:
        raise ValueError('--bpe-merge is required for loading pretrained tokenizer')

    self.vocab_path = opt['bpe_vocab']
    self.merge_path = opt['bpe_merge']

    if not (self.vocab_path and self.merge_path):
        raise IOError(
            '--bpe-vocab and --bpe-merge are mandatory with '
            '--dict-tokenizer bytelevelbpe'
        )
    if not PathManager.exists(self.vocab_path):
        raise IOError(
            f'File {self.vocab_path} does not exist. --bpe-vocab must be pretrained.'
        )
    if not PathManager.exists(self.merge_path):
        raise IOError(
            f'File {self.merge_path} does not exist. --bpe-merge must be pretrained.'
        )

    self.tokenizer = ByteLevelBPETokenizer(
        self.vocab_path, self.merge_path, self.add_prefix_space
    )
def load(self, path):
    """
    Load the image at ``path`` in the form selected by ``opt['image_mode']``.

    :param path: path of the image to load.

    :return:
        ``None`` when the mode is ``None`` or ``'no_image_model'``; the result
        of ``self._load_image`` for ``'raw'``; its ASCII conversion for
        ``'ascii'``; otherwise the preprocessed features, either read from the
        cached file under the ``mode`` directory or produced by
        ``self.extract``.
    """
    mode = self.opt.get('image_mode', 'raw')

    if mode in (None, 'no_image_model'):
        # don't need to load images
        return None
    if mode == 'raw':
        return self._load_image(path)
    if mode == 'ascii':
        # convert images to ascii
        return self._img_to_ascii(self._load_image(path))

    # Any other mode: look for a preprocessed version under the 'mode' directory.
    prepath, imagefn = self._get_prepath(path)
    mode_dir = os.path.join(prepath, mode)
    if not PathManager.exists(mode_dir):
        build_data.make_dir(mode_dir)

    # The cached file is named after the text before the first '.'.
    stem = imagefn.split('.')[0]
    cached_path = os.path.join(prepath, mode, stem)
    if PathManager.exists(cached_path):
        with PathManager.open(cached_path, 'rb') as f:
            return torch.load(f)
    return self.extract(self._load_image(path), cached_path)
def load_init(cls, optfile: str) -> Opt:
    """
    Like load, but also looks in opt_presets folders.

    Lookup order for a single name: the literal path ``optfile``, then
    ``~/.parlai/opt_presets/<optfile>.opt``, then the ``opt_presets``
    resource bundled in the ``parlai`` package.

    :param optfile:
        a file path or preset name; may also be a comma-separated list of
        several, which are loaded in order and merged with later entries
        overriding earlier ones.

    :return: the loaded (and possibly merged) options.

    :raises FileNotFoundError: if no file or preset matches.
    """
    if "," in optfile:
        # load and combine each of the individual files
        new_opt = cls()
        for subopt in optfile.split(","):
            new_opt.update(cls.load_init(subopt))
        return new_opt

    oa_filename = os.path.join("opt_presets", optfile + ".opt")
    user_filename = os.path.join(os.path.expanduser("~/.parlai"), oa_filename)

    if PathManager.exists(optfile):
        return cls.load(optfile)
    if PathManager.exists(user_filename):
        # use a user's custom opt preset
        return cls.load(user_filename)
    if pkg_resources.resource_exists("parlai", oa_filename):
        # Maybe a bundled opt preset
        return cls.load(pkg_resources.resource_filename("parlai", oa_filename))

    # The closing quote after the filename was missing in the original message.
    raise FileNotFoundError(
        f"Could not find filename '{optfile}' or opt preset '{optfile}.opt'. "
        "Please check https://parl.ai/docs/opt_presets.html for a list "
        "of available opt presets."
    )
def set_defaults(opt):
    """
    Fill in embedding-related defaults in ``opt`` when no saved model exists.

    Nothing is changed if ``opt['init_model']`` or ``opt['model_file']`` names
    an existing file. Otherwise ``embedding_file`` is resolved through
    ``modelzoo_path``, ``embedding_dim`` is inferred from the embedding file,
    and ``fix_embeddings`` is switched off where it contradicts other options.

    :param opt: options dict, modified in place.

    :raises IOError: if ``embedding_file`` is set but does not exist.
    :raises RuntimeError:
        if neither ``embedding_file`` nor ``embedding_dim`` is provided.
    """

    def _columns_minus_one(handle):
        # Number of space-separated fields on the next line, minus the token.
        return len(handle.readline().strip().split(' ')) - 1

    init_model = None
    # 'init_model' is checked first; an existing 'model_file' overrides it.
    for key in ('init_model', 'model_file'):
        if opt.get(key) and PathManager.exists(opt[key]):
            init_model = opt[key]

    if init_model is None:
        # Embeddings options
        opt['embedding_file'] = modelzoo_path(
            opt.get('datapath'), opt['embedding_file']
        )
        if opt.get('embedding_file'):
            if not PathManager.exists(opt['embedding_file']):
                raise IOError('No such file: %s' % opt['embedding_file'])
            with PathManager.open(opt['embedding_file']) as f:
                dim = _columns_minus_one(f)
                if dim == 1:
                    # first line was a dud
                    dim = _columns_minus_one(f)
            opt['embedding_dim'] = dim
        elif not opt.get('embedding_dim'):
            raise RuntimeError(
                ('Either embedding_file or embedding_dim ' 'needs to be specified.')
            )

        # Make sure tune_partial and fix_embeddings are consistent
        if opt['tune_partial'] > 0 and opt['fix_embeddings']:
            print('Setting fix_embeddings to False as tune_partial > 0.')
            opt['fix_embeddings'] = False

        # Make sure fix_embeddings and embedding_file are consistent
        if opt['fix_embeddings'] and not opt.get('embedding_file'):
            print('Setting fix_embeddings to False as embeddings are random.')
            opt['fix_embeddings'] = False
def create_agent_from_opt_file_and_model_class(opt, model_class):
    """
    Build ``model_class`` from the options saved next to ``opt['model_file']``.

    The saved options (``<model_file>.opt``) are loaded, stripped of the keys
    in ``NOCOPY_ARGS``, overlaid with ``opt['override']``, optionally passed
    through ``model_class.upgrade_opt``, and finally completed with any keys
    from ``opt`` that the saved options lack.

    :param opt: current options; must contain ``model_file``.
    :param model_class: class to instantiate with the merged options.

    :return:
        ``model_class(merged_opt)``, or ``None`` if ``<model_file>.opt`` does
        not exist.
    """
    model_file = opt['model_file']
    optfile = model_file + '.opt'

    if not PathManager.exists(optfile):
        return None

    opt_from_file = Opt.load(optfile)

    # delete args that we do not want to copy over when loading the model
    for arg in NOCOPY_ARGS:
        if arg in opt_from_file:
            del opt_from_file[arg]

    # only override opts specified in 'override' dict
    if opt.get('override'):
        for k, v in opt['override'].items():
            if k in opt_from_file and str(v) != str(opt_from_file.get(k)):
                # `warning` matches the other loggers in this module; `warn`
                # is a deprecated alias in the standard library.
                logging.warning(
                    f'Overriding opt["{k}"] to {v} (previously: {opt_from_file.get(k)})'
                )
            opt_from_file[k] = v

    if hasattr(model_class, 'upgrade_opt'):
        opt_from_file = model_class.upgrade_opt(opt_from_file)

    # add model arguments to opt_from_file if they aren't in opt_from_file already
    for k, v in opt.items():
        if k not in opt_from_file:
            opt_from_file[k] = v

    # update model file path to the one set by opt
    opt_from_file['model_file'] = model_file
    # update init model path to the one set by opt
    # NOTE: this step is necessary when for example the 'init_model' is
    # set by the Train Loop (as is the case when loading from checkpoint)
    if opt.get('init_model') is not None:
        opt_from_file['init_model'] = opt['init_model']

    # update dict file path
    # Bound up front so the warning below can never hit an unbound local.
    old_dict_file = None
    if not opt_from_file.get('dict_file'):
        opt_from_file['dict_file'] = model_file + '.dict'
    elif not PathManager.exists(opt_from_file['dict_file']):
        old_dict_file = opt_from_file['dict_file']
        opt_from_file['dict_file'] = model_file + '.dict'
    if not PathManager.exists(opt_from_file['dict_file']):
        warn_once(
            'WARNING: Neither the specified dict file ({}) nor the '
            '`model_file`.dict file ({}) exists, check to make sure either '
            'is correct. This may manifest as a shape mismatch later '
            'on.'.format(old_dict_file, opt_from_file['dict_file'])
        )

    # if we want to load weights from --init-model, compare opts with
    # loaded ones
    compare_init_model_opts(opt, opt_from_file)
    return model_class(opt_from_file)
def __init__(self, opt, shared=None):
    """
    Set up model if shared params not set, otherwise no work to do.

    :param opt:
        options; reads ``cache_size``, ``truncate``, ``margin``,
        ``model_file``, ``dict_file`` and ``fixed_candidates_file``.
    :param shared:
        if truthy, must provide ``dict`` and ``model``, which are reused
        instead of being built.
    """
    super().__init__(opt, shared)
    opt = self.opt
    self.reset_metrics()
    self.id = 'Starspace'
    self.NULL_IDX = 0
    self.cands = torch.LongTensor(1, 1, 1)
    self.ys_cache = []
    self.ys_cache_sz = opt['cache_size']
    # A non-positive truncate value disables truncation.
    self.truncate = opt['truncate'] if opt['truncate'] > 0 else None
    self.history = {}
    self.debugMode = False
    if shared:
        # set up shared properties
        self.dict = shared['dict']
        self.model = shared['model']
    else:
        print("[ creating StarspaceAgent ]")
        # this is not a shared instance of this class, so do full init
        if opt.get('model_file') and (
            PathManager.exists(opt.get('model_file') + '.dict')
            or (opt['dict_file'] is None)
        ):
            # set default dict-file if not set
            opt['dict_file'] = opt['model_file'] + '.dict'
        # load dictionary and basic tokens & vectors
        self.dict = DictionaryAgent(opt)

        self.model = Starspace(opt, len(self.dict), self.dict)
        if opt.get('model_file') and PathManager.exists(opt['model_file']):
            self.load(opt['model_file'])
        else:
            self._init_embeddings()
        self.model.share_memory()

    # set up modules
    # `reduction='sum'` is the documented replacement for the deprecated
    # `size_average=False` and yields the same summed loss.
    self.criterion = torch.nn.CosineEmbeddingLoss(
        margin=opt['margin'], reduction='sum'
    )
    self.reset()
    self.fixedCands = False
    self.fixedX = None
    if self.opt.get('fixed_candidates_file'):
        self.fixedCands_txt = load_cands(self.opt.get('fixed_candidates_file'))
        # One (1 x len) LongTensor per fixed candidate.
        self.fixedCands = [
            torch.LongTensor(self.parse(c)).unsqueeze(0)
            for c in self.fixedCands_txt
        ]
        print("[loaded candidates]")
def _build_model(self, path=None):
    """
    Create the ``TransresnetModel`` and load saved weights when available.

    The weights file is the first of: an existing ``opt['init_model']``, an
    existing ``opt['model_file']``, or the ``path`` argument.

    :param path: fallback weights path used when neither opt file exists.
    """
    opt = self.opt
    if opt.get('init_model') and PathManager.exists(opt['init_model']):
        weights_path = opt['init_model']
    elif opt.get('model_file') and PathManager.exists(opt['model_file']):
        weights_path = opt['model_file']
    else:
        # May still be None, in which case nothing is loaded.
        weights_path = path

    print('Creating or loading model')
    self.model = TransresnetModel(self.opt, self.personalities_list, self.dict)
    if weights_path is not None:
        self.load(weights_path)
    if self.use_cuda:
        self.model.cuda()
def _setup_cands(self):
    """
    Load the fixed candidates and their encodings, computing them if needed.

    When ``self.fcp`` is ``None`` both ``self.fixed_cands`` and
    ``self.fixed_cands_enc`` are left as ``None``. Otherwise candidates are
    read one per line from ``self.fcp``; encodings are loaded from
    ``<fcp>.cands_enc`` if that file exists, else computed with the model in
    batches of 50 and saved there.
    """
    self.fixed_cands = None
    self.fixed_cands_enc = None
    if self.fcp is None:
        return

    with PathManager.open(self.fcp) as f:
        self.fixed_cands = [c.replace('\n', '') for c in f.readlines()]

    cands_enc_file = '{}.cands_enc'.format(self.fcp)
    if PathManager.exists(cands_enc_file):
        # NOTE(review): a cache written before the batching fix below may
        # hold fewer rows than there are candidates; delete it to rebuild.
        print('loading saved cand encodings')
        with PathManager.open(cands_enc_file, 'rb') as f:
            self.fixed_cands_enc = torch.load(f, map_location=lambda cpu, _: cpu)
        return

    print('Extracting cand encodings')
    self.model.eval()
    batch_size = 50
    num_cands = len(self.fixed_cands)
    pbar = tqdm.tqdm(
        total=num_cands,
        unit='cand',
        unit_scale=True,
        desc='Extracting candidate encodings',
    )
    fixed_cands_enc = []
    try:
        # Step through the whole list: the previous upper bound of
        # `len - 50` skipped the final batch, so trailing candidates were
        # never encoded.
        for start in range(0, num_cands, batch_size):
            batch = self.fixed_cands[start : start + batch_size]
            embedding = self.model(None, None, batch)[1].detach()
            fixed_cands_enc.append(embedding)
            pbar.update(len(batch))
    finally:
        pbar.close()
    self.fixed_cands_enc = torch.cat(fixed_cands_enc, 0)
    torch_utils.atomic_save(self.fixed_cands_enc, cands_enc_file)
def _build_model(self, path=None):
    """
    Create the ``TransresnetMultimodalModel`` and load weights if available.

    An existing ``opt["init_model"]`` takes priority over an existing
    ``opt["model_file"]``; ``path`` is used only when neither exists.

    :param path: fallback weights path.
    """
    for key in ("init_model", "model_file"):
        candidate = self.opt.get(key)
        if candidate and PathManager.exists(self.opt[key]):
            init_model_path = self.opt[key]
            break
    else:
        # Neither opt entry points at an existing file; `path` may be None.
        init_model_path = path

    print("Creating or loading model")
    self.model = TransresnetMultimodalModel(
        self.opt, self.personalities_list, self.dict
    )
    if init_model_path is not None:
        self.load(init_model_path)
    if self.use_cuda:
        self.model.cuda()
def _check_parent_dir_exits(datapath):
    """
    Create the parent directory of ``datapath`` when it is missing.

    Does nothing if ``datapath`` has no directory component or if that
    directory already exists.

    :param datapath: path whose parent directory should exist.
    """
    parent_dir = os.path.dirname(datapath)
    needs_creation = bool(parent_dir) and not PathManager.exists(parent_dir)
    if needs_creation:
        logging.info(
            f'Parent directory ({parent_dir}) did not exist and was created.'
        )
        PathManager.mkdirs(parent_dir)
def _setup_cands(self):
    """
    Override for different call to model.

    Candidates are read one per line from ``self.fcp``. Their encodings are
    loaded from ``<fcp>.cands_enc`` when that file exists, otherwise computed
    with ``self.model.forward_text_encoder`` in batches of 50 and saved there.
    With ``self.fcp`` unset, both attributes stay ``None``.
    """
    self.fixed_cands = None
    self.fixed_cands_enc = None
    if self.fcp is None:
        return

    with PathManager.open(self.fcp) as f:
        self.fixed_cands = [c.replace("\n", "") for c in f.readlines()]

    cands_enc_file = "{}.cands_enc".format(self.fcp)
    if PathManager.exists(cands_enc_file):
        # NOTE(review): a cache written before the batching fix below may
        # hold fewer rows than there are candidates; delete it to rebuild.
        print("loading saved cand encodings")
        with PathManager.open(cands_enc_file, 'rb') as f:
            self.fixed_cands_enc = torch.load(f, map_location=lambda cpu, _: cpu)
        return

    print("Extracting cand encodings")
    self.model.eval()
    batch_size = 50
    num_cands = len(self.fixed_cands)
    pbar = tqdm.tqdm(
        total=num_cands,
        unit="cand",
        unit_scale=True,
        desc="Extracting candidate encodings",
    )
    fixed_cands_enc = []
    try:
        # Iterate over every candidate: the old bound of `len - 50` left the
        # last batch (up to 50 candidates) without an encoding.
        for start in range(0, num_cands, batch_size):
            batch = self.fixed_cands[start:start + batch_size]
            embedding = self.model.forward_text_encoder(batch).detach()
            fixed_cands_enc.append(embedding)
            pbar.update(len(batch))
    finally:
        pbar.close()
    self.fixed_cands_enc = torch.cat(fixed_cands_enc, 0)
    torch_utils.atomic_save(self.fixed_cands_enc, cands_enc_file)
def _get_data(self):
    """
    Ensure the data directory exists and that ``train.csv`` is present in it.

    The data cannot be fetched automatically, so a missing ``train.csv``
    results in an error explaining where to download it and where to put it.

    :raises RuntimeError: if ``train.csv`` is not in ``self.data_path``.
    """
    # useful constants
    # all of these colors are bolded
    RESET = '\033[0m'
    RED = '\033[1;91m'
    YELLOW = '\033[1;93m'
    GREEN = '\033[1;92m'
    BLUE = '\033[1;96m'
    CYAN = '\033[1;94m'
    MAGENTA = '\033[1;95m'

    # only use colors if we're outputting to a terminal
    USE_COLORS = _sys.stdout.isatty()
    if not USE_COLORS:
        RESET = RED = YELLOW = GREEN = BLUE = CYAN = MAGENTA = ''

    # generate the rainbow stars
    rainbow = [RED, YELLOW, GREEN, CYAN, BLUE, MAGENTA]
    size = 78 // len(rainbow)
    stars = ''.join([color + '*' * size for color in rainbow])
    stars += RESET

    # Use PathManager for the existence check too, so the check and the
    # creation go through the same filesystem abstraction.
    if not PathManager.exists(self.data_path):
        PathManager.mkdirs(self.data_path)
    if not PathManager.exists(os.path.join(self.data_path, 'train.csv')):
        raise RuntimeError(
            f'\n\n{stars}\nThis data must be downloaded from {self.DATA_SOURCE}'
            '\nIt cannot be automatically downloaded, as one must agree to '
            'the competition rules outlined on the website before '
            'gaining access to the data.\n\n'
            'Once downloaded, please put the data in the following '
            f'directory: \n{self.data_path}\n{stars}'
        )
def __init__(self, opt, shared=None):
    """
    Resolve the data file for the configured subtask and initialize the parent.

    Works on a deep copy of ``opt``. An explicitly supplied file option
    (``<sub>_train`` for training, ``<sub>_<datatype>`` otherwise, where
    ``<sub>`` is the first three letters of the subtask) wins; otherwise the
    file name is derived from the datatype.

    :param opt: options; ``subtask`` defaults to ``'dialog'`` when absent.
    :param shared: forwarded to the parent constructor.

    :raises ValueError: if the resolved data file does not exist.
    """
    opt = copy.deepcopy(opt)
    if 'subtask' not in opt:
        print(
            'Warning: SelfFeedingteacher should be assigned subtask. '
            'Defaulting to dialog'
        )
        opt['subtask'] = 'dialog'

    subtask_prefix = opt['subtask'][:3]
    # Use 'in' to also capture 'train:ordered:stream'
    if 'train' in opt['datatype']:
        # train_xx.txt where xx is inferred from the subtask
        file_flag = f"{subtask_prefix}_train"
        default_name = "train"
    else:
        # valid_xx.txt or test_xx.txt where xx is inferred from the subtask
        file_flag = f"{subtask_prefix}_{opt['datatype']}"
        default_name = opt['datatype'].split(':')[0]

    # Use the filename explicitly given with the flag if available
    if opt.get(file_flag, None):
        path = _path(opt, opt[file_flag], add_suffix=False)
    else:
        path = _path(opt, default_name, add_suffix=True)

    if not PathManager.exists(path):
        raise ValueError("Unrecognized filepath: {}".format(path))

    opt['parlaidialogteacher_datafile'] = path
    opt['datafile'] = path
    super().__init__(opt, shared)
def __init__(self, opt, shared=None):
    """
    Initialize the agent, reusing shared components or building the model.

    :param opt:
        options; reads ``no_cuda``, ``model_file``, ``init_model`` and
        ``gpu``, and writes ``cuda`` into ``self.opt``.
    :param shared:
        when not ``None``, must provide ``word_dict``, ``model`` and
        ``feature_dict``.
    """
    super().__init__(opt, shared)

    # All agents keep track of the episode (for multiple questions)
    self.episode_done = True

    # CUDA availability is only queried when the user has not disabled it.
    self.opt['cuda'] = (
        False if self.opt['no_cuda'] else torch.cuda.is_available()
    )

    if shared is None:
        # set up model
        self.word_dict = DrqaAgent.dictionary_class()(opt)
        if self.opt.get('model_file') and PathManager.exists(opt['model_file']):
            self._init_from_saved(opt['model_file'])
        elif self.opt.get('init_model'):
            self._init_from_saved(opt['init_model'])
        else:
            self._init_from_scratch()
        if self.opt['cuda']:
            print('[ Using CUDA (GPU %d) ]' % opt['gpu'])
            torch.cuda.set_device(opt['gpu'])
            self.model.cuda()
    else:
        # model has already been set up
        self.word_dict = shared['word_dict']
        self.model = shared['model']
        self.feature_dict = shared['feature_dict']

    # Set up params/logging/dicts
    self.id = self.__class__.__name__
    config.set_defaults(self.opt)
    self.n_examples = 0
def load_eli5(self, opt):
    """
    Load data based on data split.

    :param opt:
        options; reads ``datapath`` and ``datatype`` and receives the
        resolved file path in ``opt['datafile']``.

    :return:
        a list of single-turn acts, one per JSON record. The text is the
        question, preceded by the document when ``self.opt['knowledge']``
        is set.

    :raises FileNotFoundError: if the processed JSON file is missing.
    """
    split = opt['datatype'].split(':')[0]
    eli_path = "eli5/processed_data/selected_15_1/explainlikeimfive_"
    fname = os.path.join(opt['datapath'], eli_path + split + ".json")
    if not PathManager.exists(fname):
        raise FileNotFoundError(
            f"{fname} not found. Please follow the instructions found at "
            "https://github.com/facebookresearch/ParlAI/tree/master/parlai/tasks/eli5/README.md"
            " to construct the dataset."
        )
    opt['datafile'] = fname

    with PathManager.open(fname) as json_file:
        records = json.load(json_file)

    return [
        {
            'id': 'eli5',
            'text': (
                record['document'] + "\n" + record['question']
                if self.opt['knowledge']
                else record['question']
            ),
            'labels': [record['answer']],
            'episode_done': True,
        }
        for record in records
    ]
def compare_init_model_opts(opt: Opt, curr_opt: Opt):
    """
    Print loud warning when `init_model` opts differ from previous configuration.

    Does nothing when ``opt['init_model']`` is unset or when no
    ``<init_model>.opt`` file exists. As a side effect, ``opt['init_model']``
    is replaced by its ``modelzoo_path`` resolution.

    :param opt: options naming the model to initialize from.
    :param curr_opt: options the current model is being created with.
    """
    if opt.get('init_model') is None:
        return
    opt['init_model'] = modelzoo_path(opt['datapath'], opt['init_model'])
    optfile = opt['init_model'] + '.opt'
    if not PathManager.exists(optfile):
        return
    init_model_opt = Opt.load(optfile)

    extra_opts = {}
    different_opts = {}
    exempt_opts = [
        'model_file',
        'dict_file',
        'override',
        'starttime',
        'init_model',
        'batchindex',
    ]

    # search through init model opts
    for k, v in init_model_opt.items():
        if k in exempt_opts:
            continue
        curr_v = curr_opt.get(k)
        if v != curr_v:
            if isinstance(v, list):
                # The current value may be another sequence type with the
                # same items. A missing or non-iterable value counts as
                # different instead of raising from a warning-only helper.
                try:
                    same_items = v == list(curr_v)
                except TypeError:
                    same_items = False
                if not same_items:
                    different_opts[k] = ','.join([str(x) for x in v])
            else:
                different_opts[k] = v

    # search through opts to load
    for k, v in curr_opt.items():
        if k not in exempt_opts and k not in init_model_opt:
            if isinstance(v, list):
                extra_opts[k] = ','.join([str(x) for x in v])
            else:
                extra_opts[k] = v

    # print warnings
    extra_strs = ['{}: {}'.format(k, v) for k, v in extra_opts.items()]
    if extra_strs:
        logging.warning(
            'your model is being loaded with opts that do not '
            'exist in the model you are initializing the weights with: '
            '{}'.format(','.join(extra_strs))
        )

    different_strs = [
        '--{} {}'.format(k.replace('_', '-'), v) for k, v in different_opts.items()
    ]
    if different_strs:
        logging.warning(
            'your model is being loaded with opts that differ '
            'from the model you are initializing the weights with. Add the '
            'following args to your run command to change this: \n'
            '{}'.format(' '.join(different_strs))
        )
def git_ls_files(root=None, skip_nonexisting=True):
    """
    List all files tracked by git.

    :param root: argument passed through to ``git ls-files``.
    :param bool skip_nonexisting:
        if true, drop entries for which ``PathManager.exists`` is false.

    :return: list of file names, one per line of the git output.
    """
    tracked = git_.ls_files(root).split('\n')
    if not skip_nonexisting:
        return tracked
    return [name for name in tracked if PathManager.exists(name)]
def built(path, version_string=None):
    """
    Check if '.built' flag has been set for that task.

    If a version_string is provided, this has to match, or the version is
    regarded as not built.

    :param path: directory expected to contain the ``.built`` file.
    :param version_string:
        optional version that must equal the second line of ``.built``.

    :return: whether the task counts as built.
    """
    flag_file = os.path.join(path, '.built')
    if not version_string:
        return PathManager.exists(flag_file)
    if not PathManager.exists(flag_file):
        return False
    with PathManager.open(flag_file, 'r') as read:
        flag_lines = read.read().split('\n')
    # The version is stored on the second line of the flag file.
    return len(flag_lines) > 1 and flag_lines[1] == version_string
def set_vocab_candidates(self, shared):
    """
    Load the tokens from the vocab as candidates.

    Sets ``opt['encode_candidate_vecs']`` to ``True``, then fills three
    attributes:

    - ``self.vocab_candidates``: list of token strings for dictionary
      indices 1 and up;
    - ``self.vocab_candidate_vecs``: their vectorized forms, padded with
      ``padded_3d``;
    - ``self.vocab_candidate_encs``: their encodings, loaded from
      ``<model_file>.vocab.encs`` or computed and saved there.

    All three are copied from ``shared`` when it is given, and set to
    ``None`` when neither ``candidates`` nor ``eval_candidates`` is
    ``'vocab'``.

    :param shared: shared-state dict, or a falsy value to build from scratch.
    """
    self.opt['encode_candidate_vecs'] = True

    if shared:
        self.vocab_candidates = shared['vocab_candidates']
        self.vocab_candidate_vecs = shared['vocab_candidate_vecs']
        self.vocab_candidate_encs = shared['vocab_candidate_encs']
        return

    if 'vocab' not in (self.opt['candidates'], self.opt['eval_candidates']):
        self.vocab_candidates = None
        self.vocab_candidate_vecs = None
        self.vocab_candidate_encs = None
        return

    tokens = []
    token_vecs = []
    # Index 0 is deliberately skipped.
    for ind in range(1, len(self.dict)):
        txt = self.dict[ind]
        tokens.append(txt)
        token_vecs.append(
            self._vectorize_text(
                txt, add_start=True, add_end=True, truncate=self.label_truncate
            )
        )
    self.vocab_candidates = tokens
    self.vocab_candidate_vecs = padded_3d([token_vecs]).squeeze(0)
    print(
        "[ Loaded fixed candidate set (n = {}) from vocabulary ]"
        "".format(len(self.vocab_candidates))
    )

    enc_path = self.opt.get('model_file') + '.vocab.encs'
    if PathManager.exists(enc_path):
        self.vocab_candidate_encs = self.load_candidates(
            enc_path, cand_type='vocab encodings'
        )
    else:
        chunk = 512
        vec_batches = [
            self.vocab_candidate_vecs[start:start + chunk]
            for start in range(0, len(self.vocab_candidate_vecs), chunk)
        ]
        print(
            "[ Vectorizing vocab candidates ({} batch(es) of up "
            "to 512) ]".format(len(vec_batches))
        )
        cand_encs = [
            self.encode_candidates(vec_batch) for vec_batch in tqdm(vec_batches)
        ]
        self.vocab_candidate_encs = torch.cat(cand_encs, 0)
        self.save_candidates(
            self.vocab_candidate_encs, enc_path, cand_type='vocab encodings'
        )

    if self.use_cuda:
        self.vocab_candidate_vecs = self.vocab_candidate_vecs.cuda()
        self.vocab_candidate_encs = self.vocab_candidate_encs.cuda()
def modelzoo_path(datapath, path):
    """
    Map pretrain models filenames to their path on disk.

    Paths without a ``models:``, ``zoo:`` or ``izoo:`` prefix are returned
    unchanged. For ``models:``/``zoo:`` the matching ``parlai.zoo`` build
    module is imported and its ``download(datapath)`` is called before the
    path inside ``<datapath>/models`` is returned. For ``izoo:`` the zoo root
    is read from ``parlai_internal/zoo/.internal_zoo_path``.

    :param datapath: data directory handed to the download function.
    :param path: model path, possibly prefixed; ``None`` is passed through.

    :return: the resolved path, or ``None`` if ``path`` is ``None``.

    :raises ImportError: if no build module can be found for a zoo path.
    :raises RuntimeError: if the internal zoo path file is missing.
    """
    if path is None:
        return None
    if not path.startswith(('models:', 'zoo:', 'izoo:')):
        return path

    if path.startswith(('models:', 'zoo:')):
        # Everything after the first ':' is the path inside the zoo.
        zoo, _, model_path = path.partition(':')

        # Check if we need to download the model
        if "/" in path:
            animal = model_path[: model_path.rfind('/')].replace('/', '.')
        else:
            animal = model_path
        if '.' not in animal:
            animal += '.build'
        module_name = 'parlai.zoo.{}'.format(animal)
        try:
            my_module = importlib.import_module(module_name)
            my_module.download(datapath)
        except (ImportError, AttributeError):
            try:
                # maybe we didn't find a specific model, let's try generic .build
                animal_ = '.'.join(animal.split(".")[:-1]) + '.build'
                module_name_ = 'parlai.zoo.{}'.format(animal_)
                my_module = importlib.import_module(module_name_)
                my_module.download(datapath)
            except (ImportError, AttributeError) as exc:
                # truly give up
                raise ImportError(
                    f'Could not find pretrained model in {module_name} or {module_name_}.'
                    " Please check your spelling and make sure you've pulled from master."
                ) from exc

        return os.path.join(datapath, 'models', model_path)

    # Internal path (starts with "izoo:") -- useful for non-public
    # projects. Save the path to your internal model zoo in
    # parlai_internal/.internal_zoo_path
    # TODO: test the internal zoo.
    zoo_path = 'parlai_internal/zoo/.internal_zoo_path'
    if not PathManager.exists('parlai_internal/zoo/.internal_zoo_path'):
        raise RuntimeError(
            'Please specify the path to your internal zoo in the '
            'file parlai_internal/zoo/.internal_zoo_path in your '
            'internal repository.'
        )
    with PathManager.open(zoo_path, 'r') as f:
        zoo = f.read().split('\n')[0]
    # Drop the 5-character "izoo:" prefix.
    return os.path.join(zoo, path[5:])
def save(self, dir_name: str, file_name: str):
    """
    Save appropriate files.

    Copies ``self.json_path`` to ``<dir_name>/<file_name>-vocab.json`` and
    ``self.merge_path`` to ``<dir_name>/<file_name>-merges.txt``, skipping any
    destination that already exists.

    :param dir_name:
        directory to save.
    :param file_name:
        file to save.
    """
    copies = (
        (self.json_path, os.path.join(dir_name, file_name + "-vocab.json")),
        (self.merge_path, os.path.join(dir_name, file_name + "-merges.txt")),
    )
    for source, destination in copies:
        # Possibly bad assumption: if the destination file already exists,
        # we don't need to copy it over again.
        if PathManager.exists(destination):
            continue
        logging.info(f"Copying {source} to {destination}")
        PathManager.copy(source, destination)
def _build_data(self) -> Tuple[str, str, str]:
    """
    Build data.

    Maybe download the appropriate data: if either ``vocab.bpe`` or
    ``encoder.json`` is missing from ``<datapath>/gpt2``, both are downloaded.

    :return (bpe_data, json_path, vocab_path):
        contents of ``vocab.bpe``, path to the encoder json, and path to
        ``vocab.bpe``
    """
    data_path = os.path.join(self.opt['datapath'], 'gpt2')
    vocab_path = os.path.join(data_path, 'vocab.bpe')
    json_path = os.path.join(data_path, 'encoder.json')
    if not PathManager.exists(vocab_path) or not PathManager.exists(json_path):
        make_dir(data_path)
        download(self.DEFAULT_VOCAB_BPE, data_path, 'vocab.bpe')
        download(self.DEFAULT_ENCODER_JSON, data_path, 'encoder.json')
    with PathManager.open(vocab_path, 'r', encoding="utf-8") as f:
        bpe_data = f.read()

    # Three values are returned; the annotation previously declared two.
    return bpe_data, json_path, vocab_path
def print_announcements(opt):
    """
    Output any announcements the ParlAI team wishes to make to users.

    Also gives the user the option to suppress the output. Currently returns
    immediately; the code after the ``return`` is kept as the template for
    future announcements and is not executed.

    :param opt: options; ``datapath`` locates the suppression marker file.
    """
    # no announcements to make right now
    return

    noannounce_file = os.path.join(opt.get('datapath'), 'noannouncements')
    if PathManager.exists(noannounce_file):
        # user has suppressed announcements, don't do anything
        return

    # useful constants
    # all of these colors are bolded
    if _sys.stdout.isatty():
        RESET = '\033[0m'
        BOLD = '\033[1m'
        RED = '\033[1;91m'
        YELLOW = '\033[1;93m'
        GREEN = '\033[1;92m'
        BLUE = '\033[1;96m'
        CYAN = '\033[1;94m'
        MAGENTA = '\033[1;95m'
    else:
        # only use colors if we're outputting to a terminal
        RESET = BOLD = RED = YELLOW = GREEN = BLUE = CYAN = MAGENTA = ''

    # generate the rainbow stars
    rainbow = [RED, YELLOW, GREEN, CYAN, BLUE, MAGENTA]
    size = 78 // len(rainbow)
    stars = ''.join([color + '*' * size for color in rainbow]) + RESET

    # do the actual output
    suppress_hint = (
        'To suppress this message (and future announcements), run\n`touch {}`'.format(
            noannounce_file
        )
    )
    lines = [
        '',
        stars,
        BOLD,
        'Announcements go here.',
        RESET,
        # don't bold the suppression command
        suppress_hint,
        stars,
    ]
    print('\n'.join(lines))
def get_list_of_files(top_path):
    """
    Group every ``.xml`` file found under ``top_path`` by movie id.

    :param top_path: directory to walk recursively.

    :return:
        dict mapping each ``get_movie_id`` result to the list of resolved
        (``os.path.realpath``) file names that produced it.
    """
    files_by_movie = {}
    for directory, _subdirs, filenames in os.walk(top_path):
        for filename in filenames:
            if not filename.endswith('.xml'):
                continue
            full_filename = os.path.realpath(os.path.join(directory, filename))
            assert PathManager.exists(full_filename), 'Bad file ' + full_filename
            movie_id = get_movie_id(full_filename)
            files_by_movie.setdefault(movie_id, []).append(full_filename)
    return files_by_movie
def _load_conversations(self, datapath):
    """
    Read a file with one JSON document per line into ``Conversation`` objects.

    :param datapath: path of the file to read.

    :return: list of ``Conversation`` objects, in file order.

    :raises RuntimeError: if ``datapath`` does not exist.
    """
    if not PathManager.exists(datapath):
        raise RuntimeError(
            f'Conversations at path {datapath} not found. '
            'Double check your path.'
        )

    with PathManager.open(datapath, 'r') as f:
        raw_lines = f.read().splitlines()
    return [Conversation(json.loads(raw)) for raw in raw_lines]
def __init__(self, opt: Opt):
    """
    Create a tensorboard ``SummaryWriter`` logging next to the model file.

    Logs are written to ``<model_file>.tensorboard``, which is created if it
    does not exist.

    :param opt: options; reads ``model_file`` and is serialized as the
        writer's comment.

    :raises ImportError: if ``tensorboardX`` is not installed.
    """
    try:
        # tensorboard is a very expensive thing to import. Wait until the
        # last second to import it.
        from tensorboardX import SummaryWriter
    except ImportError as exc:
        raise ImportError(
            'Please run `pip install tensorboard tensorboardX`.'
        ) from exc

    tbpath = opt['model_file'] + '.tensorboard'
    logging.debug(f'Saving tensorboard logs to: {tbpath}')
    if not PathManager.exists(tbpath):
        # `mkdirs` is the directory-creation call used with PathManager
        # elsewhere in this code.
        PathManager.mkdirs(tbpath)
    self.writer = SummaryWriter(tbpath, comment=json.dumps(opt))
def git_changed_files(skip_nonexisting=True):
    """
    List all the changed files in the git repository.

    Files are those reported by ``git diff --name-only`` against the merge
    base of ``origin/master`` and ``HEAD``.

    :param bool skip_nonexisting:
        If true, ignore files that don't exist on disk. This is useful for
        disregarding files created in master, but don't exist in HEAD.

    :return: list of changed file names.
    """
    fork_point = git_.merge_base('origin/master', 'HEAD').strip()
    changed = git_.diff('--name-only', fork_point).split('\n')
    if not skip_nonexisting:
        return changed
    return [name for name in changed if PathManager.exists(name)]
def _load_raw(self, datapath):
    """
    Load the data as a raw, unparsed file.

    Useful for fast IO stuff like random indexing. This is a generator, so
    the existence check only runs once iteration starts.

    :param datapath: path of the file to read.

    :return: generator over the file's lines, without line terminators.

    :raises RuntimeError: if ``datapath`` does not exist.
    """
    if not PathManager.exists(datapath):
        raise RuntimeError(
            f'Conversations at path {datapath} not found. '
            'Double check your path.'
        )

    with PathManager.open(datapath, 'r') as f:
        yield from f.read().splitlines()
def create_agent(opt: Opt, requireModelExists=False):
    """
    Create an agent from the options ``model``, ``model_params`` and ``model_file``.

    The input is either of the form
    ``parlai.agents.ir_baseline.agents:IrBaselineAgent`` (i.e. the path
    followed by the class name) or else just ``ir_baseline`` which assumes the
    path above, and a class name suffixed with 'Agent'.

    If ``model-file`` is available in the options this function can also
    attempt to load the model from that location instead. This avoids having to
    specify all the other options necessary to set up the model including its
    name as they are all loaded from the options file if it exists (the file
    opt['model_file'] + '.opt' must exist and contain a pickled or json dict
    containing the model's options).

    :param opt: options; ``model_file`` is resolved in place via the zoo.
    :param requireModelExists:
        if true, a set but missing ``model_file`` is an error.

    :return: the created agent.

    :raises RuntimeError:
        if a required model file is missing, or no agent could be loaded from
        the model file and ``model`` is not set.
    """
    if opt.get('datapath', None) is None:
        add_datapath_and_model_args(opt)

    if opt.get('model_file'):
        opt['model_file'] = modelzoo_path(opt.get('datapath'), opt['model_file'])
        model_missing = not PathManager.exists(opt['model_file']) if requireModelExists else False
        if model_missing:
            raise RuntimeError(
                'WARNING: Model file does not exist, check to make '
                'sure it is correct: {}'.format(opt['model_file'])
            )
        # Attempt to load the model from the model file first (this way we do
        # not even have to specify the model name as a parameter)
        loaded = create_agent_from_opt_file(opt)
        if loaded is not None:
            return loaded
        logging.info(f"No model with opt yet at: {opt['model_file']}(.opt)")

    if not opt.get('model'):
        raise RuntimeError('Need to set `model` argument to use create_agent.')

    model_class = load_agent_module(opt['model'])
    # if we want to load weights from --init-model, compare opts with
    # loaded ones
    compare_init_model_opts(opt, opt)
    model = model_class(opt)
    if requireModelExists and hasattr(model, 'load') and not opt.get('model_file'):
        # double check that we didn't forget to set model_file on loadable model
        logging.warning('model_file unset but model has a `load` function.')
    return model
def get_model_name(opt):
    """
    Get the model name from either `--model` or `--model-file`.

    :param opt: options; reads ``model``, ``model_file`` and ``datapath``.

    :return:
        ``opt['model']`` when set; otherwise the ``model`` entry of the
        ``<model_file>.opt`` file when that file exists; otherwise ``None``.
    """
    model = opt.get('model', None)
    if model is not None:
        return model

    model_file = opt.get('model_file', None)
    if model_file is None:
        return None

    # try to get model name from model opt file
    optfile = modelzoo_path(opt.get('datapath'), model_file) + '.opt'
    if not PathManager.exists(optfile):
        return None
    return Opt.load(optfile).get('model', None)