def test_capture(self):
    global_step = tf.contrib.framework.get_or_create_global_step()
    # Some test computation
    some_weights = tf.get_variable("weights", [2, 128])
    computation = tf.nn.softmax(some_weights)

    hook = hooks.MetadataCaptureHook(
        params={"step": 5}, model_dir=self.model_dir)
    hook.begin()

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        # pylint: disable=W0212
        mon_sess = monitored_session._HookedSession(sess, [hook])
        # Should not trigger for step 0
        sess.run(tf.assign(global_step, 0))
        mon_sess.run(computation)
        self.assertEqual(gfile.ListDirectory(self.model_dir), [])
        # Should trigger *after* step 5: metadata is requested on the step-5
        # run and written out on the following run.
        sess.run(tf.assign(global_step, 5))
        mon_sess.run(computation)
        self.assertEqual(gfile.ListDirectory(self.model_dir), [])
        mon_sess.run(computation)
        self.assertEqual(
            set(gfile.ListDirectory(self.model_dir)),
            set(["run_meta", "tfprof_log", "timeline.json"]))
def validate_hourly(working_dir, validate_name=None):
    """Compiles a list of games based on the new hourly directory format,
    then calls validate on it."""
    holdout_files = (
        os.path.join(fsdb.holdout_dir(), d, f)
        for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
        if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
        for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))
    holdout_files = list(itertools.islice(holdout_files, 20000))
    random.shuffle(holdout_files)
    dual_net.validate(holdout_files)
def execute_job():
    config = JobExecutorConfig()
    make_needed_dirs(config)
    configure_logging(config)

    if config.prediction == 'BERT':
        match_predictor = BertMatchPredictor()
    elif config.prediction == 'KEYED_VECTORS':
        match_predictor = KeyedVectorsFormatPredictor()
    else:
        raise ValueError("Unknown prediction mode: %s" % config.prediction)

    while True:
        logging.info("job iteration started")
        dir_in = config.dir_in
        file_names = [
            f for f in gfile.ListDirectory(dir_in)
            if not gfile.IsDirectory(join(dir_in, f))
        ]
        for file_name in file_names:
            logging.info(file_name)
            file_path = join(dir_in, file_name)
            try:
                match_predictor.predict(dir_in, file_name, config.dir_result)
                gfile.Rename(file_path, join(config.dir_success, file_name))
            except Exception:
                logging.error(traceback.format_exc())
                gfile.Rename(file_path, join(config.dir_error, file_name))
        logging.info("job iteration finished")
        time.sleep(config.interval)
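# A minimal sketch of the configuration consumed by execute_job. The real
# JobExecutorConfig is defined elsewhere; only the fields referenced above
# are shown, and the default values here are illustrative assumptions.
import dataclasses

@dataclasses.dataclass
class JobExecutorConfigSketch:
    prediction: str = 'BERT'      # 'BERT' or 'KEYED_VECTORS'
    dir_in: str = 'in'            # input files are polled from here
    dir_result: str = 'result'    # predictions are written here
    dir_success: str = 'success'  # processed inputs are moved here
    dir_error: str = 'error'      # failed inputs are moved here
    interval: int = 60            # seconds to sleep between iterations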
def __init__(self, img_name_to_token_ids, img_feature_dir, num_timesteps,
             vocab, deterministic=False):
    """
    :param img_name_to_token_ids: dict mapping image names to description token ids
    :param img_feature_dir: directory where image feature files are saved
    :param num_timesteps: number of time steps
    :param vocab: vocabulary
    :param deterministic: whether to skip shuffling
    """
    self._vocab = vocab
    self._all_img_feature_filepaths = []
    # Build the full paths of the image feature files
    for filename in gfile.ListDirectory(img_feature_dir):
        self._all_img_feature_filepaths.append(
            os.path.join(img_feature_dir, filename))
    self._img_name_to_token_ids = img_name_to_token_ids
    self._num_timesteps = num_timesteps
    self._indicator = 0  # start position of the next batch
    self._deterministic = deterministic
    self._img_feature_filenames = []  # paths of all image feature files
    self._img_feature_data = []       # all loaded image feature data
    self._load_img_feature_pickle()
    if not self._deterministic:
        self._random_shuffle()
def produce_timeline_profile(profile_dir, resources_dir, profile_cnt, options):
    """Produces a timeline profile."""
    timeline_path = os.path.join(resources_dir,
                                 PROFILER_COMMON_PREFIX + 'timeline')
    if not os.path.isfile(timeline_path):
        profiles = {}
        log_path = os.path.join(PROFILER_TMP_DIR, PROFILER_TMP_NAME)
        options['output'] = 'timeline:outfile=' + log_path
        opts = model_analyzer._build_options(options)  # pylint: disable=protected-access
        for idx, prof in enumerate(gfile.ListDirectory(profile_dir)):
            prof_file = os.path.join(profile_dir, prof)
            if not os.path.isfile(prof_file):
                continue
            chosen_profile = os.path.join(
                resources_dir, PROFILER_COMMON_PREFIX + 'timeline_' + prof)
            profiles[prof] = chosen_profile
            if os.path.isfile(chosen_profile):
                if idx == 0:
                    target_ts = get_timestamp(chosen_profile)
                continue
            tf.logging.info("Parse profile context %r" % prof_file)
            remove_tmp_files()
            # Parse profile context
            ProfilerFromFile(prof_file.encode('utf-8'))
            pwtf.Profile(options['view'].encode('utf-8'),
                         opts.SerializeToString())
            DeleteProfiler()
            if idx == 0:
                prof_names = get_informative_profiles(PROFILER_TMP_DIR,
                                                      profile_cnt)
                target_ts = get_timestamp(
                    os.path.join(PROFILER_TMP_DIR, prof_names[0]))
            else:
                prof_names = get_profiles_by_timestamp(PROFILER_TMP_DIR,
                                                       target_ts, profile_cnt)
            tf.logging.info(
                "Choose %r as the most informative profile context for %r" %
                (prof_names, prof))
            gen_profile(
                [os.path.join(PROFILER_TMP_DIR, name) for name in prof_names],
                chosen_profile)
        merge_profiles(profiles, timeline_path)
    return load_profile(timeline_path)
def load_config(config_path, config=None):
    """Loads configs from (possibly multiple) file(s).

    Args:
        config_path: Paths to configuration files. This can be a `list` of
            config file names, a path to a directory in which all files are
            loaded, or a string of multiple file names separated by commas.
        config (dict, optional): A config dict to which new configurations
            are added. If `None`, a new config dict is created.

    Returns:
        A `dict` of configurations.
    """
    fnames = []
    if isinstance(config_path, (list, tuple)):
        fnames = list(config_path)
    elif gfile.IsDirectory(config_path):
        for fname in gfile.ListDirectory(config_path):
            fname = os.path.join(config_path, fname)
            if not gfile.IsDirectory(fname):
                fnames.append(fname)
    else:
        for fname in config_path.split(","):
            fname = fname.strip()
            if not fname:
                continue
            fnames.append(fname)

    if config is None:
        config = {}

    for fname in fnames:
        config = load_config_single(fname, config)

    return config
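# A minimal usage sketch for load_config. The paths below are hypothetical;
# load_config_single (defined elsewhere) is assumed to parse one file and
# merge it into the running config dict.
#
#   config = load_config(["conf/base.yml", "conf/override.yml"])  # list form
#   config = load_config("conf/base.yml,conf/override.yml")       # comma form
#   config = load_config("conf/")  # directory form: loads every file in it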
def _download(self, trial):
    """Downloads a single url given by the trial (thread safe).

    Args:
        trial (UriTrial): Object containing info about download.

    Raises:
        ValueError: If the destination dir is not empty.
    """
    log = util.build_log(prefix=trial.id)

    # Check the download dir is empty
    if (gfile.Exists(trial.output_path) and
            gfile.ListDirectory(trial.output_path)):
        raise ValueError('Download dir {} should be empty'.format(
            trial.output_path))
    gfile.MakeDirs(trial.output_path)

    log('Start downloading...')
    self._backend.download(trial)

    # TODO(epot): Compute the checksum

    # Update the output path
    trial.output_path = get_download_filepath(trial)
    log('Download complete at {}', trial.output_path)
def validate_holdout_selfplay():
    """Validate on held-out selfplay data."""
    holdout_dirs = (
        os.path.join(fsdb.holdout_dir(), d)
        for d in reversed(gfile.ListDirectory(fsdb.holdout_dir()))
        if gfile.IsDirectory(os.path.join(fsdb.holdout_dir(), d))
        for f in gfile.ListDirectory(os.path.join(fsdb.holdout_dir(), d)))

    # This is a roundabout way of computing how many hourly directories we
    # need to read in order to encompass 20,000 holdout games.
    holdout_dirs = set(itertools.islice(holdout_dirs, 20000))

    cmd = ['python3', 'validate.py'] + list(holdout_dirs) + [
        '--use_tpu',
        '--tpu_name={}'.format(TPU_NAME),
        '--flagfile=rl_loop/distributed_flags',
        '--expand_validation_dirs'
    ]
    mask_flags.run(cmd)
def CopyRecursively(src, dst, overwrite=False):
    entries = gfile.ListDirectory(src)
    for entry in entries:
        src_path = os.path.join(src, entry)
        dst_path = os.path.join(dst, entry)
        if gfile.IsDirectory(src_path):
            gfile.MkDir(dst_path)
            CopyRecursively(src_path, dst_path, overwrite)
        else:
            gfile.Copy(src_path, dst_path, overwrite)
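# A minimal usage sketch for CopyRecursively (paths are hypothetical). Note
# that the destination root must already exist, since MkDir is only called
# for subdirectories encountered during the walk.
#
#   gfile.MkDir("/tmp/run_backup")
#   CopyRecursively("/tmp/run", "/tmp/run_backup", overwrite=True)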
def get_download_filepath(trial):
    """Extract the downloaded file from the completed trial."""
    # Get the downloaded file:
    files = list(gfile.ListDirectory(trial.output_path))
    if not files:
        # Should have been caught before, but just in case
        raise ValueError('Download {} failed!'.format(trial.id))
    elif len(files) > 1:
        raise ValueError('Multiple files detected for {}.'.format(trial.id))

    # Update the output path
    filename = files[0]
    return os.path.join(trial.output_path, filename)
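# Hypothetical usage, following on from _download above: once the backend
# has written exactly one file into trial.output_path, this resolves its
# full path.
#
#   trial.output_path = get_download_filepath(trial)
#   # e.g. "<download_dir>/archive.zip" if the backend saved "archive.zip"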
def gather(
        input_directory: 'where to look for games' = 'data/selfplay/',
        output_directory: 'where to put collected games' = 'data/training_chunks/',
        examples_per_record: 'how many tf.examples to gather in each chunk' = EXAMPLES_PER_RECORD):
    qmeas.start_time('gather')
    _ensure_dir_exists(output_directory)
    models = [
        model_dir.strip('/')
        for model_dir in sorted(gfile.ListDirectory(input_directory))[-50:]
    ]
    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model: gfile.Glob(
                os.path.join(input_directory, model, '*.tfrecord.zz'))
            for model in models
        }
    print("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        print("    %s: %s files" % (model_name, len(record_files)))

    meta_file = os.path.join(output_directory, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(preprocessing.shuffle_tf_examples(
                    examples_per_record, record_files))):
            output_record = os.path.join(
                output_directory,
                '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)

    print("Processed %s new files" %
          (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
    qmeas.stop_time('gather')
def _load_data(self):
    dataset = np.zeros((24 * 4 * 183, 64, 64, 3))
    all_files = [
        x for x in gfile.ListDirectory(
            '/home/adarsh/Desktop/Disentangled-Representation-Learning-Playground/data/cars/')
        if ".mat" in x
    ]
    for i, filename in enumerate(all_files):
        data_mesh = _load_mesh(filename)
        factor1 = np.array(list(range(4)))
        factor2 = np.array(list(range(24)))
        all_factors = np.transpose([
            np.tile(factor1, len(factor2)),
            np.repeat(factor2, len(factor1)),
            np.tile(i, len(factor1) * len(factor2))
        ])
        indexes = self.index.features_to_index(all_factors)
        dataset[indexes] = data_mesh
    return dataset
def aggregate():
    logger.info("Gathering game results")

    os.makedirs(PATHS.TRAINING_CHUNK_DIR, exist_ok=True)
    os.makedirs(PATHS.SELFPLAY_DIR, exist_ok=True)
    models = [
        model_dir.strip('/')
        for model_dir in sorted(gfile.ListDirectory(PATHS.SELFPLAY_DIR))[-50:]
    ]

    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model: gfile.Glob(os.path.join(PATHS.SELFPLAY_DIR, model, '*.zz'))
            for model in models
        }
    logger.info("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        logger.info("    %s: %s files" % (model_name, len(record_files)))

    meta_file = os.path.join(PATHS.TRAINING_CHUNK_DIR, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()

    num_already_processed = len(already_processed)

    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        logger.info("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(preprocessing.shuffle_tf_examples(
                    GLOBAL_PARAMETER_STORE.EXAMPLES_PER_RECORD,
                    record_files))):
            output_record = os.path.join(
                PATHS.TRAINING_CHUNK_DIR,
                '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)

    logger.info("Processed %s new files" %
                (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
def produce_timeline_profile():
    """Produces a timeline profile."""
    # Find the largest profile, since every step is profiled for the "graph"
    # view, and the largest step tends to be the most informative.
    # TODO: Optimize backend to only process the largest step in this scenario.
    largest_profile_size = 0
    path = None
    for file_name in gfile.ListDirectory(PROFILER_LOG_DIR):
        if 'profiler-ui.log_' in file_name:
            file_path = os.path.join(PROFILER_LOG_DIR, file_name)
            with gfile.GFile(file_path, 'rb') as profile:
                file_size = profile.size()
                if largest_profile_size < file_size:
                    largest_profile_size = file_size
                    path = file_path
    return load_profile(path)
def _load_data(self):
    dataset = np.zeros((24 * 4 * 183, 64, 64, 3))
    all_files = [x for x in gfile.ListDirectory(CARS3D_PATH) if ".mat" in x]
    for i, filename in enumerate(all_files):
        data_mesh = _load_mesh(filename)
        factor1 = np.array(list(range(4)))
        factor2 = np.array(list(range(24)))
        all_factors = np.transpose([
            np.tile(factor1, len(factor2)),
            np.repeat(factor2, len(factor1)),
            np.tile(i, len(factor1) * len(factor2))
        ])
        indexes = self.index.features_to_index(all_factors)
        dataset[indexes] = data_mesh
    return dataset
def format_input(input_path, size):
    """Reads input path, randomly selects a sub-sample and concatenates them.

    Args:
        input_path: `str`, directory to read files from.
        size: `int`, number of files to read.

    Returns:
        List of `str` containing independent text reviews.
    """
    files = [
        path for path in gfile.ListDirectory(input_path)
        if path.endswith(constants.FILE_EXTENSION)
    ]
    files = np.random.choice(files, size, replace=False)
    files = [os.path.join(input_path, filename) for filename in files]
    return get_prediction_input(files)
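# Hypothetical usage of format_input, assuming constants.FILE_EXTENSION is
# something like ".txt" and get_prediction_input (defined elsewhere) reads
# and concatenates the selected files. Note np.random.choice with
# replace=False raises if size exceeds the number of matching files.
#
#   reviews = format_input("data/reviews/", size=32)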
def aggregate_json_results(base_path):
    """Aggregates all the result files in a directory into a namespaced dict.

    Args:
        base_path: String with the directory containing JSON files that only
            contain dictionaries.

    Returns:
        Namespaced dictionary with the results.
    """
    result = {}
    compiled_pattern = re.compile(r"(.*)\.json")
    for filename in gfile.ListDirectory(base_path):
        match = compiled_pattern.match(filename)
        if match:
            path = os.path.join(base_path, filename)
            with tf.gfile.GFile(path, "r") as f:
                result[match.group(1)] = json.load(f)
    return namespaced_dict(**result)
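# A usage sketch (directory contents are hypothetical): if base_path holds
# "train.json" and "eval.json", the results are keyed by file stem and
# wrapped by namespaced_dict, which is defined elsewhere in this codebase.
#
#   results = aggregate_json_results("/tmp/metrics")
#   # aggregates {"train": {...}, "eval": {...}} into one namespaced dict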
def __init__(self, img_name_to_token_ids, img_feature_dir, num_timesteps,
             vocab, deterministic=False):
    self._vocab = vocab
    self._all_img_feature_filepaths = []
    for filename in gfile.ListDirectory(img_feature_dir):
        self._all_img_feature_filepaths.append(
            os.path.join(img_feature_dir, filename))
    # pprint.pprint(self._all_img_feature_filepaths)

    self._img_name_to_token_ids = img_name_to_token_ids
    self._num_timesteps = num_timesteps
    self._indicator = 0
    self._deterministic = deterministic
    self._img_feature_filenames = []
    self._img_feature_data = []
    self._load_img_feature_pickle()
    if not self._deterministic:
        self._random_shuffle()
def __init__(self, img_feature_dir, valid_percent=0.1, test_percent=0.1,
             is_shuffle=True):
    self._all_img_feature_filepaths = []
    for filename in gfile.ListDirectory(img_feature_dir):
        self._all_img_feature_filepaths.append(
            os.path.join(img_feature_dir, filename))
    self.is_shuffle = is_shuffle
    self._img_feature_data = []
    self._img_feature_labels = []
    self._load_img_feature_pickle()
    if self.is_shuffle:
        self._random_shuffle()

    # Split the loaded dataset into training, validation and test sets
    valid_len = int(self.size() * valid_percent)
    test_len = int(self.size() * test_percent)
    self._valid_data = self._img_feature_data[:valid_len, :]
    self._valid_labels = self._img_feature_labels[:valid_len]
    self._test_data = self._img_feature_data[valid_len:valid_len + test_len, :]
    self._test_labels = self._img_feature_labels[valid_len:valid_len + test_len]
    self._train_data = self._img_feature_data[valid_len + test_len:, :]
    self._train_labels = self._img_feature_labels[valid_len + test_len:]
    self._train_indicator = 0

    print(self._valid_data.shape, self._valid_labels.shape)
    print(self._test_data.shape, self._test_labels.shape)
    print(self._train_data.shape, self._train_labels.shape)
def get_sgf_names(model):
    game_dir = HOLDOUT_PATH.format(FLAGS.base_dir, model)
    tf_records = map(os.path.basename, gfile.ListDirectory(game_dir))
    sgfs = [record.replace('.tfrecord.zz', '.sgf') for record in tf_records]
    return [PATH_TEMPLATE.format(FLAGS.base_dir, model, sgf) for sgf in sgfs]
def remove_tmp_files():
    """Removes temporary files created by the profiler."""
    for file_name in gfile.ListDirectory(PROFILER_LOG_DIR):
        if 'profiler-ui.' in file_name:
            gfile.Remove(os.path.join(PROFILER_LOG_DIR, file_name))
def get_hour_dirs(root=None):
    """Gets the directories under selfplay_dir that match YYYY-MM-DD-HH."""
    root = root or selfplay_dir()
    return list(
        filter(lambda s: re.match(r"\d{4}-\d{2}-\d{2}-\d{2}", s),
               gfile.ListDirectory(root)))
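# Example (the directory listing here is hypothetical): with selfplay_dir()
# containing "2018-06-01-13", "2018-06-01-14" and "models", only the
# timestamped entries survive the regex filter.
#
#   get_hour_dirs()  # -> ["2018-06-01-13", "2018-06-01-14"]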