def setUp(self) -> None:
    logging.getLogger().setLevel(logging.DEBUG)
    self._data_portal_name = 'test_data_portal_job_manager'
    self._kvstore = DBClient('etcd', True)
    self._portal_input_base_dir = './portal_input_dir'
    self._portal_output_base_dir = './portal_output_dir'
    self._raw_data_publish_dir = 'raw_data_publish_dir'
    # Start from a clean input directory.
    if gfile.Exists(self._portal_input_base_dir):
        gfile.DeleteRecursively(self._portal_input_base_dir)
    gfile.MakeDirs(self._portal_input_base_dir)
    self._data_fnames = ['1001/{}.data'.format(i) for i in range(100)]
    self._data_fnames_without_success = \
        ['1002/{}.data'.format(i) for i in range(100)]
    self._csv_fnames = ['1003/{}.csv'.format(i) for i in range(100)]
    self._unused_fnames = ['{}.xx'.format(100)]
    self._all_fnames = self._data_fnames + \
                       self._data_fnames_without_success + \
                       self._csv_fnames + self._unused_fnames
    # Only directories 1001 and 1003 get a _SUCCESS marker; 1002
    # deliberately lacks one.
    all_fnames_with_success = ['1001/_SUCCESS'] + ['1003/_SUCCESS'] + \
                              self._all_fnames
    for fname in all_fnames_with_success:
        fpath = os.path.join(self._portal_input_base_dir, fname)
        gfile.MakeDirs(os.path.dirname(fpath))
        with gfile.Open(fpath, "w") as f:
            f.write('xxx')
def _create_data_block_dir_if_need(self):
    data_block_dir_wrap = self._data_block_dir_wrap()
    if not gfile.Exists(data_block_dir_wrap):
        gfile.MakeDirs(data_block_dir_wrap)
    if not gfile.IsDirectory(data_block_dir_wrap):
        logging.fatal("%s must be directory", data_block_dir_wrap)
        os._exit(-1)
def get_target_path(request, point_num):
    """Computes the output path for a specific point.

    Args:
        request: ResegmentationRequest proto
        point_num: index of the point of interest within the proto

    Returns:
        Path to the output file where resegmentation results will be
        saved, or None if the output already exists.
    """
    # Prepare the output directory.
    output_dir = request.output_directory
    id_a = request.points[point_num].id_a
    id_b = request.points[point_num].id_b
    if request.subdir_digits > 1:
        m = hashlib.md5()
        # Hash the IDs as bytes; md5.update() rejects str in Python 3.
        m.update(str(id_a).encode())
        m.update(str(id_b).encode())
        output_dir = os.path.join(output_dir,
                                  m.hexdigest()[:request.subdir_digits])
    gfile.MakeDirs(output_dir)

    # Terminate early if the output already exists.
    dp = request.points[point_num].point
    target_path = os.path.join(
        output_dir,
        '%d-%d_at_%d_%d_%d.npz' % (id_a, id_b, dp.x, dp.y, dp.z))
    if gfile.Exists(target_path):
        logging.info('Output already exists: %s', target_path)
        return None

    return target_path
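# Usage sketch (added for illustration, not part of the original source).
# It assumes the ResegmentationRequest proto from ffn.inference with the
# fields referenced above (output_directory, subdir_digits, points); the
# import path and all concrete values are assumptions.
def _example_get_target_path():
    from ffn.inference import inference_pb2  # assumed import path

    request = inference_pb2.ResegmentationRequest()
    request.output_directory = '/tmp/reseg'
    request.subdir_digits = 2
    point = request.points.add()
    point.id_a, point.id_b = 17, 42
    point.point.x, point.point.y, point.point.z = 100, 200, 50

    # Returns None if '17-42_at_100_200_50.npz' already exists.
    return get_target_path(request, 0)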
def _generate_input_data(self):
    self._partition_item_num = 1 << 16
    self._clean_up()
    gfile.MakeDirs(self._input_dir)
    success_flag_fpath = "{}/_SUCCESS".format(self._input_dir)
    example_id = 1000001
    for partition_id in range(self._input_partition_num):
        example_id = self._generate_one_partition(
            partition_id, example_id, self._partition_item_num)
    # Write an empty _SUCCESS marker once all partitions are generated.
    with gfile.GFile(success_flag_fpath, 'w') as fh:
        fh.write('')
def set_data(self, key, data):
    key_path = self._generate_path(key)
    base_dir = os.path.dirname(key_path)
    if not gfile.Exists(base_dir):
        try:
            gfile.MakeDirs(base_dir)
        except tf.errors.OpError as e:  # pylint: disable=broad-except
            fl_logging.warning("create directory %s failed,"
                               " reason: %s", base_dir, str(e))
            return False
    # Atomic write so concurrent readers never see a partial value.
    file_io.atomic_write_string_to_file(key_path, data)
    return True
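# Usage sketch (illustrative only). It assumes set_data belongs to a
# file-backed KV-store class; the store variable and key are hypothetical.
def _example_set_data(store):
    ok = store.set_data('jobs/job-0/checkpoint', 'step=1000')
    if not ok:
        fl_logging.error('failed to persist checkpoint metadata')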
def setUp(self):
    self.data_source = common_pb.DataSource()
    self.data_source.data_source_meta.name = 'fclh_test'
    self.data_source.data_source_meta.partition_num = 1
    self.raw_data_dir = "./raw_data"
    self.kvstore = db_client.DBClient('etcd', True)
    self.kvstore.delete_prefix(
        common.data_source_kvstore_base_dir(
            self.data_source.data_source_meta.name))
    self.assertEqual(self.data_source.data_source_meta.partition_num, 1)
    partition_dir = os.path.join(self.raw_data_dir, common.partition_repr(0))
    if gfile.Exists(partition_dir):
        gfile.DeleteRecursively(partition_dir)
    gfile.MakeDirs(partition_dir)
    self.manifest_manager = raw_data_manifest_manager.RawDataManifestManager(
        self.kvstore, self.data_source)
def _generate_portal_input_data(self, date_time, event_time_filter,
                                start_index, total_item_num,
                                portal_manifest):
    self.assertEqual(total_item_num % portal_manifest.input_partition_num, 0)
    item_step = portal_manifest.input_partition_num
    for partition_id in range(portal_manifest.input_partition_num):
        cands = list(range(partition_id, total_item_num, item_step))
        # Randomly swap nearby candidates so each partition is slightly
        # out of order.
        for i in range(len(cands)):
            if random.randint(1, 4) > 1:
                continue
            a = random.randint(i - 16, i + 16)
            b = random.randint(i - 16, i + 16)
            if a < 0:
                a = 0
            if a >= len(cands):
                a = len(cands) - 1
            if b < 0:
                b = 0
            if b >= len(cands):
                b = len(cands) - 1
            if abs(cands[a] // item_step - b) <= 16 and \
                    abs(cands[b] // item_step - a) <= 16:
                cands[a], cands[b] = cands[b], cands[a]
        fpath = common.encode_portal_hourly_fpath(
            portal_manifest.input_data_base_dir, date_time, partition_id)
        if not gfile.Exists(os.path.dirname(fpath)):
            gfile.MakeDirs(os.path.dirname(fpath))
        with tf.io.TFRecordWriter(fpath) as writer:
            for lid in cands:
                real_id = lid + start_index
                feat = {}
                example_id = '{}'.format(real_id).encode()
                feat['example_id'] = tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[example_id]))
                # For testing the basic example_validator with invalid
                # event times.
                if real_id == 0 or not event_time_filter(real_id):
                    event_time = 150000000 + real_id
                    feat['event_time'] = tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[event_time]))
                example = tf.train.Example(
                    features=tf.train.Features(feature=feat))
                writer.write(example.SerializeToString())
    succ_tag_fpath = common.encode_portal_hourly_finish_tag(
        portal_manifest.input_data_base_dir, date_time)
    with gfile.GFile(succ_tag_fpath, 'w') as fh:
        fh.write('')
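# Read-back sketch (illustrative, not part of the original source):
# iterate one generated partition file and parse its records.
# tf.io.tf_record_iterator is the TF1-era reader API; fpath is assumed
# to be a path produced by common.encode_portal_hourly_fpath above.
def _example_read_partition(fpath):
    for record in tf.io.tf_record_iterator(fpath):
        example = tf.train.Example()
        example.ParseFromString(record)
        example_id = example.features.feature['example_id'] \
            .bytes_list.value[0]
        print(example_id)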
def visualize_dataset(dataset_name, output_path, num_animations=5,
                      num_frames=20, fps=10):
    """Visualizes the data set by saving images to output_path.

    For each latent factor, outputs 16 images where only that latent
    factor is varied while all others are kept constant.

    Args:
        dataset_name: String with name of dataset as defined in named_data.py.
        output_path: String with path in which to create the visualizations.
        num_animations: Integer with number of distinct animations to create.
        num_frames: Integer with number of frames in each animation.
        fps: Integer with frame rate for the animation.
    """
    data = named_data.get_named_ground_truth_data(dataset_name)
    random_state = np.random.RandomState(0)

    # Create output folder if necessary.
    path = os.path.join(output_path, dataset_name)
    if not gfile.IsDirectory(path):
        gfile.MakeDirs(path)

    # Create still images.
    for i in range(data.num_factors):
        factors = data.sample_factors(16, random_state)
        indices = [j for j in range(data.num_factors) if i != j]
        factors[:, indices] = factors[0, indices]
        images = data.sample_observations_from_factors(factors, random_state)
        visualize_util.grid_save_images(
            images, os.path.join(path, "variations_of_factor%s.png" % i))

    # Create animations.
    for i in range(num_animations):
        base_factor = data.sample_factors(1, random_state)
        images = []
        for j, num_atoms in enumerate(data.factors_num_values):
            factors = np.repeat(base_factor, num_frames, axis=0)
            factors[:, j] = visualize_util.cycle_factor(
                base_factor[0, j], num_atoms, num_frames)
            images.append(
                data.sample_observations_from_factors(factors, random_state))
        visualize_util.save_animation(
            np.array(images), os.path.join(path, "animation%d.gif" % i), fps)
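# Example invocation (illustrative). 'dsprites_full' is assumed to be a
# dataset name registered in named_data.py; the output path is arbitrary.
def _example_visualize_dataset():
    visualize_dataset('dsprites_full', '/tmp/visualizations',
                      num_animations=2, num_frames=10, fps=5)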
def save_subvolume(labels, origins, output_path, **misc_items):
    """Saves an FFN subvolume.

    Args:
        labels: 3d zyx number array with the segment labels
        origins: dictionary mapping segment ID to origin information
        output_path: path at which to save the segmentation in the form
            of a .npz file
        **misc_items: (optional) additional values to save in the output file
    """
    seg = segmentation.reduce_id_bits(labels)
    gfile.MakeDirs(os.path.dirname(output_path))
    with atomic_file(output_path) as fd:
        np.savez_compressed(fd, segmentation=seg, origins=origins,
                            **misc_items)
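# A minimal sketch of saving a toy subvolume (illustrative values only;
# the 'request_id' kwarg is an arbitrary example of a misc item).
def _example_save_subvolume():
    labels = np.zeros((64, 64, 64), dtype=np.uint64)
    labels[10:20, 10:20, 10:20] = 1  # one toy segment
    origins = {1: (15, 15, 15)}      # segment ID -> origin info
    save_subvolume(labels, origins, '/tmp/ffn/seg-0_0_0.npz',
                   request_id='example-001')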
def _generate_input_data(self):
    self._total_item_num = 1 << 16
    self.assertEqual(
        self._total_item_num % self._portal_manifest.input_partition_num, 0)
    # Start from clean input and output directories.
    if gfile.Exists(self._portal_manifest.input_data_base_dir):
        gfile.DeleteRecursively(self._portal_manifest.input_data_base_dir)
    if gfile.Exists(self._portal_manifest.output_data_base_dir):
        gfile.DeleteRecursively(self._portal_manifest.output_data_base_dir)
    hourly_dir = common.encode_portal_hourly_dir(
        self._portal_manifest.input_data_base_dir, self._date_time)
    gfile.MakeDirs(hourly_dir)
    for partition_id in range(self._portal_manifest.input_partition_num):
        self._generate_one_part(partition_id)
    succ_tag_fpath = common.encode_portal_hourly_finish_tag(
        self._portal_manifest.input_data_base_dir, self._date_time)
    with gfile.GFile(succ_tag_fpath, 'w') as fh:
        fh.write('')
def main(unused_argv):
    request = inference_pb2.InferenceRequest()
    with open(FLAGS.parameter_file, mode='r') as f:
        text = ' '.join(f.readlines())
    text_format.Parse(text, request)

    if not gfile.Exists(request.segmentation_output_dir):
        gfile.MakeDirs(request.segmentation_output_dir)

    runner = inference.Runner()
    runner.start(request)
    # runner.run((bbox.start.z, bbox.start.y, bbox.start.x),
    #            (bbox.size.z, bbox.size.y, bbox.size.x))
    runner.run((0, 0, 0),
               (int(FLAGS.image_size_z), int(FLAGS.image_size_y),
                int(FLAGS.image_size_x)))

    counter_path = os.path.join(request.segmentation_output_dir,
                                'counters.txt')
    if not gfile.Exists(counter_path):
        runner.counters.dump(counter_path)