예제 #1
0
    def test_basic(self):
        """Exercises a Syncer between two temporary directories.

        Asserts that constructing the Syncer makes the pre-existing remote
        file visible locally, that a new local file shows up remotely after
        a sync is kicked, and that rewriting that local file updates the
        remote copy after another kick.
        """

        def write_text(path, text):
            # Create or overwrite `path` with `text`.
            with open(path, 'w') as out:
                out.write(text)

        with tempfile.TemporaryDirectory() as remotedir:
            with tempfile.TemporaryDirectory() as localdir:
                both_files = ['f0.txt', 'f1.txt']
                local_f1 = localdir + '/f1.txt'
                remote_f1 = remotedir + '/f1.txt'

                # A file that exists remotely before the syncer is created.
                write_text(remotedir + '/f0.txt', 'hello0')
                syncer = tffilesync.Syncer(remotedir, localdir)
                self.assertListEqual(gfile.listdir(localdir), ['f0.txt'])
                self.assertEqual(_read_file(localdir + '/f0.txt'), 'hello0')

                # A new local file must appear on the remote side.
                write_text(local_f1, 'hello1')
                _kick_sync(syncer)

                self.assertListEqual(sorted(gfile.listdir(localdir)),
                                     both_files)
                self.assertListEqual(sorted(gfile.listdir(remotedir)),
                                     both_files)
                self.assertEqual(_read_file(remotedir + '/f0.txt'), 'hello0')
                self.assertEqual(_read_file(remote_f1), 'hello1')

                # Wait before rewriting the local file, then sync again.
                time.sleep(2)
                write_text(local_f1, 'hello2')
                _kick_sync(syncer)
                self.assertEqual(_read_file(remote_f1), 'hello2')
                self.assertListEqual(sorted(gfile.listdir(remotedir)),
                                     both_files)

                syncer.stop()
예제 #2
0
 def test_creates_new_trajectory_dirs(self):
     """Asserts that each `env.reset()` adds the next numbered directory."""
     output_dir = self.get_temp_dir()
     env = self._create_env(output_dir=output_dir)

     def listed_names():
         # Current entries of the output directory, order-insensitive.
         return set(gfile.listdir(output_dir))

     # Nothing is written before the first reset.
     self.assertEqual(listed_names(), set())
     for expected_names in ({"0"}, {"0", "1"}):
         env.reset()
         self.assertEqual(listed_names(), expected_names)
예제 #3
0
파일: utils.py 프로젝트: skiler07/mst-tf
    def __init__(self, datapath):
        """Index the content and style images found under `datapath`.

        A path starting with 'gs://' is listed through `listdir` on its
        'content' and 'style' sub-paths; any other path is traversed with
        `os.walk` and files are assigned by the name of their directory.

        :param string datapath: filepath to training images
        """
        # Basic configuration.
        self.datapath = datapath
        self.im_shape = (None, None, 3)
        self.crop_im_shape = (256, 256, 3)
        self.total_imgs = None
        self.k = 3
        self.vgg = self.build_vgg()
        print('Initiating DataLoader with data from {}'.format(datapath))

        if self.datapath.startswith('gs://'):
            # Bucket layout: <datapath>/content and <datapath>/style.
            self.content_bucket = os.path.join(self.datapath, 'content')
            self.style_bucket = os.path.join(self.datapath, 'style')
            print('Content bucket: ', self.content_bucket)
            print('Style bucket: ', self.style_bucket)
            self.content_img_paths = [
                os.path.join(self.content_bucket, entry)
                for entry in listdir(self.content_bucket)
            ]
            self.style_img_paths = [
                os.path.join(self.style_bucket, entry)
                for entry in listdir(self.style_bucket)
            ]
        else:
            # Substrings that mark a file name as an image (substring match,
            # not a strict extension check).
            image_markers = ('jpeg', 'png', 'jpg')
            self.style_img_paths = []
            self.content_img_paths = []
            for dirpath, _, filenames in os.walk(self.datapath):
                for filename in filenames:
                    lowered = filename.lower()
                    if not any(marker in lowered for marker in image_markers):
                        continue
                    full_path = os.path.join(dirpath, filename)
                    # 'content' takes precedence when both words occur.
                    if 'content' in dirpath:
                        self.content_img_paths.append(full_path)
                    elif 'style' in dirpath:
                        self.style_img_paths.append(full_path)

        # Both branches finish with the same bookkeeping and report.
        self.num_content_pics = len(self.content_img_paths)
        self.num_style_pics = len(self.style_img_paths)

        print(">> Found {} content images in dataset".format(
            self.num_content_pics))
        print(">> Found {} style images in dataset".format(
            self.num_style_pics))
    def _next_trajectory_dir(self):
        """Assigns a new output dir for a trajectory under self._output_dir.

        Directory names are consecutive integers starting from zero. New
        directory index is assigned as the maximum of past indices plus one.
        Directories that are not integers are ignored.

        Returns:
          A path of the new directory.
        """
        trajectory_dirs = gfile.listdir(self._output_dir)

        def int_or_none(s):
            try:
                return int(s)
            except (TypeError, ValueError):
                # int() raises ValueError for a non-numeric string; without
                # catching it, a single stray entry would abort the lookup
                # instead of being ignored.
                return None

        past_trajectory_ids = [
            trajectory_id
            for trajectory_id in map(int_or_none, trajectory_dirs)
            if trajectory_id is not None
        ]
        # The -1 seed makes the first directory "0" when none exist yet.
        next_trajectory_id = max([-1] + past_trajectory_ids) + 1

        return os.path.join(self._output_dir, str(next_trajectory_id))
예제 #5
0
def maybe_pick_models_to_evaluate(checkpoint_dir):
    """Return the first checkpoint in `checkpoint_dir` not yet evaluated.

    Checkpoints are the entries ending in ".meta", with that suffix removed.
    If the EVAL_FILENAME file exists in the directory, the first
    comma-separated field of each of its lines names a checkpoint that has
    already been evaluated.

    Returns:
      The joined path of the first remaining checkpoint, or None when there
      is none left.
    """
    logging.info("Picking checkpoint to evaluate from %s.", checkpoint_dir)

    meta_suffix = ".meta"
    candidates = [
        name[:-len(meta_suffix)]
        for name in gfile.listdir(checkpoint_dir)
        if name.endswith(meta_suffix)
    ]
    logging.info("Found existing checkpoints: %s", candidates)

    eval_record_path = os.path.join(checkpoint_dir, EVAL_FILENAME)
    already_evaluated = []
    if gfile.exists(eval_record_path):
        with gfile.GFile(eval_record_path, "r") as record:
            for line in record.readlines():
                already_evaluated.append(line.strip().split(",")[0])
        logging.info("Found already evaluated checkpoints: %s",
                     already_evaluated)

    pending = [name for name in candidates if name not in already_evaluated]
    logging.info("Remaining potential checkpoints: %s", pending)

    if not pending:
        return None
    return os.path.join(checkpoint_dir, pending[0])
예제 #6
0
def listdir_remote(path):
    """
    List the entries of `path`, using `gfile.listdir` when `is_remote_path`
    reports the path as remote and `os.listdir` otherwise.
    """
    lister = gfile.listdir if is_remote_path(path) else os.listdir
    return lister(path)
예제 #7
0
    def ls(self, path: str, recursive=False) -> List[File]:
        """List the files at `path` together with their size and mtime.

        Returns an empty list when `path` does not exist and a one-element
        list when it is a single file. For a directory, only non-directory
        entries are reported: the direct children by default, or everything
        found by `gfile.walk` when `recursive` is true. `mtime` is the
        modification time truncated to whole seconds.
        """
        if not gfile.exists(path):
            return []

        def describe(target: str):
            info = gfile.stat(target)
            return File(path=target,
                        size=info.length,
                        mtime=int(info.mtime_nsec / 1e9))

        # A plain file is reported on its own.
        if not gfile.isdir(path):
            return [describe(path)]

        if recursive:
            candidates = (os.path.join(root, name)
                          for root, _, names in gfile.walk(path)
                          for name in names)
        else:
            candidates = (os.path.join(path, name)
                          for name in gfile.listdir(path))

        # Files only
        return [
            describe(candidate) for candidate in candidates
            if not gfile.isdir(candidate)
        ]
예제 #8
0
def _list_dir(dir_path: str) -> _DirEntries:
    """Stat every entry listed in `dir_path` and return the stats by name."""

    def snapshot(name):
        # Copy the fields of interest out of the gfile stat result.
        info = gfile.stat(dir_path + '/' + name)
        return _FileStat(length=info.length,
                         mtime_nsec=info.mtime_nsec,
                         is_directory=info.is_directory)

    entries: _DirEntries = {
        name: snapshot(name) for name in gfile.listdir(dir_path)
    }
    return entries
예제 #9
0
  def load_acic(self):
    """Loads one randomly chosen ACIC semi-synthetic dataset.

    Picks a file from the low- or high-dimensional dataset directory
    (selected by `param_data['data_low_dimension']`) using a random index
    seeded with `self.seed`, reads it as CSV and fills in the attributes of
    this object: `outcome` (column 'Y'), `treatment` (column 'A'),
    `covariates` (all remaining columns, standardized), `sample_size`,
    `num_covariates`, `treatment_prop` and `tau` (the 'trueATE' value of the
    row of the true-ATE CSV whose 'filename' matches the chosen file).

    Returns:
      None
    """
    self.data_path = self.param_data['data_path'] + 'ACIC/'
    if self.param_data['data_low_dimension']:
      true_ate_path = self.data_path + 'lowDim_trueATE.csv'
      self.data_path = self.data_path + 'low_dimensional_datasets/'
    else:
      true_ate_path = self.data_path + 'highDim_trueATE.csv'
      self.data_path = self.data_path + 'high_dimensional_datasets/'

    np.random.seed(self.seed)
    # List the directory once: the index is then guaranteed to refer to the
    # same listing it was drawn for, and a second remote call is avoided.
    # NOTE(review): the chosen file depends on the listing order returned by
    # gfile.listdir; confirm that order is stable if reproducibility matters.
    dataset_files = gfile.listdir(self.data_path)
    i = np.random.randint(0, len(dataset_files), 1)[0]

    path = dataset_files[i]
    with gfile.GFile(self.data_path + path, 'r') as f:
      data = pd.read_csv(f, delimiter=',')

    self.outcome = data['Y'].values
    self.treatment = data['A'].values
    self.covariates = data.drop(['Y', 'A'], axis=1).values
    scaler = StandardScaler()
    self.covariates = scaler.fit_transform(self.covariates)

    self.sample_size, self.num_covariates = self.covariates.shape
    self.linear, self.noise = False, False
    self.var_covariates = None
    self.treatment_prop = self.treatment.sum()/len(self.treatment)

    with gfile.GFile(true_ate_path, 'r') as f:
      true_ate = pd.read_csv(f, delimiter=',')

    # Drop the last four characters of the file name (presumably the
    # '.csv' extension - TODO confirm) to match the 'filename' column.
    path = path[:-4]
    true_ate_row = true_ate[true_ate['filename'] == path]
    self.tau = true_ate_row['trueATE'].values[0]
예제 #10
0
파일: predictor.py 프로젝트: jgung/tf-nlp
def get_latest_savedmodel_from_jobdir(job_dir: str) -> str:
    """
    Return the path of the latest saved model from a given output directory
    of a trainer.

    The export directory is `<job_dir>/<constants.MODEL_PATH>/export/best_exporter`.
    Entries whose name starts with 'temp' are skipped and the entry whose
    name compares greatest is chosen.

    :param job_dir: output directory of trainer
    :return: path of the selected export directory entry
    :raises ValueError: if the export directory has no eligible entry
    """
    export_dir = os.path.join(job_dir, constants.MODEL_PATH, 'export',
                              'best_exporter')
    candidates = [
        path for path in gfile.listdir(export_dir)
        if not path.startswith('temp')
    ]
    if not candidates:
        # max() on an empty list would raise the same exception type with
        # an unhelpful message; say which directory was empty instead.
        raise ValueError(
            'No exported models found in {}'.format(export_dir))
    return os.path.join(export_dir, max(candidates))
예제 #11
0
  def __init__(self, seed, param_data):
    """Builds the treated/control/all datasets from TFRecord files.

    Every entry listed in `param_data['data_path']` is read as a TFRecord
    file. The simulation features parsed from each record are the ones whose
    keys embed `seed` ('image/sim_<seed>_{pi,y,mu0,mu1}/value').

    Args:
      seed: value embedded in the simulation feature keys and forwarded to
        the filtering helpers.
      param_data: mapping providing 'data_path'.
    """
    super(LoadImages, self).__init__()
    self.name = 'ukb'

    data_dir = param_data['data_path']
    shard_paths = [
        os.path.join(data_dir, entry) for entry in gfile.listdir(data_dir)
    ]
    records = tf.data.TFRecordDataset(shard_paths)

    # Parsing spec: the image and its id, then one float per simulated field.
    feature_spec = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/id': tf.io.FixedLenFeature([1], tf.string),
    }
    for sim_field in ('pi', 'y', 'mu0', 'mu1'):
      feature_spec[f'image/sim_{seed}_{sim_field}/value'] = (
          tf.io.FixedLenFeature([1], tf.float32))

    autotune = tf.data.AUTOTUNE
    examples = records.map(
        _get_parse_example_fn(feature_spec), num_parallel_calls=autotune)
    examples = examples.map(_decode_img, num_parallel_calls=autotune)
    examples = examples.map(lambda x: _filter_treatment(x, seed),
                            num_parallel_calls=autotune)

    # split treated and non treated and pred (for full counterfactual).
    treated = examples.filter(lambda x: x['t'])
    control = examples.filter(lambda x: not x['t'])

    treated = treated.map(lambda x: _filter_cols(x, seed),
                          num_parallel_calls=autotune)
    control = control.map(lambda x: _filter_cols(x, seed),
                          num_parallel_calls=autotune)

    everything = examples.map(lambda x: _filter_cols_pred(x, seed),
                              num_parallel_calls=autotune)
    everything_ps = examples.map(lambda x: _filter_cols_ps(x, seed),
                                 num_parallel_calls=autotune)

    self.dataset_treated = _get_dataset(treated)
    self.dataset_control = _get_dataset(control)
    self.dataset_all = _get_dataset(everything)
    self.dataset_all_ps = _get_dataset_ps(everything_ps)
예제 #12
0
def gen_csv_from_annotations(
    input_dir: str,
    output_file=constants.DEFAULT_CSV_FILENAME,
    out_path_prefix='',
    dataset_type=constants.DEFAULT_DATASET_TYPE):
  """Generates AutoML dataset CSV from annotation files.

  One CSV row is written per bounding box of every annotation file listed in
  `input_dir`. A leading '~' in `input_dir` and `output_file` is expanded.

  Args:
    input_dir: Directory of annotation files.
    output_file: Output CSV filename.
    out_path_prefix: Filepath prefix to prepend to the image files.
      e.g.
      src_image_filename = '/tmp/path/to/image.jpg'
      out_path_prefix = 'gs://bucket/images'
      output_image_filename = 'gs://bucket/images/image.jpg'
    dataset_type: Dataset type (TRAIN, VAL, TEST, UNSPECIFIED)
      to use for all the parsed images.

  Raises:
    ValueError: If `input_dir` does not exist.
  """
  # Expand the user directory once so that the existence check, the listing
  # and the per-file paths all refer to the same location (previously only
  # the listing was expanded, so '~' paths failed the existence check).
  input_dir = os.path.expanduser(input_dir)

  if not gfile.exists(input_dir):
    raise ValueError('Input directory not found.')

  with gfile.GFile(os.path.expanduser(output_file), 'w') as outf:
    writer = csv.writer(outf, delimiter=',')
    for filename in gfile.listdir(input_dir):
      filepath = os.path.join(input_dir, filename)
      image_filename, boxes = annotation.read(filepath)
      out_image_filename = os.path.join(out_path_prefix, image_filename)
      for b in boxes:
        # The four blank columns are written as empty strings on purpose
        # (presumably unused vertex columns of the CSV layout - TODO confirm).
        row = [
            dataset_type,
            out_image_filename,
            b.label,
            b.xmin,
            b.ymin,
            '',
            '',
            b.xmax,
            b.ymax,
            '',
            '',
        ]
        writer.writerow(row)
예제 #13
0
def _load_data(split, dataset_config, outcomes):
    """Loads and parses TFRecords for the given dataset split.

    Elements are `TensorDictTriple`s and contain inputs, labels, and weights.
    The `inputs: TensorDict` must contain an `IMAGE_KEY` key with rgb tensor
    values of shape `dataset_config.image_size`. The `labels: TensorDict`
    should contain one key per outcome, while the `weights: TensorDict`
    should contain `subsample_weights` for each outcome.

    Args:
      split: The dataset split (train, validation, test, or predict).
      dataset_config: A dataset ConfigDict containing hparams and
        augmentations.
      outcomes: A list of outcome ConfigDicts used to define labels.

    Returns:
      A tf.data.Dataset containing decoded image tensors, labels, and weights.
    """
    # Feature spec used to parse each serialized example.
    feature_spec = _build_tf_record_features(split, outcomes)

    # Shards of this split: entries of the dataset path whose name starts
    # with the prefix configured for the split, as full paths.
    shard_prefix = dataset_config[split.value]
    shard_paths: List[str] = [
        dataset_config.path + '/' + name
        for name in gfile.listdir(str(dataset_config['path']))
        if name.startswith(shard_prefix)
    ]

    autotune = tf.data.AUTOTUNE
    pipeline = tf.data.TFRecordDataset(filenames=shard_paths)

    # Parse, then rename keys / split into inputs, labels and weights, then
    # decode the images.
    for transform in (_get_parse_example_fn(feature_spec),
                      _get_rename_keys(feature_spec),
                      _decode_img):
        pipeline = pipeline.map(transform, num_parallel_calls=autotune)

    return pipeline
예제 #14
0
    def get_aggregated_scores(self, pickle_dirs):
        """Collects and averages the scores of all pickles in `pickle_dirs`.

        Every entry of every directory is passed to `self._get_scores`, which
        is expected to yield (sr, spl, sdtw, ndtw) tuples.

        Args:
          pickle_dirs: (list) of directories of VALAN score pickles.

        Returns:
          score_dict: (dict) for lists of SR/SPL/SDTW/NDTW scores. E.g.
            {'sr': [0.35, 0.34, 0.27], 'spl', [0.46, 0.34, 0.45], ...}
          avg_scores: (dict) the average of SR/SPL/SDTW/NDTW scores. E.g.
            {'sr': 0.30, 'spl': 0.40, ...}
        """
        metric_names = ('sr', 'spl', 'sdtw', 'ndtw')
        score_dict = {metric: [] for metric in metric_names}

        # Gather every pickle path first, then read them.
        pickle_paths = []
        for pickle_dir in pickle_dirs:
            for filename in gfile.listdir(pickle_dir):
                pickle_paths.append(os.path.join(pickle_dir, filename))

        for pickle_path in pickle_paths:
            for score in self._get_scores(pickle_path):
                sr, spl, sdtw, ndtw = score
                score_dict['sr'].append(sr)
                score_dict['spl'].append(spl)
                score_dict['sdtw'].append(sdtw)
                score_dict['ndtw'].append(ndtw)

        avg_scores = {
            metric: np.mean(score_dict[metric]) for metric in metric_names
        }

        return score_dict, avg_scores
예제 #15
0
  def run_eval(self,
               experiment_dir,
               rng,
               step=None,
               work_units=None,
               model_names_r=None,
               widths_r=None):
    """Evaluate models in experiment_dir for R-Precision.

    Renders every work unit, embeds the renders with each retrieval model
    and compares them against the embedded text queries. The resulting
    metrics are saved to `<experiment_dir>/metrics.npy` and returned.

    Args:
      experiment_dir: directory holding one sub-directory per work unit; the
        shared config is loaded from work unit '1'.
      rng: object exposing `advance(1)`; its result is passed to
        `helpers.init_nerf_model`.
      step: forwarded to `self.render_from_checkpoint`.
      work_units: work unit ids to evaluate (sorted in place). When None,
        all numeric entries of `experiment_dir` are used.
      model_names_r: retrieval model names; defaults to
        `config.retrieve_models`.
      widths_r: one render width per retrieval model; defaults to
        `config.retrieve_widths`.

    Returns:
      Dict with 'renders_by_width', 'work_unit_configs', 'work_unit_queries'
      and one entry per retrieval model name holding the aggregated cosine
      similarity, loss and accuracy.
    """
    logging.info('Local devices: %s', jax.local_devices())
    logging.info('All devices: %s', jax.devices())

    config = log.load_config_json(os.path.join(experiment_dir, '1'))
    logging.info('Config: %s', config)

    # Load retrieval models.
    if not model_names_r:
      model_names_r = config.retrieve_models
    models_r = [
        helpers.load_image_text_model(name)
        for name in tqdm.tqdm(model_names_r, desc='loading retrieval models')
    ]
    if not widths_r:
      widths_r = config.retrieve_widths

    print('model_names_r', model_names_r)
    print('widths_r', widths_r)

    # Encode retrieval set text descriptions.
    z_clip_rs = []  # text encodings of queries with all retrieval models
    # shape: [n_models, n_queries, d_model for specific model]
    if config.queries_r:
      for _, encode_text, _, tokenize_fn in tqdm.tqdm(
          models_r, desc='embedding queries with retrieval models'):
        z_clip_r = encode_text(tokenize_fn(config.queries_r))
        z_clip_rs.append(z_clip_r)

    # JIT rendering.
    kwargs_test = dict(rng=None, sigma_noise_std=0.)
    config_test_hq = ml_collections.ConfigDict(config)
    config_test_hq.update(config.test_hq)
    _, render_rays = helpers.init_nerf_model(rng.advance(1), config)

    @functools.partial(jax.pmap, in_axes=(0, None, None))
    def render_test_hq_p(rays, variables, origin):
      return render_rays(
          rays=rays,
          variables=variables,
          config=config_test_hq,
          sc=1.,
          mask_rad=config_test_hq.mr1,
          origin=origin,
          **kwargs_test)[0]

    # Render
    if work_units is None:
      work_units = gfile.listdir(experiment_dir)
      work_units = [int(wu) for wu in work_units if wu.isnumeric()]
    work_units.sort()
    work_unit_queries = []
    work_unit_configs = []
    n_wu = len(work_units)
    # create resolution -> n_wu -> 4ximg mapping
    all_renders_by_width = collections.defaultdict(list)
    for work_unit in tqdm.tqdm(work_units, 'Rendering all work units'):
      # Load query used to generate this object
      work_unit_dir = os.path.join(experiment_dir, str(work_unit))
      wu_config = log.load_config_json(work_unit_dir)
      work_unit_configs.append(wu_config)
      work_unit_queries.append(wu_config.query)  # not templated

      # Render the object
      _, renders = self.render_from_checkpoint(work_unit_dir, widths_r,
                                               render_test_hq_p, step)
      for width, render in renders.items():
        all_renders_by_width[width].append(render)

    print('all_renders_by_width keys', list(all_renders_by_width.keys()))

    def aggregate(raw):
      # `onp.float` was only an alias of the builtin `float` and has been
      # removed from NumPy; the builtin yields the same float64 dtype.
      raw = onp.array(raw).astype(float)
      return {
          'mean': onp.mean(raw),
          'sem': stats.sem(raw),
          'raw': raw,
      }

    metrics = {
        # `jax.tree_util.tree_map` is the supported spelling of the former
        # top-level `jax.tree_map` alias.
        'renders_by_width':
            jax.tree_util.tree_map(onp.array, dict(all_renders_by_width)),
        'work_unit_configs': work_unit_configs,
        'work_unit_queries': work_unit_queries,
    }

    ## Embed images with all retrieval models
    pbar = tqdm.tqdm(
        zip(model_names_r, widths_r, z_clip_rs, models_r),
        desc='Embedding renderings',
        total=len(model_names_r))
    for model_name, width, z_text, (encode_image, _, preprocess, _) in pbar:
      renders = all_renders_by_width[width]
      rgbs = np.array([rgb for rgb, _, _, _ in renders])
      print('about to encode rgbs with shape', rgbs.shape)
      print('  model_name', model_name)
      print('  width', width)
      z_est = encode_image(preprocess(rgbs))

      assert z_est.shape[0] == n_wu
      assert z_text.shape[0] == len(config.queries_r)
      cosine_sim = (z_est[:, None] * z_text[None]).sum(-1)  # [n_wu, queries_r]
      # Index of each work unit's own query within the retrieval queries.
      idx_true = np.array(
          [config.queries_r.index(query) for query in work_unit_queries])
      cosine_sim_true = np.take_along_axis(
          cosine_sim, idx_true[:, None], axis=1).squeeze(1)
      log_prob = nn.log_softmax(cosine_sim, axis=1)  # normalize over captions
      log_likelihood = np.take_along_axis(
          log_prob, idx_true[:, None], axis=1).squeeze(1)
      # A work unit counts as correct when its own query scores highest.
      correct = np.argmax(cosine_sim, axis=1) == idx_true
      metrics[model_name] = {
          'val/retrieve_cosine_sim': aggregate(cosine_sim_true),
          'val/retrieve_loss': aggregate(-log_likelihood),
          'val/retrieve_acc': aggregate(correct),
      }

    metrics_path = os.path.join(experiment_dir, 'metrics.npy')
    with gfile.GFile(metrics_path, 'wb') as f:
      logging.info('Writing metrics to %s', metrics_path)
      onp.save(f, metrics)

    # Renders and configs are too bulky to log; report the rest.
    for k, v in metrics.items():
      if k not in ('renders_by_width', 'work_unit_configs'):
        logging.info('Metric %s: %s', k, v)

    return metrics
예제 #16
0
def StatisticsGen(
    examples_uri: 'ExamplesUri',
    output_statistics_uri: 'ExampleStatisticsUri',
    schema_uri: 'SchemaUri' = None,
    exclude_splits: str = None,
    beam_pipeline_args: list = None,
) -> NamedTuple('Outputs', [
    ('statistics_uri', 'ExampleStatisticsUri'),
]):
    """Runs the executor of the TFX StatisticsGen component on URI arguments.

    The function's arguments are matched by name against the component's
    declared execution parameters, inputs and outputs: `<name>` for
    parameters, `<name>_uri` / `<name>_path` for inputs and
    `output_<name>_uri` / `<name>_path` for outputs. Artifacts are created
    for the matching URIs and passed to the component's executor.

    Returns:
      A 1-tuple containing `output_statistics_uri`.
    """
    from tfx.components.statistics_gen.component import StatisticsGen as component_class

    #Generated code
    import os
    import tempfile
    from tensorflow.io import gfile
    from google.protobuf import json_format, message
    from tfx.types import channel_utils, artifact_utils
    from tfx.components.base import base_executor

    # Snapshot of every local name defined so far (the function's parameters
    # and the imports above); argument values are looked up in it by name.
    arguments = locals().copy()

    component_class_args = {}

    # Execution parameters: arguments left as None are skipped; values for
    # protobuf-typed parameters are parsed from JSON into message objects.
    for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items(
    ):
        argument_value = arguments.get(name, None)
        if argument_value is None:
            continue
        parameter_type = execution_parameter.type
        if isinstance(parameter_type, type) and issubclass(
                parameter_type, message.Message):
            argument_value_obj = parameter_type()
            json_format.Parse(argument_value, argument_value_obj)
        else:
            argument_value_obj = argument_value
        component_class_args[name] = argument_value_obj

    # Input artifacts: one artifact per input for which a URI/path was given.
    for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
        artifact_path = arguments.get(name + '_uri') or arguments.get(name +
                                                                      '_path')
        if artifact_path:
            artifact = channel_parameter.type()
            artifact.uri = artifact_path.rstrip(
                '/'
            ) + '/'  # Some TFX components require that the artifact URIs end with a slash
            if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
                # Recovering splits
                subdirs = gfile.listdir(artifact_path)
                # Workaround for https://github.com/tensorflow/tensorflow/issues/39167
                subdirs = [subdir.rstrip('/') for subdir in subdirs]
                # Split names are the sub-directory names without 'Split-'.
                split_names = [
                    subdir.replace('Split-', '') for subdir in subdirs
                ]
                artifact.split_names = artifact_utils.encode_split_names(
                    sorted(split_names))
            component_class_args[name] = channel_utils.as_channel([artifact])

    component_class_instance = component_class(**component_class_args)

    input_dict = channel_utils.unwrap_channel_dict(
        component_class_instance.inputs.get_all())
    output_dict = {}
    exec_properties = component_class_instance.exec_properties

    # Generating paths for output artifacts
    for name, channel in component_class_instance.outputs.items():
        artifact_path = arguments.get('output_' + name +
                                      '_uri') or arguments.get(name + '_path')
        if artifact_path:
            artifact = channel.type()
            artifact.uri = artifact_path.rstrip(
                '/'
            ) + '/'  # Some TFX components require that the artifact URIs end with a slash
            artifact_list = [artifact]
            channel._artifacts = artifact_list
            output_dict[name] = artifact_list

    print('component instance: ' + str(component_class_instance))

    # Instantiate the component's executor class and run it directly.
    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=arguments.get('beam_pipeline_args'),
        tmp_dir=tempfile.gettempdir(),
        unique_id='tfx_component',
    )
    executor = component_class_instance.executor_spec.executor_class(
        executor_context)
    executor.Do(
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
    )

    return (output_statistics_uri, )
예제 #17
0
# Predicts recognition on Live camera input
# with the help of predefined model, train
# dataset and harcascade classifier.

import cv2
from tensorflow.io.gfile import listdir

from classes.model import Model

# get class list
# Class list: one entry per item listed under the training dataset directory.
class_names = listdir("Datasets/train/")

# load model
model = Model(len(class_names))
model.load_model()

# predict live on camera input
cap = cv2.VideoCapture(0)
face_classifier = cv2.CascadeClassifier(cv2.data.haarcascades +
                                        "haarcascade_frontalface_default.xml")

for i in range(100):
    ret, frame = cap.read()
    faces = face_classifier.detectMultiScale(frame, 1.3, 5)
    # Test for "no detections" by length: `faces is ()` compared object
    # identity against a tuple literal, which is not a reliable emptiness
    # check and triggers a SyntaxWarning on current Python versions.
    if len(faces) == 0:
        # No face found: run the model on the whole frame.
        preds = model.predict_on_cv(frame)
    # Crop all faces found
    else:
        cropped_faces = []
        for (x, y, w, h) in faces:
            x = x - 10
예제 #18
0
def Transform(
    examples_path: InputPath('Examples'),
    schema_path: InputPath('Schema'),
    transform_graph_path: OutputPath('TransformGraph'),
    transformed_examples_path: OutputPath('Examples'),
    updated_analyzer_cache_path: OutputPath('TransformCache'),
    analyzer_cache_path: InputPath('TransformCache') = None,
    module_file: str = None,
    preprocessing_fn: str = None,
    force_tf_compat_v1: int = None,
    custom_config: str = None,
    splits_config: {
        'JsonObject': {
            'data_type': 'proto:tfx.components.transform.SplitsConfig'
        }
    } = None,
):
    """Runs the executor of the TFX Transform component on path arguments.

    The function's arguments are matched by name against the component's
    declared execution parameters, inputs and outputs: `<name>` for
    parameters, `<name>_uri` / `<name>_path` for inputs and
    `output_<name>_uri` / `<name>_path` for outputs. Artifacts are created
    for the matching paths and passed to the component's executor. Nothing
    is returned; results are written by the executor to the output paths.
    """
    from tfx.components.transform.component import Transform as component_class

    #Generated code
    import os
    import tempfile
    from tensorflow.io import gfile
    from google.protobuf import json_format, message
    from tfx.types import channel_utils, artifact_utils
    from tfx.components.base import base_executor

    # Snapshot of every local name defined so far (the function's parameters
    # and the imports above); argument values are looked up in it by name.
    arguments = locals().copy()

    component_class_args = {}

    # Execution parameters: arguments left as None are skipped; values for
    # protobuf-typed parameters are parsed from JSON into message objects.
    for name, execution_parameter in component_class.SPEC_CLASS.PARAMETERS.items(
    ):
        argument_value = arguments.get(name, None)
        if argument_value is None:
            continue
        parameter_type = execution_parameter.type
        if isinstance(parameter_type, type) and issubclass(
                parameter_type, message.Message):
            argument_value_obj = parameter_type()
            json_format.Parse(argument_value, argument_value_obj)
        else:
            argument_value_obj = argument_value
        component_class_args[name] = argument_value_obj

    # Input artifacts: one artifact per input for which a URI/path was given.
    for name, channel_parameter in component_class.SPEC_CLASS.INPUTS.items():
        artifact_path = arguments.get(name + '_uri') or arguments.get(name +
                                                                      '_path')
        if artifact_path:
            artifact = channel_parameter.type()
            artifact.uri = artifact_path.rstrip(
                '/'
            ) + '/'  # Some TFX components require that the artifact URIs end with a slash
            if channel_parameter.type.PROPERTIES and 'split_names' in channel_parameter.type.PROPERTIES:
                # Recovering splits
                subdirs = gfile.listdir(artifact_path)
                # Workaround for https://github.com/tensorflow/tensorflow/issues/39167
                subdirs = [subdir.rstrip('/') for subdir in subdirs]
                # Split names are the sub-directory names without 'Split-'.
                split_names = [
                    subdir.replace('Split-', '') for subdir in subdirs
                ]
                artifact.split_names = artifact_utils.encode_split_names(
                    sorted(split_names))
            component_class_args[name] = channel_utils.as_channel([artifact])

    component_class_instance = component_class(**component_class_args)

    input_dict = channel_utils.unwrap_channel_dict(
        component_class_instance.inputs.get_all())
    output_dict = {}
    exec_properties = component_class_instance.exec_properties

    # Generating paths for output artifacts
    for name, channel in component_class_instance.outputs.items():
        artifact_path = arguments.get('output_' + name +
                                      '_uri') or arguments.get(name + '_path')
        if artifact_path:
            artifact = channel.type()
            artifact.uri = artifact_path.rstrip(
                '/'
            ) + '/'  # Some TFX components require that the artifact URIs end with a slash
            artifact_list = [artifact]
            channel._artifacts = artifact_list
            output_dict[name] = artifact_list

    print('component instance: ' + str(component_class_instance))

    # Instantiate the component's executor class and run it directly.
    executor_context = base_executor.BaseExecutor.Context(
        beam_pipeline_args=arguments.get('beam_pipeline_args'),
        tmp_dir=tempfile.gettempdir(),
        unique_id='tfx_component',
    )
    executor = component_class_instance.executor_spec.executor_class(
        executor_context)
    executor.Do(
        input_dict=input_dict,
        output_dict=output_dict,
        exec_properties=exec_properties,
    )
예제 #19
0
import numpy as np
from PIL import Image
from numpy import clip
from tensorflow.io import gfile
import os

# Root directory of the input datasets, taken from the DKUBE_INPUT_DATASETS
# environment variable; None when the variable is not set.
DATA_DIR = os.getenv('DKUBE_INPUT_DATASETS', None)


def read_image(filename):
    """Read an image and return its thresholded, standardized grayscale form.

    The image is converted from BGR to grayscale, passed through a to-zero
    threshold at 120 and then shifted and scaled by its own mean and
    standard deviation.
    """
    grayscale = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2GRAY)
    # Only the thresholded image is needed; the first return value is unused.
    _, thresholded = cv2.threshold(grayscale, 120, 255, cv2.THRESH_TOZERO)
    center = thresholded.mean()
    spread = thresholded.std()
    return (thresholded - center) / spread


# Source and destination roots below the dataset directory.
_RAW_ROOT = DATA_DIR + '/raw_faces/'
_BINARIZED_ROOT = DATA_DIR + '/binarized_faces/'

folders = gfile.listdir(_RAW_ROOT)

# Mirror every folder of raw faces into the binarized tree, converting each
# file with read_image on the way.
for each_folder in folders:
    _src_dir = _RAW_ROOT + each_folder
    _dst_dir = _BINARIZED_ROOT + each_folder
    files = gfile.listdir(_src_dir)
    gfile.makedirs(_dst_dir)
    for each_file in files:
        b_img = read_image(_src_dir + '/' + each_file)
        # The result is written with OpenCV rather than through gfile.
        cv2.imwrite(_dst_dir + '/' + each_file, b_img)