Example #1
    def create_inference_graph(self, input_image, base_graph):
        log = util.create_log()

        with base_graph.as_default():
            sess = util.tf_cpu_session()
            with sess.as_default():
                tf_model = create_model()

                # Create ops and load weights

                # root = tf.train.Checkpoint(model=tf_model)
                # root.restore(tf.train.latest_checkpoint(self.params.MODEL_BASEDIR))
                # log.info("Read model params from %s" % self.params.MODEL_BASEDIR)

                pred = tf_model(tf.cast(input_image, tf.float32),
                                training=False)
                checkpoint = tf.train.latest_checkpoint(
                    self.params.MODEL_BASEDIR)
                saver = tf.train.import_meta_graph(checkpoint + '.meta',
                                                   clear_devices=True)
                self.graph = util.give_me_frozen_graph(checkpoint,
                                                       nodes=self.output_names,
                                                       saver=saver,
                                                       base_graph=base_graph,
                                                       sess=sess)

        import pprint
        log.info("Loaded graph:")
        log.info(
            pprint.pformat(tf.contrib.graph_editor.get_tensors(self.graph)))
        return self.graph
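A brief sketch of running inference with the frozen graph returned above, under assumed tensor names (the real output names come from `self.output_names`, and the input name depends on the `input_image` tensor passed in):

    import numpy as np
    import tensorflow as tf

    # frozen_graph: the tf.Graph returned by create_inference_graph() above.
    # 'logits:0' and 'input_image:0' are placeholder names for illustration only.
    with tf.Session(graph=frozen_graph) as sess:
        out = sess.run(
            'logits:0',
            feed_dict={'input_image:0': np.zeros((1, 28, 28, 1), np.float32)})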
Example #2
    def create_test_fixtures(cls):
        log = util.create_log()

        log.info("Creating bdd100k test fixtures ...")
        ZIPS_TO_COPY = (cls.telemetry_zip(), )

        util.cleandir(cls.TEST_FIXTURE_DIR)
        for path in ZIPS_TO_COPY:
            util.copy_n_from_zip(path, cls.test_fixture(path), 10)

        # Videos: just copy the ones that have INFO data
        log.info("Copying videos ...")
        fws = util.ArchiveFileFlyweight.fws_from(
            cls.test_fixture(cls.telemetry_zip()))
        for fw in fws:
            if 'json' not in fw.name:
                continue

            relpath = InfoDataset.json_fname_to_video_fname(fw.name)
            relpath = relpath[len('bdd100k/info/'):]
            path = os.path.join(cls.video_dir(), relpath)
            dest = cls.test_fixture(path)
            util.mkdir(os.path.dirname(dest))
            util.run_cmd('cp -v ' + path + ' ' + dest)
        log.info("... done copying videos.")

        # For testing, create a video that has no INFO
        dest = cls.test_fixture(
            os.path.join(cls.video_dir(), '100k', 'train',
                         'video_with_no_info.mov'))
        codec = 'h264'  # Chrome will not play `png` movies
        video_bytes = testutils.VideoFixture(codec=codec).get_bytes()
        with open(dest, 'wb') as f:  # video_bytes is binary data
            f.write(video_bytes)
        log.info("Wrote synth video to %s ..." % dest)
Example #3
def mnist_train(params):
    log = util.create_log()
    tf.logging.set_verbosity(tf.logging.DEBUG)

    ## Model
    model_dir = params.MODEL_BASEDIR
    tf.gfile.MakeDirs(params.MODEL_BASEDIR)

    mnist_classifier = tf.estimator.Estimator(
        model_fn=model_fn,
        params=None,
        config=tf.estimator.RunConfig(
            model_dir=model_dir,
            save_summary_steps=10,
            save_checkpoints_secs=10,
            session_config=util.tf_create_session_config(),
            log_step_count_steps=10))

    ## Data
    def train_input_fn():
        from official.mnist import dataset as mnist_dataset

        # Load the datasets
        train_ds = mnist_dataset.train(params.DATA_BASEDIR)
        if params.LIMIT >= 0:
            train_ds = train_ds.take(params.LIMIT)
        train_ds = train_ds.shuffle(60000).batch(params.BATCH_SIZE)
        return train_ds

    def eval_input_fn():
        test_ds = test_dataset(params)
        # Return the one-shot iterator's get_next() tensors rather than the Dataset itself
        return test_ds.make_one_shot_iterator().get_next()

    # Set up hook that outputs training logs every 100 steps.
    from official.utils.logs import hooks_helper
    train_hooks = hooks_helper.get_train_hooks(
        ['ExamplesPerSecondHook', 'LoggingTensorHook'],
        model_dir=model_dir,
        batch_size=params.BATCH_SIZE)

    # Train and evaluate model.
    for _ in range(params.TRAIN_EPOCHS):
        mnist_classifier.train(input_fn=train_input_fn, hooks=train_hooks)
        eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
        log.info('\nEvaluation results:\n\t%s\n' % eval_results)

    # Export the model
    # TODO do we need this placeholder junk?
    image = tf.placeholder(tf.float32, [None, 28, 28, 1], name='input_image')
    input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        {'image': image})
    mnist_classifier.export_savedmodel(params.MODEL_BASEDIR, input_fn)
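For reference, a minimal sketch (not part of the original example) of loading the exported SavedModel for inference with the TF1-era `tf.contrib.predictor` API; the export directory below is illustrative:

    import numpy as np
    import tensorflow as tf

    # export_savedmodel() above writes a timestamped export directory under
    # MODEL_BASEDIR; this path is illustrative.
    export_dir = '/path/to/MODEL_BASEDIR/1541000000'
    predict_fn = tf.contrib.predictor.from_saved_model(export_dir)

    # The feed key 'image' matches the serving input receiver built above.
    preds = predict_fn({'image': np.zeros((1, 28, 28, 1), dtype=np.float32)})
    print(preds)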
Example #4
 def rows_from_images_dir(img_dir, pattern='*', **kwargs):
   import pathlib2 as pathlib
   
   log = create_log()
   
   log.info("Reading images from dir %s ..." % img_dir)
   paths = pathlib.Path(img_dir).glob(pattern)
   n = 0
   for path in paths:
     path = str(path)  # pathlib yields PosixPath objects; downstream code expects str
     yield ImageRow.from_path(path, **kwargs)
     
     n += 1
     if (n % 100) == 0:
       log.info("... read %s paths ..." % n)
   log.info("... read %s total paths." % n)
Example #5
    def setup(cls, spark=None):
        log = util.create_log()
        log.info("Building table %s ..." % cls.TABLE_NAME)

        spark = spark or util.Spark.getOrCreate()

        img_rdd = cls.IMAGE_TABLE_CLS.as_imagerow_rdd(spark)

        model = cls.NNMODEL_CLS.load_or_train(cls.MODEL_PARAMS)
        filler = FillActivationsTFDataset(model=model)

        activated = img_rdd.mapPartitions(filler)

        def to_activation_rows(imagerows):
            from pyspark.sql import Row
            for row in imagerows:
                if row.attrs == '':  # skip rows without attrs
                    continue

                activations = row.attrs.get('activations')
                if not activations:
                    continue

                for act in activations:
                    for tensor_name, value in act._tensor_to_value.iteritems():
                        yield Row(
                            model_name=model.params.MODEL_NAME,
                            tensor_name=tensor_name,
                            tensor_value=value,
                            dataset=row.dataset,
                            split=row.split,
                            uri=row.uri,
                        )

        activation_row_rdd = activated.mapPartitions(to_activation_rows)

        df = spark.createDataFrame(activation_row_rdd)
        df.show()
        # DataFrameWriter.parquet() performs the write and returns None
        df.write.parquet(
            path=cls.table_root(),
            mode='overwrite',
            compression='lz4',
            partitionBy=dataset.ImageRow.DEFAULT_PQ_PARTITION_COLS)
        log.info("... wrote to %s ." % cls.table_root())
Example #6
    def load_or_train(cls, params=None):
        log = util.create_log()

        params = params or MNIST.Params()
        model = MNIST(params=params)

        if not os.path.exists(os.path.join(params.MODEL_BASEDIR,
                                           'model.ckpt')):
            log.info("Training!")
            # subprocess allows recovery of gpu memory!  See TFSessionPool comments
            # import multiprocessing
            # p = multiprocessing.Process(target=mnist_train, args=(params,))
            # p.start()
            # p.join()
            mnist_train(params)
            log.info("Done training!")

        model.igraph = MNISTGraph(params)
        return model
Example #7
    def datasets_iter_image_rows(cls, params=None):
        params = params or MNIST.Params()

        log = util.create_log()

        def gen_dataset(ds, split):
            import imageio
            import numpy as np

            n = 0
            with util.tf_data_session(ds) as (sess, iter_dataset):
                for image, label in iter_dataset():
                    image = np.reshape(image * 255.,
                                       (28, 28, 1)).astype(np.uint8)
                    label = int(label)
                    row = dataset.ImageRow.from_np_img_labels(
                        image,
                        label,
                        dataset=cls.TABLE_NAME,
                        split=split,
                        uri='mnist_%s_%s' % (split, n))
                    yield row

                    if params.LIMIT >= 0 and n == params.LIMIT:
                        break
                    n += 1
                    if n % 100 == 0:
                        log.info("Read %s records from tf.Dataset" % n)

        from official.mnist import dataset as mnist_dataset

        # Keep our dataset ops in an isolated graph
        g = tf.Graph()
        with g.as_default():
            gens = itertools.chain(
                gen_dataset(mnist_dataset.train(params.DATA_BASEDIR), 'train'),
                gen_dataset(mnist_dataset.test(params.DATA_BASEDIR), 'test'))
            for row in gens:
                yield row
Example #8
  def _create_egg(cls, src_root=None, tmp_path=None):
    """Build a Python Egg from the current project and return a path
    to the artifact.  

    Why an Egg?  `pyspark` supports zipfiles and egg files as Python artifacts.
    One might wish to use a wheel instead of an egg.  See this excellent
    article and repo:
     * https://bytes.grubhub.com/managing-dependencies-and-artifacts-in-pyspark-7641aa89ddb7
     * https://github.com/alekseyig/spark-submit-deps
    
    The drawbacks to using a wheel include:
     * wheels often require native libraries to be installed (e.g. via
        `apt-get`), and those deps are typically best baked into the Spark
        Worker environment (versus installed every job run).
     * The `BdistSpark` example linked above is rather slow, especially
        when Tensorflow is a dependency, and `BdistSpark` must run before
        every job is submitted.
     * Spark treats wheels as zip files and unzips them on every run; this
        unzip operation can be very expensive if the zipfile contains large
        binaries (e.g. tensorflow)
    
    In comparison, an Egg provides the main benefits we want (to ship project
    code, often pre-committed code, to workers).
    """

    log = util.create_log()

    if tmp_path is None:
      import tempfile
      tempdir = tempfile.gettempdir()

      SUBDIR_NAME = 'au_eggs'
      tmp_path = os.path.join(tempdir, SUBDIR_NAME)
      util.cleandir(tmp_path)

    if src_root is None:
      log.info("Trying to auto-resolve path to src root ...")
      try:
        import inspect
        path = inspect.getfile(inspect.currentframe())
        src_root = os.path.dirname(os.path.abspath(path))
      except Exception as e:
        log.info(
          "Failed to auto-resolve src root, "
          "falling back to %s" % cls.SRC_ROOT)
        src_root = cls.SRC_ROOT
    
    # NB: hard-coded source root for the containerized environment; this
    # overrides the auto-resolved path above.
    src_root = '/opt/au'
    log.info("Using source root %s" % src_root)

    # Below is a programmatic way to run something like:
    # $ cd /opt/au && python setup.py clean bdist_egg
    # Based upon https://github.com/pypa/setuptools/blob/a94ccbf404a79d56f9b171024dee361de9a948da/setuptools/tests/test_bdist_egg.py#L30
    # See also: 
    # * https://github.com/pypa/setuptools/blob/f52b3b1c976e54df7a70db42bf59ca283412b461/setuptools/dist.py
    # * https://github.com/pypa/setuptools/blob/46af765c49f548523b8212f6e08e1edb12f22ab6/setuptools/tests/test_sdist.py#L123
    # * https://github.com/pypa/setuptools/blob/566f3aadfa112b8d6b9a1ecf5178552f6e0f8c6c/setuptools/__init__.py#L51
    from setuptools.dist import Distribution
    from setuptools import PackageFinder
    MODNAME = os.path.split(src_root)[-1]
    dist = Distribution(attrs=dict(
        script_name='setup.py',
        script_args=[
          'clean',
          'bdist_egg', 
            '--dist-dir', tmp_path,
            '--bdist-dir', os.path.join(tmp_path, 'workdir'),
        ],
        name=MODNAME,
        src_root=src_root,
        packages=PackageFinder.find(where=src_root),
    ))
    log.info("Generating egg to %s ..." % tmp_path)
    with util.quiet():
      dist.parse_command_line()
      dist.run_commands()

    egg_path = os.path.join(tmp_path, MODNAME + '-0.0.0-py2.7.egg')
    assert os.path.exists(egg_path)
    log.info("... done.  Egg at %s" % egg_path)
    return egg_path
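A short sketch (an assumption about usage, not shown above) of shipping the generated egg to Spark executors via `SparkContext.addPyFile`; the call site on `util.Spark` is hypothetical:

    from pyspark import SparkContext

    egg_path = util.Spark._create_egg()   # hypothetical call site for the method above
    sc = SparkContext.getOrCreate()
    sc.addPyFile(egg_path)                # ship project code to executors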
Example #9
  def write_to_parquet(
        rows,
        dest_dir,
        rows_per_file=-1,
        partition_cols=DEFAULT_PQ_PARTITION_COLS,
        compression='lz4',
        spark=None):
    
    is_rdd, is_pyspark_df = False, False
    try:
      import pyspark.rdd
      import pyspark.sql
      is_rdd = isinstance(rows, pyspark.rdd.RDD)
      is_pyspark_df = isinstance(rows, pyspark.sql.dataframe.DataFrame)
      if is_pyspark_df:
        df = rows
    except ImportError:
      pass
    
    if is_rdd:
      assert spark is not None
      from pyspark.sql import Row

      # RDD[ImageRow] -> DataFrame[ImageRow]
      rows_rdd = rows.map(lambda r: Row(**r.to_dict()))
      df = spark.createDataFrame(rows_rdd)
      is_pyspark_df = True
    
    if is_pyspark_df:
      util.log.info("Writing parquet to %s ..." % dest_dir)
      df.printSchema() # NB: can't .show() b/c of binary data
      df.write.parquet(
        dest_dir,
        mode='append',
        partitionBy=partition_cols,
        compression=compression)
      util.log.info("... done! Wrote to %s ." % dest_dir)
    
    else:

      # Use Pyarrow to write Parquet in this process

      import pandas as pd
      import pyarrow as pa
      import pyarrow.parquet as pq
      
      log = create_log()
      
      if rows_per_file >= 1:
        irows = util.ichunked(rows, rows_per_file)
      else:
        rows = list(rows)
        if not rows:
          return
        irows = iter([rows])
      
      util.log.info("Writing parquet to %s ..." % dest_dir)
      for row_chunk in irows:
        r = row_chunk[0]
        
        # Pandas wants dicts
        if isinstance(r, ImageRow):
          row_chunk = [r.to_dict() for r in row_chunk]

        df = pd.DataFrame(row_chunk)
        # Don't carry the pandas index into the Parquet output
        table = pa.Table.from_pandas(df, preserve_index=False)
        util.mkdir(dest_dir)
        pq.write_to_dataset(
          table,
          dest_dir,
          partition_cols=partition_cols,
          # NB: pyarrow's lz4 codec is broken, so force snappy here; see
          # https://github.com/apache/arrow/issues/3491
          compression='snappy',
          flavor='spark')
        util.log.info("... wrote %s rows ..." % len(row_chunk))
      util.log.info("... done writing to %s ." % dest_dir)