Example #1
def filter_keyword_quality(keywords_set, min_caption_occurence=3):
    """Filter keyword quality selecting those that occur in a minimum number of image captions.

    `min_caption_occurence`: integer 1 to 5; an image keyword must occur in at
    least this many corresponding captions.

    Return filtered keywords, see `process_caption_keywords` for format.
    """
    logging.log(
        logging.INFO,
        "Filtering keyword quality with caption occurence >= {}".format(
            min_caption_occurence))

    keywords_set_df = pd.DataFrame(
        zip(*keywords_set),
        columns=["image_uid", "caption_number", "keyword", "lemma"])

    keyword_image_groups = keywords_set_df.groupby("image_uid").apply(
        pd.Series.tolist)

    filtered_keyword_image_groups = mp.multiprocess_map(
        _filter_keyword_group_quality,
        keyword_image_groups,
        min_caption_occurence,
        n_cores=mp.num_cpus() - 1,
        mode="map")

    return tuple(
        np.concatenate(x) for x in zip(*filtered_keyword_image_groups))
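
A minimal standalone sketch of the caption-occurrence idea above, using plain pandas on toy data (the `mp.multiprocess_map` helper and the exact keyword tuple format are project-specific and only assumed here):

import pandas as pd

toy = pd.DataFrame({
    "image_uid": ["img1", "img1", "img1", "img2"],
    "caption_number": [0, 1, 2, 0],
    "keyword": ["dog", "dog", "dog", "cat"],
})

# count the number of distinct captions each keyword occurs in per image
counts = (toy.groupby(["image_uid", "keyword"])["caption_number"]
             .nunique()
             .reset_index(name="n_captions"))

# keep only keywords seen in at least `min_caption_occurence` captions
min_caption_occurence = 3
print(counts[counts["n_captions"] >= min_caption_occurence])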
Example #2
File: run.py  Project: rpeloff/moonshot
def test():
    """Test baseline speech DTW matching model for one-shot learning."""

    # load Flickr Audio one-shot experiment
    one_shot_exp = flickr_speech.FlickrSpeech(
        features=FLAGS.features,
        keywords_split="one_shot_evaluation",
        preprocess_func=data_preprocess_func,
        speaker_mode=FLAGS.speaker_mode)

    # test model on L-way K-shot task
    task_accuracy, _, conf_interval_95 = experiment.test_l_way_k_shot(
        one_shot_exp,
        FLAGS.K,
        FLAGS.L,
        n=FLAGS.N,
        num_episodes=FLAGS.episodes,
        k_neighbours=FLAGS.k_neighbours,
        metric=FLAGS.metric,
        dtw=True,
        random=FLAGS.random)

    logging.log(
        logging.INFO,
        f"{FLAGS.L}-way {FLAGS.K}-shot accuracy after {FLAGS.episodes} "
        f"episodes: {task_accuracy:.3%} +- {conf_interval_95*100:.4f}")
Example #3
    def _call(self, method_name, request, response):
        """Calls method with retry when Aborted error is returned.

    Args:
      method_name: the method to call.
      request: the request protobuf message.
      response: the response protobuf message.

    Returns:
      Detailed errors if the method failed.
    """
        num_retries = self._max_num_retries
        avg_delay_sec = 2
        while True:
            try:
                return self._call_method(method_name, request, response)
            except errors.AbortedError:
                num_retries -= 1
                if num_retries == 0:
                    logging.log(logging.ERROR,
                                '%s failed after retrying %d times.',
                                method_name, self._max_num_retries)
                    raise
                wait_seconds = random.expovariate(1.0 / avg_delay_sec)
                logging.log(logging.INFO, 'mlmd client retry in %f secs',
                            wait_seconds)
                time.sleep(wait_seconds)
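
A minimal sketch of the retry-with-randomized-delay pattern used above, with a hypothetical flaky_call standing in for the MLMD client method and a plain RuntimeError standing in for errors.AbortedError:

import random
import time

from absl import logging


def call_with_retry(flaky_call, max_num_retries=5, avg_delay_sec=2):
    num_retries = max_num_retries
    while True:
        try:
            return flaky_call()
        except RuntimeError:
            num_retries -= 1
            if num_retries == 0:
                logging.log(logging.ERROR, 'failed after retrying %d times.',
                            max_num_retries)
                raise
            # exponentially distributed delay with mean avg_delay_sec
            wait_seconds = random.expovariate(1.0 / avg_delay_sec)
            logging.log(logging.INFO, 'retry in %f secs', wait_seconds)
            time.sleep(wait_seconds)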
Example #4
def test():
    """Test extracted image and speech model embeddings for one-shot learning."""

    # load embeddings from (linear) dense layer of base speech and vision models
    speech_embed_dir = os.path.join(FLAGS.audio_base_dir, "embed", "dense")
    image_embed_dir = os.path.join(FLAGS.vision_base_dir, "embed", "dense")

    # load Flickr Audio one-shot experiment
    one_shot_exp = flickr_multimodal.FlickrMultimodal(
        features="mfcc",
        keywords_split="one_shot_evaluation",
        flickr8k_image_dir=os.path.join("data", "external", "flickr8k_images"),
        speech_embed_dir=speech_embed_dir,
        image_embed_dir=image_embed_dir,
        speech_preprocess_func=data_preprocess_func,
        image_preprocess_func=data_preprocess_func,
        speaker_mode=FLAGS.speaker_mode,
        unseen_match_set=FLAGS.unseen_match_set)

    # test model on L-way K-shot task
    task_accuracy, _, conf_interval_95 = experiment.test_multimodal_l_way_k_shot(
        one_shot_exp,
        FLAGS.K,
        FLAGS.L,
        n=FLAGS.N,
        num_episodes=FLAGS.episodes,
        k_neighbours=FLAGS.k_neighbours,
        metric=FLAGS.metric)

    logging.log(
        logging.INFO,
        f"{FLAGS.L}-way {FLAGS.K}-shot accuracy after {FLAGS.episodes} "
        f"episodes: {task_accuracy:.3%} +- {conf_interval_95*100:.4f}")
Example #5
    def __init__(self,
                 features="mfcc",
                 keywords_split="one_shot_evaluation",
                 flickr8k_image_dir=None,
                 speech_embed_dir=None,
                 image_embed_dir=None,
                 image_preprocess_func=None,
                 speech_preprocess_func=None,
                 speaker_mode="baseline",
                 unseen_match_set=False,
                 **kwargs):
        """TODO"""
        logging.log(logging.INFO, f"Creating Flickr multimodal experiment")

        super().__init__(features=features,
                         keywords_split=keywords_split,
                         embed_dir=speech_embed_dir,
                         preprocess_func=speech_preprocess_func,
                         speaker_mode=speaker_mode,
                         **kwargs)

        # TODO: add flickr30k/mscoco and sample paired audio with same keywords?
        self.flickr_vision_exp = flickr_vision.FlickrVision(
            keywords_split=keywords_split,
            flickr8k_image_dir=flickr8k_image_dir,
            flickr30k_image_dir=None,
            mscoco_image_dir=None,
            embed_dir=image_embed_dir,
            preprocess_func=image_preprocess_func,
            **kwargs)

        self.unseen_match_set = unseen_match_set
Example #6
    def Reply(self,
              channel: types.Target,
              msg: MessageType,
              default_channel: Optional[Channel] = None,
              limit_lines: bool = False,
              max_public_lines: int = 6,
              user: Optional[types.User] = None,
              log: bool = False,
              log_level: int = logging.INFO) -> None:
        """Sends a message to the channel.

    Leaving Reply on the HypeCore allows replacing the interface to process
    nested commands. However, some change will be needed in order to actually
    create an OutputUtil for HBDS without a HypeCore.

    Args:
      channel: Who/where to send the message.
      msg: The message to send.
      default_channel: Who/where to send the message if no channel is specified.
      limit_lines: Whether to limit lines or not.
      max_public_lines: Maximum number of lines to send to a public channel.
      user: If specified, where to send the message if it's too long.
      log: Whether to also log the message.
      log_level: How important the log is.
    """
        if not msg:
            return

        if log:
            text_msg = msg
            logging.log(log_level,
                        text_msg,
                        exc_info=log_level == logging.ERROR)

        channel = channel or default_channel
        if not channel:
            logging.info('Attempted to send message with no channel: %s', msg)
            return
        # Support legacy Reply to users as a string.
        if not isinstance(channel, Channel):
            # Send messages for sub-accounts to the real user.
            channel = Channel(id=channel.split(':')[0],
                              visibility=Channel.PRIVATE,
                              name=channel)

        if (limit_lines and channel.visibility == Channel.PUBLIC
                and isinstance(msg, list) and len(msg) > max_public_lines):
            if user:
                self.interface.SendMessage(
                    channel,
                    _MakeMessage('It\'s long so I sent it privately.'))
                self.interface.SendMessage(
                    Channel(id=user, visibility=Channel.PRIVATE, name=user),
                    _MakeMessage(msg))
            else:
                # If there is no user, just truncate and send to channel.
                self.interface.SendMessage(
                    channel, _MakeMessage(msg[:max_public_lines] + ['...']))
        else:
            self.interface.SendMessage(channel, _MakeMessage(msg))
Example #7
def create_set_folders() -> None:
    """Create the folders necessary for the creation of the dataset.

    Create both the data and label folders needed for the three set variations
    that persondet requires.

    Parameters
    ----------
    None

    Returns
    -------
    None

    """

    try:
        os.mkdir(os.path.join(FLAGS.output, FLAGS.set_name))
        os.mkdir(os.path.join(FLAGS.output, FLAGS.set_name, 'val_set_data'))
        os.mkdir(os.path.join(FLAGS.output, FLAGS.set_name, 'val_set_label'))
        os.mkdir(os.path.join(FLAGS.output, FLAGS.set_name, 'train_set_data'))
        os.mkdir(os.path.join(FLAGS.output, FLAGS.set_name, 'train_set_label'))
        os.mkdir(os.path.join(FLAGS.output, FLAGS.set_name, 'dev_set_label'))
        os.mkdir(os.path.join(FLAGS.output, FLAGS.set_name, 'dev_set_data'))
    except Exception:
        logging.log(logging.FATAL, 'Could not create folders')
Example #8
def _make_exception(message, error_code):
    try:
        exc_type = errors.exception_type_from_error_code(error_code)
        logging.log(logging.ERROR, 'mlmd client %s: %s', exc_type.__name__,
                    message)
        return exc_type(None, None, message)
    except KeyError:
        return errors.UnknownError(None, None, message, error_code)
Example #9
def create_vision_network(model_options, build_model=True):
    """Create multi-label classification model from model options."""

    # get input shape
    input_shape = None
    if build_model:
        input_shape = model_options["input_shape"]

    # train network from scratch or with imagenet weights
    inception_network = inceptionv3.create_inceptionv3_network(
        input_shape=input_shape, pretrained=model_options["pretrained"],
        include_top=False)

    # inception model with imagenet weights and our own top dense layers (NOTE: debug oracle only)
    if model_options["pretrained"]:
        if build_model:
            logging.log(
                logging.INFO,
                "Training model with imagenet weights and custom top layer")

        # train final inception module and the top dense layers
        inceptionv3.freeze_weights(
            inception_network, trainable="final_inception")

    # inception model with random weights and our own top dense layers
    elif build_model:
        logging.log(logging.INFO, "Training model from scratch")

    model_layers = [
        inception_network,
        tf.keras.layers.GlobalAveragePooling2D()
    ]

    if model_options["dropout_rate"] is not None:
        model_layers.append(
            tf.keras.layers.Dropout(model_options["dropout_rate"]))

    # add top layer hidden units
    if model_options["dense_units"] is not None:
        for dense_units in model_options["dense_units"]:

            model_layers.append(tf.keras.layers.Dense(dense_units))

            model_layers.append(tf.keras.layers.ReLU())

            if model_options["dropout_rate"] is not None:
                model_layers.append(
                    tf.keras.layers.Dropout(model_options["dropout_rate"]))

    # add final class logits layer
    model_layers.append(tf.keras.layers.Dense(model_options["n_classes"]))

    vision_network = tf.keras.Sequential(model_layers)

    if build_model:
        vision_network.summary()

    return vision_network
Example #10
File: xla.py  Project: eshnil2000/jax
def _xla_callable(fun: lu.WrappedFun, device, backend, name, *arg_specs):
  if device is not None and backend is not None:
    raise ValueError("can't specify both a device and a backend for jit, "
                     "got device={} and backend={}".format(device, backend))

  abstract_args, arg_devices = unzip2(arg_specs)
  pvals: Sequence[pe.PartialVal] = [pe.PartialVal.unknown(aval) for aval in abstract_args]
  jaxpr, pvals, consts = pe.trace_to_jaxpr(
      fun, pvals, instantiate=False, stage_out=True, bottom=True)

  _map(prefetch, it.chain(consts, jaxpr_literals(jaxpr)))

  nreps = jaxpr_replicas(jaxpr)
  device = _xla_callable_device(nreps, backend, device, arg_devices)
  result_handlers = tuple(map(partial(_pval_to_result_handler, device), pvals))

  # Computations that only produce constants and/or only rearrange their inputs,
  # which are often produced from partial evaluation, don't need compilation,
  # and don't need to force their (potentially lazy) arguments.
  if not jaxpr.eqns:
    device = device or xb.get_backend(None).get_default_device_assignment(1)[0]
    return partial(_execute_trivial, jaxpr, device, consts, result_handlers)

  log_priority = logging.WARNING if FLAGS.jax_log_compiles else logging.DEBUG
  logging.log(log_priority, "Compiling %s for args %s.", fun.__name__, abstract_args)

  if nreps > xb.device_count(backend):
    raise ValueError(
        f"compiling computation that requires {nreps} replicas, but only "
        f"{xb.device_count(backend)} XLA devices are available")

  if xb.host_count() > 1 and (nreps > 1 or jaxpr_has_pmap(jaxpr)):
    raise NotImplementedError(
        "jit of multi-host pmap not implemented (and jit-of-pmap can cause "
        "extra data movement anyway, so maybe you don't want it after all).")

  tuple_args = len(abstract_args) > 100  # pass long arg lists as tuple for TPU

  c = xb.make_computation_builder("jit_{}".format(fun.__name__))
  xla_consts = _map(partial(xb.constant, c), consts)
  xla_args = _xla_callable_args(c, abstract_args, tuple_args)
  out_nodes = jaxpr_subcomp(
      c, jaxpr, backend, AxisEnv(nreps, (), ()), xla_consts,
      extend_name_stack(wrap_name(name, 'jit')), *xla_args)
  built = c.Build(xops.Tuple(c, out_nodes))

  options = xb.get_compile_options(
      num_replicas=nreps,
      num_partitions=1,
      device_assignment=(device.id,) if device else None)
  options.tuple_arguments = tuple_args
  backend = xb.get_backend(backend)
  compiled = backend.compile(built, compile_options=options)

  if nreps == 1:
    return partial(_execute_compiled, compiled, result_handlers)
  else:
    return partial(_execute_replicated, compiled, result_handlers)
Example #11
File: xla.py  Project: Guillem96/jax
def _xla_callable(fun, device, backend, name, *arg_specs):
  if device is not None and backend is not None:
    raise ValueError("can't specify both a device and a backend for jit, "
                     "got device={} and backend={}".format(device, backend))

  abstract_args, arg_devices = unzip2(arg_specs)
  pvals = [pe.PartialVal((aval, core.unit)) for aval in abstract_args]
  with core.new_master(pe.StagingJaxprTrace, True) as master:
    jaxpr, (pvals, consts, env) = pe.trace_to_subjaxpr(fun, master, False).call_wrapped(pvals)
    assert not env  # no subtraces here
    del master, env
  _map(prefetch, it.chain(consts, jaxpr_literals(jaxpr)))

  nreps = jaxpr_replicas(jaxpr)
  device = _xla_callable_device(nreps, backend, device, arg_devices)
  result_handlers = tuple(map(partial(_pval_to_result_handler, device), pvals))

  # Computations that only produce constants and/or only rearrange their inputs,
  # which are often produced from partial evaluation, don't need compilation,
  # and don't need to force their (potentially lazy) arguments.
  if not jaxpr.eqns:
    device = device or xb.get_backend(None).get_default_device_assignment(1)[0]
    return partial(_execute_trivial, jaxpr, device, consts, result_handlers)

  log_priority = logging.WARNING if FLAGS.jax_log_compiles else logging.DEBUG
  logging.log(log_priority,
              "Compiling {} for args {}.".format(fun.__name__, abstract_args))

  if nreps > xb.device_count(backend):
    msg = ("compiling computation that requires {} replicas, but only {} XLA "
            "devices are available")
    raise ValueError(msg.format(nreps, xb.device_count(backend)))
  if xb.host_count() > 1 and (nreps > 1 or jaxpr_has_pmap(jaxpr)):
    raise NotImplementedError(
        "jit of multi-host pmap not implemented (and jit-of-pmap can cause "
        "extra data movement anyway, so maybe you don't want it after all).")

  tuple_args = len(abstract_args) > 100  # pass long arg lists as tuple for TPU

  c = xb.make_computation_builder("jit_{}".format(fun.__name__))
  xla_consts = _map(c.Constant, consts)
  xla_args = _xla_callable_args(c, abstract_args, tuple_args)
  out_nodes = jaxpr_subcomp(
      c, jaxpr, backend, AxisEnv(nreps, [], []), xla_consts, (),
      extend_name_stack(wrap_name(name, 'jit')), *xla_args)
  built = c.Build(c.Tuple(*out_nodes))

  options = xb.get_compile_options(
      num_replicas=nreps,
      num_partitions=1,
      device_assignment=(device.id,) if device else None)
  compiled = built.Compile(compile_options=options, backend=xb.get_backend(backend))

  if nreps == 1:
    return partial(_execute_compiled, compiled, backend, result_handlers, tuple_args)
  else:
    return partial(_execute_replicated, compiled, backend, result_handlers, tuple_args)
Example #12
File: dispatch.py  Project: jbampton/jax
def log_elapsed_time(fmt: str):
  if _on_exit:
    yield
  else:
    log_priority = logging.WARNING if config.jax_log_compiles else logging.DEBUG
    start_time = time.time()
    yield
    elapsed_time = time.time() - start_time
    logging.log(log_priority, fmt.format(elapsed_time=elapsed_time))
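
A self-contained sketch of a timing context manager in the same spirit (the names here are illustrative, not jax's internal API):

import contextlib
import time

from absl import logging


@contextlib.contextmanager
def log_elapsed_time(fmt):
    start_time = time.time()
    yield
    elapsed_time = time.time() - start_time
    logging.log(logging.INFO, fmt.format(elapsed_time=elapsed_time))

# usage:
#   with log_elapsed_time("step finished in {elapsed_time:.3f} s"):
#       do_work()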
Example #13
def _make_exception(msg, error_code):
  try:
    exc_type = errors.exception_type_from_error_code(error_code)
    # log internal backend engine errors only.
    if error_code == errors.INTERNAL:
      logging.log(logging.WARNING, 'mlmd client %s: %s', exc_type.__name__, msg)
    return exc_type(None, None, msg)
  except KeyError:
    return errors.UnknownError(None, None, msg, error_code)
Example #14
File: run.py  Project: rpeloff/moonshot
def test(model_options, output_dir, model_file, model_step_file):
    """Load and test spoken word classification model for one-shot learning."""

    # load Flickr Audio one-shot experiment
    one_shot_exp = flickr_speech.FlickrSpeech(
        features=model_options["features"],
        keywords_split="one_shot_evaluation",
        preprocess_func=get_data_preprocess_func(model_options),
        speaker_mode=FLAGS.speaker_mode)

    # load model
    speech_network, _ = model_utils.load_model(
        model_file=os.path.join(output_dir, model_file),
        model_step_file=os.path.join(output_dir, model_step_file),
        loss=get_training_objective(model_options))

    embedding_model_func = lambda speech_network: create_embedding_model(
        model_options, speech_network)

    # create few-shot model from speech network for one-shot testing
    if FLAGS.fine_tune_steps is not None:
        test_few_shot_model = create_fine_tune_model(model_options,
                                                     speech_network,
                                                     num_classes=FLAGS.L)
    else:
        test_few_shot_model = base.BaseModel(speech_network,
                                             None,
                                             mc_dropout=FLAGS.mc_dropout)

    classification = False
    if FLAGS.classification:
        assert FLAGS.embed_layer in ["logits", "softmax"]
        classification = True

    logging.log(logging.INFO, "Created few-shot model from speech network")
    test_few_shot_model.model.summary()

    # test model on L-way K-shot task
    task_accuracy, _, conf_interval_95 = experiment.test_l_way_k_shot(
        one_shot_exp,
        FLAGS.K,
        FLAGS.L,
        n=FLAGS.N,
        num_episodes=FLAGS.episodes,
        k_neighbours=FLAGS.k_neighbours,
        metric=FLAGS.metric,
        classification=classification,
        model=test_few_shot_model,
        embedding_model_func=embedding_model_func,
        fine_tune_steps=FLAGS.fine_tune_steps,
        fine_tune_lr=FLAGS.fine_tune_lr)

    logging.log(
        logging.INFO,
        f"{FLAGS.L}-way {FLAGS.K}-shot accuracy after {FLAGS.episodes} "
        f"episodes: {task_accuracy:.3%} +- {conf_interval_95*100:.4f}")
Example #15
def filter_remove_images(keywords_set, image_uid_list):
    """Filter keyword-image pairs removing images in the specified list.
    """
    logging.log(logging.INFO,
                "Filtering keyword-image pairs (by image remove list)")

    valid_idx = np.where(np.invert(np.isin(keywords_set[0],
                                           image_uid_list)))[0]

    return tuple(x[valid_idx] for x in keywords_set)
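
A tiny standalone illustration of the np.isin-based removal above, on made-up UIDs:

import numpy as np

image_uids = np.array(["img1", "img2", "img3", "img2"])
remove_list = ["img2"]

valid_idx = np.where(np.invert(np.isin(image_uids, remove_list)))[0]
print(valid_idx)  # [0 2] -- rows whose image_uid is not in the remove list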
Example #16
def _test_do_logging():
    """Do some log operations."""
    logging.vlog(3, 'This line is VLOG level 3')
    logging.vlog(2, 'This line is VLOG level 2')
    logging.log(2, 'This line is log level 2')

    logging.vlog(1, 'This line is VLOG level 1')
    logging.log(1, 'This line is log level 1')
    logging.debug('This line is DEBUG')

    logging.vlog(0, 'This line is VLOG level 0')
    logging.log(0, 'This line is log level 0')
    logging.info('Interesting Stuff\0')
    logging.info('Interesting Stuff with Arguments: %d', 42)
    logging.info('%(a)s Stuff with %(b)s', {
        'a': 'Interesting',
        'b': 'Dictionary'
    })

    for i in xrange(1, 5):
        logging.log_first_n(logging.INFO, 'Info first %d of %d', 2, i, 2)
        logging.log_every_n(logging.INFO, 'Info %d (every %d)', 3, i, 3)

    logging.vlog(-1, 'This line is VLOG level -1')
    logging.log(-1, 'This line is log level -1')
    logging.warning('Worrying Stuff')
    for i in xrange(1, 5):
        logging.log_first_n(logging.WARNING, 'Warn first %d of %d', 2, i, 2)
        logging.log_every_n(logging.WARNING, 'Warn %d (every %d)', 3, i, 3)

    logging.vlog(-2, 'This line is VLOG level -2')
    logging.log(-2, 'This line is log level -2')
    try:
        raise OSError('Fake Error')
    except OSError:
        saved_exc_info = sys.exc_info()
        logging.exception('An Exception %s')
        logging.exception('Once more, %(reason)s', {'reason': 'just because'})
        logging.error('Exception 2 %s', exc_info=True)
        logging.error('Non-exception', exc_info=False)

    try:
        sys.exc_clear()
    except AttributeError:
        # No sys.exc_clear() in Python 3, but this will clear sys.exc_info() too.
        pass

    logging.error('Exception %s', '3', exc_info=saved_exc_info)
    logging.error('No traceback', exc_info=saved_exc_info[:2] + (None, ))

    logging.error('Alarming Stuff')
    for i in xrange(1, 5):
        logging.log_first_n(logging.ERROR, 'Error first %d of %d', 2, i, 2)
        logging.log_every_n(logging.ERROR, 'Error %d (every %d)', 3, i, 3)
    logging.flush()
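
A small sketch of how absl's verbosity setting gates the vlog/log calls exercised above; raising the verbosity makes higher vlog levels visible:

from absl import logging

logging.set_verbosity(logging.DEBUG)  # verbosity 1: DEBUG and vlog(1) show up
logging.vlog(1, 'visible at verbosity >= 1')

logging.set_verbosity(2)              # numeric verbosities enable deeper vlog
logging.vlog(2, 'visible at verbosity >= 2')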
Example #17
def _xla_callable(fun, device, backend, *abstract_args):
    log_priority = logging.WARNING if FLAGS.jax_log_compiles else logging.DEBUG
    logging.log(
        log_priority,
        "Compiling {} for args {}.".format(fun.__name__, abstract_args))

    pvals = [pe.PartialVal((aval, core.unit)) for aval in abstract_args]
    with core.new_master(pe.JaxprTrace, True) as master:
        jaxpr, (pvals, consts,
                env) = pe.trace_to_subjaxpr(fun, master,
                                            False).call_wrapped(pvals)
        assert not env  # no subtraces here
        del master, env
    _map(prefetch, it.chain(consts, jaxpr_literals(jaxpr)))

    nreps = jaxpr_replicas(jaxpr)
    if nreps > xb.device_count(backend):
        msg = (
            "compiling computation that requires {} replicas, but only {} XLA "
            "devices are available")
        raise ValueError(msg.format(nreps, xb.device_count(backend)))
    axis_env = AxisEnv(nreps, [], [])

    if xb.host_count() > 1 and (nreps > 1 or jaxpr_has_pmap(jaxpr)):
        raise NotImplementedError(
            "jit of multi-host pmap not implemented (and jit-of-pmap can cause "
            "extra data movement anyway, so maybe you don't want it after all)."
        )

    tuple_args = len(
        abstract_args) > 100  # pass long arg lists as tuple for TPU

    c = xb.make_computation_builder("jit_{}".format(fun.__name__))
    xla_consts = _map(c.Constant, consts)
    xla_args = _xla_callable_args(c, abstract_args, tuple_args)
    out_nodes = jaxpr_subcomp(c, jaxpr, backend, axis_env, xla_consts, (),
                              *xla_args)
    built = c.Build(c.Tuple(*out_nodes))

    if device is not None and nreps > 1:
        raise ValueError("can't specify device assignment for jit-of-pmap")
    options = xb.get_compile_options(
        num_replicas=nreps,
        device_assignment=(device.id, ) if device else None)
    compiled = built.Compile(compile_options=options,
                             backend=xb.get_backend(backend))

    result_handlers = tuple(map(_pval_to_result_handler, pvals))
    if nreps == 1:
        return partial(_execute_compiled, compiled, backend, result_handlers,
                       tuple_args)
    else:
        return partial(_execute_replicated, compiled, backend, result_handlers,
                       tuple_args)
Example #18
def main(argv):
    del argv  # unused

    logging.log(logging.INFO, "Logging application {}".format(__file__))

    # Compute md5 hash of Flickr 30k images
    flickr30k_path = os.path.join("data", "external", "flickr30k_images")
    flickr30k_files = os.listdir(flickr30k_path)

    logging.log(logging.INFO, "Computing Flickr 30k image hashes ...")
    flickr30k_hash = []
    for filename in flickr30k_files:
        with open(os.path.join(flickr30k_path, filename), "rb") as f:
            image_bytes = f.read()
        flickr30k_hash.append(hashlib.md5(image_bytes).hexdigest())

    flickr30k_hash = np.asarray(flickr30k_hash)

    # Compute md5 hash of MSCOCO images
    mscoco_path = os.path.join("data", "external", "mscoco", "train2017")
    mscoco_files = os.listdir(mscoco_path)

    logging.log(logging.INFO, "Computing MSCOCO image hashes ...")
    mscoco_hash = []
    for filename in mscoco_files:
        with open(os.path.join(mscoco_path, filename), "rb") as f:
            image_bytes = f.read()
        mscoco_hash.append(hashlib.md5(image_bytes).hexdigest())

    mscoco_hash = np.asarray(mscoco_hash)

    # Find Flickr 30k images whose hashes appear among the MSCOCO hashes
    match_idx = np.where(np.isin(flickr30k_hash, mscoco_hash))[0]

    mscoco_remove = []
    for index in match_idx:
        mscoco_index = np.where(mscoco_hash == flickr30k_hash[index])[0][0]
        logging.log(
            logging.INFO,
            "Found Flickr30k image {} matching MSCOCO (train 2017) image {}".format(
                flickr30k_files[index],
                mscoco_files[mscoco_index]))
        mscoco_remove.append(mscoco_files[mscoco_index])

    # Write matches to file
    output_path = os.path.join("data", "splits", "mscoco", "remove_flickr30k.txt")
    logging.log(
        logging.INFO,
        "Writing list of Flickr 30k images in MSCOCO dataset: {}".format(output_path))

    file_io.write_csv(
        output_path,
        mscoco_remove)
Example #19
    def __init__(self,
                 config: Union[metadata_store_pb2.ConnectionConfig,
                               metadata_store_pb2.MetadataStoreClientConfig],
                 enable_upgrade_migration: bool = False):
        """Initialize the MetadataStore.

    MetadataStore can directly connect to either the metadata database or
    the metadata store server.

    Args:
      config: metadata_store_pb2.ConnectionConfig or
        metadata_store_pb2.MetadataStoreClientConfig. Configuration to
        connect to the database or the metadata store server.
      enable_upgrade_migration: if set to True, the library upgrades the db
        schema and migrates all data if it connects to an old version backend.
        It is ignored when using GRPC client connection config.
    """
        self._max_num_retries = 5
        if isinstance(config, metadata_store_pb2.ConnectionConfig):
            self._using_db_connection = True
            migration_options = metadata_store_pb2.MigrationOptions()
            migration_options.enable_upgrade_migration = enable_upgrade_migration
            self._metadata_store = metadata_store_serialized.CreateMetadataStore(
                config.SerializeToString(),
                migration_options.SerializeToString())
            logging.log(logging.INFO,
                        'MetadataStore with DB connection initialized')
            logging.log(logging.DEBUG, 'ConnectionConfig: %s', config)
            if config.HasField('retry_options'):
                self._max_num_retries = config.retry_options.max_num_retries
                logging.log(
                    logging.INFO,
                    'retry options is overwritten: max_num_retries = %d',
                    self._max_num_retries)
            return
        if not isinstance(config,
                          metadata_store_pb2.MetadataStoreClientConfig):
            raise ValueError('MetadataStore is expecting either '
                             'metadata_store_pb2.ConnectionConfig or '
                             'metadata_store_pb2.MetadataStoreClientConfig')
        self._grpc_timeout_sec = None
        self._using_db_connection = False
        if enable_upgrade_migration:
            raise ValueError(
                'Upgrade migration is not allowed when using gRPC '
                'connection client. Upgrade needs to be performed on '
                'the server side.')
        channel = self._get_channel(config)
        self._metadata_store_stub = (
            metadata_store_service_pb2_grpc.MetadataStoreServiceStub(channel))
        logging.log(logging.INFO,
                    'MetadataStore with gRPC connection initialized')
        logging.log(logging.DEBUG, 'ConnectionConfig: %s', config)
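
A hedged usage sketch for the constructor above: connecting a MetadataStore to a local SQLite database (module paths follow the ml-metadata quickstart; the filename is illustrative):

from ml_metadata.metadata_store import metadata_store
from ml_metadata.proto import metadata_store_pb2

connection_config = metadata_store_pb2.ConnectionConfig()
connection_config.sqlite.filename_uri = '/tmp/mlmd.sqlite'  # hypothetical path
connection_config.sqlite.connection_mode = (
    metadata_store_pb2.SqliteMetadataSourceConfig.READWRITE_OPENCREATE)

store = metadata_store.MetadataStore(connection_config)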
Example #20
def filter_keep_keywords(keywords_set, keyword_list, use_lemma=True):
    """Filter keywords keeping those that occur in the keyword list.

    `use_lemma`: indicates whether to compare keywords or baseform lemma.

    Return filtered keywords, see `process_caption_keywords` for format.
    """
    logging.log(logging.INFO, "Filtering keywords (by keep list) ...")

    keyword_data = keywords_set[3] if use_lemma else keywords_set[2]
    valid_idx = np.where(np.isin(keyword_data, keyword_list))[0]

    return tuple(x[valid_idx] for x in keywords_set)
Example #21
File: base.py  Project: rpeloff/moonshot
def majority_vote(labels):
    """Get majority label among `labels` (random between a tied majority)."""
    counts = np.bincount(labels)
    max_idx = np.where(counts == np.max(counts))[0]
    if len(max_idx) > 1:  # choose random from tied majority labels
        if "debug" in FLAGS and FLAGS.debug:
            logging.log(
                logging.DEBUG, "Choosing randomly from tied labels: {}".format(
                    np.asarray(labels[max_idx])))
        majority_label = max_idx[np.random.choice(len(max_idx),
                                                  size=1,
                                                  replace=False)[0]]
    else:
        majority_label = max_idx[0]
    return majority_label
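
A standalone sketch of the majority-vote logic above, without the FLAGS/debug logging, showing the np.bincount count and the random tie-break:

import numpy as np

labels = np.array([2, 0, 2, 1, 1])
counts = np.bincount(labels)                     # [1, 2, 2]
max_idx = np.where(counts == np.max(counts))[0]  # labels 1 and 2 are tied
majority_label = max_idx[np.random.choice(len(max_idx))]
print(majority_label)                            # 1 or 2, chosen at random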
Example #22
def filter_remove_keyword_images(keywords_set, keyword_list, use_lemma=True):
    """Filter keyword-image pairs removing all images with keywords in the specified list.

    NOTE: this should be used instead of `filter_remove_images` to remove all
    image instances associated with a keyword in the remove list, including
    pairs whose keywords are not in the remove list.
    """
    logging.log(logging.INFO,
                "Filtering keyword-image pairs (by keyword remove list)")

    keyword_data = keywords_set[3] if use_lemma else keywords_set[2]
    image_idx = np.where(np.isin(keyword_data, keyword_list))[0]

    remove_uids = np.unique(keywords_set[0][image_idx])

    return filter_remove_images(keywords_set, remove_uids)
Example #23
File: a.py  Project: weisystak/tutorial
def main(argv=()):
    del argv
    logging.info('running main.')

    path = FLAGS.input
    file = open(path).readlines()
    for i in range(FLAGS.repeat_times):
        if FLAGS.verbose:
            print(f'{i:3d}: ', end='')
        for line in file:
            print(line, end='')
        print()

    logging.set_verbosity(logging.DEBUG)
    logging.log(logging.DEBUG, 'Debug line!!')
    logging.error('Alarming Stuff')
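
A minimal sketch of the absl flag definitions this example presumably relies on (the flag names mirror those used in main above and are assumptions):

from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string('input', None, 'Path of the text file to print.')
flags.DEFINE_integer('repeat_times', 1, 'How many times to print the file.')
flags.DEFINE_boolean('verbose', False, 'Prefix each repetition with its index.')

# entry point, wiring the flags to the `main` defined in the example above:
# if __name__ == '__main__':
#     app.run(main)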
Example #24
def load_flickr30k_captions(captions_dir, splits_dir="data/splits/flickr30k",
                            flickr8k_splits=None):
    """Load Flickr 30k text caption corpus."""
    train, val, test = None, None, None

    split_dict = load_flickr30k_splits(splits_dir, flickr8k_splits)

    captions_path = os.path.join(
        captions_dir, "results_20130124.token")
    assert os.path.exists(captions_path)

    logging.log(logging.INFO, "Loading Flickr 30k text caption corpus: {}".format(
        captions_path))

    image_uids, captions, caption_numbers = [], [], []
    with open(captions_path, "rb") as f:
        for line in f:
            caption_image, caption = line.decode("utf8").split("\t")
            image_uid, caption_number = caption_image.split("#")
            image_uid = image_uid.split(".jpg")[0]
            image_uids.append(image_uid)
            captions.append(str(caption).strip().lower())
            caption_numbers.append(caption_number)

    # remove unrelated captions
    flickr30k_unrelated = _load_flickr30k_unrelated_captions(splits_dir)

    def filter_remove_unrelated(index):
        unrelated_idx = np.where(flickr30k_unrelated[0] == image_uids[index])[0]
        return caption_numbers[index] not in flickr30k_unrelated[1][unrelated_idx]

    filter_idx = list(filter(filter_remove_unrelated, range(len(image_uids))))

    image_uids = np.asarray(image_uids)[filter_idx]
    captions = np.asarray(captions)[filter_idx]
    caption_numbers = np.asarray(caption_numbers)[filter_idx]

    # split into train-dev-test
    train_idx = np.isin(image_uids, split_dict["train"])
    val_idx = np.isin(image_uids, split_dict["dev"])
    test_idx = np.isin(image_uids, split_dict["test"])

    train = (image_uids[train_idx], captions[train_idx], caption_numbers[train_idx])
    val = (image_uids[val_idx], captions[val_idx], caption_numbers[val_idx])
    test = (image_uids[test_idx], captions[test_idx], caption_numbers[test_idx])

    return train, val, test
Example #25
    def BuildJsonSR(
            self, report_text: Text,
            metadata_json: Dict[Text, Any]) -> dicom_json.ObjectWithBulkData:
        """Builds and returns a Basic Text DICOM JSON Structured Report instance.

    This function will create a new DICOM series.

    Args:
      report_text: Text string to use for the Basic Text DICOM SR.
      metadata_json: Dict of tags (including study-level information) to add.

    Returns:
      DICOM JSON Object containing the Structured Report.
    """
        # Dicom StowJsonRs expects a list with DICOM JSON as elements.
        # Add study level tags to the SR.
        dataset = metadata_json.copy()
        series_uid = self.GenerateUID()
        instance_uid = self.GenerateUID()
        dicom_json.Insert(dataset, tags.SOP_CLASS_UID,
                          tag_values.BASIC_TEXT_SR_CUID)
        dicom_json.Insert(dataset, tags.MODALITY, tag_values.SR_MODALITY)
        dicom_json.Insert(dataset, tags.SERIES_INSTANCE_UID, series_uid)
        dicom_json.Insert(dataset, tags.SPECIFIC_CHARACTER_SET,
                          _ISO_CHARACTER_SET)

        logging.log(
            logging.INFO,
            'Creating DICOM JSON SR with Series UID: %s and Instance UID: %s',
            series_uid, instance_uid)
        dicom_json.Insert(dataset, tags.SOP_INSTANCE_UID, instance_uid)

        content_dataset = {}
        dicom_json.Insert(content_dataset, tags.RELATIONSHIP_TYPE, 'CONTAINS')
        dicom_json.Insert(content_dataset, tags.VALUE_TYPE, 'TEXT')
        dicom_json.Insert(content_dataset, tags.TEXT_VALUE, report_text)
        dicom_json.Insert(dataset, tags.CONTENT_SEQUENCE, content_dataset)

        dicom_json.Insert(dataset, tags.TRANSFER_SYNTAX_UID,
                          _IMPLICIT_VR_LITTLE_ENDIAN)
        dicom_json.Insert(dataset, tags.MEDIA_STORAGE_SOP_CLASS_UID,
                          tag_values.BASIC_TEXT_SR_CUID)
        dicom_json.Insert(dataset, tags.MEDIA_STORAGE_SOP_INSTANCE_UID,
                          instance_uid)

        return dicom_json.ObjectWithBulkData(dataset)
Example #26
    def summarize(self, items, episode=True, epoch=False):
        if episode and epoch:
            raise OverflowError(
                "You cannot issue summarization for both episode and epoch at the same time!"
            )

        summary_category = None
        if episode:
            summary_category = "episode"
        if epoch:
            summary_category = "epoch"

        logging.log(
            logging.DEBUG, " | ".join([
                "%s: %s" % (k, v)
                for k, v in self.summary_data[summary_category].items()
            ]))
Example #27
def build_granted(config):
    feature_map = collections.defaultdict(list)
    cnx = pvdb.incremental_granted_table(config)
    if cnx is None:
        return feature_map
    cursor = cnx.cursor()
    query = "SELECT uuid, patent_id, name_first, name_last FROM rawinventor;"
    cursor.execute(query)
    idx = 0
    for uuid, patent_id, name_first, name_last in cursor:
        im = InventorMention(uuid, patent_id, '', name_first if name_first else '', name_last if name_last else '', '',
                             '', '')
        feature_map[im.record_id].append(last_name(im))
        idx += 1
        logging.log_every_n(logging.INFO, 'Processed %s granted records - %s features', 10000, idx, len(feature_map))
    logging.log(logging.INFO, 'Processed %s granted records - %s features', idx, len(feature_map))
    return feature_map
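
A small sketch of absl's rate-limited logging used above: log_every_n emits on every Nth call, log_first_n only on the first N calls:

from absl import logging

for i in range(100000):
    logging.log_every_n(logging.INFO, 'Processed %d records', 10000, i)
    logging.log_first_n(logging.INFO, 'First records look like: %d', 3, i)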
Example #28
    def log_kafka_sample(self, topic: str, sample: dict):
        """This is for testing/debugging only, use async version for production"""
        if self.cluster_config.MONITORING.KAFKA_REST_ADDRESS == "":
            return
        url = "http://{}/topics/{}".format(
            self.cluster_config.MONITORING.KAFKA_REST_ADDRESS, topic)
        try:
            record_data = json.dumps({"records": [{"value": sample}]})
            headers = {
                "Content-Type": "application/vnd.kafka.json.v2+json",
                "Accept": "application/vnd.kafka.v2+json",
            }
            response = requests.post(url, data=record_data, headers=headers)
            if response.status_code != 200:
                raise Exception("non-OK response status code: {}".format(
                    response.status_code))
        except Exception as ex:
            GLOG.log(GLOG.ERROR, "Failed to log sample to Kafka: %s", ex)
Example #29
def load_isolated_word_npz(feats_dir):
    """Load train-dev-test .npz archives of isolated spoken words.

    `feats_dir` should contain 'train_words.npz', 'dev_words.npz' and
    'test_words.npz' (either mfcc or filterbank speech features).

    NOTE:
    Returns a dict of NpzFile objects for train, dev and test, each of which
    contains a file handle and should be closed during cleanup.
    """
    logging.log(
        logging.INFO,
        "Loading Flickr-Audio isolated word archives: {}".format(feats_dir))
    set_dict = {}
    for subset in ["train", "dev", "test"]:
        set_path = os.path.join(feats_dir, "{}_words.npz".format(subset))
        set_dict[subset] = np.load(set_path)
    return set_dict
Example #30
    def __init__(self,
                 config: Union[metadata_store_pb2.ConnectionConfig,
                               metadata_store_pb2.MetadataStoreClientConfig],
                 enable_upgrade_migration: bool = False):
        """Initialize the MetadataStore.

    MetadataStore can directly connect to either the metadata database or
    the metadata store server.

    Args:
      config: metadata_store_pb2.ConnectionConfig or
        metadata_store_pb2.MetadataStoreClientConfig. Configuration to
        connect to the database or the metadata store server.
      enable_upgrade_migration: if set to True, the library upgrades the db
        schema and migrates all data if it connects to an old version backend.
        It is ignored when using GRPC client connection config.
    """
        if isinstance(config, metadata_store_pb2.ConnectionConfig):
            self._using_db_connection = True
            migration_options = metadata_store_pb2.MigrationOptions()
            migration_options.enable_upgrade_migration = enable_upgrade_migration
            self._metadata_store = metadata_store_serialized.CreateMetadataStore(
                config.SerializeToString(),
                migration_options.SerializeToString())
            logging.log(logging.INFO,
                        'MetadataStore with DB connection initialized')
            return
        if not isinstance(config,
                          metadata_store_pb2.MetadataStoreClientConfig):
            raise ValueError('MetadataStore is expecting either '
                             'metadata_store_pb2.ConnectionConfig or '
                             'metadata_store_pb2.MetadataStoreClientConfig')
        self._using_db_connection = False
        if enable_upgrade_migration:
            raise ValueError(
                'Upgrade migration is not allowed when using gRPC '
                'connection client. Upgrade needs to be performed on '
                'the server side.')
        target = ':'.join([config.host, str(config.port)])
        channel = self._get_channel(config, target)
        self._metadata_store_stub = (
            metadata_store_service_pb2_grpc.MetadataStoreServiceStub(channel))
        logging.log(logging.INFO,
                    'MetadataStore with gRPC connection initialized')