Ejemplo n.º 1
0
def dump(dataset, options):
    """
    Evaluate all layer outputs of the current network on a range of sequences
    and print them to stdout.

    For every layer in ``rnn.engine.network.layers`` the output ``placeholder``
    and every entry of its ``size_placeholder`` are fetched through
    ``rnn.engine.eval_single``, one sequence at a time, and printed sorted by
    name.

    :param Dataset.Dataset dataset: dataset the sequences are taken from
    :param argparse.Namespace options: must provide ``epoch``, ``startseq`` and
        ``endseq``. A negative ``endseq`` means "no upper limit"; in that case
        ``options.endseq`` is overwritten with ``float("inf")``.
    """
    print("Epoch: %i" % options.epoch, file=log.v3)
    # Initialize the sequence order on the dataset that is iterated below,
    # not on the global ``rnn.train_data``: the caller may pass any dataset,
    # and that one would otherwise be left uninitialized.
    dataset.init_seq_order(options.epoch)

    # Map a printable name to everything we want to fetch for each layer.
    output_dict = {}
    for name, layer in rnn.engine.network.layers.items():
        output_dict["%s:out" % name] = layer.output.placeholder
        for i, v in layer.output.size_placeholder.items():
            # The key of size_placeholder is translated via get_batch_axis()
            # before it is used in the printed name.
            output_dict["%s:shape(%i)" %
                        (name, layer.output.get_batch_axis(i))] = v

    seq_idx = options.startseq
    if options.endseq < 0:
        options.endseq = float("inf")
    # ``endseq`` is inclusive.
    while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
        print("Seq idx: %i" % (seq_idx, ), file=log.v3)
        out = rnn.engine.eval_single(dataset=dataset,
                                     seq_idx=seq_idx,
                                     output_dict=output_dict)
        for name, v in sorted(out.items()):
            print("  %s: %s" % (name, pretty_print(v)))
        seq_idx += 1

    # Report whether the dataset still has sequences past the last dumped one.
    print("Done. More seqs which we did not dumped: %s" %
          dataset.is_less_than_num_seqs(seq_idx),
          file=log.v1)
Ejemplo n.º 2
0
def dump(dataset, options):
  """
  Run the current network on a range of sequences, one at a time, and print
  every layer output to stdout.

  What is fetched per layer of ``rnn.engine.network.layers``: the output
  ``placeholder`` and each entry of the output's ``size_placeholder``.
  The values come from ``rnn.engine.run_single`` and are printed sorted by name.

  :param Dataset.Dataset dataset: dataset the sequences are taken from
  :param argparse.Namespace options: must provide ``epoch``, ``startseq`` and
    ``endseq``. A negative ``endseq`` means "no upper limit"; in that case
    ``options.endseq`` is overwritten with ``float("inf")``.
  """
  print("Epoch: %i" % options.epoch, file=log.v3)
  dataset.init_seq_order(options.epoch)

  # Printable name -> object to fetch.
  fetches = {}
  for layer_name, layer in rnn.engine.network.layers.items():
    fetches["%s:out" % layer_name] = layer.output.placeholder
    for axis, size in layer.output.size_placeholder.items():
      shape_key = "%s:shape(%i)" % (layer_name, layer.output.get_batch_axis(axis))
      fetches[shape_key] = size

  seq_idx = options.startseq
  if options.endseq < 0:
    options.endseq = float("inf")
  while True:
    # Stop at the end of the dataset or once past the (inclusive) end index.
    if not dataset.is_less_than_num_seqs(seq_idx):
      break
    if not (seq_idx <= options.endseq):
      break
    print("Seq idx: %i" % (seq_idx,), file=log.v3)
    fetched = rnn.engine.run_single(dataset=dataset, seq_idx=seq_idx, output_dict=fetches)
    for key in sorted(fetched):
      print("  %s: %s" % (key, pretty_print(fetched[key])))
    seq_idx += 1

  # Report whether the dataset still has sequences past the last dumped one.
  more_left = dataset.is_less_than_num_seqs(seq_idx)
  print("Done. More seqs which we did not dumped: %s" % more_left, file=log.v1)
Ejemplo n.º 3
0
def dump_dataset(dataset, options):
    """
    Dump the ``"data"`` stream and all targets of a range of sequences.

    Depending on ``options.type`` each sequence is either written with
    ``numpy.savetxt`` to files named
    ``<dump_prefix><seq_idx>.data<dump_postfix>`` and
    ``<dump_prefix><seq_idx>.targets.<target><dump_postfix>`` (``"numpy"``),
    or pretty-printed to stdout (``"stdout"``).

    :param Dataset.Dataset dataset: dataset the sequences are taken from
    :param argparse.Namespace options: must provide ``epoch``, ``type``,
        ``startseq`` and ``endseq``, plus ``dump_prefix`` and ``dump_postfix``
        for the ``"numpy"`` type. A negative ``endseq`` means "no upper limit";
        in that case ``options.endseq`` is overwritten with ``float("inf")``.
    :raises Exception: if ``options.type`` is neither ``"numpy"`` nor ``"stdout"``
    """
    print("Epoch: %i" % options.epoch, file=log.v3)
    # Initialize the sequence order on the dataset that is iterated below,
    # not on the global ``rnn.train_data``: the caller may pass any dataset,
    # and that one would otherwise be left uninitialized.
    dataset.init_seq_order(options.epoch)

    # Announce the dump mode and reject unknown ones before touching any data.
    if options.type == "numpy":
        print("Dump files: %r*%r" %
              (options.dump_prefix, options.dump_postfix),
              file=log.v3)
    elif options.type == "stdout":
        print("Dump to stdout", file=log.v3)
    else:
        raise Exception("unknown dump option type %r" % options.type)

    seq_idx = options.startseq
    if options.endseq < 0:
        options.endseq = float("inf")
    # ``endseq`` is inclusive.
    while dataset.is_less_than_num_seqs(seq_idx) and seq_idx <= options.endseq:
        # Load exactly this one sequence before reading from it.
        dataset.load_seqs(seq_idx, seq_idx + 1)
        data = dataset.get_data(seq_idx, "data")
        if options.type == "numpy":
            numpy.savetxt(
                "%s%i.data%s" %
                (options.dump_prefix, seq_idx, options.dump_postfix), data)
        elif options.type == "stdout":
            print("seq %i data:" % seq_idx, pretty_print(data))
        for target in dataset.get_target_list():
            targets = dataset.get_targets(target, seq_idx)
            if options.type == "numpy":
                # Targets are written with an integer format.
                numpy.savetxt("%s%i.targets.%s%s" %
                              (options.dump_prefix, seq_idx, target,
                               options.dump_postfix),
                              targets,
                              fmt='%i')
            elif options.type == "stdout":
                print("seq %i target %r:" % (seq_idx, target),
                      pretty_print(targets))

        seq_idx += 1

    # Report whether the dataset still has sequences past the last dumped one.
    print("Done. More seqs which we did not dumped: %s" %
          dataset.is_less_than_num_seqs(seq_idx),
          file=log.v1)
Ejemplo n.º 4
0
def dump_dataset(dataset, options):
    """
    Dump a range of sequences of a dataset, or only report its number of
    sequences.

    With ``options.get_num_seqs`` set, ``dataset.estimated_num_seqs`` and
    ``dataset.num_seqs`` are printed; if reading ``num_seqs`` raises, the
    sequences are counted by loading them one after the other. Nothing is
    dumped in this mode.

    Otherwise the ``"data"`` stream and all targets of every selected sequence
    are either written with ``numpy.savetxt`` (``options.type == "numpy"``) or
    pretty-printed to stdout (``options.type == "stdout"``).

    :param Dataset.Dataset dataset: dataset the sequences are taken from
    :param argparse.Namespace options: must provide ``epoch``, ``get_num_seqs``,
        ``type``, ``startseq`` and ``endseq``, plus ``dump_prefix`` and
        ``dump_postfix`` for the ``"numpy"`` type. A negative ``endseq`` means
        "no upper limit"; in that case ``options.endseq`` is overwritten with
        ``float("inf")``.
    :raises Exception: if ``options.type`` is neither ``"numpy"`` nor ``"stdout"``
    """
    print("Epoch: %i" % options.epoch, file=log.v3)
    dataset.init_seq_order(epoch=options.epoch)

    if options.get_num_seqs:
        print("Get num seqs.")
        print("estimated_num_seqs: %r" % dataset.estimated_num_seqs)
        try:
            print("num_seqs: %r" % dataset.num_seqs)
        except Exception as exc:
            print("num_seqs exception %r, which is valid, so we count." % exc)
            # The first target (if there is any) is only used for the
            # periodic progress output below.
            first_target = dataset.get_target_list()[0] if dataset.get_target_list() else None
            count = 0
            while dataset.is_less_than_num_seqs(count):
                dataset.load_seqs(count, count + 1)
                if count % 10000 == 0:
                    preview = ""
                    if first_target:
                        first_targets = dataset.get_targets(first_target, count)
                        preview = " (targets = %r...)" % (first_targets[:10],)
                    print("%i ...%s" % (count, preview))
                count += 1
            print("accumulated num seqs: %i" % count)
        print("Done.")
        return

    # Reject unknown dump types before touching any data.
    dump_type = options.type
    if dump_type not in ("numpy", "stdout"):
        raise Exception("unknown dump option type %r" % options.type)
    if dump_type == "numpy":
        print("Dump files: %r*%r" % (options.dump_prefix, options.dump_postfix), file=log.v3)
    else:
        print("Dump to stdout", file=log.v3)

    seq_idx = options.startseq
    if options.endseq < 0:
        options.endseq = float("inf")
    while True:
        # Stop at the end of the dataset or once past the (inclusive) end index.
        if not dataset.is_less_than_num_seqs(seq_idx):
            break
        if not (seq_idx <= options.endseq):
            break

        # Load exactly this one sequence before reading from it.
        dataset.load_seqs(seq_idx, seq_idx + 1)
        data = dataset.get_data(seq_idx, "data")
        if dump_type == "numpy":
            data_filename = "%s%i.data%s" % (options.dump_prefix, seq_idx, options.dump_postfix)
            numpy.savetxt(data_filename, data)
        elif dump_type == "stdout":
            print("seq %i data:" % seq_idx, pretty_print(data))

        for target in dataset.get_target_list():
            targets = dataset.get_targets(target, seq_idx)
            if dump_type == "numpy":
                targets_filename = "%s%i.targets.%s%s" % (
                    options.dump_prefix, seq_idx, target, options.dump_postfix)
                # Targets are written with an integer format.
                numpy.savetxt(targets_filename, targets, fmt='%i')
            elif dump_type == "stdout":
                print("seq %i target %r:" % (seq_idx, target), pretty_print(targets))

        seq_idx += 1

    # Report whether the dataset still has sequences past the last dumped one.
    more_left = dataset.is_less_than_num_seqs(seq_idx)
    print("Done. More seqs which we did not dumped: %s" % more_left, file=log.v1)
Ejemplo n.º 5
0
def dump_dataset(dataset, options):
    """
    Dump a range of sequences of a dataset, or only report its number of
    sequences, and print sequence-length statistics at the end.

    With ``options.get_num_seqs`` set, ``dataset.estimated_num_seqs`` and
    ``dataset.num_seqs`` are printed; if reading ``num_seqs`` raises, the
    sequences are counted by loading them one after the other. Nothing is
    dumped in this mode.

    Otherwise, for every selected sequence the data of key ``options.key`` and
    all targets are handled according to ``options.type``:

    * ``"numpy"``: written to files with ``numpy.savetxt``
    * ``"stdout"``: pretty-printed to stdout
    * ``"print_shape"``: only the ``shape`` is printed
    * ``"plot"``: the data (not the targets) is passed to ``plot``
    * ``"null"``: nothing is dumped, only a progress bar is shown

    :param Dataset.Dataset dataset: dataset the sequences are taken from
    :param argparse.Namespace options: must provide ``epoch``, ``key``,
        ``get_num_seqs``, ``type``, ``stats``, ``dump_stats``, ``startseq`` and
        ``endseq``, plus ``dump_prefix`` and ``dump_postfix`` for the
        ``"numpy"`` type. A negative ``endseq`` means "no upper limit"; in that
        case ``options.endseq`` is overwritten with ``float("inf")``.
    :raises Exception: if ``options.type`` is none of the types listed above
    """
    print("Epoch: %i" % options.epoch, file=log.v3)
    dataset.init_seq_order(epoch=options.epoch)
    print("Dataset keys:", dataset.get_data_keys(), file=log.v3)
    print("Dataset target keys:", dataset.get_target_list(), file=log.v3)
    assert options.key in dataset.get_data_keys()

    if options.get_num_seqs:
        print("Get num seqs.")
        print("estimated_num_seqs: %r" % dataset.estimated_num_seqs)
        try:
            print("num_seqs: %r" % dataset.num_seqs)
        except Exception as exc:
            print("num_seqs exception %r, which is valid, so we count." % exc)
            # The first target (if there is any) is only used for the
            # periodic progress output below.
            first_target = dataset.get_target_list()[0] if dataset.get_target_list() else None
            count = 0
            while dataset.is_less_than_num_seqs(count):
                dataset.load_seqs(count, count + 1)
                if count % 10000 == 0:
                    preview = ""
                    if first_target:
                        first_targets = dataset.get_targets(first_target, count)
                        preview = " (targets = %r...)" % (first_targets[:10],)
                    print("%i ...%s" % (count, preview))
                count += 1
            print("accumulated num seqs: %i" % count)
        print("Done.")
        return

    # Announce the dump mode and reject unknown ones before touching any data.
    dump_type = options.type
    if dump_type == "numpy":
        print("Dump files: %r*%r" % (options.dump_prefix, options.dump_postfix), file=log.v3)
    elif dump_type == "stdout":
        print("Dump to stdout", file=log.v3)
    elif dump_type == "print_shape":
        print("Dump shape to stdout", file=log.v3)
    elif dump_type == "plot":
        print("Plot.", file=log.v3)
    elif dump_type == "null":
        print("No dump.")
    else:
        raise Exception("unknown dump option type %r" % options.type)

    def describe_num_seqs():
        """Return the dataset size as text: exact, "~estimate", or "?"."""
        try:
            return str(dataset.num_seqs)
        except NotImplementedError:
            try:
                return "~%i" % dataset.estimated_num_seqs
            except TypeError:
                # "%i" needs a number; the estimate can be None.
                return "?"

    start_time = time.time()
    # Statistics over the dumped data are only collected on request.
    stats = None
    if options.stats or options.dump_stats:
        stats = Stats()
    # Sequence-length statistics are always collected, one per data key.
    seq_len_stats = {}
    for data_key in dataset.get_data_keys():
        seq_len_stats[data_key] = Stats()

    seq_idx = options.startseq
    if options.endseq < 0:
        options.endseq = float("inf")
    while True:
        # Stop at the end of the dataset or once past the (inclusive) end index.
        if not dataset.is_less_than_num_seqs(seq_idx):
            break
        if not (seq_idx <= options.endseq):
            break

        # Load exactly this one sequence before reading from it.
        dataset.load_seqs(seq_idx, seq_idx + 1)
        complete_frac = dataset.get_complete_frac(seq_idx)
        start_elapsed = time.time() - start_time

        # Progress text: "<idx>/<num seqs> (<percent>%)", plus the estimated
        # remaining time once the completed fraction is positive.
        progress_prefix = "%i/%s" % (seq_idx, describe_num_seqs())
        progress = "%s (%.02f%%)" % (progress_prefix, complete_frac * 100)
        if complete_frac > 0:
            # Extrapolate the total run time from the fraction done so far.
            remaining_estimated = start_elapsed / complete_frac - start_elapsed
            progress += " (%s)" % hms(remaining_estimated)

        data = dataset.get_data(seq_idx, options.key)
        if dump_type == "numpy":
            data_filename = "%s%i.data%s" % (options.dump_prefix, seq_idx, options.dump_postfix)
            numpy.savetxt(data_filename, data)
        elif dump_type == "stdout":
            print("seq %s data:" % progress, pretty_print(data))
        elif dump_type == "print_shape":
            print("seq %s data shape:" % progress, data.shape)
        elif dump_type == "plot":
            plot(data)

        for target in dataset.get_target_list():
            targets = dataset.get_targets(target, seq_idx)
            if dump_type == "numpy":
                targets_filename = "%s%i.targets.%s%s" % (
                    options.dump_prefix, seq_idx, target, options.dump_postfix)
                # Targets are written with an integer format.
                numpy.savetxt(targets_filename, targets, fmt='%i')
            elif dump_type == "stdout":
                print("seq %i target %r:" % (seq_idx, target), pretty_print(targets))
            elif dump_type == "print_shape":
                print("seq %i target %r shape:" % (seq_idx, target), targets.shape)

        seq_len = dataset.get_seq_length(seq_idx)
        for data_key in dataset.get_data_keys():
            seq_len_stats[data_key].collect([seq_len[data_key]])
        if stats:
            stats.collect(data)
        if dump_type == "null":
            Util.progress_bar_with_time(complete_frac, prefix=progress_prefix)

        seq_idx += 1

    # Summary: elapsed time and whether sequences are left past the last one.
    total_time = hms(time.time() - start_time)
    more_left = dataset.is_less_than_num_seqs(seq_idx)
    print("Done. Total time %s. More seqs which we did not dumped: %s" % (total_time, more_left),
          file=log.v1)
    for data_key in dataset.get_data_keys():
        seq_len_stats[data_key].dump(stream_prefix="Seq-length %r " % data_key, stream=log.v2)
    if stats:
        stats.dump(output_file_prefix=options.dump_stats,
                   stream_prefix="Data %r " % options.key,
                   stream=log.v2)