Code example #1
def main(argv):
    ap = ArgumentParser(prog="measure-sequence-changes")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("data_dir")
    ap.add_argument("kind", choices=["train", "validation", "test"])
    ap.add_argument("sequential_dir")
    ap.add_argument("keys", nargs="+")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir)
    minimum, maximum, sequence_changes = measure(lstm, aargs.data_dir,
                                                 aargs.kind, aargs.keys)

    for key in aargs.keys:
        distance, index, sequence = minimum[key]
        sequence_str, changes_str = stringify(sequence,
                                              sequence_changes[sequence][key])
        user_log.info("Global minimum for %s of %.4f @%d:\n  %s\n  %s" %
                      (key, distance, index, sequence_str, changes_str))
        distance, index, sequence = maximum[key]
        sequence_str, changes_str = stringify(sequence,
                                              sequence_changes[sequence][key])
        user_log.info("Global maximum for %s of %.4f @%d:\n  %s\n  %s" %
                      (key, distance, index, sequence_str, changes_str))

    return 0
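
Each snippet in this collection defines main(argv) and returns a process exit code; the entry-point wiring is not shown, so the following is an assumed sketch of the usual convention rather than any project's actual code:

if __name__ == "__main__":
    import sys

    # Forward everything after the program name and propagate the exit code.
    sys.exit(main(sys.argv[1:]))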
Code example #2
def main(argv):
    ap = ArgumentParser(prog="generate-hidden-states")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("--report", default=False, action="store_true")
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("kind", choices=["train", "validation", "test"])
    ap.add_argument("dimensions", nargs="+", type=int)
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir)
    averages = categorize_rates(lstm,
                                data.stream_data(aargs.data_dir, aargs.kind),
                                aargs.dimensions, aargs.report)
    rows = [("", "0", "1")]

    for stat, dimension_points in averages.items():
        for dimension, points in dimension_points.items():
            rows += [("%s-%s" % (stat, dimension), *points)]

    with open("counter-statistics.csv", "w") as fh:
        writer = csv_writer(fh)

        for row in rows:
            writer.writerow(row)

    return 0
Code example #3
def main():
    ap = ArgumentParser(prog="pattern-query")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("--query-dir", default=None)
    ap.add_argument("--db-kind", choices=["postgres", "sqlite"])
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("predicate", nargs="+")
    aargs = ap.parse_args(sys.argv[1:])
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir)
    # Note: the --query-dir and --db-kind flags parsed above are not used
    # here; the engine is constructed with hardcoded placeholder values.
    query_engine = domain.QueryEngine(lstm, "moot", "postgres")

    predicates = Predicates(predicate_strs=aargs.predicate)
    logging.debug("invoking query: %s" % (predicates.as_strs()))
    result = query_engine.find(0.1, predicates)
    dump = json.dumps(result.as_json())
    logging.debug("result: %s" % (dump))
    print(dump)
    return 0
Code example #4
def main(argv):
    ap = ArgumentParser(prog="generate-data")
    ap.add_argument("-v", "--verbose", default=False, action="store_true", help="Turn on verbose logging.")
    #ap.add_argument("-d", "--dry-run", default=False, action="store_true")
    ap.add_argument("task", help="Either 'sa' or 'lm'.")
    ap.add_argument("form", help="How the language data should be interpreted:\n" \
                                 "raw: the text is raw (must still be run through a tokenizer)." \
                                 "tokenized: the text has been tokenized (space separate tokens, new lines separate sentences)." \
                                 "ptb: the text is tokenized and pos tagged in Penn Treebank form.")
    ap.add_argument("corpus_paths", nargs="+")
    ap.add_argument("data_dir")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0], aargs.verbose, False, True, True)
    logging.debug(aargs)

    if aargs.task == "sa":
        assert aargs.form == "tokenized"
        train_xys, validation_xys, test_xys = sa.create(aargs.data_dir, lambda: stream_input_stanford(aargs.corpus_paths[0]))
    elif aargs.task == "lm":
        train_xys, validation_xys, test_xys = lm.create(aargs.data_dir, lambda: stream_input_text(aargs.corpus_paths, aargs.form))
    else:
        raise ValueError("Unknown task: %s" % aargs.task)

    logging.debug("data sets (train, validation, test): %d, %d, %d" % (len(train_xys), len(validation_xys), len(test_xys)))
    return 0
Code example #5
def main(argv):
    ap = ArgumentParser(prog="markdowner")
    ap.add_argument("--verbose", "-v",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.  " + \
                    "**This will SIGNIFICANTLY slow down the program.**")
    ap.add_argument("markdown_file")
    ap.add_argument("output_html")
    args = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  args.verbose, True)
    logging.debug(args)

    with open(args.markdown_file, "r") as fh:
        html = markdown.markdown(fh.read(),
                                 extensions=EXTENSIONS,
                                 output_format="html5")

    with open(args.output_html, "w") as fh:
        fh.write(html_prefix)
        fh.write(html)
        fh.write(html_suffix)

    return 0
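
EXTENSIONS, html_prefix, and html_suffix are module-level values omitted from this snippet. A minimal, assumed stand-in that would make the example runnable (the project's real values are not shown) could be:

import markdown

# Hypothetical placeholders; the original project defines its own.
EXTENSIONS = ["markdown.extensions.tables", "markdown.extensions.fenced_code"]
html_prefix = "<!DOCTYPE html>\n<html><body>\n"
html_suffix = "\n</body></html>\n"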
Code example #6
def main(argv):
    ap = ArgumentParser(prog="server")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("-p", "--port", default=8888, type=int)
    ap.add_argument("--query-dir", default=None)
    ap.add_argument("--db-kind", choices=["postgres", "sqlite"])
    ap.add_argument("--use-fixed-buckets", default=False, action="store_true")
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("buckets_dir")
    ap.add_argument("encoding_dir")
    aargs = ap.parse_args(argv)
    #patch_thread_for_profiling()
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    words = data.get_words(aargs.data_dir)
    neural_network = domain.NeuralNetwork(aargs.data_dir, aargs.sequential_dir,
                                          aargs.buckets_dir,
                                          aargs.encoding_dir,
                                          aargs.use_fixed_buckets)

    # Quick test for seeing which mechanism is fastest for hitting the lstm.
    #logging.info("start")
    #for i in range(3):
    #    for test_sequence in data.stream_test(aargs.data_dir):
    #        for j in range(len(test_sequence.x)):
    #            neural_network.query_lstm([item[0] for item in test_sequence.x[:j + 1]], rnn.LSTM_INSTRUMENTS, False)
    #logging.info("stop")
    #sys.exit(1)

    query_engine = None
    pattern_engine = None

    if aargs.query_dir is not None:
        query_engine = domain.QueryEngine(neural_network.lstm, aargs.query_dir,
                                          aargs.db_kind)
        pattern_engine = domain.PatternEngine(neural_network.lstm)

    run_server(aargs.port, words, neural_network, query_engine, pattern_engine)

    try:
        neural_network._background_setup.join()
    except KeyboardInterrupt:
        # `patched` is only defined when patch_thread_for_profiling() above
        # is enabled; guard the lookup so the re-raise works either way.
        if globals().get("patched", False):
            neural_network._background_setup.complete_profile()

        raise

    return 0
Code example #7
def main(argv):
    ap = ArgumentParser(prog="analyze-data")
    ap.add_argument("-v", "--verbose", default=False, action="store_true", help="Turn on verbose logging.")
    ap.add_argument("data_dir")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0], aargs.verbose, False, True, True)
    logging.debug(aargs)

    analyze(data.stream_train(aargs.data_dir), "train")
    analyze(data.stream_test(aargs.data_dir), "test")
    return 0
Code example #8
def main(argv):
    ap = ArgumentParser(prog="server")
    ap.add_argument("--verbose", "-v",
                        default=False,
                        action="store_true",
                        # Unfortunately, logging in python 2.7 doesn't have
                        # a built-in way to log asynchronously.
                        help="Turn on verbose logging.  " + \
                        "**This will SIGNIFICANTLY slow down the program.**")
    ap.add_argument("-p", "--port", default=8888, type=int)
    args = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0], args.verbose, True)
    logging.debug(args)
    fe_converter = FeConverter()
    run(args.port, fe_converter)
Code example #9
def main(argv):
    ap = ArgumentParser(prog="query-data")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("--limit",
                    type=int,
                    default=10,
                    help="Truncate the results at maximum LIMIT.  "
                         "A negative value disables the limit (find all).")
    ap.add_argument("--match",
                    choices=["include", "sequence", "relative"],
                    default="include")
    ap.add_argument("data_dir")
    ap.add_argument("kind", choices=["train", "test"])
    ap.add_argument("words", nargs="*", default=None)
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    if aargs.match == "relative":
        # Quickest way to implement relative is just to make it correct for N = 2.
        assert len(aargs.words) == 2

    truncated = False
    count = 0

    for xy in data.stream_data(aargs.data_dir, aargs.kind):
        # TODO: work for non-lm cases.
        sequence = [item[0] for item in xy.x] + [xy.y[-1][0]]

        if matches(sequence, aargs.words, aargs.match):
            count += 1
            logging.debug("Instance: %s" % " ".join(sequence))

        if aargs.limit > 0 and count >= aargs.limit:
            logging.debug("Truncating..")
            truncated = True
            break

    user_log.info("Found %d%s instances." %
                  (count, " (truncated)" if truncated else ""))
    return 0
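
The matches helper is not included in this snippet. A rough sketch consistent with the three --match modes (an assumption about the semantics, not the project's actual implementation):

def matches(sequence, words, match):
    if match == "include":
        # Every queried word appears somewhere in the sequence.
        return all(word in sequence for word in words)
    elif match == "sequence":
        # The queried words appear as one contiguous run.
        n = len(words)
        return any(sequence[i:i + n] == words
                   for i in range(len(sequence) - n + 1))
    else:
        # relative: the first word appears somewhere before the second
        # (main() asserts exactly two words for this mode).
        firsts = [i for i, word in enumerate(sequence) if word == words[0]]
        seconds = [i for i, word in enumerate(sequence) if word == words[1]]
        return any(i < j for i in firsts for j in seconds)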
Code example #10
def main(argv):
    ap = ArgumentParser(prog="generate-reduction-buckets")
    ap.add_argument("-v", "--verbose", default=False, action="store_true", help="Turn on verbose logging.")
    ap.add_argument("--grouping", nargs="*", default=None)
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("states_dir")
    ap.add_argument("buckets_dir")
    ap.add_argument("target", type=int)
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0], aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir, True)
    part_learned_mse = {}
    part_fixed_mse = {}

    if aargs.grouping is None:
        for key in lstm.keys():
            learned_mse, fixed_mse = generate_buckets(aargs.states_dir, key, lstm.part_width(key), aargs.buckets_dir, aargs.target)
            part_learned_mse[key] = learned_mse
            part_fixed_mse[key] = fixed_mse
    else:
        learned_mse, fixed_mse = generate_buckets_grouping(lstm, aargs.states_dir, aargs.grouping, aargs.buckets_dir, aargs.target)
        part_learned_mse = learned_mse
        part_fixed_mse = fixed_mse

    with open(os.path.join(aargs.buckets_dir, "analysis.csv"), "w") as fh:
        writer = csv_writer(fh)
        writer.writerow(["technique", "key", "mse"])
        total_learned = 0.0
        total_fixed = 0.0
        count_learned = 0
        count_fixed = 0

        for key, error in sorted(part_learned_mse.items()):
            total_learned += error
            count_learned += 1
            writer.writerow(["learned", key, "%f" % error])

        for key, error in sorted(part_fixed_mse.items()):
            total_fixed += error
            count_fixed += 1
            writer.writerow(["fixed", key, "%f" % error])

        user_log.info("Total scores (learned, fixed): %s, %s" % (total_learned / count_learned, total_fixed / count_fixed))

    return 0
Code example #11
def main(argv):
    ap = ArgumentParser(prog="analyze-hidden-states")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    #ap.add_argument("-d", "--dry-run", default=False, action="store_true")
    ap.add_argument("--train-data", default=False, action="store_true")
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("states_dir")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir, True)
    stats = {}

    for key in lstm.keys():
        train_points, test_points = states.get_hidden_states(
            aargs.states_dir, key)

        if aargs.train_data:
            stats[key] = calculate_stats(train_points)
        else:
            stats[key] = calculate_stats(test_points)

    writer = csv_writer(sys.stdout)
    writer.writerow(["key"] + sorted(stats[next(iter(lstm.keys()))].keys()))
    averages = {}
    count = 0

    for key, key_stats in sorted(stats.items()):
        count += 1
        writer.writerow([key] +
                        [item[1] for item in sorted(key_stats.items())])

        for stat_name, value in key_stats.items():
            if stat_name not in averages:
                averages[stat_name] = 0

            averages[stat_name] += value

    writer.writerow(["global"] +
                    [item[1] / count for item in sorted(averages.items())])
    return 0
Code example #12
def main(argv):
    ap = ArgumentParser(prog="snow-patrol daemon")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("config_path")
    ap.add_argument("--dry-run", action="store_true", default=False)
    aargs = ap.parse_args(argv)
    log_file = ".%s.%s.log" % (os.path.splitext(
        os.path.basename(__file__))[0], os.path.basename(aargs.config_path))
    setup_logging(log_file, aargs.verbose, False, True, True)
    config = model.load(aargs.config_path)
    logging.debug("Running under: %s" % config)
    run_continuously(config, aargs.dry_run)
    return 0
Code example #13
def main(argv):
    ap = ArgumentParser(prog="server")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("-p", "--port", default=8888, type=int)
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)
    run_server(aargs.port, "api", "resources", {
        "echo": Echo(),
        "echo/echo": Echo(),
    })
    return 0
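
The Echo resource is not defined in this snippet, and the handler interface run_server expects is not shown; purely as a hypothetical placeholder:

class Echo:
    # Hypothetical handler shape: reply with whatever payload arrives.
    def handle(self, payload):
        return payload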
Code example #14
def main(argv):
    ap = ArgumentParser(prog="generate-query-database")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("--key-offsets", nargs="*", default=None)
    ap.add_argument("--db-kind", choices=["sqlite", "postgres"])
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("activation_dir")
    ap.add_argument("query_dir")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir, True)
    threads = []

    for parameter in (lstm.keys()
                      if aargs.key_offsets is None else aargs.key_offsets):
        if aargs.key_offsets is None:
            key, offset = parameter, 0
        else:
            key, offset_str = parameter.split("#")
            offset = int(offset_str)

        thread = threading.Thread(target=generate_db,
                                  args=[lstm, aargs.activation_dir, key,
                                        aargs.query_dir, aargs.db_kind,
                                        offset])
        # Non-daemon threads will keep the program running until they finish (as per documentation).
        thread.daemon = False
        thread.start()
        threads += [thread]

    for thread in threads:
        thread.join()

    return 0
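
As a usage note: each --key-offsets entry is expected to be a key#offset pair (for example, a hypothetical cells-0#1000), presumably so that separate threads can resume generation at different offsets within each key's activation data.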
Code example #15
File: termnet.py  Project: sawatzkylindsey/workbench
def main():
    ap = ArgumentParser(prog="termnet")
    ap.add_argument("--verbose", "-v",
                        default=False,
                        action="store_true",
                        # Unfortunately, logging in python 2.7 doesn't have
                        # a built-in way to log asynchronously.
                        help="Turn on verbose logging.  " + \
                        "**This will SIGNIFICANTLY slow down the program.**")
    ap.add_argument("-f",
                    "--input-format",
                    default=workbench.parser.WIKIPEDIA,
                    help="One of %s" % workbench.parser.FORMATS)
    ap.add_argument("-w", "--window", default=0)
    ap.add_argument("input_texts", nargs="+")
    args = ap.parse_args()
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  args.verbose, True)
    logging.debug(args)
    net = build(args.input_texts, args.input_format, args.window)
    return 0
Code example #16
def main(argv):
    ap = ArgumentParser(prog="generate-hidden-states")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("-s",
                    "--sample-rate",
                    type=float,
                    default=0.1,
                    help="train then test sampling rates.")
    ap.add_argument("-d", "--dry-run", default=False, action="store_true")
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("states_dir")
    ap.add_argument("kind", choices=["train", "validation", "test"])
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    if aargs.dry_run:
        dry_run(data.stream_data(aargs.data_dir, aargs.kind),
                aargs.sample_rate, aargs.kind)
        return 0

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir)
    description = data.get_description(aargs.data_dir)

    if description.task == data.LM:
        # Language modelling: the annotation is the token at timestep i.
        annotation_fn = lambda y, i: y[i][0]
    else:
        # Other tasks: the annotation is the single sequence-level label.
        annotation_fn = lambda y, i: y

    elicit_hidden_states(lstm, data.stream_data(aargs.data_dir,
                                                aargs.kind), annotation_fn,
                         aargs.sample_rate, aargs.states_dir, aargs.kind)
    return 0
Code example #17
def main(argv):
    ap = ArgumentParser(prog="language-model")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("--corpus", default="corpus.txt")
    ap.add_argument("--epochs", default=100, type=int)
    args = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  args.verbose, False, True)
    words, xy_sequences, neural_network = domain.create(
        args.corpus, args.epochs, args.verbose)

    #while neural_network.is_setting_up():
    #    pass

    neural_network._background_training.join()
    accuracy = neural_network.lstm.test(
        [[rnn.Xy(t[0], t[1]) for t in sequence] for sequence in xy_sequences],
        True)
    user_log.info("accuracy: %s" % accuracy)
Code example #18
def main(argv):
    ap = ArgumentParser(prog="generate-sequential-model")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    #ap.add_argument("-d", "--dry-run", default=False, action="store_true")
    ap.add_argument("-l", "--layers", default=2, type=int)
    ap.add_argument("-w", "--width", default=100, type=int)
    ap.add_argument("-e", "--embedding-width", default=50, type=int)
    ap.add_argument("--srnn",
                    default=False,
                    action="store_true",
                    help="use the 'srnn' ablation")
    ap.add_argument("--out",
                    default=False,
                    action="store_true",
                    help="use the 'out' ablation")
    ap.add_argument("-b", "--batch", default=32, type=int)
    ap.add_argument("-a", "--arc-epochs", default=5, type=int)
    ap.add_argument("-i", "--initial-decays", default=5, type=int)
    ap.add_argument("-c", "--convergence-decays", default=2, type=int)
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)
    hyper_parameters = sequential.HyperParameters(aargs.layers, aargs.width,
                                                  aargs.embedding_width)
    ablations = sequential.Ablations(aargs.srnn, aargs.out)
    rnn = generate_rnn(aargs.data_dir, hyper_parameters, ablations,
                       aargs.batch, aargs.arc_epochs, aargs.initial_decays,
                       aargs.convergence_decays, aargs.sequential_dir)
    return 0
Code example #19
def main(argv):
    ap = ArgumentParser(prog="generate-activation-states")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    #ap.add_argument("-d", "--dry-run", default=False, action="store_true")
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("activations_dir")
    ap.add_argument("kind", choices=["train", "validation", "test"])
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir)
    description = data.get_description(aargs.data_dir)
    elicit_activation_states(lstm, data.stream_data(aargs.data_dir,
                                                    aargs.kind),
                             aargs.activations_dir)

    return 0
Code example #20
def main(argv):
    ap = ArgumentParser(prog="generate-semantic-model")
    ap.add_argument("-v",
                    "--verbose",
                    default=False,
                    action="store_true",
                    help="Turn on verbose logging.")
    ap.add_argument("-i", "--initial-decays", default=5, type=int)
    ap.add_argument("-c", "--convergence-decays", default=2, type=int)
    ap.add_argument("-a", "--arc-epochs", default=3, type=int)
    ap.add_argument("-l", "--layers", default=2, type=int)
    ap.add_argument("-w", "--width", default=100, type=int)
    ap.add_argument("--word-input", default=False, action="store_true")
    ap.add_argument("-p", "--pre-existing", default=False, action="store_true")
    ap.add_argument("-m", "--monolith", default=False, action="store_true")
    ap.add_argument("--key-set", nargs="*", default=None)
    ap.add_argument("data_dir")
    ap.add_argument("sequential_dir")
    ap.add_argument("states_dir")
    ap.add_argument("encoding_dir")
    aargs = ap.parse_args(argv)
    setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
                  aargs.verbose, False, True, True)
    logging.debug(aargs)

    lstm = sequential.load_model(aargs.data_dir, aargs.sequential_dir, True)
    user_log.info("Sem")
    hyper_parameters = model.HyperParameters(aargs.layers, aargs.width)
    extra = {
        "word_input": aargs.word_input,
        "monolith": aargs.monolith,
    }

    if aargs.pre_existing:
        sem = load_sem(lstm, aargs.encoding_dir)
    else:
        sem = generate_sem(lstm, hyper_parameters, extra, aargs.states_dir,
                           aargs.arc_epochs, aargs.encoding_dir, aargs.key_set,
                           aargs.initial_decays, aargs.convergence_decays)

    keys_sem, total_sem = test_model(lstm, sem, aargs.states_dir, False,
                                     aargs.key_set)
    # TODO
    #user_log.info("Baseline")
    #baseline = generate_baseline(aargs.data_dir, lstm, hyper_parameters, extra)
    #scores_baseline, totals_baseline = test_model(lstm, baseline, aargs.states_dir, True, aargs.key_set)

    with open(os.path.join(aargs.encoding_dir, "analysis-breakdown.csv"),
              "w") as fh:
        writer = csv_writer(fh)
        writer.writerow(["technique", "key", "perplexity"])

        for key, perplexity in sorted(keys_sem.items()):
            writer.writerow(["sem", key, "%f" % perplexity])

        #for key, scores in sorted(scores_baseline.items()):
        #    for name, score in sorted(scores.items()):
        #        writer.writerow(["baseline", key, name, "%f" % score])

    with open(os.path.join(aargs.encoding_dir, "analysis-totals.csv"),
              "w") as fh:
        writer = csv_writer(fh)
        writer.writerow(["technique", "perplexity"])
        writer.writerow(["sem", "%f" % total_sem])

        #for name, score in sorted(totals_baseline.items()):
        #    writer.writerow(["baseline", name, "%f" % score])

    return 0
Code example #21
import os

from ml import nn as ffnn
from ml import base as mlbase
from pytils import adjutant
from pytils.log import setup_logging, user_log

setup_logging(".%s.log" % os.path.splitext(os.path.basename(__file__))[0],
              True, False, True)

KINDS = ["outputs", "cells"]
LAYERS = 2
WIDTH = 5
words = set(["abc", "def", "ghi"])
kind_labels = mlbase.Labels(set(KINDS))
layer_labels = mlbase.Labels(set(range(LAYERS)))
activation_vector = mlbase.VectorField(WIDTH)
predictor_input = mlbase.ConcatField(
    [kind_labels, layer_labels, activation_vector])
predictor_output = mlbase.Labels(words)
predictor = ffnn.Model("predictor",
                       ffnn.HyperParameters().width(10).layers(1),
                       predictor_input, predictor_output, mlbase.SINGLE_LABEL)

data = [
    mlbase.Xy(("outputs", 0, [.1, .2, .3, .4, .5]), {
        "abc": .6,
        "def": .2,
        "ghi": .2
    }),
    mlbase.Xy(("outputs", 1, [.1, .2, .3, .4, .5]), {