Example 1
def get_buckets(reduction_dir, key):
    learned_path = os.path.join(reduction_dir, LEARNED_BUCKETS, key)
    fixed_path = os.path.join(reduction_dir, FIXED_BUCKETS, key)
    learned = {item[0]: item[1] for item in pickler.load(learned_path)}
    fixed = {item[0]: item[1] for item in pickler.load(fixed_path)}
    return learned, fixed
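All of these examples lean on the same pickler helper module, which is not shown here. As a point of reference, a minimal sketch consistent with the calls above (a streaming load that yields pickled items one at a time, an optional converter, and a dump counterpart) might look like the following; treat it as an assumption, not the project's actual implementation.

import pickle

def load(path, converter=None):
    # Yield successive pickled items from the file, optionally passing each
    # one through converter first.
    with open(path, "rb") as fh:
        while True:
            try:
                item = pickle.load(fh)
            except EOFError:
                return

            yield item if converter is None else converter(item)

def dump(data, path):
    # Write a single pickled object to the file.
    with open(path, "wb") as fh:
        pickle.dump(data, fh)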
Example 2
def main(argv):
    ap = ArgumentParser(prog="server")
    ap.add_argument("-e", "--elastic", action="store_true", default=False)
    ap.add_argument("findings", nargs="+")
    args = ap.parse_args(argv)
    cases = {}
    level_part_layers = []

    for level_finding in args.findings:
        level, finding = level_finding.split(":")
        level = int(level)
        print("%d:%s" % (level, finding))
        part, layer, data = pickler.load(finding)
        assert (
            level, part, layer
        ) not in level_part_layers, "duplicate (level, part, layer): (%s, %s, %s)" % (
            level, part, layer)
        level_part_layers.append((level, part, layer))

        # Group each (distance, activation_point) finding by case (the
        # activation point's sequence) and then by (level, part, layer).
        for i in data:
            distance, activation_point = i
            case = tuple(activation_point.sequence)
            by_location = cases.setdefault(case, {})
            by_location.setdefault((level, part, layer), []).append(i)
Example 3
def main():
    global train_xys
    global validation_xys
    global test_xys
    train_xys_file = os.path.join(RESUME_DIR, "xys.train.pickle")
    validation_xys_file = os.path.join(RESUME_DIR, "xys.validation.pickle")
    test_xys_file = os.path.join(RESUME_DIR, "xys.test.pickle")

    if os.path.exists(train_xys_file):
        train_xys = pickler.load(train_xys_file)
        validation_xys = pickler.load(validation_xys_file)
        test_xys = pickler.load(test_xys_file)
    else:
        raise ValueError("missing resume file: %s" % train_xys_file)

    user_input = ""

    while not user_input.startswith("quit"):
        user_input = input("enter next search (dataset|word,..): ")

        if not user_input.startswith("quit"):
            query = None

            try:
                dataset, query = parse(user_input)
                print("(%s, %s)" % (dataset, query))
            except WriteLast as e:
                with open("data.csv", "w") as fh:
                    writer = csv_writer(fh)

                    for r in result:
                        writer.writerow(r)
            except Exception as e:
                print(e)
                print("error interpreting: %s" % user_input)

            if query is not None:
                result = find_closest(dataset, query)
                print("found %d: " % len(result))

                for r in result[:TOP]:
                    print(r)
        else:
            # Exit path - don't do anything
            pass

    return 0
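parse and WriteLast are defined elsewhere in the project. A minimal sketch matching the "dataset|word,.." prompt format above; the "write" trigger token and the exception's shape are assumptions made for illustration.

class WriteLast(Exception):
    # Raised by parse() to request that the previous results be written out.
    pass

def parse(user_input):
    if user_input.strip() == "write":  # assumed trigger token
        raise WriteLast()

    dataset, words = user_input.split("|")
    return dataset, [word.strip() for word in words.split(",")]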
Example 4
def _get_buckets(reduction_dir, kind):
    buckets = {}

    for key in os.listdir(os.path.join(reduction_dir, kind)):
        buckets[key] = {
            item[0]: item[1]
            for item in pickler.load(os.path.join(reduction_dir, kind, key))
        }

    return buckets
Example 5
def stream_data(data_dir, kind):
    description = get_description(data_dir)

    if description.task == LM:
        converter = _xy_lm
    elif description.task == SA:
        converter = _xy_sa
    else:
        raise ValueError("unknown task: %s" % description.task)

    if kind == "train":
        target = XYS_TRAIN
    elif kind == "test":
        target = XYS_TEST
    else:
        target = XYS_VALIDATION

    target_path = os.path.join(data_dir, target)
    return pickler.load(target_path, converter=converter)
Example 6
def main():
    global activation_data
    activation_data_file = os.path.join(RESUME_DIR, "activation_data.pickle")

    if os.path.exists(activation_data_file):
        activation_data = pickler.load(activation_data_file)
    else:
        raise ValueError("missing resume file: %s" % activation_data_file)

    user_input = ""

    while not user_input.startswith("quit"):
        user_input = input(
            "enter next search (part,layer|axis:target_value,..): ")

        if not user_input.startswith("quit"):
            query = None

            try:
                part, layer, query = parse(user_input)
                print("(%s, %s, %s)" % (part, layer, query))
            except WriteLast:
                # Dump the previous query's results at the 10th, 25th, and
                # 50th percentile cut-offs; relies on part, layer, result,
                # and the q* indices bound by an earlier iteration.
                pickler.dump((part, layer, result[:q10]), "result-q10.pickle")
                pickler.dump((part, layer, result[:q25]), "result-q25.pickle")
                pickler.dump((part, layer, result[:q50]), "result-q50.pickle")
            except Exception as e:
                print(e)
                print("error interpreting: %s" % user_input)

            if query is not None:
                result, q10, q25, q50 = find_closest(part, layer, query)
                print("found %d: " % len(result))

                for r in result[:TOP]:
                    print(r)

    return 0
Example 7
def random_stream_hidden_states(states_dir, kind, keys, sample_rate=1.0):
    streams = {}
    stream_names = []

    for name in os.listdir(states_dir):
        key = _key(name)

        if name.startswith(_folder(kind)) and (keys is None or key in keys):
            streams[name] = pickler.load(
                os.path.join(states_dir, name),
                converter=lambda item: HiddenState(*item))
            stream_names.append(name)

    # Interleave the per-key streams in random order, sub-sampling items at
    # sample_rate, until every stream is exhausted.
    while len(streams) > 0:
        name = random.choice(stream_names)

        try:
            item = next(streams[name])

            if sample_rate == 1.0 or random.random() <= sample_rate:
                yield _key(name), item
        except StopIteration:
            del streams[name]
            stream_names.remove(name)
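For illustration, a hypothetical call that draws a 10% sample interleaved randomly across the matching per-key streams (the directory name and kind here are made up):

for key, hidden_state in random_stream_hidden_states(
        "states", "train", keys=None, sample_rate=0.1):
    print(key, hidden_state)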
Example 8
def stream_hidden_states(states_dir, kind, key):
    return pickler.load(os.path.join(states_dir,
                                     _folder(kind) + "." + key),
                        converter=lambda item: HiddenState(*item))
Example 9
def get_hidden_states(states_dir, key):
    train = pickler.load(os.path.join(states_dir, STATES_TRAIN + "." + key),
                         converter=lambda item: HiddenState(*item))
    test = pickler.load(os.path.join(states_dir, STATES_TEST + "." + key),
                        converter=lambda item: HiddenState(*item))
    return train, test
Example 10
def stream_activations(states_dir, key):
    return pickler.load(os.path.join(states_dir,
                                     STATES_ACTIVATION + "." + key),
                        converter=lambda item: ActivationState(*item))
Example 11
def main(argv):
    ap = ArgumentParser(prog="sem-mse")
    ap.add_argument("resume_a")
    ap.add_argument("resume_b")
    args = ap.parse_args(argv)

    output_distributions = list(pickler.load(os.path.join(args.resume_a,
                                                          OUTPUT)))
    assert len(output_distributions) == 1
    output_distribution = output_distributions[0]
    distributions_basenames = [
        os.path.basename(p)
        for p in glob.glob(os.path.join(args.resume_a, DISTRIBUTIONS_GLOB))
    ]
    size = None
    uniform_distribution = None
    count = 0
    comparison_total = 0.0
    uniform_total_a = 0.0
    uniform_total_b = 0.0
    distribution_total_a = 0.0
    distribution_total_b = 0.0

    for distributions_basename in sorted(distributions_basenames,
                                         key=file_sort_key):
        stream_a = pickler.load(
            os.path.join(args.resume_a, distributions_basename))
        stream_b = pickler.load(
            os.path.join(args.resume_b, distributions_basename))

        for distribution_a, distribution_b in zip(stream_a, stream_b):
            assert len(distribution_a) == len(distribution_b)

            if size is None:
                size = len(distribution_a)
                value = 1.0 / size
                # Uniform baseline distribution over the same keys.
                uniform_distribution = {
                    key: value
                    for key in distribution_a
                }

            comparison_total += sum_squared_error(distribution_a,
                                                  distribution_b)
            uniform_total_a += sum_squared_error(distribution_a,
                                                 uniform_distribution)
            uniform_total_b += sum_squared_error(distribution_b,
                                                 uniform_distribution)
            distribution_total_a += sum_squared_error(distribution_a,
                                                      output_distribution)
            distribution_total_b += sum_squared_error(distribution_b,
                                                      output_distribution)
            count += 1

        # Both streams should end together; a leftover item means the inputs
        # are misaligned.
        try:
            next(stream_a)
            raise ValueError("stream a wasn't exhausted!")
        except StopIteration:
            pass

        try:
            next(stream_b)
            raise ValueError("stream b wasn't exhausted!")
        except StopIteration:
            pass

    with open("output-sem-mse-analysis.csv", "w") as fh:
        writer = csv_writer(fh)
        writer.writerow([
            "comparison", "sum of squared error", "mean squared error",
            "mse normalized"
        ])
        writer.writerow(row_data("comparison", comparison_total, count, size))
        writer.writerow(row_data("uniform a", uniform_total_a, count, size))
        writer.writerow(row_data("uniform b", uniform_total_b, count, size))
        writer.writerow(
            row_data("distribution a", distribution_total_a, count, size))
        writer.writerow(
            row_data("distribution b", distribution_total_b, count, size))

    return 0
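sum_squared_error and row_data are helpers defined elsewhere. Plausible sketches, inferred from the CSV header ("sum of squared error", "mean squared error", "mse normalized") and therefore assumptions rather than the actual implementations:

def sum_squared_error(distribution_a, distribution_b):
    # Sum of squared differences over the distributions' shared keys.
    return sum((distribution_a[key] - distribution_b[key]) ** 2
               for key in distribution_a)

def row_data(name, total, count, size):
    # One CSV row: the comparison name, the raw sum of squared error, the
    # error averaged per sample, and that mean further normalized by the
    # distribution's size.
    return [name, total, total / count, total / (count * size)]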
Example 12
def get_output_distribution(data_dir):
    return {
        item[0]: item[1]
        for item in pickler.load(os.path.join(data_dir, OUTPUT_DISTRIBUTION))
    }
Example 13
def get_outputs(data_dir):
    outputs = set(pickler.load(os.path.join(data_dir, OUTPUTS)))
    return mlbase.Labels(outputs)
Example 14
def get_words(data_dir):
    words = set(pickler.load(os.path.join(data_dir, WORDS)))
    return mlbase.Labels(words | {mlbase.BLANK}, unknown=nlp.UNKNOWN)
Example 15
def get_pos_mapping(data_dir):
    return {
        item[0]: item[1]
        for item in pickler.load(os.path.join(data_dir, POS_MAPPING))
    }