def get_buckets(reduction_dir, key):
    learned_path = os.path.join(reduction_dir, LEARNED_BUCKETS, key)
    fixed_path = os.path.join(reduction_dir, FIXED_BUCKETS, key)
    learned = {item[0]: item[1] for item in pickler.load(learned_path)}
    fixed = {item[0]: item[1] for item in pickler.load(fixed_path)}
    return learned, fixed
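# Usage sketch (not part of the original module): the reduction directory, key,
# and helper name below are illustrative assumptions only.
def _example_compare_buckets(reduction_dir="output/reduction", key="cell-0"):
    learned, fixed = get_buckets(reduction_dir, key)
    # Report how many buckets each strategy produced for this key.
    return len(learned), len(fixed)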
def main(argv):
    ap = ArgumentParser(prog="server")
    ap.add_argument("-e", "--elastic", action="store_true", default=False)
    ap.add_argument("findings", nargs="+")
    args = ap.parse_args(argv)

    cases = {}
    level_part_layers = []

    for level_finding in args.findings:
        level, finding = level_finding.split(":")
        level = int(level)
        print("%d:%s" % (level, finding))
        part, layer, data = pickler.load(finding)
        assert (level, part, layer) not in level_part_layers, \
            "duplicate (level, part, layer): (%s, %s, %s)" % (level, part, layer)
        level_part_layers += [(level, part, layer)]

        # Group each (distance, activation_point) finding by its activation
        # sequence (the case), then by its (level, part, layer).
        for i in data:
            distance, activation_point = i
            case = tuple(activation_point.sequence)

            if case not in cases:
                cases[case] = {}

            if (level, part, layer) not in cases[case]:
                cases[case][(level, part, layer)] = []

            cases[case][(level, part, layer)] += [i]
def main():
    global train_xys
    global validation_xys
    global test_xys
    train_xys_file = os.path.join(RESUME_DIR, "xys.train.pickle")
    validate_xys_file = os.path.join(RESUME_DIR, "xys.validation.pickle")
    test_xys_file = os.path.join(RESUME_DIR, "xys.test.pickle")

    if os.path.exists(train_xys_file):
        train_xys = pickler.load(train_xys_file)
        validation_xys = pickler.load(validate_xys_file)
        test_xys = pickler.load(test_xys_file)
    else:
        raise ValueError("missing resume file: %s" % train_xys_file)

    user_input = ""

    while not user_input.startswith("quit"):
        user_input = input("enter next search (dataset|word,..): ")

        if not user_input.startswith("quit"):
            query = None

            try:
                dataset, query = parse(user_input)
                print("(%s, %s)" % (dataset, query))
            except WriteLast as e:
                # Dump the previous search's results to csv.
                with open("data.csv", "w") as fh:
                    writer = csv_writer(fh)

                    for r in result:
                        writer.writerow(r)
            except Exception as e:
                print(e)
                print("error interpreting: %s" % user_input)

            if query is not None:
                result = find_closest(dataset, query)
                print("found %d: " % len(result))

                for r in result[:TOP]:
                    print(r)
        else:
            # Exit path - nothing to do.
            pass

    return 0
def _get_buckets(reduction_dir, kind):
    buckets = {}

    for key in os.listdir(os.path.join(reduction_dir, kind)):
        buckets[key] = {
            item[0]: item[1]
            for item in pickler.load(os.path.join(reduction_dir, kind, key))
        }

    return buckets
def stream_data(data_dir, kind):
    description = get_description(data_dir)

    if description.task == LM:
        converter = _xy_lm
    elif description.task == SA:
        converter = _xy_sa
    else:
        raise ValueError("unknown task: %s" % description.task)

    target_path = os.path.join(
        data_dir,
        XYS_TRAIN if kind == "train" else (XYS_TEST if kind == "test" else XYS_VALIDATION))
    return pickler.load(target_path, converter=converter)
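# Usage sketch (illustrative only): counts the training examples streamed from
# a hypothetical data directory; "run/data" is not a path defined by the repo.
def _example_count_training_xys(data_dir="run/data"):
    count = 0

    for xy in stream_data(data_dir, "train"):
        count += 1

    return count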
def main():
    global activation_data
    activation_data_file = os.path.join(RESUME_DIR, "activation_data.pickle")

    if os.path.exists(activation_data_file):
        activation_data = pickler.load(activation_data_file)
    else:
        raise ValueError("missing resume file: %s" % activation_data_file)

    user_input = ""

    while not user_input.startswith("quit"):
        user_input = input("enter next search (part,layer|axis:target_value,..): ")

        if not user_input.startswith("quit"):
            query = None

            try:
                part, layer, query = parse(user_input)
                print("(%s, %s, %s)" % (part, layer, query))
            except WriteLast as e:
                # Dump the previous search's results at its quantile cutoffs.
                pickler.dump((part, layer, result[:q10]), "result-q10.pickle")
                pickler.dump((part, layer, result[:q25]), "result-q25.pickle")
                pickler.dump((part, layer, result[:q50]), "result-q50.pickle")
            except Exception as e:
                print(e)
                print("error interpreting: %s" % user_input)

            if query is not None:
                result, q10, q25, q50 = find_closest(part, layer, query)
                print("found %d: " % len(result))

                for r in result[:TOP]:
                    print(r)
        else:
            # Exit path - nothing to do.
            pass

    return 0
def random_stream_hidden_states(states_dir, kind, keys, sample_rate=1.0):
    streams = {}
    stream_names = []

    for name in os.listdir(states_dir):
        key = _key(name)

        if name.startswith(_folder(kind)) and (keys is None or key in keys):
            streams[name] = pickler.load(
                os.path.join(states_dir, name),
                converter=lambda item: HiddenState(*item))
            stream_names += [name]

    # Interleave the per-key streams in random order until all are exhausted.
    while len(streams) > 0:
        name = random.choice(stream_names)

        try:
            item = next(streams[name])

            if sample_rate == 1.0 or random.random() <= sample_rate:
                yield _key(name), item
        except StopIteration as e:
            del streams[name]
            stream_names.remove(name)
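# Usage sketch (illustrative only): draws a ~10% sample of hidden states,
# interleaved randomly across all keys. "run/states" and the "train" kind are
# assumptions; keys=None mirrors the "all keys" behaviour above.
def _example_sample_hidden_states(states_dir="run/states"):
    sampled = []

    for key, hidden_state in random_stream_hidden_states(states_dir, "train", None, sample_rate=0.1):
        sampled.append((key, hidden_state))

    return sampled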
def stream_hidden_states(states_dir, kind, key):
    return pickler.load(
        os.path.join(states_dir, _folder(kind) + "." + key),
        converter=lambda item: HiddenState(*item))
def get_hidden_states(states_dir, key):
    train = pickler.load(
        os.path.join(states_dir, STATES_TRAIN + "." + key),
        converter=lambda item: HiddenState(*item))
    test = pickler.load(
        os.path.join(states_dir, STATES_TEST + "." + key),
        converter=lambda item: HiddenState(*item))
    return train, test
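# Usage sketch (illustrative only): the returned values are streams that are
# consumed by iterating. The states directory and key here are assumptions.
def _example_count_hidden_states(states_dir="run/states", key="cell-0"):
    train, test = get_hidden_states(states_dir, key)
    return sum(1 for _ in train), sum(1 for _ in test)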
def stream_activations(states_dir, key):
    return pickler.load(
        os.path.join(states_dir, STATES_ACTIVATION + "." + key),
        converter=lambda item: ActivationState(*item))
def main(argv):
    ap = ArgumentParser(prog="sem-mse")
    ap.add_argument("resume_a")
    ap.add_argument("resume_b")
    args = ap.parse_args(argv)

    output_distributions = [i for i in pickler.load(os.path.join(args.resume_a, OUTPUT))]
    assert len(output_distributions) == 1
    output_distribution = output_distributions[0]

    distributions_basenames = [
        os.path.basename(p)
        for p in glob.glob(os.path.join(args.resume_a, DISTRIBUTIONS_GLOB))
    ]
    size = None
    uniform_distribution = None
    count = 0
    comparison_total = 0.0
    uniform_total_a = 0.0
    uniform_total_b = 0.0
    distribution_total_a = 0.0
    distribution_total_b = 0.0

    for distributions_basename in sorted(distributions_basenames, key=file_sort_key):
        stream_a = pickler.load(os.path.join(args.resume_a, distributions_basename))
        stream_b = pickler.load(os.path.join(args.resume_b, distributions_basename))

        for distribution_a, distribution_b in zip(stream_a, stream_b):
            assert len(distribution_a) == len(distribution_b)

            if size is None:
                size = len(distribution_a)
                value = 1.0 / size
                # Uniform baseline over the same keys as the predicted distributions.
                uniform_distribution = {key: value for key in distribution_a.keys()}

            comparison_total += sum_squared_error(distribution_a, distribution_b)
            uniform_total_a += sum_squared_error(distribution_a, uniform_distribution)
            uniform_total_b += sum_squared_error(distribution_b, uniform_distribution)
            distribution_total_a += sum_squared_error(distribution_a, output_distribution)
            distribution_total_b += sum_squared_error(distribution_b, output_distribution)
            count += 1

        # Verify both streams were fully consumed (zip() stops at the shorter one).
        try:
            next(stream_a)
            raise ValueError("stream a wasn't exhausted!")
        except StopIteration as e:
            pass

        try:
            next(stream_b)
            raise ValueError("stream b wasn't exhausted!")
        except StopIteration as e:
            pass

    with open("output-sem-mse-analysis.csv", "w") as fh:
        writer = csv_writer(fh)
        writer.writerow([
            "comparison", "sum of squared error", "mean squared error",
            "mse normalized"
        ])
        writer.writerow(row_data("comparison", comparison_total, count, size))
        writer.writerow(row_data("uniform a", uniform_total_a, count, size))
        writer.writerow(row_data("uniform b", uniform_total_b, count, size))
        writer.writerow(row_data("distribution a", distribution_total_a, count, size))
        writer.writerow(row_data("distribution b", distribution_total_b, count, size))

    return 0
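# sum_squared_error is not shown in this listing. A plausible reading, stated
# here as an assumption rather than the repo's definition, is a per-key squared
# difference between two distributions keyed the same way:
def _assumed_sum_squared_error(distribution_a, distribution_b):
    keys = set(distribution_a.keys()) | set(distribution_b.keys())
    return sum((distribution_a.get(key, 0.0) - distribution_b.get(key, 0.0))**2 for key in keys)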
def get_output_distribution(data_dir):
    return {
        item[0]: item[1]
        for item in pickler.load(os.path.join(data_dir, OUTPUT_DISTRIBUTION))
    }
def get_outputs(data_dir):
    outputs = set(pickler.load(os.path.join(data_dir, OUTPUTS)))
    return mlbase.Labels(outputs)
def get_words(data_dir):
    words = set(pickler.load(os.path.join(data_dir, WORDS)))
    return mlbase.Labels(words.union({mlbase.BLANK}), unknown=nlp.UNKNOWN)
def get_pos_mapping(data_dir):
    return {
        item[0]: item[1]
        for item in pickler.load(os.path.join(data_dir, POS_MAPPING))
    }
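# Usage sketch (illustrative only) tying the accessors together for one data
# directory; "run/data" is an assumed path, not one defined by the repo.
def _example_load_run_metadata(data_dir="run/data"):
    words = get_words(data_dir)
    outputs = get_outputs(data_dir)
    output_distribution = get_output_distribution(data_dir)
    pos_mapping = get_pos_mapping(data_dir)
    return words, outputs, output_distribution, pos_mapping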