def save_to_json_gz(data, filename):
    """Reshape grouped expressions into Original/Noise records and save them.

    For every key in ``data`` the associated sequence is read as pairs whose
    element 0 is stored under ``Tokens`` and whose element 1 is passed through
    ``convert_to_dict`` and stored under ``Tree``. The first pair of each
    group becomes ``Original``; all remaining pairs become ``Noise``.

    Raises:
        IndexError: if a group contains no expressions.
    """
    def as_entry(expr):
        # One (tokens, tree) pair -> its serializable record.
        return {"Tokens": expr[0], "Tree": convert_to_dict(expr[1])}

    payload = {}
    for key, group in data.items():
        entries = list(map(as_entry, group))
        payload[key] = {"Original": entries[0], "Noise": entries[1:]}

    save_result_as_gzipped_json(filename, payload)
    def save_to_json_gz(data, filename):
        """Convert grouped trees to Original/Noise records and save them.

        Progress messages go to stderr and the per-key loop is wrapped in
        ``tqdm``. Each item of a group is passed to ``convert_to_dict`` to
        produce the ``Tree`` field. The first record of every group is stored
        as ``Original`` and the remainder as ``Noise``.

        Raises:
            IndexError: if a group contains no expressions.
        """
        print("Converting to JSON", file=sys.stderr)

        payload = {}
        for key, group in tqdm(data.items()):
            entries = []
            for expr in group:
                # Tokens is a fixed placeholder (the characters of
                # "whatever"); only the tree comes from the input.
                entries.append(
                    {"Tokens": list("whatever"), "Tree": convert_to_dict(expr)}
                )
            payload[key] = {"Original": entries[0], "Noise": entries[1:]}

        print("Saving", file=sys.stderr)
        save_result_as_gzipped_json(filename, payload)
Beispiel #3
0
    def save_to_json_gz(data, filename):
        """Wrap each sample as an Original record with no noise and save it.

        Every value of ``data`` is indexed as a pair: element 0 is stored as
        ``Tokens`` and element 1 is run through ``convert_to_dict`` to give
        ``Tree``. ``Noise`` is always an empty list.
        """
        payload = {
            key: {
                "Original": {
                    "Tokens": sample[0],
                    "Tree": convert_to_dict(sample[1]),
                },
                "Noise": [],
            }
            for key, sample in data.items()
        }
        save_result_as_gzipped_json(filename, payload)
Beispiel #4
0
def convert_to_dict(node: Node) -> dict:
    """Recursively turn ``node`` into a nested ``Name``/``Children`` dict.

    For each entry of ``node.properties`` only the first element of
    ``node[property]`` is followed. The ``Children`` key is present only
    when the node has at least one property; it maps property names to the
    converted sub-nodes in iteration order.
    """
    subtrees = OrderedDict(
        (prop, convert_to_dict(node[prop][0])) for prop in node.properties
    )
    if not subtrees:
        return {"Name": node.name}
    return {"Name": node.name, "Children": subtrees}


if __name__ == "__main__":
    # Script entry point: generate random trees, de-duplicate them by their
    # concatenated token string, and save them to the file named on the
    # command line.
    if len(sys.argv) != 2:
        print("Usage <outputFilename>")
        sys.exit(-1)

    samples = {}
    for _ in range(100000):
        random_tree = synthesize_random_trees(0.75)
        tokens = to_token_sequence(random_tree, [])
        # Too large sequences probably cause problems to Theano (or require
        # too much memory?), so only sequences of at most 500 tokens are kept.
        if len(tokens) <= 500:
            # Keyed by the joined tokens, so a repeated sequence overwrites
            # the earlier entry rather than adding a new one.
            samples["".join(tokens)] = (tokens, random_tree)
    print("Generated %s samples" % len(samples))

    # Each sample becomes an "Original" record with an empty "Noise" list.
    converted_to_standard_format = {
        key: {
            "Original": {"Tokens": pair[0], "Tree": convert_to_dict(pair[1])},
            "Noise": [],
        }
        for key, pair in samples.items()
    }

    save_result_as_gzipped_json(sys.argv[1], converted_to_standard_format)
def save_split(data: dict, filename: str):
    """Save every non-empty group of ``data`` as an Original/Noise record.

    For each key whose sample sequence has at least one element, the first
    element is stored as ``Original`` and the rest as ``Noise``. Keys with
    no samples are left out of the saved result.
    """
    non_empty = {
        eq_class: {"Original": group[0], "Noise": group[1:]}
        for eq_class, group in data.items()
        if len(group) > 0
    }
    save_result_as_gzipped_json(filename, non_empty)