Beispiel #1
0
def build_dgi_dataset(args):
    """Build a compressed graph dataset from a playthrough file.

    Each usable line of ``args.input`` is parsed as one JSON object (a
    playthrough step). For every step, the graph stored under
    ``"graph_<args.graph_type>"`` is passed through
    ``GraphDataset.compress`` and kept with the step's ``game`` and ``step``
    values. The result is written to ``args.output`` as a JSON object with
    two keys: ``"graph_index"`` (from ``GraphDataset.dumps()``) and
    ``"examples"`` (the list of per-step records).

    Args:
        args: Namespace-like object providing ``input`` (path of the file to
            read), ``output`` (destination path or ``None``), ``graph_type``
            (suffix selecting the ``graph_*`` key) and ``verbose``.

    Side effects:
        When ``args.output`` is ``None`` it is set in place to the input path
        with its extension replaced by ``.dgi.<graph_type>.json``.
    """
    # Expected key for each playthrough:
    # game, step, action, graph_local, graph_seen, graph_full,
    graph_key = "graph_{}".format(args.graph_type)

    graph_dataset = GraphDataset()
    dataset = []
    # Read inside a context manager so the input handle is closed
    # deterministically, even if a line fails to parse.
    with open(args.input) as f:
        for line in f:
            # Skip blank lines and single-character lines (presumably the
            # "[" / "]" delimiters of a JSON array written one item per line).
            if len(line.strip()) <= 1:
                continue

            # Drop the trailing separator so each line parses on its own.
            example = json.loads(line.rstrip(",\n"))

            # For each data point we want the following 3 keys:
            # game, step, graph
            dataset.append({
                "game": example["game"],
                "step": example["step"],
                "graph": graph_dataset.compress(example[graph_key]),
            })

    if args.output is None:
        args.output = os.path.splitext(args.input)[0] + ".dgi.{}.json".format(
            args.graph_type)

    data = {
        "graph_index": graph_dataset.dumps(),
        "examples": dataset,
    }
    with open(args.output, "w") as f:
        json.dump(data, f)

    if args.verbose:
        print("This dataset has {:,} datapoints.".format(len(dataset)))
Beispiel #2
0
def compress_command_generation_dataset(args):
    """Compress a command-generation playthrough file into a single JSON file.

    Each usable line of ``args.input`` is parsed as one JSON object (a
    playthrough step). The step's ``previous_graph_seen`` value is passed
    through ``GraphDataset.compress``; ``game``, ``step``, ``observation``,
    ``previous_action`` and ``target_commands`` are copied unchanged. The
    result is written to ``args.output`` as a JSON object with two keys:
    ``"graph_index"`` (from ``GraphDataset.dumps()``) and ``"examples"``
    (the list of per-step records).

    Args:
        args: Namespace-like object providing ``input`` (path of the file to
            read), ``output`` (destination path or ``None``) and ``verbose``.

    Side effects:
        When ``args.output`` is ``None`` it is set in place to the input path
        with its extension replaced by ``.cmd_gen.json``.
    """
    # Expected key for each playthrough:
    # game, step, observation, previous_action, target_commands, previous_graph_seen, graph_seen
    graph_dataset = GraphDataset()
    dataset = []
    # Read inside a context manager so the input handle is closed
    # deterministically, even if a line fails to parse.
    with open(args.input) as f:
        # Blank and single-character lines are skipped (presumably the
        # "[" / "]" delimiters of a JSON array written one item per line);
        # the trailing separator is stripped so each line parses on its own.
        playthroughs = (json.loads(line.rstrip(",\n")) for line in f
                        if len(line.strip()) > 1)

        for example in tqdm(playthroughs):
            # For each data point we want the following 6 keys:
            # game, step, observation, previous_action, target_commands, previous_graph_seen
            dataset.append({
                "game": example["game"],
                "step": example["step"],
                "observation": example["observation"],
                "previous_action": example["previous_action"],
                "previous_graph_seen": graph_dataset.compress(
                    example["previous_graph_seen"]),
                "target_commands": example["target_commands"],
            })

    if args.output is None:
        args.output = os.path.splitext(args.input)[0] + ".cmd_gen.json"

    data = {
        "graph_index": graph_dataset.dumps(),
        "examples": dataset,
    }
    with open(args.output, "w") as f:
        json.dump(data, f)

    if args.verbose:
        print("This dataset has {:,} datapoints.".format(len(dataset)))