Example #1
import os

import parse_logs  # project-local log parser used below


def main(argv):
    # argv[0] is expected to be the directory that holds the job logs.
    dirname = argv[0]
    print("Parsing files in", dirname)
    agg_results_filename = os.path.join(dirname, "agg_results")
    for filename in os.listdir(dirname):
        full_name = os.path.join(dirname, filename)
        # Only parse regular files whose names end in "job_log".
        if os.path.isfile(full_name) and filename.endswith("job_log"):
            print("Parsing file", full_name)
            parse_logs.parse(full_name, agg_results_filename)
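A minimal launcher for this entry point, assuming the log directory is passed as the first command-line argument; the sys.argv[1:] slice is an assumption implied by argv[0] being treated as the directory:

import sys

if __name__ == "__main__":
    # Skip the script name so that argv[0] is the log directory.
    main(sys.argv[1:])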
Example #2
import os
import queue
from subprocess import PIPE, Popen


def run_tracker(t_id, lock, tools, parsed_data):
    """Worker thread: pull (detector, descriptor) cases from the `tools`
    queue, run the tracker binary on each, and push parsed results into
    `parsed_data`. `parse` and `output_log_dir` are assumed to be defined
    at module level."""
    to_continue = True
    exe = os.path.join("..", "build", "3D_object_tracking")
    lock.acquire()
    print(f"Thread {t_id} starts")
    lock.release()
    while to_continue:
        try:
            case = tools.get(block=False)
            # exist_ok avoids a crash when the directory survives a rerun.
            os.makedirs(f"./log/img/{case[0]}_{case[1]}", exist_ok=True)
            command_line = [exe] + (
                f"-det {case[0]} -des {case[1]} "
                f"-dir ./log/img/{case[0]}_{case[1]} -sel SEL_NN").split()
            proc = Popen(command_line, stderr=PIPE, stdout=PIPE)
            out_bin, err_bin = proc.communicate()
            if proc.returncode != 0:
                # The lock keeps the failure report readable across threads.
                lock.acquire()
                print(case)
                lock.release()
            if err_bin:
                parsed_data.put((f"{case[0]}_{case[1]}", None))
            else:
                parsed_data.put(
                    (f"{case[0]}_{case[1]}", parse(out_bin.decode())))
                with open(os.path.join(output_log_dir,
                                       f"{case[0]}_{case[1]}.log"),
                          "wb") as fid:
                    fid.write(out_bin)
            tools.task_done()
        except queue.Empty:
            # Queue drained: let this worker exit.
            to_continue = False
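A sketch of a driver for run_tracker, assuming a work queue of (detector, descriptor) pairs and a fixed worker count; run_all and both argument lists are hypothetical and not part of the original code:

import queue
import threading

def run_all(detectors, descriptors, n_threads=4):
    # One work item per detector/descriptor combination.
    tools = queue.Queue()
    for det in detectors:
        for des in descriptors:
            tools.put((det, des))
    lock = threading.Lock()
    parsed_data = queue.Queue()
    workers = [
        threading.Thread(target=run_tracker,
                         args=(i, lock, tools, parsed_data))
        for i in range(n_threads)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    # Drain the result queue into a plain dict keyed by "det_des".
    results = {}
    while not parsed_data.empty():
        key, value = parsed_data.get()
        results[key] = value
    return results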
Example #4
def load_scores(result_table, metric, domain):
    for corpus in result_table:
        for run in result_table[corpus]:
            log_file = parse(run["path"])
            scores = calculate_perfect_validation(log_file, corpus, 0)
            run["scores"] = scores
        # Sort the entries by validation score in reverse order.
        result_table[corpus] = sorted(
            result_table[corpus],
            key=lambda run: run["scores"][domain].get(metric, {"score": 0})["score"],
            reverse=True)
    return result_table
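For reference, a minimal sketch of the input shape load_scores expects; the paths here are hypothetical placeholders, and parse must be able to read them:

result_table = {
    "de-en": [
        {"path": "logs/de-en.run0.log"},
        {"path": "logs/de-en.run1.log"},
    ],
}
ranked = load_scores(result_table, metric="valid", domain="mixed")
# Each run now carries a "scores" dict, ordered best-first per corpus.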
Example #5
def show_runs(corpora, log_files=LOG_FILES, metric="valid", domain="mixed"):
    """
    Prints valid and missing runs from corpora selection
    """
    if isinstance(corpora, str):
        corpora = corpora.split()
    logs = parse(
        # Load only files from current selection
        [
            log for corpus, files in log_files.items() for log in files
            if corpus in corpora
        ])
    scores = load_scores(
        {
            corpus: [run["params"] for run in logs[corpus]["train"]]
            for corpus in logs
        }, metric, domain)

    params = []
    for corpus in corpora:
        for run in scores[corpus]:
            for param in run:
                if param not in params:
                    params.append(param)
    trainings_table = PrettyTable(["corpus"] + [
        param for param in params
        if param not in ["corpus", "scores", "path", "size"]
    ] + ["score", "step", "path", "size"])
    trainings_table.align["score"] = "r"
    trainings_table.align["size"] = "r"
    trainings_table.align["path"] = "l"
    trainings_table.float_format = "0.3"
    for corpus in corpora:
        for run in scores[corpus]:
            score_entry = run["scores"].get(domain, {}).get(
                metric, {"score": 0.0})
            trainings_table.add_row([corpus] + [
                run.get(k) for k in params
                if k not in ["corpus", "scores", "path", "size"]
            ] + [
                score_entry["score"],
                score_entry.get("step"),
                basename(run["path"]),
                sizeof_fmt(getsize(run["path"]))
            ])
    print(trainings_table.get_string(sortby="score", reversesort=True))
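A hedged usage example; the corpus names are placeholders, and either a list or a whitespace-separated string is accepted:

show_runs("de-en en-de", metric="valid", domain="mixed")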
Example #6
def build_trainings_config(corpora,
                           log_files=LOG_FILES,
                           schedule=HYPER_CONFIGS,
                           default_params=TRAININGS_PARAMS,
                           metric="valid",
                           domain="mixed"):
    """
    Builds dictonary, containng all the configs that are missing in the traingings data.
    Works with mulitple corpora as well!
    """
    if isinstance(corpora, str):
        corpora = corpora.split()
    logs = parse(
        # Load only files from current selection
        [
            log for corpus, files in log_files.items() for log in files
            if corpus in corpora
        ])
    results = build_result_table(
        {
            corpus: configs
            for corpus, configs in schedule.items() if corpus in corpora
        }, logs)

    scores = load_scores(results, metric, domain)
    missing_configs = {}

    for corpus in corpora:
        for config in schedule[corpus]:
            runs = [
                run for run in scores[corpus]
                if config == {k: v
                              for k, v in run.items() if k in config}
                and isinstance(run["scores"]["valid"][1], float)
            ]
            if not runs:
                if corpus not in missing_configs:
                    missing_configs[corpus] = []
                missing_configs[corpus].append(config)

    for corpus, configs in missing_configs.items():
        for i, config in enumerate(configs):
            with open(f"train.{corpus}.{i}.config", "w") as config_file:
                dump({**config, **default_params.get(corpus, {})}, config_file)
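A hedged usage sketch: with the default schedule, this writes one train.<corpus>.<i>.config file per configuration that has no completed run yet (the corpus name is a placeholder):

build_trainings_config("de-en")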
Example #7
def plot_trainings_curve(files=EXAMPLE_RUNS["training"]):
    """Plots a trainings curve for the given runs.
    !!!Currently broken and output disabled!!!
    """
    for name, path in files.items():
        train_stats = parse(path)
        for corpus in train_stats:
            fig = plt.figure()
            fig.suptitle(f"Trainings Curve: {corpus}")
            plot_index = 1
            for run in train_stats[corpus]["train"]:
                axis = plt.subplot(len(train_stats[corpus]["train"]), 1,
                                   plot_index)
                axis.set_xlabel("Trainings Steps")
                axis.set_ylabel("Accuracy")
                plot_index += 1
                axis.set_ylim(0, 100)

                data = {
                    "step": [point.get("step", 0) for point in run["steps"]],
                    "train":
                    [point.get("train_accuracy", 0) for point in run["steps"]],
                    "valid":
                    [point.get("valid_accuracy", 0) for point in run["steps"]],
                }
                plt.scatter(data["step"],
                            data["train"],
                            label="Trainings Accuracy")
                plt.scatter(data["step"],
                            data["valid"],
                            label="Validation Accuracy")
                axis.legend()

        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        #plt.savefig(f"{IMAGE_DIR}/trainings_curve-{name}.png", bbox_inches="tight", dpi=200)
        plt.clf()
Example #8
def show_schedule(corpora,
                  log_files=LOG_FILES,
                  schedule=HYPER_CONFIGS,
                  metric="valid",
                  domain="mixed"):
    """
    Prints valid and missing runs from corpora selection
    """
    if isinstance(corpora, str):
        corpora = corpora.split()
    logs = parse(
        # Load only files from current selection
        [
            log for corpus, files in log_files.items() for log in files
            if corpus in corpora
        ])
    results = build_result_table(
        {
            corpus: configs
            for corpus, configs in schedule.items() if corpus in corpora
        }, logs)

    scores = load_scores(results, metric, domain)

    for corpus in corpora:
        for config in schedule[corpus]:
            if config == schedule[corpus][0]:
                trainings_table = PrettyTable(
                    ["corpus"] + list(config.keys()) +
                    ["runs", "score", "step", "path", "size"])
                trainings_table.align["score"] = "r"
                trainings_table.align["size"] = "r"
                trainings_table.align["path"] = "l"
                trainings_table.float_format = "0.3"
            runs = [
                run for run in scores[corpus]
                if config == {k: v
                              for k, v in run.items() if k in config}
                #and isinstance(run["scores"]["valid"][1], float)
            ]
            if runs:
                best_run = reduce(
                    lambda x, y: x
                    if x["scores"].get(domain, {}).get(metric, {"score": 0})["score"]
                    > y["scores"].get(domain, {}).get(metric, {"score": 0})["score"]
                    else y, runs)
                best_scores = best_run["scores"].get(domain, {}).get(
                    metric, {"score": 0.0})
                trainings_table.add_row(
                    [corpus] + [str(v) for v in config.values()] + [
                        len(runs),
                        best_scores["score"],
                        best_scores.get("step"),
                        basename(best_run["path"]),
                        sizeof_fmt(getsize(best_run["path"]))
                    ])
            else:
                trainings_table.add_row([corpus] +
                                        [str(v) for v in config.values()] +
                                        [len(runs), 0, 0.0, "Not found", 0.0])
        print(trainings_table.get_string(sortby="score", reversesort=True))
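The nested reduce lambda above (and its twins in the plotting examples below) just picks the run with the highest score; an equivalent helper, keeping the same last-on-ties behaviour, might read:

from functools import reduce

def best_by_score(runs, domain, metric):
    # Same lookup the lambdas use, with a 0 default for missing entries.
    def score(run):
        return run["scores"].get(domain, {}).get(metric, {"score": 0})["score"]
    return reduce(lambda x, y: x if score(x) > score(y) else y, runs)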
Example #9
def plot_language_comparison(files=EXAMPLE_RUNS["side_constraint"],
                             metrics=["train", "valid", "bleu"],
                             name=None):
    results = {}
    for pair, data_sets in files.items():
        results[pair] = {}
        for metric in metrics:
            results[pair][metric] = {"labels": [], "Tagged": [], "Clean": []}

            for domain, groups in data_sets.items():
                logs = parse(list(groups.values()))
                scores = load_scores(
                    {
                        corpus:
                        [run["params"] for run in logs[corpus]["train"]]
                        for corpus in logs
                    }, metric, domain)
                if scores:
                    results[pair][metric]["labels"].append(domain)
                for corpus in scores:
                    group = corpus.split("-")[0]
                    best_run = reduce(
                        lambda x, y: x if x["scores"].get(domain, {}).get(
                            metric, {"score": 0})["score"] > y["scores"].get(
                                domain, {}).get(metric, {"score": 0})["score"]
                        else y, scores[corpus])
                    results[pair][metric][group].append(best_run["scores"].get(
                        domain, {}).get(metric, {"score": 0})["score"])

            results[pair][metric]["Delta"] = [
                ((results[pair][metric]["Tagged"][i] -
                  results[pair][metric]["Clean"][i]) /
                 results[pair][metric]["Clean"][i]) * 100
                for i in range(len(results[pair][metric]["labels"]))
            ]

    # Settings for the actual bars
    # stolen from https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/barchart.html
    fig = plt.figure()
    fig.suptitle(f"Score Change between Language Pairs")
    width = 0.35

    for pair in files:
        plot_index = 1
        for metric in metrics:
            axis = plt.subplot(len(metrics), 1, plot_index)
            axis.set_ylabel(f"$\\Delta$ {metric} in %")

            if pair == "de-en":
                x_positions = range(len(results[pair][metric]["labels"]))
                axis.set_xticklabels(results[pair][metric]["labels"])
                axis.set_xticks(x_positions)
                axis.spines['bottom'].set_position('zero')
                axis.spines['right'].set_visible(False)
                axis.spines['top'].set_visible(False)
                axis.xaxis.tick_top()

            # build bars
            axis.bar(
                [
                    x + (width / 2 * (-1 if pair == "de-en" else 1))
                    for x in x_positions
                ],
                results[pair][metric]["Delta"],
                width,
                edgecolor="black",
                #   hatch="" if pair == "de-en" else "o",
                label=pair)

            for i, _ in enumerate(results[pair][metric]["labels"]):
                axis.annotate(
                    "{0:.2f}".format(results[pair][metric]["Delta"][i]),
                    xy=(x_positions[i] + (width / 2 *
                                          (-1 if pair == "de-en" else 1)),
                        results[pair][metric]["Delta"][i]),
                    xytext=(0, -1
                            if results[pair][metric]["Delta"][i] < 0 else 1),
                    textcoords="offset points",
                    ha="center",
                    va="top"
                    if results[pair][metric]["Delta"][i] < 0 else "bottom")
            plot_index += 1
#            axis.legend(loc="upper right")
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig.legend(files.keys(), loc="lower right")
    plt.savefig(f"{IMAGE_DIR}/{name}"
                if name else f"{IMAGE_DIR}/language-comparison.png",
                bbox_inches="tight",
                dpi=200)
    fig.clf()
Example #10
def plot_side_constraint_comparison(files=EXAMPLE_RUNS["side_constraint"],
                                    metric="bleu",
                                    name=None):
    fig = plt.figure()
    fig.suptitle(f"Performance grouped by Domain")
    plot_index = 1
    for pair, data_sets in files.items():
        results = {"labels": [], "Tagged": [], "Clean": []}

        for domain, groups in data_sets.items():
            logs = parse(list(groups.values()))
            scores = load_scores(
                {
                    corpus: [run["params"] for run in logs[corpus]["train"]]
                    for corpus in logs
                }, metric, domain)
            if scores:
                results["labels"].append(domain)
            for corpus in scores:
                group = corpus.split("-")[0]
                best_run = reduce(
                    lambda x, y: x
                    if x["scores"].get(domain, {}).get(metric, {"score": 0})["score"]
                    > y["scores"].get(domain, {}).get(metric, {"score": 0})["score"]
                    else y, scores[corpus])
                # Scale fractional metrics to 0-100; valid/train/bleu
                # scores are used as-is.
                results[group].append(
                    best_run["scores"].get(domain, {}).get(
                        metric, {"score": 0})["score"] *
                    (1 if metric in ["valid", "train", "bleu", "bleu_lc"]
                     else 100))

        # Settings for the actual bars
        # stolen from https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/barchart.html
        axis = plt.subplot(len(files), 1, plot_index)

        x_positions = range(len(results["labels"]))

        axis.spines['top'].set_visible(False)
        axis.spines['right'].set_visible(False)
        axis.spines['bottom'].set_position('zero')
        axis.set_ylabel(f"{metric}-Score")
        axis.set_xlabel("Domains")
        axis.set_title(f"Performance for {pair}")
        axis.set_ylim(0, 100)

        width = 0.35

        axis.set_xticks(x_positions)
        axis.set_xticklabels(results["labels"])

        # build bars
        axis.bar([x - width / 2 for x in x_positions],
                 results["Clean"],
                 width,
                 label="Clean",
                 edgecolor="black")
        axis.bar([x + width / 2 for x in x_positions],
                 results["Tagged"],
                 width,
                 label="Tagged",
                 edgecolor="black")

        for i, _ in enumerate(results["labels"]):
            axis.annotate("{0:.2f}".format(results["Clean"][i]),
                          xy=(x_positions[i] - width / 2,
                              results["Clean"][i] + 1),
                          xytext=(0, 3),
                          textcoords="offset points",
                          ha='center')
            axis.annotate("{0:.2f}".format(results["Tagged"][i]),
                          xy=(x_positions[i] + width / 2,
                              results["Tagged"][i] + 1),
                          xytext=(0, 3),
                          textcoords="offset points",
                          ha='center')

        plot_index += 1
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig.legend(["Clean", "Tagged"])
    plt.savefig(f"{IMAGE_DIR}/{name}"
                if name else f"{IMAGE_DIR}/absolute-performace-comparison.png",
                bbox_inches="tight",
                dpi=200)
    fig.clf()
Example #11
def plot_hyperparameter_optim(files=EXAMPLE_RUNS["hyper_opt"], metric="bleu"):
    """Plots a comparison of the valid score and another metric (bleu by default)
    !!!Currently broken and output disabled!!!
    """
    for group in files:
        fig = plt.figure()
        fig.suptitle(f"Model Fittness according to {metric}-Score")
        train_stats = parse(files[group])
        for corpus in train_stats:
            plot_index = 1
            axis = plt.subplot(len(train_stats) * 2, 1, plot_index)
            for i, run in enumerate(train_stats[corpus]["train"]):
                axis.set_ylim(0, 100)
                axis.set_xlabel("Trainings Steps")
                axis.set_ylabel("Score")
                data = {
                    "train-steps":
                    [int(point.get("step", 0)) for point in run["steps"][::5]],
                    "valid": [
                        float(point.get("valid_accuracy", 0))
                        for point in run["steps"][::5]
                    ],
                }
                if i == 0:
                    plt.scatter(data["train-steps"],
                                data["valid"],
                                marker="x",
                                label=f"Validation Accuracy")
                else:
                    plt.scatter(data["train-steps"],
                                data["valid"],
                                0.1,
                                marker="x")
            axis.legend()

            plot_index += 1
            axis = plt.subplot(len(train_stats) * 2, 1, plot_index)
            for i, run in enumerate(train_stats[corpus]["train"]):
                axis.set_ylim(0, 100)
                axis.set_xlabel("Trainings Steps")
                axis.set_ylabel("Score")
                data = {
                    "score-steps":
                    [int(point.get("step", 0)) for point in run["scores"]],
                    "score": [
                        float(point.get(metric, 0)) *
                        (1 if metric in ["bleu", "bleu_lc"] else 100)
                        for point in run["scores"]
                    ],
                }
                if i == 0:
                    plt.scatter(data["score-steps"],
                                data["score"],
                                marker="^",
                                label=f"{metric}-Score")
                else:
                    plt.scatter(data["score-steps"],
                                data["score"],
                                5,
                                marker="o")
            axis.legend()

        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        # plt.savefig(f"{IMAGE_DIR}/optim_comparison-{group.replace(' ','_')}.png",
        #             bbox_inches="tight", dpi=200)
    plt.clf()