예제 #1
0
def plot(config, experiments):
    """Plot accuracy against noise level, saving one PDF per experiment.

    The directory holding the configuration file is treated as the project
    directory. For every configured experiment the trial tags are collected
    from its saved state, then the noise results stored under
    ``<project_dir>/results/noise/<experiment>/<tag>`` are drawn as one curve
    per tag.

    :param config: Open configuration file; ``config.name`` locates the
                   project directory.
    :param experiments: Experiment selection forwarded to ``parse_config``.
    """
    print("config =", config.name)
    print("experiments =", experiments)

    # Use configuration file location as the project location.
    project_dir = os.path.abspath(os.path.dirname(config.name))
    print("project_dir =", project_dir)

    # locals() is handed to parse_config, so the names bound above this call
    # (project_dir, noise_experiments) are deliberately left untouched.
    noise_experiments = defaultdict(list)
    configs = parse_config(config, experiments, globals(), locals())

    # Collect the tag of every trial, grouped by experiment name.
    for exp_name in configs:
        exp_config = configs[exp_name]
        state_path = os.path.join(project_dir, exp_config["path"], exp_name)
        for trial in load_ray_tune_experiment(state_path)["checkpoints"]:
            noise_experiments[exp_name].append(trial["experiment_tag"])

    # One figure per experiment, one curve per tag.
    noise_root = os.path.join(project_dir, "results", "noise")
    for exp_name, tags in noise_experiments.items():
        fig, ax = plt.subplots()

        for tag in tags:
            tag_path = os.path.join(noise_root, exp_name, tag)
            if not os.path.exists(tag_path):
                # No noise results were produced for this tag.
                continue

            tag_state = load_ray_tune_experiment(tag_path, load_results=True)

            # The first accuracy seen for a given noise level is the one kept.
            accuracy_by_noise = {}
            for trial in tag_state["checkpoints"]:
                accuracy = trial["results"][0]["mean_accuracy"]
                noise_level = trial["config"]["noise"]
                accuracy_by_noise.setdefault(noise_level, accuracy)

            points = sorted(accuracy_by_noise.items(), key=lambda kv: kv[0])
            ax.plot([level for level, _ in points],
                    [score for _, score in points],
                    label=tag)

        fig.suptitle("Accuracy vs noise")
        ax.set_xlabel("Noise")
        ax.set_ylabel("Accuracy (percent)")
        plt.legend()
        plt.grid(axis="y")
        plt.savefig(os.path.join(noise_root, "{}_noise.pdf".format(exp_name)))
        plt.close()
def plot(config, experiments):
  """Draw the accuracy-vs-noise curves of each experiment into a PDF.

  The project directory is the directory containing the configuration file.
  Trial tags are read from each experiment's saved state; the matching noise
  results are looked up under ``<projectDir>/results/noise/<experiment>/<tag>``
  and plotted as one line per tag.

  :param config: Open configuration file; ``config.name`` gives its location.
  :param experiments: Experiment selection forwarded to ``parse_config``.
  """
  print("config =", config.name)
  print("experiments =", experiments)

  # Use configuration file location as the project location.
  projectDir = os.path.abspath(os.path.dirname(config.name))
  print("projectDir =", projectDir)

  # parse_config receives locals(); keep the local names defined up to this
  # point (projectDir, noise_experiments) exactly as they are.
  noise_experiments = defaultdict(list)
  configs = parse_config(config, experiments, globals(), locals())

  # Gather every trial tag per experiment
  for name in configs:
    settings = configs[name]
    saved_state = load_ray_tune_experiment(
      os.path.join(projectDir, settings["path"], name))
    for entry in saved_state["checkpoints"]:
      noise_experiments[name].append(entry["experiment_tag"])

  # Plot noise experiments
  results_root = os.path.join(projectDir, "results", "noise")
  for name, tag_list in noise_experiments.items():
    fig, ax = plt.subplots()

    for tag in tag_list:
      results_dir = os.path.join(results_root, name, tag)
      if os.path.exists(results_dir):
        noise_state = load_ray_tune_experiment(results_dir, load_results=True)

        # Keep only the first accuracy recorded for each noise value
        curve = {}
        for entry in noise_state["checkpoints"]:
          score = entry["results"][0]["mean_accuracy"]
          level = entry["config"]["noise"]
          if level not in curve:
            curve[level] = score

        xs = sorted(curve)
        ax.plot(xs, [curve[x] for x in xs], label=tag)

    fig.suptitle("Accuracy vs noise")
    ax.set_xlabel("Noise")
    ax.set_ylabel("Accuracy (percent)")
    plt.legend()
    plt.grid(axis='y')
    plt.savefig(os.path.join(results_root, "{}_noise.pdf".format(name)))
    plt.close()
예제 #3
0
def main(config):
  """Plot sample noisy images and the accuracy-vs-noise curve.

  The directory containing the configuration file is used as the project
  directory; both PDFs are written there. Noise results are read from the
  ``noise.json`` file in the log directory of the first checkpoint of each
  experiment listed in ``EXPERIMENTS``.

  :param config: Open configuration file; ``config.name`` gives its location.
  """
  # Use configuration file location as the project location.
  project_dir = os.path.abspath(os.path.dirname(config.name))

  # Plot noisy images
  plotImagesWithNoise(
    datadir=os.path.join(project_dir, "data"),
    noise_values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
    plotPath=os.path.join(project_dir, "mnist_images_with_noise.pdf"))

  # Load and parse experiment configurations
  configs = parse_config(config_file=config,
                         experiments=list(EXPERIMENTS.keys()),
                         globals=globals())

  results = {}
  for exp in configs:
    # Load experiment data
    exp_dir = os.path.join(project_dir, configs[exp]["path"], exp)
    state = load_ray_tune_experiment(experiment_path=exp_dir,
                                     load_results=True)

    # Only the first checkpoint's noise scores are used
    first_checkpoint = state["checkpoints"][0]
    noise_file = os.path.join(
      exp_dir, os.path.basename(first_checkpoint["logdir"]), "noise.json")
    with open(noise_file, "r") as f:
      results[exp] = json.load(f)

  plotNoiseCurve(configs=configs,
                 results=results,
                 plotPath=os.path.join(project_dir, "accuracy_vs_noise.pdf"))
예제 #4
0
def run_noise_test(config):
    """Run noise test on the best scoring model found during training. Make
    sure to train the models before calling this function.

    For every checkpoint that has results, the epoch with the highest
    ``mean_accuracy`` is restored and its noise test results are written to
    ``noise.json`` in that checkpoint's log directory.

    :param config: The configuration of the pre-trained model. The keys
                   "name", "path" and "data_dir" are read; "sync_function"
                   is optional and, when present, "upload_dir" is required.
    :return: None. Results are stored on disk, not returned.
    """
    # Load experiment data
    name = config["name"]
    experiment_path = os.path.join(config["path"], name)
    experiment_state = load_ray_tune_experiment(
        experiment_path=experiment_path, load_results=True)

    # Go through all checkpoints in the experiment
    all_checkpoints = experiment_state["checkpoints"]
    for checkpoint in all_checkpoints:
        results = checkpoint["results"]
        if results is None:
            continue

        # For each checkpoint select the epoch with the best accuracy as the
        # best epoch
        best_result = max(results, key=lambda x: x["mean_accuracy"])
        best_epoch = best_result["training_iteration"]
        best_config = best_result["config"]
        print("best_epoch: ", best_epoch)

        # Update path
        best_config["path"] = config["path"]
        best_config["data_dir"] = config["data_dir"]

        # Load pre-trained model from checkpoint and run noise test on it
        logdir = os.path.join(experiment_path,
                              os.path.basename(checkpoint["logdir"]))
        checkpoint_path = os.path.join(logdir,
                                       "checkpoint_{}".format(best_epoch))
        experiment = SparseSpeechExperiment(best_config)
        experiment.restore(checkpoint_path)

        # Run the test and serialize the results BEFORE opening the output
        # file: opening with "w" truncates it, so a failure in the noise test
        # or in serialization would otherwise leave an empty or partial
        # noise.json behind.
        res = experiment.run_noise_tests()
        serialized = json.dumps(res)

        # Save noise results in checkpoint log dir
        noise_test = os.path.join(logdir, "noise.json")
        with open(noise_test, "w") as f:
            f.write(serialized)

        # Compute total noise score
        total_correct = 0
        for k, v in res.items():
            print(k, v, v["total_correct"])
            total_correct += v["total_correct"]
        print("Total across all noise values", total_correct)

    # Upload results to S3
    sync_function = config.get("sync_function", None)
    if sync_function is not None:
        upload_dir = config["upload_dir"]
        final_cmd = sync_function.format(local_dir=experiment_path,
                                         remote_dir=upload_dir)
        # The command is started in a shell and not waited on.
        subprocess.Popen(final_cmd, shell=True)
예제 #5
0
def main(config, experiments, tablefmt):
  """Print a table of test and noise scores for each experiment and tag.

  Checkpoints of every experiment are grouped by their experiment tag with
  the seed removed. For each group the best ``mean_accuracy`` per checkpoint
  (as a percentage) and the summed ``total_correct`` from ``noise.json`` are
  reported as ``mean ± std``.

  :param config: Open configuration file; ``config.name`` locates the project
                 directory.
  :param experiments: Experiment selection forwarded to ``parse_config``.
  :param tablefmt: Table format forwarded to ``tabulate``.
  """
  # Use configuration file location as the project location.
  project_dir = os.path.abspath(os.path.dirname(config.name))
  print("project_dir =", project_dir)
  testScoresTable = [["Network", "Test Score", "Noise Score"]]

  # Load and parse experiment configurations
  configs = parse_config(config, experiments, globals=globals())

  def key_func(x):
    # Select tags ignoring seed
    stripped = re.sub(",|\\d+_|seed=\\d+", "", x["experiment_tag"])
    return re.split("[,_]", stripped)

  for exp in configs:
    exp_config = configs[exp]

    # Load experiment data
    experiment_path = os.path.join(project_dir, exp_config["path"], exp)
    state = load_ray_tune_experiment(
      experiment_path=experiment_path, load_results=True)

    # Group checkpoints by tags; the first element of the split tag is the
    # group name
    checkpoint_groups = {}
    ordered = sorted(state["checkpoints"], key=key_func)
    for group_key, members in groupby(ordered, key=key_func):
      checkpoint_groups[group_key[0]] = list(members)

    for tag, checkpoints in checkpoint_groups.items():
      testScores = np.zeros(len(checkpoints))
      noiseScores = np.zeros(len(checkpoints))

      for i, checkpoint in enumerate(checkpoints):
        results = checkpoint["results"]
        if results is None:
          # Checkpoints without results keep a score of zero
          continue

        # The epoch with the best accuracy is the best epoch
        best = max(results, key=lambda r: r["mean_accuracy"])
        testScores[i] = best["mean_accuracy"] * 100.0

        # Load noise score
        noise_file = os.path.join(
          experiment_path, os.path.basename(checkpoint["logdir"]), "noise.json")
        with open(noise_file, "r") as f:
          noise = json.load(f)

        noiseScores[i] = sum(entry["total_correct"] for entry in noise.values())

      row_name = "{} {}".format(exp, tag)
      test_score = u"{0:.2f} ± {1:.2f}".format(testScores.mean(), testScores.std())
      noise_score = u"{0:,.0f} ± {1:.2f}".format(noiseScores.mean(), noiseScores.std())
      testScoresTable.append([row_name, test_score, noise_score])

  print()
  print(tabulate(testScoresTable, headers="firstrow", tablefmt=tablefmt))
예제 #6
0
def run_noise_test(config):
  """
  Run noise test on the best scoring model found during training. Make sure to
  train the models before calling this function

  For every checkpoint that has results, the epoch with the highest
  ``mean_accuracy`` is restored and its noise test results are written to
  ``noise.json`` in that checkpoint's log directory.

  :param config: The configuration of the pre-trained model. The keys "name",
                 "path" and "data_dir" are read; "sync_function" is optional
                 and, when present, "upload_dir" is required.
  :return: None. Results are stored on disk, not returned.
  """
  # Load experiment data
  name = config["name"]
  experiment_path = os.path.join(config["path"], name)
  experiment_state = load_ray_tune_experiment(
    experiment_path=experiment_path, load_results=True)

  # Go through all checkpoints in the experiment
  all_checkpoints = experiment_state["checkpoints"]
  for checkpoint in all_checkpoints:
    results = checkpoint["results"]
    if results is None:
      continue

    # For each checkpoint select the epoch with the best accuracy as the best epoch
    best_result = max(results, key=lambda x: x["mean_accuracy"])
    best_epoch = best_result["training_iteration"]
    best_config = best_result["config"]

    # Update path
    best_config["path"] = config["path"]
    best_config["data_dir"] = config["data_dir"]

    # Load pre-trained model from checkpoint and run noise test on it
    logdir = os.path.join(experiment_path, os.path.basename(checkpoint["logdir"]))
    checkpoint_path = os.path.join(logdir, "checkpoint_{}".format(best_epoch))
    experiment = SparseSpeechExperiment(best_config)
    experiment.restore(checkpoint_path)

    # Run the test and serialize the results BEFORE opening the output file:
    # opening with "w" truncates it, so a failure in the noise test or in
    # serialization would otherwise leave an empty or partial noise.json.
    serialized = json.dumps(experiment.runNoiseTests())

    # Save noise results in checkpoint log dir
    noise_test = os.path.join(logdir, "noise.json")
    with open(noise_test, "w") as f:
      f.write(serialized)

  # Upload results to S3
  sync_function = config.get("sync_function", None)
  if sync_function is not None:
    upload_dir = config["upload_dir"]
    final_cmd = sync_function.format(local_dir=experiment_path,
                                     remote_dir=upload_dir)
    # The command is started in a shell and not waited on.
    subprocess.Popen(final_cmd, shell=True)
예제 #7
0
def main(config):
    """Plot the mean accuracy-vs-noise curve and sample noisy images.

    The directory containing the configuration file is used as the project
    directory; both PDFs are written there. For each experiment listed in
    ``EXPERIMENTS`` the ``mean_accuracy`` entries of every checkpoint's
    ``noise.json`` are averaged across checkpoints.

    :param config: Open configuration file; ``config.name`` gives its location.
    """
    # Use configuration file location as the project location.
    project_dir = Path(dirname(config.name)).expanduser().resolve()
    # Default data directory; replaced below by each experiment's "data_dir",
    # so the value of the last experiment is the one used for the image plot.
    data_dir = Path(project_dir) / "data"

    # Load and parse experiment configurations
    configs = parse_config(
        config_file=config,
        experiments=list(EXPERIMENTS.keys()),
        globals_param=globals(),
    )

    results = {}
    for exp in configs:
        config = configs[exp]

        # Load experiment data
        data_dir = Path(config["data_dir"]).expanduser().resolve()
        path = Path(config["path"]).expanduser().resolve()

        experiment_path = path / exp
        experiment_state = load_ray_tune_experiment(
            experiment_path=experiment_path, load_results=True)

        # Load noise score and compute the mean_accuracy over all checkpoints.
        # Rows are collected in a list and the frame is built once:
        # DataFrame.append was removed in pandas 2.0 (and copied the whole
        # frame on every call before that).
        accuracy_rows = []
        for checkpoint in experiment_state["checkpoints"]:
            logdir = experiment_path / basename(checkpoint["logdir"])
            filename = logdir / "noise.json"
            with open(filename, "r") as f:
                df = pd.DataFrame(json.load(f)).transpose()
            accuracy_rows.append(df["mean_accuracy"])

        # One row per checkpoint; columns come from the index of each series.
        exp_df = pd.DataFrame(accuracy_rows).reset_index(drop=True)
        results[exp] = exp_df.mean()

    plot_path = project_dir / "accuracy_vs_noise.pdf"
    plot_noise_curve(configs=configs, results=results, plot_path=plot_path)

    # Plot noisy images
    plot_path = project_dir / "mnist_images_with_noise.pdf"
    plot_images_with_noise(
        datadir=data_dir,
        noise_values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        plot_path=plot_path,
    )
예제 #8
0
def run_noise_test(config):
    """Run noise test on the best scoring model found during training. Make
    sure to train the models before calling this function.

    For every checkpoint that has results, the epoch with the highest
    ``mean_accuracy`` is restored using the parameters saved in that
    checkpoint's ``params.json``, and the noise test results are written to
    ``noise.json`` in the same log directory.

    :param config: The configuration of the pre-trained model. The keys
                   "name", "path" and "data_dir" are read.
    :return: None. Results are stored on disk, not returned.
    """
    # Load experiment data
    name = config["name"]
    experiment_path = os.path.join(config["path"], name)
    experiment_state = load_ray_tune_experiment(
        experiment_path=experiment_path, load_results=True
    )

    # Go through all checkpoints in the experiment
    all_checkpoints = experiment_state["checkpoints"]
    for checkpoint in all_checkpoints:
        results = checkpoint["results"]
        if results is None:
            continue

        # For each checkpoint select the epoch with the best accuracy as the
        # best epoch
        best_result = max(results, key=lambda x: x["mean_accuracy"])
        best_epoch = best_result["training_iteration"]

        # Load pre-trained model from checkpoint and run noise test on it
        logdir = os.path.join(experiment_path, os.path.basename(checkpoint["logdir"]))
        checkpoint_path = os.path.join(logdir, "checkpoint_{}".format(best_epoch))

        # Get the actual config from the saved version (required for sample
        # or grid search experiments). Replace paths to be the locally correct ones
        filename = os.path.join(logdir, "params.json")
        with open(filename, "r") as f:
            saved_params = json.load(f)
        saved_params["data_dir"] = config["data_dir"]
        saved_params["path"] = config["path"]

        experiment = MNISTSparseExperiment(saved_params)
        experiment.restore(checkpoint_path)

        # Run the test and serialize the results BEFORE opening the output
        # file: opening with "w" truncates it, so a failure in the noise test
        # or in serialization would otherwise leave an empty or partial
        # noise.json behind.
        serialized = json.dumps(experiment.run_noise_tests())

        # Save noise results in checkpoint log dir
        noise_test = os.path.join(logdir, "noise.json")
        with open(noise_test, "w") as f:
            f.write(serialized)
예제 #9
0
def main(config):
    """Plot sample noisy images and the accuracy-vs-noise curve.

    The directory containing the configuration file is used as the project
    directory; both PDFs are written there. Noise results are read from the
    ``noise.json`` file in the log directory of the first checkpoint of each
    experiment listed in ``EXPERIMENTS``.

    :param config: Open configuration file; ``config.name`` gives its location.
    """
    # Use configuration file location as the project location.
    project_dir = os.path.abspath(os.path.dirname(config.name))

    # Plot noisy images
    plot_images_with_noise(
        datadir=os.path.join(project_dir, "data"),
        noise_values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        plot_path=os.path.join(project_dir, "mnist_images_with_noise.pdf"),
    )

    # Load and parse experiment configurations
    configs = parse_config(
        config_file=config,
        experiments=list(EXPERIMENTS.keys()),
        globals_param=globals(),
    )

    results = {}
    for exp in configs:
        # Load experiment data
        exp_dir = os.path.join(project_dir, configs[exp]["path"], exp)
        state = load_ray_tune_experiment(experiment_path=exp_dir,
                                         load_results=True)

        # Only the first checkpoint's noise scores are used
        first_checkpoint = state["checkpoints"][0]
        noise_file = os.path.join(
            exp_dir, os.path.basename(first_checkpoint["logdir"]), "noise.json")
        with open(noise_file, "r") as f:
            results[exp] = json.load(f)

    plot_noise_curve(
        configs=configs,
        results=results,
        plot_path=os.path.join(project_dir, "accuracy_vs_noise.pdf"),
    )
예제 #10
0
def main(config, experiments, tablefmt):
    """Print a table of test and noise scores for each experiment and tag.

    Checkpoints of every experiment are grouped by their experiment tag with
    the seed removed. For each group the best ``mean_accuracy`` per checkpoint
    (as a percentage) and the summed ``total_correct`` from ``noise.json`` are
    reported as ``mean ± std``.

    :param config: Open configuration file; ``config.name`` locates the
                   project directory.
    :param experiments: Experiment selection forwarded to ``parse_config``.
    :param tablefmt: Table format forwarded to ``tabulate``.
    """
    # Use configuration file location as the project location.
    project_dir = os.path.dirname(config.name)
    project_dir = os.path.abspath(project_dir)
    print("project_dir =", project_dir)
    test_scores_table = [["Network", "Test Score", "Noise Score"]]

    # Load and parse experiment configurations
    configs = parse_config(config, experiments, globals_param=globals())

    # Select tags ignoring seed
    def key_func(x):
        # The split tag must be returned: without the return every key was
        # None, which made the ``k[0]`` lookup below raise TypeError.
        return re.split(
            "[,_]", re.sub(",|\\d+_|seed=\\d+", "", x["experiment_tag"]))

    for exp in configs:
        config = configs[exp]

        # Load experiment data
        experiment_path = os.path.join(project_dir, config["path"], exp)
        experiment_state = load_ray_tune_experiment(
            experiment_path=experiment_path, load_results=True)

        # Go through all checkpoints in the experiment
        all_checkpoints = experiment_state["checkpoints"]

        # Group checkpoints by tags. groupby only merges adjacent items, so
        # the checkpoints are sorted with the same key first.
        checkpoint_groups = {
            k[0]: list(v)
            for k, v in groupby(sorted(all_checkpoints, key=key_func),
                                key=key_func)
        }

        for tag in checkpoint_groups:
            checkpoints = checkpoint_groups[tag]
            num_exps = len(checkpoints)
            test_scores = np.zeros(num_exps)
            noise_scores = np.zeros(num_exps)

            for i, checkpoint in enumerate(checkpoints):
                results = checkpoint["results"]
                if results is None:
                    # NOTE: such checkpoints keep a score of 0 and still
                    # count toward the mean and std reported below.
                    continue

                # For each checkpoint select the epoch with the best accuracy as
                # the best epoch
                best_result = max(results, key=lambda x: x["mean_accuracy"])
                test_scores[i] = best_result["mean_accuracy"] * 100.0

                # Load noise score
                logdir = os.path.join(experiment_path,
                                      os.path.basename(checkpoint["logdir"]))
                filename = os.path.join(logdir, "noise.json")
                with open(filename, "r") as f:
                    noise = json.load(f)

                noise_scores[i] = sum(x["total_correct"]
                                      for x in noise.values())

            test_score = "{0:.2f} ± {1:.2f}".format(test_scores.mean(),
                                                    test_scores.std())
            noise_score = "{0:,.0f} ± {1:.2f}".format(noise_scores.mean(),
                                                      noise_scores.std())
            test_scores_table.append(
                ["{} {}".format(exp, tag), test_score, noise_score])

    print()
    print(tabulate(test_scores_table, headers="firstrow", tablefmt=tablefmt))
예제 #11
0
def run(config, experiments, num_cpus, num_gpus, redis_address, noise_values):
    """Launch noise experiments for the best checkpoint of every trial.

    The directory containing the configuration file is used as the project
    directory. CIFAR10 is downloaded into ``<project_dir>/data``, ray is
    initialized, and for each trial whose best-accuracy checkpoint exists on
    disk a remote ``run_experiment`` task is submitted with a grid search over
    ``noise_values``. The function waits for all tasks and shuts ray down.

    :param config: Open configuration file; ``config.name`` gives its location.
    :param experiments: Experiment selection forwarded to ``parse_config``.
    :param num_cpus: CPU count for a local ray instance; a value of 1 enables
                     ray's local mode.
    :param num_gpus: GPU count for a local ray instance; each task requests
                     ``min(1, num_gpus)``.
    :param redis_address: Address of an existing ray cluster, or None to start
                          ray locally.
    :param noise_values: Noise levels used for the grid search.
    """
    print("config =", config.name)
    print("experiments =", experiments)
    print("num_gpus =", num_gpus)
    print("num_cpus =", num_cpus)
    print("redis_address =", redis_address)
    print("noise_values =", noise_values)

    # Use configuration file location as the project location.
    project_dir = os.path.abspath(os.path.dirname(config.name))
    print("project_dir =", project_dir)

    # Download dataset
    data_dir = os.path.join(project_dir, "data")
    datasets.CIFAR10(data_dir, download=True, train=True)

    # Initialize ray cluster
    if redis_address is None:
        ray.init(num_cpus=num_cpus,
                 num_gpus=num_gpus,
                 local_mode=num_cpus == 1)
    else:
        ray.init(redis_address=redis_address, include_webui=True)

    # locals() is handed to parse_config, so no local name is added or
    # renamed before this call.
    configs = parse_config(config, experiments, globals(), locals())

    # Run all experiments in parallel
    ray_trials = []
    for exp in configs:
        exp_config = configs[exp]

        # noise experiment tune configuration
        noise_config = {
            "iterations": 1,
            "noise": {"grid_search": list(noise_values)},
        }

        # Download results from S3 when running on the cluster
        if redis_address is not None and "upload_dir" in exp_config:
            upload_dir = exp_config["upload_dir"]
            download_s3_results("{}/{}".format(upload_dir, exp),
                                os.path.join(exp_config["path"], exp))

            # Store noise results with original results in S3
            noise_config["upload_dir"] = "{}".format(upload_dir)
            noise_config["sync_function"] = (
                "aws s3 sync `dirname {local_dir}` {remote_dir}/`basename $(dirname {local_dir})`"  # noqa E501
            )
        else:
            noise_config.pop("upload_dir", None)
            noise_config.pop("sync_function", None)

        # Load experiment results
        experiment_path = os.path.join(project_dir, exp_config["path"], exp)
        state = load_ray_tune_experiment(experiment_path, load_results=True)
        for trial in state["checkpoints"]:
            # Make logs relative to experiment path
            logpath = os.path.join(experiment_path,
                                   os.path.basename(trial["logdir"]))

            # Check for experiment results
            results = trial["results"]
            if results is None:
                continue

            # Get best scoring model checkpoint from results
            best_result = max(results, key=lambda r: r["mean_accuracy"])
            checkpoint_path = os.path.join(
                logpath,
                "checkpoint_{}".format(best_result["training_iteration"]))
            if not os.path.exists(checkpoint_path):
                continue

            # Update data path
            model_config = best_result["config"]
            model_config["data_dir"] = data_dir

            # Run noise tests
            noise_config.update(
                name=trial["experiment_tag"],
                path=os.path.join(project_dir, "results", "noise", exp),
                checkpoint_path=checkpoint_path,
                model_config=model_config,
            )
            task = run_experiment.remote(
                noise_config,
                MobileNetNoiseTune,
                num_cpus=1,
                num_gpus=min(1, num_gpus),
            )
            ray_trials.append(task)

    # Wait for all experiments to complete
    ray.get(ray_trials)
    ray.shutdown()
예제 #12
0
def run(config, experiments, num_cpus, num_gpus, redis_address, noise_values):
  """Submit noise experiments for the best checkpoint of each trial.

  The project directory is the directory containing the configuration file.
  CIFAR10 is downloaded into ``<projectDir>/data``, ray is initialized, and a
  remote ``run_experiment`` task with a grid search over ``noise_values`` is
  submitted for every trial whose best-accuracy checkpoint exists on disk.
  All tasks are awaited before ray is shut down.

  :param config: Open configuration file; ``config.name`` gives its location.
  :param experiments: Experiment selection forwarded to ``parse_config``.
  :param num_cpus: CPU count for a local ray instance; a value of 1 enables
                   ray's local mode.
  :param num_gpus: GPU count for a local ray instance; each task requests
                   ``min(1, num_gpus)``.
  :param redis_address: Address of an existing ray cluster, or None to start
                        ray locally.
  :param noise_values: Noise levels used for the grid search.
  """
  print("config =", config.name)
  print("experiments =", experiments)
  print("num_gpus =", num_gpus)
  print("num_cpus =", num_cpus)
  print("redis_address =", redis_address)
  print("noise_values =", noise_values)

  # Use configuration file location as the project location.
  projectDir = os.path.abspath(os.path.dirname(config.name))
  print("projectDir =", projectDir)

  # Download dataset
  data_dir = os.path.join(projectDir, "data")
  datasets.CIFAR10(data_dir, download=True, train=True)

  # Initialize ray cluster
  if redis_address is None:
    ray.init(num_cpus=num_cpus, num_gpus=num_gpus, local_mode=num_cpus == 1)
  else:
    ray.init(redis_address=redis_address, include_webui=True)

  # parse_config receives locals(); no local name is introduced or renamed
  # ahead of this call.
  configs = parse_config(config, experiments, globals(), locals())

  # Run all experiments in parallel
  ray_trials = []
  for exp in configs:
    settings = configs[exp]

    # noise experiment tune configuration
    noise_config = {}
    noise_config["iterations"] = 1
    noise_config["noise"] = {"grid_search": list(noise_values)}

    # Download results from S3 when running on the cluster
    if redis_address is not None and "upload_dir" in settings:
      upload_dir = settings["upload_dir"]
      remote_results = "{}/{}".format(upload_dir, exp)
      download_s3_results(remote_results, os.path.join(settings["path"], exp))

      # Store noise results with original results in S3
      noise_config["upload_dir"] = "{}".format(upload_dir)
      noise_config["sync_function"] = "aws s3 sync `dirname {local_dir}` {remote_dir}/`basename $(dirname {local_dir})`"
    else:
      noise_config.pop("upload_dir", None)
      noise_config.pop("sync_function", None)

    # Load experiment results
    experiment_path = os.path.join(projectDir, settings["path"], exp)
    saved_state = load_ray_tune_experiment(experiment_path, load_results=True)
    for entry in saved_state["checkpoints"]:
      # Make logs relative to experiment path
      logpath = os.path.join(experiment_path, os.path.basename(entry["logdir"]))

      # Check for experiment results
      results = entry["results"]
      if results is not None:
        # Get best scoring model checkpoint from results
        best_result = max(results, key=lambda item: item["mean_accuracy"])
        best_epoch = best_result["training_iteration"]
        checkpoint_path = os.path.join(logpath, "checkpoint_{}".format(best_epoch))

        if os.path.exists(checkpoint_path):
          # Update data path
          model_config = best_result["config"]
          model_config["data_dir"] = data_dir

          # Run noise tests
          noise_config["name"] = entry["experiment_tag"]
          noise_config["path"] = os.path.join(projectDir, "results", "noise", exp)
          noise_config["checkpoint_path"] = checkpoint_path
          noise_config["model_config"] = model_config

          ray_trials.append(
            run_experiment.remote(noise_config, MobileNetNoiseTune,
                                  num_cpus=1, num_gpus=min(1, num_gpus)))

  # Wait for all experiments to complete
  ray.get(ray_trials)
  ray.shutdown()