def check_runs_match_table(runs):
    """Warn when the highest run number stored in the task table exceeds the
    highest run directory found on disk (a sign the table is stale).

    Args:
        runs: list of run directory names; sorted in place (natural order).
    """
    natural_sort(runs)
    highest_run = get_trailing_num(runs[-1])
    cursor = db.cursor()
    # table/column identifiers cannot be bound parameters, hence str.format
    query = "SELECT MAX({}) FROM {}".format(params.run_prefix, params.task_table_name)
    cursor.execute(query)
    stored_max = cursor.fetchone()[0]
    if stored_max and highest_run < stored_max:
        print("stored run ({}) is higher than existing run ({}); consider running with --clean to remake task table".format(stored_max, highest_run))
def check_last_runs_table(runs):
    """Record the latest parsed date and run number from the task table onto
    `params`; on an empty table, mark this as the first population.

    Args:
        runs: list of run directory names; sorted in place (natural order).
    """
    natural_sort(runs)
    cursor = db.cursor()
    cursor.execute("SELECT MAX({}), MAX({}) FROM {}".format(
        "parsed_date", "run", params.task_table_name))
    row = cursor.fetchone()
    if not row[0]:
        # no rows yet: nothing has been parsed before
        print("first population")
        params.last_run = 0
        return
    # storing in database truncates decimal, so add 1 second
    last_parsed_date = row[0] + 1
    last_run = row[1]
    print("last parsed date", last_parsed_date)
    print("last run", last_run)
    params.last_parsed_date = last_parsed_date
    params.last_run = last_run
def initialize(self, opt):
    """Collect, naturally sort, truncate, and sanity-check dataset paths.

    Args:
        opt: options object; uses no_instance, max_dataset_size and
             no_pairing_check, and is stored on self.opt.
    """
    self.opt = opt
    label_paths, image_paths, instance_paths = self.get_paths(opt)

    util.natural_sort(label_paths)
    util.natural_sort(image_paths)
    if not opt.no_instance:
        util.natural_sort(instance_paths)

    limit = opt.max_dataset_size
    label_paths = label_paths[:limit]
    image_paths = image_paths[:limit]
    instance_paths = instance_paths[:limit]

    if not opt.no_pairing_check:
        # verify each label file is paired with a matching image file
        for path1, path2 in zip(label_paths, image_paths):
            assert self.paths_match(path1, path2), \
                "The label-image pair (%s, %s) do not look like the right pair because the filenames are quite different. Are you sure about the pairing? Please see data/pix2pix_dataset.py to see what is going on, and use --no_pairing_check to bypass this." % (path1, path2)

    self.label_paths = label_paths
    self.image_paths = image_paths
    self.instance_paths = instance_paths
    self.dataset_size = len(self.label_paths)
def load_history(directory, remote_dir=False):
    """Load the history of a training process.

    Reads every "*_training.txt" JSON file under MODEL_PATH/directory
    (or REMOTE/directory when remote_dir is True), in natural sort order.

    Args:
        directory: sub-directory name containing the result files.
        remote_dir: if True, resolve against REMOTE instead of MODEL_PATH.

    Returns:
        list of parsed JSON objects, one per training-history file.
    """
    base = Path(REMOTE) if remote_dir else Path(MODEL_PATH)
    path = base / directory
    # str.endswith is clearer and safer than the hard-coded slice x[-13:]
    result_files = [x for x in os.listdir(path) if x.endswith("_training.txt")]
    # some natural_sort implementations sort in place and return None;
    # guard so result_files is never rebound to None
    sorted_files = natural_sort(result_files)
    if sorted_files is not None:
        result_files = sorted_files
    results = []
    for name in result_files:
        with open(path / name, "r") as file:
            results.append(json.load(file))
    return results
def load_configs(directory, remote_dir=False):
    """Load the configurations of an ablation study for plotting the results.

    Reads every "*_config.json" file under MODEL_PATH/directory
    (or REMOTE/directory when remote_dir is True), in natural sort order.

    Args:
        directory: sub-directory name containing the config files.
        remote_dir: if True, resolve against REMOTE instead of MODEL_PATH.

    Returns:
        list of parsed JSON objects, one per config file.
    """
    base = Path(REMOTE) if remote_dir else Path(MODEL_PATH)
    path = base / directory
    # str.endswith is clearer and safer than the hard-coded slice x[-12:]
    result_files = [x for x in os.listdir(path) if x.endswith("_config.json")]
    # some natural_sort implementations sort in place and return None;
    # guard so result_files is never rebound to None
    sorted_files = natural_sort(result_files)
    if sorted_files is not None:
        result_files = sorted_files
    results = []
    for name in result_files:
        with open(path / name, "r") as file:
            results.append(json.load(file))
    return results
def load_evaluation_results(directory, remote_dir=False):
    """Load the evaluation results of the test data of a final model.

    The files should be named "..{i}_test_metrics.txt" with i being the i-th
    repetition of model training (final models are trained 5 times to report
    average and variance of the evaluation results, hyper-parameter studies
    are done with 3 repetitions). Also prints the average first epoch at
    which Hits@1 reached its maximum across the "*_training.txt" histories.

    Args:
        directory: sub-directory name containing the result files.
        remote_dir: if True, resolve against REMOTE instead of MODEL_PATH.

    Returns:
        list of parsed JSON objects, one per test-metrics file.
    """
    base = Path(REMOTE) if remote_dir else Path(MODEL_PATH)
    path = base / directory
    files = os.listdir(path)
    # str.endswith is clearer and safer than the hard-coded slice x[-17:]
    result_files = [x for x in files if x.endswith("_test_metrics.txt")]
    # some natural_sort implementations sort in place and return None;
    # guard so result_files is never rebound to None
    sorted_files = natural_sort(result_files)
    if sorted_files is not None:
        result_files = sorted_files
    results = []
    for name in result_files:
        with open(path / name, "r") as file:
            results.append(json.load(file))
    history_files = [x for x in files if x.endswith("_training.txt")]
    histories = []
    for name in history_files:
        with open(path / name, "r") as file:
            data = np.array(json.load(file)['Hits@1'])
        # np.argmax returns the first index of the maximum, equivalent to
        # np.where(data == max(data))[0][0] but in a single pass
        histories.append(int(np.argmax(data)))
    print(f"Avg first epoch with Max Hits@1: {np.average(histories)}")
    return results
def plot_results(param_names, param_options, results, params):
    """Create a directory based on key parameters and date and plot results
    vs iteration. Each of the parameters in results receives its own plot,
    drawn by matplotlib and saved as a figure named after the parameter.

    Args:
        param_names: names of the result parameters (one plot each).
        param_options: per-parameter option strings controlling plotting.
        results: mapping of iteration -> sequence of parameter values.
        params: task parameters; params.circuit names the output directory.
    """
    # circuit/run_num where run_num is one past the latest existing run
    directory = params.circuit
    if not os.path.isdir(directory):
        os.mkdir(directory)
    runs = immediate_subdir(directory)
    latest_run = 0
    if runs:
        natural_sort(runs)
        latest_run = get_trailing_num(runs[-1])
    directory = os.path.join(directory, "run" + str(latest_run + 1))
    print(directory)
    if not os.path.isdir(directory):
        os.mkdir(directory)
    with Chdir(directory):
        export_results_to_csv(param_names, results, params)
        # BUG FIX: dict.keys() is not subscriptable in Python 3; a concrete
        # list is required for x[-1] below
        x = list(results.keys())
        y = []
        next_figure = True
        p = 0
        plt.figure()
        while p < len(param_names):
            print(param_names[p])
            if param_options[p]:
                nf = True
                for option in param_options[p].split():
                    # stopping has veto power (must all be True to pass)
                    nf = nf and plot_options(option)
                next_figure = nf
            if not next_figure:
                # y becomes list of lists (for use with stackable plots)
                y.append([result[p] for result in results.values()])
                p += 1
                continue
            elif not y:
                y = [result[p] for result in results.values()]
            lx = x[-1]
            ly = y[-1]
            plot_method(x, y)
            plt.xlabel("iteration")
            plt.xlim(xmin=0)
            plt.ylabel(param_names[p])
            # annotate the last value
            annotate_last(lx, ly)
            if next_figure:
                plt.savefig(param_names[p])
                plt.figure()
            p += 1
        # in case the last figure hasn't been shuffled onto file yet
        if not next_figure:
            plot_method(x, y)
            plt.savefig(param_names[-1])
def test_natural_sort():
    """util.natural_sort must order numeric strings by value, in place."""
    values = ['11', '1', '12', '2', '13', '3']
    util.natural_sort(values)
    expected = ['1', '2', '3', '11', '12', '13']
    assert values == expected
def create_table(params, db):
    """Create the task table in the SQLite database `db`, deriving the column
    schema from the CSV result file of the first run.

    Exits with status 2 when no runs exist, and status 3 when a configured
    key parameter is missing from the result file.  No-op if the table
    already exists.

    NOTE(review): `csvreader.next()` and the 'rb' open mode are Python 2
    idioms; under Python 3 this would need next(csvreader) and text mode.
    """
    runs = get_runs(params)
    # select how to and which runs to use for a certain range
    natural_sort(runs)
    if len(runs) < 1:
        print("No runs in this directory, cannot create table"); sys.exit(2)
    # creates table schema based on the result file of the first run
    with open(get_result_file(params, runs[0]), 'rb') as res:
        csvreader = csv.reader(res, delimiter=params.delimiter)
        # first row: column names; second row: sample values used to
        # infer each column's SQL type
        result_params = csvreader.next()
        result_params_sample = csvreader.next()
    # empty end
    if not result_params[-1]:
        result_params.pop()
        result_params_sample.pop()
    # pad the sample row so it is at least as long as the header row
    while len(result_params_sample) < len(result_params):
        result_params_sample.append('')
    result_params = [param.strip() for param in result_params]
    # check if table name already exists
    cursor = db.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='{}'".format(params.task_table_name.strip("[]")))
    if cursor.fetchone():
        return
    # table identifiers cannot be parameterized, so have to use string concatenation
    create_table_command = '''CREATE TABLE IF NOT EXISTS {}('''.format(params.task_table_name)
    # include one item for each result_params
    create_table_command += "{} INTEGER, ".format(params.run_prefix)
    # time is in seconds since epoch
    create_table_command += "parsed_date REAL, "
    for p in range(len(result_params)):
        # map the python type of the converted sample value to a SQL type,
        # defaulting to TEXT when unknown
        p_schema = "".join(('\"',result_params[p].strip(),'\" ', type_map.get(type(convert_strictest(result_params_sample[p])), "TEXT")))
        p_schema += ", "
        create_table_command += p_schema
    # treat with unique keys
    primary_keys = "PRIMARY KEY({},".format(params.run_prefix)
    # run always considered primary key
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        primary_keys += "".join(('\"', primary_key, '\"'))
        primary_keys += ','
    primary_keys = primary_keys[:-1]  # remove trailing ,
    primary_keys += ')'
    create_table_command += primary_keys
    create_table_command += ')'
    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)
    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)
def plot_results(param_names, param_options, results, params):
    """Create a directory based on key parameters and date and plot results
    vs iteration. Each of the parameters in results receives its own plot,
    drawn by matplotlib and saved as a figure named after the parameter.

    Args:
        param_names: names of the result parameters (one plot each).
        param_options: per-parameter option strings controlling plotting.
        results: mapping of iteration -> sequence of parameter values.
        params: task parameters; params.circuit names the output directory.
    """
    # circuit/run_num where run_num is one past the latest existing run
    directory = params.circuit
    if not os.path.isdir(directory):
        os.mkdir(directory)
    runs = immediate_subdir(directory)
    latest_run = 0
    if runs:
        natural_sort(runs)
        latest_run = get_trailing_num(runs[-1])
    directory = os.path.join(directory, "run" + str(latest_run + 1))
    print(directory)
    if not os.path.isdir(directory):
        os.mkdir(directory)
    with Chdir(directory):
        export_results_to_csv(param_names, results, params)
        # BUG FIX: dict.keys() is not subscriptable in Python 3; a concrete
        # list is required for x[-1] below
        x = list(results.keys())
        y = []
        next_figure = True
        p = 0
        plt.figure()
        while p < len(param_names):
            print(param_names[p])
            if param_options[p]:
                nf = True
                for option in param_options[p].split():
                    # stopping has veto power (must all be True to pass)
                    nf = nf and plot_options(option)
                next_figure = nf
            if not next_figure:
                # y becomes list of lists (for use with stackable plots)
                y.append([result[p] for result in results.values()])
                p += 1
                continue
            elif not y:
                y = [result[p] for result in results.values()]
            lx = x[-1]
            ly = y[-1]
            plot_method(x, y)
            plt.xlabel('iteration')
            plt.xlim(xmin=0)
            plt.ylabel(param_names[p])
            # annotate the last value
            annotate_last(lx, ly)
            if next_figure:
                plt.savefig(param_names[p])
                plt.figure()
            p += 1
        # in case the last figure hasn't been shuffled onto file yet
        if not next_figure:
            plot_method(x, y)
            plt.savefig(param_names[-1])