def check_result_exists(params, run):
    """Make sure a parsed result file exists for ``run``; run the parse script if not.

    The result file path is obtained from ``get_result_file`` using
    ``params.run_prefix`` and the trailing number of ``run``.  When the file
    is missing, ``params.parse_script`` is formatted with ``task_dir``,
    ``task_name`` and ``run_number``, split into arguments, and executed.

    Args:
        params: settings object; this function reads ``run_prefix``,
            ``parse_script``, ``task_dir`` and ``task_name`` from it.
        run: name of the run; its trailing number identifies the run.

    Returns:
        None.  Progress is reported on stdout.
    """
    # Parse the run number once and reuse it for both the lookup and the call.
    run_number = get_trailing_num(run)
    if os.path.isfile(get_result_file(params, params.run_prefix, run_number)):
        print(run, " OK")
        return

    parsed_call = params.parse_script.format(
        task_dir=params.task_dir,
        task_name=params.task_name,
        run_number=run_number,
    )
    print(run, " called ", parsed_call)
    parsed_call = shlex.split(parsed_call)
    # The executable may be given as ~/path; the arguments are left untouched.
    parsed_call[0] = os.path.expanduser(parsed_call[0])
    # NOTE(review): `call` is presumably subprocess.call, whose return value is
    # the child's exit status -- confirm against the file's imports.
    status = call(parsed_call)
    if status != 0:
        # Do not let a failed parse pass silently; the result file is
        # probably still missing.
        print(run, " parse script exited with status ", status)
def add_run_to_db(params, run):
    """Load one run's tab-separated result file into the task table.

    The first line of the file names the columns; every following line is one
    row of values.  Two extra leading columns, ``run`` and ``parsed_date``
    (the file's modification time), are recorded for every row.  Columns not
    yet listed in ``params.tracked_columns`` are added through
    ``add_column_to_table``, using the first data row as the sample value.
    Values equal to ``nullval`` are stored as NULL; everything else goes
    through ``convert_strictest``.

    The whole run is skipped (nothing is inserted) when the file cannot be
    stat'ed, has no data rows, or contains a row with more values than there
    are columns.

    Args:
        params: settings object; ``tracked_columns`` and ``task_table_name``
            are read here.
        run: name of the run; its trailing number is stored in the ``run``
            column.

    Returns:
        None.  Rows are inserted through the ``db`` connection that this
        function picks up from the enclosing scope; no commit is issued here.
    """
    resfilename = get_result_file(params, run)
    run_number = get_trailing_num(run)
    try:
        parsed_date = os.path.getmtime(resfilename)
    except OSError:
        print("file {} not found; skipping".format(resfilename))
        return

    with open(resfilename, 'r') as res:
        # Strip the line terminator before splitting so it never ends up
        # glued to the last field.
        header = res.readline().rstrip('\r\n').split('\t')
        data_lines = res.readlines()

    # A trailing tab on the header line produces one empty trailing name.
    if header[-1] == '':
        header.pop()
    # always called "run" in table (converted from whatever prefix users use)
    result_params = ['"{}"'.format(name.strip())
                     for name in ["run", "parsed_date"] + header]

    if not data_lines:
        # No sample row to type new columns from and nothing to insert.
        print("file {} has no data rows; skipping".format(resfilename))
        return

    rows_to_add = []
    for row_index, line in enumerate(data_lines):
        # run number and parsed_date are always recorded
        result_params_val = [run_number, parsed_date]
        result_params_val.extend(line.rstrip('\r\n').split('\t'))
        # Drop the empty field left by a trailing tab (or an empty last
        # value, which the padding below puts back).
        if result_params_val[-1] == '':
            result_params_val.pop()

        # something must be wrong here
        if len(result_params_val) > len(result_params):
            print(("There are {} values for only {} parameters in run {}; "
                   "skipping run").format(len(result_params_val),
                                          len(result_params), run_number))
            return

        # for when trailing column values are the empty string
        missing = len(result_params) - len(result_params_val)
        result_params_val.extend([''] * missing)

        if row_index == 0:
            # The first data row doubles as the sample for new columns.  It
            # is aligned with result_params, including the two leading
            # bookkeeping columns, so no index arithmetic is needed.
            for name, sample in zip(result_params, result_params_val):
                if name not in params.tracked_columns:
                    print("ADDING {} as new column".format(name))
                    add_column_to_table(params, db, name, sample)

        rows_to_add.append(tuple(
            None if value == nullval else convert_strictest(value)
            for value in result_params_val))

    param_placeholders = ",".join("?" * len(result_params))
    # specify columns to insert into since ordering of columns in file may
    # not match table
    insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
        params.task_table_name,
        ','.join(result_params),
        param_placeholders)
    cursor = db.cursor()
    cursor.executemany(insert_rows_command, rows_to_add)
def check_runs_match_table(runs):
    """Warn when the task table holds a higher run number than exists on disk.

    ``runs`` is sorted in place with ``natural_sort``; the trailing number of
    its last entry is compared with ``MAX(<run_prefix>)`` from the task table.
    ``params`` and ``db`` are not parameters: they are taken from the
    enclosing scope.

    Args:
        runs: list of run names; sorted in place.  An empty list means there
            is nothing to compare and the function returns immediately.

    Returns:
        None.  A warning is printed when the stored maximum is higher.
    """
    if not runs:
        # Previously runs[-1] raised IndexError here.
        return

    natural_sort(runs)
    highest_run = get_trailing_num(runs[-1])

    cursor = db.cursor()
    # Identifiers cannot be bound as SQL parameters, hence the formatting.
    # NOTE(review): the column selected is params.run_prefix -- confirm this
    # matches the column name actually used by the task table.
    cursor.execute("SELECT MAX({}) FROM {}".format(params.run_prefix,
                                                   params.task_table_name))
    row = cursor.fetchone()
    stored_run = row[0]
    # MAX() yields NULL (None) for an empty table; nothing to compare then.
    if stored_run and highest_run < stored_run:
        print(("stored run ({}) is higher than existing run ({}); "
               "consider running with --clean to remake task table").format(
                   stored_run, highest_run))
def check_result_exists(params, run):
    """Report whether ``run`` has a result file, invoking the parse script if not.

    When ``get_result_file(params, run)`` does not name an existing file,
    ``params.parse_script`` is filled in with ``task_dir``, ``task_name`` and
    ``run_number``, tokenised with ``shlex.split`` and executed via ``call``.
    """
    if os.path.isfile(get_result_file(params, run)):
        print(run, " OK")
        return

    command = params.parse_script.format(
        task_dir=params.task_dir,
        task_name=params.task_name,
        run_number=get_trailing_num(run),
    )
    print(run, " called ", command)

    argv = shlex.split(command)
    # only the executable gets ~ expansion
    argv[0] = os.path.expanduser(argv[0])
    call(argv)
def plot_results(param_names, param_options, results, params):
    """Create a fresh run directory and plot each result parameter vs iteration.

    A directory ``<params.circuit>/run<N>`` is used, where N is one more than
    the trailing number of the latest existing sub-directory (or 1 when there
    is none).  Inside it the results are exported with
    ``export_results_to_csv`` and one figure per parameter is saved through
    matplotlib, unless the parameter's options (interpreted by
    ``plot_options``) ask for the figure to be held open.

    Args:
        param_names: names of the plotted parameters; also used as the saved
            figure file names.
        param_options: per-parameter option strings (whitespace separated);
            a falsy entry keeps the previous figure behaviour.
        results: mapping whose keys are used as x values and whose values are
            sequences indexed by parameter position.
        params: settings object; ``circuit`` is read here.

    Returns:
        None.
    """
    # circuit/run_num where run_num is one after the latest existing one
    directory = params.circuit
    if not os.path.isdir(directory):
        os.mkdir(directory)
    runs = immediate_subdir(directory)
    latest_run = 0
    if runs:
        natural_sort(runs)
        latest_run = get_trailing_num(runs[-1])
    directory = os.path.join(directory, "run" + str(latest_run + 1))

    print(directory)
    if not os.path.isdir(directory):
        os.mkdir(directory)

    with Chdir(directory):
        export_results_to_csv(param_names, results, params)
        # Materialise the keys: x[-1] below needs an indexable sequence and
        # dict views on Python 3 do not support indexing.
        x = list(results.keys())
        y = []
        next_figure = True

        plt.figure()
        for p, name in enumerate(param_names):
            print(name)

            if param_options[p]:
                nf = True
                for option in param_options[p].split():
                    # stopping has veto power (must all be True to pass)
                    nf = nf and plot_options(option)
                next_figure = nf

            if not next_figure:
                # y becomes list of lists (for use with stackable plots)
                y.append([result[p] for result in results.values()])
                continue
            elif not y:
                y = [result[p] for result in results.values()]
            # NOTE(review): y is never cleared after a figure is saved, so
            # once it is non-empty later parameters appear to be plotted with
            # the same data -- confirm whether this is intended.

            lx = x[-1]
            ly = y[-1]
            plot_method(x, y)
            plt.xlabel("iteration")
            plt.xlim(xmin=0)
            plt.ylabel(name)

            # annotate the last value
            annotate_last(lx, ly)

            if next_figure:
                plt.savefig(name)
                plt.figure()

        # in case the last figure hasn't been shuffled onto file yet
        if not next_figure:
            plot_method(x, y)
            plt.savefig(param_names[-1])
def add_run_to_db(params, run):
    """Load one run's delimited result file into the task table.

    The file is read with ``csv.reader`` using ``params.delimiter``.  Its
    first row names the columns and each later row holds values.  Every row
    is stored with two extra leading columns: ``run`` (the value of
    ``params.last_run`` after it has been incremented for this run) and
    ``parsed_date`` (the file's modification time).  Columns missing from
    ``params.tracked_columns`` are added via ``add_column_to_table`` with the
    first data row as sample.  Values equal to ``nullval`` become NULL; all
    others pass through ``convert_strictest``.

    The run is ignored when the file cannot be stat'ed, or when it is neither
    newer than ``params.last_parsed_date`` nor numbered above
    ``params.last_run``.  When the file has no data rows or contains an
    over-long row, the increment of ``params.last_run`` is rolled back and
    nothing is inserted.

    Args:
        params: settings object; reads ``last_parsed_date``, ``delimiter``,
            ``tracked_columns``, ``task_table_name`` and reads/writes
            ``last_run``.
        run: name of the run; its trailing number is only used for the
            freshness check and for messages.

    Returns:
        None.  Rows are inserted through the ``db`` connection taken from the
        enclosing scope; no commit is issued here.
    """
    import sys  # local import: only needed to pick a csv-compatible open mode

    resfilename = get_result_file(params, run)
    run_number = get_trailing_num(run)
    try:
        parsed_date = os.path.getmtime(resfilename)
    except OSError:
        print("file {} not found; skipping".format(resfilename))
        return
    # throw away unless newer than latest or run number greater than maximum
    if parsed_date <= params.last_parsed_date and run_number <= params.last_run:
        return

    params.last_run += 1
    print("run {} added ({}) ({})".format(run_number, params.last_run, parsed_date))

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        res = open(resfilename, 'r', newline='')
    else:
        res = open(resfilename, 'rb')

    with res:
        csvreader = csv.reader(res, delimiter=params.delimiter)
        # next(it, None) works on Python 2.6+ and 3, and does not raise on an
        # empty or header-only file.
        header = next(csvreader, None)
        first_row = next(csvreader, None)
        if header is None or first_row is None:
            print("file {} has no data rows; skipping".format(resfilename))
            params.last_run -= 1
            return

        # A trailing delimiter on the header yields one empty column name;
        # rows are then expected to carry the same empty trailing field.
        empty_end = bool(header) and not header[-1]
        if empty_end:
            header.pop()
        # always called "run" in table (converted from whatever prefix users use)
        result_params = ['"{}"'.format(name.strip())
                         for name in ["run", "parsed_date"] + header]
        print(result_params)

        def data_rows():
            # The sample row already consumed above is also the first data row.
            yield first_row
            for fields in csvreader:
                yield fields

        rows_to_add = []
        for row_index, fields in enumerate(data_rows()):
            # run number and parsed_date are always recorded
            result_params_val = [params.last_run, parsed_date]
            result_params_val.extend(fields)
            # Only drop the trailing field when it really is the empty one
            # produced by the trailing delimiter; an unconditional pop would
            # eat a real value (or parsed_date on a blank line).
            if empty_end and fields and not fields[-1]:
                result_params_val.pop()

            # something must be wrong here
            if len(result_params_val) > len(result_params):
                print(("There are {} values for only {} parameters in run {}; "
                       "skipping run").format(len(result_params_val),
                                              len(result_params), run_number))
                # skip this run
                params.last_run -= 1
                return

            # for padding when columns have unequal depth
            missing = len(result_params) - len(result_params_val)
            result_params_val.extend([''] * missing)

            if row_index == 0:
                # add new columns to table, typed from the first data row
                for name, sample in zip(result_params, result_params_val):
                    if name not in params.tracked_columns:
                        print("ADDING {} as new column".format(name))
                        add_column_to_table(params, db, name, sample)

            rows_to_add.append(tuple(
                None if value == nullval else convert_strictest(value)
                for value in result_params_val))

    print("rows to add")
    print(rows_to_add)
    param_placeholders = ",".join("?" * len(result_params))
    # specify columns to insert into since ordering of columns in file may
    # not match table
    insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
        params.task_table_name,
        ','.join(result_params),
        param_placeholders)
    cursor = db.cursor()
    cursor.executemany(insert_rows_command, rows_to_add)
def plot_results(param_names, param_options, results, params):
    """Create a fresh run directory and plot each result parameter vs iteration.

    The output goes to ``<params.circuit>/run<N>``, N being one more than the
    trailing number of the latest existing sub-directory (1 if none exists).
    The results are first exported with ``export_results_to_csv``; then each
    parameter is drawn with matplotlib and saved under its own name, unless
    its options (as judged by ``plot_options``) keep the figure open.

    Args:
        param_names: parameter names, reused as figure file names.
        param_options: per-parameter option strings, split on whitespace; a
            falsy entry leaves the current figure behaviour unchanged.
        results: mapping; keys are the x values, values are sequences indexed
            by parameter position.
        params: settings object; ``circuit`` is read here.

    Returns:
        None.
    """
    # circuit/run_num where run_num is one after the latest existing one
    directory = params.circuit
    if not os.path.isdir(directory):
        os.mkdir(directory)
    runs = immediate_subdir(directory)
    latest_run = 0
    if runs:
        natural_sort(runs)
        latest_run = get_trailing_num(runs[-1])
    directory = os.path.join(directory, "run" + str(latest_run + 1))

    print(directory)
    if not os.path.isdir(directory):
        os.mkdir(directory)

    with Chdir(directory):
        export_results_to_csv(param_names, results, params)
        # A list is required because x[-1] is taken below; on Python 3
        # dict.keys() returns a view that cannot be indexed.
        x = list(results.keys())
        y = []
        next_figure = True

        plt.figure()
        for p, name in enumerate(param_names):
            print(name)

            if param_options[p]:
                nf = True
                for option in param_options[p].split():
                    # stopping has veto power (must all be True to pass)
                    nf = nf and plot_options(option)
                next_figure = nf

            if not next_figure:
                # y becomes list of lists (for use with stackable plots)
                y.append([result[p] for result in results.values()])
                continue
            elif not y:
                y = [result[p] for result in results.values()]
            # NOTE(review): nothing resets y once a figure has been saved, so
            # subsequent parameters look like they reuse the earlier data --
            # confirm whether that is intended.

            lx = x[-1]
            ly = y[-1]
            plot_method(x, y)
            plt.xlabel('iteration')
            plt.xlim(xmin=0)
            plt.ylabel(name)

            # annotate the last value
            annotate_last(lx, ly)

            if next_figure:
                plt.savefig(name)
                plt.figure()

        # in case the last figure hasn't been shuffled onto file yet
        if not next_figure:
            plot_method(x, y)
            plt.savefig(param_names[-1])