Example #1
0
    def check_runs_match_table(runs):
        """Warn when the task table holds a run newer than any run in ``runs``.

        ``runs`` is ordered with ``natural_sort`` (called for its effect on the
        list; the return value is ignored) and the trailing number of its last
        entry is compared against the largest ``params.run_prefix`` value
        stored in ``params.task_table_name``. Nothing is returned; the only
        output is a printed hint.
        """
        natural_sort(runs)
        newest_listed_run = get_trailing_num(runs[-1])

        # identifiers (column / table names) are interpolated into the query text
        query = "SELECT MAX({}) FROM {}".format(params.run_prefix, params.task_table_name)
        cur = db.cursor()
        cur.execute(query)
        stored_max = cur.fetchone()[0]

        # a falsy maximum (e.g. NULL from an empty table) means nothing to compare
        if not stored_max:
            return
        if newest_listed_run < stored_max:
            print("stored run ({}) is higher than existing run ({}); "
                  "consider running with --clean to remake task table".format(stored_max, newest_listed_run))
Example #2
0
 def check_last_runs_table(runs):
     """Record the most recent parse date and run number found in the task table.

     Queries MAX(parsed_date) and MAX(run) from ``params.task_table_name``.
     When a parse date is stored, ``params.last_parsed_date`` and
     ``params.last_run`` are set from it; otherwise only ``params.last_run``
     is set (to 0). ``runs`` is passed through ``natural_sort`` and is not
     otherwise used here.
     """
     natural_sort(runs)
     cur = db.cursor()
     cur.execute("SELECT MAX({}), MAX({}) FROM {}".format("parsed_date", "run", params.task_table_name))
     newest_date, newest_run = cur.fetchone()[:2]

     if not newest_date:
         # nothing stored yet
         print("first population")
         params.last_run = 0
         return

     # storing in database truncates decimal, so add 1 second
     last_parsed_date = newest_date + 1
     print("last parsed date", last_parsed_date)
     print("last run", newest_run)
     params.last_parsed_date = last_parsed_date
     params.last_run = newest_run
Example #3
0
    def initialize(self, opt):
        """Collect, order, truncate and validate the dataset's file paths.

        Paths come from ``self.get_paths(opt)``. Label and image paths are
        always passed to ``util.natural_sort``; instance paths only when
        ``opt.no_instance`` is false. Each list is then cut to
        ``opt.max_dataset_size`` entries. Unless ``opt.no_pairing_check`` is
        set, every (label, image) pair must satisfy ``self.paths_match``.

        Sets ``self.opt``, ``self.label_paths``, ``self.image_paths``,
        ``self.instance_paths`` and ``self.dataset_size``.
        """
        self.opt = opt

        labels, images, instances = self.get_paths(opt)

        # natural_sort is used for its effect on the list; its result is ignored
        util.natural_sort(labels)
        util.natural_sort(images)
        if not opt.no_instance:
            util.natural_sort(instances)

        limit = opt.max_dataset_size
        labels = labels[:limit]
        images = images[:limit]
        instances = instances[:limit]

        check_pairing = not opt.no_pairing_check
        if check_pairing:
            for label_path, image_path in zip(labels, images):
                assert self.paths_match(label_path, image_path), \
                    "The label-image pair (%s, %s) do not look like the right pair because the filenames are quite different. Are you sure about the pairing? Please see data/pix2pix_dataset.py to see what is going on, and use --no_pairing_check to bypass this." % (label_path, image_path)

        self.label_paths = labels
        self.image_paths = images
        self.instance_paths = instances
        self.dataset_size = len(labels)
Example #4
0
def load_history(directory, remote_dir=False):
    """Load the history of a training process.

    Every file in the run directory whose name ends in "_training.txt" is
    parsed as JSON, in the order returned by ``natural_sort``.

    Args:
        directory: Sub-directory to read, relative to the base directory.
        remote_dir: Use ``REMOTE`` as the base directory instead of
            ``MODEL_PATH``.

    Returns:
        A list with one parsed JSON document per matching file.
    """
    base = Path(REMOTE) if remote_dir else Path(MODEL_PATH)
    path = base / directory

    history_names = natural_sort(
        [name for name in os.listdir(path) if name.endswith("_training.txt")]
    )

    histories = []
    for name in history_names:
        with open(path / name, "r") as handle:
            histories.append(json.load(handle))
    return histories
Example #5
0
def load_configs(directory, remote_dir=False):
    """Load the configurations of an ablation study for plotting the results.

    Every file in the study directory whose name ends in "_config.json" is
    parsed as JSON, in the order returned by ``natural_sort``.

    Args:
        directory: Sub-directory to read, relative to the base directory.
        remote_dir: Use ``REMOTE`` as the base directory instead of
            ``MODEL_PATH``.

    Returns:
        A list with one parsed JSON document per matching file.
    """
    base = Path(REMOTE) if remote_dir else Path(MODEL_PATH)
    path = base / directory

    config_names = natural_sort(
        [name for name in os.listdir(path) if name.endswith("_config.json")]
    )

    configs = []
    for name in config_names:
        with open(path / name, "r") as handle:
            configs.append(json.load(handle))
    return configs
Example #6
0
def load_evaluation_results(directory, remote_dir=False):
    """Load the evaluation results of the test data of a final model.

    The files should be named "..{i}_test_metrics.txt" with i being the i-th
    repetition of model training (final models are trained 5 times to report
    average and variance of the evaluation results, hyper-parameter studies are
    done with 3 repetitions).

    As a side effect, every "_training.txt" file in the same directory is read
    and the average index of the first maximum of its 'Hits@1' series is
    printed.

    Args:
        directory: Sub-directory to read, relative to the base directory.
        remote_dir: Use ``REMOTE`` as the base directory instead of
            ``MODEL_PATH``.

    Returns:
        A list with one parsed JSON document per "_test_metrics.txt" file, in
        the order returned by ``natural_sort``.
    """
    base = Path(REMOTE) if remote_dir else Path(MODEL_PATH)
    path = base / directory
    files = os.listdir(path)

    def read_json(name):
        # parse one file of the run directory as JSON
        with open(path / name, "r") as handle:
            return json.load(handle)

    metric_names = natural_sort(
        [name for name in files if name.endswith("_test_metrics.txt")]
    )
    results = [read_json(name) for name in metric_names]

    # history files are taken in directory-listing order; only their average is reported
    histories = []
    for name in files:
        if not name.endswith("_training.txt"):
            continue
        data = np.array(read_json(name)['Hits@1'])
        # index of the first entry equal to the maximum
        histories.append(np.where(data == max(data))[0][0])
    print(f"Avg first epoch with Max Hits@1: {np.average(histories)}")
    return results
Example #7
0
def plot_results(param_names, param_options, results, params):
    """Export results to CSV and plot each parameter against iteration.

    Output goes into a fresh ``<params.circuit>/run<N>`` directory, where N is
    one more than the trailing number of the latest existing sub-directory
    (or 1 when there is none). Figures are drawn with matplotlib and saved
    under the parameter's name.

    Args:
        param_names: Names of the plotted parameters; entry ``p`` labels the
            y axis and names the saved figure.
        param_options: Parallel to ``param_names``. Each entry is either falsy
            or a whitespace-separated string of options handed one by one to
            ``plot_options``; if any of them returns a falsy value the
            parameter is stacked onto the pending figure instead of being
            drawn right away.
        results: Mapping whose keys are the x values and whose values are
            indexable by parameter position.
        params: Object providing ``circuit`` (the base output directory); also
            forwarded to ``export_results_to_csv``.
    """

    # circuit/run_num where run_num is one past the latest existing one
    directory = params.circuit
    if not os.path.isdir(directory):
        os.mkdir(directory)
    runs = immediate_subdir(directory)
    latest_run = 0
    if runs:
        natural_sort(runs)
        latest_run = get_trailing_num(runs[-1])
    directory = os.path.join(directory, "run" + str(latest_run + 1))

    print(directory)
    if not os.path.isdir(directory):
        os.mkdir(directory)

    with Chdir(directory):

        export_results_to_csv(param_names, results, params)

        # materialize the keys: x[-1] below needs a sequence, and on Python 3
        # dict.keys() returns a view that cannot be indexed
        x = list(results.keys())
        y = []
        next_figure = True

        p = 0
        plt.figure()
        while p < len(param_names):
            print(param_names[p])

            if param_options[p]:
                nf = True
                for option in param_options[p].split():
                    # stopping has veto power (must all be True to pass)
                    nf = nf and plot_options(option)
                next_figure = nf

            if not next_figure:
                # y becomes list of lists (for use with stackable plots)
                y.append([result[p] for result in results.values()])
                p += 1
                continue
            elif not y:
                y = [result[p] for result in results.values()]
            # NOTE(review): y is never cleared after a figure is saved, so once
            # it is non-empty later parameters reuse it -- confirm this is
            # intended (or that plot_options/plot_method account for it).

            lx = x[-1]
            ly = y[-1]
            plot_method(x, y)
            plt.xlabel("iteration")
            plt.xlim(xmin=0)

            plt.ylabel(param_names[p])

            # annotate the last value
            annotate_last(lx, ly)

            if next_figure:
                plt.savefig(param_names[p])
                plt.figure()

            p += 1
        # in case the last figure hasn't been shuffled onto file yet
        if not next_figure:
            plot_method(x, y)
            plt.savefig(param_names[-1])
Example #8
0
def test_natural_sort():
    """natural_sort orders numeric strings by value, modifying the list in place."""
    shuffled = ['11', '1', '12', '2', '13', '3']
    expected = ['1', '2', '3', '11', '12', '13']
    util.natural_sort(shuffled)
    assert shuffled == expected
Example #9
0
def create_table(params, db):
    """Create the task table from the first run's result file, unless it exists.

    The header row of the first run's result file supplies the column names
    and the row after it supplies sample values. Each column's SQL type is
    looked up in ``type_map`` using the Python type that ``convert_strictest``
    yields for the sample value, falling back to TEXT. The table additionally
    gets a ``params.run_prefix`` INTEGER column and a ``parsed_date`` REAL
    column, and its primary key is the run column plus ``params.key_params``.

    If a table with that name is already listed in ``sqlite_master`` the
    function returns without changing anything. After creating the table it
    calls ``initialize_tracked_columns``.

    Args:
        params: Configuration object; ``delimiter``, ``task_table_name``,
            ``run_prefix`` and ``key_params`` are read here.
        db: Database connection providing ``cursor()``.

    Raises:
        SystemExit: With code 2 when there are no runs, with code 3 when a key
            parameter is missing from the result file's header.
    """
    runs = get_runs(params)
    # select how to and which runs to use for a certain range
    natural_sort(runs)

    if not runs:
        print("No runs in this directory, cannot create table")
        sys.exit(2)

    # creates table schema based on the result file of the first run
    result_path = get_result_file(params, runs[0])
    # the csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3
    if sys.version_info[0] < 3:
        res = open(result_path, 'rb')
    else:
        res = open(result_path, 'r', newline='')
    with res:
        csvreader = csv.reader(res, delimiter=params.delimiter)
        # builtin next() works on both Python 2 and 3 (reader.next() is 2-only)
        result_params = next(csvreader)
        result_params_sample = next(csvreader)

    # empty end (a trailing delimiter produces an empty last column)
    if not result_params[-1]:
        result_params.pop()
        result_params_sample.pop()
    # pad the sample row so every column has a sample value
    while len(result_params_sample) < len(result_params):
        result_params_sample.append('')

    result_params = [param.strip() for param in result_params]

    # check if table name already exists
    cursor = db.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='{}'".format(params.task_table_name.strip("[]")))
    if cursor.fetchone():
        return

    # table identifiers cannot be parameterized, so have to use string concatenation
    command_parts = ['''CREATE TABLE IF NOT EXISTS {}('''.format(params.task_table_name)]

    command_parts.append("{} INTEGER, ".format(params.run_prefix))
    # time is in seconds since epoch
    command_parts.append("parsed_date REAL, ")
    # include one item for each result_params
    for name, sample in zip(result_params, result_params_sample):
        column_type = type_map.get(type(convert_strictest(sample)), "TEXT")
        command_parts.append("".join(('\"', name.strip(), '\" ', column_type, ", ")))

    # treat with unique keys
    key_columns = ["{}".format(params.run_prefix)]  # run always considered primary key
    for primary_key in params.key_params:
        if primary_key not in result_params:
            print("{} does not exist in result file of run 1".format(primary_key))
            sys.exit(3)
        key_columns.append("".join(('\"', primary_key, '\"')))
    command_parts.append("PRIMARY KEY(" + ",".join(key_columns) + ")")

    command_parts.append(')')
    create_table_command = "".join(command_parts)

    print(create_table_command)
    cursor = db.cursor()
    cursor.execute(create_table_command)

    # reinitialize tracked columns for newly created table
    initialize_tracked_columns(params, db)
def plot_results(param_names, param_options, results, params):
	"""Export results to CSV and plot each parameter against iteration.

	Output goes into a fresh ``<params.circuit>/run<N>`` directory, where N is
	one more than the trailing number of the latest existing sub-directory
	(or 1 when there is none). Figures are drawn with matplotlib and saved
	under the parameter's name.

	Args:
		param_names: Names of the plotted parameters; entry ``p`` labels the
			y axis and names the saved figure.
		param_options: Parallel to ``param_names``. Each entry is either falsy
			or a whitespace-separated string of options handed one by one to
			``plot_options``; if any of them returns a falsy value the
			parameter is stacked onto the pending figure instead of being
			drawn right away.
		results: Mapping whose keys are the x values and whose values are
			indexable by parameter position.
		params: Object providing ``circuit`` (the base output directory); also
			forwarded to ``export_results_to_csv``.
	"""

	# circuit/run_num where run_num is one past the latest existing one
	directory = params.circuit
	if not os.path.isdir(directory):
		os.mkdir(directory)
	runs = immediate_subdir(directory)
	latest_run = 0
	if runs:
		natural_sort(runs)
		latest_run = get_trailing_num(runs[-1])
	directory = os.path.join(directory, "run" + str(latest_run+1))

	print(directory)
	if not os.path.isdir(directory):
		os.mkdir(directory)

	with Chdir(directory):

		export_results_to_csv(param_names, results, params)

		# materialize the keys: x[-1] below needs a sequence, and on Python 3
		# dict.keys() returns a view that cannot be indexed
		x = list(results.keys())
		y = []
		next_figure = True

		p = 0
		plt.figure()
		while p < len(param_names):
			print(param_names[p])

			if param_options[p]:
				nf = True
				for option in param_options[p].split():
					# stopping has veto power (must all be True to pass)
					nf = nf and plot_options(option)
				next_figure = nf

			if not next_figure:
				# y becomes list of lists (for use with stackable plots)
				y.append([result[p] for result in results.values()])
				p += 1
				continue
			elif not y:
				y = [result[p] for result in results.values()]
			# NOTE(review): y is never cleared after a figure is saved, so once
			# it is non-empty later parameters reuse it -- confirm this is
			# intended (or that plot_options/plot_method account for it).

			lx = x[-1]
			ly = y[-1]
			plot_method(x,y)
			plt.xlabel('iteration')
			plt.xlim(xmin=0)

			plt.ylabel(param_names[p])

			# annotate the last value
			annotate_last(lx,ly)

			if next_figure:
				plt.savefig(param_names[p])
				plt.figure()

			p += 1
		# in case the last figure hasn't been shuffled onto file yet
		if not next_figure:
			plot_method(x,y)
			plt.savefig(param_names[-1])
Example #11
0
def test_natural_sort():
    """natural_sort orders numeric strings by value, modifying the list in place."""
    shuffled = ['11', '1', '12', '2', '13', '3']
    expected = ['1', '2', '3', '11', '12', '13']
    util.natural_sort(shuffled)
    assert shuffled == expected