def process_artifact_paths(artifact_paths):
    """Insert a row into the "artifact" table for every path not yet stored.

    The basename of each path is used as the row id. Paths whose id already
    has a row are counted as skipped. For new paths the row is built from the
    id plus whatever ``get_default_values`` returns; the resulting INSERT
    statements are concatenated and executed in batches. A three-line summary
    is printed at the end.

    Args:
        artifact_paths: Sequence of artifact file paths (``len()`` is taken,
            so a plain iterator is not sufficient).

    Returns:
        None.
    """
    main_connector = dbutils.connect_to_main_database()
    table = "artifact"
    batch_size = 100

    insert_count = 0
    no_measurements_count = 0
    skip_count = 0

    bar = progressbar.ProgressBar(max_value=len(artifact_paths))
    # INSERT statements collected since the last flush.
    pending_statements = []
    last_index = len(artifact_paths) - 1
    for index, artifact_path in enumerate(artifact_paths):
        bar.update(index)

        # Check if there is already an entry.
        basename = os.path.basename(artifact_path)
        sql_statement_select = dbutils.create_select_statement(
            table, ["id"], [basename])  # TODO is this the proper id?
        results = main_connector.execute(sql_statement_select, fetch_all=True)

        if results:
            # Found a result. Nothing is updated; the entry is only counted.
            skip_count += 1
        else:
            # No results found. Insert.
            insert_data = {"id": basename}  # TODO proper?

            # Process the artifact.
            default_values = get_default_values(
                artifact_path, table, main_connector)
            if default_values is not None:
                insert_data.update(default_values)
                pending_statements.append(dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values()))
                insert_count += 1
            else:
                no_measurements_count += 1

        # Update database: flush on every batch_size-th index (but not on the
        # very first one) and always on the final element.
        batch_boundary = index != 0 and index % batch_size == 0
        if batch_boundary or index == last_index:
            if pending_statements:
                main_connector.execute("".join(pending_statements))
                pending_statements = []

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))
def update_media_table(file_paths, table, get_values, batch_size=1000):
    """Insert a row into ``table`` for every file path not yet stored.

    Existence is checked by selecting on the ``path`` column with the full
    ``file_path``. Existing entries are only counted as skipped. For new
    entries the row is assembled from the file's basename, the result of
    ``get_default_values`` and the result of ``get_values``, in that order,
    so later sources override earlier keys. INSERT statements are
    concatenated and executed in batches through the module-level
    ``main_connector``. A three-line summary is printed at the end.

    Args:
        file_paths: Sequence of file paths (``len()`` is taken).
        table: Name of the table to query and insert into.
        get_values: Callable taking a file path and returning a mapping of
            additional column values that is merged into the row.
        batch_size: Flush the collected statements whenever the running index
            is a non-zero multiple of this value.

    Returns:
        None.
    """
    insert_count = 0
    no_measurements_count = 0
    skip_count = 0

    bar = progressbar.ProgressBar(max_value=len(file_paths))
    # INSERT statements collected since the last flush.
    pending_statements = []
    last_index = len(file_paths) - 1
    for index, file_path in enumerate(file_paths):
        bar.update(index)

        # Check if there is already an entry.
        sql_statement_select = dbutils.create_select_statement(
            table, ["path"], [file_path])
        results = main_connector.execute(sql_statement_select, fetch_all=True)

        if results:
            # Found a result.
            # TODO check if measurement id is missing or not
            skip_count += 1
        else:
            # No results found. Insert.
            # NOTE(review): the row starts with the basename as "path", while
            # the lookup above uses the full path. If the default values carry
            # their own "path" key it replaces the basename here -- confirm
            # which one is intended.
            insert_data = {"path": os.path.basename(file_path)}
            default_values = get_default_values(file_path, table)
            if default_values is not None:
                insert_data.update(default_values)
                insert_data.update(get_values(file_path))
                pending_statements.append(dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values()))
                insert_count += 1
            else:
                no_measurements_count += 1

        # Update database: flush on every batch_size-th index (but not on the
        # very first one) and always on the final element.
        batch_boundary = index != 0 and index % batch_size == 0
        if batch_boundary or index == last_index:
            if pending_statements:
                main_connector.execute("".join(pending_statements))
                pending_statements = []

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))
def get_default_values(path, table, db_connector=None):
    """Derive the default column values for a media file from its path.

    The path is split on "/". Component 1 must equal ``whhdata_path`` without
    its first character and component 2 must equal ``media_subpath``.
    Component 3 is taken as the qrcode, and the third-from-last "_"-separated
    field of the file name is taken as the timestamp. The ``measurements``
    table is then queried for a manual measurement with that qrcode whose
    timestamp lies within ``threshold`` of the file's timestamp.

    Args:
        path: Path of the media file.
        table: Name of the target table. Currently not used by this function.
        db_connector: Optional connector used to run the query. When omitted,
            the module-level ``main_connector`` is used. This lets callers
            that pass their own connector as a third argument work.

    Returns:
        dict with the keys ``path``, ``qrcode``, ``last_updated`` and
        ``rejected_by_expert``; ``measurement_id`` is added only when a
        matching measurement was found.

    Raises:
        AssertionError: If the path does not start with the expected
            directory components.
    """
    connector = main_connector if db_connector is None else db_connector

    # Split and check the path.
    path_split = path.split("/")
    assert path_split[1] == whhdata_path[1:]
    assert path_split[2] == media_subpath

    # Get important values from path.
    qrcode = path_split[3]
    timestamp = path_split[-1].split("_")[-3]

    # Getting timestamp.
    last_updated, _ = get_last_updated()

    # Get id of measurement.
    # Presumably one day expressed in milliseconds -- TODO confirm the unit
    # of the timestamp column.
    threshold = int(60 * 60 * 24 * 1000)
    # NOTE(review): qrcode and timestamp come straight from the path and are
    # interpolated into the SQL text; switch to a parameterized query if the
    # connector supports one.
    sql_statement = (
        "SELECT id"
        " FROM measurements WHERE"
        " qrcode = '{}'"
        " AND type = 'manual'"
        " AND ABS(timestamp - {}) < {}"
        ";"
    ).format(qrcode, timestamp, threshold)
    results = connector.execute(sql_statement, fetch_all=True)

    # Prepare values.
    values = {
        "path": path,
        "qrcode": qrcode,
        "last_updated": last_updated,
        "rejected_by_expert": False,
    }

    if len(results) == 0:
        # Measurement id not found.
        print("No measurement_id found for {}".format(path))
    else:
        # Found a measurement id: use the first column of the first row.
        values["measurement_id"] = results[0][0]

    return values
# Clearing the table is intentionally disabled:
# main_connector.clear_table(table)

# Print every distinct qrcode in the measurements table (except 'NaN') in
# sorted order, then terminate the script.
sql_statement = "SELECT qrcode FROM measurements WHERE qrcode != 'NaN';"
results = [
    row[0]
    for row in main_connector.execute(sql_statement, fetch_all=True)
]
print(sorted(set(results)))
exit(0)

# Everything below is unreachable because of the exit(0) above. It builds one
# sample row, inserts it, and then selects it back by its path.
insert_data = {
    "path": "somepath",
    "qrcode": "someqrcode",
    "targets": "10, 20",
    "last_updated": str(datetime.datetime.now()),
    "rejected_by_expert": False,
    "had_error": False,
    "error_message": "",
    "width_px": 128,
    "height_px": 127,
    "blur_variance": 1.0,
}

sql_statement = dbutils.create_insert_statement(
    table, insert_data.keys(), insert_data.values())
print(sql_statement)
results = main_connector.execute(sql_statement)
print(results)

sql_statement = dbutils.create_select_statement(
    table, ["path"], ["somepath"])
print(sql_statement)
results = main_connector.execute(sql_statement, fetch_all=True)
print(len(results))
def process_person_paths(person_paths, process_index):
    """Insert "artifact" rows for all artifacts found below the given persons.

    For every person directory, 'localssd/' in the path is replaced with
    'localssd2/', files matching each extension in ``file_extensions`` are
    globbed, and every artifact whose basename is not yet an id in the
    "artifact" table is inserted. INSERT statements are concatenated and
    executed in batches.

    Args:
        person_paths: Iterable of person (qr-code) directory paths.
        process_index: Passed to ``tqdm`` as ``position``.

    Returns:
        Tuple ``(insert_count, no_measurements_count, skip_count)`` summed
        over all persons. ``insert_count`` counts new rows that carry a
        measurement id, ``no_measurements_count`` counts new rows without
        one, and ``skip_count`` counts artifacts that already had a row.
        For empty input the result is ``(0, 0, 0)``.
    """
    table = "artifact"
    batch_size = 100

    # Statistics are accumulated over all persons so that the returned tuple
    # describes the whole call, including the empty-input case.
    insert_count = 0
    no_measurements_count = 0
    skip_count = 0

    # Go through each person (qr-code).
    for person_path in tqdm(person_paths, position=process_index):
        person_path = person_path.replace('localssd/', 'localssd2/')
        print(person_path)

        # Find all artifacts for that person.
        # NOTE(review): glob.glob is called without recursive=True, so "**"
        # matches exactly one directory level -- confirm this is intended.
        artifact_paths = []
        for file_extension in file_extensions:
            print(file_extension)
            glob_search_path = os.path.join(
                person_path, "**/*.{}".format(file_extension))
            artifact_paths.extend(glob.glob(glob_search_path))
        print("Found {} artifacts in {}".format(
            len(artifact_paths), person_path))

        # Process those artifacts.
        main_connector = dbutils.connect_to_main_database()
        # INSERT statements collected since the last flush.
        pending_statements = []
        last_index = len(artifact_paths) - 1
        for artifact_index, artifact_path in enumerate(artifact_paths):

            # Check if there is already an entry in the database.
            basename = os.path.basename(artifact_path)
            sql_statement_select = dbutils.create_select_statement(
                table, ["id"], [basename])
            results = main_connector.execute(
                sql_statement_select, fetch_all=True)

            if results:
                # Found a result. The entry is only counted, not updated.
                skip_count += 1
            else:
                # No results found. Insert.
                insert_data = {"id": basename}  # TODO proper?

                # Get the default values for the artifact.
                default_values = get_default_values(
                    artifact_path, table, main_connector)

                # Check if there is a measurement id. The key must match the
                # one get_default_values stores ("measurement_id").
                if "measurement_id" in default_values:
                    insert_count += 1
                else:
                    no_measurements_count += 1

                # Create SQL statement.
                insert_data.update(default_values)
                pending_statements.append(dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values()))

            # Update database: flush on every batch_size-th index (but not
            # on the very first one) and always on the final element.
            batch_boundary = (
                artifact_index != 0 and artifact_index % batch_size == 0)
            if batch_boundary or artifact_index == last_index:
                if pending_statements:
                    main_connector.execute("".join(pending_statements))
                    pending_statements = []

    # Return statistics.
    return (insert_count, no_measurements_count, skip_count)