def process_artifact_paths(artifact_paths):
    main_connector = dbutils.connect_to_main_database()
    table = "artifact"
    batch_size = 100
    insert_count = 0
    no_measurements_count = 0
    skip_count = 0
    bar = progressbar.ProgressBar(max_value=len(artifact_paths))
    sql_statement = ""
    last_index = len(artifact_paths) - 1
    for index, artifact_path in enumerate(artifact_paths):
        bar.update(index)

        # Check if there is already an entry.
        basename = os.path.basename(artifact_path)
        sql_statement_select = dbutils.create_select_statement("artifact", ["id"], [basename])  # TODO is this the proper id?
        results = main_connector.execute(sql_statement_select, fetch_all=True)

        # No results found. Insert.
        if len(results) == 0:
            insert_data = {}
            insert_data["id"] = basename  # TODO proper?

            # Process the artifact.
            default_values = get_default_values(artifact_path, table, main_connector)
            if default_values is not None:
                insert_data.update(default_values)
                sql_statement += dbutils.create_insert_statement(table, insert_data.keys(), insert_data.values())
                insert_count += 1
            else:
                no_measurements_count += 1

        # Found a result. Skip.
        else:
            skip_count += 1

        # Update the database in batches and at the very end.
        if (index != 0 and index % batch_size == 0) or index == last_index:
            if sql_statement != "":
                main_connector.execute(sql_statement)
                sql_statement = ""

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))

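# Minimal usage sketch for process_artifact_paths. The scan directory and the
# artifact file extensions below are assumptions for illustration; they are not
# taken from the original code.
import glob
import os

if __name__ == "__main__":
    scans_path = "/localssd/whhdata/scans"  # assumed location of the scan data
    artifact_paths = []
    for file_extension in ["jpg", "pcd"]:   # assumed artifact types
        glob_search_path = os.path.join(scans_path, "**/*.{}".format(file_extension))
        artifact_paths.extend(glob.glob(glob_search_path, recursive=True))
    process_artifact_paths(artifact_paths)
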
def update_media_table(file_paths, table, get_values, batch_size=1000):
    # Note: main_connector is expected to be available at module level here.
    insert_count = 0
    no_measurements_count = 0
    skip_count = 0
    bar = progressbar.ProgressBar(max_value=len(file_paths))
    sql_statement = ""
    last_index = len(file_paths) - 1
    for index, file_path in enumerate(file_paths):
        bar.update(index)

        # Check if there is already an entry.
        # Note: the SELECT matches the full file_path, while the INSERT below stores only the basename.
        path = os.path.basename(file_path)
        sql_statement_select = dbutils.create_select_statement(table, ["path"], [file_path])
        results = main_connector.execute(sql_statement_select, fetch_all=True)

        # No results found. Insert.
        if len(results) == 0:
            insert_data = {"path": path}
            default_values = get_default_values(file_path, table)
            if default_values is not None:
                insert_data.update(default_values)
                insert_data.update(get_values(file_path))
                sql_statement += dbutils.create_insert_statement(table, insert_data.keys(), insert_data.values())
                insert_count += 1
            else:
                no_measurements_count += 1

        # Found a result. Skip.
        else:
            # TODO check if measurement id is missing or not
            skip_count += 1

        # Update the database in batches and at the very end.
        if (index != 0 and index % batch_size == 0) or index == last_index:
            if sql_statement != "":
                main_connector.execute(sql_statement)
                sql_statement = ""

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))

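# Minimal usage sketch for update_media_table. The media directory, the table
# name "image_data", and the columns returned by the callback are assumptions
# for illustration, not taken from the original code.
import datetime
import glob
import os

def get_image_values(file_path):
    # Hypothetical callback returning per-file column values for the media table.
    return {
        "last_updated": str(datetime.datetime.now()),
        "had_error": False,
    }

if __name__ == "__main__":
    image_paths = glob.glob(os.path.join("/localssd/whhdata/media", "**/*.jpg"), recursive=True)
    update_media_table(image_paths, table="image_data", get_values=get_image_values)
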
def main():
    # Check the arguments before using them (the original checked after accessing sys.argv).
    if len(sys.argv) != 4:
        print("usage: command_update_models.py models_file db_connection_file table_name")
        return

    models_file = str(sys.argv[1])
    db_connection_file = str(sys.argv[2])
    table_name = str(sys.argv[3])

    main_connector = dbutils.connect_to_main_database(db_connection_file)

    with open(models_file) as json_file:
        json_data = json.load(json_file)

    for data in json_data["models"]:
        # Skip models that already exist in the table (substring check against the stringified result rows).
        check_existing_models = "SELECT id FROM {};".format(table_name)
        results = main_connector.execute(check_existing_models, fetch_all=True)
        if data["name"] in str(results):
            print("{} already exists in model table".format(data["name"]))
            continue

        value_mapping = {}
        value_mapping["id"] = data["name"]
        value_mapping["name"] = data["name"]
        value_mapping["version"] = data["name"].split("_")[0]
        del data["name"]
        value_mapping["json_metadata"] = json.dumps(data)

        keys = list(value_mapping.keys())
        values = list(value_mapping.values())
        sql_statement = dbutils.create_insert_statement(table_name, keys, values, False, True)
        try:
            main_connector.execute(sql_statement)
            print("{} successfully added to model table".format(value_mapping["name"]))
        except Exception as error:
            print(error)

    main_connector.cursor.close()
    main_connector.connection.close()

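# Hypothetical shape of models_file as consumed by main() above: a top-level
# "models" list whose entries carry a "name" (the version is the prefix before
# the first underscore) plus arbitrary metadata that is serialized into
# json_metadata. The model name and metadata fields below are illustrative,
# not taken from the original code.
EXAMPLE_MODELS_JSON = {
    "models": [
        {
            "name": "0.4_gapnet-height",   # parsed version: "0.4"
            "type": "regression",          # everything except "name" ends up in json_metadata
            "input": "pointclouds",
        }
    ]
}
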
def execute_command_updatemeasurements():
    print("Updating measurements...")
    main_connector = dbutils.connect_to_main_database()

    # Where to get the data: use the most recently modified CSV.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = glob.glob(glob_search_path)
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # Map CSV columns to database columns.
    columns = list(df)
    columns_mapping = {column: column for column in columns}
    columns_mapping["id"] = "measurement_id"
    columns_mapping["personId"] = "person_id"
    columns_mapping["age"] = "age_days"
    columns_mapping["height"] = "height_cms"
    columns_mapping["weight"] = "weight_kgs"
    columns_mapping["muac"] = "muac_cms"
    columns_mapping["headCircumference"] = "head_circumference_cms"
    columns_mapping["deletedBy"] = "deleted_by"
    columns_mapping["createdBy"] = "created_by"

    table = "measurements"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    for index, row in df.iterrows():
        bar.update(index)

        keys = []
        values = []
        for df_key, db_key in columns_mapping.items():
            keys.append(str(db_key))
            values.append(str(row[df_key]))
        sql_statement += dbutils.create_insert_statement(table, keys, values)

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1):
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))

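# The batch insert above concatenates literal SQL built by
# dbutils.create_insert_statement. If the underlying connection happens to be
# psycopg2 (an assumption; the dbutils wrapper is not shown here), the same
# batching could be done with a parameterized query, which avoids hand-escaping
# the CSV values. This is a sketch under that assumption, not the project's API.
from psycopg2.extras import execute_values

def insert_batch(connection, table, columns, rows):
    # rows: a list of tuples ordered like `columns`.
    sql = "INSERT INTO {} ({}) VALUES %s".format(table, ", ".join(columns))
    with connection.cursor() as cursor:
        execute_values(cursor, sql, rows)
    connection.commit()
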
def execute_command_updatemeasurements():
    print("Updating measurements...")
    main_connector = dbutils.connect_to_main_database()
    # TODO import persons

    # Where to get the data: use the most recently modified CSV.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = glob.glob(glob_search_path)
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # Map CSV columns to database columns, skipping columns that are not needed.
    columns = list(df)
    print(columns)
    ignored_columns = ["sex", "address", "qrcode", "latitude", "longitude", "personId"]
    columns_mapping = {column: column for column in columns if column not in ignored_columns}
    columns_mapping["personId"] = "person_id"
    columns_mapping["headCircumference"] = "head_circumference"
    columns_mapping["deletedBy"] = "deleted_by"
    columns_mapping["createdBy"] = "created_by"

    table = "measure"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    for index, row in df.iterrows():
        bar.update(index)

        keys = []
        values = []
        for df_key, db_key in columns_mapping.items():
            keys.append(str(db_key))
            values.append(str(row[df_key]))

        # TODO what is this?
        keys.append("date")
        values.append(int(time.time()))

        # TODO what is this?
        keys.append("artifact")
        values.append("UNKNOWN")

        sql_statement += dbutils.create_insert_statement(table, keys, values)

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1):
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))

def execute_command_persons():
    print("Updating persons...")
    main_connector = dbutils.connect_to_main_database()
    # TODO import persons

    # Where to get the data: use the most recently modified CSV.
    glob_search_path = os.path.join(whhdata_path, "*.csv")
    csv_paths = glob.glob(glob_search_path)
    csv_paths.sort(key=os.path.getmtime)
    csv_path = csv_paths[-1]
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns.
    columns = list(df)
    print(columns)
    # ['personId', 'qrcode', 'sex', 'type', 'age', 'height', 'weight', 'muac',
    #  'headCircumference', 'oedema', 'latitude', 'longitude', 'address',
    #  'timestamp', 'deleted', 'deletedBy', 'visible', 'createdBy']

    # Target schema of the person table:
    """
    id VARCHAR(255) PRIMARY KEY,
    name TEXT NOT NULL,
    surname TEXT NOT NULL,
    birthday BIGINT NOT NULL,
    sex TEXT NOT NULL,
    guardian TEXT NOT NULL,
    is_age_estimated BOOLEAN NOT NULL,
    qr_code TEXT NOT NULL,
    created BIGINT NOT NULL,
    timestamp BIGINT NOT NULL,
    created_by TEXT NOT NULL,
    deleted BOOLEAN NOT NULL,
    deleted_by TEXT NOT NULL
    """
    table = "person"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    used_ids = []
    for index, row in df.iterrows():
        bar.update(index)

        # Make sure there are no duplicates, both locally and in the database.
        select_sql_statement = "SELECT COUNT(*) FROM person WHERE id='{}'".format(row["personId"])
        result = main_connector.execute(select_sql_statement, fetch_one=True)[0]
        if row["personId"] in used_ids or result != 0:
            # Person is already in the database.
            pass
        else:
            # TODO check all of these.
            insert_data = {}
            insert_data["id"] = row["personId"]
            insert_data["name"] = "UNKNOWN"
            insert_data["surname"] = "UNKNOWN"
            insert_data["birthday"] = 0
            insert_data["sex"] = "UNKNOWN"
            insert_data["guardian"] = "UNKNOWN"
            insert_data["is_age_estimated"] = False
            insert_data["qr_code"] = row["qrcode"]
            insert_data["created"] = 0
            insert_data["timestamp"] = row["timestamp"]
            insert_data["created_by"] = row["createdBy"]
            insert_data["deleted"] = row["deleted"]
            insert_data["deleted_by"] = row["deletedBy"]
            sql_statement += dbutils.create_insert_statement(table, insert_data.keys(), insert_data.values())
            used_ids.append(row["personId"])

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1) and sql_statement != "":
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))

def execute_command_persons():
    print("Updating persons...")
    main_connector = dbutils.connect_to_main_database()

    # Where to get the data.
    csv_path = config.measure_csv_path
    print("Using {}".format(csv_path))

    # Load the data-frame.
    df = pd.read_csv(csv_path)

    # List all columns.
    columns = list(df)
    print(columns)

    table = "person"

    # Number of rows before.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows before: {}".format(rows_number))

    # Insert data in batches.
    batch_size = 1000
    sql_statement = ""
    rows_number_df = len(df.index)
    bar = progressbar.ProgressBar(max_value=rows_number_df)
    used_ids = []
    for index, row in df.iterrows():
        bar.update(index)

        # Make sure there are no duplicates, both locally and in the database.
        select_sql_statement = "SELECT COUNT(*) FROM person WHERE id='{}'".format(row["personId"])
        result = main_connector.execute(select_sql_statement, fetch_one=True)[0]
        if row["personId"] in used_ids or result != 0:
            # Person is already in the database.
            pass
        else:
            # TODO check all of these.
            insert_data = {}
            insert_data["id"] = row["personId"]
            insert_data["name"] = "UNKNOWN"
            insert_data["surname"] = "UNKNOWN"
            insert_data["birthday"] = 0
            insert_data["sex"] = "UNKNOWN"
            insert_data["guardian"] = "UNKNOWN"
            insert_data["is_age_estimated"] = False
            insert_data["qr_code"] = row["qrcode"]
            insert_data["created"] = 0
            insert_data["timestamp"] = row["timestamp"]
            insert_data["created_by"] = row["createdBy"]
            insert_data["deleted"] = row["deleted"]
            insert_data["deleted_by"] = row["deletedBy"]
            sql_statement += dbutils.create_insert_statement(table, insert_data.keys(), insert_data.values())
            used_ids.append(row["personId"])

        if index != 0 and ((index % batch_size) == 0 or index == rows_number_df - 1) and sql_statement != "":
            main_connector.execute(sql_statement)
            sql_statement = ""
    bar.finish()

    # Number of rows after sync.
    rows_number = main_connector.get_number_of_rows(table)
    print("Number of rows after: {}".format(rows_number))

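# Sketch of a possible refinement to the per-row duplicate check above: load the
# existing person ids once and test membership in a set, instead of issuing one
# SELECT COUNT(*) per CSV row. Uses only the connector API already seen here;
# dbutils is the project's own helper module and is assumed to be imported.
def fetch_existing_person_ids(main_connector):
    results = main_connector.execute("SELECT id FROM person;", fetch_all=True)
    return set(result[0] for result in results)

# Inside the row loop, the duplicate check then becomes:
#     if row["personId"] in used_ids or row["personId"] in existing_ids:
#         pass  # already present
#     else:
#         ...
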
# main_connector.clear_table(table)

# Print all distinct qr-codes found in the measurements table.
sql_statement = "SELECT qrcode FROM measurements WHERE qrcode != 'NaN';"
results = main_connector.execute(sql_statement, fetch_all=True)
results = [result[0] for result in results]
print(sorted(list(set(results))))
exit(0)

# Everything below the exit(0) is unreachable debugging code: it inserts a dummy
# entry and then selects it again.
insert_data = {}
insert_data["path"] = "somepath"
insert_data["qrcode"] = "someqrcode"
insert_data["targets"] = "10, 20"
insert_data["last_updated"] = str(datetime.datetime.now())
insert_data["rejected_by_expert"] = False
insert_data["had_error"] = False
insert_data["error_message"] = ""
insert_data["width_px"] = 128
insert_data["height_px"] = 127
insert_data["blur_variance"] = 1.0
sql_statement = dbutils.create_insert_statement(table, insert_data.keys(), insert_data.values())
print(sql_statement)
results = main_connector.execute(sql_statement)
print(results)

sql_statement = dbutils.create_select_statement(table, ["path"], ["somepath"])
print(sql_statement)
results = main_connector.execute(sql_statement, fetch_all=True)
print(len(results))

def process_person_paths(person_paths, process_index):
    # person_paths = person_paths[0:4]  # TODO remove this!

    # Go through each person (qr-code).
    for person_path in tqdm(person_paths, position=process_index):
        person_path = person_path.replace('localssd/', 'localssd2/')
        print(person_path)

        # Find all artifacts for that person.
        # Note: glob needs recursive=True for "**" to match nested directories.
        artifact_paths = []
        for file_extension in file_extensions:
            print(file_extension)
            glob_search_path = os.path.join(person_path, "**/*.{}".format(file_extension))
            artifact_paths.extend(glob.glob(glob_search_path))
        print("Found {} artifacts in {}".format(len(artifact_paths), person_path))

        # Process those artifacts.
        main_connector = dbutils.connect_to_main_database()
        table = "artifact"
        batch_size = 100
        insert_count = 0
        no_measurements_count = 0
        skip_count = 0
        sql_statement = ""
        last_index = len(artifact_paths) - 1
        for artifact_index, artifact_path in enumerate(artifact_paths):

            # Check if there is already an entry in the database.
            basename = os.path.basename(artifact_path)
            sql_statement_select = dbutils.create_select_statement("artifact", ["id"], [basename])
            results = main_connector.execute(sql_statement_select, fetch_all=True)

            # No results found. Insert.
            if len(results) == 0:
                insert_data = {}
                insert_data["id"] = basename  # TODO proper?

                # Get the default values for the artifact.
                # Note: unlike process_artifact_paths above, this assumes
                # get_default_values() always returns a dict.
                default_values = get_default_values(artifact_path, table, main_connector)

                # Check if there is a measure_id.
                if "measure_id" in default_values.keys():
                    insert_count += 1
                else:
                    no_measurements_count += 1

                # Create SQL statement.
                insert_data.update(default_values)
                sql_statement_for_artifact = dbutils.create_insert_statement(table, insert_data.keys(), insert_data.values())
                sql_statement += sql_statement_for_artifact

            # Found a result. Skip.
            else:
                skip_count += 1

            # Update the database in batches and at the very end.
            if (artifact_index != 0 and artifact_index % batch_size == 0) or artifact_index == last_index:
                if sql_statement != "":
                    main_connector.execute(sql_statement)
                    sql_statement = ""

    # Return statistics (the counters are reset per person, so these refer to
    # the last processed person path).
    return (insert_count, no_measurements_count, skip_count)

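# The process_index argument (used as the tqdm position) suggests this function
# is meant to run in several worker processes at once. A sketch of how the
# person directories might be split across workers; the person directory layout
# and the worker count are assumptions, not taken from the original code.
import glob
import os
from multiprocessing import Pool

def worker(args):
    person_paths_chunk, process_index = args
    return process_person_paths(person_paths_chunk, process_index)

if __name__ == "__main__":
    person_paths = sorted(glob.glob(os.path.join("/localssd/whhdata/persons", "*")))  # assumed layout
    number_of_processes = 4
    chunks = [person_paths[i::number_of_processes] for i in range(number_of_processes)]
    with Pool(number_of_processes) as pool:
        statistics = pool.map(worker, list(zip(chunks, range(number_of_processes))))
    print(statistics)
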