def process_artifact_paths(artifact_paths):
    """Insert rows into the ``artifact`` table for paths not stored yet.

    For every path the file's basename is looked up as ``id`` in the
    ``artifact`` table. If no row exists, an INSERT statement is built from
    the basename plus the values returned by ``get_default_values``. The
    statements are collected and executed in batches. A summary of the
    counters is printed at the end.

    Args:
        artifact_paths: Sequence of artifact file paths (``len()`` is used).
    """
    main_connector = dbutils.connect_to_main_database()
    table = "artifact"
    batch_size = 100

    insert_count = 0
    no_measurements_count = 0
    skip_count = 0
    bar = progressbar.ProgressBar(max_value=len(artifact_paths))
    # INSERT statements waiting for the next batch flush.
    pending_statements = []
    last_index = len(artifact_paths) - 1
    for index, artifact_path in enumerate(artifact_paths):
        bar.update(index)

        # Check if there is already an entry.
        basename = os.path.basename(artifact_path)
        sql_statement_select = dbutils.create_select_statement(
            table, ["id"], [basename])  # TODO is this the proper id?
        results = main_connector.execute(sql_statement_select,
                                         fetch_all=True)

        if len(results) == 0:
            # No results found. Insert.
            insert_data = {"id": basename}  # TODO proper?

            # Process the artifact.
            default_values = get_default_values(artifact_path, table,
                                                main_connector)
            if default_values is not None:
                insert_data.update(default_values)
                pending_statements.append(dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values()))
                insert_count += 1
            else:
                no_measurements_count += 1
        else:
            # Found a result. Nothing is updated; the path is only counted.
            skip_count += 1

        # Flush on every batch boundary (except index 0) and on the very
        # last element so that no statement is left behind.
        is_batch_boundary = index != 0 and index % batch_size == 0
        if is_batch_boundary or index == last_index:
            batch_sql = "".join(pending_statements)
            if batch_sql != "":
                main_connector.execute(batch_sql)
            pending_statements = []

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))
Beispiel #2
0
def update_media_table(file_paths, table, get_values, batch_size=1000):
    """Insert rows into ``table`` for media files that are not stored yet.

    Each path is looked up in ``table`` by its ``path`` column. For paths
    without a row, an INSERT statement is built from the values returned by
    ``get_default_values`` and ``get_values``. Statements are collected and
    executed in batches through the module-level ``main_connector``. A
    summary of the counters is printed at the end.

    Args:
        file_paths: Sequence of media file paths (``len()`` is used).
        table: Name of the table to query and insert into.
        get_values: Callable invoked with the file path; its result is merged
            into the insert data via ``dict.update``.
        batch_size: Flush the collected statements whenever the loop index is
            a non-zero multiple of this value (and at the last element).
    """
    insert_count = 0
    no_measurements_count = 0
    skip_count = 0
    bar = progressbar.ProgressBar(max_value=len(file_paths))
    # INSERT statements waiting for the next batch flush.
    pending_statements = []
    last_index = len(file_paths) - 1
    for index, file_path in enumerate(file_paths):
        bar.update(index)

        # Check if there is already an entry.
        # NOTE(review): the lookup uses the full file_path while the initial
        # insert value below is the basename; whether they end up consistent
        # depends on get_default_values/get_values overriding "path" --
        # confirm.
        path = os.path.basename(file_path)
        sql_statement_select = dbutils.create_select_statement(
            table, ["path"], [file_path])
        results = main_connector.execute(sql_statement_select, fetch_all=True)

        if len(results) == 0:
            # No results found. Insert.
            insert_data = {"path": path}
            default_values = get_default_values(file_path, table)
            if default_values is not None:
                insert_data.update(default_values)
                insert_data.update(get_values(file_path))
                pending_statements.append(dbutils.create_insert_statement(
                    table, insert_data.keys(), insert_data.values()))
                insert_count += 1
            else:
                no_measurements_count += 1
        else:
            # Found a result. Nothing is updated; the path is only counted.
            # TODO check if measurement id is missing or not
            skip_count += 1

        # Flush on every batch boundary (except index 0) and on the very
        # last element so that no statement is left behind.
        is_batch_boundary = index != 0 and index % batch_size == 0
        if is_batch_boundary or index == last_index:
            batch_sql = "".join(pending_statements)
            if batch_sql != "":
                main_connector.execute(batch_sql)
            pending_statements = []

    bar.finish()
    print("Inserted {} new entries.".format(insert_count))
    print("No measurements for {} entries.".format(no_measurements_count))
    print("Skipped {} entries.".format(skip_count))
Beispiel #3
0
def get_default_values(path, table):
    """Build the default column values for the media file at ``path``.

    The QR code and a timestamp are parsed from the path. The
    ``measurements`` table is then searched (via the module-level
    ``main_connector``) for a manual measurement with that QR code whose
    timestamp differs from the file's timestamp by less than the threshold.

    Args:
        path: "/"-separated file path. Its second and third components must
            match ``whhdata_path[1:]`` and ``media_subpath``, the fourth is
            taken as the QR code, and the third-from-last "_"-separated
            field of the file name is taken as the timestamp.
        table: Not used by this function.

    Returns:
        dict with the keys "path", "qrcode", "last_updated" and
        "rejected_by_expert", plus "measurement_id" when a matching
        measurement was found.

    Raises:
        AssertionError: If the path does not start with the expected
            directories (only when assertions are enabled).
    """
    # Split and check the path.
    path_split = path.split("/")
    assert path_split[1] == whhdata_path[1:]
    assert path_split[2] == media_subpath

    # Get important values from path.
    qrcode = path_split[3]
    timestamp = path_split[-1].split("_")[-3]

    # Getting timestamp.
    last_updated, _ = get_last_updated()

    # Get id of measurement.
    # Presumably one day in milliseconds -- confirm the timestamp unit.
    threshold = int(60 * 60 * 24 * 1000)
    # NOTE(review): values parsed from the path are interpolated directly
    # into the SQL text; switch to a parameterized query if the connector
    # supports it.
    sql_statement = (
        "SELECT id"
        " FROM measurements WHERE"
        " qrcode = '{}'"
        " AND type = 'manual'"
        " AND ABS(timestamp - {}) < {}"
        ";"
    ).format(qrcode, timestamp, threshold)
    results = main_connector.execute(sql_statement, fetch_all=True)

    # Prepare values.
    values = {
        "path": path,
        "qrcode": qrcode,
        "last_updated": last_updated,
        "rejected_by_expert": False,
    }

    if len(results) == 0:
        # Measurement id not found.
        print("No measurement_id found for {}".format(path))
    else:
        # Found a measurement id; use the first column of the first row.
        values["measurement_id"] = results[0][0]

    return values
Beispiel #4
0
#main_connector.clear_table(table)

# Print the sorted, de-duplicated qrcodes of all measurements whose qrcode
# is not the literal string 'NaN'.
sql_statement = "SELECT qrcode FROM measurements WHERE qrcode != 'NaN';"
results = [
    row[0]
    for row in main_connector.execute(sql_statement, fetch_all=True)
]
print(sorted(set(results)))

exit(0)

# NOTE: everything below is unreachable because of the exit(0) above.
# It inserts one sample row into `table` and then selects it again by path.
insert_data = {
    "path": "somepath",
    "qrcode": "someqrcode",
    "targets": "10, 20",
    "last_updated": str(datetime.datetime.now()),
    "rejected_by_expert": False,
    "had_error": False,
    "error_message": "",
    "width_px": 128,
    "height_px": 127,
    "blur_variance": 1.0,
}
sql_statement = dbutils.create_insert_statement(
    table, insert_data.keys(), insert_data.values())
print(sql_statement)
results = main_connector.execute(sql_statement)
print(results)

sql_statement = dbutils.create_select_statement(table, ["path"], ["somepath"])
print(sql_statement)
results = main_connector.execute(sql_statement, fetch_all=True)
print(len(results))
    def process_person_paths(person_paths, process_index):
        """Insert artifact rows for every person directory in ``person_paths``.

        For each person directory, all files matching one of the
        ``file_extensions`` are collected with ``glob``. Files whose basename
        is not yet present as ``id`` in the ``artifact`` table get an INSERT
        statement built from ``get_default_values``; the statements are
        executed in batches of 100.

        Args:
            person_paths: Iterable of person (qr-code) directory paths.
            process_index: Passed to ``tqdm`` as the ``position`` of the
                progress bar.

        Returns:
            Tuple ``(insert_count, no_measurements_count, skip_count)``
            accumulated over all persons. ``(0, 0, 0)`` when
            ``person_paths`` is empty.
        """
        # Statistics are accumulated over ALL persons. They must be
        # initialized before the loop: resetting them per person would make
        # the returned tuple describe only the last person and would leave
        # the names unbound for an empty input.
        insert_count = 0
        no_measurements_count = 0
        skip_count = 0

        # Go through each person (qr-code).
        for person_path in tqdm(person_paths, position=process_index):

            person_path = person_path.replace('localssd/', 'localssd2/')

            print(person_path)

            # Find all artifacts for that person.
            # NOTE(review): glob.glob is called without recursive=True, so
            # "**" behaves like "*" and matches exactly one directory level
            # -- confirm that this is intended.
            artifact_paths = []
            for file_extension in file_extensions:
                print(file_extension)
                glob_search_path = os.path.join(
                    person_path, "**/*.{}".format(file_extension))
                artifact_paths.extend(glob.glob(glob_search_path))

            print("Found {} artifacts in {}".format(len(artifact_paths),
                                                    person_path))

            # Process those artifacts.
            main_connector = dbutils.connect_to_main_database()
            table = "artifact"
            batch_size = 100
            # INSERT statements waiting for the next batch flush.
            pending_statements = []
            last_index = len(artifact_paths) - 1
            for artifact_index, artifact_path in enumerate(artifact_paths):

                # Check if there is already an entry in the database.
                basename = os.path.basename(artifact_path)
                sql_statement_select = dbutils.create_select_statement(
                    table, ["id"], [basename])
                results = main_connector.execute(sql_statement_select,
                                                 fetch_all=True)

                if len(results) == 0:
                    # No results found. Insert.
                    insert_data = {"id": basename}  # TODO proper?

                    # Get the default values for the artifact.
                    default_values = get_default_values(
                        artifact_path, table, main_connector)

                    # Check if there is a measure_id. The row is inserted
                    # either way; only the statistics differ.
                    if "measure_id" in default_values:
                        insert_count += 1
                    else:
                        no_measurements_count += 1

                    # Create SQL statement.
                    insert_data.update(default_values)
                    pending_statements.append(
                        dbutils.create_insert_statement(
                            table, insert_data.keys(), insert_data.values()))
                else:
                    # Found a result. Nothing is updated; only counted.
                    skip_count += 1

                # Flush on every batch boundary (except index 0) and on the
                # very last artifact so that no statement is left behind.
                is_batch_boundary = (artifact_index != 0
                                     and artifact_index % batch_size == 0)
                if is_batch_boundary or artifact_index == last_index:
                    batch_sql = "".join(pending_statements)
                    if batch_sql != "":
                        main_connector.execute(batch_sql)
                    pending_statements = []

        # Return statistics.
        return (insert_count, no_measurements_count, skip_count)