def metadata_folder_to_database(folder_path, delete_db = True, db_suffix = None, explicit_database_name = None, explicit_database_location = None):
    """
    Take a metadata folder and build the database and all tables.

    Args:
        folder_path: Path to the folder containing database.json and one json file per table
        delete_db bool: Delete the database before starting
            (NOTE(review): currently unused — the existing database is always deleted; confirm intent)
        db_suffix: If provided, metadata will be modified so that the database name and s3 data
            locations include the folder suffix
        explicit_database_name: if not None the database name in glue will be set to the string
            specified in explicit_database_name
        explicit_database_location: if not None the database location in glue will be set to the
            string specified in explicit_database_location.
            If explicit_database_location starts with s3:// then the function will assume that the
            string provided is the full path to the database folder. Otherwise, the function will
            assume that explicit_database_location is a suffix to be appended to the current
            location in the original json.

        If explicit_database_name or explicit_database_location are not None then it is advised
        to leave db_suffix as None, or vice versa.

    Raises:
        ValueError: if database.json is not found in folder_path
    """
    files = os.listdir(folder_path)
    # Only keep json files; raw string avoids the invalid "\." escape warning
    files = {f for f in files if re.match(r".+\.json$", f)}

    # Fail fast if the database definition is missing
    # (the original had an unreachable `return None` after this raise)
    if "database.json" not in files:
        raise ValueError("database.json not found in metadata folder")

    db_metadata = read_json(os.path.join(folder_path, "database.json"))

    if db_suffix:
        str_to_add = "_" + db_suffix
        # Drop a single trailing slash so the suffix attaches cleanly
        # (endswith also avoids an IndexError on an empty location)
        if db_metadata["location"].endswith("/"):
            db_metadata["location"] = db_metadata["location"][:-1]
        db_metadata["location"] = db_metadata["location"] + str_to_add
        db_metadata["name"] = db_metadata["name"] + str_to_add

    # Allow the user to self-define name/location (for creation of near
    # duplicate databases for different users)
    if explicit_database_name is not None:
        db_metadata["name"] = explicit_database_name

    if explicit_database_location is not None:
        if db_metadata["location"].endswith("/"):
            db_metadata["location"] = db_metadata["location"][:-1]
        # Per the docstring: a full s3 path replaces the location outright,
        # anything else is treated as a suffix on the existing location.
        # (The original used a substring test, which mis-classified values
        # that merely *contain* "s3://"; startswith matches the documented intent.)
        if explicit_database_location.startswith("s3://"):
            db_metadata["location"] = explicit_database_location
        else:
            db_metadata["location"] = db_metadata["location"] + explicit_database_location

    database_name = db_metadata["name"]

    # Remove any pre-existing database, then (re)create it
    try:
        glue_client.delete_database(Name=database_name)
    except glue_client.exceptions.EntityNotFoundException:
        pass  # database did not exist yet; nothing to delete
    overwrite_or_create_database(database_name, db_metadata["description"])

    # Every remaining json file describes one table to register in glue
    table_paths = files.difference({"database.json"})
    for table_path in table_paths:
        table_path = os.path.join(folder_path, table_path)
        table_metadata = read_json(table_path)
        populate_glue_catalogue_from_metadata(table_metadata, db_metadata, check_existence=False)
def get_locations_df(locations):
    """
    Convert raw locations records into a DataFrame conforming to the
    locations metadata.

    Args:
        locations: list of (possibly nested) dicts, e.g. parsed API json

    Returns:
        pd.DataFrame restricted to the renamed columns and conformed to
        metadata/locations.json
    """
    # pd.json_normalize replaces the deprecated pd.io.json.json_normalize
    locations_df = pd.json_normalize(locations)

    # Keep only the columns we have renames for, then apply the renames
    renames = read_json("metadata/locations_renames.json")
    locations_df = locations_df[list(renames)].rename(columns=renames)

    # Ensure all expected columns are present and ordered per the metadata
    locations_metadata = read_json("metadata/locations.json")
    locations_df = impose_exact_conformance_on_pd_df(
        locations_df, locations_metadata
    )

    return locations_df
def get_survey_fact_df(survey_fact):
    """
    Convert raw survey fact data into a sensor-observations DataFrame.

    The data is first pivoted to long format, columns are renamed to the
    metadata naming, and the metadata column order is imposed.
    """
    long_format = survey_fact_to_long_format(survey_fact)

    column_renames = read_json("column_renames/sensor_observations_renames.json")
    observations_df = pd.DataFrame(long_format).rename(columns=column_renames)

    # Put columns into the order defined by the sensor_observations metadata
    observations_metadata = read_json(
        "glue/meta_data/occupeye_db/sensor_observations.json"
    )
    observations_df = impose_metadata_column_order_on_pd_df(
        observations_df, observations_metadata
    )

    return observations_df
def get_surveys_df(surveys):
    """
    Build a surveys DataFrame from the raw API response, conformed to the
    surveys metadata (all expected columns present, in order).
    """
    cleaned = strip_commas_from_api_response(surveys)
    surveys_df = pd.DataFrame(cleaned)

    # Conform column names to the metadata naming
    column_renames = read_json("column_renames/surveys_renames.json")
    surveys_df = surveys_df.rename(columns=column_renames)

    # Impose metadata: ensure all expected columns exist and are ordered
    surveys_metadata = read_json("glue/meta_data/occupeye_db/surveys.json")
    return impose_exact_conformance_on_pd_df(surveys_df, surveys_metadata)
def get_bookings_df(bookings):
    """
    Convert raw bookings records into a DataFrame conforming to the
    bookings metadata.

    Args:
        bookings: list of (possibly nested) dicts, e.g. parsed API json;
            may be empty

    Returns:
        pd.DataFrame with renamed columns, conformed to metadata/bookings.json.
        When `bookings` is empty, an empty frame with the target columns.
    """
    # pd.json_normalize replaces the deprecated pd.io.json.json_normalize
    bookings_df = pd.json_normalize(bookings)
    renames = read_json("metadata/bookings_renames.json")
    bookings_metadata = read_json("metadata/bookings.json")

    if len(bookings_df) > 0:
        # reindex already restricts and orders the columns, so the original's
        # second selection (`bookings_df[renames.keys()]`) was redundant;
        # a single reindex + rename is equivalent
        bookings_df = bookings_df.reindex(columns=list(renames)).rename(columns=renames)
    else:
        # No bookings: build an empty frame with the expected target columns
        bookings_df = pd.DataFrame(columns=list(renames.values()))

    bookings_df = impose_exact_conformance_on_pd_df(
        bookings_df, bookings_metadata
    )

    return bookings_df
def delete_all_target_data_from_database(database_metadata_path):
    """
    Delete the s3 data folder for every table described in a metadata folder.

    Args:
        database_metadata_path: folder containing database.json plus one
            json metadata file per table (each with an s3 "location")

    Raises:
        ValueError: if database.json is not present in the folder
    """
    files = os.listdir(database_metadata_path)
    # Only keep json files; raw string avoids the invalid "\." escape warning
    files = {f for f in files if re.match(r".+\.json$", f)}

    # Fail fast if the database definition is missing
    # (the original had an unreachable `return None` after this raise)
    if "database.json" not in files:
        raise ValueError("database.json not found in metadata folder")

    db_metadata = read_json(os.path.join(database_metadata_path, "database.json"))
    database_name = db_metadata["name"]  # NOTE(review): read but never used below — confirm intent

    # Every remaining json file describes one table whose s3 folder we delete
    table_paths = files.difference({"database.json"})
    for table_path in table_paths:
        table_path = os.path.join(database_metadata_path, table_path)
        table_metadata = read_json(table_path)
        location = table_metadata["location"]
        bucket, bucket_folder = s3_path_to_bucket_key(location)
        delete_folder_from_bucket(bucket, bucket_folder)
def get_sensor_dimension_df(sensor_dimension):
    """
    Build a sensors DataFrame from the raw sensor dimension response,
    conformed to the sensors metadata.
    """
    cleaned = strip_commas_from_api_response(sensor_dimension)
    sensors_df = pd.DataFrame(cleaned)

    # SurveyID is a partition column, so there's no need to duplicate it here
    sensors_df = sensors_df.drop(columns=["SurveyID"])

    # Conform column names to the metadata naming
    column_renames = read_json("column_renames/sensors_renames.json")
    sensors_df = sensors_df.rename(columns=column_renames)

    # Impose metadata: ensure all expected columns exist and are ordered
    sensors_metadata = read_json("glue/meta_data/occupeye_db/sensors.json")
    return impose_exact_conformance_on_pd_df(sensors_df, sensors_metadata)
 def __init__(self, filepath):
     """Load table/column metadata from the json file at *filepath*."""
     # read_json is a project helper; presumably returns the parsed json as a dict — confirm
     self.meta = read_json(filepath)
     # Name-mangled private helper defined elsewhere in this class;
     # appears to normalise the column names held in self.meta — confirm
     self.__update_column_names()