def load_xls(datadir, config_path, db_config_name):
    """Load xlsx into postgres for multiple files"""
    files = os.listdir(datadir)
    files_xls = [f for f in files if f.split('.')[-1] in ('xlsx', 'xls')]
    logger.info(files_xls)

    # Create the engine once and reuse it for every file.
    engine = postgres_engine_pandas(config_path, db_config_name)

    for filename in files_xls:
        df = pd.read_excel(os.path.join(datadir, filename))
        if df.empty:
            logger.info('No data')
            continue

        logger.info("added " + filename)
        logger.info(df.columns)

        # Load the data into Postgres; the table is named after the file.
        # TODO: link to to_sql function
        table_name = filename.split('.')[0]
        df.to_sql(table_name,
                  engine,
                  if_exists='replace',
                  index=True,
                  index_label='idx'
                  )  # ,dtype={geom: Geometry('POINT', srid='4326')})
        logger.info(filename + ' added as ' + table_name)
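
# A minimal usage sketch for load_xls; the directory, config path and section
# name below are placeholder values, not part of the original code. Every
# .xls/.xlsx file in the directory ends up as its own Postgres table.
def example_load_xls():
    load_xls('data', 'authentication/config.ini', 'dev')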
Example #2
def psycopg_connection_string(config_full_path, db_config_name):
    """
    Postgres connection string for psycopg2.

    Args:
      1. config_full_path: location of the config.ini file including the name of the file, for example authentication/config.ini
      2. db_config_name: dev or docker to get the ip user/password and port values.

    Returns:
        Returns the psycopg2 connection string: 'host= port= user= dbname= password='
    """
    config = get_config(config_full_path)
    print('Config names: {}'.format(config.sections()))
    print(db_config_name)
    host = config.get(db_config_name, 'host')
    logger.info(host)
    port = config.get(db_config_name, 'port')
    user = config.get(db_config_name, 'user')
    dbname = config.get(db_config_name, 'dbname')
    password = config.get(db_config_name, 'password')

    return 'host={} port={} user={} dbname={} password={}'.format(
        host, port, user, dbname, password)
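
# Usage sketch: build the connection string and open a psycopg2 connection
# with it. The config path and section name ('dev') are placeholder values.
def example_psycopg_connection():
    pg_str = psycopg_connection_string('authentication/config.ini', 'dev')
    with psycopg2.connect(pg_str) as conn:
        with conn.cursor() as cursor:
            cursor.execute('SELECT version()')
            return cursor.fetchone()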
Example #3
def get_kvk_json(url, params, api_key=None):
    """
    Get a json response from a url, provided params + api_key.
    Args:
        url: api endpoint
        params: kvkNumber, branchNumber, rsin, street, houseNumber, postalCode,
                city, tradeName, or provide lists/dicts of values
        api_key: kvk api key; if omitted, the KVK_API_KEY environment variable is used
    Returns:
        parsed json or error message
    """

    # Use the explicitly passed key if given, otherwise fall back to the
    # KVK_API_KEY environment variable mentioned in the docstring.
    API_KEY = api_key or os.environ.get('KVK_API_KEY')

    if API_KEY:
        url += '&user_key={}'.format(API_KEY)
    else:
        return logger.error('please provide api_key')

    response = requests.get(url, params)

    try:
        response.raise_for_status() # Raises 'HTTPError', if one occurred
    except requests.exceptions.HTTPError as e:
        raise errors.InvalidResponse(response) from e
    json_response = response_to_json(response)

    logger.info("received data from {} ".format(url))

    return json_response
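
# Usage sketch for get_kvk_json. The endpoint URL and query parameters are
# placeholders; check the KvK API documentation for the actual values.
def example_get_kvk_json():
    url = 'https://api.kvk.nl/api/v2/search/companies?'  # placeholder endpoint
    params = {'postalCode': '1011PN', 'houseNumber': '1'}
    return get_kvk_json(url, params)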
Example #4
def retrywithtrailingslash(url, access_token):
    """GET the url with the given auth headers; on a 404 or 401, retry once with a trailing slash appended."""
    response = requests.get(url,
                            headers=access_token)  # Get first page for count
    if response.status_code != 200:
        if response.status_code == 404 or response.status_code == 401:
            logger.info('Error status: {} {}'.format(
                str(response.status_code), "trying with trailing / ..."))
            response = requests.get(url + '/', headers=access_token)
            return response
        else:
            return logger.info('Error status: ' + str(response.status_code))
    return response
def your_first_function(argname1):
    """
    Does some great stuff.

    Args:
        argname1: path/in/store

    Returns:
        A file or check, show some examples.
    """
    something_2 = 'test2'
    logger.info('Success!')
    return something_2
Example #6
def get_data(url_api, endpoint, metadata, accessToken, limit):
    """
    Get and flatten all the data from the api.

    Args:
        1. url_api: the main api url::

            https://api.data.amsterdam.nl/tellus
        2. endpoint: the endpoint to retrieve::

            tellus

        3. metadata: a dict with reference data from the other endpoints, in this case the tellus locations, speed and length categories.
        4. accessToken: access token generated using the auth helper: GetAccessToken().getAccessToken()
        5. limit: set the number of pages you want to retrieve, ideal for testing first::

           10

    Returns:
        A list containing multiple items which are all reformatted to a flattened json with added metadata.
    """
    data = []
    url = url_api + '/' + endpoint
    startPage = 1
    has_next_key = False
    cvalues = conversionListCvalues(metadata)
    json_data = getJsonData(url, accessToken)

    number_of_items = json_data['count']
    logger.info("number of items {}".format(number_of_items))
    number_of_pages = int(abs(number_of_items / 100))

    if "next" in json_data["_links"].keys():
        has_next_key = True
        url = json_data["_links"]["next"]
        logger.info(url)
    while has_next_key and startPage < limit:
        response = getJsonData(url, accessToken)
        if "next" in response["_links"].keys():
            url = response["_links"]["next"]
            logger.info(url)
        else:
            has_next_key = False
            # no next_key, stop the loop
        # logger.info('status: ' + str(response.status_code))

        for item in response["_embedded"]:
            #logger.info(item)
            newRow = reformatData(item, metadata['tellus']['_embedded'],
                                  cvalues)
            # Add c-waarde row
            #values = list(newRow.values())
            # append to main data array
            data.append(newRow)
        # json.dump(data, outputFile, indent=4, sort_keys=True)
        logger.info('Page {} of {}'.format(startPage, number_of_pages))
        startPage += 1
    #logger.info(data)
    return data
Example #7
def get_config(full_path):
    """
    Get config file with all login credentials, port numbers, etc.

    Args:
        full_path: provide the full path to the config.ini file, for example authentication/config.ini

    Returns:
        The entire configuration file to use them with ``config.get(config_name, 'AUTHURL')``
     """
    config = configparser.RawConfigParser()
    config.read(full_path)
    logger.info('Found these configs.. {}'.format(config.sections()))

    return config
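
# Usage sketch: read the config once and pull a single value from a section.
# The path is a placeholder; 'objectstore' and 'AUTHURL' follow the
# objectstore_connection helper elsewhere in this collection.
def example_get_config():
    config = get_config('authentication/config.ini')
    return config.get('objectstore', 'AUTHURL')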
def your_second_function(argname1, argname2):
    """
    Does some great stuff.

    Args:
        1. argname1: path/in/store
        2. argname2: your_file_name.txt

    Returns:
        A file or check, show some examples.
    """

    data = argname1
    something = data
    logger.info('Another Success!')
    return something
Example #9
def get_layers_from_wfs(url_wfs):
    """
        Get all layer names in WFS service, print and return them in a list.
    """
    layer_names = []
    parameters = {"REQUEST": "GetCapabilities", "SERVICE": "WFS"}
    getcapabilities = requests.get(url_wfs, params=parameters)
    # print(getcapabilities.text)
    root = ET.fromstring(getcapabilities.text)

    for neighbor in root.iter('{http://www.opengis.net/wfs/2.0}FeatureType'):
        # print(neighbor.tag, neighbor.attrib)
        logger.info("layername: " +
                    neighbor[1].text)  # neighbor[0]==name, neighbor[1]==title
        layer_names.append(neighbor[1].text)
    return layer_names
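
# A small usage sketch listing the layers of the gebieden WFS that is also
# used further down in this collection.
def example_get_layers():
    return get_layers_from_wfs('https://map.data.amsterdam.nl/maps/gebieden')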
Example #10
def getJsonData(url, access_token):
    """
    Get a json response from a url with accesstoken.

    Args:
        1. url: api endpoint
        2. access_token: access token generated using the auth helper: GetAccessToken().getAccessToken(usertype='employee_plus', scopes='BRK/RS,BRK/RSN/,BRK/RO')

    Returns:
        parsed json or error message
    """

    response = retrywithtrailingslash(url, access_token)

    json_data = response.json()
    logger.info("recieved data from {} ".format(url))
    return json_data
Example #11
def execute_sql(pg_str, sql):
    """
    Execute a sql query with psycopg2.

    Args:
        1. pg_str: connection string using helper function psycopg_connection_string, returning:``host= port= user= dbname= password=``
        2. sql: SQL string in triple quotes::

            CREATE TABLE foo (bar text)

    Returns:
        Executed sql with conn.cursor().execute(sql)
    """
    with psycopg2.connect(pg_str) as conn:
        logger.info('connected to database')
        with conn.cursor() as cursor:
            logger.info('start executing sql query')
            cursor.execute(sql)
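
# Usage sketch: run a single statement. The config path and section name used
# to build the connection string are placeholder values.
def example_execute_sql():
    pg_str = psycopg_connection_string('authentication/config.ini', 'dev')
    execute_sql(pg_str, """CREATE TABLE IF NOT EXISTS foo (bar text)""")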
Example #12
def put_object(
        connection, container: str, filename: str,
        contents, content_type: str) -> None:
    """
    Upload a file to objectstore.

    Args:
        1. container: path/in/store
        2. filename: your_file_name.txt
        3. contents: contents of the file, obtained with ``with open('your_file_name.txt', 'rb') as contents:``
        4. content_type: 'text/csv', 'application/json', ... It is retrieved using the mime package.

    Returns:
        A saved file in the container of the objectstore.
    """
    logger.info('Uploading file...')
    connection.put_object(
        container, filename, contents=contents,
        content_type=content_type)
Example #13
def executeScriptsFromFile(pg_str, filename):
    """WIP does not work yet"""
    # Open and read the file as a single buffer
    with open(filename, 'r') as fd:
        sqlFile = fd.read()

    # all SQL commands (split on ';')
    sqlCommands = sqlFile.split(';')

    # Execute every command from the input file
    for command in sqlCommands:
        logger.info(command)
        # This will skip and report errors
        # For example, if the tables do not yet exist, this will skip over
        # the DROP TABLE commands
        try:
            execute_sql(pg_str, command)
        except psycopg2.OperationalError as msg:
            logger.info("Command skipped: {}".format(msg))
Example #14
def upload_file(connection, container_path, filename_path):
    """
    Upload file to the objectstore.

    Args:
        1. connection = Objectstore connection based on from helpers.connection import objectstore_connection
        2. container_path = Name of container/prefix/subfolder, for example Dataservices/aanvalsplan_schoon/crow
        3. filename_path = full path including the name of file, for example: data/test.csv

        Uses mime for content_type: https://stackoverflow.com/questions/43580/how-to-find-the-mime-type-of-a-file-in-python

    Result:
        Uploads a file to the objectstore and checks whether it exists in the defined container_path.
    """
    with open(filename_path, 'rb') as contents:
        mime = MimeTypes()
        content_type = mime.guess_type(filename_path)[0]
        logger.info("Found content type '{}'".format(content_type))
        filename = os.path.basename(filename_path)
        put_object(connection, container_path, filename, contents, content_type)
        check_existence_object(connection, container_path, filename)
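
# Usage sketch: open an objectstore connection and push one local file.
# The config path, container path and file path are the placeholder examples
# taken from the docstrings above.
def example_upload_file():
    connection = objectstore_connection('config.ini', 'objectstore')
    upload_file(connection, 'Dataservices/aanvalsplan_schoon/crow',
                'data/test.csv')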
Example #15
def get_layer_from_wfs(url_wfs, layer_name, srs, outputformat):
    """
    Get layer from a wfs service.
    Args:
        1. url_wfs: full url of the WFS including https, excluding /?::

            https://map.data.amsterdam.nl/maps/gebieden

        2. layer_name: Title of the layer::

            stadsdeel

        3. srs: coordinate system number, excluding EPSG::

            28992

        4. outputformat: leave empty to return standard GML, else define json, geojson, txt, shapezip::

            geojson

    Returns:
        The layer in the specified output format.
    """
    parameters = {
        "REQUEST": "GetFeature",
        "TYPENAME": layer_name,
        "SERVICE": "WFS",
        "VERSION": "2.0.0",
        "SRSNAME": "EPSG:{}".format(srs),
        "OUTPUTFORMAT": outputformat
    }
    logger.info("Requesting data from {}, layer: {}".format(
        url_wfs, layer_name))
    response = requests.get(url_wfs, params=parameters)
    if outputformat in ('geojson', 'json'):
        geojson = response.json()
        logger.info("{} features returned.".format(
            str(len(geojson["features"]))))
        return geojson
    return response
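
# Usage sketch with the example values from the docstring above: fetch the
# 'stadsdeel' layer in RD coordinates (EPSG:28992) as GeoJSON.
def example_get_layer():
    return get_layer_from_wfs('https://map.data.amsterdam.nl/maps/gebieden',
                              'stadsdeel', 28992, 'geojson')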
Example #16
def main():
    args = parser().parse_args()
    pg_str = psycopg_connection_string(args.full_config_path,
                                       args.db_config_name)

    load_wfs_to_postgres.main()  # Reuses pg_str

    execute_sql(pg_str, createDummyTable)
    logger.info('created createDummyTable')
    execute_sql(pg_str, createGeom)
    logger.info('geometry field created')
    execute_sql(pg_str, addAreaCodes)
    logger.info('areaCode fields added')
    execute_sql(pg_str, test_areacodesView)
    logger.info('csv view Created')
Example #17
def getJsonData(url, accessToken):
    """
    Get a json response from a url with accesstoken.

    Args:
        1. url: api endpoint
        2. accessToken: access token generated using the auth helper: GetAccessToken().getAccessToken(usertype='employee', scopes='TLLS/R')

    Returns:
        parsed json or error message
    """
    response = requests.get(url,
                            headers=accessToken)  # Get first page for count
    if response.status_code != 200:
        if response.status_code == 404 or response.status_code == 401:
            logger.info('Error status: {} {}'.format(
                str(response.status_code), "trying with trailing / ..."))
            response = requests.get(url + '/', headers=accessToken)
        else:
            return logger.info('Error status: ' + str(response.status_code))
    jsonData = response.json()
    logger.info("recieved data from {} ".format(url))
    return jsonData
Example #18
def objectstore_connection(config_full_path,
                           config_name,
                           print_config_vars=None):
    """
    Get an objectstore connection.

    Args:
        1. config_full_path: /path_to_config/config.ini or config.ini if in root.
        2. config_name: objectstore
        3. print_config_vars: if set to True: print all variables from the config file

    Returns:
        An objectstore connection session.
      """

    assert os.environ['OBJECTSTORE_PASSWORD']

    config = get_config(config_full_path)

    if print_config_vars:
        logger.info('config variables.. :{}'.format(
            dict(config.items(config_name))))

    conn = Connection(
        authurl=config.get(config_name, 'AUTHURL'),
        user=config.get(config_name, 'USER'),
        key=os.environ['OBJECTSTORE_PASSWORD'],
        tenant_name=config.get(config_name, 'TENANT_NAME'),
        auth_version=config.get(config_name, 'VERSION'),
        os_options={
            'tenant_id': config.get(config_name, 'TENANT_ID'),
            'region_name': config.get(config_name, 'REGION_NAME'),
            # 'endpoint_type': 'internalURL'
        })
    logger.info('Established successful connection to {}'.format(
        config.get(config_name, 'TENANT_NAME')))

    return conn
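
# Usage sketch: requires OBJECTSTORE_PASSWORD in the environment; the config
# path is a placeholder and 'objectstore' is the section name from the
# docstring above.
def example_objectstore_connection():
    return objectstore_connection('config.ini', 'objectstore',
                                  print_config_vars=True)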
def load_layers(pg_str):
    """
    Load layers into Postgres using a list of titles of each layer within the WFS service.

    Args:
        pg_str: psycopg2 connection string::

            'PG:host= port= user= dbname= password='
    """
    layerNames = ['stadsdeel',
                  'buurt',
                  'buurtcombinatie',
                  'gebiedsgerichtwerken']

    srsName = 'EPSG:28992'

    for areaName in layerNames:
        WFS = "https://map.data.amsterdam.nl/maps/gebieden?REQUEST=GetFeature&SERVICE=wfs&Version=2.0.0&SRSNAME=" + srsName + "&typename=" + areaName
        wfs2psql(WFS, pg_str, areaName)
        logger.info(areaName + ' loaded into PG.')
Example #20
def main():
    # Return all arguments in a list
    args = parser().parse_args()

    logger.info("Getting Access token.")
    access_token = GetAccessToken().getAccessToken(usertype=args.usertype,
                                                   scopes=args.scopes)

    logger.info(
        "Setup temp database to store requests to speed up restart download if network fails."
    )
    requests_cache.install_cache('requests_db', backend='sqlite')

    logger.info("Getting data with Access token.")
    json_data = getJsonData(args.url, access_token)
    logger.info(json_data)

    save_file(json_data, args.output_folder, args.filename)
Example #21
def main():
    # Return all arguments in a list
    args = parser().parse_args()
    logger.info("Getting Access token.")
    accessToken = GetAccessToken().getAccessToken(usertype='employee',
                                                  scopes='TLLS/R')
    logger.info(
        "Setup temp database to store requests to speed up restart download if network fails."
    )
    requests_cache.install_cache('requests_db', backend='sqlite')

    endpoints = ['tellus', 'snelheidscategorie', 'lengtecategorie']
    metadata = {}
    for endpoint in endpoints:
        json_data = getJsonData(args.url + '/' + endpoint, accessToken)
        # logger.info(json_data)
        metadata.update({endpoint: json_data})
        logger.info("retrieved {}".format(endpoint))
    data = get_data(args.url, 'tellusdata', metadata, accessToken, args.limit)
    save_file(data, args.output_folder, args.filename)
Example #22
def check_existence_object(connection, container_path, filename):
    """
    Check if the file is present in the objectstore container_path.

    Args:
        1. connection = Objectstore connection based on from helpers.connection import objectstore_connection
        2. container_path = Name of container/prefix/subfolder
        3. filename = Name of file, for example test.csv

    Returns:
        - 'The object was successfully created'
        - 'The object was not found'
        - 'Error finding the object'
    """
    try:
        resp_headers = connection.head_object(container_path, filename)
        logger.info('The object was successfully created')
    except ClientException as e:
        if e.http_status == 404:
            logger.info('The object was not found')
        else:
            logger.info('An error occurred checking for the existence of the object')