Beispiel #1
0
def checkCreateTable(table, schema, id_field='', time_field=''):
    '''
    Create the table if it does not exist, and pull list of IDs already in the table if it does
    INPUT   table: Carto table to check or create (string)
            schema: dictionary of column names and types, used if we are creating the table for the first time (dictionary)
            id_field: optional, name of column that we want to use as a unique ID for this table; this will be used to compare the
                    source data to the our table each time we run the script so that we only have to pull data we
                    haven't previously uploaded (string)
            time_field:  optional, name of column that will store datetime information (string)
    '''
    # check it the table already exists in Carto
    if cartosql.tableExists(table, user=CARTO_USER, key=CARTO_KEY):
        # if the table does exist, get a list of all the values in the id_field column
        print('Carto table already exists.')
    else:
        # if the table does not exist, create it with columns based on the schema input
        print('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema, user=CARTO_USER, key=CARTO_KEY)
        # if a unique ID field is specified, set it as a unique index in the Carto table; when you upload data, Carto
        # will ensure no two rows have the same entry in this column and return an error if you try to upload a row with
        # a duplicate unique ID
        if id_field:
            cartosql.createIndex(table,
                                 id_field,
                                 unique=True,
                                 user=CARTO_USER,
                                 key=CARTO_KEY)
        # if a time_field is specified, set it as an index in the Carto table; this is not a unique index
        if time_field:
            cartosql.createIndex(table,
                                 time_field,
                                 user=CARTO_USER,
                                 key=CARTO_KEY)
Beispiel #2
0
def createTableWithIndices(table, schema, idField, otherFields=[]):
    '''Get existing ids or create table'''
    cartosql.createTable(table, schema)
    cartosql.createIndex(table, idField, unique=True)
    for field in otherFields:
        if field != idField:
            cartosql.createIndex(table, field, unique=False)
Beispiel #3
0
def checkCreateTable(table, schema, id_field, time_field=''):
    '''
    Create the table if it does not exist, and pull list of IDs already in the table if it does
    INPUT   table: Carto table to check or create (string)
            schema: dictionary of column names and types, used if we are creating the table for the first time (dictionary)
            id_field: name of column that we want to use as a unique ID for this table; this will be used to compare the
                    source data to the our table each time we run the script so that we only have to pull data we
                    haven't previously uploaded (string)
            time_field:  optional, name of column that will store datetime information (string)
    RETURN  list of existing IDs in the table, pulled from the id_field column (list of strings)
    '''
    # check it the table already exists in Carto
    if cartosql.tableExists(table, user=CARTO_USER, key=CARTO_KEY):
        # if the table does exist, get a list of all the values in the id_field column
        logging.info('Fetching existing IDs')
        r = cartosql.getFields(id_field, table, f='csv', post=True, user=CARTO_USER, key=CARTO_KEY)
        # turn the response into a list of strings, removing the first and last entries (header and an empty space at end)
        return r.text.split('\r\n')[1:-1]
    else:
        # if the table does not exist, create it with columns based on the schema input
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema, user=CARTO_USER, key=CARTO_KEY)
        # if a unique ID field is specified, set it as a unique index in the Carto table; when you upload data, Carto
        # will ensure no two rows have the same entry in this column and return an error if you try to upload a row with
        # a duplicate unique ID
        if id_field:
            cartosql.createIndex(table, id_field, unique=True, user=CARTO_USER, key=CARTO_KEY)
        # if a time_field is specified, set it as an index in the Carto table; this is not a unique index
        if time_field:
            cartosql.createIndex(table, time_field, user=CARTO_USER, key=CARTO_KEY)
        # return an empty list because there are no IDs in the new table yet
        return []
Beispiel #4
0
def checkCreateTable(table, schema, id_field, time_field):
    '''Get existing ids or create table'''
    if cartosql.tableExists(table,
                            user=os.getenv('CARTO_USER'),
                            key=os.getenv('CARTO_KEY')):
        logging.info('Fetching existing IDs')
        r = cartosql.getFields(id_field,
                               table,
                               f='csv',
                               user=os.getenv('CARTO_USER'),
                               key=os.getenv('CARTO_KEY'))
        return r.text.split('\r\n')[1:-1]
    else:
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table,
                             schema,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
        cartosql.createIndex(table,
                             id_field,
                             unique=True,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
        cartosql.createIndex(table,
                             time_field,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
    return []
Beispiel #5
0
def processData():
    '''
    Function to download data and upload it to Carto
    Will first try to get the data for today three times
    Then decrease a day up until 8 tries until it finds one
    '''
    date = datetime.date.today() - datetime.timedelta(days=1)
    success = False
    tries = 0
    while tries < MAX_TRIES and success == False:
        logging.info("Fetching data for {}".format(str(date)))
        f = getFilename(date)
        url = SOURCE_URL.format(date=date.strftime('%Y%m%d'))
        try:
            urllib.request.urlretrieve(url, f)

        except Exception as inst:
            logging.info("Error fetching data for {}".format(str(date)))
            if tries >= 2:
                date = date - datetime.timedelta(days=1)
            tries = tries + 1
            if tries == MAX_TRIES:
                logging.error(
                    "Error fetching data for {}, and max tries reached. See source for last data update."
                    .format(str(datetime.date.today())))
            success = False
        else:
            df = pd.read_csv(f,
                             header=0,
                             usecols=[
                                 'Lat_DNB', 'Lon_DNB', 'Date_Mscan',
                                 'Date_LTZ', 'QF_Detect', 'EEZ', 'Land_Mask'
                             ])
            df = df.drop(df[df.QF_Detect == 999999].index)
            df['the_geom'] = df.apply(
                lambda row: getGeom(row['Lon_DNB'], row['Lat_DNB']), axis=1)

            df = df[[
                'the_geom', 'QF_Detect', 'Date_Mscan', 'Date_LTZ', 'Land_Mask',
                'Lon_DNB', 'Lat_DNB', 'EEZ'
            ]]
            if not cartosql.tableExists(CARTO_TABLE):
                logging.info('Table {} does not exist'.format(CARTO_TABLE))
                cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA)
            else:
                cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL')
                cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA)

                rows = df.values.tolist()
                logging.info('Success!')
                #logging.info('The following includes the first ten rows added to Carto:')
                #logging.info(rows[:10])
                if len(rows):
                    cartosql.blockInsertRows(CARTO_TABLE, CARTO_SCHEMA.keys(),
                                             CARTO_SCHEMA.values(), rows)
            tries = tries + 1
            success = True
Beispiel #6
0
def checkCreateTable(table, schema, id_field, time_field):
    '''Get existing ids or create table'''
    if cartosql.tableExists(table):
        logging.info('Fetching existing IDs')
        return getFieldAsList(id_field, table)
    else:
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema)
        cartosql.createIndex(table, id_field, unique=True)
        cartosql.createIndex(table, time_field)
    return []
Beispiel #7
0
def checkCreateTable(table, schema, id_field, time_field):
    '''
    Get existing ids or create table
    Return a list of existing ids in time order
    '''
    if cartosql.tableExists(table):
        logging.info('Table {} already exists'.format(table))
    else:
        logging.info('Creating Table {}'.format(table))
        cartosql.createTable(table, schema)
        cartosql.createIndex(table, id_field, unique=True)
        if id_field != time_field:
            cartosql.createIndex(table, time_field)
Beispiel #8
0
def checkCreateTable(table, schema, id_field, time_field=''):
    '''Get existing ids or create table'''
    if cartosql.tableExists(table):
        logging.info('Fetching existing IDs')
        r = cartosql.getFields(id_field, table, f='csv', post=True)
        return r.text.split('\r\n')[1:-1]
    else:
        logging.info('Table {} does not exist, creating'.format(table))
        cartosql.createTable(table, schema)
        cartosql.createIndex(table, id_field, unique=True)
        if time_field:
            cartosql.createIndex(table, time_field)
    return []
Beispiel #9
0
def createTableWithIndex(table, schema, id_field, time_field=''):
    '''Get existing ids or create table'''
    cartosql.createTable(table,
                         schema,
                         user=os.getenv('CARTO_USER'),
                         key=os.getenv('CARTO_KEY'))
    cartosql.createIndex(table,
                         id_field,
                         unique=True,
                         user=os.getenv('CARTO_USER'),
                         key=os.getenv('CARTO_KEY'))
    if time_field:
        cartosql.createIndex(table,
                             time_field,
                             user=os.getenv('CARTO_USER'),
                             key=os.getenv('CARTO_KEY'))
Beispiel #10
0
def processData():
    '''
    Function to download data and upload it to Carto
    Will first try to get the data for MAX_TRIES then quits
    '''
    success = False
    tries = 0
    df = None
    while tries < MAX_TRIES and success == False:
        logging.info('Try running feeds, try number = {}'.format(tries))
        try:
            df = feeds()
            success = True
        except Exception as inst:
            logging.info(inst)
            logging.info("Error fetching data trying again")
            tries = tries + 1
            if tries == MAX_TRIES:
                logging.error(
                    "Error fetching data, and max tries reached. See source for last data update."
                )
            success = False
    if success == True:
        if not cartosql.tableExists(CARTO_TABLE,
                                    user=os.getenv('CARTO_USER'),
                                    key=os.getenv('CARTO_KEY')):
            logging.info('Table {} does not exist'.format(CARTO_TABLE))
            cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA)
            # Send dataframe to Carto
            logging.info('Writing to Carto')
            cc = cartoframes.CartoContext(
                base_url="https://{user}.carto.com/".format(user=CARTO_USER),
                api_key=CARTO_KEY)
            cc.write(df, CARTO_TABLE, overwrite=True, privacy='public')
        else:
            cartosql.deleteRows(CARTO_TABLE,
                                'cartodb_id IS NOT NULL',
                                user=os.getenv('CARTO_USER'),
                                key=os.getenv('CARTO_KEY'))
            # Send dataframe to Carto
            logging.info('Writing to Carto')
            cc = cartoframes.CartoContext(
                base_url="https://{user}.carto.com/".format(user=CARTO_USER),
                api_key=CARTO_KEY)
            cc.write(df, CARTO_TABLE, overwrite=True, privacy='public')
Beispiel #11
0
def createTableWithIndex(table, schema, id_field, time_field=''):
    '''Get existing ids or create table'''
    cartosql.createTable(table, schema)
    cartosql.createIndex(table, id_field, unique=True)
    if time_field:
        cartosql.createIndex(table, time_field)
Beispiel #12
0
def createTableWithIndices(table, schema, idField, timeField):
    '''Get existing ids or create table'''
    cartosql.createTable(table, schema)
    cartosql.createIndex(table, idField, unique=True)
    if timeField != idField:
        cartosql.createIndex(table, timeField, unique=False)
Beispiel #13
0
# UPLOAD
# specify column names and types
CARTO_SCHEMA = {
    'iso3': 'text',
    'country': 'text',
    'year': 'numeric',
    'vulnerability': 'numeric',
    'readiness': 'numeric',
    'gain': 'numeric'
}

# check if table exists
if cartosql.tableExists(CARTO_TABLE,
                        user=os.getenv('CARTO_WRI_RW_USER'),
                        key=os.getenv('CARTO_WRI_RW_KEY')):
    print('This table already exists. Please change the name and try again.')
else:
    # create table with appropriate columns
    cartosql.createTable(CARTO_TABLE,
                         CARTO_SCHEMA,
                         user=os.getenv('CARTO_WRI_RW_USER'),
                         key=os.getenv('CARTO_WRI_RW_KEY'))
    # send processed data to table
    cartosql.blockInsertRows(CARTO_TABLE,
                             CARTO_SCHEMA.keys(),
                             CARTO_SCHEMA.values(),
                             final_df.values.tolist(),
                             user=os.getenv('CARTO_WRI_RW_USER'),
                             key=os.getenv('CARTO_WRI_RW_KEY'))