def main(): logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL) if CLEAR_TABLE_FIRST: if cartosql.tableExists(CARTO_TABLE): cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) ### 1. Check if table exists and create table checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD) existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD) num_existing = len(existing_ids) ### 2. Fetch data from FTP, dedupe, process num_new = processData(existing_ids) num_total = num_existing + num_new ### 3. Notify results logging.info('Total rows: {}, New rows: {}, Max: {}'.format( num_total, num_new, MAX_ROWS)) deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD) # Get most recent update date #if new rows were added to table, make today the most recent update date if num_new > 0: most_recent_date = get_most_recent_date(CARTO_TABLE) lastUpdateDate(DATASET_ID, most_recent_date) logging.info("SUCCESS")
def main(): logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL) logging.info('STARTING') if CLEAR_TABLE_FIRST: if cartosql.tableExists(CARTO_TABLE): logging.info("Clearing table") cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) # 1. Check if table exists and create table existing_ids = [] if cartosql.tableExists(CARTO_TABLE): existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD) else: createTableWithIndices(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD) # 2. Iterively fetch, parse and post new data num_new = processNewData(existing_ids) existing_count = num_new + len(existing_ids) logging.info('Total rows: {}, New: {}, Max: {}'.format(existing_count, num_new, MAXROWS)) # 3. Remove old observations deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD, MAXAGE) # Get most recent update date most_recent_date = get_most_recent_date(CARTO_TABLE) lastUpdateDate(DATASET_ID, most_recent_date) ### logging.info('SUCCESS')
def main(): logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL) logging.info('STARTING') if CLEAR_TABLE_FIRST: if cartosql.tableExists(CARTO_TABLE): cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL') # 1. Check if table exists and create table existing_ids = [] if cartosql.tableExists(CARTO_TABLE): logging.info('Fetching existing ids') existing_ids = getIds(CARTO_TABLE, UID_FIELD) else: logging.info('Table {} does not exist, creating'.format(CARTO_TABLE)) createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD) # 2. Iterively fetch, parse and post new data new_ids = processNewData(existing_ids) new_count = len(new_ids) existing_count = new_count + len(existing_ids) logging.info('Total rows: {}, New: {}, Max: {}'.format( existing_count, new_count, MAXROWS)) # 3. Remove old observations deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD) # Get most recent update date most_recent_date = get_most_recent_date(CARTO_TABLE) logging.info(most_recent_date) lastUpdateDate(DATASET_ID, most_recent_date) logging.info('SUCCESS')
def delete_carto_entries(id_list, column): ''' Delete entries in Carto table based on values in a specified column INPUT id_list: list of column values for which you want to delete entries in table (list of strings) column: column name where you should search for these values (string) ''' # generate empty variable to store WHERE clause of SQL query we will send where = None # go through each ID in the list to be deleted for delete_id in id_list: # if we already have values in the SQL query, add the new value with an OR before it if where: where += f' OR {column} = {delete_id}' # if the SQL query is empty, create the start of the WHERE clause else: where = f'{column} = {delete_id}' # if where statement is long or we are on the last id, delete rows # the length of 15000 was chosen arbitrarily - all the IDs to be deleted could not be sent at once, but no # testing was done to optimize this value if len(where) > 15000 or delete_id == id_list[-1]: cartosql.deleteRows(CARTO_TABLE, where=where, user=CARTO_USER, key=CARTO_KEY) # after we have deleted a set of rows, start over with a blank WHERE clause for the SQL query so we don't # try to delete rows we have already deleted where = None
def main(): logging.basicConfig(stream=sys.stderr, level=logging.INFO) logging.info('STARTING') # clear the table before starting, if specified if CLEAR_TABLE_FIRST: logging.info('Clearing Table') # if the table exists if cartosql.tableExists(CARTO_TABLE, user=CARTO_USER, key=CARTO_KEY): # delete all the rows cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=CARTO_USER, key=CARTO_KEY) # note: we do not delete the entire table because this will cause the dataset visualization on Resource Watch # to disappear until we log into Carto and open the table again. If we simply delete all the rows, this # problem does not occur # Check if table exists, create it if it does not logging.info('Checking if table exists and getting existing IDs.') existing_ids = checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD) # Fetch, process, and upload the new data logging.info('Fetching new data') num_new = processData(existing_ids) logging.info('Previous rows: {}, New rows: {}'.format(len(existing_ids), num_new)) # Update Resource Watch updateResourceWatch(num_new) # Delete local files in Docker container delete_local() logging.info('SUCCESS')
def processData(): ''' Function to download data and upload it to Carto Will first try to get the data for today three times Then decrease a day up until 8 tries until it finds one ''' date = datetime.date.today() - datetime.timedelta(days=1) success = False tries = 0 while tries < MAX_TRIES and success == False: logging.info("Fetching data for {}".format(str(date))) f = getFilename(date) url = SOURCE_URL.format(date=date.strftime('%Y%m%d')) try: urllib.request.urlretrieve(url, f) except Exception as inst: logging.info("Error fetching data for {}".format(str(date))) if tries >= 2: date = date - datetime.timedelta(days=1) tries = tries + 1 if tries == MAX_TRIES: logging.error( "Error fetching data for {}, and max tries reached. See source for last data update." .format(str(datetime.date.today()))) success = False else: df = pd.read_csv(f, header=0, usecols=[ 'Lat_DNB', 'Lon_DNB', 'Date_Mscan', 'Date_LTZ', 'QF_Detect', 'EEZ', 'Land_Mask' ]) df = df.drop(df[df.QF_Detect == 999999].index) df['the_geom'] = df.apply( lambda row: getGeom(row['Lon_DNB'], row['Lat_DNB']), axis=1) df = df[[ 'the_geom', 'QF_Detect', 'Date_Mscan', 'Date_LTZ', 'Land_Mask', 'Lon_DNB', 'Lat_DNB', 'EEZ' ]] if not cartosql.tableExists(CARTO_TABLE): logging.info('Table {} does not exist'.format(CARTO_TABLE)) cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA) else: cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL') cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA) rows = df.values.tolist() logging.info('Success!') #logging.info('The following includes the first ten rows added to Carto:') #logging.info(rows[:10]) if len(rows): cartosql.blockInsertRows(CARTO_TABLE, CARTO_SCHEMA.keys(), CARTO_SCHEMA.values(), rows) tries = tries + 1 success = True
def cleanOldRows(table, time_field, max_age, date_format='%Y-%m-%d %H:%M:%S'): ''' Delete rows that are older than a certain threshold INPUT table: name of table in Carto from which we will delete the old data (string) time_field: column that stores datetime information (string) max_age: oldest date that can be stored in the Carto table (datetime object) date_format: format of dates in Carto table (string) RETURN num_expired: number of rows that have been dropped from the table (integer) ''' # initialize number of rows that will be dropped as 0 num_expired = 0 # if the table exists if cartosql.tableExists(table, CARTO_USER, CARTO_KEY): # check if max_age variable is a datetime object if isinstance(max_age, datetime.datetime): # convert datetime object to string formatted according to date_format max_age = max_age.strftime(date_format) elif isinstance(max_age, str): # raise an error if max_age is a string logging.error( 'Max age must be expressed as a datetime.datetime object') # delete rows from table which are older than the max_age r = cartosql.deleteRows(table, "{} < '{}'".format(time_field, max_age), CARTO_USER, CARTO_KEY) # get the number of rows that were dropped from the table num_expired = r.json()['total_rows'] else: # raise an error if the table doesn't exist logging.error("{} table does not exist yet".format(table)) return (num_expired)
def deleteExcessRows(table, max_rows, time_field, max_age=''): '''Delete excess rows by age or count''' num_dropped = 0 if isinstance(max_age, datetime.datetime): max_age = max_age.isoformat() # 1. delete by age if max_age: r = cartosql.deleteRows(table, "{} < '{}'".format(time_field, max_age)) num_dropped = r.json()['total_rows'] # 2. get sorted ids (old->new) r = cartosql.getFields('cartodb_id', table, order='{}'.format(time_field), f='csv') ids = r.text.split('\r\n')[1:-1] # 3. delete excess if len(ids) > max_rows: r = cartosql.deleteRowsByIDs(table, ids[:-max_rows]) num_dropped += r.json()['total_rows'] if num_dropped: logging.info('Dropped {} old rows from {}'.format(num_dropped, table)) return num_dropped
def processData(): ''' Function to download data and upload it to Carto Will first try to get the data for MAX_TRIES then quits ''' success = False tries = 0 df = None while tries < MAX_TRIES and success == False: logging.info('Try running feeds, try number = {}'.format(tries)) try: df = feeds() success = True except Exception as inst: logging.info(inst) logging.info("Error fetching data trying again") tries = tries + 1 if tries == MAX_TRIES: logging.error( "Error fetching data, and max tries reached. See source for last data update." ) success = False if success == True: if not cartosql.tableExists(CARTO_TABLE, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')): logging.info('Table {} does not exist'.format(CARTO_TABLE)) cartosql.createTable(CARTO_TABLE, CARTO_SCHEMA) # Send dataframe to Carto logging.info('Writing to Carto') cc = cartoframes.CartoContext( base_url="https://{user}.carto.com/".format(user=CARTO_USER), api_key=CARTO_KEY) cc.write(df, CARTO_TABLE, overwrite=True, privacy='public') else: cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) # Send dataframe to Carto logging.info('Writing to Carto') cc = cartoframes.CartoContext( base_url="https://{user}.carto.com/".format(user=CARTO_USER), api_key=CARTO_KEY) cc.write(df, CARTO_TABLE, overwrite=True, privacy='public')
def main(): logging.basicConfig(stream=sys.stderr, level=logging.INFO) logging.info('STARTING') # clear the table before starting, if specified if CLEAR_TABLE_FIRST: logging.info("clearing table") # if the table exists if cartosql.tableExists(CARTO_TABLE, user=CARTO_USER, key=CARTO_KEY): # delete all the rows cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=CARTO_USER, key=CARTO_KEY) # note: we do not delete the entire table because this will cause the dataset visualization on Resource Watch # to disappear until we log into Carto and open the table again. If we simply delete all the rows, this # problem does not occur # Check if table exists, create it if it does not logging.info('Checking if table exists and getting existing IDs.') existing_ids = checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD) # Delete rows that are older than a certain threshold num_expired = cleanOldRows(CARTO_TABLE, TIME_FIELD, MAX_AGE) # Get the filename from source url for which we want to download data filename = fetchDataFileName(SOURCE_URL) # Fetch, process, and upload new data logging.info('Fetching new data') num_new = processData(SOURCE_URL, filename, existing_ids) logging.info('Previous rows: {}, New rows: {}'.format( len(existing_ids), num_new)) # Delete data to get back to MAX_ROWS num_deleted = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD) # Update Resource Watch updateResourceWatch(num_new) logging.info("SUCCESS")
def main(): logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL) if CLEAR_TABLE_FIRST: if cartosql.tableExists(CARTO_TABLE): cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) ### 1. Check if table exists, if not, create it checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD) ### 2. Retrieve existing data r = cartosql.getFields(UID_FIELD, CARTO_TABLE, order='{} desc'.format(TIME_FIELD), f='csv') existing_ids = r.text.split('\r\n')[1:-1] num_existing = len(existing_ids) ### 3. Fetch data from FTP, dedupe, process num_new = processData(existing_ids) ### 4. Delete data to get back to MAX_ROWS num_dropped = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD, MAX_AGE) ### 5. Notify results total = num_existing + num_new - num_dropped # Get most recent update date most_recent_date = get_most_recent_date(CARTO_TABLE) lastUpdateDate(DATASET_ID, most_recent_date) logging.info('Existing rows: {}, New rows: {}, Max: {}'.format( total, num_new, MAX_ROWS)) logging.info("SUCCESS")
def main(): logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL) logging.info('STARTING') if CLEAR_TABLE_FIRST: if cartosql.tableExists(CARTO_TABLE): cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) ### 1. Check if table exists, if not, create it logging.info('Checking if table exists and getting existing IDs.') existing_ids = checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD) num_existing = len(existing_ids) ### 2. Fetch data from FTP, dedupe, process logging.info('Fetching new data') num_new = processData(existing_ids) logging.info('Processing interactions') processInteractions() ### 3. Delete data to get back to MAX_ROWS logging.info('Deleting excess rows') num_dropped = deleteExcessRows(CARTO_TABLE, MAX_ROWS, AGE_FIELD) ### 4. Notify results total = num_existing + num_new - num_dropped # If updates, change update date on RW if num_new > 0: lastUpdateDate(DATASET_ID, datetime.datetime.utcnow()) logging.info('Existing rows: {}, New rows: {}, Max: {}'.format( total, num_new, MAX_ROWS)) logging.info("SUCCESS")
def cleanOldRows(table, time_field, max_age, date_format='%Y-%m-%d %H:%M:%S'): ''' Delete excess rows by age Max_Age should be a datetime object or string Return number of dropped rows ''' num_expired = 0 if cartosql.tableExists(table): if isinstance(max_age, datetime.datetime): max_age = max_age.strftime(date_format) elif isinstance(max_age, str): logging.error('Max age must be expressed as a datetime.datetime object') r = cartosql.deleteRows(table, "{} < '{}'".format(time_field, max_age)) num_expired = r.json()['total_rows'] else: logging.error("{} table does not exist yet".format(table)) return(num_expired)
def deleteExcessRows(table, max_rows, time_field, max_age=''): '''Delete excess rows by age or count''' num_dropped = 0 if isinstance(max_age, datetime.datetime): max_age = max_age.isoformat() # 1. delete by age if max_age: r = cartosql.deleteRows(table, "{} < '{}'".format(time_field, max_age)) num_dropped = r.json()['total_rows'] # 2. get sorted ids (old->new) ids = getFieldAsList(CARTO_TABLE, 'cartodb_id', orderBy=''.format(TIME_FIELD)) # 3. delete excess if len(ids) > max_rows: r = cartosql.deleteRowsByIDs(table, ids[:-max_rows]) num_dropped += r.json()['total_rows'] if num_dropped: logging.info('Dropped {} old rows from {}'.format(num_dropped, table))
def main(): logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL) logging.info('STARTING') if CLEAR_TABLE_FIRST: if cartosql.tableExists(CARTO_MARKET_TABLE): cartosql.deleteRows(CARTO_MARKET_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) if cartosql.tableExists(CARTO_ALPS_TABLE): cartosql.deleteRows(CARTO_ALPS_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) if cartosql.tableExists(CARTO_INTERACTION_TABLE): cartosql.deleteRows(CARTO_INTERACTION_TABLE, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) # 1. Check if table exists and create table existing_markets = [] if cartosql.tableExists(CARTO_MARKET_TABLE, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')): logging.info('Fetching existing market ids') existing_markets = getIds(CARTO_MARKET_TABLE, UID_FIELD) else: logging.info( 'Table {} does not exist, creating'.format(CARTO_MARKET_TABLE)) createTableWithIndex(CARTO_MARKET_TABLE, CARTO_MARKET_SCHEMA, UID_FIELD) existing_alps = [] if cartosql.tableExists(CARTO_ALPS_TABLE, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')): logging.info('Fetching existing ALPS ids') existing_alps = getIds(CARTO_ALPS_TABLE, UID_FIELD) else: logging.info( 'Table {} does not exist, creating'.format(CARTO_ALPS_TABLE)) createTableWithIndex(CARTO_ALPS_TABLE, CARTO_ALPS_SCHEMA, UID_FIELD, TIME_FIELD) existing_interactions = [] if cartosql.tableExists(CARTO_INTERACTION_TABLE, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')): logging.info('Fetching existing interaction ids') existing_interactions = getIds(CARTO_INTERACTION_TABLE, UID_FIELD) else: logging.info('Table {} does not exist, creating'.format( CARTO_INTERACTION_TABLE)) createTableWithIndex(CARTO_INTERACTION_TABLE, CARTO_INTERACTION_SCHEMA, UID_FIELD, INTERACTION_TIME_FIELD) # 2. Iterively fetch, parse and post new data num_new_markets, num_new_alps, markets_updated = processNewData( existing_markets, existing_alps) # Update Interaction table num_new_interactions = processInteractions(markets_updated) # Report new data count num_existing_markets = num_new_markets + len(existing_markets) logging.info('Total market rows: {}, New: {}, Max: {}'.format( num_existing_markets, num_new_markets, MAXROWS)) num_existing_alps = num_new_alps + len(existing_alps) logging.info('Total alps rows: {}, New: {}, Max: {}'.format( num_existing_alps, num_new_alps, MAXROWS)) num_existing_interactions = num_new_interactions + len( existing_interactions) logging.info('Total interaction rows: {}, New: {}, Max: {}'.format( num_existing_interactions, num_new_interactions, MAXROWS)) # 3. Remove old observations deleteExcessRows(CARTO_ALPS_TABLE, MAXROWS, TIME_FIELD) # MAXAGE) # Get most recent update date most_recent_date = get_most_recent_date(CARTO_ALPS_TABLE) lastUpdateDate(DATASET_ID, most_recent_date) logging.info('SUCCESS')
def processNewData(url): ''' Fetch, process and upload new data INPUT url: url where you can find the download link for the source data (string) RETURN num_new: number of rows of new data sent to Carto table (integer) ''' # specify the starting page of source url we want to pull page = 1 # generate the url and pull data for this page r = requests.get(url.format(page=page)) # pull data from request response json raw_data = r.json()['data'] # if data is available from source url if len(raw_data) > 0: # if the table exists if cartosql.tableExists(CARTO_TABLE, user=CARTO_USER, key=CARTO_KEY): # delete all the rows cartosql.deleteRows(CARTO_TABLE, 'cartodb_id IS NOT NULL', user=CARTO_USER, key=CARTO_KEY) logging.info('Updating {}'.format(CARTO_TABLE)) else: # raise an error that data is not available from source url logging.error("Source data missing. Table will not update.") # create an empty list to store new data new_data = [] # if data is available from source url while len(raw_data) > 0: logging.info('Processing page {}'.format(page)) # read in source data as a pandas dataframe df = pd.DataFrame(raw_data) # go through each rows in the dataframe for row_num in range(df.shape[0]): # get the row of data row = df.iloc[row_num] # create an empty list to store data from this row new_row = [] # go through each column in the Carto table for field in CARTO_SCHEMA: # if we are fetching data for unique id column if field == 'uid': # add the unique id to the list of data from this row new_row.append(row[UID_FIELD]) # for any other column, check if there are values available from the source for this row else: # if data available from source for this field, populate the field with the data # else populate with None val = row[field] if row[field] != '' else None # add this value to the list of data from this row new_row.append(val) # add the list of values from this row to the list of new data new_data.append(new_row) # go to the next page and check for data page += 1 # generate the url and pull data for this page r = requests.get(url.format(page=page)) # pull data from request response json raw_data = r.json()['data'] # find the length (number of rows) of new_data num_new = len(new_data) # if we have found new dates to process if num_new: # insert new data into the carto table cartosql.blockInsertRows(CARTO_TABLE, CARTO_SCHEMA.keys(), CARTO_SCHEMA.values(), new_data, user=CARTO_USER, key=CARTO_KEY) return num_new
def processInteractions(): r = cartosql.get( "SELECT * FROM {} WHERE current='True'".format(CARTO_TABLE), user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) interaction_data = r.json()['rows'] try_num = 0 #if we didn't get data back, wait a few minutes and try again while not len(interaction_data): logging.info('Sleeping and trying again.') try_num += 1 time.sleep(300) interaction_data = r.json()['rows'] if try_num > 5: logging.error('Problem fetching data to generate interactions') exit() countries_with_interaction = [] for interaction in interaction_data: ctry = interaction['country_iso3'] if ctry not in countries_with_interaction: countries_with_interaction.append(ctry) if cartosql.tableExists(CARTO_TABLE_INTERACTION, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')): cartosql.deleteRows(CARTO_TABLE_INTERACTION, 'cartodb_id IS NOT NULL', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) #run to create new table #existing_interaction_ids = checkCreateTable(CARTO_TABLE_INTERACTION, CARTO_SCHEMA_INTERACTION, UID_FIELD) new_interactions = [] for ctry in countries_with_interaction: r = cartosql.get( "SELECT * FROM {} WHERE current='True' AND country_iso3='{}'". format(CARTO_TABLE, ctry), user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) ctry_interaction_data = r.json()['rows'] event_num = 1 for interaction in ctry_interaction_data: event = interaction['event_name'].split(": ", 1) if event_num == 1: if len(event) == 1: interaction_str = '{} ({})'.format(event[0], interaction['url']) else: interaction_str = '{} ({})'.format(event[1], interaction['url']) else: if len(event) == 1: interaction_str = interaction_str + '; ' + '{} ({})'.format( event[0], interaction['url']) else: interaction_str = interaction_str + '; ' + '{} ({})'.format( event[1], interaction['url']) event_num += 1 #uid = gen_interaction_uid(ctry) if ctry_interaction_data: row = [] for key in CARTO_SCHEMA_INTERACTION.keys(): try: if key == 'the_geom': lon = ctry_interaction_data[0]['lon'] lat = ctry_interaction_data[0]['lat'] item = {'type': 'Point', 'coordinates': [lon, lat]} elif key == 'interaction': item = interaction_str else: item = ctry_interaction_data[0][key] except KeyError: item = None row.append(item) new_interactions.append(row) logging.info('Adding {} new interactions'.format(len(new_interactions))) cartosql.blockInsertRows(CARTO_TABLE_INTERACTION, CARTO_SCHEMA_INTERACTION.keys(), CARTO_SCHEMA_INTERACTION.values(), new_interactions, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY'))
('year', 'numeric')]) # Go through each type of "value" in this table # Add data column, unit, and indicator code to CARTO_SCHEMA, column_order, and dataset valnames = info['Carto Column'].split(";") for i in range(len(valnames)): CARTO_SCHEMA.update({valnames[i]: 'numeric'}) # add the unit column name and type for this value to the Carto Schema CARTO_SCHEMA.update({'unit' + str(i + 1): 'text'}) # add the WB Indicator Code column name and type for this value to the Carto Schema CARTO_SCHEMA.update({'indicator_code' + str(i + 1): 'text'}) # add the RW country name and country code columns to the table CARTO_SCHEMA.update({"rw_country_name": 'text'}) CARTO_SCHEMA.update({"rw_country_code": 'text'}) cartosql.deleteRows(table_name, 'cartodb_id IS NOT NULL', user=CARTO_USER, key=CARTO_KEY) # Insert new observations if len(all_world_bank_data): cartosql.blockInsertRows(table_name, CARTO_SCHEMA.keys(), CARTO_SCHEMA.values(), all_world_bank_data.values.tolist(), user=CARTO_USER, key=CARTO_KEY) logging.info('Success! New rows have been added to Carto.') else: logging.info('No rows to add to Carto.') ''' Upload original data and processed data to Amazon S3 storage
def processInteractions(markets_updated): num_new_interactions = 0 #new_rows = [] if PROCESS_HISTORY_INTERACTIONS == True: # get all markets logging.info('Processing interactions for all ALPS data') markets_to_process = getIds(CARTO_MARKET_TABLE, 'uid') else: logging.info('Getting IDs of interactions that should be updated') r = cartosql.getFields( ['region_id', 'market_id', 'market_name'], CARTO_INTERACTION_TABLE, where="{} < current_date - interval '{}' month".format( INTERACTION_TIME_FIELD, LOOKBACK), f='csv', user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) old_ids = [market.split(',') for market in r.text.split('\r\n')[1:-1]] old_market_uids = [ genMarketUID(old_id[0], old_id[1], old_id[2]) for old_id in old_ids ] logging.info( 'Processing interactions for new ALPS data and re-processing interactions that are out of date' ) markets_to_process = np.unique(markets_updated + old_market_uids) #go through each market that was updated and create the correct rows for them num_markets = len(markets_to_process) market_num = 1 logging.info('{} markets to update interactions for'.format( len(markets_to_process))) for m_uid in markets_to_process: logging.info('processing {} out of {} markets'.format( market_num, len(markets_to_process))) new_rows = [] for food_category, sql_query in CATEGORIES.items(): try_num = 1 while try_num <= 3: try: #logging.info('Processing interaction for {} at uid {}, try number {} (market {} of {})'.format(food_category, m_uid, try_num, market_num, num_markets)) # get information about market r = cartosql.get("SELECT * FROM {} WHERE uid='{}'".format( CARTO_MARKET_TABLE, m_uid), user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) if r.json()['total_rows'] == 0: #logging.info('No rows for interaction') alps_entries = [] break market_entry = r.json()['rows'][0] # get information about food prices at market # SQL gets most recent entry for each commodity at each market that is NOT a forecast request = "SELECT DISTINCT ON (mktid, cmname) * FROM {table} WHERE mktid={market_id} AND mktname='{market_name}' AND adm1id={region_id} AND category LIKE '{cat_name}' AND date > current_date - interval '{x}' month AND forecast = 'False' ORDER BY mktid, cmname, date desc".format( table=CARTO_ALPS_TABLE, market_id=market_entry['market_id'], market_name=market_entry['market_name'].replace( "'", "''"), region_id=market_entry['region_id'], cat_name=sql_query, x=LOOKBACK) r = cartosql.get(request, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) alps_entries = r.json()['rows'] break except: try_num += 1 uid = genInteractionUID(market_entry['region_id'], market_entry['market_id'], market_entry['market_name'], food_category) if alps_entries: commodity_num = 1 for entry in alps_entries: if commodity_num == 1: interaction_string = INTERACTION_STRING_FORMAT.format( num=commodity_num, commodity=entry['cmname'], alps=entry['alps'].lower(), date=entry['date'][:10]) else: interaction_string = interaction_string + '; ' + INTERACTION_STRING_FORMAT.format( num=commodity_num, commodity=entry['cmname'], alps=entry['alps'].lower(), date=entry['date'][:10]) commodity_num += 1 # create new Carto row row = [] for field in CARTO_INTERACTION_SCHEMA.keys(): if field == 'uid': row.append(uid) elif field == 'market_id': row.append(int(market_entry['market_id'])) elif field == 'the_geom': shapely_point = wkb.loads(market_entry['the_geom'], hex=True) json_point = json.loads( json.dumps( shapely.geometry.mapping(shapely_point))) row.append(json_point) elif field == 'region_name': row.append(market_entry['region_name']) elif field == 'region_id': row.append(market_entry['region_id']) elif field == 'market_name': row.append(market_entry['market_name']) elif field == 'market_interaction': if len(alps_entries) == 0: row.append(None) else: row.append(interaction_string) elif field == 'category': row.append(food_category) elif field == 'highest_pewi': if len(alps_entries) == 0: row.append(None) else: highest_pewi = max( [entry['pewi'] for entry in alps_entries]) row.append(highest_pewi) elif field == 'highest_alps': if len(alps_entries) == 0: row.append(None) else: highest_alps_category = assignALPS(highest_pewi) row.append(highest_alps_category) elif field == INTERACTION_TIME_FIELD: if len(alps_entries) == 0: row.append(None) else: row.append( min(entry['date'] for entry in alps_entries)) new_rows.append(row) num_new_interactions += 1 #delete old entries for the markets that were updated #logging.info('Deleting old interactions from Carto') try: cartosql.deleteRows(CARTO_INTERACTION_TABLE, "{} = '{}'".format(UID_FIELD, uid), user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) except: pass #cartosql.deleteRowsByIDs(CARTO_INTERACTION_TABLE, markets_to_process, id_field='market_id') #send new rows for these markets #logging.info('Sending new interactions to Carto') if new_rows: cartosql.insertRows(CARTO_INTERACTION_TABLE, CARTO_INTERACTION_SCHEMA.keys(), CARTO_INTERACTION_SCHEMA.values(), new_rows, blocksize=500, user=os.getenv('CARTO_USER'), key=os.getenv('CARTO_KEY')) market_num += 1 return num_new_interactions