def main():
    """Refresh the CARTO table: expire old rows, ingest new data, trim to MAX_ROWS."""
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)

    # Optionally wipe the table so the run starts from scratch
    if CLEAR_TABLE_FIRST:
        cartosql.dropTable(CARTO_TABLE)

    ### 1. Make sure the destination table exists
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    ### 2. Expire rows older than MAX_AGE
    num_expired = cleanOldRows(CARTO_TABLE, TIME_FIELD, MAX_AGE)

    ### 3. Pull the ids already present so new data can be deduped
    response = cartosql.getFields(UID_FIELD, CARTO_TABLE,
                                  order='{} desc'.format(TIME_FIELD), f='csv')
    # CSV body: skip the header row and the trailing empty line
    existing_ids = response.text.split('\r\n')[1:-1]
    num_existing = len(existing_ids)
    logging.debug("First 10 IDs already in table: {}".format(
        existing_ids[:10]))

    ### 4. Fetch the source file from FTP and upload unseen rows
    filename = fetchDataFileName(SOURCE_URL)
    num_new = processData(SOURCE_URL, filename, existing_ids)

    ### 5. Trim the table back down to MAX_ROWS
    num_deleted = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    ### 6. Report what happened
    logging.info(
        'Expired rows: {}, Previous rows: {}, New rows: {}, Dropped rows: {}, Max: {}'
        .format(num_expired, num_existing, num_new, num_deleted, MAX_ROWS))
    logging.info("SUCCESS")
def main():
    """Refresh the CARTO table: create it if needed, post new rows, trim excess."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Optionally drop the table before refilling it
    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            logging.info("Clearing table")
            cartosql.dropTable(CARTO_TABLE)

    # 1. Ensure the table exists and gather the ids already stored
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    else:
        createTableWithIndices(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iteratively fetch, parse and post rows not seen before
    num_new = processNewData(existing_ids)
    total_count = num_new + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        total_count, num_new, MAXROWS))

    # 3. Drop observations beyond the row/age limits
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD, MAXAGE)
    logging.info('SUCCESS')
def main():
    """Refresh the CARTO table: create it if needed, post new rows, trim excess."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Optionally drop the table so the run rebuilds it from scratch
    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    # 1. Ensure the table exists and gather the ids already stored
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iteratively fetch, parse and post rows not seen before
    new_ids = processNewData(existing_ids)
    new_count = len(new_ids)
    total_count = new_count + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        total_count, new_count, MAXROWS))

    # 3. Drop observations beyond the row limit
    # NOTE(review): a MAXAGE argument was commented out here in the original
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD)
    logging.info('SUCCESS')
def main():
    """Refresh the CARTO table, then record the most recent update date."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    # Optionally drop the table before refilling it
    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Make sure the destination table exists
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    ### 2. Pull the ids already present so new data can be deduped
    response = cartosql.getFields(UID_FIELD, CARTO_TABLE,
                                  order='{} desc'.format(TIME_FIELD), f='csv')
    # CSV body: skip the header row and the trailing empty line
    existing_ids = response.text.split('\r\n')[1:-1]
    num_existing = len(existing_ids)

    ### 3. Fetch from FTP, dedupe against existing ids, upload
    num_new = processData(existing_ids)

    ### 4. Trim the table back down to MAX_ROWS / MAX_AGE
    num_dropped = deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD, MAX_AGE)

    ### 5. Report results and record the latest update date
    row_total = num_existing + num_new - num_dropped
    most_recent_date = get_most_recent_date(CARTO_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)
    logging.info('Existing rows: {}, New rows: {}, Max: {}'.format(row_total, num_new, MAX_ROWS))
    logging.info("SUCCESS")
def main():
    """Refresh the CARTO table and, if new rows arrived, stamp today's date."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    # Optionally drop the table before refilling it
    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Ensure the table exists and gather the ids already stored
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)
    existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    num_existing = len(existing_ids)

    ### 2. Fetch from FTP, dedupe against existing ids, upload
    num_new = processData(existing_ids)
    num_total = num_existing + num_new

    ### 3. Report results and trim the table back to MAX_ROWS
    logging.info('Total rows: {}, New rows: {}, Max: {}'.format(num_total, num_new, MAX_ROWS))
    deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)

    # If anything was added, today becomes the dataset's most recent update
    if num_new > 0:
        most_recent_date = datetime.datetime.utcnow()
        lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info("SUCCESS")
def main():
    """Refresh the CARTO table, trim old data, and record the latest update date."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Optionally drop the table before refilling it
    if CLEAR_TABLE_FIRST:
        logging.info('Clearing table')
        cartosql.dropTable(CARTO_TABLE)

    # 1. Ensure the table exists and gather the ids already stored
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)

    # 2. Iteratively fetch, parse and post rows not seen before
    num_new = processNewData(existing_ids)
    total_count = num_new + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        total_count, num_new, MAXROWS))

    # 3. Drop observations beyond the row limit and before Jan 1 of MAXAGE
    # NOTE(review): MAXAGE is used here as a *year*, not an age — confirm intent
    deleteExcessRows(CARTO_TABLE, MAXROWS, TIME_FIELD,
                     datetime.datetime(year=MAXAGE, month=1, day=1))

    # Record the most recent observation date on the dataset
    most_recent_date = get_most_recent_date(CARTO_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)
    logging.info('SUCCESS')
def main():
    """Refresh both the Explore and Planet Pulse CARTO tables."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Optionally drop both tables before refilling them
    if CLEAR_TABLE_FIRST:
        logging.info('Clearing explore table')
        cartosql.dropTable(CARTO_TABLE_EXPLORE)
        cartosql.dropTable(CARTO_TABLE_PLANETPULSE)

    # 1. Ensure each table exists and gather the ids already stored
    ## EXPLORE TABLE
    existing_explore_ids = []
    if cartosql.tableExists(CARTO_TABLE_EXPLORE):
        logging.info('Fetching existing explore ids')
        existing_explore_ids = getIds(CARTO_TABLE_EXPLORE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_TABLE_EXPLORE))
        createTableWithIndex(CARTO_TABLE_EXPLORE, CARTO_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    ## PLANET PULSE TABLE
    existing_pp_ids = []
    if cartosql.tableExists(CARTO_TABLE_PLANETPULSE):
        logging.info('Fetching existing planet pulse ids')
        existing_pp_ids = getIds(CARTO_TABLE_PLANETPULSE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(
            CARTO_TABLE_PLANETPULSE))
        createTableWithIndex(CARTO_TABLE_PLANETPULSE, CARTO_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    # 2. Iteratively fetch, parse and post rows not seen before (both tables)
    num_new_explore, num_new_pp = processNewData(existing_explore_ids,
                                                 existing_pp_ids)

    total_explore = num_new_explore + len(existing_explore_ids)
    total_pp = num_new_pp + len(existing_pp_ids)
    logging.info('Total rows in Explore table: {}, New: {}, Max: {}'.format(
        total_explore, num_new_explore, MAXROWS_EXPLORE))
    logging.info(
        'Total rows in Planet Pulse table: {}, New: {}, Max: {}'.format(
            total_pp, num_new_pp, MAXROWS_PLANETPULSE))

    # 3. Drop observations beyond each table's row/age limits
    deleteExcessRows(CARTO_TABLE_EXPLORE, MAXROWS_EXPLORE, TIME_FIELD,
                     MAXAGE_EXPLORE)
    deleteExcessRows(CARTO_TABLE_PLANETPULSE, MAXROWS_PLANETPULSE, TIME_FIELD,
                     MAXAGE_PLANETPULSE)
    logging.info('SUCCESS')
def main():
    """Refresh the market and ALPS CARTO tables, then record the update date."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Optionally drop both tables before refilling them
    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_MARKET_TABLE):
            cartosql.dropTable(CARTO_MARKET_TABLE)
        if cartosql.tableExists(CARTO_ALPS_TABLE):
            cartosql.dropTable(CARTO_ALPS_TABLE)

    # 1. Ensure each table exists and gather the ids already stored
    existing_markets = []
    if cartosql.tableExists(CARTO_MARKET_TABLE):
        logging.info('Fetching existing ids')
        existing_markets = getIds(CARTO_MARKET_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_MARKET_TABLE))
        createTableWithIndex(CARTO_MARKET_TABLE, CARTO_MARKET_SCHEMA, UID_FIELD)

    existing_alps = []
    if cartosql.tableExists(CARTO_ALPS_TABLE):
        logging.info('Fetching existing ids')
        existing_alps = getIds(CARTO_ALPS_TABLE, UID_FIELD)
    else:
        logging.info(
            'Table {} does not exist, creating'.format(CARTO_ALPS_TABLE))
        createTableWithIndex(CARTO_ALPS_TABLE, CARTO_ALPS_SCHEMA, UID_FIELD,
                             TIME_FIELD)

    # 2. Iteratively fetch, parse and post rows not seen before (both tables)
    num_new_markets, num_new_alps = processNewData(existing_markets,
                                                   existing_alps)

    market_total = num_new_markets + len(existing_markets)
    logging.info('Total market rows: {}, New: {}, Max: {}'.format(
        market_total, num_new_markets, MAXROWS))
    alps_total = num_new_alps + len(existing_alps)
    logging.info('Total alps rows: {}, New: {}, Max: {}'.format(
        alps_total, num_new_alps, MAXROWS))

    # 3. Drop observations beyond the row limit
    # NOTE(review): a MAXAGE argument was commented out here in the original
    deleteExcessRows(CARTO_ALPS_TABLE, MAXROWS, TIME_FIELD)

    # Record the most recent observation date on the dataset
    most_recent_date = get_most_recent_date(CARTO_ALPS_TABLE)
    lastUpdateDate(DATASET_ID, most_recent_date)
    logging.info('SUCCESS')
def test():
    """Manual smoke test: freeze one day of the PM2.5 layer, then clean up."""
    logging.basicConfig(level=logging.DEBUG)

    # Fixed inputs for the smoke run
    lyr = 'b4f1bd67-d0b7-4b53-a815-1638761ca70f'
    end_time = datetime.datetime.utcnow()
    start_time = end_time - datetime.timedelta(days=1)
    time_field = 'utc'
    table_name = 'cit_003a_air_quality_pm25'

    # Freeze the last 24 hours (ignore_future=True to skip the future check)
    layerId, table = freezeLayer(lyr, start_time, end_time, time_field,
                                 table_name, True)
    print(('Created: ', layerId, table))

    # Tear down everything the freeze created
    rw_api.Layer(layerId).delete()
    csql.dropTable(table)
    print(('Deleted: ', layerId, table))
def main():
    """Refresh the CARTO table: dedupe new FTP data and trim to MAX_ROWS."""
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    # Optionally drop the table before refilling it
    if CLEAR_TABLE_FIRST:
        if cartosql.tableExists(CARTO_TABLE):
            cartosql.dropTable(CARTO_TABLE)

    ### 1. Ensure the table exists and gather the ids already stored
    checkCreateTable(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD, TIME_FIELD)
    existing_ids = getFieldAsList(CARTO_TABLE, UID_FIELD)
    num_existing = len(existing_ids)

    ### 2. Fetch from FTP, dedupe against existing ids, upload
    num_new = processData(existing_ids)
    num_total = num_existing + num_new

    ### 3. Report results and trim the table back to MAX_ROWS
    logging.info('Total rows: {}, New rows: {}, Max: {}'.format(
        num_total, num_new, MAX_ROWS))
    deleteExcessRows(CARTO_TABLE, MAX_ROWS, TIME_FIELD)
    logging.info("SUCCESS")
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Optionally drop the table before refilling it
    if CLEAR_TABLE_FIRST:
        cartosql.dropTable(CARTO_TABLE)

    # 1. Ensure the table exists and gather the ids already stored
    existing_ids = []
    if cartosql.tableExists(CARTO_TABLE):
        logging.info('Fetching existing ids')
        existing_ids = getIds(CARTO_TABLE, UID_FIELD)
    else:
        logging.info('Table {} does not exist, creating'.format(CARTO_TABLE))
        createTableWithIndex(CARTO_TABLE, CARTO_SCHEMA, UID_FIELD)

    # 2. Ingest vectors not seen before
    num_new_vectors = processNewVectorData(existing_ids)

    # 3. Report results (no row cap is enforced for this dataset)
    total_count = num_new_vectors + len(existing_ids)
    logging.info('Total rows: {}, New: {}, Max: {}'.format(
        total_count, num_new_vectors, 'none'))
def processArgs(args):
    """Dispatch a parsed docopt-style args dict to the matching cartosql call.

    Returns the formatted response for the selected subcommand, or the module
    docstring (usage text) when no subcommand matched or help was requested.
    """
    # Collect common request options from the flags
    opts = {}
    if args['-u']:
        opts['user'] = args['-u']
    if args['-k']:
        opts['key'] = args['-k']
    fmt = args['-f']
    if fmt:
        opts['f'] = fmt

    # Bail out to usage text on --help or when credentials can't be initialized
    if args['--help'] or not cartosql.init():
        return __doc__

    if args['<sql>']:
        # Raw SQL: POST for writes, GET for reads
        if args['post']:
            resp = cartosql.post(args['<sql>'], **opts)
            return returnFormat(resp, fmt)
        elif args['get']:
            resp = cartosql.get(args['<sql>'], **opts)
            return returnFormat(resp, fmt)
    elif args['select']:
        # Structured select: fields + table, with optional where/order/limit
        if args['<fields>'] and args['<table>']:
            if args['-w']:
                opts['where'] = args['-w']
            if args['-o']:
                opts['order'] = args['-o']
            if args['-l']:
                opts['limit'] = args['-l']
            resp = cartosql.getFields(args['<fields>'], args['<table>'], **opts)
            return returnFormat(resp, fmt)
    elif args['ls']:
        # List tables; pretty-print unless a non-csv format was requested
        resp = cartosql.getTables(**opts)
        if fmt is None or fmt == 'csv':
            return prettyJson(resp)
        return returnFormat(resp, fmt)
    elif args['exists'] and args['<table>']:
        return cartosql.tableExists(args['<table>'], **opts)
    elif args['drop'] and args['<table>']:
        # Destructive: require --confirm or an interactive 'y'
        confirm = args['--confirm']
        if not confirm:
            confirm = input('Drop table {}? (y/N)'.format(args['<table>'])) == 'y'
        if confirm:
            resp = cartosql.dropTable(args['<table>'], **opts)
            return returnFormat(resp, fmt)
        else:
            print('Pass option --confirm to drop table')

    return __doc__
def processArgs(args):
    """Dispatch a parsed docopt-style args dict to the matching cartosql call.

    Returns the formatted response for the selected subcommand, or the module
    docstring (usage text) when no subcommand matched or help was requested.
    """
    # Collect common request options from the flags
    opts = {}
    if args['-u']:
        opts['user'] = args['-u']
    if args['-k']:
        opts['key'] = args['-k']
    fmt = args['-f']
    if fmt:
        opts['f'] = fmt

    if args['--help']:
        return __doc__
    elif args['post'] and args['<sql>']:
        # Raw SQL write
        resp = cartosql.post(args['<sql>'], **opts)
        return returnFormat(resp, fmt)
    elif args['get']:
        # Either raw SQL, or a fields+table select with optional where/order
        if args['<sql>']:
            resp = cartosql.get(args['<sql>'], **opts)
            return returnFormat(resp, fmt)
        if args['<fields>'] and args['<table>']:
            if args['-w']:
                opts['where'] = args['-w']
            if args['-o']:
                opts['order'] = args['-o']
            resp = cartosql.getFields(args['<fields>'], args['<table>'], **opts)
            return returnFormat(resp, fmt)
    elif args['ls']:
        # List tables; pretty-print unless a non-csv format was requested
        resp = cartosql.getTables(**opts)
        if fmt is None or fmt == 'csv':
            return prettyJson(resp)
        return returnFormat(resp, fmt)
    elif args['exists'] and args['<table>']:
        resp = cartosql.tableExists(args['<table>'], **opts)
        return returnFormat(resp, fmt)
    elif args['drop'] and args['<table>']:
        resp = cartosql.dropTable(args['<table>'], **opts)
        return returnFormat(resp, fmt)

    return __doc__
def freezeLayer(layerId, start_date, end_date, time_field=None,
                table_name=None, ignore_future=False):
    """
    Copies a CARTO layer's data to a new table, and creates an identical
    layer pointing to the new table.

    @params
    layerId        string    the layer to be copied
    start_date     datetime  the start of the period of data to copy
    end_date       datetime  the end of the period of data to copy
    [time_field]   string    the field in which datetime information is stored
    [table_name]   string    the main table name containing time data
    ignore_future  bool      do not warn if trying to query data in the future

    The table_name and time_field are read from the Dataset definition if None.

    @return tuple(<rw_api.Layer>, string) the new layer object and tableName
    @raises ValueError if the layer is not a 'cartodb' layer
    """
    # 1. Fetch layer and dataset definition
    logging.info('Fetching layer definition for {}'.format(layerId))
    layer = rw_api.getLayer(layerId)
    if not layer.provider == 'cartodb':
        # FIX: the original raised a bare string, which is a TypeError in
        # Python 3 — raise a proper exception instance instead
        raise ValueError("Layer must be of type 'cartodb'")
    if not time_field or not table_name:
        dataset = layer.getDataset()
        time_field = time_field or dataset.mainDateField
        table_name = table_name or dataset.tableName
    sql = layer.layerConfig['options']['sql'].lower()

    # 2. Check if end_date is in the future or more recent than the most
    # recent data in the dataset.
    # Truncate times to minutes, no need to be too accurate
    start_date = asUTC(start_date).replace(second=0, microsecond=0)
    end_date = asUTC(end_date).replace(second=0, microsecond=0)
    if start_date > end_date:
        start_date, end_date = (end_date, start_date)
    if not ignore_future:
        checkFutureData(end_date, table_name, time_field)

    # 3. Modify the layer SQL query, replacing any where clauses referring to
    # time_field with new ones selecting for the start and end date
    logging.debug('Query: {}'.format(sql))
    start = start_date.isoformat()
    end = end_date.isoformat()
    time_clauses = findTimeClauses(sql, time_field)
    new_cls = " {time_field} >= '{start}' and {time_field} < '{end}'".format(
        time_field=time_field, start=start, end=end)
    for cls in time_clauses:
        sql = sql.replace(cls, new_cls)
    logging.debug('New query: {}'.format(sql))

    # 4. Create the table from the updated layer SQL query.
    # Name new table with start and end dates; CARTO table names must stay
    # under 63 characters, so hash the tail if the name is too long.
    new_table = "{}_{}_{}".format(table_name,
                                  start_date.strftime("%Y%m%d_%H%M"),
                                  end_date.strftime("%Y%m%d_%H%M"))
    if len(new_table) > 62:
        # FIX: md5 requires bytes and the original embedded the hash *object*
        # repr (non-deterministic) into the name; use the hex digest and
        # truncate so the result is exactly 29 + 1 + 32 = 62 characters
        tail_hash = hashlib.md5(new_table[32:].encode('utf-8')).hexdigest()
        new_table = "{}_{}".format(new_table[:29], tail_hash)

    # If we've made this exact query before, replace it
    # FIX: log the destination table (was table_name) and fix "Coping" typo
    logging.info("Copying data to table: {}".format(new_table))
    if csql.tableExists(new_table):
        logging.info("Table {} exists, overwriting".format(new_table))
        csql.dropTable(new_table)
    csql.createTableFromQuery(new_table, sql)

    # 5. Create layer copy and update SQL to refer to new table
    layer_name = "{} ({} to {})".format(layer.name, start, end)
    new_lyr = layer.copy(layer_name)
    new_lyr.layerConfig['options']['sql'] = "SELECT * FROM {}".format(
        new_table)
    new_lyr.published = False
    logging.info("Uploading new layer {}".format(layer_name))
    new_lyr.push()
    return (new_lyr, new_table)