def run(dryrun=False):
    logger.info("MediaOcean download started.")
    try:
        download(dryrun)
    except Exception:
        # logger.exception records the message plus the active traceback.
        logger.exception("MediaOcean download failed.")
    logger.info("MediaOcean download finished.")

def download(dryrun=False):
    transport, sftp = connect()
    dl_path = "/dataVol/collector/mediaocean"
    os.makedirs(dl_path, exist_ok=True)
    subpaths = ("", "bulk_api")  # Spectra/Offline & Prisma/Online
    for subpath in subpaths:
        filenames = sftp.listdir(subpath)
        # bulk_api is walked as its own subpath, so skip it as a "file".
        try:
            filenames.remove("bulk_api")
        except ValueError:
            pass
        for fn in filenames:
            local_path = os.path.join(dl_path, fn)
            remote_path = os.path.join(subpath, fn)
            logger.info("Download started: %s", fn)
            if not dryrun:
                if fn.startswith("extract-status"):
                    # Status markers are deleted remotely, never downloaded.
                    sftp.remove(remote_path)
                else:
                    sftp.get(remote_path, local_path)
                    if get_env() == PROD_ENV:
                        sftp.remove(remote_path)
            logger.info("Download complete: %s", fn)
    sftp.close()
    transport.close()
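
connect() is not shown in this example; a minimal sketch of what it plausibly does, using paramiko's Transport/SFTPClient pair (the host and credentials below are placeholders, not values from the source), might be:

import paramiko

def connect():
    # Placeholder host and credentials; the real values are not part of the example.
    transport = paramiko.Transport(("sftp.example.com", 22))
    transport.connect(username="user", password="secret")
    sftp = paramiko.SFTPClient.from_transport(transport)
    # Returned in the (transport, sftp) order that download() unpacks.
    return transport, sftp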
def denormalize(week_range):
    try:
        logger.info('%s fill started.', TABLE_NAME)
        sql = SQL()
        # SQL() and execute_batches are project helpers that run FILL_SQL
        # once per batch in week_range.
        sql.execute_batches(FILL_SQL, week_range)
        sql.commitclose()
        logger.info('%s fill finished.', TABLE_NAME)
    except Exception:
        logger.exception('Critical error in denormalization: %s' % TABLE_NAME)
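
execute_batches() is project-specific; assuming week_range is an iterable of (start, end) date pairs that FILL_SQL is parameterized over, a hypothetical call would be:

# Hypothetical week windows; the real week_range construction is not shown.
weeks = [('2020-01-06', '2020-01-12'), ('2020-01-13', '2020-01-19')]
denormalize(weeks)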
def write_fuel_data():
    """Persist fuel data."""
    fuel_data = fetch_fuel_data()
    fuel_json = parse_fuel_data(fuel_data)
    ts = temporal.datetimestamp()
    base_dir = '/dataVol/collector/json'
    os.makedirs(base_dir, exist_ok=True)
    fp = os.path.join(base_dir, 'fuel_data_' + ts + '.json')
    with open(fp, 'w') as fout:
        fout.write(fuel_json)
    logger.info('Fuel prices downloaded: %s', fp)
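
fetch_fuel_data() and parse_fuel_data() are defined elsewhere; a minimal sketch assuming an HTTP JSON source (the endpoint URL and the round-trip through json are assumptions, not the project's actual code) could look like:

import json
import requests

FUEL_API_URL = 'https://example.com/fuel/prices'  # placeholder endpoint

def fetch_fuel_data():
    # Fail loudly on HTTP errors rather than writing a bad file.
    resp = requests.get(FUEL_API_URL, timeout=30)
    resp.raise_for_status()
    return resp.json()

def parse_fuel_data(fuel_data):
    # write_fuel_data() writes a string, so re-serialize to JSON text.
    return json.dumps(fuel_data, indent=2)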
Example #5
def download(days=1):
    logger.info('Crimson Hexagon download started.')
    dat = {}
    dat['is_historical'] = (days > 1)
    started = time.time()
    is_finished = False
    # Retry the full pull for up to ten minutes, pausing 10s between attempts.
    while time.time() - started < 600:
        try:
            dat['monitors'] = monitors()
            logger.info('Crimson Hexagon monitor results...')
            dat['monitor_results'] = monitor_results(days)
            dat['monitor_results_bycity'] = monitor_results_by_city(days)
            dat['monitor_results_bystate'] = monitor_results_by_state(days)
            logger.info('Crimson Hexagon facebook results...')
            dat['facebook_admin_posts'] = facebook_admin_posts(days)
            dat['facebook_page_likes'] = facebook_page_likes(days)
            dat['facebook_total_activity'] = facebook_total_activity(days)
            logger.info('Crimson Hexagon twitter results...')
            dat['twitter_engagement_metrics'] = twitter_engagement_metrics(days)
            dat['twitter_followers'] = twitter_followers(days)
            dat['twitter_sent_posts'] = twitter_sent_posts(days)
            dat['twitter_total_engagement'] = twitter_total_engagement(days)
            logger.info('Crimson Hexagon instagram results...')
            dat['instagram_followers'] = instagram_followers(days)
            dat['instagram_total_activity'] = instagram_total_activity(days)
            dat['instagram_sent_media'] = instagram_sent_media(days)
            is_finished = True
            break
        except Exception:
            # Sleep only on failure; the original slept in a finally block,
            # which also delayed the successful exit by 10s.
            logger.exception('Crimson Hexagon API failure, retrying in 10s...')
            time.sleep(10)
    if is_finished:
        savejson(dat)
    else:
        logger.warning('Crimson Hexagon download failed!')
    logger.info('Crimson Hexagon download finished.')
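
The per-endpoint helpers (monitors(), monitor_results(), ...) are not shown; a sketch of what monitors() might look like against the Crimson Hexagon REST API (the base URL, endpoint path, and auth handling are assumptions) is:

import requests

API_BASE = 'https://api.crimsonhexagon.com/api'  # assumed base URL
AUTH_TOKEN = '...'  # placeholder; token management is not shown

def monitors():
    # The other helpers presumably follow the same GET-and-decode pattern.
    resp = requests.get(API_BASE + '/monitor/list',
                        params={'auth': AUTH_TOKEN}, timeout=60)
    resp.raise_for_status()
    return resp.json()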
Example #6
def savejson(json_blob):
    fn = 'crimsonhexagon_' + temporal.datetimestamp() + '.json'
    fp = os.path.join(DL_PATH, fn)
    os.makedirs(DL_PATH, exist_ok=True)
    # files.savejson is a project helper that serializes json_blob to fp.
    files.savejson(json_blob, fp)
    logger.info('Crimson Hexagon downloaded: %s', fp)
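
files.savejson() itself is another project helper; a plausible stand-in inferred from the (blob, path) call site would be:

import json

def savejson(obj, path):
    # Serialize obj to path; argument order matches the call above.
    with open(path, 'w') as fout:
        json.dump(obj, fout, indent=2)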