def run(cfg, forcerun):
    """Daily scheduler loop: once per day, wait for the configured
    ingestion start time, then download, ingest and upload.

    :param cfg: loaded configuration (provides timezone and start time).
    :param forcerun: when True, run immediately once and exit the loop.
    """
    tz = config.get_tz(cfg)
    while True:
        dt_str = str(util.time.get_utcnow().astimezone(tz).date())
        logging.info(cfg, 'checking if run for {dt_str} should be done'.format(dt_str=dt_str))
        if not forcerun and upload.daily.history.did_upload_today():
            # Already done for today; re-check every 10 minutes.
            logging.info(cfg, 'run for {dt_str} is already done'.format(dt_str=dt_str))
            time.sleep(10 * 60)
            continue
        t_run_after = config.get_daily_ingestion_start_t(cfg)
        # Inner wait loop: poll until the scheduled start time has passed.
        while True:
            t_cur = util.time.get_utcnow().astimezone(tz).time()
            logging.info(cfg, 'checking if the schedule time for {dt_str} has reached'.format(dt_str=dt_str))
            if forcerun or t_cur > t_run_after:
                run_download(cfg)
                run_ingests_append_combine()
                run_upload(cfg)
                upload.daily.history.on_upload()
                break
            logging.info(cfg, 'schedule time {t_run_after} not yet reached at {t_cur}'.format(
                t_run_after=t_run_after, t_cur=t_cur))
            time.sleep(10 * 60)
        if forcerun:
            # forcerun runs only once
            break
def _did_upload_today_run_dates():
    """Return True if today's date (in the US config's timezone) is already
    recorded in the module-level ``_run_dates`` set."""
    cfg = config.load('config.us.yaml')
    tz = config.get_tz(cfg)
    dt_str = str(util.time.get_utcnow().astimezone(tz).date())
    # The membership test already yields a bool; no if/return True/False needed.
    return dt_str in _run_dates
def get_today_v_tz():
    """Return today's date in the US config's timezone, compact form.

    :return: e.g. '20191225' — note '%Y%m%d' produces no dashes; the old
        docstring example '2019-12-25' did not match the actual output.
    """
    cfg = config.load('config.us.yaml')
    tz = config.get_tz(cfg)
    return get_utcnow().astimezone(tz).strftime('%Y%m%d')
def get_now_time_tz(cfg):
    """Return the current wall-clock time as a timezone-aware ``datetime.time``.

    The timezone is read from ``cfg`` — it is not hard-coded to Korea,
    despite what the old docstring claimed.

    :param cfg: configuration from which the timezone is taken.
    :return: ``datetime.time`` with hour/minute/second and tzinfo set
        (microseconds deliberately dropped).
    """
    tz = config.get_tz(cfg)
    now_tz = get_utcnow().astimezone(tz)
    # Rebuild a time() explicitly so that microseconds are zeroed out.
    return datetime.time(now_tz.hour, now_tz.minute, now_tz.second, tzinfo=tz)
def _did_upload_today_run_dates():
    """Return True if today's date (in the KR config's timezone) is already
    recorded in the module-level ``_run_dates`` set."""
    cfg = config.load('config.kr.yaml')
    tz = config.get_tz(cfg)
    dt_str = str(
        korean_financial_minute_data_miner.util.time.get_utcnow().astimezone(
            tz).date())
    # Return the membership test directly instead of if/return True/False.
    return dt_str in _run_dates
def get_minutes_url(config, metadata):
    """Resolve the minutes URL for a meeting, verifying it is reachable.

    Prefers an explicit ``minutes_url`` in the metadata; otherwise, for the
    'vancouver' config, derives the council.vancouver.ca agenda URL from the
    meeting-type abbreviation and the start date. Returns 'N/A' when no URL
    can be determined.

    :raises requests.HTTPError: if a HEAD request on the URL fails.
    """
    tz = get_tz(config)
    start_date = parse_timestamp_naively(metadata['start']).astimezone(tz)

    # Map the configured field's text to a short meeting-type abbreviation.
    meeting_type = None
    abbr_source = metadata[config['minutes_abbrs_for']]
    for abbr, abbr_code in config.get('minutes_abbrs', {}).items():
        if abbr in abbr_source:
            meeting_type = abbr_code
            break

    explicit_url = metadata.get('minutes_url')
    if explicit_url:
        # Fail loudly if the provided URL is dead.
        requests.head(explicit_url).raise_for_status()
        return explicit_url

    if config['id'] == 'vancouver':
        if not meeting_type:
            return 'N/A'
        if metadata['title'] == 'Inaugural Council Meeting':
            meeting_type = 'inau'
        derived_url = 'http://council.vancouver.ca/{dt:%Y%m%d}/{type}{dt:%Y%m%d}ag.htm'.format(
            type=meeting_type, dt=start_date)
        requests.head(derived_url).raise_for_status()
        return derived_url

    return 'N/A'
def run(forcerun):
    """Daily scheduler for the polygon dataset: wait until the configured
    last-record ingestion time, then ingest, upload and record the run.

    :param forcerun: when True, run immediately once and exit the loop.
    """
    cfg = config.load('config.us.polygon.yaml')
    tz = config.get_tz(cfg)
    while True:
        dt_str = str(util.time.get_utcnow().astimezone(tz).date())
        logging.info(
            cfg,
            'checking if run for {dt_str} should be done'.format(dt_str=dt_str))
        if not forcerun and upload.daily.history.did_upload_today():
            # Already done for today; re-check every 30 minutes.
            logging.info(
                cfg, 'run for {dt_str} is already done'.format(dt_str=dt_str))
            time.sleep(30 * 60)
            continue
        t_run_after = config.get_daily_last_record_ingestion_start_t(cfg)
        while True:
            # NOTE(review): other modules define get_now_time_tz(cfg) with a
            # required cfg argument — confirm this module's helper really
            # takes no arguments.
            t_cur = util.time.get_now_time_tz()
            logging.info(
                cfg,
                'checking if the schedule time for {dt_str} has reached'.
                format(dt_str=dt_str))
            dt = util.time.time_diff_seconds(t_run_after, t_cur)
            if forcerun or dt <= 0:
                run_ingests()
                run_upload()
                upload.daily.history.on_upload()
                break
            logging.info(
                cfg,
                'schedule time {t_run_after} not yet reached at {t_cur}'.
                format(t_run_after=t_run_after, t_cur=t_cur))
            # Bug fix: sleep just past the scheduled time (dt + 1), capped at
            # one hour. The previous max(dt + 1, 60 * 60) slept *at least* an
            # hour, overshooting schedules that were only minutes away.
            time.sleep(min(dt + 1, 60 * 60))
        if forcerun:
            # forcerun runs only once
            break
def _did_run_today_run_dates(cfg):
    """Return True if today's date (in cfg's timezone) is already recorded
    in the module-level ``_run_dates`` set."""
    tz = config.get_tz(cfg)
    dt_str = str(util_time.get_utcnow().astimezone(tz).date())
    # Return the membership test directly instead of if/return True/False.
    return dt_str in _run_dates
def build_youtube_video_resource(config, metadata, minutes_url):
    """
    Build a YouTube video resource as per
    https://developers.google.com/youtube/v3/docs/videos.

    :param dict config:
    :param dict metadata: Metadata for the video.
    :param str minutes_url: URL of the meeting minutes (or 'N/A').
    :returns: dict representing the video resource, ready to serialize to JSON.
    """
    ytconfig = config['youtube']
    ts = parse_timestamp_naively(metadata['start'])

    # Strip a trailing " - <year>" style suffix from the title, if present.
    title_no_date = metadata['title']
    if title_no_date[-4:].isdigit():
        title_no_date = title_no_date.split(' - ')[0]
    generic_titles = ('Full Meeting', 'Entire Meeting', 'Whole Meeting')
    if title_no_date in generic_titles and config['id'] == 'vancouver':
        title_no_date = metadata['project_name']

    kwargs = {
        'clip_title': title_no_date,
        'clip_date': ts.astimezone(get_tz(config)),
        'minutes_url': minutes_url,
        'project_name': metadata.get('project_name', ''),
    }

    # Render "HH:MM - title" lines for the description, prefixed by a
    # newline only when there are any timecodes at all.
    timecodes = "\n".join(
        '{entry[time]} - {entry[title]}'.format(entry=entry)
        for entry in metadata['timecodes'])
    if timecodes:
        timecodes = "\n" + timecodes
    kwargs['timecodes'] = timecodes

    recordingDetails = {
        'recordingDate': ts.isoformat().replace('+00:00', '.0Z'),
    }
    location = ytconfig.get('location')
    if location:
        recordingDetails['location'] = {
            'latitude': location[0],
            'longitude': location[1],
            'altitude': 0.0,
        }
    locationDesc = ytconfig.get('location_desc')
    if locationDesc:
        recordingDetails['locationDescription'] = locationDesc

    description = ytconfig['desc'].format(**kwargs).rstrip()
    description += "\n\nThis is an automated re-upload."
    missing_seconds = metadata.get('missing_seconds', 0)
    if missing_seconds:
        description += " Due to technical difficulties, this video is missing {} seconds.".format(missing_seconds)

    return {
        'snippet': {
            'title': ytconfig['title'].format(**kwargs),
            'description': description,
            'tags': ytconfig.get('tags', []),
            'categoryId': 25,  # News & Politics
            'defaultLanguage': 'en',
            'defaultAudioLanguage': 'en',
        },
        'status': {
            'privacyStatus': ytconfig.get('privacy', 'unlisted'),
        },
        'recordingDetails': recordingDetails,
    }
params={'part': 'snippet'}, json=playlistItem) resp.raise_for_status() parser = argparse.ArgumentParser(description='Video uploader') parser.add_argument('config_id') parser.add_argument('action', choices=['authorize', 'upload']) parser.add_argument('--keep-uploaded', action="store_true", help="Don't delete uploaded videos.") parser.add_argument('--title-contains', help="Only upload videos containing this in its title.") parser.add_argument('--for-date', help="Only upload videos for this date (YYYY-MM-DD), in local time.") if __name__ == '__main__': args = parser.parse_args() config = get_config(args.config_id) local_tz = get_tz(config) for_date = None if args.for_date: for_date = datetime.strptime(args.for_date, '%Y-%m-%d') for_date = for_date.replace(tzinfo=local_tz).date() client_creds = load_client_credentials() credentials_file = config['id'] + '.tokens.json' if args.action == 'authorize': if os.path.isfile(credentials_file): print("This will replace the existing credentials in " + credentials_file) client_id = client_creds['client_id'] user_code_resp = obtain_user_code(client_id) print("Visit " + user_code_resp['verification_url']) print("Enter code " + user_code_resp['user_code'])
def get_now_tz(cfg):
    """Return the current datetime converted to the timezone from ``cfg``."""
    return get_utcnow().astimezone(config.get_tz(cfg))
def get_today_v_tz(cfg):
    """Return today's date in cfg's timezone, compact form.

    :return: e.g. '20191225' — note '%Y%m%d' produces no dashes; the old
        docstring example '2019-12-25' did not match the actual output.
    """
    tz = config.get_tz(cfg)
    return get_utcnow().astimezone(tz).strftime('%Y%m%d')
def get_today_tz(cfg):
    """Return today's date in cfg's timezone.

    :return: a ``datetime.date`` (e.g. str() of it looks like '2019-12-25').
    """
    return get_utcnow().astimezone(config.get_tz(cfg)).date()
def run(forcerun):
    """Daily scheduler for the US streaming miner: skip weekends, wait for
    market open, start aggregation, then at ingest end save/upload the
    daily dataframe and record the run.

    :param forcerun: when True, run every stage immediately once and exit.
    """
    cfg = config.load('config.us.yaml')
    tz = config.get_tz(cfg)
    polygon_run = PolygonAggregationsRun(subscription_id=os.getenv(
        'FINANCE_STREAM_INTRADAY_PUBSUB_SUBSCRIPTION_ID'))
    while True:
        dt = us_finance_streaming_data_miner.util.time.get_utcnow().astimezone(
            tz)
        dt_str = str(dt.date())
        if dt.weekday() >= 5:
            # Saturday (5) / Sunday (6): market closed, re-check hourly.
            logging.info(
                'skipping the routing during weekend, weekday: {weekday} for {dt_str}'
                .format(weekday=dt.weekday(), dt_str=dt_str))
            time.sleep(60 * 60)
            continue
        logging.info('checking if run for {date_str} should be done'.format(
            date_str=dt_str))
        if not forcerun and us_finance_streaming_data_miner.history.history.did_run_today(
                cfg):
            logging.info(
                'run for {date_str} is already done'.format(date_str=dt_str))
            time.sleep(10 * 60)
            continue
        t_market_open = config.get_market_open(cfg)
        while True:
            t_cur = us_finance_streaming_data_miner.util.time.get_utcnow(
            ).astimezone(tz).time()
            # Bug fix: this function uses the stdlib logging API elsewhere
            # (logging.info(msg)); the extra cfg first argument would have
            # been interpreted as the format string. Dropped for consistency.
            logging.info(
                'checking if the schedule time for {dt_str} has reached'.
                format(dt_str=dt_str))
            if forcerun or t_cur > t_market_open:
                polygon_run.on_daily_trade_start()
                break
            logging.info(
                'schedule time {t_run_after} not yet reached at {t_cur}'.
                format(t_run_after=t_market_open, t_cur=t_cur))
            time.sleep(10 * 60)
        if forcerun:
            # Give the forced run a moment to collect some data before the
            # end-of-day stage fires immediately below.
            time.sleep(70)
        t_ingest_end = config.get_market_ingest_end(cfg)
        while True:
            t_cur = us_finance_streaming_data_miner.util.time.get_utcnow(
            ).astimezone(tz).time()
            logging.info(
                'checking if the schedule time for {dt_str} has reached'.
                format(dt_str=dt_str))
            logging.info(polygon_run.get_status_string())
            if forcerun or t_cur > t_ingest_end:
                polygon_run.save_daily_df()
                daily_upload.upload()
                polygon_run.on_daily_trade_end()
                us_finance_streaming_data_miner.history.history.on_run(cfg)
                break
            logging.info(
                'schedule time {t_run_after} not yet reached at {t_cur}'.
                format(t_run_after=t_ingest_end, t_cur=t_cur))
            time.sleep(10 * 60)
        if forcerun:
            # forcerun runs only once
            break
def get_now_tz():
    """Return the current datetime in the timezone from config.us.yaml."""
    cfg = config.load('config.us.yaml')
    return get_utcnow().astimezone(config.get_tz(cfg))