def check_nisra(secret, s3, notweet):
    indexkey = secret['nisra-deaths-index']
    # Get the previous data file list from S3
    status = S3_scraper_index(s3, secret['bucketname'], indexkey)
    previous = status.get_dict()
    previous = sorted(previous, key=lambda k: k['filedate'], reverse=True)
    # Check the NISRA site for file changes
    current, changes = check_for_nisra_files(s3, secret['bucketname'], previous)
    # Write any changes back to S3
    if len(changes) > 0:
        status.put_dict(current)
        message = 'Wrote %d items to %s, of which %d were changes' % (len(current), indexkey, len(changes))
        # If the most recent file has changed then tweet
        totweet = [c['index'] for c in changes if (c['change'] == 'added') or (c['index'] == 0)]
        if not notweet and (0 in totweet):
            print('Launching NISRA tweeter')
            launch_lambda_async(os.getenv('NISRA_TWEETER_LAMBDA'), [current[a] for a in totweet])
            message += ', and launched NISRA tweet lambda'
    else:
        message = 'Did nothing'
    return message
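# The S3_scraper_index and launch_lambda_async helpers used above are defined
# elsewhere in the repo. A minimal sketch of what they would need to do,
# assuming the index is stored as a JSON list in S3 (the implementation below
# is an assumption, not the repo's actual code):
import json
import boto3

class S3_scraper_index:
    def __init__(self, client, bucketname, keyname):
        self.client = client
        self.bucketname = bucketname
        self.keyname = keyname

    def get_dict(self):
        # Load the JSON index file from S3, returning an empty list if absent
        try:
            obj = self.client.get_object(Bucket=self.bucketname, Key=self.keyname)['Body']
        except self.client.exceptions.NoSuchKey:
            return []
        return json.loads(obj.read())

    def put_dict(self, data):
        # Write the index back to S3 as JSON
        self.client.put_object(Bucket=self.bucketname, Key=self.keyname, Body=json.dumps(data))

def launch_lambda_async(functionname, payload):
    # Fire-and-forget invocation so the scraper does not block on the tweeter
    lambda_client = boto3.client('lambda')
    lambda_client.invoke(FunctionName=functionname, InvocationType='Event', Payload=json.dumps(payload))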
def lambda_handler(event, context):
    # Get the secret
    sm = boto3.client('secretsmanager')
    secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
    secret = json.loads(secretobj['SecretString'])

    messages = []
    # Download the most recently updated PDF file
    s3 = boto3.client('s3')
    for change in event:
        tmp = tempfile.NamedTemporaryFile(suffix='.pdf')
        with open(tmp.name, 'wb') as fp:
            s3.download_fileobj(secret['bucketname'], change['keyname'], fp)
        text = textract.process(tmp.name, method='pdfminer').decode('utf-8')
        first = True
        regex = re.compile(r'^Current estimate of Rt \((.*)\):\s+(.*)$')
        tweet = 'R estimates by Northern Ireland DoH on '
        for line in text.split('\n'):
            m = regex.match(line)
            if first is True:
                tweet += '%s\n\n' % line
            elif m:
                tweet += '\u2022 %s: %s\n' % (m.group(1), m.group(2))
            first = False
        tweet += '\n%s' % change['url']

        if change.get('notweet') is not True:
            api = TwitterAPI(secret['twitter_apikey'], secret['twitter_apisecretkey'], secret['twitter_accesstoken'], secret['twitter_accesstokensecret'])
            resp = api.tweet(tweet)
            # Download and update the index
            status = S3_scraper_index(s3, secret['bucketname'], secret['doh-r-index'])
            index = status.get_dict()
            for i in range(len(index)):
                if index[i]['filedate'] == change['filedate']:
                    index[i]['tweet'] = resp.id
                    break
            status.put_dict(index)
            messages.append('Tweeted ID %s and updated %s' % (resp.id, secret['doh-r-index']))
        else:
            print(tweet)
            messages.append('Did not tweet')

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": messages,
        }),
    }
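# The TwitterAPI wrapper is also defined elsewhere in the repo. A minimal
# sketch of the interface the handlers rely on (tweet/dm/upload returning
# objects with .id / .media_id), built here on tweepy purely as an
# illustrative assumption:
import tweepy

class TwitterAPI:
    def __init__(self, apikey, apisecretkey, accesstoken, accesstokensecret):
        auth = tweepy.OAuth1UserHandler(apikey, apisecretkey, accesstoken, accesstokensecret)
        self.api = tweepy.API(auth)

    def tweet(self, text, resp_id=None, media_ids=None):
        # Post a tweet, optionally with media and/or as a reply
        kwargs = {}
        if media_ids:
            kwargs['media_ids'] = media_ids
        if resp_id:
            kwargs['in_reply_to_status_id'] = resp_id
        return self.api.update_status(status=text, **kwargs)

    def upload(self, stream, filename):
        # Upload a single image from an in-memory buffer
        return self.api.media_upload(filename=filename, file=stream)

    def upload_multiple(self, plots):
        # Upload each plot and collect the media IDs for attachment
        return [self.upload(p['store'], p['name']).media_id for p in plots]

    def dm(self, recipient_id, text, media_id=None):
        # Send a direct message, used for test tweets
        kwargs = {}
        if media_id:
            kwargs['attachment_type'] = 'media'
            kwargs['attachment_media_id'] = media_id
        return self.api.send_direct_message(recipient_id=recipient_id, text=text, **kwargs)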
def check_doh(secret, s3, notweet, mode):
    if mode == 'dd':
        indexkey = secret['doh-dd-index']
        lambdaname = os.getenv('TWEETER_LAMBDA')
    else:
        indexkey = secret['doh-r-index']
        lambdaname = os.getenv('R_TWEETER_LAMBDA')
    # Get the previous data file list from S3
    status = S3_scraper_index(s3, secret['bucketname'], indexkey)
    previous = status.get_dict()
    previous = sorted(previous, key=lambda k: k['filedate'], reverse=True)
    # Check the DoH site for file changes
    if mode == 'dd':
        current, changes = check_for_dd_files(s3, secret['bucketname'], previous, int(secret['doh-dd-files-to-check']), store=(not notweet))
    else:
        current, changes = check_for_r_files(s3, secret['bucketname'], previous)
    # Write any changes back to S3
    if len(changes) > 0:
        status.put_dict(current)
        message = 'Wrote %d items to %s, of which %d were changes' % (len(current), indexkey, len(changes))
        # If the most recent file has changed then tweet
        totweet = [c['index'] for c in changes if (c['change'] == 'added') or (c['index'] == 0)]
        if not notweet and (0 in totweet):
            print('Launching %s tweeter' % mode)
            launch_lambda_async(lambdaname, [current[a] for a in totweet])
            message += ', and launched %s tweet lambda' % mode
    else:
        message = 'Did nothing'
    return message
def get_all_doh(secret, s3):
    indexkey = secret['doh-dd-index']
    # Get the previous data file list from S3
    status = S3_scraper_index(s3, secret['bucketname'], indexkey)
    previous = status.get_dict()
    previous = sorted(previous, key=lambda k: k['filedate'], reverse=True)
    # Check the DoH site for file changes
    current, changes = check_for_dd_files(s3, secret['bucketname'], previous, 0)
    # Write any changes back to S3
    if len(changes) > 0:
        status.put_dict(current)
        message = 'Wrote %d items to %s, of which %d were changes' % (len(current), indexkey, len(changes))
    else:
        message = 'Did nothing'
    return message
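# A sketch of how a scraper lambda_handler might dispatch to the check
# functions above; the event shape ({'mode': 'nisra'|'dd'|'r'}) and handler
# name are assumptions, not the repo's actual entry point:
def scraper_handler(event, context):
    sm = boto3.client('secretsmanager')
    secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
    secret = json.loads(secretobj['SecretString'])
    s3 = boto3.client('s3')
    notweet = event.get('notweet', False)
    if event.get('mode') == 'nisra':
        message = check_nisra(secret, s3, notweet)
    else:
        message = check_doh(secret, s3, notweet, event.get('mode', 'dd'))
    return {"statusCode": 200, "body": json.dumps({"message": message})}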
def lambda_handler(event, context):
    # Get the secret
    sm = boto3.client('secretsmanager')
    secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
    secret = json.loads(secretobj['SecretString'])

    tweets = []
    # Download the most recently updated Excel file
    s3 = boto3.client('s3')
    for change in event:
        obj = s3.get_object(Bucket=secret['bucketname'], Key=change['keyname'])['Body']
        stream = io.BytesIO(obj.read())
        # Load test data and add extra fields
        df = pandas.read_excel(stream, engine='openpyxl', sheet_name='Table 7', header=3)
        df.dropna(axis='columns', how='all', inplace=True)
        df.rename(columns=colclean, inplace=True)
        df.dropna(axis='index', subset=['Total'], inplace=True)
        # Get the latest dates with values for tests and rolling
        df['date'] = pandas.to_datetime(df['Week Ending'], format='%d/%m/%Y')
        df.sort_values('date', inplace=True)
        latest = df.iloc[-1]
        # Check against previous day's reports
        status = S3_scraper_index(s3, secret['bucketname'], secret['nisra-deaths-index'])
        index = status.get_dict()
        plots = []
        if latest['Total'] == 0:
            tweet = '''No deaths registered in Northern Ireland, week ended {date}
'''.format(
                date=latest['date'].strftime('%A %-d %B %Y'),
            )
        else:
            if latest['Total'] == 1:
                tweet = '''One death registered in Northern Ireland, week ended {date}, in:
'''.format(
                    date=latest['date'].strftime('%A %-d %B %Y')
                )
            else:
                tweet = '''{deaths:,} deaths registered in Northern Ireland, week ended {date}, in:
'''.format(
                    date=latest['date'].strftime('%A %-d %B %Y'),
                    deaths=int(latest['Total'])
                )
            for name in ['Hospital', 'Care Home', 'Hospice', 'Home', 'Other']:
                if latest[name] > 0:
                    tweet += '\u2022 %s: %s\n' % (name, int(latest[name]))
            tweet += '\n'
        if len(df) > 1:
            prev = df.iloc[-2]
            diff = latest['Total'] - prev['Total']
            tweet += '''{symb} {diff} {comp} than previous week
'''.format(
                symb=good_symb if diff < 0 else bad_symb,
                diff=abs(int(diff)),
                comp='fewer' if diff < 0 else 'more'
            )
        try:
            driver = get_chrome_driver()
            if driver is None:
                logging.error('Failed to start chrome')
            else:
                toplot = df[(df['Week Ending'] > df['Week Ending'].max() - pandas.to_timedelta(84, unit='d'))]
                toplot = toplot.drop(columns=['Week of Death', 'date', 'Total']).melt(id_vars='Week Ending', var_name='Location', value_name='Deaths')
                print(toplot)
                p = altair.vconcat(
                    altair.Chart(
                        toplot
                    ).mark_area().encode(
                        x=altair.X('Week Ending:T', axis=altair.Axis(title='Week of death')),
                        y=altair.Y('sum(Deaths):Q', axis=altair.Axis(title='Deaths', orient="right", tickMinStep=1)),
                        color=altair.Color('Location', sort=altair.SortField('order', order='descending')),
                    ).properties(
                        height=450,
                        width=800,
                        title='NI COVID-19 Deaths reported by NISRA from %s to %s' % (toplot['Week Ending'].min().strftime('%-d %B %Y'), toplot['Week Ending'].max().strftime('%-d %B %Y'))
                    ),
                ).properties(
                    title=altair.TitleParams(
                        ['Data from NISRA',
                         'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                        baseline='bottom',
                        orient='bottom',
                        anchor='end',
                        fontWeight='normal',
                        fontSize=10,
                        dy=10
                    ),
                )
                plotname = 'nisra-deaths-time-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
                plotstore = io.BytesIO()
                p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
                plotstore.seek(0)
                plots.append({'name': plotname, 'store': plotstore})
        except:
            logging.exception('Error creating plot')
        tweets.append({
            'text': tweet,
            'url': change['url'],
            'notweet': change.get('notweet'),
            'testtweet': change.get('testtweet'),  # stored so the loop below does not rely on the leftover loop variable
            'filedate': change['filedate'],
            'plots': plots
        })

    donottweet = []
    if len(tweets) > 1:
        for i in range(1, len(tweets)):
            for j in range(0, i):
                if tweets[i]['text'] == tweets[j]['text']:
                    donottweet.append(i)

    messages = []
    for idx in range(len(tweets)):
        tweet = tweets[idx]['text'] + tweets[idx]['url']
        if idx not in donottweet:
            if tweets[idx].get('notweet') is not True:
                api = TwitterAPI(secret['twitter_apikey'], secret['twitter_apisecretkey'], secret['twitter_accesstoken'], secret['twitter_accesstokensecret'])
                upload_ids = api.upload_multiple(tweets[idx]['plots'])
                if tweets[idx].get('testtweet') is True:
                    if len(upload_ids) > 0:
                        resp = api.dm(secret['twitter_dmaccount'], tweet, upload_ids[0])
                    else:
                        resp = api.dm(secret['twitter_dmaccount'], tweet)
                    messages.append('Tweeted DM ID %s' % (resp.id))
                else:
                    if len(upload_ids) > 0:
                        resp = api.tweet(tweet, media_ids=upload_ids)
                    else:
                        resp = api.tweet(tweet)
                    messages.append('Tweeted ID %s, ' % resp.id)
                    # Update the file index
                    for i in range(len(index)):
                        if index[i]['filedate'] == tweets[idx]['filedate']:
                            index[i]['tweet'] = resp.id
                            break
                    status.put_dict(index)
                    messages[-1] += ('updated %s' % secret['nisra-deaths-index'])
            else:
                messages.append('Did not tweet')
                print(tweet)
        else:
            messages.append('Duplicate found %s, did not tweet, ' % tweets[idx]['filedate'])

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": messages,
        }),
    }
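# get_chrome_driver is a shared helper not shown in this section. A minimal
# sketch of what it would need to do for altair's selenium PNG export,
# assuming headless Chrome and chromedriver are installed in the Lambda image
# (the exact options are assumptions):
import logging
from selenium import webdriver

def get_chrome_driver():
    # Return a headless Chrome driver, or None if startup fails
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    try:
        return webdriver.Chrome(options=options)
    except Exception:
        logging.exception('Failed to start chrome driver')
        return None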
def lambda_handler(event, context):
    messages = []
    try:
        # Get the secret
        sm = boto3.client('secretsmanager')
        secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
        secret = json.loads(secretobj['SecretString'])
        # Get the index
        s3 = boto3.client('s3')
        status = S3_scraper_index(s3, secret['bucketname'], secret['cog-variants-index'])
        index = status.get_dict()
        # Create a copy of the file in s3
        if 'keyname' not in event:
            keyname = "COG-variants/%s/%s-%s.csv" % (event['filedate'], event['modified'].replace(':', '_'), event['length'])
            print('getting URL')
            with requests.get(event['url'], stream=True) as stream:
                stream.raise_for_status()
                stream.raw.decode_content = True
                s3.upload_fileobj(stream.raw, secret['bucketname'], keyname, Config=boto3.s3.transfer.TransferConfig(use_threads=False))
            print('done')
        else:
            keyname = event['keyname']
        # Download the most recently updated CSV file
        obj = s3.get_object(Bucket=secret['bucketname'], Key=keyname)['Body']
        stream = io.BytesIO(obj.read())
        # Dataframe for converting between pango lineage and WHO labels
        # Get the mapping from the raw Github URL
        resp = requests.get('https://github.com/pbarber/covid19-pango-lineage-to-who-label/raw/main/mapping.json')
        # Make sure that the request was successful
        resp.raise_for_status()
        # Convert the request data to a Python dictionary
        mapping = resp.json()
        # Expand the Pango column
        mapping = pandas.DataFrame(mapping).explode('Pango lineages').reset_index(drop=True)
        # Filter out old designations
        mapping_current = mapping[mapping['Designation'] != 'Former Variant of Interest']
        # Load variant data, aggregate and push back to S3
        df = pandas.read_csv(stream)
        df = df[df['adm1'] == 'UK-NIR']
        df['Sample Date'] = pandas.to_datetime(df['sample_date'])
        df['Week of sample'] = df['Sample Date'] - pandas.to_timedelta(df['Sample Date'].dt.dayofweek, unit='d')
        # Join the lineage data
        matches = mapping['Pango lineages'].apply(match, col=df['lineage'])
        match_idx = matches.idxmax()
        # Filter out indexes where there is no match
        match_idx[match_idx == matches.idxmin()] = pandas.NA
        df['idx'] = match_idx
        # Join to the mapping based on indexes
        df = df.merge(mapping, how='left', left_on='idx', right_index=True).drop(columns=['idx', 'Pango lineages'])
        df['WHO label'] = df['WHO label'].fillna('Other')
        lin_by_week = df.groupby(['Week of sample', 'WHO label']).size().rename('count')
        lin_pc_by_week = lin_by_week / lin_by_week.groupby(level=0).sum()
        lin_by_week = pandas.DataFrame(lin_by_week).reset_index()
        lin_pc_by_week = pandas.DataFrame(lin_pc_by_week).reset_index()
        stream = io.BytesIO()
        lin_by_week.to_csv(stream, index=False)
        stream.seek(0)
        lineage_key = '%s_lineage.csv' % keyname.rsplit('.', maxsplit=1)[0]
        s3.upload_fileobj(stream, secret['bucketname'], lineage_key)
        messages.append('Wrote lineage summary to s3')
        # Update the S3 index and find the previous date
        previous = '1970-01-01'
        prev_lineagekey = None
        thisindex = None
        for i in range(len(index)):
            if index[i]['modified'] == event['modified']:
                index[i]['lineage'] = lineage_key
                index[i]['keyname'] = keyname
                thisindex = i
            elif index[i]['filedate'] != event['filedate']:
                if (index[i]['filedate'] > previous) and (index[i]['filedate'] < event['filedate']):
                    previous = index[i]['filedate']
                    prev_lineagekey = index[i].get('lineage')
        status.put_dict(index)
        # If there is a previous file, then load it and work out the differences
        if prev_lineagekey is not None:
            obj = s3.get_object(Bucket=secret['bucketname'], Key=prev_lineagekey)['Body']
            stream = io.BytesIO(obj.read())
            prev_lineage = pandas.read_csv(stream)
            if 'WHO label' not in prev_lineage.columns:
                prev_lineage['WHO label'] = 'Other'
            prev_lineage = prev_lineage.groupby('WHO label')['count'].sum()
            lineage = lin_by_week.groupby('WHO label')['count'].sum().reset_index()
            lineage = lineage.merge(prev_lineage, how='left', on='WHO label')
            lineage = lineage.groupby('WHO label').sum()[['count_x', 'count_y']]
            lineage['count_y'] = lineage['count_y'].fillna(0)
            lineage['diff'] = (lineage['count_x'] - lineage['count_y']).fillna(0).astype(int)
            top5 = lineage.nlargest(5, 'diff')
            tweet = """{total:,d} new variant analyses reported for NI on {currdate} since {prevdate} ({altogether:,d} total):
""".format(
                total=lineage['diff'].sum(),
                prevdate=datetime.datetime.strptime(previous, '%Y-%m-%d').date().strftime('%A %-d %B %Y'),
                currdate=datetime.datetime.strptime(event['filedate'], '%Y-%m-%d').date().strftime('%A %-d %B %Y'),
                altogether=lineage['count_x'].sum()
            )
            for variant, data in top5.to_dict('index').items():
                if data['diff'] > 0:
                    tweet += f"\u2022 {variant}: {data['diff']:,d} (of {data['count_x']:,d})\n"
            others = int(lineage['diff'].sum() - top5['diff'].sum())
            if others != 0:
                tweet += f"\u2022 Others: {others:,d}\n"
            tweet += '\nSource: https://beta.microreact.org/'
            driver = get_chrome_driver()
            if driver is None:
                raise Exception('Failed to start chrome')
            p = altair.vconcat(
                altair.Chart(
                    lin_by_week[lin_by_week['Week of sample'] > lin_by_week['Week of sample'].max() - pandas.to_timedelta(84, unit='d')]
                ).mark_line().encode(
                    x=altair.X('Week of sample:T', axis=altair.Axis(title='', labels=False, ticks=False)),
                    y=altair.Y('count:Q', axis=altair.Axis(title='Samples')),
                    color='WHO label'
                ).properties(
                    height=225,
                    width=800,
                    title='NI COVID-19 variants identified by COG-UK over the most recent 12 weeks'
                ),
                altair.Chart(
                    lin_pc_by_week[lin_pc_by_week['Week of sample'] > lin_pc_by_week['Week of sample'].max() - pandas.to_timedelta(84, unit='d')]
                ).mark_area().encode(
                    x='Week of sample:T',
                    y=altair.Y('sum(count):Q', axis=altair.Axis(format='%', title='% of samples', orient="right")),
                    color='WHO label'
                ).properties(
                    height=225,
                    width=800,
                )
            ).properties(
                title=altair.TitleParams(
                    ['Variant identification can take up to 3 weeks, so recent totals are likely to be revised upwards',
                     'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                    baseline='bottom',
                    orient='bottom',
                    anchor='end',
                    fontWeight='normal',
                    fontSize=10,
                    dy=10
                ),
            )
            plotname = 'ni-variants-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
            plotstore = io.BytesIO()
            p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
            plotstore.seek(0)
            if event.get('notweet') is not True:
                api = TwitterAPI(secret['twitter_apikey'], secret['twitter_apisecretkey'], secret['twitter_accesstoken'], secret['twitter_accesstokensecret'])
                resp = api.upload(plotstore, plotname)
                if event.get('testtweet') is True:
                    resp = api.dm(secret['twitter_dmaccount'], tweet, resp.media_id)
                    messages.append('Tweeted DM ID %s, ' % resp.id)
                else:
                    resp = api.tweet(tweet, media_ids=[resp.media_id])
                    messages.append('Tweeted ID %s, ' % resp.id)
                    # Update the file index
                    index[thisindex]['tweet'] = resp.id
                    status.put_dict(index)
            else:
                messages.append('Did not tweet')
                print(tweet)
        else:
            messages.append('Did not find previous lineage data')
    except:
        logging.exception('Caught exception in COG variants tweeter')
    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": messages,
        }),
    }
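# The match function applied to the Pango mapping above is defined elsewhere.
# A sketch of the behaviour the surrounding code implies: applied per mapping
# row, it scores every sample lineage, so matches.idxmax() picks the matching
# mapping row per sample and idxmin flags no-match. The regex approach below
# is an assumption:
import re
import pandas

def match(pango, col):
    # True where the sample lineage equals the mapping lineage or is a
    # sub-lineage of it (e.g. B.1.617.2 also matches B.1.617.2.1)
    if pango.endswith('*'):
        pattern = re.escape(pango[:-1]) + r'.*'
    else:
        pattern = re.escape(pango) + r'(\..+)?'
    return col.str.fullmatch(pattern).fillna(False)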
def lambda_handler(event, context):
    # Get the secret
    sm = boto3.client('secretsmanager')
    secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
    secret = json.loads(secretobj['SecretString'])
    s3 = boto3.client('s3')

    messages = []
    # Download the most recently updated PDF file
    for change in event:
        tmp = tempfile.NamedTemporaryFile(suffix='.pdf')
        with open(tmp.name, 'wb') as fp:
            s3.download_fileobj(secret['bucketname'], change['keyname'], fp)
        # Get the date range covered by the report
        text = textract.process(tmp.name, method='pdfminer').decode('utf-8')
        regex = re.compile(r'(\d{1,2})(?:st|nd|rd|th)\s+([A-Z][a-z]+)\s+(\d{4})\s+\–+\s+(\d{1,2})(?:st|nd|rd|th)\s+([A-Z][a-z]+)\s+(\d{4})')
        start_date = None
        end_date = None
        for line in text.split('\n'):
            m = regex.search(line)
            if m:
                start_date = pandas.to_datetime('%s %s %s' % (m.group(1), m.group(2), m.group(3)), format='%d %B %Y').date()
                end_date = pandas.to_datetime('%s %s %s' % (m.group(4), m.group(5), m.group(6)), format='%d %B %Y').date()
                break
        if start_date is None:
            logging.error('Unable to find start date in report')
            return {
                "statusCode": 404,
                "body": 'Unable to find start date in report %s' % change['url'],
            }
        # Get the tables from the report - note that it was not possible to get data from 4th April or earlier due to
        # tables that will not parse properly in the PDF
        tables = tabula.read_pdf(tmp.name, pages="all", multiple_tables=True)
        tablecount = 0
        dataset = pandas.DataFrame()
        for df in tables:
            if 'Total' not in df.columns:
                # Promote the first row to column headers where tabula failed to
                firstrow = df.iloc[0]
                newcols = []
                for i in range(len(firstrow)):
                    if isinstance(firstrow[i], float) and math.isnan(firstrow[i]):
                        newcols.append(df.columns[i])
                    else:
                        newcols.append(firstrow[i])
                df.columns = newcols
                df = df[1:]
            df['Setting'] = df['Setting'].str.strip()
            df.dropna(axis='index', subset=['Total', 'Open', 'Closed'], inplace=True)
            df['Total'] = df['Total'].astype(int)
            df['Open'] = df['Open'].astype(int)
            df['Closed'] = df['Closed'].astype(int)
            df = df[df['Setting'] != 'Total']
            if tablecount == 0:
                df['Type'] = 'Probable Outbreak'
            elif tablecount == 1:
                df['Type'] = 'Cluster'
            else:
                logging.warning('Unexpected table: %s' % df)
            tablecount += 1
            dataset = pandas.concat([dataset, df])
        dataset['Start Date'] = pandas.to_datetime(start_date)
        dataset['End Date'] = pandas.to_datetime(end_date)
        week = int((end_date - pandas.to_datetime('1 January 2020', format='%d %B %Y').date()).days / 7)
        dataset['Week'] = week
        # Create a simple summary and the tweet text
        summary = dataset.groupby('Type').sum()
        tweet = 'NI Contact Tracing reports from %s to %s:\n' % (start_date.strftime('%-d %B %Y'), end_date.strftime('%-d %B %Y'))
        for Type, data in summary.to_dict('index').items():
            tweet += '\u2022 %d %ss (%d open, %d closed)\n' % (data['Total'], Type.lower(), data['Open'], data['Closed'])
        tweet += '\n%s' % change['url']
        # Pull current data from s3
        try:
            obj = s3.get_object(Bucket=secret['bucketname'], Key=secret['pha-clusters-datastore'])['Body']
        except s3.exceptions.NoSuchKey:
            print("The object %s does not exist in bucket %s." % (secret['pha-clusters-datastore'], secret['bucketname']))
            datastore = pandas.DataFrame(columns=['Week'])
        else:
            stream = io.BytesIO(obj.read())
            datastore = pandas.read_csv(stream)
        # Clean out any data with matching dates
        datastore = datastore[datastore['Week'] != week]
        # Append the new data
        datastore = pandas.concat([datastore, dataset])
        datastore['Start Date'] = pandas.to_datetime(datastore['Start Date'])
        datastore['End Date'] = pandas.to_datetime(datastore['End Date'])
        # Replace any known duplicates
        datastore['Setting'] = datastore['Setting'].replace({
            'Cinema/ Theatre / Entertainment': 'Cinema / Theatre / Entertainment Venue',
            'Cinema/ Theatre / Entertainment Venue': 'Cinema / Theatre / Entertainment Venue',
            'Funeral / Wakes': 'Funeral / Wake',
            'Restaurant / Cafe': 'Restaurant / Café'
        })
        # Push the data to s3
        stream = io.BytesIO()
        datastore.to_csv(stream, index=False)
        stream.seek(0)
        s3.upload_fileobj(stream, secret['bucketname'], secret['pha-clusters-datastore'])
        # Set up chromedriver so we can save altair plots
        driver = get_chrome_driver()
        plots = []
        if driver is None:
            logging.error('Failed to start chrome')
        else:
            p = altair.vconcat(
                altair.Chart(
                    dataset
                ).mark_bar().encode(
                    x=altair.X('Total:Q', axis=altair.Axis(title='Total reported')),
                    y=altair.Y('Setting:O'),
                    color='Type',
                    order=altair.Order(
                        'Type',
                        sort='ascending'
                    ),
                ).properties(
                    height=450,
                    width=800,
                    title='NI COVID-19 Contact Tracing reports from %s to %s' % (start_date.strftime('%-d %B %Y'), end_date.strftime('%-d %B %Y'))
                ),
            ).properties(
                title=altair.TitleParams(
                    ['Data from Public Health Agency, does not include education or home settings',
                     'Covers the preceding four weeks',
                     'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                    baseline='bottom',
                    orient='bottom',
                    anchor='end',
                    fontWeight='normal',
                    fontSize=10,
                    dy=10
                ),
            )
            plotname = 'pha-outbreaks-week-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
            plotstore = io.BytesIO()
            p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
            plotstore.seek(0)
            plots.append({'name': plotname, 'store': plotstore})
            p = altair.vconcat(
                altair.Chart(
                    datastore.groupby(['End Date', 'Type'])['Total'].sum().reset_index()
                ).mark_area().encode(
                    x=altair.X('End Date:T', axis=altair.Axis(title='Date reported (for preceding four weeks)')),
                    y=altair.Y('Total:Q', axis=altair.Axis(title='Total reported', orient="right")),
                    color='Type',
                    order=altair.Order(
                        'Type',
                        sort='ascending'
                    ),
                ).properties(
                    height=450,
                    width=800,
                    title='NI COVID-19 Contact Tracing reports from %s to %s' % (datastore['Start Date'].min().strftime('%-d %B %Y'), datastore['End Date'].max().strftime('%-d %B %Y'))
                ),
            ).properties(
                title=altair.TitleParams(
                    ['Data from Public Health Agency, does not include education or home settings',
                     'Reported weekly for the preceding four weeks',
                     'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                    baseline='bottom',
                    orient='bottom',
                    anchor='end',
                    fontWeight='normal',
                    fontSize=10,
                    dy=10
                ),
            )
            plotname = 'pha-outbreaks-time-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
            plotstore = io.BytesIO()
            p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
            plotstore.seek(0)
            plots.append({'name': plotname, 'store': plotstore})
            p = altair.vconcat(
                altair.Chart(
                    datastore.groupby(['End Date', 'Setting', 'Type'])['Total'].sum().reset_index()
                ).mark_area().encode(
                    x=altair.X('End Date:T', axis=altair.Axis(title='')),
                    y=altair.Y('Total:Q', axis=altair.Axis(title='', orient="right")),
                    color='Type',
                    facet=altair.Facet('Setting:O', columns=5, title=None, spacing=0),
                    order=altair.Order(
                        'Type',
                        sort='ascending'
                    ),
                ).properties(
                    height=90,
                    width=160,
                    title=altair.TitleParams(
                        'NI COVID-19 Contact Tracing reports by setting from %s to %s' % (datastore['Start Date'].min().strftime('%-d %B %Y'), datastore['End Date'].max().strftime('%-d %B %Y')),
                        anchor='middle',
                    ),
                ),
            ).properties(
                title=altair.TitleParams(
                    ['Data from Public Health Agency, does not include education or home settings',
                     'Reported weekly for the preceding four weeks',
                     'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                    baseline='bottom',
                    orient='bottom',
                    anchor='end',
                    fontWeight='normal',
                    fontSize=10,
                    dy=10
                ),
            )
            plotname = 'pha-outbreaks-small-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
            plotstore = io.BytesIO()
            p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
            plotstore.seek(0)
            plots.append({'name': plotname, 'store': plotstore})
        # Convert to dates to ensure correct output to CSV
        datastore['Start Date'] = datastore['Start Date'].dt.date
        datastore['End Date'] = datastore['End Date'].dt.date
        # Tweet out the text and images
        if change.get('notweet') is not True:
            api = TwitterAPI(secret['twitter_apikey'], secret['twitter_apisecretkey'], secret['twitter_accesstoken'], secret['twitter_accesstokensecret'])
            upload_ids = api.upload_multiple(plots)
            if change.get('testtweet') is True:
                if len(upload_ids) > 0:
                    resp = api.dm(secret['twitter_dmaccount'], tweet, upload_ids[0])
                    if len(upload_ids) > 1:
                        resp = api.dm(secret['twitter_dmaccount'], 'Test 1', upload_ids[1])
                    if len(upload_ids) > 2:
                        resp = api.dm(secret['twitter_dmaccount'], 'Test 2', upload_ids[2])
                else:
                    resp = api.dm(secret['twitter_dmaccount'], tweet)
                messages.append('Tweeted DM ID %s' % (resp.id))
            else:
                if len(upload_ids) > 0:
                    resp = api.tweet(tweet, media_ids=upload_ids)
                else:
                    resp = api.tweet(tweet)
                # Download and update the index
                status = S3_scraper_index(s3, secret['bucketname'], secret['pha-clusters-index'])
                index = status.get_dict()
                for i in range(len(index)):
                    if index[i]['filedate'] == change['filedate']:
                        index[i]['tweet'] = resp.id
                        break
                status.put_dict(index)
                messages.append('Tweeted ID %s and updated %s' % (resp.id, secret['pha-clusters-index']))
        else:
            print(tweet)
            messages.append('Did not tweet')

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": messages,
        }),
    }
def lambda_handler(event, context):
    # Get the secret
    sm = boto3.client('secretsmanager')
    secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
    secret = json.loads(secretobj['SecretString'])
    s3 = boto3.client('s3')
    if event.get('mode') == 'aggregate':
        # Get the index of all reports
        status = S3_scraper_index(s3, secret['bucketname'], secret['doh-dd-index'])
        index = status.get_dict()
        allreports = pandas.DataFrame(columns=['Date of Specimen', 'Reported_Date', 'Total Lab Tests', 'Individ with Lab Test', 'Individ with Positive Lab Test'])
        # Download every Excel file in the index
        for item in index:
            try:
                obj = s3.get_object(Bucket=secret['bucketname'], Key=item['keyname'])['Body']
                stream = io.BytesIO(obj.read())
                # Load test data
                daily = pandas.read_excel(stream, engine='openpyxl', sheet_name='Tests')
                # Take only the required columns
                daily = daily.groupby(['Date of Specimen']).sum()[['Total Lab Tests', 'Individ with Lab Test', 'Individ with Positive Lab Test']].reset_index()
                # Add reported date
                daily['Reported_Date'] = pandas.to_datetime(item['filedate'], format='%Y-%m-%d')
            except:
                logging.exception('Error loading %s' % item)
                raise
            # Combine with the other data reports
            allreports = pandas.concat([allreports, daily])
        # Write the output to CSV
        keyname = 'DoH-DD/all_tests.csv'
        csvbuffer = io.StringIO()
        allreports.to_csv(csvbuffer, index=False)
        s3.put_object(Bucket=secret['bucketname'], Key=keyname, Body=csvbuffer.getvalue())
        message = 'Wrote %d rows to %s' % (len(allreports), keyname)
    else:
        obj = s3.get_object(Bucket=secret['bucketname'], Key='DoH-DD/all_tests.csv')['Body']
        stream = io.BytesIO(obj.read())
        # Load test data
        df = pandas.read_csv(stream)
        print(df.columns)
        message = 'Done'
    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": message,
        }),
    }
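# A quick local invocation sketch for the aggregator above; calling the
# handler directly with this event shape is an assumption for testing outside
# Lambda:
if __name__ == '__main__':
    result = lambda_handler({'mode': 'aggregate'}, None)
    print(result['body'])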
def lambda_handler(event, context):
    messages = ['Failure']
    # Get the secret
    sm = boto3.client('secretsmanager')
    secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
    secret = json.loads(secretobj['SecretString'])
    try:
        # Get the index
        s3 = boto3.client('s3')
        status = S3_scraper_index(s3, secret['bucketname'], secret['doh-dd-index'])
        index = status.get_dict()
        tweets = []
        # Download the most recently updated Excel file
        for change in event:
            obj = s3.get_object(Bucket=secret['bucketname'], Key=change['keyname'])['Body']
            stream = io.BytesIO(obj.read())
            # Load the tests sheet and add it to the store
            daily = pandas.read_excel(stream, engine='openpyxl', sheet_name='Tests')
            daily = daily.groupby(['Sample_Date']).sum()[['Total Tests', 'Total Cases']].reset_index()
            daily['Reported_Date'] = pandas.to_datetime(change['filedate'], format='%Y-%m-%d')
            datastore = update_datastore(
                s3,
                secret['bucketname'],
                secret['doh-dd-store-tests'],
                daily['Reported_Date'].max(),
                daily,
                (change.get('notweet', False) is False) and (change.get('tweet', True) is True),
                'Reported_Date'
            )
            # Load test data and add extra fields
            df = pandas.read_excel(stream, engine='openpyxl', sheet_name='Summary Tests')
            df['pos_rate'] = df['Total Cases'] / df['Total Tests']
            df['rolling_pos_rate'] = df['Rolling 7 Day Cases'] / df['Rolling 7 Day Tests (PCR & LFT)']
            df['printdate'] = df['Sample_Date'].dt.strftime('%-d %B %Y')
            df['rolling_7d_change'] = (df['Rolling 7 Day Cases'] - df['Rolling 7 Day Cases'].shift(7)) * 7
            df['New cases 7-day rolling mean'] = df['Total Cases'].rolling(7, center=True).mean()
            df.set_index('Sample_Date', inplace=True)
            newind = pandas.date_range(start=df.index.min(), end=df.index.max())
            df = df.reindex(newind)
            df.index.name = 'Sample_Date'
            df.reset_index(inplace=True)
            df['Rolling cases per 100k'] = 100000 * (df['New cases 7-day rolling mean'] / 1893667)
            df = create_model(df, 'Rolling cases per 100k', 'Sample_Date')
            # Get the latest dates with values for tests and rolling
            latest = df.iloc[df['Sample_Date'].idxmax()]
            latest_7d = df.iloc[df[df['Rolling 7 Day Cases'].notna()]['Sample_Date'].idxmax()]
            latest_model = df.iloc[df[df['Rolling cases per 100k model_daily_change'].notna()]['Sample_Date'].idxmax()]
            last_but1_model = df.iloc[df[(df['Rolling cases per 100k model_daily_change'].notna()) & (df['Sample_Date'] != latest_model['Sample_Date'])]['Sample_Date'].idxmax()]
            # Summary stats to allow 'X registered in last 24 hours' info
            deaths = load_ni_time_series(stream, 'Deaths', 'Date of Death', 'Number of Deaths')
            admissions = load_ni_time_series(stream, 'Admissions', 'Admission Date', 'Number of Admissions', True)
            discharges = load_ni_time_series(stream, 'Discharges', 'Discharge Date', 'Number of Discharges')
            inpatients = load_ni_time_series(stream, 'Inpatients', 'Inpatients at Midnight', 'Number of Confirmed COVID Inpatients', False, 'Sex', 'All')
            inpatients.rename(columns={'Inpatients at Midnight': 'Date'}, inplace=True)
            icu = load_ni_time_series(stream, 'ICU', 'Date', 'Confirmed COVID Occupied')
            totals = {
                'ind_tested': int(df['Total Tests'].sum()),
                'ind_positive': int(df['Total Cases'].sum()),
                'deaths': int(deaths['Number of Deaths'].sum()),
                'admissions': int(admissions['Number of Admissions'].sum()),
                'discharges': int(discharges['Number of Discharges'].sum())
            }
            print(totals)
            latest_adm_model = admissions.iloc[admissions[admissions['Number of Admissions 7-day rolling mean model_daily_change'].notna()]['Admission Date'].idxmax()]
            adm_dis = admissions.merge(discharges, how='inner', left_on='Admission Date', right_on='Discharge Date', validate='1:1')
            adm_dis.drop(columns=['Discharge Date'], inplace=True)
            adm_dis.rename(columns={'Admission Date': 'Date'}, inplace=True)
            adm_dis['Inpatients'] = adm_dis['Number of Admissions 7-day rolling mean'].cumsum() - adm_dis['Number of Discharges 7-day rolling mean'].cumsum()
            adm_dis_7d = adm_dis.rename(columns={
                'Number of Admissions 7-day rolling mean': 'Admissions',
                'Number of Discharges 7-day rolling mean': 'Discharges'
            })[['Date', 'Admissions', 'Discharges']]
            adm_dis_7d = adm_dis_7d.melt(id_vars='Date')
            # Age band data
            age_bands = pandas.read_excel(stream, engine='openpyxl', sheet_name='Individuals 7 Days - 5yr Age')
            age_bands = age_bands.groupby('Age_Band_5yr').sum()[['Total_Cases', 'Total_Tests']].reset_index()
            age_bands['Positivity_Rate'] = age_bands['Total_Cases'] / age_bands['Total_Tests']
            age_bands['Band Start'] = age_bands['Age_Band_5yr'].str.extract(r'Aged (\d+)').astype(float)
            age_bands['Band End'] = age_bands['Age_Band_5yr'].str.extract(r'Aged \d+ - (\d+)').astype(float)
            age_bands['Date'] = df['Sample_Date'].max()
            age_bands['Positive_Tests'] = age_bands['Total_Cases']
            # Get the age bands datastore contents from S3
            datastore = update_datastore(
                s3,
                secret['bucketname'],
                secret['doh-dd-store-agebands'],
                df['Sample_Date'].max(),
                age_bands,
                (change.get('notweet', False) is False) and (change.get('tweet', True) is True),
                'Date'
            )
            # Plot the case reports and 7-day average
            driver = get_chrome_driver()
            plots = []
            if driver is not None:
                today_str = datetime.datetime.now().date().strftime('%Y-%m-%d')
                p = plot_key_ni_stats_date_range(df, admissions, deaths, latest['Sample_Date'] - pandas.to_timedelta(42, unit='d'), latest['Sample_Date'], ['linear', 'log'])
                plots = output_plot(p, plots, driver, 'ni-cases-%s.png' % today_str)
                if len(plots) > 0:
                    p = plot_hospital_stats(adm_dis_7d, inpatients, icu, latest['Sample_Date'] - pandas.to_timedelta(42, unit='d'))
                    plots = output_plot(p, plots, driver, 'ni-hospitals-%s.png' % today_str)
                if len(plots) > 1:
                    toplot = datastore[datastore['Date'] >= (datastore['Date'].max() + pandas.DateOffset(days=-42))]
                    toplot['Date'] = pandas.to_datetime(toplot['Date'])
                    newind = pandas.date_range(start=toplot['Date'].max() + pandas.DateOffset(days=-42), end=toplot['Date'].max())
                    alldates = pandas.Series(newind)
                    alldates.name = 'Date'
                    toplot = toplot.merge(alldates, how='outer', left_on='Date', right_on='Date')
                    toplot['X'] = toplot['Date'].dt.strftime('%e %b')
                    toplot['Most Recent Positive Tests'] = toplot['Positive_Tests'].where(toplot['Date'] == toplot['Date'].max()).apply(lambda x: f"{x:n}" if not pandas.isna(x) else "")
                    toplot['Age_Band_5yr'].fillna('Not Known', inplace=True)
                    bands = toplot.groupby(['Age_Band_5yr', 'Band Start', 'Band End'], dropna=False).size().reset_index()[['Age_Band_5yr', 'Band Start', 'Band End']]
                    bands = bands[bands['Age_Band_5yr'] != 'Not Known']
                    bands.fillna(90, inplace=True)
                    bands['Band End'] = bands['Band End'].astype(int)
                    bands['Band Start'] = bands['Band Start'].astype(int)
                    bands['Year'] = bands.apply(lambda x: range(x['Band Start'], x['Band End'] + 1), axis='columns')
                    bands = bands.explode('Year').reset_index()
                    pops = get_ni_pop_pyramid()
                    pops = pops[pops['Year'] == 2020].groupby(['Age Band']).sum()['Population']
                    bands = bands.merge(pops, how='inner', validate='1:1', right_index=True, left_on='Year')
                    bands = bands.groupby('Age_Band_5yr').sum()['Population']
                    toplot = toplot.merge(bands, how='left', on='Age_Band_5yr')
                    toplot['Positive per 100k'] = (100000 * toplot['Positive_Tests']) / toplot['Population']
                    toplot['Most Recent Positive per 100k'] = toplot['Positive per 100k'].where(toplot['Date'] == toplot['Date'].max()).apply(lambda x: f"{int(x):n}" if not pandas.isna(x) else "")
                    heatmap2 = plot_heatmap(toplot, 'X', 'Date', 'Date', 'Age_Band_5yr', 'Band Start', 'Age Band', 'Positive per 100k', 'Positive Tests per 100k')
                    p = altair.vconcat(
                        altair.layer(
                            heatmap2.properties(
                                height=450,
                                width=800,
                                title='NI COVID-19 7-day Positive Tests by Age Band per 100k people (%s to %s)' % (toplot['Date'].min().strftime('%-d %B %Y'), toplot['Date'].max().strftime('%-d %B %Y')),
                            ),
                            heatmap2.mark_text(
                                align='right',
                                baseline='middle',
                                dx=43
                            ).encode(
                                text=altair.Text('Most Recent Positive per 100k'),
                                color=altair.value('black')
                            )
                        )
                    ).properties(
                        title=altair.TitleParams(
                            ['Data from DoH daily downloads',
                             'Numbers to right of chart show most recent value',
                             'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().strftime('%A %-d %B %Y')],
                            baseline='bottom',
                            orient='bottom',
                            anchor='end',
                            fontWeight='normal',
                            fontSize=10,
                            dy=10
                        ),
                    )
                    plots = output_plot(p, plots, driver, 'ni-cases-age-bands-%s.png' % today_str)
                if len(plots) > 2:
                    p = plot_test_stats(df, latest['Sample_Date'] - pandas.to_timedelta(42, unit='d'))
                    plots = output_plot(p, plots, driver, 'ni-tests-%s.png' % today_str)
            # Find the date since which the rate was as high/low
            symb_7d, est = find_previous(df, latest_7d, 'Rolling 7 Day Cases')
            # Build the tweet text
            tweet = '''{ind_tested:,} people tested, {ind_positive:,} ({pos_rate:.2%}) positive on {date}

{symb_7d} {pos_7d:,} positive in last 7 days, {est}

{tag_model} cases {dir_model} by {model_daily:.1%} per day, {model_weekly:.1%} per week, {doub} time {doub_time:.1f} days

'''.format(
                date=latest['Sample_Date'].strftime('%A %-d %B %Y'),
                ind_positive=int(latest['Total Cases']),
                ind_tested=int(latest['Total Tests']),
                pos_rate=latest['pos_rate'],
                symb_7d=symb_7d,
                est=est,
                model_daily=abs(last_but1_model['Rolling cases per 100k model_daily_change']),
                model_weekly=abs(last_but1_model['Rolling cases per 100k model_weekly_change']),
                pos_7d=int(round(latest_7d['Rolling 7 Day Cases'] * 7, 0)),
                dir_model='falling' if last_but1_model['Rolling cases per 100k model_daily_change'] < 0 else 'rising',
                tag_model=good_symb if last_but1_model['Rolling cases per 100k model_daily_change'] < 0 else bad_symb,
                doub='halving' if (last_but1_model['Rolling cases per 100k model0'] < 0) else 'doubling',
                doub_time=abs(numpy.log(2) / last_but1_model['Rolling cases per 100k model0'])
            )
            # If we have the data for it, build the second tweet
            last_week = datetime.datetime.strptime(change['filedate'], '%Y-%m-%d').date() - datetime.timedelta(days=7)
            day_before = datetime.datetime.strptime(change['filedate'], '%Y-%m-%d').date() - datetime.timedelta(days=1)
            yesterday = None
            lastweek = None
            for report in index:
                if (report['filedate'] == last_week.strftime('%Y-%m-%d')) and ('totals' in report):
                    lastweek = report
                elif (report['filedate'] == day_before.strftime('%Y-%m-%d')) and ('totals' in report):
                    yesterday = report
                if (yesterday is not None) and (lastweek is not None):
                    break
            tweet2 = '''{inpatients} inpatient{ips} reported'''.format(
                inpatients=totals['admissions'] - totals['discharges'],
                ips='s' if ((totals['admissions'] - totals['discharges']) != 1) else ''
            )
            if lastweek is not None:
                ip_change = (totals['admissions'] - totals['discharges']) - (lastweek['totals']['admissions'] - lastweek['totals']['discharges'])
                tweet2 += ''': {ip_bullet} {ip_change} {ip_text} than 7 days ago ({admissions} admitted, {discharges} discharged)'''.format(
                    ip_change=abs(ip_change),
                    ip_bullet=good_symb if ip_change < 0 else bad_symb,
                    ip_text='fewer' if ip_change < 0 else 'more',
                    admissions=totals['admissions'] - lastweek['totals']['admissions'],
                    discharges=totals['discharges'] - lastweek['totals']['discharges']
                )
                if yesterday is not None:
                    tweet2 += '''

{deaths} death{ds} reported, {deaths_7d} in last 7 days'''.format(
                        deaths=totals['deaths'] - yesterday['totals']['deaths'],
                        ds='s' if ((totals['deaths'] - yesterday['totals']['deaths']) != 1) else '',
                        deaths_7d=totals['deaths'] - lastweek['totals']['deaths']
                    )
                tweet2 += '''

{tag_model} admissions {dir_model} by {model_daily:.1%} per day, {model_weekly:.1%} per week, {doub} time {doub_time:.1f} days'''.format(
                    model_daily=abs(latest_adm_model['Number of Admissions 7-day rolling mean model_daily_change']),
                    model_weekly=abs(latest_adm_model['Number of Admissions 7-day rolling mean model_weekly_change']),
                    dir_model='falling' if latest_adm_model['Number of Admissions 7-day rolling mean model_daily_change'] < 0 else 'rising',
                    tag_model=good_symb if latest_adm_model['Number of Admissions 7-day rolling mean model_daily_change'] < 0 else bad_symb,
                    doub='halving' if (latest_adm_model['Number of Admissions 7-day rolling mean model0'] < 0) else 'doubling',
                    doub_time=abs(numpy.log(2) / latest_adm_model['Number of Admissions 7-day rolling mean model0'])
                )
            tweets.append({
                'text': tweet,
                'text2': tweet2,
                'url': change['url'],
                'notweet': change.get('notweet', False),
                'tweet': change.get('tweet', True),
                'totals': totals,
                'filedate': change['filedate'],
                'plots': plots
            })

        donottweet = []
        if len(tweets) > 1:
            for i in range(1, len(tweets)):
                for j in range(0, i):
                    if tweets[i]['text'] == tweets[j]['text']:
                        donottweet.append(i)

        messages = []
        for idx in reversed(range(len(tweets))):
            t = tweets[idx]
            if t['notweet'] is False:
                if idx not in donottweet:
                    api = TwitterAPI(secret['twitter_apikey'], secret['twitter_apisecretkey'], secret['twitter_accesstoken'], secret['twitter_accesstokensecret'])
                    upload_ids = api.upload_multiple(t['plots'])
                    if t['tweet'] is True:
                        if len(t['plots']) > 0:
                            resp = api.tweet(t['text'] + t['url'], media_ids=upload_ids)
                        else:
                            resp = api.tweet(t['text'] + t['url'])
                        messages.append('Tweeted ID %s, ' % resp.id)
                        if t['text2'] is not None:
                            resp = api.tweet(t['text2'], resp.id)
                            messages[-1] += ('ID %s, ' % resp.id)
                        # Update the file index
                        for i in range(len(index)):
                            if index[i]['filedate'] == t['filedate']:
                                index[i]['tweet'] = resp.id
                                index[i]['totals'] = t['totals']
                                break
                        status.put_dict(index)
                        messages[-1] += ('updated %s' % secret['doh-dd-index'])
                    else:
                        if len(upload_ids) > 0:
                            resp = api.dm(secret['twitter_dmaccount'], t['text'] + t['url'], upload_ids[0])
                        else:
                            resp = api.dm(secret['twitter_dmaccount'], t['text'] + t['url'])
                        messages.append('Tweeted DM %s, ' % resp.id)
                        if len(upload_ids) > 1:
                            resp = api.dm(secret['twitter_dmaccount'], t['text2'], upload_ids[-2])
                        else:
                            resp = api.dm(secret['twitter_dmaccount'], t['text2'])
                else:
                    messages.append('Duplicate found %s, did not tweet, ' % t['filedate'])
            else:
                if idx not in donottweet:
                    messages.append('Did not tweet')
                    print(t['text'] + t['url'])
                    if t['text2'] is not None:
                        print(t['text2'])
                else:
                    messages.append('Duplicate found %s, did not tweet, ' % t['filedate'])
    except:
        logging.exception('Caught error in cases tweeter')
        api = TwitterAPI(secret['twitter_apikey'], secret['twitter_apisecretkey'], secret['twitter_accesstoken'], secret['twitter_accesstokensecret'])
        api.dm(secret['twitter_dmaccount'], 'Error in cases tweeter')
    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": messages,
        }),
    }
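# create_model and find_previous are shared helpers not shown here. A sketch
# of the exponential-fit behaviour create_model's outputs imply (model0 as a
# daily log-growth rate, so log(2)/model0 gives the doubling time); the fit
# window and column naming below are assumptions:
import numpy

def create_model(df, colname, datecol, days=9):
    # Fit log(y) = model0 * t + c over the most recent days with positive data
    fit = df[df[colname] > 0].tail(days)
    x = (fit[datecol] - fit[datecol].min()).dt.days
    model0, _ = numpy.polyfit(x, numpy.log(fit[colname]), 1)
    df[colname + ' model0'] = numpy.nan
    df.loc[fit.index, colname + ' model0'] = model0
    # Express the rate as daily/weekly fractional changes for the tweet text
    df[colname + ' model_daily_change'] = numpy.exp(df[colname + ' model0']) - 1
    df[colname + ' model_weekly_change'] = numpy.exp(df[colname + ' model0'] * 7) - 1
    return df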
def lambda_handler(event, context):
    # Get the secret
    sm = boto3.client('secretsmanager')
    secretobj = sm.get_secret_value(SecretId='ni-covid-tweets')
    secret = json.loads(secretobj['SecretString'])
    s3 = boto3.client('s3')
    # Pull current data from s3
    try:
        obj = s3.get_object(Bucket=secret['bucketname'], Key=secret['pha-education-datastore'])['Body']
    except s3.exceptions.NoSuchKey:
        print("The object %s does not exist in bucket %s." % (secret['pha-education-datastore'], secret['bucketname']))
        datastore = pandas.DataFrame(columns=['filedate'])
    else:
        stream = io.BytesIO(obj.read())
        datastore = pandas.read_csv(stream)

    messages = []
    if 'url' in event[0]:
        # Download the most recently updated PDF file
        for change in event:
            tmp = tempfile.NamedTemporaryFile(suffix='.pdf')
            with open(tmp.name, 'wb') as fp:
                s3.download_fileobj(secret['bucketname'], change['keyname'], fp)
            text = textract.process(tmp.name, method='pdfminer').decode('utf-8')
            regex = re.compile(r'Up to [Ww]eek \d{1,2}\s+\((\d{1,2})\s+([A-Z][a-z]+)\s+(\d{4})\)')
            end_date = None
            for line in text.split('\n'):
                m = regex.search(line)
                if m:
                    end_date = datetime.datetime.strptime('%s %s %s' % (m.group(1), m.group(2), m.group(3)), '%d %B %Y')
                    break
            if end_date is None:
                logging.error('Unable to find end date in report %s' % change['keyname'])
                continue
            regex = re.compile(r'Table (\d+)\. Number of Incidents by School and Incident Type')
            tables = tabula.read_pdf(tmp.name, pages="all", multiple_tables=True, java_options=["-Xmx1024m"])
            dataset = None
            for df in tables:
                match = False
                for col in df.columns:
                    m = regex.search(col)
                    if m:
                        match = True
                        break
                if match is True:
                    if len(df.columns) != 1:
                        logging.error('Too many columns in %s, %s' % (df, change['keyname']))
                        break
                    df.columns = ['raw']
                    df = df[df['raw'].str.endswith('%')]
                    df['Proportion'] = df['raw'].str.rsplit(' ', n=1, expand=True)[1]
                    df['raw'] = df['raw'].str.rsplit(' ', n=1, expand=True)[0]
                    df['Total'] = df['raw'].str.rsplit(' ', n=1, expand=True)[1]
                    df['raw'] = df['raw'].str.rsplit(' ', n=1, expand=True)[0]
                    df['School Type'] = df['raw'].str.replace('Single Case ', '')
                    df = df[['School Type', 'Total']].reset_index(drop=True)
                    if len(df) != 12:
                        logging.error('Unexpected number of rows in %s, %s' % (df, change['keyname']))
                        break
                    df['Incident Type'] = 'Cluster (>5 cases)'
                    df.iloc[:8, df.columns.get_loc('Incident Type')] = 'Cluster (2-5 cases)'
                    df.iloc[:4, df.columns.get_loc('Incident Type')] = 'Single Case'
                    dataset = df
                    break
            if dataset is None:
                logging.error('Unable to find table in %s' % change['keyname'])
                continue
            dataset['filedate'] = change['filedate']
            dataset['End Date'] = end_date.strftime('%Y-%m-%d')
            dataset['url'] = change['url']
            # Clean out any data with matching dates
            datastore = datastore[datastore['filedate'] != change['filedate']]
            # Append the new data
            datastore = pandas.concat([datastore, dataset])
            # Push the data to s3
            stream = io.BytesIO()
            datastore.to_csv(stream, index=False)
            stream.seek(0)
            s3.upload_fileobj(stream, secret['bucketname'], secret['pha-education-datastore'])
    else:
        driver = get_chrome_driver()
        plots = []
        if driver is None:
            logging.error('Failed to start chrome')
        else:
            datastore['End Date'] = pandas.to_datetime(datastore['End Date'])
            weekly = datastore.groupby(['End Date', 'School Type', 'Incident Type']).sum()['Total'].reset_index()
            weekly.sort_values('End Date', inplace=True)
            weekly['New'] = weekly['Total'] - weekly.groupby(['School Type', 'Incident Type'])['Total'].shift(1)
            weekly['New no neg'] = weekly['New'].clip(lower=0)  # Remove negatives for the detailed plot
            weekly['order'] = weekly['Incident Type'].replace(
                {val: i for i, val in enumerate(['Cluster (>5 cases)', 'Cluster (2-5 cases)', 'Single Case', 'White'])}
            )
            latest = weekly[weekly['End Date'] == weekly['End Date'].max()]
            p = altair.vconcat(
                altair.Chart(
                    latest
                ).mark_bar().encode(
                    x=altair.X('New:Q', axis=altair.Axis(title='Total reported', tickMinStep=1)),
                    y=altair.Y('School Type:O', sort=['Preschool', 'Primary', 'Post Primary', 'Special']),
                    color=altair.Color('Incident Type', sort=altair.SortField('order', order='descending')),
                    order=altair.Order(
                        'order',
                        sort='ascending'
                    ),
                ).properties(
                    height=225,
                    width=400,
                    title='NI COVID-19 School Surveillance reports for week ending %s' % datastore['End Date'].max().strftime('%-d %B %Y')
                ),
            ).properties(
                title=altair.TitleParams(
                    ['Data from Public Health Agency',
                     'Some data has been manually extracted',
                     'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                    baseline='bottom',
                    orient='bottom',
                    anchor='end',
                    fontWeight='normal',
                    fontSize=10,
                    dy=10
                ),
            )
            plotname = 'pha-outbreaks-week-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
            plotstore = io.BytesIO()
            p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
            plotstore.seek(0)
            plots.append({'name': plotname, 'store': plotstore})
            toplot = weekly[(~weekly['New'].isna()) & (weekly['End Date'] > weekly['End Date'].max() - pandas.to_timedelta(84, unit='d'))]
            p = altair.vconcat(
                altair.Chart(
                    toplot
                ).mark_area().encode(
                    x=altair.X('End Date:T', axis=altair.Axis(title='Date reported')),
                    y=altair.Y('sum(New):Q', axis=altair.Axis(title='Newly reported', orient="right", tickMinStep=1)),
                    color=altair.Color('Incident Type', sort=altair.SortField('order', order='descending')),
                    order=altair.Order(
                        'order',
                        sort='ascending'
                    ),
                ).properties(
                    height=450,
                    width=800,
                    title='NI COVID-19 School Surveillance reports from %s to %s' % (toplot['End Date'].min().strftime('%-d %B %Y'), toplot['End Date'].max().strftime('%-d %B %Y'))
                ),
            ).properties(
                title=altair.TitleParams(
                    ['Data from Public Health Agency',
                     'Some data has been manually extracted',
                     'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                    baseline='bottom',
                    orient='bottom',
                    anchor='end',
                    fontWeight='normal',
                    fontSize=10,
                    dy=10
                ),
            )
            plotname = 'pha-education-time-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
            plotstore = io.BytesIO()
            p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
            plotstore.seek(0)
            plots.append({'name': plotname, 'store': plotstore})
            p = altair.vconcat(
                altair.Chart(
                    toplot
                ).mark_area().encode(
                    x=altair.X('End Date:T', axis=altair.Axis(title='Date reported')),
                    y=altair.Y('sum(New no neg):Q', axis=altair.Axis(title='Newly reported', orient="right", tickMinStep=1)),
                    color=altair.Color('Incident Type', sort=altair.SortField('order', order='descending')),
                    facet=altair.Facet('School Type:O', columns=2, title=None, spacing=0, sort=['Preschool', 'Primary', 'Post Primary', 'Special']),
                    order=altair.Order(
                        'order',
                        sort='ascending'
                    ),
                ).properties(
                    height=225,
                    width=450,
                    title=altair.TitleParams(
                        'NI COVID-19 School Surveillance reports from %s to %s' % (toplot['End Date'].min().strftime('%-d %B %Y'), toplot['End Date'].max().strftime('%-d %B %Y')),
                        anchor='middle',
                    ),
                ),
            ).properties(
                title=altair.TitleParams(
                    ['Data from Public Health Agency',
                     'Some data has been manually extracted, negative values have been removed',
                     'https://twitter.com/ni_covid19_data on %s' % datetime.datetime.now().date().strftime('%A %-d %B %Y')],
                    baseline='bottom',
                    orient='bottom',
                    anchor='end',
                    fontWeight='normal',
                    fontSize=10,
                    dy=10
                ),
            )
            plotname = 'pha-education--school-%s.png' % datetime.datetime.now().date().strftime('%Y-%d-%m')
            plotstore = io.BytesIO()
            p.save(fp=plotstore, format='png', method='selenium', webdriver=driver)
            plotstore.seek(0)
            plots.append({'name': plotname, 'store': plotstore})
        change = event[0]
        tweet = '''School Surveillance for COVID-19 in NI, week ending {end_date}

\u2022 Preschool: {preschool:,} incidents
\u2022 Primary: {primary:,}
\u2022 Post Primary: {post_primary:,}
\u2022 Special: {special:,}
\u2022 Total: {total:,}

Source: https://www.publichealth.hscni.net/publications/coronavirus-bulletin'''.format(
            end_date=latest['End Date'].max().strftime('%-d %B %Y'),
            preschool=int(latest[latest['School Type'] == 'Preschool']['New'].sum()),
            primary=int(latest[latest['School Type'] == 'Primary']['New'].sum()),
            post_primary=int(latest[latest['School Type'] == 'Post Primary']['New'].sum()),
            special=int(latest[latest['School Type'] == 'Special']['New'].sum()),
            total=int(latest['New'].sum())
        )
        if change.get('notweet') is not True:
            api = TwitterAPI(secret['twitter_apikey'], secret['twitter_apisecretkey'], secret['twitter_accesstoken'], secret['twitter_accesstokensecret'])
            upload_ids = api.upload_multiple(plots)
            if change.get('testtweet') is True:
                if len(upload_ids) > 0:
                    resp = api.dm(secret['twitter_dmaccount'], tweet, upload_ids[0])
                    if len(upload_ids) > 1:
                        resp = api.dm(secret['twitter_dmaccount'], 'Test 1', upload_ids[1])
                    if len(upload_ids) > 2:
                        resp = api.dm(secret['twitter_dmaccount'], 'Test 2', upload_ids[2])
                else:
                    resp = api.dm(secret['twitter_dmaccount'], tweet)
                messages.append('Tweeted DM ID %s' % (resp.id))
            else:
                if len(upload_ids) > 0:
                    resp = api.tweet(tweet, media_ids=upload_ids)
                else:
                    resp = api.tweet(tweet)
                # Download and update the index
                status = S3_scraper_index(s3, secret['bucketname'], secret['pha-bulletin-index'])
                index = status.get_dict()
                for i in range(len(index)):
                    if index[i]['filedate'] == datastore['filedate'].max():
                        index[i]['tweet'] = resp.id
                        break
                status.put_dict(index)
                messages.append('Tweeted ID %s and updated %s' % (resp.id, secret['pha-bulletin-index']))
        else:
            print(tweet)
            messages.append('Did not tweet')

    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": messages,
        }),
    }
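# A sketch of test invocations for the education handler above: an event with
# 'url' triggers the PDF-scraping branch, one without triggers the
# plotting/tweeting branch (the field values here are assumptions):
if __name__ == '__main__':
    # Scrape a bulletin PDF already mirrored to S3, without tweeting
    lambda_handler([{
        'url': 'https://www.publichealth.hscni.net/publications/coronavirus-bulletin',  # hypothetical bulletin URL
        'keyname': 'pha-bulletin/example.pdf',  # hypothetical S3 key
        'filedate': '2021-10-01',
        'notweet': True,
    }], None)
    # Re-plot the stored data and DM the result instead of tweeting publicly
    lambda_handler([{'filedate': '2021-10-01', 'testtweet': True}], None)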