from datetime import datetime, timedelta

from openeew.data.aws import AwsDataClient
from openeew.data.df import get_df_from_records


def getSensorData(quake, sensor):
    """Fetch records from one minute before to three minutes after a quake,
    then plot the seismograms for the given sensor.
    """
    global eq, records_df

    data_client = AwsDataClient('mx')
    eq = quake

    quake_dt = datetime.fromisoformat(quake['date_utc'])
    start_date_utc = quake_dt - timedelta(minutes=1)
    end_date_utc = quake_dt + timedelta(minutes=3)

    # Get records for the specified dates
    records_df = get_df_from_records(
        data_client.get_filtered_records(
            str(start_date_utc),
            str(end_date_utc)
        )
    )

    # Convert Unix sample timestamps to datetime objects
    records_df['sample_dt'] = records_df['sample_t'].apply(
        lambda x: datetime.utcfromtimestamp(x)
    )

    # Select required columns
    records_df = records_df[['device_id', 'x', 'y', 'z', 'sample_dt']]

    print(records_df.head())
    plot_seismograms(sensor)
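# Usage sketch (hypothetical values): `quake` is a dict holding at least a
# 'date_utc' key, and `sensor` is an OpenEEW device ID; both values below are
# illustrative only.
sample_quake = {'date_utc': '2018-02-16 23:39:48'}
getSensorData(sample_quake, '008')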
from openeew.data.aws import AwsDataClient


def get_active_devices(date_utc: str) -> list:
    """Get the IDs of seismic devices that were active as of the given UTC date."""
    data_client = AwsDataClient('mx')
    devices = data_client.get_devices_as_of_date(date_utc)
    print(len(devices))
    active = [device['device_id'] for device in devices]
    print(active)
    return active
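# Usage sketch (illustrative timestamp): list the device IDs that were active
# at a given UTC date.
active_ids = get_active_devices('2018-02-16 23:39:48')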
def _get_device_location(self,
                         device_id: str,
                         alert_date: str,
                         country: str = 'mx') -> dict:
    """Return the latitude/longitude of a device as of the alert date,
    or an empty dict if the device is not found.
    """
    data_client = AwsDataClient(country)
    devices = data_client.get_devices_as_of_date(alert_date)
    location = {}
    for device in devices:
        if device['device_id'] == device_id:
            location = {
                'latitude': device['latitude'],
                'longitude': device['longitude']
            }
    return location
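# Usage sketch: assuming this helper is a method of an alert-handling class,
# it could be called as below (device ID and date are illustrative):
#
#     location = self._get_device_location('008', '2018-02-16 23:39:48')
#     if location:
#         print(location['latitude'], location['longitude'])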
import json
from datetime import datetime, timedelta

import boto3
import pandas as pd
import pause
from kafka import KafkaProducer
from openeew.data.aws import AwsDataClient
from openeew.data.df import get_df_from_records


def main(*,
         kafka_brokers: str,
         kafka_topic: str,
         country: str = 'mx',
         periods: int = 6,
         frequency_min: int = 10,
         start_timestamp_utc: str = '2018-01-01 00:00:00',
         parse_json_records: bool = True):
    s3 = boto3.client('s3')
    data_client = AwsDataClient(country, s3)

    start_datetime = datetime.strptime(start_timestamp_utc, '%Y-%m-%d %H:%M:%S')
    date_range = pd.date_range(start_datetime,
                               periods=periods,
                               freq=str(frequency_min) + 'T')
    date_range_from_now = pd.date_range(datetime.now(),
                                        periods=periods,
                                        freq=str(frequency_min) + 'T')

    if parse_json_records:
        # Kafka producer that serializes each value as JSON
        producer = KafkaProducer(
            bootstrap_servers=kafka_brokers,
            value_serializer=lambda v: json.dumps(v).encode('utf-8'))
    else:
        # Kafka producer for plain (already-encoded) messages
        producer = KafkaProducer(bootstrap_servers=kafka_brokers)

    for date_from_now, start_date in zip(date_range_from_now, date_range):
        # Uncomment the line below to avoid errors when using Jupyter notebooks
        # nest_asyncio.apply()
        end_date = start_date + timedelta(minutes=frequency_min)
        records = data_client.get_filtered_records(str(start_date), str(end_date))
        records_df = get_df_from_records(records)
        records_df['json'] = records_df.apply(lambda row: row.to_json(), axis=1)

        for index, record in records_df.iterrows():
            if parse_json_records:
                producer.send(kafka_topic, record['json'])
            else:
                producer.send(kafka_topic, bytes(record['json'], encoding='utf8'))

        print(f'Data ingested from: {str(start_date)} Until: {str(end_date)}')

        # Sleep until the next scheduled period before ingesting the next slice
        next_period = date_from_now + timedelta(minutes=frequency_min)
        pause.until(next_period)
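# Usage sketch (broker address and topic name are hypothetical): replay six
# 10-minute slices of historical records into a local Kafka topic.
main(kafka_brokers='localhost:9092',
     kafka_topic='openeew-readings',
     start_timestamp_utc='2018-02-16 23:30:00')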
import plotnine as pn
from openeew.data.aws import AwsDataClient


def plot_seismograms(device_id):
    # Get the earthquake date as a datetime.datetime object
    eq_dt = AwsDataClient._get_dt_from_str(eq['date_utc'])
    print(eq_dt)

    plots = []
    for axis in ['x', 'y', 'z']:
        plots.append(
            pn.ggplot(
                records_df[records_df['device_id'] == device_id],
                pn.aes('sample_dt', axis)
            ) +
            pn.geom_line(color='blue') +
            pn.scales.scale_x_datetime(
                date_breaks='1 minute',
                date_labels='%H:%M:%S'
            ) +
            pn.geoms.geom_vline(
                xintercept=eq_dt,  # mark the earthquake time
                color='crimson'
            ) +
            pn.labels.ggtitle(
                'device {}, axis {}'.format(device_id, axis)
            )
        )

    # Now output the plots
    for p in plots:
        print(p)
def test_change_country_code_all_caps():
    data_client = AwsDataClient('AB')
    data_client.country_code = 'CD'
    assert data_client.country_code == 'cd'
def test_initialize_country_code_all_caps():
    data_client = AwsDataClient('AB')
    assert data_client.country_code == 'ab'
import json
from datetime import datetime, timedelta
from time import sleep

import boto3
import pandas as pd
from openeew.data.aws import AwsDataClient
from openeew.data.df import get_df_from_records


class StreamProducer:

    def __init__(self,
                 docker_device_id,
                 country='mx',
                 stream_name='InputReadings',
                 start_date=datetime(2020, 1, 5, 4, 39, 0),
                 end_date=datetime(2020, 2, 5, 4, 40, 0),
                 kinesis_produce_many=False,
                 kinesis_batch_size=20,
                 reading_frequency=64,  # sensor readings per second
                 reading_interval=30):  # seconds of raw data read from S3 per batch
        """
        Initialize Kinesis as the consumer input, SNS as the alert
        publishing system, and PostgreSQL as the output storage system.
        """
        self._docker_device_id = docker_device_id
        self._stream_name = stream_name
        self._start_date = start_date
        self._end_date = end_date
        self._sleep_time = 1 / reading_frequency if reading_frequency > 0 else 1 / 32
        self._interval = timedelta(seconds=reading_interval)
        self._kinesis_produce_many = kinesis_produce_many
        self._kinesis_batch_size = kinesis_batch_size
        self._kinesis = boto3.client('kinesis')  # , region_name='us-west-2')
        self._data_client = AwsDataClient(country)

    def _get_raw_data(self,
                      start_date: datetime,
                      end_date: datetime,
                      device_id=None) -> pd.DataFrame:
        """Get sensor readings from S3 using the OpenEEW API."""
        records_df_per_device = get_df_from_records(
            self._data_client.get_filtered_records(
                str(start_date),  # UTC date
                str(end_date),    # UTC date
                [device_id]       # list of devices
            ))
        # Select required columns
        records_df = records_df_per_device[[
            'device_id', 'x', 'y', 'z', 'sample_t'
        ]]
        return records_df

    def _send_data_to_kinesis(self, accelerator_data: pd.DataFrame):
        """Send raw data for one time interval to Kinesis."""
        i = 0
        records = []
        if accelerator_data is not None and accelerator_data.size > 0:
            num_readings = accelerator_data.size
            # logging.info(f'Start_time: {str(start_date)}, Number of records: {str(num_readings)}')
            for reading in accelerator_data.itertuples():
                sleep(self._sleep_time)
                device_id = str(reading.device_id)
                data = dict(reading._asdict())  # convert named tuple to dict
                if self._kinesis_produce_many:
                    # Accumulate a batch of records to push together
                    i += 1
                    record = {
                        'Data': json.dumps(data),
                        'PartitionKey': device_id
                    }
                    records.append(record)
                    if i % self._kinesis_batch_size == 0 or i == num_readings:
                        self._kinesis.put_records(StreamName=self._stream_name,
                                                  Records=records)
                        records = []
                else:
                    self._kinesis.put_record(StreamName=self._stream_name,
                                             Data=json.dumps(data),
                                             PartitionKey=device_id)

    def produce(self):
        """Send sensor data to the input Kinesis stream."""
        start_date_time = self._start_date
        end_date_time = self._start_date
        while end_date_time < self._end_date:
            try:
                end_date_time = start_date_time + self._interval
                sensor_data = self._get_raw_data(start_date_time,
                                                 end_date_time,
                                                 self._docker_device_id)
                self._send_data_to_kinesis(sensor_data)
                # Move on to the next time interval
                start_date_time = end_date_time
            except Exception as ex:
                print('Exception in publishing message')
                print(str(ex))
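# Usage sketch (device ID and dates are illustrative): replay one device's
# readings into the configured Kinesis stream, pushing records in batches.
producer = StreamProducer(
    docker_device_id='008',
    start_date=datetime(2020, 1, 5, 4, 39, 0),
    end_date=datetime(2020, 1, 5, 4, 49, 0),
    kinesis_produce_many=True
)
producer.produce()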
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import folium
from openeew.data.aws import AwsDataClient

data_client = AwsDataClient('mx')

df = pd.read_csv("quakesList.csv")

eq = []
for x in df.index:
    eq.append({
        'latitude': df["latitude"][x],
        'longitude': df["longitude"][x],
        'time': df["date"][x] + " " + df["time"][x]
    })

m = folium.Map(
    location=[eq[0]['latitude'], eq[0]['longitude']],
    zoom_start=7,
    titles="Significant Quakes After 2018"
)

# Mark each quake with a 10 km circle
for point in eq:
    folium.Circle(
        radius=10000,
        location=[point['latitude'], point['longitude']],
        color='crimson',
        fill='crimson',
    ).add_to(m)
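# A minimal follow-up sketch (output filename is illustrative): write the map
# to an HTML file so it can be opened in a browser.
m.save('significant_quakes_map.html')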