Example #1
    def __init__(
        self,
        docker_device_id,
        country='mx',
        stream_name='InputReadings',
        start_date=datetime(2020, 1, 5, 4, 39, 0),
        end_date=datetime(2020, 2, 5, 4, 40, 0),
        kinesis_produce_many=False,
        kinesis_batch_size=20,
        reading_frequency=64,  # sensor readings per second
        reading_interval=30):  # seconds of raw data fetched from S3 per request
        """ Initialize the producer that replays sensor readings
            from S3 into the Kinesis input stream; downstream,
            SNS publishes alerts and PostgreSQL stores outputs.
        """
        self._docker_device_id = docker_device_id
        self._stream_name = stream_name

        self._start_date = start_date
        self._end_date = end_date
        self._sleep_time = 1 / reading_frequency if reading_frequency > 0 else 1 / 32
        self._interval = timedelta(seconds=reading_interval)

        self._kinesis_produce_many = kinesis_produce_many
        self._kinesis_batch_size = kinesis_batch_size
        self._kinesis = boto3.client('kinesis')  # , region_name='us-west-2')
        self._data_client = AwsDataClient(country)
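With the defaults above, the producer sleeps 1/64 s between readings and fetches 30-second windows of raw data from S3; a quick sketch of those derived values:

from datetime import timedelta

sleep_time = 1 / 64               # one reading every 1/64 s at 64 Hz
interval = timedelta(seconds=30)  # width of each raw-data window fetched from S3
print(sleep_time, interval)       # 0.015625 0:00:30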
Example #2
def getSensorData(quake, sensor):
    global eq, records_df
    data_client = AwsDataClient('mx')
    eq = quake
    devices = data_client.get_devices_as_of_date(eq['date_utc'])
    quakeTime = quake['date_utc']
    minute = timedelta(minutes=1)

    # Window from one minute before the event to three minutes after it
    start_date_utc = datetime.fromisoformat(quakeTime) - minute
    end_date_utc = datetime.fromisoformat(quakeTime) + 3 * minute

    # Get records for the specified dates
    records_df = get_df_from_records(
        data_client.get_filtered_records(
            str(start_date_utc),
            str(end_date_utc)
        )
    )
    records_df['sample_dt'] = \
        records_df['sample_t'].apply(
            lambda x: datetime.utcfromtimestamp(x)
        )
    # Select required columns
    records_df = records_df[
        ['device_id', 'x', 'y', 'z', 'sample_dt']
    ]
    print(records_df.head())
    plot_seismograms(sensor)
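A minimal usage sketch for getSensorData; the quake dict and device id are placeholders, and it assumes plot_seismograms (Example #6) and get_df_from_records are already in scope:

quake = {'date_utc': '2018-02-16 23:39:48'}  # hypothetical event record
getSensorData(quake, '000')                  # '000' is a placeholder device_id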
Example #3
def get_active_devices(date_utc: str) -> list:
    """ get active seismic devices """
    data_client = AwsDataClient('mx')

    devices = data_client.get_devices_as_of_date(date_utc)
    print(len(devices))
    active = [device['device_id'] for device in devices]
    print(active)
    return active
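A usage sketch, assuming the same UTC date-string format used in the other examples:

active_ids = get_active_devices('2018-02-16 23:39:48')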
Example #4
    def _get_device_location(self,
                             device_id: str,
                             alert_date: str,
                             country: str = 'mx') -> dict:
        data_client = AwsDataClient(country)

        devices = data_client.get_devices_as_of_date(alert_date)
        location = {}
        for device in devices:
            if device['device_id'] == device_id:
                location = {
                    'latitude': device['latitude'],
                    'longitude': device['longitude']
                }
        return location
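A hedged usage sketch; `detector` stands in for an instance of whatever class defines this method, and the device id is a placeholder:

# `detector` is a hypothetical instance of the enclosing class
location = detector._get_device_location('000', '2018-02-16 23:39:48')
print(location.get('latitude'), location.get('longitude'))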
Example #5
import json
from datetime import datetime, timedelta

import boto3
import pandas as pd
import pause
from kafka import KafkaProducer
from openeew.data.aws import AwsDataClient
from openeew.data.df import get_df_from_records  # module path assumed from the openeew package


def main(*,
         kafka_brokers: str,
         kafka_topic: str,
         country: str = 'mx',
         periods: int = 6,
         frequency_min: int = 10,
         start_timestamp_utc: str = '2018-01-01 00:00:00',
         parse_json_records: bool = True):

    s3 = boto3.client('s3')
    data_client = AwsDataClient(country, s3)
    start_datetime = datetime.strptime(start_timestamp_utc,
                                       '%Y-%m-%d %H:%M:%S')
    date_range = pd.date_range(start_datetime,
                               periods=periods,
                               freq=str(frequency_min) + 'T')
    date_range_from_now = pd.date_range(datetime.now(),
                                        periods=periods,
                                        freq=str(frequency_min) + 'T')

    if parse_json_records:
        producer = KafkaProducer(
            bootstrap_servers=kafka_brokers,
            value_serializer=lambda v: json.dumps(v).encode('utf-8'))
    else:
        # Kafka Producer for plain messages
        producer = KafkaProducer(bootstrap_servers=kafka_brokers)

    for date_from_now, start_date in zip(date_range_from_now, date_range):
        # Uncomment the line below to avoid errors when using Jupyter notebooks
        # nest_asyncio.apply()
        end_date = start_date + timedelta(minutes=frequency_min)
        records = data_client.get_filtered_records(str(start_date),
                                                   str(end_date))
        records_df = get_df_from_records(records)
        records_df['json'] = records_df.apply(lambda row: row.to_json(),
                                              axis=1)

        for index, record in records_df.iterrows():
            if parse_json_records:
                producer.send(kafka_topic, record['json'])
            else:
                producer.send(kafka_topic,
                              bytes(record['json'], encoding='utf8'))

        print(f'Data ingested from: {str(start_date)} Until: {str(end_date)}')
        next_period = date_from_now + timedelta(minutes=frequency_min)
        pause.until(next_period)
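An invocation sketch under assumed values; the broker address and topic name are placeholders for a running Kafka deployment:

if __name__ == '__main__':
    main(kafka_brokers='localhost:9092',   # placeholder broker
         kafka_topic='sensor-readings',    # placeholder topic
         periods=2,
         frequency_min=5)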
Example #6
def plot_seismograms(device_id):
    # Get earthquake date as datetime.datetime object
    eq_dt = AwsDataClient._get_dt_from_str(eq['date_utc'])
    print(eq_dt)
    ob = {
        "ti" : "2018-02-16 23:39:48"
    }
    time_format = '%Y-%m-%d %H:%M:%S'
    plots = []
    for axis in ['x', 'y', 'z']:
        plots.append(
            pn.ggplot(
                records_df[records_df['device_id'] == device_id],
                pn.aes('sample_dt', axis)
            ) + \
            pn.geom_line(color='blue') + \
            pn.scales.scale_x_datetime(
                date_breaks='1 minute',
                date_labels='%H:%M:%S'
            ) + \
            pn.geoms.geom_vline(
                xintercept=eq_dt,  # alternatively: datetime.strptime(ob["ti"], time_format)
                color='crimson'
            ) + \
            pn.labels.ggtitle(
                'device {}, axis {}'.format(
                    device_id, axis)
            )
        )

    # Now output the plots
    for p in plots:
        print(p)
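plot_seismograms assumes plotnine is imported as pn and relies on the globals eq and records_df populated by getSensorData in Example #2, so a typical run drives it through that function (device id is a placeholder):

import plotnine as pn  # assumed alias used in the function above

getSensorData({'date_utc': '2018-02-16 23:39:48'}, '000')  # populates eq/records_df, then plots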
Example #7
def test_change_country_code_all_caps():

    data_client = AwsDataClient('AB')
    data_client.country_code = 'CD'

    assert data_client.country_code == 'cd'
Example #8
def test_initialize_country_code_all_caps():

    data_client = AwsDataClient('AB')

    assert data_client.country_code == 'ab'
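Both tests encode the same normalization rule, namely that AwsDataClient lower-cases country codes; a quick interactive sketch:

client = AwsDataClient('MX')
print(client.country_code)  # 'mx' (lower-cased at initialization and on assignment)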
Example #9
import json
from datetime import datetime, timedelta
from time import sleep

import boto3
import pandas as pd
from openeew.data.aws import AwsDataClient
from openeew.data.df import get_df_from_records  # module path assumed from the openeew package


class StreamProducer:
    def __init__(
        self,
        docker_device_id,
        country='mx',
        stream_name='InputReadings',
        start_date=datetime(2020, 1, 5, 4, 39, 0),
        end_date=datetime(2020, 2, 5, 4, 40, 0),
        kinesis_produce_many=False,
        kinesis_batch_size=20,
        reading_frequency=64,  # sensor readings per second
        reading_interval=30):  # seconds of raw data fetched from S3 per request
        """ Initialize the producer that replays sensor readings
            from S3 into the Kinesis input stream; downstream,
            SNS publishes alerts and PostgreSQL stores outputs.
        """
        self._docker_device_id = docker_device_id
        self._stream_name = stream_name

        self._start_date = start_date
        self._end_date = end_date
        self._sleep_time = 1 / reading_frequency if reading_frequency > 0 else 1 / 32
        self._interval = timedelta(seconds=reading_interval)

        self._kinesis_produce_many = kinesis_produce_many
        self._kinesis_batch_size = kinesis_batch_size
        self._kinesis = boto3.client('kinesis')  # , region_name='us-west-2')
        self._data_client = AwsDataClient(country)

    def _get_raw_data(self,
                      start_date: datetime,
                      end_date: datetime,
                      device_id=None) -> pd.DataFrame:
        """get sensor readings from S3 using OpenEEW API"""

        records_df_per_device = get_df_from_records(
            self._data_client.get_filtered_records(
                str(start_date),  # utc date
                str(end_date),  # utc date
                [device_id]  # list of devices
            ))
        # Select required columns
        records_df = records_df_per_device[[
            'device_id', 'x', 'y', 'z', 'sample_t'
        ]]
        return records_df

    def _send_data_to_kinesis(self, accelerator_data: pd.DataFrame):
        """ send raw data for 1 time interval to Kinesis """
        i = 0
        records = []

        if accelerator_data is not None and accelerator_data.size > 0:
            # use the row count: DataFrame.size counts cells and would skip the final batch flush below
            num_readings = len(accelerator_data)
            # logging.info(f'Start_time: {str(start_date)}, Number of records: {str(num_readings)}')
            for reading in accelerator_data.itertuples():
                sleep(self._sleep_time)

                device_id = str(reading.device_id)
                data = dict(reading._asdict())  # convert named tuple to dict

                if self._kinesis_produce_many:
                    # create a set of records to be pushed together
                    i += 1
                    record = {
                        'Data': json.dumps(data),
                        'PartitionKey': str(reading.device_id)
                    }
                    records.append(record)

                    if i % self._kinesis_batch_size == 0 or i == num_readings:
                        self._kinesis.put_records(StreamName=self._stream_name,
                                                  Records=records)
                        records = []
                else:
                    self._kinesis.put_record(StreamName=self._stream_name,
                                             Data=json.dumps(data),
                                             PartitionKey=str(
                                                 reading.device_id))

    def produce(self):
        """send sensor data to input Kinesis stream"""

        start_date_time = self._start_date
        end_date_time = self._start_date

        while end_date_time < self._end_date:
            try:
                end_date_time = start_date_time + self._interval
                sensor_data = self._get_raw_data(start_date_time,
                                                 end_date_time,
                                                 self._docker_device_id)
                self._send_data_to_kinesis(sensor_data)

                # move onto the next time interval
                start_date_time = end_date_time
            except Exception as ex:
                print('Exception in publishing message')
                print(str(ex))
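A construction-and-run sketch under assumptions: AWS credentials are configured, a Kinesis stream named 'InputReadings' exists, and the device id and end date are placeholders:

producer = StreamProducer(
    docker_device_id='000',                     # placeholder device id
    start_date=datetime(2020, 1, 5, 4, 39, 0),  # default shown in __init__
    end_date=datetime(2020, 1, 5, 4, 49, 0),    # placeholder: ten minutes of data
    kinesis_produce_many=True
)
producer.produce()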
Example #10
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from openeew.data.aws import AwsDataClient
import folium

data_client = AwsDataClient('mx')

df = pd.read_csv("quakesList.csv")

eq = []
for x in df.index:
    eq.append({
        'latitude': df["latitude"][x],
        'longitude': df["longitude"][x],
        'time': df["date"][x] + " " + df["time"][x]
    })


m = folium.Map(
            location=[eq[0]['latitude'], eq[0]['longitude']],
            zoom_start=7,
            titles="Signifigant Quakes After 2018"
            )

for point in eq:
    folium.Circle(
        radius=10000,
        location=[point['latitude'], point['longitude']],
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)
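To inspect the result outside a notebook, the map can be written to an HTML file; the filename is arbitrary:

m.save('significant_quakes.html')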