Beispiel #1
0
    def run(self):
        """Build the demo inlets, outlets and links, then start the planner.

        Two links are registered on a ``SchedulePlanner``: a file inlet
        feeding a print outlet, and one HTTP inlet (listed three times)
        feeding a CSV outlet. ``http_inlet2`` and ``mongo_outlet`` are
        constructed but only referenced by the disabled links below.
        """
        # Alternative planner: planner = APSPlanner()
        planner = SchedulePlanner(refresh_interval=0.5)

        # --- inlets ---
        todo_url = 'https://jsonplaceholder.typicode.com/todos/1'
        http_inlet = HttpInlet(todo_url, metadata={CsvOutlet.FILE_MODE: 'a'})
        file_inlet = FileInlet('output_03.csv', read_mode=FileInletMode.LINE)
        echo_metadata = {
            'MONGODB_COLLECTION': 'test_collection2',
            'csv_file': 'output_02.csv'
        }
        http_inlet2 = HttpInlet(
            'https://postman-echo.com/get?foo1=bar1&foo2=bar2',
            metadata=echo_metadata)

        # --- outlets ---
        print_outlet = PrintOutlet(only_payload=True)
        mongo_outlet = MongoOutlet('databay', 'test_collection')
        csv_outlet = CsvOutlet('output_03.csv')

        # --- active links ---
        file_to_print = Link([file_inlet], [print_outlet],
                             timedelta(seconds=0.5))
        planner.add_links(file_to_print)

        http_to_csv = Link([http_inlet] * 3, [csv_outlet],
                           timedelta(seconds=2))
        planner.add_links(http_to_csv)

        # --- disabled links, kept for reference ---
        # planner.add_links(Link([http_inlet], [mongo_outlet], timedelta(seconds=1), name='first'))
        # planner.add_links(Link([http_inlet2, http_inlet2, http_inlet2], [mongo_outlet], timedelta(seconds=5), name='second'))
        # planner.add_links(Link([], [], timedelta(seconds=1.5)))
        # planner.add_links(Link([alphavantage_inlet], [mongo_outlet], timedelta(seconds=5)))
        # planner.add_links(Link([iex_inlet], [mongo_outlet], timedelta(seconds=5)))

        planner.start()
Beispiel #2
0
    def _unschedule(self, link: Link):
        """
        Unschedule a link by removing its job and clearing it on the link.

        Does nothing when the link has no job.

        :type link: :any:`Link`
        :param link: Link to be unscheduled
        """
        if link.job is None:
            return

        link.job.remove()
        link.set_job(None)
Beispiel #3
0
    def _schedule(self, link: Link):
        """
        Schedule a link on the underlying scheduler.

        Adds ``link.transfer`` as a job driven by an interval trigger built
        from the link's interval, and stores the resulting
        :any:`APS Job <apscheduler.job.Job>` as this link's job.

        :type link: :any:`Link`
        :param link: Link to be scheduled
        """
        interval_seconds = link.interval.total_seconds()
        trigger = IntervalTrigger(seconds=interval_seconds)

        scheduled_job = self._scheduler.add_job(link.transfer, trigger=trigger)
        link.set_job(scheduled_job)
Beispiel #4
0
    def test_flush(self, inlet, outlet):
        """Records are held by the buffer until ``buffer.flush`` is set."""
        buffer = Buffer(count_threshold=100, time_threshold=10)
        records = [Record(payload=value) for value in [1, 2, 3, 4]]

        link = Link(inlet, outlet, interval=1, processors=buffer,
                    copy_records=False)

        # Feed the records in two halves; each transfer should push nothing.
        for batch in (records[:2], records[2:]):
            inlet._pull = pull_mock(batch)
            link.transfer()
            outlet._push.assert_called_with([], mock.ANY)

        # With the flush flag set, an empty pull releases everything buffered.
        buffer.flush = True
        inlet._pull = pull_mock([])
        link.transfer()
        outlet._push.assert_called_with(records, mock.ANY)
Beispiel #5
0
    def test_buffer_time(self, inlet, outlet):
        """Records are held back until the time threshold has elapsed."""
        threshold_seconds = 0.02
        buffer = Buffer(time_threshold=threshold_seconds)
        records = [Record(payload=value) for value in [1, 2, 3, 4]]

        link = Link(inlet, outlet, interval=1, processors=buffer,
                    copy_records=False)

        # Both halves arrive before the threshold, so nothing is pushed yet.
        for batch in (records[:2], records[2:]):
            inlet._pull = pull_mock(batch)
            link.transfer()
            outlet._push.assert_called_with([], mock.ANY)

        # Wait out the threshold before the final transfer.
        time.sleep(threshold_seconds)

        # An empty pull now releases all four buffered records.
        inlet._pull = pull_mock([])
        link.transfer()
        outlet._push.assert_called_with(records, mock.ANY)
Beispiel #6
0
    def _schedule(self, link: Link):
        """
        Schedule a link and store the created :class:`schedule.Job` on it.

        The job calls ``self._run_job`` with the link every
        ``link.interval.total_seconds()`` seconds.

        :type link: :any:`Link`
        :param link: Link to be scheduled

        :raises: :class:`ScheduleIntervalError` if link's interval is smaller than the :any:`refresh interval`.
        """
        interval_seconds = link.interval.total_seconds()

        # Reject links whose interval is shorter than the refresh interval.
        if interval_seconds < self._refresh_interval:
            message = f'Link interval must be greater than or equal to refresh interval. Link interval: {link.interval.total_seconds()}s, Refresh interval: {self._refresh_interval}s'
            raise ScheduleIntervalError(message)

        every_interval = schedule.every(interval_seconds).seconds
        link.set_job(every_interval.do(self._run_job, link))
Beispiel #7
0
    def test_flush_after_shutdown(self, inlet, outlet):
        """Buffered records survive shutdown and are released on flush.

        Runs a planner in a background thread for a short while, shuts it
        down, and checks that every push so far carried an empty record
        list while the buffer holds all pulled records. A forced transfer
        without the flush flag must still push nothing; with the flag set
        it must push every record.
        """
        buffer = Buffer(count_threshold=100, time_threshold=10)

        # Shared state so the pull coroutine can number and remember records.
        counter_dict = {'counter': 0, 'records': []}

        link = Link(inlet,
                    outlet,
                    interval=0.01,
                    processors=buffer,
                    copy_records=False)
        planner = SchedulePlanner(link, refresh_interval=0.01)

        async def pull_coro(_):
            counter_dict['counter'] += 1
            record = Record(payload=counter_dict['counter'])
            counter_dict['records'].append(record)
            return [record]

        mock_pull = MagicMock(side_effect=pull_coro)
        inlet._pull = mock_pull

        th = Thread(target=planner.start, daemon=True)
        th.start()
        time.sleep(0.1)

        planner.shutdown()
        th.join()

        # Inspect the first positional argument of each recorded call.
        # Calling a ``call`` object (``c()``) builds a fresh empty call that
        # compares equal to ``[]`` regardless of what was actually pushed,
        # which would make these assertions vacuous.
        calls = outlet._push.call_args_list
        for c in calls:
            self.assertEqual(c[0][0], [],
                             'Should only contain empty record lists.')
        self.assertEqual(buffer.records, counter_dict['records'],
                         'All records should be stored in the buffer')

        planner.force_transfer()
        self.assertEqual(outlet._push.call_args[0][0], [],
                         'Should return empty record list')

        buffer.flush = True
        planner.force_transfer()
        self.assertEqual(outlet._push.call_args[0][0], counter_dict['records'],
                         'Should return all records')
Beispiel #8
0
    def test_buffer_count(self, inlet, outlet):
        """Records are released once the count threshold is reached."""
        buffer = Buffer(count_threshold=3)
        records = [Record(payload=value) for value in [1, 2, 3, 4]]
        first_batch, second_batch = records[:2], records[2:]

        link = Link(inlet, outlet, interval=1, processors=buffer,
                    copy_records=False)

        # Two records are below the threshold of three: nothing is pushed.
        inlet._pull = pull_mock(first_batch)
        link.transfer()
        outlet._push.assert_called_with([], mock.ANY)

        # Two more bring the total to four, so all records are pushed.
        inlet._pull = pull_mock(second_batch)
        link.transfer()
        outlet._push.assert_called_with(records, mock.ANY)
Beispiel #9
0
class DummyTextInlet(Inlet):
    """An `Inlet` that returns a randomly chosen string from a given list.

    Every pull increments an internal counter and returns a single-entry
    dict mapping that counter to the chosen string.
    """

    def __init__(self, text: list, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._id = 0
        self.text = text

    def pull(self, update):
        """Pick one string at random, wait a second, and return it by id."""
        chosen = random.choice(self.text)
        self._id += 1
        # Fixed one-second pause before the result is returned.
        time.sleep(1)
        return {self._id: chosen}


# Set the level of _LOGGER (defined outside this excerpt) to INFO.
_LOGGER.setLevel(logging.INFO)

# Elasticsearch client created with timeout=30 and otherwise default arguments.
es_client = elasticsearch.Elasticsearch(timeout=30)

# TEXT (defined outside this excerpt) is split on '.' to form the list of
# strings the inlet chooses from.
text_inlet = DummyTextInlet(TEXT.split("."))
# Outlet built from the client and the name "my-test-index"
# (presumably the target index name — confirm against the outlet class).
elasticsearch_outlet = ElasticsearchIndexerOutlet(es_client, "my-test-index")

# Connect the inlet to the outlet.
# NOTE(review): interval=2 is presumably in seconds — confirm.
link = Link(text_inlet,
            elasticsearch_outlet,
            interval=2,
            tags='elasticsearch_outlet')

# Hand the link to an ApsPlanner and start it.
planner = ApsPlanner(link)
planner.start()
Beispiel #10
0
class PrintOutlet(Outlet):
    """Outlet that logs every record's payload from its own coroutine."""

    async def push(self, records:[Record], update):
        """Log the start of the push, then run one print task per record."""
        _LOGGER.debug(f'{update} push starts')

        # One coroutine per record, awaited together so they overlap.
        pending = (self.print_task(item, update) for item in records)
        await asyncio.gather(*pending)

    async def print_task(self, record, update):
        """Wait half a second, then log the record's payload."""
        # Stand-in for a slow operation.
        await asyncio.sleep(0.5)

        _LOGGER.debug(f'{update} consumed:{record.payload}')

# Three independent RandomIntInlet instances feeding a single PrintOutlet.
random_int_inletA = RandomIntInlet()
random_int_inletB = RandomIntInlet()
random_int_inletC = RandomIntInlet()
print_outlet = PrintOutlet()

# One link named 'async' covering all three inlets, with a 2-second interval.
link = Link([random_int_inletA, random_int_inletB, random_int_inletC],
            print_outlet,
            interval=timedelta(seconds=2),
            name='async')

# Hand the link to a SchedulePlanner and start it.
planner = SchedulePlanner(link)
planner.start()
Beispiel #11
0
import datetime
import logging

from databay import Link
from databay.inlets import HttpInlet
from databay.outlets import MongoOutlet
from databay.planners import APSPlanner

# Set the 'databay' logger to DEBUG level.
logging.getLogger('databay').setLevel(logging.DEBUG)

# Create an inlet, outlet and a link.
# The inlet is given a single URL; the outlet is given a database name and
# a collection name.
http_inlet = HttpInlet('https://jsonplaceholder.typicode.com/todos/1')
mongo_outlet = MongoOutlet(database_name='databay',
                           collection='test_collection')
# The link named 'http_to_mongo' joins the two with a 5-second interval.
link = Link(http_inlet,
            mongo_outlet,
            datetime.timedelta(seconds=5),
            name='http_to_mongo')

# Create a planner, add the link and start scheduling.
planner = APSPlanner(link)
planner.start()
Beispiel #12
0
import urllib.request


class WeatherInlet(Inlet):
    """Inlet that fetches the current weather description for one city.

    Queries the OpenWeatherMap ``/data/2.5/weather`` endpoint using the
    API key and city name given at construction.
    """

    def __init__(self, api_key: str, city_name: str, *args, **kwargs):
        """Store the credentials and city; other arguments go to ``Inlet``.

        :param api_key: OpenWeatherMap API key sent as ``appid``.
        :param city_name: City name sent as the ``q`` query parameter.
        """
        super().__init__(*args, **kwargs)

        self.api_key = api_key
        self.city_name = city_name

    def pull(self, update) -> List[Record]:
        """Request the weather for the city and return its description.

        Returns the ``description`` string of the first entry in the
        response's ``weather`` list.
        NOTE(review): the annotation says ``List[Record]`` but a plain
        string is returned; presumably the framework wraps it — confirm.

        :raises urllib.error.URLError: if the request fails.
        :raises KeyError, IndexError: if the response lacks weather data.
        """
        # Imported locally so this block does not rely on a module-level
        # import outside this excerpt.
        from urllib.parse import quote, urlencode

        # Percent-encode the parameters so city names containing spaces or
        # other reserved characters still produce a valid URL.
        query = urlencode({'q': self.city_name, 'appid': self.api_key},
                          quote_via=quote)
        url = f'https://api.openweathermap.org/data/2.5/weather?{query}'

        # Close the HTTP response deterministically once it has been read.
        with urllib.request.urlopen(url) as response:
            contents = response.read().decode('utf8')

        formatted = json.loads(contents)
        return formatted['weather'][0]['description']


# Read the API key from the environment; this is None when the variable
# OPEN_WEATHER_MAP_API_KEY is not set.
api_key = os.environ.get('OPEN_WEATHER_MAP_API_KEY')
weather_inlet = WeatherInlet(api_key, 'Bangkok')

# Link the weather inlet to a PrintOutlet with a 2-second interval.
link = Link(weather_inlet,
            PrintOutlet(only_payload=True),
            interval=timedelta(seconds=2),
            name='bangkok_weather')

# Hand the link to an APSPlanner and start it.
planner = APSPlanner(link)
planner.start()
Beispiel #13
0
        super().__init__()

        self.default_filepath = default_filepath
        self.default_file_mode = default_file_mode

    def push(self, records: [Record], update):
        """Write each record's payload as one line to a file.

        The target path and open mode are taken from the record's metadata
        under ``self.FILEPATH`` and ``self.FILE_MODE``; when a key is absent
        the outlet's default path or mode is used instead. The file is
        opened and closed once per record.
        """
        for item in records:
            meta = item.metadata
            target_path = meta.get(self.FILEPATH, self.default_filepath)
            open_mode = meta.get(self.FILE_MODE, self.default_file_mode)

            line = str(item.payload) + '\n'
            with open(target_path, open_mode) as handle:
                handle.write(line)


# Metadata keyed by the FileOutlet.FILEPATH and FileOutlet.FILE_MODE
# constants: a target path and the 'a' open mode.
# NOTE(review): presumably the inlet attaches this metadata to every record
# it produces, where the outlet's push reads it — confirm.
metadata = {
    FileOutlet.FILEPATH: 'outputs/random_ints.txt',
    FileOutlet.FILE_MODE: 'a'
}
random_int_inlet = RandomIntInlet(metadata=metadata)
# The outlet is created with no arguments.
file_outlet = FileOutlet()

# Link the inlet to the file outlet with a 2-second interval.
link = Link(random_int_inlet,
            file_outlet,
            interval=timedelta(seconds=2),
            name='file_outlet')

# Hand the link to an APSPlanner and start it.
planner = APSPlanner(link)
planner.start()
Beispiel #14
0
from databay import Link
from databay.inlets import RandomIntInlet
from databay.outlet import Outlet
from databay.planners import SchedulePlanner
from databay.record import Record


class ConditionalPrintOutlet(Outlet):
    """Outlet that prints only records whose metadata asks for it."""

    SHOULD_PRINT = 'ConditionalPrintOutlet.SHOULD_PRINT'
    """Whether records should be printed or skipped."""

    def push(self, records: [Record], update):
        """Print each record whose ``SHOULD_PRINT`` metadata is truthy."""
        wanted = (item for item in records
                  if item.metadata.get(self.SHOULD_PRINT))
        for item in wanted:
            print(update, item)


# timedelta is used for the link interval below but is not among the
# imports at the top of this example, so bring it into scope here.
from datetime import timedelta

# Two inlets that differ only in the SHOULD_PRINT flag in their metadata.
random_int_inlet_on = RandomIntInlet(
    metadata={ConditionalPrintOutlet.SHOULD_PRINT: True})
random_int_inlet_off = RandomIntInlet(
    metadata={ConditionalPrintOutlet.SHOULD_PRINT: False})

print_outlet = ConditionalPrintOutlet()

# One link covering both inlets with a 0.5-second interval.
link = Link([random_int_inlet_on, random_int_inlet_off],
            print_outlet,
            interval=timedelta(seconds=0.5),
            name='should_print_metadata')

# The planner's refresh_interval is set to the same 0.5 as the link interval.
planner = SchedulePlanner(link, refresh_interval=0.5)
planner.start()
Beispiel #15
0
from databay.inlets import HttpInlet
from databay.outlets import PrintOutlet
from databay import Link
from databay.planners import ApsPlanner

from databay.outlets.file_outlet import FileOutlet


# filter
class BitcoinInlet(HttpInlet):
    """HttpInlet that reduces the pulled response to its last USD value."""

    async def pull(self, update):
        """Pull via the parent inlet and return ``response['USD']['last']``.

        Both lookups use ``.get``, so a response without a ``'USD'`` entry
        raises ``AttributeError`` on the second lookup.
        """
        ticker = await super().pull(update)
        usd_entry = ticker.get('USD')
        return usd_entry.get('last')


# datetime is used for the link interval below but is not among the
# imports at the top of this example, so bring it into scope here.
import datetime

# produce
# stock_inlet = HttpInlet('https://blockchain.info/ticker')
stock_inlet = BitcoinInlet('https://blockchain.info/ticker')

# consume
print_outlet = PrintOutlet(True, True)
file_outlet = FileOutlet('bitcoin_price_1s.txt')

# transfer
# One inlet feeding both outlets with a 1-second interval.
link = Link(stock_inlet, [print_outlet, file_outlet],
            interval=datetime.timedelta(seconds=1))

planner = ApsPlanner(link)
planner.start()
Beispiel #16
0
from databay import Link
from databay.outlets import PrintOutlet
from databay.planners import SchedulePlanner
from datetime import timedelta
from databay import Inlet
import random


class RandomIntInlet(Inlet):
    """Inlet that produces one random integer per pull."""

    def pull(self, update):
        """Return a random integer between 0 and 100, both inclusive."""
        value = random.randint(0, 100)
        return value


# Inlet producing the random integers.
random_int_inlet = RandomIntInlet()

# Outlet created with only_payload=True.
print_outlet = PrintOutlet(only_payload=True)

# Link named 'random_ints' joining the two with a 5-second interval.
link = Link(random_int_inlet,
            print_outlet,
            interval=timedelta(seconds=5),
            name='random_ints')

# Hand the link to a SchedulePlanner and start it.
planner = SchedulePlanner(link)
planner.start()
Beispiel #17
0
from datetime import timedelta

from databay import Link
from databay.inlets import RandomIntInlet
from databay.planners import SchedulePlanner
from databay.record import Record
from databay.outlet import Outlet


class PrintOutlet(Outlet):
    """Outlet that prints the payload of every record it receives."""

    def push(self, records: [Record], update):
        """Print ``update`` followed by each record's payload, in order."""
        for item in records:
            payload = item.payload
            print(update, payload)


# One random-integer inlet and one PrintOutlet, both with default arguments.
random_int_inlet = RandomIntInlet()
print_outlet = PrintOutlet()

# Link joining the two with a 2-second interval, tagged 'print_outlet'.
link = Link(random_int_inlet,
            print_outlet,
            interval=timedelta(seconds=2),
            tags='print_outlet')

# Hand the link to a SchedulePlanner and start it.
planner = SchedulePlanner(link)
planner.start()