예제 #1
0
def rawquery(
        query: str,
        start_date: str = None,
        end_date: str = None,
        hydrate: int = 0,
        kafka: bool = False):
    """Run the scraping pipeline for ``query`` over a date range.

    Args:
        query: Search expression forwarded to the search-URL generator.
        start_date: Date string parsed with ``arrow.get``. When omitted,
            the current date formatted with ``SHORT_DATE_FORMAT`` is used.
        end_date: Date string parsed with ``arrow.get``. When omitted, the
            date 15 days before now formatted with ``SHORT_DATE_FORMAT``
            is used.
        hydrate: Selects the status stage: ``0`` uses ``__get_statuses``;
            ``1`` uses ``_read_statuses`` followed by
            ``_update_status_stats``.
        kafka: When true, every result is also passed through
            ``_send_kafka``.

    Returns:
        A list of JSON strings (``indent=4``), post-processed by
        ``list_no_dupes``.

    Raises:
        NotImplementedError: If ``hydrate`` is neither 0 nor 1.
    """
    # Reject unsupported modes before any URL is fetched, so a bad argument
    # does not cost a full crawl first.
    if hydrate not in (0, 1):
        raise NotImplementedError(f"Unsupported hydrate mode: {hydrate!r}")

    # Defaults are resolved per call. Computing them in the signature would
    # freeze both dates at import time and go stale in long-lived processes.
    # NOTE(review): the default start is *later* than the default end (now
    # vs. now - 15 days); this mirrors the original defaults - confirm the
    # direction expected by __generate_search_url_by_range.
    if start_date is None:
        start_date = arrow.get().format(SHORT_DATE_FORMAT)
    if end_date is None:
        end_date = arrow.get().shift(days=-15).format(SHORT_DATE_FORMAT)

    logger.debug("Converting dates from string")
    init_date = arrow.get(start_date)
    finish_date = arrow.get(end_date)

    # Lazy %-style arguments: the message is only built if INFO is enabled.
    logger.info("🐦 Scrapping with:[%s] From 🗓️:[%s] ➡️ To 🗓️:[%s]",
                query,
                init_date.format('YYYY-MM-DD'),
                finish_date.format('YYYY-MM-DD'))

    # Create day urls
    urls = __generate_search_url_by_range(query, init_date, finish_date)

    stage_results = fetch_all(urls)

    stage_results = aio.flat_map(_get_page_branches,
                                 stage_results,
                                 workers=MAX_WORKERS)
    stage_results = th.flat_map(_get_branch_walk,
                                stage_results,
                                workers=MAX_WORKERS)
    if hydrate == 0:
        stage_results = th.flat_map(__get_statuses,
                                    stage_results,
                                    workers=MAX_WORKERS)
    else:  # hydrate == 1, validated above
        stage_results = th.flat_map(_read_statuses,
                                    stage_results,
                                    workers=MAX_WORKERS)
        stage_results = th.map(_update_status_stats,
                               stage_results,
                               workers=MAX_WORKERS)

    if kafka:
        stage_results = th.map(_send_kafka, stage_results, workers=MAX_WORKERS)

    stage_results = th.map(lambda s: json.dumps(s, indent=4),
                           stage_results,
                           workers=MAX_WORKERS)

    # List conversion executes pipeline
    results = list(stage_results)
    results = list_no_dupes(results)

    logger.info(f"💬 Getted {len(results)}")

    return results
예제 #2
0
def test_concat_basic(nums):
    """Concatenating two stages derived from one source matches plain lists."""
    # Reference result built with ordinary list operations.
    shifted = [value + 1 for value in nums]
    expected = [value**2 for value in shifted] + [-value for value in shifted]

    # Same computation as a pipeline: one shared stage feeding two branches.
    source_stage = th.map(lambda x: x + 1, nums)
    squared_stage = th.map(lambda x: x**2, source_stage)
    negated_stage = th.map(lambda x: -x, source_stage)
    combined = th.concat([squared_stage, negated_stage])

    # Output order is not asserted, so compare sorted sequences.
    actual = sorted(combined)
    assert actual == sorted(expected)
예제 #3
0
def test_map_square_event_end(nums):
    """The ``on_done`` callback runs and sees the final stage status."""
    shared = th._get_namespace()
    shared.x, shared.done, shared.active_workers = 0, False, -1

    def mark_started():
        shared.x = 1

    # The parameter name ``stage_status`` is kept as-is in case the library
    # matches callback arguments by name.
    def mark_finished(stage_status):
        shared.x = 2
        shared.active_workers = stage_status.active_workers
        shared.done = stage_status.done

    squared = th.map(
        lambda x: x**2,
        nums,
        workers=3,
        on_start=mark_started,
        on_done=mark_finished,
    )
    # Draining the stage is what runs the pipeline; the values are not needed.
    list(squared)

    assert shared.x == 2
    assert shared.done == True
    assert shared.active_workers == 0
예제 #4
0
def test_map_id(nums):
    """Mapping the identity function yields the input unchanged."""
    result = list(th.map(lambda x: x, nums))

    assert result == nums
예제 #5
0
def test_map_square_workers(nums):
    """Squaring with two workers produces the same values as a plain map."""
    expected = [value**2 for value in nums]

    squared_stage = th.map(lambda x: x**2, nums, workers=2)
    actual = list(squared_stage)

    # Order is not asserted with several workers, so compare sorted values.
    assert sorted(actual) == sorted(expected)
예제 #6
0
def test_map_square(nums):
    """Squaring through the default ``th.map`` matches the builtin result."""
    def square(x):
        return x**2

    expected = [square(value) for value in nums]
    actual = list(th.map(square, nums))

    assert actual == expected
예제 #7
0
파일: test_th.py 프로젝트: new07/pypeln
def test_map_id_pipe(nums):
    """The ``|`` pipe syntax with an identity map returns the input list."""
    identity_stage = th.map(lambda x: x)
    piped = nums | identity_stage | list

    assert piped == nums
예제 #8
0
def test_concat_multiple(nums):
    """A stage can be concatenated with itself, including in nested concats."""
    incremented = list(map(lambda x: x + 1, nums))
    expected_twice = incremented * 2
    expected_thrice = incremented * 3

    base_stage = th.map(lambda x: x + 1, nums)
    twice_stage = th.concat([base_stage, base_stage])
    thrice_stage = th.concat([twice_stage, base_stage])

    # Each stage is consumed on the right-hand side, after the expected
    # values are sorted, in the same order as the original assertions.
    assert sorted(expected_twice) == sorted(twice_stage)
    assert sorted(expected_thrice) == sorted(thrice_stage)
예제 #9
0
def test_from_to_iterable(nums):
    """A stage from ``from_iterable`` can feed ordinary iterator tools."""
    # Pipeline side: wrap the input, chunk it in tens, sum each chunk.
    piped = list(th.map(sum, cz.partition_all(10, th.from_iterable(nums))))

    # Reference side: the same chunked sums without the pipeline.
    plain = [sum(chunk) for chunk in cz.partition_all(10, nums)]

    assert plain == piped
예제 #10
0
def test_flat_map_square(nums):
    """``flat_map`` after ``map`` matches ``mapcat`` over a builtin ``map``."""
    def _expand(x):
        # Emits the value followed by its two successors.
        yield from (x, x + 1, x + 2)

    expected = list(cz.mapcat(_expand, map(lambda x: x**2, nums)))

    squared_stage = th.map(lambda x: x**2, nums)
    actual = list(th.flat_map(_expand, squared_stage))

    assert actual == expected
예제 #11
0
def test_flat_map_square_workers(nums):
    """``flat_map`` with three workers yields the same multiset of values."""
    def _triple(x):
        for item in (x, x + 1, x + 2):
            yield item

    squares = (value**2 for value in nums)
    expected = [item for square in squares for item in _triple(square)]

    pipeline = th.flat_map(_triple, th.map(lambda x: x**2, nums), workers=3)
    actual = list(pipeline)

    # Order is not asserted with several workers, so compare sorted values.
    assert sorted(actual) == sorted(expected)
예제 #12
0
def test_map_square_event_start(nums):
    """The ``on_start`` callback runs and the mapped output is unaffected."""
    expected = [value**2 for value in nums]

    state = th._get_namespace()
    state.x = 0

    def flag_start():
        state.x = 1

    squared_stage = th.map(lambda x: x**2, nums, on_start=flag_start)
    actual = list(squared_stage)

    assert actual == expected
    assert state.x == 1
예제 #13
0
def test_error_handling():
    """An exception raised inside a mapped function reaches the consumer."""
    def raise_error(x):
        raise MyError()

    failing_stage = th.map(raise_error, range(10))

    caught = None
    try:
        list(failing_stage)
    except MyError as exc:
        caught = exc

    # ``caught`` stays None if the error never surfaced, failing this check.
    assert isinstance(caught, MyError)
예제 #14
0
def test_flat_map_square_filter_workers_pipe(nums):
    """A piped map / flat_map / filter chain matches the plain computation."""
    def _spread(x):
        yield from (x, x + 1, x + 2)

    expected = [
        item
        for square in map(lambda x: x**2, nums)
        for item in _spread(square)
        if item > 1
    ]

    square_stage = th.map(lambda x: x**2)
    spread_stage = th.flat_map(_spread, workers=3)
    keep_stage = th.filter(lambda x: x > 1)
    actual = nums | square_stage | spread_stage | keep_stage | list

    # Order is not asserted with several workers, so compare sorted values.
    assert sorted(actual) == sorted(expected)
예제 #15
0
파일: test_th.py 프로젝트: new07/pypeln
def test_worker_info():
    """Values forwarded from ``on_start`` stay inside the worker index range."""
    n_workers = 4
    source = range(100)

    # Parameter names are kept exactly as in the original in case the
    # library matches callback arguments by name.
    def provide_index(worker_info):
        return worker_info.index

    def emit_index(x, index):
        return index

    stage = th.map(emit_index, source, on_start=provide_index, workers=n_workers)
    seen = set(stage)

    assert seen <= set(range(n_workers))
예제 #16
0
###################
# from_to_iterable
###################
@hp.given(nums=st.lists(st.integers()))
@hp.settings(max_examples=MAX_EXAMPLES)
def test_from_to_iterable(nums):
    """Chunked sums through ``from_iterable`` + ``map`` match the plain result."""
    chunk_size = 10

    # Pipeline side is evaluated first, as in the original.
    wrapped = th.from_iterable(nums)
    piped_sums = list(th.map(sum, cz.partition_all(chunk_size, wrapped)))

    plain_sums = list(map(sum, cz.partition_all(chunk_size, nums)))

    assert plain_sums == piped_sums


# Manual run: builds a stage whose mapped function always raises and
# consumes it with no surrounding try/except.
if __name__ == '__main__':
    error = None  # not read again anywhere in this block

    def raise_error(x):
        # Raises for every element, regardless of its value.
        raise MyError()

    stage = th.map(raise_error, range(10))

    # Consuming the stage drives the pipeline. Presumably MyError propagates
    # out of this call and ends the script with a traceback - TODO confirm.
    list(stage)