Ejemplo n.º 1
0
def show(
    producer,
    template=('t', u'{t}\n', 'Message template.'),
):
    """Print tweets in human readable form."""
    # NOTE(review): the tuple default looks like a CLI option spec
    # (short flag, default value, help text) -- confirm against the
    # command framework that registers this function.

    # Build the pipeline inside-out: the formatting consumer is wrapped by
    # ``to_tweet`` and the resulting consumer is handed to the producer.
    formatter = consumers.show(template=template)
    pipeline = consumers.to_tweet(formatter)
    producer(pipeline)
Ejemplo n.º 2
0
def uniq(producer):
    """Omit repeated tweets."""
    # Chain, innermost first: select -> uniq -> to_tweet, then let the
    # producer drive the outermost consumer.
    selector = consumers.select()
    deduplicated = consumers.uniq(selector)
    pipeline = consumers.to_tweet(deduplicated)
    producer(pipeline)
Ejemplo n.º 3
0
def text(producer):
    """Print only tweet's text.

    It replaces the new line symbol (`\\n`) with a space.

    """
    # The backslash in the docstring is escaped on purpose: an unescaped
    # ``\n`` in a regular (non-raw) string literal embeds a real line break,
    # so the symbol being described would vanish from the rendered text.
    text_printer = consumers.print_text()
    pipeline = consumers.to_tweet(text_printer)
    producer(pipeline)
Ejemplo n.º 4
0
def filter(
    producer, config, output,
    mode=('', u'a', 'The mode to open the files, `a` to append and `w` to rewrite.'),
    filters=('', [], 'The filters to use.'),
):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    # NOTE(review): the tuple defaults look like CLI option specs
    # (short flag, default value, help text) -- confirm against the command
    # framework that registers this function.

    # Optional consumer built from the configured dustbin template; it is
    # passed to ``consumers.filter`` alongside the streams.
    dustbin_template = config.dustbin_template
    dustbin = None
    if dustbin_template is not None:
        dustbin = consumers.group(dustbin_template)

    # Use every configured filter unless specific filter names were given.
    selected_filters = config.filters
    if filters:
        selected_filters = (
            flt for flt in config.filters if flt.name in filters
        )

    # Each stream is a (consumer, predicate) pair.
    stream_pairs = []
    for flt in selected_filters:
        if flt.split_template == '--':
            # The '--' template sends raw tweets to ``output`` instead of
            # grouping them into files.
            sink = consumers.show(output=output, template='{t.raw}')
        else:
            sink = consumers.group(flt.split_template, mode=mode)
        # ``flt=flt`` binds the current filter at definition time, avoiding
        # the late-binding closure pitfall.
        stream_pairs.append(
            (sink, lambda c, _, flt=flt: c.filter(**flt.predicates))
        )
    streams = tuple(stream_pairs)

    target = consumers.filter(streams, dustbin)
    producer(consumers.to_tweet(target))
Ejemplo n.º 5
0
def test_filter(tweets):
    """Route the fixture tweets into per-predicate lists via ``consumers.filter``."""
    pinkpop = []
    dimazest = []
    dustbin = []

    # Destination list paired with the predicate keyword arguments that
    # select tweets for it.
    predicates_by_bucket = [
        (pinkpop, {'follow': [], 'track': ['pinkpop'], 'locations': []}),
        (dimazest, {'follow': [10868922], 'track': [], 'locations': []}),
    ]
    # ``p=predicates`` binds the current predicates at definition time.
    streams = tuple(
        (to_list(bucket), lambda c, f, p=predicates: c.filter(**p))
        for bucket, predicates in predicates_by_bucket
    )

    sink_for_rest = to_list(dustbin)
    target = consumers.filter(streams, sink_for_rest)
    from_iterable(consumers.to_tweet(target), tweets)

    # Exactly one tweet tracks 'pinkpop'.
    assert len(pinkpop) == 1
    assert pinkpop[0].id == 190800262909276162

    # Three tweets are selected by the 'follow' predicate.
    assert len(dimazest) == 3

    # Nothing is expected to land in the dustbin.
    assert not dustbin
Ejemplo n.º 6
0
def test_count_tokens(tweets):
    """Check the token frequencies collected by ``consumers.count_tokens``."""
    counter = Counter()

    token_counter = consumers.count_tokens(counter)
    from_iterable(consumers.to_tweet(token_counter), tweets)

    # Every expected token is counted exactly once.
    expected_tokens = (
        u'paaspop',
        u'all',
        u'pinkpop',
        u'thats',
        u'pedropicopop',
        u'use',
        u'here',
        u'pukkelpop',
        u'prilpop',
        u'fun',
        u'come',
        u'#pygrunn',
        u'#pp12',
    )
    assert counter == Counter({token: 1 for token in expected_tokens})
Ejemplo n.º 7
0
    def S(source):
        """Return a callable that feeds ``source`` into a given target consumer."""
        def feed(target):
            # Wrap the target with ``to_tweet`` and consume ``source``
            # through it.
            return consume_iterable(consumers.to_tweet(target), source)

        return feed
Ejemplo n.º 8
0
def media(producer, output):
    """Retrieve media urls."""
    # The media printer writes to ``output``; ``to_tweet`` wraps it before
    # the producer drives the pipeline.
    url_printer = consumers.print_media(output=output)
    pipeline = consumers.to_tweet(url_printer)
    producer(pipeline)
Ejemplo n.º 9
0
def test_filter(tweets):
    """Route the fixture tweets into per-predicate lists via ``consumers.filter``."""
    pinkpop = []
    dimazest = []
    dustbin = []

    # Destination list paired with the predicate keyword arguments that
    # select tweets for it.
    predicates_by_bucket = [
        (pinkpop, {'follow': [], 'track': ['pinkpop'], 'locations': []}),
        (dimazest, {'follow': [10868922], 'track': [], 'locations': []}),
    ]
    # ``p=predicates`` binds the current predicates at definition time.
    streams = tuple(
        (to_list(bucket), lambda c, f, p=predicates: c.filter(**p))
        for bucket, predicates in predicates_by_bucket
    )

    sink_for_rest = to_list(dustbin)
    target = consumers.filter(streams, sink_for_rest)
    from_iterable(consumers.to_tweet(target), tweets)

    # Exactly one tweet tracks 'pinkpop'.
    assert len(pinkpop) == 1
    assert pinkpop[0].id == 190800262909276162

    # Three tweets are selected by the 'follow' predicate.
    assert len(dimazest) == 3

    # Nothing is expected to land in the dustbin.
    assert not dustbin
Ejemplo n.º 10
0
def text(producer):
    """Print only tweet's text.

    It replaces the new line symbol (`\\n`) with a space.

    """
    # The backslash in the docstring is escaped on purpose: an unescaped
    # ``\n`` in a regular (non-raw) string literal embeds a real line break,
    # so the symbol being described would vanish from the rendered text.
    text_printer = consumers.print_text()
    pipeline = consumers.to_tweet(text_printer)
    producer(pipeline)
Ejemplo n.º 11
0
def filter(
        producer,
        config,
        output,
        mode=('', u'a',
              'The mode to open the files, `a` to append and `w` to rewrite.'),
        filters=('', [], 'The filters to use.'),
):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    # NOTE(review): the tuple defaults look like CLI option specs
    # (short flag, default value, help text) -- confirm against the command
    # framework that registers this function.

    # Optional consumer built from the configured dustbin template; it is
    # passed to ``consumers.filter`` alongside the streams.
    dustbin_template = config.dustbin_template
    dustbin = None
    if dustbin_template is not None:
        dustbin = consumers.group(dustbin_template)

    # Use every configured filter unless specific filter names were given.
    selected_filters = config.filters
    if filters:
        selected_filters = (
            flt for flt in config.filters if flt.name in filters
        )

    # Each stream is a (consumer, predicate) pair.
    stream_pairs = []
    for flt in selected_filters:
        if flt.split_template == '--':
            # The '--' template sends raw tweets to ``output`` instead of
            # grouping them into files.
            sink = consumers.show(output=output, template='{t.raw}')
        else:
            sink = consumers.group(flt.split_template, mode=mode)
        # ``flt=flt`` binds the current filter at definition time, avoiding
        # the late-binding closure pitfall.
        stream_pairs.append(
            (sink, lambda c, _, flt=flt: c.filter(**flt.predicates))
        )
    streams = tuple(stream_pairs)

    target = consumers.filter(streams, dustbin)
    producer(consumers.to_tweet(target))
Ejemplo n.º 12
0
def test_closing():
    """Closing the ``to_tweet`` consumer must also close the sink behind it."""
    downstream = to_list([])
    upstream = consumers.to_tweet(downstream)

    upstream.close()

    # Sending to the already-closed sink is expected to raise StopIteration.
    with raises(StopIteration):
        downstream.send('Sink is expected to be closed too.')
Ejemplo n.º 13
0
def test_closing():
    """Closing the ``to_tweet`` consumer must also close the sink behind it."""
    downstream = to_list([])
    upstream = consumers.to_tweet(downstream)

    upstream.close()

    # Sending to the already-closed sink is expected to raise StopIteration.
    with raises(StopIteration):
        downstream.send('Sink is expected to be closed too.')
Ejemplo n.º 14
0
def timeline(producer, window=('w', '%Y-%m-%d-%H', '')):
    """Count the number of tweets per window."""
    # NOTE(review): the tuple default looks like a CLI option spec
    # (short flag, default value, help text) -- confirm against the
    # command framework that registers this function.

    # The counts are passed on to a printer that writes to standard output.
    printer = consumers.counter_printer(sys.stdout)
    windowed_counter = consumers.timeline(window=window, target=printer)
    producer(consumers.to_tweet(windowed_counter))
Ejemplo n.º 15
0
def text(
    producer,
    output,
):
    """Print only tweet's text.

    It replaces the new line symbol (\\n) with a space.

    """
    # The text printer writes to ``output``; ``to_tweet`` wraps it before
    # the producer drives the pipeline.
    text_printer = consumers.print_text(output=output)
    pipeline = consumers.to_tweet(text_printer)
    producer(pipeline)
Ejemplo n.º 16
0
def text(
    producer,
    output,
):
    """Print only tweet's text.

    It replaces the new line symbol (\\n) with a space.

    """
    # The text printer writes to ``output``; ``to_tweet`` wraps it before
    # the producer drives the pipeline.
    text_printer = consumers.print_text(output=output)
    pipeline = consumers.to_tweet(text_printer)
    producer(pipeline)
Ejemplo n.º 17
0
def test_filter_dustbin(tweets):
    """Tweets matched by no stream must end up in the dustbin consumer."""
    matched = []
    rejected = []

    def follows_absent_user(c, f):
        # The assertions below expect this predicate to select none of the
        # fixture tweets.
        return c.filter(follow=[-1000])

    streams = ((to_list(matched), follows_absent_user),)

    target = consumers.filter(streams, to_list(rejected))
    from_iterable(consumers.to_tweet(target), tweets)

    assert not matched
    assert len(rejected) == 3
Ejemplo n.º 18
0
def test_filter_dustbin(tweets):
    """Tweets matched by no stream must end up in the dustbin consumer."""
    matched = []
    rejected = []

    def follows_absent_user(c, f):
        # The assertions below expect this predicate to select none of the
        # fixture tweets.
        return c.filter(follow=[-1000])

    streams = ((to_list(matched), follows_absent_user),)

    target = consumers.filter(streams, to_list(rejected))
    from_iterable(consumers.to_tweet(target), tweets)

    assert not matched
    assert len(rejected) == 3
Ejemplo n.º 19
0
def timeline(producer, window=('w', '%Y-%m-%d-%H', '')):
    """Count the number of tweets per window."""
    # NOTE(review): the tuple default looks like a CLI option spec
    # (short flag, default value, help text) -- confirm against the
    # command framework that registers this function.

    # The counts are passed on to a printer that writes to standard output.
    printer = consumers.counter_printer(sys.stdout)
    windowed_counter = consumers.timeline(window=window, target=printer)
    producer(consumers.to_tweet(windowed_counter))
Ejemplo n.º 20
0
def filter(producer, config):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    # Optional consumer built from the configured dustbin template; it is
    # passed to ``consumers.filter`` alongside the streams.
    dustbin_template = config.dustbin_template
    dustbin = None
    if dustbin_template is not None:
        dustbin = consumers.group(dustbin_template)

    # Each stream is a (consumer, predicate) pair, one per configured filter.
    stream_pairs = []
    for flt in config.filters:
        sink = consumers.group(flt.split_template)
        # ``flt=flt`` binds the current filter at definition time, avoiding
        # the late-binding closure pitfall.
        stream_pairs.append(
            (sink, lambda c, _, flt=flt: c.filter(**flt.predicates))
        )
    streams = tuple(stream_pairs)

    target = consumers.filter(streams, dustbin)
    producer(consumers.to_tweet(target))
Ejemplo n.º 21
0
def test_count_timeline(tweets):
    """Check the per-window tweet counts collected by ``consumers.timeline``."""
    counter = Counter()

    windowed_counter = consumers.timeline(counter)
    from_iterable(consumers.to_tweet(windowed_counter), tweets)

    # Each expected window key holds exactly one tweet.
    expected_windows = ('2012-05-12-09', '2012-04-26-07', '2012-04-13-13')
    assert counter == Counter({window: 1 for window in expected_windows})
Ejemplo n.º 22
0
def test_batch(tweets):
    """Count how many batch-end signals ``consumers.batch`` raises downstream."""

    @consumers.consumer
    def batch_end_counter():
        # The tally lives on the decorated object so the test body can read
        # it after the pipeline has run.
        batch_end_counter.batches = 0
        while True:
            try:
                yield
            except consumers.BatchEndException:
                # A batch end arrives as an exception raised at the yield.
                batch_end_counter.batches += 1

    batching = consumers.batch(batch_end_counter())
    from_iterable(consumers.to_tweet(batching), tweets)

    assert batch_end_counter.batches == 2
Ejemplo n.º 23
0
def test_batch(tweets):
    """Count how many batch-end signals ``consumers.batch`` raises downstream."""

    @consumers.consumer
    def batch_end_counter():
        # The tally lives on the decorated object so the test body can read
        # it after the pipeline has run.
        batch_end_counter.batches = 0
        while True:
            try:
                yield
            except consumers.BatchEndException:
                # A batch end arrives as an exception raised at the yield.
                batch_end_counter.batches += 1

    batching = consumers.batch(batch_end_counter())
    from_iterable(consumers.to_tweet(batching), tweets)

    assert batch_end_counter.batches == 2
Ejemplo n.º 24
0
def filter(producer, config):
    """Filter the tweets to files by filtering predicates defined in the configuration file."""
    # Optional consumer built from the configured dustbin template; it is
    # passed to ``consumers.filter`` alongside the streams.
    dustbin_template = config.dustbin_template
    dustbin = None
    if dustbin_template is not None:
        dustbin = consumers.group(dustbin_template)

    # Each stream is a (consumer, predicate) pair, one per configured filter.
    stream_pairs = []
    for flt in config.filters:
        sink = consumers.group(flt.split_template)
        # ``flt=flt`` binds the current filter at definition time, avoiding
        # the late-binding closure pitfall.
        stream_pairs.append(
            (sink, lambda c, _, flt=flt: c.filter(**flt.predicates))
        )
    streams = tuple(stream_pairs)

    target = consumers.filter(streams, dustbin)
    producer(consumers.to_tweet(target))
Ejemplo n.º 25
0
def test_count_timeline(tweets):
    """Check the per-window tweet counts collected by ``consumers.timeline``."""
    counter = Counter()

    windowed_counter = consumers.timeline(counter)
    from_iterable(consumers.to_tweet(windowed_counter), tweets)

    # Each expected window key holds exactly one tweet.
    expected_windows = ('2012-05-12-09', '2012-04-26-07', '2012-04-13-13')
    assert counter == Counter({window: 1 for window in expected_windows})
Ejemplo n.º 26
0
def test_count_tokens(tweets):
    """Check the token frequencies collected by ``consumers.count_tokens``."""
    counter = Counter()

    token_counter = consumers.count_tokens(counter)
    from_iterable(consumers.to_tweet(token_counter), tweets)

    # Every expected token is counted exactly once.
    expected_tokens = (
        u'paaspop',
        u'all',
        u'pinkpop',
        u'thats',
        u'pedropicopop',
        u'use',
        u'here',
        u'pukkelpop',
        u'prilpop',
        u'fun',
        u'come',
        u'#pygrunn',
        u'#pp12',
    )
    assert counter == Counter({token: 1 for token in expected_tokens})
Ejemplo n.º 27
0
def test_bad_json():
    """Input that is not valid JSON must not reach the downstream sink."""
    collected = []
    sink = to_list(collected)

    from_iterable(consumers.to_tweet(sink), ['not valid JSON'])

    assert not collected
Ejemplo n.º 28
0
def test_basic(tweets):
    """Every fixture item must come out of ``to_tweet`` as a ``Tweet``."""
    collected = []
    sink = to_list(collected)

    from_iterable(consumers.to_tweet(sink), tweets)

    assert len(collected) == 3
    for item in collected:
        assert isinstance(item, Tweet)
Ejemplo n.º 29
0
def test_uniq(tweets):
    """Feed the fixture repeated twenty times through ``consumers.uniq``."""
    collected = []
    deduplicated = consumers.uniq(to_list(collected))
    pipeline = consumers.to_tweet(deduplicated)

    from_iterable(pipeline, tweets * 20)

    # NOTE(review): this only checks that something got through; it does not
    # pin the number of unique tweets -- consider tightening.
    assert len(collected) > 0
Ejemplo n.º 30
0
def group(
        producer,
        file_name_template=('t', '%Y-%m-%d-%H.gz', ''),
):
    """Group tweets to files by date according to the template."""
    # NOTE(review): the tuple default looks like a CLI option spec
    # (short flag, default value, help text) -- confirm against the
    # command framework that registers this function.
    grouper = consumers.group(file_name_template)
    pipeline = consumers.to_tweet(grouper)
    producer(pipeline)
Ejemplo n.º 31
0
def test_bad_json():
    """Input that is not valid JSON must not reach the downstream sink."""
    collected = []
    sink = to_list(collected)

    from_iterable(consumers.to_tweet(sink), ['not valid JSON'])

    assert not collected
Ejemplo n.º 32
0
def test_uniq(tweets):
    """Feed the fixture repeated twenty times through ``consumers.uniq``."""
    collected = []
    deduplicated = consumers.uniq(to_list(collected))
    pipeline = consumers.to_tweet(deduplicated)

    from_iterable(pipeline, tweets * 20)

    # NOTE(review): this only checks that something got through; it does not
    # pin the number of unique tweets -- consider tightening.
    assert len(collected) > 0
Ejemplo n.º 33
0
def test_basic(tweets):
    """Every fixture item must come out of ``to_tweet`` as a ``Tweet``."""
    collected = []
    sink = to_list(collected)

    from_iterable(consumers.to_tweet(sink), tweets)

    assert len(collected) == 3
    for item in collected:
        assert isinstance(item, Tweet)
Ejemplo n.º 34
0
def show(
        producer,
        template=('t', u'{t}\n', 'Message template.'),
):
    """Print tweets in human readable form."""
    # NOTE(review): the tuple default looks like a CLI option spec
    # (short flag, default value, help text) -- confirm against the
    # command framework that registers this function.

    # Build the pipeline inside-out: the formatting consumer is wrapped by
    # ``to_tweet`` and the resulting consumer is handed to the producer.
    formatter = consumers.show(template=template)
    pipeline = consumers.to_tweet(formatter)
    producer(pipeline)
Ejemplo n.º 35
0
 def flow(out=None):
     """Run the enclosing scope's producer through a text-printing pipeline."""
     # ``out`` is forwarded as the printer's ``output`` argument.
     text_printer = consumers.print_text(output=out)
     pipeline = consumers.to_tweet(text_printer)
     producer(pipeline)
Ejemplo n.º 36
0
def pprint(producer):
    """Pretty print tweet's json representation."""
    # The pretty-printing consumer is wrapped by ``to_tweet`` before the
    # producer drives the pipeline.
    pretty_printer = consumers.pprint()
    pipeline = consumers.to_tweet(pretty_printer)
    producer(pipeline)
Ejemplo n.º 37
0
def show(producer):
    """Print tweets in human readable form."""
    # The formatting consumer is created with its defaults and wrapped by
    # ``to_tweet`` before the producer drives the pipeline.
    formatter = consumers.show()
    pipeline = consumers.to_tweet(formatter)
    producer(pipeline)
Ejemplo n.º 38
0
def group(producer,
          file_name_template=('t', '%Y-%m-%d-%H.gz', ''),
          ):
    """Group tweets to files by date according to the template."""
    # NOTE(review): the tuple default looks like a CLI option spec
    # (short flag, default value, help text) -- confirm against the
    # command framework that registers this function.
    grouper = consumers.group(file_name_template)
    pipeline = consumers.to_tweet(grouper)
    producer(pipeline)
Ejemplo n.º 39
0
def uniq(producer):
    """Omit repeated tweets."""
    # Chain, innermost first: select -> uniq -> to_tweet, then let the
    # producer drive the outermost consumer.
    selector = consumers.select()
    deduplicated = consumers.uniq(selector)
    pipeline = consumers.to_tweet(deduplicated)
    producer(pipeline)
Ejemplo n.º 40
0
def pprint(producer):
    """Pretty print tweet's json representation."""
    # The pretty-printing consumer is wrapped by ``to_tweet`` before the
    # producer drives the pipeline.
    pretty_printer = consumers.pprint()
    pipeline = consumers.to_tweet(pretty_printer)
    producer(pipeline)
Ejemplo n.º 41
0
    def S(source):
        """Return a callable that feeds ``source`` into a given target consumer."""
        def feed(target):
            # Wrap the target with ``to_tweet`` and consume ``source``
            # through it.
            return consume_iterable(consumers.to_tweet(target), source)

        return feed
Ejemplo n.º 42
0
def show(producer):
    """Print tweets in human readable form."""
    # The formatting consumer is created with its defaults and wrapped by
    # ``to_tweet`` before the producer drives the pipeline.
    formatter = consumers.show()
    pipeline = consumers.to_tweet(formatter)
    producer(pipeline)