Ejemplo n.º 1
0
    def serde_with_class(cls):
        """Attach ``from_dict`` and ``to_dict`` helpers to ``cls`` and return it.

        Each field's metadata is consulted for a key path under ``from_key``
        (falling back to ``[field.name]``) and under ``to_key`` (no fallback).
        ``from_key`` and ``to_key`` come from the enclosing scope.
        """
        # (field, path used to read the value out of an input dict)
        from_fields = [
            (attr, get_in([from_key], attr.metadata, [attr.name]))
            for attr in fields(cls)
        ]

        # (field, path used to write the value); fields with no path are skipped
        candidate_targets = [
            (attr, get_in([to_key], attr.metadata)) for attr in fields(cls)
        ]
        to_fields = [pair for pair in candidate_targets if pair[1]]

        def from_dict(d):
            """Build a ``cls`` instance by reading every field from ``d``."""
            kwargs = {
                attr.name: get_in(path, d, attr.default)
                for attr, path in from_fields
            }
            return cls(**kwargs)

        def to_dict(self):
            """Return a nested dict holding only the fields that have a ``to_key`` path."""
            d = asdict(self)
            result = {}
            for attr, path in to_fields:
                # The updater ignores the previous value and stores the field's.
                result = update_in(result, path, lambda _: d[attr.name])
            return result

        cls.from_dict = staticmethod(from_dict)
        cls.to_dict = to_dict
        return cls
 def get_hashtag_string(given_item):
     """Join the ``text`` of every hashtag under ``entities.hashtags`` with ", ".

     Hashtag entries without a ``text`` value are left out; a missing
     ``entities.hashtags`` path yields an empty string.
     """
     hashtags = tz.get_in(['entities', 'hashtags'], given_item, default=[])
     texts = (tz.get_in(['text'], tag, default=None) for tag in hashtags)
     return ", ".join(text for text in texts if text is not None)
 def get_categories(given_dict):
     """Return a comma-separated string of the categories associated with a post.

     Reads the list at ``object.tags`` in ``given_dict`` (empty when the path
     is missing), keeps the entries whose ``objectType`` is ``'category'``
     and joins their ``displayName`` values with ", ".

     Category entries that have no ``displayName`` are skipped rather than
     passed to ``str.join``, which would raise ``TypeError`` on ``None``.
     """
     return tz.pipe(
         tz.get_in(['object', 'tags'], given_dict, default=[]),
         tz.filter(lambda x: tz.get_in(['objectType'], x, default=None) == 'category'),
         tz.map(lambda x: tz.get_in(['displayName'], x, default=None)),
         # Mirror get_hashtag_string: drop missing names before joining.
         tz.filter(lambda x: x is not None),
         lambda x: ", ".join(x)
     )
Ejemplo n.º 4
0
        def from_dict(cls, d: Mapping[str, Any]):
            """Build a ``cls`` instance from the mapping ``d``.

            For every field, the key path is taken from the field metadata
            under ``from_key`` (defaulting to ``[field.name]``); the value at
            that path in ``d`` is used, falling back to the field's default.
            """
            # Resolve each field's lookup path first...
            from_fields = [
                (attr, curried.get_in([from_key], attr.metadata, [attr.name]))
                for attr in fields(cls)
            ]
            # ...then pull the values out of the input mapping.
            kwargs = {
                attr.name: curried.get_in(path, d, attr.default)
                for attr, path in from_fields
            }
            return cls(**kwargs)
Ejemplo n.º 5
0
        def to_dict(self,
                    convert_values: bool = False) -> MutableMapping[str, Any]:
            """Serialize this instance to a mapping.

            With ``convert_values`` the field values come from ``asdict``;
            otherwise they are read directly with ``getattr``. If no field
            declares a ``to_key`` path in its metadata, the flat name->value
            mapping is returned. Otherwise only the fields with a path are
            emitted, each placed at its path in a fresh nested dict.
            """
            klass = self.__class__

            # (field, output path) pairs, keeping only fields that have a path
            with_paths = [
                (attr, curried.get_in([to_key], attr.metadata))
                for attr in fields(klass)
            ]
            to_fields = [pair for pair in with_paths if pair[1]]

            if convert_values:
                d = asdict(self)
            else:
                d = {attr.name: getattr(self, attr.name) for attr in fields(klass)}

            if not to_fields:
                return d

            result = {}
            for attr, path in to_fields:
                # The updater ignores the previous value and stores the field's.
                result = curried.update_in(result, path, lambda _: d[attr.name])
            return result
def print_twitter_stall_warning(given_item):
    """Log and print the item's ``warning`` entry, if any; return the item unchanged."""
    warning = tz.get_in(['warning'], given_item, default=None)
    if warning is None:
        return given_item

    write_to_log(STREAM_KEY, warning)
    print(warning)
    return given_item
Ejemplo n.º 7
0
    def construct_group_dict(group_path, config):
        """
        Given a config and a path that points to a data group, compute the data group's updated parameters.
        The group_path is a list of keys and indices e.g. ['train', 'datasets', 1, 'groups', 0]
        that can be followed to reach a group's config.

        The result merges, in order: the top-level config without its
        'train', 'val' and 'test' keys, every intermediate dict along the
        path (minus the key that leads further down the path), and the
        dict found at group_path itself.

        NOTE(review): map, filter, compose, last, pairwise, all and
        is_mapping are used unqualified and are imported outside this view;
        the comments below assume their usual functional-library meaning --
        confirm against the file's imports.
        """
        # Find (almost) all prefixes of the group path
        # Prefix lengths run from 1 to len(group_path) - 1, so neither the
        # empty prefix nor the full path is included.
        # NOTE(review): tz.take(seq=group_path) is presumably a curried take
        # that still expects the count n -- confirm tz is toolz.curried.
        all_paths = list(
            map(compose(list, tz.take(seq=group_path)),
                range(1, len(group_path))))

        # Filter to exclude paths that point to lists
        # Each p is a pair of consecutive prefixes (shorter, longer); the pair
        # is kept only when the longer prefix ends in a string key.
        paths_to_merge = list(
            filter(lambda p: isinstance(last(p[1]), str), pairwise(all_paths)))
        # Find all the (mid-level) dicts that the filtered paths point to
        # The dict is looked up at the shorter prefix p[0]; the key that ends
        # the longer prefix p[1] is removed from it.
        mid_level_dicts = list(
            map(
                lambda p: tz.keyfilter(lambda k: k != last(p[1]),
                                       tz.get_in(p[0], config)),
                paths_to_merge))

        # Merge parameters at all levels to get a single parameter set for the group
        # dmerge combines the values collected for one key: when every value
        # is a mapping they are merged into a Munch, otherwise the last value
        # is used.
        # NOTE(review): the body unpacks *args into calls that each expect one
        # sequence, so it presumably receives a single list of values (as
        # toolz.merge_with supplies) -- confirm. all(is_mapping, ...) is a
        # predicate-first all, not the builtin.
        def dmerge(*args):
            if all(is_mapping, *args):
                return Munch(tz.merge(*args))
            else:
                return tz.last(*args)

        group_dict = tz.merge_with(
            dmerge,
            tz.keyfilter(lambda k: k not in ['train', 'val', 'test'],
                         config),  # top-level dict
            *mid_level_dicts,  # mid-level dicts
            tz.get_in(group_path, config)  # bottom-level dict
        )

        return group_dict
Ejemplo n.º 8
0
 def from_dict(d):
     """Build a ``cls`` instance from ``d`` using the (field, path) pairs in ``from_fields``.

     Each value is read from ``d`` at the field's path, falling back to the
     field's default. ``cls`` and ``from_fields`` come from the enclosing
     scope.
     """
     kwargs = {}
     for field_and_path in from_fields:
         attr = field_and_path[0]
         path = field_and_path[1]
         kwargs[attr.name] = get_in(path, d, attr.default)
     return cls(**kwargs)
import pandas as pd          # for data wrangling
import sqlalchemy as sqlal   # for connecting to databases
import unicodecsv as csv     # for saving to CSV in utf-8 by default
import gzip                  # for compression of CSV output
import time                  # for simple benchmarks
import datetime as dt        # for converting the formats of timestamps
import pdb                   # for testing
import argparse              # for accepting command line arguments
import yaml                  # for loading the configuration file
# import sqlite3               # for interacting with SQLite databases

## Accept Arguments
# One required positional argument selects which stream to consume.
parser = argparse.ArgumentParser(description="Save a WordPress or Twitter stream")
parser.add_argument('stream_key', metavar='stream_key', type=str, nargs=1,
                    help='Which stream to consume (tweets, likes, posts, or comments)')
# nargs=1 makes argparse store a one-element list; take its first item.
STREAM_KEY = tz.get_in([0], parser.parse_args().stream_key, default=None)

## Load Configuration
# safe_load is used instead of yaml.load: calling yaml.load without a Loader
# is rejected by PyYAML 6+ and, on older versions, can construct arbitrary
# Python objects from tagged input. safe_load builds only plain YAML types.
with open('config.yaml') as config_file:
    CONFIG = yaml.safe_load(config_file)

# Twitter API credentials come from the environment; a missing variable
# raises KeyError at import time.
TWITTER_CREDENTIALS = {
    "access_token": os.environ['TWITTER_ACCESS_TOKEN'],
    "access_token_secret": os.environ['TWITTER_ACCESS_SECRET'],
    "consumer_key": os.environ['TWITTER_CONSUMER_KEY'],
    "consumer_secret": os.environ['TWITTER_CONSUMER_SECRET']
}

## Primary Functions
def main():
    """Overall function to start it off"""
    print("Starting a stream consumer for {}".format(STREAM_KEY))