def serde_with_class(cls):
    """Class decorator that attaches ``from_dict``/``to_dict`` methods driven by
    each field's metadata paths."""
    from_fields = list(
        map(lambda a: (a, get_in([from_key], a.metadata, [a.name])),
            fields(cls)))

    to_fields = pipe(
        fields(cls),
        map(lambda a: (a, get_in([to_key], a.metadata))),
        filter(lambda f: f[1]),
        list,
    )

    def from_dict(d):
        return cls(**dict(
            map(
                lambda f: (f[0].name, get_in(f[1], d, f[0].default)),
                from_fields,
            )))

    def to_dict(self):
        d = asdict(self)
        return reduce(
            lambda acc, f: update_in(acc, f[1], lambda _: d[f[0].name]),
            to_fields,
            {},
        )

    cls.from_dict = staticmethod(from_dict)
    cls.to_dict = to_dict
    return cls
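# Usage sketch for serde_with_class (assumptions: attrs-style classes, `fields`/`asdict`
# from attr, the toolz helpers above, and the module's from_key/to_key metadata constants;
# the Point class below is illustrative, not from the original source).
import attr

@serde_with_class
@attr.s(auto_attribs=True)
class Point:
    x: int = attr.ib(metadata={from_key: ["coords", "x"], to_key: ["coords", "x"]})
    y: int = attr.ib(default=0)

p = Point.from_dict({"coords": {"x": 3}})   # -> Point(x=3, y=0)
p.to_dict()                                 # -> {"coords": {"x": 3}} (y has no to_key metadata)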
def get_hashtag_string(given_item):
    """Return a string of hashtags associated with the given item"""
    return tz.pipe(
        tz.get_in(['entities', 'hashtags'], given_item, default=[]),
        tz.map(lambda x: tz.get_in(['text'], x, default=None)),
        tz.filter(lambda x: x is not None),
        lambda x: ", ".join(x))
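# Usage sketch for get_hashtag_string with a dict shaped like a Tweet's
# entities.hashtags payload (the sample data is illustrative only):
sample_tweet = {"entities": {"hashtags": [{"text": "python"}, {"text": "toolz"}]}}
get_hashtag_string(sample_tweet)   # -> "python, toolz"
get_hashtag_string({})             # -> ""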
def get_categories(given_dict):
    """Return a string of the categories associated with a post"""
    return tz.pipe(
        tz.get_in(['object', 'tags'], given_dict, default=[]),
        tz.filter(lambda x: tz.get_in(['objectType'], x, default=None) == 'category'),
        tz.map(lambda x: tz.get_in(['displayName'], x, default=None)),
        lambda x: ", ".join(x)
    )
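# Usage sketch for get_categories with an illustrative WordPress-style activity payload:
sample_post = {"object": {"tags": [
    {"objectType": "category", "displayName": "News"},
    {"objectType": "hashtag", "displayName": "misc"},
]}}
get_categories(sample_post)   # -> "News"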
def from_dict(cls, d: Mapping[str, Any]):
    from_fields = list(
        map(
            lambda a: (a, curried.get_in([from_key], a.metadata, [a.name])),
            fields(cls)))
    return cls(**dict(
        map(
            lambda f: (f[0].name, curried.get_in(f[1], d, f[0].default)),
            from_fields,
        )))
def to_dict(self, convert_values: bool = False) -> MutableMapping[str, Any]:
    to_fields = curried.pipe(
        fields(self.__class__),
        curried.map(lambda a: (a, curried.get_in([to_key], a.metadata))),
        curried.filter(lambda f: f[1]),
        list,
    )

    if convert_values:
        d = asdict(self)
    else:
        d = {a.name: getattr(self, a.name) for a in fields(self.__class__)}

    if not to_fields:
        return d

    return curried.reduce(
        lambda acc, f: curried.update_in(acc, f[1], lambda _: d[f[0].name]),
        to_fields,
        {},
    )
def print_twitter_stall_warning(given_item):
    """Print stall warnings, pass everything through"""
    warning = tz.get_in(['warning'], given_item, default=None)
    if warning is not None:
        write_to_log(STREAM_KEY, warning)
        print(warning)
    return given_item
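# Usage sketch for print_twitter_stall_warning (assumes write_to_log and STREAM_KEY
# from the surrounding module): the function is a pass-through, so it can be mapped
# over a stream; items without a 'warning' key flow through untouched.
print_twitter_stall_warning({"warning": {"code": "FALLING_BEHIND"}})  # logs, prints, returns the item
print_twitter_stall_warning({"text": "hello"})                        # returns the item unchanged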
def construct_group_dict(group_path, config):
    """
    Given a config and a path that points to a data group, compute the data
    group's updated parameters. The group_path is a list of keys and indices,
    e.g. ['train', 'datasets', 1, 'groups', 0], that can be followed to reach
    a group's config.
    """
    # Find (almost) all prefixes of the group path
    all_paths = list(
        map(compose(list, tz.take(seq=group_path)), range(1, len(group_path))))

    # Filter to exclude paths that point to lists
    paths_to_merge = list(
        filter(lambda p: isinstance(last(p[1]), str), pairwise(all_paths)))

    # Find all the (mid-level) dicts that the filtered paths point to
    mid_level_dicts = list(
        map(
            lambda p: tz.keyfilter(lambda k: k != last(p[1]),
                                   tz.get_in(p[0], config)),
            paths_to_merge))

    # Merge parameters at all levels to get a single parameter set for the group.
    # merge_with passes a single list of values, so args == (values,).
    def dmerge(*args):
        if all(map(is_mapping, *args)):
            return Munch(tz.merge(*args))
        else:
            return tz.last(*args)

    group_dict = tz.merge_with(
        dmerge,
        tz.keyfilter(lambda k: k not in ['train', 'val', 'test'], config),  # top-level dict
        *mid_level_dicts,                                                   # mid-level dicts
        tz.get_in(group_path, config),                                      # bottom-level dict
    )

    return group_dict
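# Illustrative walk-through of construct_group_dict (hypothetical config): for
# group_path = ['train', 'datasets', 1, 'groups', 0], all_paths holds the prefixes
# ['train'], ['train', 'datasets'], ['train', 'datasets', 1], ['train', 'datasets', 1, 'groups'].
# Only prefix pairs whose longer path ends in a string key survive the filter, so the
# mid-level dicts are config['train'] minus its 'datasets' key and
# config['train']['datasets'][1] minus its 'groups' key. These are merged with the
# top-level config (minus 'train'/'val'/'test') and the group's own dict, with deeper
# levels taking precedence for non-mapping values.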
def from_dict(d):
    return cls(**dict(
        map(
            lambda f: (f[0].name, get_in(f[1], d, f[0].default)),
            from_fields,
        )))
import pandas as pd          # for data wrangling
import sqlalchemy as sqlal   # for connecting to databases
import unicodecsv as csv     # for saving to CSV in utf-8 by default
import gzip                  # for compression of CSV output
import time                  # for simple benchmarks
import datetime as dt        # for converting the formats of timestamps
import pdb                   # for testing
import argparse              # for accepting command line arguments
import yaml                  # for loading the configuration file
import os                    # for reading Twitter credentials from the environment
import toolz.curried as tz   # assumed import for the tz.* helpers used in this module
# import sqlite3             # for interacting with SQLite databases

## Accept Arguments
parser = argparse.ArgumentParser(description="Save a WordPress or Twitter stream")
parser.add_argument('stream_key', metavar='stream_key', type=str, nargs=1,
                    help='Which stream to consume (tweets, likes, posts, or comments)')
STREAM_KEY = tz.get_in([0], parser.parse_args().stream_key, default=None)

## Load Configuration
with open('config.yaml') as config_file:
    CONFIG = yaml.safe_load(config_file)

TWITTER_CREDENTIALS = {
    "access_token": os.environ['TWITTER_ACCESS_TOKEN'],
    "access_token_secret": os.environ['TWITTER_ACCESS_SECRET'],
    "consumer_key": os.environ['TWITTER_CONSUMER_KEY'],
    "consumer_secret": os.environ['TWITTER_CONSUMER_SECRET']
}

## Primary Functions
def main():
    """Overall function to start it off"""
    print("Starting a stream consumer for {}".format(STREAM_KEY))
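# Example invocation (the module filename is hypothetical; assumes config.yaml and the
# Twitter credential environment variables are present):
#
#   $ python stream_consumer.py tweets
#   Starting a stream consumer for tweets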