def gen_groups(self, records, chunksize=None):
    """Generate the QIF groups, one chunk of `records` at a time.

    Args:
        records: The records to group; handed to `chunk` together with
            `chunksize`.
        chunksize: Forwarded unchanged to `chunk` (default: None).

    Yields:
        Every item that `group` produces for each chunk. Chunks are keyed
        by `self.id` when `self.is_split` is truthy, and by `self.account`
        otherwise.
    """
    for batch in chunk(records, chunksize):
        # The grouping key depends on whether this is a split file.
        if self.is_split:
            grouper = self.id
        else:
            grouper = self.account

        yield from group(batch, grouper)
def gen_groups(self, records, chunksize=None):
    """Generate the QIF groups.

    `records` is split with `chunk(records, chunksize)` and each resulting
    piece is passed to `group`, using `self.id` as the key function when
    `self.is_split` is truthy and `self.account` otherwise. Whatever
    `group` produces is yielded as-is, piece after piece.
    """
    for piece in chunk(records, chunksize):
        key = self.account
        if self.is_split:
            key = self.id

        yield from group(piece, key)
def solution2(debug=False):
    """Pretty-print the most-watched repo record per pivot-row combination.

    Queries the GitHub repository search API (`q=data`, sorted by stars),
    then runs the results through a meza pipeline: fill empty `language`
    values, flatten nested dicts, pivot, normalize, and group by the
    (`has_pages`, `owner_type`) pair keeping the record with the largest
    `watchers` value. The surviving records are printed in descending
    `watchers` order.

    Args:
        debug: Set to True to print the first record of each intermediate
            stage of the pipeline.
    """
    from urllib.request import urlopen
    from operator import itemgetter
    from functools import partial
    from pprint import pprint

    from meza import process as pr, fntools as ft
    from meza.io import read_json

    def peek(stream):
        """Return `stream` unchanged, printing its first record if debugging."""
        stream, preview = pr.peek(stream)

        if debug:
            print(preview[0])

        return stream

    def group_keyfunc(record):
        """Key a record by its `has_pages` and `owner_type` values."""
        return tuple(record[r] for r in rows)

    url4 = 'https://api.github.com/search/repositories?q=data&sort=stars&order=desc'

    # `watchers` is the pivot field to aggregate by
    # `language` is the pivot field to group by
    args = ('watchers', 'language')

    # the pivot fields we want to include in each row
    rows = ['has_pages', 'owner_type']

    # aggregate by `watchers`
    agg_keyfunc = itemgetter('watchers')
    aggregator = partial(max, key=agg_keyfunc)

    # The pipeline stages are consumed only when `sorted` runs, so the
    # response must stay open until then; the `with` block closes it
    # afterwards (the original never closed it).
    with urlopen(url4) as f:
        records = read_json(f, path='items.item')

        # repos without a language have a value of None, which meza doesn't like
        filled = peek(pr.fillempty(records, value='', fields=['language']))

        # meza doesn't do well with nested dicts
        flat = peek(dict(ft.flatten(r)) for r in filled)

        pivotted = peek(pr.pivot(flat, *args, rows=rows, op=sum))

        # `rows` are the fields we don't want to normalize (since `invert` is true)
        # `watchers` is the field to use for the normalized values
        # `language` is the field to use for the normalized key
        normalize_kwargs = {'rows': rows, 'invert': True}
        normal = peek(pr.normalize(pivotted, *args, **normalize_kwargs))

        # Only emit the groups, not the group key (since `tupled` is False)
        group_kwargs = {'tupled': False, 'aggregator': aggregator}
        grouped = peek(pr.group(normal, group_keyfunc, **group_kwargs))

        sgrouped = sorted(grouped, key=agg_keyfunc, reverse=True)

    for record in sgrouped:
        pprint(record)
def gen_groups(self, records, chunksize=None):
    """Yield groups of cleansed records, one chunk at a time.

    Each chunk from `chunk(records, chunksize)` is rebuilt as a list of
    dicts whose values are replaced by the first item `xmlize` produces
    for them (presumably an XML-safe form of the value -- confirm against
    `xmlize`). The cleansed list is then passed to `group`, keyed by
    `self.id` when `self.is_split` is truthy and by `self.account`
    otherwise, and everything `group` produces is yielded.
    """
    for batch in chunk(records, chunksize):
        cleansed = []

        for record in batch:
            # `xmlize` works on iterables, so wrap the single value in a
            # list and take the one result back out.
            cleaned = {
                field: next(xmlize([value])) for field, value in record.items()
            }
            cleansed.append(cleaned)

        if self.is_split:
            grouper = self.id
        else:
            grouper = self.account

        yield from group(cleansed, grouper)
def gen_groups(self, records, chunksize=None):
    """Generate the OFX groups.

    Args:
        records: The records to group; handed to `chunk` together with
            `chunksize`. Each record must provide `.items()`.
        chunksize: Forwarded unchanged to `chunk` (default: None).

    Yields:
        Every item `group` produces for each cleansed chunk. Chunks are
        keyed by `self.id` when `self.is_split` is truthy, and by
        `self.account` otherwise.
    """
    def cleanse(record):
        """Return a copy of `record` with every value run through `xmlize`."""
        return {name: next(xmlize([val])) for name, val in record.items()}

    for piece in chunk(records, chunksize):
        cleansed = [cleanse(record) for record in piece]

        key = self.account
        if self.is_split:
            key = self.id

        yield from group(cleansed, key)
def gen_trxns(self, groups, collapse=False):
    """Yield `(group, transactions)` pairs, optionally collapsing them.

    Args:
        groups: An iterable of `(group, transactions)` pairs.
        collapse: When truthy and `self.is_split` is also truthy, it is
            used as the key for `group` to collapse the transactions
            (default: False).

    Yields:
        A `(group, transactions)` tuple per input pair. Without
        collapsing, the transactions are passed through untouched;
        otherwise they are a list holding one merged entry per
        `collapse` grouping.
    """
    def total(values):
        """Sum the values after converting each with `utils.convert_amount`."""
        return sum(map(utils.convert_amount, values))

    for grp, transactions in groups:
        if not (self.is_split and collapse):
            yield (grp, transactions)
            continue

        # group transactions by `collapse` field and sum the amounts
        by_account = group(transactions, collapse)
        combine = partial(merge, pred=self.amount, op=total)
        yield (grp, [combine(dicts) for _, dicts in by_account])
def gen_trxns(self, groups, collapse=False):
    """Generate transactions.

    Walks the `(group, transactions)` pairs in `groups` and yields each
    one back as a tuple. When both `self.is_split` and `collapse` are
    truthy, the transactions are first grouped by `collapse` and each
    grouping is merged into a single entry via `merge`, with
    `self.amount` as `pred` and a summing `op`.
    """
    def add_amounts(values):
        """Convert each value with `utils.convert_amount` and sum them."""
        return sum(map(utils.convert_amount, values))

    for grp, transactions in groups:
        trxns = transactions

        if self.is_split and collapse:
            # group transactions by `collapse` field and sum the amounts
            byaccount = group(transactions, collapse)
            merger = partial(merge, pred=self.amount, op=add_amounts)
            trxns = [merger(dicts) for _, dicts in byaccount]

        yield (grp, trxns)
# Exercise scaffold: the '???' strings and the `# ...` markers are
# placeholders to be replaced with a real implementation.
# NOTE(review): these statements may belong inside a function whose `def`
# line is not visible here -- confirm the intended indentation.
url4 = 'https://api.github.com/search/repositories?q=data&sort=stars&order=desc'
f = '???'
records = '???'

# Some of the functions you will use are `ft.flatten`, `pr.pivot`,
# `pr.normalize`, `pr.group`, `pr.fillempty`, and `pr.aggregate`. You can view
# documentation for these functions in the doc-blocks at the links below:
#
# https://github.com/reubano/meza/blob/master/meza/process.py
# https://github.com/reubano/meza/blob/master/meza/fntools.py

# ...

# Placeholder key function and options; `pr.group` is called on an empty
# list here, so the loop below has nothing real to process yet.
keyfunc = lambda x: True
kwargs = {}
grouped = pr.group([], keyfunc, **kwargs)

for key, group in grouped:
    # ...
    pass

# Run both solutions in order, each under a printed banner, when the file
# is executed as a script.
if __name__ == "__main__":
    print('-----------')
    print('Solution #1')
    print('-----------')
    solution1()

    print('\n-----------')
    print('Solution #2')
    print('-----------')
    solution2()