def aux_filter(ctx):
    """Return ``ctx`` with its 'aux' mapping narrowed by the 'nlcdtrn' value.

    An entry of ``ctx['aux']`` is kept only when the first element of its
    'nlcdtrn' value is not equal to 0.

    Args:
        ctx: mapping with an 'aux' key whose value supports ``.items()``;
            each aux value must be indexable by 'nlcdtrn'.

    Returns:
        The result of ``assoc(ctx, 'aux', <filtered dict>)``.
    """
    kept = {
        key: value
        for key, value in ctx['aux'].items()
        if first(get('nlcdtrn', value)) != 0
    }
    return assoc(ctx, 'aux', kept)
def test_trim():
    """Check that chips.trim yields exactly one chip per included date.

    Three of the four input chips are flagged ``include``; their acquisition
    dates are passed as ``dates`` and the trimmed result must carry the same
    number of entries and the same set of 'acquired' values.
    """
    inputs = [
        {'include': True, 'acquired': '2015-04-01'},
        {'include': True, 'acquired': '2017-04-01'},
        {'include': False, 'acquired': '2017-01-01'},
        {'include': True, 'acquired': '2016-04-01'},
    ]

    included = chips.dates(filter(lambda d: d['include'] is True, inputs))

    # Materialize once: the result is inspected twice below, and a lazy
    # iterator would already be exhausted by the time of the second assertion.
    trimmed = list(chips.trim(dates=included, chips=inputs))

    assert len(trimmed) == len(included)
    assert set(included) == set(map(lambda x: x['acquired'], trimmed))
def compact(specs):
    """Lazily drop specs whose value is their own nodata or fill value.

    Args:
        specs (iterable of dict): specs, each with 'value', 'nodata' and
            'fill' keys

    Returns:
        filter: iterator over the specs whose 'value' is neither their
            'nodata' nor their 'fill'
    """
    def _has_real_value(spec):
        value = spec['value']
        excluded = (spec['nodata'], spec['fill'])
        return value not in excluded

    return filter(_has_real_value, specs)
def only(ubids, specs):
    """Select the specs whose 'ubid' is one of ``ubids``.

    Args:
        ubids (seq): ubids to keep, e.g. [ubid1, ubid3]
        specs (seq): spec dicts with a 'ubid' key,
            e.g. [{spec1}, {spec2}, {spec3}, ...]

    Returns:
        tuple: the matching specs in input order, e.g. ({spec1}, {spec3})
    """
    def _wanted(spec):
        return spec['ubid'] in ubids

    matches = cytoolz.filter(_wanted, specs)
    return tuple(matches)
def create(x, y, acquired, cfg):
    """Build a timeseries for the chip that contains a point.

    Args:
        x (int): x coordinate
        y (int): y coordinate
        acquired (string): iso8601 date range
        cfg (dict): configuration providing the callables 'snap_fn',
            'specs_fn', 'registry_fn', 'chips_fn', 'grid_fn', 'format_fn'
            and 'dates_fn'

    Returns:
        Whatever cfg['format_fn'] returns for the chip's locations and
        chipmap (documented upstream as a tuple).
    """
    # Replace the requested point with the chip's 'proj-pt' from the snap result.
    snapped = cfg['snap_fn'](x=x, y=y)
    x, y = get_in(['chip', 'proj-pt'], snapped)

    # Look up specs_fn before calling registry_fn, as a single nested call would.
    specs_fn = cfg['specs_fn']
    specmap = specs_fn(specs=cfg['registry_fn']())

    # Keep the chipmap as a deferred call so it is only realized at the point
    # of use instead of being held in a local variable.
    deferred_chipmap = partial(
        chips.mapped,
        x=x,
        y=y,
        acquired=acquired,
        specmap=specmap,
        chips_fn=cfg['chips_fn'],
    )

    # Work out the chip's locations. This part is a candidate for being split
    # out into its own function.
    grid = first(entry for entry in cfg['grid_fn']() if entry['name'] == 'chip')
    cw, ch = specs.refspec(specmap).get('data_shape')
    deferred_locations = partial(
        chips.locations,
        x=x,
        y=y,
        cw=cw,
        ch=ch,
        rx=grid.get('rx'),
        ry=grid.get('ry'),
        sx=grid.get('sx'),
        sy=grid.get('sy'),
    )

    formatter = cfg['format_fn']
    return formatter(
        x=x,
        y=y,
        locations=deferred_locations(),
        dates_fn=cfg['dates_fn'],
        specmap=specmap,
        chipmap=deferred_chipmap(),
    )
def test_flat_map_square_filter_workers(nums):
    """Compare a pl.sync map/flat_map/filter chain with a map + cz reference.

    Both pipelines square each number, expand it into three consecutive
    values and keep those greater than 1; results are compared ignoring order.
    """
    def _generator(value):
        yield from (value, value + 1, value + 2)

    def _square(value):
        return value ** 2

    def _above_one(value):
        return value > 1

    # Reference pipeline.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_generator, map(_square, nums)))
    )

    # Pipeline under test, with three workers on the flat_map stage.
    stage = pl.sync.map(_square, nums)
    stage = pl.sync.flat_map(_generator, stage, workers=3)
    stage = pl.sync.filter(_above_one, stage)
    actual = list(stage)

    assert sorted(actual) == sorted(expected)
def test_flat_map_square_filter_workers(nums: tp.List[int]):
    """Compare a pl.thread map/flat_map/filter chain with a map + cz reference.

    Every number is squared, fanned out into three consecutive values and
    filtered to values above 1; the comparison ignores ordering.
    """
    def generator(n):
        yield from (n, n + 1, n + 2)

    square = lambda n: n ** 2  # noqa: E731 - kept as a lambda like the inline original
    above_one = lambda n: n > 1  # noqa: E731

    # Reference result.
    reference = map(square, nums)
    reference = cz.mapcat(generator, reference)
    reference = list(cz.filter(above_one, reference))

    # Threaded pipeline with two workers on the flat_map stage.
    threaded = pl.thread.filter(
        above_one,
        pl.thread.flat_map(generator, pl.thread.map(square, nums), workers=2),
    )
    threaded = list(threaded)

    assert sorted(threaded) == sorted(reference)
def test_flat_map_square_filter_workers_pipe(nums):
    """Check the ``|``-piped pl.task pipeline against a map + cz reference.

    The pipeline squares each number, expands it into three consecutive
    values, keeps values greater than 1 and is finally piped into ``list``.
    """
    def _generator(value):
        yield from (value, value + 1, value + 2)

    def _square(value):
        return value ** 2

    def _above_one(value):
        return value > 1

    # Reference result.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_generator, map(_square, nums)))
    )

    # `|` is left-associative, so building the pipe step by step is the same
    # expression as one long chain.
    piped = nums | pl.task.map(_square)
    piped = piped | pl.task.flat_map(_generator, workers=3)
    piped = piped | pl.task.filter(_above_one)
    actual = piped | list

    assert sorted(actual) == sorted(expected)
def test_flat_map_square_filter_workers_pipe(nums: tp.List[int]):
    """Check the ``|``-piped pl.process pipeline against a map + cz reference.

    Each number is squared, expanded into three consecutive values and
    filtered to values above 1; results are compared ignoring order.
    """
    def generator(n):
        yield from (n, n + 1, n + 2)

    # Reference result.
    squared = (n ** 2 for n in nums)
    expanded = cz.mapcat(generator, squared)
    expected = list(cz.filter(lambda n: n > 1, expanded))

    # Process-based pipeline, three workers on the flat_map stage, collected
    # by piping into `list`.
    square_stage = pl.process.map(lambda n: n ** 2)
    expand_stage = pl.process.flat_map(generator, workers=3)
    filter_stage = pl.process.filter(lambda n: n > 1)
    actual = nums | square_stage | expand_stage | filter_stage | list

    assert sorted(actual) == sorted(expected)
async def test_flat_map_square_filter_workers_pipe_3(nums: tp.List[int]):
    """Check an awaited pl.task pipe that filters with a coroutine predicate.

    The piped pipeline squares, expands into three consecutive values and
    filters with an ``async`` predicate; the awaited result is compared,
    ignoring order, with a map + cz reference built from a plain predicate.
    """
    def _generator(value):
        yield from (value, value + 1, value + 2)

    def _square(value):
        return value ** 2

    async def gt1(value):
        return value > 1

    # Reference result.
    expected = map(_square, nums)
    expected = cz.mapcat(_generator, expected)
    expected = list(cz.filter(lambda value: value > 1, expected))

    # The pipe is awaited directly rather than being piped into `list`.
    pipeline = nums | pl.task.map(_square)
    pipeline = pipeline | pl.task.flat_map(_generator, workers=3)
    pipeline = pipeline | pl.task.filter(gt1)
    actual = await pipeline

    assert sorted(actual) == sorted(expected)
def mean_log_freq(indices):
    """Return the mean of ``log_freqs[indices]``, or None if ``indices`` is empty."""
    return np.mean(log_freqs[indices]) if len(indices) else None

def min_log_freq(indices):
    """Return the minimum of ``log_freqs[indices]``, or None if ``indices`` is empty."""
    return np.min(log_freqs[indices]) if len(indices) else None

# One entry per document: the document is split into sentences on newlines,
# each sentence into whitespace-separated tokens, and the tokens are passed
# to lookup_indices.
doc_sentence_indices = [[
    lookup_indices(sent.split()) for sent in doc.split('\n')
] for doc in reviews.tokenized]

# Per-document sentence scores. Only the None placeholders (sentences with no
# indices) are dropped; a truthiness filter would also discard a legitimate
# score of exactly 0.0.
mean_llk = [
    [llk for llk in map(mean_log_freq, doc_indices) if llk is not None]
    for doc_indices in doc_sentence_indices
]
min_llk = [
    [llk for llk in map(min_log_freq, doc_indices) if llk is not None]
    for doc_indices in doc_sentence_indices
]

# Average the sentence scores per document; documents with no scored
# sentence get None.
mean_mean_llk = pd.Series(
    [np.mean(llks) if len(llks) > 0 else None for llks in mean_llk])
mean_min_llk = pd.Series(
    [np.mean(llks) if len(llks) > 0 else None for llks in min_llk])

logging.info("Identify the best reviews.")
# Mark the top reviews: top-5 ranked reviews of restaurants with at least the median # reviews,
def chip_grid(config):
    """Return the first grid named 'chip' produced by the configured grid_fn.

    Args:
        config: mapping whose 'grid_fn' entry is a zero-argument callable
            returning an iterable of grid dicts with a 'name' key.

    Returns:
        The first grid whose 'name' equals 'chip'.
    """
    grids = config.get('grid_fn')()
    return first(grid for grid in grids if grid['name'] == 'chip')
def tile_grid(config):
    """Return the first grid named 'tile' produced by the configured grid_fn.

    Args:
        config: mapping whose 'grid_fn' entry is a zero-argument callable
            returning an iterable of grid dicts with a 'name' key.

    Returns:
        The first grid whose 'name' equals 'tile'.
    """
    grids = config.get('grid_fn')()
    return first(grid for grid in grids if grid['name'] == 'tile')