Example #1
0
def aux_filter(ctx):
    """Drop 'aux' entries whose first 'nlcdtrn' element equals zero.

    Args:
        ctx (dict): Context holding an 'aux' mapping.  Every value of that
            mapping is looked up under 'nlcdtrn', and the first element of
            that lookup is compared against 0.

    Returns:
        The result of assoc(ctx, 'aux', kept), where kept contains only the
        entries whose first 'nlcdtrn' element is not 0.
    """
    kept = {key: value
            for key, value in ctx['aux'].items()
            if first(get('nlcdtrn', value)) != 0}

    return assoc(ctx, 'aux', kept)
Example #2
0
def test_trim():
    """Check that chips.trim returns one chip per included acquired date.

    Three of the four input chips are flagged for inclusion; their dates are
    handed to chips.trim together with every input chip, and the trimmed
    result must carry exactly those dates.
    """
    inputs = [
        {'include': True, 'acquired': '2015-04-01'},
        {'include': True, 'acquired': '2017-04-01'},
        {'include': False, 'acquired': '2017-01-01'},
        {'include': True, 'acquired': '2016-04-01'},
    ]
    included = chips.dates(filter(lambda d: d['include'] is True, inputs))

    # Materialize once: the result is inspected twice below, and a lazy
    # iterator would be exhausted by the first assertion, leaving the second
    # one comparing against an empty set.
    trimmed = list(chips.trim(dates=included, chips=inputs))

    assert len(trimmed) == len(included)
    assert set(included) == {chip['acquired'] for chip in trimmed}
Example #3
0
def compact(specs):
    """Lazily drop specs whose value equals their own nodata or fill marker.

    Args:
        specs (iterable): Mappings that each provide 'value', 'nodata' and
            'fill' keys.

    Returns:
        A lazy filter over specs yielding only the items whose 'value' is
        neither their 'nodata' nor their 'fill'.
    """

    def _is_real_value(spec):
        value = spec['value']
        excluded = (spec['nodata'], spec['fill'])
        return value not in excluded

    return filter(_is_real_value, specs)
Example #4
0
def only(ubids, specs):
    """Filter specs on ubids.

    Args:
        ubids (iterable): ubids to keep, e.g. [ubid1, ubid3].  A one-shot
            iterator is accepted; it is read in full exactly once.
        specs (iterable): [{spec1}, {spec2}, {spec3}, ...], each item
            indexable by 'ubid'.

    Returns:
        tuple: ({spec1}, {spec3}) -- the specs whose 'ubid' is in ubids,
            in their original order.
    """
    from collections.abc import Iterator

    # A membership test against an iterator consumes it, so testing several
    # specs against the same iterator would silently miss matches.  Snapshot
    # iterator input once; real containers are used untouched.
    if isinstance(ubids, Iterator):
        ubids = tuple(ubids)

    return tuple(spec for spec in specs if spec['ubid'] in ubids)
Example #5
0
def create(x, y, acquired, cfg):
    """Create a timeseries.

    Args:
        x (int): x coordinate
        y (int): y coordinate
        acquired (string): iso8601 date range
        cfg (dict): A Merlin configuration; the 'snap_fn', 'specs_fn',
            'registry_fn', 'chips_fn', 'grid_fn', 'format_fn' and
            'dates_fn' entries are used here

    Returns:
        Whatever cfg['format_fn'] returns for the assembled locations,
        specmap and chipmap
    """

    # Swap the requested point for the ['chip']['proj-pt'] entry of the snap
    # result; every later step works from that point.
    snapped = cfg['snap_fn'](x=x, y=y)
    chip_x, chip_y = get_in(['chip', 'proj-pt'], snapped)

    # Specs are built from whatever the registry currently returns.
    specmap = cfg['specs_fn'](specs=cfg['registry_fn']())

    # Deferred on purpose: the chipmap is only realized inside the final
    # call, so no local variable holds on to it.
    build_chipmap = partial(chips.mapped,
                            x=chip_x,
                            y=chip_y,
                            acquired=acquired,
                            specmap=specmap,
                            chips_fn=cfg['chips_fn'])

    # First grid definition named 'chip'.
    chip_grid = first(filter(lambda g: g['name'] == 'chip', cfg['grid_fn']()))

    cw, ch = specs.refspec(specmap).get('data_shape')

    grid_params = {key: chip_grid.get(key) for key in ('rx', 'ry', 'sx', 'sy')}
    build_locations = partial(chips.locations,
                              x=chip_x,
                              y=chip_y,
                              cw=cw,
                              ch=ch,
                              **grid_params)

    return cfg['format_fn'](x=chip_x,
                            y=chip_y,
                            locations=build_locations(),
                            dates_fn=cfg['dates_fn'],
                            specmap=specmap,
                            chipmap=build_chipmap())
Example #6
0
def test_flat_map_square_filter_workers(nums):
    """Compare a pl.sync map/flat_map/filter chain against the cz reference.

    Both sides square every number, expand each square into three
    consecutive values and keep the values greater than 1.  Results are
    compared after sorting, so ordering is not part of the check.
    """
    def _square(value):
        return value**2

    def _expand(value):
        yield from (value, value + 1, value + 2)

    def _above_one(value):
        return value > 1

    # Reference result.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_expand, map(_square, nums))))

    # Same steps through pl.sync, with three workers on the flat_map stage.
    stage = pl.sync.map(_square, nums)
    stage = pl.sync.flat_map(_expand, stage, workers=3)
    stage = pl.sync.filter(_above_one, stage)
    actual = list(stage)

    assert sorted(actual) == sorted(expected)
Example #7
0
def test_flat_map_square_filter_workers(nums: tp.List[int]):
    """Compare a pl.thread map/flat_map/filter chain against the cz reference.

    Both sides square every number, expand each square into three
    consecutive values and keep the values greater than 1.  Results are
    compared after sorting, so ordering is not part of the check.
    """
    def _square(value):
        return value**2

    def _expand(value):
        yield from (value, value + 1, value + 2)

    def _above_one(value):
        return value > 1

    # Reference result.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_expand, map(_square, nums))))

    # Same steps through pl.thread, with two workers on the flat_map stage.
    stage = pl.thread.map(_square, nums)
    stage = pl.thread.flat_map(_expand, stage, workers=2)
    stage = pl.thread.filter(_above_one, stage)
    actual = list(stage)

    assert sorted(actual) == sorted(expected)
Example #8
0
def test_flat_map_square_filter_workers_pipe(nums):
    """Compare a piped pl.task map/flat_map/filter chain against the cz reference.

    The pl.task stages are attached with the ``|`` operator, ending in
    ``list`` to collect the output.  Results are compared after sorting, so
    ordering is not part of the check.
    """
    def _square(value):
        return value**2

    def _expand(value):
        yield from (value, value + 1, value + 2)

    def _above_one(value):
        return value > 1

    # Reference result.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_expand, map(_square, nums))))

    # Same steps, one pipe stage at a time; flat_map runs with three workers.
    stage = nums | pl.task.map(_square)
    stage = stage | pl.task.flat_map(_expand, workers=3)
    stage = stage | pl.task.filter(_above_one)
    actual = stage | list

    assert sorted(actual) == sorted(expected)
Example #9
0
def test_flat_map_square_filter_workers_pipe(nums: tp.List[int]):
    """Compare a piped pl.process map/flat_map/filter chain against the cz reference.

    The pl.process stages are attached with the ``|`` operator, ending in
    ``list`` to collect the output.  Results are compared after sorting, so
    ordering is not part of the check.
    """
    def _square(value):
        return value**2

    def _expand(value):
        yield from (value, value + 1, value + 2)

    def _above_one(value):
        return value > 1

    # Reference result.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_expand, map(_square, nums))))

    # Same steps, one pipe stage at a time; flat_map runs with three workers.
    stage = nums | pl.process.map(_square)
    stage = stage | pl.process.flat_map(_expand, workers=3)
    stage = stage | pl.process.filter(_above_one)
    actual = stage | list

    assert sorted(actual) == sorted(expected)
Example #10
0
async def test_flat_map_square_filter_workers_pipe_3(nums: tp.List[int]):
    """Await a piped pl.task chain and compare it against the cz reference.

    The pl.task chain uses a coroutine function as its filter predicate and
    is awaited directly instead of being collected with ``list``.  Results
    are compared after sorting, so ordering is not part of the check.
    """
    def _square(value):
        return value ** 2

    def _expand(value):
        yield from (value, value + 1, value + 2)

    def _above_one(value):
        return value > 1

    async def _above_one_async(value):
        return value > 1

    # Reference result, computed with the plain (non-async) predicate.
    expected = list(
        cz.filter(_above_one, cz.mapcat(_expand, map(_square, nums))))

    # Same steps, one pipe stage at a time; flat_map runs with three workers.
    pipeline = nums | pl.task.map(_square)
    pipeline = pipeline | pl.task.flat_map(_expand, workers=3)
    pipeline = pipeline | pl.task.filter(_above_one_async)
    actual = await pipeline

    assert sorted(actual) == sorted(expected)

def mean_log_freq(indices):
    """Return the mean of log_freqs at indices, or None when indices is empty."""
    # Emptiness is checked with len() rather than truthiness; presumably
    # indices can be an array, whose truth value is ambiguous -- TODO confirm.
    if not len(indices):
        return None
    return np.mean(log_freqs[indices])


def min_log_freq(indices):
    """Return the minimum of log_freqs at indices, or None when indices is empty."""
    # Emptiness is checked with len() rather than truthiness; presumably
    # indices can be an array, whose truth value is ambiguous -- TODO confirm.
    if not len(indices):
        return None
    return np.min(log_freqs[indices])


# One entry per review; each entry holds one lookup_indices(...) result per
# newline-separated sentence of the tokenized text.
doc_sentence_indices = [
    [lookup_indices(sent.split()) for sent in doc.split('\n')]
    for doc in reviews.tokenized
]

# Per-sentence statistics for every review.  Sentences without indices yield
# None and are skipped.  The test is "is not None" rather than truthiness so
# that a statistic of exactly 0.0 is kept instead of being silently dropped.
mean_llk = [
    [llk for llk in map(mean_log_freq, doc_indices) if llk is not None]
    for doc_indices in doc_sentence_indices
]
min_llk = [
    [llk for llk in map(min_log_freq, doc_indices) if llk is not None]
    for doc_indices in doc_sentence_indices
]

# Average the per-sentence values of each review; a review with no usable
# sentence gets None.
mean_mean_llk = pd.Series(
    [np.mean(llks) if llks else None for llks in mean_llk])
mean_min_llk = pd.Series(
    [np.mean(llks) if llks else None for llks in min_llk])

logging.info("Identify the best reviews.")
# Mark the top reviews: top-5 ranked reviews of restaurants with at least the median # reviews,
Example #12
0
def chip_grid(config):
    """Return the first grid produced by config's 'grid_fn' whose name is 'chip'."""
    grid_fn = config.get('grid_fn')
    matches = (grid for grid in grid_fn() if grid['name'] == 'chip')
    return first(matches)
Example #13
0
def tile_grid(config):
    """Return the first grid produced by config's 'grid_fn' whose name is 'tile'."""
    grid_fn = config.get('grid_fn')
    matches = (grid for grid in grid_fn() if grid['name'] == 'tile')
    return first(matches)