コード例 #1
0
ファイル: bn.py プロジェクト: abdulqayyum/blocks
def get_batch_normalization_updates(training_graph, allow_duplicates=False):
    """Pair each BN population statistic with its minibatch counterpart.

    Parameters
    ----------
    training_graph : :class:`~blocks.graph.ComputationGraph`
        Graph in which batch normalization is applied in "training mode".
    allow_duplicates : bool, optional
        When `True`, several training-mode applications of the same
        :class:`~blocks.bricks.BatchNormalization` brick are accepted and
        a pair is returned for every one of them; resolving the resulting
        duplicates is left to the caller. Defaults to `False`.

    Returns
    -------
    update_pairs : list of tuples
        2-tuples of ``(population shared variable, minibatch variable)``.
        For every training-mode application call the list holds the
        brick's ``population_mean`` paired with the call's ``'offset'``
        metadata, immediately followed by ``population_stdev`` paired
        with the ``'divisor'`` metadata. With `allow_duplicates` set, one
        population variable can therefore occur in several pairs (e.g.
        recurrent or siamese models that reuse a brick).

    Raises
    ------
    ValueError
        If the graph contains no training-mode application, or if a brick
        is applied more than once while `allow_duplicates` is `False`.

    Notes
    -----
    Applied as-is, the pairs overwrite the population statistics with the
    latest minibatch statistics. Callers usually post-process them, e.g.
    into a moving average or an average over the whole training set.

    """
    from ..bricks import BatchNormalization
    from ..filter import VariableFilter, get_application_call
    bn_output_filter = VariableFilter(bricks=[BatchNormalization],
                                      roles=[OUTPUT])
    candidate_calls = map(get_application_call,
                          bn_output_filter(training_graph))
    training_calls = _training_mode_application_calls(candidate_calls)
    if len(training_calls) == 0:
        raise ValueError("no training mode BatchNormalization "
                         "applications found in graph")
    owners = [call.application.brick for call in training_calls]

    if not (allow_duplicates or isdistinct(owners)):
        raise ValueError('multiple applications of the same '
                         'BatchNormalization brick; pass allow_duplicates '
                         '= True to override this check')

    # Emit the mean pair, then the stdev pair, for each call in order.
    update_pairs = []
    for call in training_calls:
        update_pairs.append((call.application.brick.population_mean,
                             call.metadata['offset']))
        update_pairs.append((call.application.brick.population_stdev,
                             call.metadata['divisor']))
    return update_pairs
コード例 #2
0
def test_scalar_symbols():
    """scalar_symbols gives one uniquely named scalar per expression."""
    sources = [x, y]
    symbols = scalar_symbols(sources)

    assert len(symbols) == len(sources)

    names = [sym._name for sym in symbols]
    assert isdistinct(names)

    # each scalar's dshape must equal the schema of its source expression
    for sym, src in zip(symbols, sources):
        assert sym.dshape == src.schema
コード例 #3
0
def get_batch_normalization_updates(training_graph, allow_duplicates=False):
    """Gather update pairs for learning BN population statistics.

    Parameters
    ----------
    training_graph : :class:`~blocks.graph.ComputationGraph`
        A graph containing "training mode" batch normalization.
    allow_duplicates : bool, optional
        If `True`, tolerate more than one training-mode application of a
        single :class:`~blocks.bricks.BatchNormalization` brick and
        return pairs for all of them, leaving it to the caller to
        reconcile the duplicates. Defaults to `False`.

    Returns
    -------
    update_pairs : list of tuples
        A flat list of 2-tuples. The first element is a brick's
        "population" shared variable (mean or standard deviation); the
        second is the matching minibatch statistic taken from the
        application call's metadata (``'offset'`` for the mean,
        ``'divisor'`` for the standard deviation). Each call contributes
        its mean pair followed by its stdev pair. When `allow_duplicates`
        is `True` the same population variable may appear several times.

    Raises
    ------
    ValueError
        When no training-mode application is found, or when a brick is
        applied repeatedly and `allow_duplicates` is `False`.

    Notes
    -----
    Used raw, these pairs replace the population statistics by the
    minibatch statistics at every step; typically they are turned into a
    moving average or a periodic full-dataset average instead.

    """
    from ..bricks import BatchNormalization
    from ..filter import VariableFilter, get_application_call
    select_bn_outputs = VariableFilter(bricks=[BatchNormalization],
                                       roles=[OUTPUT])
    found_calls = map(get_application_call, select_bn_outputs(training_graph))
    active_calls = _training_mode_application_calls(found_calls)
    if len(active_calls) == 0:
        raise ValueError("no training mode BatchNormalization "
                         "applications found in graph")
    source_bricks = [call.application.brick for call in active_calls]

    if not (allow_duplicates or isdistinct(source_bricks)):
        raise ValueError('multiple applications of the same '
                         'BatchNormalization brick; pass allow_duplicates '
                         '= True to override this check')

    # (brick attribute, metadata key) in the order the pairs are emitted
    statistics = (('population_mean', 'offset'),
                  ('population_stdev', 'divisor'))
    return [(getattr(call.application.brick, attribute), call.metadata[key])
            for call in active_calls
            for attribute, key in statistics]
コード例 #4
0
def merge(*exprs, **kwargs):
    """Combine expressions into a single ``Merge`` expression.

    Positional expressions come first, in the order given, each wrapped
    with a default name of the form ``_<position>``. Keyword expressions
    follow in key order, wrapped and labelled with their keyword.

    If exactly one argument is supplied, no ``Merge`` is built: a lone
    positional argument is returned unchanged, and a lone keyword
    argument is returned as ``value.label(keyword)``.

    Raises
    ------
    TypeError
        If every expression has ``ndim`` equal to 0.
    ValueError
        If the merged result has repeated field names.
    """
    if len(exprs) + len(kwargs) == 1:
        # only one object was given, so there is nothing to merge
        if exprs:
            # a lone positional argument is returned unchanged
            return exprs[0]
        # otherwise it is a lone keyword argument: label it and return it
        name, value = next(iter(kwargs.items()))
        return value.label(name)

    # keyword arguments are labelled and placed after the positional
    # ones, sorted by key
    ordered_kwargs = sorted(kwargs.items(), key=first)
    positional = [_wrap(expr, '_%s' % n) for n, expr in enumerate(exprs)]
    labelled = [label(_wrap(v, k), k) for k, v in ordered_kwargs]
    exprs = tuple(positional + labelled)

    every_scalar = all(ndim(expr) == 0 for expr in exprs)
    if every_scalar:
        raise TypeError('cannot merge all scalar expressions')

    packed = varargsexpr(exprs)
    common_shape = maxshape(map(shape, exprs))
    result = Merge(exprs, packed, common_shape)

    fields = result.fields
    if isdistinct(fields):
        return result

    repeated = [k for k, v in frequencies(fields).items() if v > 1]
    raise ValueError("Repeated columns found: " + ', '.join(repeated))
コード例 #5
0
ファイル: test_collections.py プロジェクト: blaze/blaze
def test_join_suffixes():
    """A self-join with explicit suffixes renames the clashing column."""
    table = symbol('a', 'var * {x: int, y: int}')
    joined = join(table, table, 'x', suffixes=('_l', '_r'))
    columns = joined.fields

    assert isdistinct(columns)
    assert len(columns) == 3
    assert set(columns) == {'x', 'y_l', 'y_r'}
コード例 #6
0
ファイル: test_collections.py プロジェクト: Back2Basics/blaze
def test_join_on_same_table():
    """Joining a table with itself on ``x`` gives three distinct fields."""
    table = Symbol('a', 'var * {x: int, y: int}')
    columns = join(table, table, 'x').fields

    assert isdistinct(columns)
    assert len(columns) == 3
コード例 #7
0
ファイル: test_broadcast.py プロジェクト: Back2Basics/blaze
def test_scalar_symbols():
    """Each expression gets its own distinctly named scalar symbol."""
    inputs = [x, y]
    outputs = scalar_symbols(inputs)

    assert len(outputs) == len(inputs)

    symbol_names = [out._name for out in outputs]
    assert isdistinct(symbol_names)

    # the dshape of every scalar equals the schema of its expression
    for out, inp in zip(outputs, inputs):
        assert out.dshape == inp.schema
コード例 #8
0
ファイル: test_collections.py プロジェクト: nkhuyu/blaze
def test_join_on_same_table():
    """A self-join on ``x`` yields three fields with no repeated names."""
    table = symbol('a', 'var * {x: int, y: int}')
    columns = join(table, table, 'x').fields

    assert isdistinct(columns)
    assert len(columns) == 3
コード例 #9
0
ファイル: test_collections.py プロジェクト: nkhuyu/blaze
def test_join_suffixes():
    """Custom suffixes are applied to the non-key column of a self-join."""
    source = symbol('a', 'var * {x: int, y: int}')
    result = join(source, source, 'x', suffixes=('_l', '_r'))
    names = result.fields

    assert isdistinct(names)
    assert len(names) == 3
    assert set(names) == {'x', 'y_l', 'y_r'}
コード例 #10
0
ファイル: test_collections.py プロジェクト: Back2Basics/blaze
def test_join_on_same_columns():
    """A shared non-key column is split into ``_left``/``_right`` fields."""
    left = Symbol('a', 'var * {x: int, y: int, z: int}')
    right = Symbol('b', 'var * {x: int, y: int, w: int}')

    columns = join(left, right, 'x').fields

    assert isdistinct(columns)
    assert len(columns) == 5
    # ``y`` exists on both sides, so it appears once per side
    for expected in ('y_left', 'y_right'):
        assert expected in columns
コード例 #11
0
ファイル: test_collections.py プロジェクト: nkhuyu/blaze
def test_join_on_same_columns():
    """Joining on ``x`` keeps both ``y`` columns under suffixed names."""
    lhs = symbol('a', 'var * {x: int, y: int, z: int}')
    rhs = symbol('b', 'var * {x: int, y: int, w: int}')

    names = join(lhs, rhs, 'x').fields

    assert isdistinct(names)
    assert len(names) == 5
    # the column present on both sides shows up once for each side
    for expected in ('y_left', 'y_right'):
        assert expected in names
コード例 #12
0
ファイル: table.py プロジェクト: chdoig/blaze
def merge(*tables):
    """Merge tables that share a common sub expression.

    Returns ``Merge(child, tables)`` where ``child`` is the result of
    ``common_subexpression(*tables)``.

    Raises
    ------
    ValueError
        If the common sub expression is falsy, or if the merged result
        has repeated column names.
    """
    shared = common_subexpression(*tables)
    if not shared:
        raise ValueError("No common sub expression found for input tables")

    merged = Merge(shared, tables)
    columns = merged.columns
    if isdistinct(columns):
        return merged

    # name every column that occurs more than once
    repeated = [name for name, count in frequencies(columns).items()
                if count > 1]
    raise ValueError("Repeated columns found: " + ', '.join(repeated))
コード例 #13
0
ファイル: collections.py プロジェクト: Casolt/blaze
def merge(*exprs):
    """Merge several expressions over their common sub expression.

    Parameters
    ----------
    *exprs
        Expressions to combine. They are passed to
        ``common_subexpression`` and to ``Merge`` in the order given.

    Returns
    -------
    The ``Merge(child, exprs)`` expression.

    Raises
    ------
    ValueError
        If ``common_subexpression`` raises for these inputs, or if the
        merged result has repeated field names.
    """
    # Get common sub expression
    try:
        child = common_subexpression(*exprs)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # propagate rather than be reported as a missing sub expression.
        raise ValueError(
            "No common sub expression found for input expressions")

    result = Merge(child, exprs)

    if not isdistinct(result.fields):
        raise ValueError("Repeated columns found: " + ', '.join(
            k for k, v in frequencies(result.fields).items() if v > 1))

    return result
コード例 #14
0
ファイル: collections.py プロジェクト: vitan/blaze
def merge(*exprs, **kwargs):
    """Merge positional and keyword expressions into one ``Merge``.

    Parameters
    ----------
    *exprs
        Expressions to combine, kept in the order given.
    **kwargs
        Further expressions; each is labelled with its keyword via
        ``label(value, keyword)`` and appended after the positional ones.

    Returns
    -------
    The ``Merge(child, exprs)`` expression, where ``child`` comes from
    ``common_subexpression``.

    Raises
    ------
    ValueError
        If ``common_subexpression`` raises for these inputs, or if the
        merged result has repeated field names.
    """
    # Get common sub expression
    exprs = exprs + tuple(label(v, k) for k, v in kwargs.items())
    try:
        child = common_subexpression(*exprs)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # propagate rather than be reported as a missing sub expression.
        raise ValueError(
            "No common sub expression found for input expressions")

    result = Merge(child, exprs)

    if not isdistinct(result.fields):
        raise ValueError("Repeated columns found: " + ', '.join(
            k for k, v in frequencies(result.fields).items() if v > 1))

    return result
コード例 #15
0
def test_map(s, a, b):
    """Check ``Executor.map`` end to end against the scheduler ``s``.

    This is a generator-style test: every ``yield`` waits on an
    asynchronous executor operation before the next assertion runs.
    """
    # NOTE(review): ``a`` and ``b`` are never referenced in this body --
    # presumably worker fixtures supplied by the test harness; confirm.
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    # Mapping over five inputs must give five futures with unique keys.
    L1 = e.map(inc, range(5))
    assert len(L1) == 5
    assert isdistinct(x.key for x in L1)
    assert all(isinstance(x, Future) for x in L1)

    result = yield L1[0]._result()
    assert result == inc(0)
    assert len(s.dask) == 5

    # Futures can be mapped over in turn; five more entries are expected
    # in s.dask, and the entry for L2[0] must reference L1[0]'s key.
    L2 = e.map(inc, L1)

    result = yield L2[1]._result()
    assert result == inc(inc(1))
    assert len(s.dask) == 10
    assert L1[0].key in s.dask[L2[0].key]

    # A list of futures can be passed as a single argument to submit.
    total = e.submit(sum, L2)
    result = yield total._result()
    assert result == sum(map(inc, map(inc, range(5))))

    # Mapping a two-argument function over two lists of futures.
    L3 = e.map(add, L1, L2)
    result = yield L3[1]._result()
    assert result == inc(1) + inc(inc(1))

    # Inputs of unequal length. The comparison is limited to Python 3,
    # where the built-in map stops at the shortest input.
    L4 = e.map(add, range(3), range(4))
    results = yield e._gather(L4)
    if sys.version_info[0] >= 3:
        assert results == list(map(add, range(3), range(4)))

    def f(x, y=10):
        return x + y

    # A keyword argument is applied to every call: x + 5 for x in 0..4.
    L5 = e.map(f, range(5), y=5)
    results = yield e._gather(L5)
    assert results == list(range(5, 10))

    # The keyword may itself be a future; the expected values imply it is
    # replaced by its result, f(10) == 20, before f runs.
    y = e.submit(f, 10)
    L6 = e.map(f, range(5), y=y)
    results = yield e._gather(L6)
    assert results == list(range(20, 25))

    yield e._shutdown()
コード例 #16
0
    def f(c, a, b):
        """Exercise ``Executor.map`` against the center/scheduler ``c``.

        Generator-style body: every ``yield`` waits on an asynchronous
        executor operation before the next assertion runs.
        """
        # NOTE(review): ``a`` and ``b`` are never referenced in this body
        # -- presumably worker fixtures from the enclosing test; confirm.
        e = Executor((c.ip, c.port), start=False)
        # The executor's ``_go`` is scheduled on the current IOLoop rather
        # than awaited here.
        IOLoop.current().spawn_callback(e._go)

        # Mapping over five inputs must give five futures with unique keys.
        L1 = e.map(inc, range(5))
        assert len(L1) == 5
        assert isdistinct(x.key for x in L1)
        assert all(isinstance(x, Future) for x in L1)

        result = yield L1[0]._result()
        assert result == inc(0)
        assert len(e.dask) == 5

        # Futures can be mapped over in turn; five more entries are
        # expected in e.dask, and the entry for L2[0] must reference
        # L1[0]'s key.
        L2 = e.map(inc, L1)

        result = yield L2[1]._result()
        assert result == inc(inc(1))
        assert len(e.dask) == 10
        assert L1[0].key in e.dask[L2[0].key]

        # A list of futures can be passed as a single argument to submit.
        total = e.submit(sum, L2)
        result = yield total._result()
        assert result == sum(map(inc, map(inc, range(5))))

        # Mapping a two-argument function over two lists of futures.
        L3 = e.map(add, L1, L2)
        result = yield L3[1]._result()
        assert result == inc(1) + inc(inc(1))

        # Inputs of unequal length. The comparison is limited to Python 3,
        # where the built-in map stops at the shortest input.
        L4 = e.map(add, range(3), range(4))
        results = yield e._gather(L4)
        if sys.version_info[0] >= 3:
            assert results == list(map(add, range(3), range(4)))

        # NOTE: this inner ``f`` is a local name that shadows the
        # enclosing function's own name for the rest of this body.
        def f(x, y=10):
            return x + y

        # A keyword argument is applied to every call: x + 5 for x in 0..4.
        L5 = e.map(f, range(5), y=5)
        results = yield e._gather(L5)
        assert results == list(range(5, 10))

        # The keyword may itself be a future; the expected values imply it
        # is replaced by its result, f(10) == 20, before f runs.
        y = e.submit(f, 10)
        L6 = e.map(f, range(5), y=y)
        results = yield e._gather(L6)
        assert results == list(range(20, 25))

        yield e._shutdown()
コード例 #17
0
ファイル: collections.py プロジェクト: bopopescu/QC
def merge(*exprs, **kwargs):
    """Merge expressions over their common sub expression.

    Keyword expressions are labelled with their keyword and appended,
    sorted by key, after the positional ones. With exactly one argument
    no ``Merge`` is built: a lone positional argument is returned
    unchanged and a lone keyword argument as ``value.label(keyword)``.

    Raises
    ------
    ValueError
        If the merged result has repeated field names.
    """
    if len(exprs) + len(kwargs) == 1:
        if exprs:
            return exprs[0]
        # the single argument must be the keyword one
        name, value = next(iter(kwargs.items()))
        return value.label(name)

    # append the labelled keyword expressions in key order
    labelled = tuple(label(value, name)
                     for name, value in sorted(kwargs.items(), key=first))
    exprs = exprs + labelled
    shared = common_subexpression(*exprs)
    merged = Merge(shared, exprs)

    fields = merged.fields
    if isdistinct(fields):
        return merged

    repeated = [name for name, count in frequencies(fields).items()
                if count > 1]
    raise ValueError("Repeated columns found: " + ', '.join(repeated))
コード例 #18
0
ファイル: collections.py プロジェクト: postelrich/blaze
def merge(*exprs, **kwargs):
    """Build a ``Merge`` of the given expressions.

    Positional expressions keep their order; keyword expressions are
    labelled with their keyword and follow in key order. A single
    positional argument is returned as-is, and a single keyword argument
    is returned as ``value.label(keyword)``, without building a
    ``Merge``.

    Raises
    ------
    ValueError
        If the merged result has repeated field names.
    """
    only_one = len(exprs) + len(kwargs) == 1
    if only_one and exprs:
        return exprs[0]
    if only_one and kwargs:
        (key, value), = kwargs.items()
        return value.label(key)

    # labelled keyword expressions go after the positional ones
    by_key = sorted(kwargs.items(), key=first)
    exprs = exprs + tuple(label(value, key) for key, value in by_key)
    child = common_subexpression(*exprs)
    result = Merge(child, exprs)

    field_names = result.fields
    if not isdistinct(field_names):
        duplicates = ', '.join(
            key for key, count in frequencies(field_names).items()
            if count > 1
        )
        raise ValueError("Repeated columns found: " + duplicates)

    return result
コード例 #19
0
ファイル: collections.py プロジェクト: leolujuyi/blaze
def merge(*exprs, **kwargs):
    """Merge positional and keyword expressions into one ``Merge``.

    With exactly one argument no ``Merge`` is built: a lone positional
    argument is returned unchanged and a lone keyword argument is
    returned as ``value.label(keyword)``. Otherwise keyword expressions
    are labelled with their keyword and appended after the positional
    ones.

    Raises
    ------
    ValueError
        If ``common_subexpression`` raises for these inputs, or if the
        merged result has repeated field names.
    """
    if len(exprs) + len(kwargs) == 1:
        if exprs:
            return exprs[0]
        if kwargs:
            [(k, v)] = kwargs.items()
            return v.label(k)
    # Get common sub expression
    exprs = exprs + tuple(label(v, k) for k, v in kwargs.items())
    try:
        child = common_subexpression(*exprs)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # propagate rather than be reported as a missing sub expression.
        raise ValueError(
            "No common sub expression found for input expressions")

    result = Merge(child, exprs)

    if not isdistinct(result.fields):
        raise ValueError("Repeated columns found: " + ', '.join(
            k for k, v in frequencies(result.fields).items() if v > 1))

    return result
コード例 #20
0
async def test_asyncio_map():
    """Exercise ``AioClient.map`` with plain inputs, futures and keywords."""
    async with AioClient(processes=False) as c:
        # Mapping over five inputs must give five Future objects with
        # unique keys.
        L1 = c.map(inc, range(5))
        assert len(L1) == 5
        assert isdistinct(x.key for x in L1)
        assert all(isinstance(x, Future) for x in L1)

        # Awaiting a future yields its computed value.
        result = await L1[0]
        assert result == inc(0)

        # Futures can themselves be used as inputs to map.
        L2 = c.map(inc, L1)

        result = await L2[1]
        assert result == inc(inc(1))

        # A list of futures can be passed as a single argument to submit.
        total = c.submit(sum, L2)
        result = await total
        assert result == sum(map(inc, map(inc, range(5))))

        # Mapping a two-argument function over two lists of futures.
        L3 = c.map(add, L1, L2)
        result = await L3[1]
        assert result == inc(1) + inc(inc(1))

        # Inputs of unequal length: the result must match the built-in
        # map, which stops at the shortest input.
        L4 = c.map(add, range(3), range(4))
        results = await c.gather(L4)
        assert results == list(map(add, range(3), range(4)))

        def f(x, y=10):
            return x + y

        # A keyword argument is applied to every call: x + 5 for x in 0..4.
        L5 = c.map(f, range(5), y=5)
        results = await c.gather(L5)
        assert results == list(range(5, 10))

        # The keyword may itself be a future; the expected values imply it
        # is replaced by its result, f(10) == 20, before f runs.
        y = c.submit(f, 10)
        L6 = c.map(f, range(5), y=y)
        results = await c.gather(L6)
        assert results == list(range(20, 25))
コード例 #21
0
async def test_asyncio_map():
    """Exercise ``AioClient.map`` with plain inputs, futures and keywords."""
    async with AioClient(processes=False) as c:
        # Mapping over five inputs must give five AioFuture objects with
        # unique keys.
        L1 = c.map(inc, range(5))
        assert len(L1) == 5
        assert isdistinct(x.key for x in L1)
        assert all(isinstance(x, AioFuture) for x in L1)

        # Awaiting a future yields its computed value.
        result = await L1[0]
        assert result == inc(0)

        # Futures can themselves be used as inputs to map.
        L2 = c.map(inc, L1)

        result = await L2[1]
        assert result == inc(inc(1))

        # A list of futures can be passed as a single argument to submit.
        total = c.submit(sum, L2)
        result = await total
        assert result == sum(map(inc, map(inc, range(5))))

        # Mapping a two-argument function over two lists of futures.
        L3 = c.map(add, L1, L2)
        result = await L3[1]
        assert result == inc(1) + inc(inc(1))

        # Inputs of unequal length: the result must match the built-in
        # map, which stops at the shortest input.
        L4 = c.map(add, range(3), range(4))
        results = await c.gather(L4)
        assert results == list(map(add, range(3), range(4)))

        def f(x, y=10):
            return x + y

        # A keyword argument is applied to every call: x + 5 for x in 0..4.
        L5 = c.map(f, range(5), y=5)
        results = await c.gather(L5)
        assert results == list(range(5, 10))

        # The keyword may itself be a future; the expected values imply it
        # is replaced by its result, f(10) == 20, before f runs.
        y = c.submit(f, 10)
        L6 = c.map(f, range(5), y=y)
        results = await c.gather(L6)
        assert results == list(range(20, 25))
コード例 #22
0
ファイル: collections.py プロジェクト: blaze/blaze
def merge(*exprs, **kwargs):
    """Build a ``Merge`` expression from positional and keyword inputs.

    Each positional expression is wrapped with the default name
    ``_<position>``; each keyword expression is wrapped and labelled with
    its keyword. Positional expressions come first, followed by the
    keyword ones in key order.

    When only one argument is passed no ``Merge`` is constructed: a lone
    positional argument is returned unchanged, and a lone keyword
    argument is returned as ``value.label(keyword)``.

    Raises
    ------
    TypeError
        If every expression has ``ndim`` equal to 0.
    ValueError
        If the merged result has repeated field names.
    """
    only_one = len(exprs) + len(kwargs) == 1
    if only_one and exprs:
        # a single positional argument is returned unchanged
        return exprs[0]
    if only_one and kwargs:
        # a single keyword argument is labelled and returned
        (key, value), = kwargs.items()
        return value.label(key)

    # keyword expressions are labelled and appended in key order
    by_key = sorted(kwargs.items(), key=first)
    wrapped = []
    for position, expr in enumerate(exprs):
        wrapped.append(_wrap(expr, '_%s' % position))
    for key, value in by_key:
        wrapped.append(label(_wrap(value, key), key))
    exprs = tuple(wrapped)

    if all(ndim(expr) == 0 for expr in exprs):
        raise TypeError('cannot merge all scalar expressions')

    args_expr = varargsexpr(exprs)
    merged_shape = maxshape(map(shape, exprs))
    result = Merge(exprs, args_expr, merged_shape)

    field_names = result.fields
    if not isdistinct(field_names):
        duplicates = ', '.join(
            key for key, count in frequencies(field_names).items()
            if count > 1
        )
        raise ValueError("Repeated columns found: " + duplicates)

    return result