def test_learn_xor_function():
    """Train a small sigmoid network on XOR and check that it fits the data.

    The network chains Input -> FullyConnected(2) -> FullyConnected(1)
    ('OutLayer') -> BinomialCrossEntropy -> Loss. It is trained with an
    SgdStepper and a StopAfterEpoch(300) hook. The test fails if the rounded
    outputs of 'OutLayer' differ from the targets, or if the smallest logged
    rolling training loss is not below 0.5.
    """
    # --- network ---------------------------------------------------------
    inp = Input(out_shapes={'default': ('T', 'B', 2),
                            'targets': ('T', 'B', 1)})
    error_func = BinomialCrossEntropy()
    (inp
     >> FullyConnected(2, activation='sigmoid')
     >> FullyConnected(1, activation='sigmoid', name='OutLayer')
     >> error_func
     >> Loss())
    # The 'targets' output of the input layer feeds the 'targets' input of
    # the error function.
    net = Network.from_layer(inp - 'targets' >> 'targets' - error_func)
    net.initialize(Gaussian(1.0), seed=42)  # high weight-init needed

    # --- trainer ---------------------------------------------------------
    trainer = Trainer(SgdStepper(learning_rate=4.0), verbose=False)
    trainer.add_hook(StopAfterEpoch(300))

    # --- data ------------------------------------------------------------
    # The four XOR cases, reshaped to match the ('T', 'B', 2) and
    # ('T', 'B', 1) shapes declared on the input layer.
    xor_inputs = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
    xor_outputs = np.array([0., 1., 1., 0.])
    data = xor_inputs.reshape((1, 4, 2))
    targets = xor_outputs.reshape((1, 4, 1))

    trainer.train(net, Undivided(default=data, targets=targets))

    # --- checks ----------------------------------------------------------
    out = net.buffer.OutLayer.outputs.default
    if not np.all(np.round(out) == targets):
        # Dump the raw and rounded predictions to ease debugging.
        print('Network output:', out.flatten())
        print('Rounded output:', np.round(out.flatten()))
        print('Targets :', targets.flatten())
        raise AssertionError("Network training did not succeed.")

    best_loss = min(trainer.logs['rolling_training']['total_loss'])
    assert best_loss < 0.5
def test_learn_xor_function():
    """Check that a 2-layer sigmoid network can be trained to reproduce XOR.

    Builds the network, trains it on the four XOR input/target pairs and then
    asserts that (a) the rounded 'OutLayer' outputs equal the targets and
    (b) the minimum of the logged rolling training loss is below 0.5.
    """
    # Build the layer graph; the error function also receives the targets.
    inp = Input(out_shapes={'default': ('T', 'B', 2),
                            'targets': ('T', 'B', 1)})
    error_func = BinomialCrossEntropy()
    (inp >>
     FullyConnected(2, activation='sigmoid') >>
     FullyConnected(1, activation='sigmoid', name='OutLayer') >>
     error_func >>
     Loss())
    net = Network.from_layer(inp - 'targets' >> 'targets' - error_func)
    net.initialize(Gaussian(1.0), seed=42)  # high weight-init needed

    # Plain SGD, stopped by the StopAfterEpoch(300) hook.
    tr = Trainer(SgdStepper(learning_rate=4.0), verbose=False)
    tr.add_hook(StopAfterEpoch(300))

    # XOR truth table, shaped (1, 4, 2) for the inputs and (1, 4, 1) for the
    # targets.
    data = np.array([[0., 0.],
                     [0., 1.],
                     [1., 0.],
                     [1., 1.]]).reshape((1, 4, 2))
    targets = np.array([0., 1., 1., 0.]).reshape((1, 4, 1))
    tr.train(net, Undivided(default=data, targets=targets))

    out = net.buffer.OutLayer.outputs.default
    learned_xor = np.all(np.round(out) == targets)
    if not learned_xor:
        report = (
            ('Network output:', out.flatten()),
            ('Rounded output:', np.round(out.flatten())),
            ('Targets :', targets.flatten()),
        )
        for label, values in report:
            print(label, values)
        raise AssertionError("Network training did not succeed.")

    losses = tr.logs['rolling_training']['total_loss']
    assert min(losses) < 0.5
def create_net_from_spec(task_type, in_shape, out_shape, spec,
                         data_name='default', targets_name='targets',
                         mask_name=None, use_conv=None):
    """
    Create a complete network from a spec line like this "F50 F20 F50".

    Spec:
        Capital letters specify the layer type and are followed by arguments to
        the layer. Supported layers are:

          * F : FullyConnected
          * R : Recurrent
          * L : Lstm
          * B : BatchNorm
          * D : Dropout
          * C : Convolution2D
          * P : Pooling2D

        Where applicable the optional first argument is the activation function
        from the set {l, r, s, t} corresponding to 'linear', 'relu', 'sigmoid'
        and 'tanh' resp.

        FullyConnected, Recurrent and Lstm take their size as mandatory
        arguments (after the optional activation function argument).

        Dropout takes the dropout probability as an optional argument.

        Convolution2D takes two mandatory arguments: num_filters and
        kernel_size like this: 'C32:3' or with activation 'Cs32:3' meaning 32
        filters with a kernel size of 3x3. They can be followed by 'p1' for
        padding and/or 's2' for a stride of (2, 2).

        Pooling2D takes an optional first argument for the type of pooling:
        'm' for max and 'a' for average pooling. The next (mandatory) argument
        is the kernel size. As with Convolution2D it can be followed by 'p1'
        for padding and/or 's2' for setting the stride to (2, 2).

        Whitespace is allowed everywhere and will be completely ignored.

    Examples:
        The mnist_pi example can be expressed like this:

        >>> net = create_net_from_spec('classification', 784, 10,
        ...                            'D.2 F1200 D F1200 D')

        The cifar10_cnn example can be shortened like this:

        >>> net = create_net_from_spec(
        ...     'classification', (3, 32, 32), 10,
        ...     'C32:5p2 P3s2 C32:5p2 P3s2 C64:5p2 P3s2 F64')

    Args:
        task_type (str):
            one of ['classification', 'multi-label']. Every other value is
            rejected with a ValueError.
            NOTE(review): 'regression' used to be listed here but has always
            been rejected by this function -- confirm whether it should be
            supported.
        in_shape (int or tuple[int]):
            Shape of the input data.
        out_shape (int or tuple[int]):
            Output shape / nr of classes
        spec (str):
            A line describing the network as explained above.
        data_name (Optional[str]):
            Name of the input data which will be provided by a data iterator.
            Defaults to 'default'.
        targets_name (Optional[str]):
            Name of the ground-truth target data which will be provided by a
            data iterator. Defaults to 'targets'.
        mask_name (Optional[str]):
            Name of the mask data which will be provided by a data iterator.
            Defaults to None.

            The mask is needed if error should be injected only at certain
            time steps (for sequential data).
        use_conv (Optional[bool]):
            Specify whether the projection layer should be convolutional.
            If true the projection layer will use 1x1 convolutions otherwise
            it will be fully connected.
            Default is to autodetect this based on the output shape.

    Returns:
        brainstorm.structure.network.Network:
            The constructed network initialized with DenseSqrtFanInOut for
            layers with activation function and a simple Gaussian default and
            fallback.

    Raises:
        ValueError: If task_type is not 'classification' or 'multi-label'.
    """
    import re

    # Validate first, so an unsupported task type is rejected before any
    # layers are constructed.
    if task_type not in ('classification', 'multi-label'):
        raise ValueError('Unknown task type {}'.format(task_type))

    out_shape = (out_shape,) if isinstance(out_shape, int) else out_shape
    inp, outp = get_in_out_layers(task_type, in_shape, out_shape,
                                  data_name=data_name, mask_name=mask_name,
                                  targets_name=targets_name, use_conv=use_conv)
    output_name = 'Output.outputs.predictions'

    # Grammar of the spec line: a layer is one or more capital letters
    # followed by any number of arguments; an argument is a single lowercase
    # letter or a number, optionally followed by one of ':', '/' or '|'.
    LAYER_TYPE = r'\s*(?P<layer_type>[A-Z]+)\s*'
    FLOAT = r'\s*[-+]?[0-9]*\.?[0-9]+\s*'
    ARG = r'\s*([a-z]|{float})\s*[:/|]?\s*'.format(float=FLOAT)
    ARG_LIST = r'(?P<args>({arg})*)'.format(arg=ARG)
    ARCH_SPEC = r'({type}{args})'.format(type=LAYER_TYPE, args=ARG_LIST)

    current_layer = inp
    for m in re.finditer(ARCH_SPEC, spec):
        layer_type = m.group('layer_type')
        # ARG contains exactly one capture group, so re.split() alternates
        # between the text around a match and the captured argument; the
        # odd indices are therefore the arguments themselves.
        raw_args = re.split(ARG, m.group('args'))[1::2]
        args = [trynumber(a) for a in raw_args if a != '']
        current_layer >>= create_layer(layer_type, args)

    net = Network.from_layer(current_layer >> outp)
    net.output_name = output_name

    # Layers exposing an activation get an activation-aware initializer;
    # everything else uses the Gaussian default / fallback.
    init_dict = {
        name: initializers.DenseSqrtFanInOut(layer.activation)
        for name, layer in net.layers.items()
        if hasattr(layer, 'activation')
    }
    init_dict['default'] = initializers.Gaussian()
    init_dict['fallback'] = initializers.Gaussian()
    net.initialize(init_dict)

    return net
def lstm_net():
    """Return a network whose 'default' input feeds an Lstm(3) named 'out'."""
    source = Input(out_shapes={'default': ('T', 'B', 2)})
    return Network.from_layer(source >> Lstm(3, name='out'))
def simple_recurrent_net():
    """Return a network whose 'default' input feeds a Recurrent(3) named 'out'."""
    source = Input(out_shapes={'default': ('T', 'B', 2)})
    return Network.from_layer(source >> Recurrent(3, name='out'))
raise AssertionError("Outputs did not match.") # Check if context is same as final_context assert len(context) == len(final_context), "Context list sizes mismatch!" for (x, y) in zip(context, final_context): if x is None: assert y is None else: # print("Context:\n", x) # print("Should match:\n", y) assert np.allclose(x, y) inp = Input(out_shapes={'default': ('T', 'B', 4), 'targets': ('T', 'B', 1)}) hid = FullyConnected(2, name="Hid") out = SoftmaxCE(name='Output') (inp - 'targets' >> 'targets' - out) simple_net = Network.from_layer(inp >> hid >> out) def test_forward_pass_with_missing_data(): it = Undivided(default=np.random.randn(3, 2, 4))(simple_net.handler) with pytest.raises(KeyError): for _ in run_network(simple_net, it): pass for _ in run_network(simple_net, it, all_inputs=False): pass
# Check if context is same as final_context assert len(context) == len(final_context), "Context list sizes mismatch!" for (x, y) in zip(context, final_context): if x is None: assert y is None else: # print("Context:\n", x) # print("Should match:\n", y) assert np.allclose(x, y) inp = Input(out_shapes={'default': ('T', 'B', 4), 'targets': ('T', 'B', 1)}) hid = FullyConnected(2, name="Hid") out = SoftmaxCE(name='Output') (inp - 'targets' >> 'targets' - out) simple_net = Network.from_layer(inp >> hid >> out) def test_forward_pass_with_missing_data(): it = Undivided(default=np.random.randn(3, 2, 4))(simple_net.handler) with pytest.raises(KeyError): for _ in run_network(simple_net, it): pass for _ in run_network(simple_net, it, all_inputs=False): pass