Example #1
def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Sigmoid(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.params[0]]
    bias = [brick1.b, brick2.params[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by bricks classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by bricks instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex='W_no.?m')
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)

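The test snippets in this listing assume Blocks/Theano imports that the excerpts do not show. A minimal preamble for Example #1, sketched under the assumption that it targets an old Blocks release (this variant still uses the Sigmoid brick and the params attribute, which later releases renamed to Logistic and parameters, as in Example #3 below):

# Assumed preamble (not part of the original snippet).
from theano import tensor

from blocks.bricks import Bias, Linear, Sigmoid
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.roles import BIAS, FILTER, PARAMETER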
Example #2
    def __init__(self, ref_data, output_dim):
        input_dim = ref_data.shape[1]

        ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32), name="ref_data")

        # Construct the model
        j = tensor.lvector("j")
        r = ref_data_sh[j, :]
        x = tensor.fmatrix("x")
        y = tensor.ivector("y")

        # the first MLP's input dimension must match the feature dimension of r
        mlp0 = MLP(activations=activation_functions_0, dims=[input_dim] + hidden_dims_0, name="e0")
        mlp0vs = MLP(activations=[None], dims=[hidden_dims_0[-1], input_dim], name="de0")
        mlp1 = MLP(
            activations=activation_functions_1, dims=[hidden_dims_0[-1]] + hidden_dims_1 + [n_inter], name="inter_gen"
        )
        mlp2 = MLP(
            activations=activation_functions_2 + [None], dims=[n_inter] + hidden_dims_2 + [output_dim], name="end_mlp"
        )

        encod = mlp0.apply(r)
        rprime = mlp0vs.apply(encod)
        inter_weights = mlp1.apply(encod)

        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = inter_act_fun.apply(ibias.apply(tensor.dot(x, inter_weights)))

        final = mlp2.apply(inter)

        cost = Softmax().categorical_cross_entropy(y, final)
        confidence = Softmax().apply(final)

        pred = final.argmax(axis=1)
        error_rate = tensor.neq(y, pred).mean()

        # Initialize parameters
        for brick in [mlp0, mlp0vs, mlp1, mlp2]:
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0.001)
            brick.initialize()

        # apply regularization
        cg = ComputationGraph([cost, error_rate])

        if r_dropout != 0:
            # - dropout on input vector r : r_dropout
            cg = apply_dropout(cg, [r], r_dropout)

        s_dropout_vars = []
        if s_dropout != 0:
            # - dropout on intermediate layers of first mlp : s_dropout
            s_dropout_vars = list(
                set(VariableFilter(bricks=[Tanh], name="output")(ComputationGraph([inter_weights])))
                - set([inter_weights])
            )
            cg = apply_dropout(cg, s_dropout_vars, s_dropout)

        if i_dropout != 0:
            # - dropout on input to second mlp : i_dropout
            cg = apply_dropout(cg, [inter], i_dropout)

        if a_dropout != 0:
            # - dropout on hidden layers of second mlp : a_dropout
            a_dropout_vars = list(
                set(VariableFilter(bricks=[Tanh], name="output")(ComputationGraph([final])))
                - set([inter_weights])
                - set(s_dropout_vars)
            )
            cg = apply_dropout(cg, a_dropout_vars, a_dropout)

        if w_noise_std != 0:
            # - apply noise on weight variables
            weight_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, weight_vars, w_noise_std)

        [cost_reg, error_rate_reg] = cg.outputs

        # add reconstruction penalty for AE part
        penalty_val = tensor.sqrt(((r - rprime) ** 2).sum(axis=1)).mean()
        cost_reg = cost_reg + reconstruction_penalty * penalty_val

        self.cost = cost
        self.cost_reg = cost_reg
        self.error_rate = error_rate
        self.error_rate_reg = error_rate_reg
        self.pred = pred
        self.confidence = confidence
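A hedged usage sketch for the model above: the constructor reads its hyperparameters (hidden_dims_0, hidden_dims_1, hidden_dims_2, n_inter, the activation lists, inter_act_fun, the dropout/noise rates, reconstruction_penalty) from enclosing module scope, and the excerpt does not show the class name, so everything below is an assumption for illustration rather than the original API.

# Hypothetical usage; assumes the class is named Model and that the
# module-level hyperparameters mentioned above are defined.
import numpy
import theano
from blocks.graph import ComputationGraph

ref_data = numpy.random.rand(1000, 50).astype(numpy.float32)
model = Model(ref_data, output_dim=10)

# Compile a prediction function from the unregularized graph; its inputs
# are the j and x variables created inside __init__.
pred_cg = ComputationGraph([model.pred])
predict = theano.function(pred_cg.inputs, model.pred)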
Example #3
def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name="linear1")
    brick2 = Bias(2, name="bias1")
    activation = Logistic(name="sigm")

    x = tensor.vector()
    h1 = brick1.apply(x)
    h2 = activation.apply(h1)
    h2.name = "h2act"
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.parameters[0]]
    bias = [brick1.b, brick2.parameters[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by bricks classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by bricks instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name="W_norm")
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex="W_no.?m")
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by theano name
    theano_name_filter = VariableFilter(theano_name="h2act")
    assert [cg.variables[11]] == theano_name_filter(cg.variables)

    # Testing filtering by theano name regex
    theano_name_filter_regex = VariableFilter(theano_name_regex="h2a.?t")
    assert [cg.variables[11]] == theano_name_filter_regex(cg.variables)

    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    variables = [cg.variables[1], cg.variables[8]]
    assert variables == appli_filter(cg.variables)

    input1 = tensor.matrix("input1")
    input2 = tensor.matrix("input2")
    merge = Merge(["input1", "input2"], [5, 6], 2)
    merged = merge.apply(input1, input2)
    merge_cg = ComputationGraph(merged)
    outputs = VariableFilter(roles=[OUTPUT], bricks=[merge])(merge_cg.variables)
    assert merged in outputs
    assert len(outputs) == 3

    outputs_application = VariableFilter(roles=[OUTPUT], applications=[merge.apply])(merge_cg.variables)
    assert outputs_application == [merged]
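Note the distinction the name filters rely on: name and name_regex match the annotation name that Blocks attaches when a brick application creates a variable (W_norm, output, ...), while theano_name and theano_name_regex match the plain Theano-level name, which is why the test assigns h2.name = "h2act" by hand. A minimal sketch reusing the graph built above:

# Illustration only, not part of the original test.
VariableFilter(theano_name="h2act")(cg.variables)  # selects h2 via its Theano name
VariableFilter(name="output")(cg.variables)        # selects application outputs via annotation name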
Example #4
    def __init__(self, ref_data, output_dim):
        input_dim = ref_data.shape[1]
        ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32), name='ref_data')

        rng = RandomStreams()

        ae_bricks = []
        ae_input = ref_data_sh
        ae_costs = []
        for i, (idim, odim) in enumerate(zip([input_dim] + ae_dims[:-1], ae_dims)):
            ae_mlp = MLP(activations=[ae_activations[i]],
                         dims=[idim, odim],
                         name='enc%i' % i)
            enc = ae_mlp.apply(ae_input)
            enc_n = ae_mlp.apply(ae_input + rng.normal(size=ae_input.shape, std=ae_f_noise_std))
            ae_mlp_dec = MLP(activations=[ae_activations[i]],
                             dims=[odim, idim],
                             name='dec%i' % i)
            dec = ae_mlp_dec.apply(enc_n)

            cost = tensor.sqrt(((ae_input - dec) ** 2).sum(axis=1)).mean() + \
                        ae_l1_pen * abs(enc).sum(axis=1).mean()
            ae_costs.append(cost)

            ae_input = enc
            ae_bricks = ae_bricks + [ae_mlp, ae_mlp_dec]

        self.ae_costs = ae_costs

        ref_data_enc = ae_input

        # Construct the model
        j = tensor.lvector('j')
        r = ref_data_enc[j, :]
        x = tensor.fmatrix('x')
        y = tensor.ivector('y')

        # the first MLP's input dimension must match the feature dimension of r
        mlp = MLP(activations=activation_functions,
                  dims=[ae_dims[-1]] + hidden_dims + [n_inter], name='inter_gen')
        mlp2 = MLP(activations=activation_functions_2 + [None],
                   dims=[n_inter] + hidden_dims_2 + [output_dim],
                   name='end_mlp')

        inter_weights = mlp.apply(r)

        if inter_bias is None:
            ibias = Bias(n_inter)
            ibias.biases_init = Constant(0)
            ibias.initialize()
            inter = ibias.apply(tensor.dot(x, inter_weights))
        else:
            inter = tensor.dot(x, inter_weights) - inter_bias
        inter = inter_act_fun.apply(inter)

        final = mlp2.apply(inter)

        cost = Softmax().categorical_cross_entropy(y, final)
        confidence = Softmax().apply(final)

        pred = final.argmax(axis=1)
        # error_rate = tensor.neq(y, pred).mean()
        ber = balanced_error_rate.ber(y, pred)

        # Initialize parameters
        for brick in ae_bricks + [mlp, mlp2]:
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0.001)
            brick.initialize()

        # apply regularization
        cg = ComputationGraph([cost, ber])

        if r_dropout != 0:
            # - dropout on input vector r : r_dropout
            cg = apply_dropout(cg, [r], r_dropout)

        if x_dropout != 0:
            cg = apply_dropout(cg, [x], x_dropout)

        s_dropout_vars = []
        if s_dropout != 0:
            # - dropout on intermediate layers of first mlp : s_dropout
            s_dropout_vars = list(
                set(VariableFilter(bricks=[Tanh], name='output')(
                    ComputationGraph([inter_weights])))
                - set([inter_weights]))
            cg = apply_dropout(cg, s_dropout_vars, s_dropout)

        if i_dropout != 0:
            # - dropout on input to second mlp : i_dropout
            cg = apply_dropout(cg, [inter], i_dropout)

        if a_dropout != 0:
            # - dropout on hidden layers of second mlp : a_dropout
            a_dropout_vars = list(
                set(VariableFilter(bricks=[Tanh], name='output')(
                    ComputationGraph([final])))
                - set([inter_weights]) - set(s_dropout_vars))
            cg = apply_dropout(cg, a_dropout_vars, a_dropout)

        if r_noise_std != 0:
            cg = apply_noise(cg, [r], r_noise_std)

        if w_noise_std != 0:
            # - apply noise on weight variables
            weight_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, weight_vars, w_noise_std)

        [cost_reg, ber_reg] = cg.outputs
        
        if s_l1pen != 0:
            s_weights = VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg)
            cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
        if i_l1pen != 0:
            cost_reg = cost_reg + i_l1pen * abs(inter).sum()
        if a_l1pen != 0:
            a_weights = VariableFilter(bricks=mlp2.linear_transformations, roles=[WEIGHT])(cg)
            cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)


        self.cost = cost
        self.cost_reg = cost_reg
        self.ber = ber
        self.ber_reg = ber_reg
        self.pred = pred
        self.confidence = confidence
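The per-layer reconstruction costs collected in self.ae_costs lend themselves to greedy layer-wise pretraining of the autoencoder stack before the supervised cost is optimized. A sketch under the same assumptions as before (class named Model, hyperparameters in module scope); plain SGD is used for illustration, and a stricter greedy scheme would restrict the updates to the current layer's bricks:

# Hypothetical pretraining loop.
import theano
from theano import tensor
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.roles import PARAMETER

model = Model(ref_data, output_dim=10)
for layer_cost in model.ae_costs:
    cost_cg = ComputationGraph([layer_cost])
    params = VariableFilter(roles=[PARAMETER])(cost_cg.variables)
    grads = tensor.grad(layer_cost, params)
    updates = [(p, p - 0.01 * g) for p, g in zip(params, grads)]
    # the AE costs depend only on the shared ref_data, so no inputs are needed
    train_step = theano.function([], layer_cost, updates=updates)
    for _ in range(100):
        train_step()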
Example #5
def test_variable_filter():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')
    brick2 = Bias(2, name='bias1')
    activation = Logistic(name='sigm')

    x = tensor.vector()
    h1 = brick1.apply(x, call_id='brick1_call_id')
    h2 = activation.apply(h1, call_id='act')
    h2.name = "h2act"
    y = brick2.apply(h2)
    cg = ComputationGraph(y)

    parameters = [brick1.W, brick1.b, brick2.parameters[0]]
    bias = [brick1.b, brick2.parameters[0]]
    brick1_bias = [brick1.b]

    # Testing filtering by role
    role_filter = VariableFilter(roles=[PARAMETER])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[FILTER])
    assert [] == role_filter(cg.variables)

    # Testing filtering by role using each_role flag
    role_filter = VariableFilter(roles=[PARAMETER, BIAS])
    assert parameters == role_filter(cg.variables)
    role_filter = VariableFilter(roles=[PARAMETER, BIAS], each_role=True)
    assert not parameters == role_filter(cg.variables)
    assert bias == role_filter(cg.variables)

    # Testing filtering by bricks classes
    brick_filter = VariableFilter(roles=[BIAS], bricks=[Linear])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by bricks instances
    brick_filter = VariableFilter(roles=[BIAS], bricks=[brick1])
    assert brick1_bias == brick_filter(cg.variables)

    # Testing filtering by name
    name_filter = VariableFilter(name='W_norm')
    assert [cg.variables[2]] == name_filter(cg.variables)

    # Testing filtering by name regex
    name_filter_regex = VariableFilter(name_regex='W_no.?m')
    assert [cg.variables[2]] == name_filter_regex(cg.variables)

    # Testing filtering by theano name
    theano_name_filter = VariableFilter(theano_name='h2act')
    assert [cg.variables[11]] == theano_name_filter(cg.variables)

    # Testing filtering by theano name regex
    theano_name_filter_regex = VariableFilter(theano_name_regex='h2a.?t')
    assert [cg.variables[11]] == theano_name_filter_regex(cg.variables)

    brick1_apply_variables = [cg.variables[1], cg.variables[8]]
    # Testing filtering by application
    appli_filter = VariableFilter(applications=[brick1.apply])
    assert brick1_apply_variables == appli_filter(cg.variables)

    # Testing filtering by unbound application
    unbound_appli_filter = VariableFilter(applications=[Linear.apply])
    assert brick1_apply_variables == unbound_appli_filter(cg.variables)

    # Testing filtering by call identifier
    call_id_filter = VariableFilter(call_id='brick1_call_id')
    assert brick1_apply_variables == call_id_filter(cg.variables)

    input1 = tensor.matrix('input1')
    input2 = tensor.matrix('input2')
    merge = Merge(['input1', 'input2'], [5, 6], 2)
    merged = merge.apply(input1, input2)
    merge_cg = ComputationGraph(merged)
    outputs = VariableFilter(
        roles=[OUTPUT], bricks=[merge])(merge_cg.variables)
    assert merged in outputs
    assert len(outputs) == 3

    outputs_application = VariableFilter(
        roles=[OUTPUT], applications=[merge.apply])(merge_cg.variables)
    assert outputs_application == [merged]
Example #6
    def __init__(self, ref_data, output_dim):
        ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                    name='ref_data')

        # Construct the model
        j = tensor.lvector('j')
        x = tensor.fmatrix('x')
        y = tensor.ivector('y')

        last_outputs = []
        s_dropout_vars = []
        r_dropout_vars = []
        i_dropout_vars = []
        penalties = []

        for i in range(nparts):
            fs = numpy.random.binomial(1,
                                       part_r_proba,
                                       size=(ref_data.shape[1], ))
            input_dim = int(fs.sum())

            fs_sh = theano.shared(fs)
            r = ref_data_sh[j, :][:, fs_sh.nonzero()[0]]

            mlp0 = MLP(activations=activation_functions_0,
                       dims=[input_dim] + hidden_dims_0,
                       name='enc%d' % i)
            mlp0r = MLP(activations=[None],
                        dims=[hidden_dims_0[-1], input_dim],
                        name='dec%d' % i)
            mlp1 = MLP(activations=activation_functions_1,
                       dims=[hidden_dims_0[-1]] + hidden_dims_1 + [n_inter],
                       name='inter_gen_%d' % i)
            mlp2 = MLP(activations=activation_functions_2 + [None],
                       dims=[n_inter] + hidden_dims_2 + [output_dim],
                       name='end_mlp_%d' % i)

            encod = mlp0.apply(r)
            rprime = mlp0r.apply(encod)
            inter_weights = mlp1.apply(encod)

            ibias = Bias(n_inter, name='inter_bias_%d' % i)
            inter = ibias.apply(tensor.dot(x, inter_weights))
            inter = inter_act_fun.apply(inter)

            out = mlp2.apply(inter)

            penalties.append(
                tensor.sqrt(((rprime - r)**2).sum(axis=1)).mean()[None])

            last_outputs.append(out)

            r_dropout_vars.append(r)
            s_dropout_vars = s_dropout_vars + VariableFilter(
                bricks=[Tanh],
                name='output')(ComputationGraph([inter_weights]))
            i_dropout_vars.append(inter)

            # Initialize parameters
            for brick in [mlp0, mlp0r, mlp1, mlp2, ibias]:
                brick.weights_init = IsotropicGaussian(0.01)
                brick.biases_init = Constant(0.001)
                brick.initialize()

        final = tensor.concatenate([o[:, :, None] for o in last_outputs],
                                   axis=2).mean(axis=2)

        cost = Softmax().categorical_cross_entropy(y, final)
        confidence = Softmax().apply(final)

        pred = final.argmax(axis=1)
        error_rate = tensor.neq(y, pred).mean()

        # apply regularization
        cg = ComputationGraph([cost, error_rate])

        if w_noise_std != 0:
            # - apply noise on weight variables
            weight_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, weight_vars, w_noise_std)

        if s_dropout != 0:
            cg = apply_dropout(cg, s_dropout_vars, s_dropout)
        if r_dropout != 0:
            cg = apply_dropout(cg, r_dropout_vars, r_dropout)
        if i_dropout != 0:
            cg = apply_dropout(cg, i_dropout_vars, i_dropout)

        [cost_reg, error_rate_reg] = cg.outputs

        cost_reg = cost_reg + reconstruction_penalty * tensor.concatenate(
            penalties, axis=0).sum()

        self.cost = cost
        self.cost_reg = cost_reg
        self.error_rate = error_rate
        self.error_rate_reg = error_rate_reg
        self.pred = pred
        self.confidence = confidence
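The final combination stacks each part's (batch, output_dim) scores along a third axis and averages them, i.e. a uniform ensemble over the nparts submodels. A stand-alone NumPy sketch of the same concatenate-and-mean trick:

import numpy

a = numpy.ones((4, 3))
b = 3 * numpy.ones((4, 3))
stacked = numpy.concatenate([o[:, :, None] for o in (a, b)], axis=2)
assert stacked.shape == (4, 3, 2)
assert (stacked.mean(axis=2) == 2).all()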
Example #7
    def __init__(self, ref_data, output_dim):
        if pca_dims is not None:
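            # Note: ref_data is not mean-centred here, so these are the top
            # eigenvectors of the raw second-moment matrix rather than of the
            # covariance proper.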
            covmat = numpy.dot(ref_data.T, ref_data)
            ev, evec = numpy.linalg.eigh(covmat)  # covmat is symmetric
            best_i = ev.argsort()[-pca_dims:]
            best_evecs = evec[:, best_i]
            best_evecs = best_evecs / numpy.sqrt(
                (best_evecs**2).sum(axis=0))  # normalize
            ref_data = numpy.dot(ref_data, best_evecs)

        input_dim = ref_data.shape[1]

        ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                    name='ref_data')

        # Construct the model
        j = tensor.lvector('j')
        r = ref_data_sh[j, :]
        x = tensor.fmatrix('x')
        y = tensor.ivector('y')

        # the first MLP's input dimension must match the feature dimension of r
        mlp = MLP(activations=activation_functions,
                  dims=[input_dim] + hidden_dims + [n_inter],
                  name='inter_gen')
        mlp2 = MLP(activations=activation_functions_2 + [None],
                   dims=[n_inter] + hidden_dims_2 + [output_dim],
                   name='end_mlp')

        inter_weights = mlp.apply(r)

        if inter_bias is None:
            ibias = Bias(n_inter)
            ibias.biases_init = Constant(0)
            ibias.initialize()
            inter = ibias.apply(tensor.dot(x, inter_weights))
        else:
            inter = tensor.dot(x, inter_weights) - inter_bias
        inter = inter_act_fun.apply(inter)

        final = mlp2.apply(inter)

        cost = Softmax().categorical_cross_entropy(y, final)
        confidence = Softmax().apply(final)

        pred = final.argmax(axis=1)
        # error_rate = tensor.neq(y, pred).mean()
        ber = balanced_error_rate.ber(y, pred)

        # Initialize parameters
        for brick in [mlp, mlp2]:
            brick.weights_init = IsotropicGaussian(0.01)
            brick.biases_init = Constant(0.001)
            brick.initialize()

        # apply regularization
        cg = ComputationGraph([cost, ber])

        if r_dropout != 0:
            # - dropout on input vector r : r_dropout
            cg = apply_dropout(cg, [r], r_dropout)

        if x_dropout != 0:
            cg = apply_dropout(cg, [x], x_dropout)

        s_dropout_vars = []
        if s_dropout != 0:
            # - dropout on intermediate layers of first mlp : s_dropout
            s_dropout_vars = list(
                set(
                    VariableFilter(bricks=[Tanh], name='output')
                    (ComputationGraph([inter_weights]))) -
                set([inter_weights]))
            cg = apply_dropout(cg, s_dropout_vars, s_dropout)

        if i_dropout != 0:
            # - dropout on input to second mlp : i_dropout
            cg = apply_dropout(cg, [inter], i_dropout)

        if a_dropout != 0:
            # - dropout on hidden layers of second mlp : a_dropout
            a_dropout_vars = list(
                set(
                    VariableFilter(bricks=[Tanh], name='output')
                    (ComputationGraph([final]))) - set([inter_weights]) -
                set(s_dropout_vars))
            cg = apply_dropout(cg, a_dropout_vars, a_dropout)

        if r_noise_std != 0:
            cg = apply_noise(cg, [r], r_noise_std)

        if w_noise_std != 0:
            # - apply noise on weight variables
            weight_vars = VariableFilter(roles=[WEIGHT])(cg)
            cg = apply_noise(cg, weight_vars, w_noise_std)

        [cost_reg, ber_reg] = cg.outputs

        if s_l1pen != 0:
            s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                       roles=[WEIGHT])(cg)
            cost_reg = cost_reg + s_l1pen * sum(
                abs(w).sum() for w in s_weights)
        if i_l1pen != 0:
            cost_reg = cost_reg + i_l1pen * abs(inter).sum()
        if a_l1pen != 0:
            a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                       roles=[WEIGHT])(cg)
            cost_reg = cost_reg + a_l1pen * sum(
                abs(w).sum() for w in a_weights)

        self.cost = cost
        self.cost_reg = cost_reg
        self.ber = ber
        self.ber_reg = ber_reg
        self.pred = pred
        self.confidence = confidence