def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name="ref_data")

    # Construct the model
    j = tensor.lvector("j")
    r = ref_data_sh[j, :]
    x = tensor.fmatrix("x")
    y = tensor.ivector("y")

    # mlp0/mlp0vs form an autoencoder on the reference rows; mlp1 generates,
    # from each encoded reference row, one row of the weight matrix of the
    # intermediate layer applied to x; mlp2 classifies the result.
    # Note: x must have nr columns (one per selected reference row), since it
    # is multiplied by the generated (nr, n_inter) weight matrix.
    mlp0 = MLP(activations=activation_functions_0,
               dims=[input_dim] + hidden_dims_0, name="e0")
    mlp0vs = MLP(activations=[None],
                 dims=[hidden_dims_0[-1], input_dim], name="de0")
    mlp1 = MLP(activations=activation_functions_1,
               dims=[hidden_dims_0[-1]] + hidden_dims_1 + [n_inter],
               name="inter_gen")
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name="end_mlp")

    encod = mlp0.apply(r)
    rprime = mlp0vs.apply(encod)
    inter_weights = mlp1.apply(encod)

    ibias = Bias(n_inter)
    ibias.biases_init = Constant(0)
    ibias.initialize()
    inter = inter_act_fun.apply(ibias.apply(tensor.dot(x, inter_weights)))

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp0, mlp0vs, mlp1, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # Apply regularization
    cg = ComputationGraph([cost, error_rate])

    # Defined unconditionally so the a_dropout branch below can subtract this
    # set even when s_dropout is disabled.
    s_dropout_vars = []

    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)

    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(set(VariableFilter(bricks=[Tanh], name="output")
                                  (ComputationGraph([inter_weights])))
                              - set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)

    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)

    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(set(VariableFilter(bricks=[Tanh], name="output")
                                  (ComputationGraph([final])))
                              - set([inter_weights])
                              - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)

    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, error_rate_reg] = cg.outputs

    # Add the reconstruction penalty for the autoencoder part
    penalty_val = tensor.sqrt(((r - rprime) ** 2).sum(axis=1)).mean()
    cost_reg = cost_reg + reconstruction_penalty * penalty_val

    self.cost = cost
    self.cost_reg = cost_reg
    self.error_rate = error_rate
    self.error_rate_reg = error_rate_reg
    self.pred = pred
    self.confidence = confidence
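

# Usage sketch (an assumption, not part of the original module): compiling the
# regularized cost built above into a plain-SGD training step with Theano.
# `model` stands for an instance of the class defined above; compile_sgd_step
# and learning_rate are hypothetical names introduced for illustration only.
import theano
from blocks.graph import ComputationGraph


def compile_sgd_step(model, learning_rate=0.01):
    cg = ComputationGraph([model.cost_reg])
    params = cg.parameters  # shared variables tagged with the PARAMETER role
    grads = theano.grad(model.cost_reg, params)
    updates = [(p, p - learning_rate * g) for p, g in zip(params, grads)]
    # cg.inputs recovers the symbolic inputs (j, x, y) declared in __init__;
    # inspect [v.name for v in cg.inputs] to confirm the argument order.
    return theano.function(cg.inputs, model.cost_reg, updates=updates,
                           allow_input_downcast=True)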


def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    # Stacked denoising autoencoder on the reference data: each layer is
    # trained to reconstruct its clean input from a noise-corrupted encoding,
    # with an L1 sparsity penalty on the (clean) code that is passed on.
    rng = RandomStreams()
    ae_bricks = []
    ae_input = ref_data_sh
    ae_costs = []
    for i, (idim, odim) in enumerate(zip([input_dim] + ae_dims[:-1], ae_dims)):
        ae_mlp = MLP(activations=[ae_activations[i]],
                     dims=[idim, odim], name='enc%i' % i)
        enc = ae_mlp.apply(ae_input)
        enc_n = ae_mlp.apply(ae_input + rng.normal(size=ae_input.shape,
                                                   std=ae_f_noise_std))
        ae_mlp_dec = MLP(activations=[ae_activations[i]],
                         dims=[odim, idim], name='dec%i' % i)
        dec = ae_mlp_dec.apply(enc_n)
        cost = tensor.sqrt(((ae_input - dec) ** 2).sum(axis=1)).mean() + \
            ae_l1_pen * abs(enc).sum(axis=1).mean()
        ae_costs.append(cost)
        ae_input = enc
        ae_bricks = ae_bricks + [ae_mlp, ae_mlp_dec]

    self.ae_costs = ae_costs
    ref_data_enc = ae_input

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_enc[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # The first MLP maps each selected (encoded) reference row to one row of
    # the weight matrix of the intermediate layer applied to x.
    # Note: x must have nr columns (one per selected reference row).
    mlp = MLP(activations=activation_functions,
              dims=[ae_dims[-1]] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in ae_bricks + [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # Apply regularization
    cg = ComputationGraph([cost, ber])

    # Defined unconditionally so the a_dropout branch below can subtract this
    # set even when s_dropout is disabled.
    s_dropout_vars = []

    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)

    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)

    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(set(VariableFilter(bricks=[Tanh], name='output')
                                  (ComputationGraph([inter_weights])))
                              - set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)

    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)

    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(set(VariableFilter(bricks=[Tanh], name='output')
                                  (ComputationGraph([final])))
                              - set([inter_weights])
                              - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)

    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)

    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    # L1 penalties: on the first MLP's weights, on the intermediate
    # activations, and on the second MLP's weights
    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
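

# Hedged sketch (assumption, not from the original source): greedy layer-wise
# pretraining of the stacked denoising autoencoder above, minimizing each cost
# in self.ae_costs in turn. The costs depend only on the shared ref_data, so
# the compiled step functions take no inputs. Caveat: cg.parameters for layer
# i also contains the earlier encoders' parameters (the encodings are
# chained); a stricter greedy scheme would restrict updates to the current
# encoder/decoder pair. pretrain_autoencoders, n_steps and learning_rate are
# illustrative names.
import theano
from blocks.graph import ComputationGraph


def pretrain_autoencoders(model, n_steps=100, learning_rate=0.01):
    for layer_cost in model.ae_costs:
        cg = ComputationGraph([layer_cost])
        grads = theano.grad(layer_cost, cg.parameters)
        step = theano.function(
            [], layer_cost,
            updates=[(p, p - learning_rate * g)
                     for p, g in zip(cg.parameters, grads)])
        for _ in range(n_steps):
            step()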


def __init__(self, ref_data, output_dim):
    if pca_dims is not None:
        # Project the reference data onto its pca_dims leading principal
        # components (eigenvectors of the uncentered covariance matrix).
        covmat = numpy.dot(ref_data.T, ref_data)
        ev, evec = numpy.linalg.eig(covmat)
        best_i = ev.argsort()[-pca_dims:]
        best_evecs = evec[:, best_i]
        # normalize
        best_evecs = best_evecs / numpy.sqrt((best_evecs ** 2).sum(axis=0))
        ref_data = numpy.dot(ref_data, best_evecs)

    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_sh[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # The first MLP maps each selected reference row to one row of the weight
    # matrix of the intermediate layer applied to x.
    # Note: x must have nr columns (one per selected reference row).
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # Apply regularization
    cg = ComputationGraph([cost, ber])

    # Defined unconditionally so the a_dropout branch below can subtract this
    # set even when s_dropout is disabled.
    s_dropout_vars = []

    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)

    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)

    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(set(VariableFilter(bricks=[Tanh], name='output')
                                  (ComputationGraph([inter_weights])))
                              - set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)

    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)

    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(set(VariableFilter(bricks=[Tanh], name='output')
                                  (ComputationGraph([final])))
                              - set([inter_weights])
                              - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)

    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)

    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    # L1 penalties: on the first MLP's weights, on the intermediate
    # activations, and on the second MLP's weights
    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
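

# Hedged sketch (assumption): a plain-numpy reference for what the repo's
# balanced_error_rate.ber is expected to compute -- the misclassification rate
# averaged over classes, so rare classes weigh as much as frequent ones.
# balanced_error_rate_np is a hypothetical helper for checking predictions
# outside the Theano graph.
import numpy


def balanced_error_rate_np(y_true, y_pred):
    y_true = numpy.asarray(y_true)
    y_pred = numpy.asarray(y_pred)
    per_class_error = [
        (y_pred[y_true == c] != c).mean()  # error rate within class c
        for c in numpy.unique(y_true)
    ]
    return float(numpy.mean(per_class_error))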