def __init__(self, shape, transfers=None, optimizer=None, error_func=None,
             input_active_probability=0.8, hidden_active_probability=0.5):
    if optimizer is None:
        # Don't use BFGS for Dropout.
        # BFGS cannot effectively approximate the hessian when the problem
        # is constantly changing.
        optimizer = SteepestDescent()

    super(DropoutMLP, self).__init__(shape, transfers, optimizer, error_func)

    # Dropout hyperparameters
    self._inp_act_prob = input_active_probability
    self._hid_act_prob = hidden_active_probability

    # We modify transfers to disable hidden neurons.
    # To disable inputs, we need a transfer for the input vector.
    # To re-enable hidden neurons, we need to remember the original transfers.
    self._input_transfer = LinearTransfer()
    self._real_transfers = self._transfers

    # We perform the post-training procedure on the first activation after training
    self._during_training = False
    self._did_post_training = True
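# Minimal sketch of the dropout idea the two probabilities above control:
# sample a boolean mask of neurons left active for one training iteration.
# The names below are hypothetical illustrations, not this class's internals.
import numpy

def random_active_mask(num_neurons, active_probability):
    """Boolean mask; True marks a neuron kept active this iteration."""
    return numpy.random.uniform(size=num_neurons) < active_probability

# E.g., with hidden_active_probability=0.5, roughly half the hidden
# neurons stay active on any given iteration
hidden_mask = random_active_mask(10, 0.5)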
def __init__(self, shape, transfers=None, optimizer=None, error_func=None):
    super(MLP, self).__init__()

    if transfers is None:
        transfers = [ReluTransfer()
                     for _ in range(len(shape) - 2)] + [LinearTransfer()]
    elif isinstance(transfers, Transfer):
        # Treat single given transfer as output transfer
        transfers = [ReluTransfer()
                     for _ in range(len(shape) - 2)] + [transfers]

    if len(transfers) != len(shape) - 1:
        raise ValueError(
            'Must have exactly 1 transfer between each pair of layers, and after the output'
        )

    self._shape = shape

    self._weight_matrices = []
    self._setup_weight_matrices()
    self._transfers = transfers

    # Parameter optimization for training
    if optimizer is None:
        # If there are many weights, use an optimizer that doesn't use the hessian.
        # TODO (maybe): Default optimizer should work with mini-batches (be robust to changing problem).
        # Optimizers like BFGS, and initial step strategies like FO and quadratic, rely heavily on information
        # from previous iterations, resulting in poor performance if the problem changes between iterations.
        # NOTE: Ideally, the Optimizer itself should handle its problem changing.

        # Count number of weights
        if sum([
                reduce(operator.mul, weight_matrix.shape)
                for weight_matrix in self._weight_matrices
        ]) > 2500:  # NOTE: Cutoff value could use more testing
            # Too many weights, don't use hessian
            optimizer = SteepestDescent()
        else:
            # Few enough weights, use hessian
            optimizer = BFGS()
    self._optimizer = optimizer

    # Error function for training
    if error_func is None:
        error_func = MSE()
    self._error_func = error_func

    # Setup activation vectors
    # 1 for input, then 2 for each hidden and output layer (1 for transfer, 1 for perceptron)
    # +1 for biases
    self._weight_inputs = [numpy.ones(shape[0] + 1)]
    self._transfer_inputs = []
    for size in shape[1:]:
        self._weight_inputs.append(numpy.ones(size + 1))
        self._transfer_inputs.append(numpy.zeros(size))

    self.reset()
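# Standalone sketch of the default-optimizer heuristic above. The exact
# shapes built by self._setup_weight_matrices are not shown in this
# section; the (prev + 1, next) shape below is an assumption suggested by
# the bias entries appended to self._weight_inputs.
import operator
from functools import reduce

def default_optimizer_name(shape, cutoff=2500):
    # One weight matrix between each pair of consecutive layers
    matrix_shapes = [(prev + 1, next_)
                     for prev, next_ in zip(shape[:-1], shape[1:])]
    num_weights = sum(
        reduce(operator.mul, matrix_shape) for matrix_shape in matrix_shapes)
    # Hessian-based BFGS only when the weight count is small enough
    return 'SteepestDescent' if num_weights > cutoff else 'BFGS'

assert default_optimizer_name((2, 4, 1)) == 'BFGS'
assert default_optimizer_name((100, 100, 10)) == 'SteepestDescent'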
def __init__(self,
             attributes,
             num_clusters,
             num_outputs,
             optimizer=None,
             error_func=None,
             variance=None,
             scale_by_similarity=True,
             pre_train_clusters=False,
             move_rate=0.1,
             neighborhood=2,
             neighbor_move_rate=1.0):
    super(RBF, self).__init__()

    # Clustering algorithm
    self._pre_train_clusters = pre_train_clusters
    self._som = SOM(
        attributes,
        num_clusters,
        move_rate=move_rate,
        neighborhood=neighborhood,
        neighbor_move_rate=neighbor_move_rate)

    # Variance for gaussian
    if variance is None:
        variance = 4.0 / num_clusters
    self._variance = variance

    # Weight matrix for output
    self._weight_matrix = self._random_weight_matrix(
        (num_clusters, num_outputs))

    # Optimizer to optimize weight_matrix
    if optimizer is None:
        # If there are many weights, use an optimizer that doesn't use the hessian.
        # TODO (maybe): Default optimizer should work with mini-batches (be robust to changing problem).
        # Optimizers like BFGS, and initial step strategies like FO and quadratic, rely heavily on information
        # from previous iterations, resulting in poor performance if the problem changes between iterations.
        # NOTE: Ideally, the Optimizer itself should handle its problem changing.

        # Count number of weights
        # NOTE: Cutoff value could use more testing
        if reduce(operator.mul, self._weight_matrix.shape) > 2500:
            # Too many weights, don't use hessian
            optimizer = SteepestDescent()
        else:
            # Few enough weights, use hessian
            optimizer = BFGS()
    self._optimizer = optimizer

    # Error function for training
    if error_func is None:
        error_func = MSE()
    self._error_func = error_func

    # Optional scaling output by total gaussian similarity
    self._scale_by_similarity = scale_by_similarity

    # For training
    self._similarities = None
    self._total_similarity = None
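# Sketch of the gaussian similarity that self._variance parameterizes,
# plus the optional scaling by total similarity. The exact kernel used by
# this class is not shown in this section, so the common RBF form below
# is an assumption, not the class's implementation.
import numpy

def gaussian_similarities(inputs, cluster_centers, variance):
    # Squared euclidean distance from the input to each cluster center
    sq_dists = numpy.sum((cluster_centers - inputs)**2, axis=1)
    return numpy.exp(-sq_dists / variance)

centers = numpy.array([[0.0, 0.0], [1.0, 1.0]])
similarities = gaussian_similarities(
    numpy.array([0.5, 0.5]), centers, variance=4.0 / len(centers))
# With scale_by_similarity=True, outputs would be divided by this total
total_similarity = numpy.sum(similarities)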
def test_steepest_descent_wolfe_line_search():
    check_optimize_sphere_function(
        SteepestDescent(step_size_getter=WolfeLineSearch()))
def test_steepest_descent_backtracking_line_search():
    check_optimize_sphere_function(
        SteepestDescent(step_size_getter=BacktrackingLineSearch()))
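# `check_optimize_sphere_function` is defined elsewhere in this test suite
# and is not shown here. As a self-contained illustration of what such a
# check verifies, here is plain-numpy steepest descent on the sphere
# function f(x) = sum(x_i**2), which should converge to the origin.
# This sketch does not use the library's Optimizer API.
import numpy

def sphere(x):
    return numpy.sum(x**2)

def sphere_gradient(x):
    return 2 * x

x = numpy.random.uniform(-1.0, 1.0, size=2)
for _ in range(100):
    # A fixed step size stands in for the Wolfe/backtracking line searches
    x = x - 0.1 * sphere_gradient(x)
assert sphere(x) < 1e-10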