def __init__(self, filename, batch_size=20, learning_rate=.001,
             weight_penalty=0.0, model_type='classification'):
    '''Load the dataset, build the tensorflow graph and start a session.

    Args:
        filename: a file containing the data.
        batch_size: number of training examples in each training batch.
        learning_rate: the initial learning rate used in stochastic
            gradient descent.
        weight_penalty: the coefficient of the L2 weight regularization
            applied to the loss function. Set to > 0.0 to apply weight
            regularization, 0.0 to remove.
        model_type: 'classification' selects a Logistic Regression
            classifier. Any other value takes the regression branch
            (a linear regression model).
    '''
    # Logistics.
    self.model_type = model_type
    self.output_every_nth = 10  # save performance every n steps

    # Hyperparameters supplied by the caller.
    self.weight_penalty = weight_penalty
    self.learning_rate = learning_rate
    self.batch_size = batch_size

    # Optimizer class used to train the model. Adam is the common choice,
    # but others such as tf.train.AdagradOptimizer or
    # tf.train.GradientDescentOptimizer could be substituted here.
    self.optimizer = tf.train.AdamOptimizer

    # Read the data and derive the input dimensionality from it.
    loader = data_funcs.DataLoader(filename)
    self.data_loader = loader
    self.input_size = loader.get_feature_size()

    # Output width and reported metric depend on the kind of model.
    is_classifier = (model_type == 'classification')
    if is_classifier:
        print("\nPerforming classification.")
        self.output_size = 1  # limited to binary classification
        self.metric_name = 'accuracy'
    else:
        print("\nPerforming regression.")
        self.output_size = loader.num_outputs
        self.metric_name = 'RMSE'

    # Build the computation graph, then open a session on it and run the
    # initializer (self.init is not set in this method; presumably
    # build_graph creates it).
    self.graph = tf.Graph()
    self.build_graph()
    self.session = tf.Session(graph=self.graph)
    self.session.run(self.init)

    # Metric histories, kept for plotting evaluation results.
    self.train_metrics = []
    self.val_metrics = []
def __init__(self, filename, C=1.0, kernel='linear', gamma=.01,
             poly_degree=3, max_iter=-1, tolerance=0.001):
    """Store the SVM settings and load the data from `filename`.

    Args:
        filename: a file containing the data.
        C: a float for the soft-margin SVM misclassification penalty.
        kernel: the type of kernel to use. Can be 'linear', 'rbf', or
            'poly'.
        gamma: a float kernel parameter.
        poly_degree: the degree of the polynomial used in the 'poly'
            kernel.
        max_iter: the maximum number of iterations to run when training.
        tolerance: a float epsilon value. If the loss function changes by
            only tolerance or less, the function will stop training.
    """
    # Kernel configuration.
    self.kernel = kernel
    self.gamma = gamma
    self.poly_degree = poly_degree

    # Optimisation settings.
    self.C = C
    self.max_iter = max_iter
    self.tolerance = tolerance

    # The classifier attribute starts out empty; nothing is fitted here.
    self.classifier = None

    # Data access goes through the project's DataLoader.
    self.data_loader = data_funcs.DataLoader(filename)
def load_data(self):
    """Create this object's data_loader, which handles reading the file.

    The file location is datasets_path + filename; the normalize_and_fill,
    cross_validation and normalization settings stored on self are
    forwarded to the DataLoader unchanged.
    """
    data_path = self.datasets_path + self.filename
    loader_options = dict(
        normalize_and_fill=self.normalize_and_fill,
        cross_validation=self.cross_validation,
        normalization=self.normalization)
    self.data_loader = data_funcs.DataLoader(data_path, **loader_options)
def load_data(self):
    """Create the two DataLoader objects used by this model.

    self.data_loader reads the unsupervised data (mmae_filename) for the
    MMAE portion of the network, and self.classification_data_loader reads
    the supervised data (classification_filename) for the classification
    portion. Both are created with normalize_and_fill=False and
    cross_validation=True, and both receive check_noisy_data as
    separate_noisy_data.
    """
    # Unsupervised data for the MMAE.
    mmae_path = self.datasets_path + self.mmae_filename
    mmae_options = dict(
        normalize_and_fill=False,
        supervised=False,
        cross_validation=True,
        separate_noisy_data=self.check_noisy_data)
    self.data_loader = data_funcs.DataLoader(mmae_path, **mmae_options)

    # Supervised data for the classifier, restricted to the wanted label.
    classification_path = self.datasets_path + self.classification_filename
    classification_options = dict(
        normalize_and_fill=False,
        cross_validation=True,
        supervised=True,
        separate_noisy_data=self.check_noisy_data,
        wanted_label=self.wanted_label)
    self.classification_data_loader = data_funcs.DataLoader(
        classification_path, **classification_options)
def load_data(self):
    """Load data from csv files using the DataLoader class.

    Two loaders are created: self.data_loader (supervised=False) for
    filename, and self.classification_data_loader (supervised=True, with
    separate_noisy_data=True) for classification_filename.
    """
    # Options common to both loaders.
    # NOTE: cross_validation is not passed to either loader, so whatever
    # default DataLoader defines for it applies.
    shared_options = dict(
        normalize_and_fill=False,
        normalization=self.normalization,
        fill_missing_with=self.fill_missing)

    main_path = self.datasets_path + self.filename
    self.data_loader = data_funcs.DataLoader(
        main_path, supervised=False, **shared_options)

    # Additional classification data.
    classification_path = self.datasets_path + self.classification_filename
    self.classification_data_loader = data_funcs.DataLoader(
        classification_path, supervised=True, separate_noisy_data=True,
        **shared_options)
def load_data(self):
    """Load data from csv files using the DataLoader class.

    The loader is created with labels_to_sign=True because the labels must
    be changed to {-1, 1}.
    """
    data_path = self.datasets_path + self.filename
    loader_options = dict(
        normalize_and_fill=self.normalize_and_fill,
        normalization=self.normalization,
        cross_validation=True,
        supervised=True,
        wanted_label=self.wanted_label,
        labels_to_sign=True,
        separate_noisy_data=self.check_noisy_data)
    self.data_loader = data_funcs.DataLoader(data_path, **loader_options)
def load_data(self):
    """Set up the data loader object of the class for classification.

    Classification needs supervised data, so the loader is created with
    supervised=True, told which class label is wanted, and given
    check_noisy_data to decide whether noisy data is separated.
    """
    data_path = self.datasets_path + self.filename
    loader_options = dict(
        supervised=True,
        wanted_label=self.wanted_label,
        separate_noisy_data=self.check_noisy_data,
        normalize_and_fill=self.normalize_and_fill,
        cross_validation=self.cross_validation,
        normalization=self.normalization)
    self.data_loader = data_funcs.DataLoader(data_path, **loader_options)
def __init__(self, filename=None, layer_sizes=[128, 64], batch_size=20,
             learning_rate=.001, dropout_prob=1.0, weight_penalty=0.0,
             model_name='NN', clip_gradients=True, data_loader=None,
             checkpoint_dir=DEFAULT_MAIN_DIRECTORY + 'temp_saved_models/',
             verbose=True):
    '''Initialize the class by loading the required datasets and building
    the graph.

    Args:
        filename: a file containing the data. Only used to build a
            DataLoader when data_loader is None.
        layer_sizes: a list of sizes of the neural network layers. The
            list is copied, so later changes to the caller's list do not
            affect this model.
        batch_size: number of training examples in each training batch.
        learning_rate: the initial learning rate used in stochastic
            gradient descent.
        dropout_prob: the probability that a node in the network will not
            be dropped out during training. Set to < 1.0 to apply dropout,
            1.0 to remove dropout.
        weight_penalty: the coefficient of the L2 weight regularization
            applied to the loss function. Set to > 0.0 to apply weight
            regularization, 0.0 to remove.
        model_name: name of the model being trained. Used in saving model
            checkpoints.
        clip_gradients: a bool indicating whether or not to clip
            gradients. This is effective in preventing very large
            gradients from skewing training, and preventing your loss
            from going to inf or nan.
        data_loader: a DataLoader class object which already has
            pre-loaded data. When given, it is used instead of loading
            from filename.
        checkpoint_dir: the directory where the model will save
            checkpoints, saved files containing trained network weights.
        verbose: if True, will print many informative output statements.
    '''
    # Hyperparameters that should be tuned.
    # layer_sizes is copied: the default list object is shared by every
    # call, so storing it directly would let a change made through one
    # instance leak into every other instance built with the default.
    self.layer_sizes = list(layer_sizes)
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.dropout_prob = dropout_prob
    self.weight_penalty = weight_penalty

    # Hyperparameters that could be tuned
    # (but are probably the best to use).
    self.clip_gradients = clip_gradients
    self.activation_func = 'relu'
    self.optimizer = tf.train.AdamOptimizer

    # Logistics.
    self.checkpoint_dir = checkpoint_dir
    self.filename = filename
    self.model_name = model_name
    self.output_every_nth = 100
    self.verbose = verbose

    # Use the pre-loaded data if provided, otherwise extract it from the
    # file.
    if data_loader is None:
        self.data_loader = data_funcs.DataLoader(filename)
    else:
        self.data_loader = data_loader
    self.input_size = self.data_loader.get_feature_size()
    self.output_size = self.data_loader.num_labels
    if self.verbose:
        # Single-expression prints give the same output under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s" % ("Input dimensions (number of features):",
                         self.input_size))
        print("%s %s" % ("Number of classes/outputs:", self.output_size))

    # Set up tensorflow computation graph.
    self.graph = tf.Graph()
    self.build_graph()

    # Set up and initialize tensorflow session.
    self.session = tf.Session(graph=self.graph)
    self.session.run(self.init)

    # Use for plotting evaluation.
    self.train_acc = []
    self.val_acc = []
def __init__(self, filename, model_name, layer_sizes=[128, 64, 32],
             batch_size=25, learning_rate=.01, dropout_prob=0.9,
             weight_penalty=0.01, clip_gradients=True,
             model_type='regression', checkpoint_dir='./saved_models/'):
    '''Initialize the class by loading the required datasets and building
    the graph.

    Args:
        filename: a file containing the data.
        model_name: name of the model being trained. Used in saving model
            checkpoints.
        layer_sizes: a list of sizes of the neural network layers. The
            list is copied, so later changes to the caller's list do not
            affect this model.
        batch_size: number of training examples in each training batch.
        learning_rate: the initial learning rate used in stochastic
            gradient descent.
        dropout_prob: the probability that a node in the network will not
            be dropped out during training. Set to < 1.0 to apply dropout,
            1.0 to remove dropout.
        weight_penalty: the coefficient of the L2 weight regularization
            applied to the loss function. Set to > 0.0 to apply weight
            regularization, 0.0 to remove.
        clip_gradients: a bool indicating whether or not to clip
            gradients. This is effective in preventing very large
            gradients from skewing training, and preventing your loss
            from going to inf or nan.
        model_type: the type of output prediction. 'classification'
            selects the classification branch; any other value is handled
            as 'regression'.
        checkpoint_dir: the directory where the model will save
            checkpoints, saved files containing trained network weights.
    '''
    # Containers for task-specific weights and biases. They start empty;
    # presumably build_graph fills them in -- TODO confirm.
    self.task_weights = []
    self.task_bias = []

    # Hyperparameters that should be tuned.
    # layer_sizes is copied: the default list object is shared by every
    # call, so storing it directly would let a change made through one
    # instance leak into every other instance built with the default.
    self.layer_sizes = list(layer_sizes)
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.dropout_prob = dropout_prob
    self.weight_penalty = weight_penalty

    # Hyperparameters that could be tuned
    # (but are probably the best to use).
    self.clip_gradients = clip_gradients
    self.activation_func = 'relu'
    self.optimizer = tf.train.AdamOptimizer

    # Logistics.
    self.checkpoint_dir = checkpoint_dir
    self.filename = filename
    self.model_name = model_name
    self.model_type = model_type
    self.output_every_nth = 10

    # Extract the data from the filename.
    self.data_loader = data_funcs.DataLoader(filename)
    self.input_size = self.data_loader.get_feature_size()

    # Output width and reported metric depend on the prediction type.
    # Single-expression prints give the same output under the Python 2
    # print statement and the Python 3 print function.
    if model_type == 'classification':
        print("\nPerforming classification.")
        self.output_size = self.data_loader.num_classes
        self.metric_name = 'accuracy'
    else:
        print("\nPerforming regression.")
        self.output_size = self.data_loader.num_outputs
        self.metric_name = 'RMSE'
    print("%s %s" % ("Input dimensions (number of features):",
                     self.input_size))
    print("%s %s" % ("Number of classes/outputs:", self.output_size))

    # Set up tensorflow computation graph.
    self.graph = tf.Graph()
    self.build_graph()

    # Set up and initialize tensorflow session.
    self.session = tf.Session(graph=self.graph)
    self.session.run(self.init)

    # Use for plotting evaluation.
    self.train_metrics = []
    self.val_metrics = []
    self.validation_scores = []