def __init__(self,
                 file_prefix,
                 users_as_tasks=False,
                 num_cross_folds=DEFAULT_NUM_CROSS_FOLDS,
                 cont=False,
                 results_path=DEFAULT_RESULTS_PATH,
                 figures_path=DEFAULT_FIGURES_PATH,
                 datasets_path=DEFAULT_DATASETS_PATH,
                 test_run=False,
                 max_iters=DEFAULT_MAX_ITERS,
                 val_type=DEFAULT_VALIDATION_TYPE,
                 optimize_labels=None,
                 test_csv_filename=None):
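        # Store run configuration and the paths used for saving results and figures.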
        self.results_path = results_path
        self.figures_path = figures_path
        self.datasets_path = datasets_path
        self.save_prefix = self.getSavePrefix(file_prefix, replace=cont)
        self.cont = cont
        self.max_iters = max_iters
        self.val_type = val_type
        self.users_as_tasks = users_as_tasks
        self.file_prefix = file_prefix
        if test_csv_filename is not None:
            self.test_csv_filename = self.datasets_path + test_csv_filename
        else:
            self.test_csv_filename = None
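        # Load the pickled test and train task lists; a separate validation list (and an
        # up-front HBLR model) is only needed when a fixed validation split is used rather
        # than cross-validation.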
        self.test_tasks = helper.loadPickledTaskList(datasets_path,
                                                     file_prefix, "Test")
        self.train_tasks = helper.loadPickledTaskList(datasets_path,
                                                      file_prefix, "Train")
        if self.val_type != 'cross':
            self.val_tasks = helper.loadPickledTaskList(
                datasets_path, file_prefix, "Val")
            self.initializeHBLRModel(self.train_tasks)
        else:
            self.classifier = None

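        # K is assumed to be the maximum number of task clusters available to the HBLR
        # model; it is fixed at 25 when every user is a separate task, and otherwise set
        # to the number of tasks.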
        if users_as_tasks:
            self.K = 25
        else:
            self.K = len(self.test_tasks)
        self.n_feats = helper.calculateNumFeatsInTaskList(self.test_tasks)
        self.n_tasks = len(self.test_tasks)

        if optimize_labels is None:
            self.optimize_labels = [
                'tomorrow_Group_Happiness_Evening_Label',
                'tomorrow_Group_Health_Evening_Label',
                'tomorrow_Group_Calmness_Evening_Label'
            ]
        else:
            self.optimize_labels = optimize_labels

        # Hyperparameters that can be tuned during the validation search.
        self.tau10s = [10, 1, 0.05, 0.01]
        self.tau20s = [1.0, 0.05, 0.01]
        self.sigma_multipliers = [.01, 0.1, 1]
        self.mu_multipliers = [0.0]

        if test_run:
            print "This is only a testing run. Using cheap settings to make it faster"
            self.K = 2
            self.max_iters = 5
            self.n_tasks = 2
            self.tau10s = [1]
            self.tau20s = [.1]
            self.sigma_multipliers = [.01]
            self.mu_multipliers = [0]

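        # Compute the total number of hyperparameter settings to search (presumably the
        # product of the grids above).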
        self.calcNumSettingsDesired()

        # Storage for results; reload prior validation results when continuing.
        self.time_sum = 0
        if cont:
            self.val_results_df = pd.DataFrame.from_csv(self.results_path +
                                                        self.save_prefix +
                                                        '.csv')
            print '\nPrevious validation results df loaded. It has', len(
                self.val_results_df), "rows"
            self.started_from = len(self.val_results_df)
        else:
            self.val_results_df = pd.DataFrame()
            self.started_from = 0

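        # When cross-validating, pre-generate pickled fold files (presumably so every
        # setting is evaluated on the same folds).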
        self.num_cross_folds = num_cross_folds
        if self.val_type == 'cross':
            helper.generateCrossValPickleFiles(self.datasets_path,
                                               self.file_prefix,
                                               self.num_cross_folds)
    def __init__(self,
                 file_prefix,
                 users_as_tasks,
                 user_clusters=True,
                 eta_filename=None,
                 regularizers=REGULARIZERS,
                 tolerance=.0001,
                 max_iter=100,
                 val_type=VALIDATION_TYPE,
                 c_vals=C_VALS,
                 beta_vals=B_VALS,
                 v_vals=V_VALS,
                 kernels=KERNELS,
                 print_iters=False,
                 optimize_labels=None,
                 cont=False,
                 test_run=False,
                 results_path=DEFAULT_RESULTS_PATH,
                 figures_path=DEFAULT_FIGURES_PATH,
                 datasets_path=DEFAULT_DATASETS_PATH,
                 etas_path=DEFAULT_ETAS_PATH,
                 num_cross_folds=DEFAULT_NUM_CROSS_FOLDS,
                 drop20=False,
                 test_csv_filename=None):
        self.results_path = results_path
        self.figures_path = figures_path
        self.datasets_path = datasets_path
        self.etas_path = etas_path
        self.file_prefix = file_prefix
        self.cont = cont
        self.val_type = val_type
        self.users_as_tasks = users_as_tasks
        self.cluster_users = user_clusters
        self.drop20 = drop20
        if test_csv_filename is not None:
            self.test_csv_filename = self.datasets_path + test_csv_filename
        else:
            self.test_csv_filename = None
        self.save_prefix = self.getSavePrefix(file_prefix, replace=cont)

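        # Load the pickled task lists; fix_y is assumed to adjust the labels into the form
        # the SVM-based MTMKL model expects (e.g. {-1, 1}).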
        self.test_tasks = helper.loadPickledTaskList(datasets_path,
                                                     file_prefix,
                                                     "Test",
                                                     fix_y=True)
        self.train_tasks = helper.loadPickledTaskList(datasets_path,
                                                      file_prefix,
                                                      "Train",
                                                      fix_y=True)
        if self.val_type != 'cross':
            self.val_tasks = helper.loadPickledTaskList(datasets_path,
                                                        file_prefix,
                                                        "Val",
                                                        fix_y=True)

        # print dataset sizes
        print "Num train points:", sum([len(t['Y']) for t in self.train_tasks])
        if self.val_type != 'cross':
            print "Num val points:", sum([len(t['Y']) for t in self.val_tasks])
        print "Num test points:", sum([len(t['Y']) for t in self.test_tasks])

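        # With a fixed validation split, the MTMKL classifier is built up front; under
        # cross-validation it is left as None (presumably re-initialized for each fold).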
        if self.val_type != 'cross':
            self.initializeMTMKLModel(self.train_tasks)
        else:
            self.classifier = None

        self.n_feats = helper.calculateNumFeatsInTaskList(self.test_tasks)
        self.n_tasks = len(self.test_tasks)

        if optimize_labels is None:
            self.optimize_labels = [
                'tomorrow_Group_Happiness_Evening_Label',
                'tomorrow_Group_Health_Evening_Label',
                'tomorrow_Group_Calmness_Evening_Label'
            ]
        else:
            self.optimize_labels = optimize_labels

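        # Hyperparameter grids searched during validation.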
        self.c_vals = c_vals
        self.v_vals = v_vals
        self.kernels = kernels
        self.beta_vals = beta_vals
        self.regularizers = regularizers

        self.tolerance = tolerance
        self.max_iter = max_iter
        self.print_iters = print_iters

        if test_run:
            print "This is only a testing run. Using cheap settings to make it faster"
            self.c_vals = [100]
            self.beta_vals = [.01]
            self.kernels = ['linear']
            self.v_vals = [1.0]
            self.regularizers = ['L1']
            self.max_iter = 1

        self.calcNumSettingsDesired()

        # Storage for results; reload prior validation results when continuing.
        self.time_sum = 0
        if cont:
            self.val_results_df = pd.DataFrame.from_csv(self.results_path +
                                                        self.save_prefix +
                                                        '.csv')
            print '\nPrevious validation results df loaded. It has', len(
                self.val_results_df), "rows"
            self.started_from = len(self.val_results_df)
        else:
            self.val_results_df = pd.DataFrame()
            self.started_from = 0

        self.num_cross_folds = num_cross_folds
        if self.val_type == 'cross':
            helper.generateCrossValPickleFiles(self.datasets_path,
                                               self.file_prefix,
                                               self.num_cross_folds)
    def __init__(self,
                 file_prefix,
                 users_as_tasks=False,
                 cont=False,
                 classifier_name='LSSVM',
                 num_cross_folds=DEFAULT_NUM_CROSS_FOLDS,
                 main_directory=DEFAULT_MAIN_DIRECTORY,
                 datasets_path='Data/Datasets/Discard20/',
                 cant_train_with_one_class=True,
                 check_test=False,
                 save_results_every_nth=3,
                 test_csv_filename=None):
        """ Initializes the parent model with fields useful for all child wrapper classes

        Args:
            file_prefix: The first portion of the name of a set of pickled task lists, e.g.
                'datasetTaskList-Discard-Future-Group_'.
            users_as_tasks: A boolean. If True, assumes there are many tasks and each task
                is one person; results will not be printed per task.
            cont: A boolean. If True, tries to load a saved results .csv and continue
                training from the next unfinished result.
            classifier_name: String name of the classifier being trained. Used to determine
                where to save results.
            num_cross_folds: An integer number of folds to use in cross validation.
            main_directory: The path to the main dropbox directory, which contains the
                results and data directories.
            datasets_path: The path from the main dropbox directory to the datasets
                directory.
            cant_train_with_one_class: A boolean. If True and the model encounters a task
                with only one type of label in the training data, it will simply predict
                the most frequent class.
            check_test: A boolean. If True, evaluates final results on the held-out test
                set after running.
            save_results_every_nth: An integer: the number of settings to test before
                writing the results DataFrame to a csv file.
            test_csv_filename: Optional string name of a csv file within the datasets
                directory; if provided, its full path is stored for later use (e.g. for
                producing test predictions).
        """
        # Store constructor arguments and build the results, figures, and datasets paths.
        self.main_directory = main_directory
        self.classifier_name = classifier_name
        self.results_path = main_directory + 'Results/' + classifier_name + '/'
        self.figures_path = main_directory + 'Figures/' + classifier_name + '/'
        self.datasets_path = main_directory + datasets_path
        self.cont = cont
        self.users_as_tasks = users_as_tasks
        self.cant_train_with_one_class = cant_train_with_one_class
        self.check_test = check_test
        self.save_results_every_nth = save_results_every_nth
        self.file_prefix = file_prefix
        self.save_prefix = self.get_save_prefix(file_prefix, replace=cont)
        if test_csv_filename is not None:
            self.test_csv_filename = self.datasets_path + test_csv_filename
        else:
            self.test_csv_filename = None

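        # Define the hyperparameter grid, load the pickled task data, and enumerate every
        # parameter combination that will be tested.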
        self.params = {}
        self.define_params()

        self.load_data()

        self.calc_num_param_settings()
        self.construct_list_of_params_to_test()

        # Storage for results; reload prior validation results when continuing.
        self.time_sum = 0
        if cont:
            self.val_results_df = pd.DataFrame.from_csv(self.results_path +
                                                        self.save_prefix +
                                                        '.csv')
            print '\nPrevious validation results df loaded. It has', len(
                self.val_results_df), "rows"
            self.started_from = len(self.val_results_df)
        else:
            self.val_results_df = pd.DataFrame()
            self.started_from = 0

        self.num_cross_folds = num_cross_folds
        helper.generateCrossValPickleFiles(self.datasets_path,
                                           self.file_prefix,
                                           self.num_cross_folds)