def __init__(self,
                 train_task_dict_list,
                 val_task_dict_list=None,
                 test_task_dict_list=None,
                 print_per_task=False,
                 optimize_labels=None,
                 verbose=True,
                 num_cross_folds=5,
                 val_type=DEFAULT_VAL_TYPE,
                 accuracy_logged_every_n=1000,
                 accuracy_output_every_n=1000):
        """Set up a multi-task neural-network experiment.

        Args:
            train_task_dict_list: List of per-task dicts. Each dict is read
                below for at least the keys 'X' (features), 'Y' (labels),
                and 'Name' (task name).
            val_task_dict_list: Optional list of validation task dicts,
                parallel to the training list. Required when
                val_type == 'cross' (folds merge train and val data).
            test_task_dict_list: Optional list of held-out test task dicts,
                parallel to the training list.
            print_per_task: If True, per-task metrics are used; NOTE this
                silently replaces any caller-supplied optimize_labels with
                the list of task names (see below).
            optimize_labels: Labels used when selecting the best settings.
            verbose: If True, enables extra console output elsewhere.
            num_cross_folds: Number of folds used when val_type == 'cross'.
            val_type: Validation strategy; 'cross' triggers cross-validation
                set generation, anything else uses val_task_dict_list as-is.
            accuracy_logged_every_n: Training steps between metric logging.
            accuracy_output_every_n: Training steps between metric printing.
        """
        # Deep copies so that the in-place one-hot conversion below cannot
        # mutate the caller's data structures.
        self.train_tasks = copy.deepcopy(train_task_dict_list)
        self.val_tasks = copy.deepcopy(val_task_dict_list)
        self.test_tasks = copy.deepcopy(test_task_dict_list)
        self.n_tasks = len(self.train_tasks)
        self.print_per_task = print_per_task

        # Order in which tasks are visited during training; presumably a
        # (shuffled) permutation of task indices — defined elsewhere in the class.
        self.task_training_order = self.generateNewTrainingOrder()

        self.verbose = verbose
        self.optimize_labels = optimize_labels
        # Loss is hard-coded to the tfnet softmax loss for all tasks.
        self.loss_func = tfnet.getSoftmaxLoss

        # Convert integer labels to one-hot encodings for every split that
        # was provided. Assumes val/test task lists are parallel to the
        # train list (indexed by the same i).
        for i in range(self.n_tasks):
            self.train_tasks[i]['Y'] = tfnet.changeLabelsToOneHotEncoding(
                self.train_tasks[i]['Y'])
            if self.val_tasks is not None:
                self.val_tasks[i]['Y'] = tfnet.changeLabelsToOneHotEncoding(
                    self.val_tasks[i]['Y'])
            if self.test_tasks is not None:
                self.test_tasks[i]['Y'] = tfnet.changeLabelsToOneHotEncoding(
                    self.test_tasks[i]['Y'])

        # NOTE: this overwrites whatever optimize_labels the caller passed —
        # per-task printing always tracks every task by name.
        if self.print_per_task:
            self.optimize_labels = []
            for i in range(self.n_tasks):
                self.optimize_labels.append(self.train_tasks[i]['Name'])

        self.initializeStoredTrainingMetrics()

        #the following code supports performing cross validation:
        self.val_type = val_type
        if val_type == 'cross':
            # Validation data is required because each task's folds are built
            # by pooling its train and val data. NOTE(review): assert is
            # stripped under `python -O`; an explicit raise would be safer.
            assert (self.val_tasks is not None)
            self.num_cross_folds = num_cross_folds
            print "Generating cross validation sets for each ppt"
            for i in range(self.n_tasks):
                self.train_tasks[i]['crossVal_X'], self.train_tasks[i][
                    'crossVal_y'] = helper.generateCrossValSet(
                        self.train_tasks[i]['X'],
                        self.train_tasks[i]['Y'],
                        self.val_tasks[i]['X'],
                        self.val_tasks[i]['Y'],
                        self.num_cross_folds,
                        verbose=False)

        self.input_size = helper.calculateNumFeatsInTaskList(self.train_tasks)
        # Width of the first task's one-hot label matrix; assumes every task
        # has the same number of classes — TODO confirm.
        self.output_size = np.shape(self.train_tasks[0]['Y'])[1]
        print "OUTPUT SIZE IS CALCULATED TO BE:", self.output_size

        #parameters that can be tuned
        self.l2_beta = 5e-4                  # L2 regularization strength
        self.initial_learning_rate = 0.0001
        self.decay = True                    # whether to decay the learning rate
        self.batch_size = 10
        self.decay_steps = 1000
        self.decay_rate = 0.95
        self.optimizer = tf.train.AdamOptimizer  #can also be tf.train.AdagradOptimizer or tf.train.GradientDescentOptimizer
        self.dropout = True

        #network structure and running stuff
        # Layer widths shared across tasks vs. the single task-specific layer.
        self.hidden_sizes_shared = [1024]
        self.hidden_size_task = 10
        self.connection_types_shared = ['full']
        self.connection_types_task = ['full', 'full']
        self.n_steps = 4001                  # total training steps
        self.accuracy_logged_every_n = accuracy_logged_every_n
        self.accuracy_output_every_n = accuracy_output_every_n

        # Tensorflow graph computation — built lazily elsewhere.
        self.graph = None
        self.session = None
        self.saver = None

        #Note: for now you can only have one layer of weights that is unique to the task (hidden_size_task is a scalar)
        #TODO: improve this later
        # Per-task weight/bias tensors, created when the graph is built.
        self.task_w1 = None
        self.task_b1 = None
        self.task_w2 = None
        self.task_b2 = None
def __init__(self,
                 file_prefix,
                 users_as_tasks,
                 user_clusters=True,
                 eta_filename=None,
                 regularizers=REGULARIZERS,
                 tolerance=.0001,
                 max_iter=100,
                 val_type=VALIDATION_TYPE,
                 c_vals=C_VALS,
                 beta_vals=B_VALS,
                 v_vals=V_VALS,
                 kernels=KERNELS,
                 print_iters=False,
                 optimize_labels=None,
                 cont=False,
                 test_run=False,
                 results_path=DEFAULT_RESULTS_PATH,
                 figures_path=DEFAULT_FIGURES_PATH,
                 datasets_path=DEFAULT_DATASETS_PATH,
                 etas_path=DEFAULT_ETAS_PATH,
                 num_cross_folds=DEFAULT_NUM_CROSS_FOLDS,
                 drop20=False,
                 test_csv_filename=None):
        """Set up an MTMKL hyperparameter-sweep experiment.

        Loads pickled train/test (and optionally val) task lists named by
        file_prefix from datasets_path, configures the hyperparameter grid
        (C, beta, v, kernel, regularizer), and prepares the results
        DataFrame — optionally resuming from a previous run's CSV.

        Args:
            file_prefix: Prefix identifying the pickled task-list files.
            users_as_tasks: If True, each user is treated as a task.
            user_clusters: Whether to cluster users (stored as
                self.cluster_users; consumed elsewhere).
            eta_filename: Unused here — presumably consumed elsewhere or
                vestigial; TODO confirm.
            regularizers, c_vals, beta_vals, v_vals, kernels: Grids of
                settings to sweep over.
            tolerance: Convergence tolerance for model fitting.
            max_iter: Maximum fitting iterations.
            val_type: Validation strategy; 'cross' switches to
                cross-validation pickle files instead of a val split.
            print_iters: If True, print per-iteration fitting output.
            optimize_labels: Labels to select settings against; defaults to
                the three 'tomorrow_Group_*' wellbeing labels below.
            cont: If True, resume a previous sweep from its saved CSV.
            test_run: If True, shrink the grid to a single cheap setting.
            results_path, figures_path, datasets_path, etas_path: I/O dirs.
            num_cross_folds: Folds used when val_type == 'cross'.
            drop20: Stored flag; consumed elsewhere — TODO confirm meaning.
            test_csv_filename: Optional CSV (within datasets_path) used for
                final test predictions elsewhere.
        """
        self.results_path = results_path
        self.figures_path = figures_path
        self.datasets_path = datasets_path
        self.etas_path = etas_path
        self.file_prefix = file_prefix
        self.cont = cont
        self.val_type = val_type
        self.users_as_tasks = users_as_tasks
        self.cluster_users = user_clusters
        self.drop20 = drop20
        if test_csv_filename is not None:
            self.test_csv_filename = self.datasets_path + test_csv_filename
        else:
            self.test_csv_filename = None
        # replace=cont: presumably keeps/overwrites the same prefix when
        # resuming — confirm against getSavePrefix.
        self.save_prefix = self.getSavePrefix(file_prefix, replace=cont)

        # fix_y=True: presumably normalizes the label format on load —
        # confirm against helper.loadPickledTaskList.
        self.test_tasks = helper.loadPickledTaskList(datasets_path,
                                                     file_prefix,
                                                     "Test",
                                                     fix_y=True)
        self.train_tasks = helper.loadPickledTaskList(datasets_path,
                                                      file_prefix,
                                                      "Train",
                                                      fix_y=True)
        # A separate validation split only exists outside cross-validation;
        # the 'cross' path generates fold pickles at the bottom instead.
        if self.val_type != 'cross':
            self.val_tasks = helper.loadPickledTaskList(datasets_path,
                                                        file_prefix,
                                                        "Val",
                                                        fix_y=True)

        # print dataset sizes
        print "Num train points:", sum([len(t['Y']) for t in self.train_tasks])
        if self.val_type != 'cross':
            print "Num val points:", sum([len(t['Y']) for t in self.val_tasks])
        print "Num test points:", sum([len(t['Y']) for t in self.test_tasks])

        # With a fixed val split the model can be built now; under
        # cross-validation it is constructed later, per fold.
        if self.val_type != 'cross':
            self.initializeMTMKLModel(self.train_tasks)
        else:
            self.classifier = None

        self.n_feats = helper.calculateNumFeatsInTaskList(self.test_tasks)
        self.n_tasks = len(self.test_tasks)

        if optimize_labels is None:
            self.optimize_labels = [
                'tomorrow_Group_Happiness_Evening_Label',
                'tomorrow_Group_Health_Evening_Label',
                'tomorrow_Group_Calmness_Evening_Label'
            ]
        else:
            self.optimize_labels = optimize_labels

        # Hyperparameter grids swept during validation.
        self.c_vals = c_vals
        self.v_vals = v_vals
        self.kernels = kernels
        self.beta_vals = beta_vals
        self.regularizers = regularizers

        self.tolerance = tolerance
        self.max_iter = max_iter
        self.print_iters = print_iters

        # Collapse the grid to one cheap setting for smoke tests.
        if test_run:
            print "This is only a testing run. Using cheap settings to make it faster"
            self.c_vals = [100]
            self.beta_vals = [.01]
            self.kernels = ['linear']
            self.v_vals = [1.0]
            self.regularizers = ['L1']
            self.max_iter = 1

        self.calcNumSettingsDesired()

        #storing the results
        self.time_sum = 0  # accumulates per-setting wall time (for ETA reporting)
        if cont:
            # Resume: reload the previous sweep's results CSV and record how
            # many settings were already evaluated.
            # NOTE(review): DataFrame.from_csv is deprecated/removed in newer
            # pandas; fine for the (Python 2-era) pandas this code targets.
            self.val_results_df = pd.DataFrame.from_csv(self.results_path +
                                                        self.save_prefix +
                                                        '.csv')
            print '\nPrevious validation results df loaded. It has', len(
                self.val_results_df), "rows"
            self.started_from = len(self.val_results_df)
        else:
            self.val_results_df = pd.DataFrame()
            self.started_from = 0

        self.num_cross_folds = num_cross_folds
        # Pre-generate the per-fold pickle files used by the 'cross' path.
        if self.val_type == 'cross':
            helper.generateCrossValPickleFiles(self.datasets_path,
                                               self.file_prefix,
                                               self.num_cross_folds)
<br>
def __init__(self,
                 file_prefix,
                 users_as_tasks=False,
                 num_cross_folds=DEFAULT_NUM_CROSS_FOLDS,
                 cont=False,
                 results_path=DEFAULT_RESULTS_PATH,
                 figures_path=DEFAULT_FIGURES_PATH,
                 datasets_path=DEFAULT_DATASETS_PATH,
                 test_run=False,
                 max_iters=DEFAULT_MAX_ITERS,
                 val_type=DEFAULT_VALIDATION_TYPE,
                 optimize_labels=None,
                 test_csv_filename=None):
        """Set up an HBLR hyperparameter-sweep experiment.

        Loads pickled train/test (and optionally val) task lists named by
        file_prefix from datasets_path, configures the tau/sigma/mu
        hyperparameter grid, and prepares the results DataFrame —
        optionally resuming from a previous run's CSV.

        Args:
            file_prefix: Prefix identifying the pickled task-list files.
            users_as_tasks: If True, each user is a task and K is capped
                at 25; otherwise K equals the number of test tasks.
            num_cross_folds: Folds used when val_type == 'cross'.
            cont: If True, resume a previous sweep from its saved CSV.
            results_path, figures_path, datasets_path: I/O directories.
            test_run: If True, shrink the grid/model for a quick smoke test.
            max_iters: Maximum model-fitting iterations.
            val_type: Validation strategy; 'cross' switches to
                cross-validation pickle files instead of a val split.
            optimize_labels: Labels to select settings against; defaults to
                the three 'tomorrow_Group_*' wellbeing labels below.
            test_csv_filename: Optional CSV (within datasets_path) used for
                final test predictions elsewhere.
        """
        self.results_path = results_path
        self.figures_path = figures_path
        self.datasets_path = datasets_path
        # replace=cont: presumably keeps/overwrites the same prefix when
        # resuming — confirm against getSavePrefix.
        self.save_prefix = self.getSavePrefix(file_prefix, replace=cont)
        self.cont = cont
        self.max_iters = max_iters
        self.val_type = val_type
        self.users_as_tasks = users_as_tasks
        self.file_prefix = file_prefix
        if test_csv_filename is not None:
            self.test_csv_filename = self.datasets_path + test_csv_filename
        else:
            self.test_csv_filename = None
        self.test_tasks = helper.loadPickledTaskList(datasets_path,
                                                     file_prefix, "Test")
        self.train_tasks = helper.loadPickledTaskList(datasets_path,
                                                      file_prefix, "Train")
        # A separate validation split only exists outside cross-validation;
        # with a fixed split the model can be built immediately, otherwise
        # it is constructed later, per fold.
        if self.val_type != 'cross':
            self.val_tasks = helper.loadPickledTaskList(
                datasets_path, file_prefix, "Val")
            self.initializeHBLRModel(self.train_tasks)
        else:
            self.classifier = None

        # K: presumably the (maximum) number of HBLR task clusters — capped
        # at 25 when every user is a task; TODO confirm.
        if users_as_tasks:
            self.K = 25
        else:
            self.K = len(self.test_tasks)
        self.n_feats = helper.calculateNumFeatsInTaskList(self.test_tasks)
        self.n_tasks = len(self.test_tasks)

        if optimize_labels is None:
            self.optimize_labels = [
                'tomorrow_Group_Happiness_Evening_Label',
                'tomorrow_Group_Health_Evening_Label',
                'tomorrow_Group_Calmness_Evening_Label'
            ]
        else:
            self.optimize_labels = optimize_labels

        #parameters that can be tuned
        # Grids for the model's prior hyperparameters (tau1, tau2) and the
        # sigma/mu multipliers, swept during validation.
        self.tau10s = [10, 1, 0.05, 0.01]
        self.tau20s = [1.0, 0.05, 0.01]
        self.sigma_multipliers = [.01, 0.1, 1]
        self.mu_multipliers = [0.0]

        # Collapse the grid and shrink the model for smoke tests.
        # NOTE(review): forcing n_tasks = 2 here means only the first two
        # tasks are evaluated during a test run.
        if test_run:
            print "This is only a testing run. Using cheap settings to make it faster"
            self.K = 2
            self.max_iters = 5
            self.n_tasks = 2
            self.tau10s = [1]
            self.tau20s = [.1]
            self.sigma_multipliers = [.01]
            self.mu_multipliers = [0]

        self.calcNumSettingsDesired()

        #storing the results
        self.time_sum = 0  # accumulates per-setting wall time (for ETA reporting)
        if cont:
            # Resume: reload the previous sweep's results CSV and record how
            # many settings were already evaluated.
            # NOTE(review): DataFrame.from_csv is deprecated/removed in newer
            # pandas; fine for the (Python 2-era) pandas this code targets.
            self.val_results_df = pd.DataFrame.from_csv(self.results_path +
                                                        self.save_prefix +
                                                        '.csv')
            print '\nPrevious validation results df loaded. It has', len(
                self.val_results_df), "rows"
            self.started_from = len(self.val_results_df)
        else:
            self.val_results_df = pd.DataFrame()
            self.started_from = 0

        self.num_cross_folds = num_cross_folds
        # Pre-generate the per-fold pickle files used by the 'cross' path.
        if self.val_type == 'cross':
            helper.generateCrossValPickleFiles(self.datasets_path,
                                               self.file_prefix,
                                               self.num_cross_folds)