    def getFinalResultsForTask(self, setting_dict):
        if self.users_as_tasks:
            task_df = self.data_df[self.data_df['user_id'] == setting_dict['task_name']]
            target_label = [self.target_label]
        else:
            task_df = self.data_df
            target_label = [helper.getOfficialLabelName(setting_dict['task_name'])]
        self.net = tfnet.TensorFlowNetwork(task_df,
                                           copy.deepcopy(self.wanted_feats),
                                           target_label,
                                           verbose=False,
                                           val_type=self.val_type)
        self.net.setParams(l2_beta=setting_dict['l2_beta'],
                           initial_learning_rate=setting_dict['learning_rate'],
                           decay=setting_dict['decay'],
                           decay_steps=setting_dict['decay_steps'],
                           decay_rate=setting_dict['decay_rate'],
                           batch_size=setting_dict['batch_size'],
                           optimizer=setting_dict['optimizer'],
                           dropout=setting_dict['dropout'])
        self.constructNetwork(setting_dict['hidden_layers'])

        self.net.setUpGraph()
        preds = self.net.runGraph(self.test_steps, print_test=True, return_test_preds=True)

        preds_df = self.net.get_preds_for_df()
        label_name = setting_dict['task_name']
        preds_df.to_csv(self.results_path + "Preds-" + self.save_prefix + label_name + '.csv')
        print "Preds df saved to", self.results_path + "Preds-" + self.save_prefix + label_name + '.csv'

        return self.net.final_test_results['acc'], self.net.final_test_results['auc'], preds
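    # Usage sketch (hypothetical values, not from the original source): the
    # setting_dict passed in is expected to carry the hyperparameter keys read
    # above, e.g.:
    #
    #   best_setting = {'task_name': 'Happiness_Evening_Label',   # hypothetical name
    #                   'l2_beta': 1e-4, 'learning_rate': 0.001,
    #                   'decay': True, 'decay_steps': 1000, 'decay_rate': 0.95,
    #                   'batch_size': 50, 'optimizer': 'adam', 'dropout': True,
    #                   'hidden_layers': [128, 64]}
    #   acc, auc, preds = wrapper.getFinalResultsForTask(best_setting)   # 'wrapper' is hypothetical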
    def getFinalResultsAndSave(self, results_dict):
        print "\nRetraining on full training data with the best settings..."
        self.drop20 = False
        self.initializeAndTrainMTMKL(self.train_tasks,
                                     results_dict['C'],
                                     results_dict['beta'],
                                     results_dict['kernel'],
                                     results_dict['v'],
                                     results_dict['regularizer'],
                                     verbose=True)

        print "\nEvaluating results on held-out test set!! ..."
        all_preds = []
        all_true_y = []
        per_task_accs = [np.nan] * self.n_tasks
        per_task_aucs = [np.nan] * self.n_tasks
        per_task_f1 = [np.nan] * self.n_tasks
        per_task_precision = [np.nan] * self.n_tasks
        per_task_recall = [np.nan] * self.n_tasks
        for t in range(self.n_tasks):
            preds = self.classifier.predictOneTask(self.test_tasks, t)
            true_y = list(self.test_tasks[t]['Y'].flatten())

            if len(preds) == 0 or len(true_y) == 0:
                print "no y for task", t, "... skipping"
                continue

            all_preds.extend(preds)
            all_true_y.extend(true_y)

            # save the per-task results
            t_acc, t_auc, t_f1, t_precision, t_recall = helper.computeAllMetricsForPreds(
                preds, true_y)
            per_task_accs[t] = t_acc
            per_task_aucs[t] = t_auc
            per_task_f1[t] = t_f1
            per_task_precision[t] = t_precision
            per_task_recall[t] = t_recall

        print "\nPlotting cool stuff about the final model..."
        self.saveImagePlot(self.classifier.eta, 'Etas')
        pd.DataFrame(self.classifier.eta).to_csv(
            self.etas_path + self.save_prefix + "-etas.csv")

        print "\tHELD OUT TEST METRICS COMPUTED BY APPENDING ALL PREDS"
        acc, auc, f1, precision, recall = helper.computeAllMetricsForPreds(
            all_preds, all_true_y)
        print '\t\tAcc:', acc, 'AUC:', auc, 'F1:', f1, 'Precision:', precision, 'Recall:', recall

        print "\n\tHELD OUT TEST METRICS COMPUTED BY AVERAGING OVER TASKS"
        avg_acc = np.nanmean(per_task_accs)
        avg_auc = np.nanmean(per_task_aucs)
        avg_f1 = np.nanmean(per_task_f1)
        avg_precision = np.nanmean(per_task_precision)
        avg_recall = np.nanmean(per_task_recall)
        print '\t\tAcc:', avg_acc, 'AUC:', avg_auc, 'F1:', avg_f1, 'Precision:', avg_precision, 'Recall:', avg_recall

        print "\n\tHELD OUT TEST METRICS COMPUTED FOR EACH TASK"
        if not self.users_as_tasks:
            for t in range(self.n_tasks):
                task_name = self.test_tasks[t]['Name']
                task_name = helper.getFriendlyLabelName(task_name)
                print "\t\t", task_name, "- Acc:", per_task_accs[
                    t], "AUC:", per_task_aucs[t], 'F1:', per_task_f1[
                        t], 'Precision:', per_task_precision[
                            t], 'Recall:', per_task_recall[t]

        if self.test_csv_filename is not None:
            print "\tSAVING HELD OUT PREDICITONS"
            if 'Big5GenderKMeansCluster' in self.file_prefix:
                task_column = 'Big5GenderKMeansCluster'
                tasks_are_ints = True
                label_name = helper.getFriendlyLabelName(self.file_prefix)
                wanted_label = helper.getOfficialLabelName(label_name)
                predictions_df = helper.get_test_predictions_for_df_with_task_column(
                    self.classifier.predict_01,
                    self.test_csv_filename,
                    task_column,
                    self.test_tasks,
                    wanted_label=wanted_label,
                    num_feats_expected=np.shape(self.test_tasks[0]['X'])[1],
                    label_name=label_name,
                    tasks_are_ints=tasks_are_ints)
            elif not self.users_as_tasks:
                predictions_df = helper.get_test_predictions_for_df_with_no_task_column(
                    self.classifier.predict_01,
                    self.test_csv_filename,
                    self.test_tasks,
                    num_feats_expected=np.shape(self.test_tasks[0]['X'])[1])
            else:
                print "Error! Cannot determine what type of model you are training and therefore cannot save predictions."
                return
            predictions_df.to_csv(self.results_path + "Preds-" +
                                  self.save_prefix + '.csv')
        else:
            print "Uh oh, the test csv filename was not set, can't save test preds"
    def retrainAndPlot(self, setting_dict):
        print "\nRETRAINING WITH THE BEST SETTINGS:"

        self.net.verbose = True
        self.net.setParams(l2_beta=setting_dict['l2_beta'],
                           initial_learning_rate=setting_dict['learning_rate'],
                           decay=setting_dict['decay'],
                           decay_steps=setting_dict['decay_steps'],
                           decay_rate=setting_dict['decay_rate'],
                           batch_size=setting_dict['batch_size'],
                           optimizer=setting_dict['optimizer'],
                           dropout=setting_dict['dropout'])
        self.constructNetwork(setting_dict['hidden_layers'])

        self.net.setUpGraph()
        self.net.runGraph(self.test_steps, print_test=True)

        if self.multilabel:
            for label in self.optimize_labels:
                friendly_label = helper.getFriendlyLabelName(label)
                self.net.plotValResults(save_path=self.figures_path +
                                        self.val_output_prefix + '-' +
                                        friendly_label + '.eps',
                                        label=label)
                self.net.plotValResults(save_path=self.figures_path +
                                        self.val_output_prefix + '-' +
                                        friendly_label + '.png',
                                        label=label)
                print "Final validation results for", friendly_label,"... Acc:", \
                  self.net.training_val_results_per_task['acc'][label][-1], "Auc:", self.net.training_val_results_per_task['auc'][label][-1]
        elif self.print_per_task:
            for label in self.wanted_labels:
                friendly_label = helper.getFriendlyLabelName(label)
                self.net.plotValResults(save_path=self.figures_path +
                                        self.val_output_prefix + '-' +
                                        friendly_label + '.eps',
                                        label=label)
                self.net.plotValResults(save_path=self.figures_path +
                                        self.val_output_prefix + '-' +
                                        friendly_label + '.png',
                                        label=label)
                print "Final validation results for", friendly_label,"... Acc:", \
                 self.net.training_val_results_per_task['acc'][label][-1], "Auc:", self.net.training_val_results_per_task['auc'][label][-1]
        else:
            self.net.plotValResults(save_path=self.figures_path +
                                    self.val_output_prefix + '.eps')
            self.net.plotValResults(save_path=self.figures_path +
                                    self.val_output_prefix + '.png')
            print "Final AUC:", self.net.training_val_results['auc'][-1]

        if self.test_csv_filename is not None:
            if self.multitask:
                task_column = None
                if 'Cluster' in self.dataset_name:
                    print "Guessing the task column is Big5GenderKMeansCluster - if this is incorrect expect errors"
                    task_column = 'Big5GenderKMeansCluster'
                    tasks_are_ints = True

                if 'User' in self.dataset_name:
                    print "Guessing the task column is user_id - if this is incorrect expect errors"
                    task_column = 'user_id'
                    tasks_are_ints = False

                if task_column is not None:
                    label_name = helper.getFriendlyLabelName(self.dataset_name)
                    wanted_label = helper.getOfficialLabelName(label_name)
                    test_preds_df = helper.get_test_predictions_for_df_with_task_column(
                        self.net.predict,
                        self.test_csv_filename,
                        task_column,
                        self.net.test_tasks,
                        wanted_label=wanted_label,
                        num_feats_expected=np.shape(
                            self.net.test_tasks[0]['X'])[1],
                        label_name=label_name,
                        tasks_are_ints=tasks_are_ints)
                else:
                    test_preds_df = helper.get_test_predictions_for_df_with_no_task_column(
                        self.net.predict,
                        self.test_csv_filename,
                        self.net.test_tasks,
                        num_feats_expected=np.shape(
                            self.net.test_tasks[0]['X'])[1])
            else:
                test_preds_df = self.net.get_preds_for_df()
            print "Got a test preds df! Saving it to:", self.results_path + "Preds-" + self.val_output_prefix + '.csv'
            test_preds_df.to_csv(self.results_path + 'Preds-' +
                                 self.val_output_prefix + '.csv')
        else:
            print "Uh oh, the test csv filename was not set, can't save test preds"

        print "Saving a copy of the final model!"
        self.net.save_model(self.val_output_prefix, self.results_path)
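    # Usage sketch (hypothetical; 'results_path' and 'val_output_prefix' stand in
    # for the same values used above): the saved predictions CSV can be inspected
    # later with pandas, e.g.:
    #
    #   import pandas as pd
    #   preds_df = pd.read_csv(results_path + 'Preds-' + val_output_prefix + '.csv')
    #   print preds_df.head()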
    def getFinalResultsAndSave(self, setting_dict):
        if self.val_type == 'cross':
            print "\nPlotting cross-validation results for best settings..."
            self.getCrossValidationResults(dict(),
                                           setting_dict['tau10'],
                                           setting_dict['tau20'],
                                           setting_dict['sigma_multiplier'],
                                           setting_dict['mu_multiplier'],
                                           save_plots=True)

        print "\nRetraining on training data with the best settings..."
        self.initializeHBLRModel(self.train_tasks)
        self.classifier.verbose = True
        self.setClassifierToSetting(setting_dict['tau10'],
                                    setting_dict['tau20'],
                                    setting_dict['sigma_multiplier'],
                                    setting_dict['mu_multiplier'])
        self.classifier.trainUntilConverged()

        print "\nPlotting and saving cool stuff about the final model..."
        self.saveImagePlot(self.classifier.phi, 'Phi')
        pd.DataFrame(self.classifier.phi).to_csv(self.results_path +
                                                 self.save_prefix + "-phi.csv")
        self.saveConvergencePlots()

        print "\nEvaluating results on held-out test set!! ..."
        all_preds = []
        all_true_y = []
        all_X_data = []
        per_task_accs = [np.nan] * self.n_tasks
        per_task_aucs = [np.nan] * self.n_tasks
        per_task_f1 = [np.nan] * self.n_tasks
        per_task_precision = [np.nan] * self.n_tasks
        per_task_recall = [np.nan] * self.n_tasks
        for t in range(self.n_tasks):
            preds = self.classifier.predictBinary(self.test_tasks[t]['X'], t)
            true_y = list(self.test_tasks[t]['Y'].flatten())

            if len(preds) == 0 or len(true_y) == 0:
                continue

            all_preds.extend(preds)
            all_true_y.extend(true_y)
            all_X_data.extend(self.test_tasks[t]['X'])

            # save the per-task results
            t_acc, t_auc, t_f1, t_precision, t_recall = helper.computeAllMetricsForPreds(
                preds, true_y)
            per_task_accs[t] = t_acc
            per_task_aucs[t] = t_auc
            per_task_f1[t] = t_f1
            per_task_precision[t] = t_precision
            per_task_recall[t] = t_recall

        print "\tHELD OUT TEST METRICS COMPUTED BY APPENDING ALL PREDS"
        acc, auc, f1, precision, recall = helper.computeAllMetricsForPreds(
            all_preds, all_true_y)
        print '\t\tAcc:', acc, 'AUC:', auc, 'F1:', f1, 'Precision:', precision, 'Recall:', recall

        print "\n\tHELD OUT TEST METRICS COMPUTED BY AVERAGING OVER TASKS"
        avg_acc = np.nanmean(per_task_accs)
        avg_auc = np.nanmean(per_task_aucs)
        avg_f1 = np.nanmean(per_task_f1)
        avg_precision = np.nanmean(per_task_precision)
        avg_recall = np.nanmean(per_task_recall)
        print '\t\tAcc:', avg_acc, 'AUC:', avg_auc, 'F1:', avg_f1, 'Precision:', avg_precision, 'Recall:', avg_recall

        print "\n\tHELD OUT TEST METRICS COMPUTED FOR EACH TASK"
        if not self.users_as_tasks:
            for t in range(self.n_tasks):
                task_name = self.test_tasks[t]['Name']
                task_name = helper.getFriendlyLabelName(task_name)
                print "\t\t", task_name, "- Acc:", per_task_accs[t], \
                    "AUC:", per_task_aucs[t], 'F1:', per_task_f1[t], \
                    'Precision:', per_task_precision[t], 'Recall:', per_task_recall[t]

        if self.test_csv_filename is not None:
            print "\tSAVING HELD OUT PREDICITONS"
            if self.users_as_tasks:
                task_column = 'user_id'
                label_name = helper.getFriendlyLabelName(self.file_prefix)
                wanted_label = helper.getOfficialLabelName(label_name)
                predictions_df = helper.get_test_predictions_for_df_with_task_column(
                    self.classifier.predictBinary,
                    self.test_csv_filename,
                    task_column,
                    self.test_tasks,
                    wanted_label=wanted_label,
                    num_feats_expected=np.shape(self.test_tasks[0]['X'])[1],
                    label_name=label_name,
                    tasks_are_ints=False)
            else:
                predictions_df = helper.get_test_predictions_for_df_with_no_task_column(
                    self.classifier.predictBinary,
                    self.test_csv_filename,
                    self.test_tasks,
                    num_feats_expected=np.shape(self.test_tasks[0]['X'])[1])
            predictions_df.to_csv(self.results_path + "Preds-" +
                                  self.save_prefix + '.csv')
        else:
            print "Uh oh, the test csv filename was not set, can't save test preds"

        print "\t SAVING CLASSIFIER"
        # Binary mode so the pickle is portable across protocols and platforms.
        with open(
                self.results_path + "PickledModel-" + self.save_prefix + '.p',
                "wb") as f:
            pickle.dump(self.classifier, f)
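    # Hedged sketch of reloading the pickled classifier later (assuming the same
    # Python environment and that the classifier's class is importable;
    # 'results_path', 'save_prefix', and 'test_tasks' stand in for the same
    # values used above):
    #
    #   import pickle
    #   with open(results_path + "PickledModel-" + save_prefix + '.p', "rb") as f:
    #       classifier = pickle.load(f)
    #   preds = classifier.predictBinary(test_tasks[0]['X'], 0)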
    def get_final_results(self, optimize_for='val_acc'):
        if self.users_as_tasks and not self.check_test:
            print "check_test is set to false, Will not evaluate performance on held-out test set."
            return
        print "\nAbout to evaluate results on held-out test set!!"
        print "Will use the settings that produced the best", optimize_for

        all_preds = []
        all_true_y = []
        per_task_accs = []
        per_task_aucs = []
        per_task_f1 = []
        per_task_precision = []
        per_task_recall = []

        for t in range(self.n_tasks):
            task_settings = self.find_best_setting_for_task(
                t, optimize_for=optimize_for)
            assert (task_settings['task_num'] == t)
            if not self.users_as_tasks:
                print "\nBEST SETTING FOR TASK", t, "-", task_settings[
                    'task_name']
                print "The highest", optimize_for, "of", task_settings[
                    optimize_for], "was found with the following settings:"
                print task_settings

            task_settings = self.convert_param_dict_for_use(task_settings)
            preds, true_y = self.get_preds_true_for_task(
                self.train_tasks, self.test_tasks, task_settings)
            if preds is None or true_y is None:
                continue

            all_preds.extend(preds)
            all_true_y.extend(true_y)

            # save the per-task results
            t_acc, t_auc, t_f1, t_precision, t_recall = helper.computeAllMetricsForPreds(
                preds, true_y)
            per_task_accs.append(t_acc)
            per_task_aucs.append(t_auc)
            per_task_f1.append(t_f1)
            per_task_precision.append(t_precision)
            per_task_recall.append(t_recall)

            if not self.users_as_tasks:
                print "\nFINAL TEST RESULTS FOR", helper.getFriendlyLabelName(
                    self.train_tasks[t]['Name'])
                print 'Acc:', t_acc, 'AUC:', t_auc, 'F1:', t_f1, 'Precision:', t_precision, 'Recall:', t_recall

        print "\nHELD OUT TEST METRICS COMPUTED BY AVERAGING OVER TASKS"
        avg_acc = np.nanmean(per_task_accs)
        avg_auc = np.nanmean(per_task_aucs)
        avg_f1 = np.nanmean(per_task_f1)
        avg_precision = np.nanmean(per_task_precision)
        avg_recall = np.nanmean(per_task_recall)
        print 'Acc:', avg_acc, 'AUC:', avg_auc, 'F1:', avg_f1, 'Precision:', avg_precision, 'Recall:', avg_recall

        if self.test_csv_filename is not None:
            print "\tSAVING HELD OUT PREDICITONS"
            if self.users_as_tasks:
                task_column = 'user_id'
                label_name = helper.getFriendlyLabelName(self.file_prefix)
                wanted_label = helper.getOfficialLabelName(label_name)
                predictions_df = helper.get_test_predictions_for_df_with_task_column(
                    self.predict_task,
                    self.test_csv_filename,
                    task_column,
                    self.test_tasks,
                    wanted_label=wanted_label,
                    num_feats_expected=np.shape(self.test_tasks[0]['X'])[1],
                    label_name=label_name,
                    tasks_are_ints=False)
            else:
                predictions_df = helper.get_test_predictions_for_df_with_no_task_column(
                    self.predict_task,
                    self.test_csv_filename,
                    self.test_tasks,
                    num_feats_expected=np.shape(self.test_tasks[0]['X'])[1])
            predictions_df.to_csv(self.results_path + "Preds-" +
                                  self.save_prefix + '.csv')
        else:
            print "Uh oh, the test csv filename was not set, can't save test preds"