def dfnn_para_ao(param_config: ParamConfig, train_data: Dataset, test_data: Dataset):
    """
    Run hdfnn_run for every (mu, mu1) pair in para_mu_list x para_mu1_list.

    :param param_config:
    :param train_data: training dataset
    :param test_data: test dataset
    :return: loss_c_train, loss_c_test, loss_d_train, loss_d_test tensors,
        each of shape (n_mu, n_mu1)
    """
    n_mu_list = param_config.para_mu_list.shape[0]
    n_mu1_list = param_config.para_mu1_list.shape[0]

    loss_c_train_mu_tsr = torch.zeros(n_mu_list, n_mu1_list).double()
    loss_c_test_mu_tsr = torch.zeros(n_mu_list, n_mu1_list).double()
    loss_d_train_mu_tsr = torch.zeros(n_mu_list, n_mu1_list).double()
    loss_d_test_mu_tsr = torch.zeros(n_mu_list, n_mu1_list).double()

    for i in torch.arange(n_mu_list):
        for j in torch.arange(n_mu1_list):
            param_config.para_mu_current = param_config.para_mu_list[i]
            param_config.para_mu1_current = param_config.para_mu1_list[j]
            param_config.log.info(f"running param mu: {param_config.para_mu_current}")
            param_config.log.info(f"running param mu1: {param_config.para_mu1_current}")

            loss_c_train, loss_c_test, loss_d_train, loss_d_test = \
                hdfnn_run(param_config, train_data, test_data)

            loss_c_train_mu_tsr[i, j] = loss_c_train.double()
            loss_c_test_mu_tsr[i, j] = loss_c_test.double()
            loss_d_train_mu_tsr[i, j] = loss_d_train.double()
            loss_d_test_mu_tsr[i, j] = loss_d_test.double()

    return loss_c_train_mu_tsr, loss_c_test_mu_tsr, loss_d_train_mu_tsr, loss_d_test_mu_tsr
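
# Hedged usage sketch (not part of the original module): the four returned grids
# are indexed as [mu_index, mu1_index], so the best (mu, mu1) pair can be
# recovered with plain torch indexing. Assumes the stored metric is minimized;
# use argmax instead if it is an accuracy.
#
#     c_tr, c_te, d_tr, d_te = dfnn_para_ao(param_config, train_data, test_data)
#     i_best, j_best = divmod(int(torch.argmin(c_te)), c_te.shape[1])
#     best_mu = param_config.para_mu_list[i_best]
#     best_mu1 = param_config.para_mu1_list[j_best]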
def dfnn_rules_kfolds(param_config: ParamConfig, dataset: Dataset):
    """
    Run dfnn_kfolds for every rule number in n_rules_list.

    :param param_config:
    :param dataset: training dataset
    :return: loss_c_train, loss_c_test, loss_d_train, loss_d_test tensors,
        each of shape (n_rules, n_kfolds)
    """
    n_rule_list = param_config.n_rules_list
    n_list_rule = n_rule_list.shape[0]

    loss_c_train_mu_tsr = torch.zeros(n_list_rule, param_config.n_kfolds).double()
    loss_c_test_mu_tsr = torch.zeros(n_list_rule, param_config.n_kfolds).double()
    loss_d_train_mu_tsr = torch.zeros(n_list_rule, param_config.n_kfolds).double()
    loss_d_test_mu_tsr = torch.zeros(n_list_rule, param_config.n_kfolds).double()

    for i in torch.arange(n_list_rule):
        n_rules = n_rule_list[int(i)]
        param_config.log.info(f"running at rule number: {n_rules}")
        param_config.n_rules = n_rules

        loss_c_train, loss_c_test, loss_d_train, loss_d_test = \
            dfnn_kfolds(param_config, dataset)

        loss_c_train_mu_tsr[i, :] = loss_c_train.squeeze().double()
        loss_c_test_mu_tsr[i, :] = loss_c_test.squeeze().double()
        loss_d_train_mu_tsr[i, :] = loss_d_train.squeeze().double()
        loss_d_test_mu_tsr[i, :] = loss_d_test.squeeze().double()

    return loss_c_train_mu_tsr, loss_c_test_mu_tsr, loss_d_train_mu_tsr, loss_d_test_mu_tsr
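
# Hedged usage sketch (illustration only, assuming the stored metric is
# minimized; swap argmin for argmax if it is an accuracy): average over the
# k folds and pick the best rule count.
#
#     c_tr, c_te, d_tr, d_te = dfnn_rules_kfolds(param_config, dataset)
#     mean_test = c_te.mean(dim=1)
#     best_rule = param_config.n_rules_list[int(torch.argmin(mean_test))]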
def dfnn_para_kfolds(param_config: ParamConfig, dataset: Dataset):
    """
    Run dfnn_kfolds for every parameter mu in para_mu_list.

    :param param_config:
    :param dataset: training dataset
    :return: loss_c_train, loss_c_test, loss_d_train, loss_d_test tensors,
        each of shape (n_mu, n_kfolds)
    """
    n_mu_list = param_config.para_mu_list.shape[0]

    loss_c_train_mu_tsr = torch.zeros(n_mu_list, param_config.n_kfolds).double()
    loss_c_test_mu_tsr = torch.zeros(n_mu_list, param_config.n_kfolds).double()
    loss_d_train_mu_tsr = torch.zeros(n_mu_list, param_config.n_kfolds).double()
    loss_d_test_mu_tsr = torch.zeros(n_mu_list, param_config.n_kfolds).double()

    for i in torch.arange(n_mu_list):
        param_config.para_mu_current = param_config.para_mu_list[i]
        param_config.log.info(f"running param mu: {param_config.para_mu_current}")

        loss_c_train, loss_c_test, loss_d_train, loss_d_test = \
            dfnn_kfolds(param_config, dataset)

        loss_c_train_mu_tsr[i, :] = loss_c_train.squeeze().double()
        loss_c_test_mu_tsr[i, :] = loss_c_test.squeeze().double()
        loss_d_train_mu_tsr[i, :] = loss_d_train.squeeze().double()
        loss_d_test_mu_tsr[i, :] = loss_d_test.squeeze().double()

    return loss_c_train_mu_tsr, loss_c_test_mu_tsr, loss_d_train_mu_tsr, loss_d_test_mu_tsr
def dfnn_rules_para(param_config: ParamConfig, train_data: Dataset, test_data: Dataset):
    """
    Iterate over the rule numbers in n_rules_list and, for each rule count,
    run dfnn_para (the parameter sweep over para_mu_list) on the distributed
    fuzzy neural network.

    :param param_config:
    :param train_data: training dataset
    :param test_data: test dataset
    :return: loss_c_train, loss_c_test, loss_d_train, loss_d_test tensors,
        each of shape (n_rules, n_mu)
    """
    n_mu_list = param_config.para_mu_list.shape[0]

    loss_c_train_tsr = torch.empty(0, n_mu_list).double()
    loss_c_test_tsr = torch.empty(0, n_mu_list).double()
    loss_d_train_tsr = torch.empty(0, n_mu_list).double()
    loss_d_test_tsr = torch.empty(0, n_mu_list).double()

    n_rule_list = param_config.n_rules_list
    for i in torch.arange(n_rule_list.shape[0]):
        n_rules = n_rule_list[int(i)]
        param_config.log.info(f"running at rule number: {n_rules}")
        param_config.n_rules = n_rules

        loss_c_train, loss_c_test, loss_d_train, loss_d_test = \
            dfnn_para(param_config, train_data, test_data)

        loss_c_train_tsr = torch.cat((loss_c_train_tsr, loss_c_train.unsqueeze(0).double()), 0)
        loss_c_test_tsr = torch.cat((loss_c_test_tsr, loss_c_test.unsqueeze(0).double()), 0)
        loss_d_train_tsr = torch.cat((loss_d_train_tsr, loss_d_train.unsqueeze(0).double()), 0)
        loss_d_test_tsr = torch.cat((loss_d_test_tsr, loss_d_test.unsqueeze(0).double()), 0)

    return loss_c_train_tsr, loss_c_test_tsr, loss_d_train_tsr, loss_d_test_tsr
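
# Hedged sketch (not in the original source): the stacked (n_rules, n_mu) loss
# grids returned by dfnn_rules_para can be persisted for offline analysis, e.g.:
#
#     results = dict(zip(("c_train", "c_test", "d_train", "d_test"),
#                        dfnn_rules_para(param_config, train_data, test_data)))
#     torch.save(results, "dfnn_rules_para_losses.pt")  # hypothetical file name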
from param_config import ParamConfig
from loss_utils import RMSELoss, LikelyLoss
from dfnn_run import dfnn_rules_para_kfold
from utils import load_data, Logger
import torch
import os

# Dataset configuration
# init the parameters
param_config = ParamConfig()
param_config.config_parse('hrss_config')

# regularization parameter grids: mu and mu1 take values 2^-10, 2^-9, ..., 2^0
para_mu_list = torch.arange(-10, 1, 1).double()
param_config.para_mu_list = torch.pow(2, para_mu_list).double()
param_config.para_mu1_list = torch.pow(2, para_mu_list).double()

# rule numbers to evaluate: 1..10
n_rule_list = torch.arange(1, 11, 1)
param_config.n_rules_list = n_rule_list

acc_c_train_arr = []
acc_c_test_arr = []
acc_d_train_arr = []
acc_d_test_arr = []

acc_c_train_list = []
acc_c_test_list = []
acc_d_train_list = []
acc_d_test_list = []

for i in torch.arange(len(param_config.dataset_list)):
    dataset_file = param_config.get_cur_dataset(int(i))
    # load dataset
import pandas as pd
import joblib
from Feat import nlp_utils
# sys.path.append("/home/algo/code/gitrepo/pylib/Kaggle_CrowdFlower/Code")

if __name__ == '__main__':

    ###############
    ## Load Data ##
    ###############
    print("Load data...")
    from param_config import ParamConfig
    config = ParamConfig(feat_folder="./feat_folder",
                         drop_html_flag=True,
                         stemmer_type="porter",
                         cooccurrence_word_exclude_stopword=False)
    dfTrain = pd.read_csv(config.original_train_data_path).fillna("")
    dfTest = pd.read_csv(config.original_test_data_path).fillna("")
    # number of train/test samples
    num_train, num_test = dfTrain.shape[0], dfTest.shape[0]
    print("Done.")

    ######################
    ## Pre-process Data ##
    ######################
    print("Pre-process data...")
    ## insert fake label for test
from param_config import ParamConfig
from loss_utils import RMSELoss, LikelyLoss
from dfnn_run import dfnn_rules_kfolds
from utils import load_data
import torch

# Dataset configuration
# init the parameters
param_config = ParamConfig()
param_config.config_parse('hrss_config')

# rule numbers to evaluate: 1..15
n_rule_list = torch.arange(1, 16, 1)
param_config.n_rules_list = n_rule_list

for i in torch.arange(len(param_config.dataset_list)):
    dataset_file = param_config.get_cur_dataset(int(i))

    # load dataset
    dataset = load_data(dataset_file, param_config.dataset_name)
    dataset.generate_n_partitions(param_config.n_run, param_config.patition_strategy)

    param_config.log.debug(f"=====starting on {dataset.name}=======")

    loss_fun = None
    if dataset.task == 'C':
        param_config.log.war(f"=====Mission: Classification=======")
        param_config.loss_fun = LikelyLoss()
    else:
        param_config.log.war(f"=====Mission: Regression=======")
        param_config.loss_fun = RMSELoss()
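
    # Hedged sketch (the original script is truncated here): with the loss
    # function chosen above, the imported dfnn_rules_kfolds sweep would be
    # invoked per dataset roughly as follows; variable names are illustrative.
    #
    #     loss_c_train, loss_c_test, loss_d_train, loss_d_test = \
    #         dfnn_rules_kfolds(param_config, dataset)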