import os
import sys

import numpy as np
import pandas as pd

# Project-level classes (DatasetOptions, Dataset, FeatureColumnsNZ,
# FeatureColumnsPatrec, NeuralNetModel, OptionsNN, ClassifierNN, Results,
# constantsNZ) are assumed to be imported from the surrounding package.


def predict(flags_obj):
    """Run the Wide-Deep prediction and evaluation loop.

    Args:
      flags_obj: An object containing parsed flag values.
    """
    dirProject = '/home/thomas/fusessh/scicore/projects/patrec'
    # dirProject = "Z:\\projects\\PATREC"
    dirResultsBase = os.path.join(dirProject, 'results/')
    dirData = os.path.join(dirProject, 'data')

    # Training data: PATREC 2012-2015, filtered to gynaecology discharges.
    dict_options_dataset_training = {
        'dir_data': dirData,
        'data_prefix': 'patrec',
        'dataset': '20122015',
        'grouping': 'verylightgrouping',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': None,
        'filtering': 'EntlassBereich_Gyn',
        'balanced': False,
        'resample': False,
    }
    dataset_options_training = DatasetOptions(dict_options_dataset_training)

    # Test data: identical configuration, but the PATREC 2016-2017 cohort.
    dict_options_dataset_testing = {
        'dir_data': dirData,
        'data_prefix': 'patrec',
        'dataset': '20162017',
        'grouping': 'verylightgrouping',
        'encoding': 'embedding',
        'newfeatures': None,
        'featurereduction': None,
        'filtering': 'EntlassBereich_Gyn',
        'balanced': False,
        'resample': False,
    }
    dataset_options_testing = DatasetOptions(dict_options_dataset_testing)

    # Pick the feature-column definitions that match the data source.
    if dict_options_dataset_testing['data_prefix'] == 'nz':
        feature_columns = FeatureColumnsNZ(
            dataset_options=dataset_options_testing)
        # feature_columns = FeatureColumnsNZFusion(
        #     dataset_options=dataset_options_testing)
    elif dict_options_dataset_testing['data_prefix'] == 'patrec':
        feature_columns = FeatureColumnsPatrec(
            dataset_options=dataset_options_testing)
        # feature_columns = FeatureColumnsPatrecFusion(
        #     dataset_options=dataset_options_testing)
    else:
        print('unknown data prefix... exiting')
        sys.exit()

    dict_dataset_options = {
        'train': dataset_options_training,
        'eval': None,
        'test': dataset_options_testing,
    }

    nn = NeuralNetModel('test', dict_dataset_options, feature_columns,
                        flags_obj)
    model_flags = nn.getFlags()

    # Derive the model name from the model directory and check whether the
    # model was warm-started from a pretrained network.
    if model_flags.model_dir.endswith('/'):
        trained_model = model_flags.model_dir.split('/')[-2]
    else:
        trained_model = model_flags.model_dir.split('/')[-1]

    if trained_model.startswith('warmstart'):
        pretrained = 'pretrained'
    else:
        pretrained = None

    print('warmstart: ' + str(trained_model.startswith('warmstart')))
    print('hidden units: ' + str(model_flags.hidden_units))

    dict_options_nn = {
        'hidden_units': model_flags.hidden_units,
        'learningrate': model_flags.learningrate,
        'dropout': model_flags.dropout,
        'batchnorm': model_flags.batchnorm,
        'batch_size': model_flags.batch_size,
        'training_epochs': model_flags.train_epochs,
        'pretrained': pretrained,
    }
    options_nn = OptionsNN(model_flags.model_dir, dataset_options_training,
                           options_clf=dict_options_nn)
    classifier_nn = ClassifierNN(options_nn)

    results_all_runs_test = Results(dirResultsBase, dataset_options_training,
                                    options_nn, 'test',
                                    dataset_options_testing)

    # Predict repeatedly on freshly balanced test subsets and average the
    # metrics over all runs.
    num_runs = 10
    test_auc = []
    test_avgprecision = []
    for k in range(num_runs):
        results = nn.predict()
        filename_data_testing = nn.getFilenameDatasetBalanced()
        df_testing_balanced = pd.read_csv(filename_data_testing)

        predictions = np.array([p['probabilities'] for p in results])
        print('get labels...: ' + str(filename_data_testing))
        labels = df_testing_balanced[
            dataset_options_testing.getEarlyReadmissionFlagname()].values

        res = classifier_nn.setResults(predictions, labels)
        results_all_runs_test.addResultsSingleRun(res)

        auc = res.getAUC()
        avgprecision = res.getAvgPrecision()
        print('')
        print('AUC: ' + str(auc))
        print('avg precision: ' + str(avgprecision))
        print('')
        test_auc.append(auc)
        test_avgprecision.append(avgprecision)

    print('')
    print('mean test auc: ' + str(np.mean(np.array(test_auc))))
    print('mean test avg precision: '
          + str(np.mean(np.array(test_avgprecision))))
    print('')

    results_all_runs_test.writeResultsToFileDataset()
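# --------------------------------------------------------------------------
# Hedged usage sketch (an assumption, not part of the original script): the
# flags_obj convention above mirrors the TensorFlow official Wide&Deep
# models, which parse their flags with absl and dispatch to a main()
# callable.  If the surrounding package defines its flags the same way,
# predict() could be wired up like this at the end of the file:
#
#     from absl import app, flags
#
#     def main(_):
#         predict(flags.FLAGS)
#
#     if __name__ == '__main__':
#         app.run(main)
# --------------------------------------------------------------------------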
# Per-year training over the NZ datasets.  `options_sgd` and
# `dataset_testing` are assumed to be defined earlier in the surrounding
# script.
years = [2012, 2013, 2014, 2015]
for year in years:
    dict_options_dataset_training = {
        'dir_data': dirData,
        'data_prefix': 'nz',
        'dataset': str(year),
        'newfeatures': {'names': constantsNZ.NEW_FEATURES},
        'featurereduction': None,
    }
    options_training = DatasetOptions(dict_options_dataset_training)
    dataset_training = Dataset(dataset_options=options_training)
    early_readmission_flagname = options_training.getEarlyReadmissionFlagname()
    print('dataset filename: ' + str(dataset_training.getFilename()))

    results_all_runs_train = Results(dirResultsBase, options_training,
                                     options_sgd, 'train')
    results_all_runs_eval = Results(dirResultsBase, options_training,
                                    options_sgd, 'eval')

    df_balanced_test = dataset_testing.getBalancedSubSet()

    num_runs = 1
    eval_aucs = []
    for run in range(num_runs):
        print('')
        [df_balanced_train, df_balanced_eval