def results_from_folder(folder_name, out_obj_folder, file_keyword, num_classes, line_keyword):
    file_list = list_files(folder_name)
    file_count = 0
    for file_name in file_list:
        if file_name.startswith('.'):
            continue
        if file_keyword not in file_name:
            continue
        print file_name
        file_count = file_count + 1
        feature_matrix = results_from_file(folder_name + file_name, line_keyword)
        print feature_matrix.shape
        out_obj_file = file_name.split('.')[0] + "_top15.out"
        save_obj([feature_matrix], out_obj_folder + out_obj_file)

def run_dcpc_main(data_folder, class_column, num_classes, obj_folder, threshold, logger=None):
    if logger is None:
        logger = init_logging('')
    file_list = list_files(data_folder)
    overall_time = 0
    file_count = 0
    out_obj_dict = {}
    for train_file in file_list:
        if "train_" not in train_file:
            continue
        logger.info(train_file)
        out_obj_file = train_file.replace('.txt', '_dcpc.obj')
        file_count = file_count + 1
        test_file = train_file.replace('train_', 'test_')
        x_matrix, y_vector = file_read_split(data_folder + train_file)
        min_class = min(y_vector)
        max_class = max(y_vector) + 1
        #logger.info("x matrix tran after shape: " + str(x_matrix.shape))
        #x_matrix = x_matrix.transpose((0, 2, 1))
        logger.info("x matrix tran after shape: " + str(x_matrix.shape))
        for label in range(min_class, max_class):
            label_index = np.where(y_vector == label)[0]
            label_x_matrix = x_matrix[label_index, :, :]
            logger.info("class: " + str(label))
            print "class: " + str(label)
            logger.info("x matrix tran before shape: " + str(label_x_matrix.shape))
            label_dcpc = computeDCPC(label_x_matrix, threshold)
            logger.info("class: " + str(label) + " dcpc shape: " + str(label_dcpc.shape))
            out_obj_dict[label] = label_dcpc
        logger.info("dcpc out obj: " + str(obj_folder + out_obj_file))
        save_obj([out_obj_dict], obj_folder + out_obj_file)

def run_dcpc_processing(dcpc_folder, num_classes, method=0, logger=None):
    logger.info('obj folder:' + dcpc_folder)
    dcpc_list = list_files(dcpc_folder)
    logger.info(dcpc_list)
    score_folder = dcpc_folder[:-1] + "_score/"
    score_folder = init_folder(score_folder)
    for dcpc_obj in dcpc_list:
        dcpc = load_obj(dcpc_folder + dcpc_obj)[0]
        if method == 0:
            out_label_array = []
            out_label_dict = {}
            for label in range(0, num_classes):
                logger.info('class: ' + str(label))
                label_dcpc = dcpc[label]
                logger.info("dcpc shape: " + str(label_dcpc.shape))
                attr_score = clever_rank(label_dcpc, logger)
                logger.info(attr_score)
                sorted_dict = sorted(attr_score.items(), key=operator.itemgetter(1), reverse=True)
                sorted_attr = []
                for item in sorted_dict:
                    sorted_attr.append(item[0])
                #label_array = []
                #for label in range(0, num_classes):
                #    class_array = sorted_attr
                #    label_array.append(class_array)
                out_label_array.append(sorted_attr)
                out_label_dict[label] = attr_score
                logger.info(sorted_attr)
                logger.info(attr_score)
            save_obj([out_label_array, out_label_dict], score_folder + dcpc_obj)
            logger.info("score obj: " + score_folder + dcpc_obj)
    return score_folder

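# Hedged usage sketch (not part of the original pipeline): one way run_dcpc_main and
# run_dcpc_processing are expected to chain, assuming the usual "train_*.txt" layout under
# data_folder. The folder paths, class count, and threshold below are hypothetical
# placeholders, not values read from any parameter file in this repository.
def _example_dcpc_pipeline():
    data_folder = "../data/example_dataset/"     # hypothetical path
    dcpc_obj_folder = "../object/example_dcpc/"  # hypothetical path
    logger = init_logging('')
    run_dcpc_main(data_folder, class_column=0, num_classes=4,
                  obj_folder=dcpc_obj_folder, threshold=0.5, logger=logger)
    score_folder = run_dcpc_processing(dcpc_obj_folder, num_classes=4, method=0, logger=logger)
    logger.info("per-class attribute rankings written to: " + score_folder)
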
def global_cnn_lda_feature_main(parameter_file, method):
    data_keyword, data_folder, attr_num, attr_len, class_column, start_class, num_classes, pckl_folder, log_folder, log_postfix, out_obj_folder = read_global_feature_generation_parameter(parameter_file)
    log_file = log_folder + data_keyword + '_' + method + log_postfix
    #log_file = ''  # without write to file
    logger = init_logging(log_file)
    logger.info('METHOD: ' + method)
    logger.info('DATA KEYWORD: ' + data_keyword)
    logger.info('ATTRIBUTE NUMBER: ' + str(attr_num))
    logger.info('ATTRIBUTE LENGTH: ' + str(attr_len))
    logger.info('CLASS NUMBER: ' + str(num_classes))
    logger.info('CLASS COLUMN: ' + str(class_column))
    logger.info('START CLASS: ' + str(start_class))
    logger.info('PCKL FOLDER: ' + pckl_folder)
    logger.info('LOG FOLDER: ' + log_folder)
    logger.info('LOG POSTFIX: ' + log_postfix)
    logger.info('OUTPUT FOLDER: ' + out_obj_folder)
    function_name = sys._getframe(1).f_code.co_name
    logger = init_logging(log_file)
    file_list = listFiles(pckl_folder)
    overall_time = 0
    ret_feature_array = []
    file_count = 0
    for train_file_pckl in file_list:
        if "train" not in train_file_pckl:
            continue
        train_file = train_file_pckl[0:train_file_pckl.index('.txt')] + '.txt'
        logger.info("PCKL FILE: " + train_file_pckl)
        logger.info("DATA FILE: " + train_file)
        train_x_matrix, train_y_vector = readFile(data_folder + train_file)
        out_matrix, weight_matrix, bias_vector = load_obj(pckl_folder + train_file_pckl)
        out_matrix = np.squeeze(out_matrix)
        row_num, attr_num, attr_len = out_matrix.shape
        out_matrix = out_matrix.reshape(row_num, attr_num * attr_len)
        if file_count == 0:
            logger.info('layer out matrix shape: ' + str(out_matrix.shape))
            logger.info('weight matrix shape: ' + str(weight_matrix.shape))
            logger.info('bias vector shape: ' + str(bias_vector.shape))
        feature_index_vector, run_time = gene_global_lda_feature(out_matrix, train_y_vector, attr_num, logger)
        overall_time = overall_time + run_time
        logger.info(feature_index_vector.shape)
        ret_feature_array.append(feature_index_vector)
        file_count = file_count + 1
        break
        #if file_count > 1:
        #    break
    ret_feature_array = np.matrix(ret_feature_array)
    logger.info(ret_feature_array.shape)
    logger.info("return feature array samples:")
    logger.info("\n" + str(ret_feature_array[0:4, 0:6]))
    start_time = time.time()
    ret_feature_index, ret_feature_value = majority_vote_index(ret_feature_array, -1)
    overall_time = overall_time + time.time() - start_time
    logger.info("\n" + str(ret_feature_index[0:6]))
    logger.info(method + " global feature run time (sec): " + str(overall_time))
    obj_file = out_obj_folder + method + "_global_feature.pckl"
    save_obj(ret_feature_index, obj_file)
    return ret_feature_index, overall_time

def global_lda_pca_feature_main(parameter_file, method):
    data_keyword, data_folder, attr_num, attr_len, class_column, start_class, num_classes, pckl_folder, log_folder, log_postfix, out_obj_folder = read_global_feature_generation_parameter(parameter_file)
    log_file = log_folder + data_keyword + '_' + method + log_postfix
    #log_file = ''  # without write to file
    logger = init_logging(log_file)
    logger.info('METHOD: ' + method)
    logger.info('DATA KEYWORD: ' + data_keyword)
    logger.info('ATTRIBUTE NUMBER: ' + str(attr_num))
    logger.info('ATTRIBUTE LENGTH: ' + str(attr_len))
    logger.info('CLASS NUMBER: ' + str(num_classes))
    logger.info('CLASS COLUMN: ' + str(class_column))
    logger.info('START CLASS: ' + str(start_class))
    logger.info('PCKL FOLDER: ' + pckl_folder)
    logger.info('LOG FOLDER: ' + log_folder)
    logger.info('LOG POSTFIX: ' + log_postfix)
    logger.info('OUTPUT FOLDER: ' + out_obj_folder)
    function_name = sys._getframe(1).f_code.co_name
    logger = init_logging(log_file)
    file_list = listFiles(data_folder)
    overall_time = 0
    ret_feature_array = []
    file_count = 0
    #method = 'pca'
    for train_file in file_list:
        if "train" not in train_file:
            continue
        logger.info(train_file)
        lda_feature_array = []
        x_matrix, y_vector = readFile(data_folder + train_file, class_column)
        if file_count == 0:
            logger.info("x data matrix shape: " + str(x_matrix.shape))
            logger.info("y vector shape: " + str(y_vector.shape))
        row_num, col_num = x_matrix.shape
        attr_len = col_num / attr_num
        if method == 'lda':
            feature_index_vector, run_time = gene_global_lda_feature(x_matrix, y_vector, attr_num, logger)
        elif method == 'pca':
            feature_index_vector, run_time = gene_global_pca_feature(x_matrix, attr_num, logger)
        overall_time = overall_time + run_time
        ret_feature_array.append(feature_index_vector)
        file_count = file_count + 1
        #break
        #if file_count > 1:
        #    break
    ret_feature_array = np.matrix(ret_feature_array)
    logger.info(ret_feature_array.shape)
    logger.info("ret_feature_array samples:")
    logger.info("\n" + str(ret_feature_array[0:4, :]))
    start_time = time.time()
    ret_feature_index, ret_feature_value = majority_vote_index(ret_feature_array, -1)
    overall_time = overall_time + time.time() - start_time
    logger.info("\n" + str(ret_feature_index[0:6]))
    logger.info("global feature run time (sec): " + str(overall_time))
    obj_file = out_obj_folder + '_' + method + "_global_feature.pckl"
    logger.info("global feature saved to: " + str(obj_file))
    save_obj(ret_feature_index, obj_file)
    return ret_feature_index, overall_time

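# Illustrative sketch only; this is an assumption about what a majority vote over per-fold
# rankings can look like, not the repository's actual majority_vote_index helper. Each row of
# fold_rank_matrix is one fold's attribute indices ordered best-first; attributes that appear
# near the top across many folds end up first in the combined order (a Borda-count style vote).
def _example_majority_vote(fold_rank_matrix):
    import numpy as np
    fold_rank_matrix = np.asarray(fold_rank_matrix, dtype=int)
    num_folds, num_attr = fold_rank_matrix.shape
    score = np.zeros(num_attr)
    for fold in range(num_folds):
        for position, attr in enumerate(fold_rank_matrix[fold]):
            # an attribute earns more points the earlier it appears in a fold's ranking
            score[attr] += num_attr - position
    combined_index = np.argsort(score)[::-1]  # best-supported attribute first
    return combined_index, score
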
def multi_proj_feature_classification(parameter_file, file_keyword, function_keyword="multi_proj_feature_classification"):
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, cnn_obj_folder, cnn_temp_folder, cnn_setting_file = read_feature_classification(parameter_file, function_keyword)
    log_folder = init_folder(log_folder)
    if method == 'cnn':
        return projected_cnn_classification_main(parameter_file, file_keyword)
    else:
        # Need to check the rest
        return False
    # NOTE: both branches above return, so the code below is currently unreachable.
    print data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, cnn_obj_folder, cnn_temp_folder, cnn_setting_file
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)
    print obj_folder
    file_list = list_files(data_folder)
    obj_list = list_files(obj_folder)
    class_column = 0
    header = True
    save_obj_folder = obj_folder[:-1] + "_" + method + "_out"
    save_obj_folder = init_folder(save_obj_folder)
    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(class_id) + '_top' + str(top_k) + '_' + method + '.log'
        print "log file: " + log_file
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('method: ' + method)
        logger.info('============')
        found_obj_file = ''
        for obj_file in obj_list:
            if file_key in obj_file:
                found_obj_file = obj_file
                break
        if found_obj_file == '':
            raise Exception('No obj file found')
        print found_obj_file
        found_obj_file = obj_folder + found_obj_file
        feature_array = load_obj(found_obj_file)[0]
        feature_array = np.array(feature_array)
        logger.info("feature array shape: " + str(feature_array.shape))
        test_file = train_file.replace('train', 'test')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(data_folder + train_file, data_folder + test_file, class_column, delimiter, header)
        if loop_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))
        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        data_stru.attr_num = top_k
        fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_classification(train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, feature_array, top_k, method, class_id, logger)
        logger.info("Fold F1: " + str(fold_f1_value))
        logger.info(method + ' fold training time (sec):' + str(fold_train_time))
        logger.info(method + ' fold testing time (sec):' + str(fold_test_time))
        logger.info(method + ' fold accuracy: ' + str(fold_accuracy))
        logger.info("save obj to " + save_obj_folder + file_key + "_" + method + "_project_" + method + "_result.ckpt")
        save_obj([fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix], save_obj_folder + file_key + "_" + method + "_project_" + method + "_result.ckpt")

def run_cnn_projected_feature_analysis(feature_folder, class_id, data_folder, data_file_keyword, method="rf_lda", log_folder='./'):
    data_file_list = list_files(data_folder)
    feature_file_list = list_files(feature_folder)
    out_obj_folder = feature_folder[:-1] + "_" + method
    out_obj_folder = init_folder(out_obj_folder)
    class_column = 0
    for train_file in data_file_list:
        if data_file_keyword not in train_file:
            continue
        data_key = train_file.replace('.txt', '')
        data_matrix, attr_num = file_reading(data_folder + train_file)
        train_x_matrix, train_y_vector = x_y_spliting(data_matrix, class_column)
        #train_y_vector = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 3])
        if class_id < 0:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = min_class + 1
        log_file = data_key + "_" + method + "_min" + str(min_class) + "_max" + str(max_class) + ".log"
        logger = setup_logger(log_folder + log_file)
        logger.info('data file: ' + train_file)
        out_obj_file = data_key + "_" + method + "_min" + str(min_class) + "_max" + str(max_class) + ".obj"
        out_obj_matrix = []
        for label in range(min_class, max_class):
            logger.info("class: " + str(label))
            feature_key = "_class" + str(label) + "_"
            for feature_file in feature_file_list:
                if data_key not in feature_file or feature_key not in feature_file:
                    continue
                logger.info("feature file: " + feature_file)
                feature_obj = load_obj(feature_folder + feature_file)
                train_feature = obj_processing(feature_obj[0])
                logger.info("train feature shape: " + str(train_feature.shape))
                class_train_y = np.where(train_y_vector == label, 1, 0)
                logger.info("feature method: " + str(method))
                if method == "rf_lda_sum":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis(train_feature, class_train_y, logger)
                elif method == "rf":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis(train_feature, class_train_y, logger)
                elif method == "lda":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis(train_feature, class_train_y, logger)
                elif method == "cpca":
                    class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_cpca_analysis(train_feature, class_train_y, logger)
                if method == "cpca":
                    class_attr_list = class_attr_imp_matrix
                else:
                    logger.info("class attr imp matrix shape: " + str(class_attr_imp_matrix.shape))
                    class_attr_list = map_attr_imp_analysis(class_attr_imp_matrix, logger)
                logger.info(class_attr_list)
                out_obj_matrix.append(class_attr_list)
        out_obj_matrix = np.array(out_obj_matrix)
        logger.info("out obj to: " + out_obj_folder + out_obj_file)
        logger.info(out_obj_matrix.shape)
        save_obj([out_obj_matrix], out_obj_folder + out_obj_file)

def cnn_train(train_x_matrix, train_y_matrix, test_x_matrix, test_y_matrix, num_classes, cnn_setting, input_x_placeholder, output_y_placeholder, logits_out, keep_prob, keeped_feature_list, saver_file="./", logger=None):
    if logger is None:
        logger = init_logging('')
    min_class = 0
    eval_method = cnn_setting.eval_method
    batch_size = cnn_setting.batch_size
    stop_threshold = cnn_setting.stop_threshold
    max_iter = cnn_setting.max_iter
    feature_method = cnn_setting.feature_method
    feature_obj_file = cnn_setting.out_obj_folder + saver_file
    saver_file = cnn_setting.out_model_folder + saver_file
    predict_y_proba = tf.nn.softmax(logits_out)
    prediction = tf.argmax(predict_y_proba, 1)
    actual = tf.argmax(output_y_placeholder, 1)
    correct_prediction = tf.equal(prediction, actual)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    if eval_method == 'f1':
        train_y_vector = np.argmax(train_y_matrix, axis=1)
        train_class_index_dict, train_min_length, train_max_length = class_label_vector_checking(train_y_vector)
        min_class = 0
        max_class = max(train_y_vector)
        num_classes = max_class + 1
        if max_class == 1:
            TP = tf.count_nonzero(prediction * actual, dtype=tf.float32)
            TN = tf.count_nonzero((prediction - 1) * (actual - 1), dtype=tf.float32)
            FP = tf.count_nonzero(prediction * (actual - 1), dtype=tf.float32)
            FN = tf.count_nonzero((prediction - 1) * actual, dtype=tf.float32)
            precision = (TP) / (TP + FP)
            recall = (TP) / (TP + FN)
            f1 = (2 * precision * recall) / (precision + recall)
            eval_method_value = f1
            eval_method_keyword = "f1"
        else:
            eval_method_value = accuracy
            eval_method_keyword = "acc with batch"
        coefficient_placeholder = tf.placeholder(tf.float32, shape=[num_classes])
        cross_entropy = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(targets=output_y_placeholder, logits=logits_out, pos_weight=coefficient_placeholder))
    else:
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y_placeholder, logits=logits_out))
        eval_method_value = accuracy
        eval_method_keyword = "acc"
    #print cross_entropy.get_shape()
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    cnn_session = tf.InteractiveSession()
    cnn_session.run(tf.global_variables_initializer())
    test_eval_value = 0
    best_eval_value = 0
    i = 0
    start = 0
    epoch = 0
    end = batch_size
    batch_each_class = int(batch_size / num_classes)
    overall_len = len(train_y_matrix)
    saver = tf.train.Saver()
    train_run_time = 0
    np.random.seed(epoch)
    batch_index = np.random.permutation(overall_len)
    logger.info("Random Epoch:" + str(epoch) + str(batch_index[0:5]))
    f1_unbalance_count = np.zeros(num_classes)
    second_chance = False
    re_init = False
    while (test_eval_value < stop_threshold):
        if start >= overall_len:
            start = 0
            end = start + batch_size
            epoch = epoch + 1
            np.random.seed(epoch)
            # regenerate the shuffle before logging it, so the logged indices belong to this epoch
            batch_index = np.random.permutation(overall_len)
            logger.info("Random Epoch:" + str(epoch) + str(batch_index[0:5]))
        elif end > overall_len:
            end = overall_len
        batch_x_matrix = train_x_matrix[batch_index[start:end], :, :, :]
        batch_y_matrix = train_y_matrix[batch_index[start:end], :]
        #print 'batch_x_matrix shape'
        #print batch_x_matrix.shape
        #print batch_y_matrix.shape
        if eval_method == 'f1':
            if i == 0:
                logger.info("Batch controlled")
            ### Normal BATCH Weight
            #batch_y_vector = np.argmax(batch_y_matrix, axis=1)
            #batch_class_index_dict, batch_min_length, batch_max_length = class_label_vector_checking(batch_y_vector)
            #coefficients_vector = []
            #batch_class_index_dict_keys = batch_class_index_dict.keys()
            #for c_label in range(min_class, max_class + 1):
            #    if c_label not in batch_class_index_dict_keys:
            #        add_index_vector_len = 0.1
            #    else:
            #        add_index_vector_len = len(batch_class_index_dict[c_label])
            #    coefficients_vector.append(float(batch_max_length) / float(add_index_vector_len))
            #coefficients_vector = np.array(coefficients_vector)
            ### End of Normal BATCH Weight
            # BATCH_CONTROLLED
            batch_y_vector = np.argmax(batch_y_matrix, axis=1)
            batch_class_index_dict, batch_min_length, batch_max_length = class_label_vector_checking(batch_y_vector)
            if i < 3:
                logger.info("class index before: ")
                logger.info(batch_class_index_dict)
            coefficients_vector = []
            batch_class_index_dict_keys = batch_class_index_dict.keys()
            for c_label in range(min_class, max_class + 1):
                #print "class: " + str(c_label)
                #print class_label_vector_checking
                if c_label not in batch_class_index_dict_keys:
                    f1_unbalance_count[c_label] = f1_unbalance_count[c_label] + 1
                    c_label_index = train_class_index_dict[c_label]
                    c_label_index_len = len(c_label_index)
                    add_index_vector_len = 0
                    if c_label_index_len > batch_each_class:
                        add_index_vector = np.random.choice(c_label_index_len, batch_each_class, replace=False)
                        if (i < 3):
                            logger.info("add index vector for c " + str(c_label))
                            logger.info(add_index_vector)
                        add_index_vector_len = len(add_index_vector)
                        batch_x_matrix = np.concatenate((batch_x_matrix, train_x_matrix[c_label_index[add_index_vector], :, :, :]), axis=0)
                        batch_y_matrix = np.concatenate((batch_y_matrix, train_y_matrix[c_label_index[add_index_vector], :]), axis=0)
                    else:
                        batch_x_matrix = np.concatenate((batch_x_matrix, train_x_matrix[c_label_index, :, :, :]), axis=0)
                        batch_y_matrix = np.concatenate((batch_y_matrix, train_y_matrix[c_label_index, :]), axis=0)
                        add_index_vector_len = c_label_index_len
                else:
                    batch_class_index = batch_class_index_dict[c_label]
                    add_index_vector_len = len(batch_class_index)
                    c_label_index = train_class_index_dict[c_label]
                    c_label_index_len = len(c_label_index)
                    if add_index_vector_len < batch_each_class:
                        add_count = batch_each_class - add_index_vector_len
                        if c_label_index_len > add_count:
                            add_index_vector = np.random.choice(c_label_index_len, add_count, replace=False)
                            if (i < 3):
                                logger.info("add index vector for c " + str(c_label))
                                logger.info(add_index_vector)
                            add_index_vector_len = add_index_vector_len + len(add_index_vector)
                            batch_x_matrix = np.concatenate((batch_x_matrix, train_x_matrix[c_label_index[add_index_vector], :, :, :]), axis=0)
                            batch_y_matrix = np.concatenate((batch_y_matrix, train_y_matrix[c_label_index[add_index_vector], :]), axis=0)
                        else:
                            batch_x_matrix = np.concatenate((batch_x_matrix, train_x_matrix[c_label_index, :, :, :]), axis=0)
                            batch_y_matrix = np.concatenate((batch_y_matrix, train_y_matrix[c_label_index, :]), axis=0)
                            add_index_vector_len = add_index_vector_len + c_label_index_len
                    elif add_index_vector_len > 2 * batch_each_class:
                        remove_count = (add_index_vector_len - 2 * batch_each_class)
                        remove_index_vector = np.random.choice(batch_class_index, remove_count, replace=False)
                        add_index_vector_len = add_index_vector_len - len(remove_index_vector)
                        batch_x_matrix = np.delete(batch_x_matrix, remove_index_vector, axis=0)
                        batch_y_matrix = np.delete(batch_y_matrix, remove_index_vector, axis=0)
                        batch_y_vector = np.argmax(batch_y_matrix, axis=1)
                        batch_class_index_dict, batch_min_length, batch_max_length = class_label_vector_checking(batch_y_vector)
                coefficients_vector.append(float(add_index_vector_len))
            #print "End of F1"
            coefficients_vector = np.array(coefficients_vector)
            batch_max_len = float(max(coefficients_vector))
            coefficients_vector = batch_max_len / coefficients_vector
            if i < 3:
                batch_y_vector = np.argmax(batch_y_matrix, axis=1)
                batch_class_index_dict, batch_min_length, batch_max_length = class_label_vector_checking(batch_y_vector)
                logger.info("class index after: ")
                logger.info(batch_class_index_dict)
                logger.info("coefficient vector: ")
                logger.info(coefficients_vector)
            start_time = time.time()
            train_step.run(feed_dict={input_x_placeholder: batch_x_matrix, output_y_placeholder: batch_y_matrix, coefficient_placeholder: coefficients_vector, keep_prob: 1})
            train_run_time = train_run_time + time.time() - start_time
        else:
            start_time = time.time()
            train_step.run(feed_dict={input_x_placeholder: batch_x_matrix, output_y_placeholder: batch_y_matrix, keep_prob: 1})
            train_run_time = train_run_time + time.time() - start_time
        if i % 100 == 0:
            fir_weight_variable = tf.get_default_graph().get_tensor_by_name("conv_w_0:0")
            logger.info("fir weight")
            logger.info(fir_weight_variable.get_shape())
            fir_weight_var_val = cnn_session.run(fir_weight_variable)
            logger.info(fir_weight_var_val[0, 0:5, 0, 0])
            test_eval_value = eval_method_value.eval(feed_dict={input_x_placeholder: test_x_matrix, output_y_placeholder: test_y_matrix, keep_prob: 1})
            if str(test_eval_value) == 'nan':
                test_eval_value = 0
            print_str = "step " + str(i) + ", testing " + eval_method_keyword + ": " + str(test_eval_value)
            logger.info(print_str)
            if best_eval_value < test_eval_value:
                # Save the variables to disk.
                best_eval_value = test_eval_value
                save_path = saver.save(cnn_session, saver_file)
                print_str = "Model saved in file: " + save_path + ' at iteration: ' + str(i)
                logger.info(print_str)
        i = i + 1
        start = end
        end = end + batch_size
        if epoch > max_iter:
            logger.info("best eval value at epoch: " + str(epoch))
            logger.info("best eval value to break")
            logger.info(best_eval_value)
            break
    start_time = time.time()
    test_eval_value = eval_method_value.eval(feed_dict={input_x_placeholder: test_x_matrix, output_y_placeholder: test_y_matrix, keep_prob: 1})
    test_run_time = time.time() - start_time
    if test_eval_value < best_eval_value:
        cnn_session.close()
        cnn_session = tf.InteractiveSession()
        saver.restore(cnn_session, saver_file)
    else:
        best_eval_value = test_eval_value
    #if best_eval_value == 0:
    #    return
    logger.info("Running iteration: %d" % (i))
    logger.info("final best " + eval_method_keyword + ": " + str(best_eval_value))
    logger.info(f1_unbalance_count)
    cnn_predict_proba = cnn_session.run(predict_y_proba, feed_dict={input_x_placeholder: test_x_matrix, keep_prob: 1.0})
    logger.info("CNN model saved: " + str(saver_file))
    if cnn_setting.feature_method == 'none':
        cnn_session.close()
        return best_eval_value, train_run_time, test_run_time, cnn_predict_proba, saver_file, ''
    #keeped_feature_value_list = []
    logger.info("feature value generation")
    #for feature_placeholder in keeped_feature_list:
    #    feature_value = feature_placeholder.eval(feed_dict={input_x_placeholder: train_x_matrix, keep_prob: 1.0})
    #    keeped_feature_value_list.append(feature_value)
    #    logger.info(feature_value.shape)
    test_keeped_feature_value_list = cnn_session.run(keeped_feature_list, feed_dict={input_x_placeholder: test_x_matrix, keep_prob: 1.0})
    logger.info('test feature list ready')
    start = 0
    end = 0
    train_row = len(train_x_matrix)
    train_obj_list = []
    while (start < train_row):
        logger.info(start)
        end = start + 1000
        if end > train_row:
            end = train_row
        keep_obj = cnn_session.run(keeped_feature_list[0], feed_dict={input_x_placeholder: train_x_matrix[start:end, :, :, :], keep_prob: 1.0})
        train_obj_list.append(keep_obj)
        start = end
    #keeped_feature_value_list = cnn_session.run(keeped_feature_list, feed_dict={input_x_placeholder: train_x_matrix, keep_prob: 1.0})
    logger.info('train feature list ready')
    logger.info("The order of feature value list: fir_out_conv_no_act, fir_out_conv, fir_weight, fir_bias, last_conv, weight_full, bias_full")
    logger.info("All features saved to ")
    logger.info("CNN feature list saved to: " + feature_obj_file)
    save_obj([train_obj_list, test_keeped_feature_value_list], feature_obj_file)
    cnn_session.close()
    return best_eval_value, train_run_time, test_run_time, cnn_predict_proba, saver_file, feature_obj_file

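# Small standalone sketch of the class-weighting rule used in the 'f1' branch of cnn_train:
# each class weight is the size of the largest class in the (rebalanced) batch divided by that
# class's own count, so under-represented classes get proportionally larger pos_weight values
# in tf.nn.weighted_cross_entropy_with_logits. The helper name below is hypothetical; the
# floor of 1 only guards against division by zero here (cnn_train avoids empty classes by
# topping up the batch instead).
def _example_batch_class_weights(batch_y_vector, num_classes):
    import numpy as np
    batch_y_vector = np.asarray(batch_y_vector)
    counts = np.array([max(float(np.sum(batch_y_vector == c)), 1.0) for c in range(num_classes)])
    return float(counts.max()) / counts
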
def run_pure_pv_evaluation(file_keyword, parameter_file='../../parameters/pv_baseline_evaluation.txt', function_keyword="pure_pv_evaluation"):
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, method, log_folder, out_obj_folder = read_pure_feature_generation(parameter_file, function_keyword)
    print data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, method, log_folder, out_obj_folder
    file_list = list_files(data_folder)
    file_count = 0
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        train_key = train_file.replace('.txt', '')
        file_count = file_count + 1
        data_matrix, attr_num = file_reading(data_folder + train_file)
        train_x_matrix, train_y_vector = x_y_spliting(data_matrix, class_column)
        train_row, train_col = train_x_matrix.shape
        train_x_matrix = train_x_matrix.reshape(train_row, attr_num, attr_len)
        if class_id < 0:
            min_class = min(train_y_vector)
            max_class = max(train_y_vector) + 1
        else:
            min_class = class_id
            max_class = min_class + 1
        log_file = train_key + "_" + method + "_min" + str(min_class) + "_max" + str(max_class) + "_pure_projected.log"
        #logger = setup_logger('')
        logger = setup_logger(log_folder + log_file)
        print "log file: " + log_folder + log_file
        logger.info(train_file)
        out_obj_file = train_key + "_" + method + "_min" + str(min_class) + "_max" + str(max_class) + "_pure_projected.obj"
        out_obj_matrix = []
        logger.info("min class: " + str(min_class))
        logger.info("max class: " + str(max_class))
        for label in range(min_class, max_class):
            class_train_y = np.where(train_y_vector == label, 1, 0)
            logger.info("label: " + str(label))
            if method == 'rf_lda':
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_lda_analysis(train_x_matrix, class_train_y, logger)
            elif method == "rf":
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_rf_analysis(train_x_matrix, class_train_y, logger)
            elif method == "lda":
                class_attr_imp_matrix, class_run_time = project_cnn_feature_combined_lda_analysis(train_x_matrix, class_train_y, logger)
            logger.info("class attr imp matrix shape: " + str(class_attr_imp_matrix.shape))
            class_attr_list = map_attr_imp_analysis(class_attr_imp_matrix, logger)
            logger.info(class_attr_list)
            logger.info(class_attr_list.shape)
            out_obj_matrix.append(class_attr_list)
        out_obj_matrix = np.array(out_obj_matrix)
        logger.info("out obj to: " + out_obj_folder + out_obj_file)
        logger.info(out_obj_matrix.shape)
        save_obj([out_obj_matrix], out_obj_folder + out_obj_file)

def run_lda_proj_feature_main(data_folder, class_column, attr_num, num_classes, lda_proj_obj_file, transpose=False, logger=None):
    if logger is None:
        logger = init_logging('')
    file_list = listFiles(data_folder)
    overall_time = 0
    ret_lda_feature_array = []
    ret_lda_feature_weight = []
    file_count = 0
    lda_time = 0
    norm_time = 0
    for train_file in file_list:
        if "train" not in train_file:
            continue
        logger.info(train_file)
        file_count = file_count + 1
        lda_feature_array = []
        x_matrix, y_vector = readFile(data_folder + train_file, class_column)
        #x_matrix = x_matrix[0:100, :]
        #y_vector = y_vector[0:100]
        row_num, col_num = x_matrix.shape
        logger.info(x_matrix.shape)
        attr_len = col_num / attr_num
        if transpose == True:
            x_matrix_transpose = []
            x_matrix = x_matrix.reshape(row_num, attr_num, attr_len)
            for r in range(4, attr_num):
                temp_x_matrix = x_matrix[:, r, :]
                fold_feature_matrix, fold_norm_time, fold_lda_time = gene_projected_lda_feature(temp_x_matrix, y_vector)
                print fold_feature_matrix
                break
        else:
            start_time = time.time()
            fold_feature_matrix, fold_norm_time, fold_lda_time = gene_projected_lda_feature(x_matrix, y_vector)
            overall_time = overall_time + time.time() - start_time
            logger.info("fold norm: " + str(fold_norm_time))
            logger.info("fold lda: " + str(fold_lda_time))
            norm_time = fold_norm_time + norm_time
            lda_time = fold_lda_time + lda_time
        f_row_num, f_col_num = fold_feature_matrix.shape
        fold_feature_array = np.zeros((f_row_num, attr_num))
        fold_feature_weight_array = np.zeros((f_row_num, attr_num))
        logger.info(fold_feature_array.shape)
        for i in range(0, f_row_num):
            temp_vector = np.zeros(attr_num)
            for j in range(0, f_col_num):
                attr_index = j / attr_len
                temp_vector[attr_index] = temp_vector[attr_index] + fold_feature_matrix[i, j]
            fold_feature_weight_array[i, :] = temp_vector
            fold_feature_array[i, :] = argsort(temp_vector)[::-1]
        ret_lda_feature_array.append(fold_feature_array)
        ret_lda_feature_weight.append(fold_feature_weight_array)
    logger.info("overall norm: " + str(norm_time))
    logger.info("overall lda: " + str(lda_time))
    ret_lda_feature_weight = np.array(ret_lda_feature_weight)
    ret_lda_feature_array = np.array(ret_lda_feature_array)
    logger.info(ret_lda_feature_array.shape)
    ret_lda_feature_weight = np.sum(ret_lda_feature_weight, axis=0)
    ret_lda_feature_array = ret_lda_feature_array.astype(int)
    combine_time = 0
    start_time = time.time()
    lda_feature_array = fold_feature_combination_F_C_A(ret_lda_feature_array)
    combine_time = time.time() - start_time
    overall_time = overall_time + combine_time
    logger.info("combine lda: " + str(overall_time))
    logger.info(lda_feature_array.shape)
    logger.info(lda_feature_array[0:7, 0:7])
    logger.info(ret_lda_feature_weight[0:7, 0:7])
    logger.info("pure lda projected feature generation overall time (sec)")
    logger.info(overall_time)
    save_obj([lda_feature_array, ret_lda_feature_weight], lda_proj_obj_file)
    logger.info("Object saved to " + lda_proj_obj_file)
    return lda_feature_array

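# Compact restatement of the per-row aggregation loop in run_lda_proj_feature_main, assuming
# numpy is available: each attribute owns a contiguous block of attr_len columns in the
# flattened LDA weight matrix, its importance is the sum over that block, and attributes are
# then ranked in descending order of the summed weight. The helper name is hypothetical.
def _example_attr_rank_from_lda_weights(fold_feature_matrix, attr_num, attr_len):
    import numpy as np
    fold_feature_matrix = np.asarray(fold_feature_matrix)
    row_num = fold_feature_matrix.shape[0]
    # sum the attr_len columns belonging to each attribute
    weights = fold_feature_matrix.reshape(row_num, attr_num, attr_len).sum(axis=2)
    # per row: attribute indices ordered from largest to smallest summed weight
    ranks = np.argsort(weights, axis=1)[:, ::-1]
    return ranks, weights
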
def run_pca_proj_feature_main(data_folder, class_column, attr_num, num_classes, pca_proj_obj_file, transpose=False, logger=None):
    if logger is None:
        logger = init_logging('')
    ret_pca_feature_array = []
    overall_time = 0
    file_list = listFiles(data_folder)
    file_count = 0
    for train_file in file_list:
        if "train" not in train_file:
            continue
        logger.info(train_file)
        file_count = file_count + 1
        #if file_count > 2:
        #    break
        pca_feature_array = []
        x_matrix, y_vector = readFile(data_folder + train_file, class_column)
        row_num, col_num = x_matrix.shape
        attr_len = col_num / attr_num
        y_vector = y_vector.astype(int)
        start_class = min(y_vector)
        d3_data_matrix = x_matrix.reshape(row_num, attr_num, attr_len)
        for i in range(0, num_classes):
            class_label = i + start_class
            logger.info("class label: " + str(i))
            #print "class: " + str(class_label)
            class_index = np.where(y_vector == class_label)[0]
            class_data_matrix = d3_data_matrix[class_index, :, :]
            #print class_data_matrix.shape
            start_time = time.time()
            class_im_index, class_im_vector = run_pca_proj_feature_3D(class_data_matrix)
            overall_time = overall_time + time.time() - start_time
            logger.info(class_im_index.shape)
            pca_feature_array.append(class_im_index)
        pca_feature_array = np.array(pca_feature_array)
        #print pca_feature_array.shape
        logger.info(pca_feature_array.shape)
        logger.info("end of " + train_file)
        ret_pca_feature_array.append(pca_feature_array)
    logger.info("Final:")
    ret_pca_feature_array = np.array(ret_pca_feature_array)
    logger.info(ret_pca_feature_array.shape)
    start_time = time.time()
    feature_array = fold_feature_combination_F_C_A(ret_pca_feature_array)
    overall_time = overall_time + time.time() - start_time
    #print feature_array.shape
    logger.info(feature_array.shape)
    logger.info(feature_array[0:3, 0:5])
    logger.info("Object saved to " + pca_proj_obj_file)
    logger.info("Overall time (sec): ")
    logger.info(str(overall_time))
    #print feature_array
    save_obj([feature_array], pca_proj_obj_file)
    return pca_feature_array

def pv_cnn_generation_main(parameter_file, file_keyword, function_keyword="pv_cnn_generation"):
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file = read_pv_cnn_generation(parameter_file, function_keyword)
    print data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, method, log_folder, out_obj_folder, out_model_folder, cnn_setting_file
    log_folder = init_folder(log_folder)
    out_obj_folder = init_folder(out_obj_folder)
    out_model_folder = init_folder(out_model_folder)
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)
    file_list = list_files(data_folder)
    obj_list = list_files(obj_folder)
    file_count = 0
    class_column = 0
    header = True
    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.out_obj_folder = out_obj_folder
    cnn_setting.out_model_folder = out_model_folder
    cnn_setting.feature_method = 'save'
    cnn_setting.eval_method = 'f1'
    init_folder(out_obj_folder)
    init_folder(out_model_folder)
    result_obj_folder = obj_folder + method + "_result_folder"
    result_obj_folder = init_folder(result_obj_folder)
    delimiter = ' '
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(class_id) + '_' + method + '.log'
        print "log file: " + log_file
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        #logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')
        test_file = train_file.replace('train', 'test')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(data_folder + train_file, data_folder + test_file, class_column, delimiter, header)
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))
        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        # Call the projected feature function here, just need to set feature_dict = None
        feature_dict = None
        top_k = -1
        model_save_file = file_key + '_count' + str(file_count) + '_' + method
        if method == 'fcn':
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_ijcnn_fcn(train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, data_stru, cnn_setting, feature_dict, top_k, model_save_file, class_id, logger)
        else:
            fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_cnn(train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, data_stru, cnn_setting, feature_dict, top_k, model_save_file, class_id, logger)
        logger.info("Fold F1: " + str(fold_f1_value))
        logger.info(method + ' fold training time (sec):' + str(fold_train_time))
        logger.info(method + ' fold testing time (sec):' + str(fold_test_time))
        logger.info(method + ' fold accuracy: ' + str(fold_accuracy))
        logger.info("save obj to " + result_obj_folder + file_key + "_all_feature_" + method + "_result.ckpt")
        save_obj([fold_accuracy, fold_f1_value, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix], result_obj_folder + file_key + "_all_feature_" + method + "_result.ckpt")

def run_load_predict_cnn(fold_keyword, model_saved_folder, feature_array, top_k, test_x_matrix, test_y_vector, data_stru, cnn_setting, group_all=True, save_obj_folder="./", logger=None):
    if logger is None:
        logger = init_logging('')
    real_num_classes = data_stru.num_classes
    model_list = list_files(model_saved_folder)
    data_stru.num_classes = 2
    load_time = 0
    test_time = 0
    multi_predict = []
    for c in range(real_num_classes):
        logger.info("Class: " + str(c))
        class_keyword = "class" + str(c) + "_"
        found_model_file = ""
        for model_file in model_list:
            if ".index" not in model_file:
                continue
            if fold_keyword not in model_file:
                continue
            if class_keyword not in model_file:
                continue
            found_model_file = model_file.replace(".index", "")
            print (found_model_file)
            break
        if found_model_file == "":
            raise Exception("Model for " + class_keyword + " and " + fold_keyword + " Not Found!!!")
        else:
            found_model_file = model_saved_folder + found_model_file
        class_feature = feature_array[c]
        class_feature = class_feature[0:top_k]
        logger.info("model file: " + str(model_saved_folder + found_model_file))
        logger.info("feature list: " + str(class_feature))
        temp_test_x_matrix = test_x_matrix[:, :, class_feature, :]
        logger.info("In run_load_predict_cnn: " + str(temp_test_x_matrix.shape))
        start_time = time.time()
        cnn_session, predict_y_proba, train_x_placeholder, keep_prob_placeholder = load_model(found_model_file, data_stru, cnn_setting, group_all, logger)
        load_time = load_time + time.time() - start_time
        start_time = time.time()
        cnn_predict_proba = load_model_predict(cnn_session, temp_test_x_matrix, predict_y_proba, train_x_placeholder, keep_prob_placeholder)
        #print (cnn_predict_proba[0:10, :])
        test_time = test_time + time.time() - start_time
        multi_predict.append(cnn_predict_proba[:, 1])
        cnn_session.close()
    multi_predict = np.array(multi_predict)
    #print multi_predict[0:2, 5:11]
    multi_predict_vector = np.argmax(multi_predict, axis=0)
    save_obj_file = save_obj_folder + fold_keyword + "_" + str(top_k) + ".out"
    save_obj([multi_predict], save_obj_file)
    logger.info("output obj saved to: " + save_obj_file)
    logger.info("multi predict matrix shape: " + str(multi_predict.shape))
    logger.info("multi predict vector shape: " + str(multi_predict_vector.shape))
    #print (str(multi_predict_vector[0:10]))
    logger.info("test y vector: " + str(test_y_vector.shape))
    #print (str(test_y_vector[0:10]))
    acc = accuracy_score(test_y_vector, multi_predict_vector)
    data_stru.num_classes = real_num_classes
    acc1, f1_list = multiple_f1_value_precision_recall_accuracy(multi_predict_vector, test_y_vector, logger)
    if acc != acc1:
        raise Exception("check accuracy")
    return acc, f1_list, load_time, test_time

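# Minimal sketch of the one-vs-rest decision used in run_load_predict_cnn, assuming numpy is
# available: multi_predict stacks, per class, the positive-class probability from each binary
# CNN, giving a (num_classes, num_samples) matrix; the predicted label for a sample is the
# class whose binary model assigned it the highest positive probability. The helper name is
# hypothetical.
def _example_one_vs_rest_decision(per_class_positive_proba):
    import numpy as np
    multi_predict = np.array(per_class_positive_proba)  # shape: (num_classes, num_samples)
    return np.argmax(multi_predict, axis=0)
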
def mask_evaluation_main(log_folder, obj_folder, out_obj_folder, obj_keyword, shap_k=-1, shap_min=-1, shap_max=-1, func_key="arxiv_mask_gene"):
    log_folder = log_folder + func_key
    log_folder = init_folder(log_folder)
    log_file = obj_keyword + "_allclass_" + func_key + ".log"
    #logger = setup_logger('')
    logger = setup_logger(log_folder + log_file)
    logger.info("log folder: " + log_folder)
    logger.info("obj folder: " + obj_folder)
    obj_file_list = list_files(obj_folder)
    if shap_k != -1:
        obj_sec_key = "shapNum" + str(shap_k) + "_shapMin" + str(shap_min) + "_shapMax" + str(shap_max)
    else:
        obj_sec_key = ".obj"
    min_class = 100
    max_class = -1
    output_array = []
    for obj_file in obj_file_list:
        if obj_keyword not in obj_file:
            continue
        if "_class" not in obj_file:
            continue
        if obj_sec_key not in obj_file:
            continue
        class_key = obj_file.split('_')[-1]
        class_key = class_key.replace('class', '').replace('.obj', '')
        logger.info("obj file:" + obj_file)
        logger.info("class key: " + class_key)
        class_key = int(class_key)
        if min_class > class_key:
            min_class = class_key
        if max_class < class_key:
            max_class = class_key
        shap_mask = load_obj(obj_folder + obj_file)[0]
        if len(shap_mask) == 0:
            continue
        shap_mask = numpy.array(shap_mask)
        shap_mask = numpy.squeeze(shap_mask)
        logger.info("shap_mask shape: " + str(shap_mask.shape))
        #shap_num, attr_num = shap_mask.shape
        shap_mask = numpy.absolute(shap_mask)
        shap_mask = numpy.sum(shap_mask, axis=0)
        logger.info(shap_mask)
        sort_index = numpy.argsort(shap_mask)
        imp_value = 0
        norm_imp = numpy.zeros(len(shap_mask))
        for index in sort_index:
            norm_imp[index] = imp_value
            imp_value = imp_value + 1
        shap_mask_index = numpy.argsort(norm_imp)[::-1]
        logger.info(shap_mask_index)
        logger.info("====")
        output_array.append(shap_mask_index)
        logger.info("shap_mask final shape: " + str(shap_mask.shape))
    output_array = numpy.array(output_array)
    obj_file = obj_keyword + "_min" + str(min_class) + "_max" + str(max_class) + "out.obj"
    logger.info("final output obj shape: " + str(output_array.shape))
    logger.info(output_array)
    save_obj([output_array], out_obj_folder + obj_file)

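# Equivalent, compact form of the rank normalization performed in mask_evaluation_main, using
# numpy as that function does: norm_imp assigns rank 0 to the smallest summed |mask| value and
# rank n-1 to the largest, so argsort(norm_imp)[::-1] lists attribute indices from most to
# least important. The helper name is hypothetical.
def _example_mask_rank(shap_mask_sum):
    import numpy
    shap_mask_sum = numpy.asarray(shap_mask_sum)
    order = numpy.argsort(shap_mask_sum)                 # ascending by importance
    norm_imp = numpy.empty(len(shap_mask_sum))
    norm_imp[order] = numpy.arange(len(shap_mask_sum))   # rank transform
    return numpy.argsort(norm_imp)[::-1]                 # most important attribute first
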
def run_channel_mask_main(data_folder, log_folder, obj_folder, shap_k=10, shap_min=2, shap_max=3, file_key="train_", fun_key="_mask_gene"):
    file_list = list_files(data_folder)
    file_count = 0
    for train_file in file_list:
        if file_key not in train_file:
            continue
        this_keyword = train_file.replace('.txt', '')
        log_file = this_keyword + fun_key + "_shapNum" + str(shap_k) + "_shapMin" + str(shap_min) + "_shapMax" + str(shap_max) + "_all_class.log"
        out_obj_file = this_keyword + fun_key + "_shapNum" + str(shap_k) + "_shapMin" + str(shap_min) + "_shapMax" + str(shap_max)
        logger = setup_logger(log_folder + log_file)
        print "log file: " + log_folder + log_file
        print "obj file: " + obj_folder + out_obj_file
        logger.info(log_folder + log_file)
        out_obj_dict = {}
        file_count = file_count + 1
        test_file = train_file.replace('train_', 'test_')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(data_folder + train_file, data_folder + test_file)
        train_row, train_col = train_x_matrix.shape
        test_row, test_col = test_x_matrix.shape
        attr_len = train_col / attr_num
        train_x_matrix = train_x_matrix.reshape(train_row, attr_num, attr_len)
        test_x_matrix = test_x_matrix.reshape(test_row, attr_num, attr_len)
        logger.info("train x matrix: " + str(train_x_matrix.shape))
        logger.info("test x matrix: " + str(test_x_matrix.shape))
        train_keep_len = matrix_keep_len_gene(train_x_matrix)
        test_keep_len = matrix_keep_len_gene(test_x_matrix)
        min_class = min(train_y_vector)
        max_class = max(train_y_vector) + 1
        num_classes = max_class - min_class
        logger.info("x matrix tran after shape: " + str(train_x_matrix.shape))
        for label in range(min_class, max_class):
            # iterate the classes from highest to lowest label (assumes labels start at 0)
            label = max_class - label - 1
            label_train_y_vector = np.where(train_y_vector == label, 1, 0)
            label_test_y_vector = np.where(test_y_vector == label, 1, 0)
            label_train_y_matrix = y_vector_to_matrix(label_train_y_vector, 2)
            label_test_y_matrix = y_vector_to_matrix(label_test_y_vector, 2)
            logger.info("class: " + str(label))
            test_eval_value, mask_value = run_channel_mask(train_x_matrix, label_train_y_matrix, train_keep_len, test_x_matrix, label_test_y_matrix, test_keep_len, shap_k, shap_min, shap_max, logger)
            logger.info("final for class " + str(label))
            logger.info("final acc: " + str(test_eval_value))
            logger.info("final mask: " + str(mask_value.shape))
            logger.info("out obj saved to " + obj_folder + out_obj_file + "_class" + str(label) + ".obj")
            save_obj([mask_value], obj_folder + out_obj_file + "_class" + str(label) + ".obj")

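# Hedged sketch of the one-vs-rest label preparation used in run_channel_mask_main. The real
# helper is y_vector_to_matrix; this only illustrates the assumed behaviour for the binary
# case: the class of interest becomes label 1, everything else 0, and the 0/1 vector is then
# one-hot encoded into two columns for the binary CNN. The helper name is hypothetical.
def _example_one_vs_rest_labels(y_vector, target_label):
    import numpy as np
    y_vector = np.asarray(y_vector)
    binary = np.where(y_vector == target_label, 1, 0)
    one_hot = np.zeros((len(binary), 2))
    one_hot[np.arange(len(binary)), binary] = 1
    return one_hot
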
def global_classification_main(parameter_file, file_keyword):
    function_keyword = "global_classification"
    data_keyword, data_folder, attr_num, attr_len, num_classes, start_class, class_column, class_id, obj_folder, top_k, method, log_folder, cnn_obj_folder, cnn_temp_folder, cnn_setting_file = read_feature_classification(parameter_file, function_keyword)
    data_stru = return_data_stru(num_classes, start_class, attr_num, attr_len, class_column)
    file_list = list_files(data_folder)
    obj_list = list_files(obj_folder)
    file_count = 0
    class_column = 0
    header = True
    cnn_setting = return_cnn_setting_from_file(cnn_setting_file)
    cnn_setting.save_obj_folder = cnn_obj_folder
    cnn_setting.temp_obj_folder = cnn_temp_folder
    cnn_setting.eval_method = 'f1'
    init_folder(cnn_obj_folder)
    init_folder(cnn_temp_folder)
    all_result_matrix = np.zeros((10, num_classes))
    train_file_vector = []
    prediction_matrix = []
    f1_value_matrix = []
    accuracy_vector = []
    delimiter = ' '
    all_accuracy = 0
    all_train_time = 0
    all_test_time = 0
    loop_count = -1
    for train_file in file_list:
        if file_keyword not in train_file:
            continue
        loop_count = loop_count + 1
        file_key = train_file.replace('.txt', '')
        log_file = log_folder + data_keyword + '_' + file_key + '_' + function_keyword + '_class' + str(class_id) + '_top' + str(top_k) + '_' + method + '.log'
        print "log file: " + log_file
        logger = setup_logger(log_file, 'logger_' + str(loop_count))
        logger.info('\nlog file: ' + log_file)
        logger.info(train_file)
        logger.info('cnn setting:\n ' + cnn_setting.to_string())
        logger.info('method: ' + method)
        logger.info('============')
        found_obj_file = ''
        for obj_file in obj_list:
            if file_key in obj_file:
                found_obj_file = obj_file
                break
        if found_obj_file == '':
            raise Exception('No obj file found')
        print found_obj_file
        print cnn_setting.save_obj_folder + file_key + "_" + method + "_projected_result.ckpt"
        #found_obj_file = obj_folder + found_obj_file
        feature_dict = load_obj(found_obj_file)[0]
        feature_dict = np.array(feature_dict)
        logger.info("feature array shape: " + str(feature_dict.shape))
        test_file = train_file.replace('train', 'test')
        train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, attr_num = train_test_file_reading_with_attrnum(data_folder + train_file, data_folder + test_file, class_column, delimiter, header)
        if file_count == 0:
            logger.info('train matrix shape: ' + str(train_x_matrix.shape))
            logger.info('train label shape: ' + str(train_y_vector.shape))
            logger.info('test matrix shape: ' + str(test_x_matrix.shape))
            logger.info('test label shape: ' + str(test_y_vector.shape))
        train_x_matrix = train_test_transpose(train_x_matrix, attr_num, attr_len, False)
        test_x_matrix = train_test_transpose(test_x_matrix, attr_num, attr_len, False)
        data_stru.attr_num = top_k
        fold_accuracy, fold_avg_eval, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix = run_feature_projected_cnn(train_x_matrix, train_y_vector, test_x_matrix, test_y_vector, data_stru, cnn_setting, feature_dict, top_k, file_key + '_count' + str(file_count), class_id, logger)
        prediction_matrix.append(fold_predict_y)
        logger.info("Fold F1: " + str(fold_avg_eval))
        accuracy_vector.append(fold_accuracy)
        all_accuracy = all_accuracy + fold_accuracy
        all_train_time = all_train_time + fold_train_time
        all_test_time = all_test_time + fold_test_time
        logger.info(method + ' fold accuracy: ' + str(fold_accuracy))
        logger.info(method + ' fold training time (sec):' + str(fold_train_time))
        logger.info(method + ' fold testing time (sec):' + str(fold_test_time))
        # result object written under the cnn output folder configured above
        save_obj([fold_accuracy, fold_avg_eval, fold_predict_y, fold_train_time, fold_test_time, fold_predict_matrix], cnn_setting.save_obj_folder + file_key + "_" + method + "_global_cnn_result.ckpt")