def build_model(self, train_x, train_y, test_x, test_y):
    """Train one binary (one-vs-rest) Naive Bayes classifier per label column."""
    util = Utility()
    eval_metrics = EvaluationMetrics()
    br_predictions_label = pd.DataFrame()
    br_predictions_score = pd.DataFrame()
    br_classifiers = []
    # 'E' and 'P' are excluded from the Binary Relevance targets.
    test_y_br = test_y.drop(['E', 'P'], axis=1)
    for i_col in test_y_br.columns:
        print(i_col)
        grid_search_dict = {}
        grid_search_dict['estimator'] = MultinomialNB()
        grid_search_dict['cvalidator'] = 3
        grid_search_dict['params'] = [{'alpha': [0.7, 1.0]}]
        grid_search_dict['loss_fun'] = 'neg_log_loss'
        t1 = time()
        classifier = util.build_model(train_x, train_y[i_col], grid_search_dict)
        print('Classifier {}, completed in {} seconds'.format(i_col, time() - t1))
        pred_labels = classifier.predict(test_x)
        pred_score = util.predict_score(classifier, test_x)
        eval_labels = eval_metrics.get_classification_report_1(
            test_y[i_col], pred_labels, verbose=1)
        eval_score = eval_metrics.get_classification_report_3(
            test_y[i_col], pred_labels, verbose=1)
        br_predictions_label[i_col] = pred_labels
        # Keep only the probability of the positive class.
        br_predictions_score[i_col] = pred_score[:, 1]
        br_classifiers.append(classifier)
    return br_predictions_label, br_predictions_score, br_classifiers
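# --- Hedged sketch ------------------------------------------------------
# Utility.build_model is not shown in this section. Based on the
# grid_search_dict keys used at the call sites above, it presumably wraps
# sklearn's GridSearchCV; the exact wiring below is an assumption, not the
# original helper.
from sklearn.model_selection import GridSearchCV

def build_model_sketch(train_x, train_y, grid_search_dict):
    gs = GridSearchCV(estimator=grid_search_dict['estimator'],
                      param_grid=grid_search_dict['params'],
                      scoring=grid_search_dict['loss_fun'],
                      cv=grid_search_dict['cvalidator'])
    gs.fit(train_x, train_y)
    return gs.best_estimator_  # call sites use .predict / .predict_proba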
def add_log(tot, epoch, batch_idx, loss_batch, c_TP_FN_FP, total_seen,
            t_batch_ls, SimpleFlag=0):
    ave_whole_acc, class_acc_str, ave_acc_str = EvaluationMetrics.get_class_accuracy(
        c_TP_FN_FP, total_seen)
    log_str = ''
    if len(t_batch_ls) > 0:
        t_per_batch = np.mean(np.concatenate(t_batch_ls, axis=1), axis=1)
        t_per_block = t_per_batch / BATCH_SIZE
        t_per_block_str = np.array2string(
            t_per_block, formatter={'float_kind': lambda x: "%0.3f" % x})
    else:
        t_per_block_str = "no-t"
    log_str += '%s [%d - %d] \t t_block(get_data,run):%s\tloss: %0.3f \tacc: %0.3f' % \
        (tot, epoch, batch_idx, t_per_block_str, loss_batch, ave_whole_acc)
    if SimpleFlag > 0:
        log_str += ave_acc_str
    if SimpleFlag > 1:
        log_str += class_acc_str
    log_string(log_str)
    return log_str
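# --- Hedged sketch ------------------------------------------------------
# EvaluationMetrics.get_TP_FN_FP is not shown in this section. Callers
# accumulate its result into a (3, NUM_CLASSES) array, so it presumably
# returns per-class TP/FN/FP counts; the implementation below is an
# assumption.
import numpy as np

def get_TP_FN_FP_sketch(num_classes, pred, label):
    c = np.zeros((3, num_classes))
    for cls in range(num_classes):
        c[0, cls] = np.sum((pred == cls) & (label == cls))  # true positives
        c[1, cls] = np.sum((pred != cls) & (label == cls))  # false negatives
        c[2, cls] = np.sum((pred == cls) & (label != cls))  # false positives
    return c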
def eval_one_epoch(sess, ops, test_writer, epoch):
    """ ops: dict mapping from string to tf ops """
    is_training = False
    total_seen = 0.00001  # avoid division by zero in the accuracy computation
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))
    log_string('----')
    num_blocks = DATASET.num_blocks['test']
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches == 0:
            print('\ntest num_blocks=%d BATCH_SIZE=%d num_batches=%d' %
                  (num_blocks, BATCH_SIZE, num_batches))
            return ''
    else:
        num_batches = None

    t_batch_ls = []
    batch_idx = -1
    # When num_batches is None, keep reading until the dataset is exhausted.
    while num_batches is None or batch_idx < num_batches:
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        cur_data, cur_label, cur_smp_weights = DATASET.test_dlw(start_idx, end_idx)
        t1 = time.time()
        if cur_data is None:
            print('batch_idx:%d, get None, reading finished' % (batch_idx))
            break  # all data reading finished
        feed_dict = {
            ops['pointclouds_pl']: cur_data,
            ops['labels_pl']: cur_label,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights
        }
        summary, step, loss_val, pred_val = sess.run(
            [ops['merged'], ops['step'], ops['loss'], ops['pred']],
            feed_dict=feed_dict)
        if test_writer is not None:
            test_writer.add_summary(summary, step)
        pred_logits = np.argmax(pred_val, 2)
        total_seen += (BATCH_SIZE * NUM_POINT)
        loss_sum += loss_val
        c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES, pred_logits,
                                                     cur_label)
        t_batch_ls.append(
            np.reshape(np.array([time.time() - t1, t1 - t0]), (2, 1)))
        if FLAGS.only_evaluate:
            # DATASET.write_pred(pred_val)
            pass
        if batch_idx % 10 == 0:
            add_log('eval', epoch, batch_idx, loss_sum / (batch_idx + 1),
                    c_TP_FN_FP, total_seen, t_batch_ls)
    return add_log('eval', epoch, batch_idx, loss_sum / (batch_idx + 1),
                   c_TP_FN_FP, total_seen, t_batch_ls)
def train_one_epoch(sess, ops, train_writer, epoch):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    num_blocks = DATASET.num_blocks['train']
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches == 0:
            return ''
    else:
        num_batches = None

    total_seen = 0.0001  # avoid division by zero in the accuracy computation
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))
    print('total batch num = ', num_batches)
    batch_idx = -1
    DATASET.shuffle_idx()
    t_batch_ls = []
    while num_batches is None or batch_idx < num_batches:
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        cur_data, cur_label, cur_smp_weights = DATASET.train_dlw(start_idx, end_idx)
        t1 = time.time()
        if cur_data is None:
            break  # all data reading finished
        feed_dict = {
            ops['pointclouds_pl']: cur_data,
            ops['labels_pl']: cur_label,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights
        }
        summary, step, _, loss_val, pred_val = sess.run(
            [ops['merged'], ops['step'], ops['train_op'], ops['loss'],
             ops['pred']], feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        pred_val = np.argmax(pred_val, 2)
        total_seen += (BATCH_SIZE * NUM_POINT)
        loss_sum += loss_val
        c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES, pred_val,
                                                     cur_label)
        t_batch_ls.append(
            np.reshape(np.array([time.time() - t1, t1 - t0]), (2, 1)))
        if (epoch == 0 and batch_idx % 10 == 0) or batch_idx % 100 == 0:
            add_log('train', epoch, batch_idx, loss_sum / (batch_idx + 1),
                    c_TP_FN_FP, total_seen, t_batch_ls)
        if batch_idx == 100:
            os.system('nvidia-smi')
    return add_log('train', epoch, batch_idx, loss_sum / (batch_idx + 1),
                   c_TP_FN_FP, total_seen, t_batch_ls)
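# --- Hedged sketch ------------------------------------------------------
# The enclosing driver is not shown in this section. Given the two
# signatures above, it presumably alternates training and evaluation once
# per epoch; MAX_EPOCH and the writer objects are assumptions.
for epoch in range(MAX_EPOCH):
    train_one_epoch(sess, ops, train_writer, epoch)
    eval_one_epoch(sess, ops, test_writer, epoch)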
def eval_one_epoch(sess, ops, test_writer, epoch, eval_feed_buf_q):
    """ ops: dict mapping from string to tf ops """
    is_training = False
    total_seen = 0.00001  # avoid division by zero in the accuracy computation
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))
    log_string('----')
    num_blocks = net_provider.eval_num_blocks
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        num_batches = limit_eval_num_batches(epoch, num_batches)
        if num_batches == 0:
            print('\ntest num_blocks=%d BATCH_SIZE=%d num_batches=%d' %
                  (num_blocks, BATCH_SIZE, num_batches))
            return ''
    else:
        num_batches = None

    eval_logstr = ''
    t_batch_ls = []
    batch_idx = -1
    while num_batches is None or batch_idx < num_batches - 1:
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        if eval_feed_buf_q is None:
            cur_data, cur_label, cur_smp_weights = net_provider.get_eval_batch(
                start_idx, end_idx)
        else:
            if eval_feed_buf_q.qsize() == 0:
                print('eval_feed_buf_q.qsize == 0')
                break
            cur_data, cur_label, cur_smp_weights, batch_idx_buf, epoch_buf = \
                eval_feed_buf_q.get()
            # assert batch_idx == batch_idx_buf and epoch == epoch_buf
        t1 = time.time()
        if cur_data is None:
            print('batch_idx:%d, get None, reading finished' % (batch_idx))
            break  # all data reading finished
        feed_dict = {
            ops['pointclouds_pl']: cur_data,
            ops['labels_pl']: cur_label[:, :, CATEGORY_LABEL_IDX],
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights[:, :, CATEGORY_LABEL_IDX]
        }
        summary, step, loss_val, pred_val = sess.run(
            [ops['merged'], ops['step'], ops['loss'], ops['pred']],
            feed_dict=feed_dict)
        if ISSUMMARY and test_writer is not None:
            test_writer.add_summary(summary, step)
        t_batch_ls.append(
            np.reshape(np.array([t1 - t0, time.time() - t1]), (2, 1)))
        # Only score the last batch (or every 30th batch in evaluate-only runs).
        if (num_batches is not None and batch_idx == num_batches - 1) or \
                (FLAGS.only_evaluate and batch_idx % 30 == 0):
            pred_logits = np.argmax(pred_val, 2)
            total_seen += (BATCH_SIZE * NUM_POINT)
            loss_sum += loss_val
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(
                NUM_CLASSES, pred_logits, cur_label)
            # net_provider.set_pred_label_batch(pred_val, start_idx, end_idx)
            eval_logstr = add_log('eval', epoch, batch_idx,
                                  loss_sum / (batch_idx + 1), c_TP_FN_FP,
                                  total_seen, t_batch_ls)
    # if FLAGS.only_evaluate:
    #     obj_dump_dir = os.path.join(FLAGS.log_dir, 'obj_dump')
    #     net_provider.gen_gt_pred_objs(FLAGS.visu, obj_dump_dir)
    #     net_provider.write_file_accuracies(FLAGS.log_dir)
    #     print('\nobj out path:' + obj_dump_dir)
    return eval_logstr
def train_one_epoch(sess, ops, train_writer, epoch, train_feed_buf_q, pctx, opts):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    num_blocks = net_provider.train_num_blocks
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches == 0:
            return ''
    else:
        num_batches = None

    total_seen = 0.0001  # avoid division by zero in the accuracy computation
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))
    print('total batch num = ', num_batches)
    batch_idx = -1
    t_batch_ls = []
    train_logstr = ''
    while num_batches is None or batch_idx < num_batches - 1:
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        if train_feed_buf_q is None:
            cur_data, cur_label, cur_smp_weights = net_provider.get_train_batch(
                start_idx, end_idx)
        else:
            if train_feed_buf_q.qsize() == 0:
                print('train_feed_buf_q.qsize == 0')
                break
            cur_data, cur_label, cur_smp_weights, batch_idx_buf, epoch_buf = \
                train_feed_buf_q.get()
            # assert batch_idx == batch_idx_buf and epoch == epoch_buf
        t1 = time.time()
        if cur_data is None:
            break  # all data reading finished
        label_category = cur_label[:, :, CATEGORY_LABEL_IDX]
        feed_dict = {
            ops['pointclouds_pl']: cur_data,
            ops['labels_pl']: label_category,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights[:, :, CATEGORY_LABEL_IDX]
        }
        if ISDEBUG and epoch == 0 and batch_idx == 5:
            # Profile a single early step when debugging.
            pctx.trace_next_step()
            pctx.dump_next_step()
            summary, step, _, loss_val, pred_val = sess.run(
                [ops['merged'], ops['step'], ops['train_op'], ops['loss'],
                 ops['pred']], feed_dict=feed_dict)
            pctx.profiler.profile_operations(options=opts)
        else:
            summary, step, _, loss_val, pred_val = sess.run(
                [ops['merged'], ops['step'], ops['train_op'], ops['loss'],
                 ops['pred']], feed_dict=feed_dict)
        t_batch_ls.append(
            np.reshape(np.array([t1 - t0, time.time() - t1]), (2, 1)))
        if ISSUMMARY:
            train_writer.add_summary(summary, step)
        if (num_batches is not None and batch_idx == num_batches - 1) or \
                (epoch == 0 and batch_idx % 20 == 0) or batch_idx % 200 == 0:
            pred_val = np.argmax(pred_val, 2)
            loss_sum += loss_val
            total_seen += (BATCH_SIZE * NUM_POINT)
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES, pred_val,
                                                         cur_label)
            train_logstr = add_log('train', epoch, batch_idx,
                                   loss_sum / (batch_idx + 1), c_TP_FN_FP,
                                   total_seen, t_batch_ls)
        if batch_idx == 100:
            os.system('nvidia-smi')
    return train_logstr
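# --- Hedged sketch ------------------------------------------------------
# The producer that fills train_feed_buf_q is not shown in this section.
# To match the 5-tuple unpacked in train_one_epoch above, it presumably
# looks like this; queue construction and threading are assumptions.
def feed_producer_sketch(train_feed_buf_q, num_batches, epoch):
    for batch_idx in range(num_batches):
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        cur_data, cur_label, cur_smp_weights = net_provider.get_train_batch(
            start_idx, end_idx)
        train_feed_buf_q.put(
            (cur_data, cur_label, cur_smp_weights, batch_idx, epoch))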
def train_one_epoch(sess, ops, train_writer, epoch, train_feed_buf_q, pctx, opts):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    num_blocks = data_provider.num_train_data
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches == 0:
            return ''
    else:
        num_batches = None

    total_seen = 0.0001  # avoid division by zero in the accuracy computation
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))
    print('total batch num = ', num_batches)
    batch_idx = -1
    t_batch_ls = []
    train_logstr = ''
    while num_batches is None or batch_idx < num_batches - 1:
        t0 = time.time()
        batch_idx += 1
        point_cloud_data = []
        label_data = []
        if train_feed_buf_q is None:
            point_cloud_data, label_data = data_provider._get_next_minibatch()
        else:
            if train_feed_buf_q.qsize() == 0:
                print('train_feed_buf_q.qsize == 0')
                break
            point_cloud_data, label_data = train_feed_buf_q.get()
        t1 = time.time()
        if point_cloud_data is None:
            break  # all data reading finished
        # All sample weights are set to one here.
        cur_smp_weights = np.ones(
            (point_cloud_data.shape[0], point_cloud_data.shape[1]))
        feed_dict = {
            ops['pointclouds_pl']: point_cloud_data,
            ops['labels_pl']: label_data,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights
        }
        if ISDEBUG and epoch == 0 and batch_idx == 5:
            # Profile a single early step when debugging.
            pctx.trace_next_step()
            pctx.dump_next_step()
            summary, step, _, loss_val, pred_class_val, classification_loss_val, \
                regression_loss_val = sess.run(
                    [ops['merged'], ops['step'], ops['train_op'], ops['loss'],
                     ops['pred_class'], ops['classification_loss'],
                     ops['regression_loss']], feed_dict=feed_dict)
            pctx.profiler.profile_operations(options=opts)
        else:
            summary, step, _, loss_val, pred_class_val, classification_loss_val, \
                regression_loss_val = sess.run(
                    [ops['merged'], ops['step'], ops['train_op'], ops['loss'],
                     ops['pred_class'], ops['classification_loss'],
                     ops['regression_loss']], feed_dict=feed_dict)
        t_batch_ls.append(
            np.reshape(np.array([t1 - t0, time.time() - t1]), (2, 1)))
        if ISSUMMARY:
            train_writer.add_summary(summary, step)
        if batch_idx % 80 == 0:
            print('the training batch is {}, the loss value is {}'.format(
                batch_idx, loss_val))
            print('the classification loss is {}, the regression loss is {}'
                  .format(classification_loss_val, regression_loss_val))
        # Per-epoch evaluation is disabled here (note: cur_label is no longer
        # produced by this data path).
        if False and (batch_idx == num_batches - 1 or
                      (epoch == 0 and batch_idx % 20 == 0) or
                      batch_idx % 200 == 0):
            pred_class_val = np.argmax(pred_class_val, 2)
            loss_sum += loss_val
            total_seen += (BATCH_SIZE * NUM_POINT)
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(
                NUM_CLASSES, pred_class_val, cur_label)
            train_logstr = add_log('train', epoch, batch_idx,
                                   loss_sum / (batch_idx + 1), c_TP_FN_FP,
                                   total_seen, t_batch_ls)
        if batch_idx == 200:
            os.system('nvidia-smi')
    return train_logstr
def main():
    input_directory = sys.argv[1]
    train_size = int(sys.argv[2])
    test_size = (100 - train_size) / 100

    ##### Step 1: Data Loading and Basic Stats #####
    t0 = time()
    print()
    print('** STEP 1: Data Loading **')
    dl_obj = DataLoading()
    base_df = dl_obj.clean_data(input_directory)
    target_matrix = dl_obj.get_multilabel(base_df)
    dl_obj.get_label_info(target_matrix)

    ##### Step 2: Feature Engineering #####
    print()
    print('** STEP 2: Text Processing **')
    tp_obj = TextProcessing()
    cnt_vectorizer, feature_matrix = tp_obj.run_textprocessing(base_df)
    feature_matrix = pd.DataFrame(feature_matrix.toarray())
    feature_matrix = feature_matrix.join(
        base_df[['vegetarian', 'spicy', 'garlic', 'fish']])
    feature_matrix.fillna(0, inplace=True)

    ##### Filter rare label combinations #####
    print()
    print('** Filter Rare Label Combinations **')
    util = Utility()
    print("Feature Matrix Shape:{} Target Matrix.shape: {}"
          .format(feature_matrix.shape, target_matrix.shape))
    feature_matrix_fltrd, target_matrix_fltrd = util.filter_rare_classes(
        feature_matrix, target_matrix)
    print("Feature Matrix Shape:{} Target Matrix.shape: {}"
          .format(feature_matrix_fltrd.shape, target_matrix_fltrd.shape))  # (18340, 3763)

    ##### Train/test split using StratifiedShuffleSplit #####
    print()
    print('** Train test split **')
    train_x, train_y, test_x, test_y = util.train_test_split(
        feature_matrix_fltrd, target_matrix_fltrd, test_size=test_size)
    print("Train_x Shape:{} \n Train_y.shape: {}"
          .format(train_x.shape, train_y.shape))  # 14672
    print("Test_x Shape:{} \n Test_y.shape: {}"
          .format(test_x.shape, test_y.shape))  # 3668

    ##### Step 3: Frequent itemsets on the training target matrix #####
    print()
    print('** STEP 3: Frequent Itemset **')
    col_mapping = {}
    for i_col, col_name in enumerate(target_matrix.columns.tolist()):
        col_mapping[i_col] = col_name
    supp = 0.05
    item_size = 3
    train_y_lil = lil_matrix(train_y)
    frequent_items_list = util.find_frequent_itemsets(train_y_lil, col_mapping,
                                                      supp, item_size)
    print('No of {} frequent itemsets with support {}: {} '
          .format(item_size, supp, len(frequent_items_list)))  # 21 itemsets
    freq_additives_list = [
        items for itemset in frequent_items_list for items in itemset
    ]
    freq_additives_set = list(set(freq_additives_list))
    freq_additives_cnt_dict = dict(Counter(freq_additives_list).items())

    ##### Step 4: Build 21 LabelPowerset classifiers using Naive Bayes #####
    print()
    print('** STEP 4: LabelPowerSet Classifiers **')
    lp = LabelPowerSet(train_x, train_y, test_x, test_y, frequent_items_list,
                       'nb')
    model_list, metrics_labels, metrics_score, prediction_list = \
        lp.build_model_lp()
    index_value = [''.join(items) for items in frequent_items_list]
    metrics_labels_df = pd.DataFrame(
        metrics_labels,
        columns=['Accuracy', 'HammingLoss', 'JaccardSim'],
        index=index_value)
    metrics_score_df = pd.DataFrame(
        metrics_score,
        columns=['CoverageError', 'LblRankAvgPrec', 'LblRankLoss', 'LogLoss'],
        index=index_value)
    pickle.dump(model_list, open('LP_NB_21FSS.pkl', 'wb'))
    del model_list, lp
    metrics_labels_df.to_csv(input_directory + 'LP_NB_metrics_labels.csv')
    metrics_score_df.to_csv(input_directory + 'LP_NB_metrics_score.csv')

    ##### Step 4.1: Stack the predictions (majority voting) #####
    final_predictions = pd.DataFrame(
        np.zeros(test_y[freq_additives_set].shape), columns=freq_additives_set)
    for i_model in range(len(prediction_list)):
        prediction = prediction_list[i_model]
        for col in prediction.columns:
            final_predictions[col] = final_predictions[col] + prediction[col]
    # Average each label's votes over the number of itemsets containing it,
    # then threshold at 0.5.
    final_predictions_2 = final_predictions.apply(
        lambda x: x / freq_additives_cnt_dict[x.name])
    final_predictions_2 = final_predictions_2.applymap(
        lambda x: 1 if x >= 0.5 else 0)
    print()
    print('** Evaluation Metrics: Majority Voting **')
    eval_metrics = EvaluationMetrics()
    eval_final = eval_metrics.get_classification_report_1(
        test_y[freq_additives_set], final_predictions_2, verbose=1)

    ##### Step 5: Build Binary Relevance models #####
    print()
    print('** STEP 5 : Binary Relevance Classifiers **')
    br = BinaryRelevance()
    label_df, score_df, classifier_list = br.build_model(
        train_x, train_y, test_x, test_y)
    pickle.dump(classifier_list, open('BR_NB_classifiersList.pickle', 'wb'))
    print()
    print('** Evaluation Metrics for BR Classifiers **')
    eval_metrics.get_classification_report_1(test_y[label_df.columns], label_df)
    # Accuracy: 0.42, Hamming Loss: 0.05, Jaccard Similarity: 0.62
    eval_metrics.get_classification_report_2(test_y[label_df.columns], score_df)
    # CoverageError: 5.61, LabelRankingAvgPrec: 0.83, LabelRankingLoss: 0.04,
    # Log_loss: 6.7

    ##### Binary Relevance predictions for frequent labels #####
    print()
    print('** BR classifiers evaluation for labels in frequent itemset **')
    eval_metrics.get_classification_report_1(test_y[freq_additives_set],
                                             label_df[freq_additives_set])

    ##### Step 6: Final predictions (Binary Relevance + LabelPowerset) #####
    print()
    print('** STEP 6 : Final Predictions **')
    final_predictions_3 = pd.DataFrame(np.zeros(label_df.shape),
                                       columns=label_df.columns)
    # Use the LabelPowerset vote for labels covered by a frequent itemset,
    # the Binary Relevance prediction otherwise.
    for col in final_predictions_3.columns:
        if col in freq_additives_set:
            final_predictions_3[col] = final_predictions_2[col]
        else:
            final_predictions_3[col] = label_df[col]
    print()
    print('** Evaluation Metrics for Final Prediction **')
    print('test_ shape', test_y[label_df.columns].shape)
    print('final predictions', final_predictions_3.shape)
    eval_final_2 = eval_metrics.get_classification_report_1(
        test_y[label_df.columns], final_predictions_3, verbose=1)

    ##### Step 7: Dump predictions #####
    print()
    print('** STEP 7 : Saving Predictions **')
    test_y.to_csv('test_actual_labels.csv')
    final_predictions_3.to_csv('test_final_predicted_labels.csv')
    score_df.to_csv('test_scoring_from_br.csv')
    print('Entire Process completed in {} seconds'.format(time() - t0))
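# --- Usage sketch -------------------------------------------------------
# Based on the sys.argv parsing at the top of main(), the script is
# presumably invoked with a data directory and a train split percentage;
# the script name below is hypothetical.
#
#   python run_pipeline.py /path/to/data 80
if __name__ == '__main__':
    main()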
csv_path = "/Users/aniruddha/Downloads/DL_info.csv" Image_slices_dir = "/Users/aniruddha/Downloads/Test" df = pd.read_csv(csv_path) # The DL_info.csv file path df.sort_values("File_name", inplace=True) df.drop_duplicates(subset="File_name", keep=False, inplace=True) new_df = df[df['Train_Val_Test'] == 3] model = PanNet() model.load_state_dict(torch.load('/Users/aniruddha/Downloads/panet_model_4.dms')) eval1 = EvaluationMetrics() map_scores_list = [] eval1 = EvaluationMetrics() for i in range(1, 9): batch_no = 0 train_dataset = ImageDataset( root_dir="/Users/aniruddha/Downloads/Test", dataset_type=3) # Set the new_df yourself print("Original length is", len(new_df.index)) loop_df = new_df[new_df['Coarse_lesion_type'] == i] train_dataset.df = loop_df batch_size = 3 dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size)
def build_model_lp(self):
    util = Utility()
    eval_met = EvaluationMetrics()
    model_list = []
    metrics_labels = []
    metrics_score = []
    prediction_list = []
    t0 = time()
    for items in self.frequent_items_list:
        # Reduce the multi-label subset to a single multiclass target
        # (label powerset transform).
        label_subset_train = self.train_y[items]
        multiclass_labels = util.transform(label_subset_train)
        sss_cv = StratifiedShuffleSplit(multiclass_labels,
                                        n_iter=3,
                                        test_size=0.3,
                                        random_state=123)
        grid_search_dict = {}
        if self.classifier == 'nb':
            grid_search_dict['estimator'] = MultinomialNB()
            grid_search_dict['cvalidator'] = sss_cv
            grid_search_dict['params'] = [{'alpha': [0.7, 1.0]}]
            grid_search_dict['loss_fun'] = 'neg_log_loss'
        elif self.classifier == 'svc':
            grid_search_dict['estimator'] = SVC(probability=True,
                                                kernel='rbf',
                                                gamma=0.001)
            grid_search_dict['cvalidator'] = sss_cv
            grid_search_dict['params'] = [{'C': [100, 1000]}]
            grid_search_dict['loss_fun'] = 'neg_log_loss'
        t1 = time()
        print('Classifier {} started'.format(items))
        classifier = util.build_model(self.train_x, multiclass_labels,
                                      grid_search_dict)
        print('Classifier {}, completed in {} seconds'.format(
            items, time() - t1))
        pred_labels = util.predict_label(classifier, self.test_x)
        pred_labels.columns = label_subset_train.columns.tolist()
        pred_score = util.predict_score(classifier, self.test_x)
        label_subset_test = self.test_y[items]
        eval_labels = eval_met.get_classification_report_1(
            label_subset_test, pred_labels, verbose=1)
        transformed_test_labels = util.transform(label_subset_test)
        dummy_trans_test_labels = pd.get_dummies(transformed_test_labels)
        eval_score = eval_met.get_classification_report_2(
            dummy_trans_test_labels, pred_score, verbose=1)
        model_list.append(classifier)
        metrics_labels.append(eval_labels)
        metrics_score.append(eval_score)
        prediction_list.append(pred_labels)
    print('Label Powerset Method Completed in {} seconds'.format(time() - t0))
    return model_list, metrics_labels, metrics_score, prediction_list
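# --- Hedged sketch ------------------------------------------------------
# Utility.transform is not shown in this section. Given how it is used
# above (multi-label columns in, one multiclass vector out, invertible
# back to per-label columns), it presumably implements the label-powerset
# encoding; the implementation below is an assumption.
def transform_sketch(label_df):
    # Each unique combination of binary labels becomes one class code.
    combos = label_df.astype(str).apply(''.join, axis=1)
    return combos.astype('category').cat.codes.values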
def eval_one_epoch(sess, ops, test_writer, epoch, eval_feed_buf_q):
    """ ops: dict mapping from string to tf ops """
    is_training = False
    total_seen = 0.00001  # avoid division by zero in the accuracy computation
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))
    log_string('----')
    num_blocks = data_provider.evaluation_num  # evaluate a subset of the data
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        num_batches = limit_eval_num_batches(epoch, num_batches)
        if num_batches == 0:
            print('\ntest num_blocks=%d BATCH_SIZE=%d num_batches=%d' %
                  (num_blocks, BATCH_SIZE, num_batches))
            return ''
    else:
        num_batches = None

    eval_logstr = ''
    t_batch_ls = []
    all_gt_box = []
    all_pred_class_val = []
    all_pred_box_val = []
    all_xyz = []
    batch_idx = -1
    while num_batches is None or batch_idx < num_batches - 1:
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        if eval_feed_buf_q is None:
            point_cloud_data, label_data, gt_box = \
                data_provider._get_evaluation_minibatch(start_idx, end_idx)
        else:
            if eval_feed_buf_q.qsize() == 0:
                print('eval_feed_buf_q.qsize == 0')
                break
            # Note: this path does not provide gt_box.
            point_cloud_data, label_data, epoch_buf = eval_feed_buf_q.get()
        t1 = time.time()
        if point_cloud_data is None:
            print('batch_idx:%d, get None, reading finished' % (batch_idx))
            break  # all data reading finished
        cur_smp_weights = np.ones(
            (point_cloud_data.shape[0], point_cloud_data.shape[1]))
        feed_dict = {
            ops['pointclouds_pl']: point_cloud_data,
            ops['labels_pl']: label_data,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights
        }
        summary, step, loss_val, pred_class_val, pred_prob_val, pred_box_val, \
            xyz_pl, classification_loss_val, regression_loss_val, \
            loss_details_val = sess.run(
                [ops['merged'], ops['step'], ops['loss'], ops['pred_class'],
                 ops['pred_prob'], ops['pred_box'], ops['xyz_pl'],
                 ops['classification_loss'], ops['regression_loss'],
                 ops['loss_details']], feed_dict=feed_dict)
        if ISSUMMARY and test_writer is not None:
            test_writer.add_summary(summary, step)
        t_batch_ls.append(
            np.reshape(np.array([t1 - t0, time.time() - t1]), (2, 1)))
        # all_gt_box: num_batches x BATCH_SIZE x (k*8); all_gt_box[n][m] is
        # the ground-truth box of one labeled image.
        all_gt_box.append(gt_box)
        # all_pred_class_val: num_batches x BATCH_SIZE x point_num x 4.
        all_pred_class_val.append(pred_prob_val)
        # all_pred_box_val: num_batches x BATCH_SIZE x point_num x 14.
        all_pred_box_val.append(pred_box_val)
        # all_xyz: num_batches x (BATCH_SIZE x point_num x 3).
        all_xyz.append(xyz_pl)
        # Per-batch point-wise scoring is disabled here (note: cur_label is
        # not produced by this data path).
        if False and (batch_idx == num_batches - 1 or
                      (FLAGS.only_evaluate and batch_idx % 30 == 0)):
            pred_logits = np.argmax(pred_prob_val, 2)
            total_seen += (BATCH_SIZE * NUM_POINT)
            loss_sum += loss_val
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(
                NUM_CLASSES, pred_logits, cur_label)
            eval_logstr = add_log('eval', epoch, batch_idx,
                                  loss_sum / (batch_idx + 1), c_TP_FN_FP,
                                  total_seen, t_batch_ls)
        if batch_idx % 40 == 0:
            print('the test batch is {}, the loss value is {}'.format(
                batch_idx, loss_val))
            print('the classification loss is {}, the regression loss is {}'
                  .format(classification_loss_val, regression_loss_val))
            print('the details of loss value, dx:{}, dy:{}, dz:{}, dl:{}, '
                  'dw:{}, dh:{}, dtheta:{}'.format(
                      loss_details_val[0], loss_details_val[1],
                      loss_details_val[2], loss_details_val[3],
                      loss_details_val[4], loss_details_val[5],
                      loss_details_val[6]))

    # Assemble and score all detection results.
    # Format of all_pred_boxes: l, w, h, theta, x, y, z, score.
    # Format of gt_boxes: type, l, w, h, theta, x, y, z.
    # Assemble all_pred_class_val and all_pred_box_val according to the
    # all_pred_boxes format: keep predictions with score >= 0.05, then
    # filter the boxes with 3D NMS.
    print('---------------------------')
    print('Start evaluation!!')
    all_pred_boxes, all_gt_boxes = boxes_assemble_filter(
        all_pred_class_val, all_pred_box_val, all_xyz, all_gt_box, 0.05)
    # Calculate the average precision of the detection results.
    aveg_precision = evaluation_3d(all_pred_boxes, all_gt_boxes,
                                   cfg.TEST.RPN_NMS_THRESH)
    print('The average precision is {}'.format(aveg_precision))
    return aveg_precision
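# --- Hedged sketch ------------------------------------------------------
# boxes_assemble_filter is not shown in this section. A minimal sketch of
# the per-sample selection step its comments describe (keep predictions
# whose score exceeds 0.05 and pair the regressed box with that score);
# the 7-value box slice and the score reduction are assumptions.
import numpy as np

def assemble_one_sample_sketch(pred_prob, pred_box, score_thresh=0.05):
    # pred_prob: (point_num, num_classes) class probabilities
    # pred_box:  (point_num, >=7) regressed l, w, h, theta, x, y, z, ...
    scores = pred_prob.max(axis=-1)
    keep = scores >= score_thresh
    # l, w, h, theta, x, y, z, score -- the all_pred_boxes format above.
    return np.concatenate([pred_box[keep, :7], scores[keep, None]], axis=1)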