def build_model(self, train_x, train_y, test_x, test_y):
    util = Utility()
    eval_metrics = EvaluationMetrics()
    br_predictions_label = pd.DataFrame()
    br_predictions_score = pd.DataFrame()
    br_classifiers = []
    test_y_br = test_y.drop(['E', 'P'], axis=1)
    for i_col in test_y_br.columns:
        print(i_col)
        grid_search_dict = {}
        grid_search_dict['estimator'] = MultinomialNB()
        grid_search_dict['cvalidator'] = 3
        grid_search_dict['params'] = [{'alpha': [0.7, 1.0]}]
        grid_search_dict['loss_fun'] = 'neg_log_loss'

        t1 = time()
        classifier = util.build_model(train_x, train_y[i_col], grid_search_dict)
        print('Classifier {}, completed in {} '.format(i_col, time() - t1))

        pred_labels = classifier.predict(test_x)
        pred_score = util.predict_score(classifier, test_x)

        eval_labels = eval_metrics.get_classification_report_1(test_y[i_col],
                                                               pred_labels,
                                                               verbose=1)
        eval_score = eval_metrics.get_classification_report_3(test_y[i_col],
                                                              pred_labels,
                                                              verbose=1)
        br_predictions_label[i_col] = pred_labels
        br_predictions_score[i_col] = pred_score[:, 1]
        br_classifiers.append(classifier)
    return br_predictions_label, br_predictions_score, br_classifiers
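# Hedged sketch (assumption): Utility.build_model and Utility.predict_score are
# defined elsewhere in this project. A plausible implementation wraps sklearn's
# GridSearchCV around the settings collected in grid_search_dict and returns
# probability scores; the sketch below is illustrative only and deliberately
# uses different names so it does not shadow the real Utility class.
from sklearn.model_selection import GridSearchCV

def build_model_sketch(train_x, train_y, grid_search_dict):
    # Grid search over the estimator, CV splitter, parameter grid and scoring
    # function supplied by the caller; return the refitted best estimator.
    gs = GridSearchCV(estimator=grid_search_dict['estimator'],
                      param_grid=grid_search_dict['params'],
                      cv=grid_search_dict['cvalidator'],
                      scoring=grid_search_dict['loss_fun'])
    gs.fit(train_x, train_y)
    return gs.best_estimator_

def predict_score_sketch(classifier, test_x):
    # Per-class probability estimates; column 1 is the positive-class score
    # used as pred_score[:, 1] above.
    return classifier.predict_proba(test_x)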
def add_log(tot,
            epoch,
            batch_idx,
            loss_batch,
            c_TP_FN_FP,
            total_seen,
            t_batch_ls,
            SimpleFlag=0):
    ave_whole_acc, class_acc_str, ave_acc_str = EvaluationMetrics.get_class_accuracy(
        c_TP_FN_FP, total_seen)
    log_str = ''
    if len(t_batch_ls) > 0:
        t_per_batch = np.mean(np.concatenate(t_batch_ls, axis=1), axis=1)
        t_per_block = t_per_batch / BATCH_SIZE
        t_per_block_str = np.array2string(
            t_per_block, formatter={'float_kind': lambda x: "%0.3f" % x})
    else:
        t_per_block_str = "no-t"
    log_str += '%s [%d - %d] \t t_block(get_data,run):%s\tloss: %0.3f \tacc: %0.3f' % \
            ( tot,epoch,batch_idx,t_per_block_str,loss_batch,ave_whole_acc )
    if SimpleFlag > 0:
        log_str += ave_acc_str
    if SimpleFlag > 1:
        log_str += class_acc_str
    log_string(log_str)
    return log_str
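# Hedged sketch (assumption): EvaluationMetrics.get_class_accuracy is defined
# elsewhere. Based on how add_log uses it, it likely derives the overall and
# per-class accuracy from the (3, NUM_CLASSES) TP/FN/FP counter and the number
# of points seen; this stand-alone version is illustrative only.
def get_class_accuracy_sketch(c_TP_FN_FP, total_seen):
    TP, FN = c_TP_FN_FP[0], c_TP_FN_FP[1]
    # Overall accuracy: correctly classified points over all points seen.
    ave_whole_acc = TP.sum() / float(total_seen)
    # Per-class accuracy: TP / (TP + FN) for each class.
    class_acc = TP / np.maximum(TP + FN, 1e-6)
    class_acc_str = np.array2string(
        class_acc, formatter={'float_kind': lambda x: '%0.3f' % x})
    ave_acc_str = '\tave_class_acc: %0.3f' % class_acc.mean()
    return ave_whole_acc, class_acc_str, ave_acc_str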
def eval_one_epoch(sess, ops, test_writer, epoch):
    """ ops: dict mapping from string to tf ops """
    is_training = False
    total_seen = 0.00001
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))

    log_string('----')

    num_blocks = DATASET.num_blocks['test']
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches == 0:
            print('\ntest num_blocks=%d  BATCH_SIZE=%d  num_batches=%d' %
                  (num_blocks, BATCH_SIZE, num_batches))
            return ''
    else:
        num_batches = None

    t_batch_ls = []
    batch_idx = -1
    while (num_batches is None) or (batch_idx < num_batches):
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE

        cur_data, cur_label, cur_smp_weights = DATASET.test_dlw(
            start_idx, end_idx)
        t1 = time.time()
        if cur_data is None:
            print('batch_idx:%d, get None, reading finished' % (batch_idx))
            break  # all data reading finished
        feed_dict = {
            ops['pointclouds_pl']: cur_data,
            ops['labels_pl']: cur_label,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights
        }
        summary, step, loss_val, pred_val = sess.run(
            [ops['merged'], ops['step'], ops['loss'], ops['pred']],
            feed_dict=feed_dict)
        if test_writer is not None:
            test_writer.add_summary(summary, step)
        pred_logits = np.argmax(pred_val, 2)
        total_seen += (BATCH_SIZE * NUM_POINT)
        loss_sum += loss_val
        c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES, pred_logits,
                                                     cur_label)
        t_batch_ls.append(
            np.reshape(np.array([time.time() - t1, t1 - t0]), (2, 1)))
        if FLAGS.only_evaluate:
            #DATASET.write_pred(pred_val)
            if batch_idx % 10 == 0:
                add_log('eval', epoch, batch_idx, loss_sum / (batch_idx + 1),
                        c_TP_FN_FP, total_seen, t_batch_ls)

    return add_log('eval', epoch, batch_idx, loss_sum / (batch_idx + 1),
                   c_TP_FN_FP, total_seen, t_batch_ls)
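# Hedged sketch (assumption): EvaluationMetrics.get_TP_FN_FP is defined
# elsewhere. It presumably counts, per class, the true positives, false
# negatives and false positives over a batch of per-point predictions; the
# stand-alone version below is illustrative only.
def get_TP_FN_FP_sketch(num_classes, pred_logits, labels):
    c = np.zeros((3, num_classes))
    for cls in range(num_classes):
        pred_c = (pred_logits == cls)
        gt_c = (labels == cls)
        c[0, cls] = np.sum(pred_c & gt_c)    # TP
        c[1, cls] = np.sum(~pred_c & gt_c)   # FN
        c[2, cls] = np.sum(pred_c & ~gt_c)   # FP
    return c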
def train_one_epoch(sess, ops, train_writer, epoch):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    #log_string('----')
    num_blocks = DATASET.num_blocks['train']
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches == 0: return ''
    else:
        num_batches = None

    total_seen = 0.0001
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))

    print('total batch num = ', num_batches)
    batch_idx = -1

    DATASET.shuffle_idx()
    t_batch_ls = []
    while (num_batches is None) or (batch_idx < num_batches):
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE

        cur_data, cur_label, cur_smp_weights = DATASET.train_dlw(
            start_idx, end_idx)
        t1 = time.time()
        if cur_data is None:
            break  # all data reading finished
        feed_dict = {
            ops['pointclouds_pl']: cur_data,
            ops['labels_pl']: cur_label,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights
        }
        summary, step, _, loss_val, pred_val = sess.run([
            ops['merged'], ops['step'], ops['train_op'], ops['loss'],
            ops['pred']
        ],
                                                        feed_dict=feed_dict)
        train_writer.add_summary(summary, step)
        pred_val = np.argmax(pred_val, 2)
        total_seen += (BATCH_SIZE * NUM_POINT)
        loss_sum += loss_val

        c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES, pred_val,
                                                     cur_label)

        t_batch_ls.append(
            np.reshape(np.array([time.time() - t1, t1 - t0]), (2, 1)))
        if (epoch == 0 and batch_idx % 10 == 0) or batch_idx % 100 == 0:
            add_log('train', epoch, batch_idx, loss_sum / (batch_idx + 1),
                    c_TP_FN_FP, total_seen, t_batch_ls)
        if batch_idx == 100:
            os.system('nvidia-smi')
    return add_log('train', epoch, batch_idx, loss_sum / (batch_idx + 1),
                   c_TP_FN_FP, total_seen, t_batch_ls)
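# Hedged sketch (assumption): the driver that calls train_one_epoch and
# eval_one_epoch is not part of this example. It would typically be an outer
# epoch loop like the one below; MAX_EPOCH, saver and LOG_DIR are illustrative
# names, not taken from the code above.
def train_loop_sketch(sess, ops, train_writer, test_writer, saver):
    for epoch in range(MAX_EPOCH):
        log_string('**** EPOCH %03d ****' % epoch)
        train_one_epoch(sess, ops, train_writer, epoch)
        eval_one_epoch(sess, ops, test_writer, epoch)
        if epoch % 10 == 0:
            # Periodically checkpoint the session.
            save_path = saver.save(sess, os.path.join(LOG_DIR, 'model.ckpt'))
            log_string('Model saved in file: %s' % save_path)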
def eval_one_epoch(sess, ops, test_writer, epoch,eval_feed_buf_q):
    """ ops: dict mapping from string to tf ops """
    is_training = False
    total_seen = 0.00001
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3,NUM_CLASSES))

    log_string('----')

    num_blocks = net_provider.eval_num_blocks
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        num_batches = limit_eval_num_batches(epoch,num_batches)
        if num_batches == 0:
            print('\ntest num_blocks=%d  BATCH_SIZE=%d  num_batches=%d'%(num_blocks,BATCH_SIZE,num_batches))
            return ''
    else:
        num_batches = None

    eval_logstr = ''
    t_batch_ls = []
    batch_idx = -1

    while (num_batches is None) or (batch_idx < num_batches - 1):
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx+1) * BATCH_SIZE

        if eval_feed_buf_q is None:
            cur_data,cur_label,cur_smp_weights = net_provider.get_eval_batch(start_idx,end_idx)
        else:
            if eval_feed_buf_q.qsize() == 0:
                print('eval_feed_buf_q.qsize == 0')
                break
            cur_data,cur_label,cur_smp_weights, batch_idx_buf,epoch_buf  = eval_feed_buf_q.get()
            #assert batch_idx == batch_idx_buf and epoch== epoch_buf

        t1 = time.time()
        if cur_data is None:
            print('batch_idx:%d, get None, reading finished'%(batch_idx))
            break # all data reading finished
        feed_dict = {ops['pointclouds_pl']: cur_data,
                     ops['labels_pl']: cur_label[:,:,CATEGORY_LABEL_IDX],
                     ops['is_training_pl']: is_training,
                     ops['smpws_pl']: cur_smp_weights[:,:,CATEGORY_LABEL_IDX]}
        summary, step, loss_val, pred_val = sess.run([ops['merged'], ops['step'], ops['loss'], ops['pred']],
                                      feed_dict=feed_dict)
        if ISSUMMARY and test_writer is not None:
            test_writer.add_summary(summary, step)
        t_batch_ls.append( np.reshape(np.array([t1-t0,time.time() - t1]),(2,1)) )

        if batch_idx == num_batches-1 or (FLAGS.only_evaluate and  batch_idx%30==0):
            pred_logits = np.argmax(pred_val, 2)
            total_seen += (BATCH_SIZE*NUM_POINT)
            loss_sum += loss_val
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES,pred_logits,cur_label)
            #net_provider.set_pred_label_batch(pred_val,start_idx,end_idx)
            eval_logstr = add_log('eval',epoch,batch_idx,loss_sum/(batch_idx+1),c_TP_FN_FP,total_seen,t_batch_ls)

    #if FLAGS.only_evaluate:
    #    obj_dump_dir = os.path.join(FLAGS.log_dir,'obj_dump')
    #    net_provider.gen_gt_pred_objs(FLAGS.visu,obj_dump_dir)
    #    net_provider.write_file_accuracies(FLAGS.log_dir)
    #    print('\nobj out path:'+obj_dump_dir)

    return eval_logstr
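# Hedged sketch (assumption): limit_eval_num_batches is called above but not
# defined in this example. It presumably caps how many evaluation batches are
# run in ordinary training epochs so that only full evaluation runs scan the
# whole test set; the cap of 20 below is illustrative.
def limit_eval_num_batches_sketch(epoch, num_batches):
    if not FLAGS.only_evaluate and epoch % 5 != 0:
        num_batches = min(num_batches, 20)
    return num_batches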
def train_one_epoch(sess, ops, train_writer,epoch,train_feed_buf_q,pctx,opts):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    #log_string('----')
    num_blocks = net_provider.train_num_blocks
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches ==0: return ''
    else:
        num_batches = None

    total_seen = 0.0001
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3,NUM_CLASSES))

    print('total batch num = ',num_batches)
    batch_idx = -1

    t_batch_ls=[]
    train_logstr = ''
    while (num_batches is None) or (batch_idx < num_batches - 1):
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx+1) * BATCH_SIZE

        if train_feed_buf_q is None:
            cur_data,cur_label,cur_smp_weights = net_provider.get_train_batch(start_idx,end_idx)
        else:
            if train_feed_buf_q.qsize() == 0:
                print('train_feed_buf_q.qsize == 0')
                break
            cur_data,cur_label,cur_smp_weights, batch_idx_buf,epoch_buf = train_feed_buf_q.get()
            #assert batch_idx == batch_idx_buf and epoch== epoch_buf

        t1 = time.time()
        if cur_data is None:
            break # all data reading finished
        label_category = cur_label[:,:,CATEGORY_LABEL_IDX]
        feed_dict = {ops['pointclouds_pl']: cur_data,
                     ops['labels_pl']: label_category,
                     ops['is_training_pl']: is_training,
                     ops['smpws_pl']: cur_smp_weights[:,:,CATEGORY_LABEL_IDX]}

        if ISDEBUG  and  epoch == 0 and batch_idx ==5:
                pctx.trace_next_step()
                pctx.dump_next_step()
                summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                                            feed_dict=feed_dict)
                pctx.profiler.profile_operations(options=opts)
        else:
            summary, step, _, loss_val, pred_val = sess.run([ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred']],
                                        feed_dict=feed_dict)

        t_batch_ls.append( np.reshape(np.array([t1-t0,time.time() - t1]),(2,1)) )
        if ISSUMMARY: train_writer.add_summary(summary, step)
        if batch_idx == num_batches-1 or  (epoch == 0 and batch_idx % 20 ==0) or batch_idx%200==0:
            pred_val = np.argmax(pred_val, 2)
            loss_sum += loss_val
            total_seen += (BATCH_SIZE*NUM_POINT)
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES,pred_val,cur_label)

            train_logstr = add_log('train',epoch,batch_idx,loss_sum/(batch_idx+1),c_TP_FN_FP,total_seen,t_batch_ls)
        if batch_idx == 100:
            os.system('nvidia-smi')
    return train_logstr
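# Hedged sketch (assumption): train_feed_buf_q is expected to be a queue filled
# by a background worker so the training loop above never blocks on disk reads.
# A minimal producer matching the 5-tuple unpacked from the queue could look
# like this; feed_buffer_producer is an illustrative name.
def feed_buffer_producer(feed_buf_q, epoch):
    batch_idx = -1
    while True:
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE
        cur_data, cur_label, cur_smp_weights = net_provider.get_train_batch(
            start_idx, end_idx)
        if cur_data is None:
            break  # one pass over the training blocks is finished
        feed_buf_q.put((cur_data, cur_label, cur_smp_weights, batch_idx, epoch))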
def train_one_epoch(sess, ops, train_writer,epoch,train_feed_buf_q,pctx,opts):
    """ ops: dict mapping from string to tf ops """
    is_training = True
    #log_string('----')
    num_blocks = data_provider.num_train_data
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        if num_batches ==0: return ''
    else:
        num_batches = None

    total_seen = 0.0001
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3,NUM_CLASSES))

    print('total batch num = ',num_batches)
    batch_idx = -1

    t_batch_ls=[]
    train_logstr = ''
    while (num_batches is None) or (batch_idx < num_batches - 1):
        t0 = time.time()
        batch_idx += 1
        #start_idx = batch_idx * BATCH_SIZE
        #end_idx = (batch_idx+1) * BATCH_SIZE
        point_cloud_data = []
        label_data = []
        if train_feed_buf_q is None:
            point_cloud_data, label_data = data_provider._get_next_minibatch()
        else:
            if train_feed_buf_q.qsize() == 0:
                print('train_feed_buf_q.qsize == 0')
                break
            point_cloud_data, label_data = train_feed_buf_q.get()
        t1 = time.time()
        if point_cloud_data is None:
            break  # all data reading finished
        # Sample weights are all ones for this variant of the training loop.
        cur_smp_weights = np.ones((point_cloud_data.shape[0], point_cloud_data.shape[1]))
        feed_dict = {ops['pointclouds_pl']: point_cloud_data,
                     ops['labels_pl']: label_data,
                     ops['is_training_pl']: is_training,
                     ops['smpws_pl']: cur_smp_weights}

        if ISDEBUG  and  epoch == 0 and batch_idx ==5:
                pctx.trace_next_step()
                pctx.dump_next_step()
                summary, step, _, loss_val, pred_class_val, classification_loss_val, regression_loss_val = sess.run([ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred_class'], ops['classification_loss'], ops['regression_loss']],
                                            feed_dict=feed_dict)
                pctx.profiler.profile_operations(options=opts)
        else:
            summary, step, _, loss_val, pred_class_val, classification_loss_val, regression_loss_val = sess.run([ops['merged'], ops['step'], ops['train_op'], ops['loss'], ops['pred_class'], ops['classification_loss'], ops['regression_loss']],
                                        feed_dict=feed_dict)

        t_batch_ls.append( np.reshape(np.array([t1-t0,time.time() - t1]),(2,1)) )
        if ISSUMMARY: train_writer.add_summary(summary, step)
        if batch_idx%80 == 0:
            print('the training batch is {}, the loss value is {}'.format(batch_idx, loss_val))
            print('the classification loss is {}, the regression loss is {}'.format(classification_loss_val, regression_loss_val))
            #print('the all merged is {}'.format(summary))
        if False and (batch_idx == num_batches-1 or (epoch == 0 and batch_idx % 20 == 0) or batch_idx % 200 == 0):  ## disabled: no evaluation within the epoch
            pred_class_val = np.argmax(pred_class_val, 2)
            loss_sum += loss_val
            total_seen += (BATCH_SIZE*NUM_POINT)
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(NUM_CLASSES,pred_class_val,cur_label)

            train_logstr = add_log('train',epoch,batch_idx,loss_sum/(batch_idx+1),c_TP_FN_FP,total_seen,t_batch_ls)
        if batch_idx == 200:
            os.system('nvidia-smi')
    return train_logstr
Example #8
def main():
    input_directory = sys.argv[1]
    train_size = int(sys.argv[2])
    test_size = (100 - train_size) / 100

    ##### Step 1: Data Loading and Basic stats #####
    t0 = time()

    print()
    print('** STEP 1: Data Loading **')
    dl_obj = DataLoading()
    base_df = dl_obj.clean_data(input_directory)
    #prodid_ix = base_df.id.values
    #base_df = base_df.reindex(prodid_ix)

    ## This line should be removed ##
    #print('Only 1000 rows are loaded')
    #base_df = base_df.sample(10000, random_state = 123)

    target_matrix = dl_obj.get_multilabel(base_df)
    #target_matrix = target_matrix.reindex(prodid_ix)

    dl_obj.get_label_info(target_matrix)

    #### Step 2: feature Engineering #####

    print()
    print('** STEP 2: Text Processing **')
    tp_obj = TextProcessing()
    cnt_vectorizer, feature_matrix = tp_obj.run_textprocessing(base_df)

    feature_matrix = pd.DataFrame(feature_matrix.toarray())
    feature_matrix = feature_matrix.join(
        base_df[['vegetarian', 'spicy', 'garlic', 'fish']])
    feature_matrix.fillna(0, inplace=True)

    #### Step 3:
    ### STEP 1: Normalize the labels ###
    print()
    print('** Filter Rare Labels combination **')
    util = Utility()
    print("Feature Matrix Shape:{} Target Matrix.shape: {}"\
            .format(feature_matrix.shape, target_matrix.shape))
    feature_matrix_fltrd, target_matrix_fltrd = util.filter_rare_classes(
        feature_matrix, target_matrix)
    print("Feature Matrix Shape:{} Target Matrix.shape: {}"\
            .format(feature_matrix_fltrd.shape, target_matrix_fltrd.shape))# (18340,3763)

    ### STEP 2: Train Test Split using StratifiedShuffleSplit #####
    print()
    print('** Train test split **')
    train_x, train_y, test_x, test_y = util.train_test_split(
        feature_matrix_fltrd, target_matrix_fltrd, test_size=test_size)
    print("Train_x Shape:{} \n Train_y.shape: {}"\
            .format(train_x.shape, train_y.shape)) # 14672
    print("Test_x Shape:{} \n Test_y.shape: {}"\
            .format(test_x.shape, test_y.shape)) # 3668

    ### Delete unnecessary objects from memory ##

    ### STEP 3: Find Frequent Itemsets on training target matrix ####
    print()
    print('** STEP 3: Frequent Itemset **')

    col_mapping = {}
    for i_col, col_name in enumerate(target_matrix.columns.tolist()):
        col_mapping[i_col] = col_name

    supp = 0.05
    item_size = 3
    train_y_lil = lil_matrix(train_y)
    frequent_items_list = util.find_frequent_itemsets(train_y_lil, col_mapping,
                                                      supp, item_size)
    print('No of {} frequent itemsets with support {}: {} '\
           .format(item_size
                  , supp
                  , len(frequent_items_list))) #21 itemsets

    freq_additives_list = [
        items for itemset in frequent_items_list for items in itemset
    ]
    freq_additives_set = list(
        set([items for itemset in frequent_items_list for items in itemset]))
    freq_additives_cnt_dict = dict(Counter(freq_additives_list).items())

    #del base_df,target_matrix,target_matrix_fltrd, feature_matrix, feature_matrix_fltrd
    #gc.collect()

    ### STEP 4.1: Build 21 classifiers using Naive Bayes ####
    print()
    print('** STEP 4: LabelPowerSet Classifiers**')
    lp = LabelPowerSet(train_x, train_y, test_x, test_y, frequent_items_list,
                       'nb')

    model_list, metrics_labels, metrics_score, prediction_list = lp.build_model_lp(
    )
    index_value = [''.join(items) for items in frequent_items_list]
    metrics_labels_df = pd.DataFrame(
        metrics_labels,
        columns=['Accuracy', 'HammingLoss', 'JaccardSim'],
        index=index_value)
    metrics_score_df = pd.DataFrame(
        metrics_score,
        columns=['CoverageError', 'LblRankAvgPrec', 'LblRankLoss', 'LogLoss'],
        index=index_value)
    pickle.dump(model_list, open('LP_NB_21FSS.pkl', 'wb'))
    del model_list, lp
    metrics_labels_df.to_csv(input_directory + 'LP_NB_metrics_labels.csv')
    metrics_score_df.to_csv(input_directory + 'LP_NB_metrics_score.csv')

    ####### STEP 4.2: Stack the predictions ############
    final_predictions = pd.DataFrame(np.zeros(
        test_y[freq_additives_set].shape),
                                     columns=freq_additives_set)
    for i_model in range(len(prediction_list)):
        #i_model = 0
        prediction = prediction_list[i_model]
        for col in prediction.columns:
            final_predictions[col] = final_predictions[col] + prediction[col]

    final_predictions_2 = final_predictions.apply(
        lambda x: x / freq_additives_cnt_dict[x.name])
    final_predictions_2 = final_predictions_2.applymap(lambda x: 1
                                                       if x >= 0.5 else 0)

    print()
    print('** Evaluation metrics : Majority Voting**')
    eval_metrics = EvaluationMetrics()
    eval_final = eval_metrics.get_classification_report_1(
        test_y[freq_additives_set], final_predictions_2, verbose=1)

    #### STEP 5: Build Binary Relevance models ####

    print()
    print('** STEP 5 : Binary Relevance Classifiers **')
    br = BinaryRelevance()
    label_df, score_df, classifier_list = br.build_model(
        train_x, train_y, test_x, test_y)
    pickle.dump(classifier_list, open('BR_NB_classifiersList.pickle', 'wb'))

    print()
    print('** Evaluation Metrics for BR Classifiers **')
    eval_metrics.get_classification_report_1(test_y[label_df.columns],
                                             label_df)
    # Accuracy: 0.42, Hamming Loss: 0.05, Jaccard Similarity: 0.62

    eval_metrics.get_classification_report_2(test_y[label_df.columns],
                                             score_df)
    # CoverageError: 5.61, LabelRankingAvgPrec: 0.83, LabelRankingLoss: 0.04, LogLoss: 6.7

    ######## Binary Relevance predictions for frequent labels #####
    print()
    print('** BR classifiers evaluation for labels in frequent itemsets **')
    eval_metrics.get_classification_report_1(test_y[freq_additives_set],
                                             label_df[freq_additives_set])

    ### STEP 6: Final Predictions #########
    print()
    print('** STEP 6 : Final Predictions **')
    final_predictions_3 = pd.DataFrame(np.zeros(label_df.shape),
                                       columns=label_df.columns)

    ### Binary Relevance + LabelPowerset #####
    for col in final_predictions_3.columns:
        if col in freq_additives_set:
            final_predictions_3[col] = final_predictions_2[col]
        else:
            final_predictions_3[col] = label_df[col]

    print()
    print('** Evaluation Metrics for Final Prediction **')
    print('test_y shape', test_y[label_df.columns].shape)
    print('final predictions', final_predictions_3.shape)
    eval_final_2 = eval_metrics.get_classification_report_1(
        test_y[label_df.columns], final_predictions_3, verbose=1)

    ### STEP 7: Dumping Predictions ##########
    print()
    print('** STEP 7 : Saving Predictions **')
    test_y.to_csv('test_actual_labels.csv')
    final_predictions_3.to_csv('test_final_predicted_labels.csv')
    score_df.to_csv('test_scoring_from_br.csv')

    print('Entire Process completed in {} seconds'.format(time() - t0))
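# Hedged sketch (assumption): main() reads the data directory and the training
# split percentage from sys.argv, so the script is presumably launched as
#   python <script>.py /path/to/data 80
# The entry-point guard below is illustrative; the original module may differ.
if __name__ == '__main__':
    main()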
Example #9
csv_path = "/Users/aniruddha/Downloads/DL_info.csv"
Image_slices_dir = "/Users/aniruddha/Downloads/Test"

df = pd.read_csv(csv_path)          # The DL_info.csv file path
df.sort_values("File_name", inplace=True)
df.drop_duplicates(subset="File_name",
                   keep=False, inplace=True)

new_df = df[df['Train_Val_Test'] == 3]


model = PanNet()
model.load_state_dict(torch.load('/Users/aniruddha/Downloads/panet_model_4.dms'))

eval1 = EvaluationMetrics()

map_scores_list = []
for i in range(1, 9):
    batch_no = 0
    train_dataset = ImageDataset(
        root_dir="/Users/aniruddha/Downloads/Test", dataset_type=3)
    # Set the new_df yourself
    print("Original length is", len(new_df.index))
    loop_df = new_df[new_df['Coarse_lesion_type'] == i]

    train_dataset.df = loop_df

    batch_size = 3
    dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size)
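    # Hedged sketch (assumption): the example stops after building the
    # DataLoader. Evaluation would presumably continue by running the model
    # over each batch in inference mode; ImageDataset's batch structure is not
    # shown, so the (images, targets) unpacking below is illustrative only.
    model.eval()
    with torch.no_grad():
        for images, targets in dataloader:
            outputs = model(images)   # forward pass only, no gradients
            batch_no += 1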
Example #10
    def build_model_lp(self):
        util = Utility()
        eval_met = EvaluationMetrics()
        model_list = []
        metrics_labels = []
        metrics_score = []
        prediction_list = []
        t0 = time()
        for items in self.frequent_items_list:
            label_subset_train = self.train_y[items]
            multiclass_labels = util.transform(label_subset_train)
            sss_cv = StratifiedShuffleSplit(multiclass_labels,
                                            n_iter=3,
                                            test_size=0.3,
                                            random_state=123)
            grid_search_dict = {}
            if (self.classifier == 'nb'):
                grid_search_dict['estimator'] = MultinomialNB()
                grid_search_dict['cvalidator'] = sss_cv
                grid_search_dict['params'] = [{'alpha': [0.7, 1.0]}]
                grid_search_dict['loss_fun'] = 'neg_log_loss'

            elif (self.classifier == 'svc'):
                grid_search_dict['estimator'] = SVC(probability=True,
                                                    kernel='rbf',
                                                    gamma=0.001)
                grid_search_dict['cvalidator'] = sss_cv
                grid_search_dict['params'] = [{'C': [100, 1000]}]
                grid_search_dict['loss_fun'] = 'neg_log_loss'

            t1 = time()
            print('Classifier {} started'.format(items))
            classifier = util.build_model(self.train_x, multiclass_labels,
                                          grid_search_dict)
            print('Classifier {}, completed in {} seconds '.format(
                items,
                time() - t1))

            pred_labels = util.predict_label(classifier, self.test_x)
            pred_labels.columns = label_subset_train.columns.tolist()
            pred_score = util.predict_score(classifier, self.test_x)

            label_subset_test = self.test_y[items]

            eval_labels = eval_met.get_classification_report_1(
                label_subset_test, pred_labels, verbose=1)

            transformed_test_labels = util.transform(label_subset_test)
            dummy_trans_test_labels = pd.get_dummies(transformed_test_labels)
            eval_score = eval_met.get_classification_report_2(
                dummy_trans_test_labels, pred_score, verbose=1)

            model_list.append(classifier)
            metrics_labels.append(eval_labels)
            metrics_score.append(eval_score)
            prediction_list.append(pred_labels)
        print('Label Powerset Method Completed in {}'.format(time() - t0))
        return model_list, metrics_labels, metrics_score, prediction_list
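    # Hedged sketch (assumption): Utility.transform is defined elsewhere. For a
    # Label Powerset model it would map each distinct combination of labels in
    # the subset to a single multiclass code; the version below is illustrative
    # only and, unlike the real helper, does not keep a shared mapping between
    # the train and test calls used above.
    def transform_labels_sketch(self, label_subset):
        # Treat each row of the binary label matrix as a string key and encode
        # each distinct key as one class id.
        keys = label_subset.astype(str).apply(''.join, axis=1)
        return keys.astype('category').cat.codes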
Example #11
def eval_one_epoch(sess, ops, test_writer, epoch, eval_feed_buf_q):
    """ ops: dict mapping from string to tf ops """
    is_training = False
    total_seen = 0.00001
    loss_sum = 0.0
    c_TP_FN_FP = np.zeros(shape=(3, NUM_CLASSES))

    log_string('----')

    num_blocks = data_provider.evaluation_num  # evaluation some of data
    if num_blocks is not None:
        num_batches = num_blocks // BATCH_SIZE
        num_batches = limit_eval_num_batches(epoch, num_batches)
        if num_batches == 0:
            print('\ntest num_blocks=%d  BATCH_SIZE=%d  num_batches=%d' %
                  (num_blocks, BATCH_SIZE, num_batches))
            return ''
    else:
        num_batches = None

    eval_logstr = ''
    t_batch_ls = []
    all_gt_box = []
    all_pred_class_val = []
    all_pred_box_val = []
    all_xyz = []
    batch_idx = -1
    # label
    while (num_batches is None) or (batch_idx < num_batches - 1):
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE

        if eval_feed_buf_q is None:
            point_cloud_data, label_data, gt_box = data_provider._get_evaluation_minibatch(
                start_idx, end_idx
            )  #cur_data,cur_label,cur_smp_weights = net_provider.get_eval_batch(start_idx,end_idx)
        else:
            if eval_feed_buf_q.qsize() == 0:
                print('eval_feed_buf_q.qsize == 0')
                break
            point_cloud_data, label_data, epoch_buf = eval_feed_buf_q.get()
            #assert batch_idx == batch_idx_buf and epoch== epoch_buf
        t1 = time.time()
        if point_cloud_data is None:
            print('batch_idx:%d, get None, reading finished' % (batch_idx))
            break  # all data reading finished
        # Sample weights are all ones for this variant of the evaluation loop.
        cur_smp_weights = np.ones(
            (point_cloud_data.shape[0], point_cloud_data.shape[1]))
        feed_dict = {
            ops['pointclouds_pl']: point_cloud_data,
            ops['labels_pl']: label_data,
            ops['is_training_pl']: is_training,
            ops['smpws_pl']: cur_smp_weights
        }
        summary, step, loss_val, pred_class_val, pred_prob_val, pred_box_val, xyz_pl, classification_loss_val, regression_loss_val, loss_details_val = sess.run(
            [
                ops['merged'], ops['step'], ops['loss'], ops['pred_class'],
                ops['pred_prob'], ops['pred_box'], ops['xyz_pl'],
                ops['classification_loss'], ops['regression_loss'],
                ops['loss_details']
            ],
            feed_dict=feed_dict)
        if ISSUMMARY and test_writer is not None:
            test_writer.add_summary(summary, step)
        t_batch_ls.append(
            np.reshape(np.array([t1 - t0, time.time() - t1]), (2, 1)))

        # all_gt_box: list of length num_batches; each entry holds BATCH_SIZE
        # sets of ground-truth boxes of shape (k, 8), one set per labelled frame.
        all_gt_box.append(gt_box)
        # all_pred_class_val: list of num_batches arrays of shape
        # BATCH_SIZE x point_num x 4 (per-point class probabilities).
        all_pred_class_val.append(pred_prob_val)
        # all_pred_box_val: list of num_batches arrays of shape
        # BATCH_SIZE x point_num x 14 (per-point box regression output).
        all_pred_box_val.append(pred_box_val)
        # all_xyz: list of num_batches arrays of shape BATCH_SIZE x point_num x 3.
        all_xyz.append(xyz_pl)

        if False and (batch_idx == num_batches - 1 or
                      (FLAGS.only_evaluate and batch_idx % 30 == 0)):
            pred_logits = np.argmax(pred_prob_val, 2)
            total_seen += (BATCH_SIZE * NUM_POINT)
            loss_sum += loss_val
            c_TP_FN_FP += EvaluationMetrics.get_TP_FN_FP(
                NUM_CLASSES, pred_logits, cur_label)
            #net_provider.set_pred_label_batch(pred_prob_val,start_idx,end_idx)
            eval_logstr = add_log('eval', epoch, batch_idx,
                                  loss_sum / (batch_idx + 1), c_TP_FN_FP,
                                  total_seen, t_batch_ls)
        if batch_idx % 40 == 0:
            print('the test batch is {}, the loss value is {}'.format(
                batch_idx, loss_val))
            print('the classification loss is {}, the regression loss is {}'.
                  format(classification_loss_val, regression_loss_val))
            print('loss details  dx:{}, dy:{}, dz:{}, dl:{}, dw:{}, dh:{}, dtheta:{}'.format(
                loss_details_val[0], loss_details_val[1], loss_details_val[2],
                loss_details_val[3], loss_details_val[4], loss_details_val[5],
                loss_details_val[6]))

    ## Estimate the detection results over the whole evaluation set.
    # Format of all_pred_boxes: l, w, h, theta, x, y, z, score
    # Format of gt_boxes: type, l, w, h, theta, x, y, z
    # Assemble all_pred_class_val and all_pred_box_val into the all_pred_boxes
    # format, keep predictions with score above 0.05, then apply nms_3d to
    # suppress overlapping boxes.
    print('---------------------------')
    print('Start evaluation!!')
    all_pred_boxes, all_gt_boxes = boxes_assemble_filter(
        all_pred_class_val, all_pred_box_val, all_xyz, all_gt_box, 0.05)

    # Calculate the average precision from the detection results
    aveg_precision = evaluation_3d(all_pred_boxes, all_gt_boxes,
                                   cfg.TEST.RPN_NMS_THRESH)
    # delete the all_gt_box, all_pred_class_val and all_pred_box_val to save
    # memory
    print('The average precision is {}'.format(aveg_precision))
    #if FLAGS.only_evaluate:
    #    obj_dump_dir = os.path.join(FLAGS.log_dir,'obj_dump')
    #    net_provider.gen_gt_pred_objs(FLAGS.visu,obj_dump_dir)
    #    net_provider.write_file_accuracies(FLAGS.log_dir)
    #    print('\nobj out path:'+obj_dump_dir)

    return aveg_precision
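# Hedged sketch (assumption): boxes_assemble_filter and nms_3d are project
# helpers that are not shown here. The comments above describe keeping
# predictions whose score exceeds 0.05 and then suppressing overlapping boxes;
# a generic greedy NMS over scored boxes looks roughly like this, with iou_3d
# left as a placeholder for the project's own 3-D overlap function.
def greedy_nms_sketch(boxes, scores, iou_3d, iou_thresh):
    # boxes: (N, 7) box parameters, scores: (N,) confidence values.
    order = np.argsort(scores)[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        # Drop every remaining box that overlaps the kept box too strongly.
        ious = np.array([iou_3d(boxes[i], boxes[j]) for j in rest])
        order = rest[ious < iou_thresh]
    return keep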