def create_model(scenario):
    """Train, evaluate and persist a random-forest classifier for a scenario.

    Feature arrays are loaded from the scenario's feature directory. The
    files selected by the train/test index constants form the training and
    the test set. In every loaded array the last column is used as the label
    and the columns returned by ``get_feature_columns`` as the features.

    The classifier is fitted on each fold of a 5-fold stratified split of the
    training data, refitted on the training part of the fold that reached the
    highest validation accuracy, scored on the test set and finally dumped
    with joblib below ``classifier_path_px``.

    Args:
        scenario: mapping providing ``'layer'`` (forwarded to
            ``get_total_columns`` / ``get_feature_columns``) and
            ``'codename'`` (locates the feature files, names the output).

    Returns:
        dict with the keys ``'codename'``, ``'acc'``, ``'fn'``, ``'fp'``,
        ``'tn'`` and ``'tp'`` measured on the test set, treating the second
        class of the confusion matrix as the positive one.
    """
    n_layer = scenario['layer']
    n_column = get_total_columns(n_layer)
    column_indexes = get_feature_columns(n_layer)

    target_directory = get_feature_array_scenario_path(scenario['codename'])

    target_files = get_files(target_directory)
    target_train_files = target_files[train_start_index: train_end_index]
    target_test_files = target_files[test_start_index: test_end_index]

    def load_rows(file_names):
        # Seed with an empty (0, n_column) array so an empty file list still
        # yields a well-formed matrix, then concatenate once instead of
        # re-copying the accumulated rows for every file.
        chunks = [np.zeros((0, n_column))]
        chunks.extend(np.load(target_directory + name) for name in file_names)
        return np.concatenate(chunks, axis=0)

    train_rows = load_rows(target_train_files)
    test_rows = load_rows(target_test_files)

    # The last column holds the label; column_indexes select the features.
    Y = train_rows[:, -1]
    X = train_rows[:, column_indexes]

    Y_test = test_rows[:, -1]
    X_test = test_rows[:, column_indexes]

    strat_kfold = StratifiedKFold(Y, n_folds=5)
    clf3 = RandomForestClassifier()

    def benchmark(classifier, X, Y, train_indices, test_indices):
        # Fit on the training part of a fold and score the held-out part.
        classifier.fit(X[train_indices], Y[train_indices])
        Y_predict = classifier.predict(X[test_indices])
        return accuracy_score(Y[test_indices], Y_predict)

    # Start below every possible accuracy so that a fold is always selected,
    # even when all folds score 0.
    highest_accuracy = -1.0
    highest_counter = 0
    chosen_train_index = None

    for counter, (train_index, test_index) in enumerate(strat_kfold):
        acc = benchmark(clf3, X, Y, train_index, test_index)
        if acc > highest_accuracy:
            highest_accuracy = acc
            highest_counter = counter
            chosen_train_index = train_index

    print("Chosen index-accuracy: %s -- %s" % (highest_counter, highest_accuracy))

    # Refit on the best fold's training part; the fold loop left the
    # classifier fitted on whichever fold happened to come last.
    clf3.fit(X[chosen_train_index], Y[chosen_train_index])

    Y_predict = clf3.predict(X_test)
    a_score = accuracy_score(Y_test, Y_predict)
    c_matrix = confusion_matrix(Y_test, Y_predict)

    # confusion_matrix puts true labels on rows and predictions on columns:
    # [0, 1] is a false positive and [1, 0] a false negative.
    final_result = {
        'codename': scenario['codename'],
        'acc': a_score,
        'fn': c_matrix[1, 0],
        'fp': c_matrix[0, 1],
        'tn': c_matrix[0, 0],
        'tp': c_matrix[1, 1],
    }
    str_final_result = "Test result --> " + str(final_result) + " test case:" + str(len(X_test))
    print(str_final_result)

    classifier_directory = classifier_path_px + scenario['codename'] + "/"
    if not exists(classifier_directory):
        makedirs(classifier_directory)
    joblib.dump(clf3, classifier_directory + 'RF' + "-" + category + "-" + scenario['codename'] + classifier_extension)
    return final_result
def extract_feature(scenario):
    """Extract per-pixel superpixel features for every image of a scenario.

    For each image one ``slic`` segmentation is computed per layer with that
    layer's ``compactness`` / ``segment`` / ``sigma`` settings, relabelled
    with ``label`` and measured with ``regionprops``. Every pixel whose
    segment label is non-zero in all layers contributes one row: the
    configured ``attributes`` of its segment in each layer (``'bbox'`` is
    expanded into its four values) followed by the binarised ground-truth
    value of the pixel. The rows are passed through ``unique_rows`` and
    saved via ``get_feature_array_file``.

    Nothing is extracted when the scenario's feature directory already
    contains 50 or more files.

    Args:
        scenario: mapping with ``'layer'`` (number of segmentation layers),
            ``'codename'`` and ``'settings'`` (one mapping per layer holding
            ``'compactness'``, ``'segment'`` and ``'sigma'``).

    Returns:
        None.
    """
    n_layer = scenario['layer']
    target_directory = get_feature_array_scenario_path(scenario['codename'])

    create_directory(target_directory)
    array_px_files = get_files(target_directory)

    # Do not run the feature extraction again once it has been done.
    if len(array_px_files) >= 50:
        print("feature " + scenario['codename'] + " is already existed. Abort mission")
        return

    # Collect every image file.
    image_filenames = get_files(directory_path)
    for image_filename in image_filenames:
        a = read_image(image_filename)
        gt = read_groundtruth_image(image_filename)

        # Convert the ground truth into a binary (0/1) image.
        gt = (gt > 20).astype(int)
        image_row, image_col = a.shape[:2]

        # The grayscale image is the same for every layer, so compute it once.
        # NOTE(review): assumes rgb2gray has no side effects - confirm.
        gray = rgb2gray(a)

        im_slic = []
        im_disp = []
        im_bound = []
        features = []

        # Extract the superpixel features of each layer.
        for i in range(n_layer):
            layer_settings = scenario['settings'][i]
            segments = slic(a, compactness=layer_settings['compactness'],
                            n_segments=layer_settings['segment'],
                            sigma=layer_settings['sigma'])
            segments = label(segments, neighbors=8)
            im_slic.append(segments)
            im_disp.append(np.copy(segments))
            # NOTE(review): im_bound is never read afterwards; the call is
            # kept in case mark_boundaries is relied on elsewhere - confirm.
            im_bound.append(mark_boundaries(a, segments))
            features.append(list_to_dict(regionprops(segments, intensity_image=gray)))

        X_indiv = []

        for im_row in range(image_row):
            for im_col in range(image_col):
                pixel_label = gt[im_row, im_col]
                current_labels = [segments[im_row, im_col] for segments in im_slic]

                # Label 0 is not allowed: it has no entry in the feature
                # lookup and would cause a "not exists" error.
                if any(segment_label == 0 for segment_label in current_labels):
                    continue

                # Concatenate the properties of all layers into one row.
                x_entry = []
                for i in range(n_layer):
                    feat = features[i][current_labels[i]]
                    for att in attributes:
                        if att == 'bbox':
                            (min_row, min_col, max_row, max_col) = feat['bbox']
                            x_entry.extend((min_row, min_col, max_row, max_col))
                        else:
                            x_entry.append(feat[att])
                    if pixel_label == 1:
                        mark(current_labels[i], 1, im_slic[i], im_disp[i])
                x_entry.append(pixel_label)
                X_indiv.append(x_entry)

        # Finish the array before opening the output, so a failure here does
        # not leave a stub file that counts toward the threshold above.
        X_indiv_u = unique_rows(np.array(X_indiv))

        f = get_feature_array_file(scenario['codename'], image_filename, mode='w')
        try:
            np.save(f, X_indiv_u)
        finally:
            # Release the handle even when saving fails.
            f.close()
@author: fruity
'''

import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
from constant import array_px_path, train_start_index, train_end_index, test_start_index, test_end_index, classifier_path_px, classifier_extension, category 
from os import listdir, makedirs
from os.path import isfile, join, splitext, exists
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.cross_validation import StratifiedKFold, cross_val_score
from util import get_feature_columns, get_total_columns, get_feature_array_scenario_path
from util.file import get_files

# Resolve the feature-array directory of the "base3" scenario at import time
# and echo it to stdout.
target_path = get_feature_array_scenario_path("base3")
print(target_path)

def create_model(scenario):
    n_layer = scenario['layer']
    n_column = get_total_columns(n_layer)
    column_indexes = get_feature_columns(n_layer)
    
    target_directory = get_feature_array_scenario_path(scenario['codename'])
    
    target_files = get_files(target_directory)
    target_train_files = target_files[train_start_index: train_end_index]
    target_test_files = target_files[test_start_index: test_end_index]
    
    X = np.zeros((0,n_column))
    X_test = np.zeros((0, n_column))