Code example #1
def get_conf_mat(set_name):

    conf_mat_filename = "temp/conf_mat_{}.h5".format(set_name)

    if not os.path.exists(conf_mat_filename):
        # Prepare data generator
        data_folder = os.path.join(Glb.images_folder, "Bal_v14", "Ind-0",
                                   set_name)
        data_iterator = Glb_Iterators.get_iterator(data_folder,
                                                   div255_resnet="div255",
                                                   shuffle=False)

        # Load model
        model_filename = "model_clsf_from_isVisible_20210415_gpu1.h5"
        print("Loading model {}".format(model_filename))
        now = time.time()
        model = load_model(os.path.join(Glb.results_folder,
                                        model_filename))  # 83% test accuracy
        print("Loaded in {} sec".format(time.time() - now))

        # Predict highest classes and get conf_mat
        print("Predicting...")
        now = time.time()
        (y_pred, y_true) = cm.get_pred_actual_classes(model, data_iterator)
        del model
        print("Predicted in {} sec".format(time.time() - now))
        conf_mat = confusion_matrix(y_true=y_true, y_pred=y_pred)
        pickle.dump(conf_mat, open(conf_mat_filename, 'wb'))  # pickled, despite the .h5 name
        print("Saved conf mat {}".format(set_name))
    else:
        conf_mat = pickle.load(open(conf_mat_filename, 'rb'))
        print("Loaded conf mat {}".format(set_name))

    # sanity check: accuracy should be ~83% (Test), ~49.8% (Val)
    acc = np.trace(conf_mat) / np.sum(conf_mat)  # trace = sum over the 194 diagonal cells
    print("Acc: {}".format(acc))
    return conf_mat
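# Usage sketch (assumption, not in the source): per-class recall can be read
# off the diagonal of the returned confusion matrix.
conf_mat = get_conf_mat("Val")
per_class_recall = conf_mat.diagonal() / conf_mat.sum(axis=1)
print("5 weakest classes: {}".format(np.argsort(per_class_recall)[:5]))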
Code example #2
#set_name = "Test"
#set_name = "Train"
set_name = "Val"


#dist_method = "manhattan"
#dist_method = "euclidean"
dist_method = "cosine"
#dist_method = "rbf"

#linkage_method='centroid'
linkage_method='single'
#linkage_method='complete'

data_iterator = Glb_Iterators.get_iterator(
    os.path.join(r"C:\IsKnown_Images_IsVisible\Bal_v14\Ind-0", set_name),
    "div255", batch_size=batch_size)
# Total number of images
cnt_imgs = len(data_iterator.classes)
cnt_classes = len(data_iterator.class_indices)

act_filename = act_filename_pattern.format(set_name)
#if not os.path.exists (act_filename):
#    # Allocate buffer for storing activations and labels
#    act_prelast = np.zeros ((cnt_imgs, prelast_output_shape), dtype=np.float32)
#    lbls = np.zeros ((cnt_imgs), dtype=np.int64)
#
#    cntr = 0
#    now = datetime.now()
#
#    # Save activations
#    for X,y in data_iterator:
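# Sketch (assumption, not in the source): how the dist_method / linkage_method
# choices above could feed scipy's hierarchical clustering once the activations
# are loaded from act_filename. scipy calls manhattan "cityblock"; "rbf" would
# need a precomputed kernel matrix and is not covered here; "centroid" linkage
# is only well-defined for euclidean distances. cluster_activations is a
# hypothetical helper name.
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage

def cluster_activations(act_prelast, dist_method, linkage_method):
    metric = "cityblock" if dist_method == "manhattan" else dist_method
    dists = pdist(act_prelast, metric=metric)  # condensed distance matrix
    return linkage(dists, method=linkage_method)  # hierarchical merge tree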
Code example #3
model_filenames = {
    # (entries for hier levels 0-3 are truncated in the source)
    4: os.path.join(Glb.results_folder,
                    "model_clsf_from_isVisible_20210614_gpu0_hier4.h5")
}

data_folders = {
    0: os.path.join(Glb.images_folder, "Bal_v14", "Ind-0", "Test"),
    1: os.path.join(Glb.images_folder, "Bal_v14", "Ind-1", "Test"),
    2: os.path.join(Glb.images_folder, "Bal_v14", "Ind-2", "Test"),
    3: r"D:\IsKnown_Images\Bal_102030_v14_Ind-3\Ind-3\Test",
    4: r"D:\IsKnown_Images\Bal_102030_v14_Ind-4\Ind-4\Test"
}

for hier_lvl in range(0, 1):  # only hier level 0 here; range(0, 5) covers all levels
    # Prep data, model
    model = load_model(model_filenames[hier_lvl])
    data_folder = data_folders[hier_lvl]
    data_iterator = Glb_Iterators.get_iterator(data_folder,
                                               div255_resnet="div255",
                                               shuffle=False)

    # Predict
    preds = model.predict(data_iterator)
    pred_classes = np.argmax(preds, axis=1)
    actual_classes = data_iterator.classes

    # Accuracy, f-score
    acc = accuracy_score(y_true=actual_classes, y_pred=pred_classes)
    f1 = f1_score(y_true=actual_classes, y_pred=pred_classes, average="macro")
    print("Model Hier-{}. Acc={}, F1={}".format(hier_lvl, acc, f1))
Code example #4
def _float_feature(values):
    """Returns a float_list from a list of float / double values."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))


def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
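# Sketch (assumption, not in the source): the helpers above are the standard
# TFRecord building blocks; one serialized tf.train.Example per sample would
# be written like this. _serialize_sample and the feature keys are
# hypothetical names.
def _serialize_sample(activation_values, label):
    feature = {
        "activation": _float_feature(activation_values),
        "label": _int64_feature(label),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example.SerializeToString()

# with tf.io.TFRecordWriter("activations.tfrecord") as writer:
#     writer.write(_serialize_sample(act_row, lbl))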


hier_lvl = 0
set_name = "Test"
batch_size = 32
div255_resnet = "div255"
img_filepath = os.path.join(Glb.images_folder, "Bal_v14",
                            "Ind-{}".format(hier_lvl), set_name)
data_iterator = Glb_Iterators.get_iterator(img_filepath,
                                           div255_resnet=div255_resnet,
                                           batch_size=batch_size)

# all file names to list
allfiles_path = []
for barcode_path in os.listdir(img_filepath):
    allfiles_path += [
        os.path.join(img_filepath, barcode_path, filepath)
        for filepath in os.listdir(os.path.join(img_filepath, barcode_path))
    ]

now = time.time()
for i, (X, y) in enumerate(data_iterator):
    #print ("batch {}/{}".format(i,len(data_iterator)))
    #if i+1>=0: #len(data_iterator):
    if i + 1 >= len(data_iterator):
df_prodnames = pd.read_csv("df_prods_194_translated.csv",
                           header=0)["product"].tolist()
df_classes = pd.read_csv("df_prods_194_translated.csv",
                         header=0)["class"].tolist()

model_filename = os.path.join(
    Glb.results_folder,
    "model_clsf_from_isVisible_20210415_gpu1.h5")  # 83% test accuracy  #Hier-0
model = load_model(model_filename)

data_folder = os.path.join(Glb.images_folder, "Bal_v14",
                           "Ind-{}".format(hier_lvl), set_name)
data_iterator = Glb_Iterators.get_iterator(data_folder,
                                           div255_resnet="div255",
                                           batch_size=350,
                                           target_size=256,
                                           shuffle=False)
total_classes = len(data_iterator.class_indices)

actual_classes = data_iterator.classes
now = time.time()
preds = model.predict(data_iterator, steps=len(data_iterator))
print("Predicted in {} sec".format(time.time() - now))
pred_classes = np.argmax(preds, axis=1)

# Sanity check: overall accuracy
acc = len(np.where(pred_classes == actual_classes)[0]) / len(actual_classes)
total_errors = len(np.where(pred_classes != actual_classes)[0])
print("{} accuracy: {}. Total errors: {}/{}".format(set_name, acc,
                                                    total_errors,
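# Sketch (assumption: the CSV rows align with the iterator's class indices):
# translate misclassified indices into readable product names via df_prodnames.
err_idx = np.where(pred_classes != actual_classes)[0]
for i in err_idx[:10]:  # first 10 errors
    print("actual: {} -> predicted: {}".format(
        df_prodnames[actual_classes[i]], df_prodnames[pred_classes[i]]))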
Code example #6
def trainModel(epochs, bn_layers, dropout_layers, l2_layers, padding,
               target_size, dense_sizes, architecture, conv_layers_over_5,
               use_maxpool_after_conv_layers_after_5th, version, load_existing,
               gpu_id, model_filename, lc_filename, data_dir):

    """Trains a model.

    Args:
        epochs: max number of epochs to train (subject to early stopping)
        bn_layers: list of indexes of Dense layers (-1 and down) and CNN
            layers (1 and up) where Batch Norm should be applied
        dropout_layers: list of indexes of Dense layers (-1 and down) where
            Dropout should be applied
        l2_layers: list of indexes of Dense layers (-1 and down) where L2
            regularization should be applied
        padding: set to "same" to keep 2^n feature map sizes
        target_size: input image height/width in pixels
        dense_sizes: dictionary of dense layer sizes (count of neurons);
            "d-1" is overwritten with the softmax size below
        architecture: one of Model_6classes_c4_d3_v1, Model_6classes_c5_d2_v1,
            Model_6classes_c5_d3_v1
        conv_layers_over_5: number of convolutional layers after the 5th
        use_maxpool_after_conv_layers_after_5th: list of booleans, whether to
            use maxpooling after each conv layer beyond the 5th
        version: used to name a learning curve file
        load_existing: whether to load an existing model file instead of
            creating a new one
        gpu_id: GPU id, recorded in the metrics CSV
        model_filename: where the best checkpoint is saved (or loaded from)
        lc_filename: learning-curve CSV written by CSVLogger
        data_dir: folder containing the Train/Val/Test subfolders

    Returns:
        model: trained Keras model (see the usage sketch after this function)
    """

    crop_range = 1  # number of pixels to crop image (if size is 235, crops are 0-223, 1-224, ... 11-234)
    #target_size = 224
    batch_size = 32
    #datasrc = "visible"

    # Manually copied to C: to speed up training
    #data_dir = os.path.join(Glb.images_folder, "Bal_v14", "Ind-{}".format(hier_lvl) )
    data_dir_train = os.path.join(data_dir, "Train")
    data_dir_val = os.path.join(data_dir, "Val")
    data_dir_test = os.path.join(data_dir, "Test")

    train_iterator = Glb_Iterators.get_iterator(data_dir_train, "div255")
    val_iterator = Glb_Iterators.get_iterator(data_dir_val, "div255")
    test_iterator = Glb_Iterators.get_iterator(
        data_dir_test, "div255", shuffle=False
    )  # don't shuffle, so actual/prediction pairs line up

    Softmax_size = len(train_iterator.class_indices)
    dense_sizes["d-1"] = Softmax_size

    #model_filename = os.path.join(Glb.results_folder,
    #                              "model_clsf_from_isVisible_{}_gpu{}_hier{}.h5".format(date.today().strftime("%Y%m%d"), gpu_id, hier_lvl))
    #lc_filename = os.path.join(Glb.results_folder,
    #                           "lc_clsf_from_isVisible_{}_gpu{}_hier{}.csv".format(date.today().strftime("%Y%m%d"), gpu_id, hier_lvl))
    # Create or load model
    if not load_existing:
        print("Creating model")
        prepModel = modelVersions_dic[architecture]
        prep_model_params = {
            "input_shape": (target_size, target_size, 3),
            "bn_layers": bn_layers,
            "dropout_layers": dropout_layers,
            "l2_layers": l2_layers,
            "padding": padding,
            "dense_sizes": dense_sizes,
            "conv_layers_over_5": conv_layers_over_5,
            "use_maxpool_after_conv_layers_after_5th":
                use_maxpool_after_conv_layers_after_5th
        }
        model = prepModel(**prep_model_params)
    else:
        print("Loading model")
        #model_filename = r"J:\Visible_models\6class\model_6classes_v" + str(version) + ".h5"
        model = load_model(model_filename)
        model.compile(
            loss='categorical_crossentropy',
            optimizer=Adam(learning_rate=0.001),  # default LR: 0.001
            metrics=['accuracy'])

    model.summary()  # summary() prints directly and returns None

    callback_earlystop = EarlyStopping(monitor='val_accuracy',
                                       min_delta=0.0001,
                                       patience=10,
                                       verbose=1,
                                       mode='max',
                                       restore_best_weights=True)
    callback_csv_logger = CSVLogger(lc_filename, separator=",", append=False)

    mcp_save = ModelCheckpoint(model_filename,
                               save_best_only=True,
                               monitor='val_accuracy',
                               mode='max')

    model.fit(train_iterator,
              steps_per_epoch=len(train_iterator),
              epochs=epochs,
              verbose=2,
              validation_data=val_iterator,
              validation_steps=len(val_iterator),
              callbacks=[callback_csv_logger, callback_earlystop, mcp_save])

    print("Evaluation on test set (1 frame)")
    test_metrics = model.evaluate(test_iterator)
    print("Test: {}".format(test_metrics))

    print("Evaluating F1 test set (1 frame)")
    y_pred = model.predict(test_iterator)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = test_iterator.classes
    test_acc = accuracy_score(y_true=y_true, y_pred=y_pred_classes)
    test_f1 = f1_score(y_true=y_true, y_pred=y_pred_classes, average='macro')
    print("acc:{}, f1:{}".format(test_acc, test_f1))

    # metrics to csv
    df_metrics = pd.DataFrame(
        data={
            "gpu": [gpu_id],
            "datetime": [datetime.now().strftime("%Y%m%d %H:%M:%S")],
            "data_dir": [data_dir],
            "test_acc": [test_acc],
            "test_f1": [test_f1]
        })
    df_metrics_filename = os.path.join(Glb.results_folder, "metrics_mrg.csv")
    df_metrics.to_csv(df_metrics_filename, index=False, header=False, mode='a')

    #print("Evaluation on validation set (1 frame)")
    #val_metrics = model.evaluate(val_iterator)
    #print("Val: {}".format(val_metrics))

    return model
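# Usage sketch (assumption: illustrative argument values, not from the source):
#   model = trainModel(
#       epochs=50, bn_layers=[1, 2], dropout_layers=[-2], l2_layers=[],
#       padding="same", target_size=224,
#       dense_sizes={"d-2": 512, "d-1": None},  # "d-1" is overwritten anyway
#       architecture="Model_6classes_c5_d3_v1", conv_layers_over_5=2,
#       use_maxpool_after_conv_layers_after_5th=[True, False],
#       version=1, load_existing=False, gpu_id=0,
#       model_filename=os.path.join(Glb.results_folder, "model_hier0.h5"),
#       lc_filename=os.path.join(Glb.results_folder, "lc_hier0.csv"),
#       data_dir=os.path.join(Glb.images_folder, "Bal_v14", "Ind-0"))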
Code example #7
def put_prelast_act_to_file(model_filename, act_filename_pattern, hier_lvl,
                            set_name, incl_filenames):
    model = load_model(model_filename)
    act_filename = act_filename_pattern.format(
        set_name, hier_lvl, "filenames" if incl_filenames else "nofilenames")

    # Data iterator
    batch_size = 128

    #set_name = "Test"
    #set_name = "Train"
    #set_name = "Val"

    #hier_lvl = 0
    #hier_lvl = 1
    #hier_lvl = 2
    #hier_lvl = 3
    #hier_lvl = 4

    # which layer is needed?
    #   model.summary()
    prelast_dense_layer = model.layers[-2]  #model.layers[dense_layer_ids[-2]]
    prelast_func_activation = function([model.input], [prelast_dense_layer.output])
    prelast_output_shape = prelast_dense_layer.output_shape[1]


    #data_iterator = Glb_Iterators.get_iterator(os.path.join( Glb.images_folder, "Bal_v14", "Ind-{}".format(hier_lvl), set_name), "div255", batch_size=batch_size)
    data_folder = os.path.join(Glb.images_folder, "Bal_v14", "Ind-{}".format(hier_lvl), set_name)
    print ("Datafolder:{}".format(data_folder))
    if incl_filenames:
        data_iterator = Glb_Iterators.get_iterator_incl_filenames(
            data_folder=data_folder, batch_size=batch_size, target_size=256)
    else:
        data_iterator = Glb_Iterators.get_iterator(
            data_folder=data_folder, div255_resnet="div255",
            batch_size=batch_size, target_size=256, shuffle=True)

    #cntr = 0
    now = datetime.now()
    all_filenames = []

    # Save activations
    #for X,y in data_iterator:
    for cntr, batch_tuple in enumerate(data_iterator):

        if incl_filenames:
            (X, y, filenames) = batch_tuple
        else:
            (X, y) = batch_tuple

        if cntr == 0:
            if incl_filenames:
                cnt_imgs = len(Glb_Iterators.all_filepaths)
            else:
                cnt_imgs = len(data_iterator.classes)
            # Allocate buffer for storing activations and labels
            act_prelast = np.zeros((cnt_imgs, prelast_output_shape),
                                   dtype=np.float32)
            lbls = np.zeros((cnt_imgs,), dtype=np.int64)  # np.int was removed in NumPy 1.24

        cnt_samples_in_batch = y.shape[0]
        #print ("Batch {}/{}".format(cntr, len(data_iterator)))
        print("Batch {}/{}".format(cntr, Glb_Iterators.len_iterator if incl_filenames else len(data_iterator)))
        act_prelast[ (cntr*batch_size):(cntr*batch_size+cnt_samples_in_batch),:] = prelast_func_activation([X])[0]
        lbls [ (cntr*batch_size):(cntr*batch_size+cnt_samples_in_batch) ] = np.argmax(y, axis=1)
        if incl_filenames:
            all_filenames += filenames
        if not incl_filenames and (cntr + 1) >= len(data_iterator):
            break  # the Keras iterator repeats forever; the filename iterator is assumed to stop on its own

    print ("Total seconds: {}".format((datetime.now() - now).seconds))

    if incl_filenames:
        pickle.dump((act_prelast, lbls, all_filenames), open(act_filename, 'wb'))
    else:
        pickle.dump((act_prelast, lbls), open(act_filename, 'wb'))
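# Usage sketch (assumption: illustrative pattern/arguments, not from the
# source). The pickled tuple can be loaded back, e.g. for the clustering in
# Code example #2.
# put_prelast_act_to_file(
#     model_filename=os.path.join(Glb.results_folder,
#                                 "model_clsf_from_isVisible_20210415_gpu1.h5"),
#     act_filename_pattern="temp/act_prelast_{}_hier{}_{}.pkl",
#     hier_lvl=0, set_name="Val", incl_filenames=False)
# (act_prelast, lbls) = pickle.load(
#     open("temp/act_prelast_Val_hier0_nofilenames.pkl", 'rb'))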