def compute_similarities_for_each_topK(test_codes_folder,
                                       old_results_file_topK,
                                       topK=topK):
    suf = '.jpg'
    test_img_similarity_dict = dict()
    names_results_ids_dict = get_results_topK(old_results_file_topK, k=topK)
    counter = 1
    for codes_dict in get_codes_from_files(test_codes_folder):
        for name, code in zip(codes_dict['names'], codes_dict['codes']):
            name = name.replace(suf, '')
            catProbDict = names_results_ids_dict[name]
            similarity = 0
            idd = None  # guard: stays None if no category clears the threshold
            for cat, prob in catProbDict.items():
                tmpSim = ensFunc(prob, calc_for_each_sim(code, cat))
                if tmpSim > similarity:
                    idd = cat
                    similarity = tmpSim
            test_img_similarity_dict[name] = [idd, similarity]
            # min_distance = distance if distance < min_distance else min_distance
        print(counter)
        counter += 1
    # test_img_similarity_dict = distance_to_similarity(test_img_similarity_dict, min_distance)
    save_obj(
        test_img_similarity_dict, 'test_sim_dict' + '_top' + str(topK) + '_' +
        old_results_file_topK.split('/')[-1])
    return test_img_similarity_dict
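# Hedged usage sketch for compute_similarities_for_each_topK; the paths below
# and the ensemble function are illustrative assumptions, not this repo's
# definitions. ensFunc is expected to fuse the classifier probability with the
# siamese similarity into one score, e.g. a weighted geometric mean:
#
#     def ensFunc(prob, sim, alpha=0.5):
#         return (prob ** alpha) * (sim ** (1 - alpha))
#
#     sim_dict = compute_similarities_for_each_topK('../codes/test/',
#                                                   '../results/resnet_top5.csv',
#                                                   topK=5)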
def second_phase():
    global resnet_model
    tensorboard = TensorBoard(log_dir=second_phase_folder + 'tb_logs', batch_size=batch_size)
    resnet_model = load_model(data_folder + '1st_phase_resnet_model.h5')

    trainable_layers_ratio = 1/3.0
    trainable_layers_index = int(len(resnet_model.layers) * (1 - trainable_layers_ratio))
    for layer in resnet_model.layers[:trainable_layers_index]:
        layer.trainable = False
    for layer in resnet_model.layers[trainable_layers_index:]:
        layer.trainable = True

    # for layer in resnet_model.layers:
    #     layer.trainable = True

    resnet_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['acc'])

    if not os.path.exists(second_phase_folder):
        os.makedirs(second_phase_folder)
    # train the model on the new data for a few epochs
    for i in range(second_phase_train_reps):
        history = resnet_model.fit_generator(train_img_class_gen,
                                               steps_per_epoch=steps_per_small_epoch,
                                               epochs=small_epochs, verbose=2,
                                               validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
                                               workers=4, callbacks=[tensorboard])
        print('itr', i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))

        ts = calendar.timegm(time.gmtime())
        resnet_model.save(second_phase_folder + str(ts) + '_resnet_model.h5')
        save_obj(history.history, str(ts) + '_resnet_history', folder=second_phase_folder)

    resnet_model.save(data_folder + '2nd_phase_resnet_model.h5')
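# Worked example of the freeze/unfreeze split in second_phase (layer count is
# illustrative): with 178 layers and trainable_layers_ratio = 1/3,
# trainable_layers_index = int(178 * (1 - 1/3)) = 118, so layers [0:118] stay
# frozen and only the top third, layers [118:], is fine-tuned.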
def first_phase(trained=True,
                printGap=True,
                first_phase_train_reps=first_phase_train_reps,
                data_folder=data_folder):
    global siamese_model
    tensorboard = TensorBoard(log_dir=first_phase_folder + 'tb_logs',
                              batch_size=batch_size)

    if not trained:
        siamese_model = SiameseModel('../resnet/' +
                                     '3rd_phase_resnet_model.h5')

        optimizer = Adam(0.0005)
        siamese_model.compile(optimizer=optimizer,
                              loss='binary_crossentropy',
                              metrics=['acc'])
    else:
        siamese_model = load_model(data_folder + '1st_phase_siamese_model.h5')

    if not os.path.exists(first_phase_folder):
        os.makedirs(first_phase_folder)

    for i in range(first_phase_train_reps):
        history = siamese_model.fit_generator(
            train_img_class_gen,
            steps_per_epoch=steps_per_small_epoch,
            epochs=small_epochs,
            verbose=2,
            validation_data=val_img_class_gen,
            validation_steps=val_steps_per_small_epoch,
            workers=4,
            callbacks=[tensorboard])
        print('itr', i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))

        if i >= 5:
            ts = calendar.timegm(time.gmtime())
            siamese_model.save(first_phase_folder + str(ts) +
                               '_siamese_model.h5')
            save_obj(history.history,
                     str(ts) + '_siamese_history',
                     folder=first_phase_folder)

        if printGap:
            steps = len(val_names_list) // batch_size
            predicts = siamese_model.predict_generator(val_img_class_gen,
                                                       steps=steps // 10,
                                                       verbose=2)
            predProb = np.max(predicts, axis=-1)
            predId = np.argmax(predicts, axis=-1)
            trueId = [
                val_name_id_dict[name.split('.')[0].split('/')[1]]
                for name in val_img_class_gen.filenames
            ]
            gap = GAP_vector(predId, predProb, trueId)
            print('gap: ', gap)

        siamese_model.save(data_folder + '1st_phase_siamese_model.h5')
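# GAP_vector is defined elsewhere in this repo; below is a hedged sketch of the
# Global Average Precision (micro-AP) metric it presumably implements, under the
# Kaggle landmark formulation. The name gap_sketch and the pandas-based layout
# are ours, not the repo's.
def gap_sketch(pred_id, pred_prob, true_id):
    import numpy as np
    import pandas as pd
    df = pd.DataFrame({'pred': pred_id, 'conf': pred_prob, 'true': true_id})
    # rank all predictions by confidence, most confident first
    df = df.sort_values('conf', ascending=False)
    correct = (df['pred'] == df['true']).astype(int).values
    # precision@k at every rank, counted only at the ranks that were correct
    prec_at_k = correct.cumsum() / (np.arange(len(correct)) + 1)
    return float((prec_at_k * correct).sum() / len(correct))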
def first_phase(trained=True, printGap=True, first_phase_train_reps=first_phase_train_reps):
    global nasnet_model
    tensorboard = TensorBoard(log_dir=first_phase_folder + 'tb_logs', batch_size=batch_size)

    if not trained:
        # create the base pre-trained model
        input_tensor = Input(shape=input_shape)
        base_model = NASNetMobile(input_tensor=input_tensor, weights='imagenet', include_top=False)

        # add a global spatial average pooling layer
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        # add a fully-connected layer
        x = Dense(1024, activation='relu')(x)
        # add a logistic layer
        predictions = Dense(classes_num, activation='softmax')(x)

        # this is the model we will train
        nasnet_model = Model(inputs=base_model.input, outputs=predictions)
        nasnet_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['acc'])
    else:
        nasnet_model = load_model(data_folder + '1st_phase_nasnet_model.h5')
        
    if not os.path.exists(first_phase_folder):
        os.makedirs(first_phase_folder)
        
    for i in range(first_phase_train_reps):
        history = nasnet_model.fit_generator(train_img_class_gen,
                                     steps_per_epoch=steps_per_small_epoch,
                                     epochs=small_epochs, 
                                     verbose=2,
                                     validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
                                     workers=4, callbacks=[tensorboard])
        print('itr', i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))
        
        if i >= 50:
            ts = calendar.timegm(time.gmtime())
            nasnet_model.save(first_phase_folder + str(ts) + '_nasnet_model.h5')
            save_obj(history.history, str(ts) + '_nasnet_history', folder=first_phase_folder)

        if printGap:
            steps = len(val_names_list) // batch_size
            predicts = nasnet_model.predict_generator(val_img_class_gen, steps=steps // 10, verbose=2)
            predProb = np.max(predicts, axis=-1)
            predId = np.argmax(predicts, axis=-1)
            trueId = [val_name_id_dict[name.split('.')[0].split('/')[1]] for name in val_img_class_gen.filenames]
            gap = GAP_vector(predId, predProb, trueId)
            print('gap: ', gap)

        nasnet_model.save(data_folder + '1st_phase_nasnet_model.h5')
def get_results(results_file):
    """returns a dict with images names (without suffix) as keys and the result's idd/class as value"""
    names_results_ids_dict = dict()
    with open(results_file) as f:
        f.readline()
        for line in f:
            name, idd = line.strip().split(',')
            if len(idd) < 3:
                save_obj(names_results_ids_dict, 'names_results_ids_dict')
                return names_results_ids_dict
            idd = idd.split(' ')[0]
            names_results_ids_dict[name] = idd
    return names_results_ids_dict
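# The parser above expects a Kaggle-style submission file: a header line, then
# one "name,predictions" row per image, where the predictions cell is
# "<id> <score> ..." (only the leading id is kept) and a cell shorter than 3
# characters terminates the parse early. Illustrative row (made-up values):
#   cacf8152e2d2ae60,4676 0.87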
def compute_similarities_for_each(test_codes_folder, old_results_file):
    suf = '.jpg'
    test_img_similarity_dict = dict()
    names_results_ids_dict = get_results(old_results_file)
    counter = 1
    for codes_dict in get_codes_from_files(test_codes_folder):
        for name, code in zip(codes_dict['names'], codes_dict['codes']):
            name = name.replace(suf, '')
            cat = names_results_ids_dict[name]
            similarity = calc_for_each_sim(code, cat)
            test_img_similarity_dict[name] = similarity
            # min_distance = distance if distance < min_distance else min_distance
        print(counter)
        counter += 1
    # test_img_similarity_dict = distance_to_similarity(test_img_similarity_dict, min_distance)
    save_obj(test_img_similarity_dict,
             'test_img_similarity_dict' + old_results_file.split('/')[-1])
    return test_img_similarity_dict
def get_results_topK(old_results_file_topK, k=topK):
    """Returns a dict mapping image names (without suffix) to a dict of that
    image's top-k predictions, {class_id: probability}."""
    names_results_ids_dict = dict()
    with open(old_results_file_topK) as f:
        f.readline()
        for line in f:
            name, idd = line.strip().split(',')
            if len(idd) < 3:
                save_obj(names_results_ids_dict, 'names_results_ids_dict_topK')
                return names_results_ids_dict
            names_results_ids_dict[name] = {}
            # the cell alternates "id prob id prob ..."; read the k pairs back to front
            iddList = idd.split(' ')
            for i in range(k):
                cat = iddList[2 * (k - 1 - i)]
                names_results_ids_dict[name][cat] = float(iddList[2 * (k - 1 - i) + 1])
    return names_results_ids_dict
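# Worked example (made-up values) of the top-K parsing above with k = 3: the row
#   img001,12 0.91 7 0.44 3 0.08
# gives iddList = ['12', '0.91', '7', '0.44', '3', '0.08'], and reading the
# pairs back to front yields
#   names_results_ids_dict['img001'] == {'3': 0.08, '7': 0.44, '12': 0.91}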
def compute_codes(imgs_path, codes_folder, siamese_model_path):
    batch_size = 64
    code_net = get_code_net(siamese_model_path)

    if not os.path.exists(codes_folder):
        os.makedirs(codes_folder)

    codes_batches_size = 2**14
    print('make images names list')
    imgs_names = listdir(imgs_path)
    imgs_num = len(imgs_names)
    batches_num = imgs_num // codes_batches_size + (1 if imgs_num % codes_batches_size else 0)
    counter = 1
    this_names = []
    print('start loop')
    for ind, name in enumerate(imgs_names):
        this_names.append(name)
        # img = img_to_array(imgs_path + name)
        # imgs.append(img)
        if len(this_names) == codes_batches_size or ind == len(imgs_names) - 1:
            code_dict = {'names': this_names}
            names = this_names[:]
            steps = len(names) // batch_size + (1 if len(names) % batch_size else 0)
            codes = code_net.predict_generator(
                code_generator(names, imgs_path, batch_size=batch_size),
                steps=steps,
                verbose=2,
                workers=4)
            code_dict['codes'] = codes
            # codes = batch_compute_codes(code_net, imgs)
            save_obj(code_dict,
                     'batch_{}'.format(counter),
                     folder=codes_folder)
            this_names = []
            print('done {} out of {}'.format(counter, batches_num))
            counter += 1
    print('codes computed and saved at: ' + codes_folder)
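# code_generator is defined elsewhere in the repo; this is a hedged sketch of
# its assumed contract, with made-up preprocessing (the 224x224 resize and the
# 1/255 scaling are assumptions): it must yield image batches in the same order
# as `names`, so predict_generator's output rows line up with code_dict['names'].
def code_generator_sketch(names, imgs_path, batch_size=64):
    import numpy as np
    from keras.preprocessing.image import load_img, img_to_array
    for start in range(0, len(names), batch_size):
        batch = names[start:start + batch_size]
        imgs = [img_to_array(load_img(imgs_path + name, target_size=(224, 224))) / 255.0
                for name in batch]
        yield np.array(imgs)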
def make_codes_by_category(train_codes_folder, codes_by_category_folder,
                           train_name_id_dict):
    if not os.path.exists(codes_by_category_folder):
        os.makedirs(codes_by_category_folder)

    counter = 1
    for train_codes_name in listdir(train_codes_folder):
        train_codes_name = train_codes_name.replace('.pkl', '')
        print(train_codes_name)
        counter += 1
        temp_cats_dict = dict()
        train_codes = load_obj(train_codes_name, folder=train_codes_folder)
        for name, code in zip(train_codes['names'], train_codes['codes']):
            cat = train_name_id_dict[name.replace('.jpg', '')]
            if cat not in temp_cats_dict.keys():
                temp_cats_dict[cat] = dict()
                temp_cats_dict[cat]['names'] = []
                temp_cats_dict[cat]['codes'] = []
            temp_cats_dict[cat]['names'].append(name)
            temp_cats_dict[cat]['codes'].append(code)
        for cat in temp_cats_dict.keys():
            if not Path(codes_by_category_folder + str(cat)).is_file():
                cat_file = dict()
                cat_file['names'] = []
                cat_file['codes'] = []
                save_obj(obj=cat_file,
                         name=str(cat),
                         folder=codes_by_category_folder)
            cat_file = load_obj(str(cat), folder=codes_by_category_folder)
            names = temp_cats_dict[cat]['names']
            codes = temp_cats_dict[cat]['codes']
            cat_file['names'].extend(names)
            cat_file['codes'].extend(codes)
            save_obj(obj=cat_file,
                     name=str(cat),
                     folder=codes_by_category_folder)
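# Hedged usage sketch for make_codes_by_category; the paths are illustrative.
# Afterwards codes_by_category_folder holds one pickle per class id, each a
# {'names': [...], 'codes': [...]} dict accumulated across every code batch:
#
#     train_name_id_dict = load_obj('train_name_id_dict')
#     make_codes_by_category('../codes/train/', '../codes/by_category/',
#                            train_name_id_dict)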
def third_phase(trained=False, third_phase_train_reps=third_phase_train_reps):
    global inception_model, new_inception_model, optimizer
    tensorboard = TensorBoard(log_dir=third_phase_folder + 'tb_logs',
                              batch_size=batch_size)

    if not trained:
        inception_model = load_model(data_folder +
                                     '1st_phase_inception_model.h5')
    else:
        inception_model = load_model(data_folder +
                                     '3rd_phase_inception_model.h5')


#     # add regularizers to the convolutional layers
#     trainable_layers_ratio = 1 / 2.0
#     trainable_layers_index = int(len(inception_model.layers) * (1 - trainable_layers_ratio))
#     for layer in inception_model.layers[:trainable_layers_index]:
#         layer.trainable = False
#     for layer in inception_model.layers[trainable_layers_index:]:
#         layer.trainable = True

    for layer in inception_model.layers:
        layer.trainable = True
        if isinstance(layer, keras.layers.convolutional.Conv2D):
            layer.kernel_regularizer = regularizers.l2(0.001)
            layer.activity_regularizer = regularizers.l1(0.001)

    # add dropout and regularizer to the penultimate Dense layer
    predictions = inception_model.layers[-1]
    dropout = Dropout(0.2)
    fc = inception_model.layers[-2]
    fc.kernel_regularizer = regularizers.l2(0.001)
    fc.activity_regularizer = regularizers.l1(0.001)

    x = dropout(fc.output)
    predictors = predictions(x)
    new_inception_model = Model(inputs=inception_model.input,
                                outputs=predictors)

    # the lr here is a placeholder; the training loop overwrites it each rep via K.set_value
    optimizer = Adam(lr=0.1234)
    start_lr = 0.00015
    end_lr = 0.00002
    step_lr = (end_lr - start_lr) / (third_phase_train_reps - 1)
    new_inception_model.compile(optimizer=optimizer,
                                loss='categorical_crossentropy',
                                metrics=['acc'])

    if not os.path.exists(third_phase_folder):
        os.makedirs(third_phase_folder)

    for i in range(third_phase_train_reps):
        lr = start_lr + step_lr * i
        K.set_value(new_inception_model.optimizer.lr, lr)
        print(i, 'out of ', third_phase_train_reps, '\nlearning rate ',
              K.eval(new_inception_model.optimizer.lr))
        history = new_inception_model.fit_generator(
            train_img_class_gen,
            steps_per_epoch=steps_per_small_epoch,
            epochs=small_epochs,
            verbose=2,
            validation_data=val_img_class_gen,
            validation_steps=val_steps_per_small_epoch,
            workers=4,
            callbacks=[tensorboard])
        #         history = new_inception_model.fit_generator(train_img_class_gen,
        #                                                    steps_per_epoch=steps_per_small_epoch,
        #                                                    epochs=small_epochs, verbose=2,
        #                                                    validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
        #                                                    workers=4, callbacks=[LosswiseKerasCallback(tag='keras inception model')])
        print("iteration", i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))

        if i >= 5:
            ts = calendar.timegm(time.gmtime())
            new_inception_model.save(third_phase_folder + str(ts) +
                                     '_inception_model.h5')
            save_obj(history.history,
                     str(ts) + '_inception_history.h5',
                     folder=third_phase_folder)

    new_inception_model.save(data_folder + '3rd_phase_inception_model.h5')
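# Worked example of the linear lr schedule in third_phase (rep count is
# illustrative): with third_phase_train_reps = 14,
# step_lr = (0.00002 - 0.00015) / 13 = -1e-05, so the learning rate walks from
# 0.00015 down to 0.00002 in steps of 1e-05 per rep.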
def inception(trained=False, third_phase_train_reps=third_phase_train_reps):
    global inception_model, new_inception_model, optimizer
    tensorboard = TensorBoard(log_dir=third_phase_folder + 'tb_logs',
                              batch_size=batch_size)
    start_lr = 0.00015
    end_lr = 0.00001
    step_lr = (end_lr - start_lr) / (third_phase_train_reps - 1)

    if not trained:
        # create the base pre-trained model
        input_tensor = Input(shape=input_shape)
        base_model = InceptionResNetV2(input_tensor=input_tensor,
                                       weights='imagenet',
                                       include_top=False)

        # add a global spatial average pooling layer
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        # add a fully-connected layer
        x = Dense(1024, activation='relu')(x)
        # add a logistic layer
        predictions = Dense(classes_num, activation='softmax')(x)

        # this is the model we will train
        inception_model = Model(inputs=base_model.input, outputs=predictions)
        inception_model.compile(optimizer=Adam(lr=0.0001),
                                loss='categorical_crossentropy',
                                metrics=['acc'])

        for layer in inception_model.layers:
            layer.trainable = True
            if isinstance(layer, keras.layers.convolutional.Conv2D):
                layer.kernel_regularizer = regularizers.l2(0.001)
                layer.activity_regularizer = regularizers.l1(0.001)

        # add dropout and regularizer to the penultimate Dense layer
        predictions = inception_model.layers[-1]
        dropout = Dropout(0.2)
        fc = inception_model.layers[-2]
        fc.kernel_regularizer = regularizers.l2(0.001)
        fc.activity_regularizer = regularizers.l1(0.001)

        x = dropout(fc.output)
        predictors = predictions(x)
        new_inception_model = Model(inputs=inception_model.input,
                                    outputs=predictors)

        # placeholder lr; the training loop overwrites it each rep via K.set_value
        optimizer = Adam(lr=0.1234)

        new_inception_model.compile(optimizer=optimizer,
                                    loss='categorical_crossentropy',
                                    metrics=['acc'])
    else:
        new_inception_model = load_model(data_folder +
                                         '3rd_phase_inception_model.h5')
        optimizer = Adam(lr=0.1234)
        new_inception_model.compile(optimizer=optimizer,
                                    loss='categorical_crossentropy',
                                    metrics=['acc'])

    if not os.path.exists(third_phase_folder):
        os.makedirs(third_phase_folder)

    for i in range(third_phase_train_reps):
        lr = start_lr + step_lr * i
        K.set_value(new_inception_model.optimizer.lr, lr)
        print(i, 'out of ', third_phase_train_reps, '\nlearning rate ',
              K.eval(new_inception_model.optimizer.lr))
        history = new_inception_model.fit_generator(
            train_img_class_gen,
            steps_per_epoch=steps_per_small_epoch,
            epochs=small_epochs,
            verbose=2,
            validation_data=val_img_class_gen,
            validation_steps=val_steps_per_small_epoch,
            workers=4,
            callbacks=[tensorboard])
        #         history = new_inception_model.fit_generator(train_img_class_gen,
        #                                                    steps_per_epoch=steps_per_small_epoch,
        #                                                    epochs=small_epochs, verbose=2,
        #                                                    validation_data=val_img_class_gen, validation_steps=val_steps_per_small_epoch,
        #                                                    workers=4, callbacks=[LosswiseKerasCallback(tag='keras inception model')])
        print("iteration", i)
        if i % saves_per_epoch == 0:
            print('{} epoch completed'.format(int(i / saves_per_epoch)))

        if i >= 5:
            ts = calendar.timegm(time.gmtime())
            new_inception_model.save(third_phase_folder + str(ts) +
                                     '_inception_model.h5')
            save_obj(history.history,
                     str(ts) + '_inception_history.h5',
                     folder=third_phase_folder)

    new_inception_model.save(data_folder + '3rd_phase_inception_model.h5')
def make_files():
    global csv_name_id_tuples_list, csv_names_list, csv_ids_list, csv_ids_set, \
        csv_names_set, csv_id_name_dict, csv_name_id_dict, classes_num, \
        train_names_list, train_name_id_dict, val_names_list, val_name_id_dict

    print("parsing train.csv")
    with open(csv_csv_path) as f:
        f.readline()
        for line in f:
            l = line.replace('"', '').strip().split(',')
            if len(l) != 3:
                print(l)
                continue
            name, idd = l[0], int(l[2])
            csv_name_id_tuples_list.append((name, idd))
            csv_names_list.append(name)

            csv_name_id_dict[name] = idd
            csv_ids_list.append(idd)

            if idd in csv_id_name_dict.keys():
                csv_id_name_dict[idd].add(name)
            else:
                csv_id_name_dict[idd] = {name}

    print("start saving lists")
    csv_names_set = set(csv_names_list)
    csv_ids_set = set(csv_ids_list)

    save_obj(csv_name_id_tuples_list, 'csv_name_id_tuples_list')
    save_obj(csv_names_list, 'csv_names_list')
    save_obj(csv_ids_list, 'csv_ids_list')

    save_obj(csv_name_id_dict, 'csv_name_id_dict')
    save_obj(csv_id_name_dict, 'csv_id_name_dict')

    save_obj(csv_ids_set, 'csv_ids_set')
    save_obj(csv_names_set, 'csv_names_set')

    train_names_list, train_name_id_dict = make_folder_lists_dicts(
        train_images_folder)
    val_names_list, val_name_id_dict = make_folder_lists_dicts(
        val_images_folder)

    save_obj(train_names_list, 'train_names_list')
    save_obj(train_name_id_dict, 'train_name_id_dict')

    save_obj(val_names_list, 'val_names_list')
    save_obj(val_name_id_dict, 'val_name_id_dict')
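# Illustrative train.csv row the parser above expects (three comma-separated
# fields; the quotes are stripped before splitting):
#   "cacf8152e2d2ae60","http://example.com/img.jpg","4676"
# which yields name = 'cacf8152e2d2ae60' and idd = 4676.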
import os, sys

module_path = os.path.abspath(os.path.join('..'))
sys.path.append(module_path)
from conf.configure import *
from utils.data_util import save_obj, load_obj
from preprocess.generator import make_generators

import time

pred_batch_size = 32 #32
pred_lr = 0.0002

pred_model = data_folder + 'xcpetion_model_.json'
pred_weights = data_folder + 'continue_second_phase_logs/older/1521093898_xcpetion_model.h5'
# pred_model_path = data_folder + '2nd_2nd_phase_xcpetion_model.h5'
pred_model_path = data_folder + 'continue_second_phase_xcpetion_model.h5'

if not os.path.exists(working_folder + 'class_indices_dict' + '.pkl'):
#     classList = [f for f in os.listdir(train_class_images_path) if not os.path.isfile(os.path.join(train_class_images_path, f))]
#     classDict = dict(zip(classList, list(range(len(classList)))))
    train_img_class_gen, val_img_class_gen = make_generators(isPlain=True)
#     print(train_img_class_gen.class_indices)
    classDict = train_img_class_gen.class_indices
    save_obj(classDict, 'class_indices_dict')

class_indices_dict = load_obj('class_indices_dict')
inverted_class_indices_dict = dict((v, k) for k, v in class_indices_dict.items())
# print(type(inverted_class_indices_dict[10]))
# print(inverted_class_indices_dict[10])
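# The inverted dict maps a generator class index back to the original class
# label (the training subfolder name), e.g. when decoding a softmax prediction
# (model and np are assumed to be in scope):
#
#     class_idx = int(np.argmax(model.predict(batch)[0]))
#     landmark_id = inverted_class_indices_dict[class_idx]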