# Example 1
def exp_outlier_merge(sub_dataset=['_train', '_valid', '_test'],
                      outlier_dataset_name='outliers'):
    """Merge each configured sub-dataset's features with the outlier
    features and save the result as a new '<dataset>_outl' HDF5 file.

    :param sub_dataset: suffixes appended to cfg.dataset to form the
                        feature-dataset names to process.
    :param outlier_dataset_name: name of the outlier feature dataset
                                 merged into every sub-dataset.
    """
    dataset_names = [cfg.dataset + suffix for suffix in sub_dataset]

    for feat_net in cfg.nets:
        # One shared outlier set per feature-extractor network.
        outliers_dataset = common.feat_dataset(outlier_dataset_name, feat_net)

        for ds_name in dataset_names:
            feat_dataset = common.feat_dataset(ds_name, feat_net)
            out_dataset_path = common.feat_path(ds_name + "_outl", feat_net)

            print("")
            print("Features net:     {}".format(feat_net))
            print("Input dataset:    {}".format(
                common.feat_fname(ds_name, feat_net)))
            print("Merging with:     {}".format(
                common.feat_fname(outlier_dataset_name, feat_net)))
            print("Out feat dataset: {}".format(
                common.feat_fname(ds_name + "_outl", feat_net)))

            # label_mode='new' gives the outlier samples a fresh label.
            merged = ImageDataset.merge_datasets(feat_dataset,
                                                 outliers_dataset,
                                                 label_mode='new')
            merged.save_hdf5(out_dataset_path)
            print("Done")
        print("")

    print("")
    print("All done.")
def merge_features_train_valid(feat_net='resnet50'):
    """Concatenate the '<dataset>_train' and '<dataset>_valid' feature
    sets of the configured dataset and save the merged set under the
    bare dataset name.

    :param feat_net: name of the feature-extractor network whose
                     pre-computed features are merged.
    """
    base_name = cfg.dataset
    train_name = base_name + '_train'
    valid_name = base_name + '_valid'

    print("Merging training and validation data-features "
          "(feature net: " + feat_net + ")")

    merged = ImageDataset.merge_datasets(
        common.feat_dataset(train_name, feat_net),
        common.feat_dataset(valid_name, feat_net))

    merged.save_hdf5(common.feat_path(base_name, feat_net))
    def __init__(self, feat_net_name, trainset_name, validset_name=None, validsplit=0, shuffle_trainset=False,
                 batch_size=32, loss='categorical_crossentropy', metric=['ACCURACY'], checkpoint_monitor=None):
        """Load the training (and optional validation) feature sets and
        store the training hyper-parameters.

        :param feat_net_name: feature-extractor network whose features are used.
        :param trainset_name: name of the training feature dataset.
        :param validset_name: optional validation feature-dataset name.
        :param validsplit: fraction of the trainset held out for validation
                           (used only when no explicit validset is given).
        :param shuffle_trainset: shuffle the training set once after loading.
        :param checkpoint_monitor: quantity monitored by checkpoints; when
                                   None a sensible default is chosen below.
        """
        self.trainset_name = trainset_name
        self.trainset = common.feat_dataset(trainset_name, feat_net_name)
        if shuffle_trainset:
            self.trainset.shuffle()
        if validset_name is None:
            self.valdata = None
        else:
            validset = common.feat_dataset(validset_name, feat_net_name)
            self.valdata = [validset.data, validset.getLabelsVec()]
        self.validsplit = validsplit
        self.batch_size = batch_size
        self.loss = loss
        self.metric = metric
        self.feat_net_name = feat_net_name
        self.chk_mon = checkpoint_monitor

        # BUGFIX: the original if/else overwrote an explicitly passed
        # checkpoint_monitor with 'loss' whenever no validation data was
        # configured. Only pick a default when the caller passed None.
        if self.chk_mon is None:
            if self.valdata is not None or self.validsplit > 0:
                self.chk_mon = 'val_loss'
            else:
                self.chk_mon = 'loss'
    def __init__(self, feat_net_name, trainset_name, testset_name, verbose=True, csv_global_stats=True, csv_class_stats=True,
                 single_class_verbose=False, batch_size=32, save_csv_dir='weights'):
        """Prepare a tester over pre-extracted features.

        :param save_csv_dir: 'weights': save the csv files in the directory of
                             the weights of the shallow network;
                             'current': save the csv files in the current
                             working directory;
                             any other string: save the csv in that directory.
        """
        # Identification of the data/net under test.
        self.feat_net_name = feat_net_name
        self.trainset_name = trainset_name
        self.testset_name = testset_name
        self.testset = common.feat_dataset(testset_name, feat_net_name)

        # Reporting / execution options.
        self.batch_size = batch_size
        self.verbose = verbose
        self.single_class_verbose = single_class_verbose
        self.csv_global_stats = csv_global_stats
        self.csv_class_stats = csv_class_stats
        self.save_csv_mode = save_csv_dir
def extract_shallow_features(batch_size=32):
    """Extract features from the 'additional_hidden_0' layer of a
    finetuned shallow network and save them to an HDF5 file.

    :param batch_size: mini-batch size used during feature extraction.
                       BUGFIX: the original body referenced an undefined
                       ``batch_size`` name (NameError at runtime); it is
                       now an explicit parameter with a default.
    """
    feat_net = 'resnet50'
    cfg.init(include_nets=[feat_net])

    old_trainset_name = cfg.dataset + '_train_ds'
    # NOTE(review): the original also assigned cfg.dataset + '_train_ds'
    # here and immediately overwrote it; only the '_test' set is used.
    dataset_name = cfg.dataset + '_test'

    print("\nloading dataset: " + dataset_name)
    try:
        dataset = common.feat_dataset(dataset_name, feat_net)
    except IOError:
        print("Can't open dataset.")
        return
    print("dataset loaded.")

    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(dataset_name, feat_net)

    B = ShallowNetBuilder(in_shape, out_shape)
    # The shallow net was trained on the (double-seed) training set.
    SL = ShallowLoader(old_trainset_name, feat_net)

    pretrain_weight_epoch = '10'
    labelflip_finetune_epoch = '00'
    out_layer = 'additional_hidden_0'

    # Build the H8K net, load the labelflip-finetuned weights, and extract.
    extr_n = '_ft@' + pretrain_weight_epoch
    model = B.H8K(extr_n, lf_decay=0.01).init(lf=False).load(SL, labelflip_finetune_epoch).model()
    feature_vectors = net_utils.extract_features(model, dataset, out_layer,
                                                 batch_size, True)
    feature_vectors.save_hdf5("shallow_extracted_features/shallow_feat_" + dataset_name + ".h5")
def main(args):
    """Evaluate a set of saved shallow-net weight files on the
    dbp3120 test feature set and append the scores to a csv.

    The original repeated the load/test/write sequence once per weight
    file; it is now table-driven over (net tag, weight tag, score label).
    """
    config.init()

    feat_net = 'resnet50'
    print("")
    print("")
    print("Running experiment on net: " + feat_net)

    testset_name = "dbp3120_test"  # + '_verrocchio77'

    testset = common.feat_dataset(testset_name, feat_net)

    in_shape = config.feat_shape_dict[feat_net]
    out_shape = testset.labelsize

    def for_resnet50():
        # One model instance is reused; only its weights change per run.
        LF = new_model(in_shape, out_shape)

        # (net tag, weight tag, score label) — same files, same order as
        # the original. The "AB" runs were commented out in the original
        # and are intentionally omitted here.
        runs = [
            ("A", "best", "A best (5ep)"),
            ("A", "last", "A last (5ep)"),
            ("LF_FT_A", "best", "LF A best"),
            ("LF_FT_A", "00", "LF A 0"),
            ("LF_FT_A", "01", "LF A 1"),
            ("LF_FT_A", "02", "LF A 2"),
            ("LF_FT_A", "03", "LF A 3"),
            ("LF_FT_A", "04", "LF A 4"),
        ]
        path_fmt = (config.SHALLOW_PATH +
                    "shallow_{}__feat_dbp3120_noflk_train_ds__resnet50"
                    "__avg_pool.weights.{}.h5")

        for net_tag, weight_tag, label in runs:
            LF.load_weights(path_fmt.format(net_tag, weight_tag), by_name=True)
            score = test_net(LF, testset)
            write_net_score(score,
                            label,
                            testset_name,
                            "test_results.csv",
                            detailed_csv=True)

    if feat_net == 'resnet50':
        for_resnet50()
# Example 7
def main(args):
    """Evaluate saved shallow-net weight files on the configured test
    feature set and append the scores to 'test_results.csv'.

    The original repeated the load/test/write sequence once per weight
    file; it is now table-driven over (net tag, weight tag, score label).
    """
    config.init()

    feat_net = 'resnet50'
    print("")
    print("")
    print("Running experiment on net: " + feat_net)

    testset_name = config.DATASET + '_test'

    testset = common.feat_dataset(testset_name, feat_net)

    in_shape = config.feat_shape_dict[feat_net]
    out_shape = testset.labelsize

    def for_resnet50():
        # One model instance is reused; only its weights change per run.
        LF = new_model(in_shape, out_shape)

        # (net tag, weight tag, score label) — same files, same order as
        # the original sequence of hard-coded calls.
        runs = [
            ("AB", "best", "AB best"),
            ("AB", "last", "AB last"),
            ("A", "best", "A best (5ep)"),
            ("A", "last", "A last (5ep)"),
            ("LF_FT_A", "best", "LF A best"),
            ("LF_FT_A", "00", "LF A 0"),
            ("LF_FT_A", "17", "LF A 17"),
            ("LF_FT_A", "41", "LF A 41"),
            ("LF_FT_A", "60", "LF A 60"),
        ]
        path_fmt = (config.SHALLOW_PATH +
                    "shallow_{}__feat_dbp3120_train_ds__resnet50"
                    "__avg_pool.weights.{}.h5")

        for net_tag, weight_tag, label in runs:
            LF.load_weights(path_fmt.format(net_tag, weight_tag), by_name=True)
            score = test_net(LF, testset)
            write_net_score(score,
                            label,
                            testset_name,
                            "test_results.csv",
                            detailed_csv=True)

    if feat_net == 'resnet50':
        for_resnet50()
def main(args):
    """Train shallow classifiers on pre-extracted features: first a plain
    net ("A_5ep"), then a LabelFlip-regularized finetune ("LF_FT_A")
    warm-started from the best weights of the first run."""
    config.init()

    feat_net = 'resnet50'
    print("")
    print("")
    print("Running experiment on net: " + feat_net)

    trainset_name = config.DATASET + '_so'
    trainset = common.feat_dataset(trainset_name, feat_net)
    # The '_so_test' split is used as an explicit validation set, so the
    # validation_split fraction below stays 0.
    validset = common.feat_dataset(config.DATASET + '_so_test', feat_net)
    valid_data = validset.data, validset.getLabelsVec()
    valid_split = 0

    in_shape = config.feat_shape_dict[feat_net]
    out_shape = trainset.labelsize

    # "addestra" is Italian for "train".
    def addestra(model,
                 name,
                 optimizer,
                 epochs,
                 callbacks,
                 chk_period=-1,
                 loss_in_name=False):
        """Compile and fit `model` on the enclosing trainset, saving
        periodic/best checkpoints, the architecture json, and the final
        weights under the shallow path derived from `name`.

        NOTE(review): this appends checkpoint callbacks to the caller's
        `callbacks` list in place — callers should pass a fresh list.
        """
        shallow_path = common.shallow_path(name,
                                           trainset_name,
                                           feat_net,
                                           ext=False)

        if chk_period > 0:
            # Periodic per-epoch checkpoint; optionally embeds val_loss
            # in the filename.
            name = shallow_path + '.weights.{epoch:02d}' + (
                '-{val_loss:.2f}.h5' if loss_in_name else '.h5')
            checkpoint = ModelCheckpoint(name,
                                         monitor='val_acc',
                                         save_weights_only=True,
                                         period=chk_period)
            callbacks.append(checkpoint)

        # Always keep the best-so-far weights by validation loss.
        bestpoint = ModelCheckpoint(shallow_path + '.weights.best.h5',
                                    monitor='val_loss',
                                    save_best_only=True,
                                    save_weights_only=True)
        callbacks.append(bestpoint)

        model.compile(optimizer=optimizer, loss=LOSS, metrics=METRIC)
        #model.summary()
        #print("Valid split: " + str(valid_split))
        # NOTE(review): `nb_epoch` is the old Keras 1.x spelling of
        # `epochs` — confirm the installed Keras version accepts it.
        model.fit(trainset.data,
                  trainset.getLabelsVec(),
                  nb_epoch=epochs,
                  batch_size=BATCH,
                  callbacks=callbacks,
                  shuffle=True,
                  validation_data=valid_data,
                  validation_split=valid_split)

        save_model_json(model, shallow_path + '.json')
        model.save_weights(shallow_path + '.weights.last.h5')

    def for_resnet50():
        """Two-stage training: plain net "A_5ep", then LabelFlip
        finetune "LF_FT_A" warm-started from A_5ep's best weights."""
        early_stopping = EarlyStopping('val_loss',
                                       min_delta=0.01,
                                       patience=7,
                                       verbose=1)
        reduceLR = ReduceLROnPlateau(monitor='val_loss',
                                     factor=0.1,
                                     patience=2,
                                     verbose=1,
                                     epsilon=0.01,
                                     cooldown=0,
                                     min_lr=0)
        callbacks = [early_stopping, reduceLR]

        A = new_model(in_shape, out_shape)
        optimizer = SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=True)
        addestra(A,
                 "A_5ep",
                 optimizer,
                 100,
                 callbacks,
                 chk_period=1,
                 loss_in_name=True)

        # Stage 2: rebuild the path where stage 1 stored its best weights.
        shallow_path = common.shallow_path("A_5ep",
                                           trainset_name,
                                           feat_net,
                                           ext=False)
        # Fresh callbacks with tighter tolerances for the finetune stage.
        early_stopping = EarlyStopping('val_loss',
                                       min_delta=0.001,
                                       patience=10,
                                       verbose=1)
        reduceLR = ReduceLROnPlateau('val_loss',
                                     factor=0.1,
                                     patience=4,
                                     verbose=1,
                                     epsilon=0.0001)
        callbacks = [early_stopping, reduceLR]

        # LabelFlip-enabled model, warm-started from stage 1, lower LR.
        LF = new_model(in_shape, out_shape, lf=True, lf_decay=0.03)
        LF.load_weights(shallow_path + '.weights.best.h5', by_name=True)
        optimizer = SGD(lr=0.001, momentum=0.9, decay=1e-6, nesterov=True)
        addestra(LF,
                 "LF_FT_A",
                 optimizer,
                 epochs=100,
                 callbacks=callbacks,
                 chk_period=1)

    def for_vgg():
        """Placeholder: VGG variants were disabled (see commented code)."""
        pass
        # m = new_model(in_shape, out_shape, hiddens=[Hidden(4096, 0.5), Hidden(4096, 0.5)])
        # addestra(m, "H4K_H4K", SGD(lr=0.0001, momentum=0.9, decay=1e-6, nesterov=True), epochs=100, callbacks=callbacks)
        #
        # m = new_model(in_shape, out_shape, hiddens=[Hidden(4096, 0.5)])
        # addestra(m, "H4K", SGD(lr=0.0001, momentum=0.9, decay=1e-6, nesterov=True), epochs=100, callbacks=callbacks)

    if feat_net == 'resnet50':
        for_resnet50()
    if feat_net.startswith('vgg'):
        for_vgg()
def prune_feat_dataset_with_shallow_classifier(
        feat_net=cfg_emb.FEAT_NET,
        double_seeds=True,
        n_top_classes=cfg_emb.PRUNING_KEEP_N_CLASSES,
        labelflip=cfg_emb.USE_LABELFLIP):
    """Test shallow classifiers, keep the `n_top_classes` best-recognized
    classes, and save the pruned feature dataset to disk.

    :param feat_net: feature-extractor network whose features are used.
    :param double_seeds: use the '_train_ds' (double-seed) training set name.
    :param n_top_classes: number of top-scoring classes to keep.
    :param labelflip: evaluate the LabelFlip-finetuned nets instead of the
                      plain ones. BUGFIX: the original accepted this
                      parameter but tested cfg_emb.USE_LABELFLIP directly,
                      silently ignoring the caller's value.
    """
    dataset_name = cfg_emb.FEAT_DATASET
    trainset_name = cfg.dataset + '_train' + ('_ds' if double_seeds else '')

    # The whole (unsplit) dataset is used for testing/pruning.
    testset_name = cfg.dataset
    # testset_name = cfg.dataset + '_test'

    print("Shallow Test")
    print("Features from CNN: " + feat_net)
    print("Trained on: " + trainset_name)
    print("Testing on: " + testset_name)

    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(testset_name, feat_net)

    SNB = ShallowNetBuilder(in_shape, out_shape)
    SL = ShallowLoader(trainset_name, feat_net)
    ST = ShallowTester(feat_net,
                       trainset_name,
                       testset_name,
                       csv_class_stats=False,
                       csv_global_stats=False)

    # Nets to test
    shallow_nets = [SNB.A]
    # Weights to load on nets to test
    shallow_weights_to_loads = ['best']
    # Weights to load on labelflip-finetuned nets (finetuned loading the
    # weights in shallow_weights_to_loads list)
    shallow_ft_lf_weights_to_load = ['00']

    dataset_to_prune = common.feat_dataset(dataset_name, feat_net)

    def _prune_and_save(shallow_net):
        # Rank classes by test performance, keep the best n_top_classes,
        # and write the pruned feature dataset to disk.
        keep, _prune = test_for_top_classes(
            shallow_net,
            ST,
            nb_selected_classes=n_top_classes,
            out_on_csv="class_pruning.csv",
            out_classname_txt="class_names_keep_from_pruning.txt",
            out_classindex_txt="class_keep_from_pruning.txt")
        pruned = dataset_to_prune.sub_dataset_with_labels(keep)
        out_path = pruned_feat_dataset_path(
            dataset_name, testset_name, n_top_classes, feat_net,
            shallow_net)
        print("Saving pruned feature dataset in: " + out_path)
        pruned.save_hdf5(out_path)

    for sn in shallow_nets:
        for sh_i in shallow_weights_to_loads:
            if labelflip:
                # Test the finetuned models that use LabelFlip noise label.
                extr_n = '_ft@' + str(sh_i)
                for lf_i in shallow_ft_lf_weights_to_load:
                    shallow_net = sn(extr_n, lf_decay=cfg_emb.LF_DECAY).init(
                        lf=False).load(SL, lf_i)
                    _prune_and_save(shallow_net)
            else:
                # Test without LabelFlip finetune.
                shallow_net = sn().init(lf=False).load(SL, sh_i)
                _prune_and_save(shallow_net)