def shallow_finetune_labelflip(feat_net='resnet50', double_seeds=True, outlier_model=False):
    """Grid-search LabelFlip finetuning of shallow H8K nets on CNN features.

    For each (checkpoint, lf_decay, learning-rate) combination, restores an
    H8K shallow net from the given checkpoint, enables the LabelFlip noise
    layer, and finetunes it for 20 epochs.

    NOTE(review): this module defines shallow_finetune_labelflip twice; the
    later definition shadows this one at import time.

    :param feat_net: name of the CNN whose extracted features are used.
    :param double_seeds: use the '_ds' (double seeds) trainset variant
        (ignored for the toy dataset).
    :param outlier_model: unused here (kept for signature compatibility).
    """
    if cfg.use_toy_dataset:
        trainset_name = cfg.dataset + '_train'
        validset_name = None
        valid_split = 0.3
    else:
        trainset_name = cfg.dataset + '_train' + ('_ds' if double_seeds else '')
        validset_name = cfg.dataset + '_valid'
        valid_split = 0
    print("Shallow train on features from net: " + feat_net)
    print("Trainset: " + trainset_name)
    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(trainset_name, feat_net)
    SNB = ShallowNetBuilder(in_shape, out_shape)
    SL = ShallowLoader(trainset_name, feat_net)
    load_iter = ['10']
    decay = [0.1, 0.01, 0.001, 0.0001]
    lrs = [0.01, 0.001, 0.0001, 0.00001]
    for li in load_iter:
        for dc in decay:
            for lr in lrs:
                extr_n = '_ft@{}_dc-{}_lr-{}'.format(li, dc, lr)
                # BUG FIX: the original hard-coded lr=0.001 and lf_decay=0.01,
                # ignoring the grid variables dc/lr entirely, so all 16
                # combinations trained the identical configuration (only the
                # checkpoint name differed). Use the grid values, matching the
                # '_dc-{}_lr-{}' naming scheme.
                opt = SGD(lr=lr, momentum=0.9, decay=1e-6, nesterov=True)
                # BUG FIX: the original wrapped the net in a one-element list;
                # the sibling finetune function passes a single net to
                # ShallowTrainer.train().
                snet = SNB.H8K(extr_n, lf_decay=dc).init(lf=True).load(SL, li, SNB.H8K())
                ST = ShallowTrainer(feat_net, trainset_name, validset_name, valid_split,
                                    batch_size=BATCH, loss=LOSS, metric=METRIC)
                ST.train(snet, opt, epochs=20, chk_period=1)
def dnn2docvec_exp():
    """Experiment stub: build a shallow 'A' net and restore its 'best' weights.

    Produces no output and returns nothing -- apparently an unfinished entry
    point for the dnn->docvec experiment.
    """
    feat_net = 'resnet50'
    trainset = cfg.dataset + '_train'
    loader = ShallowLoader(trainset, feat_net)
    net = ShallowNetBuilder().A()
    net.load(shallowLoader=loader, weight_index='best')
def shallow_test(feat_net='resnet50', double_seeds=True, outlier_model=False):
    """Run the shallow-classifier test grid on the held-out test set.

    For each builder in shallow_nets, tests the plain net (weights at
    iteration sh_i) and every LabelFlip-finetuned variant over the
    decay/learning-rate grid, at several finetune checkpoints.

    :param feat_net: CNN whose extracted features feed the shallow nets.
    :param double_seeds: use the '_ds' trainset variant (non-toy datasets).
    :param outlier_model: unused here (kept for signature compatibility).
    """
    if cfg.use_toy_dataset:
        # BUG FIX: trainset_name was never assigned on this branch, so the
        # prints and ShallowLoader below raised NameError whenever
        # cfg.use_toy_dataset was set. Mirror the sibling training functions.
        trainset_name = cfg.dataset + '_train'
    else:
        trainset_name = cfg.dataset + '_train' + ('_ds' if double_seeds else '')
    # (The original also set validset_name / valid_split here; both were dead
    # locals in this function and have been removed.)
    testset_name = cfg.dataset + '_test'
    print("Shallow Test")
    print("Features from CNN: " + feat_net)
    print("Trained on: " + trainset_name)
    print("Testing on: " + testset_name)
    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(testset_name, feat_net)
    SNB = ShallowNetBuilder(in_shape, out_shape)
    SL = ShallowLoader(trainset_name, feat_net)
    ST = ShallowTester(feat_net, trainset_name, testset_name, save_csv_dir='current')
    # Nets to test
    shallow_nets = [SNB.H8K]
    sh_i = '10'
    decay = [0.1, 0.01, 0.001, 0.0001]
    lrs = [0.01, 0.001, 0.0001, 0.00001]
    # Weights to load on labelflip-finetuned nets (finetuned loading the
    # weights in shallow_weights_to_loads list)
    shallow_ft_lf_weights_to_load = [
        '00', '01', '02', '03', '04', '05', '06', '07', 'best', 'last'
    ]
    for sn in shallow_nets:
        for dc in decay:
            for lr in lrs:
                # Test without LabelFlip finetune.
                # NOTE(review): this retests the identical plain net once per
                # (dc, lr) combination -- redundant but preserved, since each
                # .test() call also writes CSV output.
                sn().init(lf=False).load(SL, sh_i).test(ST)
                # Test the finetuned models that used LabelFlip noise labels:
                extr_n = '_ft@{}_dc-{}_lr-{}'.format(sh_i, dc, lr)
                for lf_i in shallow_ft_lf_weights_to_load:
                    sn(extr_n, lf_decay=0.01).init(lf=False).load(SL, lf_i).test(ST)
def shallow_finetune_labelflip(feat_net='resnet50', double_seeds=True, outlier_model=False):
    """LabelFlip-finetune H4K and H4K_H4K shallow nets from several checkpoints.

    Restores each architecture from checkpoints '05', '10', '15', 'best' and
    'last', enables the LabelFlip layer, and finetunes every net for 20 epochs
    with SGD and loss-plateau LR reduction.

    NOTE: this redefines shallow_finetune_labelflip declared earlier in this
    module; being the later definition, this is the one in effect at runtime.

    :param feat_net: name of the CNN whose extracted features are used.
    :param double_seeds: use the '_ds' (double seeds) trainset variant
        (ignored for the toy dataset).
    :param outlier_model: unused here (kept for signature compatibility).
    """
    if cfg.use_toy_dataset:
        trainset_name = cfg.dataset + '_train'
        validset_name = None
        valid_split = 0.3
    else:
        trainset_name = cfg.dataset + '_train' + ('_ds' if double_seeds else '')
        validset_name = cfg.dataset + '_valid'
        valid_split = 0
    print("Shallow train on features from net: " + feat_net)
    print("Trainset: " + trainset_name)
    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(trainset_name, feat_net)
    builder = ShallowNetBuilder(in_shape, out_shape)
    loader = ShallowLoader(trainset_name, feat_net)
    nets_to_train = []
    for checkpoint in ['05', '10', '15', 'best', 'last']:
        tag = '_ft@' + str(checkpoint)
        # One single-hidden-layer (H4K) and one two-hidden-layer (H4K_H4K)
        # net per checkpoint, both with the LabelFlip layer enabled.
        nets_to_train.append(
            builder.H4K(tag, lf_decay=0.01).init(lf=True).load(loader, checkpoint, builder.H4K()))
        nets_to_train.append(
            builder.H4K_H4K(tag, lf_decay=0.01).init(lf=True).load(loader, checkpoint, builder.H4K_H4K()))
    opt = SGD(lr=0.001, momentum=0.9, decay=1e-6, nesterov=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4,
                                  verbose=1, epsilon=0.005)
    trainer = ShallowTrainer(feat_net, trainset_name, validset_name, valid_split,
                             batch_size=BATCH, loss=LOSS, metric=METRIC)
    for net in nets_to_train:
        trainer.train(net, opt, epochs=20, callbacks=[reduce_lr], chk_period=1)
def extract_shallow_features():
    """Extract hidden-layer activations of a trained shallow net over a dataset.

    Loads the feature dataset for dataset_name, restores the H8K shallow net
    (pretrained at epoch '10', LabelFlip-finetune weights at epoch '00',
    LabelFlip layer disabled), and saves the activations of layer
    'additional_hidden_0' to an HDF5 file.
    """
    feat_net = 'resnet50'
    cfg.init(include_nets=[feat_net])
    old_trainset_name = cfg.dataset + '_train_ds'
    # BUG FIX: the original assigned dataset_name twice back-to-back
    # ('_train_ds' then '_test'), making the first assignment a dead store --
    # presumably a leftover manual toggle between the two datasets. Only the
    # effective value is kept; re-run with '_train_ds' to extract train
    # features if that was the intent. TODO confirm.
    dataset_name = cfg.dataset + '_test'
    print("\nloading dataset: " + dataset_name)
    try:
        dataset = common.feat_dataset(dataset_name, feat_net)
    except IOError:
        print("Can't open dataset.")
        return
    print("dataset loaded.")
    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(dataset_name, feat_net)
    B = ShallowNetBuilder(in_shape, out_shape)
    SL = ShallowLoader(old_trainset_name, feat_net)
    pretrain_weight_epoch = '10'
    labelflip_finetune_epoch = '00'
    out_layer = 'additional_hidden_0'
    extr_n = '_ft@' + pretrain_weight_epoch
    model = B.H8K(extr_n, lf_decay=0.01).init(lf=False).load(SL, labelflip_finetune_epoch).model()
    # NOTE(review): batch_size is not defined inside this function; sibling
    # functions use the module-level BATCH constant. Presumably a module-level
    # batch_size exists, otherwise this line raises NameError -- confirm.
    feature_vectors = net_utils.extract_features(model, dataset, out_layer, batch_size, True)
    feature_vectors.save_hdf5("shallow_extracted_features/shallow_feat_" + dataset_name + ".h5")
if cls in class_list_for_map: docs_vectors_100_zero_shot.append(docs_vectors_500[i]) TEXT_FEATURES_100 = np.asarray(docs_vectors_100_zero_shot) # TEXT_FEATURES_TEST = "doc2vec_dbpedia_vectors-test.npy" # TEXT_FEATURES_TRAIN = "doc2vec_dbpedia_vectors-train.npy" SHALLOW_WEIGHT_LOAD = 'best' USE_LABELFLIP = False SHALLOW_FT_LF_WEIGHT_LOAD = '00' LF_DECAY = 0.01 in_shape = cfg.feat_shape_dict[FEAT_NET] out_shape = feat_dataset_n_classes(FEAT_TESTSET_NAME, FEAT_NET) SNB = ShallowNetBuilder(in_shape, out_shape) SL = ShallowLoader(FEAT_TRAINSET_NAME, FEAT_NET) #ST = ShallowTester(FEAT_NET, FEAT_TRAINSET_NAME, FEAT_TESTSET_NAME, csv_class_stats=False, csv_global_stats=False) # Nets to test # shallow_nets = [SNB.H8K] SHALLOW_NET_BUILD = SNB.A if USE_LABELFLIP: raise ValueError("Not implemented...") else: SHALLOW_NET = SHALLOW_NET_BUILD().init().load(SL, SHALLOW_WEIGHT_LOAD) def pruned_feat_dataset(dataset_name, feat_net, shallow_net): return ImageDataset().load_hdf5( pruned_feat_dataset_path(dataset_name, feat_net, shallow_net))
def dnn2docvec(dataset=cfg.dataset, feat_net='resnet50', shallow_net=None):
    """Train visual-feature -> doc2vec regression MLPs over a hyper-parameter grid.

    Builds (visual feature, class doc-vector) training pairs, then trains one
    model per combination of hidden-layer layout, optimizer, learning rate and
    batch size, with early stopping, LR reduction and loss-based checkpointing.

    :param dataset: dataset name (default snapshots cfg.dataset at import time,
        as in the original).
    :param shallow_net: shallow net used for the pruned-dataset lookup.
        BUG FIX: the original used a mutable default (ShallowNetBuilder().A()
        evaluated once at import and shared/mutated across calls); a fresh net
        is now built per call when the argument is omitted.
    """
    import numpy as np
    if shallow_net is None:
        shallow_net = ShallowNetBuilder().A()
    print("Loading visual features..")
    # NOTE(review): shallow_training_set is not defined anywhere visible in
    # this file -- presumably a module-level global; verify it exists.
    SL = ShallowLoader(shallow_training_set, feat_net)
    shallow_net.load(SL, )
    # BUG FIX: the original called pruned_feat_dataset(dataset, feat_net, )
    # with the required third argument missing (TypeError at runtime). The
    # result is immediately overwritten by the hard-coded HDF5 load below --
    # presumably one of the two loads should be removed; kept both to preserve
    # observable behavior. TODO confirm intent.
    visual_features = pruned_feat_dataset(dataset, feat_net, shallow_net)
    visual_features = ImageDataset().load_hdf5(
        "shallow_extracted_features/shallow_feat_dbp3120_train_ds.h5")
    print("Loading textual features..")
    textual_features = np.load("doc2vec_dbpedia_vectors.npy")
    print("Generating dataset..")
    data = []
    labels = []
    # Pair every visual feature of class `lbl` with that class's doc vector.
    for lbl, docv in enumerate(textual_features):
        visual_features_with_label = visual_features.sub_dataset_with_label(lbl)
        for visual_feat in visual_features_with_label.data:
            data.append(visual_feat)
            labels.append(docv)
    data = np.asarray(data)
    labels = np.asarray(labels)
    print("Generating model..")
    EPOCHS = 40
    hiddens = [[1000], [2000], [4000], [2000, 1000], [4000, 2000],
               [4000, 2000, 1000]]
    lrs = [10, 1]
    batch_sizes = [64, 32, 16]
    optimizers_str = ['Adadelta', 'Adagrad']
    optimizers = [Adadelta, Adagrad]
    for hid in hiddens:
        for opt, opt_str in zip(optimizers, optimizers_str):
            for lr in lrs:
                for bs in batch_sizes:
                    # BUG FIX: the original used Python-2-only bare `print`
                    # statements here; parenthesized calls print the same text
                    # on both Python 2 and 3.
                    print("")
                    print("Training model..")
                    print("hiddens: " + str(hid))
                    print("optim: " + str(opt_str))
                    print("lr: " + str(lr))
                    fname = "video2doc_model_opt-{}_lr-{}_bs-{}".format(opt_str, lr, bs)
                    for hu in hid:
                        fname += "_hl-" + str(hu)
                    model = get_model(data.shape[-1], labels.shape[-1], hid)
                    model.compile(optimizer=opt(lr=lr), loss=cos_distance)
                    earlystop = EarlyStopping(monitor='loss', min_delta=0.0005, patience=8)
                    reduceLR = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=3,
                                                 verbose=1, epsilon=0.001)
                    # NOTE(review): both checkpoints write to the SAME filepath,
                    # so the weights-only file clobbers the full-model file (or
                    # vice versa) -- likely one pattern should differ; confirm.
                    bestpoint = ModelCheckpoint(
                        fname + '.weights.{epoch:02d}.loss-{loss:.4f}.h5',
                        monitor='loss', save_best_only=True, save_weights_only=False)
                    bestpoint_wo = ModelCheckpoint(
                        fname + '.weights.{epoch:02d}.loss-{loss:.4f}.h5',
                        monitor='loss', save_best_only=True, save_weights_only=True)
                    callbacks = [earlystop, reduceLR, bestpoint, bestpoint_wo]
                    # BUG FIX: batch_size was hard-coded to 64, so the grid over
                    # batch_sizes changed nothing but the output filename.
                    model.fit(data, labels, batch_size=bs, nb_epoch=EPOCHS,
                              verbose=1, shuffle=True, callbacks=callbacks)
def prune_feat_dataset_with_shallow_classifier(
        feat_net=cfg_emb.FEAT_NET,
        double_seeds=True,
        n_top_classes=cfg_emb.PRUNING_KEEP_N_CLASSES,
        labelflip=cfg_emb.USE_LABELFLIP):
    """Keep only the n_top_classes best-classified classes of a feature dataset.

    Runs the shallow classifier over the test set, selects the top classes via
    test_for_top_classes, extracts those classes from the feature dataset and
    saves the pruned dataset to HDF5 (class lists are also written to CSV/txt).

    :param feat_net: CNN whose extracted features are used.
    :param double_seeds: use the '_ds' (double seeds) trainset variant.
    :param n_top_classes: number of best-performing classes to keep.
    :param labelflip: evaluate the LabelFlip-finetuned shallow nets instead of
        the plain ones.
    """
    dataset_name = cfg_emb.FEAT_DATASET
    trainset_name = cfg.dataset + '_train' + ('_ds' if double_seeds else '')
    testset_name = cfg.dataset
    # testset_name = cfg.dataset + '_test'
    print("Shallow Test")
    print("Features from CNN: " + feat_net)
    print("Trained on: " + trainset_name)
    print("Testing on: " + testset_name)
    in_shape = cfg.feat_shape_dict[feat_net]
    out_shape = feat_dataset_n_classes(testset_name, feat_net)
    SNB = ShallowNetBuilder(in_shape, out_shape)
    SL = ShallowLoader(trainset_name, feat_net)
    ST = ShallowTester(feat_net, trainset_name, testset_name,
                       csv_class_stats=False, csv_global_stats=False)
    # Nets to test and weights to load on them
    shallow_nets = [SNB.A]
    shallow_weights_to_loads = ['best']
    # Weights to load on labelflip-finetuned nets (finetuned loading the
    # weights in shallow_weights_to_loads list)
    shallow_ft_lf_weights_to_load = ['00']
    dataset_to_prune = common.feat_dataset(dataset_name, feat_net)

    def _prune_and_save(shallow_net):
        # Shared tail of both branches (duplicated verbatim in the original):
        # rank classes by test performance, keep the best n, save to HDF5.
        keep, _pruned_classes = test_for_top_classes(
            shallow_net, ST,
            nb_selected_classes=n_top_classes,
            out_on_csv="class_pruning.csv",
            out_classname_txt="class_names_keep_from_pruning.txt",
            out_classindex_txt="class_keep_from_pruning.txt")
        pruned = dataset_to_prune.sub_dataset_with_labels(keep)
        out_path = pruned_feat_dataset_path(dataset_name, testset_name,
                                            n_top_classes, feat_net, shallow_net)
        print("Saving pruned feature dataset in: " + out_path)
        pruned.save_hdf5(out_path)

    for sn in shallow_nets:
        for sh_i in shallow_weights_to_loads:
            # BUG FIX: the original branched on cfg_emb.USE_LABELFLIP, leaving
            # the declared `labelflip` parameter unused; branch on the
            # parameter instead (its default is cfg_emb.USE_LABELFLIP, so the
            # default behavior is unchanged).
            if labelflip:
                # Test the LabelFlip-finetuned models:
                extr_n = '_ft@' + str(sh_i)
                for lf_i in shallow_ft_lf_weights_to_load:
                    net = sn(extr_n, lf_decay=cfg_emb.LF_DECAY).init(lf=False).load(SL, lf_i)
                    _prune_and_save(net)
            else:
                # Test without LabelFlip finetune:
                _prune_and_save(sn().init(lf=False).load(SL, sh_i))