Ejemplo n.º 1
0
def tuning_model(inp, extractor, pre_model, feature_model, data_dict, sites, augData=0):
    """Fine-tune a softmax classifier on ``data_dict`` and refresh ``feature_model``.

    A new ``Dense`` softmax layer with ``len(sites)`` outputs is stacked on
    ``extractor``, the resulting model receives the weights of ``pre_model``
    through ``copy_weights``, and it is then trained on all samples from
    ``data_dict``. The best checkpoint (by training accuracy) is written to
    ``best_tune_model.h5`` under ``ResDir``.

    Args:
        inp: Input tensor used as the new model's input.
        extractor: Tensor the new output layer is attached to.
        pre_model: Model whose weights seed the new model.
        feature_model: Model that receives the tuned weights.
        data_dict: Data passed to ``mytools.datadict2data`` together with ``sites``.
        sites: Collection of classes; its length is the number of outputs.
        augData: When truthy, its ``data_aug`` attribute is called to augment
            the data before training.

    Returns:
        The value of ``copy_weights(feature_model, tuned_model)``.
    """
    samples, labels = mytools.datadict2data(data_dict, sites)
    # NOTE(review): the parameter is both the switch and the object providing
    # ``data_aug``; a plain truthy int (e.g. 1) would raise AttributeError here.
    # Presumably callers pass an augmentation module/object - confirm.
    if augData:
        samples, labels = augData.data_aug(samples, labels, newSamNum=20)
    n_classes = len(sites)
    samples = samples[:, :, np.newaxis]
    labels = to_categorical(labels, n_classes)

    # Swap in an output layer sized for the new label set.
    head = Dense(n_classes, activation='softmax')(extractor)
    tuned_model = copy_weights(Model(inputs=inp, outputs=head), pre_model, compileModel=False)

    print('Compiling...')
    tuned_model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])

    # No validation data is passed to fit(), so both callbacks watch the
    # training accuracy.
    best_path = os.path.join(ResDir, 'best_tune_model.h5')
    callbacks = [
        ModelCheckpoint(filepath=best_path, monitor='accuracy', verbose=1, save_best_only=True, mode='max'),
        EarlyStopping(monitor='accuracy', mode='max', patience=10),
    ]

    tuned_model.fit(samples, labels, batch_size=64, epochs=30, verbose=1, shuffle=True, callbacks=callbacks)
    return copy_weights(feature_model, tuned_model)
Ejemplo n.º 2
0
def kNN_train(signature_vector_dict, params):
    """Fit a brute-force k-nearest-neighbours classifier on the signature vectors.

    Args:
        signature_vector_dict: Mapping converted to training arrays by
            ``mytools.datadict2data``.
        params: Mapping providing the ``'k'``, ``'weights'``, ``'p'`` and
            ``'metric'`` settings for ``KNeighborsClassifier``.

    Returns:
        A ``(classifier, site_labels)`` tuple, where ``site_labels`` is the list
        of keys of ``signature_vector_dict`` in shuffled order.
    """
    shuffled_sites = list(signature_vector_dict.keys())
    random.shuffle(shuffled_sites)

    features, targets = mytools.datadict2data(signature_vector_dict)
    print('kNN training data shape: ', features.shape)

    knn_settings = dict(
        n_neighbors=params['k'],
        weights=params['weights'],
        p=params['p'],
        metric=params['metric'],
        algorithm='brute',
    )
    classifier = KNeighborsClassifier(**knn_settings)
    classifier.fit(features, targets)

    return classifier, shuffled_sites
Ejemplo n.º 3
0
    def test_open(self, opts, threshold, openDataOpt, test_times=5):
        """Repeat the open-data tune-and-test cycle and return the averaged metrics.

        Each round builds a fresh signature/test split with ``formOpenData``,
        tunes a model on the signature data with ``tuneTheModel``, predicts on
        the two test subsets produced by ``utility.splitMonAndUnmon``
        (presumably monitored vs. unmonitored traffic - confirm), and scores
        the predictions against ``threshold``.

        Args:
            opts: Options object; ``opts.nShot`` is read and ``opts`` is passed
                on to ``formOpenData``.
            threshold: Forwarded to ``utility.calculatePrecAndRecAndTPRAndFPR``.
            openDataOpt: Forwarded to ``formOpenData``.
            test_times: Number of rounds to average over.

        Returns:
            ``(mean_precision, mean_recall, mean_tpr, mean_fpr)``.
        """
        n_shot = opts.nShot
        precisions, recalls, tprs, fprs = [], [], [], []

        for _ in range(test_times):
            # Tuning phase: one class per distinct training label.
            signature_dict, test_dict = self.formOpenData(opts, n_shot, openDataOpt=openDataOpt)
            X_train, y_train = mytools.datadict2data(signature_dict)
            num_classes = len(set(y_train))
            print('n_shot is: {}\tsize_of_problem is: {}'.format(n_shot, num_classes))

            tuned_model = self.tuneTheModel(
                X_train[:, :, np.newaxis],
                np_utils.to_categorical(y_train, num_classes),
                num_classes,
            )

            # Test phase: predict each subset separately, then score them together.
            mon_x, mon_y, unmon_x, _unmon_y, max_label = utility.splitMonAndUnmon(test_dict)
            mon_pred = tuned_model.predict(mon_x)
            unmon_pred = tuned_model.predict(unmon_x)

            precision, recall, tpr, fpr = utility.calculatePrecAndRecAndTPRAndFPR(
                mon_pred, unmon_pred, mon_y, max_label, threshold)
            precisions.append(precision)
            recalls.append(recall)
            tprs.append(tpr)
            fprs.append(fpr)

        mean_precision = mean(precisions)
        mean_recall = mean(recalls)
        mean_tpr = mean(tprs)
        mean_fpr = mean(fprs)
        print('precision = ', mean_precision, '\trecall = ', mean_recall,
              '\tTPR = ', mean_tpr, '\tFPR = ', mean_fpr)
        return mean_precision, mean_recall, mean_tpr, mean_fpr
def _fit_on_target_sample(param, args, n_shot, show_shape=False):
    """Draw an n-shot split of the target data, store it in ``param`` and train.

    Returns ``(signature_dict, test_dict, model_path, time_last)``.
    """
    signature_dict, test_dict, _sites = utility.getDataDict(
        args.target, n_shot=n_shot, data_dim=param['inp_dims'], train_pool_size=20, test_size=70)
    target_data, target_label = mytools.datadict2data(signature_dict)
    if show_shape:
        print('target data shape: ', target_data.shape)
    target_data = target_data[:, :, np.newaxis]
    target_label = data.one_hot_encoding(target_label, len(set(target_label)))
    param["target_data"], param["target_label"] = target_data, target_label
    model_path, time_last = train(param, args)
    return signature_dict, test_dict, model_path, time_last


def _evaluate_trained_model(param, args, model_path, n_shot, signature_dict, test_dict):
    """Run one ``test.run`` pass on the model at ``model_path`` and return its accuracy."""
    test_opts = test.MyOpts(model_path, nShot=n_shot, tuning=True, aug=0, exp_type=args.exp_type)
    test_opts.nShot = n_shot
    test_params = test.generate_default_params(test_opts)
    inp_shape = (param["inp_dims"], 1)
    _, acc = test.run(test_opts, signature_dict, test_dict, params=test_params,
                      emb_size=param['embsz'], inp_shape=inp_shape, test_times=1)
    return acc


def run(param, args):
    """Run the experiment selected by ``args.testType``.

    Supported values of ``args.testType``:

    * ``'nShot'``: for each n-shot value in ``[1, 5, 10, 15, 20]`` train and
      test 10 times and append mean accuracy, standard deviation and mean
      training time to a result file under ``ResDir``.
    * ``'aug'``: placeholder; only prints a message.
    * ``'trainNum'``: with 20 shots, reload the source data with
      ``sample_num`` in ``[25, 50, 75, 100, 125]`` and, for each, train and
      test 10 times, appending the summary to a result file under ``ResDir``.
    * ``'trainTime'``: train once with 20 shots and print the training time.

    For every type except ``'trainNum'`` the source data is loaded up front
    with ``sample_num=25`` and its labels are one-hot encoded.

    Args:
        param: Mutable settings dict. Reads ``'inp_dims'`` and ``'embsz'``;
            writes ``'source_data'``, ``'source_label'``, ``'target_data'``
            and ``'target_label'``.
        args: Options object providing ``source``, ``target``, ``testType``
            and ``exp_type``.

    Raises:
        RuntimeError: If ``args.testType`` is not one of the values above.
    """
    source = os.path.basename(args.source).split('.')[0]
    target = os.path.basename(args.target).split('.')[0]
    test_type = args.testType
    test_num = 10

    if test_type != 'trainNum':
        # Load source data and encode its labels into one-hot format.
        param["source_data"], param["source_label"] = data.data_loader(args.source, param["inp_dims"], sample_num=25)
        clsNum = len(set(param["source_label"]))
        param["source_label"] = data.one_hot_encoding(param["source_label"], clsNum)
    else:
        print('will run train num test, so not loading training data at first')

    if test_type == 'nShot':
        print('run n_shot test...')
        n_shot_list = [1, 5, 10, 15, 20]
        outfile = os.path.join(ResDir, 'ADA_one_source_{}_target_{}_res.txt'.format(source, target))
        # The context manager closes the result file even if training or testing raises.
        with open(outfile, 'a+') as f:
            print('\n\n##################### test time is: {}####################'.format(time.ctime()), file=f, flush=True)
            for n_shot in n_shot_list:
                acc_list, time_last_list = [], []
                for _ in range(test_num):
                    # Train phase
                    signature_dict, test_dict, model_path, time_last = _fit_on_target_sample(
                        param, args, n_shot, show_shape=True)
                    time_last_list.append(time_last)
                    print('training time last: ', time_last)

                    # Test phase
                    acc = _evaluate_trained_model(param, args, model_path, n_shot, signature_dict, test_dict)
                    acc_list.append(acc)
                    print('acc of source {} and target {} with n_shot {} is: {:f}'.format(source, target, n_shot, acc))
                resLine = 'acc of source {} and target {} with n_shot {} is: {:f}, stdev is: {:f}, time last: {:f}\n\n'.format(source, target, n_shot, mean(acc_list), stdev(acc_list), mean(time_last_list))
                print(resLine, file=f, flush=True)
    elif test_type == 'aug':
        print('will run aug test...')
    elif test_type == 'trainNum':
        print('will run train num test...')
        n_shot = 20
        outfile = os.path.join(ResDir, 'trainNumTest_ADA_one_source_{}_target_{}_res.txt'.format(source, target))
        with open(outfile, 'a+') as f:
            print('\n\n################### test time is: {} ####################'.format(time.ctime()), file=f, flush=True)
            print('test with N shot num: {}'.format(n_shot), file=f, flush=True)
            trainNumList = [25, 50, 75, 100, 125]
            for trainNum in trainNumList:
                acc_list, time_last_list = [], []
                # Reload the source data according to the current train num.
                param["source_data"], param["source_label"] = data.data_loader(args.source, param["inp_dims"], sample_num=trainNum)
                print('train data shape is: ', np.array(param['source_data']).shape)
                clsNum = len(set(param["source_label"]))
                param["source_label"] = data.one_hot_encoding(param["source_label"], clsNum)

                for _ in range(test_num):
                    # Train phase
                    signature_dict, test_dict, model_path, time_last = _fit_on_target_sample(param, args, n_shot)
                    time_last_list.append(time_last)

                    # Test phase
                    acc = _evaluate_trained_model(param, args, model_path, n_shot, signature_dict, test_dict)
                    acc_list.append(acc)
                    print('acc of source {} and target {} with n_shot {} is: {:f}'.format(source, target, n_shot, acc))
                # NOTE(review): the summary line does not mention trainNum, so the
                # five results are only distinguishable by their order in the file.
                resLine = 'acc of source {} and target {} with n_shot {} is: {:f}, stdev is: {:f}, training time last: {:f}'.format(source, target, n_shot, mean(acc_list), stdev(acc_list), mean(time_last_list))
                print(resLine, file=f, flush=True)
    elif test_type == 'trainTime':
        # Train phase only
        n_shot = 20
        _, _, _, time_last = _fit_on_target_sample(param, args, n_shot)
        print('training time last: ', time_last)
    else:
        # A bare ``raise`` with no active exception surfaces as an opaque
        # RuntimeError; keep the type but say what went wrong.
        raise RuntimeError('unknown testType: {!r}'.format(test_type))