Python load_input_dataの例、svm_helper.load_input_data Pythonの例

コード例 #1

0

ファイルを表示

ファイル: train_svm_low_shot.py プロジェクト: XiaohangZhan/fair_self_supervision_benchmark

def train_svm_low_shot(opts):
    """Train and pickle one LinearSVC per (class, cost) pair for low-shot data.

    Reads ``opts.data_file`` / ``opts.targets_data_file`` through
    ``svm_helper.load_input_data``, normalizes the features, and then, for
    every class returned by ``svm_helper.get_low_shot_svm_classes`` and every
    cost parsed from ``opts.costs_list``, fits a ``LinearSVC`` and pickles it
    to the path given by ``svm_helper.get_low_shot_output_file``. A
    (class, cost) pair whose output file already exists is skipped.

    Args:
        opts: options object; the attributes read here are ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``dataset`` (the object is also passed on to
            ``svm_helper.get_low_shot_output_file``).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Training SVM for costs: {}'.format(costs_list))

    # classes for which SVM training should be done (class count is unused)
    _, cls_list = svm_helper.get_low_shot_svm_classes(targets, opts.dataset)

    # The suffix is the last two '_'-separated tokens of the targets file's
    # base name (extension stripped). It does not depend on the class or the
    # cost, so it is computed once instead of once per loop iteration.
    targets_stem = opts.targets_data_file.split('/')[-1].split('.')[0]
    suffix = '_'.join(targets_stem.split('_')[-2:])

    for cls in cls_list:
        for cost in costs_list:
            out_file = svm_helper.get_low_shot_output_file(
                opts, cls, cost, suffix)
            if os.path.exists(out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                continue

            logger.info('SVM model not found: {}'.format(out_file))
            logger.info('Training model with the cost: {}'.format(cost))
            clf = LinearSVC(
                C=cost,
                # positives (label 1) are weighted twice as much as
                # negatives (label -1)
                class_weight={
                    1: 2,
                    -1: 1
                },
                intercept_scaling=1.0,
                verbose=1,
                penalty='l2',
                loss='squared_hinge',
                tol=0.0001,
                dual=True,
                max_iter=2000,
            )
            train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
                cls, features, targets, opts.dataset)
            num_positives = len(np.where(train_cls_labels == 1)[0])
            num_negatives = len(np.where(train_cls_labels == -1)[0])
            # Guard the purely informational ratio so that a class without
            # negatives does not abort the run with a ZeroDivisionError
            # raised from a log statement.
            if num_negatives:
                ratio = float(num_positives) / num_negatives
            else:
                ratio = float('inf')

            logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
                cls, num_positives, num_negatives, ratio))
            logger.info('features: {} cls_labels: {}'.format(
                train_feats.shape, train_cls_labels.shape))
            clf.fit(train_feats, train_cls_labels)
            logger.info('Saving SVM model to: {}'.format(out_file))
            with open(out_file, 'wb') as fwrite:
                pickle.dump(clf, fwrite)
    logger.info('All done!')

コード例 #2

0

ファイルを表示

def test_svm(opts):
    """Evaluate the per-class SVM models and save their average precision.

    For each class column of the loaded targets, the pickled model stored
    under ``opts.output_path`` for that class and its chosen cost (from
    ``get_chosen_costs``) is loaded and scored on the normalized features.
    Samples whose target for the class equals -1 are left out of the
    evaluation. The per-class AP values are written to ``test_ap.npy`` in
    ``opts.output_path``; when ``opts.generate_json`` is set, the raw decision
    scores are also written to ``json_preds.json`` keyed by image id and
    class name.

    Args:
        opts: options object; the attributes read here are ``data_file``,
            ``targets_data_file``, ``output_path``, ``generate_json`` and
            ``json_targets`` (the object is also passed to
            ``get_chosen_costs``).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    json_predictions = {}
    img_ids = []
    cls_names = []
    if opts.generate_json:
        img_ids, cls_names = load_json(opts.json_targets)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    num_classes = targets.shape[1]
    logger.info('Num classes: {}'.format(num_classes))

    # get the chosen cost that maximizes the cross-validation AP per class
    costs_list = get_chosen_costs(opts, num_classes)

    ap_matrix = np.zeros((num_classes, 1))
    for cls in range(num_classes):
        cost = costs_list[cls]
        logger.info('Testing model for cls: {} cost: {}'.format(cls, cost))
        model_name = ''.join(
            ['cls', str(cls), '_cost', str(cost), '.pickle'])
        model_file = os.path.join(opts.output_path, model_name)
        # Under Python 3 the pickle is read with latin1 encoding; under
        # Python 2 no encoding argument is passed.
        load_kwargs = {} if six.PY2 else {'encoding': 'latin1'}
        with open(model_file, 'rb') as fopen:
            model = pickle.load(fopen, **load_kwargs)
        prediction = model.decision_function(features)

        if opts.generate_json:
            cls_name = cls_names[cls]
            for idx, score in enumerate(prediction):
                # one dict of {class name: score} per image id
                json_predictions.setdefault(
                    img_ids[idx], {})[cls_name] = score

        # meaning of labels in VOC/COCO original loaded target files:
        # label 0 = not present, set it to -1 as svm train target
        # label 1 = present. Make the svm train target labels as -1, 1.
        cls_labels = targets[:, cls]
        evaluate_data_inds = cls_labels != -1
        eval_preds = prediction[evaluate_data_inds]
        # boolean-mask indexing yields a copy, so the relabeling below does
        # not write back into `targets`
        eval_cls_labels = cls_labels[evaluate_data_inds]
        eval_cls_labels[eval_cls_labels == 0] = -1
        _, _, _, ap = svm_helper.get_precision_recall(
            eval_cls_labels, eval_preds)
        ap_matrix[cls][0] = ap

    if opts.generate_json:
        output_file = os.path.join(opts.output_path, 'json_preds.json')
        with open(output_file, 'w') as fp:
            json.dump(json_predictions, fp)
        logger.info('Saved json predictions to: {}'.format(output_file))

    logger.info('Mean AP: {}'.format(np.mean(ap_matrix, axis=0)))
    ap_file = os.path.join(opts.output_path, 'test_ap.npy')
    np.save(ap_file, np.array(ap_matrix))
    logger.info('saved test AP to file: {}'.format(ap_file))

コード例 #3

0

ファイルを表示

ファイル: train_svm_low_shot_parallel.py プロジェクト: Qianna00/openselfsup

def train_svm_low_shot(opts):
    """Dispatch low-shot SVM training for every (class, cost) pair to a pool.

    Loads and normalizes the features, builds one task tuple
    ``(cls, cost, opts, features, targets)`` per class returned by
    ``svm_helper.get_low_shot_svm_classes`` and per cost parsed from
    ``opts.costs_list``, and maps ``mp_helper`` over those tuples with a
    process pool of ``mp.cpu_count()`` workers. Progress is shown with tqdm;
    the values returned by ``mp_helper`` are discarded.

    Args:
        opts: options object; the attributes read here are ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``dataset`` (the object is also handed to every task).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)

    # classes for which SVM training should be done (class count is unused)
    _, cls_list = svm_helper.get_low_shot_svm_classes(targets, opts.dataset)

    # One task per (class, cost) pair, class-major order. Every task refers
    # to the same opts/features/targets objects.
    tasks = [(cls, cost, opts, features, targets)
             for cls in cls_list
             for cost in costs_list]

    pool = mp.Pool(mp.cpu_count())
    try:
        # Drain the iterator only to drive the progress bar.
        for _ in tqdm.tqdm(pool.imap_unordered(mp_helper, tasks),
                           total=len(tasks)):
            pass
    except BaseException:
        # Stop outstanding work immediately instead of leaving orphaned
        # worker processes behind.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # The pool has been closed or terminated by now, so join() is valid
        # and makes sure the worker processes are reaped.
        pool.join()

コード例 #4

0

ファイルを表示

def train_svm(opts):
    """Dispatch SVM training for every (class, cost) pair to a process pool.

    Loads and normalizes the features, selects the classes to train (the
    comma-separated ``opts.cls_list`` when given, otherwise every column of
    the targets), builds one task tuple
    ``(cls, cost, opts, features, targets)`` per class and per cost parsed
    from ``opts.costs_list``, and maps ``mp_helper`` over those tuples with a
    process pool of ``mp.cpu_count()`` workers. Progress is shown with tqdm;
    the values returned by ``mp_helper`` are discarded.

    Args:
        opts: options object; the attributes read here are ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``cls_list`` (the object is also handed to every task).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(opts.data_file,
                                                   opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)

    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(cls) for cls in opts.cls_list.split(",")]
    else:
        cls_list = range(targets.shape[1])

    # One task per (class, cost) pair, class-major order. Every task refers
    # to the same opts/features/targets objects.
    tasks = [(cls, cost, opts, features, targets)
             for cls in cls_list
             for cost in costs_list]

    pool = mp.Pool(mp.cpu_count())
    try:
        # Drain the iterator only to drive the progress bar.
        for _ in tqdm.tqdm(pool.imap_unordered(mp_helper, tasks),
                           total=len(tasks)):
            pass
    except BaseException:
        # Stop outstanding work immediately instead of leaving orphaned
        # worker processes behind.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # The pool has been closed or terminated by now, so join() is valid
        # and makes sure the worker processes are reaped.
        pool.join()

コード例 #5

0

ファイルを表示

ファイル: train_svm_kfold.py プロジェクト: XiaohangZhan/fair_self_supervision_benchmark

def train_svm(opts):
    """Train, cross-validate and pickle one LinearSVC per (class, cost) pair.

    Loads and normalizes the features, selects the classes to train (the
    comma-separated ``opts.cls_list`` when given, otherwise every column of
    the targets) and, for each class and each cost parsed from
    ``opts.costs_list``:

    * computes 3-fold cross-validated average precision,
    * fits the classifier on all samples,
    * saves the mean AP with ``np.save`` and pickles the fitted model to the
      paths returned by ``svm_helper.get_svm_train_output_files``.

    A (class, cost) pair is skipped when both of its output files already
    exist.

    Args:
        opts: options object; the attributes read here are ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``cls_list``.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file
    )
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Training SVM for costs: {}'.format(costs_list))

    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(cls) for cls in opts.cls_list.split(",")]
    else:
        num_classes = targets.shape[1]
        cls_list = range(num_classes)
    logger.info('Training SVM for classes: {}'.format(cls_list))

    for cls in cls_list:
        for cost in costs_list:
            out_file, ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path
            )
            if os.path.exists(out_file) and os.path.exists(ap_out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                logger.info('AP file exists: {}'.format(ap_out_file))
                continue

            logger.info('Training model with the cost: {}'.format(cost))
            clf = LinearSVC(
                # positives (label 1) are weighted twice as much as
                # negatives (label -1)
                C=cost, class_weight={1: 2, -1: 1}, intercept_scaling=1.0,
                verbose=1, penalty='l2', loss='squared_hinge', tol=0.0001,
                dual=True, max_iter=2000,
            )
            # copy=True so that relabeling below never touches `targets`
            cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
            # meaning of labels in VOC/COCO original loaded target files:
            # label 0 = not present, set it to -1 as svm train target
            # label 1 = present. Make the svm train target labels as -1, 1.
            cls_labels[np.where(cls_labels == 0)] = -1
            num_positives = len(np.where(cls_labels == 1)[0])
            num_negatives = len(cls_labels) - num_positives
            # Guard the purely informational ratio so that a class without
            # negatives does not abort the run with a ZeroDivisionError
            # raised from a log statement.
            if num_negatives:
                ratio = float(num_positives) / num_negatives
            else:
                ratio = float('inf')

            logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
                cls, num_positives, num_negatives, ratio)
            )
            logger.info('features: {} cls_labels: {}'.format(
                features.shape, cls_labels.shape))
            ap_scores = cross_val_score(
                clf, features, cls_labels, cv=3, scoring='average_precision'
            )
            # the saved model is fitted on the full data, after
            # cross-validation has produced the AP estimate
            clf.fit(features, cls_labels)

            logger.info('cls: {} cost: {} AP: {} mean:{}'.format(
                cls, cost, ap_scores, ap_scores.mean()))
            logger.info('Saving cls cost AP to: {}'.format(ap_out_file))
            np.save(ap_out_file, np.array([ap_scores.mean()]))
            logger.info('Saving SVM model to: {}'.format(out_file))
            with open(out_file, 'wb') as fwrite:
                pickle.dump(clf, fwrite)

コード例 #6

0

ファイルを表示

def test_svm_low_shot(opts):
    """Evaluate low-shot SVM models per sample, k-value and cost.

    For every sample index in ``opts.sample_inds``, every k in
    ``opts.k_values`` and every cost parsed from ``opts.costs_list``, the
    pickled model of each class is loaded (path from
    ``svm_helper.get_low_shot_output_file``), scored on the normalized
    features, and its AP is stored in a ``(num_classes, 1)`` array whose mean
    becomes the entry ``[sample][k][cost]``. After all costs of a
    (sample, k) pair are done, that row is saved as
    ``test_ap_sample{S}_k{K}.npy`` in ``opts.output_path``; when
    ``opts.generate_json`` is set, ``save_json_predictions`` is then called
    with the cost of highest mean AP.

    Args:
        opts: options object; the attributes read here are ``k_values``,
            ``sample_inds``, ``generate_json``, ``json_targets``,
            ``data_file``, ``targets_data_file``, ``costs_list``, ``dataset``
            and ``output_path`` (the object is also passed to the helpers).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    k_values = list(map(int, opts.k_values.split(",")))
    sample_inds = list(map(int, opts.sample_inds.split(",")))
    logger.info('Testing svm for k-values: {} and sample_inds: {}'.format(
        k_values, sample_inds))

    img_ids = []
    cls_names = []
    if opts.generate_json:
        img_ids, cls_names = load_json(opts.json_targets)

    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    # we test the svms on the full test set. Given the test features and the
    # targets, we test it for various k-values (low-shot), cost values and
    # 5 independent samples.
    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Testing SVM for costs: {}'.format(costs_list))

    # classes for which SVM testing should be done
    num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
        targets, opts.dataset)

    # create the output for per sample, per k-value and per cost.
    sample_ap_matrices = [
        np.zeros((len(k_values), len(costs_list))) for _ in sample_inds
    ]

    # Under Python 3 the pickles are read with latin1 encoding; under
    # Python 2 no encoding argument is passed.
    load_kwargs = {} if six.PY2 else {'encoding': 'latin1'}

    # the test goes like this: For a given sample, for a given k-value and a
    # given cost value, we evaluate the trained svm model for all classes.
    # After computing over all classes, we get the mean AP value over all
    # classes. We hence end up with: output = [sample][k_value][cost]
    for inds, sample_idx in enumerate(sample_inds):
        sample_no = sample_idx + 1
        sample_ap = sample_ap_matrices[inds]
        for k_idx, k_low in enumerate(k_values):
            suffix = 'sample{}_k{}'.format(sample_no, k_low)
            for cost_idx, cost in enumerate(costs_list):
                local_cost_ap = np.zeros((num_classes, 1))
                for cls in cls_list:
                    logger.info(
                        'Test sample/k_value/cost/cls: {}/{}/{}/{}'.format(
                            sample_no, k_low, cost, cls))
                    model_file = svm_helper.get_low_shot_output_file(
                        opts, cls, cost, suffix)
                    with open(model_file, 'rb') as fopen:
                        model = pickle.load(fopen, **load_kwargs)
                    prediction = model.decision_function(features)
                    eval_preds, eval_cls_labels = (
                        svm_helper.get_cls_feats_labels(
                            cls, prediction, targets, opts.dataset))
                    _, _, _, ap = svm_helper.get_precision_recall(
                        eval_cls_labels, eval_preds)
                    local_cost_ap[cls][0] = ap
                sample_ap[k_idx][cost_idx] = np.mean(local_cost_ap, axis=0)

            out_k_sample_file = os.path.join(
                opts.output_path,
                'test_ap_sample{}_k{}.npy'.format(sample_no, k_low))
            # one row holding the mean AP of every cost for this sample/k
            save_data = sample_ap[k_idx].reshape((1, -1))
            np.save(out_k_sample_file, save_data)
            logger.info('Saved sample test k_idx AP to file: {} {}'.format(
                out_k_sample_file, save_data.shape))
            if opts.generate_json:
                # pick the cost with the highest mean AP for this sample/k
                argmax_cls = np.argmax(save_data, axis=1)
                chosen_cost = costs_list[argmax_cls[0]]
                logger.info('chosen cost: {}'.format(chosen_cost))
                save_json_predictions(opts, chosen_cost, sample_idx, k_low,
                                      features, cls_list, cls_names, img_ids)
    logger.info('All done!!')