def get_chosen_costs(opts, num_classes):
    """Pick, for every class, the cost with the highest saved AP score.

    For each class index in ``range(num_classes)`` and each cost parsed from
    ``opts.costs_list``, the AP value is read from the AP file reported by
    ``svm_helper.get_svm_train_output_files``. The full class-by-cost AP
    matrix is saved as ``crossval_ap.npy`` and the selected costs as
    ``chosen_cost.npy``, both under ``opts.output_path``.

    Returns:
        A numpy array holding one chosen cost per class.
    """
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    ap_path = os.path.join(opts.output_path, 'crossval_ap.npy')
    cost_path = os.path.join(opts.output_path, 'chosen_cost.npy')

    # Rows are classes, columns are costs (same order as costs_list).
    train_ap_matrix = np.zeros((num_classes, len(costs_list)))
    for cls in range(num_classes):
        for cost_idx, cost in enumerate(costs_list):
            _, ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path
            )
            # Each AP file stores its score as the first element.
            stored_ap = np.load(ap_out_file, encoding='latin1')
            train_ap_matrix[cls, cost_idx] = float(stored_ap[0])

    # Column index of the best AP in every row -> the matching cost value.
    best_cost_idx = np.argmax(train_ap_matrix, axis=1)
    chosen_cost = [costs_list[idx] for idx in best_cost_idx]
    logger.info('chosen_cost: {}'.format(chosen_cost))

    chosen_cost_array = np.array(chosen_cost)
    np.save(ap_path, train_ap_matrix)
    np.save(cost_path, chosen_cost_array)
    logger.info('saved crossval_ap AP to file: {}'.format(ap_path))
    logger.info('saved chosen costs to file: {}'.format(cost_path))
    return chosen_cost_array
# Example #2
# 0
def task(cls, cost, opts, features, targets):
    """Train and persist the SVM for one (class, cost) pair, if not yet done.

    Nothing is trained when both output files reported by
    ``svm_helper.get_svm_train_output_files`` already exist. Otherwise a
    LinearSVC is cross-validated (3 folds, average precision), fitted on all
    of ``features``, and then the mean AP and the pickled classifier are
    written to their respective files.

    Args:
        cls: column index into ``targets`` selecting the class to train.
        cost: value passed to LinearSVC as ``C``.
        opts: options object; only ``opts.output_path`` is read here.
        features: training features handed to scikit-learn unchanged.
        targets: label matrix indexed as ``targets[:, cls]``.

    Returns:
        0 in every case.
    """
    out_file, ap_out_file = svm_helper.get_svm_train_output_files(
        cls, cost, opts.output_path)
    # Both artefacts already on disk: nothing left to do for this pair.
    if os.path.exists(out_file) and os.path.exists(ap_out_file):
        return 0

    # Positive samples (label 1) carry twice the weight of negatives (-1).
    svm = LinearSVC(
        C=cost,
        class_weight={1: 2, -1: 1},
        intercept_scaling=1.0,
        verbose=0,
        penalty='l2',
        loss='squared_hinge',
        tol=0.0001,
        dual=True,
        max_iter=500,
    )

    # Work on an int32 copy so the caller's targets are left untouched,
    # then remap label 0 to -1.
    labels = targets[:, cls].astype(dtype=np.int32, copy=True)
    labels[labels == 0] = -1

    fold_ap = cross_val_score(
        svm, features, labels, cv=3, scoring='average_precision')
    svm.fit(features, labels)

    mean_ap = np.array([fold_ap.mean()])
    np.save(ap_out_file, mean_ap)
    with open(out_file, 'wb') as model_file:
        pickle.dump(svm, model_file)
    return 0
# Example #3
# 0
def task(cls, cost, opts, features, targets):
    """Train and save the SVM for one (class, cost) pair unless already done.

    When both output files reported by
    ``svm_helper.get_svm_train_output_files`` exist, this only logs that fact.
    Otherwise a LinearSVC is scored with 3-fold cross-validation (average
    precision), fitted on all of ``features``, and then the mean AP is saved
    with ``np.save`` and the fitted classifier is pickled.

    Args:
        cls: column index into ``targets`` selecting the class to train.
        cost: value passed to LinearSVC as ``C``.
        opts: options object; only ``opts.output_path`` is read here.
        features: training features handed to scikit-learn unchanged.
        targets: label matrix indexed as ``targets[:, cls]``.

    Returns:
        0 in every case.
    """
    out_file, ap_out_file = svm_helper.get_svm_train_output_files(
        cls, cost, opts.output_path)
    if os.path.exists(out_file) and os.path.exists(ap_out_file):
        logger.info('SVM model exists: {}'.format(out_file))
        logger.info('AP file exists: {}'.format(ap_out_file))
        return 0

    # Positive samples (label 1) carry twice the weight of negatives (-1).
    clf = LinearSVC(
        C=cost,
        class_weight={
            1: 2,
            -1: 1
        },
        intercept_scaling=1.0,
        verbose=1,
        penalty='l2',
        loss='squared_hinge',
        tol=0.0001,
        dual=True,
        max_iter=2000,
    )
    # Copy so the caller's targets are never modified in place.
    cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
    # meaning of labels in VOC/COCO original loaded target files:
    # label 0 = not present, set it to -1 as svm train target
    # label 1 = present. Make the svm train target labels as -1, 1.
    cls_labels[np.where(cls_labels == 0)] = -1

    ap_scores = cross_val_score(clf,
                                features,
                                cls_labels,
                                cv=3,
                                scoring='average_precision')
    # cross_val_score is used only for the AP estimate; the model that gets
    # saved is fitted on the full data.
    clf.fit(features, cls_labels)

    np.save(ap_out_file, np.array([ap_scores.mean()]))
    with open(out_file, 'wb') as fwrite:
        pickle.dump(clf, fwrite)
    return 0
def train_svm(opts):
    """Train one LinearSVC per (class, cost) pair and save models and APs.

    Reads ``opts.data_file``, ``opts.targets_data_file``, ``opts.output_path``,
    ``opts.costs_list`` and ``opts.cls_list``. Features are loaded and
    normalized through ``svm_helper``. For every requested class and every
    parsed cost, training is skipped when both output files already exist;
    otherwise the mean 3-fold cross-validation average precision is saved
    with ``np.save`` and the classifier fitted on all data is pickled.

    Args:
        opts: options object providing the attributes listed above.
            ``opts.cls_list`` is a comma-separated string of class indices;
            when it is empty, every column of the targets is trained.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    # Raised explicitly (same exception type as the former ``assert``) so the
    # check is not stripped when Python runs with -O.
    if not os.path.exists(opts.data_file):
        raise AssertionError("Data file not found. Abort!")
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file
    )
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Training SVM for costs: {}'.format(costs_list))

    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(cls) for cls in opts.cls_list.split(",")]
    else:
        num_classes = targets.shape[1]
        # Materialize the range so the log line below lists the class ids
        # instead of printing "range(0, N)" on Python 3.
        cls_list = list(range(num_classes))
    logger.info('Training SVM for classes: {}'.format(cls_list))

    for cls in cls_list:
        for cost in costs_list:
            out_file, ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path
            )
            if os.path.exists(out_file) and os.path.exists(ap_out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                logger.info('AP file exists: {}'.format(ap_out_file))
                continue

            logger.info('Training model with the cost: {}'.format(cost))
            # Positive samples (label 1) weigh twice as much as negatives.
            clf = LinearSVC(
                C=cost, class_weight={1: 2, -1: 1}, intercept_scaling=1.0,
                verbose=1, penalty='l2', loss='squared_hinge', tol=0.0001,
                dual=True, max_iter=2000,
            )
            # Copy so the loaded targets are never modified in place.
            cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
            # meaning of labels in VOC/COCO original loaded target files:
            # label 0 = not present, set it to -1 as svm train target
            # label 1 = present. Make the svm train target labels as -1, 1.
            cls_labels[np.where(cls_labels == 0)] = -1
            num_positives = len(np.where(cls_labels == 1)[0])
            num_negatives = len(cls_labels) - num_positives

            # The ratio is informational only; a class without negative
            # samples must not abort the run with ZeroDivisionError here.
            if num_negatives:
                ratio = float(num_positives) / num_negatives
            else:
                ratio = float('inf')
            logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
                cls, num_positives, num_negatives, ratio)
            )
            logger.info('features: {} cls_labels: {}'.format(
                features.shape, cls_labels.shape))
            ap_scores = cross_val_score(
                clf, features, cls_labels, cv=3, scoring='average_precision'
            )
            # cross_val_score is used only for the AP estimate; the model
            # that gets saved is fitted on the full data.
            clf.fit(features, cls_labels)

            logger.info('cls: {} cost: {} AP: {} mean:{}'.format(
                cls, cost, ap_scores, ap_scores.mean()))
            logger.info('Saving cls cost AP to: {}'.format(ap_out_file))
            np.save(ap_out_file, np.array([ap_scores.mean()]))
            logger.info('Saving SVM model to: {}'.format(out_file))
            with open(out_file, 'wb') as fwrite:
                pickle.dump(clf, fwrite)