# Imports needed by the functions below. `svm_helper` is the repo's local
# helper module referenced throughout; the module-level logger setup is an
# assumption (standard logging is used in the rest of the code).
import logging
import os
import pickle

import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.svm import LinearSVC

import svm_helper

logger = logging.getLogger(__name__)


def get_chosen_costs(opts, num_classes):
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    train_ap_matrix = np.zeros((num_classes, len(costs_list)))
    for cls in range(num_classes):
        for cost_idx in range(len(costs_list)):
            cost = costs_list[cost_idx]
            _, ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path
            )
            train_ap_matrix[cls][cost_idx] = float(
                np.load(ap_out_file, encoding='latin1')[0]
            )
    # for every class, pick the cost value that gave the best cross-val AP
    argmax_cls = np.argmax(train_ap_matrix, axis=1)
    chosen_cost = [costs_list[idx] for idx in argmax_cls]
    logger.info('chosen_cost: {}'.format(chosen_cost))
    np.save(
        os.path.join(opts.output_path, 'crossval_ap.npy'),
        np.array(train_ap_matrix)
    )
    np.save(
        os.path.join(opts.output_path, 'chosen_cost.npy'),
        np.array(chosen_cost)
    )
    logger.info('saved crossval_ap AP to file: {}'.format(
        os.path.join(opts.output_path, 'crossval_ap.npy')))
    logger.info('saved chosen costs to file: {}'.format(
        os.path.join(opts.output_path, 'chosen_cost.npy')))
    return np.array(chosen_cost)
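# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original tooling): once per-(class,
# cost) models and AP files exist, the chosen cost for each class can be used
# to load back the corresponding pickled LinearSVC. The helper name
# `load_best_models` is hypothetical; it only relies on
# `get_svm_train_output_files` returning (model_path, ap_path) as above.
def load_best_models(opts, num_classes):
    chosen_costs = get_chosen_costs(opts, num_classes)
    models = []
    for cls in range(num_classes):
        model_file, _ = svm_helper.get_svm_train_output_files(
            cls, chosen_costs[cls], opts.output_path
        )
        with open(model_file, 'rb') as fread:
            models.append(pickle.load(fread))
    return models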
def task(cls, cost, opts, features, targets):
    out_file, ap_out_file = svm_helper.get_svm_train_output_files(
        cls, cost, opts.output_path
    )
    if os.path.exists(out_file) and os.path.exists(ap_out_file):
        logger.info('SVM model exists: {}'.format(out_file))
        logger.info('AP file exists: {}'.format(ap_out_file))
    else:
        logger.info('Training model with the cost: {}'.format(cost))
        clf = LinearSVC(
            C=cost,
            class_weight={1: 2, -1: 1},
            intercept_scaling=1.0,
            verbose=1,
            penalty='l2',
            loss='squared_hinge',
            tol=0.0001,
            dual=True,
            max_iter=2000,
        )
        cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
        # meaning of labels in VOC/COCO original loaded target files:
        # label 0 = not present, label 1 = present. Remap the SVM train
        # targets to {-1, 1}.
        cls_labels[np.where(cls_labels == 0)] = -1
        num_positives = len(np.where(cls_labels == 1)[0])
        num_negatives = len(cls_labels) - num_positives
        logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
            cls, num_positives, num_negatives,
            float(num_positives) / num_negatives
        ))
        logger.info('features: {} cls_labels: {}'.format(
            features.shape, cls_labels.shape))
        # 3-fold cross-validated AP for this (class, cost) pair, then fit the
        # final model on the full training set
        ap_scores = cross_val_score(
            clf, features, cls_labels, cv=3, scoring='average_precision'
        )
        clf.fit(features, cls_labels)
        logger.info('cls: {} cost: {} AP: {} mean: {}'.format(
            cls, cost, ap_scores, ap_scores.mean()))
        logger.info('Saving cls cost AP to: {}'.format(ap_out_file))
        np.save(ap_out_file, np.array([ap_scores.mean()]))
        logger.info('Saving SVM model to: {}'.format(out_file))
        with open(out_file, 'wb') as fwrite:
            pickle.dump(clf, fwrite)
    return 0
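# ---------------------------------------------------------------------------
# Hedged sketch of driving `task` over every (class, cost) pair with a
# process pool. This is not the original launcher: the pool size, the way
# features/targets are loaded, and the flag names on `opts` are assumptions
# carried over from `train_svm` below. Since `task` skips pairs whose output
# files already exist, re-running after an interruption is cheap.
def train_svm_parallel(opts, num_workers=8):
    import multiprocessing

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file
    )
    features = svm_helper.normalize_features(features)
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    num_classes = targets.shape[1]
    # one job per (class, cost) combination; note that the feature matrix is
    # pickled to each worker, which has a memory and startup cost
    jobs = [
        (cls, cost, opts, features, targets)
        for cls in range(num_classes)
        for cost in costs_list
    ]
    with multiprocessing.Pool(num_workers) as pool:
        pool.starmap(task, jobs)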
def train_svm(opts):
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)
    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file
    )
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Training SVM for costs: {}'.format(costs_list))

    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(cls) for cls in opts.cls_list.split(",")]
    else:
        num_classes = targets.shape[1]
        cls_list = range(num_classes)
    logger.info('Training SVM for classes: {}'.format(cls_list))

    for cls_idx in range(len(cls_list)):
        cls = cls_list[cls_idx]
        for cost_idx in range(len(costs_list)):
            cost = costs_list[cost_idx]
            out_file, ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path
            )
            if os.path.exists(out_file) and os.path.exists(ap_out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                logger.info('AP file exists: {}'.format(ap_out_file))
            else:
                logger.info('Training model with the cost: {}'.format(cost))
                clf = LinearSVC(
                    C=cost,
                    class_weight={1: 2, -1: 1},
                    intercept_scaling=1.0,
                    verbose=1,
                    penalty='l2',
                    loss='squared_hinge',
                    tol=0.0001,
                    dual=True,
                    max_iter=2000,
                )
                cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
                # meaning of labels in VOC/COCO original loaded target files:
                # label 0 = not present, set it to -1 as svm train target
                # label 1 = present. Make the svm train target labels as -1, 1.
                cls_labels[np.where(cls_labels == 0)] = -1
                num_positives = len(np.where(cls_labels == 1)[0])
                num_negatives = len(cls_labels) - num_positives
                logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
                    cls, num_positives, num_negatives,
                    float(num_positives) / num_negatives
                ))
                logger.info('features: {} cls_labels: {}'.format(
                    features.shape, cls_labels.shape))
                ap_scores = cross_val_score(
                    clf, features, cls_labels, cv=3,
                    scoring='average_precision'
                )
                clf.fit(features, cls_labels)
                logger.info('cls: {} cost: {} AP: {} mean: {}'.format(
                    cls, cost, ap_scores, ap_scores.mean()))
                logger.info('Saving cls cost AP to: {}'.format(ap_out_file))
                np.save(ap_out_file, np.array([ap_scores.mean()]))
                logger.info('Saving SVM model to: {}'.format(out_file))
                with open(out_file, 'wb') as fwrite:
                    pickle.dump(clf, fwrite)
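# ---------------------------------------------------------------------------
# Hedged entry-point sketch. The flag names below (--data_file,
# --targets_data_file, --output_path, --costs_list, --cls_list) are inferred
# from the attribute accesses on `opts` above, and the default cost list is
# purely illustrative; the original script may define its CLI differently.
def main():
    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Train per-class linear SVMs')
    parser.add_argument('--data_file', type=str, required=True,
                        help='numpy file with the extracted features')
    parser.add_argument('--targets_data_file', type=str, required=True,
                        help='numpy file with the per-class 0/1 targets')
    parser.add_argument('--output_path', type=str, required=True,
                        help='directory for trained models and AP files')
    parser.add_argument('--costs_list', type=str, default='0.01,0.1,1.0,10.0',
                        help='comma-separated SVM cost (C) values to sweep')
    parser.add_argument('--cls_list', type=str, default='',
                        help='comma-separated class indices; empty = all classes')
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    opts = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    train_svm(opts)


if __name__ == '__main__':
    main()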