def train_svm_low_shot(opts):
    """Train and pickle one LinearSVC per (class, cost) pair, sequentially.

    Reads ``opts.data_file`` / ``opts.targets_data_file`` through
    ``svm_helper.load_input_data``, normalizes the features, and for every
    class returned by ``svm_helper.get_low_shot_svm_classes`` and every cost
    parsed from ``opts.costs_list`` fits a model and pickles it to the path
    given by ``svm_helper.get_low_shot_output_file``. Pairs whose output file
    already exists are skipped, so an interrupted run can be resumed.

    Args:
        opts: options object; this function reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``dataset`` from it (helpers may read more).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Training SVM for costs: {}'.format(costs_list))

    # classes for which SVM training should be done
    _, cls_list = svm_helper.get_low_shot_svm_classes(targets, opts.dataset)

    # The suffix is the last two '_'-separated tokens of the targets file's
    # base name (extension stripped). It depends only on opts, so compute it
    # once instead of once per (class, cost) pair.
    targets_stem = opts.targets_data_file.split('/')[-1].split('.')[0]
    suffix = '_'.join(targets_stem.split('_')[-2:])

    for cls in cls_list:
        for cost in costs_list:
            out_file = svm_helper.get_low_shot_output_file(
                opts, cls, cost, suffix)
            if os.path.exists(out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                continue

            logger.info('SVM model not found: {}'.format(out_file))
            logger.info('Training model with the cost: {}'.format(cost))
            clf = LinearSVC(
                C=cost,
                class_weight={1: 2, -1: 1},
                intercept_scaling=1.0,
                verbose=1,
                penalty='l2',
                loss='squared_hinge',
                tol=0.0001,
                dual=True,
                max_iter=2000,
            )
            train_feats, train_cls_labels = svm_helper.get_cls_feats_labels(
                cls, features, targets, opts.dataset)
            num_positives = len(np.where(train_cls_labels == 1)[0])
            num_negatives = len(np.where(train_cls_labels == -1)[0])
            # Guard the purely informational ratio: a class without negatives
            # must not abort the run with ZeroDivisionError inside a log call.
            # Any complaint about a degenerate label set is left to fit().
            pos_neg_ratio = (
                float(num_positives) / num_negatives
                if num_negatives else float('inf'))
            logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
                cls, num_positives, num_negatives, pos_neg_ratio))
            logger.info('features: {} cls_labels: {}'.format(
                train_feats.shape, train_cls_labels.shape))
            clf.fit(train_feats, train_cls_labels)

            logger.info('Saving SVM model to: {}'.format(out_file))
            with open(out_file, 'wb') as fwrite:
                pickle.dump(clf, fwrite)
    logger.info('All done!')
def test_svm(opts):
    """Evaluate the per-class SVM models and save their average precisions.

    For every class column of the targets, the pickled model stored under
    ``opts.output_path`` for the cost returned by ``get_chosen_costs`` is
    loaded and applied to the normalized features. The resulting AP values
    are written to ``test_ap.npy`` in ``opts.output_path``. When
    ``opts.generate_json`` is set, the raw decision values are additionally
    collected per image id / class name and dumped to ``json_preds.json``.

    Args:
        opts: options object; this function reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``generate_json`` and
            ``json_targets`` from it (helpers may read more).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    json_predictions = {}
    img_ids = []
    cls_names = []
    if opts.generate_json:
        img_ids, cls_names = load_json(opts.json_targets)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)
    num_classes = targets.shape[1]
    logger.info('Num classes: {}'.format(num_classes))

    # get the chosen cost that maximizes the cross-validation AP per class
    costs_list = get_chosen_costs(opts, num_classes)

    ap_matrix = np.zeros((num_classes, 1))
    for cls in range(num_classes):
        cost = costs_list[cls]
        logger.info('Testing model for cls: {} cost: {}'.format(cls, cost))
        model_name = 'cls' + str(cls) + '_cost' + str(cost) + '.pickle'
        model_file = os.path.join(opts.output_path, model_name)
        with open(model_file, 'rb') as fopen:
            model = (
                pickle.load(fopen) if six.PY2
                else pickle.load(fopen, encoding='latin1'))
        prediction = model.decision_function(features)

        if opts.generate_json:
            cls_name = cls_names[cls]
            # One nested dict per image id, keyed by class name.
            for idx, pred_score in enumerate(prediction):
                per_image = json_predictions.setdefault(img_ids[idx], {})
                per_image[cls_name] = pred_score

        cls_labels = targets[:, cls]
        # meaning of labels in VOC/COCO original loaded target files:
        # label 0 = not present, label 1 = present.
        # Rows labelled -1 are left out of the evaluation; among the rows
        # that remain, 0 is rewritten to -1 so the labels are -1 / 1.
        keep_mask = (cls_labels != -1)
        eval_preds = prediction[keep_mask]
        # Boolean indexing yields a copy, so the in-place relabel below does
        # not write back into `targets`.
        eval_cls_labels = cls_labels[keep_mask]
        eval_cls_labels[eval_cls_labels == 0] = -1
        _, _, _, ap = svm_helper.get_precision_recall(
            eval_cls_labels, eval_preds)
        ap_matrix[cls][0] = ap

    if opts.generate_json:
        output_file = os.path.join(opts.output_path, 'json_preds.json')
        with open(output_file, 'w') as fp:
            json.dump(json_predictions, fp)
        logger.info('Saved json predictions to: {}'.format(output_file))

    logger.info('Mean AP: {}'.format(np.mean(ap_matrix, axis=0)))
    ap_file = os.path.join(opts.output_path, 'test_ap.npy')
    np.save(ap_file, np.array(ap_matrix))
    logger.info('saved test AP to file: {}'.format(ap_file))
def train_svm_low_shot(opts):
    """Dispatch low-shot SVM training over a multiprocessing pool.

    Loads and normalizes the features, then submits one task per
    (class, cost) pair to ``mp_helper`` through ``Pool.imap_unordered``,
    showing progress with tqdm. Each task is the tuple
    ``(cls, cost, opts, features, targets)``.

    Args:
        opts: options object; this function reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``dataset`` from it (``mp_helper`` may read more).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)

    # classes for which SVM training should be done
    _, cls_list = svm_helper.get_low_shot_svm_classes(targets, opts.dataset)

    # One task tuple per (class, cost) pair, class-major order.
    tasks = [
        (cls, cost, opts, features, targets)
        for cls in cls_list
        for cost in costs_list
    ]
    num_task = len(tasks)

    pool = mp.Pool(mp.cpu_count())
    try:
        for _ in tqdm.tqdm(
                pool.imap_unordered(mp_helper, tasks), total=num_task):
            pass
    finally:
        # Always release the workers. On the success path every result has
        # already been consumed, so terminate() cannot cut off pending work;
        # on the error path it stops the remaining tasks instead of leaking
        # the worker processes.
        pool.terminate()
        pool.join()
def train_svm(opts):
    """Dispatch per-class SVM training over a multiprocessing pool.

    Loads and normalizes the features, picks the classes to train (the
    comma-separated ``opts.cls_list`` if given, otherwise every column of the
    targets), and submits one task per (class, cost) pair to ``mp_helper``
    through ``Pool.imap_unordered``, showing progress with tqdm. Each task is
    the tuple ``(cls, cost, opts, features, targets)``.

    Args:
        opts: options object; this function reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``cls_list`` from it (``mp_helper`` may read more).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)

    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(c) for c in opts.cls_list.split(",")]
    else:
        num_classes = targets.shape[1]
        cls_list = range(num_classes)

    # One task tuple per (class, cost) pair, class-major order.
    tasks = [
        (cls, cost, opts, features, targets)
        for cls in cls_list
        for cost in costs_list
    ]
    num_task = len(tasks)

    pool = mp.Pool(mp.cpu_count())
    try:
        for _ in tqdm.tqdm(
                pool.imap_unordered(mp_helper, tasks), total=num_task):
            pass
    finally:
        # Always release the workers. On the success path every result has
        # already been consumed, so terminate() cannot cut off pending work;
        # on the error path it stops the remaining tasks instead of leaking
        # the worker processes.
        pool.terminate()
        pool.join()
def train_svm(opts):
    """Train, cross-validate and pickle one LinearSVC per (class, cost) pair.

    For each selected class (the comma-separated ``opts.cls_list`` if given,
    otherwise every column of the targets) and each cost parsed from
    ``opts.costs_list``, this computes a 3-fold cross-validated average
    precision, fits the model on all the data, saves the mean AP with
    ``np.save`` and pickles the fitted model. Output paths come from
    ``svm_helper.get_svm_train_output_files``; a pair is skipped only when
    both of its output files already exist.

    Args:
        opts: options object; this function reads ``data_file``,
            ``targets_data_file``, ``output_path``, ``costs_list`` and
            ``cls_list`` from it.

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    if not os.path.exists(opts.output_path):
        os.makedirs(opts.output_path)

    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file
    )
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Training SVM for costs: {}'.format(costs_list))

    # classes for which SVM training should be done
    if opts.cls_list:
        cls_list = [int(c) for c in opts.cls_list.split(",")]
    else:
        num_classes = targets.shape[1]
        cls_list = range(num_classes)
    logger.info('Training SVM for classes: {}'.format(cls_list))

    for cls in cls_list:
        for cost in costs_list:
            out_file, ap_out_file = svm_helper.get_svm_train_output_files(
                cls, cost, opts.output_path
            )
            if os.path.exists(out_file) and os.path.exists(ap_out_file):
                logger.info('SVM model exists: {}'.format(out_file))
                logger.info('AP file exists: {}'.format(ap_out_file))
                continue

            logger.info('Training model with the cost: {}'.format(cost))
            clf = LinearSVC(
                C=cost,
                class_weight={1: 2, -1: 1},
                intercept_scaling=1.0,
                verbose=1,
                penalty='l2',
                loss='squared_hinge',
                tol=0.0001,
                dual=True,
                max_iter=2000,
            )
            # Work on an int32 copy so relabelling never touches `targets`.
            cls_labels = targets[:, cls].astype(dtype=np.int32, copy=True)
            # meaning of labels in VOC/COCO original loaded target files:
            # label 0 = not present, set it to -1 as svm train target
            # label 1 = present. Make the svm train target labels as -1, 1.
            cls_labels[np.where(cls_labels == 0)] = -1
            num_positives = len(np.where(cls_labels == 1)[0])
            # Everything that is not labelled 1 is counted as a negative.
            num_negatives = len(cls_labels) - num_positives
            # Guard the purely informational ratio: a class without negatives
            # must not abort the run with ZeroDivisionError inside a log call.
            # Any complaint about a degenerate label set is left to the
            # estimator / cross-validation below.
            pos_neg_ratio = (
                float(num_positives) / num_negatives
                if num_negatives else float('inf'))
            logger.info('cls: {} has +ve: {} -ve: {} ratio: {}'.format(
                cls, num_positives, num_negatives, pos_neg_ratio)
            )
            logger.info('features: {} cls_labels: {}'.format(
                features.shape, cls_labels.shape))

            ap_scores = cross_val_score(
                clf, features, cls_labels, cv=3, scoring='average_precision'
            )
            # The model that gets saved is fitted on all the data, after the
            # cross-validation scores have been computed.
            clf.fit(features, cls_labels)

            logger.info('cls: {} cost: {} AP: {} mean:{}'.format(
                cls, cost, ap_scores, ap_scores.mean()))
            logger.info('Saving cls cost AP to: {}'.format(ap_out_file))
            np.save(ap_out_file, np.array([ap_scores.mean()]))
            logger.info('Saving SVM model to: {}'.format(out_file))
            with open(out_file, 'wb') as fwrite:
                pickle.dump(clf, fwrite)
def test_svm_low_shot(opts):
    """Evaluate the low-shot SVMs for every sample, k-value and cost.

    For each sample index in ``opts.sample_inds``, each k in ``opts.k_values``
    and each cost parsed from ``opts.costs_list``, the pickled per-class
    models are loaded, scored on the full test set, and their APs averaged.
    One ``test_ap_sample{S}_k{K}.npy`` file of shape ``(1, num_costs)`` is
    written to ``opts.output_path`` per (sample, k). When
    ``opts.generate_json`` is set, ``save_json_predictions`` is called with
    the cost that gave the highest mean AP for that (sample, k).

    Args:
        opts: options object; this function reads ``k_values``,
            ``sample_inds``, ``generate_json``, ``json_targets``,
            ``data_file``, ``targets_data_file``, ``costs_list``, ``dataset``
            and ``output_path`` from it (helpers may read more).

    Raises:
        AssertionError: if ``opts.data_file`` does not exist.
    """
    k_values = [int(val) for val in opts.k_values.split(",")]
    sample_inds = [int(val) for val in opts.sample_inds.split(",")]
    logger.info('Testing svm for k-values: {} and sample_inds: {}'.format(
        k_values, sample_inds))

    img_ids, cls_names = [], []
    if opts.generate_json:
        img_ids, cls_names = load_json(opts.json_targets)

    assert os.path.exists(opts.data_file), "Data file not found. Abort!"
    # we test the svms on the full test set. Given the test features and the
    # targets, we test it for various k-values (low-shot), cost values and
    # 5 independent samples.
    features, targets = svm_helper.load_input_data(
        opts.data_file, opts.targets_data_file)
    # normalize the features: N x 9216 (example shape)
    features = svm_helper.normalize_features(features)

    # parse the cost values for training the SVM on
    costs_list = svm_helper.parse_cost_list(opts.costs_list)
    logger.info('Testing SVM for costs: {}'.format(costs_list))

    # classes for which SVM testing should be done
    num_classes, cls_list = svm_helper.get_low_shot_svm_classes(
        targets, opts.dataset)

    # create the output for per sample, per k-value and per cost.
    sample_ap_matrices = [
        np.zeros((len(k_values), len(costs_list))) for _ in sample_inds
    ]

    # the test goes like this: For a given sample, for a given k-value and a
    # given cost value, we evaluate the trained svm model for all classes.
    # After computing over all classes, we get the mean AP value over all
    # classes. We hence end up with: output = [sample][k_value][cost]
    for inds, sample_idx in enumerate(sample_inds):
        for k_idx, k_low in enumerate(k_values):
            suffix = 'sample{}_k{}'.format(sample_idx + 1, k_low)
            for cost_idx, cost in enumerate(costs_list):
                # NOTE(review): rows are indexed by the class value itself
                # and the mean below runs over all num_classes rows; this
                # presumes cls_list covers 0..num_classes-1 - confirm against
                # svm_helper.get_low_shot_svm_classes.
                local_cost_ap = np.zeros((num_classes, 1))
                for cls in cls_list:
                    logger.info(
                        'Test sample/k_value/cost/cls: {}/{}/{}/{}'.format(
                            sample_idx + 1, k_low, cost, cls))
                    model_file = svm_helper.get_low_shot_output_file(
                        opts, cls, cost, suffix)
                    # NOTE: unpickling executes code from the file; only
                    # load model files produced by a trusted training run.
                    with open(model_file, 'rb') as fopen:
                        if six.PY2:
                            model = pickle.load(fopen)
                        else:
                            model = pickle.load(fopen, encoding='latin1')
                    prediction = model.decision_function(features)
                    eval_preds, eval_cls_labels = (
                        svm_helper.get_cls_feats_labels(
                            cls, prediction, targets, opts.dataset))
                    _, _, _, ap = svm_helper.get_precision_recall(
                        eval_cls_labels, eval_preds)
                    local_cost_ap[cls][0] = ap
                # Reduce to a true scalar: with a single column this equals
                # the axis=0 mean, but storing a shape-(1,) array into one
                # matrix cell relies on size-1-array-to-scalar conversion,
                # which NumPy has deprecated.
                mean_cost_ap = np.mean(local_cost_ap)
                sample_ap_matrices[inds][k_idx][cost_idx] = mean_cost_ap

            out_k_sample_file = os.path.join(
                opts.output_path,
                'test_ap_sample{}_k{}.npy'.format(sample_idx + 1, k_low))
            save_data = sample_ap_matrices[inds][k_idx].reshape((1, -1))
            np.save(out_k_sample_file, save_data)
            logger.info('Saved sample test k_idx AP to file: {} {}'.format(
                out_k_sample_file, save_data.shape))

            if opts.generate_json:
                # Pick the cost with the highest mean AP for this (sample, k).
                argmax_cls = np.argmax(save_data, axis=1)
                chosen_cost = costs_list[argmax_cls[0]]
                logger.info('chosen cost: {}'.format(chosen_cost))
                save_json_predictions(
                    opts, chosen_cost, sample_idx, k_low, features, cls_list,
                    cls_names, img_ids)
    logger.info('All done!!')