def apply_full_nonlinear_svm(model, data, report_margins_recounting=False):
    """
    Apply parsed full SVM model (original libSVM model with embedded SVs).
    This is a custom implementation which bypasses libsvm routine (faster).

    The kernel matrix between `data` and model["train_data"] is computed with
    model["func_kernel"] and then handed to apply_common_nonlinear_svm
    together with model["svm_model"] and model["target_class"].

    @param model: model parsed by event_agent_generator.parse_full_SVM_model;
                  the keys read here are "train_data", "func_kernel",
                  "svm_model" and "target_class"
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs' and 'margins', which are each numpy array of floats
             (whatever apply_common_nonlinear_svm returns is passed through unchanged)
    @rtype: dictionary of float numpy arrays

    todo: add mer
    """
    # Single-argument print call: same output as the Python 2 print statement,
    # but also valid syntax under Python 3.
    print("#training samples loaded by full SVM model: %d" % model["train_data"].shape[0])

    # Kernel values (and, on request, their per-bin recounting breakdown)
    # between the test data and the training data stored in the model.
    matrices = kernels.compute_kernel_matrix(
        data,
        model["train_data"],
        func_kernel=model["func_kernel"],
        recounting=report_margins_recounting,
    )

    # model_is_compact=False tells the common routine that this is the full model.
    return apply_common_nonlinear_svm(
        model["svm_model"],
        kernel_matrix=matrices["kernel_matrix"],
        kernel_matrix_recounting=matrices["kernel_matrix_recounting"],
        target_class=model["target_class"],
        model_is_compact=False,
    )
def apply_full_nonlinear_svm(model, data, report_margins_recounting=False):
    """
    Apply parsed full SVM model (original libSVM model with embedded SVs).
    This is a custom implementation which bypasses libsvm routine (faster).

    Steps: compute the kernel matrix between `data` and model['train_data']
    using model['func_kernel'], then delegate to apply_common_nonlinear_svm.

    @param model: model parsed by event_agent_generator.parse_full_SVM_model;
                  the keys read here are 'train_data', 'func_kernel',
                  'svm_model' and 'target_class'
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs' and 'margins', which are each numpy array of floats
             (the result of apply_common_nonlinear_svm, returned as-is)
    @rtype: dictionary of float numpy arrays

    todo: add mer
    """
    n_train = model['train_data'].shape[0]
    # print(...) with one argument behaves the same on Python 2 and Python 3,
    # unlike the bare print statement, which is a SyntaxError on Python 3.
    print('#training samples loaded by full SVM model: %d' % n_train)

    matrices = kernels.compute_kernel_matrix(
        data, model['train_data'],
        func_kernel=model['func_kernel'],
        recounting=report_margins_recounting)

    # The full (non-compact) model is flagged explicitly for the common routine.
    outputs = apply_common_nonlinear_svm(
        model['svm_model'],
        kernel_matrix=matrices['kernel_matrix'],
        kernel_matrix_recounting=matrices['kernel_matrix_recounting'],
        target_class=model['target_class'],
        model_is_compact=False)
    return outputs
def apply_compact_nonlinear_svm(model, data, use_approx=False,
                                report_margins_recounting=False):
    """
    Apply parsed compact SVM model to new data.
    This is a custom implementation which bypasses libsvm routine (faster).

    One of three code paths is taken:
      - use_approx is true: an approximate model is built from model['SVs']
        and applied with apply_approx_nonlinear_SVM;
      - recounting requested: the memory-light routine is used;
      - otherwise: the whole kernel matrix is computed at once and passed to
        apply_common_nonlinear_svm.

    @param model: model parsed from 'parse_compact_nonlinear_svm'
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param use_approx: if True, use the approximate SVM model instead of the exact one
    @type use_approx: bool
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs','margins','margins_recounting' which are each numpy array of floats
    @rtype: dictionary of multiple numpy.array
    """
    # Approximate model path.
    if use_approx:
        approx_model = compute_approx_nonlinear_SVM(model, model['SVs'])
        return apply_approx_nonlinear_SVM(
            approx_model, data,
            report_margins_recounting=report_margins_recounting)

    # Recounting (MER) path: memory light implementation.
    if report_margins_recounting:
        return apply_common_nonlinear_svm_memory_light(
            model['svm_model'], model['func_kernel'], model['SVs'], data,
            target_class=model['target_class'],
            report_margins_recounting=report_margins_recounting)

    # Speedy path without MER: one kernel matrix for all of the data.
    kernel_out = kernels.compute_kernel_matrix(
        data, model['SVs'],
        func_kernel=model['func_kernel'],
        recounting=report_margins_recounting)
    return apply_common_nonlinear_svm(
        model['svm_model'],
        kernel_matrix=kernel_out['kernel_matrix'],
        kernel_matrix_recounting=kernel_out['kernel_matrix_recounting'],
        target_class=model['target_class'])
def apply_compact_nonlinear_svm(model, data, use_approx=False, report_margins_recounting=False):
    """
    Apply parsed compact SVM model to new data.
    This is a custom implementation which bypasses libsvm routine (faster).

    Dispatches to one of three routines depending on the flags:
    the approximate model (use_approx), the memory-light routine
    (report_margins_recounting), or the plain full-kernel-matrix routine.

    @param model: model parsed from 'parse_compact_nonlinear_svm'
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param use_approx: if True, build and apply the approximate SVM model
    @type use_approx: bool
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs','margins','margins_recounting' which are each numpy array of floats
    @rtype: dictionary of multiple numpy.array
    """
    if use_approx:
        # The approximation is derived from the model's own support vectors.
        approx = compute_approx_nonlinear_SVM(model, model["SVs"])
        return apply_approx_nonlinear_SVM(
            approx, data, report_margins_recounting=report_margins_recounting
        )

    if report_margins_recounting:
        # memory light implementation to deal with MER
        return apply_common_nonlinear_svm_memory_light(
            model["svm_model"],
            model["func_kernel"],
            model["SVs"],
            data,
            target_class=model["target_class"],
            report_margins_recounting=report_margins_recounting,
        )

    # speedy version without MER
    kernel_result = kernels.compute_kernel_matrix(
        data,
        model["SVs"],
        func_kernel=model["func_kernel"],
        recounting=report_margins_recounting,
    )
    return apply_common_nonlinear_svm(
        model["svm_model"],
        kernel_matrix=kernel_result["kernel_matrix"],
        kernel_matrix_recounting=kernel_result["kernel_matrix_recounting"],
        target_class=model["target_class"],
    )
def apply_common_nonlinear_svm_memory_light(
    model, func_kernel, SVs, data, target_class=1, report_margins_recounting=False
):
    """
    Common routine to apply nonlinear compact libSVM on test data.

    Uses a smaller memory footprint during recounting than
    'apply_common_nonlinear_svm': the kernel (and its recounting tensor) is
    computed one test row at a time, and the recounting tensor is reduced to a
    per-dimension margin contribution immediately, so the full
    |data| x |SVs| x dim tensor is never held in memory.

    @param model: libsvm model (its rho, probA and probB attributes are read)
    @param func_kernel: kernel function
    @param SVs: row-wise support vector matrix
    @param data: test data in numpy format, one sample per row (2-D)
    @param target_class: target class
    @type target_class: int
    @param report_margins_recounting: if True, report recounting per data as well
    @return: dictionary with 'probs', 'margins' and 'margins_recounting';
             'margins_recounting' is None unless report_margins_recounting is set
    @rtype: dictionary with multiple numpy.array entries
    """
    # get SV weights
    weights = get_SV_weights_nonlinear_svm(model, target_class=target_class)

    # compute kernel_matrix and kernel_matrix_recounting
    # in memory efficient way
    n1 = data.shape[0]
    # Take the feature dimension from the array shape rather than from
    # data[0], so that data with zero rows does not raise IndexError.
    dim = data.shape[1]
    n2 = SVs.shape[0]

    # kernel matrix is |data|-by-|SVs|
    kernel_matrix = np.zeros((n1, n2))

    margins_recounting = None
    if report_margins_recounting:
        margins_recounting = np.zeros((n1, dim))

    # Reusable 1-by-dim input buffer for the row-at-a-time kernel calls.
    _tmp_in = np.zeros((1, dim))
    for i in range(n1):
        _tmp_in[0] = data[i]
        # _tmp_out['kernel_matrix']: 1-by-|SVs|
        # _tmp_out['kernel_matrix_recounting']: 1 x|SVs| x dim
        _tmp_out = kernels.compute_kernel_matrix(
            _tmp_in, SVs, func_kernel=func_kernel, recounting=report_margins_recounting
        )
        kernel_matrix[i] = _tmp_out["kernel_matrix"][0]
        if report_margins_recounting:
            # Collapse the |SVs| axis right away: (dim x |SVs|) . weights -> dim
            margins_recounting[i] = np.dot(_tmp_out["kernel_matrix_recounting"][0].T, weights)

    # this part needs to be updated further for more generalization, to select SV row/columns
    margins = np.dot(kernel_matrix, weights)

    # compute probs, using platt scaling
    rho = model.rho[0]
    probA = model.probA[0]
    probB = model.probB[0]
    probs = 1.0 / (1.0 + np.exp((margins - rho) * probA + probB))

    # When the target class sits in column 1, flip signs/probabilities so
    # the outputs are expressed with respect to the target class.
    idx_target = get_column_idx_for_class(model, target_class)
    if idx_target == 1:
        margins = -margins
        probs = 1.0 - probs
        if margins_recounting is not None:
            margins_recounting = -margins_recounting

    return {
        "margins": margins,
        "probs": probs,
        "margins_recounting": margins_recounting,
    }
# Kernel settings for this dataset: normalization is always on, gamma comes
# from the per-dataset parameter table.
normalization = True
gamma = params[dataset_id]['gamma']

# Single-argument print call: same output on Python 2, valid on Python 3.
print("Computing Kernels...")

# Kernel matrices are cached on disk as .npy files under ./cache/.
# NOTE(review): the cache file names depend only on dataset_id, so changing
# gamma, kernel, spectrum_size or feature_extractor will silently reuse
# matrices computed with the old settings -- clear ./cache/ after such changes.
if not os.path.exists('./cache/'):
    os.makedirs('./cache/')
K_train_path = './cache/K_train_dataset_%s.npy' % dataset_id
K_test_path = './cache/K_test_dataset_%s.npy' % dataset_id

if os.path.exists(K_train_path) and os.path.exists(K_test_path):
    # Both matrices are cached: load them instead of recomputing.
    K_train = np.load(K_train_path)
    K_test = np.load(K_test_path)
else:
    # Train kernel: train sequences against themselves (single-argument call).
    K_train = compute_kernel_matrix(
        dataset.dataset['train']['sequences'],
        spectrum_size=spectrum_size,
        feature_extractor=feature_extractor,
        kernel=kernel,
        normalization=normalization,
        gamma=gamma)
    # Test kernel: test sequences against the train sequences.
    K_test = compute_kernel_matrix(
        dataset.dataset['test']['sequences'],
        dataset.dataset['train']['sequences'],
        spectrum_size=spectrum_size,
        feature_extractor=feature_extractor,
        kernel=kernel,
        normalization=normalization,
        gamma=gamma)
    np.save(K_train_path, K_train)
    np.save(K_test_path, K_test)

# Instantiate the SVM with the per-dataset C value.
C = params[dataset_id]['C']
my_svm = MySVM(C=C, dual=True, verbose=True)
def apply_common_nonlinear_svm_memory_light(model, func_kernel, SVs, data,
                                            target_class=1,
                                            report_margins_recounting=False):
    """
    Common routine to apply nonlinear compact libSVM on test data.

    Uses a smaller memory footprint during recounting than
    'apply_common_nonlinear_svm': kernel values are computed for one test row
    at a time and the per-row recounting tensor is reduced against the SV
    weights immediately, instead of keeping a |data| x |SVs| x dim tensor.

    @param model: libsvm model (its rho, probA and probB attributes are read)
    @param func_kernel: kernel function
    @param SVs: row-wise support vector matrix
    @param data: test data in numpy format, one sample per row (2-D)
    @param target_class: target class
    @type target_class: int
    @param report_margins_recounting: if True, report recounting per data as well
    @return: dictionary with 'probs', 'margins' and 'margins_recounting';
             'margins_recounting' is None unless report_margins_recounting is set
    @rtype: dictionary with multiple numpy.array entries
    """
    # get SV weights
    weights = get_SV_weights_nonlinear_svm(model, target_class=target_class)

    # compute kernel_matrix and kernel_matrix_recounting
    # in memory efficient way
    n1 = data.shape[0]
    # Use the array shape for the feature dimension; indexing data[0] would
    # raise IndexError when data has no rows.
    dim = data.shape[1]
    n2 = SVs.shape[0]

    # kernel matrix is |data|-by-|SVs|
    kernel_matrix = np.zeros((n1, n2))

    margins_recounting = None
    if report_margins_recounting:
        margins_recounting = np.zeros((n1, dim))

    # Single-row buffer reused for every kernel call.
    _tmp_in = np.zeros((1, dim))
    for i in range(n1):
        _tmp_in[0] = data[i]
        # _tmp_out['kernel_matrix']: 1-by-|SVs|
        # _tmp_out['kernel_matrix_recounting']: 1 x|SVs| x dim
        _tmp_out = kernels.compute_kernel_matrix(
            _tmp_in, SVs,
            func_kernel=func_kernel,
            recounting=report_margins_recounting)
        kernel_matrix[i] = _tmp_out['kernel_matrix'][0]
        if report_margins_recounting:
            # Reduce over the SV axis now: (dim x |SVs|) . weights -> dim
            margins_recounting[i] = np.dot(
                _tmp_out['kernel_matrix_recounting'][0].T, weights)

    # this part needs to be updated further for more generalization, to select SV row/columns
    margins = np.dot(kernel_matrix, weights)

    # compute probs, using platt scaling
    rho = model.rho[0]
    probA = model.probA[0]
    probB = model.probB[0]
    probs = 1.0 / (1.0 + np.exp((margins - rho) * probA + probB))

    # If the target class is in column 1, negate margins (and recounting) and
    # complement the probabilities so they refer to the target class.
    idx_target = get_column_idx_for_class(model, target_class)
    if idx_target == 1:
        margins = -margins
        probs = 1.0 - probs
        if margins_recounting is not None:
            margins_recounting = -margins_recounting

    outputs = {
        'margins': margins,
        'probs': probs,
        'margins_recounting': margins_recounting,
    }
    return outputs