Beispiel #1
0
def apply_full_nonlinear_svm(model, data, report_margins_recounting=False):
    """ Apply parsed full SVM model (original libSVM model with embedded SVs)
    This is a custom implementation which bypasses libsvm routine (faster).

    The kernel matrix between ``data`` and the training data stored in the
    model is computed with ``kernels.compute_kernel_matrix`` and then handed
    to ``apply_common_nonlinear_svm`` with ``model_is_compact=False``.

    @param model: model parsed by event_agent_generator.parse_full_SVM_model;
                  the entries 'train_data', 'func_kernel', 'svm_model' and
                  'target_class' are read here
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs' and 'margins', which are each numpy array of floats
             (the dictionary is returned unchanged from apply_common_nonlinear_svm)
    @rtype: dictionary of float numpy arrays

    todo: add mer
    """

    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print("#training samples loaded by full SVM model: %d" % model["train_data"].shape[0])

    # kernel values (and, when requested, their per-bin breakdown) between
    # the test rows and the training rows embedded in the model
    matrices = kernels.compute_kernel_matrix(
        data, model["train_data"], func_kernel=model["func_kernel"], recounting=report_margins_recounting
    )

    outputs = apply_common_nonlinear_svm(
        model["svm_model"],
        kernel_matrix=matrices["kernel_matrix"],
        kernel_matrix_recounting=matrices["kernel_matrix_recounting"],
        target_class=model["target_class"],
        model_is_compact=False,
    )

    return outputs
Beispiel #2
0
def apply_full_nonlinear_svm(model, data, report_margins_recounting=False):
    """ Apply parsed full SVM model (original libSVM model with embedded SVs)
    This is a custom implementation which bypasses libsvm routine (faster).

    Steps: compute the kernel matrix of ``data`` against model['train_data']
    via ``kernels.compute_kernel_matrix``, then delegate scoring to
    ``apply_common_nonlinear_svm`` (called with ``model_is_compact=False``).

    @param model: model parsed by event_agent_generator.parse_full_SVM_model;
                  must provide 'train_data', 'func_kernel', 'svm_model' and
                  'target_class'
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs' and 'margins', which are each numpy array of floats
             (passed through as returned by apply_common_nonlinear_svm)
    @rtype: dictionary of float numpy arrays

    todo: add mer
    """

    # print(...) with one pre-formatted string behaves the same on Python 2
    # and avoids the Python-2-only print statement (SyntaxError on Python 3).
    print('#training samples loaded by full SVM model: %d' %
          model['train_data'].shape[0])

    matrices = kernels.compute_kernel_matrix(
        data,
        model['train_data'],
        func_kernel=model['func_kernel'],
        recounting=report_margins_recounting)

    # full model: the common routine is told the model is not compact
    return apply_common_nonlinear_svm(
        model['svm_model'],
        kernel_matrix=matrices['kernel_matrix'],
        kernel_matrix_recounting=matrices['kernel_matrix_recounting'],
        target_class=model['target_class'],
        model_is_compact=False)
Beispiel #3
0
def apply_compact_nonlinear_svm(model,
                                data,
                                use_approx=False,
                                report_margins_recounting=False):
    """ Score new data with a parsed compact SVM model.
    Custom implementation that does not go through the libsvm routine (faster).

    Exactly one of three paths is taken:
      - use_approx: an approximate model is built from the SVs and applied;
      - no recounting: one full kernel matrix is computed and scored;
      - recounting: the memory-light routine is used instead.

    @param model: model parsed from  'parse_compact_nonlinear_svm'
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param use_approx: if True, build a model with compute_approx_nonlinear_SVM
                       and apply it with apply_approx_nonlinear_SVM
    @type use_approx: bool
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs','margins','margins_recounting' which are each numpy array of floats
             (returned as produced by the routine that was delegated to)
    @rtype: dictionary of multiple numpy.array
    """

    if use_approx:
        approx_model = compute_approx_nonlinear_SVM(model, model['SVs'])
        return apply_approx_nonlinear_SVM(
            approx_model,
            data,
            report_margins_recounting=report_margins_recounting)

    if report_margins_recounting:
        # MER requested: use the memory light implementation
        return apply_common_nonlinear_svm_memory_light(
            model['svm_model'],
            model['func_kernel'],
            model['SVs'],
            data,
            target_class=model['target_class'],
            report_margins_recounting=report_margins_recounting)

    # speedy version without MER: one kernel matrix for all rows at once
    kernel_out = kernels.compute_kernel_matrix(
        data,
        model['SVs'],
        func_kernel=model['func_kernel'],
        recounting=report_margins_recounting)
    return apply_common_nonlinear_svm(
        model['svm_model'],
        kernel_matrix=kernel_out['kernel_matrix'],
        kernel_matrix_recounting=kernel_out['kernel_matrix_recounting'],
        target_class=model['target_class'])
Beispiel #4
0
def apply_compact_nonlinear_svm(model, data, use_approx=False, report_margins_recounting=False):
    """ Apply a parsed compact SVM model to new data.
    Custom implementation which bypasses the libsvm routine (faster).

    Dispatches to one of three scoring routines depending on the flags:
    the approximate model (use_approx), the memory-light routine (when
    recounting is requested), or the plain full-kernel-matrix routine.

    @param model: model parsed from  'parse_compact_nonlinear_svm'
    @param data: row-wise data vector/matrix in numpy format
    @type data: numpy.array
    @param use_approx: if True, score with the model produced by
                       compute_approx_nonlinear_SVM from model["SVs"]
    @type use_approx: bool
    @param report_margins_recounting: if True, report bin-wise contribution towards margin for every data_test
    @type report_margins_recounting: bool
    @return: dictionary with 'probs','margins','margins_recounting' which are each numpy array of floats
             (whatever the selected routine returns)
    @rtype: dictionary of multiple numpy.array
    """

    # approximate path takes precedence over everything else
    if use_approx:
        model_approx = compute_approx_nonlinear_SVM(model, model["SVs"])
        result = apply_approx_nonlinear_SVM(
            model_approx, data, report_margins_recounting=report_margins_recounting
        )
        return result

    # memory light implementation to deal with MER
    if report_margins_recounting:
        result = apply_common_nonlinear_svm_memory_light(
            model["svm_model"],
            model["func_kernel"],
            model["SVs"],
            data,
            target_class=model["target_class"],
            report_margins_recounting=report_margins_recounting,
        )
        return result

    # speedy version without MER
    kernel_result = kernels.compute_kernel_matrix(
        data, model["SVs"], func_kernel=model["func_kernel"], recounting=report_margins_recounting
    )
    result = apply_common_nonlinear_svm(
        model["svm_model"],
        kernel_matrix=kernel_result["kernel_matrix"],
        kernel_matrix_recounting=kernel_result["kernel_matrix_recounting"],
        target_class=model["target_class"],
    )
    return result
Beispiel #5
0
def apply_common_nonlinear_svm_memory_light(
    model, func_kernel, SVs, data, target_class=1, report_margins_recounting=False
):
    """ Common routine to apply nonlinear compact libSVM on test data,
    Uses smaller memory foot print during recounting, than 'apply_common_nonlinear_svm'

    The kernel is evaluated one test row at a time, so the per-row recounting
    output of the kernel routine is reduced to a single ``dim``-length vector
    before the next row is processed.

    @param model: libsvm model (its rho[0], probA[0] and probB[0] are read)
    @param func_kernel: kernel function
    @param SVs: row-wise support vector matrix
    @param data: test data in numpy format; must be 2-D (rows x dim), may have zero rows
    @param target_class: target class
    @type target_class: int
    @param report_margins_recounting: if True, report recounting per data as well
    @return: dictionary with 'probs', 'margins' and 'margins_recounting'
             ('margins_recounting' is None unless report_margins_recounting is set)
    @rtype: dictionary with multiple numpy.array entries
    """

    # get SV weights
    weights = get_SV_weights_nonlinear_svm(model, target_class=target_class)

    # compute kernel_matrix and kernel_matrix_recounting
    # in memory efficient way

    n1 = data.shape[0]
    # Read the feature dimension from the shape rather than from data[0],
    # so that a batch with zero rows does not raise IndexError.
    dim = data.shape[1]
    n2 = SVs.shape[0]

    # kernel matrix is |data|-by-|SVs|
    kernel_matrix = np.zeros((n1, n2))

    margins_recounting = None
    if report_margins_recounting:
        margins_recounting = np.zeros((n1, dim))

    # single-row buffer reused for every kernel call
    _tmp_in = np.zeros((1, dim))
    for i in range(n1):
        _tmp_in[0] = data[i]
        # _tmp_out['kernel_matrix']: 1-by-|SVs|
        # _tmp_out['kernel_matrix_recounting']: 1 x|SVs| x dim
        _tmp_out = kernels.compute_kernel_matrix(
            _tmp_in, SVs, func_kernel=func_kernel, recounting=report_margins_recounting
        )
        kernel_matrix[i] = _tmp_out["kernel_matrix"][0]
        if report_margins_recounting:
            # collapse the |SVs| axis with the SV weights -> one value per bin
            margins_recounting[i] = np.dot(_tmp_out["kernel_matrix_recounting"][0].T, weights)

    # this part needs to be updated further for more generalization, to select SV row/columns
    margins = np.dot(kernel_matrix, weights)

    # compute probs, using platt scaling
    rho = model.rho[0]
    probA = model.probA[0]
    probB = model.probB[0]
    probs = 1.0 / (1.0 + np.exp((margins - rho) * probA + probB))

    # When the target class maps to column index 1, flip the sign of the
    # margins (and recounting) and complement the probabilities.
    # NOTE(review): presumably index 1 means the target is the model's second
    # label -- confirm against get_column_idx_for_class.
    idx_target = get_column_idx_for_class(model, target_class)
    if idx_target == 1:
        margins = -margins
        probs = 1.0 - probs
        if margins_recounting is not None:
            margins_recounting = -margins_recounting

    outputs = dict()
    outputs["margins"] = margins
    outputs["probs"] = probs
    outputs["margins_recounting"] = margins_recounting

    return outputs
    normalization = True
    gamma = params[dataset_id]['gamma']

    print "Computing Kernels..."
    if not os.path.exists('./cache/'):
        os.makedirs('./cache/')
    K_train_path = './cache/K_train_dataset_%s.npy' % dataset_id
    K_test_path = './cache/K_test_dataset_%s.npy' % dataset_id

    if os.path.exists(K_train_path) and os.path.exists(K_test_path):
        K_train = np.load(K_train_path)
        K_test = np.load(K_test_path)
    else:
        K_train = compute_kernel_matrix(dataset.dataset['train']['sequences'],
                                        spectrum_size=spectrum_size,
                                        feature_extractor=feature_extractor,
                                        kernel=kernel,
                                        normalization=normalization,
                                        gamma=gamma)
        K_test = compute_kernel_matrix(dataset.dataset['test']['sequences'],
                                       dataset.dataset['train']['sequences'],
                                       spectrum_size=spectrum_size,
                                       feature_extractor=feature_extractor,
                                       kernel=kernel,
                                       normalization=normalization,
                                       gamma=gamma)
        np.save(K_train_path, K_train)
        np.save(K_test_path, K_test)

    C = params[dataset_id]['C']
    my_svm = MySVM(C=C, dual=True, verbose=True)
Beispiel #7
0
def apply_common_nonlinear_svm_memory_light(model,
                                            func_kernel,
                                            SVs,
                                            data,
                                            target_class=1,
                                            report_margins_recounting=False):
    """ Common routine to apply nonlinear compact libSVM on test data,
    Uses smaller memory foot print during recounting, than 'apply_common_nonlinear_svm'

    Test rows are pushed through the kernel routine one at a time; when
    recounting is requested, each row's recounting output is immediately
    reduced with the SV weights to a vector of length ``dim``.

    @param model: libsvm model (rho[0], probA[0], probB[0] are used for the probabilities)
    @param func_kernel: kernel function
    @param SVs: row-wise support vector matrix
    @param data: test data in numpy format; 2-D (rows x dim), zero rows allowed
    @param target_class: target class
    @type target_class: int
    @param report_margins_recounting: if True, report recounting per data as well
    @return: dictionary with 'probs', 'margins' and 'margins_recounting'
             (the latter is None when report_margins_recounting is not set)
    @rtype: dictionary with multiple numpy.array entries
    """

    # get SV weights
    weights = get_SV_weights_nonlinear_svm(model, target_class=target_class)

    # compute kernel_matrix and kernel_matrix_recounting
    # in memory efficient way

    n1 = data.shape[0]
    # data.shape[1] instead of len(data[0]): same value for 2-D input, but
    # does not fail with IndexError when there are no rows to score.
    dim = data.shape[1]
    n2 = SVs.shape[0]

    # kernel matrix is |data|-by-|SVs|
    kernel_matrix = np.zeros((n1, n2))

    margins_recounting = None
    if report_margins_recounting:
        margins_recounting = np.zeros((n1, dim))

    # one-row scratch buffer, overwritten on every iteration
    _tmp_in = np.zeros((1, dim))
    for i in range(n1):
        _tmp_in[0] = data[i]
        # _tmp_out['kernel_matrix']: 1-by-|SVs|
        # _tmp_out['kernel_matrix_recounting']: 1 x|SVs| x dim
        _tmp_out = kernels.compute_kernel_matrix(
            _tmp_in,
            SVs,
            func_kernel=func_kernel,
            recounting=report_margins_recounting)
        kernel_matrix[i] = _tmp_out['kernel_matrix'][0]
        if report_margins_recounting:
            # weight the per-SV contributions and sum over SVs
            margins_recounting[i] = np.dot(
                _tmp_out['kernel_matrix_recounting'][0].T, weights)

    # this part needs to be updated further for more generalization, to select SV row/columns
    margins = np.dot(kernel_matrix, weights)

    # compute probs, using platt scaling
    rho = model.rho[0]
    probA = model.probA[0]
    probB = model.probB[0]
    probs = 1.0 / (1.0 + np.exp((margins - rho) * probA + probB))

    # Column index 1 for the target class: negate margins/recounting and
    # complement the probabilities.
    # NOTE(review): presumably this handles the target being the second label
    # of the libsvm model -- verify against get_column_idx_for_class.
    idx_target = get_column_idx_for_class(model, target_class)
    if idx_target == 1:
        margins = -margins
        probs = 1.0 - probs
        if margins_recounting is not None:
            margins_recounting = -margins_recounting

    outputs = dict()
    outputs['margins'] = margins
    outputs['probs'] = probs
    outputs['margins_recounting'] = margins_recounting

    return outputs