Ejemplo n.º 1
0
    def __init__(self, matrix=None, **kwargs):
        """Initialize PrecomputedSGKernel from a precomputed kernel matrix.

        Parameters
        ----------
        matrix : SGKernel or Kernel or ndarray
          Kernel matrix to be used
        """
        # Convert the input into something shogun's CustomKernel accepts
        if isinstance(matrix, SGKernel):
            k = matrix._k  # reuse the internal shogun kernel directly
        elif isinstance(matrix, Kernel):
            k = matrix.as_raw_np()  # convert to a NumPy array otherwise
        else:
            # Coerce anything else to ndarray first --
            # otherwise SG would segfault ;-)
            k = np.array(matrix)

        SGKernel.__init__(self, **kwargs)

        if versions['shogun:rev'] >= 4455:
            self._k = sgk.CustomKernel(k)
        else:
            # Parenthesized raise: valid in both Python 2 and 3
            # (the original "raise E, msg" form is Python-2-only).
            raise RuntimeError(
                "Cannot create PrecomputedSGKernel using current version"
                " of shogun -- please upgrade")
Ejemplo n.º 2
0
def _run_auc():
	"""Run the AUC kernel test, building its Gaussian subkernel first."""

	# Build and initialize the Gaussian subkernel on random real data.
	subk_params = {
		'name': 'Gaussian',
		'data': dataop.get_rand(),
		'feature_class': 'simple',
		'feature_type': 'Real',
		'args': {'key': ('size', 'width'), 'val': (10, 1.7)}
	}
	subk_feats = featop.get_features(
		subk_params['feature_class'], subk_params['feature_type'],
		subk_params['data'])
	subk = kernel.GaussianKernel(*subk_params['args']['val'])
	subk.init(subk_feats['train'], subk_feats['test'])
	output = fileop.get_output(category.KERNEL, subk_params, 'subkernel0_')

	# Run the AUC kernel on random word data, handing the subkernel in
	# as a constructor argument.
	auc_params = {
		'name': 'AUC',
		'data': dataop.get_rand(numpy.ushort, num_feats=2,
			max_train=dataop.NUM_VEC_TRAIN, max_test=dataop.NUM_VEC_TEST),
		'feature_class': 'simple',
		'feature_type': 'Word',
		'accuracy': 1e-8,
		'args': {'key': ('size', 'subkernel'), 'val': (10, subk)}
	}
	auc_feats = featop.get_features(
		auc_params['feature_class'], auc_params['feature_type'],
		auc_params['data'])
	_compute(auc_feats, auc_params, output)
Ejemplo n.º 3
0
def _run_combined():
	"""Run Combined kernel.

	Assembles a CombinedKernel from three string subkernels over DNA
	data, collects per-subkernel outputs, and writes the combined
	train/test kernel matrices to file.
	"""

	kern = kernel.CombinedKernel()
	feats = {'train': CombinedFeatures(), 'test': CombinedFeatures()}
	output = {}
	params = {
		'name': 'Combined',
		'accuracy': 1e-7
	}
	subkdata = [
		{
			'name': 'FixedDegreeString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size', 'degree'), 'val': (10, 3)}
		},
		{
			'name': 'PolyMatchString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {
				'key': ('size', 'degree', 'inhomogene'),
				'val': (10, 3, True)
			}
		},
		{
			'name': 'LocalAlignmentString',
			'feature_class': 'string',
			'feature_type': 'Char',
			'args': {'key': ('size',), 'val': (10,)}
		}
	]

	for i, sd in enumerate(subkdata):
		# Resolve the kernel class by attribute lookup instead of eval():
		# identical effect, without eval's code-execution hazard.
		kfun = getattr(kernel, sd['name'] + 'Kernel')
		subk = kfun(*sd['args']['val'])
		sd['data'] = dataop.get_dna()
		subkfeats = featop.get_features(
			sd['feature_class'], sd['feature_type'], sd['data'])
		output.update(
			fileop.get_output(category.KERNEL, sd, 'subkernel' + str(i) + '_'))

		kern.append_kernel(subk)
		feats['train'].append_feature_obj(subkfeats['train'])
		feats['test'].append_feature_obj(subkfeats['test'])

	output.update(fileop.get_output(category.KERNEL, params))
	kern.init(feats['train'], feats['train'])
	output['kernel_matrix_train'] = kern.get_kernel_matrix()
	kern.init(feats['train'], feats['test'])
	output['kernel_matrix_test'] = kern.get_kernel_matrix()

	fileop.write(category.KERNEL, output)
Ejemplo n.º 4
0
def _run_feats_byte():
	"""Run kernel with ByteFeatures."""

	# Linear kernel over random unsigned-byte data.
	params = {
		'name': 'Linear',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Byte',
		'data': dataop.get_rand(dattype=numpy.ubyte),
		'normalizer': kernel.AvgDiagKernelNormalizer()
	}
	byte_feats = featop.get_features(
		params['feature_class'], params['feature_type'],
		params['data'], RAWBYTE)

	_compute(byte_feats, params)
Ejemplo n.º 5
0
def _run_feats_word():
	"""Run kernel with WordFeatures."""

	# Linear kernel over random word (ushort) data capped at max_value.
	max_value = 42
	params = {
		'name': 'Linear',
		'accuracy': 1e-8,
		'feature_class': 'simple',
		'feature_type': 'Word',
		'data': dataop.get_rand(
			dattype=numpy.ushort, max_train=max_value, max_test=max_value),
		'normalizer': kernel.AvgDiagKernelNormalizer()
	}
	word_feats = featop.get_features(
		params['feature_class'], params['feature_type'], params['data'])

	_compute(word_feats, params)
Ejemplo n.º 6
0
def _run_custom():
    """Run Custom kernel.

    Feeds a CustomKernel with a precomputed matrix in three ways
    (triangle-from-triangle, triangle-from-full, full-from-full) and
    writes the resulting kernel matrices to file.
    """

    params = {
        'name': 'Custom',
        'accuracy': 1e-7,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    dim_square = 7
    data = dataop.get_rand(dim_square=dim_square)
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], data)
    data = data['train']
    # Symmetrize so the matrix is usable as a precomputed kernel matrix.
    symdata = data + data.T

    # Row-major lower triangle of symdata. tril_indices yields the same
    # (row, col<=row) order as the original nested loop, and avoids the
    # Python-2-only xrange.
    lowertriangle = symdata[numpy.tril_indices(symdata.shape[0])]

    kern = kernel.CustomKernel()
    kern.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = kern.get_kernel_matrix()
    kern.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = kern.get_kernel_matrix()
    kern.set_full_kernel_matrix_from_full(data)
    km_fullfull = kern.get_kernel_matrix()

    output = {
        'kernel_matrix_triangletriangle': km_triangletriangle,
        'kernel_matrix_fulltriangle': km_fulltriangle,
        'kernel_matrix_fullfull': km_fullfull,
        'kernel_symdata': numpy.matrix(symdata),
        'kernel_data': numpy.matrix(data),
        'kernel_dim_square': dim_square
    }
    output.update(fileop.get_output(category.KERNEL, params))

    fileop.write(category.KERNEL, output)
Ejemplo n.º 7
0
    # shogun
    try:
        import shogun
        from shogun import (Kernel as sgKernel, Features as sgFeatures,
                            Classifier as sgClassifier)
    except ImportError, exc:
        config.ExternalDepFailed('shogun', exc)
    else:
        if os.getenv('MDP_DISABLE_SHOGUN'):
            config.ExternalDepFailed('shogun', 'disabled')
        else:
            # From now on just support shogun >= 1.0
            # Between 0.10 to 1.0 there are too many API changes...
            try:
                version = sgKernel.Version_get_version_release()
            except AttributeError:
                config.ExternalDepFailed(
                    'shogun', 'too old, upgrade to at least version 1.0')
            else:
                if not version.startswith('v1.'):
                    config.ExternalDepFailed(
                        'shogun', 'too old, upgrade to at least version 1.0.')
                else:
                    config.ExternalDepFound('shogun', version)

    # libsvm
    try:
        import svm as libsvm
        libsvm.libsvm
    except ImportError, exc:
Ejemplo n.º 8
0
def _run_feats_string():
    """Run kernel with StringFeatures."""

    params = {
        'accuracy': 1e-9,
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char',
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])

    # Each section rebinds name/accuracy/args in the shared params dict
    # and reruns _compute on the same string features.
    params.update(name='FixedDegreeString',
                  args={'key': ('size', 'degree'), 'val': (10, 3)})
    _compute(feats, params)

    params.update(accuracy=0, name='LocalAlignmentString',
                  args={'key': ('size', ), 'val': (10, )})
    _compute(feats, params)

    params.update(accuracy=1e-10, name='PolyMatchString',
                  args={'key': ('size', 'degree', 'inhomogene'),
                        'val': (10, 3, True)})
    _compute(feats, params)
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    params.update(accuracy=1e-15, name='SimpleLocalityImprovedString',
                  args={'key': ('size', 'length', 'inner_degree',
                                'outer_degree'),
                        'val': (10, 5, 7, 5)})
    _compute(feats, params)
    # buggy:
    #params['name']='LocalityImprovedString'
    #_compute(feats, params)

    params.update(name='WeightedDegreeString', accuracy=1e-9,
                  args={'key': ('degree', ), 'val': (20, )})
    _compute(feats, params)
    params['args'] = {'key': ('degree', ), 'val': (1, )}
    _compute(feats, params)

    params.update(name='WeightedDegreePositionString',
                  args={'key': ('size', 'degree'), 'val': (10, 20)})
    _compute(feats, params)
    params['args'] = {'key': ('size', 'degree'), 'val': (10, 1)}
    _compute(feats, params)

    params.update(name='OligoString',
                  args={'key': ('size', 'k', 'width'), 'val': (10, 3, 1.2)})
    _compute(feats, params)
    params['args'] = {'key': ('size', 'k', 'width'), 'val': (10, 4, 1.7)}
    _compute(feats, params)

    params.update(name='LinearString', accuracy=1e-8,
                  normalizer=kernel.AvgDiagKernelNormalizer())
    del params['args']
    _compute(feats, params)
Ejemplo n.º 9
0
def _run_feats_real():
    """Run kernel with RealFeatures."""

    params = {
        'data': dataop.get_rand(),
        'accuracy': 1e-8,
        'feature_class': 'simple',
        'feature_type': 'Real'
    }
    feats = featop.get_features(params['feature_class'],
                                params['feature_type'], params['data'])
    sparsefeats = featop.get_features(params['feature_class'],
                                      params['feature_type'],
                                      params['data'],
                                      sparse=True)

    # Each section rebinds name/accuracy/args in the shared params dict
    # and reruns _compute on the dense or sparse features.
    params.update(name='Gaussian',
                  args={'key': ('size', 'width',), 'val': (10, 1.3)})
    _compute(feats, params)

    params.update(name='GaussianShift',
                  args={'key': ('size', 'width', 'max_shift', 'shift_step'),
                        'val': (10, 1.3, 2, 1)})
    _compute(feats, params)

    params.update(name='SparseGaussian',
                  args={'key': ('size', 'width'), 'val': (10, 1.7)})
    _compute(sparsefeats, params)

    params.update(accuracy=0, name='Const',
                  args={'key': ('c', ), 'val': (23., )})
    _compute(feats, params)

    params.update(name='Diag',
                  args={'key': ('size', 'diag'), 'val': (10, 23.)})
    _compute(feats, params)

    params.update(accuracy=1e-9, name='Sigmoid',
                  args={'key': ('size', 'gamma', 'coef0'),
                        'val': (10, 1.1, 1.3)})
    _compute(feats, params)
    params['args']['val'] = (10, 0.5, 0.7)
    _compute(feats, params)

    params.update(name='Chi2',
                  args={'key': ('size', 'width'), 'val': (10, 1.2)})
    _compute(feats, params)

    params.update(accuracy=1e-8, name='SparsePoly',
                  args={'key': ('size', 'degree', 'inhomogene'),
                        'val': (10, 3, True)})
    _compute(sparsefeats, params)
    params['args']['val'] = (10, 3, False)
    _compute(sparsefeats, params)

    params.update(name='Poly',
                  normalizer=kernel.SqrtDiagKernelNormalizer(),
                  args={'key': ('size', 'degree', 'inhomogene'),
                        'val': (10, 3, True)})
    _compute(feats, params)
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    # The linear kernels take no args and share the AvgDiag normalizer.
    params.update(normalizer=kernel.AvgDiagKernelNormalizer())
    del params['args']
    params['name'] = 'Linear'
    _compute(feats, params)
    params['name'] = 'SparseLinear'
    _compute(sparsefeats, params)
Ejemplo n.º 10
0
def set_configuration():
    """Probe optional external dependencies and register the outcome.

    Each dependency (parallel python, shogun, libsvm, joblib, sklearn)
    is reported to ``config`` via ExternalDepFound/ExternalDepFailed.
    A matching MDP_DISABLE_<NAME> environment variable forces a
    dependency to be reported as disabled even when it is importable.
    """
    # set python version
    config.ExternalDepFound('python',
                            '.'.join([str(x) for x in sys.version_info]))
    version = mdp.__version__
    if mdp.__revision__:
        version += ', ' + mdp.__revision__
    config.ExternalDepFound('mdp', version)

    # parallel python dependency
    try:
        import pp
        # set pp secret if not there already
        # (workaround for debian patch to pp that disables pp's default password)
        pp_secret = os.getenv('MDP_PP_SECRET') or 'mdp-pp-support-password'
        # module 'user' has been deprecated since python 2.6 and deleted
        # completely as of python 3.0.
        # Basically pp can not work on python 3 at the moment.
        import user
        if not hasattr(user, 'pp_secret'):
            user.pp_secret = pp_secret
    except ImportError as exc:
        config.ExternalDepFailed('parallel_python', exc)
    else:
        if os.getenv('MDP_DISABLE_PARALLEL_PYTHON'):
            config.ExternalDepFailed('parallel_python', 'disabled')
        else:
            # even if we can import pp, starting the server may still fail
            # for example with:
            # OSError: [Errno 12] Cannot allocate memory
            try:
                server = pp.Server()
                server.destroy()
            except Exception as exc:
                # no idea what exception the pp server may raise
                # we need to catch all here...
                config.ExternalDepFailed('parallel_python', exc)
            else:
                if _pp_needs_monkeypatching():
                    if os.getenv('MDP_DISABLE_MONKEYPATCH_PP'):
                        config.ExternalDepFailed(
                            'parallel_python',
                            pp.version + ' broken on Debian')
                    else:
                        config.ExternalDepFound('parallel_python',
                                                pp.version + '-monkey-patched')
                        config.pp_monkeypatch_dirname = tempfile.gettempdir()
                else:
                    config.ExternalDepFound('parallel_python', pp.version)

    # shogun
    try:
        import shogun
        from shogun import (Kernel as sgKernel, Features as sgFeatures,
                            Classifier as sgClassifier)
    except ImportError as exc:
        config.ExternalDepFailed('shogun', exc)
    else:
        if os.getenv('MDP_DISABLE_SHOGUN'):
            config.ExternalDepFailed('shogun', 'disabled')
        else:
            # From now on just support shogun < 2.0
            # Between 0.10 to 1.0 or beyond there are too many API changes...
            try:
                version = sgKernel.Version_get_version_release()
            except AttributeError:
                # a shogun too old to even expose the version query
                config.ExternalDepFailed('shogun',
                                         'only shogun v1 is supported')
            else:
                if not version.startswith('v1.'):
                    config.ExternalDepFailed('shogun',
                                             'only shogun v1 is supported')
                else:
                    config.ExternalDepFound('shogun', version)

    # libsvm
    try:
        import svm as libsvm
        libsvm.libsvm
    except ImportError as exc:
        config.ExternalDepFailed('libsvm', exc)
    except AttributeError as exc:
        # module imports but lacks the 'libsvm' attribute -> too old
        config.ExternalDepFailed('libsvm', 'libsvm version >= 2.91 required')
    else:
        if os.getenv('MDP_DISABLE_LIBSVM'):
            config.ExternalDepFailed('libsvm', 'disabled')
        else:
            config.ExternalDepFound('libsvm', libsvm.libsvm._name)

    # joblib
    try:
        import joblib
    except ImportError as exc:
        config.ExternalDepFailed('joblib', exc)
    else:
        version = joblib.__version__
        if os.getenv('MDP_DISABLE_JOBLIB'):
            config.ExternalDepFailed('joblib', 'disabled')
        elif _version_too_old(version, (0, 4, 3)):
            config.ExternalDepFailed('joblib',
                                     'version %s is too old' % version)
        else:
            config.ExternalDepFound('joblib', version)

    # sklearn
    try:
        try:
            # the package was renamed from scikits.learn to sklearn;
            # try the modern name first
            import sklearn
        except ImportError:
            import scikits.learn as sklearn
        version = sklearn.__version__
    except ImportError as exc:
        config.ExternalDepFailed('sklearn', exc)
    except AttributeError as exc:
        config.ExternalDepFailed('sklearn', exc)
    else:
        if os.getenv('MDP_DISABLE_SKLEARN'):
            config.ExternalDepFailed('sklearn', 'disabled')
        elif _version_too_old(version, (0, 6)):
            config.ExternalDepFailed('sklearn',
                                     'version %s is too old' % version)
        else:
            config.ExternalDepFound('sklearn', version)
Ejemplo n.º 11
0
def _as_raw_sg(kernel):
    """Wrap the kernel's raw NumPy matrix in a shogun CustomKernel."""
    raw_matrix = kernel.as_raw_np()
    return sgk.CustomKernel(raw_matrix)
def train_attribute(attribute_id, C, split=0):
    """Train one per-attribute SVM classifier and write its predictions.

    Trains a LibSVM classifier on a combined chi2 kernel over all
    feature channels, Platt-scales its outputs on a held-out 10% of the
    training data, applies it to the test classes, and saves
    predictions, probabilities and labels under ./DAP/.

    Parameters
    ----------
    attribute_id : int
        Index into ``attributenames`` of the attribute to train.
    C : float
        SVM regularization constant.
    split : int
        0 uses the predefined train/test class lists; k > 0 uses the
        k-th block of 10 class names as the test set (a CV fold).

    Returns
    -------
    (prediction, probabilities, Ltst) for the test samples.
    """
    from shogun import Classifier, Features, Kernel, Distance
    attribute_id = int(attribute_id)
    print "# attribute ", attributenames[attribute_id]
    C = float(C)
    print "# C ", C

    # NOTE(review): dataset paths and the /scratch/chl kernel dumps are
    # hard-coded for a specific cluster environment.
    if split == 0:
        train_classes = loadstr(
            '/nfs3group/chlgrp/datasets/Animals_with_Attributes/trainclasses.txt'
        )
        test_classes = loadstr(
            '/nfs3group/chlgrp/datasets/Animals_with_Attributes/testclasses.txt'
        )
    else:
        classnames = loadstr(
            '/nfs3group/chlgrp/datasets/Animals_with_Attributes/classnames.txt'
        )
        startid = (split - 1) * 10
        stopid = split * 10
        test_classes = classnames[startid:stopid]
        train_classes = classnames[0:startid] + classnames[stopid:]

    Xtrn, Ltrn = create_data(train_classes, attribute_id)
    Xtst, Ltst = create_data(test_classes, attribute_id)

    # Degenerate case: all training labels identical, so no SVM can be
    # trained -- return a constant prediction based on the label prior.
    if min(Ltrn) == max(Ltrn):  # only 1 class
        Lprior = mean(Ltrn)
        prediction = sign(Lprior) * ones(len(Ltst))
        probabilities = 0.1 + 0.8 * 0.5 * (Lprior + 1.) * ones(
            len(Ltst))  # fallback
        return prediction, probabilities, Ltst

    #sg('loglevel', 'WARN')
    # Per-channel kernel width: median chi-square distance computed on a
    # 1-in-50 column subsample of the training data.
    widths = {}
    for feature in all_features:
        traindata = array(Xtrn[feature][:, ::50], float)  # used to be 5*offset
        trainfeat = Features.RealFeatures(traindata)
        DM = Distance.ChiSquareDistance(trainfeat,
                                        trainfeat).get_distance_matrix()
        widths[feature] = median(DM.flatten())
        del traindata, trainfeat, DM

    s = Classifier.LibSVM()  #sg('new_svm', 'LIBSVM')
    #sg('use_mkl', False)     # we use fixed weights here

    #sg('clean_features', 'TRAIN')
    #sg('clean_features', 'TEST')

    # 90/10 split of training samples: 9 of every 10 train the SVM,
    # every 10th is held out to fit the Platt scaling sigmoid.
    Lplatt_trn = concatenate([Ltrn[i::10]
                              for i in range(9)])  # 90% for training
    Lplatt_val = Ltrn[9::10]  # remaining 10% for platt scaling

    feats_trn = Features.CombinedFeatures()
    feats_val = Features.CombinedFeatures()
    for feature in all_features:
        Xplatt_trn = concatenate([Xtrn[feature][:, i::10] for i in range(9)],
                                 axis=1)
        feats_trn.append_feature_obj(
            Features.RealFeatures(ascontiguousarray(Xplatt_trn)))
        #sg('add_features', 'TRAIN', Xplatt_trn)
        Xplatt_val = Xtrn[feature][:, 9::10]
        feats_val.append_feature_obj(
            Features.RealFeatures(ascontiguousarray(Xplatt_val)))
        #sg('add_features', 'TEST', Xplatt_val)
        # release raw arrays once shogun holds the data
        del Xplatt_trn, Xplatt_val, Xtrn[feature]

    labels_trn = Features.Labels(Lplatt_trn)
    #sg('set_labels', 'TRAIN', Lplatt_trn)

    # One Chi2Kernel per feature channel, collected in a CombinedKernel.
    kernel = Kernel.CombinedKernel()
    #sg('set_kernel', 'COMBINED', 5000)
    for featureset in all_features:
        kernel.append_kernel(Kernel.Chi2Kernel(5000, widths[featureset] / 5.))
        #sg('add_kernel', 1., 'CHI2', 'REAL', 10, widths[featureset]/5. )

    kernel.init(feats_trn, feats_trn)
    K = kernel.get_kernel_matrix()
    K.tofile('/scratch/chl/cvfold%d_C%g_%02d-trn.kernel' %
             (split, C, attribute_id))
    del K

    s.set_max_train_time(600 * 60.)
    #sg('svm_max_train_time', 600*60.) # one hour should be plenty
    s.set_C(C, C)
    #sg('c', C)

    s.set_kernel(kernel)
    s.set_labels(labels_trn)
    #sg('init_kernel', 'TRAIN')
    try:
        s.train()
        #sg('train_classifier')
    except (RuntimeWarning, RuntimeError
            ):  # can't train, e.g. all samples have the same labels
        # Training failed: fall back to the constant prior prediction
        # and still write all the output files.
        Lprior = mean(Ltrn)
        prediction = sign(Lprior) * ones(len(Ltst))
        probabilities = 0.1 + 0.8 * 0.5 * (Lprior + 1.) * ones(len(Ltst))
        savetxt('./DAP/cvfold%d_C%g_%02d.txt' % (split, C, attribute_id),
                prediction)
        savetxt('./DAP/cvfold%d_C%g_%02d.prob' % (split, C, attribute_id),
                probabilities)
        savetxt('./DAP/cvfold%d_C%g_%02d.labels' % (split, C, attribute_id),
                Ltst)
        return prediction, probabilities, Ltst

    bias = s.get_bias()
    alphas = s.get_alphas()
    #[bias, alphas]=sg('get_svm')
    #print bias,alphas

    # Score the held-out validation samples to fit the Platt sigmoid.
    kernel.init(feats_trn, feats_val)
    K = kernel.get_kernel_matrix()
    K.tofile('/scratch/chl/cvfold%d_C%g_%02d-val.kernel' %
             (split, C, attribute_id))
    del K

    #sg('init_kernel', 'TEST')
    try:
        prediction = s.classify().get_labels()
        #prediction=sg('classify')
        platt_params = SigmoidTrain(prediction, Lplatt_val)
        probabilities = SigmoidPredict(prediction, platt_params)

        savetxt('./DAP/cvfold%d_C%g_%02d-val.txt' % (split, C, attribute_id),
                prediction)
        savetxt('./DAP/cvfold%d_C%g_%02d-val.prob' % (split, C, attribute_id),
                probabilities)
        savetxt(
            './DAP/cvfold%d_C%g_%02d-val.labels' % (split, C, attribute_id),
            Lplatt_val)
        savetxt('./DAP/cvfold%d_C%g_%02d-val.platt' % (split, C, attribute_id),
                platt_params)
        #print '#train-perf ',attribute_id,C,mean((prediction*Lplatt_val)>0),mean(Lplatt_val>0)
        #print '#platt-perf ',attribute_id,C,mean((sign(probabilities-0.5)*Lplatt_val)>0),mean(Lplatt_val>0)
    except RuntimeError:
        # Validation scoring failed: use neutral Platt parameters.
        Lprior = mean(Ltrn)
        prediction = sign(Lprior) * ones(len(Ltst))
        probabilities = 0.1 + 0.8 * 0.5 * (Lprior + 1.) * ones(len(Ltst))
        print >> sys.stderr, "#Error during testing. Using constant platt scaling"
        platt_params = [1., 0.]

    # ----------------------------- now apply to test classes ------------------

    feats_tst = Features.CombinedFeatures()
    #sg('clean_features', 'TEST')
    for feature in all_features:
        feats_tst.append_feature_obj(
            Features.RealFeatures(ascontiguousarray(Xtst[feature])))
        del Xtst[feature]

    kernel.init(feats_trn, feats_tst)
    K = kernel.get_kernel_matrix()
    K.tofile('/scratch/chl/cvfold%d_C%g_%02d-tst.kernel' %
             (split, C, attribute_id))
    del K

    #sg('init_kernel', 'TEST')
    prediction = s.classify().get_labels()
    #prediction=sg('classify')
    probabilities = SigmoidPredict(prediction, platt_params)

    savetxt('./DAP/cvfold%d_C%g_%02d.txt' % (split, C, attribute_id),
            prediction)
    savetxt('./DAP/cvfold%d_C%g_%02d.prob' % (split, C, attribute_id),
            probabilities)
    savetxt('./DAP/cvfold%d_C%g_%02d.labels' % (split, C, attribute_id), Ltst)

    #print '#test-perf ',attribute_id,C,mean((prediction*Ltst)>0),mean(Ltst>0)
    #print '#platt-perf ',attribute_id,C,mean((sign(probabilities-0.5)*Ltst)>0),mean(Ltst>0)
    return prediction, probabilities, Ltst