def features_read_svmlight_format_modular(fname):
    """Read a LibSVM/SVMlight file and write its contents back out.

    The features and labels loaded from *fname* are saved again, through
    LibSVMFile, to the relative path 'testwrite.light'. Nothing is returned.
    """
    import os
    from modshogun import LibSVMFile, SparseRealFeatures

    sparse_feats = SparseRealFeatures()

    # load_with_labels fills the feature object and hands back the labels.
    loaded_labels = sparse_feats.load_with_labels(LibSVMFile(fname))

    # Round-trip: write the same features and labels to a new file.
    sparse_feats.save_with_labels(
        LibSVMFile('testwrite.light', 'w'),
        loaded_labels,
    )
Ejemplo n.º 2
0
def main(dataset, output, epsilon, capacity, width, kernel_type):
    """Train a multiclass LibSVM classifier and serialize it to *output*.

    Args:
        dataset: path handed to LibSVMFile to load features and labels.
        output: path of the HDF5 file the trained classifier is written to.
        epsilon: value passed to the classifier's set_epsilon().
        capacity: first constructor argument of MulticlassLibSVM.
        width: second argument passed to the selected kernel factory.
        kernel_type: key into KERNELS selecting the kernel factory.

    Raises:
        KeyError: if kernel_type is not a key of KERNELS (after logging
            an error message).
    """
    LOGGER.info("SVM Multiclass classifier")
    LOGGER.info("Epsilon: %s" % epsilon)
    LOGGER.info("Capacity: %s" % capacity)
    LOGGER.info("Gaussian width: %s" % width)

    # Get features
    feats, labels = get_features_and_labels(LibSVMFile(dataset))

    # Create kernel. Only the lookup is guarded, so a KeyError raised inside
    # the kernel constructor is not misreported as an unknown kernel type.
    try:
        kernel_factory = KERNELS[kernel_type]
    except KeyError:
        LOGGER.error("Kernel %s not available. try Gaussian or Linear" % kernel_type)
        # Re-raise: falling through would leave `kernel` unbound and fail
        # below with an unrelated NameError.
        raise
    kernel = kernel_factory(feats, width)

    # Initialize and train Multiclass SVM
    svm = MulticlassLibSVM(capacity, kernel, labels)
    svm.set_epsilon(epsilon)
    with track_execution():
        svm.train()

    # Serialize to file
    writable_file = SerializableHdf5File(output, 'w')
    with closing(writable_file):
        svm.save_serializable(writable_file)
    LOGGER.info("Serialized classifier saved in: '%s'" % output)
def load_sparse_data(filename, dimension=None):
    """Load sparse features and binary labels from a LibSVM-format file.

    Args:
        filename: path of the file to read via LibSVMFile.
        dimension: optional number of features to set on the loaded
            feature object; left untouched when None.

    Returns:
        A dict with keys 'data' (the SparseRealFeatures object) and
        'labels' (a BinaryLabels object built from the loaded labels).
    """
    # Read from the path the caller passed in, not from a global `args`.
    input_file = LibSVMFile(filename)
    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(input_file)
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
Ejemplo n.º 4
0
def main(classifier, testset, output):
    """Apply a serialized multiclass SVM to a test set and save predictions.

    The classifier is restored from the HDF5 file *classifier*, applied to
    the features loaded from *testset*, and each predicted label is written
    to *output* as an integer on its own line.
    """
    LOGGER.info("SVM Multiclass evaluation")

    # Restore the trained model from its serialized form.
    model = MulticlassLibSVM()
    model_file = SerializableHdf5File(classifier, 'r')
    with closing(model_file):
        model.load_serializable(model_file)

    # The labels stored in the test set are loaded but not used below.
    test_feats, test_labels = get_features_and_labels(LibSVMFile(testset))
    predictions = model.apply(test_feats)

    # One integer class label per line.
    with open(output, 'w') as out:
        out.writelines(
            "%s\n" % int(value) for value in predictions.get_labels()
        )

    LOGGER.info("Predicted labels saved in: '%s'" % output)
Ejemplo n.º 5
0
def main(actual, predicted):
    """Compare predicted labels against a labelled dataset.

    Args:
        actual: path of the LibSVM-format dataset holding the true labels.
        predicted: path of a text file with one numeric label per line.

    Logs the accuracy and prints the confusion matrix to standard output.
    """
    LOGGER.info("SVM Multiclass evaluator")

    # Load SVMLight dataset; only its labels are needed here.
    _, labels = get_features_and_labels(LibSVMFile(actual))

    # Load predicted labels
    with open(predicted, 'r') as f:
        predicted_labels_arr = np.array([float(line) for line in f])
    predicted_labels = MulticlassLabels(predicted_labels_arr)

    # Evaluate accuracy
    multiclass_measures = MulticlassAccuracy()
    LOGGER.info("Accuracy = %s" % multiclass_measures.evaluate(
        labels, predicted_labels))
    LOGGER.info("Confusion matrix:")
    res = multiclass_measures.get_confusion_matrix(labels, predicted_labels)
    # Parenthesized form prints identically on Python 2 and is valid on
    # Python 3, where the bare `print res` statement is a SyntaxError.
    print(res)
Ejemplo n.º 6
0
def features_io_modular(fm_train_real, label_train_twoclass):
    """Round-trip features and labels through several Shogun file formats.

    Sparse and dense feature objects built from *fm_train_real*, plus a
    fixed four-element MulticlassLabels object, are saved to files in the
    current directory and loaded back into fresh objects. All files created
    here are deleted before returning.

    Args:
        fm_train_real: data used to construct SparseRealFeatures and
            RealFeatures.
        label_train_twoclass: not used by this function body.

    Returns:
        Tuple (feats, feats2, lab, lab2): the dense RealFeatures built from
        *fm_train_real*, the RealFeatures object that was loaded from file,
        the MulticlassLabels that were saved, and the MulticlassLabels
        object that was loaded from file.

    NOTE(review): `f` is rebound for every file and no handle is closed
    explicitly; presumably dropping the previous reference is what
    flushes/closes each written file before it is read back. Confirm
    before reordering or renaming anything here.
    """
    import numpy
    # GaussianKernel is imported but not referenced below.
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    # --- sparse features: save as binary and LibSVM text, then reload ---
    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()

    f = BinaryFile("fm_train_sparsereal.bin", "w")
    feats.save(f)

    f = LibSVMFile("fm_train_sparsereal.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_sparsereal.bin")
    feats2.load(f)

    f = LibSVMFile("fm_train_sparsereal.ascii")
    feats2.load(f)

    # --- dense features: save as binary, HDF5 and CSV, then reload ---
    # feats/feats2 are rebound; the sparse objects above are discarded.
    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()

    f = BinaryFile("fm_train_real.bin", "w")
    feats.save(f)

    f = HDF5File("fm_train_real.h5", "w", "/data/doubles")
    feats.save(f)

    f = CSVFile("fm_train_real.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_real.bin")
    feats2.load(f)
    #print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    f = CSVFile("fm_train_real.ascii")
    feats2.load(f)
    #print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    # --- labels: fixed values 0..3, saved as CSV, binary and HDF5 ---
    lab = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    lab2 = MulticlassLabels()
    f = CSVFile("label_train_twoclass.ascii", "w")
    lab.save(f)

    f = BinaryFile("label_train_twoclass.bin", "w")
    lab.save(f)

    f = HDF5File("label_train_real.h5", "w", "/data/labels")
    lab.save(f)

    f = CSVFile("label_train_twoclass.ascii")
    lab2.load(f)

    f = BinaryFile("label_train_twoclass.bin")
    lab2.load(f)

    # HDF5 reads come last, so the returned feats2/lab2 hold whatever the
    # HDF5 load produced.
    f = HDF5File("fm_train_real.h5", "r", "/data/doubles")
    feats2.load(f)
    #print(feats2.get_feature_matrix())
    f = HDF5File("label_train_real.h5", "r", "/data/labels")
    lab2.load(f)
    #print(lab2.get_labels())

    #clean up
    # Removes every file written above; `f` is reused as the loop variable,
    # which also drops the reference to the last HDF5File.
    import os
    for f in [
            'fm_train_sparsereal.bin', 'fm_train_sparsereal.ascii',
            'fm_train_real.bin', 'fm_train_real.h5', 'fm_train_real.ascii',
            'label_train_real.h5', 'label_train_twoclass.ascii',
            'label_train_twoclass.bin'
    ]:
        os.unlink(f)
    return feats, feats2, lab, lab2