def features_read_svmlight_format_modular(fname):
    """Round-trip a LibSVM/SVMlight file through sparse real features.

    Loads features together with their labels from ``fname`` and writes
    both back out to ``'testwrite.light'`` (relative to the current working
    directory). The written file is not removed by this function.

    Args:
        fname: Path of the LibSVM-format file to read.
    """
    from modshogun import LibSVMFile, SparseRealFeatures

    features = SparseRealFeatures()
    labels = features.load_with_labels(LibSVMFile(fname))
    features.save_with_labels(LibSVMFile('testwrite.light', 'w'), labels)
def main(dataset, output, epsilon, capacity, width, kernel_type):
    """Train a multiclass LibSVM classifier and serialize it to an HDF5 file.

    Args:
        dataset: Path of the LibSVM-format training file.
        output: Path of the HDF5 file the trained classifier is saved to.
        epsilon: Value handed to ``svm.set_epsilon``.
        capacity: First constructor argument of ``MulticlassLibSVM``.
        width: Second argument of the kernel factory (logged as the
            Gaussian width).
        kernel_type: Key into ``KERNELS`` selecting the kernel factory.

    Raises:
        KeyError: If ``kernel_type`` is not a key of ``KERNELS``. The error
            is logged first and then propagated, instead of falling through
            to training with ``kernel`` undefined.
    """
    LOGGER.info("SVM Multiclass classifier")
    LOGGER.info("Epsilon: %s" % epsilon)
    LOGGER.info("Capacity: %s" % capacity)
    LOGGER.info("Gaussian width: %s" % width)

    # Get features
    feats, labels = get_features_and_labels(LibSVMFile(dataset))

    # Create kernel. Only the lookup is guarded so that a KeyError raised
    # inside the kernel constructor is not reported as an unknown kernel.
    try:
        kernel_factory = KERNELS[kernel_type]
    except KeyError:
        LOGGER.error("Kernel %s not available. try Gaussian or Linear" % kernel_type)
        raise
    kernel = kernel_factory(feats, width)

    # Initialize and train Multiclass SVM
    svm = MulticlassLibSVM(capacity, kernel, labels)
    svm.set_epsilon(epsilon)
    with track_execution():
        svm.train()

    # Serialize to file
    writable_file = SerializableHdf5File(output, 'w')
    with closing(writable_file):
        svm.save_serializable(writable_file)
    LOGGER.info("Serialized classifier saved in: '%s'" % output)
def load_sparse_data(filename, dimension=None):
    """Load sparse features and binary labels from a LibSVM-format file.

    Args:
        filename: Path of the LibSVM-format file to read.
        dimension: When not ``None``, passed to ``set_num_features`` on the
            loaded features.

    Returns:
        A dict with the loaded ``SparseRealFeatures`` under ``'data'`` and
        the ``BinaryLabels`` built from the file's labels under ``'labels'``.
    """
    # Read from the path the caller passed in, not from the global CLI args.
    input_file = LibSVMFile(filename)
    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(input_file)
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
def main(classifier, testset, output):
    """Apply a serialized multiclass SVM to a test set and save predictions.

    Args:
        classifier: Path of the HDF5 file holding the serialized classifier.
        testset: Path of the LibSVM-format file with the test examples.
        output: Path of the text file to write; it receives one predicted
            label per line, each converted with ``int``.
    """
    LOGGER.info("SVM Multiclass evaluation")

    # Restore the classifier from its serialized form.
    svm = MulticlassLibSVM()
    model_file = SerializableHdf5File(classifier, 'r')
    with closing(model_file):
        svm.load_serializable(model_file)

    # Only the features are needed for prediction; the labels are discarded.
    test_feats, _ = get_features_and_labels(LibSVMFile(testset))
    predictions = svm.apply(test_feats)

    with open(output, 'w') as out:
        out.writelines(
            "%s\n" % int(label) for label in predictions.get_labels()
        )

    LOGGER.info("Predicted labels saved in: '%s'" % output)
def main(actual, predicted):
    """Compare predicted labels against a labelled dataset and report results.

    Logs the accuracy computed by ``MulticlassAccuracy`` and prints the
    confusion matrix to standard output.

    Args:
        actual: Path of the LibSVM-format file holding the true labels.
        predicted: Path of a text file with one numeric label per line.
    """
    LOGGER.info("SVM Multiclass evaluator")

    # Load SVMLight dataset; only its labels are used below.
    _, labels = get_features_and_labels(LibSVMFile(actual))

    # Load predicted labels
    with open(predicted, 'r') as f:
        predicted_labels_arr = np.array([float(line) for line in f])
    predicted_labels = MulticlassLabels(predicted_labels_arr)

    # Evaluate accuracy
    multiclass_measures = MulticlassAccuracy()
    LOGGER.info("Accuracy = %s" % multiclass_measures.evaluate(
        labels, predicted_labels))

    LOGGER.info("Confusion matrix:")
    res = multiclass_measures.get_confusion_matrix(labels, predicted_labels)
    # Parenthesised single-argument form is valid on both Python 2 and 3.
    print(res)
def features_io_modular(fm_train_real, label_train_twoclass):
    """Save and reload features and labels through several file backends.

    Writes sparse features, dense features and multiclass labels to
    temporary files in the current working directory, loads them back into
    second objects, and finally deletes every file that was written.

    Args:
        fm_train_real: Data handed to both the ``SparseRealFeatures`` and
            the ``RealFeatures`` constructors.
        label_train_twoclass: Not used by this function; the labels are the
            fixed array ``[0.0, 1.0, 2.0, 3.0]``.

    Returns:
        The tuple ``(feats, feats2, lab, lab2)``: the dense features built
        from ``fm_train_real``, the dense features last loaded from
        ``fm_train_real.h5``, the fixed labels, and the labels last loaded
        from ``label_train_real.h5``.
    """
    import numpy
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    # NOTE(review): `f` is rebound for every file below; presumably the
    # previous file object is released (and its data flushed) at that point,
    # so the statement order matters -- confirm before reordering.

    # Sparse features: write as binary and LibSVM ascii, then read both back.
    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()

    f = BinaryFile("fm_train_sparsereal.bin", "w")
    feats.save(f)

    f = LibSVMFile("fm_train_sparsereal.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_sparsereal.bin")
    feats2.load(f)

    f = LibSVMFile("fm_train_sparsereal.ascii")
    feats2.load(f)

    # Dense features: write as binary, HDF5 and CSV; read binary and CSV back.
    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()

    f = BinaryFile("fm_train_real.bin", "w")
    feats.save(f)

    f = HDF5File("fm_train_real.h5", "w", "/data/doubles")
    feats.save(f)

    f = CSVFile("fm_train_real.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_real.bin")
    feats2.load(f)
    #print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    f = CSVFile("fm_train_real.ascii")
    feats2.load(f)
    #print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    # Labels: write as CSV, binary and HDF5; read CSV and binary back.
    lab = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    lab2 = MulticlassLabels()
    f = CSVFile("label_train_twoclass.ascii", "w")
    lab.save(f)

    f = BinaryFile("label_train_twoclass.bin", "w")
    lab.save(f)

    f = HDF5File("label_train_real.h5", "w", "/data/labels")
    lab.save(f)

    f = CSVFile("label_train_twoclass.ascii")
    lab2.load(f)

    f = BinaryFile("label_train_twoclass.bin")
    lab2.load(f)

    # Finally read the two HDF5 files back; these are the last loads into
    # feats2 and lab2 before they are returned.
    f = HDF5File("fm_train_real.h5", "r", "/data/doubles")
    feats2.load(f)
    #print(feats2.get_feature_matrix())
    f = HDF5File("label_train_real.h5", "r", "/data/labels")
    lab2.load(f)
    #print(lab2.get_labels())

    # clean up: delete every temporary file written above
    import os
    for f in [
        'fm_train_sparsereal.bin', 'fm_train_sparsereal.ascii',
        'fm_train_real.bin', 'fm_train_real.h5', 'fm_train_real.ascii',
        'label_train_real.h5', 'label_train_twoclass.ascii', 'label_train_twoclass.bin'
    ]:
        os.unlink(f)
    return feats, feats2, lab, lab2