def generateKernelMatrix(DataSet):
    """Create a square kernel (Gram) matrix from a dataset and return it as PyML kernel data.

    DataSet is a list of (label, tps, patternId, A) examples.  The GERP
    kernel is evaluated for every pair of examples; the matrix and labels
    (with the pattern id prepended as the first column) are written to
    "kernelText.txt" / "labelText.txt" and re-loaded through the ml helpers.

    Returns the ml.KernelData object with labels attached.

    NOTE(review): a second generateKernelMatrix(TestData, TrainingData, ...)
    defined later in this file shadows this one at module level.
    """
    n_samples = len(DataSet)
    # Kernel values are floats; ids/labels are kept as Python objects so
    # arbitrary-length strings survive.  dtype="string" produced a
    # zero-width fixed S dtype that truncated every value.
    kernelMatrix = np.empty([n_samples, n_samples], dtype="float")
    PatternIds = np.empty([n_samples, 1], dtype=object)
    Labels = np.empty([n_samples, 1], dtype=object)
    for i in xrange(n_samples):
        (label, tps, pID, A1) = DataSet[i]
        PatternIds[i, 0] = pID
        Labels[i, 0] = label
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            (label1, tps, pID, A1) = DataSet[i]
            (label2, tps, pID, A2) = DataSet[j]
            # Store the raw float: the old str() wrapper forced a lossy
            # round-trip (Python 2 str keeps only 12 significant digits).
            kernelMatrix[i, j] = ger.GERPKernel(A1, A2)
    # np.concatenate takes a *tuple* of arrays; passing the second array
    # positionally made it the axis argument and raised TypeError.
    kernelFileMatrix = np.concatenate((PatternIds, kernelMatrix), axis=1)
    labelMatrix = np.concatenate((PatternIds, Labels), axis=1)
    # fmt='%s' is required to serialize object/string columns.
    np.savetxt("labelText.txt", labelMatrix, fmt='%s', delimiter=',')
    np.savetxt("kernelText.txt", kernelFileMatrix, fmt='%s', delimiter=',')
    labels = ml.Labels("labelText.txt")
    # KernelData with a capital K — matches the train/test variant below;
    # ml.kernelData does not exist there.
    kdata = ml.KernelData("kernelText.txt")
    kdata.attachLabels(labels)
    return kdata
def prepare_train_set(mgrs):
    """Build a PyML VectorDataSet from a non-empty list of epoch managers.

    Each row of the resulting dataset is the first channel of one manager's
    samples; the row's label is that manager's 'epoch_name' parameter.
    """
    assert (len(mgrs) > 0)
    epoch_labels = PyML.Labels([m.get_param('epoch_name') for m in mgrs])
    # Only the first channel is taken into consideration.
    first_channels = [m.get_samples()[0] for m in mgrs]
    sample_matrix = zeros((len(mgrs), len(first_channels[0])))
    for row, channel in enumerate(first_channels):
        sample_matrix[row, :] = channel
    return PyML.VectorDataSet(sample_matrix, L=epoch_labels)
def _p300_verify_svm_one_fold(t_train_mgrs, nt_train_mgrs, t_test_mgrs,
                              nt_test_mgrs, non_target_per_target, C, Cmode,
                              kernel):
    """Train an SVM on one fold and score it on paired target test trials.

    The classifier is trained on targets + non-targets from the training
    split.  Test targets are then consumed two at a time: a pair counts as a
    success only when BOTH targets score strictly higher than the maximum of
    their own non_target_per_target slice of non-target trials.

    Returns (succ, fail) pair counts.
    """
    assert (len(t_test_mgrs) * non_target_per_target == len(nt_test_mgrs))
    classifier = PyML.svm.SVM(C=C, Cmode=Cmode, arg=kernel)

    # --- train on the concatenated training split -------------------------
    train_data = t_train_mgrs + nt_train_mgrs
    # Only the first channel is taken into consideration.
    train_vect = zeros((len(train_data), len(train_data[0].get_samples()[0])))
    train_labels_vect = [mgr.get_param('epoch_name') for mgr in train_data]
    for row, mgr in enumerate(train_data):
        train_vect[row, :] = mgr.get_samples()[0]
    classifier.train(
        PyML.VectorDataSet(train_vect, L=PyML.Labels(train_labels_vect)))

    def decision_value(mgr):
        # Second element of classify() is the decision value for the trial.
        return classifier.classify(PyML.VectorDataSet(mgr.get_samples()), 0)[1]

    # --- evaluate: walk target test trials two at a time ------------------
    succ = 0
    fail = 0
    for i in range(0, len(t_test_mgrs) - 1, 2):
        t1_value = decision_value(t_test_mgrs[i])
        ns1_value = max([decision_value(m) for m in nt_test_mgrs[
            i * non_target_per_target:(i + 1) * non_target_per_target]])
        t2_value = decision_value(t_test_mgrs[i + 1])
        ns2_value = max([decision_value(m) for m in nt_test_mgrs[
            (i + 1) * non_target_per_target:(i + 2) * non_target_per_target]])
        # The pair is a success only when both targets beat their own
        # non-target competitors.
        if t1_value > ns1_value and t2_value > ns2_value:
            succ += 1
        else:
            fail += 1
    return succ, fail
def generateKernelMatrix(TestData, TrainingData, sigma=10,
                         lam=math.pow(10, -3), nu=0.5, kernel="twed"):
    """Create a (test x train) kernel matrix and return it as PyML kernel data.

    Each dataset is a list of (label, tps, patternId, A) examples.

    Parameters:
        TestData     -- examples indexing the rows of the matrix
        TrainingData -- examples indexing the columns
        sigma        -- kernel bandwidth, passed to both kernels
        lam, nu      -- TWED stiffness/penalty parameters ("twed" only)
        kernel       -- "gerp" or "twed"

    The matrix and the label column (pattern id prepended as the first
    column) are written to "kernelText.txt" / "labelText.txt" and re-loaded
    through the ml helpers.

    Returns the ml.KernelData object with labels attached.
    Raises ValueError for an unknown kernel name.
    """
    if kernel not in ("gerp", "twed"):
        # Previously an unrecognized name silently left np.empty garbage in
        # the matrix; fail fast instead.
        raise ValueError("unknown kernel: %r" % (kernel,))
    n_samplesTest = len(TestData)
    n_samplesTrain = len(TrainingData)
    kernelMatrix = np.empty([n_samplesTest, n_samplesTrain], dtype="float")
    PatternIds = np.empty([n_samplesTest, 1], dtype=object)
    Labels = np.empty([n_samplesTest, 1], dtype=object)
    for i in xrange(n_samplesTest):
        (label, tps, pID, A1) = TestData[i]
        PatternIds[i, 0] = pID
        Labels[i, 0] = label
    for i in xrange(n_samplesTest):
        for j in xrange(n_samplesTrain):
            (label1, tps, pID, A1) = TestData[i]
            (label2, tps, pID, A2) = TrainingData[j]
            if kernel == "gerp":
                # Store the float directly: the old str() wrapper forced a
                # lossy round-trip (Python 2 str keeps only 12 significant
                # digits) before numpy converted it back to float.
                kernelMatrix[i, j] = ger.GERPKernel(A1, A2, sigma)
            else:  # kernel == "twed", guaranteed by the guard above
                kernelMatrix[i, j] = twed.TwedKernel(
                    TestData[i], TrainingData[j], lam, nu, sigma)
    kernelFileMatrix = np.concatenate((PatternIds, kernelMatrix), axis=1)
    labelMatrix = np.concatenate((PatternIds, Labels), axis=1)
    np.savetxt("labelText.txt", labelMatrix, fmt='%s', delimiter=',')
    np.savetxt("kernelText.txt", kernelFileMatrix, fmt="%s", delimiter=',')
    f1 = "labelText.txt"
    f2 = "kernelText.txt"
    labels = ml.Labels(f1)
    kdata = ml.KernelData(f2)
    kdata.attachLabels(labels)
    return kdata