def main():
    """Build a MISVM estimator from the module-level settings and train it.

    ``kernel``, ``gamma``, ``C`` and ``path`` are not parameters; they are
    looked up outside this function when it runs.
    """
    # NOTE(review): the original comment here read "read hard and easy";
    # no reading happens in this function, so it presumably refers to work
    # done inside mil.train -- confirm.
    svm_options = dict(
        kernel=kernel,
        gamma=gamma,
        C=C,
        verbose=True,
        max_iters=100,
    )
    svm = misvm.MISVM(**svm_options)
    mil.train(estimator=svm, resultSuperDirPath=path)
def cross_validation():
    """Run mil.crossValidation with a MISVM built from module-level settings.

    ``kernel``, ``gamma``, ``C`` and ``path`` are looked up outside this
    function when it runs.
    """
    svm_options = dict(
        kernel=kernel,
        gamma=gamma,
        C=C,
        verbose=True,
        max_iters=200,
    )
    svm = misvm.MISVM(**svm_options)
    # The second positional argument is presumably the number of folds --
    # confirm against mil.crossValidation.
    mil.crossValidation(svm, 5, path)
def leave_one_person_out(n_jobs=8):
    """Run mil.leaveOnePersonOut with a MISVM built from module-level settings.

    Args:
        n_jobs: forwarded unchanged to ``mil.leaveOnePersonOut``
            (presumably the number of parallel workers -- confirm).

    ``kernel``, ``gamma``, ``C`` and ``path`` are looked up outside this
    function when it runs.
    """
    svm_options = dict(
        kernel=kernel,
        gamma=gamma,
        C=C,
        verbose=True,
        max_iters=200,
    )
    svm = misvm.MISVM(**svm_options)
    mil.leaveOnePersonOut(
        estimator=svm,
        resultSuperDirPath=path,
        n_jobs=n_jobs,
        trainAcc=True,
    )
def misvm_classify(verbose, output, vectors, labels):
    """Train three misvm classifiers and report their test accuracy.

    The bags are split by position: even positions are used for training,
    odd positions for testing. sbMIL, SIL and MISVM are each fitted on the
    training half, and the fraction of test bags whose label equals the sign
    of the prediction is printed per algorithm.

    Args:
        verbose: when truthy, progress messages are printed before the
            classification starts.
        output: path of a file that receives everything printed during
            classification, or the literal string 'NONE' to leave stdout
            untouched.
        vectors: mapping whose values are converted to float numpy arrays,
            one per bag.
        labels: mapping whose values are converted to a float numpy array of
            bag labels.

    Returns:
        None. The accuracies are only printed.
    """
    # Single-argument print(...) calls behave identically under the Python 2
    # print statement and the Python 3 print function.
    if verbose:
        print("Creating train and test bags and labels...")

    # NOTE(review): bags and labels are paired purely by the iteration order
    # of two separate mappings. This is only correct if both iterate their
    # entries in the same order -- confirm with the callers.
    bags = [numpy.array(vectors[key], dtype=float) for key in vectors]
    bag_labels = numpy.array([labels[key] for key in labels], dtype=float)

    # Split dataset into train and test sets (even index -> train).
    train_bags, train_labels = [], []
    test_bags, test_labels = [], []
    for index, label in enumerate(bag_labels):
        if index % 2 == 0:
            train_bags.append(bags[index])
            train_labels.append(label)
        else:
            test_bags.append(bags[index])
            test_labels.append(label)

    if verbose:
        print("MISVM Classifying...")

    report = None
    if output != 'NONE':
        report = open(output, 'w')
        sys.stdout = report
    try:
        # Establish classifiers.
        classifiers = {
            'sbMIL': misvm.sbMIL(kernel='rbf', eta=0.1, C=1.0),
            'SIL': misvm.SIL(kernel='rbf', C=1.0),
            'MISVM': misvm.MISVM(kernel='rbf', C=1.0, max_iters=100),
        }

        # Train/evaluate classifiers.
        accuracies = {}
        for algorithm, classifier in classifiers.items():
            classifier.fit(train_bags, train_labels)
            predictions = classifier.predict(test_bags)
            accuracies[algorithm] = numpy.average(
                test_labels == numpy.sign(predictions))

        for algorithm, accuracy in accuracies.items():
            print('\n%s Accuracy: %.1f%%' % (algorithm, 100 * accuracy))
    finally:
        # Always undo the redirect and release the report file, even when a
        # classifier raises; otherwise stdout stays hijacked and the file
        # handle is leaked with its buffer unflushed.
        if report is not None:
            sys.stdout = sys.__stdout__  # reset stdout to normal
            report.close()
def __init__(self, cfg, **kwargs):
    """Initialise the parent class, then attach a linear MISVM classifier.

    ``cfg`` and any extra keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    super(MISVMDetector, self).__init__(cfg, **kwargs)
    svm_settings = {'kernel': 'linear', 'C': 1.0, 'max_iters': 50}
    self.classifier = misvm.MISVM(**svm_settings)
def _featurize_training_lines(lines, label):
    """Convert comma-separated training records into labelled feature rows.

    Each returned row is ``[header, bag, feature..., label]``: header and bag
    are fields 0 and 1 of the record, and the features are the concatenation
    of ``kmer``, ``ksnpf`` and ``nucleic_shift`` applied to the stripped
    sequence in field 3.
    """
    rows = []
    for line in lines:
        fields = line.split(',')
        header = fields[0]
        bag = fields[1]
        sequence = fields[3].strip()
        row = [header, bag]
        row.extend(kmer(sequence) + ksnpf(sequence) + nucleic_shift(sequence))
        row.append(label)
        rows.append(row)
    return rows


def main(argv=sys.argv):
    """Extract training features, fit a misvm classifier and write its scores.

    Steps, in order:
      1. ``process_options(argv)`` supplies the two training inputs, the
         method selector, the model/scaler dump targets and the two report
         outputs.
      2. Positive records are labelled '1' and negative records '-1'; the
         numeric feature columns are min-max scaled to the range (-1, 1).
      3. The scaled table is written to ``train_features.txt`` and read back
         through ``file_to_features``.
      4. ``method == 0`` selects ``misvm.miSVM`` and ``method == 1`` selects
         ``misvm.MISVM``; the fitted model and the scaler are dumped with
         joblib.
      5. Every sample's instance prediction is written to ``score_file``;
         samples whose prediction is >= 0 are also listed in ``res_file``.

    Args:
        argv: argument list handed to ``process_options``.

    Raises:
        ValueError: if ``method`` is neither 0 nor 1. (This case used to
            surface as a NameError on the unbound classifier.)
    """
    (pos_train_file, neg_train_file, method, model_file, scale_file,
     score_file, res_file) = process_options(argv)

    try:
        # Train feature extraction. Each input is released as soon as it has
        # been consumed.
        feature_matrix = _featurize_training_lines(pos_train_file, '1')
        pos_train_file.close()
        feature_matrix.extend(_featurize_training_lines(neg_train_file, '-1'))
        neg_train_file.close()

        # Only the numeric columns are scaled: header and bag (first two
        # entries) and the label (last entry) are excluded.
        feature_array = np.array([row[2:-1] for row in feature_matrix],
                                 dtype=np.float32)
        min_max_scaler = preprocessing.MinMaxScaler(copy=True,
                                                    feature_range=(-1, 1))
        feature_scaled = min_max_scaler.fit_transform(feature_array)

        # Re-attach header, bag and label around the scaled features, row by
        # row. This yields the same table as transposing, swapping the
        # feature columns and transposing back, without depending on map()
        # returning an indexable list.
        train_feature_matrix = [
            row[:2] + list(scaled_row) + row[-1:]
            for row, scaled_row in zip(feature_matrix, feature_scaled)
        ]
        np.savetxt("train_features.txt", train_feature_matrix, fmt='%s',
                   delimiter=',')

        # Put samples into bags.
        train_file_path = './train_features.txt'
        (train_bag_targets, train_bag_samples, train_bag_instance_targets,
         sample_info) = file_to_features(train_file_path)

        if method == 0:
            svc = misvm.miSVM(kernel='quadratic', C=5.4, max_iters=10)
        elif method == 1:
            svc = misvm.MISVM(kernel='quadratic', C=5.4, max_iters=10)
        else:
            raise ValueError(
                "unsupported method %r: expected 0 (miSVM) or 1 (MISVM)"
                % (method,))

        svc.fit(train_bag_samples, train_bag_targets)
        joblib.dump(svc, model_file)
        joblib.dump(min_max_scaler, scale_file)

        # The truthy second argument asks predict() for the instance-level
        # predictions in addition to the bag-level ones.
        bag_predictions, inst_predictions = svc.predict(train_bag_samples, 1)

        score_file.write("name\tscore\n")
        for i, name in enumerate(sample_info):
            score_file.write(name + "\t" + str(inst_predictions[i]) + "\n")

        res_file.write("name\n")
        for j, score in enumerate(inst_predictions):
            if score >= 0:
                res_file.write(sample_info[j] + "\n")
    finally:
        # Safety net so no handle leaks when a step above raises. Closing an
        # already-closed file object is a no-op, so the early closes of the
        # training inputs are harmless here.
        for handle in (pos_train_file, neg_train_file, score_file, res_file):
            handle.close()
def gridsearch(params_grid, cv=2):
    """Run mil.gridSearch over ``params_grid`` with a MISVM estimator.

    Args:
        params_grid: forwarded unchanged to ``mil.gridSearch``.
        cv: forwarded unchanged to ``mil.gridSearch`` (defaults to 2).
    """
    search_options = {
        'estimator': misvm.MISVM(max_iters=250),
        'params_grid': params_grid,
        'cv': cv,
    }
    mil.gridSearch(**search_options)