예제 #1
0
def main():  # read hard and easy
    """Train a verbose MISVM estimator limited to 100 iterations.

    ``kernel``, ``gamma``, ``C`` and ``path`` are not parameters; they are
    looked up from the scope surrounding this function.
    """
    svm_options = {
        'kernel': kernel,
        'gamma': gamma,
        'C': C,
        'verbose': True,
        'max_iters': 100,
    }
    mil.train(estimator=misvm.MISVM(**svm_options), resultSuperDirPath=path)
예제 #2
0
def cross_validation():
    """Run ``mil.crossValidation`` with a verbose MISVM capped at 200 iterations.

    The estimator, the literal ``5`` and ``path`` are passed positionally, in
    that order. ``kernel``, ``gamma``, ``C`` and ``path`` come from the scope
    surrounding this function.
    """
    svm_options = dict(kernel=kernel, gamma=gamma, C=C)
    svm_options.update(verbose=True, max_iters=200)
    mil.crossValidation(misvm.MISVM(**svm_options), 5, path)
예제 #3
0
def leave_one_person_out(n_jobs=8):
    """Run ``mil.leaveOnePersonOut`` with a verbose MISVM (200 iterations max).

    Args:
        n_jobs: forwarded unchanged as the ``n_jobs`` keyword of
            ``mil.leaveOnePersonOut``.

    ``kernel``, ``gamma``, ``C`` and ``path`` come from the scope surrounding
    this function; ``trainAcc`` is always passed as True.
    """
    svm = misvm.MISVM(
        kernel=kernel, gamma=gamma, C=C, verbose=True, max_iters=200)
    run_options = {
        'estimator': svm,
        'resultSuperDirPath': path,
        'n_jobs': n_jobs,
        'trainAcc': True,
    }
    mil.leaveOnePersonOut(**run_options)
def misvm_classify(verbose, output, vectors, labels):
    """Fit sbMIL, SIL and MISVM on half of the bags and print test accuracy.

    Bags at even positions are used for training and bags at odd positions
    for testing. One accuracy line per classifier is printed at the end.

    Args:
        verbose: when truthy, progress messages are printed before the
            output redirection takes effect.
        output: path of a file that receives everything printed during
            training and evaluation, or the string 'NONE' to keep printing
            to the current standard output.
        vectors: mapping whose values are each converted to a float numpy
            array (one array per bag).
        labels: mapping whose values are collected into a float numpy array
            of bag labels.

    NOTE(review): bags and labels are paired purely by the iteration position
    of the two mappings - confirm the callers build both with the same key
    order, otherwise bags and labels are silently misaligned.
    """
    if verbose:
        print("Creating train and test bags and labels...")
    # numpy-format matrices / labels for use in misvm
    bags = [numpy.array(vectors[key], dtype=float) for key in vectors]
    labels = numpy.array([labels[key] for key in labels], dtype=float)

    # Split dataset into train and test sets: even positions train, odd test.
    train_bags = []
    train_labels = []
    test_bags = []
    test_labels = []
    for position, label in enumerate(labels):
        if position % 2 == 0:
            train_bags.append(bags[position])
            train_labels.append(label)
        else:
            test_bags.append(bags[position])
            test_labels.append(label)

    if verbose:
        print("MISVM Classifying...")

    report = None
    if output != 'NONE':
        report = open(output, 'w')
        sys.stdout = report
    try:
        # establish classifiers
        classifiers = {
            'sbMIL': misvm.sbMIL(kernel='rbf', eta=0.1, C=1.0),
            'SIL': misvm.SIL(kernel='rbf', C=1.0),
            'MISVM': misvm.MISVM(kernel='rbf', C=1.0, max_iters=100),
        }
        # Train/Evaluate classifiers
        accuracies = {}
        for algorithm, classifier in classifiers.items():
            classifier.fit(train_bags, train_labels)
            predictions = classifier.predict(test_bags)
            accuracies[algorithm] = numpy.average(
                test_labels == numpy.sign(predictions))
        for algorithm, accuracy in accuracies.items():
            print('\n%s Accuracy: %.1f%%' % (algorithm, 100 * accuracy))
    finally:
        # Always undo the redirection and release the report file, even when
        # a classifier raises; otherwise later output would keep going to a
        # leaked (and possibly unflushed) file handle.
        if report is not None:
            sys.stdout = sys.__stdout__  # reset stdout to normal
            report.close()
예제 #5
0
 def __init__(self, cfg, **kwargs):
     """Initialise the parent class, then attach a linear-kernel MISVM.

     ``cfg`` and any extra keyword arguments are forwarded unchanged to the
     parent initialiser.
     """
     super(MISVMDetector, self).__init__(cfg, **kwargs)
     svm_settings = {'kernel': 'linear', 'C': 1.0, 'max_iters': 50}
     self.classifier = misvm.MISVM(**svm_settings)
예제 #6
0
def _read_labelled_rows(handle, label):
    """Parse each comma-separated line of *handle* into a labelled feature row.

    A row is ``[field 0, field 1, <features of field 3>..., label]`` where
    the features are ``kmer + ksnpf + nucleic_shift`` of the stripped
    sequence. Blank lines are skipped. The handle is not closed here.
    """
    rows = []
    for line in handle:
        if not line.strip():
            continue  # tolerate blank lines such as a trailing newline
        fields = line.split(',')
        header = fields[0]
        bag = fields[1]
        sequence = fields[3].strip()
        row = [header, bag]
        row.extend(kmer(sequence) + ksnpf(sequence) + nucleic_shift(sequence))
        row.append(label)
        rows.append(row)
    return rows


def main(argv=sys.argv):
    """Extract features, train a multiple-instance SVM and write its scores.

    Steps, in order:
      1. Read positive (label '1') and negative (label '-1') training lines
         and build one feature row per line.
      2. Scale the numeric feature columns to the range (-1, 1) with a
         MinMaxScaler and save the rows to ``train_features.txt``.
      3. Load bags from that file with ``file_to_features`` and fit
         ``misvm.miSVM`` (method 0) or ``misvm.MISVM`` (method 1).
      4. Dump the model and the scaler with joblib, write one
         ``name<TAB>score`` line per instance to the score file, and write
         the names of instances scoring >= 0 to the reserved-samples file.

    Args:
        argv: argument list handed to ``process_options``, which supplies the
            input/output handles, the method selector and the model/scaler
            destinations.

    Raises:
        ValueError: if the method selector is neither 0 nor 1.
    """
    (pos_train_file, neg_train_file, method, model_file, scale_file,
     score_file, res_file) = process_options(argv)
    try:
        ######train feature extraction
        feature_matrix = _read_labelled_rows(pos_train_file, '1')
        feature_matrix.extend(_read_labelled_rows(neg_train_file, '-1'))

        # Columns 0/1 are header and bag, the last column is the label;
        # everything in between is numeric and gets scaled.
        feature_array = np.array([row[2:-1] for row in feature_matrix],
                                 dtype=np.float32)
        min_max_scaler = preprocessing.MinMaxScaler(copy=True,
                                                    feature_range=(-1, 1))
        feature_scaled = min_max_scaler.fit_transform(feature_array)

        # Re-attach header, bag and label around the scaled features. This
        # builds the rows directly instead of transposing twice, so it does
        # not depend on map() returning a list.
        train_feature_matrix = [
            row[:2] + list(scaled_row) + [row[-1]]
            for row, scaled_row in zip(feature_matrix, feature_scaled)
        ]
        np.savetxt("train_features.txt",
                   train_feature_matrix,
                   fmt='%s',
                   delimiter=',')

        ######put samples into bags
        train_file_path = './train_features.txt'
        [
            train_bag_targets, train_bag_samples, train_bag_instance_targets,
            sample_info
        ] = file_to_features(train_file_path)

        if method == 0:
            svc = misvm.miSVM(kernel='quadratic', C=5.4, max_iters=10)
        elif method == 1:
            svc = misvm.MISVM(kernel='quadratic', C=5.4, max_iters=10)
        else:
            # Previously fell through and failed later with an unbound 'svc'.
            raise ValueError(
                "method must be 0 (miSVM) or 1 (MISVM), got %r" % (method,))
        svc.fit(train_bag_samples, train_bag_targets)
        joblib.dump(svc, model_file)
        joblib.dump(min_max_scaler, scale_file)

        # NOTE(review): the second argument presumably asks misvm for
        # instance-level predictions as well - confirm against misvm docs.
        bag_predictions, inst_predictions = svc.predict(train_bag_samples, 1)

        score_file.write("name\tscore\n")
        for i, name in enumerate(sample_info):
            score_file.write(name + "\t" + str(inst_predictions[i]) + "\n")

        res_file.write("name\n")
        for j, score in enumerate(inst_predictions):
            if score >= 0:
                res_file.write(sample_info[j] + "\n")
    finally:
        # Release every handle obtained from process_options, including on
        # failure paths where the original code leaked them.
        for handle in (pos_train_file, neg_train_file, score_file, res_file):
            handle.close()
예제 #7
0
def gridsearch(params_grid, cv=2):
    """Run ``mil.gridSearch`` over *params_grid* with a 250-iteration MISVM.

    Args:
        params_grid: forwarded unchanged as the ``params_grid`` keyword.
        cv: forwarded unchanged as the ``cv`` keyword (defaults to 2).
    """
    search_options = {
        'estimator': misvm.MISVM(max_iters=250),
        'params_grid': params_grid,
        'cv': cv,
    }
    mil.gridSearch(**search_options)