Ejemplo n.º 1
0
def model_mil(train_bags, train_labels, test_bags, test_labels):
    """Fit three misvm classifiers and score each one on the test bags.

    MissSVM, sbMIL and SIL (all with linear kernels) are trained on
    ``train_bags`` / ``train_labels`` and then asked to predict ``test_bags``.
    The confusion matrix of each algorithm against ``test_labels`` yields:

    * ``FDR`` = TP / (TP + FN)
    * ``FAR`` = FP / (TN + FP)

    A progress line per algorithm and the total elapsed time are printed to
    stdout.

    Returns:
        dict: ``{algorithm_name: {'FDR': float, 'FAR': float}}``.  A rate is
        ``nan`` when its denominator is zero.
    """
    start_time = datetime.now()

    def _rate(count, total):
        # float() forces true division: the confusion-matrix cells are
        # integers, which would floor to 0 or 1 under Python 2 semantics.
        if not total:
            return float('nan')
        return float(count) / float(total)

    def _timestamp():
        return time.asctime(time.localtime(time.time()))

    classifiers = {}
    classifiers['MissSVM'] = misvm.MissSVM(kernel='linear',
                                           C=1.0,
                                           max_iters=20)
    classifiers['sbMIL'] = misvm.sbMIL(kernel='linear', eta=0.1, C=1e2)
    classifiers['SIL'] = misvm.SIL(kernel='linear', C=1.0)

    # Train/Evaluate classifiers
    perf = {}
    for algorithm, classifier in classifiers.items():
        classifier.fit(train_bags, train_labels)
        predictions = classifier.predict(test_bags)
        # NOTE(review): sign(...) + 1 yields values in {0, 1, 2}, and the SIL
        # output is passed through untouched.  Confirm this matches the
        # encoding of test_labels -- the 2x2 unpacking below only works when
        # labels and predictions together contain exactly two classes.
        if algorithm == 'sbMIL' or algorithm == 'MissSVM':
            predictions = np.sign(predictions) + 1
        # presumably sklearn.metrics.confusion_matrix (rows = true labels)
        [[TN, FP], [FN, TP]] = confusion_matrix(test_labels, predictions)
        perf[algorithm] = {
            'FDR': _rate(TP, TP + FN),
            'FAR': _rate(FP, TN + FP),
        }
        print('[%s]algorithm %s done...' % (_timestamp(), algorithm))

    # total_seconds() keeps whole days, which the .seconds attribute drops.
    elapsed = int((datetime.now() - start_time).total_seconds())
    print('[%s]%s done for %d seconds...' % (
        _timestamp(), sys._getframe().f_code.co_name, elapsed))
    return perf
Ejemplo n.º 2
0
def main():
    """Score three MIL SVMs on the z-score-normalized 'musk1' data set.

    The examples are parsed with ``parse_c45``, dumped to ``output`` (a
    writable object defined outside this function), normalized column-wise,
    grouped into bags and split so that the first 10 bags form the test set.
    Each classifier's accuracy is printed and also written to ``output``.
    """
    # Parse the C4.5 examples and log them
    examples = parse_c45('musk1')
    matches = re.findall(r"<(.*)>", str(examples))
    output.write(str(examples))
    print(len(matches))
    print(matches)

    # Column statistics over every example, used for normalization
    matrix = np.array(examples.to_float())
    column_mean = np.average(matrix, axis=0)
    column_std = np.std(matrix, axis=0)
    # Constant columns have zero deviation; divide those by 1 instead
    column_std[column_std == 0.0] = 1.0

    def normalize(ex):
        # ``ex`` is a single example row.  After scaling, the [2:-1] slice
        # drops its first two entries and its last entry, which are the
        # bag/instance ids and the class label.
        scaled = (np.array(ex) - column_mean) / column_std
        return scaled[2:-1]

    # Bag the examples and turn them into NumPy data
    grouped = bag_set(examples)
    bags = [np.array(bag.to_float(normalize)) for bag in grouped]
    raw_labels = np.array([bag.label for bag in grouped], dtype=float)
    # Map 0/1 labels onto -1/+1
    labels = 2 * raw_labels - 1

    # Arbitrary split: first 10 bags for testing, the rest for training
    test_bags, train_bags = bags[:10], bags[10:]
    test_labels, train_labels = labels[:10], labels[10:]

    # Classifiers under comparison
    classifiers = {
        'MissSVM': misvm.MissSVM(kernel='linear', C=1.0, max_iters=20),
        'sbMIL': misvm.sbMIL(kernel='linear', eta=0.1, C=1e2),
        'SIL': misvm.SIL(kernel='linear', C=1.0),
    }

    # Fit each model, then score the sign of its predictions
    accuracies = {}
    for name, model in classifiers.items():
        model.fit(train_bags, train_labels)
        predicted_sign = np.sign(model.predict(test_bags))
        accuracies[name] = np.average(test_labels == predicted_sign)

    for name, score in accuracies.items():
        report_line = '\n%s Accuracy: %.1f%%' % (name, 100 * score)
        print(report_line)
        output.write(report_line)
Ejemplo n.º 3
0
 def train(self, dataset, labels):
     """Fit one linear-kernel ``misvm.SIL`` classifier per row of
     ``np.transpose(labels)``.

     The fitted classifiers are stored, in order, in ``self.SVMs`` (any
     previous contents are discarded).  A progress line is printed after each
     fit, followed by the total elapsed time in whole minutes.

     Args:
         dataset: passed unchanged as the first argument of every
             ``classifier.fit`` call.
         labels: array-like; each row of its transpose is the label vector
             for one classifier.
     """
     self.SVMs = []
     glob_start = time.time()
     # Transpose once instead of on every iteration just to read its length.
     label_rows = np.transpose(labels)
     total = len(label_rows)
     for index, label in enumerate(label_rows, 1):
         start = time.time()
         classifier = misvm.SIL(kernel='linear', C=1.0)
         # classifier = misvm.MISVM(kernel='linear', C=1.0, max_iters=20)
         # classifier = misvm.miSVM(kernel='linear', C=1.0, max_iters=20)
         classifier.fit(dataset, np.array(label))
         self.SVMs.append(classifier)
         elapsed = int(time.time()) - int(start)
         print("Trained " + str(index) + " of " + str(total) + " in " +
               str(elapsed) + " sec")
     # Floor division keeps the minute count integral on Python 2 and 3 alike.
     minutes = (int(time.time()) - int(glob_start)) // 60
     print("It took " + str(minutes) + " minutes")
def misvm_classify(verbose, output, vectors, labels):
    """Train sbMIL, SIL and MISVM (rbf kernels) and print their accuracies.

    Bags at even positions are used for training and bags at odd positions
    for testing.  Accuracy is the fraction of test labels equal to the sign
    of the corresponding prediction.

    Args:
        verbose: when truthy, progress messages are printed.
        output: path of a file that receives everything printed during
            training and evaluation, or the string ``'NONE'`` to keep
            printing to the current stdout.
        vectors: mapping whose values are the per-bag instance matrices
            (presumably a dict -- it is iterated and indexed by its own keys).
        labels: mapping whose values are the per-bag labels, iterated the
            same way.
    """
    # perform the actual misvm classification
    if verbose:
        print("Creating train and test bags and labels...")
    # NOTE(review): ``vectors`` and ``labels`` are iterated independently, so
    # bags and labels only line up if both yield their keys in the same
    # order -- confirm against the caller.
    bags = [numpy.array(vectors[v], dtype=float)
            for v in vectors]  # numpy-format matrix for use in misvm
    labels = numpy.array([labels[l] for l in labels],
                         dtype=float)  # numpy-format labels for use in misvm

    # Split dataset into train (even positions) and test (odd positions) sets
    train_bags = []
    train_labels = []
    test_bags = []
    test_labels = []
    for i, label in enumerate(labels):
        if i % 2 == 0:
            train_bags.append(bags[i])
            train_labels.append(label)
        else:
            test_bags.append(bags[i])
            test_labels.append(label)

    if verbose:
        print("MISVM Classifying...")

    redirect = output != 'NONE'
    # Remember the active stream so a redirect installed by the caller
    # survives this function.
    previous_stdout = sys.stdout
    report = open(output, 'w') if redirect else None
    try:
        if redirect:
            sys.stdout = report
        # establish classifiers
        classifiers = {
            'sbMIL': misvm.sbMIL(kernel='rbf', eta=0.1, C=1.0),
            'SIL': misvm.SIL(kernel='rbf', C=1.0),
            'MISVM': misvm.MISVM(kernel='rbf', C=1.0, max_iters=100),
        }
        # Train/Evaluate classifiers
        accuracies = {}
        for algorithm, classifier in classifiers.items():
            classifier.fit(train_bags, train_labels)
            predictions = classifier.predict(test_bags)
            accuracies[algorithm] = numpy.average(
                test_labels == numpy.sign(predictions))
        for algorithm, accuracy in accuracies.items():
            print('\n%s Accuracy: %.1f%%' % (algorithm, 100 * accuracy))
    finally:
        # Always restore stdout and close the report, even if fit or predict
        # raises part-way through.
        if redirect:
            sys.stdout = previous_stdout
            report.close()
Ejemplo n.º 5
0
def main():
    """Score three MIL SVMs on the raw (un-normalized) 'musk1' data set.

    The examples are parsed with ``parse_c45``, grouped into bags, and split
    so that the first 10 bags form the test set.  Each classifier's accuracy
    (fraction of test labels equal to the sign of the prediction) is printed.
    """
    # Parse the C4.5 examples and group them into bags
    grouped = bag_set(parse_c45('musk1'))

    # One NumPy matrix per bag; the [:, 2:-1] slice drops the first two
    # columns and the last column (bag/instance ids and class label).
    bags = [np.array(bag.to_float())[:, 2:-1] for bag in grouped]
    raw_labels = np.array([bag.label for bag in grouped], dtype=float)
    # Map 0/1 labels onto -1/+1
    labels = 2 * raw_labels - 1

    # Arbitrary split: first 10 bags for testing, the rest for training
    test_bags, train_bags = bags[:10], bags[10:]
    test_labels, train_labels = labels[:10], labels[10:]

    # Classifiers under comparison
    classifiers = {
        'MissSVM': misvm.MissSVM(kernel='linear', C=1.0, max_iters=10),
        'sbMIL': misvm.sbMIL(kernel='linear', eta=0.1, C=1.0),
        'SIL': misvm.SIL(kernel='linear', C=1.0),
    }

    # Fit each model, then score the sign of its predictions
    accuracies = {}
    for name, model in classifiers.items():
        model.fit(train_bags, train_labels)
        predicted_sign = np.sign(model.predict(test_bags))
        accuracies[name] = np.average(test_labels == predicted_sign)

    for name, score in accuracies.items():
        print('\n%s Accuracy: %.1f%%' % (name, 100 * score))