def get_raw_data():
    """Load the 'musk1' C4.5 data and return shuffled, standardized bags.

    Every example is standardized column-wise with the mean and standard
    deviation computed over the whole example set, after which the first two
    entries and the last entry of each example are dropped (per the original
    notes these are the bag/instance ids and the class label).

    Returns:
        A ``(bags, labels)`` pair of equal-length tuples, shuffled together
        with a fixed seed so the order is reproducible. Each bag is a NumPy
        array built from the bag's normalized examples.
    """
    examples = parse_c45('musk1')

    # Column statistics are taken over all examples before grouping.
    all_rows = np.array(examples.to_float())
    col_mean = np.average(all_rows, axis=0)
    col_std = np.std(all_rows, axis=0)
    # A constant column has zero spread; divide by 1 instead of by 0.
    col_std[np.nonzero(col_std == 0.0)] = 1.0

    def standardize(ex):
        # Operates on a single example vector: the [2:-1] slice below is
        # one-dimensional and strips the two leading entries and the
        # trailing entry after scaling.
        scaled = (np.array(ex) - col_mean) / col_std
        return scaled[2:-1]

    grouped = bag_set(examples)

    bag_arrays = [np.array(bag.to_float(standardize)) for bag in grouped]
    bag_labels = [bag.label for bag in grouped]

    # Shuffle bags and labels in lockstep. Note that this reseeds the
    # module-level ``random`` generator.
    paired = list(zip(bag_arrays, bag_labels))
    random.seed(55)
    random.shuffle(paired)

    bags, labels = zip(*paired)
    return bags, labels
def main():
    """Train three misvm classifiers on 'musk1' and log their accuracy.

    The dataset's string form is written to the module-level ``output``
    object (defined outside this block; presumably an open file), and every
    ``<...>`` match found in that string is printed. Examples are then
    standardized, grouped into bags, split into a 10-bag test set and a
    training set made of the rest, and each classifier's test accuracy is
    both printed and written to ``output``.
    """
    dataset = parse_c45('musk1')

    # Dump the dataset and show what the "<...>" pattern finds in it.
    table = re.findall(r"<(.*)>", str(dataset))
    output.write(str(dataset))
    print(len(table))
    print(table)

    # Column-wise statistics over every example.
    all_rows = np.array(dataset.to_float())
    col_mean = np.average(all_rows, axis=0)
    col_std = np.std(all_rows, axis=0)
    # Avoid dividing by zero for columns with no spread.
    col_std[np.nonzero(col_std == 0.0)] = 1.0

    def standardize(ex):
        # Scale one example, then strip its first two entries and its last
        # entry (bag/instance ids and class label, per the original notes).
        scaled = (np.array(ex) - col_mean) / col_std
        return scaled[2:-1]

    grouped = bag_set(dataset)
    bags = [np.array(bag.to_float(standardize)) for bag in grouped]

    # Map 0/1 labels onto -1/+1.
    labels = 2 * np.array([bag.label for bag in grouped], dtype=float) - 1

    # Arbitrary split: the first n_test bags are held out for testing.
    n_test = 10
    test_bags, train_bags = bags[:n_test], bags[n_test:]
    test_labels, train_labels = labels[:n_test], labels[n_test:]

    classifiers = {
        'MissSVM': misvm.MissSVM(kernel='linear', C=1.0, max_iters=20),
        'sbMIL': misvm.sbMIL(kernel='linear', eta=0.1, C=1e2),
        'SIL': misvm.SIL(kernel='linear', C=1.0),
    }

    # Fit every classifier first; reporting happens afterwards.
    accuracies = {}
    for algorithm, classifier in classifiers.items():
        classifier.fit(train_bags, train_labels)
        predictions = classifier.predict(test_bags)
        hits = test_labels == np.sign(predictions)
        accuracies[algorithm] = np.average(hits)

    for algorithm, accuracy in accuracies.items():
        report = '\n%s Accuracy: %.1f%%' % (algorithm, 100 * accuracy)
        print(report)
        output.write(report)
def main():
    """Evaluate MissSVM, sbMIL and SIL on standardized 'musk1' bags.

    Examples are standardized with statistics computed over the whole
    dataset, grouped into bags, and split so that the first 10 bags form the
    test set and the remainder the training set. The fraction of test bags
    whose predicted sign matches the -1/+1 label is printed per classifier.
    """
    examples = parse_c45('musk1')

    # Statistics used for standardization, one value per column.
    matrix = np.array(examples.to_float())
    mean_per_col = np.average(matrix, axis=0)
    std_per_col = np.std(matrix, axis=0)
    # Zero spread would cause a division by zero; use 1.0 for such columns.
    std_per_col[np.nonzero(std_per_col == 0.0)] = 1.0

    def normalizer(ex):
        # Works on one example vector; after scaling, the first two entries
        # and the last entry (bag/instance ids and class label, per the
        # original notes) are removed.
        vector = np.array(ex)
        return ((vector - mean_per_col) / std_per_col)[2:-1]

    bagset = bag_set(examples)

    bags = [np.array(bag.to_float(normalizer)) for bag in bagset]
    raw_labels = np.array([bag.label for bag in bagset], dtype=float)
    # 0/1 labels become -1/+1 labels.
    labels = 2 * raw_labels - 1

    # Arbitrary train/test split at a fixed position.
    split_at = 10
    train_bags = bags[split_at:]
    train_labels = labels[split_at:]
    test_bags = bags[:split_at]
    test_labels = labels[:split_at]

    classifiers = {
        'MissSVM': misvm.MissSVM(kernel='linear', C=1.0, max_iters=20),
        'sbMIL': misvm.sbMIL(kernel='linear', eta=0.1, C=1e2),
        'SIL': misvm.SIL(kernel='linear', C=1.0),
    }

    # Train and score everything before printing any result.
    accuracies = {}
    for algorithm, classifier in classifiers.items():
        classifier.fit(train_bags, train_labels)
        predicted_sign = np.sign(classifier.predict(test_bags))
        accuracies[algorithm] = np.average(test_labels == predicted_sign)

    for algorithm, accuracy in accuracies.items():
        print('\n%s Accuracy: %.1f%%' % (algorithm, 100 * accuracy))
def main():
    """Evaluate MissSVM, sbMIL and SIL on un-normalized 'musk1' bags.

    Each bag becomes a 2-D NumPy array with its first two columns and last
    column removed (bag/instance ids and class label, per the original
    notes). The first 10 bags are held out for testing and the rest are used
    for training; per-classifier test accuracy is printed.
    """
    examples = parse_c45('musk1')
    grouped = bag_set(examples)

    # Build one feature matrix per bag, dropping the id and label columns.
    bags = []
    for bag in grouped:
        instances = np.array(bag.to_float())
        bags.append(instances[:, 2:-1])

    # 0/1 labels become -1/+1 labels.
    labels = 2 * np.array([bag.label for bag in grouped], dtype=float) - 1

    # Arbitrary split: leading bags are the test set.
    held_out = 10
    test_bags, test_labels = bags[:held_out], labels[:held_out]
    train_bags, train_labels = bags[held_out:], labels[held_out:]

    classifiers = {
        'MissSVM': misvm.MissSVM(kernel='linear', C=1.0, max_iters=10),
        'sbMIL': misvm.sbMIL(kernel='linear', eta=0.1, C=1.0),
        'SIL': misvm.SIL(kernel='linear', C=1.0),
    }

    # All classifiers are trained and scored before anything is printed.
    accuracies = {}
    for algorithm, classifier in classifiers.items():
        classifier.fit(train_bags, train_labels)
        predictions = classifier.predict(test_bags)
        correct = test_labels == np.sign(predictions)
        accuracies[algorithm] = np.average(correct)

    for algorithm, accuracy in accuracies.items():
        print('\n%s Accuracy: %.1f%%' % (algorithm, 100 * accuracy))
def main():
    """Evaluate MissSVM, sbMIL and SIL on 'musk1', echoing the training set.

    Each bag becomes a 2-D NumPy array with its first two columns and last
    column removed (bag/instance ids and class label, per the original
    notes). The first 10 bags are held out for testing and the rest are used
    for training. The training bags, their count and the training labels are
    printed for inspection before the classifiers are fitted, and the test
    accuracy of every classifier is printed at the end.

    All output uses single-argument ``print(...)`` calls, which emit the same
    text under both Python 2 and Python 3 (the former ``print`` statements
    were a syntax error on Python 3).
    """
    # Load list of C4.5 Examples
    example_set = parse_c45('musk1')

    # Group examples into bags
    bagset = bag_set(example_set)

    # Convert bags to NumPy arrays
    # (The ...[:, 2:-1] removes first two columns and last column,
    #  which are the bag/instance ids and class label)
    bags = [np.array(b.to_float())[:, 2:-1] for b in bagset]
    labels = np.array([b.label for b in bagset], dtype=float)
    # Convert 0/1 labels to -1/1 labels
    labels = 2 * labels - 1

    # Split dataset arbitrarily to train/test sets
    train_bags = bags[10:]
    train_labels = labels[10:]
    test_bags = bags[:10]
    test_labels = labels[:10]

    # Debug output describing the training set.
    print(train_bags)
    print(len(train_bags))
    print("\n\n\n")
    print(train_labels)

    # Construct classifiers
    classifiers = {}
    classifiers['MissSVM'] = misvm.MissSVM(kernel='linear', C=1.0, max_iters=10)
    classifiers['sbMIL'] = misvm.sbMIL(kernel='linear', eta=0.1, C=1.0)
    classifiers['SIL'] = misvm.SIL(kernel='linear', C=1.0)

    # Train/Evaluate classifiers
    accuracies = {}
    for algorithm, classifier in classifiers.items():
        classifier.fit(train_bags, train_labels)
        predictions = classifier.predict(test_bags)
        # A bag counts as correct when the sign of its prediction equals its
        # -1/+1 label.
        accuracies[algorithm] = np.average(test_labels == np.sign(predictions))

    for algorithm, accuracy in accuracies.items():
        print('\n%s Accuracy: %.1f%%' % (algorithm, 100 * accuracy))
from scipy.sparse import identity
from scipy import sparse
from scipy.optimize import linprog
from numpy import inf
from scipy.optimize import fmin_bfgs
import sys
from misvmio import parse_c45, bag_set
try:
    from sklearn import cross_validation
except ImportError:
    # scikit-learn 0.20 removed ``sklearn.cross_validation``; its
    # ``train_test_split`` lives in ``sklearn.model_selection``. Binding that
    # module to the old name keeps the call below working on both old and
    # new releases.
    from sklearn import model_selection as cross_validation
from sklearn import metrics

###############
#LOAD DATA SET#
###############

# Load list of C4.5 Examples
example_set = parse_c45('musk1')

# Group examples into bags
bagset = bag_set(example_set)

# Convert bags to NumPy arrays
# (The ...[:, 2:-1] removes first two columns and last column,
#  which are the bag/instance ids and class label)
bags = [np.array(b.to_float())[:, 2:-1] for b in bagset]
labels = np.array([b.label for b in bagset], dtype=float)

# SPLIT IN TRAINING AND TESTING
# 10% of the bags are held out for testing; the fixed random_state makes the
# split reproducible.
train_bags, test_bags, train_labels, test_labels = cross_validation.train_test_split(
    bags, labels, test_size=0.1, random_state=65)