Example #1
0
def initialize_instances(filename):
    """Read the abalone.txt CSV data into a list of instances.

    The first row is treated as a header and skipped. For every other row the
    first column (an index) and the last column are left out of the feature
    vector; the label is 0 when the last column is below 15 and 1 otherwise.

    :param str filename: path of the CSV file to read
    :return: the parsed instances, in file order
    :rtype: list[Instance]
    """
    print('Creating instances')
    instances = []

    # The context manager closes the file even when a row fails to parse,
    # which the previous open()/close() pair did not guarantee.
    with open(filename, 'rb') as inFile:
        reader = csv.reader(inFile, delimiter=',')
        next(reader, None)  # skip the header row (no-op on an empty file)

        for row in reader:
            # ignore the index and the label
            instance = Instance([float(value) for value in row[1:-1]])
            # set the label
            instance.setLabel(Instance(0 if float(row[-1]) < 15 else 1))
            instances.append(instance)

    print('Finished instances')

    return instances
Example #2
0
def train(oa, network, oaName, instances, measure):
    """Run the optimizer for TRAINING_ITERATIONS steps and print each error.

    After every optimizer step the network is run on each instance and the
    summed error reported by ``measure`` is printed with three decimals.

    :param OptimizationAlgorithm oa:
    :param BackPropagationNetwork network:
    :param str oaName:
    :param list[Instance] instances:
    :param AbstractErrorMeasure measure:
    """
    print("\nError results for %s\n---------------------------" % (oaName, ))

    for _ in xrange(TRAINING_ITERATIONS):
        oa.train()

        total = 0.00
        for sample in instances:
            network.setInputValues(sample.getData())
            network.run()

            expected = sample.getLabel()
            outputs = network.getOutputValues()
            # Wrap the raw outputs so the measure can compare them to the label.
            observed = Instance(outputs, Instance(outputs.get(0)))
            total += measure.value(expected, observed)

        print("%0.03f" % total)
Example #3
0
def errorOnDataSet(network,ds,measure):
    """Return ``(MSE, acc)`` for ``network`` evaluated on ``ds``.

    ``MSE`` is the summed ``measure`` value divided by ``len(ds)``; ``acc`` is
    the fraction of instances whose clamped first output lies within 0.5 of
    the label's continuous value.
    """
    N = len(ds)
    total_error = 0.
    hits = 0
    misses = 0

    for sample in ds:
        network.setInputValues(sample.getData())
        network.run()

        target = sample.getLabel().getContinuous()
        # Clamp the first output into [0, 1] before comparing with the label.
        guess = max(min(network.getOutputValues().get(0),1),0)
        if abs(guess - target) < 0.5:
            hits += 1
        else:
            misses += 1

        expected = sample.getLabel()
        outputs = network.getOutputValues()
        total_error += measure.value(
            expected, Instance(outputs, Instance(outputs.get(0))))

    MSE = total_error/float(N)
    acc = hits/float(hits+misses)
    return MSE,acc
Example #4
0
def train(oa, network, oaName, instances, measure = SumOfSquaresError(),
          surpress_output=False,
          TRAINING_ITERATIONS = 1500):
    """Train a network, printing the summed error every 100 iterations.

    :param OptimizationAlgorithm oa:
    :param BackPropagationNetwork network:
    :param str oaName:
    :param list[Instance] instances:
    :param AbstractErrorMeasure measure:
    :param bool surpress_output: when true nothing is printed
    :param int TRAINING_ITERATIONS: number of optimizer steps to run
    """
    verbose = not surpress_output

    if verbose:
        print("\nError results for %s every 100 iterations"
              "\n---------------------------" % (oaName,))

    for step in xrange(TRAINING_ITERATIONS):
        oa.train()

        total = 0.00
        for sample in instances:
            network.setInputValues(sample.getData())
            network.run()

            expected = sample.getLabel()
            outputs = network.getOutputValues()
            observed = Instance(outputs, Instance(outputs.get(0)))
            total += measure.value(expected, observed)

        # The error is computed on every step but only reported periodically.
        if verbose and step % 100 == 0:
            print("%0.03f" % total)
Example #5
0
def initialize_instances(infile):
    """Load a CSV file into instances carrying one-hot class labels.

    Every column except the last becomes a float feature. The last column is
    parsed as a class index and expanded into a list of NUM_OF_CLASSES zeros
    with a single 1 at that index.
    """
    loaded = []

    with open(infile, "r") as dat:
        for record in csv.reader(dat):
            sample = Instance([float(cell) for cell in record[:-1]])

            # assumes labels are integers from 0 to NUM_OF_CLASSES-1
            class_index = int(float(record[-1]))
            one_hot = [0] * NUM_OF_CLASSES
            one_hot[class_index] = 1

            sample.setLabel(Instance(one_hot))
            loaded.append(sample)

    return loaded
Example #6
0
def errorOnDataSet(network, ds, measure):
    """Return ``(MSE, acc)`` for ``network`` evaluated on ``ds``.

    The first network output is thresholded at 0.5 into 1.0 / 0.0 and compared
    for equality with the first element of the label data. ``MSE`` is the
    summed ``measure`` value divided by ``len(ds)``.
    """
    N = len(ds)
    total_error = 0.
    hits = 0
    misses = 0
    for sample in ds:
        network.setInputValues(sample.getData())
        network.run()

        target = sample.getLabel().getData().get(0)
        raw = network.getOutputValues().get(0)
        # Binarize the raw output so it can be compared with the label exactly.
        guess = 1.0 if raw > 0.5 else 0.0

        if target == guess:
            hits += 1
        else:
            misses += 1

        expected = sample.getLabel()
        outputs = network.getOutputValues()
        total_error += measure.value(expected,
                                     Instance(outputs, Instance(outputs)))

    MSE = total_error / float(N)
    acc = hits / float(hits + misses)
    return MSE, acc
Example #7
0
def train(oa, network, oaName, instances, measure):
    """Train a network and log the mean error of every iteration.

    The mean error is printed and also written, one value per row, to
    ``data/<oaName>.csv``.

    :param OptimizationAlgorithm oa:
    :param BackPropagationNetwork network:
    :param str oaName:
    :param list[Instance] instances:
    :param AbstractErrorMeasure measure:
    """
    print("\nError results for %s\n---------------------------" % (oaName, ))

    output_path = os.path.join("data", oaName + ".csv")
    with open(output_path, "wb") as results:
        writer = csv.writer(results, delimiter=',')

        for _ in xrange(TRAINING_ITERATIONS):
            oa.train()

            total = 0.00
            for sample in instances:
                network.setInputValues(sample.getData())
                network.run()

                expected = sample.getLabel()
                outputs = network.getOutputValues()
                observed = Instance(outputs, Instance(outputs.get(0)))
                total += measure.value(expected, observed)

            mean_error = total / len(instances)
            print(mean_error)
            writer.writerow([mean_error])
Example #8
0
def train(oa, network, oaName, instances, measure, fileobject):
    """Train a network and append per-iteration errors to ``fileobject``.

    A ``"<oaName> training "`` header line is written first, followed by one
    ``oaName,iteration,error`` line per iteration.

    :param OptimizationAlgorithm oa:
    :param BackPropagationNetwork network:
    :param str oaName:
    :param list[Instance] instances:
    :param AbstractErrorMeasure measure:
    :param fileobject: writable object exposing ``write``
    """
    fileobject.write(str(oaName) + " training \n")

    for step in xrange(TRAINING_ITERATIONS):
        oa.train()

        total = 0.00
        for sample in instances:
            network.setInputValues(sample.getData())
            network.run()

            expected = sample.getLabel()
            outputs = network.getOutputValues()
            observed = Instance(outputs, Instance(outputs.get(0)))
            total += measure.value(expected, observed)

        print("finished iter %s for %s" % (step, oaName))
        fields = [str(oaName), str(step), str(total)]
        fileobject.write(",".join(fields) + "\n")
Example #9
0
def eval_instances(net, instances, measure):
    """Return ``(accuracy, error)`` of ``net`` on a set of instances.

    Works for any of the training, test or validation sets. Only the first
    network output is used for classification (binary class).

    :param net: network exposing ``setInputValues``, ``run`` and
        ``getOutputValues``
    :param list[Instance] instances: labelled instances to evaluate
    :param AbstractErrorMeasure measure: error measure applied per instance
    :return: fraction of correctly classified instances and the mean of
        ``measure.value`` over the set
    :raises ZeroDivisionError: if ``instances`` is empty
    """
    set_len = len(instances)
    right, error = 0, 0.
    for i in instances:
        net.setInputValues(i.getData())
        net.run()
        # should only need first output binary class
        truth = i.getLabel().getContinuous()
        output_values = net.getOutputValues()
        n_out = output_values.get(0)

        # Compare by distance instead of int() truncation: truncating maps
        # every output below 1.0 (e.g. 0.97) to class 0, so positives could
        # practically never be counted as correct.
        if abs(n_out - truth) < 0.5:
            right += 1

        output = i.getLabel()
        example = Instance(output_values, Instance(output_values.get(0)))
        error += measure.value(output, example)
    accuracy = float(right) / float(set_len)
    error = error / float(set_len)

    return accuracy, error
Example #10
0
def error_on_data_set(network, ds, measure, ugh=False):
    """Evaluate ``network`` on ``ds`` and return ``(MSE, acc, f1)``.

    For each instance the network is run, every output is clamped to [0, 1]
    and the index of the largest clamped output is taken as the prediction;
    the instance counts as correct when the rounded label value at that index
    equals 1. ``MSE`` is the summed ``measure`` value divided by ``len(ds)``
    and ``acc`` is ``correct / (correct + incorrect)``. ``f1`` comes from
    ``f1_score`` (defined elsewhere), called with the per-instance lists of
    clamped labels and rounded predictions.

    :param network: network exposing ``setInputValues``, ``run`` and
        ``getOutputValues``
    :param ds: sized iterable of labelled instances
    :param measure: error measure exposing ``value(output, example)``
    :param bool ugh: debug switch; when true, per-instance details are printed
        and the process exits via ``sys.exit(0)`` once the metrics are printed
    """
    N = len(ds)
    error = 0.
    correct = 0
    incorrect = 0
    actuals = []
    predicteds = []
    for instance in ds:
        network.setInputValues(instance.getData())
        network.run()
        actual_out = instance.getLabel()
        predicted_out = network.getOutputValues()
        predicted = []
        actual = []
        for j in range(0, predicted_out.size()):
            # Clamp each output into [0, 1].
            predicted.append(max(min(predicted_out.get(j), 1), 0))
            # NOTE(review): these diagnostics run inside the loop, so they
            # inspect the partially filled list after every appended output,
            # not only the complete prediction vector.
            if sum([round(cur) for cur in predicted]) > 1:
                print "FOUND TWO ONES : {}".format(
                    [round(cur) for cur in predicted])
            elif sum([round(cur) for cur in predicted]) == 0:
                print "ALL ZEROS : {}".format(
                    [round(cur) for cur in predicted])

        for k in range(0, actual_out.getData().size()):
            actual.append(round(actual_out.getData().get(k)))

        if ugh:
            print "label: {}".format(instance.getLabel())
            print "actual: {}, predicted: {}".format(actual_out, predicted_out)

        predicteds.append([round(cur) for cur in predicted])
        actuals.append([max(min(cur, 1), 0) for cur in actual])
        # Index of the largest clamped output (the first one on ties).
        ind = max(xrange(len(predicted)), key=predicted.__getitem__)
        if actual[ind] == 1:
            correct += 1
            if ugh:
                print "CORRECT"
        else:
            incorrect += 1
            if ugh:
                print "INCORRECT"
        output = instance.getLabel()
        output_values = network.getOutputValues()
        example = Instance(output_values, Instance(output_values))
        error += measure.value(output, example)
        if ugh:
            print "error: {}".format(measure.value(output, example))

    MSE = error / float(N)
    acc = correct / float(correct + incorrect)
    # f1_score is defined elsewhere; its result is unpacked as
    # (precision, recall, f1).
    precision, recall, f1 = f1_score(actuals, predicteds)
    if ugh:
        print "MSE: {}, acc: {}, f1: {} (precision: {}, recall: {})".format(
            MSE, acc, f1, precision, recall)
        # Debug mode terminates the whole process after one evaluation.
        import sys
        sys.exit(0)

    return MSE, acc, f1
Example #11
0
def errorOnDataSet(network, ds, measure):
    """Return ``(MSE, acc, F1)`` for ``network`` evaluated on ``ds``.

    The first output is clamped to [0, 1]; a value of at least 0.5 counts as
    a positive prediction for the confusion counts, while accuracy counts an
    instance as correct when the clamped output is within 0.5 of the label.
    Precision, recall and F1 fall back to 0.0 when their denominator is zero.
    """
    N = len(ds)
    total_error = 0.
    hits = 0
    misses = 0
    fp = 0.0
    fn = 0.0
    tp = 0.0
    tn = 0.0
    for sample in ds:
        network.setInputValues(sample.getData())
        network.run()
        target = sample.getLabel().getContinuous()
        guess = network.getOutputValues().get(0)
        guess = max(min(guess, 1), 0)

        # Tally the confusion-matrix cell for the F1 score. Labels other than
        # exactly 0.0 / 1.0 are not tallied at all.
        if target == 0.0:
            if guess >= 0.5:
                fp += 1.0
            elif guess < 0.5:
                tn += 1.0
        elif target == 1.0:
            if guess >= 0.5:
                tp += 1.0
            elif guess < 0.5:
                fn += 1.0

        if abs(guess - target) < 0.5:
            hits += 1
        else:
            misses += 1

        expected = sample.getLabel()
        outputs = network.getOutputValues()
        observed = Instance(outputs, Instance(outputs.get(0)))
        total_error += measure.value(expected, observed)

    MSE = total_error / float(N)
    acc = hits / float(hits + misses)

    predicted_positives = tp + fp
    precision = tp / predicted_positives if predicted_positives else 0.0

    actual_positives = tp + fn
    recall = tp / actual_positives if actual_positives else 0.0

    pr_sum = precision + recall
    F1 = 2.0 * ((precision * recall) / pr_sum) if pr_sum else 0.0

    return MSE, acc, F1
Example #12
0
def initialize_instances():
    """Load TRAIN_FILE (CSV) into a list of labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column parses to the integer 0 and 1 otherwise.
    """
    loaded = []
    with open(TRAIN_FILE, "r") as handle:
        for record in csv.reader(handle):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 0 if int(record[-1]) == 0 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)
    return loaded
Example #13
0
def get_test_instances():
    """Load TEST_FILE (CSV) into a list of labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column parses to the integer 0 and 1 otherwise.
    """
    loaded = []
    with open(TEST_FILE, "r") as handle:
        for record in csv.reader(handle):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 0 if int(record[-1]) == 0 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)
    return loaded
def train(oa,
          network,
          oaName,
          train_set,
          test_set,
          measure,
          max_iterations=TRAINING_ITERATIONS):
    """Train a network and record train/test error for every iteration.

    One ``iteration,train_error,test_error`` line (errors averaged per
    instance) is written per iteration to ``out/error/<oaName>.csv``.

    :param OptimizationAlgorithm oa:
    :param BackPropagationNetwork network:
    :param str oaName:
    :param train_set: object whose ``getInstances()`` yields training data
    :param test_set: object whose ``getInstances()`` yields test data
    :param AbstractErrorMeasure measure:
    :param int max_iterations: number of optimizer steps to run
    """

    def summed_error(samples):
        # Run the network on every sample and add up the measured error.
        total = 0.00
        for sample in samples:
            network.setInputValues(sample.getData())
            network.run()

            expected = sample.getLabel()
            outputs = network.getOutputValues()
            observed = Instance(outputs, Instance(outputs.get(0)))
            total += measure.value(expected, observed)
        return total

    train_instances = train_set.getInstances()
    test_instances = test_set.getInstances()

    fname = 'out/error/%s.csv' % (oaName)

    with open(fname, 'w') as f:
        for iteration in xrange(max_iterations):
            oa.train()

            train_error = summed_error(train_instances)
            test_error = summed_error(test_instances)

            train_error_norm = train_error / len(train_instances)
            test_error_norm = test_error / len(test_instances)

            row = (iteration, train_error_norm, test_error_norm)
            f.write("%d,%0.05f,%0.05f\n" % row)

    print('Error written to %s' % (fname))
def train(oa, network, oaName, instances, measure):
    """Train a network and collect periodic accuracy/error snapshots.

    Iterates over the values in TRAINING_ITERATIONS and stops at the first
    value that reaches ``int(len(TRAINING_ITERATIONS) / 5)``. Whenever the
    iteration value is a multiple of ``interval``, the first 1000 instances
    are evaluated and ``[iteration, accuracy, error, training_time]`` is
    recorded, where ``training_time`` is the cumulative wall-clock time spent
    inside ``oa.train()``.

    :param OptimizationAlgorithm oa:
    :param BackPropagationNetwork network:
    :param str oaName:
    :param list[Instance] instances:
    :param AbstractErrorMeasure measure:
    :return: the recorded snapshots
    """
    history = []
    elapsed = 0.

    for iteration in TRAINING_ITERATIONS:
        if iteration >= int(len(TRAINING_ITERATIONS) / 5):
            break

        tick = time.time()
        oa.train()
        elapsed += time.time() - tick

        # Only evaluate on iterations that fall on the sampling interval.
        if iteration % interval == 0:
            hits, misses = 0, 0
            total = 0.
            for sample in instances[:1000]:
                network.setInputValues(sample.getData())
                network.run()

                target = sample.getLabel().getContinuous()
                guess = network.getOutputValues().get(0)
                if abs(guess - target) < 0.5:
                    hits += 1
                else:
                    misses += 1

                expected = sample.getLabel()
                outputs = network.getOutputValues()
                observed = Instance(outputs, Instance(outputs.get(0)))
                total += measure.value(expected, observed)

            accuracy = 1. * hits / (hits + misses)
            history.append([iteration, accuracy, total, elapsed])

    return history
Example #16
0
def initialize_instances(infile):
    """Load a CSV file into a list of binary-labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column is less than or equal to zero and 1 otherwise.
    """
    loaded = []
    with open(infile, "r") as dat:
        for record in csv.reader(dat):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 0 if float(record[-1]) <= 0 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
Example #17
0
def get_error(data, network, measure):
    """Return the summed ``measure`` value of ``network`` over ``data``."""
    total = 0.00
    for sample in data:
        network.setInputValues(sample.getData())
        network.run()

        expected = sample.getLabel()
        outputs = network.getOutputValues()
        # Wrap the raw outputs so the measure can compare them to the label.
        observed = Instance(outputs, Instance(outputs.get(0)))
        total += measure.value(expected, observed)
    return total
Example #18
0
def maybe_serialize(file, force=False):
    """Serialize a CSV data set into a ``.ser`` file unless one exists.

    The CSV is read, normalized via ``normalize_data``, shuffled and split
    into train and test instances at TRAIN_TEST_SPLIT_RATIO. Labels are 0
    when the last column equals -1 and 1 otherwise. The resulting
    ``{TRAIN: ..., TEST: ...}`` mapping is written with a Java object stream.

    :param str file: path of the CSV data set
    :param bool force: re-create the serialized file even if it exists
    :return: path of the serialized file (``file`` with a ``.ser`` extension)
    :rtype: str
    """
    serialized_file = os.path.splitext(file)[0] + '.ser'

    if os.path.isfile(serialized_file) and not force:
        stdout.write("Serialized file for data-set found.\n")
        return serialized_file

    def to_instances(rows):
        # Convert CSV rows into labelled instances (-1 -> 0, anything else 1).
        converted = []
        for row in rows:
            instance = Instance([float(value) for value in row[:-1]])
            instance.setLabel(Instance(0 if float(row[-1]) == -1 else 1))
            converted.append(instance)
        return converted

    stdout.write("Serializing Data-Set...\n\n")

    with open(file, "r") as pima:
        reader_list = list(csv.reader(pima))

    stdout.write("Some sample, un-shuffled data: \n%s\n\n" % reader_list[:3])

    normalize_data(reader_list)
    random.shuffle(reader_list)

    split_at = int(len(reader_list) * TRAIN_TEST_SPLIT_RATIO)
    train_instances = to_instances(reader_list[:split_at])
    test_instances = to_instances(reader_list[split_at:])

    stdout.write("Some sample, shuffled training data (after "
                 "normalization): "
                 "\n%s\n\n" % train_instances[:3])
    stdout.write("Some sample, shuffled test data (after "
                 "normalization): \n%s\n\n" %
                 test_instances[:3])

    stdout.write("Train Data\tTest Data\n")
    stdout.write("%s\t\t%s\n" % (len(train_instances),
                                 len(test_instances)))

    save = {
        TRAIN: train_instances,
        TEST: test_instances,
    }

    # `io` must be java.io here (Jython); Python's io has no FileOutputStream.
    outFile = io.FileOutputStream(serialized_file)
    try:
        outStream = io.ObjectOutputStream(outFile)
        outStream.writeObject(save)
        # Closing the object stream flushes its buffer before the file is
        # closed; previously only the underlying file stream was closed.
        outStream.close()
    finally:
        # Release the file handle even if serialization fails.
        outFile.close()

    return serialized_file
Example #19
0
def initialize_instances(filename, i):
    """Load a CSV file into a list of labelled instances.

    All columns but the last become float features; the last column, parsed
    as an integer, is used directly as the label. The ``i`` parameter is
    accepted but not used.
    """
    loaded = []

    with open(filename, "r") as f:
        for record in csv.reader(f):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            sample.setLabel(Instance(int(record[-1])))
            loaded.append(sample)

    return loaded
Example #20
0
def read_dataset(path, pos_set):
    """Load a CSV file, skipping its first row, into labelled instances.

    All columns but the last become float features; the label is 1 when the
    last column (as a float) is contained in ``pos_set`` and 0 otherwise.
    """
    loaded = []
    with open(path, 'r') as stream:
        reader = csv.reader(stream)
        next(reader, None)  # the first row is never parsed
        for record in reader:
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 1 if float(record[-1]) in pos_set else 0
            sample.setLabel(Instance(label))
            loaded.append(sample)
    return loaded
Example #21
0
def initialize_instances(infile):
    """Load a CSV file into a list of binary-labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column is negative and 1 otherwise.
    """
    loaded = []

    with open(infile, "r") as dat:
        for record in csv.reader(dat):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 0 if float(record[-1]) < 0 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)
    return loaded
Example #22
0
def initialize_instances():
    """Load INPUT_FILE (CSV) into a list of binary-labelled instances.

    All columns but the last become float features; the label is 1 when the
    last column is the string "crisis" and 0 otherwise.
    """
    loaded = []

    with open(INPUT_FILE, "r") as handle:
        for record in csv.reader(handle):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 1 if record[-1] == "crisis" else 0
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
Example #23
0
def initialize_instances():
    """Load INPUT_FILE (CSV) into a list of binary-labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column is below 15 and 1 otherwise.
    """
    loaded = []

    with open(INPUT_FILE, "r") as handle:
        for record in csv.reader(handle):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 0 if float(record[-1]) < 15 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
Example #24
0
def initialize_instances():
    """Load INPUT_FILE (CSV) into a list of binary-labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column equals 0 and 1 otherwise.
    """
    loaded = []

    with open(INPUT_FILE, "r") as handle:
        for record in csv.reader(handle):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 0 if float(record[-1]) == 0 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
Example #25
0
def initialize_instances(examples):
    """Load a CSV file into a list of binary-labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column is the string 'g' and 1 otherwise.
    """
    loaded = []

    with open(examples, "r") as handle:
        for record in csv.reader(handle):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            label = 0 if record[-1] == 'g' else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
Example #26
0
def initialize_instances():
    """Load INPUT_FILE (CSV) into a list of binary-labelled instances.

    The first and last columns are left out of the float feature vector; the
    label is 0 when the last column equals 2 and 1 otherwise.
    """
    loaded = []

    with open(INPUT_FILE, "r") as handle:
        for record in csv.reader(handle):
            features = [float(cell) for cell in record[1:-1]]
            sample = Instance(features)
            # 2 is benign, 4 malignant
            label = 0 if float(record[-1]) == 2 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
Example #27
0
def initialize_instances(test=False):
    """Load TEST_FILE or TRAIN_FILE (CSV) into a list of instances.

    All columns but the last become float features. When ``do_chess`` is set
    the last column is used as a float label; when ``do_fmnist`` is set it is
    used as an index into a ten-element one-hot label.

    :param bool test: read TEST_FILE when true, TRAIN_FILE otherwise
    """
    loaded = []

    INPUT_FILE = TEST_FILE if test else TRAIN_FILE
    with open(INPUT_FILE, "r") as handle:
        for record in csv.reader(handle):
            sample = Instance([float(cell) for cell in record[:-1]])
            if do_chess:
                sample.setLabel(Instance(float(record[-1])))
            if do_fmnist:
                # One-hot encode the class index over ten classes.
                one_hot = [0] * 10
                class_index = int(record[-1])
                one_hot[class_index] = 1.0
                sample.setLabel(Instance(one_hot))
            loaded.append(sample)
    return loaded
def error_on_data_set(network, ds, measure, ugh=False):
    """Evaluate ``network`` on ``ds`` and return ``(MSE, acc, f1)``.

    The first output is clamped to [0, 1]; an instance counts as correct when
    that value is within 0.5 of the label's continuous value. ``f1`` comes
    from ``f1_score`` (defined elsewhere). With ``ugh`` set, per-instance
    details are printed and the process exits via ``sys.exit(0)`` after the
    summary line.
    """
    N = len(ds)
    total_error = 0.
    hits = 0
    misses = 0
    actuals = []
    predicteds = []
    for sample in ds:
        network.setInputValues(sample.getData())
        network.run()
        target = sample.getLabel().getContinuous()
        guess = network.getOutputValues().get(0)
        guess = max(min(guess, 1), 0)
        if ugh:
            print("label: {}".format(sample.getLabel()))
            print("actual: {}, predicted: {}".format(target, guess))

        predicteds.append(round(guess))
        actuals.append(max(min(target, 1), 0))

        if abs(guess - target) < 0.5:
            hits += 1
            verdict = "CORRECT"
        else:
            misses += 1
            verdict = "INCORRECT"
        if ugh:
            print(verdict)

        expected = sample.getLabel()
        outputs = network.getOutputValues()
        observed = Instance(outputs, Instance(outputs.get(0)))
        total_error += measure.value(expected, observed)
        if ugh:
            print("error: {}".format(measure.value(expected, observed)))

    MSE = total_error / float(N)
    acc = hits / float(hits + misses)
    precision, recall, f1 = f1_score(actuals, predicteds)
    if ugh:
        print("MSE: {}, acc: {}, f1: {} (precision: {}, recall: {})".format(
            MSE, acc, f1, precision, recall))
        # Debug mode terminates the whole process after one evaluation.
        import sys
        sys.exit(0)

    return MSE, acc, f1
Example #29
0
def initialize_instances():
    """Read the INPUT_FILE CSV data into a list of instances.

    All columns but the last become float features; the label is 0 when the
    last column is below 0.5 and 1 otherwise.

    :return: the parsed instances, in file order
    :rtype: list[Instance]
    """
    instances = []

    with open(INPUT_FILE, "r") as abalone:
        reader = csv.reader(abalone)
        for row in reader:
            # The former per-row ``INPUT_LAYER = len(row) - 1`` assignment was
            # dropped: without a ``global`` declaration it only bound an
            # unused local and never touched any module-level value.
            instance = Instance([float(value) for value in row[:-1]])
            instance.setLabel(Instance(0 if float(row[-1]) < 0.5 else 1))
            instances.append(instance)

    return instances
Example #30
0
def initialize_instances(infile):
    """Load a CSV file into a list of binary-labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column is below 0.001 and 1 otherwise.
    """
    loaded = []

    with open(infile, "r") as dat:
        for record in csv.reader(dat):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            # TODO: Set to <= 0 to handle 0/1 labels and not just -1/1?
            label = 0 if float(record[-1]) < 0.001 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
Example #31
0
def initialize_instances(file_path):
    """Load a CSV file into a list of binary-labelled instances.

    All columns but the last become float features; the label is 0 when the
    last column is below 1 and 1 otherwise.
    """
    loaded = []

    with open(file_path, "r") as adult:
        for record in csv.reader(adult):
            features = [float(cell) for cell in record[:-1]]
            sample = Instance(features)
            # For data already encoded as 0/1 this keeps the label unchanged.
            label = 0 if float(record[-1]) < 1 else 1
            sample.setLabel(Instance(label))
            loaded.append(sample)

    return loaded
    def read_data_files(self):
        """
        Read a scikit data set from ``<dataset_name>.data`` and
        ``<dataset_name>.target``. Both files hold whitespace-separated
        numbers, one sample per line, so no CSV parsing is needed.

        Sets ``self.n_samples``, ``self.samples``, ``self.n_features``,
        ``self.n_targets`` and ``self.out_type`` ("binary" for a single
        target column, "multiclass" otherwise).

        :return: None
        :raises Exception: if either file is missing, the files differ in
            line count, or a line's value count differs from the first line
        :raises IndexError: if the data file has no lines
        """

        # Set the data file names
        data_file = self.dataset_name + ".data"
        target_file = self.dataset_name + ".target"

        # Check that the files exist
        if not os.path.isfile(data_file):
            raise Exception("Data file '" + data_file + "' not found")
        # This used to re-test data_file, so a missing target file surfaced
        # later as an IOError instead of this message.
        if not os.path.isfile(target_file):
            raise Exception("Target file '" + target_file + "' not found")

        # Read the lines of the data and target files
        if self.verbose:
            print("Loading data")
        with open(data_file, 'r') as d_in:
            data_lines = d_in.readlines()
        with open(target_file, 'r') as t_in:
            target_lines = t_in.readlines()

        # A quick check that there is a one-to-one correspondence between
        # data and target lines
        self.n_samples = len(data_lines)
        if not self.n_samples == len(target_lines):
            raise Exception("Data and Target lengths are not the same.")

        # Interpret each data and target line pair
        if self.verbose:
            print("Interpreting data")
        self.samples = []
        # The first line of each file fixes the expected column counts.
        self.n_features = len(data_lines[0].split())
        self.n_targets = len(target_lines[0].split())
        for ss, (data_line, target_line) in enumerate(
                zip(data_lines, target_lines)):
            data = [float(val) for val in data_line.split()]
            target = [float(val) for val in target_line.split()]
            inst = Instance(data)
            inst.setLabel(Instance(target))

            # Do some checking before we append this
            if not self.n_features == len(data):
                raise Exception("Line " + str(ss) + ": Number of data points does not match previous lines")
            if not self.n_targets == len(target):
                raise Exception("Line " + str(ss) + ": Number of targets does not match previous lines")

            # Append this data pattern
            self.samples.append(inst)

        # What type of classification is this?
        if self.n_targets == 1:
            self.out_type = "binary"
        else:
            self.out_type = "multiclass"