def initialize_instances(infile):
    """Read a CSV file into a list of instances with one-hot encoded labels.

    Every column except the last is parsed as a float attribute.  The last
    column is parsed as an integer class index and expanded into a vector of
    length ``NUM_OF_CLASSES`` holding a single 1 at that index.

    Args:
        infile: Path of the CSV file to read.

    Returns:
        A list of ``Instance`` objects, one per CSV row, each labelled with an
        ``Instance`` wrapping its one-hot class vector.

    Raises:
        IndexError: If a class index lies outside ``0 .. NUM_OF_CLASSES - 1``.
    """
    instances = []

    with open(infile, "r") as dat:
        for row in csv.reader(dat):
            instance = Instance([float(value) for value in row[:-1]])

            # Go through float() first so labels written as "3.0" still parse.
            label = int(float(row[-1]))

            # A negative index would silently wrap around and mark the wrong
            # class, so reject anything outside the valid range explicitly.
            if not 0 <= label < NUM_OF_CLASSES:
                raise IndexError(
                    "class label %d is outside 0..%d"
                    % (label, NUM_OF_CLASSES - 1))

            classes = [0] * NUM_OF_CLASSES
            classes[label] = 1
            instance.setLabel(Instance(classes))
            instances.append(instance)

    return instances
def initialize_instances(test=False):
    """Load the training or the test CSV file into a list of instances.

    ``TEST_FILE`` is read when ``test`` is true, ``TRAIN_FILE`` otherwise.
    All columns but the last become float attributes.  How the last column
    is turned into a label depends on two module flags: with ``do_chess`` it
    is used as a single float; with ``do_fmnist`` it is an integer index that
    is one-hot encoded over ten classes.  When both flags are set the one-hot
    label is the one that remains; when neither is set no label is attached.
    """
    source_path = TEST_FILE if test else TRAIN_FILE
    loaded = []

    with open(source_path, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-1]])

            if do_chess:
                example.setLabel(Instance(float(fields[-1])))

            if do_fmnist:
                one_hot = [0] * 10
                one_hot[int(fields[-1])] = 1.0
                example.setLabel(Instance(one_hot))

            loaded.append(example)

    return loaded
def initialize_instances(filename):
    """Read a CSV file with a header row into binary-labelled instances.

    The first row is treated as a header and skipped.  In every remaining
    row the first column (an index) and the last column (the label source)
    are left out of the attributes; the columns in between are parsed as
    floats.  The label is 0 when the last column is below 15 and 1 otherwise.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of labelled ``Instance`` objects, one per data row.
    """
    print('Creating instances')
    instances = []

    # The context manager closes the file even when a row fails to parse.
    # Binary mode is kept because that is what the Python 2 csv module expects.
    with open(filename, 'rb') as in_file:
        reader = csv.reader(in_file, delimiter=',')
        next(reader, None)  # skip the header; harmless on an empty file

        for row in reader:
            # Ignore the index (first) and the label (last) columns.
            instance = Instance([float(value) for value in row[1:-1]])
            instance.setLabel(Instance(0 if float(row[-1]) < 15 else 1))
            instances.append(instance)

    print('Finished instances')
    return instances
def initialize_instances():
    """Load ``TRAIN_FILE`` as a list of binary-labelled instances.

    Each CSV row contributes one instance: every column except the last is
    converted to a float attribute, and the last column, read as an integer,
    yields label 0 when it equals 0 and label 1 for any other value.
    """
    loaded = []

    with open(TRAIN_FILE, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-1]])

            if int(fields[-1]) == 0:
                target = 0
            else:
                target = 1

            example.setLabel(Instance(target))
            loaded.append(example)

    return loaded
def get_test_instances():
    """Load ``TEST_FILE`` as a list of binary-labelled instances.

    All columns except the last are parsed as float attributes.  The last
    column is parsed as an integer; a value of 0 gives label 0 and every
    other value gives label 1.
    """
    test_set = []

    with open(TEST_FILE, "r") as source:
        for record in csv.reader(source):
            attributes = [float(cell) for cell in record[:-1]]
            sample = Instance(attributes)

            target = 1 if int(record[-1]) != 0 else 0
            sample.setLabel(Instance(target))

            test_set.append(sample)

    return test_set
def maybe_serialize(file, force=False):
    """Serialize a CSV data set into train/test instance lists when needed.

    The serialized file sits next to ``file`` with its extension replaced by
    ``.ser``.  It is (re)built when it does not exist yet or when ``force``
    is true; otherwise the existing file is left untouched.

    Building reads every CSV row, hands the rows to ``normalize_data``
    (presumably normalizing them in place -- its return value is not used),
    shuffles them, and splits them at ``TRAIN_TEST_SPLIT_RATIO``.  Each row
    becomes an ``Instance`` whose attributes are all columns but the last and
    whose label is 0 when the last column equals -1 and 1 otherwise.  The two
    lists are written as a dict keyed by ``TRAIN`` and ``TEST``.

    Args:
        file: Path of the CSV data set.
        force: Rebuild the serialized file even if it already exists.

    Returns:
        The path of the serialized file.
    """
    serialized_file = os.path.splitext(file)[0] + '.ser'

    if os.path.isfile(serialized_file) and not force:
        stdout.write("Serialized file for data-set found.\n")
        return serialized_file

    def build_instances(rows):
        # Convert raw CSV rows into labelled instances.
        built = []
        for row in rows:
            instance = Instance([float(value) for value in row[:-1]])
            instance.setLabel(Instance(0 if float(row[-1]) == -1 else 1))
            built.append(instance)
        return built

    stdout.write("Serializing Data-Set...\n\n")
    with open(file, "r") as pima:
        reader_list = list(csv.reader(pima))

    stdout.write("Some sample, un-shuffled data: \n%s\n\n" % reader_list[:3])

    normalize_data(reader_list)
    random.shuffle(reader_list)

    split_at = int(len(reader_list) * TRAIN_TEST_SPLIT_RATIO)
    train_instances = build_instances(reader_list[:split_at])
    test_instances = build_instances(reader_list[split_at:])

    stdout.write("Some sample, shuffled training data (after "
                 "normalization): "
                 "\n%s\n\n" % train_instances[:3])
    stdout.write("Some sample, shuffled test data (after "
                 "normalization): \n%s\n\n" % test_instances[:3])
    stdout.write("Train Data\tTest Data\n")
    stdout.write("%s\t\t%s\n" % (len(train_instances),
                                 len(test_instances)))

    save = {
        TRAIN: train_instances,
        TEST: test_instances,
    }

    out_file = io.FileOutputStream(serialized_file)
    try:
        out_stream = io.ObjectOutputStream(out_file)
        out_stream.writeObject(save)
        # Closing the object stream flushes whatever it still buffers before
        # the underlying file stream goes away.
        out_stream.close()
    finally:
        # Closing an already closed FileOutputStream is a no-op, so this only
        # matters when something above failed.
        out_file.close()

    return serialized_file
def initialize_instances(infile):
    """Read the CSV file ``infile`` into a list of binary-labelled instances.

    All columns but the last are parsed as float attributes.  The label is 0
    when the last column, parsed as a float, is less than or equal to 0, and
    1 otherwise.
    """
    loaded = []

    with open(infile, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-1]])

            if float(fields[-1]) <= 0:
                target = 0
            else:
                target = 1

            example.setLabel(Instance(target))
            loaded.append(example)

    return loaded
def initialize_instances(filename, i):
    """Read the CSV file ``filename`` into a list of labelled instances.

    All columns but the last are parsed as float attributes; the last column
    is parsed as an integer and used as the label unchanged.  The parameter
    ``i`` is accepted but not used by this function.
    """
    collected = []

    with open(filename, "r") as source:
        for record in csv.reader(source):
            attributes = [float(cell) for cell in record[:-1]]
            sample = Instance(attributes)

            target = int(record[-1])
            sample.setLabel(Instance(target))

            collected.append(sample)

    return collected
def read_dataset(path, pos_set):
    """Read a CSV file with a header row into binary-labelled instances.

    The first row is skipped.  In each later row every column but the last is
    parsed as a float attribute; the label is 1 when the last column, parsed
    as a float, is a member of ``pos_set`` and 0 otherwise.
    """
    dataset = []

    with open(path, 'r') as stream:
        for position, fields in enumerate(csv.reader(stream)):
            if position == 0:
                # The first row is not data.
                continue

            example = Instance([float(field) for field in fields[:-1]])
            is_positive = float(fields[-1]) in pos_set
            example.setLabel(Instance(1 if is_positive else 0))
            dataset.append(example)

    return dataset
def initialize_instances(infile):
    """Read the CSV file ``infile`` into a list of binary-labelled instances.

    Every column except the last is parsed as a float attribute.  A last
    column below 0 gives label 0; anything else gives label 1.
    """
    parsed = []

    with open(infile, "r") as source:
        for record in csv.reader(source):
            sample = Instance([float(cell) for cell in record[:-1]])

            is_negative = float(record[-1]) < 0
            sample.setLabel(Instance(0 if is_negative else 1))

            parsed.append(sample)

    return parsed
def initialize_instances():
    """Load ``INPUT_FILE`` as a list of binary-labelled instances.

    The first column of every row is left out of the attributes; the columns
    between it and the last one are parsed as floats.  The last column gives
    the label: a value of 2 maps to 0 and any other value maps to 1 (per the
    original author's note, 2 marks benign and 4 malignant samples).
    """
    loaded = []

    with open(INPUT_FILE, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[1:-1]])

            target = 1 if float(fields[-1]) != 2 else 0
            example.setLabel(Instance(target))

            loaded.append(example)

    return loaded
def initialize_instances():
    """Load ``INPUT_FILE`` as a list of binary-labelled instances.

    All columns but the last are parsed as float attributes.  The label is 0
    when the last column, parsed as a float, equals 0 and 1 otherwise.
    """
    collected = []

    with open(INPUT_FILE, "r") as source:
        for record in csv.reader(source):
            attributes = [float(cell) for cell in record[:-1]]
            sample = Instance(attributes)

            if float(record[-1]) == 0:
                sample.setLabel(Instance(0))
            else:
                sample.setLabel(Instance(1))

            collected.append(sample)

    return collected
def initialize_instances():
    """Load ``INPUT_FILE`` as a list of binary-labelled instances.

    All columns but the last are parsed as float attributes.  The last column
    is compared as text: the exact string "crisis" gives label 1, anything
    else gives label 0.
    """
    loaded = []

    with open(INPUT_FILE, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-1]])

            if fields[-1] == "crisis":
                target = 1
            else:
                target = 0

            example.setLabel(Instance(target))
            loaded.append(example)

    return loaded
def initialize_instances():
    """Load ``INPUT_FILE`` as a list of binary-labelled instances.

    All columns but the last are parsed as float attributes.  The label is 0
    when the last column is below 15 and 1 otherwise.
    """
    parsed = []

    with open(INPUT_FILE, "r") as source:
        for record in csv.reader(source):
            sample = Instance([float(cell) for cell in record[:-1]])

            below_threshold = float(record[-1]) < 15
            sample.setLabel(Instance(0 if below_threshold else 1))

            parsed.append(sample)

    return parsed
def initialize_instances(examples):
    """Read the CSV file ``examples`` into binary-labelled instances.

    All columns but the last are parsed as float attributes.  The last column
    is compared as text: the exact string 'g' gives label 0, anything else
    gives label 1.
    """
    loaded = []

    with open(examples, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-1]])

            target = 1 if fields[-1] != 'g' else 0
            example.setLabel(Instance(target))

            loaded.append(example)

    return loaded
def initialize_instances(file_path):
    """Read the CSV file ``file_path`` into binary-labelled instances.

    All columns but the last are parsed as float attributes.  The label is 0
    when the last column, parsed as a float, is below 1 and 1 otherwise.
    """
    collected = []

    with open(file_path, "r") as source:
        for record in csv.reader(source):
            attributes = [float(cell) for cell in record[:-1]]
            sample = Instance(attributes)

            if float(record[-1]) < 1:
                target = 0
            else:
                target = 1
            sample.setLabel(Instance(target))

            collected.append(sample)

    return collected
def initialize_instances(infile):
    """Read the CSV file ``infile`` into a list of binary-labelled instances.

    All columns but the last are parsed as float attributes.  The label is 0
    when the last column is below 0.001 and 1 otherwise.
    """
    parsed = []

    with open(infile, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-1]])

            near_zero_or_less = float(fields[-1]) < 0.001
            example.setLabel(Instance(0 if near_zero_or_less else 1))

            parsed.append(example)

    return parsed
def initialize_test_instances():
    """Read ``INPUT_FILE_TEST`` into a list of binary-labelled instances.

    The first row is treated as a header and skipped.  In every remaining
    row all columns but the last are parsed as float attributes; the label
    is 0 when the last column is less than or equal to 0 and 1 otherwise.

    Returns:
        A list of labelled ``Instance`` objects; empty when the file holds
        no data rows.
    """
    instances = []

    with open(INPUT_FILE_TEST, "r") as wine_test_data:
        reader = csv.reader(wine_test_data)

        # The default keeps an empty file from raising StopIteration.
        next(reader, None)

        for row in reader:
            instance = Instance([float(value) for value in row[:-1]])
            instance.setLabel(Instance(0 if float(row[-1]) <= 0 else 1))
            instances.append(instance)

    return instances
def initialize_instances():
    """Read ``INPUT_FILE`` into a list of binary-labelled instances.

    All columns but the last are parsed as float attributes.  The label is 0
    when the last column is below 0.5 and 1 otherwise.

    Returns:
        A list of labelled ``Instance`` objects, one per CSV row.
    """
    instances = []

    with open(INPUT_FILE, "r") as abalone:
        for row in csv.reader(abalone):
            # NOTE: the attribute count (len(row) - 1) is not recorded here;
            # assigning INPUT_LAYER inside this function would only bind a
            # local name, not a module-level setting.
            instance = Instance([float(value) for value in row[:-1]])
            instance.setLabel(Instance(0 if float(row[-1]) < 0.5 else 1))
            instances.append(instance)

    return instances
def initialize_instances():
    """Load ``../../project1/data/eyes.csv`` as binary-labelled instances.

    The path is resolved relative to the current working directory.  All
    columns but the last are parsed as float attributes; the label is 0 when
    the last column is below 1 and 1 otherwise.
    """
    data_path = os.path.join("..", "..", "project1", "data", "eyes.csv")
    loaded = []

    with open(data_path, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-1]])

            if float(fields[-1]) < 1:
                target = 0
            else:
                target = 1

            example.setLabel(Instance(target))
            loaded.append(example)

    return loaded
def initialize_instances(infile):
    """Read the CSV file ``infile`` into instances with four-element labels.

    The last four columns of each row form the label; everything before them
    is parsed as float attributes.  Each label column becomes 0 when its
    value is below 0 and 1 otherwise, and the resulting four-element list is
    attached as the instance's label.
    """
    loaded = []

    with open(infile, "r") as handle:
        for fields in csv.reader(handle):
            example = Instance([float(field) for field in fields[:-4]])

            label_bits = []
            for raw in fields[-4:]:
                label_bits.append(0 if float(raw) < 0 else 1)

            example.setLabel(Instance(label_bits))
            loaded.append(example)

    return loaded
def initialize_instances(file):
    """Read a CSV file with a header row into binary-labelled instances.

    The first row is skipped.  In every later row the first and the last
    column are left out of the attributes and the columns in between are
    parsed as floats.  The label is 0 when the last column is below 1 and 1
    otherwise.
    """
    collected = []

    with open(file, "r") as source:
        rows = csv.reader(source)
        next(rows, None)  # drop the header row, if there is one

        for record in rows:
            sample = Instance([float(cell) for cell in record[1:-1]])
            target = 0 if float(record[-1]) < 1 else 1
            sample.setLabel(Instance(target))
            collected.append(sample)

    return collected
def initialize_instances(infile):
    """Read the CSV file ``infile`` into a list of binary-labelled instances.

    All columns but the last are parsed as float attributes.  The last column
    is compared as text: the exact string 'no' gives label 0, anything else
    gives label 1.

    Args:
        infile: Path of the CSV file to read.

    Returns:
        A list of labelled ``Instance`` objects, one per CSV row.
    """
    instances = []

    # The context manager closes the file even when a row fails to parse.
    with open(infile, "r") as dat:
        for row in csv.reader(dat):
            instance = Instance([float(value) for value in row[:-1]])
            instance.setLabel(Instance(0 if row[-1] == 'no' else 1))
            instances.append(instance)

    return instances
def initialize_instances():
    """Load ``INPUT_FILE_TESTING`` as a list of labelled instances.

    The first row is skipped.  In every later row the first column, parsed as
    an integer, is the label and the remaining columns are parsed as float
    attributes.
    """
    loaded = []

    with open(INPUT_FILE_TESTING, "r") as handle:
        rows = csv.reader(handle)
        next(rows, None)  # drop the first row

        for fields in rows:
            example = Instance([float(field) for field in fields[1:]])
            target = int(fields[0])
            example.setLabel(Instance(target))
            loaded.append(example)

    return loaded
def initialize_instances(infile):
    """Read a CSV file with a leading label column into labelled instances.

    The first row is skipped.  In every later row the first column, parsed as
    a float, is the label and the remaining columns are parsed as float
    attributes.
    """
    collected = []

    with open(infile, "r") as source:
        for position, record in enumerate(csv.reader(source)):
            if position == 0:
                # The first row is not data.
                continue

            sample = Instance([float(cell) for cell in record[1:]])
            target = float(record[0])
            sample.setLabel(Instance(target))
            collected.append(sample)

    return collected
def initialize_instances():
    """Load ``INPUT_FILE`` as binary-labelled instances, skipping any header.

    ``csv.Sniffer`` inspects the first 1024 characters to decide whether the
    file starts with a header row; if it reports one, that row is skipped.
    All columns but the last are parsed as float attributes.  The label is 0
    when the last column equals 2 and 1 otherwise.
    """
    loaded = []

    with open(INPUT_FILE, "r") as f:
        sample_text = f.read(1024)
        header_present = csv.Sniffer().has_header(sample_text)
        f.seek(0)  # start over so the reader sees the whole file

        rows = csv.reader(f)
        if header_present:
            next(rows)

        for fields in rows:
            example = Instance([float(field) for field in fields[:-1]])
            target = 1 if float(fields[-1]) != 2 else 0
            example.setLabel(Instance(target))
            loaded.append(example)

    return loaded
def initialize_instances():
    """Load ``datasets/cache/train_basketball.csv`` as labelled instances.

    The path is resolved relative to the current working directory.  The
    first row is skipped.  In every later row the first column, parsed as an
    integer, is the label and the remaining columns are parsed as float
    attributes.
    """
    collected = []

    with open("datasets/cache/train_basketball.csv", "r") as source:
        for position, record in enumerate(csv.reader(source)):
            if position == 0:
                # The first line is not data.
                continue

            sample = Instance([float(cell) for cell in record[1:]])
            sample.setLabel(Instance(int(record[0])))
            collected.append(sample)

    return collected
def initialize_instances(input_file):
    """Load a CSV data set as instances usable by the ABAGAIL networks.

    Every column except the last is parsed as a float attribute.  The last
    column is reduced to a binary class: values less than or equal to 0
    become 0, all others become 1.

    Args:
        input_file (str): Path of the CSV file holding attributes and class.

    Returns:
        list: One labelled instance per CSV row.
    """
    loaded = []

    with open(input_file, "r") as handle:
        for fields in csv.reader(handle):
            attributes = [float(field) for field in fields[:-1]]
            example = Instance(attributes)

            if float(fields[-1]) <= 0:
                example.setLabel(Instance(0))
            else:
                example.setLabel(Instance(1))

            loaded.append(example)

    return loaded
def initialize_instances(fn):
    """Pair an attribute CSV with its label CSV and build labelled instances.

    The label file name is derived from ``fn`` by replacing 'X.csv' with
    'Y.csv', and is printed.  Both files are read in step; the first pair of
    rows is skipped.  For every later pair, all columns of the attribute row
    are parsed as floats and the first column of the label row, parsed as a
    float, becomes the label.  Reading stops at the end of the shorter file.
    """
    label_path = fn.replace('X.csv', 'Y.csv')
    print(label_path)

    collected = []
    with open(fn, "r") as attribute_file:
        with open(label_path, "r") as label_file:
            pairs = zip(csv.reader(attribute_file), csv.reader(label_file))

            for position, (attribute_row, label_row) in enumerate(pairs):
                if position == 0:
                    # The first row of each file is not data.
                    continue

                sample = Instance([float(cell) for cell in attribute_row])
                sample.setLabel(Instance(float(label_row[0])))
                collected.append(sample)

    return collected
def initialize_instances():
    """Load, shuffle, standardize and split the data set in ``INPUT_FILE``.

    All CSV rows are read and shuffled.  Every column except the last is then
    standardized to zero mean and unit variance, with each value rounded up
    to three decimals.  Each row becomes an instance whose attributes are the
    standardized columns and whose label is 0 when the last column equals 0.0
    and 1 otherwise.  The first 70% of the instances form the training set
    and the rest the testing set; both sizes are printed.

    Returns:
        A ``(trainingInstances, testingInstances)`` tuple of lists.  Both
        lists are empty when the file has no rows.
    """
    import math
    import random

    with open(INPUT_FILE, "r") as abalone:
        rows = list(csv.reader(abalone))

    random.shuffle(rows)

    # An empty file has no first row to measure, so treat it as zero columns.
    attribute_count = len(rows[0]) - 1 if rows else 0

    for ind in range(attribute_count):
        vals = [float(row[ind]) for row in rows]
        mean = sum(vals) / len(vals)
        variance = sum([val ** 2 for val in vals]) / float(len(vals)) - mean ** 2

        # A constant column has zero variance (or a tiny negative one after
        # rounding); dividing by 1 leaves it centred at 0 instead of failing.
        deviation = variance ** 0.5 if variance > 0 else 1.0

        for position, row in enumerate(rows):
            row[ind] = math.ceil(
                (vals[position] - mean) / deviation * 1000) / 1000

    instances = []
    for row in rows:
        instance = Instance([float(value) for value in row[:-1]])
        instance.setLabel(Instance(0 if float(row[-1]) == 0.0 else 1))
        instances.append(instance)

    split_at = int(len(instances) * 0.7)
    trainingInstances = instances[:split_at]
    testingInstances = instances[split_at:]

    print("Number of training instances: " + str(len(trainingInstances)))
    print("Number of testing instances: " + str(len(testingInstances)))

    return trainingInstances, testingInstances
def initialize_instances():
    """Build training and testing instances from four separate CSV files.

    Attribute rows come from ``x_train_file`` and ``x_test_file``; label rows
    come from ``y_train_file`` and ``y_test_file``.  Every column of an
    attribute row is parsed as a float, and the first column of the label row
    at the same position, parsed as a float, becomes the label.

    Returns:
        A ``(trainingInstances, testingInstances)`` tuple of lists.
    """
    def load_rows(path):
        # Read every row of one comma-separated file.
        with open(path, 'r') as csvfile:
            return list(csv.reader(csvfile, delimiter=','))

    def build(attribute_rows, label_rows):
        # Pair each attribute row with the label row at the same position.
        built = []
        for position, attributes in enumerate(attribute_rows):
            example = Instance([float(value) for value in attributes])
            example.setLabel(Instance(float(label_rows[position][0])))
            built.append(example)
        return built

    x_train = load_rows(x_train_file)
    x_test = load_rows(x_test_file)
    y_train = load_rows(y_train_file)
    y_test = load_rows(y_test_file)

    trainingInstances = build(x_train, y_train)
    testingInstances = build(x_test, y_test)
    return trainingInstances, testingInstances
def read_data_files(self):
    """
    Read a scikit data set.  The entire file should be numbers, so I'm not
    bothering with CSV or anything fancy.  Just numbers separated by spaces.

    Reads ``<dataset_name>.data`` and ``<dataset_name>.target`` line by line
    and pairs them up.  On success the following attributes are set:
    ``n_samples``, ``n_features``, ``n_targets``, ``samples`` (a list of
    instances, each labelled with its target values) and ``out_type``
    ("binary" when there is one target per line, "multiclass" otherwise).

    :raises Exception: if either file is missing, the data file is empty,
        the files differ in length, or a line has a different number of
        values than the first line.
    :return: None
    """
    # Set the data file names
    data_file = self.dataset_name + ".data"
    target_file = self.dataset_name + ".target"

    # Check that BOTH files exist
    if not os.path.isfile(data_file):
        raise Exception("Data file '" + data_file + "' not found")
    if not os.path.isfile(target_file):
        raise Exception("Target file '" + target_file + "' not found")

    # Read the lines of the data and target files
    if self.verbose:
        print("Loading data")
    with open(data_file, 'r') as d_in:
        data_lines = d_in.readlines()
    with open(target_file, 'r') as t_in:
        target_lines = t_in.readlines()

    # A quick check that there is a one-to-one correspondence between data
    # and target lines
    self.n_samples = len(data_lines)
    if not self.n_samples == len(target_lines):
        raise Exception("Data and Target lengths are not the same.")

    # The first line defines the expected widths, so there must be one.
    if self.n_samples == 0:
        raise Exception("Data file '" + data_file + "' is empty")

    # Interpret each data and target line pair
    if self.verbose:
        print("Interpreting data")
    self.samples = []
    self.n_features = len(data_lines[0].split())
    self.n_targets = len(target_lines[0].split())

    for ss, (data_line, target_line) in enumerate(zip(data_lines, target_lines)):
        data = [float(val) for val in data_line.split()]
        target = [float(val) for val in target_line.split()]

        # Do some checking before we build and append this pattern
        if not self.n_features == len(data):
            raise Exception("Line " + str(ss) + ": Number of data points does not match previous lines")
        if not self.n_targets == len(target):
            raise Exception("Line " + str(ss) + ": Number of targets does not match previous lines")

        inst = Instance(data)
        inst.setLabel(Instance(target))
        self.samples.append(inst)

    # What type of classification is this?
    if self.n_targets == 1:
        self.out_type = "binary"
    else:
        self.out_type = "multiclass"