Code example #1
File: getData.py  Project: odinbp/NeuralNet
def getTextFileData(x, caseFraction, noClases):
    # Requires the csv module, sklearn's preprocessing (for scale), and the
    # project's TFT helper providing int_to_one_hot.
    features = []        # feature vectors
    classes = []         # one-hot target vectors
    dataStructured = []  # [features, target] pairs returned to the caller
    no_of_lines = 0
    with open('./Data sets/' + x, newline='') as inputfile:
        no_of_lines = sum(1 for _ in inputfile)
    iterations = int(no_of_lines * caseFraction)
    with open('./Data sets/' + x, newline='') as inputfile:
        if x == 'winequality_red.txt':
            for row in csv.reader(inputfile, delimiter=';'):
                if iterations == 0:
                    break
                iterations -= 1
                feature_data = row[:-1]
                feature_data = list(map(float, feature_data))
                feature_class_value = int(row[-1])
                feature_class_value = feature_class_value - 3
                feature_class = TFT.int_to_one_hot(feature_class_value,
                                                   noClases)
                features.append(feature_data)
                classes.append(feature_class)
        else:
            for row in csv.reader(inputfile):
                if iterations == 0:
                    break
                iterations -= 1
                feature_data = row[:-1]
                feature_data = list(map(float, feature_data))
                feature_class_value = int(row[-1])
                feature_class = TFT.int_to_one_hot(feature_class_value,
                                                   noClases)
                features.append(feature_data)
                classes.append(feature_class)
    scaled = preprocessing.scale(features)
    for i in range(len(scaled)):
        dataStructured.append([scaled[i], classes[i]])
    return dataStructured
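Every example on this page relies on an int_to_one_hot helper (referenced as TFT, tft, or tools depending on the project) whose definition is not reproduced here. A minimal sketch of the behaviour the snippets assume, including the floats flag used in examples #14, #15 and #17, might look like the following; the signature and defaults are assumptions, not the projects' actual code:

def int_to_one_hot(index, size, off_val=0, on_val=1, floats=False):
    # Hypothetical stand-in for the helper used throughout these examples:
    # a vector of length `size` that is off_val everywhere except position
    # `index`, which is set to on_val. With floats=True the entries are floats.
    vec = [float(off_val) if floats else off_val] * size
    vec[index] = float(on_val) if floats else on_val
    return vec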
Code example #2
def read_file(filename):
    cases = []
    file_obj = open(filename, 'r')
    if filename.split('/')[1] == "wine.txt":
        for line in file_obj.readlines():
            line_vec = line.split(';')
            input_vec = line_vec[:11]
            label = int(line_vec[-1]) - 1
            cases.append(
                [list(map(float, input_vec)),
                 tft.int_to_one_hot(label, 8)])
    if filename.split('/')[1] == "yeast.txt":
        for line in file_obj.readlines():
            line_vec = line.split(',')
            input_vec = line_vec[:8]
            label = int(line_vec[-1]) - 1
            cases.append(
                [list(map(float, input_vec)),
                 tft.int_to_one_hot(label, 10)])
    if filename.split('/')[1] == "glass.txt":
        for line in file_obj.readlines():
            line_vec = line.split(',')
            input_vec = line_vec[:9]
            label = int(line_vec[-1]) - 1
            if label >= 4:  # glass class 4 never occurs, so shift labels above it down by one
                label -= 1
            cases.append(
                [list(map(float, input_vec)),
                 tft.int_to_one_hot(label, 6)])
    return cases
Code example #3
File: interface.py  Project: jolohan/DLmodule3
def mnist(parameters, loss_function):
    dataset = 0
    digits = []
    for p in parameters:
        if (p == "testing"):
            dataset = 1
        if (p.isdigit()):
            digits.append(int(p))
    output_size = 10
    if (len(digits) == 0):
        images, labels = mb.load_mnist(
            dataset=("training" if not dataset else "testing"))

    # If specific digits were requested (e.g. [1, 4, 6]):
    else:
        images, labels = mb.load_mnist(
            dataset=("training" if not dataset else "testing"), digits=digits)

    # Creating [input, output] cases with normalized, flattened images and one-hot label vectors as output (a sparse loss would need integer labels, not vectors):
    cases = [[
        mb.flatten_image(i) / la.norm(i),
        TFT.int_to_one_hot(int(l[0]), output_size)
    ] for (i, l) in zip(images, labels)]
    print("Total cases collected: ", len(cases))
    return cases
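The comment above describes normalising each flattened image by its norm. A self-contained sketch of that step in plain NumPy (the 28x28 shape and the random pixel values are assumptions for illustration, not part of the original snippet):

import numpy as np

# Flatten a 28x28 image and scale it by its matrix norm, mirroring
# mb.flatten_image(i) / la.norm(i) in the example above.
image = np.random.randint(0, 256, size=(28, 28))
flat = image.flatten() / np.linalg.norm(image)
print(flat.shape)  # (784,)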
Code example #4
File: ffdfdf2.py  Project: arntgm/AImod3
 def mnist(self):
     data_set = MNIST.load_mnist()
     flat_set = MNIST.gen_flat_cases(cases = data_set)
     return_set = []
     for i in range(len(flat_set[0])):
         return_set.append([flat_set[0][i], TFT.int_to_one_hot(flat_set[1][i], 10)])
     return return_set
Code example #5
File: interface.py  Project: jolohan/DLmodule3
def dataset_loader(filename, loss_function):
    print(filename)
    with open(filename, "r") as file:
        feature_vectors = []
        labels = []
        splitter = ";"
        for line in file:
            if (len(line) > 0):
                if (splitter not in line):
                    splitter = ","
                split_string = line.split(splitter)
                labels.append(int(split_string[-1]))
                feature_vectors.append(
                    [float(i) for i in split_string[:len(split_string) - 1]])
    print("Nof features: ", len(feature_vectors[0]))
    print("Nof examples: ", len(feature_vectors))
    print(max(labels), min(labels))

    # Making one-hot-labels:
    normalized_labels = normalize_labels(labels)
    one_hot_labels = [
        TFT.int_to_one_hot(l,
                           max(normalized_labels) + 1)
        for l in normalized_labels
    ]
    # Normalizing features in the space [0, 1]:
    normalized_feature_vectors = normalize_features(feature_vectors)

    # Creating the case-set:
    cases = [[f, l]
             for (f, l) in zip(normalized_feature_vectors, one_hot_labels)]
    return cases
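normalize_labels and normalize_features are project helpers that this snippet does not show. Since the one-hot width is taken as max(normalized_labels) + 1, normalize_labels presumably shifts the labels so the smallest class becomes 0; a minimal sketch under that assumption:

def normalize_labels(labels):
    # Assumed behaviour: shift integer class labels so the smallest becomes 0,
    # which makes max(normalized_labels) + 1 usable as the one-hot width above.
    low = min(labels)
    return [label - low for label in labels]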
Code example #6
def read_file(filename):
    cases = []
    file_obj = open(filename, 'r')
    for line in file_obj.readlines():
        line_vec = line.split(',')
        input_vec = line_vec[:4]
        label = flower_to_int(str(line_vec[-1]).rstrip())
        cases.append([list(map(float, input_vec)), tft.int_to_one_hot(label, 3)])
    return cases
Code example #7
    def read_glass(self, text_file="dataset/glass.txt"):
        file_object = open(text_file, "r")
        k = np.genfromtxt(file_object, delimiter=",")
        x = k[:, :9]
        x = self.normalize(x).tolist()
        y = k[:, 9:].tolist()

        for i in range(len(x)):
            self.cases.append([x[i], TFT.int_to_one_hot(int(y[i][0]) - 1, 7)])
Code example #8
    def read_wine(self, text_file="dataset/winequality_red.txt"):
        file_object = open(text_file, "r")
        k = np.genfromtxt(file_object, delimiter=";")
        x = k[:, :11]
        x = self.normalize(x).tolist()
        y = k[:, 11:].tolist()

        for i in range(len(x)):
            self.cases.append([x[i], TFT.int_to_one_hot(int(y[i][0]) - 3, 6)])
Code example #9
File: gannetwork.py  Project: hringdal/AI_project01
def load_glass_dataset():
    data = np.loadtxt('data/glass.txt', delimiter=',')
    # targets between 1 and 7, no examples of class 4
    # reducing class labels above 4 by one, to use existing onehot-function
    for i in range(len(data)):
        if data[i][-1] >= 5:
            data[i][-1] -= 1

    return [[x[:9], TFT.int_to_one_hot(int(x[9]) - 1, 6)] for x in data]
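The remapping described in the comments can be verified with a quick check: the six observed glass classes are 1, 2, 3, 5, 6 and 7 (class 4 never occurs), so labels of 5 or more are shifted down by one before the usual zero-indexing:

# Reproduces the label remapping from load_glass_dataset; each raw class
# maps to a distinct one-hot index in the range 0..5.
for raw in (1, 2, 3, 5, 6, 7):
    label = raw - 1 if raw >= 5 else raw
    print(raw, '->', label - 1)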
Code example #10
 def generate_cases(self):
     self.cases = _generator[self.casefunc["data source"]](*(
         self.casefunc["parameters"]
     ))  # Run the case generator.  Case = [input-vector, target-vector]
     if (self.casefunc["data source"] == "symmetry"):
         for i in range(len(self.cases)):
             self.cases[i] = [
                 self.cases[i][:-1],
                 TFT.int_to_one_hot(int(self.cases[i][-1:][0]), 2)
             ]
Code example #11
File: datasets.py  Project: Pontius1007/IT3105
def load_flat_text_cases(filename, cfraction):
    f = open(filename, "r")
    lines = [line.split(" ") for line in f.read().split("\n")]
    f.close()
    len_lines = float(len(lines))
    fraction = int(np.ceil(cfraction * len_lines))
    new_lines = lines[:fraction]
    x_l = list(map(int, new_lines[0]))[:(fraction - 1)]  # target
    x_t = [list(map(int, line)) for line in new_lines[1:]]  # input
    x_t = np.array(x_t)  # convert to an array before scaling the pixel values
    x_t = np.divide(x_t, 255)
    x_l = [TFT.int_to_one_hot(i, 10) for i in x_l]
    return [list(i) for i in zip(x_t, x_l)]
Code example #12
File: mnist_basics.py  Project: nordbyandreas/openAA
def load_flat_text_cases(
    filename,
    dir=__mnist_path__,
):
    f = open(dir + filename, "r")
    lines = [line.split(" ") for line in f.read().split("\n")]
    f.close()
    x_l = [TFT.int_to_one_hot(int(fv), 10) for fv in lines[0]]
    x_t = numpy.array([lines[i] for i in range(1, len(lines))]).astype(int)
    x_t = x_t / 255
    #x_t = normalize_inputs(x_t.astype(int))

    print(len(x_t))

    return [[l, t] for l, t in zip(x_t, x_l)]
Code example #13
File: gannetwork.py  Project: hringdal/AI_project01
def load_mnist(fraction=0.1):
    mnist = tf.keras.datasets.mnist.load_data(path='mnist.npz')
    data_length = len(mnist[0][1])

    reduced_indices = np.random.choice([i for i in range(data_length)],
                                       int(fraction * data_length),
                                       replace=False)

    data = mnist[0][0][reduced_indices]
    targets = mnist[0][1][reduced_indices]
    data = [i.flatten() for i in data]

    output = [[data[i], TFT.int_to_one_hot(targets[i], 10)]
              for i in range(len(targets))]
    return output
Code example #14
 def readMineFile(self, filename):
     lines = [line.rstrip('\n') for line in open(self.path + filename)]
     onehots = ["R", "M"]
     cases = []
     for line in lines:
         case = []
         inp = []
         vals = line.split(",")
         target = onehots.index(vals.pop())
         target = TFT.int_to_one_hot(target, 2, floats=True)
         for val in vals:
             inp.append(float(val))
         case.append(inp)
         case.append(target)
         cases.append(case)
     return cases
Code example #15
 def readfile(self, filename, numClasses, custom_buckets, normalize=False):
     lines = [line.rstrip('\n') for line in open(self.path + filename)]
     cases = []
     for line in lines:
         case = []
         inp = []
         line = line.replace(";", ",")
         vals = line.split(",")
         if custom_buckets is not None:
             target = custom_buckets.index(int(vals.pop()))
         else:
             target = int(vals.pop())
         target = TFT.int_to_one_hot(target, numClasses, floats=True)
         for val in vals:
             inp.append(float(val))
         case.append(inp)
         case.append(target)
         cases.append(case)
     return cases
Code example #16
File: datasets.py  Project: Pontius1007/IT3105
def load_generic_file(filename, cfraction, hot_length):
    with open(filename, 'r') as infile:
        output_list = []
        lines = infile.readlines()
        fraction = int(np.ceil(cfraction * len(lines)))
        for line in lines:
            line_output = []
            split_line = line.replace(';', ',')
            split_line = split_line.strip().split(',')
            input_vector = [float(i) for i in split_line[:-1]]
            target_vector = int(split_line[-1])
            hot_target = TFT.int_to_one_hot(target_vector, hot_length)
            line_output.append(input_vector)
            line_output.append(hot_target)
            output_list.append(line_output)
        # shuffle so the returned fraction is drawn from the whole file, not just the first lines
        shuffle(output_list)
        return output_list[:fraction]
Code example #17
 def readDOTAfile(self, filename, onehot=False):
     lines = [line.rstrip('\n') for line in open(self.path + filename)]
     cases = []
     b = [-1.0, 1.0]
     for line in lines:
         vals = line.split(",")
         inp = []
         case = []
         if onehot:
             target = b.index(float(vals.pop(0)))
             target = TFT.int_to_one_hot(target, len(b), floats=True)
         else:
             target = float(vals.pop(0))
         for val in vals:
             inp.append(float(val))
         case.append(inp)
         case.append(target)
         cases.append(case)
     return cases
Code example #18
 def read_mnist(self):
     cases, label = mnists.load_all_flat_cases()
     cases = np.array(cases) / 255
     for i in range(int(len(cases))):
         self.cases.append(
             [cases[i], TFT.int_to_one_hot(int(label[i]), 10)])
Code example #19
File: getData.py  Project: odinbp/NeuralNet
def make_one_hot(labels, noClases):
    ohl = []
    for label in labels:
        ohl.append(TFT.int_to_one_hot(label, noClases))
    return ohl
Code example #20
    def source(self):
        def normalize(cases):
            input = [c[0] for c in cases]
            target = [c[1] for c in cases]
            input = numpy.array(input)
            min_arr = numpy.min(input, axis=0)
            max_arr = numpy.max(input, axis=0)
            for element in input:
                for i, e in enumerate(element):
                    element[i] = (e - min_arr[i]) / (max_arr[i] - min_arr[i])
            return list(zip(input, target))

        def to_float(inp):
            # returns 0 if the input is '?' (a question mark)
            return 0 if inp == '?' else float(inp)

        self.source_is_called = True
        print("source:", self.args.source)
        data_set = []
        if self.args.source[-4:] == ".txt":
            with open("data_set_files/" + self.args.source) as file:
                data = list(
                    map(lambda x: re.split("[;,]", x), file.readlines()))
                data = list(map(lambda x: list(map(to_float, x)), data))
            max_d = max(map(lambda x: int(x[-1]), data))
            for element in data:
                input = element[:-1]
                target = TFT.int_to_one_hot(int(element[-1]) - 1, max_d)
                data_set.append([input, target])
        elif self.args.source == "parity":
            if self.args.sourceinit is None:
                data_set = TFT.gen_all_parity_cases(10)
            else:
                data_set = TFT.gen_all_parity_cases(self.args.sourceinit[0])
        elif self.args.source == "symmetry":
            if self.args.sourceinit is None:
                vecs = TFT.gen_symvect_dataset(101, 2000)
            else:
                vecs = TFT.gen_symvect_dataset(self.args.sourceinit[0],
                                               self.args.sourceinit[1])
            inputs = list(map(lambda x: x[:-1], vecs))
            targets = list(map(lambda x: TFT.int_to_one_hot(x[-1], 2), vecs))
            data_set = list(zip(inputs, targets))
        elif self.args.source == "auto_onehot":
            if self.args.sourceinit is None:
                data_set = TFT.gen_all_one_hot_cases(64)
            else:
                data_set = TFT.gen_all_one_hot_cases(self.args.sourceinit[0])
        elif self.args.source == "auto_dense":
            if self.args.sourceinit is None:
                data_set = TFT.gen_dense_autoencoder_cases(2000, 100)
            else:
                data_set = TFT.gen_dense_autoencoder_cases(
                    self.args.sourceinit[0], self.args.sourceinit[1])
        elif self.args.source == "bitcounter":
            if self.args.sourceinit is None:
                data_set = TFT.gen_vector_count_cases(500, 15)
            else:
                data_set = TFT.gen_vector_count_cases(self.args.sourceinit[0],
                                                      self.args.sourceinit[1])
        elif self.args.source == "segmentcounter":
            if self.args.sourceinit is None:
                data_set = TFT.gen_segmented_vector_cases(25, 1000, 0, 8)
            else:
                data_set = TFT.gen_segmented_vector_cases(
                    self.args.sourceinit[0], self.args.sourceinit[1],
                    self.args.sourceinit[2], self.args.sourceinit[3])
        elif self.args.source == "mnist":
            # mnist_basics.load_all_flat_cases(type='testing')
            cases = mnist_basics.load_all_flat_cases(type='training')
            input = cases[0]
            target = cases[1]
            input = list(map(lambda x: list(map(lambda e: e / 255, x)), input))
            target = list(map(lambda x: TFT.int_to_one_hot(x, 10), target))
            data_set = list(zip(input, target))

        if data_set == []:
            print(self.args.source, "is illegal for argument --source")
            print("Legal values are: <filename>.txt, parity, symmetry, "
                  "auto_onehot, auto_dense, bitcounter, segmentcounter, mnist")
            quit()
        if self.args.source[-4:] == ".txt":
            data_set = normalize(data_set)
        return data_set
Code example #21
def labels_to_one_hot(labels):
    one_hot = []
    for label in labels:
        for number in label:
            one_hot.append(tools.int_to_one_hot(number, 10))
    return one_hot
Code example #22
File: gannetwork.py  Project: hringdal/AI_project01
def load_wine_dataset():
    data = np.loadtxt('data/winequality_red.txt', delimiter=';')
    # targets are between 3 and 8. Offset left by three to use onehot-encoding
    return [[x[:11], TFT.int_to_one_hot(int(x[11]) - 3, 6)] for x in data]
Code example #23
File: gannetwork.py  Project: hringdal/AI_project01
def load_yeast_dataset():
    data = np.loadtxt('data/yeast.txt', delimiter=',')
    # targets between 1 and 10
    return [[x[:8], TFT.int_to_one_hot(int(x[8]) - 1, 10)] for x in data]
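All of the loaders above return the same case format, a list of [input-vector, target-vector] pairs (see the comment in example #10). A short, hypothetical sketch of how such a list is commonly split before training; the shuffling and the 80/20 ratio are assumptions, not taken from any of the snippets:

import random

def split_cases(cases, train_fraction=0.8):
    # Shuffle the [input, target] pairs and cut them into training and test sets.
    shuffled = cases[:]
    random.shuffle(shuffled)
    cut = int(len(shuffled) * train_fraction)
    return shuffled[:cut], shuffled[cut:]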