Example #1
File: perceptron.py  Project: Jelvani/CS440
def train_digits(PERCENT=1, EPOCHS=1):
    digits = read_data.read_file(fdata='digitdata/trainingimages',
                                 flabel='digitdata/traininglabels',
                                 WIDTH=28,
                                 HEIGHT=28,
                                 type='digits')
    num_data = len(digits[0])  #amount of training data
    neurons = []
    hits = 0
    for x in range(10):  #create 10 neuron classes
        neurons.append(neuron(size=28 * 28))

    for epochs in range(EPOCHS):
        digits = read_data.read_file(fdata='digitdata/trainingimages',
                                     flabel='digitdata/traininglabels',
                                     WIDTH=28,
                                     HEIGHT=28,
                                     type='digits')
        for k in range(int(num_data * PERCENT)):
            x = rand.randint(0, len(digits[0]) - 1)  #get x as random
            features = get_features.features_from_image(
                digits[0][x])  #get vector of features
            scores = []
            for y in neurons:  #get score for each class
                scores.append(y.score(features))
            winnerIndex = scores.index(max(scores))
            if winnerIndex != digits[1][x]:
                neurons[winnerIndex].weights -= features
                neurons[digits[1][x]].weights += features
            digits[0].pop(x)
            digits[1].pop(x)
    return neurons
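Note: the `neuron` class used by the perceptron examples on this page is not shown. Below is a minimal sketch of what it might look like, assuming the score is simply the dot product of the weight vector and the feature vector (the class name and attributes come from the usage above; the implementation itself is an assumption):

import numpy as np

class neuron:
    """Hypothetical perceptron unit inferred from the calls above."""

    def __init__(self, size):
        # One weight per input feature; zero initialization is an assumption.
        self.weights = np.zeros(size)

    def score(self, features):
        # Activation of the unit: dot product of weights and feature vector.
        return float(np.dot(self.weights, features))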
Example #2
File: perceptron.py  Project: Jelvani/CS440
def train_faces(PERCENT=1, EPOCHS=1):
    faces = read_data.read_file(fdata='facedata/facedatatrain',
                                flabel='facedata/facedatatrainlabels',
                                WIDTH=60,
                                HEIGHT=70,
                                type='faces')
    num_data = len(faces[0])  #amount of training data
    neurons = []
    hits = 0
    neurons.append(neuron(size=60 * 70))

    for epochs in range(EPOCHS):
        faces = read_data.read_file(fdata='facedata/facedatatrain',
                                    flabel='facedata/facedatatrainlabels',
                                    WIDTH=60,
                                    HEIGHT=70,
                                    type='faces')
        for k in range(int(num_data * PERCENT)):
            x = rand.randint(0, len(faces[0]) - 1)  #get x as random
            features = get_features.features_from_image(
                faces[0][x])  #get vector of features

            if neurons[0].score(features) < 0 and faces[1][x] == 1:
                neurons[0].weights += features
            elif neurons[0].score(features) >= 0 and faces[1][x] == 0:
                neurons[0].weights -= features
            faces[0].pop(x)
            faces[1].pop(x)
    return neurons
Example #3
def main():
    demo_data = True
    config = Config("6_demo.txt")
    lr = config.config["learning_rate"]
    no_epochs = config.config["no_epochs"]
    X_train, y_train = read_file(config.config["training"])
    if demo_data:
        # Subtract 3 if the demo data is to be used
        y_train = np.array([y - 3 for y in y_train])
    X_val, y_val = None, None
    if "validation" in config.config:
        # Read the validation data.
        X_val, y_val = read_file(config.config["validation"])
        if demo_data:
            # Subtract 3 if the demo data is to be used
            y_val = np.array([y - 3 for y in y_val])
    activation_functions = []
    if "activations" in config.config:
        activation_functions = config.config["activations"]
    loss_type = config.config["loss_type"]
    l2_regularization_factor = config.config["L2_regularization"]

    layers = config.config["layers"]
    # Insert the number of features as the number of nodes in the first layer.
    layers.insert(0, X_train.shape[1])

    if loss_type == "cross_entropy":
        # If cross_entropy is used then we need to use softmax for the last layer.
        n_classes = get_num_of_classes(y_train)
        activation_functions.append("softmax")
        # Append n_classes as the number of nodes in the last layer.
        layers.append(n_classes)
        y_train = one_hot(y_train, classes=n_classes)
        if y_val is not None:
            y_val = one_hot(y_val, classes=n_classes)
    else:  # L2
        activation_functions.append(
            "linear")  # TODO:  Typisk linear, kan være relu og.
        # Here we append 1 node at the last layer.
        layers.append(1)
    network = Network(X_train,
                      y_train,
                      layers,
                      loss_type,
                      activation_functions,
                      lr,
                      X_val=X_val,
                      y_val=y_val,
                      regularization_factor=l2_regularization_factor,
                      no_epochs=no_epochs)

    assert len(layers) == len(activation_functions) + 1
    network.train()
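Note: `one_hot` and `get_num_of_classes` are project helpers that are not shown here. A minimal sketch of what they might do, assuming integer class labels 0..K-1 (the names come from the call sites above; the bodies are assumptions):

import numpy as np

def get_num_of_classes(y):
    # Assumes labels are the integers 0..K-1.
    return int(np.max(y)) + 1

def one_hot(y, classes):
    # Encode integer labels as a (n_samples, classes) matrix of 0s and 1s.
    encoded = np.zeros((len(y), classes))
    encoded[np.arange(len(y)), np.asarray(y, dtype=int)] = 1
    return encoded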
Example #4
    def extract_data(self, path):
        # Read the images, labels, and number of classes from the given path
        imgs, labels, counter = read_file(path)

        # Shuffle the dataset and split it randomly into train/test sets
        X_train, X_test, y_train, y_test = train_test_split(
            imgs, labels, test_size=0.2, random_state=random.randint(0, 100))

        # Reshape and normalize.
        # This example was written for the Theano backend; adjust if using the TensorFlow backend.
        #X_train = X_train.reshape(X_train.shape[0], 1, self.img_size, self.img_size)/255.0
        #X_test = X_test.reshape(X_test.shape[0], 1, self.img_size, self.img_size) / 255.0
        # In 'channels_last' mode the input is a 4D tensor of shape (samples, rows, cols, channels).
        X_train = X_train.reshape(X_train.shape[0], self.img_size,
                                  self.img_size,
                                  1) / 255.0  # shape[0] is the number of samples
        X_test = X_test.reshape(X_test.shape[0], self.img_size, self.img_size,
                                1) / 255.0

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')

        # Convert the labels to binary class matrices (one-hot encoding)
        Y_train = np_utils.to_categorical(y_train, num_classes=counter)
        Y_test = np_utils.to_categorical(y_test, num_classes=counter)

        # Assign the formatted data to the class attributes
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        self.num_classes = counter
Example #5
def train_faces(PERCENTAGE=1):

    faces = read_data.read_file(fdata='facedata/facedatatrain',
                                flabel='facedata/facedatatrainlabels',
                                WIDTH=60,
                                HEIGHT=70,
                                type='faces')
    num_data = len(faces[0])  #amount of training data
    features = get_features.advanced_features_from_image(faces[0][0])
    face_class = label()
    face_class.features = np.zeros(len(features))
    not_face_class = label()
    not_face_class.features = np.zeros(len(features))
    '''
    get frequency of feature values for each feature in training set
    '''
    for k in range(int(num_data *
                       PERCENTAGE)):  # for each training data number
        x = rand.randint(0, len(faces[0]) - 1)  #get x as random index
        features = get_features.advanced_features_from_image(
            faces[0][x])  #get vector of features
        if faces[1][x] == 0:
            not_face_class.frequency += 1
            not_face_class.features += features
        elif faces[1][x] == 1:
            face_class.frequency += 1
            face_class.features += features
        faces[0].pop(x)
        faces[1].pop(x)
    return face_class, not_face_class, num_data
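Note: the `label` container class used by the naive Bayes examples (#5, #17, #20, #30) is not included on this page. A plausible sketch, assuming it only holds a class frequency plus per-feature counters (attribute names follow the usage above; the defaults are assumptions):

class label:
    """Hypothetical per-class counter object for the naive Bayes trainers."""

    def __init__(self):
        self.frequency = 0    # number of training samples seen for this class
        self.features = None  # summed feature vector (train_faces variant)
        self.v0 = None        # per-feature counts of feature value 0 (digits variant)
        self.v1 = None        # per-feature counts of feature value 1
        self.v2 = None        # per-feature counts of feature value 2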
Example #6
    def extract_data(self, path):
        # Read the images, labels, and number of classes from the given path
        imgs, labels, counter = read_file(path)

        # Shuffle the dataset and split it randomly into train/test sets
        X_train, X_test, y_train, y_test = train_test_split(
            imgs, labels, test_size=0.2, random_state=random.randint(0, 100))

        # Reshape and normalize.
        X_train = X_train.reshape(
            X_train.shape[0], self.img_size, self.img_size,
            1) / 255.0  # reshape to (samples, img_size, img_size, 1) and scale to [0, 1]
        X_test = X_test.reshape(X_test.shape[0], self.img_size, self.img_size,
                                1) / 255.0

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')

        # Convert the labels to binary class matrices (one-hot encoding)
        Y_train = np_utils.to_categorical(y_train, num_classes=counter)
        Y_test = np_utils.to_categorical(y_test, num_classes=counter)

        # Assign the formatted data to the class attributes
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        self.num_classes = counter
Example #7
    def extract_data(self, path):
        # Read the images, labels, and number of classes from the given path
        imgs, labels, counter = read_file(path)

        # Shuffle the dataset and split it randomly into train/test sets
        X_train, X_test, y_train, y_test = train_test_split(imgs, labels, test_size=0.2,
                                                            random_state=random.randint(0, 100))

        # Reshape and normalize.
        # This example uses the Theano backend ('channels_first'); adjust if using the TensorFlow backend.
        X_train = X_train.reshape(X_train.shape[0], 1, self.imgX_size, self.imgY_size) / 255.0
        X_test = X_test.reshape(X_test.shape[0], 1, self.imgX_size, self.imgY_size) / 255.0

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')

        # Convert the labels to binary class matrices (one-hot encoding)
        Y_train = np_utils.to_categorical(y_train, num_classes=counter)
        Y_test = np_utils.to_categorical(y_test, num_classes=counter)

        # Assign the formatted data to the class attributes
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        self.num_classes = counter
Example #8
File: demo.py  Project: Jelvani/CS440
def perceptron_deploy(digit=1, PERCENTAGE=1, digits=True):
    if digits:
        data = read_data.read_file(fdata='digitdata/testimages',
                                   flabel='digitdata/testlabels',
                                   WIDTH=28,
                                   HEIGHT=28,
                                   type='digits')
    else:
        data = read_data.read_file(fdata='facedata/facedatatest',
                                   flabel='facedata/facedatatestlabels',
                                   WIDTH=60,
                                   HEIGHT=70,
                                   type='faces')
    num_data = len(data[1])  #amount of test data
    neurons = []
    if digits:
        neurons = perceptron.train_digits(PERCENT=PERCENTAGE, EPOCHS=1)
    else:
        neurons = perceptron.train_faces(PERCENT=PERCENTAGE, EPOCHS=1)
    print("Trained Model!")
    hits = 0

    num = 0
    if digits:
        while True:
            num = rand.randint(0, len(data[0]) - 1)
            if digit == data[1][num]:
                print("Found Digit to Guess!")
                break
    else:
        num = rand.randint(0, len(data[0]) - 1)

    features = get_features.features_from_image(
        data[0][num])  #get vector of features
    scores = []
    for y in neurons:
        scores.append(y.score(features))
    if digits:
        winnerIndex = scores.index(max(scores))
        print("Predicted the digit: %s" % winnerIndex)
    else:
        if scores[0] < 0:
            print("Not Face!")
        else:
            print("Is Face!")
    plt.imshow(data[0][num])
    plt.show()
Example #9
def main():
    lookback_window = 50
    raw_time_sequences = []
    cutoffs = []
    for input_filename in sorted(glob('../output/*_VIEW_13.json'))[0:20]:
        d = read_file(input_filename)
        d.drop('Annual', axis=1, inplace=True)
        d_train = d.head(len(d) - HOW_MANY_YEARS_IN_TEST)
        cutoff_train_test_index = len(d_train.values.flatten())
        time_sequence = d.values.flatten()
        time_sequence = time_sequence[~np.isnan(time_sequence)]
        time_sequence = np.log(
            time_sequence +
            1e-6)  # simple normalization. could be per station.
        raw_time_sequences.append(time_sequence)
        cutoffs.append(cutoff_train_test_index)
    mean = np.mean(np.concatenate(raw_time_sequences))
    std = np.std(np.concatenate(raw_time_sequences))
    scale = 10

    x_train, y_train, x_test, y_test = [], [], [], []
    for time_sequence, cutoff_train_test_index in zip(raw_time_sequences,
                                                      cutoffs):
        # normalization
        time_sequence = (time_sequence - mean) / std / scale
        for i in range(lookback_window, len(time_sequence)):
            model_input_slice = time_sequence[i - lookback_window:i]
            if i < cutoff_train_test_index:
                x_train.append(model_input_slice)
                y_train.append(time_sequence[i])
            else:
                x_test.append(model_input_slice)
                y_test.append(time_sequence[i])

    x_train = np.expand_dims(x_train, axis=-1)
    y_train = np.expand_dims(y_train, axis=-1)
    x_test = np.expand_dims(x_test, axis=-1)
    y_test = np.expand_dims(y_test, axis=-1)
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

    m = Sequential()
    m.add(LSTM(128, input_shape=x_train.shape[1:]))
    m.add(Dense(128, activation='relu'))
    m.add(Dense(1, activation='linear'))
    opt = RMSprop(lr=1e-4)
    m.compile(loss='mae', optimizer=opt)

    for epoch in range(100):
        p = np.exp(m.predict(x_test) * scale * std + mean)
        t = np.exp(y_test * scale * std + mean)
        error = np.mean(np.abs(p - t))
        print(epoch, error)
        m.fit(x_train,
              y_train,
              shuffle=True,
              validation_data=(x_test, y_test),
              epochs=1,
              batch_size=256,
              verbose=0)
Example #10
File: art.py  Project: qiray/MathArtist
    def read_file_data(self, path):
        try:
            art, use_depth, coord_transform, polar_shift, name = read_file(
                path)
            self.read_art_params(art, use_depth, coord_transform, polar_shift,
                                 name)
        except:
            print("Failed to read file " + path)
Example #11
	def load_data(self):
		pwd =  os.path.dirname(os.path.realpath(__file__))
		self.gyroscope = np.mat(read_file(pwd+'/FilterTestData/Gyroscope.txt'))
		self.accelerometer = np.mat(read_file(pwd+'/FilterTestData/Accelerometer.txt'))
		self.magnetometer = np.mat(read_file(pwd+'/FilterTestData/Magnetometer.txt'))
		self.time = np.mat(read_file(pwd+'/FilterTestData/Time.txt'))

		self.quaternion_madgwick_imu = np.roll( np.mat( read_file(pwd+'/FilterTestData/quaternion_madgwick_imu.txt') ), -1)
	
		self.quaternion_madgwick_marg = np.roll( np.mat( read_file(pwd+'/FilterTestData/quaternion_madgwick_marg.txt') ), -1 )
		self.quaternion_mahoney_imu = np.roll( np.mat( read_file(pwd+'/FilterTestData/quaternion_mahoney_imu.txt') ), -1 )
		self.quaternion_mahoney_marg = np.roll( np.mat( read_file(pwd+'/FilterTestData/quaternion_mahoney_marg.txt') ), -1 )
Example #12
File: perceptron.py  Project: Jelvani/CS440
def deploy_model(PERCENT,
                 EPOCHS,
                 digits=True):  #set digits to false to train faces
    if digits:
        data = read_data.read_file(fdata='digitdata/testimages',
                                   flabel='digitdata/testlabels',
                                   WIDTH=28,
                                   HEIGHT=28,
                                   type='digits')
    else:
        data = read_data.read_file(fdata='facedata/facedatatest',
                                   flabel='facedata/facedatatestlabels',
                                   WIDTH=60,
                                   HEIGHT=70,
                                   type='faces')
    num_data = len(data[1])  #amount of test data
    neurons = []
    if digits:
        neurons = train_digits(PERCENT=PERCENT, EPOCHS=EPOCHS)
    else:
        neurons = train_faces(PERCENT=PERCENT, EPOCHS=EPOCHS)
    hits = 0
    for x in range(num_data):
        features = get_features.features_from_image(
            data[0][x])  #get vector of features
        scores = []
        for y in neurons:
            scores.append(y.score(features))
        if digits:
            winnerIndex = scores.index(max(scores))
            if winnerIndex == data[1][x]:
                hits += 1
        else:
            if scores[0] < 0 and data[1][x] == 0:
                hits += 1
            elif scores[0] >= 0 and data[1][x] == 1:
                hits += 1
    return hits / num_data
Example #13
def test_onBatch(path):
    model = Model()
    model.load()
    index = 0
    img_list, label_list, counter = read_file(path)
    for img in img_list:
        picType, prob = model.predict(img)
        if picType != -1:
            index += 1
            print(label_list[picType], prob)
        else:
            print("invaild person")

    return index
Example #14
def test_onBatch(path):
    model = Model()
    model.load()
    index = 0
    img_list, label_list, counter = read_file(path)
    for img in img_list:
        picType, prob = model.predict(img)
        if picType != -1:
            index += 1
            name_list = read_name_list('./image/trainfaces')
            print(name_list[picType])
        else:
            print(" Don't know this person")
    return index
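Note: `read_name_list`, used by several of the test_onBatch variants, appears to map a predicted class index back to a person's name. A minimal sketch, assuming the training classes follow the sorted order of the sub-directories in the dataset folder (this behavior is an assumption, not the project's confirmed implementation):

import os

def read_name_list(path):
    # Hypothetical helper: return one name per sub-directory, in sorted order,
    # so the class index predicted by the model can be used as a list index.
    return sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )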
Example #15
def test_onBatch(path):
    model = Model()
    model.load()
    index = 0
    img_list, label_list, counter = read_file(path)
    for img in img_list:
        picType, prob = model.predict(img)
        if picType != -1:
            index += 1
            name_list = read_name_list('/Users/gaoxingyun/Documents/uw/courses/Sp19/EE576_CV/project/faceRecognition/dataset')
            print(name_list[picType])
        else:
            print(" Don't know this person")

    return index
Example #16
def test_onBatch(path):
    model = Model()
    model.load()
    index = 0
    img_list, label_list, counter = read_file(path)
    for img in img_list:
        picType, prob = model.predict(img)
        if picType != -1:
            index += 1
            name_list = read_name_list(r'D:\myProject\pictures\dataset')
            print(name_list[picType])
        else:
            print(" Don't know this person")

    return index
Example #17
def infrence_model(PERCENTAGE=1):
    SMOOTHER = 1
    label_obj, num_data = train_digits(PERCENTAGE=PERCENTAGE)

    digits = read_data.read_file(fdata='digitdata/testimages',
                                 flabel='digitdata/testlabels',
                                 WIDTH=28,
                                 HEIGHT=28,
                                 type='digits')

    predictions = []  #outputs from bayes classifier
    for x in range(len(digits[0])):
        features = get_features.features_from_image(
            digits[0][x])  #get array of features
        maxls = []
        cur_guess = None
        for y in range(
                10):  #get prob of each label and choose highest as answer
            p_y = math.log(
                (label_obj[y].frequency) / int(num_data * PERCENTAGE))
            likelihood = 0
            for feats in range(len(features)):
                if features[feats] == 0:
                    likelihood += math.log(
                        (label_obj[y].v0[feats] + SMOOTHER) /
                        (label_obj[y].frequency + label_obj[y].v0[feats]) *
                        SMOOTHER)
                elif features[feats] == 1:
                    likelihood += math.log(
                        (label_obj[y].v1[feats] + SMOOTHER) /
                        (label_obj[y].frequency + label_obj[y].v1[feats]) *
                        SMOOTHER)
                elif features[feats] == 2:
                    likelihood += math.log(
                        (label_obj[y].v2[feats] + SMOOTHER) /
                        (label_obj[y].frequency + label_obj[y].v2[feats]) *
                        SMOOTHER)
            likelihood = likelihood + p_y
            maxls.append(likelihood)
        predictions.append(maxls.index(max(maxls)))

    hits = 0
    for x in range(len(digits[1])):
        if predictions[x] == digits[1][x]:
            hits += 1
    accuracy = hits / len(digits[1])
    return accuracy
Example #18
def test_onBatch(path):
    model = Model()
    model.load()
    index = 0
    img_list, label_list, counter = read_file(path)
    for img in img_list:
        picType, prob = model.predict(img)
        if picType != -1:
            index += 1
            # name_list = read_name_list('D:\myProject\pictures\dataset')
            name_list = read_name_list(
                r'D:/my_laboratory/face_detection20180516/dataset')
            print(name_list[picType])
        else:
            print(" Don't know this person")

    return index
Example #19
def test_onBatch(path):
    model = Model()
    model.load()
    index = 0
    img_list, label_list, counter = read_file(path)
    for img in img_list:
        picType, prob = model.predict(img)
        if picType != -1:
            index += 1
            name_list = read_name_list(
                'C:\\Users\\jimmychen\\Desktop\\chernger\\chernger_faceRecognition\\dataset'
            )
            print(name_list[picType])
        else:
            print("Don't know this person")

    return index
Example #20
File: demo.py  Project: Jelvani/CS440
def bayes_digit(digit=1, PERCENTAGE=1):
    SMOOTHER = 1
    label_obj, num_data = naive_bayes_digits.train_digits(PERCENTAGE=1)
    print("Trained Model!")
    digits = read_data.read_file(fdata='digitdata/testimages',
                                 flabel='digitdata/testlabels',
                                 WIDTH=28,
                                 HEIGHT=28,
                                 type='digits')

    num = 0
    while True:
        num = rand.randint(0, len(digits[0]) - 1)
        if digit == digits[1][num]:
            break
    print("Found Digit to Guess!")
    features = get_features.features_from_image(
        digits[0][num])  #get array of features
    maxls = []
    for y in range(10):  #get prob of each label and choose highest as answer
        p_y = math.log((label_obj[y].frequency) / int(num_data * PERCENTAGE))
        likelihood = 0
        for feats in range(len(features)):
            if features[feats] == 0:
                likelihood += math.log(
                    (label_obj[y].v0[feats] + SMOOTHER) /
                    (label_obj[y].frequency + label_obj[y].v0[feats]) *
                    SMOOTHER)
            elif features[feats] == 1:
                likelihood += math.log(
                    (label_obj[y].v1[feats] + SMOOTHER) /
                    (label_obj[y].frequency + label_obj[y].v1[feats]) *
                    SMOOTHER)
            elif features[feats] == 2:
                likelihood += math.log(
                    (label_obj[y].v2[feats] + SMOOTHER) /
                    (label_obj[y].frequency + label_obj[y].v2[feats]) *
                    SMOOTHER)
        likelihood = likelihood + p_y
        maxls.append(likelihood)
    prediction = maxls.index(max(maxls))
    print("Predicted the digit: %s" % prediction)
    plt.imshow(digits[0][num])
    plt.show()
Example #21
def data_handle(path):
    imgs, labels, counter = read_file(path)
    imgs -= MEAN_PIXEL
    X_train, X_test, y_train, y_test = train_test_split(
        imgs, labels, test_size=0.2, random_state=random.randint(0, 100))

    X_train = X_train.reshape(X_train.shape[0], img_size, img_size, 3) / 255.0
    X_test = X_test.reshape(X_test.shape[0], img_size, img_size, 3) / 255.0

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # One-hot encode the multi-class labels
    Y_train = np_utils.to_categorical(y_train)
    Y_test = np_utils.to_categorical(y_test)

    num_classes = counter
    print('handle successful,', X_train.shape[0])
    return X_train, X_test, Y_train, Y_test, num_classes
Example #22
def get_feature_objects(filename: str,
                        max_items: int) -> (Dict[str, Features]):
    result_filename = filename + "results_stats.bin"
    if os.path.isfile(result_filename):
        with open(result_filename, "rb") as file:
            return pickle.load(file)

    else:
        c = ComputeFeatures()
        features = dict()
        for data in read_file(filename, max_items, True):
            if data.playerId not in features:
                features[data.playerId] = []
            features[data.playerId].append(c.compute_features(data))

        with open(result_filename, "wb") as file:
            pickle.dump(features, file)

        return features
Example #23
    def extract_data(self, path):
        #Read the number of pictures, tags, and categories based on the specified path
        imgs, labels, counter = read_file(path)

        print("Output tag")
        print(labels)

        #Dataset random grouping

        X_train, X_test, y_train, y_test = train_test_split(
            imgs, labels, test_size=0.4, random_state=random.randint(0, 100))
        print("Output training mark and training set length")
        print(y_train)
        print(len(X_train))
        print(X_train[1])
        print("Test length and test set tag")
        print(len(X_test))
        print(y_test)
        print("Output and")
        print(counter)

        # Reshape and normalize
        # This example was written for the Theano backend; adjust if using the TensorFlow backend
        print(X_train.shape)
        X_train = X_train.reshape(X_train.shape[0], 480, 640, 1)
        X_test = X_test.reshape(X_test.shape[0], 480, 640, 1)

        X_train = X_train.astype('float32') / 255
        X_test = X_test.astype('float32') / 255
        print(X_train[1])

        #labels into binary class matrices
        Y_train = np_utils.to_categorical(y_train, num_classes=counter)
        Y_test = np_utils.to_categorical(y_test, num_classes=counter)

        print(Y_train)
        # Assign the formatted data to the properties of the class
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        self.num_classes = counter
Example #24
    def extract_data(self, path):
        imgs, labels, counter = read_file(path)
        X_train, X_test, y_train, y_test = train_test_split(
            imgs, labels, test_size=0.2, random_state=random.randint(0, 100))
        X_train = X_train.reshape(X_train.shape[0], 1, self.img_size,
                                  self.img_size) / 255.0
        X_test = X_test.reshape(X_test.shape[0], 1, self.img_size,
                                self.img_size) / 255.0

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')

        Y_train = np_utils.to_categorical(y_train, num_classes=counter)
        Y_test = np_utils.to_categorical(y_test, num_classes=counter)

        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        self.num_classes = counter
Example #25
def test_onBatch(path):
    model = Model()
    model.load()
    index = 0
    img_list, label_list, counter = read_file(path)
    #     img_list = img_list.reshape(img_list.shape[0], 174, 212, 1)
    #     print(img_list.shape[0:])
    #     img_list = img_list.astype('float32')/255
    #     Label_list = np_utils.to_categorical(label_list, num_classes=counter)
    for img in img_list:
        picType, prob = model.predict(img)
        if picType != -1:
            index += 1
            name_list = read_name_list('G:/desktop/myProject/pictures/test')
            print(name_list)
            print(name_list[picType])
        else:
            print(" Don't know this person")

    return index
Example #26
    def extract_data(self, path):
        # Read the images, labels, and number of classes from the given path
        imgs, labels, counter = read_file(path)

        print("Labels:")
        print(labels)

        # Shuffle the dataset and split it randomly into train/test sets

        X_train, X_test, y_train, y_test = train_test_split(
            imgs, labels, test_size=0.4, random_state=random.randint(0, 100))
        print("输出训练标记和训练集长度")
        print(y_train)
        print(len(X_train))
        print(X_train[1])
        print("测试长度和测试集标记")
        print(len(X_test))
        print(y_test)
        print("输出和")
        print(counter)

        # Reshape and normalize
        # This example was written for the Theano backend; adjust if using the TensorFlow backend
        X_train = X_train.reshape(X_train.shape[0], 174, 212, 1)
        X_test = X_test.reshape(X_test.shape[0], 174, 212, 1)

        X_train = X_train.astype('float32') / 255
        X_test = X_test.astype('float32') / 255
        print(X_train[1])

        # Convert labels to binary class matrices (one-hot encoding)
        Y_train = np_utils.to_categorical(y_train, num_classes=counter)
        Y_test = np_utils.to_categorical(y_test, num_classes=counter)

        print(Y_train)
        # Assign the formatted data to the class attributes
        self.X_train = X_train
        self.X_test = X_test
        self.Y_train = Y_train
        self.Y_test = Y_test
        self.num_classes = counter
Example #27
def get_features(filename: str,
                 max_items: int,
                 label_present=True) -> (List[str], List[List]):
    result_filename = filename + "results.bin"
    if os.path.isfile(result_filename):
        with open(result_filename, "rb") as file:
            data = pickle.load(file)
            return data["labels"], data["features"]

    else:
        c = ComputeFeatures()
        features = []
        labels = []
        for data in read_file(filename, max_items, label_present):
            if label_present:
                labels.append(data.playerId)
            features.append(c.compute_features(data).to_array())

        with open(result_filename, "wb") as file:
            pickle.dump({"labels": labels, "features": features}, file)

        return labels, features
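A possible way to call the cached feature loader above; the file name and max_items value below are made up for illustration, and the first call writes a "<filename>results.bin" pickle next to the input file so later calls can skip recomputation:

# Hypothetical usage: compute (or reload) per-sample features and labels.
labels, features = get_features("train_data.csv", max_items=10000, label_present=True)
print(len(labels), "labeled feature vectors")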
Example #28
def main(file_loc):
    global explode, pos, neg, emos, counter, handler, span, dates, words_total

    if ('.txt' not in file_loc):
        file_txt = file_loc[:-3] + 'txt'

        if not os.path.exists(file_txt):
            data_set = read_data_set(file_loc)

            file = open(file_txt, 'w')
            for month in data_set:
                file.write(month.to_string() + '\n')
            file.close()

        file_loc = file_txt

    (data_set, words_total) = read_file(file_loc)
    (counter, dates) = sum_data_set(data_set)
    set_emos(data_set)
    pos = counter.pop(0)
    neg = counter.pop(0)

    (plot, stacked) = make_flower()
    handler = FlowerEventHandler(plot)
    span = SpanSelector(stacked,
                        onselect,
                        'horizontal',
                        useblit=True,
                        rectprops=dict(alpha=0.5, facecolor='red'))

    subplots_adjust(left=0.0,
                    bottom=0.1,
                    right=0.95,
                    top=0.91,
                    wspace=0.04,
                    hspace=0.22)
    plt.show(block=True)
Example #29
def main2():
    global explode, pos, neg, emos, counter, handler, span, dates, words_total

    (data_set, words_total) = read_file('../data/article_compact.txt')
    (counter, dates) = sum_data_set(data_set)
    set_emos(data_set)
    pos = counter.pop(0)
    neg = counter.pop(0)

    (plot, stacked) = make_flower()
    handler = FlowerEventHandler(plot)
    span = SpanSelector(stacked,
                        onselect,
                        'horizontal',
                        useblit=True,
                        rectprops=dict(alpha=0.5, facecolor='red'))

    subplots_adjust(left=0.0,
                    bottom=0.1,
                    right=0.95,
                    top=0.91,
                    wspace=0.04,
                    hspace=0.22)
    plt.show(block=True)
Example #30
def train_digits(PERCENTAGE=1):

    digits = read_data.read_file(fdata='digitdata/trainingimages',
                                 flabel='digitdata/traininglabels',
                                 WIDTH=28,
                                 HEIGHT=28,
                                 type='digits')
    num_data = len(digits[0])  #amount of training data
    label_obj = []
    features = get_features.features_from_image(digits[0][0])
    for x in range(10):  #create 10 label objects for each class
        lbl = label()
        lbl.v0 = np.ones(len(features))
        lbl.v1 = np.ones(len(features))
        lbl.v2 = np.ones(len(features))
        label_obj.append(lbl)
    '''
    get frequency of feature values for each feature in training set
    '''
    for k in range(int(num_data *
                       PERCENTAGE)):  # for each training data number
        x = rand.randint(0, len(digits[0]) - 1)  #get x as random index
        features = get_features.features_from_image(
            digits[0][x])  #get vector of features
        label_obj[digits[1][x]].frequency += 1
        for y in range(len(features)):
            if features[y] == 0:
                label_obj[digits[1][x]].v0[y] += 1
            elif features[y] == 1:
                label_obj[digits[1][x]].v1[y] += 1
            elif features[y] == 2:
                label_obj[digits[1][x]].v2[y] += 1
        digits[0].pop(x)
        digits[1].pop(x)

    return label_obj, num_data
Example #31
                    ' to use when collecting training experiences.')

args = parser.parse_args()
continue_from_file = args.continue_from_file
model_name = args.model_name
simulator_type = args.simulator_type

all_configs = load(open(config_filename, 'r'), Loader=yaml.FullLoader)
config = all_configs[model_name]

device = get_device()

# Find the input size, hidden dim sizes, and output size
env_name = config['env_name']
# env = gym.make(env_name)
data = read_file('./data/train.csv')
embeddings = Embeddings(read_embeddings('./data/embeddings.csv'))
env = gym.make(env_name,
               data=data,
               embeddings=embeddings,
               alpha=0.5,
               gamma=0.9,
               fixed_length=True,
               trajectory_length=5)
action_space = env.action_space
observation_space = env.observation_space
policy_hidden_dims = config['policy_hidden_dims']
vf_hidden_dims = config['vf_hidden_dims']
vf_args = (observation_space.shape[0] + 1, vf_hidden_dims, 1)

# Initialize the policy
Example #32
from read_data import read_file, get_ratexuser, get_movie_genres
from similarity import recommend
from user_personalization import print_question

data, labels = read_file()

# Get all the movie titles just one time
movies, genres = get_movie_genres(data, labels)

# separate the users by Id and then take just the ranking for each movie
data_dicts, user_ids = get_ratexuser(data)

print_question(data, labels, user_ids, data_dicts)

Example #33
import read_data as rd
import vectorization as vt
import LSTM_Class as lstmcl
import time as t
import datetime
import math
import matplotlib.pyplot as plt

tic = t.time()

example = 'example_2015_12_2016_01'

# read training data
file_name = 'TrainData.txt'
file_dir = 'txt_files\\examples\\' + example + '\\' + file_name
req_contents_tr = rd.read_file(file_dir)

# read test data
file_name = 'TestData.txt'
file_dir = 'txt_files\\examples\\' + example + '\\' + file_name
req_contents_ts = rd.read_file(file_dir)

# decode training & test data
req_data_tr = rd.decode_req_data(req_contents_tr)
req_data_ts = rd.decode_req_data(req_contents_ts)

# read input file
file_name = "input.txt"
file_dir = "txt_files\examples\\" + example + '\\' + file_name
input_file_contents = rd.read_file(file_dir)