Exemple #1
0
def get_enlarged_data():
    """Build the doubled training set and the test set, scaled by 1/255.

    Two 60000-sample training arrays are requested from
    ``pretreatment.pretreatment`` -- one with ``rotate=0`` and one with
    ``rotate=1`` -- and stacked along axis 0. The training labels are
    repeated twice so they stay aligned with the stacked data.

    Returns:
        A list ``[train_x, train_y, test_x, test_y]``. ``train_x`` is
        reshaped to 120000 rows and ``test_x`` to 10000 rows; the labels are
        ``uint8`` arrays read from the files ``mnist_train_label`` and
        ``mnist_test_label``.
    """
    # Options shared by every pretreatment call in this function.
    common = dict(save_file=0, save_images=0, hog=0)

    plain = pretreatment.pretreatment(size=60000, rotate=0, **common)
    rotated = pretreatment.pretreatment(size=60000, rotate=1, **common)
    train_x = np.concatenate((plain, rotated), axis=0)

    test_x = pretreatment.pretreatment(size=10000,
                                       rotate=0,
                                       file_path="mnist_test_data",
                                       **common)

    test_y = np.fromfile("mnist_test_label", dtype=np.uint8)
    base_y = np.fromfile("mnist_train_label", dtype=np.uint8)
    # One copy of the labels per training variant (plain, then rotate=1).
    train_y = np.concatenate((base_y, base_y), axis=0)

    train_x = np.reshape(train_x / 255, (120000, -1))
    test_x = np.reshape(test_x / 255, (10000, -1))
    return [train_x, train_y, test_x, test_y]
Exemple #2
0
def pre_treatment():
    """Locate characters in ``test01.jpg``, outline them and save crops.

    Steps:
      1. Read ``test01.jpg``, drop its bottom 200 rows and left 1000
         columns, scale it by 0.3 and show it in a window until a key is
         pressed.
      2. Pass the image to ``pt.pretreatment`` to obtain ``pos``, a sequence
         of boxes indexed as ``(x1, y1, x2, y2)`` by the code below.
      3. Draw a rectangle around every box and write ``rec_image.jpg``.
      4. Crop the last 18 boxes out of ``test_image.png`` and write them to
         ``test1/1.jpg`` ... ``test1/18.jpg``.

    Raises:
        FileNotFoundError: if ``test01.jpg`` or ``test_image.png`` cannot be
            read.
        IndexError: if ``pt.pretreatment`` returns fewer than 18 boxes.
    """
    test_image_path = 'test01.jpg'
    # Read the image to recognise as a 3-channel colour image.
    test_image = cv.imread(test_image_path, 1)
    if test_image is None:
        # cv.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(
            "cannot read image: {}".format(test_image_path))
    test_image = test_image[0:-200, 1000::]
    test_image = cv.resize(test_image, (0, 0), fx=0.3, fy=0.3)
    cv.namedWindow("test_image", 0)  # flag 0: the window can be resized
    cv.resizeWindow("test_image", 800, 600)
    cv.imshow('test_image', test_image)
    cv.waitKey(0)  # block until a key is pressed
    cv.destroyAllWindows()

    pos = pt.pretreatment(test_image)
    # Outline each detected box, padded by 3 px horizontally and 2 px
    # vertically. cv.rectangle takes the image, two opposite corners, the
    # line colour and the line thickness; (0, 0, 255) is red in the BGR
    # channel order that cv.imread produces.
    for box in pos:
        cv.rectangle(test_image, (box[0] - 3, box[1] - 2),
                     (box[2] + 3, box[3] + 2),
                     (0, 0, 255), 1)
    cv.imwrite('rec_image.jpg', test_image)

    # Cut the individual characters out at the detected positions.
    print(len(pos))
    # NOTE(review): the crops come from 'test_image.png', a file this
    # function never writes -- presumably produced by an earlier step whose
    # coordinates match `pos`; confirm.
    crop_source_path = 'test_image.png'
    rec_image = cv.imread(crop_source_path)
    if rec_image is None:
        raise FileNotFoundError(
            "cannot read image: {}".format(crop_source_path))
    for m in range(-18, 0, 1):
        box = pos[m]
        # Clamp the 1 px margin at the image border: a negative slice start
        # would wrap around to the far edge and yield an empty crop.
        top = max(box[1] - 1, 0)
        left = max(box[0] - 1, 0)
        data = rec_image[top:box[3] + 1, left:box[2] + 1]
        cv.imwrite(r'test1/{}.jpg'.format((m + 19)), data)
Exemple #3
0
def get_hog_data():
    """Load the training and test sets with ``hog=1`` and scale by 1/255.

    Both arrays come from ``pretreatment.pretreatment``. The training call
    leaves ``rotate`` at that function's default, while the test call passes
    ``rotate=0`` and reads from ``mnist_test_data``.

    Returns:
        A list ``[train_x, train_y, test_x, test_y]``. ``train_x`` is
        reshaped to 60000 rows and ``test_x`` to 10000 rows; the labels are
        ``uint8`` arrays read from the files ``mnist_train_label`` and
        ``mnist_test_label``.
    """
    # Options shared by both pretreatment calls.
    common = dict(save_file=0, save_images=0, hog=1)

    train_x = pretreatment.pretreatment(size=60000, **common)
    test_x = pretreatment.pretreatment(size=10000,
                                       rotate=0,
                                       file_path="mnist_test_data",
                                       **common)

    test_y = np.fromfile("mnist_test_label", dtype=np.uint8)
    train_y = np.fromfile("mnist_train_label", dtype=np.uint8)

    train_x = np.reshape(train_x / 255, (60000, -1))
    test_x = np.reshape(test_x / 255, (10000, -1))
    return [train_x, train_y, test_x, test_y]
Exemple #4
0
def set_up_data():
    """Load the parallel corpus, clean it and one-hot encode both sides.

    The dataset is imported, cleaned with ``pretreatment.pretreatment``,
    filtered to sentence pairs whose lengths are both ``SENTENCE_LENGTH - 1``
    or ``SENTENCE_LENGTH``, and padded/cut by
    ``pretreatment.standardize_sentence_length``. The ``compute_T`` /
    ``compute_S`` / ``compute_N`` helpers are then called for each language;
    they presumably populate the ``T_*``, ``S_*`` and ``N_*`` module globals
    read below (verify against their definitions).

    Side effects:
        Sets the module globals ``CORPUS_ENGLISH`` and ``CORPUS_FRENCH`` to
        the second value returned by ``treatment.convert_to_one_hot`` for
        each language, and prints progress information.

    Returns:
        Tuple ``(X, Y)``: the encoded source (English) and target (French)
        sentences as returned by ``treatment.convert_to_one_hot`` (described
        by the original author as 4D numpy arrays of one-hot vectors -- TODO
        confirm).
    """
    global CORPUS_ENGLISH, CORPUS_FRENCH

    def _length_ok(sentence):
        # Accept lengths SENTENCE_LENGTH - 1 and SENTENCE_LENGTH only.
        return SENTENCE_LENGTH - 2 < len(sentence) < SENTENCE_LENGTH + 1

    X, Y = pretreatment.import_dataset()

    print('Applying cleansing...')
    X = pretreatment.pretreatment(X)
    Y = pretreatment.pretreatment(Y)

    # Keep only the pairs where BOTH sides have an acceptable length, so the
    # two lists stay aligned index by index.
    indice = [i for i in range(len(X))
              if _length_ok(X[i]) and _length_ok(Y[i])]
    X = [X[i] for i in indice]
    Y = [Y[i] for i in indice]

    X = pretreatment.standardize_sentence_length(X)
    Y = pretreatment.standardize_sentence_length(Y)

    print('Computing the corpus sizes...')
    compute_T(X, 'english')
    compute_T(Y, 'french')
    compute_S(X, 'english')
    compute_S(Y, 'french')
    # X holds the English sentences and Y the French ones, matching the
    # compute_T / compute_S calls above (the labels used to be swapped here).
    compute_N(X, 'english')
    compute_N(Y, 'french')

    print('English corpus: %d tokens' % T_ENGLISH)
    print('French corpus: %d tokens' % T_FRENCH)
    print('English sentence length: %d' % S_ENGLISH)
    print('French sentence length: %d' % S_FRENCH)
    print('Number of sentences (both english and french): %d / %d' % (N_ENGLISH, N_FRENCH))

    print('Converting in one hot vectors')
    params_ENGLISH = (N_ENGLISH, S_ENGLISH, T_ENGLISH)
    params_FRENCH = (N_FRENCH, S_FRENCH, T_FRENCH)
    X, CORPUS_ENGLISH = treatment.convert_to_one_hot(X, params_ENGLISH)
    Y, CORPUS_FRENCH = treatment.convert_to_one_hot(Y, params_FRENCH)

    return (X, Y)
            hour_ratio_out_list += [hour_ratio]
            direction_ratio_out_list += [direction_ratio]
            others_out_list += [[
                fisher_time, area, HL[0], HL[1], HL[2], HL[3], HL_ratio[0],
                HL_ratio[1], HL_ratio[2], HL_ratio[3]
            ]]
            if len(d_list) != 0:
                xy_out_list += [count_out(d_list)]
            if len(d_list) == 0:
                xy_out_list += [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

        #print("当前进度:",num)
        #if num == 100:
        #    break

    write_csv(work_time_name, work_time_out_list, csv_N + '/work_time.csv')
    write_csv(hour_ratio_name, hour_ratio_out_list, csv_N + '/hour_ratio.csv')
    write_csv(direction_name, direction_ratio_out_list,
              csv_N + '/direction_ratio.csv')
    write_csv(xy_name, xy_out_list, csv_N + '/xy.csv')
    write_csv(others_name, others_out_list, csv_N + '/others.csv')
    #print("finish!")


if __name__ == '__main__':
    # Run each stage for the "train" split and then the "test" split,
    # finishing one stage completely before starting the next.
    splits = ("train", "test")
    for split in splits:
        pretreatment(split, 5)
    for split in splits:
        feature_clean(split)
    for split in splits:
        feature_hour(split)
Exemple #6
0
# -*- coding: utf-8 -*-

import numpy as np
from pretreatment import pretreatment
from gradientDescent_multi import gradientDescent
from math_solution import math_solution

# Fit a linear model to 'ex1data2.txt': the first two columns are used as
# inputs and the third as the target.
data = np.genfromtxt('ex1data2.txt', delimiter=',')
m = len(data)

# pretreatment returns the transformed inputs plus two further values, kept
# here as `middle` and `sigma`.
pre_X, middle, sigma = pretreatment(data[:, [0, 1]])

# Prepend a column of ones to the transformed inputs.
X = np.column_stack((np.ones(m), pre_X))
y = data[:, 2]

# Settings for the gradient-descent variant, which is currently disabled.
theta = np.zeros(3)
alpha = 0.01
step = 2000
# Disabled alternative:
#     theta_new = gradientDescent(X, y, theta, step, alpha)
#     print(theta_new)

# math_solution is given np.matrix inputs, with the target transposed into a
# column.
xx = np.matrix(X)
yy = np.matrix(y)
theta_new = math_solution(xx, yy.T)
print(theta_new)

Exemple #7
0
def pretreatment():
    """Call ``ptt.pretreatment()`` with no arguments and discard its result."""
    ptt.pretreatment()
    return None