Esempio n. 1
0
        'm_label_one_hot', 'image', 'italic', 'aspect_ratio', 'upper_case'
    ]
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow',
                             dtype=dtype)
    nn = nnetwork.network(ds.train)
    nn.fit(ds.train, nEpochs=5000)
    nn.test(ds.test)

else:
    # loop through all the fonts and train individually

    # pick up the entire list of fonts and font variants. Train each one.
    df1 = ocr_utils.get_list(input_filters_dict={'font': ()})

    import pprint as pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(df1)

    output_feature_list = [
        'm_label_one_hot', 'image', 'italic', 'aspect_ratio', 'upper_case',
        'font_one_hot'
    ]

    # Change nEpochs to 5000 for better results
    for l in df1:
        #input_filters_dict= {'font': (l[0],)}
        input_filters_dict = {
            'm_label':
'''

Dumps out some statistics for the fonts.zip database and
makes plots, saved to files, of the characters for each fontVariant found in
the database.

Created on Jul 25, 2016
@author: richard
'''
import ocr_utils
import numpy as np 
        
# Query ocr_utils for the 'font' column; the result is what the script loops
# over, one font at a time.
# NOTE(review): the empty tuple presumably means "do not restrict by value" --
# confirm against ocr_utils.get_list.
df1 = ocr_utils.get_list(input_filters_dict={'font': ()})

# One accumulator per attribute name; each starts out as its own empty list.
unique_fonts, unique_fontVariants, unique_m_labels = [], [], []
unique_strengths, unique_italics, unique_orientations = [], [], []

#############################################################################
# read and show the character images for each font variant
# output only the character label and the image

for font in df1:    
    df2 = ocr_utils.get_list(input_filters_dict = {'font':font,'fontVariant':(), 'm_label':(),'strength':(),'italic':(),'orientation':()})
    unique_fonts = np.unique( np.append(unique_fonts, df2['font']))
    u1= np.unique(df2['fontVariant'])    
    unique_fontVariants = np.unique(np.append(unique_fontVariants, u1))    
    u2 = np.unique(df2['m_label'])
    unique_m_labels = np.unique(np.append(unique_m_labels,u2))   
    input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'fontVariant':'scanned'}
    #input_filters_dict = {}    
    output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case']    
    ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, 
                                output_feature_list=output_feature_list,
                                test_size = .1,
                                engine_type='tensorflow',dtype=dtype)    
    nn = nnetwork.network( ds.train)
    nn.fit( ds.train,  nEpochs=5000)  
    nn.test(ds.test)
    
else:
    # loop through all the fonts and train individually

    # pick up the entire list of fonts and font variants. Train each one.
    df1 = ocr_utils.get_list(input_filters_dict={'font': ()})      
    
    import pprint as pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(df1)
   
    output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case','font_one_hot']
    
    # Change nEpochs to 5000 for better results
    for l in df1:
        #input_filters_dict= {'font': (l[0],)}   
        input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'font': (l[0],)}            
        #train_a_font(input_filters_dict,output_feature_list, nEpochs = 500) 
        
        ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, 
                                    output_feature_list=output_feature_list,
'''

Dumps out some statistics for the fonts.zip database and
makes plots, saved to files, of the characters for each fontVariant found in
the database.

Created on Jul 25, 2016
@author: richard
'''
import ocr_utils
import numpy as np

# Fetch the 'font' listing from ocr_utils; the script iterates over this
# result font by font.
# NOTE(review): an empty tuple as the filter value presumably requests every
# value of that column -- verify in ocr_utils.get_list.
df1 = ocr_utils.get_list(
    input_filters_dict={'font': ()},
)

# Independent empty lists, one per attribute, used as running accumulators.
unique_fonts, unique_fontVariants = [], []
unique_m_labels, unique_strengths = [], []
unique_italics, unique_orientations = [], []

#############################################################################
# read and show the character images for each font variant
# output only the character label and the image

for font in df1:
    df2 = ocr_utils.get_list(
        input_filters_dict={
            'font': font,
            'fontVariant': (),
            'm_label': (),
            'strength': (),