'm_label_one_hot', 'image', 'italic', 'aspect_ratio', 'upper_case' ] ds = ocr_utils.read_data(input_filters_dict=input_filters_dict, output_feature_list=output_feature_list, test_size=.1, engine_type='tensorflow', dtype=dtype) nn = nnetwork.network(ds.train) nn.fit(ds.train, nEpochs=5000) nn.test(ds.test) else: # loop through all the fonts and train individually # pick up the entire list of fonts and font variants. Train each one. df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) import pprint as pprint pp = pprint.PrettyPrinter(indent=4) pp.pprint(df1) output_feature_list = [ 'm_label_one_hot', 'image', 'italic', 'aspect_ratio', 'upper_case', 'font_one_hot' ] # Change nEpochs to 5000 for better results for l in df1: #input_filters_dict= {'font': (l[0],)} input_filters_dict = { 'm_label':
'''
Dump some statistics for the fonts.zip database and make plots, saved in
files, of the characters for each fontVariant found in the database.

Created on Jul 25, 2016

@author: richard
'''
import ocr_utils
import numpy as np

# Fetch the 'font' column of the database; the loop below makes one
# follow-up query per item yielded by iterating this result.
df1 = ocr_utils.get_list(input_filters_dict={'font': ()})

# Accumulators for the distinct values seen so far, one per column.
# They start as plain lists and become numpy arrays after the first
# np.unique() call inside the loop.
unique_fonts = []
unique_fontVariants = []
unique_m_labels = []
unique_strengths = []
unique_italics = []
unique_orientations = []

#############################################################################
# read and show the character images for each font variant
# output only the character label and the image
for font in df1:
    # NOTE(review): an empty tuple presumably selects a column without
    # restricting its values -- confirm against ocr_utils.get_list.
    font_query = {
        'font': font,
        'fontVariant': (),
        'm_label': (),
        'strength': (),
        'italic': (),
        'orientation': (),
    }
    df2 = ocr_utils.get_list(input_filters_dict=font_query)

    # Merge this query's fonts into the running set of distinct fonts.
    fonts_so_far = np.append(unique_fonts, df2['font'])
    unique_fonts = np.unique(fonts_so_far)

    # De-duplicate the variants of this query first, then fold them into
    # the running set.
    u1 = np.unique(df2['fontVariant'])
    variants_so_far = np.append(unique_fontVariants, u1)
    unique_fontVariants = np.unique(variants_so_far)

    # Same two-step merge for the character labels.
    u2 = np.unique(df2['m_label'])
    labels_so_far = np.append(unique_m_labels, u2)
    unique_m_labels = np.unique(labels_so_far)
input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'fontVariant':'scanned'} #input_filters_dict = {} output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case'] ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, output_feature_list=output_feature_list, test_size = .1, engine_type='tensorflow',dtype=dtype) nn = nnetwork.network( ds.train) nn.fit( ds.train, nEpochs=5000) nn.test(ds.test) else: # loop through all the fonts and train individually # pick up the entire list of fonts and font variants. Train each one. df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) import pprint as pprint pp = pprint.PrettyPrinter(indent=4) pp.pprint(df1) output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case','font_one_hot'] # Change nEpochs to 5000 for better results for l in df1: #input_filters_dict= {'font': (l[0],)} input_filters_dict = {'m_label': list(range(48,58))+list(range(65,91))+list(range(97,123)),'font': (l[0],)} #train_a_font(input_filters_dict,output_feature_list, nEpochs = 500) ds = ocr_utils.read_data(input_filters_dict = input_filters_dict, output_feature_list=output_feature_list,
''' dumps out some statistics for the fonts.zip database and makes plots, saved in files, of characters for each fontVariant found in the database Created on Jul 25, 2016 @author: richard ''' import ocr_utils import numpy as np df1 = ocr_utils.get_list(input_filters_dict={'font': ()}) unique_fonts = [] unique_fontVariants = [] unique_m_labels = [] unique_strengths = [] unique_italics = [] unique_orientations = [] ############################################################################# # read and show the character images for each font variant # output only the character label and the image for font in df1: df2 = ocr_utils.get_list( input_filters_dict={ 'font': font, 'fontVariant': (), 'm_label': (), 'strength': (),