def exist_digit_data(t_file):
    """Report whether a data file exists under the project file prefix.

    Used to check for digit_data.csv, which holds the classification
    labels and the digit vectors and is also read when counting the
    number of samples of each individual class.

    Args:
        t_file: File name; it is appended to the prefix returned by
            ``set_file_name_prefix(0)[0]``.

    Returns:
        1 if the prefixed path exists, otherwise 0.
    """
    from os import path

    # The prefix is resolved once; an earlier version looked it up twice
    # and discarded the first result.
    t_file_name = set_file_name_prefix(0)[0] + t_file
    return int(path.exists(t_file_name))
def save(t_canvas_image):
    """Save the digits drawn on the canvas to digit_data.csv.

    The full canvas is split into its grid squares (``separate_digits``),
    the squares are reformatted (``reformat_mnist``) and the result is
    written, together with a leading class-label column, through
    ``write_digit_data``. The output can be used as input to machine
    learning programs.

    The class label comes from the ``t_class`` global setting and has the
    same value for every grid square: only one class at a time can be
    entered for a filled-out grid.

    Args:
        t_canvas_image: The full canvas image, passed unchanged to
            ``separate_digits``.
    """
    import pandas as pd
    from digit_capture import get_global_settings as gs

    d_2 = gs('d_2')
    grid_count = gs('grid_count')
    fname_pfx = set_file_name_prefix(0)[0]
    t_class = gs('t_class')

    # One label per grid square.
    df_class = make_class_list(t_class, grid_count * grid_count)

    # separate_digits returns a pair; only the second element is needed here.
    _, t_reformat_dg = separate_digits(t_canvas_image, d_2, grid_count)
    new_data = reformat_mnist(t_reformat_dg, 0)

    # The label column goes first, followed by the pixel data columns.
    df_dta = pd.concat([df_class, pd.DataFrame(data=new_data)], axis=1)
    write_digit_data(df_dta, fname_pfx + 'digit_data.csv')
def get_dict_from_file(t_file):
    """Read a settings CSV and return it as a ``{Name: Value}`` dictionary.

    The dictionary holds the values of many of the global variables.

    Args:
        t_file: CSV file name, appended to ``set_file_name_prefix(0)[0]``.
            The file must have ``Name``, ``Value`` and ``Type`` columns.

    Returns:
        A dict mapping each entry of the ``Name`` column to the matching
        entry of the list returned by ``set_correct_type`` (presumably the
        ``Value`` entries converted according to ``Type``).
    """
    from digit_capture import set_file_name_prefix
    import pandas as pd

    csv_path = set_file_name_prefix(0)[0] + t_file
    frame = pd.read_csv(csv_path)

    names = list(frame['Name'])
    raw_values = list(frame['Value'])
    type_tags = list(frame['Type'])
    typed_values = set_correct_type(raw_values, type_tags)

    # Pair names with values by position; later duplicates of a name win.
    settings = {}
    for position, name in enumerate(names):
        settings[name] = typed_values[position]
    return settings
def get_val_from_dict_csv(t_key):
    """Look up one value in dictionary_inputs.csv by its ``Name`` entry.

    The file is read with pandas, the first row whose ``Name`` equals
    ``t_key`` is located, and the ``Value`` entry of that row is returned.

    Args:
        t_key: The row key to search for in the ``Name`` column.

    Returns:
        The matching ``Value`` entry converted to a string.

    Raises:
        ValueError: If ``t_key`` is not present in the ``Name`` column.
    """
    import pandas as pd

    csv_path = set_file_name_prefix(in_colab())[0] + 'dictionary_inputs.csv'
    table = pd.read_csv(csv_path)

    names = list(table['Name'])
    row = names.index(t_key)
    values = list(table['Value'])
    return str(values[row])
def output_data(t_data):
    """Reshape flat canvas pixel data into a square image and save it.

    The data is converted to a ``uint8`` array, reshaped to a square whose
    side is ``grid_count * pxl`` (both read from the global settings) and
    handed to ``save``, which writes it out as CSV data.

    Args:
        t_data: Flat sequence of pixel values. It must contain exactly
            ``(grid_count * pxl) ** 2`` entries, otherwise the reshape
            raises ``ValueError``.
    """
    import numpy as np
    from digit_capture import get_global_settings as gs

    # The canvas is square: grid_count squares per side, pxl pixels each.
    canvas_side = gs('grid_count') * gs('pxl')

    pixels = np.asarray(t_data, dtype="uint8")
    nw_image = np.reshape(pixels, (canvas_side, canvas_side))
    save(nw_image)
def get_class_counts(t_file):
    """Count how many rows of each class are stored in a digit data file.

    Args:
        t_file: Data file name (e.g. digit_data.csv), appended to
            ``set_file_name_prefix(0)[0]``. The file is read without a
            header row; its first column is treated as the class label.

    Returns:
        A dict of the form ``{'replace_dict': {class_label: count}}``.
        When the file does not exist, a placeholder with a single entry
        and a count of 0 is returned instead; that is how a missing
        digit_data.csv is handled.
    """
    import pandas as pd

    fname_pfx = set_file_name_prefix(0)[0]

    if exist_digit_data(t_file) != 1:
        # No data file yet: report a single class with a zero count.
        return {'replace_dict': {'?class02?': 0}}

    raw = pd.read_csv(fname_pfx + t_file, header=None)

    # Only the label column and one data column are needed for counting.
    label_and_first = raw.iloc[:, 0:2]
    label_and_first.columns = ['class', 'replace_dict']

    per_class = label_and_first.groupby('class').count()
    return per_class.to_dict()
def insert_mods_to_js_file(t_js_file, the_dict):
    """Return the text of a JS template with its wildcards substituted.

    The JS file contains wildcard tokens for certain variables and
    functions. Each token that appears as a key of ``the_dict`` is replaced
    by the string form of the corresponding value, which lets the Python
    side pass values to the JavaScript.

    Args:
        t_js_file: JS file name, appended to ``set_file_name_prefix(0)[0]``.
        the_dict: Mapping of wildcard text to its replacement. Replacements
            are applied in the mapping's iteration order.

    Returns:
        The file contents with every wildcard occurrence replaced. The file
        on disk is not modified.
    """
    from digit_capture import set_file_name_prefix

    js_path = set_file_name_prefix(0)[0] + t_js_file

    # The file is only read, so open it read-only and close it promptly;
    # it used to be opened "r+" and the handle was never closed.
    with open(js_path, "r") as js_file:
        the_js_data = js_file.read()

    for old, new in the_dict.items():
        the_js_data = the_js_data.replace(old, str(new))
    return the_js_data
def update_class_number(nw_class_index):
    """Store a newly selected class label in dictionary_inputs.csv.

    dictionary_inputs.csv holds the values of the global variables, keyed
    by variable name. This rewrites two of its rows so the class label can
    change as the user enters different digit classes: the ``t_class`` row
    gets the label ``'?class0<index>?'`` and the ``?selval?`` row gets the
    index itself.

    Args:
        nw_class_index: Index into the ten labels ``'?class00?'`` through
            ``'?class09?'``.

    Returns:
        The updated DataFrame, which has also been written back to the
        CSV file (without the index column).
    """
    import pandas as pd

    class_labels = ['?class0' + str(digit) + '?' for digit in range(0, 10)]
    nw_class = class_labels[nw_class_index]

    csv_path = set_file_name_prefix(in_colab())[0] + 'dictionary_inputs.csv'
    df = pd.read_csv(csv_path)
    names = list(df['Name'])

    df.at[names.index('t_class'), 'Value'] = nw_class
    df.at[names.index('?selval?'), 'Value'] = nw_class_index

    df.to_csv(csv_path, index=False)
    return df