def load_table_in_chunks():
    """Purge the target table, then bulk-load every source file in chunks.

    Scans ``SOURCE_FOLDER`` once for files, deletes all existing rows from
    the module-level ``tableName``, and feeds the files to
    ``batch_load_to_table`` in chunks of ``chunk_size`` so huge file sets
    are not loaded in a single batch.

    Relies on module-level names: ``tableName``, ``DB_FILE``,
    ``SOURCE_FOLDER``, ``toolkit_sqlite``, ``toolkit_file``,
    ``toolkit_text``, ``batch_load_to_table``.
    """
    logging.info('Purge table {}'.format(tableName))
    with toolkit_sqlite.SqliteDB(DB_FILE) as sqlitedb:
        sqlitedb.execute('DELETE FROM {}'.format(tableName))

    # Scan the folder ONCE. The original called get_file_list(SOURCE_FOLDER)
    # three times (and never reused its own `fileList`), which is wasteful
    # and can race with concurrent changes to the folder.
    fileList = toolkit_file.get_file_list(SOURCE_FOLDER)
    chunk_size = 100000
    start_number = 1
    file_count = len(fileList)
    logging.info('Read {} files'.format(file_count))
    for chunk in toolkit_text.chunks(fileList, chunk_size):
        # Log an inclusive 1-based range, clamped to the real file count.
        logging.info('Loading {}-{}'.format(
            start_number, min(start_number + chunk_size - 1, file_count)))
        batch_load_to_table(chunk)
        start_number += chunk_size
def generate_file_list(dataset_dir):
    """Build one record per ``.jpg`` file found under *dataset_dir*.

    The numeric picture id is parsed from the ``image_<N>`` basename and
    mapped to a group id at 80 pictures per group.

    Returns a list of dicts with keys ``pic_id``, ``group_id`` and
    ``image_path``.
    """
    records = []
    for image_path in toolkit_file.get_file_list(dataset_dir):
        if not image_path.endswith('.jpg'):
            continue
        stem = toolkit_file.get_basename(image_path, withExtension=False)
        pic_id = int(stem.replace('image_', ''))
        records.append({
            'pic_id': pic_id,
            'group_id': (pic_id - 1) // 80,  # 80 images per group
            'image_path': image_path,
        })
    return records
# Prediction script: load the trained Keras model named in config and run it
# over every image found in the local ``predict`` directory.

import warnings
# Suppress FutureWarning noise; installed before the numpy/keras imports
# below so warnings raised at import time are covered too.
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
from keras.models import load_model
import toolkit_file
import data_prep
import config

model_name = config.MODEL_NAME  # path/name of the saved Keras model
predict_dir = 'predict'  # folder holding the images to classify

predictList = toolkit_file.get_file_list(predict_dir)
predict_dataset = data_prep.read_img(predictList)
print(predict_dataset.shape)
# Append a trailing axis via reshape(-1) — presumably turning (N, H, W)
# grayscale data into the (N, H, W, 1) layout Keras expects; TODO confirm
# against data_prep.read_img's actual output shape.
predict_dataset = predict_dataset.reshape(predict_dataset.shape[0],
                                          predict_dataset.shape[1],
                                          predict_dataset.shape[2], -1)

# Predict
model = load_model(model_name)
predict = model.predict(predict_dataset)
print(predict)
# NOTE(review): argmax is taken over the flattened prediction array, so this
# prints a single index for the whole batch rather than one per image —
# confirm that is intended.
print(np.argmax(predict))

# NOTE(review): imports after the script body, and unused above — likely a
# leftover or the start of a following chunk; kept as-is.
import os
import cv2
@logging_manager.logging_to_file
def join_number_region(html):
    """Cross-join the page's city/province fields with each phone region.

    Parses *html* once for the shared city/province dict and once for the
    list of phone-number regions, then returns one dict per region — each a
    copy of the shared fields plus its own ``'phone_num_region'`` value.
    """
    regionDict = get_city_province(html)
    phoneRegionList = get_phone_region(html)
    joined = []
    for phone_region in phoneRegionList:
        regionDict['phone_num_region'] = phone_region
        joined.append(regionDict.copy())
    return joined


if __name__ == '__main__':
    # Collect region records from every saved HTML page, then replace the
    # table contents in one shot.
    allPhoneNumRegionList = []
    for htmlFile in toolkit_file.get_file_list('html'):
        print('Read ' + htmlFile)
        with open(htmlFile, encoding='utf-8') as f:
            html = f.read()
        allPhoneNumRegionList += join_number_region(html)

    print('Load to database...')
    df = pd.DataFrame(allPhoneNumRegionList)
    with toolkit_sqlite.SqliteDB(DB_FILE) as sqlitedb:
        sqlitedb.execute('DELETE FROM {}'.format(tableName))
        df.to_sql(tableName, con=sqlitedb.conn, index=False,
                  if_exists='append')
def batch_generate(font_file):
    """Render every character in ``char_set`` using *font_file*.

    Each glyph image is saved to
    ``<training_data_dir>/<codepoint>/<font-basename>_<codepoint>.jpg``,
    grouping images per character class for training.
    """
    basename = toolkit_file.get_basename(font_file)
    for order in char_set:
        generate_image(font_file, chr(order)).save(
            os.path.join(training_data_dir, str(order),
                         '{}_{}.jpg'.format(basename, order)))


if __name__ == '__main__':
    # Fonts previously used here: Sushanty, Fixedsys500c, Helvetica.
    # Rebuild the training set from scratch for every font in font_dir.
    purge_models.purge_folders(training_data_dir)
    config.init_folder()
    font_list = toolkit_file.get_file_list(font_dir)
    for font in font_list:
        # Original wrapped the name in '{}'.format(...), a no-op for
        # strings, and carried commented-out debug code; both removed.
        print(font)
        batch_generate(font)
import shutil

import jsbeautifier

import toolkit_file

js_dir = r''


def beautify_js(file, inplace=True):
    """Pretty-print a JavaScript file with jsbeautifier.

    The formatted output is written to ``<file>.formatted``; when *inplace*
    is true that copy then replaces the original file.
    """
    print('Format: ' + file)
    formatted_path = file + '.formatted'
    formatted_source = jsbeautifier.beautify_file(file)
    with open(formatted_path, 'w', encoding='utf-8') as out:
        out.write(formatted_source)
    if inplace:
        shutil.move(formatted_path, file)


if __name__ == '__main__':
    # Beautify every .js file found under js_dir.
    js_files = (path for path in toolkit_file.get_file_list(js_dir)
                if path.endswith('.js'))
    for js_file in js_files:
        beautify_js(js_file)
profile = xcs_profile[xcs_profile['Names']['NAME0']] profile['NAME'] = xcs_profile['Names']['NAME0'] profile['FILE_PATH'] = xcsFile return profile def load_xcs(profileList): '''Load xcs into database''' tableName = config.TABLE_NAME profileDataframe = pd.DataFrame(profileList, columns=config.TABLE_COLONM_ORDER_LIST) # profileDataframe = [] for i in profileDataframe.columns: profileDataframe.rename(columns={i: i.replace('(BOLD)', '_BOLD')}, inplace=True) # print(profileDataframe) conn = sqlite3.connect(config.DB_FILE) profileDataframe.to_sql(name=tableName, if_exists='replace', con=conn, index=True) if __name__ == '__main__': xcsList = [ i for i in toolkit_file.get_file_list(config.XCS_LIB) if i.lower().endswith('.xcs') ] profileList = list(map(read_xcs, xcsList)) load_xcs(profileList)