def load_table_in_chunks():
    """Purge the target table, then bulk-load all source files in fixed-size chunks.

    Relies on module-level names: tableName, DB_FILE, SOURCE_FOLDER and the
    helpers toolkit_sqlite, toolkit_file, toolkit_text, batch_load_to_table.
    """
    logging.info('Purge table {}'.format(tableName))
    with toolkit_sqlite.SqliteDB(DB_FILE) as sqlitedb:
        # tableName comes from module config, not user input.
        sqlitedb.execute('DELETE FROM {}'.format(tableName))

    # Scan the source folder once and reuse the listing — the original
    # re-scanned the directory three times and never used this variable.
    fileList = toolkit_file.get_file_list(SOURCE_FOLDER)

    chunk_size = 100000
    start_number = 1
    file_count = len(fileList)
    logging.info('Read {} files'.format(file_count))
    for chunk in toolkit_text.chunks(fileList, chunk_size):
        # Log an inclusive 1-based range, clamped to the total file count.
        logging.info('Loading {}-{}'.format(
            start_number, min(start_number + chunk_size - 1, file_count)))
        batch_load_to_table(chunk)
        start_number += chunk_size
예제 #2
0
def generate_file_list(dataset_dir, group_size=80):
    """Build a list of dataset records for every .jpg under *dataset_dir*.

    Each record is a dict with:
        pic_id     -- integer parsed from a file name like 'image_<n>.jpg'
        group_id   -- zero-based group index, *group_size* images per group
        image_path -- path to the image file as returned by get_file_list

    *group_size* defaults to 80 to preserve the original grouping.
    """
    imgFileList = [
        x for x in toolkit_file.get_file_list(dataset_dir)
        if x.endswith('.jpg')
    ]

    dataset_dict_list = []

    for file in imgFileList:
        # File names look like 'image_123.jpg'; strip the prefix to get the id.
        pic_id = int(
            toolkit_file.get_basename(file, withExtension=False).replace(
                'image_', ''))
        # pic_id is 1-based, so subtract 1 before integer-dividing into groups.
        group_id = (pic_id - 1) // group_size
        dataset_dict_list.append({
            'pic_id': pic_id,
            'group_id': group_id,
            'image_path': file
        })
    return dataset_dict_list
예제 #3
0
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
from keras.models import load_model

import toolkit_file

import data_prep
import config

# Script body: load a trained Keras model and run it over every image
# found in the local 'predict' folder.
model_name = config.MODEL_NAME

predict_dir = 'predict'

# Read every file in the predict folder into one array via the project's
# data_prep helper.
predictList = toolkit_file.get_file_list(predict_dir)
predict_dataset = data_prep.read_img(predictList)
print(predict_dataset.shape)
# Append a channel axis: assumes read_img returns a 3-D (N, H, W) array,
# reshaped here to (N, H, W, 1) — TODO confirm against data_prep.read_img.
predict_dataset = predict_dataset.reshape(predict_dataset.shape[0],
                                          predict_dataset.shape[1],
                                          predict_dataset.shape[2], -1)

# Predict
model = load_model(model_name)
predict = model.predict(predict_dataset)

print(predict)
# NOTE(review): argmax over the flattened prediction array — for a batch of
# several images this should likely be np.argmax(predict, axis=1); confirm.
print(np.argmax(predict))

import os
import cv2
예제 #4
0
@logging_manager.logging_to_file
def join_number_region(html):
    """Combine the page's city/province fields with each phone-number region.

    Returns one dict per phone region found in *html*, each carrying the
    shared region fields plus its own 'phone_num_region' value.
    """
    baseRegion = get_city_province(html)
    numberRegions = get_phone_region(html)

    combined = []
    for numRegion in numberRegions:
        record = baseRegion.copy()
        record['phone_num_region'] = numRegion
        combined.append(record)

    return combined


if __name__ == '__main__':
    # Parse every saved HTML page under 'html' and collect all rows.
    allPhoneNumRegionList = []

    for htmlFile in toolkit_file.get_file_list('html'):
        print('Read ' + htmlFile)
        with open(htmlFile, encoding='utf-8') as fp:
            pageHtml = fp.read()
        allPhoneNumRegionList += join_number_region(pageHtml)

    # Replace the table's contents with the freshly parsed rows.
    print('Load to database...')
    df = pd.DataFrame(allPhoneNumRegionList)
    with toolkit_sqlite.SqliteDB(DB_FILE) as sqlitedb:
        sqlitedb.execute('DELETE FROM {}'.format(tableName))
        df.to_sql(tableName,
                  con=sqlitedb.conn,
                  index=False,
                  if_exists='append')
def batch_generate(font_file):
    """Render every code point in char_set using *font_file*.

    Each glyph image is saved under training_data_dir/<codepoint>/ as
    '<font-basename>_<codepoint>.jpg'.
    """
    basename = toolkit_file.get_basename(font_file)
    for code_point in char_set:
        target_path = os.path.join(training_data_dir, str(code_point),
                                   '{}_{}.jpg'.format(basename, code_point))
        glyph = generate_image(font_file, chr(code_point))
        glyph.save(target_path)


if __name__ == '__main__':
    # Fonts previously used for training-data generation:
    #   Sushanty, Fixedsys500c, Helvetica
    # Start from a clean training-data tree, then render every font file
    # found in font_dir. (Dead commented-out debug code and the no-op
    # '{}'.format(fontName) wrapper from the original were removed.)
    purge_models.purge_folders(training_data_dir)
    config.init_folder()
    for font in toolkit_file.get_file_list(font_dir):
        print(font)
        batch_generate(font)
예제 #6
0
import shutil
import jsbeautifier
import toolkit_file

js_dir = r''


def beautify_js(file, inplace=True):
    """Reformat a JavaScript file with jsbeautifier.

    The formatted output is written to '<file>.formatted'; when *inplace*
    is true, that temporary file then replaces the original.
    """
    print('Format: ' + file)
    formatted_path = file + '.formatted'
    pretty_source = jsbeautifier.beautify_file(file)
    with open(formatted_path, 'w', encoding='utf-8') as out:
        out.write(pretty_source)
    if inplace:
        shutil.move(formatted_path, file)


if __name__ == '__main__':
    # Beautify every .js file found under js_dir, overwriting in place.
    for candidate in toolkit_file.get_file_list(js_dir):
        if candidate.endswith('.js'):
            beautify_js(candidate)
예제 #7
0
    profile = xcs_profile[xcs_profile['Names']['NAME0']]
    profile['NAME'] = xcs_profile['Names']['NAME0']
    profile['FILE_PATH'] = xcsFile
    return profile


def load_xcs(profileList):
    """Load xcs profile records into the configured SQLite table.

    profileList -- iterable of profile records, one per xcs file, shaped
                   to config.TABLE_COLONM_ORDER_LIST columns.

    Column names containing '(BOLD)' are normalized to '_BOLD' before
    writing. The destination table is fully replaced on every call.
    """
    tableName = config.TABLE_NAME
    profileDataframe = pd.DataFrame(profileList,
                                    columns=config.TABLE_COLONM_ORDER_LIST)
    # Normalize every column name in one rename call instead of a
    # per-column loop over rename(inplace=True).
    profileDataframe.rename(
        columns=lambda c: c.replace('(BOLD)', '_BOLD'), inplace=True)
    conn = sqlite3.connect(config.DB_FILE)
    try:
        profileDataframe.to_sql(name=tableName,
                                if_exists='replace',
                                con=conn,
                                index=True)
    finally:
        # The original leaked the connection; always close it.
        conn.close()


if __name__ == '__main__':
    # Collect every .xcs file (case-insensitive extension) from the library
    # folder, parse each one, and load the profiles into the database.
    xcsList = []
    for candidate in toolkit_file.get_file_list(config.XCS_LIB):
        if candidate.lower().endswith('.xcs'):
            xcsList.append(candidate)
    profileList = [read_xcs(path) for path in xcsList]
    load_xcs(profileList)