def download_7scenes(scene_dict, data_dir):
    for scene in scene_dict:
        seqs = scene_dict[scene]
        for seq in seqs:
            name = '{}_{}_mvs_training'.format(scene, seq)
            try:
                ut.download_and_unzip(name, data_dir)
            except Exception as e:
                print("Download of {} failed with exception {}".format(name, e))
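# Usage sketch (assumption, not part of the source): the scene and sequence names below
# are illustrative only; the real keys/values of scene_dict depend on how the dataset is
# hosted and on the '{scene}_{seq}_mvs_training' naming used above.
if __name__ == '__main__':
    example_scene_dict = {
        'chess': ['seq-01', 'seq-02'],
        'fire': ['seq-01'],
    }
    download_7scenes(example_scene_dict, './data')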
def __init__(self, config, inspect=False):
    self.apply_fn = None

    # Fetch the EMNIST "balanced" split as a MATLAB .mat file if it is not cached locally.
    download_url = "https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/matlab.zip"
    save_file = "emnist.zip"
    extract_dir = "emnist"
    main_data_file = "emnist/matlab/emnist-balanced.mat"
    if not os.path.exists(main_data_file):
        utils.download_and_unzip(download_url, save_file, extract_dir)

    extract_fn = lambda fname: spio.loadmat(fname)["dataset"]
    self.dataset = extract_fn(main_data_file)

    # config is a list of tasks, each a sequence of characters; convert to ASCII codes.
    config = [[ord(item2) for item2 in item] for item in config]

    # Index into the nested MATLAB struct: train/test -> images/labels.
    train_x_fn = lambda x: x[0][0][0][0][0][0]
    train_y_fn = lambda x: x[0][0][0][0][0][1]
    test_x_fn = lambda x: x[0][0][1][0][0][0]
    test_y_fn = lambda x: x[0][0][1][0][0][1]
    # Label codes: ASCII '0'-'9', 'A'-'Z', 'a'-'z'.
    labels_fn = lambda: (list(range(0x30, 0x39 + 1))
                         + list(range(0x41, 0x5a + 1))
                         + list(range(0x61, 0x7a + 1)))
    reshape_dims = (784,)
    # EMNIST images are stored column-major; flipud + rot90(k=3) re-orients them upright.
    self.img_fn = lambda x: np.rot90(np.flipud(x.reshape(28, 28)), 3)

    self.trainx = train_x_fn(self.dataset)
    self.trainy = train_y_fn(self.dataset)
    self.testx = test_x_fn(self.dataset)
    self.testy = test_y_fn(self.dataset)
    self.labels = labels_fn()
    self.reshape_dims = reshape_dims
    self.config = config

    self.n_tasks = len(config)
    print("Custom dataset: n_tasks = %d" % self.n_tasks)
    self.n_classes = len(config[0])
    for task in config:
        assert(len(task) == self.n_classes)
    print("Custom dataset: n_classes = %d" % self.n_classes)

    self.tasks = []
    self.categorize(all=False, render=inspect)

    self.curr_idx = 0
    self.n = self.tasks[self.curr_idx].n
    self.tn = self.tasks[self.curr_idx].tn
    self.batch_size = self.tasks[self.curr_idx].batch_size
    self.train_x = self.tasks[self.curr_idx].train_x
    self.train_y = self.tasks[self.curr_idx].train_y
    self.test_x = self.tasks[self.curr_idx].test_x
    self.test_y = self.tasks[self.curr_idx].test_y
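# Usage sketch (assumption, not from the source): render one EMNIST sample to check that
# img_fn re-orients the flat 784-vector correctly. `EMNISTDataset` is a hypothetical name
# for the class whose __init__ is shown above; the config pairs characters into tasks.
import matplotlib.pyplot as plt

dataset = EMNISTDataset(config=[['0', '1'], ['A', 'B']], inspect=False)
sample = dataset.trainx[0]                      # flat 784-vector from the .mat file
plt.imshow(dataset.img_fn(sample), cmap='gray')
plt.title("label code: %d" % int(dataset.trainy[0]))
plt.show()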
import os

from sklearn.metrics import confusion_matrix
from keras.models import Sequential, Model, load_model
from keras.initializers import Initializer, RandomNormal, RandomUniform
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adadelta, Adagrad
from keras.constraints import maxnorm
from keras.regularizers import l2
from keras import backend as K

print(K.tensorflow_backend._get_available_gpus())

cwd = os.getcwd()
fontsPath = cwd + "/fonts"
if not os.path.exists(fontsPath):
    download_and_unzip()

fonts = []
for root, dirs, files in os.walk(fontsPath):
    for e in files:
        fonts.append(e.split(".")[0])
# fonts = ['PALATINO', 'STYLUS', 'NINA', 'GOUDY']

X_test, X_train, Y_test, Y_train, idx_to_label, label_to_idx = data_load(0.8, fonts)


def get_model(X_train, Y_train, target_shape=153):
    print(Y_train.shape)
    model_name = "model_with_target_" + str(target_shape) + ".h5"
import os
import pickle
import numpy as np
import random

import utils
import keras.preprocessing.image as io

# In[2]:

data_url = "https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip"
dest_dir = "traffic-signs-data"
training_file = os.path.join(dest_dir, "train.p")
validation_file = os.path.join(dest_dir, "valid.p")
testing_file = os.path.join(dest_dir, "test.p")

utils.download_and_unzip(data_url, dest_dir, training_file, validation_file, testing_file)

# In[3]:

# Load pickled data
with open(training_file, mode='rb') as f:
    train = pickle.load(f)
with open(validation_file, mode='rb') as f:
    valid = pickle.load(f)
with open(testing_file, mode='rb') as f:
    test = pickle.load(f)

X_train, y_train = train['features'], train['labels']
import boto3
import botocore
import constants
import utils as ut
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('name', type=str, help="Name of dataset to be downloaded")
    parser.add_argument('data_dir', type=str, help="Directory to download dataset to")
    args = parser.parse_args()
    ut.download_and_unzip(args.name, args.data_dir)
def run(self):
    download_and_unzip(download_url=nhtsa_url,
                       data_dir='data/',
                       target_file=f"nhtsa_raw_{self.job_id}.tsv")
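# download_and_unzip is not defined in this excerpt. Below is a minimal sketch consistent
# with the keyword arguments used above (download_url, data_dir, target_file); it is an
# assumed implementation, not the project's actual helper. It assumes the URL points at a
# zip archive containing a single flat file that should end up at data_dir/target_file.
import os
import urllib.request
import zipfile


def download_and_unzip(download_url, data_dir, target_file):
    os.makedirs(data_dir, exist_ok=True)
    target_path = os.path.join(data_dir, target_file)
    if os.path.exists(target_path):
        return
    archive_path = target_path + ".zip"
    urllib.request.urlretrieve(download_url, archive_path)   # fetch the archive
    with zipfile.ZipFile(archive_path) as zf:
        member = zf.namelist()[0]                             # assume one file inside
        zf.extract(member, data_dir)
        os.rename(os.path.join(data_dir, member), target_path)
    os.remove(archive_path)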
def __init__(self, lang_list, gpu=True, model_storage_directory=None,
             user_network_directory=None, recog_network='standard',
             download_enabled=True, detector=True, recognizer=True):
    """Create an EasyOCR Reader.

    Parameters:
        lang_list (list): Language codes (ISO 639) for languages to be recognized
            during analysis.
        gpu (bool): Enable GPU support (default)
        model_storage_directory (string): Path to directory for model data. If not
            specified, models will be read from a directory as defined by the
            environment variable EASYOCR_MODULE_PATH (preferred), MODULE_PATH
            (if defined), or ~/.EasyOCR/.
        user_network_directory (string): Path to directory for custom network
            architecture. If not specified, it is as defined by the environment
            variable EASYOCR_MODULE_PATH (preferred), MODULE_PATH (if defined),
            or ~/.EasyOCR/.
        download_enabled (bool): Enabled downloading of model data via HTTP (default).
    """
    self.download_enabled = download_enabled

    self.model_storage_directory = MODULE_PATH + '/model'
    if model_storage_directory:
        self.model_storage_directory = model_storage_directory
    Path(self.model_storage_directory).mkdir(parents=True, exist_ok=True)

    self.user_network_directory = MODULE_PATH + '/user_network'
    if user_network_directory:
        self.user_network_directory = user_network_directory
    Path(self.user_network_directory).mkdir(parents=True, exist_ok=True)
    sys.path.append(self.user_network_directory)

    if gpu is False:
        self.device = 'cpu'
        LOGGER.warning('Using CPU. Note: This module is much faster with a GPU.')
    elif not torch.cuda.is_available():
        self.device = 'cpu'
        LOGGER.warning('CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.')
    elif gpu is True:
        self.device = 'cuda'
    else:
        self.device = gpu

    # check and download detection model
    corrupt_msg = 'MD5 hash mismatch, possible file corruption'
    detector_path = os.path.join(self.model_storage_directory, DETECTOR_FILENAME)
    if os.path.isfile(detector_path) == False:
        if not self.download_enabled:
            raise FileNotFoundError("Missing %s and downloads disabled" % detector_path)
        LOGGER.warning('Downloading detection model, please wait. '
                       'This may take several minutes depending upon your network connection.')
        download_and_unzip(model_url['detector'][0], DETECTOR_FILENAME, self.model_storage_directory)
        assert calculate_md5(detector_path) == model_url['detector'][1], corrupt_msg
        LOGGER.info('Download complete')
    elif calculate_md5(detector_path) != model_url['detector'][1]:
        if not self.download_enabled:
            raise FileNotFoundError("MD5 mismatch for %s and downloads disabled" % detector_path)
        LOGGER.warning(corrupt_msg)
        os.remove(detector_path)
        LOGGER.warning('Re-downloading the detection model, please wait. '
                       'This may take several minutes depending upon your network connection.')
        download_and_unzip(model_url['detector'][0], DETECTOR_FILENAME, self.model_storage_directory)
        assert calculate_md5(detector_path) == model_url['detector'][1], corrupt_msg

    # recognition model
    separator_list = {}

    if recog_network != 'standard':
        with open(os.path.join(self.user_network_directory, recog_network + '.yaml')) as file:
            recog_config = yaml.load(file, Loader=yaml.FullLoader)
        imgH = recog_config['imgH']
        available_lang = recog_config['lang_list']
        self.setModelLanguage(recog_network, lang_list, available_lang, available_lang)
        char_file = os.path.join(self.user_network_directory, recog_network + '.txt')
        with open(char_file, "r", encoding="utf-8-sig") as input_file:
            list = input_file.read().splitlines()
        self.character = number + symbol + ''.join(list)
        model_file = recog_network + '.pth'
        model_path = os.path.join(self.model_storage_directory, model_file)
    else:
        # check available languages
        unknown_lang = set(lang_list) - set(all_lang_list)
        if unknown_lang != set():
            raise ValueError(unknown_lang, 'is not supported')

        # choose recognition model
        if 'th' in lang_list:
            self.setModelLanguage('thai', lang_list, ['th', 'en'], '["th","en"]')
        elif 'ch_tra' in lang_list:
            self.setModelLanguage('chinese_tra', lang_list, ['ch_tra', 'en'], '["ch_tra","en"]')
        elif 'ch_sim' in lang_list:
            self.setModelLanguage('chinese_sim', lang_list, ['ch_sim', 'en'], '["ch_sim","en"]')
        elif 'ja' in lang_list:
            self.setModelLanguage('japanese', lang_list, ['ja', 'en'], '["ja","en"]')
        elif 'ko' in lang_list:
            self.setModelLanguage('korean', lang_list, ['ko', 'en'], '["ko","en"]')
        elif 'ta' in lang_list:
            self.setModelLanguage('tamil', lang_list, ['ta', 'en'], '["ta","en"]')
        elif set(lang_list) & set(bengali_lang_list):
            self.setModelLanguage('bengali', lang_list, bengali_lang_list + ['en'], '["bn","as","en"]')
        elif set(lang_list) & set(arabic_lang_list):
            self.setModelLanguage('arabic', lang_list, arabic_lang_list + ['en'], '["ar","fa","ur","ug","en"]')
        elif set(lang_list) & set(devanagari_lang_list):
            self.setModelLanguage('devanagari', lang_list, devanagari_lang_list + ['en'], '["hi","mr","ne","en"]')
        elif set(lang_list) & set(cyrillic_lang_list):
            self.setModelLanguage('cyrillic', lang_list, cyrillic_lang_list + ['en'],
                                  '["ru","rs_cyrillic","be","bg","uk","mn","en"]')
        else:
            self.model_lang = 'latin'

        if self.model_lang == 'latin':
            self.character = number + symbol + characters['all_char']
            model_file = 'latin.pth'
        elif self.model_lang == 'arabic':
            self.character = (number + symbol + characters['en_char']
                              + characters['ar_number'] + characters['ar_symbol']
                              + characters['ar_char'])
            model_file = 'arabic.pth'
        elif self.model_lang == 'cyrillic':
            self.character = number + symbol + characters['en_char'] + characters['cyrillic_char']
            model_file = 'cyrillic.pth'
        elif self.model_lang == 'devanagari':
            self.character = number + symbol + characters['en_char'] + characters['devanagari_char']
            model_file = 'devanagari.pth'
        elif self.model_lang == 'bengali':
            self.character = number + symbol + characters['en_char'] + characters['bn_char']
            model_file = 'bengali.pth'
        elif self.model_lang == 'chinese_tra':
            ch_tra_char = self.getChar("ch_tra_char.txt")
            self.character = number + symbol + characters['en_char'] + ch_tra_char
            model_file = 'chinese.pth'
        elif self.model_lang == 'chinese_sim':
            ch_sim_char = self.getChar("ch_sim_char.txt")
            self.character = number + symbol + characters['en_char'] + ch_sim_char
            model_file = 'chinese_sim.pth'
        elif self.model_lang == 'japanese':
            ja_char = self.getChar("ja_char.txt")
            self.character = number + symbol + characters['en_char'] + ja_char
            model_file = 'japanese.pth'
        elif self.model_lang == 'korean':
            ko_char = self.getChar("ko_char.txt")
            self.character = number + symbol + characters['en_char'] + ko_char
            model_file = 'korean.pth'
        elif self.model_lang == 'tamil':
            ta_char = self.getChar("ta_char.txt")
            self.character = number + symbol + characters['en_char'] + ta_char
            model_file = 'tamil.pth'
        elif self.model_lang == 'thai':
            separator_list = {
                'th': ['\xa2', '\xa3'],
                'en': ['\xa4', '\xa5']
            }
            separator_char = []
            for lang, sep in separator_list.items():
                separator_char += sep
            self.character = (''.join(separator_char) + symbol + characters['en_char']
                              + characters['th_char'] + characters['th_number'])
            model_file = 'thai.pth'
        else:
            LOGGER.error('invalid language')

        model_path = os.path.join(self.model_storage_directory, model_file)

    # check recognition model file
    if os.path.isfile(model_path) == False:
        if not self.download_enabled:
            raise FileNotFoundError("Missing %s and downloads disabled" % model_path)
        LOGGER.warning('Downloading recognition model, please wait. '
                       'This may take several minutes depending upon your network connection.')
        download_and_unzip(model_url[model_file][0], model_file, self.model_storage_directory)
        assert calculate_md5(model_path) == model_url[model_file][1], corrupt_msg
        LOGGER.info('Download complete.')
    elif calculate_md5(model_path) != model_url[model_file][1]:
        if not self.download_enabled:
            raise FileNotFoundError("MD5 mismatch for %s and downloads disabled" % model_path)
        LOGGER.warning(corrupt_msg)
        os.remove(model_path)
        LOGGER.warning('Re-downloading the recognition model, please wait. '
                       'This may take several minutes depending upon your network connection.')
        download_and_unzip(model_url[model_file][0], model_file, self.model_storage_directory)
        assert calculate_md5(model_path) == model_url[model_file][1], corrupt_msg
        LOGGER.info('Download complete')

    self.lang_char = []
    for lang in lang_list:
        char_file = os.path.join(BASE_PATH, 'character', lang + "_char.txt")
        with open(char_file, "r", encoding="utf-8-sig") as input_file:
            char_list = input_file.read().splitlines()
        self.lang_char += char_list
    self.lang_char = set(self.lang_char).union(set(number + symbol))
    self.lang_char = ''.join(self.lang_char)

    dict_list = {}
    for lang in lang_list:
        dict_list[lang] = os.path.join(BASE_PATH, 'dict', lang + ".txt")

    if detector:
        self.detector = get_detector(detector_path, self.device)
    if recognizer:
        if recog_network == 'standard':
            network_params = {
                'input_channel': 1,
                'output_channel': 512,
                'hidden_size': 512
            }
        else:
            network_params = recog_config['network_params']
        self.recognizer, self.converter = get_recognizer(recog_network, network_params,
                                                         self.character, separator_list,
                                                         dict_list, model_path,
                                                         device=self.device)
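# Usage sketch for the Reader constructed above (assumes the surrounding easyocr package;
# readtext is defined elsewhere in the class, not in this excerpt, and 'example.png' is a
# hypothetical input path):
import easyocr

reader = easyocr.Reader(['en'], gpu=False)   # triggers the model checks/downloads above
results = reader.readtext('example.png')
for bbox, text, confidence in results:
    print(text, confidence)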
def prepare_arugment_dataset(is_color=True, s=4):
    # Download and unzip the file
    data_url = "https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip"
    dest_dir = "traffic-signs-data"
    training_file = os.path.join(dest_dir, "train.p")
    validation_file = os.path.join(dest_dir, "valid.p")
    testing_file = os.path.join(dest_dir, "test.p")
    utils.download_and_unzip(data_url, dest_dir, training_file, validation_file, testing_file)

    # Load pickled data
    with open(training_file, mode='rb') as f:
        train = pickle.load(f)
    with open(validation_file, mode='rb') as f:
        valid = pickle.load(f)
    with open(testing_file, mode='rb') as f:
        test = pickle.load(f)

    X_train, y_train = train['features'], train['labels']
    X_valid, y_valid = valid['features'], valid['labels']
    X_test, y_test = test['features'], test['labels']

    # Shuffle the datasets
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_valid, y_valid = shuffle(X_valid, y_valid, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    # GCN / grayscale / MinMax preprocessing
    X_train_color, y_train_color = preprocess_and_save(
        X_train, y_train, True, "traffic-signs-data/train_preprocessed_color.p")
    X_train_gray, y_train_gray = preprocess_and_save(
        X_train, y_train, False, "traffic-signs-data/train_preprocessed_gray.p")
    X_valid_color, y_valid_color = preprocess_and_save(
        X_valid, y_valid, True, "traffic-signs-data/valid_preprocessed_color.p", True, False)
    X_valid_gray, y_valid_gray = preprocess_and_save(
        X_valid, y_valid, False, "traffic-signs-data/valid_preprocessed_gray.p", True, False)
    X_test_color, y_test_color = preprocess_and_save(
        X_test, y_test, True, "traffic-signs-data/test_preprocessed_color.p")
    X_test_gray, y_test_gray = preprocess_and_save(
        X_test, y_test, False, "traffic-signs-data/test_preprocessed_gray.p")

    # Augment the examples
    if is_color:
        X_train_augmented_color, y_train_augmented_color = augment_examples(
            X_train_color, y_train_color, s, dataset_name="Train_augmented (color)")
        X_train_large_color = np.concatenate((X_train_color, X_train_augmented_color), axis=0)
        y_train_large_color = np.concatenate((y_train_color, y_train_augmented_color), axis=0)
        X_train_large_color, y_train_large_color = shuffle(X_train_large_color,
                                                           y_train_large_color,
                                                           random_state=42)
        utils.get_stats(X_train_large_color, y_train_large_color,
                        "Original + Train_augmented (color)")
        return X_train_large_color, y_train_large_color, X_valid_color, y_valid_color, \
            X_test_color, y_test_color

    if not is_color:
        X_train_augmented_gray, y_train_augmented_gray = augment_examples(
            X_train_gray, y_train_gray, s, dataset_name="Train_augmented (gray)")
        X_train_large_gray = np.concatenate((X_train_gray, X_train_augmented_gray), axis=0)
        y_train_large_gray = np.concatenate((y_train_gray, y_train_augmented_gray), axis=0)
        X_train_large_gray, y_train_large_gray = shuffle(X_train_large_gray,
                                                         y_train_large_gray,
                                                         random_state=42)
        utils.get_stats(X_train_large_gray, y_train_large_gray,
                        "Original + Train_augmented (gray)")
        return X_train_large_gray, y_train_large_gray, X_valid_gray, y_valid_gray, \
            X_test_gray, y_test_gray
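# Usage sketch (assumption, not from the source): build the augmented grayscale splits
# with the default augmentation factor; the 6-tuple unpacking matches the returns above.
X_train_aug, y_train_aug, X_valid_p, y_valid_p, X_test_p, y_test_p = \
    prepare_arugment_dataset(is_color=False, s=4)
print(X_train_aug.shape, y_train_aug.shape)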
angle_adjustment = float(args[2])
p_zeros_samples_to_exclude = float(args[3])
p_near_zeros_samples_to_exclude = float(args[4])

# create data folder if needed
data_folder = parameters.data_folder
if not os.path.isdir(data_folder):
    os.mkdir(data_folder)

# download data if needed, then load it
speed_list = []
paths_list = []
y_list = []
for destination_folder, url in zip(parameters.data_folders_list, parameters.urls_list):
    if not os.path.isdir(data_folder + destination_folder):
        utils.download_and_unzip(url, data_folder, destination_folder)
    _, y, paths, speed = utils.load_data(data_folder + destination_folder, return_images=False)
    speed_list.append(speed)
    paths_list.append(paths)
    y_list.append(y)

# concatenate data
speed = np.concatenate(speed_list)
paths = np.concatenate(paths_list)
y = np.concatenate(y_list)

# remove low speed data
min_speed = 15
mask = speed > min_speed
paths = paths[mask]
y = y[mask]
def __init__(self, lang_list, gpu=True, model_storage_directory=None,
             user_network_directory=None, recog_network='standard',
             download_enabled=True, detector=True, recognizer=True,
             verbose=True, quantize=True):
    """Create an EasyOCR Reader.

    Parameters:
        lang_list (list): Language codes (ISO 639) for languages to be recognized
            during analysis.
        gpu (bool): Enable GPU support (default)
        model_storage_directory (string): Path to directory for model data. If not
            specified, models will be read from a directory as defined by the
            environment variable EASYOCR_MODULE_PATH (preferred), MODULE_PATH
            (if defined), or ~/.EasyOCR/.
        user_network_directory (string): Path to directory for custom network
            architecture. If not specified, it is as defined by the environment
            variable EASYOCR_MODULE_PATH (preferred), MODULE_PATH (if defined),
            or ~/.EasyOCR/.
        download_enabled (bool): Enabled downloading of model data via HTTP (default).
    """
    self.download_enabled = download_enabled

    self.model_storage_directory = MODULE_PATH + '/model'
    if model_storage_directory:
        self.model_storage_directory = model_storage_directory
    Path(self.model_storage_directory).mkdir(parents=True, exist_ok=True)

    self.user_network_directory = MODULE_PATH + '/user_network'
    if user_network_directory:
        self.user_network_directory = user_network_directory
    Path(self.user_network_directory).mkdir(parents=True, exist_ok=True)
    sys.path.append(self.user_network_directory)

    if gpu is False:
        self.device = 'cpu'
        LOGGER.warning('Using CPU. Note: This module is much faster with a GPU.')
    elif not torch.cuda.is_available():
        self.device = 'cpu'
        LOGGER.warning('CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.')
    elif gpu is True:
        self.device = 'cuda'
    else:
        self.device = gpu

    # check and download detection model
    detector_model = 'craft'
    corrupt_msg = 'MD5 hash mismatch, possible file corruption'
    detector_path = os.path.join(self.model_storage_directory,
                                 detection_models[detector_model]['filename'])
    if detector:
        if os.path.isfile(detector_path) == False:
            if not self.download_enabled:
                raise FileNotFoundError("Missing %s and downloads disabled" % detector_path)
            LOGGER.warning('Downloading detection model, please wait. '
                           'This may take several minutes depending upon your network connection.')
            download_and_unzip(detection_models[detector_model]['url'],
                               detection_models[detector_model]['filename'],
                               self.model_storage_directory, verbose)
            assert calculate_md5(detector_path) == detection_models[detector_model]['filesize'], corrupt_msg
            LOGGER.info('Download complete')
        elif calculate_md5(detector_path) != detection_models[detector_model]['filesize']:
            if not self.download_enabled:
                raise FileNotFoundError("MD5 mismatch for %s and downloads disabled" % detector_path)
            LOGGER.warning(corrupt_msg)
            os.remove(detector_path)
            LOGGER.warning('Re-downloading the detection model, please wait. '
                           'This may take several minutes depending upon your network connection.')
            download_and_unzip(detection_models[detector_model]['url'],
                               detection_models[detector_model]['filename'],
                               self.model_storage_directory, verbose)
            assert calculate_md5(detector_path) == detection_models[detector_model]['filesize'], corrupt_msg

    # recognition model
    separator_list = {}

    if recog_network in (['standard']
                         + [model for model in recognition_models['gen1']]
                         + [model for model in recognition_models['gen2']]):
        if recog_network in [model for model in recognition_models['gen1']]:
            model = recognition_models['gen1'][recog_network]
            recog_network = 'generation1'
            self.model_lang = model['model_script']
        elif recog_network in [model for model in recognition_models['gen2']]:
            model = recognition_models['gen2'][recog_network]
            recog_network = 'generation2'
            self.model_lang = model['model_script']
        else:  # auto-detect
            unknown_lang = set(lang_list) - set(all_lang_list)
            if unknown_lang != set():
                raise ValueError(unknown_lang, 'is not supported')

            # choose recognition model
            if lang_list == ['en']:
                self.setModelLanguage('english', lang_list, ['en'], '["en"]')
                model = recognition_models['gen2']['english_g2']
                recog_network = 'generation2'
            elif 'th' in lang_list:
                self.setModelLanguage('thai', lang_list, ['th', 'en'], '["th","en"]')
                model = recognition_models['gen1']['thai_g1']
                recog_network = 'generation1'
            elif 'ch_tra' in lang_list:
                self.setModelLanguage('chinese_tra', lang_list, ['ch_tra', 'en'], '["ch_tra","en"]')
                model = recognition_models['gen1']['zh_tra_g1']
                recog_network = 'generation1'
            elif 'ch_sim' in lang_list:
                self.setModelLanguage('chinese_sim', lang_list, ['ch_sim', 'en'], '["ch_sim","en"]')
                model = recognition_models['gen2']['zh_sim_g2']
                recog_network = 'generation2'
            elif 'ja' in lang_list:
                self.setModelLanguage('japanese', lang_list, ['ja', 'en'], '["ja","en"]')
                model = recognition_models['gen2']['japanese_g2']
                recog_network = 'generation2'
            elif 'ko' in lang_list:
                self.setModelLanguage('korean', lang_list, ['ko', 'en'], '["ko","en"]')
                model = recognition_models['gen2']['korean_g2']
                recog_network = 'generation2'
            elif 'ta' in lang_list:
                self.setModelLanguage('tamil', lang_list, ['ta', 'en'], '["ta","en"]')
                model = recognition_models['gen1']['tamil_g1']
                recog_network = 'generation1'
            elif 'te' in lang_list:
                self.setModelLanguage('telugu', lang_list, ['te', 'en'], '["te","en"]')
                model = recognition_models['gen2']['telugu_g2']
                recog_network = 'generation2'
            elif 'kn' in lang_list:
                self.setModelLanguage('kannada', lang_list, ['kn', 'en'], '["kn","en"]')
                model = recognition_models['gen2']['kannada_g2']
                recog_network = 'generation2'
            elif set(lang_list) & set(bengali_lang_list):
                self.setModelLanguage('bengali', lang_list, bengali_lang_list + ['en'], '["bn","as","en"]')
                model = recognition_models['gen1']['bengali_g1']
                recog_network = 'generation1'
            elif set(lang_list) & set(arabic_lang_list):
                self.setModelLanguage('arabic', lang_list, arabic_lang_list + ['en'], '["ar","fa","ur","ug","en"]')
                model = recognition_models['gen1']['arabic_g1']
                recog_network = 'generation1'
            elif set(lang_list) & set(devanagari_lang_list):
                self.setModelLanguage('devanagari', lang_list, devanagari_lang_list + ['en'], '["hi","mr","ne","en"]')
                model = recognition_models['gen1']['devanagari_g1']
                recog_network = 'generation1'
            elif set(lang_list) & set(cyrillic_lang_list):
                self.setModelLanguage('cyrillic', lang_list, cyrillic_lang_list + ['en'],
                                      '["ru","rs_cyrillic","be","bg","uk","mn","en"]')
                model = recognition_models['gen1']['cyrillic_g1']
                recog_network = 'generation1'
            else:
                self.model_lang = 'latin'
                model = recognition_models['gen2']['latin_g2']
                recog_network = 'generation2'
        self.character = model['characters']

        model_path = os.path.join(self.model_storage_directory, model['filename'])
        # check recognition model file
        if recognizer:
            if os.path.isfile(model_path) == False:
                if not self.download_enabled:
                    raise FileNotFoundError("Missing %s and downloads disabled" % model_path)
                LOGGER.warning('Downloading recognition model, please wait. '
                               'This may take several minutes depending upon your network connection.')
                download_and_unzip(model['url'], model['filename'], self.model_storage_directory, verbose)
                assert calculate_md5(model_path) == model['filesize'], corrupt_msg
                LOGGER.info('Download complete.')
            elif calculate_md5(model_path) != model['filesize']:
                if not self.download_enabled:
                    raise FileNotFoundError("MD5 mismatch for %s and downloads disabled" % model_path)
                LOGGER.warning(corrupt_msg)
                os.remove(model_path)
                LOGGER.warning('Re-downloading the recognition model, please wait. '
                               'This may take several minutes depending upon your network connection.')
                download_and_unzip(model['url'], model['filename'], self.model_storage_directory, verbose)
                assert calculate_md5(model_path) == model['filesize'], corrupt_msg
                LOGGER.info('Download complete')
        self.setLanguageList(lang_list, model)
    else:  # user-defined model
        with open(os.path.join(self.user_network_directory, recog_network + '.yaml')) as file:
            recog_config = yaml.load(file, Loader=yaml.FullLoader)
        imgH = recog_config['imgH']
        available_lang = recog_config['lang_list']
        self.setModelLanguage(recog_network, lang_list, available_lang, available_lang)
        #char_file = os.path.join(self.user_network_directory, recog_network+ '.txt')
        self.character = recog_config['character_list']
        model_file = recog_network + '.pth'
        model_path = os.path.join(self.model_storage_directory, model_file)
        self.setLanguageList(lang_list, None)

    dict_list = {}
    for lang in lang_list:
        dict_list[lang] = os.path.join(BASE_PATH, 'dict', lang + ".txt")

    if detector:
        self.detector = get_detector(detector_path, self.device, quantize)
    if recognizer:
        if recog_network == 'generation1':
            network_params = {
                'input_channel': 1,
                'output_channel': 512,
                'hidden_size': 512
            }
        elif recog_network == 'generation2':
            network_params = {
                'input_channel': 1,
                'output_channel': 256,
                'hidden_size': 256
            }
        else:
            network_params = recog_config['network_params']
        self.recognizer, self.converter = get_recognizer(recog_network, network_params,
                                                         self.character, separator_list,
                                                         dict_list, model_path,
                                                         device=self.device, quantize=quantize)
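# Usage sketch for the newer constructor (assumption, not from the source): pointing the
# Reader at pre-downloaded models with HTTP downloads disabled exercises the
# FileNotFoundError paths above when a file is missing. The directory paths are illustrative.
import easyocr

reader = easyocr.Reader(
    ['ch_sim', 'en'],
    gpu=False,
    model_storage_directory='/opt/easyocr/model',          # hypothetical path
    user_network_directory='/opt/easyocr/user_network',    # hypothetical path
    download_enabled=False,
    quantize=True,
)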