Example #1
def download_7scenes(scene_dict, data_dir):
    """Download and extract every listed sequence of the 7-Scenes dataset."""
    for scene, seqs in scene_dict.items():
        for seq in seqs:
            name = '{}_{}_mvs_training'.format(scene, seq)
            try:
                ut.download_and_unzip(name, data_dir)
            except Exception as e:
                print("Download of {} failed with exception {}".format(name, e))
Example #2
    def __init__(self, config, inspect=False):
        self.apply_fn = None

        download_url = "https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/matlab.zip"
        save_file = "emnist.zip"
        extract_dir = "emnist"
        main_data_file = "emnist/matlab/emnist-balanced.mat"
        if not os.path.exists(main_data_file):
            utils.download_and_unzip(download_url, save_file, extract_dir)
        extract_fn = lambda fname: spio.loadmat(fname)["dataset"]
        self.dataset = extract_fn(main_data_file)
        # map each class-label character in config to its Unicode code point
        config = [[ord(ch) for ch in task] for task in config]
        # index into the nested MATLAB struct returned by loadmat:
        # dataset -> train/test -> images/labels
        train_x_fn = lambda x: x[0][0][0][0][0][0]
        train_y_fn = lambda x: x[0][0][0][0][0][1]
        test_x_fn = lambda x: x[0][0][1][0][0][0]
        test_y_fn = lambda x: x[0][0][1][0][0][1]
        # ASCII code points for digits, upper-case and lower-case letters
        labels_fn = lambda: list(range(0x30, 0x39 + 1)) + list(range(0x41, 0x5A + 1)) + list(range(0x61, 0x7A + 1))
        reshape_dims = (784,)
        # EMNIST images are stored transposed; flip + rotate restores orientation
        self.img_fn = lambda x: np.rot90(np.flipud(x.reshape(28, 28)), 3)

        self.trainx = train_x_fn(self.dataset)
        self.trainy = train_y_fn(self.dataset)
        self.testx = test_x_fn(self.dataset)
        self.testy = test_y_fn(self.dataset)
        self.labels = labels_fn()
        self.reshape_dims = reshape_dims

        self.config = config
        self.n_tasks = len(config)
        print("Custom dataset: n_tasks = %d" % self.n_tasks)
        self.n_classes = len(config[0])
        for task in config:
            assert len(task) == self.n_classes
        print("Custom dataset: n_classes = %d" % self.n_classes)

        self.tasks = []
        self.categorize(all=False, render=inspect)
        self.curr_idx = 0
        self.n = self.tasks[self.curr_idx].n
        self.tn = self.tasks[self.curr_idx].tn
        self.batch_size = self.tasks[self.curr_idx].batch_size
        self.train_x = self.tasks[self.curr_idx].train_x
        self.train_y = self.tasks[self.curr_idx].train_y
        self.test_x = self.tasks[self.curr_idx].test_x
        self.test_y = self.tasks[self.curr_idx].test_y
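
As a stand-alone illustration of the nested .mat layout assumed above, the same fields can be pulled out directly (file path and index layout as in the example):

import numpy as np
import scipy.io as spio

dataset = spio.loadmat("emnist/matlab/emnist-balanced.mat")["dataset"]
train_images = dataset[0][0][0][0][0][0]  # shape (N, 784)
train_labels = dataset[0][0][0][0][0][1]  # shape (N, 1)
# undo the transposed on-disk orientation of the first image
first_img = np.rot90(np.flipud(train_images[0].reshape(28, 28)), 3)
print(train_images.shape, train_labels.shape, first_img.shape)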
Example #3
import os

from sklearn.metrics import confusion_matrix
from keras.models import Sequential, Model, load_model
from keras.initializers import Initializer, RandomNormal, RandomUniform
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD, Adadelta, Adagrad
from keras.constraints import maxnorm
from keras.regularizers import l2
from keras import backend as K
# list the GPUs visible to the TF1 backend (uses a private Keras API)
print(K.tensorflow_backend._get_available_gpus())

cwd = os.getcwd()
fontsPath = os.path.join(cwd, "fonts")

if not os.path.exists(fontsPath):
    download_and_unzip()

# collect font names (file names minus extension) from the fonts directory
fonts = []
for root, dirs, files in os.walk(fontsPath):
    for e in files:
        fonts.append(e.split(".")[0])

# fonts = ['PALATINO', 'STYLUS', 'NINA', 'GOUDY']

X_test, X_train, Y_test, Y_train, idx_to_label, label_to_idx = data_load(
    0.8, fonts)


def get_model(X_train, Y_train, target_shape=153):
    print(Y_train.shape)
    model_name = "model_with_target_" + str(target_shape) + ".h5"
Example #4
import os
import pickle
import numpy as np
import random
import utils
import keras.preprocessing.image as io


data_url = "https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip"
dest_dir = "traffic-signs-data"
training_file = os.path.join(dest_dir, "train.p")
validation_file = os.path.join(dest_dir, "valid.p")
testing_file = os.path.join(dest_dir, "test.p")
utils.download_and_unzip(data_url, dest_dir, training_file, validation_file, testing_file)


# Load pickled data

with open(training_file, mode='rb') as f:
    train = pickle.load(f)
with open(validation_file, mode='rb') as f:
    valid = pickle.load(f)
with open(testing_file, mode='rb') as f:
    test = pickle.load(f)

X_train, y_train = train['features'], train['labels']
Example #5
import boto3
import botocore
import constants
import utils as ut
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('name',
                        type=str,
                        help="Name of dataset to be downloaded")
    parser.add_argument('data_dir',
                        type=str,
                        help="Diretory to download dataset to")
    args = parser.parse_args()
    ut.download_and_unzip(args.name, args.data_dir)
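
From a shell this script takes the two positional arguments in order, e.g. python download_dataset.py <name> <data_dir> (the script filename here is a placeholder; it is not given in the snippet).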
Example #6
    def run(self):
        download_and_unzip(download_url=nhtsa_url,
                           data_dir='data/',
                           target_file=f"nhtsa_raw_{self.job_id}.tsv")
Example #7
    def __init__(self,
                 lang_list,
                 gpu=True,
                 model_storage_directory=None,
                 user_network_directory=None,
                 recog_network='standard',
                 download_enabled=True,
                 detector=True,
                 recognizer=True):
        """Create an EasyOCR Reader.

        Parameters:
            lang_list (list): Language codes (ISO 639) for languages to be recognized during analysis.

            gpu (bool): Enable GPU support (default)

            model_storage_directory (string): Path to directory for model data. If not specified,
            models will be read from a directory as defined by the environment variable
            EASYOCR_MODULE_PATH (preferred), MODULE_PATH (if defined), or ~/.EasyOCR/.

            user_network_directory (string): Path to directory for custom network architecture.
            If not specified, it is as defined by the environment variable
            EASYOCR_MODULE_PATH (preferred), MODULE_PATH (if defined), or ~/.EasyOCR/.

            download_enabled (bool): Enable downloading of model data via HTTP (default).
        """
        self.download_enabled = download_enabled

        self.model_storage_directory = MODULE_PATH + '/model'
        if model_storage_directory:
            self.model_storage_directory = model_storage_directory
        Path(self.model_storage_directory).mkdir(parents=True, exist_ok=True)

        self.user_network_directory = MODULE_PATH + '/user_network'
        if user_network_directory:
            self.user_network_directory = user_network_directory
        Path(self.user_network_directory).mkdir(parents=True, exist_ok=True)
        sys.path.append(self.user_network_directory)

        if gpu is False:
            self.device = 'cpu'
            LOGGER.warning(
                'Using CPU. Note: This module is much faster with a GPU.')
        elif not torch.cuda.is_available():
            self.device = 'cpu'
            LOGGER.warning(
                'CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.'
            )
        elif gpu is True:
            self.device = 'cuda'
        else:
            self.device = gpu

        # check and download detection model
        corrupt_msg = 'MD5 hash mismatch, possible file corruption'
        detector_path = os.path.join(self.model_storage_directory,
                                     DETECTOR_FILENAME)
        if not os.path.isfile(detector_path):
            if not self.download_enabled:
                raise FileNotFoundError("Missing %s and downloads disabled" %
                                        detector_path)
            LOGGER.warning(
                'Downloading detection model, please wait. '
                'This may take several minutes depending upon your network connection.'
            )
            download_and_unzip(model_url['detector'][0], DETECTOR_FILENAME,
                               self.model_storage_directory)
            assert calculate_md5(
                detector_path) == model_url['detector'][1], corrupt_msg
            LOGGER.info('Download complete')
        elif calculate_md5(detector_path) != model_url['detector'][1]:
            if not self.download_enabled:
                raise FileNotFoundError(
                    "MD5 mismatch for %s and downloads disabled" %
                    detector_path)
            LOGGER.warning(corrupt_msg)
            os.remove(detector_path)
            LOGGER.warning(
                'Re-downloading the detection model, please wait. '
                'This may take several minutes depending upon your network connection.'
            )
            download_and_unzip(model_url['detector'][0], DETECTOR_FILENAME,
                               self.model_storage_directory)
            assert calculate_md5(
                detector_path) == model_url['detector'][1], corrupt_msg

        # recognition model
        separator_list = {}
        if recog_network != 'standard':
            with open(
                    os.path.join(self.user_network_directory,
                                 recog_network + '.yaml')) as file:
                recog_config = yaml.load(file, Loader=yaml.FullLoader)
            imgH = recog_config['imgH']
            available_lang = recog_config['lang_list']
            self.setModelLanguage(recog_network, lang_list, available_lang,
                                  available_lang)
            char_file = os.path.join(self.user_network_directory,
                                     recog_network + '.txt')
            with open(char_file, "r", encoding="utf-8-sig") as input_file:
                char_list = input_file.read().splitlines()  # don't shadow built-in list
                self.character = number + symbol + ''.join(char_list)
            model_file = recog_network + '.pth'
            model_path = os.path.join(self.model_storage_directory, model_file)
        else:
            # check available languages
            unknown_lang = set(lang_list) - set(all_lang_list)
            if unknown_lang:
                raise ValueError(unknown_lang, 'is not supported')

            # choose recognition model
            if 'th' in lang_list:
                self.setModelLanguage('thai', lang_list, ['th', 'en'],
                                      '["th","en"]')
            elif 'ch_tra' in lang_list:
                self.setModelLanguage('chinese_tra', lang_list,
                                      ['ch_tra', 'en'], '["ch_tra","en"]')
            elif 'ch_sim' in lang_list:
                self.setModelLanguage('chinese_sim', lang_list,
                                      ['ch_sim', 'en'], '["ch_sim","en"]')
            elif 'ja' in lang_list:
                self.setModelLanguage('japanese', lang_list, ['ja', 'en'],
                                      '["ja","en"]')
            elif 'ko' in lang_list:
                self.setModelLanguage('korean', lang_list, ['ko', 'en'],
                                      '["ko","en"]')
            elif 'ta' in lang_list:
                self.setModelLanguage('tamil', lang_list, ['ta', 'en'],
                                      '["ta","en"]')
            elif set(lang_list) & set(bengali_lang_list):
                self.setModelLanguage('bengali', lang_list,
                                      bengali_lang_list + ['en'],
                                      '["bn","as","en"]')
            elif set(lang_list) & set(arabic_lang_list):
                self.setModelLanguage('arabic', lang_list,
                                      arabic_lang_list + ['en'],
                                      '["ar","fa","ur","ug","en"]')
            elif set(lang_list) & set(devanagari_lang_list):
                self.setModelLanguage('devanagari', lang_list,
                                      devanagari_lang_list + ['en'],
                                      '["hi","mr","ne","en"]')
            elif set(lang_list) & set(cyrillic_lang_list):
                self.setModelLanguage(
                    'cyrillic', lang_list, cyrillic_lang_list + ['en'],
                    '["ru","rs_cyrillic","be","bg","uk","mn","en"]')
            else:
                self.model_lang = 'latin'

            if self.model_lang == 'latin':
                self.character = number + symbol + characters['all_char']
                model_file = 'latin.pth'
            elif self.model_lang == 'arabic':
                self.character = number + symbol + characters[
                    'en_char'] + characters['ar_number'] + characters[
                        'ar_symbol'] + characters['ar_char']
                model_file = 'arabic.pth'
            elif self.model_lang == 'cyrillic':
                self.character = number + symbol + characters[
                    'en_char'] + characters['cyrillic_char']
                model_file = 'cyrillic.pth'
            elif self.model_lang == 'devanagari':
                self.character = number + symbol + characters[
                    'en_char'] + characters['devanagari_char']
                model_file = 'devanagari.pth'
            elif self.model_lang == 'bengali':
                self.character = number + symbol + characters[
                    'en_char'] + characters['bn_char']
                model_file = 'bengali.pth'
            elif self.model_lang == 'chinese_tra':
                ch_tra_char = self.getChar("ch_tra_char.txt")
                self.character = number + symbol + characters[
                    'en_char'] + ch_tra_char
                model_file = 'chinese.pth'
            elif self.model_lang == 'chinese_sim':
                ch_sim_char = self.getChar("ch_sim_char.txt")
                self.character = number + symbol + characters[
                    'en_char'] + ch_sim_char
                model_file = 'chinese_sim.pth'
            elif self.model_lang == 'japanese':
                ja_char = self.getChar("ja_char.txt")
                self.character = number + symbol + characters[
                    'en_char'] + ja_char
                model_file = 'japanese.pth'
            elif self.model_lang == 'korean':
                ko_char = self.getChar("ko_char.txt")
                self.character = number + symbol + characters[
                    'en_char'] + ko_char
                model_file = 'korean.pth'
            elif self.model_lang == 'tamil':
                ta_char = self.getChar("ta_char.txt")
                self.character = number + symbol + characters[
                    'en_char'] + ta_char
                model_file = 'tamil.pth'
            elif self.model_lang == 'thai':
                separator_list = {
                    'th': ['\xa2', '\xa3'],
                    'en': ['\xa4', '\xa5']
                }
                separator_char = []
                for lang, sep in separator_list.items():
                    separator_char += sep
                self.character = ''.join(separator_char) + symbol + characters[
                    'en_char'] + characters['th_char'] + characters['th_number']
                model_file = 'thai.pth'
            else:
                LOGGER.error('invalid language')

            model_path = os.path.join(self.model_storage_directory, model_file)
            # check recognition model file
            if not os.path.isfile(model_path):
                if not self.download_enabled:
                    raise FileNotFoundError(
                        "Missing %s and downloads disabled" % model_path)
                LOGGER.warning(
                    'Downloading recognition model, please wait. '
                    'This may take several minutes depending upon your network connection.'
                )
                download_and_unzip(model_url[model_file][0], model_file,
                                   self.model_storage_directory)
                assert calculate_md5(
                    model_path) == model_url[model_file][1], corrupt_msg
                LOGGER.info('Download complete.')
            elif calculate_md5(model_path) != model_url[model_file][1]:
                if not self.download_enabled:
                    raise FileNotFoundError(
                        "MD5 mismatch for %s and downloads disabled" %
                        model_path)
                LOGGER.warning(corrupt_msg)
                os.remove(model_path)
                LOGGER.warning(
                    'Re-downloading the recognition model, please wait. '
                    'This may take several minutes depending upon your network connection.'
                )
                download_and_unzip(model_url[model_file][0], model_file,
                                   self.model_storage_directory)
                assert calculate_md5(
                    model_path) == model_url[model_file][1], corrupt_msg
                LOGGER.info('Download complete')

        self.lang_char = []
        for lang in lang_list:
            char_file = os.path.join(BASE_PATH, 'character',
                                     lang + "_char.txt")
            with open(char_file, "r", encoding="utf-8-sig") as input_file:
                char_list = input_file.read().splitlines()
            self.lang_char += char_list
        self.lang_char = set(self.lang_char).union(set(number + symbol))
        self.lang_char = ''.join(self.lang_char)

        dict_list = {}
        for lang in lang_list:
            dict_list[lang] = os.path.join(BASE_PATH, 'dict', lang + ".txt")

        if detector:
            self.detector = get_detector(detector_path, self.device)
        if recognizer:
            if recog_network == 'standard':
                network_params = {
                    'input_channel': 1,
                    'output_channel': 512,
                    'hidden_size': 512
                }
            else:
                network_params = recog_config['network_params']
            self.recognizer, self.converter = get_recognizer(
                recog_network, network_params, self.character,
                separator_list, dict_list, model_path, device=self.device)
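
calculate_md5 is not shown in this excerpt; a typical chunked implementation consistent with the calls above (a sketch, not necessarily the project's exact code):

import hashlib

def calculate_md5(fname):
    # hash in chunks so large model files don't need to fit in memory
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()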
Example #8
def prepare_augmented_dataset(is_color=True, s=4):

    # Download and unzip the file
    data_url = "https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip"
    dest_dir = "traffic-signs-data"
    training_file = os.path.join(dest_dir, "train.p")
    validation_file = os.path.join(dest_dir, "valid.p")
    testing_file = os.path.join(dest_dir, "test.p")
    utils.download_and_unzip(data_url, dest_dir, training_file,
                             validation_file, testing_file)

    # Load pickled data
    with open(training_file, mode='rb') as f:
        train = pickle.load(f)
    with open(validation_file, mode='rb') as f:
        valid = pickle.load(f)
    with open(testing_file, mode='rb') as f:
        test = pickle.load(f)

    X_train, y_train = train['features'], train['labels']
    X_valid, y_valid = valid['features'], valid['labels']
    X_test, y_test = test['features'], test['labels']

    # Shuffle the datasets
    X_train, y_train = shuffle(X_train, y_train, random_state=0)
    X_valid, y_valid = shuffle(X_valid, y_valid, random_state=0)
    X_test, y_test = shuffle(X_test, y_test, random_state=0)

    # GCN/ grayscale/ MinMax preprocessing
    X_train_color, y_train_color = preprocess_and_save(
        X_train, y_train, True,
        "traffic-signs-data/train_preprocessed_color.p")

    X_train_gray, y_train_gray = preprocess_and_save(
        X_train, y_train, False,
        "traffic-signs-data/train_preprocessed_gray.p")

    X_valid_color, y_valid_color = preprocess_and_save(
        X_valid, y_valid, True,
        "traffic-signs-data/valid_preprocessed_color.p", True, False)

    X_valid_gray, y_valid_gray = preprocess_and_save(
        X_valid, y_valid, False,
        "traffic-signs-data/valid_preprocessed_gray.p", True, False)

    X_test_color, y_test_color = preprocess_and_save(
        X_test, y_test, True, "traffic-signs-data/test_preprocessed_color.p")

    X_test_gray, y_test_gray = preprocess_and_save(
        X_test, y_test, False, "traffic-signs-data/test_preprocessed_gray.p")

    # Augment the examples
    if is_color:
        X_train_augmented_color, y_train_augmented_color = augment_examples(
            X_train_color,
            y_train_color,
            s,
            dataset_name="Train_augmented (color)")
        X_train_large_color = np.concatenate(
            (X_train_color, X_train_augmented_color), axis=0)
        y_train_large_color = np.concatenate(
            (y_train_color, y_train_augmented_color), axis=0)
        X_train_large_color, y_train_large_color = shuffle(X_train_large_color,
                                                           y_train_large_color,
                                                           random_state=42)
        utils.get_stats(X_train_large_color, y_train_large_color,
                        "Original + Train_augmented (color)")
        return X_train_large_color, y_train_large_color, X_valid_color, y_valid_color, \
                X_test_color, y_test_color

    if not is_color:
        X_train_augmented_gray, y_train_augmented_gray = augment_examples(
            X_train_gray,
            y_train_gray,
            s,
            dataset_name="Train_augmented (gray)")
        X_train_large_gray = np.concatenate(
            (X_train_gray, X_train_augmented_gray), axis=0)
        y_train_large_gray = np.concatenate(
            (y_train_gray, y_train_augmented_gray), axis=0)
        X_train_large_gray, y_train_large_gray = shuffle(X_train_large_gray,
                                                         y_train_large_gray,
                                                         random_state=42)
        utils.get_stats(X_train_large_gray, y_train_large_gray,
                        "Original + Train_augmented (gray)")
        return X_train_large_gray, y_train_large_gray, X_valid_gray, y_valid_gray, \
                X_test_gray, y_test_gray
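
augment_examples and preprocess_and_save are not defined in this excerpt. As a hedged sketch, an s-fold augmentation pass using keras.preprocessing.image (already imported as io in Example #4) could look like this; the signature and transform ranges are assumptions:

import numpy as np
import keras.preprocessing.image as io

def augment_examples(X, y, s, dataset_name=""):
    # sketch: return s randomly perturbed copies of every example;
    # expects images shaped (height, width, channels)
    datagen = io.ImageDataGenerator(rotation_range=10,
                                    width_shift_range=0.1,
                                    height_shift_range=0.1)
    X_aug, y_aug = [], []
    for _ in range(s):
        for img, label in zip(X, y):
            X_aug.append(datagen.random_transform(img))
            y_aug.append(label)
    print("{}: generated {} augmented examples".format(dataset_name, len(X_aug)))
    return np.array(X_aug), np.array(y_aug)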
Example #9
    angle_adjustment = float(args[2])
    p_zeros_samples_to_exclude = float(args[3])
    p_near_zeros_samples_to_exclude = float(args[4])

# create data folder if needed
data_folder = parameters.data_folder
if not os.path.isdir(data_folder):
    os.mkdir(data_folder)

# download data if needed then loads it
speed_list = []
paths_list = []
y_list = []
for destination_folder, url in zip(parameters.data_folders_list, parameters.urls_list):
    if not os.path.isdir(data_folder + destination_folder):
        utils.download_and_unzip(url, data_folder, destination_folder)
    _, y, paths, speed = utils.load_data(data_folder + destination_folder, return_images=False)
    speed_list.append(speed)
    paths_list.append(paths)
    y_list.append(y)

# concatenate data
speed = np.concatenate(speed_list)
paths = np.concatenate(paths_list)
y = np.concatenate(y_list)

# remove low speed data
min_speed = 15
mask = speed > min_speed
paths = paths[mask]
y = y[mask]
Example #10
    def __init__(self,
                 lang_list,
                 gpu=True,
                 model_storage_directory=None,
                 user_network_directory=None,
                 recog_network='standard',
                 download_enabled=True,
                 detector=True,
                 recognizer=True,
                 verbose=True,
                 quantize=True):
        """Create an EasyOCR Reader.

        Parameters:
            lang_list (list): Language codes (ISO 639) for languages to be recognized during analysis.

            gpu (bool): Enable GPU support (default)

            model_storage_directory (string): Path to directory for model data. If not specified,
            models will be read from a directory as defined by the environment variable
            EASYOCR_MODULE_PATH (preferred), MODULE_PATH (if defined), or ~/.EasyOCR/.

            user_network_directory (string): Path to directory for custom network architecture.
            If not specified, it is as defined by the environment variable
            EASYOCR_MODULE_PATH (preferred), MODULE_PATH (if defined), or ~/.EasyOCR/.

            download_enabled (bool): Enable downloading of model data via HTTP (default).
        """
        self.download_enabled = download_enabled

        self.model_storage_directory = MODULE_PATH + '/model'
        if model_storage_directory:
            self.model_storage_directory = model_storage_directory
        Path(self.model_storage_directory).mkdir(parents=True, exist_ok=True)

        self.user_network_directory = MODULE_PATH + '/user_network'
        if user_network_directory:
            self.user_network_directory = user_network_directory
        Path(self.user_network_directory).mkdir(parents=True, exist_ok=True)
        sys.path.append(self.user_network_directory)

        if gpu is False:
            self.device = 'cpu'
            LOGGER.warning(
                'Using CPU. Note: This module is much faster with a GPU.')
        elif not torch.cuda.is_available():
            self.device = 'cpu'
            LOGGER.warning(
                'CUDA not available - defaulting to CPU. Note: This module is much faster with a GPU.'
            )
        elif gpu is True:
            self.device = 'cuda'
        else:
            self.device = gpu

        # check and download detection model
        detector_model = 'craft'
        corrupt_msg = 'MD5 hash mismatch, possible file corruption'
        detector_path = os.path.join(
            self.model_storage_directory,
            detection_models[detector_model]['filename'])
        if detector:
            if not os.path.isfile(detector_path):
                if not self.download_enabled:
                    raise FileNotFoundError(
                        "Missing %s and downloads disabled" % detector_path)
                LOGGER.warning(
                    'Downloading detection model, please wait. '
                    'This may take several minutes depending upon your network connection.'
                )
                download_and_unzip(
                    detection_models[detector_model]['url'],
                    detection_models[detector_model]['filename'],
                    self.model_storage_directory, verbose)
                assert calculate_md5(detector_path) == detection_models[
                    detector_model]['filesize'], corrupt_msg
                LOGGER.info('Download complete')
            elif calculate_md5(detector_path) != detection_models[
                    detector_model]['filesize']:
                if not self.download_enabled:
                    raise FileNotFoundError(
                        "MD5 mismatch for %s and downloads disabled" %
                        detector_path)
                LOGGER.warning(corrupt_msg)
                os.remove(detector_path)
                LOGGER.warning(
                    'Re-downloading the detection model, please wait. '
                    'This may take several minutes depending upon your network connection.'
                )
                download_and_unzip(
                    detection_models[detector_model]['url'],
                    detection_models[detector_model]['filename'],
                    self.model_storage_directory, verbose)
                assert calculate_md5(detector_path) == detection_models[
                    detector_model]['filesize'], corrupt_msg

        # recognition model
        separator_list = {}

        if recog_network in (['standard'] + list(recognition_models['gen1'])
                             + list(recognition_models['gen2'])):
            if recog_network in recognition_models['gen1']:
                model = recognition_models['gen1'][recog_network]
                recog_network = 'generation1'
                self.model_lang = model['model_script']
            elif recog_network in recognition_models['gen2']:
                model = recognition_models['gen2'][recog_network]
                recog_network = 'generation2'
                self.model_lang = model['model_script']
            else:  # auto-detect
                unknown_lang = set(lang_list) - set(all_lang_list)
                if unknown_lang:
                    raise ValueError(unknown_lang, 'is not supported')
                # choose recognition model
                if lang_list == ['en']:
                    self.setModelLanguage('english', lang_list, ['en'],
                                          '["en"]')
                    model = recognition_models['gen2']['english_g2']
                    recog_network = 'generation2'
                elif 'th' in lang_list:
                    self.setModelLanguage('thai', lang_list, ['th', 'en'],
                                          '["th","en"]')
                    model = recognition_models['gen1']['thai_g1']
                    recog_network = 'generation1'
                elif 'ch_tra' in lang_list:
                    self.setModelLanguage('chinese_tra', lang_list,
                                          ['ch_tra', 'en'], '["ch_tra","en"]')
                    model = recognition_models['gen1']['zh_tra_g1']
                    recog_network = 'generation1'
                elif 'ch_sim' in lang_list:
                    self.setModelLanguage('chinese_sim', lang_list,
                                          ['ch_sim', 'en'], '["ch_sim","en"]')
                    model = recognition_models['gen2']['zh_sim_g2']
                    recog_network = 'generation2'
                elif 'ja' in lang_list:
                    self.setModelLanguage('japanese', lang_list, ['ja', 'en'],
                                          '["ja","en"]')
                    model = recognition_models['gen2']['japanese_g2']
                    recog_network = 'generation2'
                elif 'ko' in lang_list:
                    self.setModelLanguage('korean', lang_list, ['ko', 'en'],
                                          '["ko","en"]')
                    model = recognition_models['gen2']['korean_g2']
                    recog_network = 'generation2'
                elif 'ta' in lang_list:
                    self.setModelLanguage('tamil', lang_list, ['ta', 'en'],
                                          '["ta","en"]')
                    model = recognition_models['gen1']['tamil_g1']
                    recog_network = 'generation1'
                elif 'te' in lang_list:
                    self.setModelLanguage('telugu', lang_list, ['te', 'en'],
                                          '["te","en"]')
                    model = recognition_models['gen2']['telugu_g2']
                    recog_network = 'generation2'
                elif 'kn' in lang_list:
                    self.setModelLanguage('kannada', lang_list, ['kn', 'en'],
                                          '["kn","en"]')
                    model = recognition_models['gen2']['kannada_g2']
                    recog_network = 'generation2'
                elif set(lang_list) & set(bengali_lang_list):
                    self.setModelLanguage('bengali', lang_list,
                                          bengali_lang_list + ['en'],
                                          '["bn","as","en"]')
                    model = recognition_models['gen1']['bengali_g1']
                    recog_network = 'generation1'
                elif set(lang_list) & set(arabic_lang_list):
                    self.setModelLanguage('arabic', lang_list,
                                          arabic_lang_list + ['en'],
                                          '["ar","fa","ur","ug","en"]')
                    model = recognition_models['gen1']['arabic_g1']
                    recog_network = 'generation1'
                elif set(lang_list) & set(devanagari_lang_list):
                    self.setModelLanguage('devanagari', lang_list,
                                          devanagari_lang_list + ['en'],
                                          '["hi","mr","ne","en"]')
                    model = recognition_models['gen1']['devanagari_g1']
                    recog_network = 'generation1'
                elif set(lang_list) & set(cyrillic_lang_list):
                    self.setModelLanguage(
                        'cyrillic', lang_list, cyrillic_lang_list + ['en'],
                        '["ru","rs_cyrillic","be","bg","uk","mn","en"]')
                    model = recognition_models['gen1']['cyrillic_g1']
                    recog_network = 'generation1'
                else:
                    self.model_lang = 'latin'
                    model = recognition_models['gen2']['latin_g2']
                    recog_network = 'generation2'
            self.character = model['characters']

            model_path = os.path.join(self.model_storage_directory,
                                      model['filename'])
            # check recognition model file
            if recognizer:
                if not os.path.isfile(model_path):
                    if not self.download_enabled:
                        raise FileNotFoundError(
                            "Missing %s and downloads disabled" % model_path)
                    LOGGER.warning(
                        'Downloading recognition model, please wait. '
                        'This may take several minutes depending upon your network connection.'
                    )
                    download_and_unzip(model['url'], model['filename'],
                                       self.model_storage_directory, verbose)
                    assert calculate_md5(
                        model_path) == model['filesize'], corrupt_msg
                    LOGGER.info('Download complete.')
                elif calculate_md5(model_path) != model['filesize']:
                    if not self.download_enabled:
                        raise FileNotFoundError(
                            "MD5 mismatch for %s and downloads disabled" %
                            model_path)
                    LOGGER.warning(corrupt_msg)
                    os.remove(model_path)
                    LOGGER.warning(
                        'Re-downloading the recognition model, please wait. '
                        'This may take several minutes depending upon your network connection.'
                    )
                    download_and_unzip(model['url'], model['filename'],
                                       self.model_storage_directory, verbose)
                    assert calculate_md5(
                        model_path) == model['filesize'], corrupt_msg
                    LOGGER.info('Download complete')
            self.setLanguageList(lang_list, model)

        else:  # user-defined model
            with open(
                    os.path.join(self.user_network_directory,
                                 recog_network + '.yaml')) as file:
                recog_config = yaml.load(file, Loader=yaml.FullLoader)
            imgH = recog_config['imgH']
            available_lang = recog_config['lang_list']
            self.setModelLanguage(recog_network, lang_list, available_lang,
                                  available_lang)
            #char_file = os.path.join(self.user_network_directory, recog_network+ '.txt')
            self.character = recog_config['character_list']
            model_file = recog_network + '.pth'
            model_path = os.path.join(self.model_storage_directory, model_file)
            self.setLanguageList(lang_list, None)

        dict_list = {}
        for lang in lang_list:
            dict_list[lang] = os.path.join(BASE_PATH, 'dict', lang + ".txt")

        if detector:
            self.detector = get_detector(detector_path, self.device, quantize)
        if recognizer:
            if recog_network == 'generation1':
                network_params = {
                    'input_channel': 1,
                    'output_channel': 512,
                    'hidden_size': 512
                }
            elif recog_network == 'generation2':
                network_params = {
                    'input_channel': 1,
                    'output_channel': 256,
                    'hidden_size': 256
                }
            else:
                network_params = recog_config['network_params']
            self.recognizer, self.converter = get_recognizer(
                recog_network, network_params, self.character,
                separator_list, dict_list, model_path,
                device=self.device, quantize=quantize)
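
Once constructed, a Reader is typically used like this (standard EasyOCR usage; the image path is a placeholder):

import easyocr

reader = easyocr.Reader(['en'], gpu=False)
# readtext returns a list of (bounding_box, text, confidence) tuples
results = reader.readtext('example_image.jpg')
for bbox, text, confidence in results:
    print(text, confidence)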