def train_component_cnn(dataset,
                        img_x=64,
                        img_y=64,
                        n_channels=1,
                        batch_size=32,
                        nb_epoch=10):
    dataset = os.path.realpath(os.path.expandvars(dataset))
    if os.path.isfile(dataset):
        ds = np.load(dataset)
        X = ds["X"]
        y = ds["y"]
    elif os.path.isdir(dataset):
        X, y = produce_dataset(dataset,
                               img_x,
                               img_y,
                               n_channels=n_channels,
                               image_preprocessor=invert_and_resize_img)
        save_dataset(X=X, y=y)
    else:
        raise Exception("{} is neither dir nor filename".format(dataset))
    n_labels = len(np.unique(y))
    n_samples, n_channel, img_rows, img_cols = X.shape
    print("Loaded {} with {} samples containing ({}x{}) pixels and {} labels.".format(
        dataset, n_samples, img_rows, img_cols, n_labels))
    le = sp.LabelEncoder()
    y_encoded = le.fit_transform(y)
    model = anmccv1.train_model(X, y_encoded)
    anc.save_and_upload_model(model, save_dir=save_dir())
    return model
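# Hypothetical usage sketch (not in the original source): train_component_cnn accepts
# either a prebuilt .npz archive containing "X" and "y" arrays, or a directory of
# component images that produce_dataset() converts into such an archive. The dataset
# paths below are placeholders.
def example_train_component_cnn():
    # Train from a previously saved .npz dataset.
    model_from_npz = train_component_cnn("/path/to/component-dataset.npz")
    # Train from a directory of raw component images; the dataset is built and
    # saved before training.
    model_from_dir = train_component_cnn("/path/to/component/images",
                                         img_x=64,
                                         img_y=64,
                                         n_channels=1)
    return model_from_npz, model_from_dir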
def test_train_model_from_dataset():
    classifier, feature_extractor = anmccv2.train_model_from_dataset(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-letter-classes-240K",
        early_stop_patience=1000)
    anc.save_and_upload_model(classifier,
                              "/home/iesahin/Annex/Arabic/",
                              "length_classifier")
    anc.save_and_upload_model(feature_extractor,
                              "/home/iesahin/Annex/Arabic/",
                              "length_feature_extractor")
def train_cnn_from_dataset(load_from, output_model):
    print("Loading Dataset From: {}".format(load_from))
    ds_in = np.load(load_from)
    positive_set = ds_in["positive_set"]
    negative_set = ds_in["negative_set"]
    # move channel axis to position 1: (N, H, W, C) -> (N, C, H, W)
    positive_set = positive_set.swapaxes(1, 3).swapaxes(2, 3)
    negative_set = negative_set.swapaxes(1, 3).swapaxes(2, 3)
    positive_y = np.ones(shape=(positive_set.shape[0], 1))
    negative_y = np.zeros(shape=(negative_set.shape[0], 1))
    LOG.debug("=== positive_y.shape ===")
    LOG.debug(positive_y.shape)
    LOG.debug("=== negative_y.shape ===")
    LOG.debug(negative_y.shape)
    y_train = np.vstack((np.ones(shape=(positive_set.shape[0], 1)),
                         np.zeros(shape=(negative_set.shape[0], 1))))
    x_train_shape = (positive_set.shape[0] + negative_set.shape[0],
                     positive_set.shape[1],
                     positive_set.shape[2],
                     positive_set.shape[3])
    print("Merging Positive and Negative Sets into Memmap File: {}".format(
        x_train_shape))
    x_train = np.memmap("x_train.memmap",
                        dtype=positive_set.dtype,
                        shape=x_train_shape,
                        mode="w+")
    x_train[:positive_set.shape[0]] = positive_set
    x_train[positive_set.shape[0]:] = negative_set
    del positive_set
    del negative_set
    gc.collect()
    LOG.debug("=== x_train.shape ===")
    LOG.debug(x_train.shape)
    anc.shuffle_parallel(x_train, y_train)
    model = create_cnn(x_train, y_train)
    anc.save_and_upload_model(model, save_dir())
    return model
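# Hypothetical sketch (not in the original source) of how an archive consumed by
# train_cnn_from_dataset could be produced: the function expects an .npz file with
# "positive_set" and "negative_set" image arrays stored channels-last (N, H, W, C),
# which it then swaps to channels-first before training. The helper name, argument
# names, and dtype below are placeholders.
def example_save_cnn_dataset(positive_images,
                             negative_images,
                             out_file="cnn-dataset.npz"):
    import numpy as np
    np.savez_compressed(out_file,
                        positive_set=np.asarray(positive_images, dtype=np.uint8),
                        negative_set=np.asarray(negative_images, dtype=np.uint8))
    return out_file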
def train_model(dataset,
                autoencoder,
                decoder,
                encoder,
                model_key="",
                batch_size=32,
                nb_epoch=500,
                early_stop_patience=30):
    x_train = dataset.as_dataset()
    np.random.shuffle(x_train)
    stop_early = EarlyStopping(monitor='loss',
                               patience=early_stop_patience,
                               verbose=1,
                               mode='auto')
    # model_checkpoint = ModelCheckpoint(save_dir() + "checkpoint-weights-epoch-{epoch:02d}-val-loss-{val_loss:.2f}.h5",  # noqa
    #                                    monitor='val_loss',
    #                                    verbose=1,
    #                                    save_best_only=True,
    #                                    mode='auto')
    print("Fitting Model: {}".format(autoencoder))
    print("Data Shape: {}".format(x_train.shape))
    val_data = anc.prepare_validation_data(x_train, x_train)
    autoencoder.fit(x_train, x_train,
                    batch_size=batch_size,
                    shuffle=True,
                    nb_epoch=nb_epoch,
                    verbose=1,
                    validation_split=0.0,
                    validation_data=val_data,
                    callbacks=[stop_early])
    anc.save_and_upload_model(autoencoder, save_dir(), "autoencoder")
    anc.save_and_upload_model(decoder, save_dir(), "decoder")
    anc.save_and_upload_model(encoder, save_dir(), "encoder")
    return autoencoder, decoder, encoder
def component_ac_v1(wordlist_file, size=64, **kwargs):
    autoencoder, decoder, encoder = anmcav1.create_model()
    print("AUTOENCODER")
    autoencoder.summary()
    print("DECODER")
    decoder.summary()
    print("ENCODER")
    encoder.summary()
    if wordlist_file.endswith('.npz'):
        dataset = AutoencoderDataset(wordlist_file)
        os.symlink(wordlist_file, dataset.file_to_write())
    else:
        words = open(wordlist_file).readlines()
        dataset = AutoencoderDataset(initial_size=len(words))
        for w in words:
            for wi in create_word_images(w, **kwargs):
                comp_arr = aui.decompose_and_resize(wi, size, size)
                comp_arr = remove_zeros_and_reshape(comp_arr)
                dataset.add_components(comp_arr)
        dataset.write(save_dir() + "autoencoder-dataset.npz")
    X = dataset.as_dataset()
    autoencoder = anc.train_model(autoencoder, X, X)
    anc.save_and_upload_model(autoencoder, save_dir(), "autoencoder")
    anc.save_and_upload_model(decoder, save_dir(), "decoder")
    anc.save_and_upload_model(encoder, save_dir(), "encoder")
    reconstruction_test(dataset, autoencoder)
    return (autoencoder, decoder, encoder)
def test_train_model_from_dataset():
    classifier, feature_extractor = anmscv1.train_model_from_dataset(
        dataset_dir="$HOME/Annex/Arabic/component-dataset-170K",
        early_stop_patience=500)
    anc.save_and_upload_model(classifier,
                              "/home/iesahin/Annex/Arabic/",
                              "letter_group_classifier")
    anc.save_and_upload_model(feature_extractor,
                              "/home/iesahin/Annex/Arabic/",
                              "letter_group_feature_extractor")
def test_train_model_from_dataset():
    # classifier = anmclv3.train_model(dataset_dir="/home/iesahin/Annex/Datasets/leylamecnun/lm-26-wob")
    classifier = anmclv3.train_model(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-component-dataset-518")
    anc.save_and_upload_model(classifier,
                              "/home/iesahin/Annex/Arabic/",
                              "component_lstm_v3")
def test_train_model_from_dataset():
    classifier = anmclv1.train_model(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-component-dataset-518")
    anc.save_and_upload_model(classifier,
                              "/home/iesahin/Annex/Arabic/",
                              "letter_length_lstm_v1")
def test_train_model_from_dataset():
    classifier = anmccv3.train_model_from_dataset(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-component-dataset-4170",
        early_stop_patience=0)
    anc.save_and_upload_model(classifier,
                              "/home/iesahin/Annex/Arabic/",
                              "component_classifier_4170_v3")