Example #1
def train_component_cnn(dataset,
                        img_x=64,
                        img_y=64,
                        n_channels=1,
                        batch_size=32,
                        nb_epoch=10):
    dataset = os.path.realpath(os.path.expandvars(dataset))
    if os.path.isfile(dataset):
        ds = np.load(dataset)
        X = ds["X"]
        y = ds["y"]
    elif os.path.isdir(dataset):
        X, y = produce_dataset(dataset,
                               img_x,
                               img_y,
                               n_channels=n_channels,
                               image_preprocessor=invert_and_resize_img)
        save_dataset(X=X, y=y)
    else:
        raise Exception("{} is neither a directory nor a file".format(dataset))

    n_labels = len(np.unique(y))
    n_samples, n_channel, img_rows, img_cols = X.shape
    print("Loaded {} with {} samples containing ({}x{}) pixels.".format(
        dataset, n_samples, img_rows, img_cols, n_labels))

    le = sp.LabelEncoder()
    y_encoded = le.fit_transform(y)
    model = anmccv1.train_model(X, y_encoded)
    anc.save_and_upload_model(model, save_dir=save_dir())
    return model
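
The preprocessor invert_and_resize_img is not shown in this listing. A plausible stand-in, assuming grayscale uint8 input and OpenCV for resizing, is sketched below; it is hypothetical and not the project's actual implementation.

import cv2
import numpy as np

def invert_and_resize_img(img, img_x=64, img_y=64):
    """Invert a grayscale image and resize it to img_x by img_y pixels.

    Hypothetical stand-in for the preprocessor referenced above.
    """
    inverted = 255 - img.astype(np.uint8)        # flip black/white polarity
    return cv2.resize(inverted, (img_x, img_y))  # cv2.resize takes (width, height)
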
Example #2
def test_train_model_from_dataset():
    classifier, feature_extractor = anmccv2.train_model_from_dataset(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-letter-classes-240K",
        early_stop_patience=1000)
    anc.save_and_upload_model(classifier, "/home/iesahin/Annex/Arabic/",
                              "length_classifier")
    anc.save_and_upload_model(feature_extractor, "/home/iesahin/Annex/Arabic/",
                              "length_feature_extractor")
Example #3
def train_cnn_from_dataset(load_from, output_model):
    print("Loading Dataset From: {}".format(load_from))
    ds_in = np.load(load_from)
    positive_set = ds_in["positive_set"]
    negative_set = ds_in["negative_set"]
    # move the channel axis from last to second (NHWC -> NCHW)
    positive_set = positive_set.swapaxes(1, 3).swapaxes(2, 3)
    negative_set = negative_set.swapaxes(1, 3).swapaxes(2, 3)
    positive_y = np.ones(shape=(positive_set.shape[0], 1))
    negative_y = np.zeros(shape=(negative_set.shape[0], 1))
    LOG.debug("=== positive_y.shape ===")
    LOG.debug(positive_y.shape)
    LOG.debug("=== negative_y.shape ===")
    LOG.debug(negative_y.shape)
    y_train = np.vstack((positive_y, negative_y))
    x_train_shape = (positive_set.shape[0] + negative_set.shape[0],
                     positive_set.shape[1], positive_set.shape[2],
                     positive_set.shape[3])
    print("Merging Positive and Negative Sets into Memmap File: {}".format(
        x_train_shape))
    x_train = np.memmap("x_train.memmap",
                        dtype=positive_set.dtype,
                        shape=x_train_shape,
                        mode="w+")
    x_train[:positive_set.shape[0]] = positive_set
    x_train[positive_set.shape[0]:] = negative_set
    # free the source arrays now that they are copied into the memmap
    del positive_set
    del negative_set
    gc.collect()
    LOG.debug("=== x_train.shape ===")
    LOG.debug(x_train.shape)
    anc.shuffle_parallel(x_train, y_train)
    model = create_cnn(x_train, y_train)
    anc.save_and_upload_model(model, save_dir())
    return model
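
anc.shuffle_parallel is not defined in this listing. The standard trick for shuffling samples and labels together is to apply the same permutation to both arrays; a sketch, assuming it shuffles in place along the first axis, is below. It also works on the writable memmap created above.

import numpy as np

def shuffle_parallel(x, y, seed=42):
    """Shuffle x and y in place along axis 0 with the same permutation.

    Sketch of what anc.shuffle_parallel presumably does; the real helper
    may differ.
    """
    rng = np.random.RandomState(seed)
    state = rng.get_state()
    rng.shuffle(x)        # in-place shuffle of the samples
    rng.set_state(state)  # replay the identical permutation
    rng.shuffle(y)        # ... on the labels
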
Example #4
def train_model(dataset,
                autoencoder,
                decoder,
                encoder,
                model_key="",
                batch_size=32,
                nb_epoch=500,
                early_stop_patience=30):

    x_train = dataset.as_dataset()
    np.random.shuffle(x_train)

    stop_early = EarlyStopping(monitor='loss',
                               patience=early_stop_patience,
                               verbose=1,
                               mode='auto')

    # model_checkpoint = ModelCheckpoint(save_dir() + "checkpoint-weights-epoch-{epoch:02d}-val-loss-{val_loss:.2f}.h5",  # noqa
    #                                    monitor='val_loss',
    #                                    verbose=1,
    #                                    save_best_only=True,
    #                                    mode='auto')

    print("Fitting Model: {}".format(autoencoder))
    print("Data Shape: {}".format(x_train.shape))

    val_data = anc.prepare_validation_data(x_train, x_train)

    autoencoder.fit(x_train,
                    x_train,
                    batch_size=batch_size,
                    shuffle=True,
                    nb_epoch=nb_epoch,
                    verbose=1,
                    validation_data=val_data,
                    callbacks=[stop_early])

    anc.save_and_upload_model(autoencoder, save_dir(), "autoencoder")
    anc.save_and_upload_model(decoder, save_dir(), "decoder")
    anc.save_and_upload_model(encoder, save_dir(), "encoder")

    return autoencoder, decoder, encoder
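
anc.prepare_validation_data is also external. Since the autoencoder is trained with the inputs as targets, one plausible implementation simply holds out a slice of the samples and returns the (x_val, y_val) tuple Keras expects for validation_data; this is a guess at the helper, not its actual code.

def prepare_validation_data(x, y, val_fraction=0.1):
    """Return (x_val, y_val) for Keras' validation_data argument.

    Hypothetical version: hold out the last val_fraction of the
    (already shuffled) samples.
    """
    n_val = max(1, int(len(x) * val_fraction))
    return x[-n_val:], y[-n_val:]
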
Example #5
def component_ac_v1(wordlist_file, size=64, **kwargs):
    autoencoder, decoder, encoder = anmcav1.create_model()
    print("AUTOENCODER")
    autoencoder.summary()
    print("DECODER")
    decoder.summary()
    print("ENCODER")
    encoder.summary()
    
    if wordlist_file.endswith('.npz'): 
        dataset = AutoencoderDataset(wordlist_file)
        os.symlink(wordlist_file, dataset.file_to_write())
    else: 
        with open(wordlist_file) as wf:
            words = wf.readlines()
        dataset = AutoencoderDataset(initial_size=len(words))
        for w in words: 
            for wi in create_word_images(w, **kwargs): 
                comp_arr = aui.decompose_and_resize(wi, size, size)
                comp_arr = remove_zeros_and_reshape(comp_arr)
                dataset.add_components(comp_arr)

        dataset.write(save_dir() + "autoencoder-dataset.npz")

    X = dataset.as_dataset()
    
    autoencoder = anc.train_model(autoencoder, 
                                  X,
                                  X)

    anc.save_and_upload_model(autoencoder, save_dir(), "autoencoder")
    anc.save_and_upload_model(decoder, save_dir(), "decoder")
    anc.save_and_upload_model(encoder, save_dir(), "encoder")

    reconstruction_test(dataset, autoencoder)
    
    return (autoencoder, decoder, encoder)
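
reconstruction_test is not shown either. A minimal version, assuming it only needs to report how well the autoencoder reproduces its inputs, could run a handful of components through the model and print the mean squared error:

import numpy as np

def reconstruction_test(dataset, autoencoder, n_samples=16):
    """Report the mean squared reconstruction error on a few samples.

    Minimal sketch; the project's version may also dump the reconstructed
    images for visual inspection.
    """
    x = dataset.as_dataset()[:n_samples]
    x_hat = autoencoder.predict(x)
    mse = np.mean((x - x_hat) ** 2)
    print("Reconstruction MSE on {} samples: {}".format(len(x), mse))
    return mse
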
Example #6
def test_train_model_from_dataset():
    classifier, feature_extractor = anmscv1.train_model_from_dataset(
        dataset_dir=os.path.expandvars(
            "$HOME/Annex/Arabic/component-dataset-170K"),
        early_stop_patience=500)
    anc.save_and_upload_model(classifier, "/home/iesahin/Annex/Arabic/",
                              "letter_group_classifier")
    anc.save_and_upload_model(feature_extractor, "/home/iesahin/Annex/Arabic/",
                              "letter_group_feature_extractor")
Example #7
def test_train_model_from_dataset():
    # classifier = anmclv3.train_model(dataset_dir="/home/iesahin/Annex/Datasets/leylamecnun/lm-26-wob")
    classifier = anmclv3.train_model(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-component-dataset-518")
    anc.save_and_upload_model(classifier, "/home/iesahin/Annex/Arabic/",
                              "component_lstm_v3")
Example #8
def test_train_model_from_dataset():
    classifier = anmclv1.train_model(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-component-dataset-518")
    anc.save_and_upload_model(classifier, "/home/iesahin/Annex/Arabic/",
                              "letter_length_lstm_v1")
Example #9
def test_train_model_from_dataset():
    classifier = anmccv3.train_model_from_dataset(
        dataset_dir="/home/iesahin/Annex/Arabic/arabic-component-dataset-4170",
        early_stop_patience=0)
    anc.save_and_upload_model(classifier, "/home/iesahin/Annex/Arabic/",
                              "component_classifier_4170_v3")