Example no. 1
0
def main(data, vocab):
    """Build the train/test/val datasets and write the token vocabulary.

    Args:
        data: path to the data configuration file (loaded via ``Config``).
        vocab: path to the vocabulary configuration file (loaded via ``Config``).
    """
    data_config = Config(data)

    # datasets: one generator per split, wired from the data config
    train_set = DataGenerator(
        path_formulas=data_config.path_formulas_train,
        dir_images=data_config.dir_images_train,
        path_matching=data_config.path_matching_train)
    test_set = DataGenerator(
        path_formulas=data_config.path_formulas_test,
        dir_images=data_config.dir_images_test,
        path_matching=data_config.path_matching_test)
    val_set = DataGenerator(
        path_formulas=data_config.path_formulas_val,
        dir_images=data_config.dir_images_val,
        path_matching=data_config.path_matching_val)

    # produce images and matching files for every split
    for dataset in (train_set, test_set, val_set):
        dataset.build(buckets=data_config.buckets)

    # vocab: built from the training split only; a distinct local name
    # avoids clobbering the `vocab` config-path parameter
    vocab_config = Config(vocab)
    token_vocab = build_vocab([train_set], min_count=vocab_config.min_count_tok)
    write_vocab(token_vocab, vocab_config.path_vocab)
Example no. 2
0
def main(data, vocab):
    """Build the train/test/val datasets (unbucketed) and write the token vocabulary.

    Args:
        data: path to the data configuration file (loaded via ``Config``).
        vocab: path to the vocabulary configuration file (loaded via ``Config``).
    """
    data_config = Config(data)

    # datasets: one generator per split, wired from the data config
    train_set = DataGenerator(path_formulas=data_config.path_formulas_train,
                              dir_images=data_config.dir_images_train,
                              path_matching=data_config.path_matching_train)
    test_set = DataGenerator(path_formulas=data_config.path_formulas_test,
                             dir_images=data_config.dir_images_test,
                             path_matching=data_config.path_matching_test)
    val_set = DataGenerator(path_formulas=data_config.path_formulas_val,
                            dir_images=data_config.dir_images_val,
                            path_matching=data_config.path_matching_val)

    # produce images and matching files; buckets=None disables bucketing here
    # (unlike the config-driven buckets variant of this script)
    for dataset in (train_set, test_set, val_set):
        dataset.build(buckets=None)

    # vocab: built from the training split only; a distinct local name
    # avoids clobbering the `vocab` config-path parameter
    vocab_config = Config(vocab)
    token_vocab = build_vocab([train_set], min_count=vocab_config.min_count_tok)
    write_vocab(token_vocab, vocab_config.path_vocab)