Ejemplo n.º 1
0
        'bag_of_words_generator')
    # Record the start of the run together with the experiment id and the
    # configuration in the experiment log.
    logger.info('Program start, bag of words generator experiment id = %s',
                experiment_id)
    logger.info(config)

    # Load the vocabulary selected by the `vocabulary_experiment` argument
    # (presumably the id of an earlier vocabulary-generator run -- confirm).
    # NOTE(review): `db` is used here but is (re)bound to a new
    # DatabaseManager() a few lines below; it must already be bound before
    # this point for this call to work -- confirm against the part of the
    # function that is not shown here.
    vocabulary = db.load_vocabulary(args.vocabulary_experiment)
    logger.info('Vocabulary loaded')
    logger.info('Vocabulary length = %s', len(vocabulary))

    # Name of the table the generated vectors are inserted into below.
    table_name = get_table_name(args, experiment_id)

    # Get the corpus and prepare the bag of words generator.
    # NOTE(review): if `db` was already created earlier in this function,
    # this second DatabaseManager() looks redundant -- confirm.
    db = DatabaseManager()
    # The command-line flags select which slice of the corpus is fetched;
    # get_corpus returns three values that are all handed to the generator.
    subject_ids, corpus, chart_dates = db.get_corpus(
        toy_set=args.toy_set,
        top100_labels=args.top100_labels,
        validation_set=args.validation_set,
        test_set=args.test_set)
    bag_of_words_generator = BagOfWordsGenerator(logger, vocabulary,
                                                 subject_ids, corpus,
                                                 chart_dates)

    # RNN mode: build the RNN-specific vectors and store them in `table_name`.
    if args.for_rnn:
        bag_of_words_vectors_rnn = bag_of_words_generator.build_bag_of_words_vectors_rnn(
        )
        logger.info('Bag of words vectors for RNN created')
        db.insert_bag_of_words_vectors_rnn(bag_of_words_vectors_rnn,
                                           table_name)
        logger.info('Bag of words vectors for RNN inserted in table %s',
                    table_name)
    else:
Ejemplo n.º 2
0
                        nargs='?',
                        const=700,
                        help='how many rows to fetch from the corpus table')
    # Boolean switch: absent -> False, present -> True.
    parser.add_argument('--top100_labels', action='store_true', default=False)
    args = parser.parse_args()

    db = DatabaseManager()

    # Timestamp of the run start; it is stored with the experiment record.
    start = datetime.datetime.now()
    # Month/day + hour/minute/second stamp of the start time.
    # NOTE(review): `time_str` is not referenced again in the lines visible
    # here -- confirm whether it is still needed.
    time_str = start.strftime("%m%d_%H%M%S")
    # vars() exposes the parsed arguments as a dict so they can be stored
    # and logged as the experiment configuration.
    config = vars(args)
    experiment_id = db.vocabulary_experiment_create(config, start)

    # The log file is named after the experiment id and its name is
    # registered in the database for that experiment.
    log_filename = '{}_vocabulary_generator.log'.format(experiment_id)
    db.vocabulary_experiment_insert_log_file(experiment_id, log_filename)

    logger = logging_utils.build_logger(log_filename).getLogger(
        'vocabulary_generator')
    logger.info('Program start, vocabulary experiment id = %s', experiment_id)
    logger.info(config)

    # get_corpus returns three values; only the second one (the corpus) is
    # used for building the vocabulary.
    _, corpus, _ = db.get_corpus(toy_set=args.toy_set,
                                 top100_labels=args.top100_labels)

    vocabulary_generator = VocabularyGenerator(corpus, logger)
    vocabulary = vocabulary_generator.build_vocabulary()

    # Store the vocabulary together with the end timestamp under the
    # experiment id created above.
    end = datetime.datetime.now()
    db.vocabulary_experiment_insert_vocabulary(experiment_id, end, vocabulary)
    logger.info('Vocabulary inserted into database')