Esempio n. 1
0
 def test_process(self):
     """Process a freshly created source and compare its stored word rows.

     A source is created for the file named by ``self.filepath``, handed to
     ``process``, and the ``(word, word_pos, freq)`` rows recorded for that
     source are then compared, ordered by word, against a fixed list.
     """
     expected_rows = [
         ('ask', 'v', 1),
         ('congress', 'n', 1),
         ('grant', 'n', 1),
         ('increase', 'v', 1),
         ('president', 'n', 1),
         ('rehabilitation', 'n', 1),
         ('say', 'v', 1),
         ('state', 'n', 1),
         ('vocational', 'a', 1),
     ]

     # Only the file-name component of the path is passed to create_source.
     _, file_name = os.path.split(self.filepath)
     source = create_source('Test Source', 'Test Author', 1984, [], file_name)
     process(source)

     # Restrict the query to statistics that belong to the source created
     # above, so rows from other sources cannot affect the comparison.
     query = db.session.query(Word.word, Word.word_pos, WordStat.freq)
     query = query.join(WordStat)
     query = query.filter(WordStat.source_id == source.source_id)
     actual_rows = query.order_by(Word.word).all()

     self.assertEqual(actual_rows, expected_rows)
Esempio n. 2
0
    # Build the parser over `info` and ask it for any problems it found.
    # NOTE(review): the shape of `info` and of the value returned by
    # get_errors() is not visible here; it is only tested for truthiness.
    parser = Parser(info)
    err = parser.get_errors()
    if err:
        # Let the user abort the whole run; any answer other than 'n'
        # (case-insensitive) continues.
        if input('There are errors:\n'
                 '%s\nProceed? (y\\n)' % err).lower() == 'n':
            sys.exit()
    # Each parser entry unpacks to a source path, title, author and year,
    # with any remaining fields collected as tags.
    for i, (source_path, title, author, year, *tags) in enumerate(parser):
        try:
            # Destination is derived from the configured SOURCE_DIR and the
            # file-name component of the original path.
            dest = generate_filename(app.config['SOURCE_DIR'],
                                     path.split(source_path)[1])
            print('fill_db: Started processing'
                  ' file ({} of {})'.format(i + 1, len(parser)))
            log('fill_db: Processing ' + source_path)
            source = create_source(title,
                                   author,
                                   year,
                                   tags,
                                   path.split(dest)[1])
            copyfile(source_path, dest)
            process(source)
            print('fill_db: done')
            print('*' * 15)
        except SourceExistsException:
            # Drop non-ASCII characters so the console can always print the
            # message, then decode back to str: formatting the bytes object
            # directly would print its repr (b'...') instead of the title.
            printable_title = title.encode("ascii", "ignore").decode("ascii")
            print('Source "{}" already exists. Skipping'.format(
                printable_title))
            log_warning('Source "{}" already exists. Skipping'.format(
                title))
        except Exception as E:
            # One failing entry must not stop the batch: report it, log the
            # full message, and move on to the next entry.
            printable_error = str(E).encode("ascii", "ignore").decode("ascii")
            print('Some kind of error: ', printable_error)
            log_error(str(E))