def test_link(self, cleanup, db):
    """Linking two sources to a corpus succeeds and both appear on it."""
    source_a = Source(name='flarf', description='blah', content='lol naw')
    source_b = Source(name='puke', description='blah', content='lol naw')
    corpus = Corpus(name='whee', description='bleh')
    db.add_all([corpus, source_a, source_b])
    db.commit()
    # Link both sources via the CLI; each invocation must exit cleanly.
    for source_name in ('flarf', 'puke'):
        assert 0 == prosaic('corpus', 'link', 'whee', source_name).code
    db.refresh(corpus)
    assert {source_a, source_b} == set(corpus.sources)
def test_sources(self, cleanup, db):
    """`corpus sources` lists linked sources; unlinking removes one."""
    flarf = Source(name='flarf', description='blah', content='lol naw')
    puke = Source(name='puke', description='blah', content='lol naw')
    corpus = Corpus(name='whee', description='bleh')
    db.add_all([corpus, flarf, puke])
    db.commit()
    prosaic('corpus', 'link', 'whee', 'flarf')
    prosaic('corpus', 'link', 'whee', 'puke')
    db.refresh(corpus)
    code, lines = prosaic('corpus', 'sources', 'whee')
    assert 0 == code
    assert set(lines).issuperset({'flarf', 'puke'})
    # Fix: the unlink's exit code was previously computed (`.code`) but
    # discarded without an assertion — actually verify it succeeds.
    assert 0 == prosaic('corpus', 'unlink', 'whee', 'puke').code
    code, lines = prosaic('corpus', 'sources', 'whee')
    assert 0 == code
    # Fix: wrap `lines` in set() for consistency with the identical check
    # above, rather than assuming the returned sequence has .issuperset().
    assert set(lines).issuperset({'flarf'})
def test_ls(self, db, cleanup):
    """`source ls` exits cleanly and its output includes every stored name."""
    expected_names = ['blarf', 'flarf', 'narf']
    for source_name in expected_names:
        db.add(Source(name=source_name, content=''))
    db.commit()
    exit_code, listing = prosaic('source', 'ls')
    assert 0 == exit_code
    assert listing.issuperset(set(expected_names))
def test_rm(self, db, cleanup):
    """`source rm` deletes the named source; removing two of three leaves one."""
    for source_name in ['blarf', 'flarf', 'narf']:
        db.add(Source(name=source_name, content=''))
    db.commit()
    assert 3 == db.query(Source).count()
    # Remove two of the three sources, checking each command's exit code.
    for doomed in ('flarf', 'narf'):
        exit_code, _ = prosaic('source', 'rm', doomed)
        assert 0 == exit_code
    assert 1 == db.query(Source).count()
def source_new(self):
    """Create a new Source from the file at ``self.args.path``.

    Builds a :class:`Source` from the CLI arguments on ``self.args`` and
    hands the open file to ``process_text`` for phrase extraction and
    persistence. On failure, prints the error and reports that the source
    was not saved.
    """
    name = self.args.source_name
    description = self.args.source_description
    source = Source(name=name, description=description)
    # Fix: use a context manager so the file handle is released even when
    # process_text raises; the original open()/close() pair leaked the
    # handle on that path.
    with open(self.args.path, 'r') as text_file:
        error = process_text(self.db, source, text_file)
    # NOTE(review): process_text appears to return the new source id (not
    # None) on success, which would make this branch fire even when the
    # import worked — confirm its contract before relying on this check.
    if error is not None:
        print('There was an error extracting phrases:')
        print('********')
        print(error)
        print('********')
        print("The source '{}' was not saved.".format(name))
def process_text(db: Database, source: Source, text: IOBase) -> Optional[Exception]:
    """Split *text* into clause/sentence-sized lines and persist them.

    Streams the file in CHUNK_SIZE reads, normalizes characters, and feeds
    (line_no, line) pairs through a Queue to a `line_handler` worker
    process, which attaches phrases to the source by id. On worker error,
    the source row is deleted and the error is returned.

    NOTE(review): despite the `Optional[Exception]` annotation, the success
    path returns ``source.id`` (an id, not None) — confirm which contract
    callers actually depend on.
    """
    session = get_session(db)
    line_no = 1  # lol
    ultimate_text = ''         # accumulates every accepted line; becomes source.content
    futures = []               # NOTE(review): never used in this function
    source.content = ''
    session.add(source)
    session.commit()  # so we can attach phrases to it. need its id.
    line_queue = Queue()       # work queue: (line_no, line) pairs for the worker
    error_queue = Queue()      # worker reports failures here
    db_proc = Process(target=line_handler, args=(db, line_queue, error_queue, source.id))
    db_proc.start()
    chunk = text.read(CHUNK_SIZE)
    while len(chunk) > 0:
        line_buff = ""
        for c in chunk:
            # Characters in BAD_CHARS collapse to a single space.
            if BAD_CHARS.get(c, False):
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            # Clause markers end the line only if it is long enough;
            # otherwise the marker is kept as part of the line.
            if CLAUSE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    line_buff += c
                continue
            # Sentence markers end the line if long enough, and are
            # dropped either way (unlike clause markers).
            if SENTENCE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                continue
            # Collapse runs of spaces.
            if c == ' ' and line_buff.endswith(' '):
                continue
            # Drop apostrophes adjacent to whitespace (leading/trailing
            # quote marks), keeping intra-word ones.
            if c == "'" and line_buff.endswith(' '):
                continue
            if c == "'" and peek(text, 1) == ' ':
                continue
            line_buff += c
        chunk = text.read(CHUNK_SIZE)
    # Signal end-of-input and wait for the worker to drain the queue.
    line_queue.put(DONE_READING)
    db_proc.join()
    error = None
    if error_queue.empty():
        source.content = ultimate_text
        session.add(source)
    else:
        # Worker failed: undo the provisional source row.
        error = error_queue.get()
        session.delete(source)
    result = None
    if error is None:
        result = source.id
    else:
        result = error
    session.commit()
    session.close()
    return result