def line_handler(db: Database, line_queue: Queue, error_queue: Queue, source_id: int) -> None:
    """Worker process: consume (line_no, line) pairs from line_queue, compute
    per-line NLP features, and persist one Phrase row per line attached to
    the Source identified by source_id.

    Stops when the DONE_READING sentinel is received, then commits. On any
    failure the exception is forwarded to error_queue and the transaction is
    rolled back without committing.
    """
    session = get_session(db)
    source = session.query(Source).filter(Source.id == source_id).one()
    try:
        while True:
            line_pair = line_queue.get()
            if line_pair == DONE_READING:
                break
            line_no, line = line_pair
            # NLP features used later for template matching.
            stems = nlp.stem_sentence(line)
            rhyme_sound = nlp.rhyme_sound(line)
            syllables = nlp.count_syllables(line)
            alliteration = nlp.has_alliteration(line)
            phrase = Phrase(stems=stems, raw=line, alliteration=alliteration,
                            rhyme_sound=rhyme_sound, syllables=syllables,
                            line_no=line_no, source=source)
            session.add(phrase)
        session.commit()
    except Exception as e:
        # Surface the failure to the parent process, then abandon the batch.
        error_queue.put(e)
        log.error('Died while processing text, rolling back')
        session.rollback()
    finally:
        # FIX: the original closed the session only on the error path,
        # leaking it after a successful commit. Always close.
        session.close()
def corpus_unlink(self):
    """Detach the CLI-named source from the current corpus."""
    session = get_session(self.db)
    corpus = self.get_corpus(session)
    wanted_name = self.args.source_name
    source = session.query(Source).filter(Source.name == wanted_name).one()
    corpus.sources.remove(source)
    session.add(corpus)
    session.commit()
def corpus_sources(self):
    """Print the name of every source linked to the current corpus,
    warning (via the error log) when there are none."""
    session = get_session(self.db)
    corpus = self.get_corpus(session)
    if not corpus.sources:
        log.error("Corpus {} has no sources.".format(corpus.name))
        log.error("Use `prosaic corpus link 'corpus name' 'source name'` to add sources")
    for source in corpus.sources:
        print(source.name)
def corpus_rm(self):
    """Delete the current corpus: first remove its rows from the
    corpora_sources association table, then delete the corpus row itself.

    Raw SQL is used because no ORM delete-cascade is configured for the
    association table.
    """
    unlink_sql = """ delete from corpora_sources where corpus_id = :corpus_id """
    delete_sql = """ delete from corpora where name = :corpus_name """
    session = get_session(self.db)
    corpus = self.get_corpus(session)
    # FIX: the original never closed the raw connection; use the
    # Connection context manager so it is released deterministically.
    with self.engine.connect() as conn:
        conn.execute(text(unlink_sql).params(corpus_id=corpus.id))
        conn.execute(text(delete_sql).params(corpus_name=self.args.corpus_name))
def poem_new(self):
    """Generate a poem from the current corpus using self.template, then
    either write it to the --output file or print it to stdout.

    Logs an error and returns early if the corpus has no sources.
    """
    session = get_session(self.db)
    corpus = self.get_corpus(session)
    if not corpus.sources:
        log.error("Corpus {} has no sources.".format(corpus.name))
        log.error("Use `prosaic corpus link 'corpus name' 'source name'` to add sources")
        return
    poem_lines = map(first, poem_from_template(self.template, self.db, corpus.id))
    output_filename = self.args.output
    if output_filename:
        with open(output_filename, 'w') as f:
            # BUG FIX: the original called list(poem_lines).join('\n'),
            # which raises AttributeError — lists have no .join(). Join
            # the lines with str.join instead.
            f.write('\n'.join(poem_lines) + '\n')
        log.debug('poem written to {}'.format(output_filename))
    else:
        for line in poem_lines:
            print(line)
def source_rm(self):
    """Delete the CLI-named source: unlink it from every corpus, delete its
    phrases, then delete the source row itself.

    Raw SQL is used because no ORM delete-cascade is configured.
    """
    # ugh i know; TODO figure out how to do this with ORM
    session = get_session(self.db)
    unlink_sql = """ delete from corpora_sources where source_id = :source_id """
    source_delete_sql = """ delete from sources where name = :source_name """
    phrase_delete_sql = """ delete from phrases where source_id = :source_id """
    name = self.args.source_name
    source_id = first(session.query(Source.id).filter(Source.name == name).one())
    # FIX: the original never closed the raw connection; use the
    # Connection context manager so it is released deterministically.
    with self.engine.connect() as conn:
        conn.execute(text(unlink_sql).params(source_id=source_id))
        conn.execute(text(phrase_delete_sql).params(source_id=source_id))
        conn.execute(text(source_delete_sql).params(source_name=name))
def source_ls(self):
    """Print the name of every source in the database, one per line."""
    session = get_session(self.db)
    for (name,) in session.query(Source.name):
        print(name)
def corpus_new(self):
    """Create and persist a corpus from the CLI-supplied name and description."""
    session = get_session(self.db)
    new_corpus = Corpus(name=self.args.corpus_name,
                        description=self.args.corpus_desc)
    session.add(new_corpus)
    session.commit()
def corpus_ls(self):
    """Print the name of every corpus in the database, one per line."""
    session = get_session(self.db)
    for (name,) in session.query(Corpus.name):
        print(name)
def process_text(db: Database, source: Source, text: IOBase) -> Optional[Exception]:
    """Stream `text` in CHUNK_SIZE reads, split it into clause/sentence-sized
    lines, and feed them to a line_handler worker process that stores one
    Phrase per line.

    Returns the source's id on success, or the Exception raised by the
    worker on failure (in which case the source row is deleted).
    """
    session = get_session(db)
    line_no = 1  # lol
    ultimate_text = ''
    source.content = ''
    session.add(source)
    session.commit()  # so we can attach phrases to it. need its id.
    line_queue = Queue()
    error_queue = Queue()
    db_proc = Process(target=line_handler,
                      args=(db, line_queue, error_queue, source.id))
    db_proc.start()

    # BUG FIX: line_buff used to be re-initialized at the top of every chunk
    # iteration, silently discarding any partial line that straddled a
    # CHUNK_SIZE boundary. Initialize it once so it carries across chunks.
    line_buff = ""
    chunk = text.read(CHUNK_SIZE)
    while len(chunk) > 0:
        for c in chunk:
            if BAD_CHARS.get(c, False):
                # Collapse junk characters into a single space.
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            if CLAUSE_MARKERS.get(c, False):
                # Clause punctuation ends the line only once it is long
                # enough; otherwise the marker is kept in the buffer.
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    line_buff += c
                continue
            if SENTENCE_MARKERS.get(c, False):
                # Sentence punctuation is always discarded; the buffer is
                # flushed as a line only if it is long enough.
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                continue
            if c == ' ' and line_buff.endswith(' '):
                continue  # squeeze runs of spaces
            if c == "'" and line_buff.endswith(' '):
                continue  # drop apostrophes at a word start
            if c == "'" and peek(text, 1) == ' ':
                continue  # drop apostrophes at a word end
            line_buff += c
        chunk = text.read(CHUNK_SIZE)
    # NOTE(review): any text left in line_buff at EOF is still dropped, as
    # in the original — confirm whether trailing unterminated text matters.

    line_queue.put(DONE_READING)
    db_proc.join()
    error = None
    if error_queue.empty():
        source.content = ultimate_text
        session.add(source)
    else:
        error = error_queue.get()
        session.delete(source)
    result = source.id if error is None else error
    session.commit()
    session.close()
    return result
def db(request):
    """Fixture: build a fresh schema, hand a session to the test, then
    close all sessions and drop every table on teardown."""
    eng = m.get_engine(DB)
    m.Base.metadata.create_all(eng)
    yield m.get_session(DB)
    # -- teardown --
    m.Session.close_all()
    m.Base.metadata.drop_all(eng)