Esempio n. 1
0
def line_handler(db: Database, line_queue: Queue, error_queue: Queue,
                 source_id: int) -> None:
    """Consume lines from ``line_queue`` and store each one as a ``Phrase``.

    Used as the ``Process`` target in ``process_text``. Each queue item is a
    ``(line_no, line)`` pair; the ``DONE_READING`` sentinel ends the loop,
    after which all added phrases are committed in a single transaction.

    Any exception -- including a failure to load the source row or to commit
    -- is put on ``error_queue`` and the transaction is rolled back, so the
    producer can detect the failure. The session is always closed.

    Args:
        db: Database handle passed to ``get_session``.
        line_queue: Queue of ``(line_no, line)`` pairs ending in
            ``DONE_READING``.
        error_queue: Queue that receives the exception on failure.
        source_id: Primary key of the ``Source`` the phrases belong to.
    """
    session = get_session(db)
    try:
        # Inside the try so that a missing source is reported to the
        # producer instead of silently killing this worker.
        source = session.query(Source).filter(Source.id == source_id).one()

        while True:
            line_pair = line_queue.get()
            if line_pair == DONE_READING:
                break

            line_no, line = line_pair
            phrase = Phrase(stems=nlp.stem_sentence(line),
                            raw=line,
                            alliteration=nlp.has_alliteration(line),
                            rhyme_sound=nlp.rhyme_sound(line),
                            syllables=nlp.count_syllables(line),
                            line_no=line_no,
                            source=source)
            session.add(phrase)

        # Commit inside the try so commit failures also reach error_queue.
        session.commit()
    except Exception as e:
        error_queue.put(e)
        log.error('Died while processing text, rolling back')
        session.rollback()
    finally:
        # Release the session on both the success and the failure path.
        session.close()
Esempio n. 2
0
 def corpus_unlink(self):
     """Remove the source named ``self.args.source_name`` from the corpus.

     The corpus is whatever ``self.get_corpus`` returns for the session.
     ``.one()`` raises if the name does not match exactly one source.
     """
     db_session = get_session(self.db)
     target_corpus = self.get_corpus(db_session)
     by_name = db_session.query(Source).filter(
         Source.name == self.args.source_name)
     linked_source = by_name.one()
     target_corpus.sources.remove(linked_source)
     db_session.add(target_corpus)
     db_session.commit()
Esempio n. 3
0
def line_handler(db: Database,
                 line_queue: Queue,
                 error_queue: Queue,
                 source_id: int) -> None:
    """Drain ``line_queue`` and persist every line as a ``Phrase`` row.

    Items are ``(line_no, line)`` pairs; reading stops at the
    ``DONE_READING`` sentinel and the collected phrases are then committed
    together.

    If anything fails (loading the source, analysing a line, or committing)
    the exception is placed on ``error_queue`` and the transaction is rolled
    back. The session is closed in every case.

    Args:
        db: Database handle passed to ``get_session``.
        line_queue: Source of ``(line_no, line)`` pairs, terminated by
            ``DONE_READING``.
        error_queue: Receives the exception object when processing fails.
        source_id: Id of the ``Source`` row to attach phrases to.
    """
    session = get_session(db)
    try:
        # Looked up inside the try so a bad source_id is reported rather
        # than crashing the worker with an empty error queue.
        source = session.query(Source).filter(Source.id == source_id).one()

        while True:
            line_pair = line_queue.get()
            if line_pair == DONE_READING:
                break

            line_no, line = line_pair
            stems = nlp.stem_sentence(line)
            rhyme_sound = nlp.rhyme_sound(line)
            syllables = nlp.count_syllables(line)
            alliteration = nlp.has_alliteration(line)

            session.add(Phrase(stems=stems, raw=line,
                               alliteration=alliteration,
                               rhyme_sound=rhyme_sound,
                               syllables=syllables, line_no=line_no,
                               source=source))

        # A failing commit must be reported too, so it stays inside the try.
        session.commit()
    except Exception as e:
        error_queue.put(e)
        log.error('Died while processing text, rolling back')
        session.rollback()
    finally:
        # Previously the session was only closed on the error path.
        session.close()
Esempio n. 4
0
 def corpus_sources(self):
     """Print the name of each source linked to the corpus, one per line.

     When the corpus has no sources, two error messages are logged that
     explain how to link one, and nothing is printed.
     """
     db_session = get_session(self.db)
     corpus = self.get_corpus(db_session)
     linked = corpus.sources
     if len(linked) == 0:
         log.error("Corpus {} has no sources.".format(corpus.name))
         log.error("Use `prosaic corpus link 'corpus name' 'source name'` to add sources")
     for linked_source in linked:
         print(linked_source.name)
Esempio n. 5
0
 def corpus_rm(self):
     """Delete the corpus named ``self.args.corpus_name``.

     Rows in ``corpora_sources`` that reference the corpus are deleted
     first, then the ``corpora`` row itself. Both statements run as raw SQL
     on a dedicated connection, which is closed when the method finishes
     (whether or not a statement fails).
     """
     unlink_sql = """
     delete from corpora_sources
     where corpus_id = :corpus_id
     """
     delete_sql = """
     delete from corpora
     where name = :corpus_name
     """
     # Resolve the corpus before opening the connection so that a failed
     # lookup does not leave a connection checked out.
     session = get_session(self.db)
     corpus = self.get_corpus(session)

     conn = self.engine.connect()
     try:
         conn.execute(text(unlink_sql).params(corpus_id=corpus.id))
         conn.execute(text(delete_sql).params(corpus_name=self.args.corpus_name))
     finally:
         # Return the connection to the pool instead of leaking it.
         conn.close()
Esempio n. 6
0
 def poem_new(self):
     """Generate a poem from ``self.template`` and the selected corpus.

     If the corpus has no sources, two error messages are logged and
     nothing is generated. Otherwise the lines are written to
     ``self.args.output`` (newline-separated, with a trailing newline) when
     that is set, or printed to stdout one per line.

     ``first`` is applied to each item produced by ``poem_from_template``;
     presumably it extracts the line text from each item (TODO confirm).
     """
     session = get_session(self.db)
     corpus = self.get_corpus(session)
     if not corpus.sources:
         log.error("Corpus {} has no sources.".format(corpus.name))
         log.error("Use `prosaic corpus link 'corpus name' 'source name'` to add sources")
         return
     poem_lines = map(first, poem_from_template(self.template, self.db, corpus.id))
     output_filename = self.args.output
     if output_filename:
         with open(output_filename, 'w') as f:
             # str.join is a method of the separator, not of the list; the
             # previous list(...).join('\n') raised AttributeError.
             f.write('\n'.join(poem_lines) + '\n')
         log.debug('poem written to {}'.format(output_filename))
     else:
         for line in poem_lines:
             print(line)
Esempio n. 7
0
    def source_rm(self):
        """Delete the source named ``self.args.source_name`` and its rows.

        Removes, in order: the ``corpora_sources`` links for the source, its
        ``phrases`` rows, and finally the ``sources`` row. The statements run
        as raw SQL on a dedicated connection that is always closed afterwards.
        ``.one()`` raises if the name does not match exactly one source.
        """
        # ugh i know; TODO figure out how to do this with ORM
        unlink_sql = """
        delete from corpora_sources
        where source_id = :source_id
        """

        source_delete_sql = """
        delete from sources
        where name = :source_name
        """
        phrase_delete_sql = """
        delete from phrases
        where source_id = :source_id
        """
        # Look the source up first so a missing name fails before a
        # connection is checked out.
        session = get_session(self.db)
        name = self.args.source_name
        source_id = first(session.query(Source.id).filter(Source.name == name).one())

        conn = self.engine.connect()
        try:
            conn.execute(text(unlink_sql).params(source_id=source_id))
            conn.execute(text(phrase_delete_sql).params(source_id=source_id))
            conn.execute(text(source_delete_sql).params(source_name=name))
        finally:
            # Return the connection to the pool instead of leaking it.
            conn.close()
Esempio n. 8
0
 def source_ls(self):
     """Print the name of every ``Source`` row, one per line."""
     db_session = get_session(self.db)
     name_rows = db_session.query(Source.name)
     for row in name_rows:
         # Each row is passed through ``first`` before printing.
         print(first(row))
Esempio n. 9
0
 def corpus_new(self):
     """Create and commit a corpus from the CLI arguments.

     The name comes from ``self.args.corpus_name`` and the description
     from ``self.args.corpus_desc``.
     """
     db_session = get_session(self.db)
     cli_args = self.args
     new_corpus = Corpus(
         name=cli_args.corpus_name,
         description=cli_args.corpus_desc,
     )
     db_session.add(new_corpus)
     db_session.commit()
Esempio n. 10
0
 def corpus_ls(self):
     """Print the name of every ``Corpus`` row, one per line."""
     db_session = get_session(self.db)
     name_rows = db_session.query(Corpus.name)
     for row in name_rows:
         # Each row is passed through ``first`` before printing.
         print(first(row))
Esempio n. 11
0
def process_text(db: Database, source: Source,
                 text: IOBase) -> Optional[Exception]:
    """Split ``text`` into phrases and store them under ``source``.

    ``source`` is committed first (with empty content) so it has an id, then
    a ``line_handler`` process is started to persist the phrases. The text is
    read in ``CHUNK_SIZE`` pieces and scanned character by character:

    * characters in ``BAD_CHARS`` become a single space;
    * a clause or sentence marker ends the current phrase when it is longer
      than ``LONG_ENOUGH``; a shorter phrase keeps the clause marker as text
      but is dropped at a sentence marker;
    * repeated spaces are collapsed, and an apostrophe next to a space is
      skipped.

    The phrase buffer is carried across chunk boundaries. Text after the
    last marker is not emitted.

    Args:
        db: Database handle passed to ``get_session`` and the worker.
        source: The ``Source`` row that will own the phrases.
        text: Readable stream to process.

    Returns:
        ``source.id`` on success (despite the annotation), or the exception
        reported by the worker on failure, in which case ``source`` is
        deleted.
    """
    session = get_session(db)
    line_no = 1  # lol
    kept_lines = []  # joined once at the end instead of repeated +=
    source.content = ''
    session.add(source)
    session.commit()  # so we can attach phrases to it. need its id.
    line_queue = Queue()
    error_queue = Queue()
    db_proc = Process(target=line_handler,
                      args=(db, line_queue, error_queue, source.id))
    db_proc.start()

    # Kept outside the chunk loop: resetting it per chunk silently dropped
    # any phrase that straddled a chunk boundary.
    line_buff = ""
    chunk = text.read(CHUNK_SIZE)
    while chunk:
        last_idx = len(chunk) - 1
        for idx, c in enumerate(chunk):
            if BAD_CHARS.get(c, False):
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            if CLAUSE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    kept_lines.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    line_buff += c
                continue
            if SENTENCE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    kept_lines.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                line_buff = ""
                continue
            if c == ' ' and line_buff.endswith(' '):
                continue
            if c == "'":
                if line_buff.endswith(' '):
                    continue
                # The stream is already positioned past this chunk, so the
                # following character comes from the chunk itself; only at
                # the chunk's last character do we peek into the stream.
                if idx < last_idx:
                    next_char = chunk[idx + 1]
                else:
                    next_char = peek(text, 1)
                if next_char == ' ':
                    continue
            line_buff += c
        chunk = text.read(CHUNK_SIZE)

    line_queue.put(DONE_READING)
    db_proc.join()

    if error_queue.empty():
        source.content = ''.join(kept_lines)
        session.add(source)
        result = source.id
    else:
        result = error_queue.get()
        session.delete(source)

    session.commit()
    session.close()

    return result
Esempio n. 12
0
def db(request):
    """Yield a session for a freshly created schema, then tear it down.

    All tables in ``m.Base.metadata`` are created before the yield; after
    it, all sessions are closed and the tables are dropped again.
    """
    test_engine = m.get_engine(DB)
    schema = m.Base.metadata
    schema.create_all(test_engine)
    yield m.get_session(DB)
    # Teardown: close sessions before dropping the tables.
    m.Session.close_all()
    schema.drop_all(test_engine)
Esempio n. 13
0
def db(request):
    """Provide a database session backed by newly created tables.

    Setup creates every table in ``m.Base.metadata``; teardown (after the
    yield) closes all sessions and drops those tables.
    """
    bound_engine = m.get_engine(DB)
    metadata = m.Base.metadata
    metadata.create_all(bound_engine)
    yield m.get_session(DB)
    # Sessions are closed first, then the tables are dropped.
    m.Session.close_all()
    metadata.drop_all(bound_engine)
Esempio n. 14
0
def process_text(db: Database,
                 source: Source,
                 text: IOBase) -> Optional[Exception]:
    """Break ``text`` into phrases and persist them for ``source``.

    The source is committed up front (with empty content) to obtain its id,
    and a ``line_handler`` worker process is started to write the phrases.
    ``text`` is read ``CHUNK_SIZE`` characters at a time and scanned:

    * ``BAD_CHARS`` are replaced by a single space;
    * at a clause or sentence marker the buffered phrase is emitted if it is
      longer than ``LONG_ENOUGH``; a short phrase keeps a clause marker as
      ordinary text and is discarded at a sentence marker;
    * duplicate spaces are collapsed and an apostrophe adjacent to a space
      is skipped.

    The buffer persists across chunks, so phrases may span chunk
    boundaries. Trailing text without a closing marker is not emitted.

    Args:
        db: Database handle passed to ``get_session`` and the worker.
        source: The ``Source`` row the phrases are attached to.
        text: Readable stream to process.

    Returns:
        ``source.id`` when the worker reported no error (despite the
        annotation); otherwise the worker's exception, after ``source`` has
        been deleted.
    """
    session = get_session(db)
    line_no = 1 # lol
    emitted = []  # joined once at the end instead of repeated +=
    source.content = ''
    session.add(source)
    session.commit() # so we can attach phrases to it. need its id.
    line_queue = Queue()
    error_queue = Queue()
    db_proc = Process(target=line_handler,
                      args=(db, line_queue, error_queue, source.id))
    db_proc.start()

    # Initialised once: clearing the buffer for every chunk lost any phrase
    # that crossed a chunk boundary.
    line_buff = ""
    chunk = text.read(CHUNK_SIZE)
    while chunk:
        final_pos = len(chunk) - 1
        for pos, c in enumerate(chunk):
            if BAD_CHARS.get(c, False):
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            if CLAUSE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    emitted.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    line_buff += c
                continue
            if SENTENCE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    emitted.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                line_buff = ""
                continue
            if c == ' ' and line_buff.endswith(' '):
                continue
            if c == "'":
                if line_buff.endswith(' '):
                    continue
                # Look at the next character of this chunk; the stream
                # itself has already advanced past the chunk, so peeking is
                # only correct for the chunk's final character.
                if pos < final_pos:
                    following = chunk[pos + 1]
                else:
                    following = peek(text, 1)
                if following == ' ':
                    continue
            line_buff += c
        chunk = text.read(CHUNK_SIZE)

    line_queue.put(DONE_READING)
    db_proc.join()

    if error_queue.empty():
        source.content = ''.join(emitted)
        session.add(source)
        result = source.id
    else:
        result = error_queue.get()
        session.delete(source)

    session.commit()
    session.close()

    return result