Ejemplo n.º 1
0
 def test_link(self, cleanup, db):
     """Link two sources to a corpus through the CLI and check both attach."""
     labels = ('flarf', 'puke')
     first, second = (
         Source(name=label, description='blah', content='lol naw')
         for label in labels
     )
     target = Corpus(name='whee', description='bleh')
     db.add_all([target, first, second])
     db.commit()

     # Every link invocation has to exit with code 0.
     for label in labels:
         outcome = prosaic('corpus', 'link', 'whee', label)
         assert 0 == outcome.code

     # Reload the corpus before inspecting its sources.
     db.refresh(target)
     assert {second, first} == set(target.sources)
Ejemplo n.º 2
0
 def test_sources(self, cleanup, db):
     """Check that ``corpus sources`` lists linked sources and drops unlinked ones.

     Two sources are linked to a corpus, the listing is checked, one source
     is unlinked, and the listing is checked again.
     """
     flarf = Source(name='flarf', description='blah', content='lol naw')
     puke = Source(name='puke', description='blah', content='lol naw')
     corpus = Corpus(name='whee', description='bleh')
     db.add_all([corpus, flarf, puke])
     db.commit()

     # Setup commands must succeed, otherwise the listing checks below are
     # meaningless.
     assert 0 == prosaic('corpus', 'link', 'whee', 'flarf').code
     assert 0 == prosaic('corpus', 'link', 'whee', 'puke').code
     db.refresh(corpus)

     code, lines = prosaic('corpus', 'sources', 'whee')
     assert 0 == code
     assert set(lines).issuperset({'flarf', 'puke'})

     # Check the exit code so a failed unlink does not go unnoticed.
     assert 0 == prosaic('corpus', 'unlink', 'whee', 'puke').code

     code, lines = prosaic('corpus', 'sources', 'whee')
     assert 0 == code
     # Wrap in set() in both checks so the test does not depend on whether
     # the CLI helper returns a list or a set.
     remaining = set(lines)
     assert remaining.issuperset({'flarf'})
     # The unlinked source must no longer be listed.
     assert 'puke' not in remaining
Ejemplo n.º 3
0
 def test_ls(self, db, cleanup):
     """``source ls`` exits with 0 and its output covers every stored name."""
     expected = ['blarf', 'flarf', 'narf']
     db.add_all([Source(name=label, content='') for label in expected])
     db.commit()

     outcome = prosaic('source', 'ls')
     exit_code, listed = outcome
     assert 0 == exit_code
     assert listed.issuperset(set(expected))
Ejemplo n.º 4
0
 def test_rm(self, db, cleanup):
     """Removing two of three sources through the CLI leaves one row."""
     labels = ['blarf', 'flarf', 'narf']
     db.add_all([Source(name=label, content='') for label in labels])
     db.commit()
     assert 3 == db.query(Source).count()

     # Remove two of the three; each removal must exit with code 0.
     for doomed in ('flarf', 'narf'):
         exit_code, _ = prosaic('source', 'rm', doomed)
         assert 0 == exit_code

     assert 1 == db.query(Source).count()
Ejemplo n.º 5
0
    def source_new(self):
        """Create a source from the text file at ``self.args.path``.

        Builds a ``Source`` from ``self.args.source_name`` and
        ``self.args.source_description``, then hands it and the opened file
        to ``process_text``. If ``process_text`` reports an error, the error
        is printed along with a notice that the source was not saved.
        """
        name = self.args.source_name
        description = self.args.source_description
        source = Source(name=name, description=description)

        # The context manager closes the file even if process_text raises.
        with open(self.args.path, 'r') as text_file:
            result = process_text(self.db, source, text_file)

        # Only an exception counts as a failure: a successful run may hand
        # back a non-None value (such as the new source's id) rather than
        # None, and that must not be reported as an error.
        # NOTE(review): assumes failures are reported as Exception
        # instances, per process_text's annotation -- confirm.
        if isinstance(result, Exception):
            print('There was an error extracting phrases:')
            print('********')
            print(result)
            print('********')
            print("The source '{}' was not saved.".format(name))
Ejemplo n.º 6
0
def process_text(db: Database, source: Source,
                 text: IOBase) -> Optional[Exception]:
    """Split ``text`` into phrases and feed them to a ``line_handler`` process.

    ``source`` is first committed with empty content so that it has an id;
    that id is passed, together with the line and error queues, to a
    ``line_handler`` worker process. ``text`` is then read ``CHUNK_SIZE``
    characters at a time and scanned character by character:

    * a character in ``BAD_CHARS`` becomes a single space (never doubled);
    * a character in ``CLAUSE_MARKERS`` ends the current phrase if the phrase
      is longer than ``LONG_ENOUGH``, otherwise it is kept in the phrase;
    * a character in ``SENTENCE_MARKERS`` always ends the current phrase,
      which is emitted only if it is longer than ``LONG_ENOUGH``;
    * repeated spaces, and apostrophes adjacent to a space, are dropped.

    Each emitted phrase is queued as ``(line_no, phrase)`` and becomes part
    of ``source.content``. Trailing text that is never closed by a marker is
    not emitted.

    Args:
        db: handle given to ``get_session`` and to the worker process.
        source: row to populate; its ``content`` is overwritten.
        text: readable stream; also passed to ``peek`` for look-ahead when an
            apostrophe is the last character of a chunk.

    Returns:
        ``source.id`` if the error queue is empty once the worker has
        exited; otherwise the first item from the error queue, in which case
        the source is deleted. NOTE(review): the annotation promises
        ``Optional[Exception]`` but success yields the id, not ``None`` --
        confirm which contract callers rely on.
    """
    session = get_session(db)
    source.content = ''
    session.add(source)
    session.commit()  # so we can attach phrases to it. need its id.

    line_queue = Queue()
    error_queue = Queue()
    db_proc = Process(target=line_handler,
                      args=(db, line_queue, error_queue, source.id))
    db_proc.start()

    line_no = 1
    kept = []  # emitted phrases, joined once at the end
    # The buffer lives across chunks so that a phrase straddling a chunk
    # boundary is not cut in half.
    line_buff = ""

    chunk = text.read(CHUNK_SIZE)
    while chunk:
        last = len(chunk) - 1
        for i, c in enumerate(chunk):
            if BAD_CHARS.get(c, False):
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            if CLAUSE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    kept.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    # Too short to stand alone: keep the marker and go on.
                    line_buff += c
                continue
            if SENTENCE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    kept.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                line_buff = ""
                continue
            if c == ' ' and line_buff.endswith(' '):
                continue
            if c == "'":
                if line_buff.endswith(' '):
                    continue
                # Inspect the character that actually follows this one. The
                # stream position is already past the whole chunk, so only
                # consult it when the apostrophe is the chunk's last char.
                # NOTE(review): assumes peek(text, 1) returns the next
                # unread character without consuming it -- confirm.
                following = chunk[i + 1] if i < last else peek(text, 1)
                if following == ' ':
                    continue
            line_buff += c
        chunk = text.read(CHUNK_SIZE)

    line_queue.put(DONE_READING)
    db_proc.join()

    error = None
    if error_queue.empty():
        source.content = ''.join(kept)
        session.add(source)
    else:
        error = error_queue.get()
        session.delete(source)

    # Read the id before the final commit.
    result = source.id if error is None else error

    session.commit()
    session.close()

    return result
Ejemplo n.º 7
0
def process_text(db: Database,
                 source: Source,
                 text: IOBase) -> Optional[Exception]:
    """Split ``text`` into phrases and feed them to a ``line_handler`` process.

    ``source`` is first committed with empty content so that it has an id;
    that id is passed, together with the line and error queues, to a
    ``line_handler`` worker process. ``text`` is then read ``CHUNK_SIZE``
    characters at a time and scanned character by character:

    * a character in ``BAD_CHARS`` becomes a single space (never doubled);
    * a character in ``CLAUSE_MARKERS`` ends the current phrase if the phrase
      is longer than ``LONG_ENOUGH``, otherwise it is kept in the phrase;
    * a character in ``SENTENCE_MARKERS`` always ends the current phrase,
      which is emitted only if it is longer than ``LONG_ENOUGH``;
    * repeated spaces, and apostrophes adjacent to a space, are dropped.

    Each emitted phrase is queued as ``(line_no, phrase)`` and becomes part
    of ``source.content``. Trailing text that is never closed by a marker is
    not emitted.

    Args:
        db: handle given to ``get_session`` and to the worker process.
        source: row to populate; its ``content`` is overwritten.
        text: readable stream; also passed to ``peek`` for look-ahead when an
            apostrophe is the last character of a chunk.

    Returns:
        ``source.id`` if the error queue is empty once the worker has
        exited; otherwise the first item from the error queue, in which case
        the source is deleted. NOTE(review): the annotation promises
        ``Optional[Exception]`` but success yields the id, not ``None`` --
        confirm which contract callers rely on.
    """
    session = get_session(db)
    source.content = ''
    session.add(source)
    session.commit() # so we can attach phrases to it. need its id.

    line_queue = Queue()
    error_queue = Queue()
    db_proc = Process(target=line_handler,
                      args=(db, line_queue, error_queue, source.id))
    db_proc.start()

    line_no = 1
    kept = []  # emitted phrases, joined once at the end
    # The buffer lives across chunks so that a phrase straddling a chunk
    # boundary is not cut in half.
    line_buff = ""

    chunk = text.read(CHUNK_SIZE)
    while chunk:
        last = len(chunk) - 1
        for i, c in enumerate(chunk):
            if BAD_CHARS.get(c, False):
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            if CLAUSE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    kept.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    # Too short to stand alone: keep the marker and go on.
                    line_buff += c
                continue
            if SENTENCE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    kept.append(line_buff)
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                line_buff = ""
                continue
            if c == ' ' and line_buff.endswith(' '):
                continue
            if c == "'":
                if line_buff.endswith(' '):
                    continue
                # Inspect the character that actually follows this one. The
                # stream position is already past the whole chunk, so only
                # consult it when the apostrophe is the chunk's last char.
                # NOTE(review): assumes peek(text, 1) returns the next
                # unread character without consuming it -- confirm.
                following = chunk[i + 1] if i < last else peek(text, 1)
                if following == ' ':
                    continue
            line_buff += c
        chunk = text.read(CHUNK_SIZE)

    line_queue.put(DONE_READING)
    db_proc.join()

    error = None
    if error_queue.empty():
        source.content = ''.join(kept)
        session.add(source)
    else:
        error = error_queue.get()
        session.delete(source)

    # Read the id before the final commit.
    result = source.id if error is None else error

    session.commit()
    session.close()

    return result