예제 #1
0
파일: rss.py 프로젝트: stes/nbot
def test_db():
    """Build and print one vocabulary list merged from every feed database.

    Every entry of ``FEED_DIR`` whose name ends in ``.mk4`` is opened with
    ``metakit.storage`` and read through ``read_database``.  When a database
    yields any feeds, each feed is converted with ``read_data`` and added to a
    fresh ``Library``; the vocabulary list generated from that library is
    cleaned and merged into a single overall ``VocabList``, which is printed
    once all entries have been processed.
    """
    merged_vocab = VocabList()
    log('searching directory: %s' % FEED_DIR)
    for entry in os.listdir(FEED_DIR):
        # Only entries carrying the '.mk4' suffix are treated as databases.
        if not entry.endswith('.mk4'):
            continue
        log('found database: %s' % entry)
        # Second argument 0 is passed through to metakit.storage unchanged
        # (presumably the open mode -- TODO confirm against the metakit docs).
        storage = metakit.storage(os.path.join(FEED_DIR, entry), 0)
        feeds = read_database(storage)
        if len(feeds) > 0:
            # The database holds feed content: index it in its own library.
            log('create library')
            library = Library()
            for feed in feeds:
                library.add_document(read_data(feed))
            vocab = library.gen_vocablist()
            # NOTE(review): the meaning of the threshold 5 is defined by
            # VocabList.clean, which is not visible here.
            vocab.clean(5)
            merged_vocab.merge(vocab)
        # Drop our reference to the storage; the original code relies on
        # this to close the database.
        storage = None
    print(merged_vocab)
예제 #2
0
파일: tools.py 프로젝트: stes/nbot
def print80(text):
    """Print *text* broken into lines of at most 80 characters.

    *text* may be a string or any iterable of strings; the pieces are joined
    before wrapping.  The text is cut purely by character count (no word
    wrapping).  An empty text still produces a single blank line.

    The previous implementation flushed its buffer only once it was longer
    than 80 characters, so every full line was 81 characters wide, and it
    emitted an extra blank line whenever the text ended exactly on a flush
    boundary.
    """
    width = 80
    # Joining keeps support for arbitrary iterables of strings while letting
    # us slice instead of concatenating one character at a time.
    text = ''.join(text)
    if not text:
        print('')
        return
    for start in range(0, len(text), width):
        print(text[start:start + width])

def printlist(list):
    """Print each element of *list* on a line of its own, in iteration order.

    Nothing is printed for an empty iterable.
    """
    # The parameter name shadows the builtin ``list``; it is left unchanged
    # because it is part of the function's signature.
    for element in list:
        print(element)

if __name__ == '__main__':
    # Manual run: fetch a start page, then fetch every page it links to,
    # store those pages as documents in a library and save the library.
    from nbot.document import Document, Library

    q = ['file:///home/stes/dislike.html']
    lib = Library()

    # Seed step: take the start URL off the queue and enqueue the hyperlinks
    # found on its page.
    # NOTE(review): the start page itself is never added to the library, and
    # links on the subsequently fetched pages are not followed -- confirm
    # that this one-level crawl is intended.
    url = q.pop(0)
    page = fetch_content(url)
    q.extend(get_hyperlinks(page))

    # Drain the queue front to back, turning each fetched page into a
    # Document.
    while len(q) > 0:
        print('currently %d elements in the queue' % len(q))
        url = q.pop(0)
        print('getting %s' % url)

        page = fetch_content(url)
        lib.add_document(Document(page))

    lib.save('res/dislike')