Example #1
0
def initialize():
    """Create working directories and record tables, then seed the start URLs.

    Steps, in order:

    1. Create ``define.STORAGE_PATH``, ``define.HTML_PATH`` and
       ``define.CACHE_PATH`` if they do not exist yet.
    2. Call ``_logic.create_record_table`` for every table
       ``<record.DB_TABLE>_<i>``.
    3. Initialise the bloom filter, insert one ``record.Record`` per URL in
       ``urls.init_url_list`` that the filter does not already report as
       existing, and save the filter.

    Seed URLs are spread over the tables by their position in the list
    modulo ``THREAD_COUNT``; the position advances for skipped URLs too.
    """
    for path in (define.STORAGE_PATH, define.HTML_PATH, define.CACHE_PATH):
        if not os.path.exists(path):
            os.mkdir(path)

    # Rows below are routed to table ``index % THREAD_COUNT``, so at least
    # THREAD_COUNT tables must exist.  The historical minimum of five tables
    # is kept so that anything relying on tables 0-4 keeps working.
    table_count = max(5, THREAD_COUNT)
    for i in range(table_count):
        _logic.create_record_table('%s_%s' % (record.DB_TABLE, i))

    bloom_fliter.init_bitarray()
    for index, url in enumerate(urls.init_url_list):
        if bloom_fliter.url_exist(url):
            continue
        record_obj = record.Record(
            define.UNDEFINE, url, record.STATUS_NOTYET, '', '')
        table = '%s_%s' % (record.DB_TABLE, index % THREAD_COUNT)
        _logic.insert_record(table, record_obj)
    bloom_fliter.save_bitarray()
Example #2
0
def run_listener():
    """Listen on localhost:8002 and shut the crawler down on a ``stop`` message.

    Accepts one connection at a time and reads up to 1024 bytes from it with
    a 5 second timeout.  When the payload is exactly ``stop`` the function
    calls ``stop_crawler()``, ``stop_analyzer()`` and
    ``bloom_fliter.save_bitarray()``, then returns.  Any other payload, or a
    receive timeout, just drops that connection and waits for the next one.

    Both the accepted connection and the listening socket are closed on every
    exit path, including when an unexpected exception propagates.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind(('localhost', 8002))
        sock.listen(5)
        while True:
            connection, _address = sock.accept()
            try:
                connection.settimeout(5)
                buf = connection.recv(1024)
                # b'stop' is a plain str on Python 2 and matches the bytes
                # returned by recv() on Python 3.
                if buf == b'stop':
                    print('正在停止...')
                    stop_crawler()
                    stop_analyzer()
                    bloom_fliter.save_bitarray()
                    print('已经成功保存数据')
                    break
            except socket.timeout:
                print('time out')
            finally:
                # Runs for the break, the timeout and any other error alike.
                connection.close()
    finally:
        # Release the listening port once the loop ends or fails.
        sock.close()