Example #1
0
def local(db='file', folder=None, uids=None):
    """Crawl the users listed in ``uids`` from this process.

    One ``CnFetcher`` is created up front and shared by every crawler.
    Each pass of the main loop takes a token from the module-level
    ``tokens`` pool, pops one uid and starts a ``UserCrawler`` for it.
    Once ``uids`` is empty the global ``give_ups`` is set to 0, which
    ends the loop.

    db     -- storage backend: 'file' or 'mongo'.
    folder -- destination folder; required when ``db`` is 'file'.
    uids   -- list of user ids. It is consumed in place (ids are popped
              from the end). Defaults to an empty list.

    An invalid ``db``/``folder`` combination is not raised to the
    caller: the ValueError is raised inside the per-uid ``try`` and is
    therefore logged once for every uid.
    """
    global give_ups

    # Imported here because this block cannot see the file's import
    # section; needed for the back-off sleep below.
    import time

    # A fresh list per call instead of a shared mutable default.
    if uids is None:
        uids = []

    create = create_cookie_file()
    # When a cookie file was just created there is nothing to reuse yet,
    # so the fetcher starts without one and logs in explicitly.
    fetcher = CnFetcher(account, pwd, cookie_file if not create else None)
    if create:
        fetcher.login(cookie_filename=cookie_file)

    while give_ups > 0:
        # Wait for a free token. Sleep with a linearly growing delay
        # rather than spinning at full CPU while the pool is empty.
        wait = 0
        while len(tokens) == 0:
            if give_ups <= 0:
                return
            wait += 1
            time.sleep(wait)

        token = tokens.pop()
        # presumably cb() hands the token back to the pool -- confirm
        # against callback()'s definition.
        cb = callback(token)

        if len(uids) == 0:
            # Nothing left to crawl: signal every loop to stop.
            # NOTE(review): cb() is not called on this path, so the token
            # popped above is not released here -- confirm that is intended.
            give_ups = 0
            continue

        uid = uids.pop()
        try:
            crawler = UserCrawler(uid, is_uid=True, fetcher=fetcher,
                                  fetch_fans=False, callbacks=cb, span=False)
            # Use the uid as reported by the crawler from here on.
            uid = crawler.uid

            if db == 'file' and folder is not None:
                storage = FileStorage(uid, folder)
            elif db == 'mongo':
                storage = MongoStorage(uid)
            else:
                raise ValueError('db must be "file" or "mongo", ' +
                                 'when is "file", you must define folder parameter.')

            if storage.crawled:
                # Already crawled: finalize the storage and run cb()
                # ourselves, since no crawler is started.
                storage.complete()
                cb()
            else:
                crawler.set_storage(storage)
                crawler.start()
        except Exception as e:
            # Deliberately not re-raised: one failing uid must not stop
            # the remaining ones. cb() runs first, then the error is logged.
            cb()
            logger.exception(e)
Example #2
0
def local(uids=None):
    """Crawl every user in ``uids`` with a single logged-in fetcher.

    uids -- list of user ids. It is consumed in place (ids are popped
            from the end). Defaults to an empty list.

    A uid whose crawl fails with ``URLError`` is retried after a
    10 second pause until it succeeds; any other exception propagates
    to the caller.
    """
    # A fresh list per call instead of a shared mutable default.
    if uids is None:
        uids = []

    fetcher = CnFetcher()
    fetcher.login()

    while len(uids) > 0:
        uid = uids.pop()
        # Keep retrying the same uid until the crawl completes without
        # a URLError.
        while True:
            try:
                crawler = UserCrawler(uid, fetcher)
                crawler.run()
                break
            except URLError as e:
                logger.exception(e)
                time.sleep(10)
Example #3
0
def dc():
    """Worker loop that receives crawl jobs over a socket and runs them.

    Each pass takes a token from the module-level ``tokens`` pool, opens
    a socket via ``create_socket()``, reads one JSON message and starts a
    ``UserCrawler`` for the user it describes. The loop runs while the
    global ``give_ups`` is positive; an empty message decrements it.
    ``stop_heartbeat()`` is always called when the function exits.

    Message keys read here: 'user', 'is_uid' (required), 'crawled' and
    'follow' (optional).
    """
    def run_callbacks(callbacks):
        # Invoke each callback in order, without arguments.
        for fn in callbacks:
            fn()

    global give_ups

    try:
        create = create_cookie_file()
        # When a cookie file was just created there is nothing to reuse
        # yet, so the fetcher starts without one and logs in explicitly.
        fetcher = CnFetcher(account, pwd, cookie_file if not create else None)
        if create:
            fetcher.login(cookie_filename=cookie_file)

        while give_ups > 0:
            # Wait for a free token, sleeping 1s, 2s, 3s, ... between checks.
            n = 0
            while len(tokens) == 0:
                if give_ups > 0:
                    n += 1
                    time.sleep(n)
                else:
                    return

            token = tokens.pop()
            # presumably cb() hands the token back to the pool -- confirm
            # against callback()'s definition.
            cb = callback(token)

            soc = create_socket()
            try:
                data = json.loads(soc.recv(buf_size))
                if data is None:
                    # JSON null: pause, run cb() and ask again.
                    time.sleep(15)
                    cb()
                    continue
                elif len(data) == 0:
                    # Empty message counts as one "give up".
                    # NOTE(review): cb() is not called on this path --
                    # confirm that is intended.
                    give_ups -= 1
                    continue

                user = data['user']
                is_uid = data['is_uid']
                crawled = data.get('crawled', False)
                follow = data.get('follow', None)

                # Monitor callbacks: call the heartbeat registration now
                # and keep the second variant to run when the job ends.
                register_heartbeat(user)()
                register_rm_cb = register_heartbeat(user, True)

                # Callbacks to run on success and on failure respectively.
                success_callbacks = (register_rm_cb, reset_error_callback)
                error_callbacks = (error_callback, register_rm_cb)

                try:
                    crawler = UserCrawler(user, is_uid=is_uid, fetcher=fetcher,
                                          fetch_fans=follow is None,
                                          callbacks=cb,
                                          success_callbacks=success_callbacks,
                                          error_callbacks=error_callbacks)
                    # The user does not exist: treat the job as done.
                    if crawler.user_not_exist or crawler.uid == 'attention':
                        cb()
                        run_callbacks(success_callbacks)
                        continue

                    uid = crawler.uid
                    storage = MongoStorage(uid, follow, user=user)

                    if crawled or storage.crawled:
                        # Already crawled: finish the job without
                        # starting the crawler.
                        cb()
                        run_callbacks(success_callbacks)
                        storage.close()
                        continue
                    else:
                        crawler.set_storage(storage)
                        crawler.start()
                except Exception as e:
                    # Deliberately not re-raised so the worker keeps
                    # serving further jobs; callbacks run, then log.
                    cb()
                    run_callbacks(error_callbacks)
                    logger.exception(e)
            finally:
                # Runs on every path, including the `continue`s above.
                soc.close()
    finally:
        # When the loop is over, stop the heartbeat.
        stop_heartbeat()