    def store_userinfo_to_db(self, uid_or_nickname, user_info):
        # A plain string means the crawler found nothing to parse, so there
        # is nothing to store.
        if isinstance(user_info, str):
            WeiboSearchLog().get_scheduler_logger().info(
                self.name + " nothing ! :" + user_info)
            return

        unique_user_info = UserInfo_store(
            uid_or_uname=user_info.uid_or_uname,
            nickname=user_info.nickname,
            is_persion=user_info.is_persion,
            check_or_not=user_info.check_or_not,
            fensi=user_info.fensi,
            sex=user_info.sex,
            location=user_info.location,
            check_info=user_info.check_info,
            weibo_all_nums=user_info.weibo_all_nums,
            guan_zhu_nums=user_info.guan_zhu_nums)

        # If the crawled key differs from the canonical uid/uname, record it
        # as an alias (Bie_Ming_store).
        bie_ming = None
        if unique_user_info['uid_or_uname'] != uid_or_nickname:
            bie_ming = Bie_Ming_store(
                uid_or_uname=unique_user_info['uid_or_uname'],
                bie_ming=uid_or_nickname)

        sign = 0
        try:
            unique_user_info.save()
        except NotUniqueError:
            sign = 1
            WeiboSearchLog().get_scheduler_logger().info(
                self.name + " insert to database, not unique ! " +
                unique_user_info['uid_or_uname'] + " crawl: " +
                uid_or_nickname)
        except Exception:
            sign = 2
            WeiboSearchLog().get_scheduler_logger().info(
                self.name + " insert to database, something wrong !")

        if sign == 0:
            WeiboSearchLog().get_scheduler_logger().info(
                self.name + " insert to database, success!")

        if bie_ming is None:
            return

        try:
            bie_ming.save()
        except NotUniqueError:
            WeiboSearchLog().get_scheduler_logger().info(
                self.name + " bieming already in database " +
                unique_user_info['uid_or_uname'] + " crawl: " +
                uid_or_nickname)
        except Exception:
            WeiboSearchLog().get_scheduler_logger().info(
                self.name + " bieming insert to database, something wrong !")
    def init_url_queue(self):
        global UserInfo_store
        # Only enqueue identifiers that are not already stored, either as a
        # primary record (UserInfo_store) or as an alias (Bie_Ming_store).
        for uid_or_nickname in self.uid_or_uname_list:
            if len(UserInfo_store.objects(Q(uid_or_uname=str(uid_or_nickname)) |
                                          Q(nickname=str(uid_or_nickname)))) != 0 or \
               len(Bie_Ming_store.objects(Q(uid_or_uname=str(uid_or_nickname)) |
                                          Q(bie_ming=str(uid_or_nickname)))) != 0:
                continue

            self.url_queue.put(uid_or_nickname)
        print "crawl size ::::::::: ", self.url_queue.qsize()
    def crawl(self, uid_or_nickname, is_again=False):
        # Skip identifiers that are already stored, either as a primary
        # record or as an alias.
        url = ''
        if len(UserInfo_store.objects(Q(uid_or_uname=str(uid_or_nickname)) |
                                      Q(nickname=str(uid_or_nickname)))) != 0 or \
           len(Bie_Ming_store.objects(Q(uid_or_uname=str(uid_or_nickname)) |
                                      Q(bie_ming=str(uid_or_nickname)))) != 0:
            WeiboSearchLog().get_scheduler_logger().info(
                "already in the database : " + uid_or_nickname)
            return "nothing"

        quote_uid_or_nickname = ""
        try:
            quote_uid_or_nickname = quote_plus(str(uid_or_nickname.strip()))
        except Exception:
            print traceback.format_exc()
            print uid_or_nickname

        # If quoting leaves the identifier unchanged (a plain uid/uname), the
        # profile page can be requested directly; otherwise treat it as a
        # nickname and go through the /n/ lookup endpoint.
        if quote_uid_or_nickname == uid_or_nickname:
            url = "http://weibo.cn/" + uid_or_nickname + "?f=search_0"
        else:
            url = "http://weibo.cn/n/" + quote_uid_or_nickname

        loginer = Loginer()
        cookie = loginer.get_cookie()
        proxy = loginer.get_proxy()

        craw_object = Crawler_with_proxy(url, cookie, proxy)

        WeiboSearchLog().get_scheduler_logger().info(
            self.name + " start to crawl ! " + url)

        user_info = ""
        try:
            page = craw_object.get_page()
            user_info = page_parser_from_search_for_UserInfo(page, url)
        except Exception:
            if is_again:
                # Retry once, passing the original identifier rather than the
                # already-built url.
                return self.crawl(uid_or_nickname, is_again=False)
            else:
                return user_info

        return user_info
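
These three methods are presumably driven by a worker loop that drains url_queue; that loop is not part of this snippet, so the run() method below is only a sketch of how init_url_queue, crawl and store_userinfo_to_db might fit together.

    def run(self):
        # Hypothetical driver, not in the original snippet: fill the queue,
        # crawl each identifier with one retry, and persist whatever the
        # parser returned (plain strings are simply logged by
        # store_userinfo_to_db).
        self.init_url_queue()
        while not self.url_queue.empty():
            uid_or_nickname = self.url_queue.get()
            user_info = self.crawl(uid_or_nickname, is_again=True)
            self.store_userinfo_to_db(uid_or_nickname, user_info)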
Example #5
# -*- coding: utf-8 -*-
'''
Created on 2016-05-02

@author: nlp
'''
from store_model import UserInfo_store


if __name__ == '__main__':
    # Count how many stored user records match the given uid.
    print len(UserInfo_store.objects(uid_or_uname="2080114694"))
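
The same kind of count can be run against the alias collection used by the crawler; a minimal sketch, assuming Bie_Ming_store is importable from the same store_model module (only the UserInfo_store import is confirmed above).

# -*- coding: utf-8 -*-
from store_model import Bie_Ming_store  # import path assumed, mirroring UserInfo_store

if __name__ == '__main__':
    # Count alias records whose canonical id is the given uid; the field
    # names follow the crawler code above.
    print len(Bie_Ming_store.objects(uid_or_uname="2080114694"))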