Beispiel #1
0
 def manageWwwInfo(self, data, collection):
     """Fetch the website title for a domain and persist title/status.

     Parses the site via ``wwwInfo.start_parse``. When the parse result has
     no ``'title'`` key, only ``status`` (``'C'``) is written to the
     document. When a non-empty title is found, the status returned by
     ``self.checktitle(title)`` is stored together with ``wwwtitle``.
     ``data`` is updated in place with the same values. If
     ``self.getContactFlag`` is set and the parse result carries a
     non-empty ``'contacttool'`` entry, it is handed to
     ``manageContacttoolInfo``.

     :param data: document for the domain; must contain ``'_id'`` and
         ``'domain_name'``.
     :param collection: name of the MongoDB collection holding ``data``.
     """
     domain_name = data['domain_name']
     htmlInfo = wwwInfo.start_parse(domain_name, self.contacttool_info,
                                    self.getContactFlag)
     status = 'C'
     title = htmlInfo['title'] if 'title' in htmlInfo else ''

     # Decide what (if anything) has to be written before touching the DB.
     fields = None
     if 'title' not in htmlInfo:
         # The site yielded no title at all: record the default status.
         fields = {'status': status}
     elif title:
         status = self.checktitle(title)
         fields = {'wwwtitle': title, 'status': status}

     if fields is not None:
         mongodb = MONGODB()
         mongodb.connect()
         try:
             mongodb.getdb('mxmanage')
             mongodb.getcollection(collection)
             mongodb.updateOne({"_id": data['_id']}, {'$set': fields})
         finally:
             # Always release the connection, including the no-title path
             # and the case where the update raises.
             mongodb.close()

     if title:
         print(self.name + data['domain_name'] + ' add title' +
               '  status ' + status)
         data['wwwtitle'] = title
     data['status'] = status

     if not self.getContactFlag:
         return
     brandinfo = htmlInfo['contacttool'] if 'contacttool' in htmlInfo else {}
     if brandinfo:
         self.manageContacttoolInfo(data, collection, brandinfo)
Beispiel #2
0
    def manageMxInfo(self, data, collection):
        """Resolve the domain's MX data and sync it into its document.

        Returns without touching the database when the result of
        ``MxManage.startParseMx`` lacks ``'mxsuffix'`` or ``'mxrecord'``.
        Otherwise a MongoDB connection is opened for the duration of the
        sync and is always closed afterwards, whichever path returns.

        :param data: document for the domain; must contain ``'_id'`` and
            ``'domain_name'``; may contain ``'mxrecord'`` and ``'mx'``.
        :param collection: name of the MongoDB collection holding ``data``.
        """
        domain_name = data['domain_name']
        mx_info = MxManage.startParseMx(domain_name)
        if 'mxsuffix' not in mx_info:
            return
        if 'mxrecord' not in mx_info:
            return
        mongodbWhere = {"_id": data['_id']}
        # Initialise the MongoDB handle used for every write below.
        mongodb = MONGODB()
        mongodb.connect()
        try:
            mongodb.getdb('mxmanage')
            mongodb.getcollection(collection)
            self._syncMxInfo(data, collection, mx_info, mongodb,
                             mongodbWhere)
        finally:
            mongodb.close()

    def _syncMxInfo(self, data, collection, mx_info, mongodb, mongodbWhere):
        """Compare freshly parsed MX data with ``data`` and write changes."""
        mx_brand_info = {}
        brand_id = 0
        brand_name = ''
        mxrecord = mx_info['mxrecord']
        # Store the raw MX record the first time it is seen, so that later
        # runs can detect MX changes.
        if 'mxrecord' not in data:
            mongodb.updateOne(mongodbWhere, {'$set': {'mxrecord': mxrecord}})
            print(self.name + data['domain_name'] + ' append mxrecord')
        elif set(data['mxrecord']).issubset(set(mxrecord)):
            # Every previously stored record is still present: nothing to do.
            return

        suffix = mx_info['mxsuffix']
        # Blacklisted suffixes are skipped outright; some change every day.
        if suffix in self.mx_blacklist_suffix:
            print(self.name + data['domain_name'] + ' mx suffix in blacklist')
            return

        if suffix in self.mxSuffix:
            mx_brand_info = self.mxSuffix[suffix]
            brand_id = mx_brand_info['brand_id']
            brand_name = mx_brand_info['brand_name']
        else:
            # Unknown suffix: count it in the "not classified" SQL table.
            self._countUnclassifiedSuffix(suffix)

        def mx_set_doc():
            # Built lazily so mx_info['priority'] is only read on paths
            # that actually write.
            now = int(time.time())
            return {
                'mx': {
                    'mx': mx_info['mx'],
                    'priority': mx_info['priority'],
                    'brand_id': brand_id,
                    'brand_name': brand_name,
                    'addtime': now,
                },
                'mx_changetime': now,
            }

        if 'mx' not in data:
            # No MX info stored yet: add it.
            perdata = {'$set': mx_set_doc()}
            print(self.name + data['domain_name'] + ' add MX')
            mongodb.updateOne(mongodbWhere, perdata)
            if self.addMailCusFlag:
                addCrmData.addMailCustomer(data, mx_info, mx_brand_info,
                                           collection, 'add')
            return

        # Previous MX info: brand name, brand id, mx host and priority.
        pre_mx = data['mx']
        if brand_id != 0 and pre_mx['brand_id'] == brand_id:
            # Both old and new brand are known and identical: no change.
            return

        if MxManage.subMxSuffix(pre_mx['mx']) != MxManage.subMxSuffix(
                mx_info['mx']):
            perdata = {
                '$set': mx_set_doc(),
                # MX history; does not include the current brand entry.
                '$push': {'mxlist': pre_mx},
            }
            mongodb.updateOne(mongodbWhere, perdata)
            print(self.name + data['domain_name'] + ' change MX')
            if self.addMailCusFlag:
                addCrmData.addMailCustomer(data, mx_info, mx_brand_info,
                                           collection, 'update')
        elif brand_id and pre_mx['brand_id'] == 0:
            # Same MX suffix, but it had no brand before and has one now.
            perdata = {'$set': mx_set_doc()}
            print(self.name + data['domain_name'] + ' update brand info')
            mongodb.updateOne(mongodbWhere, perdata)

    def _countUnclassifiedSuffix(self, not_classified_suffix):
        """Insert or increment the suffix in sm_mx_suffix_notclassified."""
        # NOTE(review): the suffix originates from DNS data and is
        # concatenated straight into SQL. Switch to bound parameters if the
        # DB wrapper supports them -- its API is not visible from here.
        where = " where host = '" + not_classified_suffix + "'"
        sql = "select * from sm_mx_suffix_notclassified " + where
        try:
            db = DB()
            db.connect()
            try:
                stepCursor = db.query(sql)
                try:
                    mx_notclassified = stepCursor.fetchone()
                    if mx_notclassified:
                        updateSql = "update sm_mx_suffix_notclassified set count=" + str(
                            mx_notclassified['count'] + 1) + where
                        db.update(updateSql)
                    else:
                        insertSql = "insert into sm_mx_suffix_notclassified(`host`, `count`, `addtime`) VALUE ('" + not_classified_suffix + "','1','" + str(
                            int(time.time())) + "') "
                        db.update(insertSql)
                finally:
                    stepCursor.close()
            finally:
                db.close()
        except OperationalError as ex:
            # Best effort: the counter is bookkeeping only, so report the
            # failure and carry on with the MX sync.
            print(self.name + not_classified_suffix +
                  ' count unclassified mx suffix failed: ' + str(ex))
Beispiel #3
0
    def manageMailInfo(self, data, collection):
        """Parse the domain's mail index page and store what was found.

        Writes ``mailtitle`` and ``status`` (from
        ``self.checktitle(title, True)``) when the parse result has a
        non-empty ``'title'``, and ``mailselfbuild`` when it has a non-empty
        ``'brandInfo'``. A MongoDB connection is opened only when there is
        something to write, and is always closed afterwards.

        :param data: document for the domain; must contain ``'_id'`` and
            ``'domain_name'``.
        :param collection: name of the MongoDB collection holding ``data``.
        """
        mongodbWhere = {"_id": data['_id']}
        domainName = data['domain_name']
        brandInfo = wwwInfo.startParseMailIndex(domainName,
                                                self.mailSelfBuildInfo)

        fields = {}
        # Does the result carry a usable title?
        if 'title' in brandInfo and brandInfo['title'] != '':
            title = brandInfo['title']
            status = self.checktitle(title, True)
            print(self.name + domainName + ' get mailtitle ' + ' status:' +
                  status)
            fields['mailtitle'] = title
            fields['status'] = status
        if 'brandInfo' in brandInfo and len(brandInfo['brandInfo']) != 0:
            fields['mailselfbuild'] = brandInfo['brandInfo']

        if not fields:
            return

        mongodb = MONGODB()
        mongodb.connect()
        try:
            mongodb.getdb('mxmanage')
            mongodb.getcollection(collection)
            # The keys are disjoint, so a single $set covers both updates.
            mongodb.updateOne(mongodbWhere, {'$set': fields})
        finally:
            mongodb.close()
Beispiel #4
0
 def manageContacttoolInfo(self, data, collection, brandinfo):
     """Store the detected contact-tool brand on the domain's document.

     If ``data`` has no ``'contacttool'`` yet, ``brandinfo`` is stored as
     is. If it has one with a different ``brand_id``, ``brandinfo``
     replaces it and the previous entry is pushed onto
     ``contacttoollist``. When the ``brand_id`` is unchanged nothing is
     written and no connection is opened.

     :param data: document for the domain; must contain ``'_id'``.
     :param collection: name of the MongoDB collection holding ``data``.
     :param brandinfo: newly detected contact-tool info; must contain
         ``'brand_id'`` when ``data`` already has a ``'contacttool'``.
     """
     mongodbWhere = {"_id": data['_id']}
     perdata = {
         '$set': {
             'contacttool': brandinfo,
             'contacttool_changetime': int(time.time())
         }
     }
     if 'contacttool' in data:
         contacttool_info = data['contacttool']
         if contacttool_info['brand_id'] == brandinfo['brand_id']:
             # Same brand as before: nothing to update.
             return
         # Keep the replaced entry as history; the list does not include
         # the current brand.
         perdata['$push'] = {'contacttoollist': contacttool_info}

     mongodb = MONGODB()
     mongodb.connect()
     try:
         mongodb.getdb('mxmanage')
         mongodb.getcollection(collection)
         mongodb.updateOne(mongodbWhere, perdata)
     finally:
         # Release the connection even when the update raises.
         mongodb.close()
Beispiel #5
0
 def run(self):
     """Thread body: keep the work queue filled from MongoDB.

     Loops forever. While holding ``self.queueLock`` it checks the queue;
     when the queue is empty it opens a MongoDB connection, runs one
     production step and closes the connection again. The lock is taken
     with ``with`` so it is released on every path, including exceptions.
     When the queue still has items, or no checkpoint document exists
     for ``self.flag``, the thread sleeps one second outside the lock.
     """
     num_coll = 'mxmanage_stopnum'
     while True:
         should_wait = True
         with self.queueLock:
             if self.q.qsize() == 0:
                 print("producer are producing data")
                 # One connection per production cycle: it is closed at the
                 # end of the cycle, so it must not be reused by the next.
                 mongodb = MONGODB()
                 mongodb.connect()
                 try:
                     mongodb.getdb('mxmanage')
                     should_wait = not self._produceBatch(mongodb, num_coll)
                 finally:
                     mongodb.close()
         if should_wait:
             # Nothing to produce right now; do not spin while consumers
             # drain the queue.
             time.sleep(1)

 def _produceBatch(self, mongodb, num_coll):
     """Run one production step; return False if no checkpoint exists."""
     current_coll = self.coll[0]
     # Read the checkpoint (start/stop/collection) for this producer.
     mongodb.getcollection(num_coll)
     flagWhere = {"flag": self.flag}
     stop_info = mongodb.findOne(flagWhere)
     if not stop_info:
         return False
     start = stop_info['start']
     stop = stop_info['stop']
     collection = stop_info['collection']

     if start >= stop and start != 0:
         # Current collection is exhausted: move on to the next one, or
         # start over from the permanent list when none are left.
         del self.coll[0]
         if not self.coll:
             self.coll.extend(self.permanent_coll)
         mongodb.getcollection(num_coll)
         mongodb.updateOne(
             flagWhere, {
                 "$set": {
                     "stop": 0,
                     "start": 0,
                     "collection": self.coll[0]
                 }
             })
         return True

     # Resume from the checkpointed collection: drop list entries ahead of
     # it. This is a no-op when it is already first (== current_coll).
     if collection != current_coll:
         while self.coll and self.coll[0] != collection:
             del self.coll[0]

     if stop == 0:
         # Total is unknown: count the collection and store it as "stop".
         mongodb.getcollection(collection)
         stopnum = mongodb.count()
         mongodb.getcollection(num_coll)
         mongodb.updateOne(flagWhere,
                           {"$set": {
                               "stop": int(stopnum)
                           }})

     # Continue from where the previous batch stopped. If the data changed
     # in the meantime, duplicate values may be fetched.
     mongodb.getcollection(collection)
     mongodb.findMany(self.q, start, self.qCount)
     # Batch fetched: advance the stored start offset.
     mongodb.getcollection(num_coll)
     mongodb.updateOne(
         flagWhere, {"$set": {
             "start": int(start + self.qCount)
         }})
     return True