Beispiel #1
0
    def clear_items(self):
        """Triage today's saoanzi items, dump them to CSV and mail a summary.

        For every item returned by ``dbutil.get_daily_saoanzi_sources``:

        * if the company needs verification (per ``self.tcg.need_verify``) or
          its active flag is not ``'Y'``, a task is generated through
          ``self.tcg.generate_tc`` and the item status is set to ``'P'``;
        * otherwise the status is set to ``'Y'`` when ``__valid_message``
          accepts the item and to ``'N'`` when it does not.

        One row per item (company name, validator URL, check label, creation
        time, source) is collected.  When at least one row exists the rows are
        written to ``dumps/saoanzi.csv`` next to this module and mailed with
        per-label and per-source counts.  Nothing is written or sent when
        there are no items.
        """
        csv_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), u'dumps/saoanzi.csv')

        rows = []
        for item in dbutil.get_daily_saoanzi_sources(self.db, self.today):
            company_id = item.companyId
            active = dbutil.get_company_active(self.db, company_id)
            unverified = self.tcg.need_verify(company_id)
            # Computed once; drives both the status and the label in the row.
            must_check = unverified or (active != 'Y')

            if must_check:
                task = json.dumps({
                    'id': company_id,
                    'source': 'track_saoanzi'
                })
                self.tcg.generate_tc(task)
                status = 'P'
            elif self.__valid_message(item):
                status = 'Y'
            else:
                status = 'N'
            dbutil.update_saoanzi_item_status(self.db, item.saoanziItemId,
                                              status)

            company_code = dbutil.get_company_code(self.db, company_id)
            url = ("http://pro.xiniudata.com/validator/#/company/%s/overview"
                   % company_code)
            label = u'需要检查' if must_check else u'不需要检查'
            company_name = dbutil.get_company_name(self.db, company_id)
            rows.append(
                [company_name, url, label, item.createTime, item.source])

        if not rows:
            return

        # Columns are positional: 0=name, 1=url, 2=check label,
        # 3=create time, 4=source.
        frame = pandas.DataFrame(rows)
        frame.to_csv(csv_path, encoding='utf_8_sig')

        # Distinct company names per check label.
        verify_lines = []
        for label, group in frame.groupby(2):
            verify_lines.append('%s\t%s' % (label, len(set(group[0]))))
        stat_verify = '<br/>'.join(verify_lines)

        # Row count per source.
        source_lines = []
        for source, group in frame.groupby(4):
            source_lines.append('%s\t%s' % (source, len(group)))
        stat_source = '<br/>'.join(source_lines)

        stat = u'去重公司数<br/>%s<br/>每个源下的公司数<br/>%s\n' % (stat_verify,
                                                        stat_source)

        # NOTE(review): the address templates are redacted in this copy of the
        # source; they are kept verbatim.
        recipients = ';'.join([
            '*****@*****.**' % user
            for user in ['victor', 'erin', 'weiguangxiao', 'gewei']
        ])
        checked_at = self.current_check_time
        subject = u'扫案子项目列表 %s' % checked_at.strftime('%Y-%m-%d %H')
        body = u'%s检查,今天共有%s个扫案子条目<br/>%s' % \
               (checked_at.strftime('%Y-%m-%d %H:%M'), len(frame), stat)
        send_mail_file(u'烯牛扫案子后台', u'烯牛扫案子后台', "*****@*****.**",
                       recipients, subject, body, csv_path)
Beispiel #2
0
    def need_verify(self, cid, debug=False):
        """Tell whether company ``cid`` still has unverified data.

        A company whose active flag is ``'P'`` never needs verification.
        Otherwise the checks below run in order and the first one that fails
        means the company needs verification.

        :param cid: company id handed to the ``dbutil`` lookups.
        :param debug: when truthy, return a ``(flag, code)`` tuple instead of
            the bare flag, where ``code`` identifies the deciding step:
            ``-2`` active flag is ``'P'``, ``0`` company, ``2`` company alias,
            ``3`` corporate alias, ``4`` funding, ``5`` artifact, ``6`` member,
            ``7`` recruit, ``-1`` every check passed.  Code ``1`` belonged to
            a corporate check that is currently disabled.
        :return: ``bool``, or ``(bool, int)`` when ``debug`` is truthy.
        """

        def answer(flag, code):
            # Shape the result for the caller: bare flag, or flag plus code.
            return (flag, code) if debug else flag

        if dbutil.get_company_active(self.db, cid) == 'P':
            return answer(False, -2)

        # (debug code, verification lookup), evaluated in this order.
        ordered_checks = (
            (0, dbutil.get_company_verified),
            (2, dbutil.get_company_alias_verified),
            (3, dbutil.get_corporate_alias_verified),
            (4, dbutil.get_funding_verified),
            (5, dbutil.get_artifact_verified),
            (6, dbutil.get_member_verified),
            (7, dbutil.get_recruit_verified),
        )
        for code, is_verified in ordered_checks:
            if not is_verified(self.db, cid):
                return answer(True, code)
        return answer(False, -1)
Beispiel #3
0
    def create_index(self, db, cid):
        """Index one company and its known names through ``self.es.index``.

        The document is stored in index ``"xiniudata"`` with doc type
        ``'interior'`` and the company code as id.  It holds the company id,
        its normalised name (lower-cased, spaces removed), its code, its
        active flag and ``i_alias``, the list of non-blank search names
        gathered from:

        * the normalised name and its pinyin transliteration;
        * the corporate full name, with and without the substrings listed in
          ``self.domestic_locations``;
        * artifact names, company aliases and corporate aliases, each passed
          through ``self.valid_name`` (alias lists with 20 or more entries
          are skipped);
        * the corporate name and corporate full name.

        Nothing is indexed when the company has no code.

        NOTE(review): some lookups use the ``db`` argument and others use
        ``self.db``; this is preserved as found -- confirm whether both are
        meant to be the same connection.

        :param db: database handle exposing ``get(sql, *params)``.
        :param cid: company id.
        """

        def valid_names(candidates):
            # Run valid_name once per candidate and keep only truthy results.
            cleaned = (self.valid_name(candidate) for candidate in candidates)
            return [item for item in cleaned if item]

        name = db.get('select name from company where id=%s', cid).name.lower().replace(' ', '')
        code = db.get('select code from company where id=%s', cid).code
        if not code:
            return
        company = {
            'id': cid,
            'name': name,
            'code': code,
            'active': dbutil.get_company_active(self.db, cid)
        }

        alias = set()
        # short name (already lower-cased above) and its pinyin form
        alias.add(name)
        alias.add(''.join(lazy_pinyin(name, errors='ignore')))
        # full name, plus a variant with domestic location words stripped
        full = dbutil.get_company_corporate_name(self.db, cid, False)
        if full and full.strip():
            alias.add(full.lower())
            short_full = full
            for location in self.domestic_locations:
                short_full = short_full.replace(location, '')
            alias.add(short_full.lower())
        # artifact name
        aresults = dbutil.get_artifact_idname_from_cid(db, cid, True)
        if aresults:
            alias.update(valid_names(aname for _, aname in aresults))
        # alias
        aliass = dbutil.get_alias_idname(db, cid)
        if aliass and len(aliass) < 20:
            alias.update(valid_names(aname for _, aname in aliass))
        # corporate name
        corporate = dbutil.get_company_corporate_name(db, cid)
        if corporate and corporate.strip():
            alias.add(corporate.lower())
        # corporate full name
        corporate_full = dbutil.get_company_corporate_name(db, cid, False)
        if corporate_full and corporate_full.strip():
            alias.add(corporate_full.lower())
        # corporate alias
        corporate_alias = dbutil.get_corporate_alias(db, cid)
        if corporate_alias and len(corporate_alias) < 20:
            alias.update(valid_names(corporate_alias))

        # Use a distinct loop name so the company name is never rebound
        # (list-comprehension variables leak into the scope on Python 2).
        company['i_alias'] = [entry for entry in alias if entry and entry.strip()]
        self.es.index(index="xiniudata", doc_type='interior', id=code, body=company)
Beispiel #4
0
def track_updates(db, producer_comps, cid, updates):
    """Record newly found potential competitors of a company and publish it.

    Does nothing unless the company's active flag is ``'Y'`` and at least one
    entry of ``updates`` passes ``__track_comps_match``.  Otherwise a company
    message is stored through ``dbutil.update_company_message`` and, when that
    call returns a truthy message id, a ``company_message`` / ``create``
    notification carrying the id is sent to the ``"track_message_v2"`` topic.

    If sending raises ``FailedPayloadsError`` a fresh producer is built from
    ``tsbconfig.get_kafka_config()`` and the send is retried once.  The fresh
    producer is local to this call; the caller's ``producer_comps`` is not
    replaced.  A failure of the retry propagates to the caller.

    :param db: database handle passed through to the ``dbutil`` helpers.
    :param producer_comps: producer object exposing
        ``send_messages(topic, payload)``.
    :param cid: id of the company whose competitor list changed.
    :param updates: iterable of candidate competitor company ids.
    """
    if dbutil.get_company_active(db, cid) != 'Y':
        return
    updates = [update for update in updates if __track_comps_match(db, update)]
    if not updates:
        return

    comments = ','.join([dbutil.get_company_name(db, c) for c in updates])
    track_msg = u'%s发现了%s个潜在的竞争对手: %s' % (dbutil.get_company_name(
        db, cid), len(updates), comments)
    # NOTE(review): 6001 and 60 are opaque codes understood by
    # update_company_message; their meaning is not visible here.
    cmsg_id = dbutil.update_company_message(
        db,
        cid,
        track_msg,
        6001,
        60,
        ','.join([str(update) for update in updates]),
        comments=comments)
    if not cmsg_id:
        return

    # Build the payload once so the first attempt and the retry are identical.
    payload = json.dumps({
        'id': cmsg_id,
        'type': 'company_message',
        'action': 'create'
    })
    try:
        producer_comps.send_messages("track_message_v2", payload)
    except FailedPayloadsError:
        # Reconnect and retry a single time.
        url = tsbconfig.get_kafka_config()
        kafka = KafkaClient(url)
        producer_comps = SimpleProducer(kafka)
        producer_comps.send_messages("track_message_v2", payload)
Beispiel #5
0
def __source_module_71001(db, mongo, yesterday, day_seven):
    """Register recently processed companies as sourcing candidates (71001).

    Walks the ``mongo.task.company`` documents with
    ``finishTime >= yesterday``, ``processStatus == 1`` and type
    ``'company_job'``.  A company is registered only when all of these hold:

    * its active flag is ``'Y'``;
    * its round is not greater than 1040;
    * its source set is exactly ``{13050}``.

    For each such company ``dbutil.update_extract_source_company`` is called
    with ``67001`` / ``13050`` and ``dbutil.update_custom_sourcing_company``
    with ``71001`` and ``day_seven``.

    NOTE(review): the numeric codes (1040, 13050, 67001, 71001) are opaque
    here; their meaning lives in ``dbutil`` / the database.

    :param db: database handle passed through to the ``dbutil`` helpers.
    :param mongo: Mongo client exposing the ``task.company`` collection.
    :param yesterday: lower bound (inclusive) for the task ``finishTime``.
    :param day_seven: value forwarded to ``update_custom_sourcing_company``.
    """
    # The former scan of task.corporate_decompose only fed a filter that had
    # been disabled, so it is no longer executed.
    finished_tasks = mongo.task.company.find({
        'finishTime': {
            '$gte': yesterday
        },
        'processStatus': 1,
        'types': 'company_job'
    })
    for tc in finished_tasks:
        cid = tc.get('companyId')
        if dbutil.get_company_active(db, cid) != 'Y':
            continue
        if dbutil.get_company_round(db, cid) > 1040:
            continue
        if dbutil.get_company_source(db, cid) == {13050}:
            dbutil.update_extract_source_company(db,
                                                 67001,
                                                 13050,
                                                 cid,
                                                 only_insert=False)
            dbutil.update_custom_sourcing_company(db, cid, 71001,
                                                  day_seven)