Beispiel #1
0
def __source_database(db, mongo, yesterday, day_seven):
    """Push qualifying company/source pairs to the extract-source table.

    Selects active ``company`` / ``source_company`` pairs whose company
    ``createTime`` is later than ``day_seven`` and whose ``modifyTime`` is
    later than ``yesterday``.  A row is skipped when any of the following
    holds (checked in this order, stopping at the first hit):

    * ``dbutil.get_company_round`` is greater than 1040,
    * the year of ``dbutil.get_company_establish_date`` is before 2016,
    * ``dbutil.get_company_corporate_id`` is among the ``newCorporateIds``
      of ``corporate_decompose`` tasks modified after ``day_seven``,
    * ``dbutil.get_company_source`` equals exactly ``{13050}``.

    Every remaining row is passed to
    ``dbutil.update_extract_source_company`` with ``67001`` and
    ``only_insert=False``.

    Args:
        db: SQL handle exposing ``query``; also forwarded to ``dbutil``.
        mongo: Mongo handle; ``task.corporate_decompose`` is read.
        yesterday: exclusive lower bound for ``company.modifyTime``.
        day_seven: exclusive lower bound for ``company.createTime`` and for
            the ``modifyTime`` of the decompose tasks.
    """
    recent_tasks = mongo.task.corporate_decompose.find(
        {'modifyTime': {'$gt': day_seven}})
    # Flatten the per-task id lists into one lookup set.
    decomposed_corporates = set(chain.from_iterable(
        [task.get('newCorporateIds', []) for task in recent_tasks]))

    sql = (
        'select company.id id, source_company.source source from company, source_company '
        'where company.createTime>%s and company.modifyTime>%s and company.id=source_company.companyId '
        'and (company.active is null or company.active="Y") and '
        '(source_company.active is null or source_company.active="Y");'
    )
    for row in db.query(sql, day_seven, yesterday):
        company_id = row.id
        # `or` short-circuits, so the helpers run in the listed order and
        # later lookups are skipped once a row is rejected.
        rejected = (
            dbutil.get_company_round(db, company_id) > 1040
            or dbutil.get_company_establish_date(db, company_id).year < 2016
            or dbutil.get_company_corporate_id(db, company_id) in decomposed_corporates
            or dbutil.get_company_source(db, company_id) == {13050}
        )
        if not rejected:
            dbutil.update_extract_source_company(
                db, 67001, row.source, company_id, only_insert=False)
Beispiel #2
0
    def __score_infos(self, db, cid):
        """Score how much information is stored for company ``cid``.

        Four counts are read from ``db``: the company row having a
        description, the company row having a ``modifyUser``, the number of
        distinct artifact types, and the number of ``company_member_rel``
        rows.  The score is the sum of

        * the share of non-zero counts, scaled so that it is at most 0.5, and
        * the sum of all counts divided by 10, capped at 0.5,

        multiplied by a source-dependent discount and rounded to 3 decimals.

        Args:
            db: SQL handle exposing ``get``; also forwarded to ``dbutil``.
            cid: company id.

        Returns:
            The discounted score as a float rounded to 3 decimals.
        """
        # (query, name of the result column holding the count)
        count_queries = (
            ('select count(*) c from company where id=%s and description is not null;',
             'c'),
            ('select count(*) c from company where id=%s and modifyUser is not null;',
             'c'),
            ('select count(distinct type) as count from artifact where companyId=%s',
             'count'),
            ('select count(*) as count from company_member_rel where companyId=%s',
             'count'),
        )

        aspects, total = 0.0, 0.0
        for sql, column in count_queries:
            count = getattr(db.get(sql, cid), column)
            aspects += (count > 0)
            total += count
        num_aspects = len(count_queries)

        sources = dbutil.get_company_source(db, cid)
        only_13050 = len(set(sources)) == 1 and 13050 in sources
        if only_13050 or len(sources) > 5:
            # NOTE(review): the attribute name is spelled "non_truested" here;
            # confirm it matches the class definition before renaming.
            discount = self.non_truested_source_discount
        elif dbutil.get_company_source(db, cid, justify=True):
            discount = 1
        else:
            discount = self.lack_trusted_source_discount

        coverage = aspects / (2 * num_aspects)
        volume = min(0.5, total / 10)
        return round((coverage + volume) * discount, 3)
Beispiel #3
0
    def score(self, cid, utags):
        """Return the tag relevance of ``cid`` for ``utags`` after discounts.

        Starts from ``CompanyTagsRelevance.score`` and multiplies the result by

        * ``self.non_trusted_source_discount`` when
          ``dbutil.get_company_source(..., justify=True)`` is empty/falsy, and
        * ``self.non_yellow_label_discount`` when none of the company's tags
          appears in ``dbutil.get_yellow_tags``.

        The result is rounded to 4 decimals.
        """
        relevance = CompanyTagsRelevance.score(self, cid, utags)

        justified_sources = dbutil.get_company_source(self.db, cid, justify=True)
        if not justified_sources:
            relevance *= self.non_trusted_source_discount

        # First element of each entry is compared against the yellow tags
        # (presumably the tag id; confirm against dbutil).
        company_tags = {
            entry[0] for entry in dbutil.get_company_tags_idname(self.db, cid)
        }
        yellow_tags = set(dbutil.get_yellow_tags(self.db))
        if company_tags.isdisjoint(yellow_tags):
            relevance *= self.non_yellow_label_discount

        return round(relevance, 4)
Beispiel #4
0
def __source_module_71001(db, mongo, yesterday, day_seven):
    """Register companies from finished 'company_job' tasks for sourcing 71001.

    Scans ``mongo.task.company`` for documents with ``processStatus`` 1,
    ``types`` 'company_job' and ``finishTime`` >= ``yesterday``.  For every
    referenced company that

    * is active (``dbutil.get_company_active`` returns 'Y'),
    * has ``dbutil.get_company_round`` no greater than 1040, and
    * has a source set equal to exactly ``{13050}``,

    the company is written through ``dbutil.update_extract_source_company``
    (67001 / 13050, ``only_insert=False``) and
    ``dbutil.update_custom_sourcing_company`` (71001, ``day_seven``).

    No establish-date or decomposed-corporate filter is applied here, so the
    ``corporate_decompose`` collection is not queried.

    Args:
        db: SQL handle forwarded to ``dbutil``.
        mongo: Mongo handle; ``task.company`` is read.
        yesterday: inclusive lower bound for the task ``finishTime``.
        day_seven: forwarded to ``dbutil.update_custom_sourcing_company``.
    """
    finished_jobs = mongo.task.company.find({
        'finishTime': {
            '$gte': yesterday
        },
        'processStatus': 1,
        'types': 'company_job'
    })
    for task in finished_jobs:
        cid = task.get('companyId')
        if dbutil.get_company_active(db, cid) != 'Y':
            continue
        if dbutil.get_company_round(db, cid) > 1040:
            continue
        if dbutil.get_company_source(db, cid) != {13050}:
            continue
        dbutil.update_extract_source_company(db,
                                             67001,
                                             13050,
                                             cid,
                                             only_insert=False)
        dbutil.update_custom_sourcing_company(db, cid, 71001, day_seven)
Beispiel #5
0
    def feed_incremental(self):
        """Announce every untracked funding that qualifies as a new funding.

        Each funding returned by ``dbutil.get_untracked_fundings`` is either
        skipped (see ``_funding_skip_reason``) or announced: a document is
        inserted into ``self.mongo.track.track`` and the funding is handed to
        ``self.track_funding`` and ``self.track_funding_for_investor_message``.
        In both cases the funding is then marked as tracked.

        An exception raised while handling one funding is logged with its
        traceback and does not stop the loop; that funding is left unmarked.
        """
        for funding in dbutil.get_untracked_fundings(self.db):
            try:
                logger_track.info('Processing track %s' % funding.id)
                cid, fid = funding.companyId, funding.id

                skip_reason = self._funding_skip_reason(funding)
                if skip_reason is not None:
                    dbutil.mark_funding_tracked(self.db, fid)
                    logger_track.info(skip_reason)
                    continue

                name = dbutil.get_company_name(self.db, cid)
                # Message text: "<name> obtained a new round of funding".
                abstract = u'%s获得新一轮融资' % name

                # NOTE(review): Collection.insert is deprecated in PyMongo 3
                # and removed in PyMongo 4 (insert_one(...).inserted_id is the
                # replacement); kept because the installed version is not
                # visible from here.
                track_msg_id = str(
                    self.mongo.track.track.insert({
                        'topic_id': 3,
                        'company_id': cid,
                        'abstract': abstract,
                        'createTime': datetime.today()
                    }))

                self.track_funding(cid, track_msg_id, fid, abstract)
                self.track_funding_for_investor_message(
                    cid, fid, dbutil.get_funding_investor_ids(self.db, fid),
                    funding.round, abstract)
                dbutil.mark_funding_tracked(self.db, fid)
                logger_track.info('%s tracked, funding id %s' % (cid, fid))
            except Exception as e:
                # Best effort: keep going with the remaining fundings.
                logger_track.exception('Failed %s, %s' % (funding.id, e))

    def _funding_skip_reason(self, funding):
        """Return the log message saying why ``funding`` is skipped, or None.

        Rules, checked in order:

        1. the funding has no funding date;
        2. the funding has no publish date;
        3. the publish date is more than 7 days in the past;
        4. the corporate's latest funding is later than this one, or has the
           same date but a different id;
        5. the company's only source is one of 13400, 13401, 13402
           (logged as "just IPO").
        """
        cid, fid = funding.companyId, funding.id
        funding_date = funding.fundingDate
        publish_date = funding.publishDate

        if not funding_date:
            return '%s not funding date' % fid
        if not publish_date:
            return '%s funding no published date' % fid
        if publish_date < datetime.now() - timedelta(days=7):
            return '%s funding published 7 days ago' % fid

        latest = dbutil.get_corporate_latest_funding(self.db,
                                                     funding.corporateId)
        if latest and (latest.fundingDate > funding_date or
                       (latest.fundingDate == funding_date and
                        latest.id != fid)):
            return '%s not new, funding id %s, had %s' % (cid, fid, latest.id)

        sources = dbutil.get_company_source(self.db, cid)
        if len(sources) == 1 and len(sources & {13400, 13401, 13402}) == 1:
            return '%s just IPO, funding id %s' % (cid, fid)

        return None