def expand(self):
    """Add companies related to the topic's current members to the topic.

    Two vote counters are built over the current member companies:

    * ``candidate_votes`` counts the first element of every entry in the
      ``candidates`` list of each ``comps.candidates`` Mongo document whose
      ``company`` is a member (presumably a company id - TODO confirm).
    * ``comp_votes`` counts every id returned by
      ``dbutil.get_company_comps`` for each member.

    Ids that are already members are ignored. Ids that collect enough votes
    are written with ``dbutil.update_topic_company(..., confidence=0.51)``.
    Nothing happens when the topic has fewer than 5 member companies.
    """
    members = {
        row.companyId
        for row in dbutil.get_topic_companies(self.db, self.tpid)
    }
    member_count = len(members)
    if member_count < 5:
        return

    candidate_docs = self.mongo.comps.candidates.find(
        {'company': {'$in': list(members)}})
    candidate_votes = Counter(
        entry[0]
        for doc in candidate_docs
        for entry in doc.get('candidates', [])
        if entry[0] not in members)
    comp_votes = Counter(
        comp_id
        for member in members
        for comp_id in dbutil.get_company_comps(self.db, member)
        if comp_id not in members)

    # Floor division keeps the most_common() limits integral on both
    # Python 2 and Python 3; for integer vote counts the threshold
    # comparison gives the same answer as with true division.
    comp_limit = min(member_count // 2, 50)
    for comp_id, votes in comp_votes.most_common(comp_limit):
        if votes > member_count // 5:
            dbutil.update_topic_company(
                self.db, self.tpid, comp_id, confidence=0.51)

    candidate_limit = min(member_count // 5, 30)
    for candidate_id, votes in candidate_votes.most_common(candidate_limit):
        if votes > member_count * 0.6:
            dbutil.update_topic_company(
                self.db, self.tpid, candidate_id, confidence=0.51)
def fa_relevant_track():
    """Refresh topic 52 with every company returned by get_all_fund_raising.

    For each company a topic message and a topic-company row are written.
    When a message id comes back, the two rows are linked and both ids are
    passed to ``send_msg``. After the loop the topic's last-message time is
    updated.
    """
    # topic52
    global logger_track
    db = dbcon.connect_torndb()
    week_ago = datetime.now() - timedelta(days=7)

    for cid in dbutil.get_all_fund_raising(db):
        logger_track.info('Processing %s' % cid)

        brief = dbutil.get_company_brief(db, cid)
        name = dbutil.get_company_name(db, cid)
        msg = u'%s, %s开启了新一轮融资' % (brief, name)

        # 'Y' only for a verified company that also has a brief,
        # otherwise 'P'.
        verified = dbutil.get_company_verify(db, cid) == 'Y'
        active = 'Y' if (verified and brief) else 'P'

        latest_fa = dbutil.get_company_latest_fa(db, cid, week_ago)
        tpm = dbutil.update_topic_message_withoutdup(
            db, 52, msg, active, 80, latest_fa, detail_id=cid)
        tpc = dbutil.update_topic_company(db, 52, cid, active)

        if tpm:
            dbutil.update_topic_message_company(db, tpm, tpc)
            send_msg(tpm, 'topic_message')
            send_msg(tpc, 'topic_company')
        logger_track.info('Updated %s' % cid)

    dbutil.update_last_message_time(db, 52)
def __topic_29(db, funding):
    """
    Topic 29: "BAT has made moves in these sectors again".

    Applies when the funding has at least one investor whose id is in
    {187, 217, 117}. A topic message naming the matching investors and the
    company is written, then the company is attached to the topic.
    """
    investors = set(dbutil.get_funding_investor_ids(
        db, funding.id)) & {187, 217, 117}
    if not investors:
        logger_track.info('29 not for %s' % funding.id)
        return

    publish_status = dbutil.get_topic_auto_pubilsh_status(db, 29)
    active = 'Y' if publish_status == 'Y' else 'P'
    cid = funding.companyId

    investor_names = ','.join(
        dbutil.get_investor_name(db, iid) for iid in investors)
    brief = dbutil.get_company_brief(db, cid)
    name = dbutil.get_company_name(db, cid)
    msg = u'%s投资了%s,%s' % (investor_names, brief, name)

    tpm = dbutil.update_topic_message(db, 29, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')

    tpc = dbutil.update_topic_company(db, 29, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')

    logger_track.info('29 for %s, add tpm %s' % (funding.id, tpm))
def __topic_28(db, funding):
    """
    Topic 28: Sequoia / ZhenFund / Matrix Partners / IDG.

    Applies when the funding has at least one investor whose id is in
    {114, 122, 125, 109}. A topic message naming the matching investors and
    the company is written, then the company is attached to the topic.
    """
    investors = set(dbutil.get_funding_investor_ids(
        db, funding.id)) & {114, 122, 125, 109}
    if not investors:
        logger_track.info('28 not for %s' % funding.id)
        return

    auto_publish = dbutil.get_topic_auto_pubilsh_status(db, 28) == 'Y'
    active = 'Y' if auto_publish else 'P'
    # The company comes straight from the funding row.
    cid = funding.companyId

    names = [dbutil.get_investor_name(db, iid) for iid in investors]
    joined_names = ','.join(names)
    company_brief = dbutil.get_company_brief(db, cid)
    company_name = dbutil.get_company_name(db, cid)
    msg = u'%s投资了%s,%s' % (joined_names, company_brief, company_name)

    tpm = dbutil.update_topic_message(db, 28, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')

    tpc = dbutil.update_topic_company(db, 28, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')

    logger_track.info('28 for %s, add tpm %s' % (funding.id, tpm))
def __topic_27(db, funding):
    """
    Topic 27: daily exit events.

    Only fundings whose round is 1110 or 1120 are handled. Round 1120
    produces an "acquired by <investorsRaw>" message and round 1110 a
    "completed its listing" message, as the two templates show.
    """
    global logger_track
    if funding.round not in (1110, 1120):
        logger_track.info('27 not for %s' % funding.id)
        return

    publish_status = dbutil.get_topic_auto_pubilsh_status(db, 27)
    active = 'Y' if publish_status == 'Y' else 'P'
    cid = funding.companyId

    brief = dbutil.get_company_brief(db, cid)
    name = dbutil.get_company_name(db, cid)
    if funding.round == 1120:
        msg = u'%s,%s被%s收购' % (brief, name, funding.investorsRaw)
    else:
        msg = u'%s,%s完成上市' % (brief, name)

    tpm = dbutil.update_topic_message(db, 27, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')

    tpc = dbutil.update_topic_company(db, 27, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')

    logger_track.info('27 for %s, add tpm %s' % (funding.id, tpm))
def fit_company(self, cid):
    """Attach company ``cid`` to this topic if it matches every feature group.

    Each value of ``self.features`` is a collection of tag ids. The company
    fits only when it shares at least one tag with every group. On a fit, a
    topic message and a topic-company row are written and the resulting ids
    are announced.

    Returns False when any group has no tag in common with the company,
    True otherwise.
    """
    global producer_track, logger_track
    company_features = set(dbutil.get_company_feature_tags(self.db, cid))
    for tag_ids in self.features.values():
        if company_features.isdisjoint(tag_ids):
            return False

    # All tag ids of the topic, flattened, are used to look up a comment
    # that supplies the message details.
    features_used = list(chain.from_iterable(self.features.values()))
    comment = dbutil.get_company_tags_comment(self.db, cid, features_used)
    if comment:
        message_args = (comment.get('message'), self.auto_publish,
                        comment.get('relate_type'), comment.get('relate_id'),
                        comment.get('detail_id'), comment.get('comments'))
    else:
        # No comment available: fall back to a generic message.
        message_args = (u'添加了一家新公司', self.auto_publish, 60, cid)
    tpmid = dbutil.update_topic_message(self.db, self.tpid, *message_args)
    tpcid = dbutil.update_topic_company(self.db, self.tpid, cid,
                                        self.auto_publish)

    logger_track.info(
        '%s seem to meet requirements, tpm %s, tpc %s, topic %s' %
        (cid, tpmid, tpcid, self.tpid))

    if tpmid:
        self.send_track_msg(tpmid, 'topic_message')
    if tpcid:
        self.send_track_msg(tpcid, 'topic_company')
        task_payload = {
            'source': 'track_topic',
            'id': cid,
            'detail': tpcid,
            'from': 'nlp'
        }
        self.send_msg('task_company', json.dumps(task_payload))
        dbutil.update_topic_message_company(self.db, tpmid, tpcid)
    return True
def fit(self, client):
    """Populate the topic from a rule search, then optionally expand it.

    When ``self.rules`` is set, ``client.search('topic', query=self.rules)``
    is queried and the company codes under ``['company']['data']`` are
    processed in reverse order. A company is added to the topic when it
    shares at least one feature tag with every group in ``self.features``.

    Codes with no row in the ``company`` table are skipped instead of
    raising AttributeError on ``None.id``.

    When ``self.auto_expand`` is truthy, ``self.expand()`` runs afterwards.
    """
    if self.rules:
        result = client.search('topic', query=self.rules)
        codes = result.get('company', {}).get('data', []) or []
        # reversed() preserves the original processing order without
        # mutating the list owned by the search client.
        for code in reversed(codes):
            row = self.db.get('select id from company where code=%s', code)
            if row is None:
                # The search returned a code that has no company row.
                continue
            cid = row.id
            company_features = set(
                dbutil.get_company_feature_tags(self.db, cid))
            if all(set(tag_ids) & company_features
                   for tag_ids in self.features.values()):
                dbutil.update_topic_company(self.db, self.tpid, cid)
    if self.auto_expand:
        self.expand()
def __update_company_news(db,
                          mongo,
                          cids,
                          tpid,
                          content=u'发现一家公司',
                          fund_extract=-5,
                          detail_id=None,
                          comments=None):
    """Attach companies to a topic and create a news item for each new one.

    Every company in ``cids`` is written to topic ``tpid`` with status 'P'.
    If the company was not in the topic before and the write returned an id,
    a news document is inserted into ``mongo.article.news``, and a topic
    message pointing at that document is created and linked to the company.
    """
    for cid in cids:
        already_in_topic = dbutil.exist_topic_company(db, tpid, cid)
        tpc = dbutil.update_topic_company(db, tpid, cid, 'P')
        if not tpc or already_in_topic:
            continue

        news_doc = {
            'date': datetime.utcnow(),
            'createTime': datetime.utcnow(),
            'modifyTime': datetime.utcnow(),
            'title': dbutil.get_company_name(db, cid),
            'contents': [{
                'content': content,
                'rank': 1
            }],
            'type': 61000,
            'createUser': 139,
            'fund_extract': fund_extract,
            'processStatus': 2,
            'companyIds': [int(cid)],
            'companyCodes': [dbutil.get_company_code(db, cid)],
            'topic_id': tpid
        }
        nid = mongo.article.news.insert(news_doc)
        send_msg(tpc, 'topic_company')

        # The message's relate id is the string form of the new news id.
        message_title = dbutil.get_company_name(db, cid)
        tpm = dbutil.update_topic_message(db, tpid, message_title, 'P', 10,
                                          str(nid), detail_id, comments)
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpm, 'topic_message')
def __topic_26(db, funding):
    """
    Topic 26: daily investment and financing express.

    Builds a one-line message from the company's brief and name plus a
    wording that depends on ``funding.round``, writes it as a topic message
    and attaches the company to the topic.
    """
    global logger_track, round_desc
    publish_status = dbutil.get_topic_auto_pubilsh_status(db, 26)
    active = 'Y' if publish_status == 'Y' else 'P'
    cid = funding.companyId

    brief = dbutil.get_company_brief(db, cid)
    name = dbutil.get_company_name(db, cid)
    stage = funding.round

    if stage in (0, 1111):
        msg = u'%s, %s完成了新一轮融资' % (brief, name)
    elif stage == 1131:
        msg = u'%s, %s完成了战略合并' % (brief, name)
    else:
        # Every remaining wording embeds the round description.
        if stage in (1105, 1110):
            template = u'%s, %s%s上市'
        elif stage == 1120:
            template = u'%s, %s%s'
        elif stage in (1106, 1130, 1140, 1150, 1160):
            template = u'%s, %s完成了%s'
        else:
            template = u'%s, %s完成了%s融资'
        msg = template % (brief, name, round_desc.get(stage))

    tpm = dbutil.update_topic_message(db, 26, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')

    tpc = dbutil.update_topic_company(db, 26, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')

    logger_track.info('26 for %s, add tpm %s' % (funding.id, tpm))
def __topic_11(db, mongo, funding):
    """
    Topic 11: unicorns / large financing rounds (over 100 million RMB).

    Fundings without a ``newsId`` are ignored. When the computed amount is
    at least 100000000, the title of the linked news document becomes the
    topic message and the funding's company is attached to the topic.

    A missing amount counts as 0, an unknown or missing currency gives a
    multiplier of 0, and a news id that no longer resolves to a document is
    logged and skipped instead of raising IndexError.
    """
    global logger_track
    if not funding.newsId:
        return

    amount = funding.get('investment', 0) or 0
    if funding.precise == 'Y':
        # Multipliers keyed by currency code (presumably exchange rates to
        # RMB - TODO confirm).
        # NOTE(review): conversion is applied only when precise == 'Y';
        # confirm that this is the intended branch.
        rate = {
            3010: 6.5,
            3020: 1,
            3030: 5,
            3040: 7,
            3050: 8,
            3070: 0.8
        }.get(funding.get('currency'), 0)
        investment = rate * amount
    else:
        investment = amount

    if investment < 100000000:
        return

    news = mongo.article.news.find_one({'_id': ObjectId(funding.newsId)})
    if news is None:
        logger_track.info('11 skipped for %s, news %s not found' %
                          (funding.id, funding.newsId))
        return

    active = 'Y' if dbutil.get_topic_auto_pubilsh_status(
        db, 11) == 'Y' else 'P'
    title = news.get('title', '')
    tpm = dbutil.update_topic_message(db, 11, title, active, 10,
                                      funding.newsId)
    if tpm:
        send_msg(tpm, 'topic_message')

    tpc = dbutil.update_topic_company(db, 11, funding.companyId, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')

    logger_track.info('11 for %s, add tpm %s' % (funding.id, tpm))
def track_topic_30(self, task):
    """
    Topic 30: first media coverage.

    Loads the news document named by ``task['news_id']`` and stops if it is
    missing, older than ``self.news_timeliness`` days, or carries feature
    578349 (funding news is excluded). For every company id in the task
    that was not established more than five years ago and is referenced by
    exactly one news document, a topic message and a topic-company row are
    written. When the topic auto-publishes, a create event is sent to
    Kafka for each id that was actually returned.
    """
    global producer_news_task
    news = self.mongo.article.news.find_one(
        {'_id': ObjectId(task['news_id'])})
    if news is None:
        return

    now = datetime.now()
    if news.get('date') and news['date'] < (
            now - timedelta(days=self.news_timeliness)):
        return
    # exclude funding news
    if 578349 in (news.get('features') or []):
        return

    establish_cutoff = (now - timedelta(days=365 * 5)).date()
    for cid in task.get('companyIds', []):
        # skip companies established more than 5 years ago
        # NOTE(review): assumes get_company_establish_date always returns
        # a date - confirm it cannot be None.
        if dbutil.get_company_establish_date(self.db,
                                             cid) < establish_cutoff:
            continue

        # Two documents are enough to tell "exactly one" from "more",
        # so the whole result set is not loaded.
        mentions = self.mongo.article.news.find({'companyIds': cid}).limit(2)
        if len(list(mentions)) != 1:
            continue

        active = 'Y' if dbutil.get_topic_auto_pubilsh_status(
            self.db, 30) == 'Y' else 'P'
        tpm = dbutil.update_topic_message(self.db, 30, news.get('title', ''),
                                          active, 10, task['news_id'])
        tpc = dbutil.update_topic_company(self.db, 30, cid, active)
        if tpm:
            dbutil.update_topic_message_company(self.db, tpm, tpc)
        if active != 'Y':
            continue

        # Only announce ids that exist, matching the `if tpm` / `if tpc`
        # guards used by the other topic handlers.
        payloads = [
            json.dumps({
                'id': ident,
                'type': kind,
                'action': 'create'
            }) for ident, kind in ((tpm, 'topic_message'),
                                   (tpc, 'topic_company')) if ident
        ]
        try:
            for payload in payloads:
                producer_news_task.send_messages("track_message_v2", payload)
        except FailedPayloadsError:
            # Re-create the producer and resend every payload once.
            init_kafka()
            for payload in payloads:
                producer_news_task.send_messages("track_message_v2", payload)
def fit_news(self, record):
    """Decide whether a news record belongs to this topic and, if so, file it.

    The record is rejected (returns False) when:

    * its ``processStatus`` is not 1, or it has no features;
    * a feature group with key >= 11500 shares no tag with the news
      features;
    * a feature group with key < 11500 shares no tag with either the
      company features or the news features;
    * ``self.searches`` is set and some term is absent from the joined
      ``contents`` text.

    Otherwise a topic message is written for the news, each company id in
    the record is attached to the topic, the created ids are announced via
    ``self.send_msg``, and True is returned.

    A missing or ``None`` ``companyIds`` / ``features`` value is treated as
    empty instead of raising TypeError.
    """
    global producer_track, logger_track
    if record.get('processStatus', 0) != 1:
        return False

    # Cheap rejection first: no news features means no match, so the
    # per-company tag lookups below can be skipped.
    news_features = set(record.get('features') or ())
    if not news_features:
        return False

    company_ids = record.get('companyIds') or []
    company_features = set(
        chain.from_iterable(
            dbutil.get_company_feature_tags(self.db, cid)
            for cid in company_ids))

    for key, tag_ids in self.features.items():
        tag_ids = set(tag_ids)
        if key >= 11500:
            matched = tag_ids & news_features
        else:
            matched = (tag_ids & company_features) or (tag_ids &
                                                       news_features)
        if not matched:
            return False

    # search dimension
    if self.searches:
        contents = ' '.join(
            item.get('content', '') for item in record.get('contents', []))
        if any(term not in contents for term in self.searches):
            return False

    # update topic message
    tpmsg = dbutil.update_topic_message(self.db, self.tpid,
                                        record.get('title', ''),
                                        self.auto_publish, 10, record['_id'])
    if tpmsg:
        self.send_msg(
            "track_message_v2",
            json.dumps({
                'type': 'topic_message',
                'id': tpmsg,
                'action': 'create',
                'from': 'nlp'
            }))

    # update topic company and send msg
    for cid in company_ids:
        tpc = dbutil.update_topic_company(self.db, self.tpid, cid,
                                          self.auto_publish)
        if not tpc:
            continue
        # relate topic company and message
        dbutil.update_topic_message_company(self.db, tpmsg, tpc)
        self.send_msg(
            "track_message_v2",
            json.dumps({
                'type': 'topic_company',
                'id': tpc,
                'action': 'create',
                'from': 'nlp'
            }))
        self.send_msg(
            'task_company',
            json.dumps({
                'source': 'track_topic',
                'id': cid,
                'detail': tpc
            }))
    return True