def __process_artifact_2002_2003(self, aid, new_version, old_versions, yesterday, logger, market):
    """Record a company message about a new version of artifact ``aid``.

    Code 2002 is used when ``self.__import_update`` reports a major update,
    otherwise code 2003.  A non-empty result from
    ``dbutil.update_detail_company_message`` is forwarded to
    ``self.send_company_message_msg``.  ``yesterday`` is accepted but unused.
    """
    company_id = dbutil.get_artifact_company(self.db, aid)
    artifact_name = dbutil.get_artifact_name(self.db, aid)

    # Only the wording and the numeric code differ between the two cases.
    if self.__import_update(new_version, old_versions):
        template, code = u'%s的一款%s应用%s有重大版本%s更新', 2002
    else:
        template, code = u'%s的一款%s应用%s发布了新版本%s', 2003

    text = template % (dbutil.get_company_name(self.db, company_id), market,
                       self.normalize_artifact_name(artifact_name), new_version)
    note = 'version: %s, %s' % (new_version, artifact_name)
    message_id = dbutil.update_detail_company_message(
        self.db, company_id, text, code, 20, aid, new_version, comments=note)

    if message_id:
        self.send_company_message_msg(message_id)
    if logger:
        logger.info('2002/2003, %s, mysql: %s' % (company_id, message_id))
def clean(model='check', min_count=30): mongo = dbcon.connect_mongo() db = dbcon.connect_torndb() if model == 'check': cids = {} for record in mongo.article.news.find({ 'type': { '$in': [60001, 60002, 60003] }, 'processStatus': 1, 'companyIds': { '$ne': [] } }): for cid in record.get('companyIds', []): cids[cid] = cids.get(cid, 0) + 1 for cid, count in sorted(filter(lambda y: y[1] > min_count, cids.iteritems()), key=lambda x: -x[1]): try: print dbutil.get_company_name(db, cid), cid, count except Exception, e: print cid, e
def __topic_27(db, funding):
    """
    Daily exit events (topic 27).

    Handles fundings whose round is 1110 or 1120; anything else is only
    logged.  Round 1120 uses the "acquired by" wording, round 1110 the
    "completed listing" wording.
    """
    global logger_track

    if funding.round not in (1110, 1120):
        logger_track.info('27 not for %s' % funding.id)
        return

    if dbutil.get_topic_auto_pubilsh_status(db, 27) == 'Y':
        active = 'Y'
    else:
        active = 'P'
    cid = funding.companyId
    brief = dbutil.get_company_brief(db, cid)
    name = dbutil.get_company_name(db, cid)
    if funding.round == 1120:
        msg = u'%s,%s被%s收购' % (brief, name, funding.investorsRaw)
    else:
        msg = u'%s,%s完成上市' % (brief, name)

    tpm = dbutil.update_topic_message(db, 27, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')
    tpc = dbutil.update_topic_company(db, 27, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')
    logger_track.info('27 for %s, add tpm %s' % (funding.id, tpm))
def test(self): clf = joblib.load('models/175747.20180311.model') truth_cids = [ t[0] for t in dbutil.get_company_from_tag(self.db, 175747, True) if t[1] == 'Y' ] standard = [list(self.feeder.feed_seged(cid)) for cid in truth_cids] standard = [[self.w2v[w] for w in t if w in self.w2v] for t in standard if t] standard = [np.mean(t, axis=0) for t in standard if t] predict = clf.predict_proba(standard) for i in range(len(truth_cids)): if predict[i][0] > 1 - self.auto_f1_threshold: print dbutil.get_company_name(self.db, truth_cids[i]), predict[i][0]
def fa_relevant_track():
    """Topic 52: publish a message for every company returned as fund raising.

    The message is active ('Y') only when the company is verified and has a
    brief; otherwise it is pending ('P').  After all companies are handled
    the last-message time of topic 52 is refreshed.
    """
    global logger_track
    db = dbcon.connect_torndb()
    week_ago = datetime.now() - timedelta(days=7)

    for cid in dbutil.get_all_fund_raising(db):
        logger_track.info('Processing %s' % cid)
        brief = dbutil.get_company_brief(db, cid)
        msg = u'%s, %s开启了新一轮融资' % (brief, dbutil.get_company_name(db, cid))
        verified = dbutil.get_company_verify(db, cid) == 'Y'
        active = 'Y' if (verified and brief) else 'P'

        latest_fa = dbutil.get_company_latest_fa(db, cid, week_ago)
        tpm = dbutil.update_topic_message_withoutdup(
            db, 52, msg, active, 80, latest_fa, detail_id=cid)
        tpc = dbutil.update_topic_company(db, 52, cid, active)
        if tpm:
            dbutil.update_topic_message_company(db, tpm, tpc)
            send_msg(tpm, 'topic_message')
            send_msg(tpc, 'topic_company')
        logger_track.info('Updated %s' % cid)

    dbutil.update_last_message_time(db, 52)
def feed(self, cid, today=None):
    """Create a topic-3 track record when ``cid`` has a recent funding.

    Looks for fundings in the last ``self.check_period`` hours before
    ``today`` (default: now).  When one exists, a track document is inserted
    and its id is announced on the ``track_message`` channel.  Returns None.
    """
    global producer_track
    if not today:
        today = datetime.today()
    window_start = today - timedelta(hours=self.check_period)
    fundings = dbutil.get_company_funding(self.db, cid, (window_start, today))
    if not fundings:
        return

    abstract = u'%s获得新一轮融资' % dbutil.get_company_name(self.db, cid)
    amount = self.__process_amount(fundings[0])
    if amount:
        abstract = u'%s, 融资额%s' % (abstract, amount)

    document = {
        'topic_id': 3,
        'company_id': cid,
        'abstract': abstract,
        'createTime': today
    }
    track_msg_id = str(self.mongo.track.track.insert(document))
    notice = {'id': track_msg_id, 'type': 'track', 'topic_id': 3}
    producer_track.send_messages("track_message", json.dumps(notice))
def classify_android_black():
    """Tag companies whose Android artifact shows a download explosion.

    For every row from ``dbutil.get_android_explosion`` whose company was
    established in 2006 or later, tag 309126 is applied, the artifact is
    marked, a 3201 company message is written and the company id is sent to
    the ``keyword_v2`` channel.  Failures are logged per row and do not stop
    the loop.
    """
    global logger_yl, producer_tag
    init_kafka()
    db = dbcon.connect_torndb()

    for cid, aid, score, source in dbutil.get_android_explosion(db):
        established = dbutil.get_company_establish_date(db, cid)
        if established.year < 2006:
            continue
        try:
            # tag 309126: download surge
            dbutil.update_company_tag(db, cid, 309126, score, "Y")
            dbutil.mark_android_explosion(db, aid)
            dbutil.update_company_tag_comment(db, cid, 309126, 30, aid, source)
            text = u'%s旗下Android产品近期下载量激增' % dbutil.get_company_name(db, cid)
            dbutil.update_continuous_company_message(db, cid, text, 3201, 30, aid, 14, source)
            producer_tag.send_messages("keyword_v2", json.dumps({"id": cid}))
            logger_yl.info(
                'Android Explosion Artifact: company %s, artifact %s' % (cid, aid))
        except Exception as e:
            logger_yl.exception(
                'Failed Android Explosion Artifact: company %s, artifact %s ' % (cid, aid))
def update_all_afterfunding(self, today=None):
    """Insert topic-6 reminders for companies whose last funding is aging.

    For ``repeat`` in 1..3 the fundings dated between
    ``today - (1 + check_period * repeat)`` days and
    ``today - check_period * repeat`` days are fetched.  When such a funding
    is still the corporate's latest one, a track document is inserted for
    every company of that corporate.

    :param today: reference datetime; defaults to ``datetime.today()``.
    """
    now = datetime.today() if not today else today
    day = now.date()
    # BSON has no date-only type and PyMongo refuses datetime.date values,
    # so the creation time is stored as midnight of the reference day.
    created = datetime.combine(day, datetime.min.time())

    for repeat in xrange(1, 4):
        window = (day - timedelta(days=1 + self.check_period * repeat),
                  day - timedelta(days=self.check_period * repeat))
        for funding in dbutil.get_funding_by_date(self.db, window):
            copid = funding.corporateId
            # The latest funding depends only on the corporate, so look it up
            # once instead of once per company.
            latest = dbutil.get_corporate_latest_funding(self.db, copid)
            if not latest or latest.id != funding.id:
                continue
            for cid in dbutil.get_corporate_companies(self.db, copid):
                self.mongo.track.track.insert({
                    'topic_id': 6,
                    'company_id': cid,
                    'abstract': u'%s距离上一次融资已经过去%s个月了' %
                                (dbutil.get_company_name(self.db, cid), repeat * 3),
                    'createTime': created
                })
def __topic_28(db, funding):
    """
    Sequoia, ZhenFund, Matrix Partners, IDG: investor ids 114, 122, 125, 109

    Publishes a topic-28 message when at least one of these investors takes
    part in ``funding``; otherwise only logs that the funding was skipped.
    """
    watched = {114, 122, 125, 109}
    matched = set(dbutil.get_funding_investor_ids(db, funding.id)) & watched
    if not matched:
        logger_track.info('28 not for %s' % funding.id)
        return

    if dbutil.get_topic_auto_pubilsh_status(db, 28) == 'Y':
        active = 'Y'
    else:
        active = 'P'
    cid = funding.companyId
    names = ','.join([dbutil.get_investor_name(db, iid) for iid in matched])
    brief = dbutil.get_company_brief(db, cid)
    company = dbutil.get_company_name(db, cid)
    msg = u'%s投资了%s,%s' % (names, brief, company)

    tpm = dbutil.update_topic_message(db, 28, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')
    tpc = dbutil.update_topic_company(db, 28, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')
    logger_track.info('28 for %s, add tpm %s' % (funding.id, tpm))
def track_2004(self, logger=None):
    """
    update every day

    Writes a 2004 company message for each artifact linked to an iTunes
    record flagged ``offline_itunes == 'Y'`` within the last 24 hours.
    """
    yesterday = datetime.now() - timedelta(days=1)
    offline_query = {
        'offline_itunes': 'Y',
        'offlineitunesDetectTime': {'$gt': yesterday}
    }
    # The result is materialised before any MySQL work starts.
    offline_apps = list(self.mongo.market.itunes.find(offline_query))

    for app in offline_apps:
        for aid in dbutil.get_artifacts_from_iOS(self.db, app['trackId']):
            cid = dbutil.get_artifact_company(self.db, aid)
            aname = dbutil.get_artifact_name(self.db, aid)
            text = u'%s的一款iOS应用%s下架' % (
                dbutil.get_company_name(self.db, cid),
                self.normalize_artifact_name(aname))
            message_id = dbutil.update_daily_company_message(
                self.db, cid, text, 2004, 20, aid, yesterday, comments=aname)
            if message_id:
                self.send_company_message_msg(message_id)
            if logger:
                logger.info('2004, %s, mysql: %s' % (cid, message_id))
def track_2001(self, logger=None):
    """
    update every day

    Writes a 2001 company message for every artifact returned by
    ``dbutil.get_artifacts_by_date`` for yesterday, provided
    ``self.__valid_artifact_release_date`` accepts it against a cut-off of
    ``self.max_2001_release_gap`` days ago.

    :param logger: optional logger for progress and result lines.
    """
    yesterday = datetime.now() - timedelta(days=1)
    day_gap = datetime.now() - timedelta(days=self.max_2001_release_gap)

    for aid, cid, atype, domain in dbutil.get_artifacts_by_date(self.db, yesterday):
        if logger:
            logger.info('Processing, aid %s, cid %s, type %s, domain %s' %
                        (aid, cid, atype, domain))
        if not self.__valid_artifact_release_date(atype, domain, day_gap):
            # Skip only this artifact; returning here would silently drop
            # every artifact that follows it in the result set.
            continue
        aname = dbutil.get_artifact_name(self.db, aid)
        msg = u'%s发现了一款新的%s应用 %s' % \
              (dbutil.get_company_name(self.db, cid),
               dbutil.get_artifact_type(self.db, aid, string=True),
               self.normalize_artifact_name(aname))
        feed_back = dbutil.update_daily_company_message(
            self.db, cid, msg, 2001, 20, aid, yesterday, comments=aname)
        if feed_back:
            self.send_company_message_msg(feed_back)
        if logger:
            logger.info('2001, %s, mysql: %s, check date %s' %
                        (cid, feed_back, yesterday))
def __topic_29(db, funding):
    """
    BAT made moves in these sectors again: investor ids 187, 217, 117

    Publishes a topic-29 message when at least one of these investors takes
    part in ``funding``; otherwise only logs that the funding was skipped.
    """
    watched = {187, 217, 117}
    matched = set(dbutil.get_funding_investor_ids(db, funding.id)) & watched
    if not matched:
        logger_track.info('29 not for %s' % funding.id)
        return

    if dbutil.get_topic_auto_pubilsh_status(db, 29) == 'Y':
        active = 'Y'
    else:
        active = 'P'
    cid = funding.companyId
    names = ','.join([dbutil.get_investor_name(db, iid) for iid in matched])
    brief = dbutil.get_company_brief(db, cid)
    company = dbutil.get_company_name(db, cid)
    msg = u'%s投资了%s,%s' % (names, brief, company)

    tpm = dbutil.update_topic_message(db, 29, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')
    tpc = dbutil.update_topic_company(db, 29, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')
    logger_track.info('29 for %s, add tpm %s' % (funding.id, tpm))
def update_3109(self, today=None):
    """Write 3109 messages for apps that recently broke into the top 10.

    For every rank type and genre, an app is a candidate when it has at
    least 7 rank records with rank <= 10 dated in (today - 8 days, today].
    It is kept when, in (today - 90 days, today - 8 days], it has no rank
    record at all or the last of its up-to-ten best ranks is worse than 30.
    A message is written for each linked artifact whose company round is
    below 1060.
    """
    global logger_track
    today = today or datetime.today()
    one_week_before = today - timedelta(days=8)
    three_month_before = today - timedelta(days=90)

    def lacked_top_history(track_id, rank_type, genre):
        history = list(self.mongo.trend.appstore_rank.find({
            'trackId': track_id, 'type': rank_type, 'genre': genre,
            'date': {'$gt': three_month_before, '$lte': one_week_before}
        }).sort([('rank', 1)]).limit(10))
        if not history:
            return True
        return history[-1]['rank'] > 30

    genres = self.__get_genres()
    for t in ['free', 'charge', 'grossing']:
        for g in genres:
            pipeline = [
                {'$match': {'date': {'$gt': one_week_before, '$lte': today},
                            'rank': {'$lte': 10}, 'type': t, 'genre': g}},
                {'$group': {'_id': '$trackId', 'times': {'$sum': 1}}},
                {'$match': {'times': {'$gte': 7}}}]
            candidates = [item['_id'] for item in
                          list(self.mongo.trend.appstore_rank.aggregate(pipeline))]
            # Evaluate the history filter for all candidates before writing.
            outstanding = [tid for tid in candidates if lacked_top_history(tid, t, g)]

            for track_id in outstanding:
                for aid in dbutil.get_artifacts_from_iOS(self.db, track_id):
                    cid = dbutil.get_artifact_company(self.db, aid)
                    if not dbutil.get_company_round(self.db, cid) < 1060:
                        continue
                    text = u'%s旗下 %s 近期在AppStore%s排名表现突出' % (
                        dbutil.get_company_name(self.db, cid),
                        self.__normalize_iOS_name(dbutil.get_artifact_name(self.db, aid)),
                        self.__get_rank_name(g, t))
                    detail = '%s,%s' % (g, t)
                    dbutil.update_continuous_company_message(
                        self.db, cid, text, 3109, 30, aid, 7, detail)
                    logger_track.info('3109, %s, %s, %s, %s' % (cid, aid, t, g))
def new_dominator(self, today=None, type='free', genre=None):
    """Return apps that newly dominate an App Store chart.

    An app qualifies when it has at least 7 rank records with rank <= 10 in
    (today - 8 days, today] for the given ``type`` and ``genre``, and in
    (today - 90 days, today - 8 days] it either has no rank record or the
    last of its up-to-ten best ranks is worse than 30.  Only artifacts whose
    company round is below 1060 are reported.

    :param today: reference datetime; defaults to ``datetime.today()``.
    :param type: rank type to inspect (name kept for caller compatibility
        although it shadows the builtin).
    :param genre: genre to inspect.
    :return: set of ``(company id, company name, app name)`` tuples.
    """
    today = today or datetime.today()
    one_week_before, three_month_before = today - timedelta(days=8), today - timedelta(days=90)
    dominate_domain = [item['_id'] for item in list(self.mongo.trend.appstore_rank.aggregate([
        {'$match': {'date': {'$gt': one_week_before, '$lte': today}, 'rank': {'$lte': 10},
                    'type': type, 'genre': genre}},
        {'$group': {'_id': '$trackId', 'times': {'$sum': 1}}},
        {'$match': {'times': {'$gte': 7}}}]))]

    def never_dominate_before(track_id):
        top_rank = list(self.mongo.trend.appstore_rank.find(
            {'trackId': track_id, 'type': type, 'genre': genre,
             'date': {'$gt': three_month_before, '$lte': one_week_before}}
        ).sort([('rank', 1)]).limit(10))
        return top_rank[-1]['rank'] > 30 if top_rank else True

    new_dominate_domain = [tid for tid in dominate_domain if never_dominate_before(tid)]
    new_dominator = set()
    for track_id in new_dominate_domain:
        for aid in dbutil.get_artifacts_from_iOS(self.db, track_id):
            cid = dbutil.get_artifact_company(self.db, aid)
            corp_round = dbutil.get_company_round(self.db, cid)
            if corp_round < 1060:
                # Let the driver bind the id instead of interpolating it into
                # the SQL text.
                app_name = self.db.query('select name from artifact where id = %s', aid)[0]['name']
                company_name = dbutil.get_company_name(self.db, cid)
                logger_track.info('\nDate: %s Genre: %s Type: %s\nDominator: %s Company: %s\n\n' %
                                  (today, genre, type, app_name, company_name))
                new_dominator.add((cid, company_name, app_name))
    return new_dominator
def feed_4004(self, cid, today):
    """Yield a 4004 company message for each recent job in a core position.

    A job counts when its lower-cased ``position`` is contained in
    ``self.recruit_management``.  ``today`` is accepted but unused.
    """
    for job in self.__load_recent_jobs(cid, 1):
        if job.get('position', '').lower() not in self.recruit_management:
            continue
        company = dbutil.get_company_name(self.db, cid)
        text = u'%s正在招聘核心职位%s' % (company, job.get('position', ''))
        yield dbutil.update_company_message(self.db, cid, text, 4004, 40, job['_id'])
def __update_company_news(db, mongo, cids, tpid, content=u'发现一家公司', fund_extract=-5, detail_id=None, comments=None):
    """Attach companies to topic ``tpid`` and create a news item for new ones.

    For every company not already linked to the topic (and for which
    ``dbutil.update_topic_company`` returns a value) a type-61000 news
    document is inserted, a topic message referencing it is created, and both
    the topic-company and topic-message ids are sent out.
    """
    for cid in cids:
        existed = dbutil.exist_topic_company(db, tpid, cid)
        tpc = dbutil.update_topic_company(db, tpid, cid, 'P')
        if existed or not tpc:
            continue

        news = {
            'date': datetime.utcnow(),
            'createTime': datetime.utcnow(),
            'modifyTime': datetime.utcnow(),
            'title': dbutil.get_company_name(db, cid),
            'contents': [{
                'content': content,
                'rank': 1
            }],
            'type': 61000,
            'createUser': 139,
            'fund_extract': fund_extract,
            'processStatus': 2,
            'companyIds': [int(cid)],
            'companyCodes': [dbutil.get_company_code(db, cid)],
            'topic_id': tpid
        }
        nid = mongo.article.news.insert(news)
        send_msg(tpc, 'topic_company')

        title = dbutil.get_company_name(db, cid)
        tpm = dbutil.update_topic_message(db, tpid, title, 'P', 10, str(nid),
                                          detail_id, comments)
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpm, 'topic_message')
def clear_items(self):
    """Classify today's "saoanzi" items, dump them to CSV and mail a summary.

    Every source row from ``dbutil.get_daily_saoanzi_sources`` gets a status:
    'P' when the company needs verification or is not active (a task is also
    generated through ``self.tcg``), 'N' when ``self.__valid_message``
    rejects it, otherwise 'Y'.  All rows are collected into a table with the
    columns 0 company name, 1 validator URL, 2 verification label,
    3 creation time, 4 source.  When the table is non-empty it is written to
    ``dumps/saoanzi.csv`` next to this file and sent with ``send_mail_file``.
    """
    global logger_sourcing
    file_path = os.path.join(
        os.path.split(os.path.realpath(__file__))[0], u'dumps/saoanzi.csv')
    data = []
    for anzi in dbutil.get_daily_saoanzi_sources(self.db, self.today):
        cactive = dbutil.get_company_active(self.db, anzi.companyId)
        need_verify = self.tcg.need_verify(anzi.companyId)
        if need_verify or (cactive != 'Y'):
            # Needs a manual check: create a task and mark the item pending.
            self.tcg.generate_tc(
                json.dumps({
                    'id': anzi.companyId,
                    'source': 'track_saoanzi'
                }))
            dbutil.update_saoanzi_item_status(self.db, anzi.saoanziItemId, 'P')
        elif not self.__valid_message(anzi):
            dbutil.update_saoanzi_item_status(self.db, anzi.saoanziItemId, 'N')
        else:
            dbutil.update_saoanzi_item_status(self.db, anzi.saoanziItemId, 'Y')
        url = "http://pro.xiniudata.com/validator/#/company/%s/overview" \
            % dbutil.get_company_code(self.db, anzi.companyId)
        source = anzi.source
        # From here on need_verify holds the display label, not the flag.
        need_verify = u'需要检查' if (need_verify or (cactive != 'Y')) else u'不需要检查'
        data.append([
            dbutil.get_company_name(self.db, anzi.companyId), url, need_verify, anzi.createTime, source
        ])
    if not data:
        return
    # send email
    data = pandas.DataFrame(data)
    data.to_csv(file_path, encoding='utf_8_sig')
    # Distinct company names (column 0) per verification label (column 2).
    stat_verify = '<br/>'.join([
        '%s\t%s' % (title, len(set(detail[0]))) for title, detail in data.groupby(2)
    ])
    # Number of rows per source (column 4).
    stat_source = '<br/>'.join([
        '%s\t%s' % (title, len(detail)) for title, detail in data.groupby(4)
    ])
    stat = u'去重公司数<br/>%s<br/>每个源下的公司数<br/>%s\n' % (stat_verify, stat_source)
    receivers = ['victor', 'erin', 'weiguangxiao', 'gewei']
    # NOTE(review): the address literals here look redacted; as written this
    # one has no %s placeholder, so the formatting would raise TypeError.
    # Confirm against the real template before relying on it.
    receivers = ';'.join(['*****@*****.**' % r for r in receivers])
    title = u'扫案子项目列表 %s' % self.current_check_time.strftime('%Y-%m-%d %H')
    content = u'%s检查,今天共有%s个扫案子条目<br/>%s' % \
        (self.current_check_time.strftime('%Y-%m-%d %H:%M'), len(data), stat)
    send_mail_file(u'烯牛扫案子后台',
                   u'烯牛扫案子后台', "*****@*****.**", receivers, title, content, file_path)
def track_funding_for_investor_message(self, cid, fid, iids, funding_round, abstract):
    """Notify investors about funding ``fid`` of company ``cid``.

    * 7002: every investor in ``iids`` receives ``abstract``.
    * 7005: investors of earlier fundings (funding id < ``fid``) are told
      about an IPO (round 1110) or an acquisition (round 1120).
    * 7006: for any other round those earlier investors receive a
      "portfolio company" message built from ``abstract``.

    Non-empty results of ``dbutil.update_investor_message`` are forwarded to
    ``self.send_investor_message_msg``.
    """
    investor_names = ','.join(
        [dbutil.get_investor_name(self.db, i) for i in iids])

    # 7002
    for iid in iids:
        im = dbutil.update_investor_message(self.db, iid, abstract, 7002, 70,
                                            fid, active='Y')
        if im:
            self.send_investor_message_msg(im)

    # 7005 and 7006
    previous_fundings = [
        funding.id for funding in dbutil.get_company_funding(self.db, cid)
        if funding.id < fid
    ]
    # Materialise the ids: a bare chain object is always truthy, so testing
    # it could never detect "no earlier investors".
    previous_iids = list(chain(*[
        dbutil.get_funding_investor_ids(self.db, funding)
        for funding in previous_fundings
    ]))
    if not previous_iids:
        return

    if funding_round == 1110:
        msg = u'%s完成IPO' % dbutil.get_company_name(self.db, cid)
        dimension = 7005
    elif funding_round == 1120:
        msg = u'%s被%s收购' % (dbutil.get_company_name(self.db, cid), investor_names)
        dimension = 7005
    else:
        dimension = 7006

    for iid in previous_iids:
        if dimension == 7006:
            # This wording names the receiving investor, so build it per id.
            msg = u'%s已投项目, %s' % (dbutil.get_investor_name(self.db, iid), abstract)
        im = dbutil.update_investor_message(self.db, iid, msg, dimension, 70, fid)
        if im:
            self.send_investor_message_msg(im)
def __compare(c, fo, db, e):
    """Write one tab-separated line with a company's tags before and after extraction.

    The type-11012 tag names of company ``c`` are read, ``e.extract`` is run
    for the company, the tags are read again, and code, name, brief, old tags
    and new tags are written to ``fo``.
    """
    tag_sql = ('select tag.name name from tag, company_tag_rel rel where tagId=tag.id and tag.type=11012 '
               'and companyId=%s and (rel.active is null or rel.active="Y")')

    def current_tags():
        return ','.join([row.name for row in db.query(tag_sql, c.id)])

    before = current_tags()
    e.extract(c.id)
    after = current_tags()

    fields = (dbutil.get_company_code(db, c.id), dbutil.get_company_name(db, c.id),
              dbutil.get_company_brief(db, c.id), before, after)
    fo.write('%s\t%s\t%s\t%s\t%s\n' % fields)
def feed(self, cid, today=None):
    """Insert a track record for each Android artifact with a big update.

    Artifacts of type 4050 belonging to ``cid`` are checked with
    ``self.__feeda`` against ``today`` (default: now); artifacts without a
    domain are ignored.
    """
    if not today:
        today = datetime.today()
    for apk in dbutil.get_artifact_from_cid(self.db, cid, 4050):
        if not (apk.domain and self.__feeda(apk.domain, today)):
            continue
        # NOTE(review): the keys here are 'topic'/'companyId', whereas other
        # track inserts use 'topic_id'/'company_id' - confirm this is intended.
        record = {
            'topic': 4,
            'companyId': cid,
            'abstract': u'%s旗下Android产品有大的版本更新' % dbutil.get_company_name(self.db, cid),
            'createTime': today
        }
        self.mongo.track.track.insert(record)
def create_indice(self):
    """Create the search index entry for every company id (including NA ones).

    Logs the Elasticsearch info and configuration first; a failure for one
    company is logged with its traceback and does not stop the run.
    """
    db = dbcon.connect_torndb()
    log = self.logger
    log.info('Start to create indice')
    log.info(str(self.es.info()))
    log.info('ES Config %s' % str(tsbconfig.get_es_config()))

    for company_id in dbutil.get_all_company_id_withna(db):
        try:
            self.create_index(db, company_id)
            company_name = dbutil.get_company_name(db, company_id)
            log.info('%s index created, %s' % (company_id, company_name))
        except Exception as e:
            log.exception('%s failed # %s' % (company_id, e))
def track_updates(db, producer_comps, cid, updates):
    """Record a 6001 message listing newly found potential competitors.

    Nothing happens for inactive companies or when no entry of ``updates``
    passes ``__track_comps_match``.  When a message id is returned it is
    published on ``track_message_v2``; on ``FailedPayloadsError`` a new
    producer is created and the send is retried once.
    """
    if dbutil.get_company_active(db, cid) != 'Y':
        return
    matched = [candidate for candidate in updates if __track_comps_match(db, candidate)]
    if not matched:
        return

    comments = ','.join([dbutil.get_company_name(db, c) for c in matched])
    track_msg = u'%s发现了%s个潜在的竞争对手: %s' % (
        dbutil.get_company_name(db, cid), len(matched), comments)
    detail = ','.join([str(candidate) for candidate in matched])
    cmsg_id = dbutil.update_company_message(
        db, cid, track_msg, 6001, 60, detail, comments=comments)
    if not cmsg_id:
        return

    notice = {'id': cmsg_id, 'type': 'company_message', 'action': 'create'}
    try:
        producer_comps.send_messages("track_message_v2", json.dumps(notice))
    except FailedPayloadsError:
        # Rebuild the producer from the configured Kafka URL and retry once.
        kafka = KafkaClient(tsbconfig.get_kafka_config())
        producer_comps = SimpleProducer(kafka)
        producer_comps.send_messages("track_message_v2", json.dumps(notice))
def update_all(self, today=None):
    """Insert a topic-5 track record for every entry of ``summary_android_all()``.

    The abstract is ``self.abstract`` filled with the company name and the
    verdict from ``self.__judge``.  Errors are logged per company and the
    loop continues.
    """
    global logger_track
    if not today:
        today = datetime.today()
    for cid, update in summary_android_all():
        try:
            company = dbutil.get_company_name(self.db, cid)
            abstract = self.abstract % (company, self.__judge(update))
            record = {'topic_id': 5, 'company_id': cid, 'abstract': abstract, 'createTime': today}
            self.mongo.track.track.insert(record)
        except Exception as e:
            logger_track.exception('%s c5.1 exception, %s' % (cid, e))
def feed(self, cid, today=None):
    """Create a topic-2 track record summarising a company's recent news.

    News documents with ``companyId == cid`` created in the 30 days up to
    ``today`` (default: now) are collected.  Returns None when there are
    none; otherwise inserts a track document holding their ids, announces it
    on ``track_message`` and returns True.
    """
    global producer_track
    if not today:
        today = datetime.today()
    # NOTE(review): this filters on 'companyId'; presumably the news schema
    # has such a field - verify, since other code queries 'companyIds'.
    cursor = self.mongo.article.news.find({
        'companyId': cid,
        'createTime': {
            '$gt': (today - timedelta(days=30)),
            '$lte': today
        }
    })
    df = pd.DataFrame(list(cursor))
    if df.shape[0] == 0:
        return

    news_ids = [row._id for _, row in df.iterrows()]
    abstract = self.abstract % (dbutil.get_company_name(self.db, cid), df.shape[0])
    track_msg_id = str(
        self.mongo.track.track.insert({
            'topic_id': 2,
            'company_id': cid,
            'abstract': abstract,
            'createTime': today,
            'contents': news_ids
        }))
    notice = {'id': track_msg_id, 'type': 'track', 'topic_id': 2}
    producer_track.send_messages("track_message", json.dumps(notice))
    return True
def dump(colid):
    """Write "name, url" lines for every active company of a collection.

    Companies are read from ``collection_company_rel`` for ``colid`` (rows
    whose ``active`` is NULL or "Y") and written, UTF-8 encoded, to
    ``files/<colid>``.

    :param colid: collection id; also used as the output file name.
    """
    db = dbcon.connect_torndb()
    try:
        cids = [
            item.companyId for item in db.query(
                'select companyId from collection_company_rel where collectionId=%s '
                'and (active is null or active="Y");', colid)
        ]
        with codecs.open('files/%s' % colid, 'w', 'utf-8') as fo:
            for cid in cids:
                name = dbutil.get_company_name(db, cid)
                url = 'http://www.xiniudata.com/#/company/%s/overview' % dbutil.get_company_code(
                    db, cid)
                fo.write('%s, %s\n' % (name, url))
    finally:
        # Release the connection even when a query or the file write fails.
        db.close()
def score():
    """Dump ranking scores for the top companies of a fixed list of tags.

    For each tag the candidate companies are the union of three top-100
    queries (by type-37010 score, by number of type-11100 tags, and by
    number of active messages published after 2018-02-01).  One
    tab-separated line per company is written to ``dumps/rank`` with the tag,
    name, brief, URL and four scores:

    * s1 - the type-37010 score, replaced by 1 when it is >= 0.5;
    * s2 - (yellow tag count + 1 - time deduction) / 9;
    * s3 - log10(message count + 1) / 4;
    * s4 - confidence of the company/tag relation.
    """
    db = dbcon.connect_torndb()
    with codecs.open('dumps/rank', 'w', 'utf-8') as fo:
        for tag in [u'大数据', u'小程序', u'短视频', u'民宿', u'足球', u'咖啡']:
            cids = []
            tid = dbutil.get_tag_id(db, tag)[0]
            complete = db.query(
                'select rel.companyId cid from company_tag_rel rel, company_scores s '
                'where (rel.active="Y" or rel.active is null) and rel.companyId=s.companyId '
                'and s.type=37010 and tagId=%s order by score desc limit 100;', tid)
            cids.extend([c.cid for c in complete])
            yellows = db.query(
                'select companyId cid, count(*) c from company_tag_rel rel, tag '
                'where tag.id=tagId and tag.type=11100 and (tag.active is null or tag.active="Y") '
                'and (rel.active="Y" or rel.active is null) and companyId in '
                '(select distinct companyId from company_tag_rel where tagId=%s '
                'and (active is null or active="Y")) group by companyId order by c desc limit 100;', tid)
            cids.extend([c.cid for c in yellows])
            msgs = db.query(
                'select msg.companyId cid, count(*) c from company_message msg, company_tag_rel rel '
                'where msg.active="Y" and msg.companyId=rel.companyId and msg.publishTime>"2018-02-01" '
                'and rel.tagId=%s and (rel.active="Y" or rel.active is null) group by msg.companyId '
                'order by c desc limit 100;', tid)
            cids.extend([c.cid for c in msgs])
            cids = set(cids)
            for cid in cids:
                name = dbutil.get_company_name(db, cid)
                brief = dbutil.get_company_brief(db, cid)
                url = 'http://www.xiniudata.com/#/company/%s/overview' % dbutil.get_company_code(
                    db, cid)
                s1 = dbutil.get_company_score(db, cid, 37010)
                s1 = 1 if s1 >= 0.5 else s1
                # Divide by a float: with integer operands Python 2 would
                # floor the result before it is rounded to two decimals.
                s2 = (len(dbutil.get_company_tags_yellow(db, cid, False)) + 1 -
                      dbutil.get_company_yellow_time_deduction(db, cid)) / 9.0
                s3 = (log10(
                    len(dbutil.get_company_messages(db, cid, 'Y', '2018-02-01')) + 1)) / 4
                s4 = db.get(
                    'select confidence from company_tag_rel where companyId=%s and tagId=%s;',
                    cid, tid).confidence
                fo.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                         (tag, name, brief, url, s1, round(s2, 2), round(s3, 2), round(s4, 2)))
def create_indice(self):
    """Create the universal index entry for every company id.

    Runs ``self.__check()``, loads the topic/tag mapping into
    ``self.topic_tags``, logs the Elasticsearch info and configuration, then
    indexes each company.  A failure for one company is logged with its
    traceback and does not stop the run.
    """
    global logger_universal_index
    self.__check()
    db = dbcon.connect_torndb()
    self.topic_tags = dbutil.get_topic_corresponding_tags(db)

    for banner in ('Start to create indice',
                   str(self.es.info()),
                   'ES Config %s' % str(tsbconfig.get_es_config())):
        logger_universal_index.info(banner)

    for company_id in dbutil.get_all_company_id(db):
        try:
            self.create_single(db, company_id)
            company_name = dbutil.get_company_name(db, company_id)
            logger_universal_index.info(
                '%s index created, %s' % (company_id, company_name))
        except Exception as e:
            logger_universal_index.exception('%s failed # %s' % (company_id, e))
def __process_artifact_2005(self, aid, yesterday, logger):
    """Record a 2005 company message: artifact ``aid`` not updated for 90+ days.

    A non-empty result from ``dbutil.update_daily_company_message`` is
    forwarded to ``self.send_company_message_msg``.
    """
    company_id = dbutil.get_artifact_company(self.db, aid)
    artifact_name = dbutil.get_artifact_name(self.db, aid)
    parts = (dbutil.get_company_name(self.db, company_id),
             dbutil.get_artifact_type(self.db, aid, True),
             self.normalize_artifact_name(artifact_name))
    text = u'%s的一款%s应用%s超过90天未更新' % parts
    message_id = dbutil.update_daily_company_message(
        self.db, company_id, text, 2005, 20, aid, yesterday, comments=artifact_name)
    if message_id:
        self.send_company_message_msg(message_id)
    if logger:
        logger.info('2005, %s, mysql: %s' % (company_id, message_id))
def update_all_onfunding(self):
    """Record 8001 messages for every FA entry of ``self.yesterday``.

    Entries without a company id are skipped.  The same text is stored as a
    company message and as an investor message for each investor of the
    company; created investor messages are linked to the company.
    """
    global producer_track
    for fa in dbutil.get_all_FA(self.db, self.yesterday):
        cid = fa.companyId
        if cid:
            brief = dbutil.get_company_brief(self.db, cid)
            text = u'%s, %s开启了新一轮融资' % (brief, dbutil.get_company_name(self.db, cid))
            message_id = dbutil.update_company_message(self.db, cid, text, 8001, 80, fa.id)
            if message_id:
                self.send_company_message_msg(message_id)
            # investor track
            for iid in dbutil.get_company_investors(self.db, cid):
                imid = dbutil.update_investor_message(self.db, iid, text, 8001, 80, fa.id)
                if imid:
                    dbutil.update_investor_message_detail(self.db, imid, cid)
def update_3108(self, todays, yesterdays, today):
    """Yield 3108 company messages for apps that left the rank list.

    Every item of ``yesterdays`` whose ``trackId`` is missing from ``todays``
    is stored in ``temp.appstore`` and mapped to its artifacts.  One message
    is then yielded per artifact, with a log line after each.
    """
    global logger_track
    still_ranked = set(entry['trackId'] for entry in todays)
    day_thirday = today - timedelta(days=30)

    dropped = {}
    for item in [x for x in yesterdays if x['trackId'] not in still_ranked]:
        self.mongo.temp.appstore.insert_one({'type': 3108, 'createTime': today, 'item': item})
        for aid in dbutil.get_artifacts_from_iOS(self.db, item['trackId']):
            # A later item for the same artifact replaces the earlier one.
            dropped[aid] = item

    for aid, item in dropped.items():
        cid = dbutil.get_artifact_company(self.db, aid)
        text = u'%s旗下 %s跌出iOS%s前100名' % (
            dbutil.get_company_name(self.db, cid),
            self.__normalize_iOS_name(dbutil.get_artifact_name(self.db, aid)),
            self.__get_rank_name(item['genre'], item['type']))
        detail = '%s,%s' % (item['genre'], item['type'])
        comments = dbutil.get_artifact_name(self.db, aid)
        yield dbutil.update_daily_company_message(
            self.db, cid, text, 3108, 30, aid, day_thirday, detail, comments)
        logger_track.info('3108, %s, %s' % (cid, aid))