def analyze_yuanhe(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() investors = { name: iid for iid, name in dbutil.get_investor_alias_with_ids( db, *[x[0] for x in dbutil.get_all_investor(db)]) } candidates = { name: iid for iid, name in dbutil.get_investor_alias_candidates( db, *dbutil.get_online_investors(db)) } # majias = [alias.name for alias in db.query('select * from fof_alias;')] # majias.extend(dbutil.get_investor_alias(db, 348)) # majias = set(majias) # majias2 = set(copy(majias)) with codecs.open('files/yuanhe', 'w', 'utf-8') as fo: # for majia in majias: # print majia # gongshang = mongo.info.gongshang.find_one({'name': majia}) # if not gongshang: # continue # invests = [g.get('name') for g in gongshang.get('invests', [])] # invests = [i for i in invests if u'投资' in i or u'股权' in i or u'创业' in i] # for index, investor in enumerate(invests): # iid = investors.get(investor) # iidc = candidates.get(investor) # if iid == 348 or iidc == 348: # majias2.add(investor) # continue # majias = dbutil.get_investor_alias(db, 348) have = [line.strip() for line in codecs.open('files/yuanhe.have')] # majias2 = [m for m in majias if m not in have] # allm = set(majias) | set(have) for majia in have: print majia gongshang = mongo.info.gongshang.find_one({'name': majia}) if not gongshang: print 'no gongshang', majia continue invests = [ g.get('name') for g in gongshang.get('invests', []) if g.get('name') not in have ] if not invests: fo.write('%s\t\t\n' % majia) # invests = [i for i in invests if u'投资' in i or u'股权' in i or u'创业' in i] for index, investor in enumerate(invests): iid = investors.get(investor) iidc = candidates.get(investor) if iid: iname = dbutil.get_investor_name(db, iid) elif iidc: iname = '%s(待确认)' % dbutil.get_investor_name(db, iidc) else: iname = '' majia_name = majia if index == 0 else '' fo.write('%s\t%s\t%s\n' % (majia_name, investor, iname))
def link_july(): mongo = dbcon.connect_mongo() db = dbcon.connect_torndb() shall = {} fo = codecs.open('dumps/june.out', 'w', 'utf-8') for (iid, name) in dbutil.get_investor_gongshang_with_ids( db, *dbutil.get_online_investors(db)): shall[name] = ','.join( ('investor', name, str(iid), dbutil.get_investor_name(db, iid))) igs = mongo.info.gongshang.find_one({'name': name}) if igs: for sh in igs.get('investors', []): if sh.get('name'): shall[sh.get('name')] = ','.join( ('sh', name, str(iid), dbutil.get_investor_name(db, iid))) print len(shall) shall_keys = set(shall.keys()) for line in codecs.open('files/funded.july', encoding='utf-8'): name, establish, founded = line.strip().split('\t') amac = mongo.amac.fund.find_one({'fundName': name}) if founded == u'是': fo.write(u'%s\t%s\t%s\t关联到机构\n' % (name, establish, amac.get('regDate'))) continue gs = mongo.info.gongshang.find_one({'name': name}) if not gs: if u'私募' in name: fo.write(u'%s\t%s\t%s\t私募\n' % (name, establish, amac.get('regDate'))) continue else: l1 = name.strip().replace(u'(', u'(').replace(u')', u')') l2 = name.strip().replace(u'(', u'(').replace(u')', u')') gs = mongo.info.gongshang.find_one({ 'name': l1 }) or mongo.info.gongshang.find_one({'name': l2}) if not gs: fo.write(u'%s\t%s\t%s\t无工商\n' % (name, establish, amac.get('regDate'))) continue share_holders = set( filter(lambda x: x.strip(), [sh.get('name', '') for sh in gs.get('investors', [])])) shared = share_holders & shall_keys if shared: fo.write(u'%s\t%s\t%s\t潜在新机构\n' % (name, establish, amac.get('regDate'))) else: fo.write(u'%s\t%s\t%s\t无结果\n' % (name, establish, amac.get('regDate')))
def check_lp(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() for (iid, alias) in dbutil.get_investor_alias_with_ids( db, *dbutil.get_online_investors(db)): try: gs = mongo.info.gongshang.find_one({'name': alias}) lps = [lp.get('name') for lp in gs.get('investors', [])] lps = [lp for lp in lps if u'宜信' in lp] if len(lps) > 0: print dbutil.get_investor_name(db, iid), alias, lps[0] except Exception, e: pass
def __topic_29(db, funding):
    """
    Topic 29 (original note: "BAT are moving into these sectors again").

    Applies when the funding has at least one investor among ids
    187, 217, 117.  A topic message and a topic company are then recorded
    through ``dbutil`` and every id that comes back is pushed with
    ``send_msg``.  The outcome is logged on ``logger_track`` either way.
    """
    matched = set(dbutil.get_funding_investor_ids(
        db, funding.id)) & {187, 217, 117}
    if not matched:
        logger_track.info('29 not for %s' % funding.id)
        return

    # 'Y' only when the topic is set to auto publish, otherwise 'P'
    auto_publish = dbutil.get_topic_auto_pubilsh_status(db, 29) == 'Y'
    active = 'Y' if auto_publish else 'P'

    cid = funding.companyId
    names = ','.join(dbutil.get_investor_name(db, iid) for iid in matched)
    brief = dbutil.get_company_brief(db, cid)
    company = dbutil.get_company_name(db, cid)
    msg = u'%s投资了%s,%s' % (names, brief, company)

    tpm = dbutil.update_topic_message(db, 29, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')

    tpc = dbutil.update_topic_company(db, 29, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')

    logger_track.info('29 for %s, add tpm %s' % (funding.id, tpm))
def __topic_28(db, funding):
    """
    Topic 28 (original note names Sequoia, ZhenFund, Matrix and IDG).

    Applies when the funding has at least one investor among ids
    114, 122, 125, 109.  A topic message and a topic company are then
    recorded through ``dbutil`` and every id that comes back is pushed with
    ``send_msg``.  The outcome is logged on ``logger_track`` either way.
    """
    hits = set(dbutil.get_funding_investor_ids(
        db, funding.id)) & {114, 122, 125, 109}
    if not hits:
        logger_track.info('28 not for %s' % funding.id)
        return

    # publish directly ('Y') only if the topic auto-publishes, else 'P'
    if dbutil.get_topic_auto_pubilsh_status(db, 28) == 'Y':
        active = 'Y'
    else:
        active = 'P'

    # the funding's own company is used (not the corporate's company list)
    cid = funding.companyId
    investor_names = ','.join(
        dbutil.get_investor_name(db, iid) for iid in hits)
    company_brief = dbutil.get_company_brief(db, cid)
    company_name = dbutil.get_company_name(db, cid)
    msg = u'%s投资了%s,%s' % (investor_names, company_brief, company_name)

    tpm = dbutil.update_topic_message(db, 28, msg, active, 70, funding.id)
    if tpm:
        send_msg(tpm, 'topic_message')

    tpc = dbutil.update_topic_company(db, 28, cid, active)
    if tpc:
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpc, 'topic_company')

    logger_track.info('28 for %s, add tpm %s' % (funding.id, tpm))
def update_share_holders(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() investors = set(dbutil.get_online_investors(db)) & set( dbutil.get_famous_investors(db)) investors = { iid: [i[1] for i in dbutil.get_investor_gongshang_with_ids(db, iid)] for iid in investors } with codecs.open('cach/famous.new', 'w', 'utf-8') as fo: for iid, imajias in investors.iteritems(): iname = dbutil.get_investor_name(db, iid) for majia in imajias: try: gs = mongo.info.gongshang.find_one({'name': majia}) if not gs: continue share_holers = gs.get('investors', []) share_holers = [ i.get('name') for i in share_holers if i.get('name') not in imajias ] share_holers = [i for i in share_holers if len(i) > 5] if not share_holers: fo.write('%s\t%s\n' % (iname, majia)) else: for sh in share_holers: fo.write('%s\t%s\t%s\n' % (iname, majia, sh)) except Exception, e: print majia, e
def __source_gongshang(db, mongo, yesterday):
    """Record famous investors that appear in a company's latest change.

    For every topic-44 ``topic_company`` row published after ``yesterday``
    (active null or 'Y'), the company's most recent ``company_message`` with
    trackDimension 5001 is fetched and the matching ``changeInfo`` entry of
    the referenced ``mongo.info.gongshang`` document is inspected.  Famous
    investor aliases present in ``contentAfter`` but absent from
    ``contentBefore`` are collected, and their investor names are passed to
    ``dbutil.update_extract_source_company`` with source 67003.

    Any failure for one company is logged through ``loggers`` and the loop
    moves on to the next company.
    """
    global loggers
    known_vcs = dbutil.get_investor_alias_with_ids(
        db, *dbutil.get_famous_investors(db))
    topic_rows = db.query(
        'select * from topic_company where topicId=44 and (active is null or active="Y") '
        'and publishTime>%s', yesterday)
    for tpc in topic_rows:
        gongshang = db.get(
            'select relateId, detailId from company_message where trackDimension=5001 and companyId=%s '
            'order by publishTime desc limit 1', tpc.companyId)
        try:
            record = mongo.info.gongshang.find_one(
                {'_id': ObjectId(gongshang.relateId)})
            ginfo = record.get('changeInfo', [])
            # first change entry whose id equals the message's detailId
            change = [
                entry for entry in ginfo
                if entry.get('id', -1) == int(gongshang.detailId)
            ][0]

            added_iids = []
            for iid, vc in known_vcs:
                if vc not in change.get('contentAfter', ''):
                    continue
                if vc in change.get('contentBefore', ''):
                    continue
                added_iids.append(iid)

            if added_iids:
                inames = ','.join(
                    dbutil.get_investor_name(db, iid)
                    for iid in set(added_iids))
                dbutil.update_extract_source_company(
                    db, 67003, None, tpc.companyId, gongshang.relateId, True,
                    inames)
        except Exception as e:
            loggers.exception('Failed gongshang, %s, %s' % (tpc.companyId, e))
def create_index(self, iid):
    """Build the search document for investor ``iid`` and index it.

    The completion candidates are the investor name and its non-blank
    aliases, plus for each of them: the pinyin transliteration, a variant
    with common fund-related words removed, and the lower-cased form.
    The 30 highest-weighted tags (weights rounded to 2 places) become the
    ``features`` / ``feature_scores`` fields.  The finished document is
    handed to ``self.__create_index``.

    Nothing is indexed (an error is logged) when the investor has no name.
    """
    global logger_searchi
    iname = dbutil.get_investor_name(self.db, iid)
    if not iname:
        # not inside an except block, so there is no traceback to attach
        logger_searchi.error('%s investor has no name' % iid)
        return
    # one lookup instead of one per field
    info = dbutil.get_investor_info(self.db, iid)

    names = set([iname.strip()])
    names.update(
        item for item in dbutil.get_investor_alias(self.db, iid)
        if item.strip())
    # only byte strings need decoding; unicode values pass through untouched
    alias = [
        (name.decode('utf-8') if isinstance(name, bytes) else name).strip()
        for name in names
    ]
    alias.extend(
        [''.join(lazy_pinyin(name, errors='ignore')) for name in alias])
    for alia in list(alias):
        alias.append(
            alia.replace(u'投资', u'').replace(u'基金', u'').replace(
                u'创投', u'').replace(u'资本', u''))
    alias.extend([name.lower() for name in alias])
    alias = list(set(alias))

    tag_weights = json.loads(dbutil.get_investor_tags(self.db, iid, 0))
    top_tags = sorted(tag_weights.items(), key=lambda x: -x[1])[:30]
    tags_scores = {k: round(v, 2) for k, v in top_tags}
    tags = [tag.lower() for tag in tags_scores]

    active = info.active
    item = {
        'id': 'i%s' % iid,
        '_code': info.code,
        '_name': iname.lower(),
        'completionName': alias,
        '_prompt': 'investor',
        # log2 of the funding count; a missing/zero count scores 0
        'ranking_score': round(
            log(info.get('fundingCntFrom2017') or 1, 2), 2),
        'online': info.online == 'Y',
        # an unset active flag counts as active
        'active': 'Y' if (active is None or active == 'Y') else active,
        'features': tags,
        'feature_scores': json.dumps(tags_scores)
    }
    self.__create_index(item)
def get_fundings():
    """Write per-investor investment-style reports for every id in ``era``.

    ``dumps/style`` receives, per investor, the numbered sections produced
    from ``get_investor_portfolio_companies`` (tags, news count, location)
    and ``get_investor_portfolio_statistic`` (rounds, repeat investments,
    totals, follow-on fundings, solo-investment rate).  ``dumps/style.irr``
    receives one ``investor name, funding`` record per item returned by
    ``get_investor_portfolio_return``.
    """
    db = dbcon.connect_torndb()
    mongo = dbcon.connect_mongo()

    def round_name(value):
        # look the round value up in the dictionary table (typeValue=1)
        row = db.get(
            'select * from dictionary where typeValue=1 and value=%s', value)
        return row.name

    with codecs.open('dumps/style', 'w', 'utf-8') as fo:
        emit = fo.write
        for iid in itertools.chain(*era.values()):
            iname = dbutil.get_investor_name(db, iid)
            emit('\n\n%s\n' % iname)
            cinfo = get_investor_portfolio_companies(db, mongo, iid)
            finfo = get_investor_portfolio_statistic(db, iid)

            # 1 sectors
            emit(u'1. 行业\n')
            for tag, weight in cinfo.get('tags'):
                emit('\t%s\t%s\n' % (tag, weight))

            # 3 rounds, ordered by round value
            emit(u'3. 阶段\n')
            rounds = sorted(finfo.get('round').items(),
                            key=lambda pair: pair[0])
            for r, weight in rounds:
                emit('\t%s\t%s\n' % (round_name(r), weight))

            # 4 repeat investments
            emit(u'4. 连续投资一家公司的公司数量\t%s\n' % finfo.get('dups'))

            # 5 number of deals
            emit(u'5. 出手次数\t%s\n' % finfo.get('total'))

            # 6 follow-on fundings, ordered by count key
            emit(u'6. 机构首次投资某公司后,该公司后续融资次数\n')
            posts = sorted(finfo.get('posts').items(),
                           key=lambda pair: pair[0])
            for t, count in posts:
                emit(u'\t再融%s次公司数量\t%s\n' % (t, count))

            # 9 news count
            emit(u'9. 2017年以来相关新闻数量\t%s\n' % cinfo.get('news'))

            # 11 locations, largest weight first
            emit(u'11. 投资企业地域分布比例\n')
            locations = sorted(cinfo.get('location').items(),
                               key=lambda pair: -pair[1])
            for l, weight in locations:
                emit('\t%s\t%s\n' % (l, weight))

            # 12 solo-investment rate
            emit(u'12. 单独投资率\t%s\n' % finfo.get('alone'))

    with codecs.open('dumps/style.irr', 'w', 'utf-8') as fo:
        for iid in itertools.chain(*era.values()):
            iname = dbutil.get_investor_name(db, iid)
            for funding in get_investor_portfolio_return(db, iid):
                fo.write('%s\t%s' % (iname, funding))
def track_funding_for_investor_message(self, cid, fid, iids, funding_round,
                                       abstract):
    """Create investor messages for a new funding of company ``cid``.

    * Dimension 7002: every investor in ``iids`` gets ``abstract``.
    * Investors of the company's earlier fundings (funding id below
      ``fid``) get one message each:
        - dimension 7005 with an IPO text when ``funding_round`` is 1110,
        - dimension 7005 with an acquisition text when it is 1120,
        - dimension 7006 with a portfolio-company text otherwise.

    Each message id returned by ``dbutil.update_investor_message`` is
    forwarded to ``self.send_investor_message_msg``.
    """
    # 7002
    for iid in iids:
        im = dbutil.update_investor_message(self.db,
                                            iid,
                                            abstract,
                                            7002,
                                            70,
                                            fid,
                                            active='Y')
        if im:
            self.send_investor_message_msg(im)

    # 7005 and 7006
    previous_fundings = [
        funding.id for funding in dbutil.get_company_funding(self.db, cid)
        if funding.id < fid
    ]
    # Materialise the ids: a chain object is always truthy, so it cannot be
    # used to test for "no previous investors".  Each investor is kept once
    # (first-seen order) so nobody is messaged twice for the same funding.
    previous_iids = []
    seen = set()
    for funding_id in previous_fundings:
        for iid in dbutil.get_funding_investor_ids(self.db, funding_id):
            if iid and iid not in seen:
                seen.add(iid)
                previous_iids.append(iid)
    if not previous_iids:
        return

    msg = None
    if funding_round == 1110:
        msg = u'%s完成IPO' % dbutil.get_company_name(self.db, cid)
        dimension = 7005
    elif funding_round == 1120:
        # the acquirer names are only needed for this message
        investor_names = ','.join(
            [dbutil.get_investor_name(self.db, i) for i in iids])
        msg = u'%s被%s收购' % (dbutil.get_company_name(self.db, cid),
                             investor_names)
        dimension = 7005
    else:
        dimension = 7006

    for iid in previous_iids:
        if dimension == 7006:
            # this text is addressed to the individual previous investor
            msg = u'%s已投项目, %s' % (dbutil.get_investor_name(self.db, iid),
                                   abstract)
        im = dbutil.update_investor_message(self.db, iid, msg, dimension, 70,
                                            fid)
        if im:
            self.send_investor_message_msg(im)
def __init__(self):
    """Initialise the tracker on top of ``CompanyTracker``.

    Sets:
      * ``default_last_check`` - the date 90 days ago as ``YYYY-MM-DD``;
      * ``alias`` - registered name -> investor name for online investors;
      * ``online_vcs`` - the result of
        ``dbutil.get_investor_gongshang_with_ids`` for the de-duplicated
        online investor ids.
    """
    CompanyTracker.__init__(self)

    ninety_days_ago = datetime.now() - timedelta(days=90)
    self.default_last_check = ninety_days_ago.strftime('%Y-%m-%d')

    online_ids = dbutil.get_online_investors(self.db)
    name_by_alias = {}
    for iid, alias in dbutil.get_investor_gongshang_with_ids(
            self.db, *online_ids):
        name_by_alias[alias] = dbutil.get_investor_name(self.db, iid)
    self.alias = name_by_alias

    unique_ids = list(set(dbutil.get_online_investors(self.db)))
    self.online_vcs = dbutil.get_investor_gongshang_with_ids(
        self.db, *unique_ids)
def __merge_investor(self, iid, gs_name, session): try: i_name = dbutil.get_investor_name(self.db, iid) session.run( 'MERGE (gsc:GongShangCompany {name: {gsc_name}}) ' 'MERGE (investor: Investor {name: {i_name}}) ' 'MERGE (investor) -[:distribute]-> (gsc) ' 'RETURN investor', { 'gsc_name': gs_name, 'i_name': i_name }) except Exception, e: print iid, gs_name
def test():
    """Debug run of the gongshang verification step for company 250061.

    Prints what would be matched instead of sending anything: the
    ``send_customed_msg`` / ``logger_track`` calls are commented out.
    """
    global logger_track
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    init_kafka()
    # look back two days
    yesterday = datetime.today() - timedelta(days=2)
    last_year = (datetime.today() - timedelta(days=365)).strftime('%Y-%m-%d')
    # topic 44, known vcs equals to online vcs
    online_vcs = dbutil.get_investor_gongshang_with_ids(
        db, *list(set(dbutil.get_online_investors(db))))
    # also match on the investors' own names, not only their registered names
    online_vcs.extend([(iid, dbutil.get_investor_name(db, iid))
                       for iid in set(dbutil.get_online_investors(db))])
    # task company, verified gongshang change
    for gongshang in db.query(
            'select companyId, relateId, detailId, comments, message from company_message '
            'where trackDimension=5001 and createTime>%s;', yesterday):
        # only the single company under investigation is inspected
        if gongshang.companyId != 250061:
            continue
        else:
            print gongshang
        # cmp(...) < 1 means comments <= last_year: skip old changes
        if gongshang.get('comments') and cmp(gongshang.get('comments'),
                                             last_year) < 1:
            print 'old'
            continue
        # drop everything from the "reduced" marker onwards
        diff = re.sub(u'减少了.*', u'', gongshang.get('message', ''))
        print diff
        posting_time = datetime.now().strftime('%Y-%m-%d:%H:%M:%S')
        for vcid, vc in online_vcs:
            if vc in diff:
                print vc, vcid
                # send_customed_msg('task_company', json.dumps({'source': 'gongshang_verified_online',
                #                                               'id': int(gongshang.get('companyId')),
                #                                               'posting_time': posting_time}))
                # logger_track.info('gongshang_verified_online, %s' % gongshang.get('companyId'))
                break
        else:
            # no online investor matched: fall back to generic keywords
            if u'投资' in diff or u'基金' in diff:
                print u'投资'
                # send_customed_msg('task_company', json.dumps({'source': 'gongshang_verified_offline',
                #                                               'id': int(gongshang.get('companyId')),
                #                                               'posting_time': posting_time}))
                # logger_track.info('gongshang_verified_offline, %s' % gongshang.get('companyId'))
                # NOTE(review): this break leaves the outer record loop;
                # its exact nesting could not be confirmed - verify intent.
                break
def dump_share_holders(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() # investors = set(dbutil.get_online_investors(db)) & set(dbutil.get_famous_investors(db)) investors = set([ i.id for i in db.query( 'select distinct investorId id from famous_investor ' 'where (active is null or active="Y") and createUser in (-1, -2);') ]) investors = set(dbutil.get_online_investors(db)) & investors investors = { iid: [i[1] for i in dbutil.get_investor_gongshang_with_ids(db, iid)] for iid in investors } with codecs.open('dumps/famous.sh', 'w', 'utf-8') as fo: for iid, imajias in investors.iteritems(): iname = dbutil.get_investor_name(db, iid) for majia in imajias: try: gs = mongo.info.gongshang.find_one({'name': majia}) if not gs: continue share_holers = gs.get('investors', []) share_holers = [ i.get('name') for i in share_holers if i.get('name') not in imajias ] share_holers = [i for i in share_holers if len(i) > 5] if not share_holers: fo.write('%s\t%s\n' % (iname, majia)) else: for sh in share_holers: fo.write('%s\t%s\t%s\n' % (iname, majia, sh)) except Exception, e: print majia, e
def gongshang_relevant_track():
    """Process the last day's business-registration change messages.

    Reads ``company_message`` rows with trackDimension 5001 created since
    yesterday and runs two passes over them.  Rows whose ``comments`` value
    is not later than one year ago are skipped in both passes.

    1. Topic 44: the ``changeInfo`` entry referenced by the row is read from
       ``mongo.info.gongshang``.  For every online investor entity that
       appears in ``contentAfter`` but not in ``contentBefore`` a news item
       is recorded through ``__update_company_news``.
    2. Task company: the part of ``message`` before the "reduced" marker is
       scanned.  If it mentions an online investor entity a
       ``gongshang_verified_online`` task is sent; otherwise, if it contains
       one of the generic investment keywords, a
       ``gongshang_verified_offline`` task is sent.

    Rows whose mongo record or change entry cannot be found are logged and
    skipped so that one bad row does not abort the run.
    """
    global logger_track
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    init_kafka()
    yesterday = datetime.today() - timedelta(days=1)
    last_year = (datetime.today() - timedelta(days=365)).strftime('%Y-%m-%d')
    changes_sql = (
        'select companyId, relateId, detailId, comments, message from company_message '
        'where trackDimension=5001 and createTime>%s;')

    def is_old_change(row):
        # same test as the former ``cmp(comments, last_year) < 1``
        comments = row.get('comments')
        return bool(comments) and comments <= last_year

    # topic 44, known vcs equals to online vcs
    online_vcs = dbutil.get_investor_gongshang_with_ids(
        db, *list(set(dbutil.get_online_investors(db))))

    logger_track.info('Start to track gongshang for 44')
    for gongshang in db.query(changes_sql, yesterday):
        # skip new detected old changes
        if is_old_change(gongshang):
            continue
        record = mongo.info.gongshang.find_one(
            {'_id': ObjectId(gongshang.relateId)})
        if not record:
            logger_track.warning('gongshang %s not found for company %s' %
                                 (gongshang.relateId, gongshang.companyId))
            continue
        try:
            detail_id = int(gongshang.detailId)
        except (TypeError, ValueError):
            logger_track.warning('bad detailId %s for company %s' %
                                 (gongshang.detailId, gongshang.companyId))
            continue
        change = next((entry for entry in record.get('changeInfo', [])
                       if entry.get('id', -1) == detail_id), None)
        if change is None:
            logger_track.warning('change %s not found for company %s' %
                                 (detail_id, gongshang.companyId))
            continue
        content_after = change.get('contentAfter') or u''
        content_before = change.get('contentBefore') or u''
        for vcid, vc in online_vcs:
            if vc in content_after and vc not in content_before:
                msg = u'%s近期新增股东%s(%s),推测其完成了新一轮融资' % \
                      (dbutil.get_company_name(db, gongshang.companyId),
                       dbutil.get_investor_name(db, vcid), vc)
                __update_company_news(db,
                                      mongo, [gongshang.companyId],
                                      44,
                                      msg,
                                      None,
                                      comments=change.get('changeTime'))

    # task company, verified gongshang change
    for gongshang in db.query(changes_sql, yesterday):
        if is_old_change(gongshang):
            continue
        # keep only the part before the "reduced" marker
        diff = re.sub(u'减少了.*', u'', gongshang.get('message') or u'')
        posting_time = datetime.now().strftime('%Y-%m-%d:%H:%M:%S')
        if any(vc in diff for vcid, vc in online_vcs):
            source = 'gongshang_verified_online'
        elif u'投资' in diff or u'基金' in diff:
            source = 'gongshang_verified_offline'
        else:
            continue
        send_customed_msg(
            'task_company',
            json.dumps({
                'source': source,
                'id': int(gongshang.get('companyId')),
                'posting_time': posting_time,
                'detail': diff
            }))
        logger_track.info('%s, %s' % (source, gongshang.get('companyId')))
def create_single(self, db, funding):
    """Index one funding as an ``event`` document in ``xiniudata2``.

    Fundings for which ``dbutil.get_funding_index_type`` returns nothing
    are ignored.  Otherwise the document collects the funding's investors,
    the investors of the company's earlier-dated fundings (only when the
    funding has a date), company location / sector / tags, round
    information, the funding amount and the dates, and is sent to
    ``self.es.index`` with the funding id as document id.
    """
    global logger_universale_index
    # funding that is not active
    if not dbutil.get_funding_index_type(db, funding.id):
        return

    company_id = funding.companyId
    funded_on = funding.fundingDate

    investor_ids = dbutil.get_funding_investor_ids(db, funding.id)
    event = {
        'fid': funding.id,
        'investorId': investor_ids,
        'investor': [dbutil.get_investor_name(db, iid) for iid in investor_ids],
    }

    # previous investors
    if funded_on:
        earlier = [
            f.id for f in dbutil.get_company_funding(db, company_id)
            if f.fundingDate and f.fundingDate < funded_on
        ]
        earlier_iids = set(
            chain(*[
                dbutil.get_funding_investor_ids(db, fid) for fid in earlier
            ]))
        event['previous_investor'] = [
            dbutil.get_investor_name(db, iid) for iid in earlier_iids if iid
        ]

    event['location'] = dbutil.get_company_location(db, company_id)[0]

    sectors = dbutil.get_company_sector_tag(db, company_id)
    has_sector = len(sectors) > 0
    event['sector'] = sectors[0] if has_sector else 0

    tags_info = dbutil.get_company_tags_idname(db,
                                               company_id,
                                               tag_out_type=(11000, 11001,
                                                             11002))
    if tags_info:
        # the key is only created when at least one tag exists
        for tid, tname, weight in tags_info:
            event.setdefault('tags', []).append(tname.lower())

    event['round'] = funding.round
    event['sort_round'] = dbutil.get_round_sort(db, funding.round)

    if funding.investment:
        # amounts not flagged precise ('N') are scaled by 5
        precise = {'Y': 1, 'N': 5}.get(funding.precise, 1)
        event['last_funding_amount'] = (
            funding.investment * precise *
            dbutil.get_currency_rate(db, funding.currency) / 10000)
    else:
        event['last_funding_amount'] = None

    event['last_funding_date'] = funded_on
    event['funding_year'] = funded_on.year if funded_on else None
    event['publish_date'] = funding.publishDate
    event['source'] = funding.source if funding.source else 0
    event['sort_sector'] = dbutil.get_tag_novelty(
        db, sectors[0]) if has_sector else None
    event['sort_location'] = dbutil.get_company_location(
        db, company_id, True)[1]

    self.es.index(index="xiniudata2",
                  doc_type='event',
                  id=funding.id,
                  body=event)