def __init__(self):
    """Initialise the tracker and preload online-investor lookups.

    Sets:
        default_last_check: date string (YYYY-MM-DD) 90 days before now.
        alias: dict mapping each gongshang name of an online investor to
            the investor's name.
        online_vcs: the ``(investor id, gongshang name)`` pairs returned by
            ``dbutil.get_investor_gongshang_with_ids`` for the de-duplicated
            online investor ids.
    """
    CompanyTracker.__init__(self)

    ninety_days_ago = datetime.now() - timedelta(days=90)
    self.default_last_check = ninety_days_ago.strftime('%Y-%m-%d')

    # gongshang name -> investor name, for every online investor.
    gongshang_pairs = dbutil.get_investor_gongshang_with_ids(
        self.db, *dbutil.get_online_investors(self.db))
    name_by_alias = {}
    for investor_id, gongshang_name in gongshang_pairs:
        name_by_alias[gongshang_name] = dbutil.get_investor_name(
            self.db, investor_id)
    self.alias = name_by_alias

    # Same lookup again, this time over the de-duplicated id set.
    unique_ids = list(set(dbutil.get_online_investors(self.db)))
    self.online_vcs = dbutil.get_investor_gongshang_with_ids(
        self.db, *unique_ids)
def update_share_holders(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() investors = set(dbutil.get_online_investors(db)) & set( dbutil.get_famous_investors(db)) investors = { iid: [i[1] for i in dbutil.get_investor_gongshang_with_ids(db, iid)] for iid in investors } with codecs.open('cach/famous.new', 'w', 'utf-8') as fo: for iid, imajias in investors.iteritems(): iname = dbutil.get_investor_name(db, iid) for majia in imajias: try: gs = mongo.info.gongshang.find_one({'name': majia}) if not gs: continue share_holers = gs.get('investors', []) share_holers = [ i.get('name') for i in share_holers if i.get('name') not in imajias ] share_holers = [i for i in share_holers if len(i) > 5] if not share_holers: fo.write('%s\t%s\n' % (iname, majia)) else: for sh in share_holers: fo.write('%s\t%s\t%s\n' % (iname, majia, sh)) except Exception, e: print majia, e
def match(self, cid):
    """Rank funds for company ``cid`` and write the ranking to 'dumps/fund'.

    The company's tag weights are vectorised with ``self.transformer`` and
    compared against ``self.fund_profiles`` by cosine similarity. Every
    investor found among the investors of the company's comps
    (``dbutil.get_company_comps``) then has its score replaced by
    ``score * occurrences + 1``. Funds are written in descending score
    order, one tab-separated line each: fund name, score, whether the fund
    is an online investor, round, activeness, location.
    """
    tag_rows = dbutil.get_company_tags_idname(
        self.db, cid, tag_out_type=(11000, 11001, 11002, 11100, 11054))
    tag_weights = {}
    for _, tag_name, tag_weight in tag_rows:
        tag_weights[tag_name] = tag_weight
    company_vector = self.transformer.transform(tag_weights)

    # One row per fund profile; the single column is the company similarity.
    score_rows = cosine_similarity(self.fund_profiles, company_vector)
    similarities = {}
    for position, score_row in enumerate(score_rows):
        similarities[self.fund_mapping.get(position)] = score_row[0]
    # Round and activeness filters are currently disabled.

    investor_comps = list(
        chain.from_iterable(
            dbutil.get_company_investors(self.db, comp)
            for comp in dbutil.get_company_comps(self.db, cid)))
    for investor_id in set(investor_comps):
        occurrences = investor_comps.count(investor_id)
        base_score = similarities.get(investor_id, 0)
        similarities[investor_id] = base_score * occurrences + 1

    ranking = sorted(similarities.items(), key=lambda pair: -pair[1])
    famous = set(dbutil.get_online_investors(self.db))
    with codecs.open('dumps/fund', 'w', 'utf-8') as fo:
        for iid, weight in ranking:
            columns = (self.funds.get(iid), weight, iid in famous,
                       self.fund_rounds.get(iid), self.activeness.get(iid),
                       self.locations.get(iid))
            fo.write('%s\t%s\t%s\t%s\t%s\t%s\n' % columns)
def analyze_yuanhe(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() investors = { name: iid for iid, name in dbutil.get_investor_alias_with_ids( db, *[x[0] for x in dbutil.get_all_investor(db)]) } candidates = { name: iid for iid, name in dbutil.get_investor_alias_candidates( db, *dbutil.get_online_investors(db)) } # majias = [alias.name for alias in db.query('select * from fof_alias;')] # majias.extend(dbutil.get_investor_alias(db, 348)) # majias = set(majias) # majias2 = set(copy(majias)) with codecs.open('files/yuanhe', 'w', 'utf-8') as fo: # for majia in majias: # print majia # gongshang = mongo.info.gongshang.find_one({'name': majia}) # if not gongshang: # continue # invests = [g.get('name') for g in gongshang.get('invests', [])] # invests = [i for i in invests if u'投资' in i or u'股权' in i or u'创业' in i] # for index, investor in enumerate(invests): # iid = investors.get(investor) # iidc = candidates.get(investor) # if iid == 348 or iidc == 348: # majias2.add(investor) # continue # majias = dbutil.get_investor_alias(db, 348) have = [line.strip() for line in codecs.open('files/yuanhe.have')] # majias2 = [m for m in majias if m not in have] # allm = set(majias) | set(have) for majia in have: print majia gongshang = mongo.info.gongshang.find_one({'name': majia}) if not gongshang: print 'no gongshang', majia continue invests = [ g.get('name') for g in gongshang.get('invests', []) if g.get('name') not in have ] if not invests: fo.write('%s\t\t\n' % majia) # invests = [i for i in invests if u'投资' in i or u'股权' in i or u'创业' in i] for index, investor in enumerate(invests): iid = investors.get(investor) iidc = candidates.get(investor) if iid: iname = dbutil.get_investor_name(db, iid) elif iidc: iname = '%s(待确认)' % dbutil.get_investor_name(db, iidc) else: iname = '' majia_name = majia if index == 0 else '' fo.write('%s\t%s\t%s\n' % (majia_name, investor, iname))
def test():
    """Debug run of the gongshang change check for company 250061.

    Walks recent ``company_message`` rows with ``trackDimension=5001`` and
    prints what would be matched instead of sending any message; the sending
    and logging calls are commented out below.
    """
    global logger_track
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    init_kafka()
    # Despite the name, this window reaches back two days.
    yesterday = datetime.today() - timedelta(days=2)
    last_year = (datetime.today() - timedelta(days=365)).strftime('%Y-%m-%d')
    # topic 44, known vcs equals to online vcs
    online_vcs = dbutil.get_investor_gongshang_with_ids(
        db, *list(set(dbutil.get_online_investors(db))))
    # Also match on the investors' own names, not only their gongshang names.
    online_vcs.extend([(iid, dbutil.get_investor_name(db, iid))
                       for iid in set(dbutil.get_online_investors(db))])
    # task company, verified gongshang change
    for gongshang in db.query(
            'select companyId, relateId, detailId, comments, message from company_message '
            'where trackDimension=5001 and createTime>%s;', yesterday):
        # Only the hard-coded company under investigation is inspected.
        if gongshang.companyId != 250061:
            continue
        else:
            print gongshang
        # cmp(a, b) < 1 means a <= b: skip rows whose comments value sorts at
        # or before the date one year ago (presumably a change date - TODO confirm).
        if gongshang.get('comments') and cmp(gongshang.get('comments'), last_year) < 1:
            print 'old'
            continue
        # Drop everything from the first u'减少了' onwards in the message.
        diff = re.sub(u'减少了.*', u'', gongshang.get('message', ''))
        print diff
        posting_time = datetime.now().strftime('%Y-%m-%d:%H:%M:%S')
        for vcid, vc in online_vcs:
            if vc in diff:
                print vc, vcid
                # send_customed_msg('task_company', json.dumps({'source': 'gongshang_verified_online',
                #                                               'id': int(gongshang.get('companyId')),
                #                                               'posting_time': posting_time}))
                # logger_track.info('gongshang_verified_online, %s' % gongshang.get('companyId'))
                break
        else:
            # Reached only when no known vc name appeared in the diff.
            if u'投资' in diff or u'基金' in diff:
                print u'投资'
                # send_customed_msg('task_company', json.dumps({'source': 'gongshang_verified_offline',
                #                                               'id': int(gongshang.get('companyId')),
                #                                               'posting_time': posting_time}))
                # logger_track.info('gongshang_verified_offline, %s' % gongshang.get('companyId'))
                # NOTE(review): this break leaves the outer row loop; its
                # original indentation was lost, so placement inside this
                # `if` is assumed - confirm against the original file.
                break
def link_july(): mongo = dbcon.connect_mongo() db = dbcon.connect_torndb() shall = {} fo = codecs.open('dumps/june.out', 'w', 'utf-8') for (iid, name) in dbutil.get_investor_gongshang_with_ids( db, *dbutil.get_online_investors(db)): shall[name] = ','.join( ('investor', name, str(iid), dbutil.get_investor_name(db, iid))) igs = mongo.info.gongshang.find_one({'name': name}) if igs: for sh in igs.get('investors', []): if sh.get('name'): shall[sh.get('name')] = ','.join( ('sh', name, str(iid), dbutil.get_investor_name(db, iid))) print len(shall) shall_keys = set(shall.keys()) for line in codecs.open('files/funded.july', encoding='utf-8'): name, establish, founded = line.strip().split('\t') amac = mongo.amac.fund.find_one({'fundName': name}) if founded == u'是': fo.write(u'%s\t%s\t%s\t关联到机构\n' % (name, establish, amac.get('regDate'))) continue gs = mongo.info.gongshang.find_one({'name': name}) if not gs: if u'私募' in name: fo.write(u'%s\t%s\t%s\t私募\n' % (name, establish, amac.get('regDate'))) continue else: l1 = name.strip().replace(u'(', u'(').replace(u')', u')') l2 = name.strip().replace(u'(', u'(').replace(u')', u')') gs = mongo.info.gongshang.find_one({ 'name': l1 }) or mongo.info.gongshang.find_one({'name': l2}) if not gs: fo.write(u'%s\t%s\t%s\t无工商\n' % (name, establish, amac.get('regDate'))) continue share_holders = set( filter(lambda x: x.strip(), [sh.get('name', '') for sh in gs.get('investors', [])])) shared = share_holders & shall_keys if shared: fo.write(u'%s\t%s\t%s\t潜在新机构\n' % (name, establish, amac.get('regDate'))) else: fo.write(u'%s\t%s\t%s\t无结果\n' % (name, establish, amac.get('regDate')))
def check_lp(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() for (iid, alias) in dbutil.get_investor_alias_with_ids( db, *dbutil.get_online_investors(db)): try: gs = mongo.info.gongshang.find_one({'name': alias}) lps = [lp.get('name') for lp in gs.get('investors', [])] lps = [lp for lp in lps if u'宜信' in lp] if len(lps) > 0: print dbutil.get_investor_name(db, iid), alias, lps[0] except Exception, e: pass
def dump_share_holders(): db = dbcon.connect_torndb() mongo = dbcon.connect_mongo() # investors = set(dbutil.get_online_investors(db)) & set(dbutil.get_famous_investors(db)) investors = set([ i.id for i in db.query( 'select distinct investorId id from famous_investor ' 'where (active is null or active="Y") and createUser in (-1, -2);') ]) investors = set(dbutil.get_online_investors(db)) & investors investors = { iid: [i[1] for i in dbutil.get_investor_gongshang_with_ids(db, iid)] for iid in investors } with codecs.open('dumps/famous.sh', 'w', 'utf-8') as fo: for iid, imajias in investors.iteritems(): iname = dbutil.get_investor_name(db, iid) for majia in imajias: try: gs = mongo.info.gongshang.find_one({'name': majia}) if not gs: continue share_holers = gs.get('investors', []) share_holers = [ i.get('name') for i in share_holers if i.get('name') not in imajias ] share_holers = [i for i in share_holers if len(i) > 5] if not share_holers: fo.write('%s\t%s\n' % (iname, majia)) else: for sh in share_holers: fo.write('%s\t%s\t%s\n' % (iname, majia, sh)) except Exception, e: print majia, e
def gongshang_relevant_track():
    """Track the last day's gongshang changes and emit news and task messages.

    Reads ``company_message`` rows with ``trackDimension=5001`` created
    within the last day, ignoring rows whose ``comments`` value sorts at or
    before the date one year ago. Two passes are made over those rows:

    1. Topic 44 news: when the gongshang name of an online investor appears
       in the change's ``contentAfter`` but not in its ``contentBefore``, a
       company news item is created through ``__update_company_news``.
    2. Task messages: the part of the row's message before u'减少了' is
       scanned. If it contains a known investor gongshang name a
       'gongshang_verified_online' message is sent to 'task_company';
       otherwise, if it contains u'投资' or u'基金', a
       'gongshang_verified_offline' message is sent.

    Rows whose gongshang document or change entry cannot be found are logged
    and skipped in the first pass instead of aborting the run.
    """
    global logger_track
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    init_kafka()
    yesterday = datetime.today() - timedelta(days=1)
    last_year = (datetime.today() - timedelta(days=365)).strftime('%Y-%m-%d')

    # topic 44, known vcs equals to online vcs
    online_vcs = dbutil.get_investor_gongshang_with_ids(
        db, *list(set(dbutil.get_online_investors(db))))

    logger_track.info('Start to track gongshang for 44')
    # Query once and reuse the rows so both passes see the same data.
    rows = db.query(
        'select companyId, relateId, detailId, comments, message from company_message '
        'where trackDimension=5001 and createTime>%s;', yesterday)
    # Skip newly detected changes that are actually old (comments <= last_year).
    recent = [
        row for row in rows
        if not (row.get('comments') and row.get('comments') <= last_year)
    ]

    for gongshang in recent:
        record = mongo.info.gongshang.find_one(
            {'_id': ObjectId(gongshang.relateId)})
        if not record:
            logger_track.info('gongshang record not found, %s' %
                              gongshang.relateId)
            continue
        detail_id = int(gongshang.detailId)
        change = next((item for item in record.get('changeInfo') or []
                       if item.get('id', -1) == detail_id), None)
        if change is None:
            logger_track.info('gongshang change not found, %s, %s' %
                              (gongshang.relateId, gongshang.detailId))
            continue
        # Stored values may be None; treat them as empty text.
        after = change.get('contentAfter') or ''
        before = change.get('contentBefore') or ''
        for vcid, vc in online_vcs:
            if vc in after and vc not in before:
                msg = u'%s近期新增股东%s(%s),推测其完成了新一轮融资' % \
                      (dbutil.get_company_name(db, gongshang.companyId),
                       dbutil.get_investor_name(db, vcid), vc)
                __update_company_news(db, mongo, [gongshang.companyId], 44,
                                      msg, None,
                                      comments=change.get('changeTime'))

    # task company, verified gongshang change
    for gongshang in recent:
        # Only the part of the message before u'减少了' is considered.
        diff = re.sub(u'减少了.*', u'', gongshang.get('message') or u'')
        posting_time = datetime.now().strftime('%Y-%m-%d:%H:%M:%S')
        if any(vc in diff for _, vc in online_vcs):
            source = 'gongshang_verified_online'
        elif u'投资' in diff or u'基金' in diff:
            source = 'gongshang_verified_offline'
        else:
            continue
        send_customed_msg(
            'task_company',
            json.dumps({
                'source': source,
                'id': int(gongshang.get('companyId')),
                'posting_time': posting_time,
                'detail': diff
            }))
        logger_track.info('%s, %s' % (source, gongshang.get('companyId')))