Esempio n. 1
0
    def __init__(self):
        """Set up the default check date and the online-investor lookups.

        Attributes populated here:
            default_last_check: the date 90 days before now, formatted as
                'YYYY-MM-DD'.
            alias: dict keyed by the second field of every row returned by
                dbutil.get_investor_gongshang_with_ids for the online
                investors; the value is dbutil.get_investor_name of the
                row's first field.
            online_vcs: the rows returned by
                dbutil.get_investor_gongshang_with_ids for the de-duplicated
                online investor ids.
        """
        CompanyTracker.__init__(self)
        # NOTE(review): self.db is presumably provided by
        # CompanyTracker.__init__ -- it is not assigned in this method.

        ninety_days_ago = datetime.now() - timedelta(days=90)
        self.default_last_check = ninety_days_ago.strftime('%Y-%m-%d')

        registered = dbutil.get_investor_gongshang_with_ids(
            self.db, *dbutil.get_online_investors(self.db))
        name_by_alias = {}
        for investor_id, registered_name in registered:
            name_by_alias[registered_name] = dbutil.get_investor_name(
                self.db, investor_id)
        self.alias = name_by_alias

        unique_online_ids = list(set(dbutil.get_online_investors(self.db)))
        self.online_vcs = dbutil.get_investor_gongshang_with_ids(
            self.db, *unique_online_ids)
Esempio n. 2
0
def update_share_holders():

    db = dbcon.connect_torndb()
    mongo = dbcon.connect_mongo()
    investors = set(dbutil.get_online_investors(db)) & set(
        dbutil.get_famous_investors(db))
    investors = {
        iid: [i[1] for i in dbutil.get_investor_gongshang_with_ids(db, iid)]
        for iid in investors
    }
    with codecs.open('cach/famous.new', 'w', 'utf-8') as fo:
        for iid, imajias in investors.iteritems():
            iname = dbutil.get_investor_name(db, iid)
            for majia in imajias:
                try:
                    gs = mongo.info.gongshang.find_one({'name': majia})
                    if not gs:
                        continue
                    share_holers = gs.get('investors', [])
                    share_holers = [
                        i.get('name') for i in share_holers
                        if i.get('name') not in imajias
                    ]
                    share_holers = [i for i in share_holers if len(i) > 5]
                    if not share_holers:
                        fo.write('%s\t%s\n' % (iname, majia))
                    else:
                        for sh in share_holers:
                            fo.write('%s\t%s\t%s\n' % (iname, majia, sh))
                except Exception, e:
                    print majia, e
Esempio n. 3
0
    def match(self, cid):
        """Rank funds for company `cid` and dump the ranking to 'dumps/fund'.

        Steps:
            1. Build a {tag name: weight} dict from
               dbutil.get_company_tags_idname and pass it through
               self.transformer.transform.
            2. Compute cosine_similarity between self.fund_profiles and the
               transformed tags; the first column of each result row is
               keyed by self.fund_mapping.get(row index).
            3. For every investor returned by dbutil.get_company_investors
               for the companies from dbutil.get_company_comps, replace its
               score with `score * occurrences + 1` (0 is used as the score
               when the investor had none).
            4. Write the entries, highest score first, as tab-separated
               lines: fund, score, whether the id is in
               dbutil.get_online_investors, round, activeness, location.

        Args:
            cid: company id forwarded to the dbutil helpers.

        Returns:
            None; the result is the file written to 'dumps/fund'.
        """
        from collections import Counter

        tags = {
            name: weight
            for _, name, weight in dbutil.get_company_tags_idname(
                self.db, cid, tag_out_type=(11000, 11001, 11002, 11100, 11054))
        }
        tags = self.transformer.transform(tags)
        similarities = cosine_similarity(self.fund_profiles, tags)
        similarities = {
            self.fund_mapping.get(index): row[0]
            for index, row in enumerate(similarities)
        }
        investor_comps = list(
            chain.from_iterable(
                dbutil.get_company_investors(self.db, comp)
                for comp in dbutil.get_company_comps(self.db, cid)))
        # Count every investor once up front instead of calling list.count()
        # per investor, which was quadratic in the number of investors.
        occurrences = Counter(investor_comps)
        for ic in set(investor_comps):
            # NOTE(review): this evaluates as (score * count) + 1; confirm
            # that is intended rather than score * (count + 1).
            similarities[ic] = similarities.get(ic, 0) * occurrences[ic] + 1
        similarities = sorted(similarities.items(), key=lambda x: -x[1])
        famous = set(dbutil.get_online_investors(self.db))
        with codecs.open('dumps/fund', 'w', 'utf-8') as fo:
            for iid, weight in similarities:
                fo.write('%s\t%s\t%s\t%s\t%s\t%s\n' %
                         (self.funds.get(iid), weight, iid in famous,
                          self.fund_rounds.get(iid), self.activeness.get(iid),
                          self.locations.get(iid)))
Esempio n. 4
0
def analyze_yuanhe():

    db = dbcon.connect_torndb()
    mongo = dbcon.connect_mongo()
    investors = {
        name: iid
        for iid, name in dbutil.get_investor_alias_with_ids(
            db, *[x[0] for x in dbutil.get_all_investor(db)])
    }
    candidates = {
        name: iid
        for iid, name in dbutil.get_investor_alias_candidates(
            db, *dbutil.get_online_investors(db))
    }
    # majias = [alias.name for alias in db.query('select * from fof_alias;')]
    # majias.extend(dbutil.get_investor_alias(db, 348))
    # majias = set(majias)
    # majias2 = set(copy(majias))
    with codecs.open('files/yuanhe', 'w', 'utf-8') as fo:
        # for majia in majias:
        #     print majia
        #     gongshang = mongo.info.gongshang.find_one({'name': majia})
        #     if not gongshang:
        #         continue
        #     invests = [g.get('name') for g in gongshang.get('invests', [])]
        #     invests = [i for i in invests if u'投资' in i or u'股权' in i or u'创业' in i]
        #     for index, investor in enumerate(invests):
        #         iid = investors.get(investor)
        #         iidc = candidates.get(investor)
        #         if iid == 348 or iidc == 348:
        #             majias2.add(investor)
        #             continue
        # majias = dbutil.get_investor_alias(db, 348)
        have = [line.strip() for line in codecs.open('files/yuanhe.have')]
        # majias2 = [m for m in majias if m not in have]
        # allm = set(majias) | set(have)
        for majia in have:
            print majia
            gongshang = mongo.info.gongshang.find_one({'name': majia})
            if not gongshang:
                print 'no gongshang', majia
                continue
            invests = [
                g.get('name') for g in gongshang.get('invests', [])
                if g.get('name') not in have
            ]
            if not invests:
                fo.write('%s\t\t\n' % majia)
            # invests = [i for i in invests if u'投资' in i or u'股权' in i or u'创业' in i]
            for index, investor in enumerate(invests):
                iid = investors.get(investor)
                iidc = candidates.get(investor)
                if iid:
                    iname = dbutil.get_investor_name(db, iid)
                elif iidc:
                    iname = '%s(待确认)' % dbutil.get_investor_name(db, iidc)
                else:
                    iname = ''
                majia_name = majia if index == 0 else ''
                fo.write('%s\t%s\t%s\n' % (majia_name, investor, iname))
Esempio n. 5
0
def test():

    global logger_track
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    init_kafka()
    yesterday = datetime.today() - timedelta(days=2)
    last_year = (datetime.today() - timedelta(days=365)).strftime('%Y-%m-%d')
    # topic 44, known vcs equals to online vcs
    online_vcs = dbutil.get_investor_gongshang_with_ids(
        db, *list(set(dbutil.get_online_investors(db))))
    online_vcs.extend([(iid, dbutil.get_investor_name(db, iid))
                       for iid in set(dbutil.get_online_investors(db))])

    # task company, verified gongshang change
    for gongshang in db.query(
            'select companyId, relateId, detailId, comments, message from company_message '
            'where trackDimension=5001 and createTime>%s;', yesterday):
        if gongshang.companyId != 250061:
            continue
        else:
            print gongshang
        if gongshang.get('comments') and cmp(gongshang.get('comments'),
                                             last_year) < 1:
            print 'old'
            continue
        diff = re.sub(u'减少了.*', u'', gongshang.get('message', ''))
        print diff
        posting_time = datetime.now().strftime('%Y-%m-%d:%H:%M:%S')
        for vcid, vc in online_vcs:
            if vc in diff:
                print vc, vcid
                # send_customed_msg('task_company', json.dumps({'source': 'gongshang_verified_online',
                #                                               'id': int(gongshang.get('companyId')),
                #                                               'posting_time': posting_time}))
                # logger_track.info('gongshang_verified_online, %s' % gongshang.get('companyId'))
                break
        else:
            if u'投资' in diff or u'基金' in diff:
                print u'投资'
                # send_customed_msg('task_company', json.dumps({'source': 'gongshang_verified_offline',
                #                                               'id': int(gongshang.get('companyId')),
                #                                               'posting_time': posting_time}))
                # logger_track.info('gongshang_verified_offline, %s' % gongshang.get('companyId'))
                break
Esempio n. 6
0
def link_july():

    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    shall = {}
    fo = codecs.open('dumps/june.out', 'w', 'utf-8')
    for (iid, name) in dbutil.get_investor_gongshang_with_ids(
            db, *dbutil.get_online_investors(db)):
        shall[name] = ','.join(
            ('investor', name, str(iid), dbutil.get_investor_name(db, iid)))
        igs = mongo.info.gongshang.find_one({'name': name})
        if igs:
            for sh in igs.get('investors', []):
                if sh.get('name'):
                    shall[sh.get('name')] = ','.join(
                        ('sh', name, str(iid),
                         dbutil.get_investor_name(db, iid)))
    print len(shall)
    shall_keys = set(shall.keys())
    for line in codecs.open('files/funded.july', encoding='utf-8'):
        name, establish, founded = line.strip().split('\t')
        amac = mongo.amac.fund.find_one({'fundName': name})
        if founded == u'是':
            fo.write(u'%s\t%s\t%s\t关联到机构\n' %
                     (name, establish, amac.get('regDate')))
            continue
        gs = mongo.info.gongshang.find_one({'name': name})
        if not gs:
            if u'私募' in name:
                fo.write(u'%s\t%s\t%s\t私募\n' %
                         (name, establish, amac.get('regDate')))
                continue
            else:
                l1 = name.strip().replace(u'(', u'(').replace(u')', u')')
                l2 = name.strip().replace(u'(', u'(').replace(u')', u')')
                gs = mongo.info.gongshang.find_one({
                    'name': l1
                }) or mongo.info.gongshang.find_one({'name': l2})
                if not gs:
                    fo.write(u'%s\t%s\t%s\t无工商\n' %
                             (name, establish, amac.get('regDate')))
                    continue
        share_holders = set(
            filter(lambda x: x.strip(),
                   [sh.get('name', '') for sh in gs.get('investors', [])]))
        shared = share_holders & shall_keys
        if shared:
            fo.write(u'%s\t%s\t%s\t潜在新机构\n' %
                     (name, establish, amac.get('regDate')))
        else:
            fo.write(u'%s\t%s\t%s\t无结果\n' %
                     (name, establish, amac.get('regDate')))
Esempio n. 7
0
def check_lp():

    db = dbcon.connect_torndb()
    mongo = dbcon.connect_mongo()

    for (iid, alias) in dbutil.get_investor_alias_with_ids(
            db, *dbutil.get_online_investors(db)):
        try:
            gs = mongo.info.gongshang.find_one({'name': alias})
            lps = [lp.get('name') for lp in gs.get('investors', [])]
            lps = [lp for lp in lps if u'宜信' in lp]
            if len(lps) > 0:
                print dbutil.get_investor_name(db, iid), alias, lps[0]
        except Exception, e:
            pass
Esempio n. 8
0
def dump_share_holders():

    db = dbcon.connect_torndb()
    mongo = dbcon.connect_mongo()
    # investors = set(dbutil.get_online_investors(db)) & set(dbutil.get_famous_investors(db))
    investors = set([
        i.id for i in db.query(
            'select distinct investorId id from famous_investor '
            'where (active is null or active="Y") and createUser in (-1, -2);')
    ])
    investors = set(dbutil.get_online_investors(db)) & investors
    investors = {
        iid: [i[1] for i in dbutil.get_investor_gongshang_with_ids(db, iid)]
        for iid in investors
    }
    with codecs.open('dumps/famous.sh', 'w', 'utf-8') as fo:
        for iid, imajias in investors.iteritems():
            iname = dbutil.get_investor_name(db, iid)
            for majia in imajias:
                try:
                    gs = mongo.info.gongshang.find_one({'name': majia})
                    if not gs:
                        continue
                    share_holers = gs.get('investors', [])
                    share_holers = [
                        i.get('name') for i in share_holers
                        if i.get('name') not in imajias
                    ]
                    share_holers = [i for i in share_holers if len(i) > 5]
                    if not share_holers:
                        fo.write('%s\t%s\n' % (iname, majia))
                    else:
                        for sh in share_holers:
                            fo.write('%s\t%s\t%s\n' % (iname, majia, sh))
                except Exception, e:
                    print majia, e
Esempio n. 9
0
def gongshang_relevant_track():
    """Track yesterday's gongshang changes that involve investors.

    Works on the company_message rows with trackDimension=5001 created
    within the last day; rows whose comments value is not later than one
    year ago are skipped in both passes.

    Pass 1 (topic 44): the change entry referenced by the row (mongo
    info.gongshang record `relateId`, changeInfo item with id `detailId`)
    is inspected. For every online investor registered name that appears in
    'contentAfter' but not in 'contentBefore', a news message is stored via
    __update_company_news with topic 44. Rows whose record or change entry
    cannot be found are logged and skipped.

    Pass 2 (task_company): the row's message, cut at the "decreased" marker,
    is searched for online investor registered names. The first hit sends a
    'gongshang_verified_online' message; without a hit, a message that
    mentions investment or fund keywords sends 'gongshang_verified_offline'.
    """
    global logger_track
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    init_kafka()
    yesterday = datetime.today() - timedelta(days=1)
    last_year = (datetime.today() - timedelta(days=365)).strftime('%Y-%m-%d')
    sql = ('select companyId, relateId, detailId, comments, message from company_message '
           'where trackDimension=5001 and createTime>%s;')
    # topic 44, known vcs equals to online vcs
    online_vcs = dbutil.get_investor_gongshang_with_ids(
        db, *list(set(dbutil.get_online_investors(db))))
    logger_track.info('Start to track gongshang for 44')
    for gongshang in db.query(sql, yesterday):
        # skip new detected old changes
        comments = gongshang.get('comments')
        if comments and comments <= last_year:
            continue
        record = mongo.info.gongshang.find_one({
            '_id': ObjectId(gongshang.relateId)
        })
        if not record:
            # A dangling relateId used to abort the whole run.
            logger_track.info('gongshang record not found, %s' %
                              gongshang.relateId)
            continue
        detail_id = int(gongshang.detailId)
        change = next((c for c in record.get('changeInfo') or []
                       if c.get('id', -1) == detail_id), None)
        if change is None:
            # Likewise for a detailId missing from changeInfo.
            logger_track.info('gongshang change not found, %s, %s' %
                              (gongshang.relateId, gongshang.detailId))
            continue
        # Missing or null content is treated as empty text.
        after = change.get('contentAfter') or u''
        before = change.get('contentBefore') or u''
        for vcid, vc in online_vcs:
            if vc in after and vc not in before:
                msg = u'%s近期新增股东%s(%s),推测其完成了新一轮融资' % \
                      (dbutil.get_company_name(db, gongshang.companyId), dbutil.get_investor_name(db, vcid), vc)
                __update_company_news(db,
                                      mongo, [gongshang.companyId],
                                      44,
                                      msg,
                                      None,
                                      comments=change.get('changeTime'))

    # task company, verified gongshang change
    for gongshang in db.query(sql, yesterday):
        comments = gongshang.get('comments')
        if comments and comments <= last_year:
            continue
        # A NULL message column comes back as None; treat it as empty text.
        diff = re.sub(u'减少了.*', u'', gongshang.get('message') or u'')
        posting_time = datetime.now().strftime('%Y-%m-%d:%H:%M:%S')
        for vcid, vc in online_vcs:
            if vc in diff:
                send_customed_msg(
                    'task_company',
                    json.dumps({
                        'source': 'gongshang_verified_online',
                        'id': int(gongshang.get('companyId')),
                        'posting_time': posting_time,
                        'detail': diff
                    }))
                logger_track.info('gongshang_verified_online, %s' %
                                  gongshang.get('companyId'))
                break
        else:
            # Reached only when no online investor name matched above.
            if u'投资' in diff or u'基金' in diff:
                send_customed_msg(
                    'task_company',
                    json.dumps({
                        'source': 'gongshang_verified_offline',
                        'id': int(gongshang.get('companyId')),
                        'posting_time': posting_time,
                        'detail': diff
                    }))
                logger_track.info('gongshang_verified_offline, %s' %
                                  gongshang.get('companyId'))