コード例 #1
0
ファイル: comps.py プロジェクト: yujiye/Codes
def comps():
    """Return one page of comparable companies for a company as JSON.

    Reads the ``payload`` object from the JSON request body and uses its
    ``company``, ``tag`` (default 0), ``start`` (default 0) and ``size``
    (default 5) entries.  With ``tag == 0`` the unfiltered comps are returned
    together with the ``tags`` produced by ``dbutil.prompt_tag_filter``;
    otherwise the tag is resolved through ``dbutil.get_tag_id`` and only the
    comps filtered by that tag are returned (without a ``tags`` entry).

    Returns:
        The response built by ``make_response(jsonify(...))`` whose body is
        ``{'company': {'count': <all candidates>, 'data': [{'id': code}]}}``.
    """
    global logger_comps
    query = json.loads(request.data).get('payload')
    logger_comps.info('Comps Query, %s' % query)
    cid = query.get('company')
    tag = query.get('tag', 0)
    start = query.get('start', 0)
    size = query.get('size', 5)

    unfiltered = tag == 0
    if unfiltered:
        comps_candidates = dbutil.get_company_comps(g.db, cid)
        logger_comps.info(comps_candidates)
    else:
        tag = dbutil.get_tag_id(g.db, tag)[0]
        comps_candidates = dbutil.get_filtered_company_comps(g.db, cid, tag)

    # Slice before resolving codes: only the requested page needs a lookup,
    # and a list slice works whether or not map() returns a list.
    page = list(comps_candidates)[start:start + size]
    company = {
        'count': len(comps_candidates),
        'data': [{'id': dbutil.get_company_code(g.db, x)} for x in page],
    }
    if unfiltered:
        company['tags'] = dbutil.prompt_tag_filter(g.db, comps_candidates)
    return make_response(jsonify({'company': company}))
コード例 #2
0
ファイル: tmp.py プロジェクト: yujiye/Codes
def check_apprank():

    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    today = datetime.today()
    todays = list(
        mongo.trend.appstore_rank.find({
            'date': {
                '$gt': (today - timedelta(days=1)),
                '$lte': today
            },
            'rank': {
                '$lte': 500
            }
        }))
    yesterdays = list(
        mongo.trend.appstore_rank.find({
            'date': {
                '$gt': (today - timedelta(days=2)),
                '$lt': (today - timedelta(days=1))
            },
            'rank': {
                '$lte': 500
            }
        }))
    newin = {}
    first = set()
    yesterdays = set(item['trackId'] for item in yesterdays)
    day_thirday = today - timedelta(days=30)
    for item in filter(lambda x: x['trackId'] not in yesterdays, todays):
        mongo.temp.appstore.insert_one({
            'type': 3017,
            'createTime': today,
            'item': item
        })
        for aid in dbutil.get_artifacts_from_iOS(db, item['trackId']):
            newin[aid] = item
            previous = mongo.trend.appstore_rank.find({
                'trackId':
                item['trackId'],
                'genre':
                item['genre'],
                'type':
                item['type'],
                'rank': {
                    '$lt': 500
                }
            }).count()
            if previous == 1:
                cid = dbutil.get_artifact_company(db, aid)
                print aid, dbutil.get_company_code(db, cid)
                first.add(cid)
    print len(newin), len(first)
    print '\n'.join([
        'http://pro.xiniudata.com/validator/#/company/%s/overview' %
        dbutil.get_company_code(db, cid) for cid in first
    ])
コード例 #3
0
def create_incremental(index=None):
    """Consume index-update messages from Kafka forever and apply them.

    Args:
        index: Optional selector.  ``1`` and ``2`` (or their string forms)
            use the Elasticsearch host/port from ``tsbconfig.get_es_config_1``
            / ``get_es_config_2``; any other value, or a falsy one, uses a
            ``UniversalIndexCreator`` built with no arguments.

    Each message value is a JSON object with an optional ``action``
    (default ``'create'``) and a company id under ``id`` or ``_id``.
    ``create`` re-indexes the company.  ``delete`` re-indexes it when the
    message carries ``aliasId`` or ``artifactId`` and otherwise removes the
    company's document from the ``universal`` index.  The offset is committed
    after each successfully handled message; failures are logged and the loop
    continues.
    """
    global logger_universal_index
    selector = int(index) if index else None
    if selector == 1:
        es_config = tsbconfig.get_es_config_1()
    elif selector == 2:
        es_config = tsbconfig.get_es_config_2()
    else:
        es_config = None

    if es_config is None:
        client = UniversalIndexCreator()
    else:
        host, port = es_config
        client = UniversalIndexCreator(
            Elasticsearch([{
                'host': host,
                'port': port
            }]))

    db = dbcon.connect_torndb()
    consumer_search = init_kafka()
    while True:
        logger_universal_index.info('Incremental create search1 index starts')
        try:
            for message in consumer_search:
                try:
                    logger_universal_index.info(
                        "%s:%d:%d: key=%s value=%s" %
                        (message.topic, message.partition, message.offset,
                         message.key, message.value))
                    # Decode the message once instead of once per field.
                    payload = json.loads(message.value)
                    action = payload.get('action', 'create')
                    cid = payload.get('id') or payload.get('_id')
                    if action == 'create':
                        client.create_single(db, cid)
                        logger_universal_index.info(
                            'incremental %s index created' % cid)
                    elif action == 'delete':
                        if payload.get('aliasId', False):
                            # Only an alias went away: rebuild the document.
                            client.create_single(db, cid)
                            logger_universal_index.info(
                                'incremental %s alias deleted' % cid)
                        elif payload.get('artifactId', False):
                            # Only an artifact went away: rebuild as well.
                            client.create_single(db, cid)
                            logger_universal_index.info(
                                'incremental %s artifact deleted' % cid)
                        else:
                            client.delete_index(
                                'universal', dbutil.get_company_code(db, cid))
                            logger_universal_index.info(
                                'incremental %s index deleted' % cid)
                    consumer_search.commit()
                except Exception as e:
                    # Per-message failure: log it and move to the next one
                    # (the offset of the failed message is not committed).
                    logger_universal_index.exception(
                        'Incr exception# %s \n # %s' % (message, e))
        except Exception as e:
            # Failure of the consumer iteration itself: log and restart it.
            logger_universal_index.exception('Incr outside exception # %s' % e)
コード例 #4
0
    def clear_items(self):
        """Settle today's saoanzi items and mail a CSV summary of them.

        For every row from ``dbutil.get_daily_saoanzi_sources``: when the
        company needs verification or is not active (``!= 'Y'``) a task is
        generated through ``self.tcg.generate_tc`` and the item is marked
        ``'P'``; otherwise it is marked ``'N'`` or ``'Y'`` depending on
        ``self.__valid_message``.  All rows are written to
        ``dumps/saoanzi.csv`` next to this file and mailed with per-status and
        per-source counts.  Returns without mailing when there are no rows.
        """
        global logger_sourcing
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        file_path = os.path.join(module_dir, u'dumps/saoanzi.csv')

        rows = []
        for anzi in dbutil.get_daily_saoanzi_sources(self.db, self.today):
            cactive = dbutil.get_company_active(self.db, anzi.companyId)
            need_verify = self.tcg.need_verify(anzi.companyId)
            flagged = need_verify or (cactive != 'Y')
            if flagged:
                self.tcg.generate_tc(
                    json.dumps({
                        'id': anzi.companyId,
                        'source': 'track_saoanzi'
                    }))
                status = 'P'
            elif not self.__valid_message(anzi):
                status = 'N'
            else:
                status = 'Y'
            dbutil.update_saoanzi_item_status(self.db, anzi.saoanziItemId,
                                              status)

            code = dbutil.get_company_code(self.db, anzi.companyId)
            url = "http://pro.xiniudata.com/validator/#/company/%s/overview" \
                  % code
            source = anzi.source
            # Labels read "needs checking" / "no check needed".
            verify_label = u'需要检查' if flagged else u'不需要检查'
            rows.append([
                dbutil.get_company_name(self.db, anzi.companyId), url,
                verify_label, anzi.createTime, source
            ])

        if not rows:
            return

        # Build the report; columns are 0=name, 1=url, 2=verify label,
        # 3=create time, 4=source.
        data = pandas.DataFrame(rows)
        data.to_csv(file_path, encoding='utf_8_sig')

        verify_lines = []
        for title, detail in data.groupby(2):
            # distinct company names per verify label
            verify_lines.append('%s\t%s' % (title, len(set(detail[0]))))
        stat_verify = '<br/>'.join(verify_lines)

        source_lines = []
        for title, detail in data.groupby(4):
            # number of rows per source
            source_lines.append('%s\t%s' % (title, len(detail)))
        stat_source = '<br/>'.join(source_lines)

        stat = u'去重公司数<br/>%s<br/>每个源下的公司数<br/>%s\n' % (stat_verify,
                                                        stat_source)

        # NOTE(review): the address literals below look redacted in this copy
        # of the file; as written the first one has no %s placeholder, so the
        # %-formatting would raise TypeError. Restore the real templates.
        receivers = ['victor', 'erin', 'weiguangxiao', 'gewei']
        receivers = ';'.join(['*****@*****.**' % r for r in receivers])

        checked_at = self.current_check_time
        title = u'扫案子项目列表 %s' % checked_at.strftime('%Y-%m-%d %H')
        content = u'%s检查,今天共有%s个扫案子条目<br/>%s' % \
                  (checked_at.strftime('%Y-%m-%d %H:%M'), len(data), stat)
        send_mail_file(u'烯牛扫案子后台', u'烯牛扫案子后台', "*****@*****.**",
                       receivers, title, content, file_path)
コード例 #5
0
ファイル: nice_download.py プロジェクト: yujiye/Codes
def test():
    """Write "<company code> <score>" lines for tids=[40] to a local file.

    The scores come from ``Download_Optimization.get_nice_download_cids`` and
    are rounded to four decimals; output goes to
    ``test_nice_download_output`` in the working directory.
    """
    optimizer = Download_Optimization()
    with open("test_nice_download_output", 'w') as out:
        # 40: games (per the original author's note)
        for cid, score in optimizer.get_nice_download_cids(tids=[40]):
            code = dbutil.get_company_code(optimizer.db, cid)
            out.write(''.join((code, ' ', str(round(score, 4)), '\n')))
コード例 #6
0
ファイル: nice_download_u.py プロジェクト: yujiye/Codes
def test():
    """Write "<company code> <score>" lines for three sectors to a file.

    Queries ``get_nice_download_cids`` with ``sectors=[22, 40, 107]`` and
    writes each company code with its score rounded to four decimals to
    ``tmp/test_nice_download_output``.
    """
    optimizer = Download_Optimization()
    with open('tmp/test_nice_download_output', 'w') as out:
        # 40: games (per the original author's note)
        pairs = optimizer.get_nice_download_cids(sectors=[22, 40, 107])
        for cid, score in pairs:
            code = dbutil.get_company_code(optimizer.db, cid)
            out.write(''.join((code, ' ', str(round(score, 4)), '\n')))
コード例 #7
0
def __compare(c, fo, db, e):
    """Write a company's type-11012 tags before and after re-extraction.

    Args:
        c: Object exposing the company id as ``c.id``.
        fo: Writable file-like object receiving one tab-separated line.
        db: Database handle offering ``query(sql, *params)``.
        e: Extractor whose ``extract(company_id)`` is run between the reads.

    The line holds the company code, name, brief, the comma-joined tag names
    read before ``e.extract`` and those read after it.
    """
    tag_sql = ('select tag.name name from tag, company_tag_rel rel where tagId=tag.id and tag.type=11012 '
               'and companyId=%s and (rel.active is null or rel.active="Y")')

    def joined_tag_names():
        # Active (or unset-active) tag names for this company, comma-joined.
        rows = db.query(tag_sql, c.id)
        return ','.join([row.name for row in rows])

    before = joined_tag_names()
    e.extract(c.id)
    after = joined_tag_names()

    fields = (dbutil.get_company_code(db, c.id),
              dbutil.get_company_name(db, c.id),
              dbutil.get_company_brief(db, c.id), before, after)
    fo.write('%s\t%s\t%s\t%s\t%s\n' % fields)
コード例 #8
0
ファイル: evaluate.py プロジェクト: yujiye/Codes
def dump():
    """Compare qmp tags with extracted tags and dump one line per company.

    Iterates the processed ``mongo.raw.qmp`` documents fetched from the
    ``/d/c3`` URL, collects their ``hangye1``, ``hangye2`` and ``tags_match``
    values, and keeps those that are keys of the module-level ``mapping``.
    For records with at least one such tag and a linked company
    (``source_company`` with source 13121), writes a tab-separated line to
    ``dumps/20180726``: matched qmp tags, tags from
    ``KeywordExtractor.extract_vip``, ``1``/``0`` for whether the mapped qmp
    tags overlap the extracted tags, the company URL, and its brief with
    newlines removed.
    """
    global mapping
    mongo = dbcon.connect_mongo()
    db = dbcon.connect_torndb()
    ke = KeywordExtractor()
    raw = mongo.raw.qmp.find(
        {
            "url": "http://vip.api.qimingpian.com/d/c3",
            "processed": True
        }, {
            'postdata': 1,
            'data.basic': 1
        })
    # The context manager closes the dump file even if a lookup raises.
    with codecs.open('dumps/20180726', 'w', 'utf-8') as fo:
        for qmp in raw:
            # Treat a missing or null data/basic section as "no tags".
            basic = (qmp.get('data') or {}).get('basic') or {}
            tags = [basic.get('hangye1') or '', basic.get('hangye2') or '']
            tags.extend((basic.get('tags_match') or '').split('|'))
            tags = [tag for tag in tags if tag.strip()]
            matched = set(tags) & set(mapping.keys())
            if not matched:
                continue
            # Look the company up only for records that survived the tag
            # filter; the result is not needed before this point.
            sc = db.get(
                'select companyId from source_company where source=13121 and sourceId=%s;',
                qmp['postdata']['id'])
            if not (sc and sc.companyId):
                continue
            tag_qmp = [mapping.get(tag) for tag in matched]
            tag_xiniu = [
                dbutil.get_tag_name(db, tid)
                for tid in ke.extract_vip(sc.companyId).keys()
            ]
            url = 'http://www.xiniudata.com/company/%s/overview' % dbutil.get_company_code(
                db, sc.companyId)
            desc = db.get('select brief from company where id=%s;',
                          sc.companyId).brief
            desc = desc.replace('\n', '') if desc else ''
            hit = 1 if set(tag_qmp) & set(tag_xiniu) else 0
            fo.write('%s\t%s\t%s\t%s\t%s\n' %
                     (','.join(matched), ','.join(tag_xiniu), hit, url, desc))
コード例 #9
0
def dump(colid):
    """Write "name, url" lines for every active company of a collection.

    Args:
        colid: Collection id; also used as the output file name under
            ``files/``.

    Companies are read from ``collection_company_rel`` where ``active`` is
    null or ``"Y"``.  The database connection is closed on every exit path.
    """
    db = dbcon.connect_torndb()
    try:
        rows = db.query(
            'select companyId from collection_company_rel where collectionId=%s '
            'and (active is null or active="Y");', colid)
        cids = [row.companyId for row in rows]
        with codecs.open('files/%s' % colid, 'w', 'utf-8') as fo:
            for cid in cids:
                name = dbutil.get_company_name(db, cid)
                url = 'http://www.xiniudata.com/#/company/%s/overview' % dbutil.get_company_code(
                    db, cid)
                fo.write('%s, %s\n' % (name, url))
    finally:
        # Previously skipped whenever a lookup or write raised.
        db.close()
コード例 #10
0
def __update_company_news(db,
                          mongo,
                          cids,
                          tpid,
                          content=u'发现一家公司',
                          fund_extract=-5,
                          detail_id=None,
                          comments=None):
    """Attach companies to a topic and publish a news item for each new one.

    Args:
        db: Handle passed through to the ``dbutil`` helpers.
        mongo: Mongo client; the news item goes into ``mongo.article.news``.
        cids: Iterable of company ids.
        tpid: Topic id.
        content: Text of the single content entry of the news item.
        fund_extract: Stored as ``fund_extract`` on the news item.
        detail_id: Forwarded to ``dbutil.update_topic_message``.
        comments: Forwarded to ``dbutil.update_topic_message``.

    For every company, ``dbutil.update_topic_company`` is called with status
    ``'P'``.  Only when that returns a truthy value and the company was not
    already linked to the topic is a news document inserted, a topic message
    created and linked, and both ids sent through ``send_msg``.
    """
    for cid in cids:
        existed = dbutil.exist_topic_company(db, tpid, cid)
        tpc = dbutil.update_topic_company(db, tpid, cid, 'P')
        if not tpc or existed:
            continue

        # One timestamp for all three fields so a freshly created document
        # does not carry a modifyTime later than its createTime.
        now = datetime.utcnow()
        # Fetched once; used as the news title and the topic message title.
        company_name = dbutil.get_company_name(db, cid)
        nid = mongo.article.news.insert({
            'date': now,
            'createTime': now,
            'modifyTime': now,
            'title': company_name,
            'contents': [{
                'content': content,
                'rank': 1
            }],
            'type': 61000,
            'createUser': 139,
            'fund_extract': fund_extract,
            'processStatus': 2,
            'companyIds': [int(cid)],
            'companyCodes': [dbutil.get_company_code(db, cid)],
            'topic_id': tpid
        })
        send_msg(tpc, 'topic_company')
        tpm = dbutil.update_topic_message(db, tpid, company_name, 'P', 10,
                                          str(nid), detail_id, comments)
        dbutil.update_topic_message_company(db, tpm, tpc)
        send_msg(tpm, 'topic_message')
コード例 #11
0
ファイル: rank.py プロジェクト: yujiye/Codes
def score():
    """Dump ranking signals for the top companies of a fixed set of tags.

    For each tag, candidate companies are the union of three queries (each
    limited to 100): highest ``company_scores`` of type 37010, most active
    type-11100 tags, and most ``company_message`` rows published after
    2018-02-01.  For every candidate one tab-separated line is written to
    ``dumps/rank``: tag, name, brief, URL and four scores (s1..s4).

    NOTE(review): ``s4`` assumes exactly one ``company_tag_rel`` row with a
    non-null ``confidence`` per (company, tag); verify against the schema.
    """
    db = dbcon.connect_torndb()
    try:
        with codecs.open('dumps/rank', 'w', 'utf-8') as fo:
            for tag in [u'大数据', u'小程序', u'短视频', u'民宿', u'足球', u'咖啡']:
                cids = []
                tid = dbutil.get_tag_id(db, tag)[0]
                complete = db.query(
                    'select rel.companyId cid from company_tag_rel rel, company_scores s '
                    'where (rel.active="Y" or rel.active is null) and rel.companyId=s.companyId '
                    'and s.type=37010 and tagId=%s order by score desc limit 100;',
                    tid)
                cids.extend([c.cid for c in complete])
                yellows = db.query(
                    'select companyId cid, count(*) c from company_tag_rel rel, tag '
                    'where tag.id=tagId and tag.type=11100 and (tag.active is null or tag.active="Y") '
                    'and (rel.active="Y" or rel.active is null) and companyId in '
                    '(select distinct companyId from company_tag_rel where tagId=%s '
                    'and (active is null or active="Y")) group by companyId order by c desc limit 100;',
                    tid)
                cids.extend([c.cid for c in yellows])
                msgs = db.query(
                    'select msg.companyId cid, count(*) c from company_message msg, company_tag_rel rel '
                    'where msg.active="Y" and msg.companyId=rel.companyId and msg.publishTime>"2018-02-01" '
                    'and rel.tagId=%s and (rel.active="Y" or rel.active is null) group by msg.companyId '
                    'order by c desc limit 100;', tid)
                cids.extend([c.cid for c in msgs])
                # De-duplicate companies found by more than one query.
                cids = set(cids)
                for cid in cids:
                    name = dbutil.get_company_name(db, cid)
                    brief = dbutil.get_company_brief(db, cid)
                    url = 'http://www.xiniudata.com/#/company/%s/overview' % dbutil.get_company_code(
                        db, cid)
                    s1 = dbutil.get_company_score(db, cid, 37010)
                    # Scores of 0.5 and above are capped to 1.
                    s1 = 1 if s1 >= 0.5 else s1
                    # Divide by 9.0: with integer operands Python 2's "/"
                    # truncates, which would flatten this score to an int.
                    s2 = (len(dbutil.get_company_tags_yellow(db, cid, False)) + 1 -
                          dbutil.get_company_yellow_time_deduction(db, cid)) / 9.0
                    s3 = (log10(
                        len(dbutil.get_company_messages(db, cid, 'Y',
                                                        '2018-02-01')) + 1)) / 4
                    s4 = db.get(
                        'select confidence from company_tag_rel where companyId=%s and tagId=%s;',
                        cid, tid).confidence
                    fo.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' %
                             (tag, name, brief, url, s1, round(s2, 2), round(
                                 s3, 2), round(s4, 2)))
    finally:
        # Release the connection on every exit path.
        db.close()
コード例 #12
0
def create_incremental(index):
    """Consume index-update messages from Kafka forever and apply them.

    Args:
        index: ``1`` or ``2`` (or their string forms) selects
            ``tsbconfig.get_es_config_1`` / ``get_es_config_2``; any other
            integer falls back to ``tsbconfig.get_es_config`` and logs an
            error.  Also passed to ``init_kafka``.

    Each message value is a JSON object with an optional ``action``
    (default ``'create'``) and a company id under ``id`` or ``_id``:

    * ``keyword``: refresh the completion keywords, then skip to the next
      message without committing the offset.
    * ``create``: rebuild the company in both index creators.
    * ``delete``: rebuild when the message carries ``aliasId`` or
      ``artifactId``; otherwise delete the ``company`` and ``completion``
      documents and rebuild the interior index.

    The offset is committed after each other successfully handled message;
    failures are logged and the loop continues.
    """
    global logger_index, consumer_search, producer_search

    selector = int(index)
    if selector == 1:
        host, port = tsbconfig.get_es_config_1()
    elif selector == 2:
        host, port = tsbconfig.get_es_config_2()
    else:
        host, port = tsbconfig.get_es_config()
        logger_index.error('Not legal elasticsearch config %s, using default' %
                           index)

    def new_es():
        # Each index creator gets its own Elasticsearch client instance.
        return Elasticsearch([{'host': host, 'port': port}])

    client = IndexCreator(new_es())
    i_client = InteriorIndexCreator(new_es())

    db = dbcon.connect_torndb()
    init_kafka(index)

    while True:
        logger_index.info('Incremental create search%s index starts' % index)
        try:
            for message in consumer_search:
                try:
                    logger_index.info(
                        "%s:%d:%d: key=%s value=%s" %
                        (message.topic, message.partition, message.offset,
                         message.key, message.value))
                    # Decode the message once instead of once per field.
                    payload = json.loads(message.value)
                    action = payload.get('action', 'create')
                    # somebody created a new tag
                    if action == 'keyword':
                        client.create_indice_completion_keywords(db,
                                                                 update=True)
                        logger_index.info('Update keyword')
                        # No commit on this path, as in the original code.
                        continue
                    cid = payload.get('id') or payload.get('_id')
                    if action == 'create':
                        client.create_single(db, cid)
                        i_client.create_index(db, cid)
                        logger_index.info('incremental %s index created' % cid)
                    elif action == 'delete':
                        if payload.get('aliasId', False):
                            # Only an alias went away: rebuild the documents.
                            client.create_single(db, cid)
                            i_client.create_index(db, cid)
                            logger_index.info('incremental %s alias deleted' %
                                              cid)
                        elif payload.get('artifactId', False):
                            # Only an artifact went away: rebuild as well.
                            client.create_single(db, cid)
                            i_client.create_index(db, cid)
                            logger_index.info(
                                'incremental %s artifact deleted' % cid)
                        else:
                            client.delete_index(
                                'company', dbutil.get_company_code(db, cid))
                            client.delete_index('completion', cid)
                            i_client.create_index(db, cid)
                            logger_index.info('incremental %s index deleted' %
                                              cid)
                    consumer_search.commit()
                except Exception as e:
                    # Per-message failure: log it and move to the next one
                    # (the offset of the failed message is not committed).
                    logger_index.exception('Incr exception# %s \n # %s' %
                                           (message, e))
        except Exception as e:
            # Failure of the consumer iteration itself: log and restart it.
            logger_index.exception('Incr outside exception # %s' % e)
コード例 #13
0
    def create_single(self, db, cid):
        """
        Build the search documents for one company.

        Returns early (after logging) when ``dbutil.get_company_index_type``
        is falsy for ``cid``.  Otherwise fills two dicts: ``company`` (code,
        ranking score, name, analyzed aliases, team, investor flag, tags,
        yellows, description, round, investors, members, location,
        established month) and ``completion`` (id, name, code, prompt and the
        raw alias list).

        Original author's note: the completion id consists of its type and
        original id, including
            cxxxx, fxxx, axxxx, pxxxx, nxxxx, standing for company, full,
            artifact, product, nick
            kxxxx, keyword

        NOTE(review): the portion of the method visible here only builds the
        two dicts; nothing is written to an index before the visible code
        ends -- confirm against the full source.
        """

        # check whether to index this cid
        if not dbutil.get_company_index_type(db, cid):
            self.logger.info('should not index %s' % cid)
            return

        company = {}
        alias = set()
        company_score = dbutil.get_company_score(db, cid, 37020)
        company['ranking_score'] = company_score

        # name is lower-cased and stripped of spaces here, so the later
        # name.lower() calls are no-ops.
        name = dbutil.get_company_name(db, cid).lower().replace(' ', '')
        code = dbutil.get_company_code(db, cid)
        company['cid'] = code
        completion = {
            'id': cid,
            '_name': name,
            '_code': code,
            '_prompt': 'name',
        }

        # First, Names
        # short name
        alias.add(name.lower())
        # also index the joined output of lazy_pinyin() for the name
        alias.add(''.join(lazy_pinyin(name.lower())))
        # full name
        full = dbutil.get_company_corporate_name(db, cid, False)
        if full and full.strip():
            alias.add(full.lower())
            # TODO temp solution
            # adds a variant with the city names 北京 / 上海 / 深圳 removed
            alias.add(full.lower().replace(u'北京',
                                           '').replace(u'上海',
                                                       '').replace(u'深圳', ''))
        # artifact name
        aresults = dbutil.get_artifact_idname_from_cid(db, cid, True)
        if aresults:
            # keep only names for which self.valid_name returns a truthy value
            alias.update([
                self.valid_name(aname) for _, aname in aresults
                if self.valid_name(aname)
            ])
        # alias
        aliass = dbutil.get_alias_idname(db, cid)
        # alias lists with 20 or more entries are skipped entirely
        if aliass and len(aliass) < 20:
            alias.update([
                self.valid_name(aname) for _, aname in aliass
                if self.valid_name(aname)
            ])
        # corporate name
        corporate = dbutil.get_company_corporate_name(db, cid)
        if corporate and corporate.strip():
            alias.add(corporate.lower())
        # corporate full name
        # (same call as `full` above; alias is a set, so a repeat is harmless)
        corporate_full = dbutil.get_company_corporate_name(db, cid, False)
        if corporate_full and corporate_full.strip():
            alias.add(corporate_full.lower())
        # corporate alias
        corporate_alias = dbutil.get_corporate_alias(db, cid)
        # same cap as above: 20 or more corporate aliases are skipped
        if corporate_alias and len(corporate_alias) < 20:
            alias.update([
                self.valid_name(aname) for aname in corporate_alias
                if self.valid_name(aname)
            ])
        # check if there is a relevant digital coin
        dt = dbutil.get_company_digital_coin_info(db, cid)
        if dt:
            alias.add(dt.symbol.lower())
            # short name
            if dt.name:
                alias.add(dt.name.lower().replace(' ', ''))
            # english name
            if dt.enname:
                alias.add(dt.enname.lower())

        # create indice names
        completion['completionName'] = list(alias)
        company['name'] = name.lower()
        company['alias'] = self.analyze_names(alias)

        # Second, team identify, investor identify
        team = self.identifier.identify(cid)
        if team and len(team) > 0:
            company['team'] = team
        # companies carrying tag 309129 get investor = 44010
        if dbutil.exist_company_tag(db, cid, 309129):
            company['investor'] = 44010

        # Third, keywords
        # regular tag
        tags_info = dbutil.get_company_tags_idname(db,
                                                   cid,
                                                   tag_out_type=(11000, 11001,
                                                                 11002))
        if tags_info:
            # only the tag name is used here; tid and weight are ignored
            for tid, tname, weight in tags_info:
                company.setdefault('tags', []).append(tname.lower())
        # yellows (original note: "forget y take this out")
        yellows = dbutil.get_company_tags_yellow(db, cid)
        if yellows:
            company['yellows'] = [yellow.lower() for yellow in yellows]

        # Fourth, description
        desc = dbutil.get_company_solid_description(db, cid)
        if desc and desc.strip():
            # segment the text, then drop stopwords and 1-character tokens
            desc = filter(lambda x: (x not in self.stopwords) and len(x) > 1,
                          list(self.seg.cut4search(desc)))
            company['description'] = (' '.join(desc)).lower()

        # Fifth, round and investors and members
        company['round'] = dbutil.get_company_round(db, cid)
        company['investors'] = dbutil.get_company_investor_names(db, cid)
        company['members'] = [
            name for _, name in dbutil.get_member_idname(db, cid)
        ]

        # Sixth, location
        # only the location id is stored; lname is unused
        lid, lname = dbutil.get_company_location(db, cid)
        company['location'] = lid

        # Seventh, establish date, create date, count of company message
        establish_date = dbutil.get_company_establish_date(db, cid)
        try:
            # stored as an integer of the form YYYYMM
            company['established'] = int(establish_date.strftime('%Y%m'))
        except Exception, e:
            # best effort: any failure here just leaves 'established' unset
            pass
コード例 #14
0
ファイル: trend.py プロジェクト: yujiye/Codes
    def memorize(self, tid, today=None):
        """Store one day's trend statistics for a tag.

        Args:
            tid: Tag id.
            today: Reference datetime; defaults to ``datetime.today()`` when
                falsy.

        Upserts five documents into ``mongo.keywords.trend_statistc``, keyed
        by tag, midnight of ``today`` and subtype: ``company_visit``,
        ``company_subscribe``, ``news_relevant``, ``news_read`` and
        ``search_precise``.  Log-based counts use the window
        ``(today - 32h, today - 8h]``.
        """
        global logger_tt
        if not today:
            today = datetime.today()
        yesterday = today - timedelta(days=1)
        today_int = int(today.strftime('%Y%m%d'))
        tag = dbutil.get_tag_info(self.db, tid, 'name')

        def log_window():
            # A fresh filter dict per query for the shared time window.
            return {
                '$gt': today - timedelta(hours=32),
                '$lte': today - timedelta(hours=8)
            }

        def store(subtype, kind, weight):
            # Third positional argument True: pymongo's legacy update()
            # takes upsert in that position.
            self.mongo.keywords.trend_statistc.update(
                {
                    'tag': tid,
                    'date': datetime.fromordinal(today.date().toordinal()),
                    'subtype': subtype
                }, {'$set': {
                    'type': kind,
                    'weight': weight
                }}, True)

        logger_tt.info('Start to process %s' % tid)

        # companies carrying the tag: page visits and subscriptions
        cids = dbutil.get_company_from_tags(self.db, [tid])
        codes = [dbutil.get_company_code(self.db, cid) for cid in cids]
        visits = self.mongo.log.user_log.find({
            'time': log_window(),
            'requestURL': "/xiniudata-api/api2/service/company/basic",
            'jsonRequest.payload.code': {
                '$in': codes
            }
        })
        store('company_visit', 'company', len(list(visits)))

        day_from = yesterday.strftime('%Y-%m-%d')
        day_to = today.strftime('%Y-%m-%d')
        subscriptions = dbutil.get_company_subscription_details(
            self.db, day_from, day_to, *cids)
        store('company_subscribe', 'company', len(subscriptions))

        # news matching the tag name today, and how often it was read
        found = self.search_client.search('general',
                                          input=tag,
                                          filters={
                                              'date': today_int
                                          },
                                          size=500)
        news = list(found.get('news', {}).get('data', []))
        store('news_relevant', 'news', len(news))

        news_read = self.mongo.log.user_log.find({
            'time': log_window(),
            'requestURL': self.news_read_url,
            'jsonRequest.payload.newsId': {
                '$in': news
            }
        })
        store('news_read', 'news', len(list(news_read)))

        # searches whose input equals the tag name, by identified users
        search = self.mongo.log.search.find({
            'time': log_window(),
            'query.input': tag,
            'userId': {
                '$ne': None
            }
        })
        store('search_precise', 'search', len(list(search)))
コード例 #15
0
ファイル: nice_download.py プロジェクト: yujiye/Codes
def test_get_code():
    """Write the company code of company id 261 to the file ``test_do``."""
    optimizer = Download_Optimization()
    with open("test_do", 'w') as out:
        code = dbutil.get_company_code(optimizer.db, 261)
        out.write(code)
コード例 #16
0
    def create_single(self, db, cid):
        """
        Build the universal-index document for one company.

        Returns early (after logging) when ``dbutil.get_company_index_type``
        is falsy for ``cid``.  Otherwise fills the ``company`` dict with the
        code, ranking score, name, analyzed aliases, tags/features, nested
        industry and topic tags, sector, description, round, status,
        investors, members, location and established month.

        NOTE(review): the portion of the method visible here only builds the
        dict; nothing is written to an index before the visible code ends --
        confirm against the full source.
        """

        global logger_universal_index
        # check whether to index this cid
        if not dbutil.get_company_index_type(db, cid):
            logger_universal_index.info('should not index %s' % cid)
            return

        company = {}
        # NOTE: `artifacts` is not used in the visible part of the method
        alias, artifacts = set(), set()
        company['ranking_score'] = dbutil.get_company_score(db, cid, 37020)

        # name is lower-cased and stripped of spaces here, so the later
        # name.lower() calls are no-ops.
        name = dbutil.get_company_name(db, cid).lower().replace(' ', '')
        code = dbutil.get_company_code(db, cid)
        company['id'] = code

        # short name
        alias.add(name.lower())
        # also index the joined output of lazy_pinyin() for the name
        alias.add(''.join(lazy_pinyin(name.lower())))
        # full name
        full = dbutil.get_company_corporate_name(db, cid, False)
        if full and full.strip():
            alias.add(full.lower())
            # adds a variant with the city names 北京 / 上海 / 深圳 / 成都 removed
            alias.add(full.lower().replace(u'北京',
                                           '').replace(u'上海', '').replace(
                                               u'深圳', '').replace(u'成都', ''))
        # artifact name
        aresults = dbutil.get_artifact_idname_from_cid(db, cid, True)
        if aresults:
            # keep only names for which self.valid_name returns a truthy value
            alias.update([
                self.valid_name(aname) for _, aname in aresults
                if self.valid_name(aname)
            ])
        # alias
        aliass = dbutil.get_alias_idname(db, cid)
        # alias lists with 20 or more entries are skipped entirely
        if aliass and len(aliass) < 20:
            alias.update([
                self.valid_name(aname) for _, aname in aliass
                if self.valid_name(aname)
            ])
        # corporate name
        corporate = dbutil.get_company_corporate_name(db, cid)
        if corporate and corporate.strip():
            alias.add(corporate.lower())
        # corporate full name
        # (same call as `full` above; alias is a set, so a repeat is harmless)
        corporate_full = dbutil.get_company_corporate_name(db, cid, False)
        if corporate_full and corporate_full.strip():
            alias.add(corporate_full.lower())
        # corporate alias
        corporate_alias = dbutil.get_corporate_alias(db, cid)
        # same cap as above: 20 or more corporate aliases are skipped
        if corporate_alias and len(corporate_alias) < 20:
            alias.update([
                self.valid_name(aname) for aname in corporate_alias
                if self.valid_name(aname)
            ])
        # check if there is a relevant digital coin
        dt = dbutil.get_company_digital_coin_info(db, cid)
        if dt:
            alias.add(dt.symbol.lower())
            # short name
            if dt.name:
                alias.add(dt.name.lower().replace(' ', ''))
            # english name
            if dt.enname:
                alias.add(dt.enname.lower())

        # create indice names
        company['name'] = name.lower()
        company['alias'] = self.analyze_names(alias)

        # tag
        tags_info = dbutil.get_company_tags_idname(db,
                                                   cid,
                                                   tag_out_type=(11000, 11001,
                                                                 11002))
        if tags_info:
            # tag names go to 'tags', tag ids to 'features'; weight is unused
            for tid, tname, weight in tags_info:
                company.setdefault('tags', []).append(tname.lower())
                company.setdefault('features', []).append(tid)
        # nested_tag collects industries and topics with their publish times
        company['nested_tag'] = []
        for industry in dbutil.get_company_industries(db, cid):
            company.setdefault('nested_tag', []).append({
                'id': industry.industryId,
                'published': industry.publishTime,
                "category": "industry"
            })
        for topic in dbutil.get_company_topics(db, cid):
            msg_publish = dbutil.get_topic_message_company_publish(db, topic)
            company.setdefault('nested_tag', []).append({
                'id': topic.topicId,
                'published': msg_publish,
                "category": "topic"
            })
            # topics that have an entry in self.topic_tags also add a tag
            topic_tag = self.topic_tags.get(topic.topicId)
            if topic_tag:
                company.setdefault('tags', []).append(topic_tag.lower())
        sectors = dbutil.get_company_sector_tag(db, cid)
        company['sector'] = sectors

        # description
        desc = dbutil.get_company_solid_description(db, cid)
        if desc and desc.strip():
            # segment the text, then drop stopwords and 1-character tokens
            desc = filter(lambda x: (x not in self.stopwords) and len(x) > 1,
                          list(self.seg.cut4search(desc)))
            company['description'] = (' '.join(desc)).lower()

        # round and investors and members
        # NOTE: the local name `round` shadows the builtin inside this method
        round = dbutil.get_company_round(db, cid)
        # a round of 0 is stored as 1000
        company['round'] = 1000 if round == 0 else round
        company['sort_round'] = dbutil.get_round_sort(db, company.get('round'))
        status = dbutil.get_company_status(db, cid)
        # statuses 2020 and 2025 are kept as-is; otherwise -1 when
        # get_company_ipo_status is truthy, else -2
        if status in {2020, 2025}:
            company['status'] = status
        elif dbutil.get_company_ipo_status(db, cid):
            company['status'] = -1
        else:
            company['status'] = -2
        company['investors'] = dbutil.get_company_investor_names(db, cid)
        company['investorId'] = dbutil.get_company_investors(db, cid)
        company['members'] = [
            name for _, name in dbutil.get_member_idname(db, cid)
        ]

        # location
        # only the location id is stored; lname is unused
        lid, lname = dbutil.get_company_location(db, cid)
        company['location'] = lid

        # establish date, create date, count of company message
        establish_date = dbutil.get_company_establish_date(db, cid)
        try:
            # stored as an integer of the form YYYYMM
            company['established'] = int(establish_date.strftime('%Y%m'))
        except Exception, e:
            # best effort: any failure here just leaves 'established' unset
            pass