Ejemplo n.º 1
0
def label_blockchain():
    """Set or clear company tag 175747 based on a saved classifier.

    For every id returned by ``dbutil.get_all_company_id`` the segmented
    words from ``Feeder.feed_seged`` are averaged into one word2vec vector
    and fed to the classifier loaded from ``models/175747.20180311.model``.

    * When the keyword literal is among the words, ``clf.predict`` decides.
    * Otherwise the tag is only set when column 1 of ``predict_proba``
      exceeds 0.9.
    * A company that already carries the tag but was not tagged in this run
      is updated with value 0 and ``active='N'``.

    A failure on one company is printed and does not stop the loop. The
    database connection is closed when the function exits.
    """
    tag_id = 175747
    db = dbcon.connect_torndb()
    try:
        feeder = Feeder()
        w2v = Word2Vec.load(word2vec_model)
        model_dir = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'models')
        clf = joblib.load(os.path.join(model_dir, '175747.20180311.model'))
        for cid in dbutil.get_all_company_id(db):
            print(cid)
            try:
                # Keep the words separately from the feature vector: the
                # keyword test below has to look at the words, not at the
                # averaged vector.
                words = list(feeder.feed_seged(cid))
                vectors = [w2v[w] for w in words if w in w2v]
                if not vectors:
                    # Averaging an empty list yields NaN, which the
                    # classifier cannot handle; skip explicitly instead.
                    print('Fail to classify %s, no word has a word2vec vector' % cid)
                    continue
                features = [np.mean(vectors, axis=0)]
                # NOTE(review): assumes feed_seged yields unicode tokens so
                # the membership test matches - confirm against Feeder.
                if u'区块链' in words:
                    tagged = clf.predict(features)[0] == 1
                else:
                    tagged = clf.predict_proba(features)[0][1] > 0.9
                if tagged:
                    dbutil.update_company_tag(db, cid, tag_id, 2.806, verify='N', active='Y')
                elif dbutil.exist_company_tag(db, cid, tag_id):
                    dbutil.update_company_tag(db, cid, tag_id, 0, verify='N', active='N')
            except Exception as e:
                print('Fail to classify, due to %s' % e)
    finally:
        db.close()
Ejemplo n.º 2
0
def c4_2(today=None):
    """Feed every company id to an AndroidUpdateCompanyTracker.

    :param today: forwarded unchanged as the second argument of ``feed``
        for each company; defaults to None.
    """
    tracker = AndroidUpdateCompanyTracker()
    conn = dbcon.connect_torndb()
    company_ids = dbutil.get_all_company_id(conn)
    for company_id in company_ids:
        tracker.feed(company_id, today)
    conn.close()
Ejemplo n.º 3
0
def summary_recruit_all(days=30, recruit_ratio=0.2, recruit_threshold=50):
    """Yield companies with a high number of recently updated job postings.

    For each company id, jobs in ``mongo.job.job`` whose
    ``recruit_company_id`` is one of the company's recruitment ids and whose
    ``updateDate`` falls within the last ``days`` days are counted.

    :param days: size of the look-back window, in days.
    :param recruit_ratio: when a positive headcount is known, the company is
        yielded if jobs / headcount (rounded to 2 decimals) exceeds this.
    :param recruit_threshold: when no positive headcount is known, the
        company is yielded if the job count exceeds this.
    :return: generator of ``(cid, ratio)`` tuples; ``ratio`` is 1 for
        companies selected through ``recruit_threshold``.

    The database connection is closed when the generator finishes, is
    closed early, or raises.
    """
    db = dbcon.connect_torndb()
    try:
        mongo = dbcon.connect_mongo()
        gte = datetime.now() - timedelta(days=days)
        for cid in dbutil.get_all_company_id(db):
            recruit_ids = dbutil.get_company_recruitments(db, cid)
            if not recruit_ids:
                continue
            query = {
                'recruit_company_id': {'$in': recruit_ids},
                'updateDate': {'$gte': gte},
            }
            # Stream the cursor to count matches instead of materialising
            # every job document in a list that is discarded right away.
            recruits = sum(1 for _ in mongo.job.job.find(query))
            headcount = dbutil.get_company_headcount_max(db, cid)
            if headcount and headcount > 0:
                ratio = round(recruits / float(headcount), 2)
                if ratio > recruit_ratio:
                    yield cid, ratio
            elif recruits > recruit_threshold:
                yield cid, 1
    finally:
        # In a generator, code after the loop only runs on exhaustion;
        # ``finally`` also covers early close() and exceptions.
        db.close()
Ejemplo n.º 4
0
def c1():
    """Feed every company id to a NewsCompanyTracker, then log 'tracked'.

    The database connection is closed once the loop ends, including when
    ``feed`` raises.
    """
    nct = NewsCompanyTracker()
    db = dbcon.connect_torndb()
    try:
        for cid in dbutil.get_all_company_id(db):
            nct.feed(cid)
    finally:
        db.close()
    logger_track.info('tracked')
Ejemplo n.º 5
0
def score_activation_full():
    """Run ``ActivationScorer.score`` on every company id, logging each one."""
    conn = dbcon.connect_torndb()
    activation_scorer = ActivationScorer()
    company_ids = dbutil.get_all_company_id(conn)
    for company_id in company_ids:
        activation_scorer.score(company_id)
        logger_activation.info('%s scored' % company_id)
    conn.close()
Ejemplo n.º 6
0
Archivo: key.py Proyecto: yujiye/Codes
    def extract_all(self):
        """Run ``self.extract`` on every company id.

        Each success is logged through ``logger_tag``; an exception raised
        for one company is logged with its traceback and the loop continues.
        The database connection is closed when the method exits.
        """
        db_back = dbcon.connect_torndb()
        try:
            for cid in dbutil.get_all_company_id(db_back):
                try:
                    self.extract(cid)
                    logger_tag.info('%s processed' % cid)
                except Exception as e:
                    logger_tag.exception('%s, %s' % (cid, e))
        finally:
            db_back.close()
Ejemplo n.º 7
0
    def dump_full(self):
        """Call ``self.get_similar`` for every company id.

        Each success is logged through ``logger_nlp``; an exception raised
        for one company is logged with its traceback and the loop continues.
        The database connection is closed when the method exits.
        """
        db = dbcon.connect_torndb()
        try:
            for cid in dbutil.get_all_company_id(db):
                try:
                    self.get_similar(cid)
                    logger_nlp.info('%s processed' % cid)
                except Exception as e:
                    logger_nlp.exception('%s failed, %s' % (cid, e))
        finally:
            db.close()
Ejemplo n.º 8
0
def c5001_5002():
    """
    5001 shareholder change
    5002 registered capital change
    update every day
    """
    gongshang_tracker = CompanyGongshangTracker()
    conn = dbcon.connect_torndb()
    logger_track.info('Processing gongshang change')
    company_ids = dbutil.get_all_company_id(conn)
    for company_id in company_ids:
        # The tracker receives the logger itself as its second argument.
        gongshang_tracker.feed(company_id, logger_track)
    conn.close()
Ejemplo n.º 9
0
def c4004():
    """
    4004 recruiting for core positions such as CTO, COO; update every day
    """
    recruit_tracker = CompanyRecruitTracker()
    conn = dbcon.connect_torndb()
    now = datetime.today()
    for company_id in dbutil.get_all_company_id(conn):
        for message in recruit_tracker.feed_4004(company_id, now):
            # Falsy items produced by feed_4004 are ignored.
            if not message:
                continue
            recruit_tracker.send_company_message_msg(message)
            logger_track.info('4004 %s' % company_id)
    conn.close()
Ejemplo n.º 10
0
def c4002():
    """
    4002 no new positions posted for a long time; update every week
    """
    recruit_tracker = CompanyRecruitTracker()
    conn = dbcon.connect_torndb()
    now = datetime.today()
    for company_id in dbutil.get_all_company_id(conn):
        message = recruit_tracker.feed_4002(company_id, now)
        # A falsy result from feed_4002 means there is nothing to send.
        if not message:
            continue
        recruit_tracker.send_company_message_msg(message)
        logger_track.info('4002 %s' % company_id)
    conn.close()
Ejemplo n.º 11
0
    def dump(self, fetch_model='default'):
        """Store the companies produced by ``self.generate_comps`` for each company.

        :param fetch_model: ``'makeup'`` iterates the ids from
            ``dbutil.get_all_company_id_makeups``; any other value iterates
            ``dbutil.get_all_company_id``.

        Results are written with ``dbutil.update_company_rels`` using
        ``feedback_threshold=0.5``. A failure on one company is logged with
        its traceback and the loop continues. The database connection is
        closed when the method exits.
        """
        db = dbcon.connect_torndb()
        try:
            if fetch_model == 'makeup':
                all_cids = dbutil.get_all_company_id_makeups(db)
            else:
                all_cids = dbutil.get_all_company_id(db)
            for cid in all_cids:
                try:
                    dbutil.update_company_rels(db,
                                               cid,
                                               self.generate_comps(cid),
                                               feedback_threshold=0.5)
                    logger_nlp.info('%s has similar companies now' % cid)
                except Exception as e:
                    logger_nlp.exception(
                        'fail to find similar company for %s, %s' % (cid, e))
        finally:
            db.close()
Ejemplo n.º 12
0
    def create_indice(self):
        """Create an index entry for every company via ``self.create_single``.

        Runs ``self.__check()`` first, loads ``self.topic_tags`` from
        ``dbutil.get_topic_corresponding_tags``, logs the ES info and
        config, then indexes each company id. A failure on one company is
        logged with its traceback and the loop continues. The database
        connection is closed when the method exits.
        """
        self.__check()
        db = dbcon.connect_torndb()
        try:
            self.topic_tags = dbutil.get_topic_corresponding_tags(db)
            logger_universal_index.info('Start to create indice')
            logger_universal_index.info(str(self.es.info()))
            logger_universal_index.info('ES Config %s' %
                                        str(tsbconfig.get_es_config()))
            for cid in dbutil.get_all_company_id(db):
                try:
                    self.create_single(db, cid)
                    logger_universal_index.info(
                        '%s index created, %s' %
                        (cid, dbutil.get_company_name(db, cid)))
                except Exception as e:
                    logger_universal_index.exception('%s failed # %s' % (cid, e))
        finally:
            db.close()
Ejemplo n.º 13
0
def classify_founder():
    """Score every company with FounderScorer and tag those scoring >= 0.5.

    Qualifying companies get tag 309128 written through
    ``dbutil.update_company_tag`` with the score as its value, and an info
    line is logged.
    """
    conn = dbcon.connect_torndb()
    founder_scorer = FounderScorer()
    company_ids = dbutil.get_all_company_id(conn)
    for company_id in company_ids:
        team_score = founder_scorer.score(company_id)
        if team_score >= 0.5:
            # 309128: outstanding team
            dbutil.update_company_tag(conn, company_id, 309128, team_score, "Y")
            logger_yl.info('Outstanding team: %s insert' % company_id)
        # Disabled checks, kept for reference:
        # if fs.has_QBFJ(cid):
        #     # team from Tsinghua / Peking / Fudan / Jiaotong universities
        #     logger_yl.info('Has QBFJ: %s insert' % cid)
        # if fs.has_overseas(cid):
        #     # team of overseas returnees
        #     logger_yl.info('Has overseas: %s insert' % cid)
        # if fs.has_serial_entrepreneur(cid):
        #     # serial entrepreneur
        #     logger_yl.info('Has serial entrepreneur' % cid)
    conn.close()
Ejemplo n.º 14
0
    def __iter__(self):
        """Yield the lower-cased, filtered word list of each qualifying company.

        A company is skipped when its score is missing or not greater than
        ``complete_threshold``, when segmentation yields no words, or when
        fewer than ``description_len_threshold`` words remain after
        ``self.default_filter``.

        Side effects: ``self.max_id`` is raised to the largest id that
        passed the score check, and ``self.mapping_id2in`` /
        ``self.mapping_in2id`` record the position of each yielded company.

        The database connection is closed when iteration finishes, is
        abandoned early, or raises.
        """
        db = dbcon.connect_torndb()
        try:
            index = 0
            for cid in dbutil.get_all_company_id(db):
                contents = self.feeder.feed_string(cid)
                score = dbutil.get_company_score(db, cid)
                if not (score and score > complete_threshold):
                    continue
                # max_id is updated before the word filters, so it can
                # exceed the largest id that is actually yielded.
                if int(cid) > self.max_id:
                    self.max_id = int(cid)
                words = list(self.segmenter.cut(contents))
                if not words:
                    continue
                words = self.default_filter(words)
                if len(words) < description_len_threshold:
                    continue

                self.mapping_id2in[cid] = index
                self.mapping_in2id[index] = cid
                index += 1
                yield [word.lower() for word in words]
        finally:
            # In a generator, code after the loop only runs on exhaustion;
            # ``finally`` also covers early close() and exceptions.
            db.close()
Ejemplo n.º 15
0
        self.__check()

        db = dbcon.connect_torndb()
        self.logger.info('Start to create indice')
        self.logger.info(str(self.es.info()))
        self.logger.info('ES Config %s' % str(tsbconfig.get_es_config()))
        try:
            self.logger.info('Start to create location & tag indice')
            self.create_indice_completion_locations(db)
            self.create_indice_completion_keywords(db)
        except Exception, e:
            self.logger.exception('location indice & tag failed')
            self.logger.exception(e)

        for cid in dbutil.get_all_company_id(db):
            try:
                self.create_single(db, cid)
                self.logger.info('%s index created, %s' %
                                 (cid, dbutil.get_company_name(db, cid)))
            except Exception, e:
                self.logger.exception('%s failed # %s' % (cid, e))
        db.close()

    def create_single(self, db, cid):
        """
        create a single index for a particular company,
        completion id consists of its type and original id, including
            cxxxx, fxxx, axxxx, pxxxx, nxxxx, standing for company, full, artifact, product, nick
            kxxxx, keyword
        """