Example #1
0
def crawler_restart():
    """Ask the crawler to restart via Redis pub/sub and wait for its ack.

    Publishes 'restart' on the 'crawler_signals' channel, then polls the
    'crawler_signals_feedback' channel for up to ~2 minutes for a
    'restarted' acknowledgement.

    Returns:
        A JSON response: {'is_success': True} if the ack arrived in time,
        {'is_success': False} on timeout.
    """
    r = get_redis()
    r.publish('crawler_signals', 'restart')

    sub = r.pubsub()
    sub.subscribe(['crawler_signals_feedback'])
    try:
        # Poll once per second; give up after ~120 seconds.
        for _ in range(121):
            msg = sub.get_message()
            # get_message() returns None when no message is pending. On
            # Python 3, redis-py delivers 'data' as bytes unless the client
            # was created with decode_responses=True, so accept both forms.
            if msg and msg.get('data') in ('restarted', b'restarted'):
                return jsonify({'is_success': True})
            time.sleep(1)
        return jsonify({'is_success': False})
    finally:
        # Always release the pub/sub connection back to the pool.
        sub.close()
Example #2
0
def tmt_wechat_import():
    """Import a WeChat article (posted as JSON) into the items collection.

    Only articles from a whitelisted set of WeChat account names are
    stored; anything else is silently accepted but not persisted.

    Returns:
        A JSON response. On bad/empty input: {'is_success': False,
        'error_msg': 'ARGS ERROR'}. Otherwise {'is_success': True,
        'filtered_data': {...}} where filtered_data is empty when the
        source was not whitelisted, or carries the inserted document id
        and the Redis publish return value when it was.
    """
    items = get_collection('items')
    fields = request.get_json(force=True)

    if not fields:
        return jsonify({'is_success': False, 'error_msg': 'ARGS ERROR'})

    # Whitelist of accepted WeChat account names; a set gives O(1)
    # membership instead of scanning a ~60-entry list.
    allowed_sources = {
        u'腾讯科技',
        u'科技每日推送',
        u'科技最前线',
        u'财新TMT',
        u'网易科技',
        u'新浪科技',
        u'凤凰科技',
        u'慧聪TMT',
        u'蓝媒TMT',
        u'蓝鲸TMT',
        u'阿玻罗金融科技',
        u'TMT每日观察',
        u'朱劲松-TMT观察',
        u'TMT观察',
        u'杨吉TMT',
        u'搜狐科技',
        u'雷锋网',
        u'36氪',
        u'虎嗅网',
        u'21世纪经济报道',
        u'创业邦杂志',
        u'中国经营报',
        u'经济观察报',
        u'铅笔道',
        u'财新网',
        u'并购汪',
        u'亿欧网',
        u'新智元',
        u'猎云网',
        u'机器之心',
        u'海外情报社',
        u'FT中文网',
        u'界面',
        u'雷帝触网',
        u'好奇心日报',
        u'商业周刊中文版',
        u'环球老虎财经',
        u'钛媒体',
        u'PingWest品玩',
        u'速途网',
        u'第一财经',
        u'秦朔朋友圈',
        u'IT桔子',
        u'DoNews',
        u'动点科技',
        u'全球企业动态',
        u'蓝鲸财经网',
        u'财经天下周刊',
        u'VRAR创投圈',
        u'蓝鲸财经记者工作平台',
        u'B楼12座',
        u'经纬创投',
        u'小道消息',
        u'VR时代',
        u'财经女记者部落',
        u'真格基金',
        u'峰瑞资本',
        u'keso怎么看',
        u'一见',
    }

    filtered_data = {}

    if fields.get('wechatName') in allowed_sources:
        inserted = items.insert_one({
            "content": fields.get('content', ''),
            # Composite source key: "<wechatId>__<originId>".
            "source": u'{}__{}'.format(fields.get('wechatId', ''),
                                       fields.get('originId', '')),
            "host": 'http://mp.weixin.qq.com/',
            "link": fields.get('sourceUrl', ''),
            "time": fields.get('createdAt', ''),
            "name": u"微信 - %s" % fields.get('wechatName'),
            "title": fields.get('title', ''),
            "time_human": "",
            "ctime": int(time.time()),
            "coops": ["tmt"],
        })
        # Notify downstream consumers that a new item is available.
        redis = get_redis()
        rt = redis.publish('news_updated', str(inserted.inserted_id))
        filtered_data = {
            'db_inserted_id': str(inserted.inserted_id),
            'redis_published_return': rt,
        }

    return jsonify({'is_success': True, 'filtered_data': filtered_data})
Example #3
0
def app_push_import():
    """Import an APP push notification (posted as JSON) into the items DB.

    The request must carry a matching 'auth_key'. Each push is first
    upserted into 'app_push_items' keyed by an MD5 of its title/package/
    content text to detect duplicates; only pushes from known packages
    are then copied into the main 'items' collection and announced over
    Redis.

    Returns:
        A JSON response: AUTH ERROR on a bad key, ARGS ERROR on an empty
        payload, DUPLICATED ERROR when the push was already seen,
        otherwise {'is_success': True, 'filtered_data': {...}}.
    """
    import_items = get_collection('app_push_items')
    items = get_collection('items')
    # get_json(force=True) may yield None on an unparsable body; fall back
    # to {} so the auth check below fails cleanly instead of raising.
    fields = request.get_json(force=True) or {}

    # NOTE(review): shared-secret auth with a hard-coded key; a
    # constant-time compare (hmac.compare_digest) would be preferable.
    auth_key = fields.pop('auth_key', None)
    if auth_key != '16b4af8':
        return jsonify({'is_success': False, 'error_msg': 'AUTH ERROR'})

    if not fields:
        return jsonify({'is_success': False, 'error_msg': 'ARGS ERROR'})

    # Dedup key: MD5 over title + packageName + contentText (MD5 is fine
    # here — it is a cache key, not a security primitive).
    unique_title = fields.get('title', '') + fields.get(
        'packageName', 'N') + fields.get('contentText', '')
    uid = hashlib.md5(unique_title.encode('utf-8')).hexdigest()
    fields['uid'] = uid

    result = import_items.update_one({'uid': uid}, {'$set': fields},
                                     upsert=True)

    # matched_count > 0 means the uid already existed -> duplicate push.
    if result.matched_count > 0:
        return jsonify({'is_success': False, 'error_msg': 'DUPLICATED ERROR'})

    # Known apps and how to map their push payloads onto an item.
    sources_map = [
        {
            'title': u'新华社',
            'packageName': "net.xinhuamm.mainclient",
            'titleField': 'contentText',
            'bodyField': '',
            'website': 'http://www.xinhuanet.com/',
        },
        {
            'title': u'人民日报',
            'packageName': "com.peopledailychina.activity",
            'titleField': 'title',
            'bodyField': 'contentText',
            'website': 'http://www.people.com.cn/',
        },
        {
            'title': u'二十一财经',
            'packageName': "com.twentyfirstcbh.epaper",
            'titleField': 'contentText',
            'bodyField': '',
            'website': 'http://www.21jingji.com/',
        },
    ]

    filtered_data = {}
    for source in sources_map:
        if fields.get('packageName') != source.get('packageName'):
            continue

        mname = 'APP_%s' % fields.get('packageName')
        inserted = items.insert_one({
            # An empty 'bodyField' means this app's pushes have no body.
            "content": (fields.get(source.get('bodyField'))
                        if source.get('bodyField') else ''),
            "source": mname,
            "host": fields.get('packageName'),
            "link": source.get('website'),
            "time": fields.get('notifyTime'),
            "name": u"%s - APP推送" % source.get('title'),
            "title": fields.get(source.get('titleField')),
            "time_human": "",
            "ctime": int(time.time()),
        })
        # Notify downstream consumers that a new item is available.
        redis = get_redis()
        rt = redis.publish('news_updated', str(inserted.inserted_id))
        filtered_data = {
            'db_inserted_id': str(inserted.inserted_id),
            'redis_published_return': rt,
        }
        break

    return jsonify({'is_success': True, 'filtered_data': filtered_data})
Example #4
0
def crawler_start():
    """Broadcast a 'start' command to the crawler over Redis pub/sub."""
    get_redis().publish('crawler_signals', 'start')
    return jsonify({'is_success': True})
Example #5
0
def crawler_reload():
    """Broadcast a 'reload' command to the crawler over Redis pub/sub."""
    get_redis().publish('crawler_signals', 'reload')
    return jsonify({'is_success': True})