Beispiel #1
0
        def save_result(data):
            """Name one toutiao news record and pass it to save_data_to_mongodb.

            ``data`` is indexed by 'publish_time', 'url' and 'id'.  Its
            'item_id' entry, when present, is removed (in place) before the
            record is handed on.  Nothing is saved when ``get_result_name``
            returns a falsy value.

            Saving is best-effort: an exception raised anywhere in here is
            reported on stderr with its traceback and is not propagated to
            the caller.
            """
            # Function-scope import so this block does not rely on the
            # file-level import list.
            import traceback

            try:
                # 'item_id' is optional; pop() with a default removes it
                # without raising when the key is absent.
                data.pop('item_id', None)

                result_file = get_result_name(plantform_e='toutiao',
                                              plantform_c='今日头条',
                                              date_time=data['publish_time'],
                                              urlOruid=data['url'],
                                              newsidOrtid=data['id'],
                                              datatype='news',
                                              full_data=data)
                if not result_file:
                    return
                # Single-argument print: same output as the Python 2
                # statement form "print now, '--------', result_file".
                print('%s %s %s' % (datetime.datetime.now(), '--------', result_file))

                save_data_to_mongodb(data={'data': data},
                                     item_id=result_file,
                                     platform_e='toutiao',
                                     platform_c='今日头条',
                                     cache_data_list=self.cache_data_list)
            except Exception:
                # Keep the caller running, but make the failure visible
                # instead of discarding it.
                traceback.print_exc()
Beispiel #2
0
        def save_result(data):
            """Name one chengshiluntan forum record, stamp it, and save it.

            Reads data['publish_time'], data['url'] and data['id'] to build
            the result name.  When ``get_result_name`` returns a falsy value
            the record is left untouched and nothing is saved.  Otherwise
            the name is printed with the current time, data['spider_time']
            is set to the current time, and the record is passed to
            ``save_data_to_mongodb``.
            """
            time_format = '%Y-%m-%d %H:%M:%S'

            # Look the fields up in the same order the name builder uses them.
            published = data['publish_time']
            page_url = data['url']
            record_id = data['id']

            result_file = get_result_name(
                plantform_e='chengshiluntan',
                plantform_c='城市论坛',
                date_time=published,
                urlOruid=page_url,
                newsidOrtid=record_id,
                datatype='forum',
                full_data=data,
            )

            if result_file:
                logged_at = datetime.datetime.now().strftime(time_format)
                print('%s %s %s' % (logged_at, '--------', result_file))

                # The stored timestamp is taken separately from the printed one.
                data['spider_time'] = datetime.datetime.now().strftime(time_format)

                save_data_to_mongodb(
                    data={'data': data},
                    platform_c='城市论坛',
                    platform_e='chengshiluntan',
                    item_id=result_file,
                    cache_data_list=self.cache_data_list,
                )
Beispiel #3
0
        def save_result(data):
            """Name one PengPai news record and pass it to save_data_to_mongodb.

            ``data`` is indexed by 'publish_time', 'url' and 'id'.  Its
            'is_movie' entry, when present, is removed (in place) first.
            Nothing is saved when ``get_result_name`` returns a falsy value.

            Raises:
                KeyError: if 'publish_time', 'url' or 'id' is missing.
            """
            # 'is_movie' is optional: drop it quietly rather than printing
            # the KeyError text for every record that lacks it.
            data.pop('is_movie', None)

            result_file = get_result_name(plantform_e='PengPai',
                                          plantform_c='澎湃新闻',
                                          date_time=data['publish_time'],
                                          urlOruid=data['url'],
                                          newsidOrtid=data['id'],
                                          datatype='news',
                                          full_data=data)
            if not result_file:
                return

            # NOTE(review): `datetime.now()` only works if `datetime` is the
            # class here (from datetime import datetime) - confirm against
            # this file's imports.
            print('%s %s %s' % (datetime.now(), '---', result_file))

            save_data_to_mongodb(data={'data': data},
                                 item_id=result_file,
                                 platform_c='澎湃新闻',
                                 platform_e='PengPai',
                                 cache_data_list=self.cache_data_list)
Beispiel #4
0
        def save_result(data):
            """Name one xjbtssbtszhdj news record, stamp it, and save it.

            Builds the result name from data['publish_time'], data['url']
            and data['id'].  Returns without saving when that name is falsy.
            Otherwise prints the name with the current time, writes the
            current time to data['spider_time'], and passes the record to
            ``save_data_to_mongodb`` together with ``self.cache_data_Queue``.
            """
            site_en = 'xjbtssbtszhdj'
            site_cn = '第十师北屯市智慧党建'

            def current_time():
                # Evaluated at each call, so the printed and stored stamps
                # are produced independently.
                return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            result_file = get_result_name(
                plantform_e=site_en, plantform_c=site_cn,
                date_time=data['publish_time'], urlOruid=data['url'],
                newsidOrtid=data['id'], datatype='news', full_data=data)
            if not result_file:
                return

            print('%s %s %s' % (current_time(), '--------', result_file))
            data['spider_time'] = current_time()

            save_data_to_mongodb(
                data={'data': data},
                item_id=result_file,
                platform_e=site_en,
                platform_c=site_cn,
                cache_data_list=self.cache_data_Queue)
Beispiel #5
0
def saveData(content, filename, platform_e, platform_c, news_type):  # save data
    """Decode a JSON record, name it, and pass it to save_data_to_mongodb.

    Args:
        content: JSON text. The decoded object must have a 'data' entry
            that is indexed by 'publish_time', 'url' and 'id'.
        filename: Not used by this function; kept so existing callers
            keep working.
        platform_e: Forwarded to ``get_result_name`` (as ``plantform_e``)
            and to ``save_data_to_mongodb``.
        platform_c: Forwarded to ``get_result_name`` (as ``plantform_c``)
            and to ``save_data_to_mongodb``.
        news_type: Forwarded to ``get_result_name`` as ``datatype``.

    Raises:
        ValueError: if ``content`` is not valid JSON.
        KeyError: if 'data' or one of its required entries is missing.
    """
    content = json.loads(content)
    record = content['data']

    result_file = get_result_name(plantform_e=platform_e,
                                  plantform_c=platform_c,
                                  date_time=record['publish_time'],
                                  urlOruid=record['url'],
                                  newsidOrtid=record['id'],
                                  datatype=news_type,
                                  full_data=content)
    # NOTE(review): presumably a falsy name means the record should be
    # skipped rather than stored under an empty item_id - confirm against
    # get_result_name.
    if not result_file:
        return

    save_data_to_mongodb(data=content,
                         item_id=result_file,
                         platform_e=platform_e,
                         platform_c=platform_c)