Beispiel #1
0
    def parse_ibrebates(self, item):
        """Insert an ``Ibrebates`` row, or refresh the stored row with the same name.

        A model instance is built from ``item``. If no stored row shares its
        ``name`` the instance is added; otherwise the attributes listed in
        ``refreshable`` are copied from the instance onto the stored row.

        :param item: mapping of keyword arguments accepted by ``Ibrebates``.
        """
        refreshable = (
            "description", "spread_type", "om_spread", "gold_spread",
            "offshore", "a_share", "regulatory_authority",
            "trading_varieties", "platform_type", "account_type",
            "scalp", "hedging", "min_transaction", "least_entry",
            "maximum_leverage", "maximum_trading", "deposit_method",
            "entry_method", "commission_fee", "entry_fee",
            "account_currency", "rollovers", "explosion_proportion",
            "renminbi",
        )
        record = Ibrebates(**item)
        with session_scope(self.sess) as session:
            existing = session.query(Ibrebates.id).filter(
                and_(Ibrebates.name == record.name)).one_or_none()

            if existing is None:
                session.add(record)
                return

            # Attributes the instance does not expose are skipped silently.
            absent = object()
            changes = {}
            for column in refreshable:
                value = getattr(record, column, absent)
                if value is not absent:
                    changes[column] = value

            if changes:
                session.query(Ibrebates).filter(
                    Ibrebates.id == existing[0]).update(changes)
Beispiel #2
0
    def parse_fx678_jiedu(self, item):
        """Store an fx678 "jiedu" record, or push its fields to the calendar table.

        When no ``CrawlFx678EconomicJiedu`` row has the same ``dataname_id``
        the new instance is added. Otherwise the fields in ``copied_fields``
        are written to the ``CrawlEconomicCalendar`` rows with that
        ``dataname_id``.

        :param item: mapping of keyword arguments accepted by
            ``CrawlFx678EconomicJiedu``.
        """
        copied_fields = (
            'next_pub_time', 'pub_agent', 'pub_frequency', 'count_way',
            'data_influence', 'data_define', 'funny_read',
        )
        with session_scope(self.sess) as session:
            jiedu = CrawlFx678EconomicJiedu(**item)
            same_dataname = and_(
                CrawlFx678EconomicJiedu.dataname_id == jiedu.dataname_id)
            match = session.query(
                CrawlFx678EconomicJiedu.dataname_id).filter(
                    same_dataname).one_or_none()

            if not match:
                session.add(jiedu)
                return

            # NOTE(review): the existence check reads CrawlFx678EconomicJiedu
            # but the update targets CrawlEconomicCalendar -- confirm this is
            # intended and not a copy-paste slip.
            values = dict(
                (name, getattr(jiedu, name)) for name in copied_fields)
            session.query(CrawlEconomicCalendar).filter(
                CrawlEconomicCalendar.dataname_id ==
                jiedu.dataname_id).update(values)
Beispiel #3
0
    def parse_weibo(self, item):
        """Add a ``Crawl_Weibo`` row unless one with the same ``source_id`` exists.

        :param item: mapping of ``Crawl_Weibo`` keyword arguments; must
            contain ``'source_id'``.
        """
        with session_scope(self.sess) as session:
            lookup = session.query(Crawl_Weibo.id).filter(
                Crawl_Weibo.source_id == item['source_id'])
            if lookup.one_or_none():
                # Already stored: nothing to do.
                return
            session.add(Crawl_Weibo(**item))
    def parse_ssi_trends_today(self, item):
        """Bulk-add one ``CrawlSsiTrend`` row for every entry of ``item``.

        :param item: mapping whose values are mappings of ``CrawlSsiTrend``
            keyword arguments.
        """
        with session_scope(self.sess) as session:
            trends = [CrawlSsiTrend(**item[key]) for key in item]
            if trends:
                session.add_all(trends)
Beispiel #5
0
    def parse_weibo_seach(self, item):
        """Add a ``Crawl_Weibo_Hotkey`` row unless its ``source_id`` is already stored.

        :param item: mapping of keyword arguments accepted by
            ``Crawl_Weibo_Hotkey``.
        """
        with session_scope(self.sess) as session:
            candidate = Crawl_Weibo_Hotkey(**item)
            same_source = (
                Crawl_Weibo_Hotkey.source_id == candidate.source_id)
            existing = session.query(
                Crawl_Weibo_Hotkey.id).filter(same_source).one_or_none()

            if existing is not None:
                return
            session.add(candidate)
Beispiel #6
0
    def parse_sentence(self, item):
        """Insert a ``Sentence`` row, or update the stored row with the same ``source_id``.

        :param item: mapping of keyword arguments accepted by ``Sentence``;
            on update it is also passed as the column/value mapping.
        """
        with session_scope(self.sess) as session:
            candidate = Sentence(**item)
            same_source = Sentence.source_id == candidate.source_id
            hit = session.query(Sentence.id).filter(same_source).one_or_none()

            if hit is not None:
                session.query(Sentence).filter(
                    Sentence.id == hit[0]).update(item)
            else:
                session.add(candidate)
Beispiel #7
0
    def parse_weixin_detail(self, item):
        """Attach an article body to an already-stored ``Crawl_Weixin_Search`` row.

        The search row is looked up by ``item['source_id']``. When found, a
        ``CrawlWeixinArticleDetail`` carrying the same id and
        ``item['body']`` is added; when not found nothing is written.

        :param item: mapping containing ``'source_id'`` and ``'body'``.
        """
        with session_scope(self.sess) as session:
            search_row = session.query(Crawl_Weixin_Search.id).filter(
                Crawl_Weixin_Search.source_id ==
                item['source_id']).one_or_none()

            if not search_row:
                return

            detail = CrawlWeixinArticleDetail()
            detail.id = search_row[0]
            detail.body = item['body']
            session.add(detail)
Beispiel #8
0
    def parse_agent(self, item):
        """Insert a ``LianjiaAgent`` row, or update the stored row with the same ``agent_id``.

        :param item: mapping of keyword arguments accepted by
            ``LianjiaAgent``; on update it is also passed as the
            column/value mapping.
        """
        with session_scope(self.sess) as session:
            agent = LianjiaAgent(**item)
            # Select LianjiaAgent.id: selecting another model's id while
            # filtering on LianjiaAgent joins the two tables and yields ids
            # that do not belong to the agent being matched.
            existing = session.query(LianjiaAgent.id).filter(
                LianjiaAgent.agent_id == agent.agent_id).one_or_none()

            if existing is None:
                session.add(agent)
            else:
                session.query(LianjiaAgent).filter(
                    LianjiaAgent.id == existing[0]).update(item)
Beispiel #9
0
    def parse_lianjia_house(self, item):
        """Insert a ``LianjiaHouse`` row, or refresh the stored row with the same ``house_id``.

        On refresh, only the columns in ``refreshed_columns`` whose value on
        the freshly built instance is not ``None`` are written.

        :param item: mapping of keyword arguments accepted by
            ``LianjiaHouse``.
        """
        refreshed_columns = (
            'price', 'layout', 'area', 'direction', 'elevator',
            'residential_id', 'flood', 'images', 'district',
            'apartment_structure', 'street', 'address', 'building_type',
            'ladder', 'heating', 'property_term', 'list_time', 'ownership',
            'last_trade', 'purpose', 'hold_years', 'mortgage',
            'house_register', 'core_point', 'periphery', 'traffic',
            'residential_desc', 'layout_desc', 'img_layout', 'layout_datas',
            'renovation', 'state',
        )
        with session_scope(self.sess) as session:
            house = LianjiaHouse(**item)
            stored = session.query(LianjiaHouse.id).filter(
                and_(LianjiaHouse.house_id == house.house_id)).one_or_none()

            if not stored:
                session.add(house)
                return

            changes = {}
            for column in refreshed_columns:
                value = getattr(house, column)
                if value is not None:
                    changes[column] = value

            session.query(LianjiaHouse).filter(
                LianjiaHouse.house_id == house.house_id).update(changes)
Beispiel #10
0
    def parse_lianjia_feedback(self, item):
        """Insert a ``LianjiaFeedback`` row, or update the stored row for the same agent and house.

        Rows are matched on the ``agent_id`` / ``hourse_id`` pair.

        :param item: mapping of keyword arguments accepted by
            ``LianjiaFeedback``; on update it is also passed as the
            column/value mapping.
        """
        with session_scope(self.sess) as session:
            feedback = LianjiaFeedback(**item)
            existing = session.query(LianjiaFeedback.id).filter(
                and_(LianjiaFeedback.agent_id == feedback.agent_id,
                     LianjiaFeedback.hourse_id ==
                     feedback.hourse_id)).one_or_none()

            if existing is None:
                # Add the mapped instance; the raw ``item`` mapping is not
                # something the session can persist.
                session.add(feedback)
            else:
                session.query(LianjiaFeedback).filter(
                    LianjiaFeedback.id == existing[0]).update(item)
Beispiel #11
0
    def parse_lianjia_residential(self, item):
        """Insert a ``LianjiaResidential`` row, or update the one with the same ``residential_id``.

        :param item: mapping of keyword arguments accepted by
            ``LianjiaResidential``; on update it is also passed as the
            column/value mapping.
        """
        with session_scope(self.sess) as session:
            residential = LianjiaResidential(**item)
            same_residential = and_(
                LianjiaResidential.residential_id ==
                residential.residential_id)
            stored = session.query(
                LianjiaResidential.id).filter(same_residential).one_or_none()

            if stored is not None:
                session.query(LianjiaResidential).filter(
                    LianjiaResidential.id == stored[0]).update(item)
            else:
                session.add(residential)
Beispiel #12
0
    def parse_anjuke_lianjia_residential(self, item):
        """Dispatch on ``item['type']`` to update a residential row or upsert an agent.

        * ``"residential"``: the remaining entries of ``item`` are written to
          the ``CrawlAnjukeResidential`` rows whose ``residential_id`` equals
          ``item['anjuke_residential_id']``.
        * ``'agent'``: a ``LianjiaAgent`` row is inserted, or the stored row
          with the same ``agent_id`` is updated.

        Any other type is ignored. The routing keys (``'type'`` and, for
        residential items, ``'anjuke_residential_id'``) are removed from
        ``item`` in place, as before.

        :param item: mutable mapping containing ``'type'``.
        """
        kind = item['type']
        del item['type']

        if kind == "residential":
            anjuke_id = item['anjuke_residential_id']
            del item['anjuke_residential_id']
            with session_scope(self.sess) as session:
                session.query(CrawlAnjukeResidential).filter(
                    CrawlAnjukeResidential.residential_id ==
                    anjuke_id).update(item)
        elif kind == 'agent':
            with session_scope(self.sess) as session:
                agent = LianjiaAgent(**item)
                # Select LianjiaAgent.id: selecting another model's id while
                # filtering on LianjiaAgent joins the two tables and yields
                # ids that do not belong to the agent being matched.
                existing = session.query(LianjiaAgent.id).filter(
                    LianjiaAgent.agent_id == agent.agent_id).one_or_none()

                if existing is None:
                    session.add(agent)
                else:
                    session.query(LianjiaAgent).filter(
                        LianjiaAgent.id == existing[0]).update(item)
Beispiel #13
0
    def parse_lianjia_visited(self, item):
        """Insert a ``LianjiaVisited`` row, or update the stored row for the same visit.

        Rows are matched on the ``agent_id`` / ``visited_time`` pair.

        :param item: mapping of keyword arguments accepted by
            ``LianjiaVisited``; on update it is also passed as the
            column/value mapping.
        """
        with session_scope(self.sess) as session:
            visit = LianjiaVisited(**item)
            existing = session.query(LianjiaVisited.id).filter(
                and_(
                    LianjiaVisited.agent_id == visit.agent_id,
                    LianjiaVisited.visited_time ==
                    visit.visited_time)).one_or_none()

            if existing is None:
                # Add the mapped instance; the raw ``item`` mapping is not
                # something the session can persist.
                session.add(visit)
            else:
                session.query(LianjiaVisited).filter(
                    LianjiaVisited.id == existing[0]).update(item)
Beispiel #14
0
    def parse_weixin_search(self, item):
        """Bulk-add the ``Crawl_Weixin_Search`` entries whose ``source_id`` is not stored yet.

        :param item: mapping whose values are mappings of
            ``Crawl_Weixin_Search`` keyword arguments.
        """
        with session_scope(self.sess) as session:
            fresh = []
            for key in item:
                candidate = Crawl_Weixin_Search(**item[key])
                already_stored = session.query(
                    Crawl_Weixin_Search.id).filter(
                        Crawl_Weixin_Search.source_id ==
                        candidate.source_id).one_or_none()

                if already_stored is not None:
                    continue
                fresh.append(candidate)

            if fresh:
                session.add_all(fresh)
Beispiel #15
0
    def parse_cgse(self, item):
        """Insert a ``Cgse`` row, or update the stored row with the same ``idr``.

        On update only the truthy entries of ``item`` are written, so empty
        or zero values never overwrite stored data.

        :param item: mapping of keyword arguments accepted by ``Cgse``.
        """
        # Single-argument call form prints the same text on Python 2 and 3.
        print(item)
        with session_scope(self.sess) as session:
            cgse = Cgse(**item)
            existing = session.query(Cgse.id).filter(
                and_(Cgse.idr == cgse.idr)).one_or_none()

            if existing:
                changes = dict((key, item[key]) for key in item if item[key])
                session.query(Cgse).filter(
                    Cgse.id == existing[0]).update(changes)
            else:
                session.add(cgse)
Beispiel #16
0
    def parse_anjuke_residentail(self, item):
        """Insert a ``CrawlAnjukeResidential`` row, or update the one with the same ``residential_id``.

        :param item: mapping of keyword arguments accepted by
            ``CrawlAnjukeResidential``; on update it is also passed as the
            column/value mapping.
        """
        with session_scope(self.sess) as session:
            residential = CrawlAnjukeResidential(**item)
            same_residential = (
                CrawlAnjukeResidential.residential_id ==
                residential.residential_id)

            # The existence check must select from the table it filters on;
            # selecting a different model's column would join the two tables.
            existing = session.query(
                CrawlAnjukeResidential.residential_id).filter(
                    same_residential).one_or_none()

            if existing:
                session.query(CrawlAnjukeResidential).filter(
                    same_residential).update(item)
            else:
                session.add(residential)
    def parse_jiedu(self, item):
        """Insert a ``CrawlEconomicJiedu`` row, or update the one with the same ``dataname_id``.

        On update only the truthy entries of ``item`` are written.

        :param item: mapping of keyword arguments accepted by
            ``CrawlEconomicJiedu``.
        """
        with session_scope(self.sess) as session:
            jiedu = CrawlEconomicJiedu(**item)
            existing = session.query(CrawlEconomicJiedu.dataname_id).filter(
                CrawlEconomicJiedu.dataname_id ==
                jiedu.dataname_id).one_or_none()

            if existing:
                # Each column gets its own value (item[key]), not the whole
                # item mapping.
                changes = dict((key, item[key]) for key in item if item[key])
                session.query(CrawlEconomicJiedu).filter(
                    CrawlEconomicJiedu.dataname_id ==
                    existing[0]).update(changes)
            else:
                session.add(jiedu)
Beispiel #18
0
    def parse_baidu_rate(self, item):
        """Insert a ``BaiduRate`` row, or update the stored row for the same source and site.

        Rows are matched on the ``source_id`` / ``site`` pair. On update only
        the truthy entries of ``item`` are written.

        :param item: mapping of keyword arguments accepted by ``BaiduRate``.
        """
        with session_scope(self.sess) as session:
            rate = BaiduRate(**item)
            same_rate = and_(BaiduRate.source_id == rate.source_id,
                             BaiduRate.site == rate.site)
            stored = session.query(BaiduRate.id).filter(
                same_rate).one_or_none()

            if not stored:
                session.add(rate)
                return

            changes = dict((key, item[key]) for key in item if item[key])
            session.query(BaiduRate).filter(
                BaiduRate.id == stored[0]).update(changes)
    def readbody(self):
        """Extract keywords for up to 200 unprocessed article bodies.

        Reads ``CrawlWeixinArticleDetail`` rows with ``key_state == 0``, runs
        ``self.util.keywords_analyse`` on each body, marks every fetched row
        with ``key_state = 1`` and adds a ``Crawl_keywords_map`` row for each
        (article id, keyword) pair that is not stored yet.

        A row whose analysis raises is logged and still marked as processed,
        but contributes no keywords.
        """
        logging.info("This is test log")
        with session_scope(self.sess) as session:
            pending = session.query(
                CrawlWeixinArticleDetail.id,
                CrawlWeixinArticleDetail.body).filter(
                    CrawlWeixinArticleDetail.key_state == 0
                ).limit(200).all()

            handled_ids = []
            keywords_by_id = {}

            for article_id, body in pending:
                # Recorded before analysis so a failing body is not retried.
                handled_ids.append(article_id)
                try:
                    keywords = self.util.keywords_analyse(
                        body, topK=5, strip_tag=True)
                    keywords_by_id[article_id] = keywords
                    for keyword in keywords:
                        print(keyword)
                except Exception as e:
                    logging.error(e)

            if not handled_ids:
                return

            session.query(CrawlWeixinArticleDetail).filter(
                CrawlWeixinArticleDetail.id.in_(handled_ids)
            ).update({"key_state": 1}, synchronize_session=False)

            new_links = []
            for article_id in keywords_by_id:
                for word in keywords_by_id[article_id]:
                    existing = session.query(Crawl_keywords_map).filter(
                        and_(
                            Crawl_keywords_map.s_id == article_id,
                            Crawl_keywords_map.keyword == word
                        )
                    ).one_or_none()
                    if existing is not None:
                        continue

                    link = Crawl_keywords_map()
                    link.s_id = article_id
                    link.keyword = word
                    link.tb = Crawl_Weixin_Search.__tablename__
                    new_links.append(link)

            if new_links:
                session.add_all(new_links)
    def parse_article(self, item):
        """Insert a ``CrawlArticle`` row, or update the stored row with the same ``source_id``.

        On update only the entries of ``item`` that are not ``None`` are
        written; if none remain, no update is issued.

        :param item: mapping of keyword arguments accepted by
            ``CrawlArticle``.
        """
        candidate = CrawlArticle(**item)
        with session_scope(self.sess) as session:
            stored = session.query(CrawlArticle.id).filter(
                and_(CrawlArticle.source_id ==
                     candidate.source_id)).one_or_none()

            if stored is None:
                session.add(candidate)
                return

            changes = dict(
                (key, item[key]) for key in item if item[key] is not None)
            if changes:
                session.query(CrawlArticle).filter(
                    CrawlArticle.id == stored[0]).update(changes)
    def parse_ssi_trends(self, item):
        """Add the SSI trend entries that are newer than the latest stored one.

        The model class is chosen from ``item[0]['platform']``. The newest
        stored ``time`` for that type and platform is the cutoff; entries
        whose ``'time'`` compares greater than the cutoff (formatted as
        ``YYYY-MM-DD HH:MM:SS``) are added. With no stored rows every entry
        is added.

        :param item: mapping whose values are mappings of model keyword
            arguments; entry ``0`` must provide ``'platform'`` and ``'type'``.
        """
        with session_scope(self.sess) as session:
            head = item[0]
            # NOTE(security): eval() on a value taken from ``item``. If the
            # platform name can come from scraped/untrusted data, replace
            # this with an explicit platform -> model lookup table.
            trend_model = eval("CrawlSsiTrend_" + head['platform'])
            newest = session.query(
                func.max(trend_model.time).label("max_time")).filter(
                    and_(
                        trend_model.type == head['type'],
                        trend_model.platform == head['platform'],
                    )).one_or_none()

            max_time = newest[0] if newest else None

            # %S (seconds) -- the previous %I emitted the 12-hour clock hour
            # in the seconds position and skewed the comparison.
            cutoff = None
            if max_time is not None:
                cutoff = max_time.strftime('%Y-%m-%d %H:%M:%S')

            fresh = []
            for key in item:
                if cutoff is None or item[key]['time'] > cutoff:
                    fresh.append(trend_model(**item[key]))

            if fresh:
                session.add_all(fresh)
Beispiel #22
0
    def parse_house_history(self, item):
        """Insert a ``CrawlHouseHistory`` row, or update the stored row for the same period.

        Rows are matched on ``year``, ``month`` and ``residential_id``.

        :param item: mapping of keyword arguments accepted by
            ``CrawlHouseHistory``; on update it is also passed as the
            column/value mapping.
        """
        with session_scope(self.sess) as session:
            history = CrawlHouseHistory(**item)

            existing = session.query(CrawlHouseHistory.id).filter(
                and_(
                    CrawlHouseHistory.year == history.year,
                    CrawlHouseHistory.month == history.month,
                    CrawlHouseHistory.residential_id ==
                    history.residential_id,
                )).one_or_none()

            if existing:
                # Update the history row that was matched, not an unrelated
                # model's table.
                session.query(CrawlHouseHistory).filter(
                    CrawlHouseHistory.id == existing[0]).update(item)
            else:
                session.add(history)
Beispiel #23
0
 def parse_zhanzhang(self, item):
     """Bulk-add one ``CrawlZhanzhang`` row for every entry of ``item``.

     :param item: mapping whose values are mappings of ``CrawlZhanzhang``
         keyword arguments.
     """
     rows = []
     for key in item:
         rows.append(CrawlZhanzhang(**item[key]))

     with session_scope(self.sess) as session:
         session.add_all(rows)
Beispiel #24
0
    def parse_fx678_calendar(self, item):
        """Persist a batch of fx678 economic-calendar records.

        The type of ``item[0]`` selects the target model. Calendar, event and
        holiday batches are upserted entry by entry on ``source_id``; a
        "jiedu" batch upserts only ``item[0]``, matched on ``dataname_id``.
        Empty input, input without a ``0`` key, or an unrecognised entry type
        is ignored.

        :param item: mapping whose values are mappings of model keyword
            arguments.
        """
        if not item or 0 not in item:
            return

        head = item[0]
        if isinstance(head, items.CrawlFx678EconomicCalendarItem):
            self._upsert_fx678_batch(
                item, CrawlFx678EconomicCalendar,
                ('country', 'pub_time', 'quota_name', 'importance',
                 'former_value', 'predicted_value', 'published_value',
                 'influence', 'position'))

        elif isinstance(head, items.CrawlEconomicEventItem):
            self._upsert_fx678_batch(
                item, CrawlFx678EconomicEvent,
                ('country', 'time', 'city', 'importance', 'event', 'date'))

        elif isinstance(head, items.CrawlEconomicHolidayItem):
            self._upsert_fx678_batch(
                item, CrawlFx678EconomicHoliday,
                ('country', 'time', 'market', 'holiday_name', 'detail',
                 'date'))

        elif isinstance(head, items.CrawlEconomicJieduItem):
            with session_scope(self.sess) as session:
                jiedu = CrawlEconomicJiedu(**head)
                same_dataname = (
                    CrawlEconomicJiedu.dataname_id == jiedu.dataname_id)

                existing = session.query(
                    CrawlEconomicJiedu.dataname_id).filter(
                        and_(same_dataname)).one_or_none()

                if existing:
                    # Unlike the batch upserts, every listed column is
                    # written here, including ``None`` values.
                    data = dict(
                        (name, getattr(jiedu, name))
                        for name in ('next_pub_time', 'pub_agent',
                                     'pub_frequency', 'count_way',
                                     'data_influence', 'data_define',
                                     'funny_read'))
                    session.query(CrawlEconomicJiedu).filter(
                        same_dataname).update(data)
                else:
                    session.add(jiedu)

    def _upsert_fx678_batch(self, rows, model, fields):
        """Upsert every entry of ``rows`` into ``model``, matched on ``source_id``.

        Entries without a stored counterpart are collected and added in one
        call; for the others, the attributes named in ``fields`` that are not
        ``None`` are written to the stored row.

        :param rows: mapping whose values are mappings of ``model`` keyword
            arguments.
        :param model: mapped class exposing ``id`` and ``source_id``.
        :param fields: names of the attributes to refresh on existing rows.
        """
        to_add = []
        with session_scope(self.sess) as session:
            for key in rows:
                record = model(**rows[key])
                existing = session.query(model.id).filter(
                    and_(model.source_id ==
                         record.source_id)).one_or_none()

                if existing is None:
                    to_add.append(record)
                    continue

                data = {}
                for name in fields:
                    value = getattr(record, name)
                    if value is not None:
                        data[name] = value

                if data:
                    session.query(model).filter(
                        model.id == existing[0]).update(data)

            if to_add:
                session.add_all(to_add)
Beispiel #25
0
    def process_baidutongji(self, item):
        """Insert a ``BaiduTongji`` row, or refresh the stored row for the same visit.

        Rows are matched on the ``user_id`` / ``access_time`` pair. On
        refresh, only the attributes in ``refreshed_columns`` that are not
        ``None`` on the freshly built instance are written; if none remain,
        no update is issued.

        :param item: mapping of keyword arguments accepted by
            ``BaiduTongji``.
        """
        refreshed_columns = (
            'area', 'keywords', 'entry_page', 'ip', 'visit_time',
            'visit_pages', 'visitorType', 'visitorFrequency',
            'lastVisitTime', 'endPage', 'deviceType', 'fromType', 'fromurl',
            'fromAccount', 'isp', 'os', 'osType', 'browser', 'browserType',
            'language', 'resolution', 'color', 'accessPage', 'antiCode',
        )
        with session_scope(self.sess) as session:
            visit = BaiduTongji(**item)

            existing = session.query(BaiduTongji.id).filter(
                and_(BaiduTongji.user_id == visit.user_id,
                     BaiduTongji.access_time ==
                     visit.access_time)).one_or_none()

            if existing is None:
                session.add(visit)
                return

            data = {}
            for name in refreshed_columns:
                value = getattr(visit, name)
                if value is not None:
                    data[name] = value

            if data:
                # Filter on the mapped column BaiduTongji.id; the new
                # instance's own ``id`` attribute is a plain value, so
                # comparing it yields a constant instead of a SQL condition.
                session.query(BaiduTongji).filter(
                    BaiduTongji.id == existing[0]).update(data)
Beispiel #26
0
    def process_jiankongbao(self, item):
        """Persist one scraped Jiankongbao payload.

        ``item`` is either a single ``items.ChinaTimeItem`` or a mapping of
        scraped items. For a mapping, the type of its first value selects the
        ORM model that every value is converted to:

        * ``ChinaTimeItem``: one ``ChinaTime`` row is added.
        * ``MonitorChartItem``: each row is looked up by
          (time, type, site, monitor_name); a missing row is added, an
          existing row only has its ``value`` updated (when the new value is
          not ``None``).
        * every other known item type: all rows are inserted with
          ``session.add_all``.

        An empty mapping, or a mapping whose first value is of an unknown
        type, is ignored.
        """
        if isinstance(item, items.ChinaTimeItem):
            chinaTime = ChinaTime(**item)
            with session_scope(self.sess) as session:
                session.add(chinaTime)
            return

        # Materialise once: ``dict.values()`` is not subscriptable on
        # Python 3, and an empty payload must not raise IndexError.
        rows = list(item.values())
        if not rows:
            return
        first = rows[0]

        # (scraped item class, ORM model) pairs, checked in this order.
        dispatch = (
            (items.ErrorTopItem, ErrorTop),
            (items.MonitorAreaStasticItem, MonitorAreaStastic),
            (items.MonitorChartItem, MonitorChart),
            (items.MonitorProvinceItem, MonitorProvince),
            (items.MonitorStasticItem, MonitorStastic),
            (items.MonitorTypeItem, MonitorType),
            (items.ProvinceTimeItem, ProvinceTime),
            (items.TypeTimeItem, TypeTime),
        )
        model_cls = None
        for item_cls, candidate in dispatch:
            if isinstance(first, item_cls):
                model_cls = candidate
                break
        if model_cls is None:
            return

        records = [model_cls(**row) for row in rows]

        with session_scope(self.sess) as session:
            if model_cls is not MonitorChart:
                session.add_all(records)
            else:
                # Chart points are upserted rather than blindly inserted.
                for record in records:
                    existing = session.query(MonitorChart.id).filter(
                        and_(MonitorChart.time == record.time,
                             MonitorChart.type == record.type,
                             MonitorChart.site == record.site,
                             MonitorChart.monitor_name ==
                             record.monitor_name)).one_or_none()

                    if existing is None:
                        session.add(record)
                    elif record.value is not None:
                        session.query(MonitorChart).filter(
                            MonitorChart.id == existing[0]).update(
                                {'value': record.value})
    def parse_calendar(self, item):
        """Store a batch of scraped economic-calendar records.

        ``item`` is a mapping that must contain the key ``0``; the type of
        ``item[0]`` selects how the batch is handled. All work happens inside
        a ``session_scope(self.sess)`` block.

        * ``CrawlEconomicCalendarItem``: every value is upserted by
          ``source_id``; afterwards, rows whose ``pub_time`` falls on the day
          of ``item[0]['pub_time']`` and whose ``source_id`` is not in this
          batch are deleted.
        * ``CrawlEconomicEventItem`` / ``CrawlEconomicHolidayItem``: every
          value is upserted by ``source_id``.
        * ``CrawlEconomicJieduItem``: only ``item[0]`` is used; it is matched
          on ``dataname_id`` and either updated in full or inserted.

        A falsy ``item``, a mapping without key ``0``, or an unrecognised
        first item type results in no action.
        """
        if not item or 0 not in item:
            return

        first = item[0]

        if isinstance(first, items.CrawlEconomicCalendarItem):
            with session_scope(self.sess) as session:
                seen_source_ids = self._upsert_by_source_id(
                    session, CrawlEconomicCalendar,
                    (item[key] for key in item),
                    ('country', 'pub_time', 'quota_name', 'importance',
                     'former_value', 'predicted_value', 'published_value',
                     'influence'))

                # Delete rows dated on the batch's day that are absent from
                # this batch (original note: "delete the ones that were not
                # published yesterday").
                pub_day = datetime.datetime.strptime(
                    first['pub_time'], "%Y-%m-%d %H:%M:%S")
                day_start = pub_day.strftime('%Y-%m-%d 00:00:00')
                day_end = pub_day.strftime('%Y-%m-%d 23:59:59')
                # Debug trace of the purge window. A single pre-formatted
                # string prints the same text under the Python 2 print
                # statement and the Python 3 print function.
                print('%s %s' % (day_start, day_end))
                session.query(CrawlEconomicCalendar).filter(
                    and_(
                        CrawlEconomicCalendar.pub_time.between(
                            day_start, day_end),
                        ~CrawlEconomicCalendar.source_id.in_(
                            seen_source_ids))).delete(
                                synchronize_session=False)

        elif isinstance(first, items.CrawlEconomicEventItem):
            with session_scope(self.sess) as session:
                self._upsert_by_source_id(
                    session, CrawlEconomicEvent,
                    (item[key] for key in item),
                    ('country', 'time', 'city', 'importance', 'event',
                     'date'))

        elif isinstance(first, items.CrawlEconomicHolidayItem):
            with session_scope(self.sess) as session:
                self._upsert_by_source_id(
                    session, CrawlEconomicHoliday,
                    (item[key] for key in item),
                    ('country', 'time', 'market', 'holiday_name', 'detail',
                     'date'))

        elif isinstance(first, items.CrawlEconomicJieduItem):
            with session_scope(self.sess) as session:
                crawlEconomicJiedu = CrawlEconomicJiedu(**first)

                existing = session.query(
                    CrawlEconomicJiedu.dataname_id).filter(
                        CrawlEconomicJiedu.dataname_id ==
                        crawlEconomicJiedu.dataname_id).one_or_none()

                if existing:
                    # Unlike the upserts above, every field is written here,
                    # including fields whose new value is None.
                    jiedu_fields = ('next_pub_time', 'pub_agent',
                                    'pub_frequency', 'count_way',
                                    'data_influence', 'data_define',
                                    'funny_read')
                    data = {
                        name: getattr(crawlEconomicJiedu, name)
                        for name in jiedu_fields
                    }
                    session.query(CrawlEconomicJiedu).filter(
                        CrawlEconomicJiedu.dataname_id ==
                        crawlEconomicJiedu.dataname_id).update(data)
                else:
                    session.add(crawlEconomicJiedu)

    def _upsert_by_source_id(self, session, model_cls, rows, fields):
        """Insert or update ``rows`` in ``model_cls``, keyed on ``source_id``.

        Args:
            session: the active session used for all queries.
            model_cls: ORM model exposing ``id`` and ``source_id`` columns.
            rows: iterable of mappings; each is expanded into
                ``model_cls(**row)``.
            fields: attribute names copied onto an existing row; a field is
                skipped when its new value is ``None``.

        Returns:
            The ``source_id`` of every processed row, in iteration order.
        """
        pending = []
        source_ids = []
        for row in rows:
            record = model_cls(**row)
            source_ids.append(record.source_id)

            existing = session.query(model_cls.id).filter(
                model_cls.source_id == record.source_id).one_or_none()

            if existing is None:
                # New rows are collected and added in one call after the loop.
                pending.append(record)
                continue

            data = {}
            for name in fields:
                value = getattr(record, name)
                if value is not None:
                    data[name] = value
            if data:
                session.query(model_cls).filter(
                    model_cls.id == existing[0]).update(data)

        if pending:
            session.add_all(pending)
        return source_ids
 def parse_cryptomiso(self, item):
     """Print the scraped ``item`` and add it as a ``Crawl_Cryptomiso`` row.

     ``item`` is expanded as keyword arguments into ``Crawl_Cryptomiso`` and
     the resulting object is added inside a ``session_scope(self.sess)``
     block.
     """
     # Parenthesised single argument: same output under the Python 2 print
     # statement, and valid as a function call on Python 3.
     print(item)
     with session_scope(self.sess) as session:
         rank = Crawl_Cryptomiso(**item)
         session.add(rank)