def parse_ibrebates(self, item):
    """Upsert an Ibrebates record, matched on its unique ``name``.

    Inserts a new row when no record with the same name exists;
    otherwise copies the listed fields (including explicit ``None``
    values, preserving the original behavior) onto the existing row.

    Fixes: removed a redundant single-condition ``and_`` wrapper and a
    dead ``try/except AttributeError: pass`` — declared model columns
    always exist; a three-arg ``getattr`` keeps the same tolerance.
    """
    ibrebates = Ibrebates(**item)
    update_fields = [
        "description", "spread_type", "om_spread", "gold_spread",
        "offshore", "a_share", "regulatory_authority",
        "trading_varieties", "platform_type", "account_type", "scalp",
        "hedging", "min_transaction", "least_entry", "maximum_leverage",
        "maximum_trading", "deposit_method", "entry_method",
        "commission_fee", "entry_fee", "account_currency", "rollovers",
        "explosion_proportion", "renminbi"
    ]
    with session_scope(self.sess) as session:
        query = session.query(Ibrebates.id).filter(
            Ibrebates.name == ibrebates.name).one_or_none()
        if query is None:
            session.add(ibrebates)
        else:
            _missing = object()  # sentinel: skip fields absent on the model
            data = {}
            for field in update_fields:
                value = getattr(ibrebates, field, _missing)
                if value is not _missing:
                    data[field] = value
            if data:
                session.query(Ibrebates).filter(
                    Ibrebates.id == query[0]).update(data)
def parse_fx678_jiedu(self, item):
    """Upsert an fx678 data-interpretation record keyed by dataname_id.

    Bug fixed: on the update path the original wrote the jiedu fields
    into ``CrawlEconomicCalendar`` (a different table, which does not
    carry columns such as pub_agent/funny_read — compare the sibling
    ``parse_jiedu``) instead of ``CrawlFx678EconomicJiedu``.
    """
    with session_scope(self.sess) as session:
        crawlFx678EconomicJiedu = CrawlFx678EconomicJiedu(**item)
        query = session.query(CrawlFx678EconomicJiedu.dataname_id).filter(
            CrawlFx678EconomicJiedu.dataname_id ==
            crawlFx678EconomicJiedu.dataname_id).one_or_none()
        if query:
            session.query(CrawlFx678EconomicJiedu).filter(
                CrawlFx678EconomicJiedu.dataname_id ==
                crawlFx678EconomicJiedu.dataname_id).update({
                    'next_pub_time': crawlFx678EconomicJiedu.next_pub_time,
                    'pub_agent': crawlFx678EconomicJiedu.pub_agent,
                    'pub_frequency': crawlFx678EconomicJiedu.pub_frequency,
                    'count_way': crawlFx678EconomicJiedu.count_way,
                    'data_influence':
                    crawlFx678EconomicJiedu.data_influence,
                    'data_define': crawlFx678EconomicJiedu.data_define,
                    'funny_read': crawlFx678EconomicJiedu.funny_read
                })
        else:
            session.add(crawlFx678EconomicJiedu)
def parse_weibo(self, item):
    """Persist a weibo item unless its source_id is already stored."""
    with session_scope(self.sess) as session:
        existing = session.query(Crawl_Weibo.id).filter(
            Crawl_Weibo.source_id == item['source_id']).one_or_none()
        if existing is None:
            session.add(Crawl_Weibo(**item))
def parse_ssi_trends_today(self, item):
    """Bulk-insert every SSI trend record contained in *item*."""
    with session_scope(self.sess) as session:
        records = [CrawlSsiTrend(**item[key]) for key in item]
        if records:
            session.add_all(records)
def parse_weibo_seach(self, item):
    """Store a weibo hot-search keyword unless its source_id already exists."""
    with session_scope(self.sess) as session:
        record = Crawl_Weibo_Hotkey(**item)
        existing = session.query(Crawl_Weibo_Hotkey.id).filter(
            Crawl_Weibo_Hotkey.source_id ==
            record.source_id).one_or_none()
        if existing is None:
            session.add(record)
def parse_sentence(self, item):
    """Insert a Sentence, or refresh the stored row sharing its source_id."""
    with session_scope(self.sess) as session:
        record = Sentence(**item)
        found = session.query(Sentence.id).filter(
            Sentence.source_id == record.source_id).one_or_none()
        if found is not None:
            session.query(Sentence).filter(
                Sentence.id == found[0]).update(item)
        else:
            session.add(record)
def parse_weixin_detail(self, item):
    """Attach an article body to the matching Crawl_Weixin_Search row.

    The detail row reuses the search row's primary key; items with no
    matching search record are silently dropped.
    """
    with session_scope(self.sess) as session:
        found = session.query(Crawl_Weixin_Search.id).filter(
            Crawl_Weixin_Search.source_id ==
            item['source_id']).one_or_none()
        if found:
            detail = CrawlWeixinArticleDetail()
            detail.id = found[0]
            detail.body = item['body']
            session.add(detail)
def parse_agent(self, item):
    """Upsert a Lianjia agent record keyed by ``agent_id``.

    Bug fixed: the existence check selected ``LianjiaFeedback.id`` while
    filtering on ``LianjiaAgent.agent_id`` — an accidental cross join
    whose id was then used to update LianjiaAgent rows. It now queries
    ``LianjiaAgent.id`` directly.
    """
    with session_scope(self.sess) as session:
        lianjiaAgent = LianjiaAgent(**item)
        query = session.query(LianjiaAgent.id).filter(
            LianjiaAgent.agent_id == lianjiaAgent.agent_id).one_or_none()
        if query is None:
            session.add(lianjiaAgent)
        else:
            session.query(LianjiaAgent).filter(
                LianjiaAgent.id == query[0]).update(item)
def parse_lianjia_house(self, item):
    """Upsert a Lianjia house record keyed by ``house_id``.

    On update, only fields whose new value is not None overwrite the
    stored row, so partial crawls never blank out existing data.

    Improvement: the original built a 32-entry literal dict repeating
    every attribute name twice; a field list + getattr loop preserves
    the exact same update set with far less duplication.
    """
    update_fields = [
        'price', 'layout', 'area', 'direction', 'elevator',
        'residential_id', 'flood', 'images', 'district',
        'apartment_structure', 'street', 'address', 'building_type',
        'ladder', 'heating', 'property_term', 'list_time', 'ownership',
        'last_trade', 'purpose', 'hold_years', 'mortgage',
        'house_register', 'core_point', 'periphery', 'traffic',
        'residential_desc', 'layout_desc', 'img_layout', 'layout_datas',
        'renovation', 'state'
    ]
    with session_scope(self.sess) as session:
        lianjiaHouse = LianjiaHouse(**item)
        query = session.query(LianjiaHouse.id).filter(
            LianjiaHouse.house_id == lianjiaHouse.house_id).one_or_none()
        if query:
            updata = {}
            for field in update_fields:
                value = getattr(lianjiaHouse, field)
                if value is not None:
                    updata[field] = value
            session.query(LianjiaHouse).filter(
                LianjiaHouse.house_id == lianjiaHouse.house_id).update(
                    updata)
        else:
            session.add(lianjiaHouse)
def parse_lianjia_feedback(self, item):
    """Upsert a Lianjia feedback record keyed by (agent_id, hourse_id).

    Bug fixed: the insert path called ``session.add(item)`` with the
    raw item instead of the mapped ``LianjiaFeedback`` object.
    (``hourse_id`` is the model's actual column spelling.)
    """
    with session_scope(self.sess) as session:
        lianjiaFeedback = LianjiaFeedback(**item)
        query = session.query(LianjiaFeedback.id).filter(
            and_(LianjiaFeedback.agent_id == lianjiaFeedback.agent_id,
                 LianjiaFeedback.hourse_id ==
                 lianjiaFeedback.hourse_id)).one_or_none()
        if query is None:
            session.add(lianjiaFeedback)
        else:
            session.query(LianjiaFeedback).filter(
                LianjiaFeedback.id == query[0]).update(item)
def parse_lianjia_residential(self, item):
    """Insert a Lianjia residential record or overwrite the existing one."""
    with session_scope(self.sess) as session:
        record = LianjiaResidential(**item)
        row = session.query(LianjiaResidential.id).filter(
            LianjiaResidential.residential_id ==
            record.residential_id).one_or_none()
        if row is None:
            session.add(record)
        else:
            session.query(LianjiaResidential).filter(
                LianjiaResidential.id == row[0]).update(item)
def parse_anjuke_lianjia_residential(self, item):
    """Route a mixed item: 'residential' updates an Anjuke record,
    'agent' upserts a Lianjia agent.

    Fixes: the local variable no longer shadows the ``type`` builtin,
    and the agent branch's existence check now queries
    ``LianjiaAgent.id`` (the original selected ``LianjiaFeedback.id``
    while filtering on LianjiaAgent — an accidental cross join).
    """
    item_type = item['type']
    del item['type']  # remaining keys map 1:1 onto model columns
    if item_type == "residential":
        anjuke_id = item['anjuke_residential_id']
        del item['anjuke_residential_id']
        with session_scope(self.sess) as session:
            session.query(CrawlAnjukeResidential).filter(
                CrawlAnjukeResidential.residential_id ==
                anjuke_id).update(item)
    elif item_type == 'agent':
        with session_scope(self.sess) as session:
            lianjiaAgent = LianjiaAgent(**item)
            query = session.query(LianjiaAgent.id).filter(
                LianjiaAgent.agent_id ==
                lianjiaAgent.agent_id).one_or_none()
            if query is None:
                session.add(lianjiaAgent)
            else:
                session.query(LianjiaAgent).filter(
                    LianjiaAgent.id == query[0]).update(item)
def parse_lianjia_visited(self, item):
    """Upsert a visit record keyed by (agent_id, visited_time).

    Bug fixed: the insert path called ``session.add(item)`` with the
    raw item instead of the mapped ``LianjiaVisited`` object.
    """
    with session_scope(self.sess) as session:
        lianjiaVisited = LianjiaVisited(**item)
        query = session.query(LianjiaVisited.id).filter(
            and_(
                LianjiaVisited.agent_id == lianjiaVisited.agent_id,
                LianjiaVisited.visited_time ==
                lianjiaVisited.visited_time)).one_or_none()
        if query is None:
            session.add(lianjiaVisited)
        else:
            session.query(LianjiaVisited).filter(
                LianjiaVisited.id == query[0]).update(item)
def parse_weixin_search(self, item):
    """Bulk-insert weixin search results whose source_id is not stored yet."""
    with session_scope(self.sess) as session:
        fresh = []
        for key in item:
            candidate = Crawl_Weixin_Search(**item[key])
            exists = session.query(Crawl_Weixin_Search.id).filter(
                Crawl_Weixin_Search.source_id ==
                candidate.source_id).one_or_none()
            if exists is None:
                fresh.append(candidate)
        if fresh:
            session.add_all(fresh)
def parse_cgse(self, item): print item with session_scope(self.sess) as session: cgse = Cgse(**item) query = session.query(Cgse.id).filter( and_(Cgse.idr == cgse.idr)).one_or_none() if query: up_item = {} for k in item: if item[k]: up_item[k] = item[k] session.query(Cgse).filter(Cgse.id == query[0]).update(up_item) else: session.add(cgse)
def parse_anjuke_residentail(self, item):
    """Upsert an Anjuke residential record keyed by residential_id.

    Bug fixed: the existence check selected ``CrawlHouseHistory.id``
    while filtering on CrawlAnjukeResidential — an accidental cross
    join; it now queries ``CrawlAnjukeResidential`` itself. The
    instance-vs-column operand order in the filter was also normalized.
    """
    with session_scope(self.sess) as session:
        crawlAnjukeResidential = CrawlAnjukeResidential(**item)
        query = session.query(CrawlAnjukeResidential.id).filter(
            CrawlAnjukeResidential.residential_id ==
            crawlAnjukeResidential.residential_id).one_or_none()
        if query:
            session.query(CrawlAnjukeResidential).filter(
                CrawlAnjukeResidential.residential_id ==
                crawlAnjukeResidential.residential_id).update(item)
        else:
            session.add(crawlAnjukeResidential)
def parse_jiedu(self, item):
    """Upsert an economic-data interpretation keyed by dataname_id.

    Bug fixed: the update dict stored the whole item under every key
    (``updata[it] = item``) instead of the field value (``item[it]``).
    """
    with session_scope(self.sess) as session:
        crawlEconomicJiedu = CrawlEconomicJiedu(**item)
        query = session.query(CrawlEconomicJiedu.dataname_id).filter(
            CrawlEconomicJiedu.dataname_id ==
            crawlEconomicJiedu.dataname_id).one_or_none()
        if query:
            updata = {}
            for field in item:
                if item[field]:  # only truthy fields overwrite stored data
                    updata[field] = item[field]
            session.query(CrawlEconomicJiedu).filter(
                CrawlEconomicJiedu.dataname_id ==
                query[0]).update(updata)
        else:
            session.add(crawlEconomicJiedu)
def parse_baidu_rate(self, item):
    """Upsert a Baidu rate record keyed by (source_id, site).

    Only truthy incoming fields overwrite the stored row.
    """
    with session_scope(self.sess) as session:
        record = BaiduRate(**item)
        row = session.query(BaiduRate.id).filter(
            and_(BaiduRate.source_id == record.source_id,
                 BaiduRate.site == record.site)).one_or_none()
        if not row:
            session.add(record)
        else:
            changes = {k: item[k] for k in item if item[k]}
            session.query(BaiduRate).filter(
                BaiduRate.id == row[0]).update(changes)
def readbody(self):
    """Extract keywords for up to 200 unprocessed weixin article bodies.

    Marks the batch as processed (key_state=1) and stores each
    (article id, keyword) pair in Crawl_keywords_map unless already
    present. Python 2 syntax (`print`, `except Exception, e`).
    """
    logging.info("This is test log")
    with session_scope(self.sess) as session:
        # Batch of at most 200 articles not yet keyword-analysed.
        query = session.query(CrawlWeixinArticleDetail.id,
                              CrawlWeixinArticleDetail.body).filter(
            CrawlWeixinArticleDetail.key_state == 0
        ).limit(200).all()
        handled_body = []  # article ids to flag as processed
        key_map = {}       # article id -> list of extracted keywords
        for i in query:
            try:
                # NOTE(review): the id is appended *before* analysis, so
                # a failing article is still marked processed below —
                # confirm this is intended (prevents retry loops).
                handled_body.append(i[0])
                keywords = self.util.keywords_analyse(i[1], topK=5, strip_tag=True)
                key_map[i[0]] = keywords
                for key in keywords:
                    print key
            except Exception, e:
                # Best-effort: a failed analysis is logged and skipped.
                logging.error(e)
        if len(handled_body) > 0:
            # Flag the whole batch in a single bulk UPDATE.
            session.query(CrawlWeixinArticleDetail).filter(
                CrawlWeixinArticleDetail.id.in_(handled_body)
            ).update({"key_state": 1}, synchronize_session=False)
        all_keywords_map = []
        for id in key_map:
            for word in key_map[id]:
                model = Crawl_keywords_map()
                model.s_id = id
                model.keyword = word
                model.tb = Crawl_Weixin_Search.__tablename__
                # Skip (id, keyword) pairs already stored.
                qu = session.query(Crawl_keywords_map).filter(
                    and_(
                        Crawl_keywords_map.s_id == id,
                        Crawl_keywords_map.keyword == word
                    )
                ).one_or_none()
                if not qu:
                    all_keywords_map.append(model)
        if len(all_keywords_map) > 0:
            session.add_all(all_keywords_map)
def parse_article(self, item):
    """Insert an article, or refresh non-None fields of the stored row."""
    article = CrawlArticle(**item)
    with session_scope(self.sess) as session:
        row = session.query(CrawlArticle.id).filter(
            CrawlArticle.source_id == article.source_id).one_or_none()
        if row is None:
            session.add(article)
        else:
            changes = {k: item[k] for k in item if item[k] is not None}
            if changes:
                session.query(CrawlArticle).filter(
                    CrawlArticle.id == row[0]).update(changes)
def parse_ssi_trends(self, item):
    """Append SSI trend rows newer than the latest stored timestamp.

    The target model class is resolved per platform as
    ``CrawlSsiTrend_<platform>``.

    Fixes: the strftime format used ``%I`` (12-hour clock) in the
    seconds position — ``'%Y-%m-%d %H:%M:%I'`` — corrupting the
    lexicographic comparison against the items' 'YYYY-mm-dd HH:MM:SS'
    time strings; it is now ``%S``. The eval result also no longer
    shadows the ``CrawlSsiTrend`` class name.
    """
    with session_scope(self.sess) as session:
        # SECURITY NOTE: eval() on the platform string — acceptable only
        # while 'platform' comes from our own spiders, never user input.
        trend_model = eval("CrawlSsiTrend_" + item[0]['platform'])
        query = session.query(
            func.max(trend_model.time).label("max_time")).filter(
                and_(
                    trend_model.type == item[0]['type'],
                    trend_model.platform == item[0]['platform'],
                )).one_or_none()
        max_time = query[0] if query else None
        threshold = None if max_time is None else max_time.strftime(
            '%Y-%m-%d %H:%M:%S')
        all_item = []
        for key in item:
            if threshold is None or item[key]['time'] > threshold:
                all_item.append(trend_model(**item[key]))
        if len(all_item) > 0:
            session.add_all(all_item)
def parse_house_history(self, item):
    """Upsert a monthly record keyed by (year, month, residential_id).

    Bug fixed: the update previously targeted ``LianjiaHouse`` while
    filtering on CrawlHouseHistory columns (a cross-table update that
    could not match the intended rows); it now updates
    ``CrawlHouseHistory``.
    """
    with session_scope(self.sess) as session:
        houseHistory = CrawlHouseHistory(**item)
        key_filter = and_(
            CrawlHouseHistory.year == houseHistory.year,
            CrawlHouseHistory.month == houseHistory.month,
            CrawlHouseHistory.residential_id ==
            houseHistory.residential_id,
        )
        query = session.query(CrawlHouseHistory.id).filter(
            key_filter).one_or_none()
        if query:
            session.query(CrawlHouseHistory).filter(
                key_filter).update(item)
        else:
            session.add(houseHistory)
def parse_zhanzhang(self, item):
    """Bulk-insert every zhanzhang record contained in *item*."""
    records = [CrawlZhanzhang(**item[key]) for key in item]
    with session_scope(self.sess) as session:
        session.add_all(records)
def parse_fx678_calendar(self, item):
    """Dispatch fx678 economy items to their tables.

    *item* is a dict of homogeneous items indexed from 0; the type of
    item[0] selects the branch: financial calendar, economic event,
    market holiday, or data interpretation (jiedu). Each branch upserts
    on source_id (dataname_id for jiedu), copying only populated
    (non-None) fields onto existing rows.
    """
    if item and len(item) > 0:
        if 0 in item and isinstance(item[0],
                                    items.CrawlFx678EconomicCalendarItem):
            with session_scope(self.sess) as session:
                all_data = []  # fresh rows collected for one bulk insert
                for ditem in item:
                    ditem = item[ditem]
                    crawlfx678EconomicCalendar = CrawlFx678EconomicCalendar(
                        **ditem)
                    query = session.query(
                        CrawlFx678EconomicCalendar.id
                    ).filter(
                        and_(
                            CrawlFx678EconomicCalendar.source_id ==
                            crawlfx678EconomicCalendar.source_id,
                            # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                        )).one_or_none()
                    if query is not None:
                        # Existing row: copy over only populated fields.
                        data = {}
                        if crawlfx678EconomicCalendar.country is not None:
                            data['country'] = crawlfx678EconomicCalendar.country
                        if crawlfx678EconomicCalendar.pub_time is not None:
                            data['pub_time'] = crawlfx678EconomicCalendar.pub_time
                        if crawlfx678EconomicCalendar.quota_name is not None:
                            data['quota_name'] = crawlfx678EconomicCalendar.quota_name
                        if crawlfx678EconomicCalendar.importance is not None:
                            data['importance'] = crawlfx678EconomicCalendar.importance
                        if crawlfx678EconomicCalendar.former_value is not None:
                            data['former_value'] = crawlfx678EconomicCalendar.former_value
                        if crawlfx678EconomicCalendar.predicted_value is not None:
                            data['predicted_value'] = crawlfx678EconomicCalendar.predicted_value
                        if crawlfx678EconomicCalendar.published_value is not None:
                            data['published_value'] = crawlfx678EconomicCalendar.published_value
                        if crawlfx678EconomicCalendar.influence is not None:
                            data['influence'] = crawlfx678EconomicCalendar.influence
                        if crawlfx678EconomicCalendar.position is not None:
                            data['position'] = crawlfx678EconomicCalendar.position
                        if data:
                            session.query(
                                CrawlFx678EconomicCalendar).filter(
                                    CrawlFx678EconomicCalendar.id ==
                                    query[0]).update(data)
                    else:
                        all_data.append(crawlfx678EconomicCalendar)
                if len(all_data) > 0:
                    session.add_all(all_data)
        elif 0 in item and isinstance(item[0],
                                      items.CrawlEconomicEventItem):
            all_data = []
            with session_scope(self.sess) as session:
                for ditem in item:
                    ditem = item[ditem]
                    crawlEconomicEvent = CrawlFx678EconomicEvent(**ditem)
                    query = session.query(
                        CrawlFx678EconomicEvent.id
                    ).filter(
                        and_(
                            CrawlFx678EconomicEvent.source_id ==
                            crawlEconomicEvent.source_id,
                            # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                        )).one_or_none()
                    if query:
                        # Existing row: copy over only populated fields.
                        data = {}
                        if crawlEconomicEvent.country is not None:
                            data['country'] = crawlEconomicEvent.country
                        if crawlEconomicEvent.time is not None:
                            data['time'] = crawlEconomicEvent.time
                        if crawlEconomicEvent.city is not None:
                            data['city'] = crawlEconomicEvent.city
                        if crawlEconomicEvent.importance is not None:
                            data['importance'] = crawlEconomicEvent.importance
                        if crawlEconomicEvent.event is not None:
                            data['event'] = crawlEconomicEvent.event
                        if crawlEconomicEvent.date is not None:
                            data['date'] = crawlEconomicEvent.date
                        if data:
                            session.query(CrawlFx678EconomicEvent).filter(
                                CrawlFx678EconomicEvent.id ==
                                query[0]).update(data)
                    else:
                        all_data.append(crawlEconomicEvent)
                if len(all_data) > 0:
                    session.add_all(all_data)
        elif 0 in item and isinstance(item[0],
                                      items.CrawlEconomicHolidayItem):
            all_data = []
            with session_scope(self.sess) as session:
                for ditem in item:
                    ditem = item[ditem]
                    crawlEconomicHoliday = CrawlFx678EconomicHoliday(
                        **ditem)
                    query = session.query(
                        CrawlFx678EconomicHoliday.id
                    ).filter(
                        and_(
                            CrawlFx678EconomicHoliday.source_id ==
                            crawlEconomicHoliday.source_id,
                            # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                        )).one_or_none()
                    if query:
                        # Existing row: copy over only populated fields.
                        data = {}
                        if crawlEconomicHoliday.country is not None:
                            data['country'] = crawlEconomicHoliday.country
                        if crawlEconomicHoliday.time is not None:
                            data['time'] = crawlEconomicHoliday.time
                        if crawlEconomicHoliday.market is not None:
                            data['market'] = crawlEconomicHoliday.market
                        if crawlEconomicHoliday.holiday_name is not None:
                            data['holiday_name'] = crawlEconomicHoliday.holiday_name
                        if crawlEconomicHoliday.detail is not None:
                            data['detail'] = crawlEconomicHoliday.detail
                        if crawlEconomicHoliday.date is not None:
                            data['date'] = crawlEconomicHoliday.date
                        if data:
                            session.query(
                                CrawlFx678EconomicHoliday).filter(
                                    CrawlFx678EconomicHoliday.id ==
                                    query[0]).update(data)
                    else:
                        all_data.append(crawlEconomicHoliday)
                if len(all_data) > 0:
                    session.add_all(all_data)
        elif 0 in item and isinstance(item[0],
                                      items.CrawlEconomicJieduItem):
            with session_scope(self.sess) as session:
                # Jiedu items arrive singly: upsert item[0] only.
                crawlEconomicJiedu = CrawlEconomicJiedu(**item[0])
                query = session.query(
                    CrawlEconomicJiedu.dataname_id
                ).filter(
                    and_(
                        CrawlEconomicJiedu.dataname_id ==
                        crawlEconomicJiedu.dataname_id,
                        # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                    )).one_or_none()
                if query:
                    data = {
                        'next_pub_time': crawlEconomicJiedu.next_pub_time,
                        'pub_agent': crawlEconomicJiedu.pub_agent,
                        'pub_frequency': crawlEconomicJiedu.pub_frequency,
                        'count_way': crawlEconomicJiedu.count_way,
                        'data_influence': crawlEconomicJiedu.data_influence,
                        'data_define': crawlEconomicJiedu.data_define,
                        'funny_read': crawlEconomicJiedu.funny_read
                    }
                    session.query(CrawlEconomicJiedu).filter(
                        CrawlEconomicJiedu.dataname_id ==
                        crawlEconomicJiedu.dataname_id).update(data)
                else:
                    session.add(crawlEconomicJiedu)
def process_baidutongji(self, item):
    """Upsert a Baidu Tongji visit record keyed by (user_id, access_time).

    New visits are inserted; for existing ones, only fields present
    (not None) on the incoming record are written back.

    Bug fixed: the update filter previously compared the *instance*
    attribute ``baiduTongji.id`` (None for a freshly built object) with
    the found primary key, so the UPDATE matched no rows; it now
    filters on the column ``BaiduTongji.id``. The 24 copy-pasted
    None-checks were also collapsed into a field-list loop with
    identical behavior.
    """
    update_fields = [
        'area', 'keywords', 'entry_page', 'ip', 'visit_time',
        'visit_pages', 'visitorType', 'visitorFrequency',
        'lastVisitTime', 'endPage', 'deviceType', 'fromType', 'fromurl',
        'fromAccount', 'isp', 'os', 'osType', 'browser', 'browserType',
        'language', 'resolution', 'color', 'accessPage', 'antiCode'
    ]
    all_data = []
    with session_scope(self.sess) as session:
        baiduTongji = BaiduTongji(**item)
        query = session.query(BaiduTongji.id).filter(
            and_(BaiduTongji.user_id == baiduTongji.user_id,
                 BaiduTongji.access_time ==
                 baiduTongji.access_time)).one_or_none()
        if query is None:
            all_data.append(baiduTongji)
        else:
            data = {}
            for field in update_fields:
                value = getattr(baiduTongji, field)
                if value is not None:
                    data[field] = value
            if data:
                session.query(BaiduTongji).filter(
                    BaiduTongji.id == query[0]).update(data)
        if len(all_data) > 0:
            session.add_all(all_data)
def process_jiankongbao(self, item):
    """Route jiankongbao (site-monitoring) items to their tables.

    *item* is either a single ChinaTimeItem or a dict whose values all
    share one monitoring item type; most types are bulk-inserted, while
    MonitorChart rows are upserted on (time, type, site, monitor_name).
    NOTE(review): ``item.values()[0]`` relies on Python 2's
    list-returning ``dict.values`` — confirm before porting to Py3.
    """
    if isinstance(item, items.ChinaTimeItem):
        chinaTime = ChinaTime(**item)
        with session_scope(self.sess) as session:
            session.add(chinaTime)
    elif isinstance(item.values()[0], items.ErrorTopItem):
        all_items = []
        for it in item.values():
            all_items.append(ErrorTop(**it))
        with session_scope(self.sess) as session:
            session.add_all(all_items)
    elif isinstance(item.values()[0], items.MonitorAreaStasticItem):
        all_items = []
        for it in item.values():
            all_items.append(MonitorAreaStastic(**it))
        with session_scope(self.sess) as session:
            session.add_all(all_items)
    elif isinstance(item.values()[0], items.MonitorChartItem):
        all_items = []
        for it in item.values():
            all_items.append(MonitorChart(**it))
        with session_scope(self.sess) as session:
            for db_item in all_items:
                query = session.query(MonitorChart.id).filter(
                    and_(MonitorChart.time == db_item.time,
                         MonitorChart.type == db_item.type,
                         MonitorChart.site == db_item.site,
                         MonitorChart.monitor_name ==
                         db_item.monitor_name)).one_or_none()
                if query is None:
                    session.add(db_item)
                else:
                    # Only 'value' is refreshed on an existing chart point.
                    data = {}
                    if db_item.value is not None:
                        data['value'] = db_item.value
                    if data:
                        session.query(MonitorChart).filter(
                            MonitorChart.id == query[0]).update(data)
    elif isinstance(item.values()[0], items.MonitorProvinceItem):
        all_items = []
        for it in item.values():
            all_items.append(MonitorProvince(**it))
        with session_scope(self.sess) as session:
            session.add_all(all_items)
    elif isinstance(item.values()[0], items.MonitorStasticItem):
        all_items = []
        for it in item.values():
            all_items.append(MonitorStastic(**it))
        with session_scope(self.sess) as session:
            session.add_all(all_items)
    elif isinstance(item.values()[0], items.MonitorTypeItem):
        all_items = []
        for it in item.values():
            all_items.append(MonitorType(**it))
        with session_scope(self.sess) as session:
            session.add_all(all_items)
    elif isinstance(item.values()[0], items.ProvinceTimeItem):
        all_items = []
        for it in item.values():
            all_items.append(ProvinceTime(**it))
        with session_scope(self.sess) as session:
            session.add_all(all_items)
    elif isinstance(item.values()[0], items.TypeTimeItem):
        all_items = []
        for it in item.values():
            all_items.append(TypeTime(**it))
        with session_scope(self.sess) as session:
            session.add_all(all_items)
def parse_calendar(self, item):
    """Dispatch economic items (calendar / event / holiday / jiedu) to upserts.

    *item* is a dict of homogeneous items indexed from 0; the type of
    item[0] selects the branch. The calendar branch additionally deletes
    same-day rows whose source_id no longer appears in the fresh batch.
    NOTE: Python 2 syntax (print statement).
    """
    if item and len(item) > 0:
        if 0 in item and isinstance(item[0],
                                    items.CrawlEconomicCalendarItem):
            with session_scope(self.sess) as session:
                all_data = []          # fresh rows for one bulk insert
                now_dataname_ids = []  # source_ids seen in this batch
                for ditem in item:
                    ditem = item[ditem]
                    crawlEconomicCalendar = CrawlEconomicCalendar(**ditem)
                    now_dataname_ids.append(
                        crawlEconomicCalendar.source_id)
                    query = session.query(CrawlEconomicCalendar.id).filter(
                        and_(
                            CrawlEconomicCalendar.source_id ==
                            crawlEconomicCalendar.source_id,
                            # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                        )).one_or_none()
                    if query is not None:
                        # Existing row: copy over only populated fields.
                        data = {}
                        if crawlEconomicCalendar.country is not None:
                            data['country'] = crawlEconomicCalendar.country
                        if crawlEconomicCalendar.pub_time is not None:
                            data['pub_time'] = crawlEconomicCalendar.pub_time
                        if crawlEconomicCalendar.quota_name is not None:
                            data['quota_name'] = crawlEconomicCalendar.quota_name
                        if crawlEconomicCalendar.importance is not None:
                            data['importance'] = crawlEconomicCalendar.importance
                        if crawlEconomicCalendar.former_value is not None:
                            data['former_value'] = crawlEconomicCalendar.former_value
                        if crawlEconomicCalendar.predicted_value is not None:
                            data['predicted_value'] = crawlEconomicCalendar.predicted_value
                        if crawlEconomicCalendar.published_value is not None:
                            data['published_value'] = crawlEconomicCalendar.published_value
                        if crawlEconomicCalendar.influence is not None:
                            data['influence'] = crawlEconomicCalendar.influence
                        if data:
                            session.query(CrawlEconomicCalendar).filter(
                                CrawlEconomicCalendar.id ==
                                query[0]).update(data)
                    else:
                        all_data.append(crawlEconomicCalendar)
                if len(all_data) > 0:
                    session.add_all(all_data)
                # Delete that day's rows that were not (re)published in
                # this batch (original comment: 删除昨天没有发布的 —
                # "delete yesterday's unpublished ones").
                lastday = item[0]['pub_time']
                lastday = datetime.datetime.strptime(
                    lastday, "%Y-%m-%d %H:%M:%S")
                print lastday.strftime(
                    '%Y-%m-%d 00:00:00'), lastday.strftime(
                        '%Y-%m-%d 23:59:59')
                session.query(CrawlEconomicCalendar).filter(
                    and_(
                        CrawlEconomicCalendar.pub_time.between(
                            lastday.strftime('%Y-%m-%d 00:00:00'),
                            lastday.strftime('%Y-%m-%d 23:59:59')),
                        ~CrawlEconomicCalendar.source_id.in_(
                            now_dataname_ids))).delete(
                                synchronize_session=False)
        elif 0 in item and isinstance(item[0],
                                      items.CrawlEconomicEventItem):
            all_data = []
            with session_scope(self.sess) as session:
                for ditem in item:
                    ditem = item[ditem]
                    crawlEconomicEvent = CrawlEconomicEvent(**ditem)
                    query = session.query(CrawlEconomicEvent.id).filter(
                        and_(
                            CrawlEconomicEvent.source_id ==
                            crawlEconomicEvent.source_id,
                            # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                        )).one_or_none()
                    if query:
                        # Existing row: copy over only populated fields.
                        data = {}
                        if crawlEconomicEvent.country is not None:
                            data['country'] = crawlEconomicEvent.country
                        if crawlEconomicEvent.time is not None:
                            data['time'] = crawlEconomicEvent.time
                        if crawlEconomicEvent.city is not None:
                            data['city'] = crawlEconomicEvent.city
                        if crawlEconomicEvent.importance is not None:
                            data['importance'] = crawlEconomicEvent.importance
                        if crawlEconomicEvent.event is not None:
                            data['event'] = crawlEconomicEvent.event
                        if crawlEconomicEvent.date is not None:
                            data['date'] = crawlEconomicEvent.date
                        if data:
                            session.query(CrawlEconomicEvent).filter(
                                CrawlEconomicEvent.id == query[0]).update(
                                    data)
                    else:
                        all_data.append(crawlEconomicEvent)
                if len(all_data) > 0:
                    session.add_all(all_data)
        elif 0 in item and isinstance(item[0],
                                      items.CrawlEconomicHolidayItem):
            all_data = []
            with session_scope(self.sess) as session:
                # crawlEconomicHoliday = CrawlEconomicHoliday(**item[0])
                # session.query(CrawlEconomicHoliday).filter(CrawlEconomicHoliday.date == crawlEconomicHoliday.date).delete()
                for ditem in item:
                    ditem = item[ditem]
                    crawlEconomicHoliday = CrawlEconomicHoliday(**ditem)
                    query = session.query(CrawlEconomicHoliday.id).filter(
                        and_(
                            CrawlEconomicHoliday.source_id ==
                            crawlEconomicHoliday.source_id,
                            # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                        )).one_or_none()
                    if query:
                        # Existing row: copy over only populated fields.
                        data = {}
                        if crawlEconomicHoliday.country is not None:
                            data['country'] = crawlEconomicHoliday.country
                        if crawlEconomicHoliday.time is not None:
                            data['time'] = crawlEconomicHoliday.time
                        if crawlEconomicHoliday.market is not None:
                            data['market'] = crawlEconomicHoliday.market
                        if crawlEconomicHoliday.holiday_name is not None:
                            data['holiday_name'] = crawlEconomicHoliday.holiday_name
                        if crawlEconomicHoliday.detail is not None:
                            data['detail'] = crawlEconomicHoliday.detail
                        if crawlEconomicHoliday.date is not None:
                            data['date'] = crawlEconomicHoliday.date
                        if data:
                            session.query(CrawlEconomicHoliday).filter(
                                CrawlEconomicHoliday.id ==
                                query[0]).update(data)
                    else:
                        all_data.append(crawlEconomicHoliday)
                if len(all_data) > 0:
                    session.add_all(all_data)
        elif 0 in item and isinstance(item[0],
                                      items.CrawlEconomicJieduItem):
            with session_scope(self.sess) as session:
                # Jiedu items arrive singly: upsert item[0] only.
                crawlEconomicJiedu = CrawlEconomicJiedu(**item[0])
                query = session.query(
                    CrawlEconomicJiedu.dataname_id
                ).filter(
                    and_(
                        CrawlEconomicJiedu.dataname_id ==
                        crawlEconomicJiedu.dataname_id,
                        # CrawlEconomicCalendar.pub_time == crawlEconomicCalendar.pub_time
                    )).one_or_none()
                if query:
                    data = {
                        'next_pub_time': crawlEconomicJiedu.next_pub_time,
                        'pub_agent': crawlEconomicJiedu.pub_agent,
                        'pub_frequency': crawlEconomicJiedu.pub_frequency,
                        'count_way': crawlEconomicJiedu.count_way,
                        'data_influence': crawlEconomicJiedu.data_influence,
                        'data_define': crawlEconomicJiedu.data_define,
                        'funny_read': crawlEconomicJiedu.funny_read
                    }
                    session.query(CrawlEconomicJiedu).filter(
                        CrawlEconomicJiedu.dataname_id ==
                        crawlEconomicJiedu.dataname_id).update(data)
                else:
                    session.add(crawlEconomicJiedu)
def parse_cryptomiso(self, item): print item with session_scope(self.sess) as session: rank = Crawl_Cryptomiso(**item) session.add(rank)