def open_spider(self, spider):
    """
    On spider start, record the current version number and start time.

    For non-static spiders the highest ``VERSION_NUMBER`` stored in
    ``new_schedules_dynamic`` for the spider's SCAC is looked up and written,
    together with the current time, to the ``NewSchedulesTaskVersion`` row of
    that SCAC (the row is created when it does not exist yet).

    :param spider: the spider being opened; only ``spider.name`` is read here.
    :return: None
    """
    logging.warning('开始 spider[%s] start', spider.name)
    # The SCAC name is derived from the spider by the pipeline helper.
    SCAC = self._get_scac(spider)
    # Static spiders are not versioned. The match is case-insensitive so that
    # it skips exactly the same spiders as the check in close_spider;
    # otherwise a lower-case "static" spider would get a START_TIME here but
    # never an END_TIME.
    if 'static' in spider.name.lower():
        return
    # Insert-or-update of the task-version row.
    sql = "SELECT max(VERSION_NUMBER) as max_version from new_schedules_dynamic where SCAC='%s'"
    version = CommonDao.native_query(sql % (SCAC))[0].get('max_version')
    # max() yields NULL when there are no rows for this SCAC; start from 0.
    if version is None or version < 0:
        version = 0
    old = CommonDao.get(NewSchedulesTaskVersion, SCAC=SCAC)
    if old:
        from RCL.model.basic import db_session
        old.VERSION = version
        old.START_TIME = DateTimeUtils.now()
        db_session.commit()
    else:
        nstv = NewSchedulesTaskVersion()
        nstv.SCAC = SCAC
        nstv.VERSION = version
        nstv.START_TIME = DateTimeUtils.now()
        CommonDao.add_one_normal(nstv)
def close_spider(self, spider):
    """
    On spider close, stamp the end time of the matching task-version row.

    Static spiders (name contains "static", case-insensitive) are skipped.
    When a ``NewSchedulesTaskVersion`` row exists for the spider's SCAC its
    ``END_TIME`` is set to the current time and the session is committed.

    :param spider: the spider being closed; only ``spider.name`` is read here.
    :return: None
    """
    spider_name = spider.name
    log.warning('结束 spider[%s] end', spider_name)
    scac_name = self._get_scac(spider)

    is_static = 'static' in spider_name.lower()
    if is_static:
        return

    task_version = CommonDao.get(NewSchedulesTaskVersion, SCAC=scac_name)
    if task_version:
        from RCL.model.basic import db_session
        finished_at = DateTimeUtils.now()
        task_version.END_TIME = finished_at
        db_session.commit()
    log.info('MysqlPipeline spider[%s] ended', spider_name)
def native_update(cls, sql, params=None, commit=True):
    """
    Execute a raw INSERT / UPDATE / DELETE statement through ``db_session``.

    ``sql`` and ``params`` are first passed to ``cls._check_params``; the SQL
    is then stripped, wrapped in ``text()`` and executed, with the bind
    parameters only when ``params`` is non-empty.

    :param sql: raw SQL text.
    :param params: dict of bind parameters; ``None`` (the default) or an
        empty dict means "no parameters".
    :param commit: whether to commit the session after executing.
    :return: ``rowcount`` of the result when ``commit`` is true and the
        statement was executed and committed; 0 when ``commit`` is false or
        when any error occurred (the session is rolled back in that case).
    """
    # Function-scope import keeps this block self-contained.
    import logging

    # Avoid a shared mutable default argument.
    if params is None:
        params = {}
    try:
        cls._check_params(sql, params)
        stmt = text(sql.strip())
        if params:
            res = db_session.execute(stmt, params)
        else:
            res = db_session.execute(stmt)
        if commit:
            db_session.commit()
    except Exception:
        db_session.rollback()
        # Still best-effort (nothing is re-raised), but no longer silent.
        logging.getLogger(__name__).exception(
            'native_update failed, session rolled back; sql=%s', sql)
        # The work was rolled back, so no rows were affected. Previously a
        # failing commit() still returned the stale rowcount.
        return 0
    return res.rowcount if commit and res else 0
def _handle_group_item_v2(self, item, spider):
    """
    Persist one dynamic-schedule "group item" (second version of the handler).

    Steps, in order:

    1. Resolve the static route row (``new_schedules_static``) matching the
       item's route code for this SCAC; when none matches, derive an id from
       an MD5 of the SCAC and route code and insert a static row if absent.
    2. Look up the POL/POD pair in ``NewSchedulesSpiderPort``; log and abort
       when it is missing.
    3. Upsert the static-route/port-pair relation and the vessel/voyage row.
    4. Insert or update the ``NewSchedulesDynamic`` row.
    5. Insert or update one ``NewSchedulesDynamicTransit`` row per entry of
       ``item['TRANSIT_LIST']``.
    6. For non-transit items, add docking rows for the POD and the POL when
       ``CommonDao.check_repaet`` reports none.

    Any exception is logged and swallowed; nothing is re-raised.

    :param item: dict-like scraped item; it is mutated (``ROUTE_CODE`` is set).
    :param spider: the running spider (used for its name and SCAC lookup).
    :return: None
    """
    log.info('收到group_item 开始处理')
    try:
        log.info('查询静态航线')
        scac = self._get_scac(spider)
        boolean_none = self._boolean_none
        if scac == 'MATS':
            item['ROUTE_CODE'] = 'UNDEFINED'
        else:
            item['ROUTE_CODE'] = self._getFirstRangeRouteCode(item, 0)
        route_code = item.get('ROUTE_CODE')
        # NOTE(review): the SQL statements in this method are built with
        # %-interpolation from scraped values (route code, vessel, voyage).
        # A value containing a quote breaks the statement; consider bind
        # parameters once the CommonDao query helpers are confirmed to
        # accept them.
        ssql = (
            " SELECT ID FROM new_schedules_static"
            " WHERE FIND_IN_SET( REPLACE(TRIM('%s'),' ',''),"
            " CONCAT_WS(',',REPLACE(TRIM(ROUTE_CODE),' ',''),"
            " REPLACE(TRIM(MY_ROUTE_CODE),' ',''),REPLACE(TRIM(ROUTE_NAME_EN),' ','')))"
            " AND SCAC = '%s' ORDER BY CREATE_TIME ASC LIMIT 1 "
        )
        query_list = CommonDao.native_query(ssql % (item['ROUTE_CODE'], scac))
        # Reuse the rows already fetched instead of running the identical
        # query a second time.
        if len(query_list) > 0:
            result = query_list[0].get('ID')
        else:
            result = None
        main_id = ''
        if result:
            # A static route matched: use its id as the main-table id.
            main_id = result
        else:
            # No static route matches this route code: generate the primary
            # key from the SCAC and route code.
            mdd = '%s,%s,%s,%s' % (scac.upper(), "NULL", "NULL", item['ROUTE_CODE'])
            main_id = EncrptUtils.md5_str(mdd)
            # Create the static route from the dynamic data (only if absent).
            insert_main_sql = (
                " INSERT INTO new_schedules_static ( ID,SCAC,ROUTE_CODE,FLAG)"
                " SELECT '%s','%s','%s','%s' FROM DUAL"
                " WHERE NOT EXISTS ( SELECT * FROM new_schedules_static s"
                " WHERE s.SCAC='%s' and s.ROUTE_PARENT IS NULL"
                " and s.ROUTE_NAME_EN IS NULL and s.ROUTE_CODE='%s') "
            )
            # Insert into the static schedule main table.
            CommonDao.native_update(insert_main_sql % (
                (main_id, scac, item['ROUTE_CODE'], 1, scac, item['ROUTE_CODE'])))
        start_name = item['polName']
        end_name = item['podName']
        log.info('获取组合数据id')
        # Could be optimised by caching the port pairs in memory.
        port_res = CommonDao.get(NewSchedulesSpiderPort, DEL_FLAG=0,
                                 START_PORT=start_name, END_PORT=end_name,
                                 SCAC=scac)
        if port_res is None:
            log.error(spider.name)
            log.error('error port_res is none start_name %s end_name %s',
                      start_name, end_name)
            log.error('item is %s', item)
            return
        insert_rel_sql_key = '%s,%s,%s' % (scac, port_res.ID, main_id)
        insert_rel_sql_key = EncrptUtils.md5_str(insert_rel_sql_key)
        relation_id = insert_rel_sql_key

        log.info('写入静态航线和动态航线关联关系')
        insert_rel_sql = (
            " insert into new_schedules_static_p2p values('%s','%s','%d','%s')"
            " on duplicate key update ID=values(ID) "
        ) % (relation_id, scac, port_res.ID, main_id)
        CommonDao.native_update(sql=insert_rel_sql)

        log.info('记录船名船次信息')
        now_time_str = DateTimeUtils.now().strftime('%Y-%m-%d %H:%M:%S')
        support_vessl_sql_key = '%s,%s,%s,%s' % (
            relation_id, item['VESSEL'], item['VOYAGE'],
            boolean_none(route_code))
        support_vessl_sql_key = EncrptUtils.md5_str(support_vessl_sql_key)
        vessl_sql = (
            " insert into new_schedules_support_vessel(ID,RELATION_ID,VESSEL,VOYAGE,DYNAMIC_ROUTE_CODE,UPDATE_TIME)"
            " values ('%s','%s','%s','%s','%s','%s')"
            " on duplicate key update UPDATE_TIME=values(UPDATE_TIME) "
        ) % (support_vessl_sql_key, relation_id, item['VESSEL'],
             item['VOYAGE'], boolean_none(route_code), now_time_str)
        CommonDao.native_update(vessl_sql)
        log.info('录船名船次信息成功')

        from RCL.model.basic import db_session
        # The string form of this list is hashed below, so the key order of
        # each dict must stay exactly as written.
        transit_id_list = [
            {
                "TRANSIT_ROUTE_CODE": boolean_none(transit_leg.get('TRANSIT_ROUTE_CODE')),
                "TRANSIT_PORT_EN": boolean_none(transit_leg.get('TRANSIT_PORT_EN')),
                "TRANSIT_PORT_CODE": boolean_none(transit_leg.get('TRANSIT_PORT_CODE')),
                "TRANSIT_VESSEL": boolean_none(transit_leg.get('TRANS_VESSEL')),
                "TRANSIT_VOYAGE": boolean_none(transit_leg.get('TRANS_VOYAGE')),
            }
            for transit_leg in item['TRANSIT_LIST']
        ]
        # Id shared by all transit rows of this item.
        transit_id = EncrptUtils.md5_str(
            str(transit_id_list) + support_vessl_sql_key + main_id)

        log.info('写入动态数据')
        mdd = '%s,%s,%s,%s,%s' % (
            scac.upper(),
            support_vessl_sql_key,
            self._covert_time(item['ETD']) if boolean_none(item['ETD']) else None,
            self._covert_time(item['ETA']) if boolean_none(item['ETA']) else None,
            item['IS_TRANSIT'] if item['IS_TRANSIT'] and item['IS_TRANSIT'] != 'None' else 0)
        new_schedules_dynamic_key = EncrptUtils.md5_str(mdd)
        nsd = NewSchedulesDynamic()
        dynamic_res = CommonDao.get(NewSchedulesDynamic,
                                    ID=new_schedules_dynamic_key, DEL_FLAG=0)
        if dynamic_res is None:
            nsd.ID = new_schedules_dynamic_key
            nsd.SCAC = scac
            nsd.VESSEL_RELATION_ID = support_vessl_sql_key
            nsd.TRANSIT_ID = transit_id
            nsd.POD_TERMINAL = item.get('POD_TERMINAL') or None
            nsd.POL_TERMINAL = item.get('POL_TERMINAL') or None
            nsd.ETA = self._covert_time(item['ETA'])
            nsd.ETD = self._covert_time(item['ETD'])
            nsd.IS_TRANSIT = str(item['IS_TRANSIT'])
            nsd.TRANSIT_TIME = item['TRANSIT_TIME']
            CommonDao.add_one_normal(nsd)
            log.info('写入动态数据成功')
        else:
            # Row already exists: refresh it and bump its version number.
            dynamic_res.UPDATE_TIME = DateTimeUtils.now()
            dynamic_res.VERSION_NUMBER = dynamic_res.VERSION_NUMBER + 1
            dynamic_res.POD_TERMINAL = item.get('POD_TERMINAL') or None
            dynamic_res.POL_TERMINAL = item.get('POL_TERMINAL') or None
            dynamic_res.ETA = self._covert_time(item['ETA'])
            dynamic_res.ETD = self._covert_time(item['ETD'])
            dynamic_res.IS_TRANSIT = str(item['IS_TRANSIT'])
            dynamic_res.TRANSIT_TIME = item['TRANSIT_TIME']
            db_session.commit()
            log.info('重复 dynamic_res 更新成功')

        # The sort index defaults to starting at 1 in the database; stay
        # consistent with that here.
        for index, transit_info in enumerate(item['TRANSIT_LIST'], start=1):
            try:
                log.info('写入中转数据')
                transit_key = "%s,%s,%s,%s" % (
                    transit_id,
                    boolean_none(transit_info.get('TRANSIT_PORT_EN')),
                    boolean_none(transit_info.get('TRANSIT_PORT_CODE')),
                    boolean_none(transit_info.get('TRANSIT_ROUTE_CODE')))
                transit_key = EncrptUtils.md5_str(transit_key)
                dynamic_trainst = CommonDao.get(NewSchedulesDynamicTransit,
                                                ID=transit_key, DEL_FLAG=0)
                if dynamic_trainst:
                    dynamic_trainst.UPDATE_TIME = DateTimeUtils.now()
                    dynamic_trainst.TRANSIT_SORT = index
                    dynamic_trainst.TRANSIT_ROUTE_CODE = transit_info.get(
                        'TRANSIT_ROUTE_CODE', None)
                    dynamic_trainst.TRANSIT_VOYAGE = transit_info['TRANS_VOYAGE']
                    dynamic_trainst.TRANSIT_VESSEL = transit_info['TRANS_VESSEL']
                    db_session.commit()
                    log.info('重复 dynamic_trainst 更新成功')
                    continue
                nddt = NewSchedulesDynamicTransit()
                nddt.ID = transit_key
                nddt.TRANSIT_SORT = index
                nddt.TRANSIT_ID = transit_id
                nddt.TRANSIT_ROUTE_CODE = transit_info.get(
                    'TRANSIT_ROUTE_CODE', None)
                nddt.TRANSIT_PORT_EN = transit_info['TRANSIT_PORT_EN']
                nddt.TRANSIT_VESSEL = transit_info['TRANS_VESSEL']
                nddt.TRANSIT_VOYAGE = transit_info['TRANS_VOYAGE']
                CommonDao.add_one_normal(nddt)
                log.info('写入中转数据成功')
            except Exception as e:
                log.error(spider.name)
                # The format string has two placeholders; both arguments are
                # supplied so the traceback is actually emitted.
                log.error("处理group_item[%s] 出错e[%s]",
                          str(item), traceback.format_exc())
                log.error("添加中转数据错误 item[%s]出错e[%s]",
                          str(transit_info), e)

        log.info('写入挂靠港口数据')
        docking_res_1 = CommonDao.check_repaet(NewSchedulesStaticDocking,
                                               STATIC_ID=main_id,
                                               DEL_FLAG=0,
                                               PORT=item['podName'])
        if docking_res_1 <= 0 and int(item['IS_TRANSIT']) == 0:
            nssd = NewSchedulesStaticDocking()
            nssd.STATIC_ID = main_id
            nssd.PORT = item['podName']
            nssd.IS_POL = 0
            nssd.PORT_CODE = item['pod']
            nssd.ETD = self._covert_time2weekday(item['ETD'])
            nssd.ETA = self._covert_time2weekday(item['ETA'])
            nssd.FLAG = 1
            nssd.TRANSIT_TIME = int(self._covert_value(item['TRANSIT_TIME']))
            # NOTE(review): attribute is spelled IS_TRANSI (not IS_TRANSIT);
            # kept as-is, confirm against the NewSchedulesStaticDocking model.
            nssd.IS_TRANSI = item['IS_TRANSIT']
            CommonDao.add_one_normal(nssd)
            log.info('写入挂靠港口数据成功')
        docking_res_2 = CommonDao.check_repaet(NewSchedulesStaticDocking,
                                               STATIC_ID=main_id,
                                               PORT=item['polName'])
        log.info('写入挂靠港[%s]', item['IS_TRANSIT'])
        if docking_res_2 <= 0 and int(item['IS_TRANSIT']) == 0:
            nssd = NewSchedulesStaticDocking()
            nssd.STATIC_ID = main_id
            nssd.PORT = item['polName']
            nssd.IS_POL = 1
            nssd.FLAG = 1
            nssd.PORT_CODE = item['pol']
            nssd.ETD = self._covert_time2weekday(item['ETD'])
            nssd.ETA = self._covert_time2weekday(item['ETA'])
            nssd.TRANSIT_TIME = int(self._covert_value(item['TRANSIT_TIME']))
            # NOTE(review): see the IS_TRANSI remark on the POD row above.
            nssd.IS_TRANSI = item['IS_TRANSIT']
            CommonDao.add_one_normal(nssd)
            log.info('写入挂靠港口数据成功')
    except Exception as e:
        log.error(spider.name)
        # Both placeholders are filled so the traceback is not lost to a
        # logging format error.
        log.error("处理group_item[%s] 出错e[%s]",
                  str(item), traceback.format_exc())
        log.error("处理group_item[%s] 出错e[%s]", str(item), e)
def add_all_normal(cls, datas):
    """
    Add several objects to ``db_session`` and commit them together.

    On any exception the session is rolled back and the error is logged;
    nothing is re-raised.

    :param datas: iterable of objects handed to ``db_session.add_all``.
    :return: None
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        db_session.add_all(datas)
        db_session.commit()
    except Exception:
        db_session.rollback()
        # Still best-effort, but the failure is no longer silent.
        logging.getLogger(__name__).exception(
            'add_all_normal failed, session rolled back')
def add_one_normal(cls, data):
    """
    Add a single object to ``db_session`` and commit.

    On any exception the session is rolled back and the error is logged;
    nothing is re-raised.

    :param data: the object handed to ``db_session.add``.
    :return: None
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        db_session.add(data)
        db_session.commit()
    except Exception:
        db_session.rollback()
        # Still best-effort, but the failure is no longer silent.
        logging.getLogger(__name__).exception(
            'add_one_normal failed, session rolled back')
def update_one(cls, data):
    """
    Commit the pending changes held by ``db_session``.

    ``data`` itself is not used by this method; only the session is
    committed. On any exception the session is rolled back and the error is
    logged; nothing is re-raised.

    :param data: unused here.
    :return: None
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        db_session.commit()
    except Exception:
        db_session.rollback()
        # Still best-effort, but the failure is no longer silent.
        logging.getLogger(__name__).exception(
            'update_one failed, session rolled back')