def process_item(self, item, spider):
    """Store a scraped item in the result tables when the spider saves to SQL.

    The spider's ``chainCode`` is remembered on ``self.taskcode``. When
    ``spider.savetype`` is ``'sql'`` the item is written either to
    ``SPIDER_RESULT_LIST_TB`` (for ``JobListItem`` instances, one
    ``COLUMN_n`` per entry of ``item['values']``) or to
    ``SPIDER_RESULT_KV_TB`` (everything else, using ``item['key']`` and
    ``item['value']``).

    Args:
        item: Scraped item; must provide ``taskCode`` and ``updateTime``
            plus either ``values`` or ``key``/``value``.
        spider: Running spider; ``chainCode`` and ``savetype`` are read.

    Returns:
        The dict of values handed to ``DataConn.executeByConn`` when the
        item was persisted, otherwise the unchanged ``item``.
    """
    self.taskcode = spider.chainCode

    if spider.savetype != 'sql':
        # Nothing is persisted in other modes; hand the item on unchanged so
        # later pipeline stages still receive it.
        return item

    insertItem = {
        'taskCode': item['taskCode'],
        'updateTime': item['updateTime'],
    }

    # NOTE(review): values are substituted into quoted SQL literals
    # (presumably via str.format inside DataConn.executeByConn). Scraped text
    # containing a single quote would break or alter the statement -- confirm
    # DataConn escapes values, or move to bound parameters.
    if isinstance(item, JobListItem):
        columns = []
        for position, cell in enumerate(item['values'], start=1):
            column = 'COLUMN_' + str(position)
            columns.append(column)
            insertItem[column] = cell
        # Each fragment keeps its trailing comma so it slots in front of the
        # UPDATE_TIME column / placeholder.
        columnSql = ''.join(column + ',' for column in columns)
        valueSql = ''.join("'{" + column + "}'," for column in columns)
        insertSql = (
            r"INSERT INTO " + DB_SCHEAM
            + r".SPIDER_RESULT_LIST_TB(TASK_CODE," + columnSql
            + "UPDATE_TIME) VALUES ('{taskCode}'," + valueSql
            + "'{updateTime}') "
        )
    else:
        insertItem["key"] = item['key']
        insertItem["value"] = item['value']
        insertSql = (
            r" INSERT INTO " + DB_SCHEAM
            + r".SPIDER_RESULT_KV_TB(TASK_CODE,RESULT_NAME,RESULT_VALUE,UPDATE_TIME)"
            r" VALUES ('{taskCode}','{key}','{value}','{updateTime}') "
        )

    DataConn.executeByConn(self.connect, insertSql, insertItem)
    return insertItem
def getDiffParamByCode(self, param_code, param_value):
    """Query SM_PARAM_TB rows with the given PARAM_CODE whose PARAM_VALUE differs.

    Args:
        param_code: Value compared for equality with PARAM_CODE.
        param_value: Value the row's PARAM_VALUE must not equal.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    # Only the trailing fragment carries placeholders, so only that fragment
    # is formatted; the schema name is concatenated verbatim.
    condition = (
        r".SM_PARAM_TB where PARAM_CODE = '{param_code}'and"
        r" PARAM_VALUE != '{param_value}' "
    ).format(param_code=param_code, param_value=param_value)
    query = r" select * from " + DB_SCHEAM + condition
    return DataConn.executeQuery(query)
def getTaskChainList(self):
    """Query every SPIDER_TASK_CHAIN_TB row whose STATUS is '1'.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    table_filter = r".SPIDER_TASK_CHAIN_TB where STATUS = '1' "
    query = r" select * from " + DB_SCHEAM + table_filter
    return DataConn.executeQuery(query)
def insertTaskLog(self, chainCode):
    """Insert a 'progress' row for a task chain into SPIDER_TASK_LOG_TB.

    The row carries the chain code, the literal status ``progress``, the
    current local time and an empty last column.

    Args:
        chainCode: Passed to ``DataConn.execute`` as ``TASK_CHAIN_CODE``.

    Returns:
        Whatever ``DataConn.execute`` returns.
    """
    statement = (
        r" insert into " + DB_SCHEAM
        + r".SPIDER_TASK_LOG_TB values('{TASK_CHAIN_CODE}','progress','{UPDATE_TIME}','') "
    )
    now = time.localtime()
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', now)
    return DataConn.execute(statement, TASK_CHAIN_CODE=chainCode, UPDATE_TIME=stamp)
def getTaskChainLevel(self, chainCode):
    """Query the deepest level (``max(leve)``) of a chain's active task tree.

    The statement is a recursive common table expression over SPIDER_TASK_TB:
    level 1 is every TASK_STATUS='1' row of the chain with no
    BELONG_TASK_CODE, and each further level joins active rows whose
    BELONG_TASK_CODE is a task of the previous level.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    anchor_head = (
        r" WITH TASK_TB(LEVE,TASK_CODE, TASK_CHAIN_CODE, BELONG_TASK_CODE)AS ( "
        r"SELECT 1 , a.TASK_CODE, A.TASK_CHAIN_CODE, A.BELONG_TASK_CODE FROM "
    )
    anchor_tail_and_recursive_head = (
        r".SPIDER_TASK_TB a WHERE a.TASK_STATUS='1' AND a.BELONG_TASK_CODE IS NULL "
        r"AND TASK_CHAIN_CODE='{TASK_CHAIN_CODE}' UNION ALL "
        r"SELECT LEVE+1, b.TASK_CODE, B.TASK_CHAIN_CODE, B.BELONG_TASK_CODE FROM "
    )
    recursive_tail_and_select = (
        r".SPIDER_TASK_TB b, TASK_TB a WHERE b.BELONG_TASK_CODE = a.TASK_CODE "
        r"AND b.TASK_STATUS = '1') "
        r"SELECT max(leve) FROM TASK_TB where TASK_CHAIN_CODE='{TASK_CHAIN_CODE}' "
    )
    query = (
        anchor_head + DB_SCHEAM
        + anchor_tail_and_recursive_head + DB_SCHEAM
        + recursive_tail_and_select
    )
    return DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)
def getTaskChain(self, chainCode):
    """Query the SPIDER_TASK_CHAIN_TB rows with STATUS '1' for one chain code.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    table_filter = (
        r" .SPIDER_TASK_CHAIN_TB where STATUS = '1' "
        r"AND TASK_CHAIN_CODE = '{TASK_CHAIN_CODE}' "
    )
    query = r" select * from " + DB_SCHEAM + table_filter
    return DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)
def selectLog(self, chainCode):
    """Fetch the first SPIDER_TASK_LOG_TB row for a task chain.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        The first element of the query result, or ``None`` when the result
        is empty.
    """
    query = (
        r" select * from " + DB_SCHEAM
        + r".SPIDER_TASK_LOG_TB where TASK_CHAIN_CODE = '{TASK_CHAIN_CODE}' "
    )
    rows = DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)
    if len(rows) == 0:
        return None
    return rows[0]
def updateTaskLog(self, chainCode, status, reason):
    """Update status, reason and timestamp of a chain's SPIDER_TASK_LOG_TB rows.

    UPDATE_TIME is set to the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Args:
        chainCode: Passed to ``DataConn.execute`` as ``TASK_CHAIN_CODE``.
        status: Passed as ``EXEC_STATUS``.
        reason: Passed as ``TASK_REASON``.

    Returns:
        Whatever ``DataConn.execute`` returns.
    """
    assignments = (
        r".SPIDER_TASK_LOG_TB set EXEC_STATUS = '{EXEC_STATUS}', "
        r"UPDATE_TIME = '{UPDATE_TIME}',TASK_REASON='{TASK_REASON}' "
        r"where TASK_CHAIN_CODE = '{TASK_CHAIN_CODE}' "
    )
    statement = r" update " + DB_SCHEAM + assignments
    now = time.localtime()
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', now)
    return DataConn.execute(
        statement,
        TASK_CHAIN_CODE=chainCode,
        EXEC_STATUS=status,
        UPDATE_TIME=stamp,
        TASK_REASON=reason,
    )
def getTask(self, taskCode):
    """Fetch the first SPIDER_TASK_TB row with TASK_STATUS '1' for a task code.

    Args:
        taskCode: Passed to ``DataConn.executeQuery`` as ``TASK_CODE``.

    Returns:
        The first element of the query result, or ``None`` when the result
        is empty.
    """
    table_filter = (
        r" .SPIDER_TASK_TB where TASK_STATUS = '1' "
        r"and TASK_CODE = '{TASK_CODE}' "
    )
    query = r" select * from " + DB_SCHEAM + table_filter
    rows = DataConn.executeQuery(query, TASK_CODE=taskCode)
    if len(rows) == 0:
        return None
    return rows[0]
def getLoginTask(self, chainCode):
    """Fetch the first active task of a chain whose TASK_TYPE is 'login'.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        The first element of the query result, or ``None`` when the result
        is empty.
    """
    table_filter = (
        r" .SPIDER_TASK_TB where TASK_STATUS = '1' "
        r"and TASK_CHAIN_CODE = '{TASK_CHAIN_CODE}' "
        r"and TASK_TYPE ='login' "
    )
    query = r" select * from " + DB_SCHEAM + table_filter
    rows = DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)
    if len(rows) == 0:
        return None
    return rows[0]
def getOutputDictByTaskChain(self, chainCode):
    """Query the dict-mapping rows with INPUT_TYPE 'out' for one task chain.

    SPIDER_DICT_MAPPING_TB is left-joined to SPIDER_TASK_TB on TASK_CODE and
    filtered on the task's TASK_CHAIN_CODE.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    select_list = (
        r" SELECT a.MAPPING_CODE, A.MAPPING_TYPE, A.MAPPING_VALUE, "
        r"A.BELONG_MAPPING_CODE, A.TASK_CODE FROM "
    )
    join_clause = r".SPIDER_DICT_MAPPING_TB a LEFT JOIN "
    filter_clause = (
        r".SPIDER_TASK_TB b ON a.TASK_CODE=b.TASK_CODE "
        r"where a.INPUT_TYPE = 'out' and b.TASK_CHAIN_CODE = '{TASK_CHAIN_CODE}' "
    )
    query = select_list + DB_SCHEAM + join_clause + DB_SCHEAM + filter_clause
    return DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)
def getAllDictByTaskChain(self, chainCode):
    """Query all dict-mapping rows of one task chain, ordered by EXEC_ORDER.

    SPIDER_DICT_MAPPING_TB is left-joined to SPIDER_TASK_TB on TASK_CODE and
    filtered on the task's TASK_CHAIN_CODE.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    select_list = (
        r" SELECT a.MAPPING_CODE, A.MAPPING_TYPE, A.MAPPING_VALUE, "
        r"A.BELONG_MAPPING_CODE, A.TASK_CODE, A.INPUT_TYPE, EXEC_ORDER FROM "
    )
    join_clause = r".SPIDER_DICT_MAPPING_TB a LEFT JOIN "
    filter_clause = (
        r".SPIDER_TASK_TB b ON a.TASK_CODE=b.TASK_CODE "
        r"where b.TASK_CHAIN_CODE = '{TASK_CHAIN_CODE}' order by EXEC_ORDER "
    )
    query = select_list + DB_SCHEAM + join_clause + DB_SCHEAM + filter_clause
    return DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)
def getInputDictByTaskChain(self, chainCode):
    """Query the 'in'/'click' dict mappings of a task chain with their nesting level.

    The statement is a recursive common table expression: level 1 is every
    mapping with INPUT_TYPE 'in' or 'click' and no BELONG_MAPPING_CODE whose
    task belongs to the chain; each further level adds mappings of the same
    task whose BELONG_MAPPING_CODE is a mapping of the previous level. Rows
    are ordered by level descending, then EXEC_ORDER.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    cte_header_and_anchor_select = (
        r" WITH dict_param ( leve, MAPPING_CODE, MAPPING_NAME, MAPPING_TYPE, "
        r"MAPPING_VALUE, BELONG_MAPPING_CODE, TASK_CODE, INPUT_TYPE, EXEC_ORDER ) AS ( "
        r"SELECT 1, A.MAPPING_CODE, A.MAPPING_NAME, A.MAPPING_TYPE, A.MAPPING_VALUE, "
        r"A.BELONG_MAPPING_CODE, A.TASK_CODE, A.INPUT_TYPE, A.EXEC_ORDER FROM "
    )
    anchor_join = r".SPIDER_DICT_MAPPING_TB a LEFT JOIN "
    anchor_filter_and_recursive_select = (
        r".SPIDER_TASK_TB b ON a.TASK_CODE=b.TASK_CODE "
        r"WHERE a.INPUT_TYPE IN ('in','click') AND a.BELONG_MAPPING_CODE IS NULL "
        r"AND b.TASK_CHAIN_CODE ='{TASK_CHAIN_CODE}' UNION ALL "
        r"SELECT a.leve+1, b.MAPPING_CODE, b.MAPPING_NAME, b.MAPPING_TYPE, "
        r"b.MAPPING_VALUE, b.BELONG_MAPPING_CODE, b.TASK_CODE, b.INPUT_TYPE, "
        r"b.EXEC_ORDER FROM dict_param a , "
    )
    recursive_filter_and_final_select = (
        r".SPIDER_DICT_MAPPING_TB b WHERE b.INPUT_TYPE IN ('in','click') "
        r"AND b.BELONG_MAPPING_CODE=a.MAPPING_CODE and a.TASK_CODE = b.TASK_CODE ) "
        r"SELECT b.leve, b.MAPPING_CODE, B.MAPPING_NAME, B.MAPPING_TYPE, "
        r"B.MAPPING_VALUE, B.BELONG_MAPPING_CODE, B.TASK_CODE, B.INPUT_TYPE, "
        r"b.EXEC_ORDER FROM dict_param b ORDER BY leve DESC,b.EXEC_ORDER "
    )
    query = (
        cte_header_and_anchor_select + DB_SCHEAM
        + anchor_join + DB_SCHEAM
        + anchor_filter_and_recursive_select + DB_SCHEAM
        + recursive_filter_and_final_select
    )
    return DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)
def __init__(self):
    """Log the start-up message, obtain a connection and clear the task code."""
    logging.info('spider start write......................')
    # Connection comes from the shared DataConn helper and is kept on the instance.
    self.connect = DataConn.getConnect()
    # Empty until process_item records the running spider's chain code.
    self.taskcode = ''
def getTaskByChain(self, chainCode):
    """Query every active task of a chain together with its tree level.

    The statement is a recursive common table expression over SPIDER_TASK_TB:
    level 1 is every TASK_STATUS='1' row of the chain with no
    BELONG_TASK_CODE, and each further level joins active rows whose
    BELONG_TASK_CODE is a task of the previous level. The final select keeps
    the rows whose TASK_CHAIN_CODE matches the chain.

    Args:
        chainCode: Passed to ``DataConn.executeQuery`` as ``TASK_CHAIN_CODE``.

    Returns:
        Whatever ``DataConn.executeQuery`` returns for the statement.
    """
    cte_header_and_anchor_select = (
        r" WITH TASK_TB(LEVE,TASK_CODE, TASK_NAME, TASK_STATUS, TASK_TYPE, "
        r"TASK_CHAIN_CODE, BELONG_TASK_CODE, FORMAT_TYPE, FETCH_URL, UPLOAD_FALG, "
        r"HEADER_CONTEXT, LOGIN_SUCESS_FLAG, REQUEST_METHOD, RESPONSE_METHOD, "
        r"UPDATOR_CODE, UPDATE_TIME)AS ( "
        r"SELECT 1 , a.TASK_CODE, A.TASK_NAME, A.TASK_STATUS, A.TASK_TYPE, "
        r"A.TASK_CHAIN_CODE, A.BELONG_TASK_CODE, A.FORMAT_TYPE, A.FETCH_URL, "
        r"A.UPLOAD_FALG, A.HEADER_CONTEXT, A.LOGIN_SUCESS_FLAG, A.REQUEST_METHOD, "
        r"A.RESPONSE_METHOD, A.UPDATOR_CODE, A.UPDATE_TIME FROM "
    )
    anchor_filter_and_recursive_select = (
        r".SPIDER_TASK_TB a WHERE a.TASK_STATUS='1' AND a.BELONG_TASK_CODE IS NULL "
        r"and a.TASK_CHAIN_CODE = '{TASK_CHAIN_CODE}' UNION ALL "
        r"SELECT LEVE+1, b.TASK_CODE, B.TASK_NAME, B.TASK_STATUS, B.TASK_TYPE, "
        r"B.TASK_CHAIN_CODE, B.BELONG_TASK_CODE, B.FORMAT_TYPE, B.FETCH_URL, "
        r"B.UPLOAD_FALG, B.HEADER_CONTEXT, B.LOGIN_SUCESS_FLAG, B.REQUEST_METHOD, "
        r"B.RESPONSE_METHOD, B.UPDATOR_CODE, B.UPDATE_TIME FROM "
    )
    recursive_filter_and_final_select = (
        r".SPIDER_TASK_TB b, TASK_TB a WHERE b.BELONG_TASK_CODE = a.TASK_CODE "
        r"AND b.TASK_STATUS = '1') "
        r"SELECT * FROM TASK_TB where TASK_CHAIN_CODE='{TASK_CHAIN_CODE}' "
    )
    query = (
        cte_header_and_anchor_select + DB_SCHEAM
        + anchor_filter_and_recursive_select + DB_SCHEAM
        + recursive_filter_and_final_select
    )
    return DataConn.executeQuery(query, TASK_CHAIN_CODE=chainCode)