def getNewRequests(requestURL, Host):
    """Build a ``requests`` session that reuses a real browser's cookies and user agent.

    The URL is opened once in a Chrome WebDriver obtained from ``PublicFun``;
    the cookies and ``navigator.userAgent`` reported by that browser are then
    copied onto a fresh ``requests.Session``.

    Args:
        requestURL: URL loaded in the browser to collect cookies.
        Host: Value sent as the ``Host`` request header.

    Returns:
        A ``requests.Session`` carrying the browser-like headers and cookies.

    Raises:
        Whatever the WebDriver raises while loading the page; the driver is
        still closed in that case.
    """
    import requests
    import Public.PublicFun as PublicFun

    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        cookies = Chromedriver.get_cookies()
        userAgent = Chromedriver.execute_script("return navigator.userAgent")
    finally:
        # Always release the browser, even when the page load or script fails.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    request = requests.Session()
    request.headers.update({
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Host': Host,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': userAgent,
    })
    for cookie in cookies:
        request.cookies.set(cookie['name'], cookie['value'])
    return request
def addQueue(SystemName, QueueType, Path, Files, Param):
    """Insert a job into ``[dbo].[JobQueue]`` when ``checkQueue`` allows it.

    ``Param`` is stored as-is when it is already a string, otherwise as
    compact JSON. The insert is attempted up to 11 times (one initial try
    plus ten retries); if every attempt fails the function returns silently,
    as a best-effort enqueue.

    Args:
        SystemName: Value for the ``SystemName`` column.
        QueueType: Value for the ``QueueType`` column.
        Path: Value for the ``Path`` column.
        Files: Value for the ``Files`` column.
        Param: Job parameters; a ``str`` or a JSON-serialisable object.
    """
    import json
    import Public.PublicFun as PublicFun

    GUID = PublicFun.createID()
    if isinstance(Param, str):
        jsonData = Param
    else:
        jsonData = json.dumps(Param, separators=(',', ':'))

    sql = (
        "INSERT INTO [dbo].[JobQueue]([GUID],[SystemName],[QueueType],[Path],[Files],[Param],[D_INSERTTIME])"
        + "VALUES(?,?,?,?,?,?,?)")

    dbcon = getQueueDBConnect()
    try:
        if not checkQueue(SystemName, QueueType, jsonData, dbcon):
            return
        for attempt in range(1, 12):
            try:
                D_INSERTTIME = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
                dbcon.Execute(sql, (GUID, SystemName, QueueType, Path, Files,
                                    jsonData, D_INSERTTIME))
                break
            except Exception:
                # If the write fails, retry; at most ten retries are made.
                print("Retry:" + str(attempt))
    finally:
        # Close the connection even if checkQueue itself raises.
        dbcon.close()
def insertMappingList(dbcon, MapType, Value, RelValue):
    """Add one row to ``[dbo].[MAPAAA]`` and return the GUID generated for it.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        MapType: Stored in ``MAPAAA001``.
        Value: Stored in ``MAPAAA002``.
        RelValue: Stored in ``MAPAAA003``.

    Returns:
        The GUID created for the new row.
    """
    import Public.PublicFun as PublicFun

    new_guid = PublicFun.createID()
    statement = (
        "INSERT INTO [dbo].[MAPAAA]"
        "([GUID],[MAPAAA001],[MAPAAA002],[MAPAAA003],"
        "[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        "VALUES(?,?,?,?,'System',?,'','')"
    )
    inserted_at = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    dbcon.Execute(statement, (new_guid, MapType, Value, RelValue, inserted_at))
    return new_guid
def getPageSize(requestURL):
    """Return the number of ``<option>`` entries in the page's paging drop-down.

    The page is rendered in a Chrome WebDriver, then parsed. Two selector
    variants are tried; the second is used only when the first matches
    nothing.

    Args:
        requestURL: URL of the listing page to inspect.

    Returns:
        The count of paging options found (0 when neither selector matches).
    """
    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        # NOTE(review): fixed wait, presumably to let the page finish rendering.
        time.sleep(2)
        Soup = bs(Chromedriver.page_source, "html.parser")
    finally:
        # Always release the browser, even when loading or parsing fails.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    pageSize = len(Soup.select(
        "select.b-clear-border.js-paging-select.gtm-paging-top option"))
    if pageSize == 0:
        pageSize = len(Soup.select(
            "select.page-select.js-paging-select.gtm-paging-top option"))
    return pageSize
def writeDBMsg(msg):
    """Write ``msg`` to ``[dbo].[LogMsg]``; fall back to the log handler on failure.

    Args:
        msg: Message to record. It is passed through ``PublicFun.SQLFilter``
            before being bound to the insert statement.
    """
    import Public.LogHandler as LogHandler
    import Public.PublicFun as PublicFun

    sql = ("INSERT INTO [dbo].[LogMsg]([GUID],[Message],[D_INSERTTIME])"
           + "VALUES(?,?,?)")
    dbcon = getQueueDBConnect()
    try:
        # NOTE(review): SQLFilter is kept although the value is bound as a
        # parameter, so the stored text stays what it was — confirm it is needed.
        dbcon.Execute(sql, (PublicFun.createID(), PublicFun.SQLFilter(msg),
                            PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
    except Exception:
        # str() keeps the fallback from raising when msg is not a string.
        LogHandler.writeMsg("writeDBMsg失敗:" + str(msg))
    finally:
        dbcon.close()
def insertOption(dbcon, OptionName, OptionCode, OptionType, RelGUID=None):
    """Create an ``OPTAAA`` row and, optionally, an ``OPTAAB`` row linking it.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        OptionName: Stored in ``OPTAAA001``.
        OptionCode: Stored in ``OPTAAA002``.
        OptionType: Stored in ``OPTAAA003``.
        RelGUID: When not ``None``, an ``OPTAAB`` row is also inserted with
            ``OPTAAB001=RelGUID`` and ``OPTAAB002`` set to the new option GUID.

    Returns:
        The GUID created for the ``OPTAAA`` row.
    """
    import Public.PublicFun as PublicFun

    time_format = "YYYY/MM/DD HH:MM:SS"
    option_guid = PublicFun.createID()

    option_statement = (
        "INSERT INTO [dbo].[OPTAAA]"
        "([GUID],[OPTAAA001],[OPTAAA002],[OPTAAA003],"
        "[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        "VALUES(?,?,?,?,'System',?,'','')"
    )
    option_values = (option_guid, OptionName, OptionCode, OptionType,
                     PublicFun.getNowDateTime(time_format))
    dbcon.Execute(option_statement, option_values)

    if RelGUID is None:
        return option_guid

    relation_statement = (
        "INSERT INTO [dbo].[OPTAAB]"
        "([GUID],[OPTAAB001],[OPTAAB002],"
        "[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        "VALUES(?,?,?,'System',?,'','')"
    )
    relation_values = (PublicFun.createID(), RelGUID, option_guid,
                       PublicFun.getNowDateTime(time_format))
    dbcon.Execute(relation_statement, relation_values)
    return option_guid
def save_company_info(db_connection, company_info):
    """Insert or update the company record described by ``company_info``.

    Args:
        db_connection: Connection passed on to ``BIASDataIO`` and ``Engine``.
        company_info: Mapping that must contain ``"factory_name"`` and may
            contain ``"factory_address"`` and ``"factory_phone"``.

    Returns:
        The company GUID, also when loading or saving the record failed
        (failures are logged, not raised).
    """
    print(company_info)
    company_guid = BIASDataIO.CheckCompanyMappingList(
        db_connection, company_info["factory_name"], NewCompanyGUID=False)
    if company_guid == "":
        company_guid = PublicFun.createID()

    try:
        companys = Engine.Query(db_connection, Companys.Companys(), "GUID=?",
                                company_guid)
    except Exception as message:
        logger.logger.error(message)
        # Without a loaded record there is nothing to update; continuing
        # would only fail on the unbound `companys` variable.
        return company_guid

    if companys.GUID == "":
        companys.GUID = company_guid
        companys.D_INSERTUSER = "******"
    else:
        companys.D_MODIFYUSER = "******"
    companys.Companys003 = company_info.get("factory_name", '')
    companys.Companys005 = company_info.get("factory_address", '')
    # Only overwrite the phone when none is stored or it is the placeholder.
    if not companys.Companys010 or "暫不提供" in companys.Companys010:
        companys.Companys010 = company_info.get("factory_phone", '')

    try:
        Engine.UpdateData(db_connection, companys)
    except Exception as message:
        logger.logger.error(message)
    return company_guid
def findOption(dbcon, OptionName):
    """Return the GUID of the first ``OPTAAA`` row whose ``OPTAAA001`` is ``OptionName``.

    The name is bound as a query parameter instead of being spliced into the
    SQL text, so quotes in the name cannot alter the statement.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, params)``.
        OptionName: Option name to look up.

    Returns:
        The ``GUID`` attribute of the first matching row, or ``""`` when the
        query returns ``None`` or no rows.
    """
    sql = "SELECT GUID FROM OPTAAA WHERE OPTAAA001=?"
    OptionGUID = dbcon.GetDataTable(sql, (OptionName,))
    if OptionGUID is not None and len(OptionGUID) > 0:
        return OptionGUID[0].GUID
    return ""
def insertOption(dbcon, OptionName, RelGUID=None):
    """Create an ``OPTAAA`` row and, optionally, an ``OPTAAB`` row linking it.

    All values are bound as query parameters rather than concatenated into
    the SQL text.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        OptionName: Stored in ``OPTAAA001``.
        RelGUID: When not ``None``, an ``OPTAAB`` row is also inserted with
            ``OPTAAB001=RelGUID`` and ``OPTAAB002`` set to the new option GUID.

    Returns:
        The GUID created for the ``OPTAAA`` row.
    """
    import Public.PublicFun as PublicFun

    GUID = PublicFun.createID()
    sql = (
        "INSERT INTO [dbo].[OPTAAA]([GUID],[OPTAAA001],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        "VALUES(?,?,'System',?,'','')")
    dbcon.Execute(sql, (GUID, OptionName,
                        PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))

    if RelGUID is not None:
        sql = (
            "INSERT INTO [dbo].[OPTAAB]([GUID],[OPTAAB001],[OPTAAB002],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            "VALUES(?,?,?,'System',?,'','')")
        dbcon.Execute(sql, (PublicFun.createID(), RelGUID, GUID,
                            PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
    return GUID
def findCompany(dbcon, CompanysName):
    """Return the GUID of the first ``Companys`` row whose ``Companys003`` is ``CompanysName``.

    The name is bound as a query parameter instead of being spliced into the
    SQL text, so quotes in the name cannot alter the statement.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, params)``.
        CompanysName: Company name to look up.

    Returns:
        The ``GUID`` attribute of the first matching row, or ``""`` when the
        query returns ``None`` or no rows.
    """
    sql = "SELECT GUID FROM Companys WHERE Companys003=?"
    CompanysGUID = dbcon.GetDataTable(sql, (CompanysName,))
    if CompanysGUID is not None and len(CompanysGUID) > 0:
        return CompanysGUID[0].GUID
    return ""
def CheckMappingList(dbcon, MapType, Value):
    """Look up the mapped value for ``(MapType, Value)`` in ``MAPAAA``.

    Both values are bound as query parameters, so every map type is handled
    the same way and quotes in either value cannot alter the statement.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, params)``.
        MapType: Compared against ``MAPAAA001``.
        Value: Compared against ``MAPAAA002``.

    Returns:
        Whatever ``dbcon.GetDataTable`` returns for the ``TOP 1`` query
        selecting ``MAPAAA003``.
    """
    sql = "SELECT TOP 1 MAPAAA003 FROM MAPAAA WHERE MAPAAA001=? AND MAPAAA002=?"
    return dbcon.GetDataTable(sql, (MapType, Value))
def CheckCompanyMappingList(dbcon, CompanyName, CompanyGUID=None):
    """Resolve the GUID mapped to ``CompanyName``, creating the mapping if absent.

    Args:
        dbcon: Connection passed on to the lookup and insert helpers.
        CompanyName: Company name used as the mapping key.
        CompanyGUID: GUID to map the name to when no mapping exists yet. When
            ``None`` the ``Companys`` table is searched by name; when that
            yields nothing (or an empty string was given) a new GUID is
            generated.

    Returns:
        The GUID from the existing mapping, or the GUID the new mapping was
        created with.
    """
    import Public.PublicFun as PublicFun

    mapping_rows = CheckMappingList(dbcon, "CompanyName", CompanyName)
    if mapping_rows is not None and len(mapping_rows) != 0:
        # A mapping already exists: it takes precedence over the argument.
        return mapping_rows[0].MAPAAA003

    resolved_guid = CompanyGUID
    if resolved_guid is None:
        resolved_guid = findCompany(dbcon, CompanyName)
    if resolved_guid is None or resolved_guid == "":
        resolved_guid = PublicFun.createID()

    # NOTE(review): the mapping row is written whenever none existed, whether
    # the GUID came from the caller, from findCompany, or was generated —
    # confirm against the original layout.
    insertMappingList(dbcon, "CompanyName", CompanyName, resolved_guid)
    return resolved_guid
def save_product_info(db_connection, company_guid, product_info, file_name):
    """Insert a Momo product row, or update the existing one for this company.

    Args:
        db_connection: Connection object exposing ``Execute(sql, params)``.
        company_guid: GUID of the owning company.
        product_info: Mapping that must contain ``"product_name"``; the other
            detail keys default to ``''`` when missing.
        file_name: Stored in ``CompanyProduct008``.

    Database errors are logged and not re-raised.
    """
    time_format = "YYYY/MM/DD HH:MM:SS"
    detail_keys = ("product_name", "product_format", "other_info",
                   "product_place", "brand_name")

    existing_guid = check_product_info(db_connection, company_guid,
                                       product_info["product_name"])
    print(product_info)

    if existing_guid:
        statement = (
            "UPDATE CompanyProduct set CompanyProduct003=?,CompanyProduct004=?,"
            "CompanyProduct005=?,CompanyProduct006=?,CompanyProduct007=?,"
            "CompanyProduct008=?, D_MODIFYUSER=?, D_MODIFYTIME=? WHERE GUID = ?"
        )
        try:
            details = tuple(product_info.get(key, '') for key in detail_keys)
            values = details + (file_name, "MomoCrawler",
                                PublicFun.getNowDateTime(time_format),
                                existing_guid)
            db_connection.Execute(statement, values)
        except Exception as message:
            logger.logger.error(message)
        return

    statement = (
        "INSERT INTO CompanyProduct(GUID, CompanyProduct001,CompanyProduct002,"
        "CompanyProduct003,CompanyProduct004,"
        "CompanyProduct005,CompanyProduct006, CompanyProduct007, "
        "CompanyProduct008, D_INSERTUSER, D_INSERTTIME)"
        " VALUES (?,?,?,?,?,?,?,?,?,?,?)"
    )
    try:
        details = tuple(product_info.get(key, '') for key in detail_keys)
        values = ((PublicFun.createID(), company_guid, "Momo") + details
                  + (file_name, "MomoCrawler",
                     PublicFun.getNowDateTime(time_format)))
        db_connection.Execute(statement, values)
        logger.logger.info("Insert")
        print(file_name)
    except Exception as message:
        logger.logger.error(message)
def writeDBMsg(JobName, Param, msg, dbcon=None):
    """Record a job message in ``[dbo].[LogMsg]`` and echo it via ``writeMsg``.

    Args:
        JobName: Stored in ``JOB`` and ``D_INSERTUSER``.
        Param: Job parameters; stored as-is when a ``str``, otherwise as
            compact JSON.
        msg: Message to record.
        dbcon: Open connection to use. When ``None`` a "QueueConnect"
            connection is opened for this call and closed afterwards.

    Database failures are reported through ``writeMsg`` and not re-raised.
    """
    if isinstance(Param, str):
        jsonData = Param
    else:
        jsonData = json.dumps(Param, separators=(',', ':'))

    if dbcon is None:
        dbcon = SQLConnect.DBConnect(secName="QueueConnect", publicSetting=True)
        dbcon.ConnectDB()
        try:
            writeDBMsg(JobName, jsonData, msg, dbcon)
        finally:
            # Close the temporary connection even if the nested call raises.
            dbcon.close()
        return

    try:
        sql = (
            "INSERT INTO [dbo].[LogMsg]([GUID],[JOB],[Param],[Message],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            + "VALUES(?, ?, ?, ?, ?, ?, ?, ?)")
        dbcon.Execute(
            sql, (PublicFun.createID(), JobName, jsonData, msg, JobName,
                  PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"), "", ""))
        writeMsg(str(msg))
    except Exception as ex:
        # str() is required: concatenating the exception object itself raises
        # TypeError and would hide the original failure.
        writeMsg("寫入資料庫失敗:" + str(ex))
def Query(dbcon, DataObject, WhereClause="", parameter=None):
    """Load the first row matching ``WhereClause`` into ``DataObject``.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, params)``.
        DataObject: Object providing ``QueryStr`` and ``Fields``; its
            ``DataRow`` and ``TimeStamp`` attributes are set here.
        WhereClause: Condition text without the ``WHERE`` keyword. ``None``
            and ``""`` both mean "no condition".
        parameter: Parameters passed through to ``GetDataTable``.

    Returns:
        ``DataObject``, with ``DataRow`` set to the first row found or to a
        list of empty strings (one per field) when nothing matched.
    """
    import Public.PublicFun as PublicFun

    excuteSQL = DataObject.QueryStr
    # None is accepted like "" so it no longer fails on str + None.
    if WhereClause is not None and WhereClause != "":
        excuteSQL = excuteSQL + " WHERE " + WhereClause

    FindRow = dbcon.GetDataTable(excuteSQL, parameter)
    if FindRow is not None and len(FindRow) > 0:
        DataObject.DataRow = FindRow[0]
    else:
        DataObject.DataRow = [""] * len(DataObject.Fields)
    # Records when the data was read.
    DataObject.TimeStamp = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    return DataObject
def UpdateData(dbcon, DataObject):
    """Insert ``DataObject`` or update its existing row, matched on the key fields.

    All field and key values are bound as query parameters rather than
    concatenated into the SQL text.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, params)`` and
            ``Execute(sql, params)``.
        DataObject: Object providing ``QueryStr``, ``TableName``, ``Fields``,
            ``KeyFields``, ``CheckTimeStamp``, ``TimeStamp`` and
            ``getData(name)``.

    Returns:
        ``True`` once the statement has been executed.

    Raises:
        Exception: When ``CheckTimeStamp`` is set and the stored row carries
            a modification (or insert) time later than ``DataObject.TimeStamp``.
    """
    import Public.PublicFun as PublicFun

    key_fields = list(DataObject.KeyFields)
    key_values = [DataObject.getData(key) for key in key_fields]
    WhereClause = " WHERE 1=1 " + "".join(
        " AND " + key + "=?" for key in key_fields)

    FindRow = dbcon.GetDataTable(DataObject.QueryStr + WhereClause,
                                 tuple(key_values) if key_values else None)
    NowTime = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")

    if FindRow is not None and len(FindRow) > 0:
        FindRow = FindRow[0]
        DataObject.D_MODIFYTIME = NowTime
        if DataObject.CheckTimeStamp:
            DBTime = FindRow.D_MODIFYTIME
            if DBTime is None or DBTime == "":
                DBTime = FindRow.D_INSERTTIME
            if DBTime > DataObject.TimeStamp:
                raise Exception("The data has been updated by others!")
        fields = list(DataObject.Fields)
        excuteSQL = ("UPDATE " + DataObject.TableName + " SET "
                     + ",".join(field + "=?" for field in fields)
                     + WhereClause)
        # Field values are read after the timestamp assignment above so the
        # new modification time is part of the statement.
        params = [DataObject.getData(field) for field in fields] + key_values
    else:
        DataObject.D_INSERTTIME = NowTime
        fields = list(DataObject.Fields)
        excuteSQL = ("INSERT INTO " + DataObject.TableName
                     + "(" + ",".join(fields) + ")VALUES("
                     + ",".join("?" for _ in fields) + ")")
        params = [DataObject.getData(field) for field in fields]

    dbcon.Execute(excuteSQL, tuple(params))
    return True
def save_product_info(db_connection, company_guid, product_info):
    """Insert a Costco product row, or refresh the modification stamp of an existing one.

    Args:
        db_connection: Connection object exposing ``Execute(sql, params)``.
        company_guid: GUID of the owning company.
        product_info: Mapping with the keys ``"NAME"``, ``"SPEC"``,
            ``"INGREDIENT"`` and ``"ORIGIN"``.
    """
    product_guid = check_product_info(db_connection, company_guid,
                                      product_info["NAME"])
    # Truthiness also covers a None result, which len() would crash on.
    if not product_guid:
        logger.info("Insert")
        insert_sql = (
            "INSERT INTO CompanyProduct(GUID, CompanyProduct001,CompanyProduct002,CompanyProduct003,CompanyProduct004,CompanyProduct005,CompanyProduct006, D_INSERTUSER, D_INSERTTIME)"
            " VALUES (?,?,?,?,?,?,?,?,?)")
        db_connection.Execute(
            insert_sql,
            (PublicFun.createID(), company_guid, "Costco",
             product_info["NAME"], product_info["SPEC"],
             product_info["INGREDIENT"], product_info["ORIGIN"],
             "CostcoCrawler", PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
    else:
        logger.info("Update")
        update_sql = "UPDATE CompanyProduct set D_MODIFYUSER = ?, D_MODIFYTIME = ? WHERE GUID = ?"
        db_connection.Execute(
            update_sql,
            ("CostcoCrawler", PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"),
             product_guid))
def addQueue(SystemName, QueueType, Path, Files, Param):
    """Insert a job into ``[dbo].[JobQueue]``.

    Values are bound as query parameters rather than concatenated into the
    SQL text. ``Files`` and ``Param`` are stored as their ``str()`` form.

    Args:
        SystemName: Value for the ``SystemName`` column.
        QueueType: Value for the ``QueueType`` column.
        Path: Value for the ``Path`` column.
        Files: Value for the ``Files`` column.
        Param: Job parameters.
    """
    import Public.PublicFun as PublicFun

    GUID = PublicFun.createID()
    jsonData = str(Param)
    D_INSERTTIME = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    sql = (
        "INSERT INTO [dbo].[JobQueue]([GUID],[SystemName],[QueueType],[Path],[Files],[Param],[D_INSERTTIME])"
        "VALUES(?,?,?,?,?,?,?)")

    dbcon = getQueueDBConnect()
    try:
        dbcon.Execute(sql, (GUID, SystemName, QueueType, Path, str(Files),
                            jsonData, str(D_INSERTTIME)))
    finally:
        # Close the connection even when the insert fails.
        dbcon.close()
def save_company_info(db_connection, company_info):
    """Insert or update the company record described by ``company_info``.

    Args:
        db_connection: Connection passed on to ``BIASDataIO`` and ``Engine``.
        company_info: Mapping with the keys ``"NAME"``, ``"ADDRESS"`` and
            ``"TEL"``.

    Returns:
        The GUID of the company record.
    """
    company_guid = BIASDataIO.CheckCompanyMappingList(
        db_connection, company_info["NAME"], NewCompanyGUID=False)
    if company_guid == "":
        company_guid = PublicFun.createID()

    companys = Engine.Query(db_connection, Companys.Companys(), "GUID=?",
                            (company_guid, ))
    if companys.GUID == "":
        companys.GUID = company_guid
        companys.D_INSERTUSER = "******"
    else:
        companys.D_MODIFYUSER = "******"
    companys.Companys003 = company_info["NAME"]
    companys.Companys005 = company_info["ADDRESS"]

    # Only overwrite the phone when none is stored or it is the placeholder.
    phone = companys.Companys010
    if not phone or "暫不提供" in phone:
        companys.Companys010 = company_info["TEL"]

    Engine.UpdateData(db_connection, companys)
    return company_guid
def writeDBMsg(JobName, Param, msg, dbcon=None):
    """Record a job message in ``[dbo].[LogMsg]`` and echo it via ``writeMsg``.

    Values are bound as query parameters rather than concatenated into the
    SQL text. ``Param`` and ``msg`` are stored as their ``str()`` form.

    Args:
        JobName: Stored in ``JOB`` and ``D_INSERTUSER``.
        Param: Job parameters.
        msg: Message to record.
        dbcon: Open connection to use. When ``None`` a connection is opened
            for this call and closed afterwards.

    Database failures are reported through ``writeMsg`` and not re-raised.
    """
    import Public.PublicFun as PublicFun

    if dbcon is None:
        dbcon = SQLConnect.DBConnect(publicSetting=True)
        dbcon.ConnectDB()
        try:
            writeDBMsg(JobName, Param, msg, dbcon)
        finally:
            # Close the temporary connection even if the nested call raises.
            dbcon.close()
        return

    try:
        sql = (
            "INSERT INTO [dbo].[LogMsg]([GUID],[JOB],[Param],[Message],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            "VALUES(?,?,?,?,?,?,'','')")
        dbcon.Execute(
            sql, (PublicFun.createID(), JobName, str(Param), str(msg), JobName,
                  PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
        writeMsg(str(msg))
    except Exception as ex:
        # str() is required: concatenating the exception object itself raises
        # TypeError and would hide the original failure.
        writeMsg("寫入資料庫失敗:" + str(ex))
def getPhoneNumber(PhoneNumberurl): #Regex格式驗證 import re text_re = re.compile('Text=') queryString = PhoneNumberurl.split("&") for param in queryString: if len(text_re.findall(param)) > 0: return param[5:] if __name__ == '__main__': jsondata = "" try: JobID = sys.argv[1] data = sys.argv[2] jsondata = PublicFun.StringToJson(data) companyURL = str(jsondata["companyURL"]) companyName = str(jsondata["companyName"]) DBConnect = SQLConnect.DBConnect(publicSetting=True) DBConnect.ConnectDB() CompanyGUID = BIASDataIO.CheckCompanyMappingList( DBConnect, companyName) companyInFo = Engine.Query(DBConnect, Companys.Companys(), "GUID='" + CompanyGUID + "'") if companyInFo.GUID == "": companyInFo.GUID = CompanyGUID companyInFo.D_INSERTUSER = "******" else:
from selenium.webdriver.common.action_chains import ActionChains import Public.PublicFun as PublicFun import Public.SQLConnect as SQLConnect import Public.BIASDataIO as BIASDataIO DBConnect=None JobID="" try: JobID=sys.argv[1] DBConnect=SQLConnect.DBConnect(publicSetting=True) DBConnect.ConnectDB() DBConnect.StartTransaction() Chromedriver=PublicFun.getWebDriver("chrome",DataFolderName=JobID) Chromedriver.get("https://www.104.com.tw/jobs/search/") #找到職務類別的按鈕並點擊 CategoryListButton=Chromedriver.find_element_by_id("job-cat") ActionChains(Chromedriver).click(CategoryListButton).perform() time.sleep(5) e104menu=Chromedriver.find_element_by_id("e104menu2011_main") e104menuCount = len(bs(e104menu.get_attribute('innerHTML'), "html.parser").select("ul li")) #逐項取得內容 for ClassACount in range(e104menuCount): #移開選取項目 tempElement=Chromedriver.find_element_by_id("globalbar") ActionChains(Chromedriver).move_to_element(tempElement).perform()
import Public.PublicFun as PublicFun import Public.RequestsHandler as RequestsHandler import Public.SettingReader as SettingReader import Public.QueueIO as QueueIO import Public.LogHandler as LogHandler import Public.SQLConnect as SQLConnect import Public.Engine as Engine import Model.JOB as JOB if __name__ == '__main__': jsondata="" try: JobID=sys.argv[1] data=sys.argv[2] jsondata = PublicFun.StringToJson(data) CompanyGUID=str(jsondata["CompanyGUID"]) JOBAAA009=str(jsondata["JOBAAA009"]) requestHost=SettingReader.getSetting("global","requestHost") DBConnect=SQLConnect.DBConnect(publicSetting=True) DBConnect.ConnectDB() # sql = "select GUID,JOBAAA009 from JOBAAA with(nolock) where JOBAAA029 != 'Y'" # dt = DBConnect.GetDataTable(sql) if(len(JOBAAA009) >0): req=RequestsHandler.getNewRequests(str(JOBAAA009),requestHost) # for rows in dt: if (len(str(JOBAAA009)) > 0 ): res = req.get(str(JOBAAA009))
#Regex格式驗證 import re text_re = re.compile('Text=') queryString = PhoneNumberurl.split("&") for param in queryString: if len(text_re.findall(param)) > 0: return param[5:] if __name__ == '__main__': jsondata = "" try: JobID = sys.argv[1] data = sys.argv[2] jsondata = PublicFun.StringToJson(data) jobdate = str(jsondata["date"]) jobName = PublicFun.SQLFilter(str(jsondata["jobName"])) jobURL = str(jsondata["jobURL"]) jobAREA = str(jsondata["jobAREA"]) companyInfo = jsondata["companyInfo"] try: companyName = PublicFun.SQLFilter(str(companyInfo["companyName"])) except Exception as ex: companyInfo = PublicFun.StringToJson(companyInfo, Default=False) companyName = PublicFun.SQLFilter(str(companyInfo["companyName"])) ScanDate = str(jsondata["ScanDate"]) DBConnect = SQLConnect.DBConnect(publicSetting=True) DBConnect.ConnectDB()
def updateOption(dbcon, jobCategoryGuid, OptionCode):
    """Set ``OPTAAA002`` and the modification time on one ``OPTAAA`` row.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        jobCategoryGuid: GUID of the row to update.
        OptionCode: New value for ``OPTAAA002``.
    """
    import Public.PublicFun as PublicFun

    modified_at = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    statement = "UPDATE OPTAAA SET OPTAAA002=?, D_MODIFYTIME=? WHERE GUID = ?"
    dbcon.Execute(statement, (OptionCode, modified_at, jobCategoryGuid))