Esempio n. 1
0
def getNewRequests(requestURL, Host):
    """Build a ``requests.Session`` primed with a browser's cookies.

    Loads ``requestURL`` in a Chrome WebDriver obtained from ``PublicFun``,
    reads the cookies and the ``navigator.userAgent`` string from it, and
    installs both, together with a fixed set of browser-like headers, on a
    new session.

    Args:
        requestURL: URL opened in the browser to collect cookies.
        Host: Value sent as the ``Host`` request header.

    Returns:
        The configured ``requests.Session``.
    """
    import requests
    import Public.PublicFun as PublicFun

    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        cookies = Chromedriver.get_cookies()
        userAgent = Chromedriver.execute_script("return navigator.userAgent")
    finally:
        # Hand the driver back even when the page load or script fails, so a
        # failed request does not leave a browser instance behind.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    request = requests.Session()
    header = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Host': Host,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': userAgent,
    }
    request.headers.update(header)

    # Copy only name/value pairs; other cookie attributes are not transferred.
    for cookie in cookies:
        request.cookies.set(cookie['name'], cookie['value'])

    return request
Esempio n. 2
0
def addQueue(SystemName, QueueType, Path, Files, Param):
    """Insert a job row into ``[dbo].[JobQueue]`` when ``checkQueue`` allows it.

    ``Param`` is stored unchanged when it is already a string and as compact
    JSON otherwise. A failing insert is retried: one initial attempt plus up
    to ten retries. Failures are printed, never raised, and the connection
    is always closed.

    Args:
        SystemName: Value for the ``SystemName`` column.
        QueueType: Value for the ``QueueType`` column.
        Path: Value for the ``Path`` column.
        Files: Value for the ``Files`` column.
        Param: String, or JSON-serialisable object, for the ``Param`` column.
    """
    import Public.PublicFun as PublicFun
    import json

    max_retries = 10
    GUID = PublicFun.createID()
    jsonData = Param
    if not isinstance(Param, str):
        jsonData = json.dumps(Param, separators=(',', ':'))
    sql = (
        "INSERT INTO [dbo].[JobQueue]([GUID],[SystemName],[QueueType],[Path],[Files],[Param],[D_INSERTTIME])"
        + "VALUES(?,?,?,?,?,?,?)")

    dbcon = getQueueDBConnect()
    try:
        if checkQueue(SystemName, QueueType, jsonData, dbcon):
            for attempt in range(max_retries + 1):
                try:
                    # The timestamp is taken per attempt so a retried row
                    # carries the time it was actually written.
                    D_INSERTTIME = PublicFun.getNowDateTime(
                        "YYYY/MM/DD HH:MM:SS")
                    dbcon.Execute(sql, (GUID, SystemName, QueueType, Path,
                                        Files, jsonData, D_INSERTTIME))
                    break
                except Exception as ex:
                    # If the write fails, retry (at most ten retries).
                    print("Retry:" + str(attempt + 1) + " " + str(ex))
            else:
                # Every attempt failed; report it instead of ending silently.
                print("addQueue failed after " + str(max_retries + 1) +
                      " attempts")
    finally:
        # Release the connection even if checkQueue itself raises.
        dbcon.close()
Esempio n. 3
0
def insertMappingList(dbcon, MapType, Value, RelValue):
    """Add one row to ``[dbo].[MAPAAA]`` and return the GUID created for it.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        MapType: Value for column ``MAPAAA001``.
        Value: Value for column ``MAPAAA002``.
        RelValue: Value for column ``MAPAAA003``.

    Returns:
        The GUID generated by ``PublicFun.createID()`` for the new row.
    """
    import Public.PublicFun as PublicFun

    new_guid = PublicFun.createID()
    column_part = "INSERT INTO [dbo].[MAPAAA]([GUID],[MAPAAA001],[MAPAAA002],[MAPAAA003],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
    value_part = "VALUES(?,?,?,?,'System',?,'','')"
    inserted_at = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    row = (new_guid, MapType, Value, RelValue, inserted_at)
    dbcon.Execute(column_part + value_part, row)
    return new_guid
Esempio n. 4
0
def getPageSize(requestURL):
    """Count the ``<option>`` entries of the paging select on a page.

    Opens ``requestURL`` in a Chrome WebDriver, waits two seconds, and parses
    the page source. The first selector is tried first; when it matches
    nothing, the second selector is used.

    Args:
        requestURL: URL of the page to inspect.

    Returns:
        The number of matching ``option`` elements (0 when neither selector
        matches).
    """
    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        time.sleep(2)
        Soup = bs(Chromedriver.page_source, "html.parser")
    finally:
        # Close the driver even when loading or parsing fails, so the browser
        # is not left behind.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    pageSize = len(Soup.select(
        "select.b-clear-border.js-paging-select.gtm-paging-top option"))
    if pageSize == 0:
        pageSize = len(Soup.select(
            "select.page-select.js-paging-select.gtm-paging-top option"))
    return pageSize
Esempio n. 5
0
def writeDBMsg(msg):
    """Store ``msg`` in ``[dbo].[LogMsg]`` and forward it to ``LogHandler``.

    When the database insert fails, the text passed to
    ``LogHandler.writeMsg`` is prefixed with ``writeDBMsg失敗:``; the insert
    failure itself is not raised. The connection is always closed.

    Args:
        msg: Message to record.
    """
    import Public.PublicFun as PublicFun
    import Public.LogHandler as LogHandler

    dbcon = getQueueDBConnect()
    sql = ("INSERT INTO [dbo].[LogMsg]([GUID],[Message],[D_INSERTTIME])" +
           "VALUES(?,?,?)")
    try:
        try:
            dbcon.Execute(sql,
                          (PublicFun.createID(), PublicFun.SQLFilter(msg),
                           PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
        except Exception:
            # str() keeps this handler from raising TypeError itself when
            # msg is not a string.
            msg = "writeDBMsg失敗:" + str(msg)
        LogHandler.writeMsg(msg)
    finally:
        dbcon.close()
Esempio n. 6
0
def insertOption(dbcon, OptionName, OptionCode, OptionType, RelGUID=None):
    """Insert an option row into ``[dbo].[OPTAAA]`` and optionally link it.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        OptionName: Value for column ``OPTAAA001``.
        OptionCode: Value for column ``OPTAAA002``.
        OptionType: Value for column ``OPTAAA003``.
        RelGUID: When not ``None``, a row is also inserted into
            ``[dbo].[OPTAAB]`` with ``RelGUID`` in ``OPTAAB001`` and the new
            option's GUID in ``OPTAAB002``.

    Returns:
        The GUID generated for the new ``OPTAAA`` row.
    """
    import Public.PublicFun as PublicFun

    time_format = "YYYY/MM/DD HH:MM:SS"
    option_guid = PublicFun.createID()
    option_sql = "".join((
        "INSERT INTO [dbo].[OPTAAA]([GUID],[OPTAAA001],[OPTAAA002],[OPTAAA003],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])",
        "VALUES(?,?,?,?,'System',?,'','')",
    ))
    option_row = (option_guid, OptionName, OptionCode, OptionType,
                  PublicFun.getNowDateTime(time_format))
    dbcon.Execute(option_sql, option_row)

    if RelGUID is None:
        return option_guid

    link_sql = "".join((
        "INSERT INTO [dbo].[OPTAAB]([GUID],[OPTAAB001],[OPTAAB002],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])",
        "VALUES(?,?,?,'System',?,'','')",
    ))
    link_row = (PublicFun.createID(), RelGUID, option_guid,
                PublicFun.getNowDateTime(time_format))
    dbcon.Execute(link_sql, link_row)
    return option_guid
Esempio n. 7
0
def save_company_info(db_connection, company_info):
    """Insert or update the company record for ``company_info``.

    The company GUID comes from ``BIASDataIO.CheckCompanyMappingList``; when
    that returns an empty string a new GUID is created. The record is then
    loaded through ``Engine.Query``, its name and address are overwritten, and
    the phone number is filled in only when the stored one is empty or
    contains ``暫不提供``.

    Args:
        db_connection: Connection passed through to ``BIASDataIO`` and
            ``Engine``.
        company_info: Mapping with ``factory_name`` (required) and optional
            ``factory_address`` / ``factory_phone``.

    Returns:
        The GUID of the company record.

    Raises:
        Exception: Whatever ``Engine.Query`` raised, after it is logged.
    """
    print(company_info)
    company_guid = BIASDataIO.CheckCompanyMappingList(
        db_connection, company_info["factory_name"], NewCompanyGUID=False)
    if company_guid == "":
        company_guid = PublicFun.createID()
    try:
        companys = Engine.Query(db_connection, Companys.Companys(), "GUID=?",
                                company_guid)
    except Exception as message:
        logger.logger.error(message)
        # Without the queried record there is nothing to update. Re-raise the
        # real error rather than failing below on an unbound ``companys``.
        raise

    if companys.GUID == "":
        companys.GUID = company_guid
        companys.D_INSERTUSER = "******"
    else:
        companys.D_MODIFYUSER = "******"
    companys.Companys003 = company_info.get("factory_name", '')
    companys.Companys005 = company_info.get("factory_address", '')
    if not companys.Companys010 or "暫不提供" in companys.Companys010:
        companys.Companys010 = company_info.get("factory_phone", '')
    try:
        Engine.UpdateData(db_connection, companys)
    except Exception as message:
        # Saving is best-effort: the failure is logged and the GUID is still
        # returned to the caller.
        logger.logger.error(message)
    return company_guid
Esempio n. 8
0
def findOption(dbcon, OptionName):
    """Look up the GUID of the ``OPTAAA`` row whose ``OPTAAA001`` matches.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql)``.
        OptionName: Option name; passed through ``PublicFun.SQLFilter``
            before being embedded in the query text.

    Returns:
        The ``GUID`` attribute of the first row returned, or ``""`` when the
        query yields ``None`` or no rows.
    """
    import Public.PublicFun as PublicFun

    filtered_name = PublicFun.SQLFilter(OptionName)
    query = "SELECT GUID FROM OPTAAA WHERE OPTAAA001=N'" + filtered_name + "'"
    rows = dbcon.GetDataTable(query)
    if rows is None or len(rows) == 0:
        return ""
    return rows[0].GUID
Esempio n. 9
0
def insertOption(dbcon, OptionName, RelGUID=None):
    """Insert an option row into ``[dbo].[OPTAAA]`` and optionally link it.

    The statements are assembled as SQL text; only ``OptionName`` is passed
    through ``PublicFun.SQLFilter``.

    Args:
        dbcon: Connection object exposing ``Execute(sql)``.
        OptionName: Value for column ``OPTAAA001``.
        RelGUID: When not ``None``, a row is also inserted into
            ``[dbo].[OPTAAB]`` with ``RelGUID`` in ``OPTAAB001`` and the new
            option's GUID in ``OPTAAB002``.

    Returns:
        The GUID generated for the new ``OPTAAA`` row.
    """
    import Public.PublicFun as PublicFun

    time_format = "YYYY/MM/DD HH:MM:SS"
    option_guid = PublicFun.createID()
    filtered_name = PublicFun.SQLFilter(OptionName)
    option_time = PublicFun.getNowDateTime(time_format)
    option_sql = (
        "INSERT INTO [dbo].[OPTAAA]([GUID],[OPTAAA001],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        + "VALUES('" + option_guid + "',N'" + filtered_name
        + "','System','" + option_time + "','','')")
    dbcon.Execute(option_sql)

    if RelGUID is None:
        return option_guid

    link_guid = PublicFun.createID()
    link_sql = (
        "INSERT INTO [dbo].[OPTAAB]([GUID],[OPTAAB001],[OPTAAB002],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        + "VALUES('" + link_guid + "','" + RelGUID + "','" + option_guid
        + "','System','" + PublicFun.getNowDateTime(time_format)
        + "','','')")
    dbcon.Execute(link_sql)
    return option_guid
Esempio n. 10
0
def findCompany(dbcon, CompanysName):
    """Look up the GUID of the ``Companys`` row whose ``Companys003`` matches.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql)``.
        CompanysName: Company name; passed through ``PublicFun.SQLFilter``
            before being embedded in the query text.

    Returns:
        The ``GUID`` attribute of the first row returned, or ``""`` when the
        query yields ``None`` or no rows.
    """
    import Public.PublicFun as PublicFun

    filtered_name = PublicFun.SQLFilter(CompanysName)
    query = ("SELECT GUID FROM Companys WHERE Companys003=N'"
             + filtered_name + "'")
    rows = dbcon.GetDataTable(query)
    if rows is None or len(rows) == 0:
        return ""
    return rows[0].GUID
Esempio n. 11
0
def CheckMappingList(dbcon, MapType, Value):
    """Fetch the mapped value stored in ``MAPAAA`` for ``(MapType, Value)``.

    Both values are sent as query parameters rather than being spliced into
    the SQL text, so quotes inside them cannot break or alter the statement.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, params)``.
        MapType: Value compared against column ``MAPAAA001``.
        Value: Value compared against column ``MAPAAA002``.

    Returns:
        Whatever ``dbcon.GetDataTable`` returns for the ``TOP 1 MAPAAA003``
        query.
    """
    sql = ("SELECT TOP 1 MAPAAA003 FROM MAPAAA "
           "WHERE MAPAAA001=? AND MAPAAA002=?")
    return dbcon.GetDataTable(sql, (MapType, Value))
Esempio n. 12
0
def CheckCompanyMappingList(dbcon, CompanyName, CompanyGUID=None):
    """Resolve a company name to a GUID, creating the mapping if missing.

    When a ``"CompanyName"`` mapping already exists its ``MAPAAA003`` value is
    returned. Otherwise the GUID is taken from the ``CompanyGUID`` argument,
    from ``findCompany``, or from a freshly created ID (in that order), and a
    new mapping row is inserted for it.

    Args:
        dbcon: Connection passed to the lookup and insert helpers.
        CompanyName: Company name to resolve.
        CompanyGUID: Optional GUID to register when no mapping exists.

    Returns:
        The GUID mapped to ``CompanyName``.
    """
    import Public.PublicFun as PublicFun

    mapped_rows = CheckMappingList(dbcon, "CompanyName", CompanyName)
    if mapped_rows is not None and len(mapped_rows) != 0:
        return mapped_rows[0].MAPAAA003

    if CompanyGUID is None:
        CompanyGUID = findCompany(dbcon, CompanyName)
        if CompanyGUID is None or CompanyGUID == "":
            CompanyGUID = PublicFun.createID()
    insertMappingList(dbcon, "CompanyName", CompanyName, CompanyGUID)
    return CompanyGUID
Esempio n. 13
0
def save_product_info(db_connection, company_guid, product_info, file_name):
    """Insert or update a ``CompanyProduct`` row for a crawled product.

    ``check_product_info`` decides which path is taken: a falsy result leads
    to an INSERT, anything else is used as the GUID of the row to UPDATE.
    Database errors on either path are logged and not raised.

    Args:
        db_connection: Connection object exposing ``Execute(sql, params)``.
        company_guid: GUID of the owning company (``CompanyProduct001``).
        product_info: Mapping with ``product_name`` (required) and optional
            ``product_format``, ``other_info``, ``product_place``,
            ``brand_name``; missing optional keys are stored as ``''``.
        file_name: Value stored in ``CompanyProduct008``.
    """
    product_guid = check_product_info(db_connection, company_guid,
                                      product_info["product_name"])
    print(product_info)

    # Columns 003-007, in column order, shared by both statements.
    detail_keys = ("product_name", "product_format", "other_info",
                   "product_place", "brand_name")
    time_format = "YYYY/MM/DD HH:MM:SS"

    if product_guid:
        update_sql = "UPDATE CompanyProduct set CompanyProduct003=?,CompanyProduct004=?,CompanyProduct005=?,CompanyProduct006=?,CompanyProduct007=?,CompanyProduct008=?, D_MODIFYUSER=?, D_MODIFYTIME=? WHERE GUID = ?"
        try:
            update_row = (
                tuple(product_info.get(key, '') for key in detail_keys)
                + (file_name, "MomoCrawler",
                   PublicFun.getNowDateTime(time_format), product_guid))
            db_connection.Execute(update_sql, update_row)
        except Exception as message:
            logger.logger.error(message)
        return

    insert_sql = (
        "INSERT INTO CompanyProduct(GUID, CompanyProduct001,CompanyProduct002,CompanyProduct003,CompanyProduct004,"
        "CompanyProduct005,CompanyProduct006, CompanyProduct007, CompanyProduct008, D_INSERTUSER, D_INSERTTIME)"
        " VALUES (?,?,?,?,?,?,?,?,?,?,?)")
    try:
        insert_row = (
            (PublicFun.createID(), company_guid, "Momo")
            + tuple(product_info.get(key, '') for key in detail_keys)
            + (file_name, "MomoCrawler",
               PublicFun.getNowDateTime(time_format)))
        db_connection.Execute(insert_sql, insert_row)
        logger.logger.info("Insert")
        print(file_name)
    except Exception as message:
        logger.logger.error(message)
Esempio n. 14
0
def writeDBMsg(JobName, Param, msg, dbcon=None):
    """Record a job message in ``[dbo].[LogMsg]`` and pass it to ``writeMsg``.

    Without ``dbcon`` a ``QueueConnect`` connection is opened, used for one
    recursive call, and always closed afterwards. Insert failures are
    reported through ``writeMsg`` and are not raised.

    Args:
        JobName: Stored in both the ``JOB`` and ``D_INSERTUSER`` columns.
        Param: String, or JSON-serialisable object, for the ``Param`` column.
        msg: Message text for the ``Message`` column.
        dbcon: Optional open connection exposing ``Execute(sql, params)``.
    """
    jsonData = Param
    if not isinstance(jsonData, str):
        jsonData = json.dumps(Param, separators=(',', ':'))

    if dbcon is None:
        dbcon = SQLConnect.DBConnect(secName="QueueConnect",
                                     publicSetting=True)
        dbcon.ConnectDB()
        try:
            writeDBMsg(JobName, jsonData, msg, dbcon)
        finally:
            # Close the connection opened here even if logging raises.
            dbcon.close()
        return

    try:
        sql = (
            "INSERT INTO [dbo].[LogMsg]([GUID],[JOB],[Param],[Message],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            + "VALUES(?, ?, ?, ?, ?, ?, ?, ?)")
        dbcon.Execute(
            sql, (PublicFun.createID(), JobName, jsonData, msg, JobName,
                  PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"), "", ""))
        writeMsg(str(msg))
    except Exception as ex:
        # str(ex): concatenating the exception object itself raises
        # TypeError and would hide the original failure.
        writeMsg("寫入資料庫失敗:" + str(ex))
0
def Query(dbcon, DataObject, WhereClause="", parameter=None):
    """Load the first matching row into ``DataObject`` and return it.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, params)``.
        DataObject: Object providing ``QueryStr`` and ``Fields``; its
            ``DataRow`` and ``TimeStamp`` attributes are assigned here.
        WhereClause: Condition appended after ``" WHERE "``; skipped when it
            is ``""``.
        parameter: Passed unchanged as the second ``GetDataTable`` argument.

    Returns:
        The same ``DataObject``. ``DataRow`` is the first row found, or a
        list of empty strings (one per field) when nothing matched.
    """
    import Public.PublicFun as PublicFun

    if not (WhereClause is None or WhereClause == ""):
        WhereClause = " WHERE " + WhereClause
    rows = dbcon.GetDataTable(DataObject.QueryStr + WhereClause, parameter)

    found = rows is not None and len(rows) > 0
    DataObject.DataRow = rows[0] if found else [""] * len(DataObject.Fields)
    DataObject.TimeStamp = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    return DataObject
Esempio n. 16
0
def UpdateData(dbcon,DataObject):
    """Write ``DataObject`` to its table as an UPDATE or an INSERT.

    A row matching every key field is looked up first. If one exists, all
    fields are updated (after an optional timestamp check); otherwise a new
    row is inserted. Field values pass through ``PublicFun.SQLFilter``.

    Args:
        dbcon: Connection exposing ``GetDataTable(sql)`` and ``Execute(sql)``.
        DataObject: Object providing ``QueryStr``, ``TableName``,
            ``KeyFields``, ``Fields``, ``getData``, ``CheckTimeStamp`` and
            ``TimeStamp``.

    Returns:
        ``True`` once the statement has been executed.

    Raises:
        Exception: When ``CheckTimeStamp`` is set and the stored
            modify/insert time is greater than ``DataObject.TimeStamp``.
    """
    import Public.PublicFun as PublicFun

    # NOTE(review): key values go into the WHERE text without SQLFilter,
    # unlike the field values below -- confirm keys are always trusted GUIDs.
    key_filter = " WHERE 1=1 " + "".join(
        " AND " + key + "='" + DataObject.getData(key) + "'"
        for key in DataObject.KeyFields)

    existing = dbcon.GetDataTable(DataObject.QueryStr + key_filter)
    now = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")

    if existing is not None and len(existing) > 0:
        stored = existing[0]
        # Set before the fields are read so the new time is what gets written.
        DataObject.D_MODIFYTIME = now
        if DataObject.CheckTimeStamp:
            stored_time = stored.D_MODIFYTIME
            if stored_time is None or stored_time == "":
                stored_time = stored.D_INSERTTIME
            if stored_time > DataObject.TimeStamp:
                raise Exception("The data has been updated by others!")

        assignments = ",".join(
            name + "=N'" + PublicFun.SQLFilter(DataObject.getData(name)) + "'"
            for name in DataObject.Fields)
        statement = ("UPDATE " + DataObject.TableName + " SET "
                     + assignments + key_filter)
    else:
        DataObject.D_INSERTTIME = now
        columns = ",".join(DataObject.Fields)
        values = ",".join(
            "N'" + PublicFun.SQLFilter(DataObject.getData(name)) + "'"
            for name in DataObject.Fields)
        statement = ("INSERT INTO " + DataObject.TableName
                     + "(" + columns + ")VALUES(" + values + ")")

    dbcon.Execute(statement)
    return True
Esempio n. 17
0
def save_product_info(db_connection, company_guid, product_info):
    """Add product information, or touch the existing row.

    ``check_product_info`` is asked for an existing product; an empty result
    leads to an INSERT of the full product, otherwise only the modify
    user/time of the row it returned are updated.

    Args:
        db_connection: Connection object exposing ``Execute(sql, params)``.
        company_guid: GUID of the owning company (``CompanyProduct001``).
        product_info: Mapping with the keys ``NAME``, ``SPEC``,
            ``INGREDIENT`` and ``ORIGIN``.
    """
    time_format = "YYYY/MM/DD HH:MM:SS"
    product_guid = check_product_info(db_connection, company_guid,
                                      product_info["NAME"])

    if len(product_guid) != 0:
        logger.info("Update")
        update_sql = "UPDATE CompanyProduct set D_MODIFYUSER = ?, D_MODIFYTIME = ? WHERE GUID = ?"
        update_row = ("CostcoCrawler", PublicFun.getNowDateTime(time_format),
                      product_guid)
        db_connection.Execute(update_sql, update_row)
        return

    logger.info("Insert")
    insert_sql = (
        "INSERT INTO CompanyProduct(GUID, CompanyProduct001,CompanyProduct002,CompanyProduct003,CompanyProduct004,CompanyProduct005,CompanyProduct006, D_INSERTUSER, D_INSERTTIME)"
        " VALUES (?,?,?,?,?,?,?,?,?)")
    insert_row = (
        PublicFun.createID(),
        company_guid,
        "Costco",
        product_info["NAME"],
        product_info["SPEC"],
        product_info["INGREDIENT"],
        product_info["ORIGIN"],
        "CostcoCrawler",
        PublicFun.getNowDateTime(time_format),
    )
    db_connection.Execute(insert_sql, insert_row)
Esempio n. 18
0
def addQueue(SystemName, QueueType, Path, Files, Param):
    """Insert a job row into ``[dbo].[JobQueue]``.

    The statement is assembled as SQL text with every caller-supplied value
    passed through ``PublicFun.SQLFilter``. The connection is closed even
    when the insert fails.

    Args:
        SystemName: Value for the ``SystemName`` column.
        QueueType: Value for the ``QueueType`` column.
        Path: Value for the ``Path`` column.
        Files: Value for the ``Files`` column (stored as ``str(Files)``).
        Param: Value for the ``Param`` column (stored as ``str(Param)``).
    """
    import Public.PublicFun as PublicFun

    GUID = PublicFun.createID()
    jsonData = str(Param)
    D_INSERTTIME = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    # Build the statement before connecting so a bad argument cannot leave
    # an open connection behind.
    sql = (
        "INSERT INTO [dbo].[JobQueue]([GUID],[SystemName],[QueueType],[Path],[Files],[Param],[D_INSERTTIME])"
        + "VALUES('" + GUID + "',N'" + PublicFun.SQLFilter(SystemName) +
        "',N'" + PublicFun.SQLFilter(QueueType) + "',N'" +
        PublicFun.SQLFilter(Path) + "',N'" + PublicFun.SQLFilter(str(Files)) +
        "',N'" + PublicFun.SQLFilter(jsonData) + "','" +
        str(D_INSERTTIME) + "')")

    dbcon = getQueueDBConnect()
    try:
        dbcon.Execute(sql)
    finally:
        dbcon.close()
Esempio n. 19
0
def save_company_info(db_connection, company_info):
    """Insert or update company information and return the company GUID.

    The GUID comes from ``BIASDataIO.CheckCompanyMappingList``; when that
    returns an empty string a new one is created. Name and address are always
    overwritten; the phone number is replaced only when the stored one is
    missing, empty, or contains ``暫不提供``.

    Args:
        db_connection: Connection passed through to ``BIASDataIO`` and
            ``Engine``.
        company_info: Mapping with the keys ``NAME``, ``ADDRESS`` and ``TEL``.

    Returns:
        The GUID of the company record.
    """
    company_name = company_info["NAME"]
    company_guid = BIASDataIO.CheckCompanyMappingList(db_connection,
                                                      company_name,
                                                      NewCompanyGUID=False)
    if company_guid == "":
        company_guid = PublicFun.createID()

    record = Engine.Query(db_connection, Companys.Companys(), "GUID=?",
                          (company_guid, ))

    # An empty GUID on the queried object means no stored row was found.
    if record.GUID != "":
        record.D_MODIFYUSER = "******"
    else:
        record.GUID = company_guid
        record.D_INSERTUSER = "******"

    record.Companys003 = company_name
    record.Companys005 = company_info["ADDRESS"]

    stored_phone = record.Companys010
    if (stored_phone is None or len(stored_phone) == 0
            or "暫不提供" in stored_phone):
        record.Companys010 = company_info["TEL"]

    Engine.UpdateData(db_connection, record)
    return company_guid
Esempio n. 20
0
def writeDBMsg(JobName, Param, msg, dbcon=None):
    """Record a job message in ``[dbo].[LogMsg]`` and pass it to ``writeMsg``.

    Without ``dbcon`` a connection is opened, used for one recursive call,
    and always closed afterwards. Insert failures are reported through
    ``writeMsg`` and are not raised.

    Args:
        JobName: Stored in both the ``JOB`` and ``D_INSERTUSER`` columns.
        Param: Stored as ``str(Param)`` in the ``Param`` column.
        msg: Stored as ``str(msg)`` in the ``Message`` column.
        dbcon: Optional open connection exposing ``Execute(sql)``.
    """
    import Public.PublicFun as PublicFun

    if dbcon is None:
        dbcon = SQLConnect.DBConnect(publicSetting=True)
        dbcon.ConnectDB()
        try:
            writeDBMsg(JobName, Param, msg, dbcon)
        finally:
            # Close the connection opened here even if logging raises.
            dbcon.close()
        return

    try:
        sql = (
            "INSERT INTO [dbo].[LogMsg]([GUID],[JOB],[Param],[Message],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            + "VALUES('" + PublicFun.createID() + "',N'" +
            PublicFun.SQLFilter(JobName) + "',N'" +
            PublicFun.SQLFilter(str(Param)) + "',N'" +
            PublicFun.SQLFilter(str(msg)) + "',N'" +
            PublicFun.SQLFilter(JobName) + "','" +
            PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS") + "','','')")
        dbcon.Execute(sql)
        writeMsg(str(msg))
    except Exception as ex:
        # str(ex): concatenating the exception object itself raises
        # TypeError and would hide the original failure.
        writeMsg("寫入資料庫失敗:" + str(ex))
Esempio n. 21
0
def getPhoneNumber(PhoneNumberurl):
    """Extract the value that follows ``Text=`` in an ``&``-separated URL.

    The URL is split on ``&`` and the first segment containing ``Text=`` is
    used. The value is taken from where the marker was actually found, so a
    segment that still carries the path (``...page?Text=123``) yields
    ``123`` rather than a slice at a fixed offset.

    Args:
        PhoneNumberurl: URL or query string to inspect.

    Returns:
        The text after ``Text=`` in the first matching segment, or ``None``
        when no segment contains the marker.
    """
    marker = "Text="
    for param in PhoneNumberurl.split("&"):
        position = param.find(marker)
        if position != -1:
            return param[position + len(marker):]
    return None


# Script entry point. NOTE: this excerpt is cut off right after the trailing
# ``else:``, so the rest of the flow is not visible here.
if __name__ == '__main__':
    jsondata = ""
    try:
        # argv[1] is kept as the job id; argv[2] is parsed by
        # PublicFun.StringToJson and must contain companyURL and companyName.
        JobID = sys.argv[1]
        data = sys.argv[2]
        jsondata = PublicFun.StringToJson(data)
        companyURL = str(jsondata["companyURL"])
        companyName = str(jsondata["companyName"])

        DBConnect = SQLConnect.DBConnect(publicSetting=True)
        DBConnect.ConnectDB()

        # Resolve the company name to a GUID, then load the record for it.
        # NOTE(review): the GUID is concatenated into the WHERE text here.
        CompanyGUID = BIASDataIO.CheckCompanyMappingList(
            DBConnect, companyName)
        companyInFo = Engine.Query(DBConnect, Companys.Companys(),
                                   "GUID='" + CompanyGUID + "'")

        # An empty GUID on the queried object is treated as "new record".
        if companyInFo.GUID == "":
            companyInFo.GUID = CompanyGUID
            companyInFo.D_INSERTUSER = "******"
        else:
Esempio n. 22
0
from selenium.webdriver.common.action_chains import ActionChains

import Public.PublicFun as PublicFun
import Public.SQLConnect as SQLConnect
import Public.BIASDataIO as BIASDataIO

# Top-level crawl script. NOTE: this excerpt is cut off inside the ``try``
# block and the ``for`` loop, so neither the loop body's remainder nor the
# error handling is visible here.
DBConnect=None
JobID=""
try:
    # argv[1] is kept as the job id and reused as the driver's data folder.
    JobID=sys.argv[1]
    DBConnect=SQLConnect.DBConnect(publicSetting=True)
    DBConnect.ConnectDB()
    DBConnect.StartTransaction()
    
    Chromedriver=PublicFun.getWebDriver("chrome",DataFolderName=JobID)
    Chromedriver.get("https://www.104.com.tw/jobs/search/")

    # Find the job-category button and click it.
    CategoryListButton=Chromedriver.find_element_by_id("job-cat")
    ActionChains(Chromedriver).click(CategoryListButton).perform()
    # Fixed five-second wait after the click.
    time.sleep(5)

    # Count the "ul li" entries inside the menu element's inner HTML.
    e104menu=Chromedriver.find_element_by_id("e104menu2011_main")
    e104menuCount = len(bs(e104menu.get_attribute('innerHTML'), "html.parser").select("ul li"))

    # Fetch the content item by item.
    for ClassACount in range(e104menuCount):
        # Move the pointer off the selected item.
        tempElement=Chromedriver.find_element_by_id("globalbar")
        ActionChains(Chromedriver).move_to_element(tempElement).perform()
Esempio n. 23
0
import Public.PublicFun as PublicFun
import Public.RequestsHandler as RequestsHandler
import Public.SettingReader as SettingReader
import Public.QueueIO as QueueIO
import Public.LogHandler as LogHandler
import Public.SQLConnect as SQLConnect
import Public.Engine as Engine

import Model.JOB as JOB

# Script entry point. NOTE: this excerpt is cut off after the request is
# issued, so response handling and the ``except`` clause are not visible.
if __name__ == '__main__':
    jsondata=""
    try:
        # argv[1] is kept as the job id; argv[2] is parsed by
        # PublicFun.StringToJson and must contain CompanyGUID and JOBAAA009.
        JobID=sys.argv[1]
        data=sys.argv[2]
        jsondata = PublicFun.StringToJson(data)
        CompanyGUID=str(jsondata["CompanyGUID"])
        JOBAAA009=str(jsondata["JOBAAA009"])

        requestHost=SettingReader.getSetting("global","requestHost")
        
        DBConnect=SQLConnect.DBConnect(publicSetting=True)
        DBConnect.ConnectDB()
        
#        sql = "select GUID,JOBAAA009 from JOBAAA with(nolock) where JOBAAA029 != 'Y'"
#        dt = DBConnect.GetDataTable(sql)
        # JOBAAA009 is used as the URL: a session is prepared for it and the
        # same URL is then fetched with that session.
        if(len(JOBAAA009) >0):
            req=RequestsHandler.getNewRequests(str(JOBAAA009),requestHost)
#            for rows in dt:
            if (len(str(JOBAAA009)) > 0 ):
                res = req.get(str(JOBAAA009))
Esempio n. 24
0
    # Regex format check. NOTE: the ``def`` line of this function lies
    # outside this excerpt; only its body is visible.
    import re
    text_re = re.compile('Text=')
    # Split on "&" and return the first segment that contains "Text=",
    # minus its first five characters.
    queryString = PhoneNumberurl.split("&")
    for param in queryString:
        if len(text_re.findall(param)) > 0:
            return param[5:]


# Script entry point. NOTE: this excerpt is cut off right after the database
# connection is opened, so the rest of the flow is not visible here.
if __name__ == '__main__':
    jsondata = ""
    try:
        # argv[1] is kept as the job id; argv[2] is parsed by
        # PublicFun.StringToJson into the job description used below.
        JobID = sys.argv[1]
        data = sys.argv[2]

        jsondata = PublicFun.StringToJson(data)
        jobdate = str(jsondata["date"])
        jobName = PublicFun.SQLFilter(str(jsondata["jobName"]))
        jobURL = str(jsondata["jobURL"])
        jobAREA = str(jsondata["jobAREA"])
        companyInfo = jsondata["companyInfo"]
        # companyInfo is first indexed directly; if that fails it is parsed
        # with StringToJson and indexed again.
        try:
            companyName = PublicFun.SQLFilter(str(companyInfo["companyName"]))
        except Exception as ex:
            companyInfo = PublicFun.StringToJson(companyInfo, Default=False)
            companyName = PublicFun.SQLFilter(str(companyInfo["companyName"]))
        ScanDate = str(jsondata["ScanDate"])

        DBConnect = SQLConnect.DBConnect(publicSetting=True)
        DBConnect.ConnectDB()
Esempio n. 25
0
def updateOption(dbcon, jobCategoryGuid, OptionCode):
    """Set ``OPTAAA002`` and the modify time of one ``OPTAAA`` row.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        jobCategoryGuid: GUID of the row to update.
        OptionCode: New value for column ``OPTAAA002``.
    """
    import Public.PublicFun as PublicFun

    modified_at = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    statement = "UPDATE OPTAAA SET OPTAAA002=?, D_MODIFYTIME=? WHERE GUID = ?"
    dbcon.Execute(statement, (OptionCode, modified_at, jobCategoryGuid))