Example #1
# Standard-library and third-party imports needed by this listing; project objects
# such as logger, resultcenter, downloader, converter and the checksum helpers
# (updateSubjectCheckSumRemort, GetHashCheckSum, IsPreviouslyProcessed,
# AddAsProcessedFile) are defined elsewhere in the project.
import hashlib
import json
import os
import re
import sys
import time
import traceback
from urllib.parse import unquote

import pymysql
import requests


def detectAndApplyChanges(pdfUrlList):
    tmpmap = {}
    connection.ping(True)
    for pdfUrl in pdfUrlList:
        subjectCode = os.path.basename(unquote(pdfUrl)).split(".")[0].replace(" ", "")
        logger.info("Checking subject: " + subjectCode)
        resultcenter.ping("Scanning " + subjectCode)
        # A HEAD request is enough here: only the Last-Modified header is needed.
        pdfhead = requests.head(pdfUrl)
        hashInput = str(pdfhead.headers['Last-Modified'])
        hash = hashlib.sha256(hashInput.encode('utf-8')).hexdigest()

        if subjectCode not in subjectCheckSums:
            logger.warn("Unknown subject: " + subjectCode)
            continue

        if subjectCheckSums[subjectCode] is None:
            subjectCheckSums[subjectCode] = hash
            updateSubjectCheckSumRemort(subjectCode, hash, "None")

        if subjectCheckSums[subjectCode] != hash:
            logger.info("Changes detected for " + subjectCode)
            try:
                xmlData = downloader.getXML(pdfUrl)
                jsonData = converter.jsonGenerator(xmlData, pdfUrl)
                if resultcenter.submitDataSet(jsonData):
                    updateSubjectCheckSumRemort(subjectCode, hash, "Update")
                subjectCheckSums[subjectCode] = hash
            except Exception as error:
                print("ERROR: " + str(error))
                traceback.print_exc()
    return tmpmap


def getXML(url, manualMode=False):
    if not manualMode:
        resultcenter.ping("Processing result sheet")
    logger.info("Resolving: " + url)
    session = requests.Session()

    pageLoad = session.get('https://www.freefileconvert.com')
    # Pull the CSRF token out of the page's <meta name="csrf-token"> tag.
    searchObj = re.search(r'<meta name="csrf-token" content="(\w{2,})">',
                          pageLoad.text)

    accessToken = ""
    if searchObj:
        accessToken = searchObj.group(1)
    else:
        logger.crit("Unable to fetch the access token.")
        raise Exception('Unable to fetch the access token.')

    logger.info("Using token: " + accessToken)
    headers = {
        "Origin": "https://www.freefileconvert.com",
        "Accept-Encoding": "gzip, deflate, br",
        "X-CSRF-TOKEN": accessToken,
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Referer": "https://www.freefileconvert.com/",
        "X-Requested-With": "XMLHttpRequest",
        "Connection": "keep-alive"
    }
    progressKey = id_generator()
    logger.info("Using progress key: " + progressKey)
    payload = {
        "_token": accessToken,
        "url": url,
        "output_format": "xml",
        "progress_key": progressKey,
    }
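    # The converter service fetches the PDF itself from 'url', so only the source
    # URL, the desired output format and a progress key need to be posted.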
    if not manualMode:
        resultcenter.ping("Processing result sheet")
    xmlRequest = session.post('https://www.freefileconvert.com/file/url',
                              data=payload,
                              headers=headers)
    parsedJSON = json.loads(xmlRequest.content.strip().decode('utf-8'))
    if parsedJSON['status'] == "success":
        fileURL = ('https://www.freefileconvert.com/file/' +
                   parsedJSON['id'] + '/download')
        logger.info("Reading XML: " + fileURL)
        logger.info("Waiting for the PDF -> XML conversion to finish")
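        # Poll the conversion status endpoint once per second until it reports success.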
        while True:
            if not manualMode:
                resultcenter.ping("Processing result sheet")
            statusResp = session.get("https://www.freefileconvert.com/file/" +
                                     parsedJSON['id'] + "/status",
                                     headers=headers)
            if "Success" in statusResp.content.strip().decode('utf-8'):
                break
            time.sleep(1)
        logger.info("Fetching XML translation")
        xml = session.get(fileURL, headers=headers)
        return xml.content.strip().decode('utf-8')
    else:
        logger.crit("Error occurred: " + parsedJSON['error'])
        raise Exception("Error occurred: " + parsedJSON['error'])
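

# getXML() relies on an id_generator() helper that is not part of this listing.
# A minimal, hypothetical stand-in (length and alphabet are assumptions) could be:
import random
import string


def id_generator(size=16, chars=string.ascii_letters + string.digits):
    # Random alphanumeric key used as the converter's progress_key.
    return ''.join(random.choice(chars) for _ in range(size))
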
def detectAndApplyChanges(pdfUrlList):
    global unknownSubjects, zeroSheets
    tmpmap = {}
    connection.ping(True)
    if not pdfUrlList:
        logger.warn("There are no result sheets in the UGVLE", not zeroSheets)
        zeroSheets = True
        return

    zeroSheets = False
    for pdfUrl in pdfUrlList:
        subjectCode = os.path.basename(unquote(pdfUrl)).split(".")[0].replace(" ", "")
        logger.info("Checking subject: " + subjectCode)
        resultcenter.ping("Scanning " + subjectCode)
        try:
            pdfhead = requests.head(pdfUrl)
        except requests.RequestException:
            # Skip this sheet if the HEAD request fails.
            logger.warn("Unable to fetch results sheet using URL: " + pdfUrl, True)
            continue

        if pdfhead.status_code != 200:
            report = pdfhead.status_code not in [503, 404]
            logger.warn("Request to fetch " + pdfUrl + " returned: " + str(pdfhead.status_code), report)
            continue

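        # Two fingerprints: 'hash' tracks the Last-Modified header, while 'fileHash'
        # combines content-length and subject code so a sheet that was already
        # processed can be skipped even when only its checksum changes.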
        hash = GetHashCheckSum(pdfhead.headers['Last-Modified'])
        fileHash = GetHashCheckSum(pdfhead.headers['content-length'] + '_' + subjectCode)

        if subjectCode not in subjectCheckSums:
            if subjectCode not in unknownSubjects:
                logger.warn("Unknown subject detected: " + subjectCode, True)
                unknownSubjects.append(subjectCode)
            else:
                logger.info("Skipping unknown subject: " + subjectCode)
            continue

        if subjectCheckSums[subjectCode] is None:
            subjectCheckSums[subjectCode] = hash
            updateSubjectCheckSumRemort(subjectCode, hash, "None")

        if subjectCheckSums[subjectCode] != hash:
            if IsPreviouslyProcessed(subjectCode, fileHash):
                updateSubjectCheckSumRemort(subjectCode, hash, "Checksum Update")
                continue

            logger.info("Changes detected for " + subjectCode)
            try:
                xmlData = downloader.getXML(pdfUrl)
                jsonData = converter.jsonGenerator(xmlData, pdfUrl)
                dataSetId = resultcenter.submitDataSet(jsonData)
                if dataSetId:
                    updateSubjectCheckSumRemort(subjectCode, hash, "Update")
                else:
                    logger.crit("Failed to submit dataset: " + subjectCode, True)
                subjectCheckSums[subjectCode] = hash
                AddAsProcessedFile(subjectCode, fileHash, dataSetId)
            except Exception as error:
                logger.crit(subjectCode + ": " + str(error))
                traceback.print_exc()
    return tmpmap
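

# GetHashCheckSum(), IsPreviouslyProcessed() and AddAsProcessedFile() are project
# helpers that are not shown in this listing. Mirroring the inline SHA-256 hashing
# in the first version of detectAndApplyChanges above, a compatible sketch of
# GetHashCheckSum (the real implementation may differ) would be:
def GetHashCheckSum(value):
    return hashlib.sha256(str(value).encode('utf-8')).hexdigest()
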
def fetchFromDB(map):
    logger.info("Fetching previous checksums from database")
    with connection.cursor() as cursor:
        sql = "SELECT code, checksum FROM subject;"
        cursor.execute(sql)
        for result in cursor.fetchall():
            map[result['code']] = result['checksum']


logger.info("Starting Monitoring Client")
manualMode = False
zeroSheets = False
stabilizeMode = False
if len(sys.argv) == 1:
    logger.announceLogFile(True)
    resultcenter.ping("Starting")
else:
    logger.announceLogFile(False)
    if sys.argv[1] == 'stabilize':
        stabilizeMode = True
    else:
        logger.info('Starting in manual mode')
        manualMode = True

connection = pymysql.connect(host=os.environ['AWS_RDB_HOST'],
                             user=os.environ['AWS_RDB_USERNAME'],
                             password=os.environ['AWS_RDB_PASSWORD'],
                             db=os.environ['AWS_RDB_DATABASE'],
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
if not connection.open:
    logger.crit("Failed to connect to the database")
    exit(1)
Example #5
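# This excerpt starts at the tail end of detectAndApplyChanges (full function
# above) and continues with the module-level startup code.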
                traceback.print_exc()
    return tmpmap


def fetchFromDB(map):
    logger.info("Fetching previous checksums from database")
    with connection.cursor() as cursor:
        sql = "SELECT code, checksum FROM results.subject;"
        cursor.execute(sql)
        for result in cursor.fetchall():
            map[result['code']] = result['checksum']



logger.info("Initializing loadup")
resultcenter.ping("Starting")
connection = pymysql.connect(host=os.environ['AWS_RDB_HOST'],
                             user=os.environ['AWS_RDB_USERNAME'],
                             password=os.environ['AWS_RDB_PASSWORD'],
                             db=os.environ['AWS_RDB_DATABASE'],
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
if not connection.open:
    logger.crit("Failed to connect to the database")
    exit(1)
logger.info("Connected to database")
subjectCheckSums = {}
fetchFromDB(subjectCheckSums)
waitTime = os.environ['MONIT_WAIT_TIME']
logger.info("Wait time is: " + waitTime)
itterationNumber = 1