Example #1
0
def main():
    """Run the sample PodiumApp REST workflow from start to finish.

    Starts a REST session and then issues the sample calls in order:
    create source/entity metadata from an FDL, list the entities of
    ``SourceId``, list all external sources and all external entities,
    kick off a load for ``externalEntityId``, poll until that load is
    reported finished, and export the data of ``internalEntityId``.
    The wall-clock time spent on those calls is printed at the end.

    Relies on names defined outside this function: ``utils``, ``time``,
    ``PodiumApp``, ``dirToFDL``, ``SourceId``, ``externalEntityId`` and
    ``internalEntityId``.

    All output goes through single-argument ``print(...)`` calls, which
    emit the same text under Python 2 and Python 3.
    """
    # Seconds to wait between two load-status checks.
    poll_seconds = 10

    # Login PodiumApp & start REST request session
    print("Starting REST session ...")
    s = utils.startRestSession(PodiumApp)

    # Process start
    starttime = time.time()

    # Sample call#0: Create source/entity metadata using an FDL
    entityinfo = utils.createEntityMeta(PodiumApp, s, dirToFDL)
    print("-------------Succesfully created new entity-----------")
    print(entityinfo)

    # Sample call#1: Get entities for the provided source id
    entities = utils.getEntityObjectList(PodiumApp, s, SourceId)
    print("------------entities for source with id of %d---------" % SourceId)
    print(entities)

    # Sample call#2: Get all external sources
    print("------------All external Sources---------")
    extSources = utils.getAllExternalSources(PodiumApp, s)
    print(extSources)

    # Sample call#3: Get all external entities
    print("------------All external entities---------")
    extEntities = utils.getAllExternalEntities(PodiumApp, s)
    print(extEntities)

    # Sample call#4: Load data for entity with a given id via an
    # asynchronous call
    print("------------Kicking off load data for entity with id %d---------"
          % externalEntityId)
    utils.loadEntity(PodiumApp, s, externalEntityId)

    # Sample call#5: Poll to check for load to finish.
    # There is no timeout: the loop only ends once checkLoadFinished
    # returns a truthy value for the freshly fetched load logs.
    while not utils.checkLoadFinished(
            utils.getLoadLogs(PodiumApp, s, externalEntityId)):
        time.sleep(poll_seconds)

    # Sample call#6: Export data for provided entity id
    print("------------Exporting entity data---------")
    utils.exportEntityData(PodiumApp, s, internalEntityId)

    # Process end
    endtime = time.time()

    # Calculate total runtime
    print("\nTotal Pprocessing Time: %f seconds" % (endtime - starttime))
Example #2
0
def main():
    """Exercise the sample PodiumApp REST calls and report the elapsed time.

    Sequence of calls, all made through ``utils`` with one REST session:

    0. create source/entity metadata from the FDL at ``dirToFDL``;
    1. fetch the entity objects of ``SourceId``;
    2. fetch all external sources;
    3. fetch all external entities;
    4. start a load for ``externalEntityId``;
    5. poll the load logs every 10 seconds until the load is reported
       finished;
    6. export the data of ``internalEntityId``.

    ``utils``, ``time``, ``PodiumApp``, ``dirToFDL``, ``SourceId``,
    ``externalEntityId`` and ``internalEntityId`` must be provided by the
    enclosing module.

    Printing uses single-argument ``print(...)`` calls so the function
    parses and prints identically on Python 2 and Python 3.
    """
    # Login PodiumApp & start REST request session
    print("Starting REST session ...")
    s = utils.startRestSession(PodiumApp)

    # Process start
    starttime = time.time()

    # Sample call#0: Create source/entity metadata using an FDL
    entityinfo = utils.createEntityMeta(PodiumApp, s, dirToFDL)
    print("-------------Succesfully created new entity-----------")
    print(entityinfo)

    # Sample call#1: Get entities for the provided source id
    entities = utils.getEntityObjectList(PodiumApp, s, SourceId)
    print("------------entities for source with id of %d---------" % SourceId)
    print(entities)

    # Sample call#2: Get all external sources
    print("------------All external Sources---------")
    extSources = utils.getAllExternalSources(PodiumApp, s)
    print(extSources)

    # Sample call#3: Get all external entities
    print("------------All external entities---------")
    extEntities = utils.getAllExternalEntities(PodiumApp, s)
    print(extEntities)

    # Sample call#4: Load data for entity with a given id via an
    # asynchronous call
    kickoff_banner = (
        "------------Kicking off load data for entity with id %d---------"
        % externalEntityId)
    print(kickoff_banner)
    utils.loadEntity(PodiumApp, s, externalEntityId)

    # Sample call#5: Poll to check for load to finish (no upper bound on
    # the number of polls; the load logs are re-fetched on every check).
    while True:
        loadLogs = utils.getLoadLogs(PodiumApp, s, externalEntityId)
        if utils.checkLoadFinished(loadLogs):
            break
        time.sleep(10)

    # Sample call#6: Export data for provided entity id
    print("------------Exporting entity data---------")
    utils.exportEntityData(PodiumApp, s, internalEntityId)

    # Process end
    endtime = time.time()

    # Calculate total runtime
    print("\nTotal Pprocessing Time: %f seconds" % (endtime - starttime))
Example #3
0
def main():
    """Watch the S3 data list and run the load / Hive pipeline on new data.

    Setup: starts a REST session, obtains the S3 bucket handle, derives the
    initial state from the last item of the S3 data list, and resolves the
    entity id for ``AAMSourceId`` and the entity id list for
    ``AAMPrepSourceId``.

    Then loops forever (there is no ``break``):

    1. refresh the S3 data list and the current state;
    2. if ``utils.testIfNeedLoad(lastState, currentState)`` is falsy, print
       a "no new data" notice, sleep ``NewDataCheckPolling`` seconds and
       start over;
    3. otherwise update the entity's ``src.file.glob`` property from the
       previous state, trigger the entity load and wait for it, run the
       Hive scripts ``step1.sql`` .. ``step5.sql``, trigger the load of the
       prep entities and wait for it, and print the elapsed time.

    A non-zero exit status from a Hive script is reported on stdout; the
    remaining steps still run, as they did before the status was checked.

    Relies on names defined outside this function: ``utils``, ``os``,
    ``time``, ``datetime``, ``PodiumApp``, ``AAMSourceId``,
    ``AAMPrepSourceId``, ``EntityLoadCheckPolling`` and
    ``NewDataCheckPolling``.
    """
    # Login PodiumApp & start REST request session
    print("Starting REST session ...")
    s = utils.startRestSession(PodiumApp)

    # Get S3 bucket handler
    print("Getting S3 bucket handler ...")
    bucket = utils.getS3Bucket()

    # Get initial S3 data list under /omniture/gannett
    currentDataList = utils.getS3DataList(bucket)

    # Get current data list state from the last list item.
    # Assumes the list is non-empty; an empty list raises IndexError.
    currentState = utils.getGannettDataInfo(currentDataList[-1])
    # NOTE(review): a state presumably looks like
    # dict(day='2015-08-25', hour='19', file='AAM_CDF_1244_000000_0.gz')
    # -- confirm against utils.getGannettDataInfo.

    # Get entity ID for AAMSourceId
    entityId = utils.getEntityId(
        utils.getEntityObjectList(PodiumApp, s, AAMSourceId))

    # Get entity IDs for AAMPrepSourceId
    entityList = utils.getEntityIdList(
        utils.getEntityObjectList(PodiumApp, s, AAMPrepSourceId))

    while True:
        # Assign last state with current state every loop
        lastState = currentState

        # Update current S3 data list under /omniture/gannett
        utils.printStepHeader(1, "Update S3 data file list")
        currentDataList = utils.getS3DataList(bucket)

        # Update current data list state
        currentState = utils.getGannettDataInfo(currentDataList[-1])

        # Nothing to load: wait for the next checking cycle.
        if not utils.testIfNeedLoad(lastState, currentState):
            print("%s: No new data added" % datetime.now())
            print("Waiting for next checking cycle ...")
            time.sleep(NewDataCheckPolling)
            continue

        # Preprocess start
        starttime = time.time()

        # Get entity's old properties
        utils.printStepHeader(2, "Retrieve old props")
        oldProp = utils.getEntityProps(
            utils.getEntityInfo(PodiumApp, s, entityId))

        # Generate NEW properties with last state
        utils.printStepHeader(3, "Generate new props")
        newProp = utils.changeEntityPropByName(
            oldProp, "src.file.glob", utils.genSrcFileGlob(lastState))

        # Update entity properties
        utils.printStepHeader(4, "Update entity props")
        utils.updateEntityProp(PodiumApp, s, entityId, newProp)

        # Load data for entity Adobe_Audience_Manager
        utils.printStepHeader(5, "Load data for entity")
        utils.updateEntity(PodiumApp, s, entityId)

        # Check if load finished (polls without a timeout)
        utils.printStepHeader(6, "Wait for entity loading finished")
        while not utils.checkLoadFinishedForEntity(
                utils.getLoadLogsForEntity(PodiumApp, s, entityId)):
            time.sleep(EntityLoadCheckPolling)
        print("Loading data for entity finished")

        # Execute Hive scripts step1.sql .. step5.sql in order
        utils.printStepHeader(7, "Execute Hive Scripts")
        for step in range(1, 6):
            print(" --> Hive step %d ..." % step)
            status = os.system("hive -f step%d.sql" % step)
            if status != 0:
                # Surface the failure instead of dropping it; the flow
                # deliberately continues so a bad step cannot stop the loop.
                print(" --> Hive step %d exited with status %s"
                      % (step, status))

        # Load data for entities
        utils.printStepHeader(8, "Load data for entities")
        utils.updateEntities(PodiumApp, s, entityList)

        # Check if load finished (polls without a timeout)
        utils.printStepHeader(9, "Wait for entities loading finished")
        while not utils.checkLoadFinishedForEntities(
                utils.getLoadLogsForEntities(PodiumApp, s, entityList)):
            time.sleep(EntityLoadCheckPolling)
        print("Loading data for entities finished")

        # Preprocess end
        endtime = time.time()

        # Calculate total runtime
        print("\nTotal Preprocessing Time: %f seconds" % (endtime - starttime))
Example #4
0
def main():
    """Poll S3 for new data and, when found, reload entities and run Hive.

    Before looping, the function starts a REST session, gets the S3 bucket
    handle, takes the initial state from the last item of the S3 data list,
    and looks up the entity id for ``AAMSourceId`` plus the entity id list
    for ``AAMPrepSourceId``.

    Each pass of the endless loop refreshes the S3 data list and state and
    asks ``utils.testIfNeedLoad(lastState, currentState)`` whether to load:

    * falsy: print a notice and sleep ``NewDataCheckPolling`` seconds;
    * truthy: rewrite the entity's ``src.file.glob`` property from the
      previous state, load the entity and wait until the load is reported
      finished, run the Hive scripts ``step1.sql`` to ``step5.sql``, load
      the prep entities and wait again, then print the elapsed time.

    The exit status of every Hive invocation is checked and a non-zero
    status is printed; execution continues with the next step regardless,
    matching the previous control flow.

    Relies on names defined outside this function: ``utils``, ``os``,
    ``time``, ``datetime``, ``PodiumApp``, ``AAMSourceId``,
    ``AAMPrepSourceId``, ``EntityLoadCheckPolling`` and
    ``NewDataCheckPolling``.
    """
    # Hive scripts executed, in this order, during step 7.
    hiveScripts = ("step1.sql", "step2.sql", "step3.sql",
                   "step4.sql", "step5.sql")

    # Login PodiumApp & start REST request session
    print("Starting REST session ...")
    s = utils.startRestSession(PodiumApp)

    # Get S3 bucket handler
    print("Getting S3 bucket handler ...")
    bucket = utils.getS3Bucket()

    # Get initial S3 data list under /omniture/gannett
    currentDataList = utils.getS3DataList(bucket)

    # Get current data list state from the newest (last) list entry.
    # Assumes the list is non-empty; an empty list raises IndexError.
    currentState = utils.getGannettDataInfo(currentDataList[-1])
    # NOTE(review): a state presumably looks like
    # dict(day='2015-08-25', hour='19', file='AAM_CDF_1244_000000_0.gz')
    # -- confirm against utils.getGannettDataInfo.

    # Get entity ID for AAMSourceId
    sourceEntities = utils.getEntityObjectList(PodiumApp, s, AAMSourceId)
    entityId = utils.getEntityId(sourceEntities)

    # Get entity IDs for AAMPrepSourceId
    prepEntities = utils.getEntityObjectList(PodiumApp, s, AAMPrepSourceId)
    entityList = utils.getEntityIdList(prepEntities)

    while True:
        # Assign last state with current state every loop
        lastState = currentState

        # Update current S3 data list under /omniture/gannett
        utils.printStepHeader(1, "Update S3 data file list")
        currentDataList = utils.getS3DataList(bucket)

        # Update current data list state
        currentState = utils.getGannettDataInfo(currentDataList[-1])

        # No load needed: report, wait, and check again.
        if not utils.testIfNeedLoad(lastState, currentState):
            print("%s: No new data added" % datetime.now())
            print("Waiting for next checking cycle ...")
            time.sleep(NewDataCheckPolling)
            continue

        # Preprocess start
        starttime = time.time()

        # Get entity's old properties
        utils.printStepHeader(2, "Retrieve old props")
        entityInfo = utils.getEntityInfo(PodiumApp, s, entityId)
        oldProp = utils.getEntityProps(entityInfo)

        # Generate NEW properties with last state
        utils.printStepHeader(3, "Generate new props")
        srcFileGlob = utils.genSrcFileGlob(lastState)
        newProp = utils.changeEntityPropByName(
            oldProp, "src.file.glob", srcFileGlob)

        # Update entity properties
        utils.printStepHeader(4, "Update entity props")
        utils.updateEntityProp(PodiumApp, s, entityId, newProp)

        # Load data for entity Adobe_Audience_Manager
        utils.printStepHeader(5, "Load data for entity")
        utils.updateEntity(PodiumApp, s, entityId)

        # Check if load finished (no timeout; logs are re-fetched per poll)
        utils.printStepHeader(6, "Wait for entity loading finished")
        while not utils.checkLoadFinishedForEntity(
                utils.getLoadLogsForEntity(PodiumApp, s, entityId)):
            time.sleep(EntityLoadCheckPolling)
        print("Loading data for entity finished")

        # Execute Hive scripts
        utils.printStepHeader(7, "Execute Hive Scripts")
        for stepNo, script in enumerate(hiveScripts, 1):
            print(" --> Hive step %d ..." % stepNo)
            exitStatus = os.system("hive -f " + script)
            if exitStatus != 0:
                # Report the failure rather than ignoring it; later steps
                # still run so one bad script cannot stall the loop.
                print(" --> Hive step %d exited with status %s"
                      % (stepNo, exitStatus))

        # Load data for entities
        utils.printStepHeader(8, "Load data for entities")
        utils.updateEntities(PodiumApp, s, entityList)

        # Check if load finished (no timeout; logs are re-fetched per poll)
        utils.printStepHeader(9, "Wait for entities loading finished")
        while not utils.checkLoadFinishedForEntities(
                utils.getLoadLogsForEntities(PodiumApp, s, entityList)):
            time.sleep(EntityLoadCheckPolling)
        print("Loading data for entities finished")

        # Preprocess end
        endtime = time.time()

        # Calculate total runtime
        print("\nTotal Preprocessing Time: %f seconds" % (endtime - starttime))