Example no. 1
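# Imports and module setup for this excerpt are not shown; they match the
# setup in Example no. 4 below (logging, sys, pytz/datetime, the FaunaDB
# client, and the Pub/Sub publisher).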
def processMatchId(match_id):
    stage_start_time = pytz.utc.localize(datetime.now())

    startTime = datetime.now()
    match_data = client.query(
        q.get(
            q.ref(
                q.collection(COLLECTION_NAME),
                match_id
            )
        )
    )
    logging.info(log_with_process_name(PROCESS_NAME, f'Fetching record: {datetime.now() - startTime}'))

    match_data = match_data['data']

    if not preProcessData(match_data):
        return

    # startTime is reset before each stage below; the matching per-stage
    # timing log calls appear to have been omitted from this excerpt.
    startTime = datetime.now()
    count = processMatchCounter()

    startTime = datetime.now()
    processAggregates(match_data, count)

    match = createMatchDocument(match_data)

    startTime = datetime.now()
    processHeroInformation(match_data)

    startTime = datetime.now()
    processTemporalHeroInformation(match_data)

    startTime = datetime.now()
    processHeroPairInformation(match_data)

    startTime = datetime.now()
    processMatchPredictor(match_data)

    addProvenance(match, match_data, stage_start_time)

    try:
        client.query(
            q.create(
                q.ref(
                    q.collection('matches'), match_data['result']['match_id']
                ),
                {"data": match}
            )
        )
    except Exception as e:
        logging.error(log_with_process_name(PROCESS_NAME, str(e) + ' for ' + str(match_data['result']['match_id'])))
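
# addProvenance is not defined in these excerpts. A minimal sketch of what the
# variant called in processMatchId might do, assuming it stamps the outgoing
# document with processing metadata (all field names here are hypothetical):
def addProvenance(match, match_data, stage_start_time):
    match['provenance'] = {
        'process': PROCESS_NAME,                              # worker that produced this document
        'source_match_id': match_data['result']['match_id'],  # raw record it was derived from
        'stage_start_time': stage_start_time.isoformat(),     # UTC start of this processing stage
        'stage_end_time': pytz.utc.localize(datetime.now()).isoformat(),
    }
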
def getMatchDetails(matchID, process_name, key, collection_name,
                    stage_start_time):
    try:
        startTime = pytz.utc.localize(datetime.now())
        response = requests.get(GET_MATCH_DETAILS,
                                params={
                                    'match_id': matchID,
                                    'key': key
                                })
        endTime = pytz.utc.localize(datetime.now())

        if response.status_code == 200:
            try:
                responseJson = response.json()

                writeDataToFile(responseJson)
                addProvenance(responseJson, startTime, endTime, process_name,
                              stage_start_time)
                writeDataToDatabase(responseJson, matchID, process_name,
                                    collection_name)

                publishMatchIdToQueue(process_name, matchID)

                logging.info(
                    log_with_process_name(
                        process_name,
                        f'Successfully written match details for match {matchID}'
                    ))

            except ValueError as v:
                logging.error(
                    log_with_process_name(
                        process_name, f'Decoding JSON has failed: {str(v)}'))
        else:
            logging.error(
                log_with_process_name(
                    process_name,
                    f'Response status code: {response.status_code} for matchID: {matchID}'
                ))

            data = matchID
            publisher.publish(topic_path, data=data.encode('utf-8'))

        return response.status_code
    except Exception as e:
        logging.error(
            log_with_process_name(process_name, f'Error occurred {str(e)}'))
    return

def writeDataToDatabase(responseJson, matchID, process_name, collection_name):
    try:
        client.query(
            q.create(q.ref(q.collection(collection_name), matchID),
                     {"data": responseJson}))
        logging.debug(
            log_with_process_name(process_name,
                                  f'Added matchID {matchID} to database'))
    except Exception as e:
        # Requeue the match ID unless another worker already wrote the document.
        if str(e) != 'Document already exists':
            data = matchID
            publisher.publish(topic_path, data=data.encode('utf-8'))
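
# getMatchDetails and writeDataToDatabase requeue failed match IDs via
# publisher and topic_path, whose setup is not shown. A minimal sketch using
# the standard Pub/Sub publisher client (TOPIC_NAME is an assumed constant):
from google.cloud import pubsub_v1

publisher = pubsub_v1.PublisherClient()
topic_path = publisher.topic_path(PROJECT_ID, TOPIC_NAME)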
Example no. 4
import logging
import sys
from datetime import datetime

import pytz
from google.cloud import pubsub_v1
from faunadb import query as q
from faunadb.objects import Ref
from faunadb.client import FaunaClient

# LOG_ROOT, PROJECT_ID, DATA_SUBSCRIPTION_NAME, DATABASE_URL and
# log_with_process_name come from a config/helpers module not shown here.

# System argument PROCESS_NAME is required: the process name used for provenance
PROCESS_NAME = sys.argv[1]
# PROCESS_NAME = 'test-sid'

# System argument COLLECTION_NAME is required: which collection to persist to
COLLECTION_NAME = sys.argv[2]
# COLLECTION_NAME = 'matches_raw'

logging.basicConfig(filename=LOG_ROOT + 'data_processor.log', level=logging.DEBUG,
                    format='%(levelname)s:%(asctime)s %(message)s')
logging.info(log_with_process_name(PROCESS_NAME, 'Started'))

subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(PROJECT_ID, DATA_SUBSCRIPTION_NAME)

client = FaunaClient(secret="secret", domain=DATABASE_URL, scheme="http", port="8443")
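
# With the two positional arguments above, the processor would be launched
# along these lines (the script name is assumed from the log file name):
#
#   python data_processor.py data-processor-1 matches_raw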


def processMatchId(match_id):
    stage_start_time = pytz.utc.localize(datetime.now())

    startTime = datetime.now()
    match_data = client.query(
        q.get(
            q.ref(
                q.collection(COLLECTION_NAME),
                # ... (excerpt truncated here; the full function appears in Example no. 1)
Example no. 5
import logging
import sys
from datetime import datetime

import pytz
from google.cloud import pubsub_v1

# LOG_ROOT, PROJECT_ID, SUBSCRIPTION_NAME, NUM_MESSAGES and
# log_with_process_name come from a config/helpers module not shown here.

# System argument PROCESS_NAME is required: the process name used for provenance
PROCESS_NAME = sys.argv[1]
# System argument KEY is required: which API key to use
KEY = sys.argv[2]

# System argument collection_name is required: which collection to persist to
collection_name = sys.argv[3]

logging.basicConfig(filename=LOG_ROOT + 'detail_fetcher.log',
                    level=logging.DEBUG,
                    format='%(levelname)s:%(asctime)s %(message)s')
logging.info(log_with_process_name(PROCESS_NAME, 'Started'))

subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(PROJECT_ID,
                                                 SUBSCRIPTION_NAME + '-new')

while True:
    try:
        logging.info(
            log_with_process_name(PROCESS_NAME,
                                  'Fetching unique match details'))
        response = subscriber.pull(subscription_path,
                                   max_messages=NUM_MESSAGES)

        for message in response.received_messages:
            match_id = message.message.data.decode("utf-8")
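            # Hypothetical continuation (the excerpt breaks off here): each
            # pulled ID is presumably handed to getMatchDetails, after which
            # the batch is acknowledged, matching the pre-1.0
            # google-cloud-pubsub API used above.
            getMatchDetails(match_id, PROCESS_NAME, KEY, collection_name,
                            pytz.utc.localize(datetime.now()))

        # Acknowledge the pulled batch so Pub/Sub does not redeliver it.
        ack_ids = [msg.ack_id for msg in response.received_messages]
        if ack_ids:
            subscriber.acknowledge(subscription_path, ack_ids)
    except Exception as e:
        logging.error(
            log_with_process_name(PROCESS_NAME, f'Error occurred {str(e)}'))

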
def publishMatchIdToQueue(process_name, matchID):
    data = matchID
    publisher.publish(data_topic_path, data=data.encode('utf-8'))
    logging.info(
        log_with_process_name(
            process_name, f'Published match: {data} to data process queue'))
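
# publishMatchIdToQueue publishes to data_topic_path, which is not set up in
# this excerpt; a minimal sketch (DATA_TOPIC_NAME is an assumed constant):
publisher = pubsub_v1.PublisherClient()
data_topic_path = publisher.topic_path(PROJECT_ID, DATA_TOPIC_NAME)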