def processMatchId(match_id):
    """Fetch a raw match record by id, run the processing pipeline over it,
    and persist the resulting match document to the 'matches' collection.

    Parameters:
        match_id: identifier of the raw match document in COLLECTION_NAME.

    Side effects: reads and writes FaunaDB via the module-level `client`,
    and logs progress/errors. Always returns None.

    Fix: the original re-assigned `startTime = datetime.now()` before every
    pipeline stage but never read those values (only the first assignment was
    used, to time the fetch); the dead reassignments are removed.
    """
    # Timezone-aware UTC timestamp recorded for provenance of this stage.
    stage_start_time = pytz.utc.localize(datetime.now())

    # Fetch the raw record and log how long the fetch took.
    fetch_start = datetime.now()
    match_data = client.query(
        q.get(q.ref(q.collection(COLLECTION_NAME), match_id)))
    logging.info(log_with_process_name(
        PROCESS_NAME, f'Fetching record: {datetime.now() - fetch_start}'))

    match_data = match_data['data']
    if not preProcessData(match_data):
        # Record failed pre-processing/validation; nothing to persist.
        return

    # Aggregation / feature pipeline over the raw match payload.
    count = processMatchCounter()
    processAggregates(match_data, count)
    match = createMatchDocument(match_data)
    processHeroInformation(match_data)
    processTemporalHeroInformation(match_data)
    processHeroPairInformation(match_data)
    processMatchPredictor(match_data)
    addProvenance(match, match_data, stage_start_time)

    try:
        client.query(
            q.create(
                q.ref(q.collection('matches'),
                      match_data['result']['match_id']),
                {"data": match}))
    except Exception as e:
        # Best-effort persistence: log the failure (e.g. duplicate document)
        # and continue; the caller does not handle errors.
        logging.error(log_with_process_name(
            PROCESS_NAME,
            str(e) + ' for ' + str(match_data['result']['match_id'])))
def getMatchDetails(matchID, process_name, key, collection_name, stage_start_time):
    """Request match details from the remote API and persist the result.

    On HTTP 200 the JSON payload is written to file and to the database, and
    the match id is forwarded to the data-processing queue. On any other
    status the match id is re-published to the fetch topic for a retry.

    Returns the HTTP status code, or None when an unexpected error occurs.
    """
    try:
        request_start = pytz.utc.localize(datetime.now())
        response = requests.get(
            GET_MATCH_DETAILS, params={'match_id': matchID, 'key': key})
        request_end = pytz.utc.localize(datetime.now())

        if response.status_code == 200:
            try:
                payload = response.json()
                writeDataToFile(payload)
                addProvenance(payload, request_start, request_end,
                              process_name, stage_start_time)
                writeDataToDatabase(payload, matchID, process_name,
                                    collection_name)
                publishMatchIdToQueue(process_name, matchID)
                logging.info(log_with_process_name(
                    process_name,
                    f'Successfully written match details for match {matchID}'))
            except ValueError as v:
                # Response body was not valid JSON.
                logging.error(log_with_process_name(
                    process_name,
                    f'Decoding JSON has failed: {str(v)}'))
        else:
            logging.error(log_with_process_name(
                process_name,
                f'Response status code: {response.status_code} for matchID: {matchID}'))
            # Requeue the match id so the fetch can be retried later.
            publisher.publish(topic_path, data=matchID.encode('utf-8'))
        return response.status_code
    except Exception as e:
        logging.error(log_with_process_name(
            process_name, f'Error occurred {str(e)}'))
        return
def writeDataToDatabase(responseJson, matchID, process_name, collection_name):
    """Persist a raw match payload under `matchID` in `collection_name`.

    A 'Document already exists' error is treated as success (the record is
    already stored); any other failure re-publishes the match id to the
    fetch topic for a retry.
    """
    try:
        client.query(q.create(
            q.ref(q.collection(collection_name), matchID),
            {"data": responseJson}))
    except Exception as e:
        if str(e) != 'Document already exists':
            # Unexpected failure: push the id back onto the fetch topic.
            publisher.publish(topic_path, data=matchID.encode('utf-8'))
    # NOTE(review): this logs "Added" even when the create above failed —
    # confirm that is intended.
    logging.debug(log_with_process_name(
        process_name, f'Added matchID {matchID} to database'))
from google.cloud import pubsub_v1 from faunadb import query as q from faunadb.objects import Ref from faunadb.client import FaunaClient # System Argument ProcesName is needed - Name of process for provenance PROCESS_NAME = sys.argv[1] # PROCESS_NAME = 'test-sid' # System Argument Collection is needed - To decide which collection to persist to COLLECTION_NAME = sys.argv[2] # COLLECTION_NAME = 'matches_raw' logging.basicConfig(filename=LOG_ROOT + 'data_processor.log', level=logging.DEBUG, format='%(levelname)s:%(asctime)s %(message)s') logging.info(log_with_process_name(PROCESS_NAME, 'Started')) subscriber = pubsub_v1.SubscriberClient() subscription_path = subscriber.subscription_path(PROJECT_ID, DATA_SUBSCRIPTION_NAME) client = FaunaClient(secret="secret", domain=DATABASE_URL, scheme="http", port="8443") def processMatchId(match_id): stage_start_time = pytz.utc.localize(datetime.now()) startTime = datetime.now() match_data = client.query( q.get( q.ref( q.collection(COLLECTION_NAME),
from datetime import datetime from google.cloud import pubsub_v1 # System Argument ProcesName is needed - Name of process for provenance PROCESS_NAME = sys.argv[1] # System Argument Key is needed - To decide which key to use KEY = sys.argv[2] # System Argument Collection is needed - To decide which collection to persist to collection_name = sys.argv[3] logging.basicConfig(filename=LOG_ROOT + 'detail_fetcher.log', level=logging.DEBUG, format='%(levelname)s:%(asctime)s %(message)s') logging.info(log_with_process_name(PROCESS_NAME, 'Started')) subscriber = pubsub_v1.SubscriberClient() subscription_path = subscriber.subscription_path(PROJECT_ID, SUBSCRIPTION_NAME + '-new') while True: try: logging.info( log_with_process_name(PROCESS_NAME, 'Fetching unique match details')) response = subscriber.pull(subscription_path, max_messages=NUM_MESSAGES) for message in response.received_messages: match_id = message.message.data.decode("utf-8")
def publishMatchIdToQueue(process_name, matchID):
    """Forward `matchID` to the data-processing Pub/Sub topic and log it."""
    payload = matchID.encode('utf-8')
    publisher.publish(data_topic_path, data=payload)
    logging.info(log_with_process_name(
        process_name,
        f'Published match: {matchID} to data process queue'))