# default_logging_format and default_level are assumed to be module-level
# defaults exposed alongside LoggingService; the original line used them
# without an import.
from ingestor.utils.logging_service import LoggingService, default_logging_format, default_level


def _logging_service():
    expected_name = 'test'
    logging_service = LoggingService(expected_name)
    assert expected_name == logging_service.name
    assert default_logging_format == logging_service.logging_format
    assert default_level == logging_service.level
    assert logging_service._logger is None
    return logging_service
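
# A minimal pytest case built on the helper above; it only re-checks what
# the helper itself asserts, so no new assumptions are introduced.
def test_logging_service_defaults():
    logging_service = _logging_service()
    assert 'test' == logging_service.name
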
from ingestor.readers.read_tweet_json import read_tweet_json
from ingestor.senders.send_event_hub_message import send_event_hub_message, stop_client
from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

logger = LoggingService('Send tweet.json').logger

PROFILE = env('PROFILE_NAME')
message_type = 'tweet'
data_delimiter = '|~|'


def build_message(profile: str, message_type: str, event_id: str, message: str, *,
                  delimiter: str = data_delimiter):
    structured_message = \
        f'profile:{profile}{delimiter}' \
        f'type:{message_type}{delimiter}' \
        f'id:{event_id}{delimiter}' \
        f'message:{message}'
    return structured_message


if __name__ == "__main__":
    tweets = read_tweet_json()
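    # Hedged sketch (not in the original file): the send loop this entry
    # point appears to build toward, wiring build_message,
    # send_event_hub_message and stop_client together. The 'id_str' and
    # 'text' keys are assumed from Twitter's standard tweet JSON, not
    # confirmed by this repo.
    for tweet in tweets:
        message = build_message(PROFILE, message_type, tweet['id_str'], tweet['text'])
        send_event_hub_message(message)
    stop_client()
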
from ingestor.utils.logging_service import LoggingService
from tests.integration import build_firebase_connection_for_tests

logger = LoggingService('Write Firebase').logger


def test_write():
    db, id_token = build_firebase_connection_for_tests()

    # create
    archer = {"name": "Sterling Archer", "agency": "Figgis Agency"}
    db.child("agents").push(archer, id_token)

    # set
    lana = {"name": "Lana Kane", "agency": "Figgis Agency"}
    db.child("agents").child("Lana").set(lana, id_token)

    # read all
    all_agents = db.child("agents").get(id_token).val()
    assert len(all_agents) >= 2

    # read one
    lana_data = db.child("agents").child("Lana").get(id_token).val()
    assert "Lana Kane" == lana_data['name']

    # update
    db.child("agents").child("Lana").update({"name": "Lana Anthony Kane"}, id_token)
    new_lana_data = db.child("agents").child("Lana").get(id_token).val()
    assert 'Lana Anthony Kane' == new_lana_data['name']
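    # Hedged sketch (not in the original test): remove the test data so
    # repeated runs start from a clean node; Pyrebase's remove() accepts
    # the id_token like the calls above.
    db.child("agents").remove(id_token)
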
import operator

from ingestor.extractors.extract_hashtags_from_tweets_list import extract_hashtags
from ingestor.readers.read_tweet_json import read_tweet_json
from ingestor.utils.logging_service import LoggingService

logger = LoggingService('Read tweet.json').logger


def _add_to_aggregate(aggregate_dict: dict, key: str):
    aggregate_dict[key] = aggregate_dict[key] + 1


if __name__ == "__main__":
    tweets = read_tweet_json()
    print(tweets)
    hashtags = extract_hashtags(tweets)
    hashtags_sorted = sorted(hashtags.items(), key=operator.itemgetter(1), reverse=True)
    print(hashtags_sorted)
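    # Hedged addition (not in the original script): surface the most
    # frequent hashtag through the module's otherwise-unused logger.
    # hashtags_sorted is a descending list of (hashtag, count) tuples.
    if hashtags_sorted:
        top_hashtag, top_count = hashtags_sorted[0]
        logger.info(f"Top hashtag: #{top_hashtag} ({top_count} occurrences)")
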
import time

from azure.eventhub import EventHubClient, Offset

from ingestor.builders.build_firebase import get_firebase_auth_and_db
from ingestor.daos.last_received_event_offset import LastReceivedEventOffset
from ingestor.retrievers.event_hub_retriever import EventHubRetriever
from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

logger = LoggingService('Receive').logger

CONNECTION_STRING = env('EVENT_HUB_CONNECTION_STRING')
EVENT_HUB_TOPIC = env('EVENT_HUB_TOPIC_HELLO_WORLD_NAME')
EVENT_HUB_PARTITION = env('EVENT_HUB_TOPIC_HELLO_WORLD_PARTITION')
CONSUMER_GROUP = "$default"
PREFETCH = 1
OFFSET = Offset('1', inclusive=True)
USER = env('FIREBASE_AUTH_USER')
PASSWORD = env('FIREBASE_AUTH_PASSWORD')


def receive_one():
    auth, db = get_firebase_auth_and_db()
    user = auth.sign_in_with_email_and_password(USER, PASSWORD)
    id_token = user['idToken']
    last_received_event_offset = LastReceivedEventOffset(db, id_token)
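    # Hedged sketch of how receive_one() plausibly continues (the file is
    # truncated here). Only documented azure-eventhub 1.x calls are used;
    # how LastReceivedEventOffset and EventHubRetriever cooperate is not
    # shown in this file, so persisting the offset is left as a comment.
    client = EventHubClient.from_connection_string(CONNECTION_STRING, eventhub=EVENT_HUB_TOPIC)
    receiver = client.add_receiver(CONSUMER_GROUP, EVENT_HUB_PARTITION,
                                   prefetch=PREFETCH, offset=OFFSET)
    client.run()
    try:
        start_time = time.time()
        for event in receiver.receive(max_batch_size=1, timeout=5):
            logger.info(f"Received: {event.body_as_str()}")
            # persist event.offset via last_received_event_offset here
        logger.info("Runtime: {} seconds".format(time.time() - start_time))
    finally:
        client.stop()
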
import time

from azure.eventhub import EventData

from ingestor.builders.build_sender_client import build_sender_client
from ingestor.utils.logging_service import LoggingService

logger = LoggingService('send_event_hub_message').logger

client = None
sender = None


def send_event_hub_message(message: str):
    global client
    global sender
    if not client or not sender:
        client, sender = build_sender_client()
    logger.info(f"Sending tweet message: {message}")
    start_time = time.time()
    sender.send(EventData(message))
    end_time = time.time()
    run_time = end_time - start_time
    logger.info("Runtime: {} seconds".format(run_time))


def stop_client():
    # The original body was missing; this completion assumes the lazily
    # built client should be stopped and the cached handles cleared so a
    # later send can rebuild them.
    global client
    global sender
    if client:
        client.stop()
    client = None
    sender = None
from collections import defaultdict

from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

logger = LoggingService('extract_hashtags_from_tweets_list').logger

profile = env('PROFILE_NAME')


def extract_hashtags(tweets: list) -> dict:
    hashtags_found = defaultdict(int)
    for tweet in tweets:
        tweet_hashtags = tweet['entities']['hashtags']
        # Plain loop instead of a side-effect list comprehension.
        for tweet_hashtag in tweet_hashtags:
            _add_to_aggregate(hashtags_found, tweet_hashtag['text'].lower())
    return hashtags_found


def _add_to_aggregate(aggregate_dict: dict, key: str):
    aggregate_dict[key] += 1
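
# Hedged usage sketch (not in the original module): a hand-built tweet list
# with the same 'entities' -> 'hashtags' shape extract_hashtags reads,
# showing that counting is case-insensitive.
if __name__ == "__main__":
    sample_tweets = [
        {'entities': {'hashtags': [{'text': 'Python'}, {'text': 'azure'}]}},
        {'entities': {'hashtags': [{'text': 'python'}]}},
    ]
    print(dict(extract_hashtags(sample_tweets)))  # {'python': 2, 'azure': 1}
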
from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService
from ingestor.utils.read_json import read_json

logger = LoggingService('read_tweet_json').logger

PROFILE = env('PROFILE_NAME')
data_path = './data'
tweet_json_file_name = 'tweet.json'


def read_tweet_json(profile_to_read: str = PROFILE) -> list:
    tweet_json_profile_data_path = f'{data_path}/{profile_to_read}/{tweet_json_file_name}'
    logger.info(f"Reading tweet file: {tweet_json_profile_data_path}")
    return read_json(tweet_json_profile_data_path)
from azure.eventhub import EventHubClient

from ingestor.utils.env import env
from ingestor.utils.logging_service import LoggingService

logger = LoggingService('build_sender_client').logger

CONNECTION_STRING = env('EVENT_HUB_CONNECTION_STRING')
EVENT_HUB_TOPIC = env('EVENT_HUB_TOPIC_HELLO_WORLD_NAME')
EVENT_HUB_PARTITION = env('EVENT_HUB_TOPIC_HELLO_WORLD_PARTITION')


def build_sender_client(*, topic: str = EVENT_HUB_TOPIC,
                        partition: str = EVENT_HUB_PARTITION) -> tuple:
    if not CONNECTION_STRING:
        raise ValueError("No EventHubs URL supplied.")
    client = EventHubClient.from_connection_string(CONNECTION_STRING, topic)
    sender = client.add_sender(partition)
    client.run()
    return client, sender