class HubSender(object):
    sender = ""
    client = ""

    def __init__(self, devenv='doh'):
        """Create Event Hub Sender"""
        if devenv == 'prod':
            ADDRESS = "amqps://doh-airqual-eventhub.servicebus.windows.net/doh-airqual-event-hub-prod"
            USER = "******"
            KEY = "1p1aHCJc5IbamvnzlnvUa2wlvXsaJpSAbAORGlPRaQ4="
        elif devenv == "dev":
            ADDRESS = "amqps://az-doh-airqual-eventhub.servicebus.windows.net/az-doh-airqual-eventhub"
            USER = "******"
            KEY = "pfWGtB6obtiUCwwAobAAuaq7B9SZSsnpHY6ArEbeS1A="
        else:
            # The default devenv ('doh') matches neither branch and would otherwise leave
            # ADDRESS/USER/KEY undefined; fail fast with a clear error instead.
            raise ValueError("devenv must be 'prod' or 'dev'")
        self.client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
        self.sender = self.client.add_sender(partition="1")
        self.client.run()

    def senddata(self, readings):
        self.sender.send(EventData(readings))

    def stop(self):
        self.client.stop()

    def __del__(self):
        # self.sender.close()
        self.client.stop()
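
# Hedged usage sketch for the HubSender class above (not from the original source).
# It assumes the v1 azure-eventhub SDK is installed (EventHubClient/EventData imported)
# and that the 'dev' credentials above are valid; the payload is a hypothetical reading.
import json

hub = HubSender(devenv="dev")
try:
    hub.senddata(json.dumps({"sensor": "pm25", "value": 12.4}))
finally:
    hub.stop()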
def get():
    if request.args.get('since') is None:
        since = -1
    else:
        since = request.args.get('since')

    client = EventHubClient(address, debug=False, username=user, password=key)
    receiver = client.add_receiver(consumergroup, PARTITION, prefetch=1000,
                                   offset=Offset(since), keep_alive=72000)
    client.run()

    def generate():
        batched_events = receiver.receive(max_batch_size=100, timeout=500)
        yield '['
        index = 0
        while batched_events:
            for event_data in batched_events:
                if index > 0:
                    yield ','
                last_sn = event_data.sequence_number
                data = str(event_data.message)
                output_entity = literal_eval(data)
                output_entity.update({"_updated": str(last_sn)})
                yield json.dumps(output_entity)
                index = index + 1
            batched_events = receiver.receive(max_batch_size=100, timeout=500)
        yield ']'

    return Response(generate(), mimetype='application/json')
def add_receiver(client: EventHubClient, offset: Offset):
    receiver = client.add_receiver(CONSUMER_GROUP, EVENT_HUB_PARTITION,
                                   prefetch=PREFETCH, offset=offset)
    client.run()
    return receiver
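
# Hedged usage sketch for the add_receiver helper above (not from the original source).
# CONNECTION_STR and "my-hub" are hypothetical placeholders; Offset("-1") starts from the
# beginning of the partition in the v1 SDK.
from azure.eventhub import EventHubClient, Offset

client = EventHubClient.from_connection_string(CONNECTION_STR, eventhub="my-hub", debug=False)
receiver = add_receiver(client, Offset("-1"))
try:
    for event in receiver.receive(timeout=30):
        print(event.body_as_str())
finally:
    client.stop()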
def cosmosDBServiceToCosmosDB(self):
    database_link = 'dbs/' + DATABASE_ID
    collection_link = database_link + '/colls/' + COLLECTION_ID
    counter = 0
    filepath = ''
    CONSUMER_GROUP = "$Default"
    OFFSET = Offset("0")
    PARTITION = "0"

    eh_client = EventHubClient('amqps://xxxxx.servicebus.windows.net/txxxxqueue',
                               debug=True, username='******', password='******')
    receiver = eh_client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=300, offset=OFFSET)
    try:
        eh_client.run()
        while True:
            for event_data in receiver.receive(timeout=100):
                rcv_msg = str(event_data.message)
                # Filter the null messages
                if len(rcv_msg) > 5:
                    # Load the messages into Cosmos DB
                    cosmos_client.CreateDocument(collection_link,
                                                 json.loads(str(event_data.message)))
            eh_client.stop()
    except Exception as e:
        print("Failed Receiving Record {}".format(str(e)))
    finally:
        eh_client.stop()
class Consumer:
    consumer_group = None
    eventhubs_client = None
    offset = Offset("-1")
    redis_cache = None

    def __init__(self, eventhub, address, user, key, consumer_group, redis_hostname, redis_key):
        self.consumer_group = consumer_group
        self.eventhubs_client = EventHubClient(address, debug=False, username=user, password=key)
        redis_topic = f"eventhubs-{eventhub}-{consumer_group}"
        self.redis_cache = RedisCache(redis_hostname, redis_key, redis_topic)

    def recieve(self):
        OFFSET = Offset(self.redis_cache.get_offset())
        receiver = self.eventhubs_client.add_receiver(self.consumer_group, "0",
                                                      prefetch=5000, offset=OFFSET)
        self.eventhubs_client.run()
        messages = receiver.receive(timeout=100)
        self.eventhubs_client.stop()
        return messages

    def commit(self, event_data):
        self.redis_cache.set_offset(event_data.sequence_number)
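
# Hedged usage sketch for the Consumer class above (not from the original source).
# The address, SAS credentials and Redis values are hypothetical placeholders; note the
# method is spelled 'recieve' in the original code.
consumer = Consumer(
    eventhub="telemetry",
    address="amqps://mynamespace.servicebus.windows.net/telemetry",
    user="RootManageSharedAccessKey",
    key="<sas-key>",
    consumer_group="$Default",
    redis_hostname="mycache.redis.cache.windows.net",
    redis_key="<redis-key>")
for event in consumer.recieve():
    print(event.body_as_str())
    consumer.commit(event)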
def main(req: func.HttpRequest) -> func.HttpResponse:
    logger = logging.getLogger(__name__)
    formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
    func_context = os.environ['FUNCTION_CONTEXT']
    logger.debug(f"Function context --> {func_context}")

    credentials = None
    subscription_id = None
    kv_credentials = None
    kv_subscription_id = None
    if func_context == 'local':
        filehandler = logging.FileHandler('func.log')
        filehandler.setFormatter(formatter)
        logger.addHandler(filehandler)
        logger.setLevel(logging.DEBUG)
        credentials, subscription_id = get_local_credentials()
    else:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        console.setFormatter(formatter)
        credentials, subscription_id = get_azure_credentials()

    logger.debug('Python HTTP trigger function processed a request.')
    logger.debug(f"method={req.method}, url={req.url}, params={req.params}")
    logger.debug(f"body={req.get_json()}")

    # Handle WebHook
    webhook = req.get_json()

    # Create an empty dict within webhook for motsID
    webhook['additionalData'] = {}
    addl_data = webhook['additionalData']
    addl_data['motsID'] = "0000"

    # Key Vault stuff.
    kv_mgmt_client = KeyVaultManagementClient(credentials, subscription_id)
    kv_client = KeyVaultClient(credentials)
    namespace = get_kv_secret(kv_client, 'EventHubNamespace')
    event_hub = get_kv_secret(kv_client, 'EventHub')
    user = get_kv_secret(kv_client, 'EventHubKeyName')
    key = get_kv_secret(kv_client, 'EventHubKey')

    amqp_uri = f"https://{namespace}.servicebus.windows.net/{event_hub}"
    eh_client = EventHubClient(amqp_uri, debug=False, username=user, password=key)
    eh_sender = eh_client.add_sender(partition="0")
    eh_client.run()
    eh_sender.send(EventData(json.dumps(webhook)))
    logger.info(f"sending event to {amqp_uri}, {json.dumps(webhook)}")

    date = datetime.datetime.now()
    # datetime objects are not JSON serializable, so serialize the timestamp explicitly
    return func.HttpResponse(json.dumps({'date': date.isoformat(), 'status': 'SUCCESS'}))
def to_azure(events):
    client = EventHubClient(ADDRESS, debug=True)
    sender = client.add_sender(partition="0")
    client.run()
    try:
        for event in events:
            sender.send(EventData(event))
    except:
        raise
    finally:
        client.stop()
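
# Hedged usage sketch for to_azure above (not from the original source). ADDRESS is assumed
# to be a module-level URL with the SAS policy and key encoded in it, since the function
# passes no username/password; the payloads below are hypothetical.
import json

events = [json.dumps({"id": i}) for i in range(10)]
to_azure(events)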
def __init__(self):
    # Load config
    with open('config.json', 'r') as json_file:
        self.config = json.load(json_file)

    # Create Event Hubs client
    client = EventHubClient(self.config["EH_ADDRESS"], debug=False,
                            username=self.config["EH_USER"],
                            password=self.config["EH_KEY"])
    Worker.event_hub_sender = client.add_sender(partition="0")
    client.run()
def test_long_running_receive(connection_str):
    parser = argparse.ArgumentParser()
    parser.add_argument("--duration", help="Duration in seconds of the test", type=int, default=30)
    parser.add_argument("--consumer", help="Consumer group name", default="$default")
    parser.add_argument("--partitions", help="Comma separated partition IDs")
    parser.add_argument("--offset", help="Starting offset", default="-1")
    parser.add_argument("--conn-str", help="EventHub connection string", default=connection_str)
    parser.add_argument("--eventhub", help="Name of EventHub")
    parser.add_argument("--address", help="Address URI to the EventHub entity")
    parser.add_argument("--sas-policy", help="Name of the shared access policy to authenticate with")
    parser.add_argument("--sas-key", help="Shared access key")

    args, _ = parser.parse_known_args()
    if args.conn_str:
        client = EventHubClient.from_connection_string(args.conn_str,
                                                       eventhub=args.eventhub, debug=False)
    elif args.address:
        client = EventHubClient(args.address, username=args.sas_policy, password=args.sas_key)
    else:
        try:
            import pytest
            pytest.skip("Must specify either '--conn-str' or '--address'")
        except ImportError:
            raise ValueError("Must specify either '--conn-str' or '--address'")

    try:
        if not args.partitions:
            partitions = get_partitions(client)
        else:
            partitions = args.partitions.split(",")
        pumps = {}
        for pid in partitions:
            pumps[pid] = client.add_receiver(consumer_group=args.consumer, partition=pid,
                                             offset=Offset(args.offset), prefetch=50)
        client.run()
        pump(pumps, args.duration)
    finally:
        client.stop()
def isNewLabeledData(eh_url, eh_offset_url, eh_account, eh_key):
    '''
    Examines the Event Hub to determine whether enough new training data is available
    to trigger a re-train
    '''
    CONSUMER_GROUP = "$default"
    PARTITION = "0"

    offset_client = EventHubClient(eh_offset_url, debug=False, username=eh_account, password=eh_key)
    offset_receiver = offset_client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=5000)
    offset_sender = offset_client.add_sender(partition="0")
    offset_client.run()

    # Retrieve the current offset/sequence number for the write event queue
    # from the dedicated offset queue
    offsets = offset_receiver.receive(timeout=50)
    current_offset = -1  # Default to -1 (read the entire feed) if another offset is not retrieved
    logging.info("{0} write messages received".format(len(offsets)))
    for offset in offsets:
        offset_event = json.loads(offset.body_as_str())
        current_offset = offset_event['CURRENT_OFFSET']
        logging.info("Retrieved previous offset event {0}".format(offset_event))
    current_offset = -1  # NOTE: this overrides the offset retrieved above and forces a full re-read

    # Use the retrieved offset/sequence number to retrieve new writes
    event_client = EventHubClient(eh_url, debug=False, username=eh_account, password=eh_key)
    receiver = event_client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=5000,
                                         offset=Offset(current_offset))
    event_client.run()

    batch = receiver.receive(timeout=50)
    new_label_count = len(batch)
    for stuff in batch:
        logging.info("Offset {0}".format(stuff.sequence_number))
        current_offset = int(stuff.sequence_number) if int(stuff.sequence_number) > current_offset else current_offset
        logging.info("Message {0}".format(stuff.body_as_str()))
    logging.info("Processed {0} new label writes".format(new_label_count))

    # Write the last retrieved offset/sequence number to the offset message queue
    # so it can be used in the next read
    offset_sender.send(EventData(json.dumps({"TIMESTAMP": datetime.datetime.now().timestamp(),
                                             "CURRENT_OFFSET": current_offset})))
    logging.info("Stored current offset event {0}".format(current_offset))
    # sender.send(EventData(json.dumps({"EVENT_TYPE": "LABEL_WRITE", "LABEL_INDEX": face_hash,
    #                                   "WRITE_TIMESTAMP": datetime.datetime.now().timestamp()})))

    # Close queue clients
    offset_client.stop()
    event_client.stop()

    # Return True if enough new writes were found to execute a retrain
    return True if new_label_count > 5 else False
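
# Hedged usage sketch for isNewLabeledData above (not from the original source); the
# event hub URLs and credentials are hypothetical placeholders.
if isNewLabeledData(
        "amqps://mynamespace.servicebus.windows.net/label-writes",
        "amqps://mynamespace.servicebus.windows.net/label-offsets",
        "RootManageSharedAccessKey",
        "<sas-key>"):
    print("Enough new labeled data found - trigger a retrain")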
class Producer:
    client = None
    sender = None

    def __init__(self, address, user, key):
        self.client = EventHubClient(address, debug=False, username=user, password=key)
        self.sender = self.client.add_sender(partition="0")
        self.client.run()

    def send(self, msg):
        self.sender.send(EventData(str(msg)))

    def stop(self):
        self.client.stop()
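
# Hedged usage sketch for the Producer class above (not from the original source); the
# address and SAS credentials are hypothetical placeholders.
producer = Producer(
    "amqps://mynamespace.servicebus.windows.net/my-hub",
    "RootManageSharedAccessKey",
    "<sas-key>")
try:
    for i in range(5):
        producer.send({"counter": i})  # Producer.send() stringifies the message
finally:
    producer.stop()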
def eventhubReceiveToFile(test_queue_url):
    # next, we dequeue these messages - 10 messages at a time
    # (SQS max limit) till the queue is exhausted.
    # in production/real setup, I suggest using long polling as
    # you get billed for each request, regardless of an empty response
    counter = 0
    filepath = ''
    CONSUMER_GROUP = "$Default"
    OFFSET = Offset("0")
    PARTITION = "0"

    client = EventHubClient('amqps://xxxxx.servicebus.windows.net/txxxxxqueue',
                            debug=True, username='******', password='******')
    receiver = client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=300, offset=OFFSET)
    try:
        client.run()
        while True:
            for event_data in receiver.receive(timeout=100):
                rcv_msg = str(event_data.message)
                # print(rcv_msg)
                if len(rcv_msg) >= 5:
                    if counter != 0 and counter <= 50000:
                        # print(message['Body'])
                        file = open(filepath, 'a')
                        file.write(rcv_msg)
                        file.write('\n')
                        # next, we delete the message from the queue so no one else will process it again
                    elif counter == 0:
                        filepath = createfile()
                        # print(filepath)
                        file = open(filepath, 'w')
                    else:
                        filepath = createfile()
                        # print(filepath)
                        counter = 1
                        file = open(filepath, 'w')
                    file.close()
                    counter = counter + 1
    except Exception as e:
        print("Failed Receiving Record {}".format(str(e)))
    finally:
        client.stop()
def send_msg(addr, user, password):
    from azure.eventhub import EventHubClient, Sender, EventData

    client = EventHubClient(addr, username=user, password=password)
    sender = client.add_sender(partition='1')
    client.run()
    try:
        from datetime import datetime
        current_time = datetime.now().isoformat()
        message = "hi from event hub {}".format(current_time)
        event_data = EventData(message)
        sender.send(event_data)
    except:
        raise
        # return client
    finally:
        client.stop()
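
# Hedged invocation sketch for send_msg above (not from the original source); the address
# and SAS credentials are hypothetical placeholders.
send_msg(
    "amqps://mynamespace.servicebus.windows.net/my-hub",
    "RootManageSharedAccessKey",
    "<sas-key>")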
def execute(self):
    dataset_id = '{}_{}'.format(self.dataset_url, self.dataset_filename)
    latest_status = self.redis_client.get(dataset_id)
    if latest_status is not None:
        latest_status = latest_status.decode(ENCODING)

    client = EventHubClient(
        read_config.cfg.get('nulplabs', 'hub_address'),
        debug=False,
        username=read_config.cfg.get('nulplabs', 'hub_user'),
        password=read_config.cfg.get('nulplabs', 'hub_passwd'))
    sender = client.add_sender(partition="0")
    client.run()

    if latest_status == 'COMPLETED' or latest_status == 'ATTEMPT TO REFILL':
        self.redis_client.set(dataset_id, 'ATTEMPT TO REFILL')
        print('ATTEMPT TO REFILL')
        return False

    # NOTE: 'client' is rebound here from the EventHubClient to the Socrata client;
    # the Event Hub sender created above keeps working regardless.
    client = Socrata(self.dataset_url, None)
    self.redis_client.set(dataset_id, 'STARTED')
    for i in range(int(NUMBER_OF_MESSAGES / MESSAGES_PER_FETCH)):
        results = client.get(self.dataset_filename, limit=MESSAGES_PER_FETCH,
                             offset=MESSAGES_PER_FETCH * i)
        results_df = pd.DataFrame.from_records(results)
        # print(json.dumps(results))
        current_progress = '{} - {}'.format(
            str(i * MESSAGES_PER_FETCH + 1),
            str((i + 1) * MESSAGES_PER_FETCH))
        message = EventData(batch=self.data_generator(results))
        sender.send(message)
        self.redis_client.set(dataset_id, current_progress)
        # print('Progress {}'.format(current_progress))
        # print(results_df)

    self.redis_client.set(self.dataset_url + "_" + self.dataset_filename, 'COMPLETED')
    print('COMPLETED')
def run_job():
    # Create Event Hubs client
    client = EventHubClient(config.ADDRESS, debug=False, username=config.USER, password=config.KEY)
    sender = client.add_sender(partition="0")
    client.run()

    while True:
        try:
            messages = [
                create_advert_event(brands),
                create_impression_event(urls)
            ]
            for message in messages:
                print('Sending message to Event Hubs: ' + str(message))
                sender.send(EventData(message))
            time.sleep(1)
        except:
            raise
class RetrieveEventHub(QThread):

    def __init__(self, output_buffer, ADDRESS, USER, KEY, CONSUMER_GROUP, OFFSET, PARTITION, parent=None):
        super(RetrieveEventHub, self).__init__(parent)
        self.address = ADDRESS
        # SAS policy and key are not required if they are encoded in the URL
        self.user = USER
        self.key = KEY
        self.CONSUMER_GROUP = CONSUMER_GROUP
        self.OFFSET = OFFSET
        self.PARTITION = PARTITION
        self.total = 0
        self.last_sn = -1
        self.last_offset = "-1"
        self.client = EventHubClient(self.address, debug=False, username=self.user, password=self.key)
        self.receiver = self.client.add_receiver(self.CONSUMER_GROUP, self.PARTITION,
                                                 prefetch=1000, offset=self.OFFSET)
        self.output_buffer = output_buffer
        self.last_frame = -1

    def ordered_by_index(self, elem):
        return int(elem[0])

    def run(self):
        self.client.run()
        global exit_value
        while exit_value.value != 1:
            time.sleep(0.05)
            batched_events = self.receiver.receive(max_batch_size=10)
            contents = []
            for event in batched_events:
                oneLine = str(event.message).strip('&')
                content = oneLine.split('&')
                contents.append(content)
                # self.queue_service.delete_message(self.queue_name, message.id, message.pop_receipt)
            contents.sort(key=self.ordered_by_index)
            for content in contents:
                if int(content[0]) > self.last_frame:
                    if not self.output_buffer.full():
                        self.output_buffer.put(content)
                        self.last_frame = int(content[0])
def main(): """ Main method of sender """ namespace = os.environ['EVENT_HUB_NAMESPACE'] ehname = os.environ['EVENT_HUB_NAME'] address = "amqps://" + namespace + ".servicebus.windows.net/" + ehname # SAS policy and key are not required if they are encoded in the URL user = os.environ.get('EVENT_HUB_SAS_POLICY') key = os.environ.get('EVENT_HUB_SAS_KEY') try: if not address: raise ValueError("No EventHubs URL supplied.") client = EventHubClient(address, debug=False, username=user, password=key) sender = client.add_sender(partition="0") client.run() try: start_time = time.time() for i in range(100): message = '{ "intValue": ' + str(i) + ' }' LOGGER.info("Sending message: %s", message) sender.send(EventData(message)) except: raise finally: end_time = time.time() client.stop() run_time = end_time - start_time LOGGER.info("Runtime: %s seconds", run_time) except KeyboardInterrupt: pass
class QueueEventHub(QueueInterface):

    def __init__(self, address, user, key):
        self.address = address
        self.user = user
        self.key = key
        self.counter = 0
        self.client_batch = EventHubClient(self.address, debug=False,
                                           username=self.user, password=self.key)
        self.sender = self.client_batch.add_sender()
        self.client_batch.run()

    def batch_send(self, events: List[TimeserieRecord]):
        self.counter += len(events)
        data = EventData(batch=[json.dumps(dataclasses.asdict(e)) for e in events if e])
        self.sender.transfer(data)
        self.sender.wait()

    def send(self, events: List[TimeserieRecord]):
        return self.batch_send(events)
def sendToEventHub(data):
    try:
        client = EventHubClient(cfg['EventHubURL'], debug=False,
                                username=cfg['EventHubPolicyName'],
                                password=cfg['EventHubPrimaryKey'])
        sender = client.add_sender(partition="0")
        client.run()
        try:
            count = 0
            for payload in data:
                sender.send(EventData(json.dumps(payload)))
                # logPrint("Payload sent: " + json.dumps(payload))
                count += 1
        except:
            logPrint("Send to Eventhub Failed")
            raise
        finally:
            logPrint(str(count) + " payloads sent")
            data.clear()
            client.stop()
    except KeyboardInterrupt:
        pass
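
# Hedged usage sketch for sendToEventHub above (not from the original source); it assumes
# the global cfg dict holds 'EventHubURL', 'EventHubPolicyName' and 'EventHubPrimaryKey',
# and the payload below is hypothetical.
payloads = [{"deviceId": "dev-01", "temperature": 21.5}]
sendToEventHub(payloads)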
class EventHubOutputFormatter(OutputFormatterBaseClass):

    def __init__(self):
        NAMESPACE = os.environ['EVENT_HUB_NAMESPACE']
        EHNAME = os.environ['EVENT_HUB_NAME']
        ADDRESS = "amqps://" + NAMESPACE + ".servicebus.windows.net/" + EHNAME

        # SAS policy and key are not required if they are encoded in the URL
        USER = os.environ.get('EVENT_HUB_SAS_POLICY')
        KEY = os.environ.get('EVENT_HUB_SAS_KEY')

        self.client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
        self.sender = self.client.add_sender(partition="0")
        self.client.run()

    def send(self, message):
        self.sender.send(EventData(message))

    def close(self):
        self.client.stop()
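
# Hedged usage sketch for EventHubOutputFormatter above (not from the original source).
# It assumes the EVENT_HUB_* environment variables used by __init__ are set; the payload
# is hypothetical.
formatter = EventHubOutputFormatter()
try:
    formatter.send('{"status": "ok"}')
finally:
    formatter.close()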
class PgCDCPluginToEventHub(CDCBase):
    """
    Postgres CDC plugin to send logs to Event Hub.

    :param poll_frequency: frequency at which the PostgreSQL database should be polled.
                           Default is `1` second.
    :type poll_frequency: int
    """

    # plugin type and plugin name
    plugin_type = PlugInType.CDC
    plugin_name = "PgCDCPluginToEventHub"
    plugin_parameters = {
        "poll_frequency": {"type": float},
        "event_hub_address": {"type": str},
        "event_hub_user": {"type": str},
        "event_hub_key": {"type": str}
    }

    def __init__(self,
                 poll_frequency: int = 1,
                 event_hub_address: str = None,
                 event_hub_user: str = None,
                 event_hub_key: str = None,
                 *args,
                 **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.poll_frequency = poll_frequency
        self.event_hub_address = event_hub_address
        self.event_hub_user = event_hub_user
        self.event_hub_key = event_hub_key
        self._validate_parameters()
        self.client = EventHubClient(self.event_hub_address, debug=False,
                                     username=event_hub_user, password=event_hub_key)
        self.sender = self.client.add_sender(partition="0")

    def _validate_parameters(self):
        """
        Validate the parameters
        :return:
        """
        if self.event_hub_address is None \
                or len(self.event_hub_address.strip()) == 0 or \
                self.event_hub_user is None or \
                len(self.event_hub_user.strip()) == 0 or \
                self.event_hub_key is None or \
                len(self.event_hub_key.strip()) == 0:
            raise ValueError("Check event hub configuration")

    def _set_status(self, status: str):
        """
        Set the status of the running plugin
        :param status: status
        """
        status = f"CDC - {status}"
        self.status = status
        self.logger.info(status)

    def execute(self):
        self.client.run()
        self.logger.info("in progress - started")
        connection = psycopg2.connect(self.connection_string)
        cursor = connection.cursor()
        slot_name = "siirto_slot"

        # create the slot, if it doesn't already exist
        cursor.execute(f"SELECT 1 FROM pg_replication_slots WHERE slot_name = '{slot_name}';")
        rows = cursor.fetchall()
        cursor_exists = False
        for row in rows:
            cursor_exists = True
        if not cursor_exists:
            cursor.execute(f"SELECT 'init' FROM "
                           f"pg_create_logical_replication_slot('{slot_name}', 'wal2json');")

        current_table_cdc_file_name = {}
        for table_name in self.table_names:
            table_name_in_folder = table_name.replace(".", "_")
            cdc_folder_for_table = os.path.join(self.output_folder_location, table_name_in_folder)
            file_indexes = []
            if os.path.exists(cdc_folder_for_table):
                file_indexes = [int(file_name.replace(f"{table_name}_cdc_", "").replace(".csv", ""))
                                for file_name in list(os.listdir(cdc_folder_for_table))
                                if re.search(f"^{table_name}_cdc_.*.csv$", file_name)]
            file_index = 1
            if len(file_indexes) > 0:
                file_index = max(file_indexes) + 1
            if not os.path.exists(cdc_folder_for_table):
                os.mkdir(cdc_folder_for_table)
            file_to_write = os.path.join(self.output_folder_location,
                                         table_name_in_folder,
                                         f"{table_name}_cdc_{file_index}.csv")
            current_table_cdc_file_name[table_name] = {
                'file_to_write': file_to_write,
                'index': file_index
            }

        tables_string = ",".join(self.table_names)
        while self.is_running:
            self.logger.info("running cdc pull iteration")
            cursor.execute(f"SELECT lsn, data FROM pg_logical_slot_peek_changes('{slot_name}', "
                           f"NULL, NULL, 'pretty-print', '1', "
                           f"'add-tables', '{tables_string}');")
            rows = cursor.fetchall()
            rows_collected = {}
            max_lsn = None
            # read the WALs
            for row in rows:
                max_lsn = row[0]
                change_set = json.loads(row[1])
                change_set_entries = change_set["change"] if 'change' in change_set else []
                for change_set_entry in change_set_entries:
                    table_name = f"{change_set_entry['schema']}.{change_set_entry['table']}" \
                        if 'table' in change_set_entry else None
                    if table_name:
                        self.sender.send(EventData(json.dumps(change_set_entry)))
            # remove the WALs
            if max_lsn:
                cursor.execute(f"SELECT 1 FROM pg_logical_slot_get_changes('{slot_name}', "
                               f"'{max_lsn}', NULL, 'pretty-print', '1', "
                               f"'add-tables', '{tables_string}');")
            # sleep for one second before the next poll
            time.sleep(self.poll_frequency)

        print(f'cleaning the {slot_name}')
        cursor.execute(f"SELECT 'stop' FROM pg_drop_replication_slot('{slot_name}');")
        print(f'cleared the {slot_name}')
        self.logger.info("stopped")

    def setup_graceful_shutdown(self) -> None:
        """
        Handles the graceful shutdown of the process.
        Cleans the slot from pg, if already created
        :return:
        """
        def signal_handler(sig, frame):
            print("cleaning the slot")
            self.is_running = False

        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)
def inbound_sync_listener():
    """Initialize a delta inbound sync with Azure Active Directory."""
    while True:  # pylint: disable=too-many-nested-blocks
        provider_id = TENANT_ID
        try:
            initial_sync_time = check_last_sync("azure-user", "initial")
            LOGGER.info(initial_sync_time)
            LOGGER.info("This is your initial sync time")
            initial_sync_time = initial_sync_time["timestamp"][:26]
            latest_delta_sync_time = get_last_delta_sync(provider_id, "delta")
            if latest_delta_sync_time:
                latest_delta_sync_time = latest_delta_sync_time["timestamp"][:26]
                previous_sync_datetime = datetime.strptime(
                    latest_delta_sync_time, "%Y-%m-%dT%H:%M:%S.%f")
            else:
                previous_sync_datetime = datetime.strptime(
                    initial_sync_time, "%Y-%m-%dT%H:%M:%S.%f")

            # Create an eventhub client.
            LOGGER.info(ADDRESS)
            client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
            try:
                LOGGER.info("Opening connection to EventHub...")
                # Set prefetch to 1, we only want one event at a time.
                receiver = client.add_receiver(CONSUMER_GROUP, PARTITION, prefetch=1, offset=OFFSET)
                # Open the connection to the EventHub.
                client.run()
                # Get one event from EventHub.
                batch = receiver.receive(timeout=5000)
                while batch:
                    for event_data in batch:
                        # Get the event as a json record from the batch of events.
                        event_json = event_data.body_as_json()
                        record = event_json["records"][0]
                        operation_name = record["operationName"]
                        time = record["time"][:26]
                        record_timestamp = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f")
                        # Only process events logged after the previous initial/delta sync.
                        # Only grab events concerning User or Group objects.
                        if (operation_name in VALID_OPERATIONS
                                and record_timestamp > previous_sync_datetime):
                            data = {
                                "initated_by": record["properties"]["initiatedBy"],
                                "target_resources": record["properties"]["targetResources"],
                                "operation_name": operation_name,
                                "resultType": record["resultType"],
                            }
                            LOGGER.info("Operation name: %s", operation_name)
                            LOGGER.info("Record to Change: %s", record)
                            record_timestamp_utc = record_timestamp.isoformat()
                            insert_change_to_db(data, record_timestamp_utc)
                            sync_source = "azure-" + VALID_OPERATIONS[operation_name]
                            provider_id = TENANT_ID
                            conn = connect_to_db()
                            save_sync_time(
                                provider_id,
                                sync_source,
                                "delta",
                                conn,
                                record_timestamp_utc,
                            )
                            conn.close()
                            previous_sync_datetime = record_timestamp
                    batch = receiver.receive(timeout=50)
                LOGGER.info("Closing connection to EventHub...")
                # Close the connection to the EventHub.
                client.stop()
            except KeyboardInterrupt:
                pass
            finally:
                client.stop()
        except ExpectedError as err:
            LOGGER.debug((
                "%s Repolling after %s seconds...",
                err.__str__,
                LISTENER_POLLING_DELAY,
            ))
            time.sleep(LISTENER_POLLING_DELAY)
        except Exception as err:
            LOGGER.exception(err)
            raise err
"name": "User", "fields": [ {"name": "name", "type": "string"}, {"name": "favorite_number", "type": ["int", "null"]}, {"name": "favorite_color", "type": ["string", "null"]} ] } try: if not ADDRESS: raise ValueError("No EventHubs URL supplied.") # Create Event Hubs client client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY) sender = client.add_sender(partition="0") client.run() try: start_time = time.time() for i in range(100): print("Sending message: {}".format(i)) writer = DatumWriter(SCHEMA) bytes_writer = io.BytesIO() encoder = avro.io.BinaryEncoder(bytes_writer) writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), SCHEMA) writer.append({"name": "Alyssa", "favorite_number": 256}) writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) writer.close() writer.write({"name": "123", "favorite_number": 10}, encoder) raw_bytes = bytes_writer.getvalue() sender.send(EventData(raw_bytes)) except:
class EventHubStreamingClient(AbstractStreamingClient):  # pylint: disable=too-many-instance-attributes
    """Event Hub streaming client."""

    def __init__(self, config):  # pragma: no cover
        """
        Azure EventHub streaming client implementation.

        Configuration keys:
          AZURE_STORAGE_ACCESS_KEY
          AZURE_STORAGE_ACCOUNT
          EVENT_HUB_CONSUMER_GROUP
          EVENT_HUB_NAME
          EVENT_HUB_NAMESPACE
          EVENT_HUB_SAS_KEY
          EVENT_HUB_SAS_POLICY
          LEASE_CONTAINER_NAME
          TIMEOUT
        """
        storage_account_name = config.get("AZURE_STORAGE_ACCOUNT")
        storage_key = config.get("AZURE_STORAGE_ACCESS_KEY")
        lease_container_name = config.get("LEASE_CONTAINER_NAME")
        namespace = config.get("EVENT_HUB_NAMESPACE")
        eventhub = config.get("EVENT_HUB_NAME")
        consumer_group = config.get("EVENT_HUB_CONSUMER_GROUP", '$Default')
        user = config.get("EVENT_HUB_SAS_POLICY")
        key = config.get("EVENT_HUB_SAS_KEY")

        try:
            self.timeout = int(config['TIMEOUT'])
        except (KeyError, ValueError):
            self.timeout = None

        self.logger = Logger()
        self.loop = None

        # Create EPH client
        if storage_account_name is not None and storage_key is not None:
            self.eph_client = EventHubConfig(
                sb_name=namespace,
                eh_name=eventhub,
                policy=user,
                sas_key=key,
                consumer_group=consumer_group)
            self.eh_options = EPHOptions()
            self.eh_options.release_pump_on_timeout = True
            self.eh_options.auto_reconnect_on_error = False
            self.eh_options.debug_trace = False
            self.storage_manager = AzureStorageCheckpointLeaseManager(
                storage_account_name, storage_key, lease_container_name)
            self.tasks = None
            signal.signal(signal.SIGTERM, self.exit_gracefully)
        # Create send client
        else:
            address = "amqps://" + namespace + ".servicebus.windows.net/" + eventhub
            try:
                self.send_client = EventHubClient(address, debug=False, username=user, password=key)
                self.sender = self.send_client.add_sender()
                self.send_client.run()
            except Exception as ex:
                self.logger.error('Failed to init EH send client: %s', ex)
                raise

    def start_receiving(self, on_message_received_callback):  # pragma: no cover
        self.loop = asyncio.get_event_loop()
        try:
            host = EventProcessorHost(
                EventProcessor,
                self.eph_client,
                self.storage_manager,
                ep_params=[on_message_received_callback],
                eph_options=self.eh_options,
                loop=self.loop)
            self.tasks = asyncio.gather(
                host.open_async(),
                self.wait_and_close(host, self.timeout))
            self.loop.run_until_complete(self.tasks)
        except KeyboardInterrupt:
            self.logger.info("Handling keyboard interrupt or SIGINT gracefully.")
            # Canceling pending tasks and stopping the loop
            for task in asyncio.Task.all_tasks():
                task.cancel()
            self.loop.run_forever()
            self.tasks.exception()
            raise
        finally:
            if self.loop.is_running():
                self.loop.stop()

    def exit_gracefully(self, signum, frame):  # pylint: disable=unused-argument
        """Handle signal interrupt (SIGTERM) gracefully."""
        self.logger.info("Handling signal interrupt %s gracefully." % signum)
        # Canceling pending tasks and stopping the loop
        self.stop()

    def send(self, message):  # pragma: no cover
        try:
            self.sender.send(EventData(body=message))
            self.logger.info('Sent message: %s', message)
            return True
        except Exception as ex:
            self.logger.error('Failed to send message to EH: %s', ex)
            return False

    def stop(self):  # pragma: no cover
        if self.loop:
            # Stop consumer
            for task in asyncio.Task.all_tasks():
                task.cancel()
            self.loop.run_forever()
            if self.tasks:
                self.tasks.exception()
            if self.loop.is_running():
                self.loop.stop()
        else:
            # Stop producer
            try:
                self.send_client.stop()
            except Exception as ex:
                self.logger.error('Failed to close send client: %s', ex)

    @staticmethod
    async def wait_and_close(host: EventProcessorHost,
                             timeout: Optional[float] = None):  # pragma: no cover
        """Run a host indefinitely or until the timeout is reached."""
        if timeout is None:
            while True:
                await asyncio.sleep(1)
        else:
            await asyncio.sleep(timeout)
            await host.close_async()
USER = os.environ.get('EVENT_HUB_SAS_POLICY')
KEY = os.environ.get('EVENT_HUB_SAS_KEY')


def callback(outcome, condition):
    logger.info("Message sent. Outcome: {}, Condition: {}".format(outcome, condition))


try:
    if not ADDRESS:
        raise ValueError("No EventHubs URL supplied.")

    client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
    sender = client.add_sender(partition="1")
    client.run()
    try:
        start_time = time.time()
        for i in range(100):
            sender.transfer(EventData(str(i)), callback=callback)
        logger.info("Queued 100 messages.")
        sender.wait()
        logger.info("Finished processing queue.")
    except:
        raise
    finally:
        end_time = time.time()
        client.stop()
        run_time = end_time - start_time
        logger.info("Runtime: {} seconds".format(run_time))
def main(req: func.HttpRequest) -> func.HttpResponse:
    logger = logging.getLogger(__name__)
    formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s')
    func_context = os.environ['FUNCTION_CONTEXT']
    logger.debug(f"Function context --> {func_context}")

    credentials = None
    subscription_id = None
    kv_credentials = None
    kv_subscription_id = None
    if func_context == 'local':
        filehandler = logging.FileHandler('func.log')
        filehandler.setFormatter(formatter)
        logger.addHandler(filehandler)
        logger.setLevel(logging.DEBUG)
        credentials, subscription_id = get_local_credentials()
    else:
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        console.setFormatter(formatter)
        credentials, subscription_id = get_azure_credentials()

    logger.debug('Python HTTP trigger function processed a request.')
    logger.debug(f"method={req.method}, url={req.url}, params={req.params}")
    logger.debug(f"body={req.get_json()}")

    # Handle WebHook
    webhook = req.get_json()

    # Get resource information, specifically tags, if this is an alert
    resource_id = None
    if check_keys(webhook, 'data', 'context', 'resourceId'):
        resource_id = webhook['data']['context']['resourceId']
    elif check_keys(webhook, 'data', 'context', 'activityLog', 'resourceId'):
        resource_id = webhook['data']['context']['activityLog']['resourceId']
    elif check_keys(webhook, 'data', 'context', 'scope'):
        resource_id = webhook['data']['context']['scope']
    elif check_keys(webhook, 'data', 'context', 'activityLog', 'authorization', 'scope'):
        resource_id = webhook['data']['context']['activityLog']['authorization']['scope']

    if resource_id:
        resource_client = ResourceManagementClient(credentials, subscription_id)
        try:
            resource = resource_client.resources.get_by_id(resource_id, api_version='2018-06-01')
            if resource.tags:
                webhook['tags'] = resource.tags
                logger.info(f"adding tags {resource.tags}")
            else:
                logger.info(f"no tags found in resource {resource_id}")
        except:
            logger.error(f"received exception from ResourceManagementClient for {resource_id}")
    else:
        logger.info("no resource_id found in webhook")

    # Key Vault stuff
    kv_mgmt_client = KeyVaultManagementClient(credentials, subscription_id)
    kv_client = KeyVaultClient(credentials)
    namespace = get_kv_secret(kv_client, 'EventHubNamespace')
    event_hub = get_kv_secret(kv_client, 'EventHub')
    user = get_kv_secret(kv_client, 'EventHubKeyName')
    key = get_kv_secret(kv_client, 'EventHubKey')

    amqp_uri = f"https://{namespace}.servicebus.windows.net/{event_hub}"
    eh_client = EventHubClient(amqp_uri, debug=False, username=user, password=key)
    eh_sender = eh_client.add_sender(partition="0")
    eh_client.run()
    eh_sender.send(EventData(json.dumps(webhook)))
    logger.info(f"sending event to {amqp_uri}, {json.dumps(webhook)}")

    date = datetime.datetime.now()
    return func.HttpResponse(json.dumps({'status': 'SUCCESS'}))
# Author: Mark Moore
# Created: 6/10/2020
# Last updated: 6/11/2020
# Simulate a sine wave to send to an event hub and plot in real time.
# Reference: https://en.wikipedia.org/wiki/Sine_wave to get the math to plot a sine wave

import time                                                    # get time for the delay
from math import *                                             # get math for the sine function
from azure.eventhub import EventHubClient, Sender, EventData   # get eventhub functions

Frequency = 80         # Frequency is the number of cycles over time.
Frequencydelay = .1    # Frequencydelay is a wall-clock delay applied each loop to smooth the plot in Power BI.
pi = 3.1415926535897932384626433832795028841971693993751057   # The value of pi

ADDRESS = "amqps://yournamespace.servicebus.windows.net/youehname"  # Event hub and namespace
Policy = "YourPolicyName"   # Event hub username (SAS policy)
KEY = "YourPolicyKey"       # Event hub key

client = EventHubClient(ADDRESS, debug=False, username=Policy, password=KEY)  # Create the client object
sender = client.add_sender(partition="0")   # Create the sender object
client.run()                                # Start the client

while True:                                 # Infinite loop
    for n in range(Frequency):              # Inner loop that repeats 'Frequency' times
        # Fudged sine-wave math, since this is plotted in Power BI in real time rather than
        # on a graph where time is plotted as a static axis.
        p = sin(2 * pi * n / Frequency)
        p = p * 100     # Scale up p. Left alone, p will be between -1 and 1 and will plot as 0 for all points.
        msg = '{"p":%i}' % (p)              # Construct the JSON message with the plot point to send to the event hub
        sender.send(EventData(msg))         # Send JSON to the event hub
        time.sleep(Frequencydelay)          # Smooths out the plot. Increase for fewer cycles per second, decrease for more.
    Parameters:
        address  = the URL used to connect to the Event Hub
        username = the name of the SAS policy
        password = the key of the SAS policy
        debug    = whether to debug the connection
    """
ehClient = EventHubClient(ehAddress, SASName, PrimaryKey, debug=False)

# Add a sender to publish the events
ehSender = ehClient.add_sender()

"""
Opens the connections and runs the Sender/Receiver clients.
On success it returns nothing; otherwise it raises an exception.
"""
ehClient.run()

# Send the events and catch any errors
try:
    iN = 100                # Number of events to send
    iniTime = time.time()   # Time at which the program started

    # Loop to send iN events
    for iI in range(iN):
        # Build the JSON string to send as an event
        message = "{ \"PartitionKey\": \"" + "PK" + str(iI) + \
                  "\", \"RowKey\": \"" + "RK" + str(iI) + \
                  "\", \"Text\": \"" + "NAaaaaaah" + \
                  "\", \"Fecha\": \"" + "12-12-19" + "\" }"
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # STEP-2: load the face detection, age/gender and re-identification networks
    model_xml = '/opt/intel/computer_vision_sdk/deployment_tools/intel_models/face-detection-retail-0004/FP16/face-detection-retail-0004.xml'
    model_bin = '/opt/intel/computer_vision_sdk/deployment_tools/intel_models/face-detection-retail-0004/FP16/face-detection-retail-0004.bin'
    net = IENetwork.from_ir(model=model_xml, weights=model_bin)

    model_age_xml = '/opt/intel/computer_vision_sdk/deployment_tools/intel_models/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.xml'
    model_age_bin = '/opt/intel/computer_vision_sdk/deployment_tools/intel_models/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013.bin'
    net_age = IENetwork.from_ir(model=model_age_xml, weights=model_age_bin)

    model_reid_xml = '/opt/intel/computer_vision_sdk/deployment_tools/intel_models/face-reidentification-retail-0001/FP16/face-reidentification-retail-0001.xml'
    model_reid_bin = '/opt/intel/computer_vision_sdk/deployment_tools/intel_models/face-reidentification-retail-0001/FP16/face-reidentification-retail-0001.bin'
    net_reid = IENetwork.from_ir(model=model_reid_xml, weights=model_reid_bin)

    # Plugin initialization for specified device and load extensions library if specified
    log.info("Initializing plugin for {} device...".format(args.device))
    plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
    if args.cpu_extension and 'CPU' in args.device:
        # plugin.add_cpu_extension(args.cpu_extension)
        plugin.add_cpu_extension(
            '/home/tsunamac/inference_engine_samples/intel64/Release/lib/libcpu_extension.so')
        # plugin = IEPlugin(device='CPU', plugin_dirs=None)
        # plugin.add_cpu_extension('C:/Intel/computer_vision_sdk_2018.3.343/deployment_tools/inference_engine/bin/intel64/Release/cpu_extension.dll')
        # plugin.add_cpu_extension('/opt/intel/computer_vision_sdk/inference_engine/lib/ubuntu_16.04/intel64/libcpu_extension.so')
    if "CPU" in plugin.device:
        supported_layers = plugin.get_supported_layers(net)
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}"
                      .format(plugin.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    # STEP-3: load the networks onto the device
    exec_net = plugin.load(network=net, num_requests=1)
    exec_net_age = plugin.load(network=net_age, num_requests=1)
    exec_net_reid = plugin.load(network=net_reid, num_requests=1)

    # STEP-4: query input/output blob names and shapes
    input_blob = next(iter(net.inputs))     # input_blob = 'data'
    out_blob = next(iter(net.outputs))      # out_blob = 'detection_out'
    model_n, model_c, model_h, model_w = net.inputs[input_blob]    # 1, 3, 300, 300

    input_blob_age = next(iter(net_age.inputs))   # input_blob = 'data'
    out_blob_age = next(iter(net_age.outputs))    # out_blob = ''
    out_blob_prob = "prob"
    model_age_n, model_age_c, model_age_h, model_age_w = net_age.inputs[input_blob_age]    # 1, 3, 62, 62

    input_blob_reid = next(iter(net_reid.inputs))   # input_blob = 'data'
    out_blob_reid = next(iter(net_reid.outputs))    # out_blob = ''
    model_reid_n, model_reid_c, model_reid_h, model_reid_w = net_reid.inputs[input_blob_reid]
    print("B={},C={},H={},W={}".format(model_reid_n, model_reid_c, model_reid_h, model_reid_w))

    del net
    del net_age
    del net_reid

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        # assert os.path.isfile(args.input), "Specified input file doesn't exist"

    # STEP-5: open the video source and the Event Hub sender
    url = "http://192.168.1.16:8081/?action=stream"
    video = "video.mp4"
    cap = cv2.VideoCapture(input_stream)
    personData = {}

    ADDRESS = args.address
    USER = args.user
    KEY = args.key
    client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
    sender = client.add_sender(partition="0")
    client.run()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # STEP-6: resize the frame to the detector input shape
        cap_w = cap.get(3)
        cap_h = cap.get(4)
        in_frame = cv2.resize(frame, (model_w, model_h))
        in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        in_frame = in_frame.reshape((model_n, model_c, model_h, model_w))

        # STEP-7: run face detection
        exec_net.start_async(request_id=0, inputs={input_blob: in_frame})
        if exec_net.requests[0].wait(-1) == 0:
            res = exec_net.requests[0].outputs[out_blob]

            # STEP-8: for each detected face, run age/gender and re-identification
            for obj in res[0][0]:
                if obj[2] > args.prob_threshold:
                    xmin = int(obj[3] * cap_w)
                    ymin = int(obj[4] * cap_h)
                    xmax = int(obj[5] * cap_w)
                    ymax = int(obj[6] * cap_h)

                    frame_org = frame.copy()
                    face = frame_org[ymin:ymax, xmin:xmax]
                    in_frame_age = cv2.resize(face, (model_age_w, model_age_h))
                    in_frame_age = in_frame_age.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                    in_frame_age = in_frame_age.reshape((model_age_n, model_age_c, model_age_h, model_age_w))
                    # cv2.rectangle(dst, (xmin, ymin), (xmax, ymax), (255, 0, 0), 1)
                    cv2.imshow("age", face)

                    exec_net_age.start_async(request_id=0, inputs={input_blob: in_frame_age})
                    if exec_net_age.requests[0].wait(-1) == 0:
                        res_gender = exec_net_age.requests[0].outputs[out_blob_prob]
                        # res_age = exec_net_age.requests[0].outputs[out_blob_age]
                        # print(res_age[0].reshape(-1,))
                        AgeGender = res_gender[0].reshape(-1, )
                        # print(AgeGender)

                    in_frame_reid = cv2.resize(face, (model_reid_w, model_reid_h))
                    in_frame_reid = in_frame_reid.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                    in_frame_reid = in_frame_reid.reshape((model_reid_n, model_reid_c, model_reid_h, model_reid_w))

                    exec_net_reid.start_async(request_id=0, inputs={input_blob: in_frame_reid})
                    if exec_net_reid.requests[0].wait(-1) == 0:
                        res_reid = exec_net_reid.requests[0].outputs[out_blob_reid]
                        reIdVector = res_reid[0].reshape(-1, )
                        # print(reIdVector)
                        foundId = findMatchingPerson(reIdVector)
                        print("ID:" + str(foundId))

                    class_id = int(obj[1])
                    # Draw box and label\class_id
                    color = (0, 0, 255)
                    genderColor = (147, 20, 255) if AgeGender[1] < 0.5 else (255, 0, 0)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), genderColor, 2)
                    # cv2.putText(frame, str(class_id) + ' ' + str(round(obj[2] * 100, 1)) + ' %', (xmin, ymin - 7), cv2.FONT_HERSHEY_COMPLEX, 0.8, color, 1)
                    cv2.putText(frame, "ID_{0:4d}".format(foundId), (xmin, ymin - 7),
                                cv2.FONT_HERSHEY_COMPLEX, 0.8, color, 1)

                    try:
                        personData["detecttime"] = datetime.datetime.now().isoformat()
                        personData["No"] = str(1)
                        personData["faceId"] = str(foundId)
                        message = json.dumps(personData)
                        sender.send(EventData(message))
                    except:
                        raise

        # STEP-9: show the annotated frame
        cv2.imshow("Detection Results", frame)
        key = cv2.waitKey(1)
        if key == 27:
            break

    # STEP-10: clean up
    cv2.destroyAllWindows()
    del exec_net
    del plugin
    client.stop()
def main(argv):
    FILE_DATA = ''
    ADDRESS = ''
    try:
        opts, args = getopt.getopt(argv, "hi:o:", ["iSym="])
    except getopt.GetoptError:
        print('pushtsdata.py -i <FILE_DATA>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('pushtsdata.py -i <FILE_DATA>')
            sys.exit()
        elif opt in ("-i", "--iSym"):
            FILE_DATA = arg

    from azure.eventhub import EventHubClient, Sender, EventData
    logger = logging.getLogger("azure")

    # Address can be in either of these formats:
    # "amqps://<URL-encoded-SAS-policy>:<URL-encoded-SAS-key>@<mynamespace>.servicebus.windows.net/myeventhub"
    # "amqps://<mynamespace>.servicebus.windows.net/myeventhub"
    # For example:
    # ADDRESS = "amqps://mynamespace.servicebus.windows.net/myeventhub"
    # ADDRESS = "sb://timeseriesns.servicebus.windows.net/timeserieseh"

    # Get pricing data from static file created using gettsdata.py script
    if FILE_DATA == "price_data":
        # ADDRESS = "sb://tseventhubsns.servicebus.windows.net/tseventhubs"
        # USER = "******"
        # KEY = ""
        ADDRESS = "sb://<Your event hub namespace>.servicebus.windows.net/<Your event hub name for price data>"
        USER = "<Your shared access policy name for price data>"
        KEY = "<generated key for price data>"
    # Get sentiment from static file created using gettsdata.py script
    elif FILE_DATA == "senti_data":
        # ADDRESS = "sb://tseventhubsns.servicebus.windows.net/sentieventhub"
        # USER = "******"
        # KEY = ""
        ADDRESS = "sb://<Your event hub namespace>.servicebus.windows.net/<Your event hub name for sentiment data>"
        USER = "<Your shared access policy name for sentiment data>"
        KEY = "<generated key for sentiment data>"

    try:
        if not ADDRESS:
            raise ValueError("No EventHubs URL supplied.")

        # Create Event Hubs client on partition 1
        client = EventHubClient(ADDRESS, debug=False, username=USER, password=KEY)
        sender = client.add_sender(partition="1")
        client.run()
        try:
            start_time = time.time()
            f = open(FILE_DATA, "r")
            for line in f:
                words = line.split()
                # Push data to Azure eventhub
                sender.send(EventData(line))
                print(line)
        except:
            raise
        finally:
            end_time = time.time()
            client.stop()
            run_time = end_time - start_time
            f.close()
            logger.info("Runtime: {} seconds".format(run_time))
    except KeyboardInterrupt:
        pass
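
# Hedged invocation sketch for main above (not from the original source); a standard
# entry point that forwards the command-line arguments.
if __name__ == "__main__":
    main(sys.argv[1:])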
def processEntityImages(choice_blobs, db_account_name, db_account_key, ca_file_uri, db_config,
                        cs_account, cs_key, eh_url, eh_account, eh_key):
    '''
    Tests entity images for the presence of a face using Azure Cognitive Services, extracts the
    face based on the provided bounding box, applies the facial classifier if available and then
    writes the raw image and face to Cosmos DB
    '''
    # Initialize the Cognitive Services account to perform facial detection
    BASE_CS_URL = 'https://virginia.api.cognitive.microsoft.us/face/v1.0/'  # Replace with your regional Base URL
    CF.Key.set(cs_key)
    CF.BaseUrl.set(BASE_CS_URL)

    def extractFace(image_bytes):
        '''Detect a single face and crop it out of the image bytes, or return None.'''
        face_list = CF.face.detect(image_bytes)

        if len(face_list) == 1:
            face_rectangle = face_list[0]['faceRectangle']
            nparr = np.fromstring(image_bytes, np.uint8)
            img_byte = Image.open(io.BytesIO(image_bytes))
            face_iso_image = img_byte.crop(
                (face_rectangle['left'],
                 face_rectangle['top'],
                 face_rectangle['left'] + face_rectangle['width'],
                 face_rectangle['top'] + face_rectangle['height']))
            return face_iso_image
        else:
            return None

    # Convert the base image to a PIL object, then detect and extract the face component of the image
    blob_image_faces = list(
        map(lambda blob_tuple: (blob_tuple[0],
                                Image.open(io.BytesIO(blob_tuple[1])),
                                extractFace(blob_tuple[1])),
            choice_blobs))
    logging.debug("Face detection run on {0} images".format(len(blob_image_faces)))

    # Connect to the database
    ssl_opts = {
        'ca_certs': ca_file_uri,
        'ssl_version': PROTOCOL_TLSv1_2,
        'cert_reqs': CERT_REQUIRED  # Certificates are required and validated
    }
    auth_provider = PlainTextAuthProvider(username=db_account_name, password=db_account_key)
    endpoint_uri = db_account_name + '.cassandra.cosmosdb.azure.com'
    cluster = Cluster([endpoint_uri], port=10350, auth_provider=auth_provider, ssl_options=ssl_opts)

    # If no db config file is passed, look for the container environment variables
    if db_config is None:
        keyspace = os.environ['DB_KEYSPACE']
        personaTableName = os.environ['DB_PERSONA_TABLE']
        subPersonaTableName = os.environ['DB_SUB_PERSONA_TABLE']
        subPersonaFaceEdgeTableName = os.environ['DB_SUB_PERSONA_EDGE_TABLE']
        faceSubPersonaEdgeTableName = os.environ['DB_SUB_PERSONA_EDGE_TABLE']
        rawImageTableName = os.environ['DB_RAW_IMAGE_TABLE']
        faceTableName = os.environ['DB_REFINED_IMAGE_TABLE']
    # Otherwise load db config
    else:
        keyspace = db_config['db-keyspace']
        personaTableName = db_config['db-persona-table']
        subPersonaTableName = db_config['db-sub-persona-table']
        subPersonaFaceEdgeTableName = db_config['db-sub-persona-face-edge-table']
        faceSubPersonaEdgeTableName = db_config['db-face-sub-persona-edge-table']
        rawImageTableName = db_config['db-raw-image-table']
        faceTableName = db_config['db-face-image-table']

    # Prepare Cosmos DB session and insertion queries
    session = cluster.connect(keyspace)
    personaInsertQuery = session.prepare(
        "INSERT INTO " + personaTableName + " (persona_name) VALUES (?)")
    subPersonaInsertQuery = session.prepare(
        "INSERT INTO " + subPersonaTableName + " (sub_persona_name, persona_name) VALUES (?, ?)")
    subPersonaFaceEdgeInsertQuery = session.prepare(
        "INSERT INTO " + subPersonaFaceEdgeTableName +
        " (sub_persona_name, assoc_face_id, label_v_predict_assoc_flag) VALUES (?,?,?)")
    faceSubPersonaEdgeInsertQuery = session.prepare(
        "INSERT INTO " + faceSubPersonaEdgeTableName +
        " (sub_persona_name, assoc_face_id, label_v_predict_assoc_flag) VALUES (?,?,?)")
    rawInsertQuery = session.prepare(
        "INSERT INTO " + rawImageTableName + " (image_id, file_uri, image_bytes) VALUES (?,?,?)")
    refinedInsertQuery = session.prepare(
        "INSERT INTO " + faceTableName +
        " (face_id, raw_image_edge_id, face_bytes, feature_bytes) VALUES (?,?,?,?)")

    client = EventHubClient(eh_url, debug=False, username=eh_account, password=eh_key)
    sender = client.add_sender(partition="0")
    client.run()

    face_write_count = 0
    face_label_write_count = 0
    face_unlabeled_write_count = 0

    for (blob, image_bytes, face_bytes) in blob_image_faces:
        if face_bytes is not None:
            file_name = blob.name
            (entity, usage, number) = file_name.split('-')

            # Generate the face classifier features from the face using VGG-face on Keras
            if face_bytes.mode != "RGB":
                face_bytes = face_bytes.convert("RGB")
            face_bytes = face_bytes.resize((img_width, img_height))
            image = img_to_array(face_bytes)
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)
            stuff = vgg_face_feature_gen.predict(image, batch_size=1).flatten()
            writer = avro.io.DatumWriter(schema)
            bytes_writer = io.BytesIO()
            encoder = avro.io.BinaryEncoder(bytes_writer)
            writer.write({"features": stuff.tolist()}, encoder)
            face_feature_bytes = bytes_writer.getvalue()

            # For each image extract the label and use it to generate a persona; redundant writes will cancel out.
            # Write the entity label to the persona table.
            # For the time being also write the entity label to the sub-persona table and associate it
            # with the persona table.
            session.execute(personaInsertQuery, (entity,))
            session.execute(subPersonaInsertQuery, (entity, entity))
            logging.info("Writing persona, sub-persona {0} to DB table {1}".format(entity, subPersonaTableName))

            # Resize the image to ensure the write query does not exceed the maximum size
            width, height = image_bytes.size
            if width > height:
                transform_factor = 256 / width
            else:
                transform_factor = 256 / height
            compact_image_bytes = image_bytes.resize(
                (round(height * transform_factor), round(width * transform_factor)))

            # Write the raw image to its table
            imgByteArr = io.BytesIO()
            compact_image_bytes.save(imgByteArr, format='PNG')
            compact_image_bytes = imgByteArr.getvalue()
            hashed_bytes = hashlib.md5(compact_image_bytes).digest()
            hashed_bytes_int = int.from_bytes(hashed_bytes, byteorder='big')  # Good identifier, sadly un
            image_hash = str(hashed_bytes_int)  # Workaround for Python high-precision int incompatibility
            session.execute(rawInsertQuery, (image_hash, file_name, compact_image_bytes))
            logging.info("Writing raw image to DB {0}".format(image_hash))

            # Resize the face to ensure the write query does not exceed the maximum size
            width, height = face_bytes.size
            if width > height:
                transform_factor = 256 / width
            else:
                transform_factor = 256 / height
            compact_face_bytes = face_bytes.resize(
                (round(height * transform_factor), round(width * transform_factor)))

            # Write each face extracted from the image to the DB as a refined image
            imgByteArr = io.BytesIO()
            compact_face_bytes.save(imgByteArr, format='PNG')
            compact_face_bytes = imgByteArr.getvalue()
            face_hash_bytes = hashlib.md5(compact_face_bytes).digest()
            face_hashed_bytes_int = int.from_bytes(face_hash_bytes, byteorder='big')  # Good identifier, sadly un
            face_hash = str(face_hashed_bytes_int)  # Workaround for Python high-precision int incompatibility
            session.execute(refinedInsertQuery,
                            (face_hash, image_hash, compact_face_bytes, face_feature_bytes))
            logging.info("Writing face image to DB {0}".format(face_hash))
            face_write_count += 1

            # If the data is part of the training set, write edges between the sub-personas and face images
            if usage == "Train":
                session.execute(subPersonaFaceEdgeInsertQuery, (entity, face_hash, True))
                session.execute(faceSubPersonaEdgeInsertQuery, (entity, face_hash, True))
                sender.send(EventData(json.dumps({
                    "EVENT_TYPE": "LABEL_WRITE",
                    "LABEL_INDEX": face_hash,
                    "WRITE_TIMESTAMP": datetime.datetime.now().timestamp()
                })))
                logging.info("Writing face label to DB {0}".format(face_hash))
                face_label_write_count += 1
            else:
                # Engineering workaround to make it easier to find the unlabeled images later for re-prediction
                session.execute(subPersonaFaceEdgeInsertQuery, ("TEMPORARY", face_hash, False))
                session.execute(faceSubPersonaEdgeInsertQuery, ("TEMPORARY", face_hash, False))
                logging.info("Writing unlabeled face {0} to DB".format(face_hash))
                face_unlabeled_write_count += 1
                # Otherwise do not write an edge; these will be predicted later by the training service

    # session.close()
    client.stop()
    logging.info("Wrote {0} faces to DB".format(face_write_count))
    logging.info("Wrote {0} face labels to DB".format(face_label_write_count))