Example #1
import json
import os
import pickle

# Legacy azure-storage-blob (<= 2.1) SDK, which provides BlockBlobService.
from azure.storage.blob import BlockBlobService


class FileLayer(object):
    def __init__(self, storage_account_name="", storage_account_access_key=""):

        self.storage_account_name = storage_account_name
        self.storage_account_access_key = storage_account_access_key
        self.file_service = BlockBlobService(
            account_name=self.storage_account_name,
            account_key=self.storage_account_access_key)

    def download_blob(self,
                      blobpath="",
                      local_file_path="",
                      container="testing-environment"):
        """
        Downloads file in the Azure Blob to current local directory
        Args:
            blobpath: blob name oz azure
            local_file_path:File path on local system
            container: Azure Blob Container name

        Returns: Downloaded local file path

        """

        if len(local_file_path) == 0:
            local_path = "/" + blobpath
        else:
            local_path = local_file_path

        os.makedirs("/".join(local_path.split("/")[:-1]), exist_ok=True)
        self.file_service.get_blob_to_path(container_name=container,
                                           blob_name=blobpath,
                                           file_path=local_path)
        print("downloaded to = {}".format(local_path))
        return local_path

    def upload_to_blob(self,
                       local_file_path="",
                       blobpath="",
                       container="testing-environment"):
        """
        Uploads local file to the blob
        Args:
            blobpath: blob name oz azure
            local_file_path:File path on local system
            container: Azure Blob Container name

        Returns: Blob name on azure

        """
        if len(blobpath) == 0:
            blob_file = "/".join(local_file_path.split("/")[1:])
        else:
            blob_file = blobpath
        self.file_service.create_blob_from_path(container_name=container,
                                                blob_name=blob_file,
                                                file_path=local_file_path)
        return blob_file

    def read_pickle(self, path_on_blob, container="testing-environment"):
        """
        Method reads serialized object stored in azure containers
        Args:
            path_on_blob: pickle file Path on the blob
            container: Azure Blob Container name

        Returns:Serialized object

        """
        model_obj = pickle.loads(
            self.file_service.get_blob_to_bytes(
                container_name=container,
                blob_name=path_on_blob).content)
        return model_obj

    def read_config(self, path_on_blob="", container="testing-environment"):
        """
        Reads the JSON config file present on the blob.
        Args:
            path_on_blob: JSON file path on the blob
            container: Azure Blob Container name

        Returns: Parsed JSON configuration

        """
        config_file = self.file_service.get_blob_to_text(
            container_name=container, blob_name=path_on_blob)
        return json.loads(config_file.content)

    def list_folders_in_blob_path(self,
                                  blob_path="",
                                  container="testing-environment"):
        """
        Lists the Azure blob folder contents.
        Args:
            blob_path: Folder path on the blob
            container: Azure Blob Container name

        Returns: List of blob names under the given prefix

        """
        list_generator = self.file_service.list_blobs(container_name=container,
                                                      prefix=blob_path)
        folders_under_blob = list(set([pth.name for pth in list_generator]))
        return folders_under_blob

    def copy_blob_same_storage(self,
                               sourceblobpath="",
                               destinationblobpath="",
                               sourcecontainer="",
                               destinationcontainer=""):
        """
            This method copies blob across containers in same storage account.
            Args:
               sourceblobpath:source blob name
               destinationblobpath:destionation blob name
               sourcecontainer:source container name
               destinationcontainer:destination container
            """

        if len(destinationblobpath) == 0:
            destinationblobpath = sourceblobpath

        source_blob_url = self.file_service.make_blob_url(
            sourcecontainer, sourceblobpath)

        self.file_service.copy_blob(destinationcontainer, destinationblobpath,
                                    source_blob_url)

    def copy_blob_across_storage(self,
                                 sourceblobpath="",
                                 destinationblobpath="",
                                 sourcecontainer="",
                                 destinationcontainer="",
                                 destination_source_account_name="",
                                 destination_source_account_key=""):
        """
        This method copies a blob across different storage accounts.

        Args:
            sourceblobpath: Source blob name
            destinationblobpath: Destination blob name
            sourcecontainer: Source container name
            destinationcontainer: Destination container name
            destination_source_account_name: Storage account name for the destination storage account
            destination_source_account_key: Storage account key for the destination storage account

        """

        destinationfileservice = BlockBlobService(
            account_name=destination_source_account_name,
            account_key=destination_source_account_key)
        local_path = self.download_blob(blobpath=sourceblobpath,
                                        container=sourcecontainer)

        if len(destinationblobpath) == 0:
            destinationblobpath = "/".join(local_path.split("/")[1:])

        destinationfileservice.create_blob_from_path(
            container_name=destinationcontainer,
            blob_name=destinationblobpath,
            file_path=local_path)
        os.remove(local_path)
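
A brief usage sketch of the FileLayer class above. The storage account name, access key, container, and paths are placeholders, and the sketch assumes the same legacy azure-storage-blob (<= 2.1) SDK imported above.

# Hypothetical usage of FileLayer; credentials, container, and paths are placeholders.
layer = FileLayer(storage_account_name="mystorageaccount",
                  storage_account_access_key="<access-key>")

# Download a blob to a local path, then read a JSON config from the same container.
local_csv = layer.download_blob(blobpath="data/input.csv",
                                local_file_path="data/input.csv",
                                container="testing-environment")
config = layer.read_config(path_on_blob="configs/settings.json",
                           container="testing-environment")

# Upload a result file back; the return value is the blob name that was created.
result_blob = layer.upload_to_blob(local_file_path="data/output.csv",
                                   blobpath="results/output.csv",
                                   container="testing-environment")
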
Example #2
import fnmatch
import os

# Legacy azure-storage-blob (<= 2.1) SDK, which provides BlockBlobService.
from azure.storage.blob import BlockBlobService


class AzureBlobFileSystem(object):
    def __init__(self,
                 account_name=None,
                 account_key=None,
                 sas_token=None,
                 connection_string=None,
                 **storage_options):
        account_name = account_name or os.environ.get(
            "AZURE_BLOB_ACCOUNT_NAME")
        account_key = account_key or os.environ.get("AZURE_BLOB_ACCOUNT_KEY")
        sas_token = sas_token or os.environ.get("AZURE_BLOB_SAS_TOKEN")
        connection_string = connection_string or os.environ.get(
            "AZURE_BLOB_CONNECTION_STRING")
        print(account_name, account_key)
        self.connection = BlockBlobService(
            account_name=account_name,
            account_key=account_key,
            sas_token=sas_token,
            connection_string=connection_string,
            protocol=storage_options.get("protocol") or "https",
            endpoint_suffix=storage_options.get("endpoint_suffix"),
            custom_domain=storage_options.get("custom_domain"))
        self.sep = "/"

    def ls(self, container, pattern=None):
        return list(
            set(
                filter(
                    lambda x: fnmatch.fnmatch(x, pattern) if pattern else x,
                    map(lambda x: x.name,
                        self.connection.list_blobs(container)))))

    def mkdir(self, container, dir_name):
        self.touch(container, "{dir_name}/".format(dir_name=dir_name))

    def rm(self, container, full_path):
        if self.connection.exists(container, full_path):
            path_delete_lease = None
            try:
                # Take a lease on the blob so nothing else can modify it while
                # it is being deleted; release the lease again if deletion fails.
                path_delete_lease = self.connection.acquire_blob_lease(
                    container, full_path)
                self.connection.delete_blob(container,
                                            full_path,
                                            lease_id=path_delete_lease)
            except Exception:
                if path_delete_lease is not None:
                    self.connection.release_blob_lease(container, full_path,
                                                       path_delete_lease)
                raise
        else:
            raise IOError(
                "File '{file}' does not exist under container '{container}'".
                format(file=full_path, container=container))

    def touch(self, container, full_path):
        container_lease = None
        try:
            container_lease = self.connection.acquire_container_lease(
                container)
            self.connection.create_blob_from_text(container, full_path, "")
        finally:
            if container_lease is not None:
                self.connection.release_container_lease(
                    container, container_lease)
        return full_path

    def mv(self, container, src_path, dst_path):
        # Copy then delete; on any failure, remove the copy and report failure.
        try:
            self.cp(container, src_path, dst_path)
            self.rm(container, src_path)
            return True
        except Exception:
            self.rm(container, dst_path)
            return False

    def cp(self, container, full_src_path, full_dst_path):
        copy_container_lease = None
        try:
            copy_container_lease = self.connection.acquire_container_lease(
                container)
            self.connection.copy_blob(
                container, full_dst_path,
                self.connection.make_blob_url(container, full_src_path))
        finally:
            if copy_container_lease is not None:
                self.connection.release_container_lease(
                    container, copy_container_lease)

    def du(self, container):
        return {
            blob.name: blob.properties.content_length
            for blob in self.connection.list_blobs(container)
        }

    def last_modified(self, container, full_path):
        return self.connection.get_blob_properties(
            container, full_path).properties.last_modified

    def head(self, container, full_path, bytes_count):
        return self.connection.get_blob_to_bytes(container,
                                                 full_path,
                                                 start_range=0,
                                                 end_range=bytes_count -
                                                 1).content

    def tail(self, container, full_path, bytes_count):
        size = self.connection.get_blob_properties(
            container, full_path).properties.content_length
        return self.connection.get_blob_to_bytes(container,
                                                 full_path,
                                                 start_range=size -
                                                 bytes_count,
                                                 end_range=size - 1).content

    def exists(self, container, full_path):
        return self.connection.exists(container, full_path)
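
A short, hypothetical usage sketch of AzureBlobFileSystem. The container name and paths below are placeholders, and credentials are assumed to be supplied through the AZURE_BLOB_* environment variables read in __init__.

# Hypothetical usage; container and blob names are placeholders.
fs = AzureBlobFileSystem()  # picks up AZURE_BLOB_* environment variables

fs.mkdir("testing-environment", "reports")                      # creates the "reports/" marker blob
fs.touch("testing-environment", "reports/summary.txt")          # creates an empty blob
print(fs.exists("testing-environment", "reports/summary.txt"))  # True once the blob exists
print(fs.ls("testing-environment", pattern="reports/*"))        # blob names matching the pattern
print(fs.du("testing-environment"))                             # {blob name: size in bytes}
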
Example #3
import json
import logging
import traceback
from datetime import datetime

import azure.functions as func
import nltk
import requests
# Legacy table SDK; TableService is also available from azure.storage.table
# in older azure-storage packages.
from azure.cosmosdb.table.tableservice import TableService
from azure.storage.blob import BlockBlobService

# ACCOUNT_NAME, ACCOUNT_KEY, CONTAINER_NAME, TABLE_NAME_API_T2S,
# TABLE_NAME_TRACKING, AI_API_REGION, SPEECH2TEXT_API_KEY and the helper
# processar_palavra_chave are expected to be defined elsewhere in this module.


def main(msg: func.QueueMessage) -> None:

    logging.info("Processing audio analysis queue...")

    stopwords = nltk.corpus.stopwords.words("portuguese")

    input_message = msg.get_body().decode('utf-8')

    logging.info(input_message)

    input_message = json.loads(input_message)

    logging.info("Processing file " + input_message["blob"] + "...")

    table_service = TableService(account_name=ACCOUNT_NAME,
                                 account_key=ACCOUNT_KEY)
    records = table_service.query_entities(
        TABLE_NAME_API_T2S,
        filter="PartitionKey eq 'recording' and RowKey eq '" +
        input_message["meeting-code"] + "' and RecognitionStatus eq 'Success'")

    if len(records.items) == 0:
        blob_service = BlockBlobService(account_name=ACCOUNT_NAME,
                                        account_key=ACCOUNT_KEY)
        blob_entry = blob_service.get_blob_to_bytes(CONTAINER_NAME,
                                                    input_message["blob"],
                                                    timeout=60)
        audio_bytes = blob_entry.content

        url_token_api = "https://"+AI_API_REGION + \
            ".api.cognitive.microsoft.com/sts/v1.0/issueToken"
        api_key = SPEECH2TEXT_API_KEY

        headers = {"Content-Length": "0", "Ocp-Apim-Subscription-Key": api_key}

        start_time = datetime.now()

        api_response = requests.post(url_token_api, headers=headers)
        access_token = str(api_response.content.decode('utf-8'))

        url_stt_api = "https://"+AI_API_REGION + \
            ".stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language=pt-BR"

        headers = {
            "Authorization": "Bearer {0}".format(access_token),
            "Content-Length": str(len(audio_bytes)),
            "Content-type": "audio/wav",
            "codec": "audio/pcm",
            "samplerate": "16000"
        }

        record = {}
        api_response = None
        res_json = None

        try:
            api_response = requests.post(url_stt_api,
                                         headers=headers,
                                         params=None,
                                         data=audio_bytes)

            end_time = datetime.now()
            api_time = end_time - start_time

            logging.info(api_response)

            res_json = json.loads(api_response.content.decode('utf-8'))

            record["RecognitionStatus"] = res_json["RecognitionStatus"]
            record["TextConverted"] = res_json["DisplayText"]
            record["ApiResponse"] = json.dumps(res_json)
            record["ApiTimeResponseSeconds"] = api_time.seconds

            logging.info("Speech to text processed.")

        except Exception as error:
            record["RecognitionStatus"] = "Request Fail"
            record["Exception"] = traceback.format_exc()

            logging.error(error)

        finally:
            record["PartitionKey"] = input_message["meeting-code"]
            record["RowKey"] = input_message["file-name"]
            table_service.insert_or_replace_entity(TABLE_NAME_API_T2S, record)

            logging.info("Result persisted.")

        logging.info("Result:" + str(res_json))

        if res_json is not None and "Message" in res_json:
            raise Exception(res_json["Message"])

        if res_json is not None and res_json["RecognitionStatus"] == "Success":
            logging.info("Decoded speech: " + str(res_json["DisplayText"]))

            records = table_service.query_entities(
                TABLE_NAME_TRACKING,
                filter="PartitionKey eq 'tracking-analysis' and RowKey eq '" +
                input_message["meeting-code"] + "'")
            text_converted = {
                "file-name": input_message["file-name"],
                "text": res_json["DisplayText"]
            }
            texts_converted = []

            if len(records.items) > 0:
                # Update the existing tracking entity for this meeting code.
                record = records.items[0]
                if "TextConverted" in record:
                    texts_converted = json.loads(record["TextConverted"])
                if text_converted not in texts_converted:
                    texts_converted.append(text_converted)
                record["TextConverted"] = json.dumps(texts_converted)
            else:
                # No tracking entity yet: create one for this meeting code.
                texts_converted.append(text_converted)
                record = {
                    "PartitionKey": "tracking-analysis",
                    "RowKey": input_message["meeting-code"],
                    "TextConverted": json.dumps(texts_converted)
                }

            text_list = []

            for text_converted in texts_converted:
                text_list.append(text_converted["text"])

            logging.info("Text List: " + str(text_list))

            text_list = set(text_list)
            freq_dist = processar_palavra_chave(text_list)

            record["FreqDist"] = freq_dist

            table_service.insert_or_replace_entity(TABLE_NAME_TRACKING, record)

            logging.info("Message processed successfully:" +
                         str(res_json["DisplayText"]))

        else:
            print("Descartado por falha no reconhecimento de voz.")
            logging.info(
                "Item discarded. Bad quality or audio file corrupted.")
    else:
        logging.info("Item already processed.")