예제 #1
0
def store(container_name, img_path, key, connect_str, acc_name):
    """Upload a local image file to an Azure Blob Storage container.

    The container is created on first use; the blob is uploaded only when a
    blob of the same name does not already exist (no overwrite).

    Args:
        container_name: target container name.
        img_path: local path of the image; its basename becomes the blob name.
        key: storage account key (unused; kept for interface compatibility).
        connect_str: storage account connection string.
        acc_name: storage account name (unused; kept for interface compatibility).
    """
    container_client = ContainerClient.from_connection_string(
        conn_str=connect_str, container_name=container_name)

    # Probe the container; any failure is treated as "does not exist yet".
    try:
        container_client.get_container_properties()
    except Exception:
        blob_service_client = BlobServiceClient.from_connection_string(connect_str)
        container_client = blob_service_client.create_container(container_name)

    blob_name = os.path.basename(img_path)

    # Upload only when the blob is not already present.
    blob_client = container_client.get_blob_client(blob_name)
    if not blob_client.exists():
        with open(img_path, "rb") as data:
            blob_client.upload_blob(data, blob_type="BlockBlob")
예제 #2
0
def download_blobs_as_one_json(dateDir, outputDir):
    """Merge all JSON blobs under ``dateDir/`` into one viaJsonFile_ORIGINAL.json.

    Args:
        dateDir: blob-name prefix (a date-named virtual folder) to scan.
        outputDir: local directory receiving the combined JSON file.
    """
    viaDict = []
    try:
        # Connection string is kept in a plain file next to the package data.
        with open('../packages/aerialnet/aerialnet/data/AZURE_STORAGE'
                  ) as version_file:
            AZURE_STORAGE_CONNECTION_STRING = version_file.read()
            CONTAINER_NAME = "aihistory"

        container = ContainerClient.from_connection_string(
            AZURE_STORAGE_CONNECTION_STRING, container_name=CONTAINER_NAME)

        blob_list = container.list_blobs(name_starts_with=dateDir + '/')

        # BUGFIX: `idx` was undefined when the listing was empty, so the
        # summary print raised NameError and the output file was never
        # written. Track the count explicitly instead.
        total = 0
        for idx, blob in enumerate(blob_list):
            total = idx + 1
            print('Downloading blob #{}: {}\n'.format(idx + 1, blob.name),
                  flush=True)

            if '.json' in blob.name:
                blob_client = container.get_blob_client(blob.name)
                download_stream = blob_client.download_blob()
                jsonContent = json.loads(download_stream.readall())
                viaDict.append(jsonContent)

        print('Total blobs downloaded: {}'.format(total))
        with open(os.path.join(outputDir, 'viaJsonFile_ORIGINAL.json'),
                  'w') as f:
            json.dump(viaDict, f)
    except Exception as ex:
        print('Exception:')
        print(ex)
def main(req: func.HttpRequest) -> func.HttpResponse:
    """Return the blob names under the requested folder of the "photos"
    container as a JSON response; 404 when nothing matches."""
    logging.info('Python HTTP trigger function processed a request.')

    # Optional folder prefix taken straight from the query string.
    folder = req.params.get('folder')

    # List everything in "photos" that starts with that prefix.
    container = ContainerClient.from_connection_string(
        conn_str=os.environ["AzureWebJobsStorage"], container_name="photos")
    matching = list(container.list_blobs(name_starts_with=folder))

    if not matching:
        return func.HttpResponse(status_code=404)

    # Response payload: just the blob names.
    photo_response = {"files": [blob.name for blob in matching]}

    return func.HttpResponse(
        json.dumps(photo_response),
        status_code=200,
        mimetype="application/json",
    )
예제 #4
0
def azureUploaderFirst(self, photoURL, instagramURL, photoName, blobName):
    """Create a container named *blobName* and upload the photo into it,
    falling back to the existing container when creation fails.

    Credentials are fetched from the remote login JSON document.
    """
    # loginJsonFile is a module-level URL/path — TODO confirm at caller.
    content = requests.get(loginJsonFile)
    data = json.loads(content.content)
    # Account/key are read but unused here; keeping the lookups validates the
    # login document's schema early.
    accountName = data["Login"]["Account"]
    accountKey = data["Login"]["Key"]

    # Connection string used for all client construction below.
    connectStr = data["Login"]["connectionString"]

    # The container is named after the blob.
    container = blobName
    # Client used to probe whether the container already exists.
    storage = ContainerClient.from_connection_string(connectStr, container)

    # Service client used to create the container / upload blobs.
    blobCreator = BlobServiceClient.from_connection_string(connectStr)

    try:
        createContainer("", photoURL, instagramURL, photoName, blobName,
                        container, blobCreator)
    except Exception:
        # BUGFIX: narrowed from a bare `except:` so SystemExit and
        # KeyboardInterrupt still propagate. Fallback path: container exists,
        # so just download/upload the picture.
        if storage.get_container_properties():
            photoDownloader.pictureDownloader(self, photoURL, instagramURL,
                                              photoName, blobName, container,
                                              blobCreator)
    def download_all_models(self):
        """Download every blob in the 'models' container into static/models/.

        Best-effort sync: a failing blob or local I/O error is printed and
        skipped; only a wholesale failure aborts the loop.
        """
        try:
            connect_str = self.connect_str
            container_client = ContainerClient.from_connection_string(
                conn_str=connect_str, container_name='models')

            model_dir = 'static/models/'
            if not os.path.exists(model_dir):
                print("making model folder")
                os.makedirs(model_dir)

            for blob in container_client.list_blobs():
                print(blob.name + '\n')

                # Fetch via the container client (the extra service client
                # the original built was redundant).
                bc = container_client.get_blob_client(blob.name)
                target = os.path.join(model_dir, blob.name)
                # BUGFIX: blob names may contain '/' separators; create the
                # local sub-directory so open() does not fail.
                os.makedirs(os.path.dirname(target), exist_ok=True)
                try:
                    with open(target, 'wb') as file:
                        file.write(bc.download_blob().readall())
                except IOError as e:
                    print(e)
        except Exception as ex:
            print('Exception:')
            print(ex)
예제 #6
0
def CopyImages():
    """Copy every image blob from the public source container into the
    data lake's image container."""

    print('Transferring images to Data Lake.')

    # Source container is public, read through a fixed SAS token.
    source = ContainerClient.from_container_url(
        container_url="https://bus5wb.blob.core.windows.net/imagecollection",
        credential=
        "?st=2021-05-19T04%3A43%3A08Z&se=2022-05-20T04%3A43%3A00Z&sp=rl&sv=2018-03-28&sr=c&sig=yV19KD0EzGYQOecRpa2em6Fc6IRQ7%2FhowiAaO%2Bk70O4%3D"
    )
    destination = ContainerClient.from_connection_string(
        connection_string, container_name=container_name_images)

    for blob in source.list_blobs():
        nycImageUrl = source.get_blob_client(blob.name).url

        # The last path segment of the source URL names the destination blob.
        fileName = urlparse(nycImageUrl)[2].split('/')[-1]

        try:
            # Server-side copy into the data lake container.
            destination.get_blob_client(fileName).start_copy_from_url(nycImageUrl)
        except Exception as e:
            print(f"Error {e} - {fileName} - {nycImageUrl}")
예제 #7
0
def download_file(save_path, cloud_file_name, container_name):
    """Download *cloud_file_name* from *container_name* into *save_path*.

    Returns:
        (local_path, True) on success; (" ", False) when the blob is missing
        or any storage error occurs.
    """
    blobnames = []
    try:
        with ContainerClient.from_connection_string(
                storageConnectionString, container_name) as container_client:

            # Collect only exact name matches.
            blob_list = container_client.list_blobs()
            blobnames = [blob.name for blob in blob_list
                         if blob.name == cloud_file_name]

            if len(blobnames) != 1:
                print("Could not find requested blob ", str(cloud_file_name),
                      " in the following list:")
                for blob in blob_list:
                    print(blob.name)
                return " ", False

            # Exactly one match — stream it into the download folder.
            for filename in blobnames:
                with container_client.get_blob_client(
                        filename) as blob_client:
                    with open(os.path.join(save_path, filename),
                              "wb") as file_path:
                        file_path.write(
                            blob_client.download_blob().readall())
            print("Downloaded file: " + str(cloud_file_name))
            return os.path.join(save_path, filename), True
    except Exception as ex:
        print('Azure Blob Storage Exception:')
        print(ex)
        return " ", False
예제 #8
0
def main(args) -> None:
    """Entry point.

    Uploads every docset cache zip found in ``args.input`` to the Azure
    container, skipping files that are not caches, docsets not selected by
    ``args.only``, and names already uploaded (unless ``args.force``).

    Args:
        args: CLI arguments.
    """
    cc = ContainerClient.from_connection_string(
        AZ_CONN_STR.format(key=args.key), AZ_CONTAINER)

    # Names already present remotely, for the duplicate check below.
    existing = {blob.name for blob in cc.list_blobs()}

    for cache_file in Path(args.input).iterdir():
        match = re.match(r"([a-z]+)-([a-f0-9]+)\.zip", cache_file.name)
        if not match:
            log.info(f"Skipping {cache_file} (not a cache file)")
            continue

        docset = match.group(1)
        if args.only and docset not in args.only:
            continue

        if not args.force and cache_file.name in existing:
            log.info(f"Skipping upload of {cache_file.name} (already exists)")
            continue

        with open(cache_file, "rb") as f:
            log.info(f"Uploading {cache_file.name}...")
            cc.get_blob_client(cache_file.name).upload_blob(f, overwrite=True)
예제 #9
0
def get_newest_file(container_name, substring):
    """Find the blob whose name embeds the most recent timestamp.

    Blob names are expected to look like ``<substring><%Y-%m-%d_%H-%M>``;
    candidates whose suffix does not parse as that timestamp are skipped.

    Args:
        container_name: Azure container to scan.
        substring: prefix that candidate blob names must contain.

    Returns:
        (newest blob name or "" when none matched, bool "found" flag).
    """
    newest_filename = ""
    newest_timestamp = None
    matches = 0
    try:
        with ContainerClient.from_connection_string(
                storageConnectionString, container_name) as container_client:
            for blob in container_client.list_blobs():
                if substring not in blob.name:
                    continue
                try:
                    timestamp = datetime.datetime.strptime(
                        blob.name[len(substring):], '%Y-%m-%d_%H-%M')
                except ValueError:
                    # Name matched the prefix but not the timestamp layout.
                    continue
                matches += 1
                # BUGFIX: the original referenced newest_timestamp before
                # assignment when the FIRST substring match failed to parse;
                # a None sentinel makes the comparison safe.
                if newest_timestamp is None or newest_timestamp < timestamp:
                    newest_timestamp = timestamp
                    newest_filename = blob.name
    except Exception as ex:
        matches = 0
        print(ex)

    return newest_filename, matches > 0
예제 #10
0
 def _reinit_session(self):
     """
     Create a new session.

     Credential precedence: connection string, then SAS token, then shared
     key, then azure-identity default credentials.
     """
     # Connection-string auth short-circuits everything else.
     if "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
         logging.info("Authenticating to Azure with connection string")
         self.container_client = ContainerClient.from_connection_string(
             conn_str=os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
             container_name=self.bucket_name,
         )
         return

     if "AZURE_STORAGE_SAS_TOKEN" in os.environ:
         logging.info("Authenticating to Azure with SAS token")
         credential = os.getenv("AZURE_STORAGE_SAS_TOKEN")
     elif "AZURE_STORAGE_KEY" in os.environ:
         logging.info("Authenticating to Azure with shared key")
         credential = os.getenv("AZURE_STORAGE_KEY")
     else:
         logging.info(
             "Authenticating to Azure with default credentials")
         # azure-identity is not part of azure-storage-blob so only import
         # it if needed
         try:
             from azure.identity import DefaultAzureCredential
         except ImportError:
             raise SystemExit(
                 "Missing required python module: azure-identity")
         credential = DefaultAzureCredential()

     self.container_client = ContainerClient(
         account_url=self.account_url,
         container_name=self.bucket_name,
         credential=credential,
     )
예제 #11
0
    def upload_json(self, rawdata, fname):
        """Upload *rawdata* as blob *fname* with an application/json MIME type.

        Args:
            rawdata: bytes/str payload to store.
            fname: name for the new blob (upload raises if it already exists).
        """
        # SECURITY: a live account key was hard-coded here. It is kept only as
        # a backward-compatible fallback — set AZURE_STORAGE_CONNECTION_STRING
        # in the environment and rotate the leaked key.
        CONNECT_STR = os.environ.get(
            "AZURE_STORAGE_CONNECTION_STRING",
            "DefaultEndpointsProtocol=https;AccountName=stosblobv2;AccountKey=4lcPBLS0bAypEaU1QFGd4QadH5WzvyL3vy3IS+gNhrij4I1dPaXcu9ATl+XdrctTQlH8/oG3qKpdy19FYg6WEg==;EndpointSuffix=core.windows.net")
        CONTAINER_NAME = "test"

        # Instantiate a ContainerClient. This is used when uploading a blob
        # from your local file.
        container_client = ContainerClient.from_connection_string(
            conn_str=CONNECT_STR,
            container_name=CONTAINER_NAME
        )

        # Pin the MIME type so consumers always see application/json
        # (the other ContentSettings fields default to None anyway).
        content_setting = ContentSettings(content_type='application/json')

        # Upload file
        container_client.upload_blob(
            name=fname,
            data=rawdata,
            content_settings=content_setting)

        # Check the result
        # NOTE(review): this lists blobs starting with "BLOB", which will not
        # include *fname* unless it happens to use that prefix — confirm the
        # intended verification.
        all_blobs = container_client.list_blobs(name_starts_with="BLOB", include=None)
        for each in all_blobs:
            print("RES: ", each)
 def __init__(self, connection_string: str, storage_name: str) -> None:
     """Bind this storage backend to one Azure blob container.

     Args:
         connection_string: Azure storage account connection string.
         storage_name: container name; also used as the local cache dir name.
     """
     self.__client = ContainerClient.from_connection_string(conn_str=connection_string, container_name=storage_name)
     # Lazily-populated listing of remote files; None until first fetched.
     self.__remote_files_cache: Optional[List[dict]] = None
     super().__init__(
         remote_root_dir=Path(""),
         local_root_dir=Path(DEFAULT_ROOT_DIR, storage_name),
     )
예제 #13
0
def list_blob_in_container(connection_s: str, container_n: str) -> list:
    """ list the blobs within a given container of an Azure storage account
    Helper function for debugging in case no access to azure

    Arguments:
        connection_s {str} -- an azure storage account connection string
        container_n {str} -- a container within a storage account

    Returns:
        blob_names_list -- the list of blob names within container
    """
    try:
        campaign_container = ContainerClient.from_connection_string(
            conn_str=connection_s, container_name=container_n)
        return [blob.name for blob in campaign_container.list_blobs()]
    except Exception:
        # BUGFIX: narrowed from a bare `except:` so SystemExit and
        # KeyboardInterrupt are not swallowed before the deliberate exit().
        logger.info(
            "The container you are trying to list blob from probably does not exist."
        )
        logger.info(
            "Early exit of ETL process as container probably does not exist.")
        exit()
예제 #14
0
    def __init__(self,
                 storage_account,
                 storage_container,
                 key,
                 protocol='https',
                 endpoint_suffix='core.windows.net'):
        """Wire up container and blob-service clients for one Azure account.

        Args:
            storage_account: Azure storage account name.
            storage_container: container within the account.
            key: shared account key.
            protocol: endpoint protocol for the connection string.
            endpoint_suffix: endpoint DNS suffix for the connection string.
        """
        self.AZURE_STORAGE_ACCOUNT = storage_account
        self.AZURE_STORAGE_CONTAINER = storage_container
        self.PROTOCOL = protocol
        self.ENDPOINT_SUFFIX = endpoint_suffix
        self.AZURE_STORAGE_KEY = key

        # Connection string assembled from the individual settings above.
        self.AZURE_STORAGE_CONNECTION_STRING = (
            f'DefaultEndpointsProtocol={self.PROTOCOL};'
            f'AccountName={self.AZURE_STORAGE_ACCOUNT};'
            f'AccountKey={self.AZURE_STORAGE_KEY};'
            f'EndpointSuffix={self.ENDPOINT_SUFFIX}')

        self.container_url = _make_url(
            f'https://{self.AZURE_STORAGE_ACCOUNT}.blob.core.windows.net',
            self.AZURE_STORAGE_CONTAINER)

        self.container_client = ContainerClient.from_connection_string(
            self.AZURE_STORAGE_CONNECTION_STRING, self.AZURE_STORAGE_CONTAINER)
        self.blob_service_client = BlobServiceClient.from_connection_string(
            self.AZURE_STORAGE_CONNECTION_STRING)
        # Per-blob client is created on demand elsewhere.
        self.blob_client = None
예제 #15
0
    def test_cache_correctness(self):
        """Verify the RichPath read cache is invalidated when the remote blob
        changes underneath it.

        Sequence matters: write -> read (populate cache) -> read (cache hit)
        -> overwrite via the raw Azurite client -> read (must re-fetch).
        """
        with self._setup_test() as az_info:
            for suffix in ('.jsonl.gz', '.msgpack.l.gz'):
                random_elements = list(range(100))
                remote_path = RichPath.create("azure://devstoreaccount1/test1/compressed/data" + suffix, az_info)
                remote_path.save_as_compressed_file(random_elements)

                # Read once
                read_nums = list(remote_path.read_by_file_suffix())
                self.assertListEqual(read_nums, random_elements)

                # Hit Cache
                read_nums = list(remote_path.read_by_file_suffix())
                self.assertListEqual(read_nums, random_elements)
                self.assertTrue(remote_path.exists())
                self.assertTrue(remote_path.is_file())

                # Update file through other means, and ensure that cache is appropriately invalidated.
                new_elements = list(range(500))
                with TemporaryDirectory() as tmp:
                    path = os.path.join(tmp, 'tst'+suffix)
                    if suffix == '.jsonl.gz':
                        save_jsonl_gz(new_elements, path)
                    else:
                        save_msgpack_l_gz(new_elements, path)
                    # Overwrite the blob directly with the Azurite client,
                    # bypassing RichPath, so the cached copy becomes stale.
                    container_client = ContainerClient.from_connection_string(self.AZURITE_DEVELOPMENT_CONNECTION_STRING,
                                                                              "test1")
                    blob_client = container_client.get_blob_client("compressed/data" + suffix)
                    with open(path, 'rb') as f:
                        blob_client.upload_blob(f, overwrite=True)

                # The stale cache must be detected and the new contents read.
                read_nums = list(remote_path.read_by_file_suffix())
                self.assertListEqual(read_nums, new_elements)
                self.assertTrue(remote_path.exists())
                self.assertTrue(remote_path.is_file())
예제 #16
0
 def _create_test_container(self):
     """Ensure the Azurite container "test1" exists; tolerate re-creation."""
     container: ContainerClient = ContainerClient.from_connection_string(
         self.AZURITE_DEVELOPMENT_CONNECTION_STRING, container_name="test1")
     try:
         container.create_container()
     except ResourceExistsError:
         # Already created by a previous run — nothing to do.
         pass
예제 #17
0
def ProcessAllImages():
    """Fetch and persist NYC metadata for every image blob in the container."""

    print('Processing Images.')

    container = ContainerClient.from_connection_string(
        connection_string, container_name=container_name_images)

    blobs = container.list_blobs()

    # One SAS token covers every per-image request below.
    token = GetSASToken()

    for blob in blobs:
        blob_client = container.get_blob_client(blob.name)

        # Metadata file name derived from the image URL.
        fileName = GetFilePathFromImageURL(blob_client.url)
        nycImageUrl = f'{blob_client.url}?{token}'

        try:
            SaveImageMetadata(GetNYCImageMetadata(nycImageUrl), fileName)
            print(f'Completed processing {fileName}.')
        except Exception as e:
            print(f"Error {e} - {fileName}")
예제 #18
0
    def __init__(
        self,
        blob_storage_conn_str,
        container_base,
        container_processed,
        container_quarantined,
    ):
        """Creates a Blob Storage object

        ...

        Parameters
        -------
        blob_storage_conn_str: str
            Connection string with Blob Storage.
        container_base: str
            Container where files will be read.
        container_processed: str
            Container where files will be sent after being processed
            (Processed with success).
        container_quarantined: str
            Container that stores quarantine files (Processed with failure).

        """

        self.blob_storage_conn_str = blob_storage_conn_str
        self.container_base = container_base
        self.container_processed = container_processed
        self.container_quarantined = container_quarantined

        # init blob service & container connectivity
        # instantiate the blob storage class to perform operations on it
        self.blob_service_client = BlobServiceClient.from_connection_string(
            conn_str=self.blob_storage_conn_str)
        self.get_container_base_info = ContainerClient.from_connection_string(
            conn_str=self.blob_storage_conn_str,
            container_name=self.container_base)
        log.info(
            f"successfully established connection with container base: {self.container_base}"
        )
        print(
            f"successfully established connection with container base: {self.container_base}"
        )

        # get sku of the blob storage account
        account_info = self.get_container_base_info.get_account_information()
        log.info("storage sku: {}".format(account_info["sku_name"].lower()))

        # get stats of blob storage & container service info
        # NOTE(review): get_service_stats requires read-access geo-redundant
        # replication (RA-GRS) on the account — confirm, otherwise this raises
        # and the constructor fails.
        stats_blob_storage = self.blob_service_client.get_service_stats()
        log.info("blob storage replication status: {}".format(
            stats_blob_storage["geo_replication"]["status"]))
        log.info("last blob storage sync replication time: {}".format(
            self.utc_to_local(
                stats_blob_storage["geo_replication"]["last_sync_time"])))
        stats_container_base = self.get_container_base_info.get_container_properties(
        )
        log.info("last container modified time: {}".format(
            self.utc_to_local(stats_container_base.last_modified)))
예제 #19
0
 def create_container_client(self, account, container):
     """Build a ContainerClient, preferring the configured connection string.

     Falls back to a plain container-URL client when no connection string is
     configured.
     """
     if self.connection_string:
         return ContainerClient.from_connection_string(
             self.connection_string, container)
     # NOTE(review): no credential is passed on this path, so it only works
     # for publicly readable containers — confirm that is intended.
     return ContainerClient.from_container_url("https://{}/{}".format(
         account, container))
예제 #20
0
 def bucket_exists(self):
     """Return True when the configured Azure container is reachable."""
     container = ContainerClient.from_connection_string(
         self._account, self._bucket, connection_timeout=300)
     try:
         # A successful properties call means the container exists.
         container.get_container_properties()
         log.debug(output_messages['DEBUG_CONTAINER_ALREADY_EXISTS'] % self._bucket, class_name=AZURE_STORAGE_NAME)
     except Exception:
         return False
     return True
예제 #21
0
def find_azure_storage_blob_file_names(conn_str, container_name, prefix=''):
    """
    Fetch all blobs in the Azure container whose names start with *prefix*,
    returned as a list of azure.storage.blob BlobProperties objects.

    Args:
        conn_str: Azure storage account connection string.
        container_name: container to list.
        prefix: optional blob-name prefix filter.
    """
    container = ContainerClient.from_connection_string(
        conn_str=conn_str, container_name=container_name)
    # BUGFIX: azure-storage-blob v12 ContainerClient.list_blobs takes
    # `name_starts_with`; the old `prefix` keyword was silently ignored, so
    # the filter never applied.
    return list(container.list_blobs(name_starts_with=prefix))
예제 #22
0
def getBlobUrl(imagename, connectionString):
    """Return the URL of *imagename* in the 'droneimages' container.

    Returns None (implicitly) on failure; the exception class is logged via
    xlog, matching the original sys.exc_info()[0] output.
    """
    try:
        container_client = ContainerClient.from_connection_string(
            conn_str=connectionString, container_name="droneimages")
        return container_client.get_blob_client(imagename).url
    except Exception as e:
        # BUGFIX: narrowed from a bare `except:` so SystemExit and
        # KeyboardInterrupt still propagate.
        xlog('getBlobUrl: error:', type(e))
 def _blob_service(self,
                   custom_domain=None,
                   connection_string=None) -> ContainerClient:
     """Build a ContainerClient for the configured azure_container.

     NOTE(review): `custom_domain` is forwarded into
     ContainerClient.from_connection_string, but azure-storage-blob v12 does
     not document such a keyword — it looks carried over from the legacy SDK.
     Confirm it is accepted and honored before relying on it.
     """
     # This won't open a connection or anything,
     # it's akin to a client
     return ContainerClient.from_connection_string(
         conn_str=connection_string,
         container_name=self.azure_container,
         custom_domain=custom_domain)
예제 #24
0
 def __init__(self, connection_string, **options):
     """Hold the connection plus service/container clients.

     Args:
         connection_string: Azure storage connection string.
         **options: may carry 'container_name' (defaults to
             "defaultcontainerpython").
     """
     self.connection_string = connection_string
     self.container_name = options.get('container_name',
                                       "defaultcontainerpython")
     self.blob_service_client = BlobServiceClient.from_connection_string(
         connection_string)
     self.container_client = ContainerClient.from_connection_string(
         connection_string, self.container_name)
     # Local working directory for transfers.
     self.local_dir_path = "./"
예제 #25
0
 def create_bucket(self, connection_string, container):
     """Create the Azure container and verify it is reachable.

     Raises:
         Exception: when creation or the follow-up properties probe fails;
             the original error is chained as the cause.
     """
     client = BlobServiceClient.from_connection_string(connection_string)
     try:
         client.create_container(container)
         container = ContainerClient.from_connection_string(connection_string, container, connection_timeout=300)
         # Probe the new container so a silent creation failure surfaces here.
         container.get_container_properties()
     except Exception as ex:
         # BUGFIX: chain the cause so callers can see why creation failed;
         # also dropped the useless trailing `pass`.
         raise Exception('Can\'t create Azure container.') from ex
예제 #26
0
def get_files_from_blob():
    """Print the name and metadata of every blob in the "data" container."""
    container_client = ContainerClient.from_connection_string(
        config['AZURE_STORAGE_CONNECTION_STRING'], container_name="data")

    # Walk the full listing, echoing each blob.
    for blob in container_client.list_blobs():
        print("\t" + blob.name)
        print(blob)
예제 #27
0
 def from_connection_string(cls,
                            conn_str,
                            container_name,
                            credential=None,
                            **kwargs):
     # type: (str, str, Optional[Any], str) -> BlobCheckpointStore
     """Alternate constructor: build the checkpoint store from an Azure
     storage connection string instead of account/credential parts."""
     backing_container = ContainerClient.from_connection_string(
         conn_str, container_name, credential=credential, **kwargs)
     return cls(None, None, container_client=backing_container)
예제 #28
0
    def __init__(self, conn_str: str, container_name: str,
                 overwrite: bool = True):
        """Store the connection settings and open the container client.

        Args:
            conn_str: Azure storage connection string.
            container_name: target container.
            overwrite: whether later uploads replace existing blobs.
        """
        self.conn_str = conn_str
        self.container_name = container_name
        self.overwrite = overwrite
        self.container_client = ContainerClient.from_connection_string(
            conn_str=conn_str, container_name=container_name)
예제 #29
0
def az_fs():
    """Pytest fixture yielding (path_to, read, write, mkdirs, join, resources)
    helpers backed by an Azure blob container (Azurite emulator by default).
    """
    if should_skip(AZ_URI):
        yield [None] * NUM_ATR_FS
        return

    from azure.storage.blob import ContainerClient

    monkeypatch = pytest.MonkeyPatch()
    container_name = os.environ.get("AZ_TEST_CONTAINER")
    account = None
    client = None

    # This means we are running against emulator.
    if container_name is None:
        monkeypatch.setenv("TF_AZURE_USE_DEV_STORAGE", "1")
        container_name = f"tf-io-bucket-az-{int(time.time())}"
        account = "devstoreaccount1"
        # Well-known Azurite development credentials (not a secret).
        conn_str = (
            "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;"
            "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq"
            "/K1SZFPTOtr/KBHBeksoGMGw==;"
            "BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
        )
        client = ContainerClient.from_connection_string(conn_str, container_name)
        client.create_container()
    else:
        # TODO(vnvo2409): Implement for testing against production scenario
        # NOTE(review): on this branch `client` and `account` stay None, so
        # the upload below will raise — the TODO must be resolved before
        # running with AZ_TEST_CONTAINER set.
        pass

    client.upload_blob(ROOT_PREFIX, b"")

    def parse(path):
        # Strip "<scheme>://<account>/<container>/" and keep the blob key.
        res = urlparse(path, scheme=AZ_URI, allow_fragments=False)
        return res.path.split("/", 2)[2]

    def path_to(*args):
        return f"{AZ_URI}://{account}/{container_name}/{posixpath.join(ROOT_PREFIX, *args)}"

    def read(path):
        key_name = parse(path)
        return client.download_blob(key_name).content_as_bytes()

    def write(path, body):
        key_name = parse(path)
        client.upload_blob(key_name, body)

    def mkdirs(path):
        # Blob storage has no real directories; write an empty marker blob
        # for trailing-slash paths.
        if path[-1] == "/":
            write(path, b"")

    yield path_to, read, write, mkdirs, posixpath.join, (
        client,
        container_name,
        account,
    )
    # Restore any environment variables patched above.
    monkeypatch.undo()
예제 #30
0
def upload(files, connection_string, container_name):
    """Upload each entry of *files* (objects with .name and .path) as a blob
    of the same name into *container_name*."""
    container_client = ContainerClient.from_connection_string(
        connection_string, container_name)
    print("อัพโหลดอยู่รอแปป...")

    for entry in files:
        with open(entry.path, "rb") as data:
            container_client.get_blob_client(entry.name).upload_blob(data)
            print(f'{entry.name} uploaded to blob storage')