def store(container_name, img_path, key, connect_str, acc_name):
    """Upload a local image to an Azure Blob Storage container.

    Ensures the container exists (creating it when missing), then uploads
    the file at ``img_path`` as a block blob named after its basename.
    The upload is skipped when a blob with that name already exists.

    Args:
        container_name: Target container name.
        img_path: Local path of the image file to upload.
        key: Storage account key (unused; kept for interface compatibility).
        connect_str: Azure Storage connection string.
        acc_name: Storage account name (unused; kept for interface compatibility).
    """
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container = ContainerClient.from_connection_string(connect_str, container_name)
    try:
        # Probe the container; this raises when it does not exist yet.
        container.get_container_properties()
        # Reuse the already-built client instead of constructing a second one.
        container_client = container
    except Exception:
        # Container missing: create it and use the client returned by the service.
        container_client = blob_service_client.create_container(container_name)
    blob_name = os.path.basename(img_path)
    blob_client = container_client.get_blob_client(blob_name)
    # Only upload when the blob is not already present (idempotent store).
    if not blob_client.exists():
        with open(img_path, "rb") as data:
            blob_client.upload_blob(data, blob_type="BlockBlob")
def download_blobs_as_one_json(dateDir, outputDir):
    """Merge every ``.json`` blob under ``dateDir/`` into one JSON file.

    Downloads each JSON blob from the "aihistory" container, collects the
    parsed contents into a list, and writes that list to
    ``outputDir/viaJsonFile_ORIGINAL.json``.  All errors are caught and
    printed (best-effort behavior preserved from the original).
    """
    viaDict = []
    try:
        # Connection string is stored in a plain file next to the package data.
        with open('../packages/aerialnet/aerialnet/data/AZURE_STORAGE'
                  ) as version_file:
            AZURE_STORAGE_CONNECTION_STRING = version_file.read()
        CONTAINER_NAME = "aihistory"
        container = ContainerClient.from_connection_string(
            AZURE_STORAGE_CONNECTION_STRING, container_name=CONTAINER_NAME)
        blob_list = container.list_blobs(name_starts_with=dateDir + '/')
        # FIX: the original read the loop variable `idx` after the loop,
        # which raised NameError when the listing was empty.
        total_listed = 0
        for idx, blob in enumerate(blob_list):
            total_listed = idx + 1
            print('Downloading blob #{}: {}\n'.format(idx + 1, blob.name),
                  flush=True)
            # FIX: endswith() instead of substring test, so names such as
            # 'x.jsonl' or 'a.json.bak' are not mistaken for JSON blobs.
            if blob.name.endswith('.json'):
                blob_client = container.get_blob_client(blob.name)
                download_stream = blob_client.download_blob()
                jsonContent = json.loads(download_stream.readall())
                viaDict.append(jsonContent)
        print('Total blobs downloaded: {}'.format(total_listed))
        with open(os.path.join(outputDir, 'viaJsonFile_ORIGINAL.json'), 'w') as f:
            json.dump(viaDict, f)
    except Exception as ex:
        print('Exception:')
        print(ex)
def main(req: func.HttpRequest) -> func.HttpResponse:
    """List blob names under an optional 'folder' prefix as a JSON response."""
    logging.info('Python HTTP trigger function processed a request.')
    # Optional query-string parameter selecting the folder prefix.
    folder = req.params.get('folder')
    # Enumerate matching blobs in the "photos" container.
    container = ContainerClient.from_connection_string(
        conn_str=os.environ["AzureWebJobsStorage"], container_name="photos")
    blob_list = list(container.list_blobs(name_starts_with=folder))
    if not blob_list:
        # Nothing under that prefix: signal not-found to the caller.
        return func.HttpResponse(status_code=404)
    # Build the response payload from the blob names.
    photo_response = {"files": [blob.name for blob in blob_list]}
    return func.HttpResponse(
        json.dumps(photo_response),
        status_code=200,
        mimetype="application/json",
    )
def azureUploaderFirst(self, photoURL, instagramURL, photoName, blobName):
    """Upload a photo to Azure, creating the container on first use.

    Reads account credentials from the remote ``loginJsonFile`` JSON, tries
    to create the container (first upload path), and on failure falls back
    to the regular download/upload path when the container already exists.

    Args:
        photoURL: Source URL of the photo.
        instagramURL: Originating Instagram URL (passed through).
        photoName: Name to store the photo under.
        blobName: Used both as blob identifier and container name.
    """
    content = requests.get(loginJsonFile)
    data = json.loads(content.content)
    # NOTE: account name/key are read but currently unused; the connection
    # string below is what actually authenticates.
    accountName = data["Login"]["Account"]
    accountKey = data["Login"]["Key"]
    connectStr = data["Login"]["connectionString"]
    # The container is named after the blob.
    container = blobName
    # Client used to check whether the container already exists.
    storage = ContainerClient.from_connection_string(connectStr, container)
    # Service client used to create the container and upload blobs.
    blobCreator = BlobServiceClient.from_connection_string(connectStr)
    try:
        createContainer("", photoURL, instagramURL, photoName, blobName,
                        container, blobCreator)
    # FIX: narrowed from a bare `except:` which also trapped SystemExit
    # and KeyboardInterrupt.
    except Exception:
        if storage.get_container_properties():
            photoDownloader.pictureDownloader(self, photoURL, instagramURL,
                                              photoName, blobName, container,
                                              blobCreator)
def download_all_models(self):
    """Download every blob from the 'models' container into static/models/.

    Creates the local folder when missing; per-file I/O errors are printed
    and skipped, and any other failure is caught and printed (best-effort
    behavior preserved from the original).
    """
    try:
        connect_str = self.connect_str
        # Service client used to fetch individual blobs.
        blob_service_client = BlobServiceClient.from_connection_string(
            connect_str)
        # Container client used only to enumerate the blobs.
        container_client = ContainerClient.from_connection_string(
            conn_str=connect_str, container_name='models')
        file_path = 'static/models/'
        if not os.path.exists(file_path):
            print("making model folder")
            os.makedirs(file_path)
        blob_list = container_client.list_blobs()
        for blob in blob_list:
            print(blob.name + '\n')
            # FIX: pass the blob name explicitly rather than the whole
            # BlobProperties object.
            bc = blob_service_client.get_blob_client(container='models',
                                                     blob=blob.name)
            filename = blob.name
            try:
                # FIX: os.path.join instead of fragile string concatenation.
                with open(os.path.join(file_path, filename), 'wb') as file:
                    data = bc.download_blob()
                    file.write(data.readall())
            except IOError as e:
                print(e)
    except Exception as ex:
        print('Exception:')
        print(ex)
def CopyImages():
    """Ingest the images to the data lake.

    Server-side-copies every blob from the public 'imagecollection'
    container into the configured destination container; per-file
    failures are printed and skipped.
    """
    print('Transferring images to Data Lake.')
    # Source container: public account accessed with a SAS token.
    # NOTE(review): the SAS token is hard-coded here (and its `se` expiry
    # is 2022-05-20) — consider loading it from configuration or a secret
    # store instead of embedding it in source.
    container = ContainerClient.from_container_url(
        container_url="https://bus5wb.blob.core.windows.net/imagecollection",
        credential=
        "?st=2021-05-19T04%3A43%3A08Z&se=2022-05-20T04%3A43%3A00Z&sp=rl&sv=2018-03-28&sr=c&sig=yV19KD0EzGYQOecRpa2em6Fc6IRQ7%2FhowiAaO%2Bk70O4%3D"
    )
    # Destination container in the data lake (module-level configuration).
    container_client = ContainerClient.from_connection_string(
        connection_string, container_name=container_name_images)
    blobs_list = container.list_blobs()
    for blob in blobs_list:
        blob_client = container.get_blob_client(blob.name)
        nycImageUrl = blob_client.url
        # Generate filename for image metadata file.
        # The URL path's last segment becomes the destination blob name.
        filePath = urlparse(nycImageUrl)[2]
        filePathParts = filePath.split('/')
        fileName = filePathParts[len(filePathParts) - 1]
        try:
            # Instantiate a new BlobClient
            blob_client = container_client.get_blob_client(fileName)
            # Upload the blob
            # start_copy_from_url is asynchronous on the service side; this
            # does not wait for the copy to complete.
            blob_client.start_copy_from_url(nycImageUrl)
        except Exception as e:
            print(f"Error {e} - {fileName} - {nycImageUrl}")
def download_file(save_path, cloud_file_name, container_name):
    """Download ``cloud_file_name`` from ``container_name`` into ``save_path``.

    Returns:
        (local_path, True) on success, or (" ", False) when the blob was
        not found or any storage error occurred.
    """
    matches = []
    try:
        with ContainerClient.from_connection_string(
                storageConnectionString, container_name) as container_client:
            # First check if the file actually exists in the container.
            blob_list = container_client.list_blobs()
            for blob in blob_list:
                if blob.name == cloud_file_name:
                    matches.append(blob.name)
            if len(matches) == 1:
                # Exactly one match: stream it into the download folder.
                for filename in matches:
                    with container_client.get_blob_client(
                            filename) as blob_client:
                        with open(os.path.join(save_path, filename),
                                  "wb") as file_path:
                            file_path.write(
                                blob_client.download_blob().readall())
                print("Downloaded file: " + str(cloud_file_name))
                return os.path.join(save_path, filename), True
            # No (or ambiguous) match: report what the container holds.
            print("Could not find requested blob ", str(cloud_file_name),
                  " in the following list:")
            for blob in blob_list:
                print(blob.name)
            return " ", False
    except Exception as ex:
        print('Azure Blob Storage Exception:')
        print(ex)
        return " ", False
def main(args) -> None:
    """Entry point.

    Args:
        args: CLI arguments.
    """
    cc = ContainerClient.from_connection_string(
        AZ_CONN_STR.format(key=args.key), AZ_CONTAINER)
    # Names already present in the container, for the skip-if-exists check.
    existing = [b.name for b in cc.list_blobs()]
    for cache_file in Path(args.input).iterdir():
        match = re.match(r"([a-z]+)-([a-f0-9]+)\.zip", cache_file.name)
        if match is None:
            log.info(f"Skipping {cache_file} (not a cache file)")
            continue
        # First capture group is the docset this archive belongs to.
        docset = match.group(1)
        if args.only and docset not in args.only:
            continue
        if not args.force and cache_file.name in existing:
            log.info(f"Skipping upload of {cache_file.name} (already exists)")
            continue
        with open(cache_file, "rb") as f:
            log.info(f"Uploading {cache_file.name}...")
            cc.get_blob_client(cache_file.name).upload_blob(f, overwrite=True)
def get_newest_file(container_name, substring):
    """Find the newest blob whose name contains ``substring``.

    Blob names are expected to carry a ``%Y-%m-%d_%H-%M`` timestamp directly
    after the substring; names that do not parse are ignored.

    Returns:
        (newest_filename, found): the newest matching blob name (empty
        string when none parsed) and whether at least one valid match
        was seen.
    """
    newest_filename = ""
    newest_timestamp = None
    count = 0
    try:
        with ContainerClient.from_connection_string(
                storageConnectionString, container_name) as container_client:
            for blob in container_client.list_blobs():
                if substring not in blob.name:
                    continue
                try:
                    timestamp = datetime.datetime.strptime(
                        blob.name[len(substring):], '%Y-%m-%d_%H-%M')
                # FIX: narrowed from a bare `except` to the parse failure.
                except ValueError:
                    continue
                # FIX: the original incremented its counter before parsing,
                # so an unparseable match could return ("", True).
                count += 1
                if newest_timestamp is None or newest_timestamp < timestamp:
                    newest_timestamp = timestamp
                    newest_filename = blob.name
    except Exception as ex:
        count = 0
        print(ex)
    return newest_filename, count > 0
def _reinit_session(self):
    """ Create a new session """
    # Fast path: a full connection string overrides every other credential.
    if "AZURE_STORAGE_CONNECTION_STRING" in os.environ:
        logging.info("Authenticating to Azure with connection string")
        self.container_client = ContainerClient.from_connection_string(
            conn_str=os.getenv("AZURE_STORAGE_CONNECTION_STRING"),
            container_name=self.bucket_name,
        )
        return
    # Otherwise pick a credential: SAS token, shared key, or default chain.
    if "AZURE_STORAGE_SAS_TOKEN" in os.environ:
        logging.info("Authenticating to Azure with SAS token")
        credential = os.getenv("AZURE_STORAGE_SAS_TOKEN")
    elif "AZURE_STORAGE_KEY" in os.environ:
        logging.info("Authenticating to Azure with shared key")
        credential = os.getenv("AZURE_STORAGE_KEY")
    else:
        logging.info(
            "Authenticating to Azure with default credentials")
        # azure-identity is not part of azure-storage-blob so only import
        # it if needed
        try:
            from azure.identity import DefaultAzureCredential
        except ImportError:
            raise SystemExit(
                "Missing required python module: azure-identity")
        credential = DefaultAzureCredential()
    self.container_client = ContainerClient(
        account_url=self.account_url,
        container_name=self.bucket_name,
        credential=credential,
    )
def upload_json(self, rawdata, fname):
    """Upload ``rawdata`` to the 'test' container as a JSON blob named ``fname``,
    then print the blobs whose names start with "BLOB"."""
    # SECURITY NOTE(review): a full storage account key is hard-coded below;
    # it should be rotated and loaded from configuration/secret storage.
    CONNECT_STR = "DefaultEndpointsProtocol=https;AccountName=stosblobv2;AccountKey=4lcPBLS0bAypEaU1QFGd4QadH5WzvyL3vy3IS+gNhrij4I1dPaXcu9ATl+XdrctTQlH8/oG3qKpdy19FYg6WEg==;EndpointSuffix=core.windows.net"
    CONTAINER_NAME = "test"
    # Instantiate a ContainerClient. This is used when uploading a blob from your local file.
    container_client = ContainerClient.from_connection_string(
        conn_str=CONNECT_STR, container_name=CONTAINER_NAME
    )
    data = rawdata
    output_blob_name = fname
    # This is an optional setting for guaranteeing the MIME type to be always json.
    content_setting = ContentSettings(
        content_type='application/json',
        content_encoding=None,
        content_language=None,
        content_disposition=None,
        cache_control=None,
        content_md5=None
    )
    # Upload file
    container_client.upload_blob(
        name=output_blob_name, data=data, content_settings=content_setting)
    # Check the result.
    # NOTE(review): this lists blobs prefixed "BLOB", which will not include
    # the blob just uploaded unless ``fname`` starts with "BLOB" — confirm
    # the intended prefix.
    all_blobs = container_client.list_blobs(name_starts_with="BLOB",
                                            include=None)
    for each in all_blobs:
        print("RES: ", each)
def __init__(self, connection_string: str, storage_name: str) -> None:
    """Bind this storage adapter to the Azure container ``storage_name``."""
    client = ContainerClient.from_connection_string(
        conn_str=connection_string,
        container_name=storage_name,
    )
    self.__client = client
    # Lazily populated listing of remote files; None until first fetched.
    self.__remote_files_cache: Optional[List[dict]] = None
    # Local mirror lives under DEFAULT_ROOT_DIR/<storage_name>.
    super().__init__(
        remote_root_dir=Path(""),
        local_root_dir=Path(DEFAULT_ROOT_DIR, storage_name),
    )
def list_blob_in_container(connection_s: str, container_n: str) -> list:
    """
    list the blobs within a given container of an Azure storage account
    Helper function for debugging in case no access to azure

    Arguments:
        connection_s {str} -- an azure storage account connection string
        container_n {str} -- a container within a storage account

    Returns:
        blob_names_list -- the list of blob names within the container
        (exits the process when the container cannot be listed)
    """
    try:
        campaign_container = ContainerClient.from_connection_string(
            conn_str=connection_s, container_name=container_n)
        # Comprehension replaces the manual append loop.
        return [blob.name for blob in campaign_container.list_blobs()]
    # FIX: narrowed from a bare `except:` which also swallowed
    # SystemExit/KeyboardInterrupt.
    except Exception:
        logger.info(
            "The container you are trying to list blob from probably does not exist."
        )
        logger.info(
            "Early exit of ETL process as container probably does not exist.")
        exit()
def __init__(self, storage_account, storage_container, key, protocol='https',
             endpoint_suffix='core.windows.net'):
    """Record account settings and build the container/service clients."""
    self.AZURE_STORAGE_ACCOUNT = storage_account
    self.AZURE_STORAGE_CONTAINER = storage_container
    self.PROTOCOL = protocol
    self.ENDPOINT_SUFFIX = endpoint_suffix
    self.AZURE_STORAGE_KEY = key
    # Assemble the canonical connection string from its parts.
    self.AZURE_STORAGE_CONNECTION_STRING = (
        'DefaultEndpointsProtocol={0};AccountName={1};AccountKey={2};EndpointSuffix={3}'
        .format(self.PROTOCOL, self.AZURE_STORAGE_ACCOUNT,
                self.AZURE_STORAGE_KEY, self.ENDPOINT_SUFFIX))
    # Public URL of the container itself.
    self.container_url = _make_url(
        f'https://{self.AZURE_STORAGE_ACCOUNT}.blob.core.windows.net',
        self.AZURE_STORAGE_CONTAINER)
    self.container_client = ContainerClient.from_connection_string(
        self.AZURE_STORAGE_CONNECTION_STRING, self.AZURE_STORAGE_CONTAINER)
    self.blob_service_client = BlobServiceClient.from_connection_string(
        self.AZURE_STORAGE_CONNECTION_STRING)
    # Set later, when a specific blob is selected for an operation.
    self.blob_client = None
def test_cache_correctness(self):
    """Verify the local read cache serves repeated reads and is invalidated
    when the remote blob changes behind its back."""
    with self._setup_test() as az_info:
        # Exercise both supported compressed-serialization formats.
        for suffix in ('.jsonl.gz', '.msgpack.l.gz'):
            random_elements = list(range(100))
            remote_path = RichPath.create(
                "azure://devstoreaccount1/test1/compressed/data" + suffix,
                az_info)
            remote_path.save_as_compressed_file(random_elements)

            # Read once
            read_nums = list(remote_path.read_by_file_suffix())
            self.assertListEqual(read_nums, random_elements)

            # Hit Cache
            read_nums = list(remote_path.read_by_file_suffix())
            self.assertListEqual(read_nums, random_elements)

            self.assertTrue(remote_path.exists())
            self.assertTrue(remote_path.is_file())

            # Update file through other means, and ensure that cache is
            # appropriately invalidated: write a different payload locally
            # and upload it directly with the raw SDK client, bypassing
            # RichPath entirely.
            new_elements = list(range(500))
            with TemporaryDirectory() as tmp:
                path = os.path.join(tmp, 'tst' + suffix)
                if suffix == '.jsonl.gz':
                    save_jsonl_gz(new_elements, path)
                else:
                    save_msgpack_l_gz(new_elements, path)

                container_client = ContainerClient.from_connection_string(
                    self.AZURITE_DEVELOPMENT_CONNECTION_STRING, "test1")
                blob_client = container_client.get_blob_client(
                    "compressed/data" + suffix)
                with open(path, 'rb') as f:
                    blob_client.upload_blob(f, overwrite=True)

            # The next read must observe the out-of-band update, not the cache.
            read_nums = list(remote_path.read_by_file_suffix())
            self.assertListEqual(read_nums, new_elements)
            self.assertTrue(remote_path.exists())
            self.assertTrue(remote_path.is_file())
def _create_test_container(self):
    """Create the Azurite 'test1' container if it is not already present."""
    container_client: ContainerClient = ContainerClient.from_connection_string(
        self.AZURITE_DEVELOPMENT_CONNECTION_STRING, container_name="test1")
    try:
        container_client.create_container()
    except ResourceExistsError:
        # Already created by an earlier test run; nothing to do.
        pass
def ProcessAllImages():
    """Fetch and persist metadata for every image blob in the container."""
    print('Processing Images.')
    container = ContainerClient.from_connection_string(
        connection_string, container_name=container_name_images)
    blobs_list = container.list_blobs()
    # One SAS token is reused for every image URL below.
    token = GetSASToken()
    for blob in blobs_list:
        blob_client = container.get_blob_client(blob.name)
        # Generate filename for image metadata file.
        fileName = GetFilePathFromImageURL(blob_client.url)
        nycImageUrl = f'{blob_client.url}?{token}'
        try:
            jsonImageMetadata = GetNYCImageMetadata(nycImageUrl)
            SaveImageMetadata(jsonImageMetadata, fileName)
            print(f'Completed processing {fileName}.')
        except Exception as e:
            # Report and continue with the next image.
            print(f"Error {e} - {fileName}")
def __init__(
    self,
    blob_storage_conn_str,
    container_base,
    container_processed,
    container_quarantined,
):
    """Creates a Blob Storage object and logs account/replication stats.

    Parameters
    -------
    blob_storage_conn_str: str
        Connection string with Blob Storage.
    container_base: str
        Container where files will be read.
    container_processed: str
        Container where files will be sent after being processed
        (Processed with sucess).
    container_quarantined: str
        Container that stores quarantine files (Processed with failure).
    """
    self.blob_storage_conn_str = blob_storage_conn_str
    self.container_base = container_base
    self.container_processed = container_processed
    self.container_quarantined = container_quarantined
    # init blob service & container connectivity
    # instantiate the blob storage class to perform operations on it
    self.blob_service_client = BlobServiceClient.from_connection_string(
        conn_str=self.blob_storage_conn_str)
    # Container-scoped client for the base (input) container.
    self.get_container_base_info = ContainerClient.from_connection_string(
        conn_str=self.blob_storage_conn_str,
        container_name=self.container_base)
    log.info(
        f"successfully established connection with container base: {self.container_base}"
    )
    print(
        f"successfully established connection with container base: {self.container_base}"
    )
    # get sku of the blob storage account
    account_info = self.get_container_base_info.get_account_information()
    log.info("storage sku: {}".format(account_info["sku_name"].lower()))
    # get stats of blob storage & container service info
    # (geo-replication status and last secondary sync time)
    stats_blob_storage = self.blob_service_client.get_service_stats()
    log.info("blob storage replication status: {}".format(
        stats_blob_storage["geo_replication"]["status"]))
    log.info("last blob storage sync replication time: {}".format(
        self.utc_to_local(
            stats_blob_storage["geo_replication"]["last_sync_time"])))
    # Last-modified timestamp of the base container, converted to local time.
    stats_container_base = self.get_container_base_info.get_container_properties(
    )
    log.info("last container modified time: {}".format(
        self.utc_to_local(stats_container_base.last_modified)))
def create_container_client(self, account, container):
    """Build a ContainerClient, preferring the configured connection string.

    Without a connection string, falls back to an unauthenticated client
    built from the container URL.
    """
    if self.connection_string:
        return ContainerClient.from_connection_string(
            self.connection_string, container)
    return ContainerClient.from_container_url(
        "https://{}/{}".format(account, container))
def bucket_exists(self):
    """Return True when the configured Azure container is reachable."""
    container = ContainerClient.from_connection_string(
        self._account, self._bucket, connection_timeout=300)
    try:
        # A successful properties fetch proves the container exists.
        container.get_container_properties()
        log.debug(
            output_messages['DEBUG_CONTAINER_ALREADY_EXISTS'] % self._bucket,
            class_name=AZURE_STORAGE_NAME)
        return True
    except Exception:
        # Any failure (missing container, auth, network) counts as absent.
        return False
def find_azure_storage_blob_file_names(conn_str, container_name, prefix=''):
    """List all blobs in an Azure container, optionally filtered by prefix.

    Args:
        conn_str: Azure Storage connection string.
        container_name: Name of the container to list.
        prefix: Only blobs whose names start with this value are returned.

    Returns:
        list of Azure BlobProperties objects for the matching blobs.
    """
    container = ContainerClient.from_connection_string(
        conn_str=conn_str, container_name=container_name)
    # FIX: azure-storage-blob v12 list_blobs takes `name_starts_with`;
    # the original's `prefix=` keyword meant the filter was never applied.
    return list(container.list_blobs(name_starts_with=prefix))
def getBlobUrl(imagename, connectionString):
    """Return the URL of blob ``imagename`` in the 'droneimages' container.

    On failure, logs via ``xlog`` and returns None implicitly (behavior
    preserved from the original).
    """
    try:
        container_client = ContainerClient.from_connection_string(
            conn_str=connectionString, container_name="droneimages")
        blob_client = container_client.get_blob_client(imagename)
        return blob_client.url
    # FIX: narrowed from a bare `except:` which also trapped
    # SystemExit/KeyboardInterrupt.
    except Exception:
        xlog('getBlobUrl: error:', sys.exc_info()[0])
def _blob_service(self, custom_domain=None, connection_string=None) -> ContainerClient:
    # This won't open a connection or anything,
    # it's akin to a client
    # NOTE(review): `custom_domain` is forwarded as a keyword argument, but
    # azure-storage-blob v12's from_connection_string does not document a
    # `custom_domain` parameter — confirm the installed SDK version accepts
    # it (it was a legacy azure-storage parameter).
    return ContainerClient.from_connection_string(
        conn_str=connection_string,
        container_name=self.azure_container,
        custom_domain=custom_domain)
def __init__(self, connection_string, **options):
    """Create service and container clients from a connection string.

    Keyword Args:
        container_name: container to use (defaults to
            "defaultcontainerpython").
    """
    # Fall back to the default container when none is supplied.
    self.container_name = options.get('container_name',
                                      "defaultcontainerpython")
    self.connection_string = connection_string
    self.blob_service_client = BlobServiceClient.from_connection_string(
        connection_string)
    self.container_client = ContainerClient.from_connection_string(
        connection_string, self.container_name)
    # Working directory for local file operations.
    self.local_dir_path = "./"
def create_bucket(self, connection_string, container):
    """Create an Azure container and verify it is reachable.

    Args:
        connection_string: Azure Storage connection string.
        container: Name of the container to create.

    Raises:
        Exception: when creation or the follow-up existence check fails;
            the underlying SDK error is attached as the cause.
    """
    service_client = BlobServiceClient.from_connection_string(
        connection_string)
    try:
        service_client.create_container(container)
        # FIX: the original shadowed the `container` parameter with the
        # client object; use a distinct name for the verification client.
        container_client = ContainerClient.from_connection_string(
            connection_string, container, connection_timeout=300)
        container_client.get_container_properties()
    except Exception as err:
        # FIX: chain the original error so the root cause is not lost.
        raise Exception('Can\'t create Azure container.') from err
def get_files_from_blob():
    """Print every blob (name and full properties) in the 'data' container."""
    connect_str = config['AZURE_STORAGE_CONNECTION_STRING']
    container_client = ContainerClient.from_connection_string(
        connect_str, container_name="data")
    # List the blobs in the container, echoing each one as we go.
    for blob in container_client.list_blobs():
        print("\t" + blob.name)
        print(blob)
def from_connection_string(cls, conn_str, container_name, credential=None, **kwargs):
    # type: (str, str, Optional[Any], str) -> BlobCheckpointStore
    """Alternate constructor: build a checkpoint store from a connection string."""
    client = ContainerClient.from_connection_string(
        conn_str,
        container_name,
        credential=credential,
        **kwargs,
    )
    # Account URL and container name are carried by the pre-built client.
    return cls(None, None, container_client=client)
def __init__(self, conn_str: str, container_name: str, overwrite: bool = True):
    """Remember connection settings and open the container client."""
    self.conn_str = conn_str
    self.container_name = container_name
    # Whether uploads should replace existing blobs by default.
    self.overwrite = overwrite
    self.container_client = ContainerClient.from_connection_string(
        conn_str=conn_str,
        container_name=container_name,
    )
def az_fs():
    """Pytest fixture providing (path_to, read, write, mkdirs, join, internals)
    helpers for the Azure filesystem tests, backed by the Azurite emulator
    when AZ_TEST_CONTAINER is unset."""
    if should_skip(AZ_URI):
        yield [None] * NUM_ATR_FS
        return
    from azure.storage.blob import ContainerClient

    monkeypatch = pytest.MonkeyPatch()
    container_name = os.environ.get("AZ_TEST_CONTAINER")
    account = None
    client = None
    # This means we are running against emulator.
    if container_name is None:
        # Point tensorflow-io at the local Azurite dev-storage account.
        monkeypatch.setenv("TF_AZURE_USE_DEV_STORAGE", "1")
        container_name = f"tf-io-bucket-az-{int(time.time())}"
        account = "devstoreaccount1"
        # Well-known Azurite dev-storage credentials (not a real secret).
        conn_str = (
            "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;"
            "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq"
            "/K1SZFPTOtr/KBHBeksoGMGw==;"
            "BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
        )
        client = ContainerClient.from_connection_string(conn_str, container_name)
        client.create_container()
    else:
        # TODO(vnvo2409): Implement for testing against production scenario
        # NOTE(review): on this branch `client`/`account` stay None, so the
        # upload_blob call below would fail — confirm before enabling.
        pass

    # Seed the root prefix so it exists as a (zero-byte) blob.
    client.upload_blob(ROOT_PREFIX, b"")

    def parse(path):
        # Strip the az://account/container/ prefix, keeping the blob key.
        res = urlparse(path, scheme=AZ_URI, allow_fragments=False)
        return res.path.split("/", 2)[2]

    def path_to(*args):
        return f"{AZ_URI}://{account}/{container_name}/{posixpath.join(ROOT_PREFIX, *args)}"

    def read(path):
        key_name = parse(path)
        return client.download_blob(key_name).content_as_bytes()

    def write(path, body):
        key_name = parse(path)
        client.upload_blob(key_name, body)

    def mkdirs(path):
        # Directories are emulated by zero-byte blobs ending in "/".
        if path[-1] == "/":
            write(path, b"")

    yield path_to, read, write, mkdirs, posixpath.join, (
        client,
        container_name,
        account,
    )
    # Restore the patched environment after the test finishes.
    monkeypatch.undo()
def upload(files, connection_string, container_name):
    """Upload each file object (exposing .name and .path) to the container."""
    container_client = ContainerClient.from_connection_string(
        connection_string, container_name)
    print("อัพโหลดอยู่รอแปป...")
    for file in files:
        # One blob per file, named after the source file.
        blob_client = container_client.get_blob_client(file.name)
        with open(file.path, "rb") as data:
            blob_client.upload_blob(data)
        print(f'{file.name} uploaded to blob storage')