def file_storage_connect():
    """Connect to Azure file storage and make sure the target directory exists.

    Connection settings are read from the module-level ``dbparameters``
    mapping (keys ``fs_username``, ``fs_password``, ``fs_share``,
    ``fs_directory_prefix`` and ``overwrite_remote_files``); every value is
    stripped of surrounding whitespace.

    The function publishes its state through the module globals
    ``file_service``, ``file_storage_dir``, ``file_storage_share`` and
    ``overwrite_remote_files``. When the share exists but the configured
    directory prefix does not, the prefix is created one path level at a
    time.

    Nothing is returned. Any exception raised while talking to the service
    is printed and not re-raised.
    """
    global file_service
    global file_storage_dir
    global file_storage_share
    global overwrite_remote_files

    file_storage_user = dbparameters['fs_username'].strip()
    file_storage_pwd = dbparameters['fs_password'].strip()
    file_storage_share = dbparameters['fs_share'].strip()
    file_storage_dir = dbparameters['fs_directory_prefix'].strip()
    overwrite_remote_files = dbparameters['overwrite_remote_files'].strip()

    file_service = FileService(account_name=file_storage_user,
                               account_key=file_storage_pwd)
    try:
        if not file_service.exists(file_storage_share):
            print('Failed to connect to Azure file storage, share does not exist: '
                  + file_storage_share)
            return

        print('Connection to Azure file storage successfully established...')
        if file_storage_dir and not file_service.exists(
                file_storage_share, directory_name=file_storage_dir):
            # Drop empty segments so a leading, trailing or doubled '/' does
            # not turn into a request for an empty directory name.
            segments = [part for part in file_storage_dir.split('/') if part]
            # Each ancestor is created before its child: 'a', 'a/b', 'a/b/c'.
            for depth in range(1, len(segments) + 1):
                file_service.create_directory(file_storage_share,
                                              '/'.join(segments[:depth]))
            print('Created directory:' + file_storage_dir)
    except Exception as ex:
        print('Error connecting to Azure file storage: ', ex)
def create_azure_fileshare(share_prefix, account_name, account_key,
                           bootstrap_dir='./WebInDeploy/bootstrap'):
    """
    Create a uniquely named Azure file share and populate it with bootstrap files.

    A unique share name is generated to avoid overlaps in shared infra. The
    directories ``config``, ``content``, ``software`` and ``license`` are
    created on the share, and every regular file found in ``bootstrap_dir``
    is uploaded into ``config``.

    :param share_prefix: prefix of the share name; it is lower-cased and a random UUID is appended
    :param account_name: storage account name passed to ``FileService``
    :param account_key: storage account key passed to ``FileService``
    :param bootstrap_dir: local directory whose files are uploaded to ``config``;
        defaults to the location that used to be hard-coded
    :return: the generated share name on success, otherwise an error message
        string (callers cannot tell the two apart by type)
    """
    share_name = "{0}-{1}".format(share_prefix.lower(), str(uuid.uuid4()))
    print('using share_name of: {}'.format(share_name))

    try:
        # SECURITY: certificate verification is disabled on purpose - bad form,
        # but SSL Decrypt causes issues with this.
        s = requests.Session()
        s.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=s)

        if not file_service.exists(share_name):
            file_service.create_share(share_name)

        for d in ['config', 'content', 'software', 'license']:
            print('creating directory of type: {}'.format(d))
            if not file_service.exists(share_name, directory_name=d):
                file_service.create_directory(share_name, d)

            # FIXME - We only handle bootstrap files. May need to handle other dirs
            if d != 'config':
                continue

            for filename in os.listdir(bootstrap_dir):
                local_path = os.path.join(bootstrap_dir, filename)
                # Only regular files can be uploaded; a sub-directory would
                # raise an OSError that none of the handlers below catch.
                if not os.path.isfile(local_path):
                    continue
                print('creating file: {0}'.format(filename))
                file_service.create_file_from_path(share_name, d, filename,
                                                   local_path)

    except AttributeError as ae:
        # this can be returned on bad auth information
        print(ae)
        return "Authentication or other error creating bootstrap file_share in Azure"

    except AzureException as ahe:
        print(ahe)
        return str(ahe)

    except ValueError as ve:
        print(ve)
        return str(ve)

    print('all done')
    return share_name
def _get_files_from_afs(cli_ctx, afs, path, expiry):
    """List the files and directories under a path on a mounted Azure File share.

    Files are reported with a read-only SAS download URL and their content
    length; directories are reported with neither. An empty list is returned
    when the path does not exist on the share.

    :param models.AzureFileShareReference afs: Azure file share reference.
    :param str path: path to list files from.
    :param int expiry: SAS expiration time in minutes.
    """
    from azure.storage.file import FileService
    from azure.storage.file.models import File, FilePermissions

    account_key = _get_storage_account_key(cli_ctx, afs.account_name, None)
    service = FileService(afs.account_name, account_key)
    # The share name is the last segment of the share URL.
    share_name = afs.azure_file_url.split('/')[-1]
    effective_path = _get_path_for_storage(path)

    entries = []
    if not service.exists(share_name, effective_path):
        return entries

    for item in service.list_directories_and_files(share_name, effective_path):
        if not isinstance(item, File):
            entries.append(LogFile(item.name, None, True, None))
            continue

        valid_until = datetime.datetime.utcnow() + datetime.timedelta(minutes=expiry)
        sas = service.generate_file_shared_access_signature(
            share_name, effective_path, item.name,
            permission=FilePermissions(read=True),
            expiry=valid_until)
        download_url = service.make_file_url(
            share_name, effective_path, item.name, 'https', sas)
        entries.append(
            LogFile(item.name, download_url, False, item.properties.content_length))
    return entries
def getLatestModel(customer, modelName, storage_account_name, storage_account_key):
    """Load the most recent pickled model for a customer from Azure file storage.

    Models are looked up on the ``trainedmodels`` share in the directory
    ``<customer>/<modelName>``. File names are expected to have the form
    ``<modelName>_<timestamp>.pkl``; the file with the greatest timestamp
    string is downloaded and the ``'model'`` entry of the unpickled object is
    returned.

    :param customer: customer directory name on the share
    :param modelName: model name, used as both directory name and file prefix
    :param storage_account_name: storage account name passed to ``FileService``
    :param storage_account_key: storage account key passed to ``FileService``
    :return: the stored model, or ``None`` (after printing a message) when the
        directory does not exist or holds no matching model file
    """
    fileService = FileService(account_name=storage_account_name,
                              account_key=storage_account_key)
    modelDirectory = customer + '/' + modelName

    # Check the model directory itself: listing a missing directory raises,
    # and checking only the customer directory does not prevent that.
    if not fileService.exists('trainedmodels', modelDirectory):
        print('Customer or model not found.')
        return None

    namePrefix = modelName + '_'
    nameSuffix = '.pkl'
    # Strip the known prefix and suffix rather than splitting on '_' and '.',
    # so model names that contain those characters are still parsed correctly.
    modelTimestampArr = [
        entry.name[len(namePrefix):-len(nameSuffix)]
        for entry in fileService.list_directories_and_files(
            'trainedmodels', modelDirectory, prefix=modelName)
        if entry.name.startswith(namePrefix) and entry.name.endswith(nameSuffix)
    ]
    if not modelTimestampArr:
        print('Customer or model not found.')
        return None

    latestModelFileName = namePrefix + max(modelTimestampArr) + nameSuffix
    print(latestModelFileName)
    file = fileService.get_file_to_bytes('trainedmodels', modelDirectory,
                                         latestModelFileName)
    # SECURITY: unpickling executes arbitrary code; this is only safe while
    # the share contents are fully trusted.
    model = pickle.loads(file.content)['model']
    return model
def run(job, **kwargs):
    """Upload a local file to an Azure file share and record it on the resource.

    The ``{{ ... }}`` literals are template placeholders (presumably
    substituted before this code executes - confirm against the hosting
    platform). The account keys are read from the first ``Resource`` whose
    name contains the storage account name.

    The share is created with a quota of 1, then the file is uploaded to the
    share root. If a file with the same name was already present it is
    overwritten and a warning is returned without touching ``resource``;
    otherwise ``resource`` is updated with the storage details and saved.

    :param job: the job this action runs for (unused here)
    :param kwargs: must contain ``resource``, the object updated on success
    :return: a ``(status, message, details)`` tuple
    """
    resource = kwargs.get('resource')
    create_custom_fields_as_needed()

    storage_account = '{{ storage_account }}'
    file = "{{ file }}"
    azure_storage_file_share_name = '{{ azure_storage_file_share_name }}'

    file_name = Path(file).name
    if file.startswith(settings.MEDIA_URL):
        set_progress("Converting relative URL to filesystem path")
        # Replace only the leading prefix; later occurrences of the same text
        # inside the path must be left alone.
        file = file.replace(settings.MEDIA_URL, settings.MEDIA_ROOT, 1)

    # Read both keys from one row: two independent lookups on an unordered
    # queryset are not guaranteed to return the same resource.
    storage_resource = Resource.objects.filter(
        name__icontains=storage_account).first()
    if storage_resource is None:
        return ("FAILURE", "Storage account resource not found",
                "No resource has a name containing '{}'.".format(storage_account))
    account_key = storage_resource.azure_account_key
    fallback_account_key = storage_resource.azure_account_key_fallback

    set_progress("Connecting To Azure...")
    file_service = FileService(account_name=storage_account, account_key=account_key)

    set_progress('Creating a file share...')
    file_service.create_share(share_name=azure_storage_file_share_name, quota=1)

    set_progress('Creating a file...')
    already_present = file_service.exists(
        share_name=azure_storage_file_share_name,
        file_name=file_name,
        directory_name='')
    file_service.create_file_from_path(
        share_name=azure_storage_file_share_name,
        file_name=file_name,
        directory_name='',
        local_file_path=file)
    if already_present:
        return "WARNING", "File with this name already exists", "The file will be updated."

    resource.name = azure_storage_file_share_name + '-' + file_name
    resource.azure_storage_account_name = storage_account
    resource.azure_account_key = account_key
    resource.azure_account_key_fallback = fallback_account_key
    resource.azure_storage_file_share_name = azure_storage_file_share_name
    resource.azure_storage_file_name = file_name
    resource.save()
    return "Success", "The File has succesfully been uploaded", ""
def create_azure_fileshare(files, share_prefix, account_name, account_key):
    """Create a uniquely named Azure file share and upload an archive tree to it.

    The archive directory is built by ``_create_archive_directory``. For each
    of ``config``, ``content``, ``software`` and ``license`` the directory is
    created on the share when missing, and every entry of the matching local
    sub-directory is uploaded into it.

    Returns a success message containing the share name, or an error message
    string when an ``AttributeError``, ``AzureException`` or ``ValueError``
    is raised.
    """
    # A random UUID suffix avoids name overlaps in shared infra.
    unique_suffix = str(uuid.uuid4())
    share_name = "{0}-{1}".format(share_prefix.lower(), unique_suffix)
    print('using share_name of: {}'.format(share_name))

    archive_file_path = _create_archive_directory(files, share_prefix)

    try:
        # ignore SSL warnings - bad form, but SSL Decrypt causes issues with this
        session = requests.Session()
        session.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=session)

        share_missing = not file_service.exists(share_name)
        if share_missing:
            file_service.create_share(share_name)

        for dir_type in ('config', 'content', 'software', 'license'):
            print('creating directory of type: {}'.format(dir_type))
            if not file_service.exists(share_name, directory_name=dir_type):
                file_service.create_directory(share_name, dir_type)

            local_dir = os.path.join(archive_file_path, dir_type)
            for entry in os.listdir(local_dir):
                print('creating file: {0}'.format(entry))
                file_service.create_file_from_path(
                    share_name, dir_type, entry, os.path.join(local_dir, entry))

    except AttributeError as auth_error:
        # this can be returned on bad auth information
        print(auth_error)
        return "Authentication or other error creating bootstrap file_share in Azure"
    except AzureException as azure_error:
        print(azure_error)
        return str(azure_error)
    except ValueError as value_error:
        print(value_error)
        return str(value_error)

    print('all done')
    return 'Azure file-share {} created successfully'.format(share_name)
def saveModel(customer, modelName, model, storage_account_name, storage_account_key):
    """Pickle a model and store it on the ``trainedmodels`` Azure file share.

    The file is written to ``<customer>/<modelName>`` under the name
    ``<modelName>_<YYYYmmdd-HHMMSS>.pkl``. The share, the customer directory
    and the model directory are created when missing. The stored file name is
    printed once the upload finishes.
    """
    share = 'trainedmodels'
    model_dir = customer + '/' + modelName
    service = FileService(account_name=storage_account_name,
                          account_key=storage_account_key)

    customer_dir_missing = not service.exists(share, customer)
    if customer_dir_missing:
        service.create_share(share)
        service.create_directory(share, customer)

    model_dir_missing = not service.exists(share, model_dir)
    if model_dir_missing:
        service.create_directory(share, model_dir)

    payload = pickle.dumps(model)
    stamp = time.strftime('%Y%m%d-%H%M%S')
    target_name = modelName + '_' + stamp + '.pkl'
    service.create_file_from_bytes(share, model_dir, target_name, payload)
    print(target_name + ' saved.')
def file_storage_connect():
    """Create a ``FileService`` client and report whether the share is reachable.

    The account name, account key and share name are read from the
    module-level names ``file_storage_user``, ``file_storage_pwd`` and
    ``file_storage_share``. The client uses a socket timeout of 15.

    The outcome of the share check is only printed. The client is returned in
    every case, including when the share is missing or the check raised.

    :return: the ``FileService`` instance
    """
    file_service = FileService(account_name=file_storage_user,
                               account_key=file_storage_pwd,
                               socket_timeout=15)
    try:
        if file_service.exists(file_storage_share):
            print('Connection to Azure file storage successfully established...')
        else:
            print('Failed to connect to Azure file storage, share does not exist: '
                  + file_storage_share)
    except Exception as ex:
        print('Error connecting to Azure file storage: ', ex)
    return file_service
def upload_scripts(config, job_name, filenames):
    """Upload local files into the ``job_name`` directory of the configured share.

    The directory is created when it does not exist yet. Each file is stored
    under its base name.

    :param config: object exposing ``storage_account`` (mapping with ``name``
        and ``key``) and ``fileshare_name``
    :param job_name: directory on the share that receives the files
    :param filenames: iterable of local file paths
    """
    service = FileService(config.storage_account['name'],
                          config.storage_account['key'])

    directory_missing = not service.exists(config.fileshare_name,
                                           directory_name=job_name)
    if directory_missing:
        service.create_directory(config.fileshare_name, job_name,
                                 fail_on_exist=False)

    for local_path in filenames:
        remote_name = os.path.basename(local_path)
        service.create_file_from_path(
            config.fileshare_name, job_name, remote_name, local_path)
def file():
    """Mirror the local static directory to an Azure file share and back.

    Steps:

    1. Make sure the local download directory ``<static>\\files`` exists.
    2. Create the ``root`` share and its ``images`` directory.
    3. Upload every regular file of the static directory into ``images``,
       deleting any remote file of the same name first.
    4. Download everything listed in ``images`` into the download directory.

    :return: the result of ``make_response`` for a JSON document listing the
        remote entry names, followed by one ``<img>`` tag per downloaded file
    """
    # Raw string: the backslashes are path separators, not escape sequences.
    static_dir_path = r"D:\home\site\wwwroot\static"
    static_file_dir_path = static_dir_path + '\\' + 'files'
    account_name = 'hanastragetest'
    account_key = 'acount_key'
    root_share_name = 'root'
    share_name = 'images'

    # create local save directory
    if not os.path.exists(static_file_dir_path):
        os.mkdir(static_file_dir_path)

    file_service = FileService(account_name=account_name, account_key=account_key)
    # create share
    file_service.create_share(root_share_name)
    # create directory
    file_service.create_directory(root_share_name, share_name)

    for local_name in os.listdir(static_dir_path):
        local_path = static_dir_path + '\\' + local_name
        # Sub-directories (including the download directory created above)
        # cannot be uploaded as files.
        if not os.path.isfile(local_path):
            continue
        # delete
        if file_service.exists(root_share_name, share_name, local_name):
            file_service.delete_file(root_share_name, share_name, local_name)
        # file upload into the share_name directory of the root share
        file_service.create_file_from_path(
            root_share_name,
            share_name,
            local_name,
            local_path,
            content_settings=ContentSettings(content_type='image/png'))

    # Materialise the listing once: it is used both for the downloads and
    # for the JSON payload.
    remote_entries = list(
        file_service.list_directories_and_files(root_share_name, share_name))

    image_tags = []
    for entry in remote_entries:
        # file download
        file_save_path = static_file_dir_path + '\\' + entry.name
        file_service.get_file_to_path(root_share_name, share_name, entry.name,
                                      file_save_path)
        image_tags.append("<img src='{}'>".format(file_save_path))
    html = "".join(image_tags)

    result = {
        "result": True,
        "data": {
            "file_or_dir_name": [entry.name for entry in remote_entries]
        }
    }
    return make_response(json.dumps(result, ensure_ascii=False) + html)
def clean(shared_directory, config_path, remove_directory):
    """Delete every entry of a directory on the configured Azure file share.

    The JSON file at ``config_path`` (``~`` is expanded) supplies
    ``account_name``, ``account_key`` and ``share_name``. Nothing happens
    when ``shared_directory`` does not exist on the share.

    :param shared_directory: directory on the share to empty
    :param config_path: path to the JSON configuration file
    :param remove_directory: when truthy, the directory itself is deleted
        after its entries
    """
    with open(os.path.expanduser(config_path)) as config_file:
        settings = json.load(config_file)

    from azure.storage.file import FileService

    service = FileService(account_name=settings['account_name'],
                          account_key=settings['account_key'])
    if not service.exists(settings['share_name'], shared_directory):
        return

    listing = service.list_directories_and_files(settings['share_name'],
                                                 shared_directory)
    for entry in listing:
        service.delete_file(settings['share_name'], shared_directory, entry.name)

    if remove_directory:
        service.delete_directory(settings['share_name'], shared_directory)
def downloadStoredFiles(config, accountKey, sourceDir, targetDir):
    """Download the ``.txt`` and ``.csv`` files of a share directory.

    ``config`` supplies ``STORAGE_ACCOUNT`` and ``STORAGE_LOCATION`` (the
    share name). ``targetDir`` is created when missing. Files that already
    exist locally are skipped. Nothing is downloaded when ``sourceDir`` does
    not exist on the share.
    """
    fs = FileService(account_name=config['STORAGE_ACCOUNT'], account_key=accountKey)
    storageLoc = config['STORAGE_LOCATION']

    target_missing = not path.exists(targetDir)
    if target_missing:
        makedirs(targetDir)

    print(
        f'\nFileService: reading files from Azure Storage location="{storageLoc}" directory="{sourceDir}"'
    )
    if not fs.exists(storageLoc, sourceDir):
        return

    wanted_suffixes = ('.txt', '.csv')
    wanted_names = []
    for entry in fs.list_directories_and_files(storageLoc, sourceDir):
        if entry.name.endswith(wanted_suffixes):
            wanted_names.append(entry.name)

    for fname in wanted_names:
        local_path = path.join(targetDir, fname)
        if not path.exists(local_path):
            print(f'downloading file={fname}')
            fs.get_file_to_path(storageLoc, sourceDir, fname,
                                path.join(targetDir, fname))
        else:
            print(f'already got file={fname}')
def test_job_level_mounting(self, resource_group, location, cluster, storage_account, storage_account_key):
    """Tests if it's possible to mount external file systems for a job."""
    job_name = 'job'
    uses_real_storage = storage_account.name != helpers.FAKE_STORAGE.name

    # Create file share and container to mount on the job level
    if uses_real_storage:
        files = FileService(storage_account.name, storage_account_key)
        files.create_share('jobshare', fail_on_exist=False)
        blobs = BlockBlobService(storage_account.name, storage_account_key)
        blobs.create_container('jobcontainer', fail_on_exist=False)

    afs_reference = models.AzureFileShareReference(
        account_name=storage_account.name,
        azure_file_url='https://{0}.file.core.windows.net/{1}'.format(
            storage_account.name, 'jobshare'),
        relative_mount_path='job_afs',
        credentials=models.AzureStorageCredentialsInfo(
            account_key=storage_account_key
        ),
    )
    bfs_reference = models.AzureBlobFileSystemReference(
        account_name=storage_account.name,
        container_name='jobcontainer',
        relative_mount_path='job_bfs',
        credentials=models.AzureStorageCredentialsInfo(
            account_key=storage_account_key
        ),
    )
    # Check that the job preparation has access to job level file systems.
    preparation_command = ('echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                           'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                           'echo done')
    # Check that the job has access to job level file systems.
    job_command = ('echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                   'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                   'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                   'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                   'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                   'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                   'echo done')

    job_parameters = models.JobCreateParameters(
        location=location,
        cluster=models.ResourceId(id=cluster.id),
        node_count=1,
        mount_volumes=models.MountVolumes(
            azure_file_shares=[afs_reference],
            azure_blob_file_systems=[bfs_reference]
        ),
        # Put standard output on cluster level AFS to check that the job has access to it.
        std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(helpers.AZURE_FILES_MOUNTING_PATH),
        # Create two output directories on job level AFS and blobfuse.
        output_directories=[
            models.OutputDirectory(id='OUTPUT1', path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
            models.OutputDirectory(id='OUTPUT2', path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs')
        ],
        job_preparation=models.JobPreparation(command_line=preparation_command),
        custom_toolkit_settings=models.CustomToolkitSettings(command_line=job_command)
    )
    job = self.client.jobs.create(
        resource_group.name, job_name, parameters=job_parameters).result()

    final_state = helpers.wait_for_job_completion(
        self.is_live, self.client, resource_group.name, job.name, helpers.MINUTE)
    self.assertEqual(final_state, models.ExecutionState.succeeded)

    job = self.client.jobs.get(resource_group.name, job.name)

    # Assert job and job prep standard output is populated on cluster level filesystem
    expected_std_files = {u'stdout.txt': u'done\n', u'stderr.txt': u'',
                          u'stdout-job_prep.txt': u'done\n', u'stderr-job_prep.txt': u''}
    helpers.assert_job_files_are(self, self.client, resource_group.name, job.name,
                                 helpers.STANDARD_OUTPUT_DIRECTORY_ID, expected_std_files)

    # Assert files are generated on job level AFS
    expected_afs_files = {u'job_afs.txt': u'afs\n', u'prep_afs.txt': u'afs\n', u'afs': None}
    helpers.assert_job_files_are(self, self.client, resource_group.name, job.name,
                                 'OUTPUT1', expected_afs_files)

    # Assert files are generated on job level blobfuse
    expected_bfs_files = {u'job_bfs.txt': u'bfs\n', u'prep_bfs.txt': u'bfs\n', u'bfs': None}
    helpers.assert_job_files_are(self, self.client, resource_group.name, job.name,
                                 'OUTPUT2', expected_bfs_files)

    # Assert subfolders are available via API
    helpers.assert_job_files_in_path_are(self, self.client, resource_group.name, job.name,
                                         'OUTPUT1', 'afs', {u'job_afs.txt': u'afs\n'})
    helpers.assert_job_files_in_path_are(self, self.client, resource_group.name, job.name,
                                         'OUTPUT2', 'bfs', {u'job_bfs.txt': u'bfs\n'})

    # Assert that we can access the output files created on job level mount volumes directly in storage using path
    # segment returned by the server.
    if uses_real_storage:
        output_root = (job.job_output_directory_path_segment + '/' +
                       helpers.OUTPUT_DIRECTORIES_FOLDER_NAME)
        files = FileService(storage_account.name, storage_account_key)
        self.assertTrue(files.exists('jobshare', output_root, 'job_afs.txt'))
        blobs = BlockBlobService(storage_account.name, storage_account_key)
        self.assertTrue(blobs.exists('jobcontainer', output_root + '/job_bfs.txt'))

    # After the job is done the filesystems should be unmounted automatically, check this by submitting a new job.
    checker_parameters = models.JobCreateParameters(
        location=location,
        cluster=models.ResourceId(id=cluster.id),
        node_count=1,
        std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(helpers.AZURE_FILES_MOUNTING_PATH),
        custom_toolkit_settings=models.CustomToolkitSettings(
            command_line='echo job; df | grep -E "job_bfs|job_afs"'
        )
    )
    checker = self.client.jobs.create(
        resource_group.name, 'checker', parameters=checker_parameters).result()

    # Check the job failed because there are not job level mount volumes anymore
    checker_state = helpers.wait_for_job_completion(
        self.is_live, self.client, resource_group.name, checker.name, helpers.MINUTE)
    self.assertEqual(checker_state, models.ExecutionState.failed)

    # Check that the cluster level AFS was still mounted
    helpers.assert_job_files_are(self, self.client, resource_group.name, checker.name,
                                 helpers.STANDARD_OUTPUT_DIRECTORY_ID,
                                 {u'stdout.txt': u'job\n', u'stderr.txt': u''})
def test_job_level_mounting(self, resource_group, location, cluster,
                            storage_account, storage_account_key):
    """Tests if it's possible to mount external file systems for a job."""
    job_name = 'job'
    group_name = resource_group.name
    live_storage = storage_account.name != FAKE_STORAGE.name

    # Create file share and container to mount on the job level
    if live_storage:
        files = FileService(storage_account.name, storage_account_key)
        files.create_share('jobshare', fail_on_exist=False)
        blobs = BlockBlobService(storage_account.name, storage_account_key)
        blobs.create_container('jobcontainer', fail_on_exist=False)

    file_share_mount = models.AzureFileShareReference(
        account_name=storage_account.name,
        azure_file_url='https://{0}.file.core.windows.net/{1}'.format(
            storage_account.name, 'jobshare'),
        relative_mount_path='job_afs',
        credentials=models.AzureStorageCredentialsInfo(
            account_key=storage_account_key),
    )
    blob_mount = models.AzureBlobFileSystemReference(
        account_name=storage_account.name,
        container_name='jobcontainer',
        relative_mount_path='job_bfs',
        credentials=models.AzureStorageCredentialsInfo(
            account_key=storage_account_key),
    )
    mounts = models.MountVolumes(
        azure_file_shares=[file_share_mount],
        azure_blob_file_systems=[blob_mount])

    # Create two output directories on job level AFS and blobfuse.
    outputs = [
        models.OutputDirectory(
            id='OUTPUT1', path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
        models.OutputDirectory(
            id='OUTPUT2', path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs'),
    ]

    # Check that the job preparation has access to job level file systems.
    preparation = models.JobPreparation(
        command_line='echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                     'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                     'echo done')

    # Check that the job has access to job level file systems.
    toolkit = models.CustomToolkitSettings(
        command_line='echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                     'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                     'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                     'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                     'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                     'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                     'echo done')

    job = self.client.jobs.create(
        group_name,
        job_name,
        parameters=models.JobCreateParameters(
            location=location,
            cluster=models.ResourceId(id=cluster.id),
            node_count=1,
            mount_volumes=mounts,
            # Put standard output on cluster level AFS to check that the job has access to it.
            std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                AZURE_FILES_MOUNTING_PATH),
            output_directories=outputs,
            job_preparation=preparation,
            custom_toolkit_settings=toolkit)).result()

    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client, group_name,
                                job.name, MINUTE),
        models.ExecutionState.succeeded)

    job = self.client.jobs.get(group_name, job.name)

    # Assert job and job prep standard output is populated on the cluster
    # level filesystem, then that files are generated on job level AFS
    # (OUTPUT1) and on job level blobfuse (OUTPUT2).
    expected_by_directory = (
        (STANDARD_OUTPUT_DIRECTORY_ID, {
            u'stdout.txt': u'done\n',
            u'stderr.txt': u'',
            u'stdout-job_prep.txt': u'done\n',
            u'stderr-job_prep.txt': u''
        }),
        ('OUTPUT1', {
            u'job_afs.txt': u'afs\n',
            u'prep_afs.txt': u'afs\n',
            u'afs': None
        }),
        ('OUTPUT2', {
            u'job_bfs.txt': u'bfs\n',
            u'prep_bfs.txt': u'bfs\n',
            u'bfs': None
        }),
    )
    for directory_id, expected_files in expected_by_directory:
        assert_job_files_are(self, self.client, group_name, job.name,
                             directory_id, expected_files)

    # Assert subfolders are available via API
    expected_by_subfolder = (
        ('OUTPUT1', 'afs', {u'job_afs.txt': u'afs\n'}),
        ('OUTPUT2', 'bfs', {u'job_bfs.txt': u'bfs\n'}),
    )
    for directory_id, subfolder, expected_files in expected_by_subfolder:
        assert_job_files_in_path_are(self, self.client, group_name, job.name,
                                     directory_id, subfolder, expected_files)

    # Assert that we can access the output files created on job level mount volumes directly in storage using path
    # segment returned by the server.
    if live_storage:
        files = FileService(storage_account.name, storage_account_key)
        afs_found = files.exists(
            'jobshare',
            job.job_output_directory_path_segment + '/' +
            OUTPUT_DIRECTORIES_FOLDER_NAME,
            'job_afs.txt')
        self.assertTrue(afs_found)
        blobs = BlockBlobService(storage_account.name, storage_account_key)
        bfs_found = blobs.exists(
            'jobcontainer',
            job.job_output_directory_path_segment + '/' +
            OUTPUT_DIRECTORIES_FOLDER_NAME + '/job_bfs.txt')
        self.assertTrue(bfs_found)

    # After the job is done the filesystems should be unmounted automatically, check this by submitting a new job.
    checker = self.client.jobs.create(
        group_name,
        'checker',
        parameters=models.JobCreateParameters(
            location=location,
            cluster=models.ResourceId(id=cluster.id),
            node_count=1,
            std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                AZURE_FILES_MOUNTING_PATH),
            custom_toolkit_settings=models.CustomToolkitSettings(
                command_line='echo job; df | grep -E "job_bfs|job_afs"'))
    ).result()

    # Check the job failed because there are not job level mount volumes anymore
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client, group_name,
                                checker.name, MINUTE),
        models.ExecutionState.failed)

    # Check that the cluster level AFS was still mounted
    assert_job_files_are(self, self.client, group_name, checker.name,
                         STANDARD_OUTPUT_DIRECTORY_ID, {
                             u'stdout.txt': u'job\n',
                             u'stderr.txt': u''
                         })
# PG DUMP try: COMMANDS = ['pg_dump', '-F', 'c', '-b', '-v', '-f', './%s' % FILENAME] print("Running: '%s'" % (' '.join(COMMANDS))) exit_code = subprocess.call(COMMANDS) if exit_code is 1: raise Exception('Could not Backup, please check logs') # AZURE CONNECTION file_service = FileService(endpoint_suffix=AZURE_ENDPOINT_SUFFIX, account_name=AZURE_ACCOUNT_NAME, account_key=AZURE_ACCOUNT_KEY) # Check if AZURE_BACKUP_FOLDER exists, if not create it if not file_service.exists(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER): file_service.create_directory(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER) # Upload print("uploading to: '%s/%s/%s'" % (AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER, FILENAME)) file_service.create_file_from_path(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER, FILENAME, FILENAME, progress_callback=upload_callback) # Cleaning Backup Files backup_files = file_service.list_directories_and_files( AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER) filenames = []
FileService(account_name=AZURE_CPT, account_key=AZURE_KEY) file_service = FileService(account_name=AZURE_CPT, account_key=AZURE_KEY) print("Autorisation d'accès au compte Microsoft AZURE OK") logging.debug("Autorisation d'accès au compte Microsoft AZURE OK") # syslog.syslog(syslog.LOG_DEBUG,"Autorisation d'accès au compte Microsoft AZURE OK") except: print("Problème d'autorisation d'accès au compte Microsoft AZURE") logging.error("Problème d'autorisation d'accès au compte Microsoft AZURE") # syslog.syslog(syslog.LOG_ERR,"Problème d'autorisation d'accès au compte Microsoft AZURE") exit(2) # sortie avec erreur ! # Création du répertoire: backup6 sur Microsoft AZURE de notre exemple # # Vérifier si le répertoire de sauvegarde backup6 sur Microsoft AZURE existe ou non # try: file_service.exists(AZURE_REP_BKP) print("Le répertoire de sauvegarde AZURE existe !") logging.debug("Le répertoire de sauvegarde AZURE existe !") # syslog.syslog(syslog.LOG_DEBUG,"Le répertoire de sauvegarde AZURE existe !") except FileNotFoundError: file_service.create_share(AZURE_REP_BKP) print("Création du répertoire de sauvegarde AZURE ") logging.warning("Création du répertoire de sauvegarde AZURE ") # syslog.syslog(syslog.LOG_WARNING,"Création du répertoire de sauvegarde AZURE ") ############################## Temps ################################ BACKUP_DATE = date.today().strftime("%d-%m-%Y") # date d'aujourd'hui au format Jour/Mois/Année BACKUP_DATE_OLD = (date.today()-datetime.timedelta(days=int(NBjourDEretention))).strftime("%d-%m-%Y") # date d'aujourd'hui - le nb de jour de rétention au format Jour/Mois/Année ############################# Fonction ##############################
class storageFileService(clsLoggingBase):
    """
    Wraps an Azure ``FileService`` client used for the experiment shares.

    Should be created in two phases: first passing the account name, and
    second passing the account key (e.g. from the KeyVault) through
    ``set_storageKey``. After this the service object is created and can be
    used to access the share items. When either value is missing,
    ``preCheck`` falls back to the ``AZURE_ACN_NAME`` and
    ``AZURE_ACN_STRG_KEY`` environment variables.

    Public methods return a tuple whose first element is a success flag.
    On failure the second element is a description of the problem; on
    success it is usually an elapsed-time string (``seconds:microseconds``)
    or ``"OK"``.
    """

    # Polling budget used while waiting for a server-side copy to finish.
    _COPY_POLL_LIMIT = 5
    _COPY_POLL_SECONDS = 5

    def __init__(self, account_name):
        super().__init__(__name__)
        self.account_name = account_name
        self.account_key = None
        self.service = None
        self.maskFileName = 'mask_file.txt'

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _elapsed(start_time):
        """Format the time elapsed since ``start_time`` as ``seconds:microseconds``."""
        time_elapsed = datetime.datetime.now() - start_time
        return "{}:{}".format(time_elapsed.seconds, time_elapsed.microseconds)

    def _loadMaskContent(self, file_service, shareName, directoryName):
        """Return the parsed JSON of the mask file, or ``''`` when it is missing or empty."""
        if not file_service.exists(shareName, directoryName, self.maskFileName):
            return ''
        fileMask = file_service.get_file_to_text(shareName, directoryName,
                                                 self.maskFileName)
        if fileMask is not None and fileMask.content:
            return json.loads(fileMask.content)
        return ''

    def _downloadFileBytes(self, shareName, directoryName, fileName):
        """
        Download one file and return ``(ok, description, raw_bytes)``.

        Runs ``preCheck`` first; ``raw_bytes`` is ``None`` whenever ``ok`` is
        ``False``.
        """
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            shareName, directoryName)
        if not rv:
            return False, description, None
        if not file_service.exists(shareName, directoryName, fileName):
            return False, "Image file does not exist", None

        output_stream = io.BytesIO()
        fileImage = file_service.get_file_to_stream(shareName, directoryName,
                                                    fileName, output_stream)
        content_length = fileImage.properties.content_length
        if content_length is None or content_length <= 0:
            return False, "Null content obtained from the image source file", None
        return True, "OK", output_stream.getvalue()

    # ------------------------------------------------------------------
    # connection handling
    # ------------------------------------------------------------------
    def set_storageKey(self, storageKey):
        """Store the account key and create the service if the account name is known."""
        self.account_key = storageKey
        if self.account_name:
            self.service = FileService(account_name=self.account_name,
                                       account_key=self.account_key)

    def preCheck(self,
                 _sourceFileShareFolderName,
                 _sourceDirectoryName,
                 AdditionalCheck=True):
        """
        Make sure a service object exists and, optionally, that the share and
        directory exist.

        :param _sourceFileShareFolderName: name of the file share.
        :param _sourceDirectoryName: directory inside the share.
        :param AdditionalCheck: when True, verify that both the share and the
            directory exist.
        :return: ``(ok, description, service, account_name, account_key)``;
            the last three are ``None`` when ``ok`` is False.
        """
        super().getLoggingObj().debug('preCheck')
        if self.service is None:
            if not self.account_name:
                self.account_name = os.environ.get('AZURE_ACN_NAME')
                if not self.account_name:
                    return False, 'AZURE_ACN_NAME Environment Variable not set', None, None, None
            if not self.account_key:
                self.account_key = os.environ.get('AZURE_ACN_STRG_KEY')
                if not self.account_key:
                    return False, 'AZURE_ACN_STRG_KEY Environment Variable not set', None, None, None
            self.service = FileService(account_name=self.account_name,
                                       account_key=self.account_key)

        # Can we create file_share service
        if self.service is None:
            return False, "Unable to create File share, check Account Name, Key and connectivity", None, None, None

        if AdditionalCheck:
            # check for existence of Source share folder
            if not self.service.exists(_sourceFileShareFolderName):
                return False, "source share does not exist", None, None, None
            # check for existence of source share directory
            if not self.service.exists(_sourceFileShareFolderName,
                                       directory_name=_sourceDirectoryName):
                return False, "source directory does not exist", None, None, None
        return True, "OK", self.service, self.account_name, self.account_key

    # ------------------------------------------------------------------
    # copy / listing operations
    # ------------------------------------------------------------------
    def CopySourceDestinationImpl(self,
                                  _sourceFileShareFolderName,
                                  _sourceDirectoryName,
                                  _destinationFileShareFolderName,
                                  _destinationDirectoryName,
                                  _ExperimentName,
                                  _fileExtensionFilter='.jpg'):
        '''
        Copy the raw data of one experiment from the source directory to the
        experiment folder, creating the destination share/directories when
        they are missing.

        _sourceDirectoryName, _destinationDirectoryName: format should be
        directoryName/secondDirectoryName, no trailing slashes.

        :return: ``(True, elapsed)`` on success, ``(False, description)``
            when the pre-check fails or a copy does not complete in time.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description

        # create the destination share, directory and experiment folder if needed
        if not file_service.exists(_destinationFileShareFolderName):
            file_service.create_share(_destinationFileShareFolderName)
        if not file_service.exists(_destinationFileShareFolderName,
                                   directory_name=_destinationDirectoryName):
            file_service.create_directory(_destinationFileShareFolderName,
                                          _destinationDirectoryName)
        combinedDestinationFolderName = _destinationDirectoryName + "/" + _ExperimentName
        if not file_service.exists(
                _destinationFileShareFolderName,
                directory_name=combinedDestinationFolderName):
            file_service.create_directory(_destinationFileShareFolderName,
                                          combinedDestinationFolderName)

        # NOTE: the original "no files found" guard could never be true
        # (`is None and len(...) < 1`); an empty source is still a success.
        fileList = list(
            file_service.list_directories_and_files(
                _sourceFileShareFolderName,
                directory_name=_sourceDirectoryName))
        for imageFileName in fileList:
            if not ((_ExperimentName in imageFileName.name)
                    and imageFileName.name.endswith(_fileExtensionFilter)):
                continue
            source = "https://{0}.file.core.windows.net/{1}/{2}/{3}".format(
                _accountName, _sourceFileShareFolderName,
                _sourceDirectoryName, imageFileName.name)
            copy = file_service.copy_file(_destinationFileShareFolderName,
                                          combinedDestinationFolderName,
                                          imageFileName.name, source)

            # Poll for copy completion. The counter is (re)initialised per
            # file; it was previously read before ever being assigned.
            count = 0
            while copy.status != 'success':
                count += 1
                if count > self._COPY_POLL_LIMIT:
                    return False, 'Timed out waiting for async copy to complete., Filename = {0} '.format(
                        imageFileName.name)
                time.sleep(self._COPY_POLL_SECONDS)
                copy = file_service.get_file_properties(
                    _destinationFileShareFolderName,
                    combinedDestinationFolderName,
                    imageFileName.name).properties.copy

        return True, self._elapsed(start_time)

    def GetAllExperimentsWithMaskAndImageFileImpl(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _fileExtensionFilter='.jpg'):
        """
        For every experiment folder return its first file matching the
        extension filter together with the parsed mask file content.

        Experiments without a matching file are omitted.

        :return: ``(True, list_of_dicts)`` or ``(False, description)``.
        """
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description

        returnList = []
        experimentList = list(
            file_service.list_directories_and_files(
                _destinationFileShareFolderName,
                directory_name=_destinationDirectoryName))
        for experimentName in experimentList:
            experimentFolder = _destinationDirectoryName + "/" + experimentName.name
            # name of first file with extension = _fileExtensionFilter
            firstImage = next(
                (entry for entry in file_service.list_directories_and_files(
                    _destinationFileShareFolderName, experimentFolder)
                 if entry.name.endswith(_fileExtensionFilter)), None)
            if firstImage is None:
                continue
            # the mask file is read once per experiment, only when it is needed
            returnList.append({
                "experimentName": experimentName.name,
                "filename": firstImage.name,
                "maskContent": self._loadMaskContent(
                    file_service, _destinationFileShareFolderName,
                    experimentFolder)
            })
        return True, returnList

    def SaveMaskFileDataImpl(self, _sourceFileShareFolderName,
                             _sourceDirectoryName, _maskTags):
        """
        Validate ``_maskTags`` as JSON and store it as the mask file.

        :return: ``(True, elapsed)`` on success, ``(False, description)``
            when the pre-check fails or the mask data is invalid.
        """
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description

        # Validation failures must report False; `rv` is True at this point.
        if _maskTags is None or len(_maskTags) == 0:
            return False, "Invalid mask values!!!"
        try:
            masks = json.loads(_maskTags)
        except ValueError:
            return False, "masks passed cannot be converted to json objects"
        if not masks:
            return False, "Incorrect format of ask values!!!"

        file_service.create_file_from_text(_sourceFileShareFolderName,
                                           _sourceDirectoryName,
                                           self.maskFileName, _maskTags)
        return True, self._elapsed(start_time)

    def GetAllExperimentsFilesNotCopiedImpl(self,
                                            _destinationFileShareFolderName,
                                            _destinationDirectoryName,
                                            _experimentNames):
        '''
        Return the experiment names whose destination folder does not exist.

        _experimentNames is expected to contain a list of experiment names.
        If the destination experiment folder exists, it is assumed that the
        source files have been copied.
        TODO: a better implementation would compare the file names in the
        source with those under the experiment folder.

        :return: ``(True, elapsed, names)`` or ``(False, description, None)``.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        returnList = [
            experimentName for experimentName in _experimentNames
            if not file_service.exists(
                _destinationFileShareFolderName,
                _destinationDirectoryName + "/" + experimentName)
        ]
        return True, self._elapsed(start_time), returnList

    def TestGetAllExperimentNames(self, _destinationFileShareFolderName,
                                  _destinationDirectoryName):
        """Return ``(True, elapsed, entries)`` with the raw directory listing."""
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        experimentList = list(
            file_service.list_directories_and_files(
                _destinationFileShareFolderName,
                directory_name=_destinationDirectoryName))
        return True, self._elapsed(start_time), experimentList

    def GetAllSourceUniqueExperimentNamesImpl(self,
                                              _sourceFileShareFolderName,
                                              _sourceDirectoryName,
                                              _fileExtensionFilter='.jpg'):
        '''
        Extract the unique experiment names from the source folder, where the
        images are all clubbed together. The experiment name is the part of
        the file name before the first underscore.

        In our context the _sourceFileShareFolderName = 'linuxraspshare' and
        '_sourceDirectoryName' = 'Share'

        :return: ``(True, elapsed, names)`` in first-seen order, or
            ``(False, description, None)``.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description, None

        returnList = []
        seen = set()  # O(1) membership while returnList keeps the order
        for entry in file_service.list_directories_and_files(
                _sourceFileShareFolderName, _sourceDirectoryName):
            if not entry.name.endswith(_fileExtensionFilter):
                continue
            n = entry.name.find('_')
            if n > 0:
                expName = entry.name[0:n]
                if expName not in seen:
                    seen.add(expName)
                    returnList.append(expName)
        return True, self._elapsed(start_time), returnList

    def GetAllDestinationExperimentsWhereMaskFileNotPresentImpl(
            self, _destinationFileShareFolderName, _destinationDirectoryName,
            _experimentNames):
        '''
        Return the experiments that have no mask file under their destination
        folder, i.e. those not yet processed.

        _experimentNames contains the list of all the experiment names. If
        the mask file exists under the destination folder, it is assumed that
        the masking exercise has been done for that experiment.

        :return: ``(True, elapsed, names)`` or ``(False, description, None)``.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        returnList = [
            experimentName for experimentName in _experimentNames
            if not file_service.exists(
                _destinationFileShareFolderName,
                _destinationDirectoryName + "/" + experimentName,
                self.maskFileName)
        ]
        return True, self._elapsed(start_time), returnList

    def GetAllDestinationUniqueExperimentNamesImpl(
            self, _destinationFileShareFolderName, _destinationDirectoryName):
        '''
        Return the names of the entries that currently exist under the
        destination folder.

        _destinationFileShareFolderName = 'experiment'
        _destinationDirectoryName = 'object-detection'

        :return: ``(True, elapsed, names)`` or ``(False, description, None)``.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        returnList = [
            entry.name for entry in file_service.list_directories_and_files(
                _destinationFileShareFolderName, _destinationDirectoryName)
        ]
        return True, self._elapsed(start_time), returnList

    def GetAllDestinationExperimentNamesWithOutputFilesImpl(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName='output',
            _fileExtensionFilter='.jpg'):
        '''
        Return every experiment under the destination folder together with
        the image files contained inside its output folder.

        _destinationFileShareFolderName = 'experiment'
        _destinationDirectoryName = 'object-detection'

        :return: ``(True, elapsed, list_of_dicts)`` or
            ``(False, description, None)``.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        returnList = []
        experimentList = list(
            file_service.list_directories_and_files(
                _destinationFileShareFolderName, _destinationDirectoryName))
        for experimentName in experimentList:
            outputFiles = []
            combinedFolderName = (_destinationDirectoryName + "/" +
                                  experimentName.name + "/" + _outputFolderName)
            if file_service.exists(_destinationFileShareFolderName,
                                   combinedFolderName):
                outputFiles = [
                    entry.name
                    for entry in file_service.list_directories_and_files(
                        _destinationFileShareFolderName, combinedFolderName)
                    if entry.name.endswith(_fileExtensionFilter)
                ]
            # NOTE(review): experiments without an output folder are reported
            # with an empty file list -- confirm this matches the callers.
            returnList.append({
                'experimentName': experimentName.name,
                'outputFiles': outputFiles
            })
        return True, self._elapsed(start_time), returnList

    def deleteAllFiles(self,
                       _sourceFileShareFolderName,
                       _sourceDirectoryName,
                       _fileExtensionFilter='.jpg'):
        """
        Delete the entries of a directory, optionally restricted to a suffix.

        :param _fileExtensionFilter: only names ending with this suffix are
            deleted; ``None`` deletes every listed entry.
        :return: ``(True, "OK")`` or ``(False, description)`` when the
            pre-check fails. An empty directory is a successful no-op.
        """
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description

        # Materialise the listing before mutating the directory.
        fileList = list(
            file_service.list_directories_and_files(
                _sourceFileShareFolderName,
                directory_name=_sourceDirectoryName))
        for imageFileName in fileList:
            if (_fileExtensionFilter is None
                    or imageFileName.name.endswith(_fileExtensionFilter)):
                file_service.delete_file(_sourceFileShareFolderName,
                                         _sourceDirectoryName,
                                         imageFileName.name)
        return True, "OK"

    # ------------------------------------------------------------------
    # dashboard
    # ------------------------------------------------------------------
    def DashBoardGetAllFilesInfoImpl(self,
                                     _sourceFileShareFolderName,
                                     _sourceDirectoryNameList,
                                     _destinationFileShareFolderName,
                                     _destinationDirectoryName,
                                     _outputFolderName='output',
                                     _fileExtensionFilter='.jpg'):
        '''
        Mother of all functions: scans through each and every file and
        returns lots of information. Could take up-to 40+ minutes to run.

        :return: the tuple produced by ``returnFormattedValue``.
        '''
        start_time = datetime.datetime.now()
        print('phase1')
        result, description, returnSourceDict = self.DashBoardGetAllSourceFilesInfoImpl(
            _sourceFileShareFolderName, _sourceDirectoryNameList,
            _fileExtensionFilter)
        if not result:
            # The original dropped this return value and yielded None.
            return self.returnFormattedValue(start_time, result, description,
                                             None)

        print('phase2')
        result, description, returnDestinationDict = self.DashBoardGetAllDestinationFilesInfoImpl(
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName,
            _fileExtensionFilter,
            _returnDict=returnSourceDict)
        if not result:
            return self.returnFormattedValue(start_time, result, description,
                                             None)

        print('phase3')
        # combine the two dictionaries; the source one is assumed to contain
        # the superset of keys
        combinedDict = dict()
        for key, sourceValues in returnSourceDict.items():
            combined = [0, 0, False, 0, 0, False, 0, 0]
            combined[0:2] = sourceValues[0:2]
            if key in returnDestinationDict:
                combined[2:8] = returnDestinationDict[key][2:8]
            combinedDict[key] = combined
        return self.returnFormattedValue(start_time, True, "OK", combinedDict)

    def DashBoardGetAllSourceFilesInfoImplWrapper(self,
                                                  _sourceFileShareFolderName,
                                                  _sourceDirectoryNameList,
                                                  _fileExtensionFilter='.jpg'):
        """Run ``DashBoardGetAllSourceFilesInfoImpl`` and format its result."""
        start_time = datetime.datetime.now()
        result, description, returnDict = self.DashBoardGetAllSourceFilesInfoImpl(
            _sourceFileShareFolderName, _sourceDirectoryNameList,
            _fileExtensionFilter)
        return self.returnFormattedValue(start_time, result, description,
                                         returnDict)

    def DashBoardGetAllSourceFilesInfoImpl(self,
                                           _sourceFileShareFolderName,
                                           _sourceDirectoryNameList,
                                           _fileExtensionFilter='.jpg'):
        '''
        Count the files and sum their sizes per experiment across one or more
        source directories, where the images are all clubbed together.

        In our context the _sourceFileShareFolderName = 'linuxraspshare' and
        the directories are e.g. 'Share' and 'backup'.

        :return: ``(True, "OK", dict)`` mapping experiment name to an
            8-element list (index 0 = file count, index 1 = total size), or
            ``(False, description, None)``.
        '''
        if not _sourceDirectoryNameList:
            return False, "No source directory supplied", None

        # check the existence of first source folder
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryNameList[0])
        if not rv:
            return rv, description, None

        returnDict = dict()
        for _sourceDirectoryName in _sourceDirectoryNameList:
            for imageFileName in file_service.list_directories_and_files(
                    _sourceFileShareFolderName, _sourceDirectoryName):
                if not imageFileName.name.endswith(_fileExtensionFilter):
                    continue
                n = imageFileName.name.find('_')
                if n <= 0:
                    # no experiment prefix: skip without fetching properties
                    continue
                fileProperties = file_service.get_file_properties(
                    _sourceFileShareFolderName, _sourceDirectoryName,
                    imageFileName.name)
                fileLength = fileProperties.properties.content_length
                expName = imageFileName.name[0:n]
                if expName not in returnDict:
                    returnDict[expName] = [
                        1, fileLength, False, 0, 0, False, 0, 0
                    ]
                else:
                    returnDict[expName][0] += 1
                    returnDict[expName][1] += fileLength
        return True, "OK", returnDict

    def DashBoardGetAllDestinationFilesInfoImplWrapper(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName='output',
            _fileExtensionFilter='.jpg',
            _returnDict=None,
            _file_service=None):
        """
        Run ``DashBoardGetAllDestinationFilesInfoImpl`` and format its result.

        ``_file_service`` is accepted for compatibility and is not used.
        """
        start_time = datetime.datetime.now()
        result, description, returnDict = self.DashBoardGetAllDestinationFilesInfoImpl(
            _destinationFileShareFolderName, _destinationDirectoryName,
            _outputFolderName, _fileExtensionFilter, _returnDict)
        return self.returnFormattedValue(start_time, result, description,
                                         returnDict)

    def DashBoardGetAllDestinationFilesInfoImpl(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName='output',
            _fileExtensionFilter='.jpg',
            _returnDict=None):
        """
        Collect per-experiment statistics from the destination folders.

        Each value is an 8-element list; this method fills:
        [2] mask file exists, [3]/[4] number/size of files in the experiment
        root folder, [5] output folder exists, [6]/[7] number/size of files
        in the output folder.

        :param _returnDict: optional dict to update in place; when ``None``
            the experiment names are taken from the destination listing.
        :return: ``(True, "OK", dict)`` or ``(False, description, None)``.
        """
        start_time = datetime.datetime.now()
        print(start_time)
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        if _returnDict is not None:
            returnDict = _returnDict
        else:
            # 1st pass, get all the experiment names, which are provided by
            # the folder names
            returnDict = dict()
            for experimentName in file_service.list_directories_and_files(
                    _destinationFileShareFolderName,
                    _destinationDirectoryName):
                if experimentName.name not in returnDict:
                    returnDict[experimentName.name] = [
                        0, 0, False, 0, 0, False, 0, 0
                    ]

        # 2nd pass, find all the properties of the images
        for key in returnDict:
            print(key)
            combinedFolderName = _destinationDirectoryName + "/" + key
            # NOTE(review): the root-folder counts are gathered only when the
            # mask file exists (nesting reconstructed from the flattened
            # source) -- confirm against the original layout.
            if file_service.exists(_destinationFileShareFolderName,
                                   combinedFolderName, self.maskFileName):
                if _returnDict is None:
                    returnDict[key][0] = 0
                    returnDict[key][1] = 0
                returnDict[key][2] = True
                numberOfFiles, sizeOfFiles = self.getNumberOfFilesAndFileSize(
                    file_service, _destinationFileShareFolderName,
                    combinedFolderName, _fileExtensionFilter)
                returnDict[key][3] = numberOfFiles
                returnDict[key][4] = sizeOfFiles

            combinedFolderName = (_destinationDirectoryName + "/" + key +
                                  "/" + _outputFolderName)
            if file_service.exists(_destinationFileShareFolderName,
                                   combinedFolderName):
                returnDict[key][5] = True
                numberOfFiles, sizeOfFiles = self.getNumberOfFilesAndFileSize(
                    file_service, _destinationFileShareFolderName,
                    combinedFolderName, _fileExtensionFilter)
                returnDict[key][6] = numberOfFiles
                returnDict[key][7] = sizeOfFiles
        return True, "OK", returnDict

    def returnFormattedValue(self, start_time, result, description,
                             returnDict):
        """
        Convert a dashboard dict into the list-of-items shape.

        :return: ``(True, elapsed, [{'ExperimentName', 'Properties'}, ...])``
            when ``result`` is true, otherwise ``(False, description, None)``.
        """
        if not result:
            return False, description, None
        retValue = [{
            'ExperimentName': key,
            'Properties': value
        } for key, value in returnDict.items()]
        return True, self._elapsed(start_time), retValue

    def getNumberOfFilesAndFileSize(self, file_service, shareFolder,
                                    directoryName, _fileExtensionFilter):
        """Return ``(count, total_size)`` of the files matching the suffix in a directory."""
        numberOfFiles = 0
        sizeOfFiles = 0
        for imageFileName in file_service.list_directories_and_files(
                shareFolder, directoryName):
            if imageFileName.name.endswith(_fileExtensionFilter):
                numberOfFiles += 1
                fileProperties = file_service.get_file_properties(
                    shareFolder, directoryName, imageFileName.name)
                sizeOfFiles += fileProperties.properties.content_length
        return numberOfFiles, sizeOfFiles

    # ------------------------------------------------------------------
    # simple file / directory helpers
    # ------------------------------------------------------------------
    def getListOfAllFiles(self, _destinationFileShareFolderName,
                          _destinationDirectoryName):
        """Return ``(True, elapsed, entries)`` with the directory listing."""
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        experimentList = list(
            file_service.list_directories_and_files(
                _destinationFileShareFolderName, _destinationDirectoryName))
        return True, self._elapsed(start_time), experimentList

    def isFile(self, _destinationFileShareFolderName,
               _destinationDirectoryName, fileName):
        """Return ``(True, elapsed, exists)`` for the given file."""
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        rv = file_service.exists(_destinationFileShareFolderName,
                                 _destinationDirectoryName, fileName)
        return True, self._elapsed(start_time), rv

    def createDirectory(self, _destinationFileShareFolderName,
                        _destinationDirectoryName):
        """
        Create the directory when it does not exist yet.

        The pre-check skips the share/directory existence tests.

        :return: ``(True, elapsed, created)`` where ``created`` is the result
            of ``create_directory`` (or True when the directory already
            existed), or ``(False, description, None)``.
        """
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName, False)
        if not rv:
            return rv, description, None

        if not file_service.exists(_destinationFileShareFolderName,
                                   directory_name=_destinationDirectoryName):
            # The account name and key were printed here before; credentials
            # must never be written to stdout.
            print(_destinationFileShareFolderName)
            print(_destinationDirectoryName)
            rv = file_service.create_directory(
                _destinationFileShareFolderName, _destinationDirectoryName)
        return True, self._elapsed(start_time), rv

    def removeAllFiles(self, _destinationFileShareFolderName,
                       _destinationDirectoryName):
        """Delete every entry of the directory; returns ``(True, elapsed, ok)``."""
        start_time = datetime.datetime.now()
        rv, _desc = self.deleteAllFiles(_destinationFileShareFolderName,
                                        _destinationDirectoryName, None)
        return True, self._elapsed(start_time), rv

    def getMaskFileContent(self, _destinationFileShareFolderName,
                           _destinationDirectoryName):
        """
        Return ``(True, elapsed, mask)`` where ``mask`` is the parsed mask
        file, or ``''`` when the file is missing or empty.
        """
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        maskContent = self._loadMaskContent(file_service,
                                            _destinationFileShareFolderName,
                                            _destinationDirectoryName)
        return True, self._elapsed(start_time), maskContent

    def saveFileImage(self, _destinationFileShareFolderName,
                      _destinationDirectoryName, fileName, byteArray):
        """Create ``fileName`` from ``byteArray``; returns ``(True, elapsed, 0)``."""
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description, None

        # create file from the byteArray passed. Will need to check if this
        # can be read back later.
        file_service.create_file_from_bytes(_destinationFileShareFolderName,
                                            _destinationDirectoryName,
                                            fileName, byteArray)
        return True, self._elapsed(start_time), 0

    # ------------------------------------------------------------------
    # masked file implementations
    # ------------------------------------------------------------------
    def GetMaskedImageImpl(self, _sourceFileShareFolderName,
                           _sourceDirectoryName, _imageFileName, _maskTags):
        '''
        Return the image with the mask polygon blacked out.

        The mask comes from ``_maskTags`` (JSON text) or, when that is
        ``None``/empty, from the mask file in the source folder.

        _sourceDirectoryName : format should be directoryName/secondDirectoryName/

        :return: ``(True, "OK", encoded_jpg)`` or ``(False, description, None)``.
        '''
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description, None

        masks = []
        if _maskTags is None or len(_maskTags) == 0:
            # expectation is that the mask file exists in the source folder
            print('loadFromCloud')
            if not file_service.exists(_sourceFileShareFolderName,
                                       _sourceDirectoryName,
                                       self.maskFileName):
                return False, "_maskTags cannot be null as maskImage file also not exist!!!", None
            fileMask = file_service.get_file_to_text(
                _sourceFileShareFolderName, _sourceDirectoryName,
                self.maskFileName)
            if not (fileMask is not None and fileMask.content):
                return False, "Unable to load filemask ", None
            masks = json.loads(fileMask.content)
            if not (masks is not None and len(masks) > 0):
                return False, "unable to load valid values for mask", None
        else:
            masks = json.loads(_maskTags)

        if masks is not None and len(masks) > 0:
            return self.GetRawSourceImageImpl(_sourceFileShareFolderName,
                                              _sourceDirectoryName,
                                              _imageFileName, True, masks)
        return False, "Mask value not set in logic!!!", None

    def GetRawSourceImageImpl(self,
                              _sourceFileShareFolderName,
                              _sourceDirectoryName,
                              _imageFileName,
                              loadMask=False,
                              masks=None):
        """
        Download an image, optionally black out the mask polygon, and return
        it JPEG-encoded.

        :param loadMask: when True, fill the polygon given by ``masks``.
        :param masks: polygon points passed to ``cv2.fillPoly``.
        :return: ``(True, "OK", encoded_jpg)`` or ``(False, description, None)``.
        """
        ok, description, raw = self._downloadFileBytes(
            _sourceFileShareFolderName, _sourceDirectoryName, _imageFileName)
        if not ok:
            return False, description, None

        # flag 1 == cv2.IMREAD_COLOR
        cv2_img = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), 1)
        if cv2_img is None:
            return False, "Unable to decode : " + _imageFileName, None

        # TODO Not sure this is needed, COLOR_BGR2RGB or might be reversing
        # the image
        colorImage = cv2.cvtColor(cv2_img, cv2.COLOR_RGB2BGR)
        if colorImage is None:
            return False, "Unable to convert image to COLOR_BGR2RGB :" + _imageFileName, None

        if loadMask:
            cv2.fillPoly(colorImage, [np.array(masks)], (0, 0, 0))
        _, _encoded_image = cv2.imencode('.jpg', colorImage)
        return True, "OK", _encoded_image

    def GetRawImage(self, _sourceFileShareFolderName, _sourceDirectoryName,
                    _imageFileName):
        """
        Download an image and return it decoded by ``cv2.imdecode``.

        :return: ``(True, "OK", image)`` or ``(False, description, None)``.
        """
        ok, description, raw = self._downloadFileBytes(
            _sourceFileShareFolderName, _sourceDirectoryName, _imageFileName)
        if not ok:
            return False, description, None

        cv2_img = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), 1)
        if cv2_img is None:
            return False, "Unable to decode : " + _imageFileName, None
        return True, "OK", cv2_img

    def GetRawImageAsBytes(self, _sourceFileShareFolderName,
                           _sourceDirectoryName, _imageFileName):
        """
        Download a file and return its raw bytes.

        :return: ``(True, "OK", data)`` or ``(False, description, None)``.
        """
        return self._downloadFileBytes(_sourceFileShareFolderName,
                                       _sourceDirectoryName, _imageFileName)
class Crawler:
    """Selenium-driven crawler that downloads files and uploads them to storage.

    The constructor reads settings from the database (falling back to the
    config object), starts a Chrome browser and connects to the Azure file
    share.  Any error raised during set-up is stored in ``error_message``
    instead of being propagated.

    Subclasses must override ``_get_remote_filename``.
    """

    def __init__(self, config, section, script_name=None, error_message=None):
        """Set up DB access, the download directory, the browser and storage.

        :param config: config object offering ``get`` / ``getboolean``.
        :param section: config section holding this crawler's settings.
        :param script_name: name recorded with every file status row.
        :param error_message: initial error message; overwritten when set-up fails.
        """
        self.script_name = script_name
        self.config = config
        self.db = DbCommunicator(config)
        self.error_message = error_message
        # Defined up front so get_property() works even if the DB read fails.
        self.dbparams = None
        try:
            self.section = section
            self.dbparams = self.db.readProps('general')
            self.dbparams.update(self.db.readProps(section))
            self.downloads_path = self.get_property('downloads_path', section)
            self.overwrite_remote_files = self.get_property(
                'overwrite_remote_files', section, 'bool')
            if not os.path.exists(self.downloads_path):
                os.makedirs(self.downloads_path)
            elif not os.path.isdir(self.downloads_path):
                print(
                    'ERROR:{} downloads_path parameter points to file!'.format(
                        section))
                # SystemExit is not an Exception, so it is not swallowed below.
                sys.exit(1)
            self.headless_mode = self.get_property('headless_mode', 'general',
                                                   'bool')
            if self.headless_mode:
                display = Display(visible=0, size=(1920, 1080))
                display.start()
            options = webdriver.ChromeOptions()
            options.add_argument("--no-sandbox")
            options.add_argument('--headless')
            prefs = {
                'download.default_directory': self.downloads_path,
                'download.prompt_for_download': False,
                'download.directory_upgrade': True,
                'plugins.always_open_pdf_externally': True,
            }
            options.add_experimental_option("prefs", prefs)
            self.browser = webdriver.Chrome(
                chrome_options=options,
                service_args=["--verbose", "--log-path=/tmp/selenium.log"])
            self.browser.implicitly_wait(10)
            self.browser.set_page_load_timeout(10000)
            self.browser.set_script_timeout(10000)
            self.file_storage_connect()
        except Exception as e:
            self.error_message = str(e)

    def get_property(self, prop, section, type='str'):
        """Return a setting, preferring the DB parameters over the config file.

        :param prop: setting name.
        :param section: config section used when the DB has no such setting.
        :param type: ``'str'`` or ``'bool'``; any other value yields ``None``.
        :return: the setting value.  DB booleans are true only for the exact
            string ``'True'``.
        """
        in_db = self.dbparams is not None and prop in self.dbparams
        if type == 'str':
            if in_db:
                return self.dbparams[prop]
            return self.config.get(section, prop).strip()
        if type == 'bool':
            if in_db:
                return self.dbparams[prop] == 'True'
            return self.config.getboolean(section, prop, fallback=False)
        return None

    def file_storage_connect(self):
        """Create the Azure ``FileService`` and make sure the prefix directory exists.

        Connection problems are printed, not raised.
        """
        self.file_storage_url = self.get_property('fs_server', 'general')
        self.file_storage_user = self.get_property('fs_username', 'general')
        self.file_storage_pwd = self.get_property('fs_password', 'general')
        self.file_storage_share = self.get_property('fs_share', 'general')
        self.file_storage_dir = self.get_property('fs_directory_prefix',
                                                  'general')
        self.file_service = FileService(account_name=self.file_storage_user,
                                        account_key=self.file_storage_pwd)
        try:
            if self.file_service.exists(self.file_storage_share):
                print(
                    'Connection to Azure file storage successfully established...'
                )
                if len(self.file_storage_dir) > 0 and not self.file_service.exists(
                        self.file_storage_share,
                        directory_name=self.file_storage_dir):
                    # Create the prefix one path level at a time.
                    subdirfull = ""
                    for subdir in self.file_storage_dir.split('/'):
                        subdirfull += subdir
                        self.file_service.create_directory(
                            self.file_storage_share, subdirfull)
                        subdirfull += "/"
                    print('Created directory:' + self.file_storage_dir)
            else:
                print(
                    'Filaed to connect to Asure file storage, share does not exist: '
                    + self.file_storage_share)
        except Exception as ex:
            print('Error connecting to Azure file storage: ', ex)

    def ftp_connect(self):
        """Open and log in to the FTP server named in the ``general`` config section."""
        self.ftp = FTP()
        self.ftp.connect(
            self.config.get('general', 'ftp_server').strip(),
            int(self.config.get('general', 'ftp_port')),
        )
        self.ftp.login(
            user=self.config.get('general', 'ftp_username').strip(),
            passwd=self.config.get('general', 'ftp_password').strip(),
        )
        print('Connection to ftp successfully established...')

    def get(self, url):
        """Navigate the browser to *url*, then pause for three seconds."""
        self.browser.get(url)
        time.sleep(3)

    def assert_exists(self, selector):
        """Look up *selector*; the lookup raises when no element matches."""
        _ = self.browser.find_element_by_css_selector(selector)

    def get_elements(self, selector, root=None):
        """Return all elements matching *selector* under *root* (default: the page)."""
        if root is None:
            root = self.browser
        return root.find_elements_by_css_selector(selector)

    def wait_for_displayed(self, selector):
        """Block until the element matching *selector* reports it is displayed."""
        element = self.browser.find_element_by_css_selector(selector)
        while not element.is_displayed():
            # Poll with a short pause instead of spinning a CPU core.
            time.sleep(0.1)

    def click_by_text(self, text):
        """Click the link whose visible text is *text*, then pause."""
        # The original located the link but never clicked it.
        self.browser.find_element_by_link_text(text).click()
        time.sleep(3)

    def click_xpath(self, path, single=True):
        """Click the first (or, with ``single=False``, every) element matching *path*."""
        if single:
            self.browser.find_element_by_xpath(path).click()
        else:
            for el in self.browser.find_elements_by_xpath(path):
                el.click()
        time.sleep(3)

    def click(self, selector, single=True, root=None):
        """Click the first (or every) element matching the CSS *selector* under *root*."""
        if root is None:
            root = self.browser
        if single:
            root.find_element_by_css_selector(selector).click()
        else:
            for el in root.find_elements_by_css_selector(selector):
                el.click()
        time.sleep(3)

    def send_keys(self, selector, keys):
        """Clear the element matching *selector* and type *keys* into it."""
        elem = self.browser.find_element_by_css_selector(selector)
        elem.clear()
        elem.send_keys(keys)
        time.sleep(3)

    def open_new_tab(self):
        """Open a blank tab and switch to the window handle at index 1."""
        self.browser.execute_script("window.open('');")
        self.browser.switch_to.window(self.browser.window_handles[1])

    def close_current_tab(self):
        """Close the current tab and switch to the last remaining window handle."""
        self.browser.close()
        self.browser.switch_to.window(self.browser.window_handles[-1])

    def get_text(self, selector, single=True, root=None):
        """Return the text of the first match, or a list of texts when ``single=False``."""
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).text
        return [el.text for el in root.find_elements_by_css_selector(selector)]

    def get_attr(self, selector, attr, single=True, root=None):
        """Return attribute *attr* of the first match, or of all matches as a list."""
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).get_attribute(
                attr)
        return [
            el.get_attribute(attr)
            for el in root.find_elements_by_css_selector(selector)
        ]

    def execute(self, script):
        """Run JavaScript *script* in the page, then pause."""
        self.browser.execute_script(script, [])
        time.sleep(3)

    def deselect_all(self, selector):
        """Deselect every option of the ``<select>`` matching *selector*."""
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.deselect_all()
        time.sleep(3)

    def select_option(self, selector, option):
        """Select the option with visible text *option*."""
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.select_by_visible_text(option)
        time.sleep(3)

    def select_option_by_index(self, selector, index):
        """Select option number *index*; return ``False`` when it is out of range."""
        select = Select(self.browser.find_element_by_css_selector(selector))
        if index < len(select.options):
            select.select_by_index(index)
            time.sleep(3)
            return True
        return False

    def back(self):
        """Go back one page in the browser history, then pause."""
        self.browser.back()
        time.sleep(3)

    def close_dialog(self):
        """Dismiss a JavaScript alert if one is open; otherwise do nothing."""
        try:
            alert = self.browser.switch_to.alert
            alert.dismiss()
        except Exception:
            # Best effort: errors while dismissing (e.g. no alert open) are ignored.
            pass

    def close(self):
        """Quit the browser and close the DB connection, whichever exist."""
        if hasattr(self, 'browser'):
            self.browser.quit()
        if hasattr(self, 'db'):
            self.db.close()

    def download(self, url, filename, file_db_id=None):
        """Download *url* into the downloads directory, trying up to three times.

        An attempt succeeds when the size on disk equals the announced
        Content-Length.  When the server announces no length, the first
        attempt that completes without error is accepted.  The outcome is
        recorded through ``self.db.saveFileStatus``.

        :param url: address to fetch.
        :param filename: name of the local file inside ``downloads_path``.
        :param file_db_id: id of an existing status row to update, if any.
        :return: ``True`` when the file was downloaded, else ``False``.
        """
        if url.startswith('https'):
            # NOTE(review): certificate and host name verification are
            # disabled here; kept as in the original, confirm it is intended.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        else:
            ctx = None
        content_length = 1
        retry = 0
        file_size = 0
        file_name = ''
        while file_size != content_length and retry < 3:
            try:
                with urllib.request.urlopen(url, context=ctx) as response:
                    content_length = getattr(response, 'length', None)
                    file_name = os.path.join(self.downloads_path, filename)
                    with open(file_name, 'wb') as f:
                        f.write(response.read())
                file_size = os.stat(file_name).st_size
                if content_length is None:
                    # No Content-Length header: nothing to compare against,
                    # so a completed read counts as a complete download.
                    content_length = file_size
                retry += 1
            except Exception as e:
                retry += 1
                print('Attempt', retry, 'ERROR: Downloading failed!', url,
                      str(e))
                try:
                    os.remove(file_name)
                except OSError:
                    pass
        downloaded = file_size == content_length
        status_fields = {
            'script_name': self.script_name,
            'file_original_name': filename,
            'file_status': 'Downloaded' if downloaded else 'None',
        }
        if file_db_id:
            status_fields['id'] = file_db_id
        self.db.saveFileStatus(**status_fields)
        return downloaded

    def _get_remote_filename(self, local_filename):
        """Map a local file name to its remote location; subclasses must override.

        The callers in this class unpack the result as ``(directory, name)`` or
        ``(directory, name, year)`` and treat a falsy result as "skip this file".
        """
        raise NotImplementedError

    def merge_files(self, filenames):
        """Concatenate the PDFs in *filenames* with ``pdftk``.

        The output name is the first file name up to ``' part'`` plus ``.pdf``.

        :return: the output file name wrapped in double quotes.
        """
        # Local import: the rest of the file is not visible from this block.
        import subprocess

        output_name = filenames[0].split(' part')[0] + '.pdf'
        command = ['pdftk'] + list(filenames) + ['cat', 'output', output_name]
        try:
            # Argument list instead of a shell string, so quotes or shell
            # metacharacters in a file name cannot alter the command.
            subprocess.run(command, check=False)
        except OSError as err:
            print('Error running pdftk,', str(err))
        return '"' + output_name + '"'

    def upload_to_ftp(self, filename):
        """Kept for existing callers; the upload now goes to Azure file storage."""
        self.upload_to_file_storage(filename)

    def upload_to_ftp_old(self, filename):
        """Upload *filename* from the downloads directory over FTP (up to 3 tries)."""
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                with open(path, 'rb') as pdf_file:
                    remote_filename = self._get_remote_filename(filename)
                    if not remote_filename:
                        return
                    # Separate name so a retry still opens the local file.
                    directory, remote_name = remote_filename
                    try:
                        self.ftp.cwd('/{}'.format(directory))
                    except Exception:
                        self.ftp.mkd('/{}'.format(directory))
                        self.ftp.cwd('/{}'.format(directory))
                    if not self.overwrite_remote_files:
                        try:
                            # A successful RETR means the file already exists.
                            self.ftp.retrbinary('RETR {}'.format(remote_name),
                                                lambda x: x)
                            return
                        except error_perm:
                            pass
                    self.ftp.storbinary('STOR {}'.format(remote_name),
                                        pdf_file)
                retries = 3
            except Exception as e:
                print('Error uploading to ftp,', str(e))
                retries += 1
                try:
                    self.ftp.voidcmd("NOOP")
                except Exception:
                    # The connection is gone; reconnect before the next try.
                    self.ftp_connect()

    def move_to_another(self, filename):
        """Move a file on the FTP server out of ``/General Purpose``.

        Nothing is moved when the second ``|``-separated field of *filename*
        is County, City, Township or Village.  Errors are printed, not raised.
        """
        try:
            entity_type = filename.split('|')[1]
            remote_filename = self._get_remote_filename(filename)
            if not remote_filename:
                return
            if entity_type in ('County', 'City', 'Township', 'Village'):
                return
            directory, server_filename = remote_filename
            self.ftp.rename('/General Purpose/{}'.format(server_filename),
                            '/{}/{}'.format(directory, server_filename))
            print('Moved {} to {}'.format(server_filename, directory))
        except Exception as e:
            print(str(e))

    def _ensure_remote_directory(self, directory):
        """Create *directory* on the file share unless it already exists."""
        if not self.file_service.exists(self.file_storage_share,
                                        directory_name=directory):
            self.file_service.create_directory(self.file_storage_share,
                                               directory)

    def _save_upload_status(self, file_details, original_name, directory,
                            remote_name, status, notes=None):
        """Insert or update the status row describing an upload outcome."""
        fields = {
            'file_upload_path': directory,
            'file_upload_name': remote_name,
            'file_status': status,
        }
        if notes is not None:
            fields['notes'] = notes
        if file_details is None:
            fields['script_name'] = self.script_name
            fields['file_original_name'] = original_name
        else:
            fields['id'] = file_details['id']
        self.db.saveFileStatus(**fields)

    def upload_to_file_storage(self, filename):
        """Upload *filename* from the downloads directory to the Azure file share.

        Files already recorded as uploaded are skipped.  The remote location
        comes from ``_get_remote_filename``.  When overwriting is disabled and
        the remote file exists, only the status row is written.  The whole
        operation is tried up to three times; errors are printed, not raised.
        """
        fnm = FilenameManager()
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Uploaded')
                if file_details is not None:
                    print(
                        'File {} was already uploaded before'.format(filename))
                    return
                # 'Uplodaed Before' (sic) is the value stored in the DB.
                file_details = self.db.readFileStatus(
                    file_original_name=filename,
                    file_status='Other',
                    notes='Uplodaed Before')
                if file_details is not None:
                    print(
                        'File {} was already uploaded before'.format(filename))
                    return
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Downloaded')
                print('Uploading {}'.format(path))
                remote_filename = self._get_remote_filename(filename)
                if not remote_filename:
                    return
                # The year component is optional; default it so the check
                # below is safe for two-element results.
                year = None
                try:
                    directory, remote_name, year = remote_filename
                except ValueError:
                    directory, remote_name = remote_filename
                remote_name = fnm.azure_validate_filename(remote_name)
                if len(self.file_storage_dir) > 0:
                    directory = self.file_storage_dir + '/' + directory
                self._ensure_remote_directory(directory)
                if year:
                    directory += '/' + year
                    self._ensure_remote_directory(directory)
                if not self.overwrite_remote_files:
                    print('Checking if {}/{} already exists'.format(
                        directory, remote_name))
                    if self.file_service.exists(self.file_storage_share,
                                                directory_name=directory,
                                                file_name=remote_name):
                        print('{}/{} already exists'.format(
                            directory, remote_name))
                        self._save_upload_status(file_details, filename,
                                                 directory, remote_name,
                                                 'Other',
                                                 notes='Uplodaed Before')
                        return
                self.file_service.create_file_from_path(
                    self.file_storage_share,
                    directory,
                    remote_name,
                    path,
                    content_settings=ContentSettings(
                        content_type='application/pdf'))
                self._save_upload_status(file_details, filename, directory,
                                         remote_name, 'Uploaded')
                print('{} uploaded'.format(path))
                retries = 3
            except Exception as e:
                print('Error uploading to Asure file storage,', str(e))
                retries += 1
class AmlAgent():
    """Uploads local training data to an Azure ML file share.

    NOTE(review): the storage account name and SAS token are hard-coded and
    the service is created with ``protocol='http'``, so the token travels
    unencrypted.  Both are kept as in the original; consider loading the
    credentials from configuration and switching to HTTPS.
    """

    file_service = None

    def __init__(self):
        """Create the ``FileService`` client and remember the share name."""
        self.file_service = FileService(account_name='chrisamlstoragemjeiyhfu', sas_token='?sv=2018-03-28&ss=bfqt&srt=sco&sp=rwdlacup&se=2029-01-31T18:58:16Z&st=2019-01-31T10:58:16Z&spr=https,http&sig=FMkEUPqTAT7%2BmXZXFjeBaQ1Ydoik8Kic1ZYcGtHYc3w%3D', protocol='http')
        self.share_name = 'azureml-filestore-53a6a9d0-a7f9-4336-a702-19f40d38db08'

    # Listing examples kept from the original file:
    #   # List from root
    #   root_file_dir = list(self.file_service.list_directories_and_files(share_name))
    #   for res in root_file_dir:
    #       print(res.name)  # dir1, dir2, rootfile
    #   # Num results
    #   root_file_dir = list(self.file_service.list_directories_and_files(share_name, num_results=2))
    #   for res in root_file_dir:
    #       print(res.name)  # dir1, dir2
    #   # List from directory
    #   dir1 = list(self.file_service.list_directories_and_files(share_name, 'test data'))
    #   for res in dir1:
    #       print(res.name)  # file1, file2

    def upload_data(self):
        """Upload the ``tub`` folder of the current directory to the share."""
        print("uploading data...")
        source_folder_name = 'tub'
        source_path = '.'
        self.copy_files(source_path, source_folder_name)

    def copy_files(self, source_path, source_folder_name):
        """Upload every regular file in ``source_path/source_folder_name``.

        The files go into a share directory named *source_folder_name*, which
        is created when missing.  Sub-directories are not descended into.

        :param source_path: local parent directory.
        :param source_folder_name: folder name, used locally and on the share.
        """
        dest_folder_name = source_folder_name
        if not self.file_service.exists(share_name=self.share_name,
                                        directory_name=dest_folder_name):
            self.file_service.create_directory(self.share_name,
                                               dest_folder_name)
        full_source_path = join(source_path, source_folder_name)
        print('full_source_path: {}'.format(full_source_path))
        files_in_source_folder = [
            f for f in listdir(full_source_path)
            if isfile(join(full_source_path, f))
        ]
        for file_name in files_in_source_folder:
            full_source_file_name = join(full_source_path, file_name)
            print(file_name)
            self.file_service.create_file_from_path(
                self.share_name,  # share name
                dest_folder_name,  # directory on the share
                file_name,  # destination file name
                full_source_file_name,  # full local path including file name
                progress_callback=self.generate_progress_callback(file_name))

    def generate_progress_callback(self, file_name):
        """Return a callback that prints ``(file_name, current, total)``."""

        def progress_callback(current, total):
            print('({}, {}, {})'.format(file_name, current, total))

        return progress_callback


# NOTE(review): the two functions below take no ``self``; the flattened source
# does not show whether they were nested in AmlAgent, so they are emitted at
# module level - confirm against the callers.
def download_model():
    """Placeholder: only announces the download."""
    print("downloading model...")


def download_zip_file_from_blob():
    """Download every blob of the container and unpack the zip archives.

    Each blob is saved below ``local_path``.  Blobs that are zip archives are
    extracted into ``local_path``; other blobs are only downloaded.  Errors
    are printed, not raised.

    NOTE(review): the account name, SAS token and local path are hard-coded.
    """
    try:
        # Client for the Blob service of the storage account.
        block_blob_service = BlockBlobService(account_name='chrisamlstoragemjeiyhfu', sas_token='?sv=2018-03-28&ss=bf&srt=sco&sp=rwdlac&se=2029-02-04T23:12:23Z&st=2019-02-04T15:12:23Z&sip=0.0.0.0-255.255.255.255&spr=https,http&sig=SwxeDkbctxYI2nV9acctrUaCvL5EsM2PO7GK4eMCNv4%3D')
        container_name = 'azureml-blobstore-53a6a9d0-a7f9-4336-a702-19f40d38db08'
        print("\nList blobs in the container")
        generator = block_blob_service.list_blobs(container_name)
        local_path = "C:/Users/chris/mtccar/data"
        for blob in generator:
            print("\t Blob name: " + blob.name)
            full_path_to_file = os.path.join(local_path, blob.name)
            print("\nDownloading blob to " + full_path_to_file)
            # Blob names may contain '/', so create the local folders first.
            target_dir = os.path.dirname(full_path_to_file)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            block_blob_service.get_blob_to_path(container_name, blob.name,
                                                full_path_to_file)
            # Previously the first non-zip blob raised and ended the loop.
            if zipfile.is_zipfile(full_path_to_file):
                with zipfile.ZipFile(full_path_to_file, "r") as zip_ref:
                    zip_ref.extractall(local_path)
            else:
                print("\t Not a zip archive, skipping extraction: " +
                      blob.name)
    except Exception as e:
        print(e)
def upload_to_file_storage():
    """Upload every PDF found under ``dir_upload`` to the Azure file share.

    A PDF directly inside ``dir_upload`` goes to ``Unclassified``.  Any other
    PDF goes to ``<grandparent folder>/<parent folder>``; the parent folder is
    the ``year`` level.  Both are prefixed with ``fs_directory_prefix`` when
    that setting is not empty.  The local file is deleted after a successful
    upload, and also when the remote file or an 'Uploaded' status row already
    exists.  Each file is tried up to three times.  Errors are printed; errors
    outside the per-file retry loop are also logged as critical.
    """
    fnm = FilenameManager()
    # Build the recursive glob pattern for the PDFs.
    template = dir_upload + "**"
    if operating_system == 'mac' or operating_system == 'linux':
        template += '/*.pdf'
    elif operating_system == 'windows':
        template += '\\*.pdf'
    lpdfs = glob.glob(template, recursive=True)
    lpdfs.sort()
    try:
        # Not used below; kept so a missing 'fs_server' setting still raises.
        file_storage_url = dparameters['fs_server'].strip()
        file_storage_user = dparameters['fs_username'].strip()
        file_storage_pwd = dparameters['fs_password'].strip()
        file_storage_share = dparameters['fs_share'].strip()
        file_storage_dir = dparameters['fs_directory_prefix'].strip()
        file_service = FileService(account_name=file_storage_user,
                                   account_key=file_storage_pwd)

        def ensure_directory(directory):
            """Create *directory* on the share unless it already exists."""
            if not file_service.exists(file_storage_share,
                                       directory_name=directory):
                file_service.create_directory(file_storage_share, directory)

        def save_uploaded(file_id, original_name, directory, remote_name):
            """Insert or update the status row marking a file as uploaded."""
            if file_id is None:
                db.saveFileStatus(script_name=script_name,
                                  file_original_name=original_name,
                                  file_upload_path=directory,
                                  file_upload_name=remote_name,
                                  file_status='Uploaded')
            else:
                db.saveFileStatus(id=file_id,
                                  file_upload_path=directory,
                                  file_upload_name=remote_name,
                                  file_status='Uploaded')

        try:
            if file_service.exists(file_storage_share):
                print(
                    'Connection to Azure file storage successfully established...'
                )
                if len(file_storage_dir) > 0 and not file_service.exists(
                        file_storage_share, directory_name=file_storage_dir):
                    file_service.create_directory(file_storage_share,
                                                  file_storage_dir)
                    print('Created directory:' + file_storage_dir)
            else:
                print(
                    'Failed to connect to Asure file storage, share does not exist: '
                    + file_storage_share)
        except Exception as ex:
            print('Error connecting to Azure file storage: ', ex)

        for pdffile in lpdfs:
            file_details = db.readFileStatus(file_original_name=pdffile,
                                             file_status='Uploaded')
            if file_details is not None:
                print('File {} was uploaded before'.format(
                    file_details["file_original_name"]))
                os.remove(pdffile)
                continue

            file_id = None
            file_details = db.readFileStatus(file_original_name=pdffile,
                                             file_status='Classified')
            if file_details is not None:
                file_id = file_details["id"]

            parent_dir, rpdffile = ntpath.split(pdffile)
            # Reset per file: unclassified files have no year level, and the
            # value must not leak over from the previous file.
            year = None
            if (parent_dir + '\\') == dir_upload or (parent_dir +
                                                     '/') == dir_upload:
                destinationdir = 'Unclassified'
            else:
                parent_dir, year = ntpath.split(parent_dir)
                parent_dir, destinationdir = ntpath.split(parent_dir)

            retries = 0
            while retries < 3:
                try:
                    path = pdffile
                    print('Uploading {}'.format(path))
                    remote_filename = fnm.azure_validate_filename(rpdffile)
                    if not remote_filename:
                        # Skip only this file; the rest are still processed.
                        print('Skipping {}: no valid remote file name'.format(
                            path))
                        break
                    if len(file_storage_dir) > 0:
                        directory = file_storage_dir + '/' + destinationdir
                    else:
                        directory = destinationdir
                    ensure_directory(directory)
                    if year:
                        directory += '/' + year
                        ensure_directory(directory)
                    print('Checking if {}/{} already exists'.format(
                        directory, remote_filename))
                    if file_service.exists(file_storage_share,
                                           directory_name=directory,
                                           file_name=remote_filename):
                        print('{}/{} already exists'.format(
                            directory, remote_filename))
                        save_uploaded(file_id, pdffile, directory,
                                      remote_filename)
                        os.remove(pdffile)
                        break
                    file_service.create_file_from_path(
                        file_storage_share,
                        directory,
                        remote_filename,
                        path,
                        content_settings=ContentSettings(
                            content_type='application/pdf'))
                    save_uploaded(file_id, pdffile, directory, remote_filename)
                    print('{}/{} uploaded'.format(directory, remote_filename))
                    # Set before the delete so a failed delete cannot cause a
                    # second upload attempt.
                    retries = 3
                    os.remove(pdffile)
                except Exception as e:
                    print('Error uploading to Asure file storage,', str(e))
                    retries += 1
    except Exception as e:
        print(str(e))
        logging.critical(str(e))
def run(job, **kwargs):
    """Upload a local file to an Azure file share and record it on the resource.

    The ``{{ ... }}` placeholders look like template variables that are
    substituted before this code runs - confirm against the host system.

    Steps: resolve the file path below ``settings.MEDIA_ROOT``, create the
    share, pick a file name (a numbered suffix is added when the name exists
    and overwriting is off), upload, delete the local file and store the Azure
    details on ``resource``.

    :param job: the running job; not used here.
    :param kwargs: must carry ``resource``, the resource to populate.
    :return: ``("Success", message, "")`` on success.
    :raises CloudBoltException: on any failure, after the local file and the
        resource have been deleted.
    """
    resource = kwargs.get('resource')
    create_custom_fields_as_needed()
    storage_account = '{{ storage_account }}'
    file_path = "{{ file }}"
    azure_storage_file_share_name = '{{ azure_storage_file_share_name }}'
    overwrite_files = {{overwrite_files}}
    file_name = Path(file_path).name
    if file_path.startswith(settings.MEDIA_URL):
        set_progress("Converting relative URL to filesystem path")
        # Replace only the leading prefix, not later occurrences in the path.
        file_path = file_path.replace(settings.MEDIA_URL, settings.MEDIA_ROOT,
                                      1)
    if not file_path.startswith(settings.MEDIA_ROOT):
        file_path = os.path.join(settings.MEDIA_ROOT, file_path)
    try:
        set_progress("Connecting To Azure...")
        # One query supplies both keys (the original ran it twice).
        storage_resource = Resource.objects.filter(
            name__icontains=storage_account)[0]
        account_key = storage_resource.azure_account_key
        fallback_account_key = storage_resource.azure_account_key_fallback
        file_service = FileService(account_name=storage_account,
                                   account_key=account_key)
        set_progress(
            'Creating file share {file_share_name} if it doesn\'t already exist...'
            .format(file_share_name=azure_storage_file_share_name))
        file_service.create_share(share_name=azure_storage_file_share_name,
                                  quota=1)
        set_progress('Connecting to file share')
        file_name_on_azure = file_name
        count = 0
        # When not overwriting, try name(1), name(2), ... until one is free.
        while (not overwrite_files) and file_service.exists(
                share_name=azure_storage_file_share_name,
                file_name=file_name_on_azure,
                directory_name=''):
            count += 1
            file_name_on_azure = '{file_name}({duplicate_number})'.format(
                file_name=file_name, duplicate_number=count)
            set_progress(
                'File with name already exists on given file share, testing new name: {new_name}'
                .format(new_name=file_name_on_azure))
        local_resource_name = azure_storage_file_share_name + '-' + file_name_on_azure
        if overwrite_files and file_service.exists(
                share_name=azure_storage_file_share_name,
                file_name=file_name_on_azure,
                directory_name=''):
            set_progress(
                'File with name already exists on given file share, overwriting'
            )
            # Remove the resource that tracked the file being replaced.
            old_resource_to_overwite = Resource.objects.filter(
                name=local_resource_name, lifecycle='ACTIVE').first()
            if old_resource_to_overwite:
                old_resource_to_overwite.delete()
        set_progress(
            'Creating the file with name {file_name} on the Storage Account {storage_account} using the share named {share_name}'
            .format(file_name=file_name_on_azure,
                    storage_account=storage_account,
                    share_name=azure_storage_file_share_name))
        file_service.create_file_from_path(
            share_name=azure_storage_file_share_name,
            file_name=file_name_on_azure,
            directory_name='',
            local_file_path=file_path)
        os.remove(file_path)
        set_progress(
            'Creating local storage resource named {resource_name}'.format(
                resource_name=local_resource_name))
        resource.name = local_resource_name
        resource.azure_storage_account_name = storage_account
        resource.azure_account_key = account_key
        resource.azure_account_key_fallback = fallback_account_key
        resource.azure_storage_file_share_name = azure_storage_file_share_name
        resource.azure_storage_file_name = file_name_on_azure
        resource.save()
        return "Success", "The File has succesfully been uploaded", ""
    except Exception as e:
        # Clean up the local file and the half-built resource before failing.
        if os.path.exists(file_path):
            os.remove(file_path)
        if resource:
            resource.delete()
        raise CloudBoltException(
            "File could not be uploaded because of the following error: {error}"
            .format(error=e)) from e
from azure.storage.file import FileService from azure.storage.file import ContentSettings import os omnipresence_storage_account_name = 'cloudinfraprovision' omnipresence_storage_account_key = 'WVIc4TiKPDLxjtIWLpnk5fITbI6AFoZahvfTz4SgSjyP+fE3/qwgSgIo/UNavXPPjQDWrCfT4da6vnL209pThQ==' omnipresence_storage_file_share = 'azure-provision' #Azure Storage Account File Share Name allows only lowercase letters, numbers and hypen. remote_dir_path = '' #Initialize an Azure Storage Account File Service Instance omnipresence_storage_account = FileService(account_name=omnipresence_storage_account_name, account_key=omnipresence_storage_account_key) #test if your storage file share exists on Azure or not, if not, create it if (not omnipresence_storage_account.exists(omnipresence_storage_file_share)): omnipresence_storage_account.create_share(omnipresence_storage_file_share, quota='10') #walk through current directory, make directorys under Azure File Share and upload local files onto your Azure storage account File Share except for hiden files and directory for base_dir, dirs, file_names in os.walk(".", topdown=True): file_names = [ f for f in file_names if not f[0] == '.'] #parse out files whose name begins with a dot dirs[:] = [d for d in dirs if not d[0] == '.'] #parse out directorys whose name begins with a dot for local_file_name in file_names: remote_file_name = os.path.join(base_dir, local_file_name)[2:] local_file_name = remote_file_name if (omnipresence_storage_account.exists(omnipresence_storage_file_share)): omnipresence_storage_account.create_file_from_path( omnipresence_storage_file_share, None, # We want to create files under current remote directory, so we specify None for the directory_name remote_file_name, local_file_name,