Esempio n. 1
0
def file_storage_connect():
    """Connect to the Azure file share described by ``dbparameters``.

    Reads the connection settings from the module-level ``dbparameters``
    mapping, builds a ``FileService`` client and, when the share exists,
    makes sure the configured directory prefix exists on it by creating each
    level of the path in turn.

    Side effects:
        Sets the module globals ``file_service``, ``file_storage_dir``,
        ``file_storage_share`` and ``overwrite_remote_files``. The last one
        is stored as the stripped configuration string, not as a bool.

    Errors raised while talking to the service are caught and printed; the
    function always returns ``None``.
    """
    global file_service
    global file_storage_dir
    global file_storage_share
    global overwrite_remote_files
    # Not used below; kept so a missing 'fs_server' entry still fails here
    # with a KeyError exactly as before.
    file_storage_url = dbparameters['fs_server'].strip()
    file_storage_user = dbparameters['fs_username'].strip()
    file_storage_pwd = dbparameters['fs_password'].strip()
    file_storage_share = dbparameters['fs_share'].strip()
    file_storage_dir = dbparameters['fs_directory_prefix'].strip()
    overwrite_remote_files = dbparameters['overwrite_remote_files'].strip()

    file_service = FileService(account_name=file_storage_user,
                               account_key=file_storage_pwd)
    try:
        if file_service.exists(file_storage_share):
            print(
                'Connection to Azure file storage successfully established...')
            if file_storage_dir and not file_service.exists(
                    file_storage_share, directory_name=file_storage_dir):
                created_path = ""
                for segment in file_storage_dir.split('/'):
                    # A leading, trailing or doubled '/' yields empty
                    # segments; passing those on would ask the service to
                    # create '' or 'a/' instead of a real directory.
                    if not segment:
                        continue
                    if created_path:
                        created_path += "/"
                    created_path += segment
                    file_service.create_directory(file_storage_share,
                                                  created_path)
                print('Created directory:' + file_storage_dir)
        else:
            print(
                'Filaed to connect to Asure file storage, share does not exist: '
                + file_storage_share)
    except Exception as ex:
        print('Error connecting to Azure file storage: ', ex)
Esempio n. 2
0
def create_azure_fileshare(share_prefix, account_name, account_key):
    """
    Create a uniquely named Azure file share and upload the bootstrap files.

    The share name is the lower-cased prefix followed by a random UUID so
    that shares created in shared infrastructure do not overlap. Four
    directories are created on the share; only ``config`` receives files,
    taken from the hard-coded local bootstrap directory.

    :param share_prefix: prefix for the generated share name
    :param account_name: storage account name passed to ``FileService``
    :param account_key: storage account key passed to ``FileService``
    :return: the generated share name on success, otherwise an error message
    """

    # FIXME - Need to remove hardcoded directory link below
    bootstrap_dir = './WebInDeploy/bootstrap'
    share_name = "{0}-{1}".format(share_prefix.lower(), str(uuid.uuid4()))
    print('using share_name of: {}'.format(share_name))

    try:
        # ignore SSL warnings - bad form, but SSL Decrypt causes issues with this
        session = requests.Session()
        session.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=session)

        share_missing = not file_service.exists(share_name)
        if share_missing:
            file_service.create_share(share_name)

        for dir_type in ('config', 'content', 'software', 'license'):
            print('creating directory of type: {}'.format(dir_type))
            if not file_service.exists(share_name, directory_name=dir_type):
                file_service.create_directory(share_name, dir_type)

            # FIXME - We only handle bootstrap files.  May need to handle other dirs
            if dir_type != 'config':
                continue

            for filename in os.listdir(bootstrap_dir):
                print('creating file: {0}'.format(filename))
                local_path = os.path.join(bootstrap_dir, filename)
                file_service.create_file_from_path(share_name, dir_type,
                                                   filename, local_path)

    except AttributeError as auth_err:
        # this can be returned on bad auth information
        print(auth_err)
        return "Authentication or other error creating bootstrap file_share in Azure"

    except (AzureException, ValueError) as err:
        # Both error kinds are reported the same way: print and return the text.
        print(err)
        return str(err)

    print('all done')
    return share_name
Esempio n. 3
0
def _get_files_from_afs(cli_ctx, afs, path, expiry):
    """Lists files and directories under the given path on an Azure File share.

    Files are returned with a read-only SAS download URL and their content
    length; directories are returned without URL or size. An empty list is
    returned when the path does not exist on the share.

    :param cli_ctx: context forwarded to the storage account key lookup.
    :param models.AzureFileShareReference afs: Azure file share reference.
    :param str path: path to list files from.
    :param int expiry: SAS expiration time in minutes.
    """
    from azure.storage.file import FileService
    from azure.storage.file.models import File, FilePermissions

    account_key = _get_storage_account_key(cli_ctx, afs.account_name, None)
    service = FileService(afs.account_name, account_key)
    # The share name is the last component of the share URL.
    share_name = afs.azure_file_url.split('/')[-1]
    effective_path = _get_path_for_storage(path)

    entries = []
    if not service.exists(share_name, effective_path):
        return entries

    for entry in service.list_directories_and_files(share_name, effective_path):
        if not isinstance(entry, File):
            # Directory entry: no URL and no size.
            entries.append(LogFile(entry.name, None, True, None))
            continue

        valid_until = datetime.datetime.utcnow() + datetime.timedelta(minutes=expiry)
        sas = service.generate_file_shared_access_signature(
            share_name, effective_path, entry.name,
            permission=FilePermissions(read=True),
            expiry=valid_until)
        download_url = service.make_file_url(
            share_name, effective_path, entry.name, 'https', sas)
        entries.append(
            LogFile(entry.name, download_url, False,
                    entry.properties.content_length))
    return entries
Esempio n. 4
0
def getLatestModel(customer, modelName, storage_account_name,
                   storage_account_key):
    """Download and unpickle the newest stored model for a customer.

    Model files are looked up in the ``trainedmodels`` share under
    ``<customer>/<modelName>/`` and are expected to be named
    ``<modelName>_<timestamp>.pkl``. The file with the greatest timestamp
    string is downloaded.

    :param customer: customer directory name on the share.
    :param modelName: model directory name and file-name prefix.
    :param storage_account_name: Azure storage account name.
    :param storage_account_key: Azure storage account key.
    :return: the object stored under the ``'model'`` key of the pickled
        payload, or ``None`` when the directory or a matching file is missing.
    """
    fileService = FileService(account_name=storage_account_name,
                              account_key=storage_account_key)
    modelDir = customer + '/' + modelName

    # Check the model directory itself: checking only the customer directory
    # let the listing below fail for a known customer with an unknown model.
    if not fileService.exists('trainedmodels', modelDir):
        print('Customer or model not found.')
        return None

    namePrefix = modelName + '_'
    nameSuffix = '.pkl'
    # Strip the known prefix/suffix instead of splitting on '_' so model
    # names that contain underscores still yield the timestamp part.
    modelTimestampArr = [
        entry.name[len(namePrefix):-len(nameSuffix)]
        for entry in fileService.list_directories_and_files(
            'trainedmodels', modelDir, prefix=modelName)
        if entry.name.startswith(namePrefix)
        and entry.name.endswith(nameSuffix)
    ]
    if not modelTimestampArr:
        # max() on an empty list would raise ValueError.
        print('Customer or model not found.')
        return None

    # String comparison picks the newest file only if the timestamps are
    # fixed-width and most-significant-first (e.g. '%Y%m%d-%H%M%S') -
    # TODO confirm against the writer of these files.
    latestModelFileName = namePrefix + max(modelTimestampArr) + nameSuffix
    print(latestModelFileName)

    downloaded = fileService.get_file_to_bytes('trainedmodels', modelDir,
                                               latestModelFileName)
    # SECURITY: pickle.loads executes arbitrary code from the payload; only
    # safe while the storage account contents are fully trusted.
    model = pickle.loads(downloaded.content)['model']
    return model
Esempio n. 5
0
def run(job, **kwargs):
    """Upload a local file to an Azure file share and record it on the resource.

    The ``{{ ... }}`` literals are template placeholders; presumably they are
    replaced with real values before this code runs (confirm against the
    templating step).

    :param job: not used by this function.
    :param kwargs: ``resource`` is the object that receives the Azure
        details after a first-time upload.
    :return: a three-string tuple whose first element is the status text.
        When the file already existed on the share it is overwritten and a
        ``"WARNING"`` tuple is returned without touching ``resource``.
    """
    resource = kwargs.get('resource')
    create_custom_fields_as_needed()

    storage_account = '{{ storage_account }}'
    file = "{{ file }}"
    azure_storage_file_share_name = '{{ azure_storage_file_share_name }}'
    file_name = Path(file).name
    if file.startswith(settings.MEDIA_URL):
        set_progress("Converting relative URL to filesystem path")
        # Swap only the leading MEDIA_URL; an unbounded replace would also
        # rewrite the same text if it appears later in the path.
        file = file.replace(settings.MEDIA_URL, settings.MEDIA_ROOT, 1)

    # One lookup instead of two identical ones, so both keys are guaranteed
    # to come from the same row.
    account_resource = Resource.objects.filter(name__icontains=storage_account)[0]
    account_key = account_resource.azure_account_key
    fallback_account_key = account_resource.azure_account_key_fallback

    set_progress("Connecting To Azure...")
    file_service = FileService(account_name=storage_account, account_key=account_key)

    set_progress('Creating a file share...')
    file_service.create_share(share_name=azure_storage_file_share_name, quota=1)

    set_progress('Creating a file...')
    # Remember whether the file was already there before uploading over it.
    already_present = file_service.exists(share_name=azure_storage_file_share_name, file_name=file_name, directory_name='')
    file_service.create_file_from_path(share_name=azure_storage_file_share_name, file_name=file_name, directory_name='', local_file_path=file)
    if already_present:
        return "WARNING", "File with this name already exists", "The file will be updated."

    resource.name = azure_storage_file_share_name + '-' + file_name
    resource.azure_storage_account_name = storage_account
    resource.azure_account_key = account_key
    resource.azure_account_key_fallback = fallback_account_key
    resource.azure_storage_file_share_name = azure_storage_file_share_name
    resource.azure_storage_file_name = file_name
    resource.save()
    return "Success", "The File has succesfully been uploaded", ""
Esempio n. 6
0
def create_azure_fileshare(files, share_prefix, account_name, account_key):
    """Create a uniquely named Azure file share and upload archived files.

    The share name is the lower-cased prefix followed by a random UUID to
    avoid overlaps in shared infra. The local archive directory produced by
    ``_create_archive_directory`` is expected to contain one sub-directory
    per directory type; every entry of each sub-directory is uploaded to the
    directory of the same name on the share.

    :param files: forwarded to ``_create_archive_directory``
    :param share_prefix: prefix for the generated share name
    :param account_name: storage account name passed to ``FileService``
    :param account_key: storage account key passed to ``FileService``
    :return: a success message containing the share name, or an error message
    """
    share_name = "{0}-{1}".format(share_prefix.lower(), str(uuid.uuid4()))
    print('using share_name of: {}'.format(share_name))

    archive_root = _create_archive_directory(files, share_prefix)

    try:
        # ignore SSL warnings - bad form, but SSL Decrypt causes issues with this
        session = requests.Session()
        session.verify = False

        file_service = FileService(account_name=account_name,
                                   account_key=account_key,
                                   request_session=session)

        share_missing = not file_service.exists(share_name)
        if share_missing:
            file_service.create_share(share_name)

        for dir_type in ('config', 'content', 'software', 'license'):
            print('creating directory of type: {}'.format(dir_type))
            if not file_service.exists(share_name, directory_name=dir_type):
                file_service.create_directory(share_name, dir_type)

            local_dir = os.path.join(archive_root, dir_type)
            for filename in os.listdir(local_dir):
                print('creating file: {0}'.format(filename))
                local_path = os.path.join(local_dir, filename)
                file_service.create_file_from_path(share_name, dir_type,
                                                   filename, local_path)

    except AttributeError as auth_err:
        # this can be returned on bad auth information
        print(auth_err)
        return "Authentication or other error creating bootstrap file_share in Azure"

    except (AzureException, ValueError) as err:
        # Both error kinds are reported the same way: print and return the text.
        print(err)
        return str(err)

    print('all done')
    return 'Azure file-share {} created successfully'.format(share_name)
Esempio n. 7
0
def saveModel(customer, modelName, model, storage_account_name,
              storage_account_key):
    """Pickle ``model`` and store it in the ``trainedmodels`` file share.

    The file is written to ``<customer>/<modelName>/`` and named
    ``<modelName>_<YYYYmmdd-HHMMSS>.pkl`` using the local time of the call.
    The share and the two directory levels are created when the existence
    checks report them missing.

    :param customer: customer directory name on the share.
    :param modelName: model directory name and file-name prefix.
    :param model: object to pickle.
    :param storage_account_name: Azure storage account name.
    :param storage_account_key: Azure storage account key.
    """
    share = 'trainedmodels'
    fileService = FileService(account_name=storage_account_name,
                              account_key=storage_account_key)

    customerMissing = not fileService.exists(share, customer)
    if customerMissing:
        fileService.create_share(share)
        fileService.create_directory(share, customer)

    modelDir = customer + '/' + modelName
    if not fileService.exists(share, modelDir):
        fileService.create_directory(share, modelDir)

    payload = pickle.dumps(model)
    stamp = time.strftime('%Y%m%d-%H%M%S')
    fileName = modelName + '_' + stamp + '.pkl'
    fileService.create_file_from_bytes(share, modelDir, fileName, payload)
    print(fileName + ' saved.')
Esempio n. 8
0
def file_storage_connect():
    """Build a ``FileService`` client and report whether the share is reachable.

    Uses the module-level ``file_storage_user``, ``file_storage_pwd`` and
    ``file_storage_share`` values. The outcome of the existence check is only
    printed; the client is returned even when the check fails or raises.

    :return: the ``FileService`` client (15 second socket timeout).
    """
    service = FileService(account_name=file_storage_user, account_key=file_storage_pwd, socket_timeout=15)
    try:
        share_found = service.exists(file_storage_share)
        if not share_found:
            print('Filaed to connect to Asure file storage, share does not exist: ' + file_storage_share)
        else:
            print('Connection to Azure file storage successfully established...')
    except Exception as ex:
        print('Error connecting to Azure file storage: ', ex)
    return service
Esempio n. 9
0
def upload_scripts(config, job_name, filenames):
    """Upload local files into the job's directory on the configured share.

    The directory named ``job_name`` is created when the existence check
    reports it missing. Each file is stored under its base name.

    :param config: object exposing ``storage_account`` (mapping with
        ``'name'`` and ``'key'``) and ``fileshare_name``.
    :param job_name: directory on the share that receives the files.
    :param filenames: iterable of local file paths to upload.
    """
    account = config.storage_account
    service = FileService(account['name'], account['key'])

    job_dir_missing = not service.exists(config.fileshare_name,
                                         directory_name=job_name)
    if job_dir_missing:
        service.create_directory(config.fileshare_name,
                                 job_name,
                                 fail_on_exist=False)

    for local_path in filenames:
        remote_name = os.path.basename(local_path)
        service.create_file_from_path(config.fileshare_name, job_name,
                                      remote_name, local_path)
Esempio n. 10
0
def file():
    """Round-trip the local static files through an Azure file share.

    Steps:
      1. make sure the local download directory exists,
      2. create the share and the target directory on it,
      3. upload every regular file from the static directory (replacing any
         remote file of the same name),
      4. download every entry of the remote directory into the local
         download directory,
      5. return a response made of a JSON summary followed by one ``<img>``
         tag per downloaded file.

    :return: the value of ``make_response`` for the JSON text plus HTML.
    """
    # Raw string: the backslashes are path separators, not escape sequences.
    static_dir_path = r"D:\home\site\wwwroot\static"
    static_file_dir_path = static_dir_path + '\\' + 'files'
    # NOTE(review): credentials are hard-coded here; they look like
    # placeholders, but real ones should come from configuration.
    account_name = 'hanastragetest'
    account_key = 'acount_key'
    root_share_name = 'root'
    share_name = 'images'

    # create local save directory (os.path.exists; ``os.path.exist`` does not
    # exist and raised AttributeError)
    if not os.path.exists(static_file_dir_path):
        os.mkdir(static_file_dir_path)

    file_service = FileService(account_name=account_name,
                               account_key=account_key)
    # create share
    file_service.create_share(root_share_name)

    # create directory
    file_service.create_directory(root_share_name, share_name)

    for local_name in os.listdir(static_dir_path):
        local_path = static_dir_path + '\\' + local_name
        # The listing also contains the 'files' download directory created
        # above; only regular files can be uploaded.
        if not os.path.isfile(local_path):
            continue

        # delete
        if file_service.exists(root_share_name, share_name, local_name):
            file_service.delete_file(root_share_name, share_name, local_name)

        # file upload into the ``share_name`` directory of the share
        file_service.create_file_from_path(
            root_share_name,
            share_name,
            local_name,
            local_path,
            content_settings=ContentSettings(content_type='image/png'))

    # Materialise the listing once: it is needed both for the downloads and
    # for the JSON summary.
    remote_entries = list(
        file_service.list_directories_and_files(root_share_name, share_name))

    img_tags = []
    for entry in remote_entries:
        # file download; entries are objects, their name is in ``.name``
        file_save_path = static_file_dir_path + '\\' + entry.name
        file_service.get_file_to_path(root_share_name, share_name, entry.name,
                                      file_save_path)
        img_tags.append("<img src='{}'>".format(file_save_path))
    html = "".join(img_tags)

    result = {
        "result": True,
        "data": {
            "file_or_dir_name": [entry.name for entry in remote_entries]
        }
    }
    return make_response(json.dumps(result, ensure_ascii=False) + html)
Esempio n. 11
0
def clean(shared_directory, config_path, remove_directory):
    """Delete every entry of a directory on the configured Azure file share.

    The JSON configuration file must provide ``account_name``,
    ``account_key`` and ``share_name``. Nothing happens when the directory
    does not exist on the share.

    :param shared_directory: directory on the share to empty.
    :param config_path: path to the JSON configuration file (``~`` allowed).
    :param remove_directory: when truthy, delete the directory itself after
        its entries.
    """
    with open(os.path.expanduser(config_path)) as config_file:
        settings = json.load(config_file)

    from azure.storage.file import FileService
    service = FileService(account_name=settings['account_name'],
                          account_key=settings['account_key'])
    share = settings['share_name']

    if not service.exists(share, shared_directory):
        return

    for entry in service.list_directories_and_files(share, shared_directory):
        service.delete_file(share, shared_directory, entry.name)

    if remove_directory:
        service.delete_directory(share, shared_directory)
Esempio n. 12
0
def downloadStoredFiles(config, accountKey, sourceDir, targetDir):
    """Download the ``.txt`` and ``.csv`` files of a share directory.

    Files that already exist in ``targetDir`` are left alone. ``targetDir``
    is created when missing. Returns without downloading when ``sourceDir``
    does not exist on the share.

    :param config: mapping with ``STORAGE_ACCOUNT`` and ``STORAGE_LOCATION``.
    :param accountKey: Azure storage account key.
    :param sourceDir: directory on the share to read from.
    :param targetDir: local directory to download into.
    """
    fs = FileService(account_name=config['STORAGE_ACCOUNT'],
                     account_key=accountKey)
    storageLoc = config['STORAGE_LOCATION']

    targetMissing = not path.exists(targetDir)
    if targetMissing:
        makedirs(targetDir)

    print(
        f'\nFileService: reading files from Azure Storage location="{storageLoc}" directory="{sourceDir}"'
    )
    if not fs.exists(storageLoc, sourceDir):
        return

    wantedSuffixes = ('.txt', '.csv')
    fileNames = [
        entry.name
        for entry in fs.list_directories_and_files(storageLoc, sourceDir)
        if entry.name.endswith(wantedSuffixes)
    ]
    for fname in fileNames:
        localPath = path.join(targetDir, fname)
        if path.exists(localPath):
            print(f'already got file={fname}')
            continue
        print(f'downloading file={fname}')
        fs.get_file_to_path(storageLoc, sourceDir, fname, localPath)
    def test_job_level_mounting(self, resource_group, location, cluster, storage_account, storage_account_key):
        """Tests if it's possible to mount external file systems for a job.

        Mounts an Azure file share and a blob container at job level, runs a
        job whose preparation and main commands write into both, checks the
        produced files, then submits a second job to check that the job level
        mounts are gone once the first job has finished.
        """
        job_name = 'job'

        # Create file share and container to mount on the job level
        # (skipped when the storage account is the fake one).
        if storage_account.name != helpers.FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            files.create_share('jobshare', fail_on_exist=False)
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            blobs.create_container('jobcontainer', fail_on_exist=False)

        job = self.client.jobs.create(
            resource_group.name,
            job_name,
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                mount_volumes=models.MountVolumes(
                    azure_file_shares=[
                        models.AzureFileShareReference(
                            account_name=storage_account.name,
                            azure_file_url='https://{0}.file.core.windows.net/{1}'.format(
                                storage_account.name, 'jobshare'),
                            relative_mount_path='job_afs',
                            credentials=models.AzureStorageCredentialsInfo(
                                account_key=storage_account_key
                            ),
                        )
                    ],
                    azure_blob_file_systems=[
                        models.AzureBlobFileSystemReference(
                            account_name=storage_account.name,
                            container_name='jobcontainer',
                            relative_mount_path='job_bfs',
                            credentials=models.AzureStorageCredentialsInfo(
                                account_key=storage_account_key
                            ),
                        )
                    ]
                ),
                # Put standard output on cluster level AFS to check that the job has access to it.
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(helpers.AZURE_FILES_MOUNTING_PATH),
                # Create two output directories on job level AFS and blobfuse.
                output_directories=[
                    models.OutputDirectory(id='OUTPUT1', path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
                    models.OutputDirectory(id='OUTPUT2', path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs')
                ],
                # Check that the job preparation has access to job level file systems.
                job_preparation=models.JobPreparation(
                    command_line='echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                                 'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                                 'echo done'
                ),
                # Check that the job itself has access to job level file systems:
                # it writes one file and one sub-directory with a file into each output.
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line='echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                                 'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                                 'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                                 'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                                 'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                                 'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                                 'echo done'
                )
            )
        ).result()
        self.assertEqual(
            helpers.wait_for_job_completion(self.is_live, self.client, resource_group.name, job.name,
                                            helpers.MINUTE),
            models.ExecutionState.succeeded)

        # Re-read the job; job_output_directory_path_segment is taken from this object below.
        job = self.client.jobs.get(resource_group.name, job.name)
        # Assert job and job prep standard output is populated on cluster level filesystem
        helpers.assert_job_files_are(self, self.client, resource_group.name, job.name,
                                     helpers.STANDARD_OUTPUT_DIRECTORY_ID,
                                     {u'stdout.txt': u'done\n', u'stderr.txt': u'',
                                      u'stdout-job_prep.txt': u'done\n', u'stderr-job_prep.txt': u''})
        # Assert files are generated on job level AFS
        helpers.assert_job_files_are(self, self.client, resource_group.name, job.name, 'OUTPUT1',
                                     {u'job_afs.txt': u'afs\n', u'prep_afs.txt': u'afs\n', u'afs': None})
        # Assert files are generated on job level blobfuse
        helpers.assert_job_files_are(self, self.client, resource_group.name, job.name, 'OUTPUT2',
                                     {u'job_bfs.txt': u'bfs\n', u'prep_bfs.txt': u'bfs\n', u'bfs': None})
        # Assert subfolders are available via API
        helpers.assert_job_files_in_path_are(self, self.client, resource_group.name, job.name, 'OUTPUT1',
                                             'afs', {u'job_afs.txt': u'afs\n'})
        helpers.assert_job_files_in_path_are(self, self.client, resource_group.name, job.name, 'OUTPUT2',
                                             'bfs', {u'job_bfs.txt': u'bfs\n'})

        # Assert that we can access the output files created on job level mount volumes directly in storage using path
        # segment returned by the server.
        if storage_account.name != helpers.FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            self.assertTrue(
                files.exists('jobshare', job.job_output_directory_path_segment +
                             '/' + helpers.OUTPUT_DIRECTORIES_FOLDER_NAME, 'job_afs.txt'))
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            self.assertTrue(
                blobs.exists('jobcontainer', job.job_output_directory_path_segment +
                             '/' + helpers.OUTPUT_DIRECTORIES_FOLDER_NAME + '/job_bfs.txt'))
        # After the job is done the filesystems should be unmounted automatically, check this by submitting a new job.
        # grep exits non-zero when neither mount name appears in the df output, which fails the command line.
        checker = self.client.jobs.create(
            resource_group.name,
            'checker',
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(helpers.AZURE_FILES_MOUNTING_PATH),
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line='echo job; df | grep -E "job_bfs|job_afs"'
                )
            )
        ).result()
        # Check the job failed because there are not job level mount volumes anymore
        self.assertEqual(
            helpers.wait_for_job_completion(self.is_live, self.client, resource_group.name, checker.name,
                                            helpers.MINUTE),
            models.ExecutionState.failed)
        # Check that the cluster level AFS was still mounted
        helpers.assert_job_files_are(self, self.client, resource_group.name, checker.name,
                                     helpers.STANDARD_OUTPUT_DIRECTORY_ID,
                                     {u'stdout.txt': u'job\n', u'stderr.txt': u''})
Esempio n. 14
0
    def test_job_level_mounting(self, resource_group, location, cluster,
                                storage_account, storage_account_key):
        """Tests if it's possible to mount external file systems for a job.

        Mounts an Azure file share and a blob container at job level, runs a
        job whose preparation and main commands write into both, checks the
        produced files, then submits a second job to check that the job level
        mounts are gone once the first job has finished.
        """
        job_name = 'job'

        # Create file share and container to mount on the job level
        # (skipped when the storage account is the fake one).
        if storage_account.name != FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            files.create_share('jobshare', fail_on_exist=False)
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            blobs.create_container('jobcontainer', fail_on_exist=False)

        job = self.client.jobs.create(
            resource_group.name,
            job_name,
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                mount_volumes=models.
                MountVolumes(azure_file_shares=[
                    models.AzureFileShareReference(
                        account_name=storage_account.name,
                        azure_file_url='https://{0}.file.core.windows.net/{1}'.
                        format(storage_account.name, 'jobshare'),
                        relative_mount_path='job_afs',
                        credentials=models.AzureStorageCredentialsInfo(
                            account_key=storage_account_key),
                    )
                ],
                             azure_blob_file_systems=[
                                 models.AzureBlobFileSystemReference(
                                     account_name=storage_account.name,
                                     container_name='jobcontainer',
                                     relative_mount_path='job_bfs',
                                     credentials=models.
                                     AzureStorageCredentialsInfo(
                                         account_key=storage_account_key),
                                 )
                             ]),
                # Put standard output on cluster level AFS to check that the job has access to it.
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    AZURE_FILES_MOUNTING_PATH),
                # Create two output directories on job level AFS and blobfuse.
                output_directories=[
                    models.OutputDirectory(
                        id='OUTPUT1',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
                    models.OutputDirectory(
                        id='OUTPUT2',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs')
                ],
                # Check that the job preparation has access to job level file systems.
                job_preparation=models.JobPreparation(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                    'echo done'),
                # Check that the job itself has access to job level file systems:
                # it writes one file and one sub-directory with a file into each output.
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                    'echo done'))).result()
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job.name, MINUTE),
            models.ExecutionState.succeeded)

        # Re-read the job; job_output_directory_path_segment is taken from this object below.
        job = self.client.jobs.get(resource_group.name, job.name)
        # Assert job and job prep standard output is populated on cluster level filesystem
        assert_job_files_are(
            self, self.client, resource_group.name, job.name,
            STANDARD_OUTPUT_DIRECTORY_ID, {
                u'stdout.txt': u'done\n',
                u'stderr.txt': u'',
                u'stdout-job_prep.txt': u'done\n',
                u'stderr-job_prep.txt': u''
            })
        # Assert files are generated on job level AFS
        assert_job_files_are(self, self.client, resource_group.name, job.name,
                             'OUTPUT1', {
                                 u'job_afs.txt': u'afs\n',
                                 u'prep_afs.txt': u'afs\n',
                                 u'afs': None
                             })
        # Assert files are generated on job level blobfuse
        assert_job_files_are(self, self.client, resource_group.name, job.name,
                             'OUTPUT2', {
                                 u'job_bfs.txt': u'bfs\n',
                                 u'prep_bfs.txt': u'bfs\n',
                                 u'bfs': None
                             })
        # Assert subfolders are available via API
        assert_job_files_in_path_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT1', 'afs',
                                     {u'job_afs.txt': u'afs\n'})
        assert_job_files_in_path_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT2', 'bfs',
                                     {u'job_bfs.txt': u'bfs\n'})

        # Assert that we can access the output files created on job level mount volumes directly in storage using path
        # segment returned by the server.
        if storage_account.name != FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            self.assertTrue(
                files.exists(
                    'jobshare', job.job_output_directory_path_segment + '/' +
                    OUTPUT_DIRECTORIES_FOLDER_NAME, 'job_afs.txt'))
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            self.assertTrue(
                blobs.exists(
                    'jobcontainer', job.job_output_directory_path_segment +
                    '/' + OUTPUT_DIRECTORIES_FOLDER_NAME + '/job_bfs.txt'))
        # After the job is done the filesystems should be unmounted automatically, check this by submitting a new job.
        # grep exits non-zero when neither mount name appears in the df output, which fails the command line.
        checker = self.client.jobs.create(
            resource_group.name,
            'checker',
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    AZURE_FILES_MOUNTING_PATH),
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line='echo job; df | grep -E "job_bfs|job_afs"'))
        ).result()
        # Check the job failed because there are not job level mount volumes anymore
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, checker.name, MINUTE),
            models.ExecutionState.failed)
        # Check that the cluster level AFS was still mounted
        assert_job_files_are(self, self.client, resource_group.name,
                             checker.name, STANDARD_OUTPUT_DIRECTORY_ID, {
                                 u'stdout.txt': u'job\n',
                                 u'stderr.txt': u''
                             })
Esempio n. 15
0
# PG DUMP
try:
    COMMANDS = ['pg_dump', '-F', 'c', '-b', '-v', '-f', './%s' % FILENAME]
    print("Running: '%s'" % (' '.join(COMMANDS)))
    exit_code = subprocess.call(COMMANDS)
    if exit_code is 1:
        raise Exception('Could not Backup, please check logs')

    # AZURE CONNECTION
    file_service = FileService(endpoint_suffix=AZURE_ENDPOINT_SUFFIX,
                               account_name=AZURE_ACCOUNT_NAME,
                               account_key=AZURE_ACCOUNT_KEY)

    # Check if AZURE_BACKUP_FOLDER exists, if not create it
    if not file_service.exists(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER):
        file_service.create_directory(AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER)

    # Upload
    print("uploading to: '%s/%s/%s'" %
          (AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER, FILENAME))
    file_service.create_file_from_path(AZURE_SHARE_NAME,
                                       AZURE_BACKUP_FOLDER,
                                       FILENAME,
                                       FILENAME,
                                       progress_callback=upload_callback)

    # Cleaning Backup Files
    backup_files = file_service.list_directories_and_files(
        AZURE_SHARE_NAME, AZURE_BACKUP_FOLDER)
    filenames = []
Esempio n. 16
0
    FileService(account_name=AZURE_CPT, account_key=AZURE_KEY)
    file_service = FileService(account_name=AZURE_CPT, account_key=AZURE_KEY)
    print("Autorisation d'accès au compte Microsoft AZURE OK")
    logging.debug("Autorisation d'accès au compte Microsoft AZURE OK")
#    syslog.syslog(syslog.LOG_DEBUG,"Autorisation d'accès au compte Microsoft AZURE OK")
except:
    print("Problème d'autorisation d'accès au compte Microsoft AZURE")
    logging.error("Problème d'autorisation d'accès au compte Microsoft AZURE")
#    syslog.syslog(syslog.LOG_ERR,"Problème d'autorisation d'accès au compte Microsoft AZURE")
    exit(2) # sortie avec erreur !

# Create the backup share on Microsoft AZURE (backup6 in our example) #
# Check whether the backup share already exists on Microsoft AZURE; create it if not #

# FileService.exists() reports a missing share by returning False; it does
# not raise FileNotFoundError, so the result has to be tested explicitly or
# the share is never created.
if file_service.exists(AZURE_REP_BKP):
    print("Le répertoire de sauvegarde AZURE existe !")
    logging.debug("Le répertoire de sauvegarde AZURE existe !")
#    syslog.syslog(syslog.LOG_DEBUG,"Le répertoire de sauvegarde AZURE existe !")
else:
    file_service.create_share(AZURE_REP_BKP)
    print("Création du répertoire de sauvegarde AZURE ")
    logging.warning("Création du répertoire de sauvegarde AZURE ")
#    syslog.syslog(syslog.LOG_WARNING,"Création du répertoire de sauvegarde AZURE ")

############################## Temps ################################

# Today's date formatted as day-month-year.
BACKUP_DATE = format(date.today(), "%d-%m-%Y")
# Today's date minus the retention period in days, in the same day-month-year format.
BACKUP_DATE_OLD = format(
    date.today() - datetime.timedelta(days=int(NBjourDEretention)),
    "%d-%m-%Y")

############################# Fonction ##############################
Esempio n. 17
0
class storageFileService(clsLoggingBase):
    """
    This class wraps the Blob storage. Should be created in two phases. First passing the 
    account name and second passing the accountkey from the KeyVault. After this the service 
    object is created and can be used to access the blob items 
    """
    def __init__(self, account_name):
        '''
        Remember the storage account name and start without a key or client.

        The FileService client is built later, once an account key is
        available (see set_storageKey and preCheck).
        '''
        super().__init__(__name__)
        # Name of the file that stores an experiment's mask definition.
        self.maskFileName = 'mask_file.txt'
        self.account_name = account_name
        # Neither the key nor the client exists until a key is supplied.
        self.account_key = self.service = None

    def set_storageKey(self, storageKey):
        '''
        Store the account key and, when an account name is already known,
        build the FileService client from the name/key pair.
        '''
        self.account_key = storageKey
        if not self.account_name:
            # Without an account name no client can be created yet.
            return
        self.service = FileService(account_name=self.account_name,
                                   account_key=self.account_key)

    def preCheck(self,
                 _sourceFileShareFolderName,
                 _sourceDirectoryName,
                 AdditionalCheck=True):
        '''
        Ensure a FileService client exists and, unless AdditionalCheck is
        switched off, that the given share and directory exist.

        When no client has been built yet, a missing account name or key is
        read from the AZURE_ACN_NAME / AZURE_ACN_STRG_KEY environment
        variables before the client is created.

        Returns a 5-tuple (ok, message, service, account_name, account_key).
        On failure ok is False and the last three items are None.
        '''
        super().getLoggingObj().debug('preCheck')

        def failed(message):
            # All failure paths share the same result shape.
            return False, message, None, None, None

        if self.service is None:
            if not self.account_name:
                self.account_name = os.environ.get('AZURE_ACN_NAME')
                if not self.account_name:
                    return failed('AZURE_ACN_NAME Environment Variable not set')

            if not self.account_key:
                self.account_key = os.environ.get('AZURE_ACN_STRG_KEY')
                if not self.account_key:
                    return failed(
                        'AZURE_ACN_STRG_KEY Environment Variable not set')

            self.service = FileService(account_name=self.account_name,
                                       account_key=self.account_key)

            # Defensive check in case no client object was produced.
            if self.service is None:
                return failed(
                    "Unable to create File share, check Account Name, Key and connectivity"
                )

        if AdditionalCheck == True:
            # The share must exist before its directory can be looked up.
            if not self.service.exists(_sourceFileShareFolderName):
                return failed("source share does not exist")

            if not self.service.exists(_sourceFileShareFolderName,
                                       directory_name=_sourceDirectoryName):
                return failed("source directory does not exist")

        return True, "OK", self.service, self.account_name, self.account_key

    def CopySourceDestinationImpl(self,
                                  _sourceFileShareFolderName,
                                  _sourceDirectoryName,
                                  _destinationFileShareFolderName,
                                  _destinationDirectoryName,
                                  _ExperimentName,
                                  _fileExtensionFilter='.jpg',
                                  _pollIntervalSeconds=5,
                                  _maxPollAttempts=5):
        '''
        Copy the raw files of one experiment from the source directory into
        <_destinationDirectoryName>/<_ExperimentName> on the destination share.

        _sourceDirectoryName, _destinationDirectoryName: format should be
        directoryName/secondDirectoryName, no trailing slashes.

        A source entry is copied when its name contains _ExperimentName and
        ends with _fileExtensionFilter. The destination share, directory and
        experiment folder are created when they do not exist yet.

        _pollIntervalSeconds / _maxPollAttempts control how long each
        server-side copy is waited for: the copy status is re-read up to
        _maxPollAttempts times, sleeping _pollIntervalSeconds before each read.

        Returns (True, "<seconds>:<microseconds>") on success, otherwise
        (False, <description>): preCheck failure, nothing listed at the
        source, or a copy that did not complete in time.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description

        # Make sure the whole destination path exists: share, directory,
        # then the per-experiment folder below it.
        if not file_service.exists(_destinationFileShareFolderName):
            file_service.create_share(_destinationFileShareFolderName)

        combinedDestinationFolderName = _destinationDirectoryName + "/" + _ExperimentName
        for folderName in (_destinationDirectoryName,
                           combinedDestinationFolderName):
            if not file_service.exists(_destinationFileShareFolderName,
                                       directory_name=folderName):
                file_service.create_directory(_destinationFileShareFolderName,
                                              folderName)

        fileList = list(
            file_service.list_directories_and_files(
                _sourceFileShareFolderName,
                directory_name=_sourceDirectoryName))
        if not fileList:
            return False, "No files found @ source"

        for imageFileName in fileList:
            fileName = imageFileName.name
            if not (_ExperimentName in fileName
                    and fileName.endswith(_fileExtensionFilter)):
                continue

            source = "https://{0}.file.core.windows.net/{1}/{2}/{3}".format(
                _accountName, _sourceFileShareFolderName,
                _sourceDirectoryName, fileName)
            copy = file_service.copy_file(_destinationFileShareFolderName,
                                          combinedDestinationFolderName,
                                          fileName, source)

            # Poll for copy completion; the attempt counter starts afresh for
            # every file so one slow copy does not eat the budget of the next.
            attempts = 0
            while copy.status != 'success':
                attempts += 1
                if attempts > _maxPollAttempts:
                    return False, 'Timed out waiting for async copy to complete., Filename = {0} '.format(
                        fileName)
                time.sleep(_pollIntervalSeconds)
                copy = file_service.get_file_properties(
                    _destinationFileShareFolderName,
                    combinedDestinationFolderName,
                    fileName).properties.copy

        time_elapsed = datetime.datetime.now() - start_time
        elapsedTime = "{}:{}".format(time_elapsed.seconds,
                                     time_elapsed.microseconds)
        return True, elapsedTime

    def GetAllExperimentsWithMaskAndImageFileImpl(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _fileExtensionFilter='.jpg'):
        '''
        For every entry listed under _destinationDirectoryName (treated as an
        experiment folder), report the first file whose name ends with
        _fileExtensionFilter together with the parsed content of the
        experiment's mask file.

        Returns (False, <description>) when preCheck fails, otherwise
        (True, items) where each item is a dict with the keys
        "experimentName", "filename" and "maskContent". "maskContent" is the
        JSON-decoded mask file, or '' when the mask file is absent or empty.
        Experiments without a matching file are left out.
        '''
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not rv:
            return rv, description

        returnList = []
        experimentList = list(
            file_service.list_directories_and_files(
                _destinationFileShareFolderName,
                directory_name=_destinationDirectoryName))

        for experiment in experimentList:
            experimentFolder = _destinationDirectoryName + "/" + experiment.name
            entries = list(
                file_service.list_directories_and_files(
                    _destinationFileShareFolderName, experimentFolder))

            # Only the first file with the requested extension is reported.
            firstImage = next((entry for entry in entries
                               if entry.name.endswith(_fileExtensionFilter)),
                              None)
            if firstImage is None:
                continue

            # The mask belongs to the experiment, not to an individual file,
            # so it is fetched once per experiment and only when needed.
            maskContent = ''
            if file_service.exists(_destinationFileShareFolderName,
                                   experimentFolder, self.maskFileName):
                fileMask = file_service.get_file_to_text(
                    _destinationFileShareFolderName, experimentFolder,
                    self.maskFileName)
                if fileMask is not None and fileMask.content:
                    maskContent = json.loads(fileMask.content)

            returnList.append({
                "experimentName": experiment.name,
                "filename": firstImage.name,
                "maskContent": maskContent
            })

        return True, returnList

    def SaveMaskFileDataImpl(self, _sourceFileShareFolderName,
                             _sourceDirectoryName, _maskTags):
        '''
        Validate _maskTags as non-empty JSON and write it, unchanged, to the
        mask file inside _sourceDirectoryName on the given share.

        Returns (True, "<seconds>:<microseconds>") once the file is written.
        Every rejection returns (False, <description>): preCheck failure,
        empty input, input that is not valid JSON, or JSON that decodes to an
        empty value.
        '''
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description

        # From here on rv is True, so failures must report False explicitly.
        if not _maskTags:
            return False, "Invalid mask values!!!"

        try:
            masks = json.loads(_maskTags)
        except (ValueError, TypeError):
            # ValueError: malformed JSON; TypeError: input is not text at all.
            return False, "masks passed cannot be converted to json objects"

        if not masks:
            return False, "Incorrect format of ask values!!!"

        file_service.create_file_from_text(_sourceFileShareFolderName,
                                           _sourceDirectoryName,
                                           self.maskFileName, _maskTags)
        time_elapsed = datetime.datetime.now() - start_time
        elapsedTime = "{}:{}".format(time_elapsed.seconds,
                                     time_elapsed.microseconds)
        return True, elapsedTime

    def GetAllExperimentsFilesNotCopiedImpl(self,
                                            _destinationFileShareFolderName,
                                            _destinationDirectoryName,
                                            _experimentNames):
        '''
        Report which of the given experiments have no folder yet below the
        destination directory.

        _experimentNames is an iterable of experiment names. An experiment
        whose destination folder exists is assumed to have had its source
        files copied already; only the folder's existence is checked, not
        its content.

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "<seconds>:<microseconds>", <names without a folder>).
        '''
        started = datetime.datetime.now()
        ok, message, client, _name, _key = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not ok:
            return ok, message, None

        notCopied = [
            name for name in _experimentNames
            if not client.exists(_destinationFileShareFolderName,
                                 _destinationDirectoryName + "/" + name)
        ]

        took = datetime.datetime.now() - started
        return True, "{}:{}".format(took.seconds, took.microseconds), notCopied

    def TestGetAllExperimentNames(self, _destinationFileShareFolderName,
                                  _destinationDirectoryName):
        '''
        List the raw entries found directly under the destination directory.

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "<seconds>:<microseconds>", <list of listing entries>).
        '''
        started = datetime.datetime.now()
        ok, message, client, _name, _key = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not ok:
            return ok, message, None

        entries = list(
            client.list_directories_and_files(
                _destinationFileShareFolderName,
                directory_name=_destinationDirectoryName))
        took = datetime.datetime.now() - started
        return True, "{}:{}".format(took.seconds, took.microseconds), entries

    def GetAllSourceUniqueExperimentNamesImpl(self,
                                              _sourceFileShareFolderName,
                                              _sourceDirectoryName,
                                              _fileExtensionFilter='.jpg'):
        '''
        Derive the distinct experiment names from the file names in the source
        directory, where the images of all experiments sit side by side.

        The experiment name is the part of a file name before its first
        underscore; only names ending with _fileExtensionFilter and having at
        least one character before the underscore are considered.
        In our context the _sourceFileShareFolderName = 'linuxraspshare' and
        '_sourceDirectoryName' = 'Share'.

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "<seconds>:<microseconds>", <names in order of first sight>).
        '''
        started = datetime.datetime.now()
        ok, message, client, _name, _key = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not ok:
            return ok, message, None

        uniqueNames = []
        for entry in list(
                client.list_directories_and_files(_sourceFileShareFolderName,
                                                  _sourceDirectoryName)):
            fileName = entry.name
            if not fileName.endswith(_fileExtensionFilter):
                continue
            cut = fileName.find('_')
            # cut == -1: no underscore; cut == 0: nothing before it.
            if cut <= 0:
                continue
            prefix = fileName[:cut]
            if prefix not in uniqueNames:
                uniqueNames.append(prefix)

        took = datetime.datetime.now() - started
        return True, "{}:{}".format(took.seconds,
                                    took.microseconds), uniqueNames

    def GetAllDestinationExperimentsWhereMaskFileNotPresentImpl(
            self, _destinationFileShareFolderName, _destinationDirectoryName,
            _experimentNames):
        '''
        Report which of the given experiments have no mask file yet.

        _experimentNames is an iterable of experiment names. For each one the
        mask file is looked up inside the experiment's destination folder; an
        experiment with a mask file is assumed to have been masked already,
        one without is returned as not yet processed.

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "<seconds>:<microseconds>", <names without a mask file>).
        '''
        started = datetime.datetime.now()
        ok, message, client, _name, _key = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not ok:
            return ok, message, None

        unmasked = [
            name for name in _experimentNames
            if not client.exists(_destinationFileShareFolderName,
                                 _destinationDirectoryName + "/" + name,
                                 self.maskFileName)
        ]

        took = datetime.datetime.now() - started
        return True, "{}:{}".format(took.seconds, took.microseconds), unmasked

    def GetAllDestinationUniqueExperimentNamesImpl(
            self, _destinationFileShareFolderName, _destinationDirectoryName):
        '''
        Return the names of all entries listed directly under the destination
        directory; each entry is taken to be one experiment.
        _destinationFileShareFolderName = 'experiment'
        _destinationDirectoryName = 'object-detection'

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "<seconds>:<microseconds>", <entry names>).
        '''
        started = datetime.datetime.now()
        ok, message, client, _name, _key = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not ok:
            return ok, message, None

        listing = client.list_directories_and_files(
            _destinationFileShareFolderName, _destinationDirectoryName)
        names = [entry.name for entry in list(listing)]

        took = datetime.datetime.now() - started
        return True, "{}:{}".format(took.seconds, took.microseconds), names

    def GetAllDestinationExperimentNamesWithOutputFilesImpl(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName='output',
            _fileExtensionFilter='.jpg'):
        '''
        List every experiment found under the destination directory together
        with the files in its output folder whose names end with
        _fileExtensionFilter.
        _destinationFileShareFolderName = 'experiment'
        _destinationDirectoryName = 'object-detection'

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "<seconds>:<microseconds>", items) where each item is a dict
        with the keys 'experimentName' and 'outputFiles'. 'outputFiles' is
        empty when the experiment has no output folder.
        '''
        started = datetime.datetime.now()
        ok, message, client, _name, _key = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not ok:
            return ok, message, None

        summary = []
        experiments = list(
            client.list_directories_and_files(
                _destinationFileShareFolderName, _destinationDirectoryName))
        for experiment in experiments:
            outputFolder = _destinationDirectoryName + "/" + experiment.name + "/" + _outputFolderName
            matches = []
            if client.exists(_destinationFileShareFolderName, outputFolder):
                produced = list(
                    client.list_directories_and_files(
                        _destinationFileShareFolderName, outputFolder))
                matches = [
                    item.name for item in produced
                    if item.name.endswith(_fileExtensionFilter)
                ]
            summary.append({
                'experimentName': experiment.name,
                'outputFiles': matches
            })

        took = datetime.datetime.now() - started
        return True, "{}:{}".format(took.seconds, took.microseconds), summary

    def deleteAllFiles(self,
                       _sourceFileShareFolderName,
                       _sourceDirectoryName,
                       _fileExtensionFilter='.jpg'):
        '''
        Delete the entries of _sourceDirectoryName whose names end with
        _fileExtensionFilter; pass None as the filter to delete every entry
        that is listed.

        Returns (False, <description>) when preCheck fails,
        (False, "No files found @ source") when the directory listing is
        empty, and (True, "OK") otherwise.
        '''
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if not rv:
            return rv, description

        fileList = list(
            file_service.list_directories_and_files(
                _sourceFileShareFolderName,
                directory_name=_sourceDirectoryName))
        if not fileList:
            return False, "No files found @ source"

        for imageFileName in fileList:
            # A None filter means "no filtering": everything listed goes.
            if (_fileExtensionFilter is None
                    or imageFileName.name.endswith(_fileExtensionFilter)):
                file_service.delete_file(_sourceFileShareFolderName,
                                         _sourceDirectoryName,
                                         imageFileName.name)
        return True, "OK"

    def DashBoardGetAllFilesInfoImpl(self,
                                     _sourceFileShareFolderName,
                                     _sourceDirectoryNameList,
                                     _destinationFileShareFolderName,
                                     _destinationDirectoryName,
                                     _outputFolderName='output',
                                     _fileExtensionFilter='.jpg'):
        '''
        Mother of all functions and scans through each and every file and
        returns lots of information. Could take up-to 40+ minutes to run.

        Phase 1 collects per-experiment statistics from the source
        directories, phase 2 adds the destination statistics, phase 3 merges
        both into one 8-slot list per experiment:
        [source file count, source size, mask file exists, destination file
        count, destination size, output folder exists, output file count,
        output size].

        Returns whatever returnFormattedValue builds: on success
        (True, "<seconds>:<microseconds>", <list of experiment items>), and
        (False, <description>, None) when either phase fails.
        '''
        start_time = datetime.datetime.now()
        print('phase1')
        result, description, returnSourceDict = self.DashBoardGetAllSourceFilesInfoImpl(
            _sourceFileShareFolderName, _sourceDirectoryNameList,
            _fileExtensionFilter)
        if not result:
            # The failure must be handed back to the caller, not dropped.
            return self.returnFormattedValue(start_time, result, description,
                                             None)

        print('phase2')
        result, description, returnDestinationDict = self.DashBoardGetAllDestinationFilesInfoImpl(
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName,
            _fileExtensionFilter,
            _returnDict=returnSourceDict)
        if not result:
            return self.returnFormattedValue(start_time, result, description,
                                             None)

        print('phase3')
        # combine the two dictionaries; the source one is assumed to contain
        # the superset of keys
        combinedDict = dict()
        for key in returnSourceDict:
            combined = [0, 0, False, 0, 0, False, 0, 0]
            for slot in (0, 1):
                combined[slot] = returnSourceDict[key][slot]
            if key in returnDestinationDict:
                for slot in range(2, 8):
                    combined[slot] = returnDestinationDict[key][slot]
            combinedDict[key] = combined

        return self.returnFormattedValue(start_time, True, "OK", combinedDict)

    def DashBoardGetAllSourceFilesInfoImplWrapper(self,
                                                  _sourceFileShareFolderName,
                                                  _sourceDirectoryNameList,
                                                  _fileExtensionFilter='.jpg'):
        '''
        Run DashBoardGetAllSourceFilesInfoImpl and pass its outcome, together
        with the time the call started, through returnFormattedValue.
        '''
        started = datetime.datetime.now()
        ok, message, stats = self.DashBoardGetAllSourceFilesInfoImpl(
            _sourceFileShareFolderName,
            _sourceDirectoryNameList,
            _fileExtensionFilter,
        )
        return self.returnFormattedValue(started, ok, message, stats)

    def DashBoardGetAllSourceFilesInfoImpl(self,
                                           _sourceFileShareFolderName,
                                           _sourceDirectoryNameList,
                                           _fileExtensionFilter='.jpg'):
        '''
        Count the files, and add up their sizes, per experiment across all the
        given source directories, where the images of every experiment sit
        side by side.

        The experiment name is the part of a file name before its first
        underscore. Only files whose names end with _fileExtensionFilter are
        inspected, and only the first directory of the list is verified by
        preCheck.
        In our context the _sourceFileShareFolderName = 'linuxraspshare' and
        the directories are e.g. 'Share' and 'backup'.

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "OK", stats) where stats maps an experiment name to an 8-slot
        list; slot 0 holds the file count, slot 1 the summed size, and the
        remaining slots are left at their initial values here.
        '''
        ok, message, client, _name, _key = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryNameList[0])
        if not ok:
            return ok, message, None

        stats = dict()
        for directory in _sourceDirectoryNameList:
            listing = list(
                client.list_directories_and_files(_sourceFileShareFolderName,
                                                  directory))
            for entry in listing:
                fileName = entry.name
                if not fileName.endswith(_fileExtensionFilter):
                    continue
                # The size is fetched for every matching file, including
                # those that end up without an experiment prefix.
                size = client.get_file_properties(
                    _sourceFileShareFolderName, directory,
                    fileName).properties.content_length
                cut = fileName.find('_')
                if cut <= 0:
                    continue
                prefix = fileName[:cut]
                if prefix in stats:
                    stats[prefix][0] += 1
                    stats[prefix][1] += size
                else:
                    stats[prefix] = [1, size, False, 0, 0, False, 0, 0]
        return True, "OK", stats

    def DashBoardGetAllDestinationFilesInfoImplWrapper(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName='output',
            _fileExtensionFilter='.jpg',
            _returnDict=None,
            _file_service=None):
        '''
        Run DashBoardGetAllDestinationFilesInfoImpl and pass its outcome,
        together with the time the call started, through returnFormattedValue.

        _file_service is accepted but not used.
        '''
        started = datetime.datetime.now()
        ok, message, stats = self.DashBoardGetAllDestinationFilesInfoImpl(
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName,
            _fileExtensionFilter,
            _returnDict,
        )
        return self.returnFormattedValue(started, ok, message, stats)

    def DashBoardGetAllDestinationFilesInfoImpl(
            self,
            _destinationFileShareFolderName,
            _destinationDirectoryName,
            _outputFolderName='output',
            _fileExtensionFilter='.jpg',
            _returnDict=None):
        '''
        Fill in the destination-side statistics of every experiment.

        When _returnDict is given it is updated in place and its keys name the
        experiments to inspect; otherwise the entries listed under the
        destination directory are used and a fresh dict is built.

        Each value is an 8-slot list. This method writes:
          [2] mask file exists
          [3] number of files in the experiment root folder
          [4] size of the files in the experiment root folder
          [5] output folder exists
          [6] number of files in the output folder
          [7] size of files in the output folder
        Slots 3-7 are only filled in for experiments that have a mask file.

        Returns (False, <description>, None) when preCheck fails, otherwise
        (True, "OK", <the dict>).
        '''
        print(datetime.datetime.now())
        ok, message, client, _name, _key = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if not ok:
            return ok, message, None

        if _returnDict is not None:
            stats = _returnDict
        else:
            # 1st pass: the experiment names are the names of the entries
            # found under the destination directory.
            stats = dict()
            for entry in list(
                    client.list_directories_and_files(
                        _destinationFileShareFolderName,
                        _destinationDirectoryName)):
                stats.setdefault(entry.name,
                                 [0, 0, False, 0, 0, False, 0, 0])

        # 2nd pass: inspect each experiment folder.
        for key, record in stats.items():
            print(key)

            experimentFolder = _destinationDirectoryName + "/" + key
            if not client.exists(_destinationFileShareFolderName,
                                 experimentFolder, self.maskFileName):
                continue

            if _returnDict is None:
                record[0] = 0
                record[1] = 0

            record[2] = True
            record[3], record[4] = self.getNumberOfFilesAndFileSize(
                client, _destinationFileShareFolderName, experimentFolder,
                _fileExtensionFilter)

            outputFolder = _destinationDirectoryName + "/" + key + "/" + _outputFolderName
            if client.exists(_destinationFileShareFolderName, outputFolder):
                record[5] = True
                record[6], record[7] = self.getNumberOfFilesAndFileSize(
                    client, _destinationFileShareFolderName, outputFolder,
                    _fileExtensionFilter)

        return True, "OK", stats

    def returnFormattedValue(self, start_time, result, description,
                             returnDict):
        """Shape an internal (result, description, dict) outcome for callers.

        :param start_time: ``datetime`` taken when the operation began.
        :param result: outcome flag; only a value equal to ``True`` counts
            as success.
        :param description: message handed back unchanged on failure.
        :param returnDict: mapping of experiment name to its property list.
        :return: ``(True, elapsed, items)`` on success, where ``elapsed`` is
            ``"<seconds>:<microseconds>"`` since ``start_time`` and ``items``
            holds one ``{'ExperimentName', 'Properties'}`` dict per entry;
            ``(False, description, None)`` otherwise.
        """
        if not (result == True):  # noqa: E712 - equality, not truthiness
            return False, description, None

        formatted = [{
            'ExperimentName': name,
            'Properties': properties
        } for name, properties in returnDict.items()]

        delta = datetime.datetime.now() - start_time
        return True, "{}:{}".format(delta.seconds,
                                    delta.microseconds), formatted

    def getNumberOfFilesAndFileSize(self, file_service, shareFolder,
                                    directoryName, _fileExtensionFilter):
        """Count the matching files in a share directory and sum their sizes.

        :param file_service: client exposing ``list_directories_and_files``
            and ``get_file_properties``.
        :param shareFolder: name of the file share.
        :param directoryName: directory inside the share to inspect.
        :param _fileExtensionFilter: suffix (or tuple of suffixes) an entry
            name must end with to be counted.
        :return: ``(numberOfFiles, sizeOfFiles)``; ``(0, 0)`` when nothing
            matches.
        """
        numberOfFiles = 0
        sizeOfFiles = 0

        # The listing is materialised before any per-file request is issued.
        # The former ``is None and len(...) < 1`` guard was dropped: list()
        # never returns None, so the guard could never be False, and an empty
        # listing simply makes the loop a no-op.
        entries = list(
            file_service.list_directories_and_files(shareFolder,
                                                    directoryName))

        for entry in entries:
            if not entry.name.endswith(_fileExtensionFilter):
                continue
            numberOfFiles += 1
            # One properties request per matching file to read its length.
            fileProperties = file_service.get_file_properties(
                shareFolder, directoryName, entry.name)
            sizeOfFiles += fileProperties.properties.content_length

        return numberOfFiles, sizeOfFiles

    def getListOfAllFiles(self, _destinationFileShareFolderName,
                          _destinationDirectoryName):
        """List every entry of a directory in the file share.

        :return: ``(False, description, None)`` when ``preCheck`` fails;
            otherwise ``(True, elapsed, entries)`` where ``entries`` is the
            materialised listing and ``elapsed`` is
            ``"<seconds>:<microseconds>"``.
        """
        started = datetime.datetime.now()
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        listing = file_service.list_directories_and_files(
            _destinationFileShareFolderName, _destinationDirectoryName)
        entries = list(listing)

        delta = datetime.datetime.now() - started
        return True, "{}:{}".format(delta.seconds,
                                    delta.microseconds), entries

    def isFile(self, _destinationFileShareFolderName,
               _destinationDirectoryName, fileName):
        """Report whether ``fileName`` exists in the given share directory.

        :return: ``(False, description, None)`` when ``preCheck`` fails;
            otherwise ``(True, elapsed, exists)`` where ``exists`` is the
            value returned by the file service and ``elapsed`` is
            ``"<seconds>:<microseconds>"``.
        """
        started = datetime.datetime.now()
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        found = file_service.exists(_destinationFileShareFolderName,
                                    _destinationDirectoryName, fileName)

        delta = datetime.datetime.now() - started
        return True, "{}:{}".format(delta.seconds, delta.microseconds), found

    def createDirectory(self, _destinationFileShareFolderName,
                        _destinationDirectoryName):
        """Create a directory in the file share unless it already exists.

        ``preCheck`` is called with a third positional argument of ``False``.

        :return: ``(False, description, None)`` when ``preCheck`` fails;
            otherwise ``(True, elapsed, rv)`` where ``rv`` is the result of
            ``create_directory`` when the directory had to be created, or the
            ``preCheck`` result when it already existed, and ``elapsed`` is
            ``"<seconds>:<microseconds>"``.
        """
        start_time = datetime.datetime.now()
        rv, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName, False)
        if rv == False:  # noqa: E712 - keep equality semantics
            return rv, description, None

        if file_service.exists(
                _destinationFileShareFolderName,
                directory_name=_destinationDirectoryName) == False:  # noqa: E712
            # Diagnostic output. The storage account key is deliberately NOT
            # printed: writing a credential to stdout leaks it into logs.
            print(_accountName)
            print(_destinationFileShareFolderName)
            print(_destinationDirectoryName)
            rv = file_service.create_directory(_destinationFileShareFolderName,
                                               _destinationDirectoryName)

        time_elapsed = datetime.datetime.now() - start_time
        elapsedTime = "{}:{}".format(time_elapsed.seconds,
                                     time_elapsed.microseconds)

        return True, elapsedTime, rv

    def removeAllFiles(self, _destinationFileShareFolderName,
                       _destinationDirectoryName):
        """Delete every file of a share directory via ``deleteAllFiles``.

        ``deleteAllFiles`` is called with ``None`` as its third argument and
        its description is discarded.

        :return: always ``(True, elapsed, rv)`` where ``rv`` is the first
            value returned by ``deleteAllFiles`` and ``elapsed`` is
            ``"<seconds>:<microseconds>"``.
        """
        started = datetime.datetime.now()
        deleted, _description = self.deleteAllFiles(
            _destinationFileShareFolderName, _destinationDirectoryName, None)
        delta = datetime.datetime.now() - started
        return True, "{}:{}".format(delta.seconds, delta.microseconds), deleted

    def getMaskFileContent(self, _destinationFileShareFolderName,
                           _destinationDirectoryName):
        """Load and JSON-decode the mask file stored in a share directory.

        :return: ``(False, description, None)`` when ``preCheck`` fails;
            otherwise ``(True, elapsed, content)`` where ``content`` is the
            decoded JSON of ``self.maskFileName`` or ``''`` when that file is
            missing or empty, and ``elapsed`` is
            ``"<seconds>:<microseconds>"``.
        """
        started = datetime.datetime.now()
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        location = (_destinationFileShareFolderName,
                    _destinationDirectoryName, self.maskFileName)
        content = ''
        # Only read the mask when the service reports that the file exists.
        if file_service.exists(*location) != False:  # noqa: E712
            mask_file = file_service.get_file_to_text(*location)
            has_text = (mask_file is not None
                        and mask_file.content is not None
                        and len(mask_file.content) > 0)
            if has_text:
                content = json.loads(mask_file.content)

        delta = datetime.datetime.now() - started
        return True, "{}:{}".format(delta.seconds,
                                    delta.microseconds), content

    def saveFileImage(self, _destinationFileShareFolderName,
                      _destinationDirectoryName, fileName, byteArray):
        """Write ``byteArray`` to ``fileName`` in the given share directory.

        :return: ``(False, description, None)`` when ``preCheck`` fails;
            otherwise ``(True, elapsed, 0)`` with ``elapsed`` formatted as
            ``"<seconds>:<microseconds>"``.
        """
        started = datetime.datetime.now()
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _destinationFileShareFolderName, _destinationDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        # Upload the raw bytes; the call's own return value is not used
        # (the original author notes the result arrives via a callback that
        # is not triggered).
        file_service.create_file_from_bytes(_destinationFileShareFolderName,
                                            _destinationDirectoryName,
                                            fileName, byteArray)

        delta = datetime.datetime.now() - started
        return True, "{}:{}".format(delta.seconds, delta.microseconds), 0

    # masked file implementations
    def GetMaskedImageImpl(self, _sourceFileShareFolderName,
                           _sourceDirectoryName, _imageFileName, _maskTags):
        '''
        Return an image from the share with the mask polygon applied.

        _sourceDirectoryName : format should be directoryName/secondDirectoryName/
        _maskTags : JSON text describing the mask; when it is None or empty
            the mask is read from ``self.maskFileName`` in the source
            directory instead.

        Returns ``(ok, description, payload)``.  On success this is whatever
        ``GetRawSourceImageImpl`` returns for the image with ``loadMask=True``.
        Every failure path returns ``(False, <message>, None)``; previously
        these paths returned the truthy ``preCheck`` result as the first
        element and therefore looked like successes to callers.

        Malformed JSON in ``_maskTags`` or in the mask file raises from
        ``json.loads`` and is not caught here.
        '''
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        if _maskTags is None or len(_maskTags) == 0:
            # No mask supplied by the caller: the mask file must exist in
            # the source folder.
            print('loadFromCloud')
            if file_service.exists(_sourceFileShareFolderName,
                                   _sourceDirectoryName,
                                   self.maskFileName) == False:  # noqa: E712
                return False, "_maskTags cannot be null as maskImage file also not exist!!!", None

            fileMask = file_service.get_file_to_text(
                _sourceFileShareFolderName, _sourceDirectoryName,
                self.maskFileName)
            if (fileMask is None or fileMask.content is None
                    or len(fileMask.content) == 0):
                return False, "Unable to load filemask ", None

            masks = json.loads(fileMask.content)
            if masks is None or len(masks) == 0:
                return False, "unable to load valid values for mask", None
        else:
            masks = json.loads(_maskTags)

        if masks is not None and len(masks) > 0:
            return self.GetRawSourceImageImpl(_sourceFileShareFolderName,
                                              _sourceDirectoryName,
                                              _imageFileName, True, masks)
        return False, "Mask value not set in logic!!!", None

    def GetRawSourceImageImpl(self,
                              _sourceFileShareFolderName,
                              _sourceDirectoryName,
                              _imageFileName,
                              loadMask=False,
                              masks=None):
        """Download an image from the share and return it JPEG-encoded.

        :param _sourceFileShareFolderName: name of the file share.
        :param _sourceDirectoryName: directory inside the share.
        :param _imageFileName: image file to load.
        :param loadMask: when equal to ``True`` the polygon ``masks`` is
            filled with black before encoding.
        :param masks: polygon vertices handed to ``cv2.fillPoly``; only used
            when ``loadMask`` is set.
        :return: ``(True, "OK", encoded)`` where ``encoded`` is the buffer
            produced by ``cv2.imencode('.jpg', ...)``; on any failure
            ``(False, <message>, None)``.  Failure paths used to return the
            truthy ``preCheck`` result as the first element, so callers could
            not tell them from success.
        """
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        if file_service.exists(_sourceFileShareFolderName,
                               _sourceDirectoryName,
                               _imageFileName) == False:  # noqa: E712
            return False, "Image file does not exist", None

        # Load the source file into memory.
        output_stream = io.BytesIO()
        fileImage = file_service.get_file_to_stream(
            _sourceFileShareFolderName, _sourceDirectoryName, _imageFileName,
            output_stream)

        content_length = fileImage.properties.content_length
        if content_length is None or content_length <= 0:
            return False, "Null content obtained from the image source file", None

        file_bytes = np.asarray(bytearray(output_stream.getvalue()),
                                dtype=np.uint8)
        # cv2.IMREAD_COLOR is the named constant for the flag value 1 the
        # code passed before; imdecode returns None for undecodable data.
        cv2_img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
        if cv2_img is None:
            return False, "Unable to decode : " + _imageFileName, None

        # NOTE(review): imdecode yields BGR data and imencode expects BGR, so
        # this channel swap presumably exchanges red and blue in the output
        # JPEG - kept as-is, confirm whether it is intended.
        colorImage = cv2.cvtColor(cv2_img, cv2.COLOR_RGB2BGR)
        if loadMask == True:  # noqa: E712 - keep equality semantics
            # Black out the masked polygon in place.
            cv2.fillPoly(colorImage, [np.array(masks)], (0, 0, 0))
        _, _encoded_image = cv2.imencode('.jpg', colorImage)
        return True, "OK", _encoded_image

    def GetRawImage(self, _sourceFileShareFolderName, _sourceDirectoryName,
                    _imageFileName):
        """Download an image from the share and return it decoded by OpenCV.

        :return: ``(True, "OK", image)`` where ``image`` is the array
            returned by ``cv2.imdecode``; on any failure
            ``(False, <message>, None)``.  Failure paths used to return the
            truthy ``preCheck`` result as the first element, so callers could
            not tell them from success.
        """
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        if file_service.exists(_sourceFileShareFolderName,
                               _sourceDirectoryName,
                               _imageFileName) == False:  # noqa: E712
            return False, "Image file does not exist", None

        # Load the source file into memory.
        output_stream = io.BytesIO()
        fileImage = file_service.get_file_to_stream(
            _sourceFileShareFolderName, _sourceDirectoryName, _imageFileName,
            output_stream)

        content_length = fileImage.properties.content_length
        if content_length is None or content_length <= 0:
            return False, "Null content obtained from the image source file", None

        file_bytes = np.asarray(bytearray(output_stream.getvalue()),
                                dtype=np.uint8)
        # cv2.IMREAD_COLOR is the named constant for the flag value 1 the
        # code passed before; imdecode returns None for undecodable data.
        cv2_img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
        if cv2_img is None:
            return False, "Unable to decode : " + _imageFileName, None
        return True, "OK", cv2_img

    def GetRawImageAsBytes(self, _sourceFileShareFolderName,
                           _sourceDirectoryName, _imageFileName):
        """Download an image from the share and return its raw bytes.

        :return: ``(True, "OK", data)`` where ``data`` is the ``bytes``
            written by the file service; on any failure
            ``(False, <message>, None)``.  Failure paths used to return the
            truthy ``preCheck`` result as the first element, so callers could
            not tell them from success.
        """
        ok, description, file_service, _accountName, _accountKey = self.preCheck(
            _sourceFileShareFolderName, _sourceDirectoryName)
        if ok == False:  # noqa: E712 - keep equality semantics
            return ok, description, None

        if file_service.exists(_sourceFileShareFolderName,
                               _sourceDirectoryName,
                               _imageFileName) == False:  # noqa: E712
            return False, "Image file does not exist", None

        # Load the source file into memory.
        output_stream = io.BytesIO()
        fileImage = file_service.get_file_to_stream(
            _sourceFileShareFolderName, _sourceDirectoryName, _imageFileName,
            output_stream)

        content_length = fileImage.properties.content_length
        if content_length is None or content_length <= 0:
            return False, "Null content obtained from the image source file", None

        # getvalue() returns the whole buffer regardless of stream position;
        # it is always a bytes object, so no None check is needed.
        return True, "OK", output_stream.getvalue()
Esempio n. 18
0
class Crawler:
    """Browser-driven crawler with download, FTP and Azure file-share helpers.

    The constructor wires together a ``DbCommunicator``, a Chrome
    ``webdriver`` and an Azure ``FileService``.  Errors raised while setting
    these up are not propagated; their text is stored in ``error_message``.

    Subclasses must override :meth:`_get_remote_filename`.
    """

    # Seconds to sleep after each browser interaction.  Override on a
    # subclass or an instance to change the pacing of the crawl.
    action_delay = 3

    def __init__(self, config, section, script_name=None, error_message=None):
        """Set up the database link, download folder, browser and file share.

        :param config: configuration object, used through ``get`` and
            ``getboolean``.
        :param section: section holding this crawler's own settings.
        :param script_name: name recorded with every file-status row.
        :param error_message: initial error text; replaced by ``str(e)`` when
            any setup step raises.
        """
        self.script_name = script_name
        self.config = config
        self.db = DbCommunicator(config)
        self.error_message = error_message
        # Defined up-front so get_property() can still fall back to the
        # config file if reading the DB parameters fails below.
        self.dbparams = None
        try:
            self.section = section
            self.dbparams = self.db.readProps('general')
            self.dbparams.update(self.db.readProps(section))
            self.downloads_path = self.get_property('downloads_path', section)
            self.overwrite_remote_files = self.get_property(
                'overwrite_remote_files', section, 'bool')
            if not os.path.exists(self.downloads_path):
                os.makedirs(self.downloads_path)
            elif not os.path.isdir(self.downloads_path):
                print(
                    'ERROR:{} downloads_path parameter points to file!'.format(
                        section))
                sys.exit(1)
            self.headless_mode = self.get_property('headless_mode', 'general',
                                                   'bool')
            if self.headless_mode:
                display = Display(visible=0, size=(1920, 1080))
                display.start()
            options = webdriver.ChromeOptions()
            options.add_argument("--no-sandbox")
            options.add_argument('--headless')
            prefs = {
                'download.default_directory': self.downloads_path,
                'download.prompt_for_download': False,
                'download.directory_upgrade': True,
                'plugins.always_open_pdf_externally': True,
            }
            options.add_experimental_option("prefs", prefs)
            self.browser = webdriver.Chrome(
                chrome_options=options,
                service_args=["--verbose", "--log-path=/tmp/selenium.log"])
            self.browser.implicitly_wait(10)
            self.browser.set_page_load_timeout(10000)
            self.browser.set_script_timeout(10000)
            # self.ftp_connect()
            self.file_storage_connect()
        except Exception as e:
            # Best effort: callers inspect error_message instead of catching.
            self.error_message = str(e)

    def _pause(self):
        """Sleep for ``action_delay`` seconds after a browser interaction."""
        time.sleep(self.action_delay)

    def get_property(self, prop, section, type='str'):
        """Read a setting, preferring the DB parameters over the config file.

        :param prop: setting name.
        :param section: config section used when the DB has no such setting.
        :param type: ``'str'`` or ``'bool'``; any other value yields ``None``.
        :return: the DB value as stored (``'str'``), whether the DB value
            equals ``'True'`` (``'bool'``), or the config-file value.
        """
        in_db = self.dbparams is not None and prop in self.dbparams
        if type == 'str':
            if in_db:
                return self.dbparams[prop]
            return self.config.get(section, prop).strip()
        if type == 'bool':
            if in_db:
                return self.dbparams[prop] == 'True'
            return self.config.getboolean(section, prop, fallback=False)
        return None

    def file_storage_connect(self):
        """Create the file-share client and make sure the prefix directory exists.

        Connection problems are printed, not raised.
        """
        self.file_storage_url = self.get_property('fs_server', 'general')
        self.file_storage_user = self.get_property('fs_username', 'general')
        self.file_storage_pwd = self.get_property('fs_password', 'general')
        self.file_storage_share = self.get_property('fs_share', 'general')
        self.file_storage_dir = self.get_property('fs_directory_prefix',
                                                  'general')
        self.file_service = FileService(account_name=self.file_storage_user,
                                        account_key=self.file_storage_pwd)
        try:
            if not self.file_service.exists(self.file_storage_share):
                print(
                    'Filaed to connect to Asure file storage, share does not exist: '
                    + self.file_storage_share)
                return
            print(
                'Connection to Azure file storage successfully established...'
            )
            if len(self.file_storage_dir) > 0 and not self.file_service.exists(
                    self.file_storage_share,
                    directory_name=self.file_storage_dir):
                # Create the prefix one path level at a time.
                subdirfull = ""
                for subdir in self.file_storage_dir.split('/'):
                    subdirfull += subdir
                    self.file_service.create_directory(
                        self.file_storage_share, subdirfull)
                    subdirfull += "/"
                print('Created directory:' + self.file_storage_dir)
        except Exception as ex:
            print('Error connecting to Azure file storage: ', ex)

    def ftp_connect(self):
        """Open and log in to the FTP server named in the ``general`` section."""
        self.ftp = FTP()
        self.ftp.connect(
            self.config.get('general', 'ftp_server').strip(),
            int(self.config.get('general', 'ftp_port')),
        )
        self.ftp.login(
            user=self.config.get('general', 'ftp_username').strip(),
            passwd=self.config.get('general', 'ftp_password').strip(),
        )
        print('Connection to ftp successfully established...')

    def get(self, url):
        """Navigate the browser to ``url`` and pause."""
        self.browser.get(url)
        self._pause()

    def assert_exists(self, selector):
        """Raise (via the driver) unless an element matches ``selector``."""
        _ = self.browser.find_element_by_css_selector(selector)

    def get_elements(self, selector, root=None):
        """Return all elements matching ``selector`` under ``root`` (default: page)."""
        if root is None:
            root = self.browser
        return root.find_elements_by_css_selector(selector)

    def wait_for_displayed(self, selector, timeout=None, poll_interval=0.1):
        """Block until the element matching ``selector`` is displayed.

        :param timeout: optional limit in seconds; ``None`` (the default)
            waits indefinitely, as before.
        :param poll_interval: seconds to sleep between checks, so the wait no
            longer spins the CPU.
        :raises TimeoutError: when ``timeout`` elapses first.
        """
        element = self.browser.find_element_by_css_selector(selector)
        deadline = None if timeout is None else time.monotonic() + timeout
        while not element.is_displayed():
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    'Element {} was not displayed within {} seconds'.format(
                        selector, timeout))
            time.sleep(poll_interval)

    def click_by_text(self, text):
        """Click the link whose visible text is ``text`` and pause.

        The element used to be located but never clicked.
        """
        self.browser.find_element_by_link_text(text).click()
        self._pause()

    def click_xpath(self, path, single=True):
        """Click the first (``single``) or every element matching an XPath."""
        if single:
            self.browser.find_element_by_xpath(path).click()
        else:
            for el in self.browser.find_elements_by_xpath(path):
                el.click()
        self._pause()

    def click(self, selector, single=True, root=None):
        """Click the first (``single``) or every element matching a CSS selector."""
        if root is None:
            root = self.browser
        if single:
            root.find_element_by_css_selector(selector).click()
        else:
            for el in root.find_elements_by_css_selector(selector):
                el.click()
        self._pause()

    def send_keys(self, selector, keys):
        """Clear the matching input, type ``keys`` into it and pause."""
        elem = self.browser.find_element_by_css_selector(selector)
        elem.clear()
        elem.send_keys(keys)
        self._pause()

    def open_new_tab(self):
        """Open a blank tab and switch to it.

        The last window handle is used (matching ``close_current_tab``)
        rather than index 1, which pointed at the wrong tab as soon as more
        than two were open.
        """
        self.browser.execute_script("window.open('');")
        self.browser.switch_to.window(self.browser.window_handles[-1])

    def close_current_tab(self):
        """Close the current tab and switch to the last remaining one."""
        self.browser.close()
        self.browser.switch_to.window(self.browser.window_handles[-1])

    def get_text(self, selector, single=True, root=None):
        """Return the text of the first match, or a list of texts for all."""
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).text
        return [el.text for el in root.find_elements_by_css_selector(selector)]

    def get_attr(self, selector, attr, single=True, root=None):
        """Return attribute ``attr`` of the first match, or a list for all."""
        if root is None:
            root = self.browser
        if single:
            return root.find_element_by_css_selector(selector).get_attribute(
                attr)
        return [
            el.get_attribute(attr)
            for el in root.find_elements_by_css_selector(selector)
        ]

    def execute(self, script):
        """Run JavaScript in the page (with an empty list argument) and pause."""
        self.browser.execute_script(script, [])
        self._pause()

    def deselect_all(self, selector):
        """Clear every selected option of the matching ``<select>`` and pause."""
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.deselect_all()
        self._pause()

    def select_option(self, selector, option):
        """Select the option with visible text ``option`` and pause."""
        select = Select(self.browser.find_element_by_css_selector(selector))
        select.select_by_visible_text(option)
        self._pause()

    def select_option_by_index(self, selector, index):
        """Select option ``index`` if it exists.

        :return: ``True`` after selecting (and pausing), ``False`` when
            ``index`` is past the last option.
        """
        select = Select(self.browser.find_element_by_css_selector(selector))
        if index < len(select.options):
            select.select_by_index(index)
            self._pause()
            return True
        return False

    def back(self):
        """Go back one page in the browser history and pause."""
        self.browser.back()
        self._pause()

    def close_dialog(self):
        """Dismiss an open alert, if any; errors are deliberately ignored."""
        try:
            alert = self.browser.switch_to.alert
            alert.dismiss()
            # alert.accept()
        except Exception:
            # Best effort: the usual reason is that no alert is open.
            pass

    def close(self):
        """Quit the browser and close the DB link when they were created."""
        if hasattr(self, 'browser'):
            self.browser.quit()
        if hasattr(self, 'db'):
            self.db.close()
        # self.ftp.quit()

    def download(self, url, filename, file_db_id=None):
        """Download ``url`` into ``downloads_path`` and record the outcome.

        Up to three attempts are made.  An attempt succeeds when the size of
        the written file equals the length announced by the response, or when
        the response announces no length at all.

        :param url: address to fetch.
        :param filename: name of the file to create in ``downloads_path``.
        :param file_db_id: optional id of an existing file-status row to
            update instead of creating a new one.
        :return: ``True`` when the file was downloaded, else ``False``.
        """
        if url.startswith('https'):
            # NOTE(security): certificate and hostname verification are
            # switched off for HTTPS downloads.  Kept for compatibility with
            # the sites being crawled; review before relying on the content.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        else:
            ctx = None

        downloaded = False
        file_name = ''
        for attempt in range(1, 4):
            try:
                # The response is closed even when reading fails.
                with urllib.request.urlopen(url, context=ctx) as r:
                    content_length = getattr(r, 'length', None)
                    data = r.read()
                file_name = os.path.join(self.downloads_path, filename)
                with open(file_name, 'wb') as f:
                    f.write(data)
                # Measured after the file is closed: while it was still open
                # small payloads sat in the write buffer and the size on disk
                # read as 0, so every small download was reported as failed.
                file_size = os.stat(file_name).st_size
            except Exception as e:
                print('Attempt', attempt, 'ERROR: Downloading failed!', url,
                      str(e))
                try:
                    os.remove(file_name)
                except OSError:
                    pass
                continue
            if content_length is None or file_size == content_length:
                downloaded = True
                break

        status = {
            'script_name': self.script_name,
            'file_original_name': filename,
            'file_status': 'Downloaded' if downloaded else 'None',
        }
        if file_db_id:
            status['id'] = file_db_id
        self.db.saveFileStatus(**status)
        return downloaded

    def _get_remote_filename(self, local_filename):
        """Map a local file name to its remote location; subclasses implement it.

        The callers in this class accept a falsy value (skip the file), a
        ``(directory, filename)`` pair or, for the file-share upload, a
        ``(directory, filename, year)`` triple.

        :raises NotImplementedError: always, in this base class.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raising it produced a TypeError instead.
        raise NotImplementedError

    def merge_files(self, filenames):
        """Concatenate PDF files with ``pdftk``.

        The output name is the first file name cut at ``' part'`` plus
        ``.pdf``.  Failures of ``pdftk`` are not raised.

        :param filenames: paths of the parts, in order.
        :return: the output file name wrapped in double quotes, as before.
        """
        import subprocess

        output_name = filenames[0].split(' part')[0] + '.pdf'
        # An argument list (no shell) so quotes or shell metacharacters in a
        # file name cannot alter the command.
        command = ['pdftk'] + list(filenames) + ['cat', 'output', output_name]
        try:
            subprocess.run(command)
        except OSError as e:
            # os.system never raised when pdftk was missing; stay best effort.
            print('Error running pdftk,', str(e))
        return '"' + output_name + '"'

    def upload_to_ftp(self, filename):
        """Upload ``filename``; now delegates to the Azure file-share upload."""
        self.upload_to_file_storage(filename)

    def upload_to_ftp_old(self, filename):
        """Upload a downloaded file to the FTP server, retrying up to 3 times.

        Nothing is uploaded when ``_get_remote_filename`` returns a falsy
        value, or when overwriting is disabled and the remote file can be
        retrieved.  After a failure the connection is probed and re-opened
        if needed.
        """
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                # print('Uploading {}'.format(path))
                # ``with`` closes the handle on every exit path; the remote
                # name gets its own variable so a retry still opens the
                # local file.
                with open(path, 'rb') as pdf_file:
                    remote_filename = self._get_remote_filename(filename)
                    if not remote_filename:
                        return
                    directory, remote_name = remote_filename
                    try:
                        self.ftp.cwd('/{}'.format(directory))
                    except Exception:
                        self.ftp.mkd('/{}'.format(directory))
                        self.ftp.cwd('/{}'.format(directory))
                    if not self.overwrite_remote_files:
                        # print('Checking if {}/{} already exists'.format(directory, filename))
                        try:
                            self.ftp.retrbinary('RETR {}'.format(remote_name),
                                                lambda x: x)
                            return
                        except error_perm:
                            pass

                    self.ftp.storbinary('STOR {}'.format(remote_name),
                                        pdf_file)
                    # print('{} uploaded'.format(path))
                return
            except Exception as e:
                print('Error uploading to ftp,', str(e))
                retries += 1
                try:
                    self.ftp.voidcmd("NOOP")
                except Exception:
                    self.ftp_connect()

    def move_to_another(self, filename):
        """Move a file on the FTP server out of ``/General Purpose``.

        The entity type is the second ``|``-separated field of ``filename``;
        County, City, Township and Village files are left where they are.
        Errors are printed, not raised.
        """
        try:
            entity_type = filename.split('|')[1]
            remote_filename = self._get_remote_filename(filename)
            if not remote_filename:
                return
            if entity_type in ('County', 'City', 'Township', 'Village'):
                return
            directory, server_filename = remote_filename
            self.ftp.rename('/General Purpose/{}'.format(server_filename),
                            '/{}/{}'.format(directory, server_filename))
            print('Moved {} to {}'.format(server_filename, directory))
        except Exception as e:
            print(str(e))

    def _ensure_remote_directory(self, directory):
        """Create ``directory`` in the file share when it does not exist."""
        if not self.file_service.exists(self.file_storage_share,
                                        directory_name=directory):
            self.file_service.create_directory(self.file_storage_share,
                                               directory)

    def _save_upload_status(self, file_details, original_name, directory,
                            upload_name, status, notes=None):
        """Record an upload outcome, updating the known row when there is one."""
        fields = {
            'file_upload_path': directory,
            'file_upload_name': upload_name,
            'file_status': status,
        }
        if notes is not None:
            fields['notes'] = notes
        if file_details is None:
            fields['script_name'] = self.script_name
            fields['file_original_name'] = original_name
        else:
            fields['id'] = file_details['id']
        self.db.saveFileStatus(**fields)

    def upload_to_file_storage(self, filename):
        """Upload a downloaded file to the Azure file share, retrying up to 3 times.

        Files already recorded as uploaded are skipped.  The remote location
        comes from ``_get_remote_filename``, which may return
        ``(directory, filename)`` or ``(directory, filename, year)``; with a
        year the file goes into a ``year`` sub-directory.  When overwriting
        is disabled an existing remote file is recorded and left untouched.
        Errors are printed, not raised.
        """
        fnm = FilenameManager()
        retries = 0
        while retries < 3:
            try:
                path = os.path.join(self.downloads_path, filename)
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Uploaded')
                if file_details is not None:
                    print(
                        'File {} was already uploaded before'.format(filename))
                    return
                file_details = self.db.readFileStatus(
                    file_original_name=filename,
                    file_status='Other',
                    notes='Uplodaed Before')
                if file_details is not None:
                    print(
                        'File {} was already uploaded before'.format(filename))
                    return
                file_details = self.db.readFileStatus(
                    file_original_name=filename, file_status='Downloaded')
                print('Uploading {}'.format(path))
                remote_filename = self._get_remote_filename(filename)
                if not remote_filename:
                    return
                # ``year`` must exist for 2-tuples too: it used to be unbound
                # there, so every such upload failed with a NameError.
                year = None
                try:
                    directory, upload_name, year = remote_filename
                except ValueError:
                    directory, upload_name = remote_filename
                upload_name = fnm.azure_validate_filename(upload_name)
                if len(self.file_storage_dir) > 0:
                    directory = self.file_storage_dir + '/' + directory
                self._ensure_remote_directory(directory)
                if year:
                    directory += '/' + str(year)
                    self._ensure_remote_directory(directory)
                if not self.overwrite_remote_files:
                    print('Checking if {}/{} already exists'.format(
                        directory, upload_name))
                    if self.file_service.exists(self.file_storage_share,
                                                directory_name=directory,
                                                file_name=upload_name):
                        print('{}/{} already exists'.format(
                            directory, upload_name))
                        self._save_upload_status(file_details,
                                                 filename,
                                                 directory,
                                                 upload_name,
                                                 'Other',
                                                 notes='Uplodaed Before')
                        return
                self.file_service.create_file_from_path(
                    self.file_storage_share,
                    directory,
                    upload_name,
                    path,
                    content_settings=ContentSettings(
                        content_type='application/pdf'))
                self._save_upload_status(file_details, filename, directory,
                                         upload_name, 'Uploaded')
                print('{} uploaded'.format(path))
                return
            except Exception as e:
                # ``filename`` is never rebound above, so a retry starts from
                # the original local name without any reset.
                print('Error uploading to Asure file storage,', str(e))
                retries += 1
Esempio n. 19
0
class AmlAgent():
    """Helper around the storage account of an Azure ML workspace.

    Uploads the files of a local folder to the workspace file share and
    downloads (and unzips) blobs from the workspace blob container.
    """

    # Replaced per instance in __init__; kept as a class attribute so that
    # ``AmlAgent.file_service`` stays resolvable.
    file_service=None

    def __init__(self):
        # NOTE(review): the SAS token is hard-coded and the client is
        # configured for plain HTTP, so the token travels unencrypted.
        # Consider loading it from configuration and using protocol='https'.
        self.file_service = FileService(account_name='chrisamlstoragemjeiyhfu', 
            sas_token='?sv=2018-03-28&ss=bfqt&srt=sco&sp=rwdlacup&se=2029-01-31T18:58:16Z&st=2019-01-31T10:58:16Z&spr=https,http&sig=FMkEUPqTAT7%2BmXZXFjeBaQ1Ydoik8Kic1ZYcGtHYc3w%3D',
            protocol='http')
        self.share_name ='azureml-filestore-53a6a9d0-a7f9-4336-a702-19f40d38db08'

    # Listing examples kept for reference (not executed):
    #
    #   # List from root
    #   root_file_dir = list(self.file_service.list_directories_and_files(share_name))
    #   for res in root_file_dir:
    #       print(res.name)  # dir1, dir2, rootfile
    #
    #   # Num results
    #   root_file_dir = list(self.file_service.list_directories_and_files(share_name, num_results=2))
    #   for res in root_file_dir:
    #       print(res.name)  # dir1, dir2
    #
    #   # List from directory
    #   dir1 = list(self.file_service.list_directories_and_files(share_name, 'test data'))
    #   for res in dir1:
    #       print(res.name)  # file1, file2

    def upload_data(self, source_path='.', source_folder_name='tub'):
        """Upload the files of ``source_path/source_folder_name`` to the share.

        The defaults reproduce the previously hard-coded behaviour (folder
        ``tub`` in the current working directory).
        """
        print("uploading data...")
        self.copy_files(source_path, source_folder_name)

    def copy_files(self,source_path,source_folder_name):
        """Copy every regular file of a local folder into the file share.

        The remote directory has the same name as ``source_folder_name`` and
        is created if it does not exist yet.  Sub-directories of the local
        folder are not descended into.

        :param source_path: local directory that contains the folder
        :param source_folder_name: name of the folder to upload
        """
        dest_folder_name = source_folder_name
        if not self.file_service.exists(share_name=self.share_name,
                                        directory_name=dest_folder_name):
            self.file_service.create_directory(self.share_name, dest_folder_name)

        full_source_path = join(source_path, source_folder_name)
        print ('full_source_path: {}'.format(full_source_path))
        files_in_source_folder = [f for f in listdir(full_source_path)
                                  if isfile(join(full_source_path, f))]

        for file_name in files_in_source_folder:
            full_source_file_name = join(full_source_path, file_name)
            print(file_name)  # file1, file2
            self.file_service.create_file_from_path(
                self.share_name,          # share name
                dest_folder_name,         # directory path - root path if none
                file_name,                # destination file name
                full_source_file_name,    # full source path with file name
                progress_callback=self.generate_progress_callback(file_name))  # report progress

    def generate_progress_callback(self,file_name):
        """Return a callback that prints ``(file_name, current, total)``."""
        def progress_callback(current, total):
            print('({}, {}, {})'.format(file_name, current, total))
        return progress_callback

    # The two download helpers were declared without ``self``, so calling
    # them on an instance raised TypeError.  As static methods they work both
    # on the class and on instances.
    @staticmethod
    def download_model():
        """Placeholder: only announces the download."""
        print("downloading model...")

    @staticmethod
    def download_zip_file_from_blob(local_path="C:/Users/chris/mtccar/data"):
        """Download every blob of the workspace container and unzip archives.

        :param local_path: local directory that receives the blobs and the
            extracted archive contents (defaults to the previously hard-coded
            path).

        Errors are printed, not raised.
        """
        try:
            # NOTE(review): hard-coded SAS token; see the note in __init__.
            block_blob_service = BlockBlobService(account_name='chrisamlstoragemjeiyhfu', sas_token='?sv=2018-03-28&ss=bf&srt=sco&sp=rwdlac&se=2029-02-04T23:12:23Z&st=2019-02-04T15:12:23Z&sip=0.0.0.0-255.255.255.255&spr=https,http&sig=SwxeDkbctxYI2nV9acctrUaCvL5EsM2PO7GK4eMCNv4%3D')
            container_name ='azureml-blobstore-53a6a9d0-a7f9-4336-a702-19f40d38db08'
            # List the blobs in the container
            print("\nList blobs in the container")
            generator = block_blob_service.list_blobs(container_name)
            for blob in generator:
                print("\t Blob name: " + blob.name)
                full_path_to_file = os.path.join(local_path, blob.name)
                # Blob names may contain '/', so make sure the local
                # sub-directory exists before writing the file.
                target_dir = os.path.dirname(full_path_to_file)
                if target_dir:
                    os.makedirs(target_dir, exist_ok=True)
                print("\nDownloading blob to " + full_path_to_file)
                block_blob_service.get_blob_to_path(container_name, blob.name, full_path_to_file)
                # A non-zip blob used to raise BadZipFile and end the whole
                # loop; skip extraction for it and carry on instead.
                if not zipfile.is_zipfile(full_path_to_file):
                    print("\t Not a zip archive, skipping extraction: " + full_path_to_file)
                    continue
                with zipfile.ZipFile(full_path_to_file,"r") as zip_ref:
                    zip_ref.extractall(local_path)

        except Exception as e:
            print(e)
Esempio n. 20
0
    def upload_to_file_storage():
        """Upload every PDF found below ``dir_upload`` to the Azure file share.

        Connection settings are read from ``dparameters``.  For each PDF the
        status database (``db``) is consulted: a file already recorded as
        'Uploaded' is only deleted locally; any other file is uploaded (up to
        three attempts), recorded as 'Uploaded' and then deleted locally.

        The remote directory is ``<fs_directory_prefix>/<category>/<year>``,
        where year and category are the two directory levels directly above
        the file.  A file lying directly in ``dir_upload`` goes to
        ``<fs_directory_prefix>/Unclassified`` without a year level.

        Exceptions escaping the per-file handling are printed and logged as
        critical; nothing is raised to the caller.
        """
        #init file manager
        fnm = FilenameManager()

        # get a sorted list of pdf files below dir_upload (recursive)
        template = dir_upload + "**"
        if operating_system == 'mac' or operating_system == 'linux':
            template += '/*.pdf'
        elif operating_system == 'windows':
            template += '\\*.pdf'
        lpdfs = sorted(glob.glob(template, recursive=True))

        # connect to the Azure file share and upload files
        try:
            # Not used below; the lookup is kept so a missing 'fs_server'
            # setting is still reported through the outer handler.
            file_storage_url = dparameters['fs_server'].strip()
            file_storage_user = dparameters['fs_username'].strip()
            file_storage_pwd = dparameters['fs_password'].strip()
            file_storage_share = dparameters['fs_share'].strip()
            file_storage_dir = dparameters['fs_directory_prefix'].strip()
            file_service = FileService(account_name=file_storage_user,
                                       account_key=file_storage_pwd)

            def ensure_directory(directory):
                # Create the directory on the share unless it already exists.
                if not file_service.exists(file_storage_share,
                                           directory_name=directory):
                    file_service.create_directory(file_storage_share,
                                                  directory)

            def record_uploaded(file_id, original_name, directory,
                                remote_filename):
                # Insert a new 'Uploaded' row, or update the existing row
                # when the file was already known (file_id is set).
                if file_id is None:
                    db.saveFileStatus(script_name=script_name,
                                      file_original_name=original_name,
                                      file_upload_path=directory,
                                      file_upload_name=remote_filename,
                                      file_status='Uploaded')
                else:
                    db.saveFileStatus(id=file_id,
                                      file_upload_path=directory,
                                      file_upload_name=remote_filename,
                                      file_status='Uploaded')

            # Best-effort connection check; failures are only reported and
            # the per-file upload below is still attempted.
            try:
                if file_service.exists(file_storage_share):
                    print(
                        'Connection to Azure file storage successfully established...'
                    )
                    if len(file_storage_dir) > 0 and not file_service.exists(
                            file_storage_share,
                            directory_name=file_storage_dir):
                        file_service.create_directory(file_storage_share,
                                                      file_storage_dir)
                        print('Created directory:' + file_storage_dir)
                else:
                    print(
                        'Failed to connect to Asure file storage, share does not exist: '
                        + file_storage_share)
            except Exception as ex:
                print('Error connecting to Azure file storage: ', ex)

            for pdffile in lpdfs:
                file_details = db.readFileStatus(file_original_name=pdffile,
                                                 file_status='Uploaded')
                if file_details is not None:
                    print('File {} was uploaded before'.format(
                        file_details["file_original_name"]))
                    os.remove(pdffile)
                    continue

                file_details = db.readFileStatus(
                    file_original_name=pdffile, file_status='Classified')
                file_id = None if file_details is None else file_details["id"]
                parent_dir, rpdffile = ntpath.split(pdffile)

                # 'year' must be reset for every file: it used to be assigned
                # only in the else-branch, so an unclassified file either
                # raised NameError or reused the previous file's year.
                year = ''
                if (parent_dir + '\\') == dir_upload or (parent_dir + '/') == dir_upload:
                    destinationdir = 'Unclassified'
                else:
                    parent_dir, year = ntpath.split(parent_dir)
                    parent_dir, destinationdir = ntpath.split(parent_dir)

                retries = 0
                while retries < 3:
                    try:
                        print('Uploading {}'.format(pdffile))
                        remote_filename = fnm.azure_validate_filename(
                            rpdffile)
                        if not remote_filename:
                            # Skip only this file; a 'return' here used to
                            # abandon all remaining files of the batch.
                            print('Skipping {}: no valid remote file name'.
                                  format(pdffile))
                            break
                        if len(file_storage_dir) > 0:
                            directory = file_storage_dir + '/' + destinationdir
                        else:
                            directory = destinationdir
                        ensure_directory(directory)
                        if year:
                            directory += '/' + year
                            ensure_directory(directory)
                        print('Checking if {}/{} already exists'.format(
                            directory, remote_filename))
                        if file_service.exists(file_storage_share,
                                               directory_name=directory,
                                               file_name=remote_filename):
                            print('{}/{} already exists'.format(
                                directory, remote_filename))
                            record_uploaded(file_id, pdffile, directory,
                                            remote_filename)
                            os.remove(pdffile)
                            break
                        file_service.create_file_from_path(
                            file_storage_share,
                            directory,
                            remote_filename,
                            pdffile,
                            content_settings=ContentSettings(
                                content_type='application/pdf'))
                        record_uploaded(file_id, pdffile, directory,
                                        remote_filename)
                        print('{}/{} uploaded'.format(
                            directory, remote_filename))
                        # Mark success before deleting so that a failing
                        # delete does not trigger another upload attempt.
                        retries = 3
                        os.remove(pdffile)
                    except Exception as e:
                        print('Error uploading to Asure file storage,',
                              str(e))
                        retries += 1
        except Exception as e:
            print(str(e))
            logging.critical(str(e))
Esempio n. 21
0
def run(job, **kwargs):
    """Upload a local file to an Azure file share and record it on the resource.

    The ``{{ ... }}`` placeholders are filled in by the templating layer
    before this code runs.  The file is uploaded to the root of the share;
    when overwriting is disabled and the name is taken, ``(1)``, ``(2)``, ...
    is appended until a free name is found.  The local file is deleted after
    the upload, and also when the upload fails.

    :param job: the job this action runs for (not used here)
    :param kwargs: expects ``resource``, the resource that will represent
        the uploaded file
    :return: ``("Success", message, "")`` on success
    :raises CloudBoltException: on any failure; the resource, if given, is
        deleted first
    """
    resource = kwargs.get('resource')
    create_custom_fields_as_needed()

    storage_account = '{{ storage_account }}'
    file_path = "{{ file }}"
    azure_storage_file_share_name = '{{ azure_storage_file_share_name }}'
    overwrite_files = {{overwrite_files}}
    file_name = Path(file_path).name

    if file_path.startswith(settings.MEDIA_URL):
        set_progress("Converting relative URL to filesystem path")
        file_path = file_path.replace(settings.MEDIA_URL, settings.MEDIA_ROOT)

    if not file_path.startswith(settings.MEDIA_ROOT):
        file_path = os.path.join(settings.MEDIA_ROOT, file_path)

    try:
        set_progress("Connecting To Azure...")
        # Fetch the storage-account resource once: evaluating the unordered
        # queryset twice could hand back two different rows, mixing the
        # primary key of one account with the fallback key of another.
        storage_resource = Resource.objects.filter(
            name__icontains=storage_account)[0]
        account_key = storage_resource.azure_account_key
        fallback_account_key = storage_resource.azure_account_key_fallback
        file_service = FileService(account_name=storage_account,
                                   account_key=account_key)

        def exists_on_share(name):
            # True when a file called `name` is in the root of the share.
            return file_service.exists(
                share_name=azure_storage_file_share_name,
                file_name=name,
                directory_name='')

        set_progress(
            'Creating file share {file_share_name} if it doesn\'t already exist...'
            .format(file_share_name=azure_storage_file_share_name))
        file_service.create_share(share_name=azure_storage_file_share_name,
                                  quota=1)

        set_progress('Connecting to file share')
        file_name_on_azure = file_name
        count = 0
        while (not overwrite_files) and exists_on_share(file_name_on_azure):
            count += 1
            file_name_on_azure = '{file_name}({duplicate_number})'.format(
                file_name=file_name, duplicate_number=count)
            set_progress(
                'File with name already exists on given file share, testing new name: {new_name}'
                .format(new_name=file_name_on_azure))

        local_resource_name = azure_storage_file_share_name + '-' + file_name_on_azure
        if overwrite_files and exists_on_share(file_name_on_azure):
            set_progress(
                'File with name already exists on given file share, overwriting'
            )
            # Drop the resource that tracked the file being overwritten.
            old_resource_to_overwite = Resource.objects.filter(
                name=local_resource_name, lifecycle='ACTIVE').first()

            if old_resource_to_overwite:
                old_resource_to_overwite.delete()

        set_progress(
            'Creating the file with name {file_name} on the Storage Account {storage_account} using the share named {share_name}'
            .format(file_name=file_name_on_azure,
                    storage_account=storage_account,
                    share_name=azure_storage_file_share_name))
        file_service.create_file_from_path(
            share_name=azure_storage_file_share_name,
            file_name=file_name_on_azure,
            directory_name='',
            local_file_path=file_path)
        os.remove(file_path)

        set_progress(
            'Creating local storage resource named {resource_name}'.format(
                resource_name=local_resource_name))
        resource.name = local_resource_name
        resource.azure_storage_account_name = storage_account
        resource.azure_account_key = account_key
        resource.azure_account_key_fallback = fallback_account_key
        resource.azure_storage_file_share_name = azure_storage_file_share_name
        resource.azure_storage_file_name = file_name_on_azure
        resource.save()

        return "Success", "The File has succesfully been uploaded", ""
    except Exception as e:
        # Clean up the local file and the half-built resource before
        # reporting the failure.
        if os.path.exists(file_path):
            os.remove(file_path)

        if resource:
            resource.delete()

        raise CloudBoltException(
            "File could not be uploaded because of the following error: {error}"
            .format(error=e)) from e
from azure.storage.file import FileService
from azure.storage.file import ContentSettings
import os


# Connection settings for the Azure Storage account that receives the files.
# NOTE(review): the account key is committed in clear text; consider loading
# it from the environment or a secret store and rotating this key.
omnipresence_storage_account_name = 'cloudinfraprovision'
omnipresence_storage_account_key = 'WVIc4TiKPDLxjtIWLpnk5fITbI6AFoZahvfTz4SgSjyP+fE3/qwgSgIo/UNavXPPjQDWrCfT4da6vnL209pThQ=='
omnipresence_storage_file_share = 'azure-provision' # Azure file share names allow only lowercase letters, numbers and hyphens.
remote_dir_path = ''


# Initialize an Azure Storage Account File Service instance
omnipresence_storage_account = FileService(account_name=omnipresence_storage_account_name, account_key=omnipresence_storage_account_key)

# Create the file share on Azure if it does not exist yet
if (not omnipresence_storage_account.exists(omnipresence_storage_file_share)):
    omnipresence_storage_account.create_share(omnipresence_storage_file_share, quota='10')

# Walk through the current directory and upload its files to the Azure file share, skipping hidden files and hidden directories (names starting with a dot).
for base_dir, dirs, file_names in os.walk(".", topdown=True):
    file_names = [ f for f in file_names if not f[0] == '.'] #parse out files whose name begins with a dot
    dirs[:] = [d for d in dirs if not d[0] == '.'] #parse out directorys whose name begins with a dot
    for local_file_name in file_names:
        remote_file_name = os.path.join(base_dir, local_file_name)[2:]
        local_file_name = remote_file_name
        if (omnipresence_storage_account.exists(omnipresence_storage_file_share)):
            omnipresence_storage_account.create_file_from_path(
                omnipresence_storage_file_share,
                None, # We want to create files under current remote directory, so we specify None for the directory_name
                remote_file_name,
                local_file_name,