Ejemplo n.º 1
0
    def files_import_from_agave(self, system_id, file_path, file_name,
                                url_to_ingest):
        """
        Import a file into Agave storage from a URL and wait for the result.

        Args:
            self: class instance.
            system_id: Identifier for the destination Agave storage system.
            file_path: Destination path on the storage system.
            file_name: Name to give the imported file.
            url_to_ingest: Source URL handed to Agave for ingestion. A falsy
                value is sent as an empty string.

        Returns:
            True when the import reports a final status of 'FINISHED'.

        Raises:
            Exception: if the import ends with any other status.

        """
        # percent-encode the source URL but leave '/' and ':' untouched
        quoted_url = urllib.parse.quote(str(url_to_ingest or ''), safe='/:')

        import_request = self._agave.files.importData(
            systemId=system_id,
            filePath=file_path,
            fileName=file_name,
            urlToIngest=quoted_url
        )

        # wait for the asynchronous import to report its final status
        final_status = AgaveAsyncResponse(self._agave, import_request).result()
        Log.some().debug('import %s: %s -> agave://%s/%s/%s',
                         str(final_status), url_to_ingest, system_id,
                         file_path, file_name)

        # any status other than FINISHED is reported to the caller as an error
        if str(final_status) != 'FINISHED':
            raise Exception('agave import failed')

        return True
Ejemplo n.º 2
0
def test_submit_job(agave, test_job):
    """Submit a job and check that it reaches the 'FINISHED' status."""
    submitted = agave.jobs.submit(body=test_job)
    validate_job(submitted)

    # Wrap the submission so its completion can be awaited.
    pending = AgaveAsyncResponse(agave, submitted)

    # Wait for the job to complete, giving up after 180 seconds (3 minutes).
    final_status = pending.result(180)
    assert final_status == 'FINISHED'
Ejemplo n.º 3
0
def test_upload_large_file(agave, credentials):
    """Upload a large local file and check that the import finishes.

    The file ``test_largefile_upload_python_sdk`` is read from the current
    working directory and imported to the path ``credentials['storage_user']``
    on the system ``credentials['storage']``.
    """
    # The context manager closes the file handle even when the upload or the
    # status polling raises; previously the handle was never closed.
    with open('test_largefile_upload_python_sdk', 'rb') as large_file:
        rsp = agave.files.importData(systemId=credentials['storage'],
                                     filePath=credentials['storage_user'],
                                     fileToUpload=large_file)
        arsp = AgaveAsyncResponse(agave, rsp)
        # wait for the import to complete, with a timeout of 120
        status = arsp.result(timeout=120)
    assert status == 'FINISHED'
Ejemplo n.º 4
0
def test_submit_archive_job(agave, test_job, credentials):
    """Submit a job with archiving enabled and check that it finishes.

    The job description is modified in place: archiving is switched on and
    the archive system is set to ``credentials['storage']``.
    """
    test_job['archive'] = True
    test_job['archiveSystem'] = credentials['storage']

    submitted = agave.jobs.submit(body=test_job)
    validate_job(submitted)

    # Wrap the submission so its completion can be awaited.
    pending = AgaveAsyncResponse(agave, submitted)

    # Wait for the job to complete, giving up after 180 seconds (3 minutes).
    final_status = pending.result(180)
    assert final_status == 'FINISHED'
Ejemplo n.º 5
0
    def _import_local_file(self, system_id, dest_path, dest_name, local_path):
        """
        Upload one local file to Agave and wait for the import to complete.

        Args:
            self: class instance.
            system_id: Identifier for Agave storage system.
            dest_path: Destination path on the storage system.
            dest_name: Name of the imported file.
            local_path: Path of the local file to upload.

        Returns:
            True if the final import status is 'FINISHED', False otherwise.

        """
        # read in binary mode; the context manager closes the handle even if
        # the import or the status polling raises
        with open(local_path, 'rb') as handle:
            response = self._agave.files.importData(systemId=system_id,
                                                    filePath=dest_path,
                                                    fileName=dest_name,
                                                    fileToUpload=handle)
            async_response = AgaveAsyncResponse(self._agave, response)
            status = async_response.result()

        Log.some().debug('import %s: %s -> agave://%s/%s/%s', str(status),
                         local_path, system_id, dest_path, dest_name)
        return str(status) == 'FINISHED'

    def files_import_from_local(self, system_id, file_path, file_name,
                                file_to_upload):
        """
        Wrap AgavePy import data file command.

        A directory is uploaded recursively: "file_name" is created as the
        target folder and the local tree is mirrored below it. A regular file
        is uploaded as a single import.

        Args:
            self: class instance.
            system_id: Identifier for Agave storage system.
            file_path: Path where file is to be imported.
            file_name: Name of the imported file.
            file_to_upload: File or folder path to upload to Agave.

        Returns:
            On success: True.
            On failure: False, when a folder cannot be created, an import
                does not finish, or file_to_upload is neither a file nor a
                directory. Exceptions raised by the underlying calls are not
                caught here.

        """
        if os.path.isdir(file_to_upload):
            # create target directory, which is "file_name"
            if not self.files_mkdir(system_id, file_path, file_name):
                Log.an().error('cannot create folder at uri: agave://%s%s/%s',
                               system_id, file_path, file_name)
                return False

            # walk through local directory structure
            for root, dirs, files in os.walk(file_to_upload, topdown=True):
                # translate local path to dest path; relpath stays correct
                # when file_to_upload carries a trailing separator, where
                # slicing by length would drop a character
                relative_root = os.path.relpath(root, file_to_upload)
                if relative_root == os.curdir:
                    # top level of the tree maps to the target folder itself
                    relative_root = ''
                dest_file_path = os.path.join(file_path, file_name,
                                              relative_root)

                # upload each file in this directory level
                for name in files:
                    if not self._import_local_file(system_id, dest_file_path,
                                                   name,
                                                   os.path.join(root, name)):
                        return False

                # create new directory for each directory in this level
                for name in dirs:
                    # create dest directory
                    if not self.files_mkdir(system_id, dest_file_path, name):
                        Log.an().error(
                            'cannot create folder at uri: agave://%s%s/%s',
                            system_id, dest_file_path, name)
                        return False

            return True

        if os.path.isfile(file_to_upload):
            # import single file
            return self._import_local_file(system_id, file_path, file_name,
                                           file_to_upload)

        # nothing was uploaded, so do not report success
        Log.an().error('cannot upload, not a file or directory: %s',
                       file_to_upload)
        return False
Ejemplo n.º 6
0
Archivo: files.py Proyecto: TACC/protx
    def import_data(self, from_system, from_path, retries=5, remote_url=None, external_resource=False):
        """Imports data from an external storage system

        :param str from_system: System to import from.
        :param str from_path: Path to import from.
        :param int retries: Maximum number of attempts if something goes
            wrong. Must be greater than zero.
        :param str remote_url: URL to ingest. When not given, an
            ``agave://`` URL is built from ``from_system`` and ``from_path``.
        :param bool external_resource: When true, return a
            :class:`BaseFile` built from the import response instead of
            requesting a listing of the imported path.

        :returns: Agave File Resource imported.
        :rtype: :class:`BaseFile`

        :raises Error: The last error caught, if every attempt failed.
        :raises ValueError: If ``retries`` is not greater than zero, so no
            attempt was made.

        .. note:: This function should be used to move data from one
        Agave storage system to another Agave storage system.

        .. note:: An import that ends in the ``FAILED`` status is logged
        but the file resource is still returned.

        .. todo:: We should implement a fallback using another type of
        data transfer method if this fails.
        """
        if not remote_url:
            remote_url = 'agave://{}/{}'.format(
                from_system,
                urllib.parse.quote(from_path)
            )
        file_name = os.path.split(from_path)[1]

        # Only populated by an attempt that completes without raising, so a
        # half-finished attempt never leaks into the code below.
        result = None
        async_status = None
        last_error = None

        _retries = retries
        while _retries > 0:
            try:
                attempt_result = self._ac.files.importData(
                    systemId=self.system,
                    filePath=urllib.parse.quote(self.path),
                    fileName=str(file_name),
                    urlToIngest=remote_url
                )
                async_resp = AgaveAsyncResponse(self._ac, attempt_result)
                attempt_status = async_resp.result(600)
            except Error as err:
                last_error = err
                logger.error(
                    'There was an error importing data. %s. Retrying...',
                    err,
                    exc_info=True
                )
                _retries -= 1
            else:
                result = attempt_result
                async_status = attempt_status
                break

        if result is None:
            # No attempt succeeded. Surface the real cause rather than
            # failing later on an unbound local variable.
            if last_error is not None:
                raise last_error
            raise ValueError('retries must be greater than zero')

        if str(async_status) == 'FAILED':
            # No exception is being handled here, so no exc_info is attached.
            logger.error(
                'Import Data failed from: systemId=%s, filePath=%s. '
                'to: systemId=%s, filePath=%s '
                'using URI: %s',
                from_system,
                from_path,
                self.system,
                self.path,
                remote_url
            )

        # If import is coming from an external resource like google drive,
        # don't return a listing for every recursive file upload.
        if external_resource:
            return BaseFile(system=result['systemId'],
                            path=result['path'],
                            client=self._ac)
        return BaseFile.listing(self._ac, self.system, result['path'])