def main():
    tokens = None
    client = NativeClient(client_id=CLIENT_ID, app_name=APP_NAME)
    try:
        # if we already have tokens, load and use them
        tokens = client.load_tokens(requested_scopes=SCOPES)
    except Exception:
        pass

    if not tokens:
        # if we need to get tokens, start the Native App authentication process
        # need to specify that we want refresh tokens
        tokens = client.login(requested_scopes=SCOPES,
                              refresh_tokens=True)
        try:
            client.save_tokens(tokens)
        except Exception:
            pass

    transfer = setup_transfer_client(tokens['transfer.api.globus.org'])

    try:
        data = load_data_from_file(DATA_FILE)
        if len(data) > 0:
            task_data = data['task']
            task = transfer.get_task(task_data['task_id'])
            if task['status'] not in PREVIOUS_TASK_RUN_CASES:
                print('The last transfer status is {}, skipping run...'.format(
                    task['status']
                ))
                sys.exit(1)
    except KeyError:
        # Ignore if there is no previous task
        pass

    check_endpoint_path(transfer, SOURCE_ENDPOINT, SOURCE_PATH)
    if CREATE_DESTINATION_FOLDER:
        create_destination_directory(transfer, DESTINATION_ENDPOINT,
                                     DESTINATION_PATH)
    else:
        check_endpoint_path(transfer, DESTINATION_ENDPOINT, DESTINATION_PATH)

    tdata = TransferData(
        transfer,
        SOURCE_ENDPOINT,
        DESTINATION_ENDPOINT,
        label=TRANSFER_LABEL,
        sync_level="checksum"
    )
    tdata.add_item(SOURCE_PATH, DESTINATION_PATH, recursive=True)

    task = transfer.submit_transfer(tdata)
    save_data_to_file(DATA_FILE, 'task', task.data)
    print('Transfer has been started from\n  {}:{}\nto\n  {}:{}'.format(
        SOURCE_ENDPOINT,
        SOURCE_PATH,
        DESTINATION_ENDPOINT,
        DESTINATION_PATH
    ))
    url_string = 'https://globus.org/app/transfer?' + \
        six.moves.urllib.parse.urlencode({
            'origin_id': SOURCE_ENDPOINT,
            'origin_path': SOURCE_PATH,
            'destination_id': DESTINATION_ENDPOINT,
            'destination_path': DESTINATION_PATH
        })
    print('Visit the link below to see the changes:\n{}'.format(url_string))
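
# The helper functions used above (setup_transfer_client, load_data_from_file,
# save_data_to_file, check_endpoint_path, create_destination_directory) are not
# shown on this page. A minimal sketch of what setup_transfer_client might look
# like, assuming the saved token dict carries 'refresh_token', 'access_token'
# and 'expires_at_seconds' keys as in the other examples here (an illustration,
# not the original helper):
import globus_sdk

def setup_transfer_client(transfer_tokens):
    # Build a TransferClient that refreshes its access token automatically
    authorizer = globus_sdk.RefreshTokenAuthorizer(
        transfer_tokens['refresh_token'],
        globus_sdk.NativeAppAuthClient(CLIENT_ID),
        access_token=transfer_tokens['access_token'],
        expires_at=transfer_tokens['expires_at_seconds'])
    return globus_sdk.TransferClient(authorizer=authorizer)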
Example #2
def submit_transfer():
    """
    - Take the data returned by the Browse Endpoint helper page
      and make a Globus transfer request.
    - Send the user to the transfer status page with the task id
      from the transfer.
    """
    browse_endpoint_form = request.form

    dirselect = session['form']['dirselect']
    selected = session['form']['datasets']
    if dirselect:
        filtered_datasets = [ds for ds in datasets if ds['id'] in selected]
    else:
        path = session['form']['path']
        myid = session['form']['id']
        filtered_datasets = [
            {'name': name, 'path': path, 'id': myid}
            for name, path, myid in zip(selected, path, myid)
        ]

    transfer_tokens = session['tokens']['transfer.api.globus.org']

    authorizer = RefreshTokenAuthorizer(
        transfer_tokens['refresh_token'],
        load_portal_client(),
        access_token=transfer_tokens['access_token'],
        expires_at=transfer_tokens['expires_at_seconds'])

    transfer = TransferClient(authorizer=authorizer)

    source_endpoint_id = app.config['DATASET_ENDPOINT_ID']
    source_endpoint_base = app.config['DATASET_ENDPOINT_BASE']
    destination_endpoint_id = browse_endpoint_form['endpoint_id']
    destination_folder = browse_endpoint_form.get('folder[0]') 

    transfer_data = TransferData(transfer_client=transfer,
                                 source_endpoint=source_endpoint_id,
                                 destination_endpoint=destination_endpoint_id,
                                 label=browse_endpoint_form.get('label'))

    for ds in filtered_datasets:
        print("printing ds")
        print(ds)
        if dirselect:
            source_path = source_endpoint_base + ds['path']
        else:
            source_path = source_endpoint_base + ds['path'] + "/" + ds['name']

        dest_path = browse_endpoint_form['path']

        if destination_folder:
            dest_path += destination_folder + '/'

        if dirselect:
            dest_path += ds['path'] + '/'
        else:
            dest_path += ds['path'] + '/' + ds['name']

        transfer_data.add_item(source_path=source_path,
                               destination_path=dest_path,
                               recursive=dirselect)

    transfer.endpoint_autoactivate(source_endpoint_id)
    transfer.endpoint_autoactivate(destination_endpoint_id)
    task_id = transfer.submit_transfer(transfer_data)['task_id']

    flash('Transfer request submitted successfully. Task ID: ' + task_id)

    return redirect(url_for('transfer_status', task_id=task_id))
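
# load_portal_client() is defined elsewhere in the portal code. A minimal
# sketch under the assumption that the portal is registered as a confidential
# app and keeps its credentials in the Flask config (the config key names
# below are placeholders, not necessarily the portal's real ones):
import globus_sdk

def load_portal_client():
    return globus_sdk.ConfidentialAppAuthClient(
        app.config['PORTAL_CLIENT_ID'], app.config['PORTAL_CLIENT_SECRET'])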
Example #3
def globus_transfer(  # noqa: C901
        remote_endpoint,
        remote_path,
        name,
        transfer_type,
        non_blocking=False):
    """
    Read the local globus endpoint UUID from ~/.zstash.ini.
    If the ini file does not exist, create an ini file with empty values,
    and try to find the local endpoint UUID based on the FQDN
    """
    ini_path = os.path.expanduser("~/.zstash.ini")
    ini = configparser.ConfigParser()
    local_endpoint = None
    if ini.read(ini_path):
        if "local" in ini.sections():
            local_endpoint = ini["local"].get("globus_endpoint_uuid")
    else:
        ini["local"] = {"globus_endpoint_uuid": ""}
        try:
            with open(ini_path, "w") as f:
                ini.write(f)
        except Exception as e:
            logger.error(e)
            sys.exit(1)
    if not local_endpoint:
        fqdn = socket.getfqdn()
        for pattern in regex_endpoint_map.keys():
            if re.fullmatch(pattern, fqdn):
                local_endpoint = regex_endpoint_map.get(pattern)
                break
    if not local_endpoint:
        logger.error(
            "{} does not have the local Globus endpoint set".format(ini_path))
        sys.exit(1)

    if remote_endpoint.upper() in hpss_endpoint_map.keys():
        remote_endpoint = hpss_endpoint_map.get(remote_endpoint.upper())

    if transfer_type == "get":
        src_ep = remote_endpoint
        src_path = os.path.join(remote_path, name)
        dst_ep = local_endpoint
        dst_path = os.path.join(os.getcwd(), name)
    else:
        src_ep = local_endpoint
        src_path = os.path.join(os.getcwd(), name)
        dst_ep = remote_endpoint
        dst_path = os.path.join(remote_path, name)

    subdir = os.path.basename(os.path.normpath(remote_path))
    subdir_label = re.sub("[^A-Za-z0-9_ -]", "", subdir)
    filename = name.split(".")[0]
    label = subdir_label + " " + filename

    native_client = NativeClient(
        client_id="6c1629cf-446c-49e7-af95-323c6412397f",
        app_name="Zstash",
        default_scopes="openid urn:globus:auth:scope:transfer.api.globus.org:all",
    )
    native_client.login(no_local_server=True, refresh_tokens=True)
    transfer_authorizer = native_client.get_authorizers().get(
        "transfer.api.globus.org")
    tc = TransferClient(transfer_authorizer)

    for ep_id in [src_ep, dst_ep]:
        r = tc.endpoint_autoactivate(ep_id, if_expires_in=600)
        if r.get("code") == "AutoActivationFailed":
            logger.error(
                "The {} endpoint is not activated or the current activation expires soon. Please go to https://app.globus.org/file-manager/collections/{} and (re)activate the endpoint."
                .format(ep_id, ep_id))
            sys.exit(1)

    td = TransferData(
        tc,
        src_ep,
        dst_ep,
        label=label,
        sync_level="checksum",
        verify_checksum=True,
        preserve_timestamp=True,
        fail_on_quota_errors=True,
    )
    td.add_item(src_path, dst_path)
    try:
        task = tc.submit_transfer(td)
    except TransferAPIError as e:
        if e.code == "NoCredException":
            logger.error(
                "{}. Please go to https://app.globus.org/endpoints and activate the endpoint."
                .format(e.message))
        else:
            logger.error(e)
        sys.exit(1)
    except Exception as e:
        logger.error("Exception: {}".format(e))
        sys.exit(1)

    if non_blocking:
        return

    try:
        task_id = task.get("task_id")
        """
        A Globus transfer job (task) can be in one of the three states:
        ACTIVE, SUCCEEDED, FAILED. The script every 20 seconds polls a
        status of the transfer job (task) from the Globus Transfer service,
        with 20 second timeout limit. If the task is ACTIVE after time runs
        out 'task_wait' returns False, and True otherwise.
        """
        while not tc.task_wait(task_id, 20, 20):
            pass
        """
        The Globus transfer job (task) has been finished (SUCCEEDED or FAILED).
        Check if the transfer SUCCEEDED or FAILED.
        """
        task = tc.get_task(task_id)
        if task["status"] == "SUCCEEDED":
            logger.info(
                "Globus transfer {}, from {}{} to {}{} succeeded".format(
                    task_id, src_ep, src_path, dst_ep, dst_path))
        else:
            logger.error("Transfer FAILED")
    except TransferAPIError as e:
        if e.code == "NoCredException":
            logger.error(
                "{}. Please go to https://app.globus.org/endpoints and activate the endpoint."
                .format(e.message))
        else:
            logger.error(e)
        sys.exit(1)
    except Exception as e:
        logger.error("Exception: {}".format(e))
        sys.exit(1)
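
# A hedged usage sketch for the function above: pull one archive file from
# HPSS to the local endpoint. It assumes "NERSC" is a key in hpss_endpoint_map
# and that ~/.zstash.ini already names the local endpoint; the argument values
# are placeholders.
if __name__ == "__main__":
    globus_transfer(
        remote_endpoint="NERSC",
        remote_path="/~/zstash_archive",   # placeholder HPSS path
        name="000000.tar",                 # archive file to move
        transfer_type="get",               # "get" pulls to the local endpoint
        non_blocking=False)                # block until the task finishes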
Example #4
    def __init__(
        self,
        endpoint1,
        endpoint2,
        label,
        sync_level="checksum",
        verify_checksum=False,
        encrypt_data=False,
    ):
        """
        Parameters
        ----------
        endpoint1 : :py:class:models.Endpoint
            The endpoint to transfer from

        endpoint2 : :py:class:models.Endpoint
            The endpoint to transfer to

        sync_level : int or string [default: "checksum"]
            "exists", "size", "mtime", or "checksum"
            For compatibility, this can also be 0, 1, 2, or 3

            The meanings are as follows:

            0, exists
            Determine whether or not to transfer based on file existence. If the
            destination file is absent, do the transfer.

            1, size
            Determine whether or not to transfer based on the size of the file. If
            destination file size does not match the source, do the transfer.

            2, mtime
            Determine whether or not to transfer based on modification times. If source
            has a newer modified time than the destination, do the transfer.

            3, checksum
            Determine whether or not to transfer based on checksums of file contents. If
            source and destination contents differ, as determined by a checksum of their
            contents, do the transfer.

        verify_checksum : bool [default: False]
            When true, after transfer verify that the source and destination file
            checksums match. If they don't, re-transfer the entire file and keep
            trying until it succeeds.

            This will create CPU load on both the origin and destination of the transfer,
            and may even be a bottleneck if the network speed is high enough.

        encrypt_data : bool [default: False]
            When true, all files will be TLS-protected during transfer.

        """
        if not "Endpoint" in str(endpoint1.__class__):
            raise AttributeError(
                "Positional argument `endpoint1` expected to be `:py:class:Endpoint`",
                ", recieved `:py:class:{0} instead".format(type(endpoint1)),
            )
        if not "Endpoint" in str(endpoint2.__class__):
            raise AttributeError(
                "Positional argument `endpoint1` expected to be `:py:class:Endpoint`",
                ", recieved `:py:class:{0} instead".format(type(endpoint1)),
            )
        self.endpoint1 = endpoint1
        self.endpoint2 = endpoint2
        self.endpoint1.transfer_client.get_submission_id()
        self.transfer_data = TransferData(
            self.endpoint1.transfer_client,
            self.endpoint1.endpoint_id,
            self.endpoint2.endpoint_id,
            label=label,
            sync_level=sync_level,
            encrypt_data=encrypt_data,
        )
        self.add_transfers = []
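
# The sync_level values documented in the docstring above map between the
# integer and string forms accepted by TransferData. A small stand-alone
# helper illustrating that mapping (a sketch, not part of the original class):
SYNC_LEVELS = {0: "exists", 1: "size", 2: "mtime", 3: "checksum"}

def normalize_sync_level(level):
    """Accept 0-3 or one of the string names and return the string form."""
    if level in SYNC_LEVELS:
        return SYNC_LEVELS[level]
    if level in SYNC_LEVELS.values():
        return level
    raise ValueError("unknown sync_level: {0!r}".format(level))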
Example #5
async def create_transfer_globus(transferObject: TransferBase,
                                 transfer_client: TransferClient,
                                 isFolder: bool = False):
    """This function verifies if globus authentication is present in session"""
    #transfer_client = await get_transfer_client(request)
    source = transferObject.source
    target = transferObject.target
    source_name = ''
    source_path = ''
    # example source: globus://fd9c190c-b824-11e9-98d7-0a63aa6b37da:/gridftp/pub/databases/eva/PRJEB6057/MOCH.population_sites.CHIR1_0.20140307_EVA_ss_IDs.fixed.vcf.gz
    if source:
        source_endpoint_id = source.split(':')[1].replace('/', '')
        source_path = source.split(':')[2]
        source_path_array = source_path.split('/')
        source_name = source_path_array[len(source_path_array) - 1]

    if target:
        target_endpoint_id = target.split(':')[1].replace('/', '')
        target_path = target.split(':')[2]

    if target_path.endswith('/'):
        if source_name:
            target_path = target_path + source_name

    transfer_response = None

    # source path ends with '/'
    if source_name == '':
        isFolder = True

    if transferObject.options:
        if 'recursive' in transferObject.options:
            if transferObject.options['recursive'] == "True":
                isFolder = True

    time = datetime.now().strftime("%d-%m-%Y %H-%M-%S")

    try:

        tdata = TransferData(transfer_client,
                             source_endpoint_id,
                             target_endpoint_id,
                             label='RDSDS ' + time,
                             sync_level="checksum")

        if isFolder:
            tdata.add_item(source_path, target_path, recursive=True)
        else:
            tdata.add_item(source_path, target_path)

        transfer_result = transfer_client.submit_transfer(tdata)

        transfer_result_json = json.loads(str(transfer_result))
        transfer_response = {'globus_response': transfer_result_json}
        transfer_response['status'] = 200
        rdsds_tracking_id = 'globus-' + transfer_result["task_id"]
        transfer_response['rdsds_tracking_id'] = rdsds_tracking_id
        transfer_response['globus_status_url'] = (
            'https://app.globus.org/activity/' + transfer_result["task_id"] +
            '/overview')

        return transfer_response

    except GlobusAPIError as e:
        # Error response from the REST service, check the code and message for
        # details.
        return handle_globus_api_error(e)
    except NetworkError:
        logging.error(("Network Failure. "
                       "Possibly a firewall or connectivity issue"))
        raise
    except GlobusError:
        logging.exception("Totally unexpected GlobusError!")
        raise
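
# The source/target strings follow the globus://<endpoint-uuid>:<path> form
# shown in the comment near the top of the function. A small stand-alone
# sketch of the same parsing the function does inline (illustrative only):
def parse_globus_url(url):
    """Split 'globus://<endpoint-uuid>:<path>' into (endpoint_id, path)."""
    # splitting on ':' gives 'globus', '//<endpoint-uuid>', '<path>'
    _, endpoint_part, path = url.split(':', 2)
    return endpoint_part.strip('/'), path

# e.g. parse_globus_url('globus://fd9c190c-...:/gridftp/pub/some_file.vcf.gz')
# returns ('fd9c190c-...', '/gridftp/pub/some_file.vcf.gz')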
Example #6
def transfer_command(
    batch,
    sync_level,
    recursive,
    destination,
    source,
    checksum_algorithm,
    external_checksum,
    label,
    preserve_mtime,
    verify_checksum,
    encrypt,
    submission_id,
    dry_run,
    delete,
    deadline,
    skip_activation_check,
    notify,
    perf_cc,
    perf_p,
    perf_pp,
    perf_udt,
):
    """
    Executor for `globus transfer`
    """
    source_endpoint, cmd_source_path = source
    dest_endpoint, cmd_dest_path = destination

    if recursive and batch:
        raise click.UsageError(
            ("You cannot use --recursive in addition to --batch. "
             "Instead, use --recursive on lines of --batch input "
             "which need it"))

    if external_checksum and batch:
        raise click.UsageError(
            ("You cannot use --external-checksum in addition to --batch. "
             "Instead, use --external-checksum on lines of --batch input "
             "which need it"))

    if recursive and external_checksum:
        raise click.UsageError(
            "--recursive and --external-checksum are mutually exclusive")

    if (cmd_source_path is None or cmd_dest_path is None) and (not batch):
        raise click.UsageError(
            "transfer requires either SOURCE_PATH and DEST_PATH or --batch")

    # because python can't handle multiple **kwargs expansions in a single
    # call, we need to get a little bit clever
    # both the performance options (of which there are a few), and the
    # notification options (also there are a few) have elements which should be
    # omitted in some cases
    # notify comes to us clean, perf opts need more care
    # put them together into a dict before passing to TransferData
    kwargs = {}
    perf_opts = dict((k, v) for (k, v) in dict(
        perf_cc=perf_cc, perf_p=perf_p, perf_pp=perf_pp,
        perf_udt=perf_udt).items() if v is not None)
    kwargs.update(perf_opts)
    kwargs.update(notify)

    client = get_client()
    transfer_data = TransferData(client,
                                 source_endpoint,
                                 dest_endpoint,
                                 label=label,
                                 sync_level=sync_level,
                                 verify_checksum=verify_checksum,
                                 preserve_timestamp=preserve_mtime,
                                 encrypt_data=encrypt,
                                 submission_id=submission_id,
                                 delete_destination_extra=delete,
                                 deadline=deadline,
                                 skip_activation_check=skip_activation_check,
                                 **kwargs)

    if batch:

        @click.command()
        @click.option("--external-checksum")
        @click.option("--recursive", "-r", is_flag=True)
        @click.argument("source_path", type=TaskPath(base_dir=cmd_source_path))
        @click.argument("dest_path", type=TaskPath(base_dir=cmd_dest_path))
        def process_batch_line(dest_path, source_path, recursive,
                               external_checksum):
            """
            Parse a line of batch input and turn it into a transfer submission
            item.
            """
            if recursive and external_checksum:
                raise click.UsageError("--recursive and --external-checksum "
                                       "are mutually exclusive")
            transfer_data.add_item(
                str(source_path),
                str(dest_path),
                external_checksum=external_checksum,
                checksum_algorithm=checksum_algorithm,
                recursive=recursive,
            )

        shlex_process_stdin(
            process_batch_line,
            ("Enter transfers, line by line, as\n\n"
             "    [--recursive] [--external-checksum TEXT] SOURCE_PATH DEST_PATH\n"
             ),
        )
    else:
        transfer_data.add_item(
            cmd_source_path,
            cmd_dest_path,
            external_checksum=external_checksum,
            checksum_algorithm=checksum_algorithm,
            recursive=recursive,
        )

    if dry_run:
        formatted_print(
            transfer_data,
            response_key="DATA",
            fields=(
                ("Source Path", "source_path"),
                ("Dest Path", "destination_path"),
                ("Recursive", "recursive"),
                ("External Checksum", "external_checksum"),
            ),
        )
        # exit safely
        return

    # autoactivate after parsing all args and putting things together
    # skip this if skip-activation-check is given
    if not skip_activation_check:
        autoactivate(client, source_endpoint, if_expires_in=60)
        autoactivate(client, dest_endpoint, if_expires_in=60)

    res = client.submit_transfer(transfer_data)
    formatted_print(
        res,
        text_format=FORMAT_TEXT_RECORD,
        fields=(("Message", "message"), ("Task ID", "task_id")),
    )
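
# With --batch, transfer_command reads one transfer item per line from stdin
# in the form printed by shlex_process_stdin above. A hedged illustration of
# what that input could look like (paths and the checksum value are
# placeholders):
#
#     /source/dir/file1.txt /dest/dir/file1.txt
#     --recursive /source/dir/subdir/ /dest/dir/subdir/
#     --external-checksum 9a0364b9e99bb480dd25e1f0284c8555 notes.txt /dest/notes.txt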
Example #7
    def transfer(self):
        # this isn't very scalable and there isn't much wisdom in wasting a thread on a transfer
        # that is directed by another machine, but we waste an entire process or more on the
        # gridftp server processes anyway, so this may not quite be the bottleneck
        self._maybeStartServer()
        userEndpointId = self.server.getUserEndpointId(self.user)
        tc = self.clients.getUserTransferClient(self.user)

        tmpName = str(uuid.uuid4())
        transfer = TransferData(tc,
                                self._getSourceEndpointId(),
                                userEndpointId,
                                label=str(self.transferId))
        transfer['notify_on_succeeded'] = False
        transfer['notify_on_failed'] = False
        transfer['notify_on_inactive'] = False
        transfer.add_item(self._getSourcePath(), tmpName)
        res = tc.submit_transfer(transfer)
        if res['code'] != 'Accepted':
            raise Exception('Transfer submission failed: %s - %s' %
                            (res.code, res.message))
        taskId = res['task_id']
        self._updateTransfer(tmpName, taskId)
        while True:
            task = tc.get_task(taskId)
            status = task['status']
            if status == 'ACTIVE':
                # update bytes
                pass
            elif status == 'INACTIVE':
                # credential expiration
                # TODO: deal with this properly or ensure it does not happen
                msg = 'Credential expired for Globus task %s, transfer %s.' % (
                    taskId, self.transferId)
                logger.warning(msg)
                raise Exception(msg)
            elif status == 'SUCCEEDED':
                dir = os.path.dirname(self.psPath)
                try:
                    os.makedirs(dir)
                except OSError:
                    if not os.path.exists(dir):
                        raise Exception(
                            'Could not create transfer destination directory: %s'
                            % dir)
                shutil.move(
                    '%s/%s' % (self.server.getUserDir(self.user), tmpName),
                    self.psPath)
                return
            elif status == 'FAILED':
                if task['fatal_error']:
                    raise Exception(
                        'Globus transfer %s failed: %s' %
                        (self.transferId, task['fatal_error']['description']))
                else:
                    raise Exception(
                        'Globus transfer %s failed for unknown reasons' %
                        self.transferId)
            else:
                raise Exception(
                    'Unknown globus task status %s for transfer %s' %
                    (status, self.transferId))
            time.sleep(10)
Example #8
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(_logger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # default return
     tmpRetVal = (True, '')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(
             jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(
         tmpLog, self.tc, label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(
             str(transferTasks[label])))
         return True, ''
     # set the Globus destination Endpoint id and path will get them from Agis eventually
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
     self.srcEndpoint = queueConfig.stager['srcEndpoint']
     self.Globus_srcPath = self.basePath
     self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
     self.dstEndpoint = queueConfig.stager['dstEndpoint']
     # Test the endpoints and create the transfer data class
     errMsg = None
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errMsg = ''
             if not tmpStatsrc:
                 errMsg += ' source Endpoint not activated '
             if not tmpStatdst:
                 errMsg += ' destination Endpoint not activated '
             tmpLog.error(errMsg)
             tmpRetVal = (False, errMsg)
             return tmpRetVal
         # both endpoints activated now prepare to transfer data
         tdata = TransferData(self.tc,
                              self.srcEndpoint,
                              self.dstEndpoint,
                              label=label,
                              sync_level="checksum")
     except:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
         tmpRetVal = (errStat, errMsg)
         return tmpRetVal
     # loop over all files
     fileAttrs = jobspec.get_output_file_attributes()
     lfns = []
     for fileSpec in jobspec.outFiles:
         scope = fileAttrs[fileSpec.lfn]['scope']
         hash = hashlib.md5()
         hash.update('%s:%s' % (scope, fileSpec.lfn))
         hash_hex = hash.hexdigest()
         correctedscope = "/".join(scope.split('.'))
         srcURL = fileSpec.path
         dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
             endPoint=self.Globus_dstPath,
             scope=correctedscope,
             hash1=hash_hex[0:2],
             hash2=hash_hex[2:4],
             lfn=fileSpec.lfn)
         tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL,
                                                         dstURL=dstURL))
         # add files to transfer object - tdata
         if os.access(srcURL, os.R_OK):
             tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
             tdata.add_item(srcURL, dstURL)
             lfns.append(fileSpec.lfn)
         else:
             errMsg = "source file {} does not exist".format(srcURL)
             tmpLog.error(errMsg)
             tmpRetVal = (False, errMsg)
             return tmpRetVal
     # submit transfer
     try:
         transfer_result = self.tc.submit_transfer(tdata)
         # check status code and message
         tmpLog.debug(str(transfer_result))
         if transfer_result['code'] == "Accepted":
             # succeeded
             # set transfer ID which are used for later lookup
             transferID = transfer_result['task_id']
             tmpLog.debug(
                 'successfully submitted id={0}'.format(transferID))
             jobspec.set_groups_to_files(
                 {transferID: {
                     'lfns': lfns,
                     'groupStatus': 'active'
                 }})
             # set
             for fileSpec in jobspec.outFiles:
                 if fileSpec.fileAttributes is None:
                     fileSpec.fileAttributes = {}
                 fileSpec.fileAttributes['transferID'] = transferID
         else:
             tmpRetVal = (False, transfer_result['message'])
     except Exception as e:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
         if errMsg is None:
             errtype, errvalue = sys.exc_info()[:2]
             errMsg = "{0} {1}".format(errtype.__name__, errvalue)
         tmpRetVal = (errStat, errMsg)
     # return
     tmpLog.debug('done')
     return tmpRetVal
Example #9
def run_agent():
    dataset_name = "hopv"
    local_ep = ""
    dest_ep = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    dest_path = "/sample_data/" + dataset_name + "_train.csv"
    timeout = False
    timeout_intervals = 10
    interval_time = 10
    verbose = True

    search_client = globus_auth.login("https://search.api.globus.org/",
                                      "globus_search")
    transfer_client = transfer_auth.login()
    if not local_ep:
        pgr_res = transfer_client.endpoint_search(filter_scope="my-endpoints")
        ep_candidates = pgr_res.data
        if len(ep_candidates) < 1:  #Nothing found
            raise GlobusError("Error: No local endpoints found")
        elif len(ep_candidates) == 1:  #Exactly one candidate
            if ep_candidates[0]["gcp_connected"] == False:  #Is GCP, is not on
                raise GlobusError("Error: Globus Connect is not running")
            else:  #Is GCServer or GCP and connected
                local_ep = ep_candidates[0]["id"]
        else:  # >1 found
            #Filter out disconnected GCP
            ep_connections = [
                candidate for candidate in ep_candidates
                if candidate["gcp_connected"] is not False
            ]
            #Recheck list
            if len(ep_connections) < 1:  #Nothing found
                raise GlobusError("Error: No local endpoints running")
            elif len(ep_connections) == 1:  #Exactly one candidate
                if ep_connections[0][
                        "gcp_connected"] == False:  #Is GCP, is not on
                    raise GlobusError("Error: Globus Connect is not active")
                else:  #Is GCServer or GCP and connected
                    local_ep = ep_connections[0]["id"]
            else:  # >1 found
                #Prompt user
                print("Multiple endpoints found:")
                count = 0
                for ep in ep_connections:
                    count += 1
                    print(count, ": ", ep["display_name"], "\t", ep["id"])
                print("\nPlease choose the endpoint on this machine")
                ep_num = 0
                while ep_num == 0:
                    usr_choice = input(
                        "Enter the number of the correct endpoint (-1 to cancel): "
                    )
                    try:
                        ep_choice = int(usr_choice)
                        if ep_choice == -1:  #User wants to quit
                            ep_num = -1  #Will break out of while to exit program
                        elif ep_choice in range(1,
                                                count + 1):  #Valid selection
                            ep_num = ep_choice  #Break out of while, return valid ID
                        else:  #Invalid number
                            print("Invalid selection")
                    except ValueError:
                        print("Invalid input")

                if ep_num == -1:
                    print("Cancelling")
                    sys.exit()
                local_ep = ep_connections[ep_num - 1]["id"]

    # Fetch and aggregate records into training set

    count = 0
    num_processed = 0
    data_list = []
    while True:
        query = {
            "q": ("mdf_source_name:" + dataset_name +
                  " AND mdf_node_type:record AND "
                  "globus_scroll_id:(>=" + str(count) + " AND <" +
                  str(count + 10000) + ")"),
            "advanced":
            True,
            "limit":
            10000
        }
        raw_res = search_client.structured_search(query)
        search_res = gmeta_pop(raw_res, True)
        for res in search_res:
            data_dict = json.loads(res["data"]["hopv-experimental_data"])
            data_list.append(data_dict)
        num_ret = len(search_res)
        if num_ret:
            num_processed += num_ret
            count += 10000
        else:
            break
    if verbose:
        print("Processed:", len(data_list), "/", num_processed, "|",
              len(data_list) - num_processed)

    df = pd.DataFrame(data_list)
    df.to_csv(os.path.join(os.getcwd(), "temp_train.csv"))

    # Upload to NCSA endpoint

    try:
        tdata = TransferData(transfer_client,
                             local_ep,
                             dest_ep,
                             verify_checksum=True,
                             notify_on_succeeded=False,
                             notify_on_failed=False,
                             notify_on_inactive=False)
        tdata.add_item(os.path.join(os.getcwd(), "temp_train.csv"), dest_path)
        res = transfer_client.submit_transfer(tdata)
        if res["code"] != "Accepted":
            raise GlobusError("Failed to transfer files: Transfer " +
                              res["code"])
        else:
            intervals = 0
            while not transfer_client.task_wait(
                    res["task_id"],
                    timeout=interval_time,
                    polling_interval=interval_time):
                for event in transfer_client.task_event_list(res["task_id"]):
                    if event["is_error"]:
                        transfer_client.cancel_task(res["task_id"])
                        raise GlobusError("Error: " + event["description"])
                    if timeout and intervals >= timeout_intervals:
                        transfer_client.cancel_task(res["task_id"])
                        raise GlobusError("Transfer timed out.")
                    intervals += 1
    except Exception as e:
        raise
    finally:
        os.remove(os.path.join(os.getcwd(), "temp_train.csv"))

    # Update dataset entry

    query = {
        "q": "mdf_source_name:" + dataset_name + " AND mdf_node_type:dataset",
        "advanced": True
    }
    raw_res = search_client.structured_search(query)
    search_res = gmeta_pop(raw_res)
    if len(search_res) != 1:
        raise ValueError("Incorrect number of results: " +
                         str(len(search_res)))
    ingest = search_res[0]
    ingest["globus_subject"] = raw_res["gmeta"][0]["subject"]
    ingest["acl"] = ["public"]
    ingest["http://materialsdatafacility.org/#training_set"] = {
        "http://materialsdatafacility.org/#endpoint":
        dest_ep,
        "http://materialsdatafacility.org/#path":
        dest_path,
        "http://materialsdatafacility.org/#https":
        "https://data.materialsdatafacility.org" + dest_path
    }
    gmeta = format_gmeta([format_gmeta(ingest)])

    gmeta = json.loads(
        json.dumps(gmeta).replace(
            "mdf-publish.publication.community",
            "http://globus.org/publish-terms/#publication/community"))

    search_client.ingest(gmeta)

    # Check ingest

    query = {
        "q": "mdf_source_name:" + dataset_name + " AND mdf_node_type:dataset",
        "advanced": True
    }
    raw_res = search_client.structured_search(query)
    search_res = gmeta_pop(raw_res, True)

    if verbose:
        print(
            "Verification:\n",
            json.dumps(search_res[0]["training_set"],
                       sort_keys=True,
                       indent=4,
                       separators=(',', ': ')))
    tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
        tmpLog, tc, srcEndpoint)
    if tmpStatsrc and tmpStatdst:
        errStr = 'source Endpoint and destination Endpoint activated'
        tmpLog.debug(errStr)
    else:
        errStr = ''
        if not tmpStatsrc:
            errStr += ' source Endpoint not activated '
        if not tmpStatdst:
            errStr += ' destination Endpoint not activated '
        tmpLog.error(errStr)
        sys.exit(2)
    # We are sending test files from our destination machine to the source machine
    # both endpoints activated now prepare to transfer data
    tdata = TransferData(tc, dstEndpoint, srcEndpoint, sync_level="checksum")
except:
    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
    sys.exit(1)

# create JobSpec
jobSpec = JobSpec()
jobSpec.jobParams = {
    'scopeLog': 'panda',
    'logFile': 'log',
}
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointIn = 'BNL-OSG2_DATADISK'
    def execute(self, event):
        """
        Start the transfer

        Parameters:
            event (thread.event): event to trigger job cancel
        """
        # reject if job isn't valid
        self.prevalidate()
        if self.status != JobStatus.VALID:
            logging.error('Transfer job in invalid state')
            logging.error(str(self))
            return
        if not check_logged_in():
            self.status = JobStatus.INVALID
            logging.error('Transfer failed, not logged into globus')
            return
        self.start_time = datetime.now()
        # Get source and destination UUIDs
        srcendpoint = self.config.get('source_endpoint')
        dstendpoint = self.config.get('destination_endpoint')
        message = 'Starting setup for transfer job from {src} to {dst}'.format(
            src=srcendpoint, dst=dstendpoint)
        logging.info(message)

        # Log into globus and activate endpoints
        endpoints = [srcendpoint, dstendpoint]
        setup_globus(endpoints=endpoints,
                     event_list=self.event_list,
                     no_ui=not self.config.get('ui', True),
                     src=self.config.get('source_email'),
                     dst=self.config.get('source_email'),
                     display_event=self.config.get('display_event'))
        client = get_client()
        # task_label = "{start} to {end}".format(
        #     start=self.file_list[0]['name'],
        #     end=self.file_list[-1]['name'])
        task_label = 'Autotransfer of {number} files at {time}'.format(
            number=len(self.file_list), time=time.strftime("%I-%M"))
        try:
            transfer_task = TransferData(client,
                                         srcendpoint,
                                         dstendpoint,
                                         sync_level='checksum',
                                         label=task_label)
        except Exception as e:
            logging.error('Error creating transfer task')
            logging.error(format_debug(e))
            self.status = JobStatus.FAILED
            return

        if not self.config['file_list']:
            logging.error('Unable to transfer files without a source list')
            self.status = JobStatus.FAILED
            return

        for datafile in self.config['file_list']:
            transfer_task.add_item(source_path=datafile['remote_path'],
                                   destination_path=datafile['local_path'],
                                   recursive=False)

        # Start the transfer
        task_id = None
        result = None
        try:
            result = client.submit_transfer(transfer_task)
            task_id = result["task_id"]
            logging.info('starting transfer with task id %s', task_id)
        except Exception as e:
            if result:
                logging.error("result: %s", str(result))
            logging.error("Could not submit the transfer")
            logging.error(format_debug(e))
            self.status = JobStatus.FAILED
            return

        # Check the status of the transfer every 5 secs
        number_transfered = -1
        while True:
            try:
                while True:
                    try:
                        status = client.get_task(task_id)
                    except Exception:
                        time.sleep(1)
                    else:
                        break
                if status['status'] == 'SUCCEEDED':
                    logging.info('progress %d/%d', status['files_transferred'],
                                 status['files'])
                    percent_complete = 100.0
                    self.display_status(
                        percent_complete=percent_complete,
                        task_id=task_id,
                        num_completed=int(status['files_transferred']) +
                        int(status['files_skipped']),
                        num_total=status['files'])
                    message = 'Transfer job completed'
                    self.status = JobStatus.COMPLETED
                    return
                elif status['status'] == 'FAILED':
                    logging.error('Error transferring files %s',
                                  status.get('nice_status_details'))
                    self.status = JobStatus.FAILED
                    return
                elif status['status'] == 'ACTIVE':
                    if number_transfered < status['files_transferred']:
                        number_transfered = status['files_transferred']
                        logging.info('progress %d/%d',
                                     status['files_transferred'],
                                     status['files'])
                        percent_complete = (
                            float(status['files_transferred'] +
                                  float(status['files_skipped'])) /
                            float(status['files'])) * 100
                        self.display_status(
                            percent_complete=percent_complete,
                            task_id=task_id,
                            num_completed=int(status['files_transferred']) +
                            int(status['files_skipped']),
                            num_total=status['files'])
                    self.status = JobStatus.RUNNING
                if event and event.is_set():
                    client.cancel_task(task_id)
                    # self.error_cleanup()
                    return
            except Exception as e:
                logging.error(format_debug(e))
                client.cancel_task(task_id)
                # self.error_cleanup()
                return
            time.sleep(5)
def validate():
    params = request.json
    crawl_id = params["crawl_id"]
    globus_eid = params["globus_eid"]
    transfer_token = params["transfer_token"]
    source_destination = params["source_destination"]
    dataset_info = params["dataset_info"]  # To be implemented later

    client = boto3.client('sqs',
                          aws_access_key_id=os.environ["aws_access"],
                          aws_secret_access_key=os.environ["aws_secret"],
                          region_name='us-east-1')

    try:
        response = client.get_queue_url(
            QueueName=f'validate_{crawl_id}',
            QueueOwnerAWSAccountId=os.environ["aws_account_id"])
    except:  # Add SQS.Client.exceptions.QueueDoesNotExist error
        abort(400, "Invalid crawl ID")

    try:
        authorizer = AccessTokenAuthorizer(transfer_token)
        tc = TransferClient(authorizer=authorizer)
    except:  # Add exception
        abort(400, "Invalid transfer token")

    crawl_queue = response["QueueUrl"]

    date = datetime.datetime.now()
    file_name = date.strftime("%m_%d_%Y-%H_%M_%S") + ".txt"

    try:
        with open(file_name, "w") as f:

            while True:
                sqs_response = client.receive_message(
                    QueueUrl=crawl_queue,
                    MaxNumberOfMessages=1,  # To be toggled
                    WaitTimeSeconds=1)

                if "Messages" not in sqs_response:
                    # xtract_status = requests.get(f"{eb_url}/get_extract_status", json={"crawl_id": crawl_id})
                    # print("HERE")
                    # print(xtract_status.content)
                    # xtract_content = json.loads(xtract_status.content)
                    # # print(xtract_content)
                    #
                    # if xtract_content["IDLE"] == 0 and xtract_content["PENDING"] == 0:
                    break

                del_list = []

                for message in sqs_response["Messages"]:
                    message_body = message["Body"]

                    # PROCESS MESSAGE_BODY
                    f.write(message_body)
                    # print(message_body)

                    del_list.append({
                        'ReceiptHandle': message["ReceiptHandle"],
                        'Id': message["MessageId"]
                    })

                if len(del_list) > 0:
                    client.delete_message_batch(QueueUrl=crawl_queue,
                                                Entries=del_list)

        tdata = TransferData(
            tc,
            "5ecf6444-affc-11e9-98d4-0a63aa6b37da",  #TODO: Add source endpoint
            globus_eid,
            label=f"{crawl_id}")
        tdata.add_item(os.path.abspath(file_name),
                       os.path.join(source_destination, file_name))

        tc.endpoint_autoactivate(
            "5ecf6444-affc-11e9-98d4-0a63aa6b37da")  #TODO: Add source endpoint
        tc.endpoint_autoactivate(globus_eid)
        submit_result = tc.submit_transfer(tdata)

        while True:
            result = tc.get_task(submit_result['task_id'])
            if result.data["status"] == "SUCCEEDED":
                break
            elif result.data["status"] == "FAILED":
                raise RuntimeError  # TODO: Change this
            else:
                time.sleep(0.5)

    except Exception as e:
        print(e)
        abort(400, "Failed to validate")
    finally:
        os.remove(file_name)

    return "[200] Submitted"
    on_refresh=update_tokens_file_on_refresh)

try:
    tc = TransferClient(authorizer=authorizer)
except Exception:
    print(
        "ERROR: TransferClient() call failed!  Unable to call the Globus transfer interface with the provided auth info!"
    )
    sys.exit(-1)
# print(transfer)

# Now we should have auth, try setting up a transfer.

tdata = TransferData(tc,
                     source_endpoint_id,
                     destination_endpoint_id,
                     label="DCDE Relion transfer",
                     sync_level="size")

tdata.add_item(source_dir, dest_dir, recursive=True)

transfer_result = tc.submit_transfer(tdata)

print("task_id =", transfer_result["task_id"])

while not tc.task_wait(
        transfer_result['task_id'], timeout=1200, polling_interval=10):
    print(".", end="")
print("\n{} completed!".format(transfer_result['task_id']))

os.listdir(path=dest_dir)
 def check_status(self, jobspec):
     # make logger
     tmpLog = core_utils.make_logger(_logger,
                                     'PandaID={0}'.format(jobspec.PandaID),
                                     method_name='check_status')
     tmpLog.debug('start')
     # default return
     tmpRetVal = (True, '')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(
             jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # set transferID to None
     transferID = None
     # get transfer groups
     groups = jobspec.get_groups_of_output_files()
     tmpLog.debug(
         'jobspec.get_groups_of_output_files() = : {0}'.format(groups))
     # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
     if self.dummy_transfer_id in groups:
         # lock for 120 sec
         if not self.have_db_lock:
             tmpLog.debug(
                 'attempt to set DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                 .format(self.id, self.dummy_transfer_id))
             self.have_db_lock = self.dbInterface.get_object_lock(
                 self.dummy_transfer_id, lock_interval=120)
         if not self.have_db_lock:
             # escape since locked by another thread
             msgStr = 'escape since locked by another thread'
             tmpLog.debug(msgStr)
             return None, msgStr
         # refresh group information since that could have been updated by another thread before getting the lock
         self.dbInterface.refresh_file_group_info(jobspec)
         # get transfer groups again with refreshed info
         groups = jobspec.get_groups_of_output_files()
         # the dummy transfer ID is still there
         if self.dummy_transfer_id in groups:
             groupUpdateTime = groups[
                 self.dummy_transfer_id]['groupUpdateTime']
             # get files with the dummy transfer ID across jobs
             fileSpecs = self.dbInterface.get_files_with_group_id(
                 self.dummy_transfer_id)
             # submit transfer if there are more than 10 files or the group was made before more than 10 min
             msgStr = 'self.dummy_transfer_id = {0}  number of files = {1}'.format(
                 self.dummy_transfer_id, len(fileSpecs))
             tmpLog.debug(msgStr)
             if len(fileSpecs) >= 10 or \
                     groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                 tmpLog.debug('prepare to transfer files')
                 # submit transfer and get a real transfer ID
                 # set the Globus destination Endpoint id and path will get them from Agis eventually
                 from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
                 queueConfigMapper = QueueConfigMapper()
                 queueConfig = queueConfigMapper.get_queue(
                     jobspec.computingSite)
                 #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                 self.srcEndpoint = queueConfig.stager['srcEndpoint']
                 self.Globus_srcPath = self.basePath
                 self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                 self.dstEndpoint = queueConfig.stager['dstEndpoint']
                 # Test the endpoints and create the transfer data class
                 errMsg = None
                 try:
                     # Test endpoints for activation
                     tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
                         tmpLog, self.tc, self.srcEndpoint)
                     tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
                         tmpLog, self.tc, self.dstEndpoint)
                     if tmpStatsrc and tmpStatdst:
                         errStr = 'source Endpoint and destination Endpoint activated'
                         tmpLog.debug(errStr)
                     else:
                         errMsg = ''
                         if not tmpStatsrc:
                             errMsg += ' source Endpoint not activated '
                         if not tmpStatdst:
                             errMsg += ' destination Endpoint not activated '
                         # release process lock
                         tmpLog.debug(
                             'attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                             .format(self.id, self.dummy_transfer_id))
                         self.have_db_lock = self.dbInterface.release_object_lock(
                             self.dummy_transfer_id)
                         if not self.have_db_lock:
                             errMsg += ' - Could not release DB lock for {}'.format(
                                 self.dummy_transfer_id)
                         tmpLog.error(errMsg)
                         tmpRetVal = (None, errMsg)
                         return tmpRetVal
                     # both endpoints activated now prepare to transfer data
                     tdata = TransferData(self.tc,
                                          self.srcEndpoint,
                                          self.dstEndpoint,
                                          sync_level="checksum")
                 except:
                     errStat, errMsg = globus_utils.handle_globus_exception(
                         tmpLog)
                     # release process lock
                     tmpLog.debug(
                         'attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                         .format(self.id, self.dummy_transfer_id))
                     self.have_db_lock = self.dbInterface.release_object_lock(
                         self.dummy_transfer_id)
                     if not self.have_db_lock:
                         errMsg += ' - Could not release DB lock for {}'.format(
                             self.dummy_transfer_id)
                     tmpLog.error(errMsg)
                     tmpRetVal = (errStat, errMsg)
                     return tmpRetVal
                 # loop over all files
                 for fileSpec in fileSpecs:
                     attrs = jobspec.get_output_file_attributes()
                     msgStr = "len(jobSpec.get_output_file_attributes()) = {0} type - {1}".format(
                         len(attrs), type(attrs))
                     tmpLog.debug(msgStr)
                     for key, value in attrs.iteritems():
                         msgStr = "output file attributes - {0} {1}".format(
                             key, value)
                         tmpLog.debug(msgStr)
                     msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(
                         fileSpec.lfn, fileSpec.scope)
                     tmpLog.debug(msgStr)
                     scope = fileSpec.scope
                     hash = hashlib.md5()
                     hash.update('%s:%s' % (scope, fileSpec.lfn))
                     hash_hex = hash.hexdigest()
                     correctedscope = "/".join(scope.split('.'))
                     srcURL = fileSpec.path
                     dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
                         endPoint=self.Globus_dstPath,
                         scope=correctedscope,
                         hash1=hash_hex[0:2],
                         hash2=hash_hex[2:4],
                         lfn=fileSpec.lfn)
                     tmpLog.debug('src={srcURL} dst={dstURL}'.format(
                         srcURL=srcURL, dstURL=dstURL))
                     # add files to transfer object - tdata
                     if os.access(srcURL, os.R_OK):
                         tmpLog.debug("tdata.add_item({},{})".format(
                             srcURL, dstURL))
                         tdata.add_item(srcURL, dstURL)
                     else:
                         errMsg = "source file {} does not exist".format(
                             srcURL)
                         # release process lock
                         tmpLog.debug(
                             'attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                             .format(self.id, self.dummy_transfer_id))
                         self.have_db_lock = self.dbInterface.release_object_lock(
                             self.dummy_transfer_id)
                         if not self.have_db_lock:
                             errMsg += ' - Could not release DB lock for {}'.format(
                                 self.dummy_transfer_id)
                         tmpLog.error(errMsg)
                         tmpRetVal = (False, errMsg)
                         return tmpRetVal
                 # submit transfer
                 try:
                     transfer_result = self.tc.submit_transfer(tdata)
                     # check status code and message
                     tmpLog.debug(str(transfer_result))
                     if transfer_result['code'] == "Accepted":
                         # succeeded
                          # set transfer ID, which is used for later lookup
                         transferID = transfer_result['task_id']
                         tmpLog.debug(
                             'successfully submitted id={0}'.format(
                                 transferID))
                         # set status for files
                         self.dbInterface.set_file_group(
                             fileSpecs, transferID, 'running')
                         msgStr = 'submitted transfer with ID={0}'.format(
                             transferID)
                         tmpLog.debug(msgStr)
                     else:
                         # release process lock
                         tmpLog.debug(
                             'attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                             .format(self.id, self.dummy_transfer_id))
                         self.have_db_lock = self.dbInterface.release_object_lock(
                             self.dummy_transfer_id)
                         if not self.have_db_lock:
                             errMsg = 'Could not release DB lock for {}'.format(
                                 self.dummy_transfer_id)
                             tmpLog.error(errMsg)
                         tmpRetVal = (None, transfer_result['message'])
                         return tmpRetVal
                 except Exception as e:
                     errStat, errMsg = globus_utils.handle_globus_exception(
                         tmpLog)
                     # release process lock
                     tmpLog.debug(
                         'attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                         .format(self.id, self.dummy_transfer_id))
                     self.have_db_lock = self.dbInterface.release_object_lock(
                         self.dummy_transfer_id)
                     if not self.have_db_lock:
                         errMsg += ' - Could not release DB lock for {}'.format(
                             self.dummy_transfer_id)
                     tmpLog.error(errMsg)
                     return errStat, errMsg
             else:
                 msgStr = 'wait until enough files are pooled'
                 tmpLog.debug(msgStr)
             # release the lock
             tmpLog.debug(
                 'attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                 .format(self.id, self.dummy_transfer_id))
             self.have_db_lock = self.dbInterface.release_object_lock(
                 self.dummy_transfer_id)
             if not self.have_db_lock:
                 msgStr += ' - Could not release DB lock for {}'.format(
                     self.dummy_transfer_id)
                 tmpLog.error(msgStr)
             # return None to retry later
             return None, msgStr
     # check transfer with real transfer IDs
     # get transfer groups
     groups = jobspec.get_groups_of_output_files()
     for transferID in groups:
         if transferID != self.dummy_transfer_id:
             # get transfer task
             tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(
                 tmpLog, self.tc, transferID)
             # return a temporary error when failed to get task
             if not tmpStat:
                 errStr = 'failed to get transfer task'
                 tmpLog.error(errStr)
                 return None, errStr
             # return a temporary error when task is missing
             if transferID not in transferTasks:
                 errStr = 'transfer task ID - {} is missing'.format(
                     transferID)
                 tmpLog.error(errStr)
                 return None, errStr
              # succeeded in finding a transfer task by transferID
             if transferTasks[transferID]['status'] == 'SUCCEEDED':
                 tmpLog.debug(
                     'transfer task {} succeeded'.format(transferID))
                 self.set_FileSpec_status(jobspec, 'finished')
                 return True, ''
             # failed
             if transferTasks[transferID]['status'] == 'FAILED':
                 errStr = 'transfer task {} failed'.format(transferID)
                 tmpLog.error(errStr)
                 self.set_FileSpec_status(jobspec, 'failed')
                 return False, errStr
             # another status
             tmpStr = 'transfer task {0} status: {1}'.format(
                 transferID, transferTasks[transferID]['status'])
             tmpLog.debug(tmpStr)
              return None, tmpStr
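For reference, the destination URL built above follows a rucio-style deterministic layout: an md5 of "scope:lfn" supplies two levels of hash directories, and dots in the scope become path components. A minimal sketch of that layout, with a hypothetical base path, scope and LFN:

import hashlib

def deterministic_dst_path(base, scope, lfn):
    # md5 of "scope:lfn" provides the two hash directories; dots in the
    # scope are expanded into subdirectories (e.g. "mc16.simul" -> "mc16/simul")
    hash_hex = hashlib.md5('{0}:{1}'.format(scope, lfn).encode('utf-8')).hexdigest()
    corrected_scope = "/".join(scope.split('.'))
    return "{base}/{scope}/{h1}/{h2}/{lfn}".format(
        base=base, scope=corrected_scope,
        h1=hash_hex[0:2], h2=hash_hex[2:4], lfn=lfn)

# e.g. deterministic_dst_path('/globus/atlas', 'mc16.simul', 'EVNT.01234._000001.pool.root')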
Example #15
 def trigger_preparation(self, jobspec):
     # get logger
     tmpLog = self.make_logger(_logger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(
             jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(
         tmpLog, self.tc, label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(
             str(transferTasks[label])))
         return True, ''
      # set the Globus source and destination endpoint IDs and paths; will get them from AGIS eventually
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
     self.srcEndpoint = queueConfig.preparator['srcEndpoint']
     self.Globus_dstPath = self.basePath
     #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
     self.dstEndpoint = queueConfig.preparator['dstEndpoint']
     # get input files
     files = []
     lfns = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     for inLFN, inFile in iteritems(inFiles):
         # set path to each file
         inFile['path'] = mover_utils.construct_file_path(
             self.basePath, inFile['scope'], inLFN)
         dstpath = inFile['path']
         # check if path exists if not create it.
         if not os.access(self.basePath, os.F_OK):
             os.makedirs(self.basePath)
         # create the file paths for the Globus source and destination endpoints
         Globus_srcpath = mover_utils.construct_file_path(
             self.Globus_srcPath, inFile['scope'], inLFN)
         Globus_dstpath = mover_utils.construct_file_path(
             self.Globus_dstPath, inFile['scope'], inLFN)
         files.append({
             'scope': inFile['scope'],
             'name': inLFN,
             'Globus_dstPath': Globus_dstpath,
             'Globus_srcPath': Globus_srcpath
         })
         lfns.append(inLFN)
     tmpLog.debug('files[] {0}'.format(files))
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errStr = ''
             if not tmpStatsrc:
                 errStr += ' source Endpoint not activated '
             if not tmpStatdst:
                 errStr += ' destination Endpoint not activated '
             tmpLog.error(errStr)
             return False, errStr
         # both endpoints activated now prepare to transfer data
         if len(files) > 0:
             tdata = TransferData(self.tc,
                                  self.srcEndpoint,
                                  self.dstEndpoint,
                                  label=label,
                                  sync_level="checksum")
             # loop over all input files and add
             for myfile in files:
                 tdata.add_item(myfile['Globus_srcPath'],
                                myfile['Globus_dstPath'])
             # submit
             transfer_result = self.tc.submit_transfer(tdata)
             # check status code and message
             tmpLog.debug(str(transfer_result))
             if transfer_result['code'] == "Accepted":
                 # succeeded
                  # set transfer ID, which is used for later lookup
                 transferID = transfer_result['task_id']
                 jobspec.set_groups_to_files(
                     {transferID: {
                         'lfns': lfns,
                         'groupStatus': 'active'
                     }})
                 tmpLog.debug('done')
                 return True, ''
             else:
                 return False, transfer_result['message']
         # if no files to transfer return True
         return True, 'No files to transfer'
     except:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
          return errStat, errMsg
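The endpoint activation test above is delegated to globus_utils; a rough equivalent against the raw globus_sdk TransferClient (the helper name is made up, and the endpoint ID would be a real UUID) might look like this sketch, treating anything other than an activation failure as usable:

def endpoint_is_activated(tc, endpoint_id, if_expires_in=3600):
    # ask Globus to (re)activate from cached credentials if possible and
    # report whether the endpoint can be used for a transfer
    r = tc.endpoint_autoactivate(endpoint_id, if_expires_in=if_expires_in)
    return r["code"] != "AutoActivationFailed", r["code"]

# e.g. ok, code = endpoint_is_activated(tc, "SRC-ENDPOINT-ID")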
import uuid

from globus_sdk import TransferClient, TransferData

# simple usage, ok
tc = TransferClient()
TransferData(tc, "srcep", "destep")

# can set sync level
TransferData(tc, "srcep", "destep", sync_level=1)
TransferData(tc, "srcep", "destep", sync_level="exists")
# unknown int values are allowed
TransferData(tc, "srcep", "destep", sync_level=100)
# unknown str values are rejected (Literal)
TransferData(tc, "srcep", "destep",
             sync_level="sizes")  # type: ignore[arg-type]

# TransferData.add_filter_rule
tdata = TransferData(tc, uuid.uuid4(), uuid.uuid4())
tdata.add_filter_rule("*.tgz")
tdata.add_filter_rule("*.tgz", method="exclude")
tdata.add_filter_rule("*.tgz", type="file")
# bad values rejected (Literal)
tdata.add_filter_rule("*.tgz", type="files")  # type: ignore[arg-type]
tdata.add_filter_rule("*.tgz", method="include")  # type: ignore[arg-type]
Example #17
    def check_stage_in_status(self, jobspec):
        # make logger
        tmpLog = self.make_logger(_logger, 'PandaID={0} ThreadID={1}'.format(jobspec.PandaID,threading.current_thread().ident),
                                  method_name='check_stage_in_status')
        tmpLog.debug('start')
        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
        # show the dummy transfer id and set to a value with the jobspec.computingSite if needed.
        tmpLog.debug('self.dummy_transfer_id = {}'.format(self.dummy_transfer_id))
        if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base,'XXXX') :
            old_dummy_transfer_id = self.dummy_transfer_id
            self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base,jobspec.computingSite)
            tmpLog.debug('Change self.dummy_transfer_id  from {0} to {1}'.format(old_dummy_transfer_id,self.dummy_transfer_id))
            
        # default return
        tmpRetVal = (True, '')
        # set flag if have db lock
        have_db_lock = False 
        queueConfigMapper = QueueConfigMapper()
        queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
        # test we have a Globus Transfer Client
        if not self.tc :
            errStr = 'failed to get Globus Transfer Client'
            tmpLog.error(errStr)
            return False, errStr
        # set transferID to None
        transferID = None
        # get transfer groups
        groups = jobspec.get_groups_of_input_files(skip_ready=True)
        tmpLog.debug('jobspec.get_groups_of_input_files() = : {0}'.format(groups))
        # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
        for dummy_transferID in groups:
            # skip if valid transfer ID not dummy one
            if validate_transferid(dummy_transferID) :
                continue
            # lock for 120 sec
            tmpLog.debug('attempt to set DB lock for self.id - {0} self.dummy_transfer_id - {1}, dummy_transferID - {2}'.format(self.id,self.dummy_transfer_id,dummy_transferID))
            have_db_lock = self.dbInterface.get_object_lock(dummy_transferID, lock_interval=120)
            tmpLog.debug(' DB lock result - {0}'.format(have_db_lock))
            if not have_db_lock:
                # escape since locked by another thread
                msgStr = 'escape since locked by another thread'
                tmpLog.debug(msgStr)
                return None, msgStr
            # refresh group information since that could have been updated by another thread before getting the lock
            tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
            self.dbInterface.refresh_file_group_info(jobspec)
            tmpLog.debug('after self.dbInterface.refresh_file_group_info(jobspec)')
            # get transfer groups again with refreshed info
            tmpLog.debug('groups = jobspec.get_groups_of_input_files(skip_ready=True)')
            groups = jobspec.get_groups_of_input_files(skip_ready=True)
            tmpLog.debug('after db lock and refresh - jobspec.get_groups_of_input_files(skip_ready=True) = : {0}'.format(groups))            
            # the dummy transfer ID is still there
            if dummy_transferID in groups:
                groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
                # get files with the dummy transfer ID across jobs
                fileSpecs_allgroups = self.dbInterface.get_files_with_group_id(dummy_transferID)
                msgStr = 'dummy_transferID = {0} self.dbInterface.get_files_with_group_id(dummy_transferID)  number of files = {1}'.format(dummy_transferID,len(fileSpecs_allgroups))
                tmpLog.debug(msgStr)
                fileSpecs = jobspec.get_input_file_specs(dummy_transferID, skip_ready=True)
                msgStr = 'dummy_transferID = {0} jobspec.get_input_file_specs(dummy_transferID,skip_ready=True)  number of files = {1}'.format(dummy_transferID,len(fileSpecs))
                tmpLog.debug(msgStr)
                # submit transfer if there are at least 10 files or the group was made more than 10 minutes ago
                if len(fileSpecs) >= 10 or \
                        groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                    tmpLog.debug('prepare to transfer files')
                    # submit transfer and get a real transfer ID
                    # set the Globus source and destination endpoint IDs and paths; will get them from AGIS eventually
                    self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
                    self.srcEndpoint = queueConfig.preparator['srcEndpoint']
                    self.Globus_dstPath = self.basePath
                    #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
                    self.dstEndpoint = queueConfig.preparator['dstEndpoint']
                    # Test the endpoints and create the transfer data class 
                    errMsg = None
                    try:
                        # Test endpoints for activation
                        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
                        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
                        if tmpStatsrc and tmpStatdst:
                            errStr = 'source Endpoint and destination Endpoint activated'
                            tmpLog.debug(errStr)
                        else:
                            errMsg = ''
                            if not tmpStatsrc :
                                errMsg += ' source Endpoint not activated '
                            if not tmpStatdst :
                                errMsg += ' destination Endpoint not activated '
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}, dummy_transferID - {2}'.format(self.id,self.dummy_transfer_id,dummy_transferID))
                            have_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if not have_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (None,errMsg)
                            return tmpRetVal
                        # both endpoints activated now prepare to transfer data
                        tdata = None
                        tdata = TransferData(self.tc,
                                             self.srcEndpoint,
                                             self.dstEndpoint,
                                             sync_level="exists")
#                                             sync_level="checksum")
                        tmpLog.debug('size of tdata[DATA] - {}'.format(len(tdata['DATA'])))

                    except:
                        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}, dummy_transferID - {2}'.format(self.id,self.dummy_transfer_id,dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (errStat, errMsg)
                        return tmpRetVal
                    # loop over all files
                    ifile = 0
                    for fileSpec in fileSpecs:
                        # only print to log file first 25 files
                        if ifile < 25 :
                            msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        if ifile == 25 :
                            msgStr = "printed first 25 files skipping the rest".format(fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        # end debug log file test
                        scope = 'panda'
                        if fileSpec.scope is not None :
                            scope = fileSpec.scope
                        hash = hashlib.md5()
                        if sys.version_info.major == 2:
                            hash.update('%s:%s' % (scope, fileSpec.lfn))
                        if sys.version_info.major == 3:
                            hash_string = "{0}:{1}".format(scope, fileSpec.lfn)
                            hash.update(bytes(hash_string, 'utf-8'))
                        hash_hex = hash.hexdigest()
                        correctedscope = "/".join(scope.split('.'))
                        #srcURL = fileSpec.path
                        srcURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_srcPath,
                                                                                   scope=correctedscope,
                                                                                   hash1=hash_hex[0:2],
                                                                                   hash2=hash_hex[2:4],
                                                                                   lfn=fileSpec.lfn)
                        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                                   scope=correctedscope,
                                                                                   hash1=hash_hex[0:2],
                                                                                   hash2=hash_hex[2:4],
                                                                                   lfn=fileSpec.lfn)
                        # add files to transfer object - tdata
                        if ifile < 25 :
                            tmpLog.debug("tdata.add_item({},{})".format(srcURL,dstURL))
                        tdata.add_item(srcURL,dstURL)
                        ifile += 1
                    # submit transfer 
                    tmpLog.debug('Number of files to transfer - {}'.format(len(tdata['DATA'])))
                    try:
                        transfer_result = self.tc.submit_transfer(tdata)
                        # check status code and message
                        tmpLog.debug(str(transfer_result))
                        if transfer_result['code'] == "Accepted":
                            # succeeded
                            # set transfer ID, which is used for later lookup
                            transferID = transfer_result['task_id']
                            tmpLog.debug('successfully submitted id={0}'.format(transferID))
                            # set status for files
                            self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                            msgStr = 'submitted transfer with ID={0}'.format(transferID)
                            tmpLog.debug(msgStr)
                        else:
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if release_db_lock:
                                tmpLog.debug('Released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                                have_db_lock = False
                            else:
                                errMsg = 'Could not release DB lock for {}'.format(dummy_transferID)
                                tmpLog.error(errMsg)
                            tmpRetVal = (None, transfer_result['message'])
                            return tmpRetVal
                    except Exception as e:
                        errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if release_db_lock:
                            tmpLog.debug('Released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            have_db_lock = False
                        else :
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        return errStat, errMsg
                else:
                    msgStr = 'wait until enough files are pooled'
                    tmpLog.debug(msgStr)
                # release the lock
                tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(dummy_transferID) 
                if release_db_lock:
                    tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                    have_db_lock = False 
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(msgStr)
                # return None to retry later
                return None, msgStr
            # release the db lock if needed
            if have_db_lock:
                tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(dummy_transferID) 
                if release_db_lock:
                    tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                    have_db_lock = False 
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(msgStr)
                    return None, msgStr
        # check transfer with real transfer IDs
        # get transfer groups 
        tmpLog.debug("groups = jobspec.get_groups_of_input_files(skip_ready=True)")
        groups = jobspec.get_groups_of_input_files(skip_ready=True)
        tmpLog.debug('Number of transfer groups (skip_ready)- {0}'.format(len(groups)))
        tmpLog.debug('transfer groups any state (skip_ready)- {0}'.format(groups))
        tmpLog.debug("groups = jobspec.get_groups_of_input_files()")
        groups = jobspec.get_groups_of_input_files()
        tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
        tmpLog.debug('transfer groups any state - {0}'.format(groups))
        tmpLog.debug("groups = jobspec.get_groups_of_input_files(skip_ready=True)")
        groups = jobspec.get_groups_of_input_files(skip_ready=True)
        if len(groups) == 0:
            tmpLog.debug("jobspec.get_groups_of_input_files(skip_ready=True) returned no files ")
            tmpLog.debug("check_stage_in_status return status - True ")
            return True,''
        for transferID in groups:
            # allow only valid UUID
            if validate_transferid(transferID) :
                # get transfer task
                tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog,self.tc,transferID)
                # return a temporary error when failed to get task
                if not tmpStat:
                    errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (str(self.tc),str(transferID))
                    tmpLog.error(errStr)
                    return None, errStr
                # return a temporary error when task is missing 
                if transferID not in transferTasks:
                    errStr = 'transfer task ID - {} is missing'.format(transferID)
                    tmpLog.error(errStr)
                    return None, errStr
                # succeeded in finding a transfer task by transferID
                if transferTasks[transferID]['status'] == 'SUCCEEDED':
                    tmpLog.debug('transfer task {} succeeded'.format(transferID))
                    self.set_FileSpec_status(jobspec,'finished')
                    return True, ''
                # failed
                if transferTasks[transferID]['status'] == 'FAILED':
                    errStr = 'transfer task {} failed'.format(transferID)
                    tmpLog.error(errStr)
                    self.set_FileSpec_status(jobspec,'failed')
                    return False, errStr
                # another status
                tmpStr = 'transfer task {0} status: {1}'.format(transferID,transferTasks[transferID]['status'])
                tmpLog.debug(tmpStr)
                return None, tmpStr
        # end of loop over transfer groups
        tmpLog.debug('End of loop over transfers groups - ending check_stage_in_status function')
        return None,'no valid transfer id found'
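The SUCCEEDED/FAILED/other branches above all flow through globus_utils helpers; a compact sketch of the same status-to-return-value convention (True = done, False = fatal, None = retry later) written directly against the TransferClient could be:

def check_task_status(tc, transfer_id):
    # look up one Globus task and translate its status into the
    # harvester-style (status, message) pair used above
    task = tc.get_task(transfer_id)
    status = task["status"]
    if status == 'SUCCEEDED':
        return True, ''
    if status == 'FAILED':
        return False, 'transfer task {0} failed'.format(transfer_id)
    return None, 'transfer task {0} status: {1}'.format(transfer_id, status)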
Example #18
def transfer_command(
    batch,
    sync_level,
    recursive,
    destination,
    source,
    checksum_algorithm,
    external_checksum,
    label,
    preserve_mtime,
    verify_checksum,
    encrypt,
    submission_id,
    dry_run,
    delete,
    deadline,
    skip_activation_check,
    notify,
    perf_cc,
    perf_p,
    perf_pp,
    perf_udt,
):
    """
    Copy a file or directory from one endpoint to another as an asynchronous
    task.

    'globus transfer' has two modes. Single target, which transfers one
    file or one directory, and batch, which takes in several lines to transfer
    multiple files or directories. See "Batched Input" below for more information.

    'globus transfer' will always place the dest files in a
    consistent, deterministic location.  The contents of a source directory will
    be placed inside the dest directory.  A source file will be copied to
    the dest file path, which must not be an existing directory.  All
    intermediate / parent directories on the dest will be automatically
    created if they don't exist.

    If the files or directories given as input are symbolic links, they are
    followed.  However, no other symbolic links are followed and no symbolic links
    are ever created on the dest.

    \b
    === Batched Input

    If you use `SOURCE_PATH` and `DEST_PATH` without the `--batch` flag, you
    will submit a single-file or single-directory transfer task.
    This has behavior similar to `cp` and `cp -r` across endpoints.

    Using `--batch`, `globus transfer` can submit a task which transfers
    multiple files or directories. Paths to transfer are taken from stdin.
    Lines are split on spaces, respecting quotes, and every line is treated as
    a file or directory to transfer.

    \b
    Lines are of the form
    [--recursive] [--external-checksum TEXT] SOURCE_PATH DEST_PATH\n

    Skips empty lines and allows comments beginning with "#".

    \b
    If you use `--batch` and a commandline SOURCE_PATH and/or DEST_PATH, these
    paths will be used as dir prefixes to any paths on stdin.

    \b
    === Sync Levels

    Sync Levels are ways to decide whether or not files are copied, with the
    following definitions:

    EXISTS: Determine whether or not to transfer based on file existence.
    If the destination file is absent, do the transfer.

    SIZE: Determine whether or not to transfer based on the size of the file.
    If destination file size does not match the source, do the transfer.

    MTIME: Determine whether or not to transfer based on modification times.
    If source has a newer modified time than the destination, do the transfer.

    CHECKSUM: Determine whether or not to transfer based on checksums of file
    contents.
    If source and destination contents differ, as determined by a checksum of
    their contents, do the transfer.

    If a transfer fails, CHECKSUM must be used to restart the transfer.
    All other levels can lead to data corruption.

    {AUTOMATIC_ACTIVATION}
    """
    source_endpoint, cmd_source_path = source
    dest_endpoint, cmd_dest_path = destination

    if recursive and batch:
        raise click.UsageError(
            (
                "You cannot use --recursive in addition to --batch. "
                "Instead, use --recursive on lines of --batch input "
                "which need it"
            )
        )

    if external_checksum and batch:
        raise click.UsageError(
            (
                "You cannot use --external-checksum in addition to --batch. "
                "Instead, use --external-checksum on lines of --batch input "
                "which need it"
            )
        )

    if recursive and external_checksum:
        raise click.UsageError(
            "--recursive and --external-checksum are mutually exclusive"
        )

    if (cmd_source_path is None or cmd_dest_path is None) and (not batch):
        raise click.UsageError(
            "transfer requires either SOURCE_PATH and DEST_PATH or --batch"
        )

    # because python can't handle multiple **kwargs expansions in a single
    # call, we need to get a little bit clever
    # both the performance options (of which there are a few), and the
    # notification options (also there are a few) have elements which should be
    # omitted in some cases
    # notify comes to us clean, perf opts need more care
    # put them together into a dict before passing to TransferData
    kwargs = {}
    perf_opts = dict(
        (k, v)
        for (k, v) in dict(
            perf_cc=perf_cc, perf_p=perf_p, perf_pp=perf_pp, perf_udt=perf_udt
        ).items()
        if v is not None
    )
    kwargs.update(perf_opts)
    kwargs.update(notify)

    client = get_client()
    transfer_data = TransferData(
        client,
        source_endpoint,
        dest_endpoint,
        label=label,
        sync_level=sync_level,
        verify_checksum=verify_checksum,
        preserve_timestamp=preserve_mtime,
        encrypt_data=encrypt,
        submission_id=submission_id,
        delete_destination_extra=delete,
        deadline=deadline,
        skip_activation_check=skip_activation_check,
        **kwargs
    )

    if batch:

        @click.command()
        @click.option("--external-checksum")
        @click.option("--recursive", "-r", is_flag=True)
        @click.argument("source_path", type=TaskPath(base_dir=cmd_source_path))
        @click.argument("dest_path", type=TaskPath(base_dir=cmd_dest_path))
        def process_batch_line(dest_path, source_path, recursive, external_checksum):
            """
            Parse a line of batch input and turn it into a transfer submission
            item.
            """
            if recursive and external_checksum:
                raise click.UsageError(
                    "--recursive and --external-checksum are mutually exclusive"
                )
            transfer_data.add_item(
                str(source_path),
                str(dest_path),
                external_checksum=external_checksum,
                checksum_algorithm=checksum_algorithm,
                recursive=recursive,
            )

        shlex_process_stdin(
            process_batch_line,
            (
                "Enter transfers, line by line, as\n\n"
                "    [--recursive] [--external-checksum TEXT] SOURCE_PATH DEST_PATH\n"
            ),
        )
    else:
        transfer_data.add_item(
            cmd_source_path,
            cmd_dest_path,
            external_checksum=external_checksum,
            checksum_algorithm=checksum_algorithm,
            recursive=recursive,
        )

    if dry_run:
        formatted_print(
            transfer_data,
            response_key="DATA",
            fields=(
                ("Source Path", "source_path"),
                ("Dest Path", "destination_path"),
                ("Recursive", "recursive"),
                ("External Checksum", "external_checksum"),
            ),
        )
        # exit safely
        return

    # autoactivate after parsing all args and putting things together
    # skip this if skip-activation-check is given
    if not skip_activation_check:
        autoactivate(client, source_endpoint, if_expires_in=60)
        autoactivate(client, dest_endpoint, if_expires_in=60)

    res = client.submit_transfer(transfer_data)
    formatted_print(
        res,
        text_format=FORMAT_TEXT_RECORD,
        fields=(("Message", "message"), ("Task ID", "task_id")),
    )
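As a rough illustration of the batch line format described in the docstring ('[--recursive] [--external-checksum TEXT] SOURCE_PATH DEST_PATH', quotes respected, comments and blank lines skipped), a simplified parser might look like the sketch below; the real command uses click plus shlex_process_stdin as shown above and also handles --external-checksum values, which this sketch ignores:

import shlex

def parse_batch_line(line):
    # return None for blank lines and comments; otherwise split the line,
    # pull out an optional --recursive flag, and take the last two fields
    # as the source and destination paths
    line = line.strip()
    if not line or line.startswith("#"):
        return None
    tokens = shlex.split(line)
    recursive = "--recursive" in tokens or "-r" in tokens
    paths = [t for t in tokens if not t.startswith("-")]
    source_path, dest_path = paths[-2], paths[-1]
    return {"source_path": source_path, "destination_path": dest_path,
            "recursive": recursive}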