Example #1
    def copy_directory(self, ori, destiny, tc):
        """
        copy a directory using globus transfer

        :param ori: path where the data is in the source machine
        :type ori: str

        :param destiny: path where the data will be put on the destiny machine
        :type destiny: str

        :param tc: globus transfer client
        :type tc: :class:`globus_sdk.TransferClient`

        :return: status of the transference
        :rtype: str
        """

        transference_data = TransferData(tc,
                                         self._from,
                                         self._to,
                                         label="SDK example",
                                         sync_level="checksum")

        transference_data.add_item(ori, destiny, recursive=True)
        transfer_result = tc.submit_transfer(transference_data)
        while not tc.task_wait(transfer_result["task_id"], timeout=1):
            task = tc.get_task(transfer_result["task_id"])

            if task['nice_status'] == "NOT_A_DIRECTORY":
                tc.cancel_task(task["task_id"])
                return task['nice_status']
        return "OK"
Example #2
    def transfer_data(self, src_endpoint: str, src_path: Union[str, Path, PathLike],
                      dest_endpoint: str, dest_path: Union[str, Path, PathLike]):
        self._src_endpoint = src_endpoint
        self._dest_endpoint = dest_endpoint
        src_endpoint_id = self.get_endpoint_id(src_endpoint)
        if not src_endpoint_id:
            print(f'ERROR: Unable to find source endpoint id for: "{self._src_endpoint}"')
            return

        dest_endpoint_id = self.get_endpoint_id(dest_endpoint)
        if not dest_endpoint_id:
            print(f'ERROR: Unable to find destination endpoint id for: "{self._dest_endpoint}"')
            return

        transfer_data = TransferData(self._transfer_client,
                                     src_endpoint_id,
                                     dest_endpoint_id,
                                     encrypt_data=True)
        transfer_data.add_item(str(src_path), str(dest_path), recursive=True)
        try:
            print(
                f'Submitting a transfer task from {self._src_endpoint}:{src_path} to {self._dest_endpoint}:{dest_path}')
            task = self._transfer_client.submit_transfer(transfer_data)
        except TransferAPIError as e:
            print(str(e))
            sys.exit(1)
        task_id = task['task_id']
        print(f'\tWaiting for transfer to complete with task_id: {task_id}')
        while not self._transfer_client.task_wait(task_id=task_id, timeout=3600, polling_interval=60):
            print('.', end='')

        print('Transferred files:')
        for info in self._transfer_client.task_successful_transfers(task_id=task_id, num_results=None):
            print("\t{} -> {}".format(info["source_path"], info["destination_path"]))
Example #3
    def copy_file(self, ori, destiny, tc):
        """
        copy a file using globus

        :param ori: path where the data is in the source machine
        :type ori: str

        :param destiny: path where the data will be put on the destiny machine
        :type destiny: str

        :param tc: globus transfer client
        :type tc: :class:`globus_sdk.TransferClient`

        :return: status of the transference
        :rtype: str
        """

        transference_data = TransferData(tc,
                                         self._from,
                                         self._to,
                                         label="SDK example",
                                         sync_level="checksum")

        transference_data.add_item(ori, destiny)
        transfer_result = tc.submit_transfer(transference_data)
        while not tc.task_wait(transfer_result["task_id"], timeout=1):
            # wait until transfer ends
            continue

        return "OK"
Example #4
    def transfer(self):
        self.transfer_client = transfer_client = globus_sdk.TransferClient(
            authorizer=self.authorizer)
        try:
            transfer_client.endpoint_autoactivate(self.source_id)
            transfer_client.endpoint_autoactivate(self.dest_id)
        except GlobusAPIError as ex:
            if ex.http_status == 401:
                sys.exit('Refresh token has expired. '
                         'Please delete the `tokens` object from '
                         '{} and try again.'.format(self.refresh_token_file))
            else:
                raise ex

        while True:
            self._check_end_point(self.source_id, self.source_folder)
            self._check_end_point(self.dest_id, self.dest_folder)
            tdata = TransferData(self.transfer_client,
                                 self.source_id,
                                 self.dest_id,
                                 label=self.transfer_label,
                                 sync_level="checksum")
            tdata.add_item(self.source_folder,
                           self.dest_folder,
                           recursive=True)
            task = transfer_client.submit_transfer(tdata)
            task_id = task['task_id']
            self.logger.info("Task id {} submitted".format(task_id))
            transfer_client.task_wait(task_id=task_id,
                                      timeout=self.poll_time,
                                      polling_interval=5)
            td = transfer_client.get_task(task_id)
            self.logger.info("Task id {} complete".format(task_id))
            self.logger.debug(td)
            time.sleep(self.poll_time)
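
The transfer() method above assumes self.authorizer was created beforehand. A sketch of one way it might be built, assuming a refresh token saved from an earlier native-app login (the client id and token values are placeholders):

import globus_sdk

auth_client = globus_sdk.NativeAppAuthClient("NATIVE-APP-CLIENT-ID")
# e.g. the refresh token could be loaded from self.refresh_token_file
authorizer = globus_sdk.RefreshTokenAuthorizer(
    refresh_token="SAVED-REFRESH-TOKEN",
    auth_client=auth_client)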
Example #5
def submit_xfer(source_endpoint_id,
                destination_endpoint_id,
                source_path,
                dest_path,
                job_label,
                recursive=False,
                logger=logging.log):
    tc = get_transfer_client(logger=logger)
    # as both endpoints are expected to be Globus Server endpoints, send auto-activate commands for both globus endpoints
    auto_activate_endpoint(tc, source_endpoint_id, logger=logger)
    auto_activate_endpoint(tc, destination_endpoint_id, logger=logger)

    # From the Globus docs: sync_level="checksum" means that before files are transferred, Globus computes checksums
    # on the source and destination files and only transfers the files whose checksums differ. verify_checksum=True
    # means that after a file is transferred, Globus computes checksums on the source and destination files to verify
    # the transfer; if the checksums do not match, it redoes the transfer of that file.
    # tdata = TransferData(tc, source_endpoint_id, destination_endpoint_id, label=job_label, sync_level="checksum", verify_checksum=True)
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum",
                         notify_on_succeeded=False,
                         notify_on_failed=False)
    tdata.add_item(source_path, dest_path, recursive=recursive)

    # logging.info('submitting transfer...')
    transfer_result = tc.submit_transfer(tdata)
    # logging.info("task_id =", transfer_result["task_id"])

    return transfer_result["task_id"]
Example #6
def main():
    tokens = None
    client = NativeClient(client_id=CLIENT_ID, app_name=APP_NAME)
    try:
        # if we already have tokens, load and use them
        tokens = client.load_tokens(requested_scope=SCOPES)
    except:
        pass

    if not tokens:
        # if we need to get tokens, start the Native App authentication process
        # need to specify that we want refresh tokens
        tokens = client.login(requested_scopes=SCOPES, refresh_tokens=True)
        try:
            client.save_tokens(tokens)
        except:
            pass

    transfer = setup_transfer_client(tokens['transfer.api.globus.org'])

    try:
        task_data = load_data_from_file(DATA_FILE)['task']
        task = transfer.get_task(task_data['task_id'])
        if task['status'] not in PREVIOUS_TASK_RUN_CASES:
            print('The last transfer status is {}, skipping run...'.format(
                task['status']))
            sys.exit(1)
    except KeyError:
        # Ignore if there is no previous task
        pass

    check_endpoint_path(transfer, SOURCE_ENDPOINT, SOURCE_PATH)
    if CREATE_DESTINATION_FOLDER:
        create_destination_directory(transfer, DESTINATION_ENDPOINT,
                                     DESTINATION_PATH)
    else:
        check_endpoint_path(transfer, DESTINATION_ENDPOINT, DESTINATION_PATH)

    tdata = TransferData(transfer,
                         SOURCE_ENDPOINT,
                         DESTINATION_ENDPOINT,
                         label=TRANSFER_LABEL,
                         sync_level="checksum")
    tdata.add_item(SOURCE_PATH, DESTINATION_PATH, recursive=True)

    task = transfer.submit_transfer(tdata)
    save_data_to_file(DATA_FILE, 'task', task.data)
    print('Transfer has been started from\n  {}:{}\nto\n  {}:{}'.format(
        SOURCE_ENDPOINT, SOURCE_PATH, DESTINATION_ENDPOINT, DESTINATION_PATH))
    url_string = 'https://globus.org/app/transfer?' + \
        six.moves.urllib.parse.urlencode({
            'origin_id': SOURCE_ENDPOINT,
            'origin_path': SOURCE_PATH,
            'destination_id': DESTINATION_ENDPOINT,
            'destination_path': DESTINATION_PATH
        })
    print('Visit the link below to see the changes:\n{}'.format(url_string))
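
main() above relies on module-level constants that are not shown. A plausible set of values, assuming the standard Globus Transfer scope (endpoint UUIDs, paths, and labels are placeholders):

CLIENT_ID = "NATIVE-APP-CLIENT-ID"
APP_NAME = "My Sync App"
SCOPES = "urn:globus:auth:scope:transfer.api.globus.org:all"
DATA_FILE = "transfer-data.json"
TRANSFER_LABEL = "Nightly sync"
SOURCE_ENDPOINT = "SRC-ENDPOINT-UUID"
SOURCE_PATH = "/data/outgoing/"
DESTINATION_ENDPOINT = "DST-ENDPOINT-UUID"
DESTINATION_PATH = "/data/incoming/"
CREATE_DESTINATION_FOLDER = True
# A new run is skipped unless the previous task has reached one of these states.
PREVIOUS_TASK_RUN_CASES = ["SUCCEEDED", "FAILED"]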
Example #7
 def transfer_file(self, src_file, dst_file):
     transfer_data = TransferData(self.transferClient,
                                  self.src_endpoint['id'],
                                  self.dst_endpoint['id'])
     transfer_data.add_item(src_file, dst_file)
     result = self.transferClient.submit_transfer(transfer_data)
     self.logger.info('task_id [{0}] {1}'.format(result['task_id'],
                                                 result['code']))
     return result
Example #8
def transfer(client, remote_uuid, local_uuid, file_list, event=None):
    """
    Setup a file transfer between two endpoints
    
    Parameters:
        remote_uuid (str): the globus uuid of the source endpoint
        local_uuid (str): the globus uuid of the destination endpoint
        file_list (list): a list of dictionaries with keys remote_path, local_path
        event (Threadding.Event): a kill event for running inside a thread
    """

    # create the transfer object
    try:
        task_label = 'Processflow auto transfer'
        transfer_task = TransferData(client,
                                     remote_uuid,
                                     local_uuid,
                                     sync_level='checksum',
                                     label=task_label)
    except Exception as e:
        logging.error('Error creating transfer task')
        logging.error(format_debug(e))
        return

    # add in our transfer items
    for datafile in file_list:
        transfer_task.add_item(source_path=datafile['remote_path'],
                               destination_path=datafile['local_path'],
                               recursive=False)

    # Start the transfer
    task_id = None
    result = None
    try:
        result = client.submit_transfer(transfer_task)
        task_id = result["task_id"]
        logging.info('starting transfer with task id %s', task_id)
    except Exception as e:
        if result:
            logging.error("result: %s", str(result))
        logging.error("Could not submit the transfer")
        logging.error(format_debug(e))
        return

    # loop until transfer is complete
    while True:
        status = client.get_task(task_id)
        if status['status'] == 'SUCCEEDED':
            return True, None
        elif status['status'] == 'FAILED':
            return False, status.get('nice_status_details')
        if event and event.is_set():
            client.cancel_task(task_id)
            return None, None
        sleep(10)
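
transfer() above expects file_list to be a list of dictionaries keyed by remote_path and local_path, as the docstring notes. A call might therefore look like this, where client is an authenticated TransferClient and the UUIDs and paths are placeholders:

files = [
    {'remote_path': '/archive/run1/output.nc', 'local_path': '/scratch/run1/output.nc'},
    {'remote_path': '/archive/run1/log.txt', 'local_path': '/scratch/run1/log.txt'},
]
result = transfer(client, 'SRC-ENDPOINT-UUID', 'DST-ENDPOINT-UUID', files)
if result == (True, None):
    print('transfer succeeded')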
Example #9
def bulk_submit_xfer(submitjob, recursive=False):
    cfg = load_config()
    client_id = cfg['globus']['apps'][GLOBUS_AUTH_APP]['client_id']
    auth_client = NativeAppAuthClient(client_id)
    refresh_token = cfg['globus']['apps'][GLOBUS_AUTH_APP]['refresh_token']
    source_endpoint_id = submitjob[0].get('metadata').get(
        'source_globus_endpoint_id')
    destination_endpoint_id = submitjob[0].get('metadata').get(
        'dest_globus_endpoint_id')
    authorizer = RefreshTokenAuthorizer(refresh_token=refresh_token,
                                        auth_client=auth_client)
    tc = TransferClient(authorizer=authorizer)
    # as both endpoints are expected to be Globus Server endpoints, send auto-activate commands for both globus endpoints
    a = auto_activate_endpoint(tc, source_endpoint_id)
    logging.debug('a: %s' % a)
    if a != 'AlreadyActivated':
        return None

    b = auto_activate_endpoint(tc, destination_endpoint_id)
    logging.debug('b: %s' % b)
    if b != 'AlreadyActivated':
        return None

    # make job_label for task a timestamp
    x = datetime.now()
    job_label = x.strftime('%Y%m%d%H%M%s')

    # From the Globus docs: sync_level="checksum" means that before files are transferred, Globus computes checksums
    # on the source and destination files and only transfers the files whose checksums differ. verify_checksum=True
    # means that after a file is transferred, Globus computes checksums on the source and destination files to verify
    # the transfer; if the checksums do not match, it redoes the transfer of that file.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum")

    for file in submitjob:
        source_path = file.get('sources')[0]
        dest_path = file.get('destinations')[0]
        filesize = file['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        # md5 = file['metadata']['md5']
        # tdata.add_item(source_path, dest_path, recursive=False, external_checksum=md5)
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter(
            'daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize',
            filesize)

    # logging.info('submitting transfer...')
    transfer_result = tc.submit_transfer(tdata)
    # logging.info("task_id =", transfer_result["task_id"])

    return transfer_result["task_id"]
Example #10
def transfer_directory(**kwargs):
    """
    Transfer all the contents from source_endpoint:src_path to destination_endpoint:dst_path

    parameters:
        source_endpoint (str) the globus UUID for the source files
        destination_endpoint (str) the globus UUID for the destination
        src_path (str) the path to the source directory to copy
        dst_path (str) the path on the destination directory
    """
    source_endpoint = kwargs['source_endpoint']
    destination_endpoint = kwargs['destination_endpoint']
    src_path = kwargs['src_path']
    dst_path = kwargs['dst_path']
    event_list = kwargs['event_list']
    event = kwargs['event']

    client = get_client()
    transfer = TransferData(
        client,
        source_endpoint,
        destination_endpoint,
        sync_level='checksum')
    transfer.add_item(
        source_path=src_path,
        destination_path=dst_path,
        recursive=True)
    try:
        result = client.submit_transfer(transfer)
        task_id = result['task_id']
    except:
        return False

    head, directory_name = os.path.split(src_path)
    msg = '{dir} transfer starting'.format(dir=directory_name)
    event_list.push(message=msg)
    retcode = 0
    while True:
        if event and event.is_set():
            client.cancel_task(task_id)
            return
        status = client.get_task(task_id).get('status')
        if status == 'SUCCEEDED':
            msg = '{dir} transfer complete'.format(dir=directory_name)
            retcode = True
            break
        elif status == 'FAILED':
            msg = '{dir} transfer FAILED'.format(dir=directory_name)
            retcode = False
            break
        else:
            sleep(5)
    event_list.push(message=msg)
    return retcode
Example #11
def transfer_directory(src_uuid,
                       dst_uuid,
                       src_path,
                       dst_path,
                       event_list=None,
                       killevent=None):
    """
    Transfer all the contents from source_endpoint:src_path to destination_endpoint:dst_path

    parameters:
        src_uuid (str): the globus UUID for the source files
        dst_uuid (str) the globus UUID for the destination
        src_path (str) the path to the source directory to copy
        dst_path (str) the path on the destination directory
        event_list (EventList): an eventlist to push user notifications into
        killevent (Threadding.Event): an event to listen for if running inside a thread to terminate
    """

    client = get_client()
    transfer = TransferData(client, src_uuid, dst_uuid, sync_level='checksum')
    transfer.add_item(source_path=src_path,
                      destination_path=dst_path,
                      recursive=True)

    try:
        msg = 'Starting globus directory transfer from {src} to {dst}'.format(
            src=src_path, dst=dst_path)
        print_line(msg, event_list)
        logging.info(msg)

        result = client.submit_transfer(transfer)
        task_id = result['task_id']
    except:
        msg = 'Transfer setup for {src_uuid}:{src_path} to {dst_uuid}:{dst_path} failed'.format(
            src_uuid=src_uuid,
            src_path=src_path,
            dst_uuid=dst_uuid,
            dst_path=dst_path)
        logging.error(msg)
        return False

    while True:
        status = client.get_task(task_id).get('status')
        if status == 'SUCCEEDED':
            return True
        elif status == 'FAILED':
            return False
        else:
            msg = 'Unexpected globus code: {}'.format(status)
            print_line(msg, event_list)
        if killevent and killevent.is_set():
            client.cancel_task(task_id)
            return False
        sleep(10)
Example #12
    def __init__(self, source_id, dest_id, label=None, sync_level=None):

        self.source_id = auth.verify_uuid(source_id)
        self.dest_id = auth.verify_uuid(dest_id)

        self.transfer_client = get_transfer_client()

        self.transfer_data = TransferData(self.transfer_client,
                                          source_id,
                                          dest_id,
                                          label=label,
                                          sync_level=sync_level)
Example #13
def submit_transfer():
    """
    - Take the data returned by the Browse Endpoint helper page
      and make a Globus transfer request.
    - Send the user to the transfer status page with the task id
      from the transfer.
    """
    browse_endpoint_form = request.form

    selected = session['form']['datasets']
    filtered_datasets = [ds for ds in datasets if ds['id'] in selected]

    transfer_tokens = session['tokens']['transfer.api.globus.org']

    authorizer = RefreshTokenAuthorizer(
        transfer_tokens['refresh_token'],
        load_portal_client(),
        access_token=transfer_tokens['access_token'],
        expires_at=transfer_tokens['expires_at_seconds'])

    transfer = TransferClient(authorizer=authorizer)

    source_endpoint_id = app.config['DATASET_ENDPOINT_ID']
    source_endpoint_base = app.config['DATASET_ENDPOINT_BASE']
    destination_endpoint_id = browse_endpoint_form['endpoint_id']
    destination_folder = browse_endpoint_form.get('folder[0]')

    transfer_data = TransferData(transfer_client=transfer,
                                 source_endpoint=source_endpoint_id,
                                 destination_endpoint=destination_endpoint_id,
                                 label=browse_endpoint_form.get('label'))

    for ds in filtered_datasets:
        source_path = source_endpoint_base + ds['path']
        dest_path = browse_endpoint_form['path']

        if destination_folder:
            dest_path += destination_folder + '/'

        dest_path += ds['name'] + '/'

        transfer_data.add_item(source_path=source_path,
                               destination_path=dest_path,
                               recursive=True)

    transfer.endpoint_autoactivate(source_endpoint_id)
    transfer.endpoint_autoactivate(destination_endpoint_id)
    task_id = transfer.submit_transfer(transfer_data)['task_id']

    flash('Transfer request submitted successfully. Task ID: ' + task_id)

    return (redirect(url_for('transfer_status', task_id=task_id)))
Example #14
def bulk_submit_xfer(submitjob, recursive=False, logger=logging.log):
    cfg = load_config(logger=logger)
    client_id = cfg['globus']['apps'][GLOBUS_AUTH_APP]['client_id']
    auth_client = NativeAppAuthClient(client_id)
    refresh_token = cfg['globus']['apps'][GLOBUS_AUTH_APP]['refresh_token']
    source_endpoint_id = submitjob[0].get('metadata').get(
        'source_globus_endpoint_id')
    destination_endpoint_id = submitjob[0].get('metadata').get(
        'dest_globus_endpoint_id')
    authorizer = RefreshTokenAuthorizer(refresh_token=refresh_token,
                                        auth_client=auth_client)
    tc = TransferClient(authorizer=authorizer)

    # make job_label for task a timestamp
    now = datetime.datetime.now()
    job_label = now.strftime('%Y%m%d%H%M%s')

    # retrieve globus_task_deadline value to enforce time window to complete transfers
    # default is 2880 minutes or 48 hours
    globus_task_deadline = config_get_int('conveyor', 'globus_task_deadline',
                                          False, 2880)
    deadline = now + datetime.timedelta(minutes=globus_task_deadline)

    # From the Globus docs: sync_level="checksum" means that before files are transferred, Globus computes checksums
    # on the source and destination files and only transfers the files whose checksums differ. verify_checksum=True
    # means that after a file is transferred, Globus computes checksums on the source and destination files to verify
    # the transfer; if the checksums do not match, it redoes the transfer of that file.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum",
                         deadline=str(deadline))

    for file in submitjob:
        source_path = file.get('sources')[0]
        dest_path = file.get('destinations')[0]
        filesize = file['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        # md5 = file['metadata']['md5']
        # tdata.add_item(source_path, dest_path, recursive=False, external_checksum=md5)
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter(
            'daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize',
            filesize)

    # logging.info('submitting transfer...')
    transfer_result = tc.submit_transfer(tdata)
    logger(logging.INFO, "transfer_result: %s" % transfer_result)

    return transfer_result["task_id"]
Example #15
def sync(source_endpoint, source_path, destination_endpoint, destination_path,
         synctype, sync_data_file, transfer_label):

    global DATA_FILE

    if source_endpoint == destination_endpoint:
        raise click.UsageError(
            'Source and destination endpoints must be different.')

    DATA_FILE = sync_data_file

    tokens = get_tokens()
    transfer = setup_transfer_client(tokens['transfer.api.globus.org'],
                                     source_endpoint, destination_endpoint)

    try:
        task_data = load_data_from_file(DATA_FILE)['task']
        task = transfer.get_task(task_data['task_id'])
        if task['status'] not in ['SUCCEEDED', 'FAILED']:
            print('The last transfer status is {}, skipping run...'.format(
                task['status']))
            sys.exit(1)
    except KeyError:
        # Ignore if there is no previous task
        pass

    check_endpoint_path(transfer, source_endpoint, source_path)
    create_destination_directory(transfer, destination_endpoint,
                                 destination_path)

    tdata = TransferData(transfer,
                         source_endpoint,
                         destination_endpoint,
                         label=transfer_label,
                         sync_level=synctype)
    tdata.add_item(source_path, destination_path, recursive=True)

    task = transfer.submit_transfer(tdata)
    save_data_to_file(DATA_FILE, 'task', task.data)
    print('Transfer has been started from\n  {}:{}\nto\n  {}:{}'.format(
        source_endpoint, source_path, destination_endpoint, destination_path))
    url_string = 'https://globus.org/app/transfer?' + \
        six.moves.urllib.parse.urlencode({
            'origin_id': source_endpoint,
            'origin_path': source_path,
            'destination_id': destination_endpoint,
            'destination_path': destination_path
        })
    print('Visit the link below to see the changes:\n{}'.format(url_string))
Example #16
 def make_transfer(self, night, project, ndir):
     """ Function to create the data transfer object
     """
     self.active_transfer_count += 1
     tdata = TransferData(self.client,
                          self.config['src_ep'],
                          self.config['dest_ep'],
                          label='transfer %s_%s' % (night, project),
                          sync_level="checksum",
                          verify_checksum=True,
                          preserve_timestamp=True)
     tdata.add_item(os.path.join(self.config['noao_root'], ndir),
                    os.path.join(self.config['transfer_dir'], ndir),
                    recursive=True)
     return tdata
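
make_transfer() above only builds the TransferData document; the caller still submits it through the same client. A minimal follow-up sketch (the arguments are placeholders):

tdata = self.make_transfer('20240101', 'myproject', 'night_001')
task = self.client.submit_transfer(tdata)
print('submitted transfer task', task['task_id'])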
Example #17
    def cp(self, src_ep_path, dst_ep_path, recursive=False):
        '''
        copy file/path
        todo: support label, sync_level, etc?
        sync_level: ["exists", "size", "mtime", "checksum"]
        '''
        tc = self.xfer_client
        sep, spath = self.ep_parts(src_ep_path)
        dep, dpath = self.ep_parts(dst_ep_path)

        td = TransferData(tc, sep, dep)
        td.add_item(spath, dpath, recursive=recursive)

        task_id = tc.submit_transfer(td)['task_id']
        return self._wait(task_id)
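
A possible call to cp() above, assuming ep_parts() splits an 'endpoint_uuid:path' string into its endpoint and path components (the UUIDs are placeholders):

client.cp('SRC-ENDPOINT-UUID:/data/results/',
          'DST-ENDPOINT-UUID:/backup/results/',
          recursive=True)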
Example #18
def test_create_job(timer_client, start, interval):
    meta = load_response(timer_client.create_job).metadata
    transfer_client = TransferClient()
    transfer_client.get_submission_id = lambda *_0, **_1: {"value": "mock"}
    transfer_data = TransferData(transfer_client, GO_EP1_ID, GO_EP2_ID)
    timer_job = TimerJob.from_transfer_data(transfer_data, start, interval)
    response = timer_client.create_job(timer_job)
    assert response.http_status == 201
    assert response.data["job_id"] == meta["job_id"]
    timer_job = TimerJob.from_transfer_data(dict(transfer_data), start,
                                            interval)
    response = timer_client.create_job(timer_job)
    assert response.http_status == 201
    assert response.data["job_id"] == meta["job_id"]
    req_body = json.loads(get_last_request().body)
    if isinstance(start, datetime):
        assert req_body["start"] == start.isoformat()
    else:
        assert req_body["start"] == start
    if isinstance(interval, timedelta):
        assert req_body["interval"] == interval.total_seconds()
    else:
        assert req_body["interval"] == interval
    assert req_body["callback_url"] == slash_join(get_service_url("actions"),
                                                  "/transfer/transfer/run")
Example #19
def getTransferData():
    cfg = load_config()
    client_id = cfg['globus']['apps']['SDK Tutorial App']['client_id']
    auth_client = NativeAppAuthClient(client_id)
    refresh_token = cfg['globus']['apps']['SDK Tutorial App']['refresh_token']
    source_endpoint_id = cfg['globus']['apps']['SDK Tutorial App'][
        'win10_endpoint_id']
    destination_endpoint_id = cfg['globus']['apps']['SDK Tutorial App'][
        'sdccfed_endpoint_id']
    authorizer = RefreshTokenAuthorizer(refresh_token=refresh_token,
                                        auth_client=auth_client)
    tc = TransferClient(authorizer=authorizer)
    # as both endpoints are expected to be Globus Server endpoints, send auto-activate commands for both globus endpoints
    auto_activate_endpoint(tc, source_endpoint_id)
    auto_activate_endpoint(tc, destination_endpoint_id)

    # make job_label for task a timestamp
    x = datetime.now()
    job_label = x.strftime('%Y%m%d%H%M%s')

    # From the Globus docs: sync_level="checksum" means that before files are transferred, Globus computes checksums
    # on the source and destination files and only transfers the files whose checksums differ. verify_checksum=True
    # means that after a file is transferred, Globus computes checksums on the source and destination files to verify
    # the transfer; if the checksums do not match, it redoes the transfer of that file.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum",
                         verify_checksum=True)

    return tdata
Example #20
def main():

    tokens = get_tokens()
    transfer = setup_transfer_client(tokens['transfer.api.globus.org'])

    try:
        task_data = load_data_from_file(DATA_FILE)['task']
        task = transfer.get_task(task_data['task_id'])
        if task['status'] not in PREVIOUS_TASK_RUN_CASES:
            print('The last transfer status is {}, skipping run...'.format(
                task['status']))
            sys.exit(1)
    except KeyError:
        # Ignore if there is no previous task
        pass

    check_endpoint_path(transfer, SOURCE_ENDPOINT, SOURCE_PATH)
    if CREATE_DESTINATION_FOLDER:
        create_destination_directory(transfer, DESTINATION_ENDPOINT,
                                     DESTINATION_PATH)
    else:
        check_endpoint_path(transfer, DESTINATION_ENDPOINT, DESTINATION_PATH)

    tdata = TransferData(transfer,
                         SOURCE_ENDPOINT,
                         DESTINATION_ENDPOINT,
                         label=TRANSFER_LABEL,
                         sync_level="checksum")
    tdata.add_item(SOURCE_PATH, DESTINATION_PATH, recursive=True)

    task = transfer.submit_transfer(tdata)
    save_data_to_file(DATA_FILE, 'task', task.data)
    print('Transfer has been started from\n  {}:{}\nto\n  {}:{}'.format(
        SOURCE_ENDPOINT, SOURCE_PATH, DESTINATION_ENDPOINT, DESTINATION_PATH))
    url_string = 'https://globus.org/app/transfer?' + \
        six.moves.urllib.parse.urlencode({
            'origin_id': SOURCE_ENDPOINT,
            'origin_path': SOURCE_PATH,
            'destination_id': DESTINATION_ENDPOINT,
            'destination_path': DESTINATION_PATH
        })
    print('Visit the link below to see the changes:\n{}'.format(url_string))
Example #21
 def make_transfer(self, path):
     """ Function to create the data transfer object
     """
     self.active_transfer_count += 1
     tdata = TransferData(self.client,
                          self.config['src_ep'],
                          self.config['dest_ep'],
                          label='transfer %s'%(path),
                          sync_level="checksum",
                          verify_checksum=True,
                          preserve_timestamp=True
                         )
     return tdata
Example #22
class Transfer:
    '''
    This class describes a Globus transfer task.
    '''
    def __init__(self, source_id, dest_id, label=None, sync_level=None):

        self.source_id = auth.verify_uuid(source_id)
        self.dest_id = auth.verify_uuid(dest_id)

        self.transfer_client = get_transfer_client()

        self.transfer_data = TransferData(self.transfer_client,
                                          source_id,
                                          dest_id,
                                          label=label,
                                          sync_level=sync_level)

    def add_item(self, src_path, dst_path, recursive=False):
        self.transfer_data.add_item(src_path, dst_path, recursive=recursive)

    def submit(self, **kwargs):
        self.transfer_client.submit_transfer(self.transfer_data, **kwargs)
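
A short usage sketch for the Transfer wrapper above (endpoint UUIDs and paths are placeholders; get_transfer_client and auth.verify_uuid come from the surrounding module, which is not shown):

xfer = Transfer('SRC-ENDPOINT-UUID', 'DST-ENDPOINT-UUID',
                label='nightly backup', sync_level='checksum')
xfer.add_item('/data/results/', '/backup/results/', recursive=True)
xfer.submit()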
Example #23
    def run(self):
        logger.info(f"{self} - started")
        source = self.source
        destination = self.destination
        src_path = os.path.join(source.src_path, self.dataset)
        dst_path = destination.dst_path
        tc = GlobusTransfer.transfer_client
        td = TransferData(tc, source.uuid, destination.uuid)
        td.add_item(src_path, dst_path, recursive=True)

        try:
            task = tc.submit_transfer(td)
            request_time = datetime.utcnow()
            task_id = task.get("task_id")
            """
            A Globus transfer job (task) can be in one of three states:
            ACTIVE, SUCCEEDED, or FAILED. Every 60 seconds the script polls the
            status of the transfer task from the Globus Transfer service, with a
            60-second timeout limit. If the task is still ACTIVE when the timeout
            runs out, 'task_wait' returns False, and True otherwise.
            """
            while not tc.task_wait(task_id, 60, 60):
                if datetime.utcnow() - request_time >= timedelta(
                        seconds=GlobusTransfer.deadline):
                    break
                task = tc.get_task(task_id)
                if task.get("is_paused"):
                    break
            """
            The Globus transfer job (task) has either finished (SUCCEEDED or FAILED)
            or is still active (ACTIVE). Check whether the transfer SUCCEEDED or FAILED.
            """
            task = tc.get_task(task_id)
            if task["status"] == "SUCCEEDED":
                bps = task.get("effective_bytes_per_second")
                rate = GlobusTransfer.convert_bps(bps)
                logger.info(
                    f"Globus transfer {task_id}, from {source.uuid}{src_path} to {destination.uuid}{dst_path} succeeded"
                )
                logger.info(
                    "{} - files transferred: {}, bytes transferred: {}, effective transfer rate: {}, faults: {}"
                    .format(self, task.get("files_transferred"),
                            task.get("bytes_transferred"), rate,
                            task.get("faults")))
                faults = task.get("faults")
                message = None
                if faults > 0:
                    message = self.get_error_events(tc, task_id)
                t = TransferModel(uuid=task_id,
                                  set=self.set,
                                  source=source.name,
                                  destination=destination.name,
                                  dataset=self.dataset,
                                  status=SUCCEEDED,
                                  rate=bps,
                                  message=message,
                                  faults=faults)
            elif task.get("status") == "ACTIVE":
                if task.get("is_paused"):
                    pause_info = tc.task_pause_info(task_id)
                    paused_rules = pause_info.get("pause_rules")
                    reason = paused_rules[0].get("message")
                    message = f"The task was paused. Reason: {reason}"
                    status = PAUSED
                    logger.info("{} - {}".format(self, message))
                else:
                    message = f"The task reached a {GlobusTransfer.deadline} second deadline\n"
                    events = tc.task_event_list(task_id,
                                                num_results=5,
                                                filter="is_error:1")
                    message += self.get_error_events(tc, task_id)
                    status = DEADLINE
                    logger.warning("{} - faults: {}, error: {}".format(
                        self, task.get("faults"), message))
                tc.cancel_task(task_id)
                t = TransferModel(uuid=task_id,
                                  set=self.set,
                                  source=source.name,
                                  destination=destination.name,
                                  dataset=self.dataset,
                                  status=status,
                                  message=message,
                                  faults=task.get("faults"))
            else:
                t = TransferModel(uuid=task_id,
                                  set=self.set,
                                  source=source.name,
                                  destination=destination.name,
                                  dataset=self.dataset,
                                  status=FAILED)
        except Exception as e:
            logger.error(f"{self} - exception: {e}")
            t = TransferModel(set=self.set,
                              source=source.name,
                              destination=destination.name,
                              dataset=self.dataset,
                              status=EXCEPTION,
                              message=f"Globus SDK Exception: {e}")

        self.done = True
        GlobusTransfer.transfers2do -= 1
        session = DBSession()
        session.add(t)
        session.commit()
        DBSession.remove()
        self.release()
        logger.info(f"{self} - finished")
Example #24
 def trigger_preparation(self, jobspec):
     # get logger
     tmpLog = core_utils.make_logger(_logger,
                                     'PandaID={0}'.format(jobspec.PandaID),
                                     method_name='trigger_preparation')
     tmpLog.debug('start')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(
             jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(
         tmpLog, self.tc, label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(
             str(transferTasks[label])))
         return True, ''
      # set the Globus destination Endpoint id and path; will get them from AGIS eventually
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
     self.srcEndpoint = queueConfig.preparator['srcEndpoint']
     self.Globus_dstPath = self.basePath
     #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
     self.dstEndpoint = queueConfig.preparator['dstEndpoint']
     # get input files
     files = []
     lfns = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     for inLFN, inFile in iteritems(inFiles):
         # set path to each file
         inFile['path'] = mover_utils.construct_file_path(
             self.basePath, inFile['scope'], inLFN)
         dstpath = inFile['path']
          # check if the path exists; if not, create it.
         if not os.access(self.basePath, os.F_OK):
             os.makedirs(self.basePath)
         # create the file paths for the Globus source and destination endpoints
         Globus_srcpath = mover_utils.construct_file_path(
             self.Globus_srcPath, inFile['scope'], inLFN)
         Globus_dstpath = mover_utils.construct_file_path(
             self.Globus_dstPath, inFile['scope'], inLFN)
         files.append({
             'scope': inFile['scope'],
             'name': inLFN,
             'Globus_dstPath': Globus_dstpath,
             'Globus_srcPath': Globus_srcpath
         })
         lfns.append(inLFN)
     tmpLog.debug('files[] {0}'.format(files))
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errStr = ''
             if not tmpStatsrc:
                 errStr += ' source Endpoint not activated '
             if not tmpStatdst:
                 errStr += ' destination Endpoint not activated '
             tmpLog.error(errStr)
             return False, errStr
         # both endpoints activated now prepare to transfer data
         if len(files) > 0:
             tdata = TransferData(self.tc,
                                  self.srcEndpoint,
                                  self.dstEndpoint,
                                  label=label,
                                  sync_level="checksum")
             # loop over all input files and add
             for myfile in files:
                 tdata.add_item(myfile['Globus_srcPath'],
                                myfile['Globus_dstPath'])
             # submit
             transfer_result = self.tc.submit_transfer(tdata)
             # check status code and message
             tmpLog.debug(str(transfer_result))
             if transfer_result['code'] == "Accepted":
                 # succeeded
                 # set transfer ID which are used for later lookup
                 transferID = transfer_result['task_id']
                 jobspec.set_groups_to_files(
                     {transferID: {
                         'lfns': lfns,
                         'groupStatus': 'active'
                     }})
                 tmpLog.debug('done')
                 return True, ''
             else:
                 return False, transfer_result['message']
         # if no files to transfer return True
         return True, 'No files to transfer'
     except:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
         return errStat, {}
Example #25
def run_agent():
    # In[2]:

    search_client = globus_auth.login("https://search.api.globus.org/", "globus_search")
    transfer_client = transfer_auth.login()


    # In[3]:

    dataset_name = "pppdb"
    local_ep = "0bc1cb98-d2af-11e6-9cb1-22000a1e3b52"
    dest_ep = "82f1b5c6-6e9b-11e5-ba47-22000b92c6ec"
    dest_path = "/sample_data/"+dataset_name+"_train.csv"
    timeout = False
    timeout_intervals = 10
    interval_time = 10
    verbose = True

    # In[4]:

    if not local_ep:
        pgr_res = transfer_client.endpoint_search(filter_scope="my-endpoints")
        ep_candidates = pgr_res.data
        if len(ep_candidates) < 1: #Nothing found
            raise GlobusError("Error: No local endpoints found")
        elif len(ep_candidates) == 1: #Exactly one candidate
            if ep_candidates[0]["gcp_connected"] == False: #Is GCP, is not on
                raise GlobusError("Error: Globus Connect is not running")
            else: #Is GCServer or GCP and connected
                local_ep = ep_candidates[0]["id"]
        else: # >1 found
            #Filter out disconnected GCP
            ep_connections = [candidate for candidate in ep_candidates if candidate["gcp_connected"] is not False]
            #Recheck list
            if len(ep_connections) < 1: #Nothing found
                raise GlobusError("Error: No local endpoints running")
            elif len(ep_connections) == 1: #Exactly one candidate
                if ep_connections[0]["gcp_connected"] == False: #Is GCP, is not on
                    raise GlobusError("Error: Globus Connect is not active")
                else: #Is GCServer or GCP and connected
                    local_ep = ep_connections[0]["id"]
            else: # >1 found
                #Prompt user
                print("Multiple endpoints found:")
                count = 0
                for ep in ep_connections:
                    count += 1
                    print(count, ": ", ep["display_name"], "\t", ep["id"])
                print("\nPlease choose the endpoint on this machine")
                ep_num = 0
                while ep_num == 0:
                    usr_choice = input("Enter the number of the correct endpoint (-1 to cancel): ")
                    try:
                        ep_choice = int(usr_choice)
                        if ep_choice == -1: #User wants to quit
                            ep_num = -1 #Will break out of while to exit program
                        elif ep_choice in range(1, count+1): #Valid selection
                            ep_num = ep_choice #Break out of while, return valid ID
                        else: #Invalid number
                            print("Invalid selection")
                    except:
                        print("Invalid input")

                if ep_num == -1:
                    print("Cancelling")
                    sys.exit()
                local_ep = ep_connections[ep_num-1]["id"]


    # # Fetch and aggregate records into training set

    # In[5]:

    count = 0
    num_processed = 0
    data_list = []
    while True:
        query = {
            "q": ("mdf_source_name:"+dataset_name+" AND mdf_node_type:record AND "
            "globus_scroll_id:(>=" + str(count) + " AND <" + str(count + 10000) + ")"),
            "advanced": True,
            "limit": 10000
        }
        raw_res = search_client.structured_search(query)
        search_res = gmeta_pop(raw_res, True)
        for res in search_res:
            data_dict = json.loads(res["data"]["raw"])
            data_list.append(data_dict)
        num_ret = len(search_res)
        if num_ret:
            num_processed += num_ret
            count += 10000
        else:
            break
    if verbose:
        print("Processed:", len(data_list), "/", num_processed, "|", len(data_list) - num_processed)


    # In[6]:

    df = pd.DataFrame(data_list)
    df.to_csv(os.path.join(os.getcwd(), "temp_train.csv"))


    # # Upload to NCSA endpoint

    # In[7]:

    try:
        tdata = TransferData(transfer_client, local_ep, dest_ep, verify_checksum=True, notify_on_succeeded=False, notify_on_failed=False, notify_on_inactive=False)
        tdata.add_item(os.path.join(os.getcwd(), "temp_train.csv"), dest_path)
        res = transfer_client.submit_transfer(tdata)
        if res["code"] != "Accepted":
            raise GlobusError("Failed to transfer files: Transfer " + res["code"])
        else:
            intervals = 0
            while not transfer_client.task_wait(res["task_id"], timeout=interval_time, polling_interval=interval_time):
                for event in transfer_client.task_event_list(res["task_id"]):
                    if event["is_error"]:
                        transfer_client.cancel_task(res["task_id"])
                        raise GlobusError("Error: " + event["description"])
                    if timeout and intervals >= timeout_intervals:
                        transfer_client.cancel_task(res["task_id"])
                        raise GlobusError("Transfer timed out.")
                    intervals += 1
    except Exception as e:
        raise
    finally:
        os.remove(os.path.join(os.getcwd(), "temp_train.csv"))


    # # Update dataset entry

    # In[8]:

    query = {
        "q": "mdf_source_name:"+dataset_name+" AND mdf_node_type:dataset",
        "advanced": True
    }
    raw_res = search_client.structured_search(query)
    search_res = gmeta_pop(raw_res)
    if len(search_res) != 1:
        raise ValueError("Incorrect number of results: " + str(len(search_res)))
    ingest = search_res[0]
    ingest["globus_subject"] = raw_res["gmeta"][0]["subject"]
    ingest["acl"] = ["public"]
    ingest["http://materialsdatafacility.org/#training_set"] = {
        "http://materialsdatafacility.org/#endpoint": dest_ep,
        "http://materialsdatafacility.org/#path": dest_path,
        "http://materialsdatafacility.org/#https": "https://data.materialsdatafacility.org" + dest_path
    }
    gmeta = format_gmeta([format_gmeta(ingest)])

    gmeta = json.loads(json.dumps(gmeta).replace("mdf-publish.publication.community", "http://globus.org/publish-terms/#publication/community"))


    # In[9]:

    search_client.ingest(gmeta)


    # # Check ingest

    # In[10]:

    query = {
        "q": "mdf_source_name:"+dataset_name+" AND mdf_node_type:dataset",
        "advanced": True
    }
    raw_res = search_client.structured_search(query)
    search_res = gmeta_pop(raw_res, True)


    # In[11]:

    if verbose:
        print("Verification:\n", json.dumps(search_res[0]["training_set"], sort_keys=True, indent=4, separators=(',', ': ')))
    on_refresh=update_tokens_file_on_refresh)

try:
    tc = TransferClient(authorizer=authorizer)
except:
    print(
        "ERROR: TransferClient() call failed!  Unable to call the Globus transfer interface with the provided auth info!"
    )
    sys.exit(-1)
# print(transfer)

# Now we should have auth, try setting up a transfer.

tdata = TransferData(tc,
                     source_endpoint_id,
                     destination_endpoint_id,
                     label="DCDE Relion transfer",
                     sync_level="size")

tdata.add_item(source_dir, dest_dir, recursive=True)

transfer_result = tc.submit_transfer(tdata)

print("task_id =", transfer_result["task_id"])

while not tc.task_wait(
        transfer_result['task_id'], timeout=1200, polling_interval=10):
    print(".", end="")
print("\n{} completed!".format(transfer_result['task_id']))

os.listdir(path=dest_dir)
   tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,tc,dstEndpoint)
   tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,tc,srcEndpoint)
   if tmpStatsrc and tmpStatdst:
      errStr = 'source Endpoint and destination Endpoint activated'
      tmpLog.debug(errStr)
   else:
      errStr = ''
      if not tmpStatsrc :
         errStr += ' source Endpoint not activated '
      if not tmpStatdst :
         errStr += ' destination Endpoint not activated '
      tmpLog.error(errStr)
      sys.exit(2)
   # both endpoints activated now prepare to transfer data
   # We are sending test files from our destination machine to the source machine
   tdata = TransferData(tc,dstEndpoint,srcEndpoint,sync_level="checksum")
except:
   errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
   sys.exit(1)
 
# loop over the job id's creating various JobSpecs
jobSpec_list = []
for job_id in range(begin_job_id,end_job_id+1):
   jobSpec = JobSpec()
   jobSpec.jobParams = {
                        'scopeLog': 'panda',
                        'logFile': 'log',
                        }
   jobSpec.computingSite = queueName
   jobSpec.PandaID = job_id
   jobSpec.modificationTime = datetime.datetime.now()
    def check_status(self, jobspec):
        # make logger
        tmpLog = self.make_logger(_logger, 'PandaID={0} ThreadID={1}'.format(jobspec.PandaID,threading.current_thread().ident),
                                  method_name='check_status')
        tmpLog.debug('start')
        # show the dummy transfer id and set to a value with the PandaID if needed.
        tmpLog.debug('self.dummy_transfer_id = {}'.format(self.dummy_transfer_id))
        if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base,'XXXX') :
            old_dummy_transfer_id = self.dummy_transfer_id
            self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base,jobspec.PandaID)
            tmpLog.debug('Change self.dummy_transfer_id  from {0} to {1}'.format(old_dummy_transfer_id,self.dummy_transfer_id))
 
        # default return
        tmpRetVal = (True, '')
        # set flag if have db lock
        have_db_lock = False 
        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
        # get the queueConfig and corresponding objStoreID_ES
        queueConfigMapper = QueueConfigMapper()
        queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
        # check queueConfig stager section to see if jobtype is set
        if 'jobtype' in queueConfig.stager:
            if queueConfig.stager['jobtype'] == "Yoda" :
                self.Yodajob = True
        # set the location of the files in fileSpec.objstoreID
        # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json 
        self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
        if self.Yodajob :
            self.pathConvention = int(queueConfig.stager['pathConvention'])
            tmpLog.debug('Yoda Job - PandaID = {0} objstoreID = {1} pathConvention ={2}'.format(jobspec.PandaID,self.objstoreID,self.pathConvention))
        else:
            self.pathConvention = None
            tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(jobspec.PandaID,self.objstoreID))
        # test we have a Globus Transfer Client
        if not self.tc :
            errStr = 'failed to get Globus Transfer Client'
            tmpLog.error(errStr)
            return False, errStr
        # set transferID to None
        transferID = None
        # get the scope of the log files
        outfileattrib = jobspec.get_output_file_attributes()
        scopeLog = 'xxxx'
        for key in outfileattrib.keys():
            if "log.tgz" in key :
                scopeLog = outfileattrib[key]['scope']
        # get transfer groups
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
        for dummy_transferID in groups:
            # skip if valid transfer ID not dummy one
            if validate_transferid(dummy_transferID) :
                continue
            # lock for 120 sec
            tmpLog.debug('attempt to set DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
            have_db_lock = self.dbInterface.get_object_lock(dummy_transferID, lock_interval=120)
            if not have_db_lock:
                # escape since locked by another thread
                msgStr = 'escape since locked by another thread'
                tmpLog.debug(msgStr)
                return None, msgStr
            # refresh group information since that could have been updated by another thread before getting the lock
            tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
            self.dbInterface.refresh_file_group_info(jobspec)
            # get transfer groups again with refreshed info
            tmpLog.debug('After db refresh call groups=jobspec.get_groups_of_output_files()')
            groups = jobspec.get_groups_of_output_files()
            tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
            # the dummy transfer ID is still there
            if dummy_transferID in groups:
                groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
                # get files with the dummy transfer ID across jobs
                fileSpecs = self.dbInterface.get_files_with_group_id(dummy_transferID)
                # submit transfer if there are more than 10 files or the group was made before more than 10 min
                msgStr = 'dummy_transferID = {0}  number of files = {1}'.format(dummy_transferID,len(fileSpecs))
                tmpLog.debug(msgStr)
                if len(fileSpecs) >= 10 or \
                        groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                    tmpLog.debug('prepare to transfer files')
                    # submit transfer and get a real transfer ID
                    # set the Globus destination Endpoint id and path will get them from Agis eventually  
                    #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                    self.srcEndpoint = queueConfig.stager['srcEndpoint']
                    self.Globus_srcPath = self.basePath
                    self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                    self.dstEndpoint = queueConfig.stager['dstEndpoint']
                    # Test the endpoints and create the transfer data class 
                    errMsg = None
                    try:
                        # Test endpoints for activation
                        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
                        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
                        if tmpStatsrc and tmpStatdst:
                            errStr = 'source Endpoint and destination Endpoint activated'
                            tmpLog.debug(errStr)
                        else:
                            errMsg = ''
                            if not tmpStatsrc :
                                errMsg += ' source Endpoint not activated '
                            if not tmpStatdst :
                                errMsg += ' destination Endpoint not activated '
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            have_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if not have_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (None,errMsg)
                            return tmpRetVal
                        # both endpoints activated now prepare to transfer data
                        tdata = None
                        tdata = TransferData(self.tc,
                                             self.srcEndpoint,
                                             self.dstEndpoint,
                                             sync_level="checksum")
                    except:
                        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (errStat, errMsg)
                        return tmpRetVal
                    # loop over all files
                    ifile = 0
                    for fileSpec in fileSpecs:
                        logfile = False
                        scope ='panda'
                        if fileSpec.scope is not None :
                            scope = fileSpec.scope
                        # for Yoda job set the scope to transient for non log files
                        if self.Yodajob :
                            scope = 'transient'
                        if fileSpec.fileType == "log" :
                            logfile = True
                            scope = scopeLog
                        # only print to log file first 25 files
                        if ifile < 25 :
                            msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        if ifile == 25 :
                            msgStr = "printed first 25 files skipping the rest".format(fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        hash = hashlib.md5()
                        hash.update('%s:%s' % (scope, fileSpec.lfn))
                        hash_hex = hash.hexdigest()
                        correctedscope = "/".join(scope.split('.'))
                        srcURL = fileSpec.path
                        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                                   scope=correctedscope,
                                                                                   hash1=hash_hex[0:2],
                                                                                   hash2=hash_hex[2:4],
                                                                                   lfn=fileSpec.lfn)
                        if logfile :
                            tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                        if ifile < 25 :
                            tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                        # add files to transfer object - tdata
                        if os.access(srcURL, os.R_OK):
                            if ifile < 25 :
                                tmpLog.debug("tdata.add_item({},{})".format(srcURL,dstURL))
                            tdata.add_item(srcURL,dstURL)
                        else:
                            errMsg = "source file {} does not exist".format(srcURL)
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if not release_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (False,errMsg)
                            return tmpRetVal
                        ifile += 1
                    # submit transfer 
                    tmpLog.debug('Number of files to transfer - {}'.format(len(tdata['DATA'])))
                    try:
                        transfer_result = self.tc.submit_transfer(tdata)
                        # check status code and message
                        tmpLog.debug(str(transfer_result))
                        if transfer_result['code'] == "Accepted":
                            # succeeded
                            # set transfer ID which are used for later lookup
                            transferID = transfer_result['task_id']
                            tmpLog.debug('successfully submitted id={0}'.format(transferID))
                            # set status for files
                            self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                            msgStr = 'submitted transfer with ID={0}'.format(transferID)
                            tmpLog.debug(msgStr)
                        else:
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if not release_db_lock:
                                errMsg = 'Could not release DB lock for {}'.format(dummy_transferID)
                                tmpLog.error(errMsg)
                            tmpRetVal = (None, transfer_result['message'])
                            return tmpRetVal
                    except Exception as e:
                        errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        return errStat, errMsg
                else:
                    msgStr = 'wait until enough files are pooled'
                    tmpLog.debug(msgStr)
                # release the lock
                tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(dummy_transferID) 
                if release_db_lock:
                    tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                    have_db_lock = False
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(msgStr)
                # return None to retry later
                return None, msgStr
            # release the db lock if needed
            if have_db_lock:
                tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(dummy_transferID) 
                if release_db_lock:
                    tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                    have_db_lock = False 
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(msgStr)
                    return None, msgStr
        # check transfer with real transfer IDs
        # get transfer groups 
        tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
        tmpLog.debug('transfer groups any state - {0}'.format(groups))
        if len(groups) == 0:
            tmpLog.debug("jobspec.get_groups_of_output_files(skip_done=True) returned no files ")
            tmpLog.debug("check_status return status - True ")
            return True,''

        for transferID in groups:
            # allow only valid UUID
            if validate_transferid(transferID) :
                # get transfer task
                tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog,self.tc,transferID)
                # return a temporary error when failed to get task
                if not tmpStat:
                    errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (str(self.tc),str(transferID))
                    tmpLog.error(errStr)
                    return None, errStr
                # return a temporary error when task is missing 
                if transferID not in transferTasks:
                    errStr = 'transfer task ID - {} is missing'.format(transferID)
                    tmpLog.error(errStr)
                    return None, errStr
                # succeeded in finding a transfer task by transferID
                if transferTasks[transferID]['status'] == 'SUCCEEDED':
                    tmpLog.debug('transfer task {} succeeded'.format(transferID))
                    self.set_FileSpec_objstoreID(jobspec, self.objstoreID, self.pathConvention)
                    if self.changeFileStatusOnSuccess:
                        self.set_FileSpec_status(jobspec, 'finished')
                    return True, ''
                # failed
                if transferTasks[transferID]['status'] == 'FAILED':
                    errStr = 'transfer task {} failed'.format(transferID)
                    tmpLog.error(errStr)
                    self.set_FileSpec_status(jobspec,'failed')
                    return False, errStr
                # another status
                tmpStr = 'transfer task {0} status: {1}'.format(transferID,transferTasks[transferID]['status'])
                tmpLog.debug(tmpStr)
                return None, ''
        # end of loop over transfer groups
        tmpLog.debug('End of loop over transfer groups - ending check_status function')
        return None,'no valid transfer id found'
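
The harvester example above wraps a lot of bookkeeping around a small core pattern: build a TransferData, submit it, then poll the task. The following is a minimal hedged sketch of just that core, not part of the example itself; `tc` is assumed to be an already-authenticated globus_sdk.TransferClient, and the endpoint IDs and paths are placeholders.

from globus_sdk import TransferData

# assumption: tc is an authenticated globus_sdk.TransferClient
# SRC_ENDPOINT_ID, DST_ENDPOINT_ID and the two paths are placeholders
tdata = TransferData(tc, SRC_ENDPOINT_ID, DST_ENDPOINT_ID, sync_level="checksum")
tdata.add_item("/source/path/file.txt", "/destination/path/file.txt")
transfer_result = tc.submit_transfer(tdata)
task_id = transfer_result["task_id"]
# task_wait() returns False while the task is still running
while not tc.task_wait(task_id, timeout=60, polling_interval=15):
    task = tc.get_task(task_id)
    if task["status"] == "FAILED":
        break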
Example #29
 def trigger_preparation(self, jobspec):
     # get logger
     tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')               
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc :
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog,self.tc,label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
         return True, ''
     # set the Globus destination Endpoint id and path will get them from Agis eventually  
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
     self.srcEndpoint = queueConfig.preparator['srcEndpoint']
     self.Globus_dstPath = self.basePath
     #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
     self.dstEndpoint = queueConfig.preparator['dstEndpoint']
     # get input files
     files = []
     lfns = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     for inLFN, inFile in iteritems(inFiles):
         # set path to each file
         inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
         dstpath = inFile['path']
         # check if path exists if not create it.
         if not os.access(self.basePath, os.F_OK):
             os.makedirs(self.basePath)
         # create the file paths for the Globus source and destination endpoints 
         Globus_srcpath = mover_utils.construct_file_path(self.Globus_srcPath, inFile['scope'], inLFN)
         Globus_dstpath = mover_utils.construct_file_path(self.Globus_dstPath, inFile['scope'], inLFN)
         files.append({'scope': inFile['scope'],
                       'name': inLFN,
                       'Globus_dstPath': Globus_dstpath,
                       'Globus_srcPath': Globus_srcpath})
         lfns.append(inLFN)
     tmpLog.debug('files[] {0}'.format(files))
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errStr = ''
             if not tmpStatsrc :
                 errStr += ' source Endpoint not activated '
             if not tmpStatdst :
                 errStr += ' destination Endpoint not activated '
             tmpLog.error(errStr)
             return False,errStr
         # both endpoints activated now prepare to transfer data
         if len(files) > 0:
             tdata = TransferData(self.tc,
                                  self.srcEndpoint,
                                  self.dstEndpoint,
                                  label=label,
                                  sync_level="checksum")
             # loop over all input files and add 
             for myfile in files:
                 tdata.add_item(myfile['Globus_srcPath'],myfile['Globus_dstPath'])
             # submit
             transfer_result = self.tc.submit_transfer(tdata)
             # check status code and message
             tmpLog.debug(str(transfer_result))
             if transfer_result['code'] == "Accepted":
                 # succeeded
                 # set transfer ID which are used for later lookup
                 transferID = transfer_result['task_id']
                 jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
                 tmpLog.debug('done')
                 return True,''
             else:
                 return False,transfer_result['message']
         # if no files to transfer return True
         return True, 'No files to transfer'
     except:
         errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
          return errStat, errMsg
    def check_stage_out_status(self, jobspec):
        # make logger
        tmpLog = self.make_logger(_logger,
                                  'PandaID={0} ThreadID={1}'.format(
                                      jobspec.PandaID,
                                      threading.current_thread().ident),
                                  method_name='check_stage_out_status')
        tmpLog.debug('start')
        # show the dummy transfer id and set to a value with the PandaID if needed.
        tmpLog.debug('self.dummy_transfer_id = {}'.format(
            self.dummy_transfer_id))
        if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base,
                                                      'XXXX'):
            old_dummy_transfer_id = self.dummy_transfer_id
            self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base,
                                                      jobspec.PandaID)
            tmpLog.debug(
                'Change self.dummy_transfer_id  from {0} to {1}'.format(
                    old_dummy_transfer_id, self.dummy_transfer_id))

        # default return
        tmpRetVal = (True, '')
        # set flag if have db lock
        have_db_lock = False
        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(
                jobspec.computingSite))
        # get the queueConfig and corresponding objStoreID_ES
        queueConfigMapper = QueueConfigMapper()
        queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
        # check queueConfig stager section to see if jobtype is set
        if 'jobtype' in queueConfig.stager:
            if queueConfig.stager['jobtype'] == "Yoda":
                self.Yodajob = True
        # set the location of the files in fileSpec.objstoreID
        # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json
        self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
        if self.Yodajob:
            self.pathConvention = int(queueConfig.stager['pathConvention'])
            tmpLog.debug(
                'Yoda Job - PandaID = {0} objstoreID = {1} pathConvention ={2}'
                .format(jobspec.PandaID, self.objstoreID, self.pathConvention))
        else:
            self.pathConvention = None
            tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(
                jobspec.PandaID, self.objstoreID))
        # test we have a Globus Transfer Client
        if not self.tc:
            errStr = 'failed to get Globus Transfer Client'
            tmpLog.error(errStr)
            return False, errStr
        # set transferID to None
        transferID = None
        # get the scope of the log files
        outfileattrib = jobspec.get_output_file_attributes()
        scopeLog = 'xxxx'
        for key in outfileattrib.keys():
            if "log.tgz" in key:
                scopeLog = outfileattrib[key]['scope']
        # get transfer groups
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug(
            'jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
        for dummy_transferID in groups:
            # skip if valid transfer ID not dummy one
            if validate_transferid(dummy_transferID):
                continue
            # lock for 120 sec
            tmpLog.debug(
                'attempt to set DB lock for self.id - {0} dummy_transferID - {1}'
                .format(self.id, dummy_transferID))
            have_db_lock = self.dbInterface.get_object_lock(dummy_transferID,
                                                            lock_interval=120)
            if not have_db_lock:
                # escape since locked by another thread
                msgStr = 'escape since locked by another thread'
                tmpLog.debug(msgStr)
                return None, msgStr
            # refresh group information since that could have been updated by another thread before getting the lock
            tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
            self.dbInterface.refresh_file_group_info(jobspec)
            # get transfer groups again with refreshed info
            tmpLog.debug(
                'After db refresh call groups=jobspec.get_groups_of_output_files()'
            )
            groups = jobspec.get_groups_of_output_files()
            tmpLog.debug(
                'jobspec.get_groups_of_output_files() = : {0}'.format(groups))
            # the dummy transfer ID is still there
            if dummy_transferID in groups:
                groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
                # get files with the dummy transfer ID across jobs
                fileSpecs = self.dbInterface.get_files_with_group_id(
                    dummy_transferID)
                # submit transfer if there are more than 10 files or the group was made before more than 10 min
                msgStr = 'dummy_transferID = {0}  number of files = {1}'.format(
                    dummy_transferID, len(fileSpecs))
                tmpLog.debug(msgStr)
                if len(fileSpecs) >= 10 or \
                        groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                    tmpLog.debug('prepare to transfer files')
                    # submit transfer and get a real transfer ID
                    # set the Globus destination Endpoint id and path will get them from Agis eventually
                    #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                    self.srcEndpoint = queueConfig.stager['srcEndpoint']
                    self.Globus_srcPath = self.basePath
                    self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                    self.dstEndpoint = queueConfig.stager['dstEndpoint']
                    # Test the endpoints and create the transfer data class
                    errMsg = None
                    try:
                        # Test endpoints for activation
                        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
                            tmpLog, self.tc, self.srcEndpoint)
                        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
                            tmpLog, self.tc, self.dstEndpoint)
                        if tmpStatsrc and tmpStatdst:
                            errStr = 'source Endpoint and destination Endpoint activated'
                            tmpLog.debug(errStr)
                        else:
                            errMsg = ''
                            if not tmpStatsrc:
                                errMsg += ' source Endpoint not activated '
                            if not tmpStatdst:
                                errMsg += ' destination Endpoint not activated '
                            # release process lock
                            tmpLog.debug(
                                'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                                .format(self.id, dummy_transferID))
                            have_db_lock = self.dbInterface.release_object_lock(
                                dummy_transferID)
                            if not have_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(
                                    dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (None, errMsg)
                            return tmpRetVal
                        # both endpoints activated now prepare to transfer data
                        tdata = None
                        tdata = TransferData(self.tc,
                                             self.srcEndpoint,
                                             self.dstEndpoint,
                                             sync_level="checksum")
                    except:
                        errStat, errMsg = globus_utils.handle_globus_exception(
                            tmpLog)
                        # release process lock
                        tmpLog.debug(
                            'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                            .format(self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(
                            dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(
                                dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (errStat, errMsg)
                        return tmpRetVal
                    # loop over all files
                    ifile = 0
                    for fileSpec in fileSpecs:
                        logfile = False
                        scope = 'panda'
                        if fileSpec.scope is not None:
                            scope = fileSpec.scope
                        # for Yoda job set the scope to transient for non log files
                        if self.Yodajob:
                            scope = 'transient'
                        if fileSpec.fileType == "log":
                            logfile = True
                            scope = scopeLog
                        # only print to log file first 25 files
                        if ifile < 25:
                            msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(
                                fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        if ifile == 25:
                            msgStr = "printed first 25 files skipping the rest".format(
                                fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        hash = hashlib.md5()
                        hash.update('%s:%s' % (scope, fileSpec.lfn))
                        hash_hex = hash.hexdigest()
                        correctedscope = "/".join(scope.split('.'))
                        srcURL = fileSpec.path
                        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
                            endPoint=self.Globus_dstPath,
                            scope=correctedscope,
                            hash1=hash_hex[0:2],
                            hash2=hash_hex[2:4],
                            lfn=fileSpec.lfn)
                        if logfile:
                            tmpLog.debug('src={srcURL} dst={dstURL}'.format(
                                srcURL=srcURL, dstURL=dstURL))
                        if ifile < 25:
                            tmpLog.debug('src={srcURL} dst={dstURL}'.format(
                                srcURL=srcURL, dstURL=dstURL))
                        # add files to transfer object - tdata
                        if os.access(srcURL, os.R_OK):
                            if ifile < 25:
                                tmpLog.debug("tdata.add_item({},{})".format(
                                    srcURL, dstURL))
                            tdata.add_item(srcURL, dstURL)
                        else:
                            errMsg = "source file {} does not exist".format(
                                srcURL)
                            # release process lock
                            tmpLog.debug(
                                'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                                .format(self.id, dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(
                                dummy_transferID)
                            if not release_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(
                                    dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (False, errMsg)
                            return tmpRetVal
                        ifile += 1
                    # submit transfer
                    tmpLog.debug('Number of files to transfer - {}'.format(
                        len(tdata['DATA'])))
                    try:
                        transfer_result = self.tc.submit_transfer(tdata)
                        # check status code and message
                        tmpLog.debug(str(transfer_result))
                        if transfer_result['code'] == "Accepted":
                            # succeeded
                            # set transfer ID which are used for later lookup
                            transferID = transfer_result['task_id']
                            tmpLog.debug(
                                'successfully submitted id={0}'.format(
                                    transferID))
                            # set status for files
                            self.dbInterface.set_file_group(
                                fileSpecs, transferID, 'running')
                            msgStr = 'submitted transfer with ID={0}'.format(
                                transferID)
                            tmpLog.debug(msgStr)
                        else:
                            # release process lock
                            tmpLog.debug(
                                'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                                .format(self.id, dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(
                                dummy_transferID)
                            if not release_db_lock:
                                errMsg = 'Could not release DB lock for {}'.format(
                                    dummy_transferID)
                                tmpLog.error(errMsg)
                            tmpRetVal = (None, transfer_result['message'])
                            return tmpRetVal
                    except Exception as e:
                        errStat, errMsg = globus_utils.handle_globus_exception(
                            tmpLog)
                        # release process lock
                        tmpLog.debug(
                            'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                            .format(self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(
                            dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(
                                dummy_transferID)
                        tmpLog.error(errMsg)
                        return errStat, errMsg
                else:
                    msgStr = 'wait until enough files are pooled'
                    tmpLog.debug(msgStr)
                # release the lock
                tmpLog.debug(
                    'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                    .format(self.id, dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(
                    dummy_transferID)
                if release_db_lock:
                    tmpLog.debug(
                        'released DB lock for self.id - {0} dummy_transferID - {1}'
                        .format(self.id, dummy_transferID))
                    have_db_lock = False
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(
                        dummy_transferID)
                    tmpLog.error(msgStr)
                # return None to retry later
                return None, msgStr
            # release the db lock if needed
            if have_db_lock:
                tmpLog.debug(
                    'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                    .format(self.id, dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(
                    dummy_transferID)
                if release_db_lock:
                    tmpLog.debug(
                        'released DB lock for self.id - {0} dummy_transferID - {1}'
                        .format(self.id, dummy_transferID))
                    have_db_lock = False
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(
                        dummy_transferID)
                    tmpLog.error(msgStr)
                    return None, msgStr
        # check transfer with real transfer IDs
        # get transfer groups
        tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
        tmpLog.debug('transfer groups any state - {0}'.format(groups))
        if len(groups) == 0:
            tmpLog.debug(
                "jobspec.get_groups_of_output_files(skip_done=True) returned no files "
            )
            tmpLog.debug("check_stage_out_status return status - True ")
            return True, ''

        for transferID in groups:
            # allow only valid UUID
            if validate_transferid(transferID):
                # get transfer task
                tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(
                    tmpLog, self.tc, transferID)
                # return a temporary error when failed to get task
                if not tmpStat:
                    errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (
                        str(self.tc), str(transferID))
                    tmpLog.error(errStr)
                    return None, errStr
                # return a temporary error when task is missing
                if transferID not in transferTasks:
                    errStr = 'transfer task ID - {} is missing'.format(
                        transferID)
                    tmpLog.error(errStr)
                    return None, errStr
                # succeeded in finding a transfer task by transferID
                if transferTasks[transferID]['status'] == 'SUCCEEDED':
                    tmpLog.debug(
                        'transfer task {} succeeded'.format(transferID))
                    self.set_FileSpec_objstoreID(jobspec, self.objstoreID,
                                                 self.pathConvention)
                    if self.changeFileStatusOnSuccess:
                        self.set_FileSpec_status(jobspec, 'finished')
                    return True, ''
                # failed
                if transferTasks[transferID]['status'] == 'FAILED':
                    errStr = 'transfer task {} failed'.format(transferID)
                    tmpLog.error(errStr)
                    self.set_FileSpec_status(jobspec, 'failed')
                    return False, errStr
                # another status
                tmpStr = 'transfer task {0} status: {1}'.format(
                    transferID, transferTasks[transferID]['status'])
                tmpLog.debug(tmpStr)
                return None, ''
        # end of loop over transfer groups
        tmpLog.debug(
            'End of loop over transfer groups - ending check_stage_out_status function'
        )
        return None, 'no valid transfer id found'
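
The two stager examples above build the Globus destination path from an md5 of "scope:lfn", using the <Globus_dstPath>/<scope>/<hash[0:2]>/<hash[2:4]>/<lfn> layout. Below is a small hedged helper that reproduces only that path construction, with a Python 3-safe encode; the function name and the sample values are illustrative, not taken from the examples.

import hashlib

def build_destination_path(globus_dst_path, scope, lfn):
    # md5 of "scope:lfn", encoded so this also runs under Python 3
    hash_hex = hashlib.md5(('%s:%s' % (scope, lfn)).encode('utf-8')).hexdigest()
    corrected_scope = "/".join(scope.split('.'))
    return "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=globus_dst_path,
                                                             scope=corrected_scope,
                                                             hash1=hash_hex[0:2],
                                                             hash2=hash_hex[2:4],
                                                             lfn=lfn)

# e.g. build_destination_path('/dst/base', 'panda', 'job.log.tgz')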
Example #31
    def stage_upload_files(self, project_id, inbound_endpoint_id,
                           inbound_endpoint_path):
        if not self.transfer_client:
            error = "Missing authenticated transfer client"
            self.log.info("Error: " + str(error))
            raise AuthenticationException(error)

        conn = DbConnection().connection()
        r = DbConnection().interface()
        proj = r.table('projects').get(project_id).run(conn)
        if not proj:
            error = "Unable to find project, " + project_id
            self.log.info("Error: " + str(error))
            raise NoSuchItem(error)

        if not proj['owner'] == self.mc_user_id:
            error = "Current user is not project owner, " + self.mc_user_id + ", " + project_id
            self.log.info("Error: " + str(error))
            raise AccessNotAllowedException(error)

        transfer = self.transfer_client
        self.log.info(
            "Starting upload staging... function: stage_upload_files(inbound_endpoint_id)"
        )
        self.log.info("Materials Commons user = "******"Globus transfer endpoint uuid = " + inbound_endpoint_id)

        # confirm target and inbound endpoints
        target_endpoint = transfer.get_endpoint(self.mc_target_ep_id)
        inbound_endpoint = transfer.get_endpoint(inbound_endpoint_id)

        if not target_endpoint:
            error = "Missing target endpoint, Materials Commons staging"
            self.log.info("Error: " + str(error))
            raise NoSuchItem(error)

        if not inbound_endpoint:
            error = "Missing inbound endpoint, user's input for staging"
            self.log.info("Error: " + str(error))
            raise NoSuchItem(error)

        target_endpoint_id = target_endpoint['id']

        self.log.info("About to confirm inbound path: " +
                      inbound_endpoint_path)
        # confirm inbound path
        try:
            transfer.operation_ls(inbound_endpoint_id,
                                  path=inbound_endpoint_path)
        except TransferAPIError as error:
            self.log.info("Error: " + str(error))
            raise error

        self.log.info("Finished confirm of inbound path: " +
                      inbound_endpoint_path)
        # database entries and one-time-directory on target
        dir_name = "transfer-" + project_id
        response = transfer.operation_mkdir(target_endpoint_id, dir_name)
        if not response["code"] == "DirectoryCreated":
            error = "Unable to create directory on target endpoint " + dir_name
            self.log.info("Error: " + str(error))
            raise TransferAPIError(error)

        self.log.info("Found for target endpoint: " +
                      target_endpoint['display_name'])
        self.log.info("    - target endpoint id " + target_endpoint_id)
        self.log.info("Found inbound endpoint: " +
                      inbound_endpoint['display_name'] + " from " +
                      inbound_endpoint["owner_string"])
        self.log.info("Initiating transfer to target directory: " + dir_name)

        # initiate transfer
        transfer_label = "Transfer from " + inbound_endpoint['display_name'] + \
                         "Materials Commons"
        transfer_data = TransferData(transfer,
                                     inbound_endpoint_id,
                                     target_endpoint_id,
                                     label=transfer_label,
                                     sync_level="checksum")
        transfer_data.add_item(inbound_endpoint_path,
                               "/" + dir_name,
                               recursive=True)
        transfer_result = transfer.submit_transfer(transfer_data)
        self.log.info("Finished upload staging: successfully completed")
        return_result = {}
        keys = ["code", "message", "task_id", "submission_id"]
        for key in keys:
            return_result[key] = transfer_result[key]

        return return_result
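
Before submitting, the staging example above validates the inbound path with operation_ls and creates a one-time directory with operation_mkdir. Here is a stripped-down hedged sketch of those two checks; `transfer` is assumed to be an authenticated globus_sdk.TransferClient, and the endpoint IDs, path, and directory name are placeholders.

from globus_sdk import TransferAPIError

# assumption: transfer is an authenticated globus_sdk.TransferClient
try:
    # raises TransferAPIError if the path is missing or not listable
    transfer.operation_ls(INBOUND_ENDPOINT_ID, path="/staging/inbound")
except TransferAPIError as error:
    print("inbound path check failed: " + str(error))
    raise

response = transfer.operation_mkdir(TARGET_ENDPOINT_ID, "transfer-example")
if not response["code"] == "DirectoryCreated":
    print("mkdir failed: " + response["message"])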
Example #32
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # default return
     tmpRetVal = (True, '')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc :
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog,self.tc,label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
         return True, ''
     # set the Globus destination Endpoint id and path will get them from Agis eventually  
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
     self.srcEndpoint = queueConfig.stager['srcEndpoint']
     self.Globus_srcPath = self.basePath
     self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
     self.dstEndpoint = queueConfig.stager['dstEndpoint']
     # Test the endpoints and create the transfer data class 
     errMsg = None
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errMsg = ''
             if not tmpStatsrc :
                 errMsg += ' source Endpoint not activated '
             if not tmpStatdst :
                 errMsg += ' destination Endpoint not activated '
             tmpLog.error(errMsg)
             tmpRetVal = (False,errMsg)
             return tmpRetVal
         # both endpoints activated now prepare to transfer data
         tdata = TransferData(self.tc,
                              self.srcEndpoint,
                              self.dstEndpoint,
                              label=label,
                              sync_level="checksum")
     except:
         errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
         tmpRetVal = (errStat, errMsg)
         return tmpRetVal
     # loop over all files
     fileAttrs = jobspec.get_output_file_attributes()
     lfns = []
     for fileSpec in jobspec.outFiles:
         scope = fileAttrs[fileSpec.lfn]['scope']
         hash = hashlib.md5()
         hash.update('%s:%s' % (scope, fileSpec.lfn))
         hash_hex = hash.hexdigest()
         correctedscope = "/".join(scope.split('.'))
         srcURL = fileSpec.path
         dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                    scope=correctedscope,
                                                                    hash1=hash_hex[0:2],
                                                                    hash2=hash_hex[2:4],
                                                                    lfn=fileSpec.lfn)
         tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
         # add files to transfer object - tdata
         if os.access(srcURL, os.R_OK):
             tmpLog.debug("tdata.add_item({},{})".format(srcURL,dstURL))
             tdata.add_item(srcURL,dstURL)
             lfns.append(fileSpec.lfn)
         else:
             errMsg = "source file {} does not exist".format(srcURL)
             tmpLog.error(errMsg)
             tmpRetVal = (False,errMsg)
             return tmpRetVal
     # submit transfer 
     try:
         transfer_result = self.tc.submit_transfer(tdata)
         # check status code and message
         tmpLog.debug(str(transfer_result))
         if transfer_result['code'] == "Accepted":
             # succeeded
             # set transfer ID which are used for later lookup
             transferID = transfer_result['task_id']
             tmpLog.debug('successfully submitted id={0}'.format(transferID))
             jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
             # set
             for fileSpec in jobspec.outFiles:
                  if fileSpec.fileAttributes is None:
                      fileSpec.fileAttributes = {}
                  fileSpec.fileAttributes['transferID'] = transferID
         else:
             tmpRetVal = (False, transfer_result['message'])
     except Exception as e:
         errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
         if errMsg is None:
             errtype, errvalue = sys.exc_info()[:2]
             errMsg = "{0} {1}".format(errtype.__name__, errvalue)
         tmpRetVal = (errStat,errMsg)
     # return
     tmpLog.debug('done')
     return tmpRetVal
    tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
        tmpLog, tc, dstEndpoint)
    if tmpStatsrc and tmpStatdst:
        errStr = 'source Endpoint and destination Endpoint activated'
        tmpLog.debug(errStr)
    else:
        errStr = ''
        if not tmpStatsrc:
            errStr += ' source Endpoint not activated '
        if not tmpStatdst:
            errStr += ' destination Endpoint not activated '
        tmpLog.error(errStr)
        sys.exit(2)
    # We are sending test files from our destination machine to the source machine
    # both endpoints activated now prepare to transfer data
    tdata = TransferData(tc, dstEndpoint, srcEndpoint, sync_level="checksum")
except:
    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
    sys.exit(1)

# create JobSpec
jobSpec = JobSpec()
jobSpec.jobParams = {
    'scopeLog': 'panda',
    'logFile': 'log',
}
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointIn = 'BNL-OSG2_DATADISK'
Example #34
def transfer_command(
    batch,
    sync_level,
    recursive,
    destination,
    source,
    label,
    preserve_mtime,
    verify_checksum,
    encrypt,
    submission_id,
    dry_run,
    delete,
    deadline,
    skip_activation_check,
    notify,
    perf_cc,
    perf_p,
    perf_pp,
    perf_udt,
):
    """
    Executor for `globus transfer`
    """
    source_endpoint, cmd_source_path = source
    dest_endpoint, cmd_dest_path = destination

    if recursive and batch:
        raise click.UsageError(
            ("You cannot use --recursive in addition to --batch. "
             "Instead, use --recursive on lines of --batch input "
             "which need it"))

    if (cmd_source_path is None or cmd_dest_path is None) and (not batch):
        raise click.UsageError(
            ("transfer requires either SOURCE_PATH and DEST_PATH or "
             "--batch"))

    # because python can't handle multiple **kwargs expansions in a single
    # call, we need to get a little bit clever
    # both the performance options (of which there are a few), and the
    # notification options (also there are a few) have elements which should be
    # omitted in some cases
    # notify comes to us clean, perf opts need more care
    # put them together into a dict before passing to TransferData
    kwargs = {}
    perf_opts = dict((k, v) for (k, v) in dict(
        perf_cc=perf_cc, perf_p=perf_p, perf_pp=perf_pp,
        perf_udt=perf_udt).items() if v is not None)
    kwargs.update(perf_opts)
    kwargs.update(notify)

    client = get_client()
    transfer_data = TransferData(client,
                                 source_endpoint,
                                 dest_endpoint,
                                 label=label,
                                 sync_level=sync_level,
                                 verify_checksum=verify_checksum,
                                 preserve_timestamp=preserve_mtime,
                                 encrypt_data=encrypt,
                                 submission_id=submission_id,
                                 delete_destination_extra=delete,
                                 deadline=deadline,
                                 skip_activation_check=skip_activation_check,
                                 **kwargs)

    if batch:

        @click.command()
        @click.option("--recursive", "-r", is_flag=True)
        @click.argument("source_path", type=TaskPath(base_dir=cmd_source_path))
        @click.argument("dest_path", type=TaskPath(base_dir=cmd_dest_path))
        def process_batch_line(dest_path, source_path, recursive):
            """
            Parse a line of batch input and turn it into a transfer submission
            item.
            """
            transfer_data.add_item(str(source_path),
                                   str(dest_path),
                                   recursive=recursive)

        shlex_process_stdin(
            process_batch_line,
            ("Enter transfers, line by line, as\n\n"
             "    [--recursive] SOURCE_PATH DEST_PATH\n"),
        )
    else:
        transfer_data.add_item(cmd_source_path,
                               cmd_dest_path,
                               recursive=recursive)

    if dry_run:
        formatted_print(
            transfer_data,
            response_key="DATA",
            fields=(
                ("Source Path", "source_path"),
                ("Dest Path", "destination_path"),
                ("Recursive", "recursive"),
            ),
        )
        # exit safely
        return

    # autoactivate after parsing all args and putting things together
    # skip this if skip-activation-check is given
    if not skip_activation_check:
        autoactivate(client, source_endpoint, if_expires_in=60)
        autoactivate(client, dest_endpoint, if_expires_in=60)

    res = client.submit_transfer(transfer_data)
    formatted_print(
        res,
        text_format=FORMAT_TEXT_RECORD,
        fields=(("Message", "message"), ("Task ID", "task_id")),
    )
Example #35
def transfer_command(
    batch,
    sync_level,
    recursive,
    destination,
    source,
    label,
    preserve_mtime,
    verify_checksum,
    encrypt,
    submission_id,
    dry_run,
    delete,
    deadline,
    skip_activation_check,
    notify,
    perf_cc,
    perf_p,
    perf_pp,
    perf_udt,
):
    """
    Executor for `globus transfer`
    """
    source_endpoint, cmd_source_path = source
    dest_endpoint, cmd_dest_path = destination

    if recursive and batch:
        raise click.UsageError(
            (
                "You cannot use --recursive in addition to --batch. "
                "Instead, use --recursive on lines of --batch input "
                "which need it"
            )
        )

    if (cmd_source_path is None or cmd_dest_path is None) and (not batch):
        raise click.UsageError(
            ("transfer requires either SOURCE_PATH and DEST_PATH or " "--batch")
        )

    # because python can't handle multiple **kwargs expansions in a single
    # call, we need to get a little bit clever
    # both the performance options (of which there are a few), and the
    # notification options (also there are a few) have elements which should be
    # omitted in some cases
    # notify comes to us clean, perf opts need more care
    # put them together into a dict before passing to TransferData
    kwargs = {}
    perf_opts = dict(
        (k, v)
        for (k, v) in dict(
            perf_cc=perf_cc, perf_p=perf_p, perf_pp=perf_pp, perf_udt=perf_udt
        ).items()
        if v is not None
    )
    kwargs.update(perf_opts)
    kwargs.update(notify)

    client = get_client()
    transfer_data = TransferData(
        client,
        source_endpoint,
        dest_endpoint,
        label=label,
        sync_level=sync_level,
        verify_checksum=verify_checksum,
        preserve_timestamp=preserve_mtime,
        encrypt_data=encrypt,
        submission_id=submission_id,
        delete_destination_extra=delete,
        deadline=deadline,
        skip_activation_check=skip_activation_check,
        **kwargs
    )

    if batch:

        @click.command()
        @click.option("--recursive", "-r", is_flag=True)
        @click.argument("source_path", type=TaskPath(base_dir=cmd_source_path))
        @click.argument("dest_path", type=TaskPath(base_dir=cmd_dest_path))
        def process_batch_line(dest_path, source_path, recursive):
            """
            Parse a line of batch input and turn it into a transfer submission
            item.
            """
            transfer_data.add_item(
                str(source_path), str(dest_path), recursive=recursive
            )

        shlex_process_stdin(
            process_batch_line,
            (
                "Enter transfers, line by line, as\n\n"
                "    [--recursive] SOURCE_PATH DEST_PATH\n"
            ),
        )
    else:
        transfer_data.add_item(cmd_source_path, cmd_dest_path, recursive=recursive)

    if dry_run:
        formatted_print(
            transfer_data,
            response_key="DATA",
            fields=(
                ("Source Path", "source_path"),
                ("Dest Path", "destination_path"),
                ("Recursive", "recursive"),
            ),
        )
        # exit safely
        return

    # autoactivate after parsing all args and putting things together
    # skip this if skip-activation-check is given
    if not skip_activation_check:
        autoactivate(client, source_endpoint, if_expires_in=60)
        autoactivate(client, dest_endpoint, if_expires_in=60)

    res = client.submit_transfer(transfer_data)
    formatted_print(
        res,
        text_format=FORMAT_TEXT_RECORD,
        fields=(("Message", "message"), ("Task ID", "task_id")),
    )
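
For comparison with the CLI-driven transfer_command examples above, the following is a minimal hedged end-to-end sketch using the SDK directly; TRANSFER_TOKEN and the endpoint IDs are placeholders, and the option values simply mirror flags shown in those examples rather than recommended settings.

import globus_sdk

# placeholders: TRANSFER_TOKEN is a valid Globus transfer access token,
# SRC_ENDPOINT_ID / DST_ENDPOINT_ID are endpoint UUIDs
authorizer = globus_sdk.AccessTokenAuthorizer(TRANSFER_TOKEN)
client = globus_sdk.TransferClient(authorizer=authorizer)

tdata = globus_sdk.TransferData(client,
                                SRC_ENDPOINT_ID,
                                DST_ENDPOINT_ID,
                                label="example transfer",
                                sync_level="checksum",
                                verify_checksum=True,
                                encrypt_data=True)
tdata.add_item("/source/dir", "/destination/dir", recursive=True)
task = client.submit_transfer(tdata)
print(task["message"], task["task_id"])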