Ejemplo n.º 1
0
def bulk_submit_xfer(submitjob, recursive=False):
    """Submit every file of ``submitjob`` as one Globus transfer task.

    The source and destination endpoint ids are read from the metadata of
    the first entry only, so all entries are expected to share the same
    pair of endpoints.

    :param submitjob: sequence of dicts; each provides ``sources``,
        ``destinations`` and ``metadata`` (with ``filesize`` and, on the
        first entry, ``source_globus_endpoint_id`` and
        ``dest_globus_endpoint_id``).
    :param recursive: accepted for interface compatibility; every item is
        currently added with ``recursive=False``.
    :returns: the ``task_id`` of the submitted task, or ``None`` when
        ``auto_activate_endpoint`` does not return ``'AlreadyActivated'``
        for either endpoint.
    """
    cfg = load_config()
    app_cfg = cfg['globus']['apps'][GLOBUS_AUTH_APP]
    auth_client = NativeAppAuthClient(app_cfg['client_id'])
    refresh_token = app_cfg['refresh_token']

    first_metadata = submitjob[0].get('metadata')
    source_endpoint_id = first_metadata.get('source_globus_endpoint_id')
    destination_endpoint_id = first_metadata.get('dest_globus_endpoint_id')

    authorizer = RefreshTokenAuthorizer(refresh_token=refresh_token,
                                        auth_client=auth_client)
    tc = TransferClient(authorizer=authorizer)

    # As both endpoints are expected to be Globus Server endpoints, send
    # auto-activate commands for both of them (source first) and give up as
    # soon as one is not reported as already activated.
    source_state = auto_activate_endpoint(tc, source_endpoint_id)
    logging.debug('a: %s', source_state)
    if source_state != 'AlreadyActivated':
        return None

    destination_state = auto_activate_endpoint(tc, destination_endpoint_id)
    logging.debug('b: %s', destination_state)
    if destination_state != 'AlreadyActivated':
        return None

    # Use a timestamp as the task label. ``%S`` (seconds) is the portable
    # directive; lowercase ``%s`` is a platform-specific extension.
    job_label = datetime.now().strftime('%Y%m%d%H%M%S')

    # From Globus: sync_level=checksum means that before files are
    # transferred, Globus computes checksums on the source and destination
    # files and only transfers files whose checksums differ.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum")

    for file in submitjob:
        source_path = file.get('sources')[0]
        dest_path = file.get('destinations')[0]
        filesize = file['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter(
            'daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize',
            filesize)

    transfer_result = tc.submit_transfer(tdata)

    return transfer_result["task_id"]
Ejemplo n.º 2
0
def globus_download_files(client: globus_sdk.TransferClient, endpoint_id: str,
                          files: tuple) -> None:
    """Download the listed remote files that are not yet present locally.

    Each file is saved under LOCAL_SAVE_PATH using its base name; files that
    already exist there are skipped. Every remaining file is fetched with
    its own transfer task, which is waited on before the next one starts.

    Arguments:
        client: the Globus transfer client to use
        endpoint_id: the ID of the endpoint to fetch from
        files: the remote paths of the files to fetch
    Exceptions:
        Raises RuntimeError after all files were attempted if at least one
        of them could not be retrieved
    """
    # Map each remote path still needing a download to its local target
    pending = {}
    for remote in files:
        local_target = os.path.join(LOCAL_SAVE_PATH,
                                    os.path.basename(remote))
        if os.path.exists(local_target):
            continue
        pending[remote] = local_target

    if not pending:
        return

    any_failed = False
    for attempt, (remote_path, save_path) in enumerate(pending.items(),
                                                       start=1):
        try:
            logging.info("Trying transfer %s: %s", str(attempt),
                         str(remote_path))
            request_data = globus_sdk.TransferData(client,
                                                   endpoint_id,
                                                   GLOBUS_LOCAL_ENDPOINT_ID,
                                                   label="Get image file",
                                                   sync_level="checksum")
            request_data.add_item(remote_path, save_path)
            submitted = client.submit_transfer(request_data)
            finished = client.task_wait(submitted['task_id'],
                                        timeout=600,
                                        polling_interval=5)
            if not finished:
                raise RuntimeError("Unable to retrieve JSON metadata: %s" %
                                   remote_path)
            if not os.path.exists(save_path):
                raise RuntimeError(
                    "Unable to find downloaded file at: %s" % save_path)
        except RuntimeError as ex:
            # Remember the failure but keep trying the remaining files
            any_failed = True
            logging.warning("Failed to get image: %s", str(ex))

    if any_failed:
        raise RuntimeError("Unable to retrieve all files individually")
Ejemplo n.º 3
0
def submit_transfer():
    """
    Turn the Browse Endpoint helper page response into a Globus transfer.

    - Build one recursive transfer item per dataset selected in the session,
      from the dataset endpoint to the endpoint/path chosen in the form.
    - Auto-activate both endpoints and submit the transfer.
    - Flash the task id and redirect the user to the transfer status page.
    """
    form = request.form

    chosen_ids = session['form']['datasets']
    chosen_datasets = [entry for entry in datasets if entry['id'] in chosen_ids]

    tokens = session['tokens']['transfer.api.globus.org']
    transfer = TransferClient(
        authorizer=RefreshTokenAuthorizer(
            tokens['refresh_token'],
            load_portal_client(),
            access_token=tokens['access_token'],
            expires_at=tokens['expires_at_seconds']))

    source_endpoint_id = app.config['DATASET_ENDPOINT_ID']
    source_endpoint_base = app.config['DATASET_ENDPOINT_BASE']
    destination_endpoint_id = form['endpoint_id']
    destination_folder = form.get('folder[0]')

    transfer_data = TransferData(transfer_client=transfer,
                                 source_endpoint=source_endpoint_id,
                                 destination_endpoint=destination_endpoint_id,
                                 label=form.get('label'))

    for dataset in chosen_datasets:
        origin = source_endpoint_base + dataset['path']

        # <form path>[<selected folder>/]<dataset name>/
        target = form['path']
        if destination_folder:
            target = target + destination_folder + '/'
        target = target + dataset['name'] + '/'

        transfer_data.add_item(source_path=origin,
                               destination_path=target,
                               recursive=True)

    for endpoint in (source_endpoint_id, destination_endpoint_id):
        transfer.endpoint_autoactivate(endpoint)

    submission = transfer.submit_transfer(transfer_data)
    task_id = submission['task_id']

    flash('Transfer request submitted successfully. Task ID: ' + task_id)

    return redirect(url_for('transfer_status', task_id=task_id))
Ejemplo n.º 4
0
def bulk_submit_xfer(submitjob, recursive=False, logger=logging.log):
    """Submit every file of ``submitjob`` as one Globus transfer task.

    The source and destination endpoint ids are read from the metadata of
    the first entry only, so all entries are expected to share the same
    pair of endpoints. The task is given a deadline of
    ``conveyor/globus_task_deadline`` minutes (default 2880) from now.

    :param submitjob: sequence of dicts; each provides ``sources``,
        ``destinations`` and ``metadata`` (with ``filesize`` and, on the
        first entry, ``source_globus_endpoint_id`` and
        ``dest_globus_endpoint_id``).
    :param recursive: accepted for interface compatibility; every item is
        currently added with ``recursive=False``.
    :param logger: callable invoked as ``logger(level, message)``.
    :returns: the ``task_id`` of the submitted task.
    """
    cfg = load_config(logger=logger)
    app_cfg = cfg['globus']['apps'][GLOBUS_AUTH_APP]
    auth_client = NativeAppAuthClient(app_cfg['client_id'])
    refresh_token = app_cfg['refresh_token']

    first_metadata = submitjob[0].get('metadata')
    source_endpoint_id = first_metadata.get('source_globus_endpoint_id')
    destination_endpoint_id = first_metadata.get('dest_globus_endpoint_id')

    authorizer = RefreshTokenAuthorizer(refresh_token=refresh_token,
                                        auth_client=auth_client)
    tc = TransferClient(authorizer=authorizer)

    # Use a timestamp as the task label. ``%S`` (seconds) is the portable
    # directive; lowercase ``%s`` is a platform-specific extension.
    now = datetime.datetime.now()
    job_label = now.strftime('%Y%m%d%H%M%S')

    # Retrieve globus_task_deadline to enforce a time window for completing
    # the transfers; the default is 2880 minutes, i.e. 48 hours.
    globus_task_deadline = config_get_int('conveyor', 'globus_task_deadline',
                                          False, 2880)
    deadline = now + datetime.timedelta(minutes=globus_task_deadline)

    # From Globus: sync_level=checksum means that before files are
    # transferred, Globus computes checksums on the source and destination
    # files and only transfers files whose checksums differ.
    tdata = TransferData(tc,
                         source_endpoint_id,
                         destination_endpoint_id,
                         label=job_label,
                         sync_level="checksum",
                         deadline=str(deadline))

    for file in submitjob:
        source_path = file.get('sources')[0]
        dest_path = file.get('destinations')[0]
        filesize = file['metadata']['filesize']
        # TODO: support passing a recursive parameter to Globus
        tdata.add_item(source_path, dest_path, recursive=False)
        record_counter(
            'daemons.conveyor.transfer_submitter.globus.transfers.submit.filesize',
            filesize)

    transfer_result = tc.submit_transfer(tdata)
    logger(logging.INFO, "transfer_result: %s" % transfer_result)

    return transfer_result["task_id"]
Ejemplo n.º 5
0
class UOCloudTransferClient:
    """Thin wrapper around a Globus ``TransferClient`` that authenticates
    with client credentials and runs blocking, recursive transfers between
    endpoints looked up by name."""

    def __init__(self, config: UOCloudSyncConfig):
        """Build the transfer client from the client id/secret in ``config``."""
        auth_client = ConfidentialAppAuthClient(
            client_id=config.get_client_id(),
            client_secret=config.get_client_secret())
        authorizer = ClientCredentialsAuthorizer(
            auth_client, "urn:globus:auth:scope:transfer.api.globus.org:all")
        self._transfer_client = TransferClient(authorizer=authorizer)
        # Names of the endpoints used by the most recent transfer_data() call
        self._src_endpoint = None
        self._dest_endpoint = None

    def get_endpoint_id(self, endpoint_name: str):
        """Return the id of the first full-text search hit for
        ``endpoint_name``, or ``None`` when the search yields nothing."""
        hits = iter(
            self._transfer_client.endpoint_search(filter_fulltext=endpoint_name))
        try:
            # Only the first result is considered; it is not verified further.
            best_match = next(hits)
        except StopIteration:
            return None
        return best_match['id']

    def transfer_data(self, src_endpoint: str, src_path: Union[str, Path, PathLike],
                      dest_endpoint: str, dest_path: Union[str, Path, PathLike]):
        """Recursively transfer ``src_path`` to ``dest_path`` and wait for it.

        Endpoint names are resolved with ``get_endpoint_id``; if either one
        cannot be resolved an error is printed and the method returns. A
        ``TransferAPIError`` on submission is printed and terminates the
        process with exit status 1. After the task completes, the
        successfully transferred files are printed.
        """
        self._src_endpoint = src_endpoint
        self._dest_endpoint = dest_endpoint

        source_id = self.get_endpoint_id(src_endpoint)
        if not source_id:
            print(f'ERROR: Unable to find source endpoint id for: "{self._src_endpoint}"')
            return

        destination_id = self.get_endpoint_id(dest_endpoint)
        if not destination_id:
            print(f'ERROR: Unable to find destination endpoint id for: "{self._dest_endpoint}"')
            return

        request = TransferData(self._transfer_client,
                               source_id,
                               destination_id,
                               encrypt_data=True)
        request.add_item(src_path, dest_path, recursive=True)

        print(
            f'Submitting a transfer task from {self._src_endpoint}:{src_path} to {self._dest_endpoint}:{dest_path}')
        try:
            submitted = self._transfer_client.submit_transfer(request)
        except TransferAPIError as e:
            print(str(e))
            sys.exit(1)

        task_id = submitted['task_id']
        print(f'\tWaiting for transfer to complete with task_id: {task_id}')
        # task_wait() gives up after `timeout` seconds; keep waiting, printing
        # a dot per expired wait, until it reports completion.
        done = False
        while not done:
            done = self._transfer_client.task_wait(
                task_id=task_id, timeout=3600, polling_interval=60)
            if not done:
                print('.', end='')

        print('Transferred files:')
        transferred = self._transfer_client.task_successful_transfers(
            task_id=task_id, num_results=None)
        for entry in transferred:
            print(f"\t{entry['source_path']} -> {entry['destination_path']}")
Ejemplo n.º 6
0
def globus_download_files(client: globus_sdk.TransferClient, endpoint_id: str,
                          files: tuple) -> None:
    """Download the listed remote files and load each one into iRODS.

    Each file is saved under LOCAL_SAVE_PATH using its base name; files that
    already exist there are skipped. Every remaining file is fetched with
    its own Globus transfer task, uploaded with ``iput`` from the directory
    it was saved in, and then removed locally.

    Arguments:
        client: the Globus transfer client to use
        endpoint_id: the ID of the endpoint to fetch from
        files: the remote paths of the files to fetch
    Exceptions:
        Raises RuntimeError if ``icd`` to IRODS_LOCATION fails, or, after
        all files were attempted, if at least one of them could not be
        downloaded or uploaded
    """
    # Map each remote path still needing a download to its local target
    file_transfers = {}
    for one_file in files:
        globus_save_path = os.path.join(LOCAL_SAVE_PATH,
                                        os.path.basename(one_file))
        if not os.path.exists(globus_save_path):
            file_transfers[one_file] = globus_save_path

    if not file_transfers:
        return

    resp = subprocess.run(['icd', IRODS_LOCATION], stdout=subprocess.PIPE)
    if resp.returncode != 0:
        raise RuntimeError("Unable to change to iRODS location %s" %
                           IRODS_LOCATION)

    have_exception = False
    for cnt, (remote_path, save_path) in enumerate(file_transfers.items(),
                                                   start=1):
        try:
            logging.info("Trying transfer %s: %s", str(cnt),
                         str(remote_path))
            transfer_setup = globus_sdk.TransferData(
                client,
                endpoint_id,
                GLOBUS_LOCAL_ENDPOINT_ID,
                label="Get image file",
                sync_level="checksum")
            transfer_setup.add_item(remote_path, save_path)
            transfer_request = client.submit_transfer(transfer_setup)
            task_result = client.task_wait(transfer_request['task_id'],
                                           timeout=600,
                                           polling_interval=5)
            if not task_result:
                raise RuntimeError("Unable to retrieve file: %s" %
                                   remote_path)
            if not os.path.exists(save_path):
                raise RuntimeError(
                    "Unable to find downloaded file at: %s" % save_path)

            # print() does not interpolate logging-style arguments, so the
            # path has to be formatted into the message explicitly.
            print("Uploading file to irods: %s" % save_path)
            # Run iput inside the file's directory via ``cwd`` instead of
            # os.chdir(), so this process's working directory is never
            # changed and cannot be left changed if an error is raised.
            resp = subprocess.run(
                ['iput', '-K', '-f',
                 os.path.basename(save_path)],
                stdout=subprocess.PIPE,
                cwd=os.path.dirname(save_path) or None)
            if resp.returncode != 0:
                raise RuntimeError("Unable to load file to iRODS %s" %
                                   save_path)
            print("    removing uploaded file")
            os.remove(save_path)

        except RuntimeError as ex:
            # Remember the failure but keep trying the remaining files
            have_exception = True
            logging.warning("Failed to get image: %s", str(ex))

    if have_exception:
        raise RuntimeError("Unable to retrieve all files individually")
Ejemplo n.º 7
0
# Create the transfer client from the previously obtained authorizer.
# Only ordinary errors are trapped here: a bare ``except`` would also
# swallow KeyboardInterrupt and SystemExit.
try:
    tc = TransferClient(authorizer=authorizer)
except Exception:
    print(
        "ERROR: TransferClient() call failed!  Unable to call the Globus transfer interface with the provided auth info!"
    )
    sys.exit(-1)

# Now we should have auth, try setting up a transfer.
# sync_level="size" is passed so the service can skip files by size.

tdata = TransferData(tc,
                     source_endpoint_id,
                     destination_endpoint_id,
                     label="DCDE Relion transfer",
                     sync_level="size")

tdata.add_item(source_dir, dest_dir, recursive=True)

transfer_result = tc.submit_transfer(tdata)

print("task_id =", transfer_result["task_id"])

# task_wait() returns False when its timeout expires while the task is
# still running, so keep waiting (printing a dot per expired wait).
while not tc.task_wait(
        transfer_result['task_id'], timeout=1200, polling_interval=10):
    print(".", end="")
print("\n{} completed!".format(transfer_result['task_id']))

# NOTE(review): the listing is not stored or printed; presumably this was
# meant for interactive display - confirm.
os.listdir(path=dest_dir)
Ejemplo n.º 8
0
def do_job(self,tokens, task_color,stage_in_source,stage_in_dest,stage_out_dest,stage_in_source_path,stage_in_dest_path,stage_out_dest_path):
    """Stage data in, run the (placeholder) job, then stage the data out.

    Progress is reported by POSTing JSON messages to the local messenger
    endpoint. Both transfers are recursive Globus transfers:

    * stage in:  ``stage_in_source:stage_in_source_path`` ->
      ``stage_in_dest:stage_in_dest_path``
    * stage out: ``stage_in_dest:stage_in_dest_path`` ->
      ``stage_out_dest:stage_out_dest_path``

    :param tokens: token dict keyed by resource server; the
        ``'transfer.api.globus.org'`` entry must provide ``refresh_token``,
        ``access_token`` and ``expires_at_seconds``.
    :param task_color: opaque value forwarded in every messenger payload.
    :raises Reject: (not requeued) when either transfer does not end with
        status ``SUCCEEDED``.
    """
    messenger_url = 'http://localhost:8081/api/messenger'
    json_headers = {'content-type': 'application/json'}

    # Resolve the task id first: the token-refresh callback below reads it,
    # and the authorizer may invoke that callback while it is being built.
    # NOTE(review): relies on do_job exposing ``request.id`` - presumably
    # this function is registered as a task elsewhere; confirm.
    task_id = do_job.request.id

    def post_to_messenger(payload):
        """POST one JSON payload to the messenger endpoint."""
        requests.post(messenger_url, headers=json_headers, data=json.dumps(payload))

    def notify(text, step):
        """Send a progress message for this task, prefixed with its id."""
        post_to_messenger({'message': '[' + task_id + ']' + text, 'task_id': task_id, 'step': step, 'task_color': task_color})

    def post_refresh_message(token_data):
        """Forward a refreshed transfer access token to the messenger."""
        print("I got called")
        post_to_messenger({'key_message': token_data.by_resource_server['transfer.api.globus.org']['access_token'], 'task_id': task_id, 'step': '1', 'task_color': task_color})

    def wait_for_final_status(transfer_task_id):
        """Block until the transfer task finishes and return its status.

        task_wait() returns False when its timeout expires with the task
        still running, so it has to be called in a loop; a single call
        would report long-running transfers as unfinished.
        """
        while not tc.task_wait(transfer_task_id, timeout=60, polling_interval=10):
            pass
        return tc.get_task(transfer_task_id)['status']

    # NOTE(review): dill.loads executes arbitrary code embedded in the
    # payload; this is only safe while the redis store is fully trusted.
    auth_client = dill.loads(redis_store.get('auth_client'))

    transfer_tokens = tokens['transfer.api.globus.org']
    authorizer = globus_sdk.RefreshTokenAuthorizer(transfer_tokens['refresh_token'], auth_client, transfer_tokens['access_token'], expires_at=transfer_tokens['expires_at_seconds'], on_refresh=post_refresh_message)

    tc = TransferClient(authorizer=authorizer)

    # ---- stage in -------------------------------------------------------
    data = globus_sdk.TransferData(tc, stage_in_source, stage_in_dest, label="stagein")
    data.add_item(stage_in_source_path, stage_in_dest_path, True)
    status = tc.submit_transfer(data)

    notify('Queue wait is done, now initiating Stage in....', '2')

    complete_status = wait_for_final_status(status["task_id"])
    print("The complete status is :")
    print(complete_status)

    if complete_status == "SUCCEEDED":
        notify('      Stage In succeeded', '2')
    else:
        notify('      Stage In failed, canceling the job..... ', '2')
        # stop and delete the job
        raise Reject("Stage in Failed",requeue=False)

    # ---- the job itself (placeholder) -----------------------------------
    notify('Running the job', '3')
    time.sleep(3)
    notify('Job is done', '3')

    # ---- stage out ------------------------------------------------------
    notify('      Initiating Stage out.... ', '4')

    data = globus_sdk.TransferData(tc, stage_in_dest, stage_out_dest, label="stageout")
    data.add_item(stage_in_dest_path, stage_out_dest_path, True)
    status = tc.submit_transfer(data)

    complete_status = wait_for_final_status(status["task_id"])

    if complete_status == "SUCCEEDED":
        notify('      Stage Out succeeded ', '4')
    else:
        notify('      Stage Out failed, canceling the job.....', '4')
        raise Reject("Stage out Failed",requeue=False)
Ejemplo n.º 9
0
def globus_transfer(  # noqa: C901
        remote_endpoint,
        remote_path,
        name,
        transfer_type,
        non_blocking=False):
    """
    Transfer the file `name` between the local and a remote Globus endpoint.

    The local endpoint UUID is read from ~/.zstash.ini. If the ini file does
    not exist, it is created with empty values. When no UUID is configured,
    the local endpoint is looked up from the FQDN via regex_endpoint_map.
    With transfer_type "get" the file goes from remote_path to the current
    directory; with any other value it goes the opposite way. Unless
    non_blocking is set, the function waits for the task to finish and logs
    the outcome. Unrecoverable errors are logged and end the process with
    exit status 1.
    """

    def _log_and_exit(err):
        # Shared error path for the submit and wait phases.
        if isinstance(err, TransferAPIError):
            if err.code == "NoCredException":
                logger.error(
                    "{}. Please go to https://app.globus.org/endpoints and activate the endpoint."
                    .format(err.message))
            else:
                logger.error(err)
        else:
            logger.error("Exception: {}".format(err))
        sys.exit(1)

    # --- resolve the local endpoint ---------------------------------------
    ini_path = os.path.expanduser("~/.zstash.ini")
    ini = configparser.ConfigParser()
    local_endpoint = None
    if not ini.read(ini_path):
        # No ini file yet: write a template with an empty UUID.
        ini["local"] = {"globus_endpoint_uuid": ""}
        try:
            with open(ini_path, "w") as ini_file:
                ini.write(ini_file)
        except Exception as err:
            logger.error(err)
            sys.exit(1)
    elif ini.has_section("local"):
        local_endpoint = ini["local"].get("globus_endpoint_uuid")

    if not local_endpoint:
        hostname = socket.getfqdn()
        for pattern, endpoint in regex_endpoint_map.items():
            if re.fullmatch(pattern, hostname):
                local_endpoint = endpoint
                break
    if not local_endpoint:
        logger.error(
            "{} does not have the local Globus endpoint set".format(ini_path))
        sys.exit(1)

    # --- resolve the remote endpoint and the transfer direction -----------
    alias = remote_endpoint.upper()
    if alias in hpss_endpoint_map:
        remote_endpoint = hpss_endpoint_map[alias]

    if transfer_type == "get":
        src_ep, src_path = remote_endpoint, os.path.join(remote_path, name)
        dst_ep, dst_path = local_endpoint, os.path.join(os.getcwd(), name)
    else:
        src_ep, src_path = local_endpoint, os.path.join(os.getcwd(), name)
        dst_ep, dst_path = remote_endpoint, os.path.join(remote_path, name)

    # Label: sanitized last component of remote_path plus the file stem.
    last_component = os.path.basename(os.path.normpath(remote_path))
    label = "{} {}".format(
        re.sub("[^A-Za-z0-9_ -]", "", last_component),
        name.split(".")[0])

    # --- authenticate ------------------------------------------------------
    native_client = NativeClient(
        client_id="6c1629cf-446c-49e7-af95-323c6412397f",
        app_name="Zstash",
        default_scopes=
        "openid urn:globus:auth:scope:transfer.api.globus.org:all",
    )
    native_client.login(no_local_server=True, refresh_tokens=True)
    authorizers = native_client.get_authorizers()
    tc = TransferClient(authorizers.get("transfer.api.globus.org"))

    for ep_id in (src_ep, dst_ep):
        activation = tc.endpoint_autoactivate(ep_id, if_expires_in=600)
        if activation.get("code") == "AutoActivationFailed":
            logger.error(
                "The {} endpoint is not activated or the current activation expires soon. Please go to https://app.globus.org/file-manager/collections/{} and (re)activate the endpoint."
                .format(ep_id, ep_id))
            sys.exit(1)

    # --- submit ------------------------------------------------------------
    td = TransferData(
        tc,
        src_ep,
        dst_ep,
        label=label,
        sync_level="checksum",
        verify_checksum=True,
        preserve_timestamp=True,
        fail_on_quota_errors=True,
    )
    td.add_item(src_path, dst_path)
    try:
        task = tc.submit_transfer(td)
    except Exception as err:
        _log_and_exit(err)

    if non_blocking:
        return

    # --- wait and report ---------------------------------------------------
    try:
        task_id = task.get("task_id")
        # A Globus transfer job (task) can be in one of the three states:
        # ACTIVE, SUCCEEDED, FAILED. Every 20 seconds the status of the task
        # is polled from the Globus Transfer service, with a 20 second
        # timeout limit. If the task is ACTIVE after time runs out,
        # 'task_wait' returns False, and True otherwise.
        finished = False
        while not finished:
            finished = tc.task_wait(task_id, 20, 20)
        # The task has finished (SUCCEEDED or FAILED); find out which.
        task = tc.get_task(task_id)
        if task["status"] != "SUCCEEDED":
            logger.error("Transfer FAILED")
        else:
            logger.info(
                "Globus transfer {}, from {}{} to {}{} succeeded".format(
                    task_id, src_ep, src_path, dst_ep, dst_path))
    except Exception as err:
        _log_and_exit(err)
Ejemplo n.º 10
0
def _split_globus_url(url):
    """Split ``globus://<endpoint-id>:<path>`` into (endpoint id, path).

    Only the first two colons are treated as separators, so a path that
    itself contains ``:`` is kept intact.
    """
    parts = url.split(':', 2)
    if len(parts) != 3:
        raise ValueError('Malformed Globus URL: %r' % (url,))
    return parts[1].replace('/', ''), parts[2]


async def create_transfer_globus(transferObject: TransferBase,
                                 transfer_client: TransferClient,
                                 isFolder: bool = False):
    """Submit a Globus transfer from ``transferObject.source`` to
    ``transferObject.target`` and describe the submitted task.

    Both locations use the form ``globus://<endpoint-id>:<path>``, e.g.
    globus://fd9c190c-b824-11e9-98d7-0a63aa6b37da:/gridftp/pub/databases/eva/PRJEB6057/MOCH.population_sites.CHIR1_0.20140307_EVA_ss_IDs.fixed.vcf.gz

    If the target path ends with ``/`` the source file name is appended to
    it. The transfer is recursive when ``isFolder`` is set, when the source
    path ends with ``/``, or when ``transferObject.options['recursive']``
    equals the string ``"True"``.

    :returns: dict with ``globus_response``, ``status`` (200),
        ``rdsds_tracking_id`` and ``globus_status_url``; for a
        ``GlobusAPIError`` the result of ``handle_globus_api_error``.
    :raises ValueError: if source or target is missing or malformed.
    :raises NetworkError, GlobusError: re-raised after being logged.
    """
    source = transferObject.source
    target = transferObject.target
    if not source or not target:
        # Fail with a clear message instead of an UnboundLocalError later.
        raise ValueError('Both source and target are required for a transfer')

    source_endpoint_id, source_path = _split_globus_url(source)
    target_endpoint_id, target_path = _split_globus_url(target)

    # Last path segment; empty when the source path ends with '/'.
    source_name = source_path.rsplit('/', 1)[-1]

    if target_path.endswith('/') and source_name:
        target_path = target_path + source_name

    # A source path ending with '/' denotes a folder.
    if source_name == '':
        isFolder = True

    options = transferObject.options
    if options and 'recursive' in options and options['recursive'] == "True":
        isFolder = True

    timestamp = datetime.now().strftime("%d-%m-%Y %H-%M-%S")

    try:
        tdata = TransferData(transfer_client,
                             source_endpoint_id,
                             target_endpoint_id,
                             label='RDSDS ' + timestamp,
                             sync_level="checksum")

        if isFolder:
            tdata.add_item(source_path, target_path, recursive=True)
        else:
            tdata.add_item(source_path, target_path)

        transfer_result = transfer_client.submit_transfer(tdata)
        task_id = transfer_result["task_id"]

        return {
            'globus_response': json.loads(str(transfer_result)),
            'status': 200,
            'rdsds_tracking_id': 'globus-' + task_id,
            'globus_status_url':
                'https://app.globus.org/activity/' + task_id + '/overview',
        }

    except GlobusAPIError as e:
        # Error response from the REST service, check the code and message
        # for details.
        return handle_globus_api_error(e)
    except NetworkError:
        logging.error(("Network Failure. "
                       "Possibly a firewall or connectivity issue"))
        raise
    except GlobusError:
        logging.exception("Totally unexpected GlobusError!")
        raise
Ejemplo n.º 11
0
def transfer(sp,destination_endpoint_id,one_endpoint):
    tokens = None
    try:
        # if we already have tokens, load and use them
        tokens = load_tokens_from_file(TOKEN_FILE)
    except:
        pass

    if not tokens:
        # if we need to get tokens, start the Native App authentication process
        tokens = do_native_app_authentication(CLIENT_ID, REDIRECT_URI, SCOPES)

        try:
            save_tokens_to_file(TOKEN_FILE, tokens)
        except:
            pass

    transfer_tokens = tokens['transfer.api.globus.org']

    auth_client = NativeAppAuthClient(client_id=CLIENT_ID,environment='sandbox')

    authorizer = RefreshTokenAuthorizer(
        transfer_tokens['refresh_token'],
        auth_client,
        access_token=transfer_tokens['access_token'],
        expires_at=transfer_tokens['expires_at_seconds'],
        on_refresh=update_tokens_file_on_refresh)

    #transfer = TransferClient(authorizer=authorizer,environment='sandbox')
    tc = TransferClient(authorizer=authorizer, environment="sandbox")

    ##################---ENDPOINTS---###########################

    source_endpoint_id = '5a2e5704-b028-11e7-bdad-22000bdb2406' #sb vmtb4
    #source_endpoint_id = '55705028-aa15-11e7-bdad-22000bdb2406' #sb yulie7t
    #source_endpoint_id = 'b0b16296-88e7-11e7-a971-22000a92523b' #bare chameleon
    #source_endpoint_id = 'e5762bc2-8466-11e7-a8ed-22000a92523b' #large_chameleon
    #source_endpoint_id = '8b26cc0e-877b-11e7-a949-22000a92523b'#ubuntu-vm
    #source_endpoint_id = 'ad19b012-77cf-11e7-8b98-22000b9923ef'#chameleon
    # source_endpoint_id = raw_input('Input source endpoint UUID: ')
    
    #destination path
    ##############---SOURCE PATH---######################
    #source_path = '/home/parallels/stream_transfer/test_files/'
    #source_path = '/home/parallels/stream_transfer/zero_globus/test_files/'
    source_path = sp
    #source_path ='/home/cc/streaming/zero_globus/test_files/test.txt'
    #source_path = '/home/parallels/stream_transfer/zero_globus/test_files/test.txt'
    #destination path
    destination_path = '/~/'
    #destination_path = '/~/'+ sp.split("/")[-1] #use for one file
    ##if one_endpoint:
    ##    destination_path = '/projects/BrainImagingADSP/yzamora/'
    ##else:
    ##    destination_path = '/projects/BrainImagingADSP/yzamora/'+ sp.split("/")[-1] #use for one file
    #Using my sample UUID from globus tutorial
    #destination_endpoint_id = 'ddb59aef-6d04-11e5-ba46-22000b92c6ec' #globus
    #destination_endpoint_id = '5d1da0fe-3c07-11e7-bcfc-22000b9a448b' #laptop



    #tc.endpoint_autoactivate(source_endpoint_id)
    #tc.endpoint_autoactive(destination_endpoint_id)
    ep1 = tc.get_endpoint(destination_endpoint_id)
    tc.endpoint_autoactivate(destination_endpoint_id)
    #ep1 is setting the activated endpoint to be a variable to work with
    tc.endpoint_autoactivate(source_endpoint_id)

    label = "medium data transfer"
    #tdata = globus_sdk.TransferData(tc, source_endpoint_id, destination_endpoint_id,label=label, sync_level='0')
    tdata = globus_sdk.TransferData(tc, source_endpoint_id, destination_endpoint_id,label=label, perf_cc=3, sync_level=None, verify_checksum=False)
    #tdata = globus_sdk.TransferData(tc, source_endpoint_id, destination_endpoint_id,label=label)
    if one_endpoint:
        tdata.add_item(source_path,destination_path,recursive=True)
    else:
        tdata.add_item(source_path,destination_path,recursive=False)

    submit_result = tc.submit_transfer(tdata)
    print("Task ID:", submit_result["task_id"])
    """
    Checking for time completion using globus calls
    
    """
    #print("Completion time:", submit_result["completion_time"])

    #setup of the transfer, submits as a https post request
    #transfer_data = TransferData(transfer_client=tc,
    #                     source_endpoint=source_endpoint_id,
    #                     destination_endpoint=destination_endpoint_id,
    #                     label='Transfer',
    #                     sync_level='checksum')
    #transfer_data.add_item(source_path=source_path,destination_path=destination_path, recursive=False)
    #task_id=transfer.submit_transfer(transfer_data)['task_id']

    #waiting for file to transfer
    status = tc.get_task(submit_result["task_id"],fields="status")["status"]
    poll_interval = 2
    max_wait = 90
    wait_time = 0
    while not (status in ["SUCCEEDED", "FAILED"]):
        if (wait_time >= max_wait): break
        print("Task not yet complete (status {}), sleeping for {} seconds..." \
          .format(status, poll_interval))
        time.sleep(poll_interval)
        wait_time += poll_interval
        status = tc.get_task(submit_result["task_id"], fields="status")["status"]

    if status == "FAILED":
        print("WARNING! File transfer FAILED!")

    #deleting file after transfer
    if status == "SUCCEEDED":
        end_time = datetime.datetime.utcnow()
        start_time = end_time - datetime.timedelta(minutes=200)

    #limit = response objects
    #        data = tc.task_list(filter="type:TRANSFER,DELETE/request_time:%s,%s"
    #        % (start_time, end_time), limit=5)

        #print("File transfer SUCCEEDED, will delete file from local directory now")
        """ r = tc.task_list(num_results=1, filter="type:TRANSFER,DELETE")
Ejemplo n.º 12
0
def validate():
    """Flush a crawl's validation queue to a file and send it over Globus.

    Reads ``crawl_id``, ``globus_eid``, ``transfer_token``,
    ``source_destination`` and ``dataset_info`` from the JSON request body.
    Messages received from the SQS queue ``validate_<crawl_id>`` are written
    to a timestamped ``.txt`` file in the working directory and deleted from
    the queue until a receive call returns no messages. The file is then
    submitted as a Globus transfer to ``globus_eid`` under
    ``source_destination``, and the call blocks until the task reports
    ``SUCCEEDED``. The local file is removed afterwards in every case.

    Returns:
        The string ``"[200] Submitted"`` once the transfer has succeeded.

    Calls ``abort(400, ...)`` when the queue URL cannot be resolved, when the
    transfer client cannot be built, or when draining/transferring fails.
    """
    # TODO: Add source endpoint
    source_endpoint_id = "5ecf6444-affc-11e9-98d4-0a63aa6b37da"

    params = request.json
    crawl_id = params["crawl_id"]
    globus_eid = params["globus_eid"]
    transfer_token = params["transfer_token"]
    source_destination = params["source_destination"]
    # Not used yet; the lookup still requires the key to be present.
    dataset_info = params["dataset_info"]  # To be implemented later

    client = boto3.client('sqs',
                          aws_access_key_id=os.environ["aws_access"],
                          aws_secret_access_key=os.environ["aws_secret"],
                          region_name='us-east-1')

    # `except Exception` (rather than a bare except) so that SystemExit and
    # KeyboardInterrupt are not converted into a 400 response.
    try:
        response = client.get_queue_url(
            QueueName=f'validate_{crawl_id}',
            QueueOwnerAWSAccountId=os.environ["aws_account_id"])
    except Exception:  # Add SQS.Client.exceptions.QueueDoesNotExist error
        abort(400, "Invalid crawl ID")

    try:
        authorizer = AccessTokenAuthorizer(transfer_token)
        tc = TransferClient(authorizer=authorizer)
    except Exception:  # Add exception
        abort(400, "Invalid transfer token")

    crawl_queue = response["QueueUrl"]

    date = datetime.datetime.now()
    file_name = date.strftime("%m_%d_%Y-%H_%M_%S") + ".txt"

    try:
        with open(file_name, "w") as f:

            while True:
                sqs_response = client.receive_message(
                    QueueUrl=crawl_queue,
                    MaxNumberOfMessages=1,  # To be toggled
                    WaitTimeSeconds=1)

                # No "Messages" key means this receive returned nothing;
                # treat the queue as drained.
                if "Messages" not in sqs_response:
                    break

                del_list = []

                for message in sqs_response["Messages"]:
                    # PROCESS MESSAGE_BODY
                    f.write(message["Body"])

                    del_list.append({
                        'ReceiptHandle': message["ReceiptHandle"],
                        'Id': message["MessageId"]
                    })

                if del_list:
                    client.delete_message_batch(QueueUrl=crawl_queue,
                                                Entries=del_list)

        # The file is closed (and therefore flushed) before it is handed
        # to the transfer.
        tdata = TransferData(tc,
                             source_endpoint_id,
                             globus_eid,
                             label=f"{crawl_id}")
        tdata.add_item(os.path.abspath(file_name),
                       os.path.join(source_destination, file_name))

        tc.endpoint_autoactivate(source_endpoint_id)
        tc.endpoint_autoactivate(globus_eid)
        submit_result = tc.submit_transfer(tdata)

        # NOTE(review): this poll has no upper bound, so the request blocks
        # for as long as the task is neither SUCCEEDED nor FAILED.
        while True:
            result = tc.get_task(submit_result['task_id'])
            task_status = result.data["status"]
            if task_status == "SUCCEEDED":
                break
            if task_status == "FAILED":
                # TODO: Change this
                raise RuntimeError("Globus transfer task failed")
            time.sleep(0.5)

    except Exception as e:
        print(e)
        abort(400, "Failed to validate")
    finally:
        # open() may have failed before the file was created; an unguarded
        # remove would raise here and replace the 400 abort above.
        if os.path.exists(file_name):
            os.remove(file_name)

    return "[200] Submitted"
Ejemplo n.º 13
0
class Transfer:
    '''
    Modified Transfer, add an option to pass refresh token to avoid the web login
    '''
    def __init__(self,
                 src_endpoint_name,
                 dst_endpoint_name,
                 transfer_rt=None,
                 log_lv=logging.INFO):
        """Authenticate against Globus and resolve both endpoints by name.

        Args:
            src_endpoint_name: Display name to match among the endpoints
                returned by ``endpoint_search(filter_scope="shared-with-me")``.
            dst_endpoint_name: Display name to match among the endpoints
                returned by ``endpoint_search(filter_scope="my-endpoints")``.
            transfer_rt: Optional transfer refresh token. When given, the
                interactive login prompt is skipped.
            log_lv: Level set on this class's logger.

        Raises:
            LookupError: If no endpoint with the requested display name is
                found for the source or for the destination.
        """
        log_format = '%(asctime)-15s %(levelname)s:\t  class:%(name)s %(message)s'
        logging.basicConfig(format=log_format)
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(log_lv)
        self.logger.debug('CLIENT_ID: {0}'.format(CLIENT_ID))
        self.client = NativeAppAuthClient(CLIENT_ID)
        self.client.oauth2_start_flow(refresh_tokens=True)

        if transfer_rt is not None:
            self.authorizer = RefreshTokenAuthorizer(transfer_rt, self.client)
        else:
            authorize_url = self.client.oauth2_get_authorize_url()
            print('Please go to this URL and login: {0}'.format(authorize_url))

            # Prefer raw_input where it exists (Python 2) and fall back to
            # input otherwise. A direct name lookup is used because
            # __builtins__ is a dict, not a module, inside imported modules,
            # so getattr() on it cannot find raw_input there.
            try:
                get_input = raw_input  # noqa: F821
            except NameError:
                get_input = input
            auth_code = get_input(
                'Please enter the code you get after login here: ').strip()
            token_response = self.client.oauth2_exchange_code_for_tokens(
                auth_code)
            self.globus_auth_data = token_response.by_resource_server[
                'auth.globus.org']
            self.globus_transfer_data = token_response.by_resource_server[
                'transfer.api.globus.org']
            transfer_rt = self.globus_transfer_data['refresh_token']
            transfer_at = self.globus_transfer_data['access_token']
            expires_at_s = self.globus_transfer_data['expires_at_seconds']
            self.authorizer = RefreshTokenAuthorizer(transfer_rt,
                                                     self.client,
                                                     access_token=transfer_at,
                                                     expires_at=expires_at_s)
        self.transferClient = TransferClient(authorizer=self.authorizer)
        self.src_endpoint = None
        self.dst_endpoint = None

        # No early exit: if several endpoints share the display name, the
        # last one returned by the search is the one kept.
        for ep in self.transferClient.endpoint_search(
                filter_scope="shared-with-me"):
            if ep["display_name"] == src_endpoint_name:
                self.src_endpoint = ep
                self.logger.info('Source endpoint: [{0}] {1}'.format(
                    self.src_endpoint['id'],
                    self.src_endpoint['display_name']))
        if self.src_endpoint is None:
            message = 'No endpoint shared with you with name: {0}'.format(
                src_endpoint_name)
            self.logger.error(message)
            raise LookupError(message)

        for ep in self.transferClient.endpoint_search(
                filter_scope="my-endpoints"):
            if ep['display_name'] == dst_endpoint_name:
                self.dst_endpoint = ep
                self.logger.info('Destination endpoint: [{0}] {1}'.format(
                    self.dst_endpoint['id'],
                    self.dst_endpoint['display_name']))
        if self.dst_endpoint is None:
            message = 'You don\'t have endpoint named: {0}'.format(
                dst_endpoint_name)
            self.logger.error(message)
            raise LookupError(message)

    def transfer_dir(self, src_dir, dst_dir):
        """Submit a recursive transfer of ``src_dir`` to ``dst_dir``.

        The transfer goes from the resolved source endpoint to the resolved
        destination endpoint. The task id and result code are logged at INFO
        level.

        Returns:
            The result object returned by ``submit_transfer``.
        """
        request_doc = TransferData(
            self.transferClient,
            self.src_endpoint['id'],
            self.dst_endpoint['id'],
        )
        request_doc.add_item(src_dir, dst_dir, recursive=True)

        submission = self.transferClient.submit_transfer(request_doc)
        summary = 'task [{0}] {1}'.format(submission['task_id'],
                                          submission['code'])
        self.logger.info(summary)
        return submission

    def transfer_file(self, src_file, dst_file):
        """Submit a transfer of ``src_file`` to ``dst_file``.

        The transfer goes from the resolved source endpoint to the resolved
        destination endpoint. The task id and result code are logged at INFO
        level.

        Returns:
            The result object returned by ``submit_transfer``.
        """
        request_doc = TransferData(
            self.transferClient,
            self.src_endpoint['id'],
            self.dst_endpoint['id'],
        )
        request_doc.add_item(src_file, dst_file)

        submission = self.transferClient.submit_transfer(request_doc)
        summary = 'task_id [{0}] {1}'.format(submission['task_id'],
                                             submission['code'])
        self.logger.info(summary)
        return submission

    def ls_src_dir(self, path, ls_filter=''):
        # using iteration to get every entry from result
        # an entry contain two keys: 'name' and 'type'
        # type define the entry is a file or folder
        result = self.transferClient.operation_ls(self.src_endpoint['id'],
                                                  path=path,
                                                  filter=ls_filter)
        for entry in result:
            self.logger.debug('name: {0}\ttype: {1}'.format(
                entry["name"], entry["type"]))
        return result

    def task_list(self, num_results=10):
        result = self.transferClient.task_list(num_results=num_results)
        for task in result:
            self.logger.debug('task_id: [{0}]\t status: {1}'.format(
                task['task_id'], task['status']))

        result = self.transferClient.task_list(num_results=num_results)
        return result

    def get_task(self, task_id):
        return self.transferClient.get_task(task_id)