class TestGlobus(TestCase):
    """Unit tests for GlobusAPIClient.

    Every test replaces the client's network-facing helpers with mocks, so
    no Globus service is contacted.
    """

    @pytest.fixture(autouse=True)
    def setup(self, tmpdir):
        """Write a fake GOAuth token file and build a client that reads it."""
        globus_dir = tmpdir.mkdir('globus')
        auth_file = globus_dir.join('globus-auth')
        # Token expires one year from now.
        expiry = int(time.time()) + (60*60*24*365)
        self.fake_goauth_token = 'un=test|tokenid=fake-token-id|expiry=%d' % expiry
        auth_file.write(self.fake_goauth_token)
        settings = default_settings()
        settings.globus_auth_file = str(auth_file)
        self.globus = GlobusAPIClient(settings=settings)

    # NOTE(review): these no-op hooks are kept on purpose; if TestCase is a
    # project-specific base class they suppress its setUp/tearDown - confirm
    # before removing.
    def setUp(self):
        pass

    def tearDown(self):
        pass

    @patch("emop.lib.transfer.globus.get_access_token")
    @patch("emop.lib.transfer.globus.os.path.isfile")
    def test_get_goauth_data_fromfile(self, mock_goauth_isfile, mock_get_access_token):
        """An existing token file is used without asking the API for a token."""
        mock_goauth_isfile.return_value = True
        expected_retval = ('test', self.fake_goauth_token)
        retval = self.globus.get_goauth_data()
        assert not mock_get_access_token.called
        self.assertEqual(expected_retval, retval)

    @patch("emop.lib.transfer.globus.get_access_token")
    @patch("emop.lib.transfer.globus.os.path.isfile")
    def test_get_goauth_data_from_api(self, mock_goauth_isfile, mock_get_access_token):
        """With no token file, the token is requested from the API."""
        mock_goauth_isfile.return_value = False
        GOAuthResult = namedtuple("GOAuthResult", "username password token")
        mock_get_access_token.return_value = GOAuthResult('test', 'foo', self.fake_goauth_token)
        expected_retval = ('test', self.fake_goauth_token)
        retval = self.globus.get_goauth_data()
        # NOTE(review): '******' looks like a masked value - confirm it
        # matches the username the client actually passes.
        mock_get_access_token.assert_called_once_with(username='******')
        self.assertEqual(expected_retval, retval)

    @patch("emop.lib.transfer.globus.get_access_token")
    @patch("emop.lib.transfer.globus.os.path.isfile")
    def test_get_goauth_data_expiry_fromapi(self, mock_goauth_isfile, mock_get_access_token):
        """A token file that fails the expiration check triggers an API request."""
        mock_goauth_isfile.return_value = True
        expected_retval = ('test', self.fake_goauth_token)
        self.globus._check_goauth_expiration = MagicMock(return_value=False)
        GOAuthResult = namedtuple("GOAuthResult", "username password token")
        mock_get_access_token.return_value = GOAuthResult('test', 'foo', self.fake_goauth_token)
        retval = self.globus.get_goauth_data()
        # NOTE(review): '******' looks like a masked value - confirm it
        # matches the username the client actually passes.
        mock_get_access_token.assert_called_once_with(username='******')
        self.assertEqual(expected_retval, retval)

    def test_get_endpoint_data(self):
        """get_endpoint_data forwards to the 'endpoint' API call."""
        data = load_fixture_file('globus_endpoint_data.json')
        self.globus._api_call = MagicMock(return_value=data)
        retval = self.globus.get_endpoint_data(endpoint='go#ep1')
        self.globus._api_call.assert_called_once_with(descr='endpoint go#ep1', func='endpoint', endpoint_name='go#ep1')
        self.assertEqual(data, retval)

    def test_check_activated_already_activated(self):
        """An activated endpoint reports expires_in without autoactivating."""
        endpoint_data = {
            "activated": True,
            "expires_in": 788825
        }
        self.globus.get_endpoint_data = MagicMock(return_value=endpoint_data)
        self.globus.autoactivate = MagicMock()
        retval = self.globus.check_activated(endpoint='go#ep1')
        self.globus.get_endpoint_data.assert_called_once_with(endpoint='go#ep1', fields="activated,expires_in")
        assert not self.globus.autoactivate.called
        self.assertEqual(788825, retval)

    def test_check_activated_autoactivate(self):
        """An inactive endpoint is autoactivated and the new expires_in returned."""
        endpoint_data = {
            "activated": False,
            "expires_in": 0
        }
        autoactivate_data = {
            "code": "AutoActivated.CachedCredential",
            "resource": "/endpoint/go#ep1/activate",
            "DATA_TYPE": "activation_result",
            "expires_in": 788825,
            "length": 0,
            "endpoint": "go#ep1",
            "request_id": 'test',
            "expire_time": 'test',
            "message": "Endpoint activated successfully using cached credential",
            "DATA": [],
            "oauth_server": None,
            "subject": None
        }
        self.globus.get_endpoint_data = MagicMock(return_value=endpoint_data)
        self.globus.autoactivate = MagicMock(return_value=autoactivate_data)
        retval = self.globus.check_activated(endpoint='go#ep1')
        self.globus.get_endpoint_data.assert_called_once_with(endpoint='go#ep1', fields="activated,expires_in")
        self.globus.autoactivate.assert_called_once_with(endpoint='go#ep1')
        self.assertEqual(788825, retval)

    def test_check_activated_not_activated(self):
        """A failed autoactivation yields an expires_in of 0."""
        endpoint_data = {
            "activated": False,
            "expires_in": 0
        }
        autoactivate_data = {
            "code": "AutoActivationFailed",
            "resource": "/endpoint/go#ep1/activate",
            "DATA_TYPE": "activation_result",
            "expires_in": 0,
            "length": 0,
            "endpoint": "go#ep1",
            "request_id": None,
            "expire_time": None,
            "message": "Auto activation failed",
            "DATA": [],
            "oauth_server": None,
            "subject": None
        }
        self.globus.get_endpoint_data = MagicMock(return_value=endpoint_data)
        self.globus.autoactivate = MagicMock(return_value=autoactivate_data)
        retval = self.globus.check_activated(endpoint='go#ep1')
        self.globus.get_endpoint_data.assert_called_once_with(endpoint='go#ep1', fields="activated,expires_in")
        self.globus.autoactivate.assert_called_once_with(endpoint='go#ep1')
        self.assertEqual(0, retval)

    def test_check_activated_no_endpoint_data(self):
        """Missing endpoint data yields 0 and skips autoactivation."""
        self.globus.get_endpoint_data = MagicMock(return_value={})
        self.globus.autoactivate = MagicMock()
        retval = self.globus.check_activated(endpoint='go#ep1')
        self.globus.get_endpoint_data.assert_called_once_with(endpoint='go#ep1', fields="activated,expires_in")
        assert not self.globus.autoactivate.called
        self.assertEqual(0, retval)

    def test_autoactivate(self):
        """autoactivate forwards to the 'endpoint_autoactivate' API call."""
        autoactivate_data = {
            "code": "AutoActivated.CachedCredential",
            "resource": "/endpoint/go#ep1/activate",
            "DATA_TYPE": "activation_result",
            "expires_in": 788825,
            "length": 0,
            "endpoint": "go#ep1",
            "request_id": 'test',
            "expire_time": 'test',
            "message": "Endpoint activated successfully using cached credential",
            "DATA": [],
            "oauth_server": None,
            "subject": None
        }
        self.globus._api_call = MagicMock(return_value=autoactivate_data)
        retval = self.globus.autoactivate(endpoint="go#ep1")
        self.globus._api_call.assert_called_once_with(descr="Autoactivate go#ep1", func="endpoint_autoactivate", endpoint_name="go#ep1")
        self.assertEqual(autoactivate_data, retval)

    def test_get_activate_url_success(self):
        """The activation URL carries the URL-quoted name and the endpoint id."""
        data = {
            "canonical_name": "go#ep1",
            "id": "ddb59aef-6d04-11e5-ba46-22000b92c6ec",
        }
        self.globus.get_endpoint_data = MagicMock(return_value=data)
        retval = self.globus.get_activate_url(endpoint="go#ep1")
        self.assertEqual("https://www.globus.org/activate?ep=go%23ep1&ep_ids=ddb59aef-6d04-11e5-ba46-22000b92c6ec", retval)

    def test_get_activate_url_fail(self):
        """Without endpoint data the activation URL is "UNKNOWN"."""
        self.globus.get_endpoint_data = MagicMock(return_value={})
        retval = self.globus.get_activate_url(endpoint="go#ep1")
        self.assertEqual("UNKNOWN", retval)

    def test__get_submission_id(self):
        """_get_submission_id returns the 'value' field of the API response."""
        data = {
            "value": "2978e1ce-99d7-11e5-9996-22000b96db58",
            "DATA_TYPE": "submission_id"
        }
        self.globus._api_call = MagicMock(return_value=data)
        retval = self.globus._get_submission_id()
        self.globus._api_call.assert_called_once_with(descr="Get submission_id", func="submission_id")
        self.assertEqual("2978e1ce-99d7-11e5-9996-22000b96db58", retval)

    def test_create_transfer(self):
        """create_transfer builds a Transfer from the submission id and endpoints."""
        with patch('globusonline.transfer.api_client.Transfer') as transfer_class:
            mock_transfer = MagicMock()
            transfer_class.return_value = mock_transfer
            self.globus._get_submission_id = MagicMock(return_value="2978e1ce-99d7-11e5-9996-22000b96db58")
            retval = self.globus.create_transfer(src="go#ep1", dest="go#ep2")
            transfer_class.assert_called_once_with("2978e1ce-99d7-11e5-9996-22000b96db58", "go#ep1", "go#ep2", notify_on_succeeded=False, notify_on_failed=False, notify_on_inactive=False)
            self.assertEqual(mock_transfer, retval)

    def test_create_transfer_fail(self):
        """Without a submission id no Transfer is built and None is returned."""
        with patch('globusonline.transfer.api_client.Transfer') as transfer_class:
            mock_transfer = MagicMock()
            transfer_class.return_value = mock_transfer
            self.globus._get_submission_id = MagicMock(return_value=None)
            retval = self.globus.create_transfer(src="go#ep1", dest="go#ep2")
            assert not transfer_class.called
            self.assertIsNone(retval)

    def test_send_transfer(self):
        """send_transfer returns the task_id of the submitted transfer."""
        data = {
            "task_id": "d237692e-99f1-11e5-9996-22000b96db58",
        }
        mock_transfer = MagicMock()
        self.globus._api_call = MagicMock(return_value=data)
        retval = self.globus.send_transfer(transfer=mock_transfer)
        self.globus._api_call.assert_called_once_with(descr="Transfer", func="transfer", transfer=mock_transfer)
        self.assertEqual("d237692e-99f1-11e5-9996-22000b96db58", retval)

    def test_get_task(self):
        """get_task forwards the task id and field list to the 'task' API call."""
        data = {
            "status": "ACTIVE",
            "task_id": "d237692e-99f1-11e5-9996-22000b96db58",
        }
        self.globus._api_call = MagicMock(return_value=data)
        retval = self.globus.get_task(task_id="d237692e-99f1-11e5-9996-22000b96db58", fields="status,task_id")
        self.globus._api_call.assert_called_once_with(descr="Get task d237692e-99f1-11e5-9996-22000b96db58", func="task", task_id="d237692e-99f1-11e5-9996-22000b96db58", fields="status,task_id")
        self.assertEqual(data, retval)

    def test_get_successful_task(self):
        """get_successful_task forwards to 'task_successful_transfers'."""
        data = {
            "DATA": [
                {
                    "DATA_TYPE": "successful_transfer",
                    "destination_path": "/~/test-out.txt",
                    "source_path": "/~/test-in.txt"
                }
            ],
            "DATA_TYPE": "successful_transfers",
        }
        self.globus._api_call = MagicMock(return_value=data)
        retval = self.globus.get_successful_task(task_id="d237692e-99f1-11e5-9996-22000b96db58")
        self.globus._api_call.assert_called_once_with(descr="Get successful task d237692e-99f1-11e5-9996-22000b96db58", func="task_successful_transfers", task_id="d237692e-99f1-11e5-9996-22000b96db58")
        self.assertEqual(data, retval)

    def test__get_task_status(self):
        """_get_task_status requests only the status field and returns it."""
        data = {
            "status": "ACTIVE",
        }
        self.globus.get_task = MagicMock(return_value=data)
        retval = self.globus._get_task_status(task_id="d237692e-99f1-11e5-9996-22000b96db58")
        self.globus.get_task.assert_called_once_with(task_id="d237692e-99f1-11e5-9996-22000b96db58", fields="status")
        self.assertEqual("ACTIVE", retval)

    @skipif(True, reason="Not yet implemented")
    def test_wait_for_task(self):
        pass

    @skipif(True, reason="Not yet implemented")
    def test_task_list(self):
        pass

    @skipif(True, reason="Not yet implemented")
    def test_endpoint_ls(self):
        pass
class EmopTransfer(EmopBase):
    """Stage eMOP files between the remote and cluster Globus endpoints.

    The endpoints are read from ``settings.globus_remote_endpoint`` and
    ``settings.globus_cluster_endpoint``; all Globus calls go through a
    ``GlobusAPIClient`` held in ``self.globus``.
    """

    def __init__(self, config_path):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(EmopTransfer, self).__init__(config_path)
        self.globus = GlobusAPIClient(settings=self.settings)
        self.cluster_endpoint = self.settings.globus_cluster_endpoint
        self.remote_endpoint = self.settings.globus_remote_endpoint

    def stage_in_files(self, files, wait=0):
        """Stage in files.

        Start a Globus transfer of the specified files from the remote
        endpoint to the cluster endpoint.

        Args:
            files (list): List of files to stage in
            wait (int): Number of seconds to wait for the transfer to
                complete, 0 means no wait.

        Returns:
            str: Globus Task ID
        """
        stage_in_data = self._get_stage_in_data(files)
        logger.debug("Stage in files:\n%s", json.dumps(stage_in_data, indent=4, sort_keys=True))
        return self.start(
            src=self.remote_endpoint,
            dest=self.cluster_endpoint,
            data=stage_in_data,
            label="emop-stage-in-files",
            wait=wait,
        )

    def stage_in_data(self, data, wait=0):
        """Stage in data.

        Extract the files referenced by data and initiate a Globus transfer.

        Args:
            data (list): API data that references the files to transfer
            wait (int): Number of seconds to wait for the transfer to
                complete, 0 means no wait.

        Returns:
            str: Globus Task ID, or '' when data references no files
        """
        files = self._get_stage_in_files_from_data(data)
        if not files:
            return ''
        return self.stage_in_files(files=files, wait=wait)

    def stage_in_proc_ids(self, proc_ids, wait=0):
        """Stage in proc_ids.

        Load the input payload of each proc_id, collect the files it
        references and initiate a single Globus transfer for all of them.
        proc_ids without an input payload are logged and skipped.

        Args:
            proc_ids (list): List of proc_ids to stage in
            wait (int): Number of seconds to wait for the transfer to
                complete, 0 means no wait.

        Returns:
            str: Globus Task ID
        """
        stage_in_files = []
        for proc_id in proc_ids:
            payload = EmopPayload(self.settings, proc_id)
            if not payload.input_exists():
                logger.error("EmopTransfer: Could not find input payload for proc_id %s", proc_id)
                continue
            data = payload.load_input()
            stage_in_files.extend(self._get_stage_in_files_from_data(data))
        # Several proc_ids may reference the same file; transfer it once.
        unique_files = list(set(stage_in_files))
        stage_in_data = self._get_stage_in_data(unique_files)
        return self.start(
            src=self.remote_endpoint,
            dest=self.cluster_endpoint,
            data=stage_in_data,
            label="emop-stage-in-files",
            wait=wait,
        )

    def stage_out_proc_id(self, proc_id, wait=0):
        """Stage out proc_id.

        Locate the payload file of proc_id (completed output, then output,
        then uploaded output), and transfer the files it references from
        the cluster endpoint to the remote endpoint.

        Args:
            proc_id (str): proc_id to stage out
            wait (int): Number of seconds to wait for the transfer to
                complete, 0 means no wait.

        Returns:
            str: Globus Task ID, or '' when the payload cannot be found or
                loaded
        """
        payload = EmopPayload(self.settings, proc_id)
        if payload.completed_output_exists():
            filename = payload.completed_output_filename
        elif payload.output_exists():
            filename = payload.output_filename
        elif payload.uploaded_output_exists():
            filename = payload.uploaded_output_filename
        else:
            logger.error("EmopTransfer: Could not find payload file for proc_id %s", proc_id)
            return ''

        data = payload.load(filename)
        if not data:
            logger.error("EmopTransfer: Unable to load payload data")
            return ''

        stage_out_data = self._get_stage_out_data(data)
        logger.debug("Stage out files:\n%s", json.dumps(stage_out_data, indent=4, sort_keys=True))
        return self.start(
            src=self.cluster_endpoint,
            dest=self.remote_endpoint,
            data=stage_out_data,
            label="emop-stage-out-%s" % proc_id,
            wait=wait,
        )

    def check_endpoints(self, fail_on_warn=False):
        """Check if endpoints are activated.

        Check cluster_endpoint and remote_endpoint. An endpoint that is not
        activated gets one autoactivation attempt before being reported as
        failed together with its activation URL.

        Args:
            fail_on_warn (bool): Consider endpoint activation about to
                expire as failure

        Returns:
            bool: Whether endpoints are activated and don't expire soon
        """
        _valid = True
        for endpoint in [self.cluster_endpoint, self.remote_endpoint]:
            _activated = self._check_activation(endpoint=endpoint, fail_on_warn=fail_on_warn)
            if not _activated:
                self.globus.autoactivate(endpoint=endpoint)
                _activated = self._check_activation(endpoint=endpoint, fail_on_warn=fail_on_warn)
            if not _activated:
                _valid = False
                logger.error("Endpoint %s is not activated!", endpoint)
                logger.error("To activate, visit this URL:\n\t%s", self.globus.get_activate_url(endpoint=endpoint))
            else:
                logger.info("Endpoint %s activated.", endpoint)
        return _valid

    def start(self, src, dest, data, label="emop", wait=0):
        """Start a Globus transfer.

        Start a globus transfer from src to dest of files contained in
        data. The data format is:

            [
                {'src': '/path/to/src/file', 'dest': '/path/to/dest/file'}
            ]

        An item may also carry a 'recursive' key; it defaults to False.

        Args:
            src (str): Source Globus endpoint name
            dest (str): Destination Globus endpoint name
            data (list): List of dictionaries defining files to transfer
            label (str): Label to give Globus transfer
            wait (int): Number of seconds to wait for transfer to complete,
                0 means no wait.

        Returns:
            str: Globus Task ID. '' when there is nothing to transfer or the
                transfer could not be created; the falsy value returned by
                send_transfer when submission failed.
        """
        # Check for work first so an empty request does not ask Globus for
        # a transfer it will never submit.
        if not data:
            logger.error("EmopTransfer.start: No data to transfer")
            return ''

        # TODO: Determine when to change sync_level
        _transfer = self.globus.create_transfer(src, dest, label=label, sync_level=2)
        if _transfer is None:
            # create_transfer returns None when no submission id is available.
            logger.error("EmopTransfer.start: Unable to create transfer")
            return ''

        for d in data:
            logger.debug("TRANSFER: %s:%s -> %s:%s", src, d['src'], dest, d['dest'])
            _transfer.add_item(d['src'], d['dest'], d.get('recursive', False))

        task_id = self.globus.send_transfer(_transfer)
        if not task_id:
            # Nothing was submitted, so there is no task to wait for.
            logger.error("EmopTransfer.start: Transfer submission failed")
            return task_id
        logger.info("Successfully submitted transfer with task ID %s", task_id)

        if not wait:
            return task_id

        status = self.globus.wait_for_task(task_id=task_id, timeout=wait)
        if status is None:
            logger.warning("Task did not complete before timeout!")
        else:
            logger.info("Task %s completed with status %s", task_id, status)
        return task_id

    def ls(self, endpoint, path):
        """Globus ls.

        Perform a Globus ls of endpoint's path.

        Args:
            endpoint (str): Globus endpoint name
            path (str): Path to ls

        Returns:
            dict: Globus ls data
        """
        return self.globus.endpoint_ls(endpoint, path)

    def display_task(self, task_id, wait=0):
        """Display task data.

        Print the files, files_skipped, files_transferred and status fields
        of a Globus task, followed by the source and destination path of
        every successfully transferred file.

        Args:
            task_id (str): Globus Task ID
            wait (int): Number of seconds to wait for transfer to complete,
                0 means no wait.

        Returns:
            str: Globus Task Status, or None when the task data cannot be
                retrieved
        """
        task_data = self.globus.get_task(task_id)
        if not task_data:
            print("Unable to get task %s data" % task_id)
            return None

        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("Task: %s" % task_id)
        for k, v in task_data.items():
            if k not in ["files", "files_skipped", "files_transferred", "status"]:
                continue
            print("\t%s=%s" % (k, v))

        if wait:
            status = self.globus.wait_for_task(task_id=task_id, timeout=wait)
        else:
            status = task_data["status"]

        successful_task_data = self.globus.get_successful_task(task_id)
        if not successful_task_data:
            print("Unable to get successful task %s data" % task_id)
            return None

        transfer_list = successful_task_data["DATA"]
        if transfer_list:
            print("Successful Transfers (src -> dst)")
            for t in transfer_list:
                print(" %s:%s -> %s:%s" % (task_data['source_endpoint'], t[u'source_path'], task_data['destination_endpoint'], t[u'destination_path']))
        return status

    def _check_activation(self, endpoint, fail_on_warn=False):
        """Private function - check endpoint activation.

        Ask the Globus client how long the endpoint stays activated, log
        the remaining time and compare it against the
        globus_min_activation_time setting.

        Args:
            endpoint (str): Globus endpoint name
            fail_on_warn (bool): If endpoint activation time less than
                min_activation_time is a failure

        Returns:
            bool: False when the endpoint is not activated, or when it
                expires before the minimum activation time and fail_on_warn
                is set; True otherwise
        """
        _expires_in = self.globus.check_activated(endpoint)
        if not _expires_in:
            return False

        # Break the remaining seconds down into days-HH:MM:SS for the log.
        _minutes, _seconds = divmod(_expires_in, 60)
        _hours, _minutes = divmod(_minutes, 60)
        _days, _hours = divmod(_hours, 24)
        logger.info("Endpoint %s expires in %d-%02d:%02d:%02d", endpoint, _days, _hours, _minutes, _seconds)

        if _expires_in < self.settings.globus_min_activation_time:
            logger.warning("Endpoint %s expires before minimum activation time setting", endpoint)
            if fail_on_warn:
                return False
        return True

    def _task_list(self, **kw):
        """Private function - return the Globus task list for the given filters."""
        return self.globus.task_list(**kw)

    def _get_stage_in_files_from_data(self, data):
        """Private function - convert data to stage in files.

        eMOP dashboard API data is converted to list of files to transfer.
        The attributes that name the files are taken from the
        transfer_attributes class variable of EmopPage, EmopFont,
        EmopLanguageModel and EmopGlyphSubstitutionModel.

        Args:
            data (list): API data containing page information

        Returns:
            list: Files to transfer, without duplicates
        """
        if not data:
            return []

        transfer_keys = {
            "page": EmopPage.transfer_attributes,
            "font": EmopFont.transfer_attributes,
            "language_model": EmopLanguageModel.transfer_attributes,
            "glyph_substitution_model": EmopGlyphSubstitutionModel.transfer_attributes,
        }
        files = set()
        for key, attributes in transfer_keys.items():
            for item in data:
                _obj = item.get(key)
                if _obj is None:
                    continue
                for attribute in attributes:
                    _file = _obj.get(attribute)
                    if _file:
                        files.add(_file)
        return list(files)

    def _get_stage_in_data(self, files):
        """Private function - convert stage in files to transfer data.

        Each file becomes a src/dest pair; dest is the file with the
        input_path_prefix setting added.

        Args:
            files (list): Stage in file list.

        Returns:
            list: List of dicts that contain necessary src/dest key/value
                pairs
        """
        prefix = self.settings.input_path_prefix
        return [
            {'src': f, 'dest': EmopBase.add_prefix(prefix=prefix, path=f)}
            for f in files
        ]

    def _get_stage_out_data(self, data):
        """Private function - convert stage out files to transfer data.

        The page_results and font_training_results in data are searched for
        absolute file paths, which are then converted to src/dest pairs.
        Entries in extra_transfers are transferred as well; an entry that
        is a directory replaces every collected path that contains it.

        Args:
            data (dict): Dictionary containing output API data.

        Returns:
            list: List of dicts that contain necessary src/dest key/value
                pairs; directories also carry 'recursive': True
        """
        try:
            # Python 2: matches both str and unicode.
            _string_types = basestring
        except NameError:
            _string_types = str

        prefix = self.settings.output_path_prefix
        # "or []" also covers keys that are present but explicitly null.
        _results = (data.get("page_results") or []) + (data.get("font_training_results") or [])
        _extra_transfers = data.get("extra_transfers") or []

        _paths = set()
        for _result in _results:
            for _value in _result.values():
                if isinstance(_value, _string_types) and os.path.isabs(_value):
                    _paths.add(EmopBase.add_prefix(prefix=prefix, path=_value))

        for _extra in _extra_transfers:
            if os.path.isdir(_extra):
                # Drop the individual paths that contain this directory's
                # name, then add the directory itself below.
                _paths = {_path for _path in _paths if _extra not in _path}
            _paths.add(_extra)

        _data = []
        for _path in _paths:
            _d = {
                'dest': EmopBase.remove_prefix(prefix=prefix, path=_path),
                'src': _path,
            }
            if os.path.isdir(_path):
                _d['recursive'] = True
            _data.append(_d)
        return _data