Example #1
    def test_57_put_files_uuid_replace_locations(self):
        """Test that a file can replace with the same location."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # try to replace the first file with the second; should be OK
        data = r.request_seq('PUT', '/api/files/'+uid, metadata2)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('logical_name', data)
Example #2
 async def _do_work_claim(self) -> bool:
     """Claim a bundle and perform work on it."""
     # 1. Ask the LTA DB for the next Bundle to be built
     # configure a RestClient to talk to the File Catalog
     fc_rc = RestClient(self.file_catalog_rest_url,
                        token=self.file_catalog_rest_token,
                        timeout=self.work_timeout_seconds,
                        retries=self.work_retries)
     # configure a RestClient to talk to the LTA DB
     lta_rc = RestClient(self.lta_rest_url,
                         token=self.lta_rest_token,
                         timeout=self.work_timeout_seconds,
                         retries=self.work_retries)
     self.logger.info("Asking the LTA DB for a Bundle to build.")
     pop_body = {"claimant": f"{self.name}-{self.instance_uuid}"}
     response = await lta_rc.request(
         'POST',
         f'/Bundles/actions/pop?source={self.source_site}&dest={self.dest_site}&status={self.input_status}',
         pop_body)
     self.logger.info(f"LTA DB responded with: {response}")
     bundle = response["bundle"]
     if not bundle:
         self.logger.info(
             "LTA DB did not provide a Bundle to build. Going on vacation.")
         return False
     # process the Bundle that we were given
     try:
         await self._do_work_bundle(fc_rc, lta_rc, bundle)
     except Exception as e:
         await self._quarantine_bundle(lta_rc, bundle, f"{e}")
         raise e
     # signal the work was processed successfully
     return True
Example #3
    def test_54_patch_files_uuid_replace_logical_name(self):
        """Test that a file can be updated with the same logical_name."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the file to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # this is a PATCH to metadata; matches the old logical_name
        patch1 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # create the file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # try to update the file with a patch; should be OK
        data = r.request_seq('PATCH', '/api/files/' + uid, patch1)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('logical_name', data)
Example #4
    def test_15_files_auth(self):
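        """Test that /api/files rejects requests made without a valid auth token."""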
        appkey = 'secret2'
        self.edit_config({
            'auth':{
                'secret': 'secret',
                'expiration': 82400,
            }
        })
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        r2 = RestClient(self.address, 'blah', timeout=1, retries=1)
        with self.assertRaises(Exception):
            r2.request_seq('POST', '/api/files', metadata)

        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
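Example #5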
    def test_71_snapshot_find(self):
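        """Test creating a collection snapshot and reading back its (empty) file list."""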
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/' + uid)

        data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('snapshot', data)
        url = data['snapshot']
        snap_uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/snapshots/' + snap_uid)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files',data)
        self.assertEqual(data['files'], [])
Example #6
    def test_10_files(self):
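        """Test file creation and listing, and that bulk PUT/DELETE/PATCH are rejected."""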
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))

        for m in ('PUT','DELETE','PATCH'):
            with self.assertRaises(Exception):
                r.request_seq(m, '/api/files')
Example #7
def test_101_request_stream() -> None:
    """Test `request_stream()` when there's no response."""
    mock_url = "http://test"
    rpc = RestClient(mock_url, "passkey", timeout=1)

    empty_streams = [
        [b"\n"],
        [],
        [b"\n", b"\r\n", b"\n"],
        [b" \n"],
        [b" "],
        [b"\t"],
    ]
    for expected_stream in empty_streams:
        # test multiple times
        for test_num in range(2):
            print(f"\niteration #{test_num}")
            HTTPretty.register_uri(
                HTTPretty.POST,
                mock_url + "/stream/no-resp/",
                body=expected_stream,
                streaming=True,
            )
            response_stream = rpc.request_stream("POST", "/stream/no-resp/",
                                                 {})

            never_entered = True
            with _in_time(10, "Iterating by line is taking forever!"):
                for _ in response_stream:
                    never_entered = False
            assert never_entered

        # now w/ chunk sizes
        for chunk_size in [
                None, -1, 0, 1, 2, 3, 4, 8, 9, 20, 100, 1024, 32768
        ]:
            print(f"\nchunk_size: {chunk_size}")
            HTTPretty.register_uri(
                HTTPretty.POST,
                mock_url + "/stream/no-resp/w/chunks",
                body=expected_stream,
                streaming=True,
            )
            response_stream = rpc.request_stream("POST",
                                                 "/stream/no-resp/w/chunks",
                                                 {},
                                                 chunk_size=chunk_size)

            never_entered_w_chunks = True
            with _in_time(10,
                          "Iterating by line is taking forever w/ chunks!"):
                for _ in response_stream:
                    never_entered_w_chunks = False
            assert never_entered_w_chunks
Example #8
async def test_92_request_seq(requests_mock: Mock) -> None:
    """Test `request_seq()`."""
    rpc = RestClient("http://test", "passkey", timeout=0.1)

    def response(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        raise Exception()

    requests_mock.post("/test", content=response)

    with pytest.raises(Exception):
        rpc.request_seq("POST", "test", {})
    def test_80_snapshot_files(self):
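        """Test that a snapshot captures the collection's files as of snapshot creation."""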
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/' + uid)

        data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('snapshot', data)
        url = data['snapshot']
        snap_uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid))
        self.assertEqual(data['files'], [])

        # add a file
        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        file_uid = url.split('/')[-1]
        
        # old snapshot stays empty
        data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid))
        self.assertEqual(data['files'], [])

        # new snapshot should have file
        data = r.request_seq('POST', '/api/collections/{}/snapshots'.format(uid))
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('snapshot', data)
        url = data['snapshot']
        snap_uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/snapshots/{}/files'.format(snap_uid),
                             {'keys':'uuid|logical_name|checksum|locations'})
        self.assertEqual(len(data['files']), 1)
        self.assertEqual(data['files'][0]['uuid'], file_uid)
        self.assertEqual(data['files'][0]['checksum'], metadata['checksum'])
Example #10
    def test_66_patch_files_uuid_locations_1xN(self):
        """Test locations uniqueness under 1xN multiplicity."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the locations to be tested
        loc1a = {'site': 'WIPAC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1b = {'site': 'DESY', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1c = {'site': 'NERSC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1d = {'site': 'OSG', 'path': '/data/test/exp/IceCube/foo.dat'}
        locs3a = [loc1a, loc1b, loc1c]
        locs3b = [loc1b, loc1c, loc1d]
        locs3c = [loc1a, loc1b, loc1d]

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [loc1a]
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah2.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [loc1b]
        }

        # this is a PATCH to metadata; steps on metadata2's location
        patch1 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': locs3c
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # create the second file; should be OK
        data = r.request_seq('POST', '/api/files', metadata2)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)

        # try to update the first file with a patch; should NOT be OK
        with self.assertRaises(Exception):
            r.request_seq('PATCH', '/api/files/' + uid, patch1)
Example #11
    def test_post_files(db_rc: RestClient) -> None:
        """Failure-test role authorization."""
        post_body = {
            "database": "test_histograms",
            "collection": "collection_name",
            "files": ["test.txt"],
        }
        with pytest.raises(requests.exceptions.HTTPError) as e:
            db_rc.request_seq("POST", "/files/names", post_body)
        assert e.value.response.status_code == 403  # Forbidden Error

        db_rc.close()
Example #12
    def test_post_histo(db_rc: RestClient) -> None:
        """Failure-test role authorization."""
        post_body = {
            "database": "test_histograms",
            "collection": "TEST",
            "histogram": {
                "Anything": True
            },
        }
        with pytest.raises(requests.exceptions.HTTPError) as e:
            db_rc.request_seq("POST", "/histogram", post_body)
        assert e.value.response.status_code == 403  # Forbidden Error

        db_rc.close()
Example #13
async def main():
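    """Create fruits with an admin token and read them back with a user token."""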
    admin_token = Auth('secret').create_token('foo', payload={'role': 'admin'})
    user_token = Auth('secret').create_token('foo', payload={'role': 'user'})

    api = RestClient('http://localhost:8080/api', token=admin_token)
    await api.request('POST', '/fruits', {'name': 'apple'})
    await api.request('POST', '/fruits', {'name': 'banana'})

    api = RestClient('http://localhost:8080/api', token=user_token)
    ret = await api.request('GET', '/fruits')
    if ret != {'apple': {'name': 'apple'}, 'banana': {'name': 'banana'}}:
        print(ret)
        print('FAIL')
    else:
        print('OK')
Example #14
async def test_91_request_seq(requests_mock: Mock) -> None:
    """Test `request_seq()`."""
    result = {"result": "the result"}
    rpc = RestClient("http://test", "passkey", timeout=0.1)

    def response(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        assert req.body is not None
        _ = json_decode(req.body)
        return json_encode(result).encode("utf-8")

    requests_mock.post("/test", content=response)
    ret = rpc.request_seq("POST", "test", {})

    assert requests_mock.called
    assert ret == result
Example #15
    def test_60_post_files_locations_1xN(self):
        """Test locations uniqueness under 1xN multiplicity."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the locations to be tested
        loc1a = {'site': 'WIPAC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1b = {'site': 'DESY', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1c = {'site': 'NERSC', 'path': '/data/test/exp/IceCube/foo.dat'}
        loc1d = {'site': 'OSG', 'path': '/data/test/exp/IceCube/foo.dat'}
        locs3a = [loc1a, loc1b, loc1c]
        locs3b = [loc1b, loc1c, loc1d]
        locs3c = [loc1a, loc1b, loc1d]

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [loc1a]
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah2.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': locs3a
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # check that the file was created properly
        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))

        # create the second file; should NOT be OK
        with self.assertRaises(Exception):
            r.request_seq('POST', '/api/files', metadata2)
Example #16
async def test_10_request(requests_mock: Mock) -> None:
    """Test `async request()`."""
    result = {"result": "the result"}
    rpc = RestClient("http://test", "passkey", timeout=0.1)

    def response(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        assert req.body is not None
        _ = json_decode(req.body)
        return json_encode(result).encode("utf-8")

    requests_mock.post("/test", content=response)
    ret = await rpc.request("POST", "test", {})

    assert requests_mock.called
    auth_parts = requests_mock.last_request.headers['Authorization'].split(
        ' ', 1)
    assert auth_parts[0].lower() == 'bearer'
    assert auth_parts[1] == 'passkey'
    assert ret == result

    result2 = {"result2": "the result 2"}

    def response2(req: PreparedRequest, ctx: object) -> bytes:  # pylint: disable=W0613
        assert req.body is not None
        _ = json_decode(req.body)
        return json_encode(result2).encode("utf-8")

    requests_mock.post("/test2", content=response2)
    ret = await rpc.request("POST", "/test2")

    assert requests_mock.called
    assert ret == result2
Example #17
 async def _do_work_claim(self) -> bool:
     """Claim a bundle and perform work on it."""
     # 1. Ask the LTA DB for the next Bundle to be transferred
     # configure a RestClient to talk to the LTA DB
     lta_rc = RestClient(self.lta_rest_url,
                         token=self.lta_rest_token,
                         timeout=self.work_timeout_seconds,
                         retries=self.work_retries)
     self.logger.info("Asking the LTA DB for a Bundle to transfer.")
     pop_body = {"claimant": f"{self.name}-{self.instance_uuid}"}
     response = await lta_rc.request(
         'POST',
         f'/Bundles/actions/pop?source={self.source_site}&dest={self.dest_site}&status={self.input_status}',
         pop_body)
     self.logger.info(f"LTA DB responded with: {response}")
     bundle = response["bundle"]
     if not bundle:
         self.logger.info(
             "LTA DB did not provide a Bundle to transfer. Going on vacation."
         )
         return False
     # process the Bundle that we were given
     try:
         await self._replicate_bundle_to_destination_site(lta_rc, bundle)
     except Exception as e:
         await self._quarantine_bundle(lta_rc, bundle, f"{e}")
         return False
     # if we were successful at processing work, let the caller know
     return True
Example #18
    async def client(username='******', groups=[], timeout=10):
        await krs.users.create_user(username,
                                    'first',
                                    'last',
                                    username + '@test',
                                    rest_client=keycloak_bootstrap)
        await krs.users.set_user_password(username,
                                          'test',
                                          rest_client=keycloak_bootstrap)
        for group in groups:
            await krs.groups.create_group(group,
                                          rest_client=keycloak_bootstrap)
            await krs.groups.add_user_group(group,
                                            username,
                                            rest_client=keycloak_bootstrap)

        token = krs.apps.get_public_token(
            username=username,
            password='******',
            scopes=['profile'],
            client='user_mgmt',
            openid_url=os.environ["AUTH_OPENID_URL"],
            raw=True)
        print(token)

        return RestClient(f'http://localhost:{port}',
                          token=token,
                          timeout=timeout,
                          retries=0)
Example #19
 async def _do_work_claim(self) -> bool:
     """Claim a bundle and perform work on it."""
     # 1. Ask the LTA DB for the next Bundle to be checked
     # configure a RestClient to talk to the LTA DB
     lta_rc = RestClient(self.lta_rest_url,
                         token=self.lta_rest_token,
                         timeout=self.work_timeout_seconds,
                         retries=self.work_retries)
     self.logger.info(
         "Asking the LTA DB for a Bundle to check for TransferRequest being finished."
     )
     pop_body = {"claimant": f"{self.name}-{self.instance_uuid}"}
     response = await lta_rc.request(
         'POST',
         f'/Bundles/actions/pop?source={self.source_site}&dest={self.dest_site}&status={self.input_status}',
         pop_body)
     self.logger.info(f"LTA DB responded with: {response}")
     bundle = response["bundle"]
     if not bundle:
         self.logger.info(
             "LTA DB did not provide a Bundle to check. Going on vacation.")
         return False
     # update the TransferRequest that spawned the Bundle, if necessary
     await self._update_transfer_request(lta_rc, bundle)
     # even if we processed a Bundle, take a break between Bundles
     return False
Example #20
async def patch_status_heartbeat(component: Component) -> bool:
    """PATCH /status/{component} to update LTA with a status heartbeat."""
    component.logger.info("Sending status heartbeat")
    # determine which resource to PATCH
    status_route = f"/status/{component.type}"
    status_url = urljoin(component.lta_rest_url, status_route)
    # determine the body to PATCH with
    status_body = {
        component.name: {
            "timestamp": datetime.utcnow().isoformat(),
            "last_work_begin_timestamp": component.last_work_begin_timestamp,
            "last_work_end_timestamp": component.last_work_end_timestamp,
        }
    }
    # ask the base class to annotate the status body
    status_update = component._do_status()
    status_body[component.name].update(status_update)
    # attempt to PATCH the status resource
    component.logger.info(f"PATCH {status_url} - {status_body}")
    try:
        rc = RestClient(component.lta_rest_url,
                        token=component.lta_rest_token,
                        timeout=component.heartbeat_patch_timeout_seconds,
                        retries=component.heartbeat_patch_retries)
        # Use the RestClient to PATCH our heartbeat to the LTA DB
        await rc.request("PATCH", status_route, status_body)
    except Exception as e:
        # if there was a problem, yo I'll solve it
        component.logger.error(f"Error trying to PATCH {status_route} with heartbeat")
        component.logger.error(f"Error was: '{e}'", exc_info=True)
        return False
    # indicate to the caller that the heartbeat was successful
    return True
Example #21
 async def _add_location_to_file_catalog(self, bundle_file: Dict[str, Any],
                                         dest_path: str) -> bool:
     """Update File Catalog record with new Data Warehouse location."""
     # configure a RestClient to talk to the File Catalog
     fc_rc = RestClient(self.file_catalog_rest_url,
                        token=self.file_catalog_rest_token,
                        timeout=self.work_timeout_seconds,
                        retries=self.work_retries)
     # extract the right variables from the metadata structure
     fc_path = dest_path
     fc_uuid = bundle_file["uuid"]
     # add the new location to the File Catalog
     new_location = {
         "locations": [{
             "site": "WIPAC",
             "path": f"{fc_path}",
         }]
     }
     self.logger.info(
         f"POST /api/files/{fc_uuid}/locations - {new_location}")
     # POST /api/files/{uuid}/locations will de-dupe locations for us
     await fc_rc.request("POST", f"/api/files/{fc_uuid}/locations",
                         new_location)
     # indicate that our file catalog updates were successful
     return True
Example #22
async def clear_catalog():
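    """Remove every file record from the File Catalog, 50 at a time."""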
    # configure a RestClient from the environment
    config = from_environment(EXPECTED_CONFIG)
    rc = RestClient(config["FILE_CATALOG_REST_URL"], token=config["FILE_CATALOG_REST_TOKEN"])
    # while there are still files
    clearing = True
    while clearing:
        try:
            # get a list of up to 50 files
            response = await rc.request("GET", "/api/files?start=0&limit=50")
            files = response["files"]
            # for each file that we found
            for x in files:
                # remove it from the file catalog
                uuid = x["uuid"]
                logical_name = x["logical_name"]
                print(f"DELETE /api/files/{uuid} - {logical_name}")
                response2 = await rc.request("DELETE", f"/api/files/{uuid}")
            # if we didn't get any files back, we're done
            if len(files) < 1:
                clearing = False
        except Exception as e:
            # whoopsy daisy...
            clearing = False
            print(e)
Example #23
async def clear_lta_transfer_requests():
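    """Remove every TransferRequest from the LTA DB."""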
    # configure a RestClient from the environment
    config = from_environment(EXPECTED_CONFIG)
    rc = RestClient(config["LTA_REST_URL"], token=config["LTA_REST_TOKEN"])
    # while there are still transfer requests
    clearing = True
    while clearing:
        try:
            # get a list of up to 50 transfer requests
            # technically a lie; the LTA DB honors neither start nor limit
            response = await rc.request("GET", "/TransferRequests?start=0&limit=50")
            results = response["results"]
            # for each file that we found
            for x in results:
                # remove it from the file catalog
                uuid = x["uuid"]
                print(f"DELETE /TransferRequests/{uuid}")
                response2 = await rc.request("DELETE", f"/TransferRequests/{uuid}")
            # if we didn't get any files back, we're done
            if len(results) < 1:
                clearing = False
        except Exception as e:
            # whoopsy daisy...
            clearing = False
            print(e)
Example #24
async def test_22_request(requests_mock: Mock) -> None:
    """Test `async request()`."""
    rpc = RestClient("http://test", "passkey", timeout=0.1)
    requests_mock.get("/test", content=b'{"foo"}')

    with pytest.raises(Exception):
        _ = await rpc.request("GET", "test", {})
Example #25
async def test_20_timeout(requests_mock: Mock) -> None:
    """Test timeout in `async request()`."""
    rpc = RestClient("http://test", "passkey", timeout=0.1, backoff=False)
    requests_mock.post("/test", exc=Timeout)

    with pytest.raises(Timeout):
        _ = await rpc.request("POST", "test", {})
Example #26
async def test_21_ssl_error(requests_mock: Mock) -> None:
    """Test ssl error in `async request()`."""
    rpc = RestClient("http://test", "passkey", timeout=0.1, backoff=False)
    requests_mock.post("/test", exc=SSLError)

    with pytest.raises(SSLError):
        _ = await rpc.request("POST", "test", {})
Example #27
 async def _do_work_claim(self) -> bool:
     """Claim a transfer request and perform work on it."""
     # 1. Ask the LTA DB for the next TransferRequest to be picked
     # configure a RestClient to talk to the LTA DB
     lta_rc = RestClient(self.lta_rest_url,
                         token=self.lta_rest_token,
                         timeout=self.work_timeout_seconds,
                         retries=self.work_retries)
     self.logger.info("Asking the LTA DB for a TransferRequest to work on.")
     pop_body = {"claimant": f"{self.name}-{self.instance_uuid}"}
     response = await lta_rc.request(
         'POST',
         f'/TransferRequests/actions/pop?source={self.source_site}&dest={self.dest_site}',
         pop_body)
     self.logger.info(f"LTA DB responded with: {response}")
     tr = response["transfer_request"]
     if not tr:
         self.logger.info(
             "LTA DB did not provide a TransferRequest to work on. Going on vacation."
         )
         return False
     # process the TransferRequest that we were given
     try:
         await self._do_work_transfer_request(lta_rc, tr)
     except Exception as e:
         await self._quarantine_transfer_request(lta_rc, tr, f"{e}")
         raise e
     # if we were successful at processing work, let the caller know
     return True
Example #28
async def test_11_request(requests_mock: Mock) -> None:
    """Test request in `async request()`."""
    rpc = RestClient("http://test", "passkey", timeout=0.1)
    requests_mock.get("/test", content=b"")
    ret = await rpc.request("GET", "test", {})

    assert requests_mock.called
    assert ret is None
Example #29
 def __init__(self, lta_rest_url: str, lta_rest_token: str,
              monitoring_interval: str = '60', logger: Any = None) -> None:
     """Initialize a Monitor object."""
     self.logger = logger if logger else logging
     self.interval = int(monitoring_interval)
     self.rest = RestClient(lta_rest_url, lta_rest_token,
                            timeout=self.interval//10, retries=1)
     self.running = False
Example #30
    def test_58_patch_files_uuid_unique_locations(self):
        """Test that locations is unique when updating a file."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the files to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }
        metadata2 = {
            'logical_name': '/blah/data/exp/IceCube/blah2.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah2.dat'}]
        }

        # this is a PATCH to metadata; steps on metadata2's location
        patch1 = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar2')},
            'file_size': 2,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah2.dat'}]
        }

        # create the first file; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # create the second file; should be OK
        data = r.request_seq('POST', '/api/files', metadata2)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)

        # try to update the first file with a patch; should NOT be OK
        with self.assertRaises(Exception):
            r.request_seq('PATCH', '/api/files/' + uid, patch1)
Example #31
    def test_get(db_rc: RestClient) -> None:
        """Run some test queries."""
        databases = db_rc.request_seq("GET", "/databases/names")
        print(databases)

        for db in databases["databases"]:
            db_request_body = {"database": db}
            collections = db_rc.request_seq("GET", "/collections/names",
                                            db_request_body)
            print(collections)
            for coll in collections["collections"]:
                coll_request_body = {"database": db, "collection": coll}
                histograms = db_rc.request_seq(
                    "GET", "/collections/histograms/names", coll_request_body)
                print(histograms)
                for histo_name in histograms["histograms"]:
                    histo_request_body = {
                        "database": db,
                        "collection": coll,
                        "name": histo_name,
                    }
                    histo = db_rc.request_seq("GET", "/histogram",
                                              histo_request_body)
                    print(histo)
                filelist = db_rc.request_seq("GET", "/files/names",
                                             coll_request_body)
                print(filelist)

        db_rc.close()
Example #32
    def __init__(self, iceprodv1_pass: str, iceprodv2_token: str):
        if not iceprodv1_pass:
            raise RuntimeError("Missing IceProd v1 DB password")
        elif not iceprodv2_token:
            raise RuntimeError("Missing IceProd v2 REST token")

        self._iceprodv1_pass = iceprodv1_pass
        self._iceprodv2_rc = RestClient(
            "https://iceprod2-api.icecube.wisc.edu", iceprodv2_token)
Example #33
async def test_30_request(requests_mock: Mock) -> None:
    """Test `async request()` with headers."""
    rpc = RestClient("http://test", "passkey", timeout=0.1)
    requests_mock.get("/test", content=b"")
    ret = await rpc.request("GET", "test", {}, {'foo': 'bar'})

    assert requests_mock.called
    assert requests_mock.last_request.headers['foo'] == 'bar'
    assert ret is None
Example #34
def db_rc() -> RestClient:
    """Get database REST client."""
    token_json = requests.get(
        "http://localhost:8888/token?scope=maddash:production"
    ).json()
    rc = RestClient(
        "http://localhost:8080", token=token_json["access"], timeout=5, retries=0
    )
    return rc
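Example #35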
    def test_10_collections(self):
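        """Test collection creation and listing."""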
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections')
        self.assertIn('collections', data)
        self.assertIn(uid,{row['uuid'] for row in data['collections']})
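Example #36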
    def test_21_collection_by_name(self):
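        """Test retrieving a collection by its name."""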
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/blah')
        for k in metadata:
            self.assertIn(k, data)
            self.assertEqual(metadata[k], data[k])
Example #37
def create_simprod_dbms_rest_connection() -> RestClient:
    """Return REST Client connection object."""
    token_request_url = urljoin(token_server_url, "token?scope=maddash:web")

    token_json = requests.get(token_request_url).json()
    rc = RestClient(dbms_server_url,
                    token=token_json["access"],
                    timeout=5,
                    retries=0)

    return rc
Example #38
def index(
    paths: List[str],
    blacklist: List[str],
    rest_client_args: RestClientArgs,
    site: str,
    indexer_flags: IndexerFlags,
) -> List[str]:
    """Index paths, excluding any matching the blacklist.

    Return all child paths nested under any directories.
    """
    if not isinstance(paths, list):
        raise TypeError(f"`paths` object is not list {paths}")
    if not paths:
        return []

    # Filter
    paths = file_utils.sorted_unique_filepaths(list_of_filepaths=paths)
    paths = [p for p in paths if not path_in_blacklist(p, blacklist)]

    # Prep
    fc_rc = RestClient(
        rest_client_args["url"],
        token=rest_client_args["token"],
        timeout=rest_client_args["timeout"],
        retries=rest_client_args["retries"],
    )
    manager = MetadataManager(
        site,
        basic_only=indexer_flags["basic_only"],
        iceprodv2_rc_token=indexer_flags["iceprodv2_rc_token"],
        iceprodv1_db_pass=indexer_flags["iceprodv1_db_pass"],
    )

    # Index
    child_paths = asyncio.get_event_loop().run_until_complete(
        index_paths(paths, manager, fc_rc, indexer_flags["patch"],
                    indexer_flags["dryrun"]))

    fc_rc.close()
    return child_paths
Example #39
    def test_50_post_files_unique_logical_name(self):
        """Test that logical_name is unique when creating a new file."""
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        # define the file to be created
        metadata = {
            'logical_name': '/blah/data/exp/IceCube/blah.dat',
            'checksum': {'sha512': hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site': u'WIPAC', u'path': u'/blah/data/exp/IceCube/blah.dat'}]
        }

        # create the file the first time; should be OK
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        # check that the file was created properly
        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))

        # create the file the second time; should NOT be OK
        with self.assertRaises(Exception):
            data = r.request_seq('POST', '/api/files', metadata)

        # check that the second file was not created
        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
Example #40
def test_102_request_stream() -> None:
    """Test `request_stream()` where there's only one response."""
    mock_url = "http://test"
    rpc = RestClient(mock_url, "passkey", timeout=1)

    one_liners = [
        [b'"with-a-newline"\n'],
        [b'"w/o-a-newline"'],
    ]
    for expected_stream in one_liners:
        json_stream = [j for t in expected_stream
                       if (j := _jsonify(t))]  # no blanks
Example #41
 async def _add_bundle_to_file_catalog(self, lta_rc: RestClient,
                                       bundle: BundleType) -> bool:
     """Add a FileCatalog entry for the bundle, then update existing records."""
     # configure a RestClient to talk to the File Catalog
     fc_rc = RestClient(self.file_catalog_rest_url,
                        token=self.file_catalog_rest_token,
                        timeout=self.work_timeout_seconds,
                        retries=self.work_retries)
     # determine the path where the bundle is stored on hpss
     data_warehouse_path = bundle["path"]
     basename = os.path.basename(bundle["bundle_path"])
     stupid_python_path = os.path.sep.join(
         [self.tape_base_path, data_warehouse_path, basename])
     hpss_path = os.path.normpath(stupid_python_path)
     # create a File Catalog entry for the bundle itself
     bundle_uuid = bundle["uuid"]
     right_now = now()
     file_record = {
         "uuid":
         bundle_uuid,
         "logical_name":
         hpss_path,
         "checksum":
         bundle["checksum"],
         "locations": [{
             "site": "NERSC",
             "path": hpss_path,
             "hpss": True,
             "online": False,
         }],
         "file_size":
         bundle["size"],
         "lta": {
             "date_archived": right_now,
         },
     }
     # add the bundle file to the File Catalog
     try:
         self.logger.info(f"POST /api/files - {hpss_path}")
         await fc_rc.request("POST", "/api/files", file_record)
     except Exception as e:
         self.logger.error(f"Error: POST /api/files - {hpss_path}")
         self.logger.error(f"Message: {e}")
         bundle_uuid = bundle["uuid"]
         self.logger.info(f"PATCH /api/files/{bundle_uuid}")
         await fc_rc.request("PATCH", f"/api/files/{bundle_uuid}",
                             file_record)
     # update the File Catalog for each file contained in the bundle
     await self._update_files_in_file_catalog(fc_rc, lta_rc, bundle,
                                              hpss_path)
     # indicate that our file catalog updates were successful
     return True
Example #42
 def client(role='read', timeout=0.1):
     if CONFIG['AUTH_ISSUER']:
         r = requests.get(CONFIG['AUTH_ISSUER'] + '/token',
                          params={'scope': f'prometheus-reconfig:{role}'})
         r.raise_for_status()
         t = r.json()['access']
     else:
         raise Exception('testing token service not defined')
     print(t)
     return RestClient(f'http://localhost:{http_server_port}',
                       token=t,
                       timeout=timeout,
                       retries=0)
Example #43
    def test_30_archive(self):
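        """Test that archived locations are excluded by default and found via a query."""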
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            u'logical_name': u'blah',
            u'checksum': {u'sha512':hex('foo bar')},
            u'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        metadata2 = {
            u'logical_name': u'blah2',
            u'checksum': {u'sha512':hex('foo bar baz')},
            u'file_size': 2,
            u'locations': [{u'site':u'test',u'path':u'blah.dat',u'archive':True}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        url = data['file']
        uid = url.split('/')[-1]
        data = r.request_seq('POST', '/api/files', metadata2)
        url2 = data['file']
        uid2 = url2.split('/')[-1]

        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'query':json_encode({'locations.archive':True})})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertFalse(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))
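Example #44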
    def test_30_collection_files(self):
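        """Test listing the files that belong to a collection."""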
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            'collection_name': 'blah',
            'owner': 'foo',
        }
        data = r.request_seq('POST', '/api/collections', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('collection', data)
        url = data['collection']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/blah/files')
        self.assertEqual(data['files'], [])

        # add a file
        metadata = {
            'logical_name': 'blah',
            'checksum': {'sha512':hex('foo bar')},
            'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('file', data)
        url = data['file']
        uid = url.split('/')[-1]

        data = r.request_seq('GET', '/api/collections/blah/files',
                             {'keys':'uuid|logical_name|checksum|locations'})
        self.assertEqual(len(data['files']), 1)
        self.assertEqual(data['files'][0]['uuid'], uid)
        self.assertEqual(data['files'][0]['checksum'], metadata['checksum'])
Example #45
def main():
    parser = argparse.ArgumentParser(description='manually run IceProd i3exec')
    parser.add_argument('-t', '--token',help='auth token')
    parser.add_argument('-d','--dataset',type=int,help='dataset number')
    parser.add_argument('-j','--job',type=int,help='job number (optional)')
    
    args = parser.parse_args()
    args = vars(args)

    logging.basicConfig(level=logging.DEBUG)
    
    rpc = RestClient('https://iceprod2-api.icecube.wisc.edu', args['token'])
    
    datasets = rpc.request_seq('GET', '/datasets', {'keys': 'dataset_id|dataset'})
    dataset_id = None
    for d in datasets:
        if datasets[d]['dataset'] == args['dataset']:
            dataset_id = d
            break
    else:
        raise Exception('bad dataset num')
    dataset = rpc.request_seq('GET', f'/datasets/{dataset_id}')
    config = rpc.request_seq('GET', f'/config/{dataset_id}')

    jobs = rpc.request_seq('GET', f'/datasets/{dataset_id}/jobs', {'status': 'processing|errors'})
    if args['job']:
        jobs = {j:jobs[j] for j in jobs if jobs[j]['job_index'] == args['job']}
    if not jobs:
        raise Exception('no jobs found')

    for job_id in jobs:
        tasks = rpc.request_seq('GET', f'/datasets/{dataset_id}/tasks',
                                {'job_id': job_id, 'keys': 'task_id|task_index|name|depends',
                                 'status': 'waiting|queued|reset|failed'})
        for task_id in sorted(tasks, key=lambda t:tasks[t]['task_index']):
            print(f'processing {dataset["dataset"]} {jobs[job_id]["job_index"]} {tasks[task_id]["name"]}')
            write_config(config, 'config.json', dataset_id, args['dataset'], task_id)
            run(token=args['token'], config='config.json',
                jobs_submitted=dataset['jobs_submitted'],
                job=jobs[job_id]['job_index'],
                task=tasks[task_id]['name'])
Example #46
    def test_20_file(self):
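        """Test GET, PUT, PATCH, and DELETE on a single file record."""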
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            u'logical_name': u'blah',
            u'checksum': {u'sha512':hex('foo bar')},
            u'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}]
        }
        data = r.request_seq('POST', '/api/files', metadata)

        url = data['file']

        data = r.request_seq('GET', url)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        metadata['test'] = 100

        metadata_cpy = metadata.copy()
        metadata_cpy['uuid'] = 'something else'
        with self.assertRaises(Exception):
            data = r.request_seq('PUT', url, metadata_cpy)

        data = r.request_seq('PUT', url, metadata)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        data = r.request_seq('GET', url)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        metadata['test2'] = 200
        data = r.request_seq('PATCH', url, {'test2':200})
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        data = r.request_seq('GET', url)
        data.pop('_links')
        data.pop('meta_modify_date')
        data.pop('uuid')
        self.assertDictEqual(metadata, data)

        data = r.request_seq('DELETE', url)

        # second delete should raise error
        with self.assertRaises(Exception):
            data = r.request_seq('DELETE', url)

        with self.assertRaises(Exception):
            data = r.request_seq('POST', url)
Example #47
class ServerComms:
    """
    Setup JSONRPC communications with the IceProd server.

    Args:
        url (str): address to connect to
        passkey (str): passkey for authorization/authentication
        config (:py:class:`iceprod.server.exe.Config`): Config object
        **kwargs: passed to JSONRPC
    """
    def __init__(self, url, passkey, config, **kwargs):
        self.rest = RestClient(address=url,token=passkey,**kwargs)

    async def download_task(self, gridspec, resources={}):
        """
        Download new task(s) from the server.

        Args:
            gridspec (str): gridspec the pilot was submitted from
            resources (dict): resources available in the pilot

        Returns:
            list: list of task configs
        """
        hostname = functions.gethostname()
        domain = '.'.join(hostname.split('.')[-2:])
        try:
            ifaces = functions.getInterfaces()
        except Exception:
            ifaces = None
        resources = deepcopy(resources)
        if 'gpu' in resources and isinstance(resources['gpu'],list):
            resources['gpu'] = len(resources['gpu'])
        os_type = os.environ['OS_ARCH'] if 'OS_ARCH' in os.environ else None
        if os_type:
            resources['os'] = os_type
        task = await self.rest.request('POST', '/task_actions/process',
                {'gridspec': gridspec,
                 'hostname': hostname, 
                 'domain': domain,
                 'ifaces': ifaces,
                 'requirements': resources,
                })
        if not task:
            return None

        # get config
        try:
            config = await self.rest.request('GET', '/config/{}'.format(task['dataset_id']))
            if not isinstance(config, dataclasses.Job):
                config = dict_to_dataclasses(config)
        except Exception:
            logging.warning('failed to get dataset config for dataset %s', task['dataset_id'])
            await self.task_kill(task['task_id'], dataset_id=task['dataset_id'],
                                 reason='failed to download dataset config')
            raise

        # fill in options
        if 'options' not in config:
            config['options'] = {}
        config['options']['task_id'] = task['task_id']
        config['options']['job_id'] = task['job_id']
        config['options']['dataset_id'] = task['dataset_id']
        config['options']['task'] = task['task_index']
        if 'requirements' in task:
            config['options']['resources'] = {k:task['requirements'][k] for k in Resources.defaults}
        try:
            job = await self.rest.request('GET', '/jobs/{}'.format(task['job_id']))
            config['options']['job'] = job['job_index']
        except Exception:
            logging.warning('failed to get job %s', task['job_id'])
            await self.task_kill(task['task_id'], dataset_id=task['dataset_id'],
                                 reason='failed to download job')
            raise
        try:
            dataset = await self.rest.request('GET', '/datasets/{}'.format(task['dataset_id']))
            config['options']['dataset'] = dataset['dataset']
            config['options']['jobs_submitted'] = dataset['jobs_submitted']
            config['options']['tasks_submitted'] = dataset['tasks_submitted']
            config['options']['debug'] = dataset['debug']
        except Exception:
            logging.warning('failed to get dataset %s', task['dataset_id'])
            await self.task_kill(task['task_id'], dataset_id=task['dataset_id'],
                                 reason='failed to download dataset')
            raise
        return [config]

    async def task_files(self, dataset_id, task_id):
        """
        Get the task files for a dataset and task.

        Args:
            dataset_id (str): dataset_id
            task_id (str): task_id

        Returns:
            list: list of :py:class:`iceprod.core.dataclasses.Data` objects
        """
        ret = await self.rest.request('GET', '/datasets/{}/task_files/{}'.format(dataset_id, task_id))
        data = []
        for r in ret['files']:
            d = dataclasses.Data(r)
            if not d.valid():
                raise Exception('returned Data not valid')
            data.append(d)
        return data

    async def processing(self, task_id):
        """
        Tell the server that we are processing this task.

        Only used for single task config, not for pilots.

        Args:
            task_id (str): task_id to mark as processing
        """
        await self.rest.request('PUT', '/tasks/{}/status'.format(task_id),
                              {'status': 'processing'})

    async def finish_task(self, task_id, dataset_id=None, stats={},
                          stat_filter=None, start_time=None, resources=None):
        """
        Finish a task.

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            stats (dict): (optional) task statistics
            stat_filter (iterable): (optional) stat filter by keywords
            start_time (float): (optional) task start time in unix seconds
            resources (dict): (optional) task resource usage
        """
        if stat_filter:
            # filter task stats
            stats = {k:stats[k] for k in stats if k in stat_filter}

        hostname = functions.gethostname()
        domain = '.'.join(hostname.split('.')[-2:])
        if start_time:
            t = time.time() - start_time
        elif resources and 'time' in resources and resources['time']:
            t = int(resources['time']*3600)
        else:
            t = None
        iceprod_stats = {
            'hostname': hostname,
            'domain': domain,
            'time_used': t,
            'task_stats': stats,
            'time': datetime.utcnow().isoformat(),
        }
        if resources:
            iceprod_stats['resources'] = resources
        if dataset_id:
            iceprod_stats['dataset_id'] = dataset_id

        await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id),
                                iceprod_stats)

        data = {}
        if t:
            data['time_used'] = t
        await self.rest.request('POST', '/tasks/{}/task_actions/complete'.format(task_id), data)

    async def still_running(self, task_id):
        """
        Check if the task should still be running according to the DB.

        Args:
            task_id (str): task_id of task
        """
        ret = await self.rest.request('GET', '/tasks/{}'.format(task_id))
        if (not ret) or 'status' not in ret or ret['status'] != 'processing':
            raise Exception('task should be stopped')

    async def task_error(self, task_id, dataset_id=None, stats={}, start_time=None, reason=None, resources=None):
        """
        Tell the server about the error the task experienced.

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            stats (dict): (optional) task statistics
            start_time (float): (optional) task start time in unix seconds
            reason (str): (optional) one-line summary of error
            resources (dict): (optional) task resource usage
        """
        t = None
        iceprod_stats = {}
        try:
            hostname = functions.gethostname()
            domain = '.'.join(hostname.split('.')[-2:])
            if start_time:
                t = time.time() - start_time
            elif resources and 'time' in resources and resources['time']:
                t = int(resources['time']*3600)
            else:
                t = None
            iceprod_stats = {
                'task_id': task_id,
                'hostname': hostname,
                'domain': domain,
                'time_used': t,
                'task_stats': json.dumps(stats),
                'time': datetime.utcnow().isoformat(),
                'error_summary': reason if reason else '',
            }
            if dataset_id:
                iceprod_stats['dataset_id'] = dataset_id
            if resources:
                iceprod_stats['resources'] = resources
        except Exception:
            logging.warning('failed to collect error info', exc_info=True)

        try:
            await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id),
                                    iceprod_stats)
        except Exception:
            logging.warning('failed to post task_stats for %r', task_id, exc_info=True)

        data = {}
        if t:
            data['time_used'] = t
        if resources:
            data['resources'] = resources
        if reason:
            data['reason'] = reason
        await self.rest.request('POST', '/tasks/{}/task_actions/reset'.format(task_id), data)

    async def task_kill(self, task_id, dataset_id=None, resources=None, reason=None, message=None):
        """
        Tell the server that we killed a task.

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            resources (dict): (optional) used resources
            reason (str): (optional) short summary for kill
            message (str): (optional) long message to replace log upload
        """
        if not reason:
            reason = 'killed'
        if not message:
            message = reason
        try:
            hostname = functions.gethostname()
            domain = '.'.join(hostname.split('.')[-2:])
            iceprod_stats = {
                'task_id': task_id,
                'hostname': hostname,
                'domain': domain,
                'time': datetime.utcnow().isoformat(),
                'error_summary': reason if reason else '',
            }
            if dataset_id:
                iceprod_stats['dataset_id'] = dataset_id
            if resources:
                iceprod_stats['resources'] = resources
        except Exception:
            logging.warning('failed to collect error info', exc_info=True)
            iceprod_stats = {}
        try:
            await self.rest.request('POST', '/tasks/{}/task_stats'.format(task_id),
                                    iceprod_stats)
        except Exception:
            logging.warning('failed to post task_stats for %r', task_id, exc_info=True)

        data = {}
        if resources and 'time' in resources and resources['time']:
            data['time_used'] = resources['time']*3600.
        if resources:
            data['resources'] = resources
        if reason:
            data['reason'] = reason
        else:
            data['data'] = 'task killed'
        await self.rest.request('POST', '/tasks/{}/task_actions/reset'.format(task_id), data)

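        # upload the kill message as the task's stdlog, then post empty
        # stdout/stderr entries for the task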
        data = {'name': 'stdlog', 'task_id': task_id}
        if dataset_id:
            data['dataset_id'] = dataset_id
        if message:
            data['data'] = message
        elif reason:
            data['data'] = reason
        else:
            data['data'] = 'task killed'
        await self.rest.request('POST', '/logs', data)
        data.update({'name':'stdout', 'data': ''})
        await self.rest.request('POST', '/logs', data)
        data.update({'name':'stderr', 'data': ''})
        await self.rest.request('POST', '/logs', data)

    async def _upload_logfile(self, name, filename, task_id=None, dataset_id=None):
        """Upload a log file"""
        data = {'name': name}
        if task_id:
            data['task_id'] = task_id
        if dataset_id:
            data['dataset_id'] = dataset_id
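        # if the log file cannot be read, upload the exception text instead so
        # the failure is still visible on the server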
        try:
            with open(filename) as f:
                data['data'] = f.read()
        except Exception as e:
            data['data'] = str(e)
        await self.rest.request('POST', '/logs', data)

    async def uploadLog(self, **kwargs):
        """Upload log file"""
        logging.getLogger().handlers[0].flush()
        await self._upload_logfile('stdlog', os.path.abspath(constants['stdlog']), **kwargs)

    async def uploadErr(self, filename=None, **kwargs):
        """Upload stderr file"""
        if not filename:
            sys.stderr.flush()
            filename = os.path.abspath(constants['stderr'])
        await self._upload_logfile('stderr', filename, **kwargs)

    async def uploadOut(self, filename=None, **kwargs):
        """Upload stdout file"""
        if not filename:
            sys.stdout.flush()
            filename = os.path.abspath(constants['stdout'])
        await self._upload_logfile('stdout', filename, **kwargs)

    async def create_pilot(self, **kwargs):
        """
        Create an entry in the pilot table.

        Args:
            **kwargs: passed through to rest function
        Returns:
            str: pilot id
        """
        ret = await self.rest.request('POST', '/pilots', kwargs)
        return ret['result']

    async def update_pilot(self, pilot_id, **kwargs):
        """
        Update the pilot table.

        Args:
            pilot_id (str): pilot id
            **kwargs: passed through to rest function
        """
        await self.rest.request('PATCH', '/pilots/{}'.format(pilot_id), kwargs)

    async def delete_pilot(self, pilot_id, **kwargs):
        """
        Delete the pilot.

        Args:
            pilot_id (str): pilot id
        """
        await self.rest.request('DELETE', '/pilots/{}'.format(pilot_id))


    # --- synchronous versions to be used from a signal handler
    # --- or other non-async code
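    # --- these mirror the async methods above using RestClient.request_seq,
    # --- which issues blocking requests and so needs no running event loop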

    def task_kill_sync(self, task_id, dataset_id=None, resources=None, reason=None, message=None):
        """
        Tell the server that we killed a task (synchronous version).

        Args:
            task_id (str): task_id of task
            dataset_id (str): (optional) dataset_id of task
            resources (dict): (optional) used resources
            reason (str): (optional) short summary for kill
            message (str): (optional) long message to replace log upload
        """
        if not reason:
            reason = 'killed'
        if not message:
            message = reason
        try:
            hostname = functions.gethostname()
            domain = '.'.join(hostname.split('.')[-2:])
            iceprod_stats = {
                'task_id': task_id,
                'hostname': hostname,
                'domain': domain,
                'time': datetime.utcnow().isoformat(),
                'error_summary': reason if reason else '',
            }
            if dataset_id:
                iceprod_stats['dataset_id'] = dataset_id
            if resources:
                iceprod_stats['resources'] = resources
        except Exception:
            logging.warning('failed to collect error info', exc_info=True)
            iceprod_stats = {}
        try:
            self.rest.request_seq('POST', '/tasks/{}/task_stats'.format(task_id),
                                  iceprod_stats)
        except Exception:
            logging.warning('failed to post task_stats for %r', task_id, exc_info=True)

        data = {}
        if resources and 'time' in resources and resources['time']:
            data['time_used'] = resources['time']*3600.
        if resources:
            data['resources'] = resources
        if reason:
            data['reason'] = reason
        else:
            data['data'] = 'task killed'
        self.rest.request_seq('POST', '/tasks/{}/task_actions/reset'.format(task_id), data)

        data = {'name': 'stdlog', 'task_id': task_id}
        if dataset_id:
            data['dataset_id'] = dataset_id
        if message:
            data['data'] = message
        elif reason:
            data['data'] = reason
        else:
            data['data'] = 'task killed'
        self.rest.request_seq('POST', '/logs', data)
        data.update({'name':'stdout', 'data': ''})
        self.rest.request_seq('POST', '/logs', data)
        data.update({'name':'stderr', 'data': ''})
        self.rest.request_seq('POST', '/logs', data)

    def update_pilot_sync(self, pilot_id, **kwargs):
        """
        Update the pilot table (synchronous version).

        Args:
            pilot_id (str): pilot id
            **kwargs: passed through to rest function
        """
        self.rest.request_seq('PATCH', '/pilots/{}'.format(pilot_id), kwargs)

    def delete_pilot_sync(self, pilot_id, **kwargs):
        """
        Delete the pilot (synchronous version).

        Args:
            pilot_id (str): pilot id
        """
        self.rest.request_seq('DELETE', '/pilots/{}'.format(pilot_id))
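
    # A minimal usage sketch (not part of the original module): how calling code
    # might drive one task through its lifecycle with this client. `comms`,
    # `task_id`, `dataset_id`, and `run_payload` are hypothetical names.
    #
    #     await comms.processing(task_id)
    #     try:
    #         stats = await run_payload()
    #         await comms.finish_task(task_id, dataset_id=dataset_id, stats=stats)
    #     except Exception as e:
    #         await comms.task_error(task_id, dataset_id=dataset_id, reason=str(e))
    #         await comms.uploadErr()
    #         raise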
Beispiel #48
0
    def test_40_simple_query(self):
        self.start_server()
        token = self.get_token()
        r = RestClient(self.address, token, timeout=1, retries=1)

        metadata = {
            u'logical_name': u'blah',
            u'checksum': {u'sha512':hex('foo bar')},
            u'file_size': 1,
            u'locations': [{u'site':u'test',u'path':u'blah.dat'}],
            u'processing_level':u'level2',
            u'run_number':12345,
            u'first_event':345,
            u'last_event':456,
            u'iceprod':{
                u'dataset':23453,
            },
            u'offline':{
                u'season':2017,
            },
        }
        metadata2 = {
            u'logical_name': u'blah2',
            u'checksum': {u'sha512':hex('foo bar baz')},
            u'file_size': 2,
            u'locations': [{u'site':u'test',u'path':u'blah2.dat'}],
            u'processing_level':u'level2',
            u'run_number':12356,
            u'first_event':578,
            u'last_event':698,
            u'iceprod':{
                u'dataset':23454,
            },
            u'offline':{
                u'season':2017,
            },
        }
        data = r.request_seq('POST', '/api/files', metadata)
        url = data['file']
        uid = url.split('/')[-1]
        data = r.request_seq('POST', '/api/files', metadata2)
        url2 = data['file']
        uid2 = url2.split('/')[-1]

        data = r.request_seq('GET', '/api/files')
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 2)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'processing_level':'level2'})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 2)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'run_number':12345})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'dataset':23454})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertFalse(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

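        # an 'event_id' query appears to match files whose [first_event, last_event]
        # range contains the value: 400 falls within 345-456 for the first file only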
        data = r.request_seq('GET', '/api/files', {'event_id':400})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))

        data = r.request_seq('GET', '/api/files', {'season':2017})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 2)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertTrue(any(uid2 == f['uuid'] for f in data['files']))

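        # 'keys' restricts the returned fields to a '|'-separated list;
        # 'uuid' is requested so the uuid assertions below still hold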
        data = r.request_seq('GET', '/api/files', {'event_id':400, 'keys':'|'.join(['checksum','file_size','uuid'])})
        self.assertIn('_links', data)
        self.assertIn('self', data['_links'])
        self.assertIn('files', data)
        self.assertEqual(len(data['files']), 1)
        self.assertTrue(any(uid == f['uuid'] for f in data['files']))
        self.assertFalse(any(uid2 == f['uuid'] for f in data['files']))
        self.assertIn('checksum', data['files'][0])
        self.assertIn('file_size', data['files'][0])
Beispiel #49
0
    def __init__(self, url, passkey, config, **kwargs):
        self.rest = RestClient(address=url, token=passkey, **kwargs)