Ejemplo n.º 1
1
class NewtIntegrationTest(BaseIntegrationTest):
    """Integration test that runs a job on a NEWT-type cluster through Girder.

    The test logs in to NEWT, exchanges the NEWT session id for a Girder
    token, creates and starts a cluster, then runs a job and checks its
    output.  Attributes such as ``_girder_url``, ``_girder_user``,
    ``_job_timeout``, ``_output_folder_id``, ``_job_id`` and ``_data`` are
    not set here; presumably they come from ``BaseIntegrationTest``
    (TODO confirm).
    """

    def __init__(self, name, girder_url, girder_user, girder_password, machine, job_timeout=60 * 5):
        """Store the target machine; all other arguments go to the base class.

        :param machine: host name placed in the cluster ``config.host`` field.
        :param job_timeout: forwarded to the base class and later passed to
            ``submit_job`` (presumably seconds; verify against the base class).
        """
        super(NewtIntegrationTest, self).__init__(name, girder_url, girder_user, girder_password, job_timeout)
        self._cluster_id = None
        self._machine = machine

    def setUp(self):
        """Authenticate with NEWT and Girder and locate the user's Private folder."""
        # First authenticate with NEWT
        self._session = Session()
        r = self._session.post(
            "https://newt.nersc.gov/newt/auth", {"username": self._girder_user, "password": self._girder_password}
        )

        self.assertEqual(r.status_code, 200)
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(r.json())
        self._newt_session_id = r.json()["newt_sessionid"]

        # Now authenticate with Girder using the session id.  One request is
        # enough; the original issued the identical PUT twice.
        url = "%s/api/v1/newt/authenticate/%s" % (self._girder_url, self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = "%s/api/v1" % self._girder_url
        self._client = GirderClient(apiUrl=url)
        # Reuse the token cookie that the authenticate call left on the session.
        self._client.token = self._session.cookies["girderToken"]

        user = self._client.get("user/me")
        self._user_id = user["_id"]
        private_folders = list(self._client.listFolder(self._user_id, "user", name="Private"))
        self.assertEqual(len(private_folders), 1)
        self._private_folder_id = private_folders[0]["_id"]

    def tearDown(self):
        """Run the base tearDown, then delete the cluster on a best-effort basis."""
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = "clusters/%s" % self._cluster_id
                self._client.delete(url)
            except Exception:
                # Cleanup failures are reported but must not fail the test.
                traceback.print_exc()

    def create_cluster(self):
        """Create a ``newt`` cluster, start it and wait until it is running.

        Fails the test when the cluster reports ``error`` (with the cluster
        log as message) or does not reach ``running`` within the polling
        budget below.
        """
        body = {"config": {"host": self._machine}, "name": "NewtIntegrationTest", "type": "newt"}

        r = self._client.post("clusters", data=json.dumps(body))
        self._cluster_id = r["_id"]

        # Now test the connection
        self._client.put("clusters/%s/start" % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get("clusters/%s/status" % self._cluster_id)

            if r["status"] == "running":
                break
            elif r["status"] == "error":
                r = self._client.get("clusters/%s/log" % self._cluster_id)
                self.fail(str(r))

            # Give up once the counter passes 9 one-second polls.
            if sleeps > 9:
                self.fail("Cluster never moved into running state")
            sleeps += 1

    def assert_output(self):
        """Check the output folder holds 4 items and the stdout file matches ``_data``."""
        # Materialise the listing: it may be a generator (setUp has to wrap
        # listFolder the same way), and generators do not support len().
        items = list(self._client.listItem(self._output_folder_id))
        self.assertEqual(len(items), 4)

        stdout_prefix = "CumulusIntegrationTestJob-%s.o" % self._job_id
        stdout_item = None
        for item in items:
            if item["name"].startswith(stdout_prefix):
                stdout_item = item
                break

        self.assertIsNotNone(stdout_item)
        # Use the matched item explicitly instead of the leftover loop variable.
        files = self._client.get("item/%s/files" % stdout_item["_id"])
        self.assertEqual(len(files), 1)

        url = "%s/api/v1/file/%s/download" % (self._girder_url, files[0]["_id"])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        """Run the full workflow; an ``HttpError`` fails the test with its response text."""
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
Ejemplo n.º 2
0
def test_upgrade_pipelines(admin_client: GirderClient):
    """Request a pipeline upgrade when no 'detector' entry is configured, then wait for jobs.

    The upgrade request is sent only when the pipeline configuration returned
    by the server has no ``'detector'`` key.  ``wait_for_jobs`` is called in
    both cases.
    """
    pipeline_config = admin_client.get('dive_configuration/pipelines')
    detector_missing = 'detector' not in pipeline_config
    if detector_missing:
        upgrade_payload = json.dumps(tasks.UPGRADE_JOB_DEFAULT_URLS)
        admin_client.post('dive_configuration/upgrade_pipelines', data=upgrade_payload)
    # NOTE(review): 1000 is passed positionally; presumably the maximum wait
    # timeout -- confirm against wait_for_jobs' signature.
    wait_for_jobs(admin_client, 1000)
Ejemplo n.º 3
0
    def _update_girder(taskflow, body):
        """Resolve the taskflow task id for the Celery task described by ``body``.

        On a retry (``body['retries'] > 0``) the id is read back from the
        current task's request headers and the retry count is stored there.
        Otherwise a new taskflow task is created through the Girder API.

        :param taskflow: value accepted by ``to_taskflow``; the result is read
            for the keys ``id``, ``girder_token`` and ``girder_api_url``.
        :param body: mapping read for the keys ``retries``, ``id`` and ``task``.
        :returns: tuple ``(taskflow, taskflow_task_id)``.
        """
        taskflow = to_taskflow(taskflow)
        taskflow_id = taskflow['id']
        girder_token = taskflow['girder_token']
        girder_api_url = taskflow['girder_api_url']

        # Build the client once.  The original also constructed a bare
        # GirderClient here and immediately overwrote it with this one.
        client = _create_girder_client(girder_api_url, girder_token)

        # If this is a retry then we have already created a task; get it from
        # the current task's headers.
        if body['retries'] > 0:
            taskflow_task_id = \
                current_task.request.headers[TASKFLOW_TASK_ID_HEADER]

            # Celery always fires the postrun handler with a state of SUCCESS
            # for retries. So we need to save the retries here so we can
            # determine in the postrun handler if the task is really complete.
            current_task.request.headers[TASKFLOW_RETRY_HEADER] \
                = body['retries']
        else:
            # This is a new task so create a taskflow task instance.  A
            # separate name keeps the ``body`` parameter intact.
            task_body = {
                'celeryTaskId': body['id'],
                'name': body['task']
            }
            url = 'taskflows/%s/tasks' % taskflow_id
            r = client.post(url, data=json.dumps(task_body))
            taskflow_task_id = r['_id']
        return taskflow, taskflow_task_id
Ejemplo n.º 4
0
    def _update_girder(taskflow, body):
        """Return the taskflow and the id of its task for the current Celery task.

        A retried task (``body['retries'] > 0``) reuses the id stored in the
        current task's request headers and records the retry count there.  A
        first attempt creates a new taskflow task via the Girder API.

        :param taskflow: input for ``to_taskflow``; the converted value must
            provide ``id``, ``girder_token`` and ``girder_api_url``.
        :param body: mapping providing ``retries`` and, for new tasks, ``id``
            and ``task``.
        :returns: ``(taskflow, taskflow_task_id)``.
        """
        taskflow = to_taskflow(taskflow)
        taskflow_id = taskflow['id']

        # A single client is enough: the original built a plain GirderClient
        # first and then discarded it in favour of this helper's result.
        client = _create_girder_client(taskflow['girder_api_url'],
                                       taskflow['girder_token'])

        if body['retries'] > 0:
            # Retry: the task already exists, so take its id from the headers
            # of the current task.
            headers = current_task.request.headers
            taskflow_task_id = headers[TASKFLOW_TASK_ID_HEADER]

            # Celery always fires the postrun handler with a state of SUCCESS
            # for retries. So we need to save the retries here so we can
            # determine in the postrun handler if the task is really complete.
            headers[TASKFLOW_RETRY_HEADER] = body['retries']
        else:
            # This is a new task so create a taskflow task instance.  The
            # payload gets its own name so the ``body`` parameter is not
            # rebound.
            payload = {'celeryTaskId': body['id'], 'name': body['task']}
            url = 'taskflows/%s/tasks' % taskflow_id
            created = client.post(url, data=json.dumps(payload))
            taskflow_task_id = created['_id']
        return taskflow, taskflow_task_id
Ejemplo n.º 5
0
def test_upload_zip_data(dataset: dict):
    """Upload one zip test dataset, trigger postprocessing and verify the folder metadata.

    ``dataset`` is read for the keys ``path``, ``name``, ``fps``, ``type`` and
    ``job_status``, plus the optional ``subDatasets`` list whose entries are
    read for ``name``, ``fps`` and ``type``.
    """
    credentials = zipUser
    client = GirderClient(apiUrl='http://localhost:8010/api/v1')
    client.authenticate(username=credentials['login'], password=credentials['password'])

    archive_path = localDataRoot / str(dataset['path'])
    parent_folder = getTestFolder(client)
    dataset_folder = client.createFolder(
        parent_folder['_id'],
        dataset['name'],
        metadata={'fps': dataset['fps'], 'type': dataset['type']},
    )
    # The archive is uploaded only when it exists as a regular file.
    if Path(archive_path).is_file():
        client.uploadFileToFolder(dataset_folder['_id'], str(archive_path))
    client.post(f'dive_rpc/postprocess/{dataset_folder["_id"]}')
    wait_for_jobs(client, max_wait_timeout=30, expected_status=dataset['job_status'])

    refreshed_folder = client.getFolder(dataset_folder['_id'])

    if dataset.get('subDatasets', False):
        # Verify sub datasets if they exist.  An expected sub dataset without
        # a child folder of the same name is skipped, not failed.
        child_folders = list(client.listFolder(dataset_folder['_id']))
        for expected in dataset["subDatasets"]:
            match = next(
                (child for child in child_folders if child["name"] == expected["name"]),
                None,
            )
            if match is None:
                continue
            child_meta = match.get("meta", {})
            assert child_meta.get("fps", -1) == expected["fps"]
            assert child_meta.get("type", "") == expected["type"]
            assert child_meta.get("annotate", False)
    elif dataset['job_status'] == JobStatus.SUCCESS:
        folder_meta = refreshed_folder['meta']
        assert folder_meta.get("annotate", False)
        assert type(folder_meta.get("fps")) in [int, float]
        assert type(folder_meta.get("type")) == str
    else:
        # Any other job status must leave the folder unmarked.
        assert refreshed_folder['meta'].get("annotate", None) is None
class NewtIntegrationTest(BaseIntegrationTest):
    """End-to-end test of job submission to a NEWT-type cluster via Girder.

    Steps: log in to NEWT, trade the NEWT session id for a Girder token,
    create and start a cluster, run a job, verify the job output.  The
    attributes ``_girder_url``, ``_girder_user``, ``_girder_password``,
    ``_job_timeout``, ``_output_folder_id``, ``_job_id`` and ``_data`` are
    read but never assigned in this class; presumably the base class
    provides them (TODO confirm).
    """

    def __init__(self,
                 name,
                 girder_url,
                 girder_user,
                 girder_password,
                 machine,
                 job_timeout=60 * 5):
        """Remember the target machine and delegate the rest to the base class.

        :param machine: host used as ``config.host`` of the created cluster.
        :param job_timeout: handed to the base class; later passed on to
            ``submit_job`` (units not visible here -- presumably seconds).
        """
        super(NewtIntegrationTest,
              self).__init__(name, girder_url, girder_user, girder_password,
                             job_timeout)
        self._cluster_id = None
        self._machine = machine

    def setUp(self):
        """Log in to NEWT and Girder, then look up the user's Private folder."""
        # First authenticate with NEWT
        self._session = Session()
        r = self._session.post('https://newt.nersc.gov/newt/auth', {
            'username': self._girder_user,
            'password': self._girder_password
        })

        self.assertEqual(r.status_code, 200)
        # Single-argument print in parentheses works on Python 2 and 3 alike.
        print(r.json())
        self._newt_session_id = r.json()['newt_sessionid']

        # Now authenticate with Girder using the session id.  The original
        # repeated this exact request a second time; once is sufficient.
        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url,
                                                  self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        # The authenticate call stored the Girder token as a session cookie.
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        self._user_id = user['_id']
        private_folders = list(
            self._client.listFolder(self._user_id, 'user', name='Private'))
        self.assertEqual(len(private_folders), 1)
        self._private_folder_id = private_folders[0]['_id']

    def tearDown(self):
        """Call the base tearDown and then try to delete the created cluster."""
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = 'clusters/%s' % self._cluster_id
                self._client.delete(url)
            except Exception:
                # Best effort: report the failure without failing the test.
                traceback.print_exc()

    def create_cluster(self):
        """Create and start a ``newt`` cluster and poll until it is running.

        The test fails with the cluster log if the status becomes ``error``,
        and with a timeout message if ``running`` is not reached in time.
        """
        body = {
            'config': {
                'host': self._machine
            },
            'name': 'NewtIntegrationTest',
            'type': 'newt'
        }

        r = self._client.post('clusters', data=json.dumps(body))
        self._cluster_id = r['_id']

        # Now test the connection
        self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                self.fail(str(r))

            # Stop polling once more than 9 one-second waits have passed.
            if sleeps > 9:
                self.fail('Cluster never moved into running state')
            sleeps += 1

    def assert_output(self):
        """Verify the output folder has 4 items and the stdout file equals ``_data``."""
        # listItem may hand back a generator (setUp wraps listFolder in
        # list() for the same reason), so materialise it before len().
        items = list(self._client.listItem(self._output_folder_id))
        self.assertEqual(len(items), 4)

        stdout_prefix = 'CumulusIntegrationTestJob-%s.o' % self._job_id
        stdout_item = None
        for item in items:
            if item['name'].startswith(stdout_prefix):
                stdout_item = item
                break

        self.assertIsNotNone(stdout_item)
        # Address the matched item by name, not via the stale loop variable.
        files = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(files), 1)

        url = '%s/api/v1/file/%s/download' % (self._girder_url,
                                              files[0]['_id'])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        """Execute the whole workflow; an ``HttpError`` fails with its response text."""
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
Ejemplo n.º 7
0
def upload_exported_zipped_dataset(
    gc: GirderClient,
    manager: JobManager,
    folderId: str,
    working_directory: Path,
    create_subfolder='',
):
    """Upload an unzipped dataset export to Girder and set its metadata.

    The directory must contain a file matching ``constants.metaRegex``.  The
    files named in that metadata are checked against the directory listing
    before anything is uploaded.  On a missing file a message is written to
    ``manager`` and the function returns without uploading.

    :param gc: Girder client used for the upload and metadata calls.
    :param manager: job manager that receives messages and status updates.
    :param folderId: id of the destination folder.
    :param working_directory: local directory holding the extracted export.
    :param create_subfolder: when non-empty, name of a sub folder of
        ``folderId`` (created or reused) that receives the upload instead.
    :returns: None.
    """
    listOfFileNames = os.listdir(working_directory)
    potential_meta_files = list(
        filter(constants.metaRegex.match, listOfFileNames))
    if len(potential_meta_files) == 0:
        manager.write(
            "Could not find meta.json or config.json file within the subdirectroy\n"
        )
        return
    print(listOfFileNames)
    # Load the metadata to get the data type and verify the list of files.
    # When several files match, the last one loaded wins.
    meta = {}
    for meta_name in potential_meta_files:
        with open(f"{working_directory}/{meta_name}") as f:
            meta = json.load(f)
    # Named ``dataset_type`` so the builtin ``type`` is not shadowed.
    dataset_type = meta[constants.TypeMarker]
    if dataset_type == constants.ImageSequenceType:
        imageData = meta['imageData']
        for image in imageData:
            if image["filename"] not in listOfFileNames:
                # The message was a plain string referencing an undefined
                # ``item``; interpolate the filename that is really missing.
                manager.write(
                    f"Could not find {image['filename']} file within the list of files\n"
                )
                return
    elif dataset_type == constants.VideoType:
        video = meta["video"]
        if video["filename"] not in listOfFileNames:
            manager.write(
                f"Could not find {video['filename']} file within the list of files\n"
            )
            return
    # Remove the auxiliary directory so we don't have to tag them all.
    if constants.AuxiliaryFolderName in listOfFileNames and os.path.isdir(
            f'{working_directory}/{constants.AuxiliaryFolderName}'):
        shutil.rmtree(f'{working_directory}/{constants.AuxiliaryFolderName}')
    root_folderId = folderId
    if create_subfolder != '':
        sub_folder = gc.createFolder(
            folderId,
            create_subfolder,
            reuseExisting=True,
        )
        root_folderId = str(sub_folder['_id'])
        # NOTE(review): the status is only updated on the sub folder path;
        # confirm whether it should also be set when uploading to folderId.
        manager.updateStatus(JobStatus.PUSHING_OUTPUT)
    gc.upload(f'{working_directory}/*', root_folderId)
    # Now we set all the metadata for the folders and items.
    all_files = list(gc.listItem(root_folderId))
    root_meta = {
        "type": dataset_type,
        "attributes": meta.get("attributes", None),
        "customTypeStyling": meta.get("customTypeStyling", None),
        "confidenceFilters": meta.get("confidenceFilters", None),
        "fps": meta["fps"],
        "version": meta["version"],
    }
    if dataset_type == constants.VideoType:
        # Set transcoded and non-transcoded versions.
        transcoded_video = list(
            gc.listItem(root_folderId, name=video["filename"]))
        if len(transcoded_video) == 1:
            ffprobe = meta["ffprobe_info"]
            # avg_frame_rate is a "numerator/denominator" string.
            avgFpsString = ffprobe["avg_frame_rate"]
            dividend, divisor = [int(v) for v in avgFpsString.split('/')]
            originalFps = dividend / divisor

            transcoded_metadata = {
                "codec": "h264",
                "originalFps": originalFps,
                "originalFpsString": avgFpsString,
                "source_video": False,
                "transcoder": "ffmpeg",
            }
            gc.addMetadataToItem(str(transcoded_video[0]['_id']),
                                 transcoded_metadata)
            # Any other video file is tagged as the source video.
            for item in all_files:
                if (item["name"].endswith(tuple(constants.validVideoFormats))
                        and item["name"] != video["filename"]):
                    # NOTE(review): this is described as the source video but
                    # "source_video" is False -- confirm the intended value.
                    source_metadata = {
                        "codec": ffprobe["codec_name"],
                        "originalFps": originalFps,
                        "originalFpsString": avgFpsString,
                        "source_video": False,
                    }
                    gc.addMetadataToItem(str(item['_id']), source_metadata)
            root_meta["originalFps"] = originalFps
            root_meta["originalFpsString"] = avgFpsString

    # Need to tag folder-level data (annotate, and others).
    root_meta[constants.DatasetMarker] = True
    gc.addMetadataToFolder(root_folderId, root_meta)
    gc.post(f'dive_rpc/postprocess/{root_folderId}', data={"skipJobs": True})
Ejemplo n.º 8
0
        offset += limit

    return result


# Connect to the Girder server on localhost:8080.
client = GirderClient(host='localhost', port=8080)

# Log in right away when find_user reports a 'girder' account.
if find_user('girder'):
    client.authenticate('girder', 'girder')

ensure_user(
    client,
    login='******',
    password='******',
    email='*****@*****.**',
    firstName='girder',
    lastName='girder',
)

# Authenticate (again) after ensure_user has run.
client.authenticate('girder', 'girder')

# Register the GridFS assetstore named 'local' unless find_assetstore
# already returns one.
if find_assetstore('local') is None:
    client.post(
        'assetstore',
        parameters={
            'name': 'local',
            'type': str(AssetstoreType.GRIDFS),
            'db': 'sumoLocalStore',
            'mongohost': 'mongodb://localhost:27017',
            'replicaset': '',
        },
    )

# Send the plugin list as a JSON-encoded 'plugins' parameter.
client.put(
    'system/plugins',
    parameters={'plugins': json.dumps(['jobs', 'worker', 'osumo'])},
)
Ejemplo n.º 9
0
class NewtIntegrationTest(BaseIntegrationTest):
    """Integration test driving a NEWT-type cluster job through Girder.

    Workflow: NEWT login, Girder authentication with the NEWT session id,
    cluster creation and start, job run, output verification.  The
    attributes ``_girder_url``, ``_girder_user``, ``_girder_password``,
    ``_job_timeout``, ``_output_folder_id``, ``_job_id`` and ``_data`` are
    used but not assigned here; presumably ``BaseIntegrationTest`` sets
    them (TODO confirm).
    """

    def __init__(self, name, girder_url, girder_user, girder_password, machine,
                 job_timeout=60*5):
        """Keep the machine name; pass every other argument to the base class.

        :param machine: host written to the cluster's ``config.host``.
        :param job_timeout: forwarded to the base class and later given to
            ``submit_job`` (presumably seconds -- verify).
        """
        super(NewtIntegrationTest, self).__init__(name, girder_url, girder_user,
                                                  girder_password, job_timeout)
        self._cluster_id = None
        self._machine = machine

    def setUp(self):
        """Authenticate with NEWT, then Girder, and find the Private folder."""
        # First authenticate with NEWT
        self._session = Session()
        r = self._session.post('https://newt.nersc.gov/newt/auth',
                               {
                                    'username': self._girder_user,
                                    'password': self._girder_password})

        self.assertEqual(r.status_code, 200)
        # Parenthesised so the line is valid on both Python 2 and Python 3.
        print(r.json())
        self._newt_session_id = r.json()['newt_sessionid']

        # Now authenticate with Girder using the session id.  The duplicate
        # second PUT of the original has been dropped.
        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url, self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        # The Girder token arrives as a cookie on the shared session.
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        self._user_id = user['_id']
        # list() keeps len() and indexing valid whether the client returns a
        # list or a generator.
        private_folders = list(
            self._client.listFolder(self._user_id, 'user', name='Private'))
        self.assertEqual(len(private_folders), 1)
        self._private_folder_id = private_folders[0]['_id']

    def tearDown(self):
        """Run the base tearDown, then delete the cluster if one was created."""
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = 'clusters/%s' % self._cluster_id
                self._client.delete(url)
            except Exception:
                # Cleanup is best effort; log the traceback and carry on.
                traceback.print_exc()

    def create_cluster(self):
        """Create a ``newt`` cluster, start it and wait for the running state.

        Fails the test with the cluster log on status ``error`` and with a
        timeout message when ``running`` is never reported.
        """
        body = {
            'config': {
                'host': self._machine
            },
            'name': 'NewtIntegrationTest',
            'type': 'newt'
        }

        r = self._client.post('clusters', data=json.dumps(body))
        self._cluster_id = r['_id']

        # Now test the connection
        self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                self.fail(str(r))

            # Abort after the counter exceeds 9 one-second polls.
            if sleeps > 9:
                self.fail('Cluster never moved into running state')
            sleeps += 1

    def assert_output(self):
        """Assert the output folder has 4 items and the stdout file equals ``_data``."""
        # Materialise the listing so len() also works on a generator result.
        items = list(self._client.listItem(self._output_folder_id))
        self.assertEqual(len(items), 4)

        stdout_prefix = 'CumulusIntegrationTestJob-%s.o' % self._job_id
        stdout_item = None
        for item in items:
            if item['name'].startswith(stdout_prefix):
                stdout_item = item
                break

        self.assertIsNotNone(stdout_item)
        # Reference the matched item directly, not the leftover loop variable.
        files = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(files), 1)

        url = '%s/api/v1/file/%s/download' % (self._girder_url, files[0]['_id'])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        """Run every workflow step; report an ``HttpError`` via its response text."""
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
Ejemplo n.º 10
0
# Make sure the admin account is in place, then log in with it.
ensure_user(
    client,
    login=user,
    password=password,
    email='*****@*****.**',
    firstName='Girder',
    lastName='Admin',
)

client.authenticate(user, password)

s3_assetstore_name = 's3'

# Register the S3 assetstore only when find_assetstore does not return one.
if find_assetstore(s3_assetstore_name) is None:
    client.post(
        'assetstore',
        parameters={
            'name': s3_assetstore_name,
            'type': str(AssetstoreType.S3),
            'bucket': args.s3,
            'accessKeyId': args.aws_key_id,
            'secret': args.aws_secret_key,
        },
    )

# Send the plugin list, then request a server restart.
client.put(
    'system/plugins',
    parameters={'plugins': json.dumps(['jobs', 'worker', 'osumo'])},
)
client.put('system/restart')

# Fixed 30-second pause after the restart request; presumably to let the
# server come back up before the next call (TODO confirm).
sleep(30)

client.put(
    'system/setting',
    parameters=dict(list=json.dumps([
        dict(key='worker.broker', value=args.broker),
        dict(key='worker.backend', value=args.broker),
        dict(key='core.route_table',
Ejemplo n.º 11
0
class CumulusClient():
    '''Application interface to cumulus-based client for HPC systems
    supporting NEWT API.

    Note: the methods must be called in a specific order!
      create_cluster()
      create_omega3p_script()
      create_job()
      upload_inputs()
      submit_job()

    Then optionally:
      monitor_job()
      download_results()
      release_resources()
    '''

    # ---------------------------------------------------------------------
    def __init__(self, girder_url, newt_sessionid):
        '''Authenticates with Girder and locates the user's Private folder

        :param girder_url: base URL of the Girder server (without /api/v1).
        :param newt_sessionid: NEWT session id used to authenticate.
        :raises HttpError: when the authentication request does not return 200.
        :raises Exception: when the user does not have exactly one folder
            named 'Private'.
        '''
        self._client = None
        self._cluster_id = None
        self._girder_url = girder_url
        self._input_folder_id = None
        self._job_folder_id = None
        self._job_id = None
        self._output_folder_id = None
        self._private_folder_id = None
        self._script_id = None
        # Wall time given to submit_job(); monitor_job() uses it as timeout.
        self._timeout_minutes = None
        self._session = requests.Session()

        # Authenticate with Girder using the newt session id
        url = '%s/api/v1/newt/authenticate/%s' % \
          (self._girder_url, newt_sessionid)
        r = self._session.put(url)
        if r.status_code != 200:
            raise HttpError(r.status_code, r.text, r.url, r.request.method)

        # Instantiate Girder client
        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        user_id = user['_id']
        # list() keeps len() and indexing valid even if the client returns a
        # generator and not a list.
        folders = list(self._client.listFolder(user_id, 'user', name='Private'))
        if len(folders) != 1:
            raise Exception(
                'Wrong number of Private folders; should be 1 got %s' %
                len(folders))
        self._private_folder_id = folders[0]['_id']
        print('private_folder_id %s' % (self._private_folder_id,))

    # ---------------------------------------------------------------------
    def job_id(self):
        '''Returns current job id (which may be None)
        '''
        return self._job_id

    # ---------------------------------------------------------------------
    def create_cluster(self, machine_name, cluster_name=None):
        '''Finds or creates a newt cluster, resets it and starts it

        An existing cluster with the same name is reused.  The method polls
        the cluster status once per second until it is 'running'.

        :param machine_name: host name stored in the cluster config.
        :param cluster_name: defaults to '<machine_name>.<user first name>'.
        :raises Exception: when the cluster reports 'error' or does not reach
            'running' within the polling budget.
        '''
        if cluster_name is None:
            user = self._client.get('user/me')
            user_name = user.get('firstName', 'user')
            cluster_name = '%s.%s' % (machine_name, user_name)

        cluster = None
        cluster_list = self._client.get('clusters')
        for extant_cluster in cluster_list:
            if extant_cluster['name'] == cluster_name:
                cluster = extant_cluster
                self._cluster_id = extant_cluster['_id']
                break

        if not cluster:
            body = {
                'config': {
                    'host': machine_name
                },
                'name': cluster_name,
                'type': 'newt'
            }

            r = self._client.post('clusters', data=json.dumps(body))
            self._cluster_id = r['_id']
            print('cluster_id %s' % (self._cluster_id,))

        # Reset the state of the cluster
        body = {'status': 'created'}
        self._client.patch('clusters/%s' % self._cluster_id,
                           data=json.dumps(body))

        # Now test the connection
        self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                print(r)
                raise Exception('ERROR creating cluster')

            if sleeps > 9:
                raise Exception('Cluster never moved into running state')
            sleeps += 1

    # ---------------------------------------------------------------------
    def create_omega3p_script(self,
                              omega3p_filename,
                              name=None,
                              number_of_tasks=1):
        '''Creates script to submit omega3p job

        :param omega3p_filename: input file name passed to omega3p.
        :param name: script name; defaults to omega3p_filename.
        :param number_of_tasks: value for the srun -n option.
        '''
        command = 'srun -n %s /project/projectdirs/ace3p/{{machine}}/omega3p %s' % \
          (number_of_tasks, omega3p_filename)
        if name is None:
            name = omega3p_filename
        body = {'commands': [command], 'name': name}
        r = self._client.post('scripts', data=json.dumps(body))
        self._script_id = r['_id']
        print('script_id %s' % (self._script_id,))

    # ---------------------------------------------------------------------
    def create_input(self, input_paths, folder_name='input_files'):
        '''DEPRECATED Uploads input files

        Gets or creates folder_name under the Private folder and uploads the
        files to it.  Returns silently when the folder cannot be obtained.

        :raises Exception: when an input path is empty or does not exist.
        '''
        folder_id = self.get_folder(self._private_folder_id, folder_name)
        if folder_id is None:
            return
        print('input_folder_id %s' % (folder_id,))
        self._input_folder_id = folder_id
        self._upload_to_input_folder(input_paths)

    # ---------------------------------------------------------------------
    def create_output_folder(self, folder_name='output_files'):
        '''DEPRECATED Gets or creates the output folder under Private
        '''
        folder_id = self.get_folder(self._private_folder_id, folder_name)
        print('output_folder_id %s' % (folder_id,))
        self._output_folder_id = folder_id

    # ---------------------------------------------------------------------
    def create_job(self, job_name, tail=None):
        '''Creates the job folders and the job specification

        A uniquely named job folder is created under the Private folder, with
        'input_files' and 'output_files' sub folders.

        :param job_name: job name; 'CumulusJob' is used when empty.
        :param tail: optional path added to the job outputs with tail=True.
        '''
        # Create job folders
        folder_name = uuid.uuid4().hex  # unique name
        self._job_folder_id = self.get_folder(self._private_folder_id,
                                              folder_name)
        print('Created job folder %s' % (folder_name,))
        self._input_folder_id = self.get_folder(self._job_folder_id,
                                                'input_files')
        self._output_folder_id = self.get_folder(self._job_folder_id,
                                                 'output_files')

        # Make sure job_name isn't null
        if not job_name:
            job_name = 'CumulusJob'

        # Create job spec
        body = {
            'name': job_name,
            'scriptId': self._script_id,
            'output': [{
                'folderId': self._output_folder_id,
                'path': '.'
            }],
            'input': [{
                'folderId': self._input_folder_id,
                'path': '.'
            }]
        }

        if tail:
            body['output'].append({"path": tail, "tail": True})

        job = self._client.post('jobs', data=json.dumps(body))
        self._job_id = job['_id']
        print('Created job_id %s' % (self._job_id,))

    # ---------------------------------------------------------------------
    def upload_inputs(self, input_paths):
        '''Uploads input files to input folder

        :raises Exception: when no input folder is set, or when an input path
            is empty or does not exist.
        '''
        if not self._input_folder_id:
            raise Exception('Input folder missing')
        self._upload_to_input_folder(input_paths)

    # ---------------------------------------------------------------------
    def _upload_to_input_folder(self, input_paths):
        '''Uploads each path to the current input folder, checking it first

        Shared by create_input() and upload_inputs().  Each path is checked
        just before its own upload, so earlier files are already uploaded
        when a later path turns out to be missing.
        '''
        for input_path in input_paths:
            if not input_path or not os.path.exists(input_path):
                raise Exception('Input file not found: %s' % input_path)
            name = os.path.basename(input_path)
            size = os.path.getsize(input_path)
            with open(input_path, 'rb') as fp:
                self._client.uploadFile(self._input_folder_id,
                                        fp,
                                        name,
                                        size,
                                        parentType='folder')

    # ---------------------------------------------------------------------
    def submit_job(self,
                   machine,
                   project_account,
                   timeout_minutes,
                   queue='debug',
                   qos=None,
                   number_of_nodes=1,
                   job_output_dir=None):
        '''Submits the current job to the current cluster

        :param timeout_minutes: maximum wall time in minutes; it is also
            remembered as the default timeout for monitor_job().
        :param qos: optional value for 'qualityOfService'.
        :param job_output_dir: optional value for 'jobOutputDir'.
        '''
        body = {
            'machine': machine,
            'account': project_account,
            'numberOfNodes': number_of_nodes,
            'maxWallTime': {
                'hours': 0,
                'minutes': timeout_minutes,
                'seconds': 0
            },
            'queue': queue,
        }
        if qos:
            body['qualityOfService'] = qos
        if job_output_dir:
            body['jobOutputDir'] = job_output_dir
            print('Setting jobOutputDir %s' % (job_output_dir,))
        url = 'clusters/%s/job/%s/submit' % (self._cluster_id, self._job_id)
        self._client.put(url, data=json.dumps(body))
        # monitor_job() previously read an undefined name for its timeout.
        self._timeout_minutes = timeout_minutes
        print('Submitted job %s' % (self._job_id,))

    # ---------------------------------------------------------------------
    def monitor_job(self, tail=None, timeout_minutes=None):
        '''Periodically monitors job status

        Polls every 2 seconds until the job status is 'complete'.

        :param tail: optional path of an output file whose new content is
            printed on every poll.
        :param timeout_minutes: overall timeout in minutes; defaults to the
            value given to submit_job().
        :raises Exception: on job error, on timeout, or when no timeout is
            known.
        '''
        if timeout_minutes is None:
            timeout_minutes = self._timeout_minutes
        if timeout_minutes is None:
            raise Exception('Job timeout unknown: call submit_job() first '
                            'or pass timeout_minutes')

        log_offset = 0
        job_timeout = 60 * timeout_minutes
        start = time.time()
        while True:
            time.sleep(2)

            # Provide some feedback at startup
            if log_offset == 0:
                sys.stdout.write('.')

            r = self._client.get('jobs/%s' % self._job_id)

            if r['status'] in ['error', 'unexpectederror']:
                r = self._client.get('jobs/%s/log' % self._job_id)
                raise Exception(str(r))
            elif r['status'] == 'complete':
                break

            # Tail log file
            if tail:
                params = {'offset': log_offset, 'path': tail}
                r = self._client.get('jobs/%s/output' % self._job_id,
                                     parameters=params)
                output = r['content']

                if output and log_offset == 0:
                    print('')  # end the user feedback dots

                log_offset += len(output)

                for line in output:
                    print(line)

            sys.stdout.flush()

            if time.time() - start > job_timeout:
                raise Exception('Job timeout')

    # ---------------------------------------------------------------------
    def download_results(self, destination_folder):
        '''Downloads all output files to a local directory

        The directory is created when it does not exist.
        '''
        if not os.path.exists(destination_folder):
            os.makedirs(destination_folder)

        self._client.downloadFolderRecursive(self._output_folder_id,
                                             destination_folder)

        print('Downloaded files to %s' % destination_folder)

    # ---------------------------------------------------------------------
    def release_resources(self):
        '''Closes/deletes any current resources

        Deletes the cluster, job, script and job folder that are currently
        set, then clears the stored ids.
        '''
        # A tuple gives a deterministic order; the job folder id lives in
        # _job_folder_id (the original read a nonexistent _job_folder).
        resources = (
            ('clusters', self._cluster_id),
            ('jobs', self._job_id),
            ('scripts', self._script_id),
            ('folder', self._job_folder_id),
        )
        for resource_type, resource_id in resources:
            if resource_id is not None:
                url = '%s/%s' % (resource_type, resource_id)
                self._client.delete(url)

        # The cluster was deleted as well, so its id is cleared too.
        self._cluster_id = None
        self._input_folder_id = None
        self._job_folder_id = None
        self._job_id = None
        self._output_folder_id = None
        self._script_id = None
        self._timeout_minutes = None

    # ---------------------------------------------------------------------
    def get_folder(self, parent_id, name):
        '''Returns folder_id, creating one if needed

        Returns None when the folder cannot be created; the error response
        text is printed in that case.
        '''
        # Check if folder already exists.  list() is needed because a
        # generator result is always truthy and cannot be indexed.
        folder_list = list(self._client.listFolder(parent_id, name=name))
        if folder_list:
            return folder_list[0]['_id']

        # (else)
        try:
            r = self._client.createFolder(parent_id, name)
            return r['_id']
        except HttpError as e:
            print(e.responseText)

        return None
Ejemplo n.º 12
0
# Pre-bind the cleanup targets.  If an early step fails, the ``finally``
# clause would otherwise raise NameError on ``filepath`` or ``output_dir``
# and hide the real exception.
filepath = None
output_dir = None
try:
    # Now download the dataset.  mkstemp returns an open descriptor, which
    # is closed because only the path is needed.
    (fd, filepath) = tempfile.mkstemp()
    os.close(fd)
    client.downloadFile(fileId, filepath)

    # Create temp file and convert to GeoJs contour JSON format
    output_dir = tempfile.mkdtemp()
    output_filepath = os.path.join(output_dir, output_file_name)
    with open(output_filepath, 'w') as fp:
        fp.write(json_util.dumps(convert(filepath, variable, timestep)))

    # Create an item for this file
    output_item = client.createItem(dataset_folder_id, output_file_name, output_file_name)

    # Now upload the result
    client.uploadFileToItem(output_item['_id'], output_filepath)

    output_item_id = output_item['_id']

    # Finally promote item to dataset
    client.post('minerva_dataset/%s/dataset' % output_item_id)

finally:
    # Remove whichever temporary artefacts were actually created.
    if filepath and os.path.exists(filepath):
        os.remove(filepath)
    if output_dir and os.path.exists(output_dir):
        shutil.rmtree(output_dir)


Ejemplo n.º 13
0
def create_task_job(job_defaults,
                    sender=None,
                    body=None,
                    exchange=None,
                    routing_key=None,
                    headers=None,
                    properties=None,
                    declare=None,
                    retry_policy=None,
                    **kwargs):
    """Create a Girder job for a task that is published from inside another task.

    The Girder API URL and token are read from the current (parent) task's
    request. On a successful POST to the ``job`` endpoint, the returned
    ``jobInfoSpec`` is stored in ``headers['jobInfoSpec']``. Missing
    prerequisites and request failures are logged as warnings, not raised.

    :param job_defaults: mapping of fallback ``girder_job_*`` values, used
        when the corresponding key is absent from ``headers``.
    :param body: sequence whose first two entries are the task's positional
        and keyword arguments.
    :param headers: message headers; must contain ``id``. The ``girder_job_*``
        keys are popped from it.
    :param sender, exchange, routing_key, properties, declare, retry_policy,
        kwargs: accepted but unused here (the signature looks like a Celery
        publish-signal handler -- confirm against the caller).
    """
    parent_task = current_app.current_task
    try:
        if parent_task is None:
            raise MissingJobArguments('Parent task is None')
        if parent_task.request is None:
            raise MissingJobArguments("Parent task's request is None")
        if not hasattr(parent_task.request, 'girder_api_url'):
            raise MissingJobArguments(
                "Parent task's request does not contain girder_api_url")
        if not hasattr(parent_task.request, 'girder_client_token'):
            raise MissingJobArguments(
                "Parent task's request does not contain girder_client_token")
        if not hasattr(parent_task.request, 'id'):
            raise MissingJobArguments(
                "Parent task's request does not contain id")
        # headers defaults to None; treat that like a missing id instead of
        # letting the membership test raise TypeError.
        if headers is None or 'id' not in headers:
            raise MissingJobArguments('id is not in headers')

        gc = GirderClient(apiUrl=parent_task.request.girder_api_url)
        gc.token = parent_task.request.girder_client_token

        task_args = tuple(_walk_obj(body[0], _maybe_model_repr))
        task_kwargs = _walk_obj(body[1], _maybe_model_repr)

        # The extra fields are **-unpacked below, so the fallback has to be a
        # mapping; the previous '' default raised TypeError when the key was
        # absent from both headers and job_defaults.
        other_fields = headers.pop(
            'girder_job_other_fields',
            job_defaults.get('girder_job_other_fields')) or {}

        parameters = {
            'title':
            headers.pop('girder_job_title',
                        job_defaults.get('girder_job_title', '')),
            'type':
            headers.pop('girder_job_type',
                        job_defaults.get('girder_job_type', '')),
            'handler':
            headers.pop('girder_job_handler',
                        job_defaults.get('girder_job_handler', '')),
            'public':
            headers.pop('girder_job_public',
                        job_defaults.get('girder_job_public', '')),
            'args':
            json.dumps(task_args),
            'kwargs':
            task_kwargs,
            'otherFields':
            json.dumps(
                dict(celeryTaskId=headers['id'],
                     celeryParentTaskId=parent_task.request.id,
                     **other_fields))
        }

        try:
            response = gc.post('job', parameters=parameters, jsonResp=False)
            if response.ok:
                headers['jobInfoSpec'] = response.json().get('jobInfoSpec')
        except requests.exceptions.RequestException as e:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning('Failed to post job: {}'.format(e))

    except MissingJobArguments as e:
        logger.warning('Girder job not created: {}'.format(str(e)))
Ejemplo n.º 14
0
            "email": "*****@*****.**",
            "firstName": "Girder",
            "lastName": "Admin"
        })
    c.authenticate('girder', 'girder')

# Create a tangelo hub collection if there isn't one
coll_search = c.get('resource/search',
                    parameters={
                        'q': 'Default',
                        'types': '["collection"]'
                    })
if len(coll_search["collection"]) == 0:
    collection = c.post('collection',
                        parameters={
                            'name': 'Default',
                            'description': 'Default workspace',
                            'public': 'true'
                        })
    c.post('folder',
           parameters={
               'parentType': 'collection',
               'parentId': collection['_id'],
               'name': 'Data',
               'description': 'Data Folder',
               'public': 'true'
           })
    c.post('folder',
           parameters={
               'parentType': 'collection',
               'parentId': collection['_id'],
               'name': 'Analyses',
Ejemplo n.º 15
0
        "user",
        {
            "login": "******",
            "password": "******",
            "email": "*****@*****.**",
            "firstName": "Girder",
            "lastName": "Admin",
        },
    )
    c.authenticate("girder", "girder")

# Create a tangelo hub collection if there isn't one
coll_search = c.get("resource/search", parameters={"q": "Default", "types": '["collection"]'})
if len(coll_search["collection"]) == 0:
    collection = c.post(
        "collection", parameters={"name": "Default", "description": "Default workspace", "public": "true"}
    )
    c.post(
        "folder",
        parameters={
            "parentType": "collection",
            "parentId": collection["_id"],
            "name": "Data",
            "description": "Data Folder",
            "public": "true",
        },
    )
    c.post(
        "folder",
        parameters={
            "parentType": "collection",
Ejemplo n.º 16
0
    # We use the directory name as the collection name in Girder.
    fullpath = os.path.join(root, file)
    analysis_filename = os.path.basename(fullpath)
    analysis_name = os.path.splitext(analysis_filename)[0]
    analysis_dir = os.path.dirname(fullpath)
    collection_name = os.path.basename(analysis_dir)

    # Create this collection if it doesn't already exist.
    collection_search = c.get('resource/search', parameters={
        'q': collection_name,
        'types': '["collection"]'
    })
    if len(collection_search["collection"]) == 0:
        collection = c.post('collection', parameters={
            'name': collection_name,
            'description': collection_name,
            'public': 'true'
        })
        c.post('folder', parameters={
            'parentType': 'collection',
            'parentId': collection['_id'],
            'name': 'Data',
            'description': 'Data Folder',
            'public': 'true'
        })
        c.post('folder', parameters={
            'parentType': 'collection',
            'parentId': collection['_id'],
            'name': 'Analyses',
            'description': 'Analysis folder',
            'public': 'true'
Ejemplo n.º 17
0
    | | / // // /| | / /|_/ / __/     | | /| / / __ \/ ___/ //_/ _ \/ ___/
    | |/ // // ___ |/ /  / / /___     | |/ |/ / /_/ / /  / ,< /  __/ /
    |___/___/_/  |_/_/  /_/_____/     |__/|__/\____/_/  /_/|_|\___/_/

    You are running in private standalone mode.

    Troubleshooting: Try running `docker pull kitware/viame-worker` to get the latest image
    Documentation: https://kitware.github.io/dive/Deployment-Docker-Compose/
    Issues: https://github.com/Kitware/dive/issues
    Support: please email [email protected]
    """)
    # Fetch Celery broker credentials from server
    diveclient = GirderClient(apiUrl=dive_api_url)
    diveclient.authenticate(username=dive_username, password=dive_password)
    me = diveclient.get('user/me')
    # POST to the per-user queue endpoint; the response carries the broker URL.
    creds = diveclient.post(f'rabbit_user_queues/user/{me["_id"]}')
    broker_url = creds['broker_url']
    queue_name = f"{me['login']}@private"
    if not me.get(UserPrivateQueueEnabledMarker, False):
        warn(" Private queues not enabled for this user.")
        # Host was misspelled as "viame.kitware/com", which is not a valid
        # address for users to follow.
        warn(
            " You can visit https://viame.kitware.com/#jobs to change these settings"
        )
    info("========================")
    # Use the user's private queue as the default queue.
    task_default_queue = queue_name

# Stop with an explicit error when no broker URL has been set.
if broker_url is None:
    raise RuntimeError('CELERY_BROKER_URL must be set')

worker_send_task_events = False
# https://docs.celeryproject.org/en/stable/userguide/configuration.html#std-setting-worker_prefetch_multiplier
Ejemplo n.º 18
0
# Connect to Girder and run register_and_authenticate for the configured user.
client = GirderClient(host=args.host, port=args.port)
register_and_authenticate(
    client,
    login=args.user,
    password=args.password,
    email='{}@localhost.com'.format(args.user),
    firstName='Girder',
    lastName='Admin',
)

local_assetstore_name = 'local'

# Post a filesystem assetstore only when find_assetstore returns None for the name.
if find_assetstore(local_assetstore_name) is None:
    client.post(
        'assetstore',
        parameters={
            'name': local_assetstore_name,
            'type': str(AssetstoreType.FILESYSTEM),
            'root': os.path.join(args.data_root, 'assetstores', 'local'),
            'readOnly': "false",
        },
    )

# Send the plugin list as a JSON-encoded array, then call the restart endpoint.
client.put(
    'system/plugins',
    parameters={
        'plugins': json.dumps([
            'celery_jobs',
            'climos_test',
            'user_quota',
            'hdfs_assetstore',
            'jobs',
            'romanesco',
            'sparktest',
        ]),
    },
)

client.put('system/restart')
Ejemplo n.º 19
0
# Run ensure_user for the admin account, then authenticate as that user.
ensure_user(
    client,
    login=user,
    password=password,
    email='*****@*****.**',
    firstName='Girder',
    lastName='Admin',
)

client.authenticate(user, password)

s3_assetstore_name = 's3'

# Post an S3 assetstore only when find_assetstore returns None for the name.
if find_assetstore(s3_assetstore_name) is None:
    client.post(
        'assetstore',
        parameters={
            'name': s3_assetstore_name,
            'type': str(AssetstoreType.S3),
            'bucket': args.s3,
            'accessKeyId': args.aws_key_id,
            'secret': args.aws_secret_key,
        },
    )

# Send the plugin list as a JSON-encoded array, then call the restart endpoint.
client.put('system/plugins',
           parameters={'plugins': json.dumps(['jobs', 'worker', 'osumo'])})
client.put('system/restart')

# Fixed 30 second pause after the restart request -- presumably to let the
# server come back up before further requests; confirm.
sleep(30)

client.put('system/setting',
           parameters=dict(list=json.dumps([
               dict(key='worker.broker', value=args.broker),
               dict(key='worker.backend', value=args.broker),
Ejemplo n.º 20
0
class BaseIntegrationTest(unittest.TestCase):
    """Common fixture for integration tests that run a job through Girder.

    Offers helpers to create a script, an input folder holding one uploaded
    file, an output folder and a job, to submit that job to a cluster and to
    check the job's stdout item. ``submit_job`` reads ``self._cluster_id``,
    which this class never assigns, so it must be set before submitting.
    """

    def __init__(self, name, girder_url, girder_user, girder_password, job_timeout=60, cleanup=True):
        """Store the connection settings; nothing is contacted yet.

        name -- test method name handed to unittest.TestCase.
        girder_url -- Girder base URL ('/api/v1' is appended in setUp).
        girder_user, girder_password -- credentials used in setUp.
        job_timeout -- seconds submit_job waits when it gets no timeout.
        cleanup -- when false, tearDown leaves created resources in place.
        """
        super(BaseIntegrationTest, self).__init__(name)
        self._job_id = None
        self._script_id = None
        self._output_folder_id = None
        self._input_folder_id = None
        self._girder_url = girder_url
        self._girder_user = girder_user
        self._girder_password = girder_password
        self._job_timeout = job_timeout
        self._data = 'Need more input!'
        self._cleanup = cleanup

    def setUp(self):
        """Authenticate and look up the user's single 'Private' folder."""
        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.authenticate(self._girder_user,
                                  self._girder_password)

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = list(self._client.listFolder(self._user_id, 'user', name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        """Delete the job, script and folders created by the test.

        Each deletion is best effort: a failure is printed and the remaining
        resources are still attempted.
        """
        if not self._cleanup:
            return

        resources = (
            ('jobs', self._job_id),
            ('scripts', self._script_id),
            ('folder', self._output_folder_id),
            ('folder', self._input_folder_id),
        )
        for endpoint, resource_id in resources:
            if not resource_id:
                continue
            try:
                self._client.delete('%s/%s' % (endpoint, resource_id))
            except Exception:
                traceback.print_exc()

    def create_script(self, commands=None):
        """Create the script resource and remember its id.

        commands -- list of command strings; defaults to sleeping 10 seconds
        and then printing the uploaded input file.
        """
        # None stands in for the default list to avoid a shared mutable default.
        if commands is None:
            commands = ['sleep 10', 'cat CumulusIntegrationTestInput']

        body = {
            'commands': commands,
            'name': 'CumulusIntegrationTestLob'
        }

        r = self._client.post('scripts', data=json.dumps(body))
        self._script_id = r['_id']

    def create_input(self, folder_name='CumulusInput'):
        """Create the input folder in 'Private' and upload the test data to it."""
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._input_folder_id = r['_id']
        size = len(self._data)

        item = self._client.uploadFile(self._input_folder_id,
                    StringIO(self._data), 'CumulusIntegrationTestInput', size,
                    parentType='folder')

        self._item_id = item['itemId']

    def create_output_folder(self, folder_name='CumulusOutput'):
        """Create the output folder in 'Private' and remember its id."""
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._output_folder_id = r['_id']

    def create_job(self, job_name='CumulusIntegrationTestJob', tail=None):
        """Create a job wired to the script and the input/output folders.

        tail -- optional path; when given, an extra output entry with
        ``tail`` set to True is added for it.
        """
        body = {
            'name': job_name,
            'scriptId': self._script_id,
            'output': [{
              'folderId': self._output_folder_id,
              'path': '.'
            }],
            'input': [
              {
                'folderId': self._input_folder_id,
                'path': '.'
              }
            ]
        }

        if tail:
            body['output'].append({
                "path": tail,
                "tail": True
            })

        job = self._client.post('jobs', data=json.dumps(body))
        self._job_id = job['_id']

    def submit_job(self, job_params=None, timeout=None):
        """Submit the job to the cluster and poll once a second until it ends.

        job_params -- dict sent as the JSON request body (defaults to {}).
        timeout -- seconds to wait; defaults to the job_timeout given to the
        constructor. The test fails on an error status or on timeout.
        """
        if job_params is None:
            job_params = {}
        # Comparing elapsed time against None fails immediately on Python 2
        # and raises TypeError on Python 3, so fall back to the stored value.
        if timeout is None:
            timeout = self._job_timeout

        url = 'clusters/%s/job/%s/submit' % (self._cluster_id, self._job_id)

        self._client.put(url, data=json.dumps(job_params))
        start = time.time()
        while True:
            time.sleep(1)
            r = self._client.get('jobs/%s' % self._job_id)

            if r['status'] in ['error', 'unexpectederror']:
                r = self._client.get('jobs/%s/log' % self._job_id)
                self.fail(str(r))
            elif r['status'] == 'complete':
                break

            if time.time() - start > timeout:
                self.fail('Job didn\'t complete in timeout')

    def assert_output(self):
        """Check the output folder holds 4 items and stdout equals the input data."""
        # listItem may be lazy; materialize it so len() and iteration both work.
        items = list(self._client.listItem(self._output_folder_id))
        self.assertEqual(len(items), 4)

        prefix = 'CumulusIntegrationTestJob-%s.o' % self._job_id
        stdout_item = None
        for item in items:
            if item['name'].startswith(prefix):
                stdout_item = item
                break

        self.assertIsNotNone(stdout_item)
        files = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(files), 1)

        # The file is read in binary mode, so compare against bytes; on
        # Python 2 a str already is bytes and is used unchanged.
        expected = self._data
        if not isinstance(expected, bytes):
            expected = expected.encode('utf-8')

        path = os.path.join(tempfile.gettempdir(), self._job_id)
        try:
            self._client.downloadFile(files[0]['_id'], path)
            with open(path, 'rb') as fp:
                self.assertEqual(fp.read(), expected)

        finally:
            if os.path.exists(path):
                os.remove(path)
Ejemplo n.º 21
0
# Script entry point: authenticates against Girder, creates an assetstore,
# sets the enabled plugin list and requests a server restart.
if __name__ == "__main__":
    login = '******'
    password = '******'

    # NOTE(review): this call looks damaged by credential redaction -- the
    # apiUrl string runs straight into what appear to be user-creation keyword
    # arguments (firstName, lastName, password, admin). Confirm against the
    # original script before relying on it.
    gc = GirderClient(apiUrl='http://*****:*****@admin.com',
                  firstName='admin',
                  lastName='admin',
                  password=password,
                  admin=True)
    gc.authenticate(username=login, password=password)

    # Create an assetstore
    # (type 0 is presumably the filesystem assetstore type -- confirm)
    gc.post('assetstore',
            parameters={
                'name': 'TestAssetstore',
                'type': 0,
                'root': '/home/circleci/project/assetstore'
            })

    # Enable the 'slicer_extension_manager' plugin
    # (the plugin list is passed as a JSON-encoded array string)
    gc.put('system/plugins',
           parameters={"plugins": '["slicer_extension_manager"]'})

    # Restart the server
    gc.put('system/restart')
Ejemplo n.º 22
0
class BaseIntegrationTest(unittest.TestCase):
    """Common fixture for integration tests that run a job through Girder.

    Offers helpers to create a script, an input folder holding one uploaded
    file, an output folder and a job, to submit that job to a cluster and to
    check the job's stdout item. ``submit_job`` reads ``self._cluster_id``,
    which this class never assigns, so it must be set before submitting.
    """

    def __init__(self,
                 name,
                 girder_url,
                 girder_user,
                 girder_password,
                 job_timeout=60,
                 cleanup=True):
        """Store the connection settings; nothing is contacted yet.

        name -- test method name handed to unittest.TestCase.
        girder_url -- Girder base URL ('/api/v1' is appended in setUp).
        girder_user, girder_password -- credentials used in setUp.
        job_timeout -- seconds submit_job waits when it gets no timeout.
        cleanup -- when false, tearDown leaves created resources in place.
        """
        super(BaseIntegrationTest, self).__init__(name)
        self._job_id = None
        self._script_id = None
        self._output_folder_id = None
        self._input_folder_id = None
        self._girder_url = girder_url
        self._girder_user = girder_user
        self._girder_password = girder_password
        self._job_timeout = job_timeout
        self._data = 'Need more input!'
        self._cleanup = cleanup

    def setUp(self):
        """Authenticate and look up the user's single 'Private' folder."""
        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.authenticate(self._girder_user, self._girder_password)

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = list(self._client.listFolder(self._user_id, 'user',
                                         name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        """Delete the job, script and folders created by the test.

        Each deletion is best effort: a failure is printed and the remaining
        resources are still attempted.
        """
        if not self._cleanup:
            return

        for endpoint, resource_id in (('jobs', self._job_id),
                                      ('scripts', self._script_id),
                                      ('folder', self._output_folder_id),
                                      ('folder', self._input_folder_id)):
            if not resource_id:
                continue
            try:
                self._client.delete('%s/%s' % (endpoint, resource_id))
            except Exception:
                traceback.print_exc()

    def create_script(self, commands=None):
        """Create the script resource and remember its id.

        commands -- list of command strings; defaults to sleeping 10 seconds
        and then printing the uploaded input file.
        """
        # None stands in for the default list to avoid a shared mutable default.
        if commands is None:
            commands = ['sleep 10', 'cat CumulusIntegrationTestInput']

        body = {'commands': commands, 'name': 'CumulusIntegrationTestLob'}

        r = self._client.post('scripts', data=json.dumps(body))
        self._script_id = r['_id']

    def create_input(self, folder_name='CumulusInput'):
        """Create the input folder in 'Private' and upload the test data to it."""
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._input_folder_id = r['_id']
        size = len(self._data)

        item = self._client.uploadFile(self._input_folder_id,
                                       StringIO(self._data),
                                       'CumulusIntegrationTestInput',
                                       size,
                                       parentType='folder')

        self._item_id = item['itemId']

    def create_output_folder(self, folder_name='CumulusOutput'):
        """Create the output folder in 'Private' and remember its id."""
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._output_folder_id = r['_id']

    def create_job(self, job_name='CumulusIntegrationTestJob', tail=None):
        """Create a job wired to the script and the input/output folders.

        tail -- optional path; when given, an extra output entry with
        ``tail`` set to True is added for it.
        """
        body = {
            'name': job_name,
            'scriptId': self._script_id,
            'output': [{
                'folderId': self._output_folder_id,
                'path': '.'
            }],
            'input': [{
                'folderId': self._input_folder_id,
                'path': '.'
            }]
        }

        if tail:
            body['output'].append({"path": tail, "tail": True})

        job = self._client.post('jobs', data=json.dumps(body))
        self._job_id = job['_id']

    def submit_job(self, job_params=None, timeout=None):
        """Submit the job to the cluster and poll once a second until it ends.

        job_params -- dict sent as the JSON request body (defaults to {}).
        timeout -- seconds to wait; defaults to the job_timeout given to the
        constructor. The test fails on an error status or on timeout.
        """
        if job_params is None:
            job_params = {}
        # Comparing elapsed time against None fails immediately on Python 2
        # and raises TypeError on Python 3, so fall back to the stored value.
        if timeout is None:
            timeout = self._job_timeout

        url = 'clusters/%s/job/%s/submit' % (self._cluster_id, self._job_id)

        self._client.put(url, data=json.dumps(job_params))
        start = time.time()
        while True:
            time.sleep(1)
            r = self._client.get('jobs/%s' % self._job_id)

            if r['status'] in ['error', 'unexpectederror']:
                r = self._client.get('jobs/%s/log' % self._job_id)
                self.fail(str(r))
            elif r['status'] == 'complete':
                break

            if time.time() - start > timeout:
                self.fail('Job didn\'t complete in timeout')

    def assert_output(self):
        """Check the output folder holds 4 items and stdout equals the input data."""
        # listItem may be lazy; materialize it so len() and iteration both work.
        items = list(self._client.listItem(self._output_folder_id))
        self.assertEqual(len(items), 4)

        prefix = 'CumulusIntegrationTestJob-%s.o' % self._job_id
        stdout_item = next(
            (item for item in items if item['name'].startswith(prefix)), None)

        self.assertIsNotNone(stdout_item)
        files = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(files), 1)

        # The file is read in binary mode, so compare against bytes; on
        # Python 2 a str already is bytes and is used unchanged.
        expected = self._data
        if not isinstance(expected, bytes):
            expected = expected.encode('utf-8')

        path = os.path.join(tempfile.gettempdir(), self._job_id)
        try:
            self._client.downloadFile(files[0]['_id'], path)
            with open(path, 'rb') as fp:
                self.assertEqual(fp.read(), expected)

        finally:
            if os.path.exists(path):
                os.remove(path)