class NewtIntegrationTest(BaseIntegrationTest):
    def __init__(self, name, girder_url, girder_user, girder_password, machine, job_timeout=60 * 5):
        super(NewtIntegrationTest, self).__init__(name, girder_url, girder_user, girder_password, job_timeout)
        self._cluster_id = None
        self._machine = machine

    def setUp(self):

        # First authenticate with NEWT
        self._session = Session()
        r = self._session.post(
            "https://newt.nersc.gov/newt/auth", {"username": self._girder_user, "password": self._girder_password}
        )

        self.assertEqual(r.status_code, 200)
        print(r.json())
        self._newt_session_id = r.json()["newt_sessionid"]

        # Now authenticate with Girder using the session id
        url = "%s/api/v1/newt/authenticate/%s" % (self._girder_url, self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = "%s/api/v1/newt/authenticate/%s" % (self._girder_url, self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = "%s/api/v1" % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.token = self._session.cookies["girderToken"]

        user = self._client.get("user/me")
        self._user_id = user["_id"]
        r = self._client.listFolder(self._user_id, "user", name="Private")
        r = list(r)
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]["_id"]

    def tearDown(self):
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = "clusters/%s" % self._cluster_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

    def create_cluster(self):
        body = {"config": {"host": self._machine}, "name": "NewtIntegrationTest", "type": "newt"}

        r = self._client.post("clusters", data=json.dumps(body))
        self._cluster_id = r["_id"]

        # Now test the connection
        r = self._client.put("clusters/%s/start" % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get("clusters/%s/status" % self._cluster_id)

            if r["status"] == "running":
                break
            elif r["status"] == "error":
                r = self._client.get("clusters/%s/log" % self._cluster_id)
                self.fail(str(r))

            if sleeps > 9:
                self.fail("Cluster never moved into running state")
            sleeps += 1

    def assert_output(self):
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i["name"].startswith("CumulusIntegrationTestJob-%s.o" % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        r = self._client.get("item/%s/files" % i["_id"])
        self.assertEqual(len(r), 1)

        url = "%s/api/v1/file/%s/download" % (self._girder_url, r[0]["_id"])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
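
# A minimal sketch (not part of the original example) of how this test might be
# driven directly; the Girder URL, credentials, and NEWT machine name below are
# placeholder values, not values taken from the original source.
if __name__ == "__main__":
    import unittest
    suite = unittest.TestSuite()
    suite.addTest(NewtIntegrationTest(
        "test", "https://girder.example.com", "someuser", "somepassword",
        machine="cori"))
    unittest.TextTestRunner(verbosity=2).run(suite)
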
Example #2
def get_source_video_filename(folderId: str, girder_client: GirderClient):
    """
    Searches a folderId for source videos that are compatible with training/pipelines
    Will look for {"source_video":True} metadata first, then fall back to the converted video
    indicated by  {"codec":"h264"}
    If neither found it will return None

    :folderId: Current path to wehere the items sit

    :girder_client: girder_client used to request the data
    """

    folder_contents = girder_client.listItem(folderId)
    backup_converted_file = None
    for item in folder_contents:
        file_name = item.get("name")
        if item.get("meta", {}).get("source_video") is True:
            return file_name
        if item.get("meta", {}).get("codec") == "h264":
            backup_converted_file = file_name
    return backup_converted_file
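
# A minimal usage sketch (not part of the original example); the API URL,
# credentials, and folder id below are placeholders for illustration only.
if __name__ == "__main__":
    gc = GirderClient(apiUrl="https://viame.example.com/api/v1")
    gc.authenticate("someuser", "somepassword")
    source_name = get_source_video_filename("<folder-id>", gc)
    if source_name is None:
        print("No source or h264-transcoded video found in the folder")
    else:
        print(f"Using video file: {source_name}")
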
class NewtIntegrationTest(BaseIntegrationTest):
    def __init__(self,
                 name,
                 girder_url,
                 girder_user,
                 girder_password,
                 machine,
                 job_timeout=60 * 5):
        super(NewtIntegrationTest,
              self).__init__(name, girder_url, girder_user, girder_password,
                             job_timeout)
        self._cluster_id = None
        self._machine = machine

    def setUp(self):

        # First authenticate with NEWT
        self._session = Session()
        r = self._session.post('https://newt.nersc.gov/newt/auth', {
            'username': self._girder_user,
            'password': self._girder_password
        })

        self.assertEqual(r.status_code, 200)
        print(r.json())
        self._newt_session_id = r.json()['newt_sessionid']

        # Now authenticate with Girder using the session id
        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url,
                                                  self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url,
                                                  self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = self._client.listFolder(self._user_id, 'user', name='Private')
        r = list(r)
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = 'clusters/%s' % self._cluster_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

    def create_cluster(self):
        body = {
            'config': {
                'host': self._machine
            },
            'name': 'NewtIntegrationTest',
            'type': 'newt'
        }

        r = self._client.post('clusters', data=json.dumps(body))
        self._cluster_id = r['_id']

        # Now test the connection
        r = self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                self.fail(str(r))

            if sleeps > 9:
                self.fail('Cluster never moved into running state')
            sleeps += 1

    def assert_output(self):
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o' %
                                    self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        url = '%s/api/v1/file/%s/download' % (self._girder_url, r[0]['_id'])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
Example #4
def upload_exported_zipped_dataset(
    gc: GirderClient,
    manager: JobManager,
    folderId: str,
    working_directory: Path,
    create_subfolder='',
):
    """Uploads a folder that is generated from the export of a zip file and sets metadata"""
    listOfFileNames = os.listdir(working_directory)
    potential_meta_files = list(
        filter(constants.metaRegex.match, listOfFileNames))
    if len(potential_meta_files) == 0:
        manager.write(
            "Could not find meta.json or config.json file within the subdirectroy\n"
        )
        return
    print(listOfFileNames)
    # load meta.json to get datatype and verify list of files
    meta = {}
    for meta_name in potential_meta_files:
        with open(f"{working_directory}/{meta_name}") as f:
            meta = json.load(f)
    type = meta[constants.TypeMarker]
    if type == constants.ImageSequenceType:
        imageData = meta['imageData']
        for image in imageData:
            if image["filename"] not in listOfFileNames:
                manager.write(
                    "Could not find {item['filename']} file within the list of files\n"
                )
                return
    elif type == constants.VideoType:
        video = meta["video"]
        if video["filename"] not in listOfFileNames:
            manager.write(
                "Could not find {item['filename']} file within the list of files\n"
            )
            return
    # remove the auxiliary directory so we don't have to tag them all
    if constants.AuxiliaryFolderName in listOfFileNames and os.path.isdir(
            f'{working_directory}/{constants.AuxiliaryFolderName}'):
        shutil.rmtree(f'{working_directory}/{constants.AuxiliaryFolderName}')
    root_folderId = folderId
    if create_subfolder != '':
        sub_folder = gc.createFolder(
            folderId,
            create_subfolder,
            reuseExisting=True,
        )
        root_folderId = str(sub_folder['_id'])
        manager.updateStatus(JobStatus.PUSHING_OUTPUT)
        # create a source folder to place the zipFile inside of
    gc.upload(f'{working_directory}/*', root_folderId)
    # Now we set all the metadata for the folders and items
    all_files = list(gc.listItem(root_folderId))
    root_meta = {
        "type": type,
        "attributes": meta.get("attributes", None),
        "customTypeStyling": meta.get("customTypeStyling", None),
        "confidenceFilters": meta.get("confidenceFilters", None),
        "fps": meta["fps"],
        "version": meta["version"],
    }
    if type == constants.VideoType:
        # set transcoded and non-transcoded versions
        transcoded_video = list(
            gc.listItem(root_folderId, name=video["filename"]))
        if len(transcoded_video) == 1:
            ffprobe = meta["ffprobe_info"]
            avgFpsString = ffprobe["avg_frame_rate"]
            dividend, divisor = [int(v) for v in avgFpsString.split('/')]
            originalFps = dividend / divisor

            transcoded_metadata = {
                "codec": "h264",
                "originalFps": originalFps,
                "originalFpsString": avgFpsString,
                "source_video": False,
                "transcoder": "ffmpeg",
            }
            gc.addMetadataToItem(str(transcoded_video[0]['_id']),
                                 transcoded_metadata)
            # other video is tagged as the source video
            for item in all_files:
                if (item["name"].endswith(tuple(constants.validVideoFormats))
                        and item["name"] != video["filename"]):
                    source_metadata = {
                        "codec": ffprobe["codec_name"],
                        "originalFps": originalFps,
                        "originalFpsString": avgFpsString,
                        "source_video": False,
                    }
                    gc.addMetadataToItem(str(item['_id']), source_metadata)
            root_meta["originalFps"] = originalFps
            root_meta["originalFpsString"] = avgFpsString

    # Need to tag folder Level data (annotate, and others)
    root_meta[constants.DatasetMarker] = True
    gc.addMetadataToFolder(root_folderId, root_meta)
    gc.post(f'dive_rpc/postprocess/{root_folderId}', data={"skipJobs": True})
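
# A hedged usage sketch (not part of the original example). The client URL,
# credentials, destination folder id, extraction path, and the tiny stand-in for
# the girder_worker JobManager interface are all assumptions for illustration.
from pathlib import Path  # the function signature above expects a pathlib.Path


class _PrintManager:
    """Minimal stand-in exposing the write/updateStatus calls used above."""

    def write(self, msg):
        print(msg, end='')

    def updateStatus(self, status):
        print(f'status -> {status}')


def _example_upload():
    gc = GirderClient(apiUrl='https://viame.example.com/api/v1')
    gc.authenticate('someuser', 'somepassword')
    # working_directory must already contain the extracted export:
    # meta.json plus the media files it references.
    upload_exported_zipped_dataset(
        gc,
        _PrintManager(),
        '<destination-folder-id>',
        Path('/tmp/extracted_export'),
        create_subfolder='ImportedDataset',
    )
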
class BaseIntegrationTest(unittest.TestCase):
    def __init__(self, name, girder_url, girder_user, girder_password, job_timeout=60, cleanup=True):
        super(BaseIntegrationTest, self).__init__(name)
        self._job_id = None
        self._script_id = None
        self._output_folder_id = None
        self._input_folder_id = None
        self._girder_url = girder_url
        self._girder_user = girder_user
        self._girder_password = girder_password
        self._job_timeout = job_timeout
        self._data = 'Need more input!'
        self._cleanup = cleanup

    def setUp(self):
        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.authenticate(self._girder_user,
                                  self._girder_password)

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = list(self._client.listFolder(self._user_id, 'user', name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):

        if not self._cleanup:
            return

        if self._job_id:
            try:
                url = 'jobs/%s' % self._job_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

        if self._script_id:
            try:
                url = 'scripts/%s' % self._script_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

        if self._output_folder_id:
            try:
                url = 'folder/%s' % self._output_folder_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

        if self._input_folder_id:
            try:
                url = 'folder/%s' % self._input_folder_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

    def create_script(self, commands=[
                'sleep 10', 'cat CumulusIntegrationTestInput'
            ]):
        body = {
            'commands': commands,
            'name': 'CumulusIntegrationTestLob'
        }

        r = self._client.post('scripts', data=json.dumps(body))
        self._script_id = r['_id']

    def create_input(self, folder_name='CumulusInput'):

        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._input_folder_id = r['_id']
        size = len(self._data)

        item = self._client.uploadFile(self._input_folder_id,
                    StringIO(self._data), 'CumulusIntegrationTestInput', size,
                    parentType='folder')

        self._item_id = item['itemId']

    def create_output_folder(self, folder_name='CumulusOutput'):
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._output_folder_id = r['_id']

    def create_job(self, job_name='CumulusIntegrationTestJob', tail=None):
        body = {
            'name': job_name,
            'scriptId': self._script_id,
            'output': [{
              'folderId': self._output_folder_id,
              'path': '.'
            }],
            'input': [
              {
                'folderId': self._input_folder_id,
                'path': '.'
              }
            ]
        }

        if tail:
            body['output'].append({
                "path": tail,
                "tail": True
            })

        job = self._client.post('jobs', data=json.dumps(body))
        self._job_id = job['_id']

    def submit_job(self, job_params={}, timeout=None):
        url = 'clusters/%s/job/%s/submit' % (self._cluster_id, self._job_id)

        self._client.put(url, data=json.dumps(job_params))
        start = time.time()
        while True:
            time.sleep(1)
            r = self._client.get('jobs/%s' % self._job_id)

            if r['status'] in ['error', 'unexpectederror']:
                r = self._client.get('jobs/%s/log' % self._job_id)
                self.fail(str(r))
            elif r['status'] == 'complete':
                break

            if time.time() - start > timeout:
                self.fail('Job didn\'t complete in timeout')

    def assert_output(self):
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o' % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        path = os.path.join(tempfile.gettempdir(), self._job_id)
        try:
            self._client.downloadFile(r[0]['_id'], path)
            with open(path, 'rb') as fp:
                self.assertEqual(fp.read(), self._data)

        finally:
            if os.path.exists(path):
                os.remove(path)
class NewtIntegrationTest(BaseIntegrationTest):

    def __init__(self, name, girder_url, girder_user, girder_password, machine,
                 job_timeout=60*5):
        super(NewtIntegrationTest, self).__init__(name, girder_url, girder_user,
                                                  girder_password, job_timeout)
        self._cluster_id = None
        self._machine = machine

    def setUp(self):

        # First authenticate with NEWT
        self._session = Session()
        r = self._session.post('https://newt.nersc.gov/newt/auth',
                               {
                                    'username': self._girder_user,
                                    'password': self._girder_password})

        self.assertEqual(r.status_code, 200)
        print(r.json())
        self._newt_session_id = r.json()['newt_sessionid']

        # Now authenticate with Girder using the session id
        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url, self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url, self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = list(self._client.listFolder(self._user_id, 'user', name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = 'clusters/%s' % self._cluster_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

    def create_cluster(self):
        body = {
            'config': {
                'host': self._machine
            },
            'name': 'NewtIntegrationTest',
            'type': 'newt'
        }

        r = self._client.post('clusters', data=json.dumps(body))
        self._cluster_id = r['_id']

        # Now test the connection
        r = self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                self.fail(str(r))

            if sleeps > 9:
                self.fail('Cluster never moved into running state')
            sleeps += 1

    def assert_output(self):
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o' % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        url = '%s/api/v1/file/%s/download' % (self._girder_url, r[0]['_id'])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)


    def test(self):
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
Example #7
class BaseIntegrationTest(unittest.TestCase):
    def __init__(self,
                 name,
                 girder_url,
                 girder_user,
                 girder_password,
                 job_timeout=60,
                 cleanup=True):
        super(BaseIntegrationTest, self).__init__(name)
        self._job_id = None
        self._script_id = None
        self._output_folder_id = None
        self._input_folder_id = None
        self._girder_url = girder_url
        self._girder_user = girder_user
        self._girder_password = girder_password
        self._job_timeout = job_timeout
        self._data = 'Need more input!'
        self._cleanup = cleanup

    def setUp(self):
        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.authenticate(self._girder_user, self._girder_password)

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = list(self._client.listFolder(self._user_id, 'user',
                                         name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):

        if not self._cleanup:
            return

        if self._job_id:
            try:
                url = 'jobs/%s' % self._job_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

        if self._script_id:
            try:
                url = 'scripts/%s' % self._script_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

        if self._output_folder_id:
            try:
                url = 'folder/%s' % self._output_folder_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

        if self._input_folder_id:
            try:
                url = 'folder/%s' % self._input_folder_id
                self._client.delete(url)
            except Exception:
                traceback.print_exc()

    def create_script(self,
                      commands=['sleep 10',
                                'cat CumulusIntegrationTestInput']):
        body = {'commands': commands, 'name': 'CumulusIntegrationTestLob'}

        r = self._client.post('scripts', data=json.dumps(body))
        self._script_id = r['_id']

    def create_input(self, folder_name='CumulusInput'):

        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._input_folder_id = r['_id']
        size = len(self._data)

        item = self._client.uploadFile(self._input_folder_id,
                                       StringIO(self._data),
                                       'CumulusIntegrationTestInput',
                                       size,
                                       parentType='folder')

        self._item_id = item['itemId']

    def create_output_folder(self, folder_name='CumulusOutput'):
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._output_folder_id = r['_id']

    def create_job(self, job_name='CumulusIntegrationTestJob', tail=None):
        body = {
            'name': job_name,
            'scriptId': self._script_id,
            'output': [{
                'folderId': self._output_folder_id,
                'path': '.'
            }],
            'input': [{
                'folderId': self._input_folder_id,
                'path': '.'
            }]
        }

        if tail:
            body['output'].append({"path": tail, "tail": True})

        job = self._client.post('jobs', data=json.dumps(body))
        self._job_id = job['_id']

    def submit_job(self, job_params={}, timeout=None):
        url = 'clusters/%s/job/%s/submit' % (self._cluster_id, self._job_id)

        self._client.put(url, data=json.dumps(job_params))
        start = time.time()
        while True:
            time.sleep(1)
            r = self._client.get('jobs/%s' % self._job_id)

            if r['status'] in ['error', 'unexpectederror']:
                r = self._client.get('jobs/%s/log' % self._job_id)
                self.fail(str(r))
            elif r['status'] == 'complete':
                break

            if time.time() - start > timeout:
                self.fail('Job didn\'t complete in timeout')

    def assert_output(self):
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o' %
                                    self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        path = os.path.join(tempfile.gettempdir(), self._job_id)
        try:
            self._client.downloadFile(r[0]['_id'], path)
            with open(path, 'rb') as fp:
                self.assertEqual(fp.read(), self._data)

        finally:
            if os.path.exists(path):
                os.remove(path)