class NewtIntegrationTest(BaseIntegrationTest):
    """Integration test that authenticates through NERSC NEWT before
    exercising the cluster/job lifecycle against a Girder server.
    """

    def __init__(self, name, girder_url, girder_user, girder_password,
                 machine, job_timeout=60 * 5):
        super(NewtIntegrationTest, self).__init__(
            name, girder_url, girder_user, girder_password, job_timeout)
        self._cluster_id = None
        # NEWT machine name the cluster should be created on.
        self._machine = machine

    def setUp(self):
        # First authenticate with NEWT to obtain a session id.
        self._session = Session()
        r = self._session.post('https://newt.nersc.gov/newt/auth',
                               {'username': self._girder_user,
                                'password': self._girder_password})
        self.assertEqual(r.status_code, 200)
        self._newt_session_id = r.json()['newt_sessionid']

        # Now authenticate with Girder using the NEWT session id.
        # (The original issued this PUT twice; once is sufficient.)
        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url,
                                                  self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        self._user_id = user['_id']
        # listFolder may return a generator; materialize before len().
        r = list(self._client.listFolder(self._user_id, 'user',
                                         name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = 'clusters/%s' % self._cluster_id
                self._client.delete(url)
            except Exception:
                # Best-effort cleanup; report but never mask the result.
                traceback.print_exc()

    def create_cluster(self):
        # Register a NEWT-backed cluster pointing at the target machine.
        body = {
            'config': {
                'host': self._machine
            },
            'name': 'NewtIntegrationTest',
            'type': 'newt'
        }
        r = self._client.post('clusters', data=json.dumps(body))
        self._cluster_id = r['_id']

        # Now test the connection: start the cluster and poll its status
        # for roughly ten seconds.
        r = self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                self.fail(str(r))

            if sleeps > 9:
                self.fail('Cluster never moved into running state')
            sleeps += 1

    def assert_output(self):
        # The job should have produced exactly 4 items in the output folder.
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o'
                                    % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        # Use the located stdout item; the original read the loop variable,
        # which only worked because of the break above.
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        url = '%s/api/v1/file/%s/download' % (self._girder_url, r[0]['_id'])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
def get_source_video_filename(folderId: str, girder_client: GirderClient):
    """Locate the filename of a folder's training/pipeline-compatible video.

    Items whose metadata carries ``{"source_video": True}`` take priority;
    otherwise the converted video tagged ``{"codec": "h264"}`` is used.
    Returns ``None`` when neither kind of item is present.

    :folderId: id of the folder whose items are searched
    :girder_client: girder_client used to request the data
    """
    fallback_name = None
    for entry in girder_client.listItem(folderId):
        entry_meta = entry.get("meta", {})
        name = entry.get("name")
        if entry_meta.get("source_video") is True:
            # Explicitly tagged source video wins immediately.
            return name
        if entry_meta.get("codec") == "h264":
            # Remember the (last-seen) converted video as a fallback.
            fallback_name = name
    return fallback_name
class NewtIntegrationTest(BaseIntegrationTest):
    """Integration test that authenticates through NERSC NEWT before
    exercising the cluster/job lifecycle against a Girder server.
    """

    def __init__(self, name, girder_url, girder_user, girder_password,
                 machine, job_timeout=60 * 5):
        super(NewtIntegrationTest, self).__init__(
            name, girder_url, girder_user, girder_password, job_timeout)
        self._cluster_id = None
        # NEWT machine name the cluster should be created on.
        self._machine = machine

    def setUp(self):
        # First authenticate with NEWT to obtain a session id.
        self._session = Session()
        r = self._session.post('https://newt.nersc.gov/newt/auth', {
            'username': self._girder_user,
            'password': self._girder_password
        })
        self.assertEqual(r.status_code, 200)
        self._newt_session_id = r.json()['newt_sessionid']

        # Now authenticate with Girder using the NEWT session id.
        # (The original issued this PUT twice; once is sufficient.)
        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url,
                                                  self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        self._user_id = user['_id']
        # listFolder may return a generator; materialize before len().
        r = list(self._client.listFolder(self._user_id, 'user',
                                         name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = 'clusters/%s' % self._cluster_id
                self._client.delete(url)
            except Exception:
                # Best-effort cleanup; report but never mask the result.
                traceback.print_exc()

    def create_cluster(self):
        # Register a NEWT-backed cluster pointing at the target machine.
        body = {
            'config': {
                'host': self._machine
            },
            'name': 'NewtIntegrationTest',
            'type': 'newt'
        }
        r = self._client.post('clusters', data=json.dumps(body))
        self._cluster_id = r['_id']

        # Now test the connection: start the cluster and poll its status
        # for roughly ten seconds.
        r = self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                self.fail(str(r))

            if sleeps > 9:
                self.fail('Cluster never moved into running state')
            sleeps += 1

    def assert_output(self):
        # The job should have produced exactly 4 items in the output folder.
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o'
                                    % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        # Use the located stdout item; the original read the loop variable,
        # which only worked because of the break above.
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        url = '%s/api/v1/file/%s/download' % (self._girder_url, r[0]['_id'])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
def upload_exported_zipped_dataset(
    gc: GirderClient,
    manager: JobManager,
    folderId: str,
    working_directory: Path,
    create_subfolder='',
):
    """Upload a folder generated from the export of a zip file and set
    the dataset metadata on the resulting Girder folder and items.

    :param gc: authenticated Girder client used for upload and metadata
    :param manager: job manager used for status updates and log output
    :param folderId: id of the destination Girder folder
    :param working_directory: local directory holding the extracted files
    :param create_subfolder: optional name of a subfolder to upload into;
        '' uploads directly into ``folderId``
    """
    listOfFileNames = os.listdir(working_directory)
    potential_meta_files = list(
        filter(constants.metaRegex.match, listOfFileNames))
    if len(potential_meta_files) == 0:
        manager.write(
            "Could not find meta.json or config.json file within the subdirectroy\n"
        )
        return
    print(listOfFileNames)
    # load meta.json to get datatype and verify list of files
    # NOTE: if several meta files match, the last one read wins.
    meta = {}
    for meta_name in potential_meta_files:
        with open(f"{working_directory}/{meta_name}") as f:
            meta = json.load(f)

    # Renamed from 'type' to avoid shadowing the builtin.
    dataset_type = meta[constants.TypeMarker]
    if dataset_type == constants.ImageSequenceType:
        imageData = meta['imageData']
        for image in imageData:
            if image["filename"] not in listOfFileNames:
                # Fixed: message was missing the f prefix and referenced an
                # undefined 'item' variable, so it printed the literal braces.
                manager.write(
                    f"Could not find {image['filename']} file within the list of files\n"
                )
                return
    elif dataset_type == constants.VideoType:
        video = meta["video"]
        if video["filename"] not in listOfFileNames:
            # Fixed: same missing-f / wrong-variable defect as above.
            manager.write(
                f"Could not find {video['filename']} file within the list of files\n"
            )
            return
    # remove the auxiliary directory so we don't have to tag them all
    if constants.AuxiliaryFolderName in listOfFileNames and os.path.isdir(
            f'{working_directory}/{constants.AuxiliaryFolderName}'):
        shutil.rmtree(f'{working_directory}/{constants.AuxiliaryFolderName}')

    root_folderId = folderId
    if create_subfolder != '':
        sub_folder = gc.createFolder(
            folderId,
            create_subfolder,
            reuseExisting=True,
        )
        root_folderId = str(sub_folder['_id'])

    manager.updateStatus(JobStatus.PUSHING_OUTPUT)
    # create a source folder to place the zipFile inside of
    gc.upload(f'{working_directory}/*', root_folderId)

    # Now we set all the metadata for the folders and items
    all_files = list(gc.listItem(root_folderId))
    root_meta = {
        "type": dataset_type,
        "attributes": meta.get("attributes", None),
        "customTypeStyling": meta.get("customTypeStyling", None),
        "confidenceFilters": meta.get("confidenceFilters", None),
        "fps": meta["fps"],
        "version": meta["version"],
    }

    if dataset_type == constants.VideoType:
        # set transcoded and non-transcoded versions
        transcoded_video = list(
            gc.listItem(root_folderId, name=video["filename"]))
        if len(transcoded_video) == 1:
            ffprobe = meta["ffprobe_info"]
            avgFpsString = ffprobe["avg_frame_rate"]
            dividend, divisor = [int(v) for v in avgFpsString.split('/')]
            originalFps = dividend / divisor

            transcoded_metadata = {
                "codec": "h264",
                "originalFps": originalFps,
                "originalFpsString": avgFpsString,
                "source_video": False,
                "transcoder": "ffmpeg",
            }
            gc.addMetadataToItem(str(transcoded_video[0]['_id']),
                                 transcoded_metadata)
            # other video is tagged as the source video
            for item in all_files:
                if (item["name"].endswith(tuple(constants.validVideoFormats))
                        and item["name"] != video["filename"]):
                    # NOTE(review): source_video is set to False here even
                    # though the comment above says this item is the source
                    # video -- confirm whether this should be True.
                    source_metadata = {
                        "codec": ffprobe["codec_name"],
                        "originalFps": originalFps,
                        "originalFpsString": avgFpsString,
                        "source_video": False,
                    }
                    gc.addMetadataToItem(str(item['_id']), source_metadata)
            root_meta["originalFps"] = originalFps
            root_meta["originalFpsString"] = avgFpsString

    # Need to tag folder Level data (annotate, and others)
    root_meta[constants.DatasetMarker] = True
    gc.addMetadataToFolder(root_folderId, root_meta)
    gc.post(f'dive_rpc/postprocess/{root_folderId}', data={"skipJobs": True})
class BaseIntegrationTest(unittest.TestCase):
    """Common scaffolding for Cumulus integration tests.

    Creates the script/input/output/job resources on a Girder server
    and (optionally) cleans them up afterwards.
    """

    def __init__(self, name, girder_url, girder_user, girder_password,
                 job_timeout=60, cleanup=True):
        super(BaseIntegrationTest, self).__init__(name)
        self._job_id = None
        self._script_id = None
        self._output_folder_id = None
        self._input_folder_id = None
        self._girder_url = girder_url
        self._girder_user = girder_user
        self._girder_password = girder_password
        self._job_timeout = job_timeout
        self._data = 'Need more input!'
        self._cleanup = cleanup

    def setUp(self):
        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.authenticate(self._girder_user, self._girder_password)

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = list(self._client.listFolder(self._user_id, 'user',
                                         name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        if not self._cleanup:
            return

        # Best-effort deletion of every resource the test created;
        # failures are reported but never mask the test outcome.
        if self._job_id:
            try:
                self._client.delete('jobs/%s' % self._job_id)
            except Exception:
                traceback.print_exc()

        if self._script_id:
            try:
                self._client.delete('scripts/%s' % self._script_id)
            except Exception:
                traceback.print_exc()

        if self._output_folder_id:
            try:
                self._client.delete('folder/%s' % self._output_folder_id)
            except Exception:
                traceback.print_exc()

        if self._input_folder_id:
            try:
                self._client.delete('folder/%s' % self._input_folder_id)
            except Exception:
                traceback.print_exc()

    def create_script(self, commands=None):
        # Fixed: the original used a mutable default list shared across calls.
        if commands is None:
            commands = ['sleep 10', 'cat CumulusIntegrationTestInput']
        body = {
            'commands': commands,
            'name': 'CumulusIntegrationTestLob'
        }
        r = self._client.post('scripts', data=json.dumps(body))
        self._script_id = r['_id']

    def create_input(self, folder_name='CumulusInput'):
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._input_folder_id = r['_id']

        size = len(self._data)
        item = self._client.uploadFile(self._input_folder_id,
                                       StringIO(self._data),
                                       'CumulusIntegrationTestInput', size,
                                       parentType='folder')
        self._item_id = item['itemId']

    def create_output_folder(self, folder_name='CumulusOutput'):
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._output_folder_id = r['_id']

    def create_job(self, job_name='CumulusIntegrationTestJob', tail=None):
        body = {
            'name': job_name,
            'scriptId': self._script_id,
            'output': [{
                'folderId': self._output_folder_id,
                'path': '.'
            }],
            'input': [{
                'folderId': self._input_folder_id,
                'path': '.'
            }]
        }

        if tail:
            body['output'].append({
                "path": tail,
                "tail": True
            })

        job = self._client.post('jobs', data=json.dumps(body))
        self._job_id = job['_id']

    def submit_job(self, job_params=None, timeout=None):
        # Fixed: mutable default dict, and timeout=None previously raised
        # TypeError in the comparison below; fall back to the configured
        # job timeout instead.
        if job_params is None:
            job_params = {}
        if timeout is None:
            timeout = self._job_timeout

        url = 'clusters/%s/job/%s/submit' % (self._cluster_id, self._job_id)
        self._client.put(url, data=json.dumps(job_params))
        start = time.time()

        # Poll job status until completion, failure, or timeout.
        while True:
            time.sleep(1)
            r = self._client.get('jobs/%s' % self._job_id)

            if r['status'] in ['error', 'unexpectederror']:
                r = self._client.get('jobs/%s/log' % self._job_id)
                self.fail(str(r))
            elif r['status'] == 'complete':
                break

            if time.time() - start > timeout:
                self.fail('Job didn\'t complete in timeout')

    def assert_output(self):
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o'
                                    % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        # Use the located stdout item; the original read the loop variable,
        # which only worked because of the break above.
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        path = os.path.join(tempfile.gettempdir(), self._job_id)
        try:
            self._client.downloadFile(r[0]['_id'], path)
            with open(path, 'rb') as fp:
                self.assertEqual(fp.read(), self._data)
        finally:
            if os.path.exists(path):
                os.remove(path)
class NewtIntegrationTest(BaseIntegrationTest):
    """Integration test that authenticates through NERSC NEWT before
    exercising the cluster/job lifecycle against a Girder server.
    """

    def __init__(self, name, girder_url, girder_user, girder_password,
                 machine, job_timeout=60 * 5):
        super(NewtIntegrationTest, self).__init__(
            name, girder_url, girder_user, girder_password, job_timeout)
        self._cluster_id = None
        # NEWT machine name the cluster should be created on.
        self._machine = machine

    def setUp(self):
        # First authenticate with NEWT to obtain a session id.
        self._session = Session()
        r = self._session.post('https://newt.nersc.gov/newt/auth', {
            'username': self._girder_user,
            'password': self._girder_password})
        self.assertEqual(r.status_code, 200)
        self._newt_session_id = r.json()['newt_sessionid']

        # Now authenticate with Girder using the NEWT session id.
        # (The original issued this PUT twice; once is sufficient.)
        url = '%s/api/v1/newt/authenticate/%s' % (self._girder_url,
                                                  self._newt_session_id)
        r = self._session.put(url)
        self.assertEqual(r.status_code, 200)

        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.token = self._session.cookies['girderToken']

        user = self._client.get('user/me')
        self._user_id = user['_id']
        # Fixed: listFolder may return a generator, which has no len();
        # materialize it before asserting.
        r = list(self._client.listFolder(self._user_id, 'user',
                                         name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        super(NewtIntegrationTest, self).tearDown()
        if self._cluster_id:
            try:
                url = 'clusters/%s' % self._cluster_id
                self._client.delete(url)
            except Exception:
                # Best-effort cleanup; report but never mask the result.
                traceback.print_exc()

    def create_cluster(self):
        # Register a NEWT-backed cluster pointing at the target machine.
        body = {
            'config': {
                'host': self._machine
            },
            'name': 'NewtIntegrationTest',
            'type': 'newt'
        }
        r = self._client.post('clusters', data=json.dumps(body))
        self._cluster_id = r['_id']

        # Now test the connection: start the cluster and poll its status
        # for roughly ten seconds.
        r = self._client.put('clusters/%s/start' % self._cluster_id)
        sleeps = 0
        while True:
            time.sleep(1)
            r = self._client.get('clusters/%s/status' % self._cluster_id)

            if r['status'] == 'running':
                break
            elif r['status'] == 'error':
                r = self._client.get('clusters/%s/log' % self._cluster_id)
                self.fail(str(r))

            if sleeps > 9:
                self.fail('Cluster never moved into running state')
            sleeps += 1

    def assert_output(self):
        # The job should have produced exactly 4 items in the output folder.
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o'
                                    % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        # Use the located stdout item; the original read the loop variable,
        # which only worked because of the break above.
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        url = '%s/api/v1/file/%s/download' % (self._girder_url, r[0]['_id'])
        r = self._session.get(url)
        self.assertEqual(r.content, self._data)

    def test(self):
        try:
            self.create_cluster()
            self.create_script()
            self.create_input()
            self.create_output_folder()
            self.create_job()
            self.submit_job(timeout=self._job_timeout)
            self.assert_output()
        except HttpError as error:
            self.fail(error.responseText)
class BaseIntegrationTest(unittest.TestCase):
    """Common scaffolding for Cumulus integration tests.

    Creates the script/input/output/job resources on a Girder server
    and (optionally) cleans them up afterwards.
    """

    def __init__(self, name, girder_url, girder_user, girder_password,
                 job_timeout=60, cleanup=True):
        super(BaseIntegrationTest, self).__init__(name)
        self._job_id = None
        self._script_id = None
        self._output_folder_id = None
        self._input_folder_id = None
        self._girder_url = girder_url
        self._girder_user = girder_user
        self._girder_password = girder_password
        self._job_timeout = job_timeout
        self._data = 'Need more input!'
        self._cleanup = cleanup

    def setUp(self):
        url = '%s/api/v1' % self._girder_url
        self._client = GirderClient(apiUrl=url)
        self._client.authenticate(self._girder_user, self._girder_password)

        user = self._client.get('user/me')
        self._user_id = user['_id']
        r = list(self._client.listFolder(self._user_id, 'user',
                                         name='Private'))
        self.assertEqual(len(r), 1)
        self._private_folder_id = r[0]['_id']

    def tearDown(self):
        if not self._cleanup:
            return

        # Best-effort deletion of every resource the test created;
        # failures are reported but never mask the test outcome.
        if self._job_id:
            try:
                self._client.delete('jobs/%s' % self._job_id)
            except Exception:
                traceback.print_exc()

        if self._script_id:
            try:
                self._client.delete('scripts/%s' % self._script_id)
            except Exception:
                traceback.print_exc()

        if self._output_folder_id:
            try:
                self._client.delete('folder/%s' % self._output_folder_id)
            except Exception:
                traceback.print_exc()

        if self._input_folder_id:
            try:
                self._client.delete('folder/%s' % self._input_folder_id)
            except Exception:
                traceback.print_exc()

    def create_script(self, commands=None):
        # Fixed: the original used a mutable default list shared across calls.
        if commands is None:
            commands = ['sleep 10', 'cat CumulusIntegrationTestInput']
        body = {'commands': commands, 'name': 'CumulusIntegrationTestLob'}
        r = self._client.post('scripts', data=json.dumps(body))
        self._script_id = r['_id']

    def create_input(self, folder_name='CumulusInput'):
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._input_folder_id = r['_id']

        size = len(self._data)
        item = self._client.uploadFile(self._input_folder_id,
                                       StringIO(self._data),
                                       'CumulusIntegrationTestInput', size,
                                       parentType='folder')
        self._item_id = item['itemId']

    def create_output_folder(self, folder_name='CumulusOutput'):
        r = self._client.createFolder(self._private_folder_id, folder_name)
        self._output_folder_id = r['_id']

    def create_job(self, job_name='CumulusIntegrationTestJob', tail=None):
        body = {
            'name': job_name,
            'scriptId': self._script_id,
            'output': [{
                'folderId': self._output_folder_id,
                'path': '.'
            }],
            'input': [{
                'folderId': self._input_folder_id,
                'path': '.'
            }]
        }

        if tail:
            body['output'].append({"path": tail, "tail": True})

        job = self._client.post('jobs', data=json.dumps(body))
        self._job_id = job['_id']

    def submit_job(self, job_params=None, timeout=None):
        # Fixed: mutable default dict, and timeout=None previously raised
        # TypeError in the comparison below; fall back to the configured
        # job timeout instead.
        if job_params is None:
            job_params = {}
        if timeout is None:
            timeout = self._job_timeout

        url = 'clusters/%s/job/%s/submit' % (self._cluster_id, self._job_id)
        self._client.put(url, data=json.dumps(job_params))
        start = time.time()

        # Poll job status until completion, failure, or timeout.
        while True:
            time.sleep(1)
            r = self._client.get('jobs/%s' % self._job_id)

            if r['status'] in ['error', 'unexpectederror']:
                r = self._client.get('jobs/%s/log' % self._job_id)
                self.fail(str(r))
            elif r['status'] == 'complete':
                break

            if time.time() - start > timeout:
                self.fail('Job didn\'t complete in timeout')

    def assert_output(self):
        r = self._client.listItem(self._output_folder_id)
        self.assertEqual(len(r), 4)

        stdout_item = None
        for i in r:
            if i['name'].startswith('CumulusIntegrationTestJob-%s.o'
                                    % self._job_id):
                stdout_item = i
                break

        self.assertIsNotNone(stdout_item)
        # Use the located stdout item; the original read the loop variable,
        # which only worked because of the break above.
        r = self._client.get('item/%s/files' % stdout_item['_id'])
        self.assertEqual(len(r), 1)

        path = os.path.join(tempfile.gettempdir(), self._job_id)
        try:
            self._client.downloadFile(r[0]['_id'], path)
            with open(path, 'rb') as fp:
                self.assertEqual(fp.read(), self._data)
        finally:
            if os.path.exists(path):
                os.remove(path)