from kiveapi import KiveAPI


def test_trim():
    session1 = KiveAPI('http://localhost')
    session2 = KiveAPI('http://localhost/')
    expected_url = 'http://localhost'

    assert expected_url == session1.server_url
    assert expected_url == session2.server_url
def fetch_input_sizes(args, slurm_jobs):
    data_path = os.path.join(args.cache_folder, 'speed_data_sizes.csv')
    try:
        with open(data_path) as f:
            reader = DictReader(f)
            cache = {int(row['run_id']): float(row['MB'])
                     for row in reader}
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        cache = {}
    session = KiveAPI(args.kive_server)
    session.login(args.kive_user, args.kive_password)
    fetcher = partial(fetch_input_size, cache=cache, kive_session=session)
    pool = ThreadPool()
    job_count = len(slurm_jobs)
    fetch_count = 0
    failed_run_ids = set()
    last_error = None
    data_file = None
    data_writer = None
    input_sizes = {}
    try:
        for i, (run_id, input_size, is_cached, error_message) in enumerate(
                pool.imap_unordered(fetcher, slurm_jobs, chunksize=10)):
            if error_message is not None:
                last_error = error_message
                failed_run_ids.add(run_id)
            if not is_cached:
                if data_file is None:
                    # First uncached run: start rewriting the cache file and
                    # copy over the sizes gathered so far.
                    data_file = open(data_path, 'w')
                    data_writer = DictWriter(data_file, ['run_id', 'MB'])
                    data_writer.writeheader()
                    for old_run_id, old_input_size in input_sizes.items():
                        data_writer.writerow({'run_id': old_run_id,
                                              'MB': old_input_size})
                if fetch_count % 10000 == 0:
                    print('Fetched {} runs after scanning {} of {} at {}.'.format(
                        fetch_count, i, job_count, datetime.now()))
                fetch_count += 1
            input_sizes[run_id] = input_size
            if data_writer:
                data_writer.writerow({'run_id': run_id, 'MB': input_size})
    finally:
        if data_file is not None:
            data_file.close()
    if failed_run_ids:
        message = 'Failed to fetch run ids: {}\n Caused by {}'.format(
            ', '.join(str(run_id) for run_id in sorted(failed_run_ids)),
            last_error)
        raise RuntimeError(message)
    return input_sizes
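The fetch_input_size() worker passed to the thread pool is not included above. Here is a minimal sketch of the contract it has to satisfy, assuming the Slurm job object exposes a run_id attribute and that input sizes can be read from the run's JSON on /api/runs/<id>/ (both are assumptions, not confirmed by the snippet):

def fetch_input_size(slurm_job, cache, kive_session):
    # Hypothetical worker: returns (run_id, size_in_MB, is_cached, error_message),
    # matching the tuple unpacked in fetch_input_sizes() above.
    run_id = slurm_job.run_id  # assumption about the Slurm job wrapper
    cached_size = cache.get(run_id)
    if cached_size is not None:
        return run_id, cached_size, True, None
    try:
        # The field names 'inputs' and 'dataset_size' are illustrative only.
        run = kive_session.get('/api/runs/{}/'.format(run_id)).json()
        total_bytes = sum(item['dataset_size'] for item in run['inputs'])
        return run_id, total_bytes / (1024 * 1024), False, None
    except Exception as ex:
        return run_id, None, False, str(ex)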
def main():
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s[%(levelname)s]%(name)s:%(message)s")
    logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
        logging.WARNING)
    logging.info('Starting.')
    args = parse_args()
    session = KiveAPI(args.kive_server)
    session.mount('https://', HTTPAdapter(max_retries=20))
    session.login(args.kive_user, args.kive_password)

    runs = session.find_runs(active=True)
    pipeline_id = input_id = None
    for run in runs:
        if 'dataset_generator' in run.raw['display_name']:
            pipeline_id = run.pipeline_id
            input_id = run.raw['inputs'][0]['dataset']
            break
    if pipeline_id is None:
        raise RuntimeError(
            'No active runs found with "dataset_generator" in the name.')
    pipeline = session.get_pipeline(pipeline_id)
    input_dataset = session.get_dataset(input_id)
    while True:
        launch_if_needed(session, args, pipeline, input_dataset)
        sleep(1)
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()
    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    folder = choose_folder()
    pipeline_family = choose_family(kive)
    # noinspection PyCompatibility
    groups = input('Groups allowed? [Everyone] ') or 'Everyone'
    groups = groups.split(',')
    CompoundDatatypeRequest.load_existing(kive)
    steps, pipeline_config = load_steps(kive,
                                        folder,
                                        pipeline_family,
                                        groups,
                                        args.docker_default)
    load_pipeline(pipeline_config)
    print('Uploading {!r} to {} for {}.'.format(folder,
                                                pipeline_family,
                                                groups))
    for i, step in enumerate(steps, start=1):
        print(' {}: {}'.format(i, step.get_display()))
        for dependency in step.dependencies:
            print(' ' + dependency['requirement'].get_display())
    new_compound_datatypes = [request.representation
                              for request in CompoundDatatypeRequest.new_requests]
    new_compound_datatypes.sort()
    print('New compound datatypes:')
    print('\n'.join(new_compound_datatypes))
    # noinspection PyCompatibility
    revision_name = input('Enter a revision name, or leave blank to abort: ')
    if not revision_name:
        return

    for request in CompoundDatatypeRequest.new_requests:
        request.create(kive, groups)
    create_code_resources(steps, revision_name)
    create_methods(kive, steps, revision_name)
    if not isinstance(pipeline_family, PipelineFamily):
        pipeline_family = create_pipeline_family(kive, pipeline_family, groups)
    create_pipeline(kive, pipeline_family, revision_name, pipeline_config, steps)
    print('Done.')
from unittest.mock import patch, DEFAULT

from kiveapi import KiveAPI


def mocked_api():
    with patch.multiple('requests.Session',
                        get=DEFAULT,
                        send=DEFAULT,
                        post=DEFAULT,
                        patch=DEFAULT,
                        delete=DEFAULT,
                        head=DEFAULT):
        yield KiveAPI('http://localhost')
def kive_login(server_url, user, password):
    if KiveAPI is None:
        raise ImportError('Kive API failed to import. Is it installed?')
    kive = KiveAPI(server_url)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(user, password)
    return kive
from unittest.mock import patch, DEFAULT

from requests import Session

from kiveapi import KiveAPI


def mocked_api():
    with patch.multiple('requests.Session',
                        get=DEFAULT,
                        send=DEFAULT,
                        post=DEFAULT,
                        patch=DEFAULT,
                        delete=DEFAULT,
                        head=DEFAULT):
        Session.head.return_value.status_code = 200
        Session.get.return_value.status_code = 200
        Session.post.return_value.status_code = 200
        Session.delete.return_value.status_code = 200
        Session.patch.return_value.status_code = 200
        yield KiveAPI('http://localhost')
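Assuming mocked_api is registered as a pytest fixture and that KiveAPI.get() delegates to the patched requests.Session.get, a test can stub a JSON payload and assert on it. A minimal sketch; the endpoint and the field names in the stubbed data are only an example:

def test_list_code_resources(mocked_api):
    # Stub the payload the patched Session.get will return (illustrative data).
    Session.get.return_value.json.return_value = [{'name': 'example.py'}]

    response = mocked_api.get('/api/coderesources/')

    assert response.json()[0]['name'] == 'example.py'
    assert Session.get.called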
from kiveapi import KiveAPI, KiveMalformedDataException

# This assumes you have a Kive instance listening on port 8000, running
# the demo fixture. In production, you wouldn't put your authentication
# information in source code.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login('kive', 'kive')

# Upload data
try:
    fastq1 = kive.add_dataset('New fastq file 1',
                              'None',
                              open('exfastq1.fastq', 'r'),
                              None,
                              None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq1 = kive.find_datasets(name='New fastq file 1')[0]

try:
    fastq2 = kive.add_dataset('New fastq file 2',
                              'None',
                              open('exfastq2.fastq', 'r'),
                              None,
                              None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq2 = kive.find_datasets(name='New fastq file 2')[0]

# Get the pipeline by family ID
pipeline_family = kive.get_pipeline_family(2)

print('Using data:')
print(fastq1, fastq2)

print('With pipeline:')
print(pipeline_family.published_or_latest())

# Create a RunBatch.
rb = kive.create_run_batch(
import os
import StringIO

from kiveapi import KiveAPI, KiveMalformedDataException

# Testing creation of Datasets using external files.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login('kive', 'kive')  # don't do this in practice, store your password somewhere safe

efd_name = "ExternalFiles"  # make an ExternalFileDirectory with this name
efd_path = ""  # fill this in with your own path
external_path = "external_file.dat"
external_file_contents = "foo"

with open(os.path.join(efd_path, external_path), "wb") as f:
    f.write(external_file_contents)

# Upload data
try:
    ext_ds = kive.add_dataset(
        'ExternalDatasetFile',
        'External copy of 1234A_R1',
        None,
        None,
        None,
        ["Everyone"],
        externalfiledirectory=efd_name,
        external_path=external_path
    )
except KiveMalformedDataException:
    ext_ds = kive.find_datasets(name='ExternalDatasetFile')[0]
def main():
    args = parse_args()
    logger.info('Starting.')
    kive_server = os.environ.get('KIVE_SERVER', 'http://*****:*****@api_runs', json=params, is_json=True)
    response_json = response.json()
    launched_run_ids.add(response_json['id'])
    except Exception:
        logger.error('Failed to rerun run id %d: %r.',
                     run['id'],
                     run,
                     exc_info=True)
    logger.info('Done.')
def kive_login(server_url, user, password):
    kive = KiveAPI(server_url)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(user, password)
    return kive
from kiveapi import KiveAPI

# Testing creation of Datasets using external files.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login('kive', 'kive')  # don't do this in practice, store your password somewhere safe

# Kive internal Datatype primary keys:
str_pk = 1
bool_pk = 2
float_pk = 3
int_pk = 4
natural_number_pk = 5

# Define a new CompoundDatatype.
cdt = kive.create_cdt("CDTCreatedByPythonAPI",
                      users=["kive"],
                      groups=["Everyone"],
                      members=[{
                          "column_idx": 1,
                          "column_name": "col1_str",
                          "datatype": str_pk
                      }, {
                          "column_idx": 2,
                          "column_name": "col2_bool",
                          "datatype": bool_pk
                      }, {
                          "column_idx": 3,
                          "column_name": "col3_float",
def open_kive(server_url):
    session = KiveAPI(server_url)
    session.mount('https://', HTTPAdapter(max_retries=20))
    return session
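A typical call site for open_kive(), with credentials read from the environment rather than hard-coded in source; the environment variable names here are only an example:

import os

session = open_kive('http://localhost:8000')
session.login(os.environ['KIVE_USER'], os.environ['KIVE_PASSWORD'])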
import os
import sched
import time

from kiveapi import KiveAPI, KiveMalformedDataException

# This is how I would recommend authenticating to Kive
KiveAPI.SERVER_URL = 'http://localhost:8000'
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI()
kive.login('kive', 'kive')

# Upload data
try:
    fastq1 = kive.add_dataset('New fastq file 1',
                              'None',
                              open('exfastq1.fastq', 'r'),
                              None,
                              None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq1 = kive.find_datasets(name='New fastq file 1')[0]

try:
    fastq2 = kive.add_dataset('New fastq file 2',
                              'None',
                              open('exfastq2.fastq', 'r'),
                              None,
                              None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq2 = kive.find_datasets(name='New fastq file 2')[0]

# Get the pipeline by family ID
pipeline_family = kive.get_pipeline_family(2)

print('Using data:')
print(fastq1, fastq2)

print('With pipeline:')
import os
import sched
import time

from kiveapi import KiveAPI

# Use HTTPS on a real server, so your password is encrypted.
KiveAPI.SERVER_URL = 'http://localhost:8000'
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI()
kive.login('kive', 'kive')

# Get the data by ID
fastq1 = kive.get_dataset(2)
fastq2 = kive.get_dataset(3)

# or get the data by name
fastq1 = kive.find_datasets(name='1234A_R1.fastq')[0]
fastq2 = kive.find_datasets(name='1234A_R2.fastq')[0]

# Pipeline
pipeline = kive.get_pipeline(13)
print(pipeline)

# # Get the pipeline by family ID
# pipeline_family = kive.get_pipeline_family(2)
#
# print('Using data:')
# print(fastq1, fastq2)
#
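This snippet stops before launching anything. A minimal next step, reusing the run_pipeline() call that appears in a later example; whether extra arguments such as a run name or groups are needed is not confirmed here:

# Launch a run of the pipeline on the two datasets fetched above.
run = kive.run_pipeline(pipeline, [fastq1, fastq2])
print(run)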
"""Upload (or find, if it's already been uploaded) a dataset and use it with an app from a container family. """ import example_tools from kiveapi import KiveAPI, KiveMalformedDataException # Use HTTPS on a real server, so your password is encrypted. # Don't put your real password in source code, store it in a text file # that is only readable by your user account or some more secure storage. kive = KiveAPI('http://localhost:8000') kive.login('kive', 'kive') # Upload (or retrieve) an input file dataset = example_tools.upload_or_retrieve_dataset(kive, "API Example 2 Names File", open("names.csv", "r"), groups=["Everyone"]) # Get the app from a container family. containerfamily = kive.filter("/api/containerfamilies/", "name", "samplecode").json()[0] container = kive.get(containerfamily["containers"]).json()[0] app = kive.filter( container["app_list"], "smart", "Minimal example that can run simple Python scripts").json()[0] # Create a run of this app using the file we uploaded appargs = kive.get(app["argument_list"]).json() inputarg = next(a for a in appargs if a["type"] == "I") runspec = {
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()
    print('Starting.')
    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    all_pipelines = kive.get_pipelines()
    pipelines = list(recent_pipelines(all_pipelines))
    hostname = urlparse(kive.server_url).hostname
    print('Recent pipelines from {}:'.format(hostname))
    for pipeline in pipelines:
        # noinspection PyUnresolvedReferences
        print('{} - {}, id {}'.format(pipeline.family,
                                      pipeline,
                                      pipeline.pipeline_id))
    # noinspection PyCompatibility
    pipeline_request = input("Enter pipeline id to dump, or 'm' for more:")
    if pipeline_request == 'm':
        for pipeline in all_pipelines:
            print('{} - {}, id {}'.format(pipeline.family,
                                          pipeline,
                                          pipeline.pipeline_id))
        # noinspection PyCompatibility
        pipeline_request = input("Enter pipeline id to dump:")
    pipeline_id = int(pipeline_request)
    dump_folder = os.path.abspath('dump/{}_pipeline{}'.format(hostname,
                                                              pipeline_id))

    if not os.path.isdir(dump_folder):
        os.makedirs(dump_folder)

    compound_datatypes = {}  # {id: columns}
    for compound_datatype in kive.get_cdts():
        columns = compound_datatype.name
        compound_datatypes[compound_datatype.cdt_id] = columns
    code_resources = {}  # {id: {'filename': filename}}
    for code_resource in kive.get('/api/coderesources/').json():
        dump = {}
        for field in ('groups_allowed', 'users_allowed', 'filename'):
            dump[field] = code_resource[field]
        code_resources[code_resource['name']] = dump
    code_resource_revisions = {}  # {id: revision}
    for revision in kive.get('/api/coderesourcerevisions/').json():
        code_resource_revisions[revision['id']] = CodeResourceRevision(
            revision, code_resources)
    code_resource_revisions[None] = None
    api_end_points = kive.get('/api/').json()
    if 'dockerimages' in api_end_points:
        docker_images = {img['url']: img['full_name']
                         for img in kive.get('/api/dockerimages/').json()}
    else:
        # Old server doesn't have docker image support.
        docker_images = {}
    docker_images[None] = None
    methods = {}  # {id: method}
    for method in kive.get('/api/methods/').json():
        for dep in method['dependencies']:
            dep['requirement'] = code_resource_revisions[dep['requirement']]
            if dep['path'] == '././':
                dep['path'] = '.'
        method['dependencies'].sort(
            key=lambda x: (x['path'],
                           x['filename'],
                           x['requirement']['coderesource']['filename']))
        dump = {'driver': code_resource_revisions[method['driver']],
                'docker_image': docker_images[method.get('docker_image')]}
        for field in ('groups_allowed', 'users_allowed', 'reusable',
                      'threads', 'memory', 'dependencies'):
            dump[field] = method[field]
        methods[method['id']] = dump

    used_revisions = set()
    pipeline_wrapper = kive.get_pipeline(pipeline_id)
    pipeline = pipeline_wrapper.details
    print('Dumping {} in {}.'.format(pipeline_wrapper, dump_folder))
    dump = dict(positions=dict(inputs={}, outputs={}, steps={}))
    for input_item in pipeline['inputs']:
        input_name = input_item['dataset_name']
        dump['positions']['inputs'][input_name] = dict(x=input_item['x'],
                                                       y=input_item['y'])
        del input_item['x']
        del input_item['y']
        replace_structure(input_item, compound_datatypes)
    dump['inputs'] = pipeline['inputs']
    for output_item in pipeline['outputs']:
        output_name = output_item['dataset_name']
        dump['positions']['outputs'][output_name] = dict(x=output_item['x'],
                                                         y=output_item['y'])
        del output_item['x']
        del output_item['y']
        del output_item['dataset_idx']
        replace_structure(output_item, compound_datatypes)
    pipeline['outputs'].sort()
    dump['outputs'] = pipeline['outputs']
    for outcable in pipeline['outcables']:
        del outcable['pk']
        del outcable['source']
        if outcable['output_cdt']:
            columns = compound_datatypes[outcable['output_cdt']]
            outcable['output_cdt'] = columns
    pipeline['outcables'].sort(key=itemgetter('output_idx'))
    dump['outcables'] = pipeline['outcables']
    for step in pipeline['steps']:
        step_name = step['name']
        dump['positions']['steps'][step_name] = dict(x=step['x'], y=step['y'])
        del step['x']
        del step['y']
        step['cables_in'].sort(key=itemgetter('dest_dataset_name'))
        for cable in step['cables_in']:
            del cable['dest']
            del cable['source']
        for input_item in step['inputs']:
            replace_structure(input_item, compound_datatypes)
        for output_item in step['outputs']:
            replace_structure(output_item, compound_datatypes)
        del step['transformation_family']
        step['transformation'] = methods[step['transformation']]
        driver = step['transformation']['driver']
        if driver is not None:
            used_revisions.add(driver)
        used_revisions.update(map(itemgetter('requirement'),
                                  step['transformation']['dependencies']))
    dump['steps'] = pipeline['steps']

    pipeline_filename = 'pipeline.json'
    with open(os.path.join(dump_folder, pipeline_filename), 'w') as f:
        json.dump(dump, f, indent=4, sort_keys=True)

    pipeline_deadline = datetime.now() + timedelta(seconds=90)
    filename_counts = Counter()
    for revision in used_revisions:
        filename = revision['coderesource']['filename']
        filename_counts[filename] += 1
        response = kive.get(revision.url, is_json=False, stream=True)
        deadline = max(pipeline_deadline,
                       datetime.now() + timedelta(seconds=10))
        is_complete = True
        with open(os.path.join(dump_folder, filename), 'w') as f:
            for block in response.iter_content():
                f.write(block)
                if datetime.now() > deadline:
                    is_complete = False
                    break
        if not is_complete:
            os.remove(os.path.join(dump_folder, filename))
            with open(os.path.join(dump_folder, filename + '_timed_out'), 'w'):
                pass
    duplicate_filenames = [filename
                           for filename, count in filename_counts.items()
                           if count > 1]
    if duplicate_filenames:
        raise RuntimeError('Multiple versions found: ' +
                           ', '.join(duplicate_filenames))

    print('Dumped {}.'.format(pipeline_wrapper))
def main():
    args = parse_args()
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    logging.getLogger('requests').setLevel(logging.WARN)
    logging.info('Starting.')

    session = KiveAPI("http://localhost:8000")
    session.login("kive", "kive")
    cdt = session.get_cdt(args.cdt)
    pipeline = session.get_pipeline(args.pipeline)
    response = session.get('/api/datasets/?filters[0][key]=uploaded&page_size=1',
                           is_json=True)
    dataset_count = response.json()['count']
    response = session.get('/api/runs/?page_size=1', is_json=True)
    run_count = response.json()['count']
    while dataset_count < args.datasets or run_count < args.runs:
        dataset_count += 1
        filename = 'pairs_{}.csv'.format(dataset_count)
        dataset = create_dataset(session, filename, cdt)
        session.run_pipeline(pipeline, [dataset])
        run_count += 1
        while True:
            response = session.get(
                '/api/runs/status/?filters[0][key]=active&page_size=1')
            active_count = response.json()['count']
            if active_count < args.max_active:
                break
            sleep(5)
    logging.info('%d datasets, %d runs', dataset_count, run_count)
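create_dataset() is a module-level helper that isn't included above. A minimal sketch of what it could look like, assuming the add_dataset(name, description, handle, cdt, users, groups) call used in the other examples; the CSV column names are purely illustrative:

from io import StringIO


def create_dataset(session, filename, cdt):
    # Hypothetical helper: build a tiny two-column CSV in memory and upload it
    # with the requested compound datatype.
    handle = StringIO('x,y\n1,2\n3,4\n')  # column names are only an example
    return session.add_dataset(filename,
                               'Generated load-test data',
                               handle,
                               cdt,
                               None,
                               ['Everyone'])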