def main():
    args = parse_args()
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    logging.getLogger('requests').setLevel(logging.WARN)
    logging.info('Starting.')
    session = KiveAPI("http://localhost:8000")
    session.login("kive", "kive")
    cdt = session.get_cdt(args.cdt)
    pipeline = session.get_pipeline(args.pipeline)
    response = session.get(
        '/api/datasets/?filters[0][key]=uploaded&page_size=1',
        is_json=True)
    dataset_count = response.json()['count']
    response = session.get('/api/runs/?page_size=1', is_json=True)
    run_count = response.json()['count']
    while dataset_count < args.datasets or run_count < args.runs:
        dataset_count += 1
        filename = 'pairs_{}.csv'.format(dataset_count)
        dataset = create_dataset(session, filename, cdt)
        session.run_pipeline(pipeline, [dataset])
        run_count += 1
        # Throttle: wait until the number of active runs drops below the
        # configured limit before launching the next one.
        while True:
            response = session.get(
                '/api/runs/status/?filters[0][key]=active&page_size=1')
            active_count = response.json()['count']
            if active_count < args.max_active:
                break
            sleep(5)
        logging.info('%d datasets, %d runs', dataset_count, run_count)
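
# The load-test script above assumes a parse_args() helper that this section
# doesn't show. A minimal sketch, reconstructed only from the attributes the
# script reads (args.cdt, args.pipeline, args.datasets, args.runs,
# args.max_active); the help text and defaults here are assumptions:
import argparse


def parse_args():
    parser = argparse.ArgumentParser(
        description='Load a Kive server with test datasets and runs.')
    parser.add_argument('--cdt', type=int, required=True,
                        help='id of the compound datatype for new datasets')
    parser.add_argument('--pipeline', type=int, required=True,
                        help='id of the pipeline to launch')
    parser.add_argument('--datasets', type=int, default=100,
                        help='launch until at least this many datasets exist')
    parser.add_argument('--runs', type=int, default=100,
                        help='launch until at least this many runs exist')
    parser.add_argument('--max_active', type=int, default=4,
                        help='wait while this many runs are still active')
    return parser.parse_args()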
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()
    print('Starting.')
    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    all_pipelines = kive.get_pipelines()
    pipelines = list(recent_pipelines(all_pipelines))
    hostname = urlparse(kive.server_url).hostname
    print('Recent pipelines from {}:'.format(hostname))
    for pipeline in pipelines:
        # noinspection PyUnresolvedReferences
        print('{} - {}, id {}'.format(pipeline.family,
                                      pipeline,
                                      pipeline.pipeline_id))
    # noinspection PyCompatibility
    pipeline_request = input("Enter pipeline id to dump, or 'm' for more:")
    if pipeline_request == 'm':
        for pipeline in all_pipelines:
            print('{} - {}, id {}'.format(pipeline.family,
                                          pipeline,
                                          pipeline.pipeline_id))
        # noinspection PyCompatibility
        pipeline_request = input("Enter pipeline id to dump:")
    pipeline_id = int(pipeline_request)
    dump_folder = os.path.abspath(
        'dump/{}_pipeline{}'.format(hostname, pipeline_id))
    if not os.path.isdir(dump_folder):
        os.makedirs(dump_folder)

    compound_datatypes = {}  # {id: columns}
    for compound_datatype in kive.get_cdts():
        columns = compound_datatype.name
        compound_datatypes[compound_datatype.cdt_id] = columns
    code_resources = {}  # {name: {'filename': filename, ...}}
    for code_resource in kive.get('/api/coderesources/').json():
        dump = {}
        for field in ('groups_allowed', 'users_allowed', 'filename'):
            dump[field] = code_resource[field]
        code_resources[code_resource['name']] = dump
    code_resource_revisions = {}  # {id: revision}
    for revision in kive.get('/api/coderesourcerevisions/').json():
        code_resource_revisions[revision['id']] = CodeResourceRevision(
            revision, code_resources)
    code_resource_revisions[None] = None
    api_end_points = kive.get('/api/').json()
    if 'dockerimages' in api_end_points:
        docker_images = {img['url']: img['full_name']
                         for img in kive.get('/api/dockerimages/').json()}
    else:
        # Old server doesn't have docker image support.
        docker_images = {}
    docker_images[None] = None
    methods = {}  # {id: method}
    for method in kive.get('/api/methods/').json():
        for dep in method['dependencies']:
            dep['requirement'] = code_resource_revisions[dep['requirement']]
            if dep['path'] == '././':
                dep['path'] = '.'
        method['dependencies'].sort(
            key=lambda x: (x['path'],
                           x['filename'],
                           x['requirement']['coderesource']['filename']))
        dump = {'driver': code_resource_revisions[method['driver']],
                'docker_image': docker_images[method.get('docker_image')]}
        for field in ('groups_allowed', 'users_allowed', 'reusable',
                      'threads', 'memory', 'dependencies'):
            dump[field] = method[field]
        methods[method['id']] = dump

    used_revisions = set()
    pipeline_wrapper = kive.get_pipeline(pipeline_id)
    pipeline = pipeline_wrapper.details
    print('Dumping {} in {}.'.format(pipeline_wrapper, dump_folder))
    dump = dict(positions=dict(inputs={}, outputs={}, steps={}))
    for input_item in pipeline['inputs']:
        input_name = input_item['dataset_name']
        dump['positions']['inputs'][input_name] = dict(x=input_item['x'],
                                                       y=input_item['y'])
        del input_item['x']
        del input_item['y']
        replace_structure(input_item, compound_datatypes)
    dump['inputs'] = pipeline['inputs']
    for output_item in pipeline['outputs']:
        output_name = output_item['dataset_name']
        dump['positions']['outputs'][output_name] = dict(x=output_item['x'],
                                                         y=output_item['y'])
        del output_item['x']
        del output_item['y']
        del output_item['dataset_idx']
        replace_structure(output_item, compound_datatypes)
    pipeline['outputs'].sort()
    dump['outputs'] = pipeline['outputs']
    for outcable in pipeline['outcables']:
        del outcable['pk']
        del outcable['source']
        if outcable['output_cdt']:
            columns = compound_datatypes[outcable['output_cdt']]
            outcable['output_cdt'] = columns
    pipeline['outcables'].sort(key=itemgetter('output_idx'))
    dump['outcables'] = pipeline['outcables']
    for step in pipeline['steps']:
        step_name = step['name']
        dump['positions']['steps'][step_name] = dict(x=step['x'], y=step['y'])
        del step['x']
        del step['y']
        step['cables_in'].sort(key=itemgetter('dest_dataset_name'))
        for cable in step['cables_in']:
            del cable['dest']
            del cable['source']
        for input_item in step['inputs']:
            replace_structure(input_item, compound_datatypes)
        for output_item in step['outputs']:
            replace_structure(output_item, compound_datatypes)
        del step['transformation_family']
        step['transformation'] = methods[step['transformation']]
        driver = step['transformation']['driver']
        if driver is not None:
            used_revisions.add(driver)
        used_revisions.update(map(itemgetter('requirement'),
                                  step['transformation']['dependencies']))
    dump['steps'] = pipeline['steps']
    pipeline_filename = 'pipeline.json'
    with open(os.path.join(dump_folder, pipeline_filename), 'w') as f:
        json.dump(dump, f, indent=4, sort_keys=True)

    pipeline_deadline = datetime.now() + timedelta(seconds=90)
    filename_counts = Counter()
    for revision in used_revisions:
        filename = revision['coderesource']['filename']
        filename_counts[filename] += 1
        response = kive.get(revision.url, is_json=False, stream=True)
        deadline = max(pipeline_deadline,
                       datetime.now() + timedelta(seconds=10))
        is_complete = True
        with open(os.path.join(dump_folder, filename), 'w') as f:
            for block in response.iter_content():
                f.write(block)
                if datetime.now() > deadline:
                    is_complete = False
                    break
        if not is_complete:
            os.remove(os.path.join(dump_folder, filename))
            with open(os.path.join(dump_folder, filename + '_timed_out'),
                      'w'):
                pass
    duplicate_filenames = [filename
                           for filename, count in filename_counts.items()
                           if count > 1]
    if duplicate_filenames:
        raise RuntimeError('Multiple versions found: ' +
                           ', '.join(duplicate_filenames))
    print('Dumped {}.'.format(pipeline_wrapper))
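
# The dump script above calls a recent_pipelines() helper that this section
# never defines. One hypothetical reading, assuming kive.get_pipelines()
# returns the newest revisions first: list only the latest pipeline from
# each family, and let the user ask for 'm'ore if theirs isn't shown.
def recent_pipelines(all_pipelines):
    seen_families = set()
    for pipeline in all_pipelines:
        if pipeline.family not in seen_families:
            seen_families.add(pipeline.family)
            yield pipeline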
def main():
    args = parse_args()
    logger.info('Starting.')
    # The middle of this function is masked in the source: the server
    # credentials, the loop over runs, and the POST call that relaunches
    # each one are all collapsed into the line below. Only the tail of the
    # try block survives.
    kive_server = os.environ.get('KIVE_SERVER', 'http://*****:*****@api_runs', json=params, is_json=True)
            response_json = response.json()
            launched_run_ids.add(response_json['id'])
        except Exception:
            logger.error('Failed to rerun run id %d: %r.',
                         run['id'], run, exc_info=True)
    logger.info('Done.')
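
# The masked section evidently loops over runs and relaunches each one with
# a POST, collecting the new run ids. A minimal sketch of that shape; the
# `runs` list, the endpoint path, and the payload keys are assumptions (the
# real values are masked above):
launched_run_ids = set()
for run in runs:
    params = {'name': run['name']}  # real payload is masked in the source
    try:
        response = kive.post('/api/runs/', json=params, is_json=True)
        response_json = response.json()
        launched_run_ids.add(response_json['id'])
    except Exception:
        logger.error('Failed to rerun run id %d: %r.',
                     run['id'], run, exc_info=True)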
# Use HTTPS on a real server, so your password is encrypted.
# Don't put your real password in source code; store it in a text file
# that is only readable by your user account, or in some more secure storage.
kive = KiveAPI('http://localhost:8000')
kive.login('kive', 'kive')

# Upload (or retrieve) an input file.
dataset = example_tools.upload_or_retrieve_dataset(kive,
                                                   "API Example 2 Names File",
                                                   open("names.csv", "r"),
                                                   groups=["Everyone"])

# Get the app from a container family.
containerfamily = kive.filter("/api/containerfamilies/",
                              "name",
                              "samplecode").json()[0]
container = kive.get(containerfamily["containers"]).json()[0]
app = kive.filter(
    container["app_list"],
    "smart",
    "Minimal example that can run simple Python scripts").json()[0]

# Create a run of this app using the file we uploaded.
appargs = kive.get(app["argument_list"]).json()
inputarg = next(a for a in appargs if a["type"] == "I")
runspec = {
    "name": "uploaded-file-example",
    "app": app["url"],
    "datasets": [{
        "argument": inputarg["url"],
        "dataset": dataset.raw["url"],
    }],
}
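
# Following the advice above about keeping passwords out of source code, a
# sketch of reading credentials from a file that only your account can read.
# The path and the two-line format are assumptions, not a Kive convention:
import os


def read_credentials(path=os.path.expanduser('~/.kive_credentials')):
    # Expected format: username on the first line, password on the second.
    with open(path) as f:
        username = f.readline().strip()
        password = f.readline().strip()
    return username, password

# username, password = read_credentials()
# kive = KiveAPI('https://kive.example.com')
# kive.login(username, password)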