Example #1
def test_trim():
    session1 = KiveAPI('http://localhost')
    session2 = KiveAPI('http://localhost/')
    expected_url = 'http://localhost'

    assert expected_url == session1.server_url
    assert expected_url == session2.server_url
Example #2
def kive_login(server_url, user, password):
    if KiveAPI is None:
        raise ImportError('Kive API failed to import. Is it installed?')
    kive = KiveAPI(server_url)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(user, password)
    return kive
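
A minimal usage sketch for the helper above; the server URL and credentials are placeholders, not values from the original:

kive = kive_login('https://kive.example.com', 'myuser', 'mypassword')
print(kive.server_url)  # the logged-in session is a regular KiveAPI object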
Example #3
def main():
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s[%(levelname)s]%(name)s:%(message)s")
    logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
        logging.WARNING)
    logging.info('Starting.')

    args = parse_args()
    session = KiveAPI(args.kive_server)
    session.mount('https://', HTTPAdapter(max_retries=20))
    session.login(args.kive_user, args.kive_password)

    runs = session.find_runs(active=True)
    pipeline_id = input_id = None
    for run in runs:
        if 'dataset_generator' in run.raw['display_name']:
            pipeline_id = run.pipeline_id
            input_id = run.raw['inputs'][0]['dataset']
            break
    if pipeline_id is None:
        raise RuntimeError(
            'No active runs found with "dataset_generator" in the name.')
    pipeline = session.get_pipeline(pipeline_id)
    input_dataset = session.get_dataset(input_id)

    while True:
        launch_if_needed(session, args, pipeline, input_dataset)
        sleep(1)
Example #4
def main():
    args = parse_args()
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    logging.getLogger('requests').setLevel(logging.WARN)
    logging.info('Starting.')
    session = KiveAPI("http://localhost:8000")
    session.login("kive", "kive")
    cdt = session.get_cdt(args.cdt)
    pipeline = session.get_pipeline(args.pipeline)
    response = session.get('/api/datasets/?filters[0][key]=uploaded&page_size=1', is_json=True)
    dataset_count = response.json()['count']
    response = session.get('/api/runs/?page_size=1', is_json=True)
    run_count = response.json()['count']
    while dataset_count < args.datasets or run_count < args.runs:
        dataset_count += 1
        filename = 'pairs_{}.csv'.format(dataset_count)
        dataset = create_dataset(session, filename, cdt)
        session.run_pipeline(pipeline, [dataset])
        run_count += 1
        while True:
            response = session.get(
                '/api/runs/status/?filters[0][key]=active&page_size=1',
                is_json=True)
            active_count = response.json()['count']
            if active_count < args.max_active:
                break
            sleep(5)
        logging.info('%d datasets, %d runs', dataset_count, run_count)
Example #5
def mocked_api():
    with patch.multiple('requests.Session',
                        get=DEFAULT,
                        send=DEFAULT,
                        post=DEFAULT,
                        patch=DEFAULT,
                        delete=DEFAULT,
                        head=DEFAULT):
        yield KiveAPI('http://localhost')
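
If mocked_api is registered as a pytest fixture (a @pytest.fixture decorator would be needed; that detail is assumed, not shown above), a test can exercise KiveAPI without any real HTTP traffic:

def test_server_url(mocked_api):
    # All requests.Session verbs are patched, so no network calls happen.
    assert mocked_api.server_url == 'http://localhost'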
Example #6
def fetch_input_sizes(args, slurm_jobs):
    data_path = os.path.join(args.cache_folder, 'speed_data_sizes.csv')
    try:
        with open(data_path) as f:
            reader = DictReader(f)
            cache = {int(row['run_id']): float(row['MB'])
                     for row in reader}
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        cache = {}
    session = KiveAPI(args.kive_server)
    session.login(args.kive_user, args.kive_password)
    fetcher = partial(fetch_input_size, cache=cache, kive_session=session)
    pool = ThreadPool()
    job_count = len(slurm_jobs)
    fetch_count = 0
    failed_run_ids = set()
    last_error = None
    data_file = None
    data_writer = None
    input_sizes = {}
    try:
        for i, (run_id, input_size, is_cached, error_message) in enumerate(
                pool.imap_unordered(fetcher, slurm_jobs, chunksize=10)):
            if error_message is not None:
                last_error = error_message
                failed_run_ids.add(run_id)
            if not is_cached:
                if data_file is None:
                    data_file = open(data_path, 'w')
                    data_writer = DictWriter(data_file, ['run_id', 'MB'])
                    data_writer.writeheader()
                    for old_run_id, old_input_size in input_sizes.items():
                        data_writer.writerow({'run_id': old_run_id,
                                              'MB': old_input_size})
                if fetch_count % 10000 == 0:
                    print('Fetched {} runs after scanning {} of {} at {}.'.format(
                        fetch_count,
                        i,
                        job_count,
                        datetime.now()))
                fetch_count += 1
            input_sizes[run_id] = input_size
            if data_writer:
                data_writer.writerow({'run_id': run_id, 'MB': input_size})
    finally:
        if data_file is not None:
            data_file.close()

    if failed_run_ids:
        message = 'Failed to fetch run ids: {}\n  Caused by {}'.format(
            ', '.join(str(run_id) for run_id in sorted(failed_run_ids)),
            last_error)
        raise RuntimeError(message)
    return input_sizes
Example #7
def mocked_api():
    with patch.multiple('requests.Session',
                        get=DEFAULT,
                        send=DEFAULT,
                        post=DEFAULT,
                        patch=DEFAULT,
                        delete=DEFAULT,
                        head=DEFAULT):
        Session.head.return_value.status_code = 200
        Session.get.return_value.status_code = 200
        Session.post.return_value.status_code = 200
        Session.delete.return_value.status_code = 200
        Session.patch.return_value.status_code = 200
        yield KiveAPI('http://localhost')
Example #8
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()

    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    folder = choose_folder()
    pipeline_family = choose_family(kive)
    # noinspection PyCompatibility
    groups = input('Groups allowed? [Everyone] ') or 'Everyone'
    groups = groups.split(',')

    CompoundDatatypeRequest.load_existing(kive)
    steps, pipeline_config = load_steps(kive, folder, pipeline_family, groups,
                                        args.docker_default)
    load_pipeline(pipeline_config)
    print('Uploading {!r} to {} for {}.'.format(folder, pipeline_family,
                                                groups))
    for i, step in enumerate(steps, start=1):
        print('  {}: {}'.format(i, step.get_display()))
        for dependency in step.dependencies:
            print('     ' + dependency['requirement'].get_display())
    new_compound_datatypes = [
        request.representation
        for request in CompoundDatatypeRequest.new_requests
    ]
    new_compound_datatypes.sort()
    print('New compound datatypes:')
    print('\n'.join(new_compound_datatypes))
    # noinspection PyCompatibility
    revision_name = input('Enter a revision name, or leave blank to abort: ')
    if not revision_name:
        return

    for request in CompoundDatatypeRequest.new_requests:
        request.create(kive, groups)
    create_code_resources(steps, revision_name)
    create_methods(kive, steps, revision_name)
    if not isinstance(pipeline_family, PipelineFamily):
        pipeline_family = create_pipeline_family(kive, pipeline_family, groups)
    create_pipeline(kive, pipeline_family, revision_name, pipeline_config,
                    steps)
    print('Done.')
Example #9
import os

from kiveapi import KiveAPI, KiveMalformedDataException

# Testing creation of Datasets using external files.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login(
    'kive',
    'kive')  # don't do this in practice, store your password somewhere safe

efd_name = "ExternalFiles"  # make an ExternalFileDirectory with this name
efd_path = ""  # fill this in with your own path
external_path = "external_file.dat"
external_file_contents = "foo"

with open(os.path.join(efd_path, external_path), "w") as f:
    f.write(external_file_contents)

# Upload data
try:
    ext_ds = kive.add_dataset('ExternalDatasetFile',
                              'External copy of 1234A_R1',
                              None,
                              None,
                              None, ["Everyone"],
                              externalfiledirectory=efd_name,
                              external_path=external_path)
except KiveMalformedDataException:
    ext_ds = kive.find_datasets(name='ExternalDatasetFile')[0]
Example #10
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()
    print('Starting.')

    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    all_pipelines = kive.get_pipelines()
    pipelines = list(recent_pipelines(all_pipelines))
    hostname = urlparse(kive.server_url).hostname
    print('Recent pipelines from {}:'.format(hostname))
    for pipeline in pipelines:
        # noinspection PyUnresolvedReferences
        print('{} - {}, id {}'.format(pipeline.family, pipeline,
                                      pipeline.pipeline_id))
    # noinspection PyCompatibility
    pipeline_request = input("Enter pipeline id to dump, or 'm' for more:")
    if pipeline_request == 'm':
        for pipeline in all_pipelines:
            print('{} - {}, id {}'.format(pipeline.family, pipeline,
                                          pipeline.pipeline_id))
        # noinspection PyCompatibility
        pipeline_request = input("Enter pipeline id to dump:")
    pipeline_id = int(pipeline_request)
    dump_folder = os.path.abspath('dump/{}_pipeline{}'.format(
        hostname, pipeline_id))

    if not os.path.isdir(dump_folder):
        os.makedirs(dump_folder)

    compound_datatypes = {}  # {id: columns}
    for compound_datatype in kive.get_cdts():
        columns = compound_datatype.name
        compound_datatypes[compound_datatype.cdt_id] = columns
    code_resources = {}  # {id: {'filename': filename}}
    for code_resource in kive.get('/api/coderesources/').json():
        dump = {}
        for field in ('groups_allowed', 'users_allowed', 'filename'):
            dump[field] = code_resource[field]
        code_resources[code_resource['name']] = dump
    code_resource_revisions = {}  # {id: revision}
    for revision in kive.get('/api/coderesourcerevisions/').json():
        code_resource_revisions[revision['id']] = CodeResourceRevision(
            revision, code_resources)
    code_resource_revisions[None] = None
    api_end_points = kive.get('/api/').json()
    if 'dockerimages' in api_end_points:
        docker_images = {
            img['url']: img['full_name']
            for img in kive.get('/api/dockerimages/').json()
        }
    else:
        # Old server doesn't have docker image support.
        docker_images = {}
    docker_images[None] = None
    methods = {}  # {id: method}
    for method in kive.get('/api/methods/').json():
        for dep in method['dependencies']:
            dep['requirement'] = code_resource_revisions[dep['requirement']]
            if dep['path'] == '././':
                dep['path'] = '.'
        method['dependencies'].sort(
            key=lambda dep: (dep['path'],
                             dep['filename'],
                             dep['requirement']['coderesource']['filename']))
        dump = {
            'driver': code_resource_revisions[method['driver']],
            'docker_image': docker_images[method.get('docker_image')]
        }
        for field in ('groups_allowed', 'users_allowed', 'reusable', 'threads',
                      'memory', 'dependencies'):
            dump[field] = method[field]
        methods[method['id']] = dump

    used_revisions = set()
    pipeline_wrapper = kive.get_pipeline(pipeline_id)
    pipeline = pipeline_wrapper.details
    print('Dumping {} in {}.'.format(pipeline_wrapper, dump_folder))
    dump = dict(positions=dict(inputs={}, outputs={}, steps={}))
    for input_item in pipeline['inputs']:
        input_name = input_item['dataset_name']
        dump['positions']['inputs'][input_name] = dict(x=input_item['x'],
                                                       y=input_item['y'])
        del input_item['x']
        del input_item['y']
        replace_structure(input_item, compound_datatypes)
    dump['inputs'] = pipeline['inputs']
    for output_item in pipeline['outputs']:
        output_name = output_item['dataset_name']
        dump['positions']['outputs'][output_name] = dict(x=output_item['x'],
                                                         y=output_item['y'])
        del output_item['x']
        del output_item['y']
        del output_item['dataset_idx']
        replace_structure(output_item, compound_datatypes)
    # Sort outputs deterministically (the dicts themselves are not orderable).
    pipeline['outputs'].sort(key=itemgetter('dataset_name'))
    dump['outputs'] = pipeline['outputs']
    for outcable in pipeline['outcables']:
        del outcable['pk']
        del outcable['source']
        if outcable['output_cdt']:
            columns = compound_datatypes[outcable['output_cdt']]
            outcable['output_cdt'] = columns
    pipeline['outcables'].sort(key=itemgetter('output_idx'))
    dump['outcables'] = pipeline['outcables']
    for step in pipeline['steps']:
        step_name = step['name']
        dump['positions']['steps'][step_name] = dict(x=step['x'], y=step['y'])
        del step['x']
        del step['y']
        step['cables_in'].sort(key=itemgetter('dest_dataset_name'))
        for cable in step['cables_in']:
            del cable['dest']
            del cable['source']
        for input_item in step['inputs']:
            replace_structure(input_item, compound_datatypes)
        for output_item in step['outputs']:
            replace_structure(output_item, compound_datatypes)
        del step['transformation_family']
        step['transformation'] = methods[step['transformation']]
        driver = step['transformation']['driver']
        if driver is not None:
            used_revisions.add(driver)
        used_revisions.update(
            map(itemgetter('requirement'),
                step['transformation']['dependencies']))
    dump['steps'] = pipeline['steps']

    pipeline_filename = 'pipeline.json'
    with open(os.path.join(dump_folder, pipeline_filename), 'w') as f:
        json.dump(dump, f, indent=4, sort_keys=True)

    pipeline_deadline = datetime.now() + timedelta(seconds=90)
    filename_counts = Counter()
    for revision in used_revisions:
        filename = revision['coderesource']['filename']
        filename_counts[filename] += 1
        response = kive.get(revision.url, is_json=False, stream=True)
        deadline = max(pipeline_deadline,
                       datetime.now() + timedelta(seconds=10))
        is_complete = True
        with open(os.path.join(dump_folder, filename), 'wb') as f:
            for block in response.iter_content():
                f.write(block)
                if datetime.now() > deadline:
                    is_complete = False
                    break
        if not is_complete:
            os.remove(os.path.join(dump_folder, filename))
            with open(os.path.join(dump_folder, filename + '_timed_out'), 'w'):
                pass
    duplicate_filenames = [
        filename for filename, count in filename_counts.items() if count > 1
    ]
    if duplicate_filenames:
        raise RuntimeError('Multiple versions found: ' +
                           ', '.join(duplicate_filenames))

    print('Dumped {}.'.format(pipeline_wrapper))
Example #11
def kive_login(server_url, user, password):
    kive = KiveAPI(server_url)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(user, password)
    return kive
Example #12
def open_kive(server_url):
    session = KiveAPI(server_url)
    session.mount('https://', HTTPAdapter(max_retries=20))
    return session
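
Unlike kive_login in Examples #2 and #11, open_kive leaves authentication to the caller; a usage sketch with placeholder credentials:

session = open_kive('https://kive.example.com')
session.login('myuser', 'mypassword')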
Example #13
def main():
    args = parse_args()
    logger.info('Starting.')
    kive_server = os.environ.get('KIVE_SERVER', 'http://*****:*****')
    # ... (intervening code lost to credential redaction) ...
            response = kive.post('@api_runs', json=params, is_json=True)
            response_json = response.json()
            launched_run_ids.add(response_json['id'])
        except Exception:
            logger.error('Failed to rerun run id %d: %r.',
                         run['id'],
                         run,
                         exc_info=True)
    logger.info('Done.')
Example #14
"""Upload (or find, if it's already been uploaded) a dataset and use it
with an app from a container family.
"""
import example_tools
from kiveapi import KiveAPI, KiveMalformedDataException

# Use HTTPS on a real server, so your password is encrypted.
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI('http://localhost:8000')
kive.login('kive', 'kive')
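# The safer pattern described above, as a sketch (the ~/.kive_credentials
# path and its "username password" format are hypothetical, and it would
# need an `import os` at the top of this script):
#
#     with open(os.path.expanduser('~/.kive_credentials')) as f:
#         username, password = f.read().split()
#     kive.login(username, password)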

# Upload (or retrieve) an input file
dataset = example_tools.upload_or_retrieve_dataset(kive,
                                                   "API Example 2 Names File",
                                                   open("names.csv", "r"),
                                                   groups=["Everyone"])

# Get the app from a container family.
containerfamily = kive.filter("/api/containerfamilies/", "name",
                              "samplecode").json()[0]
container = kive.get(containerfamily["containers"]).json()[0]
app = kive.filter(
    container["app_list"], "smart",
    "Minimal example that can run simple Python scripts").json()[0]

# Create a run of this app using the file we uploaded
appargs = kive.get(app["argument_list"]).json()
inputarg = next(a for a in appargs if a["type"] == "I")

runspec = {
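
The example is truncated here. A hedged sketch of how the run specification might be completed: the run is named, points at the app, and pairs the input argument with the uploaded dataset. Every field name below, and the `.raw["url"]` access (assumed by analogy with the `.raw` dict read from run objects in Example #3), is an assumption rather than something confirmed by the original:

# Hypothetical completion; every key below is an assumption.
runspec = {
    "name": "API Example 2 run",
    "app": app["url"],
    "datasets": [
        {"argument": inputarg["url"], "dataset": dataset.raw["url"]},
    ],
}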