Example #1
def test_trim():
    session1 = KiveAPI('http://localhost')
    session2 = KiveAPI('http://localhost/')
    expected_url = 'http://localhost'

    assert expected_url == session1.server_url
    assert expected_url == session2.server_url
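
An equivalent, slightly tighter form using pytest's parametrization (a sketch; `test_trim_parametrized` is a hypothetical name, and it assumes pytest is installed):

import pytest
from kiveapi import KiveAPI

@pytest.mark.parametrize('url', ['http://localhost', 'http://localhost/'])
def test_trim_parametrized(url):
    # Both spellings should normalize to the same server_url.
    assert KiveAPI(url).server_url == 'http://localhost'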
Example #2
def fetch_input_sizes(args, slurm_jobs):
    data_path = os.path.join(args.cache_folder, 'speed_data_sizes.csv')
    try:
        with open(data_path) as f:
            reader = DictReader(f)
            cache = {int(row['run_id']): float(row['MB'])
                     for row in reader}
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        cache = {}
    session = KiveAPI(args.kive_server)
    session.login(args.kive_user, args.kive_password)
    fetcher = partial(fetch_input_size, cache=cache, kive_session=session)
    pool = ThreadPool()
    job_count = len(slurm_jobs)
    fetch_count = 0
    failed_run_ids = set()
    last_error = None
    data_file = None
    data_writer = None
    input_sizes = {}
    try:
        for i, (run_id, input_size, is_cached, error_message) in enumerate(
                pool.imap_unordered(fetcher, slurm_jobs, chunksize=10)):
            if error_message is not None:
                last_error = error_message
                failed_run_ids.add(run_id)
            if not is_cached:
                if data_file is None:
                    data_file = open(data_path, 'w')
                    data_writer = DictWriter(data_file, ['run_id', 'MB'])
                    data_writer.writeheader()
                    for old_run_id, old_input_size in input_sizes.items():
                        data_writer.writerow({'run_id': old_run_id,
                                              'MB': old_input_size})
                if fetch_count % 10000 == 0:
                    print('Fetched {} runs after scanning {} of {} at {}.'.format(
                        fetch_count,
                        i,
                        job_count,
                        datetime.now()))
                fetch_count += 1
            input_sizes[run_id] = input_size
            if data_writer:
                data_writer.writerow({'run_id': run_id, 'MB': input_size})
    finally:
        if data_file is not None:
            data_file.close()

    if failed_run_ids:
        message = 'Failed to fetch run ids: {}\n  Caused by {}'.format(
            ', '.join(map(str, sorted(failed_run_ids))),
            last_error)
        raise RuntimeError(message)
    return input_sizes
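
The cache handling above follows a simple pattern: a missing CSV just means an empty cache, and the file is rewritten from scratch as soon as any uncached entry turns up. A minimal sketch of the loading half in isolation (`load_size_cache` is a hypothetical helper; the CSV columns match the function above):

import errno
from csv import DictReader

def load_size_cache(path):
    # Return {run_id: size_mb} from a previous scan, or {} on the first run.
    try:
        with open(path) as f:
            return {int(row['run_id']): float(row['MB'])
                    for row in DictReader(f)}
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise  # only a missing file means "no cache yet"
        return {}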
Example #3
def main():
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s[%(levelname)s]%(name)s:%(message)s")
    logging.getLogger("requests.packages.urllib3.connectionpool").setLevel(
        logging.WARNING)
    logging.info('Starting.')

    args = parse_args()
    session = KiveAPI(args.kive_server)
    session.mount('https://', HTTPAdapter(max_retries=20))
    session.login(args.kive_user, args.kive_password)

    runs = session.find_runs(active=True)
    pipeline_id = input_id = None
    for run in runs:
        if 'dataset_generator' in run.raw['display_name']:
            pipeline_id = run.pipeline_id
            input_id = run.raw['inputs'][0]['dataset']
            break
    if pipeline_id is None:
        raise RuntimeError(
            'No active runs found with "dataset_generator" in the name.')
    pipeline = session.get_pipeline(pipeline_id)
    input_dataset = session.get_dataset(input_id)

    while True:
        launch_if_needed(session, args, pipeline, input_dataset)
        sleep(1)
Example #4
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s[%(levelname)s]%(name)s:%(message)s")
    logging.getLogger(
        "requests.packages.urllib3.connectionpool").setLevel(logging.WARNING)
    logging.info('Starting.')

    args = parse_args()
    session = KiveAPI(args.kive_server)
    session.mount('https://', HTTPAdapter(max_retries=20))
    session.login(args.kive_user, args.kive_password)

    runs = session.find_runs(active=True)
    pipeline_id = input_id = None
    for run in runs:
        if 'dataset_generator' in run.raw['display_name']:
            pipeline_id = run.pipeline_id
            input_id = run.raw['inputs'][0]['dataset']
            break
    if pipeline_id is None:
        raise RuntimeError(
            'No active runs found with "dataset_generator" in the name.')
    pipeline = session.get_pipeline(pipeline_id)
    input_dataset = session.get_dataset(input_id)

    while True:
        launch_if_needed(session, args, pipeline, input_dataset)
        sleep(1)
Example #5
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()

    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    folder = choose_folder()
    pipeline_family = choose_family(kive)
    # noinspection PyCompatibility
    groups = input('Groups allowed? [Everyone] ') or 'Everyone'
    groups = groups.split(',')

    CompoundDatatypeRequest.load_existing(kive)
    steps, pipeline_config = load_steps(kive,
                                        folder,
                                        pipeline_family,
                                        groups,
                                        args.docker_default)
    load_pipeline(pipeline_config)
    print('Uploading {!r} to {} for {}.'.format(folder, pipeline_family, groups))
    for i, step in enumerate(steps, start=1):
        print('  {}: {}'.format(i, step.get_display()))
        for dependency in step.dependencies:
            print('     ' + dependency['requirement'].get_display())
    new_compound_datatypes = [request.representation
                              for request in CompoundDatatypeRequest.new_requests]
    new_compound_datatypes.sort()
    print('New compound datatypes:')
    print('\n'.join(new_compound_datatypes))
    # noinspection PyCompatibility
    revision_name = input('Enter a revision name, or leave blank to abort: ')
    if not revision_name:
        return

    for request in CompoundDatatypeRequest.new_requests:
        request.create(kive, groups)
    create_code_resources(steps, revision_name)
    create_methods(kive, steps, revision_name)
    if not isinstance(pipeline_family, PipelineFamily):
        pipeline_family = create_pipeline_family(kive, pipeline_family, groups)
    create_pipeline(kive, pipeline_family, revision_name, pipeline_config, steps)
    print('Done.')
Example #6
def mocked_api():
    with patch.multiple('requests.Session',
                        get=DEFAULT,
                        send=DEFAULT,
                        post=DEFAULT,
                        patch=DEFAULT,
                        delete=DEFAULT,
                        head=DEFAULT):
        yield KiveAPI('http://localhost')
Example #7
def kive_login(server_url, user, password):
    if KiveAPI is None:
        raise ImportError('Kive API failed to import. Is it installed?')
    kive = KiveAPI(server_url)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(user, password)
    return kive
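
A usage sketch with placeholder values (the server URL, account, and dataset name are assumptions):

kive = kive_login('https://kive.example.com', 'myuser', 'mypassword')
for dataset in kive.find_datasets(name='1234A_R1.fastq'):
    print(dataset)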
Example #8
def mocked_api():
    with patch.multiple('requests.Session',
                        get=DEFAULT,
                        send=DEFAULT,
                        post=DEFAULT,
                        patch=DEFAULT,
                        delete=DEFAULT,
                        head=DEFAULT):
        Session.head.return_value.status_code = 200
        Session.get.return_value.status_code = 200
        Session.post.return_value.status_code = 200
        Session.delete.return_value.status_code = 200
        Session.patch.return_value.status_code = 200
        yield KiveAPI('http://localhost')
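
Wrapped as a pytest fixture, the same generator feeds tests that never touch the network (a sketch assuming pytest; `test_server_url` is a hypothetical test):

import pytest
from unittest.mock import patch, DEFAULT

from requests import Session
from kiveapi import KiveAPI

@pytest.fixture(name='mocked_api')
def mocked_api_fixture():
    with patch.multiple('requests.Session',
                        get=DEFAULT,
                        send=DEFAULT,
                        post=DEFAULT,
                        patch=DEFAULT,
                        delete=DEFAULT,
                        head=DEFAULT):
        Session.head.return_value.status_code = 200
        Session.get.return_value.status_code = 200
        yield KiveAPI('http://localhost')

def test_server_url(mocked_api):
    # Every HTTP verb on requests.Session is a MagicMock here, so
    # constructing and probing KiveAPI makes no real requests.
    assert mocked_api.server_url == 'http://localhost'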
Example #9
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()

    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    folder = choose_folder()
    pipeline_family = choose_family(kive)
    # noinspection PyCompatibility
    groups = input('Groups allowed? [Everyone] ') or 'Everyone'
    groups = groups.split(',')

    CompoundDatatypeRequest.load_existing(kive)
    steps, pipeline_config = load_steps(kive, folder, pipeline_family, groups,
                                        args.docker_default)
    load_pipeline(pipeline_config)
    print('Uploading {!r} to {} for {}.'.format(folder, pipeline_family,
                                                groups))
    for i, step in enumerate(steps, start=1):
        print('  {}: {}'.format(i, step.get_display()))
        for dependency in step.dependencies:
            print('     ' + dependency['requirement'].get_display())
    new_compound_datatypes = [
        request.representation
        for request in CompoundDatatypeRequest.new_requests
    ]
    new_compound_datatypes.sort()
    print('New compound datatypes:')
    print('\n'.join(new_compound_datatypes))
    # noinspection PyCompatibility
    revision_name = input('Enter a revision name, or leave blank to abort: ')
    if not revision_name:
        return

    for request in CompoundDatatypeRequest.new_requests:
        request.create(kive, groups)
    create_code_resources(steps, revision_name)
    create_methods(kive, steps, revision_name)
    if not isinstance(pipeline_family, PipelineFamily):
        pipeline_family = create_pipeline_family(kive, pipeline_family, groups)
    create_pipeline(kive, pipeline_family, revision_name, pipeline_config,
                    steps)
    print('Done.')
Example #10
from kiveapi import KiveAPI, KiveMalformedDataException

# This assumes you have a Kive instance listening on port 8000, running
# the demo fixture.  In production, you wouldn't put your authentication
# information in source code.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login('kive', 'kive')

# Upload data
try:
    fastq1 = kive.add_dataset('New fastq file 1', 'None', open('exfastq1.fastq', 'r'), None, None, ["Everyone"])
except KiveMalformedDataException:
    fastq1 = kive.find_datasets(name='New fastq file 1')[0]

try:
    fastq2 = kive.add_dataset('New fastq file 2', 'None', open('exfastq2.fastq', 'r'), None, None, ["Everyone"])
except KiveMalformedDataException:
    fastq2 = kive.find_datasets(name='New fastq file 2')[0]

# Get the pipeline by family ID
pipeline_family = kive.get_pipeline_family(2)

print('Using data:')
print(fastq1, fastq2)

print('With pipeline:')
print(pipeline_family.published_or_latest())

# Create a RunBatch.
rb = kive.create_run_batch(
Example #11
import os
from io import StringIO  # StringIO moved into io in Python 3

from kiveapi import KiveAPI, KiveMalformedDataException

# Testing creation of Datasets using external files.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login('kive', 'kive')  # don't do this in practice, store your password somewhere safe

efd_name = "ExternalFiles"  # make an ExternalFileDirectory with this name
efd_path = ""  # fill this in with your own path
external_path = "external_file.dat"
external_file_contents = "foo"

with open(os.path.join(efd_path, external_path), "w") as f:
    f.write(external_file_contents)  # text mode: the contents are a str

# Upload data
try:
    ext_ds = kive.add_dataset(
        'ExternalDatasetFile',
        'External copy of 1234A_R1',
        None,
        None,
        None,
        ["Everyone"],
        externalfiledirectory=efd_name,
        external_path=external_path
    )
except KiveMalformedDataException:
    # Already uploaded; find the existing dataset instead.
    ext_ds = kive.find_datasets(name='ExternalDatasetFile')[0]
Example #12
from kiveapi import KiveAPI, KiveMalformedDataException

# This assumes you have a Kive instance listening on port 8000, running
# the demo fixture.  In production, you wouldn't put your authentication
# information in source code.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login('kive', 'kive')

# Upload data
try:
    fastq1 = kive.add_dataset('New fastq file 1', 'None',
                              open('exfastq1.fastq', 'r'), None, None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq1 = kive.find_datasets(name='New fastq file 1')[0]

try:
    fastq2 = kive.add_dataset('New fastq file 2', 'None',
                              open('exfastq2.fastq', 'r'), None, None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq2 = kive.find_datasets(name='New fastq file 2')[0]

# Get the pipeline by family ID
pipeline_family = kive.get_pipeline_family(2)

print('Using data:')
print(fastq1, fastq2)

print('With pipeline:')
Example #13
def main():
    args = parse_args()
    logger.info('Starting.')
    kive_server = os.environ.get('KIVE_SERVER', 'http://*****:*****')  # credentials masked in the source
    # ... session setup, selection of the runs to relaunch, and the `params`
    # payload are missing from the source at this point ...
    launched_run_ids = set()
    for run in runs:  # `runs` is a hypothetical name for the elided selection
        try:
            response = session.post('@api_runs', json=params, is_json=True)
            response_json = response.json()
            launched_run_ids.add(response_json['id'])
        except Exception:
            logger.error('Failed to rerun run id %d: %r.',
                         run['id'],
                         run,
                         exc_info=True)
    logger.info('Done.')
Example #14
def kive_login(server_url, user, password):
    kive = KiveAPI(server_url)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(user, password)
    return kive
Example #15
from kiveapi import KiveAPI

# Testing creation of Datasets using external files.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login(
    'kive',
    'kive')  # don't do this in practice, store your password somewhere safe

# Kive internal Datatype primary keys:
str_pk = 1
bool_pk = 2
float_pk = 3
int_pk = 4
natural_number_pk = 5

# Define a new CompoundDatatype.
cdt = kive.create_cdt("CDTCreatedByPythonAPI",
                      users=["kive"],
                      groups=["Everyone"],
                      members=[{
                          "column_idx": 1,
                          "column_name": "col1_str",
                          "datatype": str_pk
                      }, {
                          "column_idx": 2,
                          "column_name": "col2_bool",
                          "datatype": bool_pk
                      }, {
                          "column_idx": 3,
                          "column_name": "col3_float",
Example #16
def kive_login(server_url, user, password):
    kive = KiveAPI(server_url)
    kive.mount("https://", HTTPAdapter(max_retries=20))
    kive.login(user, password)
    return kive
Example #17
def open_kive(server_url):
    session = KiveAPI(server_url)
    session.mount('https://', HTTPAdapter(max_retries=20))
    return session
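
Note that the adapter is only mounted for 'https://' URLs, so a plain-HTTP server keeps the default (non-retrying) adapter. A sketch that covers both schemes (an extension, not from the original):

from requests.adapters import HTTPAdapter
from kiveapi import KiveAPI

def open_kive_with_retries(server_url, retries=20):
    # Retry transient connection failures on either scheme.
    session = KiveAPI(server_url)
    adapter = HTTPAdapter(max_retries=retries)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session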
Example #18
import os
from io import StringIO  # StringIO moved into io in Python 3

from kiveapi import KiveAPI, KiveMalformedDataException

# Testing creation of Datasets using external files.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login(
    'kive',
    'kive')  # don't do this in practice, store your password somewhere safe

efd_name = "ExternalFiles"  # make an ExternalFileDirectory with this name
efd_path = ""  # fill this in with your own path
external_path = "external_file.dat"
external_file_contents = "foo"

with open(os.path.join(efd_path, external_path), "w") as f:
    f.write(external_file_contents)  # text mode: the contents are a str

# Upload data
try:
    ext_ds = kive.add_dataset('ExternalDatasetFile',
                              'External copy of 1234A_R1',
                              None,
                              None,
                              None, ["Everyone"],
                              externalfiledirectory=efd_name,
                              external_path=external_path)
except KiveMalformedDataException:
    ext_ds = kive.find_datasets(name='ExternalDatasetFile')[0]
Example #19
import os
import sched
import time

from kiveapi import KiveAPI, KiveMalformedDataException

# This is how I would recommend authenticating to Kive
KiveAPI.SERVER_URL = 'http://localhost:8000'
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI()
kive.login('kive', 'kive')

# Upload data
try:
    fastq1 = kive.add_dataset('New fastq file 1', 'None', open('exfastq1.fastq', 'r'), None, None, ["Everyone"])
except KiveMalformedDataException:
    fastq1 = kive.find_datasets(name='New fastq file 1')[0]

try:
    fastq2 = kive.add_dataset('New fastq file 2', 'None', open('exfastq2.fastq', 'r'), None, None, ["Everyone"])
except KiveMalformedDataException:
    fastq2 = kive.find_datasets(name='New fastq file 2')[0]

# Get the pipeline by family ID
pipeline_family = kive.get_pipeline_family(2)

print('Using data:')
print(fastq1, fastq2)

print('With pipeline:')
Example #20
import os
import sched
import time

from kiveapi import KiveAPI

# Use HTTPS on a real server, so your password is encrypted.
KiveAPI.SERVER_URL = 'http://localhost:8000'
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI()
kive.login('kive', 'kive')

# Get the data by ID
fastq1 = kive.get_dataset(2)
fastq2 = kive.get_dataset(3)

# or get the data by name
fastq1 = kive.find_datasets(name='1234A_R1.fastq')[0]
fastq2 = kive.find_datasets(name='1234A_R2.fastq')[0]

# Pipeline
pipeline = kive.get_pipeline(13)

print(pipeline)
# # Get the pipeline by family ID
# pipeline_family = kive.get_pipeline_family(2)
#
# print('Using data:')
# print(fastq1, fastq2)
#
Example #21
from kiveapi import KiveAPI

# Testing creation of Datasets using external files.
KiveAPI.SERVER_URL = 'http://localhost:8000'
kive = KiveAPI()
kive.login('kive', 'kive')  # don't do this in practice, store your password somewhere safe

# Kive internal Datatype primary keys:
str_pk = 1
bool_pk = 2
float_pk = 3
int_pk = 4
natural_number_pk = 5

# Define a new CompoundDatatype.
cdt = kive.create_cdt(
    "CDTCreatedByPythonAPI",
    users=["kive"],
    groups=["Everyone"],
    members=[
        {
            "column_idx": 1,
            "column_name": "col1_str",
            "datatype": str_pk
        },
        {
            "column_idx": 2,
            "column_name": "col2_bool",
            "datatype": bool_pk
        },
        {
Example #22
import os
import sched
import time

from kiveapi import KiveAPI, KiveMalformedDataException

# This is how I would recommend authenticating to Kive
KiveAPI.SERVER_URL = 'http://localhost:8000'
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI()
kive.login('kive', 'kive')

# Upload data
try:
    fastq1 = kive.add_dataset('New fastq file 1', 'None',
                              open('exfastq1.fastq', 'r'), None, None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq1 = kive.find_datasets(name='New fastq file 1')[0]

try:
    fastq2 = kive.add_dataset('New fastq file 2', 'None',
                              open('exfastq2.fastq', 'r'), None, None,
                              ["Everyone"])
except KiveMalformedDataException:
    fastq2 = kive.find_datasets(name='New fastq file 2')[0]

# Get the pipeline by family ID
pipeline_family = kive.get_pipeline_family(2)
Example #23
"""Upload (or find, if it's already been uploaded) a dataset and use it
with an app from a container family.
"""
import example_tools
from kiveapi import KiveAPI, KiveMalformedDataException

# Use HTTPS on a real server, so your password is encrypted.
# Don't put your real password in source code, store it in a text file
# that is only readable by your user account or some more secure storage.
kive = KiveAPI('http://localhost:8000')
kive.login('kive', 'kive')

# Upload (or retrieve) an input file
dataset = example_tools.upload_or_retrieve_dataset(kive,
                                                   "API Example 2 Names File",
                                                   open("names.csv", "r"),
                                                   groups=["Everyone"])

# Get the app from a container family.
containerfamily = kive.filter("/api/containerfamilies/", "name",
                              "samplecode").json()[0]
container = kive.get(containerfamily["containers"]).json()[0]
app = kive.filter(
    container["app_list"], "smart",
    "Minimal example that can run simple Python scripts").json()[0]

# Create a run of this app using the file we uploaded
appargs = kive.get(app["argument_list"]).json()
inputarg = next(a for a in appargs if a["type"] == "I")

runspec = {
Example #24
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()
    print('Starting.')

    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    all_pipelines = kive.get_pipelines()
    pipelines = list(recent_pipelines(all_pipelines))
    hostname = urlparse(kive.server_url).hostname
    print('Recent pipelines from {}:'.format(hostname))
    for pipeline in pipelines:
        # noinspection PyUnresolvedReferences
        print('{} - {}, id {}'.format(pipeline.family, pipeline,
                                      pipeline.pipeline_id))
    # noinspection PyCompatibility
    pipeline_request = input("Enter pipeline id to dump, or 'm' for more:")
    if pipeline_request == 'm':
        for pipeline in all_pipelines:
            print('{} - {}, id {}'.format(pipeline.family, pipeline,
                                          pipeline.pipeline_id))
        # noinspection PyCompatibility
        pipeline_request = input("Enter pipeline id to dump:")
    pipeline_id = int(pipeline_request)
    dump_folder = os.path.abspath('dump/{}_pipeline{}'.format(
        hostname, pipeline_id))

    if not os.path.isdir(dump_folder):
        os.makedirs(dump_folder)

    compound_datatypes = {}  # {id: columns}
    for compound_datatype in kive.get_cdts():
        columns = compound_datatype.name
        compound_datatypes[compound_datatype.cdt_id] = columns
    code_resources = {}  # {id: {'filename': filename}}
    for code_resource in kive.get('/api/coderesources/').json():
        dump = {}
        for field in ('groups_allowed', 'users_allowed', 'filename'):
            dump[field] = code_resource[field]
        code_resources[code_resource['name']] = dump
    code_resource_revisions = {}  # {id: revision}
    for revision in kive.get('/api/coderesourcerevisions/').json():
        code_resource_revisions[revision['id']] = CodeResourceRevision(
            revision, code_resources)
    code_resource_revisions[None] = None
    api_end_points = kive.get('/api/').json()
    if 'dockerimages' in api_end_points:
        docker_images = {
            img['url']: img['full_name']
            for img in kive.get('/api/dockerimages/').json()
        }
    else:
        # Old server doesn't have docker image support.
        docker_images = {}
    docker_images[None] = None
    methods = {}  # {id: method}
    for method in kive.get('/api/methods/').json():
        for dep in method['dependencies']:
            dep['requirement'] = code_resource_revisions[dep['requirement']]
            if dep['path'] == '././':
                dep['path'] = '.'
        method['dependencies'].sort(
            key=lambda x: (x['path'], x['filename'],
                           x['requirement']['coderesource']['filename']))
        dump = {
            'driver': code_resource_revisions[method['driver']],
            'docker_image': docker_images[method.get('docker_image')]
        }
        for field in ('groups_allowed', 'users_allowed', 'reusable', 'threads',
                      'memory', 'dependencies'):
            dump[field] = method[field]
        methods[method['id']] = dump

    used_revisions = set()
    pipeline_wrapper = kive.get_pipeline(pipeline_id)
    pipeline = pipeline_wrapper.details
    print('Dumping {} in {}.'.format(pipeline_wrapper, dump_folder))
    dump = dict(positions=dict(inputs={}, outputs={}, steps={}))
    for input_item in pipeline['inputs']:
        input_name = input_item['dataset_name']
        dump['positions']['inputs'][input_name] = dict(x=input_item['x'],
                                                       y=input_item['y'])
        del input_item['x']
        del input_item['y']
        replace_structure(input_item, compound_datatypes)
    dump['inputs'] = pipeline['inputs']
    for output_item in pipeline['outputs']:
        output_name = output_item['dataset_name']
        dump['positions']['outputs'][output_name] = dict(x=output_item['x'],
                                                         y=output_item['y'])
        del output_item['x']
        del output_item['y']
        del output_item['dataset_idx']
        replace_structure(output_item, compound_datatypes)
    pipeline['outputs'].sort()
    dump['outputs'] = pipeline['outputs']
    for outcable in pipeline['outcables']:
        del outcable['pk']
        del outcable['source']
        if outcable['output_cdt']:
            columns = compound_datatypes[outcable['output_cdt']]
            outcable['output_cdt'] = columns
    pipeline['outcables'].sort(key=itemgetter('output_idx'))
    dump['outcables'] = pipeline['outcables']
    for step in pipeline['steps']:
        step_name = step['name']
        dump['positions']['steps'][step_name] = dict(x=step['x'], y=step['y'])
        del step['x']
        del step['y']
        step['cables_in'].sort(key=itemgetter('dest_dataset_name'))
        for cable in step['cables_in']:
            del cable['dest']
            del cable['source']
        for input_item in step['inputs']:
            replace_structure(input_item, compound_datatypes)
        for output_item in step['outputs']:
            replace_structure(output_item, compound_datatypes)
        del step['transformation_family']
        step['transformation'] = methods[step['transformation']]
        driver = step['transformation']['driver']
        if driver is not None:
            used_revisions.add(driver)
        used_revisions.update(
            map(itemgetter('requirement'),
                step['transformation']['dependencies']))
    dump['steps'] = pipeline['steps']

    pipeline_filename = 'pipeline.json'
    with open(os.path.join(dump_folder, pipeline_filename), 'w') as f:
        json.dump(dump, f, indent=4, sort_keys=True)

    pipeline_deadline = datetime.now() + timedelta(seconds=90)
    filename_counts = Counter()
    for revision in used_revisions:
        filename = revision['coderesource']['filename']
        filename_counts[filename] += 1
        response = kive.get(revision.url, is_json=False, stream=True)
        deadline = max(pipeline_deadline,
                       datetime.now() + timedelta(seconds=10))
        is_complete = True
        with open(os.path.join(dump_folder, filename), 'w') as f:
            for block in response.iter_content():
                f.write(block)
                if datetime.now() > deadline:
                    is_complete = False
                    break
        if not is_complete:
            os.remove(os.path.join(dump_folder, filename))
            with open(os.path.join(dump_folder, filename + '_timed_out'), 'w'):
                pass
    duplicate_filenames = [
        filename for filename, count in filename_counts.items() if count > 1
    ]
    if duplicate_filenames:
        raise RuntimeError('Multiple versions found: ' +
                           ', '.join(duplicate_filenames))

    print('Dumped {}.'.format(pipeline_wrapper))
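
The download loop above enforces a per-file deadline while streaming. The same pattern in isolation (a sketch; `save_with_deadline` is a hypothetical helper, and `response` is any streaming requests response):

import os
from datetime import datetime, timedelta

def save_with_deadline(response, path, seconds=10):
    # Stream the response body to path; abandon the file past the deadline.
    deadline = datetime.now() + timedelta(seconds=seconds)
    is_complete = True
    with open(path, 'wb') as f:
        for block in response.iter_content():
            f.write(block)
            if datetime.now() > deadline:
                is_complete = False
                break
    if not is_complete:
        os.remove(path)
    return is_complete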
Example #25
def main():
    args = parse_args()
    logger.info('Starting.')
    kive_server = os.environ.get('KIVE_SERVER', 'http://*****:*****')  # credentials masked in the source
    # ... session setup, selection of the runs to relaunch, and the `params`
    # payload are missing from the source at this point ...
    launched_run_ids = set()
    for run in runs:  # `runs` is a hypothetical name for the elided selection
        try:
            response = session.post('@api_runs', json=params, is_json=True)
            response_json = response.json()
            launched_run_ids.add(response_json['id'])
        except Exception:
            logger.error('Failed to rerun run id %d: %r.',
                         run['id'],
                         run,
                         exc_info=True)
    logger.info('Done.')
Example #26
def main():
    args = parse_args()
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    logging.getLogger('requests').setLevel(logging.WARN)
    logging.info('Starting.')
    session = KiveAPI("http://localhost:8000")
    session.login("kive", "kive")
    cdt = session.get_cdt(args.cdt)
    pipeline = session.get_pipeline(args.pipeline)
    response = session.get('/api/datasets/?filters[0][key]=uploaded&page_size=1', is_json=True)
    dataset_count = response.json()['count']
    response = session.get('/api/runs/?page_size=1', is_json=True)
    run_count = response.json()['count']
    while dataset_count < args.datasets or run_count < args.runs:
        dataset_count += 1
        filename = 'pairs_{}.csv'.format(dataset_count)
        dataset = create_dataset(session, filename, cdt)
        session.run_pipeline(pipeline, [dataset])
        run_count += 1
        while True:
            response = session.get('/api/runs/status/?filters[0][key]=active&page_size=1')
            active_count = response.json()['count']
            if active_count < args.max_active:
                break
            sleep(5)
        logging.info('%d datasets, %d runs', dataset_count, run_count)
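
The inner loop is a simple throttle: block until the number of active runs drops below the cap. The same idea as a standalone helper (a sketch; the endpoint string is copied from the code above):

from time import sleep

def wait_for_capacity(session, max_active, poll_seconds=5):
    # Poll Kive until there is room to launch another run.
    while True:
        response = session.get(
            '/api/runs/status/?filters[0][key]=active&page_size=1')
        if response.json()['count'] < max_active:
            return
        sleep(poll_seconds)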
Example #27
def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('requests.packages.urllib3.connectionpool').setLevel(
        logging.WARN)
    args = parse_args()
    print('Starting.')

    kive = KiveAPI(args.server)
    kive.mount('https://', HTTPAdapter(max_retries=20))
    kive.login(args.username, args.password)

    all_pipelines = kive.get_pipelines()
    pipelines = list(recent_pipelines(all_pipelines))
    hostname = urlparse(kive.server_url).hostname
    print('Recent pipelines from {}:'.format(hostname))
    for pipeline in pipelines:
        # noinspection PyUnresolvedReferences
        print('{} - {}, id {}'.format(pipeline.family,
                                      pipeline,
                                      pipeline.pipeline_id))
    # noinspection PyCompatibility
    pipeline_request = input("Enter pipeline id to dump, or 'm' for more:")
    if pipeline_request == 'm':
        for pipeline in all_pipelines:
            print('{} - {}, id {}'.format(pipeline.family,
                                          pipeline,
                                          pipeline.pipeline_id))
        # noinspection PyCompatibility
        pipeline_request = input("Enter pipeline id to dump:")
    pipeline_id = int(pipeline_request)
    dump_folder = os.path.abspath(
        'dump/{}_pipeline{}'.format(hostname, pipeline_id))

    if not os.path.isdir(dump_folder):
        os.makedirs(dump_folder)

    compound_datatypes = {}  # {id: columns}
    for compound_datatype in kive.get_cdts():
        columns = compound_datatype.name
        compound_datatypes[compound_datatype.cdt_id] = columns
    code_resources = {}  # {id: {'filename': filename}}
    for code_resource in kive.get('/api/coderesources/').json():
        dump = {}
        for field in ('groups_allowed', 'users_allowed', 'filename'):
            dump[field] = code_resource[field]
        code_resources[code_resource['name']] = dump
    code_resource_revisions = {}  # {id: revision}
    for revision in kive.get('/api/coderesourcerevisions/').json():
        code_resource_revisions[revision['id']] = CodeResourceRevision(
            revision,
            code_resources)
    code_resource_revisions[None] = None
    api_end_points = kive.get('/api/').json()
    if 'dockerimages' in api_end_points:
        docker_images = {img['url']: img['full_name']
                         for img in kive.get('/api/dockerimages/').json()}
    else:
        # Old server doesn't have docker image support.
        docker_images = {}
    docker_images[None] = None
    methods = {}  # {id: method}
    for method in kive.get('/api/methods/').json():
        for dep in method['dependencies']:
            dep['requirement'] = code_resource_revisions[dep['requirement']]
            if dep['path'] == '././':
                dep['path'] = '.'
        method['dependencies'].sort(
            key=lambda x: (x['path'],
                           x['filename'],
                           x['requirement']['coderesource']['filename']))
        dump = {'driver': code_resource_revisions[method['driver']],
                'docker_image': docker_images[method.get('docker_image')]}
        for field in ('groups_allowed',
                      'users_allowed',
                      'reusable',
                      'threads',
                      'memory',
                      'dependencies'):
            dump[field] = method[field]
        methods[method['id']] = dump

    used_revisions = set()
    pipeline_wrapper = kive.get_pipeline(pipeline_id)
    pipeline = pipeline_wrapper.details
    print('Dumping {} in {}.'.format(pipeline_wrapper, dump_folder))
    dump = dict(positions=dict(inputs={},
                               outputs={},
                               steps={}))
    for input_item in pipeline['inputs']:
        input_name = input_item['dataset_name']
        dump['positions']['inputs'][input_name] = dict(x=input_item['x'],
                                                       y=input_item['y'])
        del input_item['x']
        del input_item['y']
        replace_structure(input_item, compound_datatypes)
    dump['inputs'] = pipeline['inputs']
    for output_item in pipeline['outputs']:
        output_name = output_item['dataset_name']
        dump['positions']['outputs'][output_name] = dict(x=output_item['x'],
                                                         y=output_item['y'])
        del output_item['x']
        del output_item['y']
        del output_item['dataset_idx']
        replace_structure(output_item, compound_datatypes)
    pipeline['outputs'].sort()
    dump['outputs'] = pipeline['outputs']
    for outcable in pipeline['outcables']:
        del outcable['pk']
        del outcable['source']
        if outcable['output_cdt']:
            columns = compound_datatypes[outcable['output_cdt']]
            outcable['output_cdt'] = columns
    pipeline['outcables'].sort(key=itemgetter('output_idx'))
    dump['outcables'] = pipeline['outcables']
    for step in pipeline['steps']:
        step_name = step['name']
        dump['positions']['steps'][step_name] = dict(x=step['x'], y=step['y'])
        del step['x']
        del step['y']
        step['cables_in'].sort(key=itemgetter('dest_dataset_name'))
        for cable in step['cables_in']:
            del cable['dest']
            del cable['source']
        for input_item in step['inputs']:
            replace_structure(input_item, compound_datatypes)
        for output_item in step['outputs']:
            replace_structure(output_item, compound_datatypes)
        del step['transformation_family']
        step['transformation'] = methods[step['transformation']]
        driver = step['transformation']['driver']
        if driver is not None:
            used_revisions.add(driver)
        used_revisions.update(map(itemgetter('requirement'),
                                  step['transformation']['dependencies']))
    dump['steps'] = pipeline['steps']

    pipeline_filename = 'pipeline.json'
    with open(os.path.join(dump_folder, pipeline_filename), 'w') as f:
        json.dump(dump, f, indent=4, sort_keys=True)

    pipeline_deadline = datetime.now() + timedelta(seconds=90)
    filename_counts = Counter()
    for revision in used_revisions:
        filename = revision['coderesource']['filename']
        filename_counts[filename] += 1
        response = kive.get(revision.url, is_json=False, stream=True)
        deadline = max(pipeline_deadline,
                       datetime.now() + timedelta(seconds=10))
        is_complete = True
        with open(os.path.join(dump_folder, filename), 'w') as f:
            for block in response.iter_content():
                f.write(block)
                if datetime.now() > deadline:
                    is_complete = False
                    break
        if not is_complete:
            os.remove(os.path.join(dump_folder, filename))
            with open(os.path.join(dump_folder, filename + '_timed_out'), 'w'):
                pass
    duplicate_filenames = [filename
                           for filename, count in filename_counts.items()
                           if count > 1]
    if duplicate_filenames:
        raise RuntimeError('Multiple versions found: ' +
                           ', '.join(duplicate_filenames))

    print('Dumped {}.'.format(pipeline_wrapper))