Example #1
def main(parser):
    args = parser.parse_args()

    if not args.names:
        assert args.dataName, 'Must pass dataName if not using -n option'

    # set up connection to service
    sal = ServiceAccessLayer(args.host, args.port)
    # get datastore
    dstore = sal.get_analysis_job_datastore(args.jobNumber)
    # loop through data
    for uuid, dsfile in dstore.files.items():
        if args.names:
            # print the attribute values
            print('\t'.join([fmt(getattr(dsfile, a))
                             for a in attrs]))
        elif dsfile.name in args.dataName:
            # copy file to outDir
            ofile = '{o}{s}{n}'.format(o=args.outDir,
                                       s=os.path.sep,
                                       n=os.path.basename(dsfile.path))
            copy(dsfile.path, ofile)
            print('\t'.join([fmt("'%s'" % dsfile.name), '=>', ofile]))

    return 
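
A note on usage: main() above expects a pre-built argparse parser rather than raw arguments. Below is a minimal sketch of a parser that satisfies the attributes the function reads (names, dataName, host, port, jobNumber, outDir); the flag names, types and defaults are assumptions, not the project's actual CLI.

import argparse

def make_parser():
    # Hypothetical parser; only the attribute names are taken from main() above.
    p = argparse.ArgumentParser(description='Fetch files from an analysis job datastore')
    p.add_argument('jobNumber', type=int, help='analysis job id')
    p.add_argument('--host', default='localhost')
    p.add_argument('--port', type=int, default=8081)
    p.add_argument('-n', '--names', action='store_true',
                   help='only print datastore file attributes')
    p.add_argument('--dataName', nargs='*', default=[],
                   help='datastore file name(s) to copy')
    p.add_argument('--outDir', default='.', help='destination directory for copies')
    return p

if __name__ == '__main__':
    main(make_parser())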
Example #2
def main(parser):

    args = parser.parse_args()

    jobs = pd.read_csv(args.jobCsv)
    sal = ServiceAccessLayer(args.host, args.port)
    #get dicts of values for all jobs
    rpts = jobs.jobId.apply(sal.get_analysis_job_report_attrs).values
    #check for unfinished jobs, exit if any
    unfinished = [
        j for j in jobs.jobId if sal.get_job_by_id(j).state not in FINISHED
    ]
    if unfinished:
        for j in unfinished:
            print('job %i still running' % j)
        print('Exiting')
        sys.exit()
    #put the reports together and index with (jobName,host,jobId,link)
    jobs['link'] = jobs[['host', 'jobId']].apply(LINKFMT, axis=1)
    columns = ['jobName', 'host', 'jobId', 'link']
    idx = pd.MultiIndex.from_arrays(jobs[columns].values.T, names=columns)
    collated = pd.DataFrame.from_records(rpts, index=idx).T
    for fmt, fnc in zip(['.csv', '.xls'],
                        [pd.DataFrame.to_csv, pd.DataFrame.to_excel]):
        ofile = '{d}/{name}{fmt}'.format(d=args.outDir,
                                         name=DEFAULTCSV,
                                         fmt=fmt)
        fnc(collated, ofile)  # float_format=FLOATFMT
        print('Wrote results to %s' % ofile)

    return None
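
Example #2 assumes args.jobCsv points at a CSV containing at least the jobName, host and jobId columns it indexes on. A hypothetical input written with pandas (the column names come from the code above; the values are placeholders):

import pandas as pd

# Placeholder rows; jobId values must be real SMRT Link analysis job ids.
pd.DataFrame([
    {'jobName': 'demo-job-1', 'host': 'smrtlink-host', 'jobId': 1234},
    {'jobName': 'demo-job-2', 'host': 'smrtlink-host', 'jobId': 1235},
]).to_csv('jobs.csv', index=False)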
Example #3
def run_import_fasta(host, port, fasta_path, name, organism, ploidy, block=False):
    sal = ServiceAccessLayer(host, port)
    if block is True:
        sal.run_import_fasta(fasta_path, name, organism, ploidy)
    else:
        sal.import_fasta(fasta_path, name, organism, ploidy)

    return 0
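
A hedged call sketch for Example #3; the host, port and paths are placeholders, and block=True selects the blocking run_import_fasta call on the ServiceAccessLayer:

# Placeholder values for illustration only.
run_import_fasta('localhost', 8081, '/path/to/reference.fasta',
                 name='my-reference', organism='E. coli', ploidy='haploid',
                 block=True)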
Example #4
def get_sal_and_status(host, port):
    """Get Sal or Raise if status isn't successful"""
    try:
        sal = ServiceAccessLayer(host, port)
        sal.get_status()
        return sal
    except RequestException as e:
        log.error("Failed to connect to {h}:{p}".format(h=host, p=port))
        raise
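
Example #4 (repeated verbatim as Example #5 below) returns a working ServiceAccessLayer or logs and re-raises the RequestException from the status check. A usage sketch; the host and port are placeholders, and the import path of RequestException is assumed to be the requests library's:

from requests.exceptions import RequestException  # assumed origin of RequestException

try:
    sal = get_sal_and_status('localhost', 8081)
except RequestException:
    # the connection failure has already been logged inside get_sal_and_status
    raise SystemExit(1)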
Example #5
def get_sal_and_status(host, port):
    """Get Sal or Raise if status isn't successful"""
    try:
        sal = ServiceAccessLayer(host, port)
        sal.get_status()
        return sal
    except RequestException as e:
        log.error("Failed to connect to {h}:{p}".format(h=host, p=port))
        raise
Example #6
def run_services_testkit_job(host,
                             port,
                             testkit_cfg,
                             xml_out="test-output.xml",
                             ignore_test_failures=False,
                             time_out=1800,
                             sleep_time=2,
                             import_only=False,
                             test_job_id=None):
    """
    Given a testkit.cfg and host/port parameters:
        1. convert the .cfg to a JSON file
        2. connect to the SMRTLink services and start the job, then block
           until it finishes
        3. run the standard test suite on the job output
    """
    sal = ServiceAccessLayer(host, port, sleep_time=sleep_time)
    if test_job_id is not None:
        engine_job = sal.get_job_by_id(test_job_id)
        return run_butler_tests_from_cfg(testkit_cfg=testkit_cfg,
                                         output_dir=engine_job.path,
                                         output_xml=xml_out,
                                         service_access_layer=sal,
                                         services_job_id=test_job_id)
    entrypoints = get_entrypoints(testkit_cfg)
    pipeline_id = pipeline_id_from_testkit_cfg(testkit_cfg)
    job_id = job_id_from_testkit_cfg(testkit_cfg)
    log.info("job_id = {j}".format(j=job_id))
    log.info("pipeline_id = {p}".format(p=pipeline_id))
    log.info("url = {h}:{p}".format(h=host, p=port))
    task_options, workflow_options = get_task_and_workflow_options(testkit_cfg)
    service_entrypoints = [
        ServiceEntryPoint.from_d(x) for x in entrypoints_dicts(entrypoints)
    ]
    for ep, dataset_xml in entrypoints.items():
        log.info("Importing {x}".format(x=dataset_xml))
        sal.run_import_local_dataset(dataset_xml)
    if import_only:
        log.info("Skipping job execution")
        return 0
    log.info("starting anaylsis job...")
    # XXX note that workflow options are currently ignored
    engine_job = run_analysis_job(sal,
                                  job_id,
                                  pipeline_id,
                                  service_entrypoints,
                                  block=True,
                                  time_out=time_out,
                                  task_options=task_options)
    exit_code = run_butler_tests_from_cfg(testkit_cfg=testkit_cfg,
                                          output_dir=engine_job.path,
                                          output_xml=xml_out,
                                          service_access_layer=sal,
                                          services_job_id=engine_job.id)
    if ignore_test_failures and engine_job.was_successful():
        return 0
    return exit_code
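
Following the three stages described in the docstring, a hedged call sketch for Example #6 (host, port and the testkit.cfg path are placeholders):

# Placeholder arguments; a non-zero return value means the analysis job or
# the butler test suite failed.
exit_code = run_services_testkit_job('localhost', 8081, '/path/to/testkit.cfg',
                                     xml_out='test-output.xml',
                                     time_out=3600)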
Example #7
def args_get_sal_summary(args):

    host = args.host
    port = args.port

    sal = ServiceAccessLayer(host, port)

    print(sal.to_summary())

    return 0
Example #8
def args_get_sal_summary(args):

    host = args.host
    port = args.port

    sal = ServiceAccessLayer(host, port)

    print(sal.to_summary())

    return 0
Example #9
def run_import_fasta(host, port, fasta_path, name, organism, ploidy, block=False):
    sal = ServiceAccessLayer(host, port)
    log.info("importing ({s:.2f} MB) {f} ".format(s=_get_size_mb(fasta_path), f=fasta_path))
    if block is True:
        result = sal.run_import_fasta(fasta_path, name, organism, ploidy)
        log.info("Successfully imported {f}".format(f=fasta_path))
        log.info("result {r}".format(r=result))
    else:
        sal.import_fasta(fasta_path, name, organism, ploidy)

    return 0
Example #10
def run_import_fasta(host, port, fasta_path, name, organism, ploidy, block=False):
    sal = ServiceAccessLayer(host, port)
    log.info("importing ({s:.2f} MB) {f} ".format(s=_get_size_mb(fasta_path), f=fasta_path))
    if block is True:
        result = sal.run_import_fasta(fasta_path, name, organism, ploidy)
        log.info("Successfully imported {f}".format(f=fasta_path))
        log.info("result {r}".format(r=result))
    else:
        sal.import_fasta(fasta_path, name, organism, ploidy)

    return 0
Example #11
def run_services_testkit_job(host, port, testkit_cfg,
                             xml_out="test-output.xml",
                             ignore_test_failures=False,
                             time_out=1800, sleep_time=2,
                             import_only=False, test_job_id=None):
    """
    Given a testkit.cfg and host/port parameters:
        1. convert the .cfg to a JSON file
        2. connect to the SMRTLink services and start the job, then block
           until it finishes
        3. run the standard test suite on the job output
    """
    sal = ServiceAccessLayer(host, port, sleep_time=sleep_time)
    if test_job_id is not None:
        engine_job = sal.get_job_by_id(test_job_id)
        return run_butler_tests_from_cfg(
            testkit_cfg=testkit_cfg,
            output_dir=engine_job.path,
            output_xml=xml_out,
            service_access_layer=sal,
            services_job_id=test_job_id)
    entrypoints = get_entrypoints(testkit_cfg)
    pipeline_id = pipeline_id_from_testkit_cfg(testkit_cfg)
    job_id = job_id_from_testkit_cfg(testkit_cfg)
    log.info("job_id = {j}".format(j=job_id))
    log.info("pipeline_id = {p}".format(p=pipeline_id))
    log.info("url = {h}:{p}".format(h=host, p=port))
    task_options, workflow_options = get_task_and_workflow_options(testkit_cfg)
    service_entrypoints = [ServiceEntryPoint.from_d(x) for x in
                           entrypoints_dicts(entrypoints)]
    for ep, dataset_xml in entrypoints.items():
        log.info("Importing {x}".format(x=dataset_xml))
        sal.run_import_local_dataset(dataset_xml)
    if import_only:
        log.info("Skipping job execution")
        return 0
    log.info("starting anaylsis job...")
    # XXX note that workflow options are currently ignored
    engine_job = run_analysis_job(sal, job_id, pipeline_id,
                                  service_entrypoints, block=True,
                                  time_out=time_out,
                                  task_options=task_options)
    exit_code = run_butler_tests_from_cfg(
        testkit_cfg=testkit_cfg,
        output_dir=engine_job.path,
        output_xml=xml_out,
        service_access_layer=sal,
        services_job_id=engine_job.id)
    if ignore_test_failures and engine_job.was_successful():
        return 0
    return exit_code
Example #12
def args_run_analysis_job(args):
    log.debug(args)
    with open(args.json_path, 'r') as f:
        d = json.loads(f.read())

    log.debug("Loaded \n" + pprint.pformat(d))
    job_name, pipeline_id, service_entry_points = load_analysis_job_json(d)

    sal = ServiceAccessLayer(args.host, args.port)
    # this should raise if there's a failure
    result = run_analysis_job(sal, job_name, pipeline_id, service_entry_points, block=args.block)
    return 0
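
Example #12 only reads host, port, json_path and block from args, so it can also be driven directly with a namespace object for quick experiments. A sketch with placeholder values; the structure of the analysis-job JSON consumed by load_analysis_job_json() is not shown in this listing:

import argparse

# Placeholder values for illustration only.
args = argparse.Namespace(host='localhost', port=8081,
                          json_path='/path/to/analysis_job.json',
                          block=True)
args_run_analysis_job(args)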
Example #13
def main(parser, options):
    '''options is a list of json task options added to the parser'''

    args = parser.parse_args()
    # must have at least one input
    assert (args.subreadSetID or args.subreadSetIdCsv), 'Must define -s or -S'
    # load settings from template
    with open(PRESETS_TEMPLATE) as presetFile:
        presets = json.load(presetFile)

    if args.subreadSetID:
        name = ''
        ssIdx = {name: int(args.subreadSetID)}
    else:
        ssIdx = parseSubreadsetIdCsv(args.subreadSetIdCsv)

    sal = ServiceAccessLayer(args.host, args.port)

    #prepare file to report jobs started
    columns = ['host', 'jobId', 'jobName', 'jobPath']
    csvfmt = ','.join(map('{{{}}}'.format, columns)) + '\n'
    csvFile = open('{d}/{f}'.format(d=args.outDir, f=JOBCSVNAME), 'w')
    #write header
    csvFile.write(','.join(columns) + '\n')
    print('starting jobs for {i} subreadsets'.format(i=len(ssIdx)))
    for name, ssId in ssIdx.items():
        #get the subset
        ss = sal.get_subreadset_by_id(ssId)
        #set the job name
        if args.jobName:
            jobName = args.jobName
        elif name:
            jobName = name + NAMEPOSTFIX
        else:
            jobName = ss['name'] + NAMEPOSTFIX
        presets['name'] = jobName
        #set entry subreadset
        setEntryPoint(presets, ssId)
        #set all options
        for opt in options:
            setTaskOption(presets, opt, getattr(args, opt))
        #write preset_json
        job_pre = '{d}/{name}_presets.json'.format(
            d=os.path.abspath(args.outDir),
            name=cleanName(jobName.replace(' ', '_')))
        with open(job_pre, 'w') as oFile:
            json.dump(presets, oFile, indent=2)

        #start job
        print('Starting job {name}, {time}'.format(name=jobName,
                                                   time=time.asctime(
                                                       time.localtime())))
        job = startJob(job_pre, host=args.host, port=args.port)
        jobSummary = job['JOB SUMMARY']
        csvFile.write(
            csvfmt.format(host=args.host,
                          jobId=int(jobSummary['id']),
                          jobName=jobSummary['name'],
                          jobPath=jobSummary['path']))
        if args.wait:
            print('waiting %i minutes' % args.wait)
            time.sleep(60 * args.wait)
    csvFile.close()

    return None
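
Example #13 writes one row per started job with the columns host, jobId, jobName and jobPath, which covers the columns Example #2 reads from its jobCsv. A sketch of reading that file back (JOBCSVNAME is the constant used above; the output directory is a placeholder):

import os
import pandas as pd

# Read back the per-job CSV written by main() above.
jobs = pd.read_csv(os.path.join('/path/to/outDir', JOBCSVNAME))
print(jobs[['jobId', 'jobName']])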
Example #14
def run_main(host, port, nprocesses, ntimes, profile_csv):
    # logging.basicConfig(level=logging.DEBUG, file=sys.stdout)

    profile_d = {}

    started_at = time.time()

    log.info(FUNCS.keys())

    sal = ServiceAccessLayer(host, port)
    status = sal.get_status()
    log.info("Status {}".format(status))

    profile_d['nprocesses'] = nprocesses
    profile_d["init_nsubreads"] = len(sal.get_subreadsets())
    profile_d['init_nreferences'] = len(sal.get_referencesets())
    profile_d['init_njobs'] = len(sal.get_analysis_jobs())

    chunksize = 6

    info = "{h}:{p} with ntimes:{n} with processors:{x}".format(h=host, p=port, n=ntimes, x=nprocesses)

    # FIXME. All paths are relative to smrtflow root
    def to_p(rpath):
        return os.path.join(os.getcwd(), rpath)

    # DataSet
    referenceset_path = to_p("test-data/smrtserver-testdata/ds-references/mk-01/mk_name_01/referenceset.xml")
    subreadset_path = to_p("test-data/smrtserver-testdata/ds-subreads/PacBioTestData/m54006_160504_020705.tiny.subreadset.xml")

    # Run Design
    run_design_path = to_p("smrt-server-link/src/test/resources/runCreate2.xml")

    # Dev Diagnostic
    analysis_json = to_p("smrt-server-analysis/src/test/resources/analysis-dev-diagnostic-stress-01.json")

    output_dir_prefix = to_p("test-output")
    if not os.path.exists(output_dir_prefix):
        os.mkdir(output_dir_prefix)

    # import referenceset with original UUID for the dev_diagnostic run
    _run_cmd("pbservice import-dataset --host={h} --port={p} {x}".format(h=host, p=port, x=referenceset_path))

    xs = _generate_data(host, port, [referenceset_path, subreadset_path],
                        analysis_json, run_design_path, output_dir_prefix, ntimes)

    log.info("Starting {i}".format(i=info))

    p = multiprocessing.Pool(nprocesses)

    results = p.map(runner, xs, chunksize=chunksize)

    failed = [r for r in results if r.exit_code != 0]
    was_successful = len(failed) == 0
    for f in failed:
        log.error(f)

    log.debug("exiting {i}".format(i=info))
    if failed:
        log.error("Failed Results {r} of {x}".format(r=len(failed), x=len(results)))

    run_time_sec = time.time() - started_at

    profile_d['nresults'] = len(results)
    profile_d['nfailed'] = len(failed)
    profile_d['was_successful'] = was_successful

    profile_d["final_nsubreads"] = len(sal.get_subreadsets())
    profile_d['final_nreferences'] = len(sal.get_referencesets())
    profile_d['final_njobs'] = len(sal.get_analysis_jobs())
    profile_d['run_time_sec'] = run_time_sec

    write_profile(profile_d, profile_csv)
    return 0 if was_successful else 1
Example #15
def run_main(host, port, nprocesses, ntimes, profile_csv):
    # logging.basicConfig(level=logging.DEBUG, file=sys.stdout)

    profile_d = {}

    started_at = time.time()

    log.info(FUNCS.keys())

    sal = ServiceAccessLayer(host, port)
    status = sal.get_status()
    log.info("Status {}".format(status))

    profile_d['nprocesses'] = nprocesses
    profile_d["init_nsubreads"] = len(sal.get_subreadsets())
    profile_d['init_nreferences'] = len(sal.get_referencesets())
    profile_d['init_njobs'] = len(sal.get_analysis_jobs())

    chunksize = 6

    info = "{h}:{p} with ntimes:{n} with processors:{x}".format(h=host,
                                                                p=port,
                                                                n=ntimes,
                                                                x=nprocesses)

    # FIXME. All paths are relative to smrtflow root
    def to_p(rpath):
        return os.path.join(os.getcwd(), rpath)

    # DataSet
    referenceset_path = to_p(
        "test-data/smrtserver-testdata/ds-references/mk-01/mk_name_01/referenceset.xml"
    )
    subreadset_path = to_p(
        "test-data/smrtserver-testdata/ds-subreads/PacBioTestData/m54006_160504_020705.tiny.subreadset.xml"
    )

    # Run Design
    run_design_path = to_p(
        "smrt-server-link/src/test/resources/runCreate2.xml")

    # Dev Diagnostic
    analysis_json = to_p(
        "smrt-server-link/src/test/resources/analysis-dev-diagnostic-stress-01.json"
    )

    output_dir_prefix = to_p("test-output")
    if not os.path.exists(output_dir_prefix):
        os.mkdir(output_dir_prefix)

    # import referenceset with original UUID for the dev_diagnostic run
    _run_cmd("{pbservice} import-dataset --host={h} --port={p} {x}".format(
        pbservice=pbservice, h=host, p=port, x=referenceset_path))

    xs = _generate_data(host, port, [referenceset_path, subreadset_path],
                        analysis_json, run_design_path, output_dir_prefix,
                        ntimes)

    log.info("Starting {i}".format(i=info))

    p = multiprocessing.Pool(nprocesses)

    results = p.map(runner, xs, chunksize=chunksize)

    failed = [r for r in results if r.exit_code != 0]
    was_successful = len(failed) == 0
    for f in failed:
        log.error(f)

    log.debug("exiting {i}".format(i=info))
    if failed:
        log.error("Failed Results {r} of {x}".format(r=len(failed),
                                                     x=len(results)))

    run_time_sec = time.time() - started_at

    profile_d['nresults'] = len(results)
    profile_d['nfailed'] = len(failed)
    profile_d['was_successful'] = was_successful

    profile_d["final_nsubreads"] = len(sal.get_subreadsets())
    profile_d['final_nreferences'] = len(sal.get_referencesets())
    profile_d['final_njobs'] = len(sal.get_analysis_jobs())
    profile_d['run_time_sec'] = run_time_sec

    write_profile(profile_d, profile_csv)
    return 0 if was_successful else 1
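
write_profile() is called at the end of both run_main variants but its body is not part of this listing. Purely as an illustration, a minimal sketch of a flat-dict-to-CSV writer that would satisfy the call site (the real implementation may differ):

import csv

def write_profile_sketch(profile_d, profile_csv):
    # Illustrative only: write the profile dict as a single-row CSV with a header.
    with open(profile_csv, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=sorted(profile_d))
        writer.writeheader()
        writer.writerow(profile_d)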
Example #16
def get_sal_and_status(host, port):
    """Get Sal or Raise if status isn't successful"""
    sal = ServiceAccessLayer(host, port)
    sal.get_status()
    return sal
Example #17
def run_import_local_datasets(host, port, xml_or_dir):
    sal = ServiceAccessLayer(host, port)
    file_func = functools.partial(import_local_dataset, sal)
    dir_func = functools.partial(import_datasets, sal)
    return run_file_or_dir(file_func, dir_func, xml_or_dir)
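
run_import_local_datasets dispatches, via run_file_or_dir, on whether it is given a single dataset XML or a directory of them. A call sketch with placeholder values:

# Placeholder path; may be one dataset XML or a directory containing several.
run_import_local_datasets('localhost', 8081, '/path/to/datasets')
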
def run_main(path, host, port, job_name, pipeline_id, referenceset_uuid, block=False, custom_options=None):
    """

    :param path: Path to the SubreadSet XML that will be imported (if it has not already been imported)
    :param host: SL Host
    :param port: SL Port
    :param job_name:  Job name
    :param pipeline_id:  Pipeline Id (e.g., pbsmrtpipe.pipelines.my_pipeline)
    :param referenceset_uuid: UUID of the ReferenceSet. This *must* already be imported
    :param block: To block and poll for the analysis job to complete

    :param custom_options: Dictionary of task options for the provided
    Pipeline in the form
    {"pbalign.task_options.concordant":True}


    :type custom_options: dict | None
    :rtype: int
    """

    # look up the reference set UUID from pbservice CLI util or
    # http://smrtlink-beta:8081/secondary-analysis/datasets/references
    # TODO. 1. Import SubreadSet if it's not already imported
    # TODO. 2. Check and see if the Job with the SubreadSet UUID was already submitted
    # TODO. 3. Add option to force a new submission to override (2)
    # TODO. 4. Enable custom pipeline options json file at the CLI

    # sanity test
    sset = SubreadSet(path)
    log.info("Loaded SubreadSet {}".format(sset))

    sal = ServiceAccessLayer(host, port)
    # Sanity Check
    _ = sal.get_status()

    # Step 1. Import SubreadSet (and block) if it's not imported already
    service_sset = sal.get_subreadset_by_id(sset.uuid)
    # TODO. Add check to see if Job was successful
    if service_sset is None:
        log.info("Running Import-DataSet job with {}".format(path))
        sset_import_job = sal.run_import_dataset_subread(path)
        log.info("Import-DataSet job {}".format(sset_import_job))
    else:
        log.info("Found already imported SubreadSet {}".format(service_sset))

    # Step 2. Check and see if a previous analysis job has already been run
    # Immediately exit if an analysis job is found
    analysis_job = get_job_by_subreadset_uuid_or_none(sal, sset.uuid)
    if analysis_job is not None:
        log.info("Found exiting job {} for SubreadSet {}".format(analysis_job, sset))
        return 0

    # Step 3. Create a new Analysis job with custom task options (if provided)
    task_options = {} if custom_options is None else custom_options

    # Get the already Successfully imported DataSets
    service_sset_d = sal.get_dataset_by_uuid(sset.uuid)
    service_rset_d = sal.get_dataset_by_uuid(referenceset_uuid)

    f = sal.run_by_pipeline_template_id if block else sal.create_by_pipeline_template_id

    # The API takes the Int id of the DataSet
    epoints = (ServiceEntryPoint("eid_subread", FileTypes.DS_SUBREADS.file_type_id, service_sset_d['id']),
               ServiceEntryPoint("eid_ref_dataset", FileTypes.DS_REF.file_type_id, service_rset_d['id']))

    job = f(job_name, pipeline_id, epoints, task_options=task_options)

    log.info("Analysis Job {}".format(job))

    if block:
        exit_code = 0 if job.state == JobStates.SUCCESSFUL else 1
    else:
        # the job is in the created state
        exit_code = 0

    return exit_code
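
Finally, a hedged call sketch for Example #17's run_main; every value is a placeholder, the pipeline id reuses the docstring's own example, and the ReferenceSet UUID must already be imported on the server:

# Placeholder values; the custom option mirrors the docstring's example.
exit_code = run_main('/path/to/subreadset.xml', 'localhost', 8081,
                     job_name='demo-analysis',
                     pipeline_id='pbsmrtpipe.pipelines.my_pipeline',
                     referenceset_uuid='<referenceset-uuid>',
                     block=True,
                     custom_options={'pbalign.task_options.concordant': True})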