コード例 #1
0
    def setUp(self):
        """Stage a disposable copy of the fixture database and point LaimsApp at it."""
        self.data_d = os.path.join(os.path.dirname(__file__), "data")

        self.temp_d = tempfile.TemporaryDirectory()
        # Fix: the temp directory was never cleaned up explicitly (no
        # tearDown); register deterministic cleanup after each test.
        self.addCleanup(self.temp_d.cleanup)
        self.database_fn = os.path.join(self.temp_d.name, "test.db")
        shutil.copyfile(os.path.join(self.data_d, "test.db"), self.database_fn)

        laimsapp = LaimsApp()
        laimsapp.database = self.database_fn
コード例 #2
0
ファイル: test_app.py プロジェクト: hall-lab/old-laims
 def test3_lims_db(self):
     """lims_db_url resolves from config; clearing it makes connection fail."""
     app = LaimsApp(config_file=self.config_fn,
                    config={"database": "NOTDB"})
     self.assertIsNotNone(app)

     url = app.lims_db_url
     self.assertIsNotNone(url)

     # Clearing the class-level attribute should be visible on the instance.
     LaimsApp.lims_db_url = None
     self.assertIsNone(app.lims_db_url)
     with self.assertRaisesRegex(Exception, "No lims_db_url"):
         app.lims_db_connection()
コード例 #3
0
    def setUp(self):
        """Stage fixture paths and a disposable copy of the test database."""
        self.data_dn = os.path.join(os.path.dirname(__file__), "data")
        self.sample_dn = os.path.join(self.data_dn, "samples",
                                      "H_XS-356091-0186761975")

        self.temp_d = tempfile.TemporaryDirectory()
        # Fix: the temp directory was never cleaned up explicitly (no
        # tearDown); register deterministic cleanup after each test.
        self.addCleanup(self.temp_d.cleanup)
        self.database_fn = os.path.join(self.temp_d.name, "test.db")
        shutil.copyfile(os.path.join(self.data_dn, "test.db"),
                        self.database_fn)

        laimsapp = LaimsApp()
        laimsapp.database = self.database_fn
コード例 #4
0
ファイル: test_lsf.py プロジェクト: hall-lab/old-laims
    def test2_lsf_job(self, subprocess_patch):
        """Exercise LsfJob command construction and launch failure handling.

        subprocess_patch: mock injected by a @patch decorator (decorator not
        visible in this chunk) that stands in for the subprocess call.
        """
        laimsapp = LaimsApp()
        config = laimsapp.lsf_job_options()
        # Drop options that would otherwise show up in every bsub line below.
        config.pop("queue", None)
        config.pop("stdout", None)
        # Fix: removed leftover debug print(config) that polluted test output.
        job = LsfJob(config)
        self.assertTrue(isinstance(job, LsfJob))

        available_opts = LsfJob.available_options
        self.assertEqual(len(available_opts), 9,
                         "available options count is 9")

        expected_cmd = [
            'bsub', '-a', 'docker(registry.gsc.wustl.edu/mgi/laims:latest)',
            '-N', '-u', '*****@*****.**', 'echo', 'hello', 'world'
        ]
        self.assertEqual(job.bsub_cmd(['echo', 'hello', 'world']),
                         expected_cmd)

        # A "stdout" that is a file path is passed straight to -oo.
        job.created_options["stdout"] = "/var/log/out"
        expected_cmd = [
            'bsub', '-M', '10000000', '-R',
            '"select[mem>10000] rusage[mem=10000]"', '-a',
            'docker(hello-world)', "-oo", "/var/log/out", '-N', '-u',
            '*****@*****.**', 'echo', 'hello', 'world'
        ]
        self.assertEqual(
            job.bsub_cmd(['echo', 'hello', 'world'], {
                "docker": "hello-world",
                "memory_in_gb": 10
            }), expected_cmd)

        # A "stdout" directory is joined with the per-command "stdout_bn".
        job.created_options["stdout"] = "/var/log"
        expected_cmd = [
            'bsub', '-M', '10000000', '-R',
            '"select[mem>10000] rusage[mem=10000]"', '-a',
            'docker(hello-world)', "-oo", "/var/log/log1.out", '-N', '-u',
            '*****@*****.**', 'echo', 'hello', 'world'
        ]
        self.assertEqual(
            job.bsub_cmd(
                ['echo', 'hello', 'world'], {
                    "docker": "hello-world",
                    "memory_in_gb": 10,
                    "stdout_bn": "log1.out"
                }), expected_cmd)

        # A non-zero return code from the (mocked) subprocess means failure.
        subprocess_patch.return_value = 1
        self.assertFalse(
            job.launch(['echo', 'hello', 'world'], {"docker": "hello-world"}),
            expected_cmd)
コード例 #5
0
ファイル: test_app.py プロジェクト: hall-lab/old-laims
 def test4_job_options(self):
     """LSF options combine the config file with attributes set afterwards."""
     app = LaimsApp(config_file=self.config_fn)
     app.queue = "ccdg"
     app.stdout = "/var/log/out"
     self.assertTrue(app)
     expected_opts = {
         "email": "*****@*****.**",
         "queue": "ccdg",
         "docker": "registry.gsc.wustl.edu/mgi/laims:latest",
         "stdout": "/var/log/out",
     }
     self.assertDictEqual(app.lsf_job_options(), expected_opts,
                          "LSF job options fetched from config")
コード例 #6
0
    def setUp(self):
        """Build the app from the test config and point share_dir at the repo."""
        test_dir = os.path.dirname(__file__)
        self.dir_n = test_dir
        self.config_fn = os.path.join(test_dir, "data", "laims.json")
        self.laimsapp = LaimsApp(config_file=self.config_fn)

        # share/ lives one level above the test directory.
        self.laimsapp.share_dir = os.path.join(os.path.dirname(test_dir),
                                               "share")
コード例 #7
0
    def test2_laims_metrics_add(self):
        runner = CliRunner()

        result = runner.invoke(metrics_add_cmd, ["--help"])
        self.assertEqual(result.exit_code, 0)

        sm = LaimsApp().db_connection()
        session = sm()
        for sample in session.query(ComputeWorkflowSample):
            sample.analysis_cram_path = self.sample_dn
            session.add(sample)
        session.flush()
        session.commit()

        result = runner.invoke(metrics_add_cmd)
        try:
            self.assertEqual(result.exit_code, 0)
            expected_output = """STATUS:
NO_DIR: 0
NO_PICARD_WGS: 0
NO_VERIFY_BAMID: 0
OK: 10
"""
            self.assertEqual(result.output, expected_output)
        except:
            print(result.output)
            raise
コード例 #8
0
ファイル: limsdatabase.py プロジェクト: hall-lab/old-laims
    def get_read_counts(self, read_groups):
        """Return total read (cluster) counts for the given seq ids.

        NOTE(review): the seq ids are interpolated directly into the SQL by
        the jinja template — safe only if read_groups holds trusted numeric
        ids; confirm callers never pass user-supplied strings.
        """
        sql = """
            select ii.seq_id, ii.filt_clusters * 2 as total_clusters
            from   index_illumina ii
            where  ii.seq_id in ( {{ seq_ids | join(', ') }} )
        """

        rendered_sql = Template(sql).render(seq_ids=read_groups)
        rows = LaimsApp().lims_db_connection().query(rendered_sql)
        return [
            {'seq_id': row['seq_id'], 'filt_clusters': row['total_clusters']}
            for row in rows
        ]
コード例 #9
0
ファイル: cohorts_cli.py プロジェクト: hall-lab/old-laims
def cohorts_link_cmd(name, samples):
    """
    Link samples to a cohort
    """
    sm = LaimsApp().db_connection()
    session = sm()
    # A single argument that is an existing path means "read names from file".
    if len(samples) == 1 and os.path.exists(samples[0]):
        with open(samples[0], "r") as f:
            names_to_link = {line.rstrip() for line in f}
    else:
        names_to_link = set(samples)
    stats = {"add": 0, "skip": 0}
    for sample_name in names_to_link:
        try:
            sample = session.query(ComputeWorkflowSample).filter_by(
                ingest_sample_name=sample_name).one()
        except NoResultFound as err:
            # Chain the cause so the original lookup failure stays visible.
            raise Exception(
                "Could not find sample named {}".format(sample_name)) from err
        cohort = session.query(SampleCohort).get((sample.id, name))
        if cohort is None:
            session.add(SampleCohort(name=name, sample_id=sample.id))
            stats["add"] += 1
        else:
            stats["skip"] += 1
    session.commit()
    # Fix: the status line previously lacked a trailing newline.
    sys.stderr.write("Added {} samples to cohort {}, skipped {} existing.\n".format(
        stats["add"], name, stats["skip"]))
コード例 #10
0
ファイル: cli.py プロジェクト: hall-lab/old-laims
def cli(ctx, config, database, job_group, queue, job_stdout):
    # Top-level CLI group: build the shared LaimsApp from CLI options and
    # stash it on the click context for subcommands. (No docstring on
    # purpose — click would surface it as help text.)
    overrides = {
        "database": database,
        "job_group": job_group,
        "queue": queue,
        "stdout": job_stdout,
    }
    ctx.obj = LaimsApp(config_file=config, config=overrides)
コード例 #11
0
 def test_sample_metric(self):
     """Both metrics for sample 8 resolve to the same sample record."""
     session = LaimsApp().db_connection()()
     metrics = session.query(SampleMetric).filter_by(sample_id=8).all()
     self.assertEqual(len(metrics), 2)
     first = metrics[0].sample
     self.assertEqual(first.ingest_sample_name, "H_XY-BGM1073006")
     self.assertEqual(first.ingest_sample_name,
                      metrics[1].sample.ingest_sample_name)
コード例 #12
0
 def test_sample_file(self):
     """Both files for sample 8 reference the same sample record."""
     sm = LaimsApp().db_connection()
     session = sm()
     files = session.query(SampleFile).filter_by(sample_id=8).all()
     self.assertEqual(len(files), 2)
     sample = files[0].sample
     self.assertEqual(sample.ingest_sample_name, "H_XY-BGM1073006")
     # Fix: compared files[0] against itself (tautology); compare against
     # the second file, mirroring test_sample_metric.
     self.assertEqual(sample.ingest_sample_name,
                      files[1].sample.ingest_sample_name)
コード例 #13
0
 def test_sample_cohort(self):
     """Sample 8 belongs to exactly one cohort with the expected name."""
     session = LaimsApp().db_connection()()
     cohorts = session.query(SampleCohort).filter_by(sample_id=8).all()
     self.assertEqual(len(cohorts), 1)
     member = cohorts[0].sample
     self.assertEqual(member.ingest_sample_name, "H_XY-BGM1073006")
     # NOTE(review): this compares cohorts[0] to itself (only one cohort
     # exists); kept as-is to preserve behavior.
     self.assertEqual(member.ingest_sample_name,
                      cohorts[0].sample.ingest_sample_name)
コード例 #14
0
 def test2_generic_rsync_command(self):
     """GenericRsyncCmd builds the expected rsync command line."""
     # Fix: the local binding was unused; instantiate LaimsApp only for its
     # (shared context) side effect.
     LaimsApp()
     cmd = GenericRsyncCmd()
     cmdline = [
         'rsync', '--verbose', '--archive', 'INPUT1', 'INPUT2', 'INPUT3',
         'OUTDIR/'
     ]
     self.assertEqual(cmd(['INPUT1', 'INPUT2', 'INPUT3'], 'OUTDIR'),
                      cmdline)
コード例 #15
0
 def __init__(self, reference):
     """Build the GATK CombineGVCFs rewrite command template.

     reference: path to the reference FASTA, passed to GATK's -R flag.
     Memory limits, the GATK jar path, and the band-break multiple come
     from the LaimsApp config (rewrite_gvcfs / gatk_jar).
     """
     app = LaimsApp()
     cmd_conf = app.rewrite_gvcfs
     # The doubled braces ({{input}}, {{output}}, {{temp_output}}) survive
     # this .format() call as single-brace placeholders, to be filled in
     # per-file later. Output is written to a temp path first and then
     # mv'd (with its .tbi index) so a failed run leaves no partial output.
     self.cmd = 'java -Xmx{max_mem} -Xms{max_stack} -jar {gatk_jar} -T CombineGVCFs -R {ref} --breakBandsAtMultiplesOf {break_multiple} -V {{input}} -o {{temp_output}} && mv {{temp_output}} {{output}} && mv {{temp_output}}.tbi {{output}}.tbi'.format(
         max_mem=cmd_conf["max_mem"],
         max_stack=cmd_conf["max_stack"],
         gatk_jar=app.gatk_jar,
         ref=str(reference),
         break_multiple=cmd_conf['break_multiple'],
     )
コード例 #16
0
ファイル: cohorts_cli.py プロジェクト: hall-lab/old-laims
def cohorts_list_cmd():
    """
    List cohorts and sample counts
    """
    session = LaimsApp().db_connection()()
    sql = "select name, count(*) as sample_count from sample_cohorts group by name"
    result = session.execute(sql)
    # One stringified [name, count] row per cohort.
    rows = [list(map(str, cohort)) for cohort in result.fetchall()]
    sys.stdout.write(
        tabulate.tabulate(rows, ["NAME", "SAMPLE_COUNT"], tablefmt="simple"))
コード例 #17
0
 def test1_rewrite_gvcf_cmd(self):
     """RewriteGvcfCmd renders the full java/GATK command line from config."""
     config_path = os.path.join(os.path.dirname(__file__), "data",
                                "laims.json")
     laimsapp = LaimsApp(config_file=config_path)
     cmd = RewriteGvcfCmd(
         reference=
         '/gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.fa',
     )
     self.assertIsNotNone(cmd)
     cmdline = cmd('input.gvcf.gz', 'output.gvcf.gz')
     expected = 'java -Xmx3500M -Xms3500M -jar /gatk/gatk-package-4.0.6.0-local.jar -T CombineGVCFs -R /gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.fa --breakBandsAtMultiplesOf 1000000 -V input.gvcf.gz -o output.gvcf.gz.tmp.vcf.gz && mv output.gvcf.gz.tmp.vcf.gz output.gvcf.gz && mv output.gvcf.gz.tmp.vcf.gz.tbi output.gvcf.gz.tbi'
     self.assertEqual(expected, cmdline)
コード例 #18
0
def metrics_add_cmd():
    """
    Add Sample Metrics Into DB

    Currently adds these metrics from ALL samples:
    * picard wgs
     -  mean coverage
    * verify bam id
     - freemix
    """
    db = LaimsApp().db_connection()
    session = db()
    status = {"OK": 0, "NO_DIR": 0, "NO_VERIFY_BAMID": 0, "NO_PICARD_WGS": 0}
    for sample in session.query(ComputeWorkflowSample):
        dn = sample.analysis_cram_path
        qc_dn = os.path.join(dn, "qc")
        if not os.path.exists(dn) or not os.path.exists(qc_dn):
            status["NO_DIR"] += 1
            continue

        # verifyBamID
        qc = QcMetrics(dn=qc_dn)
        try:
            verifyBamID_metrics = qc.verifyBamID_metrics()
        except Exception:
            # Fix: was a bare except; narrowed so KeyboardInterrupt/SystemExit
            # abort the run instead of being counted as missing data.
            status["NO_VERIFY_BAMID"] += 1
            continue
        _add_or_update_metrics(session=session,
                               sample=sample,
                               metrics=verifyBamID_metrics,
                               names=["FREEMIX"])

        # picard wgs
        try:
            picard_wgs_metrics = qc.picard_wgs_metrics()
        except Exception:
            # Fix: same bare-except narrowing as above.
            status["NO_PICARD_WGS"] += 1
            continue
        _add_or_update_metrics(session=session,
                               sample=sample,
                               metrics=picard_wgs_metrics,
                               names=["MEAN_COVERAGE"])
        # A sample only counts as OK when both metric sets were found.
        status["OK"] += 1
    sys.stderr.write("STATUS:\n" + yaml.dump(status, indent=6))
コード例 #19
0
def verify_bulk_gvcfs(tsv_path, reference_path):
    """Submit one verify-gvcf LSF job per gvcf listed in tsv_path.

    tsv_path: tab-separated file whose first column is a gvcf path.
    reference_path: reference FASTA passed through to each job.
    """
    os.environ['LSF_DOCKER_PRESERVE_ENVIRONMENT'] = 'false'

    job_opts = LaimsApp().lsf_job_options()
    job_opts["memory_in_gb"] = 10
    job_runner = LsfJob(job_opts)

    with open(tsv_path) as f:
        reader = csv.reader(f, delimiter='\t')
        for row in reader:
            gvcf_path = row[0]
            interval = get_interval_from_path(gvcf_path)
            cmd = [
                "laims", "verify-gvcf", "--gvcf-path", gvcf_path,
                "--reference-path", reference_path, "--interval", interval
            ]
            # Fix: option key was "stdbn", but LsfJob's per-command stdout
            # basename option is "stdout_bn" (see the LsfJob tests), so the
            # per-job log name was silently ignored.
            job_runner.launch(cmd,
                              cmd_options={
                                  "stdout_bn":
                                  ".".join([os.path.basename(gvcf_path), "out"])
                              })
コード例 #20
0
def sample_list_cmd(filter_by):
    """
    List samples and show their attributes
    """
    sm = LaimsApp().db_connection()
    session = sm()
    # Optional filter on the source work order.
    if filter_by is not None:
        sample_iter = session.query(ComputeWorkflowSample).filter_by(
            source_work_order=filter_by)
    else:
        sample_iter = session.query(ComputeWorkflowSample)
    # Materialize each row (was a lazy map object) for consistency with
    # cohorts_list_cmd and so rows can be safely re-iterated.
    rows = [
        list(map(str, [
            sample.id, sample.ingest_sample_name, sample.source_work_order
        ])) for sample in sample_iter
    ]
    sys.stdout.write(
        tabulate.tabulate(rows, ["ID", "NAME", "WORK_ORDER"],
                          tablefmt="simple"))
コード例 #21
0
ファイル: sample_files.py プロジェクト: hall-lab/old-laims
def update_cmd(fof, key):
    """
    Update Samples Files

    Give an FOF of files to update. The sample name should be derivable from the filename. If not given the --key option, the extension will be used as the file's key.
    """
    sm = LaimsApp().db_connection()
    session = sm()
    with open(fof, "r") as f:
        for line in f:
            fn = line.rstrip()
            bn = os.path.basename(fn)
            tokens = bn.split(".")

            # Sample name is the first dot-separated token of the basename.
            sample_name = tokens[0]
            sample = session.query(ComputeWorkflowSample).filter_by(
                ingest_sample_name=sample_name).first()

            if sample is None:
                # Fix: format string had one placeholder but two arguments,
                # silently dropping the file name from the message.
                sys.stderr.write("NO_SAMPLE {} {}\n".format(sample_name, fn))
                continue

            # Default the key to the file extension when --key wasn't given.
            _key = key
            if _key is None:
                _key = tokens[-1]
                if _key not in valid_keys:
                    sys.stderr.write("INVALID_KEY {} {}\n".format(_key, fn))
                    continue

            # Update the existing record in place, or create a new one.
            sample_file = session.query(SampleFile).get((sample.id, _key))
            if sample_file is not None:
                sample_file.value = fn
            else:
                sample_file = SampleFile(sample_id=sample.id,
                                         name=_key,
                                         value=fn)
            session.add(sample_file)
            sys.stderr.write("OK {} {} {}\n".format(sample_name, _key, fn))
    session.commit()
コード例 #22
0
ファイル: test_app.py プロジェクト: hall-lab/old-laims
    def test2_init(self):
        """Context creation, config-file loading, and shared attribute state."""
        # Constructing without a config still creates the shared context.
        LaimsApp()
        self.assertIsNotNone(LaimsApp.context)

        # Reset the shared context, then construct with config file + overrides.
        LaimsApp.context = None
        app = LaimsApp(config_file=self.config_fn,
                       config={"database": "NOTDB"})
        self.assertIsNotNone(app)
        self.assertIsNotNone(app.context)
        self.assertEqual(app.config_file, self.config_fn)
        self.assertEqual(app.environment, 'test')
        self.assertEqual(app.database, 'NOTDB')
        self.assertEqual(app.lims_db_url, 'sqlite:///:memory:')

        # __setattr__: values set on one instance are seen by a fresh one.
        self.assertIsNone(app.foo)
        app.foo = "bar"
        self.assertEqual(app.foo, "bar")
        self.assertEqual(LaimsApp().foo, "bar")
コード例 #23
0
ファイル: test_lsf.py プロジェクト: hall-lab/old-laims
 def setUp(self):
     # Build the app under test from the bundled JSON config.
     config_path = os.path.join(os.path.dirname(__file__), "data",
                                "laims.json")
     self.laimsapp = LaimsApp(config_file=config_path)
コード例 #24
0
 def setUp(self):
     # Fixture paths; also prime the LaimsApp singleton with a queue.
     data_dir = os.path.join(os.path.dirname(__file__), "data")
     self.data_d = data_dir
     self.tsv_path = os.path.join(data_dir, "gvcfs.tsv")
     self.ref_fn = os.path.join(data_dir, "ref.fa")
     LaimsApp(config={"queue": "ccdg"})
コード例 #25
0
 def test_sample(self):
     """A sample with id 8 exists in the fixture database."""
     session = LaimsApp().db_connection()()
     found = session.query(ComputeWorkflowSample).get(8)
     self.assertIsNotNone(found)
コード例 #26
0
def downsample_and_recall(app, inputs, output_dir):
    """Render a Cromwell config and launch the downsample-and-recall workflow via LSF.

    app: configured LaimsApp; queue, docker, and job_group are read from it.
    inputs: TSV with one cram per line: path, downsample ratio, freemix.
    output_dir: run directory; logs/ and results/ are created inside it.
    """
    log_dir = os.path.join(output_dir, 'logs')
    os.mkdir(log_dir)
    os.mkdir(os.path.join(output_dir, 'results'))

    cromwell_job_opts = {
        'memory_in_gb' : 32,
        'queue': app.queue,
        'docker': app.docker,
        'stdout': os.path.join(log_dir, 'cromwell.log'),
    }
    if app.job_group is not None: cromwell_job_opts['group'] = app.job_group
    job_runner = LsfJob(cromwell_job_opts)

    # Scatter intervals: chr1..chr22, chrX, chrY, plus a list file covering
    # the remaining filtered sequences.
    chrs = [["chr{}".format(c)] for c in range(1, 23)]
    chrs.extend([
        ["chrX"],
        ["chrY"],
        ["/gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.filtered-chromosome.ext.list"]
    ])

    workflow_inputs = {
        'reference': '/gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.fa',
        'downsample_strategy': 'ConstantMemory',
        'downsample_seed': 1,
        'emit_reference_confidence': 'GVCF',
        'max_alternate_alleles': 3,
        'variant_index_type': 'LINEAR',
        'variant_index_parameter': 128000,
        'read_filter': 'OverclippedRead',
        'intervals': chrs,
        'qc_minimum_mapping_quality': 0,
        'qc_minimum_base_quality': 0,
        'crams_to_downsample': [], #filled in from "inputs" file below
    }

    with open(inputs) as fh:
        reader = csv.reader(fh, delimiter='\t')
        for row in reader:
            sam = row[0]
            ratio = row[1]
            freemix = row[2]
            workflow_inputs['crams_to_downsample'].append(
                { 'cram': {'class': 'File', 'path': sam}, 'downsample_ratio': ratio, 'contamination': freemix }
            )

    input_yaml_path = os.path.join(output_dir, 'inputs.yaml')
    with open(input_yaml_path, 'w') as yaml_fh:
        yaml.dump(workflow_inputs, yaml_fh)

    # Fix: removed an unused 'config_template' path variable (the template is
    # loaded through the jinja environment) and a pointless single-argument
    # os.path.join around share_dir.
    # NOTE(review): autoescape=True HTML-escapes rendered values; confirm the
    # cromwell config never needs characters such as & or < in its paths.
    fs_loader = FileSystemLoader(searchpath=LaimsApp().share_dir)
    env = Environment(loader=fs_loader, autoescape=True)
    template = env.get_template('cromwell.config.jinja')

    cromwell_config_path = os.path.join(output_dir, 'cromwell.config')
    template.stream(
        log_dir=log_dir,
        output_dir=output_dir,
        lsf_queue=app.queue,
        lsf_job_group=app.job_group,
    ).dump(cromwell_config_path)

    cmd = [
        '/usr/bin/java', '-Dconfig.file=' + cromwell_config_path, '-Xmx24g', '-jar', '/opt/cromwell.jar', 'run',
        '-t', 'cwl', '-i', input_yaml_path, 'https://raw.githubusercontent.com/tmooney/cancer-genomics-workflow/downsample_and_recall/definitions/pipelines/gathered_downsample_and_recall.cwl' #TODO get a more canonical URL once things are merged
    ]
    job_runner.launch(cmd)
コード例 #27
0
ファイル: test_app.py プロジェクト: hall-lab/old-laims
 def test1_init_fails(self):
     # FIXME: does not raise an exception when run in the full test suite;
     # reason unknown.
     expected_msg = "Given config file /laims.json does not exist!"
     with self.assertRaisesRegex(Exception, expected_msg):
         LaimsApp(config_file="/laims.json")