Example #1
def wait_for_sge_jobs(cmd, jids, timeout):
    """
    This replaces the original qsub -sync y -hold_jid j1,j2,... command,
    which can still hang if certain jobs get stuck.

    If a timeout occurs, simply qdel all jids (whether or not they still exist)
    and let the calling function decide what to do
    """
    def get_active_jids():
        stuff = os.popen("qstat").read().strip().split('\n')
        for x in stuff[2:]:
            job_id = x.split()[0]
            yield job_id

    p = Process(target=wait_for_sge_jobs_worker, args=(cmd,))
    p.start()
    p.join(timeout)
    if p.is_alive(): # timed out
        active_jids = [x for x in get_active_jids()]
        while len(active_jids) > 0:
            for jid in active_jids:
                kill_cmd = "qdel " + str(jid)
                backticks(kill_cmd) # don't care whether it worked
            time.sleep(3) # wait 3 sec for qdel to take effect....
            active_jids = [x for x in get_active_jids()] # make sure qdel really worked
        return "TIMEOUT"
    return "SUCCESS"
Example #2
    def test_newUuid_random_cli(self):
        fn_orig = data.getXml(8)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        fn = os.path.join(outdir, 'fn.alignmentset.xml')
        fn2 = os.path.join(outdir, 'fn2.alignmentset.xml')
        with AlignmentSet(fn_orig) as aln:
            aln.copyTo(fn)
            shutil.copy(fn, fn2)

        pre_uuid = AlignmentSet(fn).uuid
        pre_uuid2 = AlignmentSet(fn2).uuid
        self.assertEqual(pre_uuid, pre_uuid2)

        cmd = "dataset newuuid --random {d}".format(d=fn)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn))

        cmd = "dataset newuuid --random {d}".format(d=fn2)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn2))

        post_uuid = AlignmentSet(fn).uuid
        post_uuid2 = AlignmentSet(fn2).uuid
        self.assertNotEqual(pre_uuid, post_uuid)
        self.assertNotEqual(pre_uuid2, post_uuid2)
        # RANDOM, THEREFORE THESE ARE NOT EQUAL:
        self.assertNotEqual(post_uuid, post_uuid2)
Example #3
    def test_contigset_consolidate_int_names(self):
        #build set to merge
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')
        outFas1 = os.path.join(outdir, 'tempfile1.fasta')
        outFas2 = os.path.join(outdir, 'tempfile2.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
                      i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
                      o=inFas))
        rs1 = ContigSet(inFas)

        double = 'B.cereus.1'
        exp_double = rs1.get_contig(double)

        # todo: modify the names first:
        with FastaWriter(outFas1) as writer:
            writer.writeRecord('5141', exp_double.sequence)
        with FastaWriter(outFas2) as writer:
            writer.writeRecord('5142', exp_double.sequence)

        exp_double_seqs = [exp_double.sequence, exp_double.sequence]
        exp_names = ['5141', '5142']

        obs_file = ContigSet(outFas1, outFas2)
        log.debug(obs_file.toExternalFiles())
        obs_file.consolidate()
        log.debug(obs_file.toExternalFiles())

        # open obs and compare to exp
        for name, seq in zip(exp_names, exp_double_seqs):
            self.assertEqual(obs_file.get_contig(name).sequence[:], seq)
Example #4
    def test_newUuid_random_cli(self):
        fn_orig = data.getXml(8)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        fn = os.path.join(outdir, 'fn.alignmentset.xml')
        fn2 = os.path.join(outdir, 'fn2.alignmentset.xml')
        with AlignmentSet(fn_orig) as aln:
            aln.copyTo(fn)
            shutil.copy(fn, fn2)

        pre_uuid = AlignmentSet(fn).uuid
        pre_uuid2 = AlignmentSet(fn2).uuid
        self.assertEqual(pre_uuid, pre_uuid2)

        cmd = "dataset newuuid --random {d}".format(d=fn)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn))

        cmd = "dataset newuuid --random {d}".format(d=fn2)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn2))

        post_uuid = AlignmentSet(fn).uuid
        post_uuid2 = AlignmentSet(fn2).uuid
        self.assertNotEqual(pre_uuid, post_uuid)
        self.assertNotEqual(pre_uuid2, post_uuid2)
        # RANDOM, THEREFORE THESE ARE NOT EQUAL:
        self.assertNotEqual(post_uuid, post_uuid2)
Example #5
def wait_for_sge_jobs(cmd, jids, timeout):
    """
    This replaces the original qsub -sync y -hold_jid j1,j2,... command,
    which can still hang if certain jobs get stuck.

    If a timeout occurs, simply qdel all jids (whether or not they still exist)
    and let the calling function decide what to do
    """
    def get_active_jids():
        stuff = os.popen("qstat").read().strip().split('\n')
        for x in stuff[2:]:
            job_id = x.split()[0]
            yield job_id

    p = Process(target=wait_for_sge_jobs_worker, args=(cmd, ))
    p.start()
    p.join(timeout)
    if p.is_alive():  # timed out
        active_jids = [x for x in get_active_jids()]
        while len(active_jids) > 0:
            for jid in active_jids:
                kill_cmd = "qdel " + str(jid)
                backticks(kill_cmd)  # don't care whether it worked
            time.sleep(3)  # wait 3 sec for qdel to take effect....
            active_jids = [x for x in get_active_jids()]  # make sure qdel really worked
        return "TIMEOUT"
    return "SUCCESS"
Example #6
    def test_contigset_consolidate_int_names(self):
        #build set to merge
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')
        outFas1 = os.path.join(outdir, 'tempfile1.fasta')
        outFas2 = os.path.join(outdir, 'tempfile2.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
            i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
            o=inFas))
        rs1 = ContigSet(inFas)

        double = 'B.cereus.1'
        exp_double = rs1.get_contig(double)

        # todo: modify the names first:
        with FastaWriter(outFas1) as writer:
            writer.writeRecord('5141', exp_double.sequence)
        with FastaWriter(outFas2) as writer:
            writer.writeRecord('5142', exp_double.sequence)

        exp_double_seqs = [exp_double.sequence, exp_double.sequence]
        exp_names = ['5141', '5142']

        obs_file = ContigSet(outFas1, outFas2)
        log.debug(obs_file.toExternalFiles())
        obs_file.consolidate()
        log.debug(obs_file.toExternalFiles())

        # open obs and compare to exp
        for name, seq in zip(exp_names, exp_double_seqs):
            self.assertEqual(obs_file.get_contig(name).sequence[:], seq)
Example #7
    def test_create_cli(self):
        log.debug("Absolute")
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        cmd = "dataset create --type AlignmentSet {o} {i1} {i2}".format(
            o=os.path.join(outdir, 'pbalchemysim.alignmentset.xml'),
            i1=data.getXml(8),
            i2=data.getXml(11))
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(
            os.path.exists(
                os.path.join(outdir, os.path.basename(data.getXml(12)))))

        log.debug("Relative")
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        cmd = ("dataset create --relative --type AlignmentSet "
               "{o} {i1} {i2}".format(o=os.path.join(
                   outdir, 'pbalchemysim.alignmentset.xml'),
                                      i1=data.getXml(8),
                                      i2=data.getXml(11)))
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(
            os.path.exists(
                os.path.join(outdir, os.path.basename(data.getXml(12)))))
Example #8
    def test_contigset_consolidate(self):
        #build set to merge
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')
        outFas1 = os.path.join(outdir, 'tempfile1.fasta')
        outFas2 = os.path.join(outdir, 'tempfile2.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
                      i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
                      o=inFas))
        rs1 = ContigSet(inFas)

        singletons = ['A.baumannii.1', 'A.odontolyticus.1']
        double = 'B.cereus.1'
        reader = rs1.resourceReaders()[0]
        exp_double = rs1.get_contig(double)
        exp_singles = [rs1.get_contig(name) for name in singletons]

        # todo: modify the names first:
        with FastaWriter(outFas1) as writer:
            writer.writeRecord(exp_singles[0])
            writer.writeRecord(exp_double.name + '_10_20', exp_double.sequence)
        with FastaWriter(outFas2) as writer:
            writer.writeRecord(exp_double.name + '_0_10',
                               exp_double.sequence + 'ATCGATCGATCG')
            writer.writeRecord(exp_singles[1])

        exp_double_seq = ''.join([exp_double.sequence,
                                  'ATCGATCGATCG',
                                  exp_double.sequence])
        exp_single_seqs = [rec.sequence for rec in exp_singles]

        acc_file = ContigSet(outFas1, outFas2)
        acc_file.induceIndices()
        log.debug(acc_file.toExternalFiles())
        self.assertEqual(len(acc_file), 4)
        self.assertEqual(len(list(acc_file)), 4)
        acc_file.consolidate()
        log.debug(acc_file.toExternalFiles())

        # open acc and compare to exp
        for name, seq in zip(singletons, exp_single_seqs):
            self.assertEqual(acc_file.get_contig(name).sequence[:], seq)
        self.assertEqual(acc_file.get_contig(double).sequence[:],
                         exp_double_seq)

        self.assertEqual(len(acc_file._openReaders), 1)
        self.assertEqual(len(acc_file.index), 3)
        self.assertEqual(len(acc_file._indexMap), 3)
        self.assertEqual(len(acc_file), 3)
        self.assertEqual(len(list(acc_file)), 3)

        # test merge:
        acc1 = ContigSet(outFas1)
        acc2 = ContigSet(outFas2)
        acc3 = acc1 + acc2
Example #9
 def test_updateCounts_without_pbi(self):
     log.info("Testing updateCounts without pbi")
     data_fname = data.getBam(0)
     outdir = tempfile.mkdtemp(suffix="dataset-unittest")
     tempout = os.path.join(outdir, os.path.basename(data_fname))
     backticks('cp {i} {o}'.format(i=data_fname, o=tempout))
     aln = AlignmentSet(tempout, strict=False)
     self.assertEqual(aln.totalLength, -1)
     self.assertEqual(aln.numRecords, -1)
Example #10
 def test_updateCounts_without_pbi(self):
     log.info("Testing updateCounts without pbi")
     data_fname = data.getBam(0)
     outdir = tempfile.mkdtemp(suffix="dataset-unittest")
     tempout = os.path.join(outdir, os.path.basename(data_fname))
     backticks('cp {i} {o}'.format(i=data_fname, o=tempout))
     aln = AlignmentSet(tempout, strict=False)
     self.assertEqual(aln.totalLength, -1)
     self.assertEqual(aln.numRecords, -1)
Example #11
    def test_contigset_consolidate(self):
        #build set to merge
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')
        outFas1 = os.path.join(outdir, 'tempfile1.fasta')
        outFas2 = os.path.join(outdir, 'tempfile2.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
            i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
            o=inFas))
        rs1 = ContigSet(inFas)

        singletons = ['A.baumannii.1', 'A.odontolyticus.1']
        double = 'B.cereus.1'
        reader = rs1.resourceReaders()[0]
        exp_double = rs1.get_contig(double)
        exp_singles = [rs1.get_contig(name) for name in singletons]

        # todo: modify the names first:
        with FastaWriter(outFas1) as writer:
            writer.writeRecord(exp_singles[0])
            writer.writeRecord(exp_double.name + '_10_20', exp_double.sequence)
        with FastaWriter(outFas2) as writer:
            writer.writeRecord(exp_double.name + '_0_10',
                               exp_double.sequence + 'ATCGATCGATCG')
            writer.writeRecord(exp_singles[1])

        exp_double_seq = ''.join(
            [exp_double.sequence, 'ATCGATCGATCG', exp_double.sequence])
        exp_single_seqs = [rec.sequence for rec in exp_singles]

        acc_file = ContigSet(outFas1, outFas2)
        acc_file.induceIndices()
        log.debug(acc_file.toExternalFiles())
        self.assertEqual(len(acc_file), 4)
        self.assertEqual(len(list(acc_file)), 4)
        acc_file.consolidate()
        log.debug(acc_file.toExternalFiles())

        # open acc and compare to exp
        for name, seq in zip(singletons, exp_single_seqs):
            self.assertEqual(acc_file.get_contig(name).sequence[:], seq)
        self.assertEqual(
            acc_file.get_contig(double).sequence[:], exp_double_seq)

        self.assertEqual(len(acc_file._openReaders), 1)
        self.assertEqual(len(acc_file.index), 3)
        self.assertEqual(len(acc_file._indexMap), 3)
        self.assertEqual(len(acc_file), 3)
        self.assertEqual(len(list(acc_file)), 3)

        # test merge:
        acc1 = ContigSet(outFas1)
        acc2 = ContigSet(outFas2)
        acc3 = acc1 + acc2
Example #12
def qsub_job_runner(cmds_list, sh_file_format, done_script, sge_opts, qsub_retry=3, run_local_if_qsub_fail=True):
    """
    cmds_list -- list of commands to run (each in a separate file)
    sh_file_format ---- ex: test_script.{i}.sh

    ToDo:
    (1) add in ways to gracefully fail if SGE submits fail -- resubmit? wait? run local?
    (2) add in ways to monitor if certain qsub jobs died or hung --- resubmit? kill? run local?
    """
    jids = []
    for i, cmd in enumerate(cmds_list):
        f = open(sh_file_format.format(i=i), "w")
        f.write("#!/bin/bash\n")
        f.write(cmd + "\n")
        f.close()

        # hard-coded to 4 CPUS because hard-coded in daligner!
        qsub_cmd = "qsub"
        if sge_opts.queue_name is not None:
            qsub_cmd += " -q " + sge_opts.queue_name
        qsub_cmd += " -cwd -V -S /bin/bash -pe {env} 4 -e {out}.elog -o {out}.olog {out}".format(
            env=sge_opts.sge_env_name, out=f.name
        )
        try_times = 1
        while try_times <= qsub_retry:
            _out, _code, _msg = backticks(qsub_cmd)
            if _code == 0:  # succeeded, break
                break
            else:
                # failed, sleep for a little, try again
                time.sleep(10)
                try_times += 1
        if try_times > qsub_retry:
            if run_local_if_qsub_fail:
                raise NotImplementedError("Running locally without SGE is not yet implemented!")
            else:
                raise RuntimeError("Unable to qsub. Abort!: " + qsub_cmd)
        # ex: # Your job 596028 ("a.sh") has been submitted
        jids.append(str(_out).split()[2])

    # use a qsub job to wait for the commands to finish
    # ToDo: this is NOT bullet proof! watch for cases where the job may have died or been killed or hung
    wait_cmd = "qsub "
    if sge_opts.queue_name is not None:
        wait_cmd += " -q " + sge_opts.queue_name
    wait_cmd += " -sync y -pe {2} 1 -cwd -S /bin/bash -V -e /dev/null -o /dev/null -hold_jid {0} {1}".format(
        ",".join(jids), done_script, sge_opts.sge_env_name
    )
    _out, _code, _msg = backticks(wait_cmd)
    if _code != 0:  # failed, just wait manually then
        while True:
            # re-query qstat on every pass so finished jobs actually drop out
            active_jids = [x.split()[0] for x in
                           os.popen("qstat").read().strip().split("\n")[2:]]
            if any(x in jids for x in active_jids):  # some jobs are still running
                time.sleep(10)
            else:
                break
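
A hypothetical invocation of qsub_job_runner; _SgeOpts below is a stand-in for the caller's real options object, of which only queue_name and sge_env_name are read above:

class _SgeOpts(object):
    # Hypothetical stand-in; only these two attributes are read by
    # qsub_job_runner.
    queue_name = None      # None means no -q flag, i.e. the default queue
    sge_env_name = "smp"   # parallel environment name handed to qsub -pe

cmds = ["echo step-one", "echo step-two"]
qsub_job_runner(cmds, "step.{i}.sh", "done.sh", _SgeOpts())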
Example #13
    def createPickles(self):
        """For each file in fasta_filenames, call ice_partial.py to build
        clusters and to save results to a pickle file. When all pickles
        are done, union all pickles.
        """
        self.add_log("Mapping non-full-length reads to consensus isoforms.")
        self.add_log("Creating pickles...", level=logging.INFO)

        for idx, fa in enumerate(self.fasta_filenames):
            # for each split non-full-length reads FASTA file, build
            # partial_uc.pickle
            self.add_log("Creating a pickle for {f}".format(f=fa))
            cmd = "ice_partial.py {i} ".format(i=fa) + \
                  "{r} ".format(r=self.ref_fasta) + \
                  "{o} ".format(o=self.pickle_filenames[idx]) + \
                  "--blasr_nproc={n} ".format(n=self.sge_opts.blasr_nproc) + \
                  "--done={d} ".format(d=self.done_filenames[idx])
            if self.ccs_fofn is not None:
                cmd += "--ccs_fofn={f} ".format(f=self.ccs_fofn)
            if self.sa_file is not None:
                cmd += "--sa={sa} ".format(sa=self.sa_file)

            self.add_log("Writing command to script {fsh}".format(
                fsh=self.script_filenames[idx]))
            with open(self.script_filenames[idx], 'w') as fsh:
                fsh.write(cmd + "\n")

            # determine elog & olog
            partial_log_fn = op.join(self.log_dir,
                                     'IcePartial.{idx}'.format(idx=idx))
            elog = partial_log_fn + ".elog"
            olog = partial_log_fn + ".olog"
            jid = "ice_partial_" + op.basename(fa)

            qsub_cmd = "qsub " + \
                       "-pe smp {n} ".format(n=self.sge_opts.blasr_nproc) + \
                       "-cwd -S /bin/bash -V " + \
                       "-e {elog} ".format(elog=elog) + \
                       "-o {olog} ".format(olog=olog) + \
                       "-N {jid} ".format(jid=jid) + \
                       "{sh}".format(sh=self.script_filenames[idx])

            if self.sge_opts.use_sge is True:
                self.add_log("Submitting CMD: {qcmd}".format(qcmd=qsub_cmd))
                _out, _code, _msg = backticks(qsub_cmd)
            #elif self.sge_opts.useSMRTPortal is True:
            #    pass
            else:
                cmd += " 1>{olog} 2>{elog}".format(olog=olog, elog=elog)
                self.add_log("Submitting CMD: {cmd}".format(cmd=cmd))
                _out, _code, _msg = backticks(cmd)
                if _code != 0:
                    raise RuntimeError("CMD failed: {cmd}\n{msg}\n".format(
                        cmd=cmd, msg=str(_msg)))
Example #14
    def createPickles(self):
        """For each file in fasta_filenames, call ice_partial.py to build
        clusters and to save results to a pickle file. When all pickles
        are done, union all pickles.
        """
        self.add_log("Mapping non-full-length reads to consensus isoforms.")
        self.add_log("Creating pickles...", level=logging.INFO)

        for idx, fa in enumerate(self.fasta_filenames):
            # for each split non-full-length reads FASTA file, build
            # partial_uc.pickle
            self.add_log("Creating a pickle for {f}".format(f=fa))
            cmd = "ice_partial.py {i} ".format(i=fa) + \
                  "{r} ".format(r=self.ref_fasta) + \
                  "{o} ".format(o=self.pickle_filenames[idx]) + \
                  "--blasr_nproc={n} ".format(n=self.sge_opts.blasr_nproc) + \
                  "--done={d} ".format(d=self.done_filenames[idx])
            if self.ccs_fofn is not None:
                cmd += "--ccs_fofn={f} ".format(f=self.ccs_fofn)
            if self.sa_file is not None:
                cmd += "--sa={sa} ".format(sa=self.sa_file)

            self.add_log("Writing command to script {fsh}".
                         format(fsh=self.script_filenames[idx]))
            with open(self.script_filenames[idx], 'w') as fsh:
                fsh.write(cmd + "\n")

            # determine elog & olog
            partial_log_fn = op.join(self.log_dir,
                                     'IcePartial.{idx}'.format(idx=idx))
            elog = partial_log_fn + ".elog"
            olog = partial_log_fn + ".olog"
            jid = "ice_partial_" + op.basename(fa)

            qsub_cmd = "qsub " + \
                       "-pe smp {n} ".format(n=self.sge_opts.blasr_nproc) + \
                       "-cwd -S /bin/bash -V " + \
                       "-e {elog} ".format(elog=elog) + \
                       "-o {olog} ".format(olog=olog) + \
                       "-N {jid} ".format(jid=jid) + \
                       "{sh}".format(sh=self.script_filenames[idx])

            if self.sge_opts.use_sge is True:
                self.add_log("Submitting CMD: {qcmd}".format(qcmd=qsub_cmd))
                _out, _code, _msg = backticks(qsub_cmd)
            #elif self.sge_opts.useSMRTPortal is True:
            #    pass
            else:
                cmd += " 1>{olog} 2>{elog}".format(olog=olog, elog=elog)
                self.add_log("Submitting CMD: {cmd}".format(cmd=cmd))
                _out, _code, _msg = backticks(cmd)
                if _code != 0:
                    raise RuntimeError("CMD failed: {cmd}\n{msg}\n".format(
                        cmd=cmd, msg=str(_msg)))
Example #15
def qsub_job_runner(cmds_list, sh_file_format, done_script, sge_opts, qsub_retry=3, run_local_if_qsub_fail=True):
    """
    cmds_list -- list of commands to run (each in a separate file)
    sh_file_format ---- ex: test_script.{i}.sh

    ToDo:
    (1) add in ways to gracefully fail if SGE submits fail -- resubmit? wait? run local?
    (2) add in ways to monitor if certain qsub jobs died or hung --- resubmit? kill? run local?
    """
    jids = []
    for i, cmd in enumerate(cmds_list):
        f = open(sh_file_format.format(i=i), 'w')
        f.write("#!/bin/bash\n")
        f.write(cmd + '\n')
        f.close()

        # hard-coded to 4 CPUS because hard-coded in daligner!
        qsub_cmd = "qsub"
        if sge_opts.queue_name is not None:
            qsub_cmd += " -q " + sge_opts.queue_name
        qsub_cmd += " -cwd -V -S /bin/bash -pe {env} 4 -e {out}.elog -o {out}.olog {out}".format(\
            env=sge_opts.sge_env_name, out=f.name)
        try_times = 1
        while try_times <= qsub_retry:
            _out, _code, _msg = backticks(qsub_cmd)
            if _code == 0: # succeeded, break
                break
            else:
                # failed, sleep for a little, try again
                time.sleep(10)
                try_times += 1
        if try_times > qsub_retry:
            if run_local_if_qsub_fail:
                raise NotImplementedError("Running locally without SGE is not yet implemented!")
            else:
                raise RuntimeError("Unable to qsub. Abort!: " + qsub_cmd)
        # ex: # Your job 596028 ("a.sh") has been submitted
        jids.append(str(_out).split()[2])

    # use a qsub job to wait for the commands to finish
    # ToDo: this is NOT bullet proof! watch for cases where the job may have died or been killed or hung
    wait_cmd = "qsub "
    if sge_opts.queue_name is not None:
        wait_cmd += " -q " + sge_opts.queue_name
    wait_cmd += " -sync y -pe {2} 1 -cwd -S /bin/bash -V -e /dev/null -o /dev/null -hold_jid {0} {1}".format(",".join(jids), done_script, sge_opts.sge_env_name)
    _out, _code, _msg = backticks(wait_cmd)
    if _code != 0: # failed, just wait manually then
        while True:
            # re-query qstat on every pass so finished jobs actually drop out
            active_jids = [x.split()[0] for x in
                           os.popen("qstat").read().strip().split('\n')[2:]]
            if any(x in jids for x in active_jids): # some jobs are still running
                time.sleep(10)
            else:
                break
Example #16
    def _test_daligner_against_ref(self, test_name, use_sge, sge_opts,
                                   prob_model_from="fake"):
        """Test daligner_against_ref with and without using sge."""
        copy_dir = op.join(self.dataDir, "test_daligner_against_ref")
        output_dir = op.join(self.outDir, test_name)
        mknewdir(output_dir)

        qname, tname = "test_daligner_query.fasta", "test_daligner_target.fasta"
        query_filename = op.join(output_dir, qname)
        target_filename = op.join(output_dir, tname)

        prob_model = None
        if prob_model_from == "fake":
            prob_model = ProbFromModel(0.01, 0.07, 0.06)
        elif prob_model_from == "fastq":
            fastq_fn = op.join(copy_dir, "test_daligner_reads.fastq")
            prob_model = ProbFromFastq(fastq_fn)
        else:
            self.assertTrue(False)

        qver_get_func = prob_model.get_smoothed
        qvmean_get_func = prob_model.get_mean

        dummy_o, c, dummy_m = backticks("cp %s %s" % (op.join(copy_dir, qname), query_filename))
        self.assertTrue(c == 0)

        dummy_o, c, dummy_m = backticks("cp %s %s" % (op.join(copy_dir, tname), target_filename))
        self.assertTrue(c == 0)

        old_dir = os.getcwd()
        os.chdir(output_dir)

        runner = DalignerRunner(query_filename=query_filename,
                                target_filename=target_filename,
                                is_FL=True, same_strand_only=True,
                                use_sge=use_sge, sge_opts=sge_opts)
        runner.run(output_dir=op.join(self.outDir, test_name))

        hits = []

        for la4ice_filename in runner.la4ice_filenames:
            hits.extend(daligner_against_ref(query_dazz_handler=runner.query_dazz_handler,
                                             target_dazz_handler=runner.target_dazz_handler,
                                             la4ice_filename=la4ice_filename,
                                             is_FL=True, sID_starts_with_c=False,
                                             qver_get_func=qver_get_func,
                                             qvmean_get_func=qvmean_get_func))
        # Num of hits may change when daligner or parameters change.
        self.assertEqual(len(hits), 706)
        self.assertEqual(str(hits[0]),
                         "m54007_160109_025449/27984844/29_646_CCS/0_617 aligns to m54007_160109_025449/28836279/631_54_CCS")
        os.chdir(output_dir)
Example #17
def _check_constools():
    if not BamtoolsVersion().good:
        log.warn("Bamtools not found or out of date")
        return False

    cmd = "pbindex"
    o, r, m = backticks(cmd)
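    # the check below assumes pbindex exits with status 1 when run without
    # arguments (usage error); a missing executable would typically exit
    # with a different code, e.g. 127 from the shell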
    if r != 1:
        return False

    cmd = "samtools"
    o, r, m = backticks(cmd)
    if r != 1:
        return False
    return True
Example #18
    def test_alignmentset_partial_consolidate(self):
        testFile = ("/pbi/dept/secondary/siv/testdata/SA3-DS/"
                    "lambda/2372215/0007_tiny/Alignment_"
                    "Results/m150404_101626_42267_c10080"
                    "7920800000001823174110291514_s1_p0."
                    "all.alignmentset.xml")
        aln = AlignmentSet(testFile)
        nonCons = AlignmentSet(testFile)
        self.assertEqual(len(aln.toExternalFiles()), 3)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn, numFiles=2)
        self.assertFalse(os.path.exists(outfn))
        self.assertTrue(os.path.exists(_infixFname(outfn, "0")))
        self.assertTrue(os.path.exists(_infixFname(outfn, "1")))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        self.assertEqual(len(nonCons.toExternalFiles()), 3)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))

        log.debug("Test cli")
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "merged.bam")
        xmlfile = os.path.join(outdir, "merged.xml")
        cmd = "dataset consolidate --numFiles 2 {i} {d} {x}".format(i=testFile,
                                                                    d=datafile,
                                                                    x=xmlfile)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
Example #19
    def _startPhmmers(self, chunked_reads_fns, chunked_dom_fns,
                      out_dom_fn, primer_fn, pbmatrix_fn):
        """Run phmmers on chunked reads files in 'chunked_reads_fns' and
        generate chunked dom files as listed in 'chunked_dom_fns', finally
        concatenate dom files to 'out_dom_fn'."""
        logging.info("Start to launch phmmer on chunked reads.")
        jobs = []
        for reads_fn, domFN in zip(chunked_reads_fns, chunked_dom_fns):
            p = multiprocessing.Process(
                target=self._phmmer,
                args=(reads_fn, domFN, primer_fn, pbmatrix_fn))
            jobs.append((p, domFN))
            p.start()

        for p, domFN in jobs:
            p.join()
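            # note: '>>' appends, so this assumes out_dom_fn does not already
            # exist (or is empty); stale content would otherwise accumulate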
            cmd = "cat {0} >> {1}".format(real_upath(domFN),
                                          real_upath(out_dom_fn))
            _output, errCode, errMsg = backticks(cmd)
            if errCode != 0:
                raise ClassifierException(
                    "Error concatenating dom files: {e}".
                    format(e=str(errMsg)))

        self._cleanup(chunked_reads_fns)
        self._cleanup(chunked_dom_fns)
Example #20
 def _releaseLock(self, dbLock):
     """Release dbLock."""
     _o, errCode, _m = backticks("rm -f {dbLock}".format(dbLock=dbLock))
     if errCode == 0:
         logging.debug(self.name + ": Release the lock for DB creation.")
     else:
         raise RuntimeError(self.name + ": Failed to release lock " + dbLock + ". Please delete the lock manually.")
Example #21
 def test_exit_code_0(self):
     bam = self.getAlignmentSet()
     var_rpt = os.path.join(DATA, 'variants_report.json')
     mapping_rpt = os.path.join(DATA, 'mapping_stats_report.json')
     cmd = 'python -m pbreports.report.sat {o} {r} {c} {a} {v}'.format(o=self._output_dir,
                                                                       r='rpt.json',
                                                                       c=bam,
                                                                       a=var_rpt,
                                                                       v=mapping_rpt)
     o, c, m = backticks(cmd)
     log.info(cmd)
     if c != 0:
         log.error(m)
         log.error(o)
         print(m)
     self.assertEqual(0, c)
     rpt_file = os.path.join(self._output_dir, 'rpt.json')
     rpt = load_report_from_json(rpt_file)
     self.assertEqual('sidney', rpt.get_attribute_by_id('instrument').value)
     self.assertEqual(1, rpt.get_attribute_by_id('coverage').value)
     self.assertEqual(1, rpt.get_attribute_by_id('concordance').value)
     self.assertEqual(7752, rpt.get_attribute_by_id(
         'mapped_readlength_mean').value)
     self.assertEqual(48, rpt.get_attribute_by_id('reads_in_cell').value)
     out = StringIO()
     self.assertTrue(summarize_report(rpt_file, out=out))
Example #22
def isExist(ff):
    """Return whether a file or a dir ff exists or not.
    Call ls instead of python os.path.exists to eliminate NFS errors.
    """
    cmd = "ls %s" % ff
    _output, errCode, _errMsg = backticks(cmd)
    return (errCode == 0)
Example #23
def blasr_sam_for_quiver(input_fasta, ref_fasta,
                         out_sam_filename,
                         run_cmd=True, blasr_nproc=12):
    """
    #input_fofn --- should be input.fofn
    input_fasta --- should be in.raw.fa
    ref_fasta --- reference fasta (ex: g_consensus.fa) to align to
    #output_dir --- if None, automatically set to where ref_fasta is

    run blasr -clipping soft to get sam
    """
    #if output_dir is None:
    #    output_dir = op.dirname(ref_fasta)
    #if movies is not None:
    #    f = open(input_fasta + '.fofn', 'w')
    #    for line in open(input_fofn):
    #        if op.basename(line).split('.')[0] in movies:
    #            f.write(line)
    #    f.close()
    #    input_fofn = f.name
    #out_sam = op.join(output_dir, out_sam_filename)
    #TODO: review code

    cmd = "blasr {i} ".format(i=input_fasta) + \
          "{r} ".format(r=ref_fasta) + \
          "-nproc {n} ".format(n=blasr_nproc) + \
          "-bestn 5 -nCandidates 10 -sam -clipping soft " + \
          "-out {o}".format(o=out_sam_filename)
    logging.debug("CMD: " + cmd)
    if run_cmd:
        _out, _code, _msg = backticks(cmd)
        if _code != 0:
            raise RuntimeError("CMD failed: {cmd}\n{e}".
                format(cmd=cmd, e=_msg))
    return cmd
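
Because the command string is returned either way, the function supports a dry run; a small hypothetical example (file names taken from the docstring above):

# Hypothetical dry run: run_cmd=False builds and returns the blasr command
# line without executing it.
cmd = blasr_sam_for_quiver("in.raw.fa", "g_consensus.fa", "out.sam",
                           run_cmd=False)
print(cmd)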
Example #24
    def _bt2BuildIndex(self, tempDir, referenceFile):
        """Build bt2 index files.

            Input:
                tempDir      : a temporary directory for saving bowtie2
                               index files.
                referenceFile: the reference sequence file.
            Output:
                list of strings, bowtie2 index files.

        """
        refBaseName = bt2BaseName(tempDir, referenceFile)
        cmdStr = "bowtie2-build -q -f {0} {1}".\
            format(referenceFile, refBaseName)

        logging.info(self.name + ": Build bowtie2 index files.")
        logging.debug(self.name + ": Call {0}".format(cmdStr))

        _output, errCode, errMsg = backticks(cmdStr)
        if (errCode != 0):
            logging.error(self.name + ": Failed to build bowtie2 " +
                          "index files.\n" + errMsg)
            raise RuntimeError(errMsg)

        return bt2IndexFiles(refBaseName)
Example #25
    def test_integration(self):
        exe = "barcode_report"
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
        json_report_file_name = temp_file.name
        temp_file.close()
        ccs = " --ccs " if self.ccs else ""
        cmd = "{e} --debug {ccs} {b} {ba} {r}".format(e=exe,
                                                      b=self.bas_h5_fofn,
                                                      ba=self.barcode_h5_fofn,
                                                      r=json_report_file_name,
                                                      ccs=ccs)

        log.info("Running cmd {c}".format(c=cmd))
        output, rcode, emsg = backticks(cmd)

        if rcode != 0:
            log.error(output)
            log.error(emsg)

        self.assertEqual(0, rcode)

        with open(json_report_file_name, 'r') as f:
            s = json.load(f)

        self.assertIsNotNone(s)
        log.info(pformat(s))

        # cleanup
        os.remove(json_report_file_name)
Example #26
    def test_missing_fai_error_message(self):
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
            i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
            o=inFas))
        rs1 = ContigSet(inFas)
        with self.assertRaises(IOError) as cm:
            rs1.assertIndexed()
        self.assertEqual(
            str(cm.exception),
            ("Companion FASTA index (.fai) file not found or malformatted! "
             "Use 'samtools faidx' to generate FASTA index."))
    def test_exit_code_0_referenceset(self):
        """
        Like a cram test. Assert exits with 0 with ReferenceSet XML
        """

        ref = os.path.join(self._data_dir, 'references', 'lambda', 'sequence',
                           'lambda.fasta')
        ref_name = os.path.join(self._output_dir, "refset.xml")
        refset = ReferenceSet(ref)
        refset.write(ref_name)
        ref = ref_name
        gff = os.path.join(self._data_dir, 'alignment_summary.lambda.gff')
        r = 'rpt.json'
        cmd = 'python -m pbreports.report.coverage {o} {r} {c} {g}'.format(o=self._output_dir,
                                                            r=r,
                                                            c=ref, g=gff)

        log.info(cmd)
        o, c, m = backticks(cmd)

        if c != 0:
            log.error(m)
            log.error(o)
            sys.stderr.write(str(m) + "\n")

        self.assertEqual(0, c)
        self.assertTrue(os.path.exists(os.path.join(self._output_dir, r)))
Example #28
    def test_loadmetadata_from_dataset_create_cli(self):
        fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
        fn2 = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
        log.debug(fn)

        aln = AlignmentSet(data.getXml(8))
        aln.metadata.collections = None
        aln.copyTo(fn)
        aln.close()
        del aln
        self.assertTrue(os.path.exists(fn))

        aln = AlignmentSet(fn)
        self.assertFalse(aln.metadata.collections)

        cmd = "dataset create --metadata {m} {o} {i}".format(
            o=fn2,
            i=fn,
            m=("/pbi/dept/secondary/siv/testdata/"
               "SA3-Sequel/lambda/roche_SAT/"
               "m54013_151205_032353.subreadset.xml"))
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0, m)
        aln = AlignmentSet(fn2)
        self.assertTrue(aln.metadata.collections)
Example #29
def trigger_nfs_refresh(ff):
    """
    Central place for all NFS hackery

    Return whether a file or a dir ff exists or not.
    Call ls instead of python os.path.exists to eliminate NFS errors.

    Added try/except black-hole exception cases to help trigger an NFS refresh

    :rtype: bool
    """
    # try to trigger refresh for File case
    try:
        f = open(ff, 'r')
        f.close()
    except Exception:
        pass

    # try to trigger refresh for Directory case
    try:
        _ = os.stat(ff)
        _ = os.listdir(ff)
    except Exception:
        pass

    # Call externally
    # this is taken from Yuan
    cmd = "ls %s" % ff
    _, rcode, _ = backticks(cmd)

    return rcode == 0
Example #30
    def test_exit_code_0(self):
        """
        Like a cram test. Assert exits with 0, even though region size is 0 See
        bug 25079
        """
        from pbcore.util.Process import backticks
        import tempfile
#         als = os.path.join(self._data_dir, 'alignment_summary.gff')
#         variants = os.path.join(self._data_dir, 'variants.gff.gz')
#         ref = os.path.join(self._data_dir, 'ecoliK12_pbi_March2013')
        ref = pbcore.data.getLambdaFasta()
        tiny_reads = pbcore.data.getBamAndCmpH5()[0]
        out = os.path.join(tempfile.mkdtemp(suffix="summ_cov"), 'gff')
        cmd = 'summarize_coverage --region_size=0 --num_regions=500 {a} {r} {g}'.format(
            a=tiny_reads, r=ref, g=out)

        o, c, m = backticks(cmd)
        log.info(cmd)
        if c != 0:
            log.error(m)
            log.error(o)
            print(m)
        self.assertEqual(0, c)
        self.assertTrue(
            os.path.exists(os.path.join(out)))
Example #31
    def _bt2BuildIndex(self, tempDir, referenceFile):
        """Build bt2 index files.

            Input:
                tempDir      : a temporary directory for saving bowtie2
                               index files.
                referenceFile: the reference sequence file.
            Output:
                list of strings, bowtie2 index files.

        """
        refBaseName = bt2BaseName(tempDir, referenceFile)
        cmdStr = "bowtie2-build -q -f {0} {1}".\
            format(referenceFile, refBaseName)

        logging.info(self.name + ": Build bowtie2 index files.")
        logging.debug(self.name + ": Call {0}".format(cmdStr))

        _output, errCode, errMsg = backticks(cmdStr)
        if (errCode != 0):
            logging.error(self.name + ": Failed to build bowtie2 " +
                          "index files.\n" + errMsg)
            raise RuntimeError(errMsg)

        return bt2IndexFiles(refBaseName)
Example #32
def wait_for_sge_jobs_worker(cmd):
    _out, _code, _msg = backticks(cmd)
    if _code != 0:
        errMsg = "Failed to qsub CMD: {cmd}, {msg}.".format(cmd=cmd, msg=_msg)
        raise RuntimeError(errMsg)
    # Your job 596028 ("a.sh") has been submitted
    return str(_out).split()[2]
Example #33
    def test_loadmetadata_from_dataset_create_cli(self):
        fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
        fn2 = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
        log.debug(fn)

        aln = AlignmentSet(data.getXml(8))
        aln.metadata.collections = None
        aln.copyTo(fn)
        aln.close()
        del aln
        self.assertTrue(os.path.exists(fn))

        aln = AlignmentSet(fn)
        self.assertFalse(aln.metadata.collections)

        cmd = "dataset create --metadata {m} {o} {i}".format(
            o=fn2,
            i=fn,
            m=("/pbi/dept/secondary/siv/testdata/"
               "SA3-Sequel/lambda/roche_SAT/"
               "m54013_151205_032353.subreadset.xml"))
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0, m)
        aln = AlignmentSet(fn2)
        self.assertTrue(aln.metadata.collections)
Example #34
 def _checkPhmmer(self):
     """Check phmmer can be called successfully."""
     logging.info("checking for phmmer existence.")
     _output, errCode, errMsg = backticks("phmmer -h > /dev/null")
     if errCode != 0:
         raise ClassifierException("Unable to invoke phmmer.\n{e}".
                                   format(e=errMsg))
Example #35
def _check_constools():
    cmd = "pbindex"
    o, r, m = backticks(cmd)
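    # the check below assumes pbindex exits with status 1 when run without
    # arguments (usage error); a missing executable would typically exit
    # with a different code, e.g. 127 from the shell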
    if r != 1:
        return False

    cmd = "samtools"
    o, r, m = backticks(cmd)
    if r != 1:
        return False

    cmd = "pbmerge"
    o, r, m = backticks(cmd)
    if r != 1:
        return False
    return True
Example #36
def _pbindexBam(fname):
    cmd = "pbindex {i}".format(i=fname)
    log.info(cmd)
    o, r, m = backticks(cmd)
    if r != 0:
        raise RuntimeError(m)
    return fname + ".pbi"
    def test_alignmentset_partial_consolidate(self):
        testFile = ("/mnt/secondary-siv/testdata/SA3-DS/"
                    "lambda/2372215/0007_tiny/Alignment_"
                    "Results/m150404_101626_42267_c10080"
                    "7920800000001823174110291514_s1_p0."
                    "all.alignmentset.xml")
        aln = AlignmentSet(testFile)
        nonCons = AlignmentSet(testFile)
        self.assertEqual(len(aln.toExternalFiles()), 3)
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        outfn = os.path.join(outdir, 'merged.bam')
        aln.consolidate(outfn, numFiles=2)
        self.assertFalse(os.path.exists(outfn))
        self.assertTrue(os.path.exists(_infixFname(outfn, "0")))
        self.assertTrue(os.path.exists(_infixFname(outfn, "1")))
        self.assertEqual(len(aln.toExternalFiles()), 2)
        self.assertEqual(len(nonCons.toExternalFiles()), 3)
        for read1, read2 in zip(sorted(list(aln)), sorted(list(nonCons))):
            self.assertEqual(read1, read2)
        self.assertEqual(len(aln), len(nonCons))

        log.debug("Test cli")
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        datafile = os.path.join(outdir, "merged.bam")
        xmlfile = os.path.join(outdir, "merged.xml")
        cmd = "dataset.py consolidate --numFiles 2 {i} {d} {x}".format(
            i=testFile, d=datafile, x=xmlfile)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
Example #38
def _cpFile(inFname, outFname):
    cmd = "cp {i} {o}".format(i=inFname,
                              o=outFname)
    log.info(cmd)
    o, r, m = backticks(cmd)
    if r != 0:
        raise RuntimeError(m)
Example #39
 def test_exit_code_0(self):
     bam = self.getAlignmentSet()
     var_rpt = os.path.join(DATA, 'variants_report.json')
     mapping_rpt = os.path.join(DATA, 'mapping_stats_report.json')
     cmd = 'python -m pbreports.report.sat {o} {r} {c} {a} {v}'.format(o=self._output_dir,
                                                             r='rpt.json',
                                                             c=bam,
                                                             a=var_rpt,
                                                             v=mapping_rpt)
     o, c, m = backticks(cmd)
     log.info(cmd)
     if c != 0:
         log.error(m)
         log.error(o)
         print(m)
     self.assertEqual(0, c)
     rpt_file = os.path.join(self._output_dir, 'rpt.json')
     rpt = load_report_from_json(rpt_file)
     self.assertEqual('sidney', rpt.get_attribute_by_id('instrument').value)
     self.assertEqual(1, rpt.get_attribute_by_id('coverage').value)
     self.assertEqual(1, rpt.get_attribute_by_id('accuracy').value)
     self.assertEqual(1328, rpt.get_attribute_by_id('mapped_readlength_mean').value)
     self.assertEqual(48, rpt.get_attribute_by_id('reads_in_cell').value)
     out = StringIO()
     self.assertTrue(summarize_report(rpt_file, out=out))
Example #40
def local_job_runner(cmds_list, num_threads, throw_error=True):
    """
    Execute a list of cmds locally using thread pool with at most
    num_threads threads, wait for all jobs to finish before exit.

    If throw_error is True, when any job failed, raise RuntimeError.
    If throw_error is False, return a list of cmds that failed.

    Parameters:
      cmds_list - cmds that will be executed in ThreadPool
      num_threads - number of threads that will be used in the ThreadPool
      throw_error - whether or not to raise RuntimeError when any cmd fails.
    """
    run_cmd_in_shell = lambda x: backticks(x, merge_stderr=True)
    pool = ThreadPool(processes=num_threads)
    try:
        # backticks traps the exit code itself, so no exception handling is
        # needed here; just make sure the pool is always cleaned up
        rets = pool.map(run_cmd_in_shell, cmds_list)
    finally:
        pool.close()
        pool.join()

    failed_cmds = [cmds_list[i] for i in range(0, len(cmds_list)) if rets[i][1] != 0]
    failed_cmds_out = [rets[i][0] for i in range(0, len(cmds_list)) if rets[i][1] != 0]

    if throw_error and len(failed_cmds) > 0:
        errmsg = "\n".join(["CMD failed: %s, %s" % (cmd, out)
                            for (cmd, out) in zip(failed_cmds, failed_cmds_out)])
        raise RuntimeError(errmsg)
    else:
        return failed_cmds
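
A hypothetical call with throw_error=False, which collects failures instead of raising:

# Hypothetical usage: 'false' exits non-zero, so it should come back in the
# failed list; the other commands succeed.
cmds = ["echo ok", "false", "ls /no/such/dir"]
failed = local_job_runner(cmds, num_threads=2, throw_error=False)
for cmd in failed:
    print("CMD failed: %s" % cmd)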
Example #41
def _pbindexBam(fname):
    cmd = "pbindex {i}".format(i=fname)
    log.info(cmd)
    o, r, m = backticks(cmd)
    if r != 0:
        raise RuntimeError(m)
    return fname + ".pbi"
Example #42
    def run_cmd_and_log(self, cmd, olog="", elog="", description=""):
        """Run the given command locally and write to log, raise a
        RunTimeError if failed to finish the job.
        olog: output log
        elog: error log

        The error message to display should look like:
            CMD exited with a non-zero code: {cmd}, {msg}\n
            {description}\n
            Error log: {elog}\n
        """
        #msg = "Running CMD: {cmd}".format(cmd=cmd)
        #self.add_log(msg)
        _out, _code, _msg = backticks(cmd)
        if _code != 0:
            errMsgs = ["CMD exited with a non-zero code: {cmd}, {msg}".
                       format(cmd=cmd, msg=_msg)]
            if len(description) != 0:
                errMsgs.append("{description}".format(description=description))
            if len(elog) != 0:
                errMsgs.append("Error log: {elog}".format(elog=elog))
            if len(olog) != 0:
                errMsgs.append("Out log: {olog}".format(olog=olog))
            errMsg = "\n".join(errMsgs)
            self.add_log(errMsg, level=logging.ERROR)
            raise RuntimeError(errMsg)
Example #43
def _nfs_exists_check(ff):
    """
    Central place for all NFS hackery

    Return whether a file or a dir ff exists or not.
    Call ls instead of python os.path.exists to eliminate NFS errors.

    Added try/except black-hole exception cases to help trigger an NFS refresh

    :rtype: bool
    """
    # try to trigger refresh for File case
    try:
        f = open(ff, 'r')
        f.close()
    except Exception:
        pass

    # try to trigger refresh for Directory case
    try:
        _ = os.stat(ff)
        _ = os.listdir(ff)
    except Exception:
        pass

    # Call externally
    # this is taken from Yuan
    cmd = "ls %s" % ff
    _, rcode, _ = backticks(cmd)

    return rcode == 0
Example #44
    def run_cmd_and_log(self, cmd, olog="", elog="", description=""):
        """Run the given command locally and write to log, raise a
        RunTimeError if failed to finish the job.
        olog: output log
        elog: error log

        The error message to display should look like:
            CMD exited with a non-zero code: {cmd}, {msg}\n
            {description}\n
            Error log: {elog}\n
        """
        msg = "Running CMD: {cmd}".format(cmd=cmd)
        self.add_log(msg)
        _out, _code, _msg = backticks(cmd)
        if _code != 0:
            errMsgs = [
                "CMD exited with a non-zero code: {cmd}, {msg}".format(
                    cmd=cmd, msg=_msg)
            ]
            if len(description) != 0:
                errMsgs.append("{description}".format(description=description))
            if len(elog) != 0:
                errMsgs.append("Error log: {elog}".format(elog=elog))
            if len(olog) != 0:
                errMsgs.append("Out log: {olog}".format(olog=olog))
            errMsg = "\n".join(errMsgs)
            self.add_log(errMsg, level=logging.ERROR)
            raise RuntimeError(errMsg)
Example #45
    def _startPhmmers(self, chunked_reads_fns, chunked_dom_fns,
                      out_dom_fn, primer_fn, pbmatrix_fn):
        """Run phmmers on chunked reads files in 'chunked_reads_fns' and
        generate chunked dom files as listed in 'chunked_dom_fns', finally
        concatenate dom files to 'out_dom_fn'."""
        logging.info("Start to launch phmmer on chunked reads.")
        jobs = []
        for reads_fn, domFN in zip(chunked_reads_fns, chunked_dom_fns):
            p = multiprocessing.Process(
                target=self._phmmer,
                args=(reads_fn, domFN, primer_fn, pbmatrix_fn))
            jobs.append((p, domFN))
            p.start()

        for p, domFN in jobs:
            p.join()
            cmd = "cat {0} >> {1}".format(real_upath(domFN),
                                          real_upath(out_dom_fn))
            _output, errCode, errMsg = backticks(cmd)
            if errCode != 0:
                raise ClassifierException(
                    "Error concatenating dom files: {e}".
                    format(e=str(errMsg)))

        self._cleanup(chunked_reads_fns)
        self._cleanup(chunked_dom_fns)
Example #46
 def _checkPhmmer(self):
     """Check phmmer can be called successfully."""
     logging.info("checking for phmmer existence.")
     _output, errCode, errMsg = backticks("phmmer -h > /dev/null")
     if errCode != 0:
         raise ClassifierException("Unable to invoke phmmer.\n{e}".
                                   format(e=errMsg))
Example #47
    def test_missing_fai_error_message(self):
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
            i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
            o=inFas))
        rs1 = ContigSet(inFas)
        with self.assertRaises(IOError) as cm:
            rs1.assertIndexed()
        self.assertEqual(
            str(cm.exception),
            ( "Companion FASTA index (.fai) file not found or malformatted! "
             "Use 'samtools faidx' to generate FASTA index."))
Example #48
    def _output(self, inSam, refFile, outFile, readType=None, smrtTitle=False):
        """Generate a SAM, BAM or a CMP.H5 file.
        Input:
            inSam   : an input SAM/BAM file. (e.g. fileName.filteredSam)
            refFile : the reference file. (e.g. fileName.targetFileName)
            outFile : the output SAM/BAM or CMP.H5 file.
                      (i.e. fileName.outputFileName)
            readType: standard or cDNA or CCS (can be None if not specified)
        Output:
            output, errCode, errMsg
        """
        output, errCode, errMsg = "", 0, ""
        prog = "OutputService"  # refined below when an external tool is invoked

        outFormat = getFileFormat(outFile)

        if outFormat == FILE_FORMATS.BAM:
            pass  # Nothing to be done
        elif outFormat == FILE_FORMATS.SAM:
            logging.info("OutputService: Generate the output SAM file.")
            logging.debug("OutputService: Move {src} as {dst}".format(
                src=inSam, dst=outFile))
            try:
                shutil.move(real_ppath(inSam), real_ppath(outFile))
            except shutil.Error as e:
                output, errCode, errMsg = "", 1, str(e)
        elif outFormat == FILE_FORMATS.CMP:
            # samtoh5 inSam outFile -readType readType
            logging.info("OutputService: Generate the output CMP.H5 " +
                         "file using samtoh5.")
            prog = "samtoh5"
            cmd = "samtoh5 {samFile} {refFile} {outFile}".format(
                samFile=inSam, refFile=refFile, outFile=outFile)
            if readType is not None:
                cmd += " -readType {0} ".format(readType)
            if smrtTitle:
                cmd += " -smrtTitle "
            # Execute the command line
            logging.debug("OutputService: Call \"{0}\"".format(cmd))
            output, errCode, errMsg = backticks(cmd)
        elif outFormat == FILE_FORMATS.XML:
            logging.info("OutputService: Generating the output XML file.")
            # Create {out}.xml, given {out}.bam
            outBam = str(outFile[0:-3]) + "bam"
            aln = None
            # FIXME This should really be more automatic
            if self.args.readType == "CCS":
                self._output_dataset_type = ConsensusAlignmentSet
            aln = self._output_dataset_type(real_ppath(outBam))
            for res in aln.externalResources:
                res.reference = refFile
            aln.write(outFile)

        if errCode != 0:
            errMsg = prog + " returned a non-zero exit status." + errMsg
            logging.error(errMsg)
            raise RuntimeError(errMsg)
        return output, errCode, errMsg
Example #49
def bax2bam_path():
    """Return path to bax2bam"""
    cmd = "which bax2bam"
    o, c, m = backticks(cmd)
    if c != 0:
        raise RuntimeError("could not find bax2bam")
    else:
        return o[0]
Example #50
def _nfs_exists_check(ff):
    """Return whether a file or a dir ff exists or not.
    Call ls instead of python os.path.exists to eliminate NFS errors.
    """
    # this is taken from Yuan
    cmd = "ls %s" % ff
    output, errCode, errMsg = backticks(cmd)
    return errCode == 0
Example #51
def sanity_check_gcon():
    """Sanity check gcon."""
    cmd = gcon_py + " --help"
    _out, _code, _msg = backticks(cmd)
    if _code != 0:
        msg = gcon_py + " is not installed."
        raise RuntimeError(msg)
    return gcon_py
Example #52
 def _releaseLock(self, dbLock):
     """Release dbLock."""
     _o, errCode, _m = backticks("rm -f {dbLock}".format(dbLock=dbLock))
     if errCode == 0:
         logging.debug(self.name + ": Release the lock for DB creation.")
     else:
         raise RuntimeError(self.name + ": Failed to release lock " +
                            dbLock + ". Please delete the lock manually.")
Example #53
def _pbmergeXML(indset, outbam):
    cmd = "pbmerge -o {o} {i} ".format(i=indset, o=outbam)
    log.info(cmd)
    o, r, m = backticks(cmd)
    if r != 0:
        raise RuntimeError("Pbmerge command failed: {c}\n Message: "
                           "{m}".format(c=cmd, m=m))
    return outbam
Example #54
    def test_copyTo_cli(self):
        # To a fname:
        # absolute:
        fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
        cmd = "dataset copyto {i} {o}".format(i=data.getXml(8), o=fn)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn))
        sset = AlignmentSet(fn, strict=True)
        self.assertFalse(_is_relative(fn))

        # relative:
        fn = tempfile.NamedTemporaryFile(suffix=".alignmentset.xml").name
        cmd = "dataset copyto --relative {i} {o}".format(i=data.getXml(8),
                                                         o=fn)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn))
        sset = AlignmentSet(fn, strict=True)
        self.assertTrue(_is_relative(fn))

        # to a directory:
        # absolute:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        fn = os.path.join(outdir, os.path.split(data.getXml(8))[1])
        cmd = "dataset copyto {i} {o}".format(i=data.getXml(8), o=outdir)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn))
        sset = AlignmentSet(fn, strict=True)
        self.assertFalse(_is_relative(fn))

        # relative:
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")
        fn = os.path.join(outdir, os.path.split(data.getXml(8))[1])
        cmd = "dataset copyto --relative {i} {o}".format(i=data.getXml(8),
                                                         o=outdir)
        log.debug(cmd)
        o, r, m = backticks(cmd)
        self.assertEqual(r, 0)
        self.assertTrue(os.path.exists(fn))
        sset = AlignmentSet(fn, strict=True)
        self.assertTrue(_is_relative(fn))
Example #55
 def numReads(self):
     """Return the number of reads in reads_fn."""
     cmd = "grep -c '>' {r}".format(r=real_upath(self.reads_fn))
     output, errCode, errMsg = backticks(cmd)
     if errCode != 0:
         raise ClassifierException("Error reading file {r}:{e}".format(
             r=self.reads_fn, e=str(errMsg)))
     return int(output[0])
Example #56
def wait_for_sge_jobs_worker(cmd):
    _out, _code, _msg = backticks(cmd)
    if _code != 0:
        errMsg = "Failed to qsub CMD: {cmd}, {msg}.".\
            format(cmd=cmd, msg=_msg)
        raise RuntimeError(errMsg)
    # Your job 596028 ("a.sh") has been submitted
    return str(_out).split()[2]
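
As the trailing comment shows, the job ID is recovered by whitespace-splitting qsub's acknowledgement line; a quick sanity check of that parsing:

# Field 2 of qsub's acknowledgement line is the numeric job ID.
line = 'Your job 596028 ("a.sh") has been submitted'
assert line.split()[2] == "596028"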
Example #57
def sanity_check_gcon():
    """Sanity check gcon."""
    cmd = gcon_py + " --help"
    _out, _code, _msg = backticks(cmd)
    if _code != 0:
        msg = gcon_py + " is not installed."
        raise RuntimeError(msg)
    return gcon_py
Example #58
def _sortBam(fname):
    tmpname = _infixFname(fname, "_sorted")
    cmd = "bamtools sort -in {i} -out {o}".format(i=fname, o=tmpname)
    log.info(cmd)
    o, r, m = backticks(cmd)
    if r != 0:
        raise RuntimeError(m)
    shutil.move(tmpname, fname)
Example #59
def isExist(ff):
    """Return whether a file or a dir ff exists or not.
    Call ls instead of python os.path.exists to eliminate NFS errors.
    """
    if ff is None:
        return False
    cmd = "ls %s" % real_upath(ff)
    _output, errCode, _errMsg = backticks(cmd)
    return (errCode == 0)
Example #60
def _mergeBams(inFiles, outFile):
    if len(inFiles) > 1:
        cmd = "bamtools merge -in {i} -out {o}".format(i=' -in '.join(inFiles),
                                                       o=outFile)
        log.info(cmd)
        o, r, m = backticks(cmd)
        if r != 0:
            raise RuntimeError(m)
    else:
        shutil.copy(inFiles[0], outFile)