Example #1
0
    def test_cmscan_task_multithreaded(self, tmpdir, datadir):
        """Compare e-values from single- and multi-threaded CMScanTask runs.

        For each thread count in (2, 3, 4, 5) a CMPressTask is built for the
        model, one CMScanTask is run with ``n_threads=1`` and another with
        ``n_threads=n_threads``, and the ``e_value`` columns parsed from the
        two outputs must match once sorted.
        """
        with tmpdir.as_cwd():
            transcript = datadir('rnaseP-bsu.fa')
            cm = datadir('rnaseP-eubact.c.cm')
            out_single = str(tmpdir.join('single'))
            out_multi = str(tmpdir.join('multi'))

            for n_threads in (2, 3, 4, 5):

                db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
                aln_tasks_single = CMScanTask().task(transcript,
                                                     out_single,
                                                     cm,
                                                     cutoff=1.0,
                                                     n_threads=1)
                aln_tasks_multi = CMScanTask().task(transcript,
                                                    out_multi,
                                                    cm,
                                                    cutoff=1.0,
                                                    n_threads=n_threads)
                run_tasks([db_task, aln_tasks_single], ['run'])
                run_task(aln_tasks_multi)

                alns_single = pd.concat(InfernalParser(out_single))
                alns_multi = pd.concat(InfernalParser(out_multi))

                # sort_values() keeps each row's original label, so the two
                # sorted Series may carry differently ordered indexes. Drop
                # the labels so the comparison is purely positional, as the
                # hmmscan multithreaded test does.
                evalues_single = \
                    alns_single['e_value'].sort_values().reset_index(drop=True)
                evalues_multi = \
                    alns_multi['e_value'].sort_values().reset_index(drop=True)

                assert all(evalues_single == evalues_multi)
Example #2
0
    def test_hmmscan_task_multithreaded(self, tmpdir, datadir):
        """Compare domain e-values from single- and multi-threaded HMMScanTask runs.

        For each thread count in (2, 3, 4, 5) an HMMPressTask is built for the
        profile, one HMMScanTask is run with ``n_threads=1`` and another with
        ``n_threads=n_threads``, and the sorted ``domain_i_evalue`` columns
        parsed from the two outputs must be equal position by position.
        """
        with tmpdir.as_cwd():
            prot = datadir('20aa-alitest.fa')
            hmm = datadir('20aa.hmm')
            out_single = str(tmpdir.join('out-single'))
            out_multi = str(tmpdir.join('out-multi'))

            for n_threads in (2, 3, 4, 5):
                db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
                aln_task_single = HMMScanTask().task(prot,
                                                     out_single,
                                                     hmm,
                                                     cutoff=1.0,
                                                     n_threads=1)
                aln_task_multi = HMMScanTask().task(prot,
                                                    out_multi,
                                                    hmm,
                                                    cutoff=1.0,
                                                    n_threads=n_threads)
                run_tasks([db_task, aln_task_single], ['run'])
                run_tasks([aln_task_multi], ['run'])
                print(os.listdir(), file=sys.stderr)

                # Dump the single-threaded output for debugging; the context
                # manager closes the handle on every loop iteration.
                with open(out_single) as fp:
                    print(fp.read())
                alns_single = pd.concat(HMMerParser(out_single))
                alns_multi = pd.concat(HMMerParser(out_multi))

                # Reset the index after sorting so rows are compared by
                # position rather than by their original labels.
                assert all(alns_single['domain_i_evalue'].sort_values().reset_index(drop=True) == \
                           alns_multi['domain_i_evalue'].sort_values().reset_index(drop=True))
Example #3
0
    def test_hmmpress_task_existing(self, tmpdir, datadir):
        """An HMMPressTask whose output files already exist reports 'up-to-date'.

        Every ``profile + ext`` file for ``ext`` in ``self.extensions`` is
        touched before the task is built and run.
        """
        with tmpdir.as_cwd():
            profile = datadir('test-profile.hmm')

            # Pre-create each expected output file next to the profile.
            for suffix in self.extensions:
                touch(profile + suffix)

            press_task = HMMPressTask().task(profile)
            run_tasks([press_task], ['run'])

            # Debug output: working directory contents and the task itself.
            print(os.listdir(), file=sys.stderr)
            print(press_task, file=sys.stderr)

            assert check_status(press_task).status == 'up-to-date'
Example #4
0
    def test_cmpress_task(self, tmpdir, datadir):
        """Running a CMPressTask creates every expected file and is up to date.

        After the run, ``model + ext`` must be a regular file for each ``ext``
        in ``self.extensions`` and the task status must be 'up-to-date'.
        """
        with tmpdir.as_cwd():
            model = datadir('test-covariance-model.cm')
            press_task = CMPressTask().task(model, params=self.cmpress_cfg)
            run_tasks([press_task], ['run'])
            result = check_status(press_task)
            print(os.listdir(), file=sys.stderr)

            # Collect the extensions whose output file is absent; none may be.
            missing = [suffix for suffix in self.extensions
                       if not os.path.isfile(model + suffix)]
            assert not missing

            assert result.status == 'up-to-date'
Example #5
0
def get_prices(data):
    """Persist market ids and schedule the price downloads for each URL pair.

    ``data`` is unpacked as ``(marketId_entries, urls)``, where ``urls``
    yields ``(intraday_url, historical_url)`` pairs. The market id entries
    are persisted through ``dbMgr`` first. The module-level ``counter`` is
    then incremented and wrapped back to 0 whenever it becomes a multiple of
    ``interval_historcial``; historical downloads are scheduled only on calls
    where the counter is 0 after that step.

    Returns whatever ``run_tasks`` returns for the task list, run with a
    concurrency argument of 10.
    """
    global counter

    marketId_entries, urls = data
    dbMgr.persist_marketId(marketId_entries)

    counter += 1
    if counter % interval_historcial == 0:
        counter = 0
    include_historical = counter == 0

    def persist_intraday(data):
        # Wrap the DB write so the callback always yields a Deferred.
        return defer.maybeDeferred(dbMgr.persist_intraday_data, data)

    def persist_historical(data):
        # Same wrapping for the historical data write.
        return defer.maybeDeferred(dbMgr.persist_historical_data, data)

    max_concurrent = 10
    tasks = []
    for intraday_url, historical_url in urls:
        tasks.append((download_price_data, intraday_url, persist_intraday))
        if include_historical:
            tasks.append(
                (download_price_data, historical_url, persist_historical))

    return run_tasks(tasks, max_concurrent)
Example #6
0
    def test_hmmscan_task(self, tmpdir, datadir):
        """Run HMMPressTask then HMMScanTask and sanity-check the output text.

        The output file must contain the substring 'accession' exactly twice
        and must contain 'i-Evalue'.
        """
        with tmpdir.as_cwd():
            prot = datadir('test-protein.fa')
            hmm = datadir('test-profile.hmm')
            out = str(tmpdir.join('test.out'))

            db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
            aln_task = HMMScanTask().task(prot, out, hmm,
                                          cutoff=1.0, n_threads=1)

            run_tasks([db_task, aln_task], ['run'])
            print(os.listdir(), file=sys.stderr)
            # Read via a context manager so the handle is closed promptly.
            with open(out) as fp:
                aln = fp.read()
            print(aln)

            assert aln.count('accession') == 2
            assert 'i-Evalue' in aln
Example #7
0
    def test_longorfs_task(self, tmpdir, datadir):
        """Run TransDecoderLongOrfsTask and check the expected ORF is produced.

        The task is expected to write ``longest_orfs.pep`` inside
        ``<transcript>.transdecoder_dir``; that file must exist and contain
        the full text of the expected peptide file.
        """
        with tmpdir.as_cwd():
            transcript = datadir('test-transcript.fa')
            exp_orf = datadir('test-transcript-orf.pep')

            task = TransDecoderLongOrfsTask().task(transcript,
                                                   params=self.longorfs_cfg)
            run_tasks([task], ['run'])
            output_dir = transcript + '.transdecoder_dir'

            # Context managers close both files instead of leaking handles.
            with open(exp_orf) as fp:
                exp_pep = fp.read()

            pep_fn = os.path.join(output_dir, 'longest_orfs.pep')
            assert os.path.isfile(pep_fn)
            with open(pep_fn) as fp:
                pep = fp.read()

            assert exp_pep in pep
Example #8
0
    def test_cmscan_task(self, tmpdir, datadir):
        """Run CMPressTask then CMScanTask and sanity-check the output text.

        The output file must contain the substring 'accession' exactly twice
        and must contain 'E-value'.
        """
        with tmpdir.as_cwd():
            transcript = datadir('test-transcript.fa')
            cm = datadir('test-covariance-model.cm')
            out = str(tmpdir.join('test.out'))

            db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
            aln_task = CMScanTask().task(transcript,
                                         out,
                                         cm,
                                         cutoff=1.0,
                                         n_threads=1)
            run_tasks([db_task, aln_task], ['run'])
            print(os.listdir(), file=sys.stderr)
            # read() yields the same text as joining readlines(), and the
            # context manager closes the handle.
            with open(out) as fp:
                aln = fp.read()
            print(aln, file=sys.stderr)

            # TODO: better correctness check
            assert aln.count('accession') == 2
            assert 'E-value' in aln
Example #9
0
    def test_predict_task(self, tmpdir, datadir):
        """Run the LongOrfs and Predict tasks and check the predicted outputs.

        For every ``ext`` in ``self.extensions`` the file
        ``<transcript>.transdecoder<ext>`` must exist. The '.gff3' file must
        additionally contain the substrings 'mRNA', 'gene', 'CDS',
        'three_prime_UTR' and 'exon'.
        """
        with tmpdir.as_cwd():
            transcript = datadir('pom.50.fa')
            pfam = datadir('test-protein-x-pfam-a.tbl')

            orf_task = TransDecoderLongOrfsTask().task(
                transcript, params=self.longorfs_cfg)
            pred_task = TransDecoderPredictTask().task(transcript,
                                                       pfam,
                                                       params=self.predict_cfg)
            run_tasks([orf_task, pred_task], ['run'])

            pprint(tmpdir.listdir())
            for ext in self.extensions:
                fn = transcript + '.transdecoder' + ext
                assert os.path.isfile(fn)
                # Close each output file as soon as it has been read rather
                # than leaking one handle per extension.
                with open(fn) as fp:
                    contents = fp.read()
                if ext == '.gff3':
                    assert 'mRNA' in contents
                    assert 'gene' in contents
                    assert 'CDS' in contents
                    assert 'three_prime_UTR' in contents
                    assert 'exon' in contents