Esempio n. 1
0
    def test_run_raises_AttributeError_if_pbs_file_not_set(self):
        """A freshly constructed BasePipe must refuse to run.

        Nothing is assigned to the pipe before `run()` is called; the call
        is expected to raise AttributeError with a message matching
        ``file.*not set``.
        """
        expected_message = r'file.*not set'
        unconfigured = BasePipe()

        with self.assertRaisesRegex(AttributeError, expected_message):
            unconfigured.run()
Esempio n. 2
0
 def test_run_raises_OSError_if_pbs_file_not_found(self):
     """`run()` must raise OSError when the configured pbs file is absent.

     `pbs_file` is assigned, but `os.path.isfile` is patched to always
     return False for the duration of the call.
     """
     pipe_under_test = BasePipe()
     pipe_under_test.pbs_file = 'script.pbs'

     # Make every file-existence check report "missing".
     missing_file = patch.object(os.path, 'isfile', return_value=False)
     with missing_file, self.assertRaises(OSError):
         pipe_under_test.run()
Esempio n. 3
0
def pipe(file_list, genome, project_dir, force=False):
    """Build, write and run one `BasePipe` per entry of `file_list`.

    For each entry, an output directory ``<project_dir>/<name>`` is created
    via `path.makedirs`, the command objects are constructed, and they are
    added to a `BasePipe` in this order: FastqcCmd, SkewerCmd, FastqcCmd,
    HisatCmd, SamtoolsSortCmd, SamtoolsIndexCmd, BedtoolsMulticovCmd.
    The pipe's script is then written and run.

    Arguments:
        file_list: Iterable of indexable entries. Element 0 of an entry is
            the sample name; the remaining elements are passed positionally
            to the first FastqcCmd and to SkewerCmd.
        genome: Passed as '-x' to HisatCmd and as '-bed' to
            BedtoolsMulticovCmd; its basename is used in the SAM file name
            and in the job name.
        project_dir: Directory under which per-sample directories are made.
        force: Forwarded unchanged to `BasePipe`.

    Returns:
        None.
    """
    # One timestamp is shared by every aligner command created in this call.
    run_stamp = time.strftime("%y%m%d-%H%M%S")

    for entry in file_list:
        name, files = entry[0], entry[1:]

        out_dir = os.path.join(project_dir, name)
        path.makedirs(out_dir)
        out_prefix = os.path.join(out_dir, name)

        # NOTE(review): an earlier revision passed each command's output()
        # explicitly to the next command. Here the second FastqcCmd and the
        # samtools commands get no inputs -- presumably BasePipe links the
        # commands itself; confirm against BasePipe.add.

        # 1st fastqc, on the raw input files
        raw_qc = FastqcCmd(*files, o=out_dir)

        # trimming
        trimmer = SkewerCmd(*files, o=out_prefix)

        # 2nd fastqc
        trimmed_qc = FastqcCmd(o=out_dir)

        # setup alignment
        # NOTE: need to check for encoding
        genome_name = os.path.basename(genome)
        sam_file = '{}_{}.sam'.format(out_prefix, genome_name)
        align_kwargs = {
            '-x': genome,
            '-S': sam_file,
            '-p': 3,  # set for local (should use pbs parameters on qsub)
        }
        aligner = HisatCmd(timestamp=run_stamp, **align_kwargs)

        # samtools
        sorter = SamtoolsSortCmd()
        indexer = SamtoolsIndexCmd()

        # count
        count_kwargs = {'-bed': genome}
        counter = BedtoolsMulticovCmd(**count_kwargs)

        # Setup pipe
        # NOTE: This is the alpha test of the pipe class.
        job_name = name + '_' + genome_name
        job = BasePipe(job_name=job_name, force=force)
        job.add(
            raw_qc,
            trimmer,
            trimmed_qc,
            aligner,
            sorter,
            indexer,
            counter,
        )

        # write pbs file & run
        job.write_script()
        job.run()