Example #1
0
def run_test(
    basic: Basic,
    code: str,
    name: str,
    res_type: type_res = None,
    err_type: type_err = None,
    value: type_value = None,
    repl: bool = True,
) -> None:
    """Run *code* through ``basic.run`` and assert on what comes back.

    Args:
        basic: Object whose ``run(code, name, repl)`` method is exercised.
        code: Source text handed to ``basic.run``.
        name: Name handed to ``basic.run`` alongside the source.
        res_type: Expected type of each result. A single (truthy) type is
            treated as a one-element list; a falsy non-list means "no
            results expected".
        err_type: Type the raised ``Error`` must be an instance of. When it
            is ``None`` the run must finish without raising ``Error``.
        value: Expected ``.value`` of each result. A single non-``None``
            value is treated as a one-element list; ``Ellipsis`` entries
            skip the comparison for that position.
        repl: Forwarded unchanged as the third argument of ``basic.run``.
    """
    # Normalise the "single item or list" arguments into plain lists.
    if isinstance(res_type, list):
        expected_types = res_type
    elif res_type:
        expected_types = [res_type]
    else:
        expected_types = []

    if isinstance(value, list):
        expected_values = value
    elif value is not None:
        expected_values = [value]
    else:
        expected_values = []

    try:
        results = basic.run(code, name, repl)
        assert len(results) == len(expected_types)
        for position, result in enumerate(results):
            assert expected_types and isinstance(result, expected_types[position])
            if not expected_values:
                continue
            wanted = expected_values[position]
            if wanted is Ellipsis:
                continue
            # Show both sides before the assertion fires to ease debugging.
            if result.value != wanted:
                print(result.value, wanted)
            assert result.value == wanted
    except Error as err:
        assert err_type and isinstance(err, err_type)
    else:
        assert err_type is None
Example #2
0
def main(debug: bool = False) -> None:
    """Run an interactive read-eval-print loop on a single ``Basic`` instance.

    Each line read from the ``basic > `` prompt is passed to ``Basic.run``
    under the name ``"<stdin>"``. Every returned result is printed, and an
    ``Error`` is reported through ``print_error`` without leaving the loop.
    The loop ends cleanly on end-of-input (Ctrl-D or exhausted piped stdin)
    or on a keyboard interrupt at the prompt, instead of dying with a
    traceback.

    Args:
        debug: Forwarded unchanged as the fourth argument of ``Basic.run``.
    """
    basic = Basic()
    while True:
        try:
            text = input("basic > ")
        except (EOFError, KeyboardInterrupt):
            # Terminate the prompt line so the shell prompt starts on a
            # fresh line, then leave the REPL.
            print()
            break
        try:
            results = basic.run(text, "<stdin>", True, debug)
            for result in results:
                print(result)
        except Error as err:
            print_error(err)
Example #3
0
def main(file_name: str) -> None:
    """Execute the source stored in *file_name* with a fresh ``Basic``.

    The whole file is read up front and handed to ``Basic.run`` together
    with the file name. Each returned result is printed; an ``Error`` raised
    along the way is reported through ``print_error``.

    Args:
        file_name: Path of the source file to read and run.
    """
    with open(file_name, "r") as source_file:
        program_text = source_file.read()

    interpreter = Basic()
    try:
        outputs = interpreter.run(program_text, file_name)
        # Printing stays inside the try so an Error raised while results
        # are being consumed is reported the same way.
        for output in outputs:
            print(output)
    except Error as failure:
        print_error(failure)
Example #4
0
    def submit(self, number=None, mem="15G"):
        """
        Submit per-sample PBS scripts with ``qsub``.

        Sample IDs are the first tab-separated field of every non-blank line
        of ``self.sample_list_file``; the script submitted for a sample is
        ``<self.pbs_dir>/<sample ID>.pbs``.

        number: which jobs to submit:
            "all"      - one job per sample in the list
            "last_one" - only the last sample in the list
            any other value (including the default None) submits nothing.
        mem: value placed in the ``-l mem=...`` resource request.

        An empty sample list submits nothing instead of raising IndexError.
        """
        # Read inside a context manager so the handle is always closed, and
        # drop the line terminator before splitting so a single-column line
        # does not leak "\n" into the script path.
        with open(self.sample_list_file, "r") as handle:
            sample_ids = [
                line.rstrip("\r\n").split("\t")[0]
                for line in handle
                if line.strip()
            ]

        if number == "last_one":
            selected = sample_ids[-1:]  # empty list when there are no samples
        elif number == "all":
            selected = sample_ids
        else:
            selected = []

        for sample_id in selected:
            pbs_file = os.path.join(self.pbs_dir, sample_id + ".pbs")
            cmd = "qsub -l nodes=1:ppn=%s -l mem=%s %s" % (self.threads, mem,
                                                           pbs_file)
            # NOTE(review): assumes Basic.run returns a process-like object
            # exposing a readable ``stdout`` - confirm against Basic.run.
            p = Basic.run(cmd, wkdir=self.pbs_dir)
            print(p.stdout.read())
        print("finish submit pbs!")
    def process(self, sampleID=None, fastq_1=None, fastq_2=None):
        """
        Run the mapping-to-heteroplasmy pipeline for one sample.

        Every step is a shell command executed through ``Basic.run`` with
        ``self.prefix`` as the working directory:
          1. ``bwa mem`` of the two FASTQ files against ``self.whole_genome``,
             piped through ``samtools view`` and ``samtools sort``, followed
             by ``samtools index``;
          2. extraction of the ``chrM`` reads;
          3. split of those reads by presence of ``XA:Z:`` / ``SA:Z:`` tags;
          4. sort of the tag-free reads and ``samtools mpileup``;
          5. heteroplasmy calling (``self.het_raw``) and filtering
             (``self.het_filter``, run with ``python2``).

        BAM files and the log go to ``<self.bam_dir>/<sampleID>/``; the
        pileup and the final heteroplasmy table go to ``self.mpile_dir``.

        sampleID: sample name, used in the read group and all output names.
        fastq_1, fastq_2: the two FASTQ files handed to ``bwa mem``.
        """
        oneSample_dir = os.path.join(self.bam_dir, sampleID)
        # exist_ok avoids the check-then-create race when several jobs for
        # the same sample directory start at once.
        os.makedirs(oneSample_dir, exist_ok=True)
        log_file = os.path.join(oneSample_dir, sampleID + ".log.txt")

        #step1. bwa mapping to whole genome
        sort_bam = os.path.join(oneSample_dir, sampleID + ".sort.bam")
        cmd = """%s mem -R '@RG\\tID:GRCH37\\tSM:%s\\tLB:\\tPL:ILLUMINA' -t %s %s %s %s|%s view -@ %s -Shu -|%s sort -@ %s -o %s - > %s;\n""" % \
                (self.bwa, sampleID, self.threads, self.whole_genome, fastq_1, fastq_2, self.samtools, self.threads,self.samtools, self.threads,\
                 sort_bam, log_file)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        ##step1.1 samtools index for the sorted bam file
        cmd = """%s index %s;\n""" % (self.samtools, sort_bam)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step2. keep only the reads mapped to chrM
        MT_bam = os.path.join(oneSample_dir, sampleID + ".MT.bam")
        cmd = "%s view -bh %s chrM -o %s;\n" % (self.samtools, sort_bam,
                                                MT_bam)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step3. obtain unique mapped reads
        # Reads without XA:Z:/SA:Z: tags go to the "unique" BAM, reads with
        # either tag go to the "noneUnique" BAM. The downstream end of each
        # pipe uses the configured samtools binary, like the upstream end,
        # rather than whatever "samtools" happens to be on PATH.
        MT_bam_unique = os.path.join(oneSample_dir,
                                     sampleID + ".unique.MT.bam")
        MT_bam_noneUnique = os.path.join(oneSample_dir,
                                         sampleID + ".noneUnique.MT.bam")
        cmd = "%s view -h %s | grep -v -e 'XA:Z:' -e 'SA:Z:' | %s view -b > %s;" % \
                (self.samtools, MT_bam, self.samtools, MT_bam_unique)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        cmd = "%s view -h %s | grep -e 'XA:Z:' -e 'SA:Z:' | %s view -b > %s;" % \
                (self.samtools, MT_bam, self.samtools, MT_bam_noneUnique)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step4. mpileup
        # NOTE(review): the sort thread count is fixed at 7 here while the
        # other steps use self.threads - confirm whether that is intended.
        p_sort_bam = os.path.join(oneSample_dir, sampleID + ".p.sort.bam")
        cmd = "%s sort -@ 7 -o %s %s;\n" % (self.samtools, p_sort_bam,
                                            MT_bam_unique)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        mpile_file = os.path.join(self.mpile_dir, sampleID + ".mpile.file")
        cmd = "%s mpileup -B -Q 30 -d 1000000 -L 10000 -f %s %s > %s;" % (
            self.samtools, self.ref_genome, p_sort_bam, mpile_file)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step5. call heteroplasmy
        # raw calls from the pileup
        heteroplasmy_raw = os.path.join(oneSample_dir, sampleID + ".mp.raw")
        cmd = '%s -i %s -o %s;\n' % (self.het_raw, mpile_file,
                                     heteroplasmy_raw)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        # filtered calls written next to the pileup
        heteropasmy = os.path.join(self.mpile_dir,
                                   sampleID + ".heteroplasmy.txt")
        cmd = 'python2 %s --loose %s --chi %s -d %s --mle %s -i %s -o %s' \
                             %(self.het_filter, "0.003,0.003", "0.0", "1000", "0", heteroplasmy_raw, heteropasmy)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        print("finish detetcting!")
    def new_process(self, sampleID=None, fastq_1=None, fastq_2=None):
        """
        Run the extended mapping-to-heteroplasmy pipeline for one sample.

        Shell commands are executed through ``Basic.run`` with
        ``self.prefix`` as the working directory; BAM post-processing that is
        not a shell command goes through a ``bamIO`` object:
          1. ``bwa mem`` of the two FASTQ files against ``self.whole_genome``,
             piped through ``samtools view`` / ``sort``, then indexed;
          2. ``bamIO.run()`` on this sample's sorted BAM, result indexed;
          3. duplicate marking (``self.markduplicate``);
          4. local realignment (``self.localRealignment``);
          5. mismatch filtering (``bamIO.limit_mismatch``, ``mismatch_num=1``);
          6. removal of reads tagged ``XA:Z:`` / ``SA:Z:``, sort, and
             ``samtools mpileup``;
          7. heteroplasmy calling (``self.het_raw``) and filtering
             (``self.het_filter``, run with ``python2``).

        Read counts gathered along the way are stored in
        ``bamio.sample_meta_infor`` and printed at the end.

        sampleID: sample name, used in the read group and all output names.
        fastq_1, fastq_2: the two FASTQ files handed to ``bwa mem``.
        """
        oneSample_dir = os.path.join(self.bam_dir, sampleID)
        # exist_ok avoids the check-then-create race when several jobs for
        # the same sample directory start at once.
        os.makedirs(oneSample_dir, exist_ok=True)
        log_file = os.path.join(oneSample_dir, sampleID + ".log.txt")

        #step1. bwa mapping to whole genome
        sort_bam = os.path.join(oneSample_dir, sampleID + ".sort.bam")
        cmd = """%s mem -R '@RG\\tID:GRCH37\\tSM:%s\\tLB:mitochondira\\tPL:ILLUMINA' -t %s %s %s %s|%s view -@ %s -Shu -|%s sort -@ %s -o %s - > %s;\n""" % \
                (self.bwa, sampleID, self.threads, self.whole_genome, fastq_1, fastq_2, self.samtools, self.threads,self.samtools, self.threads,\
                 sort_bam, log_file)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        cmd = """%s index %s;\n""" % (self.samtools, sort_bam)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step2. filter out read mapped in bam file
        # Use the BAM produced in step 1 for *this* sample. The previous
        # hard-coded absolute path pointed every run at one fixed sample.
        bamio = bamIO(bam_file=sort_bam)
        MTbam = bamio.run()
        cmd = """%s index %s;\n""" % (self.samtools, MTbam)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step3. mark duplicate
        marked_duplicated_bam = os.path.join(oneSample_dir,
                                             sampleID + ".mkdup.bam")
        mkmatrix = os.path.join(oneSample_dir, sampleID + ".mkdup.matrix.txt")
        cmd = self.markduplicate(input_bam=MTbam,
                                 marked_duplicated_bam=marked_duplicated_bam,
                                 marked_dup_metrics_txt=mkmatrix)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        bamio.sample_meta_infor["befor_mkduped_read"] = bamio.mapped_read(
            input_bam=MTbam)
        cmd = """%s index %s;\n""" % (self.samtools, marked_duplicated_bam)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        bamio.sample_meta_infor["after_mkduped_read"] = bamio.mapped_read(
            input_bam=marked_duplicated_bam)

        #step4. local realignment
        localRealign_bam = os.path.join(oneSample_dir,
                                        sampleID + ".localRealign.bam")
        cmd = self.localRealignment(input_bam=marked_duplicated_bam,
                                    output_bam=localRealign_bam)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        cmd = """%s index %s;\n""" % (self.samtools, localRealign_bam)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        bamio.sample_meta_infor["after_local_realign"] = bamio.mapped_read(
            input_bam=localRealign_bam)

        #step5. mismatch filter
        mismatch_bam = os.path.join(oneSample_dir,
                                    sampleID + ".mismatchFilter.bam")
        bamio.limit_mismatch(input_bam=localRealign_bam,
                             output_bam=mismatch_bam,
                             mismatch_num=1)
        # Index the file this step just wrote; localRealign_bam was already
        # indexed in step 4.
        cmd = """%s index %s;\n""" % (self.samtools, mismatch_bam)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step6. mpileup
        p_sort_bam = os.path.join(oneSample_dir, sampleID + ".p.sort.bam")
        MT_bam_unique = os.path.join(oneSample_dir,
                                     sampleID + ".unique.MT.bam")
        # Drop reads carrying XA:Z:/SA:Z: tags. Both ends of the pipe use the
        # configured samtools binary rather than whatever is on PATH.
        cmd = "%s view -h %s | grep -v -e 'XA:Z:' -e 'SA:Z:' | %s view -b > %s;" % \
                (self.samtools, mismatch_bam, self.samtools, MT_bam_unique)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        cmd = "%s sort -@ 7 -o %s %s;\n" % (self.samtools, p_sort_bam,
                                            MT_bam_unique)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        mpile_file = os.path.join(self.mpile_dir, sampleID + ".mpile.file")
        # Pile up the sorted BAM produced just above (as process() does);
        # previously the sort output was written but never read.
        cmd = "%s mpileup -B -Q 20 -q 20 -d 1000000 -L 10000 -f %s %s > %s;" % (
            self.samtools, self.ref_genome, p_sort_bam, mpile_file)
        Basic.run(cmd=cmd, wkdir=self.prefix)

        #step7. call heteroplasmy
        # raw calls from the pileup
        heteroplasmy_raw = os.path.join(oneSample_dir, sampleID + ".mp.raw")
        cmd = '%s -i %s -o %s;\n' % (self.het_raw, mpile_file,
                                     heteroplasmy_raw)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        # filtered calls written next to the pileup
        heteropasmy = os.path.join(self.mpile_dir,
                                   sampleID + ".heteroplasmy.txt")
        cmd = 'python2 %s --loose %s --chi %s -d %s --mle %s -i %s -o %s' \
                             %(self.het_filter, "0.003,0.003", "0.0", "1000", "0", heteroplasmy_raw, heteropasmy)
        Basic.run(cmd=cmd, wkdir=self.prefix)
        print("finish heteroplasmy detecting!")
        print(bamio.sample_meta_infor)