コード例 #1
0
 def __init__(self):
     """
     Create the message helper and reset every field to its default.

     The ``*_par`` / ``*_parm`` attributes hold upper-case key names; the
     attributes without that suffix hold the matching empty/zero defaults.
     """
     self._message, self._file_conf = Message(), None

     # Key names, in the order they were originally declared.
     self._name_par, self._replic_parm = "NAME", "REPLIC"
     self._group_number_parm, self._group_name_parm = "GROUP_NUMBER", "GROUP_NAMES"
     self._reference_parm, self._read_directory_parm = "REFERENCE_GENOME", "READS_DIRECTORY"
     self._group_directory_parm, self._paired_end_parm = "GROUP_DIRECTORIES", "PAIRED_END"
     self._threads_parm, self._count_mode_parm = "THREADS", "MODE"
     self._annotation_file_parm, self._annotation_type_parm = "ANOTATION_FILE", "ANOTATION_TYPE"
     self._output_parm = "OUTPUT"

     # Default values.
     # NOTE(review): _replic starts as a list here; confirm whether callers
     # expect an integer instead.
     self._name, self._replic = "", []
     self._group_number, self._group_name = 0, []
     self._reference, self._read_directory = "", ""
     self._group_directory, self._paired_end = [], False
     self._threads, self._count_mode = 0, ""
     self._annotation_file, self._annotation_type = "", ""
     self._output = ""
コード例 #2
0
ファイル: mappVo.py プロジェクト: santii/consexpression
 def __init__(self, name, index, read1_n, read2_n, threads, out, other, single_end):
     """
     Store the user-supplied mapping settings, then call ``parm_mapp()``.

     :param name: stored in ``_name``
     :param index: stored unchanged in ``_index_name``
     :param read1_n: stored in ``_reads1_name``
     :param read2_n: stored in ``_reads2_name``
     :param threads: stored in ``_threads_value``
     :param out: stored in ``_output_name``
     :param other: stored in ``_other_conf``
     :param single_end: stored in ``_paired_end`` (the names differ)
     """
     # Option prefixes all start out empty.
     self._index_parm = self._reads1_parm = self._reads2_parm = ""
     self._threads_parm = self._output_parm = self._command_parm = ""
     self._sep = " "

     # Values received from the caller; the index is kept exactly as given.
     self._name, self._index_name = name, index
     self._reads1_name, self._reads2_name = read1_n, read2_n
     self._threads_value, self._output_name = threads, out
     self._output_type = "--no-convert-bam "
     self._other_conf, self._paired_end = other, single_end

     self._message = Message()
     self.parm_mapp()
コード例 #3
0
ファイル: countBo.py プロジェクト: santii/consexpression
 def __init__(self, count):
     """
     Keep the counting settings and create the message helper.

     :param count: must be a CountVo instance (enforced by an assertion)
     """
     assert isinstance(count, CountVo)
     self._counter, self.message = count, Message()
コード例 #4
0
ファイル: ebseq.py プロジェクト: costasilvati/consexpression
class Ebseq(object):
    """
    Run the EBSeq analysis through rpy2 and classify rows of its result file.
    """

    def __init__(self, count, group, repl, out):
        """
        Initialise the EBSeq runner.
        :param count: path of the CSV count table read by R
        :param group: iterable with the group (condition) names
        :param repl: number of replicates, used as ``each=`` in the R ``rep`` call
        :param out: path of the file that receives the EBSeq status table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Index of the status field in a result row and the value meaning "DE".
        self._exp_column = 1
        self._exp = "DE"

    def run_de(self, gene):
        """
        Tell whether one row of the EBSeq result marks the gene as DE.

        Surrounding whitespace is ignored because rows obtained by splitting a
        text line keep the trailing newline on the last field.

        :param gene: list with the fields of one result row
        :return: 1 when the status field equals "DE", otherwise 0 (also for
                 rows too short to contain a status field)
        """
        try:
            status = gene[self._exp_column]
        except IndexError:
            return 0
        return 1 if status.strip() == self._exp else 0

    def run_ebseq(self):
        """
        Execute default analysis with EBSeq and write the status table.
        :return: None
        :raises RRuntimeError: re-raised after being reported when R fails
        """
        try:
            robjects.r('library("EBSeq")')
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('m <- as.matrix(table)')
            # Quoted group names joined in reverse order, as the original
            # prepend-loop produced them.
            names = list(self._groups_name)
            grup = ", ".join("'" + ind + "'" for ind in reversed(names))
            robjects.r('Sizes=MedianNorm(m)')
            robjects.r('EBOut=EBTest(Data=m, '
                       'Conditions=as.factor(rep('
                       'c(' + grup + '),each=' + str(self._replic) +
                       ')), sizeFactors=Sizes, maxround=5)')
            robjects.r('EBDERes=GetDEResults(EBOut, FDR=0.05)')
            robjects.r('write.table(EBDERes$Status, file="' + self._output +
                       '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- EBSeq: is completed!")
        except RRuntimeError as rre:
            self._message.message_9("Error in EBSeq execution: " + str(rre))
            raise rre
コード例 #5
0
ファイル: ebseq.py プロジェクト: costasilvati/consexpression
 def __init__(self, count, group, repl, out):
     """
     Initialise the Ebseq object.
     :param count: stored in ``_table_count``
     :param group: stored in ``_groups_name``
     :param repl: stored in ``_replic``
     :param out: stored in ``_output``
     """
     self._table_count, self._groups_name = count, group
     self._replic, self._output = repl, out
     self._message = Message()
     # Column index and label kept for later comparisons.
     self._exp_column, self._exp = 1, "DE"
コード例 #6
0
 def __init__(self, count, group, repl, out):
     """
     Define the NOISeq object.
     :param count: stored in ``_table_count``
     :param group: stored in ``_groups_name``; its length sets the likelihood column
     :param repl: stored in ``_replic``
     :param out: stored in ``_output``
     """
     self._table_count, self._groups_name = count, group
     self._replic, self._output = repl, out
     self._message = Message()
     # The likelihood column index is the number of groups plus three.
     self._likelihood_column = len(group) + 3
     self._likelihood = 0.95
コード例 #7
0
 def __init__(self, count, group, repl, out):
     """
     Store the inputs plus the log fold change / p-value settings.

     NOTE(review): the previous docstring named Ebseq, which looks like a
     copy-paste; the owning class is not visible here - confirm.

     :param count: stored in ``_table_count``
     :param group: stored in ``_groups_name``
     :param repl: stored in ``_replic``
     :param out: stored in ``_output``
     """
     self._table_count, self._groups_name = count, group
     self._replic, self._output = repl, out
     self._message = Message()
     # Column indexes first, then the numeric settings.
     self._logfc_column, self._pvalue_column = 2, 5
     self._logfc, self._pvalue = 2, 0.05
コード例 #8
0
 def __init__(self, count, group, repl, out):
     """
     Store the inputs plus the log fold change / p-value settings.

     NOTE(review): the previous docstring said "edgeR object"; the owning
     class is not visible here - confirm which tool this belongs to.

     :param count: stored in ``_table_count``
     :param group: stored in ``_groups_name``
     :param repl: stored in ``_replic``
     :param out: stored in ``_output``
     """
     self._table_count, self._groups_name = count, group
     self._replic, self._output = repl, out
     self._message = Message()
     # Column indexes first, then the numeric settings.
     self._logfc_column, self._pvalue_column = 6, 7
     self._pvalue, self._logfc = 0.05, 2
コード例 #9
0
 def __init__(self, count, group, repl, out):
     """
     Store the inputs plus the likelihood / FDR settings.

     NOTE(review): the previous docstring said "edgeR object"; the owning
     class is not visible here - confirm which tool this belongs to.

     :param count: stored in ``_table_count``
     :param group: stored in ``_groups_name``; its length feeds the column indexes
     :param repl: stored in ``_replic``; multiplied by the number of groups
     :param out: stored in ``_output``
     """
     self._table_count, self._groups_name = count, group
     self._replic, self._output = repl, out
     self._message = Message()
     # Both column indexes are offsets past len(group) * repl.
     self._likelihood_column = 2 + len(group) * repl
     self._fdr_de_column = 4 + len(group) * repl
     self._likelihood, self._fdr = 0.95, 0.1
コード例 #10
0
 def __init__(self, count, group, repl, out):
     """
     Set the R ``warn`` option to -1 and store the inputs and settings.

     NOTE(review): the previous docstring named Ebseq, which looks like a
     copy-paste; the owning class is not visible here - confirm.

     :param count: stored in ``_table_count``
     :param group: stored in ``_groups_name``
     :param repl: stored in ``_replic``
     :param out: stored in ``_output``
     """
     robjects.r['options'](warn=-1)
     self._table_count, self._groups_name = count, group
     self._replic, self._output = repl, out
     # The value keeps its embedded double quotes.
     self._class = '"Two class unpaired"'
     self._message = Message()
     # Column indexes first, then the numeric settings.
     self._fd_column, self._qvalue_column = 4, 5
     self._qvalue, self._fd = 1, 2
コード例 #11
0
    def __init__(self):
        """
        Start with no configuration, no tool objects and empty result lists.
        """
        self._exp_dao = self._reference = None
        self._transcript = False
        self._count = self._expression = self._mapp_bo = None
        self.message = Message()
        # Three independent lists (never share one list object between them).
        self._fastq, self._out_mapp, self._count_table = [], [], []
        self._merged_table_out = None
        # One slot per expression tool, filled once the tool is created.
        self._edger = self._bayseq = self._deseq = None
        self._noiseq = self._ebseq = self._samseq = self._limmavoom = None
コード例 #12
0
class Experiment(object):
    """
    Business object of Experiment.

    Validates the configuration held by an ExperimentDao, runs mapping and
    counting for every FASTQ file, launches each expression tool and writes
    the consensus list of genes.
    """
    # String annotation, so CountBo need not exist when the class body runs.
    _count: "CountBo"

    # Minimum number of tools that must flag a gene for it to be written.
    _MIN_INDICATIONS = 4

    def __init__(self):
        """Start with no configuration, no tool objects and empty lists."""
        self._exp_dao = None
        self._reference = None
        self._transcript = False
        self._count = None
        self._expression = None
        self._mapp_bo = None
        self.message = Message()
        self._fastq = []
        self._out_mapp = []
        self._count_table = []
        self._merged_table_out = None
        self._edger = None
        self._bayseq = None
        self._deseq = None
        self._noiseq = None
        self._ebseq = None
        self._samseq = None
        self._limmavoom = None

    @staticmethod
    def _is_true(value):
        """Return True for the boolean True or the text "true" in any case."""
        return str(value).strip().lower() == "true"

    def init_experiment(self, exp, file):
        """
        Initialize experiment: load and validate the configuration.

        The process exits when the reference, a directory, the replicate or
        group numbers are invalid, or when the data is paired-end.

        :param exp: ExperimentDao that reads and stores the configuration
        :param file: config file
        :return: None
        """
        assert isinstance(exp, ExperimentDao)
        self._exp_dao = exp
        exp.read_configuration_file(file)
        exp._name = self.name_valid(exp._name)
        self.rep_valid(exp._replic)
        self.group_number_valid(exp._group_number)
        ref = exp._reference

        if ref == "":
            # No reference: the user supplies an already merged count table.
            print(
                "You don't have a refserence genome... Expression analyse need a table count with mapping reads"
            )
            self._merged_table_out = input("Type absolute path to table count")
        elif self.extension_valid(ref, "fa") or self.extension_valid(ref, "fasta"):
            self._reference = ref
        else:
            self.message.message_3("REFERENCE FILE ")
            exit(0)

        self.directory_valid(exp._read_directory, "reads")

        for group_dir in exp._group_directory:
            group_path = exp._read_directory + "/" + str(group_dir)
            self.directory_valid(group_path, "group")
            # Get fastq reads
            self._fastq.append(self.get_reads_file(group_path + "/"))

        # The configuration stores this flag as text, so comparing it with
        # the boolean True alone would never match.
        if self._is_true(exp._paired_end):
            self.message.message_8(
                "The sequence is paired-end. CONSEXPRESSION dont make paired-end analysis"
            )
            exit(0)
        else:
            self.message.message_8("The sequence is single-end")

    def name_valid(self, name):
        """
        Verify the name: if is empty change to default name
        :param name: name of experiment
        :return: the given name, or "consexpression" when it is empty
        """
        if not name:
            name = "consexpression"
            self.message.message_7(
                "Experiment name is empty! The name was changed to consexpression"
            )
        return name

    def rep_valid(self, rep):
        """
        Verify if the number of replicates is valid (>= 1); exit otherwise.
        :param rep: number of replicates
        :return: None
        """
        if rep >= 1:
            self.message.message_1("replics")
        else:
            self.message.message_2(
                "1 replic or more (technique or biological)")
            self.message.message_3("number of replics in line 5 - 6")
            exit()

    def extension_valid(self, path, extension):
        """
        Verify if the extension file is the expected
        :param path: file path
        :param extension: expected ending of the path
        :return: boolean
        """
        return path.endswith(extension)

    def directory_valid(self, path, type):
        """
        Verify if path is a directory; exit otherwise.
        :param path: path of file
        :param type: label used in the messages (e.g. "reads", "group")
        :return: void
        """
        if os.path.isdir(path):
            self.message.message_1("directory " + type + ": " + path)
        else:
            self.message.message_2("a valid directory " + type + " path")
            self.message.message_3(" the directory " + type + " path (line 9)")
            exit()

    def group_number_valid(self, group_n):
        """
        Verify the number of groups. The minimal is one; exit otherwise.
        :param group_n: number of groups (int)
        :return: None
        """
        assert isinstance(group_n, int)

        if group_n >= 1:
            self.message.message_1("group number")
        else:
            self.message.message_2("1 group or more.")
            self.message.message_3("the number of gruoups in line 7")
            exit()

    def file_valid(self, path):
        """
        Verify if path is a file; exit otherwise.
        :param path: path to check
        :return: None
        """
        if os.path.isfile(path):
            self.message.message_1(" file: " + path)
        else:
            self.message.message_2(" a valid reference file")
            self.message.message_3(" the reference file path (line 7)")
            exit()

    def exceute_mapp_count(self):
        """
        Map every FASTQ file and count the mapped reads.

        For each read file a mapping output path is built next to it, inside
        a directory named after the experiment; the count table path is
        remembered in ``_count_table``.
        :return: None
        """
        ref = self._reference
        thread = self._exp_dao._threads
        sing = self._exp_dao._paired_end
        name = self._exp_dao._name
        # NOTE(review): one list is shared by all groups and appended to
        # _out_mapp once per group, as before - confirm that is intended.
        path_find = []
        for grp in self._fastq:
            for grp_file in grp:
                bar = 1 + grp_file.rfind('/')
                out_dir = grp_file[:bar] + name
                os.makedirs(out_dir, 0o755, exist_ok=True)
                # Only the file extension is swapped, so the output always
                # lives inside the directory created above even when a
                # directory or experiment name contains "fastq".
                file_name = grp_file[bar:]
                if file_name.endswith('fastq'):
                    file_name = file_name[:-len('fastq')] + 'sam'
                out_mapp = out_dir + "/" + file_name
                path_find.append(out_mapp)
                mapp_vo = MappVo("TopHat", ref, grp_file, "", thread, out_mapp,
                                 "", sing)
                self._mapp_bo = MappBo(mapp_vo)
                if self._mapp_bo.execute_mapp() != 0:
                    self.message.message_4(
                        "Task: Mapping don't run correctly.")
                    continue

                dot = out_mapp.rfind('.')
                in_type = out_mapp[dot + 1:]
                bar = 1 + out_mapp.rfind('/')
                table_count = out_mapp[:bar] + out_mapp[bar:dot] + "_table_count.txt"
                self._count_table.append(table_count)
                in_count = out_mapp + "/accepted_hits.sam"

                count_vo = CountVo(in_count,
                                   self._exp_dao._annotation_file,
                                   self._exp_dao._annotation_type, in_type,
                                   self._exp_dao._count_mode, table_count)
                self._count = CountBo(count_vo)
                if self._count.execute_count() == 0:
                    self.message.message_8("Count Sucsessfull!!!")
                else:
                    self.message.message_4(
                        "Error in counting mapped reads...")
            self._out_mapp.append(path_find)

    def get_reads_file(self, dir):
        """
        Get all fastq path of dir
        :param dir: path to folder of fastq sample (with trailing separator)
        :return: list of reads file paths
        """
        fastq_file = glob.glob(dir + "*.fastq")
        if not fastq_file:
            self.message.message_7("*Not found files FASTQ in directorie " +
                                   dir)
        return fastq_file

    def execute_expression_analysis(self):
        """
        Run every expression tool on the merged count table.

        With a reference genome the per-sample tables are merged first;
        otherwise the table path typed by the user is used.
        :return: None
        """
        print("Expression analisys start...")
        dao = self._exp_dao
        if dao._reference != "":
            out_merge_table = dao._read_directory + "/" + dao._name + "_table_count.txt"
            self.execute_merge_table(self._count_table, out_merge_table)
        else:
            out_merge_table = self._merged_table_out
        groups = dao._group_name
        replic = dao._replic
        # 1 ------------------ edgeR -----------------
        print("---- edgeR START! ------------")
        out_expression = dao._output + "/" + dao._name
        self._edger = EdgeR(out_merge_table, groups, replic,
                            out_expression + "_edger.csv")
        self._edger.run_edger()
        # 2 ------------- BaySeq --------------------
        print("---- baySeq START! ------------")
        self._bayseq = BaySeq(out_merge_table, groups, replic,
                              out_expression + "_baySeq.csv")
        self._bayseq.run_bayseq()
        # 3 ------------- DESeq --------------------
        print("---- DESeq START! ------------")
        self._deseq = DESeq(out_merge_table, groups, replic,
                            out_expression + "_DESeq.csv")
        self._deseq.run_deseq()
        # 4 ------------- NOISeq --------------------
        print("---- NOISeq START! ------------")
        self._noiseq = Noiseq(out_merge_table, groups, replic,
                              out_expression + "_NOISeq.csv")
        self._noiseq.run_noiseq()
        # 5 ------------- EBSeq --------------------
        print("---- EBSeq START! ------------")
        self._ebseq = Ebseq(out_merge_table, groups, replic,
                            out_expression + "_EBSeq.csv")
        self._ebseq.run_ebseq()
        # 6 ------------- SAMSeq --------------------
        # SAMSeq is announced but not executed; _samseq stays None.
        print("---- SAMSeq START! ------------")
        # 7 ------------- limma-voom --------------------
        print("---- limma START! ------------")
        self._limmavoom = LimmaVoom(out_merge_table, groups, replic,
                                    out_expression + "_limmavoom.csv")
        self._limmavoom.run_limmavoom()

    def _add_indications(self, gene_de, tool, separator, key_column):
        """Add one tool's 0/1 calls, read from its output file, to gene_de."""
        with open(tool._output, 'r') as results:
            next(results, None)  # the first line is the header
            for line in results:
                if not line.strip():
                    continue
                # Drop the line ending so the last field is clean for run_de.
                gene = line.rstrip("\r\n").split(separator)
                indication = int(tool.run_de(gene))
                key = gene[key_column]
                gene_de[key] = gene_de.get(key, 0) + indication

    def execute_conseus(self, out):
        """
        Write the genes indicated as DE by at least four tools.

        Each tool's output file is read and its ``run_de`` decides every row.
        SAMSeq is skipped when it was not run and limma when there are fewer
        than two replicates.

        :param out: path of the consensus file ("gene, indications" header)
        :return: None
        """
        gene_de = {}
        self._add_indications(gene_de, self._bayseq, "\t", 0)
        self._add_indications(gene_de, self._edger, "\t", 0)
        # DESeq rows carry the gene name in the second field.
        self._add_indications(gene_de, self._deseq, "\t", 1)
        # NOISeq output is comma separated.
        self._add_indications(gene_de, self._noiseq, ",", 0)

        if self._samseq is None:
            print("SAMSeq results not found")
        else:
            self._add_indications(gene_de, self._samseq, "\t", 1)

        if self._exp_dao._replic >= 2:
            self._add_indications(gene_de, self._limmavoom, "\t", 0)
        else:
            print("limma require more than one replics")

        self._add_indications(gene_de, self._ebseq, "\t", 0)

        #--- write results
        print(len(gene_de))
        with open(out, 'w') as out_cons:
            out_cons.write('gene, indications')
            for gene_name, indications in gene_de.items():
                if indications >= self._MIN_INDICATIONS:
                    out_cons.write("\n" + gene_name + "," + str(indications))

    def execute_merge_table(self, out_mapp_list, out_name):
        """
        Make a merged table with counts (delegates to the count object).
        :param out_mapp_list: list of per-sample count table paths
        :param out_name: path of the merged table
        :return: None
        """
        self._count.merge_table_count(out_mapp_list, out_name,
                                      self._exp_dao._group_name)
コード例 #13
0
class DESeq (object):
    """
    Run DESeq analysis through rpy2 and classify rows of its result file.
    """
    def __init__(self, count, group, repl, out):
        """
        Define the DESeq object
        :param count: path of the CSV count table read by R
        :param group: iterable with the group (condition) names
        :param repl: number of replicates per group (int)
        :param out: path of the file that receives the DESeq result table
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Field indexes in a result row, then the thresholds applied to them.
        self._logfc_column = 6
        self._pvalue_column = 7
        self._pvalue = 0.05
        self._logfc = 2

    def run_de(self, gene):
        """
        Tell whether one result row passes the fold-change and p-value cuts.

        :param gene: list with the fields of one result row
        :return: 1 when the log fold change field is at least 2 in magnitude
                 and the p-value field is at most 0.05; 0 otherwise, including
                 rows that are too short or hold non-numeric values
        """
        try:
            lfc = float(gene[self._logfc_column])
            pv = float(gene[self._pvalue_column])
        except (ValueError, IndexError):
            # Non-numeric fields or a truncated/blank row: not DE.
            return 0
        if (lfc >= self._logfc or lfc <= -self._logfc) and pv <= self._pvalue:
            return 1
        return 0

    def run_deseq(self):
        """
        Execute default analysis with DESeq and write the result table.
        :return: None
        :raises RRuntimeError: re-raised after being reported when R fails
        """
        try:
            for library in ('library("parallel")', 'library("stats4")',
                            'library("BiocGenerics")', 'library("Biobase")',
                            'library("locfit")', 'library(DESeq)',
                            'library("lattice")'):
                robjects.r(library)
            robjects.r('table <- read.csv("' + self._table_count +
                       '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('m <- as.matrix(table)')
            assert isinstance(self._replic, int)
            quoted = ["'" + ind + "'" for ind in self._groups_name]
            # Every group name repeated once per replicate, in group order.
            grup = ", ".join(name for name in quoted
                             for _ in range(self._replic))
            # The two conditions handed to nbinomTest, in reverse group order
            # (as the original prepend-loop produced them).
            b_test = ", ".join(reversed(quoted))
            robjects.r('condition = factor( c(' + grup + '))')
            robjects.r('cds <- newCountDataSet(m, condition)')
            robjects.r('cds <- estimateSizeFactors(cds)')
            if self._replic == 1:
                # A single replicate uses the "blind" dispersion method.
                command = 'cds <- estimateDispersions(cds, method="blind", fitType="local")'
            else:
                command = 'cds <- estimateDispersions(cds, fitType="local")'
            robjects.r(command)
            robjects.r('res <- nbinomTest(cds, ' + b_test + ')')
            robjects.r('write.table(res, file="' + self._output +
                       '", sep = "\t", quote = FALSE)')
        except RRuntimeError as rre:
            self._message.message_9("Error in DESeq execution: " + str(rre))
            raise rre

        self._message.message_9("--- DESeq: is completed!")

# =============================== TESTES DA CLASSE ==================================
# inp = '/Volumes/SD128/bioconvergencia/reads_RNApa/kallisto_quant_RNApa_apa_1B_0B.csv'
# gr = ["0b", "pb"]
# rp = 2
# out = 'RNApa_apa_1B_0B-consexpression_deseq.csv'
# t = DESeq(inp, gr, rp, out)
# t.run_deseq() # Não temos DESeq na versão necessária
コード例 #14
0
ファイル: mappBo.py プロジェクト: santii/consexpression
class MappBo(object):
    """
    This class make rules of validate information and command, to execute Mapp tools
    """
    def __init__(self, mapp):
        """
        :param mapp: MappVo holding the mapping settings (asserted)
        """
        assert isinstance(mapp, MappVo)
        self._map_vo = mapp
        self._reads_file = []
        self.message = Message()

    def threads_conf(self, threads_vo):
        """
        Lower the thread count when it exceeds what the system offers.

        A request larger than the CPU count is replaced by the CPU count
        minus one, never going below one thread.

        :param threads_vo: number of threads requested
        :return: void
        """
        threads_sys = multiprocessing.cpu_count()
        if threads_vo > threads_sys:
            usable = max(1, threads_sys - 1)
            self._map_vo._threads_value = usable
            self.message.message_9("The threads nunber defined is " +
                                   str(threads_vo) +
                                   ", but the system have only " +
                                   str(threads_sys))
            self.message.message_9("---> Number of threads was change to " +
                                   str(usable))
        self.message.message_9("Successful! Threads configuration is ok!")

    def execute_mapp(self):
        """
        Execute the command: 0 is ok, non-zero is a failed mapping task.
        Returns 1 without running the mapper when the index build failed.
        :return: int
        """
        self.threads_conf(self._map_vo._threads_value)
        index_name = self.make_bowtie2_index(self._map_vo._index_name)
        if not index_name:
            return 1
        self._map_vo._index_name = index_name
        text = self._map_vo.to_string()
        # NOTE(review): the command line is an opaque string built by MappVo
        # and goes through the shell; paths with shell metacharacters are unsafe.
        return subprocess.call(text, shell=True)

    def make_bowtie2_index(self, index):
        """
        Execute command to make a bowtie2 index if do not exists
        :param index: fasta file reference to mapp
        :return: name of generated index, or "" when the build fails
        """
        dot = index.rfind('.f')
        # Only honour a ".f..." marker found in the file name itself; without
        # one the whole path is the index name (a -1 result must not chop the
        # last character).
        name = index[:dot] if dot > index.rfind('/') else index
        if os.path.isfile(name + ".1.bt2"):
            return name
        try:
            # Argument list instead of a shell string: no quoting problems.
            built = subprocess.call(["bowtie2-build", index, name]) == 0
        except OSError:
            # bowtie2-build missing or not executable counts as a failure.
            built = False
        if built:
            return name
        self.message.message_4("Error in index build")
        return ""


# #===== TESTES DA CLASSE =====================
# name = "Bowtie2"
# index_name = "/home/juliana/Documents/Projeto_Juliana/Datasets/Referencias/GRCh38.p5/GCA_000001405.20_GRCh38.p5_genomic.fna"
# threads_value = 3
# reads1_name = "/home/juliana/Documents/Eliandro-UEL/E1_S1_L001_R1_001_prinseq_1.fastq"
# reads2_name = "/home/juliana/Documents/Eliandro-UEL/E1_S1_L001_R2_001_prinseq_2.fastq"
# output_name = "/home/juliana/Documents/Testes_RNATool/eliandro_uel.sam"
# map = vo.MappVo.MappVo(name,index_name,reads1_name, reads2_name, threads_value,output_name,"",False)
# mapbo = MappBo(map)
# mapbo.make_bowtie2_index(index_name)
# # map.parm_mapp()
# # teste = map.to_string()
# # print teste
# # print teste
コード例 #15
0
class ExperimentDao(object):
    """
    Object manager data of experiment: reads the "KEY: value" configuration
    file and keeps every value as an attribute.
    """
    def __init__(self):
        """Set the configuration key names and the default values."""
        self._message = Message()
        self._file_conf = None
        # Key names looked up in the configuration file.
        self._name_par = "NAME"
        self._replic_parm = "REPLIC"
        self._group_number_parm = "GROUP_NUMBER"
        self._group_name_parm = "GROUP_NAMES"
        self._reference_parm = "REFERENCE_GENOME"
        self._read_directory_parm = "READS_DIRECTORY"
        self._group_directory_parm = "GROUP_DIRECTORIES"
        self._paired_end_parm = "PAIRED_END"
        self._threads_parm = "THREADS"
        self._count_mode_parm = "MODE"
        self._annotation_file_parm = "ANOTATION_FILE"
        self._annotation_type_parm = "ANOTATION_TYPE"
        self._output_parm = "OUTPUT"
        # Values, replaced by read_configuration_file when the key is present.
        self._name = ""
        # Integer default: the value is parsed with int() and compared with
        # numbers, so a list default would break those comparisons.
        self._replic = 0
        self._group_number = 0
        self._group_name = []
        self._reference = ""
        self._read_directory = ""
        self._group_directory = []
        self._paired_end = False
        self._threads = 0
        self._count_mode = ""
        self._annotation_file = ""
        self._annotation_type = ""
        self._output = ""

    def read_configuration_file(self, file):
        """
        Read file and feed class attributes.

        Blank lines and lines starting with "#" are ignored. Every other line
        is split at the first ": " into key and value (the value is empty
        when the separator is missing). A repeated key is reported and its
        first value kept.

        :param file: path to config file
        :return: void
        :raises ValueError: when REPLIC, GROUP_NUMBER or THREADS is not an integer
        """
        self._message.message_9("- Reading configuration file.. ----")
        parms = {}

        with open(file, 'r') as conf:
            for line in conf:
                entry = line.rstrip("\r\n")
                if not entry.strip() or entry.startswith("#"):
                    continue
                # Split once so values may themselves contain ": ".
                key, _, value = entry.partition(": ")
                if key in parms:
                    self._message.message_9("Parameter  " + key +
                                            " is repeated!")
                else:
                    parms[key] = value

        if self._name_par in parms:
            self._name = parms[self._name_par]
        if self._replic_parm in parms:
            self._replic = int(parms[self._replic_parm])
        if self._group_number_parm in parms:
            self._group_number = int(parms[self._group_number_parm])
        if self._group_name_parm in parms:
            self._group_name = parms[self._group_name_parm].split(',')
        if self._reference_parm in parms:
            self._reference = parms[self._reference_parm]
        if self._read_directory_parm in parms:
            self._read_directory = parms[self._read_directory_parm]
        if self._group_directory_parm in parms:
            self._group_directory = parms[self._group_directory_parm].split(
                ',')
        if self._paired_end_parm in parms:
            # Kept as the raw text from the file (e.g. "True"), not a bool.
            self._paired_end = parms[self._paired_end_parm]
        if self._threads_parm in parms:
            self._threads = int(parms[self._threads_parm])
        if self._count_mode_parm in parms:
            self._count_mode = parms[self._count_mode_parm]
        if self._annotation_file_parm in parms:
            self._annotation_file = parms[self._annotation_file_parm]
        if self._annotation_type_parm in parms:
            self._annotation_type = parms[self._annotation_type_parm]
        if self._output_parm in parms:
            self._output = parms[self._output_parm]


# # # #================ TESTE DA CLASSE =====================================
# file = "dao/CONFIG_tool"
# exp = ExperimentDao()
# exp.read_configuration_file(file)
# print "---"
# print exp._name
コード例 #16
0
ファイル: mappVo.py プロジェクト: santii/consexpression
class MappVo(object):
    """
    Record the values needed to run a mapping tool and build its command line.

    Supported tool names are "BWA", "Bowtie2" and "TopHat".
    """

    #mapp_vo = MappVo("TopHat", ref, grp_file, "", thread, out_mapp, "", sing)
    def __init__(self, name, index, read1_n, read2_n, threads, out, other, single_end):
        """
        Store the user-supplied values and derive the tool-specific flags.

        :param name: mapping tool name ("BWA", "Bowtie2" or "TopHat")
        :param index: reference index handed to the tool
        :param read1_n: first reads file
        :param read2_n: second reads file (only emitted for paired-end runs)
        :param threads: thread count; converted with ``str()`` in the command
        :param out: output file or directory
        :param other: extra command-line options, appended verbatim
        :param single_end: stored as the paired-end flag; the run is handled
            as paired-end only when this equals the string ``'True'``
            (NOTE(review): parameter name and meaning look inverted - confirm
            with the callers)
        """
        self._index_parm = ""
        self._reads1_parm = ""
        self._reads2_parm = ""
        self._threads_parm = ""
        self._output_parm = ""
        self._command_parm = ""
        self._sep = " "
        self._name = name
        self._index_name = index
        self._reads1_name = read1_n
        self._reads2_name = read2_n
        self._threads_value = threads
        self._output_name = out
        self._output_type = "--no-convert-bam "
        self._other_conf = other
        self._paired_end = single_end
        self._message = Message()
        self.parm_mapp()

    def parm_mapp(self):
        """
        Fill in the command name and option flags for the selected tool.

        Reports through ``message_4`` and exits the interpreter when the tool
        name is not one of the supported ones.
        """

        if self._name == "BWA":
            self._command_parm = "bwa mem "
            self._threads_parm = "-t "
            self._output_parm = "> "

        elif self._name == "Bowtie2":
            self._command_parm = "bowtie2 "
            self._index_parm = "-x "
            self._threads_parm = "-p "
            self._output_parm = "-S "

            if self._paired_end == 'True':
                self._reads1_parm = "-1 "
                self._reads2_parm = "-2 "
            else:
                self._reads1_parm = "-U "

        elif self._name == "TopHat":
            self._command_parm = "tophat2 "
            self._threads_parm = "-p "
            self._output_parm = "--output-dir "
        else:
            self._message.message_4("Mapping " + self._name + " not found!")
            exit()

    def to_string(self):
        """
        Return the shell command used to run the mapping tool.

        :return: the command string, or an empty string for an unknown tool
        """
        # Decide on the tool first so that nothing is concatenated (and no
        # attribute has to be a string) for an unsupported name.
        if self._name not in ("Bowtie2", "BWA", "TopHat"):
            return ""

        threads = self._threads_parm + str(self._threads_value) + self._sep
        index = self._index_parm + self._index_name + self._sep
        reads = self._reads1_parm + self._reads1_name + self._sep
        if self._paired_end == 'True':
            reads = reads + self._reads2_parm + self._reads2_name + self._sep
        output = self._output_parm + self._output_name + self._sep

        if self._name == "TopHat":
            # The extra options sit in the middle of the TopHat command, so
            # they must be separated from the following "--output-dir" flag.
            other = self._other_conf
            if other and not other[-1].isspace():
                other = other + self._sep
            return (self._command_parm + threads + self._output_type + other
                    + output + index + reads)

        # Bowtie2 and BWA share one layout; the extra options come last.
        return (self._command_parm + index + threads + reads + output
                + self._other_conf)
コード例 #17
0
ファイル: mappBo.py プロジェクト: santii/consexpression
 def __init__(self, mapp):
     """
     Keep the mapping value object and prepare an empty reads-file list.

     :param mapp: MappVo instance describing the mapping run
     """
     assert isinstance(mapp, MappVo)
     self.message = Message()
     self._reads_file = []
     self._map_vo = mapp
コード例 #18
0
class SamSeq(object):
    """
    Run a SAMseq analysis through R (rpy2) and classify genes from the rows
    of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Initialise the SAMseq object.

        :param count: path of the comma-separated count table read by R
        :param group: sequence of group (condition) names
        :param repl: number of replicates per group
        :param out: path of the tab-separated result file written by R
        """
        robjects.r['options'](warn=-1)  # switch R warnings off
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._class = '"Two class unpaired"'
        self._message = Message()
        # Indexes into a split result row and the thresholds applied to them.
        self._fd_column = 4
        self._qvalue_column = 5
        self._qvalue = 1
        self._fd = 2

    def run_de(self, gene):
        """
        Tell whether one result row passes both thresholds.

        :param gene: sequence of fields of one result row
        :return: 1 when the value in the ``_fd_column`` field is at most
            ``_fd`` and the value in the ``_qvalue_column`` field is at most
            ``_qvalue``; 0 otherwise
        """
        fd = float(gene[self._fd_column])
        qv = float(gene[self._qvalue_column])
        # The q-value threshold must be checked against the q-value field;
        # it used to be compared with `fd`, leaving `qv` unused.
        if fd <= self._fd and qv <= self._qvalue:
            return 1
        return 0

    def run_samseq(self):
        """
        Execute the default analysis with SAMSeq.

        R runtime errors are reported through ``message_9`` and not re-raised.
        :return:
        """
        try:
            # More than two groups switches SAMseq to its multiclass response.
            if len(self._groups_name) > 2:
                self._class = '"Multiclass"'

            robjects.r('library("samr")')
            robjects.r('table <- read.csv("' + self._table_count
                       + '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE, sep = ",")')
            robjects.r('m <- as.matrix(table)')

            # Quoted, comma-separated group names for the R c(...) call.
            labels = ",".join('"' + name + '"' for name in self._groups_name)

            robjects.r('SAMseq.test = SAMseq(m, as.factor(rep(c(' + labels
                       + '),each=' + str(self._replic) + ')), resp.type = '
                       + self._class
                       + ', geneid = rownames(m), genenames = rownames(m), nperms = 100)')
            robjects.r('SAMseq.result.table = rbind(SAMseq.test$siggenes.table$genes.up, SAMseq.test$siggenes.table$genes.lo)')
            robjects.r('SAMseq.score = rep(0, nrow(m))')
            robjects.r('SAMseq.score[match(SAMseq.result.table[,1], rownames(m))] = as.numeric(SAMseq.result.table[,3])')
            robjects.r('SAMseq.FDR = rep(1, nrow(m))')
            robjects.r('SAMseq.FDR[match(SAMseq.result.table[,1], rownames(m))] = as.numeric(SAMseq.result.table[,5])/100')
            robjects.r('write.table(SAMseq.result.table, file="' + self._output
                       + '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- SAMSeq: is completed!")
        except RRuntimeError as rre:
            # Deliberately best-effort: report the failure and carry on.
            self._message.message_9("Error in SAMSeq execution: " + str(rre))
            # raise rre
コード例 #19
0
class BaySeq(object):
    """
    Commands to run a baySeq expression analysis through R (rpy2).
    """

    def __init__(self, count, group, repl, out):
        """
        Define the baySeq object.

        :param count: path of the CSV count table read by R
        :param group: sequence of group (condition) names
        :param repl: number of replicates per group (int)
        :param out: path of the tab-separated result file written by R
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Indexes into a split result row; both are shifted by
        # len(group) * repl (presumably one count column per sample -
        # verify against the layout of the written table).
        self._likelihood_column = 2 + len(group) * repl
        self._fdr_de_column = 4 + len(group) * repl
        self._likelihood = 0.95
        self._fdr = 0.1

    def run_de(self, gene):
        """
        Tell whether one result row passes the FDR and likelihood thresholds.

        :param gene: sequence of fields of one result row
        :return: 1 when the FDR field is at most ``_fdr`` and the likelihood
            field is greater than ``_likelihood``; 0 otherwise, including
            rows whose fields are not numeric
        """
        try:
            fdr = float(gene[self._fdr_de_column])
            like = float(gene[self._likelihood_column])
        except ValueError:
            # Non-numeric fields are treated as "not DE".
            return 0
        if fdr <= self._fdr and like > self._likelihood:
            return 1
        return 0

    def run_bayseq(self):
        """
        Execute the default analysis with baySeq.

        R runtime errors are reported through ``message_9`` and not re-raised.
        :return:
        """
        try:
            for package in ("parallel", "stats4", "BiocGenerics", "S4Vectors",
                            "IRanges", "GenomeInfoDb", "abind", "perm",
                            "GenomicRanges", "baySeq"):
                robjects.r('library("' + package + '")')

            # The fallback used to assign the undefined R symbol `NUL`.
            # NOTE(review): the cluster is never stopped in this method.
            robjects.r(
                'if(require("parallel")) cl <- makeCluster(4) else cl <- NULL')
            robjects.r('table <- read.csv("' + self._table_count
                       + '",  row.names = 1, header = TRUE, stringsAsFactors = FALSE)')
            robjects.r('m <- as.matrix(table)')

            assert isinstance(self._replic, int)
            # One quoted group name per sample: 'g1', 'g1', 'g2', 'g2', ...
            labels = ", ".join("'" + name + "'"
                               for name in self._groups_name
                               for _ in range(self._replic))
            robjects.r('replicates <- c(' + labels + ')')

            # Every model vector needs one entry per sample. NDE puts all
            # samples in one class; it used to hold only one entry per group,
            # which disagreed with DE and `replicates` whenever repl > 1.
            n_samples = len(self._groups_name) * self._replic
            nde = ",".join(["1"] * n_samples)
            # DE gives each group its own class id (1,1,2,2 for two groups
            # with two replicates, exactly as before).
            de = ",".join(str(group_id)
                          for group_id in range(1, len(self._groups_name) + 1)
                          for _ in range(self._replic))
            robjects.r('groups <- list(NDE = c(' + nde + '), DE = c(' + de + '))')

            robjects.r(
                'CD <- new("countData", data = m, replicates = replicates, groups = groups)'
            )
            robjects.r('libsizes(CD) <- getLibsizes(CD)')
            robjects.r(
                'CD <- getPriors.NB(CD, samplesize = 1000, estimation = "QL", cl = cl, equalDispersions = TRUE)'
            )
            robjects.r(
                'CD <- getLikelihoods(CD, prs=c(0.5, 0.5), pET="BIC", cl=cl)')
            robjects.r(
                'write.table(topCounts(CD, group = "DE", number = 65000, normaliseData = TRUE), "'
                + self._output + '", sep="\t", quote = FALSE)')
            self._message.message_9("--- baySeq is completed!")
        except RRuntimeError as rre:
            # Deliberately best-effort: report the failure and carry on.
            self._message.message_9("Error in baySeq execution: " + str(rre))


#========================= CLASS TEST ==============
# inp = '/home/juliana/Dropbox/UTFPR/PPGBIOINFO/Projeto/results_gencode/TopHat_results/bayseq/UHR_vs_Brain_gencode_TopHat_baySeq.csv'
# grp = "g1", "g2"
# rep = 7
# out = '/home/juliana/Documentos/Projeto_Juliana/Datasets/consexpression_baySeq_out.csv'
# b = BaySeq(inp, grp, rep, out)
# read_bay = open(inp, 'r')
# c_b = 1
# for line in iter(read_bay):
#     #print('--' + line)
#     if c_b > 0:
#        gene = line.split("\t")
#        print(gene[0])
#        v = b.run_de(gene)
#        print('--> '+ str(v))
#     c_b += 1
コード例 #20
0
ファイル: countBo.py プロジェクト: santii/consexpression
class CountBo(object):
    """
    Business rules for building and merging count tables.
    """

    def __init__(self, count):
        """
        Keep the count value object used by the other methods.

        :param count: CountVo instance describing the counting run
        """
        assert isinstance(count, CountVo)
        self._counter = count
        self.message = Message()

    def annotation_format(self):
        """
        Verify the format of the annotation file (GTF | GFF) by extension.

        Reports through ``message_4`` when the extension is anything else.
        :return: void
        """
        path = self._counter.annotation_file
        dot = path.rfind('.')
        # Take the text AFTER the last dot. The previous slice kept the dot
        # (".gtf"), so it never matched and the error was always reported.
        extension = path[dot + 1:].lower() if dot != -1 else ''

        if extension not in ('gtf', 'gff'):
            self.message.message_4('File extension of annotation file can be only GTF or GFF.')

    def execute_count(self):
        """
        Execute the counting command built by the count value object.

        :return: int return code of the subprocess
        """
        # NOTE(review): the command string is run through the shell; it must
        # only ever be built from trusted configuration values.
        text = self._counter.to_string()
        return subprocess.call(text, shell=True)

    def merge_table_count(self, list_file, out, groups_name):
        """
        Make one table with the counts of all samples.

        Each input file holds tab-separated ``gene<TAB>count`` lines. The
        output is comma-separated with one row per gene and one column per
        sample; columns are named ``<group><replicate number>``.

        :param list_file: array of count files, ordered group by group
        :param out: text file, line (gene) column (sample) data (count mapped)
        :param groups_name: treatment of samples
        :return:
        """
        # Summary rows that are not genes and must be left out of the table.
        no_genes = frozenset((
            '__no_feature', '__ambiguous', '__too_low_aQual', '__not_aligned',
            '__alignment_not_unique', 'not_aligned', 'no_feature', 'ambiguous',
            'too_low_aQual', 'alignment_not_unique'))

        rep = len(list_file) // len(groups_name)  # replicates per group
        header = ["gene"]
        gene = {}
        group_count = 0
        rep_count = 1

        for sample in list_file:
            # Once a group has all its replicates, move on to the next group.
            if rep_count > rep:
                rep_count = 1
                group_count += 1
            header.append(groups_name[group_count] + str(rep_count))

            with open(sample, 'r') as op:
                for line in op:
                    line = line.rstrip()
                    if not line:
                        continue  # blank lines carry no count
                    text = line.split("\t")
                    if text[0] in no_genes:
                        continue
                    gene.setdefault(text[0], []).append(text[1])
            rep_count += 1

        # Written only after every input was read, so a failure while reading
        # does not leave a half-written table behind.
        with open(out, 'w') as out_file:
            out_file.write(",".join(header))
            for name, counts in gene.items():
                out_file.write("\n" + name + "," + ",".join(counts))
コード例 #21
0
ファイル: edgeR.py プロジェクト: costasilvati/consexpression
class EdgeR(object):
    """
    Run an edgeR analysis through R (rpy2) and classify genes from the rows
    of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Define the edgeR object.

        :param count: path of the comma-separated count table read by R
        :param group: sequence of group (condition) names
        :param repl: number of replicates per group (int)
        :param out: path of the tab-separated result file written by R
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._column_result = [3,4]
        self._min_result = []
        self._message = Message()
        # Indexes into a split result row and the thresholds applied to them.
        self._logfc_colum = 1
        self._pvalue_colum = 3
        self._pvalue = 0.05
        self._logfc = 2

    def run_de(self, gene):
        """
        Tell whether one result row is differentially expressed.

        :param gene: sequence of fields of one result row
        :return: 1 when the absolute log fold change is at least ``_logfc``
            and the p-value is at most ``_pvalue``; 0 otherwise
        """
        lfc = float(gene[self._logfc_colum])
        pv = float(gene[self._pvalue_colum])
        strong_change = lfc >= self._logfc or lfc <= -self._logfc
        # A gene is significant when its p-value is BELOW the cut-off; the
        # comparison used to be inverted (pv >= cut-off).
        if strong_change and pv <= self._pvalue:
            return 1
        return 0

    def run_edger(self):
        """
        Execute the default analysis with edgeR.

        :return:
        :raises RRuntimeError: re-raised after being reported via ``message_9``
        """
        try:
            finish_message = ""
            robjects.r('library("limma")')
            robjects.r('library("edgeR")')
            robjects.r('table <- read.csv("' + self._table_count
                       + '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE, sep = ",")')
            robjects.r('m <- as.matrix(table)')

            assert isinstance(self._replic, int)
            # One quoted group name per sample: 'g1', 'g1', 'g2', 'g2', ...
            labels = ", ".join("'" + name + "'"
                               for name in self._groups_name
                               for _ in range(self._replic))
            robjects.r('group <- c(' + labels + ')')
            robjects.r('y.dge <- DGEList(counts = m, group = group)')

            write_result = ('write.table(y.tp$table, "' + self._output
                            + '", sep = "\t", quote = FALSE)')
            if self._replic < 1:
                self._message.message_4(" Replicates not found by edgeR. EdgeR should be executed manual form.")
            elif self._replic == 1:
                # Without replicates the dispersion cannot be estimated, so a
                # fixed BCV of 0.2 is supplied to the exact test.
                robjects.r('bcv <- 0.2')
                robjects.r('y.et <- exactTest(y.dge, dispersion = bcv^2)')
                robjects.r('y.tp <- topTags(y.et, n = 100000)')
                robjects.r('y.pvalues <- y.et$table$PValue')
                robjects.r(write_result)
                finish_message = "--- edgeR without replicates is completed!"
            else:
                # Uses robjects.r like the rest of the method; this branch
                # used to call a bare `r` name instead.
                robjects.r('y.dge <- calcNormFactors(y.dge)')
                robjects.r('y.dge <- estimateDisp(y.dge)')
                robjects.r('y.dge <- estimateCommonDisp(y.dge)')
                robjects.r('y.et <- exactTest(y.dge)')
                robjects.r('y.tp <- topTags(y.et, n = 100000)')
                robjects.r('y.pvalues <- y.et$table$PValue')
                robjects.r(write_result)
                finish_message = "--- edgeR with replicates is completed!"
            self._message.message_9(finish_message)
        except RRuntimeError as rre:
            self._message.message_9("Error in edgeR execution: " + str(rre))
            raise
コード例 #22
0
class Noiseq(object):
    """
    Drive a NOISeq analysis in R (rpy2) and classify genes from the rows of
    its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Build the NOISeq runner.

        :param count: path of the CSV count table read by R
        :param group: sequence of group (condition) names
        :param repl: number of replicates per group (int)
        :param out: path of the result file written by R
        """
        self._table_count, self._output = count, out
        self._groups_name, self._replic = group, repl
        self._message = Message()
        # Index of the field compared with the likelihood threshold.
        self._likelihood_column = len(group) + 3
        self._likelihood = 0.95

    def run_de(self, gene):
        """
        Classify one result row.

        :param gene: sequence of fields of one result row
        :return: 1 when the likelihood field reaches ``_likelihood``; 0
            otherwise, including rows whose field is not numeric
        """
        try:
            score = float(gene[self._likelihood_column])
        except ValueError:
            return 0
        return 1 if score >= self._likelihood else 0

    def run_noiseq(self):
        """
        Run the default NOISeq analysis and write its results.

        :return:
        :raises RRuntimeError: re-raised after being reported via ``message_9``
        """
        try:
            for package in ("parallel", "splines", "Matrix", "BiocGenerics",
                            "Biobase", "NOISeq"):
                robjects.r('library("' + package + '")')
            robjects.r('table <- read.csv("' + self._table_count
                       + '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('table <- as.matrix(table)')

            assert isinstance(self._replic, int)
            runs = range(1, self._replic + 1)
            # Three parallel factor columns, one entry per sample.
            tissue = ", ".join("'" + name + "'"
                               for name in self._groups_name for _ in runs)
            tissue_run = ", ".join("'" + name + str(number) + "'"
                                   for name in self._groups_name
                                   for number in runs)
            run_ids = ", ".join("'" + "R" + str(number) + "'"
                                for _ in self._groups_name for number in runs)

            robjects.r('myfactors = data.frame(Tissue=c(' + tissue
                       + '), TissueRun=c(' + tissue_run
                       + '), Run=c(' + run_ids + '))')
            robjects.r('mydata <- readData(data = table, factors = myfactors)')
            robjects.r(
                'mynoiseq = noiseq(mydata, k = 0.5, factor = "Tissue", lc = 1, replicates = "technical")'
            )
            robjects.r('results <- head(mynoiseq@results)')
            robjects.r('write.csv(results, file="' + self._output
                       + '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- NOISeq: is completed!")
        except RRuntimeError as rre:
            self._message.message_9("Error in NOISeq execution: " + str(rre))
            raise rre


#========================= CLASS TEST ==============
# inp = 'UHR_vs_Brain_gencode_TopHat_NOISeq.csv'
# inp = 'consexpression_NOISeq.csv'
# grp = "g1", "g2"
# rep = 1
# out = 'consexpression_NOISeq_out.csv'
# b = Noiseq(inp, grp, rep, out)
# read_bay = open(inp, 'r')
# c_b = 0
# for line in iter(read_bay):
#     #print('--' + line)
#     if c_b > 0:
#        gene = line.split(",")
#        print(gene[0])
#        v = b.run_de(gene)
#        print('--> '+ str(v))
#     c_b += 1
コード例 #23
0
class LimmaVoom(object):
    """
    Run a limma-voom analysis through R (rpy2) and classify genes from the
    rows of its result table.
    """

    def __init__(self, count, group, repl, out):
        """
        Initialise the limma-voom object.

        :param count: path of the CSV count table read by R
        :param group: sequence of group (condition) names
        :param repl: number of replicates per group
        :param out: path of the tab-separated result file written by R
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = out
        self._message = Message()
        # Indexes into a split result row and the thresholds applied to them.
        self._logfc_column = 2
        self._pvalue_column = 5
        self._logfc = 2
        self._pvalue = 0.05

    def run_de(self, gene):
        """
        Tell whether one result row is differentially expressed.

        :param gene: sequence of fields of one result row
        :return: 1 when the absolute log fold change is at least ``_logfc``
            and the p-value is at most ``_pvalue``; 0 otherwise
        """
        lfc = float(gene[self._logfc_column])
        pv = float(gene[self._pvalue_column])
        strong_change = lfc >= self._logfc or lfc <= -self._logfc
        # A gene is significant when its p-value is BELOW the cut-off; the
        # comparison used to be inverted (pv >= cut-off).
        if strong_change and pv <= self._pvalue:
            return 1
        return 0

    def run_limmavoom(self):
        """
        Execute the default analysis with limma-voom.

        The analysis is skipped (with a message) when there is exactly one
        replicate.

        :return:
        :raises RRuntimeError: re-raised after being reported via ``message_9``
        """
        if self._replic == 1:
            self._message.message_4(
                "limma-voom require more than one replics.")
            self._message.message_9("--- limma-voom: is kipped!")
            return

        try:
            robjects.r('library("edgeR")')
            robjects.r('library("limma")')
            robjects.r('table <- read.csv("' + self._table_count
                       + '",  row.names = 1, header = TRUE, stringsAsFactors=FALSE)')
            robjects.r('m <- as.matrix(table)')
            robjects.r('nf = calcNormFactors(m, method = "TMM")')

            # One quoted group name per sample: "g1","g1","g2","g2", ...
            labels = ",".join('"' + name + '"'
                              for name in self._groups_name
                              for _ in range(self._replic))
            robjects.r('condition = factor(c(' + labels + '))')

            # The argument is `lib.size`; it used to be misspelled `lib.ize`,
            # so the normalised library sizes never reached voom.
            robjects.r(
                'voom.data <- voom(m, model.matrix(~factor(condition)), lib.size = colSums(m) * nf)'
            )
            robjects.r('voom.data$genes = rownames(m)')
            robjects.r(
                'voom.fitlimma = lmFit(voom.data, design=model.matrix(~factor(condition)))'
            )
            robjects.r('voom.fitbayes = eBayes(voom.fitlimma)')
            robjects.r('voom.pvalues = voom.fitbayes$p.value[, 2]')
            robjects.r(
                'voom.adjpvalues = p.adjust(voom.pvalues, method="BH")')

            design = ",".join(["1"] * self._replic + ["2"] * self._replic)
            robjects.r('design <- c(' + design + ')')
            # NOTE(review): `design` is a plain vector here, for which R's
            # ncol() returns NULL - confirm which coefficient is intended.
            robjects.r(
                'data <- topTable(voom.fitbayes, coef=ncol(design), number=1000000)'
            )
            robjects.r('write.table(data, file="' + self._output
                       + '", sep = "\t", quote = FALSE)')
            self._message.message_9("--- limma-voom: is completed!")
        except RRuntimeError as rre:
            self._message.message_9("Error in limma-voom execution: " +
                                    str(rre))
            raise
コード例 #24
0
class BaySeq(object):
    """
    Commands to run a baySeq expression analysis through R (rpy2).
    """

    def __init__(self, count, group, repl, output):
        """
        Define the baySeq object.

        :param count: path of the CSV count table read by R
        :param group: sequence of group (condition) names
        :param repl: number of replicates per group (int)
        :param output: path of the tab-separated result file written by R
        """
        self._table_count = count
        self._groups_name = group
        self._replic = repl
        self._output = output
        self._message = Message()
        # Indexes into a split result row; both are shifted by
        # len(group) * repl (presumably one count column per sample -
        # verify against the layout of the written table).
        self._likelihood_column = 2 + len(group) * repl
        self._fdr_de_column = 4 + len(group) * repl
        self._likelihood = 0.95
        self._fdr = 0.1

    def run_de(self, gene):
        """
        Tell whether one result row passes the FDR and likelihood thresholds.

        :param gene: sequence of fields of one result row
        :return: 1 when the FDR field is at most ``_fdr`` and the likelihood
            field is greater than ``_likelihood``; 0 otherwise, including
            rows whose fields are not numeric
        """
        try:
            fdr = float(gene[self._fdr_de_column])
            like = float(gene[self._likelihood_column])
        except ValueError:
            # Non-numeric fields are treated as "not DE".
            return 0
        if fdr <= self._fdr and like > self._likelihood:
            return 1
        return 0

    def run_bayseq(self):
        """
        Execute the default analysis with baySeq.

        :return:
        :raises RRuntimeError: re-raised after being reported via ``message_9``
        """
        try:
            for package in ("parallel", "stats4", "BiocGenerics", "S4Vectors",
                            "IRanges", "GenomeInfoDb", "abind",
                            "GenomicRanges", "baySeq"):
                robjects.r('library("' + package + '")')

            # The fallback used to assign the undefined R symbol `NUL`.
            # NOTE(review): the cluster is never stopped in this method.
            robjects.r(
                'if(require("parallel")) cl <- makeCluster(4) else cl <- NULL')
            robjects.r('table <- read.csv("' + self._table_count
                       + '",  row.names = 1, header = TRUE, stringsAsFactors = FALSE)')
            robjects.r('m <- as.matrix(table)')

            assert isinstance(self._replic, int)
            # One quoted group name per sample: 'g1', 'g1', 'g2', 'g2', ...
            labels = ", ".join("'" + name + "'"
                               for name in self._groups_name
                               for _ in range(self._replic))
            robjects.r('replicates <- c(' + labels + ')')

            # Every model vector needs one entry per sample. NDE puts all
            # samples in one class; it used to hold only one entry per group,
            # which disagreed with DE and `replicates` whenever repl > 1.
            n_samples = len(self._groups_name) * self._replic
            nde = ",".join(["1"] * n_samples)
            # DE gives each group its own class id (1,1,2,2 for two groups
            # with two replicates, exactly as before).
            de = ",".join(str(group_id)
                          for group_id in range(1, len(self._groups_name) + 1)
                          for _ in range(self._replic))
            groups = 'groups <- list(NDE = c(' + nde + '), DE = c(' + de + '))'
            print(groups)  # echo the generated R statement
            robjects.r(groups)

            robjects.r(
                'CD <- new("countData", data = m, replicates = replicates, groups = groups)'
            )
            robjects.r('libsizes(CD) <- getLibsizes(CD)')
            robjects.r(
                'CD <- getPriors.NB(CD, samplesize = 1000, estimation = "QL", cl = cl, equalDispersions = TRUE)'
            )
            robjects.r(
                'CD <- getLikelihoods(CD, prs=c(0.5, 0.5), pET="BIC", cl=cl)')
            robjects.r(
                'write.table(topCounts(CD, group = "DE", number = 65000, normaliseData = TRUE), "'
                + self._output + '", sep="\t", quote = FALSE)')
            self._message.message_9("--- baySeq is completed!")
        except RRuntimeError as rre:
            self._message.message_9("Error in baySeq execution: " + str(rre))
            raise