class Experiment(object): """ Business object of Experiment """ def __init__(self): print("-----------------------------------" + "\n -- Welcome to consExpression -- " + "\n---------------------------------\n") self._exp_dao = None self._reference = None self._transcript = False self._count = None self._expression = None self._mapp_bo = None self.message = Message() self._fastq = [] self._out_mapp = [] self._count_table = [] self._merged_table_out = None self._edger = None self._bayseq = None self._deseq = None self._noiseq = None self._ebseq = None self._samseq = None self._limmavoom = None def execute_expression_analysis(self): """ Make analysis with counts data for mapping :return: """ print("Expression analisys start...") n = "consexpression" out_merge_table = '' self.execute_merge_table(self._count_table, out_merge_table) # 1 ------------------ edgeR ----------------- out_edger = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_edger.csv" self._edger = EdgeR(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_edger) self._edger.run_edger() # 2 ------------- BaySeq -------------------- out_bayseq = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_baySeq.csv" self._bayseq = BaySeq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_bayseq) self._bayseq.run_bayseq() # 3 ------------- DESeq -------------------- out_deseq = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_DESeq.csv" self._deseq = DESeq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_deseq) self._deseq.run_deseq() # 4 ------------- NOISeq -------------------- out_noiseq = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_NOISeq.csv" self._noiseq = Noiseq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_noiseq) self._noiseq.run_noiseq() # 5 ------------- EBSeq -------------------- out_ebseq = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_EBSeq.csv" self._ebseq = Ebseq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_ebseq) self._ebseq.run_ebseq() # 6 ------------- SAMSeq -------------------- out_samseq = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_SAMSeq.csv" self._samseq = SamSeq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_samseq) self._samseq.run_samseq() # 7 ------------- limma-voom -------------------- out_limmavoom = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_limmavoom.csv" self._limmavoom = LimmaVoom(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_limmavoom) self._limmavoom.run_limmavoom()
class Experiment(object): """ Business object of Experiment """ _count: CountBo def __init__(self): self._exp_dao = None self._reference = None self._transcript = False self._count = None self._expression = None self._mapp_bo = None self.message = Message() self._fastq = [] self._out_mapp = [] self._count_table = [] self._merged_table_out = None self._edger = None self._bayseq = None self._deseq = None self._noiseq = None self._ebseq = None self._samseq = None self._limmavoom = None def init_experiment(self, exp, file): """ Iniatialize experiment :param exp: :param file: config file :return: """ assert isinstance(exp, ExperimentDao) self._exp_dao = exp self._exp_dao.read_configuration_file(file) self._exp_dao._name = self.name_valid(self._exp_dao._name) self.rep_valid(self._exp_dao._replic) self.group_number_valid(self._exp_dao._group_number) ref = self._exp_dao._reference if self._exp_dao._reference == "": print( "You don't have a refserence genome... Expression analyse need a table count with mapping reads" ) self._merged_table_out = input("Type absolute path to table count") elif ref != "" and (self.extension_valid(ref, "fa") or self.extension_valid(ref, "fasta")): self._reference = self._exp_dao._reference # == ref else: self.message.message_3("REFERENCE FILE ") exit(0) self.directory_valid(self._exp_dao._read_directory, "reads") for i in iter(self._exp_dao._group_directory): reads = self._exp_dao._read_directory + "/" + str(i) self.directory_valid(reads, "group") # Get fastq reads path_find = self._exp_dao._read_directory + "/" + i + "/" self._fastq.append(self.get_reads_file(path_find)) if self._exp_dao._paired_end == True: self.message.message_8( "The sequence is paired-end. CONSEXPRESSION dont make paired-end analysis" ) exit(0) else: self.message.message_8("The sequence is single-end") def name_valid(self, name): """ Verify the name: if is empty change to default name :param name: name of experiment :return: boolean """ if len(name) == 0: name = "consexpression" self.message.message_7( "Experiment name is empty! The name was changed to consexpression" ) return name def rep_valid(self, rep): """ Verify if number of replicates technical and biological is valid (>= 1). basestring :param rep_t: basestring :param rep_b: void :return: """ ok = False if rep >= 1: self.message.message_1("replics") else: self.message.message_2( "1 replic or more (technique or biological)") self.message.message_3("number of replics in line 5 - 6") exit() def extension_valid(self, path, extension): """ Verify if the extension file is the expected :param path: :param extension: :return: boolean """ var_ret = False if str.endswith(path, extension): var_ret = True else: var_ret = False return var_ret def directory_valid(self, path, type): """ Verify if path is a directory :param path: path of file :param type: file is reference genome, reads? :return: void """ ok = os.path.isdir(path) if ok: self.message.message_1("directory " + type + ": " + path) else: self.message.message_2("a valid directory " + type + " path") self.message.message_3(" the directory " + type + " path (line 9)") exit() def group_number_valid(self, group_n): """ Verify the number of groups. The minimal is one int :param group_n: void :return: """ assert isinstance(group_n, int) if group_n >= 1: self.message.message_1("group number") else: self.message.message_2("1 group or more.") self.message.message_3("the number of gruoups in line 7") exit() def file_valid(self, path): """ Verify if path is a file basestring :param path: void :return: """ if os.path.isfile(path): self.message.message_1(" file: " + path) else: self.message.message_2(" a valid reference file") self.message.message_3(" the reference file path (line 7)") exit() def exceute_mapp_count(self): """ Execute Tophat and htseq-count :return: """ ref = self._reference thread = self._exp_dao._threads sing = self._exp_dao._paired_end n = self._exp_dao._name path_find = [] for grp in iter(self._fastq): for grp_file in iter(grp): bar = 1 + grp_file.rfind('/') out_mapp = grp_file[:bar] + n + "/" + grp_file[bar:] dir = grp_file[:bar] + n if os.path.isdir(dir): pass else: os.mkdir(dir, 0o755) out_mapp = out_mapp.replace('fastq', 'sam') path_find.append(out_mapp) mapp_vo = MappVo("TopHat", ref, grp_file, "", thread, out_mapp, "", sing) self._mapp_bo = MappBo(mapp_vo) mapp_exe = self._mapp_bo.execute_mapp() if mapp_exe == 0: dot = out_mapp.rfind('.') in_type = out_mapp[dot + 1:] bar = 1 + out_mapp.rfind('/') table_count = out_mapp[bar:dot] table_count = out_mapp[:bar] + table_count + "_table_count.txt" self._count_table.append(table_count) in_count = out_mapp + "/accepted_hits.sam" count_vo = CountVo(in_count, self._exp_dao._annotation_file, self._exp_dao._annotation_type, in_type, self._exp_dao._count_mode, table_count) self._count = CountBo(count_vo) if self._count.execute_count() == 0: self.message.message_8("Count Sucsessfull!!!") else: self.message.message_4( "Error in counting mapped reads...") else: self.message.message_4( "Task: Mapping don't run correctly.") self._out_mapp.append(path_find) def get_reads_file(self, dir): """ Get all fastq path of dir :param dir: path to folder of fastq sample :return: array of reads file path """ fastq_file = [] path = dir + "*.fastq" #serach for file in glob.glob(path): fastq_file.append(file) if len(fastq_file) == 0: self.message.message_7("*Not found files FASTQ in directorie " + dir) return fastq_file def execute_expression_analysis(self): """ Make analysis with counts data for mapping :return: """ print("Expression analisys start...") n = "consexpression" out_merge_table = "" if self._exp_dao._reference != "": out_merge_table = self._exp_dao._read_directory + "/" + self._exp_dao._name + "_table_count.txt" self.execute_merge_table(self._count_table, out_merge_table) else: out_merge_table = self._merged_table_out # 1 ------------------ edgeR ----------------- print("---- edgeR START! ------------") out_expression = self._exp_dao._output + "/" + self._exp_dao._name out_edger = out_expression + "_edger.csv" self._edger = EdgeR(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_edger) self._edger.run_edger() # 2 ------------- BaySeq -------------------- print("---- baySeq START! ------------") out_bayseq = out_expression + "_baySeq.csv" self._bayseq = BaySeq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_bayseq) self._bayseq.run_bayseq() # 3 ------------- DESeq -------------------- print("---- DESeq START! ------------") out_deseq = out_expression + "_DESeq.csv" self._deseq = DESeq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_deseq) self._deseq.run_deseq() # 4 ------------- NOISeq -------------------- print("---- NOISeq START! ------------") out_noiseq = out_expression + "_NOISeq.csv" self._noiseq = Noiseq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_noiseq) self._noiseq.run_noiseq() # 5 ------------- EBSeq -------------------- print("---- EBSeq START! ------------") out_ebseq = out_expression + "_EBSeq.csv" self._ebseq = Ebseq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_ebseq) self._ebseq.run_ebseq() # 6 ------------- SAMSeq -------------------- print("---- SAMSeq START! ------------") # out_samseq = out_expression + "_SAMSeq.csv" # self._samseq = SamSeq(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_samseq) # self._samseq.run_samseq() # 7 ------------- limma-voom -------------------- print("---- limma START! ------------") out_limmavoom = out_expression + "_limmavoom.csv" self._limmavoom = LimmaVoom(out_merge_table, self._exp_dao._group_name, self._exp_dao._replic, out_limmavoom) self._limmavoom.run_limmavoom() def execute_conseus(self, out): gene_de = {} read_bay = open(self._bayseq._output, 'r') c_b = 0 for line in iter(read_bay): if c_b > 0: gene = line.split("\t") v = self._bayseq.run_de(gene) if gene[0] in gene_de: aux = gene_de[gene[0]] gene_de[gene[0]] = int(aux) + int(v) else: gene_de[gene[0]] = int(v) c_b += 1 read_bay.close() # ---- edger read_edger = open(self._edger._output, 'r') c_b = 0 for line in iter(read_edger): if c_b > 0: gene = line.split("\t") v = self._edger.run_de(gene) if gene[0] in gene_de: aux = gene_de[gene[0]] gene_de[gene[0]] = int(aux) + int(v) else: gene_de[gene[0]] = int(v) c_b += 1 read_edger.close() #--- deseq read_deseq = open(self._deseq._output, 'r') c_b = 0 for line in iter(read_deseq): if c_b > 0: gene = line.split("\t") v = self._deseq.run_de(gene) if gene[1] in gene_de: aux = gene_de[gene[1]] gene_de[gene[1]] = int(aux) + int(v) else: gene_de[gene[1]] = int(v) c_b += 1 read_deseq.close() # --- noiseq read_noiseq = open(self._noiseq._output, 'r') c_b = 0 for line in iter(read_noiseq): if c_b > 0: gene = line.split(",") v = self._noiseq.run_de(gene) if gene[0] in gene_de: aux = gene_de[gene[0]] gene_de[gene[0]] = int(aux) + int(v) else: gene_de[gene[0]] = int(v) c_b += 1 read_noiseq.close() # --- samseq if self._samseq is None: print("SAMSeq results not found") else: read_samseq = open(self._samseq._output, 'r') c_b = 0 for line in iter(read_samseq): if c_b > 0: gene = line.split("\t") v = self._samseq.run_de(gene) if gene[1] in gene_de: aux = gene_de[gene[1]] gene_de[gene[1]] = int(aux) + int(v) else: gene_de[gene[1]] = int(v) c_b += 1 read_samseq.close() # --- limma if self._exp_dao._replic >= 2: read_limma = open(self._limmavoom._output, 'r') c_b = 0 for line in iter(read_limma): if c_b > 0: gene = line.split("\t") v = self._limmavoom.run_de(gene) if gene[0] in gene_de: aux = gene_de[gene[0]] gene_de[gene[0]] = int(aux) + int(v) else: gene_de[gene[0]] = int(v) c_b += 1 read_limma.close() else: print("limma require more than one replics") # --- ebseq read_ebseq = open(self._ebseq._output, 'r') c_b = 0 for line in iter(read_ebseq): if c_b > 0: gene = line.split("\t") v = self._ebseq.run_de(gene) if gene[0] in gene_de: aux = gene_de[gene[0]] gene_de[gene[0]] = int(aux) + int(v) else: gene_de[gene[0]] = int(v) c_b += 1 read_ebseq.close() #--- write results header = 'gene, indications' out_cons = open(out, 'w') out_cons.write(header) names = gene_de.keys() print(len(names)) for i in iter(names): if (gene_de[i]) >= 4: out_cons.write("\n" + i + "," + str(gene_de[i])) def execute_merge_table(self, out_mapp_list, out_name): """ Make a merge table whit counts :param out_mapp_list: :param out_name: :return: """ self._count.merge_table_count(out_mapp_list, out_name, self._exp_dao._group_name)