def read_header(self):
    """
    Return the header of this reader, parsing it on first use.

    On the first call one line is read through ``FileReader.readline``,
    trailing whitespace is stripped, the line is split on tabs and the
    resulting list is wrapped in a ``MatrixHeader``. The result is stored in
    ``self.header`` and returned as-is by every later call.

    Returns:
        The cached ``MatrixHeader`` (or whatever non-None value
        ``self.header`` already holds).
    """
    # Identity test: ``== None`` would dispatch to a custom ``__eq__`` on the
    # header object and could wrongly trigger a second read.
    if self.header is None:
        # The base-class readline is called explicitly -- presumably to
        # bypass an override in this class; confirm against the class body.
        line = FileReader.readline(self).rstrip()
        hdr = line.split("\t")
        self.header = MatrixHeader(hdr)
    return self.header
def merge(log, input, output, gitools_output):
    """
    Merge repeated rows of a tab-separated matrix and write two result files.

    Each data row is split into a row name (column 0), an "upreg" half
    (columns 1..7) and a "downreg" half (columns 8 onwards). Both halves are
    handed to ``merge_data`` together with a per-half dictionary keyed by row
    name. According to the original description, repeated rows are merged by
    keeping the lowest pvalue and, on ties, the one with the greater n; that
    policy lives in ``merge_data`` and is not visible here.

    Two files are written:

    * ``output``: the original header line followed by one line per row name
      (row name, upreg fields, downreg fields).
    * ``gitools_output``: a header ``column``, ``row`` plus every input column
      name starting with ``upreg_`` (prefix removed), followed by two lines
      per row name, labelled ``upreg`` and ``downreg``.

    Args:
        log: Logger-like object; only ``debug`` is called.
        input: Source passed to ``FileReader``.
        output: Destination passed to ``FileWriter`` for the merged matrix.
        gitools_output: Destination passed to ``FileWriter`` for the
            two-lines-per-row file.

    Returns:
        Tuple ``(upreg_count, downreg_count)``: the sums of the values
        returned by ``merge_data`` for each half.

    Raises:
        KeyError: If a row name present in the upreg dictionary is missing
            from the downreg dictionary.
    """
    # First column of the downreg half; columns 1..mid_index-1 are upreg.
    mid_index = 8

    upreg = {}
    downreg = {}
    upreg_count = 0
    downreg_count = 0

    f = FileReader(input)
    try:
        hdr = f.readline().rstrip().split("\t")

        for line in f:
            line = line.rstrip()
            if not line:
                continue

            fields = line.split("\t")
            row_name = fields[0]

            upreg_count += merge_data(row_name, fields[1:mid_index], upreg)
            downreg_count += merge_data(row_name, fields[mid_index:], downreg)
    finally:
        # Release the input even when parsing or merging fails.
        f.close()

    log.debug("Total rows: upreg = {}, downreg = {}".format(len(upreg), len(downreg)))
    log.debug("Merged rows: upreg = {}, downreg = {}".format(upreg_count, downreg_count))

    ofile = FileWriter(output)
    try:
        ofile.write("\t".join(hdr) + "\n")

        gfile = FileWriter(gitools_output)
        try:
            # len("upreg_") == 6, so x[6:] is the column name without prefix.
            gitools_columns = [x[6:] for x in hdr if x.startswith("upreg_")]
            gfile.write("column\trow\t" + "\t".join(gitools_columns) + "\n")

            # Rows are driven by the upreg dictionary; downreg is expected to
            # hold the same row names.
            for row_name in upreg:
                upreg_data_join = "\t".join(upreg[row_name])
                downreg_data_join = "\t".join(downreg[row_name])

                ofile.write("\t".join((row_name, upreg_data_join, downreg_data_join)) + "\n")

                gfile.write("upreg\t" + row_name + "\t" + upreg_data_join + "\n")
                gfile.write("downreg\t" + row_name + "\t" + downreg_data_join + "\n")
        finally:
            gfile.close()
    finally:
        # Close the writers on every exit path so no handle is leaked.
        ofile.close()

    return (upreg_count, downreg_count)