def _create_json(self):
    """Write ``plotter-config.json`` into the workspace at ``self.output_dir``.

    Every sample in ``self.samples`` is labelled with the comma-joined names
    of the ``self.groups`` terms whose specification matches the sample's
    tags or its output directory name; a sample matching no term is labelled
    ``'ungrouped'``.  The file records absolute paths for the reference, the
    parent/child peak relation GFF files and each sample's filtered BAM,
    together with the distinct group labels in first-seen order.
    """
    workspace = io.Workspace(self.output_dir, must_exist=False)

    group_labels = []
    sample_records = []
    for sample in self.samples:
        matched = [
            selection.term_name(term)
            for term in self.groups
            if selection.matches(
                selection.term_specification(term),
                sample.tags + [sample.output_dir],
            )
        ]
        if matched:
            label = ','.join(matched)
        else:
            label = 'ungrouped'

        # Keep group labels unique while preserving first-seen order.
        if label not in group_labels:
            group_labels.append(label)

        bam_path = workspace/('samples', sample.output_dir, 'alignments_filtered_sorted.bam')
        sample_records.append({
            'name': sample.output_dir,
            'bam': os.path.abspath(bam_path),
            'group': label,
            'tags': sample.tags,
        })

    config = collections.OrderedDict([
        ('reference', os.path.abspath(self.reference)),
        ('extension', self.extension),
        ('genes', os.path.abspath(workspace/('peaks', 'relation-parent.gff'))),
        ('peaks', os.path.abspath(workspace/('peaks', 'relation-child.gff'))),
        ('groups', group_labels),
        ('samples', sample_records),
    ])

    with open(workspace/"plotter-config.json", "wb") as f:
        json.dump(config, f, indent=4)
def run(self):
    """Collapse per-sample tail statistics into per-group columns and write a CSV.

    Reads the grouped table named by ``self.counts``.  For each term in
    ``self.groups`` the samples matching the term are combined, feature by
    feature:

    * ``Count`` and ``Tail_count`` are summed;
    * ``Tail`` and ``Proportion`` are the plain mean of the samples whose
      value is not ``'NA'`` (``'NA'`` when no sample has a value);
    * ``Annotation`` is copied through unchanged.

    Each output group gets a ``#sampleTags=`` comment holding the group name
    followed by those tags of the group's first sample that every other
    sample in the group also carries.  The result is written to
    ``self.prefix + '.csv'``.

    Raises:
        AssertionError: if a group term matches no sample.
    """
    data = io.read_grouped_table(
        self.counts,
        [('Count', str), ('Annotation', str), ('Tail_count', str), ('Tail', str), ('Proportion', str)],
        'Count',
    )

    features = data['Count'].keys()
    samples = data['Count'].value_type().keys()

    # Each sample starts tagged with just its own name; a '#sampleTags='
    # comment (sample name first, then its tags) replaces that default.
    tags = {}
    for sample in samples:
        tags[sample] = [sample]
    for line in data.comments:
        if line.startswith('#sampleTags='):
            parts = line[len('#sampleTags='):].split(',')
            tags[parts[0]] = parts

    group_names = []
    groups = []
    group_tags = []
    for term in self.groups:
        select = selection.term_specification(term)
        name = selection.term_name(term)
        group = [sample for sample in samples if selection.matches(select, tags[sample])]
        assert group, 'Empty group: ' + name

        # A tag is kept only when it is an element of every other member's
        # tag list.  The previous code used a substring test against the
        # individual tag strings, which did not compute the shared tags.
        others = group[1:]
        this_group_tags = [name]
        for tag in tags[group[0]]:
            if tag == name:
                continue
            if all(tag in tags[sample] for sample in others):
                this_group_tags.append(tag)

        group_names.append(name)
        groups.append(group)
        group_tags.append(this_group_tags)

    result = io.Grouped_table()
    result.comments = ['#Counts']
    for item in group_tags:
        result.comments.append('#sampleTags=' + ','.join(item))

    def total(column, feature, group):
        # Sum of an integer column over the samples of one group, as text.
        values = data[column][feature]
        return str(sum(int(values[sample]) for sample in group))

    def mean_or_na(column, feature, group):
        # Mean of the non-'NA' values over one group, as text; 'NA' if none.
        values = data[column][feature]
        present = [float(values[sample]) for sample in group if values[sample] != 'NA']
        return str(sum(present) / len(present)) if present else 'NA'

    count = [[total('Count', feature, group) for group in groups] for feature in features]
    tail_count = [[total('Tail_count', feature, group) for group in groups] for feature in features]
    tail = [[mean_or_na('Tail', feature, group) for group in groups] for feature in features]
    proportion = [[mean_or_na('Proportion', feature, group) for group in groups] for feature in features]

    matrix = io.named_matrix_type(features, group_names)
    result['Count'] = matrix(count)
    result['Annotation'] = data['Annotation']
    result['Tail_count'] = matrix(tail_count)
    result['Tail'] = matrix(tail)
    result['Proportion'] = matrix(proportion)
    result.write_csv(self.prefix + '.csv')
def run(self):
    """Run the differential test R script and build the HTML report.

    Steps, in order:

    1. Check ``self.method`` and that empirical controls are not combined
       with ``"limma"``.
    2. Read the sample names and ``#sampleTags=`` comments from the genewise
       counts file.
    3. Build the design matrix (one row per sample, one column per term in
       ``self.alt + self.null``), dropping samples whose row is all zero,
       plus the doubled "pairs" design with interaction columns and a
       trailing ``pair2`` indicator.
    4. Print both designs and call ``runr.run_script`` with ``TEST_R``.
    5. Unless ``self.tell`` is set, run ``degust.py`` on each enabled result
       table and write a report linking the outputs.
    """
    assert self.method in ("limma", "fitnoise1", "fitnoise2"), "Unknown method."
    assert self.method != "limma" or not self.empirical_controls

    title = self.get_title()

    n_alt = len(self.alt)
    n_null = len(self.null)  # not referenced below

    suffix = '-dedup' if self.dedup else ''

    def expression_file(level, leaf):
        return join(self.analysis, 'expression', level + suffix, leaf)

    def shift_file(leaf):
        return join(self.analysis, 'peak-shift' + suffix, leaf)

    genewise_filename = expression_file('genewise', 'counts.csv')
    genewise_norm_filename = expression_file('genewise', 'norm.csv')
    primarypeakwise_filename = expression_file('primarypeakwise', 'counts.csv')
    primarypeakwise_norm_filename = expression_file('primarypeakwise', 'norm.csv')
    peakwise_filename = expression_file('peakwise', 'counts.csv')
    peakwise_norm_filename = expression_file('peakwise', 'norm.csv')
    pairwise_filename = shift_file('individual-pairs.csv')
    pairwise_norm_filename = shift_file('individual-pairs-norm.csv')

    # Only the header information of the genewise table is needed.
    reader = io.Table_reader(genewise_filename, 'Count')
    reader.close()
    samples = [
        heading
        for index, heading in enumerate(reader.headings)
        if reader.groups[index] == 'Count'
    ]

    # Default tag list is the sample's own name; '#sampleTags=' lines
    # (sample name first, then tags) replace it.
    tags = {}
    for sample in samples:
        tags[sample] = [sample]
    for comment in reader.comments:
        if comment.startswith('#sampleTags='):
            parts = comment[len('#sampleTags='):].split(',')
            tags[parts[0]] = parts

    all_terms = self.alt + self.null

    # One list of per-sample weights per term, then transpose so that each
    # row corresponds to a sample.
    weights_by_term = []
    for spec in (selection.term_specification(term) for term in all_terms):
        weights_by_term.append([selection.weight(spec, tags[sample]) for sample in samples])
    sample_rows = zip(*weights_by_term)

    select = [any(row) for row in sample_rows]
    model = [row for row, keep in zip(sample_rows, select) if keep]
    model_rows = [sample for keep, sample in zip(select, samples) if keep]

    def degust_safe(name):
        # degust complains if name starts with '-', delimits with commas
        prefix = '.' if name[:1] == '-' else ''
        return prefix + name.replace(',', ';')

    model_columns = [degust_safe(selection.term_name(term)) for term in all_terms]

    pairs_n_alt = n_alt
    pairs_select = select + select
    first_peak = [(0,) * n_alt + row + (0,) for row in model]
    second_peak = [row[:n_alt] + row + (1,) for row in model]
    pairs_model = first_peak + second_peak
    pairs_model_columns = (
        [column + '-interaction' for column in model_columns[:n_alt]]
        + model_columns
        + ['pair2']
    )
    pairs_model_rows = (
        [name + '-peak1' for name in model_rows]
        + [name + '-peak2' for name in model_rows]
    )

    def format_row(row, name):
        return "%s %s" % (''.join('%7g ' % value for value in row), name)

    header = '[' + ('-' * (8 * n_alt - 2)) + '] test coefficients'
    design_str = header + '\n' + ''.join(
        format_row(row, name) + '\n' for row, name in zip(model, model_rows)
    )

    # Single-argument parenthesised prints emit exactly what the equivalent
    # Python 2 print statements do.
    print('')
    print("Design matrix")
    print(design_str)
    print('')
    print('Pair design matrix')
    print(header)
    for row, name in zip(pairs_model, pairs_model_rows):
        print(format_row(row, name))
    print('')

    workspace = self.get_workspace()

    runr.run_script(
        TEST_R, self.tell,
        DIR=workspace.working_dir,
        METHOD=self.method,
        WEIGHT=self.weight,
        EMPIRICAL_CONTROLS=self.empirical_controls,
        MIN_READS=self.min_reads,
        BIOTYPE=self.biotype,
        RELATION=self.relation,
        QUANTILE_TAIL=self.quantile_tail,
        DO_EXPRESSION=self.do_expression,
        DO_TAIL_LENGTH=self.do_tail_length,
        VERBOSE=self.verbose,

        GENEWISE_FILENAME=genewise_filename,
        GENEWISE_NORM_FILENAME=genewise_norm_filename,
        PRIMARYPEAKWISE_FILENAME=primarypeakwise_filename,
        PRIMARYPEAKWISE_NORM_FILENAME=primarypeakwise_norm_filename,
        PEAKWISE_FILENAME=peakwise_filename,
        PEAKWISE_NORM_FILENAME=peakwise_norm_filename,
        PAIRWISE_FILENAME=pairwise_filename,
        PAIRWISE_NORM_FILENAME=pairwise_norm_filename,

        N_ALT=n_alt,
        SELECT=select,
        MODEL=model,
        MODEL_COLUMNS=model_columns,
        PAIRS_N_ALT=pairs_n_alt,
        PAIRS_SELECT=pairs_select,
        PAIRS_MODEL=pairs_model,
        PAIRS_MODEL_COLUMNS=pairs_model_columns,
    )
    if self.tell:
        return

    reporter = reporting.Reporter(workspace.working_dir, title, style=web.style())

    if self.dedup:
        reporter.p('Read deduplication was used.')

    single_terms = model_columns[:n_alt]
    pair_terms = pairs_model_columns[:n_alt]
    # (is_expression, entities, result stem, average column, subtitle, logFC terms)
    analyses = [
        (True, 'genes', 'genewise-voom', 'avg.expression', 'Genewise expression level', single_terms),
        (False, 'genes', 'genewise-tail', 'avg.tail', 'Genewise tail length', single_terms),
        (True, 'primary peaks', 'primarypeakwise-voom', 'avg.expression', 'Primary-peakwise expression level', single_terms),
        (False, 'primary peaks', 'primarypeakwise-tail', 'avg.tail', 'Primary-peakwise tail length', single_terms),
        (True, 'peaks', 'peakwise-voom', 'avg.expression', 'Peakwise expression level', single_terms),
        (False, 'peaks', 'peakwise-tail', 'avg.tail', 'Peakwise tail length', single_terms),
        (True, 'peak pairs', 'pairwise-voom', 'avg.expression', 'Peak-pair expression shift', pair_terms),
        (False, 'peak pairs', 'pairwise-tail', 'avg.tail', 'Peak-pair tail length shift', pair_terms),
    ]

    reporter.write('<table>\n')
    for is_expression, _entities, result, aveexpr, subtitle, terms in analyses:
        enabled = self.do_expression if is_expression else self.do_tail_length
        if not enabled:
            continue

        html_file = workspace/(result+'.html')
        table_file = workspace/(result+'-toptable.csv')

        io.execute([
            'degust.py',
            '--name', title + ' : ' + subtitle,
            '--avg', aveexpr,
            '--primary', 'baseline',
            '--logFC', ','.join(terms),
            '--fdr', 'adj.P.Val',
            '--info', 'gene,locus_tag,product,reads,polya.reads,tail.lengths,'+aveexpr,
            '--notour', '1',
            '--out', html_file,
            table_file,
        ])

        with open(workspace/(result+'.txt'), 'rU') as f:
            lines = f.readlines()
        # The last two lines go under the links in the left cell; everything
        # before them fills the right cell.
        summary = lines[-2:]
        detail = lines[:-2]

        reporter.write('<tr><td valign="top" width="33%">')
        reporter.subheading(reporter.href(html_file, subtitle))
        links = reporter.href(table_file, 'Spreadsheet')
        if result.endswith('voom'):
            links += ', ' + reporter.href(workspace/(result+'.png'), 'voom plot')
        reporter.p(links)
        for text in summary:
            reporter.p(text.strip())
        reporter.write('</td><td valign="top"><br/><br/>')
        for text in detail:
            reporter.write(text.strip() + '<br/>\n')
        reporter.write('</td></tr>')
    reporter.write('</table>\n')

    reporter.subheading("Design matrix")
    reporter.write('<pre>' + design_str + '</pre>')

    reporter.close()
def run(self):
    """Collapse per-sample tail statistics into per-group columns and write a CSV.

    Reads the grouped table named by ``self.counts``.  For each term in
    ``self.groups`` the samples matching the term are combined, feature by
    feature:

    * ``Count`` and ``Tail_count`` are summed;
    * ``Tail`` and ``Proportion`` are the plain mean of the samples whose
      value is not ``"NA"`` (``"NA"`` when no sample has a value);
    * ``Annotation`` is copied through unchanged.

    Each output group gets a ``#sampleTags=`` comment holding the group name
    followed by those tags of the group's first sample that every other
    sample in the group also carries.  The result is written to
    ``self.prefix + ".csv"``.

    Raises:
        AssertionError: if a group term matches no sample.
    """
    data = io.read_grouped_table(
        self.counts,
        [("Count", str), ("Annotation", str), ("Tail_count", str), ("Tail", str), ("Proportion", str)],
        "Count",
    )

    features = data["Count"].keys()
    samples = data["Count"].value_type().keys()

    # Each sample starts tagged with just its own name; a "#sampleTags="
    # comment (sample name first, then its tags) replaces that default.
    tags = {}
    for sample in samples:
        tags[sample] = [sample]
    for line in data.comments:
        if line.startswith("#sampleTags="):
            parts = line[len("#sampleTags="):].split(",")
            tags[parts[0]] = parts

    group_names = []
    groups = []
    group_tags = []
    for term in self.groups:
        select = selection.term_specification(term)
        name = selection.term_name(term)
        group = [sample for sample in samples if selection.matches(select, tags[sample])]
        assert group, "Empty group: " + name

        # A tag is kept only when it is an element of every other member's
        # tag list.  The previous code used a substring test against the
        # individual tag strings, which did not compute the shared tags.
        remaining = group[1:]
        this_group_tags = [name]
        for tag in tags[group[0]]:
            if tag == name:
                continue
            if all(tag in tags[sample] for sample in remaining):
                this_group_tags.append(tag)

        group_names.append(name)
        groups.append(group)
        group_tags.append(this_group_tags)

    result = io.Grouped_table()
    result.comments = ["#Counts"]
    for item in group_tags:
        result.comments.append("#sampleTags=" + ",".join(item))

    def total(column, feature, group):
        # Sum of an integer column over the samples of one group, as text.
        values = data[column][feature]
        return str(sum(int(values[sample]) for sample in group))

    def mean_or_na(column, feature, group):
        # Mean of the non-"NA" values over one group, as text; "NA" if none.
        values = data[column][feature]
        present = [float(values[sample]) for sample in group if values[sample] != "NA"]
        return str(sum(present) / len(present)) if present else "NA"

    count = [[total("Count", feature, group) for group in groups] for feature in features]
    tail_count = [[total("Tail_count", feature, group) for group in groups] for feature in features]
    tail = [[mean_or_na("Tail", feature, group) for group in groups] for feature in features]
    proportion = [[mean_or_na("Proportion", feature, group) for group in groups] for feature in features]

    matrix = io.named_matrix_type(features, group_names)
    result["Count"] = matrix(count)
    result["Annotation"] = data["Annotation"]
    result["Tail_count"] = matrix(tail_count)
    result["Tail"] = matrix(tail)
    result["Proportion"] = matrix(proportion)
    result.write_csv(self.prefix + ".csv")
def run(self):
    """Run the differential test R script and build the HTML report.

    Steps, in order:

    1. Check ``self.method`` and that empirical controls are not combined
       with ``"limma"``.
    2. Read the sample names and ``#sampleTags=`` comments from the genewise
       counts file.
    3. Build the design matrix (one row per sample, one column per term in
       ``self.alt + self.null``), dropping samples whose row is all zero,
       plus the doubled "pairs" design with interaction columns and a
       trailing ``pair2`` indicator.
    4. Print both designs and call ``runr.run_script`` with ``TEST_R``,
       passing the path of ``tail_tools.R`` next to this module as ``SOURCE``.
    5. Unless ``self.tell`` is set, run ``degust.py`` on each enabled result
       table and write a report linking the outputs.
    """
    assert self.method in ("limma", "fitnoise1", "fitnoise2"), "Unknown method."
    assert self.method != "limma" or not self.empirical_controls

    title = self.get_title()

    n_alt = len(self.alt)
    n_null = len(self.null)  # not referenced below

    suffix = "-dedup" if self.dedup else ""

    def expression_file(level, leaf):
        return join(self.analysis, "expression", level + suffix, leaf)

    def shift_file(leaf):
        return join(self.analysis, "peak-shift" + suffix, leaf)

    genewise_filename = expression_file("genewise", "counts.csv")
    genewise_norm_filename = expression_file("genewise", "norm.csv")
    primarypeakwise_filename = expression_file("primarypeakwise", "counts.csv")
    primarypeakwise_norm_filename = expression_file("primarypeakwise", "norm.csv")
    peakwise_filename = expression_file("peakwise", "counts.csv")
    peakwise_norm_filename = expression_file("peakwise", "norm.csv")
    pairwise_filename = shift_file("individual-pairs.csv")
    pairwise_norm_filename = shift_file("individual-pairs-norm.csv")

    # Only the header information of the genewise table is needed.
    reader = io.Table_reader(genewise_filename, "Count")
    reader.close()
    samples = [
        heading
        for index, heading in enumerate(reader.headings)
        if reader.groups[index] == "Count"
    ]

    # Default tag list is the sample's own name; "#sampleTags=" lines
    # (sample name first, then tags) replace it.
    tags = {}
    for sample in samples:
        tags[sample] = [sample]
    for comment in reader.comments:
        if comment.startswith("#sampleTags="):
            parts = comment[len("#sampleTags="):].split(",")
            tags[parts[0]] = parts

    all_terms = self.alt + self.null

    # One list of per-sample weights per term, then transpose so that each
    # row corresponds to a sample.
    weights_by_term = []
    for spec in (selection.term_specification(term) for term in all_terms):
        weights_by_term.append([selection.weight(spec, tags[sample]) for sample in samples])
    sample_rows = zip(*weights_by_term)

    select = [any(row) for row in sample_rows]
    model = [row for row, keep in zip(sample_rows, select) if keep]
    model_rows = [sample for keep, sample in zip(select, samples) if keep]

    def degust_safe(name):
        # degust complains if name starts with '-', delimits with commas
        prefix = "." if name[:1] == "-" else ""
        return prefix + name.replace(",", ";")

    model_columns = [degust_safe(selection.term_name(term)) for term in all_terms]

    pairs_n_alt = n_alt
    pairs_select = select + select
    first_peak = [(0,) * n_alt + row + (0,) for row in model]
    second_peak = [row[:n_alt] + row + (1,) for row in model]
    pairs_model = first_peak + second_peak
    pairs_model_columns = (
        [column + "-interaction" for column in model_columns[:n_alt]]
        + model_columns
        + ["pair2"]
    )
    pairs_model_rows = (
        [name + "-peak1" for name in model_rows]
        + [name + "-peak2" for name in model_rows]
    )

    def format_row(row, name):
        return "%s %s" % ("".join("%7g " % value for value in row), name)

    header = "[" + ("-" * (8 * n_alt - 2)) + "] test coefficients"
    design_str = header + "\n" + "".join(
        format_row(row, name) + "\n" for row, name in zip(model, model_rows)
    )

    # Single-argument parenthesised prints emit exactly what the equivalent
    # Python 2 print statements do.
    print("")
    print("Design matrix")
    print(design_str)
    print("")
    print("Pair design matrix")
    print(header)
    for row, name in zip(pairs_model, pairs_model_rows):
        print(format_row(row, name))
    print("")

    workspace = self.get_workspace()

    runr.run_script(
        TEST_R,
        self.tell,
        SOURCE=os.path.join(os.path.dirname(__file__), "tail_tools.R"),
        DIR=workspace.working_dir,
        METHOD=self.method,
        WEIGHT=self.weight,
        EMPIRICAL_CONTROLS=self.empirical_controls,
        MIN_READS=self.min_reads,
        BIOTYPE=self.biotype,
        RELATION=self.relation,
        QUANTILE_TAIL=self.quantile_tail,
        DO_EXPRESSION=self.do_expression,
        DO_TAIL_LENGTH=self.do_tail_length,
        VERBOSE=self.verbose,
        GENEWISE_FILENAME=genewise_filename,
        GENEWISE_NORM_FILENAME=genewise_norm_filename,
        PRIMARYPEAKWISE_FILENAME=primarypeakwise_filename,
        PRIMARYPEAKWISE_NORM_FILENAME=primarypeakwise_norm_filename,
        PEAKWISE_FILENAME=peakwise_filename,
        PEAKWISE_NORM_FILENAME=peakwise_norm_filename,
        PAIRWISE_FILENAME=pairwise_filename,
        PAIRWISE_NORM_FILENAME=pairwise_norm_filename,
        N_ALT=n_alt,
        SELECT=select,
        MODEL=model,
        MODEL_COLUMNS=model_columns,
        PAIRS_N_ALT=pairs_n_alt,
        PAIRS_SELECT=pairs_select,
        PAIRS_MODEL=pairs_model,
        PAIRS_MODEL_COLUMNS=pairs_model_columns,
    )
    if self.tell:
        return

    reporter = reporting.Reporter(workspace.working_dir, title, style=web.style())

    if self.dedup:
        reporter.p("Read deduplication was used.")

    single_terms = model_columns[:n_alt]
    pair_terms = pairs_model_columns[:n_alt]
    # (is_expression, entities, result stem, average column, subtitle, logFC terms)
    analyses = [
        (True, "genes", "genewise-voom", "avg.expression", "Genewise expression level", single_terms),
        (False, "genes", "genewise-tail", "avg.tail", "Genewise tail length", single_terms),
        (True, "primary peaks", "primarypeakwise-voom", "avg.expression", "Primary-peakwise expression level", single_terms),
        (False, "primary peaks", "primarypeakwise-tail", "avg.tail", "Primary-peakwise tail length", single_terms),
        (True, "peaks", "peakwise-voom", "avg.expression", "Peakwise expression level", single_terms),
        (False, "peaks", "peakwise-tail", "avg.tail", "Peakwise tail length", single_terms),
        (True, "peak pairs", "pairwise-voom", "avg.expression", "Peak-pair expression shift", pair_terms),
        (False, "peak pairs", "pairwise-tail", "avg.tail", "Peak-pair tail length shift", pair_terms),
    ]

    reporter.write("<table>\n")
    for is_expression, _entities, result, aveexpr, subtitle, terms in analyses:
        enabled = self.do_expression if is_expression else self.do_tail_length
        if not enabled:
            continue

        html_file = workspace / (result + ".html")
        table_file = workspace / (result + "-toptable.csv")

        io.execute(
            [
                "degust.py",
                "--name", title + " : " + subtitle,
                "--avg", aveexpr,
                "--primary", "baseline",
                "--logFC", ",".join(terms),
                "--fdr", "adj.P.Val",
                "--info", "gene,locus_tag,product,reads,polya.reads,tail.lengths," + aveexpr,
                "--notour", "1",
                "--out", html_file,
                table_file,
            ]
        )

        with open(workspace / (result + ".txt"), "rU") as f:
            lines = f.readlines()
        # The last two lines go under the links in the left cell; everything
        # before them fills the right cell.
        summary = lines[-2:]
        detail = lines[:-2]

        reporter.write('<tr><td valign="top" width="33%">')
        reporter.subheading(reporter.href(html_file, subtitle))
        links = reporter.href(table_file, "Spreadsheet")
        if result.endswith("voom"):
            links += ", " + reporter.href(workspace / (result + ".png"), "voom plot")
        reporter.p(links)
        for text in summary:
            reporter.p(text.strip())
        reporter.write('</td><td valign="top"><br/><br/>')
        for text in detail:
            reporter.write(text.strip() + "<br/>\n")
        reporter.write("</td></tr>")
    reporter.write("</table>\n")

    reporter.subheading("Design matrix")
    reporter.write("<pre>" + design_str + "</pre>")

    reporter.close()
def run(self):
    """Collapse per-sample tail statistics into per-group columns and write a CSV.

    Reads the grouped table named by ``self.counts``.  For each term in
    ``self.groups`` the samples matching the term are combined, feature by
    feature:

    * ``Count`` and ``Tail_count`` are summed;
    * ``Tail`` and ``Proportion`` are the plain mean of the samples whose
      value is not ``'NA'`` (``'NA'`` when no sample has a value);
    * ``Annotation`` is copied through unchanged.

    Each output group gets a ``#sampleTags=`` comment holding the group name
    followed by those tags of the group's first sample that every other
    sample in the group also carries.  The result is written to
    ``self.prefix + '.csv'``.

    Raises:
        AssertionError: if a group term matches no sample.
    """
    data = io.read_grouped_table(
        self.counts,
        [('Count', str), ('Annotation', str), ('Tail_count', str),
         ('Tail', str), ('Proportion', str)],
        'Count',
    )

    features = data['Count'].keys()
    samples = data['Count'].value_type().keys()

    # Each sample starts tagged with just its own name; a '#sampleTags='
    # comment (sample name first, then its tags) replaces that default.
    tags = {}
    for sample in samples:
        tags[sample] = [sample]
    for line in data.comments:
        if line.startswith('#sampleTags='):
            parts = line[len('#sampleTags='):].split(',')
            tags[parts[0]] = parts

    group_names = []
    groups = []
    group_tags = []
    for term in self.groups:
        select = selection.term_specification(term)
        name = selection.term_name(term)
        group = [
            sample for sample in samples
            if selection.matches(select, tags[sample])
        ]
        assert group, 'Empty group: ' + name

        # A tag is kept only when it is an element of every other member's
        # tag list.  The previous code used a substring test against the
        # individual tag strings, which did not compute the shared tags.
        rest = group[1:]
        this_group_tags = [name]
        for tag in tags[group[0]]:
            if tag == name:
                continue
            if all(tag in tags[sample] for sample in rest):
                this_group_tags.append(tag)

        group_names.append(name)
        groups.append(group)
        group_tags.append(this_group_tags)

    result = io.Grouped_table()
    result.comments = ['#Counts']
    for item in group_tags:
        result.comments.append('#sampleTags=' + ','.join(item))

    def total(column, feature, group):
        # Sum of an integer column over the samples of one group, as text.
        values = data[column][feature]
        return str(sum(int(values[sample]) for sample in group))

    def mean_or_na(column, feature, group):
        # Mean of the non-'NA' values over one group, as text; 'NA' if none.
        values = data[column][feature]
        present = [
            float(values[sample]) for sample in group
            if values[sample] != 'NA'
        ]
        return str(sum(present) / len(present)) if present else 'NA'

    count = [[total('Count', feature, group) for group in groups]
             for feature in features]
    tail_count = [[total('Tail_count', feature, group) for group in groups]
                  for feature in features]
    tail = [[mean_or_na('Tail', feature, group) for group in groups]
            for feature in features]
    proportion = [[mean_or_na('Proportion', feature, group) for group in groups]
                  for feature in features]

    matrix = io.named_matrix_type(features, group_names)
    result['Count'] = matrix(count)
    result['Annotation'] = data['Annotation']
    result['Tail_count'] = matrix(tail_count)
    result['Tail'] = matrix(tail)
    result['Proportion'] = matrix(proportion)
    result.write_csv(self.prefix + '.csv')
def run(self):
    """Run the differential test R script and build the HTML report.

    Steps, in order:

    1. Check ``self.method`` and that empirical controls are not combined
       with ``"limma"``.
    2. Read the sample names and ``#sampleTags=`` comments from the genewise
       counts file.
    3. Build the design matrix (one row per sample, one column per term in
       ``self.alt + self.null``), dropping samples whose row is all zero,
       plus the doubled "pairs" design with interaction columns and a
       trailing ``pair2`` indicator.
    4. Print both designs and call ``runr.run_script`` with ``TEST_R``.
    5. Unless ``self.tell`` is set, run ``degust.py`` on each enabled result
       table and write a report linking the outputs.
    """
    assert self.method in ("limma", "fitnoise1", "fitnoise2"), "Unknown method."
    assert self.method != "limma" or not self.empirical_controls

    title = self.get_title()

    n_alt = len(self.alt)
    n_null = len(self.null)  # not referenced below

    suffix = '-dedup' if self.dedup else ''

    def expression_file(level, leaf):
        return join(self.analysis, 'expression', level + suffix, leaf)

    def shift_file(leaf):
        return join(self.analysis, 'peak-shift' + suffix, leaf)

    genewise_filename = expression_file('genewise', 'counts.csv')
    genewise_norm_filename = expression_file('genewise', 'norm.csv')
    primarypeakwise_filename = expression_file('primarypeakwise', 'counts.csv')
    primarypeakwise_norm_filename = expression_file('primarypeakwise', 'norm.csv')
    peakwise_filename = expression_file('peakwise', 'counts.csv')
    peakwise_norm_filename = expression_file('peakwise', 'norm.csv')
    pairwise_filename = shift_file('individual-pairs.csv')
    pairwise_norm_filename = shift_file('individual-pairs-norm.csv')

    # Only the header information of the genewise table is needed.
    reader = io.Table_reader(genewise_filename, 'Count')
    reader.close()
    samples = [
        heading
        for position, heading in enumerate(reader.headings)
        if reader.groups[position] == 'Count'
    ]

    # Default tag list is the sample's own name; '#sampleTags=' lines
    # (sample name first, then tags) replace it.
    tags = {}
    for sample in samples:
        tags[sample] = [sample]
    for comment in reader.comments:
        if comment.startswith('#sampleTags='):
            parts = comment[len('#sampleTags='):].split(',')
            tags[parts[0]] = parts

    all_terms = self.alt + self.null

    # One list of per-sample weights per term, then transpose so that each
    # row corresponds to a sample.
    weights_by_term = []
    for spec in (selection.term_specification(term) for term in all_terms):
        weights_by_term.append(
            [selection.weight(spec, tags[sample]) for sample in samples])
    sample_rows = zip(*weights_by_term)

    select = [any(row) for row in sample_rows]
    model = [row for row, keep in zip(sample_rows, select) if keep]
    model_rows = [sample for keep, sample in zip(select, samples) if keep]

    def degust_safe(name):
        # degust complains if name starts with '-', delimits with commas
        prefix = '.' if name[:1] == '-' else ''
        return prefix + name.replace(',', ';')

    model_columns = [
        degust_safe(selection.term_name(term)) for term in all_terms
    ]

    pairs_n_alt = n_alt
    pairs_select = select + select
    first_peak = [(0, ) * n_alt + row + (0, ) for row in model]
    second_peak = [row[:n_alt] + row + (1, ) for row in model]
    pairs_model = first_peak + second_peak
    pairs_model_columns = (
        [column + '-interaction' for column in model_columns[:n_alt]] +
        model_columns + ['pair2'])
    pairs_model_rows = ([name + '-peak1' for name in model_rows] +
                        [name + '-peak2' for name in model_rows])

    def format_row(row, name):
        return "%s %s" % (''.join('%7g ' % value for value in row), name)

    header = '[' + ('-' * (8 * n_alt - 2)) + '] test coefficients'
    design_str = header + '\n' + ''.join(
        format_row(row, name) + '\n' for row, name in zip(model, model_rows))

    # Single-argument parenthesised prints emit exactly what the equivalent
    # Python 2 print statements do.
    print('')
    print("Design matrix")
    print(design_str)
    print('')
    print('Pair design matrix')
    print(header)
    for row, name in zip(pairs_model, pairs_model_rows):
        print(format_row(row, name))
    print('')

    workspace = self.get_workspace()

    runr.run_script(
        TEST_R,
        self.tell,
        DIR=workspace.working_dir,
        METHOD=self.method,
        WEIGHT=self.weight,
        EMPIRICAL_CONTROLS=self.empirical_controls,
        MIN_READS=self.min_reads,
        BIOTYPE=self.biotype,
        RELATION=self.relation,
        QUANTILE_TAIL=self.quantile_tail,
        DO_EXPRESSION=self.do_expression,
        DO_TAIL_LENGTH=self.do_tail_length,
        VERBOSE=self.verbose,
        GENEWISE_FILENAME=genewise_filename,
        GENEWISE_NORM_FILENAME=genewise_norm_filename,
        PRIMARYPEAKWISE_FILENAME=primarypeakwise_filename,
        PRIMARYPEAKWISE_NORM_FILENAME=primarypeakwise_norm_filename,
        PEAKWISE_FILENAME=peakwise_filename,
        PEAKWISE_NORM_FILENAME=peakwise_norm_filename,
        PAIRWISE_FILENAME=pairwise_filename,
        PAIRWISE_NORM_FILENAME=pairwise_norm_filename,
        N_ALT=n_alt,
        SELECT=select,
        MODEL=model,
        MODEL_COLUMNS=model_columns,
        PAIRS_N_ALT=pairs_n_alt,
        PAIRS_SELECT=pairs_select,
        PAIRS_MODEL=pairs_model,
        PAIRS_MODEL_COLUMNS=pairs_model_columns,
    )
    if self.tell:
        return

    reporter = reporting.Reporter(
        workspace.working_dir, title, style=web.style())

    if self.dedup:
        reporter.p('Read deduplication was used.')

    single_terms = model_columns[:n_alt]
    pair_terms = pairs_model_columns[:n_alt]
    # (is_expression, entities, result stem, average column, subtitle, logFC terms)
    analyses = [
        (True, 'genes', 'genewise-voom', 'avg.expression',
         'Genewise expression level', single_terms),
        (False, 'genes', 'genewise-tail', 'avg.tail',
         'Genewise tail length', single_terms),
        (True, 'primary peaks', 'primarypeakwise-voom', 'avg.expression',
         'Primary-peakwise expression level', single_terms),
        (False, 'primary peaks', 'primarypeakwise-tail', 'avg.tail',
         'Primary-peakwise tail length', single_terms),
        (True, 'peaks', 'peakwise-voom', 'avg.expression',
         'Peakwise expression level', single_terms),
        (False, 'peaks', 'peakwise-tail', 'avg.tail',
         'Peakwise tail length', single_terms),
        (True, 'peak pairs', 'pairwise-voom', 'avg.expression',
         'Peak-pair expression shift', pair_terms),
        (False, 'peak pairs', 'pairwise-tail', 'avg.tail',
         'Peak-pair tail length shift', pair_terms),
    ]

    reporter.write('<table>\n')
    for is_expression, _entities, result, aveexpr, subtitle, terms in analyses:
        enabled = self.do_expression if is_expression else self.do_tail_length
        if not enabled:
            continue

        html_file = workspace / (result + '.html')
        table_file = workspace / (result + '-toptable.csv')

        io.execute([
            'degust.py',
            '--name', title + ' : ' + subtitle,
            '--avg', aveexpr,
            '--primary', 'baseline',
            '--logFC', ','.join(terms),
            '--fdr', 'adj.P.Val',
            '--info',
            'gene,locus_tag,product,reads,polya.reads,tail.lengths,' + aveexpr,
            '--notour', '1',
            '--out', html_file,
            table_file,
        ])

        with open(workspace / (result + '.txt'), 'rU') as f:
            lines = f.readlines()
        # The last two lines go under the links in the left cell; everything
        # before them fills the right cell.
        summary = lines[-2:]
        detail = lines[:-2]

        reporter.write('<tr><td valign="top" width="33%">')
        reporter.subheading(reporter.href(html_file, subtitle))
        links = reporter.href(table_file, 'Spreadsheet')
        if result.endswith('voom'):
            links += ', ' + reporter.href(workspace / (result + '.png'),
                                          'voom plot')
        reporter.p(links)
        for text in summary:
            reporter.p(text.strip())
        reporter.write('</td><td valign="top"><br/><br/>')
        for text in detail:
            reporter.write(text.strip() + '<br/>\n')
        reporter.write('</td></tr>')
    reporter.write('</table>\n')

    reporter.subheading("Design matrix")
    reporter.write('<pre>' + design_str + '</pre>')

    reporter.close()