def job__saveFig(figs):
    """Save the given figures and record the PNG outputs in ``figures.json``.

    ``figs`` is handed unchanged to ``pyutil.saveFigDict`` together with the
    module-level ``exts`` and ``dpi`` settings; output goes to the current
    directory.  The returned dict's ``'fignames'`` entry is narrowed to the
    names ending in ``.png`` before the dict is pretty-printed to
    ``figures.json``.

    Returns the (filtered) dict produced by ``pyutil.saveFigDict``.
    """
    saved = pyutil.saveFigDict(figs, DIR='.', exts=exts, dpi=dpi)

    # Keep only the PNG renderings in the manifest.
    png_names = []
    for name in saved['fignames']:
        if name.endswith('.png'):
            png_names.append(name)
    saved['fignames'] = png_names

    pyutil.printlines([pyutil.ppJson(saved)], 'figures.json')
    return saved
def main(f, dbg=0, reCallPeak=0, gPar=None):
    """Optionally re-run peak calling for every condition, then summarise peaks.

    Parameters
    ----------
    f :
        Passed to ``get_global_parameters`` and ``get_conditions``
        (presumably the path of the parameter/condition file -- confirm
        against those helpers).
    dbg : int
        When ``1``, pretty-print the global parameters and the condition
        dictionary and return them as a tuple without doing any work.
    reCallPeak : int/bool
        When truthy, verify that every condition's ``'CHIP'`` and ``'INPUT'``
        file exists and run the generated peak-calling script for each
        condition before summarising.
    gPar : dict or None
        Global parameters; read from ``f`` when not supplied.

    Returns
    -------
    ``(gPar, condDict)`` when ``dbg == 1``, otherwise ``None``.

    Side effects
    ------------
    * (Re)binds the module-level name ``shellexec``.
    * Creates ``SUMMARY_DIR`` and calls ``peakSummary``.
    * Exits the interpreter with status 1 if a required ChIP/INPUT file is
      missing (previously the exit status was 0, which hid the failure from
      calling scripts).
    """
    global shellexec

    def shellexec(cmd, dbg=0):
        """Run ``cmd`` through the shell and return its output.

        With ``dbg`` set, the command is only echoed and ``'dbg'`` is returned.
        """
        if dbg:
            print(cmd)
            return 'dbg'
        return subprocess.check_output(cmd, shell=True)

    # #############################################################################################
    # DEPENDENT_FILES_PATH = '/media/pw_synology3/Software/chip-summary/' # [path of chip-summary.py]
    # DEFAULT_TARGET_RANGE = '3000' # [change] a string, not a number
    # SUMMARY_FILE_NAME = 'summary.html'
    # SUMMARY_DIR = 'summary'
    # PEAK_CALL_PIPELINE_TEMPLATE = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/pipeline724-t.sh')
    # PEAK_SELECT_SCRIPT = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/select_peaks.py')
    # GENELOCUS_TO_GENENAME_SCRIPT= os.path.join(DEPENDENT_FILES_PATH, 'depend/script/genelocus2genename.py')
    # #### Slowest part to be refactored???
    # EXTRACT_AGI_CODE_AND_FC = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/extract_AGI_code_and_fold_change.py')
    # GO_ENRICHMENT_SCRIPT = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/fe.sh') # install goatools (GO enrichment) and edit fe.sh
    # GO_ENRICHMENT_DIFF_SCRIPT = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/goterm-matrix.py')
    # AGI_TO_GENE_NAMES = os.path.join(DEPENDENT_FILES_PATH, 'depend/data/AGI-to-gene-names.txt')
    # ANNOTATION_FILE = os.path.join(DEPENDENT_FILES_PATH, 'depend/data/genesTAIR10.bed') # for bedmap
    # GENE_DESCRIPTION = os.path.join(DEPENDENT_FILES_PATH, 'depend/data/gene_description_20140101.txt')
    # MAX_FOLD_CHANGE = 10 # for number of peaks versus fold-change plot
    # #############################################################################################

    gPar = gPar or get_global_parameters(f)
    condDict = get_conditions(f, gPar)
    # NOTE: DIR is currently unused because the chdir below is disabled; the
    # call is kept so any validation done by dict2flat still happens.
    DIR = pyutil.dict2flat(gPar)
    # os.system('mkdir -p ' + DIR); os.chdir(DIR)

    if dbg == 1:
        d = gPar, condDict
        for dd in d:
            print(pyutil.ppJson(dd))
        return d

    # Collect results
    os.system('mkdir -p %s' % SUMMARY_DIR)

    def getPeak(k):
        """Generate and run the peak-calling script for condition ``k``.

        Returns the exit status of the ``bash`` invocation.
        """
        sname = make_peak_call_script(k, condDict, PEAK_CALL_PIPELINE_TEMPLATE)
        print('Run %s ...' % (sname))
        return subprocess.call(['bash', sname])

    # Fix the iteration order once so the peak files line up with conditions.
    conditions = list(condDict)

    if reCallPeak:
        # check that every ChIP file is present
        for k in conditions:
            for path in (condDict[k]['CHIP'], condDict[k]['INPUT']):
                if not os.path.exists(path):
                    print('%s dose not exist. STOP' % (path))
                    # Non-zero status so wrappers can detect the failure.
                    sys.exit(1)

        # make pipeline files for peak calling
        for k in conditions:
            status = getPeak(k)
            if status != 0:
                # Best effort, as before: report the failure but carry on.
                print('[WARN] peak calling for %s exited with status %s'
                      % (k, status))

    npkFS = ['%s_peaks.narrowPeak' % k for k in conditions]
    peakSummary(npkFS)
def _write_summary_html(rows, out_path, title):
    """Write the HTML summary page: a heading, a link to the CSV, one table row per entry of ``rows``."""
    header_row = (
        '<table><tr><td>Sample</td><td>Target gene list</td>'
        '<td>#target genes</td><td>GO enrichment</td>'
        '<td>Peak selection thresholds</td><td>#peaks plot</td></tr>'
    )
    row_fmt = '''
    <tr><td>{key}</td>
    <td><a href="{glst_filename}">target genes</a></td>
    <td align=right>{nGene:d}</td>
    <td><a href="{goenrich_filename}">enrichment</a></td>
    <td><a href="{peak_file}">{param}</a></td>
    <td><a href="{plot_file}">plot</a></td></tr>
    '''
    table_string = header_row + ''.join(row_fmt.format(**d) for d in rows) + '</table>'

    parts = [
        '<html>',
        '<head>',
        '<title>%s</title>' % (title),
        '<style> body {font-family:"HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica neue"} </style>',
        '</head>',
        '<body>',
        '<h2>%s</h2>' % (title),
        '<h3>Target genes and (Not implemented "associated GO terms")</h3>',
        '<p><a href="{0}">[{0}]</a></p>'.format('FoldChange_table.csv'),
        table_string,
        # NOTE: three further report sections used to be sketched here as
        # commented-out code and are still not implemented: the GO-term
        # enrichment comparison across conditions, the pairwise target-gene
        # comparison table, and the per-condition binding (+/-) column legend.
        '</body>',
        '</html>',
    ]
    # ``with`` guarantees the report is flushed and closed even on error.
    with open(out_path, 'w') as fh:
        fh.write(''.join(parts))


def peakSummary(npkFS, gPar=None, dbg=0, FC=1.5, PVALUE=0.01, QVALUE=0.0005,
                maxDist=1500, DIR='test', ref=None, **kwargs):
    """Process a list of narrowPeak files and write a fold-change table and HTML report.

    Parameters
    ----------
    npkFS : list of str
        Paths of the peak files.  Relative paths are resolved against the
        working directory at call time (before the ``chdir`` into ``DIR``)
        when they are copied.
    gPar : dict or None
        Parameters forwarded to ``process``; when falsy a default dict is
        built from ``FC``, ``PVALUE``, ``QVALUE`` and ``maxDist``.
    dbg : int
        Forwarded to ``process``.  Any truthy value also dumps the per-file
        results to ``condRes.dbg``; ``2`` returns those results right after.
    DIR : str
        Working directory; created if needed.  NOTE: the process stays
        chdir'ed into it after this function returns.
    ref :
        Forwarded to ``process`` as ``ANNOTATION_FILE``.
    **kwargs :
        Accepted and ignored.

    Returns
    -------
    The list of ``process`` results when ``dbg == 2``, otherwise ``None``.

    Raises
    ------
    ValueError
        If there are no peak files to summarise.

    Outputs (inside ``DIR``): ``FoldChange_table.csv`` and the page named by
    the module-level ``SUMMARY_FILE_NAME``.  Each ``process`` result must
    provide ``'file_bedmap'`` (a table with ``hit``, ``condition`` and ``FC``
    columns) plus the keys used by the report row template.
    """
    gPar = gPar or {
        "FC": FC,
        "PVALUE": PVALUE,
        "QVALUE": QVALUE,
        "PAIRWISE_COMPARE": "Y",
        "TARGET_RANGE": maxDist,
        "TITLE": "testRun"
    }

    # Resolve the inputs *before* changing directory; otherwise a relative
    # path would be looked up inside DIR, i.e. copied onto itself or not found.
    sources = [os.path.abspath(x) for x in npkFS]

    os.system('mkdir -p %s' % DIR)
    os.chdir(DIR)
    cmd = '''
mkdir -p summary;
mkdir -p summary/npeaks_vs_fc_npk;
mkdir -p summary/npk;
cp -r {infiles} -t .
'''.format(infiles=' '.join(sources))
    os.system(cmd)

    # A real list (not a lazy ``map``) because the results are iterated
    # several times below.
    res = [process(npkFile=x, gPar=gPar, dbg=dbg, ANNOTATION_FILE=ref)
           for x in npkFS]

    if dbg:
        with open('condRes.dbg', 'w') as dbg_fh:
            dbg_fh.write('%s\n' % (pyutil.ppJson(res),))
        if dbg == 2:
            return res

    if not res:
        raise ValueError('peakSummary: no narrowPeak files to summarise')

    dfs = [pd.read_table(r['file_bedmap']).set_index('hit') for r in res]

    # Union of all hits, so every per-condition table covers the same rows.
    indAll = pd.concat(dfs, axis=1, join='outer').index
    print('[db1] %s' % (dfs[0].head(),))

    df = pd.concat([d.reindex(indAll) for d in dfs], axis=0).reset_index()
    print('[db2] %s' % (df.head(),))

    # Name the former index column 'index' regardless of what reset_index
    # called it.  A plain list avoids positional assignment on a
    # label-indexed Series.
    cols = list(df.columns)
    cols[0] = 'index'
    df.columns = cols
    print('[db3] %s' % (df.head(),))

    df_fc = df.pivot_table(columns='condition', values='FC',
                           index='index').fillna(0)
    # Drop everything from the first '.' onwards in each row label.
    df_fc.index = [name.split('.', 1)[0] for name in df_fc.index]

    # One '+'/'-' per condition: '+' where the fold change is non-zero.
    vals = df_fc.apply(
        lambda row: ''.join('+' if v != 0 else '-' for v in row), axis=1)
    df_fc.insert(0, 'pmSummary', vals)
    print(df_fc.head(10))
    df_fc.to_csv('FoldChange_table.csv')

    ##### write html summary report
    print('... make html page %s' % (SUMMARY_FILE_NAME))
    TITLE = 'test'
    # TITLE = gPar['TITLE']
    _write_summary_html(res, SUMMARY_FILE_NAME, TITLE)

    # clean up
    print('Done.')
default=pyutil.os.environ.get('GTF', 'none') + '.cds') parser.add_argument('-c', '--cdsFile', default=None) parser.add_argument('-g', '--GSIZE', default=pyutil.os.environ.get('GSIZE', None)) parser.add_argument('-o', '--DIR', default=None) parser.add_argument('-j', '--NCORE', default=pyutil.os.environ.get(4, None), type=int) parser.add_argument('-s', '--center_summit', default=0, type=int) parser.add_argument('-d', '--debug', default=0, type=int) parser.add_argument('-f', '--figsize', default=[14, 14], type=int, nargs=2) # parser.add_argument('-y','--ylim', # default=[0., 10.], type=float,nargs=2) parser.add_argument('-y', '--ylim', default=None, type=float, nargs=2) defaults = {act.dest: act.default for act in parser._actions} for key in ['bedFile', 'bwFiles']: defaults.pop(key) main = pyutil.functools.partial(main, **defaults) # argparser if __name__ == '__main__': args = parser.parse_args() pars = vars(args) assert pars['bedFile'] is not None print(pyutil.ppJson(pars)) main(**pars) stderrLine('[Done]')