コード例 #1
0
 def job__saveFig(figs):
     """Hand ``figs`` to ``pyutil.saveFigDict`` and record the PNG outputs.

     ``exts`` and ``dpi`` are taken from the enclosing scope and the output
     directory is the current one (``DIR='.'``).  The ``'fignames'`` entry
     of the mapping returned by ``saveFigDict`` is narrowed to the names
     ending in ``.png``.  The pretty-printed mapping is then passed to
     ``pyutil.printlines`` together with the name ``figures.json``
     (presumably the file it is written to -- confirm against ``pyutil``).

     Returns the filtered mapping.
     """
     saved = pyutil.saveFigDict(figs, DIR='.', exts=exts, dpi=dpi)

     # Keep only the PNG entries of the name list.
     png_names = []
     for name in saved['fignames']:
         if name.endswith('.png'):
             png_names.append(name)
     saved['fignames'] = png_names

     pyutil.printlines([pyutil.ppJson(saved)], 'figures.json')
     return saved
コード例 #2
0
ファイル: chipSummary.py プロジェクト: shouldsee/synoBio
def main(f, dbg=0, reCallPeak=0, gPar=None):
    """Drive the ChIP summary for the configuration ``f``.

    Args:
        f: Handle forwarded to ``get_global_parameters`` and
            ``get_conditions`` (presumably the path of a parameter file --
            confirm against those helpers).
        dbg: When equal to 1, pretty-print the global parameters and the
            condition dictionary and return them without doing any work.
        reCallPeak: When truthy, check that every condition's ``CHIP`` and
            ``INPUT`` file exists, then run the peak-calling script for
            each condition.
        gPar: Optional global parameters; when falsy they are obtained from
            ``get_global_parameters(f)``.

    Returns:
        The tuple ``(gPar, condDict)`` when ``dbg == 1``; the code in this
        block returns nothing otherwise.

    Raises:
        SystemExit: With status 1 when ``reCallPeak`` is set and a ``CHIP``
            or ``INPUT`` file is missing.
    """
    # The nested ``def`` below binds the *module-level* name ``shellexec``
    # because of this declaration.
    global shellexec

    def shellexec(cmd, dbg=0):
        """Run ``cmd`` through the shell and return its captured output.

        With ``dbg`` truthy the command is only printed and the string
        ``'dbg'`` is returned instead.
        """
        if dbg:
            print(cmd)
            return 'dbg'
        return subprocess.check_output(cmd, shell=1)

#     #############################################################################################

#     DEPENDENT_FILES_PATH        = '/media/pw_synology3/Software/chip-summary/'  # [path of chip-summary.py]
#     DEFAULT_TARGET_RANGE        = '3000' # [change]  a string, not a number
#     SUMMARY_FILE_NAME           = 'summary.html'
#     SUMMARY_DIR                 = 'summary'
#     PEAK_CALL_PIPELINE_TEMPLATE = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/pipeline724-t.sh')
#     PEAK_SELECT_SCRIPT          = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/select_peaks.py')
#     GENELOCUS_TO_GENENAME_SCRIPT= os.path.join(DEPENDENT_FILES_PATH, 'depend/script/genelocus2genename.py')

#     #### Slowest part to be refactored???
#     EXTRACT_AGI_CODE_AND_FC     = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/extract_AGI_code_and_fold_change.py')
#     GO_ENRICHMENT_SCRIPT        = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/fe.sh')  # install goatools (GO enrichment) and edit fe.sh
#     GO_ENRICHMENT_DIFF_SCRIPT   = os.path.join(DEPENDENT_FILES_PATH, 'depend/script/goterm-matrix.py')
#     AGI_TO_GENE_NAMES           = os.path.join(DEPENDENT_FILES_PATH, 'depend/data/AGI-to-gene-names.txt')
#     ANNOTATION_FILE             = os.path.join(DEPENDENT_FILES_PATH, 'depend/data/genesTAIR10.bed') # for bedmap
#     GENE_DESCRIPTION            = os.path.join(DEPENDENT_FILES_PATH, 'depend/data/gene_description_20140101.txt')
#     MAX_FOLD_CHANGE             = 10  # for number of peaks versus fold-change plot

#     #############################################################################################

    # NOTE(review): SUMMARY_DIR and PEAK_CALL_PIPELINE_TEMPLATE are used
    # below but only appear commented out above, so they are assumed to be
    # defined at module level -- confirm.
    gPar = gPar or get_global_parameters(f)
    condDict = get_conditions(f, gPar)
    # Not read anywhere in this block; the call is kept as in the original.
    DIR = pyutil.dict2flat(gPar)
    #     os.system('mkdir -p ' + DIR); os.chdir(DIR)
    if dbg == 1:
        d = gPar, condDict
        for dd in d:
            print(pyutil.ppJson(dd))

        return d


#     try:
    # ``if 1:`` stands in for the disabled ``try:`` above and keeps the
    # indentation of the body unchanged.
    if 1:

        # Collect results
        os.system('mkdir -p %s' % SUMMARY_DIR)

        # make pipeline files for peak calling
        def getPeak(k):
            """Build and run the peak-calling script for condition ``k``.

            Returns the exit status reported by ``bash``.
            """
            sname = make_peak_call_script(k, condDict,
                                          PEAK_CALL_PIPELINE_TEMPLATE)
            print('Run %s ...' % (sname))
            #             return '%s_peaks.narrowPeak'%k
            return subprocess.call(['bash', sname])

        if reCallPeak:
            # Check that every ChIP / input file is present before any
            # peak calling starts.
            for k in condDict:
                for role in ('CHIP', 'INPUT'):
                    path = condDict[k][role]
                    if not os.path.exists(path):
                        print('%s does not exist. STOP' % (path))
                        # Non-zero status so callers and shell pipelines
                        # can tell the run was aborted.
                        sys.exit(1)
            for k in condDict:
                getPeak(k)

        npkFS = ['%s_peaks.narrowPeak' % k for k in condDict]
        peakSummary(npkFS)

        gene_lists = {
        }  # a dictionary of form d  = {'condition1': {'AT1G12345':'2.3', 'AT1G12346':'1.2'} }
コード例 #3
0
ファイル: chipSummary.py プロジェクト: shouldsee/synoBio
def peakSummary(npkFS,
                gPar=None,
                dbg=0,
                FC=1.5,
                PVALUE=0.01,
                QVALUE=0.0005,
                maxDist=1500,
                DIR='test',
                ref=None,
                **kwargs):
    """Turn narrowPeak files into a fold-change table and an HTML summary.

    Steps, all carried out inside ``DIR``:

    1. create ``DIR`` and ``chdir`` into it (the working directory is not
       restored afterwards), create the ``summary`` sub-directories and
       copy the input files into ``DIR``;
    2. call ``process`` once per input file;
    3. read each result's ``file_bedmap`` table, combine the tables and
       pivot them into a gene x condition table of ``FC`` values, written
       to ``FoldChange_table.csv`` with a leading ``pmSummary`` column of
       ``+`` / ``-`` flags (``+`` where the value is non-zero);
    4. write the HTML report to ``SUMMARY_FILE_NAME``.

    NOTE(review): the ``cp`` runs after the ``chdir``, so relative paths in
    ``npkFS`` are resolved relative to ``DIR`` -- confirm that callers pass
    paths for which this is intended.

    Args:
        npkFS: Sequence of narrowPeak file paths.
        gPar: Parameter dict handed to ``process``; when falsy one is built
            from ``FC``, ``PVALUE``, ``QVALUE`` and ``maxDist``.
        dbg: Forwarded to ``process``.  When truthy the ``process`` results
            are also dumped to ``condRes.dbg``; when equal to 2 they are
            returned right after that dump.
        FC, PVALUE, QVALUE, maxDist: Only used to build the default
            ``gPar`` (``maxDist`` becomes ``TARGET_RANGE``).
        DIR: Working directory for all outputs.
        ref: Passed to ``process`` as ``ANNOTATION_FILE``.
        **kwargs: Accepted and ignored.

    Returns:
        The list of ``process`` results when ``dbg == 2``, otherwise None.
    """
    gPar = gPar or {
        "FC": FC,
        "PVALUE": PVALUE,
        "QVALUE": QVALUE,
        "PAIRWISE_COMPARE": "Y",
        "TARGET_RANGE": maxDist,
        "TITLE": "testRun"
    }

    os.system('mkdir -p %s' % DIR)
    os.chdir(DIR)
    cmd = '''
mkdir -p summary; 
mkdir -p summary/npeaks_vs_fc_npk;
mkdir -p summary/npk;
cp -r {infiles} -t .
'''.format(infiles=' '.join(npkFS))
    os.system(cmd)

    # A real list (not a lazy ``map`` object) because the results are
    # traversed several times below.
    res = [
        process(npkFile=x, gPar=gPar, dbg=dbg, ANNOTATION_FILE=ref)
        for x in npkFS
    ]
    if dbg:
        with open('condRes.dbg', 'w') as dbg_file:
            dbg_file.write('%s\n' % (pyutil.ppJson(res),))
    if dbg == 2:
        return res

    dfs = [pd.read_table(x['file_bedmap']).set_index('hit') for x in res]
    # Union of the 'hit' labels across all tables.
    indAll = pd.concat(dfs, axis=1, join='outer').index
    print('[db1] %s' % (dfs[0].head(),))

    # Align every table on the shared index, then stack them row-wise.
    df = pd.concat([frame.reindex(indAll) for frame in dfs], axis=0)
    df = df.reset_index()

    print('[db2] %s' % (df.head(),))

    # Rename the first column (the former index) to 'index' so it can be
    # used as the pivot key below.
    cols = df.columns.to_series()
    cols[0] = 'index'
    df.columns = cols
    print('[db3] %s' % (df.head(),))
    df_fc = df.pivot_table(columns='condition', values='FC',
                           index='index').fillna(0)

    # Drop everything from the first '.' onwards in each row label.
    df_fc.index = [label.split('.', 1)[0] for label in df_fc.index]

    def _plus_minus(values):
        """Encode a row as a string with '+' for non-zero and '-' for zero."""
        return ''.join(['+' if x != 0 else '-' for x in values])

    vals = df_fc.apply(_plus_minus, axis=1)
    df_fc.insert(0, 'pmSummary', vals)
    print(df_fc.head(10))
    df_fc.to_csv('FoldChange_table.csv')

    ##### write html summary report

    print('... make html page %s' % (SUMMARY_FILE_NAME))

    TITLE = 'test'
    #     TITLE = gPar['TITLE']

    rowFmt = '''
        <tr><td>{key}</td>
        <td><a href="{glst_filename}">target genes</a></td>
        <td align=right>{nGene:d}</td>
        <td><a href="{goenrich_filename}">enrichment</a></td>
        <td><a href="{peak_file}">{param}</a></td>
        <td><a href="{plot_file}">plot</a></td></tr>
        '''

    # The whole page is assembled before the file is opened, so a failure
    # while formatting a row neither leaks a file handle nor leaves a
    # half-written report behind.
    parts = [
        '<html>',
        '<head>',
        '<title>%s</title>' % (TITLE),
        '<style> body {font-family:\"HelveticaNeue-Light\", \"Helvetica Neue Light\", \"Helvetica neue\"} </style>',
        '</head>',
        '<body>',
        '<h2>%s</h2>' % (TITLE),
        #####################################################################
        '<h3>Target genes and (Not implemented "associated GO terms")</h3>',
        '<p><a href=\"{0}\">[{0}]</a></p>'.format('FoldChange_table.csv'),
        '<table><tr><td>Sample</td><td>Target gene list</td><td>#target genes</td><td>GO enrichment</td><td>Peak selection thresholds</td><td>#peaks plot</td></tr>',
    ]
    # One table row per ``process`` result.
    parts.extend(rowFmt.format(**d) for d in res)
    parts.append('</table>')

    #     #####################################################################
    #     f.write('<h3>Enriched GO terms associated to target genes in different conditions</h3>')
    #     f.write('<p>Most shared GO terms across conditions are on the top in the following table.</p>')
    #     filename = make_goenrichment_diff([d['goenrich_filename'] for d in res],
    #                                       SUMMARY_DIR)
    #     f.write('<a href=\"%s\">Each row is a GO term. Each column is a condition.</a>' % (filename))

    #     #####################################################################
    # #     if gPar['PAIRWISE_COMPARE'].lower().startswith('y'):
    # #         f.write('<h3>Pairwise comparison between conditions</h3>')
    # #         f.write('<p>Each cell in the following table contains three numbers, X, Y and Z. X is the number of target genes that are in condition A but not in condition B. Z is the number of target genes that are in condition B but not in condition A.  Y is the number of target genes that are in both conditions.</p>')
    # #         make_comparison_table(f, gene_lists, agi2genename_dict, SUMMARY_DIR)

    #     #####################################################################

    #     f.write('<h3>Binding to target genes in different conditions</h3>')
    #     f.write('<p><b>Note:</b> In the following, \'+\' means binding near a target gene in a particular experimental condition, and \'-\' means non-binding.</p>')
    #     f.write('<p>The columns are:<br/><br/>')
    #     colName = ['AGI_locus_name'] + [d['key'] for d in res] + ['gene_name (if available)']
    #     f.write( '<br/>'.join(colName))
    #     f.write('</p>')

    parts.append('</body>')
    parts.append('</html>')

    with open(SUMMARY_FILE_NAME, 'w') as html:
        html.write(''.join(parts))

    # clean up
    print('Done.')
コード例 #4
0
                    default=pyutil.os.environ.get('GTF', 'none') + '.cds')
parser.add_argument('-c', '--cdsFile', default=None)

# Default taken from the GSIZE environment variable when it is set.
parser.add_argument('-g',
                    '--GSIZE',
                    default=pyutil.os.environ.get('GSIZE', None))
parser.add_argument('-o', '--DIR', default=None)
# Fixed: the original called ``environ.get(4, None)``, i.e. it used the
# integer 4 as the variable *name*.  That lookup can never match and raises
# TypeError on Python 3.  Following the GSIZE option above, the variable is
# read by the option's name and 4 is the fallback.  A string taken from the
# environment is converted by ``type=int``.
parser.add_argument('-j',
                    '--NCORE',
                    default=pyutil.os.environ.get('NCORE', 4),
                    type=int)
parser.add_argument('-s', '--center_summit', default=0, type=int)
parser.add_argument('-d', '--debug', default=0, type=int)
parser.add_argument('-f', '--figsize', default=[14, 14], type=int, nargs=2)
# parser.add_argument('-y','--ylim',
#                     default=[0., 10.], type=float,nargs=2)
parser.add_argument('-y', '--ylim', default=None, type=float, nargs=2)

# Re-use the command-line defaults as keyword defaults of ``main`` so that
# programmatic callers get the same values as the CLI.
# NOTE(review): this walks every registered action; if the parser was built
# with the automatic ``-h`` option, its ``help`` entry is included as well --
# confirm that ``main`` tolerates it.
defaults = {act.dest: act.default for act in parser._actions}
# These two must always be supplied by the caller, so they get no default.
for key in ['bedFile', 'bwFiles']:
    defaults.pop(key)

main = pyutil.functools.partial(main, **defaults)
# argparser
if __name__ == '__main__':
    args = parser.parse_args()
    pars = vars(args)
    assert pars['bedFile'] is not None
    print(pyutil.ppJson(pars))
    main(**pars)
    stderrLine('[Done]')