Exemple #1
0
def sample_get_rawfile_detail(sample):
    """Validate a sample's raw FASTQ files and build its raw-file SOFT lines.

    Loads ``validate_fastq.py`` as a module object, configures it with the
    raw metadata, the sample accession and the working directory, runs its
    validation steps, and stores the per-file results on ``sample``.

    Args:
        sample: object that supports both ``sample['data_acc']`` lookup and
            attribute assignment (project type; not defined in this block).

    Side effects on ``sample``:
        rawfile_nodes, rawfile_files_orig, rawfile_files, rawfile_checksums,
        rawfile_readlengths and rawfile_is_paired are assigned.
        One ``LinkEvent`` is constructed per output file, targeting
        ``WORKDIR()/"ftp"``.

    Returns:
        Whatever ``pyext.jf2(template)`` returns — presumably the rendered
        ``!Sample_raw_file_*`` text; TODO confirm against pymisca.

    NOTE(review): ``pyext``, ``rawMeta`` and ``WORKDIR`` are not defined in
    this function; they must be provided by the enclosing module.
    """
    from pymisca.events import LinkEvent
    from pymisca.ext import f as _f
    # A module object is loaded from file for this call and configured by
    # assigning the attributes below before its functions are invoked.
    node = pyext.file__asModule('/home/feng/envs/0726-polyq/src/validate_fastq.py')
    node.rawMeta = rawMeta()
    node.DATA_ACC = sample['data_acc']
    node.WORKDIR = WORKDIR()
#     pyext.path.Path('/home/feng/envs/0726-polyq/WORKDIR.submit/').realpath()
#         node.WORKDIR = pyext.path.Path('/home/feng/envs/0830-polyq/WORKDIR/').realpath()
    node.valid_fastq()
    # Each entry of OUTPUT_NODES is indexed below by 'OUTPUT_FILE' and
    # 'FILE_MD5' -> 'MD5_HEX'.
    sample.rawfile_nodes = nodes = node.combined_valid_fastq()['OUTPUT_NODES']
    sample.rawfile_files_orig = [x['OUTPUT_FILE'] for x in nodes]
#     sample.rawfile_files = [x['OUTPUT_FILE'].relpath(WORKDIR()) for x in nodes]

    #### Relinking because GEO needs a flat directory tree
    # Each file is linked to WORKDIR()/"ftp"/"<data_acc>.<basename>" and the
    # stored name is the link destination relative to WORKDIR()/"ftp".
    # NOTE(review): the `_f` format string names the locals `sample` and `x`;
    # presumably `_f` resolves them from the calling scope, so do not rename
    # either variable without confirming. The meaning of the third positional
    # argument `1` to LinkEvent is not visible here — TODO confirm.
    sample.rawfile_files = [
        LinkEvent(
            x['OUTPUT_FILE'],
            WORKDIR()/"ftp"/_f('{sample.data_acc}.{x["OUTPUT_FILE"].basename()}'),
            1,).dest.relpath(WORKDIR()/"ftp") for x in nodes]
    
#     print nodes[0]._data.keys()
    sample.rawfile_checksums = [x['FILE_MD5']['MD5_HEX'] for x in nodes]
    # The read length is hard-coded to '75' for every file.
    sample.rawfile_readlengths = [ '75' for x in nodes]
    # More than one output node is reported as paired-end.
    sample.rawfile_is_paired = 'paired-end' if len(nodes) > 1 else 'single'
    # NOTE(review): the {{...}} expressions reference `sample` by name;
    # presumably `pyext.jf2` evaluates them against this function's local
    # scope — TODO confirm.
    template = u'''
!Sample_raw_file_name = {{','.join(sample.rawfile_files)}}
!Sample_raw_file_type = fastq
!Sample_raw_file_checksum = {{','.join(sample.rawfile_checksums)}}
!Sample_raw_file_read_length = {{','.join(sample.rawfile_readlengths)}}
!Sample_raw_file_single_or_paired-end = {{sample.rawfile_is_paired}}
!Sample_raw_file_instrument_model = NextSeq 500
'''
    
    return pyext.jf2(template)
from pymisca.util import colGroupMean
pyext.colGroupMean = colGroupMean
import pymisca.vis_util as pyvis

import src.util as _util
from util import _get_file
import synotil.dio
import synotil.qcplots

import sys

# Directory containing this script, as an absolute path.
# A bare ``os.path.dirname(__file__)`` is '' when the script is launched by
# file name from its own directory; concatenating '/get_meta_soft.py' onto
# that would point at the filesystem root instead of the sibling file.
SRC_DIR = os.path.dirname(os.path.abspath(__file__))

# Load the sibling get_meta_soft.py as a module object and read the
# ChIP-seq mapped-data table from it.
ns = pyext.file__asModule(os.path.join(SRC_DIR, 'get_meta_soft.py'))
meta = ns.df_mappedData_chipseq()
# Example lookup kept for reference: meta.loc["189CS10", 'narrowPeak']

# Values returned by _get_file() are accumulated here.
inputs = []


def _get_file(fn):
    """Resolve ``fn`` via ``_util._get_file`` and record the result.

    The resolved value is appended to the module-level ``inputs`` list and
    then returned to the caller.
    """
    resolved = _util._get_file(fn)
    inputs.append(resolved)
    return resolved


outputs = []
# -*- coding: utf-8 -*-
import pymisca.ext as pyext
# Load the project's get_meta_soft.py as a module object; it supplies the
# WORKDIR accessor and the per-sample template helpers used below.
ns = pyext.file__asModule('/home/feng/envs/0726-polyq/src/get_meta_soft.py')

WORKDIR = ns.WORKDIR
# Output directory for the generated SOFT text files.
OUTDIR = WORKDIR() / "get_soft_text"

# NOTE(review): presumably ensures OUTDIR exists — confirm against pymisca.
pyext.real__dir(dirname=OUTDIR)
_samples = ns.sample_init_full()

# Best-effort batch: a failure on one sample is reported and the loop moves
# on to the next sample instead of aborting the whole run.
for sample in _samples:
    # Single-argument print(...) emits the same line under Python 2 and
    # Python 3, unlike the Python 2-only print statement.
    print('[template_finalise] %s ...' % (sample['data_acc'],))
    try:
        ns.sample_template_find_curated(sample)
        ns.sample_template_finalise(sample)
        res = sample['template_final']
        # Strip leading/trailing whitespace from every line of the template.
        res = '\n'.join([x.strip() for x in res.splitlines()])
        sample.soft_text = res
        # NOTE(review): the "{sample.data_acc}" placeholder names the loop
        # variable; presumably pyext.f resolves it from the calling scope, so
        # keep the variable called `sample`.
        pyext.printlines([sample.soft_text], OUTDIR /
                         pyext.f("{sample.data_acc}.autofilled.soft.txt"))
    except Exception as e:
        print('FAILED')
        print(str(e))

template = u'''
^SERIES = 0829-polyq
!Series_title = RNA-Seq and ChIP-Seq profiling of ELF3, an prion-like domain-containig in ELF3 that functions as a
thermosensor in Arabidopsis.
!Series_summary = Temperature is a major environmental variable governing plant growth and
    #     mcurr = _readData(
    #         '/home/feng/envs/upGeo/results/0424-database-raw/mcurr.csv',
    #         guess_index=0)
    #     mcurr = mcurr.loc[~mcurr['FULL_PATH'].str.contains("Raw_data/184R_Q_reseq181_combined")]

    #     mcurr = mcurr.loc[~mcurr["FULL_PATH"].isin(['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L005_R1_001.fastq.gz'],
    #        ['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L006_R1_001.fastq.gz'],
    #        ['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L007_R1_001.fastq.gz'],
    #        ['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L008_R1_001.fastq.gz']
    #                                              )]
    #     rawMeta = mcurr

    mappedMeta = _readData(
        '/home/feng/work/results/0407-database-mapped/mcurr.csv',
        guess_index=0)
    for DATA_ACC in DATA_ACC_RNASEQ() + DATA_ACC_CHIPSEQ():
        print DATA_ACC
        node = pyext.file__asModule(
            '/home/feng/envs/0726-polyq/src/validate_fastq.py')
        node.rawMeta = rawMeta()
        node.DATA_ACC = DATA_ACC
        #     '150RS1'
        node.WORKDIR = pyext.path.Path(
            '/home/feng/envs/0726-polyq/WORKDIR.submit/').realpath()
        #         node.WORKDIR = pyext.path.Path('/home/feng/envs/0830-polyq/WORKDIR/').realpath()
        node.valid_fastq()
        node.combined_valid_fastq()
#         break

    print("[DONE]")
# -*- coding: utf-8 -*-
import pymisca.ext as pyext
# Load get_meta_soft.py (relative to the current working directory) as a
# module object; it supplies WORKDIR and the per-sample template helpers.
ns = pyext.file__asModule('./get_meta_soft.py')

ns.get_soft_text()
# NOTE(review): the return value is discarded, so this is presumably called
# for a side effect on the archive — confirm against pymisca.
pyext.MDFile('./WORKDIR.submit/get_soft_text.tar.gz')

WORKDIR = ns.WORKDIR
# Output directory for the generated SOFT text files.
OUTDIR = WORKDIR() / "get_soft_text"
(WORKDIR() / 'ftp').makedirs_p()
# NOTE(review): presumably ensures OUTDIR exists — confirm against pymisca.
pyext.real__dir(dirname=OUTDIR)
_samples = ns.sample_init_full()

for sample in _samples:
    # Single-argument print(...) emits the same line under Python 2 and
    # Python 3, unlike the Python 2-only print statement.
    print('[template_finalise] %s ...' % (sample['data_acc'],))
    # `if 1:` stands in for a disabled try/except, so any error here aborts
    # the run; it is kept so the body's indentation level is unchanged.
    if 1:
        ns.sample_template_find_curated(sample)
        ns.sample_template_finalise(sample)
        res = sample['template_final']
        # Strip leading/trailing whitespace from every line of the template.
        res = '\n'.join([x.strip() for x in res.splitlines()])
        sample.soft_text = res
        # NOTE(review): the "{sample.data_acc}" placeholder names the loop
        # variable; presumably pyext.f resolves it from the calling scope, so
        # keep the variable called `sample`.
        pyext.printlines([sample.soft_text], OUTDIR /
                         pyext.f("{sample.data_acc}.autofilled.soft.txt"))