def sample_get_rawfile_detail(sample):
    """Validate a sample's fastq files and render its raw-file SOFT section.

    Loads ``validate_fastq.py`` as a module, configures it with the raw
    metadata table, the sample's ``data_acc`` and the working directory,
    runs ``valid_fastq()`` and reads ``OUTPUT_NODES`` from
    ``combined_valid_fastq()``.

    Side effects on ``sample`` (attributes set here):
        rawfile_nodes, rawfile_files_orig, rawfile_files,
        rawfile_checksums, rawfile_readlengths, rawfile_is_paired.

    Returns:
        The result of ``pyext.jf2`` applied to the raw-file template.
    """
    from pymisca.events import LinkEvent
    from pymisca.ext import f as _f

    validator = pyext.file__asModule(
        '/home/feng/envs/0726-polyq/src/validate_fastq.py')
    validator.rawMeta = rawMeta()
    validator.DATA_ACC = sample['data_acc']
    validator.WORKDIR = WORKDIR()
    validator.valid_fastq()

    nodes = validator.combined_valid_fastq()['OUTPUT_NODES']
    sample.rawfile_nodes = nodes
    sample.rawfile_files_orig = [entry['OUTPUT_FILE'] for entry in nodes]

    # GEO needs a flat directory tree, so every validated file is re-linked
    # into WORKDIR()/ftp under "<data_acc>.<basename>" and referenced by its
    # path relative to that directory.
    # NOTE(review): `sample` and `x` must keep these exact names -- `_f` is
    # given no explicit variables, so it presumably resolves the placeholders
    # from the calling frame. Confirm before renaming.
    linked_names = []
    for x in nodes:
        link = LinkEvent(
            x['OUTPUT_FILE'],
            WORKDIR() / "ftp" / _f('{sample.data_acc}.{x["OUTPUT_FILE"].basename()}'),
            1,  # NOTE(review): meaning of this positional flag is not visible here
        )
        linked_names.append(link.dest.relpath(WORKDIR() / "ftp"))
    sample.rawfile_files = linked_names

    sample.rawfile_checksums = [
        entry['FILE_MD5']['MD5_HEX'] for entry in nodes]
    # Read length is fixed at '75' for every file.
    sample.rawfile_readlengths = ['75'] * len(nodes)
    if len(nodes) > 1:
        sample.rawfile_is_paired = 'paired-end'
    else:
        sample.rawfile_is_paired = 'single'

    # The {{...}} expressions refer to `sample`; they are filled in by
    # pyext.jf2 below.
    template = u'''
!Sample_raw_file_name = {{','.join(sample.rawfile_files)}}
!Sample_raw_file_type = fastq
!Sample_raw_file_checksum = {{','.join(sample.rawfile_checksums)}}
!Sample_raw_file_read_length = {{','.join(sample.rawfile_readlengths)}}
!Sample_raw_file_single_or_paired-end = {{sample.rawfile_is_paired}}
!Sample_raw_file_instrument_model = NextSeq 500
'''
    return pyext.jf2(template)
from pymisca.util import colGroupMean
# Expose colGroupMean on the pyext namespace. This assignment sits between
# the imports in the original, so its position is preserved.
pyext.colGroupMean = colGroupMean
import pymisca.vis_util as pyvis
import src.util as _util
from util import _get_file  # shadowed by the recording wrapper defined below
import synotil.dio
import synotil.qcplots
import sys

# Directory containing this script. Anchored on the absolute path because
# os.path.dirname(__file__) is '' when the script is launched by bare
# filename, which previously turned SRC_DIR + '/get_meta_soft.py' into a
# path at the filesystem root.
SRC_DIR = os.path.dirname(os.path.abspath(__file__))

# Load the sibling metadata script as a module and fetch the mapped
# ChIP-seq table it provides.
ns = pyext.file__asModule(os.path.join(SRC_DIR, 'get_meta_soft.py'))
meta = ns.df_mappedData_chipseq()

# DATA_ACC = "189CS11"
# npkFile = meta.loc["189CS10",'narrowPeak']

# Every path resolved through _get_file() is appended here.
inputs = []


def _get_file(fn):
    """Resolve ``fn`` via ``_util._get_file`` and record the result.

    Args:
        fn: file reference forwarded unchanged to ``_util._get_file``.

    Returns:
        Whatever ``_util._get_file(fn)`` returns; the same value is also
        appended to the module-level ``inputs`` list.
    """
    resolved = _util._get_file(fn)
    inputs.append(resolved)
    return resolved


outputs = []
# -*- coding: utf-8 -*- import pymisca.ext as pyext ns = pyext.file__asModule('/home/feng/envs/0726-polyq/src/get_meta_soft.py') WORKDIR = ns.WORKDIR # OUTDIR = WORKDIR() / "final_soft" OUTDIR = WORKDIR() / "get_soft_text" pyext.real__dir(dirname=OUTDIR) _samples = ns.sample_init_full() for sample in _samples: print '[template_finalise]', sample['data_acc'], '...' try: ns.sample_template_find_curated(sample) # continue ns.sample_template_finalise(sample) res = sample['template_final'] res = '\n'.join([x.strip() for x in res.splitlines()]) sample.soft_text = res pyext.printlines([sample.soft_text], OUTDIR / pyext.f("{sample.data_acc}.autofilled.soft.txt")) except Exception as e: print('FAILED') print(str(e)) template = u''' ^SERIES = 0829-polyq !Series_title = RNA-Seq and ChIP-Seq profiling of ELF3, an prion-like domain-containig in ELF3 that functions as a thermosensor in Arabidopsis. !Series_summary = Temperature is a major environmental variable governing plant growth and
# Superseded raw-metadata loading, kept commented out for reference:
# mcurr = _readData(
#     '/home/feng/envs/upGeo/results/0424-database-raw/mcurr.csv',
#     guess_index=0)
# mcurr = mcurr.loc[~mcurr['FULL_PATH'].str.contains("Raw_data/184R_Q_reseq181_combined")]
# mcurr = mcurr.loc[~mcurr["FULL_PATH"].isin(['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L005_R1_001.fastq.gz'],
#     ['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L006_R1_001.fastq.gz'],
#     ['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L007_R1_001.fastq.gz'],
#     ['/home/feng/writable/teampw/__backup/syno3/raw-data/PW_HiSeq_data/RNA-seq/Raw_data/184R_Q_reseq181_combined/181RESEQ_Q_870/181R-Q-870-4196-ZT12-27C-adult_S22_L008_R1_001.fastq.gz']
# )]
# rawMeta = mcurr

# Table of mapped data, read from a fixed CSV path.
mappedMeta = _readData(
    '/home/feng/work/results/0407-database-mapped/mcurr.csv',
    guess_index=0)

# Run the fastq validation for every RNA-seq and ChIP-seq accession.
# validate_fastq.py is loaded anew for each accession and configured by
# assigning rawMeta, DATA_ACC and WORKDIR on the loaded module.
for DATA_ACC in DATA_ACC_RNASEQ() + DATA_ACC_CHIPSEQ():
    # Parenthesised single-argument form: prints the same under Python 2 and
    # parses under Python 3, matching the print("[DONE]") call below.
    print(DATA_ACC)
    node = pyext.file__asModule(
        '/home/feng/envs/0726-polyq/src/validate_fastq.py')
    node.rawMeta = rawMeta()
    node.DATA_ACC = DATA_ACC  # e.g. '150RS1'
    node.WORKDIR = pyext.path.Path(
        '/home/feng/envs/0726-polyq/WORKDIR.submit/').realpath()
    # node.WORKDIR = pyext.path.Path('/home/feng/envs/0830-polyq/WORKDIR/').realpath()
    node.valid_fastq()
    node.combined_valid_fastq()
    # break

print("[DONE]")
# -*- coding: utf-8 -*-
import pymisca.ext as pyext

# Load the metadata/SOFT helpers from the script in the current directory.
ns = pyext.file__asModule('./get_meta_soft.py')
# dfc = node.df_figureRegistry()
# import pymisca.ext as pyext
# node = pyext.file__asModule('src/get_meta_soft.py')
# node.WORKDIR = lambda: pyext.path.Path('./WORKDIR.submit/').realpath()
ns.get_soft_text()
pyext.MDFile('./WORKDIR.submit/get_soft_text.tar.gz')

WORKDIR = ns.WORKDIR
# OUTDIR = WORKDIR() / "final_soft"
OUTDIR = WORKDIR() / "get_soft_text"
# Make sure the ftp/ and output directories exist before writing.
(WORKDIR() / 'ftp').makedirs_p()
pyext.real__dir(dirname=OUTDIR)

_samples = ns.sample_init_full()
for sample in _samples:
    # Single pre-formatted argument: same output as the former Python 2
    # print statement with three comma-separated items, and valid in Python 3.
    print('[template_finalise] %s ...' % (sample['data_acc'],))
    ns.sample_template_find_curated(sample)
    ns.sample_template_finalise(sample)
    # Strip leading/trailing whitespace from every line of the final template.
    res = sample['template_final']
    res = '\n'.join([x.strip() for x in res.splitlines()])
    sample.soft_text = res
    # One "<data_acc>.autofilled.soft.txt" file per sample under OUTDIR.
    # `sample` must keep its name: pyext.f is given no explicit variables.
    pyext.printlines([sample.soft_text],
                     OUTDIR / pyext.f("{sample.data_acc}.autofilled.soft.txt"))