def main():
    '''
    Collect the results for one directory named on the command line.

    Positional command-line arguments read here:
        sys.argv[1] -- directory being processed; results are read from
                       '<argv[1]>.results' and passed on to the helpers
        sys.argv[2] -- path of a file that is removed once processing ends
        sys.argv[3] -- miRquant configuration file

    The working directory is switched to './bin' before anything else, so
    relative paths in the arguments are resolved from there.
    '''
    os.chdir('./bin')
    dirc = sys.argv[1]
    cfg = load_mirquant_config_file(sys.argv[3])

    # Sample name is derived from the part of the path that precedes
    # './IntermediateFiles/'.
    name = os.path.basename(dirc.split('./IntermediateFiles/')[0])
    mirquant_output = sample_output_paths(cfg['paths']['output'], name)

    logName = '{}_collectRes.log'.format(os.path.basename(dirc))
    initiate_logging(
        '{}/collect_results_logs/'.format(mirquant_output['log']), logName)

    bt = '{}.results'.format(dirc)
    species = cfg['parameters']['species']
    res_li = resource_paths(species, cfg['paths'], cfg['parameters'])
    mir_file, TRNAfile = res_li[1], res_li[5]
    outDir = os.path.dirname(bt)

    # First '*LIB.fa' file one level above the directory holding dirc.
    # Raises IndexError when no such file exists.
    filesLib = glob.glob('{}/../*LIB.fa'.format(os.path.dirname(dirc)))[0]

    print_run_info(dirc, bt, species, outDir, filesLib)

    btWins, EM = result_file_dict(bt, dirc)
    mirs = get_miR_info(mir_file)
    counters = initialize_counters()

    bedFile = []
    for res in sorted(btWins):
        bedFile, counters = mainChunk(res, counters, bedFile, dirc, EM,
                                      TRNAfile, mirs, outDir, mirquant_output)

    write_summary_to_log(counters)
    write_shrimp_results_bed(dirc, bedFile)

    # Remove the file directly instead of spawning `rm` through a shell:
    # a shell command built from argv breaks on paths containing spaces or
    # shell metacharacters.  Failure stays non-fatal, as it was with `rm`.
    marker = sys.argv[2]
    try:
        os.remove(marker)
    except OSError as err:
        sys.stderr.write('Could not remove {}: {}\n'.format(marker, err))
def main(arg):
    '''
    Drive the processing chain for one sample and log elapsed time.

    arg is expected to expose `conf` (configuration file) and `sample`
    (input sample); both are handed to the helper functions below.  The
    elapsed wall-clock time since entry is logged after the cutadapt,
    bowtie and window-generation steps and again at the end.
    '''
    t0 = time.time()
    os.chdir('./bin')

    cfg = load_mirquant_config_file(arg.conf)
    sys_cfg = load_sys_config_file(arg.conf)
    single_job = build_job(sys_cfg['job'])
    threaded_job = build_job(sys_cfg['job_threaded'])

    output_root = cfg['paths']['output']
    out_dir, out_dir_i, base_name = set_up_output_folder(arg.sample,
                                                         output_root)
    sample_paths = sample_output_paths(output_root, base_name)
    initiate_logging(sample_paths['log'], 'chainSubmission.log')

    resources = resource_paths(cfg['parameters']['species'],
                               cfg['paths'],
                               cfg['parameters'])
    tm_rna = resources[3]
    t_rna = resources[4]
    bowtie_index = resources[7]

    define_input_varibles(cfg)
    library = set_lib(out_dir_i, base_name)

    min_len, max_len = cutadapt_cmd(arg.sample, library, cfg['cutadapt'],
                                    sample_paths['log'], arg.conf)
    logging.info('cutadapt = {}'.format(time.time() - t0))

    separate_by_read_length(min_len, max_len, library, sample_paths['output'])

    # One bowtie run per read length, over the inclusive length range.
    read_len = min_len
    while read_len <= max_len:
        bowtie('{}_{}.fq'.format(library, read_len), read_len,
               bowtie_index, cfg['bowtie'])
        read_len += 1
    logging.info('bowtie = {}'.format(time.time() - t0))

    window_creation(min_len, max_len, library, bowtie_index, t_rna, tm_rna)
    logging.info('window generation = {}'.format(time.time() - t0))

    mapping_statistics(arg.sample, library, out_dir, base_name)
    run_shrimp_alignment(min_len, max_len, library, sample_paths['log'],
                         sample_paths['temp'], threaded_job, arg.conf)

    merged_bed = '{}_merge.bed'.format(library)
    all_gs_bed = '{}_allGS.bed'.format(library)
    bt_postProcEM.main(merged_bed, all_gs_bed, sample_paths['temp'])

    reduce_shrimp_res(sample_paths['temp'], out_dir_i, single_job)
    logging.info('finish = {}'.format(time.time() - t0))
def main(arg):
    '''
    Drive the processing chain for one sample.

    arg is expected to expose `conf` (configuration file) and `sample`
    (input sample).  Adapter files are generated first, then the maximum
    and minimum read lengths are obtained from the cutadapt helpers and
    every length in that inclusive range is aligned with bowtie before
    the SHRiMP steps are launched.
    '''
    os.chdir('./bin')

    cfg = load_mirquant_config_file(arg.conf)
    sys_cfg = load_sys_config_file(arg.conf)
    job = build_job(sys_cfg['job'])

    output_root = cfg['paths']['output']
    out_dir, out_dir_i, base_name = set_up_output_folder(arg.sample,
                                                         output_root)
    sample_paths = sample_output_paths(output_root, base_name)
    initiate_logging(sample_paths['log'], 'chainSubmission.log')

    resources = resource_paths(cfg['parameters']['species'],
                               cfg['paths'],
                               cfg['parameters'])
    tm_rna = resources[3]
    t_rna = resources[4]
    bowtie_index = resources[7]

    define_input_varibles(cfg)
    generate_adapter_files.main(arg.sample, sample_paths['log'], arg.conf)

    max_len = get_maxRNA_length(arg.sample, cfg['cutadapt'])
    library = set_lib(out_dir_i, base_name)
    min_len = cutadapt_cmd(arg.sample, library, cfg['cutadapt'])

    separate_by_read_length(min_len, max_len, library, sample_paths['output'])

    # One bowtie run per read length, over the inclusive length range.
    read_len = min_len
    while read_len <= max_len:
        bowtie('{}_{}.fq'.format(library, read_len), read_len,
               bowtie_index, cfg['bowtie'])
        read_len += 1

    window_creation(min_len, max_len, library, bowtie_index, t_rna, tm_rna)
    mapping_statistics(arg.sample, library, out_dir, base_name)
    run_shrimp_alignment(min_len, max_len, library, sample_paths['log'],
                         sample_paths['temp'], job, arg.conf)

    merged_bed = '{}_merge.bed'.format(library)
    all_gs_bed = '{}_allGS.bed'.format(library)
    bt_postProcEM.main(merged_bed, all_gs_bed, sample_paths['temp'])

    reduce_shrimp_res(sample_paths['temp'], out_dir_i, job)
def main(file, log_dir, conf):
    '''
    Write an adapter file for `file` when one is needed.

    An adapter is built and written only when check_for_adapter_file
    reports that one is needed AND check_for_barcode_file returns a false
    value; in that case the barcode is scanned from the fastq file and
    combined with the configured cutadapt adapter.
    '''
    check_input()
    cfg = load_mirquant_config_file(conf)
    dirc, name, need_adapt = check_for_adapter_file(file)

    # Short-circuit keeps the barcode-file check from running unless an
    # adapter is actually needed.
    if need_adapt == True and not check_for_barcode_file(dirc, name, cfg):
        found_barcode = scan_fastq_for_barcode(file, name, log_dir)
        write_adapter_file(
            dirc,
            name,
            create_adapter(found_barcode, cfg['cutadapt']['adapter']))
def main(conf):
    '''
    Summarise every sample results directory of the configured project.

    For each directory returned for cfg['paths']['project'] the summary
    tables are produced, the files are moved to the sample's output
    location and the summary table is written.
    '''
    os.chdir('./bin')
    cfg = load_mirquant_config_file(conf)

    for sample_dir in return_sample_results_directories(cfg['paths']['project']):
        # The last character of the path is dropped before taking the
        # basename (presumably a trailing separator -- verify against
        # return_sample_results_directories).
        trimmed = sample_dir[:-1]
        sample_name = os.path.basename(trimmed)
        sample_paths = sample_output_paths(cfg['paths']['output'], sample_name)

        run_summary2Tab_clust(cfg['paths'],
                              cfg['parameters']['species'],
                              sample_dir,
                              conf)
        move_files_to_out_dir(sample_paths, sample_dir, sample_name)
        write_summary_table(sample_dir)
def main(size, lib, base, conf, log_dir):
    '''
    Run the SHRiMP step for reads of one length, then post-process.

    size    -- read length handled by this invocation
    lib     -- library identifier passed to the output-dir and submission
               helpers
    base    -- prefix of the reads file; the file used is
               '<base><size>.noHit'
    conf    -- miRquant configuration file
    log_dir -- directory receiving 'SHRiMP_<size>.log'
    '''
    cfg = load_mirquant_config_file(conf)
    initiate_logging(log_dir, "SHRiMP_{}.log".format(size))
    reads = "{}{}.noHit".format(base, size)

    make_out_dir(lib, size)
    seedList = make_base_seed(size)

    # The return value of the submission was never used, so it is no
    # longer bound to a local name.
    shrimp_submission("pypath", cfg["shrimp"], seedList, lib, log_dir, reads)

    # The working directory is read here, after make_out_dir has run.
    # NOTE(review): confirm whether make_out_dir changes directory.
    shrimp_postProcGS.main(conf, os.getcwd())
def main(size, lib, base, conf, log_dir):
    '''
    Run the SHRiMP step for reads of one length, then post-process.

    size    -- read length handled by this invocation
    lib     -- library identifier passed to the output-dir and submission
               helpers
    base    -- prefix of the reads file; the file used is
               '<base><size>.noHit'
    conf    -- miRquant configuration file
    log_dir -- directory receiving 'SHRiMP_<size>.log'
    '''
    cfg = load_mirquant_config_file(conf)
    initiate_logging(log_dir, 'SHRiMP_{}.log'.format(size))
    reads = '{}{}.noHit'.format(base, size)

    make_out_dir(lib, size)
    seedList = make_base_seed(size)

    # The return value of the submission was never used, so it is no
    # longer bound to a local name.
    shrimp_submission('pypath', cfg['shrimp'], seedList, lib, log_dir, reads)

    # The working directory is read here, after make_out_dir has run.
    # NOTE(review): confirm whether make_out_dir changes directory.
    shrimp_postProcGS.main(conf, os.getcwd())
def main(conf):
    '''
    Summarise every sample results directory of the configured project.

    For each directory returned for cfg['paths']['project'] a progress
    line is printed, the summary tables are produced, the files are moved
    to the sample's output location and the summary table is written.
    '''
    os.chdir('./bin')
    cfg = load_mirquant_config_file(conf)
    samples = return_sample_results_directories(cfg['paths']['project'])

    for sample in samples:
        # Parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print('\nProcessing sample {}...'.format(sample))

        # The last character of the path is dropped before taking the
        # basename (presumably a trailing separator -- verify against
        # return_sample_results_directories).
        samp_name = os.path.basename(sample[:-1])
        out_di = sample_output_paths(cfg['paths']['output'], samp_name)

        run_summary2Tab_clust(cfg['paths'], cfg['parameters']['species'],
                              sample, conf)
        move_files_to_out_dir(out_di, sample, samp_name)
        write_summary_table(sample)
def main(conf, shrimp_dir):
    '''
    Post-process the SHRiMP results found in shrimp_dir.

    conf       -- miRquant configuration file
    shrimp_dir -- directory holding the SHRiMP results; its basename must
                  contain an underscore, because the second '_'-separated
                  field is passed to remove_temp_file
    '''
    check_input()
    cfg = load_mirquant_config_file(conf)
    resources = resource_paths(cfg['parameters']['species'],
                               cfg['paths'],
                               cfg['parameters'])

    # Sample name comes from the part of the path that precedes
    # './IntermediateFiles/'.
    path_head = shrimp_dir.split('./IntermediateFiles/')[0]
    sample_paths = sample_output_paths(cfg['paths']['output'],
                                       os.path.basename(path_head))

    logging.info('\n\n### Processing SHRiMP results ###\n')

    mir_names, mir_strands = load_mir_info(resources[1])
    total_tags, alignments, mappings = load_SHRiMP_res(shrimp_dir)
    alignments, best_tags, prop_count = get_best_alignments(alignments,
                                                            mappings)
    write_processed_shrimp_output(alignments, best_tags, mir_names,
                                  mir_strands, shrimp_dir)

    dir_fields = os.path.basename(shrimp_dir).split('_')
    remove_temp_file(dir_fields[1], sample_paths['temp'])

    summary = 'Total SHRiMP alignments = {}; proportional count = {}'.format(
        total_tags, prop_count)
    logging.info(summary)
def main(conf, shrimp_dir):
    '''
    Post-process the SHRiMP results found in shrimp_dir.

    conf       -- miRquant configuration file
    shrimp_dir -- directory holding the SHRiMP results; its basename must
                  contain an underscore, because the second '_'-separated
                  field is passed to remove_temp_file
    '''
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Shrimp post-processing')

    check_input()
    cfg = load_mirquant_config_file(conf)
    res_li = resource_paths(cfg['parameters']['species'], cfg['paths'],
                            cfg['parameters'])

    # Sample name comes from the part of the path that precedes
    # './IntermediateFiles/'.
    sample = os.path.basename(shrimp_dir.split('./IntermediateFiles/')[0])
    out_di = sample_output_paths(cfg['paths']['output'], sample)

    logging.info('\n\n### Processing SHRiMP results ###\n')

    mir_fi = res_li[1]
    mirList, mirStrand = load_mir_info(mir_fi)
    tagCount, hits, maps = load_SHRiMP_res(shrimp_dir)
    hits, tags, pCount = get_best_alignments(hits, maps)
    write_processed_shrimp_output(hits, tags, mirList, mirStrand, shrimp_dir)

    remove_temp_file(os.path.basename(shrimp_dir).split('_')[1],
                     out_di['temp'])
    logging.info('Total SHRiMP alignments = {}; proportional count = {}'
                 .format(tagCount, pCount))
dictionary. Add the all the counts together in a total counts dictionary. Add the counts to the counts dictionary as well. ''' for item in res_line[1:]: L, V = item.split(':') if N not in features: features[N] = {} features[N][L] = V try: tot[L] += float(features[N][L]) except KeyError: tot[L] = float(features[N][L]) return features, tot cfg = load_mirquant_config_file(sys.argv[2]) res = sys.argv[1] SPEC = cfg['parameters']['species'] res_li = resource_paths(SPEC, cfg['paths'], cfg['parameters']) genome, mmuFile, tRNAFile, refAnn = res_li[0], res_li[2], res_li[4], res_li[6] CODE = SPEC bedInfo = mirANDtrna_to_bed(mmuFile, tRNAFile) expression = {} baseExp = {}
''' for item in res_line[1:]: L, V = item.split(':') if N not in features: features[N] = {} features[N][L] = V try: tot[L] += float(features[N][L]) except KeyError: tot[L] = float(features[N][L]) return features, tot res = sys.argv[1] cfg = load_mirquant_config_file(sys.argv[2]) species = cfg['parameters']['species'] res_li = resource_paths(species, cfg['paths'], cfg['parameters']) genome, mmuFile, tRNAFile, refAnn = res_li[0], res_li[2], res_li[4], res_li[6] bedInfo = mirANDtrna_to_bed(mmuFile, tRNAFile) expression = {} baseExp = {} baseKeys = {} bedLine = {} features = {} Novelarr = {} tot = {} tmp = res tfile_name = 'mktemp'