Example #1
0
def main():
    '''
    Run mainChunk over every entry returned by result_file_dict, write the
    counter summary to the log and hand the accumulated bed lines to
    write_shrimp_results_bed.

    Inputs are taken from the command line:
        sys.argv[1] -- path prefix; '<argv[1]>.results' is passed to
                       result_file_dict and argv[1] itself to
                       write_shrimp_results_bed
        sys.argv[2] -- file that is removed after all other work is done
        sys.argv[3] -- miRquant configuration file
    '''
    os.chdir('./bin')
    dirc = sys.argv[1]
    cfg = load_mirquant_config_file(sys.argv[3])
    # NOTE(review): presumably the sample name is the basename of whatever
    # precedes './IntermediateFiles/' in the path -- confirm with callers.
    name = os.path.basename(dirc.split('./IntermediateFiles/')[0])
    mirquant_output = sample_output_paths(cfg['paths']['output'], name)
    logName = '{}_collectRes.log'.format(os.path.basename(dirc))
    initiate_logging('{}/collect_results_logs/'.format(mirquant_output['log']),
                     logName)
    bt = '{}.results'.format(sys.argv[1])
    res_li = resource_paths(cfg['parameters']['species'], cfg['paths'],
                            cfg['parameters'])
    mir_file, TRNAfile = res_li[1], res_li[5]
    outDir = os.path.dirname(bt)
    # First '*LIB.fa' match one level above dirc's parent directory; an
    # IndexError is raised here when nothing matches.
    filesLib = glob.glob('{}/../*LIB.fa'.format(os.path.dirname(dirc)))[0]
    print_run_info(dirc, bt, cfg['parameters']['species'], outDir, filesLib)
    btWins, EM = result_file_dict(bt, dirc)
    mirs = get_miR_info(mir_file)
    counters = initialize_counters()

    bedFile = []
    for res in sorted(btWins):
        bedFile, counters = mainChunk(res, counters, bedFile, dirc, EM,
                                      TRNAfile, mirs, outDir, mirquant_output)

    write_summary_to_log(counters)
    write_shrimp_results_bed(sys.argv[1], bedFile)
    # Delete the file directly rather than shelling out to `rm`: an unquoted
    # path in a shell string breaks on spaces and is open to injection.  As
    # with `rm`, a failed removal is reported but does not abort the run.
    try:
        os.remove(sys.argv[2])
    except OSError as err:
        sys.stderr.write('rm: cannot remove {}: {}\n'.format(sys.argv[2], err))
def main(arg):
    '''
    Drive the chain for one sample.  In order it calls cutadapt_cmd,
    separate_by_read_length, bowtie (once per read length), window_creation,
    mapping_statistics, run_shrimp_alignment, bt_postProcEM.main and
    reduce_shrimp_res, logging the seconds elapsed since the start after
    several of those steps.

    arg -- object exposing the attributes `conf` and `sample`
    '''
    began = time.time()
    os.chdir('./bin')
    config = load_mirquant_config_file(arg.conf)
    sys_config = load_sys_config_file(arg.conf)
    plain_job = build_job(sys_config['job'])
    threaded_job = build_job(sys_config['job_threaded'])

    sample_dir, inter_dir, base_name = set_up_output_folder(
        arg.sample, config['paths']['output'])
    out_paths = sample_output_paths(config['paths']['output'], base_name)
    initiate_logging(out_paths['log'], 'chainSubmission.log')
    resources = resource_paths(config['parameters']['species'],
                               config['paths'], config['parameters'])
    trna_res = resources[4]
    tmrna_res = resources[3]
    index_res = resources[7]
    define_input_varibles(config)

    library = set_lib(inter_dir, base_name)
    min_len, max_len = cutadapt_cmd(arg.sample, library, config['cutadapt'],
                                    out_paths['log'], arg.conf)
    elapsed = time.time() - began
    logging.info('cutadapt = {}'.format(elapsed))

    separate_by_read_length(min_len, max_len, library, out_paths['output'])
    # One bowtie call per read length, min_len through max_len inclusive.
    for read_len in range(min_len, max_len + 1):
        bowtie('{}_{}.fq'.format(library, read_len), read_len, index_res,
               config['bowtie'])
    elapsed = time.time() - began
    logging.info('bowtie = {}'.format(elapsed))

    window_creation(min_len, max_len, library, index_res, trna_res, tmrna_res)
    elapsed = time.time() - began
    logging.info('window generation = {}'.format(elapsed))

    mapping_statistics(arg.sample, library, sample_dir, base_name)
    run_shrimp_alignment(min_len, max_len, library, out_paths['log'],
                         out_paths['temp'], threaded_job, arg.conf)
    merged_bed = '{}_merge.bed'.format(library)
    all_gs_bed = '{}_allGS.bed'.format(library)
    bt_postProcEM.main(merged_bed, all_gs_bed, out_paths['temp'])
    reduce_shrimp_res(out_paths['temp'], inter_dir, plain_job)
    elapsed = time.time() - began
    logging.info('finish = {}'.format(elapsed))
def main():
    '''
    Feed every key of the dictionary returned by result_file_dict (in sorted
    order) through mainChunk, then log the counters and pass the collected
    bed lines to write_shrimp_results_bed.

    Command-line arguments:
        sys.argv[1] -- path prefix of the results ('<argv[1]>.results')
        sys.argv[2] -- file deleted as the final step
        sys.argv[3] -- miRquant configuration file
    '''
    os.chdir('./bin')
    dirc = sys.argv[1]
    cfg = load_mirquant_config_file(sys.argv[3])
    name = os.path.basename(dirc.split('./IntermediateFiles/')[0])
    mirquant_output = sample_output_paths(cfg['paths']['output'], name)
    logName = '{}_collectRes.log'.format(os.path.basename(dirc))
    initiate_logging('{}/collect_results_logs/'.format(mirquant_output['log']),
                     logName)
    bt = '{}.results'.format(sys.argv[1])
    res_li = resource_paths(cfg['parameters']['species'], cfg['paths'],
                            cfg['parameters'])
    mir_file, TRNAfile = res_li[1], res_li[5]
    outDir = os.path.dirname(bt)
    # Takes the first '*LIB.fa' match; raises IndexError if there is none.
    filesLib = glob.glob('{}/../*LIB.fa'.format(os.path.dirname(dirc)))[0]
    print_run_info(dirc, bt, cfg['parameters']['species'], outDir, filesLib)
    btWins, EM = result_file_dict(bt, dirc)
    mirs = get_miR_info(mir_file)
    counters = initialize_counters()

    bedFile = []
    for res in sorted(btWins):
        bedFile, counters = mainChunk(res, counters, bedFile, dirc, EM,
                                      TRNAfile, mirs, outDir, mirquant_output)

    write_summary_to_log(counters)
    write_shrimp_results_bed(sys.argv[1], bedFile)
    # os.remove avoids passing an unquoted path through the shell (which
    # fails on spaces and allows command injection).  A removal failure is
    # reported on stderr and otherwise ignored, matching the old `rm` call.
    try:
        os.remove(sys.argv[2])
    except OSError as err:
        sys.stderr.write('rm: cannot remove {}: {}\n'.format(sys.argv[2], err))
def main(arg):
    '''
    Drive the chain for one sample.  In order it calls
    generate_adapter_files.main, get_maxRNA_length, cutadapt_cmd,
    separate_by_read_length, bowtie (once per read length), window_creation,
    mapping_statistics, run_shrimp_alignment, bt_postProcEM.main and
    reduce_shrimp_res.

    arg -- object exposing the attributes `conf` and `sample`
    '''
    os.chdir('./bin')
    config = load_mirquant_config_file(arg.conf)
    sys_config = load_sys_config_file(arg.conf)
    cluster_job = build_job(sys_config['job'])

    sample_dir, inter_dir, base_name = set_up_output_folder(
        arg.sample, config['paths']['output'])
    out_paths = sample_output_paths(config['paths']['output'], base_name)
    initiate_logging(out_paths['log'], 'chainSubmission.log')
    resources = resource_paths(config['parameters']['species'],
                               config['paths'], config['parameters'])
    trna_res = resources[4]
    tmrna_res = resources[3]
    index_res = resources[7]
    define_input_varibles(config)

    generate_adapter_files.main(arg.sample, out_paths['log'], arg.conf)
    longest = get_maxRNA_length(arg.sample, config['cutadapt'])
    library = set_lib(inter_dir, base_name)
    shortest = cutadapt_cmd(arg.sample, library, config['cutadapt'])
    separate_by_read_length(shortest, longest, library, out_paths['output'])
    # One bowtie call per read length, shortest through longest inclusive.
    for read_len in range(shortest, longest + 1):
        bowtie('{}_{}.fq'.format(library, read_len), read_len, index_res,
               config['bowtie'])
    window_creation(shortest, longest, library, index_res, trna_res,
                    tmrna_res)
    mapping_statistics(arg.sample, library, sample_dir, base_name)
    run_shrimp_alignment(shortest, longest, library, out_paths['log'],
                         out_paths['temp'], cluster_job, arg.conf)
    merged_bed = '{}_merge.bed'.format(library)
    all_gs_bed = '{}_allGS.bed'.format(library)
    bt_postProcEM.main(merged_bed, all_gs_bed, out_paths['temp'])
    reduce_shrimp_res(out_paths['temp'], inter_dir, cluster_job)
def main(file, log_dir, conf):
    '''
    Write an adapter file for `file` when check_for_adapter_file reports
    that one is needed and check_for_barcode_file returns a false value.

    file    -- input passed to check_for_adapter_file and
               scan_fastq_for_barcode
    log_dir -- passed through to scan_fastq_for_barcode
    conf    -- miRquant configuration file
    '''
    check_input()
    config = load_mirquant_config_file(conf)
    sample_dir, sample_name, adapter_needed = check_for_adapter_file(file)
    # The explicit comparison with True is kept on purpose: only a value
    # equal to True triggers adapter generation.
    if not (adapter_needed == True):
        return
    if check_for_barcode_file(sample_dir, sample_name, config):
        return
    found_barcode = scan_fastq_for_barcode(file, sample_name, log_dir)
    write_adapter_file(
        sample_dir, sample_name,
        create_adapter(found_barcode, config['cutadapt']['adapter']))
def main(file, log_dir, conf):
    '''
    Create and write an adapter for `file`, but only when
    check_for_adapter_file flags it as needed and check_for_barcode_file
    returns a false value.

    file    -- input handed to check_for_adapter_file and
               scan_fastq_for_barcode
    log_dir -- forwarded to scan_fastq_for_barcode
    conf    -- miRquant configuration file
    '''
    check_input()
    settings = load_mirquant_config_file(conf)
    folder, base, wants_adapter = check_for_adapter_file(file)
    # `and` short-circuits, so the barcode-file check only runs when the
    # flag equals True, exactly as with the nested form.
    if wants_adapter == True and not check_for_barcode_file(folder, base,
                                                            settings):
        code = scan_fastq_for_barcode(file, base, log_dir)
        built = create_adapter(code, settings['cutadapt']['adapter'])
        write_adapter_file(folder, base, built)
def main(conf):
    '''
    For every directory returned by return_sample_results_directories, run
    run_summary2Tab_clust, move_files_to_out_dir and write_summary_table.

    conf -- miRquant configuration file
    '''
    os.chdir('./bin')
    config = load_mirquant_config_file(conf)
    sample_dirs = return_sample_results_directories(config['paths']['project'])
    for sample_dir in sample_dirs:
        # The last character is dropped before taking the basename
        # (presumably a trailing path separator -- confirm).
        trimmed = sample_dir[:-1]
        label = os.path.basename(trimmed)
        destinations = sample_output_paths(config['paths']['output'], label)
        run_summary2Tab_clust(config['paths'],
                              config['parameters']['species'],
                              sample_dir, conf)
        move_files_to_out_dir(destinations, sample_dir, label)
        write_summary_table(sample_dir)
Example #8
0
def main(size, lib, base, conf, log_dir):
    '''
    Submit the SHRiMP run for one read size and then post-process it.

    size    -- read size; used in the log-file name, the reads-file name and
               passed to make_out_dir / make_base_seed
    lib     -- library value passed to make_out_dir and shrimp_submission
    base    -- prefix of the reads file ('<base><size>.noHit')
    conf    -- miRquant configuration file
    log_dir -- directory handed to initiate_logging and shrimp_submission
    '''
    cfg = load_mirquant_config_file(conf)
    initiate_logging(log_dir, "SHRiMP_{}.log".format(size))

    reads = "{}{}.noHit".format(base, size)

    make_out_dir(lib, size)
    seedList = make_base_seed(size)
    # The return value of shrimp_submission was never used, so it is no
    # longer bound to a local; the unused working-directory local is gone too.
    shrimp_submission("pypath", cfg["shrimp"], seedList, lib, log_dir, reads)
    # The working directory is read only here, after the steps above.
    shrimp_postProcGS.main(conf, os.getcwd())
Example #9
0
def main(size, lib, base, conf, log_dir):
    '''
    Submit the SHRiMP run for one read size and then post-process it.

    size    -- read size; used in the log-file name, the reads-file name and
               passed to make_out_dir / make_base_seed
    lib     -- library value passed to make_out_dir and shrimp_submission
    base    -- prefix of the reads file ('<base><size>.noHit')
    conf    -- miRquant configuration file
    log_dir -- directory handed to initiate_logging and shrimp_submission
    '''
    cfg = load_mirquant_config_file(conf)
    initiate_logging(log_dir, 'SHRiMP_{}.log'.format(size))

    reads = '{}{}.noHit'.format(base, size)

    make_out_dir(lib, size)
    seedList = make_base_seed(size)
    # Unused locals removed: the submission's return value and an early
    # snapshot of the working directory were assigned but never read.
    shrimp_submission('pypath', cfg['shrimp'], seedList, lib,
                      log_dir, reads)
    # The working directory is read only here, after the steps above.
    shrimp_postProcGS.main(conf, os.getcwd())
Example #10
0
def main(conf):
    '''
    For every directory returned by return_sample_results_directories,
    announce the sample on stdout and run run_summary2Tab_clust,
    move_files_to_out_dir and write_summary_table.

    conf -- miRquant configuration file
    '''
    os.chdir('./bin')
    cfg = load_mirquant_config_file(conf)
    samples = return_sample_results_directories(cfg['paths']['project'])
    for sample in samples:
        # Parenthesised single-argument print emits the same text under
        # Python 2 and is valid syntax under Python 3.
        print('\nProcessing sample {}...'.format(sample))
        # The last character is dropped before taking the basename
        # (presumably a trailing path separator -- confirm).
        samp_name = os.path.basename(sample[:-1])
        out_di = sample_output_paths(cfg['paths']['output'], samp_name)
        run_summary2Tab_clust(cfg['paths'], cfg['parameters']['species'],
                              sample, conf)
        move_files_to_out_dir(out_di, sample, samp_name)
        write_summary_table(sample)
def main(conf, shrimp_dir):
    '''
    Post-process the SHRiMP results in shrimp_dir: load them, keep the
    alignments chosen by get_best_alignments, write the processed output,
    call remove_temp_file and log the totals.

    conf       -- miRquant configuration file
    shrimp_dir -- directory passed to load_SHRiMP_res and
                  write_processed_shrimp_output
    '''
    check_input()
    config = load_mirquant_config_file(conf)
    resources = resource_paths(config['parameters']['species'],
                               config['paths'], config['parameters'])
    path_head = shrimp_dir.split('./IntermediateFiles/')[0]
    sample_name = os.path.basename(path_head)
    out_paths = sample_output_paths(config['paths']['output'], sample_name)
    logging.info('\n\n### Processing SHRiMP results ###\n')
    mir_path = resources[1]
    mir_names, mir_strands = load_mir_info(mir_path)
    n_tags, raw_hits, mappings = load_SHRiMP_res(shrimp_dir)
    best_hits, tags, prop_count = get_best_alignments(raw_hits, mappings)
    write_processed_shrimp_output(best_hits, tags, mir_names, mir_strands,
                                  shrimp_dir)
    # Second '_'-separated field of the directory's basename.
    dir_field = os.path.basename(shrimp_dir).split('_')[1]
    remove_temp_file(dir_field, out_paths['temp'])
    summary = 'Total SHRiMP alignments = {}; proportional count = {}'.format(
        n_tags, prop_count)
    logging.info(summary)
Example #12
0
def main(conf, shrimp_dir):
    '''
    Post-process the SHRiMP results in shrimp_dir: load them, keep the
    alignments chosen by get_best_alignments, write the processed output,
    call remove_temp_file and log the totals.

    conf       -- miRquant configuration file
    shrimp_dir -- directory passed to load_SHRiMP_res and
                  write_processed_shrimp_output
    '''
    # Parenthesised single-argument print emits the same text under Python 2
    # and is valid syntax under Python 3.
    print('Shrimp post-processing')
    check_input()
    cfg = load_mirquant_config_file(conf)
    res_li = resource_paths(cfg['parameters']['species'], cfg['paths'],
                            cfg['parameters'])
    sample = os.path.basename(shrimp_dir.split('./IntermediateFiles/')[0])
    out_di = sample_output_paths(cfg['paths']['output'], sample)
    logging.info('\n\n### Processing SHRiMP results ###\n')
    mir_fi = res_li[1]
    mirList, mirStrand = load_mir_info(mir_fi)
    tagCount, hits, maps = load_SHRiMP_res(shrimp_dir)
    hits, tags, pCount = get_best_alignments(hits, maps)
    write_processed_shrimp_output(hits, tags, mirList, mirStrand, shrimp_dir)
    # Second '_'-separated field of the directory's basename is passed on.
    remove_temp_file(os.path.basename(shrimp_dir).split('_')[1],
                     out_di['temp'])
    logging.info('Total SHRiMP alignments = {}; proportional count = {}'.format(tagCount, pCount))
    dictionary.  Add the all the counts together in a total counts 
    dictionary.  Add the counts to the counts dictionary as well.
    '''
    for item in res_line[1:]:
        L, V = item.split(':')
        if N not in features:
            features[N] = {}
        features[N][L] = V
        try:
            tot[L] += float(features[N][L])
        except KeyError:
            tot[L] = float(features[N][L])
    return features, tot


# Script-level setup.  Command line: argv[1] is stored as `res`, argv[2] is
# the miRquant configuration file.
cfg = load_mirquant_config_file(sys.argv[2])

res = sys.argv[1]
SPEC = cfg['parameters']['species']


# resource_paths returns an indexable collection; only entries 0, 2, 4 and 6
# are used here.
res_li = resource_paths(SPEC, cfg['paths'], cfg['parameters'])
genome, mmuFile, tRNAFile, refAnn = res_li[0], res_li[2], res_li[4], res_li[6]

# CODE is a second name for the species value held in SPEC.
CODE = SPEC


bedInfo = mirANDtrna_to_bed(mmuFile, tRNAFile)

# Start out empty; presumably filled by code further down -- not visible here.
expression = {}
baseExp = {}
Example #14
0
    '''
    for item in res_line[1:]:
        L, V = item.split(':')
        if N not in features:
            features[N] = {}
        features[N][L] = V
        try:
            tot[L] += float(features[N][L])
        except KeyError:
            tot[L] = float(features[N][L])
    return features, tot



# Script-level setup.  Command line: argv[1] is stored as `res`, argv[2] is
# the miRquant configuration file.
res = sys.argv[1]
cfg = load_mirquant_config_file(sys.argv[2])
species = cfg['parameters']['species']
# resource_paths returns an indexable collection; only entries 0, 2, 4 and 6
# are used here.
res_li = resource_paths(species, cfg['paths'], cfg['parameters'])
genome, mmuFile, tRNAFile, refAnn = res_li[0], res_li[2], res_li[4], res_li[6]

bedInfo = mirANDtrna_to_bed(mmuFile, tRNAFile)

# Start out empty; presumably filled by code further down -- not visible here.
expression = {}
baseExp = {}
baseKeys = {}
bedLine = {}
features = {}
Novelarr = {}
tot = {}
# `tmp` is another name for the argv[1] value held in `res`.
tmp = res
# NOTE(review): literal string 'mktemp'; how it is used is not visible
# here -- confirm whether a command name or a file name is intended.
tfile_name = 'mktemp'