コード例 #1
0
ファイル: qcReads.py プロジェクト: CGSbioinfo/MethylSeq
def qc_check(i):
    """Run FastQC on the reads of sample ``i``.

    Reads the module-level names ``in_dir``, ``out_dir`` and ``gz``.
    The R1 files of the sample are always processed; the R2 files are
    processed only when the sample's input directory contains a file
    whose name includes ``_R2``.
    """
    sample_in = in_dir + "/" + i
    sample_out = out_dir + "/" + i

    # Look for a mate file before anything is created or run.
    has_mate = False
    for fname in os.listdir(sample_in):
        if re.findall("_R2", fname):
            has_mate = True
            break

    functions.make_sure_path_exists(sample_out)

    # The trailing space on the R1 command is kept as in the original call.
    r1_cmd = "".join(["fastqc ", sample_in, "/", i, "*_R1*.fastq", gz,
                      " --outdir=", sample_out, " --nogroup --extract "])
    os.system(r1_cmd)

    if has_mate:
        r2_cmd = "".join(["fastqc ", sample_in, "/", i, "*_R2*.fastq", gz,
                          " --outdir=", sample_out, " --nogroup --extract"])
        os.system(r2_cmd)
コード例 #2
0
ファイル: qcReads.py プロジェクト: cal64/RNASeq_pipeline
def qc_check(i):
    """Run FastQC on the R1 (and, when present, R2) reads of sample ``i``.

    Reads the module-level names ``in_dir``, ``out_dir`` and ``gz``.
    ``gz`` is appended right after ``.fastq`` in the file glob
    (presumably ``''`` or ``'.gz'`` -- confirm against functions.check_gz).

    Results are written to ``out_dir/<sample>``, which is created if needed.
    """
    sample_in = in_dir + "/" + i
    sample_out = out_dir + "/" + i

    allFiles = os.listdir(sample_in)
    # A sample is treated as paired-end when any file name contains "_R2".
    paired = any("_R2" in x for x in allFiles)

    functions.make_sure_path_exists(sample_out)
    os.system("fastqc " + sample_in + "/" + i + "*_R1*.fastq" + gz + " --outdir=" + sample_out + " --nogroup --extract ")
    if paired:
        # The original built this path as `in_dir +  + i`, i.e. a unary plus
        # applied to a string, which raises TypeError; the "/" is restored.
        os.system("fastqc " + sample_in + "/" + i + "*_R2*.fastq" + gz + " --outdir=" + sample_out + " --nogroup --extract")
コード例 #3
0
ファイル: qcReads.py プロジェクト: CGSbioinfo/MethylSeq
    # All directories below are built relative to the current working directory.
    path=os.getcwd()

    # Number of parallel jobs handed to Parallel below
    ncores=int(args.ncores)

    # Read sample names text file
    sample_names_file=args.sample_names_file
    sampleNames = functions.read_sample_names(sample_names_file)

    # Build the input, output and report directories from the command-line values.
    # NOTE(review): the qc_check worker shown for this file reads in_dir, out_dir
    # and gz as module-level names, so these variables must keep their names.
    in_dir=path + '/' + args.in_dir
    out_dir=path + '/' +args.out_dir
    out_dir_report=path + '/' + args.out_dir_report

    # Create out_dir_report
    functions.make_sure_path_exists(out_dir_report)

    # Detect if files are gz
    # (presumably the suffix that follows ".fastq" in the FastQC glob -- verify in functions.check_gz)
    gz = functions.check_gz(in_dir)

    # Run fastqc: one qc_check call per sample name, spread over ncores jobs
    Parallel(n_jobs=ncores)(delayed(qc_check)(i) for i in sampleNames)

    # Number of reads per sample.
    # The R script path is relative, so it is resolved against the working directory.
    os.system("Rscript bin/indexQC.R " + in_dir + " " + out_dir_report) 





コード例 #4
0
    # Pull the run settings out of the analysis info file.
    # NOTE(review): read_parameters_file is called once per key; presumably it
    # re-reads the file each time -- a single call would do if so.
    params_file = args.analysis_info_file
    path = functions.read_parameters_file(params_file)['Working directory']
    # refGenome is not used in the visible part of this script.
    refGenome = functions.read_parameters_file(params_file)['Reference Genome']
    strand = functions.read_parameters_file(params_file)['strand']
    # get_strand yields two values from the single 'strand' setting.
    strand_piccard, strand_htseq = functions.get_strand(strand)
    gtfFile = functions.read_parameters_file(params_file)['GTF File']

    # From here on, relative paths are resolved against the working directory.
    os.chdir(path)

    # Read sample names text file
    sampleNames = functions.read_sample_names()

    # Input and output directories and the mapping summary come from the command line
    in_dir = args.in_dir
    out_dir = args.out_dir
    functions.make_sure_path_exists(out_dir)
    mapping_summary_file = args.mapping_summary_file

    # Detect if files are gz
    gz = functions.check_gz(in_dir)

    # Count command: one `counting` call per sample, with a fixed 7 parallel jobs.
    # NOTE(review): `counting` is not visible here; it presumably reads the
    # variables above as module-level names -- confirm before renaming any.
    Parallel(n_jobs=7)(delayed(counting)(i) for i in sampleNames)

    # QC: run the two R scripts on the count output.
    # library_proportion.R receives out_dir twice, followed by the GTF file.
    os.system("Rscript /usr/local/bin/countsLog_rnaseq.R " + out_dir + ' ' +
              mapping_summary_file)
    os.system("Rscript /usr/local/bin/library_proportion.R " + out_dir + ' ' +
              out_dir + ' ' + gtfFile)
コード例 #5
0
    # Pull the run settings out of the analysis info file.
    # NOTE(review): read_parameters_file is called once per key; presumably it
    # re-reads the file each time -- a single call would do if so.
    params_file=args.analysis_info_file
    path=functions.read_parameters_file(params_file)['Working directory']
    # refGenome is not used in the visible part of this script.
    refGenome=functions.read_parameters_file(params_file)['Reference Genome']
    strand=functions.read_parameters_file(params_file)['strand']
    # get_strand yields two values from the single 'strand' setting.
    strand_piccard, strand_htseq = functions.get_strand(strand)
    gtfFile=functions.read_parameters_file(params_file)['GTF File']

    # From here on, relative paths are resolved against the working directory.
    os.chdir(path)

    # Read sample names text file
    sampleNames = functions.read_sample_names()

    # Input and output directories and the mapping summary come from the command line
    in_dir=args.in_dir
    out_dir=args.out_dir
    functions.make_sure_path_exists(out_dir)
    mapping_summary_file=args.mapping_summary_file


    # Detect if files are gz
    gz = functions.check_gz(in_dir)

    # Count command: one `counting` call per sample, with a fixed 7 parallel jobs.
    # NOTE(review): `counting` is not visible here; it presumably reads the
    # variables above as module-level names -- confirm before renaming any.
    Parallel(n_jobs=7)(delayed(counting)(i) for i in sampleNames)
    
    # QC: run the two R scripts on the count output.
    # library_proportion.R receives out_dir twice, followed by the GTF file.
    os.system("Rscript /usr/local/bin/countsLog_rnaseq.R " + out_dir + ' ' + mapping_summary_file)
    os.system("Rscript /usr/local/bin/library_proportion.R " + out_dir + ' ' + out_dir + ' ' + gtfFile)

コード例 #6
0
    # Collect the project's fastq.gz files and move them into one
    # rawReads/<sample>/ folder per sample, unless a rawReads folder
    # already exists in the project location.
    project_location=ai['project_location']
    folders = os.listdir(project_location)
    readsFiles = [x for x in folders if re.findall('rawReads',x)]

    # Collect fastq files
    if args.in_dir == 'bcl2fastq_output':
        # Take the files under the bcl2fastq output folder named in the
        # analysis info file, leaving out the "Undetermined" reads.
        allFiles=functions.get_filepaths(ai['project_location'] + '/' + ai[args.in_dir])
        fastq=[x for x in allFiles if re.findall("fastq.gz", x)]
        fastq=[x for x in fastq if not re.findall('Undetermined', x)]
    else:
        # Any other value is treated as a directory holding the fastq files.
        # os.path.join gives a valid path whether or not in_dir ends with
        # a slash (plain concatenation only worked with a trailing slash).
        allFiles=os.listdir(args.in_dir)
        fastq=[os.path.join(args.in_dir, x) for x in allFiles if re.findall("fastq.gz", x)]

    # Move reads
    if not readsFiles:
        functions.make_sure_path_exists('rawReads')
        sampleDir = []
        for sample in sampleNames:
            # NOTE(review): the sample name is used as a regular expression,
            # so "S1" also matches files of "S10" -- confirm this is intended.
            reads = [x for x in fastq if re.findall(sample,x)]
            if sample not in sampleDir:
                functions.make_sure_path_exists('rawReads/'+sample)
            for r in reads:
                os.system('mv ' + '"' + r + '"' + ' rawReads/' + sample)
            sampleDir.append(sample)
    else:
        # Parenthesised form prints the same text under Python 2 and 3.
        print("rawReads/ folder already exists")

コード例 #7
0
    # Switch to the working directory named in the analysis info file.
    params_file = args.analysis_info_file
    path = functions.read_parameters_file(params_file)['Working directory']
    os.chdir(path)

    # Read sample names text file
    sampleNames = functions.read_sample_names()

    # Directories and plot options come from the command line
    in_dir = args.in_dir
    out_dir = args.out_dir
    out_dir_plots = args.out_dir_plots
    readType = args.readType
    suffix_name = args.suffix_name

    # Keep only the paths that contain "fastqc_data.txt".
    # The pattern is used as a regular expression, so "." matches any character.
    files = functions.get_filepaths(in_dir)
    files = [
        files[y] for y, x in enumerate(files)
        if re.findall("fastqc_data.txt", x)
    ]
    # One `tables` call per matching file, with a fixed 8 parallel jobs.
    Parallel(n_jobs=8)(delayed(tables)(i) for i in files)

    # One `plots` call per sample name, once the plot directory exists.
    # NOTE(review): `tables` and `plots` are not visible here; they presumably
    # read the variables above as module-level names -- confirm before renaming.
    functions.make_sure_path_exists(out_dir_plots)
    Parallel(n_jobs=8)(delayed(plots)(i) for i in sampleNames)

    # Final R step; the sample-names file is passed as the literal 'sample_names.txt'.
    os.system('Rscript /usr/local/bin/fastqc_plots_all_part2.R ' + in_dir +
              ' ' + 'sample_names.txt' + ' ' + readType + ' ' + out_dir_plots +
              ' ' + suffix_name)

#os.system('ls rawReads/*/*fastqc  |  grep -v trimmed  | grep ":"  | sed \'s/://g\' > sample_names2.txt')
#os.system('fastqc_summary.py ./sample_names2.txt ./summary_fastqc.txt')
コード例 #8
0
import pickle
import logging
from joblib import Parallel, delayed
import multiprocessing
import subprocess
#sys.path.insert(0,'/usr/local/bin/')
import functions
import argparse

__version__ = 'v01'
# created on 17/08/2016

if __name__ == '__main__':

    # Runs bcl2fastq for the run described in the analysis info file.
    # - It takes one argument, '--analysis_info_file', the text file with the
    #   details of the analysis. The default is 'analysis_info.txt'.
    # The run folder, sample sheet, project location and output folder name
    # are all read from that file.

    # NOTE(review): prog and description still describe analysis_info.py and
    # look copied from another script -- confirm the intended values.
    parser=argparse.ArgumentParser(prog='analysis_info.py', description='Creates analysis_info.txt')
    parser.add_argument('-v','--version',action='version',version='%(prog)s-'+__version__)
    parser.add_argument('--analysis_info_file', help='Text file with details of the analysis. Default=analysis_info.txt', default='analysis_info.txt')
    args=parser.parse_args()

    # Collect info from analysis_info_file
    ai=functions.read_analysis_info_file(args.analysis_info_file)

    output_dir = ai['project_location'] + '/' + ai['bcl2fastq_output']
    log_file = ai['project_location'] + '/bcl_log.txt'

    functions.make_sure_path_exists(output_dir)
    # os.system runs the command through /bin/sh. The bash-only "&>" form is
    # read by a POSIX sh such as dash as "run in background, then truncate the
    # file", so the portable "> file 2>&1" is used to capture both stdout and
    # stderr and to make the call wait for bcl2fastq to finish.
    os.system("bcl2fastq -R " + ai['run_folder'] + " -o " + output_dir + " --no-lane-splitting --sample-sheet " + ai['run_samplesheet'] + ' > ' + log_file + ' 2>&1')
    


コード例 #9
0
    # Set path of project location and make it the working directory
    ai = functions.read_analysis_info_file(args.analysis_info_file)
    path = ai['project_location']
    os.chdir(path)

    # Number of parallel jobs, taken from the analysis info file
    ncores = int(ai['ncores'])

    # Read sample names text file (looked up inside the project location)
    sample_names_file = args.sample_names_file
    sample_names_file = path + '/' + sample_names_file
    sampleNames = functions.read_sample_names(sample_names_file)

    # Build the input, output and report directories under the project location.
    # NOTE(review): qc_check is not visible here; it presumably reads in_dir,
    # out_dir and gz as module-level names -- confirm before renaming any.
    in_dir = path + '/' + args.in_dir
    out_dir = path + '/' + args.out_dir
    out_dir_report = path + '/' + args.out_dir_report

    # Create out_dir_report
    functions.make_sure_path_exists(out_dir_report)

    # Detect if files are gz
    gz = functions.check_gz(in_dir)

    # Run fastqc: one qc_check call per sample name, spread over ncores jobs
    Parallel(n_jobs=ncores)(delayed(qc_check)(i) for i in sampleNames)

    # Number of reads per sample.
    # The R script is expected under <project_location>/bin.
    os.system("/usr/bin/Rscript " + path + "/bin/indexQC.R " + in_dir + " " +
              out_dir_report)
コード例 #10
0
    args=parser.parse_args()

    # Switch to the working directory named in the analysis info file.
    params_file=args.analysis_info_file
    path=functions.read_parameters_file(params_file)['Working directory']
    os.chdir(path)

    # Read sample names text file
    sampleNames = functions.read_sample_names()

    # Directories and plot options come from the command line
    in_dir=args.in_dir
    out_dir=args.out_dir
    out_dir_plots=args.out_dir_plots
    readType=args.readType
    suffix_name=args.suffix_name
    
    # Keep only the paths that contain "fastqc_data.txt".
    # The pattern is used as a regular expression, so "." matches any character.
    files=functions.get_filepaths(in_dir)
    files = [files[y] for y, x in enumerate(files) if re.findall("fastqc_data.txt", x)] 
    # One `tables` call per matching file, with a fixed 8 parallel jobs.
    Parallel(n_jobs=8)(delayed(tables)(i) for i in files)
    
    # One `plots` call per sample name, once the plot directory exists.
    # NOTE(review): `tables` and `plots` are not visible here; they presumably
    # read the variables above as module-level names -- confirm before renaming.
    functions.make_sure_path_exists(out_dir_plots)
    Parallel(n_jobs=8)(delayed(plots)(i) for i in sampleNames)

    # Final R step; the sample-names file is passed as the literal 'sample_names.txt'.
    os.system('Rscript /usr/local/bin/fastqc_plots_all_part2.R ' + in_dir + ' ' + 'sample_names.txt' + ' ' + readType + ' ' + out_dir_plots + ' ' + suffix_name  )


#os.system('ls rawReads/*/*fastqc  |  grep -v trimmed  | grep ":"  | sed \'s/://g\' > sample_names2.txt')
#os.system('fastqc_summary.py ./sample_names2.txt ./summary_fastqc.txt')