Beispiel #1
0
    parser.add_argument('--out_dir', help='Path to out put folder. Default=rawReads/', default='rawReads/')
    parser.add_argument('--out_dir_report', help='Path to out put folder. Default=Report/figure/data/', default='Report/figure/data/')
    parser.add_argument('--sample_names_file', help='Text file with sample names. Default=sample_names.txt', default='sample_names.txt')
    parser.add_argument('--ncores', help='Number of cores to use. Default=8', default='8')
    args=parser.parse_args()

    # Read the analysis info file and use the current working directory as the
    # base for every relative directory argument.
    # NOTE(review): `ai` is not used in the lines visible here; presumably it is
    # read by code elsewhere in the script - confirm before removing.
    ai=functions.read_analysis_info_file(args.analysis_info_file)
    path=os.getcwd()

    # Number of parallel workers. The option is declared without a type, so it
    # arrives as a string and is converted here.
    ncores=int(args.ncores)

    # Read sample names text file
    sample_names_file=args.sample_names_file
    sampleNames = functions.read_sample_names(sample_names_file)

    # Resolve input and output directories against the working directory.
    # os.path.join yields the same result as the former `path + '/' + arg` for
    # relative arguments, but keeps an absolute argument intact instead of
    # producing '<cwd>//abs/path'.
    in_dir=os.path.join(path, args.in_dir)
    out_dir=os.path.join(path, args.out_dir)
    out_dir_report=os.path.join(path, args.out_dir_report)

    # Create out_dir_report
    functions.make_sure_path_exists(out_dir_report)

    # Detect if files are gz
    gz = functions.check_gz(in_dir)

    # Run qc_check once per sample name, spread over `ncores` workers.
    Parallel(n_jobs=ncores)(delayed(qc_check)(i) for i in sampleNames)
    parser.add_argument('--out_dir', help='Path to output folder. Default=countedReads/', default='countedReads/')
    parser.add_argument('--mapping_summary_file', help='Mapping summary file. Default=mapping_summary.csv', default='mapping_summary.csv')

    args=parser.parse_args()

    # Parse the parameters file once and reuse the result; it was previously
    # re-read for each of the four keys below.
    # NOTE(review): assumes read_parameters_file returns the same mapping on
    # every call for a given file - confirm.
    params_file=args.analysis_info_file
    analysis_params=functions.read_parameters_file(params_file)
    path=analysis_params['Working directory']
    refGenome=analysis_params['Reference Genome']
    strand=analysis_params['strand']
    strand_piccard, strand_htseq = functions.get_strand(strand)
    gtfFile=analysis_params['GTF File']

    # All directories below are used as given, relative to the working directory.
    os.chdir(path)

    # Read sample names text file
    sampleNames = functions.read_sample_names()

    # Take input/output locations from the command line and make sure the
    # output directory exists.
    in_dir=args.in_dir
    out_dir=args.out_dir
    functions.make_sure_path_exists(out_dir)
    mapping_summary_file=args.mapping_summary_file


    # Detect if files are gz
    gz = functions.check_gz(in_dir)

    # Run the counting step once per sample name.
    # NOTE(review): the worker count is hard-coded to 7.
    Parallel(n_jobs=7)(delayed(counting)(i) for i in sampleNames)
    
    # QC
Beispiel #3
0
        help='Mapping summary file. Default=mapping_summary.csv',
        default='mapping_summary.csv')

    args = parser.parse_args()

    # Parse the parameters file once and reuse the result; it was previously
    # re-read for each of the four keys below.
    # NOTE(review): assumes read_parameters_file returns the same mapping on
    # every call for a given file - confirm.
    params_file = args.analysis_info_file
    analysis_params = functions.read_parameters_file(params_file)
    path = analysis_params['Working directory']
    refGenome = analysis_params['Reference Genome']
    strand = analysis_params['strand']
    strand_piccard, strand_htseq = functions.get_strand(strand)
    gtfFile = analysis_params['GTF File']

    # All directories below are used as given, relative to the working directory.
    os.chdir(path)

    # Read sample names text file
    sampleNames = functions.read_sample_names()

    # Take input/output locations from the command line and make sure the
    # output directory exists.
    in_dir = args.in_dir
    out_dir = args.out_dir
    functions.make_sure_path_exists(out_dir)
    mapping_summary_file = args.mapping_summary_file

    # Detect if files are gz
    gz = functions.check_gz(in_dir)

    # Run the counting step once per sample name.
    # NOTE(review): the worker count is hard-coded to 7.
    Parallel(n_jobs=7)(delayed(counting)(i) for i in sampleNames)

    # QC
    os.system("Rscript /usr/local/bin/countsLog_rnaseq.R " + out_dir + ' ' +
Beispiel #4
0
    parser = argparse.ArgumentParser(prog='organizeWorkingDirectory.py',description = 'Organize working directory of the analysis')
    parser.add_argument('-v','--version', action='version',version='%(prog)s-'+__version__)
    parser.add_argument('--analysis_info_file', help='Text file with details of the analysis. Default=analysis_info.txt', default='analysis_info.txt')
    # The help text now states the real default (it used to advertise
    # 'sample_names_info.txt' while the default was 'sample_names.txt').
    parser.add_argument('--sample_names_file', help='Text file with sample names. Default=sample_names.txt', default='sample_names.txt')
    parser.add_argument('--in_dir', help='directory with fastq files. Default= corresponding to bcl2fastq_output', default='bcl2fastq_output')
    args=parser.parse_args()

    # Read analysis info file
    ai=functions.read_analysis_info_file(args.analysis_info_file)

    # Work from the project location named in the analysis info file.
    os.chdir(ai['project_location'])

    # Read sample names
    sample_names_file = args.sample_names_file
    sampleNames = functions.read_sample_names(sample_names_file)

    # Collect the entries of the project directory whose name contains
    # 'rawReads'. A plain substring test is equivalent to the former
    # re.findall call because the pattern has no regex metacharacters.
    project_location=ai['project_location']
    folders = os.listdir(project_location)
    readsFiles = [entry for entry in folders if 'rawReads' in entry]

    # Collect fastq files analysis_info_file
    if args.in_dir == 'bcl2fastq_output':
        allFiles=functions.get_filepaths(ai['project_location'] + '/' + ai[args.in_dir])
        fastq=[allFiles[y] for y, x in enumerate(allFiles) if re.findall("fastq.gz", x)]
        fastq=[fastq[y] for y,x in enumerate(fastq) if not re.findall('Undetermined', x)]
    elif args.in_dir != 'bcl2fastq_output':
        allFiles=os.listdir(args.in_dir)