# Remaining command-line options. `parser` and the `--in_dir` /
# `--analysis_info_file` options are expected to be defined above this point.
parser.add_argument('--out_dir',
                    help='Path to out put folder. Default=rawReads/',
                    default='rawReads/')
parser.add_argument('--out_dir_report',
                    help='Path to out put folder. Default=Report/figure/data/',
                    default='Report/figure/data/')
parser.add_argument('--sample_names_file',
                    help='Text file with sample names. Default=sample_names.txt',
                    default='sample_names.txt')
# type=int makes argparse reject a non-numeric value with a usage error
# instead of failing later with a ValueError traceback.
parser.add_argument('--ncores',
                    help='Number of cores to use. Default=8',
                    type=int, default=8)
args = parser.parse_args()

# Analysis settings and the working directory (the current directory).
ai = functions.read_analysis_info_file(args.analysis_info_file)
path = os.getcwd()

# Number of parallel jobs
ncores = args.ncores

# Read sample names text file
sample_names_file = args.sample_names_file
sampleNames = functions.read_sample_names(sample_names_file)

# Resolve input and output directories against the working directory.
# os.path.join keeps an absolute path given on the command line intact
# (plain concatenation would turn '/data/x' into '<cwd>//data/x') and
# preserves a trailing '/' such as the one in the default 'rawReads/'.
in_dir = os.path.join(path, args.in_dir)
out_dir = os.path.join(path, args.out_dir)
out_dir_report = os.path.join(path, args.out_dir_report)

# Create out_dir_report
functions.make_sure_path_exists(out_dir_report)

# Detect if files are gz
gz = functions.check_gz(in_dir)

# Run qc_check once per sample, using up to `ncores` parallel jobs.
Parallel(n_jobs=ncores)(delayed(qc_check)(i) for i in sampleNames)
# Remaining command-line options. `parser` and the `--in_dir` /
# `--analysis_info_file` options are expected to be defined above this point.
parser.add_argument('--out_dir',
                    help='Path to output folder. Default=countedReads/',
                    default='countedReads/')
parser.add_argument('--mapping_summary_file',
                    help='Mapping summary file. Default=mapping_summary.csv',
                    default='mapping_summary.csv')
args = parser.parse_args()

# Parse the analysis parameters file once and reuse the result, rather
# than re-reading the file for every key. This must happen before the
# chdir below so that a relative params_file is resolved against the
# directory the script was launched from, as before.
params_file = args.analysis_info_file
analysis_params = functions.read_parameters_file(params_file)
path = analysis_params['Working directory']
refGenome = analysis_params['Reference Genome']
strand = analysis_params['strand']
strand_piccard, strand_htseq = functions.get_strand(strand)
gtfFile = analysis_params['GTF File']

# Everything below works relative to the analysis working directory.
os.chdir(path)

# Read sample names text file (helper is called with its default argument)
sampleNames = functions.read_sample_names()

# Input and output directories, taken as given on the command line
in_dir = args.in_dir
out_dir = args.out_dir
functions.make_sure_path_exists(out_dir)
mapping_summary_file = args.mapping_summary_file

# Detect if files are gz
gz = functions.check_gz(in_dir)

# Count command: run `counting` once per sample.
# NOTE(review): the job count is hard-coded to 7 here; consider exposing
# it as a command-line option.
Parallel(n_jobs=7)(delayed(counting)(i) for i in sampleNames)

# QC
help='Mapping summary file. Default=mapping_summary.csv', default='mapping_summary.csv') args = parser.parse_args() params_file = args.analysis_info_file path = functions.read_parameters_file(params_file)['Working directory'] refGenome = functions.read_parameters_file(params_file)['Reference Genome'] strand = functions.read_parameters_file(params_file)['strand'] strand_piccard, strand_htseq = functions.get_strand(strand) gtfFile = functions.read_parameters_file(params_file)['GTF File'] os.chdir(path) # Read sample names text file sampleNames = functions.read_sample_names() # Set input and output directories if not '/' in_dir = args.in_dir out_dir = args.out_dir functions.make_sure_path_exists(out_dir) mapping_summary_file = args.mapping_summary_file # Detect if files are gz gz = functions.check_gz(in_dir) # Count command Parallel(n_jobs=7)(delayed(counting)(i) for i in sampleNames) # QC os.system("Rscript /usr/local/bin/countsLog_rnaseq.R " + out_dir + ' ' +
# Command-line interface
parser = argparse.ArgumentParser(
    prog='organizeWorkingDirectory.py',
    description='Organize working directory of the analysis')
parser.add_argument('-v', '--version', action='version',
                    version='%(prog)s-' + __version__)
parser.add_argument('--analysis_info_file',
                    help='Text file with details of the analysis. Default=analysis_info.txt',
                    default='analysis_info.txt')
# The help text now states the real default (it previously advertised
# 'sample_names_info.txt' while the default was 'sample_names.txt').
parser.add_argument('--sample_names_file',
                    help='Text file with sample names. Default=sample_names.txt',
                    default='sample_names.txt')
parser.add_argument('--in_dir',
                    help='directory with fastq files. Default= corresponding to bcl2fastq_output',
                    default='bcl2fastq_output')
args = parser.parse_args()

# Read analysis info file
ai = functions.read_analysis_info_file(args.analysis_info_file)
project_location = ai['project_location']

# Change dir
os.chdir(project_location)

# Read sample names
sample_names_file = args.sample_names_file
sampleNames = functions.read_sample_names(sample_names_file)

# Check if a rawReads folder already exists in the project directory:
# collect every entry whose name contains 'rawReads'.
folders = os.listdir(project_location)
readsFiles = [entry for entry in folders if 'rawReads' in entry]

# Collect fastq files
if args.in_dir == 'bcl2fastq_output':
    # Default: look under the location that the analysis info file
    # records under its 'bcl2fastq_output' key.
    allFiles = functions.get_filepaths(project_location + '/' + ai[args.in_dir])
    # Keep gzipped fastq files, skipping the 'Undetermined' ones.
    # A literal substring test is used: the former regex "fastq.gz" had
    # an unescaped dot that matched any character.
    fastq = [f for f in allFiles
             if 'fastq.gz' in f and 'Undetermined' not in f]
else:
    # A user-supplied directory: list its entries directly.
    allFiles = os.listdir(args.in_dir)