def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Process single-end fastq files, annotate them with IGFFT and pair them.

    For every input file the function optionally gunzips it, trims it with
    trimmomatic in 'SE' mode, quality-filters the trimmed file and deletes
    the trimmed intermediate.  Each filtered file is then annotated with
    ``igfft.igfft_multiprocess`` and all annotation files are handed to
    ``pairing.RunPairing``.

    Tuning values (``window_trim``, ``quality_cutoff_trim``,
    ``min_read_len_post_trim``, ``phred_encode``, ``quality_cutoff``,
    ``percent_bases``, ``isotyping_barcodes``, ``remove_insertions``,
    ``number_threads``, ``cluster_setting``, ``annotation_cluster_cutoff``)
    are read from names defined outside this function.

    Args:
        input_files: iterable of fastq file paths (strings); paths ending
            in '.gz' are unzipped first.
        species: forwarded to ``igfft.igfft_multiprocess`` as ``species``.
        loci: forwarded to ``igfft.igfft_multiprocess`` as ``locus``.
        group_name: forwarded to ``pairing.RunPairing`` as
            ``prefix_output_files``.

    Returns:
        None.  If no annotation files were produced (e.g. ``input_files``
        is empty) the pairing step is skipped instead of raising
        IndexError.
    """
    print('Processing raw fastq files')
    processed_files = []
    for fastq_path in input_files:
        # Outputs are written next to the original (possibly gzipped) file.
        folder_path = os.path.dirname(fastq_path)
        # Unzip files
        if fastq_path.endswith('.gz'):
            print('Unzipping: ', fastq_path)
            fastq_path = useful.gunzip_python(fastq_path)
        # Run trimmomatic
        trimming_parameters = {
            'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
            'MINLEN': min_read_len_post_trim,
        }
        trimmed_path = processing.run_trimmomatic(
            fastq_path, folder_path, 'SE', phred_encode, trimming_parameters)[0]
        # Run quality filtering
        filtered_path = fastx.Run_Quality_Filter(
            trimmed_path,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # The trimmed file is only an intermediate; keep the filtered one.
        os.remove(trimmed_path)
        processed_files.append(filtered_path)

    print('Annotating processed fastq files')
    annotated_files = []
    for filtered_path in processed_files:
        igfft_result = igfft.igfft_multiprocess(
            filtered_path,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        # Only the first element of the result is used for pairing.
        annotated_files.append(igfft_result[0])

    if not annotated_files:
        # Nothing to pair; indexing annotated_files[0] below would fail.
        print('No annotated files were produced; skipping pairing')
        return

    print('Pairing sequences')
    output_dir = os.path.dirname(annotated_files[0])
    pairing.RunPairing(
        annotated_files,
        annotated_file_formats='TAB',
        analysis_method='GEORGIOU_INHOUSE',
        output_folder_path=output_dir,
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Run the single-end workflow: trim, filter, IGFFT-annotate, pair.

    Steps, in order:
      1. Each input fastq is unzipped when its name ends in '.gz'.
      2. It is trimmed via ``processing.run_trimmomatic`` ('SE' method) and
         quality-filtered via ``fastx.Run_Quality_Filter``; the trimmed
         intermediate file is removed.
      3. Each filtered file is annotated via ``igfft.igfft_multiprocess``.
      4. All annotation files are passed to ``pairing.RunPairing`` with
         ``analysis_method='GEORGIOU_INHOUSE'``; output goes to the folder
         of the first annotation file.

    Trimming, filtering, annotation and clustering settings are taken from
    names defined outside this function.

    Args:
        input_files: iterable of fastq file paths (strings).
        species: passed to IGFFT as ``species``.
        loci: passed to IGFFT as ``locus``.
        group_name: passed to the pairing step as ``prefix_output_files``.

    Returns:
        None.  When there is nothing to pair (no input files) the function
        prints a message and returns instead of raising IndexError.
    """
    print('Processing raw fastq files')
    processed_files = []
    for raw_file in input_files:
        folder_path = os.path.dirname(raw_file)
        # Unzip files
        if raw_file.endswith('.gz'):
            print('Unzipping: ', raw_file)
            raw_file = useful.gunzip_python(raw_file)

        # Run trimmomatic
        method = 'SE'
        trimming_parameters = {
            'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
            'MINLEN': min_read_len_post_trim,
        }
        trimmed_outputs = processing.run_trimmomatic(
            raw_file, folder_path, method, phred_encode, trimming_parameters)
        trimmedf = trimmed_outputs[0]

        # Run quality filtering
        filtered_trimmed_file = fastx.Run_Quality_Filter(
            trimmedf,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # Drop the intermediate trimmed file once it has been filtered.
        os.remove(trimmedf)
        processed_files.append(filtered_trimmed_file)

    print('Annotating processed fastq files')
    parsing_settings = {
        'isotype': isotyping_barcodes,
        'remove_insertions': remove_insertions,
    }
    annotated_files = []
    for processed_file in processed_files:
        annotated_f = igfft.igfft_multiprocess(
            processed_file,
            file_type='FASTQ',
            species=species,
            locus=loci,
            # A fresh dict per call, in case the callee modifies it.
            parsing_settings=dict(parsing_settings),
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotated_f[0])

    if not annotated_files:
        # annotated_files[0] below would raise IndexError on empty input.
        print('No annotated files were produced; skipping pairing')
        return

    print('Pairing sequences')
    pairing.RunPairing(
        annotated_files,
        annotated_file_formats='TAB',
        analysis_method='GEORGIOU_INHOUSE',
        output_folder_path=os.path.dirname(annotated_files[0]),
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Process paired-end fastq files and annotate the stitched reads with MIXCR.

    For every R1/R2 pair the function unzips '.gz' files, optionally trims
    both reads with trimmomatic ('PE' method, only when the externally
    defined ``trim_seqs`` is truthy), stitches them with
    ``processing.run_pear``, quality-filters the stitched file and deletes
    the un-filtered PEAR output.  Each filtered file is then run through
    ``mixcr.RunMixcr`` and ``mixcr.parseMIXCR``.

    Trimming, stitching, filtering and threading settings are read from
    names defined outside this function.

    Args:
        input_files: iterable of pairs of fastq paths; element 0 and
            element 1 of each pair are passed to PEAR as the two reads.
            The pairs are not modified.
        species: unused here; MIXCR is called with ``species=''``.
        loci: unused here; MIXCR is called with ``loci=[]``.
        group_name: unused in this function.

    Returns:
        None.
    """
    print('Processing raw fastq files')
    processed_files = []
    for pair_of_files in input_files:
        folder_path = os.path.dirname(pair_of_files[0])

        # Unzip files.  Build a new list instead of overwriting the
        # caller's pair, so tuples are accepted and inputs stay untouched.
        read_files = []
        for read_path in pair_of_files:
            if read_path.endswith('.gz'):
                print('Unzipping: ', read_path)
                read_path = useful.gunzip_python(read_path)
            read_files.append(read_path)

        # Run trimmomatic
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            # Kept in its own name: the original rebound `input_files`
            # here, shadowing the parameter being iterated.
            reads_to_stitch = processing.run_trimmomatic(
                read_files, folder_path, 'PE', phred_encode, trimming_parameters)
        else:
            reads_to_stitch = read_files

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(
            reads_to_stitch[0],
            reads_to_stitch[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory)[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # The raw PEAR output is only an intermediate.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    # Collected for parity with the processing step; not returned or used further.
    annotated_files = []
    for filtered_file in processed_files:
        base_name = useful.removeFileExtension(filtered_file)
        output_file = base_name + '.mixcr.alignment'
        output_file_annotation = base_name + '.mixcr.annotation'

        # Run MIXCR file
        print('Running MIXCR')
        # NOTE(review): species/loci arguments of this function are not
        # forwarded; MIXCR is always called with loci=[] and species=''.
        _, command_val = mixcr.RunMixcr(
            filtered_file,
            output_file,
            filetype='FASTQ',
            loci=[],
            species='',
            exportPrettyAlignment=False,
            num_threads=number_threads)

        # Parse MIXCR file
        print('Parsing MIXCR')
        annotated_file = mixcr.parseMIXCR(
            filtered_file,
            output_file,
            'FASTQ',
            output_file_annotation,
            command_val=command_val)
        # Presumably element 0 is the annotation file path (expected to
        # equal output_file_annotation) -- confirm against parseMIXCR.
        annotated_files.append(annotated_file[0])
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Process paired-end fastq files and annotate the stitched reads with IGFFT.

    For every R1/R2 pair the function unzips '.gz' files, optionally trims
    both reads with trimmomatic ('PE' method, only when the externally
    defined ``trim_seqs`` is truthy), stitches them with
    ``processing.run_pear``, quality-filters the stitched file and deletes
    the un-filtered PEAR output.  Each filtered file is then annotated
    with ``igfft.igfft_multiprocess``.

    Trimming, stitching, filtering, annotation and threading settings are
    read from names defined outside this function.

    Args:
        input_files: iterable of pairs of fastq paths; element 0 and
            element 1 of each pair are passed to PEAR as the two reads.
            The pairs are not modified.
        species: forwarded to ``igfft.igfft_multiprocess`` as ``species``.
        loci: forwarded to ``igfft.igfft_multiprocess`` as ``locus``.
        group_name: unused in this function.

    Returns:
        None.
    """
    print('Processing raw fastq files')
    processed_files = []
    for pair_of_files in input_files:
        folder_path = os.path.dirname(pair_of_files[0])

        # Unzip files.  Work on a new list so the caller's pair is not
        # overwritten (and tuples are accepted).
        read_files = []
        for read_path in pair_of_files:
            if read_path.endswith('.gz'):
                print('Unzipping: ', read_path)
                read_path = useful.gunzip_python(read_path)
            read_files.append(read_path)

        # Run trimmomatic
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            # Own name instead of rebinding the `input_files` parameter
            # while it is being iterated.
            reads_to_stitch = processing.run_trimmomatic(
                read_files, folder_path, 'PE', phred_encode, trimming_parameters)
        else:
            reads_to_stitch = read_files

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(
            reads_to_stitch[0],
            reads_to_stitch[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory)[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # The raw PEAR output is only an intermediate.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    # Collected but not returned or used further by this function.
    annotated_files = []
    for filtered_file in processed_files:
        # NOTE(review): file_type is not passed, so igfft_multiprocess
        # falls back to its own default -- confirm it handles FASTQ input.
        annotated_f = igfft.igfft_multiprocess(
            filtered_file,
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotated_f[0])
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Process single-end fastq files, annotate them with MIXCR and pair them.

    For every input file the function optionally gunzips it, trims it with
    trimmomatic in 'SE' mode, quality-filters the trimmed file and deletes
    the trimmed intermediate.  Each filtered file is then run through
    ``mixcr.RunMixcr`` and ``mixcr.parseMIXCR``, and all annotation results
    are handed to ``pairing.RunPairing`` with ``analysis_method='MIXCR'``.

    Trimming, filtering, threading and clustering settings are read from
    names defined outside this function.

    Args:
        input_files: iterable of fastq file paths (strings); paths ending
            in '.gz' are unzipped first.
        species: unused here; MIXCR is called with ``species=''``.
        loci: unused here; MIXCR is called with ``loci=[]``.
        group_name: forwarded to ``pairing.RunPairing`` as
            ``prefix_output_files``.

    Returns:
        None.  If no annotation files were produced (e.g. ``input_files``
        is empty) the pairing step is skipped instead of raising
        IndexError.
    """
    print('Processing raw fastq files')
    processed_files = []
    for fastq_path in input_files:
        # Outputs are written next to the original (possibly gzipped) file.
        folder_path = os.path.dirname(fastq_path)
        # Unzip files
        if fastq_path.endswith('.gz'):
            print('Unzipping: ', fastq_path)
            fastq_path = useful.gunzip_python(fastq_path)
        # Run trimmomatic
        trimming_parameters = {
            'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
            'MINLEN': min_read_len_post_trim,
        }
        trimmed_path = processing.run_trimmomatic(
            fastq_path, folder_path, 'SE', phred_encode, trimming_parameters)[0]
        # Run quality filtering
        filtered_path = fastx.Run_Quality_Filter(
            trimmed_path,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # The trimmed file is only an intermediate; keep the filtered one.
        os.remove(trimmed_path)
        processed_files.append(filtered_path)

    print('Annotating processed fastq files')
    annotated_files = []
    for filtered_path in processed_files:
        base_name = useful.removeFileExtension(filtered_path)
        output_file = base_name + '.mixcr.alignment'
        output_file_annotation = base_name + '.mixcr.annotation'

        # Run MIXCR file
        print('Running MIXCR')
        # NOTE(review): species/loci arguments of this function are not
        # forwarded; MIXCR is always called with loci=[] and species=''.
        _, command_val = mixcr.RunMixcr(
            filtered_path,
            output_file,
            filetype='FASTQ',
            loci=[],
            species='',
            exportPrettyAlignment=False,
            num_threads=number_threads)

        # Parse MIXCR file
        print('Parsing MIXCR')
        annotated_file = mixcr.parseMIXCR(
            filtered_path,
            output_file,
            'FASTQ',
            output_file_annotation,
            command_val=command_val)
        # NOTE(review): the parseMIXCR result is stored as-is and later
        # passed to os.path.dirname, so it is assumed to be a single path
        # (expected to equal output_file_annotation) -- confirm.
        annotated_files.append(annotated_file)

    if not annotated_files:
        # Nothing to pair; indexing annotated_files[0] below would fail.
        print('No annotated files were produced; skipping pairing')
        return

    print('Pairing sequences')
    output_dir = os.path.dirname(annotated_files[0])
    pairing.RunPairing(
        annotated_files,
        annotated_file_formats='TAB',
        analysis_method='MIXCR',
        output_folder_path=output_dir,
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=""):
    """Run the paired-end workflow: trim, stitch, filter, MIXCR-annotate.

    Steps, in order, for every R1/R2 pair:
      1. Files whose names end in ".gz" are unzipped.
      2. When the externally defined ``trim_seqs`` is truthy, both reads
         are trimmed via ``processing.run_trimmomatic`` ("PE" method).
      3. The reads are stitched via ``processing.run_pear``.
      4. The stitched file is quality-filtered via
         ``fastx.Run_Quality_Filter`` and the raw PEAR output is removed.
    Afterwards each filtered file is run through ``mixcr.RunMixcr`` and
    ``mixcr.parseMIXCR``.

    All tuning values are read from names defined outside this function.

    Args:
        input_files: iterable of pairs of fastq paths; element 0 and
            element 1 of each pair are passed to PEAR as the two reads.
            The pairs are not modified.
        species: unused here; MIXCR is called with ``species=""``.
        loci: unused here; MIXCR is called with ``loci=[]``.
        group_name: unused in this function.

    Returns:
        None.
    """
    print("Processing raw fastq files")
    processed_files = []
    for pair_of_files in input_files:
        folder_path = os.path.dirname(pair_of_files[0])

        # Unzip files.  A new list is built so the caller's pair is left
        # untouched and tuples are accepted.
        read_files = []
        for read_path in pair_of_files:
            if read_path.endswith(".gz"):
                print("Unzipping: ", read_path)
                read_path = useful.gunzip_python(read_path)
            read_files.append(read_path)

        # Run trimmomatic
        if trim_seqs:
            print("Trimming low quality bases")
            trimming_parameters = {
                "SLIDINGWINDOW": str(window_trim) + ":" + str(quality_cutoff_trim),
                "MINLEN": min_read_len_post_trim,
            }
            # Own name instead of rebinding the `input_files` parameter
            # while it is being iterated.
            reads_to_stitch = processing.run_trimmomatic(
                read_files, folder_path, "PE", phred_encode, trimming_parameters
            )
        else:
            reads_to_stitch = read_files

        # Stitch R1-R2 files
        pairing_parameters = {
            "v": min_overlap_length,
            "m": max_assembly_length,
            "n": min_assembly_length,
            "u": max_fraction_uncalled,
        }
        print("Stitching R1-R2 reads")
        pear_results = processing.run_pear(
            reads_to_stitch[0],
            reads_to_stitch[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory,
        )[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases,
        )
        # The raw PEAR output is only an intermediate.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print("Annotating processed fastq files")
    # Collected but not returned or used further by this function.
    annotated_files = []
    for filtered_file in processed_files:
        base_name = useful.removeFileExtension(filtered_file)
        output_file = base_name + ".mixcr.alignment"
        output_file_annotation = base_name + ".mixcr.annotation"

        # Run MIXCR file
        print("Running MIXCR")
        # NOTE(review): species/loci arguments of this function are not
        # forwarded; MIXCR is always called with loci=[] and species="".
        _, command_val = mixcr.RunMixcr(
            filtered_file,
            output_file,
            filetype="FASTQ",
            loci=[],
            species="",
            exportPrettyAlignment=False,
            num_threads=number_threads,
        )

        # Parse MIXCR file
        print("Parsing MIXCR")
        annotated_file = mixcr.parseMIXCR(
            filtered_file,
            output_file,
            "FASTQ",
            output_file_annotation,
            command_val=command_val,
        )
        # Presumably element 0 is the annotation file path (expected to
        # equal output_file_annotation) -- confirm against parseMIXCR.
        annotated_files.append(annotated_file[0])
    # The original literal was split across two physical lines, which is
    # not a valid single-quoted string; restored to one line.
    print("Pipeline complete")
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Run the single-end workflow: trim, filter, MIXCR-annotate, pair.

    Steps, in order:
      1. Each input fastq is unzipped when its name ends in '.gz'.
      2. It is trimmed via ``processing.run_trimmomatic`` ('SE' method) and
         quality-filtered via ``fastx.Run_Quality_Filter``; the trimmed
         intermediate file is removed.
      3. Each filtered file is run through ``mixcr.RunMixcr`` and
         ``mixcr.parseMIXCR``.
      4. All annotation results are passed to ``pairing.RunPairing`` with
         ``analysis_method='MIXCR'``; output goes to the folder of the
         first annotation result.

    All tuning values are read from names defined outside this function.

    Args:
        input_files: iterable of fastq file paths (strings).
        species: unused here; MIXCR is called with ``species=''``.
        loci: unused here; MIXCR is called with ``loci=[]``.
        group_name: passed to the pairing step as ``prefix_output_files``.

    Returns:
        None.  When there is nothing to pair (no input files) the function
        prints a message and returns instead of raising IndexError.
    """
    print('Processing raw fastq files')
    processed_files = []
    for raw_file in input_files:
        folder_path = os.path.dirname(raw_file)
        # Unzip files
        if raw_file.endswith('.gz'):
            print('Unzipping: ', raw_file)
            raw_file = useful.gunzip_python(raw_file)

        # Run trimmomatic
        method = 'SE'
        trimming_parameters = {
            'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
            'MINLEN': min_read_len_post_trim,
        }
        trimmed_outputs = processing.run_trimmomatic(
            raw_file, folder_path, method, phred_encode, trimming_parameters)
        trimmedf = trimmed_outputs[0]

        # Run quality filtering
        filtered_trimmed_file = fastx.Run_Quality_Filter(
            trimmedf,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # Drop the intermediate trimmed file once it has been filtered.
        os.remove(trimmedf)
        processed_files.append(filtered_trimmed_file)

    print('Annotating processed fastq files')
    annotated_files = []
    for processed_file in processed_files:
        stem = useful.removeFileExtension(processed_file)
        output_file = stem + '.mixcr.alignment'
        output_file_annotation = stem + '.mixcr.annotation'

        # Run MIXCR file
        print('Running MIXCR')
        # NOTE(review): species/loci arguments of this function are not
        # forwarded; MIXCR is always called with loci=[] and species=''.
        _, command_val = mixcr.RunMixcr(
            processed_file,
            output_file,
            filetype='FASTQ',
            loci=[],
            species='',
            exportPrettyAlignment=False,
            num_threads=number_threads)

        # Parse MIXCR file
        print('Parsing MIXCR')
        annotated_file = mixcr.parseMIXCR(
            processed_file,
            output_file,
            'FASTQ',
            output_file_annotation,
            command_val=command_val)
        # NOTE(review): the parseMIXCR result is stored as-is and later
        # passed to os.path.dirname, so it is assumed to be a single path
        # (expected to equal output_file_annotation) -- confirm.
        annotated_files.append(annotated_file)

    if not annotated_files:
        # annotated_files[0] below would raise IndexError on empty input.
        print('No annotated files were produced; skipping pairing')
        return

    print('Pairing sequences')
    pairing.RunPairing(
        annotated_files,
        annotated_file_formats='TAB',
        analysis_method='MIXCR',
        output_folder_path=os.path.dirname(annotated_files[0]),
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Run the paired-end workflow: trim, stitch, filter, IGFFT-annotate.

    Steps, in order, for every R1/R2 pair:
      1. Files whose names end in '.gz' are unzipped.
      2. When the externally defined ``trim_seqs`` is truthy, both reads
         are trimmed via ``processing.run_trimmomatic`` ('PE' method).
      3. The reads are stitched via ``processing.run_pear``.
      4. The stitched file is quality-filtered via
         ``fastx.Run_Quality_Filter`` and the raw PEAR output is removed.
    Afterwards each filtered file is annotated via
    ``igfft.igfft_multiprocess``.

    All tuning values are read from names defined outside this function.

    Args:
        input_files: iterable of pairs of fastq paths; element 0 and
            element 1 of each pair are passed to PEAR as the two reads.
            The pairs are not modified.
        species: passed to IGFFT as ``species``.
        loci: passed to IGFFT as ``locus``.
        group_name: unused in this function.

    Returns:
        None.
    """
    print('Processing raw fastq files')
    processed_files = []
    for pair_of_files in input_files:
        folder_path = os.path.dirname(pair_of_files[0])

        # Unzip files.  A new list is built so the caller's pair is left
        # untouched and tuples are accepted.
        unzipped_pair = []
        for member in pair_of_files:
            if member.endswith('.gz'):
                print('Unzipping: ', member)
                member = useful.gunzip_python(member)
            unzipped_pair.append(member)

        # Run trimmomatic
        if trim_seqs:
            print('Trimming low quality bases')
            method = 'PE'
            trimming_parameters = {
                'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            # Own name instead of rebinding the `input_files` parameter
            # while it is being iterated.
            stitch_inputs = processing.run_trimmomatic(
                unzipped_pair, folder_path, method, phred_encode,
                trimming_parameters)
        else:
            stitch_inputs = unzipped_pair

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_outputs = processing.run_pear(
            stitch_inputs[0],
            stitch_inputs[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory)
        pear_results = pear_outputs[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # The raw PEAR output is only an intermediate.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    # Collected but not returned or used further by this function.
    annotated_files = []
    for processed_file in processed_files:
        # NOTE(review): file_type is not passed, so igfft_multiprocess
        # falls back to its own default -- confirm it handles FASTQ input.
        annotated_f = igfft.igfft_multiprocess(
            processed_file,
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotated_f[0])
    print('Pipeline complete')