def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Annotate raw FASTQ files and return the annotated file names.

    Each entry of ``input_files`` whose name ends in ``.gz`` is first passed
    to ``useful.gunzip_python`` and the value it returns is used in place of
    the original path. Every file is then handed to
    ``igfft.igfft_multiprocess`` and the first element of each result is
    collected.

    Args:
        input_files: Iterable of FASTQ file paths (strings), optionally
            ending in ``.gz``.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Accepted for interface compatibility; not used by this
            variant of the pipeline.

    Returns:
        str: The collected annotated file names joined with commas (an
        empty string when ``input_files`` is empty).
    """
    print('Processing raw fastq files')

    # The result list has to exist before the loop appends to it.
    annotated_files = []
    for f in input_files:
        if f.endswith('.gz'):
            print('Unzipping: ', f)
            f = useful.gunzip_python(f)

        # isotyping_barcodes, remove_insertions and number_threads are
        # settings defined outside this function.
        annotated_f = igfft.igfft_multiprocess(
            f,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True,
        )
        # Only the first element of the returned sequence is kept.
        annotated_files.append(annotated_f[0])

    output_file_list = ','.join(annotated_files)
    # Single-argument call form prints the same text on Python 2 and 3.
    print(output_file_list)
    return output_file_list
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Trim, quality-filter, annotate and pair a set of FASTQ files.

    For every input file the function:

    1. passes it to ``useful.gunzip_python`` when its name ends in ``.gz``
       and continues with the returned path,
    2. calls ``processing.run_trimmomatic`` with method ``'SE'`` and keeps
       the first element of the result,
    3. calls ``fastx.Run_Quality_Filter`` on the trimmed file and then
       deletes the trimmed file.

    The filtered files are annotated with ``igfft.igfft_multiprocess`` and
    the annotated files are finally handed to ``pairing.RunPairing``.

    Args:
        input_files: Iterable of FASTQ file paths (strings), optionally
            ending in ``.gz``.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Forwarded to ``RunPairing`` as ``prefix_output_files``.

    Returns:
        None. Results are whatever files the called tools produce.
    """
    print('Processing raw fastq files')
    processed_files = []
    for f in input_files:
        # Intermediate and filtered files go next to the input file.
        folder_path = os.path.dirname(f)
        if f.endswith('.gz'):
            print('Unzipping: ', f)
            f = useful.gunzip_python(f)

        # Run trimmomatic (settings come from names defined outside this
        # function).
        trimming_parameters = {
            'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
            'MINLEN': min_read_len_post_trim,
        }
        method = 'SE'
        trimmedf = processing.run_trimmomatic(
            f, folder_path, method, phred_encode, trimming_parameters)[0]

        # Run quality filtering
        filtered_trimmed_file = fastx.Run_Quality_Filter(
            trimmedf,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # The trimmed file is only an intermediate; drop it once filtered.
        os.remove(trimmedf)
        processed_files.append(filtered_trimmed_file)

    print('Annotating processed fastq files')
    annotated_files = []
    for f in processed_files:
        annotated_f = igfft.igfft_multiprocess(
            f,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        # Only the first element of the returned sequence is kept.
        annotated_files.append(annotated_f[0])

    if annotated_files:
        print('Pairing sequences')
        # Pairing output is written beside the first annotated file.
        output_dir = os.path.dirname(annotated_files[0])
        pairing.RunPairing(
            annotated_files,
            annotated_file_formats='TAB',
            analysis_method='GEORGIOU_INHOUSE',
            output_folder_path=output_dir,
            prefix_output_files=group_name,
            cluster_cutoff=cluster_setting,
            annotation_cluster_setting=annotation_cluster_cutoff)
    else:
        # With no input there is no first annotated file to derive the
        # output directory from, so pairing is skipped instead of failing
        # with an IndexError.
        print('No annotated files to pair; skipping pairing')

    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Process FASTQ files through trimming, filtering, annotation, pairing.

    Steps, in order:

    * Files whose name ends in ``.gz`` are given to
      ``useful.gunzip_python``; its return value replaces the path.
    * ``processing.run_trimmomatic`` is called per file with method
      ``'SE'``; the first element of its result is the trimmed file.
    * ``fastx.Run_Quality_Filter`` is run on the trimmed file, after which
      the trimmed file is removed from disk.
    * Each filtered file is annotated with ``igfft.igfft_multiprocess``.
    * All annotated files are passed to ``pairing.RunPairing``.

    Args:
        input_files: Iterable of FASTQ file paths (strings), optionally
            ending in ``.gz``.
        species: Passed through as ``species`` to ``igfft_multiprocess``.
        loci: Passed through as ``locus`` to ``igfft_multiprocess``.
        group_name: Passed through as ``prefix_output_files`` to
            ``RunPairing``.

    Returns:
        None.
    """
    print('Processing raw fastq files')
    processed_files = []
    for fastq_path in input_files:
        # Determined before unzipping; outputs are written to this folder.
        folder_path = os.path.dirname(fastq_path)
        if fastq_path.endswith('.gz'):
            print('Unzipping: ', fastq_path)
            fastq_path = useful.gunzip_python(fastq_path)

        # Run trimmomatic; the window/quality/length settings are names
        # defined outside this function.
        sliding_window = str(window_trim) + ':' + str(quality_cutoff_trim)
        trimming_parameters = {
            'SLIDINGWINDOW': sliding_window,
            'MINLEN': min_read_len_post_trim,
        }
        trimmed_results = processing.run_trimmomatic(
            fastq_path, folder_path, 'SE', phred_encode, trimming_parameters)
        trimmedf = trimmed_results[0]

        # Run quality filtering, then discard the trimmed intermediate.
        filtered_trimmed_file = fastx.Run_Quality_Filter(
            trimmedf,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        os.remove(trimmedf)
        processed_files.append(filtered_trimmed_file)

    print('Annotating processed fastq files')
    parsing_settings = {
        'isotype': isotyping_barcodes,
        'remove_insertions': remove_insertions,
    } if processed_files else None
    # Keep only the first element of each annotation result.
    annotated_files = [
        igfft.igfft_multiprocess(
            filtered_path,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings=dict(parsing_settings),
            num_processes=number_threads,
            delete_alignment_file=True)[0]
        for filtered_path in processed_files
    ]

    if not annotated_files:
        # Nothing was annotated (for example, empty input_files), so there
        # is no first file to take the output directory from; skip pairing
        # rather than raising IndexError.
        print('No annotated files to pair; skipping pairing')
        print('Pipeline complete')
        return

    print('Pairing sequences')
    # Pairing output goes to the folder of the first annotated file.
    output_dir = os.path.dirname(annotated_files[0])
    pairing.RunPairing(
        annotated_files,
        annotated_file_formats='TAB',
        analysis_method='GEORGIOU_INHOUSE',
        output_folder_path=output_dir,
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Annotate FASTQ files and return their annotated file names.

    Files whose name ends in ``.gz`` are first given to
    ``useful.gunzip_python``, whose return value is used as the file to
    annotate. Annotation is done by ``igfft.igfft_multiprocess``; the first
    element of each call's result is recorded.

    Args:
        input_files: Iterable of FASTQ file paths (strings), optionally
            ending in ``.gz``.
        species: Passed to ``igfft_multiprocess`` as ``species``.
        loci: Passed to ``igfft_multiprocess`` as ``locus``.
        group_name: Unused in this variant; kept so the signature matches
            existing callers.

    Returns:
        str: Comma-separated annotated file names, in input order. Empty
        string if ``input_files`` is empty.
    """
    print('Processing raw fastq files')
    annotated_files = []
    for fastq_path in input_files:
        if fastq_path.endswith('.gz'):
            print('Unzipping: ', fastq_path)
            fastq_path = useful.gunzip_python(fastq_path)

        # Annotation settings are names defined outside this function.
        annotation_result = igfft.igfft_multiprocess(
            fastq_path,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotation_result[0])

    output_file_list = ','.join(annotated_files)
    # Written as a single-argument call so the line parses on Python 3 and
    # prints identically on Python 2.
    print(output_file_list)
    return output_file_list
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Stitch, quality-filter and annotate paired FASTQ files.

    ``input_files`` is iterated as a collection of file pairs. For each
    pair the function:

    1. replaces every path ending in ``.gz`` with the value returned by
       ``useful.gunzip_python``,
    2. when ``trim_seqs`` is truthy, calls ``processing.run_trimmomatic``
       with method ``'PE'`` and continues with its result,
    3. calls ``processing.run_pear`` on the first two files and keeps the
       first element of its result,
    4. runs ``fastx.Run_Quality_Filter`` on that file and then deletes it.

    The filtered files are then annotated with
    ``igfft.igfft_multiprocess``.

    Args:
        input_files: Iterable of pairs of FASTQ paths; each pair is an
            indexable sequence of strings with at least two entries.
            The pairs themselves are not modified.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Accepted for interface compatibility; not used by this
            variant of the pipeline.

    Returns:
        None. The annotated file names are collected locally but are not
        returned.
    """
    print('Processing raw fastq files')
    processed_files = []
    for pair_of_files in input_files:
        folder_path = os.path.dirname(pair_of_files[0])

        # Work on a private list so the caller's pair is left untouched and
        # tuples are accepted as well as lists.
        read_files = list(pair_of_files)
        for i, f in enumerate(read_files):
            if f.endswith('.gz'):
                print('Unzipping: ', f)
                read_files[i] = useful.gunzip_python(f)

        # Run trimmomatic. The result gets its own name rather than
        # rebinding the ``input_files`` parameter that is being iterated.
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            method = 'PE'
            stitch_inputs = processing.run_trimmomatic(
                read_files, folder_path, method, phred_encode, trimming_parameters)
        else:
            stitch_inputs = read_files

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(
            stitch_inputs[0],
            stitch_inputs[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory)[0]

        # Run quality filtering, then drop the stitched intermediate.
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    annotated_files = []
    for f in processed_files:
        annotated_f = igfft.igfft_multiprocess(
            f,
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotated_f[0])
    print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=""):
    """Annotate raw FASTQ files and return a comma-joined list of results.

    A file whose name ends in ``.gz`` is first handed to
    ``useful.gunzip_python``; whatever that call returns is annotated
    instead of the original path. Annotation is performed by
    ``igfft.igfft_multiprocess`` and the first element of each result is
    kept.

    Args:
        input_files: Iterable of FASTQ file paths (strings), optionally
            ending in ``.gz``.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Not used by this variant; present to keep the
            signature stable for callers.

    Returns:
        str: Annotated file names joined with ``","``; ``""`` when no
        input files are given.
    """
    print("Processing raw fastq files")

    # Accumulator for the annotated file names; it must be defined before
    # the loop below appends to it.
    annotated_files = []
    for f in input_files:
        if f.endswith(".gz"):
            print("Unzipping: ", f)
            f = useful.gunzip_python(f)
        annotated_f = igfft.igfft_multiprocess(
            f,
            file_type="FASTQ",
            species=species,
            locus=loci,
            # Settings below are names defined outside this function.
            parsing_settings={
                "isotype": isotyping_barcodes,
                "remove_insertions": remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True,
        )
        annotated_files.append(annotated_f[0])

    output_file_list = ",".join(annotated_files)
    # Call form of print: valid on Python 3, same output on Python 2.
    print(output_file_list)
    return output_file_list
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Run the paired-read pipeline: unzip, trim, stitch, filter, annotate.

    Every element of ``input_files`` is treated as a pair of FASTQ paths.
    Per pair:

    * paths ending in ``.gz`` are replaced by the return value of
      ``useful.gunzip_python``;
    * if ``trim_seqs`` is truthy, ``processing.run_trimmomatic`` is called
      with method ``'PE'`` and its result is used for stitching,
      otherwise the (unzipped) pair is used directly;
    * ``processing.run_pear`` is called on the first two files and the
      first element of its result is quality-filtered with
      ``fastx.Run_Quality_Filter``;
    * the stitched file is deleted after filtering.

    Finally each filtered file is annotated with
    ``igfft.igfft_multiprocess``.

    Args:
        input_files: Iterable of pairs of FASTQ paths (any indexable
            sequence of two strings, e.g. a list or a tuple). The pairs
            are read only; unzipped paths are tracked in a local copy.
        species: Passed to ``igfft_multiprocess`` as ``species``.
        loci: Passed to ``igfft_multiprocess`` as ``locus``.
        group_name: Unused in this variant; kept for signature
            compatibility.

    Returns:
        None.
    """
    print('Processing raw fastq files')
    processed_files = []
    for pair_of_files in input_files:
        folder_path = os.path.dirname(pair_of_files[0])

        # Build the working pair without writing back into the caller's
        # sequence; this also makes tuple pairs usable.
        current_pair = []
        for f in pair_of_files:
            if f.endswith('.gz'):
                print('Unzipping: ', f)
                f = useful.gunzip_python(f)
            current_pair.append(f)

        # Run trimmomatic. A dedicated local holds the files to stitch so
        # the ``input_files`` parameter is never rebound mid-iteration.
        files_to_stitch = current_pair
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            method = 'PE'
            files_to_stitch = processing.run_trimmomatic(
                current_pair, folder_path, method, phred_encode,
                trimming_parameters)

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_output = processing.run_pear(
            files_to_stitch[0],
            files_to_stitch[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory)
        pear_results = pear_output[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        # The stitched file is an intermediate; remove it once filtered.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    # The annotated names are gathered here but not returned.
    annotated_files = []
    for filtered_path in processed_files:
        annotated_f = igfft.igfft_multiprocess(
            filtered_path,
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotated_f[0])
    print('Pipeline complete')