def run_gglab_pipeline(input_files, species, loci, group_name=''):
	"""Clean single-read FASTQ files, annotate them with igfft and pair the results.

	Each input file is gunzipped when its name ends in '.gz', trimmed with
	trimmomatic in 'SE' mode, quality filtered, and the trimmed intermediate
	is deleted. Every filtered file is then annotated with
	igfft.igfft_multiprocess and all annotation files are handed to
	pairing.RunPairing, whose output folder is the directory of the first
	annotation file.

	Tool settings (window_trim, quality_cutoff_trim, min_read_len_post_trim,
	phred_encode, quality_cutoff, percent_bases, isotyping_barcodes,
	remove_insertions, number_threads, cluster_setting,
	annotation_cluster_cutoff) are read from names defined outside this
	function.

	Args:
		input_files: iterable of FASTQ path strings.
		species: forwarded to igfft as ``species``.
		loci: forwarded to igfft as ``locus``.
		group_name: forwarded to RunPairing as ``prefix_output_files``.

	Returns:
		None.

	Raises:
		IndexError: if ``input_files`` is empty (no annotation file exists
			to derive the output folder from).
	"""
	def clean_fastq(fastq_path):
		# Directory of the path as given; handed to the trimming and filtering steps.
		work_dir = os.path.dirname(fastq_path)
		if fastq_path.endswith('.gz'):
			print('Unzipping: ', fastq_path)
			fastq_path = useful.gunzip_python(fastq_path)

		# Run trimmomatic
		trim_settings = {
			'SLIDINGWINDOW': ':'.join([str(window_trim), str(quality_cutoff_trim)]),
			'MINLEN': min_read_len_post_trim,
		}
		trimmed = processing.run_trimmomatic(
			fastq_path, work_dir, 'SE', phred_encode, trim_settings)[0]

		# Run quality filtering, then drop the trimmed intermediate
		filtered = fastx.Run_Quality_Filter(
			trimmed,
			output_dir=work_dir,
			quality=quality_cutoff,
			percent=percent_bases)
		os.remove(trimmed)
		return filtered

	print('Processing raw fastq files')
	processed_files = [clean_fastq(path) for path in input_files]

	print('Annotating processed fastq files')
	annotated_files = [
		igfft.igfft_multiprocess(
			fastq,
			file_type='FASTQ',
			species=species,
			locus=loci,
			parsing_settings={
				'isotype': isotyping_barcodes,
				'remove_insertions': remove_insertions,
			},
			num_processes=number_threads,
			delete_alignment_file=True)[0]
		for fastq in processed_files
	]

	print('Pairing sequences')
	output_dir = os.path.dirname(annotated_files[0])
	pairing.RunPairing(
		annotated_files,
		annotated_file_formats='TAB',
		analysis_method='GEORGIOU_INHOUSE',
		output_folder_path=output_dir,
		prefix_output_files=group_name,
		cluster_cutoff=cluster_setting,
		annotation_cluster_setting=annotation_cluster_cutoff)
	print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Trim, filter, annotate (igfft) and pair a set of single-read FASTQ files.

    Steps, in order:
      1. For every path in ``input_files``: gunzip it when the name ends in
         '.gz', run trimmomatic in 'SE' mode, quality filter the trimmed
         file and delete the trimmed intermediate.
      2. Annotate every filtered file with igfft.igfft_multiprocess and keep
         the first element of each result.
      3. Call pairing.RunPairing on the annotation files, writing to the
         directory of the first annotation file.

    Tool settings are read from names defined outside this function
    (window_trim, quality_cutoff_trim, min_read_len_post_trim, phred_encode,
    quality_cutoff, percent_bases, isotyping_barcodes, remove_insertions,
    number_threads, cluster_setting, annotation_cluster_cutoff).

    Args:
        input_files: iterable of FASTQ path strings.
        species: passed to igfft as ``species``.
        loci: passed to igfft as ``locus``.
        group_name: passed to RunPairing as ``prefix_output_files``.

    Returns:
        None.

    Raises:
        IndexError: when ``input_files`` is empty, because the output folder
            is derived from the first annotation file.
    """
    print('Processing raw fastq files')
    cleaned = []
    for raw_path in input_files:
        # Directory is taken from the path as given, before any gunzip.
        work_dir = os.path.dirname(raw_path)
        fastq = raw_path
        if raw_path.endswith('.gz'):
            print('Unzipping: ', raw_path)
            fastq = useful.gunzip_python(raw_path)

        # Run trimmomatic
        window_spec = ':'.join((str(window_trim), str(quality_cutoff_trim)))
        trim_options = dict([
            ('SLIDINGWINDOW', window_spec),
            ('MINLEN', min_read_len_post_trim),
        ])
        trim_outputs = processing.run_trimmomatic(
            fastq, work_dir, 'SE', phred_encode, trim_options)
        trimmed = trim_outputs[0]

        # Run quality filtering; the trimmed file is only an intermediate
        cleaned.append(
            fastx.Run_Quality_Filter(trimmed,
                                     output_dir=work_dir,
                                     quality=quality_cutoff,
                                     percent=percent_bases))
        os.remove(trimmed)

    print('Annotating processed fastq files')
    annotations = []
    for fastq in cleaned:
        parsing = {
            'isotype': isotyping_barcodes,
            'remove_insertions': remove_insertions,
        }
        igfft_result = igfft.igfft_multiprocess(
            fastq,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings=parsing,
            num_processes=number_threads,
            delete_alignment_file=True)
        annotations.append(igfft_result[0])

    print('Pairing sequences')
    pairing_dir = os.path.dirname(annotations[0])
    pairing.RunPairing(
        annotations,
        annotated_file_formats='TAB',
        analysis_method='GEORGIOU_INHOUSE',
        output_folder_path=pairing_dir,
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
Example #3
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
	"""Stitch paired FASTQ files with PEAR, quality filter them and annotate with MIXCR.

	For every pair in ``input_files``: gunzip members whose name ends in
	'.gz' (the unzipped path is written back into the pair itself),
	optionally trim with trimmomatic in 'PE' mode when ``trim_seqs`` is
	truthy, stitch the first two files with processing.run_pear, quality
	filter the stitched file and delete the stitched intermediate. Each
	filtered file is then run through mixcr.RunMixcr and mixcr.parseMIXCR.

	Tool settings (trim_seqs, window_trim, quality_cutoff_trim,
	min_read_len_post_trim, phred_encode, min_overlap_length,
	max_assembly_length, min_assembly_length, max_fraction_uncalled,
	number_threads, pear_memory, quality_cutoff, percent_bases) are read
	from names defined outside this function.

	Args:
		input_files: iterable of mutable, indexable pairs of FASTQ path
			strings. Presumably [R1, R2] - confirm against the caller.
		species: accepted but not used here; RunMixcr is called with species=''.
		loci: accepted but not used here; RunMixcr is called with loci=[].
		group_name: accepted but not used here.

	Returns:
		None. The annotation results are collected locally but not returned.
	"""
	print('Processing raw fastq files')
	processed_files = []

	for pair_of_files in input_files:
		folder_path = os.path.dirname(pair_of_files[0])

		# Unzip files; the new path replaces the entry in the caller's pair.
		for slot, read_path in enumerate(pair_of_files):
			if not read_path.endswith('.gz'):
				continue
			print('Unzipping: ', read_path)
			pair_of_files[slot] = useful.gunzip_python(read_path)

		# Run trimmomatic (optional); untrimmed pairs go to PEAR unchanged.
		reads_to_stitch = pair_of_files
		if trim_seqs:
			print('Trimming low quality bases')
			trim_settings = {
				'SLIDINGWINDOW': ':'.join([str(window_trim), str(quality_cutoff_trim)]),
				'MINLEN': min_read_len_post_trim,
			}
			reads_to_stitch = processing.run_trimmomatic(
				pair_of_files, folder_path, 'PE', phred_encode, trim_settings)

		# Stitch R1-R2 files
		pear_settings = {
			'v': min_overlap_length,
			'm': max_assembly_length,
			'n': min_assembly_length,
			'u': max_fraction_uncalled,
		}
		print('Stitching R1-R2 reads')
		stitched = processing.run_pear(
			reads_to_stitch[0],
			reads_to_stitch[1],
			working_directory=folder_path,
			parameters=pear_settings,
			num_threads=number_threads,
			memory=pear_memory)[0]

		# Run quality filtering, then drop the stitched intermediate
		processed_files.append(
			fastx.Run_Quality_Filter(
				stitched,
				output_dir=folder_path,
				quality=quality_cutoff,
				percent=percent_bases))
		os.remove(stitched)

	print('Annotating processed fastq files')
	annotated_files = []
	for fastq in processed_files:
		alignment_path = useful.removeFileExtension(fastq) + '.mixcr.alignment'
		annotation_path = useful.removeFileExtension(fastq) + '.mixcr.annotation'

		# Run MIXCR file; only the second returned value is needed afterwards.
		print('Running MIXCR')
		_, command_val = mixcr.RunMixcr(
			fastq,
			alignment_path,
			filetype='FASTQ',
			loci=[],
			species='',
			exportPrettyAlignment=False,
			num_threads=number_threads)

		# Parse MIXCR file
		print('Parsing MIXCR')
		parsed = mixcr.parseMIXCR(
			fastq, alignment_path, 'FASTQ', annotation_path, command_val=command_val)
		annotated_files.append(parsed[0])
	print('Pipeline complete')
Example #4
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
	"""Stitch paired FASTQ files with PEAR, quality filter them and annotate with igfft.

	For every pair in ``input_files``: gunzip members whose name ends in
	'.gz' (the unzipped path is written back into the pair itself),
	optionally trim with trimmomatic in 'PE' mode when ``trim_seqs`` is
	truthy, stitch the first two files with processing.run_pear, quality
	filter the stitched file and delete the stitched intermediate. Each
	filtered file is then annotated with igfft.igfft_multiprocess.

	Tool settings (trim_seqs, window_trim, quality_cutoff_trim,
	min_read_len_post_trim, phred_encode, min_overlap_length,
	max_assembly_length, min_assembly_length, max_fraction_uncalled,
	number_threads, pear_memory, quality_cutoff, percent_bases,
	isotyping_barcodes, remove_insertions) are read from names defined
	outside this function.

	Args:
		input_files: iterable of mutable, indexable pairs of FASTQ path
			strings. Presumably [R1, R2] - confirm against the caller.
		species: forwarded to igfft as ``species``.
		loci: forwarded to igfft as ``locus``.
		group_name: accepted but not used here.

	Returns:
		None. The annotation results are collected locally but not returned.
	"""
	def stitch_and_filter(pair_of_files):
		folder_path = os.path.dirname(pair_of_files[0])

		# Unzip files; the new path replaces the entry in the caller's pair.
		for slot, read_path in enumerate(pair_of_files):
			if not read_path.endswith('.gz'):
				continue
			print('Unzipping: ', read_path)
			pair_of_files[slot] = useful.gunzip_python(read_path)

		# Run trimmomatic (optional); untrimmed pairs go to PEAR unchanged.
		if trim_seqs:
			print('Trimming low quality bases')
			trim_settings = {
				'SLIDINGWINDOW': ':'.join([str(window_trim), str(quality_cutoff_trim)]),
				'MINLEN': min_read_len_post_trim,
			}
			reads_to_stitch = processing.run_trimmomatic(
				pair_of_files, folder_path, 'PE', phred_encode, trim_settings)
		else:
			reads_to_stitch = pair_of_files

		# Stitch R1-R2 files
		pear_settings = {
			'v': min_overlap_length,
			'm': max_assembly_length,
			'n': min_assembly_length,
			'u': max_fraction_uncalled,
		}
		print('Stitching R1-R2 reads')
		stitched = processing.run_pear(
			reads_to_stitch[0],
			reads_to_stitch[1],
			working_directory=folder_path,
			parameters=pear_settings,
			num_threads=number_threads,
			memory=pear_memory)[0]

		# Run quality filtering, then drop the stitched intermediate
		filtered = fastx.Run_Quality_Filter(
			stitched,
			output_dir=folder_path,
			quality=quality_cutoff,
			percent=percent_bases)
		os.remove(stitched)
		return filtered

	print('Processing raw fastq files')
	processed_files = [stitch_and_filter(pair) for pair in input_files]

	print('Annotating processed fastq files')
	# Collected for parity with the other pipelines; nothing consumes it here.
	annotated_files = [
		igfft.igfft_multiprocess(
			fastq,
			species=species,
			locus=loci,
			parsing_settings={
				'isotype': isotyping_barcodes,
				'remove_insertions': remove_insertions,
			},
			num_processes=number_threads,
			delete_alignment_file=True)[0]
		for fastq in processed_files
	]
	print('Pipeline complete')
def run_gglab_pipeline(input_files, species, loci, group_name=''):
	"""Clean single-read FASTQ files, annotate them with MIXCR and pair the results.

	Each input file is gunzipped when its name ends in '.gz', trimmed with
	trimmomatic in 'SE' mode, quality filtered, and the trimmed intermediate
	is deleted. Every filtered file is run through mixcr.RunMixcr and
	mixcr.parseMIXCR, and the collected parse results are handed to
	pairing.RunPairing with analysis_method='MIXCR'. The pairing output
	folder is the directory of the first parse result.

	Tool settings (window_trim, quality_cutoff_trim, min_read_len_post_trim,
	phred_encode, quality_cutoff, percent_bases, number_threads,
	cluster_setting, annotation_cluster_cutoff) are read from names defined
	outside this function.

	Args:
		input_files: iterable of FASTQ path strings.
		species: accepted but not used here; RunMixcr is called with species=''.
		loci: accepted but not used here; RunMixcr is called with loci=[].
		group_name: forwarded to RunPairing as ``prefix_output_files``.

	Returns:
		None.

	Raises:
		IndexError: if ``input_files`` is empty (no annotation result exists
			to derive the output folder from).
	"""
	def clean_fastq(fastq_path):
		# Directory of the path as given; handed to the trimming and filtering steps.
		work_dir = os.path.dirname(fastq_path)
		if fastq_path.endswith('.gz'):
			print('Unzipping: ', fastq_path)
			fastq_path = useful.gunzip_python(fastq_path)

		# Run trimmomatic
		trim_settings = {
			'SLIDINGWINDOW': ':'.join([str(window_trim), str(quality_cutoff_trim)]),
			'MINLEN': min_read_len_post_trim,
		}
		trimmed = processing.run_trimmomatic(
			fastq_path, work_dir, 'SE', phred_encode, trim_settings)[0]

		# Run quality filtering, then drop the trimmed intermediate
		filtered = fastx.Run_Quality_Filter(
			trimmed,
			output_dir=work_dir,
			quality=quality_cutoff,
			percent=percent_bases)
		os.remove(trimmed)
		return filtered

	def annotate_with_mixcr(fastq):
		alignment_path = useful.removeFileExtension(fastq) + '.mixcr.alignment'
		annotation_path = useful.removeFileExtension(fastq) + '.mixcr.annotation'

		# Run MIXCR file; only the second returned value is needed afterwards.
		print('Running MIXCR')
		_, command_val = mixcr.RunMixcr(
			fastq,
			alignment_path,
			filetype='FASTQ',
			loci=[],
			species='',
			exportPrettyAlignment=False,
			num_threads=number_threads)

		# Parse MIXCR file
		print('Parsing MIXCR')
		# The original author expected this result to equal annotation_path.
		# NOTE(review): the result is stored as returned (not indexed) - confirm
		# that parseMIXCR returns a single path here.
		return mixcr.parseMIXCR(
			fastq, alignment_path, 'FASTQ', annotation_path, command_val=command_val)

	print('Processing raw fastq files')
	processed_files = [clean_fastq(path) for path in input_files]

	print('Annotating processed fastq files')
	annotated_files = [annotate_with_mixcr(fastq) for fastq in processed_files]

	print('Pairing sequences')
	output_dir = os.path.dirname(annotated_files[0])
	pairing.RunPairing(
		annotated_files,
		annotated_file_formats='TAB',
		analysis_method='MIXCR',
		output_folder_path=output_dir,
		prefix_output_files=group_name,
		cluster_cutoff=cluster_setting,
		annotation_cluster_setting=annotation_cluster_cutoff)
	print('Pipeline complete')
Example #6
0
def run_gglab_pipeline(input_files, species, loci, group_name=""):
    """Stitch paired FASTQ files with PEAR, quality filter them and annotate with MIXCR.

    Per pair in ``input_files``:
      * members whose name ends in ".gz" are gunzipped and the unzipped path
        is written back into the pair itself;
      * when ``trim_seqs`` is truthy the pair is trimmed with trimmomatic in
        "PE" mode, otherwise it is used as-is;
      * the first two files are stitched with processing.run_pear, the
        stitched file is quality filtered and then deleted.

    Each filtered file is afterwards run through mixcr.RunMixcr and
    mixcr.parseMIXCR.

    Tool settings (trim_seqs, window_trim, quality_cutoff_trim,
    min_read_len_post_trim, phred_encode, min_overlap_length,
    max_assembly_length, min_assembly_length, max_fraction_uncalled,
    number_threads, pear_memory, quality_cutoff, percent_bases) are read from
    names defined outside this function.

    Args:
        input_files: iterable of mutable, indexable pairs of FASTQ path
            strings. Presumably [R1, R2] - confirm against the caller.
        species: accepted but not used here; RunMixcr is called with species="".
        loci: accepted but not used here; RunMixcr is called with loci=[].
        group_name: accepted but not used here.

    Returns:
        None. The annotation results are collected locally but not returned.
    """
    print("Processing raw fastq files")
    filtered_fastqs = []

    for pair_of_files in input_files:
        work_dir = os.path.dirname(pair_of_files[0])

        # Unzip files; the new path replaces the entry in the caller's pair.
        for slot, read_path in enumerate(pair_of_files):
            if not read_path.endswith(".gz"):
                continue
            print("Unzipping: ", read_path)
            pair_of_files[slot] = useful.gunzip_python(read_path)

        # Run trimmomatic (optional); untrimmed pairs go to PEAR unchanged.
        reads_to_stitch = pair_of_files
        if trim_seqs:
            print("Trimming low quality bases")
            window_spec = ":".join((str(window_trim), str(quality_cutoff_trim)))
            trim_options = dict(
                [("SLIDINGWINDOW", window_spec), ("MINLEN", min_read_len_post_trim)]
            )
            reads_to_stitch = processing.run_trimmomatic(
                pair_of_files, work_dir, "PE", phred_encode, trim_options
            )

        # Stitch R1-R2 files
        pear_options = dict(
            [
                ("v", min_overlap_length),
                ("m", max_assembly_length),
                ("n", min_assembly_length),
                ("u", max_fraction_uncalled),
            ]
        )
        print("Stitching R1-R2 reads")
        pear_outputs = processing.run_pear(
            reads_to_stitch[0],
            reads_to_stitch[1],
            working_directory=work_dir,
            parameters=pear_options,
            num_threads=number_threads,
            memory=pear_memory,
        )
        stitched = pear_outputs[0]

        # Run quality filtering; the stitched file is only an intermediate
        filtered_fastqs.append(
            fastx.Run_Quality_Filter(
                stitched, output_dir=work_dir, quality=quality_cutoff, percent=percent_bases
            )
        )
        os.remove(stitched)

    print("Annotating processed fastq files")
    annotations = []
    for fastq in filtered_fastqs:
        alignment_path = useful.removeFileExtension(fastq) + ".mixcr.alignment"
        annotation_path = useful.removeFileExtension(fastq) + ".mixcr.annotation"

        # Run MIXCR file; only the second returned value is needed afterwards.
        print("Running MIXCR")
        _, command_val = mixcr.RunMixcr(
            fastq,
            alignment_path,
            filetype="FASTQ",
            loci=[],
            species="",
            exportPrettyAlignment=False,
            num_threads=number_threads,
        )

        # Parse MIXCR file
        print("Parsing MIXCR")
        parsed = mixcr.parseMIXCR(
            fastq, alignment_path, "FASTQ", annotation_path, command_val=command_val
        )
        annotations.append(parsed[0])
    print("Pipeline complete")
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Trim, filter, annotate (MIXCR) and pair a set of single-read FASTQ files.

    Steps, in order:
      1. For every path in ``input_files``: gunzip it when the name ends in
         '.gz', run trimmomatic in 'SE' mode, quality filter the trimmed
         file and delete the trimmed intermediate.
      2. Run mixcr.RunMixcr and mixcr.parseMIXCR on every filtered file and
         collect the parse results.
      3. Call pairing.RunPairing (analysis_method='MIXCR') on the collected
         results, writing to the directory of the first one.

    Tool settings are read from names defined outside this function
    (window_trim, quality_cutoff_trim, min_read_len_post_trim, phred_encode,
    quality_cutoff, percent_bases, number_threads, cluster_setting,
    annotation_cluster_cutoff).

    Args:
        input_files: iterable of FASTQ path strings.
        species: accepted but not used here; RunMixcr is called with species=''.
        loci: accepted but not used here; RunMixcr is called with loci=[].
        group_name: passed to RunPairing as ``prefix_output_files``.

    Returns:
        None.

    Raises:
        IndexError: when ``input_files`` is empty, because the output folder
            is derived from the first annotation result.
    """
    print('Processing raw fastq files')
    cleaned = []
    for raw_path in input_files:
        # Directory is taken from the path as given, before any gunzip.
        work_dir = os.path.dirname(raw_path)
        fastq = raw_path
        if raw_path.endswith('.gz'):
            print('Unzipping: ', raw_path)
            fastq = useful.gunzip_python(raw_path)

        # Run trimmomatic
        window_spec = ':'.join((str(window_trim), str(quality_cutoff_trim)))
        trim_options = dict([
            ('SLIDINGWINDOW', window_spec),
            ('MINLEN', min_read_len_post_trim),
        ])
        trim_outputs = processing.run_trimmomatic(
            fastq, work_dir, 'SE', phred_encode, trim_options)
        trimmed = trim_outputs[0]

        # Run quality filtering; the trimmed file is only an intermediate
        cleaned.append(
            fastx.Run_Quality_Filter(trimmed,
                                     output_dir=work_dir,
                                     quality=quality_cutoff,
                                     percent=percent_bases))
        os.remove(trimmed)

    print('Annotating processed fastq files')
    annotations = []
    for fastq in cleaned:
        alignment_path = useful.removeFileExtension(fastq) + '.mixcr.alignment'
        annotation_path = useful.removeFileExtension(fastq) + '.mixcr.annotation'

        # Run MIXCR file; only the second returned value is needed afterwards.
        print('Running MIXCR')
        _, command_val = mixcr.RunMixcr(
            fastq,
            alignment_path,
            filetype='FASTQ',
            loci=[],
            species='',
            exportPrettyAlignment=False,
            num_threads=number_threads)

        # Parse MIXCR file
        print('Parsing MIXCR')
        # The original author expected this result to equal annotation_path.
        # NOTE(review): the result is stored as returned (not indexed) - confirm
        # that parseMIXCR returns a single path here.
        annotations.append(
            mixcr.parseMIXCR(fastq,
                             alignment_path,
                             'FASTQ',
                             annotation_path,
                             command_val=command_val))

    print('Pairing sequences')
    pairing_dir = os.path.dirname(annotations[0])
    pairing.RunPairing(
        annotations,
        annotated_file_formats='TAB',
        analysis_method='MIXCR',
        output_folder_path=pairing_dir,
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
Example #8
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Stitch paired FASTQ files with PEAR, quality filter them and annotate with igfft.

    Per pair in ``input_files``:
      * members whose name ends in '.gz' are gunzipped and the unzipped path
        is written back into the pair itself;
      * when ``trim_seqs`` is truthy the pair is trimmed with trimmomatic in
        'PE' mode, otherwise it is used as-is;
      * the first two files are stitched with processing.run_pear, the
        stitched file is quality filtered and then deleted.

    Each filtered file is afterwards annotated with igfft.igfft_multiprocess.

    Tool settings (trim_seqs, window_trim, quality_cutoff_trim,
    min_read_len_post_trim, phred_encode, min_overlap_length,
    max_assembly_length, min_assembly_length, max_fraction_uncalled,
    number_threads, pear_memory, quality_cutoff, percent_bases,
    isotyping_barcodes, remove_insertions) are read from names defined
    outside this function.

    Args:
        input_files: iterable of mutable, indexable pairs of FASTQ path
            strings. Presumably [R1, R2] - confirm against the caller.
        species: passed to igfft as ``species``.
        loci: passed to igfft as ``locus``.
        group_name: accepted but not used here.

    Returns:
        None. The annotation results are collected locally but not returned.
    """
    print('Processing raw fastq files')
    filtered_fastqs = []

    for pair_of_files in input_files:
        work_dir = os.path.dirname(pair_of_files[0])

        # Unzip files; the new path replaces the entry in the caller's pair.
        for slot, read_path in enumerate(pair_of_files):
            if not read_path.endswith('.gz'):
                continue
            print('Unzipping: ', read_path)
            pair_of_files[slot] = useful.gunzip_python(read_path)

        # Run trimmomatic (optional); untrimmed pairs go to PEAR unchanged.
        reads_to_stitch = pair_of_files
        if trim_seqs:
            print('Trimming low quality bases')
            window_spec = ':'.join((str(window_trim), str(quality_cutoff_trim)))
            trim_options = dict([
                ('SLIDINGWINDOW', window_spec),
                ('MINLEN', min_read_len_post_trim),
            ])
            reads_to_stitch = processing.run_trimmomatic(
                pair_of_files, work_dir, 'PE', phred_encode, trim_options)

        # Stitch R1-R2 files
        pear_options = dict([
            ('v', min_overlap_length),
            ('m', max_assembly_length),
            ('n', min_assembly_length),
            ('u', max_fraction_uncalled),
        ])
        print('Stitching R1-R2 reads')
        pear_outputs = processing.run_pear(
            reads_to_stitch[0],
            reads_to_stitch[1],
            working_directory=work_dir,
            parameters=pear_options,
            num_threads=number_threads,
            memory=pear_memory)
        stitched = pear_outputs[0]

        # Run quality filtering; the stitched file is only an intermediate
        filtered_fastqs.append(
            fastx.Run_Quality_Filter(stitched,
                                     output_dir=work_dir,
                                     quality=quality_cutoff,
                                     percent=percent_bases))
        os.remove(stitched)

    print('Annotating processed fastq files')
    annotations = []
    for fastq in filtered_fastqs:
        parsing = {
            'isotype': isotyping_barcodes,
            'remove_insertions': remove_insertions,
        }
        igfft_result = igfft.igfft_multiprocess(
            fastq,
            species=species,
            locus=loci,
            parsing_settings=parsing,
            num_processes=number_threads,
            delete_alignment_file=True)
        annotations.append(igfft_result[0])
    print('Pipeline complete')