Ejemplo n.º 1
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Preprocess paired-end FASTQ files and annotate them with MIXCR.

    Each R1/R2 pair is gunzipped when a path ends in ``.gz``, optionally
    trimmed with trimmomatic, stitched with PEAR and quality filtered. Every
    filtered file is then aligned and parsed with MIXCR.

    Tuning values are read from module-level names: ``trim_seqs``,
    ``window_trim``, ``quality_cutoff_trim``, ``min_read_len_post_trim``,
    ``phred_encode``, ``min_overlap_length``, ``max_assembly_length``,
    ``min_assembly_length``, ``max_fraction_uncalled``, ``number_threads``,
    ``pear_memory``, ``quality_cutoff`` and ``percent_bases``.

    Args:
        input_files: Iterable of R1/R2 pairs of FASTQ paths. The pairs are
            copied before use, so lists and tuples are both accepted and the
            caller's data is left untouched.
        species: Accepted for interface compatibility; not forwarded to MIXCR
            (see the review note in the annotation loop).
        loci: Accepted for interface compatibility; not forwarded to MIXCR.
        group_name: Not used by this function.

    Returns:
        A list holding the first element of each ``mixcr.parseMIXCR`` result,
        one entry per input pair (presumably the annotation file path --
        confirm against ``parseMIXCR``). Empty when ``input_files`` is empty.
    """
    print('Processing raw fastq files')
    processed_files = []

    for pair_of_files in input_files:
        # Local copy: the parameter is no longer rebound and the caller's
        # sequence is not modified while unzipping.
        reads = list(pair_of_files)
        folder_path = os.path.dirname(reads[0])
        for i, f in enumerate(reads):
            if f.endswith('.gz'):
                print('Unzipping: ', f)
                reads[i] = useful.gunzip_python(f)

        # Run trimmomatic
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            reads = processing.run_trimmomatic(
                reads, folder_path, 'PE', phred_encode, trimming_parameters)

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(
            reads[0],
            reads[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory,
        )[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases,
        )
        # The stitched intermediate is not needed once it has been filtered.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    annotated_files = []
    for f in processed_files:
        base_name = useful.removeFileExtension(f)
        output_file = base_name + '.mixcr.alignment'
        output_file_annotation = base_name + '.mixcr.annotation'

        # Run MIXCR file
        print('Running MIXCR')
        # NOTE(review): `species` and `loci` are not forwarded here; MIXCR is
        # called with empty values. Confirm whether that is intentional
        # before wiring the arguments through.
        _, command_val = mixcr.RunMixcr(
            f,
            output_file,
            filetype='FASTQ',
            loci=[],
            species='',
            exportPrettyAlignment=False,
            num_threads=number_threads,
        )

        # Parse MIXCR file
        print('Parsing MIXCR')
        annotated_file = mixcr.parseMIXCR(
            f, output_file, 'FASTQ', output_file_annotation,
            command_val=command_val)
        annotated_files.append(annotated_file[0])

    print('Pipeline complete')
    # Hand the results back; previously the list was built and discarded.
    return annotated_files
Ejemplo n.º 2
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Preprocess paired-end FASTQ files and annotate them with IgFFT.

    Each R1/R2 pair is gunzipped when a path ends in ``.gz``, optionally
    trimmed with trimmomatic, stitched with PEAR and quality filtered. Every
    filtered file is then passed to ``igfft.igfft_multiprocess``.

    Tuning values are read from module-level names: ``trim_seqs``,
    ``window_trim``, ``quality_cutoff_trim``, ``min_read_len_post_trim``,
    ``phred_encode``, ``min_overlap_length``, ``max_assembly_length``,
    ``min_assembly_length``, ``max_fraction_uncalled``, ``number_threads``,
    ``pear_memory``, ``quality_cutoff``, ``percent_bases``,
    ``isotyping_barcodes`` and ``remove_insertions``.

    Args:
        input_files: Iterable of R1/R2 pairs of FASTQ paths. The pairs are
            copied before use, so lists and tuples are both accepted and the
            caller's data is left untouched.
        species: Forwarded to IgFFT as ``species``.
        loci: Forwarded to IgFFT as ``locus``.
        group_name: Not used by this function.

    Returns:
        A list holding the first element of each ``igfft_multiprocess``
        result, one entry per input pair (presumably the annotation file
        path -- confirm against ``igfft``). Empty when ``input_files`` is
        empty.
    """
    print('Processing raw fastq files')
    processed_files = []

    for pair_of_files in input_files:
        # Local copy: the parameter is no longer rebound and the caller's
        # sequence is not modified while unzipping.
        reads = list(pair_of_files)
        folder_path = os.path.dirname(reads[0])
        for i, f in enumerate(reads):
            if f.endswith('.gz'):
                print('Unzipping: ', f)
                reads[i] = useful.gunzip_python(f)

        # Run trimmomatic
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW': str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            reads = processing.run_trimmomatic(
                reads, folder_path, 'PE', phred_encode, trimming_parameters)

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(
            reads[0],
            reads[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory,
        )[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases,
        )
        # The stitched intermediate is not needed once it has been filtered.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    parsing_settings = {
        'isotype': isotyping_barcodes,
        'remove_insertions': remove_insertions,
    } if processed_files else None
    annotated_files = []
    for f in processed_files:
        annotated_f = igfft.igfft_multiprocess(
            f,
            species=species,
            locus=loci,
            parsing_settings=dict(parsing_settings),
            num_processes=number_threads,
            delete_alignment_file=True,
        )
        annotated_files.append(annotated_f[0])

    print('Pipeline complete')
    # Hand the results back; previously the list was built and discarded.
    return annotated_files
Ejemplo n.º 3
0
def run_gglab_pipeline(input_files, species, loci, group_name=""):
    """Preprocess paired-end FASTQ files and annotate them with MIXCR.

    Each R1/R2 pair is gunzipped when a path ends in ``.gz``, optionally
    trimmed with trimmomatic, stitched with PEAR and quality filtered. Every
    filtered file is then aligned and parsed with MIXCR.

    Tuning values are read from module-level names: ``trim_seqs``,
    ``window_trim``, ``quality_cutoff_trim``, ``min_read_len_post_trim``,
    ``phred_encode``, ``min_overlap_length``, ``max_assembly_length``,
    ``min_assembly_length``, ``max_fraction_uncalled``, ``number_threads``,
    ``pear_memory``, ``quality_cutoff`` and ``percent_bases``.

    Args:
        input_files: Iterable of R1/R2 pairs of FASTQ paths. The pairs are
            copied before use, so lists and tuples are both accepted and the
            caller's data is left untouched.
        species: Accepted for interface compatibility; not forwarded to MIXCR
            (see the review note in the annotation loop).
        loci: Accepted for interface compatibility; not forwarded to MIXCR.
        group_name: Not used by this function.

    Returns:
        A list holding the first element of each ``mixcr.parseMIXCR`` result,
        one entry per input pair (presumably the annotation file path --
        confirm against ``parseMIXCR``). Empty when ``input_files`` is empty.
    """
    print("Processing raw fastq files")
    processed_files = []

    for pair_of_files in input_files:
        # Local copy: the parameter is no longer rebound and the caller's
        # sequence is not modified while unzipping.
        reads = list(pair_of_files)
        folder_path = os.path.dirname(reads[0])
        for i, f in enumerate(reads):
            if f.endswith(".gz"):
                print("Unzipping: ", f)
                reads[i] = useful.gunzip_python(f)

        # Run trimmomatic
        if trim_seqs:
            print("Trimming low quality bases")
            trimming_parameters = {
                "SLIDINGWINDOW": str(window_trim) + ":" + str(quality_cutoff_trim),
                "MINLEN": min_read_len_post_trim,
            }
            reads = processing.run_trimmomatic(
                reads, folder_path, "PE", phred_encode, trimming_parameters
            )

        # Stitch R1-R2 files
        pairing_parameters = {
            "v": min_overlap_length,
            "m": max_assembly_length,
            "n": min_assembly_length,
            "u": max_fraction_uncalled,
        }
        print("Stitching R1-R2 reads")
        pear_results = processing.run_pear(
            reads[0],
            reads[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory,
        )[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(
            pear_results, output_dir=folder_path, quality=quality_cutoff, percent=percent_bases
        )
        # The stitched intermediate is not needed once it has been filtered.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print("Annotating processed fastq files")
    annotated_files = []
    for f in processed_files:
        base_name = useful.removeFileExtension(f)
        output_file = base_name + ".mixcr.alignment"
        output_file_annotation = base_name + ".mixcr.annotation"

        # Run MIXCR file
        print("Running MIXCR")
        # NOTE(review): `species` and `loci` are not forwarded here; MIXCR is
        # called with empty values. Confirm whether that is intentional
        # before wiring the arguments through.
        _, command_val = mixcr.RunMixcr(
            f,
            output_file,
            filetype="FASTQ",
            loci=[],
            species="",
            exportPrettyAlignment=False,
            num_threads=number_threads,
        )

        # Parse MIXCR file
        print("Parsing MIXCR")
        annotated_file = mixcr.parseMIXCR(
            f, output_file, "FASTQ", output_file_annotation, command_val=command_val
        )
        annotated_files.append(annotated_file[0])

    print("Pipeline complete")
    # Hand the results back; previously the list was built and discarded.
    return annotated_files
Ejemplo n.º 4
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Preprocess paired-end FASTQ files and annotate them with IgFFT.

    Each R1/R2 pair is gunzipped when a path ends in ``.gz``, optionally
    trimmed with trimmomatic, stitched with PEAR and quality filtered. Every
    filtered file is then passed to ``igfft.igfft_multiprocess``.

    Tuning values are read from module-level names: ``trim_seqs``,
    ``window_trim``, ``quality_cutoff_trim``, ``min_read_len_post_trim``,
    ``phred_encode``, ``min_overlap_length``, ``max_assembly_length``,
    ``min_assembly_length``, ``max_fraction_uncalled``, ``number_threads``,
    ``pear_memory``, ``quality_cutoff``, ``percent_bases``,
    ``isotyping_barcodes`` and ``remove_insertions``.

    Args:
        input_files: Iterable of R1/R2 pairs of FASTQ paths. The pairs are
            copied before use, so lists and tuples are both accepted and the
            caller's data is left untouched.
        species: Forwarded to IgFFT as ``species``.
        loci: Forwarded to IgFFT as ``locus``.
        group_name: Not used by this function.

    Returns:
        A list holding the first element of each ``igfft_multiprocess``
        result, one entry per input pair (presumably the annotation file
        path -- confirm against ``igfft``). Empty when ``input_files`` is
        empty.
    """
    print('Processing raw fastq files')
    processed_files = []

    for pair_of_files in input_files:
        # Local copy: the parameter is no longer rebound and the caller's
        # sequence is not modified while unzipping.
        reads = list(pair_of_files)
        folder_path = os.path.dirname(reads[0])
        for i, f in enumerate(reads):
            if f.endswith('.gz'):
                print('Unzipping: ', f)
                reads[i] = useful.gunzip_python(f)

        # Run trimmomatic
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW':
                str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            reads = processing.run_trimmomatic(reads, folder_path, 'PE',
                                               phred_encode,
                                               trimming_parameters)

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(reads[0],
                                           reads[1],
                                           working_directory=folder_path,
                                           parameters=pairing_parameters,
                                           num_threads=number_threads,
                                           memory=pear_memory)[0]

        # Run quality filtering
        filtered_file = fastx.Run_Quality_Filter(pear_results,
                                                 output_dir=folder_path,
                                                 quality=quality_cutoff,
                                                 percent=percent_bases)
        # The stitched intermediate is not needed once it has been filtered.
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    annotated_files = []
    for f in processed_files:
        # A fresh settings dict is built per file, as in the original call.
        parsing_settings = {
            'isotype': isotyping_barcodes,
            'remove_insertions': remove_insertions,
        }
        annotated_f = igfft.igfft_multiprocess(f,
                                               species=species,
                                               locus=loci,
                                               parsing_settings=parsing_settings,
                                               num_processes=number_threads,
                                               delete_alignment_file=True)
        annotated_files.append(annotated_f[0])

    print('Pipeline complete')
    # Hand the results back; previously the list was built and discarded.
    return annotated_files