Ejemplo n.º 1
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Annotate raw FASTQ files and return the result paths as one string.

    Every entry of ``input_files`` whose name ends in ``.gz`` is first passed
    to ``useful.gunzip_python`` and the value it returns is used in place of
    the original entry. Each file is then handed to
    ``igfft.igfft_multiprocess`` and the first element of its result is
    collected.

    Args:
        input_files: Iterable of file path strings.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Accepted but not used by this function.

    Returns:
        The collected first elements joined with ``','``; an empty string
        when ``input_files`` is empty.

    Note:
        Reads the module-level names ``isotyping_barcodes``,
        ``remove_insertions`` and ``number_threads``.
    """
    print('Processing raw fastq files')
    # The accumulator must exist before the loop appends to it and before the
    # join below; it is local so repeated calls never share results.
    annotated_files = []
    for f in input_files:
        if f.endswith('.gz'):
            print('Unzipping: ', f)
            f = useful.gunzip_python(f)
        annotated_f = igfft.igfft_multiprocess(
            f,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        # Only the first element of the annotation result is kept.
        annotated_files.append(annotated_f[0])
    output_file_list = ','.join(annotated_files)
    # Call form prints the same text on Python 2 and is valid on Python 3.
    print(output_file_list)
    return output_file_list
Ejemplo n.º 2
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Trim, quality-filter, annotate and pair single-end FASTQ files.

    For each entry of ``input_files`` the function gunzips it when its name
    ends in ``.gz``, runs ``processing.run_trimmomatic`` in ``'SE'`` mode,
    passes the first trimmomatic output to ``fastx.Run_Quality_Filter`` and
    deletes that trimmed intermediate. All filtered files are annotated with
    ``igfft.igfft_multiprocess`` and the annotations are handed to
    ``pairing.RunPairing``.

    Args:
        input_files: Iterable of file path strings.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Passed to ``RunPairing`` as ``prefix_output_files``.

    Returns:
        None.

    Raises:
        IndexError: If ``input_files`` is empty, because the output folder is
            derived from the first annotated file.

    Note:
        Trimming, filtering, annotation and clustering settings are read from
        module-level names (``window_trim``, ``quality_cutoff``,
        ``number_threads``, ``cluster_setting``, ...).
    """
    print('Processing raw fastq files')
    processed_files = []
    for raw_path in input_files:
        # Output folder comes from the path as given, before any unzipping.
        work_dir = os.path.dirname(raw_path)
        if raw_path.endswith('.gz'):
            print('Unzipping: ', raw_path)
            raw_path = useful.gunzip_python(raw_path)

        # Run trimmomatic
        sliding_window = str(window_trim) + ':' + str(quality_cutoff_trim)
        trim_settings = {
            'SLIDINGWINDOW': sliding_window,
            'MINLEN': min_read_len_post_trim,
        }
        trim_outputs = processing.run_trimmomatic(
            raw_path, work_dir, 'SE', phred_encode, trim_settings)
        trimmed_path = trim_outputs[0]

        # Run quality filtering
        filtered_path = fastx.Run_Quality_Filter(
            trimmed_path,
            output_dir=work_dir,
            quality=quality_cutoff,
            percent=percent_bases)
        # The trimmed file is only an intermediate once filtering is done.
        os.remove(trimmed_path)
        processed_files.append(filtered_path)

    print('Annotating processed fastq files')
    # Keep only the first element of each annotation result.
    annotated_files = [
        igfft.igfft_multiprocess(
            filtered,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)[0]
        for filtered in processed_files
    ]

    print('Pairing sequences')
    # Pairing output is written next to the first annotated file.
    output_dir = os.path.dirname(annotated_files[0])
    pairing.RunPairing(
        annotated_files,
        annotated_file_formats='TAB',
        analysis_method='GEORGIOU_INHOUSE',
        output_folder_path=output_dir,
        prefix_output_files=group_name,
        cluster_cutoff=cluster_setting,
        annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
Ejemplo n.º 3
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Run the single-end pipeline: trim, filter, annotate, then pair.

    Stage 1 walks ``input_files``: a ``.gz`` entry is replaced by the value
    returned from ``useful.gunzip_python``, the file is trimmed with
    ``processing.run_trimmomatic`` (``'SE'`` method), the first trimmed
    output is quality-filtered with ``fastx.Run_Quality_Filter`` and then
    removed from disk. Stage 2 annotates every filtered file with
    ``igfft.igfft_multiprocess``. Stage 3 calls ``pairing.RunPairing`` on
    the annotation files.

    Args:
        input_files: Iterable of file path strings.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Passed to ``RunPairing`` as ``prefix_output_files``.

    Returns:
        None.

    Raises:
        IndexError: If ``input_files`` is empty (no first annotated file to
            derive the output folder from).

    Note:
        All tuning values (trimming window, quality cutoffs, thread count,
        cluster cutoffs, ...) come from module-level names.
    """
    # Stage 1: unzip, trim and quality-filter each raw file.
    print('Processing raw fastq files')
    processed_files = []
    for source_path in input_files:
        # Directory of the path as supplied (computed before unzipping).
        folder_path = os.path.dirname(source_path)
        fastq_path = source_path
        if source_path.endswith('.gz'):
            print('Unzipping: ', source_path)
            fastq_path = useful.gunzip_python(source_path)

        window_spec = str(window_trim) + ':' + str(quality_cutoff_trim)
        trimming_parameters = {'SLIDINGWINDOW': window_spec,
                               'MINLEN': min_read_len_post_trim}
        # Only the first trimmomatic output is carried forward.
        trimmed = processing.run_trimmomatic(
            fastq_path, folder_path, 'SE', phred_encode,
            trimming_parameters)[0]

        filtered = fastx.Run_Quality_Filter(trimmed,
                                            output_dir=folder_path,
                                            quality=quality_cutoff,
                                            percent=percent_bases)
        os.remove(trimmed)  # intermediate no longer needed
        processed_files.append(filtered)

    # Stage 2: annotate each filtered file.
    print('Annotating processed fastq files')
    annotated_files = []
    for filtered_path in processed_files:
        # Settings dict is rebuilt per call, as in the original loop.
        settings = {'isotype': isotyping_barcodes,
                    'remove_insertions': remove_insertions}
        annotation_result = igfft.igfft_multiprocess(
            filtered_path,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings=settings,
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotation_result[0])

    # Stage 3: pair sequences; results go next to the first annotation file.
    print('Pairing sequences')
    output_dir = os.path.dirname(annotated_files[0])
    pairing.RunPairing(annotated_files,
                       annotated_file_formats='TAB',
                       analysis_method='GEORGIOU_INHOUSE',
                       output_folder_path=output_dir,
                       prefix_output_files=group_name,
                       cluster_cutoff=cluster_setting,
                       annotation_cluster_setting=annotation_cluster_cutoff)
    print('Pipeline complete')
Ejemplo n.º 4
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Annotate raw FASTQ files and return the annotation paths joined by commas.

    Entries of ``input_files`` ending in ``.gz`` are replaced by the value
    returned from ``useful.gunzip_python``. Each file is annotated with
    ``igfft.igfft_multiprocess`` and the first element of every result is
    collected.

    Args:
        input_files: Iterable of file path strings.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Accepted but not used by this function.

    Returns:
        The collected first elements joined with ``','``; an empty string
        when ``input_files`` is empty.

    Note:
        Reads the module-level names ``isotyping_barcodes``,
        ``remove_insertions`` and ``number_threads``.
    """
    print('Processing raw fastq files')
    annotated_files = []
    for f in input_files:
        if f.endswith('.gz'):
            print('Unzipping: ', f)
            f = useful.gunzip_python(f)
        annotated_f = igfft.igfft_multiprocess(
            f,
            file_type='FASTQ',
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        # Only the first element of the annotation result is kept.
        annotated_files.append(annotated_f[0])
    output_file_list = ','.join(annotated_files)
    # Call form: same output on Python 2, and no SyntaxError on Python 3.
    print(output_file_list)
    return output_file_list
Ejemplo n.º 5
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Trim, stitch, quality-filter and annotate paired-end FASTQ files.

    For every pair in ``input_files`` the function gunzips members whose name
    ends in ``.gz``, optionally trims them with ``processing.run_trimmomatic``
    (``'PE'`` method, only when the module-level ``trim_seqs`` is truthy),
    stitches the two reads with ``processing.run_pear``, quality-filters the
    first PEAR output with ``fastx.Run_Quality_Filter`` and deletes that PEAR
    output. Each filtered file is then annotated with
    ``igfft.igfft_multiprocess``.

    Args:
        input_files: Iterable of pairs (list or tuple) of file path strings;
            index 0 and 1 of each pair are passed to ``run_pear``.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Accepted but not used by this function.

    Returns:
        None.

    Note:
        Trimming, stitching, filtering and annotation settings are read from
        module-level names (``window_trim``, ``min_overlap_length``,
        ``quality_cutoff``, ``number_threads``, ``pear_memory``, ...).
    """
    print('Processing raw fastq files')
    processed_files = []

    for pair_of_files in input_files:
        # Work on a private copy: the caller's pair is left untouched and
        # immutable pairs (tuples) are accepted.
        read_files = list(pair_of_files)
        folder_path = os.path.dirname(read_files[0])
        for i, f in enumerate(read_files):
            if f.endswith('.gz'):
                print('Unzipping: ', f)
                read_files[i] = useful.gunzip_python(f)

        # Run trimmomatic. The result goes into a local name instead of
        # rebinding the ``input_files`` parameter mid-iteration.
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW':
                    str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim,
            }
            read_files = processing.run_trimmomatic(
                read_files, folder_path, 'PE', phred_encode,
                trimming_parameters)

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(
            read_files[0],
            read_files[1],
            working_directory=folder_path,
            parameters=pairing_parameters,
            num_threads=number_threads,
            memory=pear_memory)[0]

        # Run quality filtering, then drop the stitched intermediate.
        filtered_file = fastx.Run_Quality_Filter(
            pear_results,
            output_dir=folder_path,
            quality=quality_cutoff,
            percent=percent_bases)
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    annotated_files = []
    for f in processed_files:
        annotated_f = igfft.igfft_multiprocess(
            f,
            species=species,
            locus=loci,
            parsing_settings={
                'isotype': isotyping_barcodes,
                'remove_insertions': remove_insertions,
            },
            num_processes=number_threads,
            delete_alignment_file=True)
        annotated_files.append(annotated_f[0])
    print('Pipeline complete')
Ejemplo n.º 6
0
def run_gglab_pipeline(input_files, species, loci, group_name=""):
    """Annotate raw FASTQ files and return the annotation paths as one string.

    A file whose name ends in ``.gz`` is first passed to
    ``useful.gunzip_python`` and the returned value replaces it. Each file is
    then annotated with ``igfft.igfft_multiprocess``; the first element of
    every result is collected.

    Args:
        input_files: Iterable of file path strings.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Accepted but not used by this function.

    Returns:
        The collected first elements joined with ``","``; an empty string
        when ``input_files`` is empty.

    Note:
        Reads the module-level names ``isotyping_barcodes``,
        ``remove_insertions`` and ``number_threads``.
    """
    print("Processing raw fastq files")
    # Defined locally so the append and join below cannot hit an undefined
    # name and results are never shared between calls.
    annotated_files = []
    for f in input_files:
        if f.endswith(".gz"):
            print("Unzipping: ", f)
            f = useful.gunzip_python(f)
        annotated_f = igfft.igfft_multiprocess(
            f,
            file_type="FASTQ",
            species=species,
            locus=loci,
            parsing_settings={"isotype": isotyping_barcodes, "remove_insertions": remove_insertions},
            num_processes=number_threads,
            delete_alignment_file=True,
        )
        # Only the first element of the annotation result is kept.
        annotated_files.append(annotated_f[0])
    output_file_list = ",".join(annotated_files)
    # Call form prints identically on Python 2 and is valid on Python 3.
    print(output_file_list)
    return output_file_list
Ejemplo n.º 7
0
def run_gglab_pipeline(input_files, species, loci, group_name=''):
    """Run the paired-end pipeline: unzip, trim, stitch, filter, annotate.

    Each pair in ``input_files`` is processed in turn: members ending in
    ``.gz`` are replaced by the value returned from ``useful.gunzip_python``;
    when the module-level ``trim_seqs`` is truthy the pair is trimmed with
    ``processing.run_trimmomatic`` (``'PE'`` method); the two reads are
    stitched with ``processing.run_pear``; the first PEAR output is filtered
    with ``fastx.Run_Quality_Filter`` and then deleted. Finally every
    filtered file is annotated with ``igfft.igfft_multiprocess``.

    Args:
        input_files: Iterable of pairs (list or tuple) of file path strings;
            elements 0 and 1 of each pair are passed to ``run_pear``.
        species: Forwarded to ``igfft_multiprocess`` as ``species``.
        loci: Forwarded to ``igfft_multiprocess`` as ``locus``.
        group_name: Accepted but not used by this function.

    Returns:
        None.

    Note:
        All tuning values (trimming window, overlap/assembly lengths, quality
        cutoffs, thread count, PEAR memory, ...) come from module-level names.
    """
    print('Processing raw fastq files')
    processed_files = []

    for pair_of_files in input_files:
        # Copy the pair so the caller's object is never modified in place and
        # tuple pairs do not fail on item assignment.
        current_pair = list(pair_of_files)
        folder_path = os.path.dirname(current_pair[0])
        for i, f in enumerate(current_pair):
            if f.endswith('.gz'):
                print('Unzipping: ', f)
                current_pair[i] = useful.gunzip_python(f)

        # Run trimmomatic; keep the result in a local name rather than
        # shadowing the ``input_files`` parameter being iterated.
        if trim_seqs:
            print('Trimming low quality bases')
            trimming_parameters = {
                'SLIDINGWINDOW':
                str(window_trim) + ':' + str(quality_cutoff_trim),
                'MINLEN': min_read_len_post_trim
            }
            method = 'PE'
            current_pair = processing.run_trimmomatic(current_pair,
                                                      folder_path, method,
                                                      phred_encode,
                                                      trimming_parameters)

        # Stitch R1-R2 files
        pairing_parameters = {
            'v': min_overlap_length,
            'm': max_assembly_length,
            'n': min_assembly_length,
            'u': max_fraction_uncalled,
        }
        print('Stitching R1-R2 reads')
        pear_results = processing.run_pear(current_pair[0],
                                           current_pair[1],
                                           working_directory=folder_path,
                                           parameters=pairing_parameters,
                                           num_threads=number_threads,
                                           memory=pear_memory)[0]
        # Run quality filtering, then remove the stitched intermediate.
        filtered_file = fastx.Run_Quality_Filter(pear_results,
                                                 output_dir=folder_path,
                                                 quality=quality_cutoff,
                                                 percent=percent_bases)
        os.remove(pear_results)
        processed_files.append(filtered_file)

    print('Annotating processed fastq files')
    annotated_files = []
    for f in processed_files:
        annotated_f = igfft.igfft_multiprocess(f,
                                               species=species,
                                               locus=loci,
                                               parsing_settings={
                                                   'isotype':
                                                   isotyping_barcodes,
                                                   'remove_insertions':
                                                   remove_insertions
                                               },
                                               num_processes=number_threads,
                                               delete_alignment_file=True)
        annotated_files.append(annotated_f[0])
    print('Pipeline complete')