def do_resequencing(self, data_file, amp_analysis_records, min_length,
                    whitelist_file):
    """Resequence the supplied Amplicon Analysis records in a single pass.

    The records are written to ``reference.fasta`` inside the
    ``amp_analysis_resequencing`` output folder and handed to
    ``self.resequencer`` together with a Bax.H5 FOFN built from *data_file*.

    Args:
        data_file: raw data file, passed to ``get_bash5_reader`` and
            ``create_baxh5_fofn``.
        amp_analysis_records: sized collection of consensus records used as
            the resequencing reference.
        min_length: forwarded unchanged to ``self.resequencer``.
        whitelist_file: ZMW whitelist path forwarded to ``self.resequencer``.
    """
    log.info('Resequencing supplied Amplicon Analysis sequences')
    target_dir = self.get_output_folder("amp_analysis_resequencing")

    # A reader is opened on the raw data here; nothing below uses it.
    # NOTE(review): presumably kept for its validation side effect - confirm.
    raw_reader = get_bash5_reader(data_file)

    # Build the Bax.H5 FOFN that the resequencer consumes.
    fofn_path = os.path.join(self.output, 'baxh5.fofn')
    create_baxh5_fofn(data_file, fofn_path)

    # Every supplied record becomes part of the reference; no filtering.
    record_count = len(amp_analysis_records)
    log.info(
        'Identified {0} consensus sequences to resequence'.format(
            record_count))
    reference_path = os.path.join(target_dir, 'reference.fasta')
    write_records(amp_analysis_records, reference_path)

    # Resequence the reference against the whitelisted ZMWs.
    self.resequencer(fofn_path, whitelist_file, reference_path,
                     output=target_dir, min_length=min_length)
    log.info("Finished resequencing supplied Amplicon Analysis sequences\n")
def __call__(self, amp_analysis, data_file, barcode_file, barcode_string=None,
             min_snr=None, min_length=None):
    """Run the barcode-by-barcode Amplicon Analysis resequencing workflow.

    For every barcode returned by ``get_barcodes`` the matching consensus
    records are filtered, written to ``reference.fasta``, and resequenced
    against a whitelist of that barcode's ZMWs.

    Args:
        amp_analysis: Amplicon Analysis input, resolved to a single file by
            ``get_input_file``.
        data_file: raw data file, passed to ``get_bash5_reader`` and
            ``create_baxh5_fofn``.
        barcode_file: barcode data opened with ``get_barcode_reader``.
        barcode_string: optional barcode selection forwarded to
            ``get_barcodes``.
        min_snr: forwarded to ``filter_zmw_list``.
        min_length: forwarded to ``self.resequencer``.
    """
    log.info(
        "Beginning Amplicon Analysis resequencing workflow for {0}".format(
            amp_analysis))

    # Pick or create a single file from AA and read it
    amp_analysis_file = get_input_file(amp_analysis)
    amp_analysis_records = list(AmpliconAnalysisReader(amp_analysis_file))

    # Convert the raw data file into a BaxH5 fofn for use downstream
    # and create appropriate reader for local access
    bash5 = get_bash5_reader(data_file)
    baxh5_file = os.path.join(self.output, 'baxh5.fofn')
    create_baxh5_fofn(data_file, baxh5_file)

    # Create a Reader for the Barcode data and find the overlap with any
    # barcodes specified by the user
    bc_reader = get_barcode_reader(barcode_file)
    bc_list = get_barcodes(bc_reader, barcode_string)

    for i, bc in enumerate(bc_list):
        log.info('Resequencing Barcode {0} (#{1} of {2})'.format(
            bc, i + 1, len(bc_list)))
        output_dir = self.get_output_folder(bc)

        # Extract any consensus sequences associated with this barcode
        record_list = [r for r in amp_analysis_records if r.barcode == bc]
        log.info(
            'Identified {0} consensus sequences for Barcode {1}'.format(
                len(record_list), bc))

        # Keep only consensus sequences supported by at least 20 reads,
        # then reduce them with get_unique_records
        filtered_records = [r for r in record_list if r.num_reads >= 20]
        unique_records = get_unique_records(filtered_records)

        # A barcode may have no consensus sequences at all; guard the
        # division so the workflow does not die with ZeroDivisionError.
        # Scaling before rounding avoids float noise in the logged value.
        if record_list:
            fraction = round(
                100.0 * len(unique_records) / len(record_list), 1)
        else:
            fraction = 0.0
        log.info(
            '{0} of {1} ({2}%) consensus sequences passed all filters'.
            format(len(unique_records), len(record_list), fraction))
        reference_file = os.path.join(output_dir, 'reference.fasta')
        write_records(unique_records, reference_file)

        # Identify all high-quality, barcode-specific ZMWs and write them
        # to file
        zmw_list = get_barcode_zmws(bc_reader, bc)
        zmw_list = filter_zmw_list(bash5, zmw_list, min_snr=min_snr)
        whitelist_file = os.path.join(output_dir, 'whitelist.txt')
        write_zmw_whitelist(bash5, zmw_list, whitelist_file)

        # Resequence the selected consensus sequences with the selected ZMWs
        self.resequencer(baxh5_file, whitelist_file, reference_file,
                         output=output_dir, min_length=min_length)
        log.info("Finished resequencing Barcode {0}\n".format(bc))
def _create_baxh5_fofn(input_file, output):
    """Return the path to ``baxh5.fofn`` under *output*, creating it if needed.

    When ``valid_file`` accepts the existing path it is reused as-is.
    Otherwise the FOFN is written from *input_file* with
    ``create_baxh5_fofn`` and then passed to ``check_output_file``.

    Args:
        input_file: source handed to ``create_baxh5_fofn``.
        output: directory in which ``baxh5.fofn`` lives.

    Returns:
        The joined path ``<output>/baxh5.fofn``.
    """
    log.info('Creating FOFN of Bax.H5 files')
    fofn_path = os.path.join(output, 'baxh5.fofn')

    if not valid_file(fofn_path):
        # Nothing reusable on disk: build the FOFN and check the result.
        log.info("No existing Bax.H5 fofn found")
        create_baxh5_fofn(input_file, fofn_path)
        check_output_file(fofn_path)
        log.info('Finished writing Bax.H5 fofn file\n')
    else:
        log.info("Using existing Bax.H5 FOFN file")

    return fofn_path
def _create_baxh5_fofn(input_file, output):
    """Locate or build the Bax.H5 FOFN stored as ``baxh5.fofn`` in *output*.

    An existing file that passes ``valid_file`` is reused. Otherwise
    ``create_baxh5_fofn`` writes a new one from *input_file* and
    ``check_output_file`` is called on it.

    Args:
        input_file: source handed to ``create_baxh5_fofn``.
        output: directory that holds (or will hold) ``baxh5.fofn``.

    Returns:
        Path of the FOFN file, whether reused or newly written.
    """
    log.info('Creating FOFN of Bax.H5 files')
    target = os.path.join(output, 'baxh5.fofn')
    already_present = valid_file(target)

    if already_present:
        log.info("Using existing Bax.H5 FOFN file")
    else:
        log.info("No existing Bax.H5 fofn found")
        create_baxh5_fofn(input_file, target)
        check_output_file(target)
        log.info('Finished writing Bax.H5 fofn file\n')
    return target
def __call__(self, amp_analysis, data_file, barcode_file, barcode_string=None,
             min_snr=None, min_length=None):
    """Resequence Amplicon Analysis consensus sequences one barcode at a time.

    Each barcode selected through ``get_barcodes`` gets its own output
    folder holding a filtered ``reference.fasta`` and a ``whitelist.txt``
    of ZMWs, which are then handed to ``self.resequencer``.

    Args:
        amp_analysis: Amplicon Analysis input, resolved to a single file by
            ``get_input_file``.
        data_file: raw data file, passed to ``get_bash5_reader`` and
            ``create_baxh5_fofn``.
        barcode_file: barcode data opened with ``get_barcode_reader``.
        barcode_string: optional barcode selection forwarded to
            ``get_barcodes``.
        min_snr: forwarded to ``filter_zmw_list``.
        min_length: forwarded to ``self.resequencer``.
    """
    log.info("Beginning Amplicon Analysis resequencing workflow for {0}".format(amp_analysis))

    # Pick or create a single file from AA and read it
    amp_analysis_file = get_input_file(amp_analysis)
    amp_analysis_records = list(AmpliconAnalysisReader(amp_analysis_file))

    # Convert the raw data file into a BaxH5 fofn for use downstream
    # and create appropriate reader for local access
    bash5 = get_bash5_reader(data_file)
    baxh5_file = os.path.join(self.output, 'baxh5.fofn')
    create_baxh5_fofn(data_file, baxh5_file)

    # Create a Reader for the Barcode data and find the overlap with any
    # barcodes specified by the user
    bc_reader = get_barcode_reader(barcode_file)
    bc_list = get_barcodes(bc_reader, barcode_string)

    for i, bc in enumerate(bc_list):
        log.info('Resequencing Barcode {0} (#{1} of {2})'.format(bc, i + 1, len(bc_list)))
        output_dir = self.get_output_folder(bc)

        # Extract any consensus sequences associated with this barcode
        record_list = [r for r in amp_analysis_records if r.barcode == bc]
        log.info('Identified {0} consensus sequences for Barcode {1}'.format(len(record_list), bc))

        # Only consensus sequences backed by at least 20 reads are kept
        # before get_unique_records reduces them further
        filtered_records = [r for r in record_list if r.num_reads >= 20]
        unique_records = get_unique_records(filtered_records)

        # Guard against barcodes with no consensus sequences, which would
        # otherwise raise ZeroDivisionError. Multiplying before rounding
        # keeps float noise out of the logged percentage.
        if record_list:
            fraction = round(100.0 * len(unique_records) / len(record_list), 1)
        else:
            fraction = 0.0
        log.info('{0} of {1} ({2}%) consensus sequences passed all filters'.format(
            len(unique_records), len(record_list), fraction))
        reference_file = os.path.join(output_dir, 'reference.fasta')
        write_records(unique_records, reference_file)

        # Identify all high-quality, barcode-specific ZMWs and write them to file
        zmw_list = get_barcode_zmws(bc_reader, bc)
        zmw_list = filter_zmw_list(bash5, zmw_list, min_snr=min_snr)
        whitelist_file = os.path.join(output_dir, 'whitelist.txt')
        write_zmw_whitelist(bash5, zmw_list, whitelist_file)

        # Resequence the selected consensus sequences with the selected ZMWs
        self.resequencer(baxh5_file, whitelist_file, reference_file,
                         output=output_dir, min_length=min_length)
        log.info("Finished resequencing Barcode {0}\n".format(bc))
def do_resequencing(self, data_file, amp_analysis_records, min_length, whitelist_file):
    """Resequence all supplied Amplicon Analysis records against one whitelist.

    Writes *amp_analysis_records* to ``reference.fasta`` in the
    ``amp_analysis_resequencing`` output folder, builds a Bax.H5 FOFN from
    *data_file*, and invokes ``self.resequencer`` on them.

    Args:
        data_file: raw data file, passed to ``get_bash5_reader`` and
            ``create_baxh5_fofn``.
        amp_analysis_records: sized collection of consensus records written
            out as the reference.
        min_length: forwarded unchanged to ``self.resequencer``.
        whitelist_file: ZMW whitelist path forwarded to ``self.resequencer``.
    """
    log.info("Resequencing supplied Amplicon Analysis sequences")
    work_dir = self.get_output_folder("amp_analysis_resequencing")

    # The reader is opened but not referenced again in this method.
    # NOTE(review): possibly retained to fail fast on bad input - confirm.
    h5_reader = get_bash5_reader(data_file)
    fofn_file = os.path.join(self.output, "baxh5.fofn")
    create_baxh5_fofn(data_file, fofn_file)

    # All supplied records are used as the reference, unfiltered.
    count_message = "Identified {0} consensus sequences to resequence".format(
        len(amp_analysis_records)
    )
    log.info(count_message)
    ref_fasta = os.path.join(work_dir, "reference.fasta")
    write_records(amp_analysis_records, ref_fasta)

    # Hand everything to the resequencer.
    self.resequencer(
        fofn_file,
        whitelist_file,
        ref_fasta,
        output=work_dir,
        min_length=min_length,
    )
    log.info("Finished resequencing supplied Amplicon Analysis sequences\n")
def do_barcoded_resequencing(
    self, data_file, amp_analysis_records, barcode_file, barcode_string, min_snr, min_length
):
    """Resequence the supplied consensus records separately for each barcode.

    For every barcode returned by ``get_barcodes`` the matching records are
    filtered and written to ``reference.fasta``, the barcode's ZMWs are
    written to ``whitelist.txt``, and ``self.resequencer`` is run on both.

    Args:
        data_file: raw data file, passed to ``get_bash5_reader`` and
            ``create_baxh5_fofn``.
        amp_analysis_records: iterable of consensus records exposing
            ``barcode`` and ``num_reads`` attributes.
        barcode_file: barcode data opened with ``get_barcode_reader``.
        barcode_string: barcode selection forwarded to ``get_barcodes``.
        min_snr: forwarded to ``filter_zmw_list``.
        min_length: forwarded to ``self.resequencer``.
    """
    # Convert the raw data file into a BaxH5 fofn for use downstream
    # and create appropriate reader for local access
    bash5 = get_bash5_reader(data_file)
    baxh5_file = os.path.join(self.output, "baxh5.fofn")
    create_baxh5_fofn(data_file, baxh5_file)

    # Create a Reader for the Barcode data and find the overlap with any
    # barcodes specified by the user
    bc_reader = get_barcode_reader(barcode_file)
    bc_list = get_barcodes(bc_reader, barcode_string)

    for i, bc in enumerate(bc_list):
        log.info("Resequencing Barcode {0} (#{1} of {2})".format(bc, i + 1, len(bc_list)))
        output_dir = self.get_output_folder(bc)

        # Extract any consensus sequences associated with this barcode
        record_list = [r for r in amp_analysis_records if r.barcode == bc]
        log.info("Identified {0} consensus sequences for Barcode {1}".format(len(record_list), bc))

        # Require at least 20 supporting reads, then reduce the survivors
        # with get_unique_records
        filtered_records = [r for r in record_list if r.num_reads >= 20]
        unique_records = get_unique_records(filtered_records)

        # A barcode with no consensus sequences must not abort the whole
        # run with ZeroDivisionError. Scaling to percent before rounding
        # also avoids float noise in the logged value.
        if record_list:
            fraction = round(100.0 * len(unique_records) / len(record_list), 1)
        else:
            fraction = 0.0
        log.info(
            "{0} of {1} ({2}%) consensus sequences passed all filters".format(
                len(unique_records), len(record_list), fraction
            )
        )
        reference_file = os.path.join(output_dir, "reference.fasta")
        write_records(unique_records, reference_file)

        # Identify all high-quality, barcode-specific ZMWs and write them to file
        zmw_list = get_barcode_zmws(bc_reader, bc)
        zmw_list = filter_zmw_list(bash5, zmw_list, min_snr=min_snr)
        whitelist_file = os.path.join(output_dir, "whitelist.txt")
        write_zmw_whitelist(bash5, zmw_list, whitelist_file)

        # Resequence the selected consensus sequences with the selected ZMWs
        self.resequencer(baxh5_file, whitelist_file, reference_file, output=output_dir, min_length=min_length)
        log.info("Finished resequencing Barcode {0}\n".format(bc))