def _extract_exons(record, exon_fofn, output_type, directory):
    """
    Extract Fasta Records of each Exon from a Fasta Record

    The record is written to a temporary Fasta file, each exon Fasta listed
    in ``exon_fofn`` is located against it, and every exon that is found is
    written to ``<directory>/all_exons.<output_type>``.

    Args:
        record: Sequence record to search; its ``name`` is used in log output.
        exon_fofn: List file naming one exon Fasta file per entry.
        output_type: Used as the output file extension and passed on to
            ``_open_output_handle``.
        directory: Directory for the output file; also handed to
            ``_find_exon_position``.

    Returns:
        The path of the output file, or ``None`` if no exon could be located.
    """
    log.info('Extracting exons from "%s"' % record.name)
    output_file = os.path.join(directory, 'all_exons.%s' % output_type)
    exon_count = 0

    temp_fasta = _write_temp_fasta(record)
    try:
        output_handle = _open_output_handle(output_file, output_type)
        try:
            # Iterate over the individual Exon Fasta files looking for alignments
            for exon_fasta in read_list_file(exon_fofn):
                # The exon number is the single character seven places from
                # the end of the path.
                # NOTE(review): assumes a fixed filename layout with a
                # one-digit exon number -- confirm against the FOFN writer.
                exon_num = exon_fasta[-7]
                start, end = _find_exon_position(temp_fasta, exon_fasta, directory)
                if start is None or end is None:
                    continue
                exon_count += 1
                exon_record = _extract_exon_record(record, exon_num, start, end)
                output_handle.writeRecord(exon_record)
        finally:
            # Always release the writer, even if an exon lookup fails
            output_handle.close()
    finally:
        # Always remove the temporary Fasta, even on error
        os.unlink(temp_fasta)

    if not exon_count:
        log.warning("No valid exons found for %s!" % record.name)
        return None

    log.info("Extracted %s exons from %s" % (exon_count, record.name))
    check_output_file(output_file)
    return output_file
def _extract_exons(record, exon_fofn, output_type, directory):
    """
    Extract Fasta Records of each Exon from a Fasta Record

    Writes ``record`` to a temporary Fasta file, searches it for each exon
    Fasta named in ``exon_fofn``, and writes every located exon to
    ``<directory>/all_exons.<output_type>``.

    Args:
        record: Sequence record to search; ``record.name`` is used for logging.
        exon_fofn: List file with one exon Fasta path per entry.
        output_type: Output file extension; also passed to
            ``_open_output_handle``.
        directory: Directory that receives the output file and is passed to
            ``_find_exon_position``.

    Returns:
        Path to the output file, or ``None`` when no exon was located.
    """
    log.info('Extracting exons from "%s"' % record.name)
    temp_fasta = _write_temp_fasta(record)
    exon_count = 0
    try:
        output_file = os.path.join(directory, 'all_exons.%s' % output_type)
        output_handle = _open_output_handle(output_file, output_type)
        try:
            # Iterate over the individual Exon Fasta files looking for alignments
            for exon_fasta in read_list_file(exon_fofn):
                # Exon number is read from a fixed offset in the path.
                # NOTE(review): presumably a single-digit exon id ahead of a
                # six-character suffix -- verify the naming convention.
                exon_num = exon_fasta[-7]
                start, end = _find_exon_position(temp_fasta, exon_fasta, directory)
                if start is None or end is None:
                    continue
                exon_count += 1
                exon_record = _extract_exon_record(record, exon_num, start, end)
                output_handle.writeRecord(exon_record)
        finally:
            # Close the writer whether or not the loop completed
            output_handle.close()
    finally:
        # The temporary Fasta must not outlive this call, even on failure
        os.unlink(temp_fasta)

    if exon_count:
        log.info("Extracted %s exons from %s" % (exon_count, record.name))
    else:
        log.warning("No valid exons found for %s!" % record.name)
        return None
    check_output_file(output_file)
    return output_file
def _parse_input(input_data): """ Parse the list of subread files from the input if needed """ if isinstance(input_data, str): return read_list_file(input_data) elif isinstance(input_data, list): return input_data else: msg = 'Input must be FOFN or List' log.error(msg) raise TypeError(msg)
def _parse_input(input_data): """ Parse the list of subread files from the input if needed """ if isinstance(input_data, str): return read_list_file( input_data ) elif isinstance(input_data, list): return input_data else: msg = 'Input must be FOFN or List' log.error( msg ) raise TypeError( msg )
def separate_amplicons(subread_input, reference_fofn, locus, output=None):
    """
    Public interface for _separate_amplicons

    Args:
        subread_input: Either the path of a list file (FOFN) or a list of
            files.
        reference_fofn: Reference FOFN passed to ``_parse_reference_fofn``
            together with ``locus``.
        locus: Locus to separate by; also used in log messages.
        output: List file to write the new file list to. Defaults to
            ``subread_input`` when that is a FOFN path (the input list file
            is then overwritten); required when ``subread_input`` is a list.

    Returns:
        None. Nothing is written when ``_split_exists`` reports that the
        files have already been split.

    Raises:
        ValueError: ``subread_input`` is a list and ``output`` is None.
        TypeError: ``subread_input`` is neither a string nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file(subread_input)
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error(msg)
            raise ValueError(msg)
    else:
        # Fail clearly instead of hitting an unbound ``file_list`` below
        msg = 'Input must be FOFN or List'
        log.error(msg)
        raise TypeError(msg)

    # If the inputs are valid, check that the files haven't already been split
    if _split_exists(file_list, locus):
        log.info("Subreads already separated by amplicon for Locus %s, skipping" % locus)
        return

    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn(reference_fofn, locus)
    new_file_list = _separate_amplicons(file_list, reference_fasta, locus)
    write_list_file(new_file_list, output)
def separate_amplicons(subread_input, reference_fofn, locus, output=None):
    """
    Public interface for _separate_amplicons

    Accepts the subreads either as a FOFN path or as a list of files,
    separates them by amplicon for ``locus`` unless that has already been
    done, and writes the resulting file list to ``output``.

    Args:
        subread_input: Path of a list file (FOFN), or a list of files.
        reference_fofn: Reference FOFN, resolved for ``locus`` by
            ``_parse_reference_fofn``.
        locus: Locus being processed.
        output: Destination list file. When omitted and ``subread_input`` is
            a FOFN path, the input FOFN itself is used as the destination;
            it must be given when ``subread_input`` is a list.

    Returns:
        None.

    Raises:
        ValueError: A list was supplied without an ``output`` file.
        TypeError: ``subread_input`` is neither a string nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file(subread_input)
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error(msg)
            raise ValueError(msg)
    else:
        # Unsupported input type: previously this fell through and failed
        # later on the unassigned ``file_list``
        msg = 'Input must be FOFN or List'
        log.error(msg)
        raise TypeError(msg)

    # If the inputs are valid, check that the files haven't already been split
    if _split_exists(file_list, locus):
        log.info("Subreads already separated by amplicon for Locus %s, skipping" % locus)
        return

    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn(reference_fofn, locus)
    new_file_list = _separate_amplicons(file_list, reference_fasta, locus)
    write_list_file(new_file_list, output)