Example #1
0
def _extract_exons( record, exon_fofn, output_type, directory ):
    """
    Extract Fasta Records of each Exon from a Fasta Record

    Every exon file listed in `exon_fofn` is searched for in `record`; each
    exon for which both a start and an end position are found is written
    to 'all_exons.<output_type>' inside `directory`.

    The temporary file produced by _write_temp_fasta is always removed and
    the output handle is always closed, even if a step in between raises.

    Returns the path of the combined exon file (after passing it to
    check_output_file), or None if no exon could be located.
    """
    log.info('Extracting exons from "%s"' % record.name)
    temp_fasta = _write_temp_fasta( record )
    exon_count = 0
    try:
        output_file = os.path.join( directory, 'all_exons.%s' % output_type )
        output_handle = _open_output_handle( output_file, output_type )
        try:
            # Iterate over the individual Exon Fasta files looking for alignments
            for exon_fasta in read_list_file( exon_fofn ):
                # The exon identifier is the single character seven places
                # from the end of the path, i.e. the one right before a
                # six-character suffix.
                # NOTE(review): presumably the digit before ".fasta"; a
                # multi-digit exon number would be truncated - confirm
                exon_num = exon_fasta[-7]
                start, end = _find_exon_position( temp_fasta, exon_fasta, directory )
                # Skip exons for which no position could be determined
                if start is None or end is None:
                    continue
                exon_count += 1
                exon_record = _extract_exon_record( record, exon_num, start, end )
                output_handle.writeRecord( exon_record )
        finally:
            # Close the writer even when an exon fails part-way through
            output_handle.close()
    finally:
        # Never leave the temporary Fasta behind, whatever happened above
        os.unlink( temp_fasta )

    if not exon_count:
        log.warning("No valid exons found for %s!" % record.name)
        return None
    log.info("Extracted %s exons from %s" % (exon_count, record.name))
    check_output_file( output_file )
    return output_file
Example #2
0
def _extract_exons(record, exon_fofn, output_type, directory):
    """
    Extract Fasta Records of each Exon from a Fasta Record

    Every exon file listed in `exon_fofn` is searched for in `record`; each
    exon for which both a start and an end position are found is written
    to 'all_exons.<output_type>' inside `directory`.

    The temporary file produced by _write_temp_fasta is always removed and
    the output handle is always closed, even if a step in between raises.

    Returns the path of the combined exon file (after passing it to
    check_output_file), or None if no exon could be located.
    """
    log.info('Extracting exons from "%s"' % record.name)
    temp_fasta = _write_temp_fasta(record)
    exon_count = 0
    try:
        output_file = os.path.join(directory, 'all_exons.%s' % output_type)
        output_handle = _open_output_handle(output_file, output_type)
        try:
            # Iterate over the individual Exon Fasta files looking for alignments
            for exon_fasta in read_list_file(exon_fofn):
                # The exon identifier is the single character seven places
                # from the end of the path, i.e. the one right before a
                # six-character suffix.
                # NOTE(review): presumably the digit before ".fasta"; a
                # multi-digit exon number would be truncated - confirm
                exon_num = exon_fasta[-7]
                start, end = _find_exon_position(temp_fasta, exon_fasta, directory)
                # Skip exons for which no position could be determined
                if start is None or end is None:
                    continue
                exon_count += 1
                exon_record = _extract_exon_record(record, exon_num, start, end)
                output_handle.writeRecord(exon_record)
        finally:
            # Close the writer even when an exon fails part-way through
            output_handle.close()
    finally:
        # Never leave the temporary Fasta behind, whatever happened above
        os.unlink(temp_fasta)

    if not exon_count:
        log.warning("No valid exons found for %s!" % record.name)
        return None
    log.info("Extracted %s exons from %s" % (exon_count, record.name))
    check_output_file(output_file)
    return output_file
Example #3
0
def _parse_input(input_data):
    """
    Parse the list of subread files from the input if needed
    """
    if isinstance(input_data, str):
        return read_list_file(input_data)
    elif isinstance(input_data, list):
        return input_data
    else:
        msg = 'Input must be FOFN or List'
        log.error(msg)
        raise TypeError(msg)
Example #4
0
def _parse_input(input_data):
    """
    Parse the list of subread files from the input if needed
    """
    if isinstance(input_data, str):
        return read_list_file( input_data )
    elif isinstance(input_data, list):
        return input_data
    else:
        msg = 'Input must be FOFN or List'
        log.error( msg )
        raise TypeError( msg )
def separate_amplicons( subread_input, reference_fofn, locus, output=None ):
    """
    Public interface for _separate_amplicons

    `subread_input` is either a string, which is read with read_list_file,
    or a list of files used directly.  With string input and no `output`,
    the new file list is written back to `subread_input` itself; with list
    input an `output` must be given.

    Nothing is written if _split_exists reports that the files for `locus`
    have already been split.  Returns None in every case.

    Raises ValueError if a list is given without `output`, and TypeError
    if `subread_input` is neither a string nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file( subread_input )
        # Default to rewriting the input FOFN in place
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error( msg )
            raise ValueError( msg )
    else:
        # Fail clearly here instead of hitting an unbound file_list below
        msg = 'Input must be FOFN or List'
        log.error( msg )
        raise TypeError( msg )
    # If the inputs are valid, check that the files haven't already been split
    if _split_exists( file_list, locus ):
        log.info("Subreads already separated by amplicon for Locus %s, skipping" % locus)
        return
    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn( reference_fofn, locus )
    new_file_list = _separate_amplicons( file_list, reference_fasta, locus)
    write_list_file( new_file_list, output )
Example #6
0
def separate_amplicons(subread_input, reference_fofn, locus, output=None):
    """
    Public interface for _separate_amplicons

    `subread_input` is either a string, which is read with read_list_file,
    or a list of files used directly.  With string input and no `output`,
    the new file list is written back to `subread_input` itself; with list
    input an `output` must be given.

    Nothing is written if _split_exists reports that the files for `locus`
    have already been split.  Returns None in every case.

    Raises ValueError if a list is given without `output`, and TypeError
    if `subread_input` is neither a string nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file(subread_input)
        # Default to rewriting the input FOFN in place
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error(msg)
            raise ValueError(msg)
    else:
        # Fail clearly here instead of hitting an unbound file_list below
        msg = 'Input must be FOFN or List'
        log.error(msg)
        raise TypeError(msg)
    # If the inputs are valid, check that the files haven't already been split
    if _split_exists(file_list, locus):
        log.info("Subreads already separated by amplicon for Locus %s, skipping" % locus)
        return
    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn(reference_fofn, locus)
    new_file_list = _separate_amplicons(file_list, reference_fasta, locus)
    write_list_file(new_file_list, output)