Ejemplo n.º 1
0
def separate_amplicons( input_data, reference_fofn, target_loci, output=None ):
    """
    Public interface for _separate_amplicons

    Walk the subread files obtained from `input_data` and split, by
    amplicon, every file whose locus is in `target_loci` and which is not
    already amplicon-specific. Every other file is carried through
    unchanged, and the combined file list is written to `output`.

    Args:
        input_data: Value handed to _parse_input() to obtain the list of
            subread files.
        reference_fofn: Value handed to parse_reference_dict(); the result
            is indexed by locus to find the reference FASTA.
        target_loci: Iterable of locus names whose subreads should be split.
        output: Path of the list file to write. When not given, it is
            obtained from _get_output_file( input_data ).
    """
    # Check and set the input and output, as needed
    log.info("Separating amplicons for these loci[%s]" % ','.join(target_loci))
    file_list = _parse_input( input_data )
    output = output or _get_output_file( input_data )
    references = parse_reference_dict( reference_fofn )

    # Iterate over the input subread files, splitting as needed
    new_files = []
    for filepath in file_list:
        locus = get_file_locus( filepath )

        # Files for loci we were not asked to split pass through untouched,
        # so they never reach the reference lookup below
        if locus not in target_loci:
            new_files.append( filepath )
            continue

        if is_amplicon_specific( filepath ):
            log.info("Subreads for Locus %s already split, skipping..." % locus)
            new_files.append( filepath )
            continue

        # Otherwise, separate the sequences and collect the resulting files
        log.info("Separating subreads by amplicon for Locus %s" % locus)
        reference_fasta = references[locus]
        # NOTE(review): only the current file is passed, so each file is
        # split once; assumes _separate_amplicons takes a list of files and
        # returns the list of resulting files -- confirm against the helper
        split_files = _separate_amplicons( [filepath], reference_fasta, locus )
        new_files.extend( split_files )

    # Written once, after the loop, so an empty input still yields a list file
    write_list_file( new_files, output )
Ejemplo n.º 2
0
def separate_amplicons(input_data, reference_fofn, target_loci, output=None):
    """
    Public interface for _separate_amplicons

    Walk the subread files obtained from `input_data` and split, by
    amplicon, every file whose locus is in `target_loci` and which is not
    already amplicon-specific. Every other file is carried through
    unchanged, and the combined file list is written to `output`.

    Args:
        input_data: Value handed to _parse_input() to obtain the list of
            subread files.
        reference_fofn: Value handed to parse_reference_dict(); the result
            is indexed by locus to find the reference FASTA.
        target_loci: Iterable of locus names whose subreads should be split.
        output: Path of the list file to write. When not given, it is
            obtained from _get_output_file(input_data).
    """
    # Check and set the input and output, as needed
    log.info("Separating amplicons for these loci[%s]" % ','.join(target_loci))
    file_list = _parse_input(input_data)
    output = output or _get_output_file(input_data)
    references = parse_reference_dict(reference_fofn)

    # Iterate over the input subread files, splitting as needed
    new_files = []
    for filepath in file_list:
        locus = get_file_locus(filepath)

        # Files for loci we were not asked to split pass through untouched,
        # so they never reach the reference lookup below
        if locus not in target_loci:
            new_files.append(filepath)
            continue

        if is_amplicon_specific(filepath):
            log.info("Subreads for Locus %s already split, skipping..." %
                     locus)
            new_files.append(filepath)
            continue

        # Otherwise, separate the sequences and collect the resulting files
        log.info("Separating subreads by amplicon for Locus %s" % locus)
        reference_fasta = references[locus]
        # NOTE(review): only the current file is passed, so each file is
        # split once; assumes _separate_amplicons takes a list of files and
        # returns the list of resulting files -- confirm against the helper
        split_files = _separate_amplicons([filepath], reference_fasta, locus)
        new_files.extend(split_files)

    # Written once, after the loop, so an empty input still yields a list file
    write_list_file(new_files, output)
Ejemplo n.º 3
0
def summarize_amp_analysis( input_dir, output_dir ):
    """
    Collect the AmpliconAnalysis results found under `input_dir`.

    `output_dir` is first passed to create_directory(). The results yielded
    by find_amp_assem_results() are handed to output_amp_assem_results(),
    and the files that call yields are listed in
    "AmpliconAssembly_Results.fofn" inside `output_dir`.

    Returns the path of that FOFN.
    """
    create_directory( output_dir )
    message = 'Combining AmpliconAnalysis output from "{0}" to "{1}"'
    log.info( message.format(input_dir, output_dir) )

    # Both helpers are drained into lists before the FOFN is written
    found = list( find_amp_assem_results(input_dir) )
    written = list( output_amp_assem_results(found, output_dir) )

    fofn_path = os.path.join( output_dir, "AmpliconAssembly_Results.fofn" )
    write_list_file( written, fofn_path )
    return fofn_path
Ejemplo n.º 4
0
def summarize_amp_analysis(input_dir, output_dir):
    """
    Collect the AmpliconAnalysis results found under `input_dir`.

    `output_dir` is first passed to create_directory(). The results yielded
    by find_amp_assem_results() are handed to output_amp_assem_results(),
    and the files that call yields are listed in
    "AmpliconAssembly_Results.fofn" inside `output_dir`.

    Returns the path of that FOFN.
    """
    create_directory(output_dir)
    message = 'Combining AmpliconAnalysis output from "{0}" to "{1}"'
    log.info(message.format(input_dir, output_dir))

    # Both helpers are drained into lists before the FOFN is written
    found = list(find_amp_assem_results(input_dir))
    written = list(output_amp_assem_results(found, output_dir))

    fofn_path = os.path.join(output_dir, "AmpliconAssembly_Results.fofn")
    write_list_file(written, fofn_path)
    return fofn_path
Ejemplo n.º 5
0
def combine_clusense_output(input_dir, output_dir):
    """
    Combine the clusense output found under `input_dir` into `output_dir`.

    `output_dir` is first passed to create_directory(). The directories
    returned by find_clusense_dirs() are given to find_clusense_clusters(),
    and the resulting clusters are passed to output_clusters(), which
    returns two file lists. Each list is written to its own FOFN inside
    `output_dir`, named by the CNS_FOFN and READ_FOFN constants.

    Returns a (cns_fofn_path, read_fofn_path) tuple.
    """
    create_directory( output_dir )
    message = 'Combining clusense output from "{0}" in "{1}"'
    log.info( message.format(input_dir, output_dir) )

    clusters = find_clusense_clusters( find_clusense_dirs( input_dir ) )
    cns_paths, read_paths = output_clusters( clusters, output_dir )

    # First FOFN: the first list returned by output_clusters()
    cns_fofn = os.path.join( output_dir, CNS_FOFN )
    write_list_file( cns_paths, cns_fofn )

    # Second FOFN: the second list returned by output_clusters()
    read_fofn = os.path.join( output_dir, READ_FOFN )
    write_list_file( read_paths, read_fofn )

    return cns_fofn, read_fofn
Ejemplo n.º 6
0
def combine_clusense_output(input_dir, output_dir):
    """
    Combine the clusense output found under `input_dir` into `output_dir`.

    `output_dir` is first passed to create_directory(). The directories
    returned by find_clusense_dirs() are given to find_clusense_clusters(),
    and the resulting clusters are passed to output_clusters(), which
    returns two file lists. Each list is written to its own FOFN inside
    `output_dir`, named by the CNS_FOFN and READ_FOFN constants.

    Returns a (cns_fofn_path, read_fofn_path) tuple.
    """
    create_directory(output_dir)
    message = 'Combining clusense output from "{0}" in "{1}"'
    log.info(message.format(input_dir, output_dir))

    clusters = find_clusense_clusters(find_clusense_dirs(input_dir))
    cns_paths, read_paths = output_clusters(clusters, output_dir)

    # First FOFN: the first list returned by output_clusters()
    cns_fofn = os.path.join(output_dir, CNS_FOFN)
    write_list_file(cns_paths, cns_fofn)

    # Second FOFN: the second list returned by output_clusters()
    read_fofn = os.path.join(output_dir, READ_FOFN)
    write_list_file(read_paths, read_fofn)

    return cns_fofn, read_fofn
Ejemplo n.º 7
0
def create_baxh5_fofn( input_file, output_file ):
    """
    Write a FOFN of BaxH5 files derived from `input_file` to `output_file`.

    The file list is chosen by the suffix of `input_file`:
        .fofn        -> the result of _parse_fofn( input_file )
        .bas.h5      -> the result of _parse_bash5( input_file )
        .bax.h5      -> the input file itself
        .fa / .fasta -> an empty list

    Args:
        input_file: Path whose suffix selects how the list is built.
        output_file: Path of the FOFN to write.

    Returns:
        `output_file`, unchanged.

    Raises:
        TypeError: If `input_file` has none of the recognised suffixes.
    """
    log.info("Converting %s into a FOFN of BaxH5 files" % input_file)
    if input_file.endswith('.fofn'):
        baxh5_files = _parse_fofn( input_file )
    elif input_file.endswith('.bas.h5'):
        baxh5_files = _parse_bash5( input_file )
    elif input_file.endswith('.bax.h5'):
        baxh5_files = [input_file]
    elif input_file.endswith(('.fa', '.fasta')):
        # FASTA input contributes no BaxH5 files, so an empty FOFN is written
        baxh5_files = []
    else:
        msg = 'Invalid input filetype "%s"' % input_file
        # Reported at error level since the call is about to fail
        log.error( msg )
        raise TypeError( msg )
    log.info("Writing a total of %s BaxH5 files to BaxH5 Fofn" % len(baxh5_files))
    write_list_file( baxh5_files, output_file )
    return output_file
Ejemplo n.º 8
0
def separate_amplicons( subread_input, reference_fofn, locus, output=None ):
    """
    Public interface for _separate_amplicons

    Split the subread files for `locus` by amplicon and write the resulting
    file list to `output`. Nothing is written when _split_exists() reports
    that the files have already been split.

    Args:
        subread_input: Either the path of a list file (read with
            read_list_file()) or a list of subread files.
        reference_fofn: Value handed to _parse_reference_fofn() together
            with `locus` to obtain the reference FASTA.
        locus: Name of the locus whose subreads should be split.
        output: Path of the list file to write. Optional when
            `subread_input` is a path, in which case that same path is used;
            required when `subread_input` is a list.

    Raises:
        ValueError: If `subread_input` is a list and `output` is not given.
        TypeError: If `subread_input` is neither a string nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file( subread_input )
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error( msg )
            raise ValueError( msg )
    else:
        # Fail here with a clear message instead of hitting an unbound
        # `file_list` further down
        msg = 'Subread input must be a list-file path or a list of files!'
        log.error( msg )
        raise TypeError( msg )
    # If the inputs are valid, check that the files haven't already been split
    if _split_exists( file_list, locus ):
        log.info("Subreads for Locus %s already split, skipping..." % locus)
        return
    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn( reference_fofn, locus )
    new_file_list = _separate_amplicons( file_list, reference_fasta, locus)
    write_list_file( new_file_list, output )
Ejemplo n.º 9
0
def separate_amplicons(subread_input, reference_fofn, locus, output=None):
    """
    Public interface for _separate_amplicons

    Split the subread files for `locus` by amplicon and write the resulting
    file list to `output`. Nothing is written when _split_exists() reports
    that the files have already been split.

    Args:
        subread_input: Either the path of a list file (read with
            read_list_file()) or a list of subread files.
        reference_fofn: Value handed to _parse_reference_fofn() together
            with `locus` to obtain the reference FASTA.
        locus: Name of the locus whose subreads should be split.
        output: Path of the list file to write. Optional when
            `subread_input` is a path, in which case that same path is used;
            required when `subread_input` is a list.

    Raises:
        ValueError: If `subread_input` is a list and `output` is not given.
        TypeError: If `subread_input` is neither a string nor a list.
    """
    # Convert input to list if needed
    if isinstance(subread_input, str):
        file_list = read_list_file(subread_input)
        if output is None:
            output = subread_input
    elif isinstance(subread_input, list):
        file_list = subread_input
        if output is None:
            msg = 'Output file must be specified with file-list input!'
            log.error(msg)
            raise ValueError(msg)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # `file_list` further down
        msg = 'Subread input must be a list-file path or a list of files!'
        log.error(msg)
        raise TypeError(msg)
    # If the inputs are valid, check that the files haven't already been split
    if _split_exists(file_list, locus):
        log.info("Subreads for Locus %s already split, skipping..." % locus)
        return
    # Otherwise, separate the sequences and write the results
    log.info("Separating subreads by amplicon for Locus %s" % locus)
    reference_fasta = _parse_reference_fofn(reference_fofn, locus)
    new_file_list = _separate_amplicons(file_list, reference_fasta, locus)
    write_list_file(new_file_list, output)