Ejemplo n.º 1
0
def summarize_amp_analysis( input_dir, output_dir ):
    """
    Combine AmpliconAnalysis output into a single directory.

    Creates output_dir, passes whatever find_amp_assem_results yields for
    input_dir on to output_amp_assem_results, and records the files that
    helper reports in "AmpliconAssembly_Results.fofn" inside output_dir.

    Returns the path of that FOFN.
    """
    create_directory( output_dir )
    message = 'Combining AmpliconAnalysis output from "{0}" to "{1}"'
    log.info( message.format(input_dir, output_dir) )
    # Both helpers are fully consumed into lists before the FOFN is written
    found_results = list( find_amp_assem_results(input_dir) )
    written_files = list( output_amp_assem_results(found_results, output_dir) )
    fofn_path = os.path.join( output_dir, "AmpliconAssembly_Results.fofn" )
    write_list_file( written_files, fofn_path )
    return fofn_path
Ejemplo n.º 2
0
def summarize_amp_analysis(input_dir, output_dir):
    """Combine AmpliconAnalysis output into a single directory.

    Creates output_dir, passes whatever find_amp_assem_results yields for
    input_dir on to output_amp_assem_results, and records the files that
    helper reports in "AmpliconAssembly_Results.fofn" inside output_dir.

    Returns the path of that FOFN.
    """
    create_directory(output_dir)
    message = 'Combining AmpliconAnalysis output from "{0}" to "{1}"'
    log.info(message.format(input_dir, output_dir))
    # Both helpers are fully consumed into lists before the FOFN is written
    found_results = list(find_amp_assem_results(input_dir))
    written_files = list(output_amp_assem_results(found_results, output_dir))
    fofn_path = os.path.join(output_dir, "AmpliconAssembly_Results.fofn")
    write_list_file(written_files, fofn_path)
    return fofn_path
Ejemplo n.º 3
0
    def __init__(self, output, setup=None, nproc=1, debug=False):
        """Initialize cross-cluster and object-specific settings

        Args:
            output: location of the output folder; stored as self._output.
            setup: forwarded unchanged to Resequencer (defaults to None).
            nproc: forwarded unchanged to Resequencer (defaults to 1).
            debug: when true, `log` is switched to DEBUG level before
                anything else is logged.
        """
        if debug:
            log.setLevel(logging.DEBUG)
            log.debug("TESTING")
        log.info("Initializing Resequencer sub-module")
        self._resequencer = Resequencer(setup, nproc)

        # Initialize output folder
        # NOTE(review): the value is stored in self._output but read back as
        # self.output - presumably a property defined elsewhere on the class;
        # confirm it exists and returns self._output.
        self._output = output
        create_directory(self.output)
Ejemplo n.º 4
0
    def __init__(self, output, setup=None, nproc=1, debug=False):
        """Initialize cross-cluster and object-specific settings

        Args:
            output: location of the output folder; stored as self._output.
            setup: forwarded unchanged to Resequencer (defaults to None).
            nproc: forwarded unchanged to Resequencer (defaults to 1).
            debug: when true, `log` is switched to DEBUG level before
                anything else is logged.
        """
        if debug:
            log.setLevel(logging.DEBUG)
            log.debug("TESTING")
        log.info("Initializing Resequencer sub-module")
        self._resequencer = Resequencer(setup, nproc)

        # Initialize output folder
        # NOTE(review): the value is stored in self._output but read back as
        # self.output - presumably a property defined elsewhere on the class;
        # confirm it exists and returns self._output.
        self._output = output
        create_directory(self.output)
Ejemplo n.º 5
0
def combine_clusense_output(input_dir, output_dir):
    """
    Combine clusense output into a single directory.

    Creates output_dir, hands the clusters located through
    find_clusense_dirs / find_clusense_clusters to output_clusters, and
    writes the two file lists it returns to the CNS_FOFN and READ_FOFN
    files inside output_dir.

    Returns a (consensus FOFN path, read FOFN path) tuple.
    """
    def _write_fofn( fofn_name, file_list ):
        # Write one list file into output_dir and hand back its path
        fofn_path = os.path.join( output_dir, fofn_name )
        write_list_file( file_list, fofn_path )
        return fofn_path

    create_directory( output_dir )
    message = 'Combining clusense output from "{0}" in "{1}"'
    log.info( message.format(input_dir, output_dir) )

    clusters = find_clusense_clusters( find_clusense_dirs(input_dir) )
    consensus_files, cluster_reads = output_clusters( clusters, output_dir )

    # Consensus list first, then the read list, as two separate FOFNs
    consensus_fofn = _write_fofn( CNS_FOFN, consensus_files )
    reads_fofn = _write_fofn( READ_FOFN, cluster_reads )
    return consensus_fofn, reads_fofn
Ejemplo n.º 6
0
def combine_clusense_output(input_dir, output_dir):
    """Combine clusense output into a single directory.

    Creates output_dir, hands the clusters located through
    find_clusense_dirs / find_clusense_clusters to output_clusters, and
    writes the two file lists it returns to the CNS_FOFN and READ_FOFN
    files inside output_dir.

    Returns a (consensus FOFN path, read FOFN path) tuple.
    """
    def _write_fofn(fofn_name, file_list):
        # Write one list file into output_dir and hand back its path
        fofn_path = os.path.join(output_dir, fofn_name)
        write_list_file(file_list, fofn_path)
        return fofn_path

    create_directory(output_dir)
    message = 'Combining clusense output from "{0}" in "{1}"'
    log.info(message.format(input_dir, output_dir))

    clusters = find_clusense_clusters(find_clusense_dirs(input_dir))
    consensus_files, cluster_reads = output_clusters(clusters, output_dir)

    # Consensus list first, then the read list, as two separate FOFNs
    consensus_fofn = _write_fofn(CNS_FOFN, consensus_files)
    reads_fofn = _write_fofn(READ_FOFN, cluster_reads)
    return consensus_fofn, reads_fofn
Ejemplo n.º 7
0
def split_results(amp_analysis):
    """Split the output of an Amplicon Analysis job by Barcode

    Reads 'amplicon_analysis.fasta' from the amp_analysis directory, groups
    its records by the value get_barcode returns for each one, and writes
    every group to '<barcode>.fasta' inside a 'by_barcode' sub-directory of
    amp_analysis.

    Returns a dict mapping each barcode to the path of its Fasta file.
    Raises AssertionError if amp_analysis is not an existing directory.
    """
    # Kept as an assert so callers still see AssertionError for a bad path
    assert os.path.isdir(amp_analysis)
    sequence_path = os.path.join(amp_analysis, 'amplicon_analysis.fasta')
    check_output_file(sequence_path)
    # Single-argument print() emits the same text on Python 2 and Python 3
    print("Analyzing %s output sequences" % fasta_size(sequence_path))
    barcode_path = os.path.join(amp_analysis, 'by_barcode')
    create_directory(barcode_path)

    # Group in one pass so get_barcode runs once per record, instead of
    # building the dict and then filling it through a throwaway list
    records_by_barcode = {}
    for record in FastaReader(sequence_path):
        records_by_barcode.setdefault(get_barcode(record), []).append(record)

    barcode_files = {}
    for barcode, barcode_records in records_by_barcode.items():
        sample_path = os.path.join(barcode_path, barcode + '.fasta')
        with FastaWriter(sample_path) as handle:
            for record in barcode_records:
                handle.writeRecord(record)
        barcode_files[barcode] = sample_path
    return barcode_files
Ejemplo n.º 8
0
def split_results(amp_analysis):
    """Split the output of an Amplicon Analysis job by Barcode

    Reads "amplicon_analysis.fasta" from the amp_analysis directory, groups
    its records by the value get_barcode returns for each one, and writes
    every group to "<barcode>.fasta" inside a "by_barcode" sub-directory of
    amp_analysis.

    Returns a dict mapping each barcode to the path of its Fasta file.
    Raises AssertionError if amp_analysis is not an existing directory.
    """
    # Kept as an assert so callers still see AssertionError for a bad path
    assert os.path.isdir(amp_analysis)
    sequence_path = os.path.join(amp_analysis, "amplicon_analysis.fasta")
    check_output_file(sequence_path)
    # Single-argument print() emits the same text on Python 2 and Python 3
    print("Analyzing %s output sequences" % fasta_size(sequence_path))
    barcode_path = os.path.join(amp_analysis, "by_barcode")
    create_directory(barcode_path)

    # Group in one pass so get_barcode runs once per record, instead of
    # building the dict and then filling it through a throwaway list
    records_by_barcode = {}
    for record in FastaReader(sequence_path):
        records_by_barcode.setdefault(get_barcode(record), []).append(record)

    barcode_files = {}
    for barcode, barcode_records in records_by_barcode.items():
        sample_path = os.path.join(barcode_path, barcode + ".fasta")
        with FastaWriter(sample_path) as handle:
            for record in barcode_records:
                handle.writeRecord(record)
        barcode_files[barcode] = sample_path
    return barcode_files
Ejemplo n.º 9
0
def extract_cDNA( input_file, exon_fofn, output=None,
                                         directory=None,
                                         reference_file=None,
                                         alignment_file=None,
                                         debug=False ):
    """
    Extract the cDNA sequences from a mixed Fasta

    input_file     -- Fasta file whose records are processed
    exon_fofn      -- passed to _parse_exon_fofn to obtain the exon references
    output         -- combined cDNA file; defaults to the value returned by
                      _get_output_file( input_file )
    directory      -- working directory; defaults to a 'cDNA' folder next to
                      input_file, which is removed first if it was defaulted
    reference_file -- used to align input_file when no alignment is given
    alignment_file -- existing alignment, passed to _parse_loci
    debug          -- when true, the working directory is kept afterwards

    Returns the path of the combined cDNA file.
    Raises IOError when neither an alignment nor a reference is supplied.
    """
    # Check the input files, and align the input file if needed
    if alignment_file is None:
        # Any falsy reference (None or "") counts as missing, so we never
        # fall through to _parse_loci with alignment_file still None
        if not reference_file:
            msg = "extract_cDNA requires either an Alignment or a Reference!"
            log.error( msg )
            raise IOError( msg )
        alignment_file = align_best_reference( input_file, reference_file )

    # Set the output and directory if it hasn't been specified
    if directory is None:
        dirname = os.path.dirname( input_file )
        directory = os.path.join( dirname, 'cDNA' )
        # Only the defaulted folder is wiped before use
        remove_directory( directory )

    create_directory( directory )
    output = output or _get_output_file( input_file )

    # Prepare the Fasta by orienting and subsetting it
    records = _parse_input_records( input_file )
    fofn = _parse_exon_fofn( exon_fofn )
    loci = _parse_loci( alignment_file )
    log.info("Extracting cDNA sequences from all records")
    _extract_cDNA( records, loci, fofn, directory )
    log.info("Collecting all extracted cDNA records into %s" % output)
    _collect_cDNA( directory, output )

    # Clean up the directory and return the combined cDNA file
    # NOTE: this also removes a caller-supplied directory unless debug is set
    if not debug:
        remove_directory( directory )
    return output
Ejemplo n.º 10
0
 def initialize_output(self, output):
     """Set up the cluster-specific output folders

     Stores the absolute form of output as self._output, and its "scripts"
     and "logs" sub-folders as self._scripts and self._logs, calling
     create_directory on each path right after it is stored.
     """
     # TODO: Check for existing directories and do something
     root_dir = os.path.abspath(output)
     self._output = root_dir
     create_directory(root_dir)

     scripts_dir = os.path.join(root_dir, "scripts")
     self._scripts = scripts_dir
     create_directory(scripts_dir)

     logs_dir = os.path.join(root_dir, "logs")
     self._logs = logs_dir
     create_directory(logs_dir)
Ejemplo n.º 11
0
 def initialize_output(self, output):
     """Set up the cluster-specific output folders

     Stores the absolute form of output as self._output, and its 'scripts'
     and 'logs' sub-folders as self._scripts and self._logs, calling
     create_directory on each path right after it is stored.
     """
     # TODO: Check for existing directories and do something
     root_dir = os.path.abspath(output)
     self._output = root_dir
     create_directory(root_dir)

     scripts_dir = os.path.join(root_dir, 'scripts')
     self._scripts = scripts_dir
     create_directory(scripts_dir)

     logs_dir = os.path.join(root_dir, 'logs')
     self._logs = logs_dir
     create_directory(logs_dir)
Ejemplo n.º 12
0
def _extract_cDNA( records, loci, fofn, directory ):
    """
    Extract and create a cDNA record for each Fasta Sequence
    """
    for record in records:
        # Create an output folder for each record to process
        name = record.name.split()[0]
        try:
            locus = loci[name]
        except:
            log.warn( 'No HLA locus associated with "%s" - skipping' % name )
            continue
        # Create a directory 
        record_directory = os.path.join( directory, name )
        create_directory( record_directory )
        # Find the appropriate Locus and FOFN
        if locus in fofn:
            exon_fofn = fofn[locus]
        else:
            log.warn( 'No exonic reference for %s' % locus )
        # Extract the exons and make the cDNA
        exon_file = extract_exons( record, exon_fofn, record_directory )
        if exon_file:
            cDNA_file = exons_to_cDNA( exon_file )
Ejemplo n.º 13
0
 def get_output_folder(self, barcode):
     """Return the folder for barcode under self.output

     create_directory is called on the joined path before it is returned.
     """
     barcode_dir = os.path.join(self.output, barcode)
     create_directory(barcode_dir)
     return barcode_dir
Ejemplo n.º 14
0
 def get_output_folder(self, barcode):
     """Return the folder for barcode under self.output

     create_directory is called on the joined path before it is returned.
     """
     barcode_dir = os.path.join(self.output, barcode)
     create_directory(barcode_dir)
     return barcode_dir