def summarize_amp_analysis(input_dir, output_dir):
    """Combine AmpliconAnalysis output into one folder and index it.

    Creates ``output_dir``, feeds whatever ``find_amp_assem_results``
    yields for ``input_dir`` through ``output_amp_assem_results``, and
    records the resulting file list in ``AmpliconAssembly_Results.fofn``
    inside ``output_dir``.

    Returns the path of that FOFN file.
    """
    create_directory(output_dir)

    message = 'Combining AmpliconAnalysis output from "{0}" to "{1}"'
    log.info(message.format(input_dir, output_dir))

    # Both helpers are drained into lists before anything is written
    found = list(find_amp_assem_results(input_dir))
    written = list(output_amp_assem_results(found, output_dir))

    fofn_path = os.path.join(output_dir, "AmpliconAssembly_Results.fofn")
    write_list_file(written, fofn_path)
    return fofn_path
def summarize_amp_analysis(input_dir, output_dir):
    """Collect AmpliconAnalysis results under ``output_dir``.

    The output directory is created, the results located by
    ``find_amp_assem_results(input_dir)`` are handed to
    ``output_amp_assem_results``, and the files that call reports are
    listed in ``AmpliconAssembly_Results.fofn`` within ``output_dir``.

    Returns the path to the FOFN that was written.
    """
    create_directory(output_dir)
    log.info(
        'Combining AmpliconAnalysis output from "{0}" to "{1}"'.format(
            input_dir, output_dir
        )
    )

    assembly_results = list(find_amp_assem_results(input_dir))
    copied_files = list(
        output_amp_assem_results(assembly_results, output_dir)
    )

    # The FOFN lives alongside the files it lists
    fofn = os.path.join(output_dir, "AmpliconAssembly_Results.fofn")
    write_list_file(copied_files, fofn)
    return fofn
def __init__(self, output, setup=None, nproc=1, debug=False):
    """Initialize cross-cluster and object-specific settings.

    ``debug`` switches the module logger to DEBUG level; ``setup`` and
    ``nproc`` are forwarded unchanged to ``Resequencer``; ``output`` is
    stored and its directory created.
    """
    if debug:
        log.setLevel(logging.DEBUG)
        log.debug("TESTING")

    log.info("Initializing Resequencer sub-module")
    self._resequencer = Resequencer(setup, nproc)

    # Initialize output folder
    self._output = output
    # NOTE(review): reads self.output rather than self._output -- presumably
    # a property defined elsewhere on the class; confirm.
    create_directory(self.output)
def combine_clusense_output(input_dir, output_dir):
    """Combine clusense output found under ``input_dir`` into ``output_dir``.

    Creates ``output_dir``, locates clusense directories and their
    clusters via the ``find_clusense_*`` helpers, writes the clusters out
    with ``output_clusters`` and lists the two returned file collections
    in the ``CNS_FOFN`` and ``READ_FOFN`` files inside ``output_dir``.

    Returns a ``(cns_fofn_path, read_fofn_path)`` tuple.
    """
    create_directory(output_dir)

    message = 'Combining clusense output from "{0}" in "{1}"'
    log.info(message.format(input_dir, output_dir))

    clusters = find_clusense_clusters(find_clusense_dirs(input_dir))
    cns_files, read_files = output_clusters(clusters, output_dir)

    # One FOFN per file collection, consensus list first
    cns_fofn = os.path.join(output_dir, CNS_FOFN)
    write_list_file(cns_files, cns_fofn)

    read_fofn = os.path.join(output_dir, READ_FOFN)
    write_list_file(read_files, read_fofn)

    return cns_fofn, read_fofn
def combine_clusense_output(input_dir, output_dir):
    """Gather clusense results from ``input_dir`` and index them.

    After creating ``output_dir`` the clusense directories and clusters
    are discovered with ``find_clusense_dirs`` / ``find_clusense_clusters``
    and written out by ``output_clusters``.  The two file lists it returns
    are saved as ``CNS_FOFN`` and ``READ_FOFN`` under ``output_dir``.

    Returns the paths of those two FOFN files, consensus first.
    """
    create_directory(output_dir)
    log.info(
        'Combining clusense output from "{0}" in "{1}"'.format(
            input_dir, output_dir
        )
    )

    source_dirs = find_clusense_dirs(input_dir)
    found_clusters = find_clusense_clusters(source_dirs)
    consensus_files, raw_read_files = output_clusters(
        found_clusters, output_dir
    )

    # Write the consensus list before the read list, as the caller
    # receives them in that order
    consensus_fofn = os.path.join(output_dir, CNS_FOFN)
    write_list_file(consensus_files, consensus_fofn)
    reads_fofn = os.path.join(output_dir, READ_FOFN)
    write_list_file(raw_read_files, reads_fofn)
    return consensus_fofn, reads_fofn
def split_results(amp_analysis):
    """Split the output of an Amplicon Analysis job by Barcode.

    Reads ``amplicon_analysis.fasta`` from the ``amp_analysis`` directory,
    groups its records by the value ``get_barcode`` returns for each one
    and writes every group to ``by_barcode/<barcode>.fasta`` inside that
    same directory.

    Returns a dict mapping each barcode to the path of its Fasta file.

    Raises AssertionError if ``amp_analysis`` is not an existing directory
    (note that asserts are skipped when Python runs with ``-O``).
    """
    assert os.path.isdir(amp_analysis)
    sequence_path = os.path.join(amp_analysis, 'amplicon_analysis.fasta')
    check_output_file(sequence_path)
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3
    print("Analyzing %s output sequences" % fasta_size(sequence_path))

    barcode_path = os.path.join(amp_analysis, 'by_barcode')
    create_directory(barcode_path)

    # Group records in a single pass so get_barcode runs once per record
    barcodes = {}
    for record in FastaReader(sequence_path):
        barcodes.setdefault(get_barcode(record), []).append(record)

    barcode_files = {}
    # .items() is available on both Python 2 and 3, unlike .iteritems()
    for barcode, barcode_records in barcodes.items():
        sample_path = os.path.join(barcode_path, barcode + '.fasta')
        with FastaWriter(sample_path) as handle:
            for record in barcode_records:
                handle.writeRecord(record)
        barcode_files[barcode] = sample_path
    return barcode_files
def split_results(amp_analysis):
    """Split the output of an Amplicon Analysis job by Barcode.

    The records of ``amplicon_analysis.fasta`` in the ``amp_analysis``
    directory are grouped by what ``get_barcode`` returns for them, and
    each group is written to its own ``<barcode>.fasta`` file in a
    ``by_barcode`` sub-directory.

    Returns a dict of ``{barcode: path_to_fasta}``.

    Raises AssertionError when ``amp_analysis`` is not a directory (the
    assert is skipped if Python runs with ``-O``).
    """
    assert os.path.isdir(amp_analysis)
    sequence_path = os.path.join(amp_analysis, "amplicon_analysis.fasta")
    check_output_file(sequence_path)
    # One parenthesised argument keeps the output identical on Python 2
    # and Python 3
    print("Analyzing %s output sequences" % fasta_size(sequence_path))

    barcode_path = os.path.join(amp_analysis, "by_barcode")
    create_directory(barcode_path)

    # Build the barcode -> records mapping in one pass; get_barcode is
    # evaluated exactly once per record
    records_by_barcode = {}
    for record in FastaReader(sequence_path):
        barcode = get_barcode(record)
        records_by_barcode.setdefault(barcode, []).append(record)

    barcode_files = {}
    # .items() works on Python 2 and 3; a distinct loop name avoids
    # shadowing the collection being iterated
    for barcode, grouped in records_by_barcode.items():
        sample_path = os.path.join(barcode_path, barcode + ".fasta")
        with FastaWriter(sample_path) as handle:
            for record in grouped:
                handle.writeRecord(record)
        barcode_files[barcode] = sample_path
    return barcode_files
def extract_cDNA(input_file, exon_fofn, output=None, directory=None,
                 reference_file=None, alignment_file=None, debug=False):
    """Extract the cDNA sequences from a mixed Fasta.

    input_file     -- Fasta to process; also handed to
                      ``align_best_reference`` when an alignment is needed
    exon_fofn      -- parsed by ``_parse_exon_fofn``
    output         -- combined cDNA file; defaults to
                      ``_get_output_file(input_file)``
    directory      -- working directory; defaults to a ``cDNA`` folder next
                      to ``input_file``.  It is removed and re-created
                      before use and removed again afterwards unless
                      ``debug`` is set
    reference_file -- used to create an alignment when ``alignment_file``
                      is not given
    alignment_file -- existing alignment, parsed by ``_parse_loci``
    debug          -- keep the working directory when true

    Returns the path of the combined cDNA file.
    Raises IOError when neither an alignment nor a reference is supplied.
    """
    # Check the input files, and align the input file if needed.  A falsy
    # reference (None or an empty string) cannot be aligned against, so it
    # is rejected here instead of letting a None alignment reach the parser.
    if alignment_file is None:
        if not reference_file:
            msg = "extract_cDNA requires either an Alignment or a Reference!"
            log.error(msg)
            raise IOError(msg)
        alignment_file = align_best_reference(input_file, reference_file)

    # Set the output and directory if it hasn't been specified
    if directory is None:
        dirname = os.path.dirname(input_file)
        directory = os.path.join(dirname, 'cDNA')
    # Remove then re-create the working directory before using it
    remove_directory(directory)
    create_directory(directory)
    output = output or _get_output_file(input_file)

    # Prepare the Fasta by orienting and subsetting it
    records = _parse_input_records(input_file)
    fofn = _parse_exon_fofn(exon_fofn)
    loci = _parse_loci(alignment_file)

    log.info("Extracting cDNA sequences from all records")
    _extract_cDNA(records, loci, fofn, directory)

    log.info("Collecting all extracted cDNA records into %s" % output)
    _collect_cDNA(directory, output)

    # Clean up the directory and return the combined cDNA file
    if not debug:
        remove_directory(directory)
    return output
def initialize_output(self, output):
    """Initialize the cluster-specific output folders.

    Stores the absolute form of ``output`` as ``self._output`` and the
    ``scripts`` and ``logs`` sub-folders as ``self._scripts`` and
    ``self._logs``, creating each directory right after it is recorded.
    """
    # TODO: Check for existing directories and do something
    root = os.path.abspath(output)
    self._output = root
    create_directory(root)

    scripts_dir = os.path.join(root, "scripts")
    self._scripts = scripts_dir
    create_directory(scripts_dir)

    logs_dir = os.path.join(root, "logs")
    self._logs = logs_dir
    create_directory(logs_dir)
def initialize_output(self, output):
    """Initialize the cluster-specific output folders.

    ``self._output`` becomes the absolute path of ``output``;
    ``self._scripts`` and ``self._logs`` point at its ``scripts`` and
    ``logs`` sub-folders.  All three directories are created.
    """
    # TODO: Check for existing directories and do something
    self._output = os.path.abspath(output)
    create_directory(self._output)

    # Sub-folders are handled in the same order as before: scripts, logs
    for attribute, folder in (('_scripts', 'scripts'), ('_logs', 'logs')):
        sub_path = os.path.join(self._output, folder)
        setattr(self, attribute, sub_path)
        create_directory(sub_path)
def _extract_cDNA(records, loci, fofn, directory):
    """Extract and create a cDNA record for each Fasta Sequence.

    records   -- iterable of records exposing a ``name`` attribute
    loci      -- mapping from record name (first whitespace-delimited
                 token of ``record.name``) to locus
    fofn      -- mapping from locus to the exon FOFN for that locus
    directory -- parent folder; one sub-folder per processed record is
                 created inside it

    Records whose name has no locus, or whose locus has no exon FOFN, are
    logged and skipped.
    """
    for record in records:
        name = record.name.split()[0]

        # Only a missing key means "no locus"; anything else should surface
        try:
            locus = loci[name]
        except KeyError:
            log.warning('No HLA locus associated with "%s" - skipping' % name)
            continue

        # Find the appropriate Locus and FOFN.  Skip when there is none:
        # carrying on would either hit an unbound exon_fofn or silently
        # reuse the FOFN left over from the previous record.
        if locus not in fofn:
            log.warning('No exonic reference for %s' % locus)
            continue
        exon_fofn = fofn[locus]

        # Create an output folder for each record to process
        record_directory = os.path.join(directory, name)
        create_directory(record_directory)

        # Extract the exons and make the cDNA
        exon_file = extract_exons(record, exon_fofn, record_directory)
        if exon_file:
            exons_to_cDNA(exon_file)
def get_output_folder(self, barcode):
    """Return the folder for ``barcode`` under ``self.output``.

    The folder is passed to ``create_directory`` before its path is
    returned.
    """
    barcode_dir = os.path.join(self.output, barcode)
    create_directory(barcode_dir)
    return barcode_dir