def create_ref_id_dict(self, ref_genomes):
    """Build a lookup from reference id to reference genome path.

    The key for every entry is whatever ``misc.get_refid_from_path`` returns
    for that path; the value is the path itself, unchanged.

    Args:
        ref_genomes: sized collection of reference genome paths.

    Returns:
        dict mapping reference id -> path, one entry per given genome.

    Raises:
        TypeError: if two or more paths produce the same reference id, i.e.
            the dict ends up with fewer entries than there are genomes.
    """
    id_to_path = {
        misc.get_refid_from_path(genome_path): genome_path
        for genome_path in ref_genomes
    }
    # Colliding ids overwrite each other, so a size mismatch means duplicates.
    if len(id_to_path) == len(ref_genomes):
        return id_to_path
    raise TypeError("The given reference genomes must have unique base names in their paths.")
def align_to_references(state):
    """Align the forward and reverse read files to every reference genome.

    For each path in ``state.paths.reference_genomes`` this calls
    ``bowtie2.build`` on the reference and then ``bowtie2.align_to_genome``
    twice: first with ``state.paths.fastq_files[0]`` (forward reads), then
    with ``state.paths.fastq_files[1]`` (reverse reads).  The requested SAM
    outputs live in ``state.paths.full_sams_dir`` and are named
    ``fastq1_to_genome_<suffix>.sam`` / ``fastq2_to_genome_<suffix>.sam``,
    where ``<suffix>`` is the reference's base name up to its first dot.

    Side effects on ``state``:
      * after each alignment, ``misc.get_sam_header(<sam>)`` is appended to
        ``state.settings.reference_headers_dict[<ref id>]`` (forward first,
        then reverse), with ``<ref id>`` from ``misc.get_refid_from_path``;
      * ``state.paths.fastq_to_genome_algnmnts`` is set to the returned dict.

    Args:
        state: object exposing ``logger``, ``paths`` and ``settings`` with the
            attributes referenced above.

    Returns:
        dict mapping each reference path to a ``(forward_sam, reverse_sam)``
        tuple holding the values returned by ``bowtie2.align_to_genome``.

    Raises:
        IndexError: if there is at least one reference genome but
            ``state.paths.fastq_files`` has fewer than two entries.
    """
    logger = state.logger
    bowtie2_version = '2.2.9'
    # Position in this tuple == index into state.paths.fastq_files.
    read_directions = ("forward", "reverse")

    ref_sams = {}
    for refpath in state.paths.reference_genomes:
        ref_name = basename(refpath)
        suffix = ref_name.split('.')[0]
        # Computed once per reference instead of once per use.
        # NOTE(review): assumes get_refid_from_path is a pure function of the
        # path - confirm against misc.
        refid = misc.get_refid_from_path(refpath)

        logger.info("Building the genome fasta file for the file %s..." % ref_name)
        bowtie2.build(bowtie2_version, refpath, logger)

        outsams = []
        for mate_index, direction in enumerate(read_directions):
            fastq = state.paths.fastq_files[mate_index]
            logger.info("Aligning the %s reads in %s to the genome %s..."
                        % (direction, basename(fastq), ref_name))
            outsam = bowtie2.align_to_genome(
                bowtie2_version, refpath, fastq,
                os.path.join(state.paths.full_sams_dir,
                             'fastq%d_to_genome_%s.sam' % (mate_index + 1, suffix)),
                state.settings.threads)
            logger.info("Output saved to %s" % basename(outsam))

            logger.debug("Saving reference header for %s under %s" % (ref_name, refid))
            # NOTE(review): requires reference_headers_dict[refid] to already
            # be a list (or the dict to be a defaultdict(list)) - verify.
            state.settings.reference_headers_dict[refid].append(
                misc.get_sam_header(outsam))
            outsams.append(outsam)

        ref_sams[refpath] = tuple(outsams)

    state.paths.fastq_to_genome_algnmnts = ref_sams
    return ref_sams
def genome(path):
    """Validate one reference genome path and return it as an absolute path.

    Failures are reported as ``argparse.ArgumentTypeError``, so this is
    presumably meant to be passed as an argparse ``type=`` converter.

    A path is accepted when it is an existing regular file and its reference
    id (``misc.get_refid_from_path``) is not yet in the module-level
    ``ref_basenames`` set.  On success the path's base name, cut at the first
    dot, is added to ``ref_basenames``.

    Args:
        path: path to a reference genome fasta file.

    Returns:
        ``os.path.abspath(path)``.

    Raises:
        argparse.ArgumentTypeError: if the path is not a file, its id was
            already seen, or any other ordinary error occurs while checking.
    """
    try:
        if not os.path.isfile(path):
            raise TypeError("Genome fasta is not a file.")
        if misc.get_refid_from_path(path) in ref_basenames:
            raise TypeError("Duplicate Basename")
        # NOTE(review): the membership test above uses get_refid_from_path()
        # while the stored key is basename.split('.')[0]; duplicate detection
        # only works if both yield the same string - confirm against misc.
        ref_basenames.add(os.path.basename(path).split('.')[0])
        return os.path.abspath(path)
    except Exception:
        # Deliberately not a bare ``except:`` - KeyboardInterrupt and
        # SystemExit must propagate instead of becoming a usage error.
        raise argparse.ArgumentTypeError(
            "Genomes must be entered as \"<reference-fasta1> <reference-fasta2>\" and "
            "they must have unique basenames.")