def prepare_genome(genome_file, color_space=False):
    """Index a genome fasta file with BWA.

    Sums the sequence lengths (stopping early once the bwtsw threshold
    is exceeded) to pick the indexing algorithm, then runs `bwa index`.

    :param genome_file: path to the genome fasta file to index.
    :param color_space: if True, build a color-space (SOLiD) index (-c).
    :return: True (run_fine flag; command_runner handles the command's
        own error reporting).
    """
    run_fine = True
    # Genomes longer than this use the 'bwtsw' algorithm; shorter ones
    # use 'is'.  Hoisted so the loop break and the branch below cannot
    # drift apart.  (NOTE(review): the original comment said "<10M" but
    # the code's threshold is 1Gb -- the code is kept as-is.)
    BWTSW_THRESHOLD = 1000000000
    pipeline_param = utils_param.get_pipeline_parameters()
    BWA_dir = pipeline_param.get_bwa_dir()
    BWA_bin = os.path.join(BWA_dir, 'bwa')
    genome_loader = GenomeLoader(genome_file=genome_file)
    length = 0
    for fasta_rec in genome_loader:
        header, sequence = fasta_rec
        length += len(sequence)
        if length > BWTSW_THRESHOLD:
            # No need to read further: the algorithm choice is settled.
            break
    genome_loader.close()
    # Following BWA's recommendation, choose the indexing algorithm
    # from the total genome length.
    if length > BWTSW_THRESHOLD:
        a_option = 'bwtsw'
    else:
        a_option = 'is'
    # Create the indexes (-c switches BWA to color-space).
    if color_space:
        command = '%s index -c -a %s %s' % (BWA_bin, a_option, genome_file)
    else:
        command = '%s index -a %s %s' % (BWA_bin, a_option, genome_file)
    command_runner.run_command(command)
    return run_fine
def prepare_genome(genome_file, color_space=False):
    """Run `bwa index` on genome_file.

    The indexing algorithm is chosen from the total sequence length
    ('bwtsw' above 1e9 bases, 'is' otherwise); returns True.
    """
    run_fine = True
    pipeline_param = utils_param.get_pipeline_parameters()
    bwa_binary = os.path.join(pipeline_param.get_bwa_dir(), 'bwa')
    loader = GenomeLoader(genome_file=genome_file)
    total_length = 0
    for header, sequence in loader:
        total_length += len(sequence)
        if total_length > 1000000000:
            # Long enough to settle the algorithm choice below.
            break
    loader.close()
    #Following recommendation set the indexing algorithm to is if genome is <10M
    a_option = 'bwtsw' if total_length > 1000000000 else 'is'
    #Create the indexes
    if color_space:
        command = '%s index -c -a %s %s' % (bwa_binary, a_option, genome_file)
    else:
        command = '%s index -a %s %s' % (bwa_binary, a_option, genome_file)
    command_runner.run_command(command)
    return run_fine
def bin_coordinates_through_genome(input_file, output_file, genome_file, bin_size):
    """Bin genomic coordinates into fixed-size windows across a genome.

    Reads whitespace-separated lines of `<chromosome> <position>` from
    input_file, groups the positions per chromosome, bins them into
    windows of bin_size bases, and writes one tab-separated line per
    bin: chromosome, bin start (per-chromosome coordinate), bin start
    plus the cumulative length of the previously written chromosomes,
    and the binned value.

    :param input_file: coordinate file (opened via
        utils_logging.open_input_file).
    :param output_file: path of the output file to create.
    :param genome_file: fasta file providing each chromosome's sequence
        (used for its length).
    :param bin_size: width of each bin in bases.
    """
    open_file = utils_logging.open_input_file(input_file)
    open_output = utils_logging.open_output_file(output_file)
    try:
        all_coordinates_per_chr = {}
        genome_loader = GenomeLoader(genome_file)
        previous_bin = 0
        all_chr = []
        # Group all coordinates per chromosome name (first column).
        for line in open_file:
            sp_line = line.split()
            all_coordinates = all_coordinates_per_chr.get(sp_line[0])
            if all_coordinates is None:
                all_chr.append(sp_line[0])
                all_coordinates = []
                all_coordinates_per_chr[sp_line[0]] = all_coordinates
            all_coordinates.append(int(sp_line[1]))
        all_chr.sort()
        for chr_name in all_chr:
            header, sequence = genome_loader.get_chr(chr_name)
            # NOTE(review): the dict key is switched to the stripped
            # fasta header here -- assumes it matches the input file's
            # first column; verify against the fasta format used.
            chr_name = header.strip()
            chr_len = len(sequence)
            all_coordinates = all_coordinates_per_chr.get(chr_name)
            all_bins = bin_value_from_array(all_coordinates, bin_size, chr_len)
            for bin_index, value in enumerate(all_bins):
                open_output.write('%s\t%s\t%s\t%s\n' % (
                    chr_name, bin_index * bin_size,
                    (bin_index * bin_size) + previous_bin, value))
            # Accumulate the genome-wide offset for the next chromosome.
            previous_bin += len(all_bins) * bin_size
    finally:
        # The original leaked the input handle; close both streams.
        open_file.close()
        open_output.close()
def extract_reads_from_all_bam_files_set_of_consensus_old(bam_files, list_consensus, output_dir, genome_loader=None, all_read1_consensus_file=None):
    """For every consensus in list_consensus, look up its sequence and
    extract the matching reads from all bam files into output_dir.

    A GenomeLoader is built from all_read1_consensus_file when none is
    supplied by the caller.
    """
    if genome_loader is None:
        genome_loader = GenomeLoader(all_read1_consensus_file,
                                     keep_until_done=True)
    for name in list_consensus:
        logging.info("Extract reads from %s " % name)
        # get_chr returns the canonical name alongside the sequence.
        name, sequence = genome_loader.get_chr(name)
        extract_reads_from_one_consensus(bam_files, output_dir,
                                         name, sequence)
def extract_reads_from_all_bam_files_set_of_consensus_old(
        bam_files, list_consensus, output_dir, genome_loader=None,
        all_read1_consensus_file=None):
    """Extract reads for each consensus in list_consensus from every
    bam file, writing results under output_dir.

    If no genome_loader is given, one is created from
    all_read1_consensus_file.
    """
    loader = genome_loader
    if loader is None:
        loader = GenomeLoader(all_read1_consensus_file,
                              keep_until_done=True)
    for entry in list_consensus:
        logging.info("Extract reads from %s " % entry)
        # The loader hands back the canonical name with the sequence.
        entry, seq = loader.get_chr(entry)
        extract_reads_from_one_consensus(bam_files, output_dir, entry, seq)
def extract_reads_from_all_bam_files_set_of_consensus(
        bam_files, list_consensus, output_dir, genome_loader=None,
        all_read1_consensus_file=None):
    """Remove any *_dir leftovers from a previous run in output_dir,
    then extract reads for list_consensus from each bam file.

    A GenomeLoader is built from all_read1_consensus_file when none is
    supplied.  The shared fastq output files are closed at the end.
    """
    stale_dirs = glob.glob(os.path.join(output_dir, '*_dir'))
    if stale_dirs:
        logging.info("cleanup previous run in %s" % output_dir)
        for stale in stale_dirs:
            shutil.rmtree(stale)
    if genome_loader is None:
        genome_loader = GenomeLoader(all_read1_consensus_file,
                                     keep_until_done=True)
    for bam_file in bam_files:
        extract_reads_from_one_bam_file(bam_file, output_dir,
                                        list_consensus, genome_loader)
    # All the read have been extract now close the fastq files
    close_fastq_files()