def bin_coordinates_through_genome(input_file, output_file, genome_file, bin_size):
    """Bin per-sequence coordinates and write them with a genome-wide offset.

    Each non-blank input line is split on whitespace; the first field is used
    as the sequence name and the second is parsed as an integer coordinate.
    Sequence names are processed in sorted order. For every name the sequence
    is fetched from a ``GenomeLoader`` built on ``genome_file`` and the
    coordinates are passed to ``bin_value_from_array`` together with
    ``bin_size`` and the sequence length.

    One tab-separated line is written per returned bin:
    stripped header from the loader, ``bin_index * bin_size``, that same
    position shifted by the running offset of all previously written
    sequences, and the bin value.

    :param input_file: passed to ``utils_logging.open_input_file``.
    :param output_file: passed to ``utils_logging.open_output_file``.
    :param genome_file: passed to ``GenomeLoader``.
    :param bin_size: bin width, used as a multiplier for the bin index.
    :raises IndexError: if a non-blank line has fewer than two fields.
    :raises ValueError: if the second field is not an integer.
    """
    open_file = utils_logging.open_input_file(input_file)
    open_output = utils_logging.open_output_file(output_file)
    try:
        genome_loader = GenomeLoader(genome_file)
        coordinates_per_chr = {}
        try:
            for line in open_file:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) carry no record.
                    continue
                coordinates_per_chr.setdefault(fields[0], []).append(int(fields[1]))
        finally:
            # NOTE(review): assumes the input handle supports close(), as the
            # output handle does -- confirm against utils_logging.
            open_file.close()

        # Running start position of the current sequence in the concatenation
        # of all sequences written so far.
        genome_offset = 0
        for chr_name in sorted(coordinates_per_chr):
            header, sequence = genome_loader.get_chr(chr_name)
            # Look the coordinates up with the name they were stored under;
            # the loader's header is only used as the output label and is not
            # guaranteed to be identical to that name.
            all_bins = bin_value_from_array(
                coordinates_per_chr[chr_name], bin_size, len(sequence))
            label = header.strip()
            for bin_index, value in enumerate(all_bins):
                bin_start = bin_index * bin_size
                open_output.write(
                    '%s\t%s\t%s\t%s\n' % (label, bin_start, bin_start + genome_offset, value))
            genome_offset += len(all_bins) * bin_size
    finally:
        # Close the output even when reading, loading or binning fails.
        open_output.close()
def extract_reads_from_all_bam_files_set_of_consensus_old(bam_files, list_consensus, output_dir,
                                                          genome_loader=None,
                                                          all_read1_consensus_file=None):
    """Run ``extract_reads_from_one_consensus`` for every listed consensus.

    NOTE(review): this function name is defined more than once in this file
    with the same logic; the last definition is the one bound at import time.

    :param bam_files: forwarded unchanged to ``extract_reads_from_one_consensus``.
    :param list_consensus: iterable of consensus names to look up in the loader.
    :param output_dir: forwarded unchanged to ``extract_reads_from_one_consensus``.
    :param genome_loader: object providing ``get_chr(name)`` that returns a
        ``(name, sequence)`` pair. When ``None`` a ``GenomeLoader`` is created
        from ``all_read1_consensus_file`` with ``keep_until_done=True``.
    :param all_read1_consensus_file: only used when ``genome_loader`` is ``None``.
    """
    loader = genome_loader
    if loader is None:
        loader = GenomeLoader(all_read1_consensus_file, keep_until_done=True)

    for requested_name in list_consensus:
        logging.info("Extract reads from %s " % requested_name)
        # The name and sequence returned by the loader, not the requested
        # name, are what gets handed to the extraction step.
        loaded_name, loaded_sequence = loader.get_chr(requested_name)
        extract_reads_from_one_consensus(bam_files, output_dir, loaded_name, loaded_sequence)
def extract_reads_from_all_bam_files_set_of_consensus_old(
        bam_files, list_consensus, output_dir,
        genome_loader=None, all_read1_consensus_file=None):
    """Extract reads for each consensus in ``list_consensus``, one at a time.

    NOTE(review): an earlier definition with the same name and the same logic
    exists in this file; being later, this one replaces it when the module is
    loaded.

    :param bam_files: passed through to ``extract_reads_from_one_consensus``.
    :param list_consensus: iterable of names given to ``genome_loader.get_chr``.
    :param output_dir: passed through to ``extract_reads_from_one_consensus``.
    :param genome_loader: optional loader exposing ``get_chr(name)`` which
        returns a two-item ``(name, sequence)`` result. If it is ``None``,
        ``GenomeLoader(all_read1_consensus_file, keep_until_done=True)`` is used.
    :param all_read1_consensus_file: source for the fallback loader; ignored
        when ``genome_loader`` is supplied.
    """
    active_loader = (
        genome_loader
        if genome_loader is not None
        else GenomeLoader(all_read1_consensus_file, keep_until_done=True)
    )

    for wanted in list_consensus:
        logging.info("Extract reads from %s " % wanted)
        # Use the loader's returned name for the downstream call, as the
        # loader may hand back a different string than was requested.
        found_name, found_sequence = active_loader.get_chr(wanted)
        extract_reads_from_one_consensus(
            bam_files, output_dir, found_name, found_sequence)