Exemple #1
0
def _group_coordinates_per_chr(lines):
    """Group the integer in field 2 of each line under the name in field 1.

    Lines are split on whitespace; lines with no fields (blank lines) are
    skipped. Returns a dict mapping each name to the list of its coordinates
    in the order they were read.
    """
    coordinates_per_chr = {}
    for line in lines:
        fields = line.split()
        if not fields:
            # A blank line carries no coordinate; skip it instead of failing
            # on fields[0].
            continue
        coordinates_per_chr.setdefault(fields[0], []).append(int(fields[1]))
    return coordinates_per_chr


def bin_coordinates_through_genome(input_file, output_file, genome_file, bin_size):
    """Bin the coordinates of each chromosome and write one row per bin.

    ``input_file`` is read line by line: the first whitespace-separated field
    is a chromosome name and the second an integer coordinate. Chromosomes are
    processed in sorted name order. For each one, its sequence is fetched from
    a ``GenomeLoader`` built on ``genome_file`` and the coordinates are handed
    to ``bin_value_from_array`` together with ``bin_size`` and the sequence
    length.

    Each output row is tab-separated:
    chromosome header (stripped), ``bin index * bin_size``, that same value
    shifted by the running offset of the chromosomes already written, and the
    value returned for the bin.

    :param input_file: passed to ``utils_logging.open_input_file``.
    :param output_file: passed to ``utils_logging.open_output_file``.
    :param genome_file: passed to ``GenomeLoader``.
    :param bin_size: bin width; also the multiplier used for bin positions.
    """
    open_file = utils_logging.open_input_file(input_file)
    try:
        open_output = utils_logging.open_output_file(output_file)
        try:
            genome_loader = GenomeLoader(genome_file)
            coordinates_per_chr = _group_coordinates_per_chr(open_file)

            genome_offset = 0
            for chr_name in sorted(coordinates_per_chr):
                header, sequence = genome_loader.get_chr(chr_name)
                # The header is only used as the printed label. Coordinates are
                # looked up with the name read from the input, which is always
                # a valid key, whereas the stripped header may differ from it.
                chr_label = header.strip()
                all_bins = bin_value_from_array(
                    coordinates_per_chr[chr_name], bin_size, len(sequence))
                for bin_index, value in enumerate(all_bins):
                    bin_start = bin_index * bin_size
                    open_output.write('%s\t%s\t%s\t%s\n' % (
                        chr_label, bin_start, bin_start + genome_offset, value))
                # Shift the next chromosome past every bin written so far.
                genome_offset += len(all_bins) * bin_size
        finally:
            open_output.close()
    finally:
        # The input handle was previously left open.
        open_file.close()
def extract_reads_from_all_bam_files_set_of_consensus_old(bam_files, list_consensus, output_dir, genome_loader=None,
                                                          all_read1_consensus_file=None):
    """Run ``extract_reads_from_one_consensus`` for every listed consensus.

    For each name in ``list_consensus`` an info message is logged, the name
    and sequence are fetched with ``genome_loader.get_chr`` and both are
    forwarded, with ``bam_files`` and ``output_dir``, to
    ``extract_reads_from_one_consensus``.

    When ``genome_loader`` is None, a ``GenomeLoader`` is created from
    ``all_read1_consensus_file`` with ``keep_until_done=True``.

    NOTE(review): a second definition with the same name follows in this file
    and replaces this one when the module is loaded.
    """
    loader = genome_loader
    if loader is None:
        loader = GenomeLoader(all_read1_consensus_file, keep_until_done=True)

    for requested_name in list_consensus:
        logging.info("Extract reads from %s " % requested_name)
        # get_chr hands back the name to use downstream along with the sequence.
        loaded_name, loaded_sequence = loader.get_chr(requested_name)
        extract_reads_from_one_consensus(bam_files, output_dir, loaded_name, loaded_sequence)
def extract_reads_from_all_bam_files_set_of_consensus_old(
        bam_files,
        list_consensus,
        output_dir,
        genome_loader=None,
        all_read1_consensus_file=None):
    """Extract reads for each consensus named in ``list_consensus``.

    Every name is logged at info level, resolved to a (name, sequence) pair
    through ``get_chr`` on the loader, and passed with ``bam_files`` and
    ``output_dir`` to ``extract_reads_from_one_consensus``.

    If no ``genome_loader`` is supplied, one is built from
    ``all_read1_consensus_file`` using ``keep_until_done=True``.

    NOTE(review): an identically named definition precedes this one in the
    file; this later definition is the one that stays bound.
    """
    if genome_loader is None:
        active_loader = GenomeLoader(all_read1_consensus_file,
                                     keep_until_done=True)
    else:
        active_loader = genome_loader

    for wanted in list_consensus:
        message = "Extract reads from %s " % wanted
        logging.info(message)
        resolved_name, sequence = active_loader.get_chr(wanted)
        extract_reads_from_one_consensus(
            bam_files, output_dir, resolved_name, sequence)