def to_fasta(self, genome):
    """Convert the stored CNS entries to a :py:class:`Fasta` object.

    :param string genome: Name of the genome whose sequences are extracted.
    :returns: A new :py:class:`Fasta` with one entry per sequence, named by
        CNS ID, with alignment gap characters ('-') removed."""
    result = Fasta()
    tracker = Progress_tracker("Converting to .fasta", len(self.entries))
    tracker.auto_display().start()
    for cns_entry in self.entries:
        for aligned in cns_entry.get_seqs(genome):
            # Strip alignment gaps before storing the raw sequence.
            result.add_entry(cns_entry.cns_ID, aligned.sequence.replace("-", ""))
        tracker.step()
    tracker.done()
    return result
def to_bed(self, genome):
    """Convert the stored CNS entries to a :py:class:`Bed6` object.

    :param string genome: Name of the genome whose sequences become BED regions.
    :returns: A new :py:class:`Bed6` with one region per sequence."""
    converted = Bed6()
    tracker = Progress_tracker("Converting to .bed", len(self.entries))
    tracker.auto_display().start()
    for cns_entry in self.entries:
        for located in cns_entry.get_seqs(genome):
            converted.add_entry(located.loc_chrom, located.start,
                                located.stop, located.cns_ID, located.dist)
        tracker.step()
    tracker.done()
    return converted
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    tracker = Progress_tracker("Parsing .gff3", len(lines)).auto_display().start()
    for raw in lines:
        # Skip comment/pragma lines.
        if not raw.startswith('#'):
            columns = raw.strip().split('\t')
            # A valid GFF3 record has exactly nine tab-separated columns.
            if len(columns) == 9:
                self.entries.append(_Gff3_entry(*columns))
        tracker.step()
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    ID = None
    tracker = Progress_tracker("Parsing .cns", len(lines)).auto_display().start()
    for line in lines:
        # '.' marks an empty field; store it as None.
        # (Renamed from `list`, which shadowed the builtin.)
        fields = [item if item != '.' else None
                  for item in line.strip().split('\t')]
        # Consecutive rows sharing a CNS ID belong to the same entry.
        if fields[0] != ID:
            ID = fields[0]
            self.entries.append(_Cns_entry(ID))
        self.entries[-1].add_seq(*fields[1:])
        tracker.step()
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    tracker = Progress_tracker("Parsing 6 column .bed", len(lines)).auto_display().start()
    for line in lines:
        fields = line.strip().split('\t')
        if len(fields) > 1:
            # Pad missing trailing columns with None so _Bed6_entry always
            # receives six positional arguments.  Bug fix: the original
            # used list.append, which nested the whole padding list as a
            # single extra element instead of flattening it.
            if len(fields) < 6:
                fields.extend([None] * (6 - len(fields)))
            # '.' marks an empty field; store it as None.
            fields[:] = [item if item != '.' else None for item in fields]
            self.entries.append(_Bed6_entry(*fields))
        tracker.step()
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    paragraphs = []
    found_first = False
    # Each input line is stepped twice (once while scanning, once while a
    # finished paragraph is parsed), hence the doubled tracker length.
    tracker = Progress_tracker("Parsing wiggle file", len(lines) * 2).auto_display().start()
    # The sentinel '' appended to the input forces the final paragraph to
    # be flushed on the last iteration.
    for line in lines + ['']:
        stripped = line.strip()
        if (not stripped) or stripped[0] not in self._score_starting_chars:
            # Declaration line or blank: close out the previous paragraph
            # (if any) and start a new one.
            if not found_first:
                found_first = True
            else:
                # Parse the finished paragraph: first line is the track
                # declaration (key=value pairs), the rest are score values.
                info = [
                    item.split("=") for item in paragraphs[-1][0].split()
                ]
                step_type = info[0][0]
                info_dict = {pair[0]: pair[1] for pair in info[1:]}
                val_list = [float(item) for item in paragraphs[-1][1:]]
                # NOTE(review): assumes the declaration carries 'chrom',
                # 'start' and 'step' keys (fixedStep-style); a variableStep
                # track without 'step' would raise KeyError -- confirm only
                # fixedStep input is expected.
                self.entries.append(
                    _Wiggle_entry(step_type, info_dict['chrom'],
                                  int(info_dict['start']),
                                  int(info_dict['step']), val_list))
                tracker.step(len(paragraphs[-1]))
            paragraphs.append([stripped])
        elif found_first:
            # Score line belonging to the current paragraph.
            paragraphs[-1].append(stripped)
        tracker.step()
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    if not hasattr(self, 'headerLines'):
        self.headerLines = []
    block = []
    tracker = Progress_tracker("Parsing .maf", len(lines)).auto_display().start()
    for raw in lines:
        text = raw.strip()
        if text.startswith("#") and not text.startswith("##--"):
            # Plain comment lines are kept as file-level header metadata.
            self.headerLines.append(text)
        elif not text:
            # A blank line terminates the current alignment block.
            if len(block) > 1:
                self.entries.append(_Maf_entry(block))
            block = []
        else:
            block.append(text)
        tracker.step()
    tracker.done()
def to_bed(self, genome_name=None, index_tag="maf_index"):
    """Converts :py:class:`Maf` data to :py:class:`Bed6` using the specified genome for location information.

    :params string genome_name: Name of the genome to use to make BED regions. If `genome_name==None` then it will use the first genome it encounters in the file.
    :params string index_tag: Tag name which will be use to mark the BED entry with the MAF entry's index."""
    new_bed = Bed6()
    tracker = Progress_tracker("Converting to .bed", len(self.entries)).auto_display().start()
    if not genome_name:
        # Default to the genome of the first sequence in the file;
        # src is formatted "genome:chromosome".
        genome_name = self.entries[0].sequences[0].src.split(":")[0].strip()
    index = 0
    for entry in self.entries:
        seq_to_convert = (seq for seq in entry.sequences
                          if seq.src.split(":")[0].strip() == genome_name)
        for sequence in seq_to_convert:
            # Tag the BED name with this MAF entry's index so a region can
            # be traced back to its alignment block.
            # (Idiom fix: `is not None` instead of `!= None`.)
            id_string = "%s=%s" % (index_tag, index) if index_tag is not None else None
            if sequence.metadata:
                id_string = (sequence.metadata + ";" + id_string) if id_string \
                    else sequence.metadata
            new_bed.add_entry(sequence.src, sequence.start,
                              sequence.start + sequence.size,
                              name=id_string, strand=sequence.strand)
        index += 1
        tracker.step()
    tracker.done()
    return new_bed
def run(gff3_file, bed_out=None, type_list=None, sequence_prefix=None):
    """Converts a gff3 file into a bed file. If no output path is provided, outputs to stdout.

    :param string gff3_file: Path to gff3 file.
    :param string bed_out: Path to output. (bed file)
    :param list[string] type_list: Specifies that only sequences of these types should be added to the bed.
    :param string sequence_prefix: Appends provided string to the front of the chromosome names in the bed file.
    :returns: `None`
    """
    # type_list defaults to None instead of [] to avoid the shared
    # mutable-default-argument pitfall; Gff3.to_bed treats both falsy
    # values as "include all types", so behavior is unchanged.
    bed = Gff3(file_name=gff3_file).to_bed(type_list)
    if sequence_prefix:
        tracker = Progress_tracker("Prefixing bed chrom IDs",
                                   len(bed.entries)).auto_display().start()
        for entry in bed.entries:
            entry.chrom = sequence_prefix + entry.chrom
            tracker.step()
        tracker.done()
    if bed_out:
        bed.save_file(bed_out)
    else:
        sys.stdout.write("\n".join(bed.get_lines()) + "\n")
        sys.stdout.flush()
def to_bed(self, type_list=None, genome=None):
    """Converts :py:class:`Gff3` data to :py:class:`Bed6` data after selecting for sequence types.

    :params list[string] type_list: List of sequence types which should be converted to BED regions.
    :params string genome: Genome name to prepend to the chromosome name when it is converted to BED."""
    new_bed = Bed6()
    if type_list:
        selected = [rec for rec in self.entries if rec.type in type_list]
    else:
        selected = self.entries
    tracker = Progress_tracker("Converting to .bed", len(selected))
    tracker.auto_display().start()
    for record in selected:
        # Normalize coordinates so start <= end.
        if record.start < record.end:
            low, high = record.start, record.end
        else:
            low, high = record.end, record.start
        name_field = record.attributes + ";seqType=" + record.type
        chrom = genome + ":" + record.seqid if genome else record.seqid
        # GFF3 is 1-based inclusive; BED is 0-based half-open, hence low - 1.
        new_bed.add_entry(chrom, low - 1, high, name=name_field,
                          score=record.score, strand=record.strand)
        tracker.step()
    tracker.done()
    return new_bed
def to_bed(self, genome_name=None, index_tag="maf_index"):
    """Converts :py:class:`Maf` data to :py:class:`Bed6` using the specified genome for location information.

    :params string genome_name: Name of the genome to use to make BED regions. If `genome_name==None` then it will use the first genome it encounters in the file.
    :params string index_tag: Tag name which will be use to mark the BED entry with the MAF entry's index."""
    new_bed = Bed6()
    tracker = Progress_tracker("Converting to .bed",
                               len(self.entries)).auto_display().start()
    if not genome_name:
        # Default to the genome of the first sequence in the file;
        # src is formatted "genome:chromosome".
        genome_name = self.entries[0].sequences[0].src.split(":")[0].strip()
    index = 0
    for entry in self.entries:
        seq_to_convert = (seq for seq in entry.sequences
                          if seq.src.split(":")[0].strip() == genome_name)
        for sequence in seq_to_convert:
            # Tag the BED name with this MAF entry's index so a region can
            # be traced back to its alignment block.
            # (Idiom fix: `is not None` instead of `!= None`.)
            id_string = "%s=%s" % (index_tag, index) if index_tag is not None else None
            if sequence.metadata:
                id_string = (sequence.metadata + ";" + id_string) if id_string \
                    else sequence.metadata
            new_bed.add_entry(sequence.src, sequence.start,
                              sequence.start + sequence.size,
                              name=id_string, strand=sequence.strand)
        index += 1
        tracker.step()
    tracker.done()
    return new_bed
def to_bed(self, type_list=None, genome=None):
    """Converts :py:class:`Gff3` data to :py:class:`Bed6` data after selecting for sequence types.

    :params list[string] type_list: List of sequence types which should be converted to BED regions.
    :params string genome: Genome name to prepend to the chromosome name when it is converted to BED."""
    new_bed = Bed6()
    if type_list:
        chosen = [rec for rec in self.entries if rec.type in type_list]
    else:
        chosen = self.entries
    tracker = Progress_tracker("Converting to .bed", len(chosen))
    tracker.auto_display().start()
    for rec in chosen:
        # Normalize coordinates so start <= end.
        if rec.start < rec.end:
            low, high = rec.start, rec.end
        else:
            low, high = rec.end, rec.start
        bed_name = rec.attributes + ";seqType=" + rec.type
        chrom = rec.seqid if not genome else genome + ":" + rec.seqid
        # GFF3 is 1-based inclusive; BED is 0-based half-open, hence low - 1.
        new_bed.add_entry(chrom, low - 1, high, name=bed_name,
                          score=rec.score, strand=rec.strand)
        tracker.step()
    tracker.done()
    return new_bed
def to_fasta(self, genome):
    """Convert the stored CNS entries to a :py:class:`Fasta` object.

    :param string genome: Name of the genome whose sequences are extracted.
    :returns: A new :py:class:`Fasta` with one entry per sequence, named by
        CNS ID, with alignment gap characters ('-') removed."""
    fasta_out = Fasta()
    tracker = Progress_tracker("Converting to .fasta", len(self.entries))
    tracker.auto_display().start()
    for cns in self.entries:
        for seq in cns.get_seqs(genome):
            # Remove alignment gaps before storing the raw sequence.
            fasta_out.add_entry(cns.cns_ID, seq.sequence.replace("-", ""))
        tracker.step()
    tracker.done()
    return fasta_out
def to_bed(self, genome):
    """Convert the stored CNS entries to a :py:class:`Bed6` object.

    :param string genome: Name of the genome whose sequences become BED regions.
    :returns: A new :py:class:`Bed6` with one region per sequence."""
    bed_out = Bed6()
    tracker = Progress_tracker("Converting to .bed", len(self.entries))
    tracker.auto_display().start()
    for cns in self.entries:
        for seq in cns.get_seqs(genome):
            bed_out.add_entry(seq.loc_chrom, seq.start, seq.stop,
                              seq.cns_ID, seq.dist)
        tracker.step()
    tracker.done()
    return bed_out
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    tracker = Progress_tracker("Parsing .gff3", len(lines)).auto_display().start()
    for raw in lines:
        # Comment/pragma lines are ignored.
        if not raw.startswith('#'):
            cols = raw.strip().split('\t')
            # A valid GFF3 record has exactly nine tab-separated columns.
            if len(cols) == 9:
                self.entries.append(_Gff3_entry(*cols))
        tracker.step()
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    tracker = Progress_tracker("Parsing 6 column .bed",
                               len(lines)).auto_display().start()
    for line in lines:
        fields = line.strip().split('\t')
        if len(fields) > 1:
            # Pad missing trailing columns with None so _Bed6_entry always
            # receives six positional arguments.  Bug fix: the original
            # used list.append, which nested the whole padding list as a
            # single extra element instead of flattening it.
            if len(fields) < 6:
                fields.extend([None] * (6 - len(fields)))
            # '.' marks an empty field; store it as None.
            fields[:] = [item if item != '.' else None for item in fields]
            self.entries.append(_Bed6_entry(*fields))
        tracker.step()
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    paragraphs = []
    # Each line is stepped once while scanning and once while joining,
    # hence the doubled tracker length.  (Also avoids the original
    # len(lines * 2), which duplicated the whole list just to count it.)
    tracker = Progress_tracker("Parsing .fasta data",
                               len(lines) * 2).auto_display().start()
    for line in lines:
        stripped = line.strip()
        if stripped.startswith('>'):
            # Bug fix: a '>' header must START A NEW record.  The original
            # appended every header to the same paragraph, so files with
            # more than one record were merged into a single entry.
            paragraphs.append([stripped[1:]])
        elif paragraphs:
            # Sequence lines before the first header are ignored.
            paragraphs[-1].append(stripped)
        tracker.step()
    for paragraph in paragraphs:
        # paragraph[0] is the header text, the rest are sequence lines.
        self.entries.append(
            Fasta_entry(paragraph[0], "".join(paragraph[1:])))
        tracker.step(len(paragraph))
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    ID = None
    tracker = Progress_tracker("Parsing .cns",
                               len(lines)).auto_display().start()
    for line in lines:
        # '.' marks an empty field; store it as None.
        # (Renamed from `list`, which shadowed the builtin.)
        fields = [
            item if item != '.' else None
            for item in line.strip().split('\t')
        ]
        # Consecutive rows sharing a CNS ID belong to the same entry.
        if fields[0] != ID:
            ID = fields[0]
            self.entries.append(_Cns_entry(ID))
        self.entries[-1].add_seq(*fields[1:])
        tracker.step()
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    paragraphs = []
    # Each line is stepped once while scanning and once while joining,
    # hence the doubled tracker length.  (Also avoids the original
    # len(lines*2), which duplicated the whole list just to count it.)
    tracker = Progress_tracker("Parsing .fasta data",
                               len(lines) * 2).auto_display().start()
    for line in lines:
        stripped = line.strip()
        if stripped.startswith('>'):
            # Bug fix: a '>' header must START A NEW record.  The original
            # appended every header to the same paragraph, so files with
            # more than one record were merged into a single entry.
            paragraphs.append([stripped[1:]])
        elif paragraphs:
            # Sequence lines before the first header are ignored.
            paragraphs[-1].append(stripped)
        tracker.step()
    for paragraph in paragraphs:
        # paragraph[0] is the header text, the rest are sequence lines.
        self.entries.append(Fasta_entry(paragraph[0], "".join(paragraph[1:])))
        tracker.step(len(paragraph))
    tracker.done()
def add_lines(self, lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    if not hasattr(self, 'headerLines'):
        self.headerLines = []
    current = []
    tracker = Progress_tracker("Parsing .maf", len(lines)).auto_display().start()
    for raw in lines:
        text = raw.strip()
        if text.startswith("#") and not text.startswith("##--"):
            # Plain comment lines are kept as file-level header metadata.
            self.headerLines.append(text)
        elif not text:
            # Blank line: the current alignment block is complete.
            if len(current) > 1:
                self.entries.append(_Maf_entry(current))
            current = []
        else:
            current.append(text)
        tracker.step()
    tracker.done()
def add_lines(self,lines):
    """See :py:func:`Filetype.add_lines()`.

    :param list[string] lines: Lines in the filetype's format to add to stored data."""
    paragraphs = []
    found_first=False
    # Each input line is stepped twice (once while scanning, once while a
    # finished paragraph is parsed), hence the doubled tracker length.
    tracker = Progress_tracker("Parsing wiggle file",len(lines)*2).auto_display().start()
    # The sentinel '' appended to the input forces the final paragraph to
    # be flushed on the last iteration.
    for line in lines+['']:
        stripped = line.strip()
        if (not stripped) or stripped[0] not in self._score_starting_chars:
            # Declaration line or blank: close out the previous paragraph
            # (if any) and start a new one.
            if not found_first:
                found_first=True
            else:
                # Parse the finished paragraph: first line is the track
                # declaration (key=value pairs), the rest are score values.
                info = [item.split("=") for item in paragraphs[-1][0].split()]
                step_type = info[0][0]
                info_dict = {pair[0]:pair[1] for pair in info[1:]}
                val_list = [float(item) for item in paragraphs[-1][1:]]
                # NOTE(review): assumes the declaration carries 'chrom',
                # 'start' and 'step' keys (fixedStep-style); a variableStep
                # track without 'step' would raise KeyError -- confirm only
                # fixedStep input is expected.
                self.entries.append(_Wiggle_entry(step_type,info_dict['chrom'],int(info_dict['start']),int(info_dict['step']),val_list))
                tracker.step(len(paragraphs[-1]))
            paragraphs.append([stripped])
        elif found_first:
            # Score line belonging to the current paragraph.
            paragraphs[-1].append(stripped)
        tracker.step()
    tracker.done()
def run(gff3_file, bed_out=None, type_list=None, sequence_prefix=None):
    """Converts a gff3 file into a bed file. If no output path is provided, outputs to stdout.

    :param string gff3_file: Path to gff3 file.
    :param string bed_out: Path to output. (bed file)
    :param list[string] type_list: Specifies that only sequences of these types should be added to the bed.
    :param string sequence_prefix: Appends provided string to the front of the chromosome names in the bed file.
    :returns: `None`
    """
    # type_list defaults to None instead of [] to avoid the shared
    # mutable-default-argument pitfall; Gff3.to_bed treats both falsy
    # values as "include all types", so behavior is unchanged.
    bed = Gff3(file_name=gff3_file).to_bed(type_list)
    if sequence_prefix:
        tracker = Progress_tracker("Prefixing bed chrom IDs",
                                   len(bed.entries)).auto_display().start()
        for entry in bed.entries:
            entry.chrom = sequence_prefix + entry.chrom
            tracker.step()
        tracker.done()
    if bed_out:
        bed.save_file(bed_out)
    else:
        sys.stdout.write("\n".join(bed.get_lines()) + "\n")
        sys.stdout.flush()
def _main(data, output_folder, num_threads, overwrite=False, chrom_name=None):
    """Run the per-chromosome CNS identification pipeline.

    Converts the chromosome alignment to BED, subtracts coding regions,
    intersects with highly-conserved wiggle regions, slices the MAF by the
    surviving regions, and writes per-genome CNS BEDs plus a final .cns file.
    Progress is checkpointed into a JSON record after every stage.

    :param dict data: Pipeline state; reads keys such as 'chrom_seq_maf',
        'ref_genome', 'ref_coding_bed', 'chrom_conservation_wig' and
        'genomes', and adds the paths it creates.
    :param string output_folder: Folder for all generated files.
    :param int num_threads: NOTE(review): not used in this function body --
        confirm whether it is vestigial or consumed elsewhere.
    :param bool overwrite: Passed to create_path for every output file.
    :param string chrom_name: Chromosome label used in per-genome BED names.
    :returns: The updated `data` dict.
    """
    datasaver = JSON_saver(
        create_path(output_folder, "record", "json", overwrite=overwrite))
    datasaver.save(data)
    #maf_to_bed
    info = "Convert aligned sequences to .bed:"
    header_print(info)
    data['ref_seq_bed'] = create_path(output_folder, "ref_seq", "bed",
                                      overwrite=overwrite)
    maf_to_bed(maf_file=data['chrom_seq_maf'],
               bed_out=data['ref_seq_bed'],
               ref_genome=data['ref_genome'],
               index_tag="chrom_maf_index")
    datasaver.save(data)
    # #$bedtools intersect
    # info = "Intersect aligned regions with conserved regions:"
    # header_print(info)
    # data['conserved_bed'] = create_path(output_folder,"conserved","bed",overwrite=overwrite)
    # cmd = "bedtools intersect -a %s -b %s > %s" % (data['ref_seq_bed'],data['chrom_conserved_bed'],data['conserved_bed'])
    # print cmd
    # tracker = Progress_tracker("Running bedtools intersect",1).estimate(False).display()
    # process = subprocess.Popen(cmd, shell=True)
    # process.wait()
    # tracker.done()
    # datasaver.save(data)
    #$bedtools subtract
    info = "Subtract coding regions from aligned regions:"
    header_print(info)
    data['aligned_noncoding_bed'] = create_path(output_folder,
                                                "aligned_noncoding_bed",
                                                "bed",
                                                overwrite=overwrite)
    # shell=True is required for the output redirection; paths come from
    # create_path, not user input.
    cmd = "bedtools subtract -a %s -b %s > %s" % (
        data['ref_seq_bed'], data['ref_coding_bed'],
        data['aligned_noncoding_bed'])
    tracker = Progress_tracker("Running bedtools subtract",
                               1).estimate(False).display()
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    tracker.done()
    datasaver.save(data)
    #wiggle_to_bed
    info = "Converting especially conserved regions in wiggle file to bed"
    header_print(info)
    data['best_conserved_bed'] = create_path(output_folder, "best_conserved",
                                             "bed", overwrite=overwrite)
    wiggle_to_bed(wig_file=data['chrom_conservation_wig'],
                  out_file=data['best_conserved_bed'],
                  genome_name=data['ref_genome'])
    datasaver.save(data)
    #filter_bed_with_wiggle
    info = "Intersecting wiggle bed with the potential cns bed"
    header_print(info)
    data['cns_bed'] = create_path(output_folder, "cns", "bed",
                                  overwrite=overwrite)
    cmd = "bedtools intersect -a %s -b %s > %s" % (
        data['aligned_noncoding_bed'], data['best_conserved_bed'],
        data['cns_bed'])
    tracker = Progress_tracker("Running bedtools intersect",
                               1).estimate(False).display()
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    tracker.done()
    datasaver.save(data)
    #slice_maf_by_bed
    info = "Slice multi-alignment file based on identified conserved non-coding regions:"
    header_print(info)
    data['cns_maf'] = create_path(output_folder, "cns", "maf",
                                  overwrite=overwrite)
    slice_maf_by_bed(maf_file=data['chrom_seq_maf'],
                     bed_file=data['cns_bed'],
                     index_tag="chrom_maf_index",
                     ref_genome=data['ref_genome'],
                     out_file=data['cns_maf'],
                     max_N_ratio=0.5,
                     max_gap_ratio=0.5,
                     min_len=15)
    datasaver.save(data)
    #maf_to_bed
    info = "Convert per-genome CNS regions to .bed:"
    header_print(info)
    data['genome_cns_beds_folder'] = create_path(output_folder +
                                                 "genome_cns_beds",
                                                 overwrite=overwrite)
    cns_maf = Maf(file_name=data['cns_maf'])
    for genome in data['genomes']:
        data['genomes'][genome]['cns_bed'] = create_path(
            data['genome_cns_beds_folder'], genome + "_cns_" + chrom_name,
            "bed", overwrite=overwrite)
        bed = cns_maf.to_bed(genome_name=genome, index_tag="cns_maf_index")
        bed.save_file(data['genomes'][genome]['cns_bed'])
    # Release the parsed MAF before the next stages allocate.
    del cns_maf
    datasaver.save(data)
    #$bedtools closest
    info = "Find closest gene for each CNS region:"
    header_print(info)
    data['gene_proximity_beds_folder'] = create_path(output_folder +
                                                     "gene_proximity_beds",
                                                     overwrite=overwrite)
    for genome in data['genomes']:
        data['genomes'][genome]['gene_proximity_bed'] = \
            create_path(data['gene_proximity_beds_folder'],genome+"_proxim","bed",overwrite=overwrite)
        cmd = "bedtools closest -D a -a %s -b %s > %s" % \
            (data['genomes'][genome]['cns_bed'],
             data['genomes'][genome]['annot_bed'],
             data['genomes'][genome]['gene_proximity_bed'])
        process = subprocess.Popen(cmd, shell=True)
        process.wait()
    datasaver.save(data)
    #maf_and_proxim_bed_to_cns
    info = "Process proximity and maf files into .cns file:"
    header_print(info)
    data['results'] = create_path(output_folder, "identified_CNSs", "cns",
                                  overwrite=overwrite)
    cns_proxim_beds = {
        genome: Bed13(data['genomes'][genome]['gene_proximity_bed'])
        for genome in data['genomes']
    }
    Maf(file_name=data['cns_maf'])\
        .cns_from_proxim_beds(cns_proxim_beds,"cns_maf_index")\
        .save_file(data['results'])
    datasaver.save(data)
    return data
def call_commands_async(command_iterable, num, shell=False, tracker_name=None, env=os.environ):
    """Run a batch of commands as subprocesses, at most `num` at a time.

    :param command_iterable: Sequence of commands, each an argv token list.
    :param int num: Maximum number of concurrently running processes.
    :param bool shell: If True, join each command's tokens into one string
        and run it through the shell.
    :param string tracker_name: If given, display a Progress_tracker with
        this name while the commands run.
    :param env: Environment mapping for the subprocesses (defaults to the
        live os.environ).
    :returns: List of finished subprocess.Popen objects.

    NOTE(review): relies on os.waitpid(-1, 0), so this is POSIX-only.
    """
    process_list = []
    finished = []
    if len(command_iterable) < num:
        num = len(command_iterable)
    if tracker_name:
        tracker = Progress_tracker(tracker_name, len(command_iterable)).estimate(False)
        tracker.display()
    else:
        tracker = None
    for command in command_iterable:
        # Idiom fix: was `if shell==True:`.
        if shell:
            # Shell mode: the argv list is joined into a single command line.
            process_list.append(subprocess.Popen(" ".join(command), env=env, shell=True))
        else:
            process_list.append(subprocess.Popen(command, env=env))
        # Throttle: block until a child exits whenever the pool is full.
        while len(process_list) >= num:
            if tracker:
                tracker.status("%s/%s processes active" % (len(process_list), num))
            pid, exitstat = os.waitpid(-1, 0)
            # Reap the child waitpid reported, plus any others that have
            # finished meanwhile; iterate backwards so pop(i) is safe.
            # Idiom fix: was `poll() != None`.
            for i in range(len(process_list) - 1, -1, -1):
                if process_list[i].pid == pid or process_list[i].poll() is not None:
                    finished.append(process_list.pop(i))
                    if tracker:
                        tracker.step()
    # Drain whatever is still running once every command has been launched.
    while process_list:
        proc = process_list.pop(0)
        proc.wait()
        finished.append(proc)
        if tracker:
            tracker.step().display().status("%s/%s processes active" % (len(process_list), num))
    if tracker:
        tracker.status().done()
    return finished
def _main(data,output_folder,num_threads,overwrite=False,chrom_name=None):
    """Run the per-chromosome CNS identification pipeline.

    Converts the chromosome alignment to BED, subtracts coding regions,
    intersects with highly-conserved wiggle regions, slices the MAF by the
    surviving regions, and writes per-genome CNS BEDs plus a final .cns file.
    Progress is checkpointed into a JSON record after every stage.

    :param dict data: Pipeline state; reads keys such as 'chrom_seq_maf',
        'ref_genome', 'ref_coding_bed', 'chrom_conservation_wig' and
        'genomes', and adds the paths it creates.
    :param string output_folder: Folder for all generated files.
    :param int num_threads: NOTE(review): not used in this function body --
        confirm whether it is vestigial or consumed elsewhere.
    :param bool overwrite: Passed to create_path for every output file.
    :param string chrom_name: Chromosome label used in per-genome BED names.
    :returns: The updated `data` dict.
    """
    datasaver = JSON_saver(create_path(output_folder,"record","json",overwrite=overwrite))
    datasaver.save(data)
    #maf_to_bed
    info = "Convert aligned sequences to .bed:"
    header_print(info)
    data['ref_seq_bed'] = create_path(output_folder,"ref_seq","bed",overwrite=overwrite)
    maf_to_bed(maf_file = data['chrom_seq_maf'],
               bed_out = data['ref_seq_bed'],
               ref_genome = data['ref_genome'],
               index_tag = "chrom_maf_index")
    datasaver.save(data)
    # #$bedtools intersect
    # info = "Intersect aligned regions with conserved regions:"
    # header_print(info)
    # data['conserved_bed'] = create_path(output_folder,"conserved","bed",overwrite=overwrite)
    # cmd = "bedtools intersect -a %s -b %s > %s" % (data['ref_seq_bed'],data['chrom_conserved_bed'],data['conserved_bed'])
    # print cmd
    # tracker = Progress_tracker("Running bedtools intersect",1).estimate(False).display()
    # process = subprocess.Popen(cmd, shell=True)
    # process.wait()
    # tracker.done()
    # datasaver.save(data)
    #$bedtools subtract
    info = "Subtract coding regions from aligned regions:"
    header_print(info)
    data['aligned_noncoding_bed'] = create_path(output_folder,"aligned_noncoding_bed","bed",overwrite=overwrite)
    # shell=True is required for the output redirection; paths come from
    # create_path, not user input.
    cmd = "bedtools subtract -a %s -b %s > %s" % (data['ref_seq_bed'],data['ref_coding_bed'],data['aligned_noncoding_bed'])
    tracker = Progress_tracker("Running bedtools subtract",1).estimate(False).display()
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    tracker.done()
    datasaver.save(data)
    #wiggle_to_bed
    info = "Converting especially conserved regions in wiggle file to bed"
    header_print(info)
    data['best_conserved_bed'] = create_path(output_folder,"best_conserved","bed",overwrite=overwrite)
    wiggle_to_bed(wig_file=data['chrom_conservation_wig'],
                  out_file=data['best_conserved_bed'],
                  genome_name=data['ref_genome'])
    datasaver.save(data)
    #filter_bed_with_wiggle
    info = "Intersecting wiggle bed with the potential cns bed"
    header_print(info)
    data['cns_bed'] = create_path(output_folder,"cns","bed",overwrite=overwrite)
    cmd = "bedtools intersect -a %s -b %s > %s" % (data['aligned_noncoding_bed'],data['best_conserved_bed'],data['cns_bed'])
    tracker = Progress_tracker("Running bedtools intersect",1).estimate(False).display()
    process = subprocess.Popen(cmd, shell=True)
    process.wait()
    tracker.done()
    datasaver.save(data)
    #slice_maf_by_bed
    info = "Slice multi-alignment file based on identified conserved non-coding regions:"
    header_print(info)
    data['cns_maf'] = create_path(output_folder,"cns","maf",overwrite=overwrite)
    slice_maf_by_bed(maf_file = data['chrom_seq_maf'],
                     bed_file = data['cns_bed'],
                     index_tag = "chrom_maf_index",
                     ref_genome = data['ref_genome'],
                     out_file = data['cns_maf'],
                     max_N_ratio = 0.5,
                     max_gap_ratio = 0.5,
                     min_len = 15)
    datasaver.save(data)
    #maf_to_bed
    info = "Convert per-genome CNS regions to .bed:"
    header_print(info)
    data['genome_cns_beds_folder'] = create_path(output_folder+"genome_cns_beds",overwrite=overwrite)
    cns_maf = Maf(file_name=data['cns_maf'])
    for genome in data['genomes']:
        data['genomes'][genome]['cns_bed'] = create_path(data['genome_cns_beds_folder'],genome+"_cns_"+chrom_name,"bed",overwrite=overwrite)
        bed = cns_maf.to_bed(genome_name=genome,index_tag="cns_maf_index")
        bed.save_file(data['genomes'][genome]['cns_bed'])
    # Release the parsed MAF before the next stages allocate.
    del cns_maf
    datasaver.save(data)
    #$bedtools closest
    info = "Find closest gene for each CNS region:"
    header_print(info)
    data['gene_proximity_beds_folder'] = create_path(output_folder+"gene_proximity_beds",overwrite=overwrite)
    for genome in data['genomes']:
        data['genomes'][genome]['gene_proximity_bed'] = \
            create_path(data['gene_proximity_beds_folder'],genome+"_proxim","bed",overwrite=overwrite)
        cmd = "bedtools closest -D a -a %s -b %s > %s" % \
            (data['genomes'][genome]['cns_bed'],
             data['genomes'][genome]['annot_bed'],
             data['genomes'][genome]['gene_proximity_bed'])
        process = subprocess.Popen(cmd, shell=True)
        process.wait()
    datasaver.save(data)
    #maf_and_proxim_bed_to_cns
    info = "Process proximity and maf files into .cns file:"
    header_print(info)
    data['results'] = create_path(output_folder,"identified_CNSs","cns",overwrite=overwrite)
    cns_proxim_beds = {genome:Bed13(data['genomes'][genome]['gene_proximity_bed']) for genome in data['genomes']}
    Maf(file_name=data['cns_maf'])\
        .cns_from_proxim_beds(cns_proxim_beds,"cns_maf_index")\
        .save_file(data['results'])
    datasaver.save(data)
    return data