def output_pickle_file(self, results_output_dir, sample_label):
    """
    Serialize self.events into '<results_output_dir>/pickle/<sample_label>.pickle'.

    The 'pickle' subdirectory (and any missing parent directories) is
    created if it does not already exist. The actual writing is delegated
    to pickle_utils.write_pickled_file.

    Returns the path of the pickle file that was written.
    """
    # Single-argument print(...) behaves identically under the Python 2
    # print statement and the Python 3 print function.
    print("Serializing a total of %d events by Pickle." % len(self.events))

    pickle_output_dir = os.path.join(results_output_dir, 'pickle')
    # makedirs also creates a missing results_output_dir; trying first and
    # checking afterwards avoids the race between an isdir() test and the
    # directory creation.
    try:
        os.makedirs(pickle_output_dir)
    except OSError:
        if not os.path.isdir(pickle_output_dir):
            raise

    pickle_events_filename = os.path.join(pickle_output_dir,
                                          sample_label + '.pickle')
    pickle_utils.write_pickled_file(self.events, pickle_events_filename)
    return pickle_events_filename
def output_pickle_file(self, results_output_dir, sample_label):
    """
    Write self.events to a pickle file named after the sample.

    The file is placed at '<results_output_dir>/pickle/<sample_label>.pickle';
    the 'pickle' directory and any missing parents are created on demand.
    Writing is delegated to pickle_utils.write_pickled_file.

    Returns the path of the written pickle file.
    """
    # A parenthesized single argument prints the same text on Python 2
    # (print statement) and Python 3 (print function).
    print("Serializing a total of %d events by Pickle." % len(self.events))

    pickle_output_dir = os.path.join(results_output_dir, 'pickle')
    try:
        # makedirs (unlike mkdir) succeeds even when results_output_dir
        # itself has not been created yet.
        os.makedirs(pickle_output_dir)
    except OSError:
        # An already-existing directory is fine; anything else is a
        # genuine failure and must propagate.
        if not os.path.isdir(pickle_output_dir):
            raise

    pickle_events_filename = os.path.join(pickle_output_dir,
                                          sample_label + '.pickle')
    pickle_utils.write_pickled_file(self.events, pickle_events_filename)
    return pickle_events_filename
def serialize_genes(gff_genes, output_dir):
    """
    Output genes into pickle files by chromosome, by gene.

    gff_genes maps each gene ID to a dict holding at least the keys
    "gene_object" (an object with a .chrom attribute) and "hierarchy".

    For every gene, '{gene_id: {"gene_object": ..., "hierarchy": ...}}' is
    written through pickle_utils.write_pickled_file to
    '<output_dir>/<chrom dir>/<gene_id>.pickle', where the chromosome
    directory name is the chromosome name, prefixed with "chr" if it does
    not already start with it.

    A shelve database '<output_dir>/genes_to_filenames.shelve' mapping each
    gene ID to the absolute path of its pickle file is written last.
    """
    genes_by_chrom = defaultdict(dict)

    # Split up genes by chromosome
    for gene_id, gene_info in gff_genes.items():
        gene_obj = gene_info["gene_object"]
        genes_by_chrom[gene_obj.chrom][gene_id] = {
            'gene_object': gene_obj,
            'hierarchy': gene_info["hierarchy"]
        }

    # Make sure the output directory exists even when there are no genes,
    # otherwise the shelve file below cannot be created. An empty
    # output_dir means the current directory and needs no creation.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Mapping from gene IDs to pickled filename
    gene_id_to_filename = {}

    # Serialize all the genes in each chromosome into their
    # own directory
    for chrom, chrom_genes in genes_by_chrom.items():
        # Format first so that a non-string chromosome name (e.g. an
        # integer) does not break the startswith() check.
        chrom_name = "%s" % (chrom,)
        if chrom_name.startswith("chr"):
            chrom_dir_name = chrom_name
        else:
            chrom_dir_name = "chr%s" % (chrom_name)

        # Make directory for chromosome if it doesn't already exist
        chrom_dir = os.path.join(output_dir, chrom_dir_name)
        if not os.path.isdir(chrom_dir):
            print("Making directory: %s" % (chrom_dir))
            os.makedirs(chrom_dir)

        t1 = time.time()
        # Serialize each gene into a separate file
        for gene_id, gene_entry in chrom_genes.items():
            gene_filename = os.path.abspath(
                os.path.join(chrom_dir, "%s.pickle" % (gene_id)))
            pickle_utils.write_pickled_file({gene_id: gene_entry},
                                            gene_filename)
            # Record what filename was associated with this gene ID
            gene_id_to_filename[gene_id] = gene_filename
        t2 = time.time()
        print(" - Chromosome serialization took %.2f seconds" % (t2 - t1))

    # Shelve the mapping from gene ids to filenames
    shelved_filename = os.path.join(output_dir, "genes_to_filenames.shelve")
    shelved_data = shelve.open(shelved_filename)
    try:
        for gene_id, gene_filename in gene_id_to_filename.items():
            shelved_data[gene_id] = gene_filename
    finally:
        # Always release the database handle, even if a write fails
        shelved_data.close()
def serialize_genes(gff_genes, output_dir):
    """
    Output genes into pickle files by chromosome, by gene.

    gff_genes maps gene IDs to dicts with (at least) the keys
    "gene_object" and "hierarchy"; the gene object's .chrom attribute
    decides which chromosome directory the gene goes into.

    Each gene is written, via pickle_utils.write_pickled_file, as
    '{gene_id: {"gene_object": ..., "hierarchy": ...}}' into
    '<output_dir>/<chrom dir>/<gene_id>.pickle'. Chromosome directory
    names always start with "chr" (the prefix is added when missing).

    Finally a shelve database '<output_dir>/genes_to_filenames.shelve' is
    written that maps every gene ID to the absolute path of its pickle.
    """
    genes_by_chrom = defaultdict(dict)

    # Split up genes by chromosome
    for gene_id, gene_info in gff_genes.items():
        gene_obj = gene_info["gene_object"]
        genes_by_chrom[gene_obj.chrom][gene_id] = {
            "gene_object": gene_obj,
            "hierarchy": gene_info["hierarchy"],
        }

    # With no genes, no chromosome directory is ever created, so create
    # the output directory explicitly for the shelve file. An empty
    # output_dir refers to the current directory and is left alone.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Mapping from gene IDs to pickled filename
    gene_id_to_filename = {}

    # Serialize all the genes in each chromosome into their
    # own directory
    for chrom, chrom_genes in genes_by_chrom.items():
        # Convert before testing the prefix so a non-string chromosome
        # identifier does not fail on startswith().
        chrom_name = "%s" % (chrom,)
        if chrom_name.startswith("chr"):
            chrom_dir_name = chrom_name
        else:
            chrom_dir_name = "chr%s" % (chrom_name)

        # Make directory for chromosome if it doesn't already exist
        chrom_dir = os.path.join(output_dir, chrom_dir_name)
        if not os.path.isdir(chrom_dir):
            print("Making directory: %s" % (chrom_dir))
            os.makedirs(chrom_dir)

        t1 = time.time()
        # Serialize each gene into a separate file
        for gene_id, gene_entry in chrom_genes.items():
            gene_filename = os.path.abspath(
                os.path.join(chrom_dir, "%s.pickle" % (gene_id)))
            pickle_utils.write_pickled_file({gene_id: gene_entry},
                                            gene_filename)
            # Record what filename was associated with this gene ID
            gene_id_to_filename[gene_id] = gene_filename
        t2 = time.time()
        print(" - Chromosome serialization took %.2f seconds" % (t2 - t1))

    # Shelve the mapping from gene ids to filenames
    shelved_filename = os.path.join(output_dir, "genes_to_filenames.shelve")
    shelved_data = shelve.open(shelved_filename)
    try:
        for gene_id, gene_filename in gene_id_to_filename.items():
            shelved_data[gene_id] = gene_filename
    finally:
        # Close the shelf even when a write raises, so the handle is
        # not leaked
        shelved_data.close()