def _load_from_cache(cache_file_name, logger):
    """Load taxa data from the cache file.

    Returns the 3-tuple (taxon_id_to_scientific_name,
    taxon_id_to_common_name, taxon_id_to_parent), or (None, None, None)
    when the cache file is missing or was written by an incompatible
    file-format version.
    """
    if not os.path.exists(cache_file_name):
        return None, None, None
    logger.debug(" Loading taxa data from cache")
    # 'with' guarantees the handle is closed on every path
    # (the original opened the file and never closed it)
    with open(cache_file_name, 'rb') as data_file:
        #
        # Is file version correct
        #
        latest_version, errmsg = check_cache_file_version(
            data_file, _FILE_VERSION_MAJ)
        if not latest_version:
            logger.warning(errmsg)
            return None, None, None
        #
        # taxon data -- three consecutive marshal records
        #
        taxon_id_to_scientific_name = marshal.load(data_file)
        taxon_id_to_common_name = marshal.load(data_file)
        taxon_id_to_parent = marshal.load(data_file)
        return (taxon_id_to_scientific_name,
                taxon_id_to_common_name,
                taxon_id_to_parent)
def _load_from_cache(cache_file_name, logger, data_to_load=None):
    """Load ortholog species data from the cache file.

    Returns the 2-tuple (taxon_id_to_species_data,
    taxon_ids_to_ortholog_species_pair), or (None, None) when the cache
    file is missing or has an incompatible file-format version.

    data_to_load is accepted for interface compatibility but is unused:
    both cPickle sections are always read.  (The previous docstring,
    copied from the protein-tree loader, described sections this
    function never reads.)
    """
    if not os.path.exists(cache_file_name):
        # BUG FIX: this path returned a 3-tuple while the success path
        # returns a 2-tuple, so callers unpacking two values crashed
        # whenever the cache file was absent.
        return None, None
    logger.debug(" Loading protein tree from cache")
    logger.info(" Loading cache file = %s" % cache_file_name)
    # 'with' guarantees the handle is closed (original leaked it)
    with open(cache_file_name, 'rb') as data_file:
        #
        # Is file version correct
        #
        latest_version, errmsg = check_cache_file_version(
            data_file, _FILE_VERSION_MAJ)
        if not latest_version:
            logger.warning(errmsg)
            return None, None
        # two consecutive cPickle records
        taxon_id_to_species_data = cPickle.load(data_file)
        taxon_ids_to_ortholog_species_pair = cPickle.load(data_file)
        return taxon_id_to_species_data, taxon_ids_to_ortholog_species_pair
def _load_feature_types_from_cache(cache_file_name, logger):
    """Return the feature-type names stored in the cache file.

    Returns the section names sorted case-insensitively, or None when
    the cache file is missing or has an incompatible file-format
    version.
    """
    if not os.path.exists(cache_file_name):
        return None
    logger.debug(" Loading feature types from cache")
    # 'with' guarantees the handle is closed (original leaked it)
    with open(cache_file_name, 'rb') as data_file:
        #
        # Is file version correct
        #
        latest_version, errmsg = check_cache_file_version(
            data_file, _FILE_VERSION_MAJ)
        if not latest_version:
            logger.warning(errmsg)
            return None
        #
        # Skip the feature-type records: they are read only to advance
        # the file position to the section directory that follows them.
        #
        cnt_protein_features = marshal.load(data_file)
        for _ in range(cnt_protein_features):
            marshal.load(data_file)            # feature-type name (discarded)
            t_protein_feature.load(data_file)  # feature-type data (discarded)
        #
        # Retrieve features by type
        #
        file_pos_by_section = read_directory_of_sections(data_file)
        return sorted(file_pos_by_section.keys(), key=str.lower)
def _load_from_cache(cache_file_name, logger, data_to_load=None):
    """Load protein-tree data from the cache file.

    data_to_load is a list or set drawn from {"alignment", "node",
    "node_to_parent"} selecting which sections to read; all sections are
    read when None.  Usually just "node" or "node_to_parent" is useful.

    Returns the per-section dicts ordered by sorted section name
    (i.e. [alignment, node, node_to_parent]); sections not requested are
    empty dicts.  Returns (None, None, None) when the cache file is
    missing or has an incompatible file-format version.
    """
    start_time = time.time()
    if not os.path.exists(cache_file_name):
        return None, None, None
    logger.debug(" Loading protein tree from cache")
    logger.info(" Loading cache file = %s" % cache_file_name)
    # 'with' guarantees the handle is closed (original leaked it)
    with open(cache_file_name, 'rb') as data_file:
        #
        # Is file version correct
        #
        latest_version, errmsg = check_cache_file_version(
            data_file, _FILE_VERSION_MAJ)
        if not latest_version:
            logger.warning(errmsg)
            return None, None, None
        #
        # Retrieve protein tree data in sections
        #
        file_pos_by_section = read_directory_of_sections(data_file)
        #
        # Read all data if none specified
        #
        if data_to_load is None:  # was '== None'
            data_to_load = file_pos_by_section.keys()
        # section name -> (loader function, element type passed to it)
        load_method = {
            "alignment": (load_dict_of_objects, t_prot_tree_alignment),
            "node": (load_dict_of_objects, t_prot_tree_node),
            # node_to_parent is a plain marshal dict; 2nd arg is ignored
            "node_to_parent": ((lambda d, ignore: marshal.load(d)), dict),
        }
        data = dict()
        for section in file_pos_by_section:
            # initialise to empty so un-requested sections still appear
            data[section] = dict()
            if section not in data_to_load:
                continue
            # seek to the section's recorded offset and read it
            data_file.seek(file_pos_by_section[section], os.SEEK_SET)
            load_func, object_type = load_method[section]
            data[section] = load_func(data_file, object_type)
    end_time = time.time()
    logger.info(" Loaded tree data in %ds" % (end_time - start_time))
    # sorted keys yield: alignment, node, node_to_parent
    return [data[k] for k in sorted(data.keys())]
def _load_features_from_cache(cache_file_name, logger,
                              filter_feature_types=None):
    """Load protein features from the cache file.

    filter_feature_types is an optional iterable of feature-type names,
    matched case-insensitively against the types stored in the file;
    all types are loaded when None.

    Returns (protein_features, matches_by_type) where matches_by_type
    is {feature_type: {prot_id: [t_protein_feature_match, ...]}}, or
    (None, None) when the cache file is missing or has an incompatible
    file-format version.
    """
    start_time = time.time()
    if not os.path.exists(cache_file_name):
        logger.warning("The cache file %s does not exist" % cache_file_name)
        return None, None
    logger.debug(" Loading features from cache")
    logger.info(" Loading cache file = %s" % cache_file_name)
    # 'with' guarantees the handle is closed (original leaked it)
    with open(cache_file_name, 'rb') as data_file:
        #
        # Is file version correct
        #
        latest_version, errmsg = check_cache_file_version(
            data_file, _FILE_VERSION_MAJ)
        if not latest_version:
            logger.warning(errmsg)
            return None, None
        #
        # Retrieve feature type data
        #
        protein_features = dict()
        cnt_protein_features = marshal.load(data_file)
        for _ in range(cnt_protein_features):
            name = marshal.load(data_file)
            protein_features[name] = t_protein_feature.load(data_file)
        #
        # Retrieve features by type
        #
        file_pos_by_section = read_directory_of_sections(data_file)
        #
        # Read all features if none specified
        #
        if filter_feature_types is None:  # was '== None'
            filter_feature_types = file_pos_by_section.keys()
        matches_by_type = defaultdict(lambda: defaultdict(list))
        cnt_all_matches = 0
        #
        # have case-independent matches: map stored names, lowercased,
        # back to their original spelling
        #
        feature_type_lc_to_orig = dict(
            (ft.lower(), ft) for ft in file_pos_by_section.keys())
        for requested_type in filter_feature_types:
            feature_type_lc = requested_type.lower()
            if feature_type_lc not in feature_type_lc_to_orig:
                continue  # requested type not present in this cache file
            feature_type = feature_type_lc_to_orig[feature_type_lc]
            data_file.seek(file_pos_by_section[feature_type], os.SEEK_SET)
            #
            # read features for this type, grouped by protein id
            # ('range' replaces the py2-only 'xrange' the sibling loop
            # already avoided; inner index renamed so it no longer
            # shadowed the outer loop variable)
            #
            cnt_prot_ids = marshal.load(data_file)
            for _ in range(cnt_prot_ids):
                prot_id = marshal.load(data_file)
                cnt_matches = marshal.load(data_file)
                for _ in range(cnt_matches):
                    matches_by_type[feature_type][prot_id].append(
                        t_protein_feature_match.load(data_file))
                cnt_all_matches += cnt_matches
    end_time = time.time()
    logger.info(" Loaded %d features in %ds"
                % (cnt_all_matches, end_time - start_time))
    return protein_features, matches_by_type