def _get_genome_id_to_path_map(
        self, file_path_of_file_mapping_genome_id_to_paths, list_of_drawn_genome_id):
    """
    Get a dictionary mapping each drawn genome id to the path of its genome file

    The mapping file is read with a MetadataTable; column 0 is used as key
    and column 1 as value. Only the ids in 'list_of_drawn_genome_id' are returned.

    @param file_path_of_file_mapping_genome_id_to_paths: File path to file with format 'id \t path'
    @type file_path_of_file_mapping_genome_id_to_paths: str | unicode
    @param list_of_drawn_genome_id: List of genome identifiers
    @type list_of_drawn_genome_id: list[str|unicode]

    @raise AssertionError: If one or more of the drawn genome ids is not in the mapping file

    @return: genome ids mapped to their genome file path
    @rtype: dict[str|unicode, str|unicode]
    """
    genome_id_to_path_map = {}
    mdt = MetadataTable(logfile=self._logfile, verbose=self._verbose)
    mdt.read(file_path_of_file_mapping_genome_id_to_paths)
    if mdt.get_number_of_rows() > 0:
        genome_id_to_path_map = mdt.get_map(0, 1, unique_key=True)

    # Explicit check instead of an 'assert' statement: asserts are removed when
    # Python runs with -O, which would turn a missing id into a bare KeyError below.
    # AssertionError is kept as exception type so existing callers are unaffected.
    if not set(genome_id_to_path_map).issuperset(list_of_drawn_genome_id):
        msg = "'{}' is missing one or more genome id".format(
            os.path.basename(file_path_of_file_mapping_genome_id_to_paths))
        raise AssertionError(msg)

    return {
        genome_id: genome_id_to_path_map[genome_id]
        for genome_id in list_of_drawn_genome_id
    }
def create_meta_table(self, file_path_metadata_table):
    """
    Write a metadata table that contains nothing but the genome id column

    The ids are taken from column 0 of the file at
    'self._file_path_reference_genome_locations', which is read without column names.
    The output is written with a header, using 'self._column_name_genome_id' as column name.

    @param file_path_metadata_table: File path the new metadata table is written to
    @type file_path_metadata_table: str|unicode

    @raise ValueError: If no rows were read from the input file

    @rtype: None
    """
    table = MetadataTable(
        separator=self._separator,
        logfile=self._logfile,
        verbose=self._verbose)
    table.read(self._file_path_reference_genome_locations, column_names=False)

    row_count = table.get_number_of_rows()
    if row_count == 0:
        raise ValueError("Invalid file content")

    # Keep only the id column: grab it, wipe the table, then re-insert it under its name.
    genome_ids = table.get_column(0)
    table.clear()
    table.insert_column(genome_ids, self._column_name_genome_id)

    table.write(file_path_metadata_table, column_names=True)
def _get_genome_id_to_path_map(self, file_path):
    """
    Get a map of genome_id to genome path

    The file is read without column names; column 0 is used as key and column 1 as value.
    If the file contains no rows, a warning is logged and an empty map is returned.

    @param file_path: File path
    @type file_path: str | unicode

    @raise AssertionError: If 'self.validate_file' rejects the file path

    @return: map of genome_id to genome path
    @rtype: dict[str|unicode, str|unicode]
    """
    # Explicit check instead of 'assert self.validate_file(...)': with -O the assert
    # statement, including the validation call itself, would be skipped entirely.
    # AssertionError is kept as exception type so existing callers are unaffected.
    if not self.validate_file(file_path):
        raise AssertionError("Invalid file: '{}'".format(file_path))

    data_table = MetadataTable(separator=self._separator, logfile=self._logfile, verbose=self._verbose)
    data_table.read(file_path, column_names=False)
    if data_table.get_number_of_rows() == 0:
        self._logger.warning("No data in file '{}'.".format(file_path))
        return {}
    return data_table.get_map(0, 1)