def __generate_length_dependent_key(self, command_line_args):
    """Build a record Key scoped by peptide length.

    Pops 'num_records' and 'peptide_length' out of *command_line_args*
    (mutating it) so they are not forwarded as generic parameters. When a
    record count was given, the key targets the first *num_records*
    records; otherwise it spans the whole range.
    """
    record_count = command_line_args.pop('num_records', None)
    length = command_line_args.pop('peptide_length', None)
    if record_count is not None:
        chunk = r.DiscreteDataChunk(list(range(record_count)))
        return r.Key(chunk, peptide_length=length)
    return r.Key(r.WholeRange(), peptide_length=length)
def test_hdf5_array_repository_remove_records(array_repository_w_saved_data):
    """Removing one record shrinks the repo, drops it from lookups, and
    rewrites every index."""
    _, repo, _, ids = array_repository_w_saved_data
    # Snapshot every index so we can verify they all change after removal.
    prev_indices = {index: repo.get_index(index) for index in repo.indices}

    removed_ids = ranges.Key([ids[0]])
    repo.remove_records(removed_ids)

    removed_locations = repo.find(removed_ids)
    locations = repo.find(ids)

    # Collect the ids still present in the HDF5 datasets at the found locations.
    found_ids = []
    for path, dataset_indices in locations.items():
        with hdf5.HDF5File() as hdf5_file:
            attrs = hdf5_file[path].attrs
            found_ids.extend(
                record_id for record_id, position in attrs.items()
                if position in dataset_indices)

    assert repo.get_num_records() == len(ids) - 1
    assert len(removed_locations) == 0
    assert len(found_ids) == len(ids) - 1
    assert sorted(found_ids) != ids
    for index in repo.indices:
        assert prev_indices[index] != repo.get_index(index)
def test_hdf5_obj_repository_load_from_whole_range(obj_repository_w_saved_data, dict_sort_key):
    """A WholeRange key loads every saved dict with its original id."""
    dicts, repo, _, ids = obj_repository_w_saved_data
    key = ranges.Key(ranges.WholeRange())
    loaded_ids, loaded_data = zip(*repo.load(key))

    loaded_ids = sorted(loaded_ids)
    loaded_data = sorted(loaded_data, key=dict_sort_key)
    dicts.sort(key=dict_sort_key)

    assert loaded_data == dicts
    assert ids == loaded_ids
def test_hdf5_array_repository_load_from_whole_range(
        array_repository_w_saved_data):
    """A WholeRange key loads every saved array with its original id."""
    arrays, repo, _, ids = array_repository_w_saved_data
    expected = sorted(
        list(record) for record in arrays.reg_combos + arrays.cap_combos)

    key = ranges.Key(ranges.WholeRange())
    loaded_ids, loaded_data = zip(*repo.load(key))

    assert sorted(list(record) for record in loaded_data) == expected
    assert ids == sorted(loaded_ids)
def test_hdf5_obj_repository_load_from_index_vals(obj_repository_w_saved_data,
                                                  extract_attr_from_dicts, dict_sort_key):
    """Loading by each default index's values round-trips all saved dicts."""
    dicts, repo, _, ids = obj_repository_w_saved_data
    for index in repo.DEFAULT_INDICES:
        # Extract index values first, then sort — preserves original ordering.
        index_vals = extract_attr_from_dicts(dicts, index)
        loaded_ids, loaded_data = zip(*repo.load(ranges.Key(index_vals, index=index)))

        loaded_ids = sorted(loaded_ids)
        loaded_data = sorted(loaded_data, key=dict_sort_key)
        dicts.sort(key=dict_sort_key)

        assert loaded_data == dicts
        assert ids == loaded_ids
def __extract_operation_parameters(self, command_line_args):
    """Build the keyword arguments the selected operation needs.

    Maps ``self.operation`` to the repository keys that operation consumes,
    then folds the remaining command line arguments on top (command line
    values win on key collision).

    Args:
        command_line_args (dict): remaining parsed CLI arguments; may be
            mutated by ``__generate_length_dependent_key`` (pops
            'num_records' / 'peptide_length').

    Returns:
        dict: operation-specific keys merged with *command_line_args*.

    Raises:
        ValueError: if ``self.operation`` matches no known generator.
            (Previously an unrecognized operation crashed with an
            UnboundLocalError at the final ``update`` call.)
    """

    def whole_range_key():
        # Fresh Key per parameter so no two parameters share one object.
        return r.Key(r.WholeRange())

    operation = self.operation
    if operation == g.SidechainModifier.STRING:
        operation_parameters = {
            'sidechain_key': whole_range_key(),
            'connection_key': whole_range_key()
        }
    elif operation == g.MonomerGenerator.STRING:
        operation_parameters = {
            'sidechain_key': whole_range_key(),
            'backbone_key': whole_range_key()
        }
    elif operation == g.PeptidePlanGenerator.STRING:
        operation_parameters = {'monomer_key': whole_range_key()}
    elif operation == g.PeptideGenerator.STRING:
        operation_parameters = {
            'peptide_plan_key': self.__generate_length_dependent_key(command_line_args),
            'monomer_key': whole_range_key()
        }
    elif operation == g.TemplatePeptideGenerator.STRING:
        operation_parameters = {
            'peptide_key': self.__generate_length_dependent_key(command_line_args),
            'template_key': whole_range_key()
        }
    elif operation == g.MacrocycleGenerator.STRING:
        operation_parameters = {
            'template_peptide_key': self.__generate_length_dependent_key(command_line_args),
            'reaction_key': whole_range_key()
        }
    elif operation == g.InterMolecularReactionGenerator.STRING:
        operation_parameters = {
            'sidechain_key': whole_range_key(),
            'monomer_key': whole_range_key(),
            'template_key': whole_range_key()
        }
    elif operation == g.IntraMolecularReactionGenerator.STRING:
        operation_parameters = {'template_key': whole_range_key()}
    elif operation == g.ConformerGenerator.STRING:
        operation_parameters = {
            'macrocycle_key': self.__generate_length_dependent_key(command_line_args)
        }
    else:
        raise ValueError('Unrecognized operation: {}'.format(operation))

    operation_parameters.update(command_line_args)
    return operation_parameters
def test_key_batched_data_chunk(total_num_data, total_num_jobs, job_num,
                                static_range, expected_start, expected_end, expected_result):
    """A BatchedDataChunk key exposes the expected bounds and containment
    is symmetric with the static range."""
    chunk_key = ranges.Key(
        ranges.BatchedDataChunk(total_num_data, total_num_jobs, job_num))

    assert chunk_key.start == expected_start
    assert chunk_key.end == expected_end
    assert (chunk_key in static_range) == expected_result
    assert (static_range in chunk_key) == expected_result
def test_key_discrete_data_chunk(test_min, test_max, static_range, expected):
    """A DiscreteDataChunk key contains its own members and compares
    against the static range as expected."""
    # int(.../2) (truncation) kept deliberately — // would floor instead.
    midpoint = int((test_min + test_max) / 2)
    chunk_key = ranges.Key(
        ranges.DiscreteDataChunk([test_min, midpoint, test_max]))

    assert (chunk_key in static_range) == expected
    assert midpoint in chunk_key
def test_key_whole_range(static_range):
    """A WholeRange key and any static range mutually contain each other."""
    unbounded_key = ranges.Key(ranges.WholeRange())

    assert static_range in unbounded_key
    assert unbounded_key in static_range
def test_key_range(test_min, test_max, static_range, expected):
    """Range-key containment against the static range is symmetric."""
    bounded_key = ranges.Key(ranges.Range(test_min, test_max))

    assert (bounded_key in static_range) == expected
    assert (static_range in bounded_key) == expected