def test_read_write(self):
    """Round-trip 5e5z.mtz through a temporary file and compare space groups."""
    expected_hm = 'P 1 21 1'
    original = gemmi.read_mtz_file(full_path('5e5z.mtz'))
    self.assertEqual(original.spacegroup.hm, expected_hm)

    tmp_path = get_path_for_tempfile()
    original.write_to_file(tmp_path)
    reread = gemmi.read_mtz_file(tmp_path)
    # The temporary file is deleted before the re-read object is inspected.
    os.remove(tmp_path)
    self.assertEqual(reread.spacegroup.hm, expected_hm)
def combine_mtz(prefix, columns):
    """Combine columns from two MTZ files into one MTZ file using gemmi.

    The first entry of ``columns`` is the reference MTZ path; H, K, L, FP,
    SIGFP, FREE, PHWT and FOM are taken from it. The second entry is the
    model MTZ path; its PHWT and FOM are added as PHWT_model and FOM_model.
    The unit cell and space group of the output come from the reference file.

    Parameters
    ----------
    prefix : str
        Prefix used to build the output names ``<prefix>.mtz``,
        ``<prefix>.log`` and ``<prefix>.err``.
    columns : sequence of str
        Paths of the reference MTZ (index 0) and the model MTZ (index 1).

    Returns
    -------
    dict
        The dictionary of output file names, or ``{"error": ...}`` if the
        output MTZ file does not exist after writing.
    """
    result = {
        "hklout": "%s.mtz" % prefix,
        "stdout": "%s.log" % prefix,
        "stderr": "%s.err" % prefix,
    }
    hkl = ['H', 'K', 'L']

    reference_mtz = gemmi.read_mtz_file(columns[0])
    reference_df = pandas.DataFrame(
        data=numpy.array(reference_mtz, copy=False),
        columns=reference_mtz.column_labels(),
    )
    reference_sel_col_df = reference_df[
        hkl + ['FP', 'SIGFP', 'FREE', 'PHWT', 'FOM']
    ]

    model_mtz = gemmi.read_mtz_file(columns[1])
    model_df = pandas.DataFrame(
        data=numpy.array(model_mtz, copy=False),
        columns=model_mtz.column_labels(),
    )
    model_sel_col_df = model_df[hkl + ['PHWT', 'FOM']].rename(
        columns={'PHWT': 'PHWT_model', 'FOM': 'FOM_model'}
    )

    # Pair rows by Miller index instead of by row position: a positional
    # concat silently attaches model phases to the wrong reflections (or
    # appends rows with NaN H, K, L) when the two files differ in reflection
    # order or count. The left join keeps every reference reflection, in
    # reference order; reflections absent from the model get NaN.
    # NOTE(review): assumes at most one row per H, K, L in the model file
    # (merged data) -- confirm against the inputs used in practice.
    combined_df = reference_sel_col_df.merge(
        model_sel_col_df, on=hkl, how='left'
    )
    combined_array = combined_df.to_numpy()

    mtz_new = gemmi.Mtz()
    mtz_new.add_dataset('combined')
    # Column order must match the column order of combined_df.
    for label, column_type in (
        ('H', 'H'),
        ('K', 'H'),
        ('L', 'H'),
        ('FP', 'F'),
        ('SIGFP', 'Q'),
        ('FREE', 'I'),
        ('PHWT_ref', 'P'),
        ('FOM_ref', 'W'),
        ('PHWT_model', 'P'),
        ('FOM_model', 'W'),
    ):
        mtz_new.add_column(label, column_type, dataset_id=0)

    mtz_new.spacegroup = reference_mtz.spacegroup
    mtz_new.cell = reference_mtz.cell
    mtz_new.set_data(combined_array)
    mtz_new.write_to_file(result["hklout"])

    if not os.path.exists(result["hklout"]):
        return {"error": "No reflection data produced"}
    return result
def test_read_write(self):
    """Write 5e5z.mtz to a temporary file, read it back, and compare.

    The space group is always checked; the data arrays are compared only
    when numpy is available.
    """
    hm_symbol = 'P 1 21 1'
    source = gemmi.read_mtz_file(full_path('5e5z.mtz'))
    self.assertEqual(source.spacegroup.hm, hm_symbol)

    tmp_path = get_path_for_tempfile()
    source.write_to_file(tmp_path)
    roundtrip = gemmi.read_mtz_file(tmp_path)
    os.remove(tmp_path)
    self.assertEqual(roundtrip.spacegroup.hm, hm_symbol)

    if numpy is None:
        return
    self.assert_numpy_equal(numpy.array(source, copy=False), source.array)
    self.assert_numpy_equal(source.array, roundtrip.array)
def test_binner(self):
    """Check gemmi.Binner set up in several ways on 5e5z.mtz with 17 bins."""
    n_bins = 17
    mtz = gemmi.read_mtz_file(full_path('5e5z.mtz'))
    dstar3 = gemmi.Binner.Method.Dstar3

    def assert_limits(limits):
        # Every setup variant is expected to give the same bin limits.
        self.assertEqual(len(limits), n_bins)
        self.assertAlmostEqual(limits[10], 0.27026277234462415)

    # Setup directly from the Mtz object.
    binner = gemmi.Binner()
    binner.setup(n_bins, dstar3, mtz)
    assert_limits(binner.bin_limits)
    self.assertEqual(binner.bin_count(), n_bins)

    # Fresh binner, this time with the cell passed explicitly.
    binner = gemmi.Binner()
    binner.setup(n_bins, dstar3, mtz, cell=mtz.cell)
    assert_limits(binner.bin_limits)
    self.assertEqual(binner.get_bin_number([3, 3, 3]), 9)

    if numpy is None:
        return

    # Setup from arrays; these checks run only when numpy is available.
    binner.setup(n_bins, dstar3, mtz.make_miller_array(), cell=mtz.cell)
    assert_limits(binner.bin_limits)
    binner.setup_from_1_d2(n_bins, dstar3, mtz.make_1_d2_array(), mtz.cell)
    assert_limits(binner.bin_limits)

    miller_indices = [[0, 0, 1], [3, 3, 3], [10, 10, 10]]
    expected_bins = [0, 9, 16]
    self.assertEqual(list(binner.get_bin_numbers(miller_indices)),
                     expected_bins)
    one_over_d2 = [mtz.cell.calculate_1_d2(hkl) for hkl in miller_indices]
    self.assertEqual(list(binner.get_bin_numbers_from_1_d2(one_over_d2)),
                     expected_bins)
def __init__(self, mtz):
    """Read the MTZ file and initialise the ``info`` dictionary.

    Parameters
    ----------
    mtz : str
        Path passed to ``gemmi.read_mtz_file``.

    If reading raises ``RuntimeError`` the error is suppressed and
    ``self.mtz`` is left as ``None``. Every entry of ``self.info`` starts
    as an empty string.
    """
    # Define the attribute up front so it exists even when reading fails;
    # previously a failed read left the instance without ``mtz`` at all.
    self.mtz = None
    try:
        self.mtz = gemmi.read_mtz_file(mtz)
    except RuntimeError:
        # Best effort: an unreadable file leaves ``self.mtz`` as None.
        pass

    self.info = {
        'DataProcessingPointGroup': '',
        'DataProcessingSpaceGroup': '',
        'DataProcessingUnitCell': '',
        'DataProcessingUnitCellVolume': '',
        'DataProcessingLattice': '',
        'DataCollectionWavelength': '',
        'DataProcessingNsymop': '',
    }
def test_hewl():
    """Run Comit on the HEWL data and check the output reflection counts."""
    reflections = gemmi.read_mtz_file(data_path("hewl_data.mtz"))
    amplitudes = DataItem(reflections, "F_New,SIGF_New")
    map_coefficients = DataItem(reflections, "FWT,PHWT")

    comit = Comit(amplitudes, map_coefficients)

    # Both outputs must have as many reflections as the input file.
    assert comit.abcd.nreflections == reflections.nreflections
    assert comit.fphi.nreflections == reflections.nreflections
def test_1kv9():
    """Run Sheetbend on 1kv9 and check the model statistics are unchanged."""
    reflections = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    amplitudes = DataItem(reflections, "FP,SIGFP")
    free_flags = DataItem(reflections, "FREE")
    model = read_structure(data_path("1kv9_model.pdb"))

    result = Sheetbend(amplitudes, free_flags, model)

    assert ModelStats(model) == ModelStats(result.structure)
def read_mtz(mtzfile):
    """
    Build a DataSet from an MTZ reflection file.

    The file is read with gemmi and the resulting object is converted with
    ``from_gemmi``.

    An unmerged DataSet is constructed when the gemmi.Mtz object has an
    M/ISYM column and contains duplicated Miller indices. In that case the
    Miller indices are mapped to their observed values and a partiality flag
    is extracted into a boolean column labeled ``PARTIAL``.

    In all other cases a merged DataSet is constructed. Columns of dtype
    ``MTZInt`` labeled ``PARTIAL`` or ``CENTRIC`` are then interpreted as
    boolean flags marking partial or centric reflections, respectively.

    Parameters
    ----------
    mtzfile : str or file
        name of an mtz file or a file object

    Returns
    -------
    DataSet
    """
    return from_gemmi(gemmi.read_mtz_file(mtzfile))
def test_combine_data_items():
    """Combine two data items from 1kv9 and check labels and row count."""
    reflections = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    items = [
        DataItem(reflections, "FP,SIGFP"),
        DataItem(reflections, "FREE"),
    ]

    combined = _combine_data_items(items)

    expected_labels = ["H", "K", "L", "FP", "SIGFP", "FREE"]
    assert combined.column_labels() == expected_labels
    assert reflections.nreflections == combined.nreflections
def test_value_grid(self):
    """Compare FP values placed on a reciprocal-space grid with the MTZ table.

    For both axis orders and both ``half_l`` settings, every grid point in
    the reciprocal ASU must either match the FP value stored for that hkl
    (NaN matching NaN) or be 0 when the file has no such reflection. Points
    outside the ASU must have no reflection in the file.
    """
    mtz = gemmi.read_mtz_file(full_path('5e5z.mtz'))
    size = mtz.get_size_for_hkl()
    if numpy is None:
        return

    asu = gemmi.ReciprocalAsu(mtz.spacegroup)
    table = numpy.array(mtz, copy=False)
    fp_column = mtz.column_labels().index('FP')
    # Map (h, k, l) -> FP, built from the first three columns of each row.
    fp_by_hkl = {tuple(row[0:3]): row[fp_column] for row in table}

    for order in (gemmi.AxisOrder.XYZ, gemmi.AxisOrder.ZYX):
        for half_l in (True, False):
            grid = mtz.get_value_on_grid('FP', size,
                                         half_l=half_l, order=order)
            matched = 0
            for point in grid:
                hkl = grid.to_hkl(point)
                expected = fp_by_hkl.get(tuple(hkl))
                if not asu.is_in(hkl):
                    self.assertIsNone(expected)
                    continue
                if expected is None:
                    self.assertEqual(point.value, 0.)
                    continue
                self.assertTrue(point.value == expected
                                or (numpy.isnan(point.value)
                                    and numpy.isnan(expected)))
                matched += 1
            # Every reflection in the file must have been found on the grid.
            self.assertEqual(matched, table.shape[0])
def test_1kv9_dataitem_init_types():
    """Build the same DataItem from three kinds of column specification."""
    mtz = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    expected_label = "FP,SIGFP"

    # str
    from_string = DataItem(mtz, "FP,SIGFP")
    assert from_string.label() == expected_label

    # gemmi.MtzColumns
    from_slice = DataItem(mtz, mtz.columns[4:6])
    assert from_slice.label() == expected_label

    # List[gemmi.Mtz.Column]
    from_list = DataItem(mtz, [mtz.columns[4], mtz.columns[5]])
    assert from_list.label() == expected_label
def test_invalid_columns_for_mtz_with_multiple_datasets(columns):
    """Expect ValueError for ``columns`` once a second dataset is added.

    A dataset named "Old" with four type-A columns (HLA..HLD) is appended
    to the HEWL data before the DataItem is constructed.
    """
    mtz = gemmi.read_mtz_file(data_path("hewl_data.mtz"))
    mtz.add_dataset("Old")
    for label in ("HLA", "HLB", "HLC", "HLD"):
        mtz.add_column(label, "A")

    with pytest.raises(ValueError):
        DataItem(mtz, columns)
def read_info(proj, mtz_file):
    """
    Return the high resolution limit followed by the column labels
    for Free-R-flag, native F and Sigma(F) of the specified MTZ file.

    The file is read through ``temp_decrypted(proj, mtz_file)``.
    """
    with temp_decrypted(proj, mtz_file) as decrypted_path:
        reflections = gemmi.read_mtz_file(decrypted_path)

    labels = _get_column_labels(reflections)
    return (reflections.resolution_high(), *labels)
def test_1kv9():
    """Run Parrot on 1kv9 and check the output reflection counts."""
    asu_contents = AsuContents(data_path("1kv9_sequence.fasta"))
    reflections = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    amplitudes = DataItem(reflections, "FP,SIGFP")
    free_flags = DataItem(reflections, "FREE")
    hl_phases = DataItem(reflections, "HL")

    parrot = Parrot(asu_contents, amplitudes, free_flags, hl_phases)

    # Both outputs must have as many reflections as the input file.
    assert parrot.abcd.nreflections == reflections.nreflections
    assert parrot.fphi.nreflections == reflections.nreflections
def test_valid_columns_for_mtz_with_multiple_datasets(columns):
    """Expect ``columns`` to select the four type-A columns.

    A dataset named "Old" with four type-A columns (HLA..HLD) is appended
    to the HEWL data before the DataItem is constructed.
    """
    mtz = gemmi.read_mtz_file(data_path("hewl_data.mtz"))
    mtz.add_dataset("Old")
    for label in ("HLA", "HLB", "HLC", "HLD"):
        mtz.add_column(label, "A")

    item = DataItem(mtz, columns)

    assert item.types == "AAAA"
    assert len(item.columns) == 7
def load_dataset(datapath, as_gemmi=False):
    """
    Load the dataset found at ``datapath``.

    ``datapath`` is expected to be a list of path components; they are
    joined onto the directory containing this file. The file is read with
    ``gemmi.read_mtz_file`` when ``as_gemmi`` is true and with
    ``rs.read_mtz`` otherwise.
    """
    here = dirname(__file__)
    mtz_path = abspath(join(here, *datapath))
    reader = gemmi.read_mtz_file if as_gemmi else rs.read_mtz
    return reader(mtz_path)
def test_1kv9_prune():
    """Prune the 1kv9 model after one Refmac cycle; residues must decrease."""
    model = read_structure(data_path("1kv9_model.pdb"))
    before = ModelStats(model)

    reflections = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    amplitudes = DataItem(reflections, "FP,SIGFP")
    free_flags = DataItem(reflections, "FREE")
    refmac = Refmac(model, amplitudes, free_flags, cycles=1)

    pruned = Prune(model, refmac.fphi_best, refmac.fphi_diff)
    after = ModelStats(pruned.structure)

    assert after.residues < before.residues
def test_1kv9_fix_side_chains():
    """Fix side chains of 1kv9 after one Refmac cycle; residue count stays."""
    model = read_structure(data_path("1kv9_model.pdb"))
    before = ModelStats(model)

    reflections = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    amplitudes = DataItem(reflections, "FP,SIGFP")
    free_flags = DataItem(reflections, "FREE")
    refmac = Refmac(model, amplitudes, free_flags, cycles=1)

    fixed = FixSideChains(model, refmac.fphi_best, refmac.fphi_diff)
    after = ModelStats(fixed.structure)

    assert after.residues == before.residues
def test_1kv9():
    """Run one Refmac cycle on 1kv9 and check its outputs.

    Every reflection output must have as many reflections as the input
    file, and R-work must decrease.
    """
    model = read_structure(data_path("1kv9_model.pdb"))
    reflections = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    amplitudes = DataItem(reflections, "FP,SIGFP")
    free_flags = DataItem(reflections, "FREE")

    refmac = Refmac(model, amplitudes, free_flags, cycles=1)

    outputs = ("fsigf", "abcd", "fphi_best", "fphi_diff", "fphi_calc")
    for name in outputs:
        assert getattr(refmac, name).nreflections == reflections.nreflections
    assert refmac.rwork_change < 0
def test_1kv9():
    """Run one Buccaneer cycle on 1kv9 and check that residues were built."""
    reflections_path = data_path("1kv9_data.mtz")
    asu_contents = AsuContents(data_path("1kv9_sequence.fasta"))
    reflections = gemmi.read_mtz_file(reflections_path)

    buccaneer = Buccaneer(
        contents=asu_contents,
        fsigf=DataItem(reflections, "FP,SIGFP"),
        freer=DataItem(reflections, "FREE"),
        phases=DataItem(reflections, "HL"),
        cycles=1,
    )

    assert ModelStats(buccaneer.structure).residues > 0
def get_grid_from_file(
        mtz_path,
        columns=("FTW", "PHWT"),
        sample_rate=4,
):
    """Read an MTZ file and transform an F/phi column pair into a map.

    Parameters
    ----------
    mtz_path
        Path of the MTZ file; converted with ``str`` before reading.
    columns
        Sequence whose first two items are passed, in order, as the two
        column labels to ``transform_f_phi_to_map``. The default is a tuple
        rather than a list so it cannot be mutated between calls.
    sample_rate
        Forwarded to ``transform_f_phi_to_map`` as ``sample_rate``.

    Returns
    -------
    The grid returned by ``transform_f_phi_to_map``.
    """
    # NOTE(review): the default first label "FTW" looks like a typo for
    # "FWT" -- confirm against the MTZ files this is used with before
    # changing it.
    mtz = gemmi.read_mtz_file(str(mtz_path))
    return mtz.transform_f_phi_to_map(
        columns[0],
        columns[1],
        sample_rate=sample_rate,
    )
def test_f_phi_grid(self):
    """Check FWT/PHWT grids in both axis orders, then run the FFT test.

    With numpy available, the ZYX-ordered grid must equal the default
    grid with its axes reversed.
    """
    mtz = gemmi.read_mtz_file(full_path('5wkd_phases.mtz.gz'))
    size = mtz.get_size_for_hkl()

    for half_l in (False, True):
        default_grid = mtz.get_f_phi_on_grid('FWT', 'PHWT', size,
                                             half_l=half_l)
        zyx_grid = mtz.get_f_phi_on_grid('FWT', 'PHWT', size,
                                         half_l=half_l,
                                         order=gemmi.AxisOrder.ZYX)
        if numpy is not None:
            default_array = numpy.array(default_grid, copy=False)
            zyx_array = numpy.array(zyx_grid, copy=False)
            reversed_axes = default_array.transpose(2, 1, 0)
            self.assertTrue((zyx_array == reversed_axes).all())

    fft_test(self, mtz, 'FWT', 'PHWT', size)
def convert_amplitudes(hklin, seqin, prefix):
    """Run ctruncate on ``hklin`` using the first supported column set found.

    The candidate column sets are tried in a fixed order: FP/SIGFP, I/SIGI,
    anomalous F and anomalous I. Returns a dictionary with the output file
    names and the output column labels under ``"colout"``. Returns
    ``{"error": ...}`` when no candidate set is fully present, or when the
    first line of the stderr file is non-empty after stripping.
    """
    result = {
        "hklout": "%s.mtz" % prefix,
        "stdout": "%s.log" % prefix,
        "stderr": "%s.err" % prefix,
    }

    mtz = gemmi.read_mtz_file(hklin)
    available = [column.label for column in mtz.columns]
    candidates = [
        ["FP", "SIGFP"],
        ["I", "SIGI"],
        ["F(+)", "SIGF(+)", "F(-)", "SIGF(-)"],
        ["I(+)", "SIGI(+)", "I(-)", "SIGI(-)"],
    ]

    # Pick the first candidate whose labels are all present in the file.
    columns = None
    for candidate in candidates:
        if all(label in available for label in candidate):
            columns = candidate
            break
    if columns is None:
        return {"error": "Can't find columns to convert to F,SIGF"}

    arguments = [
        "-hklin", hklin,
        "-seqin", seqin,
        "-hklout", result["hklout"],
    ]
    selection = "/*/*/[%s]" % ",".join(columns)
    if len(columns) == 2:
        arguments.extend(["-colin", selection])
        result["colout"] = ["F", "SIGF"]
    else:
        arguments.extend(["-colano", selection])
        result["colout"] = ["FMEAN", "SIGFMEAN"]
    # Sets whose first label starts with "F" get the -amplitudes flag.
    if columns[0].startswith("F"):
        arguments.append("-amplitudes")

    utils.run("ctruncate", arguments,
              stdout=result["stdout"], stderr=result["stderr"])

    with open(result["stderr"]) as stderr_file:
        first_line = stderr_file.readline().strip()
    if first_line:
        return {"error": first_line}
    return result
def test_1kv9():
    """Run FindWaters on 1kv9 in water mode and in dummy-atom mode.

    The default mode must add waters only; ``dummy=True`` must add dummy
    atoms only. The residue count is unchanged in both modes.
    """
    model = read_structure(data_path("1kv9_model.pdb"))
    before = ModelStats(model)

    reflections = gemmi.read_mtz_file(data_path("1kv9_data.mtz"))
    amplitudes = DataItem(reflections, "FP,SIGFP")
    free_flags = DataItem(reflections, "FREE")
    refmac = Refmac(model, amplitudes, free_flags, cycles=1)

    with_waters = ModelStats(FindWaters(model, refmac.fphi_best).structure)
    assert with_waters.residues == before.residues
    assert with_waters.waters > before.waters
    assert with_waters.dummy_atoms == before.dummy_atoms

    with_dummies = ModelStats(
        FindWaters(model, refmac.fphi_best, dummy=True).structure
    )
    assert with_dummies.residues == before.residues
    assert with_dummies.waters == before.waters
    assert with_dummies.dummy_atoms > before.dummy_atoms
def test_remove_and_add_column(self):
    """Remove the FREE column, re-add it in place, and compare the data.

    After the column is restored at its original position with its saved
    values, the full data array must equal the snapshot taken beforehand.
    """
    label = 'FREE'
    mtz = gemmi.read_mtz_file(full_path('5e5z.mtz'))
    column = mtz.column_with_label(label)
    position = column.idx
    n_columns = len(mtz.columns)
    if numpy is None:
        return

    self.assert_numpy_equal(column.array, numpy.array(column, copy=False))
    saved_values = column.array.copy()
    snapshot = numpy.array(mtz, copy=True)
    self.assertEqual(snapshot.shape, (mtz.nreflections, n_columns))

    mtz.remove_column(position)
    self.assertEqual(len(mtz.columns), n_columns - 1)
    reduced_shape = numpy.array(mtz, copy=False).shape
    self.assertEqual(reduced_shape, (mtz.nreflections, n_columns - 1))

    restored = mtz.add_column(label, 'I', dataset_id=0, pos=position)
    numpy.array(restored, copy=False)[:] = saved_values
    self.assert_numpy_equal(snapshot, numpy.array(mtz, copy=False))
def refine_deposited_structure(key, structure):
    """Refine a deposited structure and record metadata from the result.

    Runs ``tasks.refine`` on the "cad" reflection output and the "no_unl"
    coordinate output of ``structure``, and stores the result under
    ``structure.jobs["refmac"]``. When the result carries no "error" entry,
    space group, resolution, ASU volume, data completeness and the refined
    R-work/R-free are added as metadata.

    Returns
    -------
    tuple
        ``(key, structure)``, with ``structure`` updated in place.
    """
    hklin = structure.jobs["cad"]["hklout"]
    xyzin = structure.jobs["no_unl"]["xyzout"]
    prefix = structure.path("refmac")
    result = tasks.refine(hklin, xyzin, prefix)
    structure.jobs["refmac"] = result

    # ``result`` is indexed by key below, so ``in`` tests its keys. The
    # previous check looked for the log line "Refmac: End of Refmac" among
    # those keys, which skipped the metadata. Failure is detected through
    # the "error" key instead.
    # NOTE(review): assumes tasks.refine reports failure as {"error": ...}
    # -- confirm against its implementation.
    if "error" not in result:
        mtz = gemmi.read_mtz_file(result["hklout"])
        structure.add_metadata("spacegroup", mtz.spacegroup.hm)
        structure.add_metadata("resolution", round(mtz.resolution_high(), 2))
        structure.add_metadata("asu_volume",
                               round(mtz.cell.volume / mtz.nsymop))
        structure.add_metadata("data_completeness",
                               result["data_completeness"])
        structure.add_metadata("refined_rwork", result["final_rwork"])
        structure.add_metadata("refined_rfree", result["final_rfree"])
    return key, structure
#!/usr/bin/env python3 # IF YOU ADD OR REMOVE LINES, ADJUST :lines: in docs/hkl.rst import gemmi import numpy import pandas from matplotlib import pyplot MTZ_PATH = 'tests/5wkd_phases.mtz.gz' SFCIF_PATH = 'tests/r5wkdsf.ent' # make DataFrame from MTZ file mtz = gemmi.read_mtz_file(MTZ_PATH) mtz_data = numpy.array(mtz, copy=False) mtz_df = pandas.DataFrame(data=mtz_data, columns=mtz.column_labels()) # (optional) store Miller indices as integers mtz_df = mtz_df.astype({label: 'int32' for label in 'HKL'}) # make DataFrame from mmCIF file cif_doc = gemmi.cif.read(SFCIF_PATH) rblock = gemmi.as_refln_blocks(cif_doc)[0] cif_df = pandas.DataFrame(data=rblock.make_index_array(), columns=['H', 'K', 'L']) cif_df['F_meas_au'] = rblock.make_float_array('F_meas_au') cif_df['d'] = rblock.make_d_array() # merge DataFrames df = pandas.merge(mtz_df, cif_df, on=['H', 'K', 'L']) # plot FP from MTZ as a function of F_meas_au from mmCIF pyplot.rc('font', size=8)
#! /Library/Frameworks/Python.framework/Versions/3.9/bin/python3.9 import gemmi import numpy as np #Read in reflection file as cctbx any_reflection_file mtz_name = 'example.mtz' mtz = gemmi.read_mtz_file(mtz_name) #Make 2D numpy array with all of the original MTZ data all_data = np.array(mtz, copy=False) #Define which columns of all_data to find the initial I and sigI values I_column = 3 sigI_column = 4 #Define tranlational vector along k (fraction of unit cell length) td = 0.245 #Calculate corrected I and sigI values for with different k-values #k_counter = k * 100 k_counter = 0 max_k = 50 #Initalise label list of lists to record labels for final MTZ label_list = [] while k_counter <= max_k: #k_label three digit version of k_counter k_label = "{0:0=3d}".format(k_counter) label_list.append(["I_{}".format(k_label), "sigI_{}".format(k_label)]) #Cycle through reflections scaling intensities and sigIs k = k_counter / 100 #mod_I and mod_sI lists will hold modified Is and sigIs. mod_I = [] mod_SI = [] i = 0 while i < all_data.shape[0]:
def get_map_from_mtz_path(path):
    """Read the MTZ file at ``path`` and return its FWT/PHWT map.

    The path is converted with ``str`` before reading, and the transform
    uses a sample rate of 4.
    """
    reflections = gemmi.read_mtz_file(str(path))
    return reflections.transform_f_phi_to_map('FWT', 'PHWT', sample_rate=4)
def _graft_column(target_data, source_data, target_col, source_col,
                  array_to_index_map, index_to_array_map):
    """Copy one column from ``source_data`` into ``target_data`` in place.

    For every row of ``target_data`` the matching source row is found by
    chaining ``array_to_index_map`` and ``index_to_array_map``. Rows with no
    match are set to 0. Returns the number of such rows.
    """
    skipped = 0
    for row in range(target_data.shape[0]):
        try:
            source_row = index_to_array_map[array_to_index_map[row]]
            value = source_data[source_row, source_col]
        except LookupError:
            # KeyError / IndexError: no counterpart in the source data.
            skipped += 1
            value = 0
        target_data[row, target_col] = value
    return skipped


def phase_graft(
        initial_mtz_path,
        event_mtz_path,
        out_path,
):
    """Replace FWT and PHWT in the initial MTZ with those of the event MTZ.

    Rows are matched through the maps built by ``array_to_index`` (initial
    file) and ``index_to_array`` (event file). Reflections without a match
    get 0 in the grafted column. The modified initial MTZ is written to
    ``out_path``.

    Parameters
    ----------
    initial_mtz_path
        Path of the MTZ file whose FWT/PHWT columns are overwritten.
    event_mtz_path
        Path of the MTZ file supplying the FWT/PHWT values.
    out_path
        Path the modified MTZ is written to.
    """
    initial_mtz = gemmi.read_mtz_file(str(initial_mtz_path))
    event_mtz = gemmi.read_mtz_file(str(event_mtz_path))

    array_to_index_map = array_to_index(initial_mtz)
    index_to_array_map = index_to_array(event_mtz)

    initial_mtz_data = np.array(initial_mtz, copy=False)
    event_mtz_data = np.array(event_mtz, copy=False)

    # FWT
    initial_mtz_fwt_index = initial_mtz.column_labels().index("FWT")
    event_mtz_fwt_index = event_mtz.column_labels().index("FWT")
    skipped = _graft_column(
        initial_mtz_data, event_mtz_data,
        initial_mtz_fwt_index, event_mtz_fwt_index,
        array_to_index_map, index_to_array_map,
    )
    initial_mtz.set_data(initial_mtz_data)
    print("\tSkipped {} reflections".format(skipped))

    # PHWT
    initial_mtz_phwt_index = initial_mtz.column_labels().index("PHWT")
    event_mtz_phwt_index = event_mtz.column_labels().index("PHWT")
    skipped = _graft_column(
        initial_mtz_data, event_mtz_data,
        initial_mtz_phwt_index, event_mtz_phwt_index,
        array_to_index_map, index_to_array_map,
    )
    initial_mtz.set_data(initial_mtz_data)

    print("\tCopied FWT from {} to {}".format(event_mtz_fwt_index,
                                              initial_mtz_fwt_index))
    print("\tCopied PHWT from {} to {}".format(event_mtz_phwt_index,
                                               initial_mtz_phwt_index))
    print("\tSkipped {} reflections".format(skipped))

    initial_mtz.write_to_file(str(out_path))