def __init__(self,
             xray_structure,
             pdb_hierarchy,
             f_obs,
             r_free_flags,
             rigid_body_refine=False,
             optimize_b_factors=False,
             skip_twin_detection=False,
             scattering_table="n_gaussian"):
    """Compute R-work/R-free for a model, optionally after rigid-body refinement.

    The unit cell of ``f_obs`` and the space group of ``xray_structure`` are
    merged into a single crystal symmetry. That symmetry is applied to the
    structure, the data and the free-R flags (systematic absences are removed
    from both arrays) before the fmodel object is built.

    Parameters:
      xray_structure: model structure; supplies the space group.
      pdb_hierarchy: hierarchy used to derive per-chain rigid groups and to
        evaluate the selection strings (only read when rigid_body_refine).
      f_obs: observed data array; supplies the unit cell.
      r_free_flags: free-R flag array matching f_obs.
      rigid_body_refine: when true, run rigid-body refinement with one group
        per chain and report the refined statistics.
      optimize_b_factors: accepted but not referenced in this method.
      skip_twin_detection: forwarded to fmodel_simple.
      scattering_table: forwarded to fmodel_simple.

    Attributes set:
      r_work_start, r_free_start: R-factors before any refinement.
      r_work, r_free: final R-factors (equal to the start values when no
        refinement is requested).
      xray_structure: the symmetry-adjusted input structure, or the structure
        held by the refined fmodel.
    """
    self.r_work = None
    self.r_free = None
    self.xray_structure = None
    from mmtbx.utils import fmodel_simple
    from cctbx import crystal
    # Unit cell from the data, space group from the model.
    combined_symmetry = crystal.symmetry(
        unit_cell=f_obs.unit_cell(),
        space_group=xray_structure.space_group())
    xray_structure = xray_structure.customized_copy(
        crystal_symmetry=combined_symmetry)
    f_obs = f_obs.customized_copy(
        crystal_symmetry=combined_symmetry).eliminate_sys_absent()
    r_free_flags = r_free_flags.customized_copy(
        crystal_symmetry=combined_symmetry).eliminate_sys_absent()
    fmodel = fmodel_simple(
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        xray_structures=[xray_structure],
        skip_twin_detection=skip_twin_detection,
        scattering_table=scattering_table)
    self.r_work_start = fmodel.r_work()
    self.r_free_start = fmodel.r_free()
    if not rigid_body_refine:
        self.r_work = self.r_work_start
        self.r_free = self.r_free_start
        self.xray_structure = xray_structure
        return
    from mmtbx.refinement import rigid_body
    # null_out is expected to be provided by the enclosing module.
    selection_strings = rigid_body.rigid_groups_from_pdb_chains(
        pdb_hierarchy=pdb_hierarchy,
        xray_structure=xray_structure,
        group_all_by_chain=True,
        check_for_atoms_on_special_positions=True,
        log=null_out())
    # Build the selection cache once; it does not depend on the selection
    # string, so there is no reason to rebuild it for every rigid group.
    selection_cache = pdb_hierarchy.atom_selection_cache()
    selections = [
        selection_cache.selection(sele_str).iselection()
        for sele_str in selection_strings
    ]
    refined = rigid_body.manager(
        fmodel=fmodel,
        selections=selections,
        params=rigid_body.master_params.extract(),
        log=null_out())
    self.xray_structure = refined.fmodel.xray_structure
    self.r_work = refined.fmodel.r_work()
    self.r_free = refined.fmodel.r_free()
 def __init__(self,
              xray_structure,
              pdb_hierarchy,
              f_obs,
              r_free_flags,
              rigid_body_refine=False,
              optimize_b_factors=False,
              skip_twin_detection=False,
              scattering_table="n_gaussian"):
     """Evaluate a model against data, with optional rigid-body refinement.

     A common crystal symmetry is assembled from the unit cell of ``f_obs``
     and the space group of ``xray_structure``; the structure, the data and
     the free-R flags are all re-expressed in it, and systematically absent
     reflections are dropped from the two reflection arrays.

     Parameters:
       xray_structure: model structure (source of the space group).
       pdb_hierarchy: hierarchy from which per-chain rigid groups and atom
         selections are derived; only used when rigid_body_refine is true.
       f_obs: observed data (source of the unit cell).
       r_free_flags: free-R flags for f_obs.
       rigid_body_refine: refine each chain as a rigid body before reporting
         the final R-factors.
       optimize_b_factors: accepted but never read by this method.
       skip_twin_detection: passed through to fmodel_simple.
       scattering_table: passed through to fmodel_simple.

     Attributes set:
       r_work_start / r_free_start: statistics of the unrefined model.
       r_work / r_free: final statistics; identical to the start values if
         no refinement was requested.
       xray_structure: symmetry-adjusted input structure, or the structure
         of the refined fmodel.
     """
     self.r_work = None
     self.r_free = None
     self.xray_structure = None
     from mmtbx.utils import fmodel_simple
     from cctbx import crystal
     combined_symmetry = crystal.symmetry(
         unit_cell=f_obs.unit_cell(),
         space_group=xray_structure.space_group())
     xray_structure = xray_structure.customized_copy(
         crystal_symmetry=combined_symmetry)
     f_obs = f_obs.customized_copy(
         crystal_symmetry=combined_symmetry).eliminate_sys_absent()
     r_free_flags = r_free_flags.customized_copy(
         crystal_symmetry=combined_symmetry).eliminate_sys_absent()
     fmodel = fmodel_simple(f_obs=f_obs,
                            r_free_flags=r_free_flags,
                            xray_structures=[xray_structure],
                            skip_twin_detection=skip_twin_detection,
                            scattering_table=scattering_table)
     self.r_work_start = fmodel.r_work()
     self.r_free_start = fmodel.r_free()
     if not rigid_body_refine:
         # Nothing to refine: final statistics are the starting ones.
         self.r_work = self.r_work_start
         self.r_free = self.r_free_start
         self.xray_structure = xray_structure
     else:
         from mmtbx.refinement import rigid_body
         # null_out is expected to come from the enclosing module.
         selection_strings = rigid_body.rigid_groups_from_pdb_chains(
             pdb_hierarchy=pdb_hierarchy,
             xray_structure=xray_structure,
             group_all_by_chain=True,
             check_for_atoms_on_special_positions=True,
             log=null_out())
         # The selection cache is independent of the selection string, so it
         # is created once instead of once per rigid group.
         selection_cache = pdb_hierarchy.atom_selection_cache()
         selections = [
             selection_cache.selection(sele_str).iselection()
             for sele_str in selection_strings
         ]
         refined = rigid_body.manager(
             fmodel=fmodel,
             selections=selections,
             params=rigid_body.master_params.extract(),
             log=null_out())
         self.xray_structure = refined.fmodel.xray_structure
         self.r_work = refined.fmodel.r_work()
         self.r_free = refined.fmodel.r_free()
Example #3
0
    def run(self, args, command_name, out=sys.stdout):
        """Score every model of an ensemble PDB file and write the results.

        The mode is chosen from the extracted ``ensemble_probability`` scope:

        * ``assign_sigma_from_map`` with ``ensemble_sigma_map_input`` set:
          the first Miller array of that MTZ file is handed to
          ``get_map_sigma`` for every model and the returned structures are
          written to ``<stem>_vs_<map>_pensemble.pdb``.
        * otherwise: F(calc) is computed per model and averaged over the
          ensemble. For each model the per-atom values returned by
          ``get_map_sigma`` (read back from the occupancy column) against the
          averaged map are divided by those against the model's own map,
          clamped into (0, 1], and written as occupancies to
          ``<stem>_pensemble.pdb``. If ``sort_ensemble_by_nll_score`` or
          ``fobs_vs_fcalc_post_nll`` is set, the models are also ranked by
          their summed negative log probability and written in that order to
          ``nll_ordered_<stem>_pensemble.pdb``.

        ``<stem>`` is the input PDB file name without directories and without
        ``.pdb``.

        Side effects: ``sys.stderr`` is replaced by the multi-stream log and
        is not restored; ``<stem>_pensemble.log`` is opened and left open
        because it backs ``self.log``.

        Parameters:
          args: command-line arguments (file names and parameter overrides).
          command_name: program name used in the usage text and log header.
          out: accepted for interface compatibility; not referenced in this
            method (output goes to ``self.log``).

        Raises:
          Sorry: if the number of PDB files is not exactly one, if
            ``fobs_vs_fcalc_post_nll`` is set without a reflection file, if
            the sigma-map input is not an MTZ file, or if neither a
            reflection file nor ``fcalc_high_resolution`` is available.
        """
        # Parse the option flags; only the parsing itself is needed, the
        # returned object is not used below.
        iotbx_option_parser(
            usage="%s [options]" % command_name,
            description='Example: %s data.mtz data.mtz ref_model.pdb' %
            command_name).option(
                None,
                "--show_defaults",
                action="store_true",
                help="Show list of parameters.").process(args=args)

        processed_args = utils.process_command_line_args(
            args=args, log=sys.stdout, master_params=master_phil)
        params = processed_args.params
        if params is None:
            params = master_phil
        self.params = params.extract().ensemble_probability
        pdb_file_names = processed_args.pdb_file_names
        if len(pdb_file_names) != 1:
            raise Sorry("Only one PDB structure may be used")
        pdb_file = file_reader.any_file(pdb_file_names[0])
        # Common stem of every output file name.
        output_prefix = pdb_file_names[0].split('/')[-1].replace('.pdb', '')

        self.log = multi_out()
        self.log.register(label="stdout", file_object=sys.stdout)
        self.log.register(label="log_buffer",
                          file_object=StringIO(),
                          atexit_send_to=None)
        # Process-wide redirect; not restored on exit from this method.
        sys.stderr = self.log
        # Intentionally left open: self.log keeps writing to this handle.
        log_file = open(output_prefix + '_pensemble.log', "w")

        self.log.replace_stringio(old_label="log_buffer",
                                  new_label="log",
                                  new_file_object=log_file)
        utils.print_header(command_name, out=self.log)
        params.show(out=self.log)
        #
        f_obs = None
        r_free_flags = None
        reflection_files = processed_args.reflection_files

        if self.params.fobs_vs_fcalc_post_nll and len(reflection_files) == 0:
            raise Sorry(
                "Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

        if len(reflection_files) > 0:
            crystal_symmetry = processed_args.crystal_symmetry
            print('Reflection file : ',
                  processed_args.reflection_file_names[0],
                  file=self.log)
            utils.print_header("Model and data statistics", out=self.log)
            rfs = reflection_file_server(
                crystal_symmetry=crystal_symmetry,
                reflection_files=processed_args.reflection_files,
                log=self.log)

            parameters = extract_xtal_data.data_and_flags_master_params(
            ).extract()
            determine_data_and_flags_result = extract_xtal_data.run(
                reflection_file_server=rfs,
                parameters=parameters,
                data_parameter_scope="refinement.input.xray_data",
                flags_parameter_scope="refinement.input.xray_data.r_free_flags",
                data_description="X-ray data",
                keep_going=True,
                log=self.log)
            f_obs = determine_data_and_flags_result.f_obs
            r_free_flags = determine_data_and_flags_result.r_free_flags
            if r_free_flags is None:
                # No flags in the file: mark every reflection as working set.
                r_free_flags = f_obs.array(
                    data=flex.bool(f_obs.data().size(), False))

        # process PDB
        pdb_file.assert_file_type("pdb")
        #
        pdb_in = hierarchy.input(file_name=pdb_file.file_name)
        ens_pdb_hierarchy = pdb_in.construct_hierarchy()
        ens_pdb_hierarchy.atoms().reset_i_seq()
        ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
        number_structures = len(ens_pdb_xrs_s)
        print('Number of structure in ensemble : ',
              number_structures,
              file=self.log)

        # Calculate sigmas from input map only
        if (self.params.assign_sigma_from_map
                and self.params.ensemble_sigma_map_input is not None):
            # process MTZ
            input_file = file_reader.any_file(
                self.params.ensemble_sigma_map_input)
            if (input_file.file_type != "hkl"
                    or input_file.file_object.file_type() != "ccp4_mtz"):
                raise Sorry("Only MTZ format accepted for map input")
            # Only the first array of the file is used as map coefficients.
            map_coeffs_1 = input_file.file_server.miller_arrays[0]
            # get sigma levels from ensemble fc for each structure
            xrs_list = [
                get_map_sigma(ens_pdb_hierarchy=ens_pdb_hierarchy,
                              ens_pdb_xrs=ens_pdb_xrs,
                              map_coeffs_1=map_coeffs_1,
                              residue_detail=self.params.residue_detail,
                              ignore_hd=self.params.ignore_hd,
                              log=self.log)
                for ens_pdb_xrs in ens_pdb_xrs_s
            ]
            # write ensemble pdb file, occupancies as sigma level
            filename = (output_prefix + '_vs_' +
                        self.params.ensemble_sigma_map_input.replace(
                            '.mtz', '') + '_pensemble.pdb')
            write_ensemble_pdb(filename=filename,
                               xrs_list=xrs_list,
                               ens_pdb_hierarchy=ens_pdb_hierarchy)

        # Do full analysis vs Fobs
        else:
            model_map_coeffs = []
            fmodel = None
            # Get <fcalc>
            for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                ens_pdb_xrs.set_occupancies(1.0)
                if model == 0:
                    # If mtz not supplied get fobs from xray structure...
                    # Use input Fobs for scoring against nll
                    if self.params.fobs_vs_fcalc_post_nll:
                        dummy_fobs = f_obs
                    else:
                        # `is None`: array types may overload `==`.
                        if f_obs is None:
                            if self.params.fcalc_high_resolution is None:
                                raise Sorry(
                                    "Please supply high resolution limit or input mtz file."
                                )
                            # Only the high-resolution limit is applied;
                            # fcalc_low_resolution is not used when the
                            # structure factors are generated.
                            dummy_dmin = self.params.fcalc_high_resolution
                        else:
                            print(
                                'Supplied mtz used to determine high and low resolution cuttoffs',
                                file=self.log)
                            _, dummy_dmin = f_obs.d_max_min()
                        #
                        dummy_fobs = abs(
                            ens_pdb_xrs.structure_factors(
                                d_min=dummy_dmin).f_calc())
                        dummy_fobs.set_observation_type_xray_amplitude()
                        # If mtz supplied, free flags are over written to prevent array size error
                        r_free_flags = dummy_fobs.array(
                            data=flex.bool(dummy_fobs.data().size(), False))
                    #
                    fmodel = utils.fmodel_simple(
                        scattering_table="wk1995",
                        xray_structures=[ens_pdb_xrs],
                        f_obs=dummy_fobs,
                        target_name='ls',
                        bulk_solvent_and_scaling=False,
                        r_free_flags=r_free_flags)
                    # Zero-valued template array with the indices of f_calc.
                    f_calc_ave = fmodel.f_calc().array(
                        data=fmodel.f_calc().data() * 0).deep_copy()
                    # XXX Important to ensure scale is identical for each model and <model>
                    # NOTE(review): this assigns an attribute rather than
                    # calling a method - confirm it has the intended effect.
                    fmodel.set_scale_switch = 1.0
                    f_calc_ave_total = fmodel.f_calc().data().deep_copy()
                else:
                    fmodel.update_xray_structure(xray_structure=ens_pdb_xrs,
                                                 update_f_calc=True,
                                                 update_f_mask=False)
                    f_calc_ave_total += fmodel.f_calc().data().deep_copy()
                print('Model :', model + 1, file=self.log)
                print("\nStructure vs real Fobs (no bulk solvent or scaling)",
                      file=self.log)
                print('Rwork          : %5.4f ' % fmodel.r_work(),
                      file=self.log)
                print('Rfree          : %5.4f ' % fmodel.r_free(),
                      file=self.log)
                print('K1             : %5.4f ' % fmodel.scale_k1(),
                      file=self.log)
                fcalc_edm = fmodel.electron_density_map()
                fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type='Fc')
                fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(
                    column_root_label='Fc')
                if self.params.output_model_and_model_ave_mtz:
                    fcalc_mtz_dataset.mtz_object().write(
                        file_name=str(model + 1) + "_Fc.mtz")
                model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

            # Replace the per-model F(calc) with the ensemble average.
            fmodel.update(f_calc=f_calc_ave.array(f_calc_ave_total /
                                                  number_structures))
            print("\nEnsemble vs real Fobs (no bulk solvent or scaling)",
                  file=self.log)
            print('Rwork          : %5.4f ' % fmodel.r_work(), file=self.log)
            print('Rfree          : %5.4f ' % fmodel.r_free(), file=self.log)
            print('K1             : %5.4f ' % fmodel.scale_k1(), file=self.log)

            # Get <Fcalc> map
            fcalc_ave_edm = fmodel.electron_density_map()
            fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(
                map_type='Fc').deep_copy()
            fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(
                column_root_label='Fc')
            if self.params.output_model_and_model_ave_mtz:
                fcalc_ave_mtz_dataset.mtz_object().write(file_name="aveFc.mtz")
            # From here on the name refers to the FFT map, not coefficients.
            fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
            fcalc_ave_map_coeffs.apply_volume_scaling()
            fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
            fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)

            print("<Fcalc> Map Stats :", file=self.log)
            fcalc_ave_map_stats.show_summary(f=self.log)
            model_neg_ll = []

            number_previous_scatters = 0

            # Run through structure list again and get probability
            xrs_list = []
            for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
                if self.params.verbose:
                    print('\n\nModel                   : ',
                          model + 1,
                          file=self.log)
                # Get model atom sigmas vs Fcalc
                fcalc_map = model_map_coeffs[model].fft_map()
                fcalc_map.apply_volume_scaling()
                fcalc_map_data = fcalc_map.real_map_unpadded()
                fcalc_map_stats = maptbx.statistics(fcalc_map_data)
                if self.params.verbose:
                    print("Fcalc map stats         :", file=self.log)
                fcalc_map_stats.show_summary(f=self.log)

                xrs = get_map_sigma(
                    ens_pdb_hierarchy=ens_pdb_hierarchy,
                    ens_pdb_xrs=ens_pdb_xrs,
                    fft_map_1=fcalc_map,
                    model_i=model,
                    residue_detail=self.params.residue_detail,
                    ignore_hd=self.params.ignore_hd,
                    number_previous_scatters=number_previous_scatters,
                    log=self.log)
                fcalc_sigmas = xrs.scatterers().extract_occupancies()
                del fcalc_map
                # Get model atom sigmas vs <Fcalc>
                xrs = get_map_sigma(
                    ens_pdb_hierarchy=ens_pdb_hierarchy,
                    ens_pdb_xrs=ens_pdb_xrs,
                    fft_map_1=fcalc_ave_map_coeffs,
                    model_i=model,
                    residue_detail=self.params.residue_detail,
                    ignore_hd=self.params.ignore_hd,
                    number_previous_scatters=number_previous_scatters,
                    log=self.log)

                ### For testing other residue averaging options
                #print xrs.residue_selections

                fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
                # Probability of model given <model>
                prob = fcalc_ave_sigmas / fcalc_sigmas
                # XXX debug option
                if False:
                    for n, p in enumerate(prob):
                        print(' {0:5d} {1:5.3f}'.format(n, p), file=self.log)
                # Set probabilty between 0 and 1
                # XXX Make Histogram / more stats
                prob_lss_zero = flex.bool(prob <= 0)
                prob_grt_one = flex.bool(prob > 1)
                # Non-positive values become 0.001 (not 0) so that the log
                # taken below stays finite.
                prob.set_selected(prob_lss_zero, 0.001)
                prob.set_selected(prob_grt_one, 1.0)
                xrs.set_occupancies(prob)
                xrs_list.append(xrs)
                sum_neg_ll = sum(-flex.log(prob))
                model_neg_ll.append((sum_neg_ll, model))
                if self.params.verbose:
                    print('Model probability stats :', file=self.log)
                    # NOTE(review): prints the return value of show();
                    # confirm show() returns text rather than printing itself.
                    print(prob.min_max_mean().show(), file=self.log)
                    print('  Count < 0.0 : ',
                          prob_lss_zero.count(True),
                          file=self.log)
                    print('  Count > 1.0 : ',
                          prob_grt_one.count(True),
                          file=self.log)

                # For averaging by residue
                number_previous_scatters += ens_pdb_xrs.sites_cart().size()

            # write ensemble pdb file, occupancies as sigma level
            write_ensemble_pdb(
                filename=output_prefix + '_pensemble.pdb',
                xrs_list=xrs_list,
                ens_pdb_hierarchy=ens_pdb_hierarchy)

            # XXX Test ordering models by nll
            # XXX Test removing nth percentile atoms
            if (self.params.sort_ensemble_by_nll_score
                    or self.params.fobs_vs_fcalc_post_nll):
                # The ranking does not depend on the percentile: sort once.
                model_neg_ll = sorted(model_neg_ll)
                # Build the nll-ordered ensemble from the ranking itself.
                # It used to be collected from print_list, which is only
                # filled when fobs_vs_fcalc_post_nll is set, so sorting alone
                # wrote an empty ensemble.
                xrs_list_sorted_nll = [
                    xrs_list[model_index] for _, model_index in model_neg_ll
                ]
                for percentile in [1.0, 0.975, 0.95, 0.9, 0.8, 0.6, 0.2]:
                    f_calc_ave_total_reordered = None
                    print_list = []
                    for i_neg_ll in model_neg_ll:
                        xrs = xrs_list[i_neg_ll[1]]
                        nll_occ = xrs.scatterers().extract_occupancies()

                        # Set q=0 nth percentile atoms
                        sorted_nll_occ = sorted(nll_occ, reverse=True)
                        number_atoms = len(sorted_nll_occ)
                        percentile_prob_cutoff = sorted_nll_occ[
                            int(number_atoms * percentile) - 1]
                        cutoff_selections = flex.bool(
                            nll_occ < percentile_prob_cutoff)
                        cutoff_nll_occ = flex.double(nll_occ.size(),
                                                     1.0).set_selected(
                                                         cutoff_selections,
                                                         0.0)
                        #XXX Debug
                        if False:
                            print('\nDebug')
                            for x in range(len(cutoff_selections)):
                                print(cutoff_selections[x], nll_occ[x],
                                      cutoff_nll_occ[x])
                            print(percentile)
                            print(percentile_prob_cutoff)
                            print(cutoff_selections.count(True))
                            print(cutoff_selections.size())
                            print(cutoff_nll_occ.count(0.0))
                            print('Count q = 1           : ',
                                  cutoff_nll_occ.count(1.0))
                            print('Count scatterers size : ',
                                  cutoff_nll_occ.size())

                        xrs.set_occupancies(cutoff_nll_occ)
                        fmodel.update_xray_structure(xray_structure=xrs,
                                                     update_f_calc=True,
                                                     update_f_mask=True)

                        # Running sums over the models added so far.
                        # `is None`: the accumulator is an array type, so
                        # `==` is not a reliable emptiness test.
                        if f_calc_ave_total_reordered is None:
                            f_calc_ave_total_reordered = fmodel.f_calc().data(
                            ).deep_copy()
                            f_mask_ave_total_reordered = fmodel.f_masks(
                            )[0].data().deep_copy()
                            cntr = 1
                        else:
                            f_calc_ave_total_reordered += fmodel.f_calc().data(
                            ).deep_copy()
                            f_mask_ave_total_reordered += fmodel.f_masks(
                            )[0].data().deep_copy()
                            cntr += 1
                        fmodel.update(
                            f_calc=f_calc_ave.array(
                                f_calc_ave_total_reordered / cntr).deep_copy(),
                            f_mask=f_calc_ave.array(
                                f_mask_ave_total_reordered / cntr).deep_copy())

                        # Update solvent and scale
                        # XXX Will need to apply_back_trace on latest version
                        # NOTE(review): attribute assignment, as above.
                        fmodel.set_scale_switch = 0
                        fmodel.update_all_scales()

                        # Reset occ for outout
                        xrs.set_occupancies(nll_occ)
                        # k1 updated vs Fobs
                        if self.params.fobs_vs_fcalc_post_nll:
                            print_list.append([
                                cntr, i_neg_ll[0], i_neg_ll[1],
                                fmodel.r_work(),
                                fmodel.r_free()
                            ])

                    # Order models by nll and print summary
                    print(
                        '\nModels ranked by nll <Fcalc> R-factors recalculated',
                        file=self.log)
                    print('Percentile cutoff : {0:5.3f}'.format(percentile),
                          file=self.log)
                    print('      |      NLL     <Rw>     <Rf>    Ens Model',
                          file=self.log)
                    for info in print_list:
                        print(' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.
                              format(
                                  info[0],
                                  info[1],
                                  info[3],
                                  info[4],
                                  info[2] + 1,
                              ),
                              file=self.log)

                # Output nll ordered ensemble

                write_ensemble_pdb(
                    filename='nll_ordered_' + output_prefix +
                    '_pensemble.pdb',
                    xrs_list=xrs_list_sorted_nll,
                    ens_pdb_hierarchy=ens_pdb_hierarchy)
Example #4
0
def run(args=None, params=None, out=sys.stdout):
    """Compute CC* and related merging/model statistics (phenix.cc_star).

    Exactly one of ``args`` and ``params`` must be given. With ``args`` the
    parameters are extracted from the command line; with ``params`` an
    already extracted parameter object is used as is.

    F(model) is taken from the data file when a suitable complex array is
    found (auto-detected by an ``F-model``/``FMODEL`` label prefix, or
    selected with ``f_model_labels``); otherwise it is calculated from
    ``params.model`` with ``fmodel_simple``.

    Parameters:
      args: list of command-line arguments, or None.
      params: extracted parameters, or None.
      out: stream receiving all printed output.

    Returns:
      The object produced by
      ``mmtbx.validation.experimental.merging_and_model_statistics``.

    Raises:
      AssertionError: if both or neither of args/params are supplied.
      Usage: if args is empty or contains ``--help``.
      Sorry: for missing or unsuitable input (no data file, no unmerged
        data, ambiguous or missing F(model) array, missing PDB file, ...).
    """
    assert [args, params].count(None) == 1
    if args is not None:
        if (len(args) == 0) or ("--help" in args):
            raise Usage("""
  phenix.cc_star model.pdb data.mtz unmerged_data=data.hkl [n_bins=X] [options]
  phenix.cc_star model_refine_001.mtz unmerged_data=data.hkl [...]

Implementation of the method for assessing data and model quality described in:
  Karplus PA & Diederichs K (2012) Science 336:1030-3.

Full parameters:
  %s
  """ % master_phil.as_str(prefix=" ", attributes_level=1))
        import iotbx.phil
        cmdline = iotbx.phil.process_command_line_with_files(
            args=args,
            master_phil=master_phil,
            pdb_file_def="model",
            reflection_file_def="data")
        params = cmdline.work.extract()
    import mmtbx.command_line
    import mmtbx.validation.experimental
    from iotbx import merging_statistics
    from iotbx import file_reader
    if params.data is None:
        raise Sorry("Please specify a data file (usually MTZ format).")
    if params.unmerged_data is None:
        raise Sorry("Please specify unmerged_data file")
    hkl_in = file_reader.any_file(params.data, force_type="hkl")
    hkl_in.check_file_type("hkl")
    f_model = f_obs = r_free_flags = None
    if params.f_model_labels is None:
        # Auto-detect pre-calculated F(model) arrays by their label prefix.
        f_models = []
        f_model_labels = []
        for array in hkl_in.file_server.miller_arrays:
            labels = array.info().label_string()
            if (array.is_complex_array()
                    and labels.startswith(("F-model", "FMODEL"))):
                f_models.append(array)
                f_model_labels.append(labels)
        if len(f_models) > 1:
            raise Sorry(
                ("Multiple F(model) arrays found:\n%s\nPlease specify the " +
                 "'labels' parameter.") % "\n".join(f_model_labels))
        elif len(f_models) == 1:
            f_model = f_models[0]
            if f_model.anomalous_flag():
                info = f_model.info()
                f_model = f_model.average_bijvoet_mates().set_info(info)
            print("F(model):", file=out)
            f_model.show_summary(f=out, prefix="  ")
        else:
            # No F(model) in the file: load the experimental data instead;
            # F(model) is calculated from the PDB file further down.
            data_array = hkl_in.file_server.get_xray_data(
                file_name=params.data,
                labels=params.f_obs_labels,
                ignore_all_zeros=True,
                parameter_scope="")
            if data_array.is_xray_intensity_array():
                from cctbx import french_wilson
                f_obs = french_wilson.french_wilson_scale(
                    miller_array=data_array, out=out)
            else:
                f_obs = data_array
    else:
        for array in hkl_in.file_server.miller_arrays:
            array_labels = array.info().label_string()
            if array_labels == params.f_model_labels:
                if array.is_complex_array():
                    f_model = array
                    break
                else:
                    raise Sorry(
                        "The data in %s are not of the required type." %
                        array_labels)
        else:
            # Without this check both f_model and f_obs stay None and the
            # amplitude assertion below fails with an AttributeError.
            raise Sorry(
                "No array with the labels '%s' was found in %s." %
                (params.f_model_labels, params.data))
    if f_model is not None:
        assert (f_obs is None)
        for array in hkl_in.file_server.miller_arrays:
            labels = array.info().label_string()
            if labels == params.f_obs_labels:
                f_obs = array
                break
        else:
            try:
                # NOTE(review): file_name receives the label string here;
                # kept as in the original - confirm against the
                # reflection_file_server API.
                f_obs = hkl_in.file_server.get_amplitudes(
                    file_name=params.f_obs_labels,
                    labels=None,
                    convert_to_amplitudes_if_necessary=False,
                    parameter_name="f_obs_labels",
                    parameter_scope="",
                    strict=True)
            except Sorry:
                raise Sorry(
                    "You must supply a file containing both F-obs and F-model "
                    + "if you want to use a pre-calculated F-model array.")
    assert (f_obs.is_xray_amplitude_array())
    if f_obs.anomalous_flag():
        info = f_obs.info()
        f_obs = f_obs.average_bijvoet_mates().set_info(info)
    print("F(obs):", file=out)
    f_obs.show_summary(f=out, prefix="  ")
    print("", file=out)
    r_free_flags, test_flag_value = hkl_in.file_server.get_r_free_flags(
        file_name=params.data,
        label=params.r_free_flags.label,
        test_flag_value=params.r_free_flags.test_flag_value,
        disable_suitability_test=False,
        parameter_scope="")
    info = r_free_flags.info()
    # Convert the raw flag values into a boolean "is test reflection" array.
    r_free_flags = r_free_flags.customized_copy(
        data=r_free_flags.data() == test_flag_value).set_info(info)
    if r_free_flags.anomalous_flag():
        r_free_flags = r_free_flags.average_bijvoet_mates().set_info(info)
    print("R-free flags:", file=out)
    r_free_flags.show_summary(f=out, prefix="  ")
    print("", file=out)
    unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
        f_obs=f_obs,
        file_name=params.unmerged_data,
        data_labels=params.unmerged_labels,
        log=out)
    print("Unmerged intensities:", file=out)
    unmerged_i_obs.show_summary(f=out, prefix="  ")
    print("", file=out)
    if f_model is None:
        assert (f_obs is not None)
        if params.model is None:
            raise Sorry(
                "A PDB file is required if F(model) is not pre-calculated.")
        make_sub_header("Calculating F(model)", out=out)
        pdb_in = file_reader.any_file(params.model, force_type="pdb")
        pdb_in.check_file_type("pdb")
        pdb_symm = pdb_in.file_object.crystal_symmetry()
        if pdb_symm is None:
            # Fall back to the symmetry carried by the data array.
            pdb_symm = f_obs
        else:
            if f_obs.crystal_symmetry() is None:
                f_obs = f_obs.customized_copy(crystal_symmetry=pdb_symm)
            elif not pdb_symm.is_similar_symmetry(f_obs):
                mmtbx.command_line.show_symmetry_error(file1="PDB file",
                                                       file2="data file",
                                                       symm1=pdb_symm,
                                                       symm2=f_obs)
        xray_structure = pdb_in.file_object.xray_structure_simple(
            crystal_symmetry=pdb_symm)
        from mmtbx.utils import fmodel_simple
        # XXX this gets done anyway later, but they need to be consistent before
        # creating the fmodel manager
        if f_obs.anomalous_flag():
            f_obs = f_obs.average_bijvoet_mates()
        f_obs = f_obs.eliminate_sys_absent()
        f_obs, r_free_flags = f_obs.map_to_asu().common_sets(
            other=r_free_flags.map_to_asu())
        fmodel = fmodel_simple(f_obs=f_obs,
                               r_free_flags=r_free_flags,
                               xray_structures=[xray_structure],
                               skip_twin_detection=True,
                               scattering_table="n_gaussian")
        fmodel.show(log=out)
        # Use the arrays held by the fmodel object from here on.
        f_model = fmodel.f_model()
        f_obs = fmodel.f_obs()
        r_free_flags = fmodel.r_free_flags()
    else:
        if f_model.anomalous_flag():
            f_model = f_model.average_bijvoet_mates()

    stats = mmtbx.validation.experimental.merging_and_model_statistics(
        f_model=f_model,
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        unmerged_i_obs=unmerged_i_obs,
        n_bins=params.n_bins,
        sigma_filtering=params.sigma_filtering)
    stats.show_cc_star(out=out)
    if params.loggraph:
        stats.show_loggraph(out=out)
    print("", file=out)
    print("Reference:", file=out)
    print("  Karplus PA & Diederichs K (2012) Science 336:1030-3.", file=out)
    print("", file=out)
    return stats
Example #5
0
def run(args,
        command_name             = "mmtbx.model_vs_data",
        show_geometry_statistics = True,
        model_size_max_atoms     = 80000,
        data_size_max_reflections= 1000000,
        unit_cell_max_dimension  = 800.,
        return_fmodel_and_pdb    = False,
        out                      = None,
        log                      = sys.stdout):
  """
  Compare a model (PDB file) against reflection data and collect statistics.

  The command-line arguments are processed into a model, reflection data and
  R-free flags; an fmodel object is built from them and crystal, geometry,
  model-vs-data, PDB header, data and map statistics are accumulated in an
  mvd object, which is shown and returned.

  Parameters:
    args: command-line arguments; an empty list or ["--help"] prints the
      usage message and the default parameters to log and returns None.
    command_name: not used in this function body.
    show_geometry_statistics: forwarded to show_geometry(); when False,
      unknown nonbonded energy types are ignored while processing the PDB.
    model_size_max_atoms: not used in this function body.
    data_size_max_reflections: reflection count limit, enforced only when
      params.ignore_giant_models_and_datasets is set.
    unit_cell_max_dimension: limit on the longest unit cell edge, enforced
      only when params.ignore_giant_models_and_datasets is set.
    return_fmodel_and_pdb: when true, the processed PDB file and the fmodel
      are attached to the returned object as .pdb_file and .fmodel.
    out: stream handed to mvd_obj.show() and to the CC* report.
    log: stream for usage text, PDB processing messages and binned
      statistics.

  Returns:
    The populated mvd object, or None when only help was requested.

  Raises:
    Sorry: if no reflection file, crystal symmetry or PDB file is found, or
      if one of the size limits above is exceeded.
  """
  import mmtbx.f_model_info
  if(len(args)==0) or (args == ["--help"]) :
    # print is used as a function elsewhere in this file, so the
    # "print >> stream" form would raise TypeError here.
    print(msg, file=log)
    defaults(log=log, silent=False)
    return
  parsed = defaults(log=log, silent=True)
  #
  mvd_obj = mvd()
  #
  processed_args = utils.process_command_line_args(args = args,
    log = log, master_params = parsed)
  params = processed_args.params.extract()
  #
  # Mandatory inputs: reflections, symmetry and at least one PDB file
  reflection_files = processed_args.reflection_files
  if(len(reflection_files) == 0):
    raise Sorry("No reflection file found.")
  crystal_symmetry = processed_args.crystal_symmetry
  if(crystal_symmetry is None):
    raise Sorry("No crystal symmetry found.")
  if(len(processed_args.pdb_file_names) == 0):
    raise Sorry("No PDB file found.")
  pdb_file_names = processed_args.pdb_file_names
  #
  # Select F-obs and R-free flags, honouring user-supplied labels/resolution
  rfs = reflection_file_server(
    crystal_symmetry = crystal_symmetry,
    reflection_files = reflection_files)
  parameters = utils.data_and_flags_master_params().extract()
  if(params.f_obs_label is not None):
    parameters.labels = params.f_obs_label
  if(params.r_free_flags_label is not None):
    parameters.r_free_flags.label = params.r_free_flags_label
  if (params.high_resolution is not None) :
    parameters.high_resolution = params.high_resolution
  determine_data_and_flags_result = utils.determine_data_and_flags(
    reflection_file_server  = rfs,
    parameters              = parameters,
    data_parameter_scope    = "refinement.input.xray_data",
    flags_parameter_scope   = "refinement.input.xray_data.r_free_flags",
    data_description        = "X-ray data",
    keep_going              = True,
    log                     = StringIO())
  f_obs = determine_data_and_flags_result.f_obs
  number_of_reflections = f_obs.indices().size()
  if(params.ignore_giant_models_and_datasets and
     number_of_reflections > data_size_max_reflections):
    raise Sorry("Too many reflections: %d"%number_of_reflections)
  #
  max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
  if(params.ignore_giant_models_and_datasets and
     max_unit_cell_dimension > unit_cell_max_dimension):
    raise Sorry("Too large unit cell (max dimension): %s"%
      str(max_unit_cell_dimension))
  #
  r_free_flags = determine_data_and_flags_result.r_free_flags
  test_flag_value = determine_data_and_flags_result.test_flag_value
  if(r_free_flags is None):
    # No flags available: use an all-False array (no test reflections)
    r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))
    test_flag_value=None
  #
  mmtbx_pdb_file = mmtbx.utils.pdb_file(
    pdb_file_names        = pdb_file_names,
    cif_objects           = processed_args.cif_objects,
    crystal_symmetry      = crystal_symmetry,
    use_neutron_distances = (params.scattering_table=="neutron"),
    ignore_unknown_nonbonded_energy_types = not show_geometry_statistics,
    log                   = log)
  mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels = False)
  processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
  pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
  pdb_inp = mmtbx_pdb_file.pdb_inp
  #
  # just to avoid going any further with bad PDB file....
  pdb_inp.xray_structures_simple()
  #
  acp = processed_pdb_file.all_chain_proxies
  atom_selections = group_args(
    all           = acp.selection(string = "all"),
    macromolecule = acp.selection(string = "protein or dna or rna"),
    solvent       = acp.selection(string = "water"), # XXX single_atom_residue
    ligand        = acp.selection(string = "not (protein or dna or rna or water)"),
    backbone      = acp.selection(string = "backbone"),
    sidechain     = acp.selection(string = "sidechain"))
  #
  # The experiment type recorded in the PDB overrides the requested table
  # when it mentions NEUTRON.
  scattering_table = params.scattering_table
  exptl_method = pdb_inp.get_experiment_type()
  if (exptl_method is not None) and ("NEUTRON" in exptl_method) :
    scattering_table = "neutron"
  xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
    processed_pdb_file = processed_pdb_file,
    scattering_table   = scattering_table,
    d_min              = f_obs.d_min())
  xray_structures = xsfppf.xray_structures
  if(0): #XXX normalize occupancies if all models have occ=1 so the total=1
    n_models = len(xray_structures)
    for xrs in xray_structures:
      occ = xrs.scatterers().extract_occupancies()
      occ = occ/n_models
      xrs.set_occupancies(occ)
  model_selections = xsfppf.model_selections
  mvd_obj.collect(crystal = group_args(
    uc       = f_obs.unit_cell(),
    sg       = f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
    n_sym_op = f_obs.crystal_symmetry().space_group_info().type().group().order_z(),
    uc_vol   = f_obs.unit_cell().volume()))
  #
  hierarchy = pdb_inp.construct_hierarchy()
  pdb_atoms = hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  #
  # Extract TLS
  pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
  # Default record with empty selections; replaced below when usable TLS
  # information is present.
  pdb_tls = group_args(pdb_inp_tls           = pdb_inp_tls,
                       tls_selections        = [],
                       tls_selection_strings = [])
  # XXX no TLS + multiple models
  if(pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None and
     len(xray_structures)==1):
    pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
      pdb_inp_tls       = pdb_inp_tls,
      all_chain_proxies = mmtbx_pdb_file.processed_pdb_file.all_chain_proxies,
      xray_structure    = xsfppf.xray_structure_all)
    if(len(pdb_tls.tls_selections)==len(pdb_inp_tls.tls_params) and
       len(pdb_inp_tls.tls_params) > 0):
      xray_structures = [utils.extract_tls_and_u_total_from_pdb(
        f_obs          = f_obs,
        r_free_flags   = r_free_flags,
        xray_structure = xray_structures[0], # XXX no TLS + multiple models
        tls_selections = pdb_tls.tls_selections,
        tls_groups     = pdb_inp_tls.tls_params)]
  ###########################
  geometry_statistics = show_geometry(
    xray_structures          = xray_structures,
    processed_pdb_file       = processed_pdb_file,
    scattering_table         = scattering_table,
    hierarchy                = hierarchy,
    model_selections         = model_selections,
    show_geometry_statistics = show_geometry_statistics,
    mvd_obj                  = mvd_obj,
    atom_selections          = atom_selections)
  ###########################
  mp = mmtbx.masks.mask_master_params.extract()
  f_obs_labels = f_obs.info().label_string()
  # Sort data and flags the same way before building fmodel
  f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
  r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
  fmodel = utils.fmodel_simple(
    xray_structures     = xray_structures,
    scattering_table    = scattering_table,
    mask_params         = mp,
    f_obs               = f_obs,
    r_free_flags        = r_free_flags,
    skip_twin_detection = params.skip_twin_detection)
  # Number of input reflections that fmodel did not retain
  n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
  mvd_obj.collect(model_vs_data = show_model_vs_data(fmodel))
  # Extract information from PDB file header and output (if any)
  pub_r_work       = None
  pub_r_free       = None
  pub_high         = None
  pub_low          = None
  pub_sigma        = None
  published_results = pdb_inp.get_r_rfree_sigma(file_name=pdb_file_names[0])
  if(published_results is not None):
    pub_r_work = published_results.r_work
    pub_r_free = published_results.r_free
    pub_high   = published_results.high
    pub_low    = published_results.low
    pub_sigma  = published_results.sigma
  pub_program_name = pdb_inp.get_program_name()
  pub_solv_cont    = pdb_inp.get_solvent_content()
  pub_matthews     = pdb_inp.get_matthews_coeff()
  mvd_obj.collect(pdb_header = group_args(
    program_name    = pub_program_name,
    year            = pdb_inp.extract_header_year(),
    r_work          = pub_r_work,
    r_free          = pub_r_free,
    high_resolution = pub_high,
    low_resolution  = pub_low,
    sigma_cutoff    = pub_sigma,
    matthews_coeff  = pub_matthews,
    solvent_cont    = pub_solv_cont,
    tls             = pdb_tls,
    exptl_method    = exptl_method))
  #
  # Recompute R-factors using published cutoffs
  fmodel_cut = fmodel
  tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
  if(pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
    tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas()*pub_sigma
  # Resolution cutoffs are applied only when they differ from the data
  # limits by more than 0.03.
  if(pub_high is not None and abs(pub_high-fmodel.f_obs().d_min()) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
  if(pub_low is not None and abs(pub_low-fmodel.f_obs().d_max_min()[0]) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
  # Rebuild fmodel only if the cutoffs remove some, but not all, reflections
  if(tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
    fmodel_cut = utils.fmodel_simple(
      xray_structures     = xray_structures,
      scattering_table    = scattering_table,
      f_obs               = fmodel.f_obs().select(tmp_sel),
      r_free_flags        = fmodel.r_free_flags().select(tmp_sel),
      skip_twin_detection = params.skip_twin_detection)
  mvd_obj.collect(misc = group_args(
    r_work_cutoff = fmodel_cut.r_work(),
    r_free_cutoff = fmodel_cut.r_free(),
    n_refl_cutoff = fmodel_cut.f_obs().data().size()))
  mvd_obj.collect(data =
    show_data(fmodel          = fmodel,
              n_outl          = n_outl,
              test_flag_value = test_flag_value,
              f_obs_labels    = f_obs_labels,
              fmodel_cut      = fmodel_cut))
  # map statistics
  if(len(xray_structures)==1): # XXX no multi-model support yet
    mvd_obj.collect(maps = maps(fmodel = fmodel, mvd_obj = mvd_obj))
  # CC* and friends
  cc_star_stats = None
  if (params.unmerged_data is not None) :
    import mmtbx.validation.experimental
    import mmtbx.command_line
    f_obs = fmodel.f_obs().average_bijvoet_mates()
    unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
      f_obs=f_obs,
      file_name=params.unmerged_data,
      data_labels=params.unmerged_labels,
      log=null_out())
    cc_star_stats = mmtbx.validation.experimental.merging_and_model_statistics(
      f_model=fmodel.f_model().average_bijvoet_mates(),
      f_obs=f_obs,
      r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
      unmerged_i_obs=unmerged_i_obs,
      n_bins=params.n_bins)
  mvd_obj.show(log=out)
  if (cc_star_stats is not None) :
    cc_star_stats.show_model_vs_data(out=out, prefix="  ")
  if return_fmodel_and_pdb :
    mvd_obj.pdb_file = processed_pdb_file
    mvd_obj.fmodel = fmodel
  # Write one MTZ file of map coefficients per requested map type
  for map_name_string in params.map:
    map_type_obj = mmtbx.map_names(map_name_string = map_name_string)
    map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
      mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
    maps_obj = mmtbx.maps.compute_map_coefficients(fmodel = fmodel_cut, params =
      map_params.map_coefficients)
    fn = os.path.basename(processed_args.reflection_file_names[0])
    # output prefix = reflection file base name up to its first "."
    prefix = fn.split(".", 1)[0]
    file_name = prefix+"_%s_map_coeffs.mtz"%map_type_obj.format()
    maps_obj.write_mtz_file(file_name = file_name)
  # statistics in bins
  if(not fmodel.twin):
    print("Statistics in resolution bins:", file=log)
    mmtbx.f_model_info.r_work_and_completeness_in_resolution_bins(
      fmodel = fmodel, out = log, prefix="  ")
  # report map cc
  if(params.comprehensive and not fmodel_cut.twin and
     fmodel_cut.xray_structure is not None):
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.scattering_table = scattering_table
    real_space_correlation.simple(
       fmodel        = fmodel_cut,
       pdb_hierarchy = hierarchy,
       params        = rsc_params,
       log           = log,
       show_results  = True)
  #
  if(params.dump_result_object_as_pickle):
    # Pickle name: unique input file base names (up to the first "."),
    # joined by "_" in order of first appearance.
    output_prefixes = []
    for op in processed_args.pdb_file_names+processed_args.reflection_file_names:
      op = os.path.basename(op).split(".", 1)[0]
      if(not op in output_prefixes): output_prefixes.append(op)
    output_prefix = "_".join(output_prefixes)
    easy_pickle.dump("%s.pickle"%output_prefix, mvd_obj)
  return mvd_obj
def exercise():
    """
    Regression test for density-guided screening of a serine fragment.

    Generates the input files, simulates data with phenix.fmodel, builds an
    fmodel for the single-conformer model, then screens every residue with
    density_sampling.screen_residue(). Each conformation returned by the
    backrub screen is stored as a separate model and the resulting ensemble
    is written to ser_frag_guided_ensemble.pdb.
    """
    from mmtbx.building.alternate_conformations import density_sampling
    from mmtbx.utils import fmodel_simple
    from mmtbx.monomer_library import server
    from iotbx import file_reader
    import iotbx.pdb.hierarchy
    generate_inputs()
    fmodel_params = """
    high_resolution = 1.2
    r_free_flags_fraction = 0.1
    add_sigmas = True
    pdb_file = ser_frag.pdb
    output {
      label = F
      type = *real complex
      file_name = ser_frag.mtz
    }
    fmodel.k_sol = 0.3
    fmodel.b_sol = 20
    """
    # Close the parameter file before phenix.fmodel reads it
    with open("ser_frag_fmodel.eff", "w") as eff_file:
        eff_file.write(fmodel_params)
    assert (easy_run.fully_buffered("phenix.fmodel ser_frag_fmodel.eff").
            raise_if_errors().return_code == 0)
    assert os.path.isfile("ser_frag.mtz")
    mtz_in = file_reader.any_file("ser_frag.mtz")
    f_obs = mtz_in.file_server.miller_arrays[0]
    flags = mtz_in.file_server.miller_arrays[1]
    # convert the flag array to booleans (True where the stored flag is 1)
    flags = flags.customized_copy(data=(flags.data() == 1))
    mon_lib_srv = server.server()
    pdb_in = iotbx.pdb.hierarchy.input(file_name="ser_frag_single.pdb")
    hierarchy = pdb_in.hierarchy
    pdb_atoms = hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    sites_cart = pdb_atoms.extract_xyz()
    xrs = pdb_in.input.xray_structure_simple()
    fmodel = fmodel_simple(f_obs=f_obs,
                           xray_structures=[xrs],
                           scattering_table="n_gaussian",
                           r_free_flags=flags,
                           skip_twin_detection=True)
    models = []
    prev_res = next_res = next_next_res = None
    for chain in hierarchy.only_model().chains():
        residue_groups = chain.residue_groups()
        n_rg = len(residue_groups)
        for i_res, residue_group in enumerate(residue_groups):
            sites_orig = sites_cart.deep_copy()  # NOTE: currently unused
            # Neighbours are the first atom group of the following residues
            next_res = next_next_res = None
            if (i_res < (n_rg - 1)):
                next_res = residue_groups[i_res + 1].atom_groups()[0]
            if (i_res < (n_rg - 2)):
                next_next_res = residue_groups[i_res + 2].atom_groups()[0]
            atom_groups = residue_group.atom_groups()
            primary_conf = atom_groups[0]
            out = StringIO()
            confs = density_sampling.screen_residue(
                residue=primary_conf,
                prev_residue=prev_res,
                next_residue=next_res,
                next_next_residue=next_next_res,
                sites_cart=sites_cart,
                fmodel=fmodel,
                mon_lib_srv=mon_lib_srv,
                params=None,
                backrub=True,
                shear=False,
                verbose=True,
                out=out)
            prev_res = primary_conf
            if (confs is None):
                continue
            # TODO tweak density sampling to allow a backrubbed conformer with a
            # chi1 t rotamer for Ser 99
            if (i_res == 1):
                assert ("""  A SER  99     20.0    None       t"""
                        in out.getvalue())
            for conf in confs:
                sites_new = sites_cart.set_selected(conf.sites_selection,
                                                    conf.sites_selected())
                pdb_atoms.set_xyz(sites_new)
                models.append(hierarchy.only_model().detached_copy())
            # Repeat the screen without backrub; only the number of
            # conformations found for the second residue is reported.
            confs = density_sampling.screen_residue(
                residue=primary_conf,
                prev_residue=prev_res,
                next_residue=next_res,
                next_next_residue=next_next_res,
                sites_cart=sites_cart,
                fmodel=fmodel,
                mon_lib_srv=mon_lib_srv,
                params=None,
                backrub=False,
                out=out)
            if (i_res == 1):
                # call form of print: the statement form is a SyntaxError
                # when print is a function
                print(len(confs))
    new_hierarchy = iotbx.pdb.hierarchy.root()
    for i_model, conf in enumerate(models):
        conf.id = str(i_model + 1)
        new_hierarchy.append_model(conf)
    with open("ser_frag_guided_ensemble.pdb", "w") as pdb_out:
        pdb_out.write(new_hierarchy.as_pdb_string())
Example #7
0
def run (args=None, params=None, out=sys.stdout) :
  """
  Compute CC* and related merging/model statistics (Karplus & Diederichs
  2012) from merged data, unmerged intensities and either a pre-calculated
  F(model) array or a PDB model.

  Parameters:
    args: command-line arguments; exactly one of args and params must be
      given. An empty list or "--help" raises Usage.
    params: already extracted parameter object (alternative to args).
    out: stream that receives all printed output.

  Returns:
    The merging_and_model_statistics object.

  Raises:
    Usage: when args is empty or contains "--help".
    Sorry: for missing or unsuitable inputs (no data file, no unmerged data,
      ambiguous or wrongly typed F(model) arrays, no usable F-obs, no PDB
      file when F(model) has to be calculated).
  """
  assert [args, params].count(None) == 1
  if args is not None:
    if (len(args) == 0) or ("--help" in args) :
      raise Usage("""
  phenix.cc_star model.pdb data.mtz unmerged_data=data.hkl [n_bins=X] [options]
  phenix.cc_star model_refine_001.mtz unmerged_data=data.hkl [...]

Implementation of the method for assessing data and model quality described in:
  Karplus PA & Diederichs K (2012) Science 336:1030-3.

Full parameters:
  %s
  """ % master_phil.as_str(prefix=" ", attributes_level=1))
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
      args=args,
      master_phil=master_phil,
      pdb_file_def="model",
      reflection_file_def="data")
    params = cmdline.work.extract()
  import mmtbx.command_line
  import mmtbx.validation.experimental
  from iotbx import merging_statistics
  from iotbx import file_reader
  if (params.data is None) :
    raise Sorry("Please specify a data file (usually MTZ format).")
  if (params.unmerged_data is None) :
    raise Sorry("Please specify unmerged_data file")
  hkl_in = file_reader.any_file(params.data, force_type="hkl")
  hkl_in.check_file_type("hkl")
  f_model = f_obs = r_free_flags = None
  f_models = []
  data_arrays = []
  f_model_labels = []
  if (params.f_model_labels is None) :
    # No explicit labels: look for complex arrays whose label string starts
    # with "F-model" or "FMODEL".
    for array in hkl_in.file_server.miller_arrays :
      labels = array.info().label_string()
      if (array.is_complex_array()) :
        if (labels.startswith("F-model") or labels.startswith("FMODEL")) :
          f_models.append(array)
          f_model_labels.append(labels)
    if (len(f_models) > 1) :
      raise Sorry(("Multiple F(model) arrays found:\n%s\nPlease specify the "+
        "'labels' parameter.") % "\n".join(f_model_labels))
    elif (len(f_models) == 1) :
      f_model = f_models[0]
      if (f_model.anomalous_flag()) :
        info = f_model.info()
        f_model = f_model.average_bijvoet_mates().set_info(info)
      print("F(model):", file=out)
      f_model.show_summary(f=out, prefix="  ")
    else :
      # No F(model) in the file: read the experimental data instead and
      # convert intensities to amplitudes if necessary.
      data_array = hkl_in.file_server.get_xray_data(
        file_name=params.data,
        labels=params.f_obs_labels,
        ignore_all_zeros=True,
        parameter_scope="")
      if (data_array.is_xray_intensity_array()) :
        from cctbx import french_wilson
        f_obs = french_wilson.french_wilson_scale(
          miller_array=data_array,
          out=out)
      else :
        f_obs = data_array
  else :
    for array in hkl_in.file_server.miller_arrays :
      array_labels = array.info().label_string()
      if (array_labels == params.f_model_labels) :
        if (array.is_complex_array()) :
          f_model = array
          break
        else :
          raise Sorry("The data in %s are not of the required type." %
            array_labels)
  if (f_model is not None) :
    # A pre-calculated F(model) requires F-obs from the same file
    assert (f_obs is None)
    for array in hkl_in.file_server.miller_arrays :
      labels = array.info().label_string()
      if (labels == params.f_obs_labels) :
        f_obs = array
        break
    else :
      try :
        f_obs = hkl_in.file_server.get_amplitudes(
          file_name=params.f_obs_labels,
          labels=None,
          convert_to_amplitudes_if_necessary=False,
          parameter_name="f_obs_labels",
          parameter_scope="",
          strict=True)
      except Sorry :
        raise Sorry("You must supply a file containing both F-obs and F-model "+
          "if you want to use a pre-calculated F-model array.")
  if (f_obs is None) :
    # Reached e.g. when f_model_labels was specified but matched no array
    # in the file; report a user error instead of failing on None below.
    raise Sorry(("Could not obtain experimental amplitudes (F-obs) from %s; "+
      "please check the f_model_labels and f_obs_labels parameters.") %
      params.data)
  assert (f_obs.is_xray_amplitude_array())
  if (f_obs.anomalous_flag()) :
    info = f_obs.info()
    f_obs = f_obs.average_bijvoet_mates().set_info(info)
  print("F(obs):", file=out)
  f_obs.show_summary(f=out, prefix="  ")
  print("", file=out)
  r_free_flags, test_flag_value = hkl_in.file_server.get_r_free_flags(
    file_name=params.data,
    label=params.r_free_flags.label,
    test_flag_value=params.r_free_flags.test_flag_value,
    disable_suitability_test=False,
    parameter_scope="")
  info = r_free_flags.info()
  # Convert the flags to booleans (True where equal to the test flag value)
  r_free_flags = r_free_flags.customized_copy(
    data=r_free_flags.data()==test_flag_value).set_info(info)
  if (r_free_flags.anomalous_flag()) :
    r_free_flags = r_free_flags.average_bijvoet_mates().set_info(info)
  print("R-free flags:", file=out)
  r_free_flags.show_summary(f=out, prefix="  ")
  print("", file=out)
  unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
    f_obs=f_obs,
    file_name=params.unmerged_data,
    data_labels=params.unmerged_labels,
    log=out)
  print("Unmerged intensities:", file=out)
  unmerged_i_obs.show_summary(f=out, prefix="  ")
  print("", file=out)
  if (f_model is None) :
    # No pre-calculated F(model): compute it from the PDB model
    assert (f_obs is not None)
    if (params.model is None) :
      raise Sorry("A PDB file is required if F(model) is not pre-calculated.")
    make_sub_header("Calculating F(model)", out=out)
    pdb_in = file_reader.any_file(params.model, force_type="pdb")
    pdb_in.check_file_type("pdb")
    pdb_symm = pdb_in.file_object.crystal_symmetry()
    if (pdb_symm is None) :
      # fall back to the symmetry carried by the data
      pdb_symm = f_obs
    else :
      if (f_obs.crystal_symmetry() is None) :
        f_obs = f_obs.customized_copy(crystal_symmetry=pdb_symm)
      elif (not pdb_symm.is_similar_symmetry(f_obs)) :
        mmtbx.command_line.show_symmetry_error(
          file1="PDB file",
          file2="data file",
          symm1=pdb_symm,
          symm2=f_obs)
    xray_structure = pdb_in.file_object.xray_structure_simple(
      crystal_symmetry=pdb_symm)
    from mmtbx.utils import fmodel_simple
    # XXX this gets done anyway later, but they need to be consistent before
    # creating the fmodel manager
    if (f_obs.anomalous_flag()) :
      f_obs = f_obs.average_bijvoet_mates()
    f_obs = f_obs.eliminate_sys_absent()
    f_obs, r_free_flags = f_obs.map_to_asu().common_sets(
      other=r_free_flags.map_to_asu())
    fmodel = fmodel_simple(
      f_obs=f_obs,
      r_free_flags=r_free_flags,
      xray_structures=[xray_structure],
      skip_twin_detection=True,
      scattering_table="n_gaussian")
    fmodel.show(log=out)
    f_model = fmodel.f_model()
    # take the flags from fmodel so they share the indices of f_model
    r_free_flags = f_model.customized_copy(data=fmodel.arrays.free_sel)
  else :
    if (f_model.anomalous_flag()) :
      f_model = f_model.average_bijvoet_mates()
    f_model, r_free_flags = f_model.common_sets(other=r_free_flags)
  stats = mmtbx.validation.experimental.merging_and_model_statistics(
    f_model=f_model,
    f_obs=f_obs,
    r_free_flags=r_free_flags,
    unmerged_i_obs=unmerged_i_obs,
    n_bins=params.n_bins,
    sigma_filtering=params.sigma_filtering)
  stats.show_cc_star(out=out)
  if (params.loggraph) :
    stats.show_loggraph(out=out)
  print("", file=out)
  print("Reference:", file=out)
  print("  Karplus PA & Diederichs K (2012) Science 336:1030-3.", file=out)
  print("", file=out)
  return stats
def exercise () :
  """
  Regression test for screening of a fragment with backrub and shear moves.

  Generates the input files, simulates data with phenix.fmodel, builds an
  fmodel for the single-conformer model, then screens every residue with
  density_sampling.screen_residue(). Each returned conformation is stored
  as a separate model and the ensemble is written to
  shear_frag_naive_ensemble.pdb.
  """
  from mmtbx.building.alternate_conformations import density_sampling
  from mmtbx.utils import fmodel_simple
  from mmtbx.monomer_library import server
  from iotbx import file_reader
  import iotbx.pdb.hierarchy
  generate_inputs()
  fmodel_params = """
    high_resolution = 1.2
    r_free_flags_fraction = 0.1
    add_sigmas = True
    pdb_file = shear_frag.pdb
    output {
      label = F
      type = *real complex
      file_name = shear_frag.mtz
    }
    fmodel.k_sol = 0.3
    fmodel.b_sol = 20
    """
  # Close the parameter file before phenix.fmodel reads it
  with open("shear_frag_fmodel.eff", "w") as eff_file :
    eff_file.write(fmodel_params)
  assert (easy_run.fully_buffered("phenix.fmodel shear_frag_fmodel.eff"
    ).raise_if_errors().return_code == 0)
  assert os.path.isfile("shear_frag.mtz")
  mtz_in = file_reader.any_file("shear_frag.mtz")
  f_obs = mtz_in.file_server.miller_arrays[0]
  flags = mtz_in.file_server.miller_arrays[1]
  # convert the flag array to booleans (True where the stored flag is 1)
  flags = flags.customized_copy(data=(flags.data()==1))
  mon_lib_srv = server.server()
  pdb_in = iotbx.pdb.hierarchy.input(file_name="shear_frag_single.pdb")
  hierarchy = pdb_in.hierarchy
  pdb_atoms = hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  sites_cart = pdb_atoms.extract_xyz()
  xrs = pdb_in.input.xray_structure_simple()
  fmodel = fmodel_simple(
    f_obs=f_obs,
    xray_structures=[xrs],
    scattering_table="n_gaussian",
    r_free_flags=flags,
    skip_twin_detection=True)
  models = []
  prev_res = next_res = next_next_res = None
  for chain in hierarchy.only_model().chains() :
    residue_groups = chain.residue_groups()
    n_rg = len(residue_groups) # should be 4
    for i_res, residue_group in enumerate(residue_groups) :
      sites_orig = sites_cart.deep_copy() # NOTE: currently unused
      # Neighbours are the first atom group of the following residues
      next_res = next_next_res = None
      if (i_res < (n_rg - 1)) :
        next_res = residue_groups[i_res+1].atom_groups()[0]
      if (i_res < (n_rg - 2)) :
        next_next_res = residue_groups[i_res+2].atom_groups()[0]
      atom_groups = residue_group.atom_groups()
      primary_conf = atom_groups[0]
      out = StringIO()
      confs = density_sampling.screen_residue(
        residue=primary_conf,
        prev_residue=prev_res,
        next_residue=next_res,
        next_next_residue=next_next_res,
        sites_cart=sites_cart,
        fmodel=fmodel,
        mon_lib_srv=mon_lib_srv,
        params=None,
        backrub=True,
        shear=True,
        verbose=True,
        out=out)
      prev_res = primary_conf
      if (confs is None) :
        continue
      if (i_res == 1) :
        # The expected line must be looked up in the captured output;
        # asserting the bare string literal would always pass.
        assert ("""  A ILE   7     None     4.0      mt"""
                in out.getvalue())
      for conf in confs :
        sites_new = sites_cart.set_selected(conf.sites_selection,
          conf.sites_selected())
        pdb_atoms.set_xyz(sites_new)
        models.append(hierarchy.only_model().detached_copy())
  new_hierarchy = iotbx.pdb.hierarchy.root()
  for i_model, conf in enumerate(models) :
    conf.id = str(i_model + 1)
    new_hierarchy.append_model(conf)
  with open("shear_frag_naive_ensemble.pdb", "w") as pdb_out :
    pdb_out.write(new_hierarchy.as_pdb_string())
Example #9
0
def run(args,
        command_name="mmtbx.model_vs_data",
        show_geometry_statistics=True,
        model_size_max_atoms=80000,
        data_size_max_reflections=1000000,
        unit_cell_max_dimension=800.,
        return_fmodel_and_pdb=False,
        out=None,
        log=sys.stdout):
    """Compare a PDB model against reflection data and collect statistics.

    The function parses the command-line style ``args``, reads the
    reflection file(s) and PDB file(s) they name, builds an fmodel object
    and gathers crystal, geometry, data, PDB-header and R-factor
    information into an ``mvd`` result object, which is shown and returned.

    Parameters:
      args: list of command-line arguments. An empty list or ``["--help"]``
        prints ``msg`` and the default parameters to ``log`` and returns
        None.
      command_name: not used in the body of this function.
      show_geometry_statistics: forwarded to ``show_geometry``; its negation
        is passed as ``ignore_unknown_nonbonded_energy_types`` when the PDB
        file is processed.
      model_size_max_atoms: not used in the body of this function.
      data_size_max_reflections: maximum number of reflections accepted when
        ``params.ignore_giant_models_and_datasets`` is set.
      unit_cell_max_dimension: maximum unit cell edge accepted when
        ``params.ignore_giant_models_and_datasets`` is set.
      return_fmodel_and_pdb: if true, the processed PDB file and the fmodel
        are attached to the returned object as ``pdb_file`` and ``fmodel``.
      out: stream handed to ``mvd_obj.show`` and to the CC* statistics
        display.
      log: stream for the help text, input processing messages and the
        per-resolution-bin statistics.

    Returns:
      The populated ``mvd`` object, or None when only help was requested.

    Raises:
      Sorry: if no reflection file, crystal symmetry or PDB file is found,
        or if the data set / unit cell exceeds the limits above.

    Side effects: may write ``<prefix>_<map type>_map_coeffs.mtz`` files
    (one per entry in ``params.map``) and ``<prefixes>.pickle`` (when
    ``params.dump_result_object_as_pickle`` is set) to the current
    directory.
    """
    import mmtbx.f_model.f_model_info
    if (len(args) == 0) or (args == ["--help"]):
        print >> log, msg
        defaults(log=log, silent=False)
        return
    parsed = defaults(log=log, silent=True)
    #
    mvd_obj = mvd()
    #
    processed_args = utils.process_command_line_args(args=args,
                                                     log=log,
                                                     master_params=parsed)
    params = processed_args.params.extract()
    #
    # Validate that all three required inputs are present.
    reflection_files = processed_args.reflection_files
    if (len(reflection_files) == 0):
        raise Sorry("No reflection file found.")
    crystal_symmetry = processed_args.crystal_symmetry
    if (crystal_symmetry is None):
        raise Sorry("No crystal symmetry found.")
    if (len(processed_args.pdb_file_names) == 0):
        raise Sorry("No PDB file found.")
    pdb_file_names = processed_args.pdb_file_names
    #
    # Select observed data and R-free flags from the reflection file(s).
    rfs = reflection_file_server(crystal_symmetry=crystal_symmetry,
                                 reflection_files=reflection_files)
    parameters = utils.data_and_flags_master_params().extract()
    if (params.f_obs_label is not None):
        parameters.labels = params.f_obs_label
    if (params.r_free_flags_label is not None):
        parameters.r_free_flags.label = params.r_free_flags_label
    if (params.high_resolution is not None):
        parameters.high_resolution = params.high_resolution
    determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server=rfs,
        parameters=parameters,
        data_parameter_scope="refinement.input.xray_data",
        flags_parameter_scope="refinement.input.xray_data.r_free_flags",
        data_description="X-ray data",
        keep_going=True,
        log=StringIO())
    f_obs = determine_data_and_flags_result.f_obs
    number_of_reflections = f_obs.indices().size()
    if (params.ignore_giant_models_and_datasets
            and number_of_reflections > data_size_max_reflections):
        raise Sorry("Too many reflections: %d" % number_of_reflections)
    #
    max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
    if (params.ignore_giant_models_and_datasets
            and max_unit_cell_dimension > unit_cell_max_dimension):
        raise Sorry("Too large unit cell (max dimension): %s" %
                    str(max_unit_cell_dimension))
    #
    r_free_flags = determine_data_and_flags_result.r_free_flags
    test_flag_value = determine_data_and_flags_result.test_flag_value
    if (r_free_flags is None):
        # No test set available: use an all-False flag array.
        r_free_flags = f_obs.array(data=flex.bool(f_obs.data().size(), False))
        test_flag_value = None
    #
    mmtbx_pdb_file = mmtbx.utils.pdb_file(
        pdb_file_names=pdb_file_names,
        cif_objects=processed_args.cif_objects,
        crystal_symmetry=crystal_symmetry,
        use_neutron_distances=(params.scattering_table == "neutron"),
        ignore_unknown_nonbonded_energy_types=not show_geometry_statistics,
        log=log)
    mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels=False)
    processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
    pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
    pdb_inp = mmtbx_pdb_file.pdb_inp
    #
    # just to avoid going any further with bad PDB file....
    pdb_inp.xray_structures_simple()
    #
    acp = processed_pdb_file.all_chain_proxies
    atom_selections = group_args(
        all=acp.selection(string="all"),
        macromolecule=acp.selection(string="protein or dna or rna"),
        solvent=acp.selection(string="water"),  # XXX single_atom_residue
        ligand=acp.selection(string="not (protein or dna or rna or water)"),
        backbone=acp.selection(string="backbone"),
        sidechain=acp.selection(string="sidechain"))
    #
    # The experiment type recorded in the PDB input overrides the requested
    # scattering table when it mentions NEUTRON.
    scattering_table = params.scattering_table
    exptl_method = pdb_inp.get_experiment_type()
    if (exptl_method is not None) and ("NEUTRON" in exptl_method):
        scattering_table = "neutron"
    xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        d_min=f_obs.d_min())
    xray_structures = xsfppf.xray_structures
    if (0):  #XXX normalize occupancies if all models have occ=1 so the total=1
        n_models = len(xray_structures)
        for xrs in xray_structures:
            occ = xrs.scatterers().extract_occupancies()
            occ = occ / n_models
            xrs.set_occupancies(occ)
    model_selections = xsfppf.model_selections
    mvd_obj.collect(crystal=group_args(
        uc=f_obs.unit_cell(),
        sg=f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
        n_sym_op=f_obs.crystal_symmetry().space_group_info().type().group(
        ).order_z(),
        uc_vol=f_obs.unit_cell().volume()))
    #
    hierarchy = pdb_inp.construct_hierarchy()
    pdb_atoms = hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    #
    # Extract TLS
    pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
    pdb_tls = group_args(pdb_inp_tls=pdb_inp_tls,
                         tls_selections=[],
                         tls_selection_strings=[])
    # XXX no TLS + multiple models
    if (pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None
            and len(xray_structures) == 1):
        pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
            pdb_inp_tls=pdb_inp_tls,
            all_chain_proxies=mmtbx_pdb_file.processed_pdb_file.
            all_chain_proxies,
            xray_structure=xsfppf.xray_structure_all)
        # Only replace the structure when every TLS group got a selection.
        if (len(pdb_tls.tls_selections) == len(pdb_inp_tls.tls_params)
                and len(pdb_inp_tls.tls_params) > 0):
            xray_structures = [
                utils.extract_tls_and_u_total_from_pdb(
                    f_obs=f_obs,
                    r_free_flags=r_free_flags,
                    xray_structure=xray_structures[
                        0],  # XXX no TLS + multiple models
                    tls_selections=pdb_tls.tls_selections,
                    tls_groups=pdb_inp_tls.tls_params)
            ]
    ###########################
    geometry_statistics = show_geometry(
        xray_structures=xray_structures,
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        hierarchy=hierarchy,
        model_selections=model_selections,
        show_geometry_statistics=show_geometry_statistics,
        mvd_obj=mvd_obj,
        atom_selections=atom_selections)
    ###########################
    mp = mmtbx.masks.mask_master_params.extract()
    f_obs_labels = f_obs.info().label_string()
    f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
    r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
    fmodel = utils.fmodel_simple(
        xray_structures=xray_structures,
        scattering_table=scattering_table,
        mask_params=mp,
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        skip_twin_detection=params.skip_twin_detection)
    # Difference in reflection count between the input data and the data
    # held by fmodel.
    n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
    mvd_obj.collect(model_vs_data=show_model_vs_data(fmodel))
    # Extract information from PDB file header and output (if any)
    pub_r_work = None
    pub_r_free = None
    pub_high = None
    pub_low = None
    pub_sigma = None
    published_results = pdb_inp.get_r_rfree_sigma(file_name=pdb_file_names[0])
    if (published_results is not None):
        pub_r_work = published_results.r_work
        pub_r_free = published_results.r_free
        pub_high = published_results.high
        pub_low = published_results.low
        pub_sigma = published_results.sigma
    pub_program_name = pdb_inp.get_program_name()
    pub_solv_cont = pdb_inp.get_solvent_content()
    pub_matthews = pdb_inp.get_matthews_coeff()
    mvd_obj.collect(pdb_header=group_args(program_name=pub_program_name,
                                          year=pdb_inp.extract_header_year(),
                                          r_work=pub_r_work,
                                          r_free=pub_r_free,
                                          high_resolution=pub_high,
                                          low_resolution=pub_low,
                                          sigma_cutoff=pub_sigma,
                                          matthews_coeff=pub_matthews,
                                          solvent_cont=pub_solv_cont,
                                          tls=pdb_tls,
                                          exptl_method=exptl_method))
    #
    # Recompute R-factors using published cutoffs
    fmodel_cut = fmodel
    tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
    if (pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
        tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas() * pub_sigma
    # Resolution cutoffs are applied only when they differ from the data
    # limits by more than 0.03.
    if (pub_high is not None
            and abs(pub_high - fmodel.f_obs().d_min()) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
    if (pub_low is not None
            and abs(pub_low - fmodel.f_obs().d_max_min()[0]) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
    # Build a second fmodel only if the cutoffs remove some, but not all,
    # reflections.
    if (tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
        fmodel_cut = utils.fmodel_simple(
            xray_structures=xray_structures,
            scattering_table=scattering_table,
            f_obs=fmodel.f_obs().select(tmp_sel),
            r_free_flags=fmodel.r_free_flags().select(tmp_sel),
            skip_twin_detection=params.skip_twin_detection)
    mvd_obj.collect(
        misc=group_args(r_work_cutoff=fmodel_cut.r_work(),
                        r_free_cutoff=fmodel_cut.r_free(),
                        n_refl_cutoff=fmodel_cut.f_obs().data().size()))
    mvd_obj.collect(data=show_data(fmodel=fmodel,
                                   n_outl=n_outl,
                                   test_flag_value=test_flag_value,
                                   f_obs_labels=f_obs_labels,
                                   fmodel_cut=fmodel_cut))
    # CC* and friends
    cc_star_stats = None
    if (params.unmerged_data is not None):
        import mmtbx.validation.experimental
        import mmtbx.command_line
        f_obs = fmodel.f_obs().average_bijvoet_mates()
        unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
            f_obs=f_obs,
            file_name=params.unmerged_data,
            data_labels=params.unmerged_labels,
            log=null_out())
        cc_star_stats = mmtbx.validation.experimental.merging_and_model_statistics(
            f_model=fmodel.f_model().average_bijvoet_mates(),
            f_obs=f_obs,
            r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
            unmerged_i_obs=unmerged_i_obs,
            n_bins=params.n_bins)
    mvd_obj.show(log=out)
    if (cc_star_stats is not None):
        cc_star_stats.show_model_vs_data(out=out, prefix="  ")
    if return_fmodel_and_pdb:
        mvd_obj.pdb_file = processed_pdb_file
        mvd_obj.fmodel = fmodel
    if (len(params.map) > 0):
        # Output files are named after the first reflection file: base name
        # up to (not including) its first dot. The prefix does not depend on
        # the map type, so it is computed once.
        fn = os.path.basename(processed_args.reflection_file_names[0])
        prefix = fn.split(".", 1)[0]
        for map_name_string in params.map:
            map_type_obj = mmtbx.map_names(map_name_string=map_name_string)
            map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
                mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
            maps_obj = mmtbx.maps.compute_map_coefficients(
                fmodel=fmodel_cut, params=map_params.map_coefficients)
            file_name = prefix + "_%s_map_coeffs.mtz" % map_type_obj.format()
            maps_obj.write_mtz_file(file_name=file_name)
    # statistics in bins
    if (not fmodel.twin):
        print >> log, "Statistics in resolution bins:"
        mmtbx.f_model.f_model_info.r_work_and_completeness_in_resolution_bins(
            fmodel=fmodel, out=log, prefix="  ")
    # report map cc
    if (params.comprehensive and not fmodel_cut.twin
            and fmodel_cut.xray_structure is not None):
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.scattering_table = scattering_table
        real_space_correlation.simple(fmodel=fmodel_cut,
                                      pdb_hierarchy=hierarchy,
                                      params=rsc_params,
                                      log=log,
                                      show_results=True)
    #
    if (params.dump_result_object_as_pickle):
        # Pickle name: unique base-name prefixes (text before the first dot)
        # of all PDB and reflection files, in input order, joined by "_".
        output_prefixes = []
        for op in processed_args.pdb_file_names + processed_args.reflection_file_names:
            op = os.path.basename(op).split(".", 1)[0]
            if (op not in output_prefixes): output_prefixes.append(op)
        output_prefix = "_".join(output_prefixes)
        easy_pickle.dump("%s.pickle" % output_prefix, mvd_obj)
    return mvd_obj
  def run(self, args, command_name, out=sys.stdout):
    command_line = (iotbx_option_parser(
      usage="%s [options]" % command_name,
      description='Example: %s data.mtz data.mtz ref_model.pdb'%command_name)
      .option(None, "--show_defaults",
        action="store_true",
        help="Show list of parameters.")
      ).process(args=args)

    cif_file = None
    processed_args = utils.process_command_line_args(
                       args          = args,
                       log           = sys.stdout,
                       master_params = master_phil)
    params = processed_args.params
    if(params is None): params = master_phil
    self.params = params.extract().ensemble_probability
    pdb_file_names = processed_args.pdb_file_names
    if len(pdb_file_names) != 1 :
      raise Sorry("Only one PDB structure may be used")
    pdb_file = file_reader.any_file(pdb_file_names[0])
    self.log = multi_out()
    self.log.register(label="stdout", file_object=sys.stdout)
    self.log.register(
      label="log_buffer",
      file_object=StringIO(),
      atexit_send_to=None)
    sys.stderr = self.log
    log_file = open(pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.log', "w")

    self.log.replace_stringio(
        old_label="log_buffer",
        new_label="log",
        new_file_object=log_file)
    utils.print_header(command_name, out = self.log)
    params.show(out = self.log)
    #
    f_obs = None
    r_free_flags = None
    reflection_files = processed_args.reflection_files

    if self.params.fobs_vs_fcalc_post_nll:
      if len(reflection_files) == 0:
        raise Sorry("Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

    if len(reflection_files) > 0:
      crystal_symmetry = processed_args.crystal_symmetry
      print >> self.log, 'Reflection file : ', processed_args.reflection_file_names[0]
      utils.print_header("Model and data statistics", out = self.log)
      rfs = reflection_file_server(
        crystal_symmetry = crystal_symmetry,
        reflection_files = processed_args.reflection_files,
        log              = self.log)

      parameters = utils.data_and_flags_master_params().extract()
      determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server  = rfs,
        parameters              = parameters,
        data_parameter_scope    = "refinement.input.xray_data",
        flags_parameter_scope   = "refinement.input.xray_data.r_free_flags",
        data_description        = "X-ray data",
        keep_going              = True,
        log                     = self.log)
      f_obs = determine_data_and_flags_result.f_obs
      number_of_reflections = f_obs.indices().size()
      r_free_flags = determine_data_and_flags_result.r_free_flags
      test_flag_value = determine_data_and_flags_result.test_flag_value
      if(r_free_flags is None):
        r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))

    # process PDB
    pdb_file.assert_file_type("pdb")
    #
    pdb_in = hierarchy.input(file_name=pdb_file.file_name)
    ens_pdb_hierarchy = pdb_in.construct_hierarchy()
    ens_pdb_hierarchy.atoms().reset_i_seq()
    ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
    number_structures = len(ens_pdb_xrs_s)
    print >> self.log, 'Number of structure in ensemble : ', number_structures

    # Calculate sigmas from input map only
    if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
      # process MTZ
      input_file = file_reader.any_file(self.params.ensemble_sigma_map_input)
      if input_file.file_type == "hkl" :
        if input_file.file_object.file_type() != "ccp4_mtz" :
           raise Sorry("Only MTZ format accepted for map input")
        else:
          mtz_file = input_file
      else:
        raise Sorry("Only MTZ format accepted for map input")
      miller_arrays = mtz_file.file_server.miller_arrays
      map_coeffs_1 = miller_arrays[0]
      #
      xrs_list = []
      for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        # get sigma levels from ensemble fc for each structure
        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                          ens_pdb_xrs       = ens_pdb_xrs,
                          map_coeffs_1      = map_coeffs_1,
                          residue_detail    = self.params.residue_detail,
                          ignore_hd         = self.params.ignore_hd,
                          log               = self.log)
        xrs_list.append(xrs)
      # write ensemble pdb file, occupancies as sigma level
      filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_vs_' + self.params.ensemble_sigma_map_input.replace('.mtz','') + '_pensemble.pdb'
      write_ensemble_pdb(filename = filename,
                         xrs_list = xrs_list,
                         ens_pdb_hierarchy = ens_pdb_hierarchy
                         )

    # Do full analysis vs Fobs
    else:
      model_map_coeffs = []
      fmodel = None
      # Get <fcalc>
      for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        ens_pdb_xrs.set_occupancies(1.0)
        if model == 0:
          # If mtz not supplied get fobs from xray structure...
          # Use input Fobs for scoring against nll
          if self.params.fobs_vs_fcalc_post_nll:
            dummy_fobs = f_obs
          else:
            if f_obs == None:
              if self.params.fcalc_high_resolution == None:
                raise Sorry("Please supply high resolution limit or input mtz file.")
              dummy_dmin = self.params.fcalc_high_resolution
              dummy_dmax = self.params.fcalc_low_resolution
            else:
              print >> self.log, 'Supplied mtz used to determine high and low resolution cuttoffs'
              dummy_dmax, dummy_dmin = f_obs.d_max_min()
            #
            dummy_fobs = abs(ens_pdb_xrs.structure_factors(d_min = dummy_dmin).f_calc())
            dummy_fobs.set_observation_type_xray_amplitude()
            # If mtz supplied, free flags are over written to prevent array size error
            r_free_flags = dummy_fobs.array(data=flex.bool(dummy_fobs.data().size(),False))
          #
          fmodel = utils.fmodel_simple(
                     scattering_table         = "wk1995",
                     xray_structures          = [ens_pdb_xrs],
                     f_obs                    = dummy_fobs,
                     target_name              = 'ls',
                     bulk_solvent_and_scaling = False,
                     r_free_flags             = r_free_flags
                     )
          f_calc_ave = fmodel.f_calc().array(data = fmodel.f_calc().data()*0).deep_copy()
          # XXX Important to ensure scale is identical for each model and <model>
          fmodel.set_scale_switch = 1.0
          f_calc_ave_total = fmodel.f_calc().data().deep_copy()
        else:
          fmodel.update_xray_structure(xray_structure  = ens_pdb_xrs,
                                       update_f_calc   = True,
                                       update_f_mask   = False)
          f_calc_ave_total += fmodel.f_calc().data().deep_copy()
        print >> self.log, 'Model :', model+1
        print >> self.log, "\nStructure vs real Fobs (no bulk solvent or scaling)"
        print >> self.log, 'Rwork          : %5.4f '%fmodel.r_work()
        print >> self.log, 'Rfree          : %5.4f '%fmodel.r_free()
        print >> self.log, 'K1             : %5.4f '%fmodel.scale_k1()
        fcalc_edm        = fmodel.electron_density_map()
        fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type = 'Fc')
        fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
        if self.params.output_model_and_model_ave_mtz:
          fcalc_mtz_dataset.mtz_object().write(file_name = str(model+1)+"_Fc.mtz")
        model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

      fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total / number_structures))
      print >> self.log, "\nEnsemble vs real Fobs (no bulk solvent or scaling)"
      print >> self.log, 'Rwork          : %5.4f '%fmodel.r_work()
      print >> self.log, 'Rfree          : %5.4f '%fmodel.r_free()
      print >> self.log, 'K1             : %5.4f '%fmodel.scale_k1()

      # Get <Fcalc> map
      fcalc_ave_edm        = fmodel.electron_density_map()
      fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(map_type = 'Fc').deep_copy()
      fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
      if self.params.output_model_and_model_ave_mtz:
        fcalc_ave_mtz_dataset.mtz_object().write(file_name = "aveFc.mtz")
      fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
      fcalc_ave_map_coeffs.apply_volume_scaling()
      fcalc_ave_map_data   = fcalc_ave_map_coeffs.real_map_unpadded()
      fcalc_ave_map_stats  = maptbx.statistics(fcalc_ave_map_data)

      print >> self.log, "<Fcalc> Map Stats :"
      fcalc_ave_map_stats.show_summary(f = self.log)
      offset = fcalc_ave_map_stats.min()
      model_neg_ll = []

      number_previous_scatters = 0

      # Run through structure list again and get probability
      xrs_list = []
      for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        if self.params.verbose:
          print >> self.log, '\n\nModel                   : ', model+1
        # Get model atom sigmas vs Fcalc
        fcalc_map = model_map_coeffs[model].fft_map()
        fcalc_map.apply_volume_scaling()
        fcalc_map_data  = fcalc_map.real_map_unpadded()
        fcalc_map_stats  = maptbx.statistics(fcalc_map_data)
        if self.params.verbose:
          print >> self.log, "Fcalc map stats         :"
        fcalc_map_stats.show_summary(f = self.log)

        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                            ens_pdb_xrs       = ens_pdb_xrs,
                            fft_map_1         = fcalc_map,
                            model_i           = model,
                            residue_detail    = self.params.residue_detail,
                            ignore_hd         = self.params.ignore_hd,
                            number_previous_scatters = number_previous_scatters,
                            log               = self.log)
        fcalc_sigmas = xrs.scatterers().extract_occupancies()
        del fcalc_map
        # Get model atom sigmas vs <Fcalc>
        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                            ens_pdb_xrs       = ens_pdb_xrs,
                            fft_map_1         = fcalc_ave_map_coeffs,
                            model_i           = model,
                            residue_detail    = self.params.residue_detail,
                            ignore_hd         = self.params.ignore_hd,
                            number_previous_scatters = number_previous_scatters,
                            log               = self.log)

        ### For testing other residue averaging options
        #print xrs.residue_selections

        fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
        # Probability of model given <model>
        prob = fcalc_ave_sigmas / fcalc_sigmas
        # XXX debug option
        if False:
          for n,p in enumerate(prob):
            print >> self.log, ' {0:5d} {1:5.3f}'.format(n,p)
        # Set probabilty between 0 and 1
        # XXX Make Histogram / more stats
        prob_lss_zero = flex.bool(prob <= 0)
        prob_grt_one = flex.bool(prob > 1)
        prob.set_selected(prob_lss_zero, 0.001)
        prob.set_selected(prob_grt_one, 1.0)
        xrs.set_occupancies(prob)
        xrs_list.append(xrs)
        sum_neg_ll = sum(-flex.log(prob))
        model_neg_ll.append((sum_neg_ll, model))
        if self.params.verbose:
          print >> self.log, 'Model probability stats :'
          print >> self.log, prob.min_max_mean().show()
          print >> self.log, '  Count < 0.0 : ', prob_lss_zero.count(True)
          print >> self.log, '  Count > 1.0 : ', prob_grt_one.count(True)

        # For averaging by residue
        number_previous_scatters += ens_pdb_xrs.sites_cart().size()

      # write ensemble pdb file, occupancies as sigma level
      write_ensemble_pdb(filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                       xrs_list = xrs_list,
                       ens_pdb_hierarchy = ens_pdb_hierarchy
                       )

      # XXX Test ordering models by nll
      # XXX Test removing nth percentile atoms
      if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
        for percentile in [1.0,0.975,0.95,0.9,0.8,0.6,0.2]:
          model_neg_ll = sorted(model_neg_ll)
          f_calc_ave_total_reordered = None
          print_list = []
          for i_neg_ll in model_neg_ll:
            xrs = xrs_list[i_neg_ll[1]]
            nll_occ = xrs.scatterers().extract_occupancies()

            # Set q=0 nth percentile atoms
            sorted_nll_occ = sorted(nll_occ, reverse=True)
            number_atoms = len(sorted_nll_occ)
            percentile_prob_cutoff = sorted_nll_occ[int(number_atoms * percentile)-1]
            cutoff_selections = flex.bool(nll_occ < percentile_prob_cutoff)
            cutoff_nll_occ = flex.double(nll_occ.size(), 1.0).set_selected(cutoff_selections, 0.0)
            #XXX Debug
            if False:
              print '\nDebug'
              for x in xrange(len(cutoff_selections)):
                print cutoff_selections[x], nll_occ[x], cutoff_nll_occ[x]
              print percentile
              print percentile_prob_cutoff
              print cutoff_selections.count(True)
              print cutoff_selections.size()
              print cutoff_nll_occ.count(0.0)
              print 'Count q = 1           : ', cutoff_nll_occ.count(1.0)
              print 'Count scatterers size : ', cutoff_nll_occ.size()

            xrs.set_occupancies(cutoff_nll_occ)
            fmodel.update_xray_structure(xray_structure  = xrs,
                                         update_f_calc   = True,
                                         update_f_mask   = True)

            if f_calc_ave_total_reordered == None:
              f_calc_ave_total_reordered = fmodel.f_calc().data().deep_copy()
              f_mask_ave_total_reordered = fmodel.f_masks()[0].data().deep_copy()
              cntr = 1
            else:
              f_calc_ave_total_reordered += fmodel.f_calc().data().deep_copy()
              f_mask_ave_total_reordered += fmodel.f_masks()[0].data().deep_copy()
              cntr+=1
            fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total_reordered / cntr).deep_copy(),
                          f_mask = f_calc_ave.array(f_mask_ave_total_reordered / cntr).deep_copy()
                          )

            # Update solvent and scale
            # XXX Will need to apply_back_trace on latest version
            fmodel.set_scale_switch = 0
            fmodel.update_all_scales()

            # Reset occ for outout
            xrs.set_occupancies(nll_occ)
            # k1 updated vs Fobs
            if self.params.fobs_vs_fcalc_post_nll:
              print_list.append([cntr, i_neg_ll[0], i_neg_ll[1], fmodel.r_work(), fmodel.r_free()])

          # Order models by nll and print summary
          print >> self.log, '\nModels ranked by nll <Fcalc> R-factors recalculated'
          print >> self.log, 'Percentile cutoff : {0:5.3f}'.format(percentile)
          xrs_list_sorted_nll = []
          print >> self.log, '      |      NLL     <Rw>     <Rf>    Ens Model'
          for info in print_list:
            print >> self.log, ' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.format(
              info[0],
              info[1],
              info[3],
              info[4],
              info[2]+1,
              )
            xrs_list_sorted_nll.append(xrs_list[info[2]])

        # Output nll ordered ensemble

        write_ensemble_pdb(filename = 'nll_ordered_' + pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                       xrs_list = xrs_list_sorted_nll,
                       ens_pdb_hierarchy = ens_pdb_hierarchy
                       )