Example no. 1
def run(args, command_name="distl.sweep_strength"):
    help_str = """\
Similar to distl.signal_strength, but acting on a sweep of images, with
tabulation of the results and optional output of results as CSV file and
plots of number of spots and resolution with image number.
"""

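    # With no arguments or a help flag, show usage, an example command line
    # and the full PHIL parameter tree, then exit.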
    if (len(args) == 0
            or args[0] in ["H", "h", "-H", "-h", "help", "--help", "-help"]):
        print "usage:   %s image_prefix_*.img [parameter=value ...]" % command_name
        print "example: %s lysozyme_*.img distl.minimum_spot_area=8 plot.file_name=lysozyme.pdf" % command_name
        master_params.show(attributes_level=1, expert_level=1)
        print(help_str)
        return

    print "%s: characterization of candidate Bragg spots" % command_name

    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(
        home_scope="distl")
    image_file_names = []

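    # Sort command-line arguments: existing files are treated as images,
    # everything else is parsed as a PHIL parameter assignment.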
    for arg in args:
        if (os.path.isfile(arg)):
            image_file_names.append(arg)
        else:
            try:
                command_line_params = argument_interpreter.process(arg=arg)
            except KeyboardInterrupt:
                raise
            except Exception:
                raise Sorry("Unknown file or keyword: %s" % arg)
            else:
                phil_objects.append(command_line_params)

    if len(image_file_names) < 2:
        raise RuntimeError(
            "Please provide more than one file. Alternatively use "
            "distl.signal_strength to process a single image file.")

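    # Merge the collected PHIL objects into the master scope and extract the
    # working parameter values.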
    working_params = master_params.fetch(sources=phil_objects)
    params = working_params.extract()

    if params.distl.verbosity > 0:
        print "#Parameters used:"
        print "#phil __ON__"
        print
        working_params = master_params.format(python_object=params)
        working_params.show(expert_level=1)
        print
        print "#phil __OFF__"
        print

    from spotfinder.applications import signal_strength

    spotfinder_results = run_sweep_strength(image_file_names, params)
    print_table(spotfinder_results.S, keys=["N_spots_inlier", "resolution"])

    csv_file_name = params.distl.csv
    if csv_file_name is not None:
        with open(csv_file_name, 'w') as f:
            as_csv(spotfinder_results.S, out=f)
    plot_file_name = params.distl.plot.file_name
    if plot_file_name is not None:
        plot(spotfinder_results.S, file_name=plot_file_name)
Example no. 2
def run(args):
    phil = iotbx.phil.process_command_line(args=args,
                                           master_string=master_phil).show()
    usage = """\
%s input.experiment=experimentname input.run_num=N input.address=address
""" % libtbx.env.dispatcher_name

    params = phil.work.extract()
    if not os.path.exists(params.output.output_dir):
        raise Sorry("Output path not found:" + params.output.output_dir)

    if (params.input.experiment is None
            or params.input.run_num is None
            or params.input.address is None):
        raise Usage(usage)
    # set up psana
    if params.dispatch.events_accepted or params.dispatch.events_rejected:
        assert params.input.cfg is not None
        setConfigFile(params.input.cfg)

    dataset_name = "exp=%s:run=%s:idx" % (params.input.experiment,
                                          params.input.run_num)
    ds = DataSource(dataset_name)
    src = Source('DetInfo(%s)' % params.input.address)
    # set up multiprocessing with MPI
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()  # each process in MPI has a unique id, 0-indexed
    size = comm.Get_size()  # size: number of processes running in this job
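    # Cap the number of events to process; by default it is effectively unlimited.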

    if params.dispatch.max_events is None:
        max_events = sys.maxsize
    else:
        max_events = params.dispatch.max_events
    if params.input.dark is not None:
        dark = easy_pickle.load('%s' % params.input.dark)
    for run in ds.runs():
        times = run.times()
        # slicing with None endpoints is a no-op, so one slice covers all four
        # combinations of events_begin/events_end being set or unset
        times = times[params.dispatch.events_begin:params.dispatch.events_end]
        nevents = min(len(times), max_events)
        # chop the list into pieces, depending on rank. This assigns each process
        # events such that they get every Nth event, where N is the number of processes
        mytimes = [times[i] for i in range(nevents) if (i + rank) % size == 0]
        print(len(mytimes))
        #mytimes = mytimes[len(mytimes)-1000:len(mytimes)]
        totals = np.array([0.0])
        print("initial totals", totals)

        for i, t in enumerate(mytimes):
            print("Event", i, "of", len(mytimes), end=' ')
            evt = run.event(t)
            if params.dispatch.events_accepted or params.dispatch.events_all:
                if evt.get("skip_event") == True:
                    continue
            elif params.dispatch.events_rejected:
                if evt.get("skip_event") == False:
                    continue
            try:
                data = evt.get(Camera.FrameV1, src)
            except ValueError as e:
                src = Source('BldInfo(%s)' % params.input.address)
                data = evt.get(Bld.BldDataSpectrometerV1, src)
            if data is None:
                print("No data")
                continue
            #set default to determine FEE data type
            two_D = False
            #check attribute of data for type
            try:
                data = np.array(data.data16().astype(np.int32))
                two_D = True
            except AttributeError as e:
                data = np.array(data.hproj().astype(np.float64))

            if two_D:
                if 'dark' in locals():
                    data = data - dark
                one_D_data = np.sum(data, 0) / data.shape[0]
                two_D_data = np.double(data)
            else:
                # fix an underflow problem that was present in earlier releases of psana and present for LH80
                for i in range(len(data)):
                    if data[i] > 1000000000:
                        data[i] = data[i] - (2**32)
                if 'dark' in locals():
                    data = data - dark
                one_D_data = data

            totals[0] += 1
            print("total good:", totals[0])

            if 'fee_one_D' not in locals():
                fee_one_D = one_D_data
            else:
                fee_one_D += one_D_data
            if 'two_D_data' in locals() and 'fee_two_D' not in locals():
                fee_two_D = two_D_data
            elif 'fee_two_D' in locals():
                fee_two_D += two_D_data

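        # Allocate receive buffers for the MPI reduction of the per-rank sums.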
        acceptedtotals = np.zeros(totals.shape)
        acceptedfee1 = np.zeros((fee_one_D.shape))
        if 'fee_two_D' in locals():
            acceptedfee2 = np.zeros((fee_two_D.shape))
        print("Synchronizing rank", rank)
    comm.Reduce(fee_one_D, acceptedfee1)
    comm.Reduce(totals, acceptedtotals)
    if 'acceptedfee2' in locals():
        comm.Reduce(fee_two_D, acceptedfee2)
    print("number averaged", acceptedtotals[0])
    if rank == 0:
        if acceptedtotals[0] > 0:
            acceptedfee1 /= acceptedtotals[0]
            if 'acceptedfee2' in locals():
                acceptedfee2 /= acceptedtotals[0]

        # matplotlib needs a non-interactive backend when run on the cluster
        # nodes at SLAC; the two Agg lines are not needed when working
        # interactively at SLAC, on mac or on viper
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        from matplotlib.backends.backend_pdf import PdfPages
        from matplotlib import cm

        if params.dispatch.events_accepted:
            easy_pickle.dump(
                os.path.join(
                    params.output.output_dir, "fee_avg_1_D_" +
                    'r%s' % params.input.run_num + "_accepted.pickle"),
                acceptedfee1)
            pp1 = PdfPages(
                os.path.join(
                    params.output.output_dir, "fee_avg_1_D_" +
                    'r%s' % params.input.run_num + "_accepted.pdf"))
            if 'acceptedfee2' in locals():
                easy_pickle.dump(
                    os.path.join(
                        params.output.output_dir, "fee_avg_2_D_" +
                        'r%s' % params.input.run_num + "_accepted.pickle"),
                    acceptedfee2)
                pp2 = PdfPages(
                    os.path.join(
                        params.output.output_dir, "fee_avg_2_D_" +
                        'r%s' % params.input.run_num + "_accepted.pdf"))
        if params.dispatch.events_all:
            easy_pickle.dump(
                os.path.join(
                    params.output.output_dir, "fee_avg_1_D_" +
                    'r%s' % params.input.run_num + "_all.pickle"),
                acceptedfee1)
            pp1 = PdfPages(
                os.path.join(
                    params.output.output_dir, "fee_avg_1_D_" +
                    'r%s' % params.input.run_num + "_all.pdf"))
            if 'acceptedfee2' in locals():
                easy_pickle.dump(
                    os.path.join(
                        params.output.output_dir, "fee_avg_2_D_" +
                        'r%s' % params.input.run_num + "_all.pickle"),
                    acceptedfee2)
                pp2 = PdfPages(
                    os.path.join(
                        params.output.output_dir, "fee_avg_2_D_" +
                        'r%s' % params.input.run_num + "_all.pdf"))
        if params.dispatch.events_rejected:
            easy_pickle.dump(
                os.path.join(
                    params.output.output_dir, "fee_avg_1_D_" +
                    'r%s' % params.input.run_num + "_rejected.pickle"),
                acceptedfee1)
            pp1 = PdfPages(
                os.path.join(
                    params.output.output_dir, "fee_avg_1_D_" +
                    'r%s' % params.input.run_num + "_rejected.pdf"))
            if 'acceptedfee2' in locals():
                easy_pickle.dump(
                    os.path.join(
                        params.output.output_dir, "fee_avg_2_D_" +
                        'r%s' % params.input.run_num + "_rejected.pickle"),
                    acceptedfee2)
                pp2 = PdfPages(
                    os.path.join(
                        params.output.output_dir, "fee_avg_2_D_" +
                        'r%s' % params.input.run_num + "_rejected.pdf"))
        print("Done")
        # plotting results

        if params.input.pixel_to_eV.energy_per_px is not None:
            xvals = (
                np.array(range(acceptedfee1.shape[0])) -
                params.input.pixel_to_eV.x_coord_one
            ) * params.input.pixel_to_eV.energy_per_px + params.input.pixel_to_eV.y_coord_one
            xvals = xvals[::-1]

        if params.input.pixel_to_eV.x_coord_two is not None:
            eV_per_px = (params.input.pixel_to_eV.y_coord_two -
                         params.input.pixel_to_eV.y_coord_one) / (
                             params.input.pixel_to_eV.x_coord_two -
                             params.input.pixel_to_eV.x_coord_one)
            xvals = (np.array(range(acceptedfee1.shape[0])) -
                     params.input.pixel_to_eV.x_coord_one
                     ) * eV_per_px + params.input.pixel_to_eV.y_coord_one
            xvals = xvals[::-1]

        if params.input.pixel_to_eV.x_coord_two is None and params.input.pixel_to_eV.energy_per_px is None:
            xvals = np.arange(0, len(acceptedfee1), 1)

        yvals = acceptedfee1

        def OneD_plot(X, Y):
            plt.figure()
            plt.clf()
            plt.plot(X, Y)
            if params.dispatch.events_accepted:
                plt.title('Accepted Shots FEE Spectrum Run %s' %
                          params.input.run_num)
            elif params.dispatch.events_all:
                plt.title('All Shots FEE Spectrum Run %s' %
                          params.input.run_num)
            elif params.dispatch.events_rejected:
                plt.title('Rejected Shots FEE Spectrum Run %s' %
                          params.input.run_num)
            if params.input.pixel_to_eV.x_coord_one is not None:
                plt.xlabel('eV', fontsize=13)
            else:
                plt.xlabel('pixels', fontsize=13)
            plt.ylabel('pixels', fontsize=13)
            pp1.savefig()

        def TwoD_plot(data):
            plt.figure()
            ax = plt.gca()
            # to plot runs 117-201 with a fixed range, pass vmin=0, vmax=50 to imshow
            cax = ax.imshow(data,
                            interpolation='nearest',
                            origin='lower',
                            cmap=cm.coolwarm)
            plt.colorbar(cax, fraction=0.014, pad=0.04)
            if params.dispatch.events_accepted:
                ax.set_title('Accepted 2-D FEE Spectrum Run %s' %
                             params.input.run_num)
            elif params.dispatch.events_all:
                ax.set_title('All 2-D FEE Spectrum Run %s' %
                             params.input.run_num)
            elif params.dispatch.events_rejected:
                ax.set_title('Rejected 2-D FEE Spectrum Run %s' %
                             params.input.run_num)
            # save the figure regardless of which title branch was taken
            pp2.savefig()

        OneD_plot(xvals, yvals)
        pp1.close()
        if 'acceptedfee2' in locals():
            TwoD_plot(acceptedfee2)
            pp2.close()
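The pattern above (round-robin event striding per rank, MPI Reduce of numpy
buffers, then averaging on rank 0) is easier to see in a minimal,
self-contained sketch. Everything below is illustrative; none of the names
come from the original code.

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()  # unique id of this process, 0-indexed
size = comm.Get_size()  # number of processes in this job

items = list(range(100))  # stand-in for the per-run event timestamps
# each rank takes every size-th item, offset by its rank
mine = [items[i] for i in range(len(items)) if (i + rank) % size == 0]

local_sum = np.array([float(sum(mine))])    # per-rank partial sum
local_count = np.array([float(len(mine))])  # per-rank item count
total_sum = np.zeros(1)
total_count = np.zeros(1)
# element-wise sum of the per-rank buffers; the result lands on rank 0
comm.Reduce(local_sum, total_sum, op=MPI.SUM, root=0)
comm.Reduce(local_count, total_count, op=MPI.SUM, root=0)
if rank == 0:
    print("average:", total_sum[0] / total_count[0])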
Example no. 3
def extract(file_name,
            crystal_symmetry,
            wavelength_id,
            crystal_id,
            show_details_if_error,
            output_r_free_label,
            merge_non_unique_under_symmetry,
            map_to_asu,
            remove_systematic_absences,
            all_miller_arrays=None,
            incompatible_flags_to_work_set=False,
            ignore_bad_sigmas=False,
            extend_flags=False,
            return_as_miller_arrays=False,
            log=sys.stdout):
    import iotbx.cif
    from cctbx import miller
    if all_miller_arrays is None:
        base_array_info = miller.array_info(
            crystal_symmetry_from_file=crystal_symmetry)
        all_miller_arrays = iotbx.cif.reader(
            file_path=file_name).build_miller_arrays(
                base_array_info=base_array_info)
    if (len(all_miller_arrays) == 0):
        raise Sorry(
            "No data arrays were found in this CIF file.  Please make " +
            "sure that the file contains reflection data, rather than the refined "
            + "model.")
    column_labels = set()
    if (extend_flags):
        map_to_asu = True
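    # Join the symmetry of every Miller array in the file with the externally
    # supplied symmetry, so inconsistencies are caught before conversion.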
    # TODO: is all_miller_arrays a dict? If not, change back.
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            other_symmetry = crystal_symmetry
            try:
                crystal_symmetry = other_symmetry.join_symmetry(
                    other_symmetry=ma.crystal_symmetry(), force=True)
            except AssertionError as e:
                str_e = str(e)
                from six.moves import cStringIO as StringIO
                s = StringIO()
                if "Space group is incompatible with unit cell parameters." in str_e:
                    other_symmetry.show_summary(f=s)
                    ma.crystal_symmetry().show_summary(f=s)
                    str_e += "\n%s" % (s.getvalue())
                    raise Sorry(str_e)
                else:
                    raise
    if (crystal_symmetry.unit_cell() is None
            or crystal_symmetry.space_group_info() is None):
        raise Sorry(
            "Crystal symmetry is not defined. Please use the --symmetry option."
        )
    mtz_object = iotbx.mtz.object() \
      .set_title(title="phenix.cif_as_mtz") \
      .set_space_group_info(space_group_info=crystal_symmetry.space_group_info())
    unit_cell = crystal_symmetry.unit_cell()
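    # MTZ crystal objects and their datasets, keyed by crystal_id.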
    mtz_crystals = {}
    mtz_object.set_hkl_base(unit_cell=unit_cell)
    from iotbx.reflection_file_utils import cif_status_flags_as_int_r_free_flags
    # generate list of all reflections (for checking R-free flags)
    from iotbx.reflection_file_utils import make_joined_set
    all_arrays = []
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            all_arrays.append(ma)
    complete_set = make_joined_set(all_arrays)
    if return_as_miller_arrays:
        miller_array_list = []
    for i, (data_name,
            miller_arrays) in enumerate(six.iteritems(all_miller_arrays)):
        for ma in miller_arrays.values():
            ma = ma.customized_copy(
                crystal_symmetry=crystal_symmetry).set_info(ma.info())
            labels = ma.info().labels
            label = get_label(miller_array=ma,
                              output_r_free_label=output_r_free_label)
            if label is None:
                print("Can't determine output label for %s - skipping." % \
                  ma.info().label_string(), file=log)
                continue
            elif label.startswith(output_r_free_label):
                ma, _ = cif_status_flags_as_int_r_free_flags(
                    ma, test_flag_value="f")
                if isinstance(ma.data(), flex.double):
                    data_int = ma.data().iround()
                    assert data_int.as_double().all_eq(ma.data())
                    ma = ma.customized_copy(data=data_int).set_info(ma.info())
            elif (
                (ma.is_xray_amplitude_array() or ma.is_xray_intensity_array())
                    and isinstance(ma.data(), flex.int)):
                ma = ma.customized_copy(data=ma.data().as_double()).set_info(
                    ma.info())
            crys_id = 0
            for l in labels:
                if 'crystal_id' in l:
                    crys_id = int(l.split('=')[-1])
                    break
            if crys_id > 0 and crystal_id is None:
                label += "%i" % crys_id
            if crystal_id is not None and crys_id > 0 and crys_id != crystal_id:
                continue
            if crys_id not in mtz_crystals:
                mtz_crystals[crys_id] = (mtz_object.add_crystal(
                    name="crystal_%i" % crys_id,
                    project_name="project",
                    unit_cell=unit_cell), {})
            crystal, datasets = mtz_crystals[crys_id]
            w_id = 0
            for l in labels:
                if 'wavelength_id' in l:
                    w_id = int(l.split('=')[-1])
                    break
            if wavelength_id is not None and w_id > 0 and w_id != wavelength_id:
                continue
            if w_id > 1 and wavelength_id is None:
                if (label in column_labels):
                    label += "%i" % w_id
                #print "label is", label
            if w_id not in datasets:
                wavelength = ma.info().wavelength
                if (wavelength is None):
                    wavelength = 0
                datasets[w_id] = crystal.add_dataset(name="dataset",
                                                     wavelength=wavelength)
            dataset = datasets[w_id]
            # if all sigmas for an array are set to zero either raise an error, or set sigmas to None
            if ma.sigmas() is not None and (ma.sigmas()
                                            == 0).count(False) == 0:
                if ignore_bad_sigmas:
                    print("Warning: bad sigmas, setting sigmas to None.",
                          file=log)
                    ma.set_sigmas(None)
                else:
                    raise Sorry("""Bad sigmas: all sigmas are equal to zero.
  Add --ignore_bad_sigmas to command arguments to leave out sigmas from mtz file."""
                                )
            if not ma.is_unique_set_under_symmetry():
                if merge_non_unique_under_symmetry:
                    print("Warning: merging non-unique data", file=log)
                    if (label.startswith(output_r_free_label)
                            and incompatible_flags_to_work_set):
                        merging = ma.merge_equivalents(
                            incompatible_flags_replacement=0)
                        if merging.n_incompatible_flags > 0:
                            print("Warning: %i reflections were placed in the working set " \
                                  "because of incompatible flags between equivalents." %(
                                    merging.n_incompatible_flags), file=log)
                    else:
                        try:
                            merging = ma.merge_equivalents()
                        except Sorry as e:
                            if ("merge_equivalents_exact: incompatible"
                                    in str(e)):
                                raise Sorry(
                                    str(e) + " for %s" % ma.info().labels[-1] +
                                    "\n" +
                                    "Add --incompatible_flags_to_work_set to command line "
                                    "arguments to place incompatible flags to working set."
                                )
                            raise
                    ma = merging.array().customized_copy(
                        crystal_symmetry=ma).set_info(ma.info())
                elif return_as_miller_arrays:  # allow non-unique set
                    pass
                else:
                    n_all = ma.indices().size()
                    sel_unique = ma.unique_under_symmetry_selection()
                    sel_dup = ~flex.bool(n_all, sel_unique)
                    n_duplicate = sel_dup.count(True)
                    n_uus = sel_unique.size()
                    msg = (
                      "Miller indices not unique under symmetry: " + file_name +
                      " (%d redundant indices out of %d). " % (n_all-n_uus, n_all) +
                      "Add --merge to command arguments to force merging data.")
                    if (show_details_if_error):
                        print(msg)
                        ma.show_comprehensive_summary(prefix="  ")
                        ma.map_to_asu().sort().show_array(prefix="  ")
                    raise Sorry(msg)
            if (map_to_asu):
                ma = ma.map_to_asu().set_info(ma.info())
            if (remove_systematic_absences):
                ma = ma.remove_systematic_absences()
            if (label.startswith(output_r_free_label)
                    and complete_set is not None):
                n_missing = len(complete_set.lone_set(other=ma).indices())
                if (n_missing > 0):
                    if (extend_flags):
                        from cctbx import r_free_utils
                        # determine flag values
                        fvals = list(set(ma.data()))
                        print("fvals", fvals)
                        fval = None
                        if (len(fvals) == 1):
                            fval = fvals[0]
                        elif (len(fvals) == 2):
                            f1 = (ma.data()
                                  == fvals[0]).count(True) / ma.data().size()
                            f2 = (ma.data()
                                  == fvals[1]).count(True) / ma.data().size()
                            if (f1 < f2): fval = fvals[0]
                            else: fval = fvals[1]
                        elif (len(fvals) == 0):
                            fval = None
                        else:
                            fval = 0
                            if (fval not in fvals):
                                raise Sorry(
                                    "Cannot determine free-R flag value.")
                        #
                        if (fval is not None):
                            ma = r_free_utils.extend_flags(
                                r_free_flags=ma,
                                test_flag_value=fval,
                                array_label=label,
                                complete_set=complete_set,
                                preserve_input_values=True,
                                allow_uniform_flags=True,
                                log=sys.stdout)
                        else:
                            ma = None
                    else:
                        libtbx.warn(
                            ("%d reflections do not have R-free flags in the "
                             "array '%s' - this may cause problems if you try "
                             "to use the MTZ file for refinement or map "
                             "calculation. We recommend that you extend the "
                             "flags to cover all reflections (--extend_flags "
                             "on the command line).") % (n_missing, label))
            # Get rid of fake (0,0,0) reflection in some CIFs
            if (ma is not None):
                ma = ma.select_indices(
                    indices=flex.miller_index(((0, 0, 0), )),
                    negate=True).set_info(ma.info())

            if return_as_miller_arrays:
                miller_array_list.append(ma)
                continue  # don't make a dataset

            dec = None
            if ("FWT" in label):
                dec = iotbx.mtz.ccp4_label_decorator()
            column_types = None
            if ("PHI" in label or "PHWT" in label) and (ma.is_real_array()):
                column_types = "P"
            elif (label.startswith("DANO") and ma.is_real_array()):
                if (ma.sigmas() is not None):
                    column_types = "DQ"
                else:
                    column_types = "D"
            label_base = label
            i = 1
            while label in column_labels:
                label = label_base + "-%i" % (i)
                i += 1
            if (ma is not None):
                column_labels.add(label)
                dataset.add_miller_array(ma,
                                         column_root_label=label,
                                         label_decorator=dec,
                                         column_types=column_types)
    if return_as_miller_arrays:
        return miller_array_list
    else:
        return mtz_object
Example no. 4
def run(args):
  # processing command-line stuff, out of the object
  log = multi_out()
  log.register("stdout", sys.stdout)
  if len(args) == 0:
    format_usage_message(log)
    return
  input_objects = process_command_line_with_files(
      args=args,
      master_phil=master_params(),
      pdb_file_def="model_file_name",
      map_file_def="map_file_name",
      reflection_file_def="hkl_file_name",
      cif_file_def="ligands_file_name")
  work_params = input_objects.work.extract()
  if [work_params.map_file_name, work_params.hkl_file_name].count(None) < 1:
    raise Sorry("Only one source of map could be supplied.")
  input_objects.work.show(prefix=" ", out=log)
  if len(work_params.model_file_name) == 0:
    raise Sorry("No PDB file specified")
  if work_params.output_prefix is None:
    work_params.output_prefix = os.path.basename(work_params.model_file_name[0])
  log_file_name = "%s.log" % work_params.output_prefix
  logfile = open(log_file_name, "w")
  log.register("logfile", logfile)
  err_log = multi_out()
  err_log.register(label="log", file_object=log)
  # err_log.register(label="stderr", file_object=sys.stderr)
  sys.stderr = err_log

  if work_params.loop_idealization.output_prefix is None:
    work_params.loop_idealization.output_prefix = "%s_rama_fixed" % work_params.output_prefix

  # Here we start opening files provided,
  # collect crystal symmetries
  pdb_combined = iotbx.pdb.combine_unique_pdb_files(file_names=work_params.model_file_name)
  pdb_input = iotbx.pdb.input(source_info=None,
    lines=flex.std_string(pdb_combined.raw_records))
  pdb_cs = pdb_input.crystal_symmetry()
  crystal_symmetry = None
  map_cs = None
  map_content = input_objects.get_file(work_params.map_file_name)
  if map_content is not None:
    try:
      map_cs = map_content.crystal_symmetry()
    except NotImplementedError as e:
      pass

  try:
    crystal_symmetry = crystal.select_crystal_symmetry(
        from_command_line     = None,
        from_parameter_file   = None,
        from_coordinate_files = [pdb_cs],
        from_reflection_files = [map_cs],
        enforce_similarity    = True)
  except AssertionError as e:
    if len(e.args)>0 and e.args[0].startswith("No unit cell and symmetry information supplied"):
      pass
    else:
      raise e


  model = mmtbx.model.manager(
      model_input = pdb_input,
      restraint_objects = input_objects.cif_objects,
      crystal_symmetry = crystal_symmetry,
      process_input = False,
      log=log)

  map_data = None
  shift_manager = None

  if map_content is not None:
    map_data, map_cs, shift_manager = get_map_from_map(
        map_content,
        work_params,
        xrs=model.get_xray_structure(),
        log=log)
    model.set_shift_manager(shift_manager)
    # model.get_hierarchy().write_pdb_file("junk_shift.pdb")

  hkl_content = input_objects.get_file(work_params.hkl_file_name)
  if hkl_content is not None:
    map_data, map_cs = get_map_from_hkl(
        hkl_content,
        work_params,
        xrs=model.get_xray_structure(), # here we don't care about atom order
        log=log)

  mi_object = model_idealization(
      model = model,
      map_data = map_data,
      params=work_params,
      log=log,
      verbose=False)
  mi_object.run()
  mi_object.print_stat_comparison()
  print >> log, "RMSD from starting model (backbone, all): %.4f, %.4f" % (
      mi_object.get_rmsd_from_start(), mi_object.get_rmsd_from_start2())
  mi_object.print_runtime()
  # add hydrogens if needed ?
  print >> log, "All done."
  log.close()
Example no. 5
    def __init__(self,
                 map_manager,
                 mask_as_map_manager,
                 model=None,
                 box_cushion=3,
                 wrapping=None,
                 model_can_be_outside_bounds=False,
                 log=sys.stdout):

        self._map_manager = map_manager
        self._model = model
        self.model_can_be_outside_bounds = model_can_be_outside_bounds
        assert map_manager.shift_cart() == mask_as_map_manager.shift_cart()

        # safeguards
        assert isinstance(map_manager, iotbx.map_manager.map_manager)
        assert isinstance(mask_as_map_manager, iotbx.map_manager.map_manager)
        assert self._map_manager.map_data().accessor().origin() == (0, 0, 0)
        assert map_manager.is_similar(mask_as_map_manager)
        if self.map_manager().wrapping():
            assert map_manager.unit_cell_grid == map_manager.map_data().all()

        self._force_wrapping = wrapping
        if wrapping is None:
            wrapping = self.map_manager().wrapping()
        self.basis_for_boxing_string = 'around_mask bounds, wrapping = %s' % (
            wrapping)

        # Make sure the map goes from 0 to 1
        map_data = mask_as_map_manager.map_data()
        mmm = map_data.as_1d().min_max_mean()
        minimum = mmm.min
        range_of_values = mmm.max - mmm.min
        map_data = (map_data - minimum) / max(1.e-10, range_of_values)

        # Get a connectivity object that marks all the connected regions in map

        from cctbx.maptbx.segment_and_split_map import get_co
        co, sorted_by_volume, min_b, max_b = get_co(map_data=map_data,
                                                    threshold=0.5,
                                                    wrapping=False)

        if len(sorted_by_volume) < 2:  # didn't work
            raise Sorry("No mask obtained...")

        # Get the biggest connected region in the map

        original_id_from_id = {}
        for i in range(1, len(sorted_by_volume)):
            v, id = sorted_by_volume[i]
            original_id_from_id[i] = id
        id = 1
        orig_id = original_id_from_id[id]

        # Get lower and upper bounds of this region in grid units

        self.gridding_first = min_b[orig_id]
        self.gridding_last = max_b[orig_id]

        # Increase range of bounds by box_cushion
        cs = map_manager.crystal_symmetry()
        cushion = flex.double(cs.unit_cell().fractionalize(
            (box_cushion, ) * 3))
        all_orig = map_manager.map_data().all()
        self.gridding_first = [
            max(0, ifloor(gf - c * n))
            for c, gf, n in zip(cushion, self.gridding_first, all_orig)
        ]
        self.gridding_last = [
            min(n - 1, iceil(gl + c * n))
            for c, gl, n in zip(cushion, self.gridding_last, all_orig)
        ]

        # Ready with gridding...set up shifts and box crystal_symmetry
        self.set_shifts_and_crystal_symmetry()

        self.apply_to_model_ncs_and_map()

        # Also apply to mask_as_map_manager so that mask_as_map_manager is boxed
        mask_as_map_manager = self.apply_to_map(mask_as_map_manager)
        self.mask_as_map_manager = mask_as_map_manager  # save it
Example no. 6
phil_scope = parse(phil_str)

user_phil = []
root_dirs = []
indexing_phil = None
for arg in sys.argv[1:]:
    if os.path.isdir(arg):
        root_dirs.append(arg)
    elif os.path.isfile(arg):
        assert indexing_phil is None
        indexing_phil = arg
    else:
        try:
            user_phil.append(parse(arg))
        except Exception:
            raise Sorry("Couldn't parse argument %s" % arg)

params = phil_scope.fetch(sources=user_phil).extract()

print("Finding files")

images = []
strongs = []

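# Scan each root directory for image files with the configured extension and
# pair them with the corresponding strong-spot files.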
for root in root_dirs:
    for filename in os.listdir(root):
        if os.path.splitext(filename)[1] != params.image_extension:
            continue
        filepath = os.path.join(root, filename)
        strong_filepath = os.path.join(
            root,
Example no. 7
def run(args, return_list_of_tests=None):
    if (len(args) == 0):
        raise Usage(
            """libtbx.run_tests_parallel [module=NAME] [directory=path]""")
    user_phil = []
    for arg in args:
        if os.path.isdir(arg):
            user_phil.append(libtbx.phil.parse("directory=%s" % arg))
        else:
            try:
                arg_phil = libtbx.phil.parse(arg)
            except RuntimeError:
                raise Sorry("Unrecognized argument '%s'" % arg)
            else:
                user_phil.append(arg_phil)

    params = master_phil.fetch(sources=user_phil).extract()

    if params.run_in_tmp_dir:
        from libtbx.test_utils import open_tmp_directory
        run_dir = open_tmp_directory()
        print('Running tests in %s' % run_dir)
        os.chdir(run_dir)
    elif return_list_of_tests:
        pass  # don't need to check anything
    else:
        cwd = os.getcwd()
        cwd_files = os.listdir(cwd)
        if (len(cwd_files) > 0):
            raise Sorry("Please run this program in an empty directory.")
    if (len(params.directory) == 0) and (len(params.module) == 0):
        raise Sorry("Please specify modules and/or directories to test.")
    all_tests = []
    all_tests.extend(libtbx.test_utils.parallel.make_commands(params.script))
    for dir_name in params.directory:
        if os.path.split(dir_name)[-1].find("cctbx_project") > -1:
            print('DANGER ' * 10)
            print(
                'Using the directory option in cctbx_project can be very time consuming'
            )
            print('DANGER ' * 10)
        dir_tests = libtbx.test_utils.parallel.find_tests(dir_name)
        all_tests.extend(libtbx.test_utils.parallel.make_commands(dir_tests))
    for module_name in params.module:
        module_tests = libtbx.test_utils.parallel.get_module_tests(module_name)
        all_tests.extend(module_tests)

    if return_list_of_tests:
        return all_tests

    if (len(all_tests) == 0):
        raise Sorry("No test scripts found in %s." % params.directory)
    if (params.shuffle):
        random.shuffle(all_tests)
    if (params.quiet):
        params.verbosity = 0
    with open("run_tests_parallel_zlog", "w") as log:
        result = libtbx.test_utils.parallel.run_command_list(
            cmd_list=all_tests,
            nprocs=params.nproc,
            log=log,
            verbosity=params.verbosity,
            max_time=params.max_time)
    print("\nSee run_tests_parallel_zlog for full output.\n")
    if (result.failure > 0):
        print("")
        print("*" * 80)
        print("ERROR: %d TEST FAILURES.  PLEASE FIX BEFORE COMMITTING CODE." % \
          result.failure)
        print("*" * 80)
        print("")
    return result.failure
Example no. 8
def run(args,
        out=None,
        master_params=None,
        assume_shelx_observation_type_is="intensities"):
    if (out is None): out = sys.stdout
    import iotbx.phil
    if (master_params is None):
        master_params = iotbx.phil.parse(master_phil, process_includes=True)
    cmdline = cmdline_processor(args=args,
                                master_phil=master_params,
                                reflection_file_def="file_name",
                                pdb_file_def="symmetry_file",
                                space_group_def="space_group",
                                unit_cell_def="unit_cell",
                                usage_string="""\
phenix.merging_statistics [data_file] [options...]

Calculate merging statistics for non-unique data, including R-merge, R-meas,
R-pim, and redundancy.  Any format supported by Phenix is allowed, including
MTZ, unmerged Scalepack, or XDS/XSCALE (and possibly others).  Data should
already be on a common scale, but with individual observations unmerged.
%s
""" % citations_str)
    params = cmdline.work.extract()
    i_obs = iotbx.merging_statistics.select_data(
        file_name=params.file_name,
        data_labels=params.labels,
        log=out,
        assume_shelx_observation_type_is=assume_shelx_observation_type_is)
    params.labels = i_obs.info().label_string()
    validate_params(params)
    symm = sg = uc = None
    if (params.symmetry_file is not None):
        from iotbx import crystal_symmetry_from_any
        symm = crystal_symmetry_from_any.extract_from(
            file_name=params.symmetry_file)
        if (symm is None):
            raise Sorry("No symmetry records found in %s." %
                        params.symmetry_file)
    else:
        sg = i_obs.space_group()
        if (params.space_group is not None):
            sg = params.space_group.group()
        elif (sg is None):
            raise Sorry("Missing space group information.")
        uc = i_obs.unit_cell()
        if (params.unit_cell is not None):
            uc = params.unit_cell
        elif (uc is None):
            raise Sorry("Missing unit cell information.")
        from cctbx import crystal
        symm = crystal.symmetry(space_group=sg, unit_cell=uc)
    if (i_obs.sigmas() is None):
        raise Sorry("Sigma(I) values required for this application.")
    result = iotbx.merging_statistics.dataset_statistics(
        i_obs=i_obs,
        crystal_symmetry=symm,
        d_min=params.high_resolution,
        d_max=params.low_resolution,
        n_bins=params.n_bins,
        binning_method=params.binning_method,
        anomalous=params.anomalous,
        debug=params.debug,
        file_name=params.file_name,
        sigma_filtering=params.sigma_filtering,
        use_internal_variance=params.use_internal_variance,
        eliminate_sys_absent=params.eliminate_sys_absent,
        extend_d_max_min=params.extend_d_max_min,
        cc_one_half_significance_level=params.cc_one_half_significance_level,
        cc_one_half_method=params.cc_one_half_method,
        log=out)
    result.show(out=out)
    if (getattr(params, "loggraph", False)):
        result.show_loggraph(out=out)
    if (params.estimate_cutoffs):
        result.show_estimated_cutoffs(out=out)
    if params.json.file_name is not None:
        result.as_json(file_name=params.json.file_name,
                       indent=params.json.indent)
    if params.mmcif.file_name is not None:
        import iotbx.cif.model
        cif = iotbx.cif.model.cif()
        cif[params.mmcif.data_name] = result.as_cif_block()
        with open(params.mmcif.file_name, 'w') as f:
            print(cif, file=f)
    print("", file=out)
    print("References:", file=out)
    print(citations_str, file=out)
    print("", file=out)
    return result
Example no. 9
def run(args, log=sys.stdout):
  print("-"*79, file=log)
  print(legend, file=log)
  print("-"*79, file=log)
  inputs = mmtbx.utils.process_command_line_args(args = args,
    master_params = master_params())
  params = inputs.params.extract()
  # estimate resolution
  d_min = params.resolution
  broadcast(m="Map resolution:", log=log)
  if(d_min is None):
    raise Sorry("Resolution is required.")
  print("  d_min: %6.4f"%d_min, file=log)
  # model
  broadcast(m="Input PDB:", log=log)
  file_names = inputs.pdb_file_names
  if(len(file_names) != 1): raise Sorry("Exactly one PDB file has to be given.")
  if(inputs.crystal_symmetry is None):
    raise Sorry("No crystal symmetry defined.")
  pdb_inp = iotbx.pdb.input(file_name=file_names[0])
  model = mmtbx.model.manager(
      model_input = pdb_inp,
      crystal_symmetry=inputs.crystal_symmetry)
  model.process(make_restraints=True)
  if model.get_number_of_models() > 1:
    raise Sorry("Only one model allowed.")
  model.setup_scattering_dictionaries(scattering_table=params.scattering_table)
  model.get_xray_structure().show_summary(f=log, prefix="  ")
  broadcast(m="Input map:", log=log)
  if(inputs.ccp4_map is None): raise Sorry("A map file has to be given.")
  inputs.ccp4_map.show_summary(prefix="  ")
  map_data = inputs.ccp4_map.map_data()
  print("  Actual map (min,max,mean):", \
    map_data.as_1d().min_max_mean().as_tuple(), file=log)
  make_sub_header("Histogram of map values", out=log)
  md = map_data.as_1d()
  show_histogram(data=md, n_slots=10, data_min=flex.min(md),
    data_max=flex.max(md), log=log)
  # shift origin if needed
  soin = maptbx.shift_origin_if_needed(map_data=map_data,
    sites_cart=model.get_sites_cart(), crystal_symmetry=model.crystal_symmetry())
  map_data = soin.map_data
  model.set_sites_cart(soin.sites_cart)
  ####
  # Compute and show all stats
  ####
  broadcast(m="Model statistics:", log=log)
  make_sub_header("Overall", out=log)
  info = mmtbx.model.statistics.info(model=model)
  info.geometry.show()

  # XXX - these are not available anymore due to refactoring
  # make_sub_header("Histogram of devations from ideal bonds", out=log)
  # show_histogram(data=ms.bond_deltas, n_slots=10, data_min=0, data_max=0.2,
  #   log=log)
  # #
  # make_sub_header("Histogram of devations from ideal angles", out=log)
  # show_histogram(data=ms.angle_deltas, n_slots=10, data_min=0, data_max=30.,
  #   log=log)
  # #
  # make_sub_header("Histogram of non-bonded distances", out=log)
  # show_histogram(data=ms.nonbonded_distances, n_slots=10, data_min=0,
  #   data_max=5., log=log)
  #
  make_sub_header("Histogram of ADPs", out=log)
  info.adp.show(log=log)
  # bs = xrs.extract_u_iso_or_u_equiv()*adptbx.u_as_b(1.)
  # show_histogram(data=bs, n_slots=10, data_min=flex.min(bs),
  #   data_max=flex.max(bs), log=log)
  #
  # Compute CC
  broadcast(m="Map-model CC (overall):", log=log)
  five_cc_result = mmtbx.maps.correlation.five_cc(map = map_data,
    xray_structure = model.get_xray_structure(), d_min = d_min)
  atom_radius = five_cc_result.atom_radius
  if atom_radius is None:
    atom_radius = five_cc_result._atom_radius()
  print("  CC_mask  : %6.4f"%five_cc_result.result.cc_mask, file=log)
  print("  CC_volume: %6.4f"%five_cc_result.result.cc_volume, file=log)
  print("  CC_peaks : %6.4f"%five_cc_result.result.cc_peaks, file=log)
  # Compute FSC(map, model)
  broadcast(m="Model-map FSC:", log=log)
  fsc = mmtbx.maps.correlation.fsc_model_vs_map(
    xray_structure = model.get_xray_structure(),
    map            = map_data,
    atom_radius    = atom_radius,
    d_min          = d_min)
  fsc.show(prefix="  ")
  # Local CC
  cc_calculator = mmtbx.maps.correlation.from_map_and_xray_structure_or_fmodel(
    xray_structure = model.get_xray_structure(),
    map_data       = map_data,
    d_min          = d_min)
  broadcast(m="Map-model CC (local):", log=log)
  # per residue
  print("Per residue:", file=log)
  residue_results = list()
  ph = model.get_hierarchy()
  xrs = model.get_xray_structure()
  for rg in ph.residue_groups():
    cc = cc_calculator.cc(selection=rg.atoms().extract_i_seq())
    chain_id = rg.parent().id
    print("  chain id: %s resid %s: %6.4f"%(
      chain_id, rg.resid(), cc), file=log)
  # per chain
  print("Per chain:", file=log)
  for chain in ph.chains():
    print("  chain %s: %6.4f"%(chain.id, cc_calculator.cc(
      selection=chain.atoms().extract_i_seq())), file=log)
  # per residue detailed counts
  print("Per residue (histogram):", file=log)
  crystal_gridding = maptbx.crystal_gridding(
    unit_cell             = xrs.unit_cell(),
    space_group_info      = xrs.space_group_info(),
    pre_determined_n_real = map_data.accessor().all())
  f_calc = xrs.structure_factors(d_min=d_min).f_calc()
  fft_map = miller.fft_map(
    crystal_gridding     = crystal_gridding,
    fourier_coefficients = f_calc)
  fft_map.apply_sigma_scaling()
  map_model = fft_map.real_map_unpadded()
  sites_cart = xrs.sites_cart()
  cc_per_residue = flex.double()
  for rg in ph.residue_groups():
    cc = mmtbx.maps.correlation.from_map_map_atoms(
      map_1      = map_data,
      map_2      = map_model,
      sites_cart = sites_cart.select(rg.atoms().extract_i_seq()),
      unit_cell  = xrs.unit_cell(),
      radius     = 2.)
    cc_per_residue.append(cc)
  show_histogram(data=cc_per_residue, n_slots=10, data_min=-1., data_max=1.0,
    log=log)
Example no. 10
def selection_string_from_selection(pdb_h,
                                    selection,
                                    chains_info=None,
                                    atom_selection_cache=None):
    """
  !!! if selection contains alternative conformations, the assertion at the
  end will fail. This is to prevent using this function with such selections.
  This limits its application to NCS search only and at the same time asserts
  that found NCS groups don't contain alternative conformations.

  Convert a selection array to a selection string.
  The function tries to make the selection string as short as possible,
  using chain names, resseq ranges and, only when there is no other option,
  residue IDs.

  Limitations:
    When pdb_h contains multiple conformations, selection must
    not include residues with alternate locations

  Args:
    pdb_h : iotbx.pdb.hierarchy
    selection (flex.bool or flex.size_t)
    chains_info : object containing
      chains (str): chain IDs OR selections string
      res_name (list of str): list of residues names
      resid (list of str): list of residues sequence number, resid
      atom_names (list of list of str): list of atoms in residues
      atom_selection (list of list of list of int): the location of atoms in ph
      chains_atom_number (list of int): list of number of atoms in each chain

  Returns:
    sel_str (str): atom selection string
  """
    if isinstance(selection, flex.bool): selection = selection.iselection(True)
    if selection.size() == 0: raise Sorry('Empty atom selection')
    # pdb_hierarchy_inp is a hierarchy
    selection_set = set(selection)
    sel_list = []
    # pdb_h.select(selection).write_pdb_file("selected_in.pdb")
    # using chains_info to improve performance
    if not chains_info:
        chains_info = get_chains_info(pdb_h)
    # print "chains_info"
    # for k, v in chains_info.iteritems():
    #   print k, v
    # print "\n\n"
    chain_ids = sorted(chains_info)
    for ch_id in chain_ids:
        # print "chains_info[ch_id].atom_selection", chains_info[ch_id].atom_selection
        # this "unfolds" the atom_selection array which is [[],[],[],[]...] into
        # a set
        if not chain_is_needed(selection, chains_info[ch_id].atom_selection):
            continue
        a_sel = {x for xi in chains_info[ch_id].atom_selection for x in xi}
        test_set = a_sel.intersection(selection_set)
        if not test_set: continue
        ch_sel = "chain '%s'" % convert_wildcards_in_chain_id(ch_id)
        # Chain should be present, so do all the work.
        # if there is water in chain, specify residues numbers
        water_present = (len(a_sel) != chains_info[ch_id].chains_atom_number)
        complete_ch_not_present = (test_set != a_sel) or water_present
        if bool(chains_info[ch_id].no_altloc):
            no_altloc = chains_info[ch_id].no_altloc
            no_altloc_present = no_altloc.count(False) > 0
        else:
            no_altloc_present = False
        # exclude residues with alternative locations
        complete_ch_not_present |= no_altloc_present
        # print "complete_ch_not_present", complete_ch_not_present
        res_sel = []
        if complete_ch_not_present:
            # collect continuous ranges of residues when possible
            res_len = len(chains_info[ch_id].resid)

            # prev_resid = None
            prev_all_atoms_present = None
            cur_all_atoms_present = None
            atoms_for_dumping = []
            # all_prev_atoms_in_range
            previous_res_selected_atom_names = []
            a_sel = set(chains_info[ch_id].atom_selection[0])
            cur_res_selected_atom_names = get_atom_names_from_test_set(
                a_sel.intersection(selection_set), a_sel,
                chains_info[ch_id].atom_names[0])
            atoms_in_current_range = cur_res_selected_atom_names
            sequence_was_broken = False

            first_resid = chains_info[ch_id].resid[0]
            last_resid = None
            for i in range(res_len):
                cur_resid = chains_info[ch_id].resid[i]
                # test that all atoms in residue are included in selection
                a_sel = set(chains_info[ch_id].atom_selection[i])
                # print "a_sel", a_sel
                test_set = a_sel.intersection(selection_set)
                # if not bool(test_set): continue
                if len(test_set) == 0:
                    # None of residue's atoms are selected
                    # print "Breaking 1"
                    sequence_was_broken = True
                    continue
                if no_altloc_present and not no_altloc[i]:
                    # print "Breaking 2"
                    sequence_was_broken = True
                    continue
                all_atoms_present = (test_set == a_sel)
                if prev_all_atoms_present is None:
                    prev_all_atoms_present = cur_all_atoms_present
                else:
                    prev_all_atoms_present = cur_all_atoms_present and prev_all_atoms_present
                cur_all_atoms_present = all_atoms_present
                previous_res_selected_atom_names = cur_res_selected_atom_names
                cur_res_selected_atom_names = get_atom_names_from_test_set(
                    test_set, a_sel, chains_info[ch_id].atom_names[i])

                # print "all_atoms_present (cur/prev), test_set", chains_info[ch_id].resid[i], cur_all_atoms_present, prev_all_atoms_present, test_set, chains_info[ch_id].atom_names[i]

                # prev_resid = cur_resid
                cur_resid = chains_info[ch_id].resid[i]
                # print "cur_resid", cur_resid

                # new range is needed when previous selection doesn't match current
                # selection.
                # print "cur/prev res_sel", cur_res_selected_atom_names, previous_res_selected_atom_names
                # print "atoms_for_dumping", atoms_for_dumping
                # print "atoms_in_current_range", atoms_in_current_range
                # print "intersecting sets:", set(cur_res_selected_atom_names) ^ set(previous_res_selected_atom_names)
                continue_range = ((cur_all_atoms_present
                                   and prev_all_atoms_present) or (len(
                                       set(cur_res_selected_atom_names)
                                       ^ set(atoms_in_current_range)) == 0))
                continue_range &= not chains_info[ch_id].gap_residue[i]
                # print "continue range 1", continue_range
                # residues are consecutive
                continue_range = continue_range and not sequence_was_broken
                # print "continue range 2", continue_range
                if len(atoms_for_dumping) > 0:
                    continue_range = continue_range and (len(
                        set(atoms_for_dumping)
                        ^ set(cur_res_selected_atom_names)) == 0)
                sequence_was_broken = False
                # print "continue range 3", continue_range

                if continue_range:
                    # continue range
                    # print "Continuing range"
                    last_resid = cur_resid
                    atoms_in_current_range = list(
                        set(atoms_in_current_range)
                        | set(cur_res_selected_atom_names))
                    if not cur_all_atoms_present:
                        # all_prev_atoms_in_range |= set(cur_res_selected_atom_names)
                        atoms_for_dumping = cur_res_selected_atom_names
                else:
                    # dump previous range, start new one
                    # print "Dumping range"
                    if len(atoms_for_dumping) > 0:
                        atoms_sel = get_atom_str(
                            previous_res_selected_atom_names)
                    else:
                        atoms_sel = "" if prev_all_atoms_present else get_atom_str(
                            previous_res_selected_atom_names)
                        if prev_all_atoms_present is None:
                            atoms_sel = "" if cur_all_atoms_present else get_atom_str(
                                cur_res_selected_atom_names)
                    res_sel = update_res_sel(res_sel=res_sel,
                                             first_resid=first_resid,
                                             last_resid=last_resid,
                                             atoms_selection=atoms_sel)
                    # print "res_sel", res_sel
                    first_resid = cur_resid
                    last_resid = cur_resid
                    atoms_in_current_range = cur_res_selected_atom_names
                    if not cur_all_atoms_present:
                        atoms_for_dumping = cur_res_selected_atom_names
                    else:
                        atoms_for_dumping = []
                    prev_all_atoms_present = None

            # print "DUMPING THE LAST RANGE"
            # print "prev_all_atoms_present", prev_all_atoms_present
            atoms_sel = "" if prev_all_atoms_present else get_atom_str(
                previous_res_selected_atom_names)
            if prev_all_atoms_present or prev_all_atoms_present is None:
                atoms_sel = "" if cur_all_atoms_present else get_atom_str(
                    cur_res_selected_atom_names)
            # print "atoms_sel", atoms_sel
            omit_resids = (first_resid == chains_info[ch_id].resid[0]
                           and last_resid == chains_info[ch_id].resid[-1])
            res_sel = update_res_sel(res_sel, first_resid, last_resid,
                                     atoms_sel, omit_resids)

        s = get_clean_selection_string(ch_sel, res_sel)
        sel_list.append(s)
    # add parentheses when the selection is more than just a chain
    s_l = []
    sel_list.sort()
    for s in sel_list:
        if len(s) > 10:
            s = '(' + s + ')'
        s_l.append(s)
    sel_str = ' or '.join(s_l)
    # This check could take up to ~90% of the runtime of this function.
    # Nevertheless, it helped to spot bugs early, so it was kept here
    # for about a year. Originally scheduled for removal on Jan 22, 2017;
    # removed on Feb 7, 2018. When removing it for good, don't forget to
    # remove the atom_selection_cache parameter as well.
    # if atom_selection_cache is None:
    #   atom_selection_cache = pdb_h.atom_selection_cache()
    # isel = atom_selection_cache.iselection(sel_str)
    # # pdb_h.select(isel).write_pdb_file("selected_string.pdb")
    # # pdb_h.select(selection).write_pdb_file("selected_isel.pdb")
    # assert len(isel) == len(selection), ""+\
    #     "%d (result) != %d (input): conversion to string selects different number of atoms!.\n" \
    #     % (len(isel), len(selection)) +\
    #     "String lead to error: '%s'" % sel_str

    # This hack allows a chain to be entirely in two alternative
    # conformations; the check above would fail in that case. The selections
    # output in refinement are incorrect, but the underlying iselections are
    # correct, so refinement should be fine. A general solution would be a
    # universal procedure that handles alternative conformations correctly,
    # but that is a time-demanding project.
    # http://phenix-online.org/pipermail/phenixbb/2018-November/024006.html
    if sel_str == '':
        sel_str = "not all"
    return sel_str
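A minimal standalone sketch of the joining step at the end of this example: the per-chain selection strings are sorted, anything more complex than a bare chain id is parenthesized, and the pieces are OR-ed together (join_selections is a hypothetical name, not part of the code above):

def join_selections(sel_list):
    # Selections longer than a bare chain id get parentheses
    # before being combined with "or", as in the loop above.
    parts = []
    for s in sorted(sel_list):
        if len(s) > 10:
            s = '(' + s + ')'
        parts.append(s)
    return ' or '.join(parts)

print(join_selections(["chain A", "chain B and resid 5:20"]))
# chain A or (chain B and resid 5:20)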
Esempio n. 11
0
def validate_params(params):
  if (params.fetch_pdb.pdb_ids is None) or (len(params.fetch_pdb.pdb_ids)==0):
    raise Sorry("No PDB IDs specified!")
  return True
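A quick usage sketch of this check. The master phil string below is an assumption for illustration (only the fetch_pdb.pdb_ids scope name comes from the example); Sorry lives in libtbx.utils:

import libtbx.phil
from libtbx.utils import Sorry

master = libtbx.phil.parse("""
fetch_pdb {
  pdb_ids = None
    .type = strings
}
""")

params = master.extract()
try:
    validate_params(params)
except Sorry as e:
    print(e)  # No PDB IDs specified!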
Esempio n. 12
0
    def selection_parser(self,
                         word_iterator,
                         optional=True,
                         callback=None,
                         stop_word=None,
                         expect_nonmatching_closing_parenthesis=False):
        have_optional = False
        result_stack = []
        for word, word_iterator in simple_parser.infix_as_postfix(
                word_iterator=word_iterator,
                stop_word=stop_word,
                expect_nonmatching_closing_parenthesis=
                expect_nonmatching_closing_parenthesis):
            lword = word.value.lower()

            def raise_syntax_error():
                raise RuntimeError(
                    'Atom selection syntax error at word "%s".' % lword)

            if (lword == "optional"):
                if (len(result_stack) != 0):
                    raise Sorry('"optional" can appear only at the beginning.')
                if (have_optional):
                    raise Sorry('"optional" can appear only once.')
                have_optional = True
            elif (lword == "not"):
                assert len(result_stack) >= 1
                arg = result_stack.pop()
                result_stack.append(~arg)
            elif (lword in ["and", "or"]):
                assert len(result_stack) >= 2
                rhs = result_stack.pop()
                lhs = result_stack.pop()
                if (lword == "and"):
                    result_stack.append(lhs & rhs)
                else:
                    result_stack.append(lhs | rhs)
            else:
                if (lword == "all"):
                    result_stack.append(flex.bool(self.n_seq, True))
                elif (lword == "none"):
                    result_stack.append(flex.bool(self.n_seq, False))
                elif (lword == "name"):
                    result_stack.append(
                        self.sel_name(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword in ["altloc", "altid"]):
                    result_stack.append(
                        self.sel_altloc(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "resname"):
                    result_stack.append(
                        self.sel_resname(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "chain"):
                    result_stack.append(
                        self.sel_chain_id(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword in ["resseq", "resid", "resi", "model"]):
                    arg = word_iterator.pop_argument(word.value)

                    def try_compose_range():
                        def is_cont():
                            if (len(arg_cont.value) == 0): return False
                            return ("0123456789".find(arg_cont.value[0]) >= 0)

                        i_colon = arg.value.find(":")
                        if (i_colon < 0):
                            arg_cont = word_iterator.try_pop()
                            if (arg_cont is None):
                                return arg.value, -1
                            if (not arg_cont.value.startswith(":")):
                                word_iterator.backup()
                                return arg.value, -1
                            if (len(arg_cont.value) == 1):
                                arg_cont = word_iterator.try_pop()
                                if (arg_cont is None):
                                    return arg.value + ":", len(arg.value)
                                if (not is_cont()):
                                    word_iterator.backup()
                                    return arg.value + ":", len(arg.value)
                                return arg.value + ":" + arg_cont.value, len(
                                    arg.value)
                            return arg.value + arg_cont.value, len(arg.value)
                        elif (i_colon + 1 == len(arg.value)):
                            arg_cont = word_iterator.try_pop()
                            if (arg_cont is not None):
                                if (is_cont()):
                                    return arg.value + arg_cont.value, i_colon
                                word_iterator.backup()
                        return arg.value, i_colon

                    def try_compose_sequence():
                        arg_next = word_iterator.try_pop()
                        if (arg_next is None):
                            word_iterator.backup()
                            return None, None
                        lnext = arg_next.value.lower()
                        if (lnext == "through"):
                            arg_final = word_iterator.pop_argument(
                                arg_next.value)
                            return arg.value, arg_final.value
                        word_iterator.backup()
                        return (None, None)

                    val, i_colon = try_compose_range()
                    if (i_colon < 0):
                        if (lword == "resseq"):
                            result_stack.append(self.sel_resseq(pattern=arg))
                        elif (lword in ["resid", "resi"]):
                            start, stop = try_compose_sequence()
                            if (start is None):
                                result_stack.append(
                                    self.sel_resid(pattern=arg))
                            else:
                                result_stack.append(
                                    self.sel_resid_sequence(start=start,
                                                            stop=stop))
                        else:
                            result_stack.append(self.sel_model_id(pattern=arg))
                    else:
                        start = val[:i_colon]
                        stop = val[i_colon + 1:]
                        if (lword == "resseq"):
                            result_stack.append(
                                self.sel_resseq_range(start=start, stop=stop))
                        elif (lword in ["resid", "resi"]):
                            result_stack.append(
                                self.sel_resid_range(start=start, stop=stop))
                        else:
                            result_stack.append(
                                self.sel_model_id_range(start=start,
                                                        stop=stop))
                elif (lword == "icode"):
                    result_stack.append(
                        self.sel_icode(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "segid"):
                    result_stack.append(
                        self.sel_segid(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "element"):
                    result_stack.append(
                        self.sel_element(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "charge"):
                    result_stack.append(
                        self.sel_charge(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "anisou"):
                    result_stack.append(self.sel_anisou())
                elif (lword == "pepnames"):
                    result_stack.append(self.sel_pepnames())
                elif ((lword == "protein" or lword == "peptide")
                      and callback is None):
                    # if there is a callback, these keywords should be
                    # processed there; most likely it is pdb_interpretation
                    result_stack.append(self.sel_protein())
                elif lword == "nucleotide" and callback is None:
                    result_stack.append(self.sel_nucleotide())
                elif (lword == "single_atom_residue"):
                    result_stack.append(self.sel_single_atom_residue())
                elif (lword == "water"):
                    result_stack.append(self.sel_water())
                elif (lword == "hetero") or (lword == "hetatm"):
                    result_stack.append(self.sel_hetero())
                elif (lword == "bfactor") or (lword == "occupancy"):
                    op = word_iterator.pop_argument(word.value).value
                    if (op not in [">", "<", "="]):
                        raise_syntax_error()
                    else:
                        arg_next = word_iterator.try_pop()
                        if (arg_next is None):
                            # missing numeric operand after the comparison
                            raise_syntax_error()
                        lnext = arg_next.value
                        try:
                            val = float(lnext)
                        except ValueError:
                            raise_syntax_error()
                        else:
                            if (lword == "bfactor"):
                                result_stack.append(self.sel_bfactor(op, val))
                            else:
                                result_stack.append(self.sel_occupancy(
                                    op, val))
                elif ((lword == "within" or lword == 'residues_within')
                      and (self.special_position_settings is not None)):
                    assert word_iterator.pop().value == "("
                    radius = float(word_iterator.pop().value)
                    assert word_iterator.pop().value == ","
                    sel = self.selection_parser(
                        word_iterator=word_iterator,
                        callback=callback,
                        expect_nonmatching_closing_parenthesis=True)
                    if lword == 'within':
                        result_stack.append(
                            self.sel_within(radius=radius,
                                            primary_selection=sel))
                    elif lword == 'residues_within':
                        result_stack.append(
                            self.sel_residues_within(radius=radius,
                                                     primary_selection=sel))
                elif (callback is not None):
                    if (not callback(word=word,
                                     word_iterator=word_iterator,
                                     result_stack=result_stack)):
                        raise_syntax_error()
                else:
                    raise_syntax_error()
        if (optional): have_optional = False
        if (len(result_stack) == 0):
            if (have_optional): return None
            return flex.bool(self.n_seq, False)
        selection = result_stack[0]
        for result in result_stack[1:]:
            selection &= result
        if (have_optional and selection.all_eq(False)):
            return None
        return selection
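selection_parser evaluates the word stream in postfix order (courtesy of simple_parser.infix_as_postfix) on a result stack: operands push boolean arrays, "not" pops one, "and"/"or" pop two. A self-contained sketch of the same stack discipline over plain booleans, assuming the infix-to-postfix conversion has already been done:

def eval_postfix(tokens, env):
    # tokens: postfix sequence of operand names and "not"/"and"/"or";
    # env maps operand names to booleans (stand-ins for flex.bool arrays).
    stack = []
    for tok in tokens:
        if tok == "not":
            stack.append(not stack.pop())
        elif tok in ("and", "or"):
            rhs = stack.pop()
            lhs = stack.pop()
            stack.append((lhs and rhs) if tok == "and" else (lhs or rhs))
        else:
            stack.append(env[tok])
    assert len(stack) == 1
    return stack[0]

# "protein and not water" in postfix: protein water not and
print(eval_postfix(["protein", "water", "not", "and"],
                   {"protein": True, "water": False}))  # True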
Esempio n. 13
0
  def process_input_array(self, arr):
    array = arr.deep_copy()
    work_array = arr
    multiplicities = None
    try:
      if self.merge_equivalents :
        array, multiplicities, merge = MergeData(array, self.settings.show_anomalous_pairs)
      settings = self.settings
      data = array.data()
      #import code, traceback; code.interact(local=locals(), banner="".join( traceback.format_stack(limit=10) ) )
      self.missing_set = oop.null()
      #if (array.is_xray_intensity_array()):
      #  data.set_selected(data < 0, flex.double(data.size(), 0.))
      if (array.is_unique_set_under_symmetry()) and (settings.map_to_asu):
        array = array.map_to_asu()
        if (multiplicities is not None):
          multiplicities = multiplicities.map_to_asu()

      if (settings.d_min is not None):
        array = array.resolution_filter(d_min=settings.d_min)
        if (multiplicities is not None):
          multiplicities = multiplicities.resolution_filter(
            d_min=settings.d_min)
      self.filtered_array = array.deep_copy()
      if (settings.expand_anomalous):
        if not array.is_unique_set_under_symmetry():
          raise Sorry("Error! Cannot generate bijvoet mates of unmerged reflections.")
        array = array.generate_bijvoet_mates()
        original_symmetry = array.crystal_symmetry()

        if (multiplicities is not None):
          multiplicities = multiplicities.generate_bijvoet_mates()
      if (self.settings.show_missing):
        self.missing_set = array.complete_set().lone_set(array)
        if self.settings.show_anomalous_pairs:
          self.missing_set = self.missing_set.select(
            self.missing_set.centric_flags().data(), negate=True)
      if (settings.expand_to_p1):
        if not array.is_unique_set_under_symmetry():
          raise Sorry("Error! Cannot expand unmerged reflections to P1.")
        original_symmetry = array.crystal_symmetry()
        array = array.expand_to_p1().customized_copy(
          crystal_symmetry=original_symmetry)
        #array = array.niggli_cell().expand_to_p1()
        #self.missing_set = self.missing_set.niggli_cell().expand_to_p1()
        self.missing_set = self.missing_set.expand_to_p1().customized_copy(
          crystal_symmetry=original_symmetry)
        if (multiplicities is not None):
          multiplicities = multiplicities.expand_to_p1().customized_copy(
              crystal_symmetry=original_symmetry)
      data = array.data()
      self.r_free_mode = False
      self.phases = flex.double(data.size(), float('nan'))
      self.radians = flex.double(data.size(), float('nan'))
      self.ampl = flex.double(data.size(), float('nan'))
      self.sigmas = None
      if isinstance(data, flex.bool):
        self.r_free_mode = True
        data_as_float = flex.double(data.size(), 0.0)
        data_as_float.set_selected(data==True, flex.double(data.size(), 1.0))
        data = data_as_float
        self.data = data #.deep_copy()
      else :
        if isinstance(data, flex.double):
          self.data = data #.deep_copy()
        elif isinstance(data, flex.complex_double):
          self.data = data #.deep_copy()
          self.ampl = flex.abs(data)
          self.phases = flex.arg(data) * 180.0/math.pi
          # purge nan values from array to avoid crash in fmod_positive()
          b = flex.bool([bool(math.isnan(e)) for e in self.phases])
          # replace the nan values with an arbitrary float value
          self.phases = self.phases.set_selected(b, 42.4242)
          # Cast negative degrees to equivalent positive degrees
          self.phases = flex.fmod_positive(self.phases, 360.0)
          self.radians = flex.arg(data)
          # replace the nan values with an arbitrary float value
          self.radians = self.radians.set_selected(b, 0.424242)
        elif hasattr(array.data(), "as_double"):
          self.data = array.data().as_double()
        else:
          raise RuntimeError("Unexpected data type: %r" % data)
        if (settings.show_data_over_sigma):
          if (array.sigmas() is None):
            raise Sorry("sigmas not defined.")
          sigmas = array.sigmas()
          non_zero_sel = sigmas != 0
          array = array.select(non_zero_sel)
          array = array.customized_copy(data=array.data()/array.sigmas())
          self.data = array.data()
          if (multiplicities is not None):
            multiplicities = multiplicities.select(non_zero_sel)
        if array.sigmas() is not None:
          self.sigmas = array.sigmas()
        else:
          self.sigmas = None
      work_array = array
    except Exception as e:
      print(to_str(e) + "".join(traceback.format_stack(limit=10)))
      raise
    work_array.set_info(arr.info())
    return work_array, multiplicities
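The complex-data branch above replaces NaN phases with sentinel values and wraps degrees into [0, 360) with flex.fmod_positive. A NumPy sketch of the same cleanup, with numpy standing in for scitbx.array_family.flex:

import numpy as np

phases = np.array([-30.0, 400.0, np.nan, 180.0])
nan_mask = np.isnan(phases)
phases[nan_mask] = 42.4242        # arbitrary sentinel, as above
phases = np.mod(phases, 360.0)    # fmod_positive: wrap into [0, 360)
print(phases)                     # [330.  40.  42.4242  180.]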
Esempio n. 14
0
def run(args, command_name="phenix.explore_metric_symmetry"):
  command_line = (
    option_parser(
    usage=command_name+" [options]",
    description="""\
Explore Metric Symmetry. A list of possible unit cells and space groups is
given for the specified unit cell and space group combination. If a
second unit cell is given, linear combinations of the basis vectors of one
unit cell are sought that match the other.""")

    .enable_symmetry_comprehensive()

    .option(None, "--max_delta",
            action = "store",
            type="float",
            default=5.0,
            dest = "max_delta",
            help = "Maximum delta/obliquity used in determining the lattice symmetry, using a modified Le-Page algorithm. Default is 5.0 degrees",
            metavar="FLOAT")

    .option(None, "--start_from_p1",
            action="store_true",
            dest="niggli",
            default=False,
            help="Reduce to Niggli cell and forget the input spacegroup before higher metric symmetry is sought.")

    .option(None, "--graph",
            action="store",
            default=None,
            help="A graphical representation of the graph will be written out."
                 " Requires Graphviz to be installed and on PATH.")

    .option(None, "--centring_type",
            action="store",
            type="str",
            help="Centring type, choose from P,A,B,C,I,R,F")

    .option(None, "--other_unit_cell",
            action="store",
            type="str",
            help="Other unit cell, for unit cell comparison",
            metavar="10,20,30,90,103.7,90")

    .option(None, "--other_space_group",
            action="store",
            type="str",
            help="space group for other_unit_cell, for unit cell comparison")

    .option(None, "--other_centring_type",
            action="store",
            type="str",
            help="Centring type, choose from P,A,B,C,I,R,F")

    .option(None, "--no_point_group_graph",
            action="store_true",
            dest="pg_graph",
            default=False,
            help="Do not carry out the construction of a point group graph." )

    .option(None, "--relative_length_tolerance",
            action="store",
            type="float",
            help="Tolerance for unit cell lengths to be considered equal-ish.",
            default=0.10,
            metavar="FLOAT",
            dest="rel_length_tol")

    .option(None, "--absolute_angle_tolerance",
            action="store",
            dest="abs_angle_tol",
            type="float",
            default=10.0,
            metavar="FLOAT",
            help="Angular tolerance in unit cell comparison")

     .option(None, "--max_order",
             action="store",
             type="int",
             default=1,
             metavar="INT",
             help="Maximum volume change for target cell" )
    ).process(args=args)

  log = multi_out()
  log.register(label="stdout", file_object=sys.stdout)

  allowed_centring_types={"P":"Primitive",
                          "A":"A centered",
                          "B":"B centered",
                          "C":"C centered",
                          "I":"Body centered",
                          "R":"Rombohedral",
                          "F":"Face centered"}
  if command_line.options.centring_type is not None:
    if command_line.options.centring_type not in allowed_centring_types:
      print("Sorry, the centring type %s is not known."%(command_line.options.centring_type), file=log)
      print("Choose from P,A,B,C,I,R,F ", file=log)
      return

  xs = None
  other_xs = None

  if len(args)==0:
    command_line.parser.show_help()
    return

  if ( command_line.symmetry.unit_cell() is None ):
    print(file=log)
    print("Sorry: Unit cell not specified.", file=log)
    print(file=log)
    command_line.parser.show_help()
    return

  if command_line.options.centring_type is None:
    if ( command_line.symmetry.space_group_info() is None ):
      print(file=log)
      print("Sorry: centring type or space group not specified.", file=log)
      print(file=log)
      command_line.parser.show_help()
      return
  if command_line.symmetry.space_group_info()  is not None:
    if not ( command_line.symmetry.space_group().is_chiral() ):
      print("Sorry, Non chiral space groups not yet supported.", file=log)
      return

  if command_line.options.centring_type is not None:
    xs  = crystal.symmetry(
      unit_cell=command_line.symmetry.unit_cell(),
      space_group_symbol="Hall: %s 1" %( command_line.options.centring_type )
      )
    command_line.symmetry = xs

  if command_line.options.niggli:
    print("*Unit cell will be niggli reduced and P1 will be assumed*", file=log)
    uc = command_line.symmetry.change_basis(
      command_line.symmetry.change_of_basis_op_to_niggli_cell() ).unit_cell()
    command_line.symmetry = crystal.symmetry( uc, "P 1" )

  xs = command_line.symmetry

  ############################################################################
  # ABOVE IS JUST INPUT PARSING, NOW THE ACTUAL STUFF HAPPENS
  ############################################################################


  if not command_line.options.pg_graph:
    ##############################
    #   get a point group graph  #
    ##############################

    pg_object = do_pointgroup_tricks( xs.unit_cell(),
                                      xs.space_group(),
                                      command_line.options.max_delta,
                                      log )

    ################################################
    #  make a graphical representation if desired  #
    ################################################

    if command_line.options.graph is not None:
      make_graph_of_graph(pg_object,
                          command_line.options.graph,
                          log)


  #########################################
  #  Check if other cell has been defined #
  #########################################

  if command_line.options.other_unit_cell is not None:
    print("A second unit cell has been specified. ", file=log)
    other_xs = None

    if command_line.options.other_space_group is None:
      if command_line.options.other_centring_type is None:
        raise Sorry("No space group or centring type for other cell specified.")
      else:
        other_xs = crystal.symmetry( command_line.options.other_unit_cell,
                                     space_group_symbol="Hall: %s 1" %( command_line.options.other_centring_type )
                                   )
    else:
      other_xs = crystal.symmetry( command_line.options.other_unit_cell,
                                   space_group_symbol=command_line.options.other_space_group
                                 )

    # get the graph if desired
    if not command_line.options.pg_graph:
      other_pg_object = do_pointgroup_tricks( other_xs.unit_cell(),
                                              other_xs.space_group(),
                                              command_line.options.max_delta,
                                              log )
    # do the unit cell comparison
    print(file=log)
    print(file=log)
    print("Unit cell comparison", file=log)
    print("--------------------", file=log)
    print(file=log)
    print("The unit cells will be compared. The smallest niggli cell,", file=log)
    print("will be used as a (semi-flexible) lego-block to see if it", file=log)
    print("can construct the larger Niggli cell.", file=log)
    print(file=log)
    print(file=log)

    order = command_line.options.max_order

    if order==1:
      sl_object =  slt.compare_lattice(xs_a=xs,
                                       xs_b=other_xs,
                                       max_delta=command_line.options.max_delta,
                                       out=log,
                                       relative_length_tolerance=command_line.options.rel_length_tol,
                                       absolute_angle_tolerance=command_line.options.abs_angle_tol)
    else:

      tmp_a = xs.change_basis( xs.change_of_basis_op_to_niggli_cell() )
      tmp_b = other_xs.change_basis( other_xs.change_of_basis_op_to_niggli_cell() )
      modified_xs = None
      order = command_line.options.max_order
      lego_block = None
      if ( tmp_a.unit_cell().volume() > tmp_b.unit_cell().volume() ):
        modified_xs = slt.make_list_of_target_xs_up_to_order( xs, order )
        lego_block = other_xs
      else:
        modified_xs = slt.make_list_of_target_xs_up_to_order( other_xs, order )
        lego_block = xs

      print(file=log)
      print("Volume change of largest niggli cell requested via keyword --max_order", file=log)
      print(file=log)
      print("Input crystal symmetry is tranformed to niggli setting using the operator:", file=log)
      print(modified_xs.basic_to_niggli_cb_op.as_xyz(), file=log)
      print(file=log)
      print("Comparisons for various sublattices of the target cell are listed", file=log)
      print(file=log)

      for tmp_xs,cb_op,mat in zip(modified_xs.xs_list,
                                  modified_xs.extra_cb_op,
                                  modified_xs.matrices ):
        mat=mat.as_list_of_lists()
        print("===================================================================", file=log)
        print("Niggli cell is expanded using matrix:", file=log)
        print(file=log)
        print("               /%4i %4i %4i  \  "%(mat[0][0],mat[0][1],mat[0][2]), file=log)
        print("          M =  |%4i %4i %4i  |  "%(mat[1][0],mat[1][1],mat[1][2]), file=log)
        print("               \%4i %4i %4i  /  "%(mat[2][0],mat[2][1],mat[2][2]), file=log)
        print(file=log)
        print("Change of basis operator to reference setting:", file=log)
        print("    ", cb_op.as_xyz(), file=log)
        print("resulting crystal symmetry:", file=log)
        tmp_xs.show_summary(f=log,prefix="   ")
        print(file=log)
        print(file=log)
        sl_object =  slt.compare_lattice(xs_a=tmp_xs,
                                         xs_b=lego_block,
                                         max_delta=command_line.options.max_delta,
                                         out=log,
                                         relative_length_tolerance=command_line.options.rel_length_tol,
                                         absolute_angle_tolerance=command_line.options.abs_angle_tol)
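Both the --start_from_p1 path and the --max_order comparison rely on first reducing a cell to its Niggli setting. A minimal sketch using the same cctbx calls that appear in this example:

from cctbx import crystal

xs = crystal.symmetry(unit_cell=(10, 20, 30, 90, 103.7, 90),
                      space_group_symbol="P 1")
# reduce to the Niggli setting, as done for --start_from_p1 above
cb_op = xs.change_of_basis_op_to_niggli_cell()
niggli_xs = xs.change_basis(cb_op)
print(niggli_xs.unit_cell())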
Esempio n. 15
0
            "--detector_version_phil",
            "-d",
            type="string",
            default=None,
            dest="det_phil",
            help="detector version phil for the CSPAD").option(
                None,
                "--image",
                "-i",
                type="string",
                default=None,
                dest="det_image",
                help="image matching the detector version phil")).process(
                    args=sys.argv[1:])
    if cmd_line.options.det_phil is not None and cmd_line.options.det_image is not None:
        print "extracting active areas..."
        active_areas = get_CSPAD_active_areas(cmd_line.options.det_image,
                                              cmd_line.options.det_phil)
    elif cmd_line.options.det_phil is None and cmd_line.options.det_image is None:
        print "using active areas from LG36 CSPAD metrology"
        active_areas = LG36_active_areas
    else:
        raise Sorry(
            "Specify both a detector version phil and an example image to extract active areas."
        )
    for arg in cmd_line.args:
        file = open(arg, "rb")
        data = pickle.load(file)
        file.close()
        plot_preds(data, active_areas=active_areas)
Esempio n. 16
0
def create_sheet_hydrogen_bond_proxies(sheet_params,
                                       pdb_hierarchy,
                                       weight,
                                       hbond_counts,
                                       distance_ideal,
                                       distance_cut,
                                       remove_outliers,
                                       log=sys.stdout):
    assert (None not in [distance_ideal, distance_cut])
    cache = pdb_hierarchy.atom_selection_cache()
    prev_strand = sheet_params.first_strand
    prev_selection = cache.selection(prev_strand)
    prev_rgs = _get_residue_groups_from_selection(
        pdb_hierarchy=pdb_hierarchy, bool_selection=prev_selection)
    n_proxies = 0
    k = 0
    generated_proxies = geometry_restraints.shared_bond_simple_proxy()
    while k < len(sheet_params.strand):
        curr_strand = sheet_params.strand[k]
        curr_selection = cache.selection(curr_strand.selection)
        curr_start = None
        prev_start = None
        if curr_strand.bond_start_current is not None:
            curr_start = cache.selection(curr_strand.bond_start_current)
        if curr_strand.bond_start_previous is not None:
            prev_start = cache.selection(curr_strand.bond_start_previous)
        curr_rgs = _get_residue_groups_from_selection(
            pdb_hierarchy=pdb_hierarchy, bool_selection=curr_selection)
        i = j = 0
        len_prev_residues = len(prev_rgs)
        len_curr_residues = len(curr_rgs)
        if curr_start is not None and prev_start is not None:
            if curr_start.count(True) < 1 or prev_start.count(True) < 1:
                error_msg = """\
Wrong registration in SHEET record. One of these selections
"%s" or "%s"
yielded zero or several atoms. A possible reason is the presence of
insertion codes or alternative conformations for one of these residues, or
the .pdb file was edited without updating SHEET records.""" \
        % (curr_strand.bond_start_current, curr_strand.bond_start_previous)
                raise Sorry(error_msg)

            current_start_res_is_donor = pdb_hierarchy.atoms().select(
                curr_start)[0].name.strip() == 'N'
            if (len_curr_residues > 0) and (len_prev_residues > 0):
                i = _find_start_residue(residues=prev_rgs,
                                        start_selection=prev_start)
                j = _find_start_residue(residues=curr_rgs,
                                        start_selection=curr_start)
                if (i >= 0) and (j >= 0):
                    # move i,j pointers from registration residues to the beginning of
                    # beta-strands
                    while (1 < i
                           and ((1 < j and curr_strand.sense == "parallel") or
                                (j < len_curr_residues - 2
                                 and curr_strand.sense == "antiparallel"))):
                        if curr_strand.sense == "parallel":
                            i -= 2
                            j -= 2
                        elif curr_strand.sense == "antiparallel":
                            i -= 2
                            j += 2
                    if (curr_strand.sense == "parallel"):
                        # some tweaking to ensure correct donor assignment
                        if i >= 2 and not current_start_res_is_donor:
                            i -= 2
                            current_start_res_is_donor = not current_start_res_is_donor
                        if j >= 2 and current_start_res_is_donor:
                            j -= 2
                            current_start_res_is_donor = not current_start_res_is_donor
                        while (i < len_prev_residues) and (j <
                                                           len_curr_residues):
                            if current_start_res_is_donor:
                                donor_residue = curr_rgs[j]
                                acceptor_residue = prev_rgs[i]
                                i += 2
                            else:
                                donor_residue = prev_rgs[i]
                                acceptor_residue = curr_rgs[j]
                                j += 2
                            current_start_res_is_donor = not current_start_res_is_donor
                            if donor_residue.atom_groups()[0].resname.strip(
                            ) != "PRO":
                                proxies = _create_hbond_proxy(
                                    acceptor_atoms=acceptor_residue.atoms(),
                                    donor_atoms=donor_residue.atoms(),
                                    hbond_counts=hbond_counts,
                                    distance_ideal=distance_ideal,
                                    distance_cut=distance_cut,
                                    remove_outliers=remove_outliers,
                                    weight=weight,
                                    sigma=sheet_params.sigma,
                                    slack=sheet_params.slack,
                                    top_out=sheet_params.top_out,
                                    log=log)
                                if proxies is not None:
                                    for proxy in proxies:
                                        generated_proxies.append(proxy)
                    elif (curr_strand.sense == "antiparallel"):
                        while (i < len_prev_residues and j >= 0):
                            if (prev_rgs[i].atom_groups()[0].resname.strip() !=
                                    "PRO"):
                                proxies = _create_hbond_proxy(
                                    acceptor_atoms=curr_rgs[j].atoms(),
                                    donor_atoms=prev_rgs[i].atoms(),
                                    hbond_counts=hbond_counts,
                                    distance_ideal=distance_ideal,
                                    distance_cut=distance_cut,
                                    remove_outliers=remove_outliers,
                                    weight=weight,
                                    sigma=sheet_params.sigma,
                                    slack=sheet_params.slack,
                                    top_out=sheet_params.top_out,
                                    log=log)
                                if proxies is not None:
                                    for proxy in proxies:
                                        generated_proxies.append(proxy)

                            if (curr_rgs[j].atom_groups()[0].resname.strip() !=
                                    "PRO"):
                                proxies = _create_hbond_proxy(
                                    acceptor_atoms=prev_rgs[i].atoms(),
                                    donor_atoms=curr_rgs[j].atoms(),
                                    hbond_counts=hbond_counts,
                                    distance_ideal=distance_ideal,
                                    distance_cut=distance_cut,
                                    remove_outliers=remove_outliers,
                                    weight=weight,
                                    sigma=sheet_params.sigma,
                                    slack=sheet_params.slack,
                                    top_out=sheet_params.top_out,
                                    log=log)
                                if proxies is not None:
                                    for proxy in proxies:
                                        generated_proxies.append(proxy)
                            i += 2
                            j -= 2
                    else:
                        print >> log, "  WARNING: strand direction not defined!"
                        print >> log, "    previous: %s" % prev_strand
                        print >> log, "    current: %s" % curr_strand.selection
                else:
                    print >> log, "  WARNING: can't find start of bonding for strands!"
                    print >> log, "    previous: %s" % prev_strand
                    print >> log, "    current: %s" % curr_strand.selection
            else:
                print >> log, "  WARNING: can't find one or more strands!"
                print >> log, "    previous: %s" % prev_strand
                print >> log, "    current: %s" % curr_strand.selection
        k += 1
        prev_strand = curr_strand.selection
        prev_selection = curr_selection
        prev_rgs = curr_rgs
    return generated_proxies
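The pairing logic above walks two strands with a pair of indices: for parallel strands the donor role alternates and the corresponding index advances by two per hydrogen bond, while for antiparallel strands i advances as j retreats. An index-only sketch of that walk (pair_strand_indices is a hypothetical helper; registration search and proline skipping are omitted):

def pair_strand_indices(n_prev, n_curr, sense, i=0, j=0):
    # Yield (prev_index, curr_index) pairs the way the loops above do.
    pairs = []
    if sense == "parallel":
        donor_is_curr = True
        while i < n_prev and j < n_curr:
            pairs.append((i, j))
            if donor_is_curr:
                i += 2
            else:
                j += 2
            donor_is_curr = not donor_is_curr
    else:  # antiparallel: j starts at the far end and retreats
        j = n_curr - 1
        while i < n_prev and j >= 0:
            pairs.append((i, j))
            i += 2
            j -= 2
    return pairs

print(pair_strand_indices(6, 6, "antiparallel"))  # [(0, 5), (2, 3), (4, 1)]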
Esempio n. 17
0
def get_probabilities(input):
    result = flex.double([float(d) for d in input.split(',')])
    if (abs(1.0 - flex.sum(result)) > 1.e-3):
        raise Sorry("Sorry, the given probabilities must sum to one")
    return result
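A quick usage sketch of the tolerance check above (scitbx flex and libtbx's Sorry must be importable, as in the example itself):

print(list(get_probabilities("0.2,0.3,0.5")))  # [0.2, 0.3, 0.5]
try:
    get_probabilities("0.2,0.3")               # sums to 0.5, not 1.0
except Sorry as e:
    print(e)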
Esempio n. 18
0
        if "target=" in arg:
            found_it = True
            break
    if not found_it:
        raise Usage(command_line.parser.usage)

    if command_line.options.no_display:
        display = False
        arguments.append('--nodisplay')
    else:
        display = True

    assert command_line.options.num_procs > 0
    if command_line.options.output_dir is not None and \
      not os.path.isdir(command_line.options.output_dir):
        raise Sorry("Output dir %s doesn't exist" %
                    command_line.options.output_dir)

    def do_work(item):
        file, arguments, kwargs = item
        try:
            run_one_index(file, *arguments, **({'display': display}))
        except Exception, e:
            if hasattr(e, "classname"):
                print e.classname, "for %s:" % file,
            else:
                print "Indexing error for %s:" % file,
            print e

    if command_line.options.num_procs == 1:
        for file in files:
            if command_line.options.output_dir is not None:
Esempio n. 19
0
    def __init__(
            self,
            model,
            pdb_hierarchy=None,  # keep for mmtbx.validation_summary (multiple models)
            fmodel=None,
            fmodel_neutron=None,
            sequences=None,
            flags=None,
            header_info=None,
            raw_data=None,
            unmerged_data=None,
            keep_hydrogens=True,
            nuclear=False,
            save_probe_unformatted_file=None,
            show_hydrogen_outliers=False,
            min_cc_two_fofc=0.8,
            n_bins_data=10,
            count_anomalous_pairs_separately=False,
            use_internal_variance=True,
            outliers_only=True,
            use_pdb_header_resolution_cutoffs=False,
            file_name=None,
            ligand_selection=None,
            rotamer_library="8000",
            map_params=None):
        assert rotamer_library == "8000", "data_version given to RotamerEval not recognized."
        for name in self.__slots__:
            setattr(self, name, None)

        # use objects from model
        self.model = model
        if (self.model is not None):
            pdb_hierarchy = self.model.get_hierarchy()
            xray_structure = self.model.get_xray_structure()
            geometry_restraints_manager = self.model.get_restraints_manager(
            ).geometry
            crystal_symmetry = self.model.crystal_symmetry()
            all_chain_proxies = self.model.all_chain_proxies
        else:
            assert (pdb_hierarchy is not None)
            xray_structure = None
            geometry_restraints_manager = None
            crystal_symmetry = None
            all_chain_proxies = None

        # very important - the i_seq attributes may be extracted later
        pdb_hierarchy.atoms().reset_i_seq()
        self.pdb_hierarchy = pdb_hierarchy
        if (xray_structure is None):
            if (fmodel is not None):
                xray_structure = fmodel.xray_structure
            elif (crystal_symmetry is not None):
                xray_structure = pdb_hierarchy.extract_xray_structure(
                    crystal_symmetry=crystal_symmetry)
        self.crystal_symmetry = crystal_symmetry
        if (crystal_symmetry is None) and (fmodel is not None):
            self.crystal_symmetry = fmodel.f_obs().crystal_symmetry()

        # use maps (fmodel is not used)
        # run earlier since pdb_hierarchy gets modified
        use_maps = False
        if (map_params is not None):
            use_maps = ((map_params.input.maps.map_file_name) or
                        ((map_params.input.maps.map_coefficients_file_name) and
                         (map_params.input.maps.map_coefficients_label)))
        if (use_maps):
            if (flags.real_space):
                self.real_space = experimental.real_space(
                    fmodel=None,
                    model=self.model,
                    cc_min=min_cc_two_fofc,
                    molprobity_map_params=map_params.input.maps)
            if (flags.waters):
                self.waters = waters.waters(
                    pdb_hierarchy=pdb_hierarchy,
                    xray_structure=xray_structure,
                    fmodel=None,
                    collect_all=True,
                    molprobity_map_params=map_params.input.maps)

        self.header_info = header_info
        if (flags is None):
            flags = molprobity_flags()
        import mmtbx.model.statistics
        self.model_statistics_geometry = mmtbx.model.statistics.geometry(
            pdb_hierarchy=pdb_hierarchy,
            geometry_restraints_manager=geometry_restraints_manager,
            use_hydrogens=keep_hydrogens,
            use_nuclear=nuclear)
        self.model_statistics_geometry_result = \
          self.model_statistics_geometry.result()
        self.ramalyze = self.model_statistics_geometry_result.ramachandran.ramalyze
        self.omegalyze = self.model_statistics_geometry_result.omega.omegalyze
        self.rotalyze = self.model_statistics_geometry_result.rotamer.rotalyze
        self.cbetadev = self.model_statistics_geometry_result.c_beta.cbetadev
        self.clashes = self.model_statistics_geometry_result.clash.clashes
        if pdb_hierarchy.contains_protein():
            self.find_missing_atoms(out=null_out())
            if (flags.nqh):
                self.nqh_flips = clashscore.nqh_flips(
                    pdb_hierarchy=pdb_hierarchy)
        if (pdb_hierarchy.contains_rna() and flags.rna
                and libtbx.env.has_module(name="suitename")):
            if (geometry_restraints_manager is not None):
                self.rna = rna_validate.rna_validation(
                    pdb_hierarchy=pdb_hierarchy,
                    geometry_restraints_manager=geometry_restraints_manager,
                    outliers_only=outliers_only,
                    params=None)
        if (flags.model_stats) and (xray_structure is not None):
            self.model_stats = model_properties.model_statistics(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=xray_structure,
                all_chain_proxies=all_chain_proxies,
                ignore_hd=(not nuclear),
                ligand_selection=ligand_selection)
        if (geometry_restraints_manager is not None) and (flags.restraints):
            assert (xray_structure is not None)
            self.restraints = restraints.combined(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=xray_structure,
                geometry_restraints_manager=geometry_restraints_manager,
                ignore_hd=(not nuclear),
                cdl=getattr(all_chain_proxies, "use_cdl", None))
        if (sequences is not None) and (flags.seq):
            self.sequence = sequence.validation(
                pdb_hierarchy=pdb_hierarchy,
                sequences=sequences,
                log=null_out(),
                include_secondary_structure=True,
                extract_coordinates=True)

        if (fmodel is not None):
            if (use_pdb_header_resolution_cutoffs) and (header_info
                                                        is not None):
                fmodel = fmodel.resolution_filter(d_min=header_info.d_min,
                                                  d_max=header_info.d_max)
            if (flags.rfactors):
                self.data_stats = experimental.data_statistics(
                    fmodel,
                    raw_data=raw_data,
                    n_bins=n_bins_data,
                    count_anomalous_pairs_separately=
                    count_anomalous_pairs_separately)

            if (not use_maps):  # if maps are used, keep previous results
                if (flags.real_space):
                    self.real_space = experimental.real_space(
                        model=model, fmodel=fmodel, cc_min=min_cc_two_fofc)
                if (flags.waters):
                    self.waters = waters.waters(pdb_hierarchy=pdb_hierarchy,
                                                xray_structure=xray_structure,
                                                fmodel=fmodel,
                                                collect_all=True)

            if (unmerged_data is not None):
                self.merging = experimental.merging_and_model_statistics(
                    f_obs=fmodel.f_obs(),
                    f_model=fmodel.f_model(),
                    r_free_flags=fmodel.r_free_flags(),
                    unmerged_i_obs=unmerged_data,
                    anomalous=count_anomalous_pairs_separately,
                    use_internal_variance=use_internal_variance,
                    n_bins=n_bins_data)
            if (flags.xtriage):
                import mmtbx.scaling.xtriage
                f_model = abs(
                    fmodel.f_model()).set_observation_type_xray_amplitude()
                if (raw_data is not None):
                    f_model, obs = f_model.common_sets(other=raw_data)
                else:
                    obs = fmodel.f_obs()
                self.xtriage = mmtbx.scaling.xtriage.xtriage_analyses(
                    miller_obs=obs,
                    miller_calc=f_model,
                    unmerged_obs=unmerged_data,  # XXX some redundancy here...
                    text_out=null_out())
        if (fmodel_neutron is not None) and (flags.rfactors):
            self.neutron_stats = experimental.data_statistics(
                fmodel_neutron,
                n_bins=n_bins_data,
                count_anomalous_pairs_separately=False)
        if (pdb_hierarchy.models_size() == 1):
            self._multi_criterion = multi_criterion_view(pdb_hierarchy)

        # wilson B
        self.wilson_b = None
        if (fmodel is not None):
            self.wilson_b = fmodel.wilson_b()
        elif (fmodel_neutron is not None):
            self.wilson_b = fmodel_neutron.wilson_b()

        # validate hydrogens
        self.hydrogens = None
        if self.model is not None and self.model.has_hd():
            # import here to avoid circular import issues
            from mmtbx.hydrogens.validate_H import validate_H, validate_H_results
            hydrogens = validate_H(model, nuclear)
            hydrogens.validate_inputs()
            hydrogens.run()
            self.hydrogens = validate_H_results(hydrogens.get_results())

        # write probe file if needed (CLI and GUI)
        if (save_probe_unformatted_file is not None):
            pcm = self.clashes.probe_clashscore_manager
            try:
                with open(save_probe_unformatted_file, 'w') as f:
                    f.write(pcm.probe_unformatted)
                self.clashes.probe_file = save_probe_unformatted_file
            except IOError as err:
                raise Sorry('%s could not be written correctly.\n%s' %
                            (save_probe_unformatted_file, err))
Esempio n. 20
0
    def _silhouette_analysis(self, cluster_labels, linkage_matrix, n_clusters,
                             min_silhouette_score):
        """Compare valid equal-sized clustering using silhouette scores.

        Args:
          cluster_labels (np.ndarray):
          linkage_matrix (np.ndarray): The hierarchical clustering of centroids of the
            initial clustering as produced by
            :func:`scipy.cluster.hierarchy.linkage`.
          n_clusters (int): Optionally override the automatic determination of the
            number of clusters.
          min_silhouette_score (float): The minimum silhouette score to be used
            in automatic determination of the number of clusters.

        Returns:
          cluster_labels (np.ndarray): A label for each coordinate.
        """
        eps = 1e-6

        cluster_labels_input = cluster_labels
        distances = linkage_matrix[::, 2]
        distances = np.insert(distances, 0, 0)
        silhouette_scores = []
        thresholds = []
        threshold_n_clusters = []
        for threshold in distances[1:]:
            cluster_labels = copy.deepcopy(cluster_labels_input)
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold - eps,
                                        criterion="distance").tolist()
            counts = [labels.count(l) for l in set(labels)]
            if len(set(counts)) > 1:
                # only equal-sized clusters are valid
                continue

            n = len(set(labels))
            if n == 1:
                continue
            elif n_clusters is not Auto and n != n_clusters:
                continue
            for i in range(len(labels)):
                cluster_labels[cluster_labels_input == i] = int(labels[i] - 1)
            if len(np.unique(cluster_labels)) == self.coords.shape[0]:
                # silhouette coefficient not defined if 1 dataset per cluster
                # not sure what the default value should be
                sample_silhouette_values = np.full(cluster_labels.size, 0.0)
            else:
                # Compute the silhouette scores for each sample
                sample_silhouette_values = metrics.silhouette_samples(
                    self.coords, cluster_labels, metric="cosine")
            silhouette_avg = sample_silhouette_values.mean()
            silhouette_scores.append(silhouette_avg)
            thresholds.append(threshold)
            threshold_n_clusters.append(n)

            count_negative = (sample_silhouette_values < 0).sum()
            logger.info("Clustering:")
            logger.info("  Number of clusters: %i", n)
            logger.info(
                "  Threshold score: %.3f (%.1f deg)",
                threshold,
                math.degrees(math.acos(1 - threshold)),
            )
            logger.info("  Silhouette score: %.3f", silhouette_avg)
            logger.info(
                "  -ve silhouette scores: %.1f%%",
                100 * count_negative / sample_silhouette_values.size,
            )

        if n_clusters is Auto:
            # pick the threshold giving the highest mean silhouette score
            idx = int(np.argmax(silhouette_scores))
        else:
            try:
                idx = threshold_n_clusters.index(n_clusters)
            except ValueError:
                raise Sorry("No valid clustering with %i clusters" %
                            n_clusters)

        if n_clusters is Auto and silhouette_scores[idx] < min_silhouette_score:
            # assume single cluster
            cluster_labels = np.zeros(cluster_labels.size)
        else:
            threshold = thresholds[idx] - eps
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold,
                                        criterion="distance")
            cluster_labels = np.full(self.coords.shape[0], -1, dtype=int)
            for i in range(len(labels)):
                cluster_labels[cluster_labels_input == i] = labels[i] - 1

        return cluster_labels, threshold
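The threshold selection above can be reproduced in a few lines of scipy/scikit-learn: cut the dendrogram just below each merge height, keep only valid clusterings, and score each by its mean silhouette value. A standalone sketch on random placeholder coordinates:

import numpy as np
from scipy.cluster import hierarchy
from sklearn import metrics

rng = np.random.default_rng(0)
coords = np.vstack([rng.normal(0.0, 0.1, (10, 2)),
                    rng.normal(1.0, 0.1, (10, 2))])
linkage_matrix = hierarchy.linkage(coords, method="average")

best = None
for threshold in linkage_matrix[:, 2]:
    labels = hierarchy.fcluster(linkage_matrix, threshold - 1e-6,
                                criterion="distance")
    n = len(set(labels))
    if n < 2 or n >= len(coords):
        continue  # silhouette is undefined for 1 cluster or all singletons
    score = metrics.silhouette_samples(coords, labels).mean()
    if best is None or score > best[0]:
        best = (score, n)

print("best mean silhouette %.3f with %i clusters" % best)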
Esempio n. 21
0
  def __init__(self,
               model, # shifted, with shift_manager
               map_data = None, # shifted map_data
               params=None,
               log=sys.stdout,
               verbose=True):
    t_0 = time()
    self.model = model
    # self.cif_objects = cif_objects
    self.params = params
    self.log = log
    self.verbose = verbose

    # self.shift_manager = self.model.get_shift_manager()

    self.rmsd_from_start = None
    self.init_model_statistics = None
    self.init_gm_model_statistics = None
    self.after_ss_idealization = None
    self.after_loop_idealization = None
    self.after_rotamer_fixing = None
    self.final_model_statistics = None
    self.user_supplied_map = map_data
    self.reference_map = None # Whole map for all NCS copies
    self.master_map = None # Map for only one NCS copy, or == reference_map if no NCS
    self.init_ref_map = None # separate map for initial GM. Should be tighter than the 2 above

    params = mmtbx.model.manager.get_default_pdb_interpretation_params()
    params.pdb_interpretation.clash_guard.nonbonded_distance_threshold=None

    params.pdb_interpretation.peptide_link.ramachandran_restraints = True
    params.pdb_interpretation.peptide_link.restrain_rama_outliers = self.params.restrain_rama_outliers
    params.pdb_interpretation.peptide_link.restrain_rama_allowed = self.params.restrain_rama_allowed
    params.pdb_interpretation.peptide_link.restrain_allowed_outliers_with_emsley = self.params.restrain_allowed_outliers_with_emsley
    params.pdb_interpretation.peptide_link.rama_weight = self.params.rama_weight
    params.pdb_interpretation.peptide_link.oldfield.weight_scale=self.params.oldfield.weight_scale
    params.pdb_interpretation.peptide_link.oldfield.plot_cutoff=self.params.oldfield.plot_cutoff

    params.pdb_interpretation.peptide_link.apply_peptide_plane = True
    if self.params.loop_idealization.make_all_trans:
      params.pdb_interpretation.peptide_link.apply_all_trans = self.params.apply_all_trans
    params.pdb_interpretation.nonbonded_weight = self.params.nonbonded_weight
    params.pdb_interpretation.c_beta_restraints=True
    params.pdb_interpretation.max_reasonable_bond_distance = None
    params.pdb_interpretation.ncs_search.enabled = True
    params.pdb_interpretation.ncs_search.chain_max_rmsd=4.0
    params.pdb_interpretation.ncs_search.chain_similarity_threshold=0.99
    params.pdb_interpretation.ncs_search.residue_match_radius=999.0
    params.pdb_interpretation.restraints_library.rdl = True
    params.pdb_interpretation.secondary_structure = self.params.secondary_structure
    self.params_for_model = params
    self.model.set_pdb_interpretation_params(params)


    self.original_hierarchy = self.model.get_hierarchy().deep_copy() # original pdb_h, without any processing
    self.original_boxed_hierarchy = None # original and boxed (if needed)

    self.filtered_ncs_restr_group_list = []

    self.init_ss_annotation = self.model.get_ss_annotation()

    # various checks, shifts, trims
    self.cs = self.original_cs = self.model.crystal_symmetry()
    if self.model.get_shift_manager() is not None:
      self.cs = self.model.get_shift_manager().box_crystal_symmetry

    # check self.cs (copy-paste from secondary_structure_restraints)
    corrupted_cs = False
    if self.cs is not None:
      if [self.cs.unit_cell(), self.cs.space_group()].count(None) > 0:
        corrupted_cs = True
        self.cs = None
      elif self.cs.unit_cell().volume() < 10:
        corrupted_cs = True
        self.cs = None
    # a couple of checks that pdb_h is OK
    o_c = self.original_hierarchy.overall_counts()
    o_c.raise_duplicate_atom_labels_if_necessary()
    # o_c.raise_residue_groups_with_multiple_resnames_using_same_altloc_if_necessary()
    o_c.raise_chains_with_mix_of_proper_and_improper_alt_conf_if_necessary()
    o_c.raise_improper_alt_conf_if_necessary()
    if len(self.original_hierarchy.models()) > 1:
      raise Sorry("Multi model files are not supported")
    ca_only_present = False
    for c in self.original_hierarchy.only_model().chains():
      if c.is_ca_only():
        ca_only_present = True
    if ca_only_present:
      raise Sorry("Don't support models with chains containing only CA atoms.")

    self.original_boxed_hierarchy = self.model.get_hierarchy().deep_copy()
    self.shift_vector = None
    if self.cs is None:
      assert self.model.get_shift_manager() is None
      # should it happen here?
      if corrupted_cs:
        print >> self.log, "Symmetry information is corrupted, "
      else:
        print >> self.log, "Symmetry information was not found, "
      print >> self.log, "putting molecule in P1 box."
      self.log.flush()
      from cctbx import uctbx
      box = uctbx.non_crystallographic_unit_cell_with_the_sites_in_its_center(
        sites_cart=self.model.get_sites_cart(),
        buffer_layer=3)
      # Creating new xrs from box, inspired by extract_box_around_model_and_map
      sp = crystal.special_position_settings(box.crystal_symmetry())
      sites_frac = box.sites_frac()
      xrs_box = self.model.get_xray_structure().replace_sites_frac(sites_frac)
      xray_structure_box = xray.structure(sp, xrs_box.scatterers())
      self.model.set_xray_structure(xray_structure_box)
      self.cs = box.crystal_symmetry()
      self.shift_vector = box.shift_vector

    if self.shift_vector is not None and self.params.debug:
      txt = self.model.model_as_pdb()
      with open("%s_boxed.pdb" % self.params.output_prefix, 'w') as f:
        f.write(txt)

    if self.params.trim_alternative_conformations:
      self.model.remove_alternative_conformations(always_keep_one_conformer=True)

    self.model = self.model.remove_hydrogens()
    self.model_h = None

    self.time_for_init = time()-t_0
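The P1-box fallback at the end of this example can be exercised on its own. A minimal sketch, assuming cctbx and scitbx are importable; it mirrors the calls used above on toy coordinates (the sites here are invented):

from cctbx import uctbx
from scitbx.array_family import flex

# Toy coordinates standing in for self.model.get_sites_cart()
sites_cart = flex.vec3_double([(0.0, 0.0, 0.0), (5.0, 3.0, 1.0)])
box = uctbx.non_crystallographic_unit_cell_with_the_sites_in_its_center(
    sites_cart=sites_cart, buffer_layer=3)
print(box.crystal_symmetry().unit_cell())  # P1 cell enclosing sites plus buffer
print(box.shift_vector)  # translation that was applied to center the sites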
Example n. 22
                user_phil.append(libtbx.phil.parse(file_name=file_name))
            elif ext in [".pkl", ".pickle"]:
                input_string = "run_file = %s" % arg
                user_phil.append(libtbx.phil.parse(input_string))
        else:
            try:
                arg_phil = libtbx.phil.parse(arg)
            except RuntimeError as e:
                print(e)
            else:
                user_phil.append(arg_phil)
    working_phil = process_master_phil.fetch(sources=user_phil)
    params = working_phil.extract()
    if params.run_file is None:
        working_phil.show()
        raise Sorry("Pickled target function run_file not defined.")
    target = easy_pickle.load(params.run_file)
    server = detached_process_server(target, params=params)
    server.run()


########################################################################
# testing classes (see tst_runtime_utils.py for usage)
class simple_client(detached_process_client):
    def __init__(self, *args, **kwds):
        self.n_cb = 0
        self.out = cStringIO.StringIO()
        detached_process_client.__init__(self, *args, **kwds)

    def callback_error(self, error, traceback_info):
        raise error
Example n. 23
    def __init__(
            self,
            map_manager,
            model=None,
            target_ncs_au_model=None,
            regions_to_keep=None,
            solvent_content=None,
            resolution=None,
            sequence=None,
            molecular_mass=None,
            symmetry=None,
            chain_type='PROTEIN',
            keep_low_density=True,  # default from map_box
            box_cushion=5,
            soft_mask=True,
            mask_expand_ratio=1,
            wrapping=None,
            log=None):

        self.model_can_be_outside_bounds = None  # not used but required to be set

        self._map_manager = map_manager
        self._model = model

        self._mask_data = None

        self._force_wrapping = wrapping
        if wrapping is None:
            wrapping = self.map_manager().wrapping()
        self.basis_for_boxing_string = 'around_unique, wrapping = %s' % (
            wrapping)

        if log is None:
            log = null_out()  # Print only if a log is supplied

        assert isinstance(map_manager, iotbx.map_manager.map_manager)
        assert self._map_manager.map_data().accessor().origin() == (0, 0, 0)
        assert resolution is not None
        if model is not None:
            assert isinstance(model, mmtbx.model.manager)
            assert map_manager.is_compatible_model(model)
        if self.map_manager().wrapping():  # map must be entire unit cell
            assert map_manager.unit_cell_grid == map_manager.map_data().all()

        # Get crystal_symmetry
        self.crystal_symmetry = map_manager.crystal_symmetry()
        # Convert to map_data

        from cctbx.maptbx.segment_and_split_map import run as segment_and_split_map
        assert self._map_manager.map_data().origin() == (0, 0, 0)

        args = []

        ncs_group_obj, remainder_ncs_group_obj, tracking_data  = \
          segment_and_split_map(args,
            map_data = self._map_manager.map_data(),
            crystal_symmetry = self.crystal_symmetry,
            ncs_obj = self._map_manager.ncs_object(),
            target_model = target_ncs_au_model,
            write_files = False,
            auto_sharpen = False,
            add_neighbors = False,
            density_select = False,
            save_box_map_ncs_au = True,
            resolution = resolution,
            solvent_content = solvent_content,
            chain_type = chain_type,
            sequence = sequence,
            molecular_mass = molecular_mass,
            symmetry = symmetry,
            keep_low_density = keep_low_density,
            regions_to_keep = regions_to_keep,
            box_buffer = box_cushion,
            soft_mask_extract_unique = soft_mask,
            mask_expand_ratio = mask_expand_ratio,
            out = log)

        from scitbx.matrix import col

        if not hasattr(tracking_data, 'box_mask_ncs_au_map_data'):
            raise Sorry(" Extraction of unique part of map failed...")

        ncs_au_mask_data = tracking_data.box_mask_ncs_au_map_data

        lower_bounds = ncs_au_mask_data.origin()
        upper_bounds = tuple(col(ncs_au_mask_data.focus()) - col((1, 1, 1)))

        print("\nBounds for unique part of map: %s to %s " %
              (str(lower_bounds), str(upper_bounds)),
              file=log)

        # shift the map so it is in the same position as the box map will be in
        ncs_au_mask_data.reshape(flex.grid(ncs_au_mask_data.all()))
        assert col(ncs_au_mask_data.all()) == \
            col(upper_bounds)-col(lower_bounds)+col((1, 1, 1))

        self.gridding_first = lower_bounds
        self.gridding_last = upper_bounds

        # Ready with gridding...set up shifts and box crystal_symmetry
        self.set_shifts_and_crystal_symmetry()

        # Apply boxing to model, ncs, and map (if available)
        self.apply_to_model_ncs_and_map()

        # Note that at this point, self._map_manager has been boxed
        assert ncs_au_mask_data.all() == self._map_manager.map_data().all()
        self._mask_data = ncs_au_mask_data

        # Now separately apply the mask to the boxed map
        self.apply_around_unique_mask(self._map_manager,
                                      resolution=resolution,
                                      soft_mask=soft_mask)
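For orientation: this __init__ has the shape of the "around unique" box in cctbx.maptbx.box. A hypothetical usage sketch; the class name around_unique and the map manager mm are assumptions, and only keywords from the signature above are used:

# mm: an iotbx.map_manager.map_manager whose map data has origin (0, 0, 0)
box = around_unique(
    map_manager=mm,
    resolution=3.0,  # required; asserted in the constructor
    soft_mask=True,
    box_cushion=5)
boxed_mm = box.map_manager()  # boxed map covering one NCS asymmetric unit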
Example n. 24
 def callback_aborted(self):
     raise Sorry("aborted as planned.")
Example n. 25
def get_range(value_list,
              threshold=None,
              ignore_ends=True,
              keep_near_ends_frac=0.02,
              half_height_width=2.,
              get_half_height_width=None,
              cutoff_ratio=4,
              ratio_max=0.5):  # XXX May need to set cutoff_ratio and
    #  ratio_max lower.
    # ignore_ends allows ignoring the first and last points, which may be off
    # if get_half_height_width, find the width at half max height, go
    #  half_height_width times this width out in either direction, use that as
    #  baseline instead of full cell. Don't do it if the height at this point
    #  is over cutoff_ratio times threshold above original baseline.
    if get_half_height_width:
        z_min, z_max = get_range(value_list,
                                 threshold=0.5,
                                 ignore_ends=ignore_ends,
                                 keep_near_ends_frac=keep_near_ends_frac,
                                 get_half_height_width=False)
        z_mid = 0.5 * (z_min + z_max)
        z_width = 0.5 * (z_max - z_min)
        z_low = z_mid - 2 * z_width
        z_high = z_mid + 2 * z_width
        if ignore_ends:
            i_max = value_list.size() - 2
            i_min = 1
        else:
            i_max = value_list.size() - 1
            i_min = 0

        i_low = max(i_min, min(i_max, int(0.5 + z_low * value_list.size())))
        i_high = max(i_min, min(i_max, int(0.5 + z_high * value_list.size())))
        min_value = value_list.min_max_mean().min
        max_value = value_list.min_max_mean().max
        ratio_low = (value_list[i_low] - min_value) / max(
            1.e-10, (max_value - min_value))
        ratio_high = (value_list[i_high] - min_value) / max(
            1.e-10, (max_value - min_value))
        if ratio_low <=  cutoff_ratio*threshold and ratio_low >0 \
             and ratio_low<ratio_max\
             and ratio_high <=  cutoff_ratio*threshold and ratio_high > 0 \
             and ratio_high < ratio_max:
            ratio = min(ratio_low, ratio_high)
            z_min, z_max = get_range(value_list,
                                     threshold=threshold + ratio,
                                     ignore_ends=ignore_ends,
                                     keep_near_ends_frac=keep_near_ends_frac,
                                     get_half_height_width=False)
            return z_min, z_max
        else:
            z_min, z_max = get_range(value_list,
                                     threshold=threshold,
                                     ignore_ends=ignore_ends,
                                     keep_near_ends_frac=keep_near_ends_frac,
                                     get_half_height_width=False)
            return z_min, z_max

    if threshold is None: threshold = 0
    n_tot = value_list.size()
    assert n_tot > 0
    min_value = value_list.min_max_mean().min
    max_value = value_list.min_max_mean().max
    cutoff = min_value + (max_value - min_value) * threshold
    if ignore_ends:
        i_off = 1
    else:
        i_off = 0
    i_low = None
    for i in range(i_off, n_tot - i_off):
        if value_list[i] > cutoff:
            i_low = max(i_off, i - 1)
            break
    i_high = None
    for i in range(i_off, n_tot - i_off):
        ii = n_tot - 1 - i
        if value_list[ii] > cutoff:
            i_high = min(n_tot - 1 - i_off, ii + 1)
            break
    if i_low is None or i_high is None:
        raise Sorry("Cannot auto-select region...")
    if i_low / n_tot < keep_near_ends_frac: i_low = 0
    if (n_tot - 1 - i_high) / n_tot < keep_near_ends_frac: i_high = n_tot - 1
    return i_low / n_tot, i_high / n_tot
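The selection logic of the non-recursive branch is easy to check with plain lists. A simplified sketch of the same cutoff scan (flex arrays, keep_near_ends_frac, and the Sorry on failure are omitted; next() raises StopIteration where the real code raises Sorry):

values = [0.0, 0.1, 0.8, 1.0, 0.7, 0.1, 0.0]
threshold = 0.3
lo, hi = min(values), max(values)
cutoff = lo + (hi - lo) * threshold  # same cutoff definition as above
n = len(values)
# first index from the left exceeding the cutoff, stepped back by one
i_low = next(max(1, i - 1) for i in range(1, n - 1) if values[i] > cutoff)
# first index from the right exceeding the cutoff, stepped forward by one
i_high = next(min(n - 2, ii + 1) for ii in range(n - 2, 0, -1)
              if values[ii] > cutoff)
print(i_low / n, i_high / n)  # fractional bounds, as returned above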
Example n. 26
 def as_table1_column(self,
                      label,
                      wavelength,
                      log,
                      re_compute_r_factors=Auto):
     """
 Extract information for display in the traditional 'Table 1' of
 crystallographic statistics in structure articles.
 """
     outer_shell = None
     data_stats = self.data_stats
     if (data_stats is None):
         data_stats = dummy_validation()
     merging_stats = dummy_validation()
     merging_outer = dummy_validation()
     n_refl_uniq = data_stats.n_refl
     n_refl_refine = data_stats.n_refl_refine
     n_free = data_stats.n_free
     completeness = data_stats.completeness
     completeness_outer = data_stats.completeness_outer
     d_max_min = self.d_max_min()
     d_max, d_min = d_max_min
     if (self.merging is not None):
         merging_stats = self.merging.overall
         merging_outer = self.merging.bins[-1]
         n_refl_uniq = merging_stats.n_uniq
         epsilon = 0.001
         if ((merging_stats.d_min > d_min + 2 * epsilon)
                 or (merging_stats.d_max < d_max - 2 * epsilon)):
             raise Sorry((
                 "Resolution limits for unmerged data in the structure " +
                 "'%s' do not cover the " +
                 "full range present in the merged data: %g - %g (merged) versus "
                 + "%g - %g (unmerged)") %
                         (label, d_max, d_min, merging_stats.d_max,
                          merging_stats.d_min))
     r_work = self.r_work()
     r_free = self.r_free()
     n_tls_groups = None
     if (self.header_info is not None):
         if (self.header_info.n_tls_groups > 0):
             n_tls_groups = self.header_info.n_tls_groups
         use_header_values = (not re_compute_r_factors or
                              (not self.header_info.is_phenix_refinement()
                               and (re_compute_r_factors is Auto)))
         r_work, r_free, warned = rfactor_sanity_check(
             r_work_pdb=self.header_info.r_work,
             r_free_pdb=self.header_info.r_free,
             r_work_fmodel=r_work,
             r_free_fmodel=r_free,
             out=log,
             structure_name=label,
             re_compute_r_factors=not use_header_values)
         if (use_header_values):
             n_refl_refine = data_stats.n_refl
     adp_result = self.adp_stats.result()
     adp_mean = [None for i in range(4)]
     for i, prop in enumerate(['overall', 'protein', 'other', 'water']):
         if getattr(adp_result, prop) is not None:
             adp_mean[i] = getattr(adp_result, prop).mean
     return iotbx.table_one.column(
         label=label,
         space_group=self.space_group_info(),
         unit_cell=self.unit_cell().parameters(),
         # data properties
         wavelength=wavelength,
         d_max_min=d_max_min,
         n_refl_all=merging_stats.n_obs,
         n_refl=n_refl_uniq,
         multiplicity=merging_stats.mean_redundancy,
         completeness=completeness * 100.0,
         i_over_sigma=merging_stats.i_over_sigma_mean,
         wilson_b=data_stats.wilson_b,
         r_sym=merging_stats.r_merge,
         r_meas=merging_stats.r_meas,
         r_pim=merging_stats.r_pim,
         cc_one_half=merging_stats.cc_one_half,
         cc_star=merging_stats.cc_star,
         # refinement
         n_refl_refine=n_refl_refine,
         n_free=n_free,
         r_work=r_work,
         r_free=r_free,
         cc_work=merging_stats.cc_work,
         cc_free=merging_stats.cc_free,
         # model properties
         n_atoms=self.model_stats_new.result().n_atoms -
         self.model_stats_new.result().n_hd,
         n_macro_atoms=self.model_stats_new.result().n_protein_atoms +
         self.model_stats_new.result().n_nucleotide_atoms,
         n_ligand_atoms=self.model_stats_new.result().n_other_atoms,
         n_waters=self.model_stats_new.result().n_water_atoms,
         n_residues=self.model_stats_new.result().n_protein,
         bond_rmsd=self.rms_bonds(),
         angle_rmsd=self.rms_angles(),
         rama_favored=self.rama_favored(),
         rama_allowed=self.rama_allowed(),
         rama_outliers=self.rama_outliers(),
         rota_outliers=self.rota_outliers(),
         clashscore=self.clashscore(),
         adp_mean=adp_mean[0],
         adp_mean_mm=adp_mean[1],
         adp_mean_lig=adp_mean[2],
         adp_mean_wat=adp_mean[3],
         n_tls_groups=n_tls_groups,
         anomalous_flag=data_stats.anomalous_flag,
     ).add_outer_shell(
         # XXX we need a consistency check here as well
         d_max_min=(data_stats.d_max_outer, data_stats.d_min_outer),
         n_refl=data_stats.n_refl_outer,
         n_refl_all=merging_outer.n_obs,
         n_refl_refine=data_stats.n_refl_refine_outer,
         n_free=data_stats.n_free_outer,
         cc_one_half=merging_outer.cc_one_half,
         cc_star=merging_outer.cc_star,
         r_sym=merging_outer.r_merge,
         r_meas=merging_outer.r_meas,
         r_pim=merging_outer.r_pim,
         i_over_sigma=merging_outer.i_over_sigma_mean,
         multiplicity=merging_outer.mean_redundancy,
         completeness=completeness_outer * 100,
         cc_work=merging_outer.cc_work,
         cc_free=merging_outer.cc_free,
         r_work=data_stats.r_work_outer,
         r_free=data_stats.r_free_outer)
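The dummy_validation placeholders above behave as null objects: every statistic reads as None until real data or merging statistics replace them. A minimal sketch of such a class, assuming that is all dummy_validation does:

class dummy_validation(object):
    """Null object: any attribute access yields None instead of AttributeError."""
    def __getattr__(self, name):
        return None

stats = dummy_validation()
print(stats.n_obs, stats.cc_one_half)  # None None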
Example n. 27
    def run(self):
        ''' Run the script. '''
        from dials.algorithms.profile_model.factory import ProfileModelFactory
        from dials.util.command_line import Command
        from dials.array_family import flex
        from dials.util.options import flatten_reflections, flatten_experiments
        from dxtbx.model.experiment_list import ExperimentListDumper
        from libtbx.utils import Sorry
        from dials.util import log

        log.config()

        # Parse the command line
        params, options = self.parser.parse_args(show_diff_phil=True)
        reflections = flatten_reflections(params.input.reflections)
        experiments = flatten_experiments(params.input.experiments)
        if len(reflections) == 0 and len(experiments) == 0:
            self.parser.print_help()
            return
        if len(reflections) != 1:
            raise Sorry('exactly 1 reflection table must be specified')
        if len(experiments) == 0:
            raise Sorry('no experiments were specified')
        if ('background.mean' not in reflections[0]
                and params.subtract_background):
            raise Sorry(
                'for subtract_background need background.mean in reflections')

        reflections, _ = self.process_reference(reflections[0], params)

        # Check pixels don't belong to neighbours
        self.filter_reference_pixels(reflections, experiments)

        # Predict the reflections
        logger.info("")
        logger.info("=" * 80)
        logger.info("")
        logger.info("Predicting reflections")
        logger.info("")
        predicted = flex.reflection_table.from_predictions_multi(
            experiments,
            dmin=params.prediction.d_min,
            dmax=params.prediction.d_max,
            margin=params.prediction.margin,
            force_static=params.prediction.force_static,
            padding=params.prediction.padding)

        # Match with predicted
        matched, reflections, unmatched = predicted.match_with_reference(
            reflections)
        assert (len(matched) == len(predicted))
        assert (matched.count(True) <= len(reflections))
        if matched.count(True) == 0:
            raise Sorry('''
        Invalid input for reference reflections.
        Zero reference spots were matched to predictions
      ''')
        elif len(unmatched) != 0:
            logger.info('')
            logger.info('*' * 80)
            logger.info(
                'Warning: %d reference spots were not matched to predictions' %
                (len(unmatched)))
            logger.info('*' * 80)
            logger.info('')

        # Create the profile model
        experiments = ProfileModelFactory.create(params, experiments,
                                                 reflections)
        for model in experiments:
            sigma_b = model.profile.sigma_b(deg=True)
            sigma_m = model.profile.sigma_m(deg=True)
            if isinstance(sigma_b, float):
                logger.info('Sigma B: %f' % sigma_b)
                logger.info('Sigma M: %f' % sigma_m)
            else:  # scan varying
                mean_sigma_b = sum(sigma_b) / len(sigma_b)
                mean_sigma_m = sum(sigma_m) / len(sigma_m)
                logger.info('Sigma B: %f' % mean_sigma_b)
                logger.info('Sigma M: %f' % mean_sigma_m)

        # Write the experiments
        Command.start("Writing experiments to %s" % params.output)
        dump = ExperimentListDumper(experiments)
        with open(params.output, "w") as outfile:
            outfile.write(dump.as_json())
        Command.end("Wrote experiments to %s" % params.output)
Example n. 28
def run(args,
        out=sys.stdout,
        auto_extract_labels=True,
        use_current_directory_if_not_specified=False,
        warn=True):
    master_params = libtbx.phil.parse(master_phil_str, process_includes=True)
    if (len(args) == 0):
        print("""\
************************************************************************
  phenix.table_one - statistics harvesting for publication
************************************************************************

  note: this is somewhat difficult to configure on the command line at
        present; you may find it more convenient to use the PHENIX GUI.

""",
              file=out)
        print("# Parameter template for phenix.table_one:", file=out)
        master_params.show(out=out)
        print("# (the 'structure' scope may be copied as many times as ",
              file=out)
        print("#  necessary to handle multiple datasets.)", file=out)
        print("# Alternate usage:", file=out)
        print("#   phenix.table_one model.pdb data.mtz [logfile]*", file=out)
        return None
    if (warn):
        print("""
  note: this is somewhat difficult to configure on the command line at
        present; you may find it more convenient to use the PHENIX GUI.
    """,
              file=out)
        time.sleep(2)
    interpreter = libtbx.phil.command_line.argument_interpreter(
        master_phil=master_params, home_scope="table_one")
    file_phil = []
    cmdline_phil = []
    pdb_file = None
    mtz_file = None
    unmerged_data = None
    log_files = []
    for arg in args:
        if os.path.isfile(arg):
            f = file_reader.any_file(arg)
            if (f.file_type == "phil"):
                file_phil.append(f.file_object)
            elif (f.file_type == "pdb"):
                pdb_file = f.file_name
            elif (f.file_type == "hkl"):
                mtz_file = f.file_name
            elif (f.file_type == "txt"):
                log_files.append(f.file_name)
        else:
            if arg.startswith("unmerged_data="):
                unmerged_data = os.path.abspath("=".join(arg.split("=")[1:]))
                continue
            if arg.startswith("--"):
                arg = arg[2:] + "=True"
            try:
                arg_phil = interpreter.process(arg=arg)
            except RuntimeError:
                print("Ignoring unknown argument %s" % arg, file=out)
            else:
                cmdline_phil.append(arg_phil)
    working_phil = master_params.fetch(sources=file_phil + cmdline_phil)
    params = working_phil.extract()
    if (pdb_file is not None):
        if (len(params.table_one.structure) > 0):
            raise Sorry(
                "You already have a structure defined in the parameter " +
                "file; to add structures, you should edit the parameters instead of "
                +
                "specifying additional PDB and data files on the command line."
            )
        if (mtz_file is None):
            raise Sorry(
                "You have supplied a PDB file, but no corresponding MTZ " +
                "file.")
        log_file_str = "\n".join(["log_file=%s" % f for f in log_files])
        structure_params = libtbx.phil.parse(structure_params_str)
        new_structure = structure_params.extract().structure[0]
        new_structure.pdb_file = pdb_file
        new_structure.mtz_file = mtz_file
        new_structure.unmerged_data = unmerged_data
        params.table_one.structure.append(new_structure)
    if auto_extract_labels:
        extract_labels(params.table_one, out=out)
    if use_current_directory_if_not_specified:
        if (params.table_one.output.directory is None):
            params.table_one.output.directory = os.getcwd()
    validate_params(params)
    if (params.table_one.multiprocessing.nproc is None):
        params.table_one.multiprocessing.nproc = 1
    final_phil = master_params.format(python_object=params)
    if params.table_one.output.verbose:
        print("", file=out)
        print("#Final effective parameters:", file=out)
        final_phil.show(out=out)
        print("#---end", file=out)
        print("", file=out)
    with open("table_one.eff", "w") as f:
        final_phil.show(out=f)
    table1 = table_one(params.table_one, out=out)
    easy_pickle.dump("%s.pkl" % params.table_one.output.base_name, table1)
    table1.save_multiple(file_base=params.table_one.output.base_name,
                         formats=params.table_one.output.format)
    return table1
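The argument handling above (interpreter.process for key=value strings, fetch over the collected sources, extract to a params object) is the usual libtbx.phil round trip. A minimal sketch with an invented master scope:

import libtbx.phil
import libtbx.phil.command_line

master = libtbx.phil.parse("""\
table_one {
  output {
    verbose = False
      .type = bool
  }
}
""")
interp = libtbx.phil.command_line.argument_interpreter(
    master_phil=master, home_scope="table_one")
source = interp.process(arg="output.verbose=True")  # key=value -> phil object
params = master.fetch(sources=[source]).extract()
print(params.table_one.output.verbose)  # True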
Example n. 29
def run(args,
        command_name="phenix.cif_as_mtz",
        out=sys.stdout,
        return_as_miller_arrays=False):
    if (len(args) == 0): args = ["--help"]
    try:
        command_line = (iotbx_option_parser(
            usage="%s [reflection_cif_file] [options]" % command_name,
            description='Example: %s r1o9ksf.ent --symmetry=pdb1o9k.ent' %
            command_name
        ).enable_symmetry_comprehensive().option(
            None,
            "--output_file_name",
            action="store",
            default=False,
            type="string",
            help="Output mtz file name."
        ).option(
            None,
            "--wavelength_id",
            action="store",
            default=None,
            type="int",
            help="Extract data set with given wavelength_id."
        ).option(
            None,
            "--crystal_id",
            action="store",
            default=None,
            type="int",
            help="Extract data set with given crystal_id."
        ).option(
            None,
            "--output_r_free_label",
            action="store",
            default="R-free-flags",
            type="string",
            help=
            "MTZ column label to use for R-free flags (default: R-free-flags)"
        ).option(
            None,
            "--merge",
            action="store_true",
            help="Merge non-unique data where present."
        ).option(
            None,
            "--incompatible_flags_to_work_set",
            action="store_true",
            help=
            "When merging place reflections with incompatible flags into the "
            "working set."
        ).option(
            None,
            "--remove_systematic_absences",
            action="store_true",
            help="Remove systematic absent reflections."
        ).option(
            None,
            "--map_to_asu",
            action="store_true",
            help="Map to asymmetric unit."
        ).option(
            "--show_details_if_error",
            action="store_true",
            help="Show data details for some errors."
        ).option(
            "--show_log",
            action="store_true",
            help="Show some output."
        ).option(
            "--ignore_bad_sigmas",
            action="store_true",
            help=
            "Set sigmas to None instead of raising an error when bad sigmas "
            "are present."
        ).option(
            "--extend_flags",
            action="store_true",
            help="Extend R-free flags to cover all reflections if necessary.")
                        ).process(args=args)
    except Exception as e:
        if (str(e) != "0"): print(str(e))
        sys.exit(0)
    crystal_symmetry = command_line.symmetry
    if (len(command_line.args) > 1):
        print("%d arguments are given from the command line:"% \
          len(command_line.args), command_line.args, file=out)
        raise Sorry("Please specify one reflection cif file.")
    file_name = command_line.args[0]
    if (not os.path.isfile(file_name)):
        raise Sorry("File is not found: %s" % file_name)
    output_r_free_label = command_line.options.output_r_free_label
    if ((output_r_free_label[0] not in string.ascii_uppercase)
            or (re.search(r"[^a-zA-Z0-9_\-]", output_r_free_label))):
        raise Sorry((
            "%s is not a suitable column label.  MTZ format requires " +
            "an uppercase letter as the first character, and only alphanumeric "
            + "characters or hyphens in the rest of the string.") %
                    output_r_free_label)
    result = process_files(
        file_name=file_name,
        crystal_symmetry=crystal_symmetry,
        output_file_name=command_line.options.output_file_name,
        wavelength_id=command_line.options.wavelength_id,
        crystal_id=command_line.options.crystal_id,
        show_details_if_error=command_line.options.show_details_if_error,
        output_r_free_label=command_line.options.output_r_free_label,
        merge_non_unique_under_symmetry=command_line.options.merge,
        map_to_asu=command_line.options.map_to_asu,
        remove_systematic_absences=command_line.options.
        remove_systematic_absences,
        incompatible_flags_to_work_set=command_line.options.
        incompatible_flags_to_work_set,
        return_as_miller_arrays=return_as_miller_arrays,
        ignore_bad_sigmas=command_line.options.ignore_bad_sigmas,
        extend_flags=command_line.options.extend_flags,
        log=out)
    if return_as_miller_arrays:
        return result
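Since run() exposes return_as_miller_arrays, the converter above can also be used as a library call. A hypothetical invocation (the file name comes from the usage string above; whether each returned item carries a printable info() depends on the upstream process_files):

miller_arrays = run(args=["r1o9ksf.ent"], return_as_miller_arrays=True)
for array in miller_arrays:
    print(array.info())  # inspect the extracted arrays instead of writing MTZ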
Example n. 30
def process_inputs(args, log=sys.stdout):
    print >> log, "-" * 79
    print >> log, "PProbe RUN at %s" % time.ctime()
    print >> log, "Processing all Inputs:"
    #process phils in order to not overwrite inputs with defaults
    #phil from above
    master_phil = phil.parse(master_params_str, process_includes=True)
    #map params from phenix defaults (phil)
    maps_phil = phil.parse(mmtbx.maps.map_and_map_coeff_params_str)
    search_phil = phil.parse(peak_search_param_str)

    #merge phil objects?
    total_phil = master_phil.fetch(sources=[maps_phil, search_phil])

    #inputs is somehow different -- object with specific params and lists of files
    #process after all phil?
    inputs = mmtbx.utils.process_command_line_args(args=args,
                                                   master_params=total_phil)

    #params object contains all command line parameters
    working_phil = inputs.params
    params = working_phil.extract()

    #check for master param dictionary
    if params.input.model_param.model_dict_file is None:
        params.input.model_param.model_dict_file = "pprobe_master.dict"
    if not os.path.isfile(params.input.model_param.model_dict_file):
        print >> log, "WARNING -- param file not found!"
        print >> log, "--> trying pprobe_master.dict . . . "
        params.input.model_param.model_dict_file = "pprobe_master.dict"
        if not os.path.isfile(params.input.model_param.model_dict_file):
            raise Sorry("Master Param Dictionary %s not found!" %
                        params.input.model_param.model_dict_file)

    if params.pprobe.extract:
        #check for proper PDB input
        #count up PDB files found
        pdb_count = len(inputs.pdb_file_names)
        for pdbin in (params.input.pdb.model_pdb, params.input.pdb.strip_pdb,
                      params.input.pdb.peaks_pdb):
            if pdbin is not None:
                pdb_count = pdb_count + 1
        if (pdb_count == 1) and (len(inputs.pdb_file_names) == 1):
            #one vanilla pdb to be used as model
            params.input.pdb.model_pdb = inputs.pdb_file_names[0]
        elif (pdb_count == 3) and (len(inputs.pdb_file_names) == 0):
            pass  #three explicit PDBs, hopefully correct
        else:
            raise Sorry("\n\tInput 1 PDB for automatic stripping and peak finding \n"+\
                        "\tor all PDB files specifically, like so: \n"+\
                        "\tfor explicit input: \n"+\
                        "\t\tmodel_pdb=XXX.pdb strip_pdb=YYY.pdb peaks_pdb=ZZZ.pdb \n"+\
                        "\tfor automatic pdb generation: \n"+\
                        "\t\tXXX.pdb")

        #check for proper reflection file input
        reflection_files = inputs.reflection_files
        if (len(reflection_files) == 0):
            raise Sorry("Reflection data or map coefficients required")
        if (len(reflection_files) > 1):
            raise Sorry("Only one type of reflection data can be entered \n"+\
                        "Enter map coefficients with map_coeff_file=XXX.mtz \n"+\
                        "or structure factor files as XXX.(any supported)")
        else:
            params.input.reflection_data.reflection_file_name = reflection_files[
                0].file_name()

        #filename setup
        model_basename = os.path.basename(
            params.input.pdb.model_pdb.split(".")[0])
        if (len(model_basename) > 0
                and params.output.output_file_name_prefix is None):
            params.output.output_file_name_prefix = model_basename
        if params.input.input_map.map_coeff_file is not None:
            params.input.parameters.write_maps = False
        new_params = master_phil.format(python_object=params)
        #okay, see if we're where we want to be
        print >> log, "Runtime Parameters:"
        new_params.show()

        #DATA PROCESSING
        #setup model pdb (required and should be known)
        crystal_symmetry = check_symmetry(inputs, params, log)
        model_pdb_input = iotbx.pdb.input(file_name=params.input.pdb.model_pdb)
        model_hier = model_pdb_input.construct_hierarchy()
        model_hier.remove_hd()
        model_xrs = model_hier.extract_xray_structure(
            crystal_symmetry=crystal_symmetry)

        # strip pdb if needed, write the result
        if (params.input.pdb.strip_pdb is
                None) and (params.input.parameters.map_omit_mode != "asis"):
            strip_xrs, strip_hier = create_strip_pdb(
                model_hier, model_xrs, params.input.parameters.map_omit_mode,
                log)
            strip_filename = params.output.output_file_name_prefix + "_pprobe_strip.pdb"
            print >> log, "Writing Strip PDB to: ", strip_filename
            strip_hier.write_pdb_file(file_name=strip_filename,
                                      crystal_symmetry=crystal_symmetry,
                                      append_end=True,
                                      anisou=False)
            params.input.pdb.strip_pdb = strip_filename
        elif params.input.parameters.map_omit_mode == "asis":
            strip_xrs, strip_hier = model_xrs, model_hier
            params.input.pdb.strip_pdb = params.input.pdb.model_pdb
        else:
            strip_pdb_input = iotbx.pdb.input(
                file_name=params.input.pdb.strip_pdb)
            strip_hier = strip_pdb_input.construct_hierarchy()
            strip_hier.remove_hd()
            strip_xrs = strip_hier.extract_xray_structure(
                crystal_symmetry=crystal_symmetry)

        # make maps if map coefficients were not input; write out by default
        if (params.input.input_map.map_coeff_file is None):

            hkl_in = file_reader.any_file(
                params.input.reflection_data.reflection_file_name,
                force_type="hkl")
            hkl_in.assert_file_type("hkl")
            reflection_files = [hkl_in.file_object]
            f_obs, r_free_flags = setup_reflection_data(
                inputs, params, crystal_symmetry, reflection_files, log)
            #maps object is list of miller arrays
            maps = create_pprobe_maps(f_obs, r_free_flags, params, strip_xrs,
                                      strip_hier, log)
            map_fname = params.output.output_file_name_prefix + "_pprobe_maps.mtz"
            print >> log, "Writing PProbe maps to MTZ file: ", map_fname
            maps.write_mtz_file(map_fname)
            params.input.input_map.map_coeff_file = params.output.output_file_name_prefix + "_pprobe_maps.mtz"
        else:
            print "READING MAP FILE: ", params.input.input_map.map_coeff_file
            #setup input map coefficients

            map_coeff = reflection_file_utils.extract_miller_array_from_file(
                file_name=params.input.input_map.map_coeff_file,
                label=params.input.input_map.map_diff_label,
                type="complex",
                log=null_log)

            if params.input.parameters.score_res is None:
                params.input.parameters.score_res = map_coeff.d_min()
                print >> log, "  Determined Resolution Limit: %.2f" % params.input.parameters.score_res
                print >> log, "    -->Override with \"score_res=XXX\""
            map_fname = params.input.input_map.map_coeff_file

        # if peaks not input, find and write to pdb
        if params.input.pdb.peaks_pdb is None:
            if params.input.parameters.map_omit_mode != "valsol":
                peaks_result = find_map_peaks(params, strip_xrs, log)
                pdb_str = peaks_pdb_str(peaks_result)
                peak_pdb = iotbx.pdb.input(source_info=None,
                                           lines=flex.split_lines(pdb_str))
                peak_hier = peak_pdb.construct_hierarchy()
                peak_filename = params.output.output_file_name_prefix + "_pprobe_peaks.pdb"
                print >> log, "Writing Peaks to %s:" % peak_filename
                peak_hier.write_pdb_file(file_name=peak_filename,
                                         crystal_symmetry=crystal_symmetry,
                                         append_end=True,
                                         anisou=False)
                params.input.pdb.peaks_pdb = peak_filename
            else:
                peak_filename = params.output.output_file_name_prefix + "_pprobe_peaks.pdb"
                peak_xrs, peak_hier = create_sol_pdb(
                    model_hier, model_xrs,
                    params.input.parameters.map_omit_mode, log)
                print >> log, "Writing Peaks to %s:" % peak_filename
                peak_hier.write_pdb_file(file_name=peak_filename,
                                         crystal_symmetry=crystal_symmetry,
                                         append_end=True,
                                         anisou=False)
                params.input.pdb.peaks_pdb = peak_filename

        #Wrap up, display file names and info for manual input
        #save parameters for next stage
        new_phil = working_phil.format(python_object=params)
        phil_fname = params.output.output_file_name_prefix + "_pprobe.param"
        with open(phil_fname, "w") as f:
            f.write(new_phil.as_str())
        print >> log, "_" * 79
        print >> log, "Inputs Processed, final files:"
        print >> log, "   Model PDB: ", params.input.pdb.model_pdb
        print >> log, "   Strip PDB: ", params.input.pdb.strip_pdb
        print >> log, "   Peaks PDB: ", params.input.pdb.peaks_pdb
        print >> log, "   Map Coeff: ", map_fname
        print >> log, "   Resolution: %.2f" % params.input.parameters.score_res
        print >> log, "   Params: ", phil_fname
        #also return params
        return params
    else:  #only rescoring from pkl
        #filename setup
        pkl_basename = os.path.basename(
            params.input.data_pkl.peak_dict.split(".")[0])
        pkl_file = params.input.data_pkl.peak_dict
        if (len(pkl_basename) > 0
                and params.output.output_file_name_prefix is None):
            params.output.output_file_name_prefix = pkl_basename
        if not os.path.isfile(pkl_file):
            raise Sorry("\n\tPKL input requested but no file available\n"+\
                        "\t\t\t cannot find %s" % pkl_file)
        new_phil = working_phil.format(python_object=params)
        phil_fname = params.output.output_file_name_prefix + "_pprobe.param"
        with open(phil_fname, "w") as f:
            f.write(new_phil.as_str())

        new_params = master_phil.format(python_object=params)
        print >> log, "Runtime Parameters:"
        new_params.show()
        return params
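As a closing note, the params round trip used twice above (extract a plain Python object from the master phil, modify it, format it back, then write as_str to disk) works for any scope. A tiny sketch with an invented scope:

import libtbx.phil

master_phil = libtbx.phil.parse("""\
pprobe {
  extract = True
    .type = bool
}
""")
params = master_phil.extract()  # phil -> plain Python object
params.pprobe.extract = False  # edit like any attribute
new_phil = master_phil.format(python_object=params)  # back to a phil object
with open("pprobe_sketch.param", "w") as f:
    f.write(new_phil.as_str())  # same serialization as used above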