Example #1
0
def run(
    params=None, # params for running from command line
    map_data=None,  # map_data, as_double()
    pdb_inp=None,
    pdb_hierarchy=None,
    crystal_symmetry=None,
    resolution=None,
    scattering_table='n_gaussian',
    smoothing_window=5,
    crossover_atom='CA',
    minimum_matching_atoms=3,
    minimum_length=2,
    dist_max=1.0,
    minimum_improvement=0.01,
    max_regions_to_test=10,
    max_ends_per_region=5,
    maximum_fraction=0.5,
    max_keep=10,
    map_coeffs_file=None,map_coeffs_labels=None,
    pdb_in_file=None,
    pdb_out=None,
    verbose=None,
    out=sys.stdout):
  """Merge the models of a multi-model hierarchy into one composite model.

  The hierarchy is processed one chain segment at a time.  For each segment
  the per-residue map correlation of every model is obtained (get_cc_dict),
  smoothed (smooth_cc_values), and the allowed crossover points between
  models are found (get_crossover_dict).  Working models are scored, the
  best max_keep are crossed with each other until the score stops improving,
  and each residue of the composite is then copied from the model listed in
  best_model.source_list.

  Inputs:
    params: optional parameters object.  If supplied, its values replace the
      keyword arguments verbose, map_coeffs_file, map_coeffs_labels,
      pdb_in_file, resolution, scattering_table, all crossover settings and
      pdb_out.
    map_data: map to score against.  If not supplied it is calculated from
      map_coeffs_file (sigma-scaled FFT map, resolution_factor 0.25).
    pdb_inp, pdb_hierarchy, pdb_in_file: alternative sources of the models.
      If pdb_hierarchy is given, pdb_inp and pdb_in_file must be None and
      crystal_symmetry must be supplied.
    crystal_symmetry, resolution: taken from the map coefficients or the
      model input if not supplied (resolution may also be estimated from
      map and model).
    pdb_out: if set, the composite model is written to this file.
    out: stream for log output (None means sys.stdout).

  Returns:
    The input hierarchy unchanged if it contains exactly one model,
    otherwise a new hierarchy constructed from the composite model.

  Raises:
    Sorry: map coefficients or PDB file not found, or no resolution.
    AssertionError: inconsistent inputs.
  """

  if out is None: out=sys.stdout # explode and refine calls it this way

  # get info from params if present
  if params:
    verbose=params.control.verbose
    map_coeffs_file=params.input_files.map_coeffs_file
    map_coeffs_labels=params.input_files.map_coeffs_labels
    pdb_in_file=params.input_files.pdb_in_file
    resolution=params.crystal_info.resolution
    scattering_table=params.crystal_info.scattering_table
    smoothing_window=params.crossover.smoothing_window
    crossover_atom=params.crossover.crossover_atom
    minimum_matching_atoms=params.crossover.minimum_matching_atoms
    minimum_length=params.crossover.minimum_length
    dist_max=params.crossover.dist_max
    minimum_improvement=params.crossover.minimum_improvement
    max_regions_to_test=params.crossover.max_regions_to_test
    max_ends_per_region=params.crossover.max_ends_per_region
    maximum_fraction=params.crossover.maximum_fraction
    max_keep=params.crossover.max_keep
    pdb_out=params.output_files.pdb_out

  # Consistency checks
  if(pdb_hierarchy is not None):
    assert pdb_in_file is None
    assert pdb_inp is None
    assert crystal_symmetry is not None
    # XXX more checks here!

  # Get map_data if not present
  if not map_data:
    if not map_coeffs_file or not os.path.isfile(map_coeffs_file):
      raise Sorry("Cannot find the map_coeffs_file '%s'" %(
        str(map_coeffs_file)))
    from mmtbx.building.minimize_chain import get_map_coeffs
    map_coeffs=get_map_coeffs(map_coeffs_file,
        map_coeffs_labels=map_coeffs_labels)

    fft_map = map_coeffs.fft_map(resolution_factor = 0.25)
    fft_map.apply_sigma_scaling()
    map_data = fft_map.real_map_unpadded()
    map_data=map_data.as_double()
    if map_coeffs and not crystal_symmetry:
      crystal_symmetry=map_coeffs.crystal_symmetry()
    if map_coeffs and not resolution:
      resolution=map_coeffs.d_min()

  # Get the starting model
  if(pdb_hierarchy is None):
    if pdb_inp is None:
      if not pdb_in_file or not os.path.isfile(pdb_in_file):
        raise Sorry("Cannot read input PDB file '%s'" %(
          str(pdb_in_file)))
      print("Taking models from %s" %(pdb_in_file), file=out)
      # Context manager so the input file handle is always released
      with open(pdb_in_file) as pdb_file:
        pdb_string=pdb_file.read()
      pdb_inp=iotbx.pdb.input(source_info=None, lines = pdb_string)
      if pdb_inp is None:
        raise Sorry("Need a model or models")
    if not crystal_symmetry:
      crystal_symmetry=pdb_inp.crystal_symmetry()
    assert crystal_symmetry is not None
    hierarchy = pdb_inp.construct_hierarchy()
  else:
    hierarchy = pdb_hierarchy # XXX FIXME
  n_models=sum(1 for _ in hierarchy.models())

  if n_models==1:  # nothing to do
    return hierarchy

  #xrs = pdb_inp.xray_structure_simple(crystal_symmetry=crystal_symmetry)
  xrs = hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
  xrs.scattering_type_registry(table=scattering_table)
  if not resolution:
    from cctbx import maptbx
    resolution=maptbx.resolution_from_map_and_model.run(
      map_data=map_data, xray_structure=xrs).d_min
  if(resolution is None):
    raise Sorry("Resolution is required")
  print("\nResolution limit: %7.2f" %(resolution), file=out)
  print("\nSummary of input models", file=out)
  xrs.show_summary(f=out, prefix="  ")

  print("\nReady with %d models and map" %(n_models), file=out)
  # Get CC by residue for each model and map

  chain_id_and_resseq_list=[] # Instead set up chain_id and resseq (range)
  from mmtbx.secondary_structure.find_ss_from_ca import \
      split_model,get_first_resno, get_last_resno,get_chain_id
  model_list=split_model(hierarchy=hierarchy,only_first_model=True)
  for m in model_list:
    h=m.hierarchy
    chain_id_and_resseq=[get_chain_id(h),
       [get_first_resno(h),get_last_resno(h)]]
    if not chain_id_and_resseq in chain_id_and_resseq_list:
      chain_id_and_resseq_list.append(chain_id_and_resseq)

  # Run through chains separately
  # NOTE: All models of each chain must match exactly

  # Save composite model, chain by chain
  composite_model_stream=StringIO()

  # The full hierarchy is not modified below, so one selection cache serves
  # every chain
  hierarchy_asc=hierarchy.atom_selection_cache()

  for chain_id_and_resseq in chain_id_and_resseq_list:
    chain_id,[start_resno,end_resno]=chain_id_and_resseq
    atom_selection=get_atom_selection(chain_id=chain_id,
      start_resno=start_resno,end_resno=end_resno)
    sel=hierarchy_asc.selection(string = atom_selection)
    sel_hierarchy=hierarchy.select(sel)
    pdb_inp=sel_hierarchy.as_pdb_input(crystal_symmetry=crystal_symmetry)
    ph=pdb_inp.construct_hierarchy()

    print("\nWorking on chain_id='%s' resseq %d:%d\n" %(
       chain_id,start_resno,end_resno), file=out)

    # get CC values for all residues
    cc_dict=get_cc_dict(hierarchy=ph,map_data=map_data,d_min=resolution,
     crystal_symmetry=crystal_symmetry,
     table=scattering_table,out=out)

    # smooth CC values with window of smoothing_window
    smoothed_cc_dict=smooth_cc_values(cc_dict=cc_dict,
       smoothing_window=smoothing_window,
       verbose=verbose,out=out)

    # figure out all the places where crossover can occur.
    # Use the first key in sorted order so the choice does not depend on
    # dict ordering (all models of a chain must match, see NOTE above)
    keys=sorted(cc_dict.keys())
    n_residues=cc_dict[keys[0]].size()

    crossover_dict=get_crossover_dict(
      n_residues=n_residues,
      hierarchy=ph,
      crossover_atom=crossover_atom,
      dist_max=dist_max,
      minimum_matching_atoms=minimum_matching_atoms,
      verbose=verbose,out=out)

    # Now we are ready to identify the best composite model...
    # A composite has residue 0 from model x, residue 1 from model y etc.
    # Each change from model a to model b between residues i and i+1 must have
    #  a crossover between a and b at either residue i or i+1

    sorted_working_model_list=[]
    for key in keys:
      working_model=model_object(source_id=key,
         cc_dict=cc_dict,
         smoothed_cc_dict=smoothed_cc_dict,
         crossover_dict=crossover_dict,
         minimum_length=minimum_length,
         minimum_improvement=minimum_improvement,
         max_regions_to_test=max_regions_to_test,
         max_ends_per_region=max_ends_per_region,
         maximum_fraction=maximum_fraction)
      if verbose:
        working_model.show_summary(out=out)
      sorted_working_model_list.append(
        [working_model.get_score(),working_model])
    # Sort on the score only: with a plain sort() a tie in score falls
    # through to comparing the model objects, which raises TypeError in
    # Python 3 unless the class defines an ordering.
    sorted_working_model_list.sort(key=lambda entry: entry[0],reverse=True)
    sorted_working_model_list=sorted_working_model_list[:max_keep]
    working_model_list=[entry[1] for entry in sorted_working_model_list]

    # Go through all the working models and cross them with other models to
    #  optimize...Then take all the best and cross...

    best_model=sorted_working_model_list[0][1]
    found=True
    cycle=0
    while found:
      cycle+=1
      print("\nCYCLE %d current best is %7.3f\n" %(
        cycle,best_model.get_score()), file=out)
      found=False
      sorted_working_model_list=[]
      for working_model in working_model_list:
        others=[m for m in working_model_list if not working_model==m]
        new_working_model=working_model.optimize_with_others(others=others)
        if not new_working_model:
          print(file=out) # blank line now goes to the log, not stdout
          continue
        aa=[new_working_model.get_score(),new_working_model]
        if not aa in sorted_working_model_list:
          sorted_working_model_list.append(aa)
      if not sorted_working_model_list:
        break # nothing to do

      sorted_working_model_list.sort(key=lambda entry: entry[0],reverse=True)
      sorted_working_model_list=sorted_working_model_list[:max_keep]

      new_working_score,new_working_model=sorted_working_model_list[0]
      if new_working_score>best_model.get_score():
        best_model=new_working_model
        found=True
        if verbose:
          print("NEW BEST SCORE: %7.2f" %(best_model.get_score()), file=out)
          best_model.show_summary(out=out)

    print("\nDONE... best is %7.3f\n" %(
        best_model.get_score()), file=out)

    # Create composite of this chain

    # Note residue values. We are going to pick each residue from one of
    # the models

    residue_list=[] # stays empty (and fails the assert) if no chain matches
    for model in ph.models():
      for chain in model.chains():
        if chain.id != chain_id: continue
        # reset for every matching chain: the last one found is used
        residue_list=[rg.resseq for rg in chain.residue_groups()]
    residue_list.sort()
    assert len(best_model.source_list)==len(residue_list)

    ph_asc=ph.atom_selection_cache() # ph is unchanged inside the loop
    for i,resseq in enumerate(residue_list):
      atom_selection=get_atom_selection(model_id=best_model.source_list[i],
        resseq_sel=resseq)
      sel=ph_asc.selection(string = atom_selection)
      sel_hierarchy=ph.select(sel)
      print(remove_ter(sel_hierarchy.as_pdb_string()),
        file=composite_model_stream)

  #  All done, make a new pdb_hierarchy
  pdb_string=composite_model_stream.getvalue()
  pdb_inp=iotbx.pdb.input(source_info=None, lines = pdb_string)
  pdb_hierarchy=pdb_inp.construct_hierarchy()

  if pdb_out:
    with open(pdb_out,'w') as f:
      print(pdb_hierarchy.as_pdb_string(crystal_symmetry=crystal_symmetry),
        file=f)
    print("Final model is in: %s\n" %(pdb_out), file=out)

  return pdb_hierarchy
Example #2
0
def select_segments_that_match(params=None,
   chain_hierarchy=None,
   target_hierarchy=None,
   out=sys.stdout,
   ncs_obj=None,
   target_file=None, # model
   chain_file=None, # query
   crystal_symmetry=None,
   max_dist=None,
   quiet=None,
   verbose=None,
   use_crystal_symmetry=None,
   chain_type=None,
   target_length_from_matching_chains=None,
   distance_per_site=None,
   min_similarity=None):
  """Identify all the segments in chain_hierarchy that match target_hierarchy
  and write them out.

  chain_hierarchy is split into segments (split_model; distance cutoff 5 for
  params.crystal_info.chain_type "PROTEIN", 15 otherwise).  Each segment is
  compared with target_hierarchy by calling run() with output suppressed.
  A segment is kept if the percentage of 'close' residues among
  close + far_away residues lies between
  params.comparison.minimum_percent_match_to_select and
  params.comparison.maximum_percent_match_to_select.  The kept segments are
  merged and written to params.output_files.match_pdb_file.

  Inputs:
    params: parameters object (required despite the None default; it is
      read directly).
    chain_hierarchy: query hierarchy to be split into segments.
    target_hierarchy: hierarchy to compare each segment with.
    out: stream for the summary output.
    ncs_obj, crystal_symmetry, max_dist, verbose, use_crystal_symmetry,
      chain_type, target_length_from_matching_chains, distance_per_site,
      min_similarity: passed through unchanged to run().
    target_file, chain_file, quiet: accepted but not used here.

  Returns:
    The merged model (result of merge_hierarchies_from_models).
  """

  from mmtbx.secondary_structure.find_ss_from_ca import split_model,model_info,\
    merge_hierarchies_from_models
  chain_model=model_info(hierarchy=chain_hierarchy)
  if params.crystal_info.chain_type=="PROTEIN":
    distance_cutoff=5.
  else:
    distance_cutoff=15.
  chain_models=split_model(model=chain_model,distance_cutoff=distance_cutoff)
  print("Analyzing %s segments and identifying " %(len(chain_models)) +\
      " those with "+\
     "chain_type=%s and match percentage between %.1f %% and %.1f %% " %(
    params.crystal_info.chain_type,
    params.comparison.minimum_percent_match_to_select,
    params.comparison.maximum_percent_match_to_select), file=out)
  # Work on a copy so that the per-segment runs do not write a match file
  local_params=deepcopy(params)
  local_params.output_files.match_pdb_file=None # required
  models_to_keep=[]
  write_header=True
  for cm in chain_models:  # one segment
    rv=run(
      params=local_params,
      ncs_obj=ncs_obj,
      target_hierarchy=target_hierarchy,
      quiet=True,
      chain_hierarchy=cm.hierarchy,out=null_out(),
        crystal_symmetry=crystal_symmetry,
        max_dist=max_dist,
        verbose=verbose,
        use_crystal_symmetry=use_crystal_symmetry,
        chain_type=chain_type,
        target_length_from_matching_chains=target_length_from_matching_chains,
        distance_per_site=distance_per_site,
        min_similarity=min_similarity,
      )

    close_rmsd,close_n=rv.get_values('close')
    far_away_rmsd,far_away_n=rv.get_values('far_away')
    if close_n+far_away_n<1: continue # wrong chain type or other failure

    percent_matched=100.*close_n/max(1,close_n+far_away_n)
    if percent_matched < params.comparison.minimum_percent_match_to_select:
      continue
    if percent_matched > params.comparison.maximum_percent_match_to_select:
      continue

    # One summary line per kept segment; header only for the first one
    write_summary(params=params,
      file_list=[params.crystal_info.chain_type],rv_list=[rv],
      write_header=write_header,out=out)
    write_header=False
    models_to_keep.append(cm)

  new_model=merge_hierarchies_from_models(models=models_to_keep,resid_offset=5)
  with open(params.output_files.match_pdb_file,'w') as ff:
    print(new_model.hierarchy.as_pdb_string(), file=ff)
  print("Wrote %s %s chains with %s residues to %s" %(
    len(models_to_keep),params.crystal_info.chain_type,
    new_model.hierarchy.overall_counts().n_residues,
    params.output_files.match_pdb_file), file=out)
  return new_model
def process_predicted_model(model, params, pae_matrix=None, log=sys.stdout):
    """
  process_predicted_model:
  Purpose:  Convert values in B-value field to pseudo-B-values, remove
    low_confidence residues, optionally split into compact regions.
  Rationale: predicted models may have regions of low and high confidence.
    This routine uses values in the B-value field to identify confidence,
    removes low-confidence regions, and then examines the remaining model to
    find regions that are compact (residues have high contact with neighbors)
    and that are separate from other regions (low contact with neighbors).

  Inputs (supplied as model and a params object):
    model:  mmtbx.model.manager object containing model information.
           Normally contains a single chain.   If multiple chains, process
           each separately.

    b_value_field_is:  'lddt' or 'rmsd' or 'b_value'.  For AlphaFold models
                        the b-value field is a value of LDDT (confidence)
                        on scale of 0-1 or 0-100
                        For RoseTTAFold, the B-value field is rmsd (A)
                        If b_value... it is left as is.

    input_lddt_is_fractional:  if True, input lddt is scale of 0 to 1,
        otherwise 0 - 100
       If None, set to True if all lddt are from 0 to 1
    remove_low_confidence_residues: remove residues with low confidence
        (lddt or rmsd as set below)
    minimum_lddt: minimum lddt to keep residues (on same scale as b_value_field,
      if not set, calculated from maximum_rmsd).
    maximum_rmsd: alternative specification of minimum confidence based on rmsd.
        If not set, calculated from minimum_lddt.
    default_maximum_rmsd:  used as default if nothing specified for
         maximum_rmsd or minimum_lddt. Default is 1.5 A,
    minimum_remainder_sequence_length: removed fragments at least this long
         are reported in remainder_sequence_str
    split_model_by_compact_regions: split resulting model into compact regions
    pae_matrix:  matrix of predicted aligned errors (e.g., from AlphaFold2), NxN
      matrix of RMSD values, N = number of residues in model.
      Alternative to splitting by compact regions. Split to minimize predicted
          aligned errors in each grouping.
        pae_power (default=1): each edge in the graph will be weighted
           proportional to (1/pae**pae_power)
        pae_cutoff (optional, default=5): graph edges will only be created for
         residue pairs with pae<pae_cutoff

    domain_size: typical size of domains (resolution used for filtering is
       the domain size)
    minimum_domain_length: minimum length (residues) of a domain to keep
    maximum_domains: if more than this many domains, merge close ones to reduce
       number
    chain_id: if model contains more than one chain, split this chain only.
              NOTE: only one chain can be processed at a time.
    if subtract_minimum_b is set, subtract minimum(B values) from all B values
       after applying any B value cutoffs

  Output:
    group_args object containing:
      model:  processed model (regions identified in chainid field if split)
      model_list: one model per region (empty list if not split)
      chainid_list: chain IDs of the regions (empty list if not split)
      remainder_sequence_str: sequences of removed fragments, or None if
         remove_low_confidence_residues is not set

  NOTE: params.process_predicted_model is modified in place:
    input_lddt_is_fractional is filled in if it was None, and minimum_lddt
    is converted to a fractional value if the input lddt is not fractional.

  How to get the parameters object set up:

    You can set up a parameters object like this (see example at end of this
    file as well):

    master_phil = iotbx.phil.parse(master_phil_str)
    params = master_phil.extract()

    The default values are set in the master_phil_str string above.
    You can then set values of params:

    params.process_predicted_model.split_model_by_compact_regions = True


  """

    # Make sure we have what we expect:
    import mmtbx.model
    assert isinstance(model, mmtbx.model.manager)

    # Decide what to do
    p = params.process_predicted_model

    # Determine if input lddt is fractional and get b values

    b_value_field = model.get_hierarchy().atoms().extract_b()
    if p.b_value_field_is == 'lddt':
        if p.input_lddt_is_fractional is None:
            # Fractional only if no value lies outside 0 - 1
            sel = (b_value_field < 0) | (b_value_field > 1)
            p.input_lddt_is_fractional = (sel.count(True) == 0)

        b_values = get_b_values_from_lddt(
            b_value_field, input_lddt_is_fractional=p.input_lddt_is_fractional)

        if p.input_lddt_is_fractional:
            print("B-value field interpreted as LDDT %s" % ("(0 - 1)"),
                  file=log)
        else:
            print("B-value field interpreted as LDDT %s" % ("(0 - 100)"),
                  file=log)

    elif p.b_value_field_is == 'rmsd':
        b_values = get_b_values_rmsd(b_value_field)
        print("B-value field interpreted as rmsd %s" % ("(0 - 1)"), file=log)

    elif p.b_value_field_is == 'b_value':
        b_values = b_value_field
        print("B-value field interpreted as b_values", file=log)
    else:
        # List every value accepted by the branches above
        raise AssertionError(
            "Please set b_value_field_is to lddt, rmsd, or b_value")

    if (not p.input_lddt_is_fractional):
        if p.minimum_lddt is not None:  # convert to fractional
            p.minimum_lddt = p.minimum_lddt * 0.01
            print("Minimum LDDT converted to %.2f" % (p.minimum_lddt),
                  file=log)

    # From here on we work only with fractional lddt

    # Get confidence cutoff if needed
    if p.remove_low_confidence_residues:
        maximum_b_value = get_cutoff_b_value(
            p.maximum_rmsd,
            p.minimum_lddt,
            default_maximum_rmsd=p.default_maximum_rmsd,
            log=log)
    else:
        maximum_b_value = None

    # Offset b-values and cutoff if requested
    if p.subtract_minimum_b:
        minimum_b = b_values.min_max_mean().min
        b_values -= minimum_b
        assert b_values.min_max_mean().min == 0
        if maximum_b_value is not None:
            maximum_b_value -= minimum_b  # offset this too
        if maximum_b_value is not None:
            cutoff_text = " %.2f" % maximum_b_value
        else:
            cutoff_text = "None"
        print("Subtracting minimum B of " +
              "%.2f from values and from cutoff (now %s)" %
              (minimum_b, cutoff_text),
              file=log)

    # Make a new model with new B-values

    ph = model.get_hierarchy().deep_copy()
    ph.atoms().set_b(b_values)

    # Remove low_confidence regions if desired
    if p.remove_low_confidence_residues:
        n_before = ph.overall_counts().n_residues
        selection_string = " (bfactor < %s)" % maximum_b_value
        asc1 = ph.atom_selection_cache()
        sel = asc1.selection(selection_string)
        new_ph = ph.select(sel)
        n_after = new_ph.overall_counts().n_residues
        print("Total of %s of %s residues kept after B-factor filtering" %
              (n_after, n_before),
              file=log)
        if n_after == 0:
            raise Sorry(
                "No residues remaining after filtering...please check if " +
                "B-value field is really '%s'" % (p.b_value_field_is))
        removed_ph = ph.select(~sel)
        from mmtbx.secondary_structure.find_ss_from_ca import model_info, \
           split_model
        from iotbx.bioinformatics import get_sequence_from_hierarchy
        # Report the sequence of each sufficiently long removed fragment
        remainder_sequence_str = ""
        for m in split_model(model_info(removed_ph)):
            seq = get_sequence_from_hierarchy(m.hierarchy)
            if len(seq) >= p.minimum_remainder_sequence_length:
                remainder_sequence_str += "\n> fragment sequence "
                remainder_sequence_str += "\n%s\n" % (seq)
        # Continue with the filtered hierarchy.  Previously the filtered
        # hierarchy was only counted and the unfiltered one was used to
        # build the new model, so nothing was actually removed.
        ph = new_ph
    else:
        remainder_sequence_str = None

    # Get a new model
    new_model = model.as_map_model_manager().model_from_hierarchy(
        ph, return_as_model=True)

    # Get high-confidence regions as domains if desired:
    if p.split_model_by_compact_regions:
        # Make sure we have just 1 chain or a chain ID supplied
        # (the returned chain ID itself is not needed below)
        get_chain_id(model, None, log=log)

        if pae_matrix is not None:  # use pae matrix method
            info = split_model_with_pae(
                model,
                new_model,
                pae_matrix,
                maximum_domains=p.maximum_domains,
                pae_power=p.pae_power,
                pae_cutoff=p.pae_cutoff,
                pae_graph_resolution=p.pae_graph_resolution,
                minimum_domain_length=p.minimum_domain_length,
                log=log)
        else:  # usual
            info = split_model_into_compact_units(
                new_model,
                d_min=p.domain_size,
                maximum_domains=p.maximum_domains,
                minimum_domain_length=p.minimum_domain_length,
                log=log)
        if info is None:
            print("No compact regions identified", file=log)
            chainid_list = []
            model_list = []
        else:
            new_model = info.model
            chainid_list = info.chainid_list
            print("Total of %s regions identified" % (len(chainid_list)),
                  file=log)
            model_list = split_model_by_chainid(new_model, chainid_list)
    else:
        model_list = []
        chainid_list = []

    return group_args(
        group_args_type='processed predicted model',
        model=new_model,
        model_list=model_list,
        chainid_list=chainid_list,
        remainder_sequence_str=remainder_sequence_str,
    )
Example #4
0
def run(
    params=None, # params for running from command line
    map_data=None,  # map_data, as_double()
    pdb_inp=None,
    pdb_hierarchy=None,
    crystal_symmetry=None,
    resolution=None,
    scattering_table='n_gaussian',
    smoothing_window=5,
    crossover_atom='CA',
    minimum_matching_atoms=3,
    minimum_length=2,
    dist_max=1.0,
    minimum_improvement=0.01,
    max_regions_to_test=10,
    max_ends_per_region=5,
    maximum_fraction=0.5,
    max_keep=10,
    map_coeffs_file=None,map_coeffs_labels=None,
    pdb_in_file=None,
    pdb_out=None,
    verbose=None,
    out=sys.stdout):
  """Merge the models of a multi-model hierarchy into one composite model.

  The hierarchy is processed one chain segment at a time.  For each segment
  the per-residue map correlation of every model is obtained (get_cc_dict),
  smoothed (smooth_cc_values), and the allowed crossover points between
  models are found (get_crossover_dict).  Working models are scored, the
  best max_keep are crossed with each other until the score stops improving,
  and each residue of the composite is then copied from the model listed in
  best_model.source_list.

  Inputs:
    params: optional parameters object.  If supplied, its values replace the
      keyword arguments verbose, map_coeffs_file, map_coeffs_labels,
      pdb_in_file, resolution, scattering_table, all crossover settings and
      pdb_out.
    map_data: map to score against.  If not supplied it is calculated from
      map_coeffs_file (sigma-scaled FFT map, resolution_factor 0.25).
    pdb_inp, pdb_hierarchy, pdb_in_file: alternative sources of the models.
      If pdb_hierarchy is given, pdb_inp and pdb_in_file must be None and
      crystal_symmetry must be supplied.
    crystal_symmetry, resolution: taken from the map coefficients or the
      model input if not supplied (resolution may also be estimated from
      map and model).
    pdb_out: if set, the composite model is written to this file.
    out: stream for log output (None means sys.stdout).

  Returns:
    The input hierarchy unchanged if it contains exactly one model,
    otherwise a new hierarchy constructed from the composite model.

  Raises:
    Sorry: map coefficients or PDB file not found, or no resolution.
    AssertionError: inconsistent inputs.
  """

  if out is None: out=sys.stdout # explode and refine calls it this way

  # get info from params if present
  if params:
    verbose=params.control.verbose
    map_coeffs_file=params.input_files.map_coeffs_file
    map_coeffs_labels=params.input_files.map_coeffs_labels
    pdb_in_file=params.input_files.pdb_in_file
    resolution=params.crystal_info.resolution
    scattering_table=params.crystal_info.scattering_table
    smoothing_window=params.crossover.smoothing_window
    crossover_atom=params.crossover.crossover_atom
    minimum_matching_atoms=params.crossover.minimum_matching_atoms
    minimum_length=params.crossover.minimum_length
    dist_max=params.crossover.dist_max
    minimum_improvement=params.crossover.minimum_improvement
    max_regions_to_test=params.crossover.max_regions_to_test
    max_ends_per_region=params.crossover.max_ends_per_region
    maximum_fraction=params.crossover.maximum_fraction
    max_keep=params.crossover.max_keep
    pdb_out=params.output_files.pdb_out

  # Consistency checks
  if(pdb_hierarchy is not None):
    assert pdb_in_file is None
    assert pdb_inp is None
    assert crystal_symmetry is not None
    # XXX more checks here!

  # Get map_data if not present
  if not map_data:
    if not map_coeffs_file or not os.path.isfile(map_coeffs_file):
      raise Sorry("Cannot find the map_coeffs_file '%s'" %(
        str(map_coeffs_file)))
    from mmtbx.building.minimize_chain import get_map_coeffs
    map_coeffs=get_map_coeffs(map_coeffs_file,
        map_coeffs_labels=map_coeffs_labels)

    fft_map = map_coeffs.fft_map(resolution_factor = 0.25)
    fft_map.apply_sigma_scaling()
    map_data = fft_map.real_map_unpadded()
    map_data=map_data.as_double()
    if map_coeffs and not crystal_symmetry:
      crystal_symmetry=map_coeffs.crystal_symmetry()
    if map_coeffs and not resolution:
      resolution=map_coeffs.d_min()

  # Get the starting model
  if(pdb_hierarchy is None):
    if pdb_inp is None:
      if not pdb_in_file or not os.path.isfile(pdb_in_file):
        raise Sorry("Cannot read input PDB file '%s'" %(
          str(pdb_in_file)))
      print("Taking models from %s" %(pdb_in_file), file=out)
      # Context manager so the input file handle is always released
      with open(pdb_in_file) as pdb_file:
        pdb_string=pdb_file.read()
      pdb_inp=iotbx.pdb.input(source_info=None, lines = pdb_string)
      if pdb_inp is None:
        raise Sorry("Need a model or models")
    if not crystal_symmetry:
      crystal_symmetry=pdb_inp.crystal_symmetry()
    assert crystal_symmetry is not None
    hierarchy = pdb_inp.construct_hierarchy()
  else:
    hierarchy = pdb_hierarchy # XXX FIXME
  n_models=sum(1 for _ in hierarchy.models())

  if n_models==1:  # nothing to do
    return hierarchy

  #xrs = pdb_inp.xray_structure_simple(crystal_symmetry=crystal_symmetry)
  xrs = hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
  xrs.scattering_type_registry(table=scattering_table)

  if not resolution:
    from cctbx import maptbx
    # NOTE(review): uses the run(...).d_min form of this API, as in the
    # Python 3 variant of this function -- confirm against the installed
    # cctbx version.
    resolution=maptbx.resolution_from_map_and_model.run(
      map_data=map_data, xray_structure=xrs).d_min
  if(resolution is None):
    raise Sorry("Resolution is required")

  print("\nResolution limit: %7.2f" %(resolution), file=out)
  print("\nSummary of input models", file=out)
  xrs.show_summary(f=out, prefix="  ")

  print("\nReady with %d models and map" %(n_models), file=out)
  # Get CC by residue for each model and map

  chain_id_and_resseq_list=[] # Instead set up chain_id and resseq (range)
  from mmtbx.secondary_structure.find_ss_from_ca import \
      split_model,get_first_resno, get_last_resno,get_chain_id
  model_list=split_model(hierarchy=hierarchy,only_first_model=True)
  for m in model_list:
    h=m.hierarchy
    chain_id_and_resseq=[get_chain_id(h),
       [get_first_resno(h),get_last_resno(h)]]
    if not chain_id_and_resseq in chain_id_and_resseq_list:
      chain_id_and_resseq_list.append(chain_id_and_resseq)

  # Run through chains separately
  # NOTE: All models of each chain must match exactly

  # Save composite model, chain by chain
  from io import StringIO # cStringIO does not exist in Python 3
  composite_model_stream=StringIO()

  # The full hierarchy is not modified below, so one selection cache serves
  # every chain
  hierarchy_asc=hierarchy.atom_selection_cache()

  for chain_id_and_resseq in chain_id_and_resseq_list:
    chain_id,[start_resno,end_resno]=chain_id_and_resseq
    atom_selection=get_atom_selection(chain_id=chain_id,
      start_resno=start_resno,end_resno=end_resno)
    sel=hierarchy_asc.selection(string = atom_selection)
    sel_hierarchy=hierarchy.select(sel)
    pdb_inp=sel_hierarchy.as_pdb_input(crystal_symmetry=crystal_symmetry)
    ph=pdb_inp.construct_hierarchy()

    print("\nWorking on chain_id='%s' resseq %d:%d\n" %(
       chain_id,start_resno,end_resno), file=out)

    # get CC values for all residues
    cc_dict=get_cc_dict(hierarchy=ph,map_data=map_data,d_min=resolution,
     crystal_symmetry=crystal_symmetry,
     table=scattering_table,out=out)

    # smooth CC values with window of smoothing_window
    smoothed_cc_dict=smooth_cc_values(cc_dict=cc_dict,
       smoothing_window=smoothing_window,
       verbose=verbose,out=out)

    # figure out all the places where crossover can occur.
    # Use the first key in sorted order so the choice does not depend on
    # dict ordering (all models of a chain must match, see NOTE above)
    keys=sorted(cc_dict.keys())
    n_residues=cc_dict[keys[0]].size()

    crossover_dict=get_crossover_dict(
      n_residues=n_residues,
      hierarchy=ph,
      crossover_atom=crossover_atom,
      dist_max=dist_max,
      minimum_matching_atoms=minimum_matching_atoms,
      verbose=verbose,out=out)

    # Now we are ready to identify the best composite model...
    # A composite has residue 0 from model x, residue 1 from model y etc.
    # Each change from model a to model b between residues i and i+1 must have
    #  a crossover between a and b at either residue i or i+1

    sorted_working_model_list=[]
    for key in keys:
      working_model=model_object(source_id=key,
         cc_dict=cc_dict,
         smoothed_cc_dict=smoothed_cc_dict,
         crossover_dict=crossover_dict,
         minimum_length=minimum_length,
         minimum_improvement=minimum_improvement,
         max_regions_to_test=max_regions_to_test,
         max_ends_per_region=max_ends_per_region,
         maximum_fraction=maximum_fraction)
      if verbose:
        working_model.show_summary(out=out)
      sorted_working_model_list.append(
        [working_model.get_score(),working_model])
    # Sort on the score only: with a plain sort() a tie in score falls
    # through to comparing the model objects, which raises TypeError in
    # Python 3 unless the class defines an ordering.
    sorted_working_model_list.sort(key=lambda entry: entry[0],reverse=True)
    sorted_working_model_list=sorted_working_model_list[:max_keep]
    working_model_list=[entry[1] for entry in sorted_working_model_list]

    # Go through all the working models and cross them with other models to
    #  optimize...Then take all the best and cross...

    best_model=sorted_working_model_list[0][1]
    found=True
    cycle=0
    while found:
      cycle+=1
      print("\nCYCLE %d current best is %7.3f\n" %(
        cycle,best_model.get_score()), file=out)
      found=False
      sorted_working_model_list=[]
      for working_model in working_model_list:
        others=[m for m in working_model_list if not working_model==m]
        new_working_model=working_model.optimize_with_others(others=others)
        if not new_working_model:
          print(file=out) # blank line now goes to the log, not stdout
          continue
        aa=[new_working_model.get_score(),new_working_model]
        if not aa in sorted_working_model_list:
          sorted_working_model_list.append(aa)
      if not sorted_working_model_list:
        break # nothing to do

      sorted_working_model_list.sort(key=lambda entry: entry[0],reverse=True)
      sorted_working_model_list=sorted_working_model_list[:max_keep]

      new_working_score,new_working_model=sorted_working_model_list[0]
      if new_working_score>best_model.get_score():
        best_model=new_working_model
        found=True
        if verbose:
          print("NEW BEST SCORE: %7.2f" %(best_model.get_score()), file=out)
          best_model.show_summary(out=out)

    print("\nDONE... best is %7.3f\n" %(
        best_model.get_score()), file=out)

    # Create composite of this chain

    # Note residue values. We are going to pick each residue from one of
    # the models

    residue_list=[] # stays empty (and fails the assert) if no chain matches
    for model in ph.models():
      for chain in model.chains():
        if chain.id != chain_id: continue
        # reset for every matching chain: the last one found is used
        residue_list=[rg.resseq for rg in chain.residue_groups()]
    residue_list.sort()
    assert len(best_model.source_list)==len(residue_list)

    ph_asc=ph.atom_selection_cache() # ph is unchanged inside the loop
    for i,resseq in enumerate(residue_list):
      atom_selection=get_atom_selection(model_id=best_model.source_list[i],
        resseq_sel=resseq)
      sel=ph_asc.selection(string = atom_selection)
      sel_hierarchy=ph.select(sel)
      print(remove_ter(sel_hierarchy.as_pdb_string()),
        file=composite_model_stream)

  #  All done, make a new pdb_hierarchy
  pdb_string=composite_model_stream.getvalue()
  pdb_inp=iotbx.pdb.input(source_info=None, lines = pdb_string)
  pdb_hierarchy=pdb_inp.construct_hierarchy()

  if pdb_out:
    with open(pdb_out,'w') as f:
      print(pdb_hierarchy.as_pdb_string(crystal_symmetry=crystal_symmetry),
        file=f)
    print("Final model is in: %s\n" %(pdb_out), file=out)

  return pdb_hierarchy