Beispiel #1
0
def run_refinement_pipeline(in_mtz, ref_pdb, out_dir, program='dimple'):
    """Run refinement of the input MTZ file against a reference PDB file.

    Args:
        in_mtz: path of the MTZ file passed to the refinement program.
        ref_pdb: path of the reference PDB file passed to the refinement program.
        out_dir: output directory passed to the refinement program.
        program: name of the pipeline to run; only 'dimple' is implemented.

    Returns:
        Tuple ``(out_pdb, out_mtz)``: the paths ``final.pdb`` and ``final.mtz``
        inside ``out_dir``.

    Raises:
        ValueError: if ``program`` is not a supported pipeline.
        Failure: if either expected output file does not exist after the run.
    """

    # Single-argument print(...) behaves identically on Python 2 and Python 3
    print('*************************')
    print('*** Running Pipelines ***')
    print('*************************')

    if program != 'dimple':
        raise ValueError(
            'Unsupported refinement program: {} (only dimple is implemented)'.
            format(program))

    # Define output files
    out_pdb = os.path.join(out_dir, 'final.pdb')
    out_mtz = os.path.join(out_dir, 'final.mtz')
    # Create command manager for dimple
    cmd = CommandManager('dimple')
    cmd.add_command_line_arguments(
        ['--jelly', '5', in_mtz, ref_pdb, out_dir])

    print_run_and_raise_error_maybe(cmd)

    # The run is only considered successful if both output files were produced
    for out_file in (out_pdb, out_mtz):
        if not os.path.exists(out_file):
            raise Failure(
                'running refinement with {} has failed -- {} does not exist'.
                format(program, out_file))

    return out_pdb, out_mtz
Beispiel #2
0
def transfer_rfree_flags(in_mtz,
                         out_mtz,
                         reference_mtz,
                         input_free_r_flag,
                         output_free_r_flag,
                         delete_tmp_files=True):
    """Copy R-free flags from a reference MTZ file, then complete missing values.

    Two external programs are run in sequence: 'cad' writes an intermediate
    file containing all columns of ``in_mtz`` plus the flag column taken from
    ``reference_mtz``; 'freerflag' then reads the intermediate file and writes
    ``out_mtz``.

    Args:
        in_mtz: path of the MTZ file to receive the flags.
        out_mtz: path of the final output MTZ file.
        reference_mtz: path of the MTZ file that provides the flags.
        input_free_r_flag: label of the flag column in ``reference_mtz``.
        output_free_r_flag: label given to the flag column in the output.
        delete_tmp_files: remove the intermediate file once ``out_mtz`` exists.

    Raises:
        Failure: if the intermediate or final file does not exist after the
            corresponding stage.
    """

    # Single-argument print(...) behaves identically on Python 2 and Python 3
    print('*********************************')
    print('*** Transferring R-free flags ***')
    print('*********************************')

    tmp_mtz = splice_ext(path=out_mtz, new='step1-transfer')

    # Stage 1 - transfer R-free from reference
    cmd = CommandManager('cad')
    cmd.add_command_line_arguments(
        ['hklin1', in_mtz, 'hklin2', reference_mtz, 'hklout', tmp_mtz])
    cmd.add_standard_input([
        'labin file_number 1 ALL',
        'labin file_number 2 E1={}'.format(input_free_r_flag),
        'labout file_number 2 E1={}'.format(output_free_r_flag), 'END'
    ])
    print_run_and_raise_error_maybe(cmd)
    if not os.path.exists(tmp_mtz):
        raise Failure(
            'transfer of R-free flags has failed -- {} does not exist'.format(
                tmp_mtz))

    print('*******************************')
    print('*** Completing R-free flags ***')
    print('*******************************')

    # Stage 2 - populate missing R-free values
    cmd = CommandManager('freerflag')
    cmd.add_command_line_arguments(['hklin', tmp_mtz, 'hklout', out_mtz])
    cmd.add_standard_input(
        ['COMPLETE FREE={}'.format(output_free_r_flag), 'END'])
    print_run_and_raise_error_maybe(cmd)
    if not os.path.exists(out_mtz):
        raise Failure(
            'expanding of R-free flags has failed -- {} does not exist'.format(
                out_mtz))

    if delete_tmp_files:
        os.remove(tmp_mtz)
Beispiel #3
0
    def select_reference_dataset(self,
                                 datasets,
                                 method='resolution',
                                 max_rfree=0.4,
                                 min_resolution=5):
        """Select dataset to act as the reference - scaling, aligning etc.

        Args:
            datasets: mapping of dataset tag -> dataset object.
            method: 'resolution' picks the dataset with the smallest resolution
                value among those whose R-free is below ``max_rfree``; 'rfree'
                picks the dataset with the smallest R-free among those whose
                resolution value is below ``min_resolution``.
            max_rfree: R-free cutoff used when ``method == 'resolution'``.
            min_resolution: resolution cutoff used when ``method == 'rfree'``.

        Returns:
            Tuple ``(model_filename, data_filename)`` of the selected dataset.

        Raises:
            Failure: if ``datasets`` is empty.
            Exception: if no dataset passes the cutoff for the chosen method.
        """

        assert method in [
            'resolution', 'rfree'
        ], 'METHOD FOR SELECTING THE REFERENCE DATASET NOT RECOGNISED: {!s}'.format(
            method)

        def resolution_of(dataset):
            # Second value reported by the MTZ object's max_min_resolution()
            return dataset.data.mtz_object().max_min_resolution()[1]

        def r_free_of(dataset):
            return dataset.model.input.get_r_rfree_sigma().r_free

        # ==============================>
        # Get the potential reference datasets
        # ==============================>
        candidates = [d for _, d in datasets.items()]
        if not candidates:
            raise Failure(
                "Can't select a reference dataset - NO SUITABLE (NON-REJECTED) DATASETS REMAINING"
            )
        # ==============================>
        # Select by either R-free or Resolution
        # ==============================>
        # Datasets failing the cutoff are excluded outright (rather than being
        # given a dummy score), so "nothing passes the cutoff" is detected
        # instead of silently returning the first dataset.
        if method == 'rfree':
            eligible = [
                d for d in candidates if resolution_of(d) < min_resolution
            ]
            if not eligible:
                raise Exception(
                    'NO DATASETS BELOW RESOLUTION CUTOFF {!s}A - CANNOT SELECT REFERENCE DATASET'
                    .format(min_resolution))
            reference = min(eligible, key=r_free_of)
        else:
            # method == 'resolution' (guaranteed by the check above)
            eligible = [d for d in candidates if r_free_of(d) < max_rfree]
            if not eligible:
                raise Exception(
                    'NO DATASETS BELOW RFREE CUTOFF {!s} - CANNOT SELECT REFERENCE DATASET'
                    .format(max_rfree))
            reference = min(eligible, key=resolution_of)
        # ==============================>
        # Report and return
        # ==============================>
        return reference.model.filename, reference.data.filename
Beispiel #4
0
    def __init__(self, mtz_file, pdb_file, f_label=None):
        """Score a model against an MTZ file via score_with_edstats_to_dict.

        The returned scores are stored as a pandas DataFrame in ``self.scores``
        and the returned command object in ``self._command``.

        Raises:
            Failure: if the score table is empty and the command reported an error.
        """
        edstats_scores, edstats_command = score_with_edstats_to_dict(
            mtz_file=mtz_file, pdb_file=pdb_file, f_label=f_label)

        self.scores = pandas.DataFrame.from_dict(edstats_scores)
        self._command = edstats_command

        # An empty table together with error output means the run itself failed
        if self.scores.empty:
            if self._command.error:
                message = 'EDSTATS has failed to run (error message below)\n=========>\n{!s}\n=========>'
                raise Failure(message.format(self._command.error))
Beispiel #5
0
def raise_cmd_output_and_error(cmd):
    """Raise a Failure whose message reports the command, its STDOUT and its STDERR"""

    divider = '============================>'
    # Sections appear in the message in this order, each followed by a divider
    sections = [
        'Program returned with an error ({})'.format(' '.join(cmd.program)),
        str(cmd),
        cmd.output,
        cmd.error,
    ]

    report = divider + '\n'
    for section in sections:
        report += section + '\n' + divider + '\n'

    raise Failure(report)
Beispiel #6
0
def reindex_mtz_to_reference(in_mtz, out_mtz, reference_mtz, tolerance):
    """Reindex the data in one mtz to a reference mtz.

    Runs 'pointless' with ``in_mtz`` as hklin, ``reference_mtz`` as hklref and
    ``out_mtz`` as hklout, passing ``tolerance`` on standard input.

    Raises:
        Failure: if ``out_mtz`` does not exist after the run.
    """

    # Single-argument print(...) behaves identically on Python 2 and Python 3
    print('**************************')
    print('*** Running reindexing ***')
    print('**************************')

    cmd = CommandManager('pointless')
    cmd.add_command_line_arguments(
        ['hklin', in_mtz, 'hklref', reference_mtz, 'hklout', out_mtz])
    cmd.add_standard_input(['tolerance {}'.format(tolerance)])
    print_run_and_raise_error_maybe(cmd)
    if not os.path.exists(out_mtz):
        raise Failure(
            'reindexing has failed -- {} does not exist'.format(out_mtz))
def run(params):
    """Merge the input MTZ files, then run cphasematch on every pair of them.

    ``giant.mtz.merge`` is run first to write ``params.output.merged_mtz``;
    ``cphasematch`` is then run once for each pair ``(i_1, i_2)`` with
    ``i_1 < i_2`` (1-based positions in ``params.input.mtz``), writing one log
    file per pair based on ``params.output.phase_log_template``.

    Raises:
        AssertionError: if no input MTZs are given or an output file already
            exists.
        Failure: if the merged MTZ file does not exist after merging.
    """

    assert params.input.mtz, 'No MTZs given for comparison'

    assert not os.path.exists(
        params.output.merged_mtz
    ), 'The output file ({}) already exists. Please delete it before re-running.'.format(
        params.output.merged_mtz)
    assert not os.path.exists(
        params.output.phasematch_mtz
    ), 'The output file ({}) already exists. Please delete it before re-running.'.format(
        params.output.phasematch_mtz)

    merge = CommandManager('giant.mtz.merge')
    merge.add_command_line_arguments('label_suffix=incremental')
    merge.add_command_line_arguments(params.input.mtz)
    merge.add_command_line_arguments('output.mtz=' + params.output.merged_mtz)
    merge.add_command_line_arguments(params.settings.f_obs.split(','))
    merge.add_command_line_arguments(params.settings.f_calc.split(','))
    merge.run()
    merge.write_output(params.output.merging_log)

    if not os.path.exists(params.output.merged_mtz):
        raise Failure('giant.mtz.merge has failed to merge the mtz files')

    # Unpacking also enforces that each setting holds exactly two labels
    fo1, fo2 = params.settings.f_obs.split(',')
    fc1, fc2 = params.settings.f_calc.split(',')

    n_mtz = len(params.input.mtz)
    for i_2 in range(1, n_mtz + 1):
        # Only compare each unordered pair once (i_1 < i_2)
        for i_1 in range(1, i_2):
            match = CommandManager('cphasematch')
            match.add_command_line_arguments(
                ['-mtzin', params.output.merged_mtz])
            match.add_command_line_arguments(
                ['-mtzout', params.output.phasematch_mtz])
            # NOTE(review): each column pair below repeats a single label
            # (fo1/fo1, fc1/fc1, fc2/fc2) and fo2 is never used -- this looks
            # like it was meant to be (fo1, fo2) / (fc1, fc2); confirm against
            # the cphasematch column conventions before changing.
            match.add_command_line_arguments(
                ['-colin-fo', '{}-{},{}-{}'.format(fo1, i_1, fo1, i_1)])
            match.add_command_line_arguments(
                ['-colin-fc-1', '{}-{},{}-{}'.format(fc1, i_1, fc1, i_1)])
            match.add_command_line_arguments(
                ['-colin-fc-2', '{}-{},{}-{}'.format(fc2, i_2, fc2, i_2)])
            match.run()
            match.write_output(params.output.phase_log_template +
                               '-{}-{}.log'.format(i_1, i_2))
Beispiel #8
0
    def extract_residue_group_scores(self,
                                     residue_group,
                                     data_table=None,
                                     rg_label=None,
                                     column_suffix=''):
        """Extract density quality metrics for a residue group from precalculated edstats scores.

        Args:
            residue_group: residue group whose scores are looked up in
                ``self.scores`` by (resname, chain id, resseq as int, icode).
            data_table: pandas DataFrame to write into; a new table with a
                single row labelled ``rg_label`` is created when None.
            rg_label: row label to use; defaults to
                ``<resname>-<chain id>-<resseq><icode>`` with spaces removed.
            column_suffix: string appended to each output column name.

        Returns:
            The data table, with columns RSCC, RSR, B_AV, RSZO and RSZD (plus
            ``column_suffix``) set for row ``rg_label``.

        Raises:
            Failure: if the residue group has more than one residue name.
        """

        rg = residue_group
        resnames = rg.unique_resnames()
        # Set defaults
        if rg_label is None:
            rg_label = (resnames[0] + '-' + rg.parent().id + '-' +
                        rg.resseq + rg.icode).replace(' ', '')
        if data_table is None:
            data_table = pandas.DataFrame(index=[rg_label], columns=[])
        # Check validity
        if len(resnames) != 1:
            raise Failure(
                rg_label +
                ': More than one residue name associated with residue group -- cannot process'
            )

        # Extract residue scores
        ed_scores = self.scores[(resnames[0], rg.parent().id,
                                 rg.resseq_as_int(), rg.icode)]
        # Append scores to data_table (.loc creates missing rows/columns on assignment)
        for column, edstats_key in (('RSCC', 'CCSa'), ('RSR', 'Ra'),
                                    ('B_AV', 'BAa'), ('RSZO', 'ZOa'),
                                    ('RSZD', 'ZDa')):
            data_table.loc[rg_label, column + column_suffix] = ed_scores[edstats_key]

        return data_table
Beispiel #9
0
    def __call__(self, reference=None):
        """Generate the grid objects for the analysis.

        Nothing is rebuilt when ``self.grid`` is already set. Otherwise the grid
        is created and masked using either the structure in ``self.mask_pdb``
        (optionally aligned to ``reference``) or a copy of ``reference``, and is
        then partitioned using ``reference``.

        Args:
            reference: reference dataset.
                NOTE(review): defaults to None but is dereferenced whenever the
                grid has to be built -- callers presumably always pass it; confirm.

        Returns:
            ``self.grid``.

        Raises:
            Failure: if aligning the masking structure to the reference fails.
        """

        # ============================================================================>
        #####
        # Create Sampling Grid (for generated maps)
        #####
        # Create reference grid based on the reference structure
        # ============================================================================>
        if self.grid is None:
            # Which dataset to be used to mask the grid
            if bool(self.mask_pdb):
                mask_dataset = PanddaReferenceDataset.from_file(model_filename=self.mask_pdb).label(
                    tag='masking')
                if self.align_mask_to_reference:
                    try:
                        mask_dataset.model.alignment = None
                        mask_dataset.model.align_to(other_hierarchy=reference.model.hierarchy,
                                                    method=self.alignment_method,
                                                    require_hierarchies_identical=False)
                    except Exception:
                        # Only real errors are converted: a bare except would also
                        # turn KeyboardInterrupt/SystemExit into a Failure
                        msg = traceback.format_exc()
                        msg += '\n------------------>>>'
                        msg += '\n\nFailed to align masking pdb ({}) to the reference structure.'.format(
                            self.mask_pdb)
                        msg += '\nIf the masking structure does not need alignment, rerun with params.masks.align_mask_to_reference=False'
                        raise Failure(msg)
                else:
                    mask_dataset.set_origin_shift((0.0, 0.0, 0.0))
            else:
                mask_dataset = reference.copy()

            # Create the grid using the masking dataset (for determining size and extent of grid)
            self.create_reference_grid(dataset=mask_dataset, grid_spacing=self.grid_spacing, reference=reference)
            self.mask_reference_grid(dataset=mask_dataset, selection=self.mask_selection_string)
            # Store the transformation to shift the reference dataset to the "grid frame", where the grid origin is (0,0,0)
            reference.set_origin_shift([-1.0 * a for a in self.grid.cart_origin()])
            # Partition the grid with the reference dataset (which grid points use which coordinate transformations)
            self.partition_reference_grid(dataset=reference)

        return self.grid
Beispiel #10
0
def transfer_residue_groups_from_other(acceptor_hierarchy,
                                       donor_hierarchy,
                                       in_place=False,
                                       verbose=False):
    """Transfer atom_groups from donor_hierarchy to matching residue_groups in acceptor_hierarchy, creating new chains and residue groups only where necessary.

    Args:
        acceptor_hierarchy: hierarchy that receives the atoms.
        donor_hierarchy: hierarchy that provides the atoms.
        in_place: modify ``acceptor_hierarchy`` directly instead of a deep copy.
        verbose: print a line for every chain / residue group / atom group transfer.

    Returns:
        The modified acceptor hierarchy (a deep copy unless ``in_place``).

    Raises:
        Exception: if several acceptor residue groups share a chain id and resid.
        Failure: if an unmatched donor residue group has no linked acceptor
            chain and there is not exactly one acceptor chain with its chain id.
    """
    if not in_place:
        acceptor_hierarchy = acceptor_hierarchy.deep_copy()
    # Sort all residues (by chain then id) for the acceptor hierarchy
    accept_model = acceptor_hierarchy.only_model()
    accept_dict = {c.id: {} for c in accept_model.chains()}
    for rg in accept_model.residue_groups():
        accept_dict[rg.parent().id].setdefault(rg.resid(), []).append(rg)
    # Dictionary to link matching chains (allows multiple chain As to be linked uniquely to multiple chain As)
    link_dict = {}
    # Residues that don't have a matching partner in the old hierarchy
    tricky_rgs = []
    # Iterate through donor chains
    for donor_ch in donor_hierarchy.only_model().chains():
        # If chain not in hierarchy, simply copy across
        if donor_ch.id not in accept_dict:
            if verbose:
                print('Transferring whole chain:    {}'.format(
                    Labeller.format(donor_ch)))
            accept_model.append_chain(donor_ch.detached_copy())
            continue
        # Chain present, copy by residue_group
        for donor_rg in donor_ch.residue_groups():
            # Find equivalent residue groups in the other hierarchy
            accept_rg = accept_dict[donor_ch.id].get(donor_rg.resid(), [])
            if len(accept_rg) > 1:
                # Should only be one...
                raise Exception(
                    'More than one residue group in hierarchy with the same residue_id and chain_id'
                )
            elif len(accept_rg) == 1:
                accept_rg = accept_rg[0]
                # Record the links between these chains
                link_dict.setdefault(donor_rg.parent(), accept_rg.parent())
                # Transfer atom groups to this residue_group
                if verbose:
                    print('Transferring atom groups:    {} > {}'.format(
                        Labeller.format(donor_rg), Labeller.format(accept_rg)))
                for donor_ag in donor_rg.atom_groups():
                    accept_rg.append_atom_group(donor_ag.detached_copy())
            else:
                # Have the possibility of multiple chains with the same id, so at the moment, store for later
                tricky_rgs.append(donor_rg)
    # Transfer residues that have chain matches, but don't have residue matches in the acceptor structures
    for donor_rg in tricky_rgs:
        # Get chain from link_dict, using the same key type (the donor chain
        # object) that the links were stored under; looking up by chain id
        # string could never match.
        # NOTE(review): this relies on parent() returning objects that hash and
        # compare equal for the same chain -- confirm for the hierarchy type
        # used; if they do not, the fallback below still applies.
        accept_ch = link_dict.get(donor_rg.parent(), None)
        # If the chain isn't linked:
        if accept_ch is None:
            # If there's only one chain with the same ID, choose this one
            possible_chains = [
                c for c in accept_model.chains()
                if c.id == donor_rg.parent().id
            ]
            if len(possible_chains) == 1:
                accept_ch = possible_chains[0]
            else:
                raise Failure(
                    "Don't know how to transfer {} to the output model".format(
                        Labeller.format(donor_rg)))
        # Simply append to chain
        if verbose:
            print('Transferring residue group:  {} > {}'.format(
                Labeller.format(donor_rg), Labeller.format(accept_ch)))
        accept_ch.append_residue_group(donor_rg.detached_copy())

    return acceptor_hierarchy
Beispiel #11
0
    def __call__(self, mcd, reference_dataset=None):
        """Align every dataset in ``mcd`` to the reference dataset.

        Each dataset's existing alignment is cleared, a DatasetAligner is run
        for each dataset through joblib, and each resulting alignment is
        attached to its dataset. Per-dataset success is recorded in
        ``self.alignments``.

        Returns:
            The object returned by ``mcd.new_from_datasets`` for the aligned datasets.

        Raises:
            Failure: if any dataset could not be aligned (a string result is
                treated as an error message).
        """

        assert self.method in ['local', 'global'], \
            'METHOD NOT DEFINED: {!s}'.format(self.method)

        # ==============================>
        # Select the datasets for alignment
        # ==============================>
        to_align = [dataset for _, dataset in mcd.datasets.items()]

        # ==============================>
        # Delete alignments
        # ==============================>
        for dataset in to_align:
            dataset.model.alignment = None

        # ==============================>
        # Generate the alignments for each structure
        # ==============================>
        aligners = [
            DatasetAligner(model=dataset.model,
                           other=reference_dataset.model,
                           method=self.method,
                           id=dataset.tag) for dataset in to_align
        ]
        run_in_parallel = jl.Parallel(n_jobs=self.cpus, verbose=15)
        results = run_in_parallel(
            jl.delayed(wrapper_run)(aligner) for aligner in aligners)

        # ==============================>
        # Catch errors and print at end
        # ==============================>
        failures = []
        for dataset, result in zip(to_align, results):
            if isinstance(result, str):
                # A string result is the error message from the failed job
                failures.append((dataset, result))
                self.alignments[dataset.tag] = False
                print(result)
            else:
                # Attach alignment to dataset
                assert dataset.tag == result.id
                self.alignments[result.id] = True
                dataset.model.alignment = result
                # Build an aligned copy of the structure
                # TODO: restore writing of this aligned structure to file
                # (it is currently computed but not written anywhere)
                native_xyz = dataset.model.hierarchy.atoms().extract_xyz()
                aligned_struc = dataset.model.hierarchy.deep_copy()
                aligned_struc.atoms().set_xyz(
                    dataset.model.alignment.nat2ref(coordinates=native_xyz))

        # ==============================>
        # Report Errors
        # ==============================>
        if failures:
            for dataset, message in failures:
                print('Failed to align dataset {}'.format(dataset.tag))
                print(message)
            raise Failure(
                'Failed to align {} datasets. Error messages printed above.'.
                format(len(failures)))

        # ==============================>
        # Make new dataset
        # ==============================>
        aligned_by_tag = {dataset.tag: dataset for dataset in to_align}
        return mcd.new_from_datasets(datasets=aligned_by_tag)
Beispiel #12
0
 def check_programs_are_available(self, programs):
     """Raise a Failure listing any of ``programs`` that not_installed() reports"""
     missing = not_installed(programs)
     if not missing:
         return
     listing = '\n\t'.join(missing)
     raise Failure(
         'The following programs are not available/installed:\n\t{}'.format(
             listing))
Beispiel #13
0
def align_structures_flexible(mov_hierarchy, ref_hierarchy, altlocs=None, cutoff_radius=15, sequence_identity_threshold=0.95,
                              one_to_one_mapping=True, require_hierarchies_identical=True, verbose=False):
    """
    Perform a flexible alignment on two hierarchies. Alignments are performed on a chain-by-chain basis.
    Each protein chain of mov_hierarchy is aligned to the first chain of ref_hierarchy whose pairwise
    sequence identity exceeds sequence_identity_threshold.

    Args:
        mov_hierarchy: hierarchy to be aligned.
        ref_hierarchy: hierarchy to align to.
        altlocs: passed to align_chains_flexible; defaults to ['', 'A'].
        cutoff_radius: passed to align_chains_flexible.
        sequence_identity_threshold: identity a chain pair must exceed to be considered alignable.
        one_to_one_mapping: if True, each reference chain is used for at most one moving chain.
        require_hierarchies_identical: if True, assert that the two hierarchies are similar.
        verbose: passed to the Report object used for progress messages.

    Returns:
        MultipleLocalAlignment built from the per-chain alignments.

    Raises:
        Exception: if either structure does not have exactly one model.
        Failure: if a protein chain of mov_hierarchy has no suitable chain in ref_hierarchy.
    """

    # Fresh list per call (avoids sharing a mutable default between calls)
    if altlocs is None:
        altlocs = ['', 'A']

    # List of the alignments for each chain
    local_alignments = []
    # Trim to protein only
    mov_hierarchy = backbone(mov_hierarchy, copy=True)
    ref_hierarchy = backbone(ref_hierarchy, copy=True)
    # Check the structures only have one model
    try:
        mov_hierarchy.only_model()
        ref_hierarchy.only_model()
    except Exception:
        raise Exception('Structures for alignment can only have one model!')
    # Check the structures are identical
    if require_hierarchies_identical:
        assert mov_hierarchy.is_similar_hierarchy(ref_hierarchy), 'Structures for alignment must have the same atoms (although atomic parameters can vary)'
    # Extract the chains from the structures
    c_mov = list(mov_hierarchy.chains())
    c_ref = list(ref_hierarchy.chains())

    def chain_table(values):
        # Render a (mov x ref) matrix with chain ids as row and column headings
        text = '\n     REF'
        text += '\nMOV  {}'.format(' '.join(['{:4}'.format(c.id) for c in c_ref]))
        for i_row, i_c in enumerate(c_mov):
            text += '\n{:3}  {}'.format(i_c.id, ' '.join(['{:4}'.format(v) for v in values[i_row]]))
        return text

    # Match chains in the two structures (c_mov is first so the array is first indexed by the chains in mov)
    chn_sim = pairwise_chain_sequence_identity(c_mov, c_ref, seq_identity_threshold=None)
    # Create strings for use in case of errors/verbose printing
    s = 'Chain and sequences for aligment:'
    s += '\n{} chains in mov_hierarchy:'.format(len(c_mov))
    for c in c_mov: s += '\n\t{}: {}'.format(c.id, ''.join(c.as_sequence()))
    s += '\n{} chains in ref_hierarchy:'.format(len(c_ref))
    for c in c_ref: s += '\n\t{}: {}'.format(c.id, ''.join(c.as_sequence()))
    s += '\nPairwise chain-by-chain sequence identities:'
    s += chain_table(chn_sim)
    # Report to be returned in case of error
    report = Report(s, verbose=verbose)
    # Make the array boolean at the threshold value
    chn_sim = (chn_sim>sequence_identity_threshold).astype(int)
    # Report
    s = 'Pairwise chain-by-chain sequence identities (thresholded at {}%):'.format(100*sequence_identity_threshold)
    s += chain_table(chn_sim)
    report(s)
    # Iterate through and align the chains
    for i, chn_mov in enumerate(c_mov):
        # Skip if not protein
        if not chn_mov.is_protein(): continue
        # Find the first chain in the reference structure that's "alignable"
        try:
            idx_ref = list(chn_sim[i]).index(1)
        except ValueError:
            raise Failure('Error raised during alignment.\n'
                          'Unable to align chain {} from mov_hierarchy: there is no suitable chain in ref_hierarchy.\n'\
                          'This might be fixed by setting one_to_one_mapping to False or decreasing sequence_identity_threshold.\n'.format(chn_mov.id)+
                          str(report))
        chn_ref = c_ref[idx_ref]
        report('Aligning chain {} of mov_hierarchy to chain {} in ref_hierarchy'.format(chn_mov.id, chn_ref.id))
        if one_to_one_mapping:
            report('Removing chain {} of ref_hierarchy from the pool of alignment chains (one_to_one_mapping is turned on)'.format(chn_ref.id))
            chn_sim[:,idx_ref] = 0
        # Align the selected chains
        l_ali = align_chains_flexible(chn_mov=chn_mov, chn_ref=chn_ref, altlocs=altlocs, cutoff_radius=cutoff_radius)
        # Add aligned chains as the ID of the LocalAlignment object
        l_ali.id = 'chain {} to chain {}'.format(chn_mov.id, chn_ref.id)
        l_ali.mov_id = chn_mov.id
        l_ali.ref_id = chn_ref.id
        l_ali.seq_ali = align_sequences_default(seq_a=chn_ref.as_sequence(), seq_b=chn_mov.as_sequence())
        # Append to the alignments
        local_alignments.append(l_ali)
    # Print which chains were aligned to which (l_ali.id is a sentence, so the chain ids are used directly)
    report('\n'.join(['Alignment finished:']+['\t(mov) chain {} aligned to (ref) chain {}'.format(l_ali.mov_id, l_ali.ref_id) for l_ali in local_alignments]))
    # Combine all of the local alignments
    return MultipleLocalAlignment(local_alignments=local_alignments)
Beispiel #14
0
def fill_missing_reflections(in_mtz,
                             out_mtz,
                             fill_resolution_low,
                             fill_resolution_high,
                             delete_tmp_files=True):
    """Complete the set of miller indices in an MTZ file.

    Four external programs are run in sequence ('cad', 'uniqueify', 'cad',
    'mtzutils'), each reading the previous stage's file; the last stage writes
    ``out_mtz``.

    Args:
        in_mtz: path of the input MTZ file.
        out_mtz: path of the final output MTZ file; intermediate file names are
            derived from it with splice_ext.
        fill_resolution_low: first value of the 'resolution' keyword given to cad.
        fill_resolution_high: second value of the 'resolution' keyword given to cad.
        delete_tmp_files: remove the three intermediate files on success.

    Raises:
        Failure: if the output file of any stage does not exist after that stage.
    """

    # Single-argument print(...) behaves identically on Python 2 and Python 3
    print('***************************')
    print('*** Filling reflections ***')
    print('***************************')

    tmp_mtz_1 = splice_ext(path=out_mtz, new='step1-truncate')
    tmp_mtz_2 = splice_ext(path=out_mtz, new='step2-uniquify')
    tmp_mtz_3 = splice_ext(path=out_mtz, new='step3-remerged')

    # Stage 1 - truncate dataset, fill missing reflections, change column name
    cmd = CommandManager('cad')
    cmd.add_command_line_arguments(['hklin1', in_mtz, 'hklout', tmp_mtz_1])
    cmd.add_standard_input([
        'monitor BRIEF', 'labin file_number 1 ALL',
        'resolution file 1 {} {}'.format(fill_resolution_low,
                                         fill_resolution_high)
    ])
    print_run_and_raise_error_maybe(cmd)
    if not os.path.exists(tmp_mtz_1):
        raise Failure(
            'filling of missing reflections has failed -- {} does not exist'.
            format(tmp_mtz_1))

    print('-------------------')

    # Stage 2 - Uniqueify the file
    cmd = CommandManager('uniqueify')
    cmd.add_command_line_arguments(['-p', '0.05', tmp_mtz_1, tmp_mtz_2])
    print_run_and_raise_error_maybe(cmd)
    if not os.path.exists(tmp_mtz_2):
        raise Failure(
            'filling of missing reflections has failed -- {} does not exist'.
            format(tmp_mtz_2))

    print('-------------------')

    # Stage 3 - remerge the two files
    cmd = CommandManager('cad')
    cmd.add_command_line_arguments(
        ['hklin1', in_mtz, 'hklin2', tmp_mtz_2, 'hklout', tmp_mtz_3])
    cmd.add_standard_input([
        'monitor BRIEF', 'labin file_number 1 ALL',
        'labin file_number 2 E1=FreeR_flag', 'labout file_number 2 E1=dummy'
    ])
    print_run_and_raise_error_maybe(cmd)
    if not os.path.exists(tmp_mtz_3):
        raise Failure(
            'filling of missing reflections has failed -- {} does not exist'.
            format(tmp_mtz_3))

    print('-------------------')

    # Stage 4 - remove the dummy column
    cmd = CommandManager('mtzutils')
    cmd.add_command_line_arguments(['hklin1', tmp_mtz_3, 'hklout', out_mtz])
    cmd.add_standard_input(
        ['HEADER BRIEF', 'EXCLUDE 1 dummy', 'ONEFILE', 'END'])
    print_run_and_raise_error_maybe(cmd)
    # Check the file this stage writes (out_mtz), not its input (tmp_mtz_3)
    if not os.path.exists(out_mtz):
        raise Failure(
            'filling of missing reflections has failed -- {} does not exist'.
            format(out_mtz))

    if delete_tmp_files:
        os.remove(tmp_mtz_1)
        os.remove(tmp_mtz_2)
        os.remove(tmp_mtz_3)