Esempio n. 1
0
def score_with_edstats_to_list(mtz_file, pdb_file, f_label=None):
    """Score residues against density with edstats and return the parsed table.

    Runs ``edstats.pl`` on the given reflection and model files, asking it to
    write its table to a temporary file; that file is read back, parsed and
    removed again before returning.

    Args:
        mtz_file: Path of the MTZ file, passed to edstats as ``-hklin``.
        pdb_file: Path of the PDB file, passed to edstats as ``-xyzin``.
        f_label: Optional column label passed to edstats as ``-flabel``.
            When ``None`` the label is taken from
            ``MtzSummary(mtz_file).label.f``.

    Returns:
        A tuple ``(outputdata, header, command)``:

        * ``outputdata`` -- list with one
          ``[(residue, chain, resnum, inscode), fields]`` entry per non-blank
          data row. ``fields`` holds the remaining columns with ``'n/a'``
          mapped to ``None``, numeric tokens converted to ``int`` or
          ``float`` and anything else left as the original string.
        * ``header`` -- the column names after the first three
          (``RT``, ``CI``, ``RN``); empty if edstats wrote nothing.
        * ``command`` -- the ``CommandManager`` that was run, with the raw
          output lines stored on ``command.file_output``.

    Raises:
        AssertionError: If either input file does not exist, or if the
            table header does not start with ``RT CI RN``.
        Sorry: If ``f_label`` is given but is not one of the MTZ column
            headings, or if no F label could be determined at all.
        ValueError: If a data row has a different number of fields from
            the header, or its residue number cannot be parsed.
    """

    assert os.path.exists(
        mtz_file), 'MTZ file for edstats does not exist! {!s}'.format(mtz_file)
    # Report the path that is actually missing (was formatting mtz_file).
    assert os.path.exists(
        pdb_file), 'PDB file for edstats does not exist! {!s}'.format(pdb_file)

    # Collate summary of MTZ file
    m_summ = MtzSummary(mtz_file)

    if f_label is None:
        # No label requested: guess the label from the mtz file
        f_label = m_summ.label.f
    elif f_label not in m_summ.summary['colheadings']:
        raise Sorry(
            'Selected f_label ({}) not found in mtz file ({}) -- mtz contains columns {}'
            .format(f_label, mtz_file, m_summ.summary['colheadings']))
    # A requested label that is present in the file is used unchanged.

    # Check for f_label
    if not f_label:
        raise Sorry(
            'No F label selected/found in mtz file: {!s} -- mtz contains columns {}'
            .format(mtz_file, m_summ.summary['colheadings']))

    # Create the output file only once all validation has passed, so that an
    # early failure cannot leave a stray temporary file behind.
    temp_handle, temp_path = tempfile.mkstemp(suffix='.table',
                                              prefix='edstats_')

    # Run EDSTATS on the files
    try:
        # edstats writes to the path itself; release our own descriptor now so
        # that it cannot leak if the command fails.
        os.close(temp_handle)
        # Initialise Command Manager to run edstats
        command = CommandManager('edstats.pl')
        command.add_command_line_arguments([
            '-hklin', mtz_file, '-xyzin', pdb_file, '-output', temp_path,
            '-noerror', '-flabel', f_label
        ])
        command.set_timeout(timeout=600)
        command.run()
        # Read the output, normalising any line-ending convention
        with open(temp_path) as f:
            contents = f.read()
        output = contents.strip().replace('\r\n', '\n').replace(
            '\r', '\n').splitlines()
        command.file_output = output
    finally:
        os.remove(temp_path)

    # Process the output header
    if output:
        # Check header and then remove the first three columns
        header = output.pop(0).split()
        assert header[:3] == [
            'RT', 'CI', 'RN'
        ], 'edstats output headers are not as expected! {!s}'.format(output)
        num_fields = len(header)
        header = header[3:]
    else:
        header = []
        num_fields = 0

    # List to be returned
    outputdata = []

    # Process the rest of the data
    for line in output:
        line = line.strip()
        if not line:
            continue

        fields = line.split()
        if len(fields) != num_fields:
            raise ValueError(
                "Error Parsing EDSTATS output: Header & Data rows have different numbers of fields"
            )

        # Get and process the residue information - TODO CI column can include alternate conformer?! TODO
        residue, chain, resnum = fields[:3]
        try:
            resnum = int(resnum)
            inscode = ' '
        except ValueError:
            # Not a plain integer: treat the final character as the insertion code
            inscode = resnum[-1:]
            resnum = int(resnum[:-1])

        # Process the other columns (n/a -> None, numbers -> int/float)
        values = []
        for token in fields[3:]:
            if token == 'n/a':
                values.append(None)
                continue
            try:
                values.append(int(token))
            except ValueError:
                try:
                    values.append(float(token))
                except ValueError:
                    # Non-numeric token: keep the original string
                    values.append(token)

        outputdata.append([(residue, chain, resnum, inscode), values])

    return outputdata, header, command
Esempio n. 2
0
def score_with_edstats_to_list(mtz_file, pdb_file):
    """Score residues against density with edstats and return the parsed table.

    Runs ``edstats.pl`` on the given reflection and model files, asking it to
    write its table to a temporary file; that file is read back, parsed and
    removed again before returning.

    Args:
        mtz_file: Path of the MTZ file, passed to edstats as ``-hklin``. The
            ``-flabel`` column is taken from ``MtzSummary(mtz_file).label.f``.
        pdb_file: Path of the PDB file, passed to edstats as ``-xyzin``.

    Returns:
        A tuple ``(outputdata, header, command)``:

        * ``outputdata`` -- list with one
          ``[(residue, chain, resnum, inscode), fields]`` entry per non-blank
          data row. ``fields`` holds the remaining columns with ``'n/a'``
          mapped to ``None``, numeric tokens converted to ``int`` or
          ``float`` and anything else left as the original string.
        * ``header`` -- the column names after the first three
          (``RT``, ``CI``, ``RN``); empty if edstats wrote nothing.
        * ``command`` -- the ``CommandManager`` that was run, with the raw
          output lines stored on ``command.file_output``.

    Raises:
        AssertionError: If either input file does not exist, or if the
            table header does not start with ``RT CI RN``.
        ReflectionException: If no F label is found for the MTZ file.
        ValueError: If a data row has a different number of fields from
            the header, or its residue number cannot be parsed.
    """

    assert os.path.exists(
        mtz_file), 'MTZ FILE FOR EDSTATS DOES NOT EXIST! {!s}'.format(mtz_file)
    # Report the path that is actually missing (was formatting mtz_file).
    assert os.path.exists(
        pdb_file), 'PDB FILE FOR EDSTATS DOES NOT EXIST! {!s}'.format(pdb_file)

    # Find the labels in the mtzfile
    file_obj = MtzSummary(mtz_file)
    f_label = file_obj.label.f
    if not f_label:
        raise ReflectionException(
            'MTZ Summary ERROR: No F Label Found in MTZ File: {!s}'.format(
                mtz_file))

    # Create the output file only once all validation has passed, so that an
    # early failure cannot leave a stray temporary file behind.
    temp_handle, temp_path = tempfile.mkstemp(suffix='.table',
                                              prefix='edstats_')

    # Run EDSTATS on the files
    try:
        # edstats writes to the path itself; release our own descriptor now so
        # that it cannot leak if the command fails.
        os.close(temp_handle)
        # Initialise Command Manager to run edstats
        command = CommandManager('edstats.pl')
        command.add_command_line_arguments([
            '-hklin', mtz_file, '-xyzin', pdb_file, '-output', temp_path,
            '-noerror', '-flabel', f_label
        ])
        command.set_timeout(timeout=600)
        command.run()
        # Read the output, normalising any line-ending convention
        with open(temp_path) as f:
            contents = f.read()
        output = contents.strip().replace('\r\n', '\n').replace(
            '\r', '\n').splitlines()
        command.file_output = output
    finally:
        os.remove(temp_path)

    # Process the output header
    if output:
        # Check header and then remove the first three columns
        header = output.pop(0).split()
        assert header[:3] == [
            'RT', 'CI', 'RN'
        ], 'EDSTATS OUTPUT HEADERS ARE NOT AS EXPECTED! {!s}'.format(output)
        num_fields = len(header)
        header = header[3:]
    else:
        header = []
        num_fields = 0

    # List to be returned
    outputdata = []

    # Process the rest of the data
    for line in output:
        line = line.strip()
        if not line:
            continue

        fields = line.split()
        if len(fields) != num_fields:
            raise ValueError(
                "Error Parsing EDSTATS output: Header & Data rows have different numbers of fields"
            )

        # Get and process the residue information
        residue, chain, resnum = fields[:3]
        try:
            resnum = int(resnum)
            inscode = ' '
        except ValueError:
            # Not a plain integer: treat the final character as the insertion code
            inscode = resnum[-1:]
            resnum = int(resnum[:-1])

        # Process the other columns (n/a -> None, numbers -> int/float)
        values = []
        for token in fields[3:]:
            if token == 'n/a':
                values.append(None)
                continue
            try:
                values.append(int(token))
            except ValueError:
                try:
                    values.append(float(token))
                except ValueError:
                    # Non-numeric token: keep the original string rather than
                    # aborting the whole parse on one unexpected column value
                    values.append(token)

        outputdata.append([(residue, chain, resnum, inscode), values])

    return outputdata, header, command