def score_with_edstats_to_list(mtz_file, pdb_file, f_label=None):
    """Score residues against density with edstats and return the parsed table.

    Runs ``edstats.pl`` through a CommandManager, asking it to write its
    table to a temporary file. The table is read back, the temporary file is
    deleted, and the rows are converted to Python values.

    Args:
        mtz_file: Path to the MTZ file (passed to edstats as ``-hklin``).
        pdb_file: Path to the PDB file (passed to edstats as ``-xyzin``).
        f_label: Optional column label passed to edstats as ``-flabel``.
            When None, the label is taken from ``MtzSummary(mtz_file).label.f``.

    Returns:
        A tuple ``(outputdata, header, command)``:
          * ``outputdata`` -- list with one ``[(residue, chain, resnum,
            inscode), fields]`` entry per non-blank data row. In ``fields``,
            ``'n/a'`` becomes None, numeric text becomes int or float, and
            anything else is left as the original string.
          * ``header`` -- column names following the first three
            (``RT``, ``CI``, ``RN``); empty if edstats wrote nothing.
          * ``command`` -- the CommandManager that was run; the raw output
            lines are stored on ``command.file_output``.

    Raises:
        AssertionError: If either input file does not exist, or the first
            three header columns are not ``RT``, ``CI``, ``RN``. (These are
            ``assert`` statements, so they are skipped under ``python -O``.)
        Sorry: If ``f_label`` is given but is not among the MTZ column
            headings, or if no F label could be determined.
        ValueError: If a data row has a different number of fields than the
            header, or its residue number cannot be parsed.
    """
    assert os.path.exists(mtz_file), \
        'MTZ file for edstats does not exist! {!s}'.format(mtz_file)
    # Report the path that is actually missing (the PDB, not the MTZ).
    assert os.path.exists(pdb_file), \
        'PDB file for edstats does not exist! {!s}'.format(pdb_file)

    # Collate summary of MTZ file
    m_summ = MtzSummary(mtz_file)

    if f_label is None:
        # No label supplied: guess it from the MTZ file.
        f_label = m_summ.label.f
    elif f_label not in m_summ.summary['colheadings']:
        # A label was supplied but the MTZ file does not contain it.
        raise Sorry(
            'Selected f_label ({}) not found in mtz file ({}) -- mtz contains columns {}'
            .format(f_label, mtz_file, m_summ.summary['colheadings']))
    # Otherwise a valid, caller-supplied label is kept as given.

    if not f_label:
        raise Sorry(
            'No F label selected/found in mtz file: {!s} -- mtz contains columns {}'
            .format(mtz_file, m_summ.summary['colheadings']))

    # Create the output file only once the inputs are validated, so that a
    # failure above cannot leave a stray temporary file behind.
    temp_handle, temp_path = tempfile.mkstemp(suffix='.table',
                                              prefix='edstats_')
    # edstats writes to the path itself; release our descriptor straight
    # away so it cannot leak if the run fails.
    os.close(temp_handle)

    try:
        # Initialise Command Manager to run edstats
        command = CommandManager('edstats.pl')
        command.add_command_line_arguments([
            '-hklin', mtz_file,
            '-xyzin', pdb_file,
            '-output', temp_path,
            '-noerror',
            '-flabel', f_label,
        ])
        command.set_timeout(timeout=600)
        command.run()
        # Read the output, normalising all line endings to '\n'
        with open(temp_path) as f:
            output = f.read().strip().replace('\r\n', '\n').replace(
                '\r', '\n').splitlines()
        command.file_output = output
    finally:
        os.remove(temp_path)

    # Process the output header
    if output:
        # Check header and then remove the first three columns
        header = output.pop(0).split()
        assert header[:3] == ['RT', 'CI', 'RN'], \
            'edstats output headers are not as expected! \n{!s}'.format(output)
        num_fields = len(header)
        header = header[3:]
    else:
        header = []
        num_fields = 0

    # List to be returned
    outputdata = []

    # Process the rest of the data
    for line in output:
        line = line.strip()
        if not line:
            continue
        fields = line.split()
        if len(fields) != num_fields:
            raise ValueError(
                "Error Parsing EDSTATS output: Header & Data rows have different numbers of fields"
            )

        # Get and process the residue information
        # TODO: CI column can include alternate conformer?!
        residue, chain, resnum = fields[:3]
        try:
            resnum = int(resnum)
            inscode = ' '
        except ValueError:
            # Non-integer residue number: treat the last character as the
            # insertion code and the rest as the number.
            inscode = resnum[-1:]
            resnum = int(resnum[:-1])

        # Remove the processed columns
        fields = fields[3:]

        # Convert the remaining columns: 'n/a' -> None, then int, then
        # float; values that are neither are left as strings.
        for i, x in enumerate(fields):
            if x == 'n/a':
                fields[i] = None
                continue
            try:
                fields[i] = int(x)
            except ValueError:
                try:
                    fields[i] = float(x)
                except ValueError:
                    pass

        outputdata.append([(residue, chain, resnum, inscode), fields])

    return outputdata, header, command
def score_with_edstats_to_list(mtz_file, pdb_file):
    """Score residues against density with edstats and return the parsed table.

    Runs ``edstats.pl`` through a CommandManager with its table written to a
    temporary file, reads that file back, deletes it, and converts the rows
    to Python values.

    Args:
        mtz_file: Path to the MTZ file (passed to edstats as ``-hklin``).
        pdb_file: Path to the PDB file (passed to edstats as ``-xyzin``).

    Returns:
        A tuple ``(outputdata, header, command)``:
          * ``outputdata`` -- list with one ``[(residue, chain, resnum,
            inscode), fields]`` entry per non-blank data row. In ``fields``,
            ``'n/a'`` becomes None, numeric text becomes int or float, and
            anything else is left as the original string.
          * ``header`` -- column names following the first three
            (``RT``, ``CI``, ``RN``); empty if edstats wrote nothing.
          * ``command`` -- the CommandManager that was run; the raw output
            lines are stored on ``command.file_output``.

    Raises:
        AssertionError: If either input file does not exist, or the first
            three header columns are not ``RT``, ``CI``, ``RN``. (These are
            ``assert`` statements, so they are skipped under ``python -O``.)
        ReflectionException: If ``MtzSummary(mtz_file).label.f`` is empty.
        ValueError: If a data row has a different number of fields than the
            header, or its residue number cannot be parsed.
    """
    assert os.path.exists(mtz_file), \
        'MTZ FILE FOR EDSTATS DOES NOT EXIST! {!s}'.format(mtz_file)
    # Report the path that is actually missing (the PDB, not the MTZ).
    assert os.path.exists(pdb_file), \
        'PDB FILE FOR EDSTATS DOES NOT EXIST! {!s}'.format(pdb_file)

    # Find the labels in the mtzfile
    file_obj = MtzSummary(mtz_file)
    f_label = file_obj.label.f
    if not f_label:
        raise ReflectionException(
            'MTZ Summary ERROR: No F Label Found in MTZ File: {!s}'.format(
                mtz_file))

    # Create the output file only after the label lookup has succeeded, so
    # that a failure above cannot leave a stray temporary file behind.
    temp_handle, temp_path = tempfile.mkstemp(suffix='.table',
                                              prefix='edstats_')
    # edstats writes to the path itself; release our descriptor straight
    # away so it cannot leak if the run fails.
    os.close(temp_handle)

    try:
        # Initialise Command Manager to run edstats
        command = CommandManager('edstats.pl')
        command.add_command_line_arguments([
            '-hklin', mtz_file,
            '-xyzin', pdb_file,
            '-output', temp_path,
            '-noerror',
            '-flabel', f_label,
        ])
        command.set_timeout(timeout=600)
        command.run()
        # Read the output, normalising all line endings to '\n'
        with open(temp_path) as f:
            output = f.read().strip().replace('\r\n', '\n').replace(
                '\r', '\n').splitlines()
        command.file_output = output
    finally:
        os.remove(temp_path)

    # Process the output header
    if output:
        header = output.pop(0).split()
        assert header[:3] == ['RT', 'CI', 'RN'], \
            'EDSTATS OUTPUT HEADERS ARE NOT AS EXPECTED! \n{!s}'.format(output)
        num_fields = len(header)
        header = header[3:]
    else:
        header = []
        num_fields = 0

    # List to be returned
    outputdata = []

    # Process the rest of the data
    for line in output:
        line = line.strip()
        if not line:
            continue
        fields = line.split()
        if len(fields) != num_fields:
            raise ValueError(
                "Error Parsing EDSTATS output: Header & Data rows have different numbers of fields"
            )

        # Get and process the residue information
        residue, chain, resnum = fields[:3]
        try:
            resnum = int(resnum)
            inscode = ' '
        except ValueError:
            # Non-integer residue number: treat the last character as the
            # insertion code and the rest as the number.
            inscode = resnum[-1:]
            resnum = int(resnum[:-1])

        # Remove the processed columns
        fields = fields[3:]

        # Convert the remaining columns: 'n/a' -> None, then int, then
        # float. A value that is neither is left as a string instead of
        # aborting the whole parse with a bare ValueError.
        for i, x in enumerate(fields):
            if x == 'n/a':
                fields[i] = None
                continue
            try:
                fields[i] = int(x)
            except ValueError:
                try:
                    fields[i] = float(x)
                except ValueError:
                    pass

        outputdata.append([(residue, chain, resnum, inscode), fields])

    return outputdata, header, command