def read_results(file=None, dir=None, scaling=1.0):
    """Extract the data from the Dasha results file.

    @keyword file:      The name of the file to open.
    @type file:         str
    @keyword dir:       The directory containing the file (defaults to the current directory if None).
    @type dir:          str or None
    @keyword scaling:   The parameter scaling factor.
    @type scaling:      float
    """

    # Extract the data.
    data = extract_data(file=file, dir=dir)

    # Remove comments.
    data = strip(data)

    # Repackage the data as a list of lists of spin ID, value, error.
    new_data = []
    for i in range(len(data)):
        spin_id = ':%s@N' % data[i][0]
        value = float(data[i][1]) * scaling
        error = float(data[i][2]) * scaling
        new_data.append([spin_id, value, error])

    # Return the data.
    return new_data
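# A minimal standalone sketch (not part of relax) illustrating the repackaging performed by
# read_results() above: each Dasha results row of residue number, value, and error becomes a
# [':<num>@N' spin ID, scaled value, scaled error] triplet.  The example rows are hypothetical.
def _demo_repackage(rows, scaling=1.0):
    """Mimic the spin ID construction and scaling of read_results()."""
    new_data = []
    for residue, value, error in rows:
        new_data.append([':%s@N' % residue, float(value) * scaling, float(error) * scaling])
    return new_data

# Example with two hypothetical Dasha rows:
# _demo_repackage([['1', '0.8', '0.05'], ['2', '0.75', '0.04']])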
def read(file='results', dir=None):
    """Function for reading the data out of a file."""

    # Test if the current data pipe exists.
    check_pipe()

    # Make sure that the data pipe is empty.
    if not cdp.is_empty():
        raise RelaxError("The current data pipe is not empty.")

    # Get the full file path, for later use.
    file_path = get_file_path(file_name=file, dir=dir)

    # Open the file.
    file = open_read_file(file_name=file_path)

    # Determine the format of the file.
    format = determine_format(file)

    # XML results.
    if format == 'xml':
        ds.from_xml(file, dir=dirname(file_path), pipe_to=pipes.cdp_name())

    # Columnar results (for backwards compatibility with ancient relax results model-free files).
    elif format == 'columnar':
        # Extract the data from the file.
        file_data = extract_data(file=file)

        # Strip data.
        file_data = strip(file_data)

        # Fail if the file contains no data.
        if not file_data:
            raise RelaxFileEmptyError

        # Read the results.
        read_columnar_results(file_data)

    # Unknown results file.
    else:
        raise RelaxError("The format of the results file " + repr(file_path) + " cannot be determined.")

    # Update all of the required metadata structures.
    mol_res_spin.metadata_update()
    interatomic.metadata_update()
def read(file='results', dir=None):
    """Function for reading the data out of a file."""

    # Test if the current data pipe exists.
    check_pipe()

    # Make sure that the data pipe is empty.
    if not cdp.is_empty():
        raise RelaxError("The current data pipe is not empty.")

    # Get the full file path, for later use.
    file_path = get_file_path(file_name=file, dir=dir)

    # Open the file.
    file = open_read_file(file_name=file_path)

    # Determine the format of the file.
    format = determine_format(file)

    # XML results.
    if format == 'xml':
        ds.from_xml(file, dir=dirname(file_path), pipe_to=pipes.cdp_name())

    # Columnar results (for backwards compatibility with ancient relax results model-free files).
    elif format == 'columnar':
        # Extract the data from the file.
        file_data = extract_data(file=file)

        # Strip data.
        file_data = strip(file_data)

        # Fail if the file contains no data.
        if not file_data:
            raise RelaxFileEmptyError

        # Read the results.
        read_columnar_results(file_data)

    # Unknown results file.
    else:
        raise RelaxError("The format of the results file " + repr(file_path) + " cannot be determined.")
def intensity_generic(peak_list=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, sep=None, spin_id=None):
    """Extract the peak intensity information from the generic column formatted peak list.

    @keyword peak_list:     The peak list object to place all data into.
    @type peak_list:        lib.spectrum.objects.Peak_list instance
    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the peak intensities.
    @type data_col:         int or list of int
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxError:     When the expected peak intensity is not a float.
    """

    # Strip the data.
    file_data = strip(file_data)

    # Check the intensity column argument.
    data_present = True
    if data_col == None:
        warn(RelaxWarning("The data column argument has not been supplied, so the function will only return spin data."))
        data_present = False

    # Convert the data_col argument to a list if needed.
    if not isinstance(data_col, list):
        data_col = [data_col]

    # Loop over the file data.
    for line in file_data:
        # Loop over the intensity columns, storing the data.
        intensity = []
        for i in range(len(data_col)):
            # Extract the data for the single line (loop of a single element).
            for values in read_spin_data(file_data=[line], spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col[i], sep=sep, spin_id=spin_id):
                # Check the values.
                if len(values) != 6 and data_present:
                    raise RelaxError("The molecule name, residue number and name, spin number and name, and value columns could not be found in the data %s." % repr(values))

                # Unpack when peak data is present.
                elif data_present:
                    # Unpack.
                    mol_name, res_num, res_name, spin_num, spin_name, value = values

                    # Store the intensity.
                    intensity.append(value)

                # Unpack when peak data is not present.
                elif not data_present:
                    # Unpack.
                    mol_name, res_num, res_name, spin_num, spin_name = values

        # Add the assignment to the peak list object.
        peak_list.add(mol_names=[mol_name, mol_name], res_nums=[res_num, res_num], res_names=[res_name, res_name], spin_nums=[spin_num, spin_num], spin_names=[spin_name, spin_name], intensity=intensity)
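# A standalone sketch (hypothetical data, not relax code) of the generic peak list layout handled
# above: one whitespace-separated line per spin, with residue number, residue name, and spin name
# columns followed by one intensity column per spectrum.  Column numbers are 1-indexed.
_example_generic_lines = [
    ['1', 'GLY', 'N', '1200.5', '1175.3'],
    ['2', 'ALA', 'N', '980.2', '960.7'],
]
# With res_num_col=1, res_name_col=2, spin_name_col=3 and data_col=[4, 5], intensity_generic()
# would collect two intensity values per assignment before calling peak_list.add().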
def read_list(peak_list=None, file_data=None):
    """Extract the peak intensity information from the Sparky peak intensity file.

    @keyword peak_list: The peak list object to place all data into.
    @type peak_list:    lib.spectrum.objects.Peak_list instance
    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @raises RelaxError: When the expected peak intensity is not a float.
    """

    # The number of header lines.
    num = 0
    if file_data[0][0] == 'Assignment':
        num = num + 1
    if file_data[1] == '':
        num = num + 1
    print("Number of header lines found: %s" % num)

    # The columns according to the file.
    w1_col = None
    w2_col = None
    w3_col = None
    w4_col = None
    int_col = None
    for i in range(len(file_data[0])):
        # The chemical shifts.
        if file_data[0][i] == 'w1':
            w1_col = i
        elif file_data[0][i] == 'w2':
            w2_col = i
        elif file_data[0][i] == 'w3':
            w3_col = i
        elif file_data[0][i] == 'w4':
            w4_col = i

        # The peak height.
        elif file_data[0][i] == 'Height':
            # The peak height when exported from CcpNmr Analysis without the 'Data' prefix.
            int_col = i

            # The peak height when exported from Sparky.
            if file_data[0][i-1] == 'Data' and file_data[0][i] == 'Height':
                int_col = i - 1

        # The peak volume.
        elif file_data[0][i] == 'Intensity':
            int_col = i

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # The dimensionality.
    if w4_col != None:
        dim = 4
    elif w3_col != None:
        dim = 3
    elif w2_col != None:
        dim = 2
    elif w1_col != None:
        dim = 1
    else:
        raise RelaxError("The dimensionality of the peak list cannot be determined.")
    print("%sD peak list detected." % dim)

    # Loop over the file data.
    for line in file_data:
        # Skip non-assigned peaks.
        if line[0] == '?-?':
            continue

        # Split up the assignments.
        if dim == 1:
            assign1 = line[0]
        elif dim == 2:
            assign1, assign2 = split('-', line[0])
        elif dim == 3:
            assign1, assign2, assign3 = split('-', line[0])
        elif dim == 4:
            assign1, assign2, assign3, assign4 = split('-', line[0])

        # Process the assignment for each dimension.
        if dim >= 1:
            row1 = split('([a-zA-Z]+)', assign1)
            name1 = row1[-2] + row1[-1]
        if dim >= 2:
            row2 = split('([a-zA-Z]+)', assign2)
            name2 = row2[-2] + row2[-1]
        if dim >= 3:
            row3 = split('([a-zA-Z]+)', assign3)
            name3 = row3[-2] + row3[-1]
        if dim >= 4:
            row4 = split('([a-zA-Z]+)', assign4)
            name4 = row4[-2] + row4[-1]

        # Get the residue number for dimension 1.
        got_res_num1 = True
        try:
            res_num1 = int(row1[-3])
        except:
            got_res_num1 = False
            raise RelaxError("Improperly formatted Sparky file, cannot process the residue number for dimension 1 in assignment: %s." % line[0])

        # Get the residue number for dimension 2.
        try:
            res_num2 = int(row2[-3])
        except:
            # We cannot always expect dimension 2 to have a residue number.
            if got_res_num1:
                res_num2 = res_num1
            else:
                res_num2 = None
            warn(RelaxWarning("Improperly formatted Sparky file, cannot process the residue number for dimension 2 in assignment: %s. Setting residue number to %s." % (line[0], res_num2)))

        # The residue name for dimension 1.
        got_res_name1 = True
        try:
            res_name1 = row1[-4]
        except:
            got_res_name1 = False
            res_name1 = None
            warn(RelaxWarning("Improperly formatted Sparky file, cannot process the residue name for dimension 1 in assignment: %s. Setting residue name to %s." % (line[0], res_name1)))

        # The residue name for dimension 2.
        try:
            res_name2 = row2[-4]
        except:
            # We cannot always expect dimension 2 to have a residue name.
            if got_res_name1:
                res_name2 = res_name1
            else:
                res_name2 = None
            warn(RelaxWarning("Improperly formatted Sparky file, cannot process the residue name for dimension 2 in assignment: %s. Setting residue name to %s." % (line[0], res_name2)))

        # Chemical shifts.
        w1 = None
        w2 = None
        w3 = None
        w4 = None
        if w1_col != None:
            try:
                w1 = float(line[w1_col])
            except ValueError:
                raise RelaxError("The chemical shift from the line %s is invalid." % line)
        if w2_col != None:
            try:
                w2 = float(line[w2_col])
            except ValueError:
                raise RelaxError("The chemical shift from the line %s is invalid." % line)
        if w3_col != None:
            try:
                w3 = float(line[w3_col])
            except ValueError:
                raise RelaxError("The chemical shift from the line %s is invalid." % line)
        if w4_col != None:
            try:
                w4 = float(line[w4_col])
            except ValueError:
                raise RelaxError("The chemical shift from the line %s is invalid." % line)

        # Intensity.
        if int_col != None:
            try:
                intensity = float(line[int_col])
            except ValueError:
                raise RelaxError("The peak intensity value from the line %s is invalid." % line)

            # Add the assignment to the peak list object.
            if dim == 1:
                peak_list.add(res_nums=[res_num1], res_names=[res_name1], spin_names=[name1], shifts=[w1], intensity=intensity)
            elif dim == 2:
                peak_list.add(res_nums=[res_num1, res_num2], res_names=[res_name1, res_name2], spin_names=[name1, name2], shifts=[w1, w2], intensity=intensity)
            elif dim == 3:
                peak_list.add(res_nums=[res_num1, res_num2, res_num1], res_names=[res_name1, res_name2, res_name1], spin_names=[name1, name2, name3], shifts=[w1, w2, w3], intensity=intensity)
            elif dim == 4:
                peak_list.add(res_nums=[res_num1, res_num2, res_num1, res_num1], res_names=[res_name1, res_name2, res_name1, res_name1], spin_names=[name1, name2, name3, name4], shifts=[w1, w2, w3, w4], intensity=intensity)

        # No intensity column, for example when reading spins from a spectrum list.
        elif int_col == None:
            warn(RelaxWarning("No intensity column was found, so the assignment from the line %s will be added without intensity data." % line))

            # Add the assignment to the peak list object.
            if dim == 1:
                peak_list.add(res_nums=[res_num1], res_names=[res_name1], spin_names=[name1], shifts=[w1])
            elif dim == 2:
                peak_list.add(res_nums=[res_num1, res_num2], res_names=[res_name1, res_name2], spin_names=[name1, name2], shifts=[w1, w2])
            elif dim == 3:
                peak_list.add(res_nums=[res_num1, res_num2, res_num1], res_names=[res_name1, res_name2, res_name1], spin_names=[name1, name2, name3], shifts=[w1, w2, w3])
            elif dim == 4:
                peak_list.add(res_nums=[res_num1, res_num2, res_num1, res_num1], res_names=[res_name1, res_name2, res_name1, res_name1], spin_names=[name1, name2, name3, name4], shifts=[w1, w2, w3, w4])
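# A standalone sketch of how the Sparky assignment strings are decomposed above.  re.split() with a
# capturing group keeps both the alphabetic and numeric runs, so a hypothetical assignment such as
# 'G10N-HN' can be unpacked into residue name, residue number, and spin name for each dimension.
from re import split

assign1, assign2 = split('-', 'G10N-HN')
row1 = split('([a-zA-Z]+)', assign1)    # ['', 'G', '10', 'N', '']
res_name1 = row1[-4]                    # 'G'
res_num1 = int(row1[-3])                # 10
spin_name1 = row1[-2] + row1[-1]        # 'N'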
def read(align_id=None, file=None, dir=None, file_data=None, data_type='D', spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sep=None, neg_g_corr=False, absolute=False):
    """Read the RDC data from file.

    @keyword align_id:      The alignment tensor ID string.
    @type align_id:         str
    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword data_type:     A string which, if set to 'D', means that the splitting in the aligned sample was assumed to be J + D; if set to '2D', the splitting was taken as J + 2D.  If set to 'T', the data will be marked as being J+D values.
    @type data_type:        str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the RDC data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the RDC errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword neg_g_corr:    A flag which is used to correct for the negative gyromagnetic ratio of 15N.  If True, a sign inversion will be applied to all RDC values to be loaded.
    @type neg_g_corr:       bool
    @keyword absolute:      A flag which if True indicates that the RDCs to load are signless.  All RDCs will then be converted to positive values.
    @type absolute:         bool
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("One of either the data or error column must be supplied.")

    # Check the data types.
    rdc_types = ['D', '2D', 'T']
    if data_type not in rdc_types:
        raise RelaxError("The RDC data type '%s' must be one of %s." % (data_type, rdc_types))


    # Spin specific data.
    #####################

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the RDC data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]

        # Convert the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The RDC value of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id1)
        spin2 = return_spin(spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Test the error value (a value of 0.0 will cause the interatomic container to be deselected).
        if error == 0.0:
            interatom.select = False
            warn(RelaxWarning("An error value of zero has been encountered, deselecting the interatomic container between spin '%s' and '%s'." % (spin_id1, spin_id2)))
            continue

        # Store the data type as global data (needed for the conversion of RDC data).
        if not hasattr(interatom, 'rdc_data_types'):
            interatom.rdc_data_types = {}
        if not align_id in interatom.rdc_data_types:
            interatom.rdc_data_types[align_id] = data_type

        # Convert and add the data.
        if data_col:
            # Data conversion.
            value = convert(value, data_type, align_id, to_intern=True)

            # Correction for the negative gyromagnetic ratio of 15N.
            if neg_g_corr and value != None:
                value = -value

            # Absolute values.
            if absolute:
                # Force the value to be positive.
                value = abs(value)

            # Initialise.
            if not hasattr(interatom, 'rdc'):
                interatom.rdc = {}

            # Add the value.
            interatom.rdc[align_id] = value

            # Store the absolute value flag.
            if not hasattr(interatom, 'absolute_rdc'):
                interatom.absolute_rdc = {}
            interatom.absolute_rdc[align_id] = absolute

        # Convert and add the error.
        if error_col:
            # Data conversion.
            error = convert(error, data_type, align_id, to_intern=True)

            # Initialise.
            if not hasattr(interatom, 'rdc_err'):
                interatom.rdc_err = {}

            # Append the error.
            interatom.rdc_err[align_id] = error

        # Append the data for printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No RDC data could be extracted.")

    # Print out.
    print("The following RDCs have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)

    # Initialise some global structures.
    if not hasattr(cdp, 'align_ids'):
        cdp.align_ids = []
    if not hasattr(cdp, 'rdc_ids'):
        cdp.rdc_ids = []

    # Add the RDC ID string.
    if align_id not in cdp.align_ids:
        cdp.align_ids.append(align_id)
    if align_id not in cdp.rdc_ids:
        cdp.rdc_ids.append(align_id)
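# A minimal standalone sketch (not relax code) of the RDC value corrections applied above: the
# optional sign inversion compensating for the negative gyromagnetic ratio of 15N, and the
# conversion to signless values when absolute=True.
def _demo_rdc_corrections(value, neg_g_corr=False, absolute=False):
    """Mirror the neg_g_corr and absolute handling of the RDC read() function."""
    if neg_g_corr and value is not None:
        value = -value
    if absolute:
        value = abs(value)
    return value

# _demo_rdc_corrections(-8.2, neg_g_corr=True)  -> 8.2
# _demo_rdc_corrections(-8.2, absolute=True)    -> 8.2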
def read_list(peak_list=None, file_data=None, int_col=None):
    """Extract the peak intensity information from the NMRView peak intensity file.

    @keyword peak_list: The peak list object to place all data into.
    @type peak_list:    lib.spectrum.objects.Peak_list instance
    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data.  The default is 16 for intensities.  Setting the int_col argument to 15 will use the volumes (or evolumes).  For a non-standard formatted file, use a different value.
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    """

    # Assume the NMRView file has six header lines!
    num = 6
    print("Number of header lines: %s" % num)

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # The chemical shift columns.
    w2_col = 2
    w1_col = 9

    # The peak intensity column.
    if int_col == None:
        int_col = 16
    if int_col == 16:
        print('Using peak heights.')
    if int_col == 15:
        print('Using peak volumes (or evolumes).')

    # Loop over the file data.
    for line in file_data:
        # Unknown assignment.
        if line[1] == '{}':
            warn(RelaxWarning("The assignment '%s' is unknown, skipping this peak." % line[1]))
            continue

        # The residue number.
        res_num = ''
        try:
            res_num = line[1].strip('{')
            res_num = res_num.strip('}')
            res_num = res_num.split('.')
            res_num = int(res_num[0])
        except ValueError:
            raise RelaxError("Improperly formatted NMRView file, cannot process the assignment '%s'." % line[1])

        # Nuclei names.
        name2 = ''
        if line[1] != '{}':
            name2 = line[1].strip('{')
            name2 = name2.strip('}')
            name2 = name2.split('.')
            name2 = name2[1]
        name1 = ''
        if line[8] != '{}':
            name1 = line[8].strip('{')
            name1 = name1.strip('}')
            name1 = name1.split('.')
            name1 = name1[1]

        # Chemical shifts.
        w1 = None
        w2 = None
        if w1_col != None:
            try:
                w1 = float(line[w1_col])
            except ValueError:
                raise RelaxError("The chemical shift from the line %s is invalid." % line)
        if w2_col != None:
            try:
                w2 = float(line[w2_col])
            except ValueError:
                raise RelaxError("The chemical shift from the line %s is invalid." % line)

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            # Report the raw column text, as the float conversion failed and 'intensity' is undefined here.
            raise RelaxError("The peak intensity value " + repr(line[int_col]) + " from the line " + repr(line) + " is invalid.")

        # Add the assignment to the peak list object.
        peak_list.add(res_nums=[res_num, res_num], spin_names=[name1, name2], shifts=[w1, w2], intensity=intensity)
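# A standalone sketch of the NMRView assignment parsing used above: entries such as '{10.N}' are
# stripped of their braces and split on the dot to give the residue number and the atom name
# (hypothetical example values).
assignment = '{10.N}'
stripped = assignment.strip('{').strip('}')   # '10.N'
res_num = int(stripped.split('.')[0])         # 10
name = stripped.split('.')[1]                 # 'N'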
def intensity_generic(peak_list=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, sep=None, spin_id=None):
    """Extract the peak intensity information from the generic column formatted peak list.

    @keyword peak_list:     The peak list object to place all data into.
    @type peak_list:        lib.spectrum.objects.Peak_list instance
    @keyword file_data:     The data extracted from the file converted into a list of lists.
    @type file_data:        list of lists of str
    @keyword spin_id_col:   The column containing the spin ID strings (used by the generic intensity file format).  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information (used by the generic intensity file format).  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the peak intensities.
    @type data_col:         int or list of int
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @raises RelaxError:     When the expected peak intensity is not a float.
    """

    # Strip the data.
    file_data = strip(file_data)

    # Check the intensity column argument.
    data_present = True
    if data_col == None:
        warn(RelaxWarning("The data column argument has not been supplied, so the function will only return spin data."))
        data_present = False

    # Convert the data_col argument to a list if needed.
    if not isinstance(data_col, list):
        data_col = [data_col]

    # Loop over the file data.
    for line in file_data:
        # Loop over the intensity columns, storing the data.
        intensity = []
        data_flag = False
        for i in range(len(data_col)):
            # Extract the data for the single line (loop of a single element).
            for values in read_spin_data(file_data=[line], spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col[i], sep=sep, spin_id=spin_id, raise_flag=False):
                # The data flag.
                data_flag = True

                # Check the values.
                if len(values) != 6 and data_present:
                    raise RelaxError("The molecule name, residue number and name, spin number and name, and value columns could not be found in the data %s." % repr(values))

                # Unpack when peak data is present.
                elif data_present:
                    # Unpack.
                    mol_name, res_num, res_name, spin_num, spin_name, value = values

                    # Store the intensity.
                    intensity.append(value)

                # Unpack when peak data is not present.
                elif not data_present:
                    # Unpack.
                    mol_name, res_num, res_name, spin_num, spin_name = values

        # Add the assignment to the peak list object.
        if data_flag:
            peak_list.add(mol_names=[mol_name, mol_name], res_nums=[res_num, res_num], res_names=[res_name, res_name], spin_nums=[spin_num, spin_num], spin_names=[spin_name, spin_name], intensity=intensity)
def read_dist(file=None, dir=None, unit='meter', spin_id1_col=None, spin_id2_col=None, data_col=None, sep=None):
    """Set up the magnetic dipole-dipole interaction.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the averaged distances in meters.
    @type data_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Test if the current data pipe exists.
    pipes.test()

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Extract the data from the file, and clean it up.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the distance data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        ave_dist = None
        if data_col:
            ave_dist = line[data_col-1]

        # Convert and check the value.
        if ave_dist != None:
            try:
                ave_dist = float(ave_dist)
            except ValueError:
                warn(RelaxWarning("The averaged distance of '%s' from the line %s is invalid." % (ave_dist, line)))
                continue

        # Unit conversion.
        if unit == 'Angstrom':
            ave_dist = ave_dist * 1e-10

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # No container found, so create it.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2, verbose=True)

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be extracted from the file.")

    # Print out.
    print("The following averaged distances have been read:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
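# A trivial standalone sketch of the unit handling above: distances supplied in Angstrom are
# converted to meters before being stored in interatom.r (assumes the unit has been validated).
def _demo_to_meters(ave_dist, unit='meter'):
    """Mirror the unit conversion of read_dist()."""
    if unit == 'Angstrom':
        ave_dist = ave_dist * 1e-10
    return ave_dist

# _demo_to_meters(1.02, unit='Angstrom') -> 1.02e-10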
def read_list(peak_list=None, file_data=None, int_col=None):
    """Extract the peak intensity information from the XEasy file.

    @keyword peak_list: The peak list object to place all data into.
    @type peak_list:    lib.spectrum.objects.Peak_list instance
    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    """

    # The hardcoded column positions (note that w1 and w2 are swapped!).
    w1_col = 3
    w2_col = 2
    ass_w1_col = 7
    ass_w2_col = 4
    res_name1_col = 9
    res_name2_col = 5
    if int_col == None:
        int_col = 10

    # Determine the number of header lines.
    num = 0
    for line in file_data:
        # Try to see if the intensity can be extracted.
        try:
            intensity = float(line[int_col])
        except ValueError:
            num = num + 1
        except IndexError:
            num = num + 1
        else:
            break
    print("Number of header lines found: " + repr(num))

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # Loop over the file data.
    for line in file_data:
        # Test for invalid assignment lines which have the column numbers changed and return empty data.
        if line[ass_w1_col] == 'inv.' or line[ass_w2_col] == 'inv.':
            continue

        # The residue number for dimension 1.
        try:
            res_num1 = int(line[8])
        except:
            raise RelaxError("Improperly formatted XEasy file, cannot process the residue number for dimension 1 in assignment: %s." % line)

        # The residue number for dimension 2.
        try:
            res_num2 = int(line[5])
        except:
            warn(RelaxWarning("Improperly formatted XEasy file, cannot process the residue number for dimension 2 in assignment: %s. Setting residue number to None." % line))
            res_num2 = None

        # Nuclei names.
        name1 = line[ass_w1_col]
        name2 = line[ass_w2_col]

        # Residue names.
        res_name1 = line[res_name1_col]
        res_name2 = line[res_name2_col]

        # Chemical shifts.
        try:
            w1 = float(line[w1_col])
        except ValueError:
            raise RelaxError("The w1 chemical shift from the line %s is invalid." % line)
        try:
            w2 = float(line[w2_col])
        except ValueError:
            raise RelaxError("The w2 chemical shift from the line %s is invalid." % line)

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value from the line %s is invalid." % line)

        # Add the assignment to the peak list object.
        peak_list.add(res_nums=[res_num1, res_num2], res_names=[res_name1, res_name2], spin_names=[name1, name2], shifts=[w1, w2], intensity=intensity)
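# A standalone sketch (not relax code) of the XEasy header detection above: lines are counted as
# header lines until the intensity column parses as a float.  The rows are hypothetical and the
# int_col index is a 0-indexed position within the split line, as in the function.
def _demo_count_header(file_data, int_col=10):
    """Mirror the header counting loop of the XEasy read_list()."""
    num = 0
    for line in file_data:
        try:
            float(line[int_col])
        except (ValueError, IndexError):
            num += 1
        else:
            break
    return num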
def read_spin_data(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, raise_flag=True):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function reads a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.


    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @keyword raise_flag:    A flag which if True will cause a RelaxError to be raised if no data can be found.
    @type raise_flag:       bool
    @return:                A list of the spin specific data is yielded.  The format is a list consisting of the spin ID string, the data value (if data_col is given), and the error value (if error_col is given).  If both data_col and error_col are None, then the spin ID string is simply yielded.
    @rtype:                 str, list of [str, float], or list of [str, float, float]
    """

    # Argument tests.
    col_args = [spin_id_col, mol_name_col, res_name_col, res_num_col, spin_name_col, spin_num_col, data_col, error_col]
    col_arg_names = ['spin_id_col', 'mol_name_col', 'res_name_col', 'res_num_col', 'spin_name_col', 'spin_num_col', 'data_col', 'error_col']
    for i in range(len(col_args)):
        if col_args[i] == 0:
            raise RelaxError("The '%s' argument cannot be zero, column numbering starts at one." % col_arg_names[i])
    if spin_id_col and (mol_name_col or res_name_col or res_num_col or spin_name_col or spin_num_col):
        raise RelaxError("If the 'spin_id_col' argument has been supplied, then the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col must all be set to None.")

    # Minimum number of columns.
    min_col_num = max([_f for _f in [spin_id_col, mol_name_col, res_num_col, res_name_col, spin_num_col, spin_name_col, data_col, error_col] if _f])

    # Extract the data from the file.
    if not file_data:
        # Extract.
        file_data = extract_data(file, dir, sep=sep)

        # Strip the data of all comments and empty lines.
        if spin_id_col != None:
            file_data = strip(file_data, comments=False)
        else:
            file_data = strip(file_data)

    # No data!
    if not file_data:
        warn(RelaxFileEmptyWarning(file))
        return

    # Yield the data, spin by spin.
    missing_data = True
    for line in file_data:
        # Convert the spin IDs.
        if spin_id_col != None and line[spin_id_col-1][0] in ["\"", "\'"]:
            line[spin_id_col-1] = eval(line[spin_id_col-1])    # Convert.

        # Validate the sequence.
        if not check_sequence(line, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, escalate=1):
            continue

        # Get the spin data from the ID.
        if spin_id_col:
            # Invalid spin ID.
            if line[spin_id_col-1] == '#':
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

            mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(line[spin_id_col-1])

        # Convert the spin data.
        else:
            # The molecule.
            mol_name = None
            if mol_name_col != None and line[mol_name_col-1] != 'None':
                mol_name = line[mol_name_col-1]

            # The residue number, catching bad values.
            res_num = None
            if res_num_col != None:
                try:
                    if line[res_num_col-1] == 'None':
                        res_num = None
                    else:
                        res_num = int(line[res_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid residue number, skipping the line %s" % line))
                    continue

            # The residue name.
            res_name = None
            if res_name_col != None and line[res_name_col-1] != 'None':
                res_name = line[res_name_col-1]

            # The spin number, catching bad values.
            spin_num = None
            if spin_num_col != None:
                try:
                    if line[spin_num_col-1] == 'None':
                        spin_num = None
                    else:
                        spin_num = int(line[spin_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid spin number, skipping the line %s" % line))
                    continue

            # The spin name.
            spin_name = None
            if spin_name_col != None and line[spin_name_col-1] != 'None':
                spin_name = line[spin_name_col-1]

        # Convert the data.
        value = None
        if data_col != None:
            try:
                # None.
                if line[data_col-1] == 'None':
                    value = None

                # A float.
                else:
                    value = float(line[data_col-1])

                    # If it is a float, test whether it is NaN.
                    if isnan(value):
                        warn(RelaxWarning("The value is 'nan', skipping the line %s" % line))
                        continue

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid data, skipping the line %s" % line))
                continue

        # Convert the errors.
        error = None
        if error_col != None:
            try:
                # None.
                if line[error_col-1] == 'None':
                    error = None

                # A float.
                else:
                    error = float(line[error_col-1])

                    # If it is a float, test whether it is NaN.
                    if isnan(error):
                        warn(RelaxWarning("The error is 'nan', skipping the line %s" % line))
                        continue

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid errors, skipping the line %s" % line))
                continue

        # Right, the data is OK and exists.
        missing_data = False

        # Yield the data.
        if data_col and error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value, error
        elif data_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value
        elif error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, error
        else:
            yield mol_name, res_num, res_name, spin_num, spin_name

    # Hmmm, no data!
    if raise_flag and missing_data:
        raise RelaxError("No corresponding data could be found within the file.")
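# A standalone sketch (not relax code) of the per-column value conversion performed above: the
# literal string 'None' maps to None, any other entry must parse as a float, and NaN values cause
# the line to be skipped by the generator.
from math import isnan

def _demo_convert_value(text):
    """Mirror the data column conversion of read_spin_data()."""
    if text == 'None':
        return None
    value = float(text)          # Raises ValueError for bad data.
    if isnan(value):
        raise ValueError("NaN value")
    return value

# _demo_convert_value('0.85') -> 0.85
# _demo_convert_value('None') -> None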
def read(file=None, dir=None, file_data=None, spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sign_col=None, sep=None):
    """Read the J coupling data from file.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the J coupling data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the J coupling errors.
    @type error_col:        int or None
    @keyword sign_col:      The optional column containing the sign of the J coupling.
    @type sign_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("One of either the data or error column must be supplied.")

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the J coupling data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue
        if sign_col and sign_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no sign column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]
        sign = None
        if sign_col:
            sign = line[sign_col-1]

        # Convert the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The J coupling value of the line %s is invalid." % line))
                continue

        # The sign data.
        if sign == 'None':
            sign = None
        if sign != None:
            try:
                sign = float(sign)
            except ValueError:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue
            if sign not in [1.0, -1.0]:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id1)
        spin2 = return_spin(spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Test the error value (cannot be 0.0).
        if error == 0.0:
            raise RelaxError("An invalid error value of zero has been encountered.")

        # Get the interatomic data container.
        interatom = return_interatom(spin_id1, spin_id2)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Add the data.
        if data_col:
            # Sign conversion.
            if sign != None:
                value = value * sign

            # Add the value.
            interatom.j_coupling = value

        # Add the error.
        if error_col:
            interatom.j_coupling_err = error

        # Append the data for printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No J coupling data could be extracted.")

    # Print out.
    print("The following J couplings have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)
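# A minimal standalone sketch of the optional sign handling above: when a sign column is supplied,
# the J coupling magnitude is multiplied by +/-1.0 before being stored in interatom.j_coupling.
def _demo_apply_sign(value, sign=None):
    """Mirror the sign conversion of the J coupling read() function."""
    if sign is not None:
        value = value * sign
    return value

# _demo_apply_sign(93.0, sign=-1.0) -> -93.0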
def read_spin_data(file=None, dir=None, file_data=None, spin_id_col=None, mol_name_col=None, res_num_col=None, res_name_col=None, spin_num_col=None, spin_name_col=None, data_col=None, error_col=None, sep=None, spin_id=None, raise_flag=True):
    """Generator function for reading the spin specific data from file.

    Description
    ===========

    This function reads a columnar formatted file where each line corresponds to a spin system.  Spin identification is either through a spin ID string or through columns containing the molecule name, residue name and number, and/or spin name and number.


    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id_col:   The column containing the spin ID strings.  If supplied, the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col arguments must be None.
    @type spin_id_col:      int or None
    @keyword mol_name_col:  The column containing the molecule name information.  If supplied, spin_id_col must be None.
    @type mol_name_col:     int or None
    @keyword res_name_col:  The column containing the residue name information.  If supplied, spin_id_col must be None.
    @type res_name_col:     int or None
    @keyword res_num_col:   The column containing the residue number information.  If supplied, spin_id_col must be None.
    @type res_num_col:      int or None
    @keyword spin_name_col: The column containing the spin name information.  If supplied, spin_id_col must be None.
    @type spin_name_col:    int or None
    @keyword spin_num_col:  The column containing the spin number information.  If supplied, spin_id_col must be None.
    @type spin_num_col:     int or None
    @keyword data_col:      The column containing the data.
    @type data_col:         int or None
    @keyword error_col:     The column containing the errors.
    @type error_col:        int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    @keyword spin_id:       The spin ID string used to restrict data loading to a subset of all spins.
    @type spin_id:          None or str
    @keyword raise_flag:    A flag which if True will cause a RelaxError to be raised if no data can be found.
    @type raise_flag:       bool
    @return:                A list of the spin specific data is yielded.  The format is a list consisting of the spin ID string, the data value (if data_col is given), and the error value (if error_col is given).  If both data_col and error_col are None, then the spin ID string is simply yielded.
    @rtype:                 str, list of [str, float], or list of [str, float, float]
    """

    # Argument tests.
    col_args = [spin_id_col, mol_name_col, res_name_col, res_num_col, spin_name_col, spin_num_col, data_col, error_col]
    col_arg_names = ['spin_id_col', 'mol_name_col', 'res_name_col', 'res_num_col', 'spin_name_col', 'spin_num_col', 'data_col', 'error_col']
    for i in range(len(col_args)):
        if col_args[i] == 0:
            raise RelaxError("The '%s' argument cannot be zero, column numbering starts at one." % col_arg_names[i])
    if spin_id_col and (mol_name_col or res_name_col or res_num_col or spin_name_col or spin_num_col):
        raise RelaxError("If the 'spin_id_col' argument has been supplied, then the mol_name_col, res_name_col, res_num_col, spin_name_col, and spin_num_col must all be set to None.")

    # Minimum number of columns.
    min_col_num = max([_f for _f in [spin_id_col, mol_name_col, res_num_col, res_name_col, spin_num_col, spin_name_col, data_col, error_col] if _f])

    # Extract the data from the file.
    if not file_data:
        # Extract.
        file_data = extract_data(file, dir, sep=sep)

        # Strip the data of all comments and empty lines.
        if spin_id_col != None:
            file_data = strip(file_data, comments=False)
        else:
            file_data = strip(file_data)

    # No data!
    if not file_data:
        warn(RelaxFileEmptyWarning(file))
        return

    # Yield the data, spin by spin.
    missing_data = True
    for line in file_data:
        # Convert the spin IDs.
        if spin_id_col != None and line[spin_id_col-1][0] in ["\"", "\'"]:
            line[spin_id_col-1] = eval(line[spin_id_col-1])    # Convert.

        # Validate the sequence.
        if not check_sequence(line, spin_id_col=spin_id_col, mol_name_col=mol_name_col, res_num_col=res_num_col, res_name_col=res_name_col, spin_num_col=spin_num_col, spin_name_col=spin_name_col, data_col=data_col, error_col=error_col, escalate=1):
            continue

        # Get the spin data from the ID.
        if spin_id_col:
            # Invalid spin ID.
            if line[spin_id_col-1] == '#':
                warn(RelaxWarning("Invalid spin ID, skipping the line %s" % line))
                continue

            mol_name, res_num, res_name, spin_num, spin_name = spin_id_to_data_list(line[spin_id_col-1])

        # Convert the spin data.
        else:
            # The molecule.
            mol_name = None
            if mol_name_col != None and line[mol_name_col-1] != 'None':
                mol_name = line[mol_name_col-1]

            # The residue number, catching bad values.
            res_num = None
            if res_num_col != None:
                try:
                    if line[res_num_col-1] == 'None':
                        res_num = None
                    else:
                        res_num = int(line[res_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid residue number, skipping the line %s" % line))
                    continue

            # The residue name.
            res_name = None
            if res_name_col != None and line[res_name_col-1] != 'None':
                res_name = line[res_name_col-1]

            # The spin number, catching bad values.
            spin_num = None
            if spin_num_col != None:
                try:
                    if line[spin_num_col-1] == 'None':
                        spin_num = None
                    else:
                        spin_num = int(line[spin_num_col-1])
                except ValueError:
                    warn(RelaxWarning("Invalid spin number, skipping the line %s" % line))
                    continue

            # The spin name.
            spin_name = None
            if spin_name_col != None and line[spin_name_col-1] != 'None':
                spin_name = line[spin_name_col-1]

        # Convert the data.
        value = None
        if data_col != None:
            try:
                # None.
                if line[data_col-1] == 'None':
                    value = None

                # A float.
                else:
                    value = float(line[data_col-1])

                    # If it is a float, test whether it is finite.
                    if not isFinite(value):
                        warn(RelaxWarning("The value is not finite, skipping the line %s" % line))
                        continue

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid data, skipping the line %s" % line))
                continue

        # Convert the errors.
        error = None
        if error_col != None:
            try:
                # None.
                if line[error_col-1] == 'None':
                    error = None

                # A float.
                else:
                    error = float(line[error_col-1])

                    # If it is a float, test whether it is finite.
                    if not isFinite(error):
                        warn(RelaxWarning("The error is not finite, skipping the line %s" % line))
                        continue

            # Bad data.
            except ValueError:
                warn(RelaxWarning("Invalid errors, skipping the line %s" % line))
                continue

        # Right, the data is OK and exists.
        missing_data = False

        # Yield the data.
        if data_col and error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value, error
        elif data_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, value
        elif error_col:
            yield mol_name, res_num, res_name, spin_num, spin_name, error
        else:
            yield mol_name, res_num, res_name, spin_num, spin_name

    # Hmmm, no data!
    if raise_flag and missing_data:
        raise RelaxError("No corresponding data could be found within the file.")
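# A standalone sketch of the quoted spin ID handling above: IDs written with surrounding quotes in
# the file (e.g. "'@N'") are passed through eval() to strip the quoting before use.  The example
# string is hypothetical.
quoted = "'@N'"
if quoted[0] in ["\"", "\'"]:
    spin_id = eval(quoted)       # '@N'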
def read_dist(file=None, dir=None, unit='meter', spin_id1_col=None, spin_id2_col=None, data_col=None, sep=None):
    """Set up the magnetic dipole-dipole interaction.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword unit:          The measurement unit.  This can be either 'meter' or 'Angstrom'.
    @type unit:             str
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the averaged distances in meters.
    @type data_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the units.
    if unit not in ['meter', 'Angstrom']:
        raise RelaxError("The measurement unit of '%s' must be one of 'meter' or 'Angstrom'." % unit)

    # Test if the current data pipe exists.
    check_pipe()

    # Test if sequence data exists.
    if not exists_mol_res_spin_data():
        raise RelaxNoSequenceError

    # Extract the data from the file, and clean it up.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the averaged distance data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        ave_dist = None
        if data_col:
            ave_dist = line[data_col-1]

        # Convert and check the value.
        if ave_dist != None:
            try:
                ave_dist = float(ave_dist)
            except ValueError:
                warn(RelaxWarning("The averaged distance of '%s' from the line %s is invalid." % (ave_dist, line)))
                continue

            # Unit conversion.
            if unit == 'Angstrom':
                ave_dist = ave_dist * 1e-10

        # Get the interatomic data container.
        spin1 = return_spin(spin_id=spin_id1)
        spin2 = return_spin(spin_id=spin_id2)
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash)

        # No container found, so create it.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2, verbose=True)

        # Store the averaged distance.
        interatom.r = ave_dist

        # Store the data for the printout.
        data.append([repr(interatom.spin_id1), repr(interatom.spin_id2), repr(ave_dist)])

    # No data, so fail!
    if not len(data):
        raise RelaxError("No data could be extracted from the file.")

    # Print out.
    print("The following averaged distances have been read:\n")
    write_data(out=sys.stdout, headings=["Spin_ID_1", "Spin_ID_2", "Ave_distance(meters)"], data=data)
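
# Example usage of read_dist() (a sketch only -- the file name, column layout, and directory are
# hypothetical, and a data pipe containing the matching sequence data is assumed to already exist):
#
#     read_dist(file='ave_dist.txt', dir='data', unit='Angstrom',
#               spin_id1_col=1, spin_id2_col=2, data_col=3)
#
# Each line of such a file would hold two spin ID strings followed by an averaged distance, which
# is converted to meters before being stored in interatom.r.
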
def read(file=None, dir=None, file_data=None, spin_id1_col=None, spin_id2_col=None, data_col=None, error_col=None, sign_col=None, sep=None):
    """Read the J coupling data from file.

    @keyword file:          The name of the file to open.
    @type file:             str
    @keyword dir:           The directory containing the file (defaults to the current directory if None).
    @type dir:              str or None
    @keyword file_data:     An alternative to opening a file, if the data already exists in the correct format.  The format is a list of lists where the first index corresponds to the row and the second the column.
    @type file_data:        list of lists
    @keyword spin_id1_col:  The column containing the spin ID strings of the first spin.
    @type spin_id1_col:     int
    @keyword spin_id2_col:  The column containing the spin ID strings of the second spin.
    @type spin_id2_col:     int
    @keyword data_col:      The column containing the J coupling data in Hz.
    @type data_col:         int or None
    @keyword error_col:     The column containing the J coupling errors.
    @type error_col:        int or None
    @keyword sign_col:      The optional column containing the sign of the J coupling.
    @type sign_col:         int or None
    @keyword sep:           The column separator which, if None, defaults to whitespace.
    @type sep:              str or None
    """

    # Check the pipe setup.
    check_pipe_setup(sequence=True)

    # Either the data or error column must be supplied.
    if data_col == None and error_col == None:
        raise RelaxError("One of either the data or error column must be supplied.")

    # Extract the data from the file, and remove comments and blank lines.
    file_data = extract_data(file, dir, sep=sep)
    file_data = strip(file_data, comments=True)

    # Loop over the J coupling data.
    data = []
    for line in file_data:
        # Invalid columns.
        if spin_id1_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no first spin ID column can be found." % line))
            continue
        if spin_id2_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no second spin ID column can be found." % line))
            continue
        if data_col and data_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no data column can be found." % line))
            continue
        if error_col and error_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no error column can be found." % line))
            continue
        if sign_col and sign_col > len(line):
            warn(RelaxWarning("The data %s is invalid, no sign column can be found." % line))
            continue

        # Unpack.
        spin_id1 = line[spin_id1_col-1]
        spin_id2 = line[spin_id2_col-1]
        value = None
        if data_col:
            value = line[data_col-1]
        error = None
        if error_col:
            error = line[error_col-1]
        sign = None
        if sign_col:
            sign = line[sign_col-1]

        # Convert the spin IDs.
        if spin_id1[0] in ["\"", "\'"]:
            spin_id1 = eval(spin_id1)
        if spin_id2[0] in ["\"", "\'"]:
            spin_id2 = eval(spin_id2)

        # Convert and check the value.
        if value == 'None':
            value = None
        if value != None:
            try:
                value = float(value)
            except ValueError:
                warn(RelaxWarning("The J coupling value of the line %s is invalid." % line))
                continue

        # The sign data.
        if sign == 'None':
            sign = None
        if sign != None:
            try:
                sign = float(sign)
            except ValueError:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue
            if sign not in [1.0, -1.0]:
                warn(RelaxWarning("The J coupling sign of the line %s is invalid." % line))
                continue

        # Convert and check the error.
        if error == 'None':
            error = None
        if error != None:
            try:
                error = float(error)
            except ValueError:
                warn(RelaxWarning("The error value of the line %s is invalid." % line))
                continue

        # Get the spins.
        spin1 = return_spin(spin_id=spin_id1)
        spin2 = return_spin(spin_id=spin_id2)

        # Check the spin IDs.
        if not spin1:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id1, line)))
            continue
        if not spin2:
            warn(RelaxWarning("The spin ID '%s' cannot be found in the current data pipe, skipping the data %s." % (spin_id2, line)))
            continue

        # Test the error value (cannot be 0.0).
        if error == 0.0:
            raise RelaxError("An invalid error value of zero has been encountered.")

        # Get the interatomic data container.
        interatom = return_interatom(spin_hash1=spin1._hash, spin_hash2=spin2._hash)

        # Create the container if needed.
        if interatom == None:
            interatom = create_interatom(spin_id1=spin_id1, spin_id2=spin_id2)

        # Add the data.
        if data_col:
            # Sign conversion.
            if sign != None:
                value = value * sign

            # Add the value.
            interatom.j_coupling = value

        # Add the error.
        if error_col:
            interatom.j_coupling_err = error

        # Append the data for printout.
        data.append([spin_id1, spin_id2])
        if is_float(value):
            data[-1].append("%20.15f" % value)
        else:
            data[-1].append("%20s" % value)
        if is_float(error):
            data[-1].append("%20.15f" % error)
        else:
            data[-1].append("%20s" % error)

    # No data, so fail hard!
    if not len(data):
        raise RelaxError("No J coupling data could be extracted.")

    # Print out.
    print("The following J couplings have been loaded into the relax data store:\n")
    write_data(out=sys.stdout, headings=["Spin_ID1", "Spin_ID2", "Value", "Error"], data=data)
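
# Example usage of the J coupling read() function (a sketch only -- the file name and column
# layout are hypothetical, and the current data pipe is assumed to already contain the two spins
# referenced on each line):
#
#     read(file='j_couplings.txt', spin_id1_col=1, spin_id2_col=2,
#          data_col=3, error_col=4, sign_col=5)
#
# The optional sign column allows magnitude-only J values to be flipped to the correct sign
# before being stored in interatom.j_coupling.
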
def read_list(peak_list=None, file_data=None, int_col=None):
    """Extract the peak intensity information from the XEasy file.

    @keyword peak_list: The peak list object to place all data into.
    @type peak_list:    lib.spectrum.objects.Peak_list instance
    @keyword file_data: The data extracted from the file converted into a list of lists.
    @type file_data:    list of lists of str
    @keyword int_col:   The column containing the peak intensity data (for a non-standard formatted file).
    @type int_col:      int
    @raises RelaxError: When the expected peak intensity is not a float.
    """

    # The hardcoded column positions (note that w1 and w2 are swapped!).
    w1_col = 3
    w2_col = 2
    ass_w1_col = 7
    ass_w2_col = 4
    res_name1_col = 9
    res_name2_col = 5
    if int_col == None:
        int_col = 10

    # Determine the number of header lines.
    num = 0
    for line in file_data:
        # Try to see if the intensity can be extracted.
        try:
            intensity = float(line[int_col])
        except ValueError:
            num = num + 1
        except IndexError:
            num = num + 1
        else:
            break
    print("Number of header lines found: " + repr(num))

    # Remove the header.
    file_data = file_data[num:]

    # Strip the data.
    file_data = strip(file_data)

    # Loop over the file data.
    for line in file_data:
        # Test for invalid assignment lines which have the column numbers changed and return empty data.
        if line[ass_w1_col] == 'inv.' or line[ass_w2_col] == 'inv.':
            continue

        # The residue number for dimension 1.
        try:
            res_num1 = int(line[8])
        except:
            raise RelaxError("Improperly formatted XEasy file, cannot process the residue number for dimension 1 in assignment: %s." % line)

        # The residue number for dimension 2.
        try:
            res_num2 = int(line[5])
        except:
            warn(RelaxWarning("Improperly formatted XEasy file, cannot process the residue number for dimension 2 in assignment: %s. Setting residue number to None." % line))
            res_num2 = None

        # Nuclei names.
        name1 = line[ass_w1_col]
        name2 = line[ass_w2_col]

        # Residue names.
        res_name1 = line[res_name1_col]
        res_name2 = line[res_name2_col]

        # Chemical shifts.
        try:
            w1 = float(line[w1_col])
        except ValueError:
            raise RelaxError("The w1 chemical shift from the line %s is invalid." % line)
        try:
            w2 = float(line[w2_col])
        except ValueError:
            raise RelaxError("The w2 chemical shift from the line %s is invalid." % line)

        # Intensity.
        try:
            intensity = float(line[int_col])
        except ValueError:
            raise RelaxError("The peak intensity value from the line %s is invalid." % line)

        # Add the assignment to the peak list object.
        peak_list.add(res_nums=[res_num1, res_num2], res_names=[res_name1, res_name2], spin_names=[name1, name2], shifts=[w1, w2], intensity=intensity)
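
# Example usage of read_list() (a sketch only -- the Peak_list import path follows the docstring
# above, the extract_data() helper is the same one used elsewhere in this module, and the file
# name is hypothetical):
#
#     from lib.spectrum.objects import Peak_list
#
#     peak_list = Peak_list()
#     file_data = extract_data(file='xeasy_peaks.text')
#     read_list(peak_list=peak_list, file_data=file_data)
#
# The XEasy header lines are detected and skipped automatically, so the raw extracted file data
# can be passed in unmodified.
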