def get_chain_type(res_name):
    """Return an integer code describing what kind of residue `res_name` is.

    The code is 0 when Library.library_is_amino_acid() accepts the name,
    1 when Library.library_is_nucleic_acid() accepts it, and 9 otherwise.
    In the last case a warning line is also printed to stdout.
    """
    if Library.library_is_amino_acid(res_name):
        return 0

    if Library.library_is_nucleic_acid(res_name):
        return 1

    ## Neither library lookup recognised the residue name.
    print("# UNKOWN RESIDUE!")
    return 9
def get_chain_type(res_name):
    """Map a residue name onto an integer chain-type code.

    Returns 0 for names Library reports as amino acids, 1 for names it
    reports as nucleic acids, and 9 for anything else (a warning line is
    printed to stdout in that case). The nucleic-acid lookup is only
    consulted when the amino-acid lookup fails.
    """
    chain_type = 9
    if Library.library_is_amino_acid(res_name):
        chain_type = 0
    elif Library.library_is_nucleic_acid(res_name):
        chain_type = 1
    else:
        ## Unknown residue type
        print("# UNKOWN RESIDUE!")
    return chain_type
def read_pdb(path):
    """Read a PDB file and collect its TLS header lines and backbone atoms.

    The records returned by ``pdbmodule.read(path)`` are scanned once:

      * REMARK text is searched for the "overall anisotropic B value"
        block and for TLS range/origin lines.
      * ATOM/HETATM records of standard residues whose atom name is in
        ``Constants.BACKBONE_ATOMS`` are stored as dicts, with the Uij
        values initialised isotropically from the B-factor.
      * ANISOU records for those same atoms overwrite the Uij values of
        the most recently stored atom.

    Args:
        path: Passed straight to ``pdbmodule.read``.

    Returns:
        A 3-tuple ``(tls_text, overall, atom_obj)``.  ``tls_text`` is the
        collected "#RANGE"/"#ORIGIN" lines joined with newlines.
        ``overall`` is a dict mapping 0..5 to the floats captured by
        re_b11, re_b22, re_b33, re_b12, re_b13 and re_b23 respectively
        (an entry is absent when the value was "NULL" or never seen).
        ``atom_obj`` is the list of atom dicts in file order.

    Raises:
        ValueError: If a captured B value or TLS origin component cannot
            be converted to float.
    """
    ## Index in this tuple == key in the `overall` dict.
    overall_patterns = (re_b11, re_b22, re_b33, re_b12, re_b13, re_b23)
    uij_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
                "u[0][1]", "u[0][2]", "u[1][2]")

    ## When True, the overall anisotropic B components are added to every
    ## ANISOU value that is stored. (default: False)
    ADD_B_OVERALL = False

    atom_obj = []
    listx = []
    overall = {}

    ## 0: waiting for the "overall B" header line
    ## 1: inside the B11..B23 lines
    ## 2: B23 has been seen, nothing more to capture
    state = 0

    for rec in pdbmodule.read(path):
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            ## Capture the "OVERALL ANISOTROPIC B VALUE" fields
            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for idx, pattern in enumerate(overall_patterns):
                    m = pattern.match(text)
                    if m is None:
                        continue
                    ## Each component is tested against its *own* captured
                    ## value; previously every component after B11 tested
                    ## the B22 capture instead.
                    value = m.group(1)
                    if value is not None and value != "NULL":
                        overall[idx] = float(value)
                    if idx == len(overall_patterns) - 1:
                        ## The B23 line closes the block.
                        state = 2

            ## Capture the TLS "RANGE"s fields
            m = re_tls_range.match(text)
            if m is not None:
                (chain_id1, frag_id1, chain_id2, frag_id2) = m.groups()
                sel = "ALL"
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)
                listx.append("#RANGE  '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), sel))

            ## Capture the TLS "ORIGIN"s fields
            m = re_tls_origin.match(text)
            if m is not None:
                strx = m.group(1)
                ## The numbers cannot be trusted to stay in fixed columns,
                ## so each one is located through its decimal point: up to
                ## four characters before it and four after it.
                ox = [0.0, 0.0, 0.0]
                for i in (0, 1, 2):
                    j = strx.find(".")
                    if j == -1:
                        break
                    ox[i] = float(strx[max(0, j - 4):j + 5])
                    strx = strx[j + 5:]
                listx.append("#ORIGIN   %8.4f %8.4f %8.4f" % (
                    ox[0], ox[1], ox[2]))

        ## Now, capture the ATOM and/or HETATM fields
        ## NOTE(review): PDB record names are six columns wide; confirm that
        ## the "ATOM " literal below is exactly what pdbmodule reports in
        ## rec["RECORD"].
        elif rec_type == "ATOM " or rec_type == "HETATM":
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## Initialize ANISOU values to isotropic = I3x3 * B/8*pi^2
                ## in case this atom has no ANISOU record, e.g.
                ## if it is next to but not in a TLS group
                Uiso = rec["tempFactor"] * B2UE4
                atom_obj.append({
                    "RECORD": "",
                    "element": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                    "u[0][0]": Uiso,
                    "u[1][1]": Uiso,
                    "u[2][2]": Uiso,
                    "u[0][1]": 0.0,
                    "u[0][2]": 0.0,
                    "u[1][2]": 0.0,
                    "has_anisou": 0,
                })

        ## Capture the ANISOU fields, if they are present
        elif rec_type == "ANISOU":
            try:
                uij = [rec[key] for key in uij_keys]
            except KeyError:
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"], rec["resName"],
                    rec["resSeq"]))
                continue

            ## We only keep the backbone atom's Uij values.
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                if not atom_obj:
                    ## ANISOU with no stored atom to attach it to.
                    continue
                ## NOTE(review): assumes the ANISOU record directly follows
                ## the ATOM/HETATM record it belongs to; serials are not
                ## compared.
                target = atom_obj[-1]
                for idx, key in enumerate(uij_keys):
                    if ADD_B_OVERALL:
                        target[key] = uij[idx] + overall.get(idx, 0.0)
                    else:
                        target[key] = uij[idx]
                target["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj
def check_upload(job_id, file):
    """Run sanity checks on an uploaded PDB file.

    Checks performed, in order:

      * NMR structures ("EXPDTA NMR" line) are rejected.
      * Lowercase insertion codes (e.g. "100b") are rejected.
      * The occupancies of the standard-residue ATOM lines must not all
        be 0.0.
      * At least 30 standard-residue ATOM lines with occupancy >= 1.00
        must be present.
      * running_stddev() must not flag the temperature factors.

    Args:
        job_id: Identifier interpolated into the returned messages.
        file: Any iterable yielding the lines of the PDB file.

    Returns:
        An empty string when every check passes, otherwise a message
        describing the first check that failed.

    Raises:
        ValueError: If a numeric column of an accepted ATOM line cannot
            be parsed.
    """
    atom_num = []
    res_type = []
    res_num = []
    chain = []
    temp_factors = []

    num_total = 0          ## standard-residue ATOM lines seen
    occupancy_total = 0.0  ## occupancy summed over all of those lines

    ## Messages can be produced before (or without) a HEADER line, so the
    ## ID must always be bound.
    header_id = ""

    for line in file:
        if line.startswith('HEADER'):
            header_id = re.sub(r"^HEADER.{56}(....)", '\\1', line).strip()

        elif line.startswith('EXPDTA NMR'):
            return "NMR structure! Skipping: %s [%s]" % (job_id, header_id)

        elif re.match('^ATOM.....................[0-9][a-z]', line):
            ## E.g., Don't allow "100b". Force it to be "100B"
            return "Lowercase alternate residue names: %s [%s]" % (
                job_id, header_id)

        elif line.startswith('ATOM') and (
            Library.library_is_standard_residue(line[17:20].strip())):
            ## Slices follow the PDB ATOM record layout: serial 7-11,
            ## resSeq 23-26, occupancy 55-60, tempFactor 61-66 (1-based).
            occ = float(line[54:60].strip())
            num_total += 1
            occupancy_total += occ

            if occ < 1.00:
                ## ignore occupancies < 1.00
                continue

            atom_num.append(int(line[6:11].strip()))
            res_type.append(line[17:20].strip())
            res_num.append(int(line[22:26].strip()))
            chain.append(line[21:22])
            temp_factors.append(float(line[60:66].strip()))

    ## This has to look at *every* occupancy; the atoms kept above all have
    ## occupancy >= 1.00 and so can never sum to zero.
    if num_total > 0 and occupancy_total == 0.0:
        return "All occupancies are 0.0. TLSMD won't run on this structure: %s [%s]" % (
            job_id, header_id)

    if len(atom_num) < 30:
        return "Not a PDB structure or has unrecognized residue names: %s [%s]" % (
            job_id, header_id)

    bad_std, _tmpfile = running_stddev(atom_num, res_type, res_num, chain,
                                       temp_factors)
    if bad_std > 0:
        ## A run of "bad" B-factors was found: report it and do not proceed
        ## any further in the analysis.
        return "STDDEV %s > Bfact < %s for job_id: %s [%s]" % (
            conf.MAX_STDDEV_BFACT, conf.MIN_STDDEV_BFACT, job_id, header_id)

    return ''
def read_pdb(path):
    """Read a PDB file and collect its TLS header lines and backbone atoms.

    The records returned by ``pdbmodule.read(path)`` are scanned once:

      * REMARK text is searched for the "overall anisotropic B value"
        block and for TLS range/origin lines.
      * ATOM/HETATM records of standard residues whose atom name is in
        ``Constants.BACKBONE_ATOMS`` are stored as dicts, with the Uij
        values initialised isotropically from the B-factor.
      * ANISOU records for those same atoms overwrite the Uij values of
        the most recently stored atom.

    Args:
        path: Passed straight to ``pdbmodule.read``.

    Returns:
        A 3-tuple ``(tls_text, overall, atom_obj)``.  ``tls_text`` is the
        collected "#RANGE"/"#ORIGIN" lines joined with newlines.
        ``overall`` is a dict mapping 0..5 to the floats captured by
        re_b11, re_b22, re_b33, re_b12, re_b13 and re_b23 respectively
        (an entry is absent when the value was "NULL" or never seen).
        ``atom_obj`` is the list of atom dicts in file order.

    Raises:
        ValueError: If a captured B value or TLS origin component cannot
            be converted to float.
    """
    ## Index in this tuple == key in the `overall` dict.
    overall_patterns = (re_b11, re_b22, re_b33, re_b12, re_b13, re_b23)
    uij_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
                "u[0][1]", "u[0][2]", "u[1][2]")

    ## When True, the overall anisotropic B components are added to every
    ## ANISOU value that is stored. (default: False)
    ADD_B_OVERALL = False

    atom_obj = []
    listx = []
    overall = {}

    ## 0: waiting for the "overall B" header line
    ## 1: inside the B11..B23 lines
    ## 2: B23 has been seen, nothing more to capture
    state = 0

    for rec in pdbmodule.read(path):
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            ## Capture the "OVERALL ANISOTROPIC B VALUE" fields
            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for idx, pattern in enumerate(overall_patterns):
                    m = pattern.match(text)
                    if m is None:
                        continue
                    ## Each component is tested against its *own* captured
                    ## value; previously every component after B11 tested
                    ## the B22 capture instead.
                    value = m.group(1)
                    if value is not None and value != "NULL":
                        overall[idx] = float(value)
                    if idx == len(overall_patterns) - 1:
                        ## The B23 line closes the block.
                        state = 2

            ## Capture the TLS "RANGE"s fields
            m = re_tls_range.match(text)
            if m is not None:
                (chain_id1, frag_id1, chain_id2, frag_id2) = m.groups()
                sel = "ALL"
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)
                listx.append("#RANGE  '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), sel))

            ## Capture the TLS "ORIGIN"s fields
            m = re_tls_origin.match(text)
            if m is not None:
                strx = m.group(1)
                ## The numbers cannot be trusted to stay in fixed columns,
                ## so each one is located through its decimal point: up to
                ## four characters before it and four after it.
                ox = [0.0, 0.0, 0.0]
                for i in (0, 1, 2):
                    j = strx.find(".")
                    if j == -1:
                        break
                    ox[i] = float(strx[max(0, j - 4):j + 5])
                    strx = strx[j + 5:]
                listx.append("#ORIGIN   %8.4f %8.4f %8.4f" % (
                    ox[0], ox[1], ox[2]))

        ## Now, capture the ATOM and/or HETATM fields
        ## NOTE(review): PDB record names are six columns wide; confirm that
        ## the "ATOM " literal below is exactly what pdbmodule reports in
        ## rec["RECORD"].
        elif rec_type == "ATOM " or rec_type == "HETATM":
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## Initialize ANISOU values to isotropic = I3x3 * B/8*pi^2
                ## in case this atom has no ANISOU record, e.g.
                ## if it is next to but not in a TLS group
                Uiso = rec["tempFactor"] * B2UE4
                atom_obj.append({
                    "RECORD": "",
                    "element": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                    "u[0][0]": Uiso,
                    "u[1][1]": Uiso,
                    "u[2][2]": Uiso,
                    "u[0][1]": 0.0,
                    "u[0][2]": 0.0,
                    "u[1][2]": 0.0,
                    "has_anisou": 0,
                })

        ## Capture the ANISOU fields, if they are present
        elif rec_type == "ANISOU":
            try:
                uij = [rec[key] for key in uij_keys]
            except KeyError:
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"], rec["resName"],
                    rec["resSeq"]))
                continue

            ## We only keep the backbone atom's Uij values.
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                if not atom_obj:
                    ## ANISOU with no stored atom to attach it to.
                    continue
                ## NOTE(review): assumes the ANISOU record directly follows
                ## the ATOM/HETATM record it belongs to; serials are not
                ## compared.
                target = atom_obj[-1]
                for idx, key in enumerate(uij_keys):
                    if ADD_B_OVERALL:
                        target[key] = uij[idx] + overall.get(idx, 0.0)
                    else:
                        target[key] = uij[idx]
                target["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj
def check_upload(job_id, file):
    """Run sanity checks on an uploaded PDB file.

    Checks performed, in order:

      * NMR structures ("EXPDTA NMR" line) are rejected.
      * Lowercase insertion codes (e.g. "100b") are rejected.
      * The occupancies of the standard-residue ATOM lines must not all
        be 0.0.
      * At least 30 standard-residue ATOM lines with occupancy >= 1.00
        must be present.
      * running_stddev() must not flag the temperature factors.

    Args:
        job_id: Identifier interpolated into the returned messages.
        file: Any iterable yielding the lines of the PDB file.

    Returns:
        An empty string when every check passes, otherwise a message
        describing the first check that failed.

    Raises:
        ValueError: If a numeric column of an accepted ATOM line cannot
            be parsed.
    """
    atom_num = []
    res_type = []
    res_num = []
    chain = []
    temp_factors = []

    num_total = 0          ## standard-residue ATOM lines seen
    occupancy_total = 0.0  ## occupancy summed over all of those lines

    ## Messages can be produced before (or without) a HEADER line, so the
    ## ID must always be bound.
    header_id = ""

    for line in file:
        if line.startswith('HEADER'):
            header_id = re.sub(r"^HEADER.{56}(....)", '\\1', line).strip()

        elif line.startswith('EXPDTA NMR'):
            return "NMR structure! Skipping: %s [%s]" % (job_id, header_id)

        elif re.match('^ATOM.....................[0-9][a-z]', line):
            ## E.g., Don't allow "100b". Force it to be "100B"
            return "Lowercase alternate residue names: %s [%s]" % (
                job_id, header_id)

        elif line.startswith('ATOM') and (
            Library.library_is_standard_residue(line[17:20].strip())):
            ## Slices follow the PDB ATOM record layout: serial 7-11,
            ## resSeq 23-26, occupancy 55-60, tempFactor 61-66 (1-based).
            occ = float(line[54:60].strip())
            num_total += 1
            occupancy_total += occ

            if occ < 1.00:
                ## ignore occupancies < 1.00
                continue

            atom_num.append(int(line[6:11].strip()))
            res_type.append(line[17:20].strip())
            res_num.append(int(line[22:26].strip()))
            chain.append(line[21:22])
            temp_factors.append(float(line[60:66].strip()))

    ## This has to look at *every* occupancy; the atoms kept above all have
    ## occupancy >= 1.00 and so can never sum to zero.
    if num_total > 0 and occupancy_total == 0.0:
        return "All occupancies are 0.0. TLSMD won't run on this structure: %s [%s]" % (
            job_id, header_id)

    if len(atom_num) < 30:
        return "Not a PDB structure or has unrecognized residue names: %s [%s]" % (
            job_id, header_id)

    bad_std, _tmpfile = running_stddev(atom_num, res_type, res_num, chain,
                                       temp_factors)
    if bad_std > 0:
        ## A run of "bad" B-factors was found: report it and do not proceed
        ## any further in the analysis.
        return "STDDEV %s > Bfact < %s for job_id: %s [%s]" % (
            conf.MAX_STDDEV_BFACT, conf.MIN_STDDEV_BFACT, job_id, header_id)

    return ''