def read_pdb(path):
    """Read a PDB file and collect TLS remarks plus backbone-atom records.

    The records returned by ``pdbmodule.read(path)`` are scanned once:

    * ``REMARK`` records supply the "OVERALL ANISOTROPIC B VALUE" block
      (B11, B22, B33, B12, B13, B23) and the TLS RANGE/ORIGIN lines.
    * ``ATOM``/``HETATM`` records of standard residues whose atom name is in
      ``Constants.BACKBONE_ATOMS`` become one dict each, with the Uij values
      initialised isotropically from the temperature factor.
    * ``ANISOU`` records for such atoms overwrite the Uij values of the most
      recently stored atom and set its ``has_anisou`` flag to 1.

    NOTE(review): a function with this same name is defined again later in
    this file; at import time the later definition replaces this one.

    Args:
        path: passed unchanged to ``pdbmodule.read``.

    Returns:
        A 3-tuple ``(tls_text, overall, atom_obj)`` where ``tls_text`` is the
        collected "#RANGE"/"#ORIGIN" lines joined with newlines, ``overall``
        maps 0..5 (B11, B22, B33, B12, B13, B23) to floats for every value
        that was present and not "NULL", and ``atom_obj`` is the list of
        backbone-atom dicts in file order.

    Raises:
        KeyError: if an ATOM/HETATM/ANISOU record lacks an expected field.
        ValueError: if a matched B value or ORIGIN coordinate is not a number.
    """
    records = pdbmodule.read(path)

    atom_obj = []
    listx = []
    overall = {}

    ## 0: header of the overall-B block not seen yet
    ## 1: inside the block, collecting Bij lines
    ## 2: block finished
    state = 0

    ## Pattern for each Bij line and the slot it fills in `overall`.
    overall_fields = (
        (re_b11, 0),
        (re_b22, 1),
        (re_b33, 2),
        (re_b12, 3),
        (re_b13, 4),
        (re_b23, 5),
    )
    uij_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
                "u[0][1]", "u[0][2]", "u[1][2]")

    for rec in records:
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            ## Capture the "OVERALL ANISOTROPIC B VALUE" fields
            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for regex, slot in overall_fields:
                    m = regex.match(text)
                    if m is None:
                        continue
                    ## Each field is tested against its *own* captured value
                    ## (the B33..B23 checks used to look at the B22 match).
                    value = m.group(1)
                    if value is not None and value != "NULL":
                        overall[slot] = float(value)
                    if slot == 5:
                        ## B23 is the last pattern of the block.
                        ## NOTE(review): nesting of this transition was
                        ## reconstructed; confirm against the intended flow.
                        state = 2

            ## Capture the TLS "RANGE"s fields
            m = re_tls_range.match(text)
            if m is not None:
                (chain_id1, frag_id1, chain_id2, frag_id2) = m.groups()
                sel = "ALL"
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)
                listx.append("#RANGE '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), sel))

            ## Capture the TLS "ORIGIN"s fields
            m = re_tls_origin.match(text)
            if m is not None:
                strx = m.group(1)
                ## The numbers cannot be trusted to stay in fixed columns, so
                ## each coordinate is located by its decimal point: up to four
                ## characters before it and four after it.
                ox = [0.0, 0.0, 0.0]
                for i in range(3):
                    j = strx.find(".")
                    if j == -1:
                        break
                    ox[i] = float(strx[max(0, j - 4):j + 5])
                    strx = strx[j + 5:]
                ## Formatting three floats cannot fail, so no try/except here.
                listx.append("#ORIGIN %8.4f %8.4f %8.4f" % (
                    ox[0], ox[1], ox[2]))

            ## NOTE: The actual TLS T/L/S values are not parsed here; they can
            ## be provided via an external TLSIN/TLSOUT file.

        ## Now, capture the ATOM and/or HETATM fields
        ## NOTE(review): confirm these literals match the record names that
        ## pdbmodule emits (PDB record names are fixed-width).
        elif rec_type == "ATOM " or rec_type == "HETATM":
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## Initialize ANISOU values to isotropic = I3x3 * B/8*pi^2
                ## in case this atom has no ANISOU record, e.g.
                ## if it is next to but not in a TLS group
                Uiso = rec["tempFactor"] * B2UE4
                atom_obj.append({
                    "RECORD": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                    "element": "",
                    "u[0][0]": Uiso,
                    "u[1][1]": Uiso,
                    "u[2][2]": Uiso,
                    "u[0][1]": 0.0,
                    "u[0][2]": 0.0,
                    "u[1][2]": 0.0,
                    "has_anisou": 0,
                })

        ## Capture the ANISOU fields, if they are present
        elif rec_type == "ANISOU":
            try:
                ## Only checks that the diagonal terms are present.
                rec["u[0][0]"] + rec["u[1][1]"] + rec["u[2][2]"]
            except KeyError:
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"],
                    rec["resName"], rec["resSeq"]))
                continue

            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## We only keep the backbone atom's Uij values.
                if not atom_obj:
                    ## No atom stored yet, so there is nothing to update.
                    continue
                ## assumes each ANISOU record directly follows its own
                ## ATOM/HETATM record -- serial numbers are not compared.
                atom = atom_obj[-1]
                for key in uij_keys:
                    atom[key] = rec[key]
                atom["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj
def read_pdb(path):
    """Read a PDB file and collect TLS remarks plus backbone-atom records.

    The records returned by ``pdbmodule.read(path)`` are scanned once:

    * ``REMARK`` records supply the "OVERALL ANISOTROPIC B VALUE" block
      (B11, B22, B33, B12, B13, B23) and the TLS RANGE/ORIGIN lines.
    * ``ATOM``/``HETATM`` records of standard residues whose atom name is in
      ``Constants.BACKBONE_ATOMS`` become one dict each, with the Uij values
      initialised isotropically from the temperature factor.
    * ``ANISOU`` records for such atoms overwrite the Uij values of the most
      recently stored atom and set its ``has_anisou`` flag to 1.

    NOTE(review): this definition replaces an earlier function of the same
    name defined in this file.

    Args:
        path: passed unchanged to ``pdbmodule.read``.

    Returns:
        A 3-tuple ``(tls_text, overall, atom_obj)`` where ``tls_text`` is the
        collected "#RANGE"/"#ORIGIN" lines joined with newlines, ``overall``
        maps 0..5 (B11, B22, B33, B12, B13, B23) to floats for every value
        that was present and not "NULL", and ``atom_obj`` is the list of
        backbone-atom dicts in file order.

    Raises:
        KeyError: if an ATOM/HETATM/ANISOU record lacks an expected field.
        ValueError: if a matched B value or ORIGIN coordinate is not a number.
    """
    records = pdbmodule.read(path)

    atom_obj = []
    listx = []
    overall = {}

    ## 0: header of the overall-B block not seen yet
    ## 1: inside the block, collecting Bij lines
    ## 2: block finished
    state = 0

    ## Pattern for each Bij line and the slot it fills in `overall`.
    overall_fields = (
        (re_b11, 0),
        (re_b22, 1),
        (re_b33, 2),
        (re_b12, 3),
        (re_b13, 4),
        (re_b23, 5),
    )
    uij_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
                "u[0][1]", "u[0][2]", "u[1][2]")

    for rec in records:
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            ## Capture the "OVERALL ANISOTROPIC B VALUE" fields
            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for regex, slot in overall_fields:
                    m = regex.match(text)
                    if m is None:
                        continue
                    ## Each field is tested against its *own* captured value
                    ## (the B33..B23 checks used to look at the B22 match).
                    value = m.group(1)
                    if value is not None and value != "NULL":
                        overall[slot] = float(value)
                    if slot == 5:
                        ## B23 is the last pattern of the block.
                        ## NOTE(review): nesting of this transition was
                        ## reconstructed; confirm against the intended flow.
                        state = 2

            ## Capture the TLS "RANGE"s fields
            m = re_tls_range.match(text)
            if m is not None:
                (chain_id1, frag_id1, chain_id2, frag_id2) = m.groups()
                sel = "ALL"
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)
                listx.append("#RANGE '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), sel))

            ## Capture the TLS "ORIGIN"s fields
            m = re_tls_origin.match(text)
            if m is not None:
                strx = m.group(1)
                ## The numbers cannot be trusted to stay in fixed columns, so
                ## each coordinate is located by its decimal point: up to four
                ## characters before it and four after it.
                ox = [0.0, 0.0, 0.0]
                for i in range(3):
                    j = strx.find(".")
                    if j == -1:
                        break
                    ox[i] = float(strx[max(0, j - 4):j + 5])
                    strx = strx[j + 5:]
                ## Formatting three floats cannot fail, so no try/except here.
                listx.append("#ORIGIN %8.4f %8.4f %8.4f" % (
                    ox[0], ox[1], ox[2]))

            ## NOTE: The actual TLS T/L/S values are not parsed here; they can
            ## be provided via an external TLSIN/TLSOUT file.

        ## Now, capture the ATOM and/or HETATM fields
        ## NOTE(review): confirm these literals match the record names that
        ## pdbmodule emits (PDB record names are fixed-width).
        elif rec_type == "ATOM " or rec_type == "HETATM":
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## Initialize ANISOU values to isotropic = I3x3 * B/8*pi^2
                ## in case this atom has no ANISOU record, e.g.
                ## if it is next to but not in a TLS group
                Uiso = rec["tempFactor"] * B2UE4
                atom_obj.append({
                    "RECORD": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                    "element": "",
                    "u[0][0]": Uiso,
                    "u[1][1]": Uiso,
                    "u[2][2]": Uiso,
                    "u[0][1]": 0.0,
                    "u[0][2]": 0.0,
                    "u[1][2]": 0.0,
                    "has_anisou": 0,
                })

        ## Capture the ANISOU fields, if they are present
        elif rec_type == "ANISOU":
            try:
                ## Only checks that the diagonal terms are present.
                rec["u[0][0]"] + rec["u[1][1]"] + rec["u[2][2]"]
            except KeyError:
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"],
                    rec["resName"], rec["resSeq"]))
                continue

            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## We only keep the backbone atom's Uij values.
                if not atom_obj:
                    ## No atom stored yet, so there is nothing to update.
                    continue
                ## assumes each ANISOU record directly follows its own
                ## ATOM/HETATM record -- serial numbers are not compared.
                atom = atom_obj[-1]
                for key in uij_keys:
                    atom[key] = rec[key]
                atom["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj
def check_upload(job_id, file):
    """Run sanity checks on an uploaded PDB file.

    Checks performed, in the order they can trigger:

    * NMR structures are rejected.
    * Lowercase insertion codes (e.g. "100b" instead of "100B") are rejected.
    * Not every standard-residue ATOM may have an occupancy of 0.00.
    * At least 30 standard-residue ATOMs with occupancy >= 1.00 are required.
    * ``running_stddev`` must not report a run of "bad" B-factors.

    NOTE(review): a function with this same name is defined again later in
    this file; at import time the later definition replaces this one.

    Args:
        job_id: job identifier, only used in the returned messages.
        file: iterable of PDB text lines (e.g. an open file object).

    Returns:
        An empty string when every check passes, otherwise a message
        describing the first failed check.

    Raises:
        ValueError: if a numeric column of an ATOM line cannot be parsed.
    """
    ## Defined up front so that messages built before (or without) a HEADER
    ## line do not fail on an unbound name.
    header_id = ""

    atom_num = []
    res_type = []
    res_num = []
    chain = []
    temp_factors = []

    num_total = 0          # standard-residue ATOM lines seen
    occupancy_total = 0.0  # summed over all of them, including skipped ones

    for line in file:
        if line.startswith('HEADER'):
            header_id = re.sub(r"^HEADER.{56}(....)", '\\1', line).strip()

        ## NOTE(review): PDB files pad the EXPDTA keyword with several
        ## spaces; confirm this literal matches the files being uploaded.
        elif line.startswith('EXPDTA NMR'):
            return "NMR structure! Skipping: %s [%s]" % (job_id, header_id)

        elif re.match('^ATOM.....................[0-9][a-z]', line):
            ## E.g., Don't allow "100b". Force it to be "100B"
            return "Lowercase alternate residue names: %s [%s]" % (
                job_id, header_id)

        elif line.startswith('ATOM') and (
                Library.library_is_standard_residue(line[17:20].strip())):
            num_total += 1
            occ = float(line[56:60].strip())
            occupancy_total += occ
            if occ < 1.00:
                ## ignore occupancies < 1.00
                continue

            ## PDB ATOM columns (1-based): serial 7-11, resName 18-20,
            ## chainID 22, resSeq 23-26, tempFactor 61-66. The slices cover
            ## the full fields so 5-digit serials, 4-digit residue numbers
            ## and the last B-factor digit are not cut off.
            atom_num.append(int(line[6:11].strip()))
            res_type.append(line[17:20].strip())
            res_num.append(int(line[22:26].strip()))
            chain.append(line[21:22])
            temp_factors.append(float(line[60:66].strip()))

    ## Evaluated over *all* standard-residue atoms; the low-occupancy atoms
    ## skipped above would otherwise make this condition impossible to meet.
    if num_total > 0 and occupancy_total == 0.0:
        return "All occupancies are 0.0. TLSMD won't run on this structure: %s [%s]" % (
            job_id, header_id)

    if len(atom_num) < 30:
        return "Not a PDB structure or has unrecognized residue names: %s [%s]" % (
            job_id, header_id)

    bad_std, _tmpfile = running_stddev(atom_num, res_type, res_num, chain,
                                       temp_factors)
    if bad_std > 0:
        ## If there are a string of "bad" B-factors, report them and do not
        ## proceed any further in the analysis.
        return "STDDEV %s > Bfact < %s for job_id: %s [%s]" % (
            conf.MAX_STDDEV_BFACT, conf.MIN_STDDEV_BFACT, job_id, header_id)

    return ''
def check_upload(job_id, file):
    """Run sanity checks on an uploaded PDB file.

    Checks performed, in the order they can trigger:

    * NMR structures are rejected.
    * Lowercase insertion codes (e.g. "100b" instead of "100B") are rejected.
    * Not every standard-residue ATOM may have an occupancy of 0.00.
    * At least 30 standard-residue ATOMs with occupancy >= 1.00 are required.
    * ``running_stddev`` must not report a run of "bad" B-factors.

    NOTE(review): this definition replaces an earlier function of the same
    name defined in this file.

    Args:
        job_id: job identifier, only used in the returned messages.
        file: iterable of PDB text lines (e.g. an open file object).

    Returns:
        An empty string when every check passes, otherwise a message
        describing the first failed check.

    Raises:
        ValueError: if a numeric column of an ATOM line cannot be parsed.
    """
    ## Defined up front so that messages built before (or without) a HEADER
    ## line do not fail on an unbound name.
    header_id = ""

    atom_num = []
    res_type = []
    res_num = []
    chain = []
    temp_factors = []

    num_total = 0          # standard-residue ATOM lines seen
    occupancy_total = 0.0  # summed over all of them, including skipped ones

    for line in file:
        if line.startswith('HEADER'):
            header_id = re.sub(r"^HEADER.{56}(....)", '\\1', line).strip()

        ## NOTE(review): PDB files pad the EXPDTA keyword with several
        ## spaces; confirm this literal matches the files being uploaded.
        elif line.startswith('EXPDTA NMR'):
            return "NMR structure! Skipping: %s [%s]" % (job_id, header_id)

        elif re.match('^ATOM.....................[0-9][a-z]', line):
            ## E.g., Don't allow "100b". Force it to be "100B"
            return "Lowercase alternate residue names: %s [%s]" % (
                job_id, header_id)

        elif line.startswith('ATOM') and (
                Library.library_is_standard_residue(line[17:20].strip())):
            num_total += 1
            occ = float(line[56:60].strip())
            occupancy_total += occ
            if occ < 1.00:
                ## ignore occupancies < 1.00
                continue

            ## PDB ATOM columns (1-based): serial 7-11, resName 18-20,
            ## chainID 22, resSeq 23-26, tempFactor 61-66. The slices cover
            ## the full fields so 5-digit serials, 4-digit residue numbers
            ## and the last B-factor digit are not cut off.
            atom_num.append(int(line[6:11].strip()))
            res_type.append(line[17:20].strip())
            res_num.append(int(line[22:26].strip()))
            chain.append(line[21:22])
            temp_factors.append(float(line[60:66].strip()))

    ## Evaluated over *all* standard-residue atoms; the low-occupancy atoms
    ## skipped above would otherwise make this condition impossible to meet.
    if num_total > 0 and occupancy_total == 0.0:
        return "All occupancies are 0.0. TLSMD won't run on this structure: %s [%s]" % (
            job_id, header_id)

    if len(atom_num) < 30:
        return "Not a PDB structure or has unrecognized residue names: %s [%s]" % (
            job_id, header_id)

    bad_std, _tmpfile = running_stddev(atom_num, res_type, res_num, chain,
                                       temp_factors)
    if bad_std > 0:
        ## If there are a string of "bad" B-factors, report them and do not
        ## proceed any further in the analysis.
        return "STDDEV %s > Bfact < %s for job_id: %s [%s]" % (
            conf.MAX_STDDEV_BFACT, conf.MIN_STDDEV_BFACT, job_id, header_id)

    return ''