예제 #1
0
    def atomium_parse(self,file):
        """Load a structure with atomium and collect per-chain CA data.

        ``file`` is first opened as a local path; if that raises
        ``FileNotFoundError`` the same string is handed to
        ``atomium.fetch`` instead.

        Side effects on ``self``:
            coord_dict  -- chain internal id -> array of CA atom locations
                           (atoms whose ``het.code`` is 'X' are skipped)
            atoms       -- one record appended per accepted CA atom
            er_dict     -- chain internal id -> ``eigenrank`` of its coords
            coordinates, l, er -- data of the chain whose internal id
                           sorts first
        """
        source = str(file)
        try:
            struc = atomium.open(source)
        except FileNotFoundError:
            struc = atomium.fetch(source)

        self.coord_dict = {}
        for chain in struc.model.chains():
            ca_locations = []
            for res in chain:
                for atom in res.atoms():
                    # Only C-alpha atoms of residues with a known code count.
                    if atom.name != 'CA' or atom.het.code == 'X':
                        continue
                    ca_locations.append(atom.location)
                    self.atoms.append({
                        'res_id': res.id,
                        'res': toggle_code(res.code, '3to1'),
                        'atom_id': atom.id,
                        'coords': atom.location,
                        'chain': chain.id,
                    })
            self.coord_dict[chain.internal_id] = np.asarray(ca_locations)

        self.er_dict = {
            chain_id: eigenrank(chain_coords)
            for chain_id, chain_coords in self.coord_dict.items()
        }

        # The chain whose internal id sorts first is the one exposed directly.
        first_chain = sorted(self.coord_dict)[0]
        self.coordinates = self.coord_dict[first_chain]
        self.l = self.coordinates.shape[0]
        self.er = self.er_dict[first_chain]
예제 #2
0
    def atomium_parse(self,file):
        """Load a structure with atomium and compute EigenRank for one chain.

        ``file`` is first opened as a local path; if that raises
        ``FileNotFoundError`` the same string is handed to
        ``atomium.fetch`` instead.

        Side effects on ``self``:
            coord_dict  -- chain internal id -> array of CA atom locations
                           (atoms whose ``het.code`` is 'X' are skipped)
            atoms       -- one record appended per accepted CA atom; the
                           residue number is the integer after the last
                           '.' of ``res.id``
            er_dict, lr_dict -- the two values returned by ``eigenrank``,
                           stored only for the selected chain
            coordinates, l -- coordinates / length of the selected chain
            er, lr      -- set only when the chain has at least 10 CA atoms

        The selected chain is the one whose internal id sorts first.

        Raises:
            IndexError: if the structure contains no chains.
        """
        source = str(file)
        try:
            struc = atomium.open(source)
        except FileNotFoundError:
            struc = atomium.fetch(source)

        self.coord_dict = {}
        for chain in struc.model.chains():
            coords = []
            for res in chain:
                for atom in res.atoms():
                    if atom.name == 'CA' and atom.het.code != 'X':
                        coords.append(atom.location)
                        self.atoms.append({
                            'res_id': int(res.id.split('.')[-1]),
                            'res': toggle_code(res.code, '3to1'),
                            'atom_id': atom.id,
                            'coords': atom.location,
                            'chain': chain.id,
                        })
            self.coord_dict[chain.internal_id] = np.asarray(coords)

        self.er_dict = {}
        self.lr_dict = {}

        # picking first chain
        first_chain = sorted(self.coord_dict)[0]
        self.coordinates = self.coord_dict[first_chain]
        self.l = self.coordinates.shape[0]

        # Compute EigenRank for the chain that was actually selected. The
        # previous version computed it for the first chain in insertion order,
        # which raised KeyError below whenever that was not the sorted-first
        # chain.
        self.er_dict[first_chain], self.lr_dict[first_chain] = eigenrank(
            self.coordinates)

        if self.l < 10:
            print('{} is too short for a sensible EigenRank ({})'.format(self.id, self.l))
        else:
            self.er = self.er_dict[first_chain]
            self.lr = self.lr_dict[first_chain]
예제 #3
0
    def test_templates(self):
        """The template built from the 1TON zinc site has 6 atoms: 3 CA, 3 CB."""
        zinc = atomium.fetch("1TON").model.molecule(name="ZN")
        template = biometal.create_site_template(zinc.site())

        # (atom query, expected number of matching atoms)
        expectations = (
            ({}, 6),
            ({"name": "CA"}, 3),
            ({"name": "CB"}, 3),
        )
        for query, expected in expectations:
            self.assertEqual(len(template.atoms(**query)), expected)

        template.save("test.pdb")
예제 #4
0
	def download_pdb_chain(self, pdb_chain):
		"""
		Fetch a structure with atomium and save a single chain as a PDB file.

		:param pdb_chain: identifier of the form ``<structure>_<chain>``,
			without the ``.pdb`` extension. Both parts are upper-cased before
			use and the chain is written to ``<self.path>/<pdb_chain>.pdb``.
		"""
		target = os.path.join(self.path, pdb_chain) + '.pdb'
		pdb_code, chain_name = pdb_chain.split('_')
		fetched = atomium.fetch(pdb_code.upper())
		selected_chain = fetched.model.chain(chain_name.upper())
		selected_chain.save(target)
예제 #5
0
def pdb(request, id):
    """Return (part of) a fetched structure's data as indented JSON.

    The non-empty segments of ``request.path`` drive the lookup: the first
    segment is passed to ``atomium.fetch`` (with ``data_dict=True`` and
    ``file_dict`` set when a ``file`` query parameter is present); each
    remaining segment is used to descend one level into the result, as a
    key when the current value is a dict and as an integer index otherwise.
    Segments that cannot be applied are skipped, so the response holds the
    deepest value that could be reached.

    Args:
        request: the incoming HTTP request.
        id: captured by the URL route; not used here because the path
            segments are re-read from ``request.path``.

    Raises:
        Http404: if the path is empty or the structure cannot be fetched.
    """
    keys = [k for k in request.path.split("/") if k]
    try:
        d = atomium.fetch(keys.pop(0),
                          file_dict="file" in request.GET,
                          data_dict=True)
    except Exception:
        # Covers both an empty path (IndexError) and any fetch failure.
        # `Exception` rather than a bare except so that SystemExit and
        # KeyboardInterrupt are not turned into a 404.
        raise Http404
    while keys:
        key = keys.pop(0)
        try:
            d = d[key] if isinstance(d, dict) else d[int(key)]
        except (KeyError, IndexError, ValueError, TypeError):
            # Best effort: ignore a segment that does not exist (KeyError,
            # IndexError), is not a valid index (ValueError), or targets a
            # value that cannot be indexed (TypeError).
            pass
    return JsonResponse(d, safe=False, json_dumps_params={"indent": 4})
예제 #6
0
def FetchProtein(pdb_id, bu, selection, model, use_authid=True):
    """Fetch a structure and return the result of ``getPDBString`` for it.

    Args:
        pdb_id: identifier passed to ``atomium.fetch``.
        bu: ``"AU"`` maps to index -1, an empty value (``""`` or ``None``)
            to 0, and anything else to ``int(bu) - 1``.
            NOTE(review): presumably the 1-based biological unit number,
            with "AU" meaning the asymmetric unit -- confirm against
            ``getPDBString``.
        selection: comma-separated chain ids; ``None`` or ``""`` yields an
            empty chain list.
        model: model index; ``None`` or ``""`` defaults to 0, otherwise it
            is converted with ``int``.
        use_authid: forwarded unchanged to ``getPDBString``.

    Returns:
        Whatever ``getPDBString`` returns.

    Raises:
        ValueError: if ``model`` or ``bu`` is not empty and not an integer.
    """
    # Normalise the arguments first so that invalid input fails before the
    # (potentially slow) network fetch.
    model = 0 if model is None or model == "" else int(model)

    if bu == "AU":
        bu = -1
    elif bu is None or bu == "":
        bu = 0
    else:
        bu = int(bu) - 1

    if selection is None or selection == "":
        sel_chains = []
    else:
        sel_chains = selection.split(",")

    p = atomium.fetch(pdb_id)
    return getPDBString(p, sel_chains, bu, model, use_authid=use_authid)
예제 #7
0
파일: parser.py 프로젝트: juvilius/erpscpy
def structure(file):
    ''' Build an atomium structure annotated with chain coordinates and its
    EigenRank profile.

    ``file`` is opened as a local path first; if that raises
    ``FileNotFoundError`` the same string is passed to ``atomium.fetch``.
    The returned object gains two attributes before it is handed to
    ``erpscpy.er.add_eigenrank``:

    id           -- the structure's ``code`` or, when that is ``None``, the
                    base name of ``file`` up to its first '.'
    coordinates  -- dict mapping each chain's internal id to an array of its
                    CA atom locations (atoms whose ``het.code`` is 'X' are
                    skipped); SCOPe, COPS and CATH files hold only one chain

    Returns whatever ``erpscpy.er.add_eigenrank`` returns. '''
    source = str(file)
    try:
        struc = atomium.open(source)
    except FileNotFoundError:
        struc = atomium.fetch(source)

    if struc.code is None:
        struc.id = os.path.basename(file).split('.')[0]
    else:
        struc.id = struc.code

    coord_dict = {}
    for chain in struc.model.chains():
        coord_dict[chain.internal_id] = np.asarray([
            atom.location
            for res in chain
            for atom in res.atoms()
            if atom.name == 'CA' and atom.het.code != 'X'
        ])
    struc.coordinates = coord_dict
    return erpscpy.er.add_eigenrank(struc)
예제 #8
0
def main(reset=False, log=True, json=True):
    """Scan every zinc-containing PDB and store its zinc sites in the database.

    For each code returned by ``get_zinc_pdb_codes`` the structure is
    fetched, its best metal-containing assembly is generated, and ``Pdb``,
    ``Metal``, ``Chain``, ``ZincSite`` and ``Residue`` records are created.
    Each PDB is processed inside its own ``transaction.atomic()`` block.
    Metals that do not end up in a site are still recorded, with an
    ``omission`` reason.

    Args:
        reset: when false, codes that already have a ``Pdb`` record are
            skipped.
        log: when true, progress is written to the logger from ``get_log``.
        json: when true, the database is dumped to ``data/zinc.json`` with
            the ``dumpdata`` management command once processing finishes.
    """
    # Setup log
    logger = get_log() if log else None

    # Get all PDBs which contain zinc
    if log: logger.info("Getting PDB codes")
    codes = get_zinc_pdb_codes()
    print(f"There are {len(codes)} PDBs with zinc")
    if not reset:
        # A set keeps the membership test below O(1) per code.
        checked = {p.id for p in Pdb.objects.all()}
        print(f"{len(checked)} have already been checked")
        codes = [code for code in codes if code not in checked]

    # Go through each PDB
    mmcif_count = 0
    for code in tqdm(codes):
        with transaction.atomic():
            # Get PDB
            if log:
                logger.info("Getting PDB {} object from server".format(code))
            try:
                pdb = atomium.fetch(code)
            except ValueError:
                mmcif_count += 1
                if log: logger.info("Couldn't get {}".format(code))
                continue

            # Which assembly should be used?
            if log: logger.info("Getting best assembly")
            model = pdb.generate_best_assembly()
            metals = model.atoms(is_metal=True)
            # Discard assemblies until one containing metal atoms is found.
            # NOTE(review): nothing here bounds the loop if no assembly ever
            # yields metals -- confirm generate_best_assembly's fallback.
            while not metals:
                pdb.assemblies.remove(pdb.best_assembly)
                model = pdb.generate_best_assembly()
                metals = model.atoms(is_metal=True)

            # Save the PDB
            if log: logger.info("Saving PDB to database")
            pdb_record = Pdb.create_from_atomium(pdb)

            # Is the PDB usable?
            if log: logger.info("Checking PDB usable")
            if model_is_skeleton(model):
                zincs = model.atoms(element="ZN")
                if log: logger.info("It isn't - saving metals")
                for zinc in zincs:
                    Metal.create_from_atomium(
                        zinc,
                        pdb_record,
                        omission="No residue side chain information in PDB.")
                continue

            # Are any PDB zincs not in assembly
            if log: logger.info("Looking for unused zinc")
            au_zincs = pdb.model.atoms(element="ZN")
            assembly_zinc_ids = [atom.id for atom in model.atoms(element="ZN")]
            for zinc in au_zincs:
                if zinc.id not in assembly_zinc_ids:
                    Metal.create_from_atomium(
                        zinc,
                        pdb_record,
                        omission=
                        "Zinc was in asymmetric unit but not biological assembly."
                    )

            # Get zinc clusters
            if log: logger.info("Clustering metals into sites")
            zinc_clusters = cluster_zincs_with_residues(metals)

            # Create chains
            if log: logger.info("Creating chains")
            chains = {}
            for cluster in zinc_clusters:
                for o in cluster["residues"].union(cluster["metals"]):
                    chains[o.chain.id] = o.chain
            # Replace each atomium chain with its database record.
            for chain_id, chain in chains.items():
                chains[chain_id] = Chain.create_from_atomium(chain, pdb_record)

            # Create binding sites
            for index, cluster in enumerate(zinc_clusters, start=1):
                # Does the cluster even have any residues?
                if len(cluster["residues"]) == 0:
                    if log: logger.info("Not creating site - no residues")
                    # Record this cluster's own metals. The previous version
                    # passed `zinc`, a stale variable left over from the
                    # asymmetric-unit loop above.
                    for metal in cluster["metals"]:
                        Metal.create_from_atomium(
                            metal,
                            pdb_record,
                            omission="Zinc has no binding residues.")
                    continue
                # Does the cluster have enough liganding atoms?
                atoms = []
                for residue in cluster["residues"]:
                    atoms += [a for a in residue.atoms() if a.liganding]
                if len(atoms) < 3:
                    if log:
                        logger.info(
                            "Not creating site - too few liganding atoms")
                    for metal in cluster["metals"]:
                        Metal.create_from_atomium(
                            metal,
                            pdb_record,
                            omission="Zinc has too few liganding atoms.")
                    continue

                # Create site record itself
                if log: logger.info("Creating site")
                site = ZincSite.objects.create(id=f"{pdb_record.id}-{index}",
                                               pdb=pdb_record,
                                               code=create_site_code(
                                                   cluster["residues"]),
                                               copies=cluster["count"])

                # Create metals
                if log: logger.info("Creating metals")
                for metal in cluster["metals"]:
                    Metal.create_from_atomium(metal, pdb_record, site=site)

                # Create residue records
                if log: logger.info("Creating residues")
                for r in cluster["residues"]:
                    chain = chains[r.chain.id]
                    Residue.create_from_atomium(r, chain, site)
    if log: logger.info("{} mmcif files ignored".format(mmcif_count))
    print("{} mmcif files ignored".format(mmcif_count))

    # JSON?
    if json:
        print("Saving JSON")
        sysout = sys.stdout
        with open("data/zinc.json", "w") as f:
            # dumpdata writes to stdout, so point stdout at the file for the
            # duration of the command and always restore it afterwards.
            sys.stdout = f
            try:
                call_command("dumpdata", "--exclude=contenttypes", verbosity=0)
            finally:
                sys.stdout = sysout
예제 #9
0
#! /usr/bin/env python3

import atomium

# PDB entries whose zinc binding sites are extracted.
pdbs = ["1TON", "2CAB", "8TLN", "5CPA", "7ADH"]

# Maps "<pdb code><zinc molecule id>" to the extracted site.
sites = {}
for pdb in pdbs:
    print(f"Processing {pdb}...")
    model = atomium.fetch(pdb).model()
    zincs = model.atoms(element="ZN")
    zinc_count = len(zincs)
    plural = "" if zinc_count == 1 else "s"
    print(f"  Found {zinc_count} zinc atom{plural}")
    for zinc in zincs:
        zinc_molecule = zinc.molecule()
        id_ = pdb + zinc_molecule.molecule_id()
        site = zinc_molecule.site()
        # Include the metal itself, then move the site so that the zinc
        # sits at the origin.
        site.add_atom(zinc)
        site.translate(-zinc.x(), -zinc.y(), -zinc.z())
        sites[id_] = site
        print(f"  Extracting {id_}...")

# Write every collected site to "<id>.pdb".
print(f"Saving {len(sites)} zinc sites as PDBs...")
for site_id, extracted_site in sites.items():
    extracted_site.save(f"{site_id}.pdb")
예제 #10
0
def get_cath_domain_distogram(cath_id, cath_seq, return_seq_pair=False):
    """
    Given a CATH ID and sequence, return the distogram corresponding to the
    CATH sequence.

    The PDB entry named by the first four characters of ``cath_id`` is
    fetched and the chain named by the fifth character is selected. Each
    position of ``cath_seq`` is mapped onto a residue of that chain, either
    by exact substring match or, failing that, through
    ``Structure.multi_alignment``. Distances are measured between CB atoms
    (CA for glycine).

    Return:
        if return_seq_pair == False:
            return a np.array with shape [ seq_len, seq_len ]. Missing values
            denoted with -1. The diagonal is never filled and therefore
            stays at -1 as well.
        else:
            return np.array, (cath_seq, pdb_seq), where pdb_seq holds the
            one-letter code of the matched PDB residue at each position and
            '?' where no residue was matched.

    Example:
        get_cath_domain_distogram("2j43A01", "ASHHLRXHFKTLPAGESLGSLGLWVWGDVDQPSKDWPNGAITXTKAKKDDYGYYLDVPLAAKHRQQVSYLINNKAGENLSKDQHISLLTPKXNEVWIDENY")
    """
    import atomium
    import Structure

    ## Split the CATH ID into its PDB code and chain ID
    code = cath_id[:4]
    chain_id = cath_id[4]

    ## Fetch the PDB entry and select the chain
    pdb = atomium.fetch(code)
    chain = pdb.model.chain(chain_id)
    residues = chain.residues()

    ## Relate the sequence present in the PDB file to the CATH sequence
    present_sequence = "".join(
        threeToOne.get(res.name, "X") for res in residues)
    start = present_sequence.find(cath_seq)
    if start != -1:
        # The CATH sequence is a contiguous stretch of the chain: pad it
        # with gaps so both strings have equal length.
        end = start + len(cath_seq)
        full_seq = present_sequence
        domain_seq = "-" * start + cath_seq + "-" * (len(present_sequence) -
                                                     end)
        assert len(domain_seq) == len(full_seq)
    else:
        # NOTE(review): assumes multi_alignment returns the two sequences
        # gap-padded with '-' in input order -- confirm.
        full_seq, domain_seq = Structure.multi_alignment(
            [present_sequence, cath_seq])

    # domain_index[k] is the index in `residues` of the residue aligned to
    # cath_seq[k], or -1 when that position has no counterpart in the chain.
    domain_index = [-1] * len(cath_seq)
    fi, di = 0, 0
    for f, d in zip(full_seq, domain_seq):
        if f == '-':
            di += 1
        elif d == '-':
            fi += 1
        else:
            domain_index[di] = fi
            di += 1
            fi += 1
    res_list = [
        residues[idx] if idx != -1 else None for idx in domain_index
    ]

    # Look up each residue's representative atom once instead of once per
    # residue pair: CA for glycine, CB otherwise; None when unavailable.
    rep_atoms = [
        None if res is None else
        res.atom(name="CA" if res.name == 'GLY' else "CB")
        for res in res_list
    ]

    ## Compute the distogram
    size = len(res_list)
    distogram = np.full([size, size], -1.0)
    for i, atom1 in enumerate(rep_atoms):
        if atom1 is None:
            continue
        for j in range(i + 1, size):
            atom2 = rep_atoms[j]
            if atom2 is None:
                continue
            distogram[i, j] = distogram[j, i] = atom1.distance_to(atom2)

    if return_seq_pair:
        pdb_seq = "".join(
            threeToOne.get(res.name, 'X') if res is not None else '?'
            for res in res_list
        )
        return distogram, (cath_seq, pdb_seq)
    else:
        return distogram
예제 #11
0
파일: big.py 프로젝트: stjordanis/atomium
# Get all codes
response = requests.get("https://www.rcsb.org/pdb/rest/getCurrent")
codes = [child.attrib["structureId"] for child in ET.fromstring(response.text)]
print(f"There are {len(codes)} codes")

# Go through them
print(f"Processing a random {SUBSET} of them...")
shuffle(codes)
sub_codes = codes[:SUBSET]
# results[code][ext] holds str(model) on success or the error message on
# failure; results[code]["match"] tells whether all successfully parsed
# formats produced the same model string.
results = {}
for code in tqdm(sub_codes):
    results[code] = {}
    for ext in ("cif", "mmtf", "pdb"):
        try:
            pdb = atomium.fetch(f"{code}.{ext}")
            results[code][ext] = str(pdb.model)
        except Exception as e:
            # Deliberately broad: any failure is recorded, not raised.
            results[code][ext] = str(e)
    # Successful parses are recognised by the leading "<" of the model's
    # string form. startswith() is used rather than indexing [0] so that an
    # exception with an empty message does not raise IndexError.
    models = [
        results[code][ext] for ext in ("cif", "mmtf", "pdb")
        if results[code][ext].startswith("<")
    ]
    results[code]["match"] = len(set(models)) == 1

# Codes whose .pdb fetch failed with a "could not find" message.
no_pdb = [
    c for c in sub_codes if "could not find" in results[c]["pdb"].lower()
]
print(f"{len(no_pdb)} codes had no .pdb representation:")
print((" ".join(no_pdb) + "\n") if no_pdb else "")
예제 #12
0
파일: parse.py 프로젝트: psyche11/atomium
import sys
sys.path.insert(0, ".")
import atomium
from random import shuffle
from big import get_all_codes

# Collect every known PDB code.
print("Getting PDB codes...")
codes = get_all_codes()
print("There are {} codes.".format(len(codes)))

# Parse each code in random order as both .pdb and .cif and check that the
# two parses agree on the size of every collection.
print("Parsing...")
shuffle(codes)
for code in codes:
    print("\tParsing {}.pdb...".format(code))
    pdb = None
    try:
        pdb = atomium.fetch(code + ".pdb")
    except ValueError:
        # No .pdb file for this code; only the .cif is parsed.
        print("    Doesn't exist.")
    print("\tParsing {}.cif...".format(code))
    cif = atomium.fetch(code + ".cif")

    if pdb:
        for collection in ("chains", "residues", "ligands", "atoms"):
            from_pdb = getattr(pdb.model, collection)()
            from_cif = getattr(cif.model, collection)()
            assert len(from_pdb) == len(from_cif)
        assert len(pdb.assemblies) == len(cif.assemblies)
    print()
예제 #13
0
파일: parse.py 프로젝트: CHEMPHY/atomium
import sys
sys.path.insert(0, ".")
import atomium
from random import shuffle
from big import get_all_codes

# Collect every known PDB code.
print("Getting PDB codes...")
codes = get_all_codes()
code_total = len(codes)
print("There are {} codes.".format(code_total))

# Fetch each code in random order and report how many models it contains.
print("Parsing...")
shuffle(codes)
for code in codes:
    print("\tParsing {}...".format(code))
    pdb = atomium.fetch(code)
    model_count = len(pdb.models())
    plural = "" if model_count == 1 else "s"
    print("\tSuccess ({} model{}).\n".format(model_count, plural))