def test_basic_with_epitope(self):
        """
        Score the two sample structures when each complex is built with an
        explicit epitope residue list, and compare the three scores with the
        stored reference values.
        """
        query_path = DIRNAME + '/data/sample1.pdb'
        against_path = DIRNAME + '/data/sample2.pdb'

        parser = PDBParser(PERMISSIVE=1)
        query_struct = parser.get_structure(
            os.path.basename(query_path), query_path)
        against_struct = parser.get_structure(
            os.path.basename(against_path), against_path)

        # Each complex is restricted to its own epitope residue numbers.
        query_epitope = [211, 213, 214, 224, 225, 226, 227, 228, 229]
        against_epitope = [216, 217, 218, 219, 220, 221]
        query_complex = Complex(query_struct, epitope=query_epitope)
        against_complex = Complex(against_struct, epitope=against_epitope)

        # Fingerprints are computed with get_fp()'s own defaults.
        query_complex.get_fp()
        against_complex.get_fp()

        query_fp = query_complex.fp2str()
        against_fp = against_complex.fp2str()

        query = FPWithComplex(query_complex, query_fp)
        against = FPWithComplex(against_complex, against_fp)

        score1, score2, score3 = similarity_between(query, against)

        self.assertEqual(
            {"score1": score1, "score2": score2, "score3": score3},
            {'score1': 34.705754203703862, 'score2': 6, 'score3': 0})
    def test_with_epitope_another_cutoff(self):
        """
        Score the two sample structures with the similarity cutoff set to 5
        and compare the three scores with the stored reference values.
        """
        query_path = DIRNAME + '/data/sample1.pdb'
        against_path = DIRNAME + '/data/sample2.pdb'

        parser = PDBParser(PERMISSIVE=1)
        query_struct = parser.get_structure(
            os.path.basename(query_path), query_path)
        against_struct = parser.get_structure(
            os.path.basename(against_path), against_path)

        query_complex = Complex(query_struct)
        against_complex = Complex(against_struct)

        # Fingerprints are computed with get_fp()'s own defaults.
        query_complex.get_fp()
        against_complex.get_fp()

        query_fp = query_complex.fp2str()
        against_fp = against_complex.fp2str()

        query = FPWithComplex(query_complex, query_fp)
        against = FPWithComplex(against_complex, against_fp)

        # Only the cutoff differs from the plain default run.
        score1, score2, score3 = similarity_between(query, against, cutoff=5)

        self.assertEqual(
            {"score1": score1, "score2": score2, "score3": score3},
            {"score1": 119.75339423551459, "score2": 20, "score3": -8})
    def test_basic_with_another_spinimage(self):
        """
        Score the two sample structures when the fingerprints are computed
        with explicit (non-default) spin-image and sphere step sizes.
        """
        query_path = DIRNAME + '/data/sample1.pdb'
        against_path = DIRNAME + '/data/sample2.pdb'

        parser = PDBParser(PERMISSIVE=1)
        query_struct = parser.get_structure(
            os.path.basename(query_path), query_path)
        against_struct = parser.get_structure(
            os.path.basename(against_path), against_path)

        query_complex = Complex(query_struct)
        against_complex = Complex(against_struct)

        # The same step sizes are applied to both complexes.
        fp_options = dict(spin_image_radius_step=2,
                          spin_image_height_step=2,
                          sphere_radius_step=2)
        query_complex.get_fp(**fp_options)
        against_complex.get_fp(**fp_options)

        query_fp = query_complex.fp2str()
        against_fp = against_complex.fp2str()

        query = FPWithComplex(query_complex, query_fp)
        against = FPWithComplex(against_complex, against_fp)

        score1, score2, score3 = similarity_between(query, against)

        self.assertEqual(
            {"score1": score1, "score2": score2, "score3": score3},
            {'score1': 129.68169758476202, 'score2': 20, 'score3': 5})
    def test_basic(self):
        """
        Score the two sample structures with every option left at its default
        and compare the three scores with the stored reference values.
        """
        path1 = DIRNAME + '/data/sample1.pdb'
        path2 = DIRNAME + '/data/sample2.pdb'

        p = PDBParser(PERMISSIVE=1)

        query_struct = p.get_structure(os.path.basename(path1), path1)
        against_struct = p.get_structure(os.path.basename(path2), path2)

        query_complex = Complex(query_struct)
        against_complex = Complex(against_struct)

        query_complex.get_fp()
        against_complex.get_fp()

        query_fp_string = query_complex.fp2str()
        against_fp_string = against_complex.fp2str()

        query = FPWithComplex(query_complex, query_fp_string)
        against = FPWithComplex(against_complex, against_fp_string)

        score1, score2, score3 = similarity_between(query, against)

        # Each key now names the score it actually holds. The previous version
        # stored score2 under "score3" and score3 under "score2" in BOTH
        # dicts, so the assertion was equivalent but a failure report pointed
        # at the wrong score.
        expected = {"score1": 118.00269647021572, "score2": 20, "score3": -8}
        actual = {"score1": score1, "score2": score2, "score3": score3}

        self.assertEqual(actual, expected)
Beispiel #5
0
    def get_structure(self, *args):
        """
        Parse a structure from a local path, an http(s) URL or a file handle.

        Accepted call forms are ``(pdbId, fileName)`` and ``(fileName,)``; in
        the second form ``str(fileName)`` is used as the structure id.

        The input is first parsed as PDB (transparently decompressing paths
        ending in ``.gz``). If that raises, the exception is printed and the
        input is parsed again with ``MMCIFParser``. When
        ``self.removeHeteroDuplicated`` is truthy the result is passed through
        ``self.filterOutDuplicated`` before being returned.

        Raises:
            ValueError: if called with zero or more than two arguments.
            Exception: if a URL is given and the download does not succeed.
        """
        if len(args) == 2:
            pdbId, fileName = args
        elif len(args) == 1:
            fileName = args[0]
            pdbId = str(fileName)
        else:
            raise ValueError(
                "Error, input should be (id, fileName) or (fileName))")

        # File handles are valid input (see the gzip branch below), but
        # re.match() and str.endswith() only work on strings, so remember
        # whether we were given a path/URL string.
        is_path = isinstance(fileName, str)

        if is_path and re.match("http(s?)://", fileName):
            r = requests.get(fileName)
            if r.ok:
                # From here on the input is an in-memory handle, not a path.
                fileName = StringIO(r.text)
                is_path = False
            else:
                raise Exception("Error downloading pdb")

        try:
            if is_path and fileName.endswith(".gz"):
                # Text mode: the PDB parser works on str lines, and gzip.open
                # defaults to binary.
                with gzip.open(fileName, "rt") as f:
                    structure = PDBParser.get_structure(self, pdbId, f)
            else:
                structure = PDBParser.get_structure(self, pdbId, fileName)
        except Exception as e:
            # Best-effort fallback: report the PDB failure and retry as mmCIF.
            print(e)
            structure = MMCIFParser.get_structure(self, pdbId, fileName)
        if self.removeHeteroDuplicated:
            structure = self.filterOutDuplicated(structure)
        return structure
Beispiel #6
0
    def test_bad_charge(self):
        """Check handling of a malformed or blank charge field in PQR input."""
        # Two single-atom records: one with a corrupt charge column, one with
        # the charge column left blank.
        malformed = "ATOM      1  N   PRO     1      000001  02.000 3.0000 -0.W000  1.0000       N\n"
        missing = "ATOM      1  N   PRO     1      000001  02.000 3.0000          1.0000       N\n"

        # In permissive mode both records must parse and report no charge.
        permissive_parser = PDBParser(PERMISSIVE=True, is_pqr=True)
        for record in (malformed, missing):
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("always", PDBConstructionWarning)
                structure = permissive_parser.get_structure(
                    "test", StringIO(record))

            first_atom = next(structure.get_atoms())
            self.assertEqual(first_atom.get_charge(), None)

        # In strict mode the malformed record must be rejected outright.
        strict_parser = PDBParser(PERMISSIVE=False, is_pqr=True)
        with self.assertRaises(PDBConstructionException):
            strict_parser.get_structure("example", StringIO(malformed))
def score(query_pdb_path,
          against_pdb_path,
          query_fp_path=None,
          against_fp_path=None,
          query_epitope=None,
          against_epitope=None,
          spin_image_height_step=5,
          spin_image_radius_step=2,
          sphere_radius_step=2,
          cutoff=20.0,
          spin_image_radius_range=(0, 20),
          spin_image_height_range=(-30, 10),
          sphere_radius_range=(0, 20),
          callback=write_score_to_file,
          cbargs=None):
    """
    Compute the three similarity scores between two PDB structures.

    Both structures are parsed and wrapped in ``Complex`` objects (optionally
    restricted by an epitope list). Fingerprints are computed with the given
    step sizes unless BOTH ``query_fp_path`` and ``against_fp_path`` are
    supplied, in which case the fingerprint strings are read from those files.

    Args:
        query_pdb_path, against_pdb_path: paths of the two PDB files.
        query_fp_path, against_fp_path: optional fingerprint files; they are
            only used when both are given.
        query_epitope, against_epitope: epitope lists passed to ``Complex``;
            ``None`` (the default) means an empty list.
        spin_image_height_step, spin_image_radius_step, sphere_radius_step:
            forwarded to ``Complex.get_fp``.
        cutoff: forwarded to ``similarity_between``.
        spin_image_radius_range, spin_image_height_range, sphere_radius_range:
            accepted for interface compatibility; not used by this function.
        callback: if not ``None``, called as
            ``callback((score1, score2, score3), *cbargs)``.
        cbargs: extra positional arguments for ``callback``; ``None`` (the
            default) means no extra arguments.

    Returns:
        The tuple ``(score1, score2, score3)``.
    """
    # Fresh containers per call: list defaults in the signature would be
    # shared (and possibly mutated) across calls.
    query_epitope = [] if query_epitope is None else query_epitope
    against_epitope = [] if against_epitope is None else against_epitope
    cbargs = [] if cbargs is None else cbargs

    p = PDBParser(PERMISSIVE=1)

    query_struct = p.get_structure(os.path.basename(query_pdb_path),
                                   query_pdb_path)
    against_struct = p.get_structure(os.path.basename(against_pdb_path),
                                     against_pdb_path)

    query_complex = Complex(query_struct, query_epitope)
    against_complex = Complex(against_struct, against_epitope)

    if query_fp_path is None or against_fp_path is None:
        # At least one fingerprint file is missing: compute both from scratch.
        query_complex.get_fp(spin_image_radius_step=spin_image_radius_step,
                             spin_image_height_step=spin_image_height_step,
                             sphere_radius_step=sphere_radius_step)
        against_complex.get_fp(spin_image_radius_step=spin_image_radius_step,
                               spin_image_height_step=spin_image_height_step,
                               sphere_radius_step=sphere_radius_step)

        query_fp_string = query_complex.fp2str()
        against_fp_string = against_complex.fp2str()
    else:
        # Both fingerprint files were supplied: read them as-is.
        with open(query_fp_path, 'r') as f1, open(against_fp_path, 'r') as f2:
            query_fp_string = f1.read()
            against_fp_string = f2.read()

    query = FPWithComplex(query_complex, query_fp_string)
    against = FPWithComplex(against_complex, against_fp_string)

    score1, score2, score3 = similarity_between(query, against, cutoff=cutoff)

    if callback is not None:
        callback((score1, score2, score3), *cbargs)
    return score1, score2, score3
def save_to_csv(input_path, output_path):
    """
    Parse every ``*.pdb`` file matching ``input_path`` and write two CSV files.

    ``input_path`` and ``output_path`` are used as plain string prefixes
    (``input_path + '*.pdb'``, ``output_path + 'coordinate_...'``), so
    directory paths must end with a path separator.

    Each matching file is run through the PDB parser, then
    ``data_processing(input_path)`` is called once. Its first two results are
    written to ``coordinate_<id>.csv`` and ``missing_seq_<id>.csv``, where
    ``<id>`` is the base name (without extension) of the LAST file returned
    by ``glob``.

    Raises:
        ValueError: if no ``.pdb`` file matches (previously this surfaced as
            an unbound-variable error when building the output file names).
    """
    pdb_files = glob(input_path + '*.pdb')
    if not pdb_files:
        raise ValueError("No .pdb files found matching " + input_path +
                         '*.pdb')

    parser = PDBParser(PERMISSIVE=True)
    for filename in pdb_files:
        base = os.path.basename(filename)
        structure_id = os.path.splitext(base)[0]
        # The parsed structure is not kept; the call is retained so that a
        # file the parser rejects still aborts the run as before.
        parser.get_structure(structure_id, filename)

    data_total = data_processing(input_path)
    data_total[0].to_csv(output_path + 'coordinate_' + structure_id + '.csv',
                         index=False)
    data_total[1].to_csv(output_path + 'missing_seq_' + structure_id + '.csv',
                         index=False)
Beispiel #9
0
def getPdbSequance(pdb_file, chain_id):
    """
    Extract the sequence and residue numbers of one chain of a PDB file.

    Only residues of the first model are considered. A residue is kept when
    its hetero flag is blank or one of ``H_MSE``/``H_M3L``/``H_CAS`` and it
    has N, CA and C atoms plus a CB atom (CB is not required for GLY).
    Rejected residues are reported on stdout unless their flag is ``W``.

    The process exits if the chain is not present.

    Returns:
        ``(pdb_seq, pdb_indexes)``: the result of ``three2one`` applied to
        the kept residue names, and the matching residue numbers.
    """
    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')

    structure = PDBParser(PERMISSIVE=1, QUIET=True).get_structure("", pdb_file)
    # The id used in messages is the file name minus its last four characters.
    pdb_id = pdb_file[0:-4]

    if not structure[0].has_id(chain_id):
        print("PDB " + pdb_id + " doesn't have chain with id " + chain_id)
        print()
        exit()

    residue_names = []
    residue_numbers = []
    for res in structure[0][chain_id]:
        het_flag = res.get_id()[0]
        has_backbone = (res.has_id('N') and res.has_id('CA')
                        and res.has_id('C'))
        has_side_chain = res.get_resname() == 'GLY' or res.has_id('CB')

        if het_flag in accepted_flags and has_backbone and has_side_chain:
            residue_names.append(res.get_resname())
            residue_numbers.append(res.get_id()[1])
        elif het_flag != 'W':
            print("Unknown residue in " + pdb_id + " with res_id " + het_flag)

    return three2one(residue_names), residue_numbers
Beispiel #10
0
def get_CA_coordinates(filename, my_set):
    """
    Collect the CA (alpha-carbon) coordinates of selected residues.

    Only the first model of the PDB file is read. A residue is selected when
    its hetero flag is blank and the string
    ``str(residue number) + chain id`` is contained in ``my_set``.

    Args:
        filename: path of the PDB file.
        my_set: container of ``"<residue number><chain id>"`` strings.

    Returns:
        dict mapping ``"<residue name><residue number>"`` to the CA
        coordinates as a tuple. Selected residues that have no CA atom are
        left out.
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("code.pdb", filename)
    model = s[0]

    CA_coordinates = {}

    sys.stderr.write("Calculating CA coordinates of residues...\n")
    for chain in model:
        for residue in chain:
            residue_name = str(
                residue.get_full_id()[3][1]) + residue.get_full_id()[2]
            if residue.get_id()[0] != " " or residue_name not in my_set:
                continue

            # Reset per residue: without this, a residue lacking a CA atom
            # would be stored with the coordinates of the previous residue
            # (or raise NameError if it was the first one).
            ca_coord = None
            for atom in residue:
                if atom.get_name() == "CA":
                    ca_coord = atom.get_coord()

            if ca_coord is not None:
                residue_number = str(residue.get_resname()) + str(
                    residue.get_id()[1])
                CA_coordinates[residue_number] = tuple(ca_coord)
    return CA_coordinates
Beispiel #11
0
def getrange(pdbfile):
    """
    Find the residue-number range shared by all chains of the first model.

    For every chain, the leading run of residues whose hetero flag is blank
    is located; its first and last residue numbers are that chain's range.

    Returns:
        ``(pf, pt, names)`` where ``pf`` is the largest first residue number
        over all chains (never below 0), ``pt`` is the smallest last residue
        number (never above 1000000), and ``names`` is the space-joined list
        of chain ids.

    Raises:
        AssertionError: if a chain starts with a hetero residue.
        IndexError: if a chain has no residues.
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("", pdbfile)
    chnames = []
    pf, pt = 0, 1000000
    for c in s[0]:  # iterate over chains
        residues = list(c)  # all residues of the chain, in file order
        chnames.append(c.get_id())

        # The first residue must not be a hetero residue.
        assert (residues[0].get_full_id()[3][0]) == ' '

        # Index of the last residue of the leading non-hetero run. The old
        # scan stopped one short of the chain end and then stepped back
        # unconditionally, which dropped the final residue whenever the chain
        # contained no hetero residues at all.
        last = 0
        for idx, res in enumerate(residues):
            if res.get_full_id()[3][0] != ' ':
                break
            last = idx

        f, t = residues[0].get_id()[1], residues[last].get_id()[1]

        if f > pf:
            pf = f
        if t < pt:
            pt = t

    return pf, pt, ' '.join(chnames)
def get_best_res(target_dist, label=None):
    """
    Search the ``top500H`` directory tree for the best-scoring residue.

    Every file below ``top500H`` is parsed as PDB. For each residue (or only
    residues whose one-letter code equals ``label`` when it is given) a
    distance description is built with ``get_first_last_atoms`` and
    ``get_distance`` and scored against ``target_dist`` with ``getScore``.

    Args:
        target_dist: reference value passed to ``getScore``.
        label: optional one-letter residue code used as a filter.

    Returns:
        The residue with the lowest score, or ``None`` if nothing was scored
        (for example when the directory does not exist).
    """
    minimal_score = 1000000000000000000
    best_part = None
    ava = 0
    for subdir, dirs, files in os.walk("top500H"):
        for file_name in files:
            try:
                ava += 1
                filepath = subdir + os.sep + file_name
                p = PDBParser(PERMISSIVE=1, QUIET=True)
                structure = p.get_structure('file', filepath)
                residues = [
                    residue for model in structure for chain in model
                    for residue in chain
                ]
                atoms = [atom for residue in residues for atom in residue]
                print("Processing ", file_name, ava / 5, "%")
                for i, res in enumerate(residues):
                    if label is None or label == three_to_one(
                            res.get_resname()):
                        first, last = get_first_last_atoms(res, i)
                        dist = get_distance(atoms, first, last)
                        # Score once and reuse it for both the comparison and
                        # the update.
                        candidate_score = getScore(dist, target_dist)
                        if candidate_score < minimal_score:
                            minimal_score = candidate_score
                            best_part = res
            except Exception:
                # Deliberate best-effort: any failure while handling a file
                # ends processing of that file and the search moves on.
                # NOTE(review): this also hides errors raised by the helper
                # functions part-way through a file - confirm that is wanted.
                pass
    return best_part
Beispiel #13
0
def get_surface_residues(filename, my_acc_array, my_threshold):
    """
    Find the solvent-exposed residues of the first model of a PDB file.

    DSSP (``mkdssp``) is run on the model, and every entry whose value at
    index 3 is at least ``my_threshold`` is reported. Entries whose value at
    index 3 is a string (seen as "NA") are skipped.

    Args:
        filename: path of the PDB file.
        my_acc_array: passed to ``DSSP`` as ``acc_array``.
        my_threshold: minimum accessibility value for a residue to count as
            exposed. There is no default; the caller must supply it.

    Returns:
        set of ``"<residue number><chain id>"`` strings.
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("code.pdb", filename)
    model = s[0]
    d = DSSP(model, filename, dssp='mkdssp', acc_array=my_acc_array)

    sys.stderr.write("\nHandled %i residues\n" % len(d))

    residue_number = set()

    # Materialise the keys once: the lookup below is positional, and
    # rebuilding the list for every element was quadratic.
    keys = list(d.keys())

    for element in sorted(d):
        if isinstance(element[3], str):  # sometimes element[3] is "NA"
            continue
        if element[3] >= my_threshold:
            # element[0] is used as a 1-based position into the key list.
            try:
                key = keys[element[0] - 1]
                residue_number.add(str(key[1][1]) + key[0])
            except IndexError:
                # keys[0] rather than d.keys()[0]: a keys view cannot be
                # indexed on Python 3, so the handler itself used to fail.
                sys.stderr.write("Element " + str(keys[0]) +
                                 " index out of range\n")
    return residue_number
Beispiel #14
0
def main(file, atom, CAd=15, CBd=12, mind=6):
    """
    Analyze a PDB file using inter-residue distances and contact thresholds.

    The structure is parsed and filtered, a distance matrix is computed for
    the chosen ``atom`` mode, and both a distance heatmap and a binary
    contact map are plotted.

    Args:
        file: path of the PDB file.
        atom: distance mode, passed to the matrix and plotting helpers.
        CAd, CBd, mind: thresholds stored under the keys ``"CA"``, ``"CB"``
            and ``"min"`` and passed to ``contact_map``.

    Returns:
        ``(dist_matrix, cont_matrix)``.
    """
    logging.info("Analyzing %s using %s", file, atom)

    dist = {"CA": CAd, "CB": CBd, "min": mind}
    # Derive the output name from the `file` argument. The previous code read
    # the global `args.file`, which only exists when run as a script and could
    # disagree with the file actually being analyzed.
    base = os.path.basename(file)
    name_f = os.path.splitext(base)[0]
    parser = PDBParser(PERMISSIVE=1)
    # Route parser warnings through logging while the analysis runs.
    logging.captureWarnings(True)

    structure = parser.get_structure("test", file)

    residues = filter_residues(structure)
    dist_matrix = calc_dist_matrix(residues, atom)
    title_dist = 'Distances of the file {}'.format(name_f)
    name_heatmap = plots.plot_heatmap(dist_matrix, name_f, title_dist, atom)
    logging.info("Heatmap %s created", name_heatmap)
    cont_matrix = contact_map(dist_matrix, atom, dist)
    title_bin = 'Distance contacts of the file {}'.format(name_f)
    name_bin = plots.plot_matrix_binary(cont_matrix, name_f, title_bin, atom)
    logging.info("Contact map %s created", name_bin)
    logging.captureWarnings(False)

    return(dist_matrix, cont_matrix)
Beispiel #15
0
def load_model(model_fname, model_fmt):
    """
    Read a transformation model stored in one of the supported formats.

    Supported values of ``model_fmt`` are ``'pdb'``, ``'json'`` and
    ``'hdf5'``. ``None`` is returned (after logging an error) when the format
    is unknown or when a PDB file does not describe a transformation;
    otherwise the loaded model is returned.
    """
    if model_fmt == 'pdb':
        struct = PDBParser(PERMISSIVE=True).get_structure(
            'PROMPTPY', model_fname)
        if not pdb.is_transformation(struct):
            logger.error('specified PDB file is not a transformation')
            return None
        # Coordinates come from the whole structure, masses from model 0.
        coords = pdb.get_atom_coordinates(struct)
        masses = pdb.get_atom_masses(pdb.extract_model(struct, 0), True)
        return model.from_conf_coords(coords, masses)

    if model_fmt == 'json':
        with open(model_fname) as handle:
            return model.Transformation.from_dict(json.load(handle))

    if model_fmt == 'hdf5':
        return model.read_hdf5(model_fname)

    logger.error('unknown model format %s', model_fmt)
    return None
def distance_alpha_c(centers, draw, unit_dir):
    """
    Measure, per unit, the distance from its center of mass to each alpha
    carbon.

    :param centers: mapping from unit file name to that unit's center of mass
    :param draw: if True, the distances of each unit are drawn through
                 ``utils.draw_distance_center_mass_alpha``
    :param unit_dir: directory prefix holding one structure file per unit; it
                     is concatenated directly with the file name
    :return: dictionary mapping each unit to a dictionary of distances keyed
             by the alpha carbon id followed by its position index
    """
    distances = {}
    for unit in sorted(os.listdir(unit_dir), key=utils.natural_keys):
        structure = PDBParser().get_structure(unit, unit_dir + unit)
        alpha_carbons = alpha_carbon(structure)
        center = centers[unit]

        distances[unit] = {
            ca.get_id() + str(index): distance(ca.get_coord(), center)
            for index, ca in enumerate(alpha_carbons)
        }

        if draw:
            utils.draw_distance_center_mass_alpha(unit, center, alpha_carbons)

    return distances
Beispiel #17
0
def compute_localQ_init(MAX_OFFSET=4, DISTANCE_CUTOFF=9.5):
    """
    Build the native contacts table for the 2xov structure.

    The file ``opt/pulling/2xov.pdb`` under the user's home directory is
    parsed and one coordinate is collected per residue of the first model:
    CA for GLY, CB for every other accepted residue. The process exits on
    the first residue that is not accepted.

    The collected coordinates, ``MAX_OFFSET`` and ``DISTANCE_CUTOFF`` are
    passed to ``compute_native_contacts`` and its result is returned.
    """
    from pathlib import Path

    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    pdb_path = os.path.join(str(Path.home()), "opt/pulling/2xov.pdb")
    structure = PDBParser(PERMISSIVE=1).get_structure('2xov', pdb_path)

    native_coords = []
    for chain in structure[0].get_list():
        for res in chain:
            if res.get_resname() == 'GLY':
                # Glycine has no CB, so its CA stands in.
                native_coords.append(res['CA'].get_coord())
                continue

            het_flag = res.get_id()[0]
            has_ca_and_o = res.has_id('CA') and res.has_id('O')
            if het_flag in accepted_flags and has_ca_and_o:
                native_coords.append(res['CB'].get_coord())
            else:
                print('ERROR: irregular residue at %s!' % res)
                exit()

    return compute_native_contacts(native_coords, MAX_OFFSET, DISTANCE_CUTOFF)
Beispiel #18
0
def deleteChain():
    """
    Interactively delete whole chains from a PDB file and save the result.

    The structure is read from the module-level ``pdb_name``. The user is
    asked how many chains to delete and then for each chain id; every chain
    with that id is detached from every model.

    The result is saved either as ``<sequence>_bound.pdb`` (the lower-cased
    peptide sequence of what is left) when the user answers ``y`` to the last
    prompt, or as ``<name>_without<last chain id>.pdb`` otherwise.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)
    seq = ''
    # Defined up front so the output file name can still be built when the
    # user asks to delete zero chains.
    rm_chain = ''

    # int(raw_input()) instead of input(): Python 2's input() evaluates
    # whatever the user types as code.
    nb_chain = int(raw_input('How many chain do you want to delete : '))
    for i in range(nb_chain):
        rm_chain = raw_input('What chain you want to delete : ')
        for model in structure:
            # Look the chain up by id rather than detaching while iterating
            # over the model's own child list.
            if model.has_id(rm_chain):
                model.detach_child(rm_chain)

    pept = raw_input('Do you want to get a pdb with the sequence in its name : ')
    w = PDBIO()
    w.set_structure(structure)
    if pept == 'y':
        ppb = PPBuilder()
        for pp in ppb.build_peptides(structure):
            seq = seq + pp.get_sequence()
        seq = str(seq.lower())
        w.save(seq + '_bound.pdb')
    else:
        w.save(nameStruct + '_without' + rm_chain + '.pdb')
Beispiel #19
0
def load_chains(raw, pdb_id, pdb_type, known):
    """
    Index the known residues of every chain in a PDB structure.

    ``raw`` is parsed with ``pdb_id`` as structure id. For each residue whose
    stripped name is a key of ``known`` a unit id is built by joining, with
    underscores, the structure id, ``pdb_type``, model id, chain id, residue
    number, residue name and insertion code.

    Returns a dict with an ``'ordering'`` list of all unit ids in file order
    plus, per chain id that has at least one known residue, a dict with its
    ``'residues'`` (unit ids) and ``'sequence'`` (the concatenated ``known``
    values).
    """
    structure = PDBParser().get_structure(pdb_id, raw)
    data = {'ordering': []}

    for model in structure:
        for chain in model:
            chain_id = chain.get_id()
            entry = {'residues': [], 'sequence': []}
            data[chain_id] = entry

            for residue in chain:
                short_name = residue.resname.strip()
                if short_name not in known:
                    continue
                _, number, insertion = residue.get_id()
                fields = (structure.get_id(), pdb_type, model.get_id(),
                          chain_id, number, residue.resname, insertion)
                unit_id = '_'.join(str(field).strip() for field in fields)
                data[chain_id]['residues'].append(unit_id)
                data[chain_id]['sequence'].append(known[short_name])
                data['ordering'].append(unit_id)

            # Chains without any known residue are dropped again.
            if data[chain_id]['residues']:
                data[chain_id]['sequence'] = ''.join(
                    data[chain_id]['sequence'])
            else:
                del data[chain_id]

    return data
Beispiel #20
0
def get_aa_residues(pdb, chain):
    """
    Get the non-hetero residues of one chain of a PDB file.

    pdb: Protein Data Bank file.
    chain: Id of the chain to read, taken from the first model.

    returns: List of Biopython PDB Residue objects of that chain whose hetero
    flag is blank, in chain order.
    """
    parser = PDBParser()
    structure = parser.get_structure("prot", pdb)
    model = structure[0]
    chain = model[chain]

    # Filter in a single pass. The previous version collected the hetero
    # residues and removed them one by one from the full list, which is
    # quadratic in the chain length.
    return [res for res in chain.get_residues() if res.get_id()[0] == " "]
Beispiel #21
0
def residue_depth(pdbName,
                  ReaderAtomsInput,
                  filename,
                  UseInterfaceAtoms=False):
    """
    Compute, for each atom, its minimum distance to the molecular surface.

    The first model of ``filename`` is parsed (``pdbName`` is the structure
    id). All of its atoms are considered unless ``UseInterfaceAtoms`` is
    set, in which case the atom list is first passed through
    ``pdbReader_to_BioPyth`` together with ``ReaderAtomsInput``.

    Returns the result of ``BioPyth_to_pdbReader`` applied to the list of
    ``[atom, distance]`` pairs and ``ReaderAtomsInput``.
    """
    model = PDBParser(PERMISSIVE=1).get_structure(pdbName, filename)[0]

    bio_atoms = [
        atom for chain in model for residue in chain for atom in residue
    ]
    if UseInterfaceAtoms:
        bio_atoms = pdbReader_to_BioPyth(ReaderAtomsInput, bio_atoms)

    surface = get_surface(model)
    depth_pairs = [[atom, min_dist(atom.get_coord(), surface)]
                   for atom in bio_atoms]

    return BioPyth_to_pdbReader(depth_pairs, ReaderAtomsInput)
def free_cys_tyr(pdb_utils):
    """
    Annotate experimental structures with their free CYS and TYR residues.

    Every ``ExperimentalStructure`` matching the ``free_tyr`` query filter
    that has neither a ``free_cys`` nor a ``free_tyr`` residue set is
    processed. Its PDB file is located through ``pdb_utils`` (and fetched
    with ``update_pdb`` if missing), and the first model is scanned. A CYS
    (atom SG) or TYR (atom OH) counts as free when no atom of any other
    residue lies within a distance of 3 of that atom.

    Non-empty results are stored as ``ResidueSet`` entries named
    ``free_cys`` / ``free_tyr`` and the document is saved. Structures whose
    file is unavailable or cannot be parsed are skipped.
    """
    parser = PDBParser(PERMISSIVE=1, QUIET=1)
    codes = {"CYS": "SG", "TYR": "OH"}
    _log.debug("procesing free cys/tyr")
    total = ExperimentalStructure.objects(residue_sets__name__ne = "free_tyr").count()
    for strdoc in tqdm(ExperimentalStructure.objects(residue_sets__name__ne = "free_tyr").no_cache().timeout(False), total=total):

        if strdoc.residue_set("free_cys") or strdoc.residue_set("free_tyr"):
            continue

        if not os.path.exists(pdb_utils.pdb_path(strdoc.name)):
            pdb_utils.update_pdb(strdoc.name)
        if not os.path.exists(pdb_utils.pdb_path(strdoc.name)):
            continue
        try:
            bp_pdb = list(parser.get_structure(strdoc.name, pdb_utils.pdb_path(strdoc.name)))[0]
        except PDBConstructionException:
            continue
        except TypeError:
            continue

        # The full atom set does not depend on the residue being examined,
        # so build it once per structure instead of once per CYS/TYR.
        all_atoms = set(bp_pdb.get_atoms())

        free = {"CYS": [], "TYR": []}
        for x in bp_pdb.get_residues():
            atom_name = codes.get(x.resname)
            if atom_name is None or atom_name not in x:
                continue
            probe = x[atom_name]
            neighbor_atoms = all_atoms - set(x)
            # Subtracting two atoms yields their distance.
            if not any((probe - atom) <= 3 for atom in neighbor_atoms):
                free[x.resname].append(x.parent.id + "_" + str(x.id[1]))

        if free["CYS"]:
            rs = ResidueSet(name="free_cys", residues=free["CYS"])
            strdoc.residue_sets.append(rs)
        if free["TYR"]:
            rs = ResidueSet(name="free_tyr", residues=free["TYR"])
            strdoc.residue_sets.append(rs)
        if free["CYS"] or free["TYR"]:
            strdoc.save()
Beispiel #23
0
def getPDBSequence(pdb_name, pdb_path, chain):
    """
    Read the standard amino-acid sequence of one chain of a PDB file.

    Only the first model is used. Residues that are not standard amino acids
    are skipped.

    Args:
        pdb_name: structure id given to the parser (also used in messages).
        pdb_path: path of the PDB file.
        chain: id of the chain to read.

    Returns:
        ``(residue_position, residue_name)``: the residue numbers and the
        matching one-letter codes, in chain order.

    Raises:
        Exception: if anything fails while parsing or walking the chain; the
            original error is printed and logged first.
    """
    logging.info("getPDBSequence pdb " + pdb_name + " cadena " + chain)
    from Bio.PDB.PDBParser import PDBParser
    from Bio.PDB.Polypeptide import three_to_one
    from Bio.PDB.Polypeptide import is_aa
    residue_position = []
    residue_name = list()
    try:
        parser = PDBParser(PERMISSIVE=1)
        structure = parser.get_structure(pdb_name, pdb_path)
        model = structure[0]
        # Keep `chain` as the id string. Rebinding it to the Chain object made
        # the string concatenations in the handler below raise TypeError,
        # which hid the real error.
        chain_obj = model[chain]
        for residue in chain_obj:
            if is_aa(residue.get_resname(), standard=True):
                residue_name.append(three_to_one(residue.get_resname()))
                residue_position.append(residue.get_full_id()[3][1])

    except Exception as inst:
        print(inst)
        logging.error(
            "Error no controlado intentando leer la sequencia del pdb " +
            pdb_name + " cadena " + chain + " path " + pdb_path)
        raise Exception("PDB Invalido pdb " + pdb_name + " cadena " + chain +
                        " path " + pdb_path)
    return residue_position, residue_name
    '''
Beispiel #24
0
def parse_pdb_local(code):
    """
    Parse a structure from the local gzip-compressed PDB mirror.

    The file is looked up at
    ``<LOCAL_PDB_DIR>/<chars 2-3 of code>/pdb<code>.ent.gz`` using the
    lower-cased ``code``, which is also used as the structure id.

    Returns the parsed structure.
    """
    code = code.lower()
    path = '%s/%s/pdb%s.ent.gz' % (LOCAL_PDB_DIR, code[1:3], code)
    p = PDBParser()
    # The context manager closes the handle even if parsing raises; it was
    # previously never closed.
    # NOTE(review): the file is opened in binary mode as before - under
    # Python 3 the parser may need text mode ('rt'); confirm.
    with gzip.open(path, 'rb') as f:
        structure = p.get_structure(code, f)
    return structure
Beispiel #25
0
 def update_entry_data(self, code, pdb_path):
     """
     Process the residues and atoms of every chain of a PDB entry.

     The ``PDB`` record with the given ``code`` is fetched, the file at
     ``pdb_path`` is parsed, and each chain of its first model is handed to
     ``_process_chain_residues`` and then ``_process_chain_atoms``.
     """
     pdb_model = PDB.objects.get(code=code)
     structure = PDBParser(PERMISSIVE=True, QUIET=True).get_structure(
         code, pdb_path)
     first_model = structure[0]
     # Materialised as a list before being wrapped in the progress bar.
     for chain in tqdm(list(first_model.get_chains())):
         self._process_chain_residues(pdb_model, chain)
         self._process_chain_atoms(pdb_model, chain)
def load_chains(raw, pdb_id, pdb_type, known):
    """
    Collect, per chain, the unit ids and sequence of all known residues.

    ``raw`` is parsed with ``pdb_id`` as structure id. A residue counts as
    known when its stripped name is a key of ``known``. Its unit id is the
    underscore-joined structure id, ``pdb_type``, model id, chain id, residue
    number, residue name and insertion code.

    Returns a dict holding the global ``'ordering'`` list of unit ids and,
    for each chain id with at least one known residue, a dict with the
    chain's ``'residues'`` list and its ``'sequence'`` string.
    """
    structure = PDBParser().get_structure(pdb_id, raw)
    data = {'ordering': []}

    for model in structure:
        for chain in model:
            chain_id = chain.get_id()
            data[chain_id] = {'residues': [], 'sequence': []}

            for residue in chain:
                key = residue.resname.strip()
                if key not in known:
                    continue
                res_id = residue.get_id()
                parts = (structure.get_id(), pdb_type, model.get_id(),
                         chain_id, res_id[1], residue.resname, res_id[2])
                unit_id = '_'.join(str(part).strip() for part in parts)
                data[chain_id]['residues'].append(unit_id)
                data[chain_id]['sequence'].append(known[key])
                data['ordering'].append(unit_id)

            # Keep only chains that contributed at least one residue.
            if data[chain_id]['residues']:
                data[chain_id]['sequence'] = ''.join(data[chain_id]['sequence'])
            else:
                del data[chain_id]

    return data
Beispiel #27
0
def getPdbSequance(pdb_file, chain_id):
    """
    Extract the sequence and residue numbers of one chain of a PDB file.

    Only the first model is read. A residue is kept when its hetero flag is
    blank or one of ``H_MSE``/``H_M3L``/``H_CAS`` and it has N, CA and C
    atoms plus a CB atom (CB is not required for GLY). Any other residue is
    reported on stdout unless its flag is ``W``.

    The process exits if the chain is not present.

    Returns:
        ``(pdb_seq, pdb_indexes)``: the result of ``three2one`` applied to
        the kept residue names, and the matching residue numbers.
    """
    # Indentation is normalised to spaces (the tab/space mix is a TabError on
    # Python 3) and print is called as a function so the block parses on both
    # Python 2 and Python 3.
    pdb_indexes = []
    pdb_sequance = []

    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("", pdb_file)
    # The id used in messages is the file name minus its last four characters.
    pdb_id = pdb_file[0:-4]

    if not s[0].has_id(chain_id):
        print("PDB " + pdb_id + " doesn't have chain with id " + chain_id)
        print("")
        exit()

    chain = s[0][chain_id]

    for res in chain:
        is_regular_res = res.has_id('N') and res.has_id('CA') and res.has_id(
            'C') and (res.get_resname() == 'GLY' or res.has_id('CB'))
        res_id = res.get_id()[0]
        if (res_id == ' ' or res_id == 'H_MSE' or res_id == 'H_M3L'
                or res_id == 'H_CAS') and is_regular_res:
            pdb_sequance.append(res.get_resname())
            pdb_indexes.append(res.get_id()[1])
        elif res_id != 'W':
            print("Unknown residue in " + pdb_id + " with res_id " + res_id)

    pdb_seq = three2one(pdb_sequance)

    return pdb_seq, pdb_indexes
Beispiel #28
0
def get_biopython_structure(path, model_id=None):
    """
    Parse a ``.pdb`` or gzip-compressed ``.gz`` file into a structure.

    Args:
        path: file path; surrounding whitespace is stripped.
        model_id: structure id; defaults to the file's base name.

    Returns:
        The parsed structure.

    The process exits with an error message when the file extension is
    neither ``pdb`` nor ``gz``.
    """
    path = path.strip()
    file_name = os.path.basename(path)
    if not model_id:
        model_id = file_name

    extension = file_name.split('.')[-1]
    if extension not in ("pdb", "gz"):
        sys.exit("Unknown extension to read PDB: " + path)

    parser = PDBParser()
    if extension == "pdb":
        return parser.get_structure(model_id, path)

    # The context manager closes the handle even when parsing raises;
    # previously an exception left it open.
    # NOTE(review): binary mode is kept as before - under Python 3 the parser
    # may need text mode ('rt'); confirm.
    with gzip.open(path, 'rb') as handle:
        return parser.get_structure(model_id, handle)
def read_pdbs(directory, verbose=False):
    """
    Read every ``.pdb`` file of a directory and return their first models.

    Args:
        directory: directory holding the input files. A trailing ``/`` is no
            longer required.
        verbose: if True, progress messages are printed.

    Returns:
        List with the first model of each parsed file.

    The process exits with status 1 (after writing a message to stderr) when
    the directory does not exist, a file cannot be parsed, no ``.pdb`` file
    is found, or any model does not contain exactly two chains.
    """
    if verbose:
        print("Reading pdb input files from %s" % directory)

    if not os.path.isdir(directory):
        sys.stderr.write(
            "Directory %s doesn't exists, please select a valid directory." %
            directory)
        sys.exit(1)

    parser = PDBParser(PERMISSIVE=1, QUIET=True)
    try:
        # os.path.join gives the same path as plain concatenation when the
        # directory ends with "/", and also works when it does not.
        pdbmodels = [
            parser.get_structure("Model_pair", os.path.join(directory, f))[0]
            for f in listdir(directory) if f.endswith(".pdb")
        ]
    except Exception:
        # `except Exception` rather than a bare except, so Ctrl-C and
        # SystemExit are not reported as a file-format problem.
        sys.stderr.write(
            "PDB files couldn't be opened. Please, revise that their format is correct."
        )
        sys.exit(1)

    if not bool(pdbmodels):  # no .pdb file was found
        sys.stderr.write(
            "No pdb files where read. Please make sure the given directory contains pdb files. "
        )
        sys.exit(1)
    for model in pdbmodels:
        if len(model.child_list) != 2:
            sys.stderr.write(
                "A pdb input file doesn't contains two chains. Please, all input pdbs must only contain "
                "two chains.")
            sys.exit(1)
    if verbose:
        print("Pdb objects stored")
    return pdbmodels
def get_template_stech_dict(template, seq_dict, verbose=False):
    """
    Count how many times each known sequence occurs in a PDB template.

    Every chain of the template's first model is wrapped in ``CustomChain``
    and detached from its parent. When its sequence is a key of ``seq_dict``
    the chain id is replaced by the mapped id and that id's counter is
    incremented.

    Returns a dict of the form ``{"A": 2, "B": 3, ...}`` mapping chain id to
    number of occurrences. With ``verbose`` the counts are also printed.
    """
    counts = {}
    template_model = PDBParser(PERMISSIVE=1, QUIET=True).get_structure(
        "template", template)[0]

    for original_chain in template_model:
        custom = CustomChain(original_chain)
        # Dropping the parent avoids Biopython complaints about repeated ids.
        custom.parent = None
        sequence = custom.get_sequence()
        if sequence not in seq_dict:
            continue
        custom.id = seq_dict[sequence]
        counts[custom.id] = counts.get(custom.id, 0) + 1

    if verbose:
        summary = ",".join(key + ":" + str(counts[key])
                           for key in sorted(counts.keys()))
        print("Template's Stoichiometry is: " + summary)
    return counts
Beispiel #31
0
def Init():
    """Read ``task.input`` and prepare atom coordinates and scattering vectors.

    ``task.input`` holds ``key=value`` lines; lines starting with ``/`` and
    empty lines are skipped. ``angle`` lines are parsed as comma-separated
    floats (collected but not returned by this function); every other key is
    stored as a stripped string. The keys read here are ``protein_file``,
    ``CENTER``, ``scr_size``, ``pix_size``, ``distance`` and ``lambda``.

    Returns:
        (s, allarr): ``s`` is a ``(scr_size, scr_size, 3)`` array with one
        vector per detector pixel; ``allarr`` is an ``(n_atoms, 3)`` array of
        atom coordinates, shifted to zero mean when ``CENTER`` is ``ON``.

    Raises:
        ValueError: if ``protein_file`` is neither a ``.cif`` nor a ``.pdb``
            file.
        KeyError: if a required key is missing from ``task.input``.
    """
    import os

    para = {}
    jobs = []
    # The context manager closes the file even when a line fails to parse.
    with open("task.input", "r") as ptask:
        for line in ptask:
            if line.startswith(('/', '\n')):
                continue
            # Split on the first "=" only so values may contain "=".
            a, b = line.split("=", 1)
            if a == 'angle':
                jobs.append([float(x) for x in b.strip().split(',')])
            else:
                para[a] = b.strip()

    filename = para['protein_file']
    # splitext looks at the final extension only, so directories or names
    # containing extra dots ("./data/1abc.pdb") are classified correctly.
    file_type = os.path.splitext(filename.strip())[1].lstrip('.')
    if file_type == 'cif':
        mt = MMCIF2Dict(filename)
        xlist = [float(x) for x in mt['_atom_site.Cartn_x']]
        ylist = [float(x) for x in mt['_atom_site.Cartn_y']]
        zlist = [float(x) for x in mt['_atom_site.Cartn_z']]
        allarr = numpy.vstack((xlist, ylist, zlist)).T
    elif file_type == 'pdb':
        parser = PDBParser()
        structure = parser.get_structure("test", filename)
        allarr = numpy.array(
            [atom.get_coord() for atom in structure.get_atoms()])
    else:
        # Previously this fell through and failed later on an undefined name.
        raise ValueError(
            "Unsupported protein file type %r (expected .cif or .pdb)"
            % file_type)

    if para['CENTER'] == 'ON':
        # Shift the coordinates so that their mean is the origin.
        allarr = allarr - allarr.mean(axis=0)

    scr_size = int(para['scr_size'])
    pix_size = float(para['pix_size'])
    distance = float(para['distance'])
    wavenum = 1.0 / float(para['lambda'])
    # Offset that places the detector centre between the middle pixels.
    ssc = scr_size / 2.0 - 0.5

    s = numpy.zeros((scr_size, scr_size, 3))
    for i in range(scr_size):
        for j in range(scr_size):
            x = (i - ssc) * pix_size
            y = (j - ssc) * pix_size
            z = distance
            sr = numpy.sqrt(x * x + y * y + z * z)
            # Direction to the pixel scaled by 1/lambda, minus the vector
            # (0, 0, 1/lambda).
            s[i, j, :] = numpy.array(
                [x * wavenum / sr, y * wavenum / sr,
                 z * wavenum / sr - wavenum])

    return s, allarr
Beispiel #32
0
def get_pdb_sequence(input_pdb_file,
                     chain_id,
                     mapping_output=False,
                     with_gaps=False):
    """Extract the amino-acid sequence of one chain of a PDB file.

    Only residues of the first model that have a CA atom, pass ``is_aa`` and
    carry a blank insertion code are used. Residues whose name
    ``three_to_one`` cannot translate are skipped.

    Args:
        input_pdb_file: path of the PDB file (also used as structure id).
        chain_id: id of the chain to read.
        mapping_output: if true, return ``{residue number: one-letter code}``
            instead of a string.
        with_gaps: if true, residue numbers missing between the first and the
            last collected residue are filled with ``'-'``.

    Returns:
        The mapping dict when ``mapping_output`` is true, otherwise the
        one-letter sequence ordered by residue number. Both are empty when no
        residue qualifies.
    """
    pdb_parser = PDBParser(PERMISSIVE=True, QUIET=True)
    structure = pdb_parser.get_structure(input_pdb_file, input_pdb_file)
    chain = structure[0][chain_id]

    mapping = {}
    for res in chain:
        # res.id is (hetero flag, residue number, insertion code); keeping
        # only blank insertion codes gives one entry per residue number.
        if "CA" in res.child_dict and is_aa(res) and res.id[2] == ' ':
            try:
                mapping[res.id[1]] = three_to_one(res.get_resname())
            except KeyError:
                # Ignore non standard residues such as HIC, MSE, etc.
                pass

    # Guard against an empty mapping: there is no first/last residue to span.
    if with_gaps and mapping:
        res_numbers = sorted(mapping)
        for number in range(res_numbers[0], res_numbers[-1] + 1):
            mapping.setdefault(number, '-')

    if mapping_output:
        return mapping
    return ''.join(mapping[k] for k in sorted(mapping))
Beispiel #33
0
 def collect_1(self, checkboard = []):
     """Split each listed complex file into its 'A' and 'B' chain files.

     Every name in ``self.files`` that is also in ``checkboard`` is parsed
     from ``self.inpath``; chains 'A' and 'B' of the first model are written
     to ``self.outpath/<name>/`` using the first two ``___``-separated parts
     of the file name. Afterwards ``self.then_do(name)`` is called and the
     name is removed from ``checkboard`` (the caller's list is modified).
     Processing stops as soon as ``checkboard`` is empty.
     """
     pdb_parser = PDBParser()
     writer = PDBIO()
     for fname in self.files:
         if not checkboard:
             break
         if fname not in checkboard:
             continue
         target_dir = os.path.join(self.outpath, fname)
         try:
             os.mkdir(target_dir)
         except OSError as e:
             # An already existing output directory is fine.
             if e.errno != errno.EEXIST:
                 raise
         structure = pdb_parser.get_structure(
             fname, os.path.join(self.inpath, fname))
         chain_names = fname.split('___')[:2]
         assert(len(chain_names) == 2)
         for chain_key, chain_name in zip(('A', 'B'), chain_names):
             writer.set_structure(structure[0][chain_key])
             writer.save(os.path.join(self.outpath, fname, chain_name + '.pdb'))
         # Hook that lets other applications reuse this module.
         self.then_do(fname)
         # Mark the file as finished.
         checkboard.remove(fname)
Beispiel #34
0
 def scwrl(self, altseq):
     """Repack side chains with the ``scwrl`` executable and return a copy.

     ``altseq`` is written to a temporary sequence file and ``self.structure``
     to a temporary PDB file under ``temp/``; the ``scwrl`` output is parsed
     and wrapped in a new ``PDBMapStructure``. All three temporary files are
     deleted before returning.
     """
     seqfname = "temp/%d.txt" % multidigit_rand(10)
     with open(seqfname, 'wb') as seqfile:
         structfile = "temp/%d.pdb" % multidigit_rand(10)
         scwrlfile = structfile + ".scwrl"
         seqfile.write(altseq)
         writer = PDBIO()
         writer.set_structure(self.structure)
         writer.save(structfile)
     cmd = ["scwrl", "-0", "-i", structfile, '-s', seqfname, '-o', scwrlfile]
     print("\n%s" % ' '.join(cmd))
     process = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
     process.communicate()
     pdb_parser = PDBParser()
     # Opening the output first makes a missing scwrl result fail here.
     with open(scwrlfile, 'rb'):
         filterwarnings('ignore', category=PDBConstructionWarning)
         repacked = pdb_parser.get_structure(self.id, scwrlfile)
         resetwarnings()
     repacked = PDBMapStructure(repacked, pdb2pose={}, refseq=self.refseq)
     for leftover in (structfile, scwrlfile, seqfname):
         os.remove(leftover)
     return repacked
Beispiel #35
0
def main(args):
    """Run ``combine`` on every ``.pdb`` file of a folder and save the results.

    ``args.out_folder`` is created when missing. With ``args.join == 'no'``
    one ``<ID>.csv`` is written per protein; with ``args.join == 'yes'`` all
    results are concatenated into ``AALI_contacts.csv``. Proteins for which
    ``combine`` returns ``None`` are reported and skipped.
    """
    if not os.path.exists(args.out_folder):
        os.mkdir(args.out_folder)

    pdb_names = [
        entry for entry in os.listdir(args.in_folder)
        if entry.endswith('.pdb')
    ]

    if args.join == 'yes':
        result_joined = pd.DataFrame()

    for pdb_name in pdb_names:
        ID = pdb_name.replace('.pdb', '')
        structure = PDBParser().get_structure(
            ID, args.in_folder + '/' + pdb_name)

        result = combine(structure)
        if result is None:
            print('No ligands and/or water present in ', ID)
            continue

        if args.join == 'no':
            result.to_csv(args.out_folder + '/' + ID + '.csv')
        elif args.join == 'yes':
            result_joined = pd.concat([result_joined, result])

    if args.join == 'yes':
        result_joined.to_csv(args.out_folder + '/' + 'AALI_contacts.csv')
Beispiel #36
0
    def __pdb_ordering__(self, raw, pdb_id, pdb_type):
        """Map every known unit of a structure to its running index.

        Returns a dict of the form ``{unit_id: {'index': i, 'pdb': pdb_id}}``.
        Only residues whose stripped name is in ``self.known`` are included.
        The unit id joins, with underscores, the structure id, ``pdb_type``,
        the 1-based model number, chain id, residue number, residue name and
        insertion code.
        """
        structure = PDBParser(QUIET=True).get_structure(pdb_id, raw)
        ordering = {}
        position = 0
        for model in structure:
            model_number = model.get_id() + 1
            for chain in model:
                chain_name = chain.get_id()
                for residue in chain:
                    unit_name = residue.resname.strip()
                    if unit_name not in self.known:
                        continue
                    residue_key = residue.get_id()
                    fields = (structure.get_id(), pdb_type, model_number,
                              chain_name, residue_key[1], unit_name,
                              residue_key[2])
                    unit_id = '_'.join(
                        [str(field).strip() for field in fields])
                    ordering[unit_id] = {'index': position, 'pdb': pdb_id}
                    position += 1

        return ordering
def old_residue_ids(raw, filename):
    """List one record per residue of a structure.

    The PDB id is the base name of ``filename`` without extension. The type
    is ``'AU'`` for ``.pdb`` files; otherwise it is ``'BA'`` followed by the
    last character of ``filename``. Each record is a dict with the keys
    ``pdb``, ``type``, ``model``, ``chain``, ``number``, ``unit`` and
    ``insertion``.
    """
    pdb_parser = PDBParser()
    stem, ext = os.path.splitext(filename)
    pdb_id = os.path.basename(stem)
    structure = pdb_parser.get_structure(pdb_id, raw)

    pdb_type = 'AU' if ext == '.pdb' else 'BA' + filename[-1]

    records = []
    for model in structure:
        # BioPython numbers models from 0; the records use 1-based numbers.
        model_number = str(model.get_id() + 1)
        for chain in model:
            chain_name = chain.get_id()
            for residue in chain:
                residue_key = residue.get_id()
                records.append(dict(
                    pdb=pdb_id,
                    type=pdb_type,
                    model=model_number,
                    chain=chain_name,
                    number=str(residue_key[1]),
                    unit=residue.resname.strip(),
                    insertion=residue_key[2].rstrip(),
                ))

    return records
Beispiel #38
0
def prepare_virtual_sites(pdb_file, use_cis_proline=False):
    """Recompute N, C and H coordinates from CA and O atoms, in place.

    For every residue of every chain, the coordinates of its 'N' and 'H'
    atoms and of the previous residue's 'C' atom are overwritten with a
    fixed linear combination of the previous residue's CA, the current
    residue's CA and the previous residue's O. The modified structure is
    saved back to ``pdb_file``, overwriting it.

    Args:
        pdb_file: path of the PDB file to read and overwrite.
        use_cis_proline: when true, residues named "IPR" use a separate set
            of coefficients.

    NOTE(review): for the first residue of a chain ``r_im`` is empty, so an
    'N' or 'H' atom there raises KeyError on ``r_im['CA']`` - presumably the
    input never has those atoms on the first residue; confirm.
    """
    parser = PDBParser(QUIET=True)
    structure=parser.get_structure('X',pdb_file,)
    for model in structure:
        for chain in model:
            # r_im: atoms of the previous residue, r_i: atoms of the current
            # residue, both keyed by atom name.
            r_im={}
            r_i={}
            for residue in chain:
                r_im=r_i
                r_i={}
                for atom in residue:
                    r_i[atom.get_name()]=atom
                if use_cis_proline and residue.get_resname() == "IPR":
                    # Coefficient set used for "IPR" residues.
                    if 'N' in r_i:
                        r_i['N'].set_coord(-0.2094*r_im['CA'].get_coord()+ 0.6908*r_i['CA'].get_coord() + 0.5190*r_im['O'].get_coord())
                    if 'C' in r_im:
                        r_im['C'].set_coord(0.2196*r_im['CA'].get_coord()+ 0.2300*r_i['CA'].get_coord() + 0.5507*r_im['O'].get_coord())
                    if 'H' in r_i:
                        r_i['H'].set_coord(-0.9871*r_im['CA'].get_coord()+ 0.9326*r_i['CA'].get_coord() + 1.0604*r_im['O'].get_coord())
                else:
                    # Default coefficient set for all other residues.
                    if 'N' in r_i:
                        r_i['N'].set_coord(0.48318*r_im['CA'].get_coord()+ 0.70328*r_i['CA'].get_coord()- 0.18643 *r_im['O'].get_coord())
                    if 'C' in r_im:
                        r_im['C'].set_coord(0.44365*r_im['CA'].get_coord()+ 0.23520*r_i['CA'].get_coord()+ 0.32115 *r_im['O'].get_coord())
                    if 'H' in r_i:
                        r_i['H'].set_coord(0.84100*r_im['CA'].get_coord()+ 0.89296*r_i['CA'].get_coord()- 0.73389 *r_im['O'].get_coord())
    # Write the modified structure back over the input file.
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)
def get_normalized_pairs(n):
    '''Count residue-name pairs whose CB atoms lie within radius n.

    Every ``./pdbfiles/*.ent`` file is parsed; files with more than two CB
    atoms contribute all CB-CB pairs found by a neighbour search with radius
    ``n``. The counts are accumulated over all processed files, written to
    ``normalized_pairs8.py`` as ``Normalized_pairs_<n>=<dict>`` and returned.
    Progress messages go to stderr. Not necessary for the package.

    Returns a dict mapping ``(resname, resname)`` tuples to the number of
    times that pair was found.
    '''
    p = PDBParser(PERMISSIVE=1)
    pair_counts = Counter()
    file_list = []

    ###### Parsing through PDB files #######
    for filename in glob.glob('./pdbfiles/*.ent'):
        s = p.get_structure('X', filename)
        atom_list = [atom for atom in s.get_atoms() if atom.name == 'CB']

        if len(atom_list) > 2:
            ns = Bio.PDB.NeighborSearch(atom_list)
            # Accumulate per file: previously only the neighbours of the last
            # processed file were counted after the loop.
            pair_counts.update(
                (x.get_parent().get_resname(), y.get_parent().get_resname())
                for x, y in ns.search_all(n))
            file_list.append(filename)
            sys.stderr.write(filename+' processed.\n') #check-point
        else:
            sys.stderr.write(filename+' could not be processed.\n') #check-point

    counter = dict(pair_counts)
    sys.stderr.write(str(len(file_list))+' files processed.\n') #check-point
    sys.stderr.write('Dictionary length: '+str(len(counter))+'.\n') #check-point
    with open('normalized_pairs8.py', 'w') as outfile:
        outfile.write('\nNormalized_pairs_'+str(n)+'='+str(counter))
    return counter
Beispiel #40
0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:

    PDBfile - the PDB file to score

    hidden arguments:

    aas.scr, pro.scr, gly.scr - the scoring tables
    need to be present in working directory

    returns:

    the summed PRO, GLY and remaining-residue scores of the first chain of
    the first model, divided by the chain length and multiplied by 1000.
    Returns 0 when the residue indices are inconsistent or the calculated
    length is 0.
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores() ##define global tables
    pars = PDBParser(PERMISSIVE = 1)
    # Remove only a trailing ".pdb": str.rstrip('.pdb') strips any trailing
    # '.', 'p', 'd' or 'b' characters and would mangle names like "1abp.pdb".
    struct_id = PDBfile[:-4] if PDBfile.endswith('.pdb') else PDBfile
    struct = pars.get_structure(struct_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]
    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # list() so that .remove() below also works where range() is lazy.
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[len(chain)-1].id[1]))
    #need to remove pro/gly indices in first/last position
    last_index = len(chain) - 1
    for index_list in (pro_list, gly_list):
        for terminal in (1, last_index):
            if terminal in index_list:
                index_list.remove(terminal)
    try:
        for index in pro_list:
            aas_list.remove(index) #remove pros from aas_list
        for index in gly_list:
            aas_list.remove(index) #remove glys from aas_list
    except ValueError:
        print('incosistency in PDB file - will return score = 0')
        return 0
    else:
        proscore = score_help(chain, pro_list, pro)
        glyscore = score_help(chain, gly_list, gly)
        aasscore = score_help(chain, aas_list, aas)
        total = proscore+glyscore+aasscore
        size = length(chain)
        try:
            return (total/size)*1000 #normalize score
        except ZeroDivisionError:
            print("calculated protein length 0 -> returning score 0")
            return 0
def score(query_pdb_path,
          against_pdb_path,
          query_fp_path = None,
          against_fp_path = None,
          query_epitope = [],
          against_epitope = [],
          spin_image_height_step = 5,
          spin_image_radius_step = 2,
          sphere_radius_step = 2,
          cutoff = 20.0,
          spin_image_radius_range = (0, 20),
          spin_image_height_range =  (-30, 10),
          sphere_radius_range = (0, 20),
          callback = write_score_to_file, cbargs=[]):
    """Compute the three similarity scores between two PDB complexes.

    Both structures are parsed and wrapped in ``Complex`` objects with their
    epitopes. Fingerprint strings are read from ``query_fp_path`` and
    ``against_fp_path`` when both are given; otherwise they are computed for
    both complexes with the given step sizes. The ``*_range`` parameters are
    accepted but not used in this function.

    When ``callback`` is not None it is called with the score tuple followed
    by ``cbargs``. Returns ``(score1, score2, score3)``.
    """
    parser = PDBParser(PERMISSIVE=1)

    query_struct = parser.get_structure(
        os.path.basename(query_pdb_path), query_pdb_path)
    against_struct = parser.get_structure(
        os.path.basename(against_pdb_path), against_pdb_path)

    query_complex = Complex(query_struct, query_epitope)
    against_complex = Complex(against_struct, against_epitope)

    if query_fp_path is not None and against_fp_path is not None:
        # Both fingerprints were supplied: read them from disk.
        with open(query_fp_path, 'r') as f1, open(against_fp_path, 'r') as f2:
            query_fp_string = f1.read()
            against_fp_string = f2.read()
    else:
        # At least one fingerprint is missing: compute both.
        fp_steps = dict(
            spin_image_radius_step=spin_image_radius_step,
            spin_image_height_step=spin_image_height_step,
            sphere_radius_step=sphere_radius_step,
        )
        query_complex.get_fp(**fp_steps)
        against_complex.get_fp(**fp_steps)

        query_fp_string = query_complex.fp2str()
        against_fp_string = against_complex.fp2str()

    query = FPWithComplex(query_complex, query_fp_string)
    against = FPWithComplex(against_complex, against_fp_string)

    score1, score2, score3 = similarity_between(query, against, cutoff=cutoff)
    scores = (score1, score2, score3)

    if callback is not None:
        callback(scores, *cbargs)
    return scores
def get_structure(pdb_id):
    '''Return the parsed structure for ``pdb_id``.

    The local file path is obtained from ``io_utils.get_file`` using the
    RCSB URL and a target path under the user's home directory.
    '''
    pdb_basename = pdb_id + '.pdb'
    source_url = 'http://www.rcsb.org/pdb/files/' + pdb_basename
    target_filename = os.path.join(
        os.path.expanduser('~'), _DIR, _PDB_DIR, pdb_basename)
    local_path = io_utils.get_file(source_url, target_filename)

    with open(local_path) as pdb_file:
        return PDBParser(QUIET=True).get_structure(pdb_id, pdb_file.name)
Beispiel #43
0
def main():
    """Parse the PDB file named on the command line and run ``calculate_ss``.

    Exits with a usage message when no input file is given. Only the first
    model of the structure is passed to ``calculate_ss``.
    """
    argv = sys.argv
    if len(argv) < 2:
        sys.exit("Usage: %s input_pdb_file" % argv[0])

    structure = PDBParser(PERMISSIVE=1).get_structure("temp", argv[1])
    calculate_ss(structure[0])
Beispiel #44
0
 def parse(self, *pdb_filenames):
     """
     REQUIRED. Load the given protein PDB files into ``self.proteins``.

     Any previously loaded proteins are discarded. Any number of files may
     be given, but only two will be used for the superimposition.
     """
     self.proteins = []  # start from an empty list on every call
     structure_parser = PDBParser(QUIET=True)
     for pdb_path in pdb_filenames:
         # The structure id is derived from the file name.
         structure_id = self.__get_pdb_id_from_filename(pdb_path)
         structure = structure_parser.get_structure(structure_id, pdb_path)
         self.proteins.append(structure)
Beispiel #45
0
def removeDoubleAtoms():
    """Remove double atoms from the global ``pdb_name`` structure.

    The result is saved as ``<name>_noDouble.pdb``, where ``<name>`` is the
    part of ``pdb_name`` before the first dot.
    """
    pdb_parser = PDBParser()
    base_name = pdb_name.partition('.')[0]
    structure = pdb_parser.get_structure(base_name, pdb_name)
    # The header and trailer are fetched as before; their values are unused.
    pdb_parser.get_header()
    pdb_parser.get_trailer()

    structure.remove_disordered_atoms()

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(base_name + '_noDouble.pdb')
Beispiel #46
0
def get_pdb_structure(pdb_file, pdb_id=None, quiet=True):
    """Parse a (possibly gzipped) PDB file and normalise its chain ids.

    Set ``quiet`` to False to output parser warnings like incomplete chains.
    When ``pdb_id`` is None it is derived with ``get_pdb_id(pdb_file)``.
    In the first model, chains whose id is ``' '`` or ``'Z'`` are given the
    first uppercase letter not used by another chain.
    """
    if pdb_id is None:
        pdb_id = get_pdb_id(pdb_file)
    parser = PDBParser(get_header=True, QUIET=quiet)
    if not pdb_file.endswith('.gz'):
        structure = parser.get_structure(pdb_id, pdb_file)
    else:
        with gzip.open(pdb_file, 'rt') as ifh:
            structure = parser.get_structure(pdb_id, ifh)

    model = structure[0]
    used_ids = set(chain.id for chain in model.child_list)
    for chain in model.child_list:
        if chain.id not in (' ', 'Z'):
            continue
        # Free the current id first so it does not block the search below.
        used_ids.remove(chain.id)
        chain.id = next(
            letter for letter in string.ascii_uppercase
            if letter not in used_ids)
        used_ids.add(chain.id)
    # Rebuild the id lookup so it matches the renamed chains.
    model.child_dict = dict((chain.id, chain) for chain in model.child_list)

    return structure
Beispiel #47
0
 def Draw(self, parent, filename):
     """Plot line segments between the first four atoms of each residue.

     The structure in ``filename`` is parsed and its model list is stored in
     ``self.pdbMat``. For every residue of the first model with more than
     three atoms, three segments are collected and drawn on ``parent.ax2``:
     atom 0 to atom 1 in red, atom 1 to atom 2 in blue and atom 2 to atom 3
     in green. The variable names suggest the atoms are N, CA, C and O in
     that order - TODO confirm against the input files.
     """
     p = PDBParser(PERMISSIVE=1)
     # structure_id = Rec[1]
     structure = p.get_structure("WHYY", filename)
     self.pdbMat = structure.get_list()
     # Each list holds one [start, end] coordinate pair per segment:
     # r* = red segments, b* = blue segments, g* = green segments.
     rx = []
     ry = []
     rz = []
     bx = []
     by = []
     bz = []
     gx = []
     gy = []
     gz = []
     for chain in self.pdbMat[0].get_list():
         for resnum, residue in enumerate(chain.get_list()):
             atom = residue.get_list()
             if len(atom) > 3:
                 if resnum > 1:
                     # Appends the N position kept from the previously
                     # processed residue (npos is reassigned only below) to
                     # the blue entry at index resnum - 2.
                     # NOTE(review): resnum restarts for every chain and
                     # skipped residues add no entry, so this index may not
                     # match the intended segment; npos is also undefined if
                     # the first qualifying residue has resnum > 1 - confirm.
                     bx[resnum - 2].append(npos[0])
                     by[resnum - 2].append(npos[1])
                     bz[resnum - 2].append(npos[2])
                 npos = atom[0].get_coord()
                 capos = atom[1].get_coord()
                 cpos = atom[2].get_coord()
                 opos = atom[3].get_coord()
                 rx.append([npos[0], capos[0]])
                 ry.append([npos[1], capos[1]])
                 rz.append([npos[2], capos[2]])
                 bx.append([capos[0], cpos[0]])
                 by.append([capos[1], cpos[1]])
                 bz.append([capos[2], cpos[2]])
                 gx.append([cpos[0], opos[0]])
                 gy.append([cpos[1], opos[1]])
                 gz.append([cpos[2], opos[2]])
     # Draw every collected segment as its own line.
     for n, line in enumerate(rx):
         x = np.array(line)
         y = np.array(ry[n])
         z = np.array(rz[n])
         parent.ax2.plot(x, y, z, "r-", linewidth=5)
     for n, line in enumerate(bx):
         x = np.array(line)
         y = np.array(by[n])
         z = np.array(bz[n])
         parent.ax2.plot(x, y, z, "b-", linewidth=5)
     for n, line in enumerate(gx):
         x = np.array(line)
         y = np.array(gy[n])
         z = np.array(gz[n])
         parent.ax2.plot(x, y, z, "g-", linewidth=5)
Beispiel #48
0
def Pdb2Gro(pdb_file, gro_file, ch_name):
    """Write the atoms of one chain of a PDB file to a gro file.

    Args:
        pdb_file: PDB path, with or without the ``.pdb`` extension.
        gro_file: path of the gro file to write.
        ch_name: id of the chain to convert; ``''`` selects chain ``'A'``.

    Only residues of the first model that contain N, CA and C atoms and
    whose hetero flag is blank, ``H_MSE``, ``H_M3L`` or ``H_CAS`` are
    written. Atoms are numbered consecutively starting at 1.

    Raises:
        ValueError: if the first model has no chain with id ``ch_name``.
    """
    from Bio.PDB.PDBParser import PDBParser

    p = PDBParser(PERMISSIVE=1)

    pdb_id = pdb_file
    if pdb_file[-4:].lower() != ".pdb":
        pdb_file = pdb_file + ".pdb"
    if pdb_id[-4:].lower() == ".pdb":
        pdb_id = pdb_id[:-4]

    s = p.get_structure(pdb_id, pdb_file)

    if ch_name == '':
        ch_name = 'A'

    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    atoms = []
    iatom = 0
    chain_found = False
    for chain in s[0].get_list():
        if chain.get_id() != ch_name:
            continue
        chain_found = True
        for res in chain:
            is_regular_res = (res.has_id('N') and res.has_id('CA')
                              and res.has_id('C'))
            if res.get_id()[0] not in accepted_flags or not is_regular_res:
                continue
            res_name = res.get_resname()
            residue_no = res.get_id()[1]
            for atom in res:
                iatom = iatom + 1
                atoms.append(Atom(iatom, atom.get_name(), residue_no,
                                  res_name, atom.get_coord()))

    # Previously a missing chain surfaced as an undefined-name error.
    if not chain_found:
        raise ValueError("Chain %r not found in %s" % (ch_name, pdb_file))

    with open(gro_file, 'w') as out:
        out.write(" Structure-Based gro file\n")
        # Atom count, right-aligned in a 12-character field.
        out.write(str(len(atoms)).rjust(12))
        out.write("\n")
        for gro_atom in atoms:
            gro_atom.write_(out)
Beispiel #49
0
def get_ca(pdbfile):
    """Return the CA coordinates of every residue in the first model.

    Each residue must contain CA and O atoms and have a hetero flag that is
    blank, ``H_MSE``, ``H_M3L`` or ``H_CAS``. On the first residue that does
    not, a message is printed and the program exits with status 1.
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure(pdbfile, pdbfile)
    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    ca_atoms = []
    for chain in s[0].get_list():
        for res in chain:
            is_regular_res = res.has_id('CA') and res.has_id('O')
            res_id = res.get_id()[0]
            if res_id in accepted_flags and is_regular_res:
                ca_atoms.append(res['CA'].get_coord())
            else:
                print("Pdb file contains irregular residue names or missing CA / O atoms! Fix it and run again! Exit with error.")
                print("res_id : %s" % (res_id,))
                # Non-zero status so callers can detect the failure.
                sys.exit(1)
    return ca_atoms
Beispiel #50
0
	def __init__(self, filename):
		"""Load every atom of a PDB file as a coloured sphere.

		Sets ``self.spheres`` (one Sphere per atom), ``self.spheredata``
		(eight packed floats per sphere: x, y, z, radius, r, g, b and 1.0),
		the bounding-box centre ``self.x``/``self.y``/``self.z`` and
		``self.radius`` (half the bounding-box diagonal plus 1.5).

		A file without atoms raises ValueError, because max() and min() are
		called on empty lists.
		"""
		
		self.spheredata = ''
		
		# Element -> colour and element -> radius lookups.
		# presumably filled in by the exec below; verify against `elements`
		E2C = {}
		E2R = {}
		exec elements # Read the color mappings at the bottom of this file
		
		# Read the file
		atoms = []
		parser = PDBParser()
		structure = parser.get_structure('test',filename)
		for model in structure.get_list():
		  for chain in model.get_list():
		    for residue in chain.get_list():
		      for atom in residue.get_list():
						atoms += [atom]
		
		# Look up colors and radius
		spheres = []
		for atom in atoms:
			s = Sphere()
			s.x, s.y, s.z = atom.get_coord()
			# The lookup key is the atom name with leading and trailing
			# digits removed.
			element = atom.get_name().strip(string.digits)
			s.radius = E2R[element] if E2R.has_key(element) else 1.5
			color = E2C[element] if E2C.has_key(element) else 0xFF1493
			# Red is taken from the lowest byte and blue from the highest.
			# NOTE(review): that implies 0xBBGGRR colour values - confirm
			# this matches how the mappings are written.
			s.r = (color & 0xff) / 255.0
			s.g = ((color & 0xff00) >> 8) / 255.0
			s.b = ((color & 0xff0000) >> 16) / 255.0
			spheres += [s]
			
			self.spheredata += struct.pack('fff f ffff', s.x,s.y,s.z, s.radius, s.r,s.g,s.b,1.0)
			
			
		self.spheres = spheres

		# Figure out the total radius
		xs, ys, zs = [s.x for s in spheres], [s.y for s in spheres], [s.z for s in spheres]
		dx = max(xs) - min(xs)
		dy = max(ys) - min(ys)
		dz = max(zs) - min(zs)
		self.radius = np.sqrt(dx*dx + dy*dy + dz*dz) / 2 + 1.5
		self.x = (max(xs) + min(xs)) / 2
		self.y = (max(ys) + min(ys)) / 2
		self.z = (max(zs) + min(zs)) / 2
Beispiel #51
0
def renameChain():
    """Interactively rename one chain of the global ``pdb_name`` structure.

    The user is asked for the current and the new chain id. The result is
    saved as ``<name>_rename.pdb``, where ``<name>`` is the part of
    ``pdb_name`` before the first dot.
    """
    pdb_parser = PDBParser()
    base_name = pdb_name.partition('.')[0]
    structure = pdb_parser.get_structure(base_name, pdb_name)
    # The header and trailer are fetched as before; their values are unused.
    pdb_parser.get_header()
    pdb_parser.get_trailer()

    old_id = raw_input('What is the chain you want to rename : ')
    new_id = raw_input('What is the new name of this chain : ')

    for model in structure:
        for chain in model:
            if chain.id != old_id:
                continue
            chain.id = new_id

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(base_name + '_rename.pdb')
Beispiel #52
0
def removeHetero():
    """Remove all hetero residues from the global ``pdb_name`` structure.

    Residues whose hetero flag (first element of ``residue.id``) is not
    blank are detached; chains left without residues are detached too. The
    result is saved as ``<name>_noHetero.pdb``, where ``<name>`` is the part
    of ``pdb_name`` before the first dot.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)
    for model in structure:
        # Iterate over snapshots: detaching a child while iterating the
        # entity itself skips the element that follows the removed one.
        for chain in list(model):
            for residue in list(chain):
                if residue.id[0] != ' ':
                    chain.detach_child(residue.id)
            if len(chain) == 0:
                model.detach_child(chain.id)

    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct+'_noHetero.pdb')
Beispiel #53
0
def deleteResidue():
    """Interactively delete residues by number from the ``pdb_name`` structure.

    The user is asked for a residue number; every residue with that number
    is detached from its chain, in all models and chains. The result is
    saved as ``<name>_noResidue.pdb``, where ``<name>`` is the part of
    ``pdb_name`` before the first dot. The file is saved unchanged when the
    input is not an integer.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    rm_residue = raw_input('What residue you want to delete : ')
    # Residue numbers are integers, so the typed text must be converted;
    # comparing against the raw string never matched.
    try:
        target_number = int(rm_residue)
    except ValueError:
        print('%r is not a valid residue number; nothing deleted' % rm_residue)
        target_number = None

    for model in structure:
        for chain in model:
            # Iterate over a snapshot so detaching does not skip residues.
            for residue in list(chain):
                if residue.id[1] == target_number:
                    chain.detach_child(residue.id)

    w = PDBIO()
    w.set_structure(structure)
    w.save(nameStruct+'_noResidue.pdb')
Beispiel #54
0
 def Draw(self, parent, filename):
     """Scatter-plot the CA, N and O atoms of the first model.

     The structure in ``filename`` is parsed and its model list is stored in
     ``self.pdbMat``. Atoms whose id starts with "H" or "W" are skipped. CA
     atoms are drawn in blue, N atoms in red and O atoms in green on
     ``parent.ax2``.
     """
     structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
     self.pdbMat = structure.get_list()

     # One (x, y, z) triple of coordinate lists per plotted atom name.
     coords = {
         'CA': ([], [], []),
         'N': ([], [], []),
         'O': ([], [], []),
     }
     for chain in self.pdbMat[0].get_list():
         for residue in chain.get_list():
             for atom in residue.get_list():
                 if atom.get_id()[0][0] in ["H", "W"]:
                     continue
                 pos = atom.get_coord()
                 bucket = coords.get(atom.get_name())
                 if bucket is None:
                     continue
                 bucket[0].append(pos[0])
                 bucket[1].append(pos[1])
                 bucket[2].append(pos[2])

     # Marker sizes: 385, 350 and 330 are the radii of carbon, nitrogen and
     # oxygen times 5.
     for atom_name, size, colour in (('CA', 385, 'b'),
                                     ('N', 350, 'r'),
                                     ('O', 330, 'g')):
         xs, ys, zs = coords[atom_name]
         x = np.array(xs)
         y = np.array(ys)
         z = np.array(zs)
         parent.ax2.scatter(x, y, z, zdir='z', marker='o', s=size, c=colour)
Beispiel #55
0
def getSequence():
    """Print the upper-case sequence of one chain of the ``pdb_name`` structure.

    The user is asked for a chain id; every other chain is detached from the
    parsed structure and the sequences of the polypeptides built from what
    remains are concatenated and printed.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    what_chain = raw_input('For what chain do you want the sequence : ')

    for model in structure:
        # Iterate over a snapshot: detaching while iterating the model
        # itself skips the chain after each removed one, leaving unwanted
        # chains in the structure.
        for chain in list(model):
            if chain.id != what_chain:
                model.detach_child(chain.id)

    seq = ''
    ppb = PPBuilder()
    for pp in ppb.build_peptides(structure):
        seq = seq + pp.get_sequence()
    seq = seq.upper()
    print(seq)
Beispiel #56
0
 def Draw(self, parent, filename):
     """Plot the CA atoms of the first model as one line.

     The structure in ``filename`` is parsed and its model list is stored in
     ``self.pdbMat``. The CA coordinates are drawn on ``parent.ax2`` in the
     order in which the atoms appear.
     """
     structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
     self.pdbMat = structure.get_list()

     ca_positions = []
     for chain in self.pdbMat[0].get_list():
         for residue in chain.get_list():
             for atom in residue.get_list():
                 if atom.get_name() != 'CA':
                     continue
                 ca_positions.append(atom.get_coord())

     x = np.array([pos[0] for pos in ca_positions])
     y = np.array([pos[1] for pos in ca_positions])
     z = np.array([pos[2] for pos in ca_positions])
     parent.ax2.plot(x, y, z)
Beispiel #57
0
def GetExec():
    """Index the ``.pdb`` files of the current directory, using a pickle cache.

    The timestamp in ``lastChecked.txt`` is read and then replaced by the
    current time. For every ``.pdb`` file a ``(name, model count, path)``
    entry is collected: it is recomputed by parsing the file and stored in
    ``<name>.pickle`` when that pickle is missing or the file's modification
    time is newer than the previous timestamp, and loaded from the pickle
    otherwise. IOErrors are printed and the file is skipped.

    The collected data is not returned in the visible code.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0
    p = PDBParser(PERMISSIVE=1)
    with open('lastChecked.txt', 'r') as ftime:
        pT = float(ftime.readline())
    with open('lastChecked.txt', 'w') as stamp:
        stamp.write(str(time.time()))

    for rec in Recs:
        try:
            (name, ext) = os.path.splitext(rec)
            if ext != ".pdb":
                continue
            pickle_name = name + ".pickle"
            newList.append([rec, os.getcwd()])
            if os.path.isfile(pickle_name) and not (float(fmt.filemtime(rec)) > pT):
                # The cache is still current: reuse the stored entry.
                rHoward = mP.opickle(pickle_name)
                listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
            else:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    pdbRec = p.get_structure(name, rec)
                models = pdbRec.get_list()
                listdata[j] = str(name), len(models), os.getcwd()+'/'+str(name) + str(ext)
                rHoward = [str(name), len(models), str(name) + str(ext)]
                mP.spickle(pickle_name, rHoward)
            j += 1
        except IOError as e:
            print(e)
Beispiel #58
0
def assembleChain(): # Allow to assemble 2 chains together
	"""Give the second chosen chain the id of the first one and save the result.

	The user is asked for two chain ids. For every residue of the chain
	matching the second id, the id of the residue's parent chain is set to
	the first id. Residues are not moved between chain objects. The result
	is saved as ``<name>_assemble.pdb``, where ``<name>`` is the part of the
	global ``pdb_name`` before the first dot.

	NOTE(review): whether two sibling chains may share an id depends on the
	Biopython version in use - confirm.
	"""
	parser = PDBParser()
	nameStruct=pdb_name.partition('.')[0]
	structure = parser.get_structure(nameStruct, pdb_name)
	# The header and trailer are fetched but not used below.
	header = parser.get_header()
	trailer = parser.get_trailer()

	what_chain=raw_input('What is the 1st chain you want to assemble : ')
	what_chain2=raw_input('What is the 2nd chain you want to assemble : ')

	for model in structure:
		for chain in model:
			if chain.id == what_chain:
				# NOTE(review): `parent` is assigned but never read.
				parent=chain;
			elif chain.id == what_chain2:
				# Relabel the second chain through its residues' parent.
				for residue in chain:
					residue.get_parent().id=what_chain

	w = PDBIO()
	w.set_structure(structure)
	w.save(nameStruct+'_assemble.pdb')
    def parsePDBInformation(self, file):
        """
        Parses a single pdb file and returns the residues of its helices.

        The HELIX records of ``self.dir/file`` are read first; only records
        whose class field (column 40) is 1 are kept. The structure is then
        parsed, and for every kept helix the residues between its start and
        end number are collected with ``self.getResiduesFromList`` (helix
        without chain id) or ``self.getResiduesFromChain`` (chain whose id
        matches the helix).

        Returns a list with one entry per extracted helix.
        """
        helices = []
        helixSequences = []
        pdb_path = self.dir + "/" + file

        # The context manager closes the file even if a malformed HELIX
        # record makes int() raise.
        with open(pdb_path) as f:
            for line in f:
                if not line.startswith("HELIX"):
                    continue
                start = int(line[21:25].replace(" ", ""))
                end = int(line[33:37].replace(" ", ""))
                helix_class = int(line[39:40])
                chain_id = line[19:20].replace(" ", "")
                if helix_class == 1:
                    helices.append((start, end, chain_id))

        # Parse the structure with a PDBParser object
        pdbParser = PDBParser()
        structure = pdbParser.get_structure("currentFile", pdb_path)

        # For every helix tuple, extract the residues and store them in helixSequences
        for helix in helices:
            if helix[2] == "":
                residues = structure.get_residues()
                helixSequences.append(self.getResiduesFromList(residues, helix[0], helix[1]))
            for chain in structure.get_chains():
                if chain.get_id() == helix[2]:
                    helixSequences.append(self.getResiduesFromChain(chain, helix[0], helix[1]))
        return helixSequences
Beispiel #60
-1
def parse_atoms_infile(filename):
    '''
    Parse a PDB file and return the list of its CB atoms.

    The file needs to be in PDB format (*.ent or *.pdb).
    '''
    structure = PDBParser(QUIET=True).get_structure("X", filename)
    cb_atoms = []
    for atom in structure.get_atoms():
        if atom.name == 'CB':
            cb_atoms.append(atom)
    return cb_atoms