Example #1
0
def read_pdb_file_content(input_type, input_value):
    """
    Return the text of a PDB file, taken from disk or fetched through pypdb.

    Parameters
    ----------
    input_type : str
        Selects the source: 'pdb_filepath' reads a local file, 'pdb_code'
        fetches the entry through ``pypdb.get_pdb_file``.
    input_value : str
        The local filepath or the PDB code, matching `input_type`.

    Returns
    -------
    str
        Whatever the selected source yields for the PDB file content.

    Raises
    ------
    ValueError
        If `input_type` is neither 'pdb_code' nor 'pdb_filepath'.
    """
    # Local file: hand back its full text.
    if input_type == "pdb_filepath":
        with open(input_value) as pdb_handle:
            return pdb_handle.read()

    # PDB code: delegate the download to pypdb.
    if input_type == "pdb_code":
        return pypdb.get_pdb_file(input_value)

    raise ValueError(f"Provided input type {input_type} not valid. Accepted values are 'pdb_code' and 'pdb_filepath'.")
Example #2
0
def readPDB(id, property_map=None):
    """Read a molecular system from a Protein Data Bank (PDB) ID in the RCSB PDB
       website.

       Parameters
       ----------

       id : str
           The PDB ID string.

       property_map : dict
           A dictionary that maps system "properties" to their user defined
           values. This allows the user to refer to properties with their
           own naming scheme, e.g. { "charge" : "my-charge" }. Defaults to
           an empty dictionary.

       Returns
       -------

       system : :class:`System <BioSimSpace._SireWrappers.System>`
           A molecular system, or None (after a warning) when PyPDB is not
           available.

       Raises
       ------

       TypeError
           If 'id' is not a string.

       IOError
           If the PDB file could not be downloaded for the given ID.

       Examples
       --------

       Create a molecular system from the deoxy human haemoglobin Protein
       Data Bank (PDB) record.

       >>> import BioSimSpace as BSS
       >>> system = BSS.readPDB("1a3n")
    """

    if not _has_pypdb:
        _warn("BioSimSpace.IO: PyPDB could not be imported on this system.")
        return None

    if not isinstance(id, str):
        raise TypeError("'id' must be of type 'str'")

    # Use a fresh dictionary per call rather than a shared mutable default.
    if property_map is None:
        property_map = {}

    # Remove any spaces from the PDB ID and convert to upper case.
    id = id.replace(" ", "").upper()

    # Attempt to download the PDB file. (Compression is currently broken!)
    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit still propagate.
    try:
        pdb_string = _pypdb.get_pdb_file(id, filetype="pdb", compression=False)
    except Exception as e:
        raise IOError("Invalid PDB ID: '%s'" % id) from e

    # A missing result cannot be written to file; report it the same way
    # as a failed download.
    if pdb_string is None:
        raise IOError("Invalid PDB ID: '%s'" % id)

    # Write the PDB file into a temporary directory that is removed
    # explicitly once the molecules have been read.
    with _tempfile.TemporaryDirectory() as tmp_dir:
        # Create the name of the PDB file.
        pdb_file = "%s/%s.pdb" % (tmp_dir, id)

        # Now write the PDB string to file.
        with open(pdb_file, "w") as file:
            file.write(pdb_string)

        # Read the file and return a molecular system.
        return readMolecules(pdb_file, property_map)
def get_pdb(id, cache):
    """Return the path of the cached PDB file for `id`, downloading on a miss.

    Identifiers longer than four characters are split into a four-character
    code plus a remainder and handed to ``get_chain_pdb``. Otherwise the file
    ``./cache/data/<id>.pdb`` is returned if it already exists; if not, a
    gzip-compressed download is written next to it and unpacked by running
    ``unpigz`` through ``console``.

    Parameters
    ----------
    id : str
        PDB identifier, optionally followed by extra characters.
    cache : bool
        When truthy, a message is printed if the file was not already cached.
        The download still takes place.

    Returns
    -------
    str
        Path of the (expected) uncompressed PDB file, or whatever
        ``get_chain_pdb`` returns for long identifiers.
    """
    # Anything beyond the four-character code is handled elsewhere.
    if len(id) > 4:
        pdb_code, remainder = id[:4], id[4:]
        return get_chain_pdb(pdb_code, remainder, cache)

    cache_path = ''.join(('./cache/data/', id, '.pdb'))
    if os.path.isfile(cache_path):
        return cache_path

    # Diagnostics only: neither condition stops the download below.
    if len(id) != 4:
        print("BAD ID:", id)
    if cache:
        print("CACHE MISS WHEN REQUIRED: ", id)

    gz_path = cache_path + ".gz"
    with open(gz_path, 'wb') as gz_out:
        gz_out.write(pb.get_pdb_file(id, compression=True))
    console("unpigz -f -q " + gz_path)
    return cache_path
Example #4
0
def fill_proteins_sequences(proteins_list, folder_path, special_folder):
    """Download a PDB file for every protein and store its extracted sequence.

    For each item, the structure is fetched with ``get_pdb_file`` using the
    item's ``id``, written via ``write_to_file`` into
    ``<folder_path>/<special_folder>`` (created if missing), and the result
    of ``extract_sequence`` on the written file's ``name`` is stored as a
    string in the item's ``sequence`` attribute.

    Parameters
    ----------
    proteins_list : iterable
        Objects with an ``id`` attribute; each gets a ``sequence`` attribute.
    folder_path : str
        Parent directory for the downloads.
    special_folder : str
        Sub-directory of `folder_path` receiving the PDB files.

    Returns
    -------
    The same `proteins_list` object, with its items updated in place.
    """
    for entry in proteins_list:
        structure_text = get_pdb_file(entry.id, filetype='pdb', compression=False)

        # The target directory is created lazily, on the first protein.
        target_dir = folder_path + '/' + special_folder
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        written = write_to_file(str(entry.id), "pdb", structure_text, target_dir)
        entry.sequence = str(extract_sequence(written.name))

    return proteins_list
Example #5
0
def get_pdb(limit=4):
    """Fetch PDB files for the PDB IDs stored in ``test_proteins2.db``.

    Rows are read from the ``PrimeStructureDB_DataTable`` table; rows whose
    ``s_bioluminate_Antigen_Type`` is NULL are skipped. The ``s_m_title``
    column of the remaining rows is used as the PDB ID.

    Parameters
    ----------
    limit : int or None
        Maximum number of PDB IDs to download. Defaults to 4 (the code was
        originally only checked on the first four entries); pass None to
        download every entry.

    Returns
    -------
    list of list of str
        One list of lines per downloaded PDB file.
    """
    conn = sqlite3.connect('test_proteins2.db')
    try:
        rows = conn.execute(
            "SELECT s_m_title,s_bioluminate_Antigen_Type, s_bioluminate_Antigen_Chain, s_bioluminate_Antigen_Seq FROM 'PrimeStructureDB_DataTable'"
        ).fetchall()
    finally:
        # Always release the database handle, even if the query fails.
        conn.close()

    # Column 0 is the PDB ID; keep only rows that have an antigen type.
    pdb_ids = [row[0] for row in rows if row[1] is not None]

    return [
        pypdb.get_pdb_file(pdb_id, filetype='pdb', compression=False).split('\n')
        for pdb_id in pdb_ids[:limit]
    ]
Example #6
0
def fetch_and_save_pdb_file(pdb_code, output_filepath):
    """
    Download a PDB entry through pypdb and write it to a local ``.pdb`` file.

    Parameters
    ----------
    pdb_code : str
        PDB code passed to ``pypdb.get_pdb_file``.
    output_filepath : str or pathlib.Path
        Destination path including the filename; its suffix is replaced by
        (or set to) ``.pdb``.

    Returns
    -------
    pathlib.Path
        The path the content was written to, with the ``.pdb`` suffix.
    """
    fetched_text = pypdb.get_pdb_file(pdb_code)

    # Force the ".pdb" extension whatever suffix the caller supplied.
    target = Path(output_filepath).with_suffix(".pdb")
    with target.open("w") as out:
        out.write(fetched_text)
    return target
Example #7
0
def download_pdb(pdbid, file_pathway):
    """
    Download a PDB entry and write it to ``<file_pathway>/<pdbid>.pdb``.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to download
        file_pathway: a string containing the directory in which the PDB file is
            written; it is created if it does not exist yet

    Returns: nothing, but it does write the PDB file

    ***Note: this function does NOT fix any mistakes with the PDB file

    """

    # exist_ok avoids the check-then-create race of a separate os.path.exists
    # test when several downloads target the same directory.
    os.makedirs(file_pathway, exist_ok=True)

    # NOTE(review): `bunit` is not a parameter of this function; it is
    # presumably a module-level flag selecting the biological unit -- confirm
    # that it is defined before this function is called.
    if bunit == True:
        pdb = get_pdb_biological_unit(pdbid)

    else:
        pdb = pypdb.get_pdb_file(pdbid)

    write_file(os.path.join(file_pathway, '%s.pdb' % pdbid), pdb)
Example #8
0
    def getPDB(self):
        """
        Downloads the PDB file from the Protein Data Bank.

        The file is written as ``<code>.pdb`` in the current working
        directory and its path is remembered, so later calls return the
        stored path without downloading again.

        Returns
        -------
        pdb : str
            Returns the absolute path to the PDB file downloaded from the Protein Data Bank.

        Raises
        ------
        ValueError
            If the PDB file could not be downloaded for the stored code.
        """
        if self._pdb:
            return self._pdb

        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit are not reported as an invalid code.
        try:
            pdb_string = _pypdb.get_pdb_file(self._code,
                                             filetype="pdb",
                                             compression=False)
        except Exception as e:
            raise ValueError("Invalid PDB code: '%s'" % self._code) from e

        # A missing result cannot be written to file; treat it like a
        # failed download.
        if pdb_string is None:
            raise ValueError("Invalid PDB code: '%s'" % self._code)

        filename = _os.path.abspath(self._code + ".pdb")
        with open(filename, "w") as file:
            file.write(pdb_string)

        self._pdb = filename
        return self._pdb
Example #9
0
def process_input_pdb(request):
    """Return the chains of the PDB structure supplied with the request as JSON.

    The structure comes from the POST field ``pdb_search`` (fetched through
    pypdb) or, when no search term is given, from the uploaded file
    ``pdb_file`` decoded as UTF-8. A search term takes precedence over an
    upload.

    Parameters
    ----------
    request
        Request object exposing ``FILES`` and ``POST`` mappings.

    Returns
    -------
    JsonResponse
        ``{'chains': [...]}`` with the result of ``get_chains``, or an empty
        list when no structure was supplied.
    """
    import pypdb

    # Only a missing upload is expected here; other errors should surface.
    try:
        pdb_file = request.FILES['pdb_file']
    except KeyError:
        pdb_file = None
    pdb_search = request.POST.get('pdb_search')

    pdb = None
    if pdb_search:
        pdb = pypdb.get_pdb_file(pdb_search)
    elif pdb_file:
        # Skipped when a search term is present, because its result would
        # replace the upload anyway.
        pdb = pdb_file.read().decode('utf-8')

    chains = get_chains(pdb) if pdb else []
    return JsonResponse({'chains': chains})
Example #10
0
    def _down(pdb, outdir):
        """Download one PDB file into `outdir`, retrying on failure.

        Up to 10 attempts are made, with a 5 second pause after each failed
        attempt. An attempt fails when the download raises or yields no
        data.

        Parameters
        ----------
        pdb : str
            PDB identifier; also used as the output file's base name.
        outdir : str
            Directory in which ``<pdb>.pdb`` is written.

        Returns
        -------
        bool
            True if the file was downloaded and written, False otherwise.
        """
        data = None
        for _ in range(10):
            # Catch Exception rather than everything so that Ctrl-C still
            # interrupts the retry loop.
            try:
                data = pypdb.get_pdb_file(pdb,
                                          filetype='pdb',
                                          compression=False)
            except Exception:
                data = None
            if data is not None:
                break
            print('PDBDown -> Issue with :', pdb)
            print('Trying again in 5 sec')
            time.sleep(5)

        if data is None:
            print('PDBdown -> Could not download ', pdb)
            return False

        fname = os.path.join(outdir, pdb) + '.pdb'
        # The context manager closes the file even if the write fails.
        with open(fname, 'w') as f:
            f.write(data)
        return True
    return structureList


# File names listed here are skipped by the download loop below.
ignoreFileList = ["cryoem_n_glycosylated.csv", "xray_n_glycosylated.csv"]
# NOTE: this second assignment replaces the list above, so as written no
# real file name is excluded (only an empty name would match).
ignoreFileList = [""]

# Structure lists are read from "<current working directory>/structures".
rootDir = os.getcwd()
structureListDir = os.path.join(rootDir, "structures")


# For every non-ignored .csv file in the structures directory: create an
# output folder named after the file (without extension), build the list of
# PDB IDs with GenerateListOfStructures, and download each one into it.
for structureListFile in os.listdir(structureListDir):
    if structureListFile.endswith(".csv") and structureListFile not in ignoreFileList:
        print(structureListFile)
        structureListFileName = os.path.splitext(os.path.basename(structureListFile))[0]
        outputDir = os.path.join(structureListDir, structureListFileName)
        CreateFolder(outputDir)
        currentStructureList = GenerateListOfStructures(structureListDir, structureListFile)
        for count, pdbID in enumerate(currentStructureList):
            # `count` starts at 0, so the progress shown is the number of
            # entries finished before the current one.
            print(f'Currently downloading: {pdbID}\nProgress: {count} out of {len(currentStructureList)}\t Progress - {int((count/len(currentStructureList)*100))}%')
            newFileName = pdbID + '.pdb'
            # NOTE(review): `pdb` is presumably the pypdb module imported
            # under an alias -- confirm against the file's imports.
            currentPDB = pdb.get_pdb_file(pdbID, filetype='pdb', compression=False)
            WriteFile(outputDir, newFileName, currentPDB)
       







Example #12
0
def get_pdb(ID):
    """Fetch entry `ID` through pypdb as uncompressed 'cif' data and return it."""
    return pypdb.get_pdb_file(ID, filetype='cif', compression=False)
Example #13
0
def _fetch_from_rcsb(pdb_id):
    """Fetch the 'pdb' file for `pdb_id` through pypdb and return it encoded as bytes.

    Raises
    ------
    PDBAddError
        If pypdb returns None for the given ID.
    """
    fetched = pypdb.get_pdb_file(pdb_id, filetype="pdb")
    if fetched is not None:
        return fetched.encode()

    raise PDBAddError(f"no PDB with ID '{pdb_id}' found")
Example #14
0
import pypdb
import os

# Download each listed PDB entry and store it as "<id>/input/<id>.pdb".
pdbs = ['1ubq']
for p in pdbs:
    pdb_file = pypdb.get_pdb_file(p)

    # Create the target directory with os.makedirs instead of shelling out
    # to mkdir/mv: no shell interpolation of the ID, portable across
    # platforms, and failures raise instead of being ignored.
    input_dir = os.path.join(p, 'input')
    os.makedirs(input_dir, exist_ok=True)

    # Write straight to the final location rather than writing to the
    # working directory and moving the file afterwards.
    with open(os.path.join(input_dir, '{}.pdb'.format(p)), 'w') as f:
        f.write(pdb_file)
    print("{} done".format(p))
Example #15
0
def submit(request):
    """Create a job from the submitted form and start its processing scripts.

    The PDB structure comes from the POST field ``pdb_search`` (fetched
    through pypdb) or, when that is empty, from the uploaded file
    ``pdb_file``. If the form is valid and a structure is available, a
    ``Job`` is saved, its directory tree is created under ``media/jobs``,
    the selected chains' ATOM records are written and post-processed with
    ``sed`` and ``gmx editconf``, and the tksamc / gtksamc run scripts are
    launched in the background.

    Parameters
    ----------
    request
        Request object exposing ``POST`` and ``FILES`` mappings.

    Returns
    -------
    A redirect to the ``check_job`` page of the new job, or the re-rendered
    index page when the form is invalid or no structure was supplied.
    """
    import subprocess
    import os
    import re
    from . import helpers
    from .forms import JobForm
    import pypdb
    import shutil

    pdb_search = request.POST.get('pdb_search')

    # Both stay None when neither a search term nor an upload is supplied.
    pdb = None
    pdb_filename = None

    def run_script(processed_pdb_filename, job_dir, orig_bin_dir, archive_name):
        # NOTE: `job`, `temperature` and `ph` are read from the enclosing
        # function; they are assigned before run_script is first called.
        bin_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', orig_bin_dir)

        # Link every file of the bin directory into the job directory.
        for f in os.listdir(bin_dir):
            os.symlink(os.path.join(bin_dir, f), os.path.join(job_dir, f))

        with helpers.change_workingdir(job_dir):
            shutil.copy(os.path.join('..', processed_pdb_filename), job_dir)

            # The scripts are started without waiting for them to finish.
            if job.ph_range:
                subprocess.Popen(['./run_pdg-ph.sh', processed_pdb_filename.split('.')[0], 'MC', archive_name], shell=False)
            else:
                subprocess.Popen(['./run_pdg.sh', str(temperature), str(ph),
                                  processed_pdb_filename, archive_name], shell=False)

    if pdb_search:
        pdb = pypdb.get_pdb_file(pdb_search)
        # The search term is user input: sanitize it exactly like an
        # uploaded file name before using it as a file name on disk.
        pdb_filename = re.sub('[^0-9a-zA-Z.]+', '_', pdb_search) + '.pdb'
    else:
        # Only a missing upload is expected here; other errors should surface.
        try:
            pdb_file = request.FILES['pdb_file']
        except KeyError:
            pdb_file = None
        if pdb_file:
            pdb_filename = re.sub('[^0-9a-zA-Z.]+', '_', pdb_file.name)
            pdb = pdb_file.read().decode('utf-8')

    available_chains = [[x, x] for x in get_chains(pdb)] if pdb is not None else []

    form = JobForm(available_chains, request.POST, request.FILES)
    # Without a structure there is nothing to process, so the form is shown
    # again instead of failing later on an undefined structure.
    if not form.is_valid() or pdb is None:
        return render(request, 'main/index.html', {'form': form, 'nav': 'home'})

    name = form.cleaned_data['name']
    temperature = form.cleaned_data['temperature']
    ph = form.cleaned_data['ph']
    ph_range = form.cleaned_data['ph_range']
    email = form.cleaned_data['email']
    chain = form.cleaned_data['chain']
    tksamc_version = int(form.cleaned_data['tksamc_version'])

    job = Job(name=name, ph=ph, ph_range=ph_range,
              temperature=temperature, email=email, chain=chain, tksamc_version=tksamc_version)
    job.save()
    job_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../media/jobs/', str(job.id))
    os.makedirs(job_dir)

    # Version 1 selects tksamc, version 2 gtksamc, version 0 both.
    tksamc_job_dir = None
    gtksamc_job_dir = None
    if tksamc_version == 1 or tksamc_version == 0:
        tksamc_job_dir = os.path.join(job_dir, 'tksamc')
        os.makedirs(tksamc_job_dir)
    if tksamc_version == 2 or tksamc_version == 0:
        gtksamc_job_dir = os.path.join(job_dir, 'gtksamc')
        os.makedirs(gtksamc_job_dir)

    # With no chain selected, the character class below becomes [^\s],
    # i.e. any non-whitespace chain identifier is accepted.
    if (not chain):
        chain = [r'^\s']

    # Keep only the ATOM records whose chain field matches the selection.
    with open(os.path.join(job_dir, pdb_filename), 'w') as destination:
        new_pdb = '\n'.join(re.findall(r'^ATOM\s+(?:[^\s]+\s+){3}[%s]\s+.*' % ('|'.join(chain)), pdb, re.MULTILINE))
        destination.write(new_pdb)

    with helpers.change_workingdir(job_dir):
        # Replace 'A'-prefixed residue names by the plain name and drop lines
        # with 'B'-prefixed residue names (presumably alternate-location
        # variants -- confirm), then run gmx editconf on the result.
        subprocess.check_output(['/bin/sed', '-i', 's/AALA/ ALA/g;s/ACYS/ CYS/g;s/AASP/ ASP/g;s/AGLU/ GLU/g;s/APHE/ PHE/g;s/AGLY/ GLY/g;s/AHIS/ HIS/g;s/AILE/ ILE/g;s/ALYS/ LYS/g;s/ALEU/ LEU/g;s/AMET/ MET/g;s/AASN/ ASN/g;s/APRO/ PRO/g;s/AGLN/ GLN/g;s/AARG/ ARG/g;s/ASER/ SER/g;s/ATHR/ THR/g;s/AVAL/ VAL/g;s/ATRP/ TRP/g;s/ATYR/ TYR/g', pdb_filename], shell=False)
        subprocess.check_output(['/bin/sed', '-i', '/BALA/d;/BCYS/d;/BASP/d;/BGLU/d;/BPHE/d;/BGLY/d;/BHIS/d;/BILE/d;/BLYS/d;/BLEU/d;/BMET/d;/BASN/d;/BPRO/d;/BGLN/d;/BARG/d;/BSER/d;/BTHR/d;/BVAL/d;/BTRP/d;/BTYR/d', pdb_filename], shell=False)
        subprocess.check_output(['/usr/bin/gmx', 'editconf', '-f', pdb_filename, '-c', '-resnr',
                            '1', '-label', 'A', '-o', 'processed_{0}'.format(pdb_filename)], shell=False)

    # Archives are named after the job, falling back to its id.
    if job.name != '':
        base_archive_name = job.name
    else:
        base_archive_name = str(job.id)

    if tksamc_job_dir:
        run_script('processed_{0}'.format(pdb_filename),
                   tksamc_job_dir, 'tksamc_bin', 'tksamc_' + base_archive_name)
    if gtksamc_job_dir:
        run_script('processed_{0}'.format(pdb_filename),
                   gtksamc_job_dir, 'gtksamc_bin', 'gtksamc_' + base_archive_name)

    if email != '':
        job_url = request.build_absolute_uri(
            reverse('check_job', args=[job.id]))
        helpers.send_email(email, job.name, job_url)

    return HttpResponseRedirect(reverse('check_job', args=[job.id]))