Beispiel #1
0
def write_PDB(entity, file, pdbid=None, chainid=None):
    """Write a structure to a PDB file, preceded by HEADER and TITLE records.

    :param entity: Biopython PDB entity; must be at structure level ('S').
    :param file: file name or open handle, handed to ``as_handle``.
    :param pdbid: identifier for the HEADER record; when falsy, the
        ``idcode`` entry of ``entity.header`` is used instead.
    :param chainid: accepted for interface compatibility; not used here.
    :raises PDBException: if ``entity.level`` is not 'S'.
    :raises Exception: if a KeyError is raised while writing.
    """
    with as_handle(file, 'w') as fp:
        try:
            if 'S' == entity.level:
                # Structures assembled by hand may carry no header; in that
                # case only the coordinate records are written.
                header = getattr(entity, 'header', None)
                if header is not None:
                    if not pdbid:
                        pdbid = header.get('idcode', None)
                    hdr = header.get('head', None)
                    dd = header.get('deposition_date', None)
                    if hdr:
                        fp.write(('HEADER    {:40}{:8}   {:4}\n'
                                  ).format(hdr.upper(), (dd or ''),
                                           (pdbid or '')))
                    nam = header.get('name', None)
                    if nam:
                        fp.write('TITLE     ' + nam.upper() + '\n')
                io = PDBIO()
                io.set_structure(entity)
                io.save(fp)

            else:
                raise PDBException("level not 'S': "
                                   + str(entity.level))
        except KeyError:
            # The message used to name write_PIC, which is a different function.
            raise Exception(
                "write_PDB: argument is not a Biopython PDB Entity "
                + str(entity))
Beispiel #2
0
 def scwrl(self, altseq):
     """Repack side chains with SCWRL4 and return the result as a new object.

     The current structure and ``altseq`` are written to scratch files under
     ``temp/``, the ``scwrl`` executable is run on them, and its output is
     parsed back and wrapped in a ``PDBMapStructure``.

     All scratch files are removed afterwards, also when a step fails.
     """
     io = PDBIO()
     seqfname = "temp/%d.txt" % multidigit_rand(10)
     structfile = "temp/%d.pdb" % multidigit_rand(10)
     scwrlfile = structfile + ".scwrl"
     try:
         with open(seqfname, 'wb') as seqfile:
             seqfile.write(altseq)
         io.set_structure(self.structure)
         io.save(structfile)
         cmd = [
             "scwrl", "-0", "-i", structfile, '-s', seqfname, '-o', scwrlfile
         ]
         # Parenthesised single-argument form prints the same on Python 2 and 3.
         print("\n%s" % ' '.join(cmd))
         sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE).communicate()
         p = PDBParser()
         filterwarnings('ignore', category=PDBConstructionWarning)
         try:
             # The parser opens the file itself; a missing output file still
             # surfaces as an I/O error from here.
             s = p.get_structure(self.id, scwrlfile)
         finally:
             # NOTE(review): resetwarnings() clears every warning filter,
             # not only the one installed above.
             resetwarnings()
         s = PDBMapStructure(s, pdb2pose={}, refseq=self.refseq)
     finally:
         for scratch in (structfile, scwrlfile, seqfname):
             if os.path.exists(scratch):
                 os.remove(scratch)
     return s
Beispiel #3
0
    def test_pdbio_write_pqr_structure(self):
        """Round-trip a complete structure through PDBIO in PQR mode."""
        reference = self.example_structure
        writer = PDBIO(is_pqr=True)
        writer.set_structure(reference)

        # mkstemp returns an open descriptor; only the path is needed here.
        descriptor, tmp_path = tempfile.mkstemp()
        os.close(descriptor)
        try:
            writer.save(tmp_path)
            reparsed = self.pqr_parser.get_structure("1a8o", tmp_path)

            # Top-level length of both structures must agree.
            self.assertEqual(len(reparsed), len(reference))

            # Residue counts must agree.
            expected_count = len(list(reference.get_residues()))
            actual_count = len(list(reparsed.get_residues()))
            self.assertEqual(actual_count, expected_count)

            # Atoms are compared pairwise, in iteration order.
            expected_atoms = reference.get_atoms()
            for written_atom in reparsed.get_atoms():
                self.assertEqual(written_atom, next(expected_atoms))
        finally:
            os.remove(tmp_path)
Beispiel #4
0
    def cut_7_helix(self, source_pdb, target_folder, offset=0):
        """Save the part of a structure selected between two z-planes.

        The planes come from the z-coordinates of the 'O' and 'N' atoms of the
        first two residues of chain ' ' in model 0, widened by ``offset``.
        The chain returned by ``self.get_TM_chain`` is filtered through
        ``self.TMSelect`` and written to ``<target_folder><code>_tm.pdb``,
        where ``code`` is ``source_pdb[-8:-4]``.
        """
        pdb_code = source_pdb[-8:-4]
        writer = PDBIO()
        structure = PDBParser(QUIET=True).get_structure(pdb_code, source_pdb)
        first_model = structure[0]

        markers = list(first_model[' '].get_residues())

        def z_of(atom_name, preferred, fallback):
            # Use the preferred marker residue; fall back to the other one
            # when the lookup raises KeyError.
            try:
                return markers[preferred][atom_name].get_coord()[2]
            except KeyError:
                return markers[fallback][atom_name].get_coord()[2]

        upper_z = z_of('O', 1, 0) + offset
        lower_z = z_of('N', 0, 1) - offset

        tm_chain_id = self.get_TM_chain(list(first_model), upper_z, lower_z)
        tm_chain = first_model[tm_chain_id]

        writer.set_structure(structure)
        out_path = target_folder + pdb_code + '_tm.pdb'
        writer.save(out_path, self.TMSelect(upper_z, lower_z, tm_chain))
def main(pdbfile, scheme, outfile):
    """Renumber the residues of a PDB file from ANARCI output and save it.

    ``pdb_tofasta`` writes the sequences of ``pdbfile`` to
    ``<stem>.fasta`` in the current directory, ``anarci`` is run on that file
    with the given ``scheme``, and the residue ids found in its output are
    assigned, in order, to the residues of the matching chains of model 0.
    Residues beyond the numbered part continue counting from the last number.
    The result is written to ``outfile``.
    """
    structure = PDBParser().get_structure('self', pdbfile)
    model = structure[0]

    fastafile = Path(pdbfile).stem + '.fasta'
    with open(fastafile, 'w+') as fasta_handle:
        subprocess.run(['pdb_tofasta', '-multi', pdbfile], stdout=fasta_handle)
    completed = subprocess.run(['anarci', '-i', fastafile, '--scheme', scheme],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)

    chain = ' '
    anarci_dict = {}
    for line in completed.stdout.decode("utf-8").splitlines():
        if line.startswith('#'):
            # Comment lines mentioning "PDB" announce the next chain.
            stripped = line.strip()
            if re.search("PDB", stripped):
                chain = stripped.split('|')[1]
                anarci_dict[chain] = []
            continue
        if not line.startswith(chain):
            continue

        fields = line.split()
        if len(fields) == 3:
            if fields[2] == '-':
                continue
            resseq = int(fields[1])
            inscode = ' '
        elif len(fields) == 4:
            resseq = int(fields[1])
            inscode = str(fields[2])
        else:
            continue
        anarci_dict[chain].append((' ', resseq, inscode))

    # Shift every residue number first so the new ids cannot collide with
    # ids that are still waiting to be replaced.
    for residue in structure.get_residues():
        residue.id = (' ', residue.id[1] + 900, residue.id[2])

    for chain_id in anarci_dict:
        new_ids = anarci_dict[chain_id]
        last_number = 0
        for position, residue in enumerate(model[chain_id]):
            if position < len(new_ids):
                residue.id = new_ids[position]
                last_number = residue.id[1]
            else:
                last_number = last_number + 1
                residue.id = (' ', last_number, ' ')

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(outfile)
Beispiel #6
0
def splitOnePDB(fname, outPath):
  """Split a PDB file into ligand/receptor file pairs, one pair per chain.

  The file is parsed with the module-level ``parser``. Chains of model 0
  whose length falls outside [MIN_SEQ_LEN, MAX_SEQ_LEN] are detached. Each
  remaining chain is then written as ligand to ``<prefix>-<i>_l_u.pdb`` and
  all other chains as receptor to ``<prefix>-<i>_r_u.pdb`` inside ``outPath``.

  Returns 0 when the file cannot be parsed, when walking model 0 raises
  KeyError, or when fewer than two chains remain; otherwise returns None.
  """

  try:
    s= parser.get_structure(fname, fname)
  except Exception:
    print ("Error loading pdb")
    return 0
  # Ids of the chains that are too short or too long.
  banLenChains=[]    
  try:
    for chain in s[0]:
      # Count residues that is_aa() does not accept as standard amino acids.
      badResInChain=0
      for res in  chain.get_list():
        if not is_aa(res,standard=True):
          badResInChain+=1
      # Chain length = residues that have a CA atom minus the rejected ones.
      chainLen= sum(1 for res in chain if "CA" in res) - badResInChain
      if chainLen < MIN_SEQ_LEN or chainLen > MAX_SEQ_LEN:
        print(chainLen)
        banLenChains.append(chain.get_id())
  except KeyError:
    print ("Not good model")
    return 0  
  for badChainId in banLenChains:
    s[0].detach_child(badChainId)

  # Parallel lists: receptorChainList[i] holds the partners of ligandChainList[i].
  receptorChainList= []
  ligandChainList= []
  if len( s[0].get_list())<2:
    print(s)
    print( s[0].get_list())
    print("Not enough good chains")
    return 0
  for chain1 in s[0]:

    # Every chain other than chain1 is a receptor candidate.
    tmpReceptorList=[]
    for chain2 in s[0]:
      if chain1!= chain2:
        tmpReceptorList.append(chain2)
    # With a single partner, skip chain1 if that partner was already used as
    # ligand; this keeps a two-chain complex from being emitted twice.
    if len(tmpReceptorList)>1 or not tmpReceptorList[0] in ligandChainList:   
      ligandChainList.append(chain1)
      receptorChainList.append(tmpReceptorList)
    
  # File-name prefix: base name of the input up to the first dot.
  prefix= os.path.basename(fname).split(".")[0]
  for i, (ligandChain, receptorChains) in enumerate(zip(ligandChainList, receptorChainList)):
    # Ligand: a fresh single-model structure holding only this chain.
    io=PDBIO()
    ligandStruct= Structure(prefix+"ligand")
    ligandStruct.add(Model(0))
    ligandChain.set_parent(ligandStruct[0])
    ligandStruct[0].add(ligandChain)
    io.set_structure(ligandStruct)
    io.save(os.path.join(outPath,prefix+"-"+str(i)+"_l_u.pdb"))

    # Receptor: a fresh single-model structure holding all partner chains.
    io=PDBIO()
    receptorStruct= Structure(prefix+"receptor")
    receptorStruct.add(Model(0))
    for receptorChain in receptorChains:
      receptorChain.set_parent(receptorStruct[0])    
      receptorStruct[0].add(receptorChain)
    io.set_structure(receptorStruct)
    io.save(os.path.join(outPath,prefix+"-"+str(i)+"_r_u.pdb"))
    print( "ligand:", ligandChain, "receptor:",receptorChains )
Beispiel #7
0
def write_PDB(entity: Structure,
              file: str,
              pdbid: str = None,
              chainid: str = None) -> None:
    """Write a structure to a PDB file, preceded by HEADER and TITLE records.

    Atoms are numbered with ``enumerate_atoms`` first and that numbering is
    kept in the output.

    :param entity: Biopython PDB entity; must be at structure level ("S").
    :param file: file name or open handle, handed to ``as_handle``.
    :param pdbid: identifier for the HEADER record; when falsy, the
        ``idcode`` entry of ``entity.header`` is used instead.
    :param chainid: accepted for interface compatibility; not used here.
    :raises PDBException: if ``entity.level`` is not "S".
    :raises Exception: if a KeyError is raised while writing.
    """
    enumerate_atoms(entity)
    with as_handle(file, "w") as fp:
        try:
            if "S" == entity.level:
                # Header records are optional: skip them when the structure
                # carries no header attribute.
                if hasattr(entity, "header"):
                    if not pdbid:
                        pdbid = entity.header.get("idcode", None)
                    hdr = entity.header.get("head", None)
                    dd = pdb_date(entity.header.get("deposition_date", None))

                    if hdr:
                        fp.write(("HEADER    {:40}{:8}   {:4}\n").format(
                            hdr.upper(), (dd or ""), (pdbid or "")))
                    nam = entity.header.get("name", None)
                    if nam:
                        fp.write("TITLE     " + nam.upper() + "\n")
                io = PDBIO()
                io.set_structure(entity)
                io.save(fp, preserve_atom_numbering=True)

            else:
                raise PDBException("level not 'S': " + str(entity.level))
        except KeyError as err:
            # The message used to name write_PIC; chaining keeps the
            # original KeyError visible in the traceback.
            raise Exception(
                "write_PDB: argument is not a Biopython PDB Entity " +
                str(entity)) from err
Beispiel #8
0
 def collect_1(self, checkboard=None):
     """Split every requested complex file into two single-chain PDB files.

     For each entry of ``self.files`` that is listed in ``checkboard``, a
     directory named after the file is created under ``self.outpath``,
     chains 'A' and 'B' of model 0 are saved there under the two names
     encoded in the file name (separated by '___'), ``self.then_do`` is
     called, and the entry is removed from ``checkboard``.

     :param checkboard: list of file names still to process; it is modified
         in place. ``None`` or an empty list means there is nothing to do.
     """
     def getChains(s):
         ret = s.split('___')[:2]
         assert(len(ret) == 2)
         return ret
     # Nothing requested: return before building a parser or a writer.
     if not checkboard:
         return
     parser = PDBParser()
     io = PDBIO()
     for f in self.files:
         # The list shrinks as files are finished; stop once it is empty.
         if not checkboard:
             break
         if f not in checkboard:
             continue
         try:
             os.mkdir(os.path.join( self.outpath, f))
         except OSError as e:
             # An already existing output directory is fine.
             if e.errno != errno.EEXIST:
                 raise
         structure = parser.get_structure(f, os.path.join(self.inpath, f))
         chain_A, chain_B = getChains(f)
         io.set_structure(structure[0]['A'])
         io.save(os.path.join(self.outpath,f,chain_A + '.pdb'))
         io.set_structure(structure[0]['B'])
         io.save(os.path.join(self.outpath,f,chain_B + '.pdb'))
         # Hook that lets other applications reuse this module.
         self.then_do(f)
         # Mark the file as finished.
         checkboard.remove(f)
 def CreatePDB(self, coordArray, fPath, ofile):
     """Write a copy of a PDB file with its atom coordinates replaced.

     :param coordArray: indexable of rows; row ``n`` supplies the x, y and z
         values (first three entries) for the ``n``-th atom of a model, in
         iteration order. Every model starts again at row 0.
     :param fPath: path of the PDB file providing topology and header.
     :param ofile: path of the PDB file to write.
     """
     sloppyparser = PDBParser(PERMISSIVE=True, QUIET=True)
     structure = sloppyparser.get_structure("MD_system", fPath)
     print("\nGenerating PDB file...")
     for model in structure.get_models():
         counter = 0
         for chain in model.get_chains():
             for residue in chain.get_residues():
                 # One coordinate row per atom. The previous nested loop
                 # assigned each row to every atom of the residue, leaving
                 # all of them on the residue's last row.
                 for atom in residue.get_atoms():
                     row = coordArray[counter]
                     atom.set_coord(
                         np.array((float(row[0]),
                                   float(row[1]),
                                   float(row[2]))))
                     counter += 1
     io = PDBIO()
     io.set_structure(structure)
     io.save(ofile)
     print("Transform file written to: " + ofile)
def run_naccess(model,
                pdb_file,
                probe_size=None,
                z_slice=None,
                naccess='naccess',
                temp_path='/tmp/'):
    """Run NACCESS and return the lines of its .rsa and .asa output files.

    :param model: model whose parent structure is written out when no
        ``pdb_file`` is given.
    :param pdb_file: path of an existing PDB file, or a falsy value to use
        ``model`` instead.
    :param probe_size: value for the ``-p`` option (skipped when falsy).
    :param z_slice: value for the ``-z`` option (skipped when falsy).
    :param naccess: name or path of the NACCESS executable.
    :param temp_path: directory in which a private scratch directory is made.
    :return: tuple ``(rsa_data, asa_data)`` of lists of lines.
    :raises Exception: if NACCESS left no .rsa or .asa file behind.
    """
    # Absolute path, so it stays valid while the working directory changes.
    tmp_path = os.path.abspath(tempfile.mkdtemp(dir=temp_path))
    old_dir = os.getcwd()
    try:
        # NACCESS only accepts input whose name ends in '.pdb', so either
        # copy the given file or write the model under such a name.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            pdb_file = os.path.abspath(pdb_file)
            shutil.copy(pdb_file, tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        # Arguments must be strings; numeric option values are converted.
        command = [naccess, tmp_pdb_file]
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into the current working directory. Restore the
        # previous one even if the executable cannot be started.
        os.chdir(tmp_path)
        try:
            p = subprocess.Popen(command,
                                 universal_newlines=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            out, err = p.communicate()
        finally:
            os.chdir(old_dir)

        rsa_file = tmp_pdb_file[:-4] + '.rsa'
        asa_file = tmp_pdb_file[:-4] + '.asa'
        # Alert user for errors
        if err.strip():
            warnings.warn(err)

        if (not os.path.exists(rsa_file)) or (not os.path.exists(asa_file)):
            raise Exception('NACCESS did not execute or finish properly.')

        with open(rsa_file) as rf:
            rsa_data = rf.readlines()
        with open(asa_file) as af:
            asa_data = af.readlines()
        return rsa_data, asa_data
    finally:
        # The results are held in memory by now; drop the scratch directory.
        shutil.rmtree(tmp_path, ignore_errors=True)
Beispiel #11
0
def extract(structure, chain_id, start, end, filename):
    """Save part of a structure to ``filename``.

    What gets written is decided by ``ChainSelector(chain_id, start, end)``.
    """
    selector = ChainSelector(chain_id, start, end)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(filename, selector)
Beispiel #12
0
def save_chain_to(chain, filename: str):
    """Wrap ``chain`` in a new ``Structure`` named after the file and save it.

    NOTE(review): the chain is added directly to the structure, without a
    model in between - confirm that ``Structure`` and the writer expect this.
    """
    from Bio.PDB.PDBIO import PDBIO

    writer = PDBIO()
    wrapper = Structure(filename)
    wrapper.add(chain)
    writer.set_structure(wrapper)
    writer.save(filename)
Beispiel #13
0
def extract(structure, chain_id, start, end, filename):
    """Write the selected portion of ``structure`` to ``filename``.

    The selection is delegated to ``ChainSelector(chain_id, start, end)``.
    """
    chosen = ChainSelector(chain_id, start, end)
    pdb_writer = PDBIO()
    pdb_writer.set_structure(structure)
    pdb_writer.save(filename, chosen)
Beispiel #14
0
def writeFileBioPDB(struct,fn):
    """Save ``struct`` to the PDB file ``fn``.

    If saving fails, the error message is printed and the process exits
    with status 1.
    """
    io = PDBIO()
    io.set_structure(struct)
    try:
        io.save(fn)
    except Exception as e:
        # "except ... as" and the parenthesised print are valid on both
        # Python 2.6+ and Python 3, and the printed text is unchanged.
        print(" "+str(e))
        sys.exit(1)
def splitOnePDB(fname, chainIdL, chainIdR, outPath):
    """Write the ligand and receptor chains of a complex to separate files.

    The file is parsed with the module-level ``parser``. Chains of model 0
    with a length outside [MIN_SEQ_LEN, MAX_SEQ_LEN] are counted as unusable;
    at least two usable chains are required. The chains picked by
    ``findNeigChains`` are then saved to ``<prefix>-<L><R>_l_u.pdb`` and
    ``<prefix>-<L><R>_r_u.pdb`` inside ``outPath``.

    Returns 0 when the file cannot be parsed, when walking model 0 raises
    KeyError, or when too few usable chains exist; otherwise returns None.
    """
    base_name = os.path.basename(fname)
    print(base_name)
    try:
        s = parser.get_structure(base_name, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    banLenChains = []
    try:
        for chain in s[0]:
            # Residues that are neither standard amino acids nor water.
            rejected = sum(
                1 for res in chain.get_list()
                if not is_aa(res, standard=True) and res.resname != "HOH")
            with_ca = sum(1 for res in chain if "CA" in res)
            chainLen = with_ca - rejected
            if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
                print(chain, chainLen)
                banLenChains.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    usable = len(s[0].get_list()) - len(banLenChains)
    if usable < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    ligandChains, receptorChains = findNeigChains(s, chainIdL, chainIdR)
    print("ligand:", ligandChains, "receptor:", receptorChains)

    prefix = base_name.split(".")[0]

    def save_chains(chains, role, suffix):
        # Move the chains into a fresh single-model structure and save it.
        io = PDBIO()
        container = Structure(prefix + role)
        container.add(Model(0))
        for member in chains:
            member.set_parent(container[0])
            container[0].add(member)
        io.set_structure(container)
        io.save(
            os.path.join(outPath,
                         prefix + "-" + chainIdL + chainIdR + suffix))

    save_chains(ligandChains, "ligand", "_l_u.pdb")
    save_chains(receptorChains, "receptor", "_r_u.pdb")
Beispiel #16
0
def filter_pdb(input_path, output_path, chain):
    """Copy a PDB file to ``output_path``, filtered by ``ChainSelect(chain)``.

    The parsed structure is named after the stem of ``input_path``.
    """
    name = Path(input_path).stem
    structure = PDBParser().get_structure(name, input_path)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_path, select=ChainSelect(chain))
Beispiel #17
0
def export_structure(structure, name, format):
    """Write ``structure`` to the file ``name`` as PDB or mmCIF.

    :param structure: structure object handed to the writer.
    :param name: path of the output file.
    :param format: ``"pdb"`` or ``"cif"``.
    :raises ValueError: if ``format`` is neither of the two.
    """
    # Reject unknown formats up front; otherwise no writer would be bound
    # and the function would fail with an UnboundLocalError.
    if format not in ("pdb", "cif"):
        raise ValueError(
            "unsupported structure format %r; expected 'pdb' or 'cif'"
            % (format,))
    io = PDBIO() if format == "pdb" else pdb.MMCIFIO()
    io.set_structure(structure)
    io.save(name)
Beispiel #18
0
 def pose(self):
   """ Loads the PDBMapStructure as a Rosetta::Pose object.

   The structure is written to a temporary .pdb file, read back with
   ``rosetta.pose_from_pdb``, and the temporary file is removed again,
   also when writing or loading fails.
   """
   import_rosetta()
   io = PDBIO()
   io.set_structure(self.structure)
   # Only the file name is used, so plain 'w' is enough; the former mode
   # 'wrb' is rejected by Python 3.
   tf = tempfile.NamedTemporaryFile('w',suffix='.pdb',delete=False)
   try:
     tf.close()
     io.save(tf.name)
     pose = rosetta.Pose()
     rosetta.pose_from_pdb(pose,tf.name)
   finally:
     os.remove(tf.name)
   return pose
def get_pdb_string(pdb_path, select=None):
    """Return the contents of a PDB file as a string, optionally filtered.

    :param pdb_path: path of the PDB file to read.
    :param select: selection object as accepted by ``Bio.PDB.PDBIO.save``;
        ``None`` keeps everything.
    :return: the PDB text produced by ``PDBIO``.
    """
    pdb_parser = PDBParser()
    structure = pdb_parser.get_structure('_', pdb_path)

    pdbio = PDBIO()
    pdbio.set_structure(structure)
    # Start from an empty buffer: text used to seed it would remain in the
    # result if less than its length were written.
    with StringIO() as virtual_pdb_file:
        if select is None:
            # Let the writer use its own default selection; it calls
            # methods on the selection object, which None does not offer.
            pdbio.save(virtual_pdb_file)
        else:
            pdbio.save(virtual_pdb_file, select=select)
        return virtual_pdb_file.getvalue()
Beispiel #20
0
def dssp_dict_from_structure(structure, dssp="dsspcmbi", id=None):
    """
    Create a DSSP dictionary from a file name or a structure object.

    A file name is passed to the DSSP executable as an argument and parsed
    into a structure. Any other object is first written to a temporary file
    that is handed to the DSSP process as its standard input.

    @param structure: filename (string), Bio.PDB.Entity, hpf.hddb.db.Structure
    @param dssp: name of the DSSP executable; it is located with ``which``
    @param id: structure id used when parsing a file name (defaults to the
        file name itself)
    @return (Bio.PDB.Entity, dssp, keys)
    """
    from subprocess import Popen, PIPE
    from cStringIO import StringIO

    # Resolve the executable to a full path; the assert fails if `which`
    # printed nothing usable.
    dssp = Popen("which %s" % dssp, shell=True, stdout=PIPE).communicate()[0].strip()
    assert os.path.exists(dssp)
    # If we have a structure object we will pipe it into DSSP
    # otherwise it will be loaded and parsed.
    if isinstance(structure, basestring):
        pipe = None
        dssp = "%s %s" % (dssp, structure)
        from Bio.PDB import PDBParser

        structure = PDBParser().get_structure(id if id else structure, structure)
    else:
        # The structure is written to a temporary file that becomes the DSSP
        # stdin; "--" is appended as the stdin option of the executable.
        dssp = "%s --" % dssp
        import tempfile

        pipe = tempfile.NamedTemporaryFile("w")
        from Bio.PDB.Structure import Entity, Structure as BioStructure

        if isinstance(structure, Entity):
            # Walk up to the top-most parent and save that whole entity.
            s = structure
            while s.get_parent() != None:
                s = s.get_parent()
            from Bio.PDB.PDBIO import PDBIO

            io = PDBIO()
            io.set_structure(s)
            io.save(pipe.name)
        else:
            # Otherwise this is of type hpf.hddb.db.Structure
            # avoid importing the class and mapped meta table stuff
            # NOTE(review): leaving this `with` block closes the temporary
            # file, yet `pipe` is used as stdin below - confirm this works.
            with pipe as handle:
                pipe.write(structure.text)
            structure = structure.structure

    # execute the process, piping data if necessary
    # print pipe.getvalue()
    out, err = Popen(dssp, shell=True, stdin=pipe if pipe else None, stdout=PIPE, stderr=PIPE).communicate()
    # Wrap the captured output in a file-like object for make_dssp_dict.
    io = StringIO()
    io.write(out)
    io.seek(0)
    return (structure,) + make_dssp_dict(io)
Beispiel #21
0
def run_naccess(model, pdb_file, probe_size=None, z_slice=None,
                naccess='naccess', temp_path='/tmp/'):
    """Run NACCESS and return the lines of its .rsa and .asa output files.

    :param model: model whose parent structure is written out when no
        ``pdb_file`` is given.
    :param pdb_file: path of an existing PDB file, or a falsy value to use
        ``model`` instead.
    :param probe_size: value for the ``-p`` option (skipped when falsy).
    :param z_slice: value for the ``-z`` option (skipped when falsy).
    :param naccess: name or path of the NACCESS executable.
    :param temp_path: directory in which a private scratch directory is made.
    :return: tuple ``(rsa_data, asa_data)`` of lists of lines.
    :raises Exception: if NACCESS left no .rsa or .asa file behind.
    """
    # Absolute path, so it stays valid while the working directory changes.
    tmp_path = os.path.abspath(tempfile.mkdtemp(dir=temp_path))
    old_dir = os.getcwd()
    try:
        # NACCESS only accepts input whose name ends in '.pdb', so either
        # copy the given file or write the model under such a name.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            pdb_file = os.path.abspath(pdb_file)
            shutil.copy(pdb_file, tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        # Arguments must be strings; numeric option values are converted.
        command = [naccess, tmp_pdb_file]
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into the current working directory. Restore the
        # previous one even if the executable cannot be started.
        os.chdir(tmp_path)
        try:
            p = subprocess.Popen(command, universal_newlines=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            out, err = p.communicate()
        finally:
            os.chdir(old_dir)

        rsa_file = tmp_pdb_file[:-4] + '.rsa'
        asa_file = tmp_pdb_file[:-4] + '.asa'
        # Alert user for errors
        if err.strip():
            warnings.warn(err)

        if (not os.path.exists(rsa_file)) or (not os.path.exists(asa_file)):
            raise Exception('NACCESS did not execute or finish properly.')

        with open(rsa_file) as rf:
            rsa_data = rf.readlines()
        with open(asa_file) as af:
            asa_data = af.readlines()
        return rsa_data, asa_data
    finally:
        # The results are held in memory by now; drop the scratch directory.
        shutil.rmtree(tmp_path, ignore_errors=True)
Beispiel #22
0
def write_pdb(structure,
              file_name,
              selector=None,
              preserve_atom_numbering=False):
    """Save ``structure`` to the PDB file ``file_name``.

    When ``selector`` is given it is passed on to restrict what is written;
    ``preserve_atom_numbering`` is always forwarded to the writer.
    """
    writer = PDBIO()
    writer.set_structure(structure)
    # The selector is only passed positionally when the caller supplied one.
    positional = (file_name,) if selector is None else (file_name, selector)
    writer.save(*positional, preserve_atom_numbering=preserve_atom_numbering)
Beispiel #23
0
def run_naccess(model,
                pdb_file,
                probe_size=None,
                z_slice=None,
                naccess='naccess',
                temp_path='/tmp/'):
    """Run NACCESS and return the lines of its .rsa and .asa output files.

    :param model: model whose parent structure is written out when no
        ``pdb_file`` is given.
    :param pdb_file: path of an existing PDB file, or a falsy value to use
        ``model`` instead.
    :param probe_size: value for the ``-p`` option (skipped when falsy).
    :param z_slice: value for the ``-z`` option (skipped when falsy).
    :param naccess: name or path of the NACCESS executable.
    :param temp_path: directory in which a private scratch directory is made.
    :return: tuple ``(rsa_data, asa_data)`` of lists of lines.
    :raises IOError: if an expected output file cannot be opened.
    """
    # Local imports keep this function independent of the module header.
    import shutil
    import subprocess

    # mkdtemp/mkstemp create their targets atomically, unlike mktemp.
    tmp_path = os.path.abspath(tempfile.mkdtemp(dir=temp_path))
    old_dir = os.getcwd()
    try:
        # NACCESS only accepts input whose name ends in '.pdb', so either
        # copy the given file or write the model under such a name.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            # Resolved before changing directory so relative paths work.
            shutil.copy(os.path.abspath(pdb_file), tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        # Argument list instead of a shell string: no quoting problems.
        command = [naccess, tmp_pdb_file]
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into the current working directory.
        os.chdir(tmp_path)
        try:
            process = subprocess.Popen(command,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            # Output is drained so the child cannot block; it is not used.
            process.communicate()
        finally:
            os.chdir(old_dir)

        with open(tmp_pdb_file[:-4] + '.rsa') as rf:
            rsa_data = rf.readlines()
        with open(tmp_pdb_file[:-4] + '.asa') as af:
            asa_data = af.readlines()
        return rsa_data, asa_data
    finally:
        shutil.rmtree(tmp_path, ignore_errors=True)
Beispiel #24
0
 def save_structure(self, filename, superimpose_structure=None):
     """ Write the structure returned by C{self.get_structure()} to a pdb file.

     @param filename: The file to which the structure will be written.
     @type filename: str

     @param superimpose_structure: Accepted, but not used by this
         implementation.
     @type superimpose_structure: Structure

     """
     writer = PDBIO()
     sampled = self.get_structure()
     writer.set_structure(sampled)
     writer.save(filename)
def main():
    """Build ``final.pdb`` for the fasta file named on the command line.

    The sequence is searched with ``blast_sequence``, a supporting PDB file
    is downloaded and filtered, and the structure in ``filtered.pdb`` is
    aligned to the target sequence. Gaps and insertions found in the
    alignment are filled with matching parts before the atoms are written.
    """
    if len(sys.argv) < 2:
        print("Please input fasta filename as an argument for the script")
        sys.exit()

    filename = sys.argv[1]
    if not filename.endswith('.fasta'):
        print("Unknown file format (need to be fasta file)")
        sys.exit()
    blast_result = blast_sequence(filename)

    pdb_filepath = download_pdb_support_sequence(blast_result)
    filter_pdb(pdb_filepath)
    structure = PDBParser(PERMISSIVE=1).get_structure('file', 'filtered.pdb')
    template_record = generate_fasta_from_pdb(structure)
    target_seq = SeqIO.read(filename, format="fasta").seq
    alignments = pairwise_alignement(target_seq, template_record.seq)

    # Only CA atoms are considered for computing distances.
    ca_atoms = []
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    if atom.get_id() == "CA":
                        ca_atoms.append(atom)

    gaps = insert_gaps_in_atomlist(ca_atoms, alignments[0][0])
    insertions = insert_gaps_in_atomlist(ca_atoms, alignments[0][1])

    print(gaps)
    print(insertions)

    print("Searching for best part for gaps")
    gap_parts = get_gaps_parts(gaps)
    print("Searching for best part for insertions")
    insertion_kind = get_insertion_type(alignments[0][0], alignments[0][1])
    ins_parts = get_gaps_parts(insertions, insertion_kind)

    print("Completing gaps/insertions by found parts")
    every_atom = []
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    every_atom.append(atom)
    every_atom = insert_gaps_insertions_in_atomlist(
        every_atom, ins_parts, gap_parts)

    print("Writing final pdb file in : final.pdb")
    writer = PDBIO()
    writer.set_structure(build_structure(every_atom))
    writer.save('final.pdb')
def generate_output_file(final_model, out_name):
    """Save the first model of ``final_model`` as a ".pdb" or ".cif" file.

    ``out_name`` (stripped of surrounding whitespace) is the file name
    without extension. mmCIF is used when the model has more than 99999
    atoms or more than 62 chains; otherwise the PDB format is used.
    """
    out_name = str(out_name.strip())
    first_model = final_model[0]

    # The chain count is only looked at when the atom count is within limits.
    needs_mmcif = len(list(first_model.get_atoms())) > 99999
    if not needs_mmcif:
        needs_mmcif = len(list(first_model.get_chains())) > 62

    if needs_mmcif:
        writer = MMCIFIO()
        extension = ".cif"
    else:
        writer = PDBIO()
        extension = ".pdb"
    writer.set_structure(first_model)
    writer.save(out_name + extension)
Beispiel #27
0
 def clean(self):
   """ Runs the structure through lib/clean_pdb.py and returns the result.

   The structure is written to a temporary .pdb file, the cleaning script
   is run on it, and the script's standard output is parsed, processed with
   ``PDBMapParser.process_structure`` and wrapped in a new PDBMapStructure.
   The temporary file is removed even when a step fails.
   """
   p  = PDBParser()
   io = PDBIO()
   # Only the file name is used, so plain 'w' is enough; the former mode
   # 'wrb' is rejected by Python 3.
   tf = tempfile.NamedTemporaryFile('w',suffix='.pdb',delete=False)
   try:
     tf.close()
     io.set_structure(self.structure)
     io.save(tf.name)
     cmd  = ['lib/clean_pdb.py',tf.name,'ignorechain','nopdbout']
     logger.info("Shell to: %s"%' '.join(cmd))
     proc = sp.Popen(cmd,stdout=sp.PIPE)
     try:
       s = p.get_structure(self.get_id(),proc.stdout)
     finally:
       # Close the pipe and reap the child so no zombie process is left.
       proc.stdout.close()
       proc.wait()
   finally:
     os.remove(tf.name)
   p = lib.PDBMapIO.PDBMapParser()
   s = p.process_structure(s,force=True)
   s = PDBMapStructure(s,pdb2pose=self._pdb2pose,refseq=self.refseq)
   return s
Beispiel #28
0
def main(args):
    """Write the selected sections of ``args.pdb`` to standard output.

    The sections are read with ``import_sections`` from ``args.yaml`` for
    the PDB name, which is the file stem without any '_Repair' marker.
    """
    pdb_name = Path(args.pdb).stem
    # FoldX repaired PDBs carry a '_Repair' marker in their name.
    if pdb_name.endswith('_Repair'):
        pdb_name = pdb_name.replace('_Repair', '')

    structure = PDBParser().get_structure(pdb_name, args.pdb)
    sections = import_sections(args.yaml, pdb_name)

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(sys.stdout, select=SectionSelecter(sections))
Beispiel #29
0
def run_naccess(model, pdb_file, probe_size=None, z_slice=None,
                naccess='naccess', temp_path='/tmp/'):
    """Run NACCESS through the shell and return its .rsa and .asa lines.

    :param model: model whose parent structure is written out when no
        ``pdb_file`` is given.
    :param pdb_file: path of an existing PDB file, or a falsy value to use
        ``model`` instead.
    :param probe_size: value for the ``-p`` option (skipped when falsy).
    :param z_slice: value for the ``-z`` option (skipped when falsy).
    :param naccess: command used to start NACCESS.
    :param temp_path: directory in which a private scratch directory is made.
    :return: tuple ``(rsa_data, asa_data)`` of lists of lines.
    """
    # Absolute path, so it stays valid while the working directory changes.
    tmp_path = os.path.abspath(tempfile.mkdtemp(dir=temp_path))
    old_dir = os.getcwd()
    try:
        # NACCESS only accepts input whose name ends in '.pdb', so either
        # copy the given file or write the model under such a name.
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            pdb_file = os.path.abspath(pdb_file)
            shutil.copy(pdb_file, tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        # The pieces are joined into one shell command below, so every
        # piece has to be a string.
        command = [naccess, tmp_pdb_file]
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes into the current working directory. Restore the
        # previous one even if the call raises.
        os.chdir(tmp_path)
        try:
            p = subprocess.call(" ".join(command), shell=True)
        finally:
            os.chdir(old_dir)

        # NOTE(review): the output names replace the last 13 characters of
        # the input path with 'SIFTS' - presumably a locally modified
        # NACCESS; confirm against the tool actually used.
        rsa_file = tmp_pdb_file[:-13] + 'SIFTS.rsa'
        with open(rsa_file) as rf:
            rsa_data = rf.readlines()
        asa_file = tmp_pdb_file[:-13] + 'SIFTS.asa'
        with open(asa_file) as af:
            asa_data = af.readlines()
        return rsa_data, asa_data
    finally:
        # Remove the scratch directory on success and on failure alike.
        shutil.rmtree(tmp_path, ignore_errors=True)
Beispiel #30
0
 def _temp(self,structure):
     if isinstance(structure,basestring):
         assert(os.path.exists(structure))
         return structure
     elif isinstance(structure,Structure):
         temp = tempfile.NamedTemporaryFile("w")
         # Save the structure in a tempfile
         from Bio.PDB.PDBIO import PDBIO
         io = PDBIO()
         io.set_structure(structure)
         io.save(temp)
         self._temp_files.append(temp)
         return temp.name
     else:
         print type(structure)
         raise TypeError(structure)
Beispiel #31
0
def run_naccess(model, pdb_file, probe_size = None, z_slice = None, \
                naccess = 'naccess', temp_path = '/tmp/'):
    """Run NACCESS on a PDB file (or on a model) and return its raw output.

    A private scratch directory is created below ``temp_path``. The input
    is copied (``pdb_file``) or written (``model.get_parent()``) into it
    under a name ending in '.pdb', NACCESS is executed with that directory
    as its working directory, and the resulting '.rsa' and '.asa' files
    are read back. The scratch directory is removed afterwards, also when
    an error occurs.

    Args:
        model: entity whose parent structure is written out when
            ``pdb_file`` is empty.
        pdb_file: path of an existing PDB file, or a false value to use
            ``model`` instead.
        probe_size: optional value passed to NACCESS as ``-p``.
        z_slice: optional value passed to NACCESS as ``-z``.
        naccess: NACCESS executable.
        temp_path: directory in which the scratch directory is created.

    Returns:
        Tuple ``(rsa_data, asa_data)``; each is the list of lines of the
        corresponding NACCESS output file.

    Raises:
        IOError/OSError: the executable cannot be started or an expected
            output file is missing.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import subprocess

    # mkdtemp/mkstemp create the entries atomically (mktemp is race-prone).
    tmp_path = tempfile.mkdtemp(dir=os.path.abspath(temp_path))
    try:
        # file name must end with '.pdb' to work with NACCESS
        # -> create temp file of existing pdb
        #    or write model to temp file
        handle, tmp_pdb_file = tempfile.mkstemp('.pdb', dir=tmp_path)
        os.close(handle)
        if pdb_file:
            # Resolved against the caller's working directory.
            shutil.copy(os.path.abspath(pdb_file), tmp_pdb_file)
        else:
            writer = PDBIO()
            writer.set_structure(model.get_parent())
            writer.save(tmp_pdb_file)

        # Argument list instead of a shell string: no quoting problems.
        command = [naccess, tmp_pdb_file]
        if probe_size:
            command.extend(['-p', str(probe_size)])
        if z_slice:
            command.extend(['-z', str(z_slice)])

        # NACCESS writes to its current working directory; set it for the
        # child only, so this process never has to chdir.
        process = subprocess.Popen(command, cwd=tmp_path,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        # Standard out & err are captured and discarded.
        process.communicate()

        # get the output before the temp directory is deleted
        base_name = tmp_pdb_file[:-4]
        with open(base_name + '.rsa') as rf:
            rsa_data = rf.readlines()
        with open(base_name + '.asa') as af:
            asa_data = af.readlines()
    finally:
        shutil.rmtree(tmp_path, ignore_errors=True)
    return rsa_data, asa_data
Beispiel #32
0
    def remove_residues(self, request, claims):
        """
        Remove residues from a PDB structure

        Every residue whose name (upper-cased comparison) is listed in
        request['residues'] is detached from its chain; chains left empty
        are detached from their model. When request['workdir'] is given the
        result is written to '<workdir>/structure.pdb' and that path is
        returned under 'mol'; otherwise the PDB text itself is returned.

        For a detailed input description see the file:
           mdstudio_structures/schemas/endpoints/removed_residues_request_v1.json
        And for a detailed description of the output see:
           mdstudio_structures/schemas/endpoints/removed_residues_response_v1.json

        :param request: dict with 'mol' (PDB text) and optional 'residues'
                        and 'workdir' entries
        :param claims:  not used by this method
        :return:        dict with 'status' and 'mol'
        """
        # Only normalise the working directory when one was supplied, so the
        # in-memory branch below stays reachable.
        if request.get('workdir') is not None:
            request['workdir'] = os.path.abspath(request['workdir'])

        # Parse the structure
        parser = PDBParser(PERMISSIVE=True)
        struc_obj = StringIO(request.get('mol'))
        try:
            structure = parser.get_structure('mol_object', struc_obj)
        finally:
            struc_obj.close()

        to_remove = set(r.upper() for r in request.get('residues', []))
        removed = []
        for model in structure:
            # Iterate over snapshots: detaching a child while iterating the
            # live container skips the element that follows it.
            for chain in list(model):
                for residue in list(chain):
                    if residue.get_resname() in to_remove:
                        chain.detach_child(residue.id)
                        removed.append(residue.get_resname())
                if len(chain) == 0:
                    model.detach_child(chain.id)
        self.log.info('Removed residues: {0}'.format(','.join(removed)))

        # Save to file or string
        pdbio = PDBIO()
        pdbio.set_structure(structure)

        status = 'completed'
        if request.get('workdir'):
            result = os.path.join(request.get('workdir'), 'structure.pdb')
            pdbio.save(result)
        else:
            outfile = StringIO()
            pdbio.save(outfile)
            outfile.seek(0)
            result = outfile.read()

        return {'status': status, 'mol': result}
Beispiel #33
0
def func1():
    """Extract one chain of an mmCIF entry and save it as 'chain1.pdb'.

    The PDB id is taken from ``sys.argv[2]`` and the chain id from
    ``sys.argv[3]``. The gzipped entry
    '<pathmmcif>/<id[1:3]>/<id>.cif.gz' is decompressed to
    'pdbprocess.cif' in the current directory and parsed, and the requested
    chain of the first model is written to 'chain1.pdb'.

    Any ordinary failure (missing argument, missing file, parse error,
    unknown chain) is reported by printing "FILE NOT FOUND"; nothing is
    returned.
    """
    import sys
    import gzip
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.PDBIO import PDBIO

    parser = MMCIFParser(QUIET=True)

    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    filename = "pdbprocess.cif"

    try:
        pdb1 = "{}".format(sys.argv[2])
        # Characters 1-2 of the id name the sub-directory of the entry.
        fol = pdb1[1:3]
        c1 = "{}".format(sys.argv[3])
        pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)

        # Context managers close both handles even when reading fails.
        with gzip.open(pdbfile, "rb") as tar:
            with open(filename, "wb") as out:
                out.write(tar.read())

        structure = parser.get_structure(pdb1, filename)
        model = structure[0]
        chain = model["{}".format(c1)]

        io = PDBIO()
        io.set_structure(chain)
        io.save("chain1.pdb")
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still propagate.
        print("FILE NOT FOUND")
Beispiel #34
0
 def collect_0(self):
     """Split every input complex into two single-chain PDB files.

     For each name in ``self.files`` a directory of that name is created
     under ``self.outpath`` (an existing one is reused), the file is parsed
     from ``self.inpath``, and chains 'A' and 'B' of the first model are
     saved there. The output file names are the first two '___'-separated
     fields of the input name. ``self.then_do`` is called with the name
     afterwards.
     """
     def split_chain_names(name):
         parts = name.split('___')[:2]
         assert(len(parts) == 2)
         return parts

     pdb_parser = PDBParser()
     writer = PDBIO()
     for fname in self.files:
         target_dir = os.path.join(self.outpath, fname)
         try:
             os.mkdir(target_dir)
         except OSError as err:
             # An already existing directory is fine; anything else is not.
             if err.errno != errno.EEXIST:
                 raise
         structure = pdb_parser.get_structure(fname, os.path.join(self.inpath, fname))
         name_a, name_b = split_chain_names(fname)
         for chain_id, out_name in (('A', name_a), ('B', name_b)):
             writer.set_structure(structure[0][chain_id])
             writer.save(os.path.join(target_dir, out_name + '.pdb'))
         self.then_do(fname)
Beispiel #35
0
def downloadUsingMmtf(pdbId, fnameOut, maxNumberOfChains=MAX_NUMBER_OF_CHAINS):
    """Fetch ``pdbId`` through the MMTF parser and save it as a PDB file.

    Returns True when the structure was written to ``fnameOut``. Returns
    False when the structure has no model 0 or when any error other than
    ``NoValidPDBFile`` occurs (the error is printed). ``NoValidPDBFile`` is
    raised when model 0 has more than ``maxNumberOfChains`` chains.
    """
    print("downloadUsingMmtf")
    try:
        structure = MMTFParser().get_structure_from_url(pdbId)
        if 0 not in structure:
            return False
        nChains = len(structure[0])
        if nChains > maxNumberOfChains:
            raise NoValidPDBFile(
                "The maximun number of allowed chains is %d (%d) for %s" %
                (maxNumberOfChains, nChains, pdbId))
        pdbWriter = PDBIO()
        pdbWriter.set_structure(structure)
        pdbWriter.save(fnameOut)
    except (Exception, ValueError, HTTPError) as e:
        print(e)
        # The chain-limit error is the only one passed on to the caller.
        if not isinstance(e, NoValidPDBFile):
            return False
        raise e
    return True
Beispiel #36
0
def rmsd(pdbid,chain,decoy_file,astral=False):
    """Align one chain of a reference structure against a decoy with Mammoth.

    The reference is loaded with ``hpf.pdb.get_astral`` when ``astral`` is
    true, otherwise with ``hpf.pdb.get_pdb``. The selected chain is written
    to a temporary PDB file that Mammoth reads by name.

    Args:
        pdbid: identifier passed to the hpf.pdb loader.
        chain: chain identifier passed to ``hpf.pdb.PDBChainSelector``.
        decoy_file: path of the decoy structure.
        astral: choose the ASTRAL loader instead of the PDB loader.

    Returns:
        The result of ``Mammoth.do_alignment`` for the two files.
    """
    if astral:
        pdb = hpf.pdb.get_astral(pdbid)
    else:
        pdb = hpf.pdb.get_pdb(pdbid)

    temp = tempfile.NamedTemporaryFile("w")
    try:
        # Save the pdbchain
        io = PDBIO()
        io.set_structure(pdb)
        io.save(temp, select=hpf.pdb.PDBChainSelector(chain))
        # The handle stays open, so flush the buffer before Mammoth opens
        # the file by name; otherwise it may see incomplete data.
        temp.flush()
        # Perform Mammoth alignment
        cl = Mammoth.MammothCL(temp.name, decoy_file)
        return Mammoth.do_alignment(cl)
    finally:
        # Closing the NamedTemporaryFile also deletes it.
        temp.close()
Beispiel #37
0
def moveAndWriteAsPDBIfMmcif(fnameIn, fnameOut, removeInput=False):
    """Load ``fnameIn`` and re-save it in PDB format as ``fnameOut``.

    The residues of all chains of model 0 are counted; the structure is
    only written when that count lies strictly between the configured
    ``minNumResiduesPartner`` and ``maxNumResiduesPartner``. With
    ``removeInput`` the input file is deleted after a successful write.

    Returns True on success. Any exception, including the residue-count
    check failing, is printed and turned into a False return value.
    """
    from Config import Configuration
    settings = Configuration()
    minNumResidues = settings.minNumResiduesPartner
    maxNumResidues = settings.maxNumResiduesPartner
    try:
        struct, __ = loadPdbIfIsPath(fnameIn)
        totalNumRes = sum(len(chain.get_list()) for chain in struct[0])
        if not (minNumResidues < totalNumRes < maxNumResidues):
            raise BadNumberOfResidues(totalNumRes)
        pdbWriter = PDBIO()
        pdbWriter.set_structure(struct)
        pdbWriter.save(fnameOut)
        if removeInput:
            os.remove(fnameIn)
        return True
    except Exception as e:
        print("Error in moveAndWriteAsPDBIfMmcif !!!", e)
        return False
    def save_structure(self, output_pdb_path: str, mod_id: str = None):
        """
        Write the loaded structure to disk as a PDB file.

        Args:
            output_pdb_path: OS path to the output file
            mod_id (optional): key of the single model to write; the whole
                structure is written when omitted

        Errors:
            OutputPathNotProvidedError: output_pdb_path is empty
            OSError: Error saving the file
        """
        if not output_pdb_path:
            raise OutputPathNotProvidedError

        writer = PDBIO()
        # Either the complete structure or just the requested model.
        target = self.st if mod_id is None else self.st[mod_id]
        writer.set_structure(target)
        writer.save(output_pdb_path)
Beispiel #39
0
def extract(structure, chain_id, start, end, filename):
	"""
	Save a selection of residues from one chain of a structure as a PDB file.

	Args:
		structure: A parsed structure object (see the example below)
		chain_id: Identifier of the chain to extract from
		start: First residue of the selection
		end: Last residue of the selection
			(how start/end are interpreted is decided by ChainSelector -- TODO confirm)
		filename: Path of the PDB file to write (ex. my_protein.pdb)
	Returns:
		None; the selected residues are written to filename

	Example of usage
		p=PDBParser()
		s=p.get_structure('X', '1RUZ.pdb')
		extract(s, 'H', 40, 60, 'extracted.pdb')

	"""
	residue_filter = ChainSelector(chain_id, start, end)
	writer = PDBIO()
	writer.set_structure(structure)
	writer.save(filename, select=residue_filter)
Beispiel #40
0
def setup_structures_for_linking(filepath1, filepath2, charmmdir, pdb_path_dir):
    """Write a series of PDB files with molecule 2 placed around molecule 1.

    Both structures are parsed and moved so their centers of mass are at the
    origin. Chains of structure 2 whose ids clash with structure 1 are given
    unused upper-case letters, and the chains of both are combined into one
    structure. For each direction on a coarse sphere grid, structure 2 is
    rotated so that its normalised center of charge is aligned with the
    direction (via ``rig.alignVectors``), translated 45 units along it, and
    the combined structure is saved as
    ``pdb_path_dir + 'struct_<n>.pdb'``.

    Args:
        filepath1: PDB file of the molecule kept at the center.
        filepath2: PDB file of the molecule that is rotated and moved.
        charmmdir: argument passed to ``md.ChargeCalculator``.
        pdb_path_dir: output prefix; used by plain string concatenation, so
            it must end with a path separator to denote a directory.
    """
    cmc = md.CenterOfMassCalculator()
    rig = MRM.Molecular_Rigid_Manipulation()
    structure1 = pdb_parser.get_structure('Centered', filepath1)
    rig.translate_molecule(structure1,rig.center_molecule(cmc.calculate_center_of_mass(structure1)))

    structure2 = pdb_parser.get_structure('Rotated', filepath2)
    rig.translate_molecule(structure2,rig.center_molecule(cmc.calculate_center_of_mass(structure2)))
    # We have two structures with possibly identical chain identifiers and
    # need to make sure there are no duplicates when structure 1 and 2 are
    # merged, so clashing chains of structure 2 are relabelled.
    # TODO: There can only be as many labels as letters in the alphabet; a
    # clashing chain keeps its id when no letter is left.
    used_by_first = set(chain.id for chain in structure1[0])
    # Ids already present in structure 2 are reserved as well; otherwise a
    # relabelled chain could receive the id of one of its own siblings.
    taken = used_by_first.union(chain.id for chain in structure2[0])
    for chain in list(structure2[0]):
        if chain.id in used_by_first:
            for letter in string.ascii_uppercase:
                if letter not in taken:
                    chain.id = letter
                    taken.add(letter)
                    break

    # Join the two structures into one structure with one model holding the
    # chains of structure 1 and 2. deepcopy copies structure 1 recursively;
    # the chains of structure 2 are added as the same objects, so moving
    # structure 2 below also moves them inside structure3.
    structure3 = copy.deepcopy(structure1)
    structure3.id = 'Ensamble'
    for i in structure2.get_chains():
        structure3[0].add(i)
    # TODO: This works only for angles between 0 and 90 not including 0, and 90
    # and will generate angles in all 8 quadrants of the cartesian coordinate system
    Angle_Separation = 45
    locations = []
    for h in range(0,3):
        for i in range(0,360,Angle_Separation):
            # Floor division keeps the bound an int on Python 3 as well.
            for j in range(0,90//Angle_Separation-1):
                if h == 0:
                    z = 0
                elif h == 1:
                    z = np.cos(45*np.pi/180)
                elif h == 2:
                    z = -1*np.cos(45*np.pi/180)

                locations.append([np.cos(i*np.pi/180),np.sin(i*np.pi/180),z])
    # The two poles.
    locations.append([0,0,1])
    locations.append([0,0,-1])
    # Normalise every direction to unit length.
    for i in range(0,len(locations)):
        locations[i] = list(locations[i]/np.linalg.norm(locations[i]))

    structure_id = 0
    for i in locations:
        ccc = md.ChargeCalculator(charmmdir)
        center_of_charge2 = ccc.calculate_center_of_charge(structure2)
        center_of_charge2 = center_of_charge2/np.linalg.norm(center_of_charge2)
        RM = rig.alignVectors(i,center_of_charge2)
        for j in structure2.get_atoms():
            v2 = [j.get_coord()[0],j.get_coord()[1],j.get_coord()[2]]
            jj = np.dot(v2,RM)
            j.set_coord(jj)
        # Move structure 2 a distance of 45 along the unit direction.
        ii = [k*45 for k in i]
        rig.translate_molecule(structure2,ii)
        io = PDBIO()
        io.set_structure(structure3)
        io.save(pdb_path_dir+'struct_'+str(structure_id)+'.pdb')
        structure_id = structure_id + 1
        # Re-center structure 2 before the next orientation.
        rig.translate_molecule(structure2,rig.center_molecule(cmc.calculate_center_of_mass(structure2)))
def main(verbose=False, msafile=None, pdbfile=None, renumligs=False):
    """Renumber the residues of a PDB file after a multiple sequence alignment.

    For every chain of the first model of ``pdbfile`` the chain sequence is
    aligned with ``Needle`` against each record of the FASTA alignment
    ``msafile``; the record with the highest reported 'Longest_Identity' is
    used as template. Residues that map onto a template position receive
    the 1-based position of that residue in the (gapped) MSA row as new
    residue number. The structure is finally written to
    '<prefix>_renum.pdb' in the current working directory, where <prefix>
    is ``fileprefix(pdbfile)``.

    Args:
        verbose: when false, the PDB parser is created with QUIET=True.
        msafile: alignment in FASTA format (opened through ``read``).
        pdbfile: path of the PDB file to renumber.
        renumligs: when true, every residue that is not renumbered from the
            alignment (hetero residues and residues without a mapping) gets
            residue number 0.
    """

    onelettercode = {'ASP':'D', 'GLU':'E', 'ASN':'N', 'GLN':'Q',  'ARG':'R', 'LYS':'K', 'PRO':'P', 'GLY':'G', 'CYS':'C', 'THR':'T', 'SER':'S', 'MET':'M', 'TRP':'W', 'PHE':'F', 'TYR':'Y', 'HIS':'H', 'ALA':'A', 'VAL':'V', 'LEU':'L', 'ILE':'I'}

    wd = os.getcwd()
    msa = AlignIO.read(read(msafile), 'fasta')
    querypdbid = fileprefix(pdbfile)
    querypdb = PDBParser(PERMISSIVE=1, QUIET=(not verbose)).get_structure(querypdbid, pdbfile)
    # for each chain of the first model, build a map residue number -> one-letter code
    # (residue names missing from onelettercode become 'X')
    # NOTE(review): keyed on the residue number only, so residues sharing a
    # number (e.g. insertion codes) overwrite each other -- confirm this is intended
    qseqdicts = dict()
    chains = dict( (chain.get_id(), chain) for chain in querypdb.get_list()[0].get_list() )
    for chainid in chains:
        qseqdicts[chainid] = dict()
        residuelist = chains[chainid].get_list()
        for i, residue in enumerate(residuelist):
            if residue.get_resname() not in onelettercode:
                oletter = 'X'
            else:
                oletter = onelettercode[residue.get_resname()]
            qseqdicts[chainid][int(residue.get_id()[1])] = oletter
    #get seqs as strings
    for chainid in qseqdicts:
        print "\nCurrent chain is '%s'." % chainid
        qseqdict = qseqdicts[chainid]
        # sequence string and the matching residue numbers, both in ascending residue-number order
        qseq = "".join( [qseqdict[resnr] for resnr in sorted(qseqdict.keys())] )
        qseqkeys = [resnr for resnr in sorted(qseqdict.keys())]
        qseq = SeqRecord(Seq(qseq,IUPAC.protein),id=querypdbid)
        # determine the sequence closest to the one in the PDB file
        i=0
        maxid = 0 # maximum sequence identity
        maxal = maxmsaseq = None
        # align qseq to each seq in the msa and find the one with highest identity
        for alignment in msa:
            msaseq = SeqRecord(alignment.seq, id=alignment.id)
            ids = (qseq.id, msaseq.id)
            ali = Needle(pair=ids,records=(qseq, msaseq), name=ids, confopts={'gapopen':10, 'gapextend':0, 'brief':0}) # wd='/tmp/needle'
            ali.align(force_protein=True)
            seqid = NeedleSingle(outfile=ali.outfile).getResults()['Longest_Identity']
            if seqid > maxid:
                maxid = seqid
                maxal = AlignIO.read(ali.outfile, 'emboss')
                maxmsaseq = msaseq
        # maxal stays None when no alignment beat an identity of 0; the
        # subscript below then raises TypeError and the chain is skipped
        try:
            qseq = str(maxal[0].seq)
            tseq = str(maxal[1].seq)
            msaseq = str(maxmsaseq.seq)
            print "Highest seq id is %.2f for:\n%s\n================" % (maxid, str(maxal))
        except TypeError:
            print "No alignment could be generated."
            continue



        # number letters in the msa sequence
        # get the positions without gaps, this gives the actual positions in tseq
        # (1-based positions within the gapped MSA row)
        msaseqindices = [  i for i in range(1,len(msaseq)+1) if msaseq[i-1] != '-' ]


        #   get positions with gaps
        # qseqgappos = [ i for i in range(len(qseq)) if tseq[i] == '-' ]
        # qseqgappos.reverse()
        # qseqkeys = range(len(qseq))
        # for i in qseqgappos:
        #     del qseqkeys[i]


        # trim qseq if some pos there are mapped to gaps in tseq
        # expand the list of qseqkeys to align with MSA
        # (tmp is reversed so that pop() yields residue numbers in ascending
        #  order; alignment columns where tseq has a gap are dropped)
        tmp = list(qseqkeys)
        qseqkeys = []
        tmp.reverse()
        for i in range(len(qseq)):
            if qseq[i] != '-':
                key = tmp.pop()
            else:
                key = None
            if tseq[i] != '-':
                qseqkeys.append(key)

        qseq = [ qseq[i] for i in range(len(qseq)) if tseq[i] != '-' ]
        # and trim gaps from tseq
        tseq = [ tseq[i] for i in range(len(tseq)) if tseq[i] != '-' ]
        # now, tseq is guaranteed gap-free

        #replace each position in qseqindices by None if there is a gap in qseq
        newqseqindices = list(msaseqindices)
        for i in range(len(qseq)):
            if qseq[i] == '-':
                newqseqindices[i] = None
        #print newqseqindices
        # now newqseqindices is the list of indices for the renumbering in the order of the query sequence

        #sanity check
        assert(len(qseqkeys) == len(qseq) == len(tseq) == len(newqseqindices))
        for i in range(len(qseqkeys)):
            if qseqkeys[i] is None:
                assert(newqseqindices[i] is None)

        #remove empty positions and create a dict of the remapping
        newqseqindices = [index for index in newqseqindices if index is not None]
        qseqkeys = [index for index in qseqkeys if index is not None]

        # old residue number -> new residue number
        newqseqindices = dict(zip(qseqkeys,newqseqindices))

        #renumber residues
        chain = chains[chainid]
        for residue in chain.get_list():
            resid = list(residue.get_id())
            index = resid[1]
            # a non-blank first id field marks a hetero residue
            is_het = len(resid[0].strip()) != 0
            if index in newqseqindices and not is_het:
                resid[1] = newqseqindices[index]
            elif renumligs:
                 resid[1] = 0
            residue.id = tuple(resid)

    # write renumbered PDB file
    io = PDBIO()
    io.set_structure(querypdb)
    io.save(os.path.join(wd, '%s_renum.pdb' % querypdbid))
Beispiel #42
0
# Build one single-atom ALA residue (a 'CA' atom) per point, numbered from 1,
# and collect them all in the chain.
num_count = 0
for i in range(shape(points)[0]):
    num_count += 1
    residue = Residue((' ', num_count, ' '), 'ALA', ' ')
    residue.add(Atom('CA', tuple(points[i]), bfactors[i], 0, ' ', 'CA', num_count, 'C'))
    chain.add(residue)

model.add(chain)
structure.add(model)
# --------------------------------------------------------------------
# Write the structure to stdout or to the destination file; with the 'link'
# option a CONECT record joining each atom to its successor is appended.
io = PDBIO()
io.set_structure(structure)
if args['dst'] is None:
    fn = sys.stdout
    io.save(fn)
    if args['link']:
        fn.writelines("CONECT%5d%5d\n" % (k, k + 1)
                      for k in range(1, shape(points)[0]))
else:
    fn = args['dst']
    io.save(fn)
    # Re-open in append mode so the records follow the saved structure.
    with open(fn, "a") as fout:
        if args['link']:
            fout.writelines("CONECT%5d%5d\n" % (k, k + 1)
                            for k in range(1, shape(points)[0]))
#    print "output file: " + args['dst']
Beispiel #43
0
def main():
    # validate file path
    if not args:
        print "Error: No file path provided."
        sys.exit(1)

    filepath1 = "/home/noel/Projects/Protein_design/Insulin/OIPD/2hiu_1H.pdb"
    filepath2 = "/home/noel/Projects/Protein_design/Insulin/OIPD/2zta_1H.pdb"
    # I added a file path for the lucine zipper molecule.
    # The program will have two structures. The first one
    # is to be centered, and the secondone is the one that
    # will be placed around the first one in many different
    # orientations. For now I will call them filepath1 and
    # filepath2.
    filepath1 = args[0]
    if not os.path.exists(filepath1):
        print "Error: File path for molecule to be centered does not exist."
        sys.exit(1)

    filepath2 = args[1]
    if not os.path.exists(filepath2):
        print "Error: File path for molecule to be rotated does not exist."
        sys.exit(1)

    # TODO We should eventually perform more rigorous validation,
    # e.g. verifying file permissions.

    # assign structure id. if one is not provided, default to file name.
    structure_id = options.id
    if not structure_id:
        structure_id = os.path.splitext(os.path.basename(filepath1))[0]
    # We read both structures and place insuline at the center and
    # leucine zipper at [5,0,0]
    structure1 = pdb_parser.get_structure("Centered One", filepath1)
    center_of_mass1 = _calculate_center_of_mass(structure1)
    # print center_of_mass1
    location_vextor1 = center_of_mass1
    translate_molecule(structure1, center_molecule(center_of_mass1))
    center_of_mass1 = _calculate_center_of_mass(structure1)
    print center_of_mass1

    structure2 = pdb_parser.get_structure("Rotated One", filepath2)
    center_of_mass2 = _calculate_center_of_mass(structure2)
    # print center_of_mass2
    translate_molecule(structure2, center_molecule(center_of_mass2))
    location_vector2 = [45, 0, 0]
    translate_molecule(structure2, location_vector2)
    center_of_mass2 = _calculate_center_of_mass(structure2)
    # print center_of_mass2
    # we have two structures with possible identical chain identifiers
    # we need to make sure there are no duplicates when structure 1 and 2
    # are merged. We will relable structure2.
    # TODO: There can only be as many labels as letters in the alphabet.
    # It is unlikely that we will need more lables in the near future,
    # but constructs one day may have more chains than letters in the alphabet.
    ids = {}
    for i in string.ascii_uppercase:
        ids[i] = False
    # First We used model 0 of structure 1 and turned used_ids for that chain
    # identifier to True.
    for i in structure1[0]:
        ids[i.id] = True
    # Now we go through structure 2 and if there are any chains with the same id
    # as those found in structure 1, we will change the chain ids to something else
    for i in structure2[0]:
        if ids[i.id]:
            for j in string.ascii_uppercase:
                if not ids[j]:
                    i.id = j
                    ids[j] = True
                    break

    # We want to join the two structures into one structure, with one model
    # and the chains of structure 1 and 2. First, deepcopy copies an object
    # recursively
    structure3 = copy.deepcopy(structure1)
    structure3.id = "Ensamble"
    for i in structure2.get_chains():
        structure3[0].add(i)

    structure_id = 0
    path_dir = "/home/noel/Projects/Protein_design/Insulin/OIPD/pdbs/"
    # DELETED: (45,45,180),(45,45,360),(45,90,180),(45,90,360),(45,135,180),(45,135,360),(45,180,180),(45,180,360)
    locations = [
        (45, 0, 0),
        (90, 0, 0),
        (135, 0, 0),
        (180, 0, 0),
        (225, 0, 0),
        (270, 0, 0),
        (315, 0, 0),
        (360, 0, 0),
        (45, 45, 45),
        (45, 45, 90),
        (45, 45, 135),
        (45, 45, 225),
        (45, 45, 270),
        (45, 45, 315),
        (45, 90, 45),
        (45, 90, 90),
        (45, 90, 135),
        (45, 90, 225),
        (45, 90, 270),
        (45, 90, 315),
        (45, 135, 45),
        (45, 135, 90),
        (45, 135, 135),
        (45, 135, 225),
        (45, 135, 270),
        (45, 135, 315),
        (45, 180, 45),
        (45, 180, 90),
        (45, 180, 135),
        (45, 180, 225),
        (45, 180, 270),
        (45, 180, 315),
    ]

    for i in locations:
        RotMat = genMatrix(i[0] * np.pi / 180, i[1] * np.pi / 180, i[2] * np.pi / 180)
        center_of_mass2 = _calculate_center_of_mass(structure2)
        translate_molecule(structure2[0], center_molecule(center_of_mass2))
        translate_molecule(structure2[0], list(np.dot(np.asarray(location_vector2), RotMat)))
        io = PDBIO()
        io.set_structure(structure3)
        io.save(path_dir + "struct_" + str(structure_id) + ".pdb")
        # print(structure_id, center_of_mass2)
        structure_id = structure_id + 1