def save_models(self):
    """Write the PDB outputs selected in ``self.pdb_output`` to ``<work_dir>/output_pdbs``.

    Recognised flags:
        'R' - the whole trajectory, saved in 'replicas' mode,
        'F' - the filtered trajectory, saved under the name 'top1000',
        'C' - every cluster, saved as 'cluster_<index>',
        'M' - the final models (medoids); rebuilt to all-atom with ca2all
              when ``self.aa_rebuild`` is set, saved as CA models otherwise.
    """
    def note(text):
        # Every progress message of this method goes to the log file.
        logger.log_file(module_name=_name, msg=text)

    # output folder
    output_folder = os.path.join(self.work_dir, 'output_pdbs')
    note("Saving pdb files to " + str(output_folder))
    try:
        os.mkdir(output_folder)
    except OSError:
        logger.warning(
            _name,
            "Possibly overwriting previous pdb files. Use --work-dir <DIR> to avoid that."
        )

    # Saving the trajectory to PDBs:
    if 'R' in self.pdb_output:
        note('Saving replicas...')
        self.trajectory.to_pdb(mode='replicas', to_dir=output_folder)

    # Saving top1000 models to PDB:
    if 'F' in self.pdb_output:
        note('Saving filtered models...')
        self.filtered_trajectory.to_pdb(
            mode='replicas', to_dir=output_folder, name='top1000')

    # Saving clusters in CA representation
    if 'C' in self.pdb_output:
        note('Saving clusters...')
        for index, cluster in enumerate(self.clusters):
            cluster.to_pdb(
                mode='replicas',
                to_dir=output_folder,
                name='cluster_{0}'.format(index))

    # Saving final models:
    if 'M' not in self.pdb_output:
        return

    if not self.aa_rebuild:
        note('Saving final models (in CA representation)')
        self.medoids.to_pdb(mode='models', to_dir=output_folder, name='model')
        return

    note('Saving final models (in AA representation)')
    pdb_medoids = self.medoids.to_pdb()
    # Imported lazily, only when an all-atom rebuild is actually requested.
    from CABS.ca2all import ca2all
    from CABS.pdblib import Pdb
    for index, medoid in enumerate(pdb_medoids):
        model_path = os.path.join(output_folder, 'model_{0}.pdb'.format(index))
        modeller_log = os.path.join(
            self.work_dir, 'output_data',
            'modeller_output_{0}.txt'.format(index))
        ca2all(
            medoid,
            output=model_path,
            iterations=self.modeller_iterations,
            out_mdl=modeller_log,
            work_dir=self.work_dir)
        # Re-read the rebuilt model and save it again with the header
        # returned by mk_ss_header().
        rebuilt = Pdb(model_path)
        header = rebuilt.mk_ss_header()
        rebuilt.atoms.save_to_pdb(model_path, header=header)
def run(self):
    """Run the CABS executable and wait for it to finish.

    The executable ``self.cfg['exe']`` is started in ``self.cfg['cwd']``
    while a ``logger.CabsObserver`` watches the ``PROGRESS`` file in the
    same directory. Anything the process writes to stderr is logged as a
    warning.
    """
    monitor = logger.CabsObserver(
        interval=0.2, progress_file=join(self.cfg['cwd'], 'PROGRESS'))
    try:
        cabs_proc = Popen(
            self.cfg['exe'], cwd=self.cfg['cwd'], stderr=PIPE, stdin=PIPE)
        # stdout is not piped, so only the stderr half of the pair is useful.
        _, stderr = cabs_proc.communicate()
        if stderr:
            logger.warning(module_name=_name, msg=stderr)
    finally:
        # Shut the observer down even when the executable cannot be started
        # or communicate() fails; previously it was left running then.
        monitor.exit()
def parse_reference(self, ref):
    """Set ``self.reference`` from a ``<source>:<chains>`` reference string.

    When ``ref`` has no ``split`` method (e.g. it is None) the initial
    complex and its protein chains are used as the reference. An invalid
    PDB input or a malformed string is only reported with a warning.
    """
    try:
        try:
            _source, target_chains = ref.split(":")
            ref_atoms = pdblib.Pdb(
                ref, selection='name CA', no_exit=True, verify=True).atoms
            self.reference = (ref_atoms, target_chains)
            super(FlexTask, self).parse_reference(ref)
        except AttributeError:
            # ref without a split() method (None): fall back to the input
            fallback = self.initial_complex
            self.reference = (fallback, self.initial_complex.protein_chains)
    except (pdblib.Pdb.InvalidPdbInput, ValueError):
        logger.warning(_name, 'Invalid reference {}'.format(ref))
def save_config_file(self):
    """Dump the job options to ``<work_dir>/config.ini`` when ``self.save_config`` is set.

    Options are written in sorted key order, with underscores in their
    names replaced by dashes. An option that cannot be written is skipped
    with a warning.
    """
    if not self.save_config:
        return

    ini_path = os.path.join(self.work_dir, 'config.ini')
    with open(ini_path, 'w') as configfile:
        configfile.write(CONFIG_HEADER)
        for key in sorted(self.config):
            setting = self.config[key]
            name = str(key).replace("_", "-")
            option = opt_parser.option_formatter(name, setting)
            try:
                configfile.write(option)
            except Exception as e:
                logger.warning(
                    _name,
                    "Failed to save %s option to config file. Reason: %s." %
                    (name, e.message))
def parse_reference(self, ref):
    """Set ``self.reference`` from a ``<source>:<receptor>:<peptide>`` string.

    ``self.reference`` becomes ``(CA atoms, receptor chains, peptide chains)``.
    It is reset to None, with a warning, when the string is malformed, the
    PDB input is invalid, or the number of peptide chains differs from the
    one in the initial complex.
    """
    def load_reference(receptor, peptide):
        # A fresh Pdb object is built on every call, exactly as before.
        ref_atoms = pdblib.Pdb(
            ref, selection='name CA', no_exit=True, verify=True).atoms
        return ref_atoms, receptor, peptide

    try:
        _source, rec, pep = ref.split(':')
        self.reference = load_reference(rec, pep)
        super(DockTask, self).parse_reference(ref)
        # Assigned again after the superclass call.
        self.reference = load_reference(rec, pep)
        declared = len(self.reference[2])
        if len(self.initial_complex.peptide_chains) != declared:
            raise ValueError
    except (ValueError, pdblib.Pdb.InvalidPdbInput):
        logger.warning(_name, 'Invalid reference {}'.format(ref))
        self.reference = None
def if_append(option_name, value):
    """ Handles appended arguments that come as both lists of lists and lists of arguments

    Looks ``option_name`` up in ``options`` and, for options whose action is
    "append", returns one ``"\\n<option_name> : <value>"`` entry per element
    of ``value``. When the option declares ``nargs`` every element is itself
    a sequence and its items are joined with single spaces; otherwise each
    element is written with ``str()``.

    Raises:
        KeyError: the option is unknown, is not an "append" option, or
            ``value`` cannot be formatted (a warning is logged for the
            formatting failures).
    """
    spec = options[option_name]
    if spec["action"] != "append":
        raise KeyError(option_name)

    label = "\n" + option_name + " : "
    try:
        # Presence check only: a missing 'nargs' key raises KeyError and
        # selects the flat, one-value-per-entry format handled below.
        spec['nargs']
        if isinstance(value, list):
            return "".join(
                label + " ".join(str(item) for item in single_value)
                for single_value in value
            )
        logger.warning(
            "OptParse",
            "Issues while saving multiple argument option: %s" % option_name)
        raise KeyError(option_name)
    except KeyError:
        if isinstance(value, list):
            return "".join(label + str(single_value) for single_value in value)
        logger.warning(
            "OptParse",
            "Issues while saving appended argument option: %s" % option_name)
        raise KeyError(option_name)
    except Exception:
        # e.g. an element of a multi-argument option that is not iterable
        logger.warning("OptParse", "Issues while saving %s option" % option_name)
        raise KeyError(option_name)
def __init__(
        self,
        source,
        selection='',
        remove_alternative_locations=True,
        fix_non_standard_aa=True,
        remove_water=True,
        remove_hetero=True,
        verify=False,
        no_exit=False,  # does not exit on error, raises InvalidPdbInput instead
):
    """Read a structure and keep the requested part of it in ``self.atoms``.

    Args:
        source: ``name``, ``name:chains`` or ``name:receptor:peptide``.
            ``name`` is first read with ``self.read``; when that raises
            IOError it is passed to ``self.fetch`` instead.
        selection: selection string applied to the atoms; declared chains
            are added to it as an extra ``chain ...`` condition.
        remove_alternative_locations: drop alternative atom locations.
        fix_non_standard_aa: rename residues listed in AA_SUB_NAMES to their
            substitutes and warn about residues that are not known at all.
        remove_water: drop atoms of residues named HOH.
        remove_hetero: drop hetero atoms.
        verify: require the declared chain IDs to equal the chain IDs that
            are actually present after the selection.
        no_exit: raise ``Pdb.InvalidPdbInput`` on errors instead of leaving
            the program through ``logger.exit_program``.

    Raises:
        Pdb.InvalidPdbInput: on any input or processing error when
            ``no_exit`` is set.
    """
    def fail(exc, exit_msg=None):
        # Single place for the "raise or exit" policy. str(exc) is used
        # because exc.message is empty for multi-argument exceptions such as
        # IOError(errno, strerror) and does not exist on Python 3.
        reason = str(exc)
        if no_exit:
            raise Pdb.InvalidPdbInput(reason)
        logger.exit_program(
            module_name=_name,
            msg=reason if exit_msg is None else exit_msg,
            exc=exc
        )

    logger.debug(_name, 'Creating Pdb object from {}'.format(source))
    self.atoms = Atoms()

    words = source.split(':')
    if len(words) == 3:
        name, rec, pep = words
        chains = rec + pep
    elif len(words) == 2:
        name, chains = words
    else:
        name = words[0]
        chains = ''

    try:
        self.body = self.read(name)
        self.name = os.path.basename(name).split('.')[0]
    except IOError:
        # Not readable as given: try to obtain it through fetch() instead.
        try:
            self.body = self.read(self.fetch(name))
            self.name = name
        except ConnectionError as e:
            fail(e, 'Cannot connect to the PDB database')
        except HTTPError as e:
            fail(e, 'Invalid PDB code: {}'.format(name))
        except IOError as e:
            fail(e, 'File {} not found'.format(name))

    try:
        logger.debug(_name, 'Processing {}'.format(name))
        current_model = 0
        for line in self.body.split('\n'):
            if re.match(r'(ATOM|HETATM)', line):
                self.atoms.append(Atom(line, current_model))
            else:
                model_record = re.match(r'MODEL\s+(\d+)', line)
                if model_record:
                    current_model = int(model_record.group(1))

        if chains:
            logger.debug(_name, 'Selected chains {}'.format(chains))
            chain_clause = 'chain {}'.format(','.join(chains))
            if selection:
                selection = '({}) and {}'.format(selection, chain_clause)
            else:
                selection = chain_clause

        if remove_alternative_locations:
            logger.debug(_name, 'Removing alternative locations from {}'.format(name))
            self.atoms.remove_alternative_locations()

        if remove_water:
            logger.debug(_name, 'Removing water molecules from {}'.format(name))
            self.atoms = self.atoms.drop('resname HOH')

        if fix_non_standard_aa:
            logger.debug(_name, 'Scanning {} for non-standard amino acids'.format(name))
            # A set makes the per-residue membership test constant time.
            standard_names = set(AA_NAMES[k] for k in AA_NAMES)
            for model in self.atoms.models():
                for residue in model.residues():
                    resname = residue[0].resname
                    if resname in standard_names:
                        continue
                    if resname not in AA_SUB_NAMES:
                        logger.warning(
                            _name, 'Unknown residue {} at {} in {}'.format(
                                resname, residue[0].resid_id(), name
                            )
                        )
                    else:
                        sub_name = AA_SUB_NAMES[resname]
                        for atom in residue:
                            atom.resname = sub_name
                            atom.hetatm = False
                        logger.warning(
                            _name, 'Replacing {} -> {} for {} in {}'.format(
                                resname, sub_name, residue[0].resid_id(), name
                            )
                        )

        # Runs after the residue fix above, which clears the hetatm flag of
        # the residues it renamed.
        if remove_hetero:
            logger.debug(_name, 'Removing heteroatoms from {}'.format(name))
            self.atoms = self.atoms.drop('hetero')

        if selection:
            logger.debug(_name, 'Selecting [{}] from {}'.format(selection, name))
            self.atoms = self.atoms.select(selection)

        if any(atom.chid == ' ' for atom in self.atoms):
            raise ValueError('Atoms with empty chain ID in selected part of PDB file detected.')

        if len(self.atoms) == 0:
            raise Exception('{} contains no atoms'.format(source))

        if chains and verify:
            actual_chains = ''.join(self.atoms.list_chains().keys())
            logger.debug(
                module_name=_name,
                msg='Matching declared [{}] with actual [{}] chain IDs in {}.'.format(chains, actual_chains, name)
            )
            if set(chains) != set(actual_chains):
                msg = 'Mismatch in chain IDs in {}: {} differs from {}'.format(name, chains, actual_chains)
                logger.warning(_name, msg)
                raise Exception(msg)
    except Exception as e:
        fail(e)
def dssp(self, output=''):
    """Runs dssp on the read pdb file and returns a dictionary with secondary structure

    The local executable ``self.DSSP_COMMAND`` is tried first, with the
    structure passed on its standard input. When it cannot be started the
    structure is written to a temporary file in PDB_CACHE and handed to
    ``self.xssp`` instead.

    Args:
        output: directory under which ``output_data/DSSP_output_<name>.txt``
            is saved; only used when it is non-empty and file logging is on.

    Returns:
        OrderedDict mapping ``'<residue number + insertion code>:<chain>'``
        to 'H' (DSSP codes H, G, I), 'E' (B, E), 'T' (T) or 'C' (anything
        else), or None when DSSP could not be run or reported an error.
    """
    out = err = None
    try:
        proc = Popen([self.DSSP_COMMAND, '/dev/stdin'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
        out, err = proc.communicate(input=self.body)
        logger.debug(
            module_name=_name,
            msg='Running DSSP'
        )
    except OSError:
        logger.warning(
            module_name=_name,
            msg='DSSP not found.'
        )
        # mkstemp returns an already open descriptor; reuse it for writing
        # instead of discarding it (which leaked it) and reopening by name.
        fd, tmp_path = mkstemp(suffix='.pdb', prefix='.tmp.dssp.', dir=PDB_CACHE)
        try:
            with os.fdopen(fd, 'wb') as f:
                f.write(self.body)
            try:
                logger.debug(
                    module_name=_name,
                    msg='Submitting structure to the DSSP server'
                )
                out, err = self.xssp(tmp_path)
            except (HTTPError, ConnectionError):
                logger.warning(
                    module_name=_name,
                    msg='Cannot connect to the DSSP server. DSSP was not ran at all.'
                )
                return None
        finally:
            # Best effort: the temporary file is removed on every path.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    if err:
        logger.critical(
            module_name=_name,
            msg='DSSP ERROR: %s' % err.replace('\n', ' ')
        )
        return None
    logger.debug(_name, 'DSSP successful')

    if logger.log_files() and output:
        output = os.path.join(output, 'output_data', 'DSSP_output_%s.txt' % self.name)
        d = os.path.dirname(output)
        if not isdir(d):
            os.makedirs(d)
        logger.to_file(
            filename=output,
            content=out,
            msg='Saving DSSP output to %s' % output
        )

    # DSSP residue records are fixed width: sequential number (5 chars),
    # blank, residue number + insertion code (5), chain (1), blank, amino
    # acid (1), TWO blanks, secondary structure code (1). With a single
    # blank after the amino acid the last group always captures a blank,
    # which classifies every residue as coil.
    record = re.compile(r'^([0-9 ]{5}) ([0-9 ]{4}.)([A-Z ]) ([A-Z])  ([HBEGITS ])(.*)$')
    simplified = {'H': 'H', 'G': 'H', 'I': 'H', 'B': 'E', 'E': 'E', 'T': 'T'}

    sec = OrderedDict()
    for line in out.split('\n'):
        m = record.match(line)
        if m:
            key = str(m.group(2).strip() + ':' + m.group(3))
            sec[key] = simplified.get(m.group(5), 'C')
    return sec
def __init__(self, source, flexibility=None, exclude=None, weights=None, work_dir='.'):
    """Load the CA atoms of the first model of ``source`` as the input protein.

    Args:
        source: passed to ``Pdb`` with the selection 'name CA'.
        flexibility: a number (set as the b-factor of every atom), 'bf'
            (keep the b-factors as read), 'bfi' (1 - b-factor, values above
            1 become 0 and values below 0 become 1), 'bfg'
            (exp(-0.5 * b-factor**2), values below 0 become 1) or anything
            else, which is taken as a file name for ``read_flexibility``.
            When empty, or when the file cannot be read, 1.0 is used for
            all atoms.
        exclude: iterable of ``<selection>[@<peptide>]`` strings. Selections
            for the same key are joined with '+'. A word containing ':' is
            a residue id, or a ``beg-end`` range of them; a word without
            ':' is a set of chain letters.
        weights: 'flex' (b-factors), 'ss' ((occupancy + 1) % 2) or a file
            name with ``<residue id> <weight>`` lines and an optional
            'default' entry. When empty, ``self.weights`` stays None.
        work_dir: passed to ``Pdb.dssp`` as its output directory.
    """
    Atoms.__init__(self)
    pdb = Pdb(source=source, selection='name CA')
    self.atoms = pdb.atoms.models()[0]
    logger.info(module_name=_name, msg="Loading %s as input protein" % source)

    # setup flexibility
    if flexibility:
        try:
            bfac = float(flexibility)
            self.atoms.set_bfac(bfac)
        except ValueError:
            # Not a number: either a keyword or a file name.
            flex_mode = flexibility.lower()
            if flex_mode == 'bf':
                pass
            elif flex_mode == 'bfi':
                for a in self.atoms:
                    if a.bfac > 1.:
                        a.bfac = 0.
                    elif a.bfac < 0.:
                        a.bfac = 1.
                    else:
                        a.bfac = 1. - a.bfac
            elif flex_mode == 'bfg':
                for a in self.atoms:
                    if a.bfac < 0.:
                        a.bfac = 1.
                    else:
                        a.bfac = exp(-0.5 * a.bfac**2)
            else:
                try:
                    d, de = self.read_flexibility(flexibility)
                    self.atoms.update_bfac(d, de)
                except IOError:
                    logger.warning(
                        _name,
                        'Could not read flexibility file: %s' % flexibility)
                    logger.warning(
                        _name,
                        'Using default flexibility(1.0) for all residues.')
                    self.atoms.set_bfac(1.0)
    else:
        self.atoms.set_bfac(1.0)

    # setup excluding
    self.exclude = {}
    if exclude:
        # First pass: collect the raw selections per key, joined with '+'.
        for s in exclude:
            words = s.split('@')
            if len(words) == 1:
                key = 'ALL'
            else:
                key = utils.pep2pep1(words[-1])
            if key in self.exclude:
                self.exclude[key] += '+' + words[0]
            else:
                self.exclude[key] = words[0]

        # Second pass: expand every selection into a list of residue ids.
        for k, v in list(self.exclude.items()):
            self.exclude[k] = []
            for word in v.split('+'):
                if ':' in word:
                    if '-' in word:
                        beg, end = word.split('-')
                        self.exclude[k].extend(
                            self.atoms.atom_range(beg, end))
                    else:
                        self.exclude[k].append(word)
                else:
                    # Keep only the upper-case letters that occur in word.
                    chains = re.sub(r'[^%s]*' % word, '', ascii_uppercase)
                    self.exclude[k].extend(
                        a.resid_id()
                        for a in self.atoms.select('chain %s' % chains))

    ss = pdb.dssp(output=work_dir)
    self.old_ids = self.atoms.update_sec(ss).fix_broken_chains()
    self.new_ids = {v: k for k, v in self.old_ids.items()}

    # Translate the excluded residue ids through the inverse of old_ids.
    for key, val in list(self.exclude.items()):
        self.exclude[key] = [self.new_ids[r] for r in val]

    # setup rmsd_weights
    # The three sources are mutually exclusive. Previously the second test
    # was a plain `if`, so 'flex' weights were overwritten by the
    # file-reading branch and weights=None ended up in open(None).
    self.weights = None
    weights_mode = weights.lower() if weights else None
    if weights_mode == 'flex':
        self.weights = [a.bfac for a in self.atoms]
    elif weights_mode == 'ss':
        self.weights = [(a.occ + 1.) % 2 for a in self.atoms]
    elif weights:
        try:
            default = 1.0
            self.weights = []
            weights_dict = {}
            with open(weights, 'rb') as _file:
                for line in _file:
                    k, v = line.split()[:2]
                    weights_dict[k] = v
            if 'default' in weights_dict:
                default = float(weights_dict['default'])
            for a in self.atoms:
                w = weights_dict.get(a.resid_id())
                w = float(w) if w else default
                self.weights.append(w)
        except (IOError, ValueError):
            # NOTE(review): self.weights is left as it was when the error
            # occurred (empty if the file could not be opened) - confirm
            # that the consumers treat this as "default weights".
            logger.warning(_name, 'Could not read weights file: %s' % weights)
            logger.warning(_name, 'Using default weights(1.0) for all atoms.')

    self.center = self.cent_of_mass()
    self.dimension = self.max_dimension()
    self.patches = {}
def __init__(self, **kwargs):
    """Collect the job options, prepare the work directory and reset the job state.

    Every recognised option is copied from ``kwargs`` to an attribute of the
    same name (None when absent). The complete ``kwargs`` dictionary is kept
    as ``self.config``.
    """
    # Options stored exactly as given; a missing key yields None.
    plain_options = (
        'aa_rebuild', 'add_peptide', 'align',
        'ca_rest_add', 'ca_rest_file', 'ca_rest_weight',
        'clustering_iterations', 'clustering_medoids',
        'contact_map_colors', 'contact_maps', 'contact_threshold',
        'dssp_command', 'exclude', 'excluding_distance',
        'filtering_count', 'filtering_mode', 'fortran_command',
        'image_file_format', 'input_protein',
        'insertion_attempts', 'insertion_clash', 'load_cabs_files',
        'mc_annealing', 'mc_cycles', 'mc_steps', 'modeller_iterations',
        'pdb_output', 'peptide', 'protein_flexibility',
        'protein_restraints', 'protein_restraints_reduce',
        'random_seed', 'reference_pdb', 'remote',
        'replicas', 'replicas_dtemp',
        'save_cabs_files', 'save_config',
        'sc_rest_add', 'sc_rest_file', 'sc_rest_weight',
        'separation', 'temperature', 'verbose', 'work_dir', 'weighted_fit',
    )
    for option in plain_options:
        setattr(self, option, kwargs.get(option))

    # Options given as sequences of pairs are turned into dictionaries.
    for option in ('align_options', 'align_peptide_options'):
        setattr(self, option, dict(kwargs.get(option, [])))

    # Job attributes collected.
    self.config = kwargs

    # Results of the successive job stages; nothing has been computed yet.
    for slot in ('initial_complex', 'restraints', 'cabsrun', 'trajectory',
                 'filtered_trajectory', 'filtered_ndx', 'medoids',
                 'clusters_dict', 'clusters'):
        setattr(self, slot, None)
    self.rmslst = {}
    self.results = None
    self.reference = None

    # Work_dir processing: making sure work_dir is abspath
    self.work_dir = os.path.abspath(self.work_dir)
    try:
        logger.setup(log_level=self.verbose, remote=self.remote,
                     work_dir=self.work_dir)
        os.makedirs(self.work_dir)
    except OSError:
        if not os.path.isdir(self.work_dir):
            logger.exit_program(
                _name,
                '{} already exists and is not a directory. Choose different name.'
                .format(self.work_dir))
        else:
            logger.warning(
                _name,
                '{} already exists. Output data will be overwritten.'.format(
                    self.work_dir))

    # User-supplied commands replace the class-level ones.
    if self.dssp_command:
        pdblib.Pdb.DSSP_COMMAND = self.dssp_command
    if self.fortran_command:
        cabs.CabsRun.FORTRAN_COMMAND = self.fortran_command

    self.file_TRAF = None
    self.file_SEQ = None
    if self.load_cabs_files:
        try:
            self.load_cabs_results()
            self.file_TRAF = os.path.join(self.work_dir, "TRAF")
            self.file_SEQ = os.path.join(self.work_dir, "SEQ")
        except (ValueError, TypeError, IOError) as e:
            logger.exit_program(
                module_name=_name,
                msg="Could not load CABS files from %s. An error occurred: %s" %
                (self.load_cabs_files, e),
                exc=e)

    # self.peptide + self.add_peptide -> self.ligand
    self.peptides = []
    if self.peptide:
        for p in self.peptide:
            self.peptides.append([p, 'random', 'random'])
    if self.add_peptide:
        for p in self.add_peptide:
            if p:
                self.peptides.append(p)

    # Pdb output processing
    if 'A' in self.pdb_output:
        self.pdb_output = 'RFCM'
    elif 'N' in self.pdb_output:
        self.pdb_output = ''

    self.colors = self.contact_map_colors or DEFAULT_COLORS

    # Flag to check if dynamic weights should be used
    self.gauss = (self.weighted_fit == 'gauss')
def ca2all(filename, output=None, iterations=1, work_dir='.',
           out_mdl=os.path.join(os.getcwd(), 'output_data', 'modeller_output_0.txt')):
    """
    Rebuilds ca to all-atom

    Args:
        filename: open file-like object with the CA model; it is read up to
            the first ENDMDL record and closed afterwards.
        output: path of the rebuilt model; standard output when empty.
        iterations: number of models built by MODELLER. The one with the
            lowest DOPE score is used.
        work_dir: directory in which the temporary CA-only PDB is created.
        out_mdl: file that receives everything printed to standard output
            during the rebuild when file logging is on; otherwise that
            output is discarded.

    Chain id, residue number and the occupancy/b-factor columns of every
    residue in the result are copied from the corresponding input CA atom.
    """
    old_stdout = sys.stdout
    log_target = out_mdl if logger.log_files() else os.devnull
    # Everything printed while the model is built goes to the log instead.
    sys.stdout = modeller_log = open(log_target, 'w')

    pdb = prefix = None
    try:
        # Only the unique name is needed; close the descriptor that mkstemp
        # opened so that it does not leak.
        fd, pdb = mkstemp(prefix='.', suffix='.pdb', dir=work_dir, text=True)
        os.close(fd)
        prefix = basename(pdb).rsplit('.', 1)[0]

        aa_names = {
            'A': 'ALA', 'C': 'CYS', 'D': 'ASP', 'E': 'GLU', 'F': 'PHE',
            'G': 'GLY', 'H': 'HIS', 'I': 'ILE', 'K': 'LYS', 'L': 'LEU',
            'M': 'MET', 'N': 'ASN', 'P': 'PRO', 'Q': 'GLN', 'R': 'ARG',
            'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR'
        }
        # three-letter -> one-letter
        aa_names = {v: k for k, v in aa_names.items()}

        # Groups: residue name, chain id, residue number + insertion code,
        # occupancy + b-factor columns.
        pattern = re.compile('ATOM.{9}CA .([A-Z]{3}) ([A-Z ])(.{5}).{27}(.{12}).*')
        atoms = []
        with closing(filename) as f, open(pdb, 'w') as tmp:
            for line in f:
                if line.startswith('ENDMDL'):
                    break
                match = pattern.match(line)
                if match:
                    atoms.append(match.groups())
                    tmp.write(line)

        if not atoms:
            raise Exception('File %s contains no CA atoms' % filename)

        # One-letter sequence with '/' at every chain break.
        chains = [atoms[0][1]]
        seq = ''
        for atom in atoms:
            resname, chain = atom[:2]
            if chain not in chains:
                chains.append(chain)
                seq += '/'
            seq += aa_names[resname]

        pir = prefix + '.pir'
        with open(pir, 'w') as f:
            f.write(_PIR_TEMPLATE % (prefix, seq, pdb))

        env = environ()
        env.io.atom_files_directory = ['.']

        class MyModel(automodel):
            def special_patches(self, aln):
                self.rename_segments(segment_ids=chains)

        mdl = MyModel(
            env,
            alnfile=pir,
            knowns='model_ca',
            sequence=prefix,
            assess_methods=assess.DOPE
        )
        mdl.md_level = refine.slow
        mdl.auto_align(matrix_file=prefix + '.mat')
        mdl.starting_model = 1
        mdl.ending_model = int(iterations)
        mdl.final_malign3d = True
        mdl.make()

        # Successful models, best (lowest) DOPE score first.
        models = [m for m in mdl.outputs if m['failure'] is None]
        models.sort(key=lambda m: m['DOPE score'])
        final = models[0]['name'].rsplit('.', 1)[0] + '_fit.pdb'

        # The real stdout must be back before the model may be printed to it.
        sys.stdout = old_stdout
        modeller_log.close()

        outfile = open(output, 'w') if output else sys.stdout
        try:
            with open(final) as f:
                ca_atoms = iter(atoms)
                current = ch = r = t = nl = None
                for line in f:
                    if line.startswith('ATOM'):
                        res = line[21:27]
                        if not current or current != res:
                            # Next residue: take the ids of the next CA atom.
                            current = res
                            ch, r, t = next(ca_atoms)[1:]
                        nl = line[:21] + ch + r + line[27:54] + t
                        if len(line) > 66:
                            nl += line[66:]
                        outfile.write(nl)
                    elif line.startswith('TER '):
                        outfile.write(line[:22] + nl[22:27] + '\n')
                    else:
                        outfile.write(line)
        finally:
            if output:
                outfile.close()
    finally:
        # Undo the redirection on the error paths as well; closing the log
        # a second time is harmless.
        if sys.stdout is modeller_log:
            sys.stdout = old_stdout
        modeller_log.close()

        if prefix:
            # Files created in the current directory share the prefix; the
            # scratch PDB lives in work_dir and is added explicitly.
            leftovers = set(os.path.abspath(p) for p in glob.glob(prefix + '*'))
            leftovers.add(os.path.abspath(pdb))
            for path in leftovers:
                # One failure must not stop the removal of the other files.
                try:
                    os.remove(path)
                except OSError as e:
                    logger.warning(_name, str(e))
from tempfile import mkstemp from os.path import basename from contextlib import closing from CABS import logger _name = 'MODELLER' _PIR_TEMPLATE = '\n'.join([ '>P1;%s', 'sequence:::::::::', '%s', '*', '', '>P1;model_ca', 'structure:%s:FIRST:@:END:@::::', '*' ]) try: from modeller import * from modeller.automodel import * except ImportError: logger.warning(_name, 'MODELLER NOT FOUND') def ca2all(filename, output=None, iterations=1, work_dir='.', out_mdl=os.path.join(os.getcwd(), 'output_data', 'modeller_output_0.txt')): """ Rebuilds ca to all-atom """ old_stdout = sys.stdout if logger.log_files(): sys.stdout = open(out_mdl, 'w')