def __init__(self, cmd=None, dominfo=None, save_dir=None, sequence=None, range=None, max_iterations=None, selected_iterations=None, max_rerun=3, stdout=PIPE, stderr=PIPE, timeout=0):
    '''
    Set up a profile-building run.

    Either dominfo (a mapping with 'domain_path',
    'profile_sequence_file' and 'profile_sequence_range' keys) or both
    save_dir and sequence must be given.

    Raises ProfileBuildingError when neither source of information is
    usable, the sequence file is missing, or no save directory is set.
    '''
    if cmd is None:
        cmd = Settings.get("buildali")
    self.cmd = cmd

    #getting information out of dominfo, falling back to the explicit
    #save_dir/sequence arguments
    if dominfo:
        self.save_dir = dominfo['domain_path']
        self.sequence = dominfo['profile_sequence_file']
        self.range = dominfo['profile_sequence_range']
    elif save_dir and sequence:
        self.save_dir = save_dir
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        self.sequence = sequence
        self.range = None
    else:
        raise ProfileBuildingError("DomainInformation should be given.")

    if not os.path.exists(self.sequence):
        raise ProfileBuildingError("Sequence file %s cannot be found!" % self.sequence)

    #profile iteration defaults come from the settings file
    if max_iterations is None:
        max_iterations = int(Settings.get('max_iterations'))
    self.max_iterations = max_iterations

    if selected_iterations is None:
        selected_iterations = [int(i) for i in Settings.get('selected_iterations').split()]
    self.selected_iterations = selected_iterations

    if not self.save_dir:
        raise ProfileBuildingError("Saving directory should be set.")

    #BUGFIX: the explicit range argument previously overwrote the range
    #taken from dominfo even when it was None, discarding the dominfo
    #value; now it only overrides when actually supplied.
    if range is not None:
        self.range = range
    if self.range:
        self.prepare_fasta(range=self.range)

    self.max_rerun = max_rerun

    #command line and timed-runner parameters
    self.command_line = self.get_command_line()
    self.timeout = timeout
    self.stdout = stdout
    self.stderr = stderr
def __init__(self, alignment_method='', search_db='', search_db_size=None, **kwargs):
    '''
    Job-script builder for structure comparison searches.

    alignment_method selects the data file extension: 'DaliLite' uses
    the DAT suffix, 'FAST' and 'TMalign' the processed-CA suffix.
    Raises ValueError for a known-but-unimplemented method, an unknown
    method, or a missing search_db_size.
    '''
    from evdblib.DBTools import Settings
    self.alignment_method = alignment_method
    self.search_db = search_db
    self.search_db_size = search_db_size

    if self.search_db_size is None:
        raise ValueError("The search DB size is required!")

    if alignment_method == 'DaliLite':
        self.data_file_extention = Settings.get("dali_data_suffix")
    elif alignment_method in ('FAST', 'TMalign'):
        self.data_file_extention = Settings.get("processed_ca_structure_suffix")
    elif alignment_method in Settings.get("structure_comparison_methods").split():
        #configured method, but this builder has no handler for it
        raise ValueError("The method is not implemented!", alignment_method)
    else:
        raise ValueError("Unknown structure comparison method!", alignment_method)

    self.alignment_extention = Settings.get("alignment_suffix")
    JobScriptBuilder.__init__(self, **kwargs)
def __init__(self, alignment_method='', search_db='', search_db_size=None, iteration='', **kwargs):
    '''
    Job-script builder for profile comparison searches.

    alignment_method selects the profile extension: 'COMPASS' uses the
    compass suffix, 'HHsearch' the hhm suffix. Raises ValueError for a
    known-but-unimplemented method, an unknown method, or a missing
    search_db_size.
    '''
    from evdblib.DBTools import Settings
    self.alignment_method = alignment_method
    self.search_db = search_db
    self.search_db_size = search_db_size
    self.iteration = int(iteration)

    if self.search_db_size is None:
        raise ValueError("The search DB size is required!")

    if alignment_method == 'COMPASS':
        self.profile_extention = Settings.get("compass_suffix")
    elif alignment_method == 'HHsearch':
        self.profile_extention = Settings.get("hhm_suffix")
    #BUGFIX: the configured method list was tested without .split(),
    #which made this a substring check; now split into words as in the
    #structure-search builder.
    elif alignment_method in Settings.get("profile_comparison_methods").split():
        raise ValueError("The method is not implemented!")
    else:
        raise ValueError("Unknown Profile comparison method!")

    self.alignment_extention = Settings.get("alignment_suffix")
    JobScriptBuilder.__init__(self, **kwargs)
def build_file(self, dominfo):
    '''
    Write a profile-building job script for one domain and return its
    filename.

    Marks dominfo['progress'] = 2 on success. Raises
    JobScriptWriteError if the script file already exists.
    '''
    filename = os.path.join(self.job_dir, 'prfb%s.job' % dominfo['uniqueid'])
    if not self.cmd:
        self.cmd = Settings.get('profile_builder')
    command = self.cmd

    #optional flags taken from dominfo and the settings file
    if dominfo.get('profile_sequence_range'):
        command += ' -r ' + str(dominfo['profile_sequence_range'])
    if Settings.get('profile_type'):
        command += ' -m %s ' % Settings.get('profile_type')
    if Settings.get('blast_db'):
        command += ' -d %s ' % Settings.get('blast_db')
    command += ' %(profile_sequence_file)s %(domain_path)s' % dominfo

    if os.path.exists(filename):
        raise JobScriptWriteError("Profile job script %s already exists!" % filename)

    #with-block guarantees the handle is closed even on write errors
    with open(filename, 'w') as fp:
        print(command, file=fp)

    dominfo['progress'] = 2
    return filename
def __init__(self, cmd=None, input_file=None, output_file=None, calibration_db=None, calibrate=True, calibration_cmd=None):
    '''
    Prepare an hhmake run that converts input_file into an HHM profile.

    cmd, calibration_cmd and calibration_db default to the configured
    values; output_file defaults to the input name with a '.hhm'
    extension. Raises TypeError when input_file is missing.
    '''
    if cmd is None:
        cmd = Settings.get("hhmake")
    if input_file is None:
        raise TypeError("Input_file should be given.")
    if calibration_cmd is None:
        calibration_cmd = Settings.get("hhsearch_cmd")
    if calibration_db is None:
        calibration_db = Settings.get("hhm_cal_db")
    if output_file is None:
        dirname, base_filename, iteration, ext = parse_profile_filename(input_file)
        output_file = build_profile_filename(dirname, base_filename, iteration, '.hhm')

    self.cmd = cmd
    self.calibration_cmd = calibration_cmd
    self.calibration_db = calibration_db
    self.calibrate = calibrate
    self.input_file = input_file
    self.output_file = output_file

    #hhsearch cannot handle long input file names, so work on a copy
    #with a short temporary name; fall back to the original path when
    #the copy fails.
    self.tmpinput = tempfile.NamedTemporaryFile()
    self.tmpinputname = self.tmpinput.name
    try:
        shutil.copy(self.input_file, self.tmpinputname)
    except IOError:
        self.tmpinputname = self.input_file

    #output is written to a temporary file and needs to be copied back
    #after the execution!
    self.tmpoutput = tempfile.NamedTemporaryFile()
    self.tmpoutputname = self.tmpoutput.name

    self.command_lines = self.get_command_lines()
def __init__(self, cmd=None, inputfile=None, outputfile=None, dbfile=None, cpu=1, db_size=None):
    '''
    HHsearch database aligner.

    outputfile defaults to a NamedTemporaryFile (kept alive on self so
    it is not deleted early). Raises AlignerError when inputfile or
    dbfile is missing.
    '''
    if cmd is None:
        cmd = Settings.get('hhsearch_cmd')
    if inputfile is None:
        #BUGFIX: was the misspelled, undefined name AlingerError, which
        #would have raised NameError instead of the intended exception.
        raise AlignerError("No input file is given.")
    if outputfile is None:
        self.outputfile_fp = tempfile.NamedTemporaryFile()
        outputfile = self.outputfile_fp.name
    if dbfile is None:
        raise AlignerError("No DB file is given.")

    self.db_size = db_size
    DBAligner.__init__(self, cmd=cmd, inputfile=inputfile, outputfile=outputfile, dbfile=dbfile)

    #number of CPUs hhsearch may use
    self.cpu = cpu
def build_file(self, dominfo):
    '''
    Write a structure-search job script for one domain and return its
    filename.

    Raises JobScriptWriteError if the script file already exists.
    '''
    from evdblib.DBTools import Settings
    filename = os.path.join(self.job_dir, 'strs%s.job' % dominfo['uniqueid'])
    if not self.cmd:
        self.cmd = Settings.get('structure_searcher')
    command = self.cmd

    #local names below are referenced by the %(...)s template via
    #locals() — keep them in sync with the format string
    domain_path = dominfo['domain_path']
    search_method = self.alignment_method
    search_queryid = dominfo['uniqueid']
    search_query = build_sequence_filename(domain_path, dominfo['uniqueid'], self.data_file_extention)
    search_db = self.search_db
    search_db_size = self.search_db_size
    search_output = build_sequence_filename(domain_path, dominfo['uniqueid'], self.alignment_extention)

    command += ' -q %(search_queryid)s -m %(search_method)s -u -s %(search_db_size)s -d %(search_db)s %(search_query)s %(search_output)s' % locals()

    if os.path.exists(filename):
        raise JobScriptWriteError("Structure search job script %s already exists!" % filename)

    #with-block guarantees the handle is closed even on write errors
    with open(filename, 'w') as fp:
        print(command, file=fp)

    return filename
def get_command_line(self):
    '''
    Return the TMalign invocation as an argv list.

    The default command is looked up lazily here (rather than in yet
    another __init__ override — admittedly not ideal). Note inputfile2
    is passed before inputfile1, matching the original call order.
    '''
    if self.cmd is None:
        self.cmd = Settings.get("tmalign")
    return [self.cmd, self.inputfile2, self.inputfile1]
def __init__(self, cmd=None, save_dir=None, sequence=None, range=None, max_iterations=1, save_all_iteration_results=False, msa_input_fn=None, max_rerun=3, stdout=PIPE, stderr=PIPE, timeout=0, number_of_processors=1):
    '''
    Set up a buildali run.

    sequence: FASTA file for the query sequence.
    msa_input_fn: optional pre-built MSA; when given it replaces the
    query sequence as buildali input.
    save_dir defaults to a fresh temporary directory (remembered in
    self.temp_dir so it can be cleaned up later).

    Raises BuildAliRunnerError when neither a sequence nor an MSA is
    given, and ProfileBuildingError when the input file is missing.
    '''
    self.temp_dir = None
    if save_dir is None:
        self.save_dir = tempfile.mkdtemp()
        self.temp_dir = self.save_dir
    else:
        self.save_dir = save_dir

    if cmd is None:
        cmd = Settings.get("buildali")
    self.cmd = cmd

    self.number_of_processors = number_of_processors
    self.sequence = sequence
    self.msa_input_fn = msa_input_fn
    #an explicit MSA takes precedence over the plain query sequence
    if msa_input_fn:
        self.sequence = msa_input_fn
    self.range = range

    #removed a leftover debug print of (sequence, msa_input_fn) here
    if not (self.sequence or self.msa_input_fn):
        raise BuildAliRunnerError("Query sequence or MSA is necessary to build profile.")

    if not ((self.sequence and os.path.exists(self.sequence)) or (self.msa_input_fn and os.path.exists(self.msa_input_fn))):
        raise ProfileBuildingError("Query sequence file %s cannot be found!" % self.sequence)

    self.max_rerun = max_rerun
    self.save_all_iteration_results = save_all_iteration_results

    #buildali iteration option
    self.max_iterations = max_iterations

    #command line and timed-runner parameters
    self.command_line = self.get_command_line()
    self.timeout = timeout
    self.stdout = stdout
    self.stderr = stderr
def _check_profile_alignment(dominfo, filtered_ids): if dominfo.get('profile_alignment_integrity'): return dominfo['profile_alignment_integrity'] domid = dominfo['uniqueid'] ext = Settings.get("alignment_suffix") aln_fn = build_sequence_filename(dominfo['domain_path'], domid, ext) if os.path.exists(aln_fn): alignments = PairwiseAlignmentRecords() alignments.parse(aln_fn) for alignment_method in Settings.get( "profile_comparison_methods").split(): method_name = alignment_method.lower() + "_1" if not alignments.count(domid, filtered_ids, method_name) == len(filtered_ids): return 0 else: return 1 return 0
def __init__(self, cmd=None, input_file=None, output_file=None):
    '''
    Prepare an mk_compass_db run converting input_file into a COMPASS
    numerical profile.

    cmd defaults to the configured "mk_compass_db" command. Raises
    TypeError when input_file is missing.
    '''
    if cmd is None:
        cmd = Settings.get("mk_compass_db")
    if input_file is None:
        raise TypeError("Input_file should be given.")

    self.input_file = input_file
    self.output_file = output_file
    self.cmd = cmd
    self.command_line = self.get_command_line()
def submit(self):
    '''
    Submit a job script to the queue and get the SGE Queue Job ID.
    '''
    cmdline = [self.submit_cmd]
    if self.name:
        cmdline += ['-N', self.name]
    if Settings.get('sge_job_queue'):
        cmdline += ['-q', Settings.get('sge_job_queue')]
    #run in the current directory
    cmdline += ['-cwd', self.script_file]

    #when the submission directory is not the script's own directory,
    #optionally run qsub from the script directory instead
    run_dir = os.path.dirname(self.script_file) if self.use_script_dir else None

    output = Popen(cmdline, stdout=PIPE, cwd=run_dir).communicate()[0]

    #remember any previous job id before overwriting it
    if self.job_id:
        self.old_job_ids.append(self.job_id)

    try:
        self.job_id = output.split()[2]
    except IndexError:
        raise SunGridEngineError("Submission %s failed!" % self.script_file)
def __init__(self, cmd=None, inputfile=None, outputfile=None, dbfile=None):
    '''
    COMPASS database aligner.

    outputfile defaults to a NamedTemporaryFile (kept alive on self so
    it is not deleted early). Raises AlignerError when inputfile or
    dbfile is missing.
    '''
    if cmd is None:
        cmd = Settings.get('compass_cmd')
    if inputfile is None:
        #BUGFIX: was the misspelled, undefined name AlingerError, which
        #would have raised NameError instead of the intended exception.
        raise AlignerError("No input file is given.")
    if outputfile is None:
        self.outputfile_fp = tempfile.NamedTemporaryFile()
        outputfile = self.outputfile_fp.name
    if dbfile is None:
        raise AlignerError("No DB file is given.")

    DBAligner.__init__(self, cmd=cmd, inputfile=inputfile, outputfile=outputfile, dbfile=dbfile)
def __init__(self, cmd=None, input_file=None, output_file=None, input_type=None, output_type=None, remove_query_gap=True):
    '''
    Prepare a reformat run converting input_file (input_type) into
    output_file (output_type).

    cmd defaults to the configured "reformat" command;
    remove_query_gap controls whether query gap columns are stripped.
    '''
    if cmd is None:
        cmd = Settings.get("reformat")

    self.input_file = input_file
    self.output_file = output_file
    self.input_type = input_type
    self.output_type = output_type
    self.cmd = cmd
    self.command_line = self.get_command_line()
    self.remove_query_gap = remove_query_gap
def build_file(self, dominfo):
    '''
    Write a profile-search job script for one domain and return its
    filename.

    The query profile iteration is capped at the domain's last intact
    profile iteration. Raises JobScriptWriteError if the script file
    already exists.
    '''
    from evdblib.DBTools import Settings
    filename = os.path.join(self.job_dir, 'prfs%s.job' % dominfo['uniqueid'])
    if not self.cmd:
        self.cmd = Settings.get('profile_searcher')
    command = self.cmd

    #local names below are referenced by the %(...)s template via
    #locals() — keep them in sync with the format string
    domain_path = dominfo['domain_path']
    profile_search_method = self.alignment_method
    profile_search_queryid = dominfo['uniqueid']
    iteration = self.iteration
    #never ask for an iteration beyond what was actually built
    query_iteration = min(self.iteration, check_profile_integrity(dominfo))
    profile_search_query = build_profile_filename(domain_path, dominfo['uniqueid'] + '.prof', query_iteration, self.profile_extention)
    profile_search_db = self.search_db
    profile_search_db_size = self.search_db_size
    profile_search_output = build_sequence_filename(domain_path, dominfo['uniqueid'], self.alignment_extention)

    command += ' -q %(profile_search_queryid)s -j %(iteration)s -m %(profile_search_method)s -u -s %(profile_search_db_size)s -d %(profile_search_db)s %(profile_search_query)s %(profile_search_output)s' % locals()

    if os.path.exists(filename):
        raise JobScriptWriteError("Profile search job script %s already exists!" % filename)

    #with-block guarantees the handle is closed even on write errors
    with open(filename, 'w') as fp:
        print(command, file=fp)

    return filename
def _prepare_compass_search_db(db_filename, domain_informations, iteration, use_between, selected_iterations):
    '''
    Concatenate per-domain COMPASS profiles of the given iteration into
    db_filename, write the summed profile length to db_filename+".len",
    and return the number of records written.

    When a domain lacks the requested iteration, use_between allows
    substituting its last intact iteration if it is newer than the
    previously selected one; otherwise the last intact iteration is
    used unconditionally.

    Raises SearchDatabasePreparationError when a profile that should
    exist cannot be found.
    '''
    ext = Settings.get('compass_suffix')
    compass_db_size = 0
    #iteration preceding the requested one in the selected list
    previous_iteration = selected_iterations[max(selected_iterations.index(iteration) - 1, 0)]
    number_of_records = 0

    with open(db_filename, 'w') as db_fp:
        for dominfo in domain_informations:
            domain_path = dominfo['domain_path']
            if not domain_path:
                if verbose:
                    print("WARNING: Dominfo does not have domain_path...")
                    print(dominfo)
                continue

            domid = dominfo['uniqueid']
            compass_file = build_profile_filename(domain_path, domid + '.prof', iteration, ext)

            if not os.path.exists(compass_file):
                last_available_iteration = check_profile_integrity(dominfo)
                if use_between:
                    #substitute the last intact iteration, but only if
                    #it lies between the previous and requested ones
                    if not last_available_iteration:
                        print("WARNING: Profile is bad!", domain_path, domid)
                        continue
                    if last_available_iteration > previous_iteration:
                        compass_file = build_profile_filename(domain_path, domid + '.prof', last_available_iteration, ext)
                    else:
                        if verbose:
                            print("No between iteration available!", iteration, last_available_iteration)
                        continue
                else:
                    compass_file = build_profile_filename(domain_path, domid + '.prof', last_available_iteration, ext)

            #final check; removed an unreachable `continue` that
            #followed this raise in the original
            if not os.path.exists(compass_file):
                print("WARNING: COMPASS file should be available but not found!", compass_file, file=sys.stderr)
                raise SearchDatabasePreparationError("COMPASS numerical profile file is not availble!", compass_file)

            with open(compass_file) as fp:
                db_fp.write(fp.read())
            number_of_records += 1

            #accumulate the profile length for the .len sidecar file
            compass_size_file = compass_file + ".len"
            try:
                with open(compass_size_file) as fp:
                    compass_db_size += int(fp.read().strip())
            except (OSError, IOError, ValueError):
                #narrowed from a bare except: only missing/unreadable
                #or malformed size files are tolerated
                print("WARNING: Cannot read compass profile size file.", compass_size_file)

    with open(db_filename + ".len", 'w') as db_size_fp:
        print(compass_db_size, file=db_size_fp)

    return number_of_records
def _prepare_hhsearch_search_db(db_filename, domain_informations, iteration, use_between, selected_iterations):
    '''
    Concatenate per-domain HHsearch HMM profiles of the given iteration
    into db_filename and return the number of records written.

    When a domain lacks the requested iteration, use_between allows
    substituting its last intact iteration if it is newer than the
    previously selected one; otherwise the last intact iteration is
    used unconditionally.

    Raises ValueError when a dominfo lacks 'domain_path' and
    SearchDatabasePreparationError when a profile that should exist
    cannot be found.
    '''
    ext = Settings.get('hhm_suffix')
    #iteration preceding the requested one in the selected list
    previous_iteration = selected_iterations[max(selected_iterations.index(iteration) - 1, 0)]
    number_of_records = 0

    with open(db_filename, 'w') as db_fp:
        for dominfo in domain_informations:
            if 'domain_path' not in dominfo:
                raise ValueError('domain_path does not exists', dominfo)
            domain_path = dominfo['domain_path']
            if not domain_path:
                if verbose:
                    print("WARNING: Dominfo does not have domain_path...")
                    print(dominfo)
                continue

            domid = dominfo['uniqueid']
            hhsearch_file = build_profile_filename(domain_path, domid + '.prof', iteration, ext)

            if not os.path.exists(hhsearch_file):
                #a missing file at max iteration can still mean the
                #profile generation converged earlier and is fine
                last_available_iteration = check_profile_integrity(dominfo)
                if use_between:
                    if not last_available_iteration:
                        print("WARNING: Profile is bad!", domain_path, domid)
                        continue
                    if last_available_iteration > previous_iteration:
                        hhsearch_file = build_profile_filename(domain_path, domid + '.prof', last_available_iteration, ext)
                    else:
                        if verbose:
                            print("WARNING: No between iteration available!", iteration, last_available_iteration)
                        continue
                else:
                    hhsearch_file = build_profile_filename(domain_path, domid + '.prof', last_available_iteration, ext)

            #final check
            if not os.path.exists(hhsearch_file):
                print("Error: HHsearch HMM file should be available but not found!", hhsearch_file, file=sys.stderr)
                raise SearchDatabasePreparationError("HHsearch HHM file is not availble!", hhsearch_file)

            with open(hhsearch_file) as fp:
                db_fp.write(fp.read())
            number_of_records += 1

    return number_of_records
def prepare_structure_search_database(method, domain_informations=None, prefix='', dir=None, strict=False, compute_node_dir=None):
    '''
    Prepares local condensed search database directory for convienent
    and fast search.

    method must be one of "DaliLite", "FAST" or "TMalign". An optional
    domain_informations list restricts the local db to a subset of the
    database content. Returns (db_dir, number_of_records).
    '''
    #option preparation
    if domain_informations is None:
        domain_informations = DomainInformation.get_all_records()
    if strict and domain_informations is None:
        raise TypeError("Domain information fetch failed.")

    #directory holding all local db data
    if dir is None:
        dir = Settings.get("local_db_space")
    local_db_root = os.path.join(dir, prefix)
    if not os.path.exists(local_db_root):
        os.makedirs(local_db_root)

    #unlike the profile search case, the main db path is a directory,
    #not a filename, since all structure methods are essentially
    #pairwise; any stale copy is removed first
    db_dir = os.path.join(local_db_root, '.'.join([prefix, method]))
    if os.path.exists(db_dir):
        shutil.rmtree(db_dir)
    os.makedirs(db_dir)

    #actual db building
    if method == 'DaliLite':
        number_of_records = _prepare_dalilite_search_db(db_dir, domain_informations)
    elif method == 'FAST':
        number_of_records = _prepare_fast_search_db(db_dir, domain_informations)
    elif method == 'TMalign':
        number_of_records = _prepare_tmalign_search_db(db_dir, domain_informations)
    else:
        raise TypeError("Structure search database method should be DaliLite, FAST or TMalign.", method)

    #copy to LOCAL_SCRATCH when configured in db.config.
    #NOTE(review): an explicitly passed compute_node_dir is never used —
    #only the None default triggers the copy; confirm intended behavior.
    if compute_node_dir is None:
        compute_node_db_dir = Settings.get("compute_node_db_space")
        if compute_node_db_dir:
            compute_node_db_root = os.path.join(compute_node_db_dir, prefix)
            db_dir = _prepare_compute_node_db(db_dir, compute_node_db_root)

    return db_dir, number_of_records
def prepare_input_sequence_for_profile_building(dominfo):
    '''
    build a sequence files for profile and save the information into
    dominfo.

    New items "profile_sequence_file" and "profile_sequence_range"
    will be added into dominfo dictionary.

    Note that the profile sequence file and range will be same as
    input file if the db type is sequence. For structure, profile
    sequence file will be biologically relavent region defined by
    DBREF in PDB.
    '''
    data_type = Settings.get("data_type")
    reference_sequence_suffix = Settings.get("reference_sequence_suffix")
    profile_sequence_suffix = Settings.get("profile_sequence_suffix")

    #target path: <domain_path>/<uniqueid><profile_sequence_suffix>
    profile_sequence_file = os.path.join(
        dominfo['domain_path'], dominfo['uniqueid']) + profile_sequence_suffix
    #reference_sequence_file = os.path.join( dominfo['domain_path'], dominfo['uniqueid'] ) + profile_sequence_suffix

    if data_type == 'sequence':
        #sequence DB: the original input file is copied verbatim and
        #the original range is reused; on copy failure both dominfo
        #fields are blanked and a warning is printed
        if dominfo['original_input_path']:
            try:
                shutil.copyfile(dominfo['original_input_path'], profile_sequence_file)
                dominfo['profile_sequence_file'] = profile_sequence_file
                dominfo['profile_sequence_range'] = dominfo['range']
            except IOError:
                dominfo['profile_sequence_file'] = ''
                dominfo['profile_sequence_range'] = ''
                print("WARNING: Profile sequence file cannot be written.",
                      profile_sequence_file, file=sys.stderr)
        else:
            #no original input recorded -> leave both fields empty
            dominfo['profile_sequence_file'] = ''  #dominfo[ 'original_input_path' ]
            dominfo['profile_sequence_range'] = ''  #dominfo[ 'range' ]
    elif data_type == 'structure' and dominfo['original_input_path'] and dominfo['domain_path']:
        #structure DB: extract the sequence of the whole chains that
        #the domain range touches, then map the PDB range onto
        #sequence coordinates
        pdb = PDB.parse(dominfo['original_input_path'])
        pdbrange = PDBRange()
        pdbrange.parse(dominfo['range'])
        #chainrange covers the full chains (empty per-chain ranges)
        chainrange = PDBRange()
        chainrange.parse(','.join(
            [cid + ':' for cid in pdbrange.get_unique_chain_ids()]))

        #Full SEQRES sequence
        profile_sequence = pdb.extract_sequence(chainrange, biological=False,
                                                standard_residue_name=False,
                                                atomrecord=True,
                                                backbone=False)[0]

        #convert the pdbrange into the sequence range matching the given
        #set of the residue indications.
        profile_sequence_range = pdb.pdbrange2sequencerange(
            pdbrange, biological=False, standard_residue_name=True,
            atomrecord=True, backbone=False)

        ###########################
        #print "*"*10, "dominfo"
        #print dominfo
        #print profile_sequence_range
        #for i,contig in enumerate(profile_sequence_range) :
        #print 'contig:%d'%i, contig.get_start(), contig.get_end()

        header = '>%s' % (dominfo['uniqueid'])
        try:
            #a pre-existing file is treated the same as a write failure
            #by funnelling into the IOError handler below
            if os.path.exists(profile_sequence_file):
                raise IOError
            fp = open(profile_sequence_file, 'w')
            print(header, file=fp)
            print(profile_sequence, file=fp)
            fp.close()
        except IOError:
            dominfo['profile_sequence_file'] = ''
            dominfo['profile_sequence_range'] = ''
            print("WARNING: Profile sequence file cannot be written.",
                  profile_sequence_file, file=sys.stderr)
        else:
            #write succeeded: record file and stringified range
            dominfo['profile_sequence_file'] = profile_sequence_file
            dominfo['profile_sequence_range'] = str(profile_sequence_range)
    else:
        #unknown data type or missing paths: unrecoverable
        print("Error!", file=sys.stderr)
        print(dominfo, file=sys.stderr)
        raise Exception("Cannot build profile sequence file.")
def prepare_profile_search_database(method, domain_informations=None, iteration=None, prefix='', dir=None, use_between=None, selected_iterations=None, strict=False):
    '''
    Prepares local condensed search database file for convienent and
    fast search.

    method must be "COMPASS" or "HHsearch". An optional
    domain_informations list restricts the local db to a subset of the
    database content.

    Returns (db_filename, number_of_records). The record count is
    helpful especially for running HHsearch to print out all the
    available alignments.

    use_between=True makes the build use in-between selected
    iterations when the specified iteration does not exist, e.g. for
    iteration=3 and selected_iterations=[1,3,5,8] a domain converged
    at iteration 2 will contribute its iteration-2 profile.

    use_between, selected_iterations and domain_informations default
    to the values specified in the config file.
    '''
    #option preparation
    if domain_informations is None:
        domain_informations = DomainInformation.get_all_records()
    if strict and domain_informations is None:
        raise TypeError("Domain information fetch failed.")

    if iteration is None:
        raise TypeError("Integer value of iteration should be given.")
    try:
        iteration = int(iteration)
    except ValueError:
        raise ValueError("Iteration should be integer or should be convetable to an integer value.")

    if dir is None:
        dir = Settings.get("local_db_space")
    local_db_root = os.path.join(dir, prefix)
    if not os.path.exists(local_db_root):
        os.makedirs(local_db_root)

    #use_between defaults to the configured flag; only the literal
    #strings true/True/false/False are accepted
    if use_between is None:
        use_between_string = Settings.get("use_between_selected_iterations")
        if use_between_string in ['true', 'True']:
            use_between = True
        elif use_between_string in ['false', 'False']:
            use_between = False
        else:
            raise ValueError("database configuration of use_between_selected_iterations is wrong: %s" % use_between_string)

    if selected_iterations is None:
        selected_iterations = [int(i) for i in Settings.get("selected_iterations").split()]

    db_filename = os.path.join(local_db_root, '.'.join([prefix, method, str(iteration)]))

    #actual db building
    if method == 'HHsearch':
        number_of_records = _prepare_hhsearch_search_db(db_filename, domain_informations, iteration, use_between, selected_iterations)
    elif method == 'COMPASS':
        number_of_records = _prepare_compass_search_db(db_filename, domain_informations, iteration, use_between, selected_iterations)
    else:
        raise TypeError("Profile search database method should be HHsearch or COMPASS.")

    return db_filename, number_of_records
def __init__(self, cmd=None, inputfile1=None, inputfile2=None, identifier1=None, identifier2=None, fakeid1='1domA', fakeid2='2domA', outputfile=None, parser=None, verbose=False):
    '''
    Aligner class for DaliLite pairwise program.

    Currently Parser does not work! If verbose is True, output from
    DaliLite will be printed out!

    The input DAT files are re-identified with fakeid1/fakeid2 inside a
    private temporary DAT directory because DaliLite keys its output on
    those identifiers.
    '''
    if cmd is None:
        cmd = Settings.get('dalilite')

    self.temp_dir = tempfile.mkdtemp()

    if inputfile1 is None:
        raise AlignerError("No input file 1 is given.")
    if inputfile2 is None:
        raise AlignerError("No input file 2 is given.")

    if outputfile is None:
        self.outputfile = os.path.join(self.temp_dir, os.path.basename(inputfile1).replace('.dat', '.dccp'))
    else:
        #BUGFIX: self.outputfile was never assigned when an explicit
        #outputfile was passed, crashing the exists() check below with
        #AttributeError.
        self.outputfile = outputfile

    if identifier1 is None:
        identifier1 = parse_sequence_filename(inputfile1)[1]
    if identifier2 is None:
        identifier2 = parse_sequence_filename(inputfile2)[1]

    if os.path.exists(self.outputfile):
        raise AlignerError(self.outputfile, "Outputfile already exists!")

    self.identifier1 = identifier1
    self.identifier2 = identifier2
    self.fakeid1 = fakeid1
    self.fakeid2 = fakeid2

    #prepare a private DAT directory with the fake identifiers
    self.dat_dir = os.path.join(self.temp_dir, 'DAT')
    os.mkdir(self.dat_dir)
    self.dat_file1 = os.path.join(self.dat_dir, self.fakeid1 + '.dat')
    self.dat_file2 = os.path.join(self.dat_dir, self.fakeid2 + '.dat')

    dat1 = DaliLiteDAT(inputfile1)
    dat1.convert_identifier(output=self.dat_file1, output_identifier=self.fakeid1)
    dat2 = DaliLiteDAT(inputfile2)
    dat2.convert_identifier(output=self.dat_file2, output_identifier=self.fakeid2)

    #DaliLite writes its result under the fake id in the temp dir
    self.temp_output = os.path.join(self.temp_dir, self.fakeid1 + ".dccp")

    PairAligner.__init__(self, cmd=cmd, cwd=self.temp_dir, inputfile1=inputfile1, inputfile2=inputfile2, identifier1=identifier1, identifier2=identifier2, outputfile=self.temp_output, parser=None, verbose=verbose)
def __init__(self, pdbfn, cmd=None, save_dir=None, output_fn=None, identifier=None, echo=True):
    '''
    Run DaliLite command and generate DAT file for large scale
    DaliLite structure comparisons.

    identifier is quite similar to PDBID (with chainID followed)
    except that DaliLite differentiate lower vs upper characters and
    the first character does not have to be an numeric character.

    echo controls output from the program for logging purposes and the
    actual command run to generate the DAT file.

    WARNING: This class depends on a slightly modified version of
    DaliLite that saves DAT files into the DAT directory in the
    current working directory.
    '''
    self.cmd = cmd
    self.pdbfn = pdbfn
    self.save_dir = save_dir
    self.output_fn = output_fn
    self.identifier = identifier
    self.echo = echo
    self.temp_dir = tempfile.mkdtemp()

    if self.cmd is None:
        self.cmd = Settings.get("dalilite")

    if self.save_dir:
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
    else:
        self.save_dir = os.getcwd()

    #work on a private copy of the input PDB inside the temp dir
    if self.pdbfn:
        shutil.copy(self.pdbfn, self.temp_dir)
        self.pdbfn = os.path.join(self.temp_dir, os.path.basename(self.pdbfn))

    if not self.output_fn:
        dirname, basename, ext = parse_sequence_filename(os.path.basename(self.pdbfn))
        self.output_fn = basename + '.dat'

    if os.path.exists(os.path.join(self.save_dir, self.output_fn)):
        raise IOError("File already exists.", self.output_fn)

    #default identifier from settings for internal consistency
    if self.identifier is None:
        self.identifier = Settings.get("default_dali_id")

    #paths of the DAT and DSSP files the DaliLite program will produce
    self.dat_fn = os.path.join(self.temp_dir, "DAT", self.identifier + ".dat")
    self.dssp_fn = os.path.join(self.temp_dir, self.identifier[:4] + ".dssp")
def __init__(self, inputpdb=None, read_cmd=None, buildbackbone_cmd=None, dglp_list=None, save_dir=None, output_fn=None, echo=True):
    '''
    Run MaxSprout command buildbackbone to generate a new PDB file
    filling backbone atoms for CA only residues.

    This class does not do full model building but only builds
    backbones; full sidechain optimization has some problems.

    echo controls output from the program for logging purposes and the
    actual command run.

    Note that inputpdb is assumed to be a single chain PDB file. If
    the input PDB sequence and output PDB sequence does not match,
    MaxSproutRunnerSequenceChangeError will be raised! The backbone
    built model is not a complete PDB file format (missing occupancies
    and B-factors).
    '''
    self.read_cmd = read_cmd
    self.buildbackbone_cmd = buildbackbone_cmd
    self.dglp_list = dglp_list  #necessary input param for buildbackbone cmd
    self.pdbfn = inputpdb
    self.save_dir = save_dir
    self.output_fn = output_fn
    self.echo = echo
    self.temp_dir = tempfile.mkdtemp()

    if self.read_cmd is None:
        self.read_cmd = Settings.get("maxsprout_readbrk")
    if self.buildbackbone_cmd is None:
        self.buildbackbone_cmd = Settings.get("maxsprout_buildbackbone")
    if self.dglp_list is None:
        self.dglp_list = Settings.get("maxsprout_dglp_list")

    if not (self.read_cmd and self.buildbackbone_cmd and self.dglp_list):
        #dump the settings to help diagnose the missing entries
        for k, v in Settings.settings.items():
            print(k, ":", v)
        raise MaxSproutRunnerError("Commands or dglp.list info was not retrieved!", self.read_cmd, self.buildbackbone_cmd, self.dglp_list)

    if self.save_dir:
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
    else:
        self.save_dir = os.getcwd()

    #work on a private copy of the input PDB inside the temp dir
    if self.pdbfn:
        shutil.copy(self.pdbfn, self.temp_dir)
        self.pdbfn = os.path.join(self.temp_dir, os.path.basename(self.pdbfn))

    if not self.output_fn:
        dirname, basename, ext = parse_sequence_filename(os.path.basename(self.pdbfn))
        self.output_fn = basename + '.maxsprout'

    if os.path.exists(os.path.join(self.save_dir, self.output_fn)):
        raise IOError("Maxsprouted file already exists.", self.output_fn)
def __init__(
        self,
        msa=None, query=None,  #main input
        number_of_processors=None,
        database=None,  #BLAST Setting if MSA should be built
        echo=True,  #echoing ouptut option
        #commands
        formatdb=None, makemat=None, psipred=None, psipass2=None,
        #data directory containing weight file for psipred
        psipred_data_dir=None):
    '''
    Predict secondary structure using PSIPRED.

    if multiple sequence alignment or msa (MSA object) is given, the
    msa is used for prediction. if query is given, blastpgp will be
    used for building multiple sequence alignment and predict the
    secondary structure.

    Mode 1 of this class is basically copied from runpsipred script.
    '''
    #command and data-dir defaults come from the settings file
    if formatdb == None:
        formatdb = Settings.get('formatdb')
    if makemat == None:
        makemat = Settings.get('makemat')
    if psipred == None:
        psipred = Settings.get('psipred')
    if psipass2 == None:
        psipass2 = Settings.get('psipass2')
    if psipred_data_dir == None:
        psipred_data_dir = Settings.get('psipred_data_dir')

    print("PSIPRED is running...")

    self.temp_dir = None  #tempfile.tempname()
    self.number_of_processors = number_of_processors

    #mode 2 stuff.
    self.database = database
    self.query = query

    #Mode 1. use the given MSA to predict 2nd Structures
    if msa != None:
        input_string = str(msa.query)
        self.temp_dir = tempfile.mkdtemp()

        #dump the MSA in psiblast -B alignment format
        alignment_input = os.path.join(self.temp_dir, "query.aln")
        fp = open(alignment_input, 'w')
        msa.build_psiblast_alignment_input(fp)
        fp.close()

        #a one-sequence dummy database built from the query itself,
        #formatted for blast
        dummy_db = os.path.join(self.temp_dir, 'query.seq')
        msa.query.save(dummy_db)
        os.system(formatdb + ' -i ' + dummy_db)

        checkpoint = os.path.join(self.temp_dir, 'query.chk')
        output = '/dev/null'

        if verbose:
            print('temp_dir:', self.temp_dir)
            print('input_string:', input_string)
            print('dummy_db', dummy_db)
            print('checkpoint', checkpoint)
            print('alignment_input', alignment_input)
            print(open(alignment_input).read())

        #single psiblast iteration whose only purpose is producing the
        #checkpoint profile for makemat
        runner = PSIBLASTRunner(input_string=input_string,
                                max_iterations=1,
                                output=output,
                                input_alignment=alignment_input,
                                database=dummy_db,
                                number_of_processors=number_of_processors,
                                checkpoint=checkpoint)
        runner.run()

        #makemat expects <basename>.pn / <basename>.sn listing the
        #checkpoint and sequence file names
        basename = os.path.join(self.temp_dir, 'query')
        fp = open(basename + ".pn", 'w')
        print("query.chk", file=fp)
        fp.close()
        fp = open(basename + ".sn", 'w')
        print("query.seq", file=fp)
        fp.close()

        if verbose:
            print("basename:", basename)
            print(basename + '.pn')
            print(open(basename + '.pn').read())
            print(basename + '.sn')
            print(open(basename + '.sn').read())

        #checkpoint -> .mtx matrix
        os.system(makemat + " -P " + basename)

        #psipred pass 1 (raw predictions) and pass 2 (smoothing);
        #shell command strings mirror the runpsipred script
        weight_file = os.path.join(psipred_data_dir, "weights.dat")
        os.system(
            psipred +
            " %(basename)s.mtx %(weight_file)s %(weight_file)s2 %(weight_file)s3 %(weight_file)s4 > %(basename)s.ss"
            % locals())
        os.system(
            "%(psipass2)s %(psipred_data_dir)s/weights_p2.dat 1 0.98 1.09 %(basename)s.ss2 %(basename)s.ss > %(basename)s.horiz"
            % locals())

        self.output = basename + '.horiz'  #important output !!!

    #Mode 2. build MSA and then predict 2nd structures
    elif query != None:
        raise NotYetImplementedError('Building Query Mode has not yet been implemented!')
    else:
        raise PSIPREDRunnerError('MSA or query FASTA should be given!')
''' This subpackage contains modules to manage information about proteins or domains in the database. ''' #debug = 1 verbose = 1 import os, shelve from evdblib.DBTools import Settings domain_info_db = Settings.get('domain_info_db') classification_info_db = Settings.get('classification_info_db') classification_levels = int(Settings.get('classification_levels')) ######################################### #Using local database ######################################### if Settings.get('use_local_db'): cwd = os.getcwd() domain_info_db = os.path.join(cwd, os.path.basename(domain_info_db)) classification_info_db = os.path.join( cwd, os.path.basename(classification_info_db)) ##################################k #temporary blocking!!! ##################################k #class DomInfoDB : #def __init__( self, domain_info_db ) : # self.dominf = shelve.open( domain_info_db ) ''' def __del__( self ) :
def generate_intermediate_dbs(domain_informations):
    '''
    Generate intermediate result databases for the given domain
    informations.

    NOTE(review): only the db-name lookups are visible here — the body
    appears truncated in this view; confirm against the full source.
    '''
    #Settings.get( 'intermediate_result_dir' )
    #configured shelve db names for intermediate results
    intermediate_domain_info_db = Settings.get('intermediate_domain_info_db')
    intermediate_classification_info_db = Settings.get('intermediate_classification_info_db')
This module contains wrappers for the blastpgp program. BLASTRunner class will run protein blast or psiblast. ''' import os, sys, time, tempfile, shutil, copy from subprocess import Popen, PIPE from io import StringIO from evdblib.Utils.Parsers import FASTA, BLAST from evdblib.Utils import parse_sequence_filename, find_command_in_path, build_profile_filename from . import Runner formatdb = 'formatdb' if find_command_in_path(formatdb): from evdblib.DBTools import Settings formatdb = Settings.get('formatdb') verbose = 0 default_max_iterations_for_neighbors = 1 class BLASTRunner: ''' BLASTRunner class will run blastpgp program. ''' def __init__(self, input=None, output=None, database=None, evalue_cutoff=None, input_string=None,