Ejemplo n.º 1
0
    def __init__(self,
                 stepName,
                 isHomoComplex,
                 savedModelsPath=None,
                 averageLRscores=False):
        '''
        Load the saved model that corresponds to a workflow step.

        Models are searched in the "h**o" or "hetero" sub-directory of the
        models directory, depending on isHomoComplex. Every file whose name
        ends with stepName is loaded in listing order; the last one loaded is
        the one kept in self.model.

        :param stepName: str. Must startswith seq_train or struct or mixed
                         (seq_train, mixed_2, structX, seq_train1... are also valid)
        :param isHomoComplex: boolean. Whether the target complex is a homo-complex
                              (True) or a hetero-complex (False)
        :param savedModelsPath: str. A path to the directory where models have been
                                saved. If None, the path already set by Configuration
                                is used
        :param averageLRscores: True if ligand and receptor are the same protein and
                                thus binding site predictions should be averaged
        :raises AssertionError: if no file in the models directory ends with stepName
        '''
        Configuration.__init__(self)

        self.stepName = stepName
        self.isHomoComplex = isHomoComplex
        self.averageLRscores = averageLRscores
        if savedModelsPath is not None:
            self.savedModelsPath = savedModelsPath

        self.model = None
        print(stepName)

        # Sub-directory name selected by the kind of complex.
        complexKind = "h**o" if self.isHomoComplex else "hetero"
        self.savedModelsPath = os.path.join(self.savedModelsPath, complexKind)

        matchingFnames = [
            candidate for candidate in os.listdir(self.savedModelsPath)
            if candidate.endswith(stepName)
        ]
        for modelFname in matchingFnames:
            print("Loading model %s %s" % (complexKind, modelFname))
            self.model = joblib_load(
                os.path.join(self.savedModelsPath, modelFname))

        assert self.model is not None, (
            "Error, there is no valid model in %s for step %s" %
            (self.savedModelsPath, self.stepName))
Ejemplo n.º 2
0
    def __init__(self,
                 rFname,
                 lFname,
                 computedFeatsRootDir=None,
                 statusManager=None):
        '''
        Store the receptor/ligand file names and prepare the features root directory.

        :param rFname: str. path to receptor pdb file
        :param lFname: str. path to ligand pdb file
        :param computedFeatsRootDir: str. path where features will be stored. If None,
                                     the value already set by Configuration is used
        :param statusManager: class that implements .setStatus(msg) to communicate
        '''
        Configuration.__init__(
            self)  # Load configuration parameters such as path to programs
        self.statusManager = statusManager

        # None is a singleton: test identity instead of relying on `!=`.
        if computedFeatsRootDir is not None:
            self.computedFeatsRootDir = computedFeatsRootDir
        # Resolve a leading "~" so the root path is usable by os-level calls.
        self.computedFeatsRootDir = os.path.expanduser(
            self.computedFeatsRootDir)
        # NOTE(review): myMakeDir is defined elsewhere; presumably it creates
        # the directory -- confirm.
        myMakeDir(self.computedFeatsRootDir)
        self.rFname = rFname
        self.lFname = lFname

        # Fallback in case the directory still does not exist at this point.
        if not os.path.isdir(self.computedFeatsRootDir):
            os.mkdir(self.computedFeatsRootDir)
Ejemplo n.º 3
0
    def __init__(self,
                 dataRootPath,
                 singleChainfeatsToInclude,
                 pairfeatsToInclude=None,
                 verbose=False):
        '''
        Record where computed features live and which of them must be used.

        :param dataRootPath: str. A path to computedFeatures directory that contains needed features. Example:
                computedFeatures/
                  common/
                    contactMaps/
                  seqStep/
                    conservation/
                    ...
                  structStep/
                    PSAIA/
                    VORONOI/
                    ...

        :param singleChainfeatsToInclude: dict. Dictionary that contains the paths where the features
                                      that describe single chain properties, needed for complex
                                      codification, are located. Must have the following format:
                                      {"featName":(relativePath_from_dataRootPath, listOfColumnNumbers, dictForNamedColums)}
                                      dictForNamedColums= {"myFeatName":colNumber}
        :param pairfeatsToInclude: dict or None. Same format as singleChainfeatsToInclude
                                      (presumably for features that describe pairs -- confirm
                                      against callers). A shallow copy is stored, so later changes
                                      to the caller's container do not affect this instance.
        :param verbose: bool.
        '''
        import copy

        Configuration.__init__(self)
        self.dataRootPath = dataRootPath
        self.verbose = verbose
        self.singleChainfeatsToInclude = singleChainfeatsToInclude
        # copy.copy makes a shallow copy of both lists and dicts; the former
        # slice-based copy ([:]) raises on the dict this parameter is documented as.
        if pairfeatsToInclude is None:
            self.pairfeatsToInclude = None
        else:
            self.pairfeatsToInclude = copy.copy(pairfeatsToInclude)
Ejemplo n.º 4
0
    def __init__(self, consDbSqlite=None, consDbFilesPath=None):
        '''
        Open the sqlite database and flag whether it can be used.

        self.isReady ends up False when the sqlite file does not exist, when
        self.checkIfDbFilesAvailable() is falsy, or when a probe query on
        sequencesTable raises sqlite3.OperationalError.

        :param consDbSqlite: str. Path to the sqlite file. If falsy, the value already
                             set by Configuration is used
        :param consDbFilesPath: str. Path whose basename must be uniref90 or uniref100.
                                If falsy, the value already set by Configuration is used
        :raises AssertionError: if the basename of consDbFilesPath is neither
                                uniref90 nor uniref100
        '''
        Configuration.__init__(self)
        self.isReady = True
        if consDbFilesPath:
            self.consDbFilesPath = consDbFilesPath
        if consDbSqlite:
            self.consDbSqlite = consDbSqlite

        self.unirefType = None
        dbIsUsable = (os.path.isfile(self.consDbSqlite)
                      and self.checkIfDbFilesAvailable())
        if not dbIsUsable:
            self.isReady = False
            return

        self.unirefType = os.path.basename(self.consDbFilesPath)
        assert self.unirefType in ["uniref90", "uniref100"], (
            "Error, consDbFilesPath %s "
            ":must be path/to/data/[uniref90|uniref100]") % (self.consDbFilesPath)

        self.sqliteConn = sqlite3.connect(self.consDbSqlite)
        self.sqliteCursor = self.sqliteConn.cursor()
        # Probe query: only checks that the expected table can be queried.
        try:
            probe = self.sqliteCursor.execute(
                "SELECT seqId FROM sequencesTable where sequence== 0")
            probe.fetchone()
        except sqlite3.OperationalError:
            self.isReady = False
Ejemplo n.º 5
0
    def __init__(self,
                 trainDataPath,
                 testPath,
                 outputPath=None,
                 nFolds=None,
                 isLastStep=False,
                 saveModelFname=None,
                 verbose=True,
                 numProc=1):
        '''
        Store the training options and collect the prefixes of train/test files.

        A prefix is the part of a file name that precedes its first ".".
        Training data itself is not loaded here (self.data is left as None).

        :param trainDataPath: str. Path to a dir where training data files are stored
        :param testPath: str. Path to a dir where testing data files are stored. If None,
                              no test prefixes are collected
        :param outputPath: str. Path to a dir where predictions will be stored. If None, results will not be saved
                               and just performance evaluation will be carried out
        :param nFolds: int. Number of folds for k-fold cross-validation. If -1, leave-one-out will be performed.
                           If 0, testing set will be used as if it were independent. Cannot be 1

        :param isLastStep: bool. True if this train is the second step of a two steps workflow or the first one in one step workflow
        :param saveModelFname: str. A path where the final model, trained with all data will be saved. If None,
                                  model won't be saved
        :param verbose: boolean. Whether or not print to stdout info
        :param numProc: int. Number of processes to use in parallel
        '''
        Configuration.__init__(
            self)  # Load configuration parameters such as path to programs

        self.outputPath = outputPath
        self.saveModelFname = saveModelFname
        self.verbose = verbose
        self.numProc = numProc
        self.nFolds = nFolds
        self.isLastStep = isLastStep
        self.trainPath = trainDataPath

        # Unique, sorted prefixes; sorting the raw listing first is unnecessary
        # because the set is sorted afterwards.
        self.trainPrefixes = sorted(
            {fname.split(".")[0] for fname in os.listdir(self.trainPath)})

        self.testPath = testPath
        if self.testPath is not None:
            self.testPrefixes = sorted(
                {fname.split(".")[0] for fname in os.listdir(self.testPath)})
        else:
            self.testPrefixes = []

        # self.loadTrainingData(sharedMemoryPath=None) will be executed later
        self.data, self.prefixesUsedInModel = None, None

        if self.verbose:
            print("%d train complexes loaded." % (len(self.trainPrefixes)))
        # testPrefixes is always a list here, so its length is the count.
        self.numTestComplexes = len(self.testPrefixes)
Ejemplo n.º 6
0
 def __init__(self, computedFeatsRootDir, statusManager=None):
     '''
     Keep the results root path, the status manager and the filtering settings.

     The filtering settings are taken from FILTER_OUT_LABELS and
     IGNORE_NO_STANDARD, which are defined outside this method.

     :param computedFeatsRootDir: str. root path where results will be saved
     :param statusManager: class that implements .setStatus(msg) to communicate
     '''
     Configuration.__init__(self)
     self.statusManager = statusManager
     self.computedFeatsRootDir = computedFeatsRootDir
     # Filtering settings are stored per instance.
     self.filterOutNoStandard = IGNORE_NO_STANDARD
     self.filterOutLabels = FILTER_OUT_LABELS
Ejemplo n.º 7
0
  def __init__(self, data_path= None):
    '''
      Select the AAIndex location and load its data through self.load().

      :param data_path: str. Path where AAIndex files are located. If None,
                        self.AAindexPath (set by Configuration) is used
    '''
    Configuration.__init__(self)  # Load configuration parameters such as path to programs

    self.protein_proteinIndexes = ["KESO980101", "KESO980102", "MOOG990101"]
    # Fall back to the configured location when no explicit path is given.
    self.data_path = self.AAindexPath if data_path is None else data_path
    self.data = self.load()
Ejemplo n.º 8
0
    def __init__(self, seqsManager, outPath, winSize):
        '''
        Keep the sequence manager, output location and window size.

        :param seqsManager: ..manageSeqs.seqsManager.SeqsManager
        :param outPath: str. root path where psiblast and al2co scores will be saved
        :param winSize: int. The size of sliding window
        '''
        Configuration.__init__(self)

        self.seqsManager = seqsManager
        self.outPath = outPath
        self.winSize = winSize
        self.filterOutLabels = FILTER_OUT_LABELS
        # Working directory as reported by the sequence manager.
        self.seqsWorkingDir = seqsManager.getSeqsOutDir()
Ejemplo n.º 9
0
 def __init__(self, prefix, computedFeatsRootDir=None, statusManager=None):
     '''
     Store the complex id, the status manager and the features root directory.

     :param prefix: str. An id for a complex. Example: 1A2K
     :param computedFeatsRootDir: str. path where features will be stored. If None, read from Configuration
     :param statusManager: class that implements .setStatus(msg) to communicate
     '''
     Configuration.__init__(
         self)  # Load configuration parameters such as path to programs
     self.prefix = prefix
     self.statusManager = statusManager
     # Identity test for None; fall back to the value provided by Configuration.
     # No directory is created here.
     if computedFeatsRootDir is None:
         computedFeatsRootDir = self.computedFeatsRootDir
     self.computedFeatsRootDir = computedFeatsRootDir
Ejemplo n.º 10
0
    def __init__(self, dataRootPath, verbose=False):
        '''
        Remember the features root directory and the verbosity flag.

        :param dataRootPath: str. A path to computedFeatures directory that contains needed features.
                             If None, self.computedFeatsRootDir (set by Configuration) is used. Example:
                computedFeatures/
                    common/
                      contactMaps/
                    seqStep/
                      conservation/
                      ...
                    structStep/
                      PSAIA/
                      ...

        :param verbose: bool.
        '''
        Configuration.__init__(self)
        self.verbose = verbose
        self.dataRootPath = (self.computedFeatsRootDir
                             if dataRootPath is None else dataRootPath)
Ejemplo n.º 11
0
    def __init__(self, stepName, savedModelsPath=None):
        '''
        Load the saved model that corresponds to a workflow step.

        Every file of the models directory whose name ends with stepName is
        loaded in listing order; the last one loaded is kept in self.model.

        :param stepName: str. Must startswith seq_train or struct or mixed
                         (seq_train, mixed_2, structX, seq_train1... are also valid)
        :param savedModelsPath: str. A path to the directory where models have been
                                saved. If None, the path already set by Configuration
                                is used
        :raises AssertionError: if no file in the models directory ends with stepName
        '''
        Configuration.__init__(self)

        self.stepName = stepName
        if savedModelsPath is not None:
            self.savedModelsPath = savedModelsPath

        self.model = None
        print(stepName)

        matchingFnames = [
            candidate for candidate in os.listdir(self.savedModelsPath)
            if candidate.endswith(stepName)
        ]
        for modelFname in matchingFnames:
            print("Loading model %s" % (modelFname))
            self.model = joblib_load(
                os.path.join(self.savedModelsPath, modelFname))

        assert self.model is not None, (
            "Error, there is no valid model in %s for step %s" %
            (self.savedModelsPath, self.stepName))
Ejemplo n.º 12
0
    def __init__(self,
                 features_path=None,
                 out_Codified_path=None,
                 feedback_paths=None,
                 environType=None,
                 ncpu=1,
                 overridePrevComp=False,
                 verbose=False):
        '''
        Validate the environment type and set up input/output paths for codification.

        :param features_path: str. A path to the computedFeatures directory that contains needed features.
                            If None, Config.py DEFAULT_PARAMETERS["computedFeatsRootDir"] will be used
                            Example:
                            features_path/
                              common/
                                contactMaps/
                              seqStep/
                                conservation/
                                ...
                              structStep/
                                PSAIA/
                                VORONOI/
                                 ...

        :param out_Codified_path: str. Root directory where codified complexes will be saved. Files will be saved at directory
                            out_Codified_path/seq[_n] if sequential environment protocol will be used or at
                            out_Codified_path/struct[_n] if structural environment protocol will be used. (attribute self.environType)
                            If more than one step of the same type, the path will end with "_#", e.g.:
                              1) path/to/outpath/seq/
                              2) path/to/outpath/struct_0/
                              3) path/to/outpath/struct_1/
                            If None, Config.py DEFAULT_PARAMETERS["codifiedDataRootDir"] will be used as out_Codified_path

        :param feedback_paths: str or str[]. A path to a previous results files directory. Contact maps for evaluation will
                                          be obtained from this file. If None, contact maps will be loaded from contactMaps
                                          files contained at features_path/common/contactMaps/

        :param environType: str. Required despite its None default. "seq" if the sequential environment protocol
                               is to be used (sliding window of pssms...),
                               "struct" if the VORONOI neighbours environment protocol is to be used (mean, min, max, sum and
                                count for neighbour residues and their properties), "mixed" if one partner will be codified
                                using struct environment and the other partner will be codified using sequence environment.
                                In the "mixed" case, both A_B and B_A will be considered

        :param ncpu: int. Number of processes to use in parallel (each process will codify one complex).
                          -1 or any value above the number of available CPUs means "all CPUs";
                          other values below 1 are replaced by 1

        :param overridePrevComp: boolean. If True and there are complexes at out_Codified_path, those complexes will be overridden.
                             If False, already computed complexes will be kept and codification will continue
                             with non computed complexes

        :param verbose: boolean. Stored as self.verbose

        :raises CodifyComplexException: if environType is None or does not start with
                                        "seq", "struct" or "mixed"
        '''
        Configuration.__init__(self)
        # environType defaults to None: check it explicitly so a missing value
        # raises the intended exception instead of an AttributeError.
        if environType is None or not environType.startswith(
                ("seq", "struct", "mixed")):
            raise CodifyComplexException(
                "environType must be 'seq' or 'struct' or mixed")

        self.environType = environType
        if features_path is None:
            features_path = self.computedFeatsRootDir
        self.dataRootPath = os.path.realpath(os.path.expanduser(features_path))

        contactMapsDir = os.path.join(self.dataRootPath, "common",
                                      "contactMaps")
        try:
            self.prefixes = sorted(
                getPrefix(elem) for elem in os.listdir(contactMapsDir))
        except (OSError, IOError):
            # The contact maps directory could not be listed.
            self.prefixes = None

        if out_Codified_path is None:
            out_Codified_path = self.codifiedDataRootDir
        # NOTE(review): myMakeDir is defined elsewhere; its return value is
        # used as the resulting directory path -- confirm.
        self.out_Codified_path = myMakeDir(
            os.path.realpath(os.path.expanduser(out_Codified_path)))
        self.out_Codified_path = myMakeDir(self.out_Codified_path, environType)

        self.feedback_paths = feedback_paths  #Either a path or None
        self.overridePrevComp = overridePrevComp
        self.verbose = verbose

        self.testingDataPath = os.path.join(self.out_Codified_path,
                                            "allInputs")
        self.trainingDataPath = os.path.join(self.out_Codified_path,
                                             "sampledInputs")

        # Clamp the requested number of processes to [1, available CPUs].
        availableCpus = multiprocessing.cpu_count()
        if ncpu > availableCpus or ncpu == -1:
            self.ncpu = availableCpus
        elif ncpu < 1:
            self.ncpu = 1
        else:
            self.ncpu = ncpu
Ejemplo n.º 13
0
 def __init__(self, msg):
   '''
   Initialize both bases: Configuration first, then MyException with msg.

   :param msg: message forwarded unchanged to MyException.__init__
   '''
   Configuration.__init__(self)
   MyException.__init__(self, msg)
Ejemplo n.º 14
0
 def __init__(self, nResidues, partnerId):
   '''
   Build the error message for a partner with an invalid number of residues.

   The allowed bounds are read from self.minNumResiduesPartner and
   self.maxNumResiduesPartner after Configuration has been initialized.

   :param nResidues: int. Number of residues found for the partner
   :param partnerId: Identifier of the partner, formatted with %s
   '''
   Configuration.__init__(self)
   messageFields = (partnerId, nResidues, self.minNumResiduesPartner,
                    self.maxNumResiduesPartner)
   errorMsg = ("Bad number of residues for partner %s: %d. Number of residues must be %d < nResidues < %d"
               % messageFields)
   MyException.__init__(self, errorMsg)