def setLearnerVars(self):
    """Copy the GUI-bound values onto the learner attributes.

    Reads the ``*GUI`` attributes of the widget, converts them and stores
    them as ``priors``, ``optAlg``, ``stopCrit``, ``maxIter``/``eps``,
    ``scaleData``, ``scaleClass`` and ``nHidden``, then calls
    ``self.setGUIVars()``.

    A non-numeric stop value, or a hidden-neuron list that cannot be parsed
    as comma separated integers, produces a warning and the corresponding
    defaults from ``AZOC.CVANNDEFAULTDICT`` are used instead.
    """
    if self.priorsGUI:
        self.priors = str(self.priorsGUI)
    else:
        self.priors = None
    self.optAlg = int(self.optAlgGUI)
    # The stored criterion is the GUI value shifted by one (1 == ITER).
    self.stopCrit = int(self.stopCritGUI) + 1
    if miscUtilities.isNumber(self.stopValueGUI):
        if self.stopCrit == 1:  # ITER
            # Go through float() so numeric text such as "100.0" does not
            # make int() raise after it already passed the isNumber check.
            self.maxIter = int(float(self.stopValueGUI))
        else:  # EPS
            self.eps = float(self.stopValueGUI)
    else:
        self.warning(0, "Value for stop criteria must be a number! Using default value.")
        self.maxIter = AZOC.CVANNDEFAULTDICT["maxIter"]
        self.eps = AZOC.CVANNDEFAULTDICT["eps"]
    self.scaleData = bool(self.scaleDataGUI)
    self.scaleClass = bool(self.scaleClassGUI)
    # Transform the nHidden to a list of ints ("3, 5" -> [3, 5])
    try:
        self.nHidden = [int(elem.strip())
                        for elem in str(self.nHiddenGUI).split(",")]
    except (ValueError, TypeError):
        self.warning(0, "Bad values for hidden neurons! Using default value.")
        self.nHidden = AZOC.CVANNDEFAULTDICT["nHidden"]
    self.setGUIVars()
Example #2
0
def calcMahalanobis(data, testData):
    """
    Calculates Mahalanobis distances.
    The data should only contain attributes that are relevant for similarity. OBS data is assumed to have a response variable.
    data - X matrix used to calculate the covariance matrix
    testData - the examples in an ExampleTable object for which to calculate the MDs
    Returns a list with one entry per example in testData, each being the result of
    MahalanobisDistanceCalculator.calculateDistances for that example (requested with
    NO_OF_NEIGHBORS neighbors).
    As originally documented, the elements are dictionaries giving the Mahalanobis distances to the
    average (_MD), the nearest neighbor and an average of the 3 nearest neighbors (_train_av3nearest).
    Raises Exception if any attribute of testData holds a non-numeric value.
    """

    # Impute any missing values (both tables are replaced by their imputed versions)
    data = orange.ImputerConstructor_average(data)(data)
    testData = orange.ImputerConstructor_average(testData)(testData)

    # Test if there is any non-numeric value within the dataset
    for ex in testData:
        # It is much faster to address the ex elements by their position instead of the correspondent name
        for idx, attr in enumerate(ex.domain.attributes):
            value = ex[idx].value
            if not miscUtilities.isNumber(value):
                raise Exception(
                    "Cannot calculate Mahalanobis distances. The attribute '"
                    + attr.name
                    + "' has non-numeric values. Ex: "
                    + str(value)
                )

    # Create a trainingSet object.
    trainingSet = getTrainingSet(data)
    trainingset_descriptor_names = trainingSet.descr_names
    mahalanobisCalculator = Mahalanobis.MahalanobisDistanceCalculator(trainingSet)

    MDlist = []
    for ex in testData:
        # Create a numeric vector from the example and assure the same order as in trainingset_descriptor_names
        try:
            descriptor_values = [float(ex[name].value) for name in trainingset_descriptor_names]
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt/SystemExit must propagate untouched.
            raise Exception("Not possible to calculate Mahalanobis distances. Some attribute is not numeric.")

        MDlist.append(mahalanobisCalculator.calculateDistances(descriptor_values, NO_OF_NEIGHBORS))
    return MDlist
    def updateParametersFromTable(self):
        """Updates the parameters of the optimizer with the ones present on GUI table

           Every table row describes one parameter through the columns
           ["Name","Optimize","Lower Limit","Upper Limit","Distribution","Step","Default"].
           For each row the matching entry of self.parameters is updated in place
           (range type [1], range expression [2], alias list [3], default [4] and
           optimize flag [5]); self.nParameters ends up as the number of rows
           flagged for optimization.

           Returns True if all OK
           Returns False if Errors occurred (the error is reported through
           self.setErrors or a QMessageBox warning)
        """
        #self.paramsNames: ["Name","Optimize","Lower Limit","Upper Limit","Distribution","Step","Default"]
        #self.comboDistItems: ["Continuous","Power2","By Step","Specific Values"]
        table = self.paramsTable
        if table.columnCount() < 7:
            self.setErrors("Wrong number of columns in table!")
            return False

        def cellText(row, col):
            # Stripped text of a plain (non-widget) table cell.
            return str(table.item(row, col).text()).strip()

        def isComboCell(row, col):
            # True when the cell hosts a combo-box widget instead of a text item.
            return "Combo" in str(type(table.cellWidget(row, col)))

        def isValidLimit(value):
            # A limit is either a number or an expression based on N_EX / N_ATTR.
            return miscUtilities.isNumber(value) or "N_EX" in value or "N_ATTR" in value

        self.nParameters = len(self.parameters)
        for row in range(table.rowCount()):
            name = cellText(row, 0)
            RangePars, distType = self.getRangeParsAndDistType(self.originalParameters, name)
            if RangePars is None or distType is None:
                self.setErrors("It was not possible to identify the range parameters")
                return False
            optimize = table.cellWidget(row, 1).checkState() == 2
            if not optimize:
                self.nParameters -= 1
            Llimit = cellText(row, 2)
            Ulimit = cellText(row, 3)
            step = cellText(row, 5)

            # Example of a self.parameters entry:
            #['types.StringType', 'values', "['kernel' , 'pls1' , 'simpls']", ['Kernel', 'PLS1', 'SimPLS'], "'simpls'", True, False]
            if optimize:
                if isComboCell(row, 6):
                    default = table.cellWidget(row, 6).currentIndex()
                    # NOTE(review): the guard uses the alias list length while the lookup
                    # indexes RangePars; the non-optimize branch below guards on
                    # len(RangePars). Kept as in the original - confirm which is intended.
                    if len(self.parameters[name][3]) <= default:  #the parameter did not have an alias, the text is the value
                        default = str(table.cellWidget(row, 6).currentText()).strip()
                    else:
                        default = RangePars[default]
                else:#is string
                    default = cellText(row, 6)

                self.parameters[name][5] = True
                if isComboCell(row, 4):
                    comboDistType = table.cellWidget(row, 4).currentIndex()
                else:
                    comboDistType = self.comboDistItems.index(table.item(row, 4).text())

                if comboDistType in (0, 1, 2):   #Continuous, Power2, By Step
                    # All three need valid limits; Power2 and By Step also need a numeric step.
                    for value, label in ((Llimit, "Lower"), (Ulimit, "Upper")):
                        if not isValidLimit(value):
                            QMessageBox.warning(self,"Invalid parameter","Parameter "+name+" has invalid "+label+" limit",QMessageBox.Ok)
                            return False
                    if comboDistType != 0 and not miscUtilities.isNumber(step):
                        QMessageBox.warning(self,"Invalid parameter","Parameter "+name+" has an invalid step value",QMessageBox.Ok)
                        return False
                    if comboDistType == 0:     #Continuous
                        self.parameters[name][1] = "interval"
                        self.parameters[name][2] = "[" + Llimit + " , " + Ulimit + "]"
                    elif comboDistType == 1:   #Power2
                        self.parameters[name][1] = "values"
                        self.parameters[name][2] = "miscUtilities.power2Range(" + Llimit + "," + Ulimit + "," + step + ")"
                    else:                      #By Step
                        self.parameters[name][1] = "values"
                        self.parameters[name][2] = "miscUtilities.Range(" + Llimit + "," + Ulimit + "," + step + ")"
                    self.parameters[name][3] = ""
                else:                          #Specific Values
                    #The 'Specific Values' refer to the parameters specified in the original AZLearnersParamsConfig.py file
                    self.parameters[name][1] = self.originalParameters[name][1]
                    self.parameters[name][2] = self.originalParameters[name][2]
                    self.parameters[name][3] = self.originalParameters[name][3]
                self.parameters[name][4] = default
            else:
                # Not optimized: only the flag and the default value are updated.
                self.parameters[name][5] = False
                if isComboCell(row, 6):
                    default = table.cellWidget(row, 6).currentIndex()
                    if len(RangePars) <= default:  #the parameter did not have an alias, the text is the value
                        self.parameters[name][4] = str(table.cellWidget(row, 6).currentText()).strip()
                    else:
                        self.parameters[name][4] = RangePars[default]
                else:#is string
                    self.parameters[name][4] = cellText(row, 6)
        return True
Example #4
0
def getNearestNeighbors(query,
                        n,
                        NNDataPath,
                        FPPath=None,
                        resPath=None,
                        idx=0):
    """ get the n nearest neighbors
        query: bin string with query fingerprint
        n: number of neighbors requested (passed to the external 'fpin' tool)
        NNDataPath: tab separated dataset; its header must contain the
                    'Molecule SMILES' and 'Compound Name' attributes
        FPPath: fingerprint file handed to 'fpin'
        resPath: existing directory where the neighbor images are saved;
                 when missing or not a directory no image is drawn
        idx: index used in the image file names

        returns an ordered list with the n top neighbors (each one in a dict):
            [ {
                "id"          : ID, 
                "expVal"      : ExpValues, 
                "similarity"  : TanimotoSimilarity, 
                "smi"         : smiles, 
                "imgPath"     : imgPath,
                "MeanInhib"   : Mean Inhib. },  ... ]        
        An empty list is returned when query, n, NNDataPath or FPPath is
        missing, or when the dataset header is not the expected one.
        Raises Exception with the tool output when 'fpin' exits with a
        non-zero status.

        It will save the images in resPath as
             NN<idx>_<rank>_<timestamp>.png    (rank 1 = nearest neighbor)
    """
    if not query or not n or not NNDataPath or not FPPath:
        return []

    # get the correct header (the with-block closes the file even on errors)
    with open(NNDataPath, "r") as dataFile:
        header = dataFile.readline().strip().split('\t')

    if "Molecule SMILES" not in header or "Compound Name" not in header:
        # Single-argument print: same bytes as the former print statement
        # and valid on both Python 2 and Python 3.
        print("NN dataset  " + str(NNDataPath) +
              "  have not the correct header. It must contain 'Molecule SMILES' and 'Compound Name' attributes.")
        return []
    # Index will have to be sum 1 because the TS will be prepended
    idxID = header.index("Compound Name") + 1
    idxExpVal = len(header)
    idxSMILES = header.index("Molecule SMILES") + 1
    idxSimilarity = 0

    # NOTE(review): the command line is assembled by string concatenation and
    # run through a shell; query and the paths must come from a trusted source.
    cmdStr = 'echo "' + query + '" | fpin ' + FPPath + " " + NNDataPath + ' 0.0 ' + str(
        n)
    status, output = commands.getstatusoutput(cmdStr)
    if status:
        print(status)
        print(output)
        raise Exception(str(output))
    #             TS              SMILES                    AZID         DATE       expRes
    # output = "0.7117   CCCC(C)C1(C(=O)NC(=O)NC1=O)CC   AZ10046012   2009-12-02   3.480007"
    # Blank lines (e.g. when the tool finds no neighbor) carry no fields and
    # would otherwise break the column lookups below, so they are skipped.
    TS = [
        line.strip().split('\t') for line in output.split("\n")
        if line.strip()
    ]
    # in TS:
    #    TS[n][0] - tanimoto similarity
    #    TS[n][1] - SMILES
    #    TS[n][2] - AZID
    #    TS[n][-1]- expRes
    res = []
    timeStamp = str(time.time()).replace(".", '')
    saveImages = bool(resPath and os.path.isdir(resPath))
    for fidx, nn in enumerate(TS):
        expVal = nn[idxExpVal]
        if miscUtilities.isNumber(expVal):
            expVal = str(round(float(expVal), 2))
        SMILES = nn[idxSMILES]
        imgPath = ""
        if saveImages:
            imgPath = os.path.join(
                resPath, "NN" + str(idx) + "_" + str(fidx + 1) + "_" +
                timeStamp + ".png")
            mol = Chem.MolFromSmiles(SMILES)
            # save the respective imgPath...
            Draw.MolToImageFile(mol,
                                imgPath,
                                size=(300, 300),
                                kekulize=True,
                                wedgeBonds=True)
        res.append({
            "id": nn[idxID],
            "expVal": expVal,
            "similarity": nn[idxSimilarity],
            "smi": SMILES,
            "imgPath": imgPath,
            "MeanInhib": ''
        })
    return res
Example #5
0
def calcMahalanobis(data, testData, invCovMatFile = None, centerFile = None, dataTableFile = None, domain = None, nNN = NO_OF_NEIGHBORS):
    """
    Calculates Mahalanobis distances.
    The data should only contain attributes that are relevant for similarity. OBS data is assumed to have a response variable.
    data - X matrix used to calculate the covariance matrix. May be empty/None, in which case no
           training set is built and 'domain' must be given.
    testData - the examples in an ExampleTable object for which to calculate the MDs.
               NOTE: unknown continuous class values (and, when the imputer cannot be built,
               unknown continuous attributes) are set to 0 in place before imputation.
    invCovMatFile, centerFile, dataTableFile - handed unchanged to Mahalanobis.MahalanobisDistanceCalculator
    domain - supplies the descriptor names (domain.attributes) when 'data' is empty/None
    nNN - number of neighbors handed to calculateDistances
    Returns a list of Mahalanobis distances between the examples in testData and training data.
    As originally documented, the elements of the list are dictionaries, giving the Mahalanobis distances
    to the average (_MD), the nearest neighbor and an average of the 3 nearest neighbors (_train_av3nearest).
    Raises Exception if any attribute of testData holds a non-numeric value.
    """

    # Impute any missing values
    if data:
        data = orange.ImputerConstructor_average(data)(data)
    #If Class is continuous and all class values are unknown (and they usually are in ex to predict), the imputer cannot be created.
    # Since we are only using attributes, not the class, we will assign 0 to the class values in order to impute the testData
    if testData.domain.classVar and testData.domain.classVar.varType == orange.VarTypes.Continuous:
        for ex in testData:
            if ex.getclass().isSpecial():
                ex.setclass(0)
    # This can also happen when calculating a single example with missing attributes
    try:
        averageImputer = orange.ImputerConstructor_average(testData)
    except Exception:
        # Best-effort fallback: zero the unknown continuous attributes and retry.
        # (Not a bare except, so KeyboardInterrupt/SystemExit still propagate.)
        continuousAttrs = [a for a in testData.domain.attributes if a.varType == orange.VarTypes.Continuous]
        for ex in testData:
            for attr in continuousAttrs:
                if ex[attr].isSpecial():
                    ex[attr] = 0
        averageImputer = orange.ImputerConstructor_average(testData)

    testData = averageImputer(testData)

    #Test if there is any non-numeric value within the dataset
    for ex in testData:
        #It is much faster to address the ex elements by their position instead of the correspondent name
        for idx, attr in enumerate(ex.domain.attributes):
            value = ex[idx].value
            if not miscUtilities.isNumber(value):
                raise Exception("Cannot calculate Mahalanobis distances. The attribute '" + \
                      attr.name + "' has non-numeric values. Ex: " + \
                      str(value))
    if data:
        trainingSet = getTrainingSet(data)
        trainingset_descriptor_names = trainingSet.descr_names
    else:
        trainingSet = None
        trainingset_descriptor_names = [attr.name for attr in domain.attributes]
    mahalanobisCalculator = Mahalanobis.MahalanobisDistanceCalculator(trainingSet,invCovMatFile,centerFile,dataTableFile)
    MDlist = []
    for ex in testData:
        # Create a numeric vector from the example and assure the same order as in trainingset_descriptor_names
        try:
            descriptor_values = [float(ex[name].value) for name in trainingset_descriptor_names]
        except Exception:
            raise Exception("Not possible to calculate Mahalanobis distances. Some attribute is not numeric.")

        MDlist.append(mahalanobisCalculator.calculateDistances(descriptor_values, nNN))
    return MDlist
    def __init__(self, **kwds):
        """Set the default configuration and apply any keyword overrides.

        Every keyword argument is stored as an instance attribute, overriding
        the defaults assigned before it (learnerName is reset to "" afterwards).

        When testAttrFilter names an attribute of self.data, fold control is
        configured from it and the attribute is then removed from self.data:
          - testFilterVal is a non-empty list (and testAttrFilter a str):
            useVarCtrlCV is enabled and preDefIndices gets 1 for examples whose
            attribute value is in testFilterVal (allowed in the test set) and
            0 otherwise (always shifted to the train set).
          - testFilterVal is None: usePreDefFolds is enabled and the attribute
            values are used as fold indices (index 0 = train bias).
        Otherwise both modes are disabled and testAttrFilter/testFilterVal are
        reset to None.

        Raises ValueError when a fold value is not numeric or when the filter
        settings are not in one of the expected formats.
        """
        self.verbose = 0
        self.logFile = None
        self.resultsFile = None
        self.nExtFolds = 5
        self.nInnerFolds = 5
        self.data = None
        self.learner = None
        self.paramList = None
        self.queueType = "NoSGE"
        self.responseType = None
        self.fixedParams = {}
        self.testAttrFilter = None
        self.testFilterVal = None
        self.sampler = dataUtilities.SeedDataSampler
        # Append arguments to the __dict__ member variable 
        self.__dict__.update(kwds)
        self.learnerName = ""

        self.preDefIndices = orange.LongList()
        self.usePreDefFolds = False
        self.useVarCtrlCV = False
        if self.testAttrFilter and self.testAttrFilter in self.data.domain:
            if self.testFilterVal and isinstance(self.testFilterVal, list) and isinstance(self.testAttrFilter, str):
                self.useVarCtrlCV = True
                self.usePreDefFolds = False
                for ex in self.data:
                    if ex[self.testAttrFilter].value in self.testFilterVal: # Compound selected to be allowed in the test set
                        self.preDefIndices.append(1)
                    else:                                                 # Compound to not include in the test set. Always to be shifted to the train
                        self.preDefIndices.append(0)
            elif self.testFilterVal is None:
                self.usePreDefFolds = True
                self.useVarCtrlCV = False
                #Enable pre-selected-indices  ( index 0 will be set for train Bias)
                foldsCounter = {}
                for ex in self.data:
                    value = str(ex[self.testAttrFilter].value)
                    if not miscUtilities.isNumber(value):
                        # __init__ must return None: the former 'return False' made
                        # object construction fail with an unrelated TypeError, so the
                        # problem is logged and raised explicitly instead.
                        message = "Invalid fold value:"+str(value)+". It must be str convertable to an int."
                        self.__log(message)
                        raise ValueError(message)
                    value = int(float(value))
                    foldsCounter[value] = foldsCounter.get(value, 0) + 1
                    self.preDefIndices.append(value)

                self.__log( "INFO: Pre-selected "+str(len([f for f in foldsCounter if f != 0]))+" folds for CV:")
                self.__log( "      Examples in data: "+str(sum(foldsCounter.values())))
                self.__log( "      Examples selected for validation: "+str(sum([foldsCounter[f] for f in foldsCounter if f != 0])))
                self.__log( "      Examples to be appended to the train set: "+ str(foldsCounter.get(0, 0)))
            else:
                message = "ERROR: Attribute Filter Ctrl was selected, but attribute is not in expected format: " + str(self.testAttrFilter)
                self.__log(message)
                raise ValueError(message)
            # The control attribute must not be used as a descriptor.
            self.data = dataUtilities.attributeDeselectionData(self.data, [self.testAttrFilter])
        else:
            self.usePreDefFolds = False
            self.useVarCtrlCV = False
            self.testAttrFilter = None
            self.testFilterVal = None
Example #7
0
def calcMahalanobis(data,
                    testData,
                    invCovMatFile=None,
                    centerFile=None,
                    dataTableFile=None,
                    domain=None,
                    nNN=NO_OF_NEIGHBORS):
    """
    Calculates Mahalanobis distances.
    The data should only contain attributes that are relevant for similarity. OBS data is assumed to have a response variable.
    data - X matrix used to calculate the covariance matrix. May be empty/None, in which case no
           training set is built and 'domain' must be given.
    testData - the examples in an ExampleTable object for which to calculate the MDs.
               NOTE: unknown continuous class values (and, when the imputer cannot be built,
               unknown continuous attributes) are set to 0 in place before imputation.
    invCovMatFile, centerFile, dataTableFile - handed unchanged to Mahalanobis.MahalanobisDistanceCalculator
    domain - supplies the descriptor names (domain.attributes) when 'data' is empty/None
    nNN - number of neighbors handed to calculateDistances
    Returns a list of Mahalanobis distances between the examples in testData and training data.
    As originally documented, the elements of the list are dictionaries, giving the Mahalanobis distances
    to the average (_MD), the nearest neighbor and an average of the 3 nearest neighbors (_train_av3nearest).
    Raises Exception if any attribute of testData holds a non-numeric value.
    """

    # Impute any missing values
    if data:
        averageImputer = orange.ImputerConstructor_average(data)
        data = averageImputer(data)
    #If Class is continuous and all class values are unknown (and they usually are in ex to predict), the imputer cannot be created.
    # Since we are only using attributes, not the class, we will assign 0 to the class values in order to impute the testData
    classVar = testData.domain.classVar
    if classVar and classVar.varType == orange.VarTypes.Continuous:
        for ex in testData:
            if ex.getclass().isSpecial():
                ex.setclass(0)
    # This can also happen when calculating a single example with missing attributes
    try:
        averageImputer = orange.ImputerConstructor_average(testData)
    except Exception:
        # Best-effort fallback: zero the unknown continuous attributes and retry.
        # (Not a bare except, so KeyboardInterrupt/SystemExit still propagate.)
        continuousAttrs = [
            a for a in testData.domain.attributes
            if a.varType == orange.VarTypes.Continuous
        ]
        for ex in testData:
            for attr in continuousAttrs:
                if ex[attr].isSpecial():
                    ex[attr] = 0
        averageImputer = orange.ImputerConstructor_average(testData)

    testData = averageImputer(testData)

    #Test if there is any non-numeric value within the dataset
    for ex in testData:
        #It is much faster to address the ex elements by their position instead of the correspondent name
        for idx, attr in enumerate(ex.domain.attributes):
            value = ex[idx].value
            if not miscUtilities.isNumber(value):
                raise Exception("Cannot calculate Mahalanobis distances. The attribute '" + \
                      attr.name + "' has non-numeric values. Ex: " + \
                      str(value))
    if data:
        trainingSet = getTrainingSet(data)
        trainingset_descriptor_names = trainingSet.descr_names
    else:
        trainingSet = None
        trainingset_descriptor_names = [
            attr.name for attr in domain.attributes
        ]
    mahalanobisCalculator = Mahalanobis.MahalanobisDistanceCalculator(
        trainingSet, invCovMatFile, centerFile, dataTableFile)
    MDlist = []
    for ex in testData:
        # Create a numeric vector from the example and assure the same order as in trainingset_descriptor_names
        try:
            descriptor_values = [
                float(ex[name].value) for name in trainingset_descriptor_names
            ]
        except Exception:
            raise Exception(
                "Not possible to calculate Mahalanobis distances. Some attribute is not numeric."
            )

        MD = mahalanobisCalculator.calculateDistances(descriptor_values, nNN)
        MDlist.append(MD)
    return MDlist
Example #8
0
    def updateParametersFromTable(self):
        """Updates the parameters of the optimizer with the ones present on GUI table
           Returns True if all OK
           Returns False if Errors occurred
        """
        #self.paramsNames: ["Name","Optimize","Lower Limit","Upper Limit","Distribution","Step","Default"]
        #self.comboDistItems: ["Continuous","Power2","By Step","Specific Values"]
        if self.paramsTable.columnCount() < 7:
            self.setErrors("Wrong number of columns in table!")
            return False
        RangePars, distType = [None, None]
        self.nParameters = len(self.parameters)
        for row in range(self.paramsTable.rowCount()):
            for col in range(7):
                if col == 0:  #Name
                    name = str(self.paramsTable.item(row, col).text()).strip()
                    RangePars, distType = self.getRangeParsAndDistType(
                        self.originalParameters, name)
                    if RangePars == None or distType == None:
                        self.setErrors(
                            "It was not possible to identify the range parameters"
                        )
                        return False
                elif col == 1:  #Optimize
                    if self.paramsTable.cellWidget(row, col).checkState() == 2:
                        optimize = True
                    else:
                        optimize = False
                        self.nParameters -= 1
                elif col == 2:  #Lower Limit
                    Llimit = str(self.paramsTable.item(row,
                                                       col).text()).strip()
                elif col == 3:  #Upper Limit
                    Ulimit = str(self.paramsTable.item(row,
                                                       col).text()).strip()
                elif col == 4:  #Distribution
                    if "Combo" in str(
                            type(self.paramsTable.cellWidget(row, col))):
                        dist = str(
                            self.paramsTable.cellWidget(row,
                                                        col).currentIndex())
                    else:
                        dist = self.comboDistItems.index(
                            self.paramsTable.item(row, col).text())
                elif col == 5:  #Step
                    step = str(self.paramsTable.item(row, col).text()).strip()
                elif col == 6:  #Default
                    if "Combo" in str(
                            type(self.paramsTable.cellWidget(row, col))):
                        default = self.paramsTable.cellWidget(
                            row, col).currentIndex()
                        if len(
                                self.parameters[name][3]
                        ) <= default:  #the parameter did not have an alias, the text is the value
                            default = str(
                                self.paramsTable.cellWidget(
                                    row, col).currentText()).strip()
                        else:
                            default = RangePars[default]
                    else:  #is string
                        default = str(self.paramsTable.item(
                            row, col).text()).strip()


#['types.StringType', 'values', "['kernel' , 'pls1' , 'simpls']", ['Kernel', 'PLS1', 'SimPLS'], "'simpls'", True, False]

            if optimize:
                self.parameters[name][5] = True
                if "Combo" in str(type(self.paramsTable.cellWidget(row, 4))):
                    comboDistType = self.paramsTable.cellWidget(
                        row, 4).currentIndex()
                else:
                    comboDistType = self.comboDistItems.index(
                        self.paramsTable.item(row, 4).text())
                if comboDistType == 0:  #Continuous
                    if not miscUtilities.isNumber(Llimit):
                        if "N_EX" not in Llimit and "N_ATTR" not in Llimit:
                            QMessageBox.warning(
                                self, "Invalid parameter", "Parameter " +
                                name + " has invalid Lower limit",
                                QMessageBox.Ok)
                            return False
                    if not miscUtilities.isNumber(Ulimit):
                        if "N_EX" not in Ulimit and "N_ATTR" not in Ulimit:
                            QMessageBox.warning(
                                self, "Invalid parameter", "Parameter " +
                                name + " has invalid Upper limit",
                                QMessageBox.Ok)
                            return False
                    self.parameters[name][1] = "interval"
                    self.parameters[name][
                        2] = "[" + Llimit + " , " + Ulimit + "]"
                    self.parameters[name][3] = ""
                    self.parameters[name][4] = default
                elif comboDistType == 1:  #Power2
                    if not miscUtilities.isNumber(Llimit):
                        if "N_EX" not in Llimit and "N_ATTR" not in Llimit:
                            QMessageBox.warning(
                                self, "Invalid parameter", "Parameter " +
                                name + " has invalid Lower limit",
                                QMessageBox.Ok)
                            return False
                    if not miscUtilities.isNumber(Ulimit):
                        if "N_EX" not in Ulimit and "N_ATTR" not in Ulimit:
                            QMessageBox.warning(
                                self, "Invalid parameter", "Parameter " +
                                name + " has invalid Upper limit",
                                QMessageBox.Ok)
                            return False
                    if not miscUtilities.isNumber(step):
                        QMessageBox.warning(
                            self, "Invalid parameter",
                            "Parameter " + name + " has an invalid step value",
                            QMessageBox.Ok)
                        return False
                    self.parameters[name][1] = "values"
                    self.parameters[name][
                        2] = "miscUtilities.power2Range(" + Llimit + "," + Ulimit + "," + step + ")"
                    self.parameters[name][3] = ""
                    self.parameters[name][4] = default
                elif comboDistType == 2:  #By Step
                    if not miscUtilities.isNumber(Llimit):
                        if "N_EX" not in Llimit and "N_ATTR" not in Llimit:
                            QMessageBox.warning(
                                self, "Invalid parameter", "Parameter " +
                                name + " has invalid Lower limit",
                                QMessageBox.Ok)
                            return False
                    if not miscUtilities.isNumber(Ulimit):
                        if "N_EX" not in Ulimit and "N_ATTR" not in Ulimit:
                            QMessageBox.warning(
                                self, "Invalid parameter", "Parameter " +
                                name + " has invalid Upper limit",
                                QMessageBox.Ok)
                            return False
                    if not miscUtilities.isNumber(step):
                        QMessageBox.warning(
                            self, "Invalid parameter",
                            "Parameter " + name + " has an invalid step value",
                            QMessageBox.Ok)
                        return False
                    self.parameters[name][1] = "values"
                    self.parameters[name][
                        2] = "miscUtilities.Range(" + Llimit + "," + Ulimit + "," + step + ")"
                    self.parameters[name][3] = ""
                    self.parameters[name][4] = default
                else:  #Specific Values
                    #The 'Specific Values' refere to the parameters specified in the original AZLearnersParamsConfig.py file
                    self.parameters[name][1] = self.originalParameters[name][1]
                    self.parameters[name][2] = self.originalParameters[name][2]
                    self.parameters[name][3] = self.originalParameters[name][3]
                    self.parameters[name][4] = default
            else:
                self.parameters[name][5] = False
                #self.parameters[name][1]='values'
                if "Combo" in str(type(self.paramsTable.cellWidget(row, 6))):
                    default = self.paramsTable.cellWidget(row,
                                                          6).currentIndex()
                    if len(
                            RangePars
                    ) <= default:  #the parameter did not have an alias, the text is the value
                        #self.parameters[name][2]='[' +  str(self.paramsTable.cellWidget(row,6).currentText()).strip() + ']'
                        self.parameters[name][4] = str(
                            self.paramsTable.cellWidget(
                                row, 6).currentText()).strip()
                    else:
                        #self.parameters[name][2]='[' + RangePars[default]  + ']'
                        self.parameters[name][4] = RangePars[default]
                else:  #is string
                    #self.parameters[name][2]='[' +  str(self.paramsTable.item(row,6).text()).strip() + ']'
                    self.parameters[name][4] = str(
                        self.paramsTable.item(row, 6).text()).strip()
        return True
Example #9
0
    def getExamplesAndSetTrainBias(self, data, testAttrFilter, testFilterVal):
        """
        Split data into the examples usable for validation and the train bias.

        The behavior depends on the arguments:
          - testAttrFilter (present in data.domain) and testFilterVal given:
            examples whose value at testAttrFilter equals one of the entries
            of testFilterVal are returned; all other examples are stored in
            self.trainBias so they can be added to every train set.
          - testAttrFilter (present in data.domain) given, testFilterVal None:
            the value at testAttrFilter is read as a pre-selected fold number.
            Examples with fold 0 are stored in self.trainBias; the others are
            returned, and (fold - 1) is appended to self.fixedIdx for each.
          - otherwise: data is returned as-is and self.trainBias stays None.

        In the first two cases the returned examples are passed through
        dataUtilities.attributeDeselectionData with [testAttrFilter].

        Raises Exception when testFilterVal is given but is not a list, or
        when a fold value is not recognized as a number.
        """
        self.trainBias = None
        if testAttrFilter is None or testAttrFilter not in data.domain:
            # No usable control attribute: all data is available for validation
            return data

        allDataEx = len(data)
        if testFilterVal is not None:
            if not isinstance(testFilterVal, list):
                raise Exception("Invalid Attr filter value. It must be a list of strings")
            examples = orange.ExampleTable(data.domain)
            self.trainBias = orange.ExampleTable(data.domain)
            for ex in data:
                exValue = ex[testAttrFilter].value
                if any(exValue == Vfilter for Vfilter in testFilterVal):
                    examples.append(ex)
                else:
                    self.trainBias.append(ex)

            print("INFO: Variable control validation:")
        else:
            # Enable pre-selected indices: the attribute holds the fold number
            self.fixedIdx = orange.LongList()
            examples = orange.ExampleTable(data.domain)
            self.trainBias = orange.ExampleTable(data.domain)
            foldsCounter = {}
            for ex in data:
                value = str(ex[testAttrFilter].value)
                if not miscUtilities.isNumber(value):
                    raise Exception("Invalid fold value:"+str(value)+". It must be str convertable to an int.")
                value = int(float(value))
                foldsCounter[value] = foldsCounter.get(value, 0) + 1
                if value != 0:
                    examples.append(ex)
                    # Fold numbers start at 1, the stored indices at 0
                    self.fixedIdx.append(value - 1)
                else:
                    # Fold 0 is reserved for examples that always go to the train set
                    self.trainBias.append(ex)

            print("INFO: Pre-selected "+str(len([f for f in foldsCounter if f != 0]))+" folds for CV:")

        print("      Examples in data: "+str(allDataEx))
        print("      Examples selected for validation: "+str(len(examples)))
        print("      Examples to be appended to the train set: "+str(len(self.trainBias)))
        return dataUtilities.attributeDeselectionData(examples, [testAttrFilter])
Example #10
0
def getNearestNeighbors(query, n, NNDataPath, FPPath = None, resPath = None, idx = 0):
    """ Get the n nearest neighbors of a query fingerprint.
        query:      bin string with the query fingerprint
        n:          number of neighbors to ask for
        NNDataPath: tab separated dataset; its header must contain the
                    'Molecule SMILES' and 'Compound Name' attributes
        FPPath:     fingerprints file handed to the external 'fpin' command
        resPath:    existing directory where the neighbor images are saved
        idx:        number used in the image file names

        Returns an ordered list with the n top neighbors (each one in a dict):
            [ {
                "id"          : ID, 
                "expVal"      : ExpValues, 
                "similarity"  : TanimotoSimilarity, 
                "smi"         : smiles, 
                "imgPath"     : imgPath,
                "MeanInhib"   : always '' },  ... ]        

        An empty list is returned when query, n, NNDataPath or FPPath is
        missing, or when the dataset header lacks the required attributes.

        When resPath is an existing directory, one image per neighbor is saved
        there as NN<idx>_<rank>_<timestamp>.png and its path is placed in
        "imgPath"; otherwise "imgPath" is ''. It is also '' for a neighbor
        whose SMILES could not be parsed.

        Raises Exception with the command output when the 'fpin' command
        returns a non-zero status.
    """
    if not query or not n or not  NNDataPath or not  FPPath:
        return []

    # Only the header line is needed to locate the columns
    with open(NNDataPath,"r") as dataFile:
        header = dataFile.readline().strip().split('\t')

    if "Molecule SMILES" not in header or "Compound Name" not in header:
        print(" ".join(["NN dataset ", str(NNDataPath), " have not the correct header. It must contain 'Molecule SMILES' and 'Compound Name' attributes."]))
        return [] 
    # Indices are shifted by 1 because the similarity is prepended to each output row
    idxID = header.index("Compound Name") + 1
    idxExpVal = len(header) 
    idxSMILES = header.index("Molecule SMILES") + 1
    idxSimilarity = 0

    # NOTE(review): the command is built by string concatenation and executed
    # through a shell; query and the paths must come from trusted sources.
    cmdStr = 'echo "' + query + '" | fpin ' + FPPath + " "  +NNDataPath + ' 0.0 '+str(n)
    status,output = commands.getstatusoutput(cmdStr)
    if status:
        print(status)
        print(output)
        raise Exception(str(output))
    #             TS              SMILES                    AZID         DATE       expRes
    # output = "0.7117   CCCC(C)C1(C(=O)NC(=O)NC1=O)CC   AZ10046012   2009-12-02   3.480007"
    # Blank lines (e.g. an empty output when nothing was found) carry no
    # neighbor and are dropped instead of failing on the column lookups.
    TS = [line.strip().split('\t') for line in output.split("\n") if line.strip()]
    # in TS:
    #    TS[n][0] - tanimoto similarity
    #    TS[n][1] - SMILES
    #    TS[n][2] - AZID
    #    TS[n][-1]- expRes
    res = []
    timeStamp=str(time.time()).replace(".",'')
    saveImages = bool(resPath) and os.path.isdir(resPath)
    for fidx,nn in enumerate(TS):
        expVal = nn[idxExpVal]
        if miscUtilities.isNumber(expVal):
            expVal = str(round(float(expVal),2))
        SMILES = nn[idxSMILES]
        imgPath = ""
        if saveImages:
            mol = Chem.MolFromSmiles(SMILES)
            # MolFromSmiles gives None for an unparsable SMILES; such a
            # neighbor is still reported, just without an image.
            if mol is not None:
                imgPath = os.path.join(resPath,"NN"+str(idx)+"_"+str(fidx+1)+"_"+timeStamp+".png")
                Draw.MolToImageFile(mol,imgPath,size=(300, 300), kekulize=True, wedgeBonds=True)
        res.append( {
                "id": nn[idxID], 
                "expVal": expVal, 
                "similarity": nn[idxSimilarity], 
                "smi": SMILES, 
                "imgPath": imgPath,
                "MeanInhib": ''} )
    return res
Example #11
0
    def __init__(self, **kwds):
        """
        Set the default configuration, apply the keyword overrides and prepare
        the fold control derived from testAttrFilter / testFilterVal.

        Any keyword argument is stored as an instance attribute, replacing the
        default of the same name (learnerName is reset to "" afterwards).

        Fold control, when testAttrFilter is set and present in data.domain:
          - testFilterVal is a non-empty list and testAttrFilter is a str:
            useVarCtrlCV is enabled and preDefIndices gets, per example, 1 if
            its value at testAttrFilter is in testFilterVal and 0 otherwise.
          - testFilterVal is None: usePreDefFolds is enabled and preDefIndices
            gets, per example, the value at testAttrFilter converted to int.
          - anything else is logged as an error.
        In the first two cases self.data is then replaced by the result of
        dataUtilities.attributeDeselectionData(self.data, [testAttrFilter]).
        When testAttrFilter is unset or not in the domain, both flags are
        disabled and testAttrFilter / testFilterVal are reset to None.
        """
        # Default configuration; any of these may be overridden through kwds
        self.verbose = 0
        self.logFile = None
        self.resultsFile = None
        self.nExtFolds = 5
        self.nInnerFolds = 5
        self.data = None
        self.learner = None
        self.paramList = None
        self.queueType = "NoSGE"
        self.responseType = None
        self.fixedParams = {}
        self.testAttrFilter = None
        self.testFilterVal = None
        self.sampler = dataUtilities.SeedDataSampler
        # Append arguments to the __dict__ member variable
        self.__dict__.update(kwds)
        # Assigned after the update, so a learnerName passed in kwds is discarded
        self.learnerName = ""

        self.preDefIndices = orange.LongList()
        self.usePreDefFolds = False
        self.useVarCtrlCV = False
        # NOTE(review): self.data.domain is read here, so data must have been
        # supplied in kwds whenever testAttrFilter is set - confirm with callers.
        if self.testAttrFilter and self.testAttrFilter in self.data.domain:
            if self.testFilterVal and type(
                    self.testFilterVal) == list and type(
                        self.testAttrFilter) == str:
                # Variable control CV: flag each example as test candidate (1) or train only (0)
                self.useVarCtrlCV = True
                self.usePreDefFolds = False
                for ex in self.data:
                    if ex[self.
                          testAttrFilter].value in self.testFilterVal:  # Compound selected to be allowed in the test set
                        self.preDefIndices.append(1)
                    else:  # Compound to not include in the test set. Always to be shifted to the train
                        self.preDefIndices.append(0)
            elif self.testFilterVal is None:
                self.usePreDefFolds = True
                self.useVarCtrlCV = False
                #Enable pre-selected-indices  ( index 0 will be set for train Bias)
                # foldsCounter maps each fold number to how many examples carry it
                foldsCounter = {}
                for ex in self.data:
                    value = str(ex[self.testAttrFilter].value)
                    if not miscUtilities.isNumber(value):
                        self.__log("Invalid fold value:" + str(value) +
                                   ". It must be str convertable to an int.")
                        # NOTE(review): returning a non-None value from __init__
                        # makes Python raise TypeError at instantiation.
                        return False
                    # Going through float lets values written like "2.0" be accepted
                    value = int(float(value))
                    if value not in foldsCounter:
                        foldsCounter[value] = 1
                    else:
                        foldsCounter[value] += 1
                    self.preDefIndices.append(value)

                # Summary of the pre-selected folds (fold 0 is counted as train only)
                self.__log("INFO: Pre-selected " +
                           str(len([f
                                    for f in foldsCounter.keys() if f != 0])) +
                           " folds for CV:")
                self.__log("      Examples in data: " +
                           str(sum(foldsCounter.values())))
                self.__log(
                    "      Examples selected for validation: " +
                    str(sum([foldsCounter[f]
                             for f in foldsCounter if f != 0])))
                self.__log(
                    "      Examples to be appended to the train set: " +
                    str(0 in foldsCounter.keys() and foldsCounter[0] or 0))
            else:
                # Reached for an empty list, a non-list testFilterVal or a non-str testAttrFilter
                self.__log(
                    "ERROR: Attribute Filter Ctrl was selected, but attribute is not in expected format: "
                    + str(self.testAttrFilter))
                # NOTE(review): same TypeError concern as the return above.
                return False
            # The control attribute was only needed to build preDefIndices
            self.data = dataUtilities.attributeDeselectionData(
                self.data, [self.testAttrFilter])
        else:
            # No usable control attribute: disable both fold control modes
            self.usePreDefFolds = False
            self.useVarCtrlCV = False
            self.testAttrFilter = None
            self.testFilterVal = None