예제 #1
0
def computeCorrelationInsideClassesBetweenAttributes(data,
                                                     attrList,
                                                     minCorrelation=0.0,
                                                     progressCallback=None):
    """Rank pairs of continuous attributes by their within-class correlation.

    Args:
        data: example table; ``data.domain`` is used to look up the class
            variable and the type of every attribute in ``attrList``.
        attrList: attributes to pair up. Each entry is resolved through
            ``data.domain[...]`` and handed on to
            ``computeCorrelationInsideClasses``.
        minCorrelation: pairs whose correlation is below this value are
            dropped from the result.
        progressCallback: optional callable; invoked with a percentage
            (float) whenever the number of processed pairs hits a milestone.

    Returns:
        A list of ``(correlation, attr1, attr2)`` tuples sorted in
        descending order. The list is empty when the data has no class
        variable or the class variable is continuous.
    """
    classVar = data.domain.classVar
    if not classVar or classVar.varType == orange.VarTypes.Continuous:
        return []

    # Resolve the type of the attribute that is actually going to be used.
    # Indexing ``data.domain.attributes`` by position in ``attrList`` checks
    # an unrelated attribute whenever ``attrList`` is a subset or reordering
    # of the domain.
    continuousAttrs = [
        attr for attr in attrList
        if data.domain[attr].varType == orange.VarTypes.Continuous
    ]

    # Only pairs that are really evaluated count towards the progress total,
    # so the reported percentage is not diluted by skipped attributes.
    attrCount = len(continuousAttrs)
    pairCount = attrCount * (attrCount - 1) // 2
    milestones = progressBarMilestones(pairCount)

    correlations = []
    done = 0
    for i in range(attrCount):
        for j in range(i + 1, attrCount):
            first, second = continuousAttrs[i], continuousAttrs[j]
            corr, _corrs, _lengths = computeCorrelationInsideClasses(
                data, first, second)
            if corr >= minCorrelation:
                correlations.append((corr, first, second))
            done += 1
            if progressCallback and done in milestones:
                progressCallback(100.0 * done / pairCount)

    return sorted(correlations, reverse=True)
예제 #2
0
 def computeMatrix(self):
     """Compute the pairwise distance matrix for ``self.data`` and send it.

     Does nothing when ``self.data`` is empty or unset. If the matrix cannot
     be allocated, an error is shown on the widget, ``self.matrix`` is reset
     to ``None`` and ``None`` is sent on the "Distance Matrix" output.
     """
     if not self.data:
         return
     data = self.data
     # The selected entry of self.metrics holds a callable at index 1 that
     # turns the data into a distance function.
     dist = self.metrics[self.Metrics][1](data)
     self.error(0)
     try:
         self.matrix = orange.SymMatrix(len(data))
     except orange.KernelException as ex:
         self.error(0, "Could not create distance matrix! %s" % str(ex))
         self.matrix = None
         self.send("Distance Matrix", None)
         return
     self.matrix.setattr('items', data)
     pb = OWGUI.ProgressBar(self, 100)
     size = len(data)
     # The loop fills the lower triangle *including* the diagonal, which is
     # size * (size + 1) / 2 cells; the milestones must be spread over that
     # many steps to track the loop to its end.
     milestones = orngMisc.progressBarMilestones(size * (size + 1) // 2, 100)
     count = 0
     try:
         for i in range(size):
             for j in range(i + 1):
                 self.matrix[i, j] = dist(data[i], data[j])
                 if count in milestones:
                     pb.advance()
                 count += 1
     finally:
         # Close the progress bar even when a distance computation fails.
         pb.finish()
     self.send("Distance Matrix", self.matrix)
예제 #3
0
def computeCorrelationBetweenAttributes(data, attrList, minCorrelation = 0.0, progressCallback=None):
    """Rank pairs of continuous attributes by absolute correlation.

    Args:
        data: example table; ``data.domain`` is used to look up the type of
            every attribute in ``attrList``.
        attrList: attributes to pair up. Each entry is resolved through
            ``data.domain[...]`` and handed on to ``computeCorrelation``.
        minCorrelation: pairs whose absolute correlation is below this value
            are dropped from the result.
        progressCallback: optional callable; invoked with a percentage
            (float) whenever the number of processed pairs hits a milestone.

    Returns:
        A list of ``(abs_correlation, attr1, attr2)`` tuples sorted in
        descending order.
    """
    # Check the type of the attribute that is actually correlated. Indexing
    # ``data.domain.attributes`` by position in ``attrList`` inspects an
    # unrelated attribute whenever ``attrList`` is a subset or reordering of
    # the domain.
    continuousAttrs = [attr for attr in attrList
                       if data.domain[attr].varType == orange.VarTypes.Continuous]

    # Only pairs that are really evaluated count towards the progress total.
    attrCount = len(continuousAttrs)
    pairCount = attrCount * (attrCount - 1) // 2
    milestones = progressBarMilestones(pairCount)

    correlations = []
    done = 0
    for i in range(attrCount):
        for j in range(i+1, attrCount):
            first, second = continuousAttrs[i], continuousAttrs[j]
            val = abs(computeCorrelation(data, first, second))
            if val >= minCorrelation:
                correlations.append((val, first, second))
            done += 1
            if progressCallback and done in milestones:
                progressCallback(100.0 * done / pairCount)

    return sorted(correlations, reverse=True)
예제 #4
0
def computeCorrelationInsideClassesBetweenAttributes(data, attrList, minCorrelation = 0.0, progressCallback=None):
    """Rank pairs of continuous attributes by their within-class correlation.

    Args:
        data: example table; ``data.domain`` is used to look up the class
            variable and the type of every attribute in ``attrList``.
        attrList: attributes to pair up. Each entry is resolved through
            ``data.domain[...]`` and handed on to
            ``computeCorrelationInsideClasses``.
        minCorrelation: pairs whose correlation is below this value are
            dropped from the result.
        progressCallback: optional callable; invoked with a percentage
            (float) whenever the number of processed pairs hits a milestone.

    Returns:
        A list of ``(correlation, attr1, attr2)`` tuples sorted in
        descending order. The list is empty when the data has no class
        variable or the class variable is continuous.
    """
    classVar = data.domain.classVar
    if not classVar or classVar.varType == orange.VarTypes.Continuous:
        return []

    # Check the type of the attribute that is actually correlated. Indexing
    # ``data.domain.attributes`` by position in ``attrList`` inspects an
    # unrelated attribute whenever ``attrList`` is a subset or reordering of
    # the domain.
    continuousAttrs = [attr for attr in attrList
                       if data.domain[attr].varType == orange.VarTypes.Continuous]

    # Only pairs that are really evaluated count towards the progress total.
    attrCount = len(continuousAttrs)
    pairCount = attrCount * (attrCount - 1) // 2
    milestones = progressBarMilestones(pairCount)

    correlations = []
    done = 0
    for i in range(attrCount):
        for j in range(i+1, attrCount):
            first, second = continuousAttrs[i], continuousAttrs[j]
            corr, _corrs, _lengths = computeCorrelationInsideClasses(data, first, second)
            if corr >= minCorrelation:
                correlations.append((corr, first, second))
            done += 1
            if progressCallback and done in milestones:
                progressCallback(100.0 * done / pairCount)

    return sorted(correlations, reverse=True)
예제 #5
0
def computeCorrelationBetweenAttributes(data,
                                        attrList,
                                        minCorrelation=0.0,
                                        progressCallback=None):
    """Rank pairs of continuous attributes by absolute correlation.

    Args:
        data: example table; ``data.domain`` is used to look up the type of
            every attribute in ``attrList``.
        attrList: attributes to pair up. Each entry is resolved through
            ``data.domain[...]`` and handed on to ``computeCorrelation``.
        minCorrelation: pairs whose absolute correlation is below this value
            are dropped from the result.
        progressCallback: optional callable; invoked with a percentage
            (float) whenever the number of processed pairs hits a milestone.

    Returns:
        A list of ``(abs_correlation, attr1, attr2)`` tuples sorted in
        descending order.
    """
    # Resolve the type of the attribute that is actually going to be used.
    # Indexing ``data.domain.attributes`` by position in ``attrList`` checks
    # an unrelated attribute whenever ``attrList`` is a subset or reordering
    # of the domain.
    continuousAttrs = [
        attr for attr in attrList
        if data.domain[attr].varType == orange.VarTypes.Continuous
    ]

    # Only pairs that are really evaluated count towards the progress total,
    # so the reported percentage is not diluted by skipped attributes.
    attrCount = len(continuousAttrs)
    pairCount = attrCount * (attrCount - 1) // 2
    milestones = progressBarMilestones(pairCount)

    correlations = []
    done = 0
    for i in range(attrCount):
        for j in range(i + 1, attrCount):
            first, second = continuousAttrs[i], continuousAttrs[j]
            val = abs(computeCorrelation(data, first, second))
            if val >= minCorrelation:
                correlations.append((val, first, second))
            done += 1
            if progressCallback and done in milestones:
                progressCallback(100.0 * done / pairCount)

    return sorted(correlations, reverse=True)
예제 #6
0
class OWExampleDistance(OWWidget):
    """Widget that turns an example table into a pairwise distance matrix.

    Receives an ``ExampleTable`` on the "Data" input and emits an
    ``orange.SymMatrix`` on the "Distances" output, using the distance
    metric picked in the combo box.
    """
    # Settings persisted between sessions.
    settingsList = ["Metrics", "Normalize"]
    # "Label" is stored per domain through the context handler.
    contextHandlers = {"": DomainContextHandler("", ["Label"])}

    def __init__(self, parent=None, signalManager = None):
        """Set up default settings, the list of metrics and the control area."""
        OWWidget.__init__(self, parent, signalManager, 'ExampleDistance', wantMainArea = 0, resizingEnabled = 0)

        self.inputs = [("Data", ExampleTable, self.dataset)]
        self.outputs = [("Distances", orange.SymMatrix)]

        # Defaults; loadSettings() may overwrite them.
        self.Metrics = 0
        self.Normalize = True
        self.Label = ""
        self.loadSettings()
        self.data = None
        self.matrix = None

        # (display name, distance constructor class); self.Metrics indexes
        # into this list.
        self.metrics = [
            ("Euclidean", orange.ExamplesDistanceConstructor_Euclidean),
            ("Pearson Correlation", orngClustering.ExamplesDistanceConstructor_PearsonR),
            ("Spearman Rank Correlation", orngClustering.ExamplesDistanceConstructor_SpearmanR),
            ("Manhattan", orange.ExamplesDistanceConstructor_Manhattan),
            ("Hamming", orange.ExamplesDistanceConstructor_Hamming),
            ("Relief", orange.ExamplesDistanceConstructor_Relief),
            ]

        cb = OWGUI.comboBox(self.controlArea, self, "Metrics", box="Distance Metrics",
            items=[x[0] for x in self.metrics],
            tooltip="Choose metrics to measure pairwise distance between examples.",
            callback=self.distMetricChanged, valueType=str)
        cb.setMinimumWidth(170)

        OWGUI.separator(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Normalization",
                              addSpace=True)
        self.normalizeCB = OWGUI.checkBox(box, self, "Normalize", "Normalize data",
                                          callback=self.computeMatrix)

        # The checkbox is only enabled for entries 0 and 3 of self.metrics
        # (Euclidean and Manhattan).
        self.normalizeCB.setEnabled(self.Metrics in [0, 3])

        self.labelCombo = OWGUI.comboBox(self.controlArea, self, "Label", box="Example Label",
            items=[],
            tooltip="Attribute used for example labels",
            callback=self.setLabel, sendSelectedValue = 1)

        # Starts out disabled and with no items.
        self.labelCombo.setDisabled(1)

        OWGUI.rubber(self.controlArea)

    def sendReport(self):
        """Report the selected metric name, the label setting and the data."""
        self.reportSettings("Settings",
                            [("Metrics", self.metrics[self.Metrics][0]),
                             ("Label", self.Label)])
        self.reportData(self.data)

    def distMetricChanged(self):
        """Update the normalization checkbox state and recompute the matrix."""
        self.normalizeCB.setEnabled(self.Metrics in [0, 3])
        self.computeMatrix()

    def computeMatrix(self):
        """Compute the pairwise distance matrix for ``self.data`` and send it.

        Does nothing when ``self.data`` is empty or unset. If the matrix
        cannot be allocated, an error is shown on the widget, ``self.matrix``
        is reset to ``None`` and ``None`` is sent on the "Distances" output.
        """
        if not self.data:
            return
        data = self.data
        constructor = self.metrics[self.Metrics][1]()
        constructor.normalize = self.Normalize
        dist = constructor(data)
        self.error(0)
        try:
            self.matrix = orange.SymMatrix(len(data))
        except orange.KernelException as ex:
            # "as" form of the handler: valid on Python 2.6+ and Python 3.
            self.error(0, "Could not create distance matrix! %s" % str(ex))
            self.matrix = None
            self.send("Distances", None)
            return
        self.matrix.setattr('items', data)
        pb = OWGUI.ProgressBar(self, 100)
        size = len(data)
        # The loop fills the lower triangle *including* the diagonal, which
        # is size * (size + 1) / 2 cells; the milestones must be spread over
        # that many steps to track the loop to its end.
        milestones = orngMisc.progressBarMilestones(size * (size + 1) // 2, 100)
        count = 0
        try:
            for i in range(size):
                for j in range(i+1):
                    self.matrix[i, j] = dist(data[i], data[j])
                    if count in milestones:
                        pb.advance()
                    count += 1
        finally:
            # Close the progress bar even when a distance computation fails.
            pb.finish()
        self.send("Distances", self.matrix)
예제 #7
0
def readMatrix(fn, progress=None):
    """Read a symmetric matrix from a pickle, a NumPy ``.npy`` or a text file.

    Args:
        fn: path to the file, or an already open file-like object (anything
            with a ``readline`` method). File-like objects are always parsed
            as text and are left open for the caller to close.
        progress: optional progress object; ``advance()`` is called while
            rows are read (``.npy`` and text formats) and ``finish()`` once
            at the end.

    Returns:
        A ``(matrix, labels, data)`` tuple. ``matrix`` is the loaded matrix
        (an ``orange.SymMatrix`` for ``.npy`` and text input, whatever was
        pickled for ``.pkl``/``.sym``). ``labels`` is a list of row labels or
        ``None``; ``data`` is the ``orange.ExampleTable`` stored in a pickled
        matrix's ``items`` attribute, otherwise ``None``.

    Raises:
        ValueError: if a text file has no dimension header, has more rows or
            columns than the declared dimension, or contains a cell that is
            not a number.
    """
    matrix = labels = data = None

    # Duck-typed replacement for ``type(fn) != file``: also accepts
    # file-like objects and does not depend on the Python 2 ``file`` builtin.
    isPath = not hasattr(fn, "readline")
    if isPath:
        extension = os.path.splitext(fn)[1]
    else:
        extension = None

    if extension in ('.pkl', '.sym'):
        # NOTE(security): unpickling can execute arbitrary code; only load
        # matrix files that come from a trusted source.
        pkl_file = open(fn, 'rb')
        try:
            matrix = pickle.load(pkl_file)
        finally:
            pkl_file.close()
        if hasattr(matrix, 'items'):
            items = matrix.items
            if isinstance(items, orange.ExampleTable):
                data = items
            elif isinstance(items, list) or hasattr(items, "__iter__"):
                labels = items
    elif extension == '.npy':
        import numpy
        nmatrix = numpy.load(fn)
        matrix = orange.SymMatrix(len(nmatrix))
        milestones = orngMisc.progressBarMilestones(matrix.dim, 100)
        for i in range(len(nmatrix)):
            for j in range(i+1):
                matrix[j,i] = nmatrix[i,j]

            if progress and i in milestones:
                progress.advance()
    else:
        if isPath:
            fle = open(fn)
        else:
            fle = fn
        try:
            # Skip leading blank lines; stop at end of file instead of
            # spinning forever on an empty file.
            lne = ""
            while not lne:
                raw = fle.readline()
                if not raw:
                    raise ValueError("Matrix dimension expected in the first line.")
                lne = raw.strip()
            spl = lne.split()
            try:
                dim = int(spl[0])
            except (IndexError, ValueError):
                raise ValueError("Matrix dimension expected in the first line.")

            labeled = len(spl) > 1 and spl[1] in ["labelled", "labeled"]
            matrix = orange.SymMatrix(dim)
            data = None

            milestones = orngMisc.progressBarMilestones(dim, 100)
            if labeled:
                labels = []
            else:
                labels = [""] * dim
            for li, lne in enumerate(fle):
                # Rows are numbered 0 .. dim-1, so row ``dim`` is already one
                # too many; trailing blank lines are tolerated.
                if li >= dim:
                    if not lne.strip():
                        continue
                    raise ValueError("File to long")

                spl = lne.split("\t")
                if labeled:
                    labels.append(spl[0].strip())
                    spl = spl[1:]
                if len(spl) > dim:
                    # +2: one for the header line, one for 1-based numbering.
                    raise ValueError("Line %i too long" % (li + 2))

                for lj, s in enumerate(spl):
                    if s:
                        try:
                            matrix[li, lj] = float(s)
                        except ValueError:
                            raise ValueError("Invalid number in line %i, column %i" % (li+2, lj))

                if li in milestones:
                    if progress:
                        progress.advance()
        finally:
            # Only close handles opened here; a caller-supplied file object
            # stays open.
            if isPath:
                fle.close()
    if progress:
        progress.finish()

    return matrix, labels, data