def calculate_bcubed_measures(self, documents_labels_clusters):
     '''
     Calculate the BCubed precision, recall and F measures.

     BCubed measures are extrinsic measures and require the presence of a
     ground truth.
     The function assumes that the clusters are in the range 0..Nc and the
     labels 0..Nl.
     For more details : http://www.cs.utsa.edu/~qitian/seminar/Spring11/03_11_11/IR2009.pdf

     :param documents_labels_clusters: sequence of indexable records; element
         0 is used as the label key and element 1 as the cluster key.

     NOTE(review): only the grouping step is present in this source; the
     code that turns the groupings into the actual measures is not visible.
     '''
     # Plain single-argument lambdas are used instead of the Python 2-only
     # ``lambda(x): ...`` spelling so the code parses on Python 2 and 3.
     # groupby only merges adjacent items, hence the sort before each grouping.
     grouped_by_label = [
         list(members)
         for _, members in g(sorted(documents_labels_clusters), key=lambda x: x[0])
     ]
     grouped_by_cluster = {
         cluster: list(members)
         for cluster, members in g(
             sorted(documents_labels_clusters, key=lambda x: x[1]),
             key=lambda x: x[1],
         )
     }
Beispiel #2
0
def classify_by_neighbours(data, labels, item, k=10):
    """Classify ``item`` by a vote among the ``k`` best-scoring rows of ``data``.

    Every row of ``data`` is scored by its inner product with ``item``; the
    labels belonging to the ``k`` highest scores vote, and the most frequent
    label wins.  On a tie the label that appears first among the voters
    (i.e. the one with the higher-ranked neighbour) is returned.

    :param data: array-like object exposing ``dot`` (e.g. a numpy array).
    :param labels: one label per row of ``data``, in the same order.
    :param item: the vector to classify.
    :param k: number of neighbours allowed to vote.
    :return: the winning label.
    """
    # with k=10, this gets 24% wrong and takes 80 seconds
    import heapq
    from itertools import groupby as g

    inner_products = data.dot(np.array(item).transpose())

    # heapq.nlargest accepts any iterable, so the (score, label) pairs are fed
    # to it directly.  The former heapify() call was unnecessary and raises
    # TypeError on Python 3, where zip() returns an iterator, not a list.
    scored = zip(inner_products.flatten(), labels)
    voters = [label for _, label in heapq.nlargest(k, scored)]

    def rank(group):
        # Rank by vote count, then by earliest position among the voters.
        label, votes = group
        return (len(list(votes)), -voters.index(label))

    return max(g(sorted(voters)), key=rank)[0]
                            def most_common_oneliner(L):
                                """
                                Return the most frequent element of ``L``.

                                Ties are resolved in favour of the element whose first
                                occurrence in ``L`` comes earliest.

                                :param L: non-empty list of mutually orderable items
                                    (``max`` raises ``ValueError`` when ``L`` is empty).
                                :return: the winning element.
                                """

                                # A single-argument key replaces the Python 2-only
                                # ``lambda (x, v): ...`` tuple-parameter form.
                                return max(g(sorted(L)), key=lambda xv: (len(list(xv[1])), -L.index(xv[0])))[0]
        # Record this cluster under the gene, creating the list on first sight.
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        if geneId not in geneClusterMap:
            geneClusterMap[geneId] = []
        geneClusterMap[geneId].append(clusterId)

        # Mirror mapping: record this gene under the cluster.
        if clusterId not in clusterGeneMap:
            clusterGeneMap[clusterId] = []
        clusterGeneMap[clusterId].append(geneId)

# Reduce every gene's list of clusters to its single most frequent entry.
# Ties go to the entry that appears first in the list.  The key takes one
# (value, group) argument instead of the Python 2-only ``lambda(x, v)`` form.
for k in geneClusterMap:
    L = geneClusterMap[k]
    finalGeneClusterMap[k] = max(g(sorted(L)), key=lambda xv: (len(list(xv[1])), -L.index(xv[0])))[0]

# Same reduction for every cluster's list of genes.
for k in clusterGeneMap:
    L = clusterGeneMap[k]
    finalClusterGeneMap[k] = max(g(sorted(L)), key=lambda xv: (len(list(xv[1])), -L.index(xv[0])))[0]

# Running totals, presumably false-positive / false-negative counts; the code
# that updates them is not visible here (TODO confirm).
fpTotal = 0
fnTotal = 0

# Walk the clustering output file line by line.
with open(clusteringOutputFile) as f:
    for line in f:
        # Skip lines whose first character is a newline, i.e. empty lines.
        if(line[0] == '\n' ):
            continue
        # Second space-separated field, with anything from the newline on cut off.
        clusterId = line.split(' ')[1].split('\n')[0]
        # First space-separated field.
        geneId = line.split(' ')[0]
        # Gene whose cluster on this line differs from its majority cluster.
        # NOTE(review): the body of this check is missing from the source.
        if(finalGeneClusterMap[geneId] != clusterId):
    
    # Indentation below uses spaces only: the previous mix of tabs and spaces
    # is rejected by Python 3 with a TabError.
    lineS = line.strip().split("\t")
    occId = lineS[0]
    surfaceForm = lineS[2]
    dbpediaUri = lineS[4]
    # A value without "/" (e.g. a pure "NIL") yields a single field; the
    # surface form then stands in as the alternative URI.
    fields = dbpediaUri.split("/")
    dbpediaUri = fields[0]
    if len(fields) < 2:
        alternativeUri = surfaceForm
    else:
        alternativeUri = fields[1]  # this is what we found before we decided to output NIL

    if dbpediaUri == "NIL":
        # Give every distinct alternative URI its own numbered NIL id
        # (NIL001, NIL002, ...), reused on later occurrences.
        if alternativeUri not in nilDict:
            nilCounter += 1
            nilDict[alternativeUri] = "NIL" + str(nilCounter).zfill(3)
        dbpediaUri = nilDict[alternativeUri]
    # Collect every URI seen for this occurrence id.
    if occId not in out:
        out[occId] = []
    out[occId].append(dbpediaUri)

from itertools import groupby as g

# For every occurrence id, print its most frequent URI (ties go to the URI
# collected first), translated through ``d`` when a mapping exists.
for (occId, uris) in out.items():
    # Single-argument key: the ``lambda(x, v)`` tuple-parameter form is a
    # SyntaxError on Python 3.
    uri = max(g(sorted(uris)), key=lambda xv: (len(list(xv[1])), -uris.index(xv[0])))[0]

    # Parenthesised single argument prints identically on Python 2 and 3.
    print(occId + "\t" + d.get(uri, uri))



def classify(sorted_neighbors):
    """Return the most frequent value among ``sorted_neighbors``.

    ``sorted_neighbors`` must expose ``tolist()``.  When several values share
    the highest count, the one occurring earliest in the converted list wins.
    """
    # Adapted from https://stackoverflow.com/a/1520716
    votes = sorted_neighbors.tolist()

    def rank(group):
        # Higher count first; earlier first occurrence breaks ties.
        value, members = group
        return (len(list(members)), -votes.index(value))

    winner, _ = max(g(sorted(votes)), key=rank)
    return winner
Beispiel #7
0
def common(l):
  """Return the most frequent element of list ``l``.

  Ties go to the element whose first occurrence in ``l`` is earliest.
  ``l`` must be non-empty and its items mutually orderable.
  """
  def rank(group):
    # One (value, occurrences) argument; the tuple-unpacking lambda the
    # original used only parses on Python 2.
    value, occurrences = group
    return (len(list(occurrences)), -l.index(value))

  return max(g(sorted(l)), key=rank)[0]
Beispiel #8
0
def most_common(L):
    """Fetch the most common item from a list.

    Ties go to the item whose first occurrence in ``L`` is earliest.

    :param L: list of mutually orderable items.
    :return: the most common item, or ``""`` when none can be determined
        (empty list, non-iterable or unorderable input, or a container
        without ``index``).
    """
    try:
        # Single-argument key: the ``lambda(x, v)`` form is Python 2-only.
        return max(g(sorted(L)), key=lambda xv: (len(list(xv[1])), -L.index(xv[0])))[0]
    except (ValueError, TypeError, AttributeError):
        # ValueError: empty input; TypeError: not iterable / not orderable;
        # AttributeError: no ``index`` method.  Narrower than the former bare
        # ``except:``, which also swallowed KeyboardInterrupt and SystemExit.
        return ""
def MostCommon(L):
    """Return the element that occurs most often in list ``L``.

    If several elements share the top count, the one that appears first in
    ``L`` is returned.  Raises ``ValueError`` for an empty list.
    """
    # The key receives one (element, group) pair; Python 3 no longer accepts
    # the tuple-parameter ``lambda (x, v)`` spelling.
    return max(g(sorted(L)), key=lambda pair: (len(list(pair[1])), -L.index(pair[0])))[0]
Beispiel #10
0
    def scorePlayArea(self):
        """Score the cards in ``self.playArea`` and add the total via ``self.addPoints``.

        The play area itself is left untouched.
        """
        # Cards worth a fixed number of points on their own, checked in this order.
        fixed_values = (
            ("Egg Nigiri", 1),
            ("Salmon Nigiri", 2),
            ("Squid Nigiri", 3),
            ("Miso Soup", 3),
        )
        # Extra points a nigiri earns when an unused wasabi precedes it.
        wasabi_bonus = (
            ("Egg Nigiri", 2),
            ("Salmon Nigiri", 4),
            ("Squid Nigiri", 6),
        )

        points = 0
        tempuraCount = 0
        sashimiCount = 0

        # Fixed-value cards score immediately; tempura and sashimi are counted.
        for card in self.playArea:
            for name, value in fixed_values:
                if card.isCard(name):
                    points += value
                    break
            else:
                if card.isCard("Tempura"):
                    tempuraCount += 1
                elif card.isCard("Sashimi"):
                    sashimiCount += 1

        # Tempura scores per complete pair, sashimi per complete triple.
        points += math.floor(tempuraCount / 2) * 5
        points += math.floor(sashimiCount / 3) * 10

        # Wasabi depends on card order: each wasabi boosts one later nigiri.
        unusedWasabi = 0
        for card in self.playArea:
            if card.sushiName == "Wasabi":
                unusedWasabi += 1
            if unusedWasabi > 0 and card.sushiType == "Nigiri":
                for name, bonus in wasabi_bonus:
                    if card.sushiName == name:
                        points += bonus
                        unusedWasabi -= 1
                        break

        # Tea relies on card colours: nigiri and wasabi share 'Yellow', every
        # other card is treated as its own colour (its name).
        colours = [
            'Yellow'
            if (card.sushiType == "Nigiri" or card.sushiName == "Wasabi")
            else card.sushiName
            for card in self.playArea
        ]

        # Size of the largest same-colour group (0 for an empty play area).
        teaScore = max([0] + [len(list(run)) for _, run in g(sorted(colours))])

        # Every tea card is worth the size of that largest group.
        teaCards = sum(1 for card in self.playArea if card.sushiName == "Tea")
        points += teaCards * teaScore

        # Maki Rolls are scored at a global level

        # Points total is added to players score
        self.addPoints(points)
Beispiel #11
0
def mode(L):
    """Return the mode (most frequent element) of list ``L``.

    When several elements are equally frequent, the one occurring first in
    ``L`` is returned.  ``L`` must be non-empty with orderable items.
    """
    # Index into the (element, group) pair rather than unpacking it in the
    # lambda signature, which is a SyntaxError on Python 3.
    return max(g(sorted(L)), key=lambda xv: (len(list(xv[1])), -L.index(xv[0])))[0]
Beispiel #12
0
def mode(L):
    """Return the element of list ``L`` with the highest occurrence count.

    Ties are broken in favour of the element seen first in ``L``.
    Raises ``ValueError`` if ``L`` is empty.
    """
    def rank(group):
        # Takes the (element, occurrences) pair as one argument; the
        # tuple-parameter lambda used before is not valid Python 3.
        element, occurrences = group
        return (len(list(occurrences)), -L.index(element))

    return max(g(sorted(L)), key=rank)[0]
        continue

    # Tab-separated record: column 0 is the occurrence id, column 2 the
    # surface form and column 4 the DBpedia URI field.
    lineS = line.strip().split("\t")
    occId = lineS[0]
    surfaceForm = lineS[2]
    dbpediaUri = lineS[4]

    # The URI field may carry a second, "/"-separated part: what was found
    # before the decision to output NIL.  Without it, use the surface form.
    fields = dbpediaUri.split("/")
    dbpediaUri = fields[0]
    alternativeUri = surfaceForm if len(fields) < 2 else fields[1]

    if dbpediaUri == "NIL":
        # Each distinct alternative URI gets its own numbered NIL id.
        if alternativeUri not in nilDict:
            nilCounter += 1
            nilDict[alternativeUri] = "NIL" + str(nilCounter).zfill(3)
        dbpediaUri = nilDict[alternativeUri]

    # Accumulate the URIs seen for this occurrence id.
    if occId in out:
        out[occId].append(dbpediaUri)
    else:
        out[occId] = [dbpediaUri]

from itertools import groupby as g

# Emit one line per occurrence id: the id, a tab, and its most frequent URI
# (first-collected URI wins ties), mapped through ``d`` when possible.
for (occId, uris) in out.items():
    # The key takes the (uri, group) pair as a single argument; unpacking it
    # in the lambda signature only works on Python 2.
    uri = max(g(sorted(uris)),
              key=lambda xv: (len(list(xv[1])), -uris.index(xv[0])))[0]

    # Function-call form of print works on both Python 2 and Python 3.
    print(occId + "\t" + d.get(uri, uri))
Beispiel #14
0
def sort_by_frequency(L):
    """Return the distinct elements of ``L`` ordered by ascending frequency.

    Elements with equal frequency are ordered by descending position of their
    first occurrence in ``L`` (the tie-break key is ``-L.index(x)``).

    :param L: list of mutually orderable items.
    :return: list of the distinct elements, least frequent first.
    """
    # Count each group while groupby is positioned on it.  Handing the groupby
    # object straight to sorted() materialises all groups first, by which time
    # every group iterator is exhausted and each count would read as 0.
    counts = [(x, len(list(v))) for x, v in g(sorted(L))]
    # Single-argument key; the ``lambda(x, v)`` form is Python 2-only.
    return [x for x, _ in sorted(counts, key=lambda xn: (xn[1], -L.index(xn[0])))]
def MostCommon(L):
    """Return the most frequently occurring element of list ``L``.

    Equal counts are resolved by preferring the element that shows up first
    in ``L``.  ``L`` must not be empty (``max`` raises ``ValueError``).
    """
    def rank(group):
        # Accepts the (element, run) pair whole, since Python 3 dropped
        # tuple parameters in lambda/def signatures.
        element, run = group
        return (len(list(run)), -L.index(element))

    return max(g(sorted(L)), key=rank)[0]
Beispiel #16
0
from itertools import groupby as g
# Read one line and print a (run length, digit) tuple for every run of
# identical consecutive characters, separated by spaces.
s = input()
for digit, run in g(s):
    run_length = len(list(run))
    print((run_length, int(digit)), end=' ')
Beispiel #17
0
def most_common(iterator):
    """Return the most frequent element of ``iterator``.

    Despite the parameter name, the argument must be a sequence offering
    ``index`` (such as a list): it is sorted once and then searched by
    position to break ties in favour of the earliest first occurrence.
    Raises ``ValueError`` when it is empty.
    """
    # Single-argument key; ``lambda (x, v)`` no longer parses on Python 3.
    return max(g(sorted(iterator)),
               key=lambda xv: (len(list(xv[1])), -iterator.index(xv[0])))[0]
Beispiel #18
0
def most_common_oneliner(L):
    """Return the most frequent element of list ``L``.

    Among equally frequent elements the one appearing first in ``L`` wins.
    ``L`` must be non-empty and contain mutually orderable items.
    """
    # The key indexes into the (element, group) pair because tuple parameters
    # in a lambda signature are rejected by Python 3.
    return max(g(sorted(L)), key=lambda xv: (len(list(xv[1])), -L.index(xv[0])))[0]
def most_common_angle(L):
    """Return the value that occurs most often in list ``L``.

    If several values are tied for the highest count, the one whose first
    occurrence in ``L`` is earliest is returned.  Raises ``ValueError`` for an
    empty list.
    """
    def rank(group):
        # One (value, occurrences) argument instead of the Python 2-only
        # tuple-unpacking lambda.
        value, occurrences = group
        return (len(list(occurrences)), -L.index(value))

    return max(g(sorted(L)), key=rank)[0]