def calculate_bcubed_measures(self, documents_labels_clusters):
    '''
    This method calculates the BCubed precision, recall and F measures. BCubed
    measures are extrinsic measures and require the presence of a ground truth.
    The function assumes that the clusters are in the range 0..Nc and the
    labels 0..Nl.

    For more details :
    http://www.cs.utsa.edu/~qitian/seminar/Spring11/03_11_11/IR2009.pdf

    :param documents_labels_clusters: iterable of indexable, mutually
        orderable records. The code groups on element 0 and on element 1 of
        each record.
        NOTE(review): presumably (label, cluster) pairs -- confirm with caller.

    NOTE(review): only the two grouping steps are visible here; nothing is
    returned by the code shown.
    '''
    # The default sort orders records by element 0 first, which makes equal
    # first elements adjacent -- groupby only merges adjacent items.
    # Plain one-argument lambdas replace the Python 2 only ``lambda(x):`` form.
    grouped_by_label = [
        list(label[1])
        for label in g(sorted(documents_labels_clusters), key=lambda x: x[0])
    ]
    # Same idea for element 1: sort and group with the same key.
    grouped_by_cluster = {
        cluster[0]: list(cluster[1])
        for cluster in g(
            sorted(documents_labels_clusters, key=lambda x: x[1]),
            key=lambda x: x[1],
        )
    }
def classify_by_neighbours(data, labels, item, k=10):
    """Classify ``item`` by majority vote among its ``k`` best-scoring rows.

    Every row of ``data`` is scored by its inner product with ``item``. The
    labels of the ``k`` highest-scoring rows vote and the most frequent label
    wins. Ties go to the label that appears first in the voter list, which is
    ordered from highest score to lowest.

    :param data: object with a ``dot`` method whose result has ``flatten``.
        NOTE(review): presumably a 2-D numpy array, one row per sample --
        confirm with caller.
    :param labels: labels paired positionally with the flattened scores.
    :param item: vector to classify; converted with ``np.array``.
    :param k: number of voters.
    :returns: the winning label.
    :raises ValueError: if there are no voters (no rows, or ``k`` is 0).
    """
    # with k=10, this gets 24% wrong and takes 80 seconds
    import heapq
    from itertools import groupby as g

    inner_products = data.dot(np.array(item).transpose())
    # nlargest accepts any iterable, so the lazy Python 3 zip() can be passed
    # directly. The former heapify() call required a list and did not change
    # which elements nlargest selects.
    scored = zip(inner_products.flatten(), labels)
    voters = [label for _, label in heapq.nlargest(k, scored)]
    # groupby only merges adjacent equal items, hence the sort. The key takes
    # the (label, group) pair as one argument: tuple-unpacking parameters were
    # removed in Python 3.
    return max(
        g(sorted(voters)),
        key=lambda group: (len(list(group[1])), -voters.index(group[0])),
    )[0]
def most_common_oneliner(L):
    """Return the most frequent element of ``L``.

    When several elements share the highest count, the one whose first
    occurrence comes earliest in ``L`` is returned.

    :param L: sequence (must provide ``index``) of mutually orderable items.
    :returns: the most frequent element.
    :raises ValueError: if ``L`` is empty.
    """
    # groupby only merges adjacent equal items, hence the sort. The key takes
    # the (value, group) pair as a single argument because tuple-unpacking
    # lambda parameters are a SyntaxError on Python 3.
    return max(
        g(sorted(L)),
        key=lambda group: (len(list(group[1])), -L.index(group[0])),
    )[0]
# NOTE(review): fragment -- it starts inside a body whose header is not visible
# here and ends on an `if` whose suite is cut off. geneId, clusterId, the *Map
# dicts, g and clusteringOutputFile are bound outside this span.
# Visible steps:
#   1. append clusterId to geneClusterMap[geneId] and geneId to
#      clusterGeneMap[clusterId], creating the list the first time a key is seen;
#   2. for every key of each map, store the most frequent entry of its list in
#      finalGeneClusterMap / finalClusterGeneMap (ties go to the entry that
#      occurs first in the list);
#   3. set fpTotal and fnTotal to 0, then read clusteringOutputFile line by
#      line, skipping lines that begin with a newline; the first
#      space-separated field is geneId, the second (newline removed) is clusterId.
# Python 2 only as written: dict.has_key() and tuple-unpacking lambda
# parameters do not exist in Python 3.
if (geneClusterMap.has_key(geneId)): geneClusterMap[geneId].append(clusterId) else: geneClusterMap[geneId] = [] geneClusterMap[geneId].append(clusterId) if (clusterGeneMap.has_key(clusterId)): clusterGeneMap[clusterId].append(geneId) else: clusterGeneMap[clusterId] = [] clusterGeneMap[clusterId].append(geneId) for k in geneClusterMap.keys(): L = geneClusterMap[k] finalGeneClusterMap[k] = max(g(sorted(L)), key=lambda(x, v):(len(list(v)),-L.index(x)))[0] for k in clusterGeneMap.keys(): L = clusterGeneMap[k] finalClusterGeneMap[k] = max(g(sorted(L)), key=lambda(x, v):(len(list(v)),-L.index(x)))[0] fpTotal = 0 fnTotal = 0 with open(clusteringOutputFile) as f: for line in f: if(line[0] == '\n' ): continue clusterId = line.split(' ')[1].split('\n')[0] geneId = line.split(' ')[0] if(finalGeneClusterMap[geneId] != clusterId):
# NOTE(review): fragment -- `line`, nilDict, nilCounter, out and d are bound
# outside this span, and the first statements presumably sit inside a per-line
# loop whose header is not visible (confirm against the full file).
# Visible steps:
#   1. split the stripped line on tabs: field 0 is occId, field 2 surfaceForm,
#      field 4 dbpediaUri;
#   2. split dbpediaUri on "/": the first part replaces dbpediaUri, the second
#      part (or surfaceForm when there is none) becomes alternativeUri;
#   3. when dbpediaUri is "NIL", replace it with an id of the form "NIL" plus a
#      3-digit zero-padded counter, allocated once per alternativeUri in nilDict;
#   4. append the uri to the list out[occId];
#   5. for each occId, pick the most frequent uri (ties go to the one that
#      occurs first in the list) and print occId, a tab, and d.get(uri, uri).
# Python 2 only as written: print statement and tuple-unpacking lambda parameters.
lineS = line.strip().split("\t") occId = lineS[0] surfaceForm = lineS[2] dbpediaUri = lineS[4] fields = dbpediaUri.split("/") # what happens with pure NILs? dbpediaUri = fields[0] if len(fields)<2: alternativeUri = surfaceForm else: alternativeUri = fields[1] # this is what we found before we decided to output NIL if dbpediaUri == "NIL": if alternativeUri not in nilDict: nilCounter += 1 nilDict[alternativeUri] = "NIL"+str(nilCounter).zfill(3) dbpediaUri = nilDict[alternativeUri] if occId not in out: out[occId] = [] out[occId].append(dbpediaUri) from itertools import groupby as g for (occId, uris) in out.items(): uri = max(g(sorted(uris)), key=lambda(x, v):(len(list(v)),-uris.index(x)))[0] print occId + "\t" + d.get(uri, uri)
def classify(sorted_neighbors):
    """Return the label that occurs most often among ``sorted_neighbors``.

    ``sorted_neighbors`` must offer ``tolist()``. If several labels share the
    top count, the one found earliest in the converted list is chosen.
    """
    # Adapted from https://stackoverflow.com/a/1520716
    votes = sorted_neighbors.tolist()

    def rank(group):
        # Rank by run length first, then by how early the label first appears.
        label, members = group
        return (sum(1 for _ in members), -votes.index(label))

    winner, _ = max(g(sorted(votes)), key=rank)
    return winner
def common(l):
    """Return the most frequent element of ``l``.

    Ties are resolved in favour of the element that occurs first in ``l``.

    :param l: sequence (must provide ``index``) of mutually orderable items.
    :returns: the most frequent element.
    :raises ValueError: if ``l`` is empty.
    """
    # Sorting makes equal items adjacent so groupby yields one run per value.
    # The key indexes into the (value, run) pair; the former
    # ``lambda(x, v):`` form is not valid Python 3 syntax.
    return max(
        g(sorted(l)),
        key=lambda run: (len(list(run[1])), -l.index(run[0])),
    )[0]
def most_common(L):
    """Fetch the most common item from a list, or ``""`` on failure.

    Ties are resolved in favour of the item that occurs first in ``L``.

    :param L: sequence (must provide ``index``) of mutually orderable items.
    :returns: the most frequent item, or the empty string when it cannot be
        determined (empty input, unorderable items, not a sequence, ...).
    """
    try:
        # The key takes the (value, group) pair as one argument because
        # tuple-unpacking lambda parameters are a SyntaxError on Python 3.
        return max(
            g(sorted(L)),
            key=lambda group: (len(list(group[1])), -L.index(group[0])),
        )[0]
    except Exception:
        # Deliberately best-effort, as before. Unlike the former bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit propagate.
        return ""
def MostCommon(L):
    """Return the element that occurs most often in ``L``.

    If several elements share the highest count, the one appearing first in
    ``L`` wins.

    :param L: sequence (must provide ``index``) of mutually orderable items.
    :returns: the most frequent element.
    :raises ValueError: if ``L`` is empty.
    """
    def rank(entry):
        # entry is the (value, run) pair produced by groupby. It is unpacked
        # here because Python 3 has no tuple parameters in lambdas.
        value, run = entry
        return (len(list(run)), -L.index(value))

    # groupby needs equal items to be adjacent, hence the sort.
    return max(g(sorted(L)), key=rank)[0]
def scorePlayArea(self):
    """Score this player's play area for one round of SushiGo.

    The play area itself is left untouched; the round total is passed to
    ``self.addPoints``. Maki Rolls are scored at a global level, not here.
    """
    cards = self.playArea
    total = 0

    # Flat-value cards, plus tallies for the set-based ones. isCard is
    # queried in a fixed order and the first match wins.
    flat_values = (
        ("Egg Nigiri", 1),
        ("Salmon Nigiri", 2),
        ("Squid Nigiri", 3),
        ("Miso Soup", 3),
    )
    tempura_seen = 0
    sashimi_seen = 0
    for card in cards:
        for card_name, value in flat_values:
            if card.isCard(card_name):
                total += value
                break
        else:
            if card.isCard("Tempura"):
                tempura_seen += 1
            elif card.isCard("Sashimi"):
                sashimi_seen += 1

    # Tempura pays per complete pair, Sashimi per complete triple.
    total += math.floor(tempura_seen / 2) * 5
    total += math.floor(sashimi_seen / 3) * 10

    # Wasabi depends on card order: each Wasabi already played is spent on
    # the next Nigiri that follows it.
    wasabi_bonus = {"Egg Nigiri": 2, "Salmon Nigiri": 4, "Squid Nigiri": 6}
    unused_wasabi = 0
    for card in cards:
        if card.sushiName == "Wasabi":
            unused_wasabi += 1
        if unused_wasabi > 0 and card.sushiType == "Nigiri":
            bonus = wasabi_bonus.get(card.sushiName)
            if bonus is not None:
                total += bonus
                unused_wasabi -= 1

    # Tea relies on card colours. Every card has its own colour (its name)
    # except Nigiri and Wasabi, which all count as 'Yellow'.
    colours = [
        'Yellow'
        if card.sushiType == "Nigiri" or card.sushiName == "Wasabi"
        else card.sushiName
        for card in cards
    ]
    largest_group = 0
    for _, members in g(sorted(colours)):
        largest_group = max(largest_group, len(list(members)))

    # Each Tea card is worth the size of the largest colour group.
    tea_cards = sum(1 for card in cards if card.sushiName == "Tea")
    total += tea_cards * largest_group

    # Points total is added to the player's score.
    self.addPoints(total)
def mode(L):
    """Return the mode (most frequent element) of ``L``.

    If several elements are equally frequent, the one that appears first in
    ``L`` is returned.

    :param L: sequence (must provide ``index``) of mutually orderable items.
    :returns: the most frequent element.
    :raises ValueError: if ``L`` is empty.
    """
    # Sorted so that equal items are adjacent for groupby. Each candidate is
    # ranked by (occurrences, earliest position). The pair is indexed rather
    # than unpacked in the signature, which Python 3 no longer allows.
    return max(
        g(sorted(L)),
        key=lambda pair: (len(list(pair[1])), -L.index(pair[0])),
    )[0]
def mode(L):
    """Return the value that occurs most often in ``L``.

    Equal counts are resolved in favour of the value whose first occurrence
    is earliest in ``L``.

    :param L: sequence (must provide ``index``) of mutually orderable items.
    :returns: the most frequent value.
    :raises ValueError: if ``L`` is empty.
    """
    def score(candidate):
        # candidate is a (value, occurrences-iterator) pair from groupby.
        value, occurrences = candidate
        return (len(list(occurrences)), -L.index(value))

    # groupby merges only neighbouring equal items, so sort first. A named
    # key function replaces the Python 2 only ``lambda(x, v):`` syntax.
    return max(g(sorted(L)), key=score)[0]
# NOTE(review): fragment -- it opens with the `continue` of a statement whose
# start is not visible; `line`, nilDict, nilCounter, out and d are bound
# outside this span. The per-line statements presumably sit inside a loop
# whose header is not shown (confirm against the full file).
# Visible steps:
#   1. split the stripped line on tabs: field 0 is occId, field 2 surfaceForm,
#      field 4 dbpediaUri;
#   2. split dbpediaUri on "/": the first part replaces dbpediaUri, the second
#      part (or surfaceForm when there is none) becomes alternativeUri;
#   3. when dbpediaUri is "NIL", replace it with an id of the form "NIL" plus a
#      3-digit zero-padded counter, allocated once per alternativeUri in nilDict;
#   4. append the uri to the list out[occId];
#   5. for each occId, pick the most frequent uri (ties go to the one that
#      occurs first in the list) and print occId, a tab, and d.get(uri, uri).
# Python 2 only as written: print statement and tuple-unpacking lambda parameters.
continue lineS = line.strip().split("\t") occId = lineS[0] surfaceForm = lineS[2] dbpediaUri = lineS[4] fields = dbpediaUri.split("/") # what happens with pure NILs? dbpediaUri = fields[0] if len(fields) < 2: alternativeUri = surfaceForm else: alternativeUri = fields[ 1] # this is what we found before we decided to output NIL if dbpediaUri == "NIL": if alternativeUri not in nilDict: nilCounter += 1 nilDict[alternativeUri] = "NIL" + str(nilCounter).zfill(3) dbpediaUri = nilDict[alternativeUri] if occId not in out: out[occId] = [] out[occId].append(dbpediaUri) from itertools import groupby as g for (occId, uris) in out.items(): uri = max(g(sorted(uris)), key=lambda (x, v): (len(list(v)), -uris.index(x)))[0] print occId + "\t" + d.get(uri, uri)
def sort_by_frequency(L):
    """Return the distinct elements of ``L`` ordered by ascending frequency.

    Elements with the same count are ordered so that the one whose first
    occurrence is later in ``L`` comes first (the sort key is
    ``(count, -first_index)``).

    :param L: sequence (must provide ``index``) of mutually orderable items.
    :returns: list of distinct elements, least frequent first; ``[]`` for
        empty input.
    """
    # Count each run while groupby is being iterated. Group iterators share
    # the underlying source, and sorted() drains the whole groupby before it
    # calls the key function, so measuring the groups inside the sort key
    # would see them already exhausted and ignore the real counts.
    counted = [(value, len(list(members))) for value, members in g(sorted(L))]
    counted.sort(key=lambda pair: (pair[1], -L.index(pair[0])))
    return [value for value, _ in counted]
from itertools import groupby as g

# Read one line from stdin and, for every run of identical consecutive
# characters, print a "(run length, character as int)" tuple followed by a
# space, all on the same output line.
s = input()
for digit, run in g(s):
    run_length = len(tuple(run))
    print((run_length, int(digit)), end=' ')
def most_common(iterator):
    """Return the most frequent element produced by ``iterator``.

    Ties are resolved in favour of the element seen first.

    :param iterator: any iterable of mutually orderable items. It is
        materialised once, so one-shot iterators and generators work as well
        as lists.
    :returns: the most frequent element.
    :raises ValueError: if ``iterator`` yields nothing.
    """
    # A real iterator has no ``index`` and can only be consumed once, so take
    # a snapshot and use it for both the sort and the tie-break.
    items = list(iterator)
    # The key takes the (value, group) pair as one argument because
    # tuple-unpacking lambda parameters are a SyntaxError on Python 3.
    return max(
        g(sorted(items)),
        key=lambda group: (len(list(group[1])), -items.index(group[0])),
    )[0]
def most_common_oneliner(L):
    """Return the most frequent element of ``L``.

    Among equally frequent elements, the one occurring first in ``L`` wins.

    :param L: sequence (must provide ``index``) of mutually orderable items.
    :returns: the most frequent element.
    :raises ValueError: if ``L`` is empty.
    """
    # Sort so equal items are adjacent, then group them. Each (value, run)
    # pair is ranked by run length and then by earliest position. The pair is
    # indexed because Python 3 lambdas cannot unpack a tuple parameter.
    runs = g(sorted(L))
    best = max(runs, key=lambda vr: (len(list(vr[1])), -L.index(vr[0])))
    return best[0]
def most_common_angle(L):
    """Return the value that occurs most often in ``L``.

    If several values share the highest count, the one that appears first in
    ``L`` is returned.

    :param L: sequence (must provide ``index``) of mutually orderable values.
    :returns: the most frequent value.
    :raises ValueError: if ``L`` is empty.
    """
    # groupby only merges adjacent equal values, hence the sort. The key
    # receives the (value, group) pair as one argument; the former
    # ``lambda(x, v):`` form is rejected by Python 3.
    return max(
        g(sorted(L)),
        key=lambda group: (len(list(group[1])), -L.index(group[0])),
    )[0]