testfile[1]) if a > targetscore[target]: targetscore[target] = a sortedtargets = sorted(targets, key=lambda ia: targetscore[ia], reverse=True) for rank in range(len(sortedtargets)): if sortedtargets[rank] == authorindex: averagerankofauthorhit += rank + 1 targetvote = {} for target in targets: for cat in categories: targetvote[cat] = 0 for pp in sortedtargets[:itempooldepth]: targetvote[categorytable[pp]] += 1 logger( str(pp) + "\t" + str(categorytable[pp]) + "\t" + str(targetscore[pp]), debug) sortedpredictions = sorted(categories, key=lambda ia: targetvote[ia], reverse=True) prediction = sortedpredictions[0] confusion.addconfusion(facittable[authornametable[authorindex]], prediction) logger("Done testing files.", monitor) confusion.evaluate() if len(testvectors) > 0: print(averagerankofauthorhit, len(testvectors), averagerankofauthorhit / len(testvectors), sep="\t")
targetspace.indexspace[otheritem]) logger("Done calculating neighbours", monitor) logger("Pool depth " + str(itempooldepth), monitor) if averagelinkage: logger("Averagelinkage", monitor) if votelinkage: logger("Votelinkage", monitor) confusion = ConfusionMatrix() primeconfusion = ConfusionMatrix() targetscore = {} for item in testers: sortedneighbours = sorted(neighbours[item], key=lambda hh: neighbours[item][hh], reverse=True)[:itempooldepth] primeconfusion.addconfusion(facittable[testitemspace.name[item]], targetspace.category[sortedneighbours[0]]) for target in categories: targetscore[target] = 0 if averagelinkage: # take all test neighbours and sum their scores for neighbour in sortedneighbours: targetscore[targetspace. category[neighbour]] += neighbours[item][neighbour] elif votelinkage: for neighbour in sortedneighbours: targetscore[targetspace.category[neighbour]] += 1 sortedpredictions = sorted(categories, key=lambda ia: targetscore[ia], reverse=True) prediction = sortedpredictions[0] logger( prediction + "?" + " " + facittable[testitemspace.name[item]] +
def runbatchtest(fraction, n: int = 100):
    """Run one shuffled train/test evaluation of four simple classifiers.

    The first ``n`` keys of ``vectorrepositoryall`` are shuffled and split:
    the leading ``fraction`` of them becomes the training set, the rest the
    test set. Every test item is then classified as ``"1"`` or ``"0"`` by

    * MAX       - always guess the majority label of the training set,
    * RANDOM    - a fifty-fifty coin flip,
    * CENTROID  - whichever class centroid (built from the training vectors)
                  has the higher ``sparsevectors.sparsecosine`` to the item,
    * NEIGHBOURS - a vote among the ``pooldepth`` training items with the
                  highest cosine to the item.

    The gold label of an item is ``illness[item]``; anything other than
    ``"1"`` is treated as the negative class. One tab-separated row per test
    item is printed, followed by the evaluation of each confusion matrix.

    Args:
        fraction: Share of the selected items used for training (the split
            index is ``int(len(keys) * fraction)``).
        n: Maximum number of items taken from ``vectorrepositoryall``.

    Returns:
        None. Results are reported through ``print`` and ``logger``.
    """
    logger("{} {} {}".format(n, fraction, ticker), monitor)
    keylist = list(vectorrepositoryall.keys())[:n]
    random.shuffle(keylist)
    split = int(len(keylist) * fraction)
    train = keylist[:split]
    test = keylist[split:]
    logger("{} train vs {} test".format(len(train), len(test)), monitor)

    dummymaxconfusionmatrix = ConfusionMatrix()
    dummyrandomconfusionmatrix = ConfusionMatrix()
    centroidconfusionmatrix = ConfusionMatrix()
    poolconfusionmatrix = ConfusionMatrix()

    # Fit the class centroids and the majority-class baseline on the TRAINING
    # split only. Building them from the test split (as was done before)
    # leaks the test labels into the model and inflates the reported scores.
    ones = []
    nils = []
    for trainitem in train:
        if illness[trainitem] == "1":
            ones.append(vectorrepositoryall[trainitem])
        else:
            nils.append(vectorrepositoryall[trainitem])
    onecentroid = sparsevectors.centroid(ones)
    nilcentroid = sparsevectors.centroid(nils)
    dummymaxguess = "0" if len(nils) > len(ones) else "1"

    # Decision threshold shared by the random baseline and the neighbour
    # vote. Deliberately fifty-fifty rather than the observed class ratio.
    factor = 1 / 2

    for testitem in test:
        gold = illness[testitem]
        probe = vectorrepositoryall[testitem]

        # Baselines.
        dummymaxconfusionmatrix.addconfusion(gold, dummymaxguess)
        dummyrandomguess = "0" if random.random() > factor else "1"
        dummyrandomconfusionmatrix.addconfusion(gold, dummyrandomguess)

        # Centroid classifier: the closer class centroid wins, ties go to "0".
        i1 = sparsevectors.sparsecosine(probe, onecentroid)
        n1 = sparsevectors.sparsecosine(probe, nilcentroid)
        resultc = "1" if i1 > n1 else "0"
        centroidconfusionmatrix.addconfusion(gold, resultc)

        # Neighbour pool classifier: vote among the most similar train items.
        probeneighbours = {
            targetitem: sparsevectors.sparsecosine(
                probe, vectorrepositoryall[targetitem])
            for targetitem in train
        }
        sortedfriends = sorted(probeneighbours,
                               key=probeneighbours.get,
                               reverse=True)[:pooldepth]
        illity = sum(1 for friend in sortedfriends if illness[friend] == "1")
        result = "1" if illity > pooldepth * factor else "0"
        # NOTE(review): nullity and the vote threshold assume the pool really
        # holds `pooldepth` neighbours, i.e. len(train) >= pooldepth.
        nullity = pooldepth - illity
        poolconfusionmatrix.addconfusion(gold, result)

        print("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
            testitem, gold, resultc, i1, n1, result, illity, nullity,
            pooldepth))

    print("RANDOM ----------------")
    dummyrandomconfusionmatrix.evaluate()
    print("MAX ----------------")
    dummymaxconfusionmatrix.evaluate()
    print("CENTROID ----------------")
    centroidconfusionmatrix.evaluate()
    print("NEIGHBOURS --------------")
    poolconfusionmatrix.evaluate()
prediction = sortedpredictions[0] logger(prediction + "?" + " " + itemspace.category[item] + ".", debug) for iii in range(itempooldepth): try: logger( itemspace.name[item] + " (" + itemspace.category[item] + ") " + "\t" + str(neighbours[item][sortedneighbours[iii]]) + "\t" + itemspace.name[sortedneighbours[iii]] + " (" + itemspace.category[sortedneighbours[iii]] + ") ", debug) except: logger("keyerror " + str(iii), error) if cleanup: prunedprediction = prunedsortedpredictions[0] confusion.addconfusion(itemspace.category[item], prediction) if cleanup: prunedconfusion.addconfusion(itemspace.category[item], prunedprediction) confusion.evaluate() if cleanup: prunedconfusion.evaluate() for c in categories: try: result[c][itempooldepth] = confusion.carat[c] / confusion.weight[c] prunedresult[c][itempooldepth] = prunedconfusion.carat[ c] / prunedconfusion.weight[c] except KeyError: result[c][itempooldepth] = 0 prunedresult[c][itempooldepth] = 0 logger("Done testing.", monitor)