testfile[1])
                if a > targetscore[target]:
                    targetscore[target] = a
    sortedtargets = sorted(targets,
                           key=lambda ia: targetscore[ia],
                           reverse=True)
    for rank in range(len(sortedtargets)):
        if sortedtargets[rank] == authorindex:
            averagerankofauthorhit += rank + 1
    targetvote = {}
    for target in targets:
        for cat in categories:
            targetvote[cat] = 0
    for pp in sortedtargets[:itempooldepth]:
        targetvote[categorytable[pp]] += 1
        logger(
            str(pp) + "\t" + str(categorytable[pp]) + "\t" +
            str(targetscore[pp]), debug)
    sortedpredictions = sorted(categories,
                               key=lambda ia: targetvote[ia],
                               reverse=True)
    prediction = sortedpredictions[0]
    confusion.addconfusion(facittable[authornametable[authorindex]],
                           prediction)
# Scoring loop is finished: log progress and let the confusion matrix
# (filled through addconfusion() in the loop above) report its evaluation.
logger("Done testing files.", monitor)
confusion.evaluate()
# averagerankofauthorhit is a running sum of 1-based ranks at this point,
# not yet an average. Print the sum, the number of test vectors and the
# resulting mean as one tab-separated line; the guard prevents a division
# by zero when there are no test vectors.
if len(testvectors) > 0:
    print(averagerankofauthorhit,
          len(testvectors),
          averagerankofauthorhit / len(testvectors),
          sep="\t")
                targetspace.indexspace[otheritem])
    logger("Done calculating neighbours", monitor)

    logger("Pool depth " + str(itempooldepth), monitor)
    if averagelinkage:
        logger("Averagelinkage", monitor)
    if votelinkage:
        logger("Votelinkage", monitor)
    confusion = ConfusionMatrix()
    primeconfusion = ConfusionMatrix()
    targetscore = {}
    for item in testers:
        sortedneighbours = sorted(neighbours[item],
                                  key=lambda hh: neighbours[item][hh],
                                  reverse=True)[:itempooldepth]
        primeconfusion.addconfusion(facittable[testitemspace.name[item]],
                                    targetspace.category[sortedneighbours[0]])
        for target in categories:
            targetscore[target] = 0
        if averagelinkage:  # take all test neighbours and sum their scores
            for neighbour in sortedneighbours:
                targetscore[targetspace.
                            category[neighbour]] += neighbours[item][neighbour]
        elif votelinkage:
            for neighbour in sortedneighbours:
                targetscore[targetspace.category[neighbour]] += 1
        sortedpredictions = sorted(categories,
                                   key=lambda ia: targetscore[ia],
                                   reverse=True)
        prediction = sortedpredictions[0]
        logger(
            prediction + "?" + " " + facittable[testitemspace.name[item]] +
Exemple #3
0
def runbatchtest(fraction, n: int = 100):
    """Run one train/test experiment and print four confusion matrices.

    The first ``n`` keys of ``vectorrepositoryall`` are shuffled and split
    into a training part (the leading ``fraction`` of the keys) and a test
    part (the remainder).  Every test item is then classified as "1" or "0"
    by four methods, each scored in its own ConfusionMatrix:

    * RANDOM    -- a coin flip,
    * MAX       -- always the majority class of the training items,
    * CENTROID  -- the class whose training centroid has the higher cosine
                   to the item,
    * NEIGHBOURS -- a vote among the ``pooldepth`` nearest training items.

    One tab-separated line is printed per test item, followed by the
    evaluation of each confusion matrix.

    Args:
        fraction: share of the sampled keys used for training; the split
            index is ``int(len(keylist) * fraction)``.
        n: number of keys taken from ``vectorrepositoryall`` before
            shuffling.

    Returns:
        None.  Results are reported through ``print`` and ``logger``.
    """
    logger("{} {} {}".format(n, fraction, ticker), monitor)
    # The slice is taken before shuffling, so it is always the same n items
    # that take part; only their assignment to train/test is random.
    keylist = list(vectorrepositoryall.keys())[:n]
    random.shuffle(keylist)
    split = int(len(keylist) * fraction)
    train = keylist[:split]
    test = keylist[split:]
    logger("{} train vs {} test".format(len(train), len(test)), monitor)
    ones = []
    nils = []
    dummymaxconfusionmatrix = ConfusionMatrix()
    dummyrandomconfusionmatrix = ConfusionMatrix()
    centroidconfusionmatrix = ConfusionMatrix()
    poolconfusionmatrix = ConfusionMatrix()
    # Fit the centroids and the majority-class baseline on the TRAINING
    # items only.  Fitting them on the test items would leak the held-out
    # labels into the models that are evaluated on those same items.
    for trainitem in train:
        if illness[trainitem] == "1":
            ones.append(vectorrepositoryall[trainitem])
        else:
            nils.append(vectorrepositoryall[trainitem])
    onecentroid = sparsevectors.centroid(ones)
    nilcentroid = sparsevectors.centroid(nils)
    # Ties go to "1".
    dummymaxguess = "0" if len(nils) > len(ones) else "1"
    # Using the observed class ratio (len(ones) / len(nils)) as threshold
    # was considered and rejected in favour of a plain fifty-fifty split.
    factor = 1 / 2
    for testitem in test:
        gold = illness[testitem]
        dummymaxconfusionmatrix.addconfusion(gold, dummymaxguess)

        dummyrandomguess = "0" if random.random() > factor else "1"
        dummyrandomconfusionmatrix.addconfusion(gold, dummyrandomguess)

        # Centroid classifier: "1" only when strictly closer to the
        # "1"-centroid.
        probe = vectorrepositoryall[testitem]
        i1 = sparsevectors.sparsecosine(probe, onecentroid)
        n1 = sparsevectors.sparsecosine(probe, nilcentroid)
        resultc = "1" if i1 > n1 else "0"
        centroidconfusionmatrix.addconfusion(gold, resultc)

        # Nearest-neighbour vote over the pooldepth most similar training
        # items.
        probeneighbours = {
            targetitem: sparsevectors.sparsecosine(
                probe, vectorrepositoryall[targetitem])
            for targetitem in train
        }
        sortedfriends = sorted(probeneighbours,
                               key=probeneighbours.get,
                               reverse=True)[:pooldepth]
        illity = sum(1 for friend in sortedfriends if illness[friend] == "1")
        result = "1" if illity > pooldepth * factor else "0"
        # NOTE(review): nullity and the vote threshold are based on
        # pooldepth, not on len(sortedfriends); when fewer than pooldepth
        # training items exist, nullity overstates the "0" votes.
        nullity = pooldepth - illity
        poolconfusionmatrix.addconfusion(gold, result)
        print("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
            testitem, gold, resultc, i1, n1, result, illity,
            nullity, pooldepth))
    print("RANDOM ----------------")
    dummyrandomconfusionmatrix.evaluate()
    print("MAX ----------------")
    dummymaxconfusionmatrix.evaluate()
    print("CENTROID ----------------")
    centroidconfusionmatrix.evaluate()
    print("NEIGHBOURS --------------")
    poolconfusionmatrix.evaluate()
        prediction = sortedpredictions[0]
        logger(prediction + "?" + " " + itemspace.category[item] + ".", debug)
        for iii in range(itempooldepth):
            try:
                logger(
                    itemspace.name[item] + " (" + itemspace.category[item] +
                    ") " + "\t" +
                    str(neighbours[item][sortedneighbours[iii]]) + "\t" +
                    itemspace.name[sortedneighbours[iii]] + " (" +
                    itemspace.category[sortedneighbours[iii]] + ") ", debug)
            except:
                logger("keyerror " + str(iii), error)

        if cleanup:
            prunedprediction = prunedsortedpredictions[0]
        confusion.addconfusion(itemspace.category[item], prediction)
        if cleanup:
            prunedconfusion.addconfusion(itemspace.category[item],
                                         prunedprediction)
    confusion.evaluate()
    if cleanup:
        prunedconfusion.evaluate()
    for c in categories:
        try:
            result[c][itempooldepth] = confusion.carat[c] / confusion.weight[c]
            prunedresult[c][itempooldepth] = prunedconfusion.carat[
                c] / prunedconfusion.weight[c]
        except KeyError:
            result[c][itempooldepth] = 0
            prunedresult[c][itempooldepth] = 0
logger("Done testing.", monitor)