def matchFinder(list1, list2):
    """Compare every element of list1 with every element of list2 and report matches.

    Each pair ``(a, b)`` is handed to ``trainClassifier`` as
    ``[a.verts, b.verts]``; the pair counts as a match when that call
    returns 1.

    NOTE(review): the single-argument ``trainClassifier`` call is kept from
    the original code. Other callers in this file pass a classifier plus two
    feature lists to ``trainClassifier`` and use ``classify`` for prediction,
    so confirm which routine is really intended here.

    Parameters:
        list1: iterable of objects exposing a ``verts`` attribute.
        list2: iterable of objects exposing a ``verts`` attribute.

    Returns:
        A list of ``[a, b]`` pairs that were reported as matching. The list
        is empty when no pair matched or when either input is empty.
    """
    matches = []
    for left in list1:
        for right in list2:
            if trainClassifier([left.verts, right.verts]) == 1:
                match = [left, right]
                print(match)
                matches.append(match)

    # Every pair that is not a match is a mismatch; the number of pairs is
    # len(list1) * len(list2), which also covers lists of unequal length.
    mismatches = len(list1) * len(list2) - len(matches)
    print("There were %s matches, and %s mismatches found."
          % (len(matches), mismatches))
    # Report the collected matches directly; calling matchFinder again here
    # would recurse without end.
    print("Following IDs match: %s" % (matches,))
    return matches
def featuretrainer(f, numiter, biniter, seeds):
    """Train a classifier on fixed sets of matching and non-matching IDs.

    ``f`` is called twice as ``f(left_ids, right_ids, biniter)``: first with
    the hard-coded matching ID lists, then with the hard-coded non-matching
    ID lists. Both results must provide ``values()``; the length of the first
    value of the matching result is taken as the number of features.

    Parameters:
        f: feature-building callable, invoked as described above.
        numiter: forwarded to ``createClassifier`` as the number of trees.
        biniter: forwarded unchanged to ``f``.
        seeds: forwarded unchanged to ``createClassifier``.

    Returns:
        Whatever ``trainClassifier`` returns (the original code names this
        value ``outofbag``).
    """
    # Hard-coded training IDs. Presumably entry k of a "left" list is paired
    # with entry k of the corresponding "right" list -- confirm against f.
    left_match_ids = [73337, 73698, 73230, 74504, 72481, 72295, 71887,
                      73544, 73675, 72743, 74329, 74434, 79954]
    right_match_ids = [75616, 75783, 76408, 76825, 105203, 74877, 75408,
                       75949, 76718, 75854, 77041, 76923, 92479]
    left_wrong_ids = [77161, 76052, 70195, 89088, 77829, 81321, 89147,
                      83589, 88107, 94359, 98723, 99045, 99118]
    right_wrong_ids = [77155, 82591, 83068, 89094, 79740, 81032, 89245,
                       85171, 89060, 96733, 101155, 87617, 77177]

    match_table = f(left_match_ids, right_match_ids, biniter)
    wrong_table = f(left_wrong_ids, right_wrong_ids, biniter)

    # numiter is the number of trees in the random forest; one attribute is
    # added on top of the feature count to include the class.
    attribute_count = len(match_table.values()[0]) + 1
    forest = createClassifier(numiter, attribute_count, seeds)

    # --------------------List of possible tests (disabled)--------------
    # print "lch55 left right:", classify(classifier, [featureList([72481], [105203], biniter).values()[0]])
    # print "random against random tree:", classify(classifier, [featureList([87617], [77155], biniter).values()[0]])
    # print "lesA against other lesA:", classify(classifier, [featureList([73337], [75616], biniter).values()[0]])
    # print "false positive match?:", classify(classifier, [featureList([83486], [97790], biniter).values()[0]])
    # print "small fragment vs big:", classify(classifier, [featureList([99370], [93400], biniter).values()[0]])
    # print "handle against other:", classify(classifier, [featureList([99580], [99640], biniter).values()[0]])

    return trainClassifier(forest, match_table.values(), wrong_table.values())
def _without_index(items, skip, count):
    """Return the first *count* entries of *items*, leaving out index *skip*."""
    return [items[pos] for pos in range(count) if pos != skip]


def kfold(mleft, mright, nmleft, nmright, numtree, biniter, seed):
    """Leave-one-out cross-validation of the RandomForest pair classifier.

    For every index k, the matching pair ``(mleft[k], mright[k])`` and the
    non-matching pair ``(nmleft[k], nmright[k])`` are held out. A classifier
    is trained on the remaining pairs and then asked to classify the two
    held-out pairs.

    Parameters:
        mleft, mright: parallel sequences; entry k of each forms a matching
            pair. ``len(mleft)`` determines the number of folds.
        nmleft, nmright: parallel sequences; entry k of each forms a
            non-matching pair. Only the first ``len(mleft)`` entries are used.
        numtree: forwarded to ``createClassifier`` as the number of trees.
        biniter: forwarded unchanged to ``featureList``.
        seed: forwarded unchanged to ``createClassifier``.

    Returns:
        The tuple ``("The performance is:", performance,
        "and the mean oob is:", moob)``. ``performance`` is the fraction of
        held-out pairs whose classification result starts with the expected
        value (1.0 for matching pairs, 0.0 for non-matching pairs) and
        ``moob`` is the mean over all folds of the value returned by
        ``trainClassifier``.

    Raises:
        ValueError: if ``mleft`` is empty, since no fold can be evaluated.
        IndexError: if any other input is shorter than ``mleft``.
    """
    folds = len(mleft)
    if folds == 0:
        raise ValueError("kfold needs at least one matching pair to evaluate")

    matchresult = []
    nonmatchresult = []
    oob = 0.0
    for held_out in range(folds):
        # Drop the held-out pair by position, not by value, so that a
        # repeated ID cannot remove extra entries and shift the left/right
        # lists out of step with each other.
        matching = featureList(
            _without_index(mleft, held_out, folds),
            _without_index(mright, held_out, folds),
            biniter)
        nonmatching = featureList(
            _without_index(nmleft, held_out, folds),
            _without_index(nmright, held_out, folds),
            biniter)

        match_rows = list(matching.values())
        nonmatch_rows = list(nonmatching.values())
        numFeatures = len(match_rows[0])
        classifier = createClassifier(numtree, numFeatures + 1, seed)  # +1 to include the class
        outofbag = trainClassifier(classifier, match_rows, nonmatch_rows)
        oob += float(outofbag)
        print("oob = %s" % (outofbag,))

        # Classify each held-out pair once and reuse the result for both the
        # printed report and the score.
        held_match = featureList([mleft[held_out]], [mright[held_out]], biniter)
        match_class = classify(classifier, [list(held_match.values())[0]])
        held_nonmatch = featureList([nmleft[held_out]], [nmright[held_out]], biniter)
        nonmatch_class = classify(classifier, [list(held_nonmatch.values())[0]])

        print("match vs raining set: %s" % (match_class,))
        print("nonmatch vs training set: %s" % (nonmatch_class,))
        matchresult.append(match_class)
        nonmatchresult.append(nonmatch_class)

    correct = sum(1 for m in matchresult if m[0] == 1.0)
    correct += sum(1 for m in nonmatchresult if m[0] == 0.0)

    elements = 2 * len(matchresult)
    performance = float(correct) / float(elements)
    moob = oob / float(folds)
    return "The performance is:", performance, "and the mean oob is:", moob