def crossVal(args): fileList = [file for file in glob.glob(os.path.join(args.directory, '*.xml'))] if args.folds != None: numFolds = int(args.folds) numFiles = len(fileList) foldIndices = generateFoldIndices(numFolds,numFiles - 1) for i in foldIndices[0]: for f in foldIndices[1:-1]: for j in f: if i == j: return resultsFile = 'Results_' +args.type + '.txt' results = open(resultsFile, 'w') for testFold in range(numFolds): trainingSet = [] for index, fold in enumerate(foldIndices): if index != testFold: for i in fold: trainingSet.append(fileList[i]) testSet = [fileList[j] for j in foldIndices[testFold]] solutionTreeDict, allProductions = music_grammar.getAllProductions(args.directory, args.type, trainingSet, args.type, args.verbose) foldFile = 'fold' + str(testFold) + '_' + args.type + '_trainingProductions.txt' f = open(foldFile, 'w') for prod in allProductions: f.write(prod + '\n') foldTestFile = 'fold' + str(testFold) + '_' + args.type + '_testSetFilenames.txt' f2 = open(foldTestFile, 'w') for filename in testSet: f2.write(filename + '\n') S = Nonterminal('S') smallTrees = music_grammar.collectProductions(allProductions, args.verbose) trainedGrammar = induce_pcfg(S, smallTrees) print(trainedGrammar) print('length of the trainingset is: ' + str(len(trainingSet))) print('length of the testset is: ' + str(len(testSet))) print("starting to get solutions for the test set ") testProductions = [] solutionTreeDictForTestSet, testProductions = music_grammar.getAllProductions(args.directory, args.type, testSet, args.type, args.verbose) foldTestSolutionsFile = 'fold' + str(testFold) + '_' + args.type + '_testSolutions.txt' f3 = open(foldTestSolutionsFile, 'w') #for key, value in solutionTreeDictForTestSet.items(): # f3.write(key + '\n' + str(value) + '\n') json.dump(solutionTreeDictForTestSet,f3) print("parsing the test set") totalCorrect, totalCorrectNonN, totalProductions, totalLeaves, parseTreeStrings = music_grammar.parseAllTestXmls(testSet, trainedGrammar, solutionTreeDictForTestSet, args.verbose, False)#"./MusicXml/Test" #print the parses foldParsesFile = 'fold' + str(testFold) + '_' + args.type + '_parsedTestSet.txt' f4 = open(foldParsesFile, 'w') #for filename, parse in parseTreeStrings.items(): #f4.write(filename + '\n' + parse + '\n') json.dump(parseTreeStrings, f4) percentageCorrect = -1 percentageCorrectNonN = -1 percentageLeaves = -1 if totalProductions > 0: percentageCorrect = totalCorrect / totalProductions percentageCorrectNonN = totalCorrectNonN / totalProductions percentageLeaves = totalLeaves / totalProductions results.write("Fold number " + str(testFold) + " results:\ntotalCorrect: " + str(totalCorrect) + "\npercentageCorrect: " + str(percentageCorrect) + "\ntotalCorrectNonN: " + str(totalCorrectNonN) + "\npercentageCorrectNonN: " + str(percentageCorrectNonN) + "\ntotalProductions: " + str(totalProductions) + "\ntotalLeaves: " + str(totalLeaves) + "\npercentageLeavess: " + str(percentageLeaves) + "\n")
def main(): parser = argparse.ArgumentParser() parser.add_argument("directory", help="Directory that contains melody files") parser.add_argument("solutions", help="Directory that contains the solution files") parser.add_argument("-g", "--grammar", help="The file that specifies a saved grammar, this grammar will be used instead of training a new one") parser.add_argument("-f", "--folds", help="number of folds desired") parser.add_argument("-o", "--outfile", help="The file that the grammar will be saved in") parser.add_argument("-t", "--type", help="The type of solutions file we're using, either 'PR' or 'TS' for Prolongational Reduction or Time-span Tree, respectively") parser.add_argument("-v", "--verbose", help="increase output verbosity") args = parser.parse_args() print(args) if args.verbose == None or args.verbose == 'False': args.verbose = False elif args.verbose == 'True': args.verbose = True #If the grammar is specified, then the "directory" folder will be used as a test set #If the grammar is not specified, it will use 20% of the files in "directory" as a test set, and the rest as a training set to create the grammar #If folds are specified, then it will split the files in "directory" into numFolds groups, applying training and testing, and then adding up the percentages overall allProductions = [] stats = True if args.grammar != None and args.grammar != '': if not os.path.isfile(args.grammar): return f = open(args.grammar, 'r') for line in f.readlines(): allProductions.append(line) S = Nonterminal('S') smallTrees = music_grammar.collectProductions(allProductions, args.verbose) trainedGrammar = induce_pcfg(S, smallTrees) print(trainedGrammar) np_productions = trainedGrammar.productions(Nonterminal('4')) dict = {} for pr in np_productions: dict[pr.rhs()] = pr.prob() np_probDist = DictionaryProbDist(dict) #Used this code for generating specific figures: exampleTree = Tree.fromstring('(S (N (N (5 5)) (N (m4 -4))) (N (6 6)))') #exampleTree.draw() exampleTreeToCompare = Tree.fromstring('(S (N (5 5)) (N (N (m4 -4)) (N (6 6))))') #exampleTreeToCompare.draw() validate_tree.compareTreesBottomUp(exampleTree, exampleTreeToCompare) #exampleTreeToCompare.draw() for i in range(100): rightHand = np_probDist.generate() print(rightHand) print(len(rightHand)) generatedTree = pcfg_generate.generate(trainedGrammar) print('resulting tree: ') generatedTreeObj = Tree.fromstring(generatedTree) print(generatedTreeObj) print(str(generatedTreeObj.leaves())) print('\n\n') embellishedTree = pcfg_generate.expandAllLeaves(trainedGrammar, generatedTreeObj) print(embellishedTree) print(str(Tree.fromstring(str(embellishedTree)).leaves())) fileToTest = "./MusicXml/72_Preludes 1 La fille aux cheveux de lin.xml" musicXmlTest = converter.parse(fileToTest) curPitchList = music_grammar.getPitchListFromFile(fileToTest) intervalList = music_grammar.getIntervalStringsFromPitchList(curPitchList) with open(music_grammar.musicGrammarFilename, 'r') as f: musicGrammarString = f.read() musicGrammar = CFG.fromstring(musicGrammarString) parser = ChartParser(musicGrammar, trace=2) #parses = parser.parse(intervalList) print(intervalList) print('num intervals is: ' + str(len(intervalList))) #numTrees = sum(1 for _ in parses) #print('numTrees is: ' + str(numTrees)) #return #this is for the musical examples trainedParser = ViterbiParser(trainedGrammar) parses = trainedParser.parse_all(intervalList) bestParse = parses[0] #bestParse.draw() treeType = bestParse.convert(ParentedTree) parse_depth = 0 depth = score_from_tree.get_total_depth_of_tree_obj(bestParse, parse_depth) print('depth is : ' + str(depth)) print('builtin height is : ' + str(bestParse.height())) print(bestParse) bestParse.draw() #score_from_tree.get_tree_obj_to_negative_depth(bestParse, 2, parse_depth) #prunedBestParse, removedLeaves, leafIndex= score_from_tree.remove_embellishment_rules_from_tree_below_depth(bestParse, {}, depth - 2, 0, 0) prunedBestParse, removedLeaves, leafIndex, maxSubtreeDepth = score_from_tree.remove_embellishment_rules_from_tree_negative_depth(bestParse, {}, 3, 0, 0) print(prunedBestParse) #for s in parentedBestParse.subtrees(lambda t: t.height() > parse_depth - 3): #treepos = parentedBestParse.treeposition(s) #parentedBestParse.remove(treepos) prunedBestParse.draw() score_from_tree.get_melody_from_parse_tree(bestParse, removedLeaves, musicXmlTest) PR_fileToTest = "./MusicXml/PR/PR-39_Andante C dur.xml" ET = ElementTree() ET.parse(PR_fileToTest) root = ET.getroot() rootName = args.type.lower() topXml = root.find(rootName) depth = 0 depth = score_from_tree.get_total_depth_of_tree(topXml, depth, rootName) print('depth is ' + str(depth)) musicXmlTest.show() for d in reversed(range(0, depth - 1)): pitch_refs = score_from_tree.gather_note_refs_of_depth(topXml, [], rootName, d, 0) pitch_refs.sort(key=music_grammar.pitchRefToNum) melody_of_depth = score_from_tree.pitch_refs_to_notes(pitch_refs, musicXmlTest) melody_of_depth.show() print (pitch_refs) #examples with 3-child nodes #, './MusicXml/MSC-166.xml', './MusicXml/MSC-103.xml', './MusicXml/37_Sonate fur Klavier Nr.48 C dur Op.30-1 Mov.1.xml', './MusicXml/MSC-211.xml' #wrong buti like it , './MusicXml/MSC-238.xml' #like: ./MusicXml/MSC-141.xml fold 2, #filesToTest = ['./MusicXml/MSC-238.xml', './MusicXml/39_Andante C dur.xml']#fold 1 #filesToTest = ['./MusicXml/MSC-224.xml', './MusicXml/MSC-141.xml']# fold 2 #filesToTest = ["./MusicXml/03_Bagatelle 'Fur Elise' WoO.59.xml"]#fold 3 #filesToTest = ['./MusicXml/MSC-111.xml'] #['./MusicXml/MSC-108.xml', './MusicXml/01_Waltz in E flat Grande Valse Brillante Op.18.xml', './MusicXml/MSC-231.xml', './MusicXml/37_Sonate fur Klavier Nr.48 C dur Op.30-1 Mov.1.xml', './MusicXml/59_Schwanengesang No.1 Op.72-4 D.957-4 Standchen.xml']#fold4 #filesToTest = ['./MusicXml/MSC-111.xml', './MusicXml/MSC-108.xml', './MusicXml/01_Waltz in E flat Grande Valse Brillante Op.18.xml', './MusicXml/MSC-231.xml', './MusicXml/59_Schwanengesang No.1 Op.72-4 D.957-4 Standchen.xml'] #PR #filesToTest = ['./MusicXml/80_Symphonie Nr.40 g moll KV.550 1.Satz.xml', './MusicXml/31_Sinfonie Nr.9 d moll Op.125 4.Satz An die Freude.xml']# fold 0 PR #filesToTest = ['./MusicXml/34_Water Music in D major HWV 349 No.11 Alla Hornpipe.xml', './MusicXml/02_Moments Musicaux.xml']#fold 0 20% #filesToTest = ['./MusicXml/84_Toccata und Fuge d moll BWV565.xml'] #fold 1 #filesToTest = ['./MusicXml/33_Swan Lake Op.20 No.9 Finale.xml', './MusicXml/40_Alpengluhen Op.193.xml']# fold 3 Pr #filesToTest = ['./MusicXml/57_Waves of the Danube.xml']#, './MusicXml/60_Ma Vlast Moldau.xml']# fold 3 PR < 20% filesToTest = ['./MusicXml/02_Moments Musicaux.xml']#fold 4 ts totalCorrect, totalCorrectNonN, totalProductions, totalLeaves, parseTreeStrings = music_grammar.parseAllTestXmls(filesToTest, trainedGrammar, solutionTreeDictForTestSet, args.verbose, False) solutionTreeDictForTestSet, testProductions = music_grammar.getAllProductions(args.directory, args.type, filesToTest, args.type, args.verbose) parseFilename = "fold4_" + args.type + "_parsedTestSet.txt" parseFile = open(parseFilename, 'r') parses = json.load(parseFile) for filename, solutionTree in parseTreeStrings.items(): if "_afterComparison" in filename: continue treeSolution = solutionTreeDictForTestSet[filename] percentageCorrect = -1 print(filename) treeSolutionObj = Tree.fromstring(treeSolution) treeSolutionObj.draw() parseTreeNoProbabilities = removeProbability(str(parseTreeStrings[filename])) parseTreeObj = Tree.fromstring(parseTreeNoProbabilities) parseTreeObj.draw() parseAfterCompNoProbabilities = removeProbability(str(parseTreeStrings[filename+'_afterComparison'])) parseAfterCompObj = Tree.fromstring(parseAfterCompNoProbabilities) parseAfterCompObj.draw() percentageCorrectNonN = -1 percentageLeaves = -1 if totalProductions > 0: percentageCorrect = totalCorrect / totalProductions percentageCorrectNonN = totalCorrectNonN / totalProductions percentageLeaves = totalLeaves / totalProductions print("results:\ntotalCorrect: " + str(totalCorrect) + "\npercentageCorrect: " + str(percentageCorrect) + "\ntotalCorrectNonN: " + str(totalCorrectNonN) + "\npercentageCorrectNonN: " + str(percentageCorrectNonN) + "\ntotalProductions: " + str(totalProductions) + "\ntotalLeaves: " + str(totalLeaves) + "\npercentageLeavess: " + str(percentageLeaves) + "\n") #finish this case return if stats == True: totalCorrect = 0 totalCorrectNonN = 0 totalProductions = 0 totalLeaves = 0 #./MusicXml/MSC-103.xml, ./MusicXml/24_Orphee aux Enfers Overture.xml, ./MusicXml/MSC-211.xml, ./MusicXml/39_Andante C dur.xml,./MusicXml/01_Waltz in E flat Grande Valse Brillante Op.18.xml #small ones #./MusicXml/MSC-224.xml #pretty good one: ['./MusicXml/57_Waves of the Danube.xml', './MusicXml/MSC-107.xml','./MusicXml/59_Schwanengesang No.1 Op.72-4 D.957-4 Standchen.xml', './MusicXml/MSC-231.xml'] goodOnesTS = ['./MusicXml/57_Waves of the Danube.xml', './MusicXml/MSC-107.xml','./MusicXml/59_Schwanengesang No.1 Op.72-4 D.957-4 Standchen.xml', './MusicXml/MSC-231.xml'] goodOnesPR = ['./MusicXml/02_Moments Musicaux.xml',"./MusicXml/95_12 Variationen uber ein franzosisches Lied 'Ah,vous dirai-je, maman' C dur K.265 K6.300e.xml"] #music_grammar.getAllProductionsHarmonicGrammar(args.directory, args.type, [goodOnesPR[0]], args.type, "MINOR", args.verbose) #if stats == True: # return num_skip = 0 for fold in range(int(args.folds)): bestSolutionFiles = [] worstSolutionFile = "" bestPercentage = .25 worstPercentage = .2 #get parses from file parseFilename = "fold" + str(fold) + "_" + args.type + "_parsedTestSet.txt" parseFile = open(parseFilename, 'r') parses = json.load(parseFile) #get solutions from file solutionsFilename = "fold" + str(fold) + "_" + args.type + "_testSolutions.txt" solutionsFile = open(solutionsFilename, 'r') solutions = json.load(solutionsFile) foldLeaves = 0 foldProductions = 0 foldCorrect = 0 foldCorrectNonN = 0 for filename, solutionTree in solutions.items(): if parses[filename] != None and parses[filename] != '': solutionTreeObj = Tree.fromstring(solutionTree) parseStr = parses[filename] probabilisticPart = re.findall('(\(p=[^()]*\))', parseStr) indexOfProbPart = parseStr.index(probabilisticPart[0]) parseTreeObj = Tree.fromstring(parseStr[:indexOfProbPart]) #here's where we look at example reductions in musical scores curMusicXml = converter.parse(filename) if len(curMusicXml.flat.notes) >= 15 and len(curMusicXml.flat.notes) < 20 or filename == './MusicXml/03_Bagatelle \'Fur Elise\' WoO.59.xml': print(filename) if filename != './MusicXml/03_Bagatelle \'Fur Elise\' WoO.59.xml': continue #if args.type == 'PR': solutionFilename = args.type + "-" + basename(filename) solutionFilepath = args.directory + '/' + args.type + '/' + solutionFilename #else: # solutionFilename = args.type + "-" + basename(filename)[4:] # solutionFilepath = args.directory + '/' + args.type + '/' + solutionFilename[:-4] + '_1' + solutionFilename[-4:] ET = ElementTree() ET.parse(solutionFilepath) root = ET.getroot() rootName = args.type.lower() topXml = root.find(rootName) #topXml.show() if num_skip > 0: num_skip -= 1 continue parseTreeObj.draw() #score_from_tree.print_reductions_for_parse_tree(parseTreeObj, curMusicXml) depth = 0 depth = score_from_tree.get_total_depth_of_tree(topXml, depth, rootName) print('depth is ' + str(depth)) curMusicXml.show() for d in reversed(range(0, depth - 1)): pitch_refs = score_from_tree.gather_note_refs_of_depth(topXml, [], rootName, d, 0) pitch_refs.sort(key=music_grammar.pitchRefToNum) melody_of_depth = score_from_tree.pitch_refs_to_notes(pitch_refs, curMusicXml) melody_of_depth.show() print (pitch_refs) continue parseTreeObjAfterComparison = copy.deepcopy(parseTreeObj) numProductions = len(solutionTreeObj.productions()) foldProductions += numProductions bottomCorrect, bottomCorrectNonN = validate_tree.compareTreesBottomUp(solutionTreeObj, parseTreeObjAfterComparison) if bottomCorrect / numProductions > worstPercentage:# and bottomCorrect / numProductions < bestPercentage: bestSolutionFiles.append(filename) if filename in goodOnesPR and False: print(filename) print(parseTreeObj.leaves()) solutionTreeObj.draw() parseTreeObj.draw() parseTreeObjAfterComparison.draw() #bestPercentage = bottomCorrect / numProductions #if bottomCorrect / numProductions < worstPercentage: # worstSolutionFile = filename # worstPercentage = bottomCorrect / numProductions foldLeaves = len(solutionTreeObj.leaves()) foldCorrect += bottomCorrect foldCorrectNonN += bottomCorrectNonN totalProductions += foldProductions totalLeaves += foldLeaves totalCorrect += foldCorrect totalCorrectNonN += foldCorrectNonN foldPercentageCorrect = -1 foldPercentageCorrectNonN = -1 foldPercentageLeaves = -1 if foldProductions > 0: foldPercentageCorrect = foldCorrect / foldProductions foldPercentageCorrectNonN = foldCorrectNonN / foldProductions foldPercentageLeaves = foldLeaves / foldProductions print("Fold number " + str(fold) + " results:\nfoldCorrect: " + str(foldCorrect) + "\nfoldPercentageCorrect: " + str(foldPercentageCorrect) + "\nfoldCorrectNonN: " + str(foldCorrectNonN) + "\nfoldPercentageCorrectNonN: " + str(foldPercentageCorrectNonN) + "\nfoldProductions: " + str(foldProductions) + "\nfoldLeaves: " + str(foldLeaves) + "\nfoldPercentageLeaves: " + str(foldPercentageLeaves)) print("Best: " + str(bestSolutionFiles) + ', ' + str(bestPercentage)) print("Worst: " + worstSolutionFile + ', ' + str(worstPercentage)+ "\n") percentageCorrect = -1 percentageCorrectNonN = -1 percentageLeaves = -1 if totalProductions > 0: percentageCorrect = totalCorrect / totalProductions percentageCorrectNonN = totalCorrectNonN / totalProductions percentageLeaves = totalLeaves / totalProductions print("results:\ntotalCorrect: " + str(totalCorrect) + "\npercentageCorrect: " + str(percentageCorrect) + "\ntotalCorrectNonN: " + str(totalCorrectNonN) + "\npercentageCorrectNonN: " + str(percentageCorrectNonN) + "\ntotalProductions: " + str(totalProductions) + "\ntotalLeaves: " + str(totalLeaves) + "\npercentageLeavess: " + str(percentageLeaves) + "\n") #finish this case return #cross-validate crossVal(args)