def getHarmony_from_Trigram(self, trigram, bigram, tonality, firstSymbol,
                            numberChords, duration):
    """Generate a chord progression by sampling from bigram/trigram tables.

    The first chord comes from ``firstSymbol``. The second symbol is drawn
    from ``bigram`` conditioned on the first one, and every following symbol
    is drawn from ``trigram`` conditioned on the two previous symbols.

    Args:
        trigram: table handed to ``Ngram.subTriGram``.
        bigram: table handed to ``Ngram.subBiGram``.
        tonality: passed through to ``self.getChord`` for every symbol.
        firstSymbol: symbol of the opening chord.
        numberChords: requested progression length. The loop runs
            ``numberChords - 2`` times, so at least two chords are always
            produced.
        duration: value assigned to ``duration.type`` of every chord.

    Returns:
        ``[chordList, symbolList]`` where ``symbolList[i]`` is the symbol
        that produced ``chordList[i]``.
    """
    ng = ngram.Ngram()

    def build_chord(symbol):
        # Every chord of the progression gets the same duration type.
        new_chord = chord.Chord(self.getChord(symbol, tonality))
        new_chord.duration.type = duration
        return new_chord

    chordList = [build_chord(firstSymbol)]
    symbolList = [firstSymbol]

    symbol1 = firstSymbol
    symbol2 = ng.getRandomTuple(ng.subBiGram(bigram, firstSymbol))
    chordList.append(build_chord(symbol2))
    # Fix: the second symbol used to be left out of symbolList, which made
    # the two returned lists differ in length and shifted every later pair.
    symbolList.append(symbol2)

    for _ in range(numberChords - 2):
        symbol3 = ng.getRandomTuple(
            ng.subTriGram(trigram, symbol1, symbol2))
        chordList.append(build_chord(symbol3))
        symbolList.append(symbol3)
        # Slide the two-symbol context window forward.
        symbol1, symbol2 = symbol2, symbol3

    return [chordList, symbolList]
def load_ngram(filename):
    """Load an ``ngram.Ngram`` from a text file of ``prefix (count, prob)`` lines.

    Each line is split at the first `` (``: the left part is the prefix and
    the right part holds the integer count and the float probability,
    separated by ``, `` and closed by ``)``.

    Args:
        filename: path of the file to read.

    Returns:
        The populated ``ngram.Ngram`` instance.

    Raises:
        IndexError: if a line does not contain `` (`` or lacks the
            probability field.
        ValueError: if the count or probability cannot be parsed.
    """
    i_ngram = ngram.Ngram()
    # The context manager closes the handle even when a line fails to parse;
    # iterating the handle avoids loading the whole file as readlines() did.
    with open(filename) as handle:
        for line in handle:
            fields = line.split(' (')
            prefix = fields[0]
            probs = fields[1].strip(')\n').split(', ')
            count = int(probs[0])
            prob = float(probs[1])
            i_ngram.set_prob_dict(prefix, count, prob)
    return i_ngram
def test_ngram_count_3():
    """Every top-level entry counted from the sample sentence has count 1."""

    def new_counter():
        # Nested table whose missing keys start with a zero count.
        return defaultdict(lambda: {'count': 0}, {})

    def new_entry():
        # Top-level record: own count plus nested tables under keys 2 and 3.
        return {'count': 0, 2: new_counter(), 3: new_counter()}

    tokens = ['<s>', 'I', 'am', 'robot', '.', '</s>']
    counted = ngram.Ngram().count(tokens, defaultdict(new_entry, {}), 3)

    for _key, entry in counted.items():
        assert entry['count'] == 1
def getRhythm_from_Bigram(self, bigram, startSymbol, totalDuration):
    """Build a note list by walking a bigram table from ``startSymbol``.

    The elements created for the current symbol are appended and their
    ``quarterLength`` values accumulated. A new symbol is drawn, conditioned
    on the current one, for as long as the accumulated length is below
    ``totalDuration``.

    Returns:
        The list of created elements. The start symbol is always emitted,
        whatever ``totalDuration`` is.
    """
    noteList = []
    elapsed = 0
    ng = ngram.Ngram()
    symbol = startSymbol

    while True:
        # NOTE(review): createNote_or_Rest is resolved as a module-level
        # name here, not through self - confirm that is intended.
        for element in createNote_or_Rest(symbol):
            noteList.append(element)
            elapsed = elapsed + element.quarterLength
        if not (elapsed < totalDuration):
            break
        symbol = ng.getRandomTuple(ng.subBiGram(bigram, symbol))

    return noteList
def test_ngram_count_2():
    """Counting with the default order yields count 1 for every entry.

    Entries flagged ``'single'`` must also hold exactly one item in their
    nested table under key 2.
    """

    def new_counter():
        # Nested table whose missing keys start with a zero count.
        return defaultdict(lambda: {'count': 0}, {})

    def new_entry():
        # Top-level record: count, 'single' flag and nested tables 2 and 3.
        return {
            'count': 0,
            'single': False,
            2: new_counter(),
            3: new_counter(),
        }

    tokens = ['<s>', 'I', 'am', 'robot', '.', '</s>']
    counted = ngram.Ngram().count(tokens, defaultdict(new_entry, {}))

    for _key, entry in counted.items():
        assert entry['count'] == 1
        if entry['single'] is True:
            assert len(entry[2]) == 1
def getHarmony_from_Bigram(self, bigram, tonality, firstSymbol, numberChords,
                           duration):
    """Generate a chord progression by sampling from a bigram table.

    The first chord comes from ``firstSymbol``; each following symbol is
    drawn from ``bigram`` conditioned on the previous symbol.

    Args:
        bigram: table handed to ``Ngram.subBiGram``.
        tonality: passed through to ``self.getChord`` for every symbol.
        firstSymbol: symbol of the opening chord.
        numberChords: requested progression length. The loop runs
            ``numberChords - 1`` times, so at least one chord is produced.
        duration: value assigned to ``duration.type`` of every chord.

    Returns:
        ``[chordList, symbolList]`` where ``symbolList[i]`` is the symbol
        that produced ``chordList[i]``.
    """
    ng = ngram.Ngram()

    def build_chord(symbol):
        # Every chord of the progression gets the same duration type.
        new_chord = chord.Chord(self.getChord(symbol, tonality))
        new_chord.duration.type = duration
        return new_chord

    chordList = [build_chord(firstSymbol)]
    symbolList = [firstSymbol]

    symbol = firstSymbol
    for _ in range(numberChords - 1):
        # Fix: subBiGram was called as a bare name; the sibling generators
        # call it as a method of the Ngram helper, so do the same here.
        symbol = ng.getRandomTuple(ng.subBiGram(bigram, symbol))
        chordList.append(build_chord(symbol))
        symbolList.append(symbol)

    return [chordList, symbolList]
help="Top words to print in verbose mode") parser.add_argument("-v", "--verbose", action="count", default=0, dest="verbose", required=False, help="Print status messages to stdout") args = parser.parse_args() assert ( args.neutral_corpus or args.restore), \ "Must supply either neutral corpus or a pickled version" ngram = ngram.Ngram( "Ngram Classifier", args.bad_corpus, args.neutral_corpus, args.verbose, args.top_words, args.pickle, args.restore, args.ngram_count, args.abuse_freq ) server_class = BaseHTTPServer.HTTPServer httpd = server_class(('localhost', 50007), MyHandler) print("Serving") httpd.serve_forever() s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(('localhost', 50007)) s.listen(1) conn, addr = s.accept() print('Connected by', addr) while True:
# A neutral corpus and a restore request cannot both be given.
assert not (args.neutral_corpus and args.restore), \
    "Neutral corpus and restore request are mutually exclusive"

verbose = args.verbose
shoot = args.shoot
mention_limit = args.mention_limit

# Construct our classifiers.
# Currently we only have language classifiers looking for potential
# abuse posts.
# FIXME: Extend for online grooming.
if shoot:
    # (progress message, classifier name, value passed as eighth argument)
    classifier_specs = (
        ("Loading unigram training data", "unigram classifier", 1),
        ("Loading bigram training data", "bigram classifier", 2),
        ("Loading trigram training data", "trigram classifier", 3),
    )
    loaded = []
    for progress_message, classifier_name, order in classifier_specs:
        print(progress_message)
        loaded.append(
            ngram.Ngram(classifier_name,
                        args.bad_corpus,
                        args.neutral_corpus,
                        verbose,
                        args.top_words,
                        args.pickle,
                        args.restore,
                        order,
                        args.abuse_freq))
    unigram, bigram, trigram = loaded
else:
    unigram = bigram = trigram = None
def __init__(self, n):
    """Set up a new game state.

    Args:
        n: forwarded unchanged to ``ngram.Ngram``.
    """
    # Both scores start at zero.
    self.humanScore = self.computerScore = 0
    self.ng = ngram.Ngram(n)
    self.moves = list()
    # presumably the accepted move characters - verify against callers
    self.accepted = 'rps'
def getRhythm_from_Trigram(self, trigram, firstSymbol, secondSymbol,
                           totalDuration, anacrusa, quarterLengthNotaFinal):
    """Build a rhythm by sampling symbols from a trigram table.

    The rhythm is seeded with an optional "R1.0" element (when ``anacrusa``
    is true), then ``firstSymbol`` and ``secondSymbol``. Further symbols are
    drawn conditioned on the two previous accepted symbols until the
    accumulated quarterLength reaches
    ``totalDuration - quarterLengthNotaFinal``. A drawn symbol is only kept
    when ``self.checkTuplet_to_Beat`` and ``self.checkSilence`` accept it and
    it does not push the rhythm past that limit. Finally, when
    ``quarterLengthNotaFinal`` is positive, a closing element is appended.

    NOTE(review): the block nesting below was reconstructed from a flattened
    source; confirm the placement of the silence accounting and of the
    iteration guard against the original file.

    Returns:
        ``[noteList, symbolList]``, or ``None`` once 50 symbols have been
        drawn in the sampling loop.
    """
    noteList = []
    symbolList = []
    ng = ngram.Ngram()
    duration = 0
    silenceDuration = 0
    iteration = 0
    # Room left for sampled symbols once the final note is reserved.
    duracionMaxima = totalDuration - quarterLengthNotaFinal
    # Append the quarter silence (pickup).
    if (anacrusa == True):
        buf = self.createNote_or_Rest("R1.0")
        for item in buf:
            noteList.append(item)
            duration = duration + item.quarterLength
        symbolList.append("R1.0")
    # Seed the rhythm with the two given symbols.
    buf = self.createNote_or_Rest(firstSymbol)
    for item in buf:
        noteList.append(item)
        duration = duration + item.quarterLength
    symbolList.append(firstSymbol)
    buf = self.createNote_or_Rest(secondSymbol)
    for item in buf:
        noteList.append(item)
        duration = duration + item.quarterLength
    symbolList.append(secondSymbol)
    symbol1 = firstSymbol
    symbol2 = secondSymbol
    while (duration < duracionMaxima):
        symbol3 = ng.getRandomTuple(
            ng.subTriGram(trigram, symbol1, symbol2))
        # Check that triplets only appear on the strong beats of the bars.
        if (self.checkTuplet_to_Beat(duration, symbol3)):
            # Check that a maximum number of silent beats is not exceeded.
            if (self.checkSilence(silenceDuration, symbol3)):
                buf = self.createNote_or_Rest(symbol3)
                # Check that the maximum rhythm duration is not exceeded:
                # first total up what the candidate would add.
                durBuf = duration
                for item in buf:
                    durBuf = durBuf + item.quarterLength
                if durBuf <= duracionMaxima:
                    for item in buf:
                        noteList.append(item)
                        duration = duration + item.quarterLength
                        # Accumulate the duration of the rests so the
                        # maximum can be enforced; any symbol without "R"
                        # resets the running total.
                        if ("R" in symbol3):
                            silenceDuration = silenceDuration + item.quarterLength
                        else:
                            silenceDuration = 0
                    # Slide the context window only for accepted symbols.
                    symbol1 = symbol2
                    symbol2 = symbol3
                    symbolList.append(symbol3)
        # Give up after 50 draws.
        iteration += 1
        if iteration == 50:
            return None
    ## print("Ritmo calculado con duracion:" + str(duration))
    # Append the last note.
    if (quarterLengthNotaFinal > 0):
        # Lengths other than 1.0-4.0 leave sbuf as the empty string.
        sbuf = ""
        if (quarterLengthNotaFinal == 1.0):
            sbuf = "N1.0"
        elif (quarterLengthNotaFinal == 2.0):
            sbuf = "N2.0"
        elif (quarterLengthNotaFinal == 3.0):
            sbuf = "N3.0"
        elif (quarterLengthNotaFinal == 4.0):
            sbuf = "N4.0"
        buf = self.createNote_or_Rest(sbuf)
        for item in buf:
            noteList.append(item)
            duration = duration + item.quarterLength
        symbolList.append(sbuf)
    return [noteList, symbolList]
def startParsing(self, composerName, mode, maxTokens):
    """Build and persist Laplace-smoothed n-gram tables for one composer.

    Pipeline: read the raw text, tokenize, drop stop words, keep the
    ``maxTokens`` most frequent tokens, count unigrams/bigrams/trigrams,
    apply Laplace smoothing and save the resulting tables.

    ``fio.getFileNames(composerName, mode)`` supplies the file names, used
    by index as: 0 raw input (read from ``raw/``), 1 smoothed bigram,
    2 smoothed trigram, 3 bigram zero table, 4 trigram zero table,
    5 unigram counts.

    Args:
        composerName: forwarded to ``FileIO.getFileNames``.
        mode: forwarded to ``FileIO.getFileNames``.
        maxTokens: how many of the most frequent tokens to keep.

    Returns:
        None. Progress and failures are reported on stdout; exceptions
        derived from ``Exception`` are caught and reported, not raised.
    """
    try:
        # Initialise the helper classes.
        fio = fileIO.FileIO()
        ng = ngram.Ngram()
        lp = smoothing.Smoothing()

        # Read the raw data.
        print(">>> Reading data raw")
        fileNames = fio.getFileNames(composerName, mode)
        dataRawFile = fileNames[0]
        dataRaw = fio.readText('raw/' + dataRawFile)

        # Tokenization.
        print(">>> Tokenization")
        tokens = ng.getTokens(dataRaw)

        # Stop words.
        print(">>> Removing stop words")
        tokens = ng.removeStopWords(tokens)

        # Keep only the most frequent tokens.
        print(">>> Filtering " + str(maxTokens) + " most frequent tokens")
        tokens = ng.filterMostFrequentTokens(tokens, maxTokens)

        # Unigram.
        print(">>> Counting unigram")
        unigram_count = ng.getUnigram(tokens)
        print(">>> Saving unigram")
        fio.save_obj(unigram_count, fileNames[5])

        # Bigram. The unsmoothed probabilities are computed but not saved;
        # only the Laplace-smoothed tables below are persisted.
        print(">>> Counting bigram")
        bigram_count = ng.getBigram(tokens)
        bigram_prob = ng.calculateBigramProbability(
            unigram_count, bigram_count)

        # Trigram (same remark as for the bigram probabilities).
        print(">>> Counting trigram")
        trigram_count = ng.getTrigram(tokens)
        trigram_prob = ng.calculateTrigramProbability(
            unigram_count, trigram_count)

        # Smoothing.
        # TODO: cite the earlier paper that recommends an appropriate
        # smoothing technique.
        alfa = 0.0001
        print(">>> Laplace smoothing of bigram")
        bigram_prob_Laplace = lp.calculateBigramProbabilityLaplace(
            unigram_count, bigram_count, alfa)
        unigram_ZERO = lp.smoothingLaplaceBigramZERO(
            unigram_count, bigram_count, alfa)

        print(">>> Laplace smoothing of trigram")
        # NOTE(review): the *bigram* Laplace routine is applied to the
        # trigram counts here - confirm this is intended.
        trigram_prob_Laplace = lp.calculateBigramProbabilityLaplace(
            unigram_count, trigram_count, alfa)
        bigram_ZERO = lp.smoothingLaplaceTrigramZERO(
            unigram_count, trigram_count, alfa)

        print(">>> Saving bigram")
        fio.save_obj(bigram_prob_Laplace, fileNames[1])
        fio.save_obj(unigram_ZERO, fileNames[3])

        print(">>> Saving trigram")
        fio.save_obj(trigram_prob_Laplace, fileNames[2])
        fio.save_obj(bigram_ZERO, fileNames[4])

        # Process finished.
        print(">>> Process finished OK")
    except Exception as exc:
        # Fix: a bare ``except:`` also trapped KeyboardInterrupt/SystemExit
        # and hid the cause. Stay best-effort, but report what went wrong.
        print(">>> Process failed")
        print(">>> Cause: " + repr(exc))
def setUp(self):
    """Create the ``ngram.Ngram`` instance (built with 3) used by the tests."""
    n = 3
    self.ngram = ngram.Ngram(n)
def test_ngram_not_integer(self):
    """Constructing an Ngram from a float or a string must raise."""
    for bad_n in (1.2, '3'):
        with self.assertRaises(Exception):
            ngram.Ngram(bad_n)
def test_ngram_negative_n(self):
    """Constructing an Ngram from a negative number must raise."""
    self.assertRaises(Exception, ngram.Ngram, -1)
# A neutral corpus and a restore request cannot both be given.
assert not (args.neutral_corpus and args.restore), \
    "Neutral corpus and restore request are mutually exclusive"

# Construct our classifiers.
if args.bow:
    bagofwords = bow.SimpleBagOfWords("BagOfWords", args.bow)
if args.botw:
    bagoftaggedwords = bow.BagOfTaggedWords("TaggedBagOfWord", args.botw,
                                            args.verbose)

# (progress message, classifier name, value passed as eighth argument)
rater_specs = (
    ("Loading unigram training data", "Unigram", 1),
    ("Loading bigram training data", "Bigram", 2),
    ("Loading trigram training data", "Trigram", 3),
)
raters = []
for progress_message, rater_name, order in rater_specs:
    # Progress messages are only shown in verbose mode.
    if args.verbose:
        print(progress_message)
    raters.append(
        ngram.Ngram(rater_name,
                    args.bad_corpus,
                    args.neutral_corpus,
                    args.verbose,
                    args.top_words,
                    args.pickle,
                    args.restore,
                    order,
                    args.abuse_freq))
unigram, bigram, trigram = raters