Python Ngram Beispiele, ngram.Ngram Python Beispiele

Beispiel #1

0

Datei anzeigen

    def getHarmony_from_Trigram(self, trigram, bigram, tonality, firstSymbol,
                                numberChords, duration):
        """Generate a chord sequence by sampling from bigram and trigram models.

        The first chord is built from ``firstSymbol``. The second symbol is
        drawn with ``Ngram.getRandomTuple`` from ``Ngram.subBiGram(bigram,
        firstSymbol)``; every following symbol is drawn from
        ``Ngram.subTriGram(trigram, previous_two_symbols)``.

        Args:
            trigram: Trigram model handed to ``Ngram.subTriGram``.
            bigram: Bigram model handed to ``Ngram.subBiGram``.
            tonality: Forwarded to ``self.getChord`` together with each symbol.
            firstSymbol: Symbol used for the first chord.
            numberChords: Requested number of chords. The first chord is
                always produced, so values below 1 still yield one chord.
            duration: Value assigned to ``duration.type`` of every chord.

        Returns:
            ``[chordList, symbolList]`` where both lists have the same length
            and ``symbolList[i]`` is the symbol ``chordList[i]`` was built from.
        """
        ng = ngram.Ngram()
        chordList = []
        symbolList = []

        def add_chord(symbol):
            # Append to both lists together so they can never get out of step.
            c = chord.Chord(self.getChord(symbol, tonality))
            c.duration.type = duration
            chordList.append(c)
            symbolList.append(symbol)

        add_chord(firstSymbol)
        if numberChords < 2:
            # Nothing more was requested; do not sample a second chord.
            return [chordList, symbolList]

        symbol1 = firstSymbol
        symbol2 = ng.getRandomTuple(ng.subBiGram(bigram, firstSymbol))
        # The original appended this chord but forgot its symbol.
        add_chord(symbol2)

        for _ in range(numberChords - 2):
            symbol3 = ng.getRandomTuple(
                ng.subTriGram(trigram, symbol1, symbol2))
            add_chord(symbol3)
            # Slide the two-symbol context window forward.
            symbol1, symbol2 = symbol2, symbol3
        return [chordList, symbolList]

Beispiel #2

0

Datei anzeigen

def load_ngram(filename):
    """Build an ``ngram.Ngram`` from a text file of prefix/count/probability rows.

    Each line is split at the first ``' ('``: the left part is the prefix and
    the right part, after stripping ``')'`` and the newline, is split on
    ``', '`` into an integer count and a float probability.

    Args:
        filename: Path of the file to read.

    Returns:
        The ``ngram.Ngram`` instance after ``set_prob_dict(prefix, count,
        prob)`` has been called once per line.

    Raises:
        IndexError, ValueError: If a line does not have the expected shape.
    """
    i_ngram = ngram.Ngram()
    # The context manager closes the file even when a malformed line raises.
    with open(filename) as handle:
        for line in handle:
            fields = line.split(' (')
            prefix = fields[0]
            probs = fields[1].strip(')\n').split(', ')
            count = int(probs[0])
            prob = float(probs[1])
            i_ngram.set_prob_dict(prefix, count, prob)
    return i_ngram

Beispiel #3

0

Datei anzeigen

def test_ngram_count_3():
    """Call ``Ngram.count`` with 3 as its third argument on one sentence.

    Every value in the returned mapping must have ``'count'`` equal to 1.
    """
    def new_entry():
        # Default record for a key that has not been seen yet.
        return {
            'count': 0,
            2: defaultdict(lambda: {'count': 0}, {}),
            3: defaultdict(lambda: {'count': 0}, {}),
        }

    tokens = ['<s>', 'I', 'am', 'robot', '.', '</s>']
    table = defaultdict(new_entry, {})

    counter = ngram.Ngram()
    table = counter.count(tokens, table, 3)

    for _key, entry in table.items():
        assert entry['count'] == 1

Beispiel #4

0

Datei anzeigen

Datei: rhythm.py Projekt: BrianComposer/HarmonicVoice

 def getRhythm_from_Bigram(self, bigram, startSymbol, totalDuration):
     """Generate a rhythm by sampling successive symbols from a bigram model.

     The items created for ``startSymbol`` are emitted first. While the
     accumulated ``quarterLength`` is below ``totalDuration``, the next
     symbol is drawn with ``Ngram.getRandomTuple`` from
     ``Ngram.subBiGram(bigram, symbol)`` and its items are appended.

     Args:
         bigram: Bigram model handed to ``Ngram.subBiGram``.
         startSymbol: Symbol the rhythm starts with.
         totalDuration: Sampling stops once the accumulated quarter length
             is no longer below this value (the last symbol may overshoot).

     Returns:
         The list of items produced by ``self.createNote_or_Rest``.
     """
     noteList = []
     duration = 0
     ng = ngram.Ngram()
     # createNote_or_Rest is a method (the trigram variant calls it through
     # self); the bare-name call used here before would raise NameError.
     buf = self.createNote_or_Rest(startSymbol)
     for item in buf:
         noteList.append(item)
         duration = duration + item.quarterLength
     symbol = startSymbol
     while duration < totalDuration:
         symbol = ng.getRandomTuple(ng.subBiGram(bigram, symbol))
         buf = self.createNote_or_Rest(symbol)
         for item in buf:
             noteList.append(item)
             duration = duration + item.quarterLength
     return noteList

Beispiel #5

0

Datei anzeigen

def test_ngram_count_2():
    """Call ``Ngram.count`` without an explicit third argument on one sentence.

    Every value in the returned mapping must have ``'count'`` equal to 1, and
    values whose ``'single'`` flag is ``True`` must hold exactly one item
    under key ``2``.
    """
    def new_entry():
        # Default record for a key that has not been seen yet.
        return {
            'count': 0,
            'single': False,
            2: defaultdict(lambda: {'count': 0}, {}),
            3: defaultdict(lambda: {'count': 0}, {}),
        }

    tokens = ['<s>', 'I', 'am', 'robot', '.', '</s>']
    table = defaultdict(new_entry, {})

    counter = ngram.Ngram()
    table = counter.count(tokens, table)

    for _key, entry in table.items():
        assert entry['count'] == 1
        if entry['single'] is True:
            assert len(entry[2]) == 1

Beispiel #6

0

Datei anzeigen

 def getHarmony_from_Bigram(self, bigram, tonality, firstSymbol,
                            numberChords, duration):
     """Generate a chord sequence by sampling successive symbols from a bigram.

     The first chord is built from ``firstSymbol``; each following symbol is
     drawn with ``Ngram.getRandomTuple`` from ``Ngram.subBiGram(bigram,
     previous_symbol)``.

     Args:
         bigram: Bigram model handed to ``Ngram.subBiGram``.
         tonality: Forwarded to ``self.getChord`` together with each symbol.
         firstSymbol: Symbol used for the first chord.
         numberChords: Requested number of chords; ``numberChords - 1``
             symbols are sampled after the first one.
         duration: Value assigned to ``duration.type`` of every chord.

     Returns:
         ``[chordList, symbolList]``; ``symbolList[i]`` is the symbol that
         ``chordList[i]`` was built from.
     """
     chordList = []
     symbolList = []
     ng = ngram.Ngram()
     c = chord.Chord(self.getChord(firstSymbol, tonality))
     c.duration.type = duration
     chordList.append(c)
     symbolList.append(firstSymbol)
     symbol = firstSymbol
     for _ in range(numberChords - 1):
         # subBiGram is a method of the Ngram instance; the bare-name call
         # used here before would raise NameError.
         symbol = ng.getRandomTuple(ng.subBiGram(bigram, symbol))
         c = chord.Chord(self.getChord(symbol, tonality))
         c.duration.type = duration
         chordList.append(c)
         symbolList.append(symbol)
     return [chordList, symbolList]

Beispiel #7

0

Datei anzeigen

Datei: ngram_freq.py Projekt: JackStaples/TrollTracker

                      help="Top words to print in verbose mode")

    # -v/--verbose is a counted flag: each occurrence increments args.verbose
    # (0 when the flag is absent).
    parser.add_argument("-v", "--verbose",
                      action="count",
                      default=0,
                      dest="verbose",
                      required=False,
                      help="Print status messages to stdout")

    args = parser.parse_args()

    # At least one of args.neutral_corpus / args.restore must be set.
    # Note that assert statements are removed when Python runs with -O.
    assert ( args.neutral_corpus or args.restore), \
             "Must supply either neutral corpus or a pickled version"

    # NOTE(review): this rebinds the name `ngram`, which is also the module
    # being called on the right-hand side. If this code lives inside a
    # function, `ngram` becomes a local name and the call raises
    # UnboundLocalError -- confirm the enclosing scope.
    ngram = ngram.Ngram( "Ngram Classifier", args.bad_corpus, args.neutral_corpus,
                         args.verbose, args.top_words,
                         args.pickle, args.restore, args.ngram_count, args.abuse_freq )


    # Serve HTTP on localhost:50007 with MyHandler; serve_forever() blocks
    # while the server is running.
    server_class = BaseHTTPServer.HTTPServer
    httpd = server_class(('localhost', 50007), MyHandler)
    print("Serving")
    httpd.serve_forever()


    # NOTE(review): the raw-socket code below binds the same address as the
    # HTTP server above and only runs if serve_forever() returns -- it looks
    # like leftover code; confirm before relying on it.
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(('localhost', 50007))
    s.listen(1)
    conn, addr = s.accept()
    print('Connected by', addr)
    while True:

Beispiel #8

0

Datei anzeigen

Datei: protect.py Projekt: JackStaples/TrollTracker

    # A neutral corpus and a restore request must not be supplied together.
    # Note that assert statements are removed when Python runs with -O.
    assert not (args.neutral_corpus and args.restore), \
        "Neutral corpus and restore request are mutually exclusive"

    verbose = args.verbose
    shoot = args.shoot
    mention_limit = args.mention_limit

    # Construct our classifiers
    # Currently we only have language classifiers looking for potential
    # abuse posts.
    # FIXME: Extend for online grooming.
    # The three classifiers receive identical arguments except for the name
    # and the eighth positional argument (1, 2, 3).
    # print(...) with a single string behaves the same on Python 2 and 3,
    # unlike the Python 2-only print statement used here before.
    if shoot:
        print("Loading unigram training data")
        unigram = ngram.Ngram("unigram classifier", args.bad_corpus,
                              args.neutral_corpus, verbose, args.top_words,
                              args.pickle, args.restore, 1, args.abuse_freq)

        print("Loading bigram training data")
        bigram = ngram.Ngram("bigram classifier", args.bad_corpus,
                             args.neutral_corpus, verbose, args.top_words,
                             args.pickle, args.restore, 2, args.abuse_freq)

        print("Loading trigram training data")
        trigram = ngram.Ngram("trigram classifier", args.bad_corpus,
                              args.neutral_corpus, verbose, args.top_words,
                              args.pickle, args.restore, 3, args.abuse_freq)
    else:
        # No classifiers are built when args.shoot is falsy.
        unigram = None
        bigram = None
        trigram = None

Beispiel #9

0

Datei anzeigen

	def __init__(self, n):
		"""Initialise both scores to zero, create the n-gram model, start an empty move list.

		Args:
			n: Passed unchanged to ``ngram.Ngram``.
		"""
		self.humanScore = self.computerScore = 0
		self.ng = ngram.Ngram(n)
		self.moves = list()
		# NOTE(review): presumably the characters accepted as moves -- confirm
		# against the code that reads this attribute.
		self.accepted = 'rps'
Beispiel #10

0

Datei anzeigen

Datei: rhythm.py Projekt: BrianComposer/HarmonicVoice

    def getRhythm_from_Trigram(self, trigram, firstSymbol, secondSymbol,
                               totalDuration, anacrusa,
                               quarterLengthNotaFinal):
        """Generate a rhythm by sampling symbols from a trigram model.

        ``firstSymbol`` and ``secondSymbol`` are emitted first (preceded by an
        ``"R1.0"`` symbol when ``anacrusa == True``). Further symbols are then
        drawn with ``Ngram.getRandomTuple`` from ``Ngram.subTriGram`` for the
        two preceding symbols. A candidate is accepted only if
        ``self.checkTuplet_to_Beat`` and ``self.checkSilence`` both pass and
        adding it keeps the accumulated quarter length within
        ``totalDuration - quarterLengthNotaFinal``. Finally, when
        ``quarterLengthNotaFinal > 0``, a closing symbol is emitted: ``"N1.0"``
        to ``"N4.0"`` for the values 1.0 to 4.0, otherwise the empty string.

        Args:
            trigram: Trigram model handed to ``Ngram.subTriGram``.
            firstSymbol: First rhythm symbol.
            secondSymbol: Second rhythm symbol.
            totalDuration: Target total quarter length, closing note included.
            anacrusa: When equal to ``True``, start with an ``"R1.0"`` symbol.
            quarterLengthNotaFinal: Quarter length reserved for the closing note.

        Returns:
            ``[noteList, symbolList]``, or ``None`` if the sampling loop reaches
            its 50th iteration.
        """
        noteList = []
        symbolList = []
        ng = ngram.Ngram()

        def emit(symbol, elapsed):
            # Append the items created for `symbol`, record the symbol, and
            # return `elapsed` advanced by the items' quarter lengths.
            for piece in self.createNote_or_Rest(symbol):
                noteList.append(piece)
                elapsed = elapsed + piece.quarterLength
            symbolList.append(symbol)
            return elapsed

        elapsed = 0
        restRun = 0
        attempts = 0

        # Leave room for the closing note.
        limit = totalDuration - quarterLengthNotaFinal

        # Optional leading quarter rest.
        if anacrusa == True:
            elapsed = emit("R1.0", elapsed)

        elapsed = emit(firstSymbol, elapsed)
        elapsed = emit(secondSymbol, elapsed)

        older, newer = firstSymbol, secondSymbol
        while elapsed < limit:
            candidate = ng.getRandomTuple(
                ng.subTriGram(trigram, older, newer))
            # Check that triplets only appear on the strong beats of the bar,
            # then that the maximum amount of consecutive silence is not
            # exceeded.
            if (self.checkTuplet_to_Beat(elapsed, candidate)
                    and self.checkSilence(restRun, candidate)):
                pieces = self.createNote_or_Rest(candidate)
                # Check that the maximum rhythm duration is not exceeded.
                projected = elapsed
                for piece in pieces:
                    projected = projected + piece.quarterLength
                if projected <= limit:
                    for piece in pieces:
                        noteList.append(piece)
                        elapsed = elapsed + piece.quarterLength
                        # Accumulate silence duration to enforce the maximum;
                        # any non-rest symbol resets it.
                        if "R" in candidate:
                            restRun = restRun + piece.quarterLength
                        else:
                            restRun = 0
                    older, newer = newer, candidate
                    symbolList.append(candidate)
            attempts += 1
            if attempts == 50:
                return None

        # Append the last note.
        if quarterLengthNotaFinal > 0:
            closing = ""
            for length, name in ((1.0, "N1.0"), (2.0, "N2.0"),
                                 (3.0, "N3.0"), (4.0, "N4.0")):
                if quarterLengthNotaFinal == length:
                    closing = name
                    break
            elapsed = emit(closing, elapsed)

        return [noteList, symbolList]

Beispiel #11

0

Datei anzeigen

Datei: parsing.py Projekt: BrianComposer/HarmonicVoice

    def startParsing(self, composerName, mode, maxTokens):
        """Run the raw-text-to-n-gram pipeline for one composer and mode.

        Steps, in order: read the raw text, tokenize it, remove stop words,
        filter the tokens with ``filterMostFrequentTokens(tokens, maxTokens)``,
        count unigrams, bigrams and trigrams, apply Laplace smoothing with
        alpha 0.0001, and save the results with ``FileIO.save_obj``.

        The list returned by ``getFileNames(composerName, mode)`` is used as
        follows: index 0 is the raw input (read from ``'raw/'``), 1 the
        smoothed bigram, 2 the smoothed trigram, 3 ``unigram_ZERO``,
        4 ``bigram_ZERO`` and 5 the unigram counts.

        Args:
            composerName: Forwarded to ``FileIO.getFileNames``.
            mode: Forwarded to ``FileIO.getFileNames``.
            maxTokens: Forwarded to ``Ngram.filterMostFrequentTokens``.

        Returns:
            None. Progress is printed to stdout. On failure
            ``">>> Process failed"`` is printed and the traceback is written
            to stderr; the exception is not re-raised.
        """
        import traceback

        try:
            # Initialise the helper classes.
            fio = fileIO.FileIO()
            ng = ngram.Ngram()
            lp = smoothing.Smoothing()

            # Read the raw data.
            print(">>> Reading data raw")

            fileNames = fio.getFileNames(composerName, mode)
            dataRawFile = fileNames[0]
            dataRaw = fio.readText('raw/' + dataRawFile)

            # Tokenization.
            print(">>> Tokenization")
            tokens = ng.getTokens(dataRaw)

            # Stop words.
            print(">>> Removing stop words")
            tokens = ng.removeStopWords(tokens)

            # Filter the most frequent tokens.
            print(">>> Filtering " + str(maxTokens) + " most frequent tokens")
            tokens = ng.filterMostFrequentTokens(tokens, maxTokens)

            # Unigram.
            print(">>> Counting unigram")
            unigram_count = ng.getUnigram(tokens)
            print(">>> Saving unigram")
            fio.save_obj(unigram_count, fileNames[5])

            # Bigram.
            # NOTE(review): bigram_prob is never used below; the call is kept
            # in case it has side effects -- confirm and remove if not.
            print(">>> Counting bigram")
            bigram_count = ng.getBigram(tokens)
            bigram_prob = ng.calculateBigramProbability(
                unigram_count, bigram_count)

            # Trigram.
            # NOTE(review): trigram_prob is never used below either, and the
            # call receives unigram_count rather than bigram_count -- confirm
            # against calculateTrigramProbability's signature.
            print(">>> Counting trigram")
            trigram_count = ng.getTrigram(tokens)
            trigram_prob = ng.calculateTrigramProbability(
                unigram_count, trigram_count)

            # Smoothing.
            alfa = 0.0001
            print(">>> Laplace smoothing of bigram")
            bigram_prob_Laplace = lp.calculateBigramProbabilityLaplace(
                unigram_count, bigram_count, alfa)
            unigram_ZERO = lp.smoothingLaplaceBigramZERO(
                unigram_count, bigram_count, alfa)

            # NOTE(review): the trigram is smoothed with the *Bigram* Laplace
            # routine and unigram counts -- presumably a trigram-specific
            # routine was intended; verify against the smoothing module.
            print(">>> Laplace smoothing of trigram")
            trigram_prob_Laplace = lp.calculateBigramProbabilityLaplace(
                unigram_count, trigram_count, alfa)
            bigram_ZERO = lp.smoothingLaplaceTrigramZERO(
                unigram_count, trigram_count, alfa)

            print(">>> Saving bigram")
            fio.save_obj(bigram_prob_Laplace, fileNames[1])
            fio.save_obj(unigram_ZERO, fileNames[3])

            print(">>> Saving trigram")
            fio.save_obj(trigram_prob_Laplace, fileNames[2])
            fio.save_obj(bigram_ZERO, fileNames[4])

            # Process finished.
            print(">>> Process finished OK")
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit propagate, and show the cause instead of hiding it.
            print(">>> Process failed")
            traceback.print_exc()

Beispiel #12

0

Datei anzeigen

 def setUp(self):
     """Store a newly created ``ngram.Ngram(3)`` on ``self.ngram``."""
     fixture = ngram.Ngram(3)
     self.ngram = fixture

Beispiel #13

0

Datei anzeigen

 def test_ngram_not_integer(self):
     """Constructing ``ngram.Ngram`` with a float or with a string must raise."""
     for invalid_n in (1.2, '3'):
         with self.assertRaises(Exception):
             ngram.Ngram(invalid_n)

Beispiel #14

0

Datei anzeigen

 def test_ngram_negative_n(self):
     """Constructing ``ngram.Ngram`` with -1 must raise."""
     # The lambda keeps the whole construction inside the checked call.
     self.assertRaises(Exception, lambda: ngram.Ngram(-1))

Beispiel #15

0

Datei anzeigen

Datei: parse.py Projekt: Sucker-P/BS

    # A neutral corpus and a restore request must not be supplied together.
    # Note that assert statements are removed when Python runs with -O.
    assert not (args.neutral_corpus and args.restore), \
        "Neutral corpus and restore request are mutually exclusive"

    # Construct our classifiers
    # The bag-of-words classifiers are only created when their option is set.
    if args.bow:
        bagofwords = bow.SimpleBagOfWords("BagOfWords", args.bow)

    if args.botw:
        bagoftaggedwords = bow.BagOfTaggedWords("TaggedBagOfWord", args.botw,
                                                args.verbose)

    # The three n-gram classifiers are always built; they receive identical
    # arguments except for the name and the eighth positional argument
    # (1, 2, 3). Only the progress messages depend on args.verbose.
    # print(...) with a single string behaves the same on Python 2 and 3,
    # unlike the Python 2-only print statement used here before.
    if args.verbose:
        print("Loading unigram training data")
    unigram = ngram.Ngram("Unigram", args.bad_corpus, args.neutral_corpus,
                          args.verbose, args.top_words, args.pickle,
                          args.restore, 1, args.abuse_freq)

    if args.verbose:
        print("Loading bigram training data")
    bigram = ngram.Ngram("Bigram", args.bad_corpus, args.neutral_corpus,
                         args.verbose, args.top_words, args.pickle,
                         args.restore, 2, args.abuse_freq)

    if args.verbose:
        print("Loading trigram training data")
    trigram = ngram.Ngram("Trigram", args.bad_corpus, args.neutral_corpus,
                          args.verbose, args.top_words, args.pickle,
                          args.restore, 3, args.abuse_freq)

    raters = [unigram, bigram, trigram]