예제 #1
0
def graphSentenceWordUse(sentences, top=0, fileName=args.args.file):
    """Render a stacked bar chart of how often each word occurs per sentence.

    One bar is drawn per sentence (labelled 1..N) and one series is added
    per distinct sanitized word.  The chart is written as a PNG whose name
    is the chart title lower-cased with spaces replaced by underscores.

    sentences -- sequence of sentence strings; each is split on whitespace
                 and every token is passed through taxonomy.sanitize.
    top       -- when greater than 0, only words returned by
                 most_used_words.mostUsed(fileName, top=top) keep their
                 counts; every other word is blanked out.
    fileName  -- file handed to most_used_words.mostUsed (only read when
                 top > 0).
    """
    bars = pygal.StackedBar(print_labels=True, show_legend=False)
    bars.title = 'Word Use By Sentence'
    if top > 0:
        bars.title += ' Of Top ' + str(top) + ' Words'
        # Re-key the ranked list by word so membership tests are cheap.
        topWords = {entry['label']: entry['value']
                    for entry in most_used_words.mostUsed(fileName, top=top)}
    bars.x_labels = map(str, range(1, len(sentences) + 1))

    tokenized = [[taxonomy.sanitize(raw) for raw in text.split()]
                 for text in sentences]

    # Template holding a zeroed cell for every word seen in any sentence.
    blank = {}
    for tokens in tokenized:
        for token in tokens:
            blank[token] = {'label': token, 'value': 0}

    # One independent copy of the template per sentence, then count.
    tallies = []
    for tokens in tokenized:
        tally = copy.deepcopy(blank)
        for token in tokens:
            tally[token]['value'] += 1
        tallies.append(tally)

    # Pivot to word -> [cell for sentence 1, cell for sentence 2, ...].
    series = {}
    for tally in tallies:
        for token in tally:
            cell = tally[token]
            unused = cell['value'] < 1
            if unused or (top > 0 and token not in topWords):
                # Blank the cell so it neither shows a label nor adds height.
                cell['label'] = ''
                cell['value'] = 0
            series.setdefault(token, []).append(cell)

    for token in series:
        bars.add(token, series[token])

    outName = bars.title.lower().replace(' ', '_')
    bars.render_to_png(taxonomy.outdir(outName + '.png'))
예제 #2
0
def graphSentenceWordUse(sentences, top=0, fileName=args.args.file):
    """Chart per-sentence word usage as a stacked bar PNG.

    Each sentence becomes one bar (x labels 1..N); each distinct sanitized
    word becomes one series whose i-th entry is that word's count in the
    i-th sentence.  With top > 0 the title gains an ' Of Top <n> Words'
    suffix and only words reported by most_used_words.mostUsed(fileName,
    top=top) keep their counts.  The PNG is written to
    taxonomy.outdir(<title lower-cased, spaces as underscores> + '.png').
    """
    barChart = pygal.StackedBar(print_labels=True, show_legend=False)
    barChart.title = 'Word Use By Sentence'
    restrict = top > 0
    if restrict:
        barChart.title += ' Of Top ' + str(top) + ' Words'
        ranked = most_used_words.mostUsed(fileName, top=top)
        keep = {item['label']: item['value'] for item in ranked}
    barChart.x_labels = map(str, range(1, len(sentences) + 1))

    splitSentences = [
        [taxonomy.sanitize(piece) for piece in sentence.split()]
        for sentence in sentences
    ]

    # Zero-count entry for every word that appears anywhere.
    zeroed = {}
    for pieces in splitSentences:
        for piece in pieces:
            zeroed[piece] = {'label': piece, 'value': 0}

    # Every sentence gets its own deep copy so counts stay independent.
    perSentence = [copy.deepcopy(zeroed) for _ in splitSentences]
    for pieces, counts in zip(splitSentences, perSentence):
        for piece in pieces:
            counts[piece]['value'] += 1

    # Regroup the cells by word, preserving sentence order inside each list.
    byWord = {}
    for counts in perSentence:
        for piece in counts:
            entry = counts[piece]
            if piece not in byWord:
                byWord[piece] = []
            if entry['value'] < 1 or (restrict and piece not in keep):
                # Hide words that are absent here or outside the top list.
                entry['label'] = ''
                entry['value'] = 0
            byWord[piece].append(entry)

    for piece in byWord:
        barChart.add(piece, byWord[piece])

    pngName = barChart.title.lower().replace(' ', '_') + '.png'
    barChart.render_to_png(taxonomy.outdir(pngName))
예제 #3
0
def mostUsed(fileName, top=10):
    """Return the `top` most frequent sanitized words in a file.

    The file is read through taxonomy.readFile, split on whitespace and
    each token is passed through taxonomy.sanitize before counting.

    Returns a list of {'label': word, 'value': count} dicts ordered by
    descending count, truncated to the first `top` entries.
    """
    tokens = [taxonomy.sanitize(raw)
              for raw in taxonomy.readFile(fileName).split()]

    tally = {}
    for token in tokens:
        tally[token] = tally.get(token, 0) + 1

    ranked = [{'label': token, 'value': tally[token]} for token in tally]
    # list.sort is stable, so equally frequent words keep their dict order.
    ranked.sort(key=lambda entry: entry['value'], reverse=True)
    return ranked[:top]
예제 #4
0
def mostUsed(fileName, top=10):
    """Count word frequencies in a file and return the leading entries.

    fileName -- path handed to taxonomy.readFile.
    top      -- number of entries to keep from the front of the ranking.

    Every whitespace-separated token is normalised with taxonomy.sanitize.
    The result is a list of {'label': word, 'value': count} dicts sorted
    from most to least frequent.
    """
    contents = taxonomy.readFile(fileName)
    cleaned = [taxonomy.sanitize(piece) for piece in contents.split()]

    frequency = {}
    for piece in cleaned:
        if piece in frequency:
            frequency[piece] += 1
        else:
            frequency[piece] = 1

    entries = []
    for piece in frequency:
        entries.append({'label': piece, 'value': frequency[piece]})

    # Ascending sort on the negated count == stable descending sort on count.
    byCount = sorted(entries, key=lambda item: -item['value'])
    return byCount[:top]
예제 #5
0
def handleSentence(sentence, num):
    """Interactively tag every word of a sentence with a word type.

    For each whitespace-separated word the screen is cleared, the word's
    stored record (from taxonomy.word) is shown, the numbered list of
    taxonomy.TYPES is printed and the user is asked to pick one.  An empty
    answer re-uses the last type already stored for that word.  The word's
    'type' list is then written back with mongo.coll.update_one, and once
    all words are tagged the sentence itself is stored with
    mongo.coll.insert_one.

    sentence -- the sentence text; leading/trailing whitespace is ignored.
    num      -- value used as the '_id' of the stored sentence document.

    Returns None.
    """
    words = sentence.strip().split()
    wordTypes = []
    for i, rawWord in enumerate(words):
        clearScreen()
        # Re-assemble the sentence with the current word highlighted in red.
        before = ' '.join(words[:i]) + ' ' if i else ''
        display = before + colorama.Fore.RED + rawWord \
            + colorama.Style.RESET_ALL + ' ' + ' '.join(words[i + 1:])
        word = taxonomy.sanitize(rawWord)
        info = taxonomy.word(word)
        print(info['definition'])
        print('')
        # The definition was just shown and the id is noise; hide both from
        # the summary printed below.
        del info['definition']
        del info['_id']
        print(info)
        print('')
        # Number the menu from 1 and include EVERY type: the previous
        # xrange(1, len(TYPES)) loop never listed the last entry.
        for j, typeName in enumerate(taxonomy.TYPES, 1):
            print('%d %s' % (j, typeName))
        print('')
        print(display)
        print('')

        # Normalise the stored types to a list so the code below can rely
        # on it; anything else is treated as "no types recorded yet".
        known = info.get('type')
        if not isinstance(known, list):
            known = []

        # Keep asking until the answer is usable instead of crashing the
        # whole tagging session on a typo.
        while True:
            res = inp('What kind of word is this? ')
            if res == '':
                if known:
                    res = known[-1]
                    break
            else:
                try:
                    choice = int(res)
                except ValueError:
                    choice = 0
                if 1 <= choice <= len(taxonomy.TYPES):
                    res = taxonomy.TYPES[choice - 1]
                    break
            print('Please enter one of the numbers listed above.')

        wordTypes.append(res)
        # Only add the type when it is new.  Previously, choosing a type
        # that was already recorded replaced the whole list with [res],
        # silently dropping every other stored type.
        if res not in known:
            known.append(res)
        info['type'] = known
        mongo.coll.update_one({'_id': word}, {'$set': {'type': info['type']}},
            upsert=False)
    try:
        mongo.coll.insert_one({
            '_id': num,
            'sentence': words,
            'types': wordTypes
        })
    except Exception as e:
        # Best effort: report the failure (presumably a duplicate '_id' --
        # TODO confirm) and pause so the message is readable before the
        # caller clears the screen again.
        print(e)
        time.sleep(1)
예제 #6
0
def handleSentence(sentence, num):
    """Interactively tag every word of a sentence with a word type.

    For each whitespace-separated word the screen is cleared, the word's
    stored record (from taxonomy.word) is shown, the numbered list of
    taxonomy.TYPES is printed and the user is asked to pick one.  An empty
    answer re-uses the last type already stored for that word.  The word's
    'type' list is then written back with mongo.coll.update_one, and once
    all words are tagged the sentence itself is stored with
    mongo.coll.insert_one.

    sentence -- the sentence text; leading/trailing whitespace is ignored.
    num      -- value used as the '_id' of the stored sentence document.

    Returns None.
    """
    words = sentence.strip().split()
    wordTypes = []
    for i, rawWord in enumerate(words):
        clearScreen()
        # Re-assemble the sentence with the current word highlighted in red.
        before = ' '.join(words[:i]) + ' ' if i else ''
        display = before + colorama.Fore.RED + rawWord \
            + colorama.Style.RESET_ALL + ' ' + ' '.join(words[i + 1:])
        word = taxonomy.sanitize(rawWord)
        info = taxonomy.word(word)
        print(info['definition'])
        print('')
        # The definition was just shown and the id is noise; hide both from
        # the summary printed below.
        del info['definition']
        del info['_id']
        print(info)
        print('')
        # Number the menu from 1 and include EVERY type: the previous
        # xrange(1, len(TYPES)) loop never listed the last entry.
        for j, typeName in enumerate(taxonomy.TYPES, 1):
            print('%d %s' % (j, typeName))
        print('')
        print(display)
        print('')

        # Normalise the stored types to a list so the code below can rely
        # on it; anything else is treated as "no types recorded yet".
        known = info.get('type')
        if not isinstance(known, list):
            known = []

        # Keep asking until the answer is usable instead of crashing the
        # whole tagging session on a typo.
        while True:
            res = inp('What kind of word is this? ')
            if res == '':
                if known:
                    res = known[-1]
                    break
            else:
                try:
                    choice = int(res)
                except ValueError:
                    choice = 0
                if 1 <= choice <= len(taxonomy.TYPES):
                    res = taxonomy.TYPES[choice - 1]
                    break
            print('Please enter one of the numbers listed above.')

        wordTypes.append(res)
        # Only add the type when it is new.  Previously, choosing a type
        # that was already recorded replaced the whole list with [res],
        # silently dropping every other stored type.
        if res not in known:
            known.append(res)
        info['type'] = known
        mongo.coll.update_one({'_id': word}, {'$set': {'type': info['type']}},
            upsert=False)
    try:
        mongo.coll.insert_one({'_id': num, 'sentence': words, 'types': wordTypes})
    except Exception as e:
        # Best effort: report the failure (presumably a duplicate '_id' --
        # TODO confirm) and pause so the message is readable before the
        # caller clears the screen again.
        print(e)
        time.sleep(1)
예제 #7
0
 def test_sanitize(self):
     # Mixed-case input littered with punctuation; the expectation below
     # pins sanitize() to yield just the lower-case letters and the space.
     noisy = 'S!O@,M$E%% S^&*T(R):;I,.N\'\"G<>.,'
     expected = 'some string'
     self.assertEqual(taxonomy.sanitize(noisy), expected)
예제 #8
0
 def test_sanitize(self):
     # sanitize() is expected to reduce this punctuation-heavy, upper-case
     # string to the plain lower-case words 'some string'.
     dirty = 'S!O@,M$E%% S^&*T(R):;I,.N\'\"G<>.,'
     cleaned = taxonomy.sanitize(dirty)
     wanted = 'some string'
     self.assertEqual(cleaned, wanted)