コード例 #1
0
def graphSentenceWordUse(sentences, top=0, fileName=args.args.file):
    """Render a stacked bar chart of how often each word occurs per sentence.

    sentences -- sequence of sentence strings; each one is split on
        whitespace and every token is passed through taxonomy.sanitize.
    top -- when greater than 0, only the words reported by
        most_used_words.mostUsed(fileName, top=top) keep a label and a
        value; every other word is plotted as an empty, zero-valued point.
    fileName -- handed to most_used_words.mostUsed; only read when top is
        greater than 0.

    The chart is written as a PNG whose name is the lower-cased chart title
    with spaces replaced by underscores, at the path given by
    taxonomy.outdir.
    """
    sentenceCount = len(sentences)
    chart = pygal.StackedBar(print_labels=True, show_legend=False)
    chart.title = 'Word Use By Sentence'
    limited = top > 0
    if limited:
        chart.title += ' Of Top ' + str(top) + ' Words'
        mostUsed = dict(
            (entry['label'], entry['value'])
            for entry in most_used_words.mostUsed(fileName, top=top))
    chart.x_labels = [str(number) for number in range(1, sentenceCount + 1)]

    # Tokenise every sentence once; the token lists are reused below.
    tokenized = []
    for sentence in sentences:
        tokenized.append(
            [taxonomy.sanitize(token) for token in sentence.split()])

    # Zeroed point for every word seen anywhere, so each sentence ends up
    # with an entry for every word (all series get the same length).
    template = {}
    for tokens in tokenized:
        for token in tokens:
            template[token] = {'label': token, 'value': 0}

    # One independent copy of the template per sentence, then count.
    tallies = []
    for tokens in tokenized:
        tally = copy.deepcopy(template)
        for token in tokens:
            tally[token]['value'] += 1
        tallies.append(tally)

    # Regroup the points by word: one series per word, one point per sentence.
    series = {}
    for tally in tallies:
        for token in tally:
            point = tally[token]
            if token not in series:
                series[token] = []
            hidden = point['value'] < 1 or \
                (limited and token not in mostUsed)
            if hidden:
                point['label'] = ''
                point['value'] = 0
            series[token].append(point)

    for token in series:
        chart.add(token, series[token])
    outputName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outputName + '.png'))
コード例 #2
0
ファイル: sentence.py プロジェクト: pdxjohnny/taxonomy
def graphSentenceWordUse(sentences, top=0, fileName=args.args.file):
    """Chart, per sentence, how many times each word of the text is used.

    sentences -- sequence of sentence strings. Words are obtained by
        splitting on whitespace and applying taxonomy.sanitize.
    top -- if greater than 0, restrict the labelled words to those returned
        by most_used_words.mostUsed(fileName, top=top) and extend the chart
        title accordingly.
    fileName -- argument for most_used_words.mostUsed (unused when top is
        not greater than 0).

    Produces a pygal stacked bar chart with one series per word and one
    x label per sentence (numbered from 1), rendered to a PNG named after
    the chart title.
    """
    num = len(sentences) + 1
    chart = pygal.StackedBar(print_labels=True, show_legend=False)
    chart.title = 'Word Use By Sentence'
    # None means "no filtering"; otherwise maps each kept word to its count.
    keepOnly = None
    if top > 0:
        chart.title = chart.title + ' Of Top ' + str(top) + ' Words'
        keepOnly = {}
        for row in most_used_words.mostUsed(fileName, top=top):
            keepOnly[row['label']] = row['value']
    chart.x_labels = map(str, range(1, num))

    wordsBySentence = [
        [taxonomy.sanitize(raw) for raw in text.split()]
        for text in sentences
    ]

    # Every known word starts at zero in every sentence.
    blank = {}
    for words in wordsBySentence:
        for word in words:
            blank[word] = {'label': word, 'value': 0}
    counts = [copy.deepcopy(blank) for _ in wordsBySentence]
    for tally, words in zip(counts, wordsBySentence):
        for word in words:
            tally[word]['value'] += 1

    perWord = {}
    for tally in counts:
        for word, point in tally.items():
            if word not in perWord:
                perWord[word] = []
            shown = point['value'] >= 1 and \
                (keepOnly is None or word in keepOnly)
            if not shown:
                # Unused or filtered-out words stay in the series as blanks.
                point['label'] = ''
                point['value'] = 0
            perWord[word].append(point)

    for word in perWord:
        chart.add(word, perWord[word])
    pngName = chart.title.lower().replace(' ', '_') + '.png'
    chart.render_to_png(taxonomy.outdir(pngName))
コード例 #3
0
ファイル: sentence.py プロジェクト: pdxjohnny/taxonomy
def graphSentenceLength(sentences):
    """Render a pie chart with one slice per sentence, sized by word count.

    sentences -- sequence of sentence strings; the length of a sentence is
        the number of whitespace-separated tokens it contains.

    Each slice is titled '<sentence number> - <word count> words', with
    sentences numbered from 1. The PNG name is derived from the chart title.
    """
    chart = pygal.Pie(print_labels=True)
    chart.title = 'Sentence Length'
    for number, sentence in enumerate(sentences, 1):
        wordCount = len(sentence.split())
        sliceTitle = ' - '.join([str(number), str(wordCount) + ' words'])
        point = {'value': wordCount, 'label': str(wordCount)}
        chart.add(sliceTitle, [point])
    outputName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outputName + '.png'))
コード例 #4
0
ファイル: most_used_words.py プロジェクト: pdxjohnny/taxonomy
def main():
    """Render a pie chart of the ten most used words in args.args.file.

    Each entry returned by mostUsed is used as a single-point series titled
    with the word itself. The entry's 'label' is rewritten in place to
    '<word> - <count>' before it is added to the chart.
    """
    topWords = mostUsed(args.args.file, top=10)
    chart = pygal.Pie(print_labels=True)
    chart.title = 'Most Used Words'
    for entry in topWords:
        sliceTitle = entry['label']
        entry['label'] = sliceTitle + ' - ' + str(entry['value'])
        chart.add(sliceTitle, [entry])
    outputName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outputName + '.png'))
コード例 #5
0
ファイル: most_used_words.py プロジェクト: pdxjohnny/taxonomy
def main():
    """Chart the top ten words of args.args.file as a pie and save it as PNG.

    The series title is the bare word; the point label shown on the slice
    is the word followed by ' - ' and its count. The dicts returned by
    mostUsed are modified in place to carry that longer label.
    """
    ranked = mostUsed(args.args.file, top=10)
    chart = pygal.Pie(print_labels=True)
    chart.title = 'Most Used Words'
    for point in ranked:
        name = point['label']
        count = str(point['value'])
        point['label'] = name + ' - ' + count
        chart.add(name, [point])
    pngName = chart.title.lower().replace(' ', '_') + '.png'
    chart.render_to_png(taxonomy.outdir(pngName))
コード例 #6
0
def graphSentenceLength(sentences):
    """Draw a pie chart comparing the word counts of the given sentences.

    sentences -- indexable sequence of sentence strings. A sentence's word
        count is the number of tokens produced by str.split().

    One single-point series is added per sentence, titled with the 1-based
    sentence number and its word count; the point's label is the count.
    """
    chart = pygal.Pie(print_labels=True)
    chart.title = 'Sentence Length'
    for index in range(len(sentences)):
        words = sentences[index].split()
        howMany = len(words)
        # Sentences are numbered from 1 in the chart.
        heading = str(index + 1) + ' - ' + str(howMany) + ' words'
        chart.add(heading, [{'value': howMany, 'label': str(howMany)}])
    pngName = chart.title.lower().replace(' ', '_') + '.png'
    chart.render_to_png(taxonomy.outdir(pngName))
コード例 #7
0
ファイル: sentence_kind.py プロジェクト: pdxjohnny/taxonomy
def graphSentenceClauses(sentences):
    """Render a stacked bar chart of clause counts for every sentence.

    sentences -- only its length is used; the clause counts of sentence
        number i (1-based) are read from taxonomy.word(i).

    One series is added per entry of CLAUSE_KINDS, with one point per
    sentence. A zero count is plotted with an empty label. The PNG name is
    derived from the chart title.
    """
    sentenceCount = len(sentences)
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Sentence Clauses'
    chart.x_labels = [str(number) for number in range(1, sentenceCount + 1)]
    allClauses = {clause: [] for clause in CLAUSE_KINDS}
    for i in range(1, sentenceCount + 1):
        sentence = taxonomy.word(i)
        for clause in CLAUSE_KINDS:
            value = sentence[clause]
            label = str(value) if value != 0 else ''
            # Build a separate chart point rather than storing it back into
            # the record from taxonomy.word(i): that record may be shared
            # with other readers, which expect plain counts there.
            allClauses[clause].append({'value': value, 'label': label})
    for clause in allClauses:
        chart.add(clause, allClauses[clause])
    outputName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outputName + '.png'))
コード例 #8
0
ファイル: sentence_kind.py プロジェクト: pdxjohnny/taxonomy
def graphSentenceKinds(sentences):
    """Render a stacked bar chart marking which kind each sentence is.

    sentences -- only its length is used; the record for sentence number i
        (1-based) comes from taxonomy.word(i).

    A sentence matches a kind when its 'dependent' and 'independent' values
    both equal those stored for that kind in SENTENCE_KINDS. A match is
    plotted as value 1 labelled with the kind; anything else as value 0
    with an empty label.
    """
    total = len(sentences)
    chart = pygal.StackedBar()
    chart.title = 'Kinds Of Sentences'
    chart.x_labels = [str(number) for number in range(1, total + 1)]
    allKinds = dict((kind, []) for kind in SENTENCE_KINDS)
    for number in range(1, total + 1):
        record = taxonomy.word(number)
        for kind in SENTENCE_KINDS:
            wanted = SENTENCE_KINDS[kind]
            matches = record['dependent'] == wanted['dependent'] and \
                record['independent'] == wanted['independent']
            if matches:
                point = {'value': 1, 'label': kind}
            else:
                point = {'value': 0, 'label': ''}
            allKinds[kind].append(point)
    for kind in allKinds:
        chart.add(kind, allKinds[kind])
    outputName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outputName + '.png'))
コード例 #9
0
def graphSentenceKinds(sentences):
    """Chart the kind of every sentence as a stacked bar chart.

    sentences -- used only for its length. Sentence records are fetched
        with taxonomy.word(i) for i from 1 to len(sentences).

    For each kind in SENTENCE_KINDS a series is added holding, per
    sentence, a point of value 1 (labelled with the kind) when the record's
    'dependent' and 'independent' entries equal the kind's, and a blank
    zero-valued point otherwise.
    """
    num = len(sentences) + 1
    chart = pygal.StackedBar()
    chart.title = 'Kinds Of Sentences'
    chart.x_labels = map(str, range(1, num))
    allKinds = {}
    for kind in SENTENCE_KINDS:
        allKinds[kind] = []
    for position in range(1, num):
        info = taxonomy.word(position)
        for kind in SENTENCE_KINDS:
            sameDependent = \
                info['dependent'] == SENTENCE_KINDS[kind]['dependent']
            isKind = sameDependent and \
                info['independent'] == SENTENCE_KINDS[kind]['independent']
            allKinds[kind].append({
                'value': 1 if isKind else 0,
                'label': kind if isKind else '',
            })
    for kind in allKinds:
        chart.add(kind, allKinds[kind])
    pngName = chart.title.lower().replace(' ', '_') + '.png'
    chart.render_to_png(taxonomy.outdir(pngName))
コード例 #10
0
ファイル: sentence.py プロジェクト: pdxjohnny/taxonomy
def graphSentencePartsOfSpeech(sentences):
    """Render a stacked bar chart of part-of-speech counts per sentence.

    sentences -- only its length is used; the record for sentence number i
        (1-based) comes from taxonomy.word(i), and its 'types' entry is
        iterated to count how often each part occurs.

    One series is added per entry of taxonomy.TYPES. A zero count is
    plotted with an empty label. A type that is not in taxonomy.TYPES
    raises KeyError.
    """
    total = len(sentences)
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Parts of Speech In Sentences'
    chart.x_labels = [str(number) for number in range(1, total + 1)]
    allParts = {part: [] for part in taxonomy.TYPES}
    for number in range(1, total + 1):
        record = taxonomy.word(number)
        tally = {part: 0 for part in taxonomy.TYPES}
        for wordType in record['types']:
            tally[wordType] += 1
        for part, seen in tally.items():
            label = str(seen) if seen != 0 else ''
            allParts[part].append({'value': seen, 'label': label})
    for part in allParts:
        chart.add(part, allParts[part])
    outputName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outputName + '.png'))
コード例 #11
0
def graphSentenceClauses(sentences):
    """Chart how many clauses of each kind every sentence contains.

    sentences -- used only for its length. The per-sentence clause counts
        are looked up with taxonomy.word(i) for i from 1 to len(sentences).

    Adds one stacked-bar series per entry of CLAUSE_KINDS. Each point
    carries the count as its value and, unless the count is 0, the count
    as its label. The chart is rendered to a PNG named after its title.
    """
    num = len(sentences) + 1
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Sentence Clauses'
    chart.x_labels = [str(number) for number in range(1, num)]
    allClauses = {clause: [] for clause in CLAUSE_KINDS}
    for i in range(1, num):
        sentence = taxonomy.word(i)
        for clause in CLAUSE_KINDS:
            # Read the count without overwriting it: the record returned by
            # taxonomy.word(i) is left untouched in case it is shared with
            # code that still expects plain counts under these keys.
            count = sentence[clause]
            if count != 0:
                point = {'value': count, 'label': str(count)}
            else:
                point = {'value': count, 'label': ''}
            allClauses[clause].append(point)
    for clause in allClauses:
        chart.add(clause, allClauses[clause])
    fileName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(fileName + '.png'))
コード例 #12
0
def graphAllPuntuation(sentences):
    """Render a stacked bar chart of punctuation counts per sentence.

    sentences -- sequence of sentence strings.

    One series is added per key of PUNCTUATION, with one point per
    sentence. A point's value is the number of occurrences of that key in
    the sentence; for the quote characters " and ' the occurrences are
    halved (rounded down). The label is '<PUNCTUATION[kind]> - <value>',
    or empty when the value is below 1. The PNG name is derived from the
    chart title.
    """
    punctuationData = {kind: [] for kind in PUNCTUATION}
    for sentence in sentences:
        for kind in PUNCTUATION:
            value = sentence.count(kind)
            if kind in ('"', "'"):
                # Halve the quote count. Floor division keeps the result
                # an integer regardless of whether `/` is true division.
                value //= 2
            if value < 1:
                label = ''
            else:
                label = PUNCTUATION[kind] + ' - ' + str(value)
            punctuationData[kind].append({'value': value, 'label': label})
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Punctuation By Sentence'
    chart.x_labels = [str(number) for number in range(1, len(sentences) + 1)]
    for kind in punctuationData:
        chart.add(kind, punctuationData[kind])
    fileName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(fileName + '.png'))
コード例 #13
0
def graphSentencePartsOfSpeech(sentences):
    """Chart the number of words of each part of speech in every sentence.

    sentences -- used only for its length. Each sentence record is fetched
        with taxonomy.word(i) for i from 1 to len(sentences); the entries
        of its 'types' value are tallied.

    Adds one stacked-bar series per entry of taxonomy.TYPES, with a point
    per sentence whose label is the count, or empty for a count of 0.
    """
    num = len(sentences) + 1
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Parts of Speech In Sentences'
    chart.x_labels = map(str, range(1, num))
    allParts = {}
    for part in taxonomy.TYPES:
        allParts[part] = []
    for position in range(1, num):
        counts = {part: 0 for part in taxonomy.TYPES}
        for wordType in taxonomy.word(position)['types']:
            counts[wordType] = counts[wordType] + 1
        # allParts and counts share the same keys (both built from TYPES).
        for part in allParts:
            amount = counts[part]
            if amount == 0:
                point = {'value': amount, 'label': ''}
            else:
                point = {'value': amount, 'label': str(amount)}
            allParts[part].append(point)
    for part in allParts:
        chart.add(part, allParts[part])
    pngName = chart.title.lower().replace(' ', '_') + '.png'
    chart.render_to_png(taxonomy.outdir(pngName))
コード例 #14
0
ファイル: taxonomy_test.py プロジェクト: pdxjohnny/taxonomy
 def test_outdir(self):
     """taxonomy.outdir should place a bare file name under 'out'."""
     name = 'fileName'
     expected = os.path.join('out', name)
     self.assertEqual(taxonomy.outdir(name), expected)
コード例 #15
0
ファイル: taxonomy_test.py プロジェクト: pdxjohnny/taxonomy
 def test_outdir(self):
     """Check that outdir('fileName') equals os.path.join('out', 'fileName')."""
     given = 'fileName'
     actual = taxonomy.outdir(given)
     wanted = os.path.join('out', 'fileName')
     self.assertEqual(actual, wanted)