def graphSentenceWordUse(sentences, top=0, fileName=args.args.file):
    """Render a stacked bar chart of per-sentence word occurrences to a PNG.

    Each distinct sanitized word becomes one series and each sentence one
    bar.  A word's data point in a sentence carries the word itself as its
    label and the number of occurrences as its value; words that do not
    occur in a sentence get an empty label and a value of 0.

    Args:
        sentences: sequence of sentence strings; each is split on whitespace
            and every token is passed through ``taxonomy.sanitize``.
        top: when greater than 0, only words reported by
            ``most_used_words.mostUsed(fileName, top=top)`` are shown and the
            chart title is extended accordingly.
        fileName: file handed to ``most_used_words.mostUsed``; only used when
            ``top`` is greater than 0.  The default is ``args.args.file`` as
            evaluated when this function is defined.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    chart = pygal.StackedBar(print_labels=True, show_legend=False)
    chart.title = 'Word Use By Sentence'
    if top > 0:
        chart.title += ' Of Top ' + str(top) + ' Words'
        ranked = most_used_words.mostUsed(fileName, top=top)
        mostUsed = {entry['label']: entry['value'] for entry in ranked}
    chart.x_labels = [str(number) for number in range(1, len(sentences) + 1)]

    tokenized = []
    for sentence in sentences:
        tokenized.append([taxonomy.sanitize(token)
                          for token in sentence.split()])

    # Zeroed template holding every word seen anywhere in the text, so that
    # every sentence ends up with a data point for every word.
    template = {}
    for tokens in tokenized:
        for token in tokens:
            template[token] = {'label': token, 'value': 0}

    # One independent copy of the template per sentence.
    perSentence = [copy.deepcopy(template) for _ in tokenized]
    for counts, tokens in zip(perSentence, tokenized):
        for token in tokens:
            counts[token]['value'] += 1

    series = {}
    for counts in perSentence:
        for token in counts:
            point = counts[token]
            # mostUsed only exists when top > 0; the short-circuit keeps it
            # from being referenced otherwise.
            hidden = point['value'] < 1 or (top > 0 and token not in mostUsed)
            if hidden:
                point['label'] = ''
                point['value'] = 0
            series.setdefault(token, []).append(point)

    for token in series:
        chart.add(token, series[token])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def graphSentenceLength(sentences):
    """Render a pie chart of sentence lengths (in words) to a PNG.

    Every sentence gets its own slice, titled ``"<position> - <n> words"``
    where the position is 1-based and ``n`` is the number of
    whitespace-separated tokens.  The slice's data point uses ``n`` as its
    value and ``str(n)`` as its label.

    Args:
        sentences: sequence of sentence strings.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    chart = pygal.Pie(print_labels=True)
    chart.title = 'Sentence Length'

    for position, sentence in enumerate(sentences, 1):
        wordCount = len(sentence.split())
        sliceTitle = str(position) + ' - ' + str(wordCount) + ' words'
        chart.add(sliceTitle, [{'value': wordCount, 'label': str(wordCount)}])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def main():
    """Render a pie chart of the ten most used words to a PNG.

    The words come from ``mostUsed(args.args.file, top=10)``; each entry is
    read through its ``'label'`` and ``'value'`` keys.  The original label
    becomes the slice title, and the entry's label is extended in place to
    ``"<label> - <value>"`` before the entry is added as the slice's single
    data point.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    ranked = mostUsed(args.args.file, top=10)

    chart = pygal.Pie(print_labels=True)
    chart.title = 'Most Used Words'

    for entry in ranked:
        sliceTitle = entry['label']
        # The entry itself is updated (not copied) before being charted.
        entry['label'] = sliceTitle + ' - ' + str(entry['value'])
        chart.add(sliceTitle, [entry])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def graphSentenceClauses(sentences):
    """Render a stacked bar chart of clause counts per sentence to a PNG.

    One series is added per entry of ``CLAUSE_KINDS`` and one bar per
    sentence.  For the 1-based sentence number ``n`` the record returned by
    ``taxonomy.word(n)`` is read at each clause key; the value found there is
    charted, labelled with its string form, or with an empty label when the
    value is 0.

    Args:
        sentences: sequence of sentences; only its length is used, to decide
            how many records to fetch from ``taxonomy.word``.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    sentenceNumbers = range(1, len(sentences) + 1)

    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Sentence Clauses'
    chart.x_labels = [str(number) for number in sentenceNumbers]

    allClauses = {clause: [] for clause in CLAUSE_KINDS}
    for number in sentenceNumbers:
        sentence = taxonomy.word(number)
        for clause in CLAUSE_KINDS:
            count = sentence[clause]
            # Build a fresh data point instead of overwriting
            # sentence[clause]: the record belongs to taxonomy, and replacing
            # its counts with chart dicts would break any later reader of
            # the same record that expects plain numbers.
            allClauses[clause].append({
                'value': count,
                'label': str(count) if count != 0 else '',
            })

    for clause in allClauses:
        chart.add(clause, allClauses[clause])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def graphSentenceKinds(sentences):
    """Render a stacked bar chart marking the kind of each sentence to a PNG.

    One series is added per key of ``SENTENCE_KINDS`` and one bar per
    sentence.  For the 1-based sentence number ``n`` the record returned by
    ``taxonomy.word(n)`` is compared with each kind: when both its
    ``'dependent'`` and ``'independent'`` entries equal those of
    ``SENTENCE_KINDS[kind]`` the data point is ``1`` labelled with the kind,
    otherwise ``0`` with an empty label.

    Args:
        sentences: sequence of sentences; only its length is used, to decide
            how many records to fetch from ``taxonomy.word``.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    total = len(sentences)

    chart = pygal.StackedBar()
    chart.title = 'Kinds Of Sentences'
    chart.x_labels = [str(number) for number in range(1, total + 1)]

    allKinds = {kind: [] for kind in SENTENCE_KINDS}
    for number in range(1, total + 1):
        sentence = taxonomy.word(number)
        for kind in SENTENCE_KINDS:
            sameDependent = \
                sentence['dependent'] == SENTENCE_KINDS[kind]['dependent']
            matches = sameDependent and \
                sentence['independent'] == SENTENCE_KINDS[kind]['independent']
            if matches:
                point = {'value': 1, 'label': kind}
            else:
                point = {'value': 0, 'label': ''}
            allKinds[kind].append(point)

    for kind in allKinds:
        chart.add(kind, allKinds[kind])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def graphSentencePartsOfSpeech(sentences):
    """Render a stacked bar chart of part-of-speech counts per sentence.

    One series is added per entry of ``taxonomy.TYPES`` and one bar per
    sentence.  For the 1-based sentence number ``n`` the ``'types'`` entry of
    the record returned by ``taxonomy.word(n)`` is tallied; each tally is
    charted with its string form as label, or with an empty label when the
    tally is 0.

    Args:
        sentences: sequence of sentences; only its length is used, to decide
            how many records to fetch from ``taxonomy.word``.

    Raises:
        KeyError: if a record lists a type that is not in ``taxonomy.TYPES``.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    total = len(sentences)

    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Parts of Speech In Sentences'
    chart.x_labels = [str(number) for number in range(1, total + 1)]

    allParts = {part: [] for part in taxonomy.TYPES}
    for number in range(1, total + 1):
        sentence = taxonomy.word(number)

        tallies = {part: 0 for part in taxonomy.TYPES}
        for wordType in sentence['types']:
            tallies[wordType] += 1

        for part in tallies:
            tally = tallies[part]
            if tally != 0:
                point = {'value': tally, 'label': str(tally)}
            else:
                point = {'value': tally, 'label': ''}
            allParts[part].append(point)

    for part in allParts:
        chart.add(part, allParts[part])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def graphSentenceClauses(sentences):
    """Render a stacked bar chart of clause counts per sentence to a PNG.

    One series is added per entry of ``CLAUSE_KINDS`` and one bar per
    sentence.  For the 1-based sentence number ``n`` the record returned by
    ``taxonomy.word(n)`` is read at each clause key; the value found there is
    charted, labelled with its string form, or with an empty label when the
    value is 0.

    Args:
        sentences: sequence of sentences; only its length is used, to decide
            how many records to fetch from ``taxonomy.word``.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    sentenceNumbers = range(1, len(sentences) + 1)

    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Sentence Clauses'
    chart.x_labels = [str(number) for number in sentenceNumbers]

    allClauses = {clause: [] for clause in CLAUSE_KINDS}
    for number in sentenceNumbers:
        sentence = taxonomy.word(number)
        for clause in CLAUSE_KINDS:
            count = sentence[clause]
            # Build a fresh data point instead of overwriting
            # sentence[clause]: the record belongs to taxonomy, and replacing
            # its counts with chart dicts would break any later reader of
            # the same record that expects plain numbers.
            allClauses[clause].append({
                'value': count,
                'label': str(count) if count != 0 else '',
            })

    for clause in allClauses:
        chart.add(clause, allClauses[clause])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def graphAllPuntuation(sentences):
    """Render a stacked bar chart of punctuation counts per sentence to a PNG.

    One series is added per key of ``PUNCTUATION`` and one bar per sentence.
    A data point's value is the number of times the punctuation string
    occurs in the sentence; its label is ``"<PUNCTUATION[kind]> - <count>"``,
    or empty when the count is below 1.

    Args:
        sentences: sequence of sentence strings.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    punctuationData = {kind: [] for kind in PUNCTUATION}
    for sentence in sentences:
        for kind in PUNCTUATION:
            count = sentence.count(kind)
            if kind in ('"', "'"):
                # Quote marks are halved -- presumably so that an opening and
                # closing pair counts once.  Floor division keeps the count
                # an integer even where "/" means true division.
                count //= 2
            if count < 1:
                label = ''
            else:
                label = PUNCTUATION[kind] + ' - ' + str(count)
            punctuationData[kind].append({'value': count, 'label': label})

    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Punctuation By Sentence'
    chart.x_labels = [str(number) for number in range(1, len(sentences) + 1)]
    for kind in punctuationData:
        chart.add(kind, punctuationData[kind])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def graphSentencePartsOfSpeech(sentences):
    """Render a stacked bar chart of part-of-speech counts per sentence.

    One series is added per entry of ``taxonomy.TYPES`` and one bar per
    sentence.  For the 1-based sentence number ``n`` the ``'types'`` entry of
    the record returned by ``taxonomy.word(n)`` is tallied; each tally is
    charted with its string form as label, or with an empty label when the
    tally is 0.

    Args:
        sentences: sequence of sentences; only its length is used, to decide
            how many records to fetch from ``taxonomy.word``.

    Raises:
        KeyError: if a record lists a type that is not in ``taxonomy.TYPES``.

    The PNG name is derived from the chart title (lower-cased, spaces turned
    into underscores) and resolved through ``taxonomy.outdir``.
    """
    total = len(sentences)

    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Parts of Speech In Sentences'
    chart.x_labels = [str(number) for number in range(1, total + 1)]

    allParts = {part: [] for part in taxonomy.TYPES}
    for number in range(1, total + 1):
        sentence = taxonomy.word(number)

        tallies = {part: 0 for part in taxonomy.TYPES}
        for wordType in sentence['types']:
            tallies[wordType] += 1

        for part in tallies:
            tally = tallies[part]
            if tally != 0:
                point = {'value': tally, 'label': str(tally)}
            else:
                point = {'value': tally, 'label': ''}
            allParts[part].append(point)

    for part in allParts:
        chart.add(part, allParts[part])

    outName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(outName + '.png'))
def test_outdir(self):
    """taxonomy.outdir('fileName') must equal os.path.join('out', 'fileName')."""
    name = 'fileName'
    expected = os.path.join('out', name)
    self.assertEqual(taxonomy.outdir(name), expected)