def main():
    """Tag the parts of speech of every sentence in the input file and graph them.

    Reads the file named by ``args.args.file``, splits it into sentences on
    '.', renders the sentence-length and word-use graphs, interactively tags
    every sentence that has no stored record yet, and finally renders the
    parts-of-speech graph.
    """
    allLines = taxonomy.readFile(args.args.file)
    stripped = [part.strip() for part in allLines.split('.')]
    # Drop empty fragments (split('.') yields one after a trailing period).
    # Sentence ids are only handed out to non-empty sentences, while the
    # graph helpers look up ids 1..len(sentences); keeping the empty
    # fragments would make them ask for ids that were never stored.
    sentences = [sentence for sentence in stripped if sentence]

    graphSentenceLength(sentences)
    graphSentenceWordUse(sentences)
    graphSentenceWordUse(sentences, top=5)

    # Ids are 1-based and follow the order of the sentences in the file.
    for num, sentence in enumerate(sentences, 1):
        # A missing record (None) means the sentence was not tagged yet.
        if taxonomy.word(num) is None:
            handleSentence(sentence, num)

    graphSentencePartsOfSpeech(sentences)
def main():
    """Process every sentence in the input file and graph clauses and kinds.

    Reads the file named by ``args.args.file``, splits it into sentences on
    '.', runs ``handleSentence`` for every sentence whose stored record is
    missing or lacks the 'independent' / 'dependent' fields, and then renders
    the clause and sentence-kind graphs.
    """
    allLines = taxonomy.readFile(args.args.file)
    stripped = [part.strip() for part in allLines.split('.')]
    # Drop empty fragments (split('.') yields one after a trailing period).
    # Sentence ids are only handed out to non-empty sentences, while the
    # graph helpers look up ids 1..len(sentences); keeping the empty
    # fragments would make them ask for ids that were never stored.
    sentences = [sentence for sentence in stripped if sentence]

    # Ids are 1-based and follow the order of the sentences in the file.
    for num, sentence in enumerate(sentences, 1):
        record = taxonomy.word(num)
        incomplete = (
            record is None
            or 'independent' not in record
            or 'dependent' not in record
        )
        if incomplete:
            handleSentence(sentence, num)

    graphSentenceClauses(sentences)
    graphSentenceKinds(sentences)
def handleSentence(sentence, num):
    """Interactively tag every word of a sentence with its part of speech.

    For each word the screen is cleared, the stored information about the
    word is printed, the numbered list of ``taxonomy.TYPES`` is shown and the
    user is asked to pick one. An empty answer reuses the most recently
    recorded type of the word. The chosen type is added to the word's stored
    'type' list, and once all words are tagged the sentence is stored under
    the id ``num`` together with its words and their types.

    Args:
        sentence: The sentence text; it is split on whitespace.
        num: The id under which the tagged sentence is stored.
    """
    words = sentence.strip().split()
    wordTypes = []
    for i, current in enumerate(words):
        clearScreen()
        # Show the whole sentence with the current word highlighted in red.
        before = (' '.join(words[:i]) + ' ') if i else ''
        display = before + colorama.Fore.RED + current \
            + colorama.Style.RESET_ALL + ' ' + ' '.join(words[i + 1:])

        word = taxonomy.sanitize(current)
        # NOTE(review): assumes taxonomy.word() returns None for an unknown
        # word (callers elsewhere compare its result to None) - confirm.
        info = taxonomy.word(word) or {}
        definition = info.pop('definition', None)
        info.pop('_id', None)
        if definition is not None:
            print(definition)
            print('')
        print(info)
        print('')

        # Number the choices from 1 and list *every* type, including the
        # last one (the previous loop bound left it out of the menu).
        for j, typeName in enumerate(taxonomy.TYPES, 1):
            print('%s %s' % (j, typeName))
        print('')
        print(display)
        print('')

        # Only a list is a usable history of types; anything else is
        # replaced by a fresh list below.
        known = info.get('type')
        if not isinstance(known, list):
            known = []

        # Keep asking until the answer is usable, so a typo does not abort
        # the whole tagging session.
        while True:
            res = inp('What kind of word is this? ')
            if res == '':
                if known:
                    # Empty answer: reuse the most recently recorded type.
                    res = known[-1]
                    break
                continue
            try:
                choice = int(res)
            except ValueError:
                continue
            # Reject 0 and negatives, which would silently index from the
            # end of the list.
            if 1 <= choice <= len(taxonomy.TYPES):
                res = taxonomy.TYPES[choice - 1]
                break

        wordTypes.append(res)
        # Add the type only when it is new; types recorded earlier are kept
        # (previously re-selecting a known type discarded all the others).
        if res not in known:
            known.append(res)
        info['type'] = known
        mongo.coll.update_one({'_id': word}, {'$set': {'type': known}},
                              upsert=False)

    try:
        mongo.coll.insert_one({
            '_id': num,
            'sentence': words,
            'types': wordTypes
        })
    except Exception as e:
        # Best effort: report the failure and pause so the message can be
        # read before the caller moves on to the next sentence.
        print(e)
        time.sleep(1)
def graphSentenceClauses(sentences):
    """Render a stacked bar chart of the clause counts of every sentence.

    One bar is drawn per sentence id (1..len(sentences)) with one series per
    entry of ``CLAUSE_KINDS``. The chart is written as a PNG named after the
    chart title into ``taxonomy.outdir``.

    Args:
        sentences: The sentences of the text; only their number is used, the
            counts themselves are read from the stored sentence records.
    """
    total = len(sentences)
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Sentence Clauses'
    chart.x_labels = [str(i) for i in range(1, total + 1)]

    allClauses = {clause: [] for clause in CLAUSE_KINDS}
    for i in range(1, total + 1):
        # NOTE(review): assumes taxonomy.word() returns None for an id with
        # no stored record (main compares its result to None) - confirm.
        # A missing record or field is plotted as 0 instead of crashing, so
        # every series keeps exactly one value per x label.
        sentence = taxonomy.word(i) or {}
        for clause in CLAUSE_KINDS:
            count = sentence.get(clause, 0)
            # Zero-height segments get no label to keep the chart readable.
            label = str(count) if count != 0 else ''
            allClauses[clause].append({'value': count, 'label': label})

    for clause in allClauses:
        chart.add(clause, allClauses[clause])

    fileName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(fileName + '.png'))
def graphSentenceKinds(sentences):
    """Render a stacked bar chart showing the kind of every sentence.

    A sentence is of a given kind when its stored 'dependent' and
    'independent' values both equal the ones listed for that kind in
    ``SENTENCE_KINDS``. One bar is drawn per sentence id (1..len(sentences))
    and the chart is written as a PNG named after the chart title into
    ``taxonomy.outdir``.

    Args:
        sentences: The sentences of the text; only their number is used, the
            clause counts are read from the stored sentence records.
    """
    total = len(sentences)
    chart = pygal.StackedBar()
    chart.title = 'Kinds Of Sentences'
    chart.x_labels = [str(i) for i in range(1, total + 1)]

    allKinds = {kind: [] for kind in SENTENCE_KINDS}
    for i in range(1, total + 1):
        # NOTE(review): assumes taxonomy.word() returns None for an id with
        # no stored record (main compares its result to None) - confirm.
        sentence = taxonomy.word(i)
        # A sentence without a record or without clause counts matches no
        # kind; it is plotted as an empty bar instead of crashing.
        hasClauses = (
            sentence is not None
            and 'dependent' in sentence
            and 'independent' in sentence
        )
        for kind in SENTENCE_KINDS:
            expected = SENTENCE_KINDS[kind]
            if hasClauses \
                    and sentence['dependent'] == expected['dependent'] \
                    and sentence['independent'] == expected['independent']:
                sentenceKind = {'value': 1, 'label': kind}
            else:
                sentenceKind = {'value': 0, 'label': ''}
            allKinds[kind].append(sentenceKind)

    for kind in allKinds:
        chart.add(kind, allKinds[kind])

    fileName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(fileName + '.png'))
def handleSentence(sentence, num):
    """Interactively tag every word of a sentence with its part of speech.

    For each word the screen is cleared, the stored information about the
    word is printed, the numbered list of ``taxonomy.TYPES`` is shown and the
    user is asked to pick one. An empty answer reuses the most recently
    recorded type of the word. The chosen type is added to the word's stored
    'type' list, and once all words are tagged the sentence is stored under
    the id ``num`` together with its words and their types.

    Args:
        sentence: The sentence text; it is split on whitespace.
        num: The id under which the tagged sentence is stored.
    """
    words = sentence.strip().split()
    wordTypes = []
    for i, current in enumerate(words):
        clearScreen()
        # Show the whole sentence with the current word highlighted in red.
        before = (' '.join(words[:i]) + ' ') if i else ''
        display = before + colorama.Fore.RED + current \
            + colorama.Style.RESET_ALL + ' ' + ' '.join(words[i + 1:])

        word = taxonomy.sanitize(current)
        # NOTE(review): assumes taxonomy.word() returns None for an unknown
        # word (callers elsewhere compare its result to None) - confirm.
        info = taxonomy.word(word) or {}
        definition = info.pop('definition', None)
        info.pop('_id', None)
        if definition is not None:
            print(definition)
            print('')
        print(info)
        print('')

        # Number the choices from 1 and list *every* type, including the
        # last one (the previous loop bound left it out of the menu).
        for j, typeName in enumerate(taxonomy.TYPES, 1):
            print('%s %s' % (j, typeName))
        print('')
        print(display)
        print('')

        # Only a list is a usable history of types; anything else is
        # replaced by a fresh list below.
        known = info.get('type')
        if not isinstance(known, list):
            known = []

        # Keep asking until the answer is usable, so a typo does not abort
        # the whole tagging session.
        while True:
            res = inp('What kind of word is this? ')
            if res == '':
                if known:
                    # Empty answer: reuse the most recently recorded type.
                    res = known[-1]
                    break
                continue
            try:
                choice = int(res)
            except ValueError:
                continue
            # Reject 0 and negatives, which would silently index from the
            # end of the list.
            if 1 <= choice <= len(taxonomy.TYPES):
                res = taxonomy.TYPES[choice - 1]
                break

        wordTypes.append(res)
        # Add the type only when it is new; types recorded earlier are kept
        # (previously re-selecting a known type discarded all the others).
        if res not in known:
            known.append(res)
        info['type'] = known
        mongo.coll.update_one({'_id': word}, {'$set': {'type': known}},
                              upsert=False)

    try:
        mongo.coll.insert_one({'_id': num, 'sentence': words,
                               'types': wordTypes})
    except Exception as e:
        # Best effort: report the failure and pause so the message can be
        # read before the caller moves on to the next sentence.
        print(e)
        time.sleep(1)
def graphSentencePartsOfSpeech(sentences):
    """Render a stacked bar chart of the parts of speech used per sentence.

    One bar is drawn per sentence id (1..len(sentences)) with one series per
    entry of ``taxonomy.TYPES``; each segment is the number of words of that
    type in the sentence. The chart is written as a PNG named after the chart
    title into ``taxonomy.outdir``.

    Args:
        sentences: The sentences of the text; only their number is used, the
            word types are read from the stored sentence records.
    """
    total = len(sentences)
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Parts of Speech In Sentences'
    chart.x_labels = [str(i) for i in range(1, total + 1)]

    allParts = {part: [] for part in taxonomy.TYPES}
    for i in range(1, total + 1):
        # NOTE(review): assumes taxonomy.word() returns None for an id with
        # no stored record (main compares its result to None) - confirm.
        # A missing record or 'types' field is plotted as all zeros instead
        # of crashing, so every series keeps one value per x label.
        sentence = taxonomy.word(i) or {}
        counts = {part: 0 for part in taxonomy.TYPES}
        for wordType in sentence.get('types', []):
            counts[wordType] += 1
        for part in counts:
            tally = counts[part]
            # Zero-height segments get no label to keep the chart readable.
            label = str(tally) if tally != 0 else ''
            allParts[part].append({'value': tally, 'label': label})

    for part in allParts:
        chart.add(part, allParts[part])

    fileName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(fileName + '.png'))
def graphSentenceClauses(sentences):
    """Render a stacked bar chart of the clause counts of every sentence.

    One bar is drawn per sentence id (1..len(sentences)) with one series per
    entry of ``CLAUSE_KINDS``. The chart is written as a PNG named after the
    chart title into ``taxonomy.outdir``.

    Args:
        sentences: The sentences of the text; only their number is used, the
            counts themselves are read from the stored sentence records.
    """
    total = len(sentences)
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Sentence Clauses'
    chart.x_labels = [str(i) for i in range(1, total + 1)]

    allClauses = {clause: [] for clause in CLAUSE_KINDS}
    for i in range(1, total + 1):
        # NOTE(review): assumes taxonomy.word() returns None for an id with
        # no stored record (main compares its result to None) - confirm.
        # A missing record or field is plotted as 0 instead of crashing, so
        # every series keeps exactly one value per x label.
        sentence = taxonomy.word(i) or {}
        for clause in CLAUSE_KINDS:
            count = sentence.get(clause, 0)
            # Zero-height segments get no label to keep the chart readable.
            label = str(count) if count != 0 else ''
            allClauses[clause].append({'value': count, 'label': label})

    for clause in allClauses:
        chart.add(clause, allClauses[clause])

    fileName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(fileName + '.png'))
def main():
    """Print the stored record of a word or sentence and optionally edit it.

    ``args.args.word`` is looked up as an integer id when it parses as one
    and as a word otherwise. The record's definition (if any) is printed
    first, followed by the remaining fields and the record id. When
    ``args.args.set`` names a field, that field is set to ``args.args.to``
    (converted to an integer when possible).
    """
    word = args.args.word
    try:
        # Numeric arguments address records stored under integer ids.
        word = int(word)
    except (TypeError, ValueError):
        pass

    info = taxonomy.word(word)
    # NOTE(review): assumes taxonomy.word() returns None when nothing is
    # stored under the key (other callers compare its result to None).
    if info is None:
        print('No entry found for %r' % (word,))
        return

    if 'definition' in info:
        definition = info.pop('definition')
        print(definition)
        print('')
    print(info)
    print('')
    print(info['_id'])

    # Truthiness also covers an unset (None) option, not just ''.
    if args.args.set:
        to = args.args.to
        try:
            to = int(to)
        except (TypeError, ValueError):
            # Not a number: store the value as given.
            pass
        mongo.coll.update_one({'_id': info['_id']},
                              {'$set': {args.args.set: to}},
                              upsert=False)
def graphSentencePartsOfSpeech(sentences):
    """Render a stacked bar chart of the parts of speech used per sentence.

    One bar is drawn per sentence id (1..len(sentences)) with one series per
    entry of ``taxonomy.TYPES``; each segment is the number of words of that
    type in the sentence. The chart is written as a PNG named after the chart
    title into ``taxonomy.outdir``.

    Args:
        sentences: The sentences of the text; only their number is used, the
            word types are read from the stored sentence records.
    """
    total = len(sentences)
    chart = pygal.StackedBar(print_labels=True)
    chart.title = 'Parts of Speech In Sentences'
    chart.x_labels = [str(i) for i in range(1, total + 1)]

    allParts = {part: [] for part in taxonomy.TYPES}
    for i in range(1, total + 1):
        # NOTE(review): assumes taxonomy.word() returns None for an id with
        # no stored record (main compares its result to None) - confirm.
        # A missing record or 'types' field is plotted as all zeros instead
        # of crashing, so every series keeps one value per x label.
        sentence = taxonomy.word(i) or {}
        counts = {part: 0 for part in taxonomy.TYPES}
        for wordType in sentence.get('types', []):
            counts[wordType] += 1
        for part in counts:
            tally = counts[part]
            # Zero-height segments get no label to keep the chart readable.
            label = str(tally) if tally != 0 else ''
            allParts[part].append({'value': tally, 'label': label})

    for part in allParts:
        chart.add(part, allParts[part])

    fileName = chart.title.lower().replace(' ', '_')
    chart.render_to_png(taxonomy.outdir(fileName + '.png'))