def run(self, data): results = [ ] try: for corpus in data: temp_bubble = TextBubble(corpus.contents) # Gather Unigram Frequencies temp_unigrams = temp_bubble.unigrams() unigrams = dict() for item in temp_unigrams: unigrams[item[0]] = unigrams.get(item[0], 0) + 1 # Gather Bigram Frequencies temp_bigrams = temp_bubble.bigrams() bigrams = dict() for item in temp_bigrams: parsed_item = ' '.join(item) bigrams[parsed_item] = bigrams.get(parsed_item, 0) + 1 # Gather Trigram Frequencies temp_trigrams = temp_bubble.trigrams() trigrams = dict() for item in temp_trigrams: parsed_item = ' '.join(item) trigrams[parsed_item] = trigrams.get(parsed_item, 0) + 1 results.append({'corpus_id': corpus.id, 'unigrams': unigrams, 'bigrams': bigrams, 'trigrams': trigrams}) results = json.dumps(results) print(results) return results except TypeError: raise TransactionException('Corpus contents does not exist.')
def run(self,data): results = [ ] pos_parsed = {} try: for corpus in data: temp_bubble = TextBubble(corpus.contents) pos_tags = temp_bubble.pos() pos_counts = temp_bubble.pos_counts() for tuple in pos_tags: k = tuple[0] v = tuple[1] if v in pos_parsed.keys(): if k not in pos_parsed[v]: pos_parsed[v].append(k) else: pos_parsed[v] = [ ] pos_parsed[v].append(k) results.append({'corpus_id': corpus.id, 'pos_tags': pos_parsed, 'pos_counts': pos_counts}) results = json.dumps(results) print(results) return results except TypeError as e: print(e) raise TransactionException('Failed to run SplatPOSFrequencies.')
def run(self, data): results = [ ] syllables_parsed = { } try: for corpus in data: temp_bubble = TextBubble(corpus.contents) temp_tokens = temp_bubble.tokens() temp_tokens = ' '.join(temp_tokens).strip("\n").split(' ') for tok in temp_tokens: temp_tok = tok.strip("\n") temp_syll_count = cUtil.count_syllables([temp_tok]) if temp_syll_count == 0: temp_syll_count = 1 if str(temp_syll_count) in syllables_parsed.keys(): if tok not in syllables_parsed[str(temp_syll_count)]: syllables_parsed[str(temp_syll_count)].append(temp_tok) else: syllables_parsed[str(temp_syll_count)] = [ ] syllables_parsed[str(temp_syll_count)].append(temp_tok) print("Creating results...") results.append({'corpus_id': corpus.id, 'syllables': syllables_parsed}) results = json.dumps(results) print(results) return results except TypeError as e: print(e) raise TransactionException('Failed to run SplatSyllables.')
def run(self, data): results = [ ] try: for corpus in data: temp_bubble = TextBubble(corpus.contents) raw_disfluencies = Util.count_disfluencies(temp_bubble.sents()) print(raw_disfluencies) sentences = { } average_disfluencies = 0 um_count, uh_count, ah_count, er_count, hm_count, sl_count, rep_count, brk_count = (0,) * 8 # Sort the data so it looks better in JSON for i in raw_disfluencies[0]: temp_dis = {"UM": raw_disfluencies[0][i][0], "UH": raw_disfluencies[0][i][1], "AH": raw_disfluencies[0][i][2], "ER": raw_disfluencies[0][i][3], "HM": raw_disfluencies[0][i][4], "SILENT PAUSE": raw_disfluencies[0][i][5], "REPETITION": raw_disfluencies[0][i][6], "BREAK": raw_disfluencies[0][i][7]} sentences[i] = temp_dis for (k, v) in temp_dis.items(): # Gather total disfluencies for each disfluency type average_disfluencies += v if k == "UM": um_count += v elif k == "UH": uh_count += v elif k == "AH": ah_count += v elif k == "ER": er_count += v elif k == "HM": hm_count += v elif k == "SILENT PAUSE": sl_count += v elif k == "REPETITION": rep_count += v elif k == "BREAK": brk_count += v temp_total = average_disfluencies # Calculate the average disfluencies per sentence in the whole text average_disfluencies = float(average_disfluencies / len(raw_disfluencies[0])) total_disfluencies = {"UM": um_count, "UH": uh_count, "AH": ah_count, "ER": er_count, "HM": hm_count, "SILENT PAUSE": sl_count, "REPETITION": rep_count, "BREAK": brk_count, "TOTAL": temp_total} results.append({'corpus_id': corpus.id, 'sentences': sentences, 'average_disfluencies_per_sentence': average_disfluencies, 'total_disfluencies': total_disfluencies}) results = json.dumps(results) print(results) return results except TypeError: raise TransactionException('Corpus contents does not exist.')
def run(self, data): results = [ ] try: for corpus in data: temp_bubble = TextBubble(corpus.contents) cdensity = temp_bubble.content_density() idensity = temp_bubble.idea_density() flesch_score = temp_bubble.flesch_readability() kincaid_score = temp_bubble.kincaid_grade_level() results.append({'corpus_id': corpus.id, 'content_density': cdensity, 'idea_density': idensity, 'flesch_score': flesch_score, 'kincaid_score': kincaid_score}) results = json.dumps(results) print(results) return results except TypeError as e: print(e) raise TransactionException('Corpus contents does not exist.')