def sentence_length(pylinguistObj):
    """Return the token count of every sentence in ``tokenized_sentences``.

    Each sentence is temporarily assigned to ``pylinguistObj.text`` so that
    ``tools.getTokens`` can be run on it.  The previous implementation did
    this through an alias (``temp = pylinguistObj``), which left the caller's
    ``text`` set to the last sentence; the original value is now restored
    before returning, even if tokenization raises.

    :param pylinguistObj: object exposing ``text`` and ``tokenized_sentences``.
    :returns: list of ints, one per sentence, in sentence order.
    """
    original_text = pylinguistObj.text
    lengths = []
    try:
        for sentence in pylinguistObj.tokenized_sentences:
            # getTokens is handed the object, so the sentence has to be
            # exposed through its ``text`` attribute.
            pylinguistObj.text = sentence
            lengths.append(len(tools.getTokens(pylinguistObj)))
    finally:
        # Undo the temporary assignment so the caller's text is not clobbered.
        pylinguistObj.text = original_text
    return lengths
def sentence_sized_30(pylinguistObj):
    """Return the percentage of sentences that contain more than 30 tokens.

    Each sentence is temporarily assigned to ``pylinguistObj.text`` so that
    ``tools.getTokens`` can be run on it; the original ``text`` is restored
    afterwards (the previous implementation left it set to the last
    sentence).

    :param pylinguistObj: object exposing ``text`` and ``tokenized_sentences``.
    :returns: ``(long_sentences * 100) / total_sentences``, or ``0`` when
        there are no sentences.  The previous implementation raised in that
        case because it read the loop variable after a loop that never ran.
    """
    sentences = list(pylinguistObj.tokenized_sentences)
    total = len(sentences)
    if total == 0:
        return 0

    original_text = pylinguistObj.text
    long_sentences = 0
    try:
        for sentence in sentences:
            pylinguistObj.text = sentence
            if len(tools.getTokens(pylinguistObj)) > 30:
                long_sentences += 1
    finally:
        # Undo the temporary assignment so the caller's text is not clobbered.
        pylinguistObj.text = original_text

    # NOTE: the operator is kept as written.  Under Python 2 this is integer
    # division (the percentage is truncated); under Python 3 it is a float.
    return (long_sentences * 100) / total
def word_count(pylinguistObj):
    """Count the tokens of the text, excluding those tagged ``"."``.

    The result is the number of entries in ``pylinguistObj.tokens`` minus the
    number of ``postag`` entries whose tag (second element) equals ``"."``.
    It is also stored on ``pylinguistObj.word_count``.

    :param pylinguistObj: object exposing ``tokens``, ``postag`` and ``text``.
    :returns: the word count as an int.
    """
    if pylinguistObj.tokens == []:
        # Pass the object itself: the other getTokens call sites visible
        # alongside this function all do so, whereas this one used to pass
        # the raw text string.
        # NOTE(review): confirm against tools.getTokens' signature.
        pylinguistObj.tokens = tools.getTokens(pylinguistObj)

    # Punctuation is not a word; only the "." tag is treated as punctuation.
    punctuation = sum(1 for tag in pylinguistObj.postag if tag[1] == ".")
    count = len(pylinguistObj.tokens) - punctuation

    pylinguistObj.word_count = count
    return count
def text(self, text):
    """Load ``text`` into this object and recompute its descriptive counts.

    The statements below run in a fixed order: each helper is handed
    ``self`` and may read attributes assigned by the lines before it.

    NOTE(review): if this is a plain method named ``text``, assigning
    ``self.text`` shadows it on the instance after the first call --
    confirm that is intended.

    :param text: the text to analyse.
    :returns: ``self``.
    """
    # Clear previous results; tokens and postag are recomputed below.
    self.types = []
    self.tokens = []
    self.postag = []
    self.text = text  # .decode('utf-8','ignore')

    # Computed once here and stored on the object (marked "optimization"
    # by the original author).
    self.tokens = tools.getTokens(self)  # optimization
    self.postag = tools.getPosTag(self)

    # Descriptive counts
    self.tokenized_sentences = descriptive.tokenized_sentences(self)
    self.word_count = descriptive.word_count(self)
    self.sentence_count = descriptive.sentence_count(self)
    self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
    # self.word_length = descriptive.word_length(self)

    # Doesn't work, for some reason
    # self.syllable_count = descriptive.syllable_count(self)
    # self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)
    return self
def text(self, text):
    """Store ``text`` on this object and derive its descriptive counts.

    Statement order is significant: every helper receives ``self`` and may
    depend on attributes set by earlier lines.

    NOTE(review): if this is a plain method named ``text``, the assignment
    to ``self.text`` replaces it on the instance once called -- confirm
    that is intended.

    :param text: the text to analyse.
    :returns: ``self``.
    """
    # Drop any earlier results; tokens and postag are rebuilt further down.
    self.types=[]
    self.tokens=[]
    self.postag=[]
    self.text = text  # .decode('utf-8','ignore')

    # Tokens and tags are computed once and kept on the object (the original
    # author labelled this an "optimization").
    self.tokens = tools.getTokens(self)  # optimization
    self.postag = tools.getPosTag(self)

    # Descriptive counts
    self.tokenized_sentences = descriptive.tokenized_sentences(self)
    self.word_count = descriptive.word_count(self)
    self.sentence_count = descriptive.sentence_count(self)
    self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
    # self.word_length = descriptive.word_length(self)

    # Doesn't work, for some reason
    # self.syllable_count = descriptive.syllable_count(self)
    # self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)
    return self
def orthographic_neighborhood(pylinguistObj):
    """Count nearby repetitions of identical words.

    Words are taken from ``pylinguistObj.postag`` (first element of each
    entry), skipping entries whose tag equals ``"."``.  For every word, each
    of the next 19 words that is equal to it adds one to the result.

    Changes from the previous implementation: the leftover debug ``print``
    of the result was removed, and ``tools.getTokens`` is given the object
    rather than the raw text string.

    :param pylinguistObj: object exposing ``tokens``, ``postag`` and ``text``.
    :returns: number of equal word pairs that are at most 19 words apart.
    """
    if pylinguistObj.tokens == []:
        # Kept for its side effect of populating ``tokens``; the count below
        # only reads ``postag``.  The object itself is passed, as the other
        # getTokens call sites visible alongside this function do.
        # NOTE(review): confirm against tools.getTokens' signature.
        pylinguistObj.tokens = tools.getTokens(pylinguistObj)

    # Punctuation is not a word; only the "." tag is treated as punctuation.
    words = [tag[0] for tag in pylinguistObj.postag if tag[1] != "."]

    max_offset = 19  # the original scanned offsets 1..19 (range(1, 20))
    count = 0
    for index, word in enumerate(words):
        # Slicing stops at the end of the list, so no bounds check is needed.
        for neighbour in words[index + 1:index + 1 + max_offset]:
            if word == neighbour:
                count += 1
    return count