class Essay:
    """An essay plus its raw scores, normalized scores and predicted class.

    Attributes set by ``__init__``:
        text: the essay text exactly as passed in.
        text_list: ``text`` with newlines turned into spaces, split on
            whitespace.
        tags: the result of ``nl.pos_tag(text_list)``.
        raw_score, normalized_score: fresh ``Score`` objects.
        real_score: the known class; 0 (the default) disables the
            mis-classification penalty in ``predict``.
        filename: name used in ``output``, ``pipeline`` and ``summary``.
        grader_score: weighted total, filled in by ``calculate_score``.
        classification: predicted class, 1 until ``classify`` has run.
        classification_penalty: distance between ``real_score`` and
            ``classification``, filled in by ``predict``.
    """

    # Grade label per classification; anything not listed is "high".
    _GRADE_NAMES = {1: "low", 2: "medium"}

    def __init__(self, filename, text, real_score=0):
        self.text = text
        self.text_list = self.text.replace('\n', " ").split()
        self.tags = nl.pos_tag(self.text_list)
        self.raw_score = Score()
        self.normalized_score = Score()
        self.real_score = real_score
        self.filename = filename
        self.grader_score = 0.0
        self.classification = 1
        self.classification_penalty = 0

    def calculate_score(self):
        """Store the weighted sum of the normalized scores in ``grader_score``.

        Weights: spelling, sbj_vrb and vrb_tense count once; sent_form,
        coherence and length count twice; topic counts three times.
        """
        s = self.normalized_score
        # NOTE(review): this reads ``coherence`` while pipeline() and output()
        # use ``anaphora`` -- confirm against Score / categories.ALL that both
        # attributes exist and that this is the intended one.
        self.grader_score = (s.spelling + s.sbj_vrb + s.vrb_tense
                             + 2 * s.sent_form + 2 * s.coherence
                             + 3 * s.topic + 2 * s.length)

    def classify(self):
        """Set ``classification`` to 1, 2 or 3 from ``grader_score``.

        The class starts at 1 and goes up by one for each of the two cutoffs
        in ``NormalizedCutoffs.total`` that ``grader_score`` exceeds.
        """
        cutoffs = NormalizedCutoffs.total
        # Recompute from 1 instead of incrementing the stored value, so that
        # calling classify()/predict() more than once gives the same answer.
        level = 1
        if self.grader_score > cutoffs[0]:
            level += 1
        if self.grader_score > cutoffs[1]:
            level += 1
        self.classification = level

    def disambiguate_vbz(self):
        """Build ``tags_vbz``: ``tags`` with VBZ split into VBZis / VBZhas.

        A ``(word, 'VBZ')`` pair becomes ``(word, 'VBZis')`` when the word is
        exactly ``'is'`` and ``(word, 'VBZhas')`` when it is exactly ``'has'``;
        every other pair is kept as is. ``tags`` itself is not modified.
        """
        suffix_for = {'is': 'VBZis', 'has': 'VBZhas'}
        # A real list (not a lazy map object) so the result can be indexed and
        # iterated repeatedly regardless of the Python version.
        self.tags_vbz = [
            (pair[0], suffix_for[pair[0]])
            if pair[1] == 'VBZ' and pair[0] in ('is', 'has') else pair
            for pair in self.tags
        ]

    def output(self):
        """Return one tab-separated, newline-terminated result line.

        Columns: filename, then the normalized spelling, sbj_vrb, vrb_tense,
        sent_form, anaphora, topic and length scores, then the grade
        ("low" for class 1, "medium" for class 2, "high" otherwise).
        """
        grade = self._GRADE_NAMES.get(self.classification, "high")
        n = self.normalized_score
        fields = [
            self.filename,
            str(n.spelling),
            str(n.sbj_vrb),
            str(n.vrb_tense),
            str(n.sent_form),
            str(n.anaphora),
            str(n.topic),
            str(n.length),
            grade,
        ]
        return "\t".join(fields) + "\n"

    def parse(self):
        """Store ``parsetree(self.text, relations=True)`` in ``self.parsetree``."""
        self.parsetree = parsetree(self.text, relations=True)

    def pipeline(self):
        """Compute every raw score for this essay, then call ``predict``.

        Sends the essay down the pipeline to calculate its raw scores for each
        category; each scorer receives the essay object itself.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Processing " + self.filename)
        self.raw_score.spelling = spelling.mistakes(self)
        self.raw_score.sbj_vrb = sva.mistakes(self)
        self.raw_score.length = sentence.length(self)
        self.raw_score.vrb_tense = verb.mistakes(self)
        self.raw_score.sent_form = sentenceFormation.mistakes(self)
        self.raw_score.anaphora = centering.compute_ambiguity(self)
        self.raw_score.topic = topicCoherence.score(self)
        # Now predict
        self.predict()

    def predict(self):
        """Normalize the raw scores, then compute total, class and penalty.

        For every category in ``categories.ALL`` the normalized score is
        1 plus the number of that category's ``NormalizedCutoffs`` entries the
        raw value beats: "length" and "topic" beat a cutoff by being above it,
        every other category by being below it.
        """
        for cat in categories.ALL:
            raw = getattr(self.raw_score, cat)
            cutoffs = getattr(NormalizedCutoffs, cat)
            if cat in ("length", "topic"):
                score = 1 + sum(1 for cutoff in cutoffs if raw > cutoff)
            else:
                score = 1 + sum(1 for cutoff in cutoffs if raw < cutoff)
            # Store the normalized score
            setattr(self.normalized_score, cat, score)

        # Calculate the essay's total score and classify
        self.calculate_score()
        self.classify()

        # Mis-classification penalty; always reassigned so that a repeated
        # predict() cannot leave a stale value behind.
        if self.real_score != 0:
            self.classification_penalty = abs(
                self.real_score - self.classification)
        else:
            self.classification_penalty = 0

    def summary(self):
        """Print filename, real score, classification, grader score and the
        normalized scores (via ``normalized_score._print()``)."""
        print(self.filename)
        print("Real Score: " + str(self.real_score))
        print("Classification: " + str(self.classification))
        print("Grader Score: " + str(self.grader_score))
        self.normalized_score._print()
class Essay:
    """An essay plus its raw scores, normalized scores and predicted class.

    Attributes set by ``__init__``:
        text: the essay text exactly as passed in.
        text_list: ``text`` with newlines turned into spaces, split on
            whitespace.
        tags: the result of ``nl.pos_tag(text_list)``.
        raw_score, normalized_score: fresh ``Score`` objects.
        real_score: the known class; 0 (the default) disables the
            mis-classification penalty in ``predict``.
        filename: name used in ``output``, ``pipeline`` and ``summary``.
        grader_score: weighted total, filled in by ``calculate_score``.
        classification: predicted class, 1 until ``classify`` has run.
        classification_penalty: distance between ``real_score`` and
            ``classification``, filled in by ``predict``.
    """

    # Grade label per classification; anything not listed is "high".
    _GRADE_NAMES = {1: "low", 2: "medium"}

    def __init__(self, filename, text, real_score=0):
        self.text = text
        self.text_list = self.text.replace('\n', " ").split()
        self.tags = nl.pos_tag(self.text_list)
        self.raw_score = Score()
        self.normalized_score = Score()
        self.real_score = real_score
        self.filename = filename
        self.grader_score = 0.0
        self.classification = 1
        self.classification_penalty = 0

    def calculate_score(self):
        """Store the weighted sum of the normalized scores in ``grader_score``.

        Weights: spelling, sbj_vrb and vrb_tense count once; sent_form,
        coherence and length count twice; topic counts three times.
        """
        s = self.normalized_score
        # NOTE(review): this reads ``coherence`` while pipeline() and output()
        # use ``anaphora`` -- confirm against Score / categories.ALL that both
        # attributes exist and that this is the intended one.
        self.grader_score = (s.spelling + s.sbj_vrb + s.vrb_tense
                             + 2 * s.sent_form + 2 * s.coherence
                             + 3 * s.topic + 2 * s.length)

    def classify(self):
        """Set ``classification`` to 1, 2 or 3 from ``grader_score``.

        The class starts at 1 and goes up by one for each of the two cutoffs
        in ``NormalizedCutoffs.total`` that ``grader_score`` exceeds.
        """
        cutoffs = NormalizedCutoffs.total
        # Recompute from 1 instead of incrementing the stored value, so that
        # calling classify()/predict() more than once gives the same answer.
        level = 1
        if self.grader_score > cutoffs[0]:
            level += 1
        if self.grader_score > cutoffs[1]:
            level += 1
        self.classification = level

    def disambiguate_vbz(self):
        """Build ``tags_vbz``: ``tags`` with VBZ split into VBZis / VBZhas.

        A ``(word, 'VBZ')`` pair becomes ``(word, 'VBZis')`` when the word is
        exactly ``'is'`` and ``(word, 'VBZhas')`` when it is exactly ``'has'``;
        every other pair is kept as is. ``tags`` itself is not modified.
        """
        suffix_for = {'is': 'VBZis', 'has': 'VBZhas'}
        # A real list (not a lazy map object) so the result can be indexed and
        # iterated repeatedly regardless of the Python version.
        self.tags_vbz = [
            (pair[0], suffix_for[pair[0]])
            if pair[1] == 'VBZ' and pair[0] in ('is', 'has') else pair
            for pair in self.tags
        ]

    def output(self):
        """Return one tab-separated, newline-terminated result line.

        Columns: filename, then the normalized spelling, sbj_vrb, vrb_tense,
        sent_form, anaphora, topic and length scores, then the grade
        ("low" for class 1, "medium" for class 2, "high" otherwise).
        """
        grade = self._GRADE_NAMES.get(self.classification, "high")
        n = self.normalized_score
        fields = [
            self.filename,
            str(n.spelling),
            str(n.sbj_vrb),
            str(n.vrb_tense),
            str(n.sent_form),
            str(n.anaphora),
            str(n.topic),
            str(n.length),
            grade,
        ]
        return "\t".join(fields) + "\n"

    def parse(self):
        """Store ``parsetree(self.text, relations=True)`` in ``self.parsetree``."""
        self.parsetree = parsetree(self.text, relations=True)

    def pipeline(self):
        """Compute every raw score for this essay, then call ``predict``.

        Sends the essay down the pipeline to calculate its raw scores for each
        category; each scorer receives the essay object itself.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Processing " + self.filename)
        self.raw_score.spelling = spelling.mistakes(self)
        self.raw_score.sbj_vrb = sva.mistakes(self)
        self.raw_score.length = sentence.length(self)
        self.raw_score.vrb_tense = verb.mistakes(self)
        self.raw_score.sent_form = sentenceFormation.mistakes(self)
        self.raw_score.anaphora = centering.compute_ambiguity(self)
        self.raw_score.topic = topicCoherence.score(self)
        # Now predict
        self.predict()

    def predict(self):
        """Normalize the raw scores, then compute total, class and penalty.

        For every category in ``categories.ALL`` the normalized score is
        1 plus the number of that category's ``NormalizedCutoffs`` entries the
        raw value beats: "length" and "topic" beat a cutoff by being above it,
        every other category by being below it.
        """
        for cat in categories.ALL:
            raw = getattr(self.raw_score, cat)
            cutoffs = getattr(NormalizedCutoffs, cat)
            if cat in ("length", "topic"):
                score = 1 + sum(1 for cutoff in cutoffs if raw > cutoff)
            else:
                score = 1 + sum(1 for cutoff in cutoffs if raw < cutoff)
            # Store the normalized score
            setattr(self.normalized_score, cat, score)

        # Calculate the essay's total score and classify
        self.calculate_score()
        self.classify()

        # Mis-classification penalty; always reassigned so that a repeated
        # predict() cannot leave a stale value behind.
        if self.real_score != 0:
            self.classification_penalty = abs(
                self.real_score - self.classification)
        else:
            self.classification_penalty = 0

    def summary(self):
        """Print filename, real score, classification, grader score and the
        normalized scores (via ``normalized_score._print()``)."""
        print(self.filename)
        print("Real Score: " + str(self.real_score))
        print("Classification: " + str(self.classification))
        print("Grader Score: " + str(self.grader_score))
        self.normalized_score._print()