def calc_line_format_confidence_ii(self):
    """Score the fraction of lines that are blank or begin with a keyword.

    Stores the ratio in self.confidences['line format'] and returns the
    cleaned token stream for reuse by the caller.
    """
    # discard tokens irrelevant to line structure
    tokens = Examiner.drop_tokens(self.tokens, ['whitespace', 'comment', 'EOF'])
    tokens = self.join_continued_lines(tokens)
    lines = self.split_tokens_into_lines(tokens)

    total = len(lines)
    correct = 0
    for line in lines:
        # an empty line is acceptable; otherwise the first token must be a keyword
        if not line or line[0].group == 'keyword':
            correct += 1
        else:
            self.errors.append({
                'TYPE': 'LINE FORMAT',
                'FIRST': line[0].group,
                'SECOND': line[0].text
            })

    confidence = correct / total if total > 0 else 1.0
    self.confidences['line format'] = confidence
    return tokens
def calc_line_format_confidence(self):
    """Penalize open braces placed at the start of a line.

    Confidence is 1.0 minus the fraction of '{' tokens that begin a line
    (unless the token two back was also a '{').
    """
    tokens = Examiner.drop_tokens(self.tokens, ['whitespace', 'comment', 'line continuation'])

    open_brace_total = 0
    braces_starting_line = 0
    prev2 = Token('\n', 'newline', False)
    prev = Token('\n', 'newline', False)
    for token in tokens:
        if token.group == 'group' and token.text == '{':
            open_brace_total += 1
            starts_line = prev.group == 'newline'
            follows_open_brace = prev2.group == 'group' and prev2.text == '{'
            if starts_line and not follows_open_brace:
                braces_starting_line += 1
                self.errors.append({
                    'TYPE': 'LINE FORMAT',
                    'TOKEN': token.text
                })
        prev2 = prev
        prev = token

    confidence = 1.0
    if open_brace_total > 0:
        confidence = 1.0 - (braces_starting_line / open_brace_total)
    self.confidences['line format'] = confidence
def check_expected_keywords(self):
    """Confirm the four COBOL divisions each appear exactly once.

    Scans token pairs for 'X DIVISION' headers (IDENTIFICATION/ID,
    ENVIRONMENT, DATA, PROCEDURE).  Each division that does not occur
    exactly once subtracts 0.01 from the confidence and records an
    'EXPECTED KEYWORD' error.

    Returns:
        float: confidence in [0.96, 1.00].
    """
    counts = {
        'IDENTIFICATION': 0,
        'ENVIRONMENT': 0,
        'DATA': 0,
        'PROCEDURE': 0
    }

    drop_types = ['newline', 'whitespace', 'comment', 'line continuation']
    tokens = Examiner.drop_tokens(self.tokens, drop_types)

    prev_text = ''
    for token in tokens:
        text = token.text
        if text == 'DIVISION':
            if prev_text in ['IDENTIFICATION', 'ID']:
                counts['IDENTIFICATION'] += 1
            if prev_text == 'ENVIRONMENT':
                counts['ENVIRONMENT'] += 1
            if prev_text == 'DATA':
                counts['DATA'] += 1
            if prev_text == 'PROCEDURE':
                counts['PROCEDURE'] += 1
        prev_text = text

    expected_keyword_confidence = 1.00

    if counts['IDENTIFICATION'] != 1:
        expected_keyword_confidence -= 0.01
        self.errors.append({
            'TYPE': 'EXPECTED KEYWORD',
            'MISSING': 'IDENTIFICATION or ID DIVISION'
        })

    if counts['ENVIRONMENT'] != 1:
        # BUG FIX: original read 'expected_keyword_confidence != 0.01',
        # a no-op comparison; it must decrement like the other divisions.
        expected_keyword_confidence -= 0.01
        self.errors.append({
            'TYPE': 'EXPECTED KEYWORD',
            'MISSING': 'ENVIRONMENT DIVISION'
        })

    if counts['DATA'] != 1:
        expected_keyword_confidence -= 0.01
        self.errors.append({
            'TYPE': 'EXPECTED KEYWORD',
            'MISSING': 'DATA DIVISION'
        })

    if counts['PROCEDURE'] != 1:
        expected_keyword_confidence -= 0.01
        self.errors.append({
            'TYPE': 'EXPECTED KEYWORD',
            'MISSING': 'PROCEDURE DIVISION'
        })

    return expected_keyword_confidence
def calc_line_format_confidence(self):
    """Verify that block-introducing keyword lines end with a colon.

    Lines whose first token is one of the colon-requiring keywords
    (class/def/for/while/if/else/elif) must end with an operator ':'.
    """
    # unwrap continued lines, then drop tokens the interpreter ignores
    tokens = self.unwrap_code_lines(self.tokens)
    tokens = Examiner.drop_tokens(tokens, ['whitespace', 'comment'])
    lines = self.split_tokens_to_lines(tokens)

    colon_keywords = ['class', 'def', 'for', 'while', 'if', 'else', 'elif']
    checked = 0
    ended_with_colon = 0
    for line in lines:
        if len(line) <= 1:
            continue
        first = line[0]
        last = line[-1]
        if first.group != 'keyword' or first.text not in colon_keywords:
            continue
        checked += 1
        if last.group == 'operator' and last.text == ':':
            ended_with_colon += 1
        else:
            self.errors.append({
                'TYPE': 'LINE FORMAT',
                'FIRST': first.text,
                'SECOND': "END '" + last.text + "' NOT ':'"
            })

    confidence = ended_with_colon / checked if checked > 0 else 1.0
    self.confidences['line format'] = confidence
def check_paired_tokens(self, tokens, open_tokens, close_tokens):
    """Track nesting of open/close keyword pairs.

    Conditional keywords (if/case/while/until/unless) only open a block
    when they start a statement (after a newline, ';' or '='), since they
    may otherwise be statement modifiers.

    Returns:
        (ok, num_open, num_close) where ok is True when nesting is
        balanced and never dips below zero.
    """
    level = 0
    min_level = 0
    num_open = 0
    num_close = 0
    prev_lower = ''
    prev = Token('\n', 'newline', False)
    statement_starters = [';', '=']
    conditional_openers = ['if', 'case', 'while', 'until', 'unless']

    tokens = Examiner.drop_tokens(tokens, ['whitespace', 'comment', 'line continuation'])

    openers_stack = []
    for token in tokens:
        token_lower = token.text.lower()
        if token.group == 'keyword':
            at_statement_start = (prev.group == 'newline' or
                                  prev_lower in statement_starters)
            # explicit parentheses mirror Python's and/or precedence in the
            # original condition
            opens = token_lower in open_tokens or (
                token_lower in conditional_openers and at_statement_start)
            if opens:
                num_open += 1
                level += 1
                openers_stack.append(token_lower)
            if token_lower in close_tokens:
                num_close += 1
                level -= 1
                min_level = min(min_level, level)
                if openers_stack:
                    openers_stack.pop()
        prev_lower = token_lower
        prev = token

    ok = level == 0 and min_level == 0
    return ok, num_open, num_close
def calc_line_format_confidence(self):
    """Check PIC/PICTURE keyword and picture-element pairing.

    Every PIC/PICTURE keyword must be immediately followed by a
    'picture' token, and every 'picture' token must be immediately
    preceded by a PIC/PICTURE keyword.  Confidence is 1.0 minus the
    error rate over all tokens.
    """
    drop_types = [
        'newline', 'whitespace', 'comment', 'line description',
        'line continuation'
    ]
    tokens = Examiner.drop_tokens(self.tokens, drop_types)

    pic_keywords = ['PIC', 'PICTURE']
    errors = 0
    prev_token = Token('\n', 'newline', False)
    for token in tokens:
        if prev_token.group == 'keyword' and prev_token.text in pic_keywords:
            if token.group != 'picture':
                errors += 1
                self.errors.append({
                    'TYPE': 'PICTURE',
                    'FIRST': prev_token.text,
                    'SECOND': token.text
                })
        if token.group == 'picture':
            if prev_token.group != 'keyword' or prev_token.text not in pic_keywords:
                errors += 1
                self.errors.append({
                    'TYPE': 'PICTURE',
                    'FIRST': prev_token.text,
                    'SECOND': token.text
                })
        # BUG FIX: original never advanced prev_token, so it stayed the
        # initial newline token and both pairing checks were broken.
        prev_token = token

    picture_confidence = 1.0
    if len(self.tokens) > 0:
        # BUG FIX: original assigned the raw error rate as the confidence
        # (zero errors yielded confidence 0.0); subtract from 1.0 instead.
        picture_confidence = 1.0 - errors / len(self.tokens)
    self.confidences['line format'] = picture_confidence
def calc_line_format_confidence(self):
    """Score lines whose opening token requires a trailing 'then'.

    Returns the cleaned token stream and stores the ratio of conforming
    lines in self.confidences['line format'].
    """
    # NOTE(review): the opener list below is ['if', 'endif'] while the
    # original comment said "'if' or 'elseif'"; an 'endif' line never ends
    # in 'then', so 'endif' may be a typo for 'elseif' — confirm intent
    # before changing.  Behavior preserved here.
    then_openers = ['if', 'endif']

    tokens = Examiner.drop_tokens(self.tokens, ['whitespace', 'comment', 'EOF'])
    tokens = self.join_continued_lines(tokens)
    lines = self.split_tokens_into_lines(tokens)

    total = len(lines)
    correct = 0
    for line in lines:
        if not line:
            correct += 1
            continue
        if line[0].text.lower() not in then_openers:
            correct += 1
            continue
        if line[-1].text.lower() == 'then':
            correct += 1
        else:
            self.errors.append({
                'TYPE': 'LINE FORMAT',
                'FIRST': line[0].text,
                'SECOND': line[-1].text
            })

    confidence = correct / total if total > 0 else 1.0
    self.confidences['line format'] = confidence
    return tokens