Example 1
  def calc_line_format_confidence_ii(self):
    # remove tokens we don't care about
    drop_types = ['whitespace', 'comment', 'EOF']
    tokens = Examiner.drop_tokens(self.tokens, drop_types)

    # join continued lines
    tokens = self.join_continued_lines(tokens)

    # split tokens by lines
    lines = self.split_tokens_into_lines(tokens)

    # check that each line is either blank or starts with a keyword
    num_lines = len(lines)
    num_lines_correct = 0

    for line in lines:
      if len(line) > 0:
        if line[0].group == 'keyword':
          num_lines_correct += 1
        else:
          self.errors.append({
            'TYPE': 'LINE FORMAT',
            'FIRST': line[0].group,
            'SECOND': line[0].text
          })
      else:
        num_lines_correct += 1

    line_format_confidence = 1.0
    if num_lines > 0:
      line_format_confidence = num_lines_correct / num_lines

    self.confidences['line format'] = line_format_confidence

    return tokens
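
All of these examples rely on a Token class and an Examiner.drop_tokens helper that are not shown. Below is a minimal sketch of what they could look like, inferred from how they are used here; the field names, including the meaning of the third constructor argument, are assumptions.

class Token:
    # minimal sketch of the token type assumed by these examples; the real
    # class may differ
    def __init__(self, text, group, is_operand):
        self.text = text          # literal source text of the token
        self.group = group        # e.g. 'keyword', 'newline', 'comment', 'group'
        self.is_operand = is_operand  # assumed meaning of the third argument


class Examiner:
    @staticmethod
    def drop_tokens(tokens, drop_types):
        # keep only the tokens whose group is not in the drop list
        return [token for token in tokens if token.group not in drop_types]
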
Example 2
    def calc_line_format_confidence(self):
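        # count opening braces, and flag any '{' that starts a line;
        # this examiner expects the brace at the end of the previous line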
        drop_types = ['whitespace', 'comment', 'line continuation']
        tokens = Examiner.drop_tokens(self.tokens, drop_types)

        line_bracket_count = 0
        num_bracket_count = 0
        prev2_token = Token('\n', 'newline', False)
        prev_token = Token('\n', 'newline', False)
        for token in tokens:
            if token.group == 'group' and token.text == '{':
                num_bracket_count += 1

                if prev_token.group == 'newline' and\
                  (prev2_token.group != 'group' or prev2_token.text != '{'):
                    line_bracket_count += 1
                    self.errors.append({
                        'TYPE': 'LINE FORMAT',
                        'TOKEN': token.text
                    })

            prev2_token = prev_token
            prev_token = token

        line_format_confidence = 1.0

        if num_bracket_count > 0:
            line_format_confidence = 1.0 - (line_bracket_count /
                                            num_bracket_count)

        self.confidences['line format'] = line_format_confidence
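
A small usage sketch, assuming the Token sketch above: a '{' that opens its own line is counted as a line-format error, while a '{' kept on the same line as the preceding code is not.

# hypothetical token stream; the first '{' follows a newline and is flagged,
# the second '{' shares a line with 'else' and is not
tokens = [
    Token('if', 'keyword', False), Token('(', 'group', False),
    Token('x', 'identifier', False), Token(')', 'group', False),
    Token('\n', 'newline', False),
    Token('{', 'group', False),     # flagged: starts a line
    Token('\n', 'newline', False),
    Token('}', 'group', False), Token('else', 'keyword', False),
    Token('{', 'group', False),     # not flagged: follows 'else'
    Token('\n', 'newline', False),
    Token('}', 'group', False),
]
# num_bracket_count == 2 and line_bracket_count == 1,
# so the resulting confidence is 1.0 - 1 / 2 == 0.5
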
Example 3
    def check_expected_keywords(self):
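        # count the four COBOL division headers;
        # each one is expected to appear exactly once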
        counts = {
            'IDENTIFICATION': 0,
            'ENVIRONMENT': 0,
            'DATA': 0,
            'PROCEDURE': 0
        }

        drop_types = ['newline', 'whitespace', 'comment', 'line continuation']

        tokens = Examiner.drop_tokens(self.tokens, drop_types)

        prev_text = ''
        for token in tokens:
            text = token.text

            if text == 'DIVISION' and prev_text in ['IDENTIFICATION', 'ID']:
                counts['IDENTIFICATION'] += 1
            if text == 'DIVISION' and prev_text == 'ENVIRONMENT':
                counts['ENVIRONMENT'] += 1
            if text == 'DIVISION' and prev_text == 'DATA':
                counts['DATA'] += 1
            if text == 'DIVISION' and prev_text == 'PROCEDURE':
                counts['PROCEDURE'] += 1

            prev_text = text

        expected_keyword_confidence = 1.00
        if counts['IDENTIFICATION'] != 1:
            expected_keyword_confidence -= 0.01
            self.errors.append({
                'TYPE': 'EXPECTED KEYWORD',
                'MISSING': 'IDENTIFICATION or ID DIVISION'
            })

        if counts['ENVIRONMENT'] != 1:
            expected_keyword_confidence -= 0.01
            self.errors.append({
                'TYPE': 'EXPECTED KEYWORD',
                'MISSING': 'ENVIRONMENT DIVISION'
            })

        if counts['DATA'] != 1:
            expected_keyword_confidence -= 0.01
            self.errors.append({
                'TYPE': 'EXPECTED KEYWORD',
                'MISSING': 'DATA DIVISION'
            })

        if counts['PROCEDURE'] != 1:
            expected_keyword_confidence -= 0.01
            self.errors.append({
                'TYPE': 'EXPECTED KEYWORD',
                'MISSING': 'PROCEDURE DIVISION'
            })

        return expected_keyword_confidence
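
A brief worked illustration of the scoring, again assuming the Token sketch above: every missing (or duplicated) division header costs 0.01 off a starting confidence of 1.00.

# hypothetical token stream for a fragment that lacks an ENVIRONMENT DIVISION
words = ['IDENTIFICATION', 'DIVISION', 'DATA', 'DIVISION',
         'PROCEDURE', 'DIVISION']
tokens = [Token(word, 'keyword', False) for word in words]
# if these were the examiner's self.tokens, check_expected_keywords() would
# count IDENTIFICATION, DATA and PROCEDURE once each, record one
# 'EXPECTED KEYWORD' error for the missing ENVIRONMENT DIVISION,
# and return 0.99
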
Example 4
    def calc_line_format_confidence(self):
        # certain keyword lines end in colon

        tokens = self.unwrap_code_lines(self.tokens)

        # drop tokens not used by interpreter
        drop_types = ['whitespace', 'comment']

        tokens = Examiner.drop_tokens(tokens, drop_types)

        # split into lines
        lines = self.split_tokens_to_lines(tokens)

        # check certain lines end in colon
        num_lines = 0
        num_lines_correct = 0
        colon_keywords = ['class', 'def', 'for', 'while', 'if', 'else', 'elif']

        for line in lines:
            if len(line) > 1:
                first_token = line[0]
                last_token = line[-1]

                if first_token.group == 'keyword' and first_token.text in colon_keywords:
                    num_lines += 1

                    if last_token.group == 'operator' and last_token.text == ':':
                        num_lines_correct += 1
                    else:
                        self.errors.append({
                            'TYPE': 'LINE FORMAT',
                            'FIRST': first_token.text,
                            'SECOND': "END '" + last_token.text + "' NOT ':'"
                        })

        line_format_2_confidence = 1.0

        if num_lines > 0:
            line_format_2_confidence = num_lines_correct / num_lines

        self.confidences['line format'] = line_format_2_confidence
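
A short sketch of the kind of line this check flags, again assuming the Token sketch above.

# hypothetical token line for "def greet(name)" with the trailing ':' missing;
# the first token is a colon keyword, so num_lines is incremented, but the last
# token is not ':', so an error is recorded instead of num_lines_correct
line = [
    Token('def', 'keyword', False),
    Token('greet', 'identifier', False),
    Token('(', 'group', False),
    Token('name', 'identifier', False),
    Token(')', 'group', False),
]
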
Example 5
    def check_paired_tokens(self, tokens, open_tokens, close_tokens):
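        # track the nesting level of paired open/close keywords;
        # keywords in open_tokens always open a level, while the conditional
        # openers ('if', 'while', ...) do so only at the start of a statement
        # (after a newline, ';' or '=')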
        level = 0
        min_level = 0
        num_open = 0
        num_close = 0

        prev_token_lower = ''
        prev_token = Token('\n', 'newline', False)

        prev_reqs = [';', '=']
        conditional_openers = ['if', 'case', 'while', 'until', 'unless']

        drop_types = ['whitespace', 'comment', 'line continuation']
        tokens = Examiner.drop_tokens(tokens, drop_types)

        openers_stack = []
        for token in tokens:
            token_lower = token.text.lower()

            if token.group == 'keyword':
                if (token_lower in open_tokens or
                        (token_lower in conditional_openers and
                         (prev_token.group == 'newline' or prev_token_lower in prev_reqs))):
                    num_open += 1
                    level += 1
                    openers_stack.append(token_lower)

            if token_lower in close_tokens:
                num_close += 1
                level -= 1

                if level < min_level:
                    min_level = level

                if len(openers_stack) > 0:
                    openers_stack = openers_stack[:-1]

            prev_token_lower = token_lower
            prev_token = token

        ok = level == 0 and min_level == 0
        return ok, num_open, num_close
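
A usage sketch for a Ruby-like examiner (the open and close keyword lists here are assumptions): keywords in open_tokens always open a level, while the conditional openers count only when they start a statement, so a trailing modifier such as "puts 'hi' if debug" is not treated as an opener.

# hypothetical token stream for:
#   def greet
#     puts 'hi' if debug
#   end
tokens = [
    Token('def', 'keyword', False), Token('greet', 'identifier', False),
    Token('\n', 'newline', False),
    Token('puts', 'identifier', False), Token("'hi'", 'string', False),
    Token('if', 'keyword', False), Token('debug', 'identifier', False),
    Token('\n', 'newline', False),
    Token('end', 'keyword', False), Token('\n', 'newline', False),
]
# calling check_paired_tokens with open_tokens=['def', 'do', 'begin'] and
# close_tokens=['end'] counts one opener ('def') and one closer ('end') and
# returns ok == True with num_open == 1 and num_close == 1; the modifier 'if'
# is not counted because it follows neither a newline nor ';' nor '='
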
Example 6
    def calc_line_format_confidence(self):
        # check PICTURE keywords are followed by a picture element
        # and picture elements are preceded by a PICTURE keyword
        drop_types = [
            'newline', 'whitespace', 'comment', 'line description',
            'line continuation'
        ]

        tokens = Examiner.drop_tokens(self.tokens, drop_types)

        errors = 0
        prev_token = Token('\n', 'newline', False)
        for token in tokens:

            if prev_token.group == 'keyword' and prev_token.text in ['PIC', 'PICTURE']:
                if token.group != 'picture':
                    errors += 1
                    self.errors.append({
                        'TYPE': 'PICTURE',
                        'FIRST': prev_token.text,
                        'SECOND': token.text
                    })

            if token.group == 'picture':
                if prev_token.group != 'keyword' or prev_token.text not in ['PIC', 'PICTURE']:
                    errors += 1
                    self.errors.append({
                        'TYPE': 'PICTURE',
                        'FIRST': prev_token.text,
                        'SECOND': token.text
                    })

        picture_confidence = 1.0

        if len(self.tokens) > 0:
            picture_confidence = 1.0 - errors / len(self.tokens)

        self.confidences['line format'] = picture_confidence
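
An illustration of the two directions of the check, assuming the Token sketch above and a tokenizer that puts picture strings in a 'picture' group.

# hypothetical tokens: one well-formed PIC/picture pairing, followed by a stray
# picture element with no PIC or PICTURE keyword before it
tokens = [
    Token('05', 'number', False),
    Token('AMOUNT', 'identifier', False),
    Token('PIC', 'keyword', False),
    Token('9(5)V99', 'picture', False),   # ok: preceded by PIC
    Token('X(10)', 'picture', False),     # error: not preceded by PIC/PICTURE
]
# with these tokens, errors == 1
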
Example 7
  def calc_line_format_confidence(self):
    # remove tokens we don't care about
    drop_types = ['whitespace', 'comment', 'EOF']
    tokens = Examiner.drop_tokens(self.tokens, drop_types)

    # join continued lines
    tokens = self.join_continued_lines(tokens)

    # split tokens by lines
    lines = self.split_tokens_into_lines(tokens)

    # check that lines that begin with 'if' or 'elseif' end with 'then'
    num_lines = len(lines)
    num_lines_correct = 0

    for line in lines:
      if len(line) > 0:
        if line[0].text.lower() in ['if', 'elseif']:
          if line[-1].text.lower() == 'then':
            num_lines_correct += 1
          else:
            self.errors.append({
              'TYPE': 'LINE FORMAT',
              'FIRST': line[0].text,
              'SECOND': line[-1].text
            })
        else:
          num_lines_correct += 1
      else:
        num_lines_correct += 1

    line_format_confidence = 1.0
    if num_lines > 0:
      line_format_confidence = num_lines_correct / num_lines

    self.confidences['line format'] = line_format_confidence

    return tokens
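
Examples 1 and 7 also call join_continued_lines and split_tokens_into_lines, which are not shown. Below are plausible sketches of those helpers, written as free functions and under the assumption that continuations are marked with 'line continuation' tokens and lines are delimited by 'newline' tokens; the real methods may differ.

def join_continued_lines(tokens):
    # drop each 'line continuation' token together with the newline that
    # follows it, so a wrapped statement becomes a single logical line
    joined = []
    skip_newline = False
    for token in tokens:
        if token.group == 'line continuation':
            skip_newline = True
            continue
        if token.group == 'newline' and skip_newline:
            skip_newline = False
            continue
        skip_newline = False
        joined.append(token)
    return joined


def split_tokens_into_lines(tokens):
    # break the flat token list into one list of tokens per logical line;
    # blank lines come out as empty lists, which the callers above expect
    lines = []
    line = []
    for token in tokens:
        if token.group == 'newline':
            lines.append(line)
            line = []
        else:
            line.append(token)
    if len(line) > 0:
        lines.append(line)
    return lines
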