Python AnswerCodeSpec Examples

Programming Language: Python

Namespace/Package Name: specs

Class/Type: AnswerCodeSpec

Examples at hotexamples.com: 4

Python AnswerCodeSpec - 4 examples found. These are the top rated real world Python examples of specs.AnswerCodeSpec extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

get_anstype_spec(1)

get_default_spec(1)

is_code_like(1)

Example #1

Show file

File: specparser.py Project: Ajmal-Ismail/toe

 def __init__(self, content, name, resource_dir):
     """
     Set up a parser for already-parsed question content.

     content      -- question data to process; must not be None
     name         -- label stored on the instance as ``self.name``
     resource_dir -- directory handed to ``load_resources``

     Raises Exception when content is None.
     """
     self.log = logging.getLogger()
     if content is None:
         raise Exception("SpecParser cannot be initialized, content is None")

     # Error bookkeeping starts out clean.
     self.is_error = False
     self.errors = None

     # Caller supplied values.
     self.name = name
     self.content = content

     # The resource list has to exist (as None) before load_resources runs,
     # because that method reads the attribute.
     self.resource_list = None
     self.load_resources(resource_dir)

     self.answer_code_spec = AnswerCodeSpec()
     # Matches image placeholders such as <img_3.MD.1_003>.
     self.img_content_re = re.compile(r"(<img_[A-Za-z0-9._]+>)")

Example #2

Show file

File: odtparser.py Project: Ajmal-Ismail/toe

 def __init__(self, target_file, path, serialize_raw=False):
     """
     Prepare parser state and deflate the target file.

     target_file   -- file handed to ``deflate``
     path          -- directory handed to ``deflate``; stored as ``self._path``
     serialize_raw -- flag stored on the instance as ``self.serialize_raw``
     """
     self.log = logging.getLogger()

     # Caller supplied values.
     self.target_file = target_file
     self._path = path
     self.serialize_raw = serialize_raw

     # Name of the XML file that is read from the deflated document.
     self.CONTENT_FILE = 'content.xml'

     # Parsing state.
     self.position = 0
     self.content = []
     self.styles = {}
     self._style_element = None
     self._text_element = None

     # Error bookkeeping starts out clean.
     self.is_error = False
     self.errors = None

     self.answer_code_spec = AnswerCodeSpec()
     deflate(target_file, path)

Example #3

Show file

File: odtparser.py Project: Ajmal-Ismail/toe

class ODTParser:
    """
    Parse the text content of a deflated document into question dicts.

    - Deflates target_file on the given path.
    - Reads content.xml and loads a flat list of data objects. Each object
      contains text and style info in the following format:
        {'text': str or None,
         'style': dict of style attributes/values (or None),
         'position': text position,
         'is_mathml': True (key only present when text is MathML)}
    - Slices the flat list into sub-lists based on the qstart/qend markers,
      i.e. one sub-list contains the data objects of one question.
    - Converts each question list into a question object in which every
      answer-code section is separated. For example:
        {'qstart': [data objects],
         'anstype': [data objects],
         'prompt': [data objects],
         ...
        }
      For now a question object is a plain dict.
    """
    def __init__(self, target_file, path, serialize_raw=False):
        """
        Store the parser state and deflate target_file into path.

        serialize_raw is only stored on the instance; nothing in this class
        reads it at the moment.
        """
        self.log = logging.getLogger()
        self.target_file = target_file
        self._path = path
        self.position = 0
        self.content = []
        self.styles = {}
        self._style_element = None
        self._text_element = None
        self.errors = None
        self.is_error = False
        self.CONTENT_FILE = 'content.xml'
        self.answer_code_spec = AnswerCodeSpec()
        self.serialize_raw = serialize_raw
        deflate(self.target_file, self._path)

    def parse(self):
        """
        Run the full pipeline and return a ``(content, errors)`` tuple.

        On success content is the list of question dicts and errors is None.
        When a step fails, errors lists the messages and content is whatever
        had been loaded before the failure.
        """
        self.parse_xml()
        if self.is_error:
            return self.content, self.errors

        # make a list of questions (each one a list of data objects)
        raw_list = self.slice_dice()
        if self.is_error:
            self.log.error("Slicing data into [] of questions FAILED")
            return self.content, self.errors
        self.log.debug("Slicing data into [] of questions. Done. Questions found %s." % len(raw_list))

        self.content = self.convert_to_dict(raw_list)
        if self.is_error:
            self.log.error("Converting question [] to object FAILED")
        return self.content, self.errors

    def parse_xml(self):
        """
        Load content.xml and flatten its text body into ``self.content``.

        Returns ``self.content``. A document without a usable body is
        recorded as an error instead of raising.
        """
        self.load_components()
        if self._text_element is None:
            msg = "No text body found in '%s'" % self.CONTENT_FILE
            self.log.error(msg)
            self.add_error(msg)
            return self.content

        for child in self._text_element:
            if (self.is_p_tag(child) or self.is_list_tag(child)) and self._is_leaf(child):
                self.extract_leaf(child)
            else:
                self.dig(child)

        return self.content

    def slice_dice(self):
        """
        [] -> [][] - Make question slices based on question boundaries.

        Returns the list of complete questions found before the first
        boundary error. Boundary errors (text outside a question, a nested
        'qstart', or a question that is never closed by 'qend') are recorded
        through ``add_error``.
        """
        questions = list()
        sub_list = None
        for data in self.content:
            text = data.get('text')
            if text is None and sub_list is None:
                # ignoring blank lines outside question boundaries
                continue
            if self.is_qstart(text):
                if sub_list is not None:
                    msg = "Question End 'qend' not found"
                    self.log.error(msg)
                    self.add_error(msg)
                    break
                sub_list = list()
                sub_list.append(data)
                continue
            if text is not None and sub_list is None:
                msg = "Question Start 'qstart' not found"
                self.log.error(msg)
                self.add_error(msg)
                break
            if self.is_qend(text):
                sub_list.append(data)
                questions.append(sub_list)
                sub_list = None
            else:
                sub_list.append(data)
        else:
            # Only reached when no error stopped the loop: a question that is
            # still open at the end of the document must not vanish silently.
            if sub_list is not None:
                msg = "Question End 'qend' not found"
                self.log.error(msg)
                self.add_error(msg)
        return questions

    def convert_to_dict(self, qlist):
        """
        Turn every question slice into a dict of ``code -> [data objects]``.

        A section is stored when the *next* code line is met, so the section
        opened by the last code line of a slice is never stored.
        NOTE(review): that last code is presumably the 'qend' marker, which
        would make dropping it intentional - confirm against the spec.
        """
        questions = list()
        for que_list in qlist:
            question = {}
            code = None
            values = list()
            for line_obj in que_list:
                text = line_obj.get('text')
                if not self.answer_code_spec.is_code_like(text):
                    values.append(line_obj)
                    continue
                if code is not None:
                    # codes are ':' stripped and lowered
                    question[code.strip().rstrip(':').lower()] = values
                    values = list()
                code = text

            questions.append(question)
        return questions

    def load_components(self):
        """
        Parse content.xml and remember its style and text container elements.

        ``_text_element`` stays None when the body has no child element.
        """
        tree = etree.parse(os.path.join(self._path, self.CONTENT_FILE))
        root = tree.getroot()
        for element in root:
            if 'automatic-styles' in element.tag:
                self._style_element = element
            elif 'body' in element.tag:
                # The text container is the first child of the body; an empty
                # body is reported later by parse_xml instead of raising here.
                if len(element):
                    self._text_element = element[0]

    def dig(self, elem):
        """
        Recursively walk a non-leaf element and extract its leaves.

        Lists are delegated to ``process_list``; paragraphs and spans are
        walked node by node; frames are treated as references to an embedded
        object whose content.xml is stored as MathML text.
        """
        if self.is_list_tag(elem):
            self.process_list(elem)
        elif self.is_p_tag(elem) or self.is_span_tag(elem):
            if self._is_leaf(elem):
                self.extract_leaf(elem)
            else:
                for child in elem.xpath("./node()"):
                    # TODO: confirm unicode transformation
                    # NOTE(review): repr() keeps the quotes/escapes of the
                    # unicode literal in the stored text - confirm intended.
                    if isinstance(child, etree._ElementStringResult):
                        self.extract_leaf(elem, text=str(child))
                    elif isinstance(child, etree._ElementUnicodeResult):
                        self.extract_leaf(elem, text=repr(child))
                    elif isinstance(child, etree._Element):
                        if self._is_leaf(child):
                            self.extract_leaf(child)
                        else:
                            self.dig(child)
        elif self.is_equation_tag(elem):
            # A frame without children carries no object reference.
            obj_ref = None
            if len(elem):
                obj_ref = elem[0].get("{http://www.w3.org/1999/xlink}href")
            if obj_ref:
                obj_path = self._path + '/' + obj_ref + '/' + 'content.xml'
                if os.path.exists(obj_path):
                    # The context manager closes the file even if read() fails.
                    with open(obj_path) as ref_file:
                        mathml = ref_file.read()
                    self.extract_leaf(None, mathml=mathml)
                else:
                    msg = "Invalid Object referenced '%s'" % obj_ref
                    self.log.error(msg)
                    self.add_error(msg)

    def extract_leaf(self, element, text=None, mathml=None):
        """
        Append one data object for a leaf to ``self.content``.

        element -- source element (may be None when mathml is given)
        text    -- explicit text overriding ``element.text``
        mathml  -- MathML markup; when given (even empty) the entry is
                   flagged with 'is_mathml' and element is not inspected
        """
        if mathml is None and element.tag.endswith('soft-page-break'):
            return
        self.position += 1
        if mathml is not None:
            # "is not None" (rather than truthiness) keeps an empty MathML
            # file from falling through to code that dereferences element.
            param = {'text': mathml, 'style': None, 'position': self.position, 'is_mathml': True}
            self.content.append(param)
            return
        if not text:
            if self.is_p_tag(element):
                # Paragraph text is wrapped in newlines; an empty paragraph
                # keeps whatever falsy text value was passed in.
                if element.text is not None:
                    text = '\n' + element.text + '\n'
            else:
                text = element.text
        param = {'text': text, 'style': self.get_style(element), 'position': self.position}
        self.content.append(param)

    def process_list(self, element):
        """Extract every child of the first child of a list element."""
        # todo Test variations in multiple documents
        for child in element[0]:
            if self._is_leaf(child):
                self.extract_leaf(child)
            else:
                self.dig(child)

    def get_style(self, element):
        """
        Return the style attributes referenced by element, or None.

        The result is a dict holding all attributes of the style's
        text-properties / paragraph-properties children plus a 'style_name'
        entry (the dict is empty when the style has no such attributes).
        Resolved styles are cached in ``self.styles``.
        """
        style_name = element.get("{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name")
        # Membership test so that styles cached as an empty dict are served
        # from the cache too instead of being resolved again on every call.
        if style_name in self.styles:
            return self.styles[style_name]

        target_property_tag_names = ['text-properties', 'paragraph-properties']
        for child in self._style_element:
            if child.get("{urn:oasis:names:tc:opendocument:xmlns:style:1.0}name") != style_name:
                continue
            style_info = {}
            for tag_name in target_property_tag_names:
                prop_elem = self._get_child(child, tag_name)
                if prop_elem is not None:
                    # Load all style info (bold, italic, ... attributes)
                    for attrib_name in prop_elem.keys():
                        style_info[attrib_name] = prop_elem.get(attrib_name)

            if style_info:
                style_info['style_name'] = style_name
            self.styles[style_name] = style_info

        if style_name in self.styles:
            return self.styles[style_name]
        if style_name not in [None, 'Standard']:
            self.log.info('No style found with name "%s"' % style_name)
        return None

    def _get_child(self, elem, name):
        """Return the first element yielded by ``elem.iter()`` whose tag ends with name, else None."""
        for sub in elem.iter():
            if sub.tag.endswith(name):
                return sub
        return None

    def _is_leaf(self, element):
        """Return True when element has no child elements."""
        return len(element) == 0

    def is_p_tag(self, element):
        """Return True when element is a paragraph ('p') tag."""
        return element.tag.endswith('}p')

    def is_span_tag(self, element):
        """Return True when element is a 'span' tag."""
        return element.tag.endswith('}span')

    def is_list_tag(self, element):
        """Return True when element is a 'list' tag."""
        return element.tag.endswith('}list')

    def is_equation_tag(self, element):
        """Return True when element is a 'frame' tag (handled as an equation)."""
        return element.tag.endswith('}frame')

    def add_error(self, msg):
        """Record msg in ``self.errors`` and flag the parser as failed."""
        if self.errors is None:
            self.errors = list()
        self.is_error = True
        self.errors.append(msg)

    def is_qstart(self, text):
        """Return True when text is the question start marker 'qstart:'."""
        return bool(text) and text.strip().lower() == 'qstart:'

    def is_qend(self, text):
        """Return True when text is the question end marker 'qend:'."""
        return bool(text) and text.strip().lower() == 'qend:'

Example #4

Show file

File: specparser.py Project: Ajmal-Ismail/toe

class SpecParser:
    """
    Accepts a list of Question objects parsed by ODTParser in content,
    flattens the plain-text codes and validates each question against the
    answer-code spec.
    """
    def __init__(self, content, name, resource_dir):
        """
        content      -- list of question dicts; must not be None
        name         -- label stored on the instance
        resource_dir -- directory whose file names are used to check
                        image references

        Raises Exception when content is None.
        """
        self.log = logging.getLogger()
        if content is None:
            raise Exception("SpecParser cannot be initialized, content is None")
        self.content = content
        self.name = name
        self.resource_list = None
        self.load_resources(resource_dir)
        self.answer_code_spec = AnswerCodeSpec()
        self.errors = None
        self.is_error = False
        # Matches image placeholders such as <img_3.MD.1_003>.
        self.img_content_re = re.compile(r"(<img_[A-Za-z0-9._]+>)")

    def process(self):
        """
        Merge, clean and validate the content.

        Returns a ``(content, errors)`` tuple; content is None when
        ``merge_clean`` failed.
        """
        self.content = self.merge_clean(self.content)
        if self.is_error:
            return self.content, self.errors

        if not self.validate():
            self.log.info("Validation Failed")
        return self.content, self.errors

    def validate(self):
        """Validate every question; return True only when all of them pass."""
        #todo Apply spec validation
        validated = True
        # Questions are numbered from 1 in error messages.
        for queno, question in enumerate(self.content, 1):
            if not self.validate_question(question, queno):
                validated = False
        return validated

    def validate_question(self, question, queno):
        """
        Check one question against the spec of its answer type.

        Every code of the spec must be present in the question; plain-text
        codes with a 'values' list must hold one of those values; codes whose
        spec has no 'plain_text' entry are checked for missing images and
        replaced by their cleaned data. Problems are recorded through
        ``add_error``. Returns True when no problem was found.
        """
        anstype = self.get_question_anstype(question)
        anstype_spec = self.answer_code_spec.get_anstype_spec(anstype)
        if not anstype_spec:
            self.log.warning("Answer type spec not defined for '%s'" % anstype)
            # Nothing to validate against; an empty spec keeps the loop
            # below from dereferencing a missing (None) spec.
            anstype_spec = {}
        is_valid = True
        for code in anstype_spec.keys():
            # Question must contain empty or non empty code
            if code not in question:
                msg = "Q#[%s]: Incomlete question. Code '%s' is required. Codes found are %s" % \
                      (queno, code, list(question.keys()))
                self.log.error(msg)
                self.add_error(msg)
                is_valid = False
                continue
            code_data = question.get(code)
            code_spec = anstype_spec.get(code)
            if code_spec.get('plain_text', False) and code_spec.get('values', []):
                values = code_spec.get('values')
                if not self.validate_value(code_data, values):
                    msg = "Q#[%s]: Invalid value '%s'. Code '%s' can only have values %s" % \
                          (queno, self._first_text(code_data), code, values)
                    self.log.error(msg)
                    self.add_error(msg)
                    is_valid = False

            if code_spec.get('plain_text') is None:
                if self.validate_for_images(code_data, queno) is False:
                    is_valid = False
                question[code] = self.clean(code_data)

        return is_valid

    def validate_value(self, data, values):
        """Return True when the stripped text of the first data object is in values."""
        text = self._first_text(data)
        return text is not None and text.strip() in values

    def merge_clean(self, questions):
        """
        Flatten plain-text codes and normalise the answer type.

        Returns questions (modified in place), or None after recording an
        error when a question lacks 'anstype', uses a code unknown to the
        default spec, or has an invalid answer type.
        """
        default_spec = self.answer_code_spec.get_default_spec()
        for queno, question in enumerate(questions, 1):
            if 'anstype' not in question:
                msg = "Q#[%s]: Mandatory code 'anstype' not found" % queno
                self.log.error(msg)
                self.add_error(msg)
                return None
            # Iterate over a snapshot of the keys: values are reassigned below.
            for code in list(question.keys()):
                qdata = question[code]
                try:
                    spec = default_spec[code]
                except KeyError:
                    # An unknown code is reported below instead of raising.
                    spec = None
                if spec is None:
                    msg = "Q#[%s]: Invalid Code %s" % (queno, code)
                    self.log.error(msg)
                    self.add_error(msg)
                    return None
                if spec.get('plain_text', False):
                    # single object instead of a list
                    question[code] = self._merge_as_plain(qdata)

            # anstype value should be flattened by now; an empty list is
            # treated like a missing value and rejected below.
            anstype_data = question.get('anstype')
            anstype = anstype_data[0].get('text') if anstype_data else None
            if anstype:
                anstype = anstype.strip()
                anstype_data[0]['text'] = anstype
            if not self.is_valid_anstype(anstype):
                msg = "Q#[%s]: Invalid answer type value '%s'. Valid values are %s" % \
                      (queno, anstype, AnswerCodeSpec.ANSWER_TYPES)
                self.log.error(msg)
                self.add_error(msg)
                return None

        return questions

    def clean(self, code_data):
        """
        Return the data objects that carry non-blank text.

        The 'style' entry of every kept object is removed in place.
        """
        squeezed = []
        for data in code_data:
            text = data.get('text')
            if text and text.strip():
                data.pop('style', None)
                squeezed.append(data)
        return squeezed

    def validate_for_images(self, text, queno):
        """
        Check that every image placeholder in text refers to a known resource.

        text is a list of data objects. Each missing image is recorded as an
        error. Returns False when at least one image is missing, else True.
        """
        # <img_3.MD.1_003> = 3.MD.1_003.png, <img_3.MD.1_002a> = 3.MD.1_002a.png
        is_valid = True
        for line in text:
            line_text = line.get('text')
            if not line_text:
                continue
            for img in self.img_content_re.findall(line_text):
                if self.ref_object_exist(img) is False:
                    is_valid = False
                    msg = "Q#[%s]: Image not available '%s'" % (queno, img)
                    self.log.error(msg)
                    self.add_error(msg)
        return is_valid

    def is_valid_anstype(self, anstype):
        """Return True when anstype is one of AnswerCodeSpec.ANSWER_TYPES."""
        return anstype in AnswerCodeSpec.ANSWER_TYPES

    def ref_object_exist(self, name):
        """
        Return True when the image named by a placeholder is in the resource list.

        name is a placeholder such as '<img_3.MD.1_003>', which maps to the
        file name '3.MD.1_003.png'.
        """
        if self.resource_list is None:
            return False
        # Remove the wrapper as a prefix/suffix. str.lstrip("<img_") would
        # strip a *set of characters* and mangle names that start with one of
        # them (e.g. '<img_image1>' would become 'age1').
        prefix, suffix = "<img_", ">"
        if name.startswith(prefix):
            name = name[len(prefix):]
        if name.endswith(suffix):
            name = name[:-len(suffix)]
        return name + ".png" in self.resource_list

    def get_question_anstype(self, question):
        """Return the text of the first 'anstype' data object of question."""
        return question.get('anstype')[0].get('text')

    def load_resources(self, dir):
        """
        Fill ``self.resource_list`` with the entry names found in dir.

        Does nothing (apart from a warning) when dir does not exist, and
        leaves an already loaded list untouched.
        """
        if os.path.exists(dir) is False:
            self.log.warning("Image dir not found")
            return
        if self.resource_list is None:
            self.resource_list = list(os.listdir(dir))

    def _first_text(self, data):
        """Return the 'text' of the first data object, or None when data is empty."""
        if not data:
            return None
        return data[0].get('text')

    def _merge_as_plain(self, data):
        """
        Join the text of all data objects into a single data object.

        Objects without text contribute a newline. Returns a one-element list.
        """
        parts = []
        for obj in data:
            t = obj.get('text')
            parts.append('\n' if t is None else t)
        return [{'text': ''.join(parts)}]

    def add_error(self, msg):
        """Record msg in ``self.errors`` and flag the parser as failed."""
        if self.errors is None:
            self.errors = list()
        self.is_error = True
        self.errors.append(msg)