# Example no. 1
# 0
class SpecParser:
    """Validate and normalise a list of questions parsed by ODTParser.

    Each question is used as a mapping from a code name (for example
    ``'anstype'``) to a list of fragment dicts; fragments are read through
    their ``'text'`` entry and may also carry a ``'style'`` entry.

    Problems are collected in ``self.errors`` (a list of messages, or ``None``
    when nothing went wrong) and ``self.is_error`` is set as soon as the first
    one is recorded.
    """

    def __init__(self, content, name, resource_dir):
        """Store the questions and index the image files in ``resource_dir``.

        :param content: list of parsed questions; must not be ``None``.
        :param name: stored on the instance as ``self.name``.
        :param resource_dir: directory whose entries are the image files that
            ``<img_...>`` placeholders may refer to.
        :raises Exception: if ``content`` is ``None``.
        """
        self.log = logging.getLogger()
        if content is None:
            raise Exception("SpecParser cannot be initialized, content is None")
        self.content = content
        self.name = name
        self.resource_list = None
        self.load_resources(resource_dir)
        self.answer_code_spec = AnswerCodeSpec()
        self.errors = None
        self.is_error = False
        self.img_content_re = re.compile(r"(<img_[A-Za-z0-9._]+>)")

    def process(self):
        """Merge, clean and validate the questions.

        :returns: ``(content, errors)``. ``content`` is ``None`` when the merge
            step failed; ``errors`` is ``None`` when no error was recorded.
        """
        self.content = self.merge_clean(self.content)
        if self.is_error:
            # The merge step already failed; validation needs merged content.
            return self.content, self.errors

        if not self.validate():
            self.log.info("Validation Failed")
        return self.content, self.errors

    def validate(self):
        """Validate every question and return ``True`` only if all are valid.

        Every question is checked even after a failure so that all errors are
        collected in a single pass.
        """
        # TODO: apply spec validation
        validated = True
        for index, question in enumerate(self.content):
            if not self.validate_question(question, index + 1):
                validated = False
        return validated

    def validate_question(self, question, queno):
        """Check one question against the spec of its answer type.

        For codes whose spec has no ``'plain_text'`` entry the code data is
        also checked for missing images and then replaced in ``question`` by
        its cleaned form (see :meth:`clean`).

        :param question: mapping of code name to list of fragment dicts.
        :param queno: 1-based question number, used in error messages.
        :returns: ``True`` if no error was recorded for this question.
        """
        anstype = self.get_question_anstype(question)
        anstype_spec = self.answer_code_spec.get_anstype_spec(anstype)
        if not anstype_spec:
            # Parenthesised form is valid on both Python 2 and Python 3.
            print(">>>>>>>>>>>>>>>> Anstype spec not defined for '%s'<<<<<<<<<<<<<<<<<<" % anstype)
            self.log.warning("Answer type spec not defined for '%s'" % anstype)
            # There is no spec to validate against; this is reported as a
            # warning only, so the question is not marked invalid.
            return True
        is_valid = True
        for code in anstype_spec.keys():
            # Question must contain the code, whether its value is empty or not
            if code not in question:
                msg = "Q#[%s]: Incomlete question. Code '%s' is required. Codes found are %s" % \
                      (queno, code, list(question.keys()))
                self.log.error(msg)
                self.add_error(msg)
                is_valid = False
                continue
            code_data = question.get(code)
            code_spec = anstype_spec.get(code)
            if code_spec.get('plain_text', False) and code_spec.get('values', []):
                values = code_spec.get('values')
                if not self.validate_value(code_data, values):
                    # Guard the lookup: code_data may be an empty list.
                    found = code_data[0].get('text') if code_data else None
                    msg = "Q#[%s]: Invalid value '%s'. Code '%s' can only have values %s" % (queno, found, code, values)
                    self.log.error(msg)
                    self.add_error(msg)
                    is_valid = False

            if code_spec.get('plain_text') is None:
                if not self.validate_for_images(code_data, queno):
                    is_valid = False
                question[code] = self.clean(code_data)

        return is_valid

    def validate_value(self, data, values):
        """Return ``True`` if the stripped text of the first fragment is in ``values``.

        Empty ``data`` or a first fragment without text yields ``False``.
        """
        if not data:
            return False
        text = data[0].get('text')
        if text is None:
            return False
        return text.strip() in values

    def merge_clean(self, questions):
        """Flatten plain-text codes and check each question's answer type.

        Codes whose default spec is marked ``'plain_text'`` are merged into a
        single-fragment list in place. Processing stops at the first problem.

        :param questions: list of question mappings.
        :returns: ``questions`` on success, ``None`` after recording an error.
        """
        default_spec = self.answer_code_spec.get_default_spec()
        for queno, question in enumerate(questions, 1):
            if 'anstype' not in question:
                msg = "Q#[%s]: Mandatory code 'anstype' not found" % queno
                self.log.error(msg)
                self.add_error(msg)
                return None
            # Iterate over a snapshot because values are reassigned in the loop.
            for code in list(question.keys()):
                qdata = question[code]
                # .get() so that an unknown code reaches the error branch
                # below instead of raising KeyError.
                spec = default_spec.get(code)
                if spec is None:
                    msg = "Q#[%s]: Invalid Code %s" % (queno, code)
                    self.log.error(msg)
                    self.add_error(msg)
                    return None
                if spec.get('plain_text', False):
                    # single object instead of a list
                    question[code] = self._merge_as_plain(qdata)

            # anstype value should be flattened by now
            anstype = question.get('anstype')[0].get('text')
            if anstype:
                anstype = anstype.strip()
                question.get('anstype')[0]['text'] = anstype
            if not self.is_valid_anstype(anstype):
                msg = "Q#[%s]: Invalid answer type value '%s'. Valid values are %s" % \
                      (queno, anstype, AnswerCodeSpec.ANSWER_TYPES)
                self.log.error(msg)
                self.add_error(msg)
                return None

        return questions

    def clean(self, code_data):
        """Return the fragments that carry non-blank text.

        The ``'style'`` entry is removed from each kept fragment; the fragment
        dicts are modified in place, not copied.
        """
        squeezed = []
        for data in code_data:
            text = data.get('text')
            if text and text.strip():
                data.pop('style', None)
                squeezed.append(data)
        return squeezed

    def validate_for_images(self, text, queno):
        """Check that every ``<img_...>`` placeholder refers to a known image.

        Example: ``<img_3.MD.1_003>`` refers to ``3.MD.1_003.png``.

        :param text: list of fragment dicts.
        :param queno: 1-based question number, used in error messages.
        :returns: ``False`` if at least one referenced image is missing.
        """
        is_valid = True
        for line in text:
            content = line.get('text')
            if not content:
                continue
            for img in self.img_content_re.findall(content):
                if not self.ref_object_exist(img):
                    is_valid = False
                    msg = "Q#[%s]: Image not available '%s'" % (queno, img)
                    self.log.error(msg)
                    self.add_error(msg)
        return is_valid

    def is_valid_anstype(self, anstype):
        """Return whether ``anstype`` is listed in ``AnswerCodeSpec.ANSWER_TYPES``."""
        return anstype in AnswerCodeSpec.ANSWER_TYPES

    def ref_object_exist(self, name):
        """Return whether the image behind an ``<img_NAME>`` placeholder is loaded.

        The placeholder ``<img_NAME>`` is looked up as ``NAME.png`` in
        ``self.resource_list``. Returns ``False`` when no resources are loaded.
        """
        if self.resource_list is None:
            return False
        prefix, suffix = "<img_", ">"
        # Remove the exact prefix/suffix. str.lstrip()/rstrip() would strip a
        # *set of characters* and eat leading letters of names such as
        # "img01" or "mg_1".
        if name.startswith(prefix):
            name = name[len(prefix):]
        if name.endswith(suffix):
            name = name[:-len(suffix)]
        return name + ".png" in self.resource_list

    def get_question_anstype(self, question):
        """Return the text of the first ``'anstype'`` fragment of ``question``."""
        return question.get('anstype')[0].get('text')

    def load_resources(self, dir):
        """Fill ``self.resource_list`` with the entry names found in ``dir``.

        A missing, empty or non-directory path is logged as a warning and
        leaves ``self.resource_list`` untouched. An already loaded list is
        never reloaded.
        """
        if not dir or not os.path.isdir(dir):
            self.log.warning("Image dir not found")
            return
        if self.resource_list is None:
            self.resource_list = list(os.listdir(dir))

    def _merge_as_plain(self, data):
        """Join all fragments into one; a fragment without text becomes a newline.

        :returns: a one-element list ``[{'text': merged_text}]``.
        """
        parts = []
        for obj in data:
            t = obj.get('text')
            parts.append('\n' if t is None else t)
        return [{'text': ''.join(parts)}]

    def add_error(self, msg):
        """Record ``msg`` in ``self.errors`` and flag the parser as failed."""
        if self.errors is None:
            self.errors = list()
        self.is_error = True
        self.errors.append(msg)