Ejemplo n.º 1
0
    def __init__(self, file):
        """
        Set up the logger, bookkeeping attributes and the directories used for parsing.

        ``file`` is only stored as ``self.input_file``; this constructor does not
        open or read it.
        """
        self.log = logging.getLogger()
        self.input_file = file
        # %H (24-hour clock) rather than %I: a 12-hour value without an AM/PM
        # marker produces the same stamp twice a day.
        self.timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S%f")

        # using os temp directory as a work directory
        # self._work_dir = os.path.join(tempfile.gettempdir(), 'tenmarks'+self.timestamp)
        self.work_dir = os.path.join(tempfile.gettempdir(), "tenmarks")
        # image_dir is derived from the temp-based work dir and is NOT updated
        # when work_dir is reassigned below.
        self.image_dir = os.path.join(self.work_dir, 'Images')

        self.log.info('Working dir: "%s"', self.work_dir)
        # TODO(review): the input is never unpacked here -- the call
        # deflate(self.input_file, self.work_dir) is disabled and work_dir is
        # replaced by a fixed local path. This looks like a debugging leftover;
        # confirm before relying on this class outside the original machine.
        self.work_dir = '/home/makra/tenmarks/sample/invalid_samples1'
        self.parse_result = ParseResult()
Ejemplo n.º 2
0
class TenmarksODTExtractor:
    """
    Works on .zip files. Assumes that the extracted zip will contain multiple odt files and an images folder
    """

    def __init__(self, file):
        """
        Set up the logger, bookkeeping attributes and the directories used for parsing.

        ``file`` is only stored as ``self.input_file``; this constructor does not
        open or read it.
        """
        self.log = logging.getLogger()
        self.input_file = file
        # %H (24-hour clock) rather than %I: a 12-hour value without an AM/PM
        # marker produces the same stamp twice a day.
        self.timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S%f")

        # using os temp directory as a work directory
        # self._work_dir = os.path.join(tempfile.gettempdir(), 'tenmarks'+self.timestamp)
        self.work_dir = os.path.join(tempfile.gettempdir(), "tenmarks")
        # image_dir is derived from the temp-based work dir and is NOT updated
        # when work_dir is reassigned below.
        self.image_dir = os.path.join(self.work_dir, 'Images')

        self.log.info('Working dir: "%s"', self.work_dir)
        # TODO(review): the input is never unpacked here -- the call
        # deflate(self.input_file, self.work_dir) is disabled and work_dir is
        # replaced by a fixed local path. This looks like a debugging leftover;
        # confirm before relying on this class outside the original machine.
        self.work_dir = '/home/makra/tenmarks/sample/invalid_samples1'
        self.parse_result = ParseResult()

    def process(self):
        odt_list = []
        for name in os.listdir(self.work_dir):
            if not name.endswith(".odt"):
                continue
            odt_list.append(name)
            content, errors = self.extract_odt(name)
            if errors:
                self.parse_result.add_failure(name, errors, content)
            else:
                self.parse_result.add_success(name, content)
        if not odt_list:
            self.parse_result.add_failure(self.input_file, ["No odt file found"])

    def extract_odt(self, name):
        """
        Process the given odt file.

        ``name`` is a file name relative to ``self.work_dir``. The file is first
        handed to ``ODTParser`` together with an extraction path inside the work
        directory (the file name without its ``.odt`` suffix). If that step
        reports errors they are returned as-is; otherwise the parsed content is
        passed to ``SpecParser`` with ``self.image_dir`` as its resource directory.

        Returns the ``(content, errors)`` pair from the last parser that ran.
        """
        self.log.debug("Parsing file: '%s'", name)
        # Extract odt in a subdir of work_dir
        abs_name = os.path.join(self.work_dir, name)

        # Drop only the literal ".odt" suffix. str.rstrip('.odt') removes any
        # trailing run of the characters '.', 'o', 'd', 't', which turned
        # "report.odt" into "repor" and "todo.odt" into "".
        suffix = '.odt'
        base_name = name[:-len(suffix)] if name.endswith(suffix) else name
        odt_extract_path = os.path.join(self.work_dir, base_name)

        content, errors = ODTParser(abs_name, odt_extract_path).parse()
        if errors:
            # Skip the spec-parsing stage when the odt itself could not be parsed.
            return content, errors

        content, errors = SpecParser(content, name=name, resource_dir=self.image_dir).process()
        return content, errors

    def has_failed(self):
        """
        Return True when ``self.error_dict`` is truthy.
        """
        # NOTE(review): ``error_dict`` is not assigned in the visible ``__init__``
        # (which sets ``parse_result``); confirm it is defined elsewhere,
        # otherwise this lookup raises AttributeError.
        # NOTE(review): only the True branch is visible in this excerpt -- confirm
        # what is returned when ``error_dict`` is empty.
        if self.error_dict:
            return True