Esempio n. 1
0
File: latex.py Progetto: sdaps/sdaps
def create_stamp_pdf(survey, output_filename, questionnaire_ids):
    """Build the stamped questionnaire PDF and move it to ``output_filename``.

    An override file ``sdaps.opt`` is written into a fresh temporary
    directory via ``latex.write_override`` and ``latex.compile`` is run on
    ``questionnaire.tex`` there, with the survey directory passed as an
    additional input path. On success the PDF is moved to
    ``output_filename`` and the temporary directory is removed. On failure
    the temporary directory is kept and its path is logged.

    ``questionnaire_ids`` is forwarded to ``latex.write_override``; passing
    ``None`` only triggers a warning.

    Raises AssertionError when no PDF was produced. Any exception raised
    while building is logged and re-raised unchanged.
    """
    if questionnaire_ids is None:
        log.warn(_("There should be no need to stamp a SDAPS Project that uses LaTeX and does not have different questionnaire IDs printed on each sheet.\nI am going to do so anyways."))

    # Temporary directory for TeX files.
    tmpdir = tempfile.mkdtemp(prefix='sdaps-stamp-')
    pdf_path = os.path.join(tmpdir, 'questionnaire.pdf')

    try:
        latex.write_override(survey, os.path.join(tmpdir, 'sdaps.opt'), questionnaire_ids=questionnaire_ids)

        print(_("Running %s now multiple times to generate the stamped questionnaire.") % survey.defs.engine)
        latex.compile(survey.defs.engine, 'questionnaire.tex', tmpdir, inputs=[os.path.abspath(survey.path())])

        if not os.path.exists(pdf_path):
            # Name the engine that was actually handed to latex.compile().
            log.error(_("Error running \"%s\" to compile the LaTeX file.") % survey.defs.engine)
            raise AssertionError('PDF file not generated')

        shutil.move(pdf_path, output_filename)

    except:
        # Deliberately broad: whatever went wrong, tell the user where the
        # intermediate files are, then let the error propagate.
        # The template is translated first and the path substituted after,
        # otherwise the catalog lookup can never match.
        log.error(_("An error occured during creation of the report. Temporary files left in '%s'.") % tmpdir)

        raise

    shutil.rmtree(tmpdir)
Esempio n. 2
0
 def validate(self):
     """Run the base ``Question`` checks, then warn unless there are exactly two answers."""
     Question.validate(self)
     if len(self.obj.answers) != 2:
         template = _(u'%(class)s %(l0)i.%(l1)i got not exactly two answers.')
         # The object is identified by its class name and two-part id.
         log.warn(template % {
             'class': self.obj.__class__.__name__,
             'l0': self.obj.id[0],
             'l1': self.obj.id[1],
         })
Esempio n. 3
0
 def validate(self):
     """Run the base ``Question`` checks, then warn unless there is exactly one box."""
     Question.validate(self)
     if len(self.obj.boxes) != 1:
         template = _(u'%(class)s %(l0)i.%(l1)i got not exactly one box.')
         # The object is identified by its class name and two-part id.
         log.warn(template % {
             'class': self.obj.__class__.__name__,
             'l0': self.obj.id[0],
             'l1': self.obj.id[1],
         })
Esempio n. 4
0
 def validate(self):
     """Run the base ``Question`` checks and verify the setup state.

     A truthy ``self.cache`` raises AssertionError naming the question.
     Otherwise the ``cache`` attribute is deleted and a warning is logged
     when the object has no boxes.
     """
     Question.validate(self)

     if self.cache:
         message = _("Error in question \"%s\"") % self.obj.question
         raise AssertionError(message)
     del self.cache

     if self.obj.boxes:
         return

     template = _(u'%(class)s %(l0)i.%(l1)i got no boxes.')
     log.warn(template % {
         'class': self.obj.__class__.__name__,
         'l0': self.obj.id[0],
         'l1': self.obj.id[1],
     })
Esempio n. 5
0
File: latex.py Progetto: sdaps/sdaps
def unicode_to_latex(string):
    """Return ``string`` with every mapped character replaced for LaTeX.

    The argument is converted with ``str()`` and each key of
    ``unicode_to_latex_mapping`` is replaced by its value. If the result
    still cannot be encoded as ASCII, a warning is logged; the text is
    returned either way.
    """
    text = str(string)
    for original, substitute in unicode_to_latex_mapping.items():
        text = text.replace(original, substitute)

    # Leftover non-ASCII characters are harmless when xelatex is used,
    # because no remapping is needed there. They can also be a hint that
    # the mapping table is missing an entry, hence only a warning.
    try:
        text.encode('ascii')
    except UnicodeEncodeError:
        log.warn(_("Generated string for LaTeX contains unicode characters. This may not work correctly and could mean the LaTeX character map needs to be updated."))

    return text
Esempio n. 6
0
def convert_images(images, outfile, paper_width, paper_height, transform=False):
    """Convert every input page to monochrome and write it to ``outfile``.

    Each page yielded by ``opencv.iter_images_and_pages`` is oriented to
    match the paper (portrait when ``paper_height >= paper_width``),
    sharpened, optionally transformed using its corners, converted to
    monochrome and written with ``image.write_a1_to_tiff``.

    When ``transform`` is true and the corner transformation raises
    AssertionError, a warning is logged and the untransformed page is used.
    """
    want_portrait = paper_height >= paper_width

    for img, filename, page in opencv.iter_images_and_pages(images):
        img = opencv.sharpen(opencv.ensure_orientation(img, want_portrait))

        if transform:
            try:
                img = opencv.transform_using_corners(img, paper_width, paper_height)
            except AssertionError:
                log.warn(_("Could not apply 3D-transformation to image '%s', page %i!") % (filename, page))

        surface = opencv.to_a1_surf(opencv.convert_to_monochrome(img))
        image.write_a1_to_tiff(outfile, surface)
Esempio n. 7
0
    def upgrade(self):
        """Bring all data structures up to file format version 3.

        The steps for every version newer than ``self.version`` are applied
        in order, each announced with a warning, and ``self.version`` is set
        to 3 at the end.
        """
        notice = _('Running upgrade routines for file format version %i')

        if self.version < 2:
            log.warn(notice % 1)
            # Changes between version 1 and 2:
            #  * Simplex surveys get a dummy page added for every image, so
            #    they can be handled the same way as "duplex" mode (and
            #    duplex scans can be supported).
            #  * The data for "Textbox" has a string, which is used in the
            #    report if it contains data.

            if not self.defs.duplex:
                from sdaps.model.sheet import Image

                for sheet in self.sheets:
                    scanned = sheet.images

                    # Start from an empty list and re-add every scanned
                    # image, each followed by an ignored dummy.
                    sheet.images = []
                    for real in scanned:
                        sheet.add_image(real)
                        real.ignored = False

                        placeholder = Image()
                        placeholder.filename = "DUMMY"
                        placeholder.tiff_page = -1
                        placeholder.ignored = True

                        sheet.add_image(placeholder)

            # Give every Textbox an (empty) "text" attribute.
            from sdaps.model.data import Textbox
            for sheet in self.sheets:
                for entry in sheet.data.itervalues():
                    if isinstance(entry, Textbox):
                        entry.text = unicode()

        if self.version < 3:
            log.warn(notice % 2)
            # Reset the recognized and verified flags of every sheet.
            for sheet in self.sheets:
                sheet.recognized = False
                sheet.verified = False

        self.version = 3
Esempio n. 8
0
def create_stamp_pdf(survey, output_filename, questionnaire_ids):
    """Build the stamped questionnaire PDF and move it to ``output_filename``.

    A LaTeX override file ``sdaps.opt`` is written into a fresh temporary
    directory. It sets the survey ID counters, the optional global ID, the
    stamp/pagemark/non-draft switches and, when given, the list of
    questionnaire IDs. ``latex.compile`` is then run on
    ``questionnaire.tex`` with the survey directory as an additional input
    path.

    On success the PDF is moved to ``output_filename`` and the temporary
    directory is removed. On failure the directory is kept and its path is
    logged.

    Raises AssertionError when no PDF was produced. Any exception raised
    while building is logged and re-raised unchanged.
    """
    if questionnaire_ids is None:
        log.warn(
            _(
                "There should be no need to stamp a SDAPS Project that uses LaTeX and does not have different questionnaire IDs printed on each sheet.\nI am going to do so anyways."
            )
        )

    # Temporary directory for TeX files.
    tmpdir = tempfile.mkdtemp()
    pdf_path = os.path.join(tmpdir, "questionnaire.pdf")

    try:
        # Similar to setuptex/setup.py, but we also set questionnaire IDs.
        # The context manager closes the file even when a write fails.
        with open(os.path.join(tmpdir, "sdaps.opt"), "w") as latex_override:
            latex_override.write('% This file exists to force the latex document into "final" mode.\n')
            latex_override.write("% It is parsed after the setup phase of the SDAPS class.\n\n")
            # The survey ID is passed as two counters: the value modulo
            # 2**16 and the value divided by 2**16. Floor division keeps
            # the second one an integer on every Python version.
            latex_override.write("\\setcounter{surveyidlshw}{%i}\n" % (survey.survey_id % (2 ** 16)))
            latex_override.write("\\setcounter{surveyidmshw}{%i}\n" % (survey.survey_id // (2 ** 16)))
            latex_override.write("\\def\\surveyid{%i}\n" % (survey.survey_id))
            if survey.global_id is not None:
                latex_override.write("\\def\\globalid{%s}\n" % tex_quote_braces(survey.global_id))
            latex_override.write("\\@STAMPtrue\n")
            latex_override.write("\\@PAGEMARKtrue\n")
            latex_override.write("\\@sdaps@draftfalse\n")
            if questionnaire_ids is not None:
                quoted_ids = [tex_quote_braces(str(qid)) for qid in questionnaire_ids]
                latex_override.write("\\def\\questionnaireids{{%s}}\n" % "},{".join(quoted_ids))

        # Parenthesised so the line is valid as a Python 2 statement and as
        # a Python 3 function call.
        print(_("Running %s now twice to generate the stamped questionnaire.") % defs.latex_engine)
        latex.compile("questionnaire.tex", tmpdir, inputs=[os.path.abspath(survey.path())])

        if not os.path.exists(pdf_path):
            log.error(_('Error running "%s" to compile the LaTeX file.') % defs.latex_engine)
            raise AssertionError("PDF file not generated")

        shutil.move(pdf_path, output_filename)

    except:
        # Deliberately broad: whatever went wrong, tell the user where the
        # intermediate files are, then let the error propagate.
        # The template is translated first and the path substituted after,
        # otherwise the catalog lookup can never match.
        log.error(_("An error occured during creation of the report. Temporary files left in '%s'.") % tmpdir)

        raise

    shutil.rmtree(tmpdir)
Esempio n. 9
0
def create_stamp_pdf(survey, output_filename, questionnaire_ids):
    """Build the stamped questionnaire PDF and move it to ``output_filename``.

    An override file ``sdaps.opt`` is written into a fresh temporary
    directory via ``latex.write_override`` and ``latex.compile`` is run on
    ``questionnaire.tex`` there, with the survey directory passed as an
    additional input path. On success the PDF is moved to
    ``output_filename`` and the temporary directory is removed. On failure
    the temporary directory is kept and its path is logged.

    ``questionnaire_ids`` is forwarded to ``latex.write_override``; passing
    ``None`` only triggers a warning.

    Raises AssertionError when no PDF was produced. Any exception raised
    while building is logged and re-raised unchanged.
    """
    if questionnaire_ids is None:
        log.warn(
            _("There should be no need to stamp a SDAPS Project that uses LaTeX and does not have different questionnaire IDs printed on each sheet.\nI am going to do so anyways."
              ))

    # Temporary directory for TeX files.
    tmpdir = tempfile.mkdtemp(prefix='sdaps-stamp-')
    pdf_path = os.path.join(tmpdir, 'questionnaire.pdf')

    try:
        latex.write_override(survey,
                             os.path.join(tmpdir, 'sdaps.opt'),
                             questionnaire_ids=questionnaire_ids)

        print(
            _("Running %s now twice to generate the stamped questionnaire.") %
            defs.latex_engine)
        latex.compile('questionnaire.tex',
                      tmpdir,
                      inputs=[os.path.abspath(survey.path())])

        if not os.path.exists(pdf_path):
            log.error(
                _("Error running \"%s\" to compile the LaTeX file.") %
                defs.latex_engine)
            raise AssertionError('PDF file not generated')

        shutil.move(pdf_path, output_filename)

    except:
        # Deliberately broad: whatever went wrong, tell the user where the
        # intermediate files are, then let the error propagate.
        # The template is translated first and the path substituted after,
        # otherwise the catalog lookup can never match.
        log.error(
            _("An error occured during creation of the report. Temporary files left in '%s'."
              ) % tmpdir)

        raise

    shutil.rmtree(tmpdir)
Esempio n. 10
0
    def duplex_copy_image_attr(self, failed_pages, attr, error_msg=None):
        """Share an image attribute between the two images of each pair.

        The images of ``self.obj`` are processed in consecutive pairs
        (indices 0/1, 2/3, ...). When exactly one image of a pair has
        ``attr`` set to ``None``, it receives the value of the other image.

        When both are ``None`` and ``error_msg`` is given, a warning is
        logged unless one of the two indices is in ``failed_pages``.
        ``error_msg`` is formatted with the filename and tiff page of both
        images.
        """
        images = self.obj.images

        for index in range(0, len(images), 2):
            pair_failed = index in failed_pages or index + 1 in failed_pages

            first, second = images[index], images[index + 1]
            first_value = getattr(first, attr)
            second_value = getattr(second, attr)

            if first_value is None:
                if second_value is not None:
                    setattr(first, attr, second_value)
                elif error_msg is not None and not pair_failed:
                    log.warn(error_msg % (first.filename, first.tiff_page, second.filename, second.tiff_page))
            elif second_value is None:
                setattr(second, attr, first_value)
Esempio n. 11
0
    def duplex_copy_image_attr(self, failed_pages, attr, error_msg=None):
        """Share an image attribute between the two images of each pair.

        The images of ``self.obj`` are processed in consecutive pairs
        (indices 0/1, 2/3, ...). When exactly one image of a pair has
        ``attr`` set to ``None``, it receives the value of the other image.

        When both are ``None`` and ``error_msg`` is given, a warning is
        logged unless one of the two indices is in ``failed_pages``.
        ``error_msg`` is formatted with the filename and tiff page of both
        images.
        """
        images = self.obj.images

        for index in range(0, len(images), 2):
            pair_failed = index in failed_pages or index + 1 in failed_pages

            first, second = images[index], images[index + 1]
            first_value = getattr(first, attr)
            second_value = getattr(second, attr)

            if first_value is None:
                if second_value is not None:
                    setattr(first, attr, second_value)
                elif error_msg is not None and not pair_failed:
                    log.warn(error_msg % (first.filename, first.tiff_page, second.filename, second.tiff_page))
            elif second_value is None:
                setattr(second, attr, first_value)
Esempio n. 12
0
def convert_images(images,
                   outfile,
                   paper_width,
                   paper_height,
                   transform=False):
    """Convert every input page to monochrome and write it to ``outfile``.

    Each page yielded by ``opencv.iter_images_and_pages`` is oriented to
    match the paper (portrait when ``paper_height >= paper_width``),
    sharpened, optionally transformed using its corners, converted to
    monochrome and written with ``image.write_a1_to_tiff``.

    When ``transform`` is true and the corner transformation raises
    AssertionError, a warning is logged and the untransformed page is used.
    """
    want_portrait = paper_height >= paper_width

    for img, filename, page in opencv.iter_images_and_pages(images):
        oriented = opencv.ensure_orientation(img, want_portrait)
        img = opencv.sharpen(oriented)

        if transform:
            try:
                img = opencv.transform_using_corners(img, paper_width,
                                                     paper_height)
            except AssertionError:
                template = _("Could not apply 3D-transformation to image '%s', page %i!")
                log.warn(template % (filename, page))

        mono = opencv.convert_to_monochrome(img)
        surface = opencv.to_a1_surf(mono)
        image.write_a1_to_tiff(outfile, surface)
Esempio n. 13
0
    def upgrade(self):
        """Bring all data structures up to file format version 4.

        The steps for every version newer than ``self.version`` are applied
        in order, each announced with a warning, and ``self.version`` is set
        to 4 at the end.
        """
        notice = _('Running upgrade routines for file format version %i')

        if self.version < 2:
            log.warn(notice % 1)
            # Changes between version 1 and 2:
            #  * Simplex surveys get a dummy page added for every image, so
            #    they can be handled the same way as "duplex" mode (and
            #    duplex scans can be supported).
            #  * The data for "Textbox" has a string, which is used in the
            #    report if it contains data.

            if not self.defs.duplex:
                from sdaps.model.sheet import Image

                for sheet in self.sheets:
                    scanned = sheet.images

                    # Start from an empty list and re-add every scanned
                    # image, each followed by an ignored dummy.
                    sheet.images = []
                    for real in scanned:
                        sheet.add_image(real)
                        real.ignored = False

                        placeholder = Image()
                        placeholder.filename = "DUMMY"
                        placeholder.tiff_page = -1
                        placeholder.ignored = True

                        sheet.add_image(placeholder)

            # Give every Textbox an (empty) "text" attribute.
            from sdaps.model.data import Textbox
            for sheet in self.sheets:
                for entry in sheet.data.itervalues():
                    if isinstance(entry, Textbox):
                        entry.text = unicode()

        if self.version < 3:
            log.warn(notice % 3)
            # Reset the recognized and verified flags of every sheet.
            for sheet in self.sheets:
                sheet.recognized = False
                sheet.verified = False

        if self.version < 4:
            log.warn(notice % 4)
            # Older files get the "checkcorrect" check mode.
            self.defs.checkmode = "checkcorrect"

        self.version = 4
Esempio n. 14
0
 def validate(self):
     """Log a warning when the object has no (or an empty) question text."""
     if self.obj.question:
         return

     template = _(u'%(class)s %(l0)i.%(l1)i got no question.')
     # The object is identified by its class name and two-part id.
     log.warn(template % {
         'class': self.obj.__class__.__name__,
         'l0': self.obj.id[0],
         'l1': self.obj.id[1],
     })
Esempio n. 15
0
 def validate(self):
     """Log a warning when the head has no (or an empty) title."""
     if self.obj.title:
         return

     template = _(u'Head %(l0)i got no title.')
     log.warn(template % {'l0': self.obj.id[0]})
Esempio n. 16
0
def setup(survey_dir, questionnaire_tex, engine, additionalqobjects=None, extra_files=[]):
    """Create a new survey directory from a LaTeX questionnaire.

    Steps, in order:

    * refuse to run if ``survey_dir`` already exists,
    * sanity-check the MIME types of ``questionnaire_tex`` (warning only)
      and ``additionalqobjects`` (hard error),
    * create the survey, copy the questionnaire, the class/tex/sty/dict
      support files and every entry of ``extra_files`` into it,
    * compile the questionnaire in draft mode and parse the resulting SDAPS
      file (plus ``additionalqobjects`` if given),
    * calculate the survey ID, check the settings and compile again so the
      final ID ends up in the PDF,
    * print the title and info items and save the survey.

    ``extra_files`` is only iterated, never modified. Directories in it are
    copied recursively, everything else as a plain file.

    Returns 1 when the directory exists, when ``additionalqobjects`` has an
    unexpected type or when ``survey.check_settings()`` fails (the survey
    directory is removed in the last case). Returns None on success.

    Raises AssertionError when a compile run produced no PDF. Any exception
    raised after the survey was created is logged and re-raised; the survey
    directory is left in place for inspection.
    """
    if os.access(survey_dir, os.F_OK):
        log.error(_('The survey directory already exists.'))
        return 1

    # An empty MIME type is accepted for both files.
    mime = mimetype(questionnaire_tex)
    if mime != 'text/x-tex' and mime != '':
        log.warn(_('Unknown file type (%s). questionnaire_tex should be of type text/x-tex.') % mime)
        log.warn(_('Will keep going, but expect failure!'))

    if additionalqobjects is not None:
        mime = mimetype(additionalqobjects)
        if mime != 'text/plain' and mime != '':
            log.error(_('Unknown file type (%s). additionalqobjects should be text/plain.') % mime)
            return 1

    # Create the survey directory, and copy the tex file.
    survey = model.survey.Survey.new(survey_dir)
    survey.defs.engine = engine

    # Add the new questionnaire
    survey.add_questionnaire(model.questionnaire.Questionnaire())

    try:
        shutil.copy(questionnaire_tex, survey.path('questionnaire.tex'))

        latex.write_override(survey, survey.path('sdaps.opt'), draft=True)

        # Copy class and dictionary files
        if paths.local_run:
            cls_extra_files = os.path.join(paths.source_dir, 'tex', '*.cls')
            cls_files = os.path.join(paths.source_dir, 'tex', 'class', 'build', 'local', '*.cls')
            tex_files = os.path.join(paths.source_dir, 'tex', 'class', 'build', 'local', '*.tex')
            sty_files = os.path.join(paths.source_dir, 'tex', 'class', 'build', 'local', '*.sty')
            dict_files = os.path.join(paths.build_dir, 'tex', '*.dict')
        else:
            cls_extra_files = None
            cls_files = os.path.join(paths.prefix, 'share', 'sdaps', 'tex', '*.cls')
            tex_files = os.path.join(paths.prefix, 'share', 'sdaps', 'tex', '*.tex')
            sty_files = os.path.join(paths.prefix, 'share', 'sdaps', 'tex', '*.sty')
            dict_files = os.path.join(paths.prefix, 'share', 'sdaps', 'tex', '*.dict')

        def copy_to_survey(files_glob):
            # Copy every file matching the glob into the survey directory.
            for src in glob.glob(files_glob):
                shutil.copyfile(src, survey.path(os.path.basename(src)))

        if cls_extra_files is not None:
            copy_to_survey(cls_extra_files)
        copy_to_survey(cls_files)
        copy_to_survey(tex_files)
        copy_to_survey(sty_files)
        copy_to_survey(dict_files)

        for add_file in extra_files:
            target = survey.path(os.path.basename(add_file))
            if os.path.isdir(add_file):
                shutil.copytree(add_file, target)
            else:
                shutil.copyfile(add_file, target)

        print(_("Running %s now multiple times to generate the questionnaire.") % survey.defs.engine)
        latex.compile(survey.defs.engine, 'questionnaire.tex', cwd=survey.path())

        if not os.path.exists(survey.path('questionnaire.pdf')):
            # Name the engine that was actually handed to latex.compile().
            print(_("Error running \"%s\" to compile the LaTeX file.") % survey.defs.engine)
            raise AssertionError('PDF file not generated')

        survey.defs.print_questionnaire_id = False
        survey.defs.print_survey_id = True

        # Parse qobjects
        try:
            sdapsfileparser.parse(survey)

            for qobject in survey.questionnaire.qobjects:
                qobject.setup.setup()
                qobject.setup.validate()

        except:
            # Dump what has been parsed so far to help locating the problem,
            # then let the error propagate.
            log.error(_("Caught an Exception while parsing the SDAPS file. The current state is:"))
            print(str(survey.questionnaire), file=sys.stderr)
            print("------------------------------------", file=sys.stderr)

            raise

        # Parse additionalqobjects
        if additionalqobjects:
            additionalparser.parse(survey, additionalqobjects)

        # Last but not least calculate the survey id
        survey.calculate_survey_id()

        if not survey.check_settings():
            log.error(_("Some combination of options and project properties do not work. Aborted Setup."))
            shutil.rmtree(survey.path())
            return 1

        # We need to now rebuild everything so that the correct ID is at the bottom.
        # Disable draft mode if the survey doesn't have questionnaire IDs.
        latex.write_override(survey, survey.path('sdaps.opt'), draft=survey.defs.print_questionnaire_id)
        # Same message as for the first run, so both share one translation.
        print(_("Running %s now multiple times to generate the questionnaire.") % survey.defs.engine)
        os.remove(survey.path('questionnaire.pdf'))
        latex.compile(survey.defs.engine, 'questionnaire.tex', survey.path())

        if not os.path.exists(survey.path('questionnaire.pdf')):
            print(_("Error running \"%s\" to compile the LaTeX file.") % survey.defs.engine)
            raise AssertionError('PDF file not generated')

        # Print the result
        print(survey.title)

        for item in survey.info.items():
            print('%s: %s' % item)

        log.logfile.open(survey.path('log'))

        survey.save()
        log.logfile.close()
    except:
        # Deliberately broad: point the user at the LaTeX log whatever went
        # wrong, then let the error propagate.
        log.error(_("An error occured in the setup routine. The survey directory still exists. You can for example check the questionnaire.log file for LaTeX compile errors."))
        raise
Esempio n. 17
0
def create_stamp_pdf(survey, output_filename, questionnaire_ids):
    """Build the stamped questionnaire PDF and move it to ``output_filename``.

    The questionnaire, ``code128.tex``, ``sdaps.cls`` and all ``*.dict``
    files are copied from the survey directory into a fresh temporary
    directory. A LaTeX override file ``sdaps.opt`` is written there, which
    sets the survey ID counters, the optional global ID, the
    stamp/pagemark/non-draft switches and, when given, the list of
    questionnaire IDs. ``defs.latex_engine`` is then run twice on the
    questionnaire.

    On success the PDF is moved to ``output_filename`` and the temporary
    directory is removed. On failure the directory is kept and its path is
    logged.

    Side effect: ``TEXINPUTS`` in ``os.environ`` is overwritten.

    Raises AssertionError when no PDF was produced. Any exception raised
    while building is logged and re-raised unchanged.
    """
    if questionnaire_ids is None:
        log.warn(_("There should be no need to stamp a SDAPS Project that uses LaTeX and does not have different questionnaire IDs printed on each sheet.\nI am going to do so anyways."))

    # Temporary directory for TeX files.
    tmpdir = tempfile.mkdtemp()
    pdf_path = os.path.join(tmpdir, 'questionnaire.pdf')

    try:
        # Copy class and dictionary files
        tex_file = survey.path('questionnaire.tex')
        code128_file = survey.path('code128.tex')
        cls_file = survey.path('sdaps.cls')
        dict_files = glob.glob(survey.path('*.dict'))

        shutil.copyfile(tex_file, os.path.join(tmpdir, 'questionnaire.tex'))
        shutil.copyfile(code128_file, os.path.join(tmpdir, 'code128.tex'))
        shutil.copyfile(cls_file, os.path.join(tmpdir, 'sdaps.cls'))
        for dict_file in dict_files:
            shutil.copyfile(dict_file, os.path.join(tmpdir, os.path.basename(dict_file)))

        # Similar to setuptex/setup.py, but we also set questionnaire IDs.
        # The context manager closes the file even when a write fails.
        with open(os.path.join(tmpdir, 'sdaps.opt'), 'w') as latex_override:
            latex_override.write('% This file exists to force the latex document into "final" mode.\n')
            latex_override.write('% It is parsed after the setup phase of the SDAPS class.\n\n')
            # The survey ID is passed as two counters: the value modulo
            # 2**16 and the value divided by 2**16. Floor division keeps
            # the second one an integer on every Python version.
            latex_override.write('\\setcounter{surveyidlshw}{%i}\n' % (survey.survey_id % (2 ** 16)))
            latex_override.write('\\setcounter{surveyidmshw}{%i}\n' % (survey.survey_id // (2 ** 16)))
            latex_override.write('\\def\\surveyid{%i}\n' % (survey.survey_id))
            if survey.global_id is not None:
                latex_override.write('\\def\\globalid{%s}\n' % tex_quote_braces(survey.global_id))
            latex_override.write('\\@STAMPtrue\n')
            latex_override.write('\\@PAGEMARKtrue\n')
            latex_override.write('\\@sdaps@draftfalse\n')
            if questionnaire_ids is not None:
                quoted_ids = [tex_quote_braces(str(qid)) for qid in questionnaire_ids]
                latex_override.write('\\def\\questionnaireids{{%s}}\n' % '},{'.join(quoted_ids))

        # Parenthesised so the line is valid as a Python 2 statement and as
        # a Python 3 function call.
        print(_("Running %s now twice to generate the stamped questionnaire.") % defs.latex_engine)
        os.environ['TEXINPUTS'] = ':' + os.path.abspath(survey.path())
        command = [defs.latex_engine, '-halt-on-error',
                   '-interaction', 'batchmode',
                   os.path.join(tmpdir, 'questionnaire.tex')]
        # Two identical runs; the exit status is not inspected, success is
        # judged by the presence of the PDF below.
        for attempt in range(2):
            subprocess.call(command, cwd=tmpdir)

        if not os.path.exists(pdf_path):
            log.error(_("Error running \"%s\" to compile the LaTeX file.") % defs.latex_engine)
            raise AssertionError('PDF file not generated')

        shutil.move(pdf_path, output_filename)

    except:
        # Deliberately broad: whatever went wrong, tell the user where the
        # intermediate files are, then let the error propagate.
        # The template is translated first and the path substituted after,
        # otherwise the catalog lookup can never match.
        log.error(_("An error occured during creation of the report. Temporary files left in '%s'.") % tmpdir)

        raise

    shutil.rmtree(tmpdir)
Esempio n. 18
0
    def recognize(self):
        """Recognize the basic properties of every image of this sheet.

        For each image the transformation matrix, rotation, page number
        and, depending on the survey settings, the survey ID, questionnaire
        ID and global ID are determined. In duplex mode, values that could
        only be read from one image of a pair are carried over to the other
        one.

        Results are stored on the images and on ``self.obj`` (``survey_id``,
        ``questionnaire_id`` when questionnaire IDs are printed,
        ``global_id``). ``self.obj.valid`` starts out as 1 and is set to 0
        when any page failed or the survey ID does not match the survey.

        Recognition problems are logged as warnings, they do not raise.
        """
        global warned_multipage_not_correctly_scanned

        self.obj.valid = 1

        duplex_mode = self.obj.survey.defs.duplex

        # Load all images of this sheet
        for image in self.obj.images:
            image.rotated = 0
            image.surface.load()

        # Indices (into self.obj.images) of pages where something went wrong.
        failed_pages = set()

        # Matrix recognition for all of them
        for page, image in enumerate(self.obj.images):
            try:
                image.recognize.calculate_matrix()
            except RecognitionError:
                log.warn(_('%s, %i: Matrix not recognized.') % (image.filename, image.tiff_page))
                failed_pages.add(page)

        # Rotation for all of them
        for page, image in enumerate(self.obj.images):
            try:
                # This may set the rotation to "None" for unknown
                image.recognize.calculate_rotation()
            except RecognitionError:
                log.warn(_('%s, %i: Rotation not found.') % (image.filename, image.tiff_page))
                failed_pages.add(page)

        # In simplex mode, all rotations have to be there now,
        # in duplex mode we may need to copy them over from the other page.
        if duplex_mode:
            i = 0
            while i < len(self.obj.images):
                # Try to recover the page rotation
                failed = (i in failed_pages or i + 1 in failed_pages)

                first = self.obj.images[i]
                second = self.obj.images[i + 1]

                if first.rotated is None and second.rotated is None:
                    # Whoa, that should not happen.
                    if not failed:
                        log.warn(_("Neither %s, %i or %s, %i has a known rotation!") %
                                 (first.filename, first.tiff_page, second.filename, second.tiff_page))
                        failed_pages.add(i)
                        failed_pages.add(i + 1)
                elif first.rotated is None:
                    first.rotated = second.rotated
                elif second.rotated is None:
                    second.rotated = first.rotated
                elif first.rotated != second.rotated:
                    if not failed:
                        log.warn(_("Found inconsistency. %s, %i and %s, %i should have the same rotation, but don't!") %
                                 (first.filename, first.tiff_page, second.filename, second.tiff_page))
                        failed_pages.add(i)
                        failed_pages.add(i + 1)

                i += 2

        # Reload any image that is rotated.
        for page, image in enumerate(self.obj.images):
            if image.rotated:
                image.surface.load()
                # And redo the whole matrix stuff ...
                # XXX: It would be better to manipulate the matrix instead.
                try:
                    image.recognize.calculate_matrix()
                except RecognitionError:
                    log.warn(_('%s, %i: Matrix not recognized (again).') % (image.filename, image.tiff_page))
                    failed_pages.add(page)

        ############
        # At this point we can extract the page numbers and IDs as necessary.
        ############

        # Figure out the page numbers
        # ***************************
        for page, image in enumerate(self.obj.images):
            try:
                # This may set the page_number to "None" for unknown
                image.recognize.calculate_page_number()
            except RecognitionError:
                log.warn(_('%s, %i: Could not get page number.') % (image.filename, image.tiff_page))
                image.page_number = None
                failed_pages.add(page)

        if duplex_mode:
            i = 0
            while i < len(self.obj.images):
                # We try to recover at least the page number of failed pages
                # this way.
                failed = (i in failed_pages or i + 1 in failed_pages)

                first = self.obj.images[i]
                second = self.obj.images[i + 1]

                if first.page_number is None and second.page_number is None:
                    if not failed:
                        # Whoa, that should not happen.
                        log.warn(_("Neither %s, %i or %s, %i has a known page number!") %
                                 (first.filename, first.tiff_page, second.filename, second.tiff_page))
                        failed_pages.add(i)
                        failed_pages.add(i + 1)
                elif first.page_number is None:
                    # One based, odd -> +1, even -> -1
                    first.page_number = second.page_number - 1 + 2 * (second.page_number % 2)
                elif second.page_number is None:
                    second.page_number = first.page_number - 1 + 2 * (first.page_number % 2)
                elif first.page_number != (second.page_number - 1 + 2 * (second.page_number % 2)):
                    if not failed:
                        log.warn(_("Images %s, %i and %s, %i do not have consecutive page numbers!") %
                                 (first.filename, first.tiff_page, second.filename, second.tiff_page))

                        failed_pages.add(i)
                        failed_pages.add(i + 1)

                i += 2

        # Check that every page has a non None value, and each page exists once.
        pages = set()
        for i, image in enumerate(self.obj.images):
            if image.page_number is None:
                log.warn(_("No page number for page %s, %i exists.") % (image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            if image.page_number in pages:
                log.warn(_("Page number for page %s, %i already used by another image.") %
                         (image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            if image.page_number <= 0 or image.page_number > self.obj.survey.questionnaire.page_count:
                log.warn(_("Page number %i for page %s, %i is out of range.") %
                         (image.page_number, image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            pages.add(image.page_number)

        # Figure out the survey ID if necessary
        # *************************************
        if self.obj.survey.defs.print_survey_id:
            for page, image in enumerate(self.obj.images):
                try:
                    # In duplex mode the ID is only read from images with an
                    # even page number.
                    if not duplex_mode or (image.page_number is not None and image.page_number % 2 == 0):
                        image.recognize.calculate_survey_id()
                    else:
                        image.survey_id = None
                except RecognitionError:
                    log.warn(_('%s, %i: Could not read survey ID, but should be able to.') %
                             (image.filename, image.tiff_page))
                    failed_pages.add(page)

            self.duplex_copy_image_attr(failed_pages, "survey_id")

            # Simply use the survey ID from the first image globally
            self.obj.survey_id = self.obj.images[0].survey_id

            if self.obj.survey_id != self.obj.survey.survey_id:
                # Broken survey ID ...
                log.warn(_("Got a wrong survey ID (%s, %i)! It is %s, but should be %i.") %
                         (self.obj.images[0].filename,
                          self.obj.images[0].tiff_page,
                          self.obj.survey_id,
                          self.obj.survey.survey_id))
                self.obj.valid = 0
        else:
            # Assume that the data is from the correct survey
            self.obj.survey_id = self.obj.survey.survey_id
            for image in self.obj.images:
                image.survey_id = self.obj.survey.survey_id

        # Figure out the questionnaire ID if necessary
        # *********************************************
        if self.obj.survey.defs.print_questionnaire_id:
            questionnaire_ids = []

            for page, image in enumerate(self.obj.images):
                try:
                    if not duplex_mode or (image.page_number is not None and image.page_number % 2 == 0):
                        image.recognize.calculate_questionnaire_id()
                except RecognitionError:
                    log.warn(_('%s, %i: Could not read questionnaire ID, but should be able to.') %
                             (image.filename, image.tiff_page))
                    failed_pages.add(page)

            self.duplex_copy_image_attr(failed_pages, "questionnaire_id")

            self.obj.questionnaire_id = self.obj.images[0].questionnaire_id

        # Try to load the global ID. If it does not exist we will get None, if
        # it does, then it will be non-None. We don't care much about it
        # internally anyways.
        # However, we do want to ensure that it is the same everywhere if it
        # can be read in.
        # *********************************************
        for page, image in enumerate(self.obj.images):
            try:
                if not duplex_mode or (image.page_number is not None and image.page_number % 2 == 0):
                    image.recognize.calculate_global_id()
            except RecognitionError:
                # Best effort: an unreadable global ID is not an error.
                pass

        self.duplex_copy_image_attr(failed_pages, "global_id")

        self.obj.global_id = self.obj.images[0].global_id

        for image in self.obj.images:
            if self.obj.global_id != image.global_id or \
                self.obj.survey_id != image.survey_id or \
                self.obj.questionnaire_id != image.questionnaire_id:

                # Only warn once per process, the module level flag
                # remembers that the warning was already shown.
                if not warned_multipage_not_correctly_scanned:
                    log.warn(_("Got different IDs on different pages for at least one sheet! Do *NOT* try to use filters with this survey! You have to run a \"reorder\" step for this to work properly!"))

                    warned_multipage_not_correctly_scanned = True

        # Done
        if failed_pages:
            self.obj.valid = 0
Esempio n. 19
0
def create_stamp_pdf(survey, output_filename, questionnaire_ids):
    """Compile the stamped questionnaire and move the PDF to output_filename.

    The questionnaire, the SDAPS class files and all ``*.dict`` files are
    copied from the survey directory into a fresh temporary directory.  An
    ``sdaps.opt`` override file carrying the survey ID, the optional global
    ID and the optional questionnaire IDs is written next to them, and the
    engine named by ``defs.latex_engine`` is run twice in that directory.

    Args:
        survey: Survey object; ``survey.path()``, ``survey.survey_id`` and
            ``survey.global_id`` are read.
        output_filename: Destination of the generated ``questionnaire.pdf``.
        questionnaire_ids: Iterable of IDs to stamp, or None.  With None a
            warning is logged and no questionnaire ID list is written.

    Raises:
        AssertionError: If no ``questionnaire.pdf`` exists after both runs.

    Any exception is logged and re-raised; in that case the temporary
    directory is deliberately left behind for inspection.
    """

    if questionnaire_ids is None:
        log.warn(
            _(
                "There should be no need to stamp a SDAPS Project that uses LaTeX and does not have different questionnaire IDs printed on each sheet.\nI am going to do so anyways."
            )
        )

    # Temporary directory for TeX files.
    tmpdir = tempfile.mkdtemp()

    try:
        # Copy the questionnaire, class and dictionary files.
        for name in ("questionnaire.tex", "code128.tex", "sdaps.cls"):
            shutil.copyfile(survey.path(name), os.path.join(tmpdir, name))
        for dict_file in glob.glob(survey.path("*.dict")):
            shutil.copyfile(dict_file, os.path.join(tmpdir, os.path.basename(dict_file)))

        # Similar to setuptex/setup.py, but we also set questionnaire IDs.
        # The context manager closes the file even if one of the writes fails.
        with open(os.path.join(tmpdir, "sdaps.opt"), "w") as latex_override:
            latex_override.write('% This file exists to force the latex document into "final" mode.\n')
            latex_override.write("% It is parsed after the setup phase of the SDAPS class.\n\n")
            # The survey ID is split into its low and high 16 bit halves.
            latex_override.write("\\setcounter{surveyidlshw}{%i}\n" % (survey.survey_id % (2 ** 16)))
            latex_override.write("\\setcounter{surveyidmshw}{%i}\n" % (survey.survey_id // (2 ** 16)))
            latex_override.write("\\def\\surveyid{%i}\n" % (survey.survey_id))
            if survey.global_id is not None:
                latex_override.write("\\def\\globalid{%s}\n" % (tex_quote_braces(survey.global_id)))
            latex_override.write("\\@STAMPtrue\n")
            latex_override.write("\\@PAGEMARKtrue\n")
            latex_override.write("\\@sdaps@draftfalse\n")
            if questionnaire_ids is not None:
                quoted_ids = [tex_quote_braces(str(qid)) for qid in questionnaire_ids]
                latex_override.write("\\def\\questionnaireids{{%s}}\n" % "},{".join(quoted_ids))

        print(_("Running %s now twice to generate the stamped questionnaire.") % defs.latex_engine)
        # NOTE: this changes the environment of the current process, not only
        # that of the LaTeX child processes.
        os.environ["TEXINPUTS"] = ":" + os.path.abspath(survey.path())
        latex_command = [
            defs.latex_engine,
            "-halt-on-error",
            "-interaction",
            "batchmode",
            os.path.join(tmpdir, "questionnaire.tex"),
        ]
        # Two identical runs.  The exit status is ignored; success is judged
        # by the existence of the PDF below.
        for run in range(2):
            subprocess.call(latex_command, cwd=tmpdir)

        if not os.path.exists(os.path.join(tmpdir, "questionnaire.pdf")):
            log.error(_('Error running "%s" to compile the LaTeX file.') % defs.latex_engine)
            raise AssertionError("PDF file not generated")

        shutil.move(os.path.join(tmpdir, "questionnaire.pdf"), output_filename)

    except:
        # Format after translating, so the untranslated template is what gets
        # looked up in the message catalog.
        log.error(_("An error occured during creation of the report. Temporary files left in '%s'.") % tmpdir)

        raise

    shutil.rmtree(tmpdir)
Esempio n. 20
0
def setup(survey, cmdline):
    """Create a new survey directory from a LaTeX questionnaire.

    The questionnaire and the SDAPS class files are copied into the newly
    created survey directory and compiled with ``defs.latex_engine`` using a
    draft override file.  ``sdapsfileparser.parse`` (and, if given,
    ``additionalparser.parse``) then fill ``survey``, the survey ID is
    calculated, and the questionnaire is compiled again with the final
    override file.

    ``cmdline`` is indexed with the keys ``'questionnaire.tex'``,
    ``'additional_questions'`` and ``'add'``.

    Returns 1 when the survey directory already exists, when the additional
    questions file has an unexpected MIME type, or when
    ``survey.check_settings()`` fails.
    """
    if os.access(survey.path(), os.F_OK):
        log.error(_('The survey directory already exists.'))
        return 1

    questionnaire_tex = cmdline['questionnaire.tex']
    additionalqobjects = cmdline['additional_questions']

    # An empty MIME string is accepted as well; a wrong type for the
    # questionnaire only produces warnings.
    mime = mimetype(questionnaire_tex)
    if mime != 'text/x-tex' and mime != '':
        log.warn(_('Unknown file type (%s). questionnaire_tex should be of type text/x-tex.') % mime)
        log.warn(_('Will keep going, but expect failure!'))

    # For the additional questions file a wrong type aborts the setup.
    if additionalqobjects is not None:
        mime = mimetype(additionalqobjects)
        if mime != 'text/plain' and mime != '':
            log.error(_('Unknown file type (%s). additionalqobjects should be text/plain.') % mime)
            return 1

    # Add the new questionnaire
    survey.add_questionnaire(model.questionnaire.Questionnaire())

    # Create the survey directory, and copy the tex file.
    os.mkdir(survey.path())
    try:
        shutil.copy(questionnaire_tex, survey.path('questionnaire.tex'))

        # The first build uses the draft variant of the override file.
        write_latex_override_file(survey, draft=True)

        # Copy class and dictionary files
        if paths.local_run:
            cls_file = os.path.join(paths.source_dir, 'tex', 'sdaps.cls')
            code128_file = os.path.join(paths.source_dir, 'tex', 'code128.tex')
            dict_files = os.path.join(paths.build_dir, 'tex', '*.dict')
            dict_files = glob.glob(dict_files)
        else:
            cls_file = os.path.join(paths.prefix, 'share', 'sdaps', 'tex', 'sdaps.cls')
            code128_file = os.path.join(paths.prefix, 'share', 'sdaps', 'tex', 'code128.tex')
            dict_files = os.path.join(paths.prefix, 'share', 'sdaps', 'tex', '*.dict')
            dict_files = glob.glob(dict_files)

        shutil.copyfile(cls_file, survey.path('sdaps.cls'))
        shutil.copyfile(code128_file, survey.path('code128.tex'))
        for dict_file in dict_files:
            shutil.copyfile(dict_file, survey.path(os.path.basename(dict_file)))

        # Extra files requested on the command line are copied by basename.
        for add_file in cmdline['add']:
            shutil.copyfile(add_file, survey.path(os.path.basename(add_file)))

        # The exit status of the LaTeX runs is ignored; success is judged by
        # the existence of questionnaire.pdf afterwards.
        print _("Running %s now twice to generate the questionnaire.") % defs.latex_engine
        subprocess.call([defs.latex_engine, '-halt-on-error',
                         '-interaction', 'batchmode', 'questionnaire.tex'],
                        cwd=survey.path())
        # And again, without the draft mode
        subprocess.call([defs.latex_engine, '-halt-on-error',
                         '-interaction', 'batchmode', 'questionnaire.tex'],
                        cwd=survey.path())
        if not os.path.exists(survey.path('questionnaire.pdf')):
            print _("Error running \"%s\" to compile the LaTeX file.") % defs.latex_engine
            raise AssertionError('PDF file not generated')

        # Values set before the parser runs.
        survey.defs.print_questionnaire_id = False
        survey.defs.print_survey_id = True

        # Parse qobjects
        try:
            sdapsfileparser.parse(survey)
        except Exception, e:
            log.error(_("Caught an Exception while parsing the SDAPS file. The current state is:"))
            print >>sys.stderr, unicode(survey.questionnaire)
            print >>sys.stderr, "------------------------------------"

            # NOTE(review): "raise e" restarts the traceback here in Python 2;
            # a bare "raise" would keep the original one.
            raise e

        # Parse additionalqobjects
        if additionalqobjects:
            additionalparser.parse(survey, additionalqobjects)

        # Last but not least calculate the survey id
        survey.calculate_survey_id()

        if not survey.check_settings():
            log.error(_("Some combination of options and project properties do not work. Aborted Setup."))
            # Remove the survey directory that was created above.
            shutil.rmtree(survey.path())
            return 1

        # We need to now rebuild everything so that the correct ID is at the bottom
        write_latex_override_file(survey)
        print _("Running %s now twice to generate the questionnaire.") % defs.latex_engine
        # Delete the PDF of the first build so that the existence check below
        # reflects the rebuild.
        os.remove(survey.path('questionnaire.pdf'))
        subprocess.call([defs.latex_engine, '-halt-on-error',
                         '-interaction', 'batchmode', 'questionnaire.tex'],
                        cwd=survey.path())
        # And again, without the draft mode
        subprocess.call([defs.latex_engine, '-halt-on-error',
                         '-interaction', 'batchmode', 'questionnaire.tex'],
                        cwd=survey.path())
        if not os.path.exists(survey.path('questionnaire.pdf')):
            print _("Error running \"%s\" to compile the LaTeX file.") % defs.latex_engine
            raise AssertionError('PDF file not generated')

        # Print the result
        print survey.title

        for item in survey.info.items():
            print u'%s: %s' % item

        print unicode(survey.questionnaire)

        # The log file inside the survey directory is open while saving.
        log.logfile.open(survey.path('log'))

        survey.save()
        log.logfile.close()
Esempio n. 21
0
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sdaps import log
import re

from sdaps import defs
import subprocess
import os

# The character map lives in a separate module.  If it cannot be imported we
# continue with an empty map and only log a warning.
try:
    from sdaps.utils.latexmap import mapping
except ImportError:
    mapping = {}
    log.warn(_('The latex character map is missing! Please build it using the supplied tool (create-latexmap.py).'))

# Add some more mappings
# NBSP
mapping['~'] = ' '


# Maps one compiled regular expression per LaTeX token to the replacement
# text for that token.
# NOTE(review): in the lookahead "(?=^w|})" the "^" is an anchor, not a
# negated character class.  No re.MULTILINE flag is passed, so the first
# alternative can only match at the very start of the searched string; in
# effect a token is matched only when it is followed by "}".  Possibly
# "[^\w]" was intended -- confirm before changing.
re_latex_to_unicode_mapping = {}
for token, replacement in mapping.items():
    regexp = re.compile('%s(?=^w|})' % re.escape(token))
    re_latex_to_unicode_mapping[regexp] = replacement

# Regular expressions don't work really, but we replace a single string anyways
# Reverse direction: replacement text -> "{token}".  If several tokens share
# the same replacement, the one iterated last wins.
unicode_to_latex_mapping = {}
for token, replacement in mapping.items():
    unicode_to_latex_mapping[replacement] = "{%s}" % token
Esempio n. 22
0
    def recognize(self):
        """Run the page-level recognition steps for every image of this sheet.

        The steps are, in order: matrix, rotation (rotated images are reloaded
        and get their matrix recalculated), page number, survey ID,
        questionnaire ID and global ID.  The index of every image for which a
        step fails is collected in a set.

        ``self.obj.valid`` starts out as 1 and is set to 0 when the survey ID
        read from the first image differs from the survey's ID, or when at
        least one page failed.

        Page numbers are recovered pairwise: images ``i`` and ``i + 1`` are
        treated as the two sides of one sheet of paper.

        Side effects: updates attributes of ``self.obj`` and of its images,
        logs warnings and errors, and sets the module-level flag
        ``warned_multipage_not_correctly_scanned`` so that the "different
        IDs" warning is logged only once.
        """
        global warned_multipage_not_correctly_scanned

        self.obj.valid = 1

        duplex_mode = self.obj.survey.defs.duplex

        # Load all images of this sheet
        for image in self.obj.images:
            if not image.ignored:
                image.rotated = 0
                image.surface.load()

        failed_pages = set()

        # Matrix recognition for all of them
        matrix_errors = set()
        for page, image in enumerate(self.obj.images):
            try:
                image.recognize.calculate_matrix()
            except RecognitionError:
                matrix_errors.add(page)

        # We need to check the matrix_errors. Some are expected in simplex mode
        for page in matrix_errors:
            # in simplex mode every page will have a matrix; it might be a None
            # matrix though

            log.warn(_('%s, %i: Matrix not recognized.') % (self.obj.images[page].filename, self.obj.images[page].tiff_page))
            failed_pages.add(page)

        # Rotation for all of them
        for page, image in enumerate(self.obj.images):
            try:
                # This may set the rotation to "None" for unknown
                image.recognize.calculate_rotation()
            except RecognitionError:
                log.warn(_('%s, %i: Rotation not found.') % (image.filename, image.tiff_page))
                failed_pages.add(page)

        # Copy the rotation over (if required) and print warning if the rotation is unknown
        self.duplex_copy_image_attr(failed_pages, 'rotated', _("Neither %s, %i or %s, %i has a known rotation!"))

        # Reload any image that is rotated.
        for page, image in enumerate(self.obj.images):
            if image.rotated and not image.ignored:
                image.surface.load()
                # And redo the whole matrix stuff ...
                # XXX: It would be better to manipulate the matrix instead.
                try:
                    image.recognize.calculate_matrix()
                except RecognitionError:
                    # Only reported (and counted as a failure) in duplex mode.
                    if duplex_mode:
                        log.warn(_('%s, %i: Matrix not recognized (again).') % (image.filename, image.tiff_page))
                        failed_pages.add(page)

        ############
        # At this point we can extract the page numbers and IDs as neccessary.
        ############

        # Figure out the page numbers
        # ***************************
        for page, image in enumerate(self.obj.images):
            try:
                # This may set the page_number to "None" for unknown
                image.recognize.calculate_page_number()
            except RecognitionError:
                log.warn(_('%s, %i: Could not get page number.') % (image.filename, image.tiff_page))
                image.page_number = None
                failed_pages.add(page)

        # Walk over the images in pairs (front and back of one sheet of paper).
        for i in range(0, len(self.obj.images), 2):
            # We try to recover at least the page number of failed pages
            # this way.
            # NOTE: In simplex mode dummy pages will be inserted, so one page
            # always has no page number, and the other one has one.
            # This is exactly what we want, so we don't need to do anything
            # (except warn if we did not find any page!)
            failed = (i in failed_pages or i + 1 in failed_pages)

            first = self.obj.images[i]
            second = self.obj.images[i + 1]

            if first.page_number is None and second.page_number is None:
                if not failed:
                    # Whoa, that should not happen.
                    # Translate the template first, then fill in the values.
                    log.warn(_("Neither %s, %i or %s, %i has a known page number!") %
                             (first.filename, first.tiff_page, second.filename, second.tiff_page))
                    failed_pages.add(i)
                    failed_pages.add(i + 1)

            elif duplex_mode == False:
                # Simplex mode is special, as we know that one has to be unreadable
                # we need to ensure one of the page numbers is None
                if first.page_number is not None and second.page_number is not None:
                    # We don't touch the ignore flag in this case
                    # Simply print a message as this should *never* happen
                    log.error(_("Got a simplex document where two adjacent pages had a known page number. This should never happen as even simplex scans are converted to duplex by inserting dummy pages. Maybe you did a simplex scan but added it in duplex mode? The pages in question are %s, %i and %s, %i.") % (first.filename, first.tiff_page, second.filename, second.tiff_page))

                # Set the ignored flag for the unreadable page. This is a valid
                # operation as the back side of a readable page is known to be
                # empty.
                elif first.page_number is None:
                    first.ignored = True
                else:
                    second.ignored = True

            elif first.page_number is None:
                # One based, odd -> +1, even -> -1
                first.page_number = second.page_number - 1 + 2 * (second.page_number % 2)
            elif second.page_number is None:
                second.page_number = first.page_number - 1 + 2 * (first.page_number % 2)
            elif first.page_number != (second.page_number - 1 + 2 * (second.page_number % 2)):
                if not failed:
                    log.warn(_("Images %s, %i and %s, %i do not have consecutive page numbers!") %
                             (first.filename, first.tiff_page, second.filename, second.tiff_page))

                    failed_pages.add(i)
                    failed_pages.add(i + 1)

        # Check that every page has a non None value, and each page exists once.
        pages = set()
        for i, image in enumerate(self.obj.images):
            # Ignore known blank pages
            if image.ignored:
                continue

            if image.page_number is None:
                log.warn(_("No page number for page %s, %i exists.") % (image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            if image.page_number in pages:
                log.warn(_("Page number for page %s, %i already used by another image.") %
                         (image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            if image.page_number <= 0 or image.page_number > self.obj.survey.questionnaire.page_count:
                log.warn(_("Page number %i for page %s, %i is out of range.") %
                         (image.page_number, image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            pages.add(image.page_number)

        # Figure out the suvey ID if neccessary
        # *************************************
        if self.obj.survey.defs.print_survey_id:
            for page, image in enumerate(self.obj.images):
                try:
                    # In duplex mode only pages with an even page number are read.
                    if not duplex_mode or (image.page_number is not None and image.page_number % 2 == 0):
                        image.recognize.calculate_survey_id()
                    else:
                        image.survey_id = None
                except RecognitionError:
                    log.warn(_('%s, %i: Could not read survey ID, but should be able to.') %
                             (image.filename, image.tiff_page))
                    failed_pages.add(page)

            self.duplex_copy_image_attr(failed_pages, "survey_id", _("Could not read survey ID of either %s, %i or %s, %i!"))

            # Simply use the survey ID from the first image globally
            self.obj.survey_id = self.obj.images[0].survey_id

            if self.obj.survey_id != self.obj.survey.survey_id:
                # Broken survey ID ...
                log.warn(_("Got a wrong survey ID (%s, %i)! It is %s, but should be %i.") %
                         (self.obj.images[0].filename,
                          self.obj.images[0].tiff_page,
                          self.obj.survey_id,
                          self.obj.survey.survey_id))
                self.obj.valid = 0
        else:
            # Assume that the data is from the correct survey
            self.obj.survey_id = self.obj.survey.survey_id
            for image in self.obj.images:
                image.survey_id = self.obj.survey.survey_id

        # Figure out the questionnaire ID if neccessary
        # *********************************************
        if self.obj.survey.defs.print_questionnaire_id:
            questionnaire_ids = []

            for page, image in enumerate(self.obj.images):
                try:
                    if not duplex_mode or (image.page_number is not None and image.page_number % 2 == 0):
                        image.recognize.calculate_questionnaire_id()
                except RecognitionError:
                    log.warn(_('%s, %i: Could not read questionnaire ID, but should be able to.') %
                             (image.filename, image.tiff_page))
                    failed_pages.add(page)
                # Collected before the duplex copy below, so only IDs present
                # on the image at this point end up in the list.
                if image.questionnaire_id is not None:
                    questionnaire_ids.append(image.questionnaire_id)

            self.duplex_copy_image_attr(failed_pages, "questionnaire_id", _("Could not read questionnaire ID of either %s, %i or %s, %i!"))

            if questionnaire_ids:
                self.obj.questionnaire_id = questionnaire_ids
            else:
                # NOTE(review): this expression statement has no effect; an
                # assignment (e.g. "= None") was probably intended -- confirm.
                self.obj.questionnaire_id

        # Try to load the global ID. If it does not exist we will get None, if
        # it does, then it will be non-None. We don't care much about it
        # internally anyways.
        # However, we do want to ensure that it is the same everywhere if it
        # can be read in.
        # *********************************************
        for page, image in enumerate(self.obj.images):
            try:
                if not duplex_mode or (image.page_number is not None and image.page_number % 2 == 0):
                    image.recognize.calculate_global_id()
            except RecognitionError:
                # A missing global ID is not treated as a failure.
                pass

        self.duplex_copy_image_attr(failed_pages, "global_id")

        self.obj.global_id = self.obj.images[0].global_id

        for image in self.obj.images:
            if self.obj.global_id != image.global_id or \
                self.obj.survey_id != image.survey_id or \
                self.obj.questionnaire_id != image.questionnaire_id:

                # Warn only once per process; the flag is module-level.
                if not warned_multipage_not_correctly_scanned:
                    log.warn(_("Got different IDs on different pages for at least one sheet! Do *NOT* try to use filters with this survey! You have to run a \"reorder\" step for this to work properly!"))

                    warned_multipage_not_correctly_scanned = True

        # Done
        if failed_pages:
            self.obj.valid = 0
Esempio n. 23
0
import cv2
import numpy as np
from sdaps import image
import cairo
from sdaps import defs
from sdaps import log

from sdaps.utils.ugettext import ugettext, ungettext
_ = ugettext

# Poppler is optional: without it PDF input cannot be converted, everything
# else keeps working.  Catch Exception rather than using a bare "except" so
# that KeyboardInterrupt and SystemExit are not swallowed during import.
try:
    import gi
    gi.require_version('Poppler', '0.18')
    from gi.repository import Poppler, Gio
except Exception:
    log.warn(_("Cannot convert PDF files as poppler is not installed or usable!"))

def iter_images_and_pages(images):
    """This function iterates over a images and also the contained pages. As
    OpenCV is not able to handle multipage TIFF files, we use the SDAPS internal
    loading method for those."""

    for filename in images:
        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
Esempio n. 24
0
 def validate(self):
     """Log a warning when this head has no (or an empty) title."""
     if self.obj.title:
         return
     template = _(u'Head %(l0)i got no title.')
     log.warn(template % {'l0': self.obj.id[0]})
Esempio n. 25
0
File: latex.py Progetto: sdaps/sdaps
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sdaps import log
import re

from sdaps import defs
import subprocess
import os

# The character map lives in a separate module.  If it cannot be imported we
# continue with an empty map and only log a warning.
try:
    from sdaps.utils.latexmap import mapping
except ImportError:
    mapping = {}
    log.warn(_('The latex character map is missing! Please build it using the supplied tool (create-latexmap.py).'))

# Add some more mappings
# NBSP
mapping['~'] = ' '


# Maps one compiled regular expression per LaTeX token to the replacement
# text for that token.
# NOTE(review): in the lookahead "(?=^w|})" the "^" is an anchor, not a
# negated character class.  No re.MULTILINE flag is passed, so the first
# alternative can only match at the very start of the searched string; in
# effect a token is matched only when it is followed by "}".  Possibly
# "[^\w]" was intended -- confirm before changing.
re_latex_to_unicode_mapping = {}
for token, replacement in mapping.items():
    regexp = re.compile('%s(?=^w|})' % re.escape(token))
    re_latex_to_unicode_mapping[regexp] = replacement

# Regular expressions don't work really, but we replace a single string anyways
# Reverse direction: replacement text -> "{token}".  If several tokens share
# the same replacement, the one iterated last wins.
unicode_to_latex_mapping = {}
for token, replacement in mapping.items():
    unicode_to_latex_mapping[replacement] = "{%s}" % token
Esempio n. 26
0
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import cv2
import numpy as np
from sdaps import image
import cairo
from sdaps import defs
from sdaps import log

from sdaps.utils.ugettext import ugettext, ungettext
_ = ugettext

# Poppler is optional: without it PDF input cannot be converted, everything
# else keeps working.  Catch Exception rather than using a bare "except" so
# that KeyboardInterrupt and SystemExit are not swallowed during import.
try:
    from gi.repository import Poppler, Gio
except Exception:
    log.warn(_("Cannot convert PDF files as poppler is not installed or usable!"))

def iter_images_and_pages(images):
    """This function iterates over a images and also the contained pages. As
    OpenCV is not able to handle multipage TIFF files, we use the SDAPS internal
    loading method for those."""

    for filename in images:
        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
Esempio n. 27
0
def setup(survey, cmdline):
    """Create a new survey directory from a LaTeX questionnaire.

    The questionnaire and the SDAPS class files are copied into the newly
    created survey directory and compiled with ``defs.latex_engine`` using a
    draft override file.  ``sdapsfileparser.parse`` (and, if given,
    ``additionalparser.parse``) then fill ``survey``, the survey ID is
    calculated, and the questionnaire is compiled again with the final
    override file.

    ``cmdline`` is indexed with the keys ``'questionnaire.tex'``,
    ``'additional_questions'`` and ``'add'``.

    Returns 1 when the survey directory already exists, when the additional
    questions file has an unexpected MIME type, or when
    ``survey.check_settings()`` fails.
    """
    if os.access(survey.path(), os.F_OK):
        log.error(_('The survey directory already exists.'))
        return 1

    questionnaire_tex = cmdline['questionnaire.tex']
    additionalqobjects = cmdline['additional_questions']

    # An empty MIME string is accepted as well; a wrong type for the
    # questionnaire only produces warnings.
    mime = mimetype(questionnaire_tex)
    if mime != 'text/x-tex' and mime != '':
        log.warn(
            _('Unknown file type (%s). questionnaire_tex should be of type text/x-tex.'
              ) % mime)
        log.warn(_('Will keep going, but expect failure!'))

    # For the additional questions file a wrong type aborts the setup.
    if additionalqobjects is not None:
        mime = mimetype(additionalqobjects)
        if mime != 'text/plain' and mime != '':
            log.error(
                _('Unknown file type (%s). additionalqobjects should be text/plain.'
                  ) % mime)
            return 1

    # Add the new questionnaire
    survey.add_questionnaire(model.questionnaire.Questionnaire())

    # Create the survey directory, and copy the tex file.
    os.mkdir(survey.path())
    try:
        shutil.copy(questionnaire_tex, survey.path('questionnaire.tex'))

        # The first build uses the draft variant of the override file.
        write_latex_override_file(survey, draft=True)

        # Copy class and dictionary files
        if paths.local_run:
            cls_file = os.path.join(paths.source_dir, 'tex', 'sdaps.cls')
            code128_file = os.path.join(paths.source_dir, 'tex', 'code128.tex')
            dict_files = os.path.join(paths.build_dir, 'tex', '*.dict')
            dict_files = glob.glob(dict_files)
        else:
            cls_file = os.path.join(paths.prefix, 'share', 'sdaps', 'tex',
                                    'sdaps.cls')
            code128_file = os.path.join(paths.prefix, 'share', 'sdaps', 'tex',
                                        'code128.tex')
            dict_files = os.path.join(paths.prefix, 'share', 'sdaps', 'tex',
                                      '*.dict')
            dict_files = glob.glob(dict_files)

        shutil.copyfile(cls_file, survey.path('sdaps.cls'))
        shutil.copyfile(code128_file, survey.path('code128.tex'))
        for dict_file in dict_files:
            shutil.copyfile(dict_file,
                            survey.path(os.path.basename(dict_file)))

        # Extra files requested on the command line are copied by basename.
        for add_file in cmdline['add']:
            shutil.copyfile(add_file, survey.path(os.path.basename(add_file)))

        # The exit status of the LaTeX runs is ignored; success is judged by
        # the existence of questionnaire.pdf afterwards.
        print _("Running %s now twice to generate the questionnaire."
                ) % defs.latex_engine
        subprocess.call([
            defs.latex_engine, '-halt-on-error', '-interaction', 'batchmode',
            'questionnaire.tex'
        ],
                        cwd=survey.path())
        # And again, without the draft mode
        subprocess.call([
            defs.latex_engine, '-halt-on-error', '-interaction', 'batchmode',
            'questionnaire.tex'
        ],
                        cwd=survey.path())
        if not os.path.exists(survey.path('questionnaire.pdf')):
            print _("Error running \"%s\" to compile the LaTeX file."
                    ) % defs.latex_engine
            raise AssertionError('PDF file not generated')

        # Values set before the parser runs.
        survey.defs.print_questionnaire_id = False
        survey.defs.print_survey_id = True

        # Parse qobjects
        try:
            sdapsfileparser.parse(survey)
        except Exception, e:
            log.error(
                _("Caught an Exception while parsing the SDAPS file. The current state is:"
                  ))
            print >> sys.stderr, unicode(survey.questionnaire)
            print >> sys.stderr, "------------------------------------"

            # NOTE(review): "raise e" restarts the traceback here in Python 2;
            # a bare "raise" would keep the original one.
            raise e

        # Parse additionalqobjects
        if additionalqobjects:
            additionalparser.parse(survey, additionalqobjects)

        # Last but not least calculate the survey id
        survey.calculate_survey_id()

        if not survey.check_settings():
            log.error(
                _("Some combination of options and project properties do not work. Aborted Setup."
                  ))
            # Remove the survey directory that was created above.
            shutil.rmtree(survey.path())
            return 1

        # We need to now rebuild everything so that the correct ID is at the bottom
        write_latex_override_file(survey)
        print _("Running %s now twice to generate the questionnaire."
                ) % defs.latex_engine
        # Delete the PDF of the first build so that the existence check below
        # reflects the rebuild.
        os.remove(survey.path('questionnaire.pdf'))
        subprocess.call([
            defs.latex_engine, '-halt-on-error', '-interaction', 'batchmode',
            'questionnaire.tex'
        ],
                        cwd=survey.path())
        # And again, without the draft mode
        subprocess.call([
            defs.latex_engine, '-halt-on-error', '-interaction', 'batchmode',
            'questionnaire.tex'
        ],
                        cwd=survey.path())
        if not os.path.exists(survey.path('questionnaire.pdf')):
            print _("Error running \"%s\" to compile the LaTeX file."
                    ) % defs.latex_engine
            raise AssertionError('PDF file not generated')

        # Print the result
        print survey.title

        for item in survey.info.items():
            print u'%s: %s' % item

        print unicode(survey.questionnaire)

        # The log file inside the survey directory is open while saving.
        log.logfile.open(survey.path('log'))

        survey.save()
        log.logfile.close()
Esempio n. 28
0
    def recognize(self):
        global warned_multipage_not_correctly_scanned

        self.obj.valid = 1

        duplex_mode = self.obj.survey.defs.duplex

        # Load all images of this sheet
        for image in self.obj.images:
            if not image.ignored:
                image.rotated = 0
                image.surface.load()

        failed_pages = set()

        # Matrix recognition for all of them
        matrix_errors = set()
        for page, image in enumerate(self.obj.images):
            try:
                image.recognize.calculate_matrix()
            except RecognitionError:
                matrix_errors.add(page)

        # We need to check the matrix_errors. Some are expected in simplex mode
        for page in matrix_errors:
            # in simplex mode every page will have a matrix; it might be a None
            # matrix though

            log.warn(
                _('%s, %i: Matrix not recognized.') %
                (self.obj.images[page].filename,
                 self.obj.images[page].tiff_page))
            failed_pages.add(page)

        # Rotation for all of them
        for page, image in enumerate(self.obj.images):
            try:
                # This may set the rotation to "None" for unknown
                image.recognize.calculate_rotation()
            except RecognitionError:
                log.warn(
                    _('%s, %i: Rotation not found.') %
                    (image.filename, image.tiff_page))
                failed_pages.add(page)

        # Copy the rotation over (if required) and print warning if the rotation is unknown
        self.duplex_copy_image_attr(
            failed_pages, 'rotated',
            _("Neither %s, %i or %s, %i has a known rotation!"))

        # Reload any image that is rotated.
        for page, image in enumerate(self.obj.images):
            if image.rotated and not image.ignored:
                image.surface.load()
                # And redo the whole matrix stuff ...
                # XXX: It would be better to manipulate the matrix instead.
                try:
                    image.recognize.calculate_matrix()
                except RecognitionError:
                    if duplex_mode:
                        log.warn(
                            _('%s, %i: Matrix not recognized (again).') %
                            (image.filename, image.tiff_page))
                        failed_pages.add(page)

        ############
        # At this point we can extract the page numbers and IDs as neccessary.
        ############

        # Figure out the page numbers
        # ***************************
        for page, image in enumerate(self.obj.images):
            try:
                # This may set the page_number to "None" for unknown
                image.recognize.calculate_page_number()
            except RecognitionError:
                log.warn(
                    _('%s, %i: Could not get page number.') %
                    (image.filename, image.tiff_page))
                image.page_number = None
                failed_pages.add(page)

        i = 0
        while i < len(self.obj.images):
            # We try to recover at least the page number of failed pages
            # this way.
            # NOTE: In simplex mode dummy pages will be inserted, so one page
            # always has no page number, and the other one has one.
            # This is exactly what we want, so we don't need to do anything
            # (except warn if we did not find any page!)
            failed = (i in failed_pages or i + 1 in failed_pages)

            first = self.obj.images[i]
            second = self.obj.images[i + 1]

            if first.page_number is None and second.page_number is None:
                if not failed:
                    # Whoa, that should not happen.
                    log.warn(
                        _("Neither %s, %i or %s, %i has a known page number!" %
                          (first.filename, first.tiff_page, second.filename,
                           second.tiff_page)))
                    failed_pages.add(i)
                    failed_pages.add(i + 1)

            elif duplex_mode == False:
                # Simplex mode is special, as we know that one has to be unreadable
                # we need to ensure one of the page numbers is None
                if first.page_number is not None and second.page_number is not None:
                    # We don't touch the ignore flag in this case
                    # Simply print a message as this should *never* happen
                    log.error(
                        _("Got a simplex document where two adjacent pages had a known page number. This should never happen as even simplex scans are converted to duplex by inserting dummy pages. Maybe you did a simplex scan but added it in duplex mode? The pages in question are %s, %i and %s, %i."
                          ) % (first.filename, first.tiff_page,
                               second.filename, second.tiff_page))

                # Set the ignored flag for the unreadable page. This is a valid
                # operation as the back side of a readable page is known to be
                # empty.
                elif first.page_number is None:
                    first.ignored = True
                else:
                    second.ignored = True

            elif first.page_number is None:
                # One based, odd -> +1, even -> -1
                first.page_number = second.page_number - 1 + 2 * (
                    second.page_number % 2)
            elif second.page_number is None:
                second.page_number = first.page_number - 1 + 2 * (
                    first.page_number % 2)
            elif first.page_number != (second.page_number - 1 + 2 *
                                       (second.page_number % 2)):
                if not failed:
                    log.warn(
                        _("Images %s, %i and %s, %i do not have consecutive page numbers!"
                          % (first.filename, first.tiff_page, second.filename,
                             second.tiff_page)))

                    failed_pages.add(i)
                    failed_pages.add(i + 1)

            i += 2

        # Check that every page has a non None value, and each page exists once.
        pages = set()
        for i, image in enumerate(self.obj.images):
            # Ignore known blank pages
            if image.ignored:
                continue

            if image.page_number is None:
                log.warn(
                    _("No page number for page %s, %i exists." %
                      (image.filename, image.tiff_page)))
                failed_pages.add(i)
                continue

            if image.page_number in pages:
                log.warn(
                    _("Page number for page %s, %i already used by another image."
                      ) % (image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            if image.page_number <= 0 or image.page_number > self.obj.survey.questionnaire.page_count:
                log.warn(
                    _("Page number %i for page %s, %i is out of range.") %
                    (image.page_number, image.filename, image.tiff_page))
                failed_pages.add(i)
                continue

            pages.add(image.page_number)

        # Figure out the survey ID if necessary
        # *************************************
        if self.obj.survey.defs.print_survey_id:
            for page, image in enumerate(self.obj.images):
                try:
                    if not duplex_mode or (image.page_number is not None
                                           and image.page_number % 2 == 0):
                        image.recognize.calculate_survey_id()
                    else:
                        image.survey_id = None
                except RecognitionError:
                    log.warn(
                        _('%s, %i: Could not read survey ID, but should be able to.'
                          ) % (image.filename, image.tiff_page))
                    failed_pages.add(page)

            self.duplex_copy_image_attr(
                failed_pages, "survey_id",
                _("Could not read survey ID of either %s, %i or %s, %i!"))

            # Simply use the survey ID from the first image globally
            self.obj.survey_id = self.obj.images[0].survey_id

            if self.obj.survey_id != self.obj.survey.survey_id:
                # Broken survey ID ...
                log.warn(
                    _("Got a wrong survey ID (%s, %i)! It is %s, but should be %i."
                      ) %
                    (self.obj.images[0].filename, self.obj.images[0].tiff_page,
                     self.obj.survey_id, self.obj.survey.survey_id))
                self.obj.valid = 0
        else:
            # Assume that the data is from the correct survey
            self.obj.survey_id = self.obj.survey.survey_id
            for image in self.obj.images:
                image.survey_id = self.obj.survey.survey_id

        # Figure out the questionnaire ID if necessary
        # *********************************************
        if self.obj.survey.defs.print_questionnaire_id:
            questionnaire_ids = []

            for page, image in enumerate(self.obj.images):
                try:
                    if not duplex_mode or (image.page_number is not None
                                           and image.page_number % 2 == 0):
                        image.recognize.calculate_questionnaire_id()
                except RecognitionError:
                    log.warn(_('%s, %i: Could not read questionnaire ID, but should be able to.') % \
                             (image.filename, image.tiff_page))
                    failed_pages.add(page)
                if image.questionnaire_id is not None:
                    questionnaire_ids.append(image.questionnaire_id)

            self.duplex_copy_image_attr(
                failed_pages, "questionnaire_id",
                _("Could not read questionnaire ID of either %s, %i or %s, %i!"
                  ))

            if len(questionnaire_ids):
                self.obj.questionnaire_id = questionnaire_ids
            else:
                self.obj.questionnaire_id

        # Try to load the global ID. If it does not exist we will get None, if
        # it does, then it will be non-None. We don't care much about it
        # internally anyways.
        # However, we do want to ensure that it is the same everywhere if it
        # can be read in.
        # *********************************************
        for page, image in enumerate(self.obj.images):
            try:
                if not duplex_mode or (image.page_number is not None
                                       and image.page_number % 2 == 0):
                    image.recognize.calculate_global_id()
            except RecognitionError:
                pass

        self.duplex_copy_image_attr(failed_pages, "global_id")

        self.obj.global_id = self.obj.images[0].global_id

        for image in self.obj.images:
            if self.obj.global_id != image.global_id or \
                self.obj.survey_id != image.survey_id or \
                self.obj.questionnaire_id != image.questionnaire_id:

                if not warned_multipage_not_correctly_scanned:
                    log.warn(
                        _("Got different IDs on different pages for at least one sheet! Do *NOT* try to use filters with this survey! You have to run a \"reorder\" step for this to work properly!"
                          ))

                    warned_multipage_not_correctly_scanned = True

        # Done
        if failed_pages:
            self.obj.valid = 0
Esempio n. 29
0
def setup(survey, questionnaire_tex, additionalqobjects=None, extra_files=[]):
    """Create the survey directory and build the questionnaire from LaTeX.

    The questionnaire source is copied into a freshly created survey
    directory together with the class, TeX, style and dictionary files, the
    document is compiled, the resulting SDAPS data is parsed into the survey,
    the survey ID is calculated and the document is compiled a second time
    before the survey is saved.

    Parameters:
        survey: survey object; its ``path()`` is the directory to create.
        questionnaire_tex: path of the LaTeX questionnaire source.
        additionalqobjects: optional path handed to ``additionalparser``.
        extra_files: paths of further files or directories to copy into the
            survey directory (only iterated here, never modified).

    Returns:
        1 when the survey directory already exists, when the
        ``additionalqobjects`` file has an unexpected MIME type, or when
        ``survey.check_settings()`` fails. No explicit value otherwise.

    Raises:
        AssertionError: when a LaTeX run leaves no ``questionnaire.pdf``.
        Any other exception raised after the directory was created is logged
        and re-raised; the directory is left in place in that case.
    """

    def require_pdf():
        # Abort the setup when the LaTeX run did not leave a PDF behind.
        if os.path.exists(survey.path('questionnaire.pdf')):
            return
        print(_("Error running \"%s\" to compile the LaTeX file."
                ) % defs.latex_engine)
        raise AssertionError('PDF file not generated')

    # Refuse to touch an existing survey directory.
    if os.access(survey.path(), os.F_OK):
        log.error(_('The survey directory already exists.'))
        return 1

    # An unexpected type for the questionnaire is only worth a warning ...
    tex_mime = mimetype(questionnaire_tex)
    if tex_mime not in ('text/x-tex', ''):
        log.warn(
            _('Unknown file type (%s). questionnaire_tex should be of type text/x-tex.'
              ) % tex_mime)
        log.warn(_('Will keep going, but expect failure!'))

    # ... while an unexpected type for the additional qobjects is fatal.
    if additionalqobjects is not None:
        qobjects_mime = mimetype(additionalqobjects)
        if qobjects_mime not in ('text/plain', ''):
            log.error(
                _('Unknown file type (%s). additionalqobjects should be text/plain.'
                  ) % qobjects_mime)
            return 1

    # Attach an empty questionnaire to the survey.
    survey.add_questionnaire(model.questionnaire.Questionnaire())

    # From here on the survey directory exists on disk.
    os.makedirs(survey.path())
    try:
        shutil.copy(questionnaire_tex, survey.path('questionnaire.tex'))

        latex.write_override(survey, survey.path('sdaps.opt'), draft=True)

        # Collect the glob patterns of the support files, in copy order:
        # extra classes (local runs only), classes, TeX files, styles and
        # dictionaries.
        if paths.local_run:
            class_build_dir = os.path.join(paths.source_dir, 'tex', 'class',
                                           'build', 'local')
            patterns = [
                os.path.join(paths.source_dir, 'tex', '*.cls'),
                os.path.join(class_build_dir, '*.cls'),
                os.path.join(class_build_dir, '*.tex'),
                os.path.join(class_build_dir, '*.sty'),
                os.path.join(paths.build_dir, 'tex', '*.dict'),
            ]
        else:
            shared_tex_dir = os.path.join(paths.prefix, 'share', 'sdaps',
                                          'tex')
            patterns = [
                os.path.join(shared_tex_dir, '*.cls'),
                os.path.join(shared_tex_dir, '*.tex'),
                os.path.join(shared_tex_dir, '*.sty'),
                os.path.join(shared_tex_dir, '*.dict'),
            ]

        # Every match lands flat in the survey directory under its basename.
        for pattern in patterns:
            for matched in glob.glob(pattern):
                shutil.copyfile(matched,
                                survey.path(os.path.basename(matched)))

        # User supplied extras: directories are copied recursively.
        for extra in extra_files:
            if os.path.isdir(extra):
                copier = shutil.copytree
            else:
                copier = shutil.copyfile
            copier(extra, survey.path(os.path.basename(extra)))

        # First build of the questionnaire.
        print(_("Running %s now twice to generate the questionnaire."
                ) % defs.latex_engine)
        latex.compile('questionnaire.tex', cwd=survey.path())
        require_pdf()

        survey.defs.print_questionnaire_id = False
        survey.defs.print_survey_id = True

        # Read the qobjects back in; dump the current state if that fails.
        try:
            sdapsfileparser.parse(survey)

            for qobject in survey.questionnaire.qobjects:
                qobject.setup.setup()
                qobject.setup.validate()

        except:
            log.error(
                _("Caught an Exception while parsing the SDAPS file. The current state is:"
                  ))
            print >> sys.stderr, unicode(survey.questionnaire)
            print >> sys.stderr, "------------------------------------"

            raise

        # Optional additional qobjects.
        if additionalqobjects:
            additionalparser.parse(survey, additionalqobjects)

        # The survey ID is calculated last.
        survey.calculate_survey_id()

        if not survey.check_settings():
            log.error(
                _("Some combination of options and project properties do not work. Aborted Setup."
                  ))
            shutil.rmtree(survey.path())
            return 1

        # Second build so that the document carries the correct ID. Draft
        # mode stays on only when questionnaire IDs are printed.
        latex.write_override(survey,
                             survey.path('sdaps.opt'),
                             draft=survey.defs.print_questionnaire_id)
        print(_("Running %s now twice to generate the questionnaire."
                ) % defs.latex_engine)
        os.remove(survey.path('questionnaire.pdf'))
        latex.compile('questionnaire.tex', survey.path())
        require_pdf()

        # Show what was set up.
        print(survey.title)

        for info_entry in survey.info.items():
            print(u'%s: %s' % info_entry)

        print(unicode(survey.questionnaire))

        log.logfile.open(survey.path('log'))

        survey.save()
        log.logfile.close()
    except:
        log.error(
            _("An error occured in the setup routine. The survey directory still exists. You can for example check the questionnaire.log file for LaTeX compile errors."
              ))
        raise