Esempio n. 1
0
    def questionnaires_in_dir(self, image_directory, pattern=ScanFile.scan_fn_pat, move_bad=True):
        """
        Given a directory and a filename pattern, return the questionnaires
        whose images are in the directory.

        Each directory entry's name, with its extension stripped, is matched
        (case-insensitively, anchored at the start) against ``pattern``.
        Matching files are converted to ScanFile objects; files that do not
        match, or whose conversion raises, are reported to the issue tracker
        and the log.

        Args:
            image_directory: the directory to search for questionnaire files
                (a leading ``~`` is expanded)
            pattern: the regular expression to use to extract questionnaire
                files (defaults to ScanFile.scan_fn_pat)
            move_bad: if True, files with a non-matching name and files that
                fail conversion are moved to self.scandirs["staging_error"];
                if False, non-matching names are silently ignored and
                conversion failures are only reported

        Returns:
            a list of ScanFile objects, one for each matching file that was
            converted successfully
        """

        image_dir = os.path.expanduser(image_directory)
        files = os.listdir(image_dir)

        # Partition into concrete lists in a single pass. A lazy filter()
        # object would be exhausted by the first consumer on Python 3.
        okfiles = []
        badfiles = []
        for fname in files:
            if re.match(pattern, os.path.splitext(fname)[0], re.IGNORECASE):
                okfiles.append(fname)
            else:
                badfiles.append(fname)

        if move_bad:
            for bf in badfiles:
                # this really shouldn't happen, because the filenames get checked
                # when they are harvested from the ftp upload directories to the raw
                # scans directories
                se.move_file(os.path.join(image_dir, bf), self.scandirs["staging_error"])
                msg = "{} is not a well-formed filename! Moving to staging error directory...".format(bf)
                ScanFile.tracker.create_issue(title="bad filename", message_text=msg)
                logger.error(msg)

        resfiles = []

        for f in okfiles:
            fullpath = os.path.join(image_dir, f)
            try:
                thissf = ScanFile(fullpath)
            except Exception as obj:
                # Exception (not BaseException) so that Ctrl-C / SystemExit
                # abort the run instead of being filed as a conversion error.
                msg = "error converting {} : {}".format(f, obj)
                if move_bad:
                    se.move_file(fullpath, self.scandirs["staging_error"])
                ScanFile.tracker.create_issue(title="error converting", message_text=msg)
                logger.error(msg)
            else:
                # Only collect files that actually converted; a failed file
                # must not leave a stale or undefined object in the result.
                resfiles.append(thissf)

        return resfiles
Esempio n. 2
0
    def stage_files(self, qs, stage_root_dir="~/Dropbox/brazil/scans-staging"):
        """
        Route the scanned images for the given questionnaires to the
        appropriate staging directories by splitting each pdf.

        stage_files reads the list of already-staged files, using
        se.config.read_already_staged(), and only stages files whose
        filename is not in that list. A questionnaire whose pdf cannot be
        split is moved to self.scandirs["staging_error"], reported to the
        issue tracker and logged.

        NOTE(review): this method does not itself record the newly staged
        files in the already-staged list -- confirm that this is done
        elsewhere (e.g. by the caller).

        Args:
            qs: a list of ScanFile objects, one per questionnaire to be split
            stage_root_dir: the root directory where the staged pdfs should
                be sent (a leading ``~`` is expanded)

        Returns:
            staged, not_staged
            where staged is a list of ScanFile objects that were successfully
            staged and not_staged is a list of ScanFile objects that were not
            staged, either because they had already been staged or because
            splitting the pdf failed.
        """

        # read files that have already been staged
        already_staged = se.config.read_already_staged(self.scandirs["already_staged"])

        stage_root = os.path.expanduser(stage_root_dir)

        staged = []
        not_staged = [q for q in qs if q.filename in already_staged]
        to_stage = [q for q in qs if q.filename not in already_staged]

        for q in to_stage:
            try:
                q.split_pdf(stage_root)
            except Exception as err:
                # Exception (not BaseException) so that Ctrl-C / SystemExit
                # stop the run instead of sending a good file to the error
                # directory.
                se.move_file(q.fullpath, self.scandirs["staging_error"])
                not_staged.append(q)

                msg = "ERROR splitting pdf {}: {}; moved to error directory".format(q.fullpath, str(err))
                title = "error splitting pdf {}".format(q.filename)
                ScanFile.tracker.create_issue(title=title, message_text=msg)
                logger.error(msg)
            else:
                staged.append(q)

        return staged, not_staged
Esempio n. 3
0
                    this_file = this_scanfile.filename + this_scanfile.ext
                    this_fullpath = this_scanfile.fullpath
                    this_iset_name = "{} - {}".format(this_job_name, this_file)

                    # we should copy the file here after it has been uploaded
                    this_donefile = os.path.join(uploaded_root_dir, "questionnaires", section + "_" + value, this_file)

                    try:
                        # TODO - consider making uploading files / creating isets
                        #        part of client class instead of router...
                        this_iset = client.create_instance_sets(
                            this_job["id"], {"name": this_iset_name, "multipage_file": open(this_fullpath)}
                        )

                        # move uploaded file out of staging directory...
                        se.move_file(this_fullpath, this_donefile)

                    except IOError, ioe:
                        msg = "error uploading file {}; copied to error directory".format(this_fullpath)
                        logger.error(msg)
                        ScanFile.tracker.create_issue(title="error uploading pdf", message_text=msg)
                        se.move_file(this_fullpath, errdir)

        logger.info("finished creating jobs")

        self.unstarted_jobs = self.unstarted_jobs + new_jobs

        return new_jobs

    def start_jobs(self):
        """