def questionnaires_in_dir(self, image_directory, pattern=ScanFile.scan_fn_pat, move_bad=True):
    """
    given a directory and a file pattern, return a list of the questionnaires
    whose images are in the directory

    Args:
        image_directory: the directory to search for questionnaire files
            (a leading ~ is expanded)
        pattern: the regular expression matched, ignoring case, against each
            filename with its extension stripped
            (defaults to ScanFile.scan_fn_pat)
        move_bad: if True, files whose names do not match pattern, and files
            that cannot be converted into a ScanFile, are moved to
            self.scandirs["staging_error"]

    Returns:
        a list of ScanFile objects, one for each matching file in the
        directory that was converted successfully
    """
    image_dir = os.path.expanduser(image_directory)
    files = os.listdir(image_dir)

    # build real lists rather than relying on filter(): okfiles is used both
    # to work out the bad files and to drive the conversion loop below, which
    # would silently see nothing if filter() handed back a one-shot iterator
    okfiles = [f for f in files
               if re.match(pattern, os.path.splitext(f)[0], re.IGNORECASE)]
    okset = set(okfiles)
    badfiles = [f for f in files if f not in okset]

    if move_bad:
        for bf in badfiles:
            # this really shouldn't happen, because the filenames get checked
            # when they are harvested from the ftp upload directories to the raw
            # scans directories
            se.move_file(os.path.join(image_dir, bf),
                         self.scandirs["staging_error"])
            msg = "{} is not a well-formed filename! Moving to staging error directory...".format(bf)
            ScanFile.tracker.create_issue(title="bad filename",
                                          message_text=msg)
            logger.error(msg)

    resfiles = []

    for f in okfiles:
        fullpath = os.path.join(image_dir, f)
        try:
            thissf = ScanFile(fullpath)
        except Exception as obj:
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit still propagate instead of being logged as a bad scan
            msg = "error converting {} : {}".format(f, obj)
            if move_bad:
                se.move_file(fullpath, self.scandirs["staging_error"])
            ScanFile.tracker.create_issue(title="error converting",
                                          message_text=msg)
            logger.error(msg)
            # skip the append below: thissf is either unbound or still holds
            # the ScanFile from the previous iteration
            continue
        resfiles.append(thissf)

    return resfiles
def stage_files(self, qs, stage_root_dir="~/Dropbox/brazil/scans-staging"):
    """
    route the scanned images in a given directory to appropriate staging
    directories and figure out which paths they should take through captricity

    stage_files checks the list of already-staged files, using
    secondEntry.config.read_already_staged(), and only stages files that
    are not in that list.

    Args:
        qs: a list of ScanFile objects, one per questionnaire to be split
        stage_root_dir: the root directory where the staged pdfs should be
            sent (a leading ~ is expanded)

    Returns:
        staged, not_staged
        where staged is a list of ScanFile objects that were successfully
        staged and not_staged is a list of ScanFile objects that could not be
        staged, either because they were already staged or because splitting
        the pdf failed. files that fail to split are moved to
        self.scandirs["staging_error"] and reported to the issue tracker.
    """
    # read files that have already been staged
    already_staged = se.config.read_already_staged(self.scandirs["already_staged"])
    stage_root = os.path.expanduser(stage_root_dir)

    staged = []
    # anything already staged is reported back as not staged this time round
    not_staged = [q for q in qs if q.filename in already_staged]
    pending = [q for q in qs if q.filename not in already_staged]

    for q in pending:
        try:
            q.split_pdf(stage_root)
        except Exception as exc:
            # Exception (not BaseException) so that KeyboardInterrupt and
            # SystemExit are not mistaken for a bad pdf
            se.move_file(q.fullpath, self.scandirs["staging_error"])
            not_staged.append(q)
            msg = "ERROR splitting pdf {}: {}; moved to error directory".format(q.fullpath, str(exc))
            title = "error splitting pdf {}".format(q.filename)
            ScanFile.tracker.create_issue(title=title, message_text=msg)
            logger.error(msg)
        else:
            staged.append(q)

    # NOTE(review): the original docstring said the already-staged list is
    # updated once staging finishes, but no write-back is visible in this
    # block -- confirm where that list gets updated
    return staged, not_staged
this_file = this_scanfile.filename + this_scanfile.ext this_fullpath = this_scanfile.fullpath this_iset_name = "{} - {}".format(this_job_name, this_file) # we should copy the file here after it has been uploaded this_donefile = os.path.join(uploaded_root_dir, "questionnaires", section + "_" + value, this_file) try: # TODO - consider making uploading files / creating isets # part of client class instead of router... this_iset = client.create_instance_sets( this_job["id"], {"name": this_iset_name, "multipage_file": open(this_fullpath)} ) # move uploaded file out of staging directory... se.move_file(this_fullpath, this_donefile) except IOError, ioe: msg = "error uploading file {}; copied to error directory".format(this_fullpath) logger.error(msg) ScanFile.tracker.create_issue(title="error uploading pdf", message_text=msg) se.move_file(this_fullpath, errdir) logger.info("finished creating jobs") self.unstarted_jobs = self.unstarted_jobs + new_jobs return new_jobs def start_jobs(self): """