def open(self, relative_path):
    """Move ``relative_path`` from the input dir to the work dir,
    register it in ``self.files`` and return the open file object."""
    path = os.path.join(self.input_dir(), relative_path)
    work = os.path.join(self.work_dir(), relative_path)
    move_file(path, work)
    self.files.append(relative_path)
    f = open(work)
    return f
def mv_files_from_work_dir(self, to_dir):
    """Move the current endpoint files from the work dir to ``to_dir``."""
    # Move the endpoint files
    for p in self.files:
        work = os.path.join(self.work_dir(), p)
        target = os.path.join(to_dir, p)
        move_file(work, target)
    self.files = []
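# NOTE: illustrative sketch only, not part of the original module. It shows
# how an implementor's builder might use ``Config.open`` so that both the
# endpoint file and a dependency file are moved to the work dir, tracked in
# ``self.files`` and later relocated by ``mv_files_from_work_dir``. The
# ``ExampleBuilder`` class, its constructor signature and the ``mapping.csv``
# dependency name are assumptions, not the real builder API.
import csv


class ExampleBuilder(object):

    def __init__(self, config, relative_path):
        self.config = config
        self.relative_path = relative_path

    def process_and_save(self):
        # Opening through the config moves the file into the work dir and
        # registers it, so it can be relocated to done/error/input once
        # processing is over.
        endpoint = self.config.open(self.relative_path)
        mapping = self.config.open('mapping.csv')  # hypothetical dependency
        instances, unhandled_errors = [], False
        for row in csv.reader(endpoint):
            instances.append(row)  # a real builder would save model instances
        endpoint.close()
        mapping.close()
        return instances, unhandled_errors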
def process_recursively(self, path=""): """Recusively inspect :attribute:`BaseConfig.input_dir` and process files using BFS Recursivly inspect :attribute:`BaseConfig.input_dir`, loads builder class through :method:`BaseConfig.load_builder` and run processing""" instances = None if hasattr(self, 'postprocess'): instances = [] log.info(u'process_recursively %s' % path) input, work, error, done = self.paths(path) if not os.path.exists(work): os.makedirs(work) if not os.path.exists(error): os.makedirs(error) if not os.path.exists(done): os.makedirs(done) # input_dir should exists log.info(u'work_path %s' % work) for f in self.listdir(input): # Relative file path from current path partial_file_path = os.path.join(path, f) # Absolute file path input_file_path = os.path.join(input, f) # For now, do not process non utf-8 file names #FIXME if not is_utf8(f): error_file_path = os.path.join(self.error_dir(), f) move_file(input_file_path, error_file_path) if os.path.isdir(input_file_path): self.process_recursively(partial_file_path) else: if not os.path.exists(input_file_path): # the file might have been already moved # by a nested builder continue # --- Check file age # Idea is to prevent from processing a file too much recent, to # avoid processing file while they are downloaded in input dir # and to minimize risk of missing dependency files # If you don't care about this, just do not set it in settings min_age = self.QUARANTINE # seconds if min_age > 0: st_mtime = os.stat(input_file_path).st_mtime age = time() - st_mtime if age < min_age: log.info(u"Skipping too recent file %s" % force_unicode(input_file_path)) continue # --- Load and process builder for file builder = self.load_builder(partial_file_path) if builder is None: log.info(u'skip file %s' % force_unicode(input_file_path)) continue else: log.info(u'match %s' % force_unicode(partial_file_path)) if not self.dryrun: try: new_instances, unhandled_errors = builder.process_and_save( ) except StopConfig, e: # this is a user controlled exception msg = u'Import stopped for %s' % self log.warning(msg, exc_info=sys.exc_info()) to_dir = self.error_dir() break except PostponeBuilder, e: # Implementor as asked to postpone current process msg = u'Builder postponed for %s' % self log.warning(msg, exc_info=sys.exc_info()) # Do not move files, keep them for next run to_dir = self.input_dir() continue # to next file except Exception, e: msg = u'builder processing of %s failed' % input_file_path log.error(msg, exc_info=sys.exc_info()) to_dir = self.error_dir() else: if hasattr(self, 'postprocess') and new_instances: instances.append(new_instances) to_dir = unhandled_errors and self.error_dir() \ or self.done_dir() finally:
    # --- Clean old files from current input directory
    if not self.dryrun:
        # Here is the simplest implementation to manage secondary files,
        # i.e. files that are not endpoint files.
        # These files could have been used as dependency files by one or
        # more imports.
        # We need to manage two cases:
        # - the case of a file that is a dependency of two endpoint files
        # - the case of a file that is a dependency of an endpoint file
        #   that has gone into error
        # Both cases would be better handled with transactions, but we
        # consider that the transaction implementation in Django is not
        # advanced enough for these complex cases (m2m, post_save, etc.)
        # (See for example ticket #14051 in Django Trac)
        # When the Implementor has used Config.open to manage these files,
        # they have already been moved away
        for f in self.listdir(input):
            input_file_path = os.path.join(input, f)
            done_file_path = os.path.join(done, f)
            if os.path.isdir(input_file_path):
                continue
            grace_period = self.GRACE_PERIOD
            st_mtime = os.stat(input_file_path).st_mtime
            age = time() - st_mtime
            if age > grace_period:
                log.info(u"Removing old file from input dir: %s"
                         % force_unicode(input_file_path))
                move_file(input_file_path, done_file_path)
    if hasattr(self, 'postprocess'):
        self.postprocess(instances)
def process_recursively(self, path=""): """Recusively inspect :attribute:`BaseConfig.input_dir` and process files using BFS Recursivly inspect :attribute:`BaseConfig.input_dir`, loads builder class through :method:`BaseConfig.load_builder` and run processing""" instances = None if hasattr(self, 'postprocess'): instances = [] log.info(u'process_recursively %s' % path) input, work, error, done = self.paths(path) if not os.path.exists(work): os.makedirs(work) if not os.path.exists(error): os.makedirs(error) if not os.path.exists(done): os.makedirs(done) # input_dir should exists log.info(u'work_path %s' % work) for f in self.listdir(input): # Relative file path from current path partial_file_path = os.path.join(path, f) # Absolute file path input_file_path = os.path.join(input, f) # For now, do not process non utf-8 file names #FIXME if not is_utf8(f): error_file_path = os.path.join(self.error_dir(), f) move_file(input_file_path, error_file_path) if os.path.isdir(input_file_path): self.process_recursively(partial_file_path) else: if not os.path.exists(input_file_path): # the file might have been already moved # by a nested builder continue # --- Check file age # Idea is to prevent from processing a file too much recent, to # avoid processing file while they are downloaded in input dir # and to minimize risk of missing dependency files # If you don't care about this, just do not set it in settings min_age = self.QUARANTINE # seconds if min_age > 0: st_mtime = os.stat(input_file_path).st_mtime age = time() - st_mtime if age < min_age: log.info(u"Skipping too recent file %s" % force_unicode(input_file_path)) continue # --- Load and process builder for file builder = self.load_builder(partial_file_path) if builder is None: log.info(u'skip file %s' % force_unicode(input_file_path)) continue else: log.info(u'match %s' % force_unicode(partial_file_path)) if not self.dryrun: try: new_instances, unhandled_errors = builder.process_and_save() except StopConfig, e: # this is a user controlled exception msg = u'Import stopped for %s' % self log.warning(msg, exc_info=sys.exc_info()) to_dir = self.error_dir() break except PostponeBuilder, e: # Implementor as asked to postpone current process msg = u'Builder postponed for %s' % self log.warning(msg, exc_info=sys.exc_info()) # Do not move files, keep them for next run to_dir = self.input_dir() continue # to next file except Exception, e: msg = u'builder processing of %s failed' % input_file_path log.error(msg, exc_info=sys.exc_info()) to_dir = self.error_dir() else: if hasattr(self, 'postprocess') and new_instances: instances.append(new_instances) to_dir = unhandled_errors and self.error_dir() \ or self.done_dir() finally:
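# NOTE: illustrative sketch only, not part of the original module. It shows
# how the knobs used above (QUARANTINE, GRACE_PERIOD, load_builder,
# postprocess, dryrun) might be wired together in a concrete config.
# ``MyImportConfig``, the class-level attribute overrides and the ``.csv``
# matching rule are assumptions made for illustration; ``BaseConfig`` is
# taken from the docstring references and ``ExampleBuilder`` from the
# sketch above.


class MyImportConfig(BaseConfig):

    QUARANTINE = 120       # seconds: skip files modified less than 2 min ago
    GRACE_PERIOD = 3600    # seconds: sweep non-endpoint files after 1 hour

    def load_builder(self, partial_file_path):
        # Only endpoint files matching this naming rule get a builder;
        # returning None makes process_recursively skip the file.
        if partial_file_path.endswith('.csv'):
            return ExampleBuilder(self, partial_file_path)
        return None

    def postprocess(self, instances):
        # Called at the end of each process_recursively pass with the
        # results collected from process_and_save.
        log.info(u'imported %d endpoint files' % len(instances))


# A dry run could then look like this (how dryrun is set is assumed):
#     config = MyImportConfig()
#     config.dryrun = True
#     config.process_recursively()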