# Standard-library imports required by main(); Config, S3Util, S3Uploader,
# and CONFIG_FILE are defined elsewhere in this script.
import argparse
import fcntl
import logging
import os
import signal
import stat
import sys
import time
from logging import handlers


def main(argv):
    parser = argparse.ArgumentParser(description='Upload assets to Amazon')
    parser.add_argument('--config', dest='config_filename', action='store',
                        default=CONFIG_FILE,
                        help='optional custom configuration filename')
    parser.add_argument('--node', dest='node_name_override', action='store',
                        default=None,
                        help='optional override for the pid-id specified in the config file')
    parameters = parser.parse_args()

    # Resolve the configuration file relative to this script's directory.
    current_defaults_filename = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), parameters.config_filename)
    config = Config(path=current_defaults_filename)

    global access_key_id
    global secret_access_key
    access_key_id = config.get('Amazon', 'aws_access_key_id')
    secret_access_key = config.get('Amazon', 'aws_secret_access_key')
    log_file_path = config.get('General', 'log_file_path', '/var/log/s3ingest.log')
    log_level = config.getint('General', 'log_level', 20)  # 20 == logging.INFO
    target_bucket_name = config.get('Amazon', 's3_bucket_name')
    monitored_dir_name = config.get('General', 'monitored_directory')
    worker_threads = config.getint('General', 'worker_threads', 5)
    pid_file_path = config.get('General', 'pid_file_path', './s3ingest.semaphore')

    if parameters.node_name_override:
        pid_id = parameters.node_name_override.rstrip()
    else:
        pid_id = config.get('General', 'pid_id').rstrip()

    HEART_BEAT_TIME_SECS = config.getint('General', 'heart_beat_time_secs', 300)
    # 3600 secs = 1 hr. Keep high to allow time for large files to upload
    # and to reduce false positives.
    MIN_MODIFIED_INTERVAL_SECS = 3600

    if not os.path.exists(monitored_dir_name):
        print("The directory to be monitored '{0}' does not exist".format(monitored_dir_name))
        sys.exit(1)

    logging.basicConfig(filename=log_file_path,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=log_level)

    # Email ERROR-level (and higher) log records to the configured recipients.
    mailhost = config.get('Mail', 'mailhost')
    fromaddr = config.get('Mail', 'fromaddr')
    toaddrs = config.get('Mail', 'toaddrs')
    smtp_handler = handlers.SMTPHandler(mailhost, fromaddr, toaddrs,
                                        'S3Util error occurred')
    smtp_handler.setLevel(logging.ERROR)
    logging.getLogger().addHandler(smtp_handler)

    s3_util = S3Util(access_key_id, secret_access_key)
    s3_util.set_target_bucket_name(target_bucket_name)
    signal.signal(signal.SIGINT, s3_util.signal_handler)
    signal.signal(signal.SIGTERM, s3_util.signal_handler)

    # Check for the pid file and create it if not found.
    if not os.path.exists(pid_file_path):
        pid_file = open(pid_file_path, 'w+')
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_EX)
        pid_file.write(str(pid_id))
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_UN)
        pid_file.close()

    s3_util.start_monitoring(monitored_dir_name)

    logging.debug("Starting worker threads")
    for _ in range(worker_threads):
        t = S3Uploader(s3_util)
        t.daemon = True  # daemon threads will not block interpreter shutdown
        t.start()
    logging.debug("Worker threads started")

    while True:
        pid_file = open(pid_file_path, 'r+')
        logging.debug("Waiting for lock")
        # Take an exclusive lock: the stale-pid branch below may rewrite the file.
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_EX)
        logging.debug("Acquired lock")
        current_pid = pid_file.readline().rstrip()
        st = os.stat(pid_file_path)
        now = time.time()
        pid_modified_time = st[stat.ST_MTIME]
        logging.debug("pid file: {0}, current_host: {1}".format(current_pid, pid_id))
        if pid_id == current_pid:
            logging.debug("State - Active")
            os.utime(pid_file_path, None)  # touch the pid file as a heartbeat
            s3_util.set_active(True)
            # Find files that have been unmodified for the defined threshold
            # and assume they need to be queued.
            for dirpath, dirnames, filenames in os.walk(monitored_dir_name):
                for name in filenames:
                    file_path = os.path.normpath(os.path.join(dirpath, name))
                    last_modified_time = os.path.getmtime(file_path)
                    if ((now - last_modified_time) > MIN_MODIFIED_INTERVAL_SECS
                            and not (s3_util.is_queued(file_path)
                                     or s3_util.is_currently_processing(file_path))):
                        logging.info("Directory scan found file '{0}' older than {1} "
                                     "seconds and added to queue".format(
                                         file_path, (now - last_modified_time)))
                        s3_util.add_to_queue(file_path)
        else:
            if now - pid_modified_time > HEART_BEAT_TIME_SECS:
                # The active node's heartbeat has gone stale; take over.
                logging.debug("Stale pid file found, setting state - Active")
                pid_file.truncate(0)
                pid_file.seek(0)
                pid_file.write(str(pid_id))
                s3_util.set_active(True)
            else:
                logging.debug("State - Inactive")
                s3_util.set_active(False)
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_UN)
        logging.debug("Released lock")
        pid_file.close()
        # Play nice
        time.sleep(5)

    s3_util.wait_for_completion()
    logging.debug("Exiting")
    sys.exit(0)
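
# A minimal sketch of the configuration file this script expects, reconstructed
# from the config.get()/getint() calls in main(). Section and key names come
# from the source; the values shown are illustrative assumptions only (defaults
# from the source are used where the code supplies one).
#
#   [General]
#   monitored_directory = /srv/s3ingest/incoming
#   log_file_path = /var/log/s3ingest.log
#   log_level = 20
#   worker_threads = 5
#   pid_file_path = ./s3ingest.semaphore
#   pid_id = node-01
#   heart_beat_time_secs = 300
#
#   [Amazon]
#   aws_access_key_id = AKIA...
#   aws_secret_access_key = ...
#   s3_bucket_name = my-ingest-bucket
#
#   [Mail]
#   mailhost = smtp.example.com
#   fromaddr = s3ingest@example.com
#   toaddrs = ops@example.com

# Conventional entry point, assuming this module is meant to be run directly.
if __name__ == '__main__':
    main(sys.argv)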