예제 #1
0
파일: s3ingest.py 프로젝트: rmauge/S-3PO
def main(argv):
    parser = argparse.ArgumentParser(description='Upload assets to Amazon')
    parser.add_argument('--config',
                        dest='config_filename',
                        action='store',
                        default=CONFIG_FILE,
                        help='optional custom configuration filename')
    parser.add_argument('--node',
                        dest='node_name_override',
                        action='store',
                        default=False,
                        help='optional override for the pid-id specified in the config file')
    parameters = parser.parse_args()

    current_defaults_filename = os.path.join(os.path.dirname(os.path.abspath(__file__)), parameters.config_filename)
    config = Config(path=current_defaults_filename)
    global access_key_id
    global secret_access_key
    access_key_id = config.get('Amazon', 'aws_access_key_id')
    secret_access_key = config.get('Amazon', 'aws_secret_access_key')
    log_file_path = config.get('General', 'log_file_path', '/var/log/s3ingest.log')
    log_level = config.getint('General', 'log_level', 20)
    target_bucket_name = config.get('Amazon', 's3_bucket_name')
    monitored_dir_name = config.get('General', 'monitored_directory')
    worker_threads = config.getint('General', 'worker_threads', 5)
    pid_file_path = config.get('General', 'pid_file_path', './s3ingest.semaphore')
    if not parameters.node_name_override:
        pid_id = config.get('General', 'pid_id').rstrip()
    else:
        pid_id = parameters.node_name_override.rstrip()
    HEART_BEAT_TIME_SECS = config.getint('General', 'heart_beat_time_secs', 300)
    MIN_MODIFIED_INTERVAL_SECS = 3600 # 3600 secs = 1 hr. Keep high to allow time for large files to upload and reduce false positives

    if not os.path.exists(monitored_dir_name):
        print "The directory to be monitored '{0}' does not exist".format(monitored_dir_name)
        sys.exit(1)

    logging.basicConfig(filename=log_file_path, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=log_level)
    mailhost = config.get('Mail', 'mailhost')
    fromaddr = config.get('Mail', 'fromaddr')
    toaddrs = config.get('Mail', 'toaddrs')
    smtp_handler = handlers.SMTPHandler(mailhost, fromaddr, toaddrs, 'S3Util error occurred')
    smtp_handler.setLevel(logging.ERROR)
    logging.getLogger().addHandler(smtp_handler)
    
    s3_util = S3Util(access_key_id, secret_access_key)

    s3_util.set_target_bucket_name(target_bucket_name)
    signal.signal(signal.SIGINT, s3_util.signal_handler)
    signal.signal(signal.SIGTERM, s3_util.signal_handler)

    # Check for pid file and create if not found
    if not os.path.exists(pid_file_path):
        pid_file = open(pid_file_path, "w+")
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_EX)
        pid_file.write(str(pid_id))
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_UN)
        pid_file.close()

    s3_util.start_monitoring(monitored_dir_name)

    logging.debug("Starting worker threads")
    for i in range(worker_threads):
        t = S3Uploader(s3_util)
        t.setDaemon(True)
        t.start()

    logging.debug("Worker threads started")

    while True:
        pid_file = open(pid_file_path, "r+")
        logging.debug("Waiting for lock")
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_SH)
        logging.debug("Acquired lock")
        current_pid = pid_file.readline().rstrip()
        st = os.stat(pid_file_path)
        now = time.time()
        pid_modified_time = st[stat.ST_MTIME]
        logging.debug("pid file: {0}, current_host: {1}".format(current_pid, pid_id))
        if pid_id == current_pid:
            logging.debug("State - Active")
            os.utime(pid_file_path, None)
            s3_util.set_active(True)
            # Find files have been unmodified for a defined threshold and assume that they need to be queued
            for dirpath, dirnames, filenames in os.walk(monitored_dir_name):
                for name in filenames:
                    file_path = os.path.normpath(os.path.join(dirpath, name))
                    last_modifed_time = os.path.getmtime(file_path)
                    if ((now - last_modifed_time) > MIN_MODIFIED_INTERVAL_SECS and not
                        (s3_util.is_queued(file_path) or s3_util.is_currently_processing(file_path))):
                        logging.info("Directory scan found file '{0}' older than {1} seconds and added to queue".format(file_path, (now - last_modifed_time)))
                        s3_util.add_to_queue(file_path)
        else:
            if now - pid_modified_time > HEART_BEAT_TIME_SECS:
                logging.debug("Stale pid file found, setting state - Active")
                pid_file.truncate(0)
                pid_file.seek(0)
                pid_file.write(str(pid_id))
                s3_util.set_active(True)
            else:
                logging.debug("State - Inactive")
                s3_util.set_active(False)
        fcntl.flock(pid_file.fileno(), fcntl.LOCK_UN)
        logging.debug("Released lock")
        pid_file.close()
        #Play nice
        sleep(5)

    s3_util.wait_for_completion()
    logging.debug("Exiting")
    sys.exit(0)