def move_files_to_s3(s3helper, directory_name):
    for file_name in glob.glob(os.path.join(directory_name, '*')):
        (root_directory_name, tail_directory_name) = os.path.split(directory_name)
        (root_file_name, tail_file_name) = os.path.split(file_name)
        key = get_stats_archive_key(correct(tail_directory_name), tail_file_name)
        LOG.info('Adding {0} to {1}'.format(file_name, key))
        s3helper.add_file_to_bucket(get_archive_bucket(), key, file_name)

    shutil.rmtree(directory_name, ignore_errors=True)
def move_files_to_s3(s3helper, directory_name):
    for file_name in glob.glob(os.path.join(directory_name, '*')):
        (root_directory_name,
         tail_directory_name) = os.path.split(directory_name)
        (root_file_name, tail_file_name) = os.path.split(file_name)
        key = get_stats_archive_key(correct(tail_directory_name),
                                    tail_file_name)
        LOG.info('Adding {0} to {1}'.format(file_name, key))
        s3helper.add_file_to_bucket(get_archive_bucket(), key, file_name)

    shutil.rmtree(directory_name, ignore_errors=True)
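These two variants rely on the project's S3Helper wrapper and its key helpers. For comparison, here is a minimal sketch of the same upload-then-delete flow using boto3 directly; the bucket name and key prefix parameters are illustrative assumptions, not the project's get_archive_bucket() and get_stats_archive_key() values.

import glob
import os
import shutil

import boto3


def archive_directory_to_s3(directory_name, bucket_name, key_prefix):
    # Upload every file in the directory, then remove the local copy
    s3_client = boto3.client('s3')
    for file_name in glob.glob(os.path.join(directory_name, '*')):
        tail_file_name = os.path.basename(file_name)
        key = '{0}/{1}'.format(key_prefix, tail_file_name)
        s3_client.upload_file(file_name, bucket_name, key)

    shutil.rmtree(directory_name, ignore_errors=True)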
Example #3
def access_s3():
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        s3helper = S3Helper()
        bucket = s3helper.get_bucket(get_archive_bucket())
        LOG.info('Access S3 bucket name: {0}'.format(bucket.name))
    except Exception:
        LOG.exception('access_s3')
        return False

    return True
Example #4
def access_s3():
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        s3helper = S3Helper()
        bucket = s3helper.get_bucket(get_archive_bucket())
        LOG.info("Access S3 bucket name: {0}".format(bucket.name))
    except Exception:
        LOG.exception("check_database_connection")
        return False

    return True
Example #5
def access_s3():
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        LOG.info('Testing S3 access')
        s3helper = S3Helper()
        bucket = s3helper.get_bucket(get_archive_bucket())
        LOG.info('Access S3 bucket name: {0}'.format(bucket.name))
    except Exception:
        LOG.exception('access_s3')
        return False

    return True
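The variants above differ only in quoting and logging detail. As a point of comparison, a minimal boto3 sketch of the same reachability check; the bucket_name parameter is an illustrative assumption, whereas the original code resolves the bucket via get_archive_bucket().

import logging

import boto3
from botocore.exceptions import ClientError

LOG = logging.getLogger(__name__)


def access_s3(bucket_name):
    """
    Check we can access the archive bucket
    :return:
    """
    try:
        LOG.info('Testing S3 access')
        s3_client = boto3.client('s3')
        s3_client.head_bucket(Bucket=bucket_name)
        LOG.info('Access S3 bucket name: {0}'.format(bucket_name))
    except ClientError:
        LOG.exception('access_s3')
        return False

    return True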
args = vars(parser.parse_args())

if args['option'] == 'boinc':
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    # We're running from the BOINC server
    process_boinc()
else:
    # We're running from a specially created AMI
    filename, full_filename = get_ami_log_file('archive_boinc_stats')
    add_file_handler_to_root(full_filename)
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    LOG.info('About to perform sanity checks')
    if pass_sanity_checks():
        process_ami()
    else:
        LOG.error('Failed to pass sanity tests')

    # Try copying the log file to S3
    try:
        LOG.info('About to copy the log file')
        s3helper = S3Helper()
        s3helper.add_file_to_bucket(get_archive_bucket(), get_log_archive_key('archive_boinc_stats', filename), full_filename, True)
        os.remove(full_filename)
    except Exception:
        LOG.exception('Failed to copy the log file')

    ec2_helper = EC2Helper()
    ec2_helper.release_public_ip()

LOG.info('All done')
Example #8
parser.add_argument('option', choices=['boinc', 'ami'], help='are we running on the BOINC server or the AMI server')
args = vars(parser.parse_args())

if args['option'] == 'boinc':
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    # We're running from the BOINC server
    original_image_checked_boinc()
else:
    # We're running from a specially created AMI
    log_name = 'original_image_checked'
    filename, full_filename = get_ami_log_file(log_name)
    add_file_handler_to_root(full_filename)
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    LOG.info('About to perform sanity checks')
    if pass_sanity_checks():
        original_image_checked_ami()
    else:
        LOG.error('Failed to pass sanity tests')

    # Try copying the log file to S3
    try:
        LOG.info('About to copy the log file')
        s3helper = S3Helper()
        s3helper.add_file_to_bucket(get_archive_bucket(), get_log_archive_key(log_name, filename), full_filename, True)
        os.remove(full_filename)
    except Exception:
        LOG.exception('Failed to copy the log file')


LOG.info('All done')
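The entry-point snippets above reference a parser that is built earlier in each script. A minimal sketch of that argparse setup, based on the add_argument() call shown above; the description string is an assumption.

import argparse

parser = argparse.ArgumentParser(description='BOINC statistics archiving entry point')
parser.add_argument('option', choices=['boinc', 'ami'],
                    help='are we running on the BOINC server or the AMI server')
args = vars(parser.parse_args())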
Example #9
def get_data(output_directory):
    """
    Get the stats from the S3 archive and build the csv files
    :param output_directory: where to store the files
    :return:
    """
    done_dates = get_done_dates()

    # Now get ready to load the files
    keys_being_restored = []
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_archive_bucket())
    set_filenames = set()
    for prefix in bucket.list(prefix='stats/', delimiter='/'):
        elements = prefix.name.split('/')
        elements = elements[1].split('_')
        date_file = date(int(elements[1]), int(elements[2]), int(elements[3]))
        if date_file not in done_dates:
            stats_file = '{0}_{1}_{2}_user.gz'.format(elements[1], elements[2],
                                                      elements[3])
            full_filename = os.path.join(output_directory, stats_file)
            if full_filename in set_filenames:
                # Ignore
                pass
            # A 9-byte file is the 'Restoring' placeholder written below
            elif not os.path.exists(full_filename) or os.path.getsize(
                    full_filename) == 9:
                set_filenames.add(full_filename)
                key = bucket.get_key(os.path.join(prefix.name, 'user.gz'))
                if key is not None:
                    if key.ongoing_restore or key.storage_class == 'GLACIER':
                        LOG.info('Restoring {0}'.format(key.name))
                        # We need to retrieve it
                        if not key.ongoing_restore:
                            key.restore(days=5)
                        keys_being_restored.append([key.name, full_filename])

                        # Put an empty file in the directory
                        if not os.path.exists(full_filename):
                            output_file = open(full_filename, "wb")
                            output_file.write('Restoring')
                            output_file.close()
                    else:
                        # Put the file in the storage area
                        LOG.info('Fetching {0}'.format(key.name))
                        key.get_contents_to_filename(full_filename)

    # Now we have to wait for all the files we need to be restored
    for key_pair in keys_being_restored:
        key = bucket.get_key(key_pair[0])
        while key.ongoing_restore:
            # Wait five minutes and check again
            time.sleep(300)
            key = bucket.get_key(key_pair[0])

        # The file has been restored so copy it
        LOG.info('Fetching {0}'.format(key_pair[0]))
        key.get_contents_to_filename(key_pair[1])

    # Build the prepared statements
    insert_usage = USAGE.insert()
    insert_individual = INDIVIDUAL.insert()

    # Now build up the list of filenames
    for file_name in glob.glob(os.path.join(output_directory, '*_user.gz')):
        (head, tail) = os.path.split(file_name)
        elements = tail.split('_')
        date_file = date(int(elements[0]), int(elements[1]), int(elements[2]))

        if date_file not in done_dates:
            # Read the contents
            LOG.info('Processing {0}'.format(file_name))
            gzip_file = gzip.open(file_name, 'rb')
            contents = gzip_file.read()
            gzip_file.close()

            # Extract the XML data
            root = ET.fromstring(contents)

            # Initialise
            gflops = 0.0
            active_users = 0
            registered_users = 0
            transaction = connection.begin()

            # The users are in a random order
            for user in root:
                user_id = user.find('id').text
                user_id = int(user_id)

                expavg_credit = user.find('expavg_credit').text
                expavg_credit = float(expavg_credit)

                connection.execute(insert_individual,
                                   date=date_file,
                                   user_id=user_id,
                                   expavg_credit=expavg_credit)

                registered_users += 1

                if expavg_credit > 1:
                    active_users += 1

                gflops += expavg_credit

            connection.execute(insert_usage,
                               date=date_file,
                               gflops=gflops / COBBLESTONE_FACTOR,
                               active_users=active_users,
                               registered_users=registered_users)
            transaction.commit()
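The XML handling at the end of get_data() only reads the <id> and <expavg_credit> elements of each <user>. Purely for illustration, a self-contained sketch of that parsing step against a tiny hand-written fragment; the real BOINC user dump carries more fields per user, and the root element name here is assumed.

import xml.etree.ElementTree as ET

SAMPLE_USER_XML = """
<users>
    <user>
        <id>42</id>
        <expavg_credit>1234.5</expavg_credit>
    </user>
    <user>
        <id>43</id>
        <expavg_credit>0.0</expavg_credit>
    </user>
</users>
"""

root = ET.fromstring(SAMPLE_USER_XML)
for user in root:
    user_id = int(user.find('id').text)
    expavg_credit = float(user.find('expavg_credit').text)
    print(user_id, expavg_credit)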
Example #10
args = vars(parser.parse_args())

if args['option'] == 'boinc':
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    # We're running from the BOINC server
    original_image_checked_boinc()
else:
    # We're running from a specially created AMI
    log_name = 'original_image_checked'
    filename, full_filename = get_ami_log_file(log_name)
    add_file_handler_to_root(full_filename)
    LOG.info('PYTHONPATH = {0}'.format(sys.path))
    LOG.info('About to perform sanity checks')
    if pass_sanity_checks():
        original_image_checked_ami()
    else:
        LOG.error('Failed to pass sanity tests')

    # Try copying the log file to S3
    try:
        LOG.info('About to copy the log file')
        s3helper = S3Helper()
        s3helper.add_file_to_bucket(get_archive_bucket(),
                                    get_log_archive_key(log_name, filename),
                                    full_filename, True)
        os.remove(full_filename)
    except Exception:
        LOG.exception('Failed to copy the log file')

LOG.info('All done')
Example #11
def get_data(output_directory):
    """
    Get the stats from the S3 archive and build the csv files
    :param output_directory: where to store the files
    :return:
    """
    done_dates = get_done_dates()

    # Now get ready to load the files
    keys_being_restored = []
    s3helper = S3Helper()
    bucket = s3helper.get_bucket(get_archive_bucket())
    set_filenames = set()
    for prefix in bucket.list(prefix='stats/', delimiter='/'):
        elements = prefix.name.split('/')
        elements = elements[1].split('_')
        date_file = date(int(elements[1]), int(elements[2]), int(elements[3]))
        if date_file not in done_dates:
            stats_file = '{0}_{1}_{2}_user.gz'.format(elements[1], elements[2], elements[3])
            full_filename = os.path.join(output_directory, stats_file)
            if full_filename in set_filenames:
                # Ignore
                pass
            # A 9-byte file is the 'Restoring' placeholder written below
            elif not os.path.exists(full_filename) or os.path.getsize(full_filename) == 9:
                set_filenames.add(full_filename)
                key = bucket.get_key(os.path.join(prefix.name, 'user.gz'))
                if key is not None:
                    if key.ongoing_restore or key.storage_class == 'GLACIER':
                        LOG.info('Restoring {0}'.format(key.name))
                        # We need to retrieve it
                        if not key.ongoing_restore:
                            key.restore(days=5)
                        keys_being_restored.append([key.name, full_filename])

                        # Put an empty file in the directory
                        if not os.path.exists(full_filename):
                            output_file = open(full_filename, "wb")
                            output_file.write('Restoring')
                            output_file.close()
                    else:
                        # Put the file in the storage area
                        LOG.info('Fetching {0}'.format(key.name))
                        key.get_contents_to_filename(full_filename)

    # Now we have to wait for all the files we need to be restored
    for key_pair in keys_being_restored:
        key = bucket.get_key(key_pair[0])
        while key.ongoing_restore:
            # Wait five minutes and check again
            time.sleep(300)
            key = bucket.get_key(key_pair[0])

        # The file has been restored so copy it
        LOG.info('Fetching {0}'.format(key_pair[0]))
        key.get_contents_to_filename(key_pair[1])

    # Build the prepared statements
    insert_usage = USAGE.insert()
    insert_individual = INDIVIDUAL.insert()

    # Now build up the list of filenames
    for file_name in glob.glob(os.path.join(output_directory, '*_user.gz')):
        (head, tail) = os.path.split(file_name)
        elements = tail.split('_')
        date_file = date(int(elements[0]), int(elements[1]), int(elements[2]))

        if date_file not in done_dates:
            # Read the contents
            LOG.info('Processing {0}'.format(file_name))
            gzip_file = gzip.open(file_name, 'rb')
            contents = gzip_file.read()
            gzip_file.close()

            # Extract the XML data
            root = ET.fromstring(contents)

            # Initialise
            gflops = 0.0
            active_users = 0
            registered_users = 0
            transaction = connection.begin()

            # The users are in a random order
            for user in root:
                user_id = user.find('id').text
                user_id = int(user_id)

                expavg_credit = user.find('expavg_credit').text
                expavg_credit = float(expavg_credit)

                connection.execute(insert_individual, date=date_file, user_id=user_id, expavg_credit=expavg_credit)

                registered_users += 1

                if expavg_credit > 1:
                    active_users += 1

                gflops += expavg_credit

            connection.execute(insert_usage, date=date_file, gflops=gflops / COBBLESTONE_FACTOR, active_users=active_users, registered_users=registered_users)
            transaction.commit()
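For reference, the "restore from Glacier, poll, then download" pattern used in get_data() can be expressed with boto3 roughly as follows; the bucket and key names are illustrative assumptions, and the 300-second poll interval mirrors the code above.

import time

import boto3


def fetch_with_glacier_restore(bucket_name, key_name, local_path, days=5):
    s3_client = boto3.client('s3')
    head = s3_client.head_object(Bucket=bucket_name, Key=key_name)

    if head.get('StorageClass') == 'GLACIER':
        if head.get('Restore') is None:
            # Start the restore; the restored copy stays readable for `days` days
            s3_client.restore_object(Bucket=bucket_name, Key=key_name,
                                     RestoreRequest={'Days': days})

        # Poll until the Restore header reports ongoing-request="false"
        while 'ongoing-request="false"' not in (
                s3_client.head_object(Bucket=bucket_name,
                                      Key=key_name).get('Restore') or ''):
            time.sleep(300)

    s3_client.download_file(bucket_name, key_name, local_path)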