Exemple #1
0
    def add_file_to_bucket_multipart(self, bucket_name, key_name, source_path, parallel_processes=2, reduced_redundancy=True):
        """
        Upload a local file to a bucket as a parallel multipart upload.

        The file is cut into 10 MB chunks and every chunk is handed to
        ``upload_part`` through a process pool. The upload is completed only
        when the number of parts reported for the multipart upload equals the
        number of chunks; otherwise it is cancelled and an error is logged.

        :param bucket_name: name of the destination bucket
        :param key_name: key to store the file under (also used to guess the Content-Type)
        :param source_path: path of the local file to upload
        :param parallel_processes: number of worker processes in the pool
        :param reduced_redundancy: passed through to ``initiate_multipart_upload``
        :raises S3UploadException: if the file needs more than 10,000 chunks
        """
        LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'.format(
            bucket_name, key_name, source_path, parallel_processes, reduced_redundancy))

        source_size = os.stat(source_path).st_size
        bytes_per_chunk = 10 * 1024 * 1024
        chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

        # You can only upload 10,000 chunks, so exactly 10,000 is still allowed
        if chunk_amount > 10000:
            raise S3UploadException('Too many chunks')

        bucket = self.get_bucket(bucket_name)

        headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
        mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)

        LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(bytes_per_chunk, chunk_amount))

        pool = Pool(processes=parallel_processes)
        for i in range(chunk_amount):
            offset = i * bytes_per_chunk
            # The last chunk is usually shorter than bytes_per_chunk
            bytes_to_copy = min(bytes_per_chunk, source_size - offset)
            # Part numbers are 1-based
            part_num = i + 1
            pool.apply_async(upload_part, [self._aws_access_key_id, self._aws_secret_access_key, bucket_name, mp.id, part_num, source_path, offset, bytes_to_copy])
        pool.close()
        pool.join()

        # The apply_async results are never inspected, so a failed worker only
        # shows up here as a missing part
        uploaded_parts = len(mp.get_all_parts())
        if uploaded_parts == chunk_amount:
            mp.complete_upload()
        else:
            LOGGER.error('Multipart upload of {0} cancelled: expected {1} parts, found {2}'.format(key_name, chunk_amount, uploaded_parts))
            mp.cancel_upload()
Exemple #2
0
def main():
    """
    Parse the command line and start the servers if the arguments check out.

    When ``check_args`` returns None the raw arguments are logged as an error
    and nothing is started.
    """
    parser = argparse.ArgumentParser('Start a number of CLEAN servers')
    add_option = parser.add_argument
    add_option('-a', '--ami_id', help='the AMI id to use')
    add_option('-i', '--instance_type', required=True, help='the instance type to use')
    add_option('-c', '--created_by', help='the username to use')
    add_option('-n', '--name', required=True, help='the instance name to use')
    add_option('-s', '--spot_price', type=float, help='the spot price to use')
    add_option('-b', '--bash_script', help='the bash script to use')
    add_option('-e', '--ebs', type=int, help='the size in GB of any EBS volume')
    add_option('bottom_frequency', help='The bottom frequency')
    add_option('frequency_range', help='the range of frequencies')
    add_option('obs_id', help='the observation id')

    raw_args = vars(parser.parse_args())
    checked = check_args(raw_args)

    # Bail out early on bad arguments
    if checked is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(raw_args))
        return

    start_servers(
        checked['ami_id'],
        checked['user_data'],
        checked['setup_disks'],
        raw_args['instance_type'],
        make_safe_filename(raw_args['obs_id']),
        checked['created_by'],
        raw_args['name'],
        checked['instance_details'],
        checked['spot_price'],
        raw_args['ebs'],
        raw_args['bottom_frequency'],
        raw_args['frequency_range'])
Exemple #3
0
 def _upload(retries_left=amount_of_retries):
     """
     Upload one part of a multipart upload, retrying on failure.

     The names that are not defined here (part_num, source_path, offset,
     bytes_to_copy, bucket_name, multipart_id and the AWS credentials) are
     taken from the surrounding scope.

     :param retries_left: how many more attempts may be made after a failure
     :raises Exception: re-raises the last failure once the retries run out
     """
     try:
         LOGGER.info(
             'Start uploading part: #{0}, source_path: {1}'.format(
                 part_num,
                 source_path
             )
         )
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
         # Find the multipart upload this part belongs to by its id
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes_to_copy) as fp:
                     mp.upload_part_from_file(fp=fp, part_num=part_num)
                 break
     except Exception:
         if retries_left:
             _upload(retries_left=retries_left - 1)
         else:
             LOGGER.info(
                 'Failed uploading part: #{0}, source_path: {1}'.format(
                     part_num,
                     source_path
                 )
             )
             # A bare raise keeps the original traceback
             raise
Exemple #4
0
    def __call__(self):
        """
        Create a volume from the snapshot and request a spot instance for it.

        Logs an error and does nothing else when no zone is returned for the
        instance type and spot price.
        """
        ec2_helper = EC2Helper()
        # 500 IOPS are requested only when the instance details report IOPS support
        iops = 500 if self._instance_details.iops_support else None

        zone = ec2_helper.get_cheapest_spot_price(self._instance_type, self._spot_price)
        if zone is None:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(self._instance_type, self._spot_price))
            return

        volume, snapshot_name = ec2_helper.create_volume(self._snapshot_id, zone, iops=iops)
        LOGGER.info('obs_id: {0}, volume_name: {1}'.format(self._obs_id, snapshot_name))
        user_data_mime = self.get_mime_encoded_user_data(volume.id)

        # Without a spot price no instance is requested
        if self._spot_price is None:
            return

        instance_name = '{1}-{2}-{0}'.format(self._name, snapshot_name, self._counter)
        ec2_helper.run_spot_instance(
            self._ami_id,
            self._spot_price,
            user_data_mime,
            self._instance_type,
            volume.id,
            self._created_by,
            instance_name,
            self._instance_details,
            zone,
            ephemeral=True)
def copy_files(processes, bottom_frequency, frequency_range):
    """
    Queue a copy Task for every wanted image tar under the CLEAN/ prefix.

    Starts ``processes`` consumers, queues one Task per key that ends in
    '.image.tar.gz' or '.image.tar' and passes ``in_frequency_range``, then
    queues one None per consumer and waits for the queue to drain.
    """
    # Make sure the target directory is there
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    bucket = S3Helper().get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}/CLEAN'.format(bucket))

    queue = multiprocessing.JoinableQueue()

    # Spin up the consumers
    for _ in range(processes):
        Consumer(queue).start()

    for key in bucket.list(prefix='CLEAN/'):
        LOGGER.info('Checking {0}'.format(key.key))
        # Only image tar files are of interest
        if not key.key.endswith(('.image.tar.gz', '.image.tar')):
            continue

        file_name = basename(key.key)
        if not in_frequency_range(file_name, bottom_frequency, frequency_range):
            continue

        queue.put(Task(key, os.path.join(DIRECTORY, file_name), DIRECTORY))

    # One poison pill per consumer shuts things down
    for _ in range(processes):
        queue.put(None)

    queue.join()
def copy_files(frequency_id):
    """
    Upload every ``cube_*.image`` directory in CHILES_CLEAN_OUTPUT as a tar.

    Directories that ``can_be_multipart_tar`` accepts are streamed with
    ``add_tar_to_bucket_multipart``; the others are tarred locally, uploaded
    with ``add_file_to_bucket`` and the local tar is removed afterwards.
    Both paths write to the same key.

    :param frequency_id: frequency identifier used as a path element of the key
    """
    s3_helper = S3Helper()
    # Look in the output directory
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))
    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)
        if not (isdir(result_dir) and dir_name.startswith('cube_') and dir_name.endswith('.image')):
            continue

        LOGGER.info('dir_name: {0}'.format(dir_name))
        output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')

        # One key for both upload paths. The fallback path used to write to
        # 'CVEL/<frequency_id>/<tar name>/data.tar', which looks like a
        # copy-paste from the CVEL uploader.
        # NOTE(review): the leading '/' is kept from the multipart branch - confirm it is wanted
        key_name = '/CLEAN/{0}/{1}'.format(frequency_id, basename(output_tar_filename))

        if can_be_multipart_tar(result_dir):
            LOGGER.info('Using add_tar_to_bucket_multipart')
            s3_helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME,
                key_name,
                result_dir)
        else:
            LOGGER.info('Using make_tarfile, then adding file to bucket')
            make_tarfile(output_tar_filename, result_dir)

            s3_helper.add_file_to_bucket(
                CHILES_BUCKET_NAME,
                key_name,
                output_tar_filename)

            # Clean up
            os.remove(output_tar_filename)
Exemple #7
0
def main():
    """
    Parse the command line and start the CVEL servers if the arguments check out.

    When ``check_args`` returns None the raw arguments are logged as an error
    and nothing is started.
    """
    parser = argparse.ArgumentParser('Start a number of CVEL servers')

    # (flags, keyword arguments) for every command line option, in declaration order
    option_table = (
        (('-a', '--ami_id'), dict(help='the AMI id to use')),
        (('-i', '--instance_type'), dict(required=True, help='the instance type to use')),
        (('-c', '--created_by'), dict(help='the username to use')),
        (('-n', '--name'), dict(required=True, help='the instance name to use')),
        (('-s', '--spot_price'), dict(type=float, help='the spot price to use')),
        (('-b', '--bash_script'), dict(help='the bash script to use')),
        (('-p', '--processes'), dict(type=int, default=1, help='the number of processes to run')),
        (('-fc', '--frequency_channels'), dict(type=int, default=28, help='how many frequency channels per AWS instance')),
        (('--force',), dict(action='store_true', default=False, help='proceed with a frequency band even if we already have it')),
        (('obs_ids',), dict(nargs='+', help='the ids of the observation')),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    raw_args = vars(parser.parse_args())
    checked = check_args(raw_args)

    if checked is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(raw_args))
        return

    start_servers(
        raw_args['processes'],
        checked['ami_id'],
        checked['user_data'],
        checked['setup_disks'],
        raw_args['instance_type'],
        checked['obs_ids'],
        checked['created_by'],
        raw_args['name'],
        checked['instance_details'],
        checked['spot_price'],
        raw_args['frequency_channels'],
        raw_args['force'])
Exemple #8
0
def main():
    """
    Parse the command line and start the server if the arguments check out.

    When ``check_args`` returns None the raw arguments are logged as an error
    and nothing is started.
    """
    parser = argparse.ArgumentParser("Start a number of CLEAN servers")
    add_option = parser.add_argument
    add_option("-a", "--ami_id", help="the AMI id to use")
    add_option("-i", "--instance_type", required=True, help="the instance type to use")
    add_option("-c", "--created_by", help="the username to use")
    add_option("-n", "--name", required=True, help="the instance name to use")
    add_option("-s", "--spot_price", type=float, help="the spot price to use")
    add_option("-b", "--bash_script", help="the bash script to use")
    add_option("-p", "--processes", type=int, default=1, help="the number of processes to run")
    add_option("snapshots", nargs="+", help="the snapshots to use")

    raw_args = vars(parser.parse_args())
    checked = check_args(raw_args)

    if checked is None:
        LOGGER.error("The arguments are incorrect: {0}".format(raw_args))
        return

    start_server(
        raw_args["processes"],
        checked["ami_id"],
        checked["user_data"],
        checked["setup_disks"],
        raw_args["instance_type"],
        raw_args["snapshots"],
        checked["created_by"],
        raw_args["name"],
        checked["instance_details"],
        checked["spot_price"],
    )
def copy_files(frequency_id, processes, days):
    """
    Queue a copy Task for every data tar of the frequency whose day is wanted.

    Starts ``processes`` consumers, lists the keys under
    'CVEL/<frequency_id>', queues one Task for each '/data.tar.gz' or
    '/data.tar' key whose third path element is in ``days``, then queues one
    None per consumer and waits for the queue to drain.
    """
    bucket = S3Helper().get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(bucket, frequency_id))

    queue = multiprocessing.JoinableQueue()

    # Spin up the consumers
    for _ in range(processes):
        Consumer(queue).start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))

        # Work out the local file name; anything that is not a data tar is skipped
        if key.key.endswith('/data.tar.gz'):
            tar_name = 'data.tar.gz'
        elif key.key.endswith('/data.tar'):
            tar_name = 'data.tar'
        else:
            continue

        day = key.key.split('/')[2]
        if day not in days:
            continue

        directory = '/mnt/output/Chiles/split_vis/{0}/'.format(day)
        queue.put(Task(key, os.path.join(directory, tar_name), directory, frequency_id))

    # One poison pill per consumer shuts things down
    for _ in range(processes):
        queue.put(None)

    queue.join()
def copy_files(args):
    """
    Queue a CopyTask for every calibrated measurement set under args.product_dir.

    Starts PROCESSES consumers, walks ``args.product_dir`` and queues one
    CopyTask per directory matching '13B-266*calibrated_deepfield.ms', then
    queues one None per consumer and waits for the queue to drain.
    """
    queue = multiprocessing.JoinableQueue()

    # Spin up the consumers
    for _ in range(PROCESSES):
        Consumer(queue).start()

    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, '13B-266*calibrated_deepfield.ms'):
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))

            copy_task = CopyTask(
                args.bucket,
                match,
                result_dir,
                args.aws_access_key_id,
                args.aws_secret_access_key
            )
            queue.put(copy_task)

    # One poison pill per consumer shuts things down
    for _ in range(PROCESSES):
        queue.put(None)

    queue.join()
Exemple #11
0
def main():
    """
    Parse the command line and start the servers if the arguments check out.

    When ``check_args`` returns None the raw arguments are logged as an error
    and nothing is started.
    """
    parser = argparse.ArgumentParser('Start a number of CLEAN servers')
    add_option = parser.add_argument
    add_option('-a', '--ami_id', help='the AMI id to use')
    add_option('-i', '--instance_type', required=True, help='the instance type to use')
    add_option('-c', '--created_by', help='the username to use')
    add_option('-n', '--name', required=True, help='the instance name to use')
    add_option('-s', '--spot_price', type=float, help='the spot price to use')
    add_option('-b', '--bash_script', help='the bash script to use')
    add_option('-p', '--processes', type=int, default=1, help='the number of processes to run')
    add_option('frequencies', nargs='+', help='the frequencies to use (vis_14XX~14YY')

    raw_args = vars(parser.parse_args())
    checked = check_args(raw_args)

    if checked is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(raw_args))
        return

    start_servers(
        raw_args['processes'],
        checked['ami_id'],
        checked['user_data'],
        checked['setup_disks'],
        raw_args['instance_type'],
        raw_args['frequencies'],
        checked['created_by'],
        raw_args['name'],
        checked['instance_details'],
        checked['spot_price'])
Exemple #12
0
def start_servers(
        processes,
        ami_id,
        user_data,
        setup_disks,
        instance_type,
        obs_ids,
        created_by,
        name,
        instance_details,
        spot_price,
        frequency_channels,
        force):
    """
    Queue one Task per frequency group of every known observation id.

    Starts ``processes`` consumers, looks up the snapshot of each obs id in
    OBS_IDS and queues a Task for every entry ``get_frequency_groups``
    yields. Obs ids without a snapshot are skipped with a warning. Finally
    one None per consumer is queued and the call blocks until the queue
    has drained.
    """
    cvel_data = get_cvel()

    # Create the queue
    tasks = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(tasks)
        consumer.start()

    # The counter numbers the tasks across all observations
    counter = 1
    for obs_id in obs_ids:
        snapshot_id = OBS_IDS.get(obs_id)
        if snapshot_id is None:
            # The placeholder was never filled in before, so the log showed a literal '{0}'
            LOGGER.warning('The obs-id: {0} does not exist in the settings file'.format(obs_id))
        else:
            obs_id_dashes = obs_id.replace('_', '-')
            for frequency_groups in get_frequency_groups(frequency_channels, obs_id_dashes, cvel_data, force):
                tasks.put(
                    Task(
                        ami_id,
                        user_data,
                        setup_disks,
                        instance_type,
                        obs_id,
                        snapshot_id,
                        created_by,
                        name,
                        spot_price,
                        instance_details,
                        frequency_groups,
                        counter
                    )
                )
                counter += 1

    # Add a poison pill to shut things down
    for x in range(processes):
        tasks.put(None)

    # Wait for the queue to terminate
    tasks.join()
 def __call__(self):
     """
     Copy the file to the bucket; any failure is logged, not raised.
     """
     # noinspection PyBroadException
     try:
         LOGGER.info('Copying {0} to s3:{1}'.format(self._filename, self._bucket_location))
         S3Helper().add_file_to_bucket(CHILES_BUCKET_NAME, self._bucket_location, self._filename)
     except Exception:
         LOGGER.exception('CopyTask died')
def get_mime_encoded_user_data(volume_id, setup_disks, in_user_data, now):
    """
    Build the multipart MIME user data and return it as a string.

    The message holds the cloud-init part from ``get_cloud_init`` followed by
    one text part: ``setup_disks`` plus ``in_user_data`` formatted with the
    volume id, ``now`` and PIP_PACKAGES.
    """
    mime_message = MIMEMultipart()
    mime_message.attach(get_cloud_init())

    script_body = in_user_data.format(volume_id, now, PIP_PACKAGES)
    LOGGER.info(script_body)

    script_part = MIMEText(setup_disks + script_body)
    mime_message.attach(script_part)
    return mime_message.as_string()
def copy_files(date, vis_file):
    """
    Upload every directory matching ``vis_file`` under CHILES_CVEL_OUTPUT as a tar.

    Directories that ``can_be_multipart_tar`` accepts are streamed with
    ``add_tar_to_bucket_multipart``; the others are tarred locally, uploaded
    and the local tar removed. The source directory is deleted afterwards in
    both cases.
    """
    s3_helper = S3Helper()
    # Both upload paths write to the same key
    key_name = 'CVEL/{0}/{1}/data.tar'.format(vis_file, date)

    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            if not can_be_multipart_tar(result_dir):
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)
                s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME, key_name, output_tar_filename)

                # The local tar is no longer needed
                os.remove(output_tar_filename)
            else:
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(CHILES_BUCKET_NAME, key_name, result_dir)

            shutil.rmtree(result_dir, ignore_errors=True)
Exemple #16
0
    def get_mime_encoded_user_data(self, volume_id):
        """
        Build the multipart MIME user data and return it as a string.

        The message holds the cloud-init part from ``get_cloud_init`` followed
        by one text part: the setup-disks script plus the user data template
        formatted with the cvel pipeline, obs id, volume id, timestamp,
        counter and PIP_PACKAGES.
        """
        mime_message = MIMEMultipart()
        mime_message.attach(get_cloud_init())

        pipeline = self.build_cvel_pipeline()
        script_body = self._user_data.format(
            pipeline,
            self._obs_id,
            volume_id,
            self._now,
            self._counter,
            PIP_PACKAGES)
        LOGGER.info(script_body)

        script_part = MIMEText(self._setup_disks + script_body)
        mime_message.attach(script_part)
        return mime_message.as_string()
Exemple #17
0
    def add_tar_to_bucket_multipart(self, bucket_name, key_name, source_path, gzip=False, parallel_processes=2, reduced_redundancy=True, bufsize=10*1024*1024):
        """
        Tar the entries of a directory straight into a multipart upload.

        A tar stream is written to an ``S3Feeder``, which is handed to
        ``tarfile`` as its file object. The multipart upload is completed
        when the whole tar was written without an exception; otherwise the
        failure is logged and the upload is cancelled.

        :param bucket_name: name of the destination bucket
        :param key_name: key to store the tar under (also used to guess the Content-Type)
        :param source_path: directory whose entries are added to the tar
        :param gzip: write a gzip-compressed tar stream when true
        :param parallel_processes: passed through to ``S3Feeder``
        :param reduced_redundancy: passed through to ``initiate_multipart_upload``
        :param bufsize: passed to ``S3Feeder``; a tenth of it is the tarfile block size
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, reduced_redundancy: {4}, bufsize: {5}'.format(
                bucket_name,
                key_name,
                source_path,
                parallel_processes,
                reduced_redundancy,
                bufsize
            )
        )
        bucket = self.get_bucket(bucket_name)

        headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
        mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)
        s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes, self._aws_access_key_id, self._aws_secret_access_key)

        # '|' selects tarfile's stream mode, needed for a non-seekable file object
        mode = "w|gz" if gzip else "w|"

        complete = True
        # noinspection PyBroadException
        try:
            # Opened inside the try so that a failure here also cancels the upload
            tar = tarfile.open(mode=mode, fileobj=s3_feeder, bufsize=int(bufsize / 10))
            for entry in os.listdir(source_path):
                full_filename = join(source_path, entry)
                LOGGER.info(
                    'tar: [full_filename: {0}, entry: {1}]'.format(
                        full_filename,
                        entry
                    )
                )
                tar.add(full_filename, arcname=entry)

            tar.close()
            s3_feeder.close()
        except Exception:
            # Record why the upload is being cancelled instead of dropping the error
            LOGGER.exception('Failed to stream the tar of {0}, cancelling the upload'.format(source_path))
            complete = False
            s3_feeder.close()

        # Finish the upload
        if complete:
            mp.complete_upload()
        else:
            mp.cancel_upload()
Exemple #18
0
def get_cvel():
    """
    Map the second path element of each CVEL data tar key to its third elements.

    Every key under 'CVEL/' that ends in 'data.tar.gz' or 'data.tar' is split
    on '/'; element 1 becomes the dictionary key and element 2 is appended to
    that key's list.

    :return: dict of str -> list of str
    """
    bucket = S3Helper().get_bucket(CHILES_BUCKET_NAME)
    cvel_data = {}
    for key in bucket.list(prefix='CVEL/'):
        LOGGER.info('Checking {0}'.format(key.key))
        if not key.key.endswith(('data.tar.gz', 'data.tar')):
            continue

        elements = key.key.split('/')
        cvel_data.setdefault(str(elements[1]), []).append(str(elements[2]))

    return cvel_data
Exemple #19
0
def get_mime_encoded_user_data(instance_details, setup_disks, user_data):
    """
    Build the multipart MIME user data and return it as a string.

    The message holds the cloud-init part from ``get_cloud_init`` followed by
    one text part: ``setup_disks`` plus ``user_data`` formatted with a "TODO"
    placeholder, the fixed frequency bounds 940 and 1424, the swap size and
    PIP_PACKAGES.
    """
    # The frequency bounds are fixed here
    min_freq, max_freq = 940, 1424
    LOGGER.info("min_freq: {0}, max_freq: {1}".format(min_freq, max_freq))

    mime_message = MIMEMultipart()
    mime_message.attach(get_cloud_init())

    swap_size = get_swap_size(instance_details)
    script_body = user_data.format("TODO", min_freq, max_freq, swap_size, PIP_PACKAGES)
    script_part = MIMEText(setup_disks + script_body)
    mime_message.attach(script_part)
    return mime_message.as_string()
    def __call__(self):
        """
        Stream the measurement set to the bucket as a tar; failures are logged, not raised.
        """
        # noinspection PyBroadException
        try:
            uploader = S3Helper(self._aws_access_key_id, self._aws_secret_access_key)
            LOGGER.info('Copying to: {0}/{1}/measurement_set.tar'.format(self._bucket, self._bucket_location))

            # At most 10,000 parts are possible and the biggest file from
            # Semester 1 is 803GB, hence the 100 MB buffer
            key_name = '{0}/measurement_set.tar'.format(self._bucket_location)
            uploader.add_tar_to_bucket_multipart(
                self._bucket,
                key_name,
                self._filename,
                parallel_processes=2,
                bufsize=100*1024*1024
            )
        except Exception:
            LOGGER.exception('CopyTask died')
Exemple #21
0
    def get_mime_encoded_user_data(self):
        """
        Build the multipart MIME user data and return it as a string.

        The minimum and maximum frequency are cut out of the frequency id:
        the text between the '_' and the '~', and the text after the '~'.
        The message holds the cloud-init part from ``get_cloud_init``
        followed by the setup-disks script plus the formatted user data.
        """
        frequency_id = self._frequency_id
        underscore_at = find(frequency_id, '_')
        tilde_at = find(frequency_id, '~')
        min_freq = frequency_id[underscore_at + 1:tilde_at]
        max_freq = frequency_id[tilde_at + 1:]
        LOGGER.info('min_freq: {0}, max_freq: {1}'.format(min_freq, max_freq))

        mime_message = MIMEMultipart()
        mime_message.attach(get_cloud_init())

        swap_size = self.get_swap_size()
        script_body = self._user_data.format(self._frequency_id, min_freq, max_freq, swap_size, PIP_PACKAGES)
        script_part = MIMEText(self._setup_disks + script_body)
        mime_message.attach(script_part)
        return mime_message.as_string()
Exemple #22
0
def check_args(args):
    """
    Validate the parsed arguments and fill in the defaults.

    :param args: dict of the parsed command line arguments
    :return: dict with obs_ids, ami_id, created_by, spot_price, user_data,
             setup_disks and instance_details, or None when obs_ids,
             instance_type or name is missing or the instance type is not
             in AWS_INSTANCES
    """
    requested_obs_ids = args['obs_ids']
    if requested_obs_ids is None:
        return None

    # A single '*' selects every known observation id
    if len(requested_obs_ids) == 1 and requested_obs_ids[0] == '*':
        obs_ids = OBS_IDS.keys()
    else:
        obs_ids = requested_obs_ids

    if args['instance_type'] is None or args['name'] is None:
        return None

    instance_details = AWS_INSTANCES.get(args['instance_type'])
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(args['instance_type']))
        return None

    LOGGER.info(
        'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'.format(
            args['instance_type'],
            instance_details.vCPU,
            instance_details.memory,
            instance_details.number_disks,
            instance_details.size,
            instance_details.iops_support))

    ami_id = AWS_AMI_ID if args['ami_id'] is None else args['ami_id']
    # The current user is only looked up when no username was given
    created_by = getpass.getuser() if args['created_by'] is None else args['created_by']
    script_name = BASH_SCRIPT_CVEL if args['bash_script'] is None else args['bash_script']

    return {
        'obs_ids': obs_ids,
        'ami_id': ami_id,
        'created_by': created_by,
        'spot_price': args['spot_price'],
        'user_data': get_script(script_name),
        'setup_disks': get_script(BASH_SCRIPT_SETUP_DISKS),
        'instance_details': instance_details,
    }
Exemple #23
0
def check_args(args):
    """
    Validate the parsed arguments and fill in the defaults.

    :param args: dict of the parsed command line arguments
    :return: dict with ami_id, created_by, spot_price, user_data,
             setup_disks and instance_details, or None when snapshots,
             instance_type or name is missing or the instance type is not
             in AWS_INSTANCES
    """
    # Required values
    for required in ("snapshots", "instance_type", "name"):
        if args[required] is None:
            return None

    instance_details = AWS_INSTANCES.get(args["instance_type"])
    if instance_details is None:
        LOGGER.error("The instance type {0} is not supported.".format(args["instance_type"]))
        return None

    LOGGER.info(
        "instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}".format(
            args["instance_type"],
            instance_details.vCPU,
            instance_details.memory,
            instance_details.number_disks,
            instance_details.size,
            instance_details.iops_support,
        )
    )

    ami_id = AWS_AMI_ID if args["ami_id"] is None else args["ami_id"]
    # The current user is only looked up when no username was given
    created_by = getpass.getuser() if args["created_by"] is None else args["created_by"]
    script_name = BASH_SCRIPT_CLEAN_ALL if args["bash_script"] is None else args["bash_script"]

    return {
        "ami_id": ami_id,
        "created_by": created_by,
        "spot_price": args["spot_price"],
        "user_data": get_script(script_name),
        "setup_disks": get_script(BASH_SCRIPT_SETUP_DISKS),
        "instance_details": instance_details,
    }
Exemple #24
0
    def run_instance(self, ami_id, user_data, instance_type, volume_id, created_by, name, zone, ephemeral=False):
        """
        Launch an instance, wait until it is running, attach the volume and tag it.

        The instance state is polled every five seconds until it reads
        'running'. When ``volume_id`` is truthy the volume is attached as
        /dev/xvdf. The AMI, Name, Volume_id and Created By tags are then
        written.

        :return: the first instance of the reservation
        """
        bdm = self.build_block_device_map(ephemeral)

        LOGGER.info('Running instance: ami: {0}'.format(ami_id))
        launch_options = dict(
            instance_type=instance_type,
            instance_initiated_shutdown_behavior='terminate',
            subnet_id=AWS_SUBNETS[zone],
            key_name=AWS_KEY_NAME,
            security_group_ids=AWS_SECURITY_GROUPS,
            user_data=user_data,
            block_device_map=bdm,
        )
        reservations = self.ec2_connection.run_instances(ami_id, **launch_options)
        instance = reservations.instances[0]
        time.sleep(5)

        # Poll until the instance reports that it is running
        while instance.update() != 'running':
            LOGGER.info('Not running yet')
            time.sleep(5)

        if volume_id:
            # The disk can only be attached once the instance id exists
            self.ec2_connection.attach_volume(volume_id, instance.id, '/dev/xvdf')

        LOGGER.info('Assigning the tags')
        tags = {
            'AMI': '{0}'.format(ami_id),
            'Name': '{0}'.format(name),
            'Volume_id': '{0}'.format(volume_id),
            'Created By': '{0}'.format(created_by),
        }
        self.ec2_connection.create_tags([instance.id], tags)

        return instance
def copy_files(s3_tag, processes):
    """
    Queue a CopyTask for every log and benchmarking file, filed under today's date.

    Starts ``processes`` consumers, queues the '*.log' files under
    CHILES_LOGS, the '*.csv' and '*.log' files under BENCHMARKING_LOGS and
    '/var/log/chiles-output.log', then queues one None per consumer and
    waits for the queue to drain.
    """
    queue = multiprocessing.JoinableQueue()

    # Spin up the consumers
    for _ in range(processes):
        Consumer(queue).start()

    today = datetime.date.today()

    def queue_matches(root, filenames, pattern):
        # Queue every file in this directory that matches the pattern
        for match in fnmatch.filter(filenames, pattern):
            full_path = join(root, match)
            LOGGER.info('Looking at: {0}'.format(full_path))
            queue.put(CopyTask(full_path, '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)))

    for root, dir_names, filenames in os.walk(CHILES_LOGS):
        queue_matches(root, filenames, '*.log')

    for root, dir_names, filenames in os.walk(BENCHMARKING_LOGS):
        for pattern in ('*.csv', '*.log'):
            queue_matches(root, filenames, pattern)

    queue.put(CopyTask('/var/log/chiles-output.log', '{0}/{1}{2:02d}{3:02d}/chiles-output.log'.format(s3_tag, today.year, today.month, today.day)))

    # One poison pill per consumer shuts things down
    for _ in range(processes):
        queue.put(None)

    queue.join()
def copy_files(cube):
    """
    Upload the '<cube>.cube' directory of CHILES_IMGCONCAT_OUTPUT as a tar.

    Nothing happens when the directory does not exist. A directory that
    ``can_be_multipart_tar`` accepts is streamed with
    ``add_tar_to_bucket_multipart``; otherwise it is tarred locally,
    uploaded and the local tar removed.
    """
    s3_helper = S3Helper()
    directory_to_save = join(CHILES_IMGCONCAT_OUTPUT, cube) + '.cube'
    if not isdir(directory_to_save):
        return

    LOGGER.info('dir_name: {0}'.format(directory_to_save))
    output_tar_filename = directory_to_save + '.tar'
    # Both upload paths write to the same key
    key_name = 'IMGCONCAT/{0}'.format(basename(output_tar_filename))

    if not can_be_multipart_tar(directory_to_save):
        LOGGER.info('Using make_tarfile, then adding file to bucket')
        make_tarfile(output_tar_filename, directory_to_save)
        s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME, key_name, output_tar_filename)

        # The local tar is no longer needed
        os.remove(output_tar_filename)
        return

    LOGGER.info('Using add_tar_to_bucket_multipart')
    s3_helper.add_tar_to_bucket_multipart(
        CHILES_BUCKET_NAME,
        key_name,
        directory_to_save,
        bufsize=20 * 1024 * 1024)
    def __call__(self):
        """
        Download the tar file and unpack it into a directory named after the image.

        The target directory is the tar's base name with '.tar.gz' or '.tar'
        removed. Any failure is logged and the target directory is deleted.
        """
        is_gzipped = self._tar_file.endswith('.tar.gz')
        suffix = '.tar.gz' if is_gzipped else '.tar'
        image_name = basename(self._tar_file).replace(suffix, '')
        directory = join(self._directory, image_name)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(self._key.key, self._tar_file, directory))
            if not os.path.exists(directory):
                os.makedirs(directory)
            self._key.get_contents_to_filename(self._tar_file)

            tar_mode = "r:gz" if is_gzipped else "r:"
            with closing(tarfile.open(self._tar_file, tar_mode)) as tar:
                tar.extractall(path=directory)

            # The downloaded tar is no longer needed once it is unpacked
            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(directory, ignore_errors=True)
Exemple #28
0
    def add_file_to_bucket(self, bucket_name, key_name, filename, reduced_redundancy=True):
        """
        Add file to a bucket, retrying up to three times on socket errors.

        When every attempt fails an error is logged; no exception is raised,
        so callers get no other failure signal.

        :param bucket_name: name of the destination bucket
        :param key_name: key to store the file under
        :param filename: path of the local file to upload
        :param reduced_redundancy: passed through to ``set_contents_from_filename``
        """
        LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'.format(bucket_name, key_name, filename, reduced_redundancy))
        max_attempts = 3
        retry_count = 0
        done = False
        while retry_count < max_attempts and not done:
            try:
                bucket = self.get_bucket(bucket_name)
                key = Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(filename, reduced_redundancy=reduced_redundancy)
                done = True
            except socket.error:
                LOGGER.exception('Error')
                retry_count += 1
                # Waiting only makes sense when another attempt follows
                if retry_count < max_attempts:
                    time.sleep(10)

        if not done:
            # Without this the failure is only visible as repeated 'Error' tracebacks
            LOGGER.error('Giving up on uploading {0} to {1} after {2} attempts'.format(filename, key_name, max_attempts))
    def __call__(self):
        """
        Download the tar file and unpack it into '<directory>/<frequency_id>'.

        Any failure is logged and the target directory is deleted.
        """
        corrected_path = join(self._directory, self._frequency_id)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.format(
                self._key.key,
                self._tar_file,
                self._directory,
                self._frequency_id))
            if not os.path.exists(corrected_path):
                os.makedirs(corrected_path)
            self._key.get_contents_to_filename(self._tar_file)

            tar_mode = "r:gz" if self._tar_file.endswith('tar.gz') else "r:"
            with closing(tarfile.open(self._tar_file, tar_mode)) as tar:
                tar.extractall(path=corrected_path)

            # The downloaded tar is no longer needed once it is unpacked
            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(corrected_path, ignore_errors=True)
Exemple #30
0
def start_servers(
        ami_id,
        user_data,
        setup_disks,
        instance_type,
        obs_id,
        created_by,
        name,
        instance_details,
        spot_price,
        ebs,
        bottom_frequency,
        frequency_range):
    """
    Request one spot instance for the observation.

    Logs an error and does nothing else when no zone is returned for the
    instance type and spot price. The swap size is 1 when ``ebs`` is None;
    otherwise it is 75% of the total ephemeral disk size, capped at 16.
    """
    LOGGER.info('obs_id: {0}, bottom_frequency: {1}, frequency_range: {2}'.format(obs_id, bottom_frequency, frequency_range))
    ec2_helper = EC2Helper()
    zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)

    if zone is None:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(instance_type, spot_price))
        return

    # Work out the swap size
    if ebs is not None:
        ephemeral_size = instance_details.number_disks * instance_details.size
        swap_size = min(int(ephemeral_size * 0.75), 16)
    else:
        swap_size = 1

    user_data_mime = get_mime_encoded_user_data(
        user_data,
        obs_id,
        setup_disks,
        bottom_frequency,
        frequency_range,
        swap_size
    )
    LOGGER.info('{0}'.format(user_data_mime))

    instance_name = name + '- {0}'.format(obs_id)
    ec2_helper.run_spot_instance(
        ami_id,
        spot_price,
        user_data_mime,
        instance_type,
        None,
        created_by,
        instance_name,
        instance_details=instance_details,
        zone=zone,
        ebs_size=ebs,
        number_ebs_volumes=4,
        ephemeral=True)