def copy_files(args):
    """
    Queue a CopyTask for every directory under ``args.product_dir`` whose
    name matches ``13B-266*calibrated_deepfield.ms`` and block until the
    consumer processes have drained the queue.

    :param args: object exposing ``product_dir``, ``bucket``,
        ``aws_access_key_id`` and ``aws_secret_access_key``
    """
    pattern = '13B-266*calibrated_deepfield.ms'
    task_queue = multiprocessing.JoinableQueue()

    # Bring the workers up before any work is queued
    for _ in range(PROCESSES):
        Consumer(task_queue).start()

    # Walk the product tree and queue every matching directory
    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug(
            'root: {0}, dir_names: {1}, filenames: {2}'.format(
                root, dir_names, filenames))
        matches = fnmatch.filter(dir_names, pattern)
        for match in matches:
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))
            task = CopyTask(
                args.bucket,
                match,
                result_dir,
                args.aws_access_key_id,
                args.aws_secret_access_key)
            task_queue.put(task)

    # One poison pill (None) per consumer to shut things down
    for _ in range(PROCESSES):
        task_queue.put(None)

    # Block until every queued item has been marked as done
    task_queue.join()
Exemple #2
0
 def _upload(retries_left=amount_of_retries):
     """
     Upload one part of a multipart upload, retrying on any failure.

     Reads part_num, source_path, offset, bytes_to_copy, bucket_name,
     multipart_id and the AWS credentials from the enclosing scope.

     :param retries_left: number of further attempts allowed after this one
     :raises Exception: whatever the final attempt raised, once the
         retries are used up
     """
     try:
         LOGGER.info(
             'Start uploading part: #{0}, source_path: {1}'.format(
                 part_num,
                 source_path
             )
         )
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
         # Find the in-progress upload this part belongs to
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes_to_copy) as fp:
                     mp.upload_part_from_file(fp=fp, part_num=part_num)
                 break
     except Exception:
         # "except X as e" / bare except clause syntax works on Python 2.6+
         # and Python 3, unlike the old "except Exception, exc" form
         if retries_left:
             _upload(retries_left=retries_left - 1)
         else:
             LOGGER.info(
                 'Failed uploading part: #{0}, source_path: {1}'.format(
                     part_num,
                     source_path
                 )
             )
             # Bare raise keeps the original traceback intact
             raise
def copy_files(frequency_id, processes, days):
    """
    Queue a download Task for every ``data.tar`` / ``data.tar.gz`` key under
    ``CVEL/<frequency_id>`` whose third path element is in ``days``, then
    wait for the consumers to finish.

    :param frequency_id: used to build the key prefix and passed to each Task
    :param processes: number of Consumer processes to start
    :param days: container of accepted values for the third key element
    """
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info(
        'Scanning bucket: {0}, frequency_id: {1}'.format(bucket, frequency_id))

    work_queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for _ in range(processes):
        Consumer(work_queue).start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))

        # Only the tar files are of interest; remember which flavour it is
        if key.key.endswith('/data.tar.gz'):
            tar_name = 'data.tar.gz'
        elif key.key.endswith('/data.tar'):
            tar_name = 'data.tar'
        else:
            continue

        day = key.key.split('/')[2]
        if day not in days:
            continue

        # Queue the copy of the file
        directory = '/mnt/output/Chiles/split_vis/{0}/'.format(day)
        temp_file = os.path.join(directory, tar_name)
        work_queue.put(Task(key, temp_file, directory, frequency_id))

    # One poison pill per consumer to shut things down
    for _ in range(processes):
        work_queue.put(None)

    # Wait for the queue to terminate
    work_queue.join()
Exemple #4
0
def create_users(emails):
    """
    Register one account per email address via ``POST <base_url>/register/``.

    The username is the part of the address before the first ``@`` and the
    password comes from ``config['default_password']``.

    :param emails: iterable of email address strings
    :return: tuple ``(users, failed)`` where ``users`` holds the decoded JSON
        body of every 201 response and ``failed`` holds every email whose
        request did not return 201
    """
    failed = []
    users = []

    for email in emails:
        username = email.split('@')[0]

        data = {
            'username': username,
            'password': config['default_password'],
            'email': email
        }

        dumped = json.dumps(data, ensure_ascii=False).encode('utf-8')

        resp = requests.post(
            url="{}/register/".format(base_url),
            data=dumped,
            headers={'Content-Type': 'application/json;charset=UTF-8'})

        if resp.status_code == 201:
            users.append(resp.json())
            LOGGER.info("Account successfully created.")
            continue

        # Anything other than 201 is a failure; previously only 400 was
        # recorded and every other status silently dropped the email.
        failed.append(email)
        try:
            details = str(resp.json())
        except ValueError:
            # Error bodies are not guaranteed to be JSON
            details = resp.text
        LOGGER.error(
            "Failed to create account for email {0}. Response: {1}".format(
                email, details))

    return users, failed
Exemple #5
0
    def __call__(self):
        """
        Actually run the job

        Asks EC2Helper for a zone via get_cheapest_spot_price and, if one is
        returned, requests a spot instance there; otherwise logs an error.
        """
        LOGGER.info('frequency_id: {0}'.format(self._frequency_id))
        ec2_helper = EC2Helper()
        zone = ec2_helper.get_cheapest_spot_price(
            self._instance_type, self._spot_price)

        # No zone available: report and stop
        if zone is None:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(
                self._instance_type, self._spot_price))
            return

        user_data_mime = self.get_mime_encoded_user_data()
        LOGGER.info('{0}'.format(user_data_mime))

        instance_name = '{0}-{1}'.format(self._frequency_id, self._name)
        ec2_helper.run_spot_instance(
            self._ami_id,
            self._spot_price,
            user_data_mime,
            self._instance_type,
            None,
            self._created_by,
            instance_name,
            instance_details=self._instance_details,
            zone=zone,
            ephemeral=True)
Exemple #6
0
    def add_file_to_bucket_multipart(self, bucket_name, key_name, source_path, parallel_processes=2, reduced_redundancy=True):
        """
        Parallel multipart upload.

        Splits ``source_path`` into 10 MiB chunks and uploads them through a
        process pool. The upload is completed only if every part is reported
        back by S3; otherwise it is cancelled.

        :param bucket_name: name of the bucket passed to get_bucket
        :param key_name: key to create; also used to guess the Content-Type
        :param source_path: path of the local file to upload
        :param parallel_processes: size of the upload process pool
        :param reduced_redundancy: forwarded to initiate_multipart_upload
        :raises S3UploadException: if the file needs more than 10,000 chunks
        """
        LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'.format(
            bucket_name, key_name, source_path, parallel_processes, reduced_redundancy))

        source_size = os.stat(source_path).st_size
        bytes_per_chunk = 10 * 1024 * 1024
        chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

        # You can only upload 10,000 chunks - but exactly 10,000 is allowed
        if chunk_amount > 10000:
            raise S3UploadException('Too many chunks')

        bucket = self.get_bucket(bucket_name)

        headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
        mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)

        LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(bytes_per_chunk, chunk_amount))

        pool = None
        try:
            pool = Pool(processes=parallel_processes)
            for i in range(chunk_amount):
                offset = i * bytes_per_chunk
                remaining_bytes = source_size - offset
                bytes_to_copy = min([bytes_per_chunk, remaining_bytes])
                part_num = i + 1
                pool.apply_async(upload_part, [self._aws_access_key_id, self._aws_secret_access_key, bucket_name, mp.id, part_num, source_path, offset, bytes_to_copy])
            pool.close()
            pool.join()

            # Iterating the upload pages through all parts; get_all_parts()
            # returns a single page only, which under-counts large uploads.
            uploaded_parts = sum(1 for _ in mp)
        except Exception:
            # Do not leave workers or a dangling multipart upload behind
            if pool is not None:
                pool.terminate()
            mp.cancel_upload()
            raise

        if uploaded_parts == chunk_amount:
            mp.complete_upload()
        else:
            mp.cancel_upload()
def copy_files(frequency_id, processes, days):
    """
    Scan the bucket under ``CVEL/<frequency_id>`` and hand each wanted tar
    file to the Consumer processes as a Task.

    A key is wanted when it ends in ``/data.tar`` or ``/data.tar.gz`` and its
    third ``/``-separated element is contained in ``days``.

    :param frequency_id: used in the key prefix and forwarded to each Task
    :param processes: how many Consumer processes to start
    :param days: accepted values for the third element of the key
    """
    bucket = S3Helper().get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(bucket, frequency_id))

    # Create the queue and the consumers that read from it
    queue = multiprocessing.JoinableQueue()
    for _ in range(processes):
        worker = Consumer(queue)
        worker.start()

    prefix = 'CVEL/{0}'.format(frequency_id)
    for key in bucket.list(prefix=prefix):
        LOGGER.info('Checking {0}'.format(key.key))

        is_gzipped = key.key.endswith('/data.tar.gz')
        if is_gzipped or key.key.endswith('/data.tar'):
            parts = key.key.split('/')
            if parts[2] in days:
                directory = '/mnt/output/Chiles/split_vis/{0}/'.format(parts[2])

                # Queue the copy of the file
                file_name = 'data.tar.gz' if is_gzipped else 'data.tar'
                temp_file = os.path.join(directory, file_name)
                queue.put(Task(key, temp_file, directory, frequency_id))

    # A None per consumer tells it to shut down
    for _ in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
Exemple #8
0
    def __call__(self):
        """
        Actually run the job

        Creates a volume from the snapshot in the zone returned by
        get_cheapest_spot_price and, when a spot price is set, requests a
        spot instance that is handed the volume id.
        """
        ec2_helper = EC2Helper()
        # 500 IOPS when the instance type supports it, otherwise unset
        iops = 500 if self._instance_details.iops_support else None

        zone = ec2_helper.get_cheapest_spot_price(self._instance_type, self._spot_price)
        if zone is None:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(self._instance_type, self._spot_price))
            return

        # NOTE(review): the volume is created before the spot price check
        # below, so with no spot price it is created but nothing is started
        # - confirm this is intended.
        volume, snapshot_name = ec2_helper.create_volume(self._snapshot_id, zone, iops=iops)
        LOGGER.info('obs_id: {0}, volume_name: {1}'.format(self._obs_id, snapshot_name))
        user_data_mime = self.get_mime_encoded_user_data(volume.id)

        if self._spot_price is None:
            return

        instance_name = '{1}-{2}-{0}'.format(self._name, snapshot_name, self._counter)
        ec2_helper.run_spot_instance(
            self._ami_id,
            self._spot_price,
            user_data_mime,
            self._instance_type,
            volume.id,
            self._created_by,
            instance_name,
            self._instance_details,
            zone,
            ephemeral=True)
Exemple #9
0
def copy_files(frequency_id):
    """
    Upload every ``cube_*.image`` directory found directly inside
    CHILES_CLEAN_OUTPUT to the CHILES bucket as a tar file.

    Directories accepted by can_be_multipart_tar are streamed with
    add_tar_to_bucket_multipart; the rest are tarred locally, uploaded with
    add_file_to_bucket and the local tar file is removed afterwards.

    :param frequency_id: inserted into the destination key
    """
    s3_helper = S3Helper()
    # Look in the output directory
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))
    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)

        is_cube_image = (isdir(result_dir)
                         and dir_name.startswith('cube_')
                         and dir_name.endswith('.image'))
        if not is_cube_image:
            continue

        LOGGER.info('dir_name: {0}'.format(dir_name))
        output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')

        if can_be_multipart_tar(result_dir):
            LOGGER.info('Using add_tar_to_bucket_multipart')
            # NOTE(review): this branch writes under '/CLEAN/...' while the
            # other writes under 'CVEL/.../data.tar' - confirm both are meant.
            multipart_key = '/CLEAN/{0}/{1}'.format(
                frequency_id, basename(output_tar_filename))
            s3_helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME, multipart_key, result_dir)
            continue

        LOGGER.info('Using make_tarfile, then adding file to bucket')
        make_tarfile(output_tar_filename, result_dir)

        single_key = 'CVEL/{0}/{1}/data.tar'.format(
            frequency_id, basename(output_tar_filename))
        s3_helper.add_file_to_bucket(
            CHILES_BUCKET_NAME, single_key, output_tar_filename)

        # Clean up
        os.remove(output_tar_filename)
def copy_files(frequency_id):
    """
    Copy the cleaned cube directories in CHILES_CLEAN_OUTPUT to the bucket.

    Only entries that are directories named ``cube_*.image`` are handled.
    Each is either streamed as a multipart tar upload or tarred to
    ``<dir_name>.tar`` first, uploaded, and the tar file deleted.

    :param frequency_id: inserted into the destination key
    """
    helper = S3Helper()
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))

    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)

        # Skip anything that is not a cube_*.image directory
        if not isdir(result_dir):
            continue
        if not dir_name.startswith('cube_') or not dir_name.endswith('.image'):
            continue

        LOGGER.info('dir_name: {0}'.format(dir_name))
        tar_path = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')

        if can_be_multipart_tar(result_dir):
            LOGGER.info('Using add_tar_to_bucket_multipart')
            helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME,
                '/CLEAN/{0}/{1}'.format(frequency_id, basename(tar_path)),
                result_dir)
        else:
            LOGGER.info('Using make_tarfile, then adding file to bucket')
            make_tarfile(tar_path, result_dir)

            # NOTE(review): this key uses the 'CVEL/' prefix whereas the
            # multipart branch uses '/CLEAN/' - confirm which is intended.
            helper.add_file_to_bucket(
                CHILES_BUCKET_NAME,
                'CVEL/{0}/{1}/data.tar'.format(frequency_id, basename(tar_path)),
                tar_path)

            # The local tar file is no longer needed
            os.remove(tar_path)
def copy_files(args):
    """
    Find the ``13B-266*calibrated_deepfield.ms`` directories below
    ``args.product_dir`` and let PROCESSES consumers copy them.

    :param args: object with ``product_dir``, ``bucket``,
        ``aws_access_key_id`` and ``aws_secret_access_key`` attributes
    """
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    consumers_started = 0
    while consumers_started < PROCESSES:
        Consumer(queue).start()
        consumers_started += 1

    # Look in the output directory
    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, '13B-266*calibrated_deepfield.ms'):
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))
            queue.put(CopyTask(args.bucket, match, result_dir,
                               args.aws_access_key_id,
                               args.aws_secret_access_key))

    # Every consumer needs its own poison pill to shut down
    pills_sent = 0
    while pills_sent < PROCESSES:
        queue.put(None)
        pills_sent += 1

    # Wait for the queue to terminate
    queue.join()
Exemple #12
0
    def __call__(self):
        """
        Actually run the job

        Downloads the key to ``self._tar_file``, extracts it into a directory
        named after the tar file (without its extension) below
        ``self._directory`` and removes the tar file. Any failure is logged
        and the target directory is removed.
        """
        gzipped = self._tar_file.endswith('.tar.gz')
        extension = '.tar.gz' if gzipped else '.tar'
        image_name = basename(self._tar_file).replace(extension, '')
        directory = join(self._directory, image_name)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(
                self._key.key, self._tar_file, directory))
            if not os.path.exists(directory):
                os.makedirs(directory)

            # Fetch the tar file, then unpack it
            self._key.get_contents_to_filename(self._tar_file)
            tar_mode = "r:gz" if gzipped else "r:"
            with closing(tarfile.open(self._tar_file, tar_mode)) as tar:
                tar.extractall(path=directory)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(directory, ignore_errors=True)
Exemple #13
0
    def add_file_to_bucket(self,
                           bucket_name,
                           key_name,
                           filename,
                           reduced_redundancy=True):
        """
        Add file to a bucket, retrying up to three times on socket errors

        :param bucket_name: name of the bucket passed to get_bucket
        :param key_name: key the file is stored under
        :param filename: path of the local file to upload
        :param reduced_redundancy: forwarded to set_contents_from_filename
        :return: True if the upload succeeded, False if every attempt failed
            (earlier versions always returned None, even on failure)
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'
            .format(bucket_name, key_name, filename, reduced_redundancy))
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                bucket = self.get_bucket(bucket_name)
                key = Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(
                    filename, reduced_redundancy=reduced_redundancy)
                return True
            except socket.error:
                LOGGER.exception('Error')
                # Only wait if another attempt will actually follow
                if attempt < max_attempts:
                    time.sleep(10)

        # Make the final failure explicit instead of returning silently
        LOGGER.error(
            'Giving up adding {0} to {1}/{2} after {3} attempts'.format(
                filename, bucket_name, key_name, max_attempts))
        return False
def copy_files(processes, bottom_frequency, frequency_range):
    """
    Queue a Task for every ``.image.tar`` / ``.image.tar.gz`` key under
    ``CLEAN/`` that in_frequency_range accepts, and wait for the consumers.

    :param processes: number of Consumer processes to start
    :param bottom_frequency: forwarded to in_frequency_range
    :param frequency_range: forwarded to in_frequency_range
    """
    # Make sure the target directory is there
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    # Scan the bucket
    bucket = S3Helper().get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}/CLEAN'.format(bucket))

    task_queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for _ in range(processes):
        Consumer(task_queue).start()

    for key in bucket.list(prefix='CLEAN/'):
        LOGGER.info('Checking {0}'.format(key.key))

        # Only image tar files are candidates
        if not key.key.endswith(('.image.tar.gz', '.image.tar')):
            continue

        # Do we need this file?
        basename_key = basename(key.key)
        if not in_frequency_range(basename_key, bottom_frequency, frequency_range):
            continue

        # Queue the copy of the file
        temp_file = os.path.join(DIRECTORY, basename_key)
        task_queue.put(Task(key, temp_file, DIRECTORY))

    # Add a poison pill per consumer to shut things down
    for _ in range(processes):
        task_queue.put(None)

    # Wait for the queue to terminate
    task_queue.join()
Exemple #15
0
    def start_pong(self, xxx_id: int) -> None:
        """
        Mark ``xxx_id`` as being processed in redis and record it in the
        module-level ``g_processing_xxx_id``.

        Returns early (after logging) when the processing key already holds
        ``'1'``.

        NOTE(review): the nested ``start`` function is defined but never
        called in the lines visible here - the snippet may be truncated.

        :param xxx_id: identifier embedded in the redis key and in the
            dispatched event payload
        """
        global g_processing_xxx_id
        r = get_redis_client()
        processing_key = (f"{PROJECT_NAME}.{APP_NAME}.xxx.is_processing.{xxx_id}")
        # Already flagged as in progress: do nothing.
        # NOTE(review): compares against the str '1' - confirm the client
        # returns str rather than bytes.
        if r.get(processing_key) == '1':
            LOGGER.info(f"ignore processing xxx {xxx_id}")
            return
        # Flag as in progress; ex=30 is presumably a 30 second expiry
        r.set(processing_key, '1', ex=30)
        g_processing_xxx_id = xxx_id

        # Result is discarded; the purpose is not evident from this code
        list(range(10000))

        def start() -> None:
            # Loops forever dispatching "processed" events once per second
            # and re-arming the key's expiry; the key is deleted on exit.
            LOGGER.info(f"start_work with {xxx_id}")
            list(range(20000))
            try:
                while True:
                    list(range(3000))
                    self.dispatch("processed", {
                        'xxx_id': xxx_id,
                        'info': {},
                    })
                    # Re-arm the expiry (3, presumably seconds) each pass
                    r.expire(processing_key, 3)
                    time.sleep(1)
            finally:
                r.delete(processing_key)
 def __call__(self):
     """
     Copy ``self._filename`` to ``self._bucket_location`` in the CHILES
     bucket; any exception is logged rather than propagated.
     """
     # noinspection PyBroadException
     try:
         message = 'Copying {0} to s3:{1}'.format(
             self._filename, self._bucket_location)
         LOGGER.info(message)
         S3Helper().add_file_to_bucket(
             CHILES_BUCKET_NAME, self._bucket_location, self._filename)
     except Exception:
         LOGGER.exception('CopyTask died')
Exemple #17
0
    def add_tar_to_bucket_multipart(self,
                                    bucket_name,
                                    key_name,
                                    source_path,
                                    gzip=False,
                                    parallel_processes=2,
                                    reduced_redundancy=True,
                                    bufsize=10 * 1024 * 1024):
        """
        Parallel multipart upload.

        Streams the entries of ``source_path`` as a tar archive through an
        S3Feeder into a multipart upload. The upload is completed when the
        whole archive was written and cancelled otherwise.

        :param bucket_name: name of the bucket passed to get_bucket
        :param key_name: key to create; also used to guess the Content-Type
        :param source_path: directory whose entries are added to the archive
        :param gzip: write a gzip-compressed stream when True
        :param parallel_processes: forwarded to S3Feeder
        :param reduced_redundancy: forwarded to initiate_multipart_upload
        :param bufsize: forwarded to S3Feeder; a tenth of it is used as the
            tarfile buffer size
        :return: True if the upload was completed, False if it was cancelled
            (earlier versions always returned None)
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, reduced_redundancy: {4}, bufsize: {5}'
            .format(bucket_name, key_name, source_path, parallel_processes,
                    reduced_redundancy, bufsize))
        bucket = self.get_bucket(bucket_name)

        headers = {
            'Content-Type':
            mimetypes.guess_type(key_name)[0] or 'application/octet-stream'
        }
        mp = bucket.initiate_multipart_upload(
            key_name, headers=headers, reduced_redundancy=reduced_redundancy)
        s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes,
                             self._aws_access_key_id,
                             self._aws_secret_access_key)

        if gzip:
            mode = "w|gz"
        else:
            mode = "w|"
        tar = tarfile.open(mode=mode,
                           fileobj=s3_feeder,
                           bufsize=int(bufsize / 10))

        complete = True
        # noinspection PyBroadException
        try:
            try:
                for entry in os.listdir(source_path):
                    full_filename = join(source_path, entry)
                    LOGGER.info('tar: [full_filename: {0}, entry: {1}]'.format(
                        full_filename, entry))
                    tar.add(full_filename, arcname=entry)

                tar.close()
            finally:
                # Close the feeder exactly once, on success and on failure
                s3_feeder.close()
        except Exception:
            # Record why the upload is being abandoned instead of failing
            # silently; a failing feeder close also ends up here so the
            # multipart upload is still cancelled below.
            LOGGER.exception(
                'Multipart tar upload of {0} failed'.format(source_path))
            complete = False

        # Finish the upload
        if complete:
            mp.complete_upload()
        else:
            mp.cancel_upload()
        return complete
def get_mime_encoded_user_data(volume_id, setup_disks, in_user_data, now):
    """
    Build the multipart MIME user data.

    The message holds the part returned by get_cloud_init followed by a text
    part made of ``setup_disks`` plus the formatted ``in_user_data``.

    :param volume_id: first value substituted into ``in_user_data``
    :param setup_disks: script text placed in front of the formatted data
    :param in_user_data: template formatted with volume_id, now, PIP_PACKAGES
    :param now: second value substituted into ``in_user_data``
    :return: the MIME message as a string
    """
    message = MIMEMultipart()
    cloud_init_part = get_cloud_init()
    message.attach(cloud_init_part)

    data_formatted = in_user_data.format(volume_id, now, PIP_PACKAGES)
    LOGGER.info(data_formatted)

    script_part = MIMEText(setup_disks + data_formatted)
    message.attach(script_part)
    return message.as_string()
Exemple #19
0
def get_mime_encoded_user_data(volume_id, setup_disks, in_user_data, now):
    """
    Assemble the user data as a multipart MIME string.

    :param volume_id: substituted into ``in_user_data`` as ``{0}``
    :param setup_disks: text prepended to the formatted user data
    :param in_user_data: format template receiving volume_id, now and
        PIP_PACKAGES, in that order
    :param now: substituted into ``in_user_data`` as ``{1}``
    :return: string form of the MIME message
    """
    mime_message = MIMEMultipart()
    # First part: whatever get_cloud_init provides
    mime_message.attach(get_cloud_init())

    # Second part: disk setup followed by the filled-in template
    substitutions = (volume_id, now, PIP_PACKAGES)
    data_formatted = in_user_data.format(*substitutions)
    LOGGER.info(data_formatted)
    mime_message.attach(MIMEText(setup_disks + data_formatted))

    return mime_message.as_string()
Exemple #20
0
    def get(self) -> t.Dict[str, t.Any]:
        """
        Ping the ``pong`` service through the Pyro4 proxy and through the
        service proxy, logging both replies.

        :return: ``{'hello': 'world'}`` on success, or ``{'hello': 'error
            <exc>'}`` when the RPC proxy lookup raises LookupError
        """
        try:
            rpc_proxy: Pyro4.Proxy = get_health_service_rpc_proxy('pong')
        except LookupError as e:
            trace = traceback.format_exc()
            LOGGER.error(f"{e}: {trace}")
            return {'hello': f'error {e}'}

        pyro_reply = rpc_proxy.dev_pyro4_ping(1, src='duang')
        LOGGER.info(pyro_reply)

        with make_service_proxy() as service_proxy:
            nameko_reply = service_proxy.pong.dev_nameko_ping(1, src='duang')
            LOGGER.info(nameko_reply)

        return {'hello': 'world'}
 def __call__(self):
     """
     Run the copy: upload ``self._filename`` to the CHILES bucket under
     ``self._bucket_location``. Failures are logged, never raised.
     """
     source = self._filename
     destination = self._bucket_location
     # noinspection PyBroadException
     try:
         LOGGER.info('Copying {0} to s3:{1}'.format(source, destination))
         s3_helper = S3Helper()
         s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME, destination, source)
     except Exception:
         LOGGER.exception('CopyTask died')
Exemple #22
0
    def add_file_to_bucket_multipart(self,
                                     bucket_name,
                                     key_name,
                                     source_path,
                                     parallel_processes=2,
                                     reduced_redundancy=True):
        """
        Parallel multipart upload.

        The file is cut into 10 MiB chunks which are uploaded by a process
        pool. The upload is completed when S3 reports as many parts as there
        are chunks, and cancelled otherwise.

        :param bucket_name: name of the bucket passed to get_bucket
        :param key_name: key to create; also used to guess the Content-Type
        :param source_path: path of the local file to upload
        :param parallel_processes: size of the upload process pool
        :param reduced_redundancy: forwarded to initiate_multipart_upload
        :raises S3UploadException: if more than 10,000 chunks would be needed
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'
            .format(bucket_name, key_name, source_path, parallel_processes,
                    reduced_redundancy))

        source_size = os.stat(source_path).st_size
        bytes_per_chunk = 10 * 1024 * 1024
        chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

        # You can only upload 10,000 chunks; exactly 10,000 is still valid
        if chunk_amount > 10000:
            raise S3UploadException('Too many chunks')

        bucket = self.get_bucket(bucket_name)

        headers = {
            'Content-Type':
            mimetypes.guess_type(key_name)[0] or 'application/octet-stream'
        }
        mp = bucket.initiate_multipart_upload(
            key_name,
            headers=headers,
            reduced_redundancy=reduced_redundancy)

        LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(
            bytes_per_chunk, chunk_amount))

        pool = None
        try:
            pool = Pool(processes=parallel_processes)
            for i in range(chunk_amount):
                offset = i * bytes_per_chunk
                remaining_bytes = source_size - offset
                bytes_to_copy = min([bytes_per_chunk, remaining_bytes])
                part_num = i + 1
                pool.apply_async(upload_part, [
                    self._aws_access_key_id, self._aws_secret_access_key,
                    bucket_name, mp.id, part_num, source_path, offset,
                    bytes_to_copy
                ])
            pool.close()
            pool.join()

            # get_all_parts() yields one page of results only, so count by
            # iterating the upload, which pages through every part.
            parts_seen = sum(1 for _ in mp)
        except Exception:
            # Clean up the workers and the half-finished upload
            if pool is not None:
                pool.terminate()
            mp.cancel_upload()
            raise

        if parts_seen == chunk_amount:
            mp.complete_upload()
        else:
            mp.cancel_upload()
Exemple #23
0
    def handle_stop_pong_event(self, event_data: t.Dict[str, t.Any]) -> None:
        """
        Send SIGQUIT to this process when the event refers to the id stored
        in ``g_processing_xxx_id``; ignore events for any other id.

        :param event_data: event payload; its ``'xxx_id'`` entry is compared
            with ``g_processing_xxx_id``
        """
        global g_processing_xxx_id
        if event_data['xxx_id'] != g_processing_xxx_id:
            return
        LOGGER.info("stop self")

        def kill_self() -> None:
            # Short pause before signalling our own pid
            time.sleep(0.1)
            os.kill(os.getpid(), signal.SIGQUIT)

        # Daemon thread, so the handler itself returns immediately
        killer = threading.Thread(target=kill_self, daemon=True)
        killer.start()
Exemple #24
0
    def get_mime_encoded_user_data(self, volume_id):
        """
        Build the multipart MIME user data for this job.

        :param volume_id: substituted into the user data template
        :return: the MIME message as a string
        """
        message = MIMEMultipart()
        message.attach(get_cloud_init())

        # Build the strings we need
        cvel_pipeline = self.build_cvel_pipeline()
        template_values = (
            cvel_pipeline,
            self._obs_id,
            volume_id,
            self._now,
            self._counter,
            PIP_PACKAGES,
        )
        data_formatted = self._user_data.format(*template_values)
        LOGGER.info(data_formatted)

        # Disk setup script first, then the filled-in template
        message.attach(MIMEText(self._setup_disks + data_formatted))
        return message.as_string()
def copy_files(date, vis_file):
    """
    Upload every directory under CHILES_CVEL_OUTPUT whose name matches
    ``vis_file`` to ``CVEL/<vis_file>/<date>/data.tar`` and then delete the
    directory.

    :param date: inserted into the destination key
    :param vis_file: fnmatch pattern for the directory names; also inserted
        into the destination key
    """
    s3_helper = S3Helper()
    # Look in the output directory
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            multipart = can_be_multipart_tar(result_dir)
            if multipart:
                LOGGER.info('Using add_tar_to_bucket_multipart')
                key_name = 'CVEL/{0}/{1}/data.tar'.format(vis_file, date)
                s3_helper.add_tar_to_bucket_multipart(CHILES_BUCKET_NAME, key_name, result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)

                key_name = 'CVEL/{0}/{1}/data.tar'.format(vis_file, date)
                s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME, key_name, output_tar_filename)

                # Clean up
                os.remove(output_tar_filename)

            # The source directory is removed whichever path was taken
            shutil.rmtree(result_dir, ignore_errors=True)
Exemple #26
0
def copy_files(date, vis_file):
    """
    Copy the matching output directories to the CHILES bucket.

    Walks CHILES_CVEL_OUTPUT, and for each directory name matching
    ``vis_file`` uploads it as ``CVEL/<vis_file>/<date>/data.tar`` (streamed
    when can_be_multipart_tar allows it, via a local tar file otherwise) and
    finally removes the directory.

    :param date: inserted into the destination key
    :param vis_file: fnmatch pattern for directory names, also part of the key
    """
    helper = S3Helper()
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info(
            'root: {0}, dir_names: {1}, filenames: {2}'.format(
                root, dir_names, filenames))

        matching_dirs = fnmatch.filter(dir_names, vis_file)
        for match in matching_dirs:
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            if not can_be_multipart_tar(result_dir):
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                tar_path = join(root, match + '.tar')
                make_tarfile(tar_path, result_dir)

                helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    tar_path)

                # The local tar file has served its purpose
                os.remove(tar_path)
            else:
                LOGGER.info('Using add_tar_to_bucket_multipart')
                helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    result_dir)

            # Remove the source directory in either case
            shutil.rmtree(result_dir, ignore_errors=True)
Exemple #27
0
 def start() -> None:
     """
     Dispatch a "processed" event for ``xxx_id`` once per second, re-arming
     the expiry of ``processing_key`` each time; the key is deleted when the
     loop is left.

     Uses ``self``, ``xxx_id``, ``r`` and ``processing_key`` from the
     enclosing scope.
     """
     LOGGER.info(f"start_work with {xxx_id}")
     list(range(20000))
     try:
         while True:
             list(range(3000))
             payload = {
                 'xxx_id': xxx_id,
                 'info': {},
             }
             self.dispatch("processed", payload)
             r.expire(processing_key, 3)
             time.sleep(1)
     finally:
         # Always drop the processing flag on the way out
         r.delete(processing_key)
Exemple #28
0
    def add_tar_to_bucket_multipart(self, bucket_name, key_name, source_path, gzip=False, parallel_processes=2, reduced_redundancy=True, bufsize=10*1024*1024):
        """
        Parallel multipart upload.

        Writes the entries of ``source_path`` as a tar stream into an
        S3Feeder that feeds a multipart upload. The upload is completed if
        the whole stream was written and cancelled if anything failed.

        :param bucket_name: name of the bucket passed to get_bucket
        :param key_name: key to create; also used to guess the Content-Type
        :param source_path: directory whose entries are added to the archive
        :param gzip: write a gzip-compressed stream when True
        :param parallel_processes: forwarded to S3Feeder
        :param reduced_redundancy: forwarded to initiate_multipart_upload
        :param bufsize: forwarded to S3Feeder; a tenth of it is used as the
            tarfile buffer size
        :return: True if the upload was completed, False if it was cancelled
            (earlier versions always returned None)
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, reduced_redundancy: {4}, bufsize: {5}'.format(
                bucket_name,
                key_name,
                source_path,
                parallel_processes,
                reduced_redundancy,
                bufsize
            )
        )
        bucket = self.get_bucket(bucket_name)

        headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
        mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)
        s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes, self._aws_access_key_id, self._aws_secret_access_key)

        if gzip:
            mode = "w|gz"
        else:
            mode = "w|"
        tar = tarfile.open(mode=mode, fileobj=s3_feeder, bufsize=int(bufsize / 10))

        complete = True
        # noinspection PyBroadException
        try:
            try:
                for entry in os.listdir(source_path):
                    full_filename = join(source_path, entry)
                    LOGGER.info(
                        'tar: [full_filename: {0}, entry: {1}]'.format(
                            full_filename,
                            entry
                        )
                    )
                    tar.add(full_filename, arcname=entry)

                tar.close()
            finally:
                # The feeder is closed once, whether or not the tar succeeded
                s3_feeder.close()
        except Exception:
            # Log the cause rather than swallowing it; an error raised while
            # closing the feeder lands here too, so the upload still gets
            # cancelled below.
            LOGGER.exception(
                'Multipart tar upload of {0} failed'.format(source_path)
            )
            complete = False

        # Finish the upload
        if complete:
            mp.complete_upload()
        else:
            mp.cancel_upload()
        return complete
Exemple #29
0
def start_servers(
        ami_id,
        user_data,
        setup_disks,
        instance_type,
        obs_id,
        created_by,
        name,
        instance_details,
        spot_price,
        ebs,
        bottom_frequency,
        frequency_range):
    """
    Request a spot instance for ``obs_id`` in the zone returned by
    get_cheapest_spot_price, or log an error when no zone is returned.

    :param ami_id: forwarded to run_spot_instance
    :param user_data: forwarded to get_mime_encoded_user_data
    :param setup_disks: forwarded to get_mime_encoded_user_data
    :param instance_type: used for the price lookup and the instance request
    :param obs_id: logged, put in the user data and appended to the name
    :param created_by: forwarded to run_spot_instance
    :param name: prefix of the instance name
    :param instance_details: provides number_disks and size for the swap size
    :param spot_price: used for the price lookup and the instance request
    :param ebs: EBS size passed as ebs_size; None selects a swap size of 1
    :param bottom_frequency: forwarded to get_mime_encoded_user_data
    :param frequency_range: forwarded to get_mime_encoded_user_data
    """
    LOGGER.info('obs_id: {0}, bottom_frequency: {1}, frequency_range: {2}'.format(obs_id, bottom_frequency, frequency_range))
    ec2_helper = EC2Helper()
    zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)

    if zone is None:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(instance_type, spot_price))
        return

    # Swap size: 1 without EBS, otherwise 75% of the ephemeral size capped at 16
    if ebs is not None:
        ephemeral_size = instance_details.number_disks * instance_details.size
        swap_size = min(int(ephemeral_size * 0.75), 16)
    else:
        swap_size = 1

    user_data_mime = get_mime_encoded_user_data(
        user_data, obs_id, setup_disks, bottom_frequency, frequency_range, swap_size)
    LOGGER.info('{0}'.format(user_data_mime))

    instance_name = name + '- {0}'.format(obs_id)
    ec2_helper.run_spot_instance(
        ami_id,
        spot_price,
        user_data_mime,
        instance_type,
        None,
        created_by,
        instance_name,
        instance_details=instance_details,
        zone=zone,
        ebs_size=ebs,
        number_ebs_volumes=4,
        ephemeral=True)
Exemple #30
0
def check_args(args):
    """
    Validate the parsed arguments and fill in defaults.

    :param args: mapping with the keys ``obs_ids``, ``instance_type``,
        ``name``, ``ami_id``, ``created_by``, ``spot_price`` and
        ``bash_script``
    :return: dict of resolved arguments, or None when ``obs_ids``,
        ``instance_type`` or ``name`` is None or the instance type is not in
        AWS_INSTANCES
    """
    obs_ids = args['obs_ids']
    if obs_ids is None:
        return None

    map_args = {}
    # A single '*' selects every known observation id
    wants_all = len(obs_ids) == 1 and obs_ids[0] == '*'
    map_args['obs_ids'] = OBS_IDS.keys() if wants_all else obs_ids

    if args['instance_type'] is None:
        return None

    if args['name'] is None:
        return None

    instance_details = AWS_INSTANCES.get(args['instance_type'])
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(
            args['instance_type']))
        return None

    LOGGER.info(
        'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'
        .format(args['instance_type'], instance_details.vCPU,
                instance_details.memory, instance_details.number_disks,
                instance_details.size, instance_details.iops_support))

    # Fall back to the defaults for anything that was not supplied
    ami_id = args['ami_id']
    if ami_id is None:
        ami_id = AWS_AMI_ID
    map_args['ami_id'] = ami_id

    created_by = args['created_by']
    if created_by is None:
        created_by = getpass.getuser()
    map_args['created_by'] = created_by

    map_args['spot_price'] = args['spot_price']

    bash_script = args['bash_script']
    if bash_script is None:
        bash_script = BASH_SCRIPT_CVEL
    map_args['user_data'] = get_script(bash_script)

    map_args['setup_disks'] = get_script(BASH_SCRIPT_SETUP_DISKS)
    map_args['instance_details'] = instance_details

    return map_args
Exemple #31
0
def get_cvel():
    """
    Index the CVEL data archives held in the CHILES bucket.

    Every key under the ``CVEL/`` prefix is logged; keys ending in
    ``data.tar.gz`` or ``data.tar`` are split on ``/`` and their third path
    element is grouped under their second path element.

    :return: dict mapping the second path element to the list of third
             path elements found beneath it
    """
    bucket = S3Helper().get_bucket(CHILES_BUCKET_NAME)
    grouped = {}
    for entry in bucket.list(prefix='CVEL/'):
        LOGGER.info('Checking {0}'.format(entry.key))
        if not entry.key.endswith(('data.tar.gz', 'data.tar')):
            continue
        parts = entry.key.split('/')
        grouped.setdefault(str(parts[1]), []).append(str(parts[2]))

    return grouped
Exemple #32
0
def get_cvel():
    """
    Build a lookup of the CVEL archives stored in the CHILES bucket.

    Walks the keys below ``CVEL/``, logging each one, and for those whose
    name ends with ``data.tar.gz`` or ``data.tar`` records path element 2
    in a list filed under path element 1.

    :return: dict of {path element 1: [path element 2, ...]}
    """
    helper = S3Helper()
    chiles_bucket = helper.get_bucket(CHILES_BUCKET_NAME)
    archives = {}
    for s3_key in chiles_bucket.list(prefix='CVEL/'):
        key_name = s3_key.key
        LOGGER.info('Checking {0}'.format(key_name))
        is_archive = key_name.endswith('data.tar.gz') or key_name.endswith('data.tar')
        if is_archive:
            pieces = key_name.split('/')
            group = str(pieces[1])
            # First time we see this group: start an empty list for it
            if group not in archives:
                archives[group] = []
            archives[group].append(str(pieces[2]))

    return archives
Exemple #33
0
    def get_mime_encoded_user_data(self, volume_id):
        """
        Build the multipart MIME user data for an instance.

        The message has two parts: the cloud-init part returned by
        get_cloud_init(), and a text part made of the disk setup script
        followed by the user data template filled in with the CVEL
        pipeline, obs id, volume id, timestamp, counter and pip packages.

        :param volume_id: id of the volume substituted into the template
        :return: the MIME message rendered as a string
        """
        message = MIMEMultipart()
        message.attach(get_cloud_init())

        # Fill in the placeholders of the user data template
        template_values = (
            self.build_cvel_pipeline(),
            self._obs_id,
            volume_id,
            self._now,
            self._counter,
            PIP_PACKAGES,
        )
        script_body = self._user_data.format(*template_values)
        LOGGER.info(script_body)

        message.attach(MIMEText(self._setup_disks + script_body))
        return message.as_string()
Exemple #34
0
def get_mime_encoded_user_data(instance_details, setup_disks, user_data):
    """
    Build the multipart MIME user data for an instance.

    The frequency range is fixed at 940 to 1424 and the first template
    placeholder is filled with the literal "TODO".

    :param instance_details: passed to get_swap_size() to size the swap
    :param setup_disks: disk setup script, prepended to the formatted data
    :param user_data: template with five positional placeholders
    :return: the MIME message rendered as a string
    """
    # The frequency range is hard-coded here
    min_freq, max_freq = 940, 1424
    LOGGER.info("min_freq: {0}, max_freq: {1}".format(min_freq, max_freq))

    # Part one of the message is the cloud-init section
    message = MIMEMultipart()
    message.attach(get_cloud_init())

    # Part two is the disk setup followed by the filled-in template
    template_values = ("TODO", min_freq, max_freq, get_swap_size(instance_details), PIP_PACKAGES)
    script_body = user_data.format(*template_values)
    message.attach(MIMEText(setup_disks + script_body))
    return message.as_string()
Exemple #35
0
 def _upload(retries_left=amount_of_retries):
     """
     Upload one in-memory part of a multipart upload, retrying on failure.

     Looks up the multipart upload whose id equals ``multipart_id`` in the
     bucket and sends ``data_to_store`` as part ``part_num``. Any exception
     triggers a recursive retry until ``retries_left`` reaches zero, at
     which point the exception is re-raised.

     :param retries_left: number of further attempts allowed after this one
     :raises Exception: whatever the last attempt raised
     """
     try:
         LOGGER.info('Start uploading part #{0} ...'.format(part_num))
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
         # NOTE(review): if no upload matches multipart_id nothing is sent
         # and no error is reported - confirm that is intended
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 fp = StringIO(data_to_store)
                 try:
                     mp.upload_part_from_file(fp=fp, part_num=part_num, replace=True)
                 finally:
                     # Tidy up after ourselves, even when the upload fails
                     fp.close()
                 break
     except Exception:
         if retries_left:
             LOGGER.exception('... Failed uploading part #{0} retries left {1}'.format(part_num, retries_left))
             _upload(retries_left=retries_left - 1)
         else:
             LOGGER.exception('... Failed uploading part #{0}'.format(part_num))
             # Bare raise keeps the original traceback
             raise
    def __call__(self):
        """
        Tar the task's file and push it to S3 as a multipart upload.

        The archive is stored as ``<bucket_location>/measurement_set.tar``
        in the task's bucket. Any exception is logged and swallowed so the
        caller never sees it.
        """
        # S3 allows at most 10,000 parts per upload and the biggest file
        # from Semester 1 is 803GB, hence parts of 100 MB
        part_size = 100*1024*1024
        # noinspection PyBroadException
        try:
            helper = S3Helper(self._aws_access_key_id, self._aws_secret_access_key)
            LOGGER.info('Copying to: {0}/{1}/measurement_set.tar'.format(self._bucket, self._bucket_location))

            key_name = '{0}/measurement_set.tar'.format(self._bucket_location)
            helper.add_tar_to_bucket_multipart(
                self._bucket,
                key_name,
                self._filename,
                parallel_processes=2,
                bufsize=part_size
            )
        except Exception:
            LOGGER.exception('CopyTask died')
Exemple #37
0
def check_args(args):
    """
    Check the arguments and build the settings used to start the servers.

    No prompting takes place; when a mandatory argument is absent the
    result is simply None.

    :param args: mapping of option name to value (None when not supplied)
    :return: dict with the keys obs_ids, ami_id, created_by, spot_price,
             user_data, setup_disks and instance_details, or None when
             obs_ids / instance_type / name is missing or the instance type
             is not listed in AWS_INSTANCES
    """
    obs_ids = args['obs_ids']
    if obs_ids is None:
        return None
    if len(obs_ids) == 1 and obs_ids[0] == '*':
        # '*' on its own selects every configured observation
        obs_ids = OBS_IDS.keys()

    if args['instance_type'] is None:
        return None

    if args['name'] is None:
        return None

    instance_type = args['instance_type']
    instance_details = AWS_INSTANCES.get(instance_type)
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(instance_type))
        return None

    summary = 'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'.format(
        instance_type,
        instance_details.vCPU,
        instance_details.memory,
        instance_details.number_disks,
        instance_details.size,
        instance_details.iops_support)
    LOGGER.info(summary)

    def value_or(option, fallback):
        # fallback is a callable so defaults are only computed when needed
        supplied = args[option]
        return supplied if supplied is not None else fallback()

    map_args = {'obs_ids': obs_ids}
    map_args['ami_id'] = value_or('ami_id', lambda: AWS_AMI_ID)
    map_args['created_by'] = value_or('created_by', getpass.getuser)
    map_args['spot_price'] = args['spot_price']
    map_args['user_data'] = get_script(value_or('bash_script', lambda: BASH_SCRIPT_CVEL))
    map_args['setup_disks'] = get_script(BASH_SCRIPT_SETUP_DISKS)
    map_args['instance_details'] = instance_details

    return map_args
    def get_mime_encoded_user_data(self):
        """
        Build the multipart MIME user data for this frequency band.

        The minimum and maximum frequencies are cut out of the frequency id
        (the text between the '_' and the '~', and the text after the '~')
        and logged. The template itself only receives the frequency id, the
        swap size and the pip packages.

        :return: the MIME message rendered as a string
        """
        # Split the frequencies out of the id
        frequency_id = self._frequency_id
        underscore_at = find(frequency_id, '_')
        tilde_at = find(frequency_id, '~')
        min_freq = frequency_id[underscore_at + 1:tilde_at]
        max_freq = frequency_id[tilde_at + 1:]
        LOGGER.info('min_freq: {0}, max_freq: {1}'.format(min_freq, max_freq))

        # Part one of the message is the cloud-init section
        message = MIMEMultipart()
        message.attach(get_cloud_init())

        # Part two is the disk setup followed by the filled-in template
        script_body = self._user_data.format(self._frequency_id, self.get_swap_size(), PIP_PACKAGES)
        message.attach(MIMEText(self._setup_disks + script_body))
        return message.as_string()
Exemple #39
0
    def get_mime_encoded_user_data(self):
        """
        Build the multipart MIME user data for this frequency band.

        The minimum frequency is the text between the '_' and the '~' of
        the frequency id and the maximum is the text after the '~'. Both
        are logged and substituted into the user data template along with
        the frequency id, swap size and pip packages.

        :return: the MIME message rendered as a string
        """
        # Split the frequencies out of the id
        frequency_id = self._frequency_id
        underscore_at = find(frequency_id, '_')
        tilde_at = find(frequency_id, '~')
        min_freq = frequency_id[underscore_at + 1:tilde_at]
        max_freq = frequency_id[tilde_at + 1:]
        LOGGER.info('min_freq: {0}, max_freq: {1}'.format(min_freq, max_freq))

        # Part one of the message is the cloud-init section
        message = MIMEMultipart()
        message.attach(get_cloud_init())

        # Part two is the disk setup followed by the filled-in template
        template_values = (self._frequency_id, min_freq, max_freq, self.get_swap_size(), PIP_PACKAGES)
        script_body = self._user_data.format(*template_values)
        message.attach(MIMEText(self._setup_disks + script_body))
        return message.as_string()
Exemple #40
0
def check_args(args):
    """
    Check the arguments and build the settings used to start the servers.

    No prompting takes place; when a mandatory argument is absent the
    result is simply None.

    :param args: mapping of option name to value (None when not supplied)
    :return: dict with the keys ami_id, created_by, spot_price, user_data,
             setup_disks and instance_details, or None when snapshots /
             instance_type / name is missing or the instance type is not
             listed in AWS_INSTANCES
    """
    # These three must all be supplied (checked in this order)
    for mandatory in ("snapshots", "instance_type", "name"):
        if args[mandatory] is None:
            return None

    instance_type = args["instance_type"]
    instance_details = AWS_INSTANCES.get(instance_type)
    if instance_details is None:
        LOGGER.error("The instance type {0} is not supported.".format(instance_type))
        return None

    LOGGER.info(
        "instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}".format(
            instance_type,
            instance_details.vCPU,
            instance_details.memory,
            instance_details.number_disks,
            instance_details.size,
            instance_details.iops_support,
        )
    )

    # Fall back to the defaults for anything optional that was left out
    ami_id = args["ami_id"]
    if ami_id is None:
        ami_id = AWS_AMI_ID

    created_by = args["created_by"]
    if created_by is None:
        created_by = getpass.getuser()

    spot_price = args["spot_price"]

    bash_script = args["bash_script"]
    if bash_script is None:
        bash_script = BASH_SCRIPT_CLEAN_ALL
    user_data = get_script(bash_script)
    setup_disks = get_script(BASH_SCRIPT_SETUP_DISKS)

    return {
        "ami_id": ami_id,
        "created_by": created_by,
        "spot_price": spot_price,
        "user_data": user_data,
        "setup_disks": setup_disks,
        "instance_details": instance_details,
    }
def start_servers(
        ami_id,
        user_data,
        setup_disks,
        instance_type,
        obs_id,
        created_by,
        name,
        instance_details,
        spot_price):
    """
    Create a volume from the observation's snapshot and request a spot
    instance to work on it.

    :param ami_id: AMI to launch
    :param user_data: user data template passed to get_mime_encoded_user_data
    :param setup_disks: disk setup script passed to get_mime_encoded_user_data
    :param instance_type: instance type to request
    :param obs_id: observation id, looked up in OBS_IDS to find the snapshot
    :param created_by: passed through to run_spot_instance
    :param name: suffix of the instance name ('<snapshot name>-<name>')
    :param instance_details: details of the instance type; its iops_support
                             flag decides whether the volume gets IOPS
    :param spot_price: spot price bid; no instance is started when None
    """
    snapshot_id = OBS_IDS.get(obs_id)
    if snapshot_id is None:
        # The placeholder used to be logged literally; fill it in
        LOGGER.warning('The obs-id: {0} does not exist in the settings file'.format(obs_id))
    else:
        ec2_helper = EC2Helper()
        iops = None
        if instance_details.iops_support:
            iops = 500

        zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)
        if zone is not None:
            volume, snapshot_name = ec2_helper.create_volume(snapshot_id, zone, iops=iops)
            LOGGER.info('obs_id: {0}, volume_name: {1}'.format(obs_id, snapshot_name))
            now = datetime.datetime.now()
            user_data_mime = get_mime_encoded_user_data(volume.id, setup_disks, user_data, now.strftime('%Y-%m-%dT%H-%M-%S'))

            # NOTE(review): with spot_price None the volume created above is
            # never attached to anything - confirm that is intended
            if spot_price is not None:
                ec2_helper.run_spot_instance(
                    ami_id,
                    spot_price,
                    user_data_mime,
                    instance_type,
                    volume.id,
                    created_by,
                    '{1}-{0}'.format(name, snapshot_name),
                    instance_details,
                    zone,
                    ephemeral=True)
        else:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(instance_type, spot_price))
Exemple #42
0
    def run_instance(self, ami_id, user_data, instance_type, volume_id, created_by, name, zone, ephemeral=False):
        """
        Launch an instance, wait until it is running, then attach the
        volume (when one is given) and tag the instance.

        :param ami_id: AMI to launch
        :param user_data: user data handed to the instance
        :param instance_type: instance type to launch
        :param volume_id: volume attached as /dev/xvdf; skipped when falsy
        :param created_by: value of the 'Created By' tag
        :param name: value of the 'Name' tag
        :param zone: key into AWS_SUBNETS selecting the subnet
        :param ephemeral: passed to build_block_device_map
        :return: the instance that was started
        """
        block_devices = self.build_block_device_map(ephemeral)

        LOGGER.info('Running instance: ami: {0}'.format(ami_id))
        launch_options = dict(
            instance_type=instance_type,
            instance_initiated_shutdown_behavior='terminate',
            subnet_id=AWS_SUBNETS[zone],
            key_name=AWS_KEY_NAME,
            security_group_ids=AWS_SECURITY_GROUPS,
            user_data=user_data,
            block_device_map=block_devices,
        )
        reservation = self.ec2_connection.run_instances(ami_id, **launch_options)
        instance = reservation.instances[0]
        time.sleep(5)

        # Poll every 5 seconds until the state is 'running'
        # NOTE(review): there is no timeout on this loop
        while instance.update() != 'running':
            LOGGER.info('Not running yet')
            time.sleep(5)

        if volume_id:
            # Now we have an instance id we can attach the disk
            self.ec2_connection.attach_volume(volume_id, instance.id, '/dev/xvdf')

        LOGGER.info('Assigning the tags')
        tags = {
            'AMI': '{0}'.format(ami_id),
            'Name': '{0}'.format(name),
            'Volume_id': '{0}'.format(volume_id),
            'Created By': '{0}'.format(created_by),
        }
        self.ec2_connection.create_tags([instance.id], tags)

        return instance
Exemple #43
0
def copy_files(cube):
    """
    Upload the ``<cube>.cube`` directory to the CHILES bucket as a tar file.

    The directory is looked for under CHILES_IMGCONCAT_OUTPUT; nothing
    happens when it does not exist. The archive is stored under the
    ``IMGCONCAT/`` prefix, either streamed as a multipart upload or built
    locally first, uploaded and then deleted.

    :param cube: name of the cube, without the '.cube' suffix
    """
    s3_helper = S3Helper()
    # Look in the output directory
    cube_dir = join(CHILES_IMGCONCAT_OUTPUT, cube) + '.cube'
    if not isdir(cube_dir):
        return

    LOGGER.info('dir_name: {0}'.format(cube_dir))
    tar_path = cube_dir + '.tar'
    s3_key = 'IMGCONCAT/{0}'.format(basename(tar_path))

    if can_be_multipart_tar(cube_dir):
        LOGGER.info('Using add_tar_to_bucket_multipart')
        s3_helper.add_tar_to_bucket_multipart(
            CHILES_BUCKET_NAME, s3_key, cube_dir, bufsize=20 * 1024 * 1024)
        return

    LOGGER.info('Using make_tarfile, then adding file to bucket')
    make_tarfile(tar_path, cube_dir)
    s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME, s3_key, tar_path)

    # Clean up the local tar file now it has been copied
    os.remove(tar_path)
def copy_files(cube):
    """
    Copy the image cube directory for ``cube`` into the CHILES bucket.

    The source is ``<CHILES_IMGCONCAT_OUTPUT>/<cube>.cube``; when that is
    not a directory the function does nothing. The tar file lands under
    ``IMGCONCAT/`` and is sent as a multipart upload when
    can_be_multipart_tar() allows it, otherwise it is written to disk,
    uploaded as a single file and removed again.

    :param cube: name of the cube, without the '.cube' suffix
    """
    helper = S3Helper()
    # Look in the output directory
    source_dir = join(CHILES_IMGCONCAT_OUTPUT, cube) + '.cube'
    if isdir(source_dir):
        LOGGER.info('dir_name: {0}'.format(source_dir))
        local_tar = source_dir + '.tar'
        key_name = 'IMGCONCAT/{0}'.format(basename(local_tar))

        use_multipart = can_be_multipart_tar(source_dir)
        if not use_multipart:
            LOGGER.info('Using make_tarfile, then adding file to bucket')
            make_tarfile(local_tar, source_dir)
            helper.add_file_to_bucket(CHILES_BUCKET_NAME, key_name, local_tar)

            # The local archive is no longer needed
            os.remove(local_tar)
        else:
            LOGGER.info('Using add_tar_to_bucket_multipart')
            helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME,
                key_name,
                source_dir,
                bufsize=20 * 1024 * 1024)
def copy_files(s3_tag, processes):
    """
    Queue the log files for copying and wait for the consumers to finish.

    Queued are: every ``*.log`` under CHILES_LOGS, every ``*.csv`` and
    ``*.log`` under BENCHMARKING_LOGS, and /var/log/chiles-output.log.
    Each file is given the key ``<s3_tag>/<YYYYMMDD>/<file name>`` using
    today's date.

    :param s3_tag: first element of every key
    :param processes: number of Consumer processes to start
    """
    # Create the queue and start the consumers
    queue = multiprocessing.JoinableQueue()
    for _ in range(processes):
        Consumer(queue).start()

    today = datetime.date.today()

    def dated_key(file_name):
        # <s3_tag>/<YYYYMMDD>/<file_name>
        return '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, file_name)

    def queue_matches(top, patterns):
        # Queue a CopyTask for every file below top matching a pattern
        for root, dir_names, filenames in os.walk(top):
            for pattern in patterns:
                for match in fnmatch.filter(filenames, pattern):
                    path = join(root, match)
                    LOGGER.info('Looking at: {0}'.format(path))
                    queue.put(CopyTask(path, dated_key(match)))

    # Look in the output directories
    queue_matches(CHILES_LOGS, ('*.log',))
    queue_matches(BENCHMARKING_LOGS, ('*.csv', '*.log'))

    queue.put(CopyTask('/var/log/chiles-output.log', dated_key('chiles-output.log')))

    # Add a poison pill per consumer to shut things down
    for _ in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
Exemple #46
0
def start_servers(ami_id, user_data, setup_disks, instance_type, obs_id,
                  created_by, name, instance_details, spot_price):
    """
    Create a volume from the observation's snapshot and request a spot
    instance to work on it.

    :param ami_id: AMI to launch
    :param user_data: user data template passed to get_mime_encoded_user_data
    :param setup_disks: disk setup script passed to get_mime_encoded_user_data
    :param instance_type: instance type to request
    :param obs_id: observation id, looked up in OBS_IDS to find the snapshot
    :param created_by: passed through to run_spot_instance
    :param name: suffix of the instance name ('<snapshot name>-<name>')
    :param instance_details: details of the instance type; its iops_support
                             flag decides whether the volume gets IOPS
    :param spot_price: spot price bid; no instance is started when None
    """
    snapshot_id = OBS_IDS.get(obs_id)
    if snapshot_id is None:
        # The placeholder used to be logged literally; fill it in
        LOGGER.warning(
            'The obs-id: {0} does not exist in the settings file'.format(
                obs_id))
    else:
        ec2_helper = EC2Helper()
        iops = None
        if instance_details.iops_support:
            iops = 500

        zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)
        if zone is not None:
            volume, snapshot_name = ec2_helper.create_volume(snapshot_id,
                                                             zone,
                                                             iops=iops)
            LOGGER.info('obs_id: {0}, volume_name: {1}'.format(
                obs_id, snapshot_name))
            now = datetime.datetime.now()
            user_data_mime = get_mime_encoded_user_data(
                volume.id, setup_disks, user_data,
                now.strftime('%Y-%m-%dT%H-%M-%S'))

            # NOTE(review): with spot_price None the volume created above is
            # never attached to anything - confirm that is intended
            if spot_price is not None:
                ec2_helper.run_spot_instance(ami_id,
                                             spot_price,
                                             user_data_mime,
                                             instance_type,
                                             volume.id,
                                             created_by,
                                             '{1}-{0}'.format(
                                                 name, snapshot_name),
                                             instance_details,
                                             zone,
                                             ephemeral=True)
        else:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(
                instance_type, spot_price))
Exemple #47
0
 def _upload(retries_left=amount_of_retries):
     """
     Upload one chunk of a file as a part of a multipart upload, retrying
     on failure.

     Looks up the multipart upload whose id equals ``multipart_id`` in the
     bucket and sends ``bytes_to_copy`` bytes of ``source_path`` starting
     at ``offset`` as part ``part_num``. Any exception triggers a recursive
     retry until ``retries_left`` reaches zero, at which point the
     exception is re-raised.

     :param retries_left: number of further attempts allowed after this one
     :raises Exception: whatever the last attempt raised
     """
     try:
         LOGGER.info('Start uploading part: #{0}, source_path: {1}'.format(
             part_num, source_path))
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
         # NOTE(review): if no upload matches multipart_id nothing is sent
         # and no error is reported - confirm that is intended
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 with FileChunkIO(source_path,
                                  'r',
                                  offset=offset,
                                  bytes=bytes_to_copy) as fp:
                     mp.upload_part_from_file(fp=fp, part_num=part_num)
                 break
     except Exception:
         if retries_left:
             _upload(retries_left=retries_left - 1)
         else:
             # Log as an error with the traceback, not as plain info
             LOGGER.exception(
                 'Failed uploading part: #{0}, source_path: {1}'.format(
                     part_num, source_path))
             # Bare raise keeps the original traceback
             raise
    def __call__(self):
        """
        Download the task's tar file and unpack it into its own directory.

        The target directory is ``<directory>/<image name>``, where the
        image name is the tar file's base name with '.tar.gz' (or '.tar')
        removed. The tar file is deleted after a successful extraction; on
        any failure the error is logged and the target directory removed.
        """
        tar_path = self._tar_file
        gzipped = tar_path.endswith('.tar.gz')
        image_name = basename(tar_path).replace('.tar.gz' if gzipped else '.tar', '')
        directory = join(self._directory, image_name)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(self._key.key, tar_path, directory))
            if not os.path.exists(directory):
                os.makedirs(directory)
            self._key.get_contents_to_filename(tar_path)

            mode = "r:gz" if gzipped else "r:"
            with closing(tarfile.open(tar_path, mode)) as archive:
                archive.extractall(path=directory)

            os.remove(tar_path)
        except Exception:
            LOGGER.exception('Task died')
            # Do not leave a half-extracted directory behind
            shutil.rmtree(directory, ignore_errors=True)
Exemple #49
0
    def add_file_to_bucket(self, bucket_name, key_name, filename, reduced_redundancy=True):
        """
        Add file to a bucket, retrying up to three times on socket errors.

        :param bucket_name: name of the bucket to upload into
        :param key_name: key the file is stored under
        :param filename: path of the local file to upload
        :param reduced_redundancy: passed to set_contents_from_filename
        :return: True when the file was uploaded, False when every attempt
                 failed with a socket error
        """
        LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'.format(bucket_name, key_name, filename, reduced_redundancy))
        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                bucket = self.get_bucket(bucket_name)
                key = Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(filename, reduced_redundancy=reduced_redundancy)
                return True
            except socket.error:
                LOGGER.exception('Error')
                # Only pause when another attempt is coming
                if attempt < max_attempts:
                    time.sleep(10)

        # Previously the method returned silently here, as if it had worked
        LOGGER.error('Giving up adding {0} to {1}/{2} after {3} attempts'.format(filename, bucket_name, key_name, max_attempts))
        return False
    def __call__(self):
        """
        Download the task's tar file and unpack it under the frequency id.

        The archive is extracted into ``<directory>/<frequency_id>`` and
        deleted afterwards. On any failure the error is logged and that
        directory is removed.
        """
        target_dir = join(self._directory, self._frequency_id)
        # noinspection PyBroadException
        try:
            details = 'key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.format(
                self._key.key,
                self._tar_file,
                self._directory,
                self._frequency_id)
            LOGGER.info(details)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            self._key.get_contents_to_filename(self._tar_file)

            if self._tar_file.endswith('tar.gz'):
                mode = "r:gz"
            else:
                mode = "r:"
            with closing(tarfile.open(self._tar_file, mode)) as archive:
                archive.extractall(path=target_dir)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            # Do not leave a half-extracted directory behind
            shutil.rmtree(target_dir, ignore_errors=True)
Exemple #51
0
    def process_entry(self, dn, entry):
        """
        Report mail accounts whose lock attribute is 'submit'.

        Entries whose last objectClass is not 'mailaccount', and accounts
        carrying a 'forwardto' attribute, are skipped. For a locked account
        the locking date is fetched with check_entry_mariadb() and a line
        is written to the output file.

        :param dn: distinguished name of the entry (not used here)
        :param entry: mapping of attribute name to a list of byte strings
        """
        # Decode once; this used to be decoded again for the log message
        object_class = entry['objectClass'][-1].decode("utf-8")
        if object_class != 'mailaccount':
            LOGGER.debug('Skipping object of class {}'.format(object_class))
            return

        address = entry['cn'][0].decode("utf-8")
        if 'forwardto' in entry:
            LOGGER.debug('Skipping alias account {}'.format(address))
            return

        if 'lock' not in entry:
            return
        if entry['lock'][0].decode("utf-8") != 'submit':
            return

        # The unused second format argument has been dropped
        LOGGER.info('Address {} has lock = submit!'.format(address))
        locking_date = check_entry_mariadb(address,
                                           self.mariadb_connection,
                                           self.query)
        # NOTE(review): assumes check_entry_mariadb returns a string
        self.output_file.write(address + ' ' + object_class +
                               ' locking date in MariaDB: ' +
                               locking_date + '\n')
Exemple #52
0
 def _upload(retries_left=amount_of_retries):
     """
     Upload one in-memory part of a multipart upload, retrying on failure.

     Looks up the multipart upload whose id equals ``multipart_id`` in the
     bucket and sends ``data_to_store`` as part ``part_num``. Any exception
     triggers a recursive retry until ``retries_left`` reaches zero, at
     which point the exception is re-raised.

     :param retries_left: number of further attempts allowed after this one
     :raises Exception: whatever the last attempt raised
     """
     try:
         LOGGER.info('Start uploading part #{0} ...'.format(part_num))
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
         # NOTE(review): if no upload matches multipart_id nothing is sent
         # and no error is reported - confirm that is intended
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 fp = StringIO(data_to_store)
                 try:
                     mp.upload_part_from_file(fp=fp,
                                              part_num=part_num,
                                              replace=True)
                 finally:
                     # Tidy up after ourselves, even when the upload fails
                     fp.close()
                 break
     except Exception:
         if retries_left:
             LOGGER.exception(
                 '... Failed uploading part #{0} retries left {1}'.format(
                     part_num, retries_left))
             _upload(retries_left=retries_left - 1)
         else:
             LOGGER.exception(
                 '... Failed uploading part #{0}'.format(part_num))
             # Bare raise keeps the original traceback
             raise
Exemple #53
0
    def __call__(self):
        """
        Create a volume from the snapshot and request a spot instance.

        The zone comes from get_cheapest_spot_price(); when none is
        returned an error is logged and nothing is created. The instance is
        named ``<snapshot name>-<counter>-<name>`` and is only requested
        when a spot price is set.
        """
        ec2_helper = EC2Helper()
        # Volumes only get IOPS when the instance type supports them
        iops = 500 if self._instance_details.iops_support else None

        zone = ec2_helper.get_cheapest_spot_price(self._instance_type,
                                                  self._spot_price)
        if zone is None:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(
                self._instance_type, self._spot_price))
            return

        # Get the volume and the name of its snapshot
        volume, snapshot_name = ec2_helper.create_volume(self._snapshot_id,
                                                         zone,
                                                         iops=iops)
        LOGGER.info('obs_id: {0}, volume_name: {1}'.format(
            self._obs_id, snapshot_name))
        user_data_mime = self.get_mime_encoded_user_data(volume.id)

        if self._spot_price is None:
            return

        instance_name = '{1}-{2}-{0}'.format(
            self._name, snapshot_name, self._counter)
        ec2_helper.run_spot_instance(self._ami_id,
                                     self._spot_price,
                                     user_data_mime,
                                     self._instance_type,
                                     volume.id,
                                     self._created_by,
                                     instance_name,
                                     self._instance_details,
                                     zone,
                                     ephemeral=True)
Exemple #54
0
def copy_files(processes, bottom_frequency, frequency_range):
    """
    Queue the CLEAN images that fall in a frequency range for copying.

    Keys under ``CLEAN/`` in the CHILES bucket ending in '.image.tar.gz'
    or '.image.tar' are checked with in_frequency_range(); each match is
    queued as a Task targeting DIRECTORY. The function returns once the
    queue has been fully processed.

    :param processes: number of Consumer processes to start
    :param bottom_frequency: passed to in_frequency_range
    :param frequency_range: passed to in_frequency_range
    """
    # Create the directory
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    # Scan the bucket
    bucket = S3Helper().get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}/CLEAN'.format(bucket))

    # Create the queue and start the consumers
    queue = multiprocessing.JoinableQueue()
    for _ in range(processes):
        Consumer(queue).start()

    for s3_key in bucket.list(prefix='CLEAN/'):
        LOGGER.info('Checking {0}'.format(s3_key.key))
        # Only image archives are of interest
        if not s3_key.key.endswith(('.image.tar.gz', '.image.tar')):
            continue

        # Do we need this file?
        file_name = basename(s3_key.key)
        if not in_frequency_range(file_name, bottom_frequency,
                                  frequency_range):
            continue

        # Queue the copy of the file
        queue.put(Task(s3_key, os.path.join(DIRECTORY, file_name), DIRECTORY))

    # Add a poison pill per consumer to shut things down
    for _ in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
    def __call__(self):
        """
        Fetch the task's tar file and extract it below the frequency id.

        Extraction goes into ``<directory>/<frequency_id>``; the tar file
        is removed once unpacked. Any exception is logged and the
        extraction directory is deleted.
        """
        extract_dir = join(self._directory, self._frequency_id)
        # noinspection PyBroadException
        try:
            LOGGER.info(
                'key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.
                format(self._key.key, self._tar_file, self._directory,
                       self._frequency_id))
            if not os.path.exists(extract_dir):
                os.makedirs(extract_dir)
            self._key.get_contents_to_filename(self._tar_file)

            # Pick the tar mode from the file name
            gzipped = self._tar_file.endswith('tar.gz')
            archive = tarfile.open(self._tar_file, "r:gz" if gzipped else "r:")
            with closing(archive) as tar:
                tar.extractall(path=extract_dir)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            # Do not leave a half-extracted directory behind
            shutil.rmtree(extract_dir, ignore_errors=True)
Exemple #56
0
    def __call__(self):
        """
        Request a spot instance for this task's frequency id.

        The zone comes from get_cheapest_spot_price(); when none is
        returned an error is logged and nothing is started. The instance
        is named ``<frequency_id>-<name>`` and has no volume attached.
        """
        LOGGER.info('frequency_id: {0}'.format(self._frequency_id))
        ec2_helper = EC2Helper()
        zone = ec2_helper.get_cheapest_spot_price(self._instance_type, self._spot_price)

        if zone is None:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(self._instance_type, self._spot_price))
            return

        user_data_mime = self.get_mime_encoded_user_data()
        LOGGER.info('{0}'.format(user_data_mime))

        instance_name = '{0}-{1}'.format(self._frequency_id, self._name)
        ec2_helper.run_spot_instance(
            self._ami_id,
            self._spot_price,
            user_data_mime,
            self._instance_type,
            None,  # no volume to attach
            self._created_by,
            instance_name,
            instance_details=self._instance_details,
            zone=zone,
            ephemeral=True)
Exemple #57
0
def get_s3_connection(aws_access_key_id=None, aws_secret_access_key=None):
    """
    Open a boto S3 connection using the first credential source available.

    Order of preference: the keys passed in (both must be given), then the
    'chiles' profile when ~/.aws/credentials exists, then boto's own
    configuration.

    :param aws_access_key_id: access key, or None
    :param aws_secret_access_key: secret key, or None
    :return: the connection returned by boto.connect_s3
    """
    if aws_access_key_id is not None and aws_secret_access_key is not None:
        LOGGER.info("Using user provided keys")
        connect_options = {
            'aws_access_key_id': aws_access_key_id,
            'aws_secret_access_key': aws_secret_access_key,
        }
    elif exists(join(expanduser('~'), '.aws/credentials')):
        # This relies on a ~/.aws/credentials file holding the '<aws access key>', '<aws secret key>'
        LOGGER.info("Using ~/.aws/credentials")
        connect_options = {'profile_name': 'chiles'}
    else:
        # This relies on a ~/.boto or /etc/boto.cfg file holding the '<aws access key>', '<aws secret key>'
        LOGGER.info("Using ~/.boto or /etc/boto.cfg")
        connect_options = {}

    return boto.connect_s3(**connect_options)
Exemple #58
0
 def __init__(self, aws_access_key_id=None, aws_secret_access_key=None):
     """
     Get an EC2 connection to AWS_REGION and keep it on the instance.

     Order of preference for credentials: the keys passed in (both must
     be given), then the 'chiles' profile when ~/.aws/credentials exists,
     then boto's own configuration.

     :param aws_access_key_id: access key, or None
     :param aws_secret_access_key: secret key, or None
     """
     if aws_access_key_id is not None and aws_secret_access_key is not None:
         LOGGER.info("Using user provided keys")
         connect_options = {
             'aws_access_key_id': aws_access_key_id,
             'aws_secret_access_key': aws_secret_access_key,
         }
     elif exists(join(expanduser('~'), '.aws/credentials')):
         # This relies on a ~/.aws/credentials file holding the '<aws access key>', '<aws secret key>'
         LOGGER.info("Using ~/.aws/credentials")
         connect_options = {'profile_name': 'chiles'}
     else:
         # This relies on a ~/.boto or /etc/boto.cfg file holding the '<aws access key>', '<aws secret key>'
         LOGGER.info("Using ~/.boto or /etc/boto.cfg")
         connect_options = {}

     self.ec2_connection = boto.ec2.connect_to_region(AWS_REGION, **connect_options)