Example #1
    def add_file_to_bucket(self,
                           bucket_name,
                           key_name,
                           filename,
                           reduced_redundancy=True):
        """
        Add file to a bucket

        :param bucket_name: name of the destination bucket
        :param key_name: key under which to store the file
        :param filename: local path of the file to upload
        :param reduced_redundancy: use S3 reduced-redundancy storage
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'
            .format(bucket_name, key_name, filename, reduced_redundancy))
        retry_count = 0
        done = False
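        # Retry the upload up to three times, sleeping 10 s after each socket error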
        while retry_count < 3 and not done:
            try:
                bucket = self.get_bucket(bucket_name)
                key = Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(
                    filename, reduced_redundancy=reduced_redundancy)
                done = True
            except socket.error:
                LOGGER.exception('Error')
                retry_count += 1
                time.sleep(10)
Example #2
 def __init__(self):
     try:
         with open(DIR_NAME + "/config.yml", 'r') as config_file:
             self.cfg = yaml.load(config_file, Loader=yaml.FullLoader)
     except Exception:
         LOGGER.exception("Failed to open config file.")
         sys.exit(1)
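
The loader above assumes module-level DIR_NAME and LOGGER globals; a minimal sketch of those assumptions (not the project's actual definitions):

 import logging
 import os
 import sys

 import yaml

 LOGGER = logging.getLogger(__name__)
 DIR_NAME = os.path.dirname(os.path.abspath(__file__))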
Example #3
    def __call__(self):
        """
        Actually run the job
        """
        if self._tar_file.endswith('.tar.gz'):
            image_name = basename(self._tar_file).replace('.tar.gz', '')
        else:
            image_name = basename(self._tar_file).replace('.tar', '')
        directory = join(self._directory, image_name)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(
                self._key.key, self._tar_file, directory))
            if not os.path.exists(directory):
                os.makedirs(directory)
            self._key.get_contents_to_filename(self._tar_file)
            mode = "r:gz" if self._tar_file.endswith('.tar.gz') else "r:"
            with closing(tarfile.open(self._tar_file, mode)) as tar:
                tar.extractall(path=directory)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(directory, ignore_errors=True)
Example #4
def open_mdb_connection():
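    # Try each configured host in turn, falling back to the next on failure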
    for host in MARIA_DB_CONFIGURATION['hosts']:
        try:
            connection = pymysql.connect(
                user=MARIA_DB_CONFIGURATION['user'],
                password=MARIA_DB_CONFIGURATION['password'],
                database=MARIA_DB_CONFIGURATION['database'],
                host=host,
                cursorclass=pymysql.cursors.DictCursor,
                autocommit=True,
            )
        except Exception:
            if host == MARIA_DB_CONFIGURATION['hosts'][-1]:
                log_str = "Failed to connect to any MariaDB host"
                LOGGER.exception(log_str)
                raise DBError(log_str)
            else:
                LOGGER.warning(
                    'Failed to connect to MariaDB on host {}. Trying next host.'
                    .format(host))
        else:
            if connection.open:
                LOGGER.debug(
                    'mariadb connection to host {} successful.'.format(host))
                return connection
            else:
                err_str = 'Connection to MariaDB failed.'
                LOGGER.error(err_str)
                raise DBError(err_str)
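
A hypothetical MARIA_DB_CONFIGURATION this function would accept (the hosts and credentials below are placeholders, not real values):

    MARIA_DB_CONFIGURATION = {
        'hosts': ['db1.example.com', 'db2.example.com'],
        'user': 'chiles',
        'password': 'secret',
        'database': 'chiles',
    }
    connection = open_mdb_connection()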
Example #5
 def __call__(self):
     # noinspection PyBroadException
     try:
         LOGGER.info('Copying {0} to s3:{1}'.format(self._filename,
                                                    self._bucket_location))
         s3_helper = S3Helper()
         s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME,
                                      self._bucket_location, self._filename)
     except Exception:
         LOGGER.exception('CopyTask died')
Example #6
 def __call__(self):
     # noinspection PyBroadException
     try:
         LOGGER.info('Copying {0} to s3:{1}'.format(self._filename, self._bucket_location))
         s3_helper = S3Helper()
         s3_helper.add_file_to_bucket(
             CHILES_BUCKET_NAME,
             self._bucket_location,
             self._filename)
     except Exception:
         LOGGER.exception('CopyTask died')
Example #7
    def __call__(self):
        # noinspection PyBroadException
        try:
            s3_helper = S3Helper(self._aws_access_key_id, self._aws_secret_access_key)
            LOGGER.info('Copying to: {0}/{1}/measurement_set.tar'.format(self._bucket, self._bucket_location))

            # S3 multipart uploads allow at most 10,000 parts per upload
            # The biggest file from Semester 1 is 803 GB, so a 100 MB part
            # size (803 GB / 100 MB ~= 8,030 parts) stays under the limit
            s3_helper.add_tar_to_bucket_multipart(
                self._bucket,
                '{0}/measurement_set.tar'.format(self._bucket_location),
                self._filename,
                parallel_processes=2,
                bufsize=100*1024*1024
            )
        except Exception:
            LOGGER.exception('CopyTask died')
Example #8
 def _upload(retries_left=amount_of_retries):
     try:
         LOGGER.info('Start uploading part #{0} ...'.format(part_num))
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
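         # Locate this upload by its multipart id among the bucket's active uploads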
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 fp = StringIO(data_to_store)
                 mp.upload_part_from_file(fp=fp, part_num=part_num, replace=True)
                 fp.close()          # Tidy up after ourselves
                 break
     except Exception:
         if retries_left:
             LOGGER.exception('... Failed uploading part #{0} retries left {1}'.format(part_num, retries_left))
             _upload(retries_left=retries_left - 1)
         else:
             LOGGER.exception('... Failed uploading part #{0}'.format(part_num))
             raise
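
For context, a minimal sequential driver for a part-upload helper like this could look as follows; this is a sketch against boto's multipart API, and the upload_multipart name and single-process loop are assumptions rather than the project's code:

 from StringIO import StringIO

 def upload_multipart(bucket, key_name, filename, part_size=100 * 1024 * 1024):
     # Start the multipart upload and push the file one part at a time
     mp = bucket.initiate_multipart_upload(key_name)
     try:
         with open(filename, 'rb') as source:
             part_num = 0
             while True:
                 data = source.read(part_size)
                 if not data:
                     break
                 part_num += 1
                 mp.upload_part_from_file(StringIO(data), part_num=part_num)
         mp.complete_upload()
     except Exception:
         mp.cancel_upload()
         raise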
Example #9
    def __call__(self):
        """
        Actually run the job
        """
        if self._tar_file.endswith('.tar.gz'):
            image_name = basename(self._tar_file).replace('.tar.gz', '')
        else:
            image_name = basename(self._tar_file).replace('.tar', '')
        directory = join(self._directory, image_name)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(self._key.key, self._tar_file, directory))
            if not os.path.exists(directory):
                os.makedirs(directory)
            self._key.get_contents_to_filename(self._tar_file)
            with closing(tarfile.open(self._tar_file, "r:gz" if self._tar_file.endswith('.tar.gz') else "r:")) as tar:
                tar.extractall(path=directory)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(directory, ignore_errors=True)
Example #10
    def __call__(self):
        """
        Actually run the job
        """
        corrected_path = join(self._directory, self._frequency_id)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.format(
                self._key.key,
                self._tar_file,
                self._directory,
                self._frequency_id))
            if not os.path.exists(corrected_path):
                os.makedirs(corrected_path)
            self._key.get_contents_to_filename(self._tar_file)
            with closing(tarfile.open(self._tar_file, "r:gz" if self._tar_file.endswith('tar.gz') else "r:")) as tar:
                tar.extractall(path=corrected_path)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(corrected_path, ignore_errors=True)
Example #11
    def add_file_to_bucket(self, bucket_name, key_name, filename, reduced_redundancy=True):
        """
        Add file to a bucket

        :param bucket_name: name of the destination bucket
        :param key_name: key under which to store the file
        :param filename: local path of the file to upload
        :param reduced_redundancy: use S3 reduced-redundancy storage
        """
        LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'.format(bucket_name, key_name, filename, reduced_redundancy))
        retry_count = 0
        done = False
        while retry_count < 3 and not done:
            try:
                bucket = self.get_bucket(bucket_name)
                key = Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(filename, reduced_redundancy=reduced_redundancy)
                done = True
            except socket.error:
                LOGGER.exception('Error')
                retry_count += 1
                time.sleep(10)
Example #12
 def _upload(retries_left=amount_of_retries):
     try:
         LOGGER.info('Start uploading part #{0} ...'.format(part_num))
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 fp = StringIO(data_to_store)
                 mp.upload_part_from_file(fp=fp,
                                          part_num=part_num,
                                          replace=True)
                 fp.close()  # Tidy up after ourselves
                 break
     except Exception:
         if retries_left:
             LOGGER.exception(
                 '... Failed uploading part #{0} retries left {1}'.format(
                     part_num, retries_left))
             _upload(retries_left=retries_left - 1)
         else:
             LOGGER.exception(
                 '... Failed uploading part #{0}'.format(part_num))
             raise
Example #13
    def __call__(self):
        """
        Actually run the job
        """
        corrected_path = join(self._directory, self._frequency_id)
        # noinspection PyBroadException
        try:
            LOGGER.info(
                'key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.format(
                    self._key.key, self._tar_file, self._directory,
                    self._frequency_id))
            if not os.path.exists(corrected_path):
                os.makedirs(corrected_path)
            self._key.get_contents_to_filename(self._tar_file)
            mode = "r:gz" if self._tar_file.endswith('tar.gz') else "r:"
            with closing(tarfile.open(self._tar_file, mode)) as tar:
                tar.extractall(path=corrected_path)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(corrected_path, ignore_errors=True)
Example #14
    def run_spot_instance(
            self,
            ami_id,
            spot_price,
            user_data,
            instance_type,
            volume_id,
            created_by,
            name,
            instance_details,
            zone,
            ephemeral=False,
            ebs_size=None,
            number_ebs_volumes=None,
            bdm=None):
        """
        Run the ami as a spot instance
        """
        subnet_id = AWS_SUBNETS[zone]
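        # The spot request expires five minutes from now if it is not fulfilled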
        now_plus = datetime.datetime.utcnow() + datetime.timedelta(minutes=5)
        if bdm is None:
            bdm = self.build_block_device_map(
                ephemeral,
                instance_details.number_disks,
                ebs_size=ebs_size,
                iops=instance_details.iops_support,
                number_ebs_volumes=number_ebs_volumes)
        spot_request = self.ec2_connection.request_spot_instances(
            spot_price,
            image_id=ami_id,
            count=1,
            valid_until=now_plus.isoformat(),
            instance_type=instance_type,
            subnet_id=subnet_id,
            key_name=AWS_KEY_NAME,
            ebs_optimized=True if instance_details.iops_support else False,
            security_group_ids=AWS_SECURITY_GROUPS,
            user_data=user_data,
            block_device_map=bdm)

        # Wait for EC2 to provision the instance
        time.sleep(10)
        instance_id = None
        error_count = 0

        # Has it been provisioned yet? We allow three errors before aborting
        while instance_id is None and error_count < 3:
            spot_request_id = spot_request[0].id
            requests = None
            try:
                requests = self.ec2_connection.get_all_spot_instance_requests(request_ids=[spot_request_id])
            except EC2ResponseError:
                LOGGER.exception('Error count = {0}'.format(error_count))
                error_count += 1

            if requests is None:
                # Wait for AWS to catch up
                time.sleep(10)
            else:
                LOGGER.info('{0}, state: {1}, status:{2}'.format(spot_request_id, requests[0].state, requests[0].status))
                if requests[0].state == 'active' and requests[0].status.code == 'fulfilled':
                    instance_id = requests[0].instance_id
                elif requests[0].state == 'cancelled':
                    raise CancelledException('Request {0} cancelled. Status: {1}'.format(spot_request_id, requests[0].status))
                elif requests[0].state == 'failed':
                    raise CancelledException('Request {0} failed. Status: {1}. Fault: {2}'.format(spot_request_id, requests[0].status, requests[0].fault))
                else:
                    time.sleep(10)

        reservations = self.ec2_connection.get_all_instances(instance_ids=[instance_id])
        instance = reservations[0].instances[0]

        LOGGER.info('Waiting to start up')
        while instance.update() != 'running':
            LOGGER.info('Not running yet')
            time.sleep(5)

        if volume_id:
            LOGGER.info('Attaching {0}'.format(volume_id))
            # When we have an instance id we can attach the volume
            self.ec2_connection.attach_volume(volume_id, instance_id, '/dev/xvdf')

        # Give it time to settle down
        LOGGER.info('Assigning the tags')
        self.ec2_connection.create_tags(
            [instance_id],
            {
                'AMI': '{0}'.format(ami_id),
                'Name': '{0}'.format(name),
                'Volume_id': '{0}'.format(volume_id),
                'Created By': '{0}'.format(created_by)
            })

        return instance
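
A hypothetical invocation with placeholder values (ec2_helper, details and user_data_script are assumptions; instance_details must expose number_disks and iops_support, and zone must be a key in AWS_SUBNETS):

    instance = ec2_helper.run_spot_instance(
        ami_id='ami-12345678',
        spot_price='0.10',
        user_data=user_data_script,
        instance_type='m4.xlarge',
        volume_id=None,
        created_by='operator',
        name='chiles-worker',
        instance_details=details,
        zone='us-east-1a')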
Example #15
    def run_spot_instance(self,
                          ami_id,
                          spot_price,
                          user_data,
                          instance_type,
                          volume_id,
                          created_by,
                          name,
                          instance_details,
                          zone,
                          ephemeral=False,
                          ebs_size=None,
                          number_ebs_volumes=None,
                          bdm=None):
        """
        Run the ami as a spot instance
        """
        subnet_id = AWS_SUBNETS[zone]
        now_plus = datetime.datetime.utcnow() + datetime.timedelta(minutes=5)
        if bdm is None:
            bdm = self.build_block_device_map(
                ephemeral,
                instance_details.number_disks,
                ebs_size=ebs_size,
                iops=instance_details.iops_support,
                number_ebs_volumes=number_ebs_volumes)
        spot_request = self.ec2_connection.request_spot_instances(
            spot_price,
            image_id=ami_id,
            count=1,
            valid_until=now_plus.isoformat(),
            instance_type=instance_type,
            subnet_id=subnet_id,
            key_name=AWS_KEY_NAME,
            ebs_optimized=True if instance_details.iops_support else False,
            security_group_ids=AWS_SECURITY_GROUPS,
            user_data=user_data,
            block_device_map=bdm)

        # Wait for EC2 to provision the instance
        time.sleep(10)
        instance_id = None
        error_count = 0

        # Has it been provisioned yet? We allow three errors before aborting
        while instance_id is None and error_count < 3:
            spot_request_id = spot_request[0].id
            requests = None
            try:
                requests = self.ec2_connection.get_all_spot_instance_requests(
                    request_ids=[spot_request_id])
            except EC2ResponseError:
                LOGGER.exception('Error count = {0}'.format(error_count))
                error_count += 1

            if requests is None:
                # Wait for AWS to catch up
                time.sleep(10)
            else:
                LOGGER.info('{0}, state: {1}, status:{2}'.format(
                    spot_request_id, requests[0].state, requests[0].status))
                if (requests[0].state == 'active'
                        and requests[0].status.code == 'fulfilled'):
                    instance_id = requests[0].instance_id
                elif requests[0].state == 'cancelled':
                    raise CancelledException(
                        'Request {0} cancelled. Status: {1}'.format(
                            spot_request_id, requests[0].status))
                elif requests[0].state == 'failed':
                    raise CancelledException(
                        'Request {0} failed. Status: {1}. Fault: {2}'.format(
                            spot_request_id, requests[0].status,
                            requests[0].fault))
                else:
                    time.sleep(10)

        reservations = self.ec2_connection.get_all_instances(
            instance_ids=[instance_id])
        instance = reservations[0].instances[0]

        LOGGER.info('Waiting to start up')
        while instance.update() != 'running':
            LOGGER.info('Not running yet')
            time.sleep(5)

        if volume_id:
            LOGGER.info('Attaching {0}'.format(volume_id))
            # When we have an instance id we can attach the volume
            self.ec2_connection.attach_volume(volume_id, instance_id,
                                              '/dev/xvdf')

        # Give it time to settle down
        LOGGER.info('Assigning the tags')
        self.ec2_connection.create_tags(
            [instance_id], {
                'AMI': '{0}'.format(ami_id),
                'Name': '{0}'.format(name),
                'Volume_id': '{0}'.format(volume_id),
                'Created By': '{0}'.format(created_by)
            })

        return instance