Example #1
    def add_file_to_bucket_multipart(self, bucket_name, key_name, source_path, parallel_processes=2, reduced_redundancy=True):
        """
        Parallel multipart upload.
        """
        LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'.format(
            bucket_name, key_name, source_path, parallel_processes, reduced_redundancy))

        source_size = os.stat(source_path).st_size
        bytes_per_chunk = 10 * 1024 * 1024
        chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
        if chunk_amount < 10000:
            bucket = self.get_bucket(bucket_name)

            headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
            mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)

            LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(bytes_per_chunk, chunk_amount))

            # You can only upload 10,000 chunks
            pool = Pool(processes=parallel_processes)
            for i in range(chunk_amount):
                offset = i * bytes_per_chunk
                remaining_bytes = source_size - offset
                bytes_to_copy = min([bytes_per_chunk, remaining_bytes])
                part_num = i + 1
                pool.apply_async(upload_part, [self._aws_access_key_id, self._aws_secret_access_key, bucket_name, mp.id, part_num, source_path, offset, bytes_to_copy])
            pool.close()
            pool.join()

            if len(mp.get_all_parts()) == chunk_amount:
                mp.complete_upload()
            else:
                mp.cancel_upload()
        else:
            raise S3UploadException('Too many chunks')
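
The pool above hands each chunk to a module-level upload_part worker. A minimal sketch of that worker, modeled on the retrying _upload closure shown in Example #5 (the signature, retry count, and the get_s3_connection/FileChunkIO helpers are assumptions taken from that example):

def upload_part(aws_access_key_id, aws_secret_access_key, bucket_name,
                multipart_id, part_num, source_path, offset, bytes_to_copy,
                amount_of_retries=5):
    # Each worker process opens its own S3 connection, finds the multipart
    # upload by id and streams exactly one chunk of the source file.
    def _upload(retries_left=amount_of_retries):
        try:
            conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
            bucket = conn.get_bucket(bucket_name)
            for mp in bucket.get_all_multipart_uploads():
                if mp.id == multipart_id:
                    with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes_to_copy) as fp:
                        mp.upload_part_from_file(fp=fp, part_num=part_num)
                    break
        except Exception:
            if retries_left:
                _upload(retries_left=retries_left - 1)
            else:
                raise

    _upload()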
Example #2
    def __call__(self):
        """
        Actually run the job
        """
        # Get the name of the volume
        ec2_helper = EC2Helper()
        iops = None
        if self._instance_details.iops_support:
            iops = 500

        zone = ec2_helper.get_cheapest_spot_price(self._instance_type, self._spot_price)
        if zone is not None:
            volume, snapshot_name = ec2_helper.create_volume(self._snapshot_id, zone, iops=iops)
            LOGGER.info('obs_id: {0}, volume_name: {1}'.format(self._obs_id, snapshot_name))
            user_data_mime = self.get_mime_encoded_user_data(volume.id)

            if self._spot_price is not None:
                ec2_helper.run_spot_instance(
                    self._ami_id,
                    self._spot_price,
                    user_data_mime,
                    self._instance_type,
                    volume.id,
                    self._created_by,
                    '{1}-{2}-{0}'.format(self._name, snapshot_name, self._counter),
                    self._instance_details,
                    zone,
                    ephemeral=True)
        else:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(self._instance_type, self._spot_price))
Example #3
 def __init__(self):
     try:
         with open(DIR_NAME + "/config.yml", 'r') as config_file:
             self.cfg = yaml.load(config_file, Loader=yaml.FullLoader)
     except Exception:
         LOGGER.exception("Failed to open config file.", exc_info=True)
         sys.exit(1)
Example #4
def copy_files(args):
    # Create the queue
    queue = multiprocessing.JoinableQueue()
    # Start the consumers
    for x in range(PROCESSES):
        consumer = Consumer(queue)
        consumer.start()

    # Look in the output directory
    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug('root: {0}, dir_names: {1}, filenames: {2}'.format(
            root, dir_names, filenames))
        for match in fnmatch.filter(dir_names,
                                    '13B-266*calibrated_deepfield.ms'):
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))

            queue.put(
                CopyTask(args.bucket, match, result_dir,
                         args.aws_access_key_id, args.aws_secret_access_key))

    # Add a poison pill to shut things down
    for x in range(PROCESSES):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
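
The JoinableQueue/poison-pill pattern above assumes a Consumer process that keeps executing queued callables until it receives None. A minimal sketch of such a consumer (the real class may differ):

import multiprocessing


class Consumer(multiprocessing.Process):
    def __init__(self, queue):
        multiprocessing.Process.__init__(self)
        self._queue = queue

    def run(self):
        while True:
            task = self._queue.get()
            if task is None:
                # Poison pill: acknowledge it and stop this worker
                self._queue.task_done()
                break
            # Tasks such as CopyTask are callables; run them
            task()
            self._queue.task_done()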
Example #5
 def _upload(retries_left=amount_of_retries):
     try:
         LOGGER.info(
             'Start uploading part: #{0}, source_path: {1}'.format(
                 part_num,
                 source_path
             )
         )
         conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
         bucket = conn.get_bucket(bucket_name)
         for mp in bucket.get_all_multipart_uploads():
             if mp.id == multipart_id:
                 with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes_to_copy) as fp:
                     mp.upload_part_from_file(fp=fp, part_num=part_num)
                 break
     except Exception as exc:
         if retries_left:
             _upload(retries_left=retries_left - 1)
         else:
             LOGGER.info(
                 'Failed uploading part: #{0}, source_path: {1}'.format(
                     part_num,
                     source_path
                 )
             )
             raise exc
Example #6
def copy_files(processes, bottom_frequency, frequency_range):
    # Create the directory
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    # Scan the bucket
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}/CLEAN'.format(bucket))

    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    for key in bucket.list(prefix='CLEAN/'):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('.image.tar.gz') or key.key.endswith('.image.tar'):
            # Do we need this file?
            basename_key = basename(key.key)
            if in_frequency_range(basename_key, bottom_frequency, frequency_range):
                # Queue the copy of the file
                temp_file = os.path.join(DIRECTORY, basename_key)
                queue.put(Task(key, temp_file, DIRECTORY))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
Example #7
def start_servers(processes, ami_id, user_data, setup_disks, instance_type,
                  obs_ids, created_by, name, instance_details, spot_price,
                  frequency_channels, force):
    cvel_data = get_cvel()

    # Create the queue
    tasks = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(tasks)
        consumer.start()

    counter = 1
    for obs_id in obs_ids:
        snapshot_id = OBS_IDS.get(obs_id)
        if snapshot_id is None:
            LOGGER.warning(
                'The obs-id: {0} does not exist in the settings file'.format(obs_id))
        else:
            obs_id_dashes = obs_id.replace('_', '-')
            for frequency_groups in get_frequency_groups(
                    frequency_channels, obs_id_dashes, cvel_data, force):
                tasks.put(
                    Task(ami_id, user_data, setup_disks, instance_type, obs_id,
                         snapshot_id, created_by, name, spot_price,
                         instance_details, frequency_groups, counter))
                counter += 1

    # Add a poison pill to shut things down
    for x in range(processes):
        tasks.put(None)

    # Wait for the queue to terminate
    tasks.join()
Example #8
 def process_entry(self, dn, entry):
     try:
         address = entry['cn'][0].decode("utf-8")
     except KeyError:
         try:
             address = entry['dn'][0].decode("utf-8")
         except KeyError:
             return
     else:
         for field in entry:
             if self.handler.check_field(field) is False:
                 continue
             for value in entry[field]:
                 try:
                     decode_value = value.decode("utf-8")
                 except UnicodeDecodeError:
                     decode_value = value.decode('latin-1')
                 try:
                     if self.handler.execute(decode_value) is False:
                         continue
                 except IndexError:
                     continue
                 else:
                     LOGGER.debug(
                         'Address {} has wrong field: {}: {}'.format(
                             address, field, decode_value))
                     self.output_file.write(address + ' ' + field + ': ' +
                                            decode_value + '\n')
Example #9
def create_users(emails):
    failed = []
    users = []

    for email in emails:
        username = email.split('@')[0]

        data = {
            'username': username,
            'password': config['default_password'],
            'email': email
        }

        dumped = json.dumps(data, ensure_ascii=False).encode('utf-8')

        resp = requests.post(
            url="{}/register/".format(base_url),
            data=dumped,
            headers={'Content-Type': 'application/json;charset=UTF-8'})

        if resp.status_code == 400:
            failed.append(email)
            LOGGER.error(
                "Failed to create account for email {0}. Response: {1}".format(
                    email, str(resp.json())))

        if resp.status_code == 201:
            users.append(resp.json())
            LOGGER.info("Account successfully created.")

    return users, failed
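
A hypothetical call site chaining create_users with the create_posts helper from Example #10; the email addresses are placeholders:

users, failed = create_users(['alice@example.com', 'bob@example.com'])
if failed:
    LOGGER.warning('Could not register: {0}'.format(failed))
for user in users:
    create_posts(user)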
Example #10
def create_posts(user):
    access_token, refresh_token = obtain_token_pair(user['username'], config['default_password'])

    num_posts = random.randint(1, config['max_number_of_posts'])

    headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }

    for i in range(num_posts):
        title = "{0} {1}".format(user['username'], str(i))
        content = "{0} says: {1}".format(user['username'], config['post_content'])

        payload = {
            "title": title,
            "content": content
        }

        resp = requests.post(url="{}/posts/".format(base_url), json=payload, headers=headers)

        if resp.status_code == 401 and resp.json()['code'] == 'token_not_valid':
            LOGGER.warning("{}. Obtaining new token pair...".format(resp.json()['messages'][0]['message']))
            access_token, refresh_token = obtain_token_pair(user['username'], config['default_password'])
            headers = {
                'Authorization': 'Bearer {}'.format(access_token)
            }
            resp = requests.post(url="{}/posts/".format(base_url), json=payload, headers=headers)

        if resp.status_code != 201:
            LOGGER.error("Failed to create post. Response code: {}".format(resp.status_code))
Example #11
def copy_files(frequency_id, processes, days):
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(bucket, frequency_id))

    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('/data.tar.gz') or key.key.endswith('/data.tar'):
            elements = key.key.split('/')
            if elements[2] in days:
                directory = '/mnt/output/Chiles/split_vis/{0}/'.format(elements[2])

                # Queue the copy of the file
                temp_file = os.path.join(directory, 'data.tar.gz' if key.key.endswith('/data.tar.gz') else 'data.tar')
                queue.put(Task(key, temp_file, directory, frequency_id))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
Example #12
def main():
    parser = argparse.ArgumentParser('Start a number of CLEAN servers')
    parser.add_argument('-a', '--ami_id', help='the AMI id to use')
    parser.add_argument('-i', '--instance_type', required=True, help='the instance type to use')
    parser.add_argument('-c', '--created_by', help='the username to use')
    parser.add_argument('-n', '--name', required=True, help='the instance name to use')
    parser.add_argument('-s', '--spot_price', type=float, help='the spot price to use')
    parser.add_argument('-b', '--bash_script', help='the bash script to use')
    parser.add_argument('-p', '--processes', type=int, default=1, help='the number of processes to run')
    parser.add_argument('frequencies', nargs='+', help='the frequencies to use (vis_14XX~14YY)')

    args = vars(parser.parse_args())

    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(args))
    else:
        start_servers(
            args['processes'],
            corrected_args['ami_id'],
            corrected_args['user_data'],
            corrected_args['setup_disks'],
            args['instance_type'],
            args['frequencies'],
            corrected_args['created_by'],
            args['name'],
            corrected_args['instance_details'],
            corrected_args['spot_price'])
Example #13
    def add_file_to_bucket(self,
                           bucket_name,
                           key_name,
                           filename,
                           reduced_redundancy=True):
        """
        Add file to a bucket

        :param bucket_name:
        :param key_name:
        :param filename:
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'
            .format(bucket_name, key_name, filename, reduced_redundancy))
        retry_count = 0
        done = False
        while retry_count < 3 and not done:
            try:
                bucket = self.get_bucket(bucket_name)
                key = Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(
                    filename, reduced_redundancy=reduced_redundancy)
                done = True
            except socket.error:
                LOGGER.exception('Error')
                retry_count += 1
                time.sleep(10)
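
A hypothetical usage of the helper above; the bucket name and paths are placeholders:

s3_helper = S3Helper()
s3_helper.add_file_to_bucket(
    'my-bucket',
    'logs/run-001/output.log',
    '/tmp/output.log',
    reduced_redundancy=True)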
Example #14
def main():
    parser = argparse.ArgumentParser("Start a number of CLEAN servers")
    parser.add_argument("-a", "--ami_id", help="the AMI id to use")
    parser.add_argument("-i", "--instance_type", required=True, help="the instance type to use")
    parser.add_argument("-c", "--created_by", help="the username to use")
    parser.add_argument("-n", "--name", required=True, help="the instance name to use")
    parser.add_argument("-s", "--spot_price", type=float, help="the spot price to use")
    parser.add_argument("-b", "--bash_script", help="the bash script to use")
    parser.add_argument("-p", "--processes", type=int, default=1, help="the number of processes to run")
    parser.add_argument("snapshots", nargs="+", help="the snapshots to use")

    args = vars(parser.parse_args())

    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error("The arguments are incorrect: {0}".format(args))
    else:
        start_server(
            args["processes"],
            corrected_args["ami_id"],
            corrected_args["user_data"],
            corrected_args["setup_disks"],
            args["instance_type"],
            args["snapshots"],
            corrected_args["created_by"],
            args["name"],
            corrected_args["instance_details"],
            corrected_args["spot_price"],
        )
Example #15
def detect_wrong_format(pattern_dict: dict, dump_file):
    try:
        type = pattern_dict['type']
    except KeyError:
        LOGGER.error("Provide a type in the configuration file!")
        sys.exit(1)
    try:
        regex = pattern_dict['regex']
    except KeyError:
        LOGGER.error("Specify whether the pattern is a regex in the configuration file!")
        sys.exit(1)

    if regex:
        handler = PatternHandlerFactoryRegEx(pattern_dict)
    else:
        handler = PatternHandlerFactoryPattern(pattern_dict)

    if dump_file.endswith('.gz'):
        input_file = gzip.open(dump_file, 'rb')
    else:
        input_file = open(dump_file, 'rb')

    output_file = open('ldap_wrong_format.txt', 'w')
    processing_object = processing_object_builder(type, output_file, handler)
    parser = ldap_parser.ParseLDIF(input_file, processing_object)
    parser.parse()
Example #16
def copy_files(frequency_id):
    s3_helper = S3Helper()
    # Look in the output directory
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))
    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)
        if isdir(result_dir) and dir_name.startswith('cube_') and dir_name.endswith('.image'):
            LOGGER.info('dir_name: {0}'.format(dir_name))
            output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    '/CLEAN/{0}/{1}'.format(frequency_id, basename(output_tar_filename)),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                make_tarfile(output_tar_filename, result_dir)

                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(frequency_id, basename(output_tar_filename)),
                    output_tar_filename)

                # Clean up
                os.remove(output_tar_filename)
Example #17
    def start_pong(self, xxx_id: int) -> None:
        global g_processing_xxx_id
        r = get_redis_client()
        processing_key = f"{PROJECT_NAME}.{APP_NAME}.xxx.is_processing.{xxx_id}"
        if r.get(processing_key) == '1':
            LOGGER.info(f"ignore processing xxx {xxx_id}")
            return
        r.set(processing_key, '1', ex=30)
        g_processing_xxx_id = xxx_id

        list(range(10000))

        def start() -> None:
            LOGGER.info(f"start_work with {xxx_id}")
            list(range(20000))
            try:
                while True:
                    list(range(3000))
                    self.dispatch("processed", {
                        'xxx_id': xxx_id,
                        'info': {},
                    })
                    r.expire(processing_key, 3)
                    time.sleep(1)
            finally:
                r.delete(processing_key)
Example #18
def main():
    parser = argparse.ArgumentParser('Start a number of CLEAN servers')
    parser.add_argument('-a', '--ami_id', help='the AMI id to use')
    parser.add_argument('-i', '--instance_type', required=True, help='the instance type to use')
    parser.add_argument('-c', '--created_by', help='the username to use')
    parser.add_argument('-n', '--name', required=True, help='the instance name to use')
    parser.add_argument('-s', '--spot_price', type=float, help='the spot price to use')
    parser.add_argument('-b', '--bash_script', help='the bash script to use')
    parser.add_argument('-e', '--ebs', type=int, help='the size in GB of any EBS volume')
    parser.add_argument('bottom_frequency', help='The bottom frequency')
    parser.add_argument('frequency_range', help='the range of frequencies')
    parser.add_argument('obs_id', help='the observation id')

    args = vars(parser.parse_args())

    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(args))
    else:
        start_servers(
            corrected_args['ami_id'],
            corrected_args['user_data'],
            corrected_args['setup_disks'],
            args['instance_type'],
            make_safe_filename(args['obs_id']),
            corrected_args['created_by'],
            args['name'],
            corrected_args['instance_details'],
            corrected_args['spot_price'],
            args['ebs'],
            args['bottom_frequency'],
            args['frequency_range'])
Example #19
def main():
    parser = argparse.ArgumentParser('Start a number of CVEL servers')
    parser.add_argument('-a', '--ami_id', help='the AMI id to use')
    parser.add_argument('-i', '--instance_type', required=True, help='the instance type to use')
    parser.add_argument('-c', '--created_by', help='the username to use')
    parser.add_argument('-n', '--name', required=True, help='the instance name to use')
    parser.add_argument('-s', '--spot_price', type=float, help='the spot price to use')
    parser.add_argument('-b', '--bash_script', help='the bash script to use')
    parser.add_argument('-p', '--processes', type=int, default=1, help='the number of processes to run')
    parser.add_argument('-fc', '--frequency_channels', type=int, default=28, help='how many frequency channels per AWS instance')
    parser.add_argument('--force', action='store_true', default=False, help='proceed with a frequency band even if we already have it')

    parser.add_argument('obs_ids', nargs='+', help='the ids of the observation')

    args = vars(parser.parse_args())

    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(args))
    else:
        start_servers(
            args['processes'],
            corrected_args['ami_id'],
            corrected_args['user_data'],
            corrected_args['setup_disks'],
            args['instance_type'],
            corrected_args['obs_ids'],
            corrected_args['created_by'],
            args['name'],
            corrected_args['instance_details'],
            corrected_args['spot_price'],
            args['frequency_channels'],
            args['force'])
Example #20
def copy_files(frequency_id):
    s3_helper = S3Helper()
    # Look in the output directory
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))
    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)
        if isdir(result_dir) and dir_name.startswith(
                'cube_') and dir_name.endswith('.image'):
            LOGGER.info('dir_name: {0}'.format(dir_name))
            output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    '/CLEAN/{0}/{1}'.format(frequency_id,
                                            basename(output_tar_filename)),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                make_tarfile(output_tar_filename, result_dir)

                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME, 'CVEL/{0}/{1}/data.tar'.format(
                        frequency_id, basename(output_tar_filename)),
                    output_tar_filename)

                # Clean up
                os.remove(output_tar_filename)
Example #21
def copy_files(args):
    # Create the queue
    queue = multiprocessing.JoinableQueue()
    # Start the consumers
    for x in range(PROCESSES):
        consumer = Consumer(queue)
        consumer.start()

    # Look in the output directory
    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, '13B-266*calibrated_deepfield.ms'):
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))

            queue.put(
                CopyTask(
                    args.bucket,
                    match,
                    result_dir,
                    args.aws_access_key_id,
                    args.aws_secret_access_key
                )
            )

    # Add a poison pill to shut things down
    for x in range(PROCESSES):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
Example #22
    def __call__(self):
        """
        Actually run the job
        """
        if self._tar_file.endswith('.tar.gz'):
            image_name = basename(self._tar_file).replace('.tar.gz', '')
        else:
            image_name = basename(self._tar_file).replace('.tar', '')
        directory = join(self._directory, image_name)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(
                self._key.key, self._tar_file, directory))
            if not os.path.exists(directory):
                os.makedirs(directory)
            self._key.get_contents_to_filename(self._tar_file)
            with closing(
                    tarfile.open(
                        self._tar_file, "r:gz" if
                        self._tar_file.endswith('.tar.gz') else "r:")) as tar:
                tar.extractall(path=directory)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(directory, ignore_errors=True)
Example #23
def copy_files(frequency_id, processes, days):
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(
        bucket, frequency_id))

    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('/data.tar.gz') or key.key.endswith('/data.tar'):
            elements = key.key.split('/')
            if elements[2] in days:
                directory = '/mnt/output/Chiles/split_vis/{0}/'.format(
                    elements[2])

                # Queue the copy of the file
                temp_file = os.path.join(
                    directory, 'data.tar.gz'
                    if key.key.endswith('/data.tar.gz') else 'data.tar')
                queue.put(Task(key, temp_file, directory, frequency_id))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
Example #24
def processing_object_builder(type, output_file, handler):
    if type == 'general':
        return ProblemsDetectorGeneral(output_file, handler)
    elif type == 'lock_submit':
        return SubmitLockDetector(output_file, handler, mariadb_connection=open_mdb_connection())
    else:
        LOGGER.error("Provide a defined type in the configuration file!")
        sys.exit(1)
Example #25
 def __init__(self, output_file, handler: PatternHandlerFactory,
              mariadb_connection):
     super().__init__(output_file, handler)
     self.mariadb_connection = mariadb_connection
     try:
         self.query = self.handler.pattern_dict['mariadb_query']
     except KeyError:
         LOGGER.error('Provide a MariaDB query!')
         sys.exit(1)
Example #26
 def __call__(self):
     # noinspection PyBroadException
     try:
         LOGGER.info('Copying {0} to s3:{1}'.format(self._filename,
                                                    self._bucket_location))
         s3_helper = S3Helper()
         s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME,
                                      self._bucket_location, self._filename)
     except Exception:
         LOGGER.exception('CopyTask died')
Example #27
    def add_tar_to_bucket_multipart(self,
                                    bucket_name,
                                    key_name,
                                    source_path,
                                    gzip=False,
                                    parallel_processes=2,
                                    reduced_redundancy=True,
                                    bufsize=10 * 1024 * 1024):
        """
        Parallel multipart upload.
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, reduced_redundancy: {4}, bufsize: {5}'
            .format(bucket_name, key_name, source_path, parallel_processes,
                    reduced_redundancy, bufsize))
        bucket = self.get_bucket(bucket_name)

        headers = {
            'Content-Type':
            mimetypes.guess_type(key_name)[0] or 'application/octet-stream'
        }
        mp = bucket.initiate_multipart_upload(
            key_name, headers=headers, reduced_redundancy=reduced_redundancy)
        s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes,
                             self._aws_access_key_id,
                             self._aws_secret_access_key)

        if gzip:
            mode = "w|gz"
        else:
            mode = "w|"
        tar = tarfile.open(mode=mode,
                           fileobj=s3_feeder,
                           bufsize=int(bufsize / 10))

        complete = True
        # noinspection PyBroadException
        try:
            for entry in os.listdir(source_path):
                full_filename = join(source_path, entry)
                LOGGER.info('tar: [full_filename: {0}, entry: {1}]'.format(
                    full_filename, entry))
                tar.add(full_filename, arcname=entry)

            tar.close()
            s3_feeder.close()
        except Exception:
            complete = False
            s3_feeder.close()

        # Finish the upload
        if complete:
            mp.complete_upload()
        else:
            mp.cancel_upload()
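
tarfile writes the archive straight into the S3Feeder object, so that class must behave like a writable file. A hypothetical, single-process sketch of it (the real implementation presumably uses parallel_processes to upload parts concurrently; get_s3_connection is assumed from Example #5):

import io


class S3Feeder(object):
    def __init__(self, multipart_id, bufsize, bucket_name, parallel_processes,
                 aws_access_key_id, aws_secret_access_key):
        conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
        self._bucket = conn.get_bucket(bucket_name)
        self._multipart_id = multipart_id
        self._bufsize = bufsize
        self._buffer = b''
        self._part_num = 0

    def write(self, data):
        # Accumulate tar output until a full part is available
        self._buffer += data
        while len(self._buffer) >= self._bufsize:
            self._upload(self._buffer[:self._bufsize])
            self._buffer = self._buffer[self._bufsize:]

    def close(self):
        # Flush whatever is left as the final (possibly short) part
        if self._buffer:
            self._upload(self._buffer)
            self._buffer = b''

    def _upload(self, chunk):
        self._part_num += 1
        for mp in self._bucket.get_all_multipart_uploads():
            if mp.id == self._multipart_id:
                mp.upload_part_from_file(io.BytesIO(chunk), part_num=self._part_num)
                break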
Example #28
def start_servers(
        processes,
        ami_id,
        user_data,
        setup_disks,
        instance_type,
        obs_ids,
        created_by,
        name,
        instance_details,
        spot_price,
        frequency_channels,
        force):
    cvel_data = get_cvel()

    # Create the queue
    tasks = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(tasks)
        consumer.start()

    counter = 1
    for obs_id in obs_ids:
        snapshot_id = OBS_IDS.get(obs_id)
        if snapshot_id is None:
            LOGGER.warning('The obs-id: {0} does not exist in the settings file'.format(obs_id))
        else:
            obs_id_dashes = obs_id.replace('_', '-')
            for frequency_groups in get_frequency_groups(frequency_channels, obs_id_dashes, cvel_data, force):
                tasks.put(
                    Task(
                        ami_id,
                        user_data,
                        setup_disks,
                        instance_type,
                        obs_id,
                        snapshot_id,
                        created_by,
                        name,
                        spot_price,
                        instance_details,
                        frequency_groups,
                        counter
                    )
                )
                counter += 1

    # Add a poison pill to shut things down
    for x in range(processes):
        tasks.put(None)

    # Wait for the queue to terminate
    tasks.join()
Example #29
 def __call__(self):
     # noinspection PyBroadException
     try:
         LOGGER.info('Copying {0} to s3:{1}'.format(self._filename, self._bucket_location))
         s3_helper = S3Helper()
         s3_helper.add_file_to_bucket(
             CHILES_BUCKET_NAME,
             self._bucket_location,
             self._filename)
     except Exception:
         LOGGER.exception('CopyTask died')
Example #30
def get_mime_encoded_user_data(volume_id, setup_disks, in_user_data, now):
    """
    AWS allows for a multipart MIME user-data message.
    """
    user_data = MIMEMultipart()
    user_data.attach(get_cloud_init())

    data_formatted = in_user_data.format(volume_id, now, PIP_PACKAGES)
    LOGGER.info(data_formatted)
    user_data.attach(MIMEText(setup_disks + data_formatted))
    return user_data.as_string()
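
get_cloud_init() is not shown in these examples; a plausible sketch is a text/cloud-config MIME part, which is the content type cloud-init recognises inside multipart user data (the cloud-config body here is a placeholder):

from email.mime.text import MIMEText


def get_cloud_init():
    # Placeholder cloud-config; the real file would configure the instance
    return MIMEText('#cloud-config\nrepo_update: true\nrepo_upgrade: all\n',
                    'cloud-config')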
Example #31
def get_mime_encoded_user_data(volume_id, setup_disks, in_user_data, now):
    """
    AWS allows for a multipart MIME user-data message.
    """
    user_data = MIMEMultipart()
    user_data.attach(get_cloud_init())

    data_formatted = in_user_data.format(volume_id, now, PIP_PACKAGES)
    LOGGER.info(data_formatted)
    user_data.attach(MIMEText(setup_disks + data_formatted))
    return user_data.as_string()
Example #32
    def handle_stop_pong_event(self, event_data: t.Dict[str, t.Any]) -> None:
        global g_processing_xxx_id
        xxx_id = event_data['xxx_id']
        if xxx_id != g_processing_xxx_id:
            return
        LOGGER.info("stop self")

        def kill_self() -> None:
            time.sleep(0.1)
            os.kill(os.getpid(), signal.SIGQUIT)

        threading.Thread(target=kill_self, daemon=True).start()
Example #33
    def add_file_to_bucket_multipart(self,
                                     bucket_name,
                                     key_name,
                                     source_path,
                                     parallel_processes=2,
                                     reduced_redundancy=True):
        """
        Parallel multipart upload.
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'
            .format(bucket_name, key_name, source_path, parallel_processes,
                    reduced_redundancy))

        source_size = os.stat(source_path).st_size
        bytes_per_chunk = 10 * 1024 * 1024
        chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
        if chunk_amount < 10000:
            bucket = self.get_bucket(bucket_name)

            headers = {
                'Content-Type':
                mimetypes.guess_type(key_name)[0] or 'application/octet-stream'
            }
            mp = bucket.initiate_multipart_upload(
                key_name,
                headers=headers,
                reduced_redundancy=reduced_redundancy)

            LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(
                bytes_per_chunk, chunk_amount))

            # You can only upload 10,000 chunks
            pool = Pool(processes=parallel_processes)
            for i in range(chunk_amount):
                offset = i * bytes_per_chunk
                remaining_bytes = source_size - offset
                bytes_to_copy = min([bytes_per_chunk, remaining_bytes])
                part_num = i + 1
                pool.apply_async(upload_part, [
                    self._aws_access_key_id, self._aws_secret_access_key,
                    bucket_name, mp.id, part_num, source_path, offset,
                    bytes_to_copy
                ])
            pool.close()
            pool.join()

            if len(mp.get_all_parts()) == chunk_amount:
                mp.complete_upload()
            else:
                mp.cancel_upload()
        else:
            raise S3UploadException('Too many chunks')
Example #34
def detect_wrong_format(input_file, pattern):
    """Delete entries in ldap older than limit_days_ago before today
        :param input_file: Ldap dumb file to parse
        :param pattern: Pattern to be detected, it must be defined in config.yml!
    """
    cfg = load_config()
    try:
        cfg[pattern]
    except KeyError:
        LOGGER.error("Pattern not found in the config.yml file!")
        sys.exit(1)
    else:
        detect_ldap_problems.detect_wrong_format(cfg[pattern], input_file)
Example #35
def main():
    parser = argparse.ArgumentParser('Start a number of CVEL servers')
    parser.add_argument('-a', '--ami_id', help='the AMI id to use')
    parser.add_argument('-i',
                        '--instance_type',
                        required=True,
                        help='the instance type to use')
    parser.add_argument('-c', '--created_by', help='the username to use')
    parser.add_argument('-n',
                        '--name',
                        required=True,
                        help='the instance name to use')
    parser.add_argument('-s',
                        '--spot_price',
                        type=float,
                        help='the spot price to use')
    parser.add_argument('-b', '--bash_script', help='the bash script to use')
    parser.add_argument('-p',
                        '--processes',
                        type=int,
                        default=1,
                        help='the number of processes to run')
    parser.add_argument('-fc',
                        '--frequency_channels',
                        type=int,
                        default=28,
                        help='how many frequency channels per AWS instance')
    parser.add_argument(
        '--force',
        action='store_true',
        default=False,
        help='proceed with a frequency band even if we already have it')

    parser.add_argument('obs_ids',
                        nargs='+',
                        help='the ids of the observation')

    args = vars(parser.parse_args())

    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(args))
    else:
        start_servers(args['processes'], corrected_args['ami_id'],
                      corrected_args['user_data'],
                      corrected_args['setup_disks'], args['instance_type'],
                      corrected_args['obs_ids'], corrected_args['created_by'],
                      args['name'], corrected_args['instance_details'],
                      corrected_args['spot_price'], args['frequency_channels'],
                      args['force'])
Example #36
 def start() -> None:
     LOGGER.info(f"start_work with {xxx_id}")
     list(range(20000))
     try:
         while True:
             list(range(3000))
             self.dispatch("processed", {
                 'xxx_id': xxx_id,
                 'info': {},
             })
             r.expire(processing_key, 3)
             time.sleep(1)
     finally:
         r.delete(processing_key)
Example #37
def open_mdb_connection():
    for host in MARIA_DB_CONFIGURATION['hosts']:
        try:
            connection = pymysql.connect(
                user=MARIA_DB_CONFIGURATION['user'],
                password=MARIA_DB_CONFIGURATION['password'],
                database=MARIA_DB_CONFIGURATION['database'],
                host=host,
                cursorclass=pymysql.cursors.DictCursor,
                autocommit=True,
            )
        except Exception:
            if host == MARIA_DB_CONFIGURATION['hosts'][-1]:
                log_str = "Failed to connect to any MariaDB host"
                LOGGER.exception(log_str, exc_info=True)
                raise DBError(log_str)
            else:
                LOGGER.warning(
                    'Failed to connect to MariaDB on host {}. Trying next host.'
                    .format(host))
        else:
            if connection.open:
                LOGGER.debug(
                    'mariadb connection to host {} successful.'.format(host))
                return connection
            else:
                err_str = 'Connection to MariaDB failed.'
                LOGGER.error(err_str)
                raise DBError(err_str)
Example #38
    def get_mime_encoded_user_data(self, volume_id):
        """
        AWS allows for a multipart MIME user-data message.
        """
        user_data = MIMEMultipart()
        user_data.attach(get_cloud_init())

        # Build the strings we need
        cvel_pipeline = self.build_cvel_pipeline()

        data_formatted = self._user_data.format(cvel_pipeline, self._obs_id, volume_id, self._now, self._counter, PIP_PACKAGES)
        LOGGER.info(data_formatted)
        user_data.attach(MIMEText(self._setup_disks + data_formatted))
        return user_data.as_string()
Example #39
    def add_tar_to_bucket_multipart(self, bucket_name, key_name, source_path, gzip=False, parallel_processes=2, reduced_redundancy=True, bufsize=10*1024*1024):
        """
        Parallel multipart upload.
        """
        LOGGER.info(
            'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, reduced_redundancy: {4}, bufsize: {5}'.format(
                bucket_name,
                key_name,
                source_path,
                parallel_processes,
                reduced_redundancy,
                bufsize
            )
        )
        bucket = self.get_bucket(bucket_name)

        headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
        mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)
        s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes, self._aws_access_key_id, self._aws_secret_access_key)

        if gzip:
            mode = "w|gz"
        else:
            mode = "w|"
        tar = tarfile.open(mode=mode, fileobj=s3_feeder, bufsize=int(bufsize / 10))

        complete = True
        # noinspection PyBroadException
        try:
            for entry in os.listdir(source_path):
                full_filename = join(source_path, entry)
                LOGGER.info(
                    'tar: [full_filename: {0}, entry: {1}]'.format(
                        full_filename,
                        entry
                    )
                )
                tar.add(full_filename, arcname=entry)

            tar.close()
            s3_feeder.close()
        except Exception:
            complete = False
            s3_feeder.close()

        # Finish the upload
        if complete:
            mp.complete_upload()
        else:
            mp.cancel_upload()
Example #40
def copy_files(date, vis_file):
    s3_helper = S3Helper()
    # Look in the output directory
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(
            root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date), result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)

                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file,
                                                   date), output_tar_filename)

                # Clean up
                os.remove(output_tar_filename)

            shutil.rmtree(result_dir, ignore_errors=True)
Example #41
def copy_files(date, vis_file):
    s3_helper = S3Helper()
    # Look in the output directory
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)

                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    output_tar_filename)

                # Clean up
                os.remove(output_tar_filename)

            shutil.rmtree(result_dir, ignore_errors=True)
Example #42
def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    if args['obs_ids'] is None:
        return None
    elif len(args['obs_ids']) == 1 and args['obs_ids'][0] == '*':
        map_args['obs_ids'] = OBS_IDS.keys()
    else:
        map_args['obs_ids'] = args['obs_ids']

    if args['instance_type'] is None:
        return None

    if args['name'] is None:
        return None

    instance_details = AWS_INSTANCES.get(args['instance_type'])
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(
            args['instance_type']))
        return None
    else:
        LOGGER.info(
            'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'
            .format(args['instance_type'], instance_details.vCPU,
                    instance_details.memory, instance_details.number_disks,
                    instance_details.size, instance_details.iops_support))

    map_args.update({
        'ami_id':
        args['ami_id'] if args['ami_id'] is not None else AWS_AMI_ID,
        'created_by':
        args['created_by']
        if args['created_by'] is not None else getpass.getuser(),
        'spot_price':
        args['spot_price'] if args['spot_price'] is not None else None,
        'user_data':
        get_script(args['bash_script']
                   if args['bash_script'] is not None else BASH_SCRIPT_CVEL),
        'setup_disks':
        get_script(BASH_SCRIPT_SETUP_DISKS),
        'instance_details':
        instance_details,
    })

    return map_args
Example #43
def get_cvel():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    cvel_data = {}
    for key in bucket.list(prefix='CVEL/'):
        LOGGER.info('Checking {0}'.format(key.key))
        if key.key.endswith('data.tar.gz') or key.key.endswith('data.tar'):
            elements = key.key.split('/')
            data_list = cvel_data.get(str(elements[1]))
            if data_list is None:
                data_list = []
                cvel_data[str(elements[1])] = data_list
            data_list.append(str(elements[2]))

    return cvel_data
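
Given keys of the form CVEL/<frequency_id>/<observation or day>/data.tar, the mapping groups the second path element under each frequency id; an illustrative, made-up result:

cvel_data = {
    'vis_1400~1404': ['obs-id-1', 'obs-id-2'],
    'vis_1404~1408': ['obs-id-1'],
}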
Example #44
def get_cvel():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    cvel_data = {}
    for key in bucket.list(prefix='CVEL/'):
        LOGGER.info('Checking {0}'.format(key.key))
        if key.key.endswith('data.tar.gz') or key.key.endswith('data.tar'):
            elements = key.key.split('/')
            data_list = cvel_data.get(str(elements[1]))
            if data_list is None:
                data_list = []
                cvel_data[str(elements[1])] = data_list
            data_list.append(str(elements[2]))

    return cvel_data
Example #45
    def get_mime_encoded_user_data(self, volume_id):
        """
        AWS allows for a multipart MIME user-data message.
        """
        user_data = MIMEMultipart()
        user_data.attach(get_cloud_init())

        # Build the strings we need
        cvel_pipeline = self.build_cvel_pipeline()

        data_formatted = self._user_data.format(cvel_pipeline, self._obs_id,
                                                volume_id, self._now,
                                                self._counter, PIP_PACKAGES)
        LOGGER.info(data_formatted)
        user_data.attach(MIMEText(self._setup_disks + data_formatted))
        return user_data.as_string()
Example #46
def get_mime_encoded_user_data(instance_details, setup_disks, user_data):
    """
    AWS allows for a multipart MIME user-data message.
    """
    # Split the frequencies
    min_freq = 940
    max_freq = 1424
    LOGGER.info("min_freq: {0}, max_freq: {1}".format(min_freq, max_freq))

    # Build the mime message
    mime_data = MIMEMultipart()
    mime_data.attach(get_cloud_init())

    swap_size = get_swap_size(instance_details)
    data_formatted = user_data.format("TODO", min_freq, max_freq, swap_size, PIP_PACKAGES)
    mime_data.attach(MIMEText(setup_disks + data_formatted))
    return mime_data.as_string()
Example #47
def like_em_all(users):
    iterator = PostIterator()

    for user in users:
        access, refresh = obtain_token_pair(user['username'],
                                            config['default_password'])
        iterator.set_auth(access, refresh)
        iterator.set_user(user)
        iterator.set_posts(get_all_posts())

        while True:
            try:
                post_to_like = next(iterator)
                perform_like(post=post_to_like, user=user, token=access)
            except StopIteration:
                break
            except BotClientError as e:
                LOGGER.warning("Failed to perform a like: {}".format(str(e)))
Example #48
    def __call__(self):
        # noinspection PyBroadException
        try:
            s3_helper = S3Helper(self._aws_access_key_id, self._aws_secret_access_key)
            LOGGER.info('Copying to: {0}/{1}/measurement_set.tar'.format(self._bucket, self._bucket_location))

            # We can have 10,000 parts
            # The biggest file from Semester 1 is 803GB
            # So 100 MB
            s3_helper.add_tar_to_bucket_multipart(
                self._bucket,
                '{0}/measurement_set.tar'.format(self._bucket_location),
                self._filename,
                parallel_processes=2,
                bufsize=100*1024*1024
            )
        except Exception:
            LOGGER.exception('CopyTask died')
Example #49
def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    if args['obs_ids'] is None:
        return None
    elif len(args['obs_ids']) == 1 and args['obs_ids'][0] == '*':
        map_args['obs_ids'] = OBS_IDS.keys()
    else:
        map_args['obs_ids'] = args['obs_ids']

    if args['instance_type'] is None:
        return None

    if args['name'] is None:
        return None

    instance_details = AWS_INSTANCES.get(args['instance_type'])
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(args['instance_type']))
        return None
    else:
        LOGGER.info(
            'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'.format(
                args['instance_type'],
                instance_details.vCPU,
                instance_details.memory,
                instance_details.number_disks,
                instance_details.size,
                instance_details.iops_support))

    map_args.update({
        'ami_id': args['ami_id'] if args['ami_id'] is not None else AWS_AMI_ID,
        'created_by': args['created_by'] if args['created_by'] is not None else getpass.getuser(),
        'spot_price': args['spot_price'] if args['spot_price'] is not None else None,
        'user_data': get_script(args['bash_script'] if args['bash_script'] is not None else BASH_SCRIPT_CVEL),
        'setup_disks': get_script(BASH_SCRIPT_SETUP_DISKS),
        'instance_details': instance_details,
    })

    return map_args
Example #50
    def get_mime_encoded_user_data(self):
        """
        AWS allows for a multipart MIME user-data message.
        """
        # Split the frequencies
        index_underscore = find(self._frequency_id, '_')
        index_tilde = find(self._frequency_id, '~')
        min_freq = self._frequency_id[index_underscore + 1:index_tilde]
        max_freq = self._frequency_id[index_tilde + 1:]
        LOGGER.info('min_freq: {0}, max_freq: {1}'.format(min_freq, max_freq))

        # Build the mime message
        user_data = MIMEMultipart()
        user_data.attach(get_cloud_init())

        swap_size = self.get_swap_size()
        data_formatted = self._user_data.format(self._frequency_id, min_freq, max_freq, swap_size, PIP_PACKAGES)
        user_data.attach(MIMEText(self._setup_disks + data_formatted))
        return user_data.as_string()
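
A worked example of the string slicing above for a hypothetical frequency id of the form used elsewhere in these examples (vis_14XX~14YY), using str.find in place of the string-module find helper:

frequency_id = 'vis_1400~1404'
index_underscore = frequency_id.find('_')   # 3
index_tilde = frequency_id.find('~')        # 8
min_freq = frequency_id[index_underscore + 1:index_tilde]  # '1400'
max_freq = frequency_id[index_tilde + 1:]                  # '1404'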
Example #51
def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    if args["snapshots"] is None:
        return None

    if args["instance_type"] is None:
        return None

    if args["name"] is None:
        return None

    instance_details = AWS_INSTANCES.get(args["instance_type"])
    if instance_details is None:
        LOGGER.error("The instance type {0} is not supported.".format(args["instance_type"]))
        return None
    else:
        LOGGER.info(
            "instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}".format(
                args["instance_type"],
                instance_details.vCPU,
                instance_details.memory,
                instance_details.number_disks,
                instance_details.size,
                instance_details.iops_support,
            )
        )

    map_args.update(
        {
            "ami_id": args["ami_id"] if args["ami_id"] is not None else AWS_AMI_ID,
            "created_by": args["created_by"] if args["created_by"] is not None else getpass.getuser(),
            "spot_price": args["spot_price"] if args["spot_price"] is not None else None,
            "user_data": get_script(args["bash_script"] if args["bash_script"] is not None else BASH_SCRIPT_CLEAN_ALL),
            "setup_disks": get_script(BASH_SCRIPT_SETUP_DISKS),
            "instance_details": instance_details,
        }
    )
    return map_args
Example #52
def copy_files(cube):
    s3_helper = S3Helper()
    # Look in the output directory
    directory_to_save = join(CHILES_IMGCONCAT_OUTPUT, cube) + '.cube'
    if isdir(directory_to_save):
        LOGGER.info('dir_name: {0}'.format(directory_to_save))
        output_tar_filename = directory_to_save + '.tar'

        if can_be_multipart_tar(directory_to_save):
            LOGGER.info('Using add_tar_to_bucket_multipart')
            s3_helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME,
                'IMGCONCAT/{0}'.format(basename(output_tar_filename)),
                directory_to_save,
                bufsize=20 * 1024 * 1024)
        else:
            LOGGER.info('Using make_tarfile, then adding file to bucket')
            make_tarfile(output_tar_filename, directory_to_save)

            s3_helper.add_file_to_bucket(
                CHILES_BUCKET_NAME,
                'IMGCONCAT/{0}'.format(basename(output_tar_filename)),
                output_tar_filename)

            # Clean up
            os.remove(output_tar_filename)
Example #53
def copy_files(s3_tag, processes):
    # Create the queue
    queue = multiprocessing.JoinableQueue()
    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    # Look in the output directory
    today = datetime.date.today()
    for root, dir_names, filenames in os.walk(CHILES_LOGS):
        for match in fnmatch.filter(filenames, '*.log'):
            LOGGER.info('Looking at: {0}'.format(join(root, match)))
            queue.put(CopyTask(join(root, match), '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)))

    for root, dir_names, filenames in os.walk(BENCHMARKING_LOGS):
        for match in fnmatch.filter(filenames, '*.csv'):
            LOGGER.info('Looking at: {0}'.format(join(root, match)))
            queue.put(CopyTask(join(root, match), '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)))
        for match in fnmatch.filter(filenames, '*.log'):
            LOGGER.info('Looking at: {0}'.format(join(root, match)))
            queue.put(CopyTask(join(root, match), '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)))

    queue.put(CopyTask('/var/log/chiles-output.log', '{0}/{1}{2:02d}{3:02d}/chiles-output.log'.format(s3_tag, today.year, today.month, today.day)))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
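
The destination key built above follows the pattern <s3_tag>/<YYYYMMDD>/<filename>; a worked example with hypothetical values:

import datetime

s3_tag, match = 'chiles-logs', 'casa.log'
today = datetime.date(2014, 6, 3)
'{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)
# -> 'chiles-logs/20140603/casa.log'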
Example #54
    def run_instance(self, ami_id, user_data, instance_type, volume_id, created_by, name, zone, ephemeral=False):
        """
        Run up an instance
        """
        bdm = self.build_block_device_map(ephemeral)

        LOGGER.info('Running instance: ami: {0}'.format(ami_id))
        reservations = self.ec2_connection.run_instances(ami_id,
                                                         instance_type=instance_type,
                                                         instance_initiated_shutdown_behavior='terminate',
                                                         subnet_id=AWS_SUBNETS[zone],
                                                         key_name=AWS_KEY_NAME,
                                                         security_group_ids=AWS_SECURITY_GROUPS,
                                                         user_data=user_data,
                                                         block_device_map=bdm)
        instance = reservations.instances[0]
        time.sleep(5)

        while not instance.update() == 'running':
            LOGGER.info('Not running yet')
            time.sleep(5)

        if volume_id:
            # Now we have an instance id we can attach the disk
            self.ec2_connection.attach_volume(volume_id, instance.id, '/dev/xvdf')

        LOGGER.info('Assigning the tags')
        self.ec2_connection.create_tags([instance.id],
                                        {'AMI': '{0}'.format(ami_id),
                                         'Name': '{0}'.format(name),
                                         'Volume_id': '{0}'.format(volume_id),
                                         'Created By': '{0}'.format(created_by)})

        return instance
Example #55
    def __call__(self):
        """
        Actually run the job
        """
        if self._tar_file.endswith('.tar.gz'):
            image_name = basename(self._tar_file).replace('.tar.gz', '')
        else:
            image_name = basename(self._tar_file).replace('.tar', '')
        directory = join(self._directory, image_name)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(self._key.key, self._tar_file, directory))
            if not os.path.exists(directory):
                os.makedirs(directory)
            self._key.get_contents_to_filename(self._tar_file)
            with closing(tarfile.open(self._tar_file, "r:gz" if self._tar_file.endswith('.tar.gz') else "r:")) as tar:
                tar.extractall(path=directory)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(directory, ignore_errors=True)
Example #56
    def __call__(self):
        """
        Actually run the job
        """
        corrected_path = join(self._directory, self._frequency_id)
        # noinspection PyBroadException
        try:
            LOGGER.info('key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.format(
                self._key.key,
                self._tar_file,
                self._directory,
                self._frequency_id))
            if not os.path.exists(corrected_path):
                os.makedirs(corrected_path)
            self._key.get_contents_to_filename(self._tar_file)
            with closing(tarfile.open(self._tar_file, "r:gz" if self._tar_file.endswith('tar.gz') else "r:")) as tar:
                tar.extractall(path=corrected_path)

            os.remove(self._tar_file)
        except Exception:
            LOGGER.exception('Task died')
            shutil.rmtree(corrected_path, ignore_errors=True)
Example #57
    def add_file_to_bucket(self, bucket_name, key_name, filename, reduced_redundancy=True):
        """
        Add file to a bucket

        :param bucket_name:
        :param key_name:
        :param filename:
        """
        LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'.format(bucket_name, key_name, filename, reduced_redundancy))
        retry_count = 0
        done = False
        while retry_count < 3 and not done:
            try:
                bucket = self.get_bucket(bucket_name)
                key = Key(bucket)
                key.key = key_name
                key.set_contents_from_filename(filename, reduced_redundancy=reduced_redundancy)
                done = True
            except socket.error:
                LOGGER.exception('Error')
                retry_count += 1
                time.sleep(10)
Example #58
def start_servers(
        ami_id,
        user_data,
        setup_disks,
        instance_type,
        obs_id,
        created_by,
        name,
        instance_details,
        spot_price,
        ebs,
        bottom_frequency,
        frequency_range):
    LOGGER.info('obs_id: {0}, bottom_frequency: {1}, frequency_range: {2}'.format(obs_id, bottom_frequency, frequency_range))
    ec2_helper = EC2Helper()
    zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)

    if zone is not None:
        # Swap size
        if ebs is None:
            swap_size = 1
        else:
            ephemeral_size = instance_details.number_disks * instance_details.size
            swap_size = min(int(ephemeral_size * 0.75), 16)

        user_data_mime = get_mime_encoded_user_data(
            user_data,
            obs_id,
            setup_disks,
            bottom_frequency,
            frequency_range,
            swap_size
        )
        LOGGER.info('{0}'.format(user_data_mime))

        ec2_helper.run_spot_instance(
            ami_id,
            spot_price,
            user_data_mime,
            instance_type,
            None,
            created_by,
            name + '- {0}'.format(obs_id),
            instance_details=instance_details,
            zone=zone,
            ebs_size=ebs,
            number_ebs_volumes=4,
            ephemeral=True)
    else:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(instance_type, spot_price))