def calculate_work_to_do(self):
        """Populate ``self._work_to_do`` from the measurement sets found in S3.

        Every object under the ``observation_data`` prefix of the bucket
        ``self._bucket_name`` whose key ends in
        ``_calibrated_deepfield.ms.tar`` becomes a ``MeasurementSetData``
        built from the second ``/``-separated element of the key and the
        object size.  For each of them, the entry recorded under its
        ``short_name`` in ``self._work_already_done`` is handed to
        ``self._get_details_for_measurement_set``; the result is stored in
        ``self._work_to_do`` unless ``self._ignore_day`` returns a true value.

        Side effects: sets ``self._bucket``, ``self._list_frequencies`` and
        ``self._work_already_done``.
        """
        aws_session = boto3.Session(profile_name='aws-chiles02')
        s3_resource = aws_session.resource('s3', use_ssl=False)
        self._bucket = s3_resource.Bucket(self._bucket_name)

        measurement_sets = []
        for s3_object in self._bucket.objects.filter(Prefix='observation_data'):
            # Only the calibrated deep-field tar files are of interest
            if not s3_object.key.endswith('_calibrated_deepfield.ms.tar'):
                continue

            LOG.info('Found {0}'.format(s3_object.key))
            path_parts = s3_object.key.split('/')
            measurement_sets.append(MeasurementSetData(path_parts[1], s3_object.size))

        # Frequency groups for the configured width, and what is already recorded as done
        self._list_frequencies = get_list_frequency_groups(self._width)
        self._work_already_done = self._get_work_already_done()

        for measurement_set in measurement_sets:
            # .get() yields None when nothing is recorded for this short name
            done_for_day = self._work_already_done.get(measurement_set.short_name)
            frequency_groups = self._get_details_for_measurement_set(done_for_day)

            if not self._ignore_day(frequency_groups):
                self._work_to_do[measurement_set] = frequency_groups
                continue

            LOG.info('{0} has already been process.'.format(measurement_set.full_tar_name))
Ejemplo n.º 2
0
    def calculate_work_to_do(self):
        """Queue every frequency pair whose cleaned tar file is not yet in S3.

        Lists the keys in the bucket ``self._bucket_name`` under the prefix
        ``self._s3_clean_name`` and then walks the groups returned by
        ``get_list_frequency_groups(self._width)``.  A group whose
        ``top_frequency`` is below ``self._min_frequency``, or whose
        ``bottom_frequency`` is above ``self._max_frequency``, is skipped; a
        bound of ``None`` disables that check.  Each remaining group is
        appended to ``self._work_to_do`` when the key
        ``<s3_clean_name>/cleaned_<bottom>_<top>.tar`` was not found.

        Side effects: sets ``self._bucket`` and ``self._list_frequencies``.
        """
        session = boto3.Session(profile_name='aws-chiles02')
        s3 = session.resource('s3', use_ssl=False)
        self._bucket = s3.Bucket(self._bucket_name)

        # A set keeps the membership test in the loop below O(1) per frequency pair
        cleaned_objects = set()
        for key in self._bucket.objects.filter(Prefix='{0}'.format(self._s3_clean_name)):
            cleaned_objects.add(key.key)
            LOG.info('{0} found'.format(key.key))

        # Work out which frequency groups still need cleaning
        self._list_frequencies = get_list_frequency_groups(self._width)
        for frequency_pair in self._list_frequencies:
            # Honour the optional min and max frequency bounds
            if self._min_frequency is not None and frequency_pair.top_frequency < self._min_frequency:
                continue
            if self._max_frequency is not None and frequency_pair.bottom_frequency > self._max_frequency:
                continue

            expected_tar_file = '{0}/cleaned_{1}_{2}.tar'.format(
                self._s3_clean_name,
                frequency_pair.bottom_frequency,
                frequency_pair.top_frequency,
            )
            if expected_tar_file not in cleaned_objects:
                self._work_to_do.append(frequency_pair)
Ejemplo n.º 3
0
def analyse_data(measurement_sets, split_entries, width):
    """Log and return the frequency groups not matched by any split entry.

    For every item of *measurement_sets* (keyed by its ``short_name``) the
    expected list is the ``name`` of each group returned by
    ``get_list_frequency_groups(width)``.  Each item of *split_entries* is
    indexed: ``element[0]`` selects the measurement set and ``element[1]`` is
    split on ``'_'`` to build a ``FrequencyPair`` whose ``name`` is removed
    from that set's list when present.

    Two messages are logged: one line per short name showing ``"All"`` when
    nothing was removed or otherwise the remaining names, and a
    space-separated list of the short names that still have names remaining.

    :param measurement_sets: iterable of objects with a ``short_name``
    :param split_entries: iterable of indexable ``(short_name, 'a_b')`` items
    :param width: passed straight to ``get_list_frequency_groups``
    :return: ``collections.OrderedDict`` mapping short name to the remaining
        frequency names, sorted by short name
    :raises KeyError: if a split entry names an unknown measurement set
    """
    # Build the expected list
    list_frequencies = get_list_frequency_groups(width)
    all_names = [frequency.name for frequency in list_frequencies]

    # Each measurement set needs its own copy because entries are removed below
    expected_combinations = {}
    for key in measurement_sets:
        expected_combinations[key.short_name] = list(all_names)

    for element in split_entries:
        frequencies = expected_combinations[element[0]]
        pair = element[1].split("_")
        frequency_pair = FrequencyPair(pair[0], pair[1]).name
        if frequency_pair in frequencies:
            frequencies.remove(frequency_pair)

    number_entries = len(list_frequencies)
    ordered_dictionary = collections.OrderedDict(sorted(expected_combinations.items()))

    detail_parts = ["\n"]
    name_parts = ["\n"]
    # items() rather than iteritems() so this runs on both Python 2 and Python 3
    for key, value in ordered_dictionary.items():
        if len(value) == number_entries:
            detail_parts.append('{0} = "All"\n'.format(key))
            name_parts.append("{0} ".format(key))
        else:
            detail_parts.append('{0} = "{1}"\n'.format(key, value))
            if len(value) >= 1:
                name_parts.append("{0} ".format(key))

    LOG.info("".join(detail_parts))
    LOG.info("".join(name_parts))

    return ordered_dictionary
Ejemplo n.º 4
0
def analyse_data(clean_entries, width):
    """Log which expected cleaned tar files are present in *clean_entries*.

    The expected names are ``cleaned_<bottom>_<top>.tar`` for every group
    returned by ``get_list_frequency_groups(width)``.  Two messages are
    logged: one line per expected name marked ``Done`` or ``Not done``, and a
    space-separated list of the names that are not done.

    :param clean_entries: container of tar file names, tested with ``in``
    :param width: passed straight to ``get_list_frequency_groups``
    """
    # Build the expected list
    expected_combinations = [
        'cleaned_{0}_{1}.tar'.format(frequency.bottom_frequency,
                                     frequency.top_frequency)
        for frequency in get_list_frequency_groups(width)
    ]

    output_parts = ['\n']
    list_output_parts = ['\n']
    for key in sorted(expected_combinations):
        # Test membership once, with the operator instead of the dunder method
        if key in clean_entries:
            output_parts.append('{0} = {1}\n'.format(key, 'Done'))
        else:
            output_parts.append('{0} = {1}\n'.format(key, 'Not done'))
            list_output_parts.append('{0} '.format(key))
    LOG.info(''.join(output_parts))
    LOG.info(''.join(list_output_parts))
Ejemplo n.º 5
0
    def calculate_work_to_do(self):
        """Queue every frequency pair whose cleaned tar file is not yet in S3.

        Lists the keys in the bucket ``self._bucket_name`` under the prefix
        ``self._s3_clean_name``, then appends to ``self._work_to_do`` each
        group from ``get_list_frequency_groups(self._width)`` whose key
        ``<s3_clean_name>/cleaned_<bottom>_<top>.tar`` was not found.

        Side effects: sets ``self._bucket`` and ``self._list_frequencies``.
        """
        session = boto3.Session(profile_name='aws-chiles02')
        s3 = session.resource('s3', use_ssl=False)
        self._bucket = s3.Bucket(self._bucket_name)

        # A set keeps the membership test in the loop below O(1) per frequency pair
        cleaned_objects = set()
        for key in self._bucket.objects.filter(
                Prefix='{0}'.format(self._s3_clean_name)):
            cleaned_objects.add(key.key)
            LOG.info('{0} found'.format(key.key))

        # Work out which frequency groups still need cleaning
        self._list_frequencies = get_list_frequency_groups(self._width)
        for frequency_pair in self._list_frequencies:
            expected_tar_file = '{0}/cleaned_{1}_{2}.tar'.format(
                self._s3_clean_name,
                frequency_pair.bottom_frequency,
                frequency_pair.top_frequency,
            )
            if expected_tar_file not in cleaned_objects:
                self._work_to_do.append(frequency_pair)
Ejemplo n.º 6
0
def analyse_data(measurement_sets, split_entries, width):
    """Log and return the frequency groups not matched by any split entry.

    For every item of *measurement_sets* (keyed by its ``short_name``) the
    expected list is the ``name`` of each group returned by
    ``get_list_frequency_groups(width)``.  Each item of *split_entries* is
    indexed: ``element[0]`` selects the measurement set and ``element[1]`` is
    split on ``'_'`` to build a ``FrequencyPair`` whose ``name`` is removed
    from that set's list when present.

    Two messages are logged: one line per short name showing ``"All"`` when
    nothing was removed or otherwise the remaining names, and a
    space-separated list of the short names that still have names remaining.

    :param measurement_sets: iterable of objects with a ``short_name``
    :param split_entries: iterable of indexable ``(short_name, 'a_b')`` items
    :param width: passed straight to ``get_list_frequency_groups``
    :return: ``collections.OrderedDict`` mapping short name to the remaining
        frequency names, sorted by short name
    :raises KeyError: if a split entry names an unknown measurement set
    """
    # Build the expected list
    list_frequencies = get_list_frequency_groups(width)
    all_names = [frequency.name for frequency in list_frequencies]

    # Each measurement set needs its own copy because entries are removed below
    expected_combinations = {}
    for key in measurement_sets:
        expected_combinations[key.short_name] = list(all_names)

    for element in split_entries:
        frequencies = expected_combinations[element[0]]
        pair = element[1].split('_')
        frequency_pair = FrequencyPair(pair[0], pair[1]).name
        if frequency_pair in frequencies:
            frequencies.remove(frequency_pair)

    number_entries = len(list_frequencies)
    ordered_dictionary = collections.OrderedDict(
        sorted(expected_combinations.items()))

    detail_parts = ['\n']
    name_parts = ['\n']
    # items() rather than iteritems() so this runs on both Python 2 and Python 3
    for key, value in ordered_dictionary.items():
        if len(value) == number_entries:
            detail_parts.append('{0} = "All"\n'.format(key))
            name_parts.append('{0} '.format(key))
        else:
            detail_parts.append('{0} = "{1}"\n'.format(key, value))
            if len(value) >= 1:
                name_parts.append('{0} '.format(key))

    LOG.info(''.join(detail_parts))
    LOG.info(''.join(name_parts))

    return ordered_dictionary
Ejemplo n.º 7
0
def analyse_data(clean_entries, width):
    """Log which expected cleaned tar files are present in *clean_entries*.

    The expected names are ``cleaned_<bottom>_<top>.tar`` for every group
    returned by ``get_list_frequency_groups(width)``.  Two messages are
    logged: one line per expected name marked ``Done`` or ``Not done``, and a
    space-separated list of the names that are not done.

    :param clean_entries: container of tar file names, tested with ``in``
    :param width: passed straight to ``get_list_frequency_groups``
    """
    # Build the expected list
    expected_combinations = [
        'cleaned_{0}_{1}.tar'.format(frequency.bottom_frequency, frequency.top_frequency)
        for frequency in get_list_frequency_groups(width)
    ]

    output_parts = ['\n']
    list_output_parts = ['\n']
    for key in sorted(expected_combinations):
        # Test membership once, with the operator instead of the dunder method
        if key in clean_entries:
            output_parts.append('{0} = {1}\n'.format(key, 'Done'))
        else:
            output_parts.append('{0} = {1}\n'.format(key, 'Not done'))
            list_output_parts.append('{0} '.format(key))
    LOG.info(''.join(output_parts))
    LOG.info(''.join(list_output_parts))