def calculate_work_to_do(self):
    """Build the map of measurement sets that still need processing.

    Lists the calibrated measurement-set tar files under the
    ``observation_data`` prefix of the S3 bucket, then drops any day whose
    frequency groups have all been processed already; the remainder is
    stored in ``self._work_to_do`` keyed by MeasurementSetData.
    """
    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    list_measurement_sets = []
    self._bucket = s3.Bucket(self._bucket_name)
    for key in self._bucket.objects.filter(Prefix='observation_data'):
        if key.key.endswith('_calibrated_deepfield.ms.tar'):
            LOG.info('Found {0}'.format(key.key))
            # Key layout: observation_data/<day>/..., so element 1 is the day.
            elements = key.key.split('/')
            list_measurement_sets.append(MeasurementSetData(elements[1], key.size))

    # Get work we've already done
    self._list_frequencies = get_list_frequency_groups(self._width)
    self._work_already_done = self._get_work_already_done()

    for day_to_process in list_measurement_sets:
        day_work_already_done = self._work_already_done.get(day_to_process.short_name)
        list_frequency_groups = self._get_details_for_measurement_set(day_work_already_done)

        if self._ignore_day(list_frequency_groups):
            # Fixed log message: "process" -> "processed".
            LOG.info('{0} has already been processed.'.format(day_to_process.full_tar_name))
        else:
            self._work_to_do[day_to_process] = list_frequency_groups
def calculate_work_to_do(self):
    """Collect the frequency pairs whose cleaned tar file is not yet in S3.

    Lists existing objects under ``self._s3_clean_name``, then appends to
    ``self._work_to_do`` every frequency group (within the optional
    min/max frequency window) whose expected tar file is missing.
    """
    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    cleaned_objects = []
    self._bucket = s3.Bucket(self._bucket_name)
    # The clean-name prefix is already a string; no need for str.format here.
    for key in self._bucket.objects.filter(Prefix=self._s3_clean_name):
        cleaned_objects.append(key.key)
        LOG.info('{0} found'.format(key.key))

    # Get work we've already done
    self._list_frequencies = get_list_frequency_groups(self._width)
    for frequency_pair in self._list_frequencies:
        # Skip pairs entirely outside the requested frequency window.
        if self._min_frequency is not None and frequency_pair.top_frequency < self._min_frequency:
            continue
        if self._max_frequency is not None and frequency_pair.bottom_frequency > self._max_frequency:
            continue

        expected_tar_file = '{0}/cleaned_{1}_{2}.tar'.format(
            self._s3_clean_name,
            frequency_pair.bottom_frequency,
            frequency_pair.top_frequency,
        )
        if expected_tar_file not in cleaned_objects:
            self._work_to_do.append(frequency_pair)
def analyse_data(measurement_sets, split_entries, width):
    """Report which frequency groups remain unsplit per measurement set.

    Builds the full expected set of frequency-group names for every
    measurement set, removes each group found in *split_entries*, then
    logs a per-day summary ("All" when nothing has been split) and a
    space-separated list of days with outstanding work.

    Returns an OrderedDict mapping short_name -> remaining group names.
    """
    # Build the expected list
    list_frequencies = get_list_frequency_groups(width)
    expected_combinations = {}
    for key in measurement_sets:
        list_data = []
        for frequency in list_frequencies:
            list_data.append(frequency.name)
        expected_combinations[key.short_name] = list_data

    for element in split_entries:
        frequencies = expected_combinations[element[0]]
        # element[1] looks like "<bottom>_<top>"; normalise via FrequencyPair.
        pair = element[1].split("_")
        frequency_pair = FrequencyPair(pair[0], pair[1]).name
        if frequency_pair in frequencies:
            frequencies.remove(frequency_pair)

    number_entries = len(list_frequencies)
    ordered_dictionary = collections.OrderedDict(sorted(expected_combinations.items()))
    output1 = "\n"
    output2 = "\n"
    # Use items() (not the Python-2-only iteritems()) for consistency with
    # the sorted(...items()) call above and Python 3 compatibility.
    for key, value in ordered_dictionary.items():
        if len(value) == number_entries:
            output1 += '{0} = "All"\n'.format(key)
            output2 += "{0} ".format(key)
        else:
            output1 += '{0} = "{1}"\n'.format(key, value)
            if len(value) >= 1:
                output2 += "{0} ".format(key)

    LOG.info(output1)
    LOG.info(output2)
    return ordered_dictionary
def analyse_data(clean_entries, width):
    """Log the Done/Not done status of each expected cleaned tar file.

    Builds the expected tar-file name for every frequency group at *width*,
    logs one "name = Done|Not done" line per file, and logs a second,
    space-separated list of the files still missing from *clean_entries*.
    """
    # Build the expected list
    expected_combinations = [
        'cleaned_{0}_{1}.tar'.format(frequency.bottom_frequency, frequency.top_frequency)
        for frequency in get_list_frequency_groups(width)
    ]
    output = '\n'
    list_output = '\n'
    for key in sorted(expected_combinations):
        # Use the `in` operator instead of calling __contains__ directly,
        # and test membership once instead of twice.
        done = key in clean_entries
        output += '{0} = {1}\n'.format(key, 'Done' if done else 'Not done')
        if not done:
            list_output += '{0} '.format(key)

    LOG.info(output)
    LOG.info(list_output)
def calculate_work_to_do(self):
    """Collect the frequency pairs whose cleaned tar file is not yet in S3.

    Lists existing objects under ``self._s3_clean_name`` and appends to
    ``self._work_to_do`` every frequency group whose expected tar file is
    missing from the bucket.
    """
    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    cleaned_objects = []
    self._bucket = s3.Bucket(self._bucket_name)
    # The clean-name prefix is already a string; no need for str.format here.
    for key in self._bucket.objects.filter(Prefix=self._s3_clean_name):
        cleaned_objects.append(key.key)
        LOG.info('{0} found'.format(key.key))

    # Get work we've already done
    self._list_frequencies = get_list_frequency_groups(self._width)
    for frequency_pair in self._list_frequencies:
        expected_tar_file = '{0}/cleaned_{1}_{2}.tar'.format(
            self._s3_clean_name,
            frequency_pair.bottom_frequency,
            frequency_pair.top_frequency,
        )
        if expected_tar_file not in cleaned_objects:
            self._work_to_do.append(frequency_pair)
def analyse_data(measurement_sets, split_entries, width):
    """Report which frequency groups remain unsplit per measurement set.

    Builds the full expected set of frequency-group names for every
    measurement set, removes each group found in *split_entries*, then
    logs a per-day summary ("All" when nothing has been split) and a
    space-separated list of days with outstanding work.

    Returns an OrderedDict mapping short_name -> remaining group names.
    """
    # Build the expected list
    list_frequencies = get_list_frequency_groups(width)
    expected_combinations = {}
    for key in measurement_sets:
        list_data = []
        for frequency in list_frequencies:
            list_data.append(frequency.name)
        expected_combinations[key.short_name] = list_data

    for element in split_entries:
        frequencies = expected_combinations[element[0]]
        # element[1] looks like '<bottom>_<top>'; normalise via FrequencyPair.
        pair = element[1].split('_')
        frequency_pair = FrequencyPair(pair[0], pair[1]).name
        if frequency_pair in frequencies:
            frequencies.remove(frequency_pair)

    number_entries = len(list_frequencies)
    ordered_dictionary = collections.OrderedDict(
        sorted(expected_combinations.items()))
    output1 = '\n'
    output2 = '\n'
    # Use items() (not the Python-2-only iteritems()) for consistency with
    # the sorted(...items()) call above and Python 3 compatibility.
    for key, value in ordered_dictionary.items():
        if len(value) == number_entries:
            output1 += '{0} = "All"\n'.format(key)
            output2 += '{0} '.format(key)
        else:
            output1 += '{0} = "{1}"\n'.format(key, value)
            if len(value) >= 1:
                output2 += '{0} '.format(key)

    LOG.info(output1)
    LOG.info(output2)
    return ordered_dictionary
def analyse_data(clean_entries, width):
    """Log the Done/Not done status of each expected cleaned tar file.

    Builds the expected tar-file name for every frequency group at *width*,
    logs one "name = Done|Not done" line per file, and logs a second,
    space-separated list of the files still missing from *clean_entries*.
    """
    # Build the expected list
    expected_combinations = [
        'cleaned_{0}_{1}.tar'.format(frequency.bottom_frequency, frequency.top_frequency)
        for frequency in get_list_frequency_groups(width)]
    output = '\n'
    list_output = '\n'
    for key in sorted(expected_combinations):
        # Use the `in` operator instead of calling __contains__ directly,
        # and test membership once instead of twice.
        done = key in clean_entries
        output += '{0} = {1}\n'.format(key, 'Done' if done else 'Not done')
        if not done:
            list_output += '{0} '.format(key)

    LOG.info(output)
    LOG.info(list_output)