def from_range_strings(ranges, boundtype=int):
    """
    Parse an iterable of ranges expressed as strings in the form "value"
    or "first:last" into an equivalent segments.segments.segmentlist.
    In the latter case, an empty string for "first" and(or) "last"
    indicates a (semi)infinite range.  A typical use for this function
    is in parsing command line options or entries in configuration
    files.

    ranges may be any iterable of strings (list, tuple, generator, ...).
    boundtype is called on every non-empty bound to convert it from
    text; it defaults to int.

    Raises ValueError (carrying the offending string) if an entry
    contains more than one ":".  Whatever boundtype raises for text it
    cannot convert is propagated unchanged.

    NOTE:  the output is a segmentlist as described by the strings;  if
    the segments in the input are not coalesced or are out of order,
    then thusly shall be the output of this function.  It is
    recommended that this function's output be coalesced before use.

    Example:

    >>> text = "0:10,35,100:"
    >>> from_range_strings(text.split(","))
    [segment(0, 10), segment(35, 35), segment(100, infinity)]
    """
    # Append as we go rather than pre-sizing with len(ranges), so that
    # one-shot iterables without a length are accepted too.
    segs = segments.segmentlist()

    for text in ranges:
        parts = text.split(":")

        if len(parts) == 1:
            # a lone value describes the zero-length segment [v, v]
            value = boundtype(parts[0])
            segs.append(segments.segment(value, value))
            continue

        if len(parts) != 2:
            raise ValueError(text)

        first, last = parts
        # an empty bound means "unbounded on that side"
        if first == "":
            first = segments.NegInfinity
        else:
            first = boundtype(first)
        if last == "":
            last = segments.PosInfinity
        else:
            last = boundtype(last)
        segs.append(segments.segment(first, last))

    return segs
def from_range_strings(ranges, boundtype=int):
    """
    Parse an iterable of ranges expressed as strings in the form "value"
    or "first:last" into an equivalent glue.segments.segmentlist.  In
    the latter case, an empty string for "first" and(or) "last"
    indicates a (semi)infinite range.  A typical use for this function
    is in parsing command line options or entries in configuration
    files.

    ranges can be a list, tuple, generator or any other iterable of
    strings.  Each non-empty bound is converted from text by calling
    boundtype on it (int by default).

    A ValueError carrying the offending string is raised when an entry
    has more than one ":".  Exceptions raised by boundtype itself are
    not intercepted.

    NOTE:  the output is a segmentlist as described by the strings;  if
    the segments in the input are not coalesced or are out of order,
    then thusly shall be the output of this function.  It is
    recommended that this function's output be coalesced before use.

    Example:

    >>> text = "0:10,35,100:"
    >>> from_range_strings(text.split(","))
    [segment(0, 10), segment(35, 35), segment(100, infinity)]
    """
    # Grow the result incrementally; pre-allocating len(ranges) slots
    # would needlessly restrict the input to sized containers.
    segs = segments.segmentlist()

    for entry in ranges:
        bounds = entry.split(":")

        if len(bounds) == 1:
            # "value" on its own is the degenerate segment [value, value]
            point = boundtype(bounds[0])
            segs.append(segments.segment(point, point))
        elif len(bounds) == 2:
            lower, upper = bounds
            # a missing bound extends the segment to infinity on that side
            if lower == "":
                lower = segments.NegInfinity
            else:
                lower = boundtype(lower)
            if upper == "":
                upper = segments.PosInfinity
            else:
                upper = boundtype(upper)
            segs.append(segments.segment(lower, upper))
        else:
            raise ValueError(entry)

    return segs
def ingest_csv(datasets=None, range=None):
    """
    Read one or more CSV files into lists of row dictionaries.

    datasets is an iterable of (name, path) pairs.  None (the default)
    or an empty iterable produces an empty result.

    range is an optional "low,high" string of integers.  When given,
    a row is kept only if reduce_filename() applied to its 'model'
    column lies between low and high, both ends included.  When omitted
    or empty, every row is kept.

    Returns a dict mapping each dataset name to the list of rows that
    csv.DictReader produced for its file.

    Raises AssertionError if any listed path is not an existing file.
    """
    # Materialise once: the pairs are walked twice below, and a one-shot
    # iterator would be exhausted by the existence check.
    datasets = [] if datasets is None else list(datasets)

    missing = [dsfile for _, dsfile in datasets
               if not os.path.isfile(dsfile)]
    if missing:
        # Raised explicitly (rather than via ``assert``) so the check
        # survives ``python -O``; the exception type is unchanged.
        raise AssertionError(
            'dataset file(s) not found: %s'
            % ', '.join(str(p) for p in missing))

    # A real list, not a lazy ``map`` object, because the bounds are
    # indexed for every row.
    bounds = None
    if range:
        bounds = [int(v) for v in range.split(',')]

    data = {}
    for dsname, dsfile in datasets:
        print('Reading %s from %s' % (dsname, dsfile))
        with open(dsfile) as f:
            rows = csv.DictReader(f)
            if bounds is None:
                data[dsname] = list(rows)
            else:
                data[dsname] = [
                    row for row in rows
                    if bounds[0] <= reduce_filename(row['model']) <= bounds[1]
                ]
    return data
def ingest_csv(datasets=None, range=None):
    """
    Load CSV datasets from disk as lists of row dictionaries.

    datasets: iterable of (name, path) pairs; None (the default) or an
        empty iterable gives an empty result.
    range: optional "low,high" string of integers.  When supplied, only
        rows for which reduce_filename(row['model']) falls within
        [low, high] (inclusive) are kept; otherwise all rows are kept.

    Returns a dict keyed by dataset name whose values are the lists of
    rows yielded by csv.DictReader.

    Raises AssertionError when one or more of the paths is not an
    existing file.
    """
    # Take a concrete copy so the pairs can be traversed more than once
    # even if the caller handed in a generator.
    datasets = [] if datasets is None else list(datasets)

    # ``filter`` returns a lazy object on Python 3 and has no len(), so
    # collect the offending paths directly instead of comparing counts.
    missing = [dsfile for _, dsfile in datasets
               if not os.path.isfile(dsfile)]
    if missing:
        # Explicit raise keeps the historical exception type while not
        # being compiled away under ``python -O``.
        raise AssertionError(
            'dataset file(s) not found: %s'
            % ', '.join(str(p) for p in missing))

    # Parsed eagerly into a list: a Python 3 ``map`` object cannot be
    # subscripted.
    bounds = None
    if range:
        bounds = [int(v) for v in range.split(',')]

    data = {}
    for dsname, dsfile in datasets:
        print('Reading %s from %s' % (dsname, dsfile))
        with open(dsfile) as f:
            reader = csv.DictReader(f)
            if bounds is None:
                data[dsname] = list(reader)
            else:
                data[dsname] = [
                    row for row in reader
                    if bounds[0] <= reduce_filename(row['model']) <= bounds[1]
                ]
    return data
def _segment_listing_iterator(self, req, version, account, segments,
                              recursion_depth=1):
    """
    Generate (segment dict, first byte, last byte) tuples covering the
    byte window self.first_byte..self.last_byte of the given segments.

    Entries flagged 'sub_slo' are not yielded themselves; they are
    expanded by recursing into the listing returned by
    self._fetch_sub_slo_segments().  The two offsets in each yielded
    tuple are inclusive and counted within that segment.

    self.first_byte and self.last_byte serve as running state: a value
    of None is replaced by the start / end of the whole listing, and
    both are shifted down by each segment's length as the walk moves
    on.  Around a recursive call they are saved, re-based onto the
    sub-manifest, and restored afterwards.

    Raises ListingIterError when a 'sub_slo' entry is reached while
    recursion_depth is at or beyond self.max_slo_recursion_depth.
    """
    # NOTE(review): presumably this fixes up the recorded size of
    # sub-manifest entries before any lengths are summed -- confirm
    # against override_bytes_from_content_type.
    for entry in segments:
        if config_true_value(entry.get('sub_slo')):
            override_bytes_from_content_type(
                entry, logger=self.slo.logger)

    # Range handling lives here, rather than in the consumer of this
    # iterator, so that sub-manifests lying wholly outside the requested
    # window are skipped without ever being fetched.
    total_length = sum(self._segment_length(seg) for seg in segments)
    if self.first_byte is None:
        self.first_byte = 0
    if self.last_byte is None:
        self.last_byte = total_length - 1

    previous_sub_path = None
    for entry in segments:
        entry_length = self._segment_length(entry)

        if self.first_byte >= entry_length:
            # the window starts after this segment: skip it entirely
            self.first_byte -= entry_length
            self.last_byte -= entry_length
            continue

        if self.last_byte < 0:
            # the window ended before this segment: nothing more to do
            break

        range_spec = entry.get('range')
        if range_spec is not None:
            # "start-end"; validated and made concrete at upload time
            range_start, range_end = map(int, range_spec.split('-'))
        else:
            range_start, range_end = 0, entry_length - 1

        if not config_true_value(entry.get('sub_slo')):
            if isinstance(entry['name'], six.text_type):
                entry['name'] = entry['name'].encode("utf-8")
            yield (entry,
                   max(0, self.first_byte) + range_start,
                   min(range_end, range_start + self.last_byte))
        else:
            # Check the depth before fetching, so the manifest that
            # would exceed the limit is never requested.
            if recursion_depth >= self.max_slo_recursion_depth:
                raise ListingIterError("Max recursion depth exceeded")

            sub_path = get_valid_utf8_str(entry['name'])
            sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
            # consecutive references to the same sub-manifest reuse the
            # listing fetched for the previous entry
            if last_sub_path_differs(previous_sub_path, sub_path):
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
            previous_sub_path = sub_path

            # Re-base the window onto the sub-manifest, let the
            # recursive call consume it, then put our own state back.
            saved_first, saved_last = self.first_byte, self.last_byte
            self.first_byte = range_start + max(0, self.first_byte)
            self.last_byte = min(range_end, range_start + self.last_byte)

            nested = self._segment_listing_iterator(
                req, version, account, sub_segments,
                recursion_depth=recursion_depth + 1)
            for nested_entry, start_byte, end_byte in nested:
                yield nested_entry, start_byte, end_byte

            self.first_byte, self.last_byte = saved_first, saved_last

        self.first_byte -= entry_length
        self.last_byte -= entry_length


def last_sub_path_differs(previous_sub_path, sub_path):
    """Return True when sub_path is not the path seen on the previous entry."""
    return previous_sub_path != sub_path
def _segment_listing_iterator(self, req, version, account, segments,
                              recursion_depth=1):
    """
    Yield (seg_dict, first_byte, last_byte) for every segment needed to
    serve the byte window self.first_byte..self.last_byte.

    Segments marked 'sub_slo' are replaced in the output by the
    segments of the manifest they point to, obtained through
    self._fetch_sub_slo_segments() and walked recursively.  The byte
    offsets in each tuple are inclusive and counted within the yielded
    segment.

    self.first_byte / self.last_byte are mutated while iterating: None
    is replaced by the bounds of the whole listing, and both values are
    reduced by the length of each segment as it is passed.

    Raises ListingIterError if a 'sub_slo' segment is encountered when
    recursion_depth has reached self.max_slo_recursion_depth.
    """
    # ``unicode`` only exists on Python 2.  type(u'') is ``unicode``
    # there and ``str`` on Python 3, so the text check below works on
    # both without needing any extra import.
    text_type = type(u'')

    for seg_dict in segments:
        if config_true_value(seg_dict.get('sub_slo')):
            override_bytes_from_content_type(seg_dict,
                                             logger=self.slo.logger)

    # We handle the range stuff here so that we can be smart about
    # skipping unused submanifests. For example, if our first segment is a
    # submanifest referencing 50 MiB total, but start_byte falls in
    # the 51st MiB, then we can avoid fetching the first submanifest.
    #
    # If we were to make SegmentedIterable handle all the range
    # calculations, we would be unable to make this optimization.
    total_length = sum(self._segment_length(seg) for seg in segments)
    if self.first_byte is None:
        self.first_byte = 0
    if self.last_byte is None:
        self.last_byte = total_length - 1

    last_sub_path = None
    for seg_dict in segments:
        seg_length = self._segment_length(seg_dict)
        if self.first_byte >= seg_length:
            # don't need any bytes from this segment
            self.first_byte -= seg_length
            self.last_byte -= seg_length
            continue

        if self.last_byte < 0:
            # no bytes are needed from this or any future segment
            break

        # named seg_range so the ``range`` builtin is not shadowed
        seg_range = seg_dict.get('range')
        if seg_range is None:
            range_start, range_end = 0, seg_length - 1
        else:
            # We already validated and supplied concrete values
            # for the range on upload
            range_start, range_end = map(int, seg_range.split('-'))

        if config_true_value(seg_dict.get('sub_slo')):
            # do this check here so that we can avoid fetching this last
            # manifest before raising the exception
            if recursion_depth >= self.max_slo_recursion_depth:
                raise ListingIterError("Max recursion depth exceeded")

            sub_path = get_valid_utf8_str(seg_dict['name'])
            sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
            # back-to-back references to one sub-manifest share a fetch
            if last_sub_path != sub_path:
                sub_segments = self._fetch_sub_slo_segments(
                    req, version, account, sub_cont, sub_obj)
            last_sub_path = sub_path

            # Use the existing machinery to slice into the sub-SLO.
            # This requires that we save off our current state, and
            # restore at the other end.
            orig_start, orig_end = self.first_byte, self.last_byte
            self.first_byte = range_start + max(0, self.first_byte)
            self.last_byte = min(range_end, range_start + self.last_byte)

            for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                    req, version, account, sub_segments,
                    recursion_depth=recursion_depth + 1):
                yield sub_seg_dict, sb, eb

            # Restore the first/last state
            self.first_byte, self.last_byte = orig_start, orig_end
        else:
            if isinstance(seg_dict['name'], text_type):
                seg_dict['name'] = seg_dict['name'].encode("utf-8")
            yield (seg_dict,
                   max(0, self.first_byte) + range_start,
                   min(range_end, range_start + self.last_byte))

        self.first_byte -= seg_length
        self.last_byte -= seg_length