Example #1
def from_range_strings(ranges, boundtype=int):
    """
	Parse a list of ranges expressed as strings in the form "value" or
	"first:last" into an equivalent segments.segments.segmentlist.  In the
	latter case, an empty string for "first" and(or) "last" indicates a
	(semi)infinite range.  A typical use for this function is in
	parsing command line options or entries in configuration files.

	NOTE:  the output is a segmentlist as described by the strings;  if
	the segments in the input file are not coalesced or out of order,
	then thusly shall be the output of this function.  It is
	recommended that this function's output be coalesced before use.

	Example:

	>>> text = "0:10,35,100:"
	>>> from_range_strings(text.split(","))
	[segment(0, 10), segment(35, 35), segment(100, infinity)]
	"""
    # preallocate segmentlist
    segs = segments.segmentlist([None] * len(ranges))

    # iterate over strings
    for i, range in enumerate(ranges):
        parts = range.split(":")
        if len(parts) == 1:
            parts = boundtype(parts[0])
            segs[i] = segments.segment(parts, parts)
            continue
        if len(parts) != 2:
            raise ValueError(range)
        if parts[0] == "":
            parts[0] = segments.NegInfinity
        else:
            parts[0] = boundtype(parts[0])
        if parts[1] == "":
            parts[1] = segments.PosInfinity
        else:
            parts[1] = boundtype(parts[1])
        segs[i] = segments.segment(parts[0], parts[1])

    # success
    return segs
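The docstring above recommends coalescing the result before use. A minimal usage sketch, assuming the segments module referenced by the function is glue.segments (ligo.segments exposes the same API):

from glue import segments  # provides segment and segmentlist

# overlapping ranges come back exactly as written...
segs = from_range_strings("0:10,5:20,30:40".split(","))
# [segment(0, 10), segment(5, 20), segment(30, 40)]

# ...so coalesce them (in place) before doing arithmetic on the list
segs.coalesce()
# [segment(0, 20), segment(30, 40)]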
Example #2
def from_range_strings(ranges, boundtype = int):
	"""
	Parse a list of ranges expressed as strings in the form "value" or
	"first:last" into an equivalent glue.segments.segmentlist.  In the
	latter case, an empty string for "first" and(or) "last" indicates a
	(semi)infinite range.  A typical use for this function is in
	parsing command line options or entries in configuration files.

	NOTE:  the output is a segmentlist as described by the strings;  if
	the segments in the input file are not coalesced or out of order,
	then thusly shall be the output of this function.  It is
	recommended that this function's output be coalesced before use.

	Example:

	>>> text = "0:10,35,100:"
	>>> from_range_strings(text.split(","))
	[segment(0, 10), segment(35, 35), segment(100, infinity)]
	"""
	# preallocate segmentlist
	segs = segments.segmentlist([None] * len(ranges))

	# iterate over strings
	for i, range in enumerate(ranges):
		parts = range.split(":")
		if len(parts) == 1:
			parts = boundtype(parts[0])
			segs[i] = segments.segment(parts, parts)
			continue
		if len(parts) != 2:
			raise ValueError(range)
		if parts[0] == "":
			parts[0] = segments.NegInfinity
		else:
			parts[0] = boundtype(parts[0])
		if parts[1] == "":
			parts[1] = segments.PosInfinity
		else:
			parts[1] = boundtype(parts[1])
		segs[i] = segments.segment(parts[0], parts[1])

	# success
	return segs
Example #3
import csv
import os


def ingest_csv(datasets=None, range=None):
    # every (name, path) pair must point at an existing file
    existing_files = list(filter(lambda x: os.path.isfile(x[1]), datasets))
    assert len(list(datasets)) == len(existing_files)

    if range:
        # "first,last" -> [first, last]; materialize so it can be indexed below
        range = list(map(int, range.split(',')))

    data = {}
    for (dsname, dsfile) in datasets:
        print('Reading %s from %s' % (dsname, dsfile))
        with open(dsfile) as f:
            d = csv.DictReader(f)
            data[dsname] = []
            for e in d:
                if range:
                    # reduce_filename() is an external helper (not shown here)
                    # that maps the 'model' field to a comparable index
                    re       = reduce_filename(e['model'])
                    in_range = (re >= range[0] and re <= range[1])
                    if in_range:
                        data[dsname].append(e)
                else:
                    data[dsname].append(e)

    return data
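A hypothetical invocation sketch for the function above; the dataset names, CSV paths, and range string are placeholders, and reduce_filename() is assumed to return an integer index extracted from each row's 'model' column:

datasets = [('train', 'train_metrics.csv'), ('val', 'val_metrics.csv')]

# keep only rows whose reduced 'model' index falls in [10, 20]
data = ingest_csv(datasets=datasets, range='10,20')
print({name: len(rows) for name, rows in data.items()})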
Example #4
def ingest_csv(datasets=None, range=None):
    existing_files = list(filter(lambda x: os.path.isfile(x[1]), datasets))
    assert len(datasets) == len(existing_files)

    if range:
        range = list(map(int, range.split(',')))

    data = {}
    for (dsname, dsfile) in datasets:
        print('Reading %s from %s' % (dsname, dsfile))
        with open(dsfile) as f:
            d = csv.DictReader(f)
            data[dsname] = []
            for e in d:
                if range:
                    re       = reduce_filename(e['model'])
                    in_range = (re >= range[0] and re <= range[1])
                    if in_range:
                        data[dsname].append(e)
                else:
                    data[dsname].append(e)

    return data
Example #5
    def _segment_listing_iterator(self, req, version, account, segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte -= seg_length
                self.last_byte -= seg_length
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            range = seg_dict.get('range')
            if range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = self._fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                # This requires that we save off our current state, and
                # restore at the other end.
                orig_start, orig_end = self.first_byte, self.last_byte
                self.first_byte = range_start + max(0, self.first_byte)
                self.last_byte = min(range_end, range_start + self.last_byte)

                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req, version, account, sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb

                # Restore the first/last state
                self.first_byte, self.last_byte = orig_start, orig_end
            else:
                if isinstance(seg_dict['name'], six.text_type):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict,
                       max(0, self.first_byte) + range_start,
                       min(range_end, range_start + self.last_byte))

            self.first_byte -= seg_length
            self.last_byte -= seg_length
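The comment block above describes the byte accounting that lets the iterator skip whole submanifests. The same arithmetic can be illustrated with a small standalone sketch (plain Python, not part of Swift): walk a list of segment lengths, drop segments that end before first_byte, and report the (start, end) offsets to read from each remaining segment.

def slice_segments(seg_lengths, first_byte, last_byte):
    plan = []
    for length in seg_lengths:
        if first_byte >= length:
            # the requested range starts after this segment; skip it entirely
            first_byte -= length
            last_byte -= length
            continue
        if last_byte < 0:
            # the requested range ended in an earlier segment
            break
        plan.append((max(0, first_byte), min(length - 1, last_byte)))
        first_byte -= length
        last_byte -= length
    return plan

# three 10-byte segments, client asks for bytes 15..24 of the whole object:
# read bytes 5..9 of the second segment and bytes 0..4 of the third
print(slice_segments([10, 10, 10], 15, 24))  # [(5, 9), (0, 4)]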
Example #6
    def _segment_listing_iterator(self,
                                  req,
                                  version,
                                  account,
                                  segments,
                                  recursion_depth=1):
        for seg_dict in segments:
            if config_true_value(seg_dict.get('sub_slo')):
                override_bytes_from_content_type(seg_dict,
                                                 logger=self.slo.logger)

        # We handle the range stuff here so that we can be smart about
        # skipping unused submanifests. For example, if our first segment is a
        # submanifest referencing 50 MiB total, but start_byte falls in
        # the 51st MiB, then we can avoid fetching the first submanifest.
        #
        # If we were to make SegmentedIterable handle all the range
        # calculations, we would be unable to make this optimization.
        total_length = sum(self._segment_length(seg) for seg in segments)
        if self.first_byte is None:
            self.first_byte = 0
        if self.last_byte is None:
            self.last_byte = total_length - 1

        last_sub_path = None
        for seg_dict in segments:
            seg_length = self._segment_length(seg_dict)
            if self.first_byte >= seg_length:
                # don't need any bytes from this segment
                self.first_byte -= seg_length
                self.last_byte -= seg_length
                continue

            if self.last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            range = seg_dict.get('range')
            if range is None:
                range_start, range_end = 0, seg_length - 1
            else:
                # We already validated and supplied concrete values
                # for the range on upload
                range_start, range_end = map(int, range.split('-'))

            if config_true_value(seg_dict.get('sub_slo')):
                # do this check here so that we can avoid fetching this last
                # manifest before raising the exception
                if recursion_depth >= self.max_slo_recursion_depth:
                    raise ListingIterError("Max recursion depth exceeded")

                sub_path = get_valid_utf8_str(seg_dict['name'])
                sub_cont, sub_obj = split_path(sub_path, 2, 2, True)
                if last_sub_path != sub_path:
                    sub_segments = self._fetch_sub_slo_segments(
                        req, version, account, sub_cont, sub_obj)
                last_sub_path = sub_path

                # Use the existing machinery to slice into the sub-SLO.
                # This requires that we save off our current state, and
                # restore at the other end.
                orig_start, orig_end = self.first_byte, self.last_byte
                self.first_byte = range_start + max(0, self.first_byte)
                self.last_byte = min(range_end, range_start + self.last_byte)

                for sub_seg_dict, sb, eb in self._segment_listing_iterator(
                        req,
                        version,
                        account,
                        sub_segments,
                        recursion_depth=recursion_depth + 1):
                    yield sub_seg_dict, sb, eb

                # Restore the first/last state
                self.first_byte, self.last_byte = orig_start, orig_end
            else:
                if isinstance(seg_dict['name'], unicode):
                    seg_dict['name'] = seg_dict['name'].encode("utf-8")
                yield (seg_dict, max(0, self.first_byte) + range_start,
                       min(range_end, range_start + self.last_byte))

            self.first_byte -= seg_length
            self.last_byte -= seg_length
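The sub-SLO branch composes the caller's remaining range with the segment's own stored "range" before recursing, then restores first_byte/last_byte afterwards. A worked example of that composition with illustrative numbers (a sub-SLO stored with range "100-199", of which the caller still needs bytes 30..80):

range_start, range_end = 100, 199   # from the stored segment range
first_byte, last_byte = 30, 80      # what the caller still needs of this segment

sub_first = range_start + max(0, first_byte)         # 130
sub_last = min(range_end, range_start + last_byte)   # 180
print(sub_first, sub_last)  # request bytes 130..180 from the sub-SLO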