Esempio n. 1
0
    def __init__(self,
                 data,
                 startIdxs,
                 endIdxs=None,
                 subseqLength=None,
                 labels=None,
                 name=None,
                 id=0):
        """Store a time series together with its labeled subsequences.

        Args:
            data: the time series; stored after being passed through
                ``ensure2D``.
            startIdxs: start index of each labeled subsequence.
            endIdxs: end index of each labeled subsequence. Takes
                precedence over ``subseqLength`` when given.
            subseqLength: common subsequence length; only used when
                ``endIdxs`` is None, in which case the end indices are
                ``startIdxs + subseqLength``.
            labels: one label per subsequence. If None or empty, every
                subsequence gets ``DEFAULT_LABEL``.
            name: optional name stored as-is.
            id: identifier, coerced to ``int``.

        Raises:
            ValueError: if neither ``endIdxs`` nor a truthy
                ``subseqLength`` is given, or if explicit ``endIdxs``
                are inconsistent with ``startIdxs`` / ``data`` (length
                mismatch, start > end, start < 0, end > len(data)).
        """
        self.data = ensure2D(data)
        # builtin int instead of the np.int alias, which no longer exists
        # in current NumPy releases
        self.startIdxs = np.asarray(startIdxs, dtype=int)
        self.name = name
        self.id = int(id)

        if endIdxs is not None:
            self.endIdxs = np.asarray(endIdxs, dtype=int)
            self.subseqLength = None
        elif subseqLength:
            self.endIdxs = self.startIdxs + subseqLength
            self.subseqLength = subseqLength
        else:
            raise ValueError(
                "Either endIdxs or subseqLength must be specified!")

        if labels is None or len(labels) == 0:
            self.labels = np.zeros(len(self.startIdxs),
                                   dtype=int) + DEFAULT_LABEL
        else:
            self.labels = np.asarray(labels)

        # Only explicitly supplied end indices are validated (as before).
        # The checks run on the converted arrays so that plain lists are
        # compared elementwise rather than lexicographically.
        if endIdxs is not None:
            starts, ends = self.startIdxs, self.endIdxs

            # equal lengths
            nStart, nEnd = len(starts), len(ends)
            if nStart != nEnd:
                raise ValueError("Number of start indices must equal number "
                                 "of end indices! {0} != {1}".format(
                                     nStart, nEnd))

            # NOTE: `violators` holds positions, so we test its size;
            # np.any(violators) would miss a sole violator at position 0.

            # starts before or equal to ends
            violators = np.where(starts > ends)[0]
            if violators.size > 0:
                raise ValueError("Some start indices exceed end indices! "
                                 "Violators at {}".format(str(violators)))
            # valid indices
            violators = np.where(starts < 0)[0]
            if violators.size > 0:
                raise ValueError("Some start indices < 0! "
                                 "Violators at {}".format(str(violators)))
            violators = np.where(ends > len(data))[0]
            if violators.size > 0:
                violatorValues = ends[violators]
                raise ValueError("Some end indices > length of data {}! "
                                 "Violators {} at {}".format(
                                     len(data), str(violatorValues),
                                     str(violators)))
Esempio n. 2
0
	def __init__(self, data, startIdxs, endIdxs=None, subseqLength=None,
		labels=None, name=None, id=0):
		"""Store a time series together with its labeled subsequences.

		Args:
			data: the time series; stored after being passed through
				``ensure2D``.
			startIdxs: start index of each labeled subsequence.
			endIdxs: end index of each labeled subsequence. Takes
				precedence over ``subseqLength`` when given.
			subseqLength: common subsequence length; only used when
				``endIdxs`` is None, in which case the end indices are
				``startIdxs + subseqLength``.
			labels: one label per subsequence. If None or empty, every
				subsequence gets ``DEFAULT_LABEL``.
			name: optional name stored as-is.
			id: identifier, coerced to ``int``.

		Raises:
			ValueError: if neither ``endIdxs`` nor a truthy
				``subseqLength`` is given, or if explicit ``endIdxs``
				are inconsistent with ``startIdxs`` / ``data`` (length
				mismatch, start > end, start < 0, end > len(data)).
		"""
		self.data = ensure2D(data)
		# builtin int instead of the np.int alias, which no longer exists
		# in current NumPy releases
		self.startIdxs = np.asarray(startIdxs, dtype=int)
		self.name = name
		self.id = int(id)

		if endIdxs is not None:
			self.endIdxs = np.asarray(endIdxs, dtype=int)
			self.subseqLength = None
		elif subseqLength:
			self.endIdxs = self.startIdxs + subseqLength
			self.subseqLength = subseqLength
		else:
			raise ValueError("Either endIdxs or subseqLength must be specified!")

		if labels is None or len(labels) == 0:
			self.labels = np.zeros(len(self.startIdxs), dtype=int) + DEFAULT_LABEL
		else:
			self.labels = np.asarray(labels)

		# Only explicitly supplied end indices are validated (as before).
		# The checks run on the converted arrays so that plain lists are
		# compared elementwise rather than lexicographically.
		if endIdxs is not None:
			starts, ends = self.startIdxs, self.endIdxs

			# equal lengths
			nStart, nEnd = len(starts), len(ends)
			if nStart != nEnd:
				raise ValueError("Number of start indices must equal number "
					"of end indices! {0} != {1}".format(nStart, nEnd))

			# NOTE: `violators` holds positions, so we test its size;
			# np.any(violators) would miss a sole violator at position 0.

			# starts before or equal to ends
			violators = np.where(starts > ends)[0]
			if violators.size > 0:
				raise ValueError("Some start indices exceed end indices! "
					"Violators at {}".format(str(violators)))
			# valid indices
			violators = np.where(starts < 0)[0]
			if violators.size > 0:
				raise ValueError("Some start indices < 0! "
					"Violators at {}".format(str(violators)))
			violators = np.where(ends > len(data))[0]
			if violators.size > 0:
				violatorValues = ends[violators]
				raise ValueError("Some end indices > length of data {}! "
					"Violators {} at {}".format(len(data),
						str(violatorValues), str(violators)))
Esempio n. 3
0
def sectionsOfDataNearAnnotationsImpure(X, startIdxs, endIdxs, labels,
	instancesPerTs=10, shuffle=False, padLen=0, maxPadJitter=0,
	keepLabels=None, datasetName="Dataset"):
	"""Build LabeledTimeSeries objects from the data around annotations.

	The annotated ranges (padded via ``unionOfRanges``) are grouped with
	``formGroupsOfSize``; the data of each group's ranges is stacked into
	one array and the annotation indices are shifted so they are valid
	within that stacked array.

	Args:
		X: the full data; sliced as ``X[start:end]``.
		startIdxs: start index of each annotation within X.
		endIdxs: end index of each annotation within X.
		labels: one label per annotation.
		instancesPerTs: passed to ``formGroupsOfSize`` as ``groupSize``.
		shuffle: passed to ``formGroupsOfSize``.
		padLen: passed to ``unionOfRanges``.
		maxPadJitter: if > 0, each range may be randomly widened by up to
			this many samples on either side (limited by the gap to the
			neighboring annotation). Uses ``np.random``, so the result is
			not deterministic unless the caller seeds it.
		keepLabels: if given and non-empty, only annotations whose label
			is in this collection are used.
		datasetName: prefix for the name of each resulting time series.

	Returns:
		A list of LabeledTimeSeries, one per group that contains at least
		two annotations.

	Raises:
		AssertionError: if startIdxs, endIdxs and labels differ in length.
	"""
	assert len(startIdxs) == len(endIdxs), \
		"startIdxs and endIdxs must have the same length"
	assert len(startIdxs) == len(labels), \
		"startIdxs and labels must have the same length"

	startIdxs = np.asarray(startIdxs)
	endIdxs = np.asarray(endIdxs)
	# labels is fancy-indexed below, which a plain list does not support
	labels = np.asarray(labels)

	# filter out labels we don't care about; the explicit length test
	# (rather than truthiness) also accepts an ndarray of labels
	if keepLabels is not None and len(keepLabels) > 0:
		keepIdxs = np.array(
			[i for i, lbl in enumerate(labels) if lbl in keepLabels],
			dtype=int)
		startIdxs = startIdxs[keepIdxs]
		endIdxs = endIdxs[keepIdxs]
		labels = labels[keepIdxs]

	# find sections of nearby annotations in the data and group these
	# sections together; we'll concat these groups together to form a ts
	combinedRanges = unionOfRanges(startIdxs, endIdxs, len(X), padLen=padLen)
	rangeGroups = formGroupsOfSize(combinedRanges, groupSize=instancesPerTs,
		shuffle=shuffle)

	# now the hard part--create a LabeledTimeSeries from each of these
	# sections of signal; we have to not only find which annotations
	# fall within each range, but also adjust the start and end indices
	# so that they're correct in the new ts formed by concatenating the
	# data in each range together
	tsList = []
	for groupNum, ranges in enumerate(rangeGroups):

		ranges = sorted(ranges, key=lambda r: r[0]) # sort by range start idx

		dataLenSoFar = 0
		dataInRanges = []
		startsInRanges = []
		endsInRanges = []
		labelsInRanges = []

		for rang in ranges:
			start, end = rang

			# positions of the annotations inside this range, used as a
			# half-open interval [firstInRange, lastInRange)
			firstInRange, lastInRange = whereStartEndPairsInRange(startIdxs,
				endIdxs, start, end)
			idxsInRange = np.arange(firstInRange, lastInRange)

			# move the start and end indices around a bit so that ranges
			# aren't spaced exactly uniformly, which can lead to an
			# artificial semblance of regularity
			if maxPadJitter > 0:
				if firstInRange > 0:
					firstStartIdx = startIdxs[firstInRange]
					prevEndIdx = endIdxs[firstInRange-1]
					gap = firstStartIdx - prevEndIdx
					if gap > 1:
						gap = min(gap - 1, maxPadJitter)
						offset = int(np.random.rand() * gap)
						# clamp at 0: a negative start would make the
						# slice X[start:end] wrap around to the tail of X
						start = max(0, start - offset)
				# NOTE(review): lastInRange is exclusive, so a following
				# annotation exists whenever lastInRange < len(startIdxs);
				# this guard also skips the case where the following
				# annotation is the very last one -- confirm whether that
				# is intended.
				if lastInRange < (len(startIdxs) - 1):
					lastEndIdx = endIdxs[lastInRange-1] # last idx not inclusive
					nextStartIdx = startIdxs[lastInRange]
					gap = nextStartIdx - lastEndIdx
					if gap > 1:
						gap = min(gap - 1, maxPadJitter)
						offset = int(np.random.rand() * gap)
						end += offset

			# shift annotation indices from X coordinates into the
			# coordinates of the concatenated group data
			starts = startIdxs[idxsInRange] - start + dataLenSoFar
			ends = endIdxs[idxsInRange] - start + dataLenSoFar
			lbls = labels[idxsInRange]

			startsInRanges += list(starts)
			endsInRanges += list(ends)
			labelsInRanges += list(lbls)

			data = ensure2D(X[start:end])
			dataInRanges.append(data)

			dataLenSoFar += len(data)

		if len(labelsInRanges) < 2: # need more than one pattern instance per ts
			continue

		groupData = np.vstack(dataInRanges)
		# builtin int/object instead of the np.int/np.object aliases, which
		# no longer exist in current NumPy releases
		groupStarts = np.array(startsInRanges, dtype=int)
		groupEnds = np.array(endsInRanges, dtype=int)
		groupLabels = np.array(labelsInRanges, dtype=object)

		name = "{}-group{}".format(datasetName, groupNum)
		# NOTE: str hashes are salted per interpreter process (unless
		# PYTHONHASHSEED is fixed), so this id differs between runs.
		uniqId = hash(name)

		ts = LabeledTimeSeries(groupData, startIdxs=groupStarts,
			endIdxs=groupEnds, labels=groupLabels, name=name, id=uniqId)

		tsList.append(ts)

	return tsList