class BOSSVS():
    """Bag-of-SFA-Symbols in Vector Space (BOSS VS) transform.

    Turns time series into histograms of packed SFA words and folds the
    per-sample histograms into per-class tf-idf vectors.
    """

    def __init__(self, maxF, maxS, windowLength, normMean, logger=None):
        """Store transform parameters.

        :param maxF: maximum SFA word length (number of coefficients kept)
        :param maxS: alphabet size (number of discrete symbols)
        :param windowLength: sliding-window length
        :param normMean: whether windows are mean-normalized
        :param logger: optional logger exposing a ``Log(msg, level)`` method
        """
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None
        # Fix: logger defaults to None, so only log when one was supplied
        # (the original called logger.Log unconditionally and crashed).
        if logger is not None:
            logger.Log(self.__dict__, level=0)
        self.logger = logger

    def createWords(self, samples):
        """Transform every sample into a list of packed SFA words.

        Fits the SFA discretization lazily on first call.
        """
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH", logger=self.logger)
            self.signature.fitWindowing(samples, self.windowLength, self.maxF,
                                        self.symbols, self.normMean, True)
            self.signature.printBins(self.logger)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            # NOTE(review): calls a module-level int2byte (not a method) --
            # assumed to exist elsewhere in this file; confirm.
            words.append([self.createWord(word, self.maxF,
                                          int2byte(self.symbols))
                          for word in sfaWords])
        return words

    def createWord(self, numbers, maxF, bits):
        """Pack the first ``maxF`` symbols of one SFA word into an integer.

        Each symbol contributes ``bits`` bits, least-significant first.
        :param numbers: sequence of symbol values
        :param maxF: maximum number of symbols to pack
        :param bits: bits per symbol
        :return: the packed word as an int
        """
        shortsPerLong = int(round(60 / bits))  # symbols fitting a 60-bit budget
        to = min(len(numbers), maxF)

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, min(to, shortsPerLong + s)):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        # Emulate fixed-width signed overflow of the reference implementation.
        # NOTE(review): wraps at 32 bits although up to 60 bits may be set
        # above -- confirm against the original port.
        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b

    def createBagOfPattern(self, words, samples, f):
        """Build one word histogram per sample.

        Consecutive duplicates are counted once (numerosity reduction).
        :param f: word-prefix length in symbols used to mask each word
        """
        bagOfPatterns = []
        usedBits = int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1  # keep only the first f symbols

        for j in range(len(words)):
            BOP = BagOfBigrams(samples[j].label)
            lastWord = -9223372036854775808  # sentinel below any real word
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                if word != lastWord:
                    BOP.bob[word] = BOP.bob.get(word, 0) + 1
                lastWord = word
            bagOfPatterns.append(BOP)
        return bagOfPatterns

    def createTfIdf(self, bagOfPatterns, sampleIndices, uniqueLabels, labels):
        """Compute per-class tf-idf vectors from the bags of patterns.

        :param bagOfPatterns: per-sample histograms (objects with a ``bob`` dict)
        :param sampleIndices: indices of the samples to aggregate
        :param uniqueLabels: the distinct class labels
        :param labels: label of each sample, indexed like bagOfPatterns
        :return: dict label -> {word: normalized tf-idf weight}
        """
        # Accumulate raw word counts per class label.
        matrix = {label: {} for label in uniqueLabels}
        for j in sampleIndices:
            label = labels[j]
            for key, value in bagOfPatterns[j].bob.items():
                matrix[label][key] = matrix[label].get(key, 0) + value

        # Document frequency: in how many classes each word occurs.
        wordInClassFreq = {}
        for classDict in matrix.values():
            for key2 in classDict:
                wordInClassFreq[key2] = wordInClassFreq.get(key2, 0) + 1

        for key, tfIDFs in matrix.items():
            for key2, value2 in tfIDFs.items():
                wordCount = wordInClassFreq.get(key2)
                # Fix: use logical `and` instead of bitwise `&` (identical
                # result on bools, but the correct idiom and precedence).
                if value2 > 0 and len(uniqueLabels) != wordCount:
                    tfValue = 1. + math.log10(value2)
                    idfValue = math.log10(1. + len(uniqueLabels) / wordCount)
                    # NOTE(review): classic tf-idf multiplies tf by idf; this
                    # divides, matching the original code -- confirm intended.
                    tfIDFs[key2] = tfValue / idfValue
                else:
                    tfIDFs[key2] = 0.

        return self.normalizeTfIdf(matrix)

    def normalizeTfIdf(self, classStatistics):
        """L2-normalize each class vector in place and return the mapping."""
        for key in classStatistics:
            squareSum = sum(v ** 2 for v in classStatistics[key].values())
            squareRoot = math.sqrt(squareSum)
            if squareRoot > 0:
                for key2 in classStatistics[key]:
                    classStatistics[key][key2] /= squareRoot
        return classStatistics
def sfaToWord(word):
    """Render one SFA word as lowercase letters (0 -> 'a', 1 -> 'b', ...)."""
    return "".join(chr(symbol + 97) for symbol in word)


def sfaToWordList(wordList):
    """Render a list of SFA words, each followed by '; '."""
    parts = []
    for word in wordList:
        parts.append(sfaToWord(word))
        parts.append("; ")
    return "".join(parts)


# Demo: fit SFA on the CBF training set, then print the windowed SFA words
# of every test series.
train, test, train_labels, test_labels = load("CBF", "\t")

sfa = SFA("EQUI_DEPTH")
sfa.fitWindowing(train, train_labels, windowLength, wordLength, symbols,
                 normMean, True)
sfa.printBins()

for i in range(test.shape[0]):
    wordList = sfa.transformWindowing(test.iloc[i, :])
    print(str(i) + "-th transformed time series SFA word " + "\t"
          + sfaToWordList(wordList))
class BOSS():
    """Bag-of-SFA-Symbols (BOSS) transform.

    Turns time series into histograms of packed SFA words used for
    1-NN classification under the BOSS distance.
    """

    def __init__(self, maxF, maxS, windowLength, normMean, logger=None):
        """Store transform parameters.

        :param maxF: maximum SFA word length (number of coefficients kept)
        :param maxS: alphabet size (number of discrete symbols)
        :param windowLength: sliding-window length
        :param normMean: whether windows are mean-normalized
        :param logger: optional logger exposing a ``Log(msg, level)`` method
        """
        self.maxF = maxF
        self.symbols = maxS
        self.windowLength = windowLength
        self.normMean = normMean
        self.signature = None
        # Fix: logger defaults to None, so only log when one was supplied
        # (the original called logger.Log unconditionally and crashed).
        if logger is not None:
            logger.Log(self.__dict__, level=0)
        self.logger = logger

    def createWords(self, samples):
        """Transform every sample into a list of packed SFA words.

        Fits the SFA discretization lazily on first call.
        """
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH", logger=self.logger)
            self.signature.fitWindowing(samples, self.windowLength, self.maxF,
                                        self.symbols, self.normMean, True)
            self.signature.printBins(self.logger)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            # NOTE(review): calls the module-level int2byte, not
            # self.int2byte -- assumed intentional; confirm.
            words.append([self.createWord(word, self.maxF,
                                          int2byte(self.symbols))
                          for word in sfaWords])
        return words

    def createWord(self, numbers, maxF, bits):
        """Pack the first ``maxF`` symbols of one SFA word into an integer.

        Each symbol contributes ``bits`` bits, least-significant first.
        :param numbers: sequence of symbol values
        :param maxF: maximum number of symbols to pack
        :param bits: bits per symbol
        :return: the packed word as an int
        """
        shortsPerLong = int(round(60 / bits))  # symbols fitting a 60-bit budget
        to = min(len(numbers), maxF)

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, min(to, shortsPerLong + s)):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        # Emulate fixed-width signed overflow of the reference implementation.
        # NOTE(review): wraps at 32 bits although up to 60 bits may be set
        # above -- confirm against the original port.
        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b

    def createBagOfPattern(self, words, samples, f):
        """Build one word histogram per sample.

        Consecutive duplicates are counted once (numerosity reduction).
        :param f: word-prefix length in symbols used to mask each word
        """
        bagOfPatterns = [BagOfBigrams(samples[j].label)
                         for j in range(samples["Samples"])]
        usedBits = int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1  # keep only the first f symbols

        for j in range(len(words)):
            BOP = {}
            lastWord = -9223372036854775808  # sentinel below any real word
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                if word != lastWord:
                    BOP[word] = BOP.get(word, 0) + 1
                lastWord = word
            bagOfPatterns[j].bob = BOP
        return bagOfPatterns

    def int2byte(self, number):
        """Return floor(log2(number)) via successive shifts.

        E.g. an alphabet of 4 symbols needs 2 bits per symbol.
        """
        log = 0
        if (number & 0xffff0000) != 0:
            number >>= 16
            log = 16
        if number >= 256:
            number >>= 8
            log += 8
        if number >= 16:
            number >>= 4
            log += 4
        if number >= 4:
            number >>= 2
            log += 2
        return log + (number >> 1)

    def bag2dict(self, bag):
        """Convert a bag (list of word lists) into per-list count dicts.

        :param bag: iterable of word sequences
        :return: list of {word: count} dictionaries, one per sequence
        """
        bag_dict = []
        # Fix: the original loop variable shadowed the builtin `list`.
        for wordList in bag:
            counts = {}
            for element in wordList:
                counts[element] = counts.get(element, 0) + 1
            bag_dict.append(counts)
        return bag_dict