def lookup(self, windowLength, wordLength, symbols, sequence, val):
    """Bump the score cells (`self.acscores`) of every window whose SFA word
    contains `sequence`, caching SFA transforms per (window, word, symbols) key.

    NOTE(review): `self.raw` is assumed to be indexable by sample number and to
    expose a "Samples" count — confirm against the owning class.
    """
    key = (windowLength, wordLength, symbols)

    if key not in self.sfa:
        # First request for this configuration: fit an SFA transformer on the
        # raw data (normMean=True, as in the original) and cache the per-sample
        # word strings.
        transformer = SFA("EQUI_DEPTH")
        transformer.fitWindowing(self.raw, windowLength, wordLength, symbols, True, True)
        self.sfa[key] = []
        for sampleIdx in range(self.raw["Samples"]):
            sfaWords = transformer.transformWindowing(self.raw[sampleIdx])
            self.sfa[key].append([self.sfaToDWord(w, symbols) for w in sfaWords])

    # Every window position whose word contains the query sequence adds `val`
    # to the following `windowLength` score cells of that sample.
    for sampleIdx in range(self.raw["Samples"]):
        for pos, dword in enumerate(self.sfa[key][sampleIdx]):
            if sequence in dword:
                self.acscores[sampleIdx, pos:pos + windowLength] += val
def sfaToWord(word):
    """Render one numeric SFA word as letters: symbol value 0 -> 'a', 1 -> 'b', ...

    Uses ``str.join`` instead of repeated ``+=`` (which is quadratic).
    """
    return "".join(chr(w + 97) for w in word)


def sfaToWordList(wordList):
    """Render a list of SFA words as 'word; word; ...' (trailing '; ' kept,
    matching the original output format)."""
    return "".join(sfaToWord(word) + "; " for word in wordList)


# --- Demo script: fit SFA on the CBF training set and print the SFA words of
# --- every test series.
train, test, train_labels, test_labels = load("CBF", "\t")

sfa = SFA("EQUI_DEPTH")
# NOTE(review): this fitWindowing call passes train_labels as the second
# argument, unlike the other call sites in this codebase — confirm the
# intended SFA.fitWindowing signature.
sfa.fitWindowing(train, train_labels, windowLength, wordLength, symbols, normMean, True)
sfa.printBins()

for i in range(test.shape[0]):
    wordList = sfa.transformWindowing(test.iloc[i, :])
    print(str(i) + "-th transformed time series SFA word " + "\t" + sfaToWordList(wordList))
class BOSS():
    """Bag-of-SFA-Symbols: transforms time series into histograms of packed
    SFA words (Schäfer's BOSS model)."""

    def __init__(self, maxF, maxS, windowLength, normMean):
        self.maxF = maxF                  # maximum SFA word length (features)
        self.symbols = maxS               # alphabet size
        self.windowLength = windowLength  # sliding-window size
        self.normMean = normMean          # whether windows are mean-normalized
        self.signature = None             # lazily fitted SFA transformer

    def createWords(self, samples):
        """Transform every sample into its list of packed SFA words.

        Fits the SFA quantization bins on first use and reuses them afterwards.
        """
        if self.signature is None:  # PEP 8: compare to None with `is`
            self.signature = SFA("EQUI_DEPTH")
            self.signature.fitWindowing(samples, self.windowLength, self.maxF,
                                        self.symbols, self.normMean, True)
            # self.signature.printBins()

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            # Fix: int2byte is defined as a method of this class but was called
            # as a free function; use self.int2byte so the class is self-contained.
            bits = self.int2byte(self.symbols)
            words.append([self.createWord(word, self.maxF, bits) for word in sfaWords])
        return words

    def createWord(self, numbers, maxF, bits):
        """Pack up to maxF symbols (each `bits` wide) into one integer,
        then fold the result into signed 32-bit range like the Java original."""
        shortsPerLong = int(round(60 / bits))
        to = min(len(numbers), maxF)

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, min(to, shortsPerLong + s)):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        # Emulate Java 32-bit signed overflow wrapping.
        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b

    def createBagOfPattern(self, words, samples, f):
        """Histogram the first f symbols of each packed word per sample,
        skipping consecutive duplicates (numerosity reduction).

        `samples` is unused here but kept for interface parity with BOSSVS.
        """
        bagOfPatterns = []
        usedBits = self.int2byte(self.symbols)  # fix: method call, not free function
        mask = (1 << (usedBits * f)) - 1

        for j in range(len(words)):
            BOP = {}
            lastWord = -9223372036854775808  # Java Long.MIN_VALUE sentinel
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                if word != lastWord:  # numerosity reduction
                    BOP[word] = BOP.get(word, 0) + 1
                    lastWord = word
            bagOfPatterns.append(BOP)
        return bagOfPatterns

    def int2byte(self, number):
        """Return floor(log2(number)) for positive 32-bit ints via bit shifts."""
        log = 0
        if (number & 0xffff0000) != 0:
            number >>= 16
            log = 16
        if number >= 256:
            number >>= 8
            log += 8
        if number >= 16:
            number >>= 4
            log += 4
        if number >= 4:
            number >>= 2
            log += 2
        return log + (number >> 1)

    def bag2dict(self, bag):
        """Convert a list of word sequences into a list of {word: count} dicts."""
        bag_dict = []
        for word_seq in bag:  # renamed: `list` shadowed the builtin
            counts = {}
            for element in word_seq:
                counts[element] = counts.get(element, 0) + 1
            bag_dict.append(counts)
        return bag_dict
class BOSSVS():
    """BOSS in Vector Space: BOSS word histograms weighted per class with
    tf-idf and L2-normalized."""

    def __init__(self, maxF, maxS, windowLength, normMean, logger=None):
        self.maxF = maxF                  # maximum SFA word length (features)
        self.symbols = maxS               # alphabet size
        self.windowLength = windowLength  # sliding-window size
        self.normMean = normMean          # whether windows are mean-normalized
        self.signature = None             # lazily fitted SFA transformer
        # Fix: the original called logger.Log unconditionally, which crashed
        # with AttributeError when the documented default (logger=None) was used.
        if logger is not None:
            logger.Log(self.__dict__, level=0)
        self.logger = logger

    def createWords(self, samples):
        """Transform every sample into its list of packed SFA words,
        fitting the SFA bins on first use."""
        if self.signature is None:
            self.signature = SFA("EQUI_DEPTH", logger=self.logger)
            self.signature.fitWindowing(samples, self.windowLength, self.maxF,
                                        self.symbols, self.normMean, True)
            self.signature.printBins(self.logger)

        words = []
        for i in range(samples["Samples"]):
            sfaWords = self.signature.transformWindowing(samples[i])
            # NOTE(review): int2byte is expected to be a module-level helper
            # (this class does not define it) — confirm it is imported.
            bits = int2byte(self.symbols)
            words.append([self.createWord(word, self.maxF, bits) for word in sfaWords])
        return words

    def createWord(self, numbers, maxF, bits):
        """Pack up to maxF symbols (each `bits` wide) into one integer,
        then fold the result into signed 32-bit range like the Java original."""
        shortsPerLong = int(round(60 / bits))
        to = min(len(numbers), maxF)

        b = 0
        s = 0
        shiftOffset = 1
        for i in range(s, min(to, shortsPerLong + s)):
            shift = 1
            for j in range(bits):
                if (numbers[i] & shift) != 0:
                    b |= shiftOffset
                shiftOffset <<= 1
                shift <<= 1

        # Emulate Java 32-bit signed overflow wrapping.
        limit = 2147483647
        total = 2147483647 + 2147483648
        while b > limit:
            b = b - total - 1
        return b

    def createBagOfPattern(self, words, samples, f):
        """Histogram the first f symbols of each packed word per sample into a
        BagOfBigrams labeled with the sample's class, skipping consecutive
        duplicates (numerosity reduction)."""
        bagOfPatterns = []
        usedBits = int2byte(self.symbols)
        mask = (1 << (usedBits * f)) - 1

        for j in range(len(words)):
            BOP = BagOfBigrams(samples[j].label)
            lastWord = -9223372036854775808  # Java Long.MIN_VALUE sentinel
            for offset in range(len(words[j])):
                word = words[j][offset] & mask
                if word != lastWord:  # numerosity reduction
                    BOP.bob[word] = BOP.bob.get(word, 0) + 1
                    lastWord = word
            bagOfPatterns.append(BOP)
        return bagOfPatterns

    def createTfIdf(self, bagOfPatterns, sampleIndices, uniqueLabels, labels):
        """Build one tf-idf weight vector per class from the training bags.

        Words occurring in every class get weight 0; vectors are L2-normalized.
        """
        matrix = {}
        for label in uniqueLabels:
            matrix[label] = {}

        # Accumulate word counts per class.
        for j in sampleIndices:
            label = labels[j]
            for key, value in bagOfPatterns[j].bob.items():
                matrix[label][key] = matrix[label].get(key, 0) + value

        # Document frequency: in how many classes does each word occur?
        wordInClassFreq = {}
        for classDict in matrix.values():
            for key2 in classDict:
                wordInClassFreq[key2] = wordInClassFreq.get(key2, 0) + 1

        for key, tfIDFs in matrix.items():
            for key2, value2 in tfIDFs.items():
                wordCount = wordInClassFreq.get(key2)
                # `and` instead of bitwise `&` on booleans (same truth table here).
                if value2 > 0 and len(uniqueLabels) != wordCount:
                    tfValue = 1. + math.log10(value2)
                    idfValue = math.log10(1. + len(uniqueLabels) / wordCount)
                    # NOTE(review): tf divided by idf is kept from the original;
                    # classic tf-idf multiplies — confirm against the reference
                    # implementation before changing.
                    tfIDFs[key2] = tfValue / idfValue
                else:
                    tfIDFs[key2] = 0.

        return self.normalizeTfIdf(matrix)

    def normalizeTfIdf(self, classStatistics):
        """L2-normalize each class's weight vector in place; returns the dict."""
        for label, wordScores in classStatistics.items():
            norm = math.sqrt(sum(v * v for v in wordScores.values()))
            if norm > 0:
                for key2 in wordScores:
                    wordScores[key2] /= norm
        return classStatistics
# NOTE(review): this chunk begins mid-function — the `def` header of the
# sfaToWord variant whose body starts here ("word_string = ...") lies outside
# this view, so the fragment cannot be safely reformatted or restructured.
# It also contains a complete sfaToWordList helper and a Gun_Point demo script
# collapsed onto one line. Left byte-identical pending sight of the full file.
word_string = "" alphabet = "abcdefghijklmnopqrstuv" for w in word: word_string += alphabet[w] return word_string def sfaToWordList(wordList): list_string = "" for word in wordList: list_string += sfaToWord(word) list_string += "; " return list_string train, test = uv_load("Gun_Point") sfa = SFA("EQUI_DEPTH") sfa.fitWindowing(train, windowLength, wordLength, symbols, normMean, True) sfa.printBins() for i in range(test["Samples"]): wordList = sfa.transformWindowing(test[i]) print(str(i) + "-th transformed time series SFA word " + "\t" + sfaToWordList(wordList))
# NOTE(review): this chunk is the interior of a larger test-driver script —
# it ends with an `except:` whose matching `try:` is outside this view, and
# the leading statements presumably sit inside an enclosing `if`/`try` whose
# indentation cannot be recovered from the collapsed text. Left byte-identical.
# When the full file is visible: the bare `except:` should be narrowed (it
# currently swallows every error, including KeyboardInterrupt, and reports
# any failure as "Test and Dataset combo entered is not available").
logger.Log("Test: SFAWordTest") from src.transformation.SFA import * sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger) sfa.fitTransform(train, FIXED_PARAMETERS['wordLength'], FIXED_PARAMETERS['symbols'], FIXED_PARAMETERS['normMean']) logger.Log(sfa.__dict__) for i in range(test["Samples"]): wordList = sfa.transform2(test[i].data, "null", str_return=True) logger.Log("%s-th transformed TEST time series SFA word \t %s " % (i, wordList)) if FIXED_PARAMETERS['test'] == 'SFAWordWindowingTest': logger.Log("Test: SFAWordWindowingTest") from src.transformation.SFA import * sfa = SFA(FIXED_PARAMETERS["histogram_type"], logger=logger) sfa.fitWindowing(train, FIXED_PARAMETERS['windowLength'], FIXED_PARAMETERS['wordLength'], FIXED_PARAMETERS['symbols'], FIXED_PARAMETERS['normMean'], True) logger.Log(sfa.__dict__) for i in range(test["Samples"]): wordList = sfa.transformWindowing(test[i], str_return=True) logger.Log("%s-th transformed time series SFA word \t %s " % (i, wordList)) except: logger.Log("Test and Dataset combo entered is not available")