class LyricsForm:
    """Derive the structural form of a lyric from its self-similarity matrix.

    Typical use: build a self-similarity matrix (SSM) with ``SSMGen`` and
    pass it to ``formAnalysis``, which returns scored candidate forms whose
    sections are labelled "chorus", "verseN", "intro", "bridge" and "outro".

    The code is written to run unchanged on both Python 2 and Python 3.
    """

    def __init__(self):
        self.__viz = Visualization()
        self.__ssm = SSMOperation()
        # A child block must be at least ceil(size * LENRATE) lines long;
        # the value is handed to ChildrenFinder.children as ``lenT``.
        self.__LENRATE = 6 / 7.0
        # Passed straight to FormFinder.
        self.__TOPK = 5
        # Read by __matrix2ssm.  It was never assigned before, which made
        # that method raise AttributeError; an empty name means the matrix
        # is treated as a similarity matrix.
        self.__simObjClassName = ""

    def formAnalysis(self, SSM, isViz = False):
        """Compute candidate forms for the lyric described by ``SSM``.

        Args:
            SSM: square numpy array, one row/column per lyric line.
            isViz: when true, display the matrix before analysing it.

        Returns:
            A list of ``{"score": ..., "form": [...]}`` dicts, one per block
            combination returned by ``FormFinder.computing()``.
        """
        if isViz:
            self.__SSMViz(SSM)

        # Compute every block family contained in the SSM.
        startTime = time.time()
        self.__allFamilyM = self.__allBlockFamily(SSM)
        endTime = time.time()
        print("LyricsForm: Family Matrix Construction Time = %.2fsec" % (endTime - startTime))

        # __allBlockFamily stores a family of block size s in row s - 1;
        # the extra leading row shifts it to row s.
        self.__allFamilyM = numpy.insert(self.__allFamilyM, 0, 0, axis = 0)

        # Combine block families.
        startTime = time.time()
        ff = FormFinder(self.__allFamilyM, self.__TOPK)
        combineList = ff.computing()
        endTime = time.time()
        print("LyricsForm: Block Combination Time = %.2fsec" % (endTime - startTime))
        print("")

        # Convert the combinations into the lyric-form format.
        lineNum = SSM.shape[0]
        return self.__resultForm(combineList, lineNum)

    def __resultForm(self, combineList, lineNum):
        """Turn block-family combinations into labelled forms.

        Args:
            combineList: iterable of dicts with "coors" (index pairs into
                the family matrix) and "score".
            lineNum: total number of lyric lines.

        Returns:
            A list of ``{"score": ..., "form": [...]}`` dicts.  Each form
            entry is ``{"label": ..., "group": [[first, last], ...]}`` with
            1-based, inclusive line numbers.
        """
        resultList = []
        for combine in combineList:
            uncovered = set(range(1, lineNum + 1))
            form = []
            familyList = []
            cohesionList = []

            for coor in combine["coors"]:
                cell = self.__allFamilyM[coor[0]][coor[1]]
                # Stored blocks are 0-based; the result uses 1-based lines.
                lineNumFamily = [[block[0] + 1, block[1] + 1] for block in cell["family"]]
                familyList.append(lineNumFamily)
                cohesionList.append(cell["cohesion"])
                for first, last in lineNumFamily:
                    uncovered -= set(range(first, last + 1))

            # Chorus: the family with the highest cohesion; ties are broken
            # by the number of blocks, then by earliest position.
            maxCohesion = max(cohesionList)
            candidates = [(idx, len(familyList[idx]))
                          for idx, value in enumerate(cohesionList)
                          if value == maxCohesion]
            chorusIdx = max(candidates, key=lambda pair: pair[1])[0]
            form.append({"label": "chorus", "group": familyList[chorusIdx]})
            familyList.pop(chorusIdx)

            # Verses: every remaining family, numbered in order.
            for idx, family in enumerate(familyList):
                form.append({"label": "verse" + str(idx + 1), "group": family})

            # Intro, bridge and outro come from lines no family covers.
            if uncovered:
                # Sets are unordered, so sort before grouping consecutive
                # line numbers into [first, last] blocks.
                remainBlocks = []
                for line in sorted(uncovered):
                    if remainBlocks and remainBlocks[-1][1] + 1 == line:
                        remainBlocks[-1][1] = line
                    else:
                        remainBlocks.append([line, line])

                hasIntro = remainBlocks[0][0] == 1
                hasOutro = remainBlocks[-1][1] == lineNum

                if hasIntro:
                    form.append({"label": "intro", "group": [remainBlocks[0]]})

                # Every leftover block that is not the intro or the outro is
                # a bridge.  (The previous slice [1:-2] dropped blocks.)
                bridgeStart = 1 if hasIntro else 0
                bridgeEnd = len(remainBlocks) - 1 if hasOutro else len(remainBlocks)
                bridgeBlocks = remainBlocks[bridgeStart:bridgeEnd]
                if bridgeBlocks:
                    form.append({"label": "bridge", "group": bridgeBlocks})

                if hasOutro:
                    form.append({"label": "outro", "group": [remainBlocks[-1]]})

            resultList.append({"score": combine["score"], "form": form})

        return resultList

    def __allBlockFamily(self, M):
        """Build the table of block families for every parent block.

        A parent block is ``size`` consecutive lines starting at ``start``;
        its children are looked for in the columns to the right of it.

        Args:
            M: square self-similarity matrix (numpy array).

        Returns:
            A list of ``M.shape[0] // 2`` rows, each ``M.shape[0]`` long.
            Entry ``[size - 1][start]`` is ``None`` or a dict with keys
            "graph", "family", "cohesion" and "coverage".
        """
        M = copy.deepcopy(M)
        lineCount = M.shape[0]
        maxSize = lineCount // 2

        # Table that records every family.
        familyM = [[None] * lineCount for _ in range(maxSize)]

        # Similarity threshold between parent and children: mean plus one
        # standard deviation of all entries that are not exactly 1.0.
        nonExact = numpy.extract(M != 1.0, M)
        simT = nonExact.mean() + nonExact.std()

        cf = ChildrenFinder()

        for size in range(4, maxSize + 1):
            # Length threshold between parent and children.
            lenT = math.ceil(float(size) * self.__LENRATE)

            for start in range(0, lineCount - size):
                # Region in which children of this parent are searched.
                childrenMatrix = M[start: start + size, start + size:]

                blockFamily = cf.children(childrenMatrix, lenT = lenT, simT = simT)
                if not blockFamily:
                    continue

                # Children are reported relative to the search region; shift
                # them to absolute indices and put the parent first.
                shift = start + size
                blockFamily = [[bound + shift for bound in block]
                               for block in blockFamily]
                blockFamily.insert(0, [start, start + size - 1])

                # Complete graph over the family (edge weight = block
                # similarity) and the family's cohesion.
                familyGraph, familyCohesion = self.__familyGraphBuild(blockFamily, M)

                # Total number of lines covered by the family.
                familyCoverage = sum(block[1] - block[0] + 1 for block in blockFamily)

                familyM[size - 1][start] = {"graph": familyGraph,
                                            "family": blockFamily,
                                            "cohesion": familyCohesion,
                                            "coverage": familyCoverage}

        return familyM

    def __blockSimilarity(self, blockA, blockB, M):
        """Return the best aligned similarity between two blocks.

        The shorter block is slid along the longer one; for each offset the
        similarity is the mean of the diagonal of the corresponding window
        of ``M`` (rows = shorter block, columns = window in longer block).

        Args:
            blockA, blockB: ``[first, last]`` inclusive index pairs.
            M: self-similarity matrix (numpy array).

        Returns:
            The largest window similarity, never less than 0.0.
        """
        lenA = blockA[1] - blockA[0] + 1
        lenB = blockB[1] - blockB[0] + 1
        if lenA > lenB:
            shortBlock, longBlock = blockB, blockA
            shortLen, longLen = lenB, lenA
        else:
            shortBlock, longBlock = blockA, blockB
            shortLen, longLen = lenA, lenB

        rowSlice = slice(shortBlock[0], shortBlock[1] + 1)
        best = 0.0
        for offset in range(longLen - shortLen + 1):
            # The window keeps its width while it slides.  Previously the
            # end index ignored ``offset`` so the window shrank each step.
            colStart = longBlock[0] + offset
            colSlice = slice(colStart, colStart + shortLen)
            windowSim = M[rowSlice, colSlice].trace() / float(shortLen)
            if windowSim > best:
                best = windowSim
        return best

    def __familyGraphBuild(self, family, M):
        """Build the complete graph of a family and compute its cohesion.

        Args:
            family: list of ``[first, last]`` blocks.
            M: self-similarity matrix (numpy array).

        Returns:
            ``(familyGraph, cohesion)``: a networkx complete graph whose
            edges carry a "sim" attribute, and the mean of those values
            (0.0 when the family has fewer than two blocks).
        """
        memberCount = len(family)
        familyGraph = nx.complete_graph(memberCount)
        cohesion = 0.0

        for i in range(memberCount - 1):
            for j in range(i + 1, memberCount):
                sim = self.__blockSimilarity(family[i], family[j], M)
                familyGraph[i][j]["sim"] = sim
                cohesion += sim

        pairCount = memberCount * (memberCount - 1) / 2.0
        if pairCount:
            cohesion = cohesion / pairCount

        return familyGraph, cohesion

    def SSMGen(self, lines, simObject, matrixType='sim'):
        """Generate the self-similarity matrix of ``lines``.

        Args:
            lines: sequence of lyric lines; a copy of each line is handed to
                ``simObject.similarity``.
            simObject: object providing ``similarity(a, b)``.
            matrixType: if it contains ``'dist'`` the values are treated as
                distances and converted to similarities.

        Returns:
            The matrix after ``SSMOperation.localNormalize``.
        """
        startTime = time.time()

        lineCount = len(lines)
        SSM = numpy.zeros([lineCount, lineCount])

        # Only the upper triangle is computed; the matrix is symmetric.
        for i in range(lineCount):
            for j in range(i, lineCount):
                lineSim = simObject.similarity(lines[i][:], lines[j][:])
                SSM[i][j] = SSM[j][i] = lineSim

        maxValue = SSM.max()
        print("LyricsForm: Matrix Max Value : %f" % maxValue)

        # A negative entry marks a failed distance computation (the DTW
        # routine returns -1 on overflow); replace it by the maximum value.
        SSM[SSM < 0.0] = maxValue

        if 'dist' in matrixType:
            # The largest distance becomes the lowest similarity.
            SSM = maxValue - SSM

        SSM = self.__ssm.localNormalize(SSM)

        endTime = time.time()
        print("LyricsForm: SSM Construction Time = %.2fsec" % (endTime - startTime))
        print("LyricsForm: Matrix Shape = %s" % str(SSM.shape))

        return SSM

    def __SSMViz(self, SSM):
        """Display ``SSM`` as a gray-scale matrix."""
        self.__viz.grayMatrix(SSM, "Local Normalized SSM: ")

    def __matrix2ssm(self, M):
        """Return a cleaned copy of ``M``; the input is left untouched.

        Negative entries are replaced by the matrix maximum.  If the stored
        similarity-object class name contains ``'Dist'`` the values are
        converted from distances to similarities as ``max - value``.
        """
        M = copy.deepcopy(M)

        maxValue = M.max()
        print("LyricsForm: Matrix Max Value : %f" % maxValue)

        # A negative entry marks a failed distance computation (the DTW
        # routine returns -1 on overflow); replace it by the maximum value.
        M[M < 0.0] = maxValue

        if 'Dist' in self.__simObjClassName:
            # The largest distance becomes the lowest similarity.
            M = maxValue - M

        return M