Exemple #1
0
def help():
    """Print the basic and compound flags together with their descriptions.

    Basic flags are listed first. Every compound flag except ``'All'`` is
    then listed with its description, after which the flags it maps to
    (``flags.compoundMap[flag]``) are handed to ``util.printList`` with
    ``indent=8`` and a blank line is printed.
    """
    # A single parenthesised argument prints exactly the same text under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print("Base Flags:")
    for flag in flags.basic:
        print("    %s: %s" % (flag, flags.descriptions[flag]))
    print("")
    print("Compound Flags:")
    for flag in flags.compound:
        if flag == 'All':
            continue
        print("    %s: %s" % (flag, flags.descriptions[flag]))
        util.printList(flags.compoundMap[flag], indent=8)
        print("")
Exemple #2
0
def help():
    """Print the basic and compound flags together with their descriptions.

    Basic flags are listed first. Every compound flag except ``'All'`` is
    then listed with its description, after which the flags it maps to
    (``flags.compoundMap[flag]``) are handed to ``util.printList`` with
    ``indent=8`` and a blank line is printed.
    """
    # A single parenthesised argument prints exactly the same text under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print("Base Flags:")
    for flag in flags.basic:
        print("    %s: %s" % (flag, flags.descriptions[flag]))
    print("")
    print("Compound Flags:")
    for flag in flags.compound:
        if flag == 'All':
            continue
        print("    %s: %s" % (flag, flags.descriptions[flag]))
        util.printList(flags.compoundMap[flag], indent=8)
        print("")
Exemple #3
0
            length, ptr = length + 1, ptr.next

        # Calculate the offset
        offset, prev, ptr = k % length, None, head
        if offset == 0:  # Remains unchanged
            return head

        # Find the head of the new linked list
        for i in range(length - offset):
            prev, ptr = ptr, ptr.next
        newHead = ptr

        # Link the original tail to original head
        # Update new tail
        tail.next, prev.next = head, None

        return newHead


# Rotate a freshly built list for every case and print the result.
# The expected output is noted next to each case.
for values, k in (
        ([1, 2, 3, 4, 5], 2),  # 4 -> 5 -> 1 -> 2 -> 3
        ([0, 1, 2], 4),        # 2 -> 0 -> 1
        ([], 0),               # None
        ([1], 1),              # 1
):
    printList(Solution().rotateRight(initList(values), k))
Exemple #4
0
            if llst1:
                tail.next = llst1
            elif llst2:
                tail.next = llst2
            return head

        def mergeSort(head):
            """Sort the list starting at ``head`` and return the new head.

            The list is cut into two halves, each half is sorted
            recursively and the two results are combined by ``mergeList``.
            """
            # Zero or one node: nothing to sort.
            if not head or not head.next:
                return head

            # `fast` advances two nodes per step while `slow` advances one,
            # so `slow` ends on the last node of the first half.
            slow, fast = None, head
            while fast and fast.next:
                slow = slow.next if slow else head
                fast = fast.next.next

            # Detach the second half from the first.
            second = slow.next
            slow.next = None

            return mergeList(mergeSort(head), mergeSort(second))

        return mergeSort(head)


# Sort a freshly built list for every case and print the result.
# The expected output is noted next to each case.
for values in (
        [4, 2, 1, 3],      # 1 -> 2 -> 3 -> 4 -> NULL
        [-1, 5, 3, 4, 0],  # -1 -> 0 -> 3 -> 4 -> 5 -> NULL
        [],                # NULL
):
    printList(Solution().sortList(initList(values)))
Exemple #5
0
from solution import Solution
from util import buildSequeneces
from util import printList
if __name__ == '__main__':
    # Exercise mergeTwoLists with two non-empty lists, with an empty first
    # list, and finally with both lists empty (the empty first list is reused).
    solver = Solution()

    first, second = buildSequeneces([1, 2, 4]), buildSequeneces([1, 3, 4])
    printList(solver.mergeTwoLists(first, second))

    first, second = buildSequeneces([]), buildSequeneces([1, 2, 5])
    printList(solver.mergeTwoLists(first, second))

    second = buildSequeneces([])
    printList(solver.mergeTwoLists(first, second))
    
Exemple #6
0
from solution import Solution
from util import buildSequeneces
from util import printList

if __name__ == '__main__':
    # Print rotateRight results for a five-node list (k = 0..5), an empty
    # list, a single node, and a two-node list (k = 1..3).
    solver = Solution()

    for k in range(6):
        printList(solver.rotateRight(buildSequeneces([1, 2, 3, 4, 5]), k))

    for values in ([], [1]):
        printList(solver.rotateRight(buildSequeneces(values), 5))

    for k in (1, 2, 3):
        printList(solver.rotateRight(buildSequeneces([1, 2]), k))
        while l1 and l2:
            if l1.val < l2.val:
                if not tail:
                    tail, l1 = l1, l1.next
                    head = tail
                else:
                    tail.next, l1 = l1, l1.next
                    tail = tail.next
            else:
                if not tail:
                    tail, l2 = l2, l2.next
                    head = tail
                else:
                    tail.next, l2 = l2, l2.next
                    tail = tail.next
        if l1:
            tail.next = l1
        elif l2:
            tail.next = l2
        return head


# Merge two freshly built lists for every case and print the result.
# The expected output is noted next to each case.
for left, right in (
        ([1, 2, 4], [1, 3, 4]),  # 1 -> 1 -> 2 -> 3 -> 4 -> 4 -> NULL
        ([], []),                # NULL
        ([], [0]),               # 0 -> NULL
):
    printList(Solution().mergeTwoLists(initList(left), initList(right)))
        #         node1 = curr
        #     curr, length = curr.next, length + 1
        # node2 = head
        # for _ in range(length - k):
        #     node2 = node2.next
        # node1.val, node2.val = node2.val, node1.val
        # return head

        # O(n) time & O(n) space
        lst, curr, idx = [], head, 1
        while curr:
            lst+= [curr]
            curr, idx = curr.next, idx + 1
        lst[k - 1].val, lst[-k].val = lst[-k].val, lst[k - 1].val
        return head

# Call swapNodes on a fresh 1..5 list for k = 1..5 and print each result.
# Expected output, in order:
#   k = 1: 5 -> 2 -> 3 -> 4 -> 1 -> NULL
#   k = 2: 1 -> 4 -> 3 -> 2 -> 5 -> NULL
#   k = 3: 1 -> 2 -> 3 -> 4 -> 5 -> NULL
#   k = 4: 1 -> 4 -> 3 -> 2 -> 5 -> NULL
#   k = 5: 5 -> 2 -> 3 -> 4 -> 1 -> NULL
for k in range(1, 6):
    printList(Solution().swapNodes(initList([1, 2, 3, 4, 5]), k))
Exemple #9
0
from util import ListNode
from util import buildSequeneces
from util import printList
from solution import Solution

if __name__ == '__main__':
    # Print reverseKGroup results for a five-node list (k = 0..7), an empty
    # list (k = 0..1) and a single-node list (k = 0..2).
    sol = Solution()

    for group_size in range(8):
        printList(sol.reverseKGroup(buildSequeneces([1, 2, 3, 4, 5]), group_size))

    for group_size in range(2):
        printList(sol.reverseKGroup(buildSequeneces([]), group_size))

    k = 1
    for group_size in range(k + 2):
        printList(sol.reverseKGroup(buildSequeneces([1]), group_size))
sys.path = ['.', '../', '../../'] + sys.path

from util import ListNode, initList, printList

class Solution:
    def removeNthFromEnd(self, head: ListNode, n: int) -> ListNode:
        """Unlink the n-th node counted from the end and return the head.

        A lead pointer is moved ``n - 1`` nodes ahead, then both pointers
        walk until the lead pointer sits on the last node; at that point
        ``victim`` is the node to unlink and ``before`` its predecessor.
        """
        # Put the lead pointer n - 1 nodes ahead (or on None if the list
        # is shorter than that).
        lead, steps = head, 1
        while steps < n and lead:
            lead = lead.next
            steps += 1

        # Walk in lockstep until the lead pointer reaches the last node.
        before, victim = None, head
        while lead and lead.next:
            before, victim, lead = victim, victim.next, lead.next

        if before:
            before.next = victim.next
            return head
        if victim:
            # No predecessor: the node being dropped is the head itself.
            return head.next
        return None

# Remove the n-th node from the end of a freshly built list and print it.
# The expected output is noted next to each case.
for values, n in (
        ([1, 2, 3, 4, 5], 2),  # 1 -> 2 -> 3 -> 5 -> NULL
        ([1], 1),              # NULL
        ([1, 2], 1),           # 1 -> NULL
        ([1, 2], 2),           # 2 -> NULL
):
    printList(Solution().removeNthFromEnd(initList(values), n))
#         self.next = next


class Solution:
    def addTwoNumbers(self, l1: ListNode, l2: ListNode) -> ListNode:
        """Add two numbers stored as digit lists, least significant first.

        One result node is created per loop iteration; the loop keeps
        running while either input has nodes left or a carry is pending.
        Returns the head of the result list (None if nothing was produced).
        """
        head = tail = None
        carry = 0
        while l1 or l2 or carry:
            # Sum the current digits (a finished list contributes 0).
            total = carry
            if l1:
                total += l1.val
                l1 = l1.next
            if l2:
                total += l2.val
                l2 = l2.next
            carry, digit = divmod(total, 10)

            node = ListNode(digit)
            if head:
                tail.next = node
            else:
                head = node
            tail = node
        return head


# Add two freshly built digit lists for every case and print the sum.
# The expected output is noted next to each case.
for digits1, digits2 in (
        ([5], [5]),        # 0 -> 1 -> NULL
        ([1, 8], [0]),     # 1 -> 8 -> NULL
        ([9, 9, 9], [1]),  # 0 -> 0 -> 0 -> 1 -> NULL
):
    ll1, ll2 = initList(digits1), initList(digits2)
    printList(Solution().addTwoNumbers(ll1, ll2))
Exemple #12
0
sys.path = ['.', '../', '../../'] + sys.path

from util import ListNode, initList, printList


class Solution:
    def deleteDuplicates(self, head: ListNode) -> ListNode:
        """Drop every value that occurs more than once in adjacent nodes.

        Whenever a node has the same value as its successor, the whole run
        of nodes carrying that value is unlinked. Returns the new head.
        """
        prev, curr = None, head
        while curr and curr.next:
            if curr.val != curr.next.val:
                # Value is not repeated here: keep the node and move on.
                prev, curr = curr, curr.next
                continue

            # Skip the complete run of nodes sharing the repeated value.
            repeated = curr.val
            while curr and curr.val == repeated:
                curr = curr.next

            # Reconnect the kept part of the list to whatever follows.
            if prev:
                prev.next = curr
            else:
                head = curr
        return head


# Run deleteDuplicates on a freshly built list for every case and print it.
# The expected output is noted next to each case.
for values in (
        [1, 1, 1, 2, 3],  # 2 -> 3 -> NULL
        [1, 1, 1],        # NULL
        [1],              # 1 -> NULL
):
    printList(Solution().deleteDuplicates(initList(values)))
Exemple #13
0
from util import buildSequeneces
from util import printList

from solution import Solution

if __name__ == '__main__':
    # Print swapPairs results for lists of length 0, 1, 2, 3, 4 and 6.
    sol = Solution()
    for values in (
            [],
            [1],
            [1, 2],
            [1, 2, 3],
            [1, 2, 3, 4],
            [1, 2, 3, 4, 5, 6],
    ):
        result = sol.swapPairs(buildSequeneces(values))
        printList(result)
        # # O(n) time & O(n) space
        # queue, ptr = [], head
        # while ptr:
        #     queue.append(ptr)
        #     ptr = ptr.next
        # while queue:
        #     node = queue.pop(0)
        #     node.next, ptr = ptr, node
        # return ptr

        # # O(n) Recursive
        # if not head or not head.next:
        #     return head
        # tail = self.reverseList(head.next)
        # head.next.next, head.next = head, None
        # return tail


# Reverse a freshly built list for every case and print the result.
# The expected output is noted next to each case.
for values in (
        [1, 2, 3, 4, 5],  # 5 -> 4 -> 3 -> 2 -> 1 -> NULL
        [],               # NULL
        [1],              # 1 -> NULL
        [1, 2],           # 2 -> 1 -> NULL
):
    printList(Solution().reverseList(initList(values)))
Exemple #15
0
def ldaa(dic, corpus, tfidf, bContentList, newUniBContentListString, topicNum):
    corpus_tfidf = tfidf[corpus]
    print '------------------type(corpus_tfidf):', type(corpus_tfidf)
    # for i in corpus_tfidf:
    #     print i

    lda = models.LdaModel(corpus_tfidf, id2word=dic, num_topics=topicNum)
    ldaOut = lda.print_topics(topicNum)

    li = 5
    vec = [(0, 1), (4, 1)]
    vec = dic.doc2bow(bContentList[li])

    # get similarity matrix of len(bContentList) * len(bContentList)
    index = similarities.MatrixSimilarity(lda[corpus])
    simMatrix = []

    # get the Similarity Matrix(eg: 100 * 100) of all barrages,
    for bIndex in range(len(bContentList)):
        vec = bContentList[bIndex]
        vec_bow = dic.doc2bow(bContentList[bIndex])
        vec_lda = lda[vec_bow]
        sims = index[vec_lda]

        # print list(enumerate(sims))

        # sorted with similarity from high to low
        # sims = sorted(enumerate(sims), key=lambda item: -item[1])
        # print sims, len(sims), type(sims)

        simMatrix.append(list(enumerate(sims)))

    # eg: simMatrix[1] = [(0, 0.91061151), (1, 0.99999994), (2, 0.99999994), (3, 0.99999994), (4, 0.73748994), (5, 0.81874228)......]
    # print len(simMatrix), simMatrix[1]
    # sys.exit()

    # print all lda topics words
    # such as:
    # 0.002*"合影" + 0.002*"钱" + 0.002*"撒花" + 0.002*"没" + 0.002*"完结" + 0.002*"看" + 0.002*"啊" + 0.002*"之" + 0.002*"湫" + 0.002*"一个"
    # 0.002*"买" + 0.002*"第一次" + 0.002*"支持" + 0.002*"啊" + 0.002*"没" + 0.002*"完结" + 0.002*"湫" + 0.002*"国漫" + 0.002*"撒花" + 0.002*"b"
    # 0.004*"第一次" + 0.003*"湫" + 0.003*"合影" + 0.003*"在" + 0.003*"存活" + 0.003*"买" + 0.003*"确认" + 0.003*"啊" + 0.003*"椿" + 0.002*"撒花"
    # 0.003*"完结" + 0.003*"撒花" + 0.003*"钱" + 0.003*"合影" + 0.002*"再见" + 0.002*"没" + 0.002*"啊" + 0.002*"湫" + 0.002*"好" + 0.001*"第一次"
    # 0.003*"存活" + 0.003*"确认" + 0.002*"合影" + 0.002*"没" + 0.002*"钱" + 0.002*"秋水共长天一色" + 0.002*"第一次" + 0.001*"靠" + 0.001*"也" + 0.001*"生日"
    for i in ldaOut:
        r = i[1].encode('utf-8')
        print r

    for i in ldaOut:
        r = i[1].encode('utf-8')
        print 'Topic', ldaOut.index(i), ':',
        util.printList(rmNum(r))

    # sys.exit()

    print type(ldaOut[0])
    print type(ldaOut[0][0])

    corpus_lda = lda[corpus_tfidf]
    resList = []
    iii = 0

    # eg: doc [(0, 0.033333333333334041), (1, 0.033333333333659149), (2, 0.03333333333337106), (3, 0.033333333333336511), (4, 0.033333333333333631), (5, 0.033333333577374141), (6, 0.033333333333333381), (7, 0.53333330176939997), (8, 0.033333333641347308), (9, 0.033333333333333388), (10, 0.033333333333333409), (11, 0.033333358397907714), (12, 0.033333333333333381), (13, 0.033333333333333368), (14, 0.033333339280269603)]
    for doc in corpus_lda:
        # eg: res = (3, 0.72867093662442284), res has 72% posibility to be in type 3
        res = getMax(doc)
        resList.append(res)
    print '---type(corpus_tfidf), type(corpus_lda)', type(corpus_tfidf), type(
        corpus_lda)
    print '---len(resList)', len(resList)

    # len = topicNum
    simMatrixTopicList = []
    for topicId in range(topicNum):
        simMatrixTopic = [
            i for i in range(len(resList)) if resList[i][0] == topicId
        ]
        print topicId, 'topic has:', len(simMatrixTopic), 'barrage'
        simMatrixTopicList.append(simMatrixTopic)
        # print len(simMatrixTopic), simMatrixTopic

    # without square
    # # inner distance
    # # sum of all similarity of i'th row
    # iRow = 0.0
    # num = 0
    # innDisMatrix = [0.0 for i in range(topicNum)]
    # for topicId in range(topicNum):
    #     for i in range(len(simMatrixTopicList[topicId])-1):
    #         for j in range(i+1, len(simMatrixTopicList[topicId])):
    #             # print simMatrix[simMatrixTopicList[topicId][i]][simMatrixTopicList[topicId][j]][1]
    #             iRow += simMatrix[simMatrixTopicList[topicId][i]][simMatrixTopicList[topicId][j]][1]
    #         # print topicId, 'topic, num:', num
    #     lenOfIRow = len(simMatrixTopicList[topicId])
    #     numOfIRow = (1 + lenOfIRow - 1) * (lenOfIRow - 1) / 2
    #     innDisMatrix[topicId] = iRow/numOfIRow
    #     iRow = 0.0
    # print 'inner distance:', innDisMatrix
    #
    # aveInnDis = sum(innDisMatrix) / len(innDisMatrix)
    # print 'average inner distance:', aveInnDis
    #
    # # external distance
    # cols = topicNum
    # rows = topicNum
    # extDisMatrix = [[0.0 for col in range(cols)] for row in range(rows)]
    # iRow = 0.0
    # for topicId in range(topicNum):
    #     for ti2 in range(topicId+1, topicNum):
    #         for i in range(len(simMatrixTopicList[topicId])):
    #             for j in range(len(simMatrixTopicList[ti2])):
    #                 iRow += simMatrix[simMatrixTopicList[topicId][i]][simMatrixTopicList[ti2][j]][1]
    #             # iRow += iRow
    #         # print iRow
    #         lenOfIRow = len(simMatrixTopicList[topicId]) * len(simMatrixTopicList[ti2])
    #         extDisMatrix[topicId][ti2] = iRow / float(lenOfIRow)
    #         iRow = 0.0
    #
    # print 'external distance:', extDisMatrix
    #
    # totExtDis = 0
    # aveExtDis = 0
    # num = 0
    # for i in extDisMatrix:
    #     for j in i:
    #         if j != 0:
    #             totExtDis += j
    #             num += 1
    # aveExtDis = totExtDis / float(num)
    #
    # print 'average external distance:', aveExtDis
    # print 'inner/external value:', aveInnDis/aveExtDis

    # within square(**2)

    # inner distance
    # sum of all similarity of i'th row
    iRow = 0.0
    num = 0
    innDisMatrix = [0.0 for i in range(topicNum)]

    # innDisMatrixNum[0]: the number of similarity value every topic
    innDisMatrixNum = [0.0 for i in range(topicNum)]
    for topicId in range(topicNum):
        for i in range(len(simMatrixTopicList[topicId]) - 1):
            for j in range(i + 1, len(simMatrixTopicList[topicId])):
                iRow += (simMatrix[simMatrixTopicList[topicId][i]][
                    simMatrixTopicList[topicId][j]][1])**2
            # print topicId, 'topic, num:', num
        lenOfIRow = len(simMatrixTopicList[topicId])
        numOfIRow = (1 + lenOfIRow - 1) * (lenOfIRow - 1) / 2
        innDisMatrix[topicId] = iRow / numOfIRow
        # innDisMatrixNum[topicId] = numOfIRow
        iRow = 0.0
    print 'inner distance:', innDisMatrix

    aveInnDis = 1 / (sum(innDisMatrix) / topicNum)
    print 'average inner distance:', aveInnDis

    # external distance
    cols = topicNum
    rows = topicNum
    extDisMatrix = [[0.0 for col in range(cols)] for row in range(rows)]

    # extDisMatrixNum[0]: the number of similarity value every topic
    # extDisMatrixNum = [[0.0 for col in range(cols)] for row in range(rows)]
    iRow = 0.0
    # countt = 0
    for topicId in range(topicNum):
        for ti2 in range(topicId + 1, topicNum):
            for i in range(len(simMatrixTopicList[topicId])):
                for j in range(len(simMatrixTopicList[ti2])):
                    iRow += (simMatrix[simMatrixTopicList[topicId][i]][
                        simMatrixTopicList[ti2][j]][1])**2
                    # countt += 1
            # print iRow
            iRowNum = len(simMatrixTopicList[topicId]) * len(
                simMatrixTopicList[ti2])
            # print 'iRowNum:', iRowNum, 'countt:', countt
            extDisMatrix[topicId][ti2] = iRow / iRowNum
            iRow = 0.0
            # countt = 0

    print 'external distance:', extDisMatrix

    totExtDis = 0
    aveExtDis = 0

    for i in extDisMatrix:
        for j in i:
            totExtDis += j
    extNoneZeroNum = (1 + cols - 1) * (cols - 1) / 2

    aveExtDis = 1 / (totExtDis / extNoneZeroNum)

    print 'average external distance:', aveExtDis
    print 'inner/external value:', aveInnDis / aveExtDis

    # return aveInnDis, aveExtDis

    # sys.exit()

    # topic possibility distribution in user profile
    topicPosi = []
    for topicId in range(topicNum):
        posiList = [i[1] for i in resList if i[0] == topicId]

        # average accuracy rate
        possi = sum(posiList) / len(posiList)
        topicPosi.append(possi)

    # sys.exit()

    fullPath = os.getcwd()

    # concatenate full path
    userCodeIdListFilePath = fullPath + '/data/users/' + vCid + '/userIdList.txt'
    userCodeIdList = util.getUserCodeIdList(userCodeIdListFilePath)
    # for i in userCodeIdList:
    #     print i

    favTagTlist = util.getFilesOfDir(vCid)

    # concatenate full path
    favTagTlist = [
        fullPath + '/data/users/' + vCid + '/' + tagT for tagT in favTagTlist
    ]
    for i in favTagTlist:
        print i
    tagMatrix, tagVNumMatrix, userList, catAll = clustering.scanAllTags(
        favTagTlist)

    catNum = len(catAll)

    # eg: topicDist =
    # [[125.  126.   83.   18.  121.   44.   72.    0.  108.  113.   46.   66.  114.    0.  109.],
    # [ 799.  785.  558.  141.  737.  286.  425.    2.  659.  611.  376.  460.  765.    0.  657.],
    # [ 308.  321.  238.   48.  272.  116.  162.    0.  259.  236.  135.  173.  284.    1.  267.],
    # [ 557.  540.  378.   99.  490.  215.  315.    0.  457.  424.  232.  295.  514.    0.  449.],
    # [ 537.  535.  361.   86.  477.  176.  293.    0.  463.  416.  234.  297.  509.    0.  444.]]
    # 音乐 动画 番剧 广告 电影 时尚 舞蹈 公告 游戏 鬼畜 娱乐 电视剧 生活 活动 科技
    topicDist = np.zeros((topicNum, catNum))
    # every percentage of topic
    topicPercDist = np.zeros((topicNum, catNum))

    topicDistNoneNum = np.zeros(topicNum)
    userIdNoneNum = 0

    # topic index list: [0, 1, 2, 3, 4]
    topicNumList = range(topicNum)

    # a list of: all users' barrage data of a topic
    aTopicNewUniBContentListString = []
    topicUserNumList = []

    for i in topicNumList:
        aTopicNewUniBContentListString.append([])
        topicUserNumList.append(0)

    # used to calculate the number of topicPercDist added
    topicPercDistAddNum = np.zeros(topicNum)
    for i in range(len(resList)):
        # print i
        topicId = resList[i][0]
        if topicId in topicNumList:
            userId = util.getUserId(newUniBContentListString[i][0],
                                    userCodeIdList)
            # print newUniBContentListString[i][0], userId
            aTopicNewUniBContentListString[topicId].append(
                newUniBContentListString[i])
            topicUserNumList[topicId] += 1

            if userId is not None:
                # print userId, favTagTlist, type(favTagTlist)
                res = clustering.getTagLineOfUser(tagMatrix, tagVNumMatrix,
                                                  userList, userId)
                if res is not None:
                    tagLineOfUI, tagVNumLineOfUI = res
                # userId is not in the list
                else:
                    continue

                if tagLineOfUI is not None:
                    # print len(tagLineOfUI), userId, tagLineOfUI
                    topicDist[topicId] += tagLineOfUI

                    # the perc distribution of tagVideo number of a user
                    tagVPercLineOfUI = np.around(
                        tagVNumLineOfUI / float(sum(tagVNumLineOfUI)), 3)
                    topicPercDist[topicId] += tagVPercLineOfUI
                    topicPercDistAddNum[topicId] += 1
                else:
                    topicDistNoneNum += 1
            else:
                userIdNoneNum += 1

    topicWordBListL = []
    # topicWordBListLSec = []
    topicWordBListL2 = []
    topicWordBListL3 = []

    # print Top 10 frequent words in a topic & the barrageList of the topic, in one time
    for i in topicNumList:
        bContentList2 = xmlDL.divideSent(aTopicNewUniBContentListString[i], 0)
        wordList2 = getMostWord(bContentList2, 20)
        print '------------topic', i, ':',
        for j in wordList2:
            print j[0],
        print

    top10weightActor = []
    top10weightNoActor = []

    # print Top 10 frequent words in a topic & the barrageList of the topic
    for i in topicNumList:
        print '------------topic', i, '-------------users:', topicUserNumList
        # divideSent(,0) no actors' name
        bContentList = xmlDL.divideSent(aTopicNewUniBContentListString[i], 0)
        wordList = getMostWord(bContentList, 20)
        for j in wordList:
            print j[0], j[1]

        for j in aTopicNewUniBContentListString[i]:
            # eg: abb5230a 417.671 灵尊:哟,火柴棍

            if wordList[0][0].encode('utf-8') in j[2]:
                util.printList(j)
        print 'wordList[0][0]', wordList[0][0]
        # the index list of all barrage in topic i which contains the 1st frequent word in topic i
        topicIWord1BList = [
            j[1] for j in aTopicNewUniBContentListString[i]
            if wordList[0][0].encode('utf-8') in j[2]
        ]
        topicWordBListL.append(topicIWord1BList)

        # second word
        # topicIWord1BListSec = [j[1] for j in aTopicNewUniBContentListString[i] if wordList[1][0].encode('utf-8') in j[2]]
        # topicWordBListLSec.append(topicIWord1BListSec)

        # 0.002*"合影" + 0.002*"钱" + 0.002*"撒花" + 0.002*"没" + 0.002*"完结" + 0.002*"看" + 0.002*"啊" + 0.002*"之" + 0.002*"湫" + 0.002*"一个"
        # "合影" + "钱" + "撒花" + "没" + .....
        wordList2 = rmNum(ldaOut[i][1].encode('utf-8'))

        # get the most weight word in wordList2(after deleting actors' name)
        # eg: wordList2 = '''0.440*"小凡" + 0.030*"鲸鱼" + 0.018*"上线" + 0.014*"灰" + 0.013*"套路" + 0.012*"官方" + 0.010*"小痴" + 0.009*"滴血" + 0.009*"姐姐" + 0.009*"嘴"'''
        # firstWord = '上线', (default firstWord is '小凡')
        firstWord = wordList2[0]

        for word in wordList2:
            # if '小葵' in wordList2:
            #     firstWord = '小葵'
            #     top10weightNoActor.append('小葵')
            #     break
            if word in util.getTxtList(
                    'data/stopWord/jiebaNewWord_Qingyunzhi.txt'):
                continue
            else:
                firstWord = word
                top10weightNoActor.append(word)
                break
        top10weightActor.append(wordList2[0])
        # the index list of all barrage in topic i which contains the 1st frequent word(with weight) in topic i
        topicIWord1BList2 = [
            j[1] for j in aTopicNewUniBContentListString[i]
            if firstWord in j[2]
        ]
        topicWordBListL2.append(topicIWord1BList2)

        # weight, with actors' name
        topicIWord1BList3 = [
            j[1] for j in aTopicNewUniBContentListString[i]
            if wordList2[0] in j[2]
        ]
        topicWordBListL3.append(topicIWord1BList3)

    print 'top 10 weight word with actor:',
    for i in top10weightActor:
        print i,
    print

    print 'top 10 weight word without actor:',
    for i in top10weightNoActor:
        print i,
    print

    plt.figure(1)
    plt.subplot(211)
    topicWordBListL = topicWordBListL3
    for i in topicWordBListL:
        y = [topicWordBListL.index(i) for indexx in range(len(i))]
        plt.scatter(i, y, marker='.', color='b')
        # print 'len(i), len(y):', len(i), len(y), i, y
    plt.plot([], marker='.', color='b', label='Most frequent words')
    plt.xlim(0, )
    plt.legend()
    plt.xlabel('Barrage Time(s)')
    plt.ylabel('Topic ID')

    plt.subplot(212)
    for i in topicWordBListL2:
        y = [topicWordBListL2.index(i) for indexx in range(len(i))]
        plt.scatter(i, y, marker='x', color='r')
    plt.plot([], marker='x', color='r', label='Most weight words')
    plt.xlim(0, )
    plt.legend()
    plt.xlabel('Barrage Time(s)')
    plt.ylabel('Topic ID')

    # plt.show()

    print 'the num of users of different topics:', topicUserNumList
    print 'the num of users who is not in userCodeIdList:', userIdNoneNum
    print '-------------------------'

    # print topicDist

    topicDist2 = np.sum(topicDist, axis=1)
    # the percentage of tag of a user
    topicDist3 = np.transpose(np.transpose(topicDist) / np.float16(topicDist2))
    print topicDist3, '\n'

    for i in range(len(topicPercDist)):
        for j in range(len(topicPercDist[i])):
            topicPercDist[i][j] = topicPercDist[i][j] / topicPercDistAddNum[i]
    # topicPercDist = topicPercDist/topicPercDistAddNum
    print 'topicPercDist, topicPercDist[1][1]:', topicPercDist, topicPercDist[
        1][1]
    np.savetxt('topicDist.txt', topicDist)
    np.savetxt('topicPercDist.txt', topicPercDist)

    colorList = ['b', 'c', 'g', 'k', 'm', 'r', 'y']
    plt.figure(5)
    for i in range(len(topicPercDist)):
        plt.plot(topicPercDist[i], colorList[i])
    plt.xlabel = u'用户收藏视频主题类型'
    plt.ylabel = u'用户收藏视频主题占比(各主题视频数量/所有主题视频数量)'

    plt.show()

    print topicNum, "topics possibility average value:", topicPosi, sum(
        topicPosi) / len(topicPosi)

    resIndexList = [i[0] for i in resList]
    resTypeList = list(set(resIndexList))
    # number of type index
    resTCountList = []
    # barrage sequence number of type index
    indexList = []

    for index in resTypeList:
        resTCountList.append(resIndexList.count(index))
        indexList.append([i for i, v in enumerate(resIndexList) if v == index])

    # print resTCountList
    # print indexList[0]

    # type 0
    print 'all barrage comments of type 000000000000000------------'
    # for i in indexList[0]:
    #     print uniBContentList[i][-1]

    plt.xlabel("Barrage Type")
    plt.ylabel("Barrage Number")
    plt.plot(resTCountList, 'r-')

    # plt.show()

    # return 1/aveInnDis, 1/aveExtDis
    return aveInnDis, aveExtDis
Exemple #16
0
            mid.next = reversed_tail
            reversed_tail = mid
            mid = nxt

        # Merge the first half and the reversed second half of the list
        pre, curr = None, head
        while curr and reversed_tail:
            nxt_front, nxt_tail = curr.next, reversed_tail.next
            pre = curr.next = reversed_tail
            reversed_tail.next = nxt_front
            curr, reversed_tail = nxt_front, nxt_tail
        if reversed_tail:
            pre.next = reversed_tail


# reorderList is called for its effect on the list, so each case builds a
# list, reorders it and then prints that same list.
for values in ([1, 2, 3, 4], [1, 2, 3, 4, 5], [1], [1, 2]):
    llst = initList(values)
    Solution().reorderList(llst)
    printList(llst)
Exemple #17
0
"""

from util import ListNode, initList, printList

# Definition for singly-linked list.
# class ListNode:
#     def __init__(self, val = 0, next = None):
#         self.val = val
#         self.next = next

class Solution:
    def removeElements(self, head: ListNode, val: int) -> ListNode:
        """Unlink every node whose value equals ``val``; return the new head."""
        trail, node = None, head
        while node:
            if node.val != val:
                # Node is kept: it becomes the predecessor of the next one.
                trail, node = node, node.next
            elif node == head:
                # Matching head: advance the head and restart from it.
                head = head.next
                trail, node = None, head
            else:
                # Matching inner node: bypass it from its predecessor.
                trail.next = node.next
                node = node.next
        return head

# Remove a value from a freshly built list for every case and print it.
# The expected output is noted next to each case.
for values, target in (
        ([1, 2, 6, 3, 4, 5, 6], 6),  # 1 -> 2 -> 3 -> 4 -> 5
        ([6, 6, 6], 6),              # (None)
        ([1], 1),                    # (None)
        ([1, 2, 2, 1], 2),           # 1 -> 1
):
    printList(Solution().removeElements(initList(values), target))

Exemple #18
0
import sys
sys.path  = sys.path = ['.', '../', '../../'] + sys.path

from util import ListNode, initList, printList

class Solution:
    def insertionSortList(self, head: ListNode) -> ListNode:
        """Sort the list by insertion and return the head of the result.

        Nodes are taken off the front of the input one at a time and
        relinked into a separate, ascending result list.
        """
        sorted_head = None
        while head:
            # Detach the next input node.
            node, head = head, head.next

            # Find the first result node whose value is not smaller.
            before, after = None, sorted_head
            while after and node.val > after.val:
                before, after = after, after.next

            # Splice the node in between `before` and `after`.
            node.next = after
            if before:
                before.next = node
            else:
                sorted_head = node
        return sorted_head

# Sort a freshly built list for every case and print the result.
# The expected output is noted next to each case.
for values in (
        [4, 2, 1, 3],      # 1 -> 2 -> 3 -> 4 -> NULL
        [-1, 5, 3, 4, 0],  # -1 -> 0 -> 3 -> 4 -> 5 -> NULL
):
    printList(Solution().insertionSortList(initList(values)))

        return head

        # # O(n) time & O(n) space
        # head, stack1, stack2, carry = None, [], [], 0
        # while l1 or l2:
        #     stack1 += [l1.val] if l1 else []
        #     stack2 += [l2.val] if l2 else []
        #     l1 = l1.next if l1 else l1
        #     l2 = l2.next if l2 else l2
        # while stack1 or stack2:
        #     num = carry
        #     num += stack1.pop() if stack1 else 0
        #     num += stack2.pop() if stack2 else 0
        #     head, carry = ListNode(num % 10, head), num // 10
        # head = ListNode(carry, head) if carry else head
        # return head


# Add two freshly built digit lists for every case and print the sum.
# The expected output is noted next to each case.
for digits1, digits2 in (
        ([7, 2, 4, 3], [5, 6, 4]),  # 7 -> 8 -> 0 -> 7 -> NULL
        ([5], [5]),                 # 1 -> 0 -> NULL
        ([], [5]),                  # 5 -> NULL
        ([], []),                   # NULL
):
    printList(Solution().addTwoNumbers(initList(digits1), initList(digits2)))