def testJoin():
    """Exercise Join on five hand-built 3-itemsets.

    Builds a size-3 cItemsets container from five URL triples drawn from
    five distinct URLs, hands it to cAssociationRules.Join and calls
    Print() on whatever Join returns.

    """
    three_sets = cItemsets.cItemsets(3)

    rules = cAssociationRules()

    u1, u2, u3, u4, u5 = [
        urlparse.urlparse(address)
        for address in ('http://eins', 'http://zwei', 'http://drei',
                        'http://vier', 'http://fuenf')
    ]

    # Construct every itemset first, then register them in the same order.
    members = []
    for triple in ([u1, u2, u3], [u1, u2, u4], [u1, u3, u4],
                   [u1, u3, u5], [u2, u3, u4]):
        member = cItemset.cItemset()
        member.SetUrls(triple)
        members.append(member)

    for member in members:
        three_sets.AddItemset(member)

    rules.Join(three_sets).Print()
 def __init__(self):
     """Constructor.

     Sets the default XML file names for itemsets and rules and creates
     an empty size-1 itemset container plus an empty rule container.

     """
     # Default file names, relative to the working directory.
     self.sItemsetsFileName, self.sRulesFileName = (
         'data/itemsets.xml',
         'data/assorules.xml',
     )

     # Only candidate one-itemsets are stored in a file, hence size 1.
     self.Itemsets = cItemsets.cItemsets(1)
     self.Rules = cRules.cRules()
    def ComputeCandidateOneItemsets(self, lSessions):
        """Build the candidate one-itemsets for a list of sessions.

        Each click of each session contributes a single-URL itemset that
        is handed to the size-1 container via AddItemset. How repeated
        URLs are treated is up to AddItemset, which is not shown here.

        lSessions -- list of sessions (each must provide GetClicks())
        return -- candidate one itemsets

        """
        singles = cItemsets.cItemsets(1)

        # Lazily walk all clicks session by session, in their given order.
        every_click = (
            click
            for session in lSessions
            for click in session.GetClicks()
        )

        for click in every_click:
            single = cItemset.cItemset()
            single.SetUrls([click.GetUrl()])
            singles.AddItemset(single)

        return singles
    def Join(self, itemsets):
        """Join itemsets.

        Every pair of itemsets whose combined URLs number exactly k, with
        k = itemsets.GetSize() + 1, contributes one candidate. Each
        distinct URL combination is emitted only once, in order of its
        first appearance.

        itemsets -- Lk-1 large k-1 itemsets
        return -- Ck candidate k itemsets, or None if no candidate exists

        """
        k = itemsets.GetSize() + 1
        candidates = cItemsets.cItemsets(k)

        # Fetch each URL list once instead of once per examined pair.
        # NOTE(review): assumes GetList()/GetUrls() are plain accessors
        # without side effects -- confirm against cItemsets/cItemset.
        url_lists = [item.GetUrls() for item in itemsets.GetList()]

        lJoined = []    # URL unions in order of first appearance
        seen = set()    # frozenset of every union already recorded

        for idx, urls_i in enumerate(url_lists):
            # A union is symmetric, so the pair (j, i) can never add a
            # combination that (i, j) has not produced already. Starting
            # at idx (not idx + 1) keeps the self-pair, so an itemset that
            # already holds k distinct URLs still yields a candidate.
            for urls_j in url_lists[idx:]:
                # use a dict to eliminate double occurrences
                union = {}
                for url in urls_i:
                    union[url] = 1
                for url in urls_j:
                    union[url] = 1

                if len(union) != k:
                    continue

                # Constant-time duplicate check instead of scanning lJoined.
                key = frozenset(union)
                if key not in seen:
                    seen.add(key)
                    lJoined.append(union)

        for union in lJoined:
            candidate = cItemset.cItemset()
            for url in union:
                candidate.AddUrl(url)

            candidates.AddItemset(candidate)

        # Callers expect None rather than an empty container.
        if not candidates.lData:
            return None
        return candidates