예제 #1
0
def test02():
    """Derive one rating per (visitor, item) pair from the events and print it.

    Reads the events via ``Events.readFromFile()``, keeps the distinct
    (visitor, item, event) triples, maps each event type to a numeric rating
    ("view" -> 1, "addtocart" -> 2, "transaction" -> 3) and keeps the highest
    rating for every (visitor, item) pair.  Prints the first 40 rating rows,
    the number of events and the number of rating rows.
    """
    print("Test 02")

    eventsDF: DataFrame = Events.readFromFile()

    # Float values keep the "rating" column float64; event types that are not
    # listed here map to NaN.
    ratingByEvent = {"view": 1.0, "addtocart": 2.0, "transaction": 3.0}

    interactionsDF: DataFrame = eventsDF[[
        Events.COL_VISITOR_ID, Events.COL_ITEM_ID, Events.COL_EVENT
    ]].drop_duplicates()

    # assign() returns a new frame, so nothing is written into an object that
    # was derived from eventsDF by slicing (the masked .loc writes used here
    # before are the pattern pandas reports as SettingWithCopyWarning).
    interactionsDF = interactionsDF.assign(
        rating=interactionsDF[Events.COL_EVENT].map(ratingByEvent))

    # The strongest interaction wins for each (visitor, item) pair.
    ratingsDF: DataFrame = interactionsDF.groupby(
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID],
        as_index=False)["rating"].max()

    print(ratingsDF.head(40))

    print(len(eventsDF))
    print(len(ratingsDF))
예제 #2
0
    def generateFileRR(numberOfItems: int, countOfRepetitions: int,
                       behaviourID: str, uBehavDesc: UserBehaviourDescription):
        """Generate the behaviour file for ``behaviourID`` and write it as TSV.

        Every (visitor, item) row of the events is repeated once per
        repetition number in ``range(countOfRepetitions)``, an empty behaviour
        column is added, the frame is handed to the private general-behaviour
        generator and the result is written to the path returned by
        ``BehavioursRR.getFile(behaviourID)``.

        :param numberOfItems: forwarded unchanged to the behaviour generator
        :param countOfRepetitions: number of repetitions created per event row
        :param behaviourID: identifier used for the log line and the file path
        :param uBehavDesc: forwarded unchanged to the behaviour generator
        """
        # Both random generators are seeded with the same fixed value.
        np.random.seed(42)
        random.seed(42)

        print("Generate Behaviour RR " + behaviourID)

        targetFile: str = BehavioursRR.getFile(behaviourID)

        allEventsDF: DataFrame = Events.readFromFile()

        visitorItemDF: DataFrame = allEventsDF[[
            Events.COL_VISITOR_ID, Events.COL_ITEM_ID
        ]].copy()

        # Attach the full repetition range to every row; explode() below turns
        # it into one row per repetition number.
        repetitionRange = range(countOfRepetitions)
        visitorItemDF[BehavioursRR.COL_REPETITION] = \
            [repetitionRange] * len(visitorItemDF)

        behavioursDF: DataFrame = visitorItemDF.explode(
            BehavioursRR.COL_REPETITION)
        rowCount: int = len(behavioursDF)
        behavioursDF[BehavioursRR.COL_BEHAVIOUR] = [None] * rowCount

        # explode() repeats index labels; reset_index() replaces them with a
        # fresh index and keeps the old labels in an 'index' column.
        behavioursDF.reset_index(inplace=True)

        BehavioursRR.__generateGeneralBehaviourrRR(behavioursDF, numberOfItems,
                                                   countOfRepetitions,
                                                   uBehavDesc)

        # The helper column is not part of the written file.
        del behavioursDF['index']

        behavioursDF.to_csv(behaviourFile := targetFile, sep='\t', index=False) \
            if False else behavioursDF.to_csv(targetFile, sep='\t', index=False)
예제 #3
0
def test04():
    """Train RecommenderVMContextKNN on filtered RetailRocket events and print a recommendation.

    The events are read with ``minEventCount=50``, the recommender is trained,
    updated with the single event at position 9000 and asked for 20 items for
    user 1093035.
    """
    print("Test 04")

    # The banner text says "VSKNN" although the class used below is
    # RecommenderVMContextKNN.
    print("Running RecommenderVSKNN RR:")

    from datasets.retailrocket.events import Events  # class
    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(
        minEventCount=50)

    # Only the events frame is filled; the other two frames stay empty.
    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderVMContextKNN("test", {})
    print("train")
    recommender.train(HistoryDF("test"), rrDataset)

    # One-row frame built from the event at position 9000.
    singleEvent = filteredEventsDF.iloc[9000]
    updateDF: DataFrame = DataFrame([singleEvent])
    print(updateDF)
    recommender.update(updateDF, {})

    result = recommender.recommend(1093035, 20, {})
    print("Recommendation:")
    print(result)

    print("================== END OF TEST 04 ======================\n\n\n\n\n")
예제 #4
0
    def readDatasetsWithFilter(minEventCount:int):
        """Build the "rrDivAll" DatasetRetailRocket from filtered events.

        :param minEventCount: passed to ``Events.readFromFileWithFilter``
        :return: DatasetRetailRocket holding the filtered events, the category
                 tree and the item properties
        """
        # Arguments are evaluated left to right, so the three sources are read
        # in the order events, category tree, item properties.
        return DatasetRetailRocket(
            "rrDivAll",
            Events.readFromFileWithFilter(minEventCount=minEventCount),
            CategoryTree.readFromFile(),
            ItemProperties.readFromFile())
예제 #5
0
    def readDatasets():
        """Build the "rrDivAll" DatasetRetailRocket from the unfiltered sources.

        :return: DatasetRetailRocket holding the events, the category tree and
                 the item properties
        """
        # Arguments are evaluated left to right, so the three sources are read
        # in the order events, category tree, item properties.
        return DatasetRetailRocket(
            "rrDivAll",
            Events.readFromFile(),
            CategoryTree.readFromFile(),
            ItemProperties.readFromFile())
def test01():
    """Train RecommenderRepeatedPurchase on RetailRocket events and print a recommendation.

    The events are read with ``minEventCount=50``.  After training, the
    recommender is updated with two add-to-cart events of user 904351
    (items 119736 and 213834) and asked for 20 items for that user.
    """
    print("Test 01")

    print("Running RecommenderRepeatedPurchase RR:")

    from datasets.retailrocket.events import Events  # class
    eventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    userID: int = 904351

    def addToCartEventDF(itemID: int) -> DataFrame:
        """Return a one-row frame with an add-to-cart event of ``userID``."""
        # NOTE(review): the last index label is Events.EVENT_TRANSACTION (an
        # event-type constant) - it looks like a column-name constant was
        # intended here; confirm against the Events class.
        eventSer = pd.Series(
            [1433221523348, userID, Events.EVENT_ADDTOCART, itemID, "Nan"],
            index=[
                Events.COL_TIME_STAMP, Events.COL_VISITOR_ID, Events.COL_EVENT,
                Events.COL_ITEM_ID, Events.EVENT_TRANSACTION
            ])
        return pd.DataFrame([eventSer])

    dataset: ADataset = DatasetRetailRocket("test", eventsDF, DataFrame(),
                                            DataFrame())

    rec: ARecommender = RecommenderRepeatedPurchase("rRepeatedPurchase", {})
    rec.train(HistoryDF("test"), dataset)

    # most frequently repeatedly purchased items: 119736, 119736, 119736, 213834, 119736, 227311, 382885, 119736, 213834, 119736, 432171, 183756, 119736, 305675, 320130
    update1DF: DataFrame = addToCartEventDF(119736)
    update2DF: DataFrame = addToCartEventDF(213834)

    rec.update(update1DF, {})
    rec.update(update2DF, {})

    recommendationSer: Series = rec.recommend(userID, 20, {})

    print("Recommendation:")
    print(recommendationSer)
예제 #7
0
def test03():
    """Train RecommenderTheMostPopular on the Slantour events and print a recommendation.

    Asks for 20 items for user 1, restricted to the item IDs 0..999 through
    the ``ARG_ALLOWED_ITEMIDS`` argument.
    """
    print("Test 03")

    print("Running RecommenderTheMostPopular ST:")

    from datasets.slantour.events import Events  # class
    slantourEventsDF: DataFrame = Events.readFromFile()

    # The second frame passed to DatasetST is left empty.
    stDataset: ADataset = DatasetST("test", slantourEventsDF, DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular(
        "rTheMostPopular", {})
    recommender.train(HistoryDF("test"), stDataset)

    allowedItemIDs = list(range(1000))
    arguments = {recommender.ARG_ALLOWED_ITEMIDS: allowedItemIDs}
    result = recommender.recommend(1, 20, arguments)
    print(result)
예제 #8
0
def test02():
    """Train RecommenderTheMostPopular on filtered RetailRocket events and print a recommendation.

    The events are read with ``minEventCount=50``; 20 items are requested for
    user 1 with an empty argument dictionary.
    """
    print("Test 02")

    print("Running RecommenderTheMostPopular RR:")

    from datasets.retailrocket.events import Events  # class
    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(
        minEventCount=50)

    # Only the events frame is filled; the other two frames stay empty.
    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular(
        "rTheMostPopular", {})
    recommender.train(HistoryDF("test"), rrDataset)

    result = recommender.recommend(1, 20, {})
    print("Recommendation:")
    print(result)
예제 #9
0
def test21():
    """Train RecommenderItemBasedKNN on the Slantour events and print a recommendation.

    After training, the recommender is updated with the single event at
    position 9000 and asked for 20 items for user 3325463.
    """
    print("Test 21")

    print("Running RecommenderItemBasedKNN ST:")

    from datasets.slantour.events import Events  # class
    eventsDF: DataFrame = Events.readFromFile()

    # The second frame passed to DatasetST is left empty.
    dataset: ADataset = DatasetST("test", eventsDF, DataFrame())

    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test"), dataset)

    # One-row frame built from the event at position 9000.
    uDF: DataFrame = DataFrame([eventsDF.iloc[9000]])
    print(uDF)
    rec.update(uDF, {})

    r = rec.recommend(3325463, 20, {})
    print(r)

    # The banner previously said "TEST 05", which did not match this test.
    print("================== END OF TEST 21 ======================\n\n\n\n\n")
예제 #10
0
def test01():
    """Print the events of one hard-coded (visitor, item) pair.

    Reads all events, sorts them by timestamp, prints the visitor ID of the
    earliest event and then the first 100 events in which visitor 1043630
    interacted with item 349104.
    """
    print("Test 01")

    # Alternative pair that used to be assigned here and was immediately
    # overwritten by the pair below (kept for reference):
    #   userID = 565892, currentItemID = 168952
    #   5919585    b11111100011100000000
    #   5947295    b11010110001101000010
    #   5980935    b11111000000010101000

    userID: int = 1043630
    currentItemID: int = 349104
    # Notes kept from the original author; the meaning of the id / bit-string
    # pairs is not evident from this code.
    #5952135     b10111101000000000000
    #5962570     b11111100000100100000
    #5980920     b11111011101100101000
    #5980945     b11011100100110101100
    #5995975     b11111011000010001010

    eventsDF: DataFrame = Events.readFromFile()
    eventsDF = eventsDF.sort_values(by=Events.COL_TIME_STAMP)

    # Visitor of the earliest event; read positionally instead of converting
    # the whole column to a list.  Raises IndexError when there are no events,
    # as before.
    print(eventsDF[Events.COL_VISITOR_ID].iloc[0])

    # Both conditions applied as one row mask.
    isUser = eventsDF[Events.COL_VISITOR_ID] == userID
    isItem = eventsDF[Events.COL_ITEM_ID] == currentItemID
    eventsDF = eventsDF.loc[isUser & isItem]

    print(eventsDF.head(100))
예제 #11
0
def test01():
    """Print descriptive statistics about the events dataset.

    Reads the events via ``Events.readFromFile()``, sorts them by timestamp
    and prints counts for all users and for the "selected" users (visitors
    with more than 5 events): number of events, users, "addtocart" and
    "transaction" events, and several add-to-cart based user counts.  Two
    histograms are drawn with ``plt.hist``; they are only shown when the local
    ``DEBUG`` flag is True.  The function returns nothing.
    """

    # Histograms are displayed only when this flag is switched on.
    DEBUG:bool = False

    # First get Dataset Data
    eventsDF:DataFrame = Events.readFromFile()
    eventsDF = eventsDF.sort_values(Events.COL_TIME_STAMP)


    userIDs:List[int] = list(eventsDF[Events.COL_VISITOR_ID].unique())
    #eventsIDs:List[int] = list(eventsDF[Events.COL_EVENT].unique())

    print("Number of all events:                             " + str(len(eventsDF)))
    print("Number of all usersIDs:                           " + str(len(userIDs)))
    print("Number of all addtocart:                            " + str(len(eventsDF.loc[eventsDF[Events.COL_EVENT] == "addtocart"])))
    print("Number of all transaction:                          " + str(len(eventsDF.loc[eventsDF[Events.COL_EVENT] == "transaction"])))
    print("---------------------------------------------------------")

    # how many of all users added an item to the cart more than once
    # (one row per (visitor, item) pair whose add-to-cart count is > 1)
    eventsACartDF:DataFrame = eventsDF.loc[eventsDF[Events.COL_EVENT] == "addtocart"]
    eventsACart2DF = eventsACartDF.groupby(
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID], as_index=False)[Events.COL_TIME_STAMP].count()
    eventsACart2DF = eventsACart2DF.loc[eventsACart2DF[Events.COL_TIME_STAMP] > 1]

    # how many of all users added 1-N items
    # (number of distinct items added to the cart per visitor)
    usersAndAddedItemsCountDF:DataFrame = eventsACartDF[[Events.COL_VISITOR_ID, Events.COL_ITEM_ID]].drop_duplicates().groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_ITEM_ID].count()

    plt.hist(list(usersAndAddedItemsCountDF[Events.COL_ITEM_ID]))
    plt.yscale('log')
    if DEBUG:
        plt.show()


    # NOTE(review): same expression as the userIDs assignment above; eventsDF
    # is not modified in between.
    userIDs:List[int] = list(eventsDF[Events.COL_VISITOR_ID].unique())
    print("Number of all users:                              " + str(len(userIDs)))

    # The printed number is the count of (visitor, item) rows in eventsACart2DF.
    print("Number of all users 1< #itemI added:                 " + str(len(eventsACart2DF)))
    print("---------------------------------------------------------")


    # event filtering - drop users who have fewer than K records
    # (here: keep visitors whose event count is greater than 5)
    userIdAndTimestampDF:DataFrame[int, int] = eventsDF.groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_TIME_STAMP].count()

    userIdAndTimestampSelDF:DataFrame[int, int] = userIdAndTimestampDF.loc[
        userIdAndTimestampDF[Events.COL_TIME_STAMP] > 5]
    #print(userIdAndTimestampSelDF)
    userIDsSel:List[int] = list(userIdAndTimestampSelDF[Events.COL_VISITOR_ID].unique())

    eventsSelDF:DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID].isin(userIDsSel)]

    print("Number of selected events:                         " + str(len(eventsSelDF)))
    print("Number of selected usersIDs:                         " + str(len(userIDsSel)))
    print("Number of all addtocart:                            " + str(len(eventsSelDF.loc[eventsSelDF[Events.COL_EVENT] == "addtocart"])))
    print("Number of selected transaction:                      " + str(len(eventsSelDF.loc[eventsSelDF[Events.COL_EVENT] == "transaction"])))
    print("---------------------------------------------------------")



    # how many selected users added an item to the cart at least once
    eventsSelACartDF:DataFrame = eventsSelDF.loc[eventsSelDF[Events.COL_EVENT] == "addtocart"]
    userIDSelsACart:List[int] = list(eventsSelACartDF[Events.COL_VISITOR_ID].unique())

    # NOTE(review): eventsSelACart1DF is reassigned further below before it is
    # read anywhere in the visible code.
    eventsSelACart1DF = eventsSelACartDF.groupby(
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID], as_index=False)[Events.COL_TIME_STAMP].count()
    eventsSelACart1DF = eventsSelACart1DF.loc[eventsSelACart1DF[Events.COL_TIME_STAMP] > 0]

    print("Number of selected users:                            " + str(len(userIDsSel)))
    print("Number of selected users who added something to cart: " + str(len(userIDSelsACart)))

    print("---------------------------------------------------------")



    # how many selected users bought more than two, not necessarily distinct, items
    # NOTE(review): the code below counts "addtocart" events per visitor and
    # splits them into > 1 and == 1, which differs from the wording above.
    # The next line repeats the eventsSelACartDF filter from the previous section.
    eventsSelACartDF:DataFrame = eventsSelDF.loc[eventsSelDF[Events.COL_EVENT] == "addtocart"]
    eventsSelACartDF_ = eventsSelACartDF.groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_TIME_STAMP].count()

    eventsSelACart2DF = eventsSelACartDF_.loc[eventsSelACartDF_[Events.COL_TIME_STAMP] > 1]
    userSelACart2IDs:List[int] = list(eventsSelACart2DF[Events.COL_VISITOR_ID])

    print("Number of selected users 1< #addItem:                 " + str(len(userSelACart2IDs)))

    eventsSelACart1DF = eventsSelACartDF_.loc[eventsSelACartDF_[Events.COL_TIME_STAMP] == 1]
    userSelACart1IDs:List[int] = list(eventsSelACart1DF[Events.COL_VISITOR_ID])

    print("Number of selected users 1= #addItem:                 " + str(len(userSelACart1IDs)))



    # how many selected users added one single item, any number of times
    usersSelAndAddedItemsCountDF:DataFrame = eventsSelACartDF[[Events.COL_VISITOR_ID, Events.COL_ITEM_ID]].drop_duplicates().groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_ITEM_ID].count()

    plt.hist(list(usersSelAndAddedItemsCountDF[Events.COL_ITEM_ID]), bins=700)
    if DEBUG:
        plt.show()

    # a = number of selected users with exactly one distinct item added to cart
    a = len(usersSelAndAddedItemsCountDF.loc[usersSelAndAddedItemsCountDF[Events.COL_ITEM_ID] == 1])
    print("Number of selected users who added only 1 unique itemI (possibly many times):     " + str(a))



    print("---------------------------------------------------------")

    # what percentage of users bought only one thing multiple times
    # (computed from add-to-cart counts; divides by len(userSelACart2IDs), so
    # it raises ZeroDivisionError when that list is empty)
    b = (a - len(userSelACart1IDs)) / len(userSelACart2IDs) * 100
    print(str(b) + " %")

    # I would like to know how many times users bought the same thing
    # (the frames built below are not printed in the visible code; their
    # print calls are commented out)
    eventsTransDF:DataFrame = eventsDF.loc[eventsDF[Events.COL_EVENT] == "transaction"]
    events2DF = eventsTransDF.groupby(
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID], as_index=False)[Events.COL_TIME_STAMP].count()
    events2DF = events2DF.loc[events2DF[Events.COL_TIME_STAMP] > 4]
    #print(events2DF.head(10))

    events152963DF:DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID] == 152963]
    events152963DF = events152963DF.loc[events152963DF[Events.COL_ITEM_ID] == 119736]
    #print(events152963DF.head(1000000).to_string())

    events530559DF:DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID] == 530559]
    events530559DF = events530559DF.loc[events530559DF[Events.COL_ITEM_ID] == 119736]
    #print(events530559DF.head(1000000).to_string())

    events76757DF:DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID] == 76757]
    events76757DF:DataFrame = events76757DF.loc[events76757DF[Events.COL_EVENT] == "transaction"]