Code Example #1
File: prepare_matrices.py  Project: armovetz/course4
def createItem_x_ItemSimilarities(time_flag):
    
    if time_flag:
        """
            SPECIAL TIME CROSS-VALIDATION
        """
        os.chdir("../data/tmp/time_cross_validation")
    
        for i in range(ITERATIONS_NUMB):
            print "Iteration = ", i + 1, "/", ITERATIONS_NUMB
            
            # changing directory for current case
            cur_dir = str(i)
            os.chdir(cur_dir)
            
            # reading coords for current case
            coords = misc_functions.getWindowCoords()
    
            # get training and test items lists
            test_items = range(coords[1], coords[3] + 1)
            training_items = range(0, coords[1])
            
            createItem_x_ItemSimilarity_local(training_items, test_items)
                
            # returning to "cross_validation" directory
            os.chdir("..")
    else:    
        """
            CLASSIC CROSS-VALIDATION
        """
        os.chdir("../data/tmp/cross_validation")
    
        # cycle for each case
        for i in range(SWITCHES_USERS_NUMB):
            for j in range(SWITCHES_ITEMS_NUMB):
                print "window: user_window = ", i + 1, "/", SWITCHES_USERS_NUMB
                print "        item_window = ", j + 1, "/", SWITCHES_ITEMS_NUMB
    
                # changing directory for current case
                cur_dir = str(i) + "_" + str(j)
                os.chdir(cur_dir)

                # reading coords for current case
                coords = misc_functions.getWindowCoords()

                # get training and test items lists
                test_items = range(coords[1], coords[3] + 1)
                training_items = range(0, coords[1])
                training_items.extend(range((coords[3] + 1), ITEMS_NUMB))

                createItem_x_ItemSimilarity_local(training_items, test_items)

                # returning to "cross_validation" directory
                os.chdir("..")

    os.chdir("../../../python_sources")
    print "TOTAL SUCCESS! \n local similarities matrices are prepared!"
Code Example #2
File: TI.py  Project: armovetz/hybrid_rs
 def ti_CreateClusters(self):
     """
         prepare clusters for further predictions
         Clusters are being saved in 
         "test_clusters_<days>" file in directory of the case.
     """
 
     for window_dir in self.ti_dirs:
         print window_dir
         
         #test_matrix = scipy.io.mmio.mmread("test.mtx")
         test_matrix_file = open(window_dir + "/test.mtx", 'r')
 
         # create item_X_time list
         item_X_time_list = []
         meta_file = open(self.dataset.events_file_name, 'r')
         for line in meta_file:
             item_X_time_list.append(misc_functions.getMetaString(line, self.dataset.time_meta_position))
         meta_file.close()
         
         #print "len(item_X_time_list)=",len(item_X_time_list)
     
         # reading coords for current case
         coords = misc_functions.getWindowCoords(window_dir)
 
         #print "coords = ", coords
 
         # stuff before cycle
         clusters_list = []
         cur_user_id = str(coords[0])
         cur_cluster = ["user" + "\t" + str(coords[0])]
 
         # skip comments
         for i2 in range(3):
             test_matrix_file.readline()
 
         cur_user = -1
         cur_cluster = []
 
         for line in test_matrix_file:
             #user_id = int(line.split()[0]) - 1 + coords[0]
             #item_id = int(line.split()[1]) - 1 + coords[2]
             user_id = int(line.split()[0])
             item_id = int(line.split()[1])
     
             if user_id != cur_user:     # next user
                 #print "user_id = ", user_id
                 cur_user = user_id
                 if cur_cluster != []:
                     clusters_list.append(cur_cluster)
                 cur_cluster = ["user\t" + str(user_id)]
             time_bounds = clusters.getTimeInterval(item_id, item_X_time_list, coords, self.cluster_size, self.dataset.events_numb)
             cur_cluster.append(str(time_bounds[0]) + "\t" + str(item_id) + "\t" + str(time_bounds[1]))
     
         # flush the cluster collected for the last user
         if cur_cluster != []:
             clusters_list.append(cur_cluster)

         test_clusters_file = open(window_dir + "/test_clusters_" + str(self.cluster_size), 'w')
         test_clusters_file.write("low_bound item_id high_bound\n")
         for cluster in clusters_list:
             for line in cluster:
                 test_clusters_file.write(line + "\n")
         test_clusters_file.close()
Code Example #3
    def ti_CreateTrainingMatrices(self):
        """
            creates training matrices for each window that has been
            created while preparing time intervals
        """

        print "Creating train matrices..."

        for window_dir in self.ti_dirs:
            now = datetime.datetime.now()
            now_string = now.strftime("%Y-%m-%d %H:%M")

            coords = misc_functions.getWindowCoords(window_dir)
            start_user = coords[0]
            stop_user = coords[1]
            start_item = coords[2]
            stop_item = coords[3]

            # Reading history matrix
            history_matrix = scipy.io.mmio.mmread(
                self.dataset.history_file_name)

            #print coords

            # selecting part for the current window
            local_training_matrix = (history_matrix.tocsr()) \
                                            [start_user - 1 : stop_user, \
                                                            : start_item - 1].copy()

            scipy.io.mmio.mmwrite(window_dir + "/train", local_training_matrix,
                                  now_string, 'integer')
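
The slicing above is the whole trick: scipy.io.mmio.mmread returns a COO matrix, tocsr() makes it sliceable by row and column ranges, and the selected block is written back out. A tiny self-contained illustration with toy data (not the project's history matrix):

import scipy.sparse
import scipy.io.mmio

# toy 4 x 5 "history" matrix
history_matrix = scipy.sparse.csr_matrix([[1, 0, 2, 0, 0],
                                          [0, 3, 0, 0, 1],
                                          [0, 0, 0, 4, 0],
                                          [5, 0, 0, 0, 6]])

# keep users in rows 0..2 and items in columns 0..1, like the window slice above
local_training_matrix = history_matrix[0:3, :2].copy()

scipy.io.mmio.mmwrite("train_demo", local_training_matrix,
                      "toy example", 'integer')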
Code Example #4
    def ti_CreateTestingEvents(self):
        """
            creates test meta files for events for  each window 
            that has been created while preparing time intervals
        """

        print "Creating test meta files for events..."

        for window_dir in self.ti_dirs:

            coords = misc_functions.getWindowCoords(window_dir)
            start_item = coords[2]
            stop_item = coords[3]

            events_file = open(self.dataset.events_file_name, 'r')
            interval_events_file = open(window_dir + "/events_test", 'w')

            # skip all items until start item
            for i in range(start_item):
                events_file.readline()

            # recount ids
            new_local_ctr = 1
            for i in range(stop_item - start_item + 1):
                meta_line = events_file.readline()
                rest_of_line = meta_line[meta_line.find("\t"):]
                interval_events_file.write(str(new_local_ctr) + rest_of_line)
                new_local_ctr += 1

            events_file.close()
            interval_events_file.close()
Code Example #5
File: TI.py  Project: armovetz/hybrid_rs
    def ti_CreateTestingEvents(self):
        """
            creates test meta files for events for  each window 
            that has been created while preparing time intervals
        """
        
        print "Creating test meta files for events..."

        for window_dir in self.ti_dirs:
            
            coords = misc_functions.getWindowCoords(window_dir)
            start_item = coords[2]
            stop_item  = coords[3]
            
            events_file = open(self.dataset.events_file_name, 'r')
            interval_events_file = open(window_dir + "/events_test", 'w')
            
            # skip all items until start item
            for i in range(start_item):
                events_file.readline()

            # recount ids
            new_local_ctr = 1
            for i in range(stop_item - start_item + 1):
                meta_line = events_file.readline()
                rest_of_line = meta_line[meta_line.find("\t"):]
                interval_events_file.write(str(new_local_ctr) + rest_of_line)
                new_local_ctr += 1
            
            events_file.close()
            interval_events_file.close()
Code Example #6
File: TI.py  Project: armovetz/hybrid_rs
    def ti_CreateTrainingEvents(self):
        """
            creates train meta files for events for  each window 
            that has been created while preparing time intervals
        """
        
        print "Creating train meta files for events..."

        for window_dir in self.ti_dirs:
            
            coords = misc_functions.getWindowCoords(window_dir)
            start_item = coords[2]
            #stop_item  = coords[3]
            
            events_file = open(self.dataset.events_file_name, 'r')
            interval_events_file = open(window_dir + "/events_train", 'w')
            
            # take only meta lines for events until <start_item> events
            new_local_ctr = 1
            for i in range(start_item - 1):
                meta_line = events_file.readline()
                rest_of_line = meta_line[meta_line.find("\t"):]
                interval_events_file.write(str(new_local_ctr) + rest_of_line)
                new_local_ctr += 1
            
            events_file.close()
            interval_events_file.close()
Code Example #7
    def ti_CreateTrainingEvents(self):
        """
            creates train meta files for events for  each window 
            that has been created while preparing time intervals
        """

        print "Creating train meta files for events..."

        for window_dir in self.ti_dirs:

            coords = misc_functions.getWindowCoords(window_dir)
            start_item = coords[2]
            #stop_item  = coords[3]

            events_file = open(self.dataset.events_file_name, 'r')
            interval_events_file = open(window_dir + "/events_train", 'w')

            # take only meta lines for events until <start_item> events
            new_local_ctr = 1
            for i in range(start_item - 1):
                meta_line = events_file.readline()
                rest_of_line = meta_line[meta_line.find("\t"):]
                interval_events_file.write(str(new_local_ctr) + rest_of_line)
                new_local_ctr += 1

            events_file.close()
            interval_events_file.close()
Code Example #8
File: TI.py  Project: armovetz/hybrid_rs
 def ti_CreateTrainingMatrices(self):
     """
         creates training matrices for each window that has been
         created while preparing time intervals
     """
     
     print "Creating train matrices..."
     
     for window_dir in self.ti_dirs:
         now = datetime.datetime.now()
         now_string = now.strftime("%Y-%m-%d %H:%M")
         
         coords = misc_functions.getWindowCoords(window_dir)
         start_user = coords[0]
         stop_user  = coords[1]
         start_item = coords[2]
         stop_item  = coords[3]
         
         # Reading history matrix
         history_matrix = scipy.io.mmio.mmread(self.dataset.history_file_name)
         
          #print coords
         
         # selecting part for the current window
         local_training_matrix = (history_matrix.tocsr()) \
                                         [start_user - 1 : stop_user, \
                                                         : start_item - 1].copy()
         
         scipy.io.mmio.mmwrite(window_dir + "/train", local_training_matrix, now_string, 'integer')
Code Example #9
File: magician.py  Project: armovetz/hybrid_rs
    def runCrossValidation(self):
        """
            launch cross validation procedure for loaded TI
        """
        
        ti_ctr = 1
        
        self.reporter.report("---")
        self.reporter.report("Cross-validation started\n")
        
        for interval_path in self.ti.intervals_list:
            
            self.reporter.report("  " + str(ti_ctr) + " of " + \
                str(len(self.ti.intervals_list)) + " intervals running")
            ti_ctr += 1
            
            self.interval_path = MAG_TI_DIR + "/" + interval_path

            self.window_coords = misc_functions.getWindowCoords(self.interval_path)

            if self.need_predict :
                train_matrix = scipy.io.mmio.mmread(MAG_TI_DIR + "/" + interval_path + "/train.mtx")
                self.reporter.report("    train mtx loaded")
                self.reporter.report("    prediction started")
                self.gear.predict(self, train_matrix)
            
            if self.need_estimate :
                test_matrix = scipy.io.mmio.mmread(MAG_TI_DIR + "/" + interval_path + "/test.mtx")
                self.reporter.report("    test mtx loaded")
                result_matrix = scipy.io.mmio.mmread(MAG_TI_DIR + "/" + interval_path + "/prediction.mtx")
                self.reporter.report("    results mtx loaded")
                self.reporter.report("    prediction started")
                self.estimator.estimate(self, test_matrix, result_matrix)
Code Example #10
File: svd.py  Project: armovetz/course4
def prediction(prediction_file_name, clusters_list):
    
    #subprocess.call(["~/graphchi/toolkits/collaborative_filtering/svd", "--training=history.mtx", "--nsv=10", "--nv=12", "--max_iter=5", " --quiet=1", "--tol=1e-1"])
    #subprocess.call(["~/graphchi/toolkits/collaborative_filtering/svd", "--training=history.mtx --nsv=10 --nv=12 --max_iter=5 --quiet=1  --tol=1e-1"], )
    subprocess.call(["~/graphchi/toolkits/collaborative_filtering/svd --training=history.mtx --nsv=10 --nv=12 --max_iter=5  --quiet=1 --tol=1e-1  > /dev/null"], shell=True)
        
    window_coord = misc_functions.getWindowCoords()

    computePredictionMatrixFromEigenVectors(5, "history.mtx", prediction_file_name, window_coord)
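
Passing a single string with shell=True lets the shell expand ~ and handle the > /dev/null redirection. An equivalent invocation without the shell would expand the home directory explicitly and pass the GraphChi flags as a list; a sketch, reusing the same flags and path from the call above:

import os
import subprocess

svd_binary = os.path.expanduser("~/graphchi/toolkits/collaborative_filtering/svd")

with open(os.devnull, 'w') as devnull:
    subprocess.call([svd_binary,
                     "--training=history.mtx",
                     "--nsv=10", "--nv=12", "--max_iter=5",
                     "--quiet=1", "--tol=1e-1"],
                    stdout=devnull)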
Code Example #11
File: ti_functions.py  Project: armovetz/course4
    def ti_CreateTrainingMatrices(self):
        """
            creates training matrices for each window that has been
            created while preparing time intervals
        """
        
        for window_dir in self.ti_dirs:
            # reading coords for current case
            coords = misc_functions.getWindowCoords(window_dir)
            
            original_history_file = open(self.dataset.history_file_name)
            local_training_history_file = open(window_dir + "/train", 'w')
            
            #skip comments and copy some of them
            local_training_history_file.write(original_history_file.readline())
            original_history_file.readline()
            original_history_file.readline()
            now = datetime.datetime.now()
            now_string = now.strftime("%Y-%m-%d %H:%M")
            local_training_history_file.write("%Generated " + now_string + "\n")
                
            # run through the whole history file
            ltw_list = [] #lines_to_write_list
            visits_ctr = 0
            zeros_ctr = 0
            for line in original_history_file:
                event_id = int(line.split("\t")[1]) - 1

                if event_id < coords[1]:
                    ltw_list.append(line)
                    visits_ctr += 1
                else:
                    zeros_ctr += 1
            
            # write properties of the training matrix
            train_users_numb = coords[1] - coords[0] + 1
            train_events_numb = coords[3] - coords[2] + 1
            local_training_history_file.write(str(train_users_numb) + \
                " " + str(train_events_numb) + " " + str(visits_ctr) + "\n")
            
            for line in ltw_list:
                local_training_history_file.write(line)
            
            original_history_file.close()
            local_training_history_file.close()
            
            """ 
            # DEBUGGING STUFF
            print "total_ctr = ", total_ctr
            print "visits_ctr = ", visits_ctr
            print "zeros_ctr = ", zeros_ctr
            print "zeros_ctr + visits_ctr = ", zeros_ctr + visits_ctr
            print " -------------------------- "
            """
            if (zeros_ctr + visits_ctr != self.dataset.visits_numb):
                raise Exception("counters mismatch")
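
This version of ti_CreateTrainingMatrices writes the training matrix by hand in MatrixMarket coordinate format: it copies the %%MatrixMarket banner, adds a generation comment, writes a "rows columns nonzeros" size line, and then one tab-separated user/item/value triple per kept event. With made-up numbers, the resulting train file looks roughly like this:

%%MatrixMarket matrix coordinate integer general
%Generated 2013-05-04 12:00
120 340 3
1	5	1
1	12	2
7	4	1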
Code Example #12
File: prepare_matrices.py  Project: armovetz/course4
def makeClusters():
    """
        Internal function.
        Function prepares test clusters when launched inside directory
        of case of cross-validation. Clusters are being saved in 
        "test_clusters" in directory of the case.
    """
    
    #test_matrix = scipy.io.mmio.mmread("test.mtx")
    test_matrix_file = open("test.mtx", 'r')
    
    # create item_X_time list
    item_X_time_list = []
    meta_file = open("../../../well_done/meta", 'r')
    for line in meta_file:
        item_X_time_list.append(getMetaString(line, TIME_ID))
    meta_file.close()
    
    # reading coords for current case
    coords = misc_functions.getWindowCoords()
    
    # stuff before cycle
    clusters_list = []
    cur_user_id = str(coords[0])
    cur_cluster = ["user" + "\t" + str(coords[0])]
    
    # skip comments
    for i2 in range(3):
        test_matrix_file.readline()
    
    cur_user = -1
    cur_cluster = []
    
    for line in test_matrix_file:
        user_id = int(line.split()[0]) - 1 + coords[0]
        item_id = int(line.split()[1]) - 1 + coords[1]
        
        if user_id != cur_user:     # next user
            #print "user_id = ", user_id
            cur_user = user_id
            if cur_cluster != []:
                clusters_list.append(cur_cluster)
            cur_cluster = ["user\t" + str(user_id)]
        time_bounds = getTimeInterval(item_id, item_X_time_list, coords)
        cur_cluster.append(str(time_bounds[0]) + "\t" + str(item_id) + "\t" + str(time_bounds[1]))
        
    test_clusters_file = open("test_clusters_" + str(DAYS_INTERVAL_PREPARE), 'w')
    test_clusters_file.write("low_bound item_id high_bound\n")
    for cluster in clusters_list:
        for line in cluster:
            test_clusters_file.write(line + "\n")
    
    test_clusters_file.close()
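
The file written here (and by ti_CreateClusters above) is flat text: one header line, then for each test user a "user<TAB>id" marker line followed by one low_bound / item_id / high_bound triple per test event, all tab-separated. With made-up ids it looks like:

low_bound item_id high_bound
user	17
203	210	214
203	212	214
user	18
250	255	260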
Code Example #13
File: aggregator.py  Project: armovetz/hybrid_rs
def ndcgPrediction(magician, train_matrix):
    """
    
    """

    prediction_file_name = magician.interval_path + "/prediction.mtx"
    train_file_name = magician.interval_path + "/train.mtx"

    clusters_list = clusters.getClustersListFromClustersFile(
        magician.interval_path, magician.interval_size)

    coords = misc_functions.getWindowCoords(magician.interval_path)

    test_users = range(coords[0], coords[1])
    test_items = range(coords[2], coords[3] + 1)

    prediction_matrix = scipy.zeros((len(test_users), len(test_items)),
                                    dtype=float)
    training_matrix = scipy.io.mmio.mmread(train_file_name).tocsr()

    # later?
    #item_X_meta_matrix = scipy.io.mmio.mmread("../../../well_done/items-metas_global.mtx").toarray()

    for user_cluster in clusters_list:
        user_id = int(user_cluster[0].split("\t")[1])
        #print "user #", user

        #user_metas = {} - changed to list because of problem with dimension
        user_metas = []

        #for item in test_items:
        for cluster in user_cluster[1:]:
            start_cluster_item = int(cluster.split("\t")[0])
            stop_cluster_item = int(cluster.split("\t")[2])

            cluster_items = range(start_cluster_item, stop_cluster_item + 1)

            #for item in cluster_items:
        prediction_matrix[user_id - 1] = scipy.zeros((len(test_items)),
                                                     dtype=float)

    # end of user-row cycle
    #########

    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name,
                          result_matrix,
                          field='real',
                          precision=5)
Code Example #14
File: ndcg.py  Project: armovetz/hybrid_rs
def estimate(magician, test_matrix, result_matrix):
    """
        Runs NDCG estimation for each query in the clusters list.
        It's very simple.
    """

    test_matrix_csr = test_matrix.tocsr()
    result_matrix_csr = result_matrix.tocsr()
    results_file_name = magician.results_file_name
    
    # TBD: get clusters list
    
    clusters_list = misc_functions.getClustersListFromClustersFile(magician.interval_path, magician.interval_size)
    
    coords = misc_functions.getWindowCoords(magician.interval_path)
    
    estimateNDCGp( test_matrix_csr, result_matrix_csr, clusters_list, coords, results_file_name)
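
estimateNDCGp is not included in these excerpts. For reference, a generic NDCG@p for one user's ranking, computed from predicted scores and true relevance values, can be sketched as below; this is the textbook formulation and not necessarily the exact variant the project implements:

import math

def ndcg_at_p(true_relevance, predicted_scores, p):
    # rank item indices by predicted score, best first, and keep the top p
    ranked = sorted(range(len(predicted_scores)),
                    key=lambda i: predicted_scores[i], reverse=True)[:p]
    dcg = sum(true_relevance[i] / math.log(rank + 2, 2)
              for rank, i in enumerate(ranked))
    # ideal DCG: the same discount applied to the best possible ordering
    ideal = sorted(true_relevance, reverse=True)[:p]
    idcg = sum(rel / math.log(rank + 2, 2) for rank, rel in enumerate(ideal))
    return dcg / idcg if idcg > 0 else 0.0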
Code Example #15
File: ti_functions.py  Project: armovetz/course4
 def ti_CreateTestingMatrices(self):
     """
         creates testing matrices for each window that has been
         created while preparing time intervals
     """
     
     for window_dir in self.ti_dirs:
         now = datetime.datetime.now()
         now_string = now.strftime("%Y-%m-%d %H:%M")
         
         coords = misc_functions.getWindowCoords(window_dir)
         
         # Reading history matrix
         history_matrix = scipy.io.mmio.mmread(self.dataset.history_file_name)
         
         # selecting part for the current window
         local_testing_matrix = (history_matrix.tocsr())[ coords[0] : coords[1] + 1, coords[2] : coords[3] + 1].copy()
         scipy.io.mmio.mmwrite(window_dir + "/test", local_testing_matrix, now_string, 'integer')
Code Example #16
File: aggregator.py  Project: armovetz/hybrid_rs
def ndcgPrediction(magician, train_matrix):
    """
    
    """
    
    prediction_file_name = magician.interval_path + "/prediction.mtx"
    train_file_name      = magician.interval_path + "/train.mtx"
    
    clusters_list = clusters.getClustersListFromClustersFile(magician.interval_path, magician.interval_size)
    
    coords = misc_functions.getWindowCoords(magician.interval_path)
    
    test_users = range(coords[0], coords[1]) 
    test_items = range(coords[2], coords[3] + 1)
    
    prediction_matrix = scipy.zeros((len(test_users), len(test_items)), dtype = float)
    training_matrix = scipy.io.mmio.mmread(train_file_name).tocsr()
    
    # later?
    #item_X_meta_matrix = scipy.io.mmio.mmread("../../../well_done/items-metas_global.mtx").toarray()
    
    for user_cluster in clusters_list:
        user_id = int (user_cluster[0].split("\t")[1])
        #print "user #", user
        
        #user_metas = {} - changed to list because of problem with dimension
        user_metas = []
        
        
        #for item in test_items:
        for cluster in user_cluster[1 : ]:
            start_cluster_item = int(cluster.split("\t")[0])
            stop_cluster_item  = int(cluster.split("\t")[2])

            cluster_items = range(start_cluster_item, stop_cluster_item + 1)
            
            #for item in cluster_items:
        prediction_matrix[user_id - 1] = scipy.zeros((len(test_items)), dtype=float)
    
    # end of user-row cycle
    #########
    
    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name, result_matrix, field = 'real', precision = 5)
Code Example #17
File: ndcg.py  Project: armovetz/hybrid_rs
def estimate(magician, test_matrix, result_matrix):
    """
        Runs NDCG estimation for each query in the clusters list.
        It's very simple.
    """

    test_matrix_csr = test_matrix.tocsr()
    result_matrix_csr = result_matrix.tocsr()
    results_file_name = magician.results_file_name

    # TBD: get clusters list

    clusters_list = misc_functions.getClustersListFromClustersFile(
        magician.interval_path, magician.interval_size)

    coords = misc_functions.getWindowCoords(magician.interval_path)

    estimateNDCGp(test_matrix_csr, result_matrix_csr, clusters_list, coords,
                  results_file_name)
Code Example #18
File: magician.py  Project: armovetz/hybrid_rs
    def runCrossValidation(self):
        """
            launch cross validation procedure for loaded TI
        """

        ti_ctr = 1

        self.reporter.report("---")
        self.reporter.report("Cross-validation started\n")

        for interval_path in self.ti.intervals_list:

            self.reporter.report("  " + str(ti_ctr) + " of " + \
                str(len(self.ti.intervals_list)) + " intervals running")
            ti_ctr += 1

            self.interval_path = MAG_TI_DIR + "/" + interval_path

            self.window_coords = misc_functions.getWindowCoords(
                self.interval_path)

            if self.need_predict:
                train_matrix = scipy.io.mmio.mmread(MAG_TI_DIR + "/" +
                                                    interval_path +
                                                    "/train.mtx")
                self.reporter.report("    train mtx loaded")
                self.reporter.report("    prediction started")
                self.gear.predict(self, train_matrix)

            if self.need_estimate:
                test_matrix = scipy.io.mmio.mmread(MAG_TI_DIR + "/" +
                                                   interval_path + "/test.mtx")
                self.reporter.report("    test mtx loaded")
                result_matrix = scipy.io.mmio.mmread(MAG_TI_DIR + "/" +
                                                     interval_path +
                                                     "/prediction.mtx")
                self.reporter.report("    results mtx loaded")
                self.reporter.report("    prediction started")
                self.estimator.estimate(self, test_matrix, result_matrix)
Code Example #19
File: engine.py  Project: armovetz/course4
def prediction(prediction_file_name, clusters_list, svd_use_flag):
    """
        Main function for computing prediction rating.
    """
    
    coords = misc_functions.getWindowCoords()
    
    test_users = range(coords[0], coords[2] + 1) 
    test_items = range(coords[1], coords[3] + 1)
    
    #print "len(test_users) = ", len(test_users)
    #print "len(test_items) = ", len(test_items)
    #print "test_items = ", test_items
    
    # this matrix to be written as result finally
    #misc_functions.step()
    prediction_matrix = zeros((len(test_users), len(test_items)), dtype = float)
    
    training_matrix = scipy.io.mmio.mmread("history.mtx").tocsr()
    
    item_X_meta_matrix = scipy.io.mmio.mmread("../../../well_done/items-metas_global.mtx").toarray()
    
    # getting meta matrices for corresponding using metas
    meta_ctr = 0
    meta_matrices = []
    for meta in METAS_TO_USE:
        if svd_use_flag:
            meta_matrice_file_name = "users-" + METAS_TO_USE[meta] + ".svd.mtx"
        else:
            meta_matrice_file_name = "users-" + METAS_TO_USE[meta] + ".mtx"
        exec("meta_matrices.append(scipy.io.mmio.mmread(\"" + meta_matrice_file_name + "\").toarray())")

    #user_counter = 0
    #for user in test_users:
    for cur_cluster in clusters_list:
    
        #print "cur_cluster[0] = ", cur_cluster[0]
        user = int (cur_cluster[0].split("\t")[1])
        #print "user #", user
        
        #user_metas = {} - changed to list because of problem with dimension
        user_metas = []
        
        values = zeros((len(METAS_TO_USE), len(test_items)), dtype = float)
        meta_ctr = 0
        for meta in METAS_TO_USE:
            
            #print "    meta_matrices = ", meta_matrices
            #print "    meta_matrices[meta_ctr] = ", meta_matrices[meta_ctr]
            user_vector = meta_matrices[meta_ctr][user]
            #print "    user_vector = ", user_vector
            #print "    len(user_metas) = ", len(user_metas)
            #print "    meta_ctr = ", meta_ctr
            #print "meta = ", meta
            #misc_functions.step()
            
            # normalizing counts of visited metas to use them as weights later
            if max(user_vector) != 0:
                user_metas.append(1.0 * user_vector / max(user_vector))
            else:
                user_metas.append(zeros((len(user_vector), ), dtype = float))
            #print "    user_metas[meta_ctr] = ", user_metas[meta_ctr]
            #print "    user_metas[meta_ctr].shape = ", user_metas[meta_ctr].shape
            
            #for item in test_items:
            for cluster in cur_cluster[1 : ]:
                start_cluster_item = int(cluster.split("\t")[0])
                stop_cluster_item  = int(cluster.split("\t")[2])
                
                cluster_items = range(start_cluster_item, stop_cluster_item + 1)
                
                for item in cluster_items:
                    meta_value = item_X_meta_matrix[item, meta]
                    
                    # PRICE
                    if meta == 8:
                        meta_value = priceToPriceCat(meta_value)
                    
                    # CITY HEURISTIC
                    if meta == 11:
                        if user_metas[meta_ctr][meta_value - 1] < CITY_TRESHOLD:
                            values[:, item - coords[1]] *= CITY_COEF
                    """
                    # DAYTIME
                    if meta == 17:
                        meta_value = dayTime(meta_value)
                    """
                    
                    #print "        meta_value = ", meta_value
                    #print "        item = ", item
                    #step()
                    values[meta_ctr][item - coords[1]] = (user_metas[meta_ctr])[meta_value - 1]
                    
                    """HEURISTICS """
                    
                    
                    
                    
                    
                    """\\ HEURISTICS """

            meta_ctr += 1
        #print "values[:, 0:10] = ", values[:, 0:10]
        prediction_vector = numpy.sum(META_WEIGHTS * values, axis = 0)
        #print "prediction_vector[0:10] = ", prediction_vector[0:10]
        #print "sum(prediction_vector) = ", sum(prediction_vector)
        prediction_matrix[user - coords[0]] = prediction_vector
        
        #step()
        
#  =====  END OF MAIN CYCLE  =====  

    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name, result_matrix, field = 'real', precision = 5)
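
The final score per item is a weighted sum over the meta channels: META_WEIGHTS * values broadcasts one weight per meta row of the (metas x items) values array, and numpy.sum(..., axis=0) collapses it to a single score per item. A toy illustration of that broadcast, with made-up weights and values (META_WEIGHTS is written as a column here so each weight applies to one meta row):

import numpy

META_WEIGHTS = numpy.array([[0.5], [0.3], [0.2]])    # one weight per meta (column vector)
values = numpy.array([[1.0, 0.0, 0.5],               # meta 0 scores for three items
                      [0.0, 1.0, 0.5],               # meta 1
                      [1.0, 1.0, 0.0]])              # meta 2

prediction_vector = numpy.sum(META_WEIGHTS * values, axis=0)
# prediction_vector is now array([0.7, 0.5, 0.4])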
Code Example #20
    def ti_CreateClusters(self):
        """
            prepare clusters for further predictions
            Clusters are being saved in 
            "test_clusters_<days>" file in directory of the case.
        """

        for window_dir in self.ti_dirs:
            print window_dir

            #test_matrix = scipy.io.mmio.mmread("test.mtx")
            test_matrix_file = open(window_dir + "/test.mtx", 'r')

            # create item_X_time list
            item_X_time_list = []
            meta_file = open(self.dataset.events_file_name, 'r')
            for line in meta_file:
                item_X_time_list.append(
                    misc_functions.getMetaString(
                        line, self.dataset.time_meta_position))
            meta_file.close()

            #print "len(item_X_time_list)=",len(item_X_time_list)

            # reading coords for current case
            coords = misc_functions.getWindowCoords(window_dir)

            #print "coords = ", coords

            # stuff before cycle
            clusters_list = []
            cur_user_id = str(coords[0])
            cur_cluster = ["user" + "\t" + str(coords[0])]

            # skip comments
            for i2 in range(3):
                test_matrix_file.readline()

            cur_user = -1
            cur_cluster = []

            for line in test_matrix_file:
                #user_id = int(line.split()[0]) - 1 + coords[0]
                #item_id = int(line.split()[1]) - 1 + coords[2]
                user_id = int(line.split()[0])
                item_id = int(line.split()[1])

                if user_id != cur_user:  # next user
                    #print "user_id = ", user_id
                    cur_user = user_id
                    if cur_cluster != []:
                        clusters_list.append(cur_cluster)
                    cur_cluster = ["user\t" + str(user_id)]
                time_bounds = clusters.getTimeInterval(
                    item_id, item_X_time_list, coords, self.cluster_size,
                    self.dataset.events_numb)
                cur_cluster.append(
                    str(time_bounds[0]) + "\t" + str(item_id) + "\t" +
                    str(time_bounds[1]))

            # flush the cluster collected for the last user
            if cur_cluster != []:
                clusters_list.append(cur_cluster)

            test_clusters_file = open(
                window_dir + "/test_clusters_" + str(self.cluster_size), 'w')
            test_clusters_file.write("low_bound item_id high_bound\n")
            for cluster in clusters_list:
                for line in cluster:
                    test_clusters_file.write(line + "\n")
            test_clusters_file.close()
Code Example #21
File: content_based.py  Project: armovetz/course4
def prediction(prediction_file_name, clusters_list, trash):
    
    coords = misc_functions.getWindowCoords()
    
    test_users = range(coords[0], coords[2] + 1) 
    test_items = range(coords[1], coords[3] + 1)
    
    #print "len(test_users) = ", len(test_users)
    #print "len(test_items) = ", len(test_items)
    #print "test_items = ", test_items
    
    # this matrix to be written as result finally
    #misc_functions.step()
    prediction_matrix = zeros((len(test_users), len(test_items)), dtype = float)
    
    training_matrix = scipy.io.mmio.mmread("history.mtx").tocsr()
    
    #item_X_meta_matrix = scipy.io.mmio.mmread("../../../well_done/items-metas_global.mtx").toarray()
    item_X_item_matrix = scipy.io.mmio.mmread("../../../well_done/items-items.mtx").tocsr()
    
    #user_counter = 0
    #for user in test_users:
    for cur_cluster in clusters_list:
        user = int(cur_cluster[0].split("\t")[1])
        #print "user = ", user

        # this user's visit counts over all items, and a fresh score vector
        user_visits = training_matrix[user].toarray()[0]
        prediction_vector = zeros((len(test_items)), dtype = float)

        for cluster in cur_cluster[1 : ]:
            start_cluster_item = int(cluster.split("\t")[0])
            stop_cluster_item  = int(cluster.split("\t")[2])
            
            similarities_for_clusters = item_X_item_matrix[start_cluster_item : stop_cluster_item]
            #print "similarities_for_clusters = ", similarities_for_clusters
            
            prediction_vector += sum(sum(similarities_for_clusters * user_visits)) / K
            
            """
            cluster_items = range(start_cluster_item, stop_cluster_item + 1)
                
            for item in cluster_items:
                similarities = item_X_item_matrix[item].toarray()[0]
                numpy.dot
                #print "similarities = ", similarities
                #print "len(similarities) = ", len(similarities)
                #   indices = numpy.lexsort(keys = (-similarities, -similarities))
                #print "indices = ", indices
                #print "len(indices) = ", len(indices)
                #print "K = ", K
                
                #   indices = indices[0:K]
                #   sorted_similarities = similarities.take(indices, axis = 0)
                #print "indices = ", indices
                #print "sorted_similarities = ", sorted_similarities
                #print "len(sorted_similarities) = ", len(sorted_similarities)
                #sorted_similarities = sorted_similarities[0:K] 
                
                for K_ctr in range(K):
                    index = indices[K_ctr]
                
                
                for K_ctr in range(K):
                    index = indices[K_ctr]
                    #print "user_visits[index] = ", user_visits[index]
                    #print "user_visits[index] = ", user_visits[index]
                    #print "prediction_vector[item - coords[1]] = ", prediction_vector[item - coords[1]]
                    #print "sorted_similarities = ", sorted_similarities
                    #print "sorted_similarities[K_ctr] = ", sorted_similarities[K_ctr]
                    
                    prediction_vector[item - coords[1]] += (user_visits[index] * sorted_similarities[K_ctr]) / K
                """

        prediction_matrix[user - coords[0]] = prediction_vector

        #print "Press any key to continue:"
        #sys.stdin.read(1)
        
#  =====  END OF MAIN CYCLE  =====  

    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name, result_matrix, field = 'real', precision = 5)
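
The large commented-out block above gestures at a per-item K-nearest-neighbour score: take an item's similarity row, keep its K most similar items, and weight the user's visit counts by those similarities. A compact sketch of that idea with numpy (illustrative only, not the code this function actually runs):

import numpy

def knn_item_score(similarity_row, user_visits, K):
    # indices of the K most similar items, highest similarity first
    top_k = numpy.argsort(-similarity_row)[:K]
    # similarity-weighted visit counts, averaged over the K neighbours
    return numpy.dot(similarity_row[top_k], user_visits[top_k]) / float(K)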
Code Example #22
File: prepare_matrices.py  Project: armovetz/course4
def prepareTestingMatrices(time_flag):
    """
        Function prepares testing matrix for each case of cross-validation
        or time cross-validation - depending on flag. Matrix is saved
        as "test.mtx" in directory for each case.
    """
    
    if time_flag:
        """
            SPECIAL TIME CROSS-VALIDATION
        """
        os.chdir("../data/tmp/time_cross_validation")

        for i in range(ITERATIONS_NUMB):
            print "Iteration = ", i + 1, "/", ITERATIONS_NUMB
            
            # changing directory for current case
            now = datetime.datetime.now()
            now_string = now.strftime("%Y-%m-%d %H:%M")
            
            cur_dir = str(i)
            os.chdir(cur_dir)
            
            # reading coords for current case
            coords = misc_functions.getWindowCoords()

            print "Reading history matrix ..."
            history_matrix = scipy.io.mmio.mmread("../../../well_done/history.mm")
            
            print "Saving test matrix..."
            if i != ITERATIONS_NUMB - 1:
                local_testing_matrix = (history_matrix.tocsr())[ : , coords[1] : coords[3] + 1].copy()
            else:
                local_testing_matrix = (history_matrix.tocsr())[ : , coords[1] : ].copy()
            scipy.io.mmio.mmwrite("test", local_testing_matrix, now_string, 'integer')
            
            os.chdir("..")
    else:
        """
            CLASSIC CROSS-VALIDATION
        """
        os.chdir("../data/tmp/cross_validation")
    
        # cycle for each case
        for i in range(SWITCHES_USERS_NUMB):
            for j in range(SWITCHES_ITEMS_NUMB):
                print "window: user_window = ", i + 1, "/", SWITCHES_USERS_NUMB
                print "        item_window = ", j + 1, "/", SWITCHES_ITEMS_NUMB

                # changing directory for current case
                cur_dir = str(i) + "_" + str(j)
                os.chdir(cur_dir)

                now = datetime.datetime.now()
                now_string = now.strftime("%Y-%m-%d %H:%M")

                # reading coords for current case
                coords = misc_functions.getWindowCoords()

                print "Reading history matrix ..."
                history_matrix = scipy.io.mmio.mmread("../../../well_done/history.mm")
            
                print "Saving test matrix..."
                local_testing_matrix = (history_matrix.tocsr())[coords[0] : coords[2] + 1, coords[1] : coords[3] + 1].copy()
                scipy.io.mmio.mmwrite("test", local_testing_matrix, now_string, 'integer')
            
                os.chdir("..")
    
    os.chdir("../../../python_sources")
    print "TOTAL SUCCESS! \n Local testing matrices and clusters are prepared!"
Code Example #23
File: prepare_matrices.py  Project: armovetz/course4
def createUser_x_MetaMatrices(time_flag):
    """
        Function prepares user_X_meta matrices for each case of 
        cross-validation or time_cross_validation - depending on flag.
    """

    # delete previous old prepared matrices
    #subprocess.call(["rm", "-rf", "users-.*"])
    
    if time_flag:
        """
            SPECIAL TIME CROSS-VALIDATION
        """
        os.chdir("../data/tmp/time_cross_validation")
    
        for i in range(ITERATIONS_NUMB):
            print "Iteration = ", i + 1, "/", ITERATIONS_NUMB
            
            # changing directory for current case
            cur_dir = str(i)
            os.chdir(cur_dir)
            
            # reading coords for current case
            coords = misc_functions.getWindowCoords()
    
            for meta in METAS_TO_USE:
                print "Computing meta matrix for ", METAS_TO_USE[meta]
                meta_matrix_name = "users-" + METAS_TO_USE[meta]
                
                meta_list = []
                meta_file = open("../../../well_done/meta", 'r')
                for line in meta_file:
                    meta_list.append(line)
                meta_file.close()
                
                # sort strings of meta of seminars for future indexing
                sorted_list = misc_functions.sortMetaListByMeta(meta_list, meta)
                
                # main function for current case and <meta>
                computeMetaMatrix(sorted_list, meta)
                
            # returning to "cross_validation" directory
            os.chdir("..")
    else:    
        """
            CLASSIC CROSS-VALIDATION
        """
        os.chdir("../data/tmp/cross_validation")
    
        # cycle for each case
        for i in range(SWITCHES_USERS_NUMB):
            for j in range(SWITCHES_ITEMS_NUMB):
                print "window: user_window = ", i + 1, "/", SWITCHES_USERS_NUMB
                print "        item_window = ", j + 1, "/", SWITCHES_ITEMS_NUMB
    
                # changing directory for current case
                cur_dir = str(i) + "_" + str(j)
                os.chdir(cur_dir)
                
                # reading coords for current case
                coords = misc_functions.getWindowCoords()
    
                for meta in METAS_TO_USE:
                    print "Computing meta matrix for ", METAS_TO_USE[meta]
                    meta_matrix_name = "users-" + METAS_TO_USE[meta]
                
                    meta_list = []
                    meta_file = open("../../../well_done/meta", 'r')
                    for line in meta_file:
                        meta_list.append(line)
                    meta_file.close()
                
                    print "A"
                    # sort strings of meta of seminars for future indexing
                    sorted_list = misc_functions.sortMetaListByMeta(meta_list, meta)
                
                    # main function for current case and <meta>
                    computeMetaMatrix(sorted_list, meta)
            
                # returning to "cross_validation" directory
                os.chdir("..")

    os.chdir("../../../python_sources")
    print "TOTAL SUCCESS! \n USER x META local matrices are prepared!"
Code Example #24
File: prepare_matrices.py  Project: armovetz/course4
def prepareTrainingMatrices(time_flag):
    """
        Function prepares training matrix for each case of cross-validation
        or time cross-validation - depending on flag. Matrix is saved
        as "history.mtx" in directory for each case.
    """
    
    if time_flag:
        """
            SPECIAL TIME CROSS-VALIDATION
        """
        os.chdir("../data/tmp/time_cross_validation")
        for i in range(ITERATIONS_NUMB):
            print "Iteration = ", i + 1, "/", ITERATIONS_NUMB
            
            # changing directory for current case
            cur_dir = str(i)
            os.chdir(cur_dir)

            # reading coords for current case
            coords = misc_functions.getWindowCoords()
            
            original_history_file = open("../../../well_done/history.mm")
            local_training_history_file = open("history.mtx", 'w')
            
            #skip comments
            for i2 in range(2):
                local_training_history_file.write(original_history_file.readline())
                
            # debugging stuff
            total_ctr = int(original_history_file.readline().split("\t")[2])
            
            # run through the whole history file
            ltw_list = []
            visits_ctr = 0
            zeros_ctr = 0
            for line in original_history_file:
                item = int(line.split("\t")[1]) - 1

                #print "item = ", item
                #step()
                if item < coords[1]:
                    ltw_list.append(line)
                    visits_ctr += 1
                    #print "A"
                else:
                    zeros_ctr += 1
                #step()
            
            local_training_history_file.write(str(USERS_NUMB) + " " + str(ITEMS_NUMB) + " " + str(visits_ctr) + "\n")
            for line in ltw_list:
                local_training_history_file.write(line)
            
            original_history_file.close()
            local_training_history_file.close()
            
            """ 
            # DEBUGGING STUFF
            print "total_ctr = ", total_ctr
            print "visits_ctr = ", visits_ctr
            print "zeros_ctr = ", zeros_ctr
            print "zeros_ctr + visits_ctr = ", zeros_ctr + visits_ctr
            print " -------------------------- "
            """
            if (zeros_ctr + visits_ctr != total_ctr):
                raise Exception("counters mismatch")
            
            os.chdir("..")
    else:
        """
            CLASSIC CROSS-VALIDATION
        """
    
        os.chdir("../data/tmp/cross_validation")
        # cycle for each case
        for i in range(SWITCHES_USERS_NUMB):
            for j in range(SWITCHES_ITEMS_NUMB):
                #print "window: user_window = ", i + 1, "/", SWITCHES_USERS_NUMB
                #print "        item_window = ", j + 1, "/", SWITCHES_ITEMS_NUMB
                print "cur work_dir = " + "<" + str(i) + "_" + str(j) + ">"
            
                # changing directory for current case
                cur_dir = str(i) + "_" + str(j)
                os.chdir(cur_dir)
                
                print "Writing local training matrix ..."
                now = datetime.datetime.now()
                now_string = now.strftime("%Y-%m-%d %H:%M")
            
                # reading coords for current case
                coords = misc_functions.getWindowCoords()

                original_history_file = open("../../../well_done/history.mm")
                local_training_history_file = open("history.mtx", 'w')
            
                #skip comments
                for i2 in range(2):
                    local_training_history_file.write(original_history_file.readline())
                
                # debugging stuff
                total_ctr = int(original_history_file.readline().split("\t")[2])
            
                # run through the whole history file
                ltw_list = []
                visits_ctr = 0
                zeros_ctr = 0
                for line in original_history_file:
                    item = int(line.split("\t")[1]) - 1
                    user = int(line.split("\t")[0]) - 1

                    #print "item = ", item
                    #step()
                    if ((item < coords[1]) or (item > coords[3])) or ((user < coords[0]) or (user > coords[2])):
                        ltw_list.append(line)
                        visits_ctr += 1
                        #print "A"
                    else:
                        zeros_ctr += 1
                    #step()
            
                local_training_history_file.write(str(USERS_NUMB) + " " + str(ITEMS_NUMB) + " " + str(visits_ctr) + "\n")
            
                for line in ltw_list:
                    local_training_history_file.write(line)
            
                original_history_file.close()
                local_training_history_file.close()
            
                """
                # DEBUGGING STUFF
                print "total_ctr = ", total_ctr
                print "visits_ctr = ", visits_ctr
                print "zeros_ctr = ", zeros_ctr
                print "zeros_ctr + visits_ctr = ", zeros_ctr + visits_ctr
                print " -------------------------- "
                """
                
                if (zeros_ctr + visits_ctr != total_ctr):
                    raise Exception("counters mismatch")
            
                os.chdir("..")
    
    os.chdir("../../../python_sources")

    print "TOTAL SUCCESS! \n Training local matrices are prepared!"