def createItem_x_ItemSimilarities(time_flag):
    """
    Prepare the local item-x-item similarity data for every
    cross-validation case.

    For each case directory the window coordinates are read, the item ids
    are split into a test list and a training list, and both lists are
    passed to createItem_x_ItemSimilarity_local().

    time_flag -- truthy: walk the cases of "time_cross_validation"; the
                 training items are the ids below the test window.
                 falsy: walk the cases of "cross_validation"; the training
                 items are the ids on both sides of the test window.

    The function moves around with os.chdir() on relative paths and ends
    with a chdir to "../../../python_sources".
    """
    if time_flag:
        # SPECIAL TIME CROSS-VALIDATION
        os.chdir("../data/tmp/time_cross_validation")
        for iteration in range(ITERATIONS_NUMB):
            print("%s %s %s %s" % ("Iteration = ", iteration + 1, "/",
                                   ITERATIONS_NUMB))

            # each case lives in a directory named after its index
            os.chdir(str(iteration))
            window = misc_functions.getWindowCoords()

            items_for_test = list(range(window[1], window[3] + 1))
            items_for_training = list(range(0, window[1]))
            createItem_x_ItemSimilarity_local(items_for_training,
                                              items_for_test)

            # back to the directory holding all the cases
            os.chdir("..")
    else:
        # CLASSIC CROSS-VALIDATION
        os.chdir("../data/tmp/cross_validation")
        for user_switch in range(SWITCHES_USERS_NUMB):
            for item_switch in range(SWITCHES_ITEMS_NUMB):
                print("%s %s %s %s" % ("window: user_window = ",
                                       user_switch + 1, "/",
                                       SWITCHES_USERS_NUMB))
                print("%s %s %s %s" % (" item_window = ",
                                       item_switch + 1, "/",
                                       SWITCHES_ITEMS_NUMB))

                # case directories are named "<user switch>_<item switch>"
                os.chdir(str(user_switch) + "_" + str(item_switch))
                window = misc_functions.getWindowCoords()

                items_for_test = list(range(window[1], window[3] + 1))
                items_before = list(range(0, window[1]))
                items_after = list(range(window[3] + 1, ITEMS_NUMB))
                createItem_x_ItemSimilarity_local(items_before + items_after,
                                                  items_for_test)

                # back to the directory holding all the cases
                os.chdir("..")

    os.chdir("../../../python_sources")
    print("TOTAL SUCCESS! \n local similarities matrices are prepared!")
def ti_CreateClusters(self):
    """
    Prepare the test clusters used for further predictions.

    For every window directory in self.ti_dirs the file "test.mtx" is
    scanned line by line after its first three lines. Consecutive entries
    with the same user id form one cluster: a "user\\t<user_id>" line
    followed by one "<low_bound>\\t<item_id>\\t<high_bound>" line per entry,
    the bounds being returned by clusters.getTimeInterval(). The clusters
    are saved in "test_clusters_<self.cluster_size>" in the window
    directory.

    Changes from the previous version: the cluster of the last user in the
    matrix file is now written (it used to be dropped because clusters were
    only stored when the next user started), and the input files are closed
    even when parsing fails.
    """
    for window_dir in self.ti_dirs:
        print(window_dir)

        with open(window_dir + "/test.mtx", 'r') as test_matrix_file:
            # one time meta value per line of the events file
            with open(self.dataset.events_file_name, 'r') as meta_file:
                item_X_time_list = [
                    misc_functions.getMetaString(
                        line, self.dataset.time_meta_position)
                    for line in meta_file]

            # reading coords for current case
            coords = misc_functions.getWindowCoords(window_dir)

            # skip the three leading lines of the matrix file
            for _ in range(3):
                test_matrix_file.readline()

            clusters_list = []
            cur_user = -1
            cur_cluster = []
            for line in test_matrix_file:
                fields = line.split()
                user_id = int(fields[0])
                item_id = int(fields[1])
                if user_id != cur_user:
                    # next user: store the finished cluster, start a new one
                    cur_user = user_id
                    if cur_cluster:
                        clusters_list.append(cur_cluster)
                    cur_cluster = ["user\t" + str(user_id)]
                time_bounds = clusters.getTimeInterval(
                    item_id, item_X_time_list, coords,
                    self.cluster_size, self.dataset.events_numb)
                cur_cluster.append(str(time_bounds[0]) + "\t" +
                                   str(item_id) + "\t" +
                                   str(time_bounds[1]))

            # the cluster of the last user has no "next user" to trigger it
            if cur_cluster:
                clusters_list.append(cur_cluster)

        clusters_file_name = (window_dir + "/test_clusters_" +
                              str(self.cluster_size))
        with open(clusters_file_name, 'w') as test_clusters_file:
            test_clusters_file.write("low_bound item_id high_bound\n")
            for cluster in clusters_list:
                for line in cluster:
                    test_clusters_file.write(line + "\n")
def ti_CreateTrainingMatrices(self):
    """
    Create the training matrix for each window that has been created while
    preparing time intervals.

    For every directory in self.ti_dirs the window coordinates are read and
    the slice [start_user - 1 : stop_user, : start_item - 1] of the history
    matrix is written to "<window_dir>/train" through scipy's Matrix Market
    writer, with the generation time as comment and 'integer' as field.

    The history matrix is loaded and converted to CSR once, when the first
    window is processed, instead of being re-read from disk for every
    window.
    """
    print("Creating train matrices...")
    history_csr = None
    for window_dir in self.ti_dirs:
        now_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

        coords = misc_functions.getWindowCoords(window_dir)
        start_user = coords[0]
        stop_user = coords[1]
        start_item = coords[2]

        # the history file is the same for all windows: read it only once
        if history_csr is None:
            history_csr = scipy.io.mmio.mmread(
                self.dataset.history_file_name).tocsr()

        # selecting part for the current window
        # NOTE(review): rows start at "start_user - 1" and columns stop
        # before "start_item - 1", while ti_CreateTestingMatrices slices
        # with the raw coordinates - confirm which index base
        # getWindowCoords() returns.
        local_training_matrix = history_csr[start_user - 1 : stop_user,
                                            : start_item - 1].copy()

        scipy.io.mmio.mmwrite(window_dir + "/train", local_training_matrix,
                              now_string, 'integer')
def ti_CreateTestingEvents(self):
    """
    Create the test meta file for events for each window that has been
    created while preparing time intervals.

    For every directory in self.ti_dirs the first <start_item> lines of the
    events file are skipped, then the next (stop_item - start_item + 1)
    lines are copied to "<window_dir>/events_test". In every copied line the
    text before the first tab is replaced by a local counter starting at 1.

    Both files are handled by "with" blocks, so they are closed even if
    reading or writing fails.
    """
    print("Creating test meta files for events...")
    for window_dir in self.ti_dirs:
        coords = misc_functions.getWindowCoords(window_dir)
        start_item = coords[2]
        stop_item = coords[3]

        with open(self.dataset.events_file_name, 'r') as events_file:
            with open(window_dir + "/events_test", 'w') as interval_events_file:
                # skip all items until start item
                for _ in range(start_item):
                    events_file.readline()

                # recount ids: local ids run from 1 to the window size
                window_size = stop_item - start_item + 1
                for new_local_id in range(1, window_size + 1):
                    meta_line = events_file.readline()
                    rest_of_line = meta_line[meta_line.find("\t"):]
                    interval_events_file.write(str(new_local_id) +
                                               rest_of_line)
def ti_CreateTrainingEvents(self):
    """
    Create the train meta file for events for each window that has been
    created while preparing time intervals.

    For every directory in self.ti_dirs the first (start_item - 1) lines of
    the events file are copied to "<window_dir>/events_train". In every
    copied line the text before the first tab is replaced by a local
    counter starting at 1.

    Both files are handled by "with" blocks, so they are closed even if
    reading or writing fails.
    """
    print("Creating train meta files for events...")
    for window_dir in self.ti_dirs:
        coords = misc_functions.getWindowCoords(window_dir)
        start_item = coords[2]

        with open(self.dataset.events_file_name, 'r') as events_file:
            with open(window_dir + "/events_train", 'w') as interval_events_file:
                # take only meta lines for events until <start_item> events
                # NOTE(review): ti_CreateTestingEvents skips <start_item>
                # lines, so line number <start_item> of the events file ends
                # up in neither file - confirm this is intended.
                for new_local_id in range(1, start_item):
                    meta_line = events_file.readline()
                    rest_of_line = meta_line[meta_line.find("\t"):]
                    interval_events_file.write(str(new_local_id) +
                                               rest_of_line)
def ti_CreateTrainingMatrices(self):
    """
    Create the training matrix for each window that has been created while
    preparing time intervals.

    For every directory in self.ti_dirs the window coordinates are read and
    the slice [start_user - 1 : stop_user, : start_item - 1] of the history
    matrix is written to "<window_dir>/train" through scipy's Matrix Market
    writer, with the generation time as comment and 'integer' as field.

    The history matrix is loaded and converted to CSR once, when the first
    window is processed, instead of being re-read from disk for every
    window.
    """
    print("Creating train matrices...")
    history_csr = None
    for window_dir in self.ti_dirs:
        now_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

        coords = misc_functions.getWindowCoords(window_dir)
        start_user = coords[0]
        stop_user = coords[1]
        start_item = coords[2]

        # the history file is the same for all windows: read it only once
        if history_csr is None:
            history_csr = scipy.io.mmio.mmread(
                self.dataset.history_file_name).tocsr()

        # selecting part for the current window
        # NOTE(review): rows start at "start_user - 1" and columns stop
        # before "start_item - 1", while ti_CreateTestingMatrices slices
        # with the raw coordinates - confirm which index base
        # getWindowCoords() returns.
        local_training_matrix = history_csr[start_user - 1 : stop_user,
                                            : start_item - 1].copy()

        scipy.io.mmio.mmwrite(window_dir + "/train", local_training_matrix,
                              now_string, 'integer')
def runCrossValidation(self):
    """
    Launch the cross-validation procedure for the loaded TI.

    For every entry of self.ti.intervals_list the method stores the
    interval directory in self.interval_path and its window coordinates in
    self.window_coords, then:
      * if self.need_predict is set, loads "train.mtx" and calls
        self.gear.predict(self, train_matrix);
      * if self.need_estimate is set, loads "test.mtx" and "prediction.mtx"
        and calls self.estimator.estimate(self, test_matrix, result_matrix).
    Progress is reported through self.reporter.report().

    Change from the previous version: the message reported right before the
    estimation now reads "estimation started" instead of repeating
    "prediction started".
    """
    self.reporter.report("---")
    self.reporter.report("Cross-validation started\n")

    for ti_ctr, interval_path in enumerate(self.ti.intervals_list, 1):
        self.reporter.report(" " + str(ti_ctr) + " of " +
                             str(len(self.ti.intervals_list)) +
                             " intervals running")

        # build the interval directory once and reuse it for every file
        interval_dir = MAG_TI_DIR + "/" + interval_path
        self.interval_path = interval_dir
        self.window_coords = misc_functions.getWindowCoords(interval_dir)

        if self.need_predict:
            train_matrix = scipy.io.mmio.mmread(interval_dir + "/train.mtx")
            self.reporter.report(" train mtx loaded")
            self.reporter.report(" prediction started")
            self.gear.predict(self, train_matrix)

        if self.need_estimate:
            test_matrix = scipy.io.mmio.mmread(interval_dir + "/test.mtx")
            self.reporter.report(" test mtx loaded")
            result_matrix = scipy.io.mmio.mmread(
                interval_dir + "/prediction.mtx")
            self.reporter.report(" results mtx loaded")
            self.reporter.report(" estimation started")
            self.estimator.estimate(self, test_matrix, result_matrix)
def prediction(prediction_file_name, clusters_list):
    """
    Run the external "svd" executable found under
    ~/graphchi/toolkits/collaborative_filtering on "history.mtx" through
    the shell (output redirected to /dev/null), then build the prediction
    matrix with computePredictionMatrixFromEigenVectors().

    prediction_file_name -- passed on to
                            computePredictionMatrixFromEigenVectors()
    clusters_list        -- not used by this function

    The return code of the external command is not inspected.
    """
    # adjacent literals are joined by the parser into the single command
    # string; the shell is needed for "~" and for the redirection
    svd_command = (
        "~/graphchi/toolkits/collaborative_filtering/svd"
        " --training=history.mtx --nsv=10 --nv=12 --max_iter=5"
        " --quiet=1 --tol=1e-1 > /dev/null")
    subprocess.call([svd_command], shell=True)

    computePredictionMatrixFromEigenVectors(
        5, "history.mtx", prediction_file_name,
        misc_functions.getWindowCoords())
def ti_CreateTrainingMatrices(self):
    """
    Create the training matrix for each window that has been created while
    preparing time intervals.

    For every directory in self.ti_dirs the history file is filtered line
    by line into "<window_dir>/train":
      * the first line of the history file is copied, the next two are
        dropped and replaced by a "%Generated <time>" line and a
        "<users> <events> <visits>" line;
      * a data line is kept when its second tab-separated field, minus
        one, is below coords[1].

    Raises Exception("counters mismatch") when the number of kept plus
    dropped lines differs from self.dataset.visits_numb.

    Both files are handled by "with" blocks, so they are closed even when a
    line cannot be parsed.
    """
    for window_dir in self.ti_dirs:
        # reading coords for current case
        coords = misc_functions.getWindowCoords(window_dir)

        lines_to_write = []
        zeros_ctr = 0
        with open(self.dataset.history_file_name) as original_history_file:
            # NOTE(review): sibling code reads "<window_dir>/train.mtx";
            # this file is written without the ".mtx" suffix - confirm.
            with open(window_dir + "/train", 'w') as local_training_history_file:
                # skip comments and copy some of them
                local_training_history_file.write(
                    original_history_file.readline())
                original_history_file.readline()
                original_history_file.readline()

                now_string = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M")
                local_training_history_file.write(
                    "%Generated " + now_string + "\n")

                # run through the whole history file
                for line in original_history_file:
                    event_id = int(line.split("\t")[1]) - 1
                    # NOTE(review): the other ti_* methods treat coords[2]
                    # as the first item of the window and coords[1] as the
                    # last user - confirm coords[1] is the intended bound.
                    if event_id < coords[1]:
                        lines_to_write.append(line)
                    else:
                        zeros_ctr += 1

                # write properties of the training matrix
                train_users_numb = coords[1] - coords[0] + 1
                train_events_numb = coords[3] - coords[2] + 1
                local_training_history_file.write(
                    str(train_users_numb) + " " + str(train_events_numb) +
                    " " + str(len(lines_to_write)) + "\n")
                local_training_history_file.writelines(lines_to_write)

        # every line of the history must have been either kept or dropped
        if zeros_ctr + len(lines_to_write) != self.dataset.visits_numb:
            raise Exception("counters mismatch")
def makeClusters():
    """
    Internal function.

    Prepares the test clusters when launched inside the directory of a
    cross-validation case. "test.mtx" is scanned line by line after its
    first three lines; user and item ids are shifted by coords[0] and
    coords[1] (minus one). Consecutive entries with the same user id form
    one cluster: a "user\\t<user_id>" line followed by one
    "<low_bound>\\t<item_id>\\t<high_bound>" line per entry, the bounds being
    returned by getTimeInterval(). The clusters are saved in
    "test_clusters_<DAYS_INTERVAL_PREPARE>" in the directory of the case.

    Changes from the previous version: the cluster of the last user in the
    matrix file is now written (it used to be dropped because clusters were
    only stored when the next user started), and "test.mtx" is closed.
    """
    with open("test.mtx", 'r') as test_matrix_file:
        # one time meta value per line of the global meta file
        with open("../../../well_done/meta", 'r') as meta_file:
            item_X_time_list = [getMetaString(line, TIME_ID)
                                for line in meta_file]

        # reading coords for current case
        coords = misc_functions.getWindowCoords()

        # skip the three leading lines of the matrix file
        for _ in range(3):
            test_matrix_file.readline()

        clusters_list = []
        cur_user = -1
        cur_cluster = []
        for line in test_matrix_file:
            fields = line.split()
            user_id = int(fields[0]) - 1 + coords[0]
            item_id = int(fields[1]) - 1 + coords[1]
            if user_id != cur_user:
                # next user: store the finished cluster, start a new one
                cur_user = user_id
                if cur_cluster:
                    clusters_list.append(cur_cluster)
                cur_cluster = ["user\t" + str(user_id)]
            time_bounds = getTimeInterval(item_id, item_X_time_list, coords)
            cur_cluster.append(str(time_bounds[0]) + "\t" + str(item_id) +
                               "\t" + str(time_bounds[1]))

        # the cluster of the last user has no "next user" to trigger it
        if cur_cluster:
            clusters_list.append(cur_cluster)

    clusters_file_name = "test_clusters_" + str(DAYS_INTERVAL_PREPARE)
    with open(clusters_file_name, 'w') as test_clusters_file:
        test_clusters_file.write("low_bound item_id high_bound\n")
        for cluster in clusters_list:
            for line in cluster:
                test_clusters_file.write(line + "\n")
def ndcgPrediction(magician, train_matrix):
    """
    Write the prediction matrix of the current interval to
    "<magician.interval_path>/prediction.mtx".

    At this stage every score is zero: the row of each user found in the
    clusters file is filled with zeros, and the matrix is saved in Matrix
    Market format (field 'real', precision 5).

    magician     -- object providing interval_path and interval_size
    train_matrix -- not used yet

    Changes from the previous version:
      * the matrix has one row per user of the window, bounds included
        (coords[0]..coords[1]); it used to be one row short while the item
        range already included its upper bound;
      * "train.mtx" is no longer re-read from disk, because the result was
        never used;
      * locals that were computed and never read have been removed.
    """
    prediction_file_name = magician.interval_path + "/prediction.mtx"
    clusters_list = clusters.getClustersListFromClustersFile(
        magician.interval_path, magician.interval_size)
    coords = misc_functions.getWindowCoords(magician.interval_path)

    users_numb = coords[1] - coords[0] + 1
    items_numb = coords[3] - coords[2] + 1
    prediction_matrix = scipy.zeros((users_numb, items_numb), dtype=float)

    for user_cluster in clusters_list:
        # the first line of a cluster is "user\t<user_id>"
        user_id = int(user_cluster[0].split("\t")[1])
        # TODO: compute real scores from the cluster lines
        # (user_cluster[1:], each "<low_bound>\t<item_id>\t<high_bound>")
        prediction_matrix[user_id - 1] = scipy.zeros((items_numb),
                                                     dtype=float)

    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name, result_matrix,
                          field='real', precision=5)
def estimate(magician, test_matrix, result_matrix):
    """
    Run the NDCG estimation for each query in the clusters list.

    Both matrices are converted to CSR, the clusters list and the window
    coordinates of magician.interval_path are loaded through
    misc_functions, and everything is handed to estimateNDCGp() together
    with magician.results_file_name.
    """
    csr_of_test = test_matrix.tocsr()
    csr_of_result = result_matrix.tocsr()
    output_file_name = magician.results_file_name

    interval_clusters = misc_functions.getClustersListFromClustersFile(
        magician.interval_path, magician.interval_size)
    window = misc_functions.getWindowCoords(magician.interval_path)

    estimateNDCGp(csr_of_test, csr_of_result, interval_clusters, window,
                  output_file_name)
def ti_CreateTestingMatrices(self):
    """
    Create the testing matrix for each window that has been created while
    preparing time intervals.

    For every directory in self.ti_dirs the window coordinates are read and
    the slice [coords[0] : coords[1] + 1, coords[2] : coords[3] + 1] of the
    history matrix is written to "<window_dir>/test" through scipy's Matrix
    Market writer, with the generation time as comment and 'integer' as
    field.

    The history matrix is loaded and converted to CSR once, when the first
    window is processed, instead of being re-read from disk for every
    window.
    """
    history_csr = None
    for window_dir in self.ti_dirs:
        now_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
        coords = misc_functions.getWindowCoords(window_dir)

        # the history file is the same for all windows: read it only once
        if history_csr is None:
            history_csr = scipy.io.mmio.mmread(
                self.dataset.history_file_name).tocsr()

        # selecting part for the current window
        local_testing_matrix = history_csr[coords[0] : coords[1] + 1,
                                           coords[2] : coords[3] + 1].copy()

        scipy.io.mmio.mmwrite(window_dir + "/test", local_testing_matrix,
                              now_string, 'integer')
def ndcgPrediction(magician, train_matrix):
    """
    Write the prediction matrix of the current interval to
    "<magician.interval_path>/prediction.mtx".

    At this stage every score is zero: the row of each user found in the
    clusters file is filled with zeros, and the matrix is saved in Matrix
    Market format (field 'real', precision 5).

    magician     -- object providing interval_path and interval_size
    train_matrix -- not used yet

    Changes from the previous version:
      * the matrix has one row per user of the window, bounds included
        (coords[0]..coords[1]); it used to be one row short while the item
        range already included its upper bound;
      * "train.mtx" is no longer re-read from disk, because the result was
        never used;
      * locals that were computed and never read have been removed.
    """
    prediction_file_name = magician.interval_path + "/prediction.mtx"
    clusters_list = clusters.getClustersListFromClustersFile(
        magician.interval_path, magician.interval_size)
    coords = misc_functions.getWindowCoords(magician.interval_path)

    users_numb = coords[1] - coords[0] + 1
    items_numb = coords[3] - coords[2] + 1
    prediction_matrix = scipy.zeros((users_numb, items_numb), dtype=float)

    for user_cluster in clusters_list:
        # the first line of a cluster is "user\t<user_id>"
        user_id = int(user_cluster[0].split("\t")[1])
        # TODO: compute real scores from the cluster lines
        # (user_cluster[1:], each "<low_bound>\t<item_id>\t<high_bound>")
        prediction_matrix[user_id - 1] = scipy.zeros((items_numb),
                                                     dtype=float)

    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name, result_matrix,
                          field='real', precision=5)
def estimate(magician, test_matrix, result_matrix):
    """
    Run the NDCG estimation for each query in the clusters list.

    Both matrices are converted to CSR, the clusters list and the window
    coordinates of magician.interval_path are loaded through
    misc_functions, and everything is handed to estimateNDCGp() together
    with magician.results_file_name.
    """
    csr_of_test = test_matrix.tocsr()
    csr_of_result = result_matrix.tocsr()
    output_file_name = magician.results_file_name

    interval_clusters = misc_functions.getClustersListFromClustersFile(
        magician.interval_path, magician.interval_size)
    window = misc_functions.getWindowCoords(magician.interval_path)

    estimateNDCGp(csr_of_test, csr_of_result, interval_clusters, window,
                  output_file_name)
def runCrossValidation(self):
    """
    Launch the cross-validation procedure for the loaded TI.

    For every entry of self.ti.intervals_list the method stores the
    interval directory in self.interval_path and its window coordinates in
    self.window_coords, then:
      * if self.need_predict is set, loads "train.mtx" and calls
        self.gear.predict(self, train_matrix);
      * if self.need_estimate is set, loads "test.mtx" and "prediction.mtx"
        and calls self.estimator.estimate(self, test_matrix, result_matrix).
    Progress is reported through self.reporter.report().

    Change from the previous version: the message reported right before the
    estimation now reads "estimation started" instead of repeating
    "prediction started".
    """
    self.reporter.report("---")
    self.reporter.report("Cross-validation started\n")

    for ti_ctr, interval_path in enumerate(self.ti.intervals_list, 1):
        self.reporter.report(" " + str(ti_ctr) + " of " +
                             str(len(self.ti.intervals_list)) +
                             " intervals running")

        # build the interval directory once and reuse it for every file
        interval_dir = MAG_TI_DIR + "/" + interval_path
        self.interval_path = interval_dir
        self.window_coords = misc_functions.getWindowCoords(interval_dir)

        if self.need_predict:
            train_matrix = scipy.io.mmio.mmread(interval_dir + "/train.mtx")
            self.reporter.report(" train mtx loaded")
            self.reporter.report(" prediction started")
            self.gear.predict(self, train_matrix)

        if self.need_estimate:
            test_matrix = scipy.io.mmio.mmread(interval_dir + "/test.mtx")
            self.reporter.report(" test mtx loaded")
            result_matrix = scipy.io.mmio.mmread(
                interval_dir + "/prediction.mtx")
            self.reporter.report(" results mtx loaded")
            self.reporter.report(" estimation started")
            self.estimator.estimate(self, test_matrix, result_matrix)
def prediction(prediction_file_name, clusters_list, svd_use_flag):
    """
    Main function for computing prediction rating.

    For every user cluster a (metas x test items) matrix of values is
    filled: for each meta in METAS_TO_USE the user's row of the matching
    "users-<meta name>" matrix is divided by its maximum, and for each item
    of the user's clusters the value stored is that normalized weight at
    the item's meta value. The rows are combined with META_WEIGHTS and the
    result becomes the user's row of the prediction matrix, which is
    written to prediction_file_name in Matrix Market format (field 'real',
    precision 5).

    prediction_file_name -- target of the Matrix Market writer
    clusters_list        -- list of clusters; the first line of each is
                            "user\\t<user>", the following lines are
                            "<start item>\\t<item>\\t<stop item>"
    svd_use_flag         -- truthy: read "users-<meta name>.svd.mtx",
                            falsy: read "users-<meta name>.mtx"

    Changes from the previous version: the meta matrices are loaded with a
    direct mmread() call instead of exec() on a string-built statement,
    the maximum of a user vector is computed once, and the manual meta
    counter is replaced by enumerate().
    """
    coords = misc_functions.getWindowCoords()
    test_users = range(coords[0], coords[2] + 1)
    test_items = range(coords[1], coords[3] + 1)

    # this matrix to be written as result finally
    prediction_matrix = zeros((len(test_users), len(test_items)),
                              dtype = float)
    # not used below; the read is kept so a missing "history.mtx" still
    # fails here as before
    training_matrix = scipy.io.mmio.mmread("history.mtx").tocsr()
    item_X_meta_matrix = scipy.io.mmio.mmread(
        "../../../well_done/items-metas_global.mtx").toarray()

    # getting meta matrices for corresponding using metas
    suffix = ".svd.mtx" if svd_use_flag else ".mtx"
    meta_matrices = []
    for meta in METAS_TO_USE:
        meta_matrice_file_name = "users-" + METAS_TO_USE[meta] + suffix
        meta_matrices.append(
            scipy.io.mmio.mmread(meta_matrice_file_name).toarray())

    for cur_cluster in clusters_list:
        user = int(cur_cluster[0].split("\t")[1])
        # a list, not a dict, because of a problem with dimension
        user_metas = []
        values = zeros((len(METAS_TO_USE), len(test_items)), dtype = float)

        # meta_matrices was filled in the same iteration order, so the
        # position in METAS_TO_USE selects the matching matrix
        for meta_ctr, meta in enumerate(METAS_TO_USE):
            user_vector = meta_matrices[meta_ctr][user]

            # normalizing counts of visited metas to use them as weights
            vector_max = max(user_vector)
            if vector_max != 0:
                user_metas.append(1.0 * user_vector / vector_max)
            else:
                user_metas.append(zeros((len(user_vector), ),
                                        dtype = float))

            for cluster in cur_cluster[1 : ]:
                start_cluster_item = int(cluster.split("\t")[0])
                stop_cluster_item = int(cluster.split("\t")[2])
                for item in range(start_cluster_item, stop_cluster_item + 1):
                    meta_value = item_X_meta_matrix[item, meta]

                    # PRICE
                    if meta == 8:
                        meta_value = priceToPriceCat(meta_value)

                    # CITY HEURISTIC: scale the whole column of the item
                    # when the user's weight for its city is low
                    if meta == 11:
                        if user_metas[meta_ctr][meta_value - 1] < CITY_TRESHOLD:
                            values[:, item - coords[1]] *= CITY_COEF

                    # (a DAYTIME conversion for meta 17 was disabled in the
                    # previous version and stays disabled)

                    values[meta_ctr][item - coords[1]] = \
                        (user_metas[meta_ctr])[meta_value - 1]

        prediction_vector = numpy.sum(META_WEIGHTS * values, axis = 0)
        prediction_matrix[user - coords[0]] = prediction_vector
    # ===== END OF MAIN CYCLE =====

    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name, result_matrix,
                          field = 'real', precision = 5)
def ti_CreateClusters(self):
    """
    Prepare the test clusters used for further predictions.

    For every window directory in self.ti_dirs the file "test.mtx" is
    scanned line by line after its first three lines. Consecutive entries
    with the same user id form one cluster: a "user\\t<user_id>" line
    followed by one "<low_bound>\\t<item_id>\\t<high_bound>" line per entry,
    the bounds being returned by clusters.getTimeInterval(). The clusters
    are saved in "test_clusters_<self.cluster_size>" in the window
    directory.

    Changes from the previous version: the cluster of the last user in the
    matrix file is now written (it used to be dropped because clusters were
    only stored when the next user started), and the input files are closed
    even when parsing fails.
    """
    for window_dir in self.ti_dirs:
        print(window_dir)

        with open(window_dir + "/test.mtx", 'r') as test_matrix_file:
            # one time meta value per line of the events file
            with open(self.dataset.events_file_name, 'r') as meta_file:
                item_X_time_list = [
                    misc_functions.getMetaString(
                        line, self.dataset.time_meta_position)
                    for line in meta_file]

            # reading coords for current case
            coords = misc_functions.getWindowCoords(window_dir)

            # skip the three leading lines of the matrix file
            for _ in range(3):
                test_matrix_file.readline()

            clusters_list = []
            cur_user = -1
            cur_cluster = []
            for line in test_matrix_file:
                fields = line.split()
                user_id = int(fields[0])
                item_id = int(fields[1])
                if user_id != cur_user:
                    # next user: store the finished cluster, start a new one
                    cur_user = user_id
                    if cur_cluster:
                        clusters_list.append(cur_cluster)
                    cur_cluster = ["user\t" + str(user_id)]
                time_bounds = clusters.getTimeInterval(
                    item_id, item_X_time_list, coords,
                    self.cluster_size, self.dataset.events_numb)
                cur_cluster.append(str(time_bounds[0]) + "\t" +
                                   str(item_id) + "\t" +
                                   str(time_bounds[1]))

            # the cluster of the last user has no "next user" to trigger it
            if cur_cluster:
                clusters_list.append(cur_cluster)

        clusters_file_name = (window_dir + "/test_clusters_" +
                              str(self.cluster_size))
        with open(clusters_file_name, 'w') as test_clusters_file:
            test_clusters_file.write("low_bound item_id high_bound\n")
            for cluster in clusters_list:
                for line in cluster:
                    test_clusters_file.write(line + "\n")
def prediction(prediction_file_name, clusters_list, trash):
    """
    Compute a prediction matrix from the item-x-item similarity matrix and
    write it to prediction_file_name in Matrix Market format (field 'real',
    precision 5).

    prediction_file_name -- target of the Matrix Market writer
    clusters_list        -- list of clusters; the user id is taken from the
                            first line of each ("user\\t<user>")
    trash                -- not used in the visible code

    NOTE(review): the text of this function is corrupted at the "******"
    marker below. The statements that defined user_visits and
    prediction_vector, and what looks like the header of a loop binding
    `cluster` and start_cluster_item, are missing, so the function cannot
    run as it stands. Restore it from version control before use; the
    missing part was not guessed here.
    """
    coords = misc_functions.getWindowCoords()
    test_users = range(coords[0], coords[2] + 1)
    test_items = range(coords[1], coords[3] + 1)
    #print "len(test_users) = ", len(test_users)
    #print "len(test_items) = ", len(test_items)
    #print "test_items = ", test_items

    # this matrix to be written as result finally
    #misc_functions.step()
    prediction_matrix = zeros((len(test_users), len(test_items)), dtype = float)
    # the result of this read is not used in the visible code
    training_matrix = scipy.io.mmio.mmread("history.mtx").tocsr()
    #item_X_meta_matrix = scipy.io.mmio.mmread("../../../well_done/items-metas_global.mtx").toarray()
    item_X_item_matrix = scipy.io.mmio.mmread("../../../well_done/items-items.mtx").tocsr()

    #user_counter = 0
    #for user in test_users:
    for cur_cluster in clusters_list:
        user = int (cur_cluster[0].split("\t")[1])
        # --- corrupted span: source text lost between here ... ---
        #print "user = "******"\t")[0])
        # --- ... and here; the names cluster, start_cluster_item,
        # user_visits and prediction_vector used below are undefined ---
        stop_cluster_item = int(cluster.split("\t")[2])
        # rows start_cluster_item .. stop_cluster_item - 1 of the
        # similarity matrix (the slice excludes its upper bound)
        similarities_for_clusters = item_X_item_matrix[start_cluster_item : stop_cluster_item]
        #print "similarities_for_clusters = ", similarities_for_clusters
        prediction_vector += sum(sum(similarities_for_clusters * user_visits)) / K
        # the string below is a disabled earlier variant; it is a bare
        # string expression and has no effect
        """ cluster_items = range(start_cluster_item, stop_cluster_item + 1) for item in cluster_items: similarities = item_X_item_matrix[item].toarray()[0] numpy.dot #print "similarities = ", similarities #print "len(similarities) = ", len(similarities) # indices = numpy.lexsort(keys = (-similarities, -similarities)) #print "indices = ", indices #print "len(indices) = ", len(indices) #print "K = ", K # indices = indices[0:K] # sorted_similarities = similarities.take(indices, axis = 0) #print "indices = ", indices #print "sorted_similarities = ", sorted_similarities #print "len(sorted_similarities) = ", len(sorted_similarities) #sorted_similarities = sorted_similarities[0:K] for K_ctr in range(K): index = indices[K_ctr] for K_ctr in range(K): index = indices[K_ctr] #print "user_visits[index] = ", user_visits[index] #print "user_visits[index] = ", 
user_visits[index] #print "prediction_vector[item - coords[1]] = ", prediction_vector[item - coords[1]] #print "sorted_similarities = ", sorted_similarities #print "sorted_similarities[K_ctr] = ", sorted_similarities[K_ctr] prediction_vector[item - coords[1]] += (user_visits[index] * sorted_similarities[K_ctr]) / K """
        # row of the user relative to the first user of the window
        prediction_matrix[user - coords[0]] = prediction_vector
        #print "Press any key to continue:"
        #sys.stdin.read(1)
    # ===== END OF MAIN CYCLE =====

    result_matrix = scipy.sparse.csr_matrix(prediction_matrix)
    scipy.io.mmio.mmwrite(prediction_file_name, result_matrix, field = 'real', precision = 5)
def prepareTestingMatrices(time_flag):
    """
    Prepare the testing matrix for each case of cross-validation or time
    cross-validation, depending on the flag. The matrix is written with
    scipy's Matrix Market writer under the name "test" in the directory of
    each case.

    time_flag -- truthy: for every case of "time_cross_validation" take all
                 rows and the columns coords[1]..coords[3] of the history
                 matrix (for the last case: all columns from coords[1]);
                 falsy: for every case of "cross_validation" take the rows
                 coords[0]..coords[2] and the columns coords[1]..coords[3].

    The function moves around with os.chdir() on relative paths and ends
    with a chdir to "../../../python_sources".

    Change from the previous version: the history matrix is read and
    converted to CSR once, inside the first case directory, instead of once
    per case; "Reading history matrix ..." is printed only for that read.
    """
    history_csr = None

    if time_flag:
        # SPECIAL TIME CROSS-VALIDATION
        os.chdir("../data/tmp/time_cross_validation")
        for i in range(ITERATIONS_NUMB):
            print("%s %s %s %s" % ("Iteration = ", i + 1, "/",
                                   ITERATIONS_NUMB))
            now_string = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

            # changing directory for current case
            os.chdir(str(i))
            coords = misc_functions.getWindowCoords()

            # the relative path points to the same file from every case
            if history_csr is None:
                print("Reading history matrix ...")
                history_csr = scipy.io.mmio.mmread(
                    "../../../well_done/history.mm").tocsr()

            print("Saving test matrix...")
            if i != ITERATIONS_NUMB - 1:
                local_testing_matrix = \
                    history_csr[ : , coords[1] : coords[3] + 1].copy()
            else:
                # the last case takes everything up to the last column
                local_testing_matrix = history_csr[ : , coords[1] : ].copy()
            scipy.io.mmio.mmwrite("test", local_testing_matrix,
                                  now_string, 'integer')
            os.chdir("..")
    else:
        # CLASSIC CROSS-VALIDATION
        os.chdir("../data/tmp/cross_validation")
        for i in range(SWITCHES_USERS_NUMB):
            for j in range(SWITCHES_ITEMS_NUMB):
                print("%s %s %s %s" % ("window: user_window = ", i + 1, "/",
                                       SWITCHES_USERS_NUMB))
                print("%s %s %s %s" % (" item_window = ", j + 1, "/",
                                       SWITCHES_ITEMS_NUMB))

                # changing directory for current case
                os.chdir(str(i) + "_" + str(j))
                now_string = datetime.datetime.now().strftime(
                    "%Y-%m-%d %H:%M")
                coords = misc_functions.getWindowCoords()

                # the relative path points to the same file from every case
                if history_csr is None:
                    print("Reading history matrix ...")
                    history_csr = scipy.io.mmio.mmread(
                        "../../../well_done/history.mm").tocsr()

                print("Saving test matrix...")
                local_testing_matrix = \
                    history_csr[coords[0] : coords[2] + 1,
                                coords[1] : coords[3] + 1].copy()
                scipy.io.mmio.mmwrite("test", local_testing_matrix,
                                      now_string, 'integer')
                os.chdir("..")

    os.chdir("../../../python_sources")
    print("TOTAL SUCCESS! \n Local testing matrices and clusters are prepared!")
def createUser_x_MetaMatrices(time_flag):
    """
    Prepare the user_X_meta matrices for each case of cross-validation or
    time cross-validation, depending on the flag.

    Inside every case directory, and for every meta of METAS_TO_USE, the
    lines of "../../../well_done/meta" are loaded, sorted through
    misc_functions.sortMetaListByMeta() and passed to computeMetaMatrix().

    time_flag -- truthy: walk the cases of "time_cross_validation";
                 falsy: walk the cases of "cross_validation".

    The function moves around with os.chdir() on relative paths and ends
    with a chdir to "../../../python_sources".
    """
    if time_flag:
        # SPECIAL TIME CROSS-VALIDATION
        os.chdir("../data/tmp/time_cross_validation")
        for iteration in range(ITERATIONS_NUMB):
            print("%s %s %s %s" % ("Iteration = ", iteration + 1, "/",
                                   ITERATIONS_NUMB))

            # enter the directory of the current case
            os.chdir(str(iteration))
            # the coordinates are read but not used further here
            coords = misc_functions.getWindowCoords()

            for meta in METAS_TO_USE:
                print("%s %s" % ("Computing meta matrix for ",
                                 METAS_TO_USE[meta]))
                # name is built but not used further here
                meta_matrix_name = "users-" + METAS_TO_USE[meta]

                # a fresh copy of the meta lines for every meta
                meta_file = open("../../../well_done/meta", 'r')
                meta_lines = meta_file.readlines()
                meta_file.close()

                # order the meta strings by the current meta, then compute
                # the matrix for this case and meta
                computeMetaMatrix(
                    misc_functions.sortMetaListByMeta(meta_lines, meta),
                    meta)

            # back to the directory holding all the cases
            os.chdir("..")
    else:
        # CLASSIC CROSS-VALIDATION
        os.chdir("../data/tmp/cross_validation")
        for user_switch in range(SWITCHES_USERS_NUMB):
            for item_switch in range(SWITCHES_ITEMS_NUMB):
                print("%s %s %s %s" % ("window: user_window = ",
                                       user_switch + 1, "/",
                                       SWITCHES_USERS_NUMB))
                print("%s %s %s %s" % (" item_window = ",
                                       item_switch + 1, "/",
                                       SWITCHES_ITEMS_NUMB))

                # enter the directory of the current case
                os.chdir(str(user_switch) + "_" + str(item_switch))
                # the coordinates are read but not used further here
                coords = misc_functions.getWindowCoords()

                for meta in METAS_TO_USE:
                    print("%s %s" % ("Computing meta matrix for ",
                                     METAS_TO_USE[meta]))
                    # name is built but not used further here
                    meta_matrix_name = "users-" + METAS_TO_USE[meta]

                    # a fresh copy of the meta lines for every meta
                    meta_file = open("../../../well_done/meta", 'r')
                    meta_lines = meta_file.readlines()
                    meta_file.close()
                    print("A")

                    # order the meta strings by the current meta, then
                    # compute the matrix for this case and meta
                    computeMetaMatrix(
                        misc_functions.sortMetaListByMeta(meta_lines, meta),
                        meta)

                # back to the directory holding all the cases
                os.chdir("..")

    os.chdir("../../../python_sources")
    print("TOTAL SUCCESS! \n USER x META local matrices are prepared!")
def _writeLocalHistoryMatrix(keep_entry):
    """
    Copy the entries of "../../../well_done/history.mm" accepted by
    keep_entry(fields) into "history.mtx" of the current directory.

    keep_entry receives the tab-separated fields of one data line and
    returns a truthy value when the line belongs to the training matrix.
    The first two lines of the history are copied as they are; the third
    one is replaced by "<USERS_NUMB> <ITEMS_NUMB> <kept lines>".

    Raises Exception("counters mismatch") when the number of kept plus
    dropped lines differs from the third tab-separated field of that third
    line.
    """
    kept_lines = []
    dropped_ctr = 0
    with open("../../../well_done/history.mm") as original_history_file:
        with open("history.mtx", 'w') as local_training_history_file:
            # copy the two leading lines
            for _ in range(2):
                local_training_history_file.write(
                    original_history_file.readline())

            # total number of entries announced by the history file
            total_ctr = int(original_history_file.readline().split("\t")[2])

            # run through the whole history file
            for line in original_history_file:
                if keep_entry(line.split("\t")):
                    kept_lines.append(line)
                else:
                    dropped_ctr += 1

            local_training_history_file.write(
                str(USERS_NUMB) + " " + str(ITEMS_NUMB) + " " +
                str(len(kept_lines)) + "\n")
            local_training_history_file.writelines(kept_lines)

    if dropped_ctr + len(kept_lines) != total_ctr:
        raise Exception("counters mismatch")


def prepareTrainingMatrices(time_flag):
    """
    Prepare the training matrix for each case of cross-validation or time
    cross-validation, depending on the flag. The matrix is saved as
    "history.mtx" in the directory of each case.

    time_flag -- truthy: for every case of "time_cross_validation" keep the
                 history entries whose item (second field, minus one) is
                 below coords[1];
                 falsy: for every case of "cross_validation" keep the
                 entries whose item is outside coords[1]..coords[3] or
                 whose user (first field, minus one) is outside
                 coords[0]..coords[2].

    Raises Exception("counters mismatch") when the kept and dropped entries
    of a case do not add up to the total announced by the history file.

    The function moves around with os.chdir() on relative paths and ends
    with a chdir to "../../../python_sources".

    Changes from the previous version: the files are handled by "with"
    blocks (closed even when a line cannot be parsed), the copy logic
    shared by both modes lives in _writeLocalHistoryMatrix(), and the
    unused timestamp of the classic mode has been removed.
    """
    if time_flag:
        # SPECIAL TIME CROSS-VALIDATION
        os.chdir("../data/tmp/time_cross_validation")
        for i in range(ITERATIONS_NUMB):
            print("%s %s %s %s" % ("Iteration = ", i + 1, "/",
                                   ITERATIONS_NUMB))

            # changing directory for current case
            os.chdir(str(i))
            coords = misc_functions.getWindowCoords()

            # training part: every item before the test window
            _writeLocalHistoryMatrix(
                lambda fields: int(fields[1]) - 1 < coords[1])

            os.chdir("..")
    else:
        # CLASSIC CROSS-VALIDATION
        os.chdir("../data/tmp/cross_validation")
        for i in range(SWITCHES_USERS_NUMB):
            for j in range(SWITCHES_ITEMS_NUMB):
                print("cur work_dir = " + "<" + str(i) + "_" + str(j) + ">")

                # changing directory for current case
                os.chdir(str(i) + "_" + str(j))
                print("Writing local training matrix ...")
                coords = misc_functions.getWindowCoords()

                def outside_window(fields):
                    # training part: everything outside the test window
                    item = int(fields[1]) - 1
                    user = int(fields[0]) - 1
                    return (((item < coords[1]) or (item > coords[3])) or
                            ((user < coords[0]) or (user > coords[2])))

                _writeLocalHistoryMatrix(outside_window)

                os.chdir("..")

    os.chdir("../../../python_sources")
    print("TOTAL SUCCESS! \n Training local matrices are prepared!")