def divide_dataset(self):
    """Shuffle the dataset and split it into ``self.n_folds`` train/test folds.

    The file named by ``self.dataset`` is loaded through ``ReadFile`` (using
    ``self.space_type``), its ``triple_dataset`` is shuffled in place, and the
    shuffled list is cut into ``self.n_folds`` contiguous, non-overlapping
    partitions of equal size.

    For every fold index ``f`` the result is stored in ``self.dict_folds[f]``
    as ``{'train': ..., 'test': ...}``, where ``'test'`` is partition ``f`` and
    ``'train'`` is the concatenation of all the other partitions. Both lists
    are sorted.

    Interactions beyond ``n_folds * partition_size`` (the remainder of the
    integer division) are not assigned to any fold.

    Raises:
        ZeroDivisionError: if ``self.n_folds`` is zero.
    """
    tp = ReadFile(self.dataset, space_type=self.space_type)
    tp.triple_information()
    random.shuffle(tp.triple_dataset)

    # Get the number of interactions that each partition should have.
    partition_size = int(float(tp.number_interactions) / float(self.n_folds))

    # Slices are half-open, so fold p must start exactly where fold p - 1
    # ended; starting one past the previous end would silently drop the
    # interaction sitting on every fold boundary.
    list_folds = [
        tp.triple_dataset[p * partition_size:(p + 1) * partition_size]
        for p in range(self.n_folds)
    ]

    for fold in range(self.n_folds):
        # The training set is every partition except the one held out.
        train_set = list()
        for fold_train in range(self.n_folds):
            if fold_train != fold:
                train_set += list_folds[fold_train]

        train_set.sort()
        list_folds[fold].sort()
        self.dict_folds[fold] = {'train': train_set, 'test': list_folds[fold]}
def treat_interactions(self):
    """Load all training files and derive the combined lookup structures.

    Each path in ``self.list_train_files`` is read with ``ReadFile`` (using
    ``self.space_type``). Its ``triple_dataset`` is appended to
    ``self.individual_datasets`` and also accumulated into
    ``self.final_dataset``.

    Afterwards the five values returned by
    ``return_list_info(self.final_dataset)`` are stored, in order, as
    ``self.dict_item``, ``self.dict_not_item``, ``self.list_users``,
    ``self.list_items`` and ``self.dict_index``; the users and items values
    are converted to lists.
    """
    for train_file in self.list_train_files:
        reader = ReadFile(train_file, space_type=self.space_type)
        reader.triple_information()
        self.individual_datasets.append(reader.triple_dataset)
        self.final_dataset += reader.triple_dataset

    (item_info, not_item_info, users, items,
     index_info) = return_list_info(self.final_dataset)

    self.dict_item = item_info
    self.dict_not_item = not_item_info
    self.dict_index = index_info
    # Users and items are kept as plain lists.
    self.list_users = list(users)
    self.list_items = list(items)
def treat_interactions(self):
    """Load all training files, count tags of the last one, and build lookups.

    Each path in ``self.list_train_files`` is read with ``ReadFile`` (using
    ``self.space_type``). Its ``triple_dataset`` is appended to
    ``self.individual_datasets`` and also accumulated into
    ``self.final_dataset``.

    For the last file only, every triple increments the counters stored in
    ``self.dict_item_tag`` under both ``triple[0]`` and ``triple[1]``.

    Afterwards the five values returned by
    ``return_list_info(self.final_dataset)`` are stored, in order, as
    ``self.dict_item``, ``self.dict_not_item``, ``self.list_users``,
    ``self.list_items`` and ``self.dict_index``; the users and items values
    are converted to lists.
    """
    for position, train_file in enumerate(self.list_train_files, 1):
        reader = ReadFile(train_file, space_type=self.space_type)
        reader.triple_information()
        self.individual_datasets.append(reader.triple_dataset)
        self.final_dataset += reader.triple_dataset

        # Only the final file contributes to the occurrence counters.
        if position != len(self.list_train_files):
            continue

        counters = self.dict_item_tag
        for triple in reader.triple_dataset:
            for key in (triple[0], triple[1]):
                counters[key] = counters.get(key, 0) + 1

    (item_info, not_item_info, users, items,
     index_info) = return_list_info(self.final_dataset)

    self.dict_item = item_info
    self.dict_not_item = not_item_info
    self.dict_index = index_info
    # Users and items are kept as plain lists.
    self.list_users = list(users)
    self.list_items = list(items)