def plot_measure_of_group(group: List[int], method: str,
                          model: models.ScoreModel, title: str,
                          measure_array: np.ndarray,
                          axis_label: Tuple[str, str], dest_path_tail: str):
    """Plot each user's values for the measures in measure_array,
    comparing the users of one group under a single model/method."""
    dest = (share.PLOT_TOP + '/measure_of_group/' + model.get_dir_name() +
            '/' + dest_path_tail)
    util.init_file(dest)
    pyplot.figure(figsize=(10, 6))
    pyplot.subplot(GS[0, :GRID_WIDTH - 1])
    xs = [x * 0.1 for x in range(measure_array.shape[0])]  # evenly spaced tick positions
    for user_id in group:
        source = util.get_result_path(dir_name=share.RESULT_TOP + '/' +
                                      model.get_dir_name(),
                                      method=method,
                                      user_id=user_id)
        data = pd.read_csv(source)
        ys = [data[measure].values[0] for measure in measure_array]
        if measure_array.shape[0] == 1:
            # a single measure (e.g. MAiP) is drawn as a point, not a line
            pyplot.scatter(xs, ys, label='user' + str(user_id))
        else:
            pyplot.plot(xs, ys, label='user' + str(user_id))
    pyplot.title(title)
    pyplot.xlabel(axis_label[0])
    pyplot.ylabel(axis_label[1])
    pyplot.xticks(xs, measure_array)
    pyplot.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0)
    pyplot.savefig(dest)
def plot_measure_of_compare(compare_dict: Dict[str, Tuple[str, models.ScoreModel]],
                            title: str, group: List[int],
                            measure_array: np.ndarray,
                            axis_label: Tuple[str, str], dest_path_tail: str):
    """Plot the group-averaged values of the measures in measure_array,
    comparing the (method, model) pairs in compare_dict."""
    dest = share.PLOT_TOP + '/measure_of_compare/' + dest_path_tail
    util.init_file(dest)
    pyplot.figure(figsize=(10, 6))
    pyplot.subplot(GS[0, :GRID_WIDTH - 1])
    xs = [x * 0.1 for x in range(measure_array.shape[0])]  # evenly spaced tick positions
    for key, (method, compare) in compare_dict.items():
        ys = []
        for measure in measure_array:
            value = 0.0
            for user_id in group:
                source = util.get_result_path(dir_name=share.RESULT_TOP + '/' +
                                              compare.get_dir_name(),
                                              method=method,
                                              user_id=user_id)
                data = pd.read_csv(source)
                value += data[measure].values[0] / len(group)  # group mean
            ys.append(value)
        if measure_array.shape[0] == 1:
            # a single measure (e.g. MAiP) is drawn as a point, not a line
            pyplot.scatter(xs, ys, label=key)
        else:
            pyplot.plot(xs, ys, label=key)
    pyplot.title(title)
    pyplot.xlabel(axis_label[0])
    pyplot.ylabel(axis_label[1])
    pyplot.xticks(xs, measure_array)
    pyplot.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0)
    pyplot.savefig(dest)
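# Usage sketch for the two plotting helpers above. The group ids, titles,
# file names and the 'default' method name are hypothetical placeholders;
# baseline_model/proposed_model stand for any configured models.ScoreModel
# instances. Only the call shapes follow from the signatures above.
def _example_plot_measures(baseline_model: models.ScoreModel,
                           proposed_model: models.ScoreModel):
    from collections import OrderedDict  # the helpers rely on key order
    group = [1, 2, 3]
    measure_array = share.MEASURE_TYPE_MEASURE_DICT['nDCG']
    plot_measure_of_group(group=group,
                          method='default',
                          model=baseline_model,
                          title='nDCG per user',
                          measure_array=measure_array,
                          axis_label=('measure', 'value'),
                          dest_path_tail='ndcg_per_user.png')
    compare_dict = OrderedDict([('baseline', ('default', baseline_model)),
                                ('proposed', ('default', proposed_model))])
    plot_measure_of_compare(compare_dict=compare_dict,
                            title='nDCG (group mean)',
                            group=group,
                            measure_array=measure_array,
                            axis_label=('measure', 'value'),
                            dest_path_tail='ndcg_compare.png')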
def set_exam_tex_form(group: List[int],
                      compare_dict: Dict[str, Tuple[str, models.ScoreModel]],
                      title: str):
    """Write one CSV per measure: a user_id column followed by one column
    per compare_dict key. compare_dict must preserve insertion order
    (e.g. an OrderedDict): {'key': (method, model), ...}."""
    dest_h = share.TEX_EXAM_SOURCE_TOP + '/' + title
    splitter = ','
    keys = compare_dict.keys()
    for measure_type in share.MEASURE_TYPE_LIST:
        for measure in share.MEASURE_TYPE_MEASURE_DICT[measure_type]:
            dest = dest_h + '/' + measure
            header = 'user_id'
            for key in keys:
                header += splitter + key
            util.init_file(dest)
            with open(dest, 'wt') as fout:
                fout.write(header + '\n')
                for user_id in group:
                    line = str(user_id)
                    for key in keys:
                        line += splitter
                        method, score_model = compare_dict[key]
                        source = util.get_result_path(
                            dir_name=share.RESULT_TOP + '/' +
                            score_model.get_dir_name(),
                            user_id=user_id,
                            method=method)
                        try:
                            data = pd.read_csv(source)
                            line += str(round(data[measure].values[0],
                                              share.DIGIT))
                        except FileNotFoundError:
                            # leave the cell empty if the result is missing
                            print(source + ' not found')
                    fout.write(line + '\n')
def get_measure_tex_form(group: List[int],
                         compare_dict: Dict[str, Tuple[str, models.ScoreModel]],
                         title: str):
    """Write a LaTeX tabular body: one row per measure, one column per
    compare_dict key, averaged over the group. compare_dict must
    preserve insertion order (e.g. an OrderedDict)."""
    dest = share.TEX_MEASURE_TOP + '/' + title
    splitter = '&'
    measure_suffix = '\\\\\n'  # each tabular row must end with '\\'
    measure_type_suffix = '\\\\ \\hline\n'  # rule after each measure type
    header = 'measure'
    group_size = len(group)
    keys = compare_dict.keys()
    for key in keys:
        header += splitter + key
    header += measure_suffix
    util.init_file(dest)
    with open(dest, 'wt') as fout:
        fout.write(header)
        for measure_type in share.MEASURE_TYPE_LIST:
            measure_array = share.MEASURE_TYPE_MEASURE_DICT[measure_type]
            for i, measure in enumerate(measure_array):
                line = measure
                for key in keys:
                    value = 0.0
                    line += splitter
                    for user_id in group:
                        method, model = compare_dict[key]
                        source = util.get_result_path(
                            dir_name=share.RESULT_TOP + '/' +
                            model.get_dir_name(),
                            user_id=user_id,
                            method=method)
                        try:
                            data = pd.read_csv(source)
                            value += data[measure].values[0] / group_size
                        except FileNotFoundError:
                            print(source + ' not found')
                            break  # skip the remaining users for this cell
                    line += str(round(value, share.DIGIT))
                if i + 1 == measure_array.shape[0]:
                    line += measure_type_suffix
                else:
                    line += measure_suffix
                fout.write(line)
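# Usage sketch for the two table writers above (hypothetical title, group
# ids and 'default' method name; compare_dict must keep insertion order,
# hence the OrderedDict).
def _example_tex_tables(baseline_model: models.ScoreModel,
                        proposed_model: models.ScoreModel):
    from collections import OrderedDict
    group = [1, 2, 3]
    compare_dict = OrderedDict([('baseline', ('default', baseline_model)),
                                ('proposed', ('default', proposed_model))])
    # writes one CSV per measure under share.TEX_EXAM_SOURCE_TOP/<title>/
    set_exam_tex_form(group=group, compare_dict=compare_dict,
                      title='experiment1')
    # writes one LaTeX tabular body to share.TEX_MEASURE_TOP/<title>
    get_measure_tex_form(group=group, compare_dict=compare_dict,
                         title='experiment1')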
def plot_marg_and_rank(model: models.ScoreModel, user_id: str, train_id: int,
                       method: str, boolean_value: int, title: str,
                       denominator_list: List[int], denominator_title: str,
                       dest_path_tail: str):
    """For every score type, plot the user's learned marginal against the
    all-items marginal, and scatter the ranked items labelled with
    boolean_value, colour-coded by ranking bucket of size rank_space."""
    rank_space = 10
    color_list = ['red', 'orange', 'yellow', 'green', 'blue', 'violet',
                  'black']
    _, _, mapping_id = util.get_score_mapping_param(user_id)
    # marginal distributions over all items
    all_items_marg_path = (model.get_dest_dict()['all_items_marg_dict'] +
                           '/' + mapping_id)
    with open(all_items_marg_path, 'rb') as fin:
        all_marg_dict = pickle.load(fin)
    # the user's trained marginals
    user_input_path = (
        model.get_dest_dict()['log_weight_and_score_model_list'] + '/' +
        util.get_user_train_id_path(user_id=user_id, train_id=train_id))
    with open(user_input_path, 'rb') as fin:
        user_weight_marg_dict_list = pickle.load(fin)
    # ranking data
    rank_input_path = util.get_result_path(dir_name=share.RANKING_TOP + '/' +
                                           model.get_dir_name(),
                                           method=method,
                                           user_id=user_id,
                                           train_id=train_id)
    rank_data = pd.read_csv(rank_input_path, index_col='id')
    # bucket the items labelled with boolean_value by ranking, and count
    # how many of them carry a label from denominator_list
    denominator_size = 0
    bool_data = rank_data[rank_data['boolean'] == boolean_value]
    size = bool_data.shape[0]
    ranking_list = [[] for _ in color_list]
    for hotel_id, row in bool_data.iterrows():
        i = int(row['ranking'] / rank_space)
        ranking_list[i].append(hotel_id)
        if row['boolean'] in denominator_list:
            denominator_size += 1
    for score_type in share.DEFAULT_SCORE_TYPE_LIST:
        dest = util.get_result_path(
            dir_name=share.PLOT_TOP + '/marg_and_boolean/' +
            model.get_dir_name() + '/' + dest_path_tail,
            method=method,
            user_id=user_id,
            train_id=train_id) + '/' + score_type
        try:
            all_pdf = all_marg_dict[score_type].pdf
            user_pdf = util.get_pdf_from_weight_marg_dict_list(
                weight_marg_dict_list=user_weight_marg_dict_list,
                score_type=score_type)
            xs = list(np.arange(SCORE_SPACE_DICT[score_type][1],
                                SCORE_SPACE_DICT[score_type][0], 0.01))
            ys = [user_pdf(x) for x in xs]
            xs_all = xs
            ys_all = [all_pdf(x) for x in xs_all]
            for i, color in enumerate(color_list):
                bool_xs = [bool_data.loc[index][score_type]
                           for index in ranking_list[i]]
                bool_ys = [user_pdf(x) for x in bool_xs]
                pyplot.scatter(bool_xs, bool_ys,
                               label='top' + str((i + 1) * rank_space),
                               color=color)
            pyplot.plot(xs_all, ys_all, label='all_items')
            pyplot.plot(xs, ys, label='user')
            pyplot.title('pdf and ' + title + ' ' + str(size) + 'items/' +
                         denominator_title + str(denominator_size) +
                         'items ' + 'for ' + score_type)
            pyplot.xlabel('score')
            pyplot.ylabel('pdf')
            pyplot.xticks(np.arange(SCORE_SPACE_DICT[score_type][1],
                                    SCORE_SPACE_DICT[score_type][0], 0.1))
            pyplot.legend()
            util.init_file(dest)
            pyplot.savefig(dest)
            pyplot.figure()  # start a fresh figure for the next score type
        except KeyError:
            # this score_type was removed by the KL-based reduction; skip it
            pass
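# Usage sketch for plot_marg_and_rank. The user/train ids, the 'default'
# method name and the destination tail are hypothetical; boolean_value=1
# is assumed here to select the positively labelled items.
def _example_plot_marg_and_rank(model: models.ScoreModel):
    plot_marg_and_rank(model=model,
                       user_id='1',
                       train_id=0,
                       method='default',
                       boolean_value=1,
                       title='true',
                       denominator_list=[0, 1],  # labels counted in the denominator
                       denominator_title='labelled',
                       dest_path_tail='marg_true')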
def do_measure(model: models.ScoreModel, group: List[int]):
    """Train the model per user and fold, rank all items, and write the
    evaluation measures (iP, MAiP, nDCG, P) averaged over the folds."""
    model_remapping = model.get_remapping()
    for user_id in group:
        print('###########################################')
        print('user = ' + str(user_id))
        print('###########################################')
        respective_method_measures_dict = {}
        user_k_folded_path = (share.TRAIN_DATA_TOP + '/user' + str(user_id) +
                              '_kfolded.json')
        with open(user_k_folded_path, 'rt') as fin:
            # load the k-folded train/test splits
            kfolded_training_and_test_data_list = json.load(fin)
        if model_remapping:
            remapping, score_mapping_dict, mapping_id = \
                util.get_score_mapping_param(user_id)
        else:
            # remapping is disabled for group users; mapping_id would
            # otherwise stay unbound when model.train() is called below
            remapping = False
            mapping_id = None
        if remapping:
            # the user's mapping differs from the default one, so work on
            # a deep copy with the scores converted
            all_items = copy.deepcopy(share.ALL_ITEMS)
            util.convert_score(all_items, score_mapping_dict)
        else:
            all_items = share.ALL_ITEMS  # a shallow reference is enough
        for train_id, training_and_test_data in enumerate(
                kfolded_training_and_test_data_list):
            # train and test once per fold (share.TRAIN_SIZE folds)
            training_hotel_list = training_and_test_data['trainingTrue']
            training_false_hotel_list = training_and_test_data['trainingFalse']
            test_hotel_list = training_and_test_data['testTrue']
            test_false_hotel_list = training_and_test_data['testFalse']
            model.train(
                training_data_t=pd.DataFrame.from_records(training_hotel_list),
                training_data_f=pd.DataFrame.from_records(
                    training_false_hotel_list),
                all_items=all_items,
                mapping_id=mapping_id,
                train_id=train_id,
                user_id=user_id)
            model.make_log()  # log the parameters of model.train()
            ranking_dict = model.calc_ranking(all_items=all_items)
            test_hotel_id_list = [test_hotel['id']
                                  for test_hotel in test_hotel_list]
            training_hotel_id_list = [training_hotel['id']
                                      for training_hotel in training_hotel_list]
            training_false_hotel_id_list = [
                training_false_hotel['id']
                for training_false_hotel in training_false_hotel_list
            ]
            for method, ranking in ranking_dict.items():
                # evaluate on unseen items only
                ranking = ranking.drop(training_hotel_id_list)
                ranking = ranking.drop(training_false_hotel_id_list)
                print(method + '\n')
                print(ranking)
                dest = util.get_result_path(dir_name=share.RANKING_TOP + '/' +
                                            model.get_dir_name(),
                                            method=method,
                                            user_id=user_id,
                                            train_id=train_id)
                util.log_ranking(all_items=all_items,
                                 ranking=ranking,
                                 path=dest,
                                 score_type_list=model.get_score_type_list(),
                                 test_id_list=test_hotel_id_list)
                if method not in respective_method_measures_dict:
                    # initialise the per-method accumulators
                    temp = {}
                    for measure_type in share.MEASURE_TYPE_LIST:
                        temp[measure_type] = (
                            [0.0] *
                            share.MEASURE_TYPE_MEASURE_DICT[measure_type].shape[0])
                    for label_type in share.LABEL_TYPE_LIST:
                        temp[label_type] = []
                    respective_method_measures_dict[method] = temp
                ips = ip(ranking, test_hotel_id_list)
                for i in range(share.MEASURE_TYPE_MEASURE_DICT['iP'].shape[0]):
                    respective_method_measures_dict[method]['iP'][i] += ips[i]
                # the entry after the iP values holds MAiP
                respective_method_measures_dict[method]['MAiP'][0] += ips[11]
                for i in range(share.MEASURE_TYPE_MEASURE_DICT['nDCG'].shape[0]):
                    respective_method_measures_dict[method]['nDCG'][i] += n_dcg(
                        ranking, 5 * (i + 1), test_hotel_id_list)
                for i in range(share.MEASURE_TYPE_MEASURE_DICT['P'].shape[0]):
                    respective_method_measures_dict[method]['P'][i] += precision(
                        ranking, 5 * (i + 1), test_hotel_id_list)
                for i, label_type in enumerate(share.LABEL_TYPE_LIST):
                    respective_method_measures_dict[method][label_type].extend(
                        adhoc_task(ranking, 10 * (i + 1), test_hotel_id_list))
        for method, respective_measures in (
                respective_method_measures_dict.items()):
            file_name = util.get_result_path(dir_name=share.RESULT_TOP + '/' +
                                             model.get_dir_name(),
                                             method=method,
                                             user_id=user_id)
            util.init_file(file_name)
            with open(file_name, 'wt') as fout:
                header = 'file,user'
                line = file_name + ',user' + str(user_id)
                for measure_type in share.MEASURE_TYPE_LIST:
                    for i, measure in enumerate(
                            share.MEASURE_TYPE_MEASURE_DICT[measure_type]):
                        header += ',' + measure
                        # average over the folds
                        line += ',' + str(respective_measures[measure_type][i] /
                                          share.TRAIN_SIZE)
                header += '\n'
                line += '\n'
                fout.write(header + line)
            for label_type in share.LABEL_TYPE_LIST:
                label_file_name = util.get_result_path(
                    dir_name=share.LABEL_TOP + '/' + label_type + '/' +
                    model.get_dir_name(),
                    method=method,
                    user_id=user_id)
                adhoc_testing_task(label_file_name,
                                   respective_measures[label_type])
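# Usage sketch tying the pipeline together (hypothetical model and group
# ids). do_measure() writes the per-user result CSVs under
# share.RESULT_TOP that the plotting and TeX helpers above read back.
def _example_do_measure(model: models.ScoreModel):
    do_measure(model=model, group=[1, 2, 3])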