Beispiel #1
0
def plot_measure_of_group(group: List[int], method: str,
                          model: models.ScoreModel, title: str,
                          measure_array: np.array, axis_label: Tuple[str, str],
                          dest_path_tail: str):
    """Plot the measures of every user in ``group`` on one figure and save it.

    For each user the result CSV located by ``util.get_result_path`` is read
    and the first-row value of every column named in ``measure_array`` is
    drawn as one series labelled ``'user<id>'``.  A single measure is drawn
    as a scatter point, several measures as a line.

    Args:
        group: User ids; one series is drawn per id.
        method: Passed through to ``util.get_result_path``.
        model: Its ``get_dir_name()`` selects the result and plot directory.
        title: Figure title.
        measure_array: Column names to read; must expose ``shape``.
        axis_label: ``(x label, y label)``.
        dest_path_tail: Path appended below the model's plot directory.
    """
    model_dir = model.get_dir_name()
    dest = (share.PLOT_TOP + '/measure_of_group/' + model_dir + '/' +
            dest_path_tail)
    result_dir = share.RESULT_TOP + '/' + model_dir
    util.init_file(dest)
    pyplot.figure(figsize=(10, 6))
    pyplot.subplot(GS[0, :GRID_WIDTH - 1])

    # The x positions depend only on the number of measures, so compute them
    # once up front; this also keeps `xs` defined for the xticks() call below
    # when `group` is empty.
    measure_count = measure_array.shape[0]
    xs = [x * 0.1 for x in range(measure_count)]
    # A single point cannot be drawn as a line.
    draw = pyplot.scatter if measure_count == 1 else pyplot.plot

    for user_id in group:
        source = util.get_result_path(dir_name=result_dir,
                                      method=method,
                                      user_id=user_id)
        data = pd.read_csv(source)
        ys = [data[measure].values[0] for measure in measure_array]
        draw(xs, ys, label='user' + str(user_id))

    pyplot.title(title)
    pyplot.xlabel(axis_label[0])
    pyplot.ylabel(axis_label[1])
    pyplot.xticks(xs, measure_array)
    pyplot.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0)
    pyplot.savefig(dest)
Beispiel #2
0
def set_exam_tex_form(group:List[int],compare_dict:Dict[str,Tuple[str,models.ScoreModel]],title:str):
    """Write one CSV per measure holding every user's value for each key.

    For every measure in ``share.MEASURE_TYPE_MEASURE_DICT`` a file
    ``<share.TEX_EXAM_SOURCE_TOP>/<title>/<measure>`` is written with the
    header ``user_id,<key1>,<key2>,...`` and one row per user in ``group``.
    Each cell is the first-row value of the measure column in that user's
    result CSV, rounded to ``share.DIGIT`` digits.  When a result file is
    missing a message is printed and the cell is left empty.

    Args:
        group: User ids, one output row each.
        compare_dict: ``{key: (method, model)}``; iteration order fixes the
            column order, so an ordered mapping is expected.
        title: Sub-directory name below ``share.TEX_EXAM_SOURCE_TOP``.
    """
    dest_h = share.TEX_EXAM_SOURCE_TOP + '/' + title
    splitter = ','
    keys = list(compare_dict)
    # The header is the same for every measure file, so build it once.
    header = splitter.join(['user_id', *keys])

    for measure_type in share.MEASURE_TYPE_LIST:
        for measure in share.MEASURE_TYPE_MEASURE_DICT[measure_type]:
            dest = dest_h + '/' + measure
            util.init_file(dest)
            with open(dest, 'wt') as fout:
                fout.write(header + '\n')
                for user_id in group:
                    cells = [str(user_id)]
                    for key in keys:
                        method, score_model = compare_dict[key]
                        source = util.get_result_path(
                            dir_name=share.RESULT_TOP + '/' +
                            score_model.get_dir_name(),
                            user_id=user_id,
                            method=method)
                        try:
                            data = pd.read_csv(source)
                            cells.append(
                                str(round(data[measure].values[0],
                                          share.DIGIT)))
                        except FileNotFoundError:
                            print(source + ' not found' + '\n')
                            # Keep the column position with an empty cell.
                            cells.append('')
                    fout.write(splitter.join(cells) + '\n')
Beispiel #3
0
def plot_measure_of_compare(compare_dict: Dict[str, Tuple[str,
                                                          models.ScoreModel]],
                            title: str, group: List[int],
                            measure_array: np.array,
                            axis_label: Tuple[str, str], dest_path_tail: str):
    """Plot the group-averaged measures of every compared model and save it.

    For each ``key -> (method, model)`` entry the result CSV of every user in
    ``group`` is read and, per measure, the first-row values are averaged
    over the group.  One series per key is drawn (a scatter point for a
    single measure, a line otherwise).

    Args:
        compare_dict: ``{label: (method, model)}``; one series per entry.
        title: Figure title.
        group: User ids to average over.
        measure_array: Column names to read; must expose ``shape``.
        axis_label: ``(x label, y label)``.
        dest_path_tail: Path appended below ``measure_of_compare``.
    """
    dest = share.PLOT_TOP + '/measure_of_compare/' + dest_path_tail
    util.init_file(dest)
    pyplot.figure(figsize=(10, 6))
    pyplot.subplot(GS[0, :GRID_WIDTH - 1])

    # Computed once before the loop so that `xs` is still defined for
    # xticks() when `compare_dict` is empty.
    measure_count = measure_array.shape[0]
    xs = [x * 0.1 for x in range(measure_count)]  # grid size
    # A single measure (the original comment mentions MAiP) is a lone point.
    draw = pyplot.scatter if measure_count == 1 else pyplot.plot
    group_size = len(group)

    for key, (method, compare) in compare_dict.items():
        result_dir = share.RESULT_TOP + '/' + compare.get_dir_name()
        # Read each user's result file once instead of once per measure.
        frames = [
            pd.read_csv(
                util.get_result_path(dir_name=result_dir,
                                     method=method,
                                     user_id=user_id)) for user_id in group
        ]
        ys = []
        for measure in measure_array:
            value = .0
            for data in frames:
                value += data[measure].values[0] / group_size
            ys.append(value)
        draw(xs, ys, label=key)

    pyplot.title(title)
    pyplot.xlabel(axis_label[0])
    pyplot.ylabel(axis_label[1])
    pyplot.xticks(xs, measure_array)
    pyplot.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0)
    pyplot.savefig(dest)
Beispiel #4
0
def get_exam_tex_form(compare_list:List[str],compared:str,title:str):
    """Write a '&'-separated table of Mann-Whitney U test p-values.

    For every measure the CSV ``<share.TEX_EXAM_SOURCE_TOP>/<title>/<measure>``
    is read and column ``compared`` is tested against each column named in
    ``compare_list`` with ``stats.mannwhitneyu(..., alternative='greater')``.
    The p-values, rounded to ``share.DIGIT`` digits, form one table row per
    measure; ``nan`` is written when the test raises ``ValueError``.  The
    table goes to ``<share.TEX_EXAM_TOP>/<title>``.

    Args:
        compare_list: Column names tested against ``compared``; also used as
            the header cells.
        compared: Column name used as the first sample of every test.
        title: File / directory name below the two TeX top directories.
    """
    source_h = share.TEX_EXAM_SOURCE_TOP + '/' + title
    dest = share.TEX_EXAM_TOP + '/' + title
    splitter = '&'
    # Both suffixes contain single backslashes, exactly as before; only the
    # invalid '\h' escape is now spelled out explicitly.
    # NOTE(review): LaTeX table rows normally end with a double backslash -
    # confirm whether the single one is what the consumer expects.
    measure_suffix = '\\\n'
    measure_type_suffix = '\\ \\hline\n'
    header = splitter.join(['$measure$', *compare_list]) + measure_type_suffix

    util.init_file(dest)
    with open(dest, 'wt') as fout:
        fout.write(header)
        for measure_type in share.MEASURE_TYPE_LIST:
            measure_array = share.MEASURE_TYPE_MEASURE_DICT[measure_type]
            last_index = measure_array.shape[0] - 1
            for i, measure in enumerate(measure_array):
                data = pd.read_csv(source_h + '/' + measure)
                cells = [measure]
                for compare in compare_list:
                    try:
                        # One-sided Mann-Whitney U test; only p is reported.
                        _, p = stats.mannwhitneyu(data[compared],
                                                  data[compare],
                                                  alternative='greater')
                        cells.append(str(round(p, share.DIGIT)))
                    except ValueError:
                        cells.append(str(np.nan))
                # The last measure of a measure type closes with a rule.
                suffix = (measure_type_suffix
                          if i == last_index else measure_suffix)
                fout.write(splitter.join(cells) + suffix)
Beispiel #5
0
def create_cluster(df: pd.DataFrame, n_clusters: int,
                   target_axis_list: List[str], train_id: int, user_id: int,
                   remapping: bool) -> List[pd.DataFrame]:
    """Split ``df`` into ``n_clusters`` groups and log the assignment.

    When ``share.REUSE_CLUSTER`` is set and the cluster file already exists,
    the clusters are loaded with ``read_cluster``.  Otherwise the rows are
    clustered (no clustering for ``n_clusters == 1``, ``KMeans`` on the
    ``target_axis_list`` columns otherwise) and every row is written to the
    cluster file as ``n_clusters,cluster,<row values...>``.

    Args:
        df: Rows to cluster.
        n_clusters: Number of clusters, starting at 1; cluster labels start
            at 0.
        target_axis_list: Column names used as KMeans features.
        train_id: Used to build the cluster file path.
        user_id: Used to build the cluster file path.
        remapping: Used to build the cluster file path.

    Returns:
        A list with one DataFrame per cluster label.
    """
    path = util.get_cluster_path(dir_name=share.CLUSTER_DATA_TOP +
                                 '/remapping_' + str(remapping) + '/cluster_' +
                                 str(n_clusters),
                                 train_id=train_id,
                                 user_id=user_id)
    if share.REUSE_CLUSTER and os.path.isfile(path):
        # Reuse is enabled and the cluster file exists.
        return read_cluster(n_clusters, path)

    if n_clusters == 1:
        # Everything belongs to cluster 0; no model needed.
        labels = np.zeros(len(df), dtype=int)
        hotel_cluster = [df]
    else:
        features = df[list(target_axis_list)].values
        # NOTE: no random_state is passed, so the assignment may differ
        # between runs.
        labels = KMeans(n_clusters=n_clusters).fit_predict(features)
        # Select each cluster's rows by position in one step.  The previous
        # row-by-row DataFrame.append was removed in pandas 2.0 and copied
        # the frame on every row.
        hotel_cluster = [
            df.iloc[np.flatnonzero(labels == cluster_num)]
            for cluster_num in range(n_clusters)
        ]

    util.init_file(path)
    header = ','.join(['n_clusters', 'cluster', *df.columns]) + '\n'
    with open(path, 'wt') as fout:
        fout.write(header)
        # iterrows() walks the rows by position, so duplicate index labels
        # cannot turn a row into a multi-row frame.
        for (_, row), cluster_num in zip(df.iterrows(), labels):
            line = util.get_line_from_series(data=row,
                                             key_list=row.index,
                                             splitter=',',
                                             start=str(n_clusters) + ',' +
                                             str(cluster_num))
            fout.write(line + '\n')
    return hotel_cluster
Beispiel #6
0
 def log_weight_and_score_model_list(self):
     """Pickle ``self.weight_and_score_model_list`` without 'copula' entries.

     The output path is ``self.dest_dict['log_weight_and_score_model_list']``
     joined with ``self.user_train_id_path``.
     """
     target = (self.dest_dict['log_weight_and_score_model_list'] + '/' +
               self.user_train_id_path)
     util.init_file(target)
     # Per the original author's note: the copula object carries variables
     # with a leading '_' which leads to pyper.RError, so every model dict is
     # pickled without its 'copula' entry.
     picklable = [(weight, {
         name: entry
         for name, entry in model_dict.items() if name != 'copula'
     }) for weight, model_dict in self.weight_and_score_model_list]
     with open(target, 'wb') as fout:
         pickle.dump(picklable, fout)
Beispiel #7
0
 def log_axis(self):
     """Write the axis-selection state as a one-row CSV.

     The file ``self.dest_dict['log_axis']/self.user_train_id_path`` gets a
     fixed header and a single data row.  The container-valued columns are
     wrapped in double quotes so that the commas inside their ``str()`` form
     stay within one CSV field.
     """
     target = self.dest_dict['log_axis'] + '/' + self.user_train_id_path
     header = 'left,const_a,med,madn,bound_dict,kl_dict,prod,score_type_list,reduced,tl_score_type_list\n'
     quote = '"'
     # Scalar columns first (the first cell is the literal 'left') ...
     fields = ['left', str(self.const_a), str(self.med), str(self.madn)]
     # ... then the quoted containers, in header order.
     containers = (self.bound_dict, self.kl_dict, self.prod_axis,
                   self.score_type_list, self.reduced_axis, self.tlr_axis)
     fields.extend(quote + str(item) + quote for item in containers)
     row = ','.join(fields) + '\n'
     util.init_file(target)
     with open(target, 'wt') as fout:
         fout.write(header)
         fout.write(row)
Beispiel #8
0
def plot_kl_profile(user_id: int, train_id: int, model: models.ScoreModel):
    """Plot the logged KL value of every score type against a normal pdf.

    Reads the first row of the model's ``log_axis`` CSV for the given user
    and training fold.  Every score type of the four logged axis lists is
    drawn as one marker at ``x = kl_dict[score_type]``,
    ``y = norm.pdf(x, loc=med, scale=madn)``; every logged bound is drawn as
    a vertical line.  The figure is saved below
    ``<share.PLOT_TOP>/kl_profile/``.

    Args:
        user_id: User whose log is plotted.
        train_id: Training fold whose log is plotted.
        model: Supplies ``get_dest_dict()`` and ``get_dir_name()``.
    """
    id_path = util.get_user_train_id_path(user_id=user_id, train_id=train_id)
    source = model.get_dest_dict()['log_axis'] + '/' + id_path
    dest = (share.PLOT_TOP + '/kl_profile/' + model.get_dir_name() + '/' +
            id_path)
    util.init_file(dest)
    pyplot.figure(figsize=(10, 6))
    pyplot.subplot(GS[0, :GRID_WIDTH - 1])

    row = pd.read_csv(source).iloc[0]  # default quotechar is "
    # SECURITY: eval() executes whatever text the log file contains.  It is
    # kept because bound_dict is logged as an OrderedDict(...) repr, which
    # ast.literal_eval cannot parse; only use this on logs the program wrote
    # itself.
    bound_dict = eval(row['bound_dict'])  # ordered dict
    kl_dict = eval(row['kl_dict'])
    marker_axis_list = [
        ('v', eval(row['reduced'])),  # down triangle
        ('.', eval(row['score_type_list'])),  # dot
        ('^', eval(row['prod'])),  # up triangle
        ('o', eval(row['tl_score_type_list'])),  # circle
    ]
    med, madn = row['med'], row['madn']

    for marker, axis in marker_axis_list:
        for score_type in axis:
            xs = [kl_dict[score_type]]
            ys = [norm.pdf(x=x, loc=med, scale=madn) for x in xs]
            pyplot.scatter(xs, ys, marker=marker, label=score_type)

    # 'ol' is the extra bound that select_axis() can add to bound_dict;
    # without an entry here the colour lookup raised KeyError.
    bound_color_dict = {
        'bound1': 'blue',
        'bound2': 'red',
        'bound3': 'green',  # optional
        'ol': 'purple',  # optional
    }
    fallback_color = 'gray'  # any other bound name
    y_top = norm.pdf(x=med, loc=med, scale=madn)
    for key, bound in bound_dict.items():
        pyplot.plot([bound] * 2, [.0, y_top],
                    label=key,
                    color=bound_color_dict.get(key, fallback_color))
    pyplot.title('kl_profile')
    pyplot.legend(bbox_to_anchor=(1, 1), loc='upper left', borderaxespad=0)
    pyplot.savefig(dest)
Beispiel #9
0
def get_measure_tex_form(group:List[int],compare_dict:Dict[str,Tuple[str,models.ScoreModel]],title:str):#compare_dict must be OrderedDict
    """Write a '&'-separated table of group-averaged measures per key.

    Each row holds one measure; each column one ``compare_dict`` key.  A cell
    is the sum over ``group`` of the first-row value of the measure column in
    the user's result CSV divided by ``len(group)``, rounded to
    ``share.DIGIT`` digits.  When a result file is missing, a message is
    printed and the remaining users of that cell are skipped, so the cell
    holds the partial sum collected so far.  The table is written to
    ``<share.TEX_MEASURE_TOP>/<title>``.

    Args:
        group: User ids to average over.
        compare_dict: ``{key: (method, model)}``; iteration order fixes the
            column order, so an ordered mapping is expected.
        title: File name below ``share.TEX_MEASURE_TOP``.
    """
    dest = share.TEX_MEASURE_TOP + '/' + title
    splitter = '&'
    # Both suffixes contain single backslashes, exactly as before; only the
    # invalid '\h' escape is now spelled out explicitly.
    # NOTE(review): LaTeX table rows normally end with a double backslash -
    # confirm whether the single one is what the consumer expects.
    measure_suffix = '\\\n'
    measure_type_suffix = '\\ \\hline\n'
    group_size = len(group)
    keys = list(compare_dict)
    header = splitter.join(['measure', *keys]) + measure_suffix

    util.init_file(dest)
    with open(dest, 'wt') as fout:
        fout.write(header)
        for measure_type in share.MEASURE_TYPE_LIST:
            measure_array = share.MEASURE_TYPE_MEASURE_DICT[measure_type]
            last_index = measure_array.shape[0] - 1
            for i, measure in enumerate(measure_array):
                cells = [measure]
                for key in keys:
                    # Fixed per key, so resolve it outside the user loop.
                    method, model = compare_dict[key]
                    result_dir = share.RESULT_TOP + '/' + model.get_dir_name()
                    value = 0.0
                    for user_id in group:
                        source = util.get_result_path(dir_name=result_dir,
                                                      user_id=user_id,
                                                      method=method)
                        try:
                            data = pd.read_csv(source)
                            value += data[measure].values[0] / group_size
                        except FileNotFoundError:
                            print(source + ' not found\n')
                            break
                    cells.append(str(round(value, share.DIGIT)))
                # The last measure of a measure type closes with a rule.
                suffix = (measure_type_suffix
                          if i == last_index else measure_suffix)
                fout.write(splitter.join(cells) + suffix)
Beispiel #10
0
def plot_hist(path: str):
    """Save histograms of the 'chargeScore' and 'hotelMiddleCharge' columns.

    The CSV at ``path`` is read once.  'chargeScore' is histogrammed over
    ``[0.0, 1.0]``; 'hotelMiddleCharge' is converted to whole multiples of
    ``bin_width`` and histogrammed over the charge range.  The figures are
    saved below ``<share.PLOT_TOP>/histgram/``.

    Args:
        path: CSV file containing both columns.
    """
    bin_width = 1000
    charge_min = 1000
    charge_max = 28000
    first_bin = int(charge_min / bin_width)
    last_bin = int(charge_max / bin_width)
    charge_xticks = list(range(first_bin, last_bin + 1))
    # The original referenced an undefined `bin_num`; one bin per bin_width
    # step across the plotted range is the reconstruction used here.
    # NOTE(review): confirm the intended bin count.
    bin_num = last_bin - first_bin

    data = pd.read_csv(path)

    # --- chargeScore ---
    score_xs = data['chargeScore'].values
    # The separator after PLOT_TOP was missing and the path ended in '/',
    # unlike the sibling middleCharge destination below.
    score_dest = share.PLOT_TOP + '/histgram/chargeScore'
    util.init_file(score_dest)
    pyplot.hist(score_xs, range=(0.0, 1.0))
    pyplot.title('histgram for chargeScore of all items')
    # No legend(): the histogram has no labelled artists to list.
    pyplot.savefig(score_dest)
    pyplot.figure()

    # --- hotelMiddleCharge ---
    # Truncating division to bin units.  This replaces
    # np.vectorize(..., otypes=int), whose otypes argument must be a string
    # or a list of dtypes.
    charge_xs = (data['hotelMiddleCharge'].values / bin_width).astype(int)
    charge_dest = share.PLOT_TOP + '/histgram/middleCharge'
    # util.init_file is the spelling used throughout the project
    # (was util.initFile here).
    util.init_file(charge_dest)
    pyplot.hist(charge_xs,
                range=(charge_min / bin_width, charge_max / bin_width),
                bins=bin_num)
    pyplot.xticks(charge_xticks)
    pyplot.title('histgram for Charge of all items')
    pyplot.savefig(charge_dest)
    pyplot.figure()
Beispiel #11
0
def plot_marg_and_rank(model: models.ScoreModel, user_id: str, train_id: int,
                       method: str, boolean_value: int, title: str,
                       denominator_list: List[int], denominator_title,
                       dest_path_tail: str):
    """Plot, per score type, the user and all-items pdf plus ranked items.

    Loads the pickled all-items marginals and the pickled per-user
    weight/marginal list, reads the ranking CSV, keeps the rows whose
    'boolean' column equals ``boolean_value`` and groups their ids into
    buckets of ten ranks.  For every score type in
    ``share.DEFAULT_SCORE_TYPE_LIST`` both pdfs are drawn over the score
    range from ``SCORE_SPACE_DICT`` and the bucketed items are scattered on
    the user pdf, one colour per bucket.  Score types that raise ``KeyError``
    anywhere in the plotting step are skipped.
    """
    rank_space = 10
    color_list = [
        'red', 'orange', 'yellow', 'green', 'blue', 'violet', 'black'
    ]

    # Marginals fitted on all items, stored per mapping id.
    _, _, mapping_id = util.get_score_mapping_param(user_id)
    all_items_marg_path = (model.get_dest_dict()['all_items_marg_dict'] +
                           '/' + mapping_id)
    with open(all_items_marg_path, 'rb') as fin:
        all_marg_dict = pickle.load(fin)

    # Weighted marginals logged for this user and training fold.
    user_input_path = (
        model.get_dest_dict()['log_weight_and_score_model_list'] + '/' +
        util.get_user_train_id_path(user_id=user_id, train_id=train_id))
    with open(user_input_path, 'rb') as fin:
        user_weight_marg_dict_list = pickle.load(fin)

    # Ranking produced for this user / fold / method.
    rank_input_path = util.get_result_path(dir_name=share.RANKING_TOP + '/' +
                                           model.get_dir_name(),
                                           method=method,
                                           user_id=user_id,
                                           train_id=train_id)
    rank_data = pd.read_csv(rank_input_path, index_col='id')

    bool_data = rank_data[rank_data['boolean'] == boolean_value]
    size = bool_data.shape[0]

    # One bucket of hotel ids per colour; bucket k holds the rankings in
    # [k * rank_space, (k + 1) * rank_space).
    ranking_list = [[] for _ in color_list]
    denominator_size = 0
    for hotel_id, row in bool_data.iterrows():
        bucket = int(row['ranking'] / rank_space)
        ranking_list[bucket].append(hotel_id)
        if row['boolean'] in denominator_list:
            denominator_size += 1

    for score_type in share.DEFAULT_SCORE_TYPE_LIST:
        dest = util.get_result_path(
            dir_name=share.PLOT_TOP + '/marg_and_boolean/' +
            model.get_dir_name() + '/' + dest_path_tail,
            method=method,
            user_id=user_id,
            train_id=train_id) + '/' + score_type
        try:
            all_pdf = all_marg_dict[score_type].pdf
            user_pdf = util.get_pdf_from_weight_marg_dict_list(
                weight_marg_dict_list=user_weight_marg_dict_list,
                score_type=score_type)

            # Index 1 is used as the start and index 0 as the stop of the
            # score range.
            xs = list(
                np.arange(SCORE_SPACE_DICT[score_type][1],
                          SCORE_SPACE_DICT[score_type][0], 0.01))
            ys = list(map(user_pdf, xs))
            xs_all = xs
            ys_all = list(map(all_pdf, xs_all))

            buckets = zip(color_list, ranking_list)
            for rank_top, (color, hotel_ids) in enumerate(buckets, start=1):
                bool_xs = [
                    bool_data.loc[hotel_id][score_type]
                    for hotel_id in hotel_ids
                ]
                bool_ys = list(map(user_pdf, bool_xs))
                pyplot.scatter(bool_xs,
                               bool_ys,
                               label='top' + str(rank_top * rank_space),
                               color=color)
            pyplot.plot(xs_all, ys_all, label='all_items')
            pyplot.plot(xs, ys, label='user')
            pyplot.title('pdf and ' + title + ' ' + str(size) + 'items/' +
                         denominator_title + str(denominator_size) + 'items ' +
                         'for ' + score_type)
            pyplot.xlabel('score')
            pyplot.ylabel('pdf')
            pyplot.xticks(
                np.arange(SCORE_SPACE_DICT[score_type][1],
                          SCORE_SPACE_DICT[score_type][0], 0.1))
            pyplot.legend()
            util.init_file(dest)
            pyplot.savefig(dest)
            pyplot.figure()
        except KeyError:
            # Per the original comment: the score type was reduced by the
            # KL-based selection, so there is nothing to plot for it.
            continue
Beispiel #12
0
        sys.stderr.write(share.CLUSTER_DATA_TOP+' exist\n')
        sys.stderr.write("iloc "+str(iloc)+" is used."+"Retry command+='--iloc=another_loc'\n")
        sys.exit(share.ERROR_STATUS)

    #set ppl param per user of  group
    if args['set_ppl']:
        mapping_id_user_dict={}
        for user_id in group:
            path=share.PPL_TOP+'/'+'user'+str(user_id)
            _,_,mapping_id=util.set_score_mapping_param(path=path,user_id=user_id)
            if  mapping_id in mapping_id_user_dict:
                mapping_id_user_dict[mapping_id].append(user_id)
            else:
                mapping_id_user_dict[mapping_id]=[user_id]

        util.init_file(share.MAPPING_ID_USER_DICT_PATH)
        with open(share.MAPPING_ID_USER_DICT_PATH,'wt') as fout:
            header='mapping_id,user_id'
            fout.write(header+'\n')
            object_quotation='"'
            for mapping_id in mapping_id_user_dict.keys():
                line=mapping_id+','+object_quotation+str(mapping_id_user_dict[mapping_id])+object_quotation
                fout.write(line+'\n')
        sys.exit()

    #do plotting from result
    if args['plot']:
        plot.set_score_space_dict()
        plot.test_plot()
        sys.exit()
Beispiel #13
0
    def select_axis(self, mapping_id: str, training_data_t: pd.DataFrame,
                    training_data_f: pd.DataFrame, all_items: pd.DataFrame,
                    axis: List[str]):
        """Choose the score types to keep, based on per-axis KL divergence.

        Steps, as implemented below:

        1. When ``mapping_id`` differs from ``self.mapping_id`` (or none is
           set), the all-items marginals are either unpickled
           (``share.REUSE_PICKLE``) or fitted on ``all_items`` and pickled.
           With reuse enabled and no pickle file present the process exits
           with ``share.ERROR_STATUS``.
        2. For every score type in ``axis`` the user marginal is fitted on
           ``training_data_t`` and its KL divergence against the all-items
           marginal is stored in ``self.kl_dict`` (``log1p``-transformed when
           ``self.attn == share.ATTN_INF``).
        3. Median and scaled MAD of the KL values give ``bound1`` / ``bound2``
           at ``med -/+ self.const_a * madn``.
        4. ``self.prod_axis``, ``self.tlr_axis``, ``self.score_type_list`` and
           ``self.reduced_axis`` are derived from those bounds.

        Args:
            mapping_id: Identifier of the score mapping in use.
            training_data_t: Training rows used to fit the user marginals.
            training_data_f: Not used in this method.
            all_items: All items; used to fit the all-items marginals.
            axis: Candidate score types.

        Side effects:
            Sets ``self.mapping_id``, ``self.all_items_marg_dict``,
            ``self.kl_dict``, ``self.med``, ``self.madn``, ``self.prod_axis``,
            ``self.tlr_axis``, ``self.score_type_list``, ``self.reduced_axis``
            and ``self.bound_dict``; may write a pickle file; prints progress.
        """
        if not self.mapping_id or self.mapping_id != mapping_id:
            # First call or a new mapping: (re)load the all-items marginals.
            self.mapping_id = mapping_id
            print(mapping_id)
            all_items_marg_path = self.dest_dict[
                'all_items_marg_dict'] + '/' + mapping_id
            if share.REUSE_PICKLE:
                if not os.path.isfile(all_items_marg_path):
                    # Reuse requested but nothing has been modelled yet.
                    sys.stderr.write(
                        'file ' + all_items_marg_path +
                        ' not found.retry command+=i_reuse_pickle\n')
                    sys.exit(share.ERROR_STATUS)
                # Already modelled: deserialize.
                with open(all_items_marg_path, 'rb') as fin:
                    self.all_items_marg_dict = pickle.load(fin)
            else:
                # Not yet modelled: fit every axis and serialize.
                util.init_file(all_items_marg_path)
                self.all_items_marg_dict = {}
                for score_type in axis:
                    all_marg = marginal.factory_marg(
                        marg_name=self.marg_name, marg_option=self.marg_option)
                    all_marg.set_param(
                        training_data=all_items[score_type].values,
                        score_type=score_type)
                    self.all_items_marg_dict[score_type] = all_marg
                with open(all_items_marg_path, 'wb') as fout:
                    pickle.dump(self.all_items_marg_dict, fout)

        # KL divergence between the all-items and the user marginal per axis.
        # self.marg_model is refitted for every score type in turn.
        self.kl_dict = {}
        for score_type in axis:
            self.marg_model.set_param(
                training_data=training_data_t[score_type].values,
                score_type=score_type)
            self.kl_dict[score_type] = (
                util.kl_divergence_between_population_and_users(
                    all_marg=self.all_items_marg_dict[score_type],
                    attn=self.attn,
                    score_type=score_type,
                    user_marg=self.marg_model))

        if self.attn == share.ATTN_INF:
            self.kl_dict = {k: np.log1p(v) for k, v in self.kl_dict.items()}

        kl_values = np.array(list(self.kl_dict.values()))
        med = statistics.median(kl_values)
        mad = statistics.median([abs(x - med) for x in kl_values])
        # MAD divided by 0.675 - presumably the usual normal-consistency
        # scaling; confirm the constant.
        madn = mad / 0.675
        bound_dict = OrderedDict()
        bound_dict['bound1'] = med - self.const_a * madn
        bound_dict['bound2'] = med + self.const_a * madn
        self.med, self.madn = med, madn

        # Score types ordered by descending KL value.
        axis = sorted(axis, key=lambda x: float(self.kl_dict[x]), reverse=True)
        self.prod_axis = [
            x for x in axis if self.kl_dict[x] > bound_dict['bound2']
        ]
        self.tlr_axis = []
        if self.tlr_limit:  # tlr valid
            if self.tlr == share.TLR_NUM_UPPER:
                # Use prod_axis; if it is empty, fall back to the top
                # tlr_limit axes.
                if self.prod_axis:
                    self.tlr_axis = list(self.prod_axis)
                else:
                    self.tlr_axis = axis[0:int(self.tlr_limit)]
            elif self.tlr == share.TLR_OL:
                bound_dict['ol'] = med + madn * float(self.tlr_limit)
                self.tlr_axis = [
                    x for x in axis if self.kl_dict[x] > bound_dict['ol']
                ]
            elif self.tlr == share.TLR_PROD:
                self.tlr_axis = list(self.prod_axis)
            elif not self.tlr == share.I_TLR:
                sys.stderr.write('invalid trl string')
                sys.exit(share.ERROR_STATUS)

        # Safe removal: the loop iterates share.DISC_SCORE_TYPE_LIST, not the
        # list being mutated.
        for score_type in share.DISC_SCORE_TYPE_LIST:
            if score_type in self.tlr_axis:
                print('removing disc_score')
                self.tlr_axis.remove(score_type)

        # Renew score_type_list: keep what lies above bound1, log the rest.
        self.score_type_list, self.reduced_axis = [], []
        for score_type in axis:
            if self.kl_dict[score_type] > bound_dict['bound1']:
                self.score_type_list.append(score_type)
            else:
                self.reduced_axis.append(score_type)

        self.bound_dict = bound_dict
        # A single surviving axis falls back to all axes.
        # NOTE(review): an empty score_type_list is left as is and
        # reduced_axis is not reset on the fallback - confirm both are
        # intended.
        if len(self.score_type_list) == 1:
            self.score_type_list = axis
        print('prod ' + str(self.prod_axis))
        print('no_reduced' + str(self.score_type_list))
        print('tlr_axis' + str(self.tlr_axis))
Beispiel #14
0
def do_measure(model: models.ScoreModel, group: List[int]):
    """Train, rank and evaluate ``model`` for every user in ``group``.

    Per user the k-folded training/test JSON is loaded; for every fold the
    model is trained, its rankings are computed (training items removed),
    logged with ``util.log_ranking`` and scored with ``ip``, ``n_dcg``,
    ``precision`` and ``adhoc_task``.  The measures summed over the folds are
    divided by ``share.TRAIN_SIZE`` and written to one result CSV per ranking
    method; the collected labels are written with ``adhoc_testing_task``.

    Args:
        model: Model exposing ``get_remapping``, ``train``, ``make_log``,
            ``calc_ranking``, ``get_dir_name`` and ``get_score_type_list``.
        group: User ids to evaluate.
    """
    model_remapping = model.get_remapping()
    for user_id in group:
        # The middle print was garbled in the source; reconstructed as a
        # banner around the user id.
        print('###########################################')
        print('user = ' + str(user_id))
        print('###########################################')
        respective_method_measures_dict = {}
        user_k_folded_path = share.TRAIN_DATA_TOP + '/user' + str(
            user_id) + '_kfolded.json'
        with open(user_k_folded_path, 'rt') as fin:  # train and test data
            kfolded_training_and_test_data_list = json.load(fin)

        # mapping_id is passed to model.train() in every case, so always
        # fetch the mapping parameters; previously mapping_id stayed unbound
        # when the model did not remap.
        # NOTE(review): assumes get_score_mapping_param() may be called for
        # any user - confirm.
        remapping, score_mapping_dict, mapping_id = (
            util.get_score_mapping_param(user_id))
        if not model_remapping:  # remapping invalid for group users
            remapping = False
        if remapping:  # differs from the default mapping => remap a copy
            all_items = copy.deepcopy(share.ALL_ITEMS)
            util.convert_score(all_items, score_mapping_dict)
        else:
            all_items = share.ALL_ITEMS  # shared object, no copy

        for train_id, training_and_test_data in enumerate(
                kfolded_training_and_test_data_list):
            training_hotel_list = training_and_test_data['trainingTrue']
            training_false_hotel_list = training_and_test_data['trainingFalse']
            test_hotel_list = training_and_test_data['testTrue']

            model.train(
                training_data_t=pd.DataFrame.from_records(training_hotel_list),
                training_data_f=pd.DataFrame.from_records(
                    training_false_hotel_list),
                all_items=all_items,
                mapping_id=mapping_id,
                train_id=train_id,
                user_id=user_id)
            # Log the parameters fitted by model.train().
            model.make_log()

            ranking_dict = model.calc_ranking(all_items=all_items)
            test_hotel_id_list = [hotel['id'] for hotel in test_hotel_list]
            training_hotel_id_list = [
                hotel['id'] for hotel in training_hotel_list
            ]
            training_false_hotel_id_list = [
                hotel['id'] for hotel in training_false_hotel_list
            ]

            for method, ranking in ranking_dict.items():
                # Items seen during training must not be ranked.
                ranking = ranking.drop(training_hotel_id_list)
                ranking = ranking.drop(training_false_hotel_id_list)
                print(method + '\n')
                print(ranking)
                dest = util.get_result_path(dir_name=share.RANKING_TOP + '/' +
                                            model.get_dir_name(),
                                            method=method,
                                            user_id=user_id,
                                            train_id=train_id)
                util.log_ranking(all_items=all_items,
                                 ranking=ranking,
                                 path=dest,
                                 score_type_list=model.get_score_type_list(),
                                 test_id_list=test_hotel_id_list)

                if method not in respective_method_measures_dict:
                    # First fold for this method: zeroed accumulators per
                    # measure type, empty label lists per label type.
                    temp = {}
                    for measure_type in share.MEASURE_TYPE_LIST:
                        temp[measure_type] = [
                            .0
                        ] * share.MEASURE_TYPE_MEASURE_DICT[
                            measure_type].shape[0]
                    for label_type in share.LABEL_TYPE_LIST:
                        temp[label_type] = []
                    respective_method_measures_dict[method] = temp
                measures = respective_method_measures_dict[method]

                ips = ip(ranking, test_hotel_id_list)
                for i in range(share.MEASURE_TYPE_MEASURE_DICT['iP'].shape[0]):
                    measures['iP'][i] += ips[i]
                # Index 11 of the ip() result is accumulated as MAiP -
                # presumably the mean over the interpolated points; verify
                # against ip().
                measures['MAiP'][0] += ips[11]
                for i in range(
                        share.MEASURE_TYPE_MEASURE_DICT['nDCG'].shape[0]):
                    measures['nDCG'][i] += n_dcg(ranking, 5 * (i + 1),
                                                 test_hotel_id_list)
                for i in range(share.MEASURE_TYPE_MEASURE_DICT['P'].shape[0]):
                    measures['P'][i] += precision(ranking, 5 * (i + 1),
                                                  test_hotel_id_list)

                for i, label_type in enumerate(share.LABEL_TYPE_LIST):
                    measures[label_type].extend(
                        adhoc_task(ranking, 10 * (i + 1), test_hotel_id_list))

        for method, respective_measures in (
                respective_method_measures_dict.items()):
            file_name = util.get_result_path(dir_name=share.RESULT_TOP + '/' +
                                             model.get_dir_name(),
                                             method=method,
                                             user_id=user_id)
            util.init_file(file_name)
            header_cells = ['file', 'user']
            value_cells = [file_name, 'user' + str(user_id)]
            for measure_type in share.MEASURE_TYPE_LIST:
                for item, measure in enumerate(
                        share.MEASURE_TYPE_MEASURE_DICT[measure_type]):
                    header_cells.append(measure)
                    # Sum over folds divided by share.TRAIN_SIZE.
                    value_cells.append(
                        str(respective_measures[measure_type][item] /
                            share.TRAIN_SIZE))
            with open(file_name, 'wt') as fout:
                fout.write(','.join(header_cells) + '\n' +
                           ','.join(value_cells) + '\n')

            for label_type in share.LABEL_TYPE_LIST:
                label_file_name = util.get_result_path(
                    dir_name=share.LABEL_TOP + '/' + label_type + '/' +
                    model.get_dir_name(),
                    method=method,
                    user_id=user_id)
                adhoc_testing_task(label_file_name,
                                   respective_measures[label_type])
Beispiel #15
0
def adhoc_testing_task(file_name: str, labels: List[bool]):
    """Write ``labels`` to ``file_name`` as one comma-separated line.

    Truthy labels are written as '1', falsy ones as '0'; the line ends with
    a newline.
    """
    flags = ','.join('1' if label else '0' for label in labels)
    util.init_file(file_name)
    with open(file_name, 'wt') as fout:
        fout.write(flags + '\n')