def map_compute_distance(args):
    """Compute, per row, the mean distance to same- and other-category columns.

    All inputs arrive packed in the single sequence ``args`` (presumably so
    the function can serve as a one-argument worker -- confirm with callers).
    Two layouts are accepted:

    * 4 items: ``(dist_matrix, dicts, mode, ctgy_chkin_dict)``
    * otherwise: ``(dist_matrix, queue, dicts, mode, ctgy_chkin_dict)``

    For row ``i`` of ``dist_matrix`` the category is looked up through
    ``dicts.reverse_dictionary[i]`` and ``dicts.chkin_ctgy_dict``. When
    ``mode == 'sub'`` that category is used directly; for any other mode it
    is first mapped through ``dicts.ctgy_mapping['subctgy_ctgy_dict']``
    (after ``correct_errata``). Members of ``ctgy_chkin_dict[category]`` that
    are present in ``dicts.dictionary`` select the "same category" columns.

    Returns:
        ``(inner_dist, outer_dist)``: two lists with one entry per row,
        holding the mean over same-category columns and the mean over the
        remaining columns. If a queue that is not ``None`` was supplied, the
        tuple is passed to ``queue.put`` instead and ``None`` is returned.
    """
    out_queue = None
    if len(args) == 4:
        dist_matrix, dicts, mode, ctgy_chkin_dict = args
    else:
        dist_matrix, out_queue, dicts, mode, ctgy_chkin_dict = args

    inner_dist, outer_dist = [], []
    for row_idx, row in enumerate(dist_matrix):
        subctgy = dicts.chkin_ctgy_dict[dicts.reverse_dictionary[row_idx]]
        if mode == 'sub':
            ctgy = subctgy
        else:
            ctgy = dicts.ctgy_mapping['subctgy_ctgy_dict'][
                correct_errata(subctgy)]
        same_ctgy_chkins = ctgy_chkin_dict[ctgy]

        # Column indices of the category members known to the vocabulary.
        column_ids = [
            dicts.dictionary[name]
            for name in same_ctgy_chkins
            if name in dicts.dictionary
        ]

        # Mask semantics: non-zero hides the column. Start with everything
        # hidden, then reveal the same-category columns.
        hide_others = np.ones(dist_matrix.shape[1], dtype=int)
        hide_others[column_ids] = 0
        hide_same = np.logical_not(hide_others)

        inner_dist.append(np.mean(ma.masked_array(row, mask=hide_others)))
        outer_dist.append(np.mean(ma.masked_array(row, mask=hide_same)))

    if out_queue is None:
        return inner_dist, outer_dist
    out_queue.put((inner_dist, outer_dist))
def get_labels(dicts):
    """Build integer category labels for vocabulary ids.

    Sub-category names (keys of ``dicts.ctgy_chkin_dict``) and root-category
    names (keys of ``dicts.rootctgy_chkin_dict``) are each sorted and
    numbered from 0. Every id in ``range(dicts.vocabulary_size - 1)`` is then
    resolved via ``dicts.reverse_dictionary`` and ``dicts.chkin_ctgy_dict``
    to its sub-category and, through
    ``dicts.ctgy_mapping['subctgy_ctgy_dict']`` (after ``correct_errata``),
    to its root category.

    Returns:
        ``{'sub': [...], 'root': [...]}``: two lists of integer labels, one
        entry per id.
    """
    sub_index = {}
    for position, name in enumerate(sorted(dicts.ctgy_chkin_dict)):
        sub_index[name] = position

    sublabels = []
    for i in range(dicts.vocabulary_size - 1):
        subctgy = dicts.chkin_ctgy_dict[dicts.reverse_dictionary[i]]
        sublabels.append(sub_index[subctgy])

    root_index = {}
    for position, name in enumerate(sorted(dicts.rootctgy_chkin_dict)):
        root_index[name] = position

    rootlabels = []
    for i in range(dicts.vocabulary_size - 1):
        subctgy = dicts.chkin_ctgy_dict[dicts.reverse_dictionary[i]]
        rootctgy = dicts.ctgy_mapping['subctgy_ctgy_dict'][
            correct_errata(subctgy)]
        rootlabels.append(root_index[rootctgy])

    return {'sub': sublabels, 'root': rootlabels}
def get_same_ctgy_chkins(i, dicts, mode):
    """Return the members of the category that vocabulary id ``i`` belongs to.

    With ``mode == 'root'`` the sub-category of ``i`` is mapped to its root
    category via ``dicts.ctgy_mapping['subctgy_ctgy_dict']`` (after
    ``correct_errata``) and the entry of ``dicts.rootctgy_chkin_dict`` for
    that root is returned. For any other ``mode`` the entry of
    ``dicts.ctgy_chkin_dict`` for the sub-category itself is returned.

    The stored object is returned as-is, not a copy.
    """
    if mode == 'root':
        root_members = dicts.rootctgy_chkin_dict
        subctgy = dicts.chkin_ctgy_dict[dicts.reverse_dictionary[i]]
        rootctgy = dicts.ctgy_mapping['subctgy_ctgy_dict'][
            correct_errata(subctgy)]
        return root_members[rootctgy]

    sub_members = dicts.ctgy_chkin_dict
    return sub_members[dicts.chkin_ctgy_dict[dicts.reverse_dictionary[i]]]
def get_ctgy_histogram_dict(traj_list, dicts):
    """Count visits per hour of day for every sub- and root category.

    Each trajectory in ``traj_list`` is a sequence of points. For a point
    ``p``, ``p[0]`` is looked up in ``dicts.reverse_dictionary`` and
    ``p[-1].hour`` selects the bin (presumably a datetime-like value --
    confirm with callers). Points that resolve to ``'UNK'`` are skipped.

    Returns:
        ``(substats, rootstats)``: dicts mapping each key of
        ``dicts.ctgy_chkin_dict`` and each key of
        ``dicts.ctgy_mapping['ctgy_subctgy_dict']`` respectively to a
        24-element count array.
    """
    hours = list(range(24))

    substats = {}
    for subctgy in dicts.ctgy_chkin_dict:
        substats[subctgy] = np.zeros_like(hours)

    rootstats = {}
    for rootctgy in dicts.ctgy_mapping['ctgy_subctgy_dict']:
        rootstats[rootctgy] = np.zeros_like(hours)

    for trajectory in traj_list:
        for point in trajectory:
            chkin = dicts.reverse_dictionary[point[0]]
            if chkin == 'UNK':
                # Unknown check-ins have no category to attribute to.
                continue
            sctgy = dicts.chkin_ctgy_dict[chkin]
            rctgy = dicts.ctgy_mapping['subctgy_ctgy_dict'][
                correct_errata(sctgy)]
            hour = point[-1].hour
            substats[sctgy][hour] += 1
            rootstats[rctgy][hour] += 1

    return substats, rootstats
def get_rootctgy_chkin(ctgy_chkin_dict, subctgy_ctgy_dict):
    """Regroup per-sub-category members under their root categories.

    Each key of ``ctgy_chkin_dict`` is passed through ``correct_errata`` and
    then mapped to a root category with ``subctgy_ctgy_dict``. The members
    of all sub-categories that share a root are concatenated in iteration
    order.

    Returns:
        A ``defaultdict(list)`` mapping root category -> combined members.
    """
    grouped = defaultdict(list)
    for subctgy, chkins in ctgy_chkin_dict.items():
        rootctgy = subctgy_ctgy_dict[correct_errata(subctgy)]
        grouped[rootctgy].extend(chkins)
    return grouped
def id2rootctgy(dicts, ctgy_mapping, idx):
    """Return the root category for vocabulary id ``idx``.

    The id is resolved through ``dicts.reverse_dictionary`` and
    ``dicts.chkin_ctgy_dict`` to a sub-category, which is passed through
    ``correct_errata`` and looked up in
    ``ctgy_mapping['subctgy_ctgy_dict']``.
    """
    to_root = ctgy_mapping['subctgy_ctgy_dict']
    chkin = dicts.reverse_dictionary[idx]
    subctgy = dicts.chkin_ctgy_dict[chkin]
    return to_root[correct_errata(subctgy)]