コード例 #1
0
    # For every user, gather the coordinates of the known check-ins that sit
    # directly before or after each missing ('?') check-in.
    user_miss_loc = {}

    with open('raw/checkins_missing.txt', 'r') as f:
        for line in f:
            # Each line reads "user:ts,loc,ts,loc,...".
            user, raw = line.rstrip('\n').split(':')

            fields = raw.split(',')
            checkins = [(int(fields[k]), fields[k + 1])
                        for k in range(0, len(fields), 2)]
            last = len(checkins) - 1

            for pos, (_, place) in enumerate(checkins):
                if place != '?':
                    continue

                # A user with a missing check-in always gets an entry, even
                # when neither neighbour is known.
                neighbours = user_miss_loc.setdefault(user, [])

                # Previous check-in first, then the next one.
                if pos != 0:
                    before = checkins[pos - 1][1]
                    if before != '?':
                        info = loc[before]
                        neighbours.append((info['lat'], info['lon']))

                if pos != last:
                    after = checkins[pos + 1][1]
                    if after != '?':
                        info = loc[after]
                        neighbours.append((info['lat'], info['lon']))

    save_pkl('tmp/user_miss_loc.pkl', user_miss_loc)
コード例 #2
0
ファイル: tag2class.py プロジェクト: st9007a/LocationTracker
#!/usr/bin/env python3
from utils.io import read_pkl, save_pkl

def parse_tag2class(lines):
    """Build the tag -> class mapping from the lines of the reduce file.

    A line that does not start with a space is a header; the text between
    its first and second ':' becomes the current class. A line that starts
    with a space is a tag entry; everything after its first two characters
    is the tag, and it is mapped to the current class.

    Args:
        lines: iterable of text lines, with or without a trailing newline.

    Returns:
        dict mapping each tag to the class of the closest preceding header
        ('' when no header has been seen yet).

    Raises:
        IndexError: if a header line contains no ':'.
    """
    tag2class = {}
    target = ''

    for line in lines:
        # Strip the newline explicitly rather than slicing off the last
        # character, so a final line without a trailing newline keeps its
        # full tag.
        line = line.rstrip('\n')

        # Blank lines carry no information.
        if not line:
            continue

        if line[0] != ' ':
            target = line.split(':')[1]
            continue

        tag2class[line[2:]] = target

    return tag2class


if __name__ == '__main__':
    in_file = 'manual/reduce.txt'

    with open(in_file, 'r') as f:
        tag2class = parse_tag2class(f)

    print(len(tag2class))
    save_pkl('tmp/tag2class.pkl', tag2class)
コード例 #3
0
from utils.io import read_pkl, save_pkl

if __name__ == '__main__':

    user_checkins = read_pkl('tmp/user_checkins.pkl')
    loc_db = read_pkl('tmp/location.pkl')
    nodes = read_pkl('tmp/nodes.pkl')
    node_features = read_pkl('tmp/features.pkl')

    for idx, node in enumerate(nodes):
        # Only nodes whose name ends in '?' are touched, and only while their
        # feature slots from index 24 onwards do not yet sum to a positive
        # value.
        needs_fill = node[-1] == '?' and not (np.sum(node_features[idx][24:]) > 0)
        if not needs_fill:
            continue

        # The node name minus its last two characters is the user key.
        user = node[:-2]

        # Count the user's known locations per group (6 groups, one column).
        counts = np.zeros((6, 1))
        for place in user_checkins[user]:
            if place not in loc_db:
                continue
            counts[loc_db[place]['group']][0] += 1

        # NOTE(review): `normalize` is imported outside this view; presumably
        # sklearn.preprocessing.normalize -- confirm.
        counts = normalize(counts, axis=0)

        # Write the six normalised values into feature slots 24..29.
        for offset in range(6):
            node_features[idx][24 + offset] = counts[offset]

    save_pkl('tmp/features.pkl', node_features)
コード例 #4
0
            # Turn "ts,loc,ts,loc,..." into (timestamp, location id) pairs
            # and keep only the pairs whose location is known (not '?').
            fields = checkins.split(',')
            parsed = [(int(fields[k]), fields[k + 1]) for k in range(0, len(fields), 2)]
            checkins = [pair for pair in parsed if pair[1] != '?']

            # Record the coordinates of each location the first time it is seen.
            for _, place in checkins:
                if place in loc_in_checkins:
                    continue
                loc_in_checkins[place] = [loc_db[place]['lat'], loc_db[place]['lon']]

    candidate = read_pkl('tmp/candidate.pkl')

    # Candidate places are clustered too, even when they never appear in a
    # check-in.
    for cand in candidate:
        if cand in loc_in_checkins:
            continue
        entry = loc_db[cand]
        loc_in_checkins[cand] = [entry['lat'], entry['lon']]

    # Freeze one ordering of the location ids so that row k of the coordinate
    # matrix and label k of the clustering refer to the same place.
    loc_names = list(loc_in_checkins)
    loc_coord = np.array([loc_in_checkins[name] for name in loc_names])

    # NOTE(review): KMeans is imported outside this view; presumably
    # sklearn.cluster.KMeans -- confirm.
    cluster = KMeans(n_clusters=6)
    cluster.fit(loc_coord)

    # Store each location's cluster label as its 'group'.
    for loc_name, group_id in zip(loc_names, cluster.labels_):
        loc_db[loc_name]['group'] = group_id

    save_pkl('tmp/location.pkl', loc_db)
コード例 #5
0
ファイル: label.py プロジェクト: st9007a/LocationTracker
            continue

        t = loc_db[node]['tag']

        if t not in [el[0] for el in keep_tags]:
            continue

        c = loc_db[node]['class']

        if c not in keep_classes:
            keep_classes[c] = 0
        keep_classes[c] += 1

    # The kept classes, in insertion order, define the label columns.
    categorical = list(keep_classes)
    print('Num of classes:', len(categorical))

    # Resolve class -> column once up front instead of scanning the list
    # twice (`in` plus `.index`) for every node.
    class_column = {cls: col for col, cls in enumerate(categorical)}

    # train_mask collects the indices of nodes that received a label;
    # labels is a one-hot matrix with one row per node.
    train_mask = []
    labels = np.zeros((len(nodes), len(categorical)))
    for i, node in enumerate(nodes):
        # Nodes whose name ends in '?' have no known location to label.
        if node[-1] == '?':
            continue

        col = class_column.get(loc_db[node]['class'])
        if col is not None:
            train_mask.append(i)
            labels[i][col] = 1

    save_pkl('tmp/categorical.pkl', categorical)
    save_pkl('tmp/train_mask.pkl', train_mask)
    save_pkl('tmp/labels.pkl', labels)
コード例 #6
0
    out_dir = 'tmp'

    # exist_ok=True creates the directory only when it is missing, without
    # the race of a separate isdir() check followed by makedirs().
    os.makedirs(out_dir, exist_ok=True)

    # Location id -> {'lat', 'lon', 'tag', 'country'}, parsed from the
    # tab-separated input file (columns: id, lat, lon, tag, country).
    loc_db = {}

    with open(in_file, 'r') as f:

        for line in f:
            row = line.rstrip('\n')

            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing on the missing columns.
            if not row:
                continue

            data = row.split('\t')
            loc_db[data[0]] = {
                'lat': float(data[1]),
                'lon': float(data[2]),
                'tag': word_normalize(data[3]),
                'country': data[4],
            }

    save_pkl('%s/location.pkl' % out_dir, loc_db)
コード例 #7
0
ファイル: setclass.py プロジェクト: st9007a/LocationTracker
    # Candidate place id -> its loc_db record (the same dict object, so the
    # 'class' written below is visible through both).
    candidate = {}

    with open('raw/candidate_100_places.txt', 'r') as f:
        lines = [raw.rstrip('\n') for raw in f.readlines()]

    # A place's class is its tag mapped through tag2class; tags without a
    # mapping are used as their own class.
    for place in lines:
        record = loc_db[place]
        candidate[place] = record
        tag = record['tag']
        record['class'] = tag2class.get(tag, tag)

    with open('raw/checkins_missing.txt', 'r') as f:
        for line in f:
            # Each line reads "user:ts,loc,ts,loc,...".
            user, raw = line.rstrip('\n').split(':')

            fields = raw.split(',')
            checkins = [(int(fields[k]), fields[k + 1]) for k in range(0, len(fields), 2)]

            for _, place in checkins:
                # '?' marks a missing location; there is nothing to classify.
                if place == '?':
                    continue

                record = loc_db[place]
                tag = record['tag']
                record['class'] = tag2class.get(tag, tag)

    save_pkl('tmp/candidate.pkl', candidate)
    save_pkl('tmp/location.pkl', loc_db)
コード例 #8
0
#!/usr/bin/env python3
import numpy as np

from utils.io import read_pkl, save_pkl

def collect_user_checkins(lines, known_locations):
    """Collect, per user, the set of known locations they checked in at.

    Each non-blank line has the form ``user:ts,loc,ts,loc,...`` where every
    ``ts`` must parse as an integer. Locations that are not keys of
    ``known_locations`` (such as the '?' placeholder) are dropped.

    Args:
        lines: iterable of text lines, with or without a trailing newline.
        known_locations: container supporting ``in``, holding the valid
            location ids.

    Returns:
        dict mapping each user to a set of location ids. A user with no
        known check-ins maps to an empty set.

    Raises:
        ValueError: if a line does not contain exactly one ':' or a
            timestamp is not an integer.
        IndexError: if a timestamp has no matching location field.
    """
    user_checkins = {}

    for line in lines:
        line = line.rstrip('\n')

        # Blank lines carry no record.
        if not line:
            continue

        user, raw = line.split(':')

        # ''.split(',') would yield [''], so a user with an empty check-in
        # list is treated as having no fields at all.
        fields = raw.split(',') if raw else []
        checkins = [(int(fields[i]), fields[i + 1])
                    for i in range(0, len(fields), 2)]

        visited = user_checkins.setdefault(user, set())
        visited.update(place for _, place in checkins
                       if place in known_locations)

    return user_checkins


if __name__ == '__main__':

    loc_db = read_pkl('tmp/location.pkl')

    with open('raw/checkins_missing.txt', 'r') as f:
        user_checkins = collect_user_checkins(f, loc_db)

    save_pkl('tmp/user_checkins.pkl', user_checkins)