def run_ring_experiment(size, nb_rings, ring_size_ratio=1.0, shared_sign=True,
                        rigged=False, n_rep=100, shared_edges=None,
                        pivot_strategy=densify.PivotStrategy.uniform,
                        triangle_strategy=TriangleStatus.closeable,
                        one_at_a_time=True, pool=None):
    args = repeat({"size": size, "nb_rings": nb_rings,
                   "ring_size_ratio": ring_size_ratio,
                   "shared_sign": shared_sign, "rigged": rigged,
                   "shared_edges": shared_edges,
                   "pivot_strategy": pivot_strategy,
                   "triangle_strategy": triangle_strategy,
                   "one_at_a_time": one_at_a_time}, n_rep)
    if pool:
        runs = list(pool.imap_unordered(process_rings, args,
                                        chunksize=n_rep//NUM_THREADS))
    else:
        runs = list(map(process_rings, args))
    res = {'time': list(map(itemgetter(0), runs)),
           'nb_error': list(map(itemgetter(2), runs))}
    suffix = 'pos' if shared_sign else 'neg'
    suffix += '_rigged' if rigged else ''
    suffix += '_' + str(n_rep)
    heuristic = strategy_to_str(pivot_strategy, triangle_strategy,
                                one_at_a_time)
    exp_name = 'square_{:04d}_{:02d}_{:.1f}_{}_{}_{}.my'
    p.save_var(exp_name.format(size, nb_rings, ring_size_ratio, suffix,
                               heuristic, int(time.time())), res)
def interpolate_distances(values_map, filename):
    """Plot the distance at every circle center and interpolate between."""
    from scipy.interpolate import griddata
    from matplotlib import pyplot as plt
    import persistent as p
    filename = os.path.join('distance_map', filename)
    x, y, z = [np.array(dim) for dim in zip(*[a for a in values_map])]
    x_ext = [x.min(), x.max()]
    y_ext = [y.min(), y.max()]
    xi = np.linspace(x_ext[0], x_ext[1], 100)
    yi = np.linspace(y_ext[0], y_ext[1], 100)
    zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method='cubic')
    fig = plt.figure(figsize=(22, 18))
    plt.contour(xi, yi, zi, 20, linewidths=0.8, colors='#282828')
    plt.contourf(xi, yi, zi, 20, cmap=plt.cm.Greens)
    plt.colorbar()
    plt.scatter(x, y, marker='o', c='#282828', s=5)
    plt.tight_layout(pad=0)
    plt.xlim(*x_ext)
    plt.ylim(*y_ext)
    plt.savefig(filename, dpi=96, transparent=False, frameon=False,
                bbox_inches='tight', pad_inches=0.01)
    p.save_var(filename.replace('.png', '.my'), values_map)
    plt.close(fig)
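# A hedged usage sketch for interpolate_distances. The (x, y, distance)
# triples below are synthetic and 'demo_city.png' is a made-up output name;
# the function itself writes inside the 'distance_map' folder.
import os
import numpy as np
try:
    os.mkdir('distance_map')
except OSError:
    pass
fake_centers = [(float(x), float(y), float(np.hypot(x - 5, y - 5)))
                for x in range(10) for y in range(10)]
interpolate_distances(fake_centers, 'demo_city.png')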
def augmented_ancestor(tree_adj, X):
    tree_root = max(((node, len(adj)) for node, adj in tree_adj.items()),
                    key=lambda x: x[1])[0]
    prt = gs.ancestor_info(tree_adj, tree_root)
    if len(prt) != len(tree_adj):
        persistent.save_var('bug_parent.my', (prt, tree_adj))
    assert len(prt) == len(tree_adj), set(tree_adj.keys()) - set(prt.keys())
    leaves = {u for u, adj in tree_adj.items() if len(adj) == 1}
    infos = {u: (prt[u], 1, int(u in X)) for u in leaves}
    possible_inclusion = defaultdict(int)
    for _ in infos.values():
        if _[0] is not None:
            possible_inclusion[_[0]] += 1

    def ready(u, vote):
        threshold = len(tree_adj[u]) - 1
        if prt[u] is None:
            threshold += 1
        return vote == threshold

    border = {u for u, vote in possible_inclusion.items() if ready(u, vote)}
    while border:
        for u in border:
            children = {v for v in tree_adj[u] if v in infos}
            subtree_size, num_in_x, parent = 1, int(u in X), prt[u]
            for v in children:
                subtree_size += infos[v][1]
                num_in_x += infos[v][2]
            infos[u] = (parent, subtree_size, num_in_x)
            del possible_inclusion[u]
            if parent is not None:
                possible_inclusion[parent] += 1
        border = {u for u, vote in possible_inclusion.items()
                  if ready(u, vote)}
    return infos, {u: v[0] for u, v in infos.items()}
def global_info(city, standalone=False):
    """Gather global statistics about `city`."""
    lvenues = geo_project(city, DB.venue.find({'city': city}, {'loc': 1}))
    lcheckins = geo_project(city, DB.checkin.find({'city': city}, {'loc': 1}))
    lphotos = geo_project(city, CLIENT.world.photos.find({'hint': city},
                                                         {'loc': 1}))
    local_projection = [lvenues, lcheckins, lphotos]
    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    visitors = xp.get_visitors(CLIENT, city)
    density = estimate_density(city)
    activity = [visits, visitors, density]
    global TOP_CATS
    TOP_CATS = p.load_var('top_cats.my')
    infos = {'venue': [] if standalone else ['cat', 'cats'],
             'photo': ['taken'] if standalone else ['venue']}
    svenues = s.Surrounding(DB.venue, {'city': city}, infos['venue'], lvenues)
    scheckins = s.Surrounding(DB.checkin, {'city': city}, ['time'], lcheckins)
    sphotos = s.Surrounding(CLIENT.world.photos, {'hint': city},
                            infos['photo'], lphotos)
    surroundings = [svenues, scheckins, sphotos]
    p.save_var('{}_s{}s.my'.format(city, 'venue'), svenues)
    if standalone:
        for name, var in zip(['venue', 'checkin', 'photo'], surroundings):
            p.save_var('{}_s{}s.my'.format(city, name), var)
    return local_projection + activity + surroundings
def users_and_tag(tag):
    r = DB.photos.aggregate([
        {"$match": {"hint": "sf", "ntags": tag}},
        {"$project": {"uid": 1}},
        {"$group": {"_id": "$uid", "count": {"$sum": 1}}},
        {"$sort": SON([("count", -1), ("_id", -1)])}
    ])
    save_var('u14', r['result'])
def increase_coverage(upto=5000):
    """Save `upto` unprocessed San Francisco tags."""
    from more_query import get_top_tags
    sup = persistent.load_var('supported')
    more = get_top_tags(upto, 'nsf_tag.dat')
    already = [v[0] for v in sup]
    addition = set(more).difference(set(already))
    persistent.save_var('addition', addition)
def run_experiment(pool, process_function, savefile, process_args, n_rep):
    if pool:
        runs = list(pool.imap_unordered(process_function, process_args,
                                        chunksize=n_rep//NUM_THREADS))
    else:
        runs = [process_function(_) for _ in process_args]
    res = {'time': list(map(itemgetter(0), runs)),
           'delta': list(map(itemgetter(1), runs)),
           'nb_error': list(map(itemgetter(2), runs))}
    p.save_var(savefile, res)
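# A hedged example of driving run_experiment, assuming a process_communities
# worker that returns (time, delta, nb_error) tuples and that NUM_THREADS is
# defined in this module; the argument dictionary is illustrative only.
from itertools import repeat
from multiprocessing import Pool
n_rep = 100
pool = Pool(NUM_THREADS)
community_args = repeat({"community_size": 50, "nb_communities": 4}, n_rep)
run_experiment(pool, process_communities, 'communities_050_04.my',
               community_args, n_rep)
pool.close()
pool.join()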
def run_circle_experiment(size, one_at_a_time, rigged=False, n_rep=100,
                          pivot=redensify.PivotSelection.Uniform, pool=None):
    args = repeat({"circle_size": size, "rigged": rigged, "pivot": pivot,
                   "one_at_a_time": one_at_a_time}, n_rep)
    if pool:
        runs = list(pool.imap_unordered(process_circle, args,
                                        chunksize=n_rep//NUM_THREADS))
    else:
        runs = list(map(process_circle, args))
    res = {'time': list(map(itemgetter(0), runs)),
           'nb_error': list(map(itemgetter(2), runs))}
    p.save_var(savefile_name('circle', [size, 0], pivot, one_at_a_time), res)
def brute_search(city_desc, hsize, distance_function, threshold, metric='jsd'):
    """Move a sliding circle over the whole city and keep track of the best
    result."""
    global SURROUNDINGS, CITY_FEATURES, THRESHOLD, RADIUS
    global METRIC_NAME, CITY_SUPPORT, DISTANCE_FUNCTION
    import multiprocessing
    RADIUS = hsize
    THRESHOLD = threshold
    METRIC_NAME = metric
    city_size, CITY_SUPPORT, CITY_FEATURES, city_infos = city_desc
    SURROUNDINGS, bounds = city_infos
    DISTANCE_FUNCTION = distance_function
    minx, miny, maxx, maxy = bounds
    nb_x_step = int(3*np.floor(city_size[0]) / hsize + 1)
    nb_y_step = int(3*np.floor(city_size[1]) / hsize + 1)
    best = [1e20, [], [], RADIUS]
    res_map = []
    pool = multiprocessing.Pool(4)
    x_steps = np.linspace(minx+hsize, maxx-hsize, nb_x_step)
    y_steps = np.linspace(miny+hsize, maxy-hsize, nb_y_step)
    x_vals, y_vals = np.meshgrid(x_steps, y_steps)
    to_cell_arg = lambda _: (float(_[1][0]), float(_[1][1]), _[0] % nb_x_step,
                             _[0]/nb_x_step, _[0])
    cells = i.imap(to_cell_arg,
                   enumerate(i.izip(np.nditer(x_vals), np.nditer(y_vals))))
    res = pool.map(one_cell, cells)
    pool.close()
    pool.join()
    res_map = []
    if metric == 'leftover':
        dsts = emd_leftover.collect_matlab_output(len(res))
        for cell, dst in i.izip(res, dsts):
            if cell[0]:
                cell[2] = dst
        clean_tmp_mats()
    for cell in res:
        if cell[0] is None:
            continue
        res_map.append(cell[:3])
        if cell[2] < best[0]:
            best = [cell[2], cell[3], [cell[0], cell[1]], RADIUS]
    if QUERY_NAME:
        import persistent as p
        logging.info('wrote: ' + str(os.path.join(OTMPDIR, QUERY_NAME)))
        p.save_var(os.path.join(OTMPDIR, QUERY_NAME),
                   [[cell[2], cell[3], [cell[0], cell[1]], RADIUS]
                    for cell in res if cell[0]])
    yield best, res_map, 1.0
def get_user_status(with_count=False):
    name = 'user_status' + ('_full' if with_count else '')
    fields = {'tourist': 1}
    if with_count:
        fields.update({'count': 1})
    try:
        d = load_var(name)
    except IOError:
        users = list(DB.users.find(fields=fields))
        if with_count:
            d = dict([(u['_id'], (u['count'], u['tourist'])) for u in users])
        else:
            d = dict([(u['_id'], u['tourist']) for u in users])
        save_var(name, d)
    return d
def run_planted_experiment(ball_size, nb_balls, one_at_a_time=True, n_rep=100,
                           pivot=redensify.PivotSelection.Uniform, pool=None):
    args = repeat({"ball_size": ball_size, "nb_balls": nb_balls,
                   "pivot": pivot, "one_at_a_time": one_at_a_time}, n_rep)
    if pool:
        runs = list(pool.imap_unordered(process_planted, args,
                                        chunksize=n_rep//NUM_THREADS))
    else:
        runs = list(map(process_planted, args))
    res = {'time': list(map(itemgetter(0), runs)),
           'delta': list(map(itemgetter(1), runs)),
           'nb_error': list(map(itemgetter(2), runs))}
    p.save_var(savefile_name('planted', [ball_size, nb_balls], pivot,
                             one_at_a_time), res)
def run_rings_experiment(size, nb_rings, shared_sign, rigged, one_at_a_time,
                         pivot=redensify.PivotSelection.Uniform, n_rep=100,
                         pool=None):
    args = repeat({"size": size, "nb_rings": nb_rings, "rigged": rigged,
                   "shared_sign": shared_sign, "pivot": pivot,
                   "one_at_a_time": one_at_a_time}, n_rep)
    if pool:
        runs = list(pool.imap_unordered(process_rings, args,
                                        chunksize=n_rep//NUM_THREADS))
    else:
        runs = [process_rings(_) for _ in args]
    res = {'time': list(map(itemgetter(0), runs)),
           'nb_error': list(map(itemgetter(2), runs))}
    p.save_var(savefile_name('rings', [size, nb_rings], pivot, one_at_a_time),
               res)
def load_existing_ids(cmd_args):
    """Read checkin ids in city from disk or DB."""
    city = cmd_args.city
    if city == 'whole':
        return []
    import persistent as p
    try:
        return p.load_var(city + '_checkins_ids.my')
    except IOError:
        pass
    import CommonMongo as cm
    db = cm.connect_to_db('foursquare', cmd_args.host, cmd_args.port)[0]
    ids = {str(_['_id']) for _ in db.checkin.find({'city': city}, {'_id': 1})
           if not isinstance(_['_id'], long)}
    p.save_var(city + '_checkins_ids.my', ids)
    return ids
def get_categories(client=None):
    """Return categories list from disk or from Foursquare website using
    client."""
    if client is None:
        raw_cats = p.load_var('raw_categories')['categories']
    else:
        raw_cats = client.venues.categories()
        p.save_var('raw_categories', raw_cats)
        raw_cats = raw_cats['categories']
    cats = Category('1', 'Venue', 0, parse_categories(raw_cats))
    # pylint: disable=E1101
    id_index = [(id_, idx + 100)
                for idx, id_ in enumerate(sorted(CAT_TO_ID.values()))
                if id_ not in ['0', '1']]
    ID_TO_INDEX.update(id_index)
    return cats
def seed_region():
    geo = f.json.loads(f.request.form['geo'])
    fields = ['metric', 'candidate', 'clustering']
    metric, candidate, clustering = [str(f.request.form[field])
                                     for field in fields]
    msg = 'From {}@{} to {} using {}, {}, {}'
    neighborhood = KNOWN_GEO.get(hash(str(geo)), 'custom')
    args = [ORIGIN['city'], neighborhood, DEST['city'], candidate,
            metric if candidate == 'dst' else 'NA', clustering]
    msg = msg.format(*args)
    print(msg)
    logging.warn(msg)
    res, log = nb.one_method_seed_regions(ORIGIN['city'], DEST['city'], geo,
                                          metric, candidate, clustering)
    res = dict(r=res, info=log)
    p.save_var('candidates/{}_{}_{}_{}_{}.my'.format(*args[1:]), res)
    return f.jsonify(res)
def get_data(DB):
    entropies = load_var('Hsupported')
    tags = sorted([k for k, v in entropies.items() if 2.5 <= v <= 3.01])
    save_var('mat_tag', tags)
    u = load_var('user_status')
    user_index = {k: i for i, k in enumerate(u)}

    def format_photo(p):
        user = user_index[p['uid']]
        loc = p['loc']['coordinates']
        taken = [p['taken'].weekday(), p['taken'].hour,
                 calendar.timegm(p['taken'].utctimetuple())]
        indicator = [int(t in p['ntags']) for t in tags]
        return [user] + loc + taken + indicator

    photos_feature = np.mat(tag_time(DB, tags, format_photo))
    sio.savemat('deep', {'A': scipy.sparse.csr_matrix(photos_feature)})
def read_and_insert():
    db = db_filepath('laptop')
    end = datetime.utcnow()
    with sqlite3.connect(db) as conn:
        tasks = {id_: name.lower()
                 for id_, name in get_project_list(conn).items()}
        next_task = parse_task([x.lower() for x in tasks.values()], sys.argv)
        task = make_task(next_task, tasks)
        print(task)
        pending_file = mk_path(get_data_saving_path('tracker'), '_tsk')
        if task is None:
            insert_pending_task(conn, end, pending_file)
            return
        pending = task['end'] is None
        if pending:
            insert_pending_task(conn, end, pending_file)
            save_var(pending_file, task)
        else:
            insert_task(conn, task)
def run_circle_experiment(size, rigged=False,
                          pivot_strategy=densify.PivotStrategy.uniform,
                          triangle_strategy=TriangleStatus.closeable,
                          one_at_a_time=True, n_rep=100, pool=None):
    args = repeat({"circle_size": size, "rigged": rigged,
                   "shared_edges": False, "pivot_strategy": pivot_strategy,
                   "triangle_strategy": triangle_strategy,
                   "one_at_a_time": one_at_a_time}, n_rep)
    if pool:
        runs = list(pool.imap_unordered(process_circle, args,
                                        chunksize=n_rep//NUM_THREADS))
    else:
        runs = list(map(process_circle, args))
    res = {'time': list(map(itemgetter(0), runs)),
           'nb_error': list(map(itemgetter(2), runs))}
    heuristic = strategy_to_str(pivot_strategy, triangle_strategy,
                                one_at_a_time)
    p.save_var('circle_{:04d}_{}_{}.my'.format(size, heuristic,
                                               int(time.time())), res)
def process_graph(G, E, noise, outname, asym=False):
    root = max(G.items(), key=lambda x: len(x[1]))[0]
    if not outname.startswith('belgrade/'):
        outname = 'belgrade/' + outname
    basename = '{}_{}'.format(outname, hostname())
    suffix = '.asymres' if asym else '.myres'
    if os.path.isfile(basename + '_perf' + suffix):
        return
    bfs = gs.perturbed_bfs(G, root)
    gtx, _ = galaxy_maker(G, 50, short=True, output_name=outname)
    stretch = None
    binary_signs = {e: (1 if s else -1) for e, s in E.items()}
    perf = []
    for train_edges in [bfs, gtx]:
        if asym:
            perf.extend(run_asym(G, E, train_edges))
        else:
            tree = {}
            for u, v in train_edges:
                gs.add_edge(tree, u, v)
            tags = pot.dfs_tagging(tree, binary_signs, root)
            gold, pred = pot.make_pred(tree, tags, binary_signs)
            tp, tn, fp, fn = confusion_number(gold, pred)
            perf.extend([accuracy(tp, tn, fp, fn), f1_score(tp, tn, fp, fn),
                         mcc(tp, tn, fp, fn)])
    if asym:
        _, edges = pot.read_tree(outname + '_0.edges')
        perf.extend(run_asym(G, E, edges))
        perf.extend(run_asym(G, E, tree_edges=None))
    else:
        gold, pred, _ = pot.predict_edges(outname + '_0', all_signs=E,
                                          degrees={root: 5})
        tp, tn, fp, fn = confusion_number(gold, pred)
        perf.extend([accuracy(tp, tn, fp, fn), f1_score(tp, tn, fp, fn),
                     mcc(tp, tn, fp, fn)])
    if noise == 0 and not asym:
        print(basename)
        bfsst = average_strech(set(E.keys()), bfs)
        persistent.save_var(basename + '_bfsst' + suffix, bfsst)
        gtxst = average_strech(set(E.keys()), gtx)
        persistent.save_var(basename + '_gtxst' + suffix, gtxst)
        stretch = [bfsst, gtxst]
    persistent.save_var(basename + '_perf' + suffix, perf)
    return perf, stretch
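# A hedged sketch tying process_graph to the Delaunay triangulation script
# elsewhere in this file: it assumes a graph was saved as
# 'belgrade/triangle_1000.my' by that script, and that noise=0 and the output
# name below are acceptable illustrative choices.
G, E = persistent.load_var('belgrade/triangle_1000.my')
res = process_graph(G, E, noise=0, outname='triangle_1000')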
# tmp = load_var('supported')
# tags = [v[0] for v in tmp]
# shuffle(tags)
# tags = [None] + tags
# sf_entropy(None)
# p = Pool(4)
# res = p.map(sf_entropy, tags)
# p.close()
# outplot('nentropies_{}.dat'.format(GRID_SIZE), ['H', 'tag'],
#         [r[0] for r in res], tags)
# outplot('nKentropies_{}.dat'.format(GRID_SIZE), ['D', 'tag'],
#         [r[1] for r in res], tags)
# top_metrics(tags)
# te = [time_entropy(tag) for tag in tags]
# t = prettytable.PrettyTable(['tag'] + PERIOD_NAME, sortby='day')
# t.align['tag'] = 'l'
# t.padding_width = 0
# for row in te:
#     t.add_row(row)
# with codecs.open('time_entropy.txt', 'w', 'utf8') as f:
#     f.write(t.get_string(border=False, left_padding_width=0,
#                          right_padding_width=2))
save_var('helsinki',
         tag_location(photos, None, CITY_BBOX, FIRST_TIME, LAST_TIME,
                      extra_info=['taken']))
t = 1000 * (clock() - start)
print('done in {:.3f}ms'.format(t))
import real_world as rw
import redensify
parser = ae.get_parser('Compute a galaxy tree')
args = parser.parse_args()
a = ae.further_parsing(args)
basename, seeds, synthetic_data, prefix, noise, balanced = a
if synthetic_data:
    try:
        ae.load_raw(basename, redensify, args)
    except IOError:
        import graph_tool as gt
        g = gt.load_graph(basename + '.gt')
        cexp.to_python_graph(g)
else:
    rw.read_original_graph(basename, seed=args.seed, balanced=balanced)
    redensify.G = deepcopy(rw.G)
    redensify.EDGES_SIGN = deepcopy(rw.EDGE_SIGN)
suffixes = ('_bal' if args.balanced else '', '_short' if args.short else '',
            '_safe' if args.safe else '', args.seed)
outname = 'lp10/{}{}{}{}_{}'.format(args.data.lower(), *suffixes)
print(outname)
res = meta_galaxy(redensify.G, redensify.EDGES_SIGN, 10, outname,
                  safe=args.safe, short=args.short)
if args.safe:
    gold, pred, _ = res
    import persistent
    persistent.save_var(outname + '_res.my', (gold, pred))
NTEST = 2000
city, districts = sys.argv[1], []
city_info = load_data(city)
gold_list = city_info[-1]
districts = sorted([nn for nn, gold in gold_list.iteritems()
                    if city in gold['gold']])
try:
    os.mkdir('random')
except OSError:
    pass
for district in districts:
    savename = 'random/{}_{}.my'.format(city, district)
    print(savename)
    if os.path.isfile(savename):
        continue
    distrib, best_score, best_region = [], 0, None
    for i in range(NTEST):
        regions, score = mock_random_list(city, district, city_info)
        if score > best_score:
            best_score, best_region = score, regions
        distrib.append(score)
    p.save_var(savename, distrib)
    outjson = [{'pos': rank + 1, 'metric': 'random', 'dst': -1,
                'venues': r[1],
                'geo': mapping(Polygon(np.fliplr(c.euclidean_to_geo(city,
                                                                    r[0]))))}
               for rank, r in enumerate(best_region)]
    filename = 'static/random_{}_{}.json'.format(city, district)
    with open(filename, 'w') as f:
        json.dump(outjson, f, sort_keys=True, indent=2,
                  separators=(',', ': '))
filename = sys.argv[1]
if len(sys.argv) > 2:
    skip = int(sys.argv[2])
else:
    skip = 0
csv.field_size_limit(sys.maxsize)
allowed_cities = set(SHORT_KEY)
with open(filename, 'rb') as f:
    reader = csv.DictReader(f, delimiter=';', quoting=csv.QUOTE_NONE)
    for i, row in enumerate(reader):
        # if i > 31000:
        #     break
        if i < skip:
            continue
        if row['lon,lat'] is None:
            print(row['vid'])
            continue
        venue = reformat(row)
        # if i == 29950:
        #     print(venue)
        #     break
        # if venue['_id'] == '4ac518c5f964a520c1a420e3':
        #     print(venue, venue['city'] in allowed_cities)
        if venue['city'] in allowed_cities:
            VENUE_LOC[venue['_id']] = (venue['loc'], venue['city'])
            TO_BE_INSERTED.append(venue)
        if len(TO_BE_INSERTED) == 400:
            mongo_insertion(TABLE)
mongo_insertion(TABLE)
persistent.save_var('venue_loc.my', VENUE_LOC)
city = args.city
chunker = Chunker.Chunker(foursquare.MAX_MULTI_REQUESTS)
previous = [e['_id'] for e in TABLE.find({'city': city})]
potential = gather_all_entities_id(checkins, DB_FIELD, city=city)
print('but already {} {}s in DB.'.format(len(previous), ENTITY_KIND))
import persistent as p
region = city or 'world'
invalid_filename = 'non_{}_id_{}'.format(ENTITY_KIND, region)
try:
    INVALID_ID = p.load_var(invalid_filename)
except IOError:
    pass
print('and {} {}s are invalid.'.format(len(INVALID_ID), ENTITY_KIND))
new_ones = set(potential).difference(set(previous))
new_ones = new_ones.difference(set(INVALID_ID))
outside = set([e['_id'] for e in TABLE.find({'city': None}, {'_id': 1})])
outside.intersection_update(new_ones)
print('and {} {}s are outside range.'.format(len(outside), ENTITY_KIND))
new_ones = new_ones.difference(outside)
print('So only {} new ones.'.format(len(new_ones)))
for batch in chunker(new_ones):
    IDS_QUEUE.put(batch)
    total_entities += len(batch)
IDS_QUEUE.join()
ENTITIES_QUEUE.join()
mongo_insertion()
print('{}/{} invalid id'.format(len(INVALID_ID), total_entities))
print('{}/{} requests'.format(CLIENT.rate_remaining, CLIENT.rate_limit))
p.save_var(invalid_filename, INVALID_ID)
    tags = sorted([k for k, v in entropies.items() if 2.5 <= v <= 3.01])
    save_var('mat_tag', tags)
    u = load_var('user_status')
    user_index = {k: i for i, k in enumerate(u)}

    def format_photo(p):
        user = user_index[p['uid']]
        loc = p['loc']['coordinates']
        taken = [p['taken'].weekday(), p['taken'].hour,
                 calendar.timegm(p['taken'].utctimetuple())]
        indicator = [int(t in p['ntags']) for t in tags]
        return [user] + loc + taken + indicator

    photos_feature = np.mat(tag_time(DB, tags, format_photo))
    sio.savemat('deep', {'A': scipy.sparse.csr_matrix(photos_feature)})


if __name__ == '__main__':
    import arguments
    args = arguments.city_parser().parse_args()
    city = args.city
    DB, client = cm.connect_to_db('world', args.host, args.port)
    s = clock()
    tags = supported_tags(DB, city, photos_threshold=30, users_threshold=5,
                          timespan=60)
    save_var(city + '_tag_support', tags)
    # entropies = {t[0]: period_entropy(DB, t[0]) for t in tags}
    # save_var('Hsupported', entropies)
    # get_data(DB)
    t = clock()
    print(t - s)
    if s == -1 == spred:
        path_lengths[0].append(slen)
    if s == -1 != spred:
        path_lengths[1].append(slen)
    if s == 1 != spred:
        path_lengths[2].append(slen)
    if s == 1 == spred:
        path_lengths[3].append(slen)
    # acc = accuracy_score(gold, pred)
    # f1, mcc = f1_score(gold, pred), matthews_corrcoef(gold, pred)
    mcc = compute_mcc(gold, pred)
    return (root_degree, branching_factors, positive_fraction, path_lengths,
            mcc)


if __name__ == '__main__':
    # pylint: disable=C0103
    num_threads, per_thread = 13, 6
    tasks = (num_threads*per_thread)
    rw.read_original_graph('soc-wiki.txt')
    roots = [_[0] for _ in rw.DEGREES[-tasks:]]
    edge_binary = {e: 2*int(s) - 1 for e, s in rw.EDGE_SIGN.items()}
    features = []
    target = []
    pool = Pool(num_threads)
    res = list(pool.imap_unordered(do_it, roots[:tasks],
                                   chunksize=per_thread))
    pool.close()
    pool.join()
    import persistent
    persistent.save_var('wik_feature.my', res)
#! /usr/bin/env python
# vim: set fileencoding=utf-8
"""Create Delaunay triangulation of random points in the plane."""
import sys
from timeit import default_timer as clock
import graph_tool.generation as gen
import numpy as np
import persistent


def to_python_graph(graph):
    """represents `graph` by two dictionaries"""
    G = {int(u): {int(v) for v in u.out_neighbours()}
         for u in graph.vertices()}
    E = {(int(e.source()), int(e.target())): True for e in graph.edges()}
    return G, E

if __name__ == '__main__':
    # pylint: disable=C0103
    n = int(sys.argv[1])
    start = clock()
    points = np.random.random((n, 2))*(n//50 + 1)
    g, _ = gen.triangulation(points, type="delaunay")
    persistent.save_var('belgrade/triangle_{}.my'.format(n),
                        to_python_graph(g))
    print('create {} edges in {:.3f} seconds'.format(g.num_edges(),
                                                     clock() - start))
sys.exit()
start = clock()
shazoo(*make_graph(4000))
print(clock() - start)
adj, _, ew, _, _, gold_sign = make_graph(400)
train_vertices = random.sample(gold_sign.keys(), 70)
gold, pred = offline_shazoo(adj, ew, gold_sign, train_vertices)
print(sum((1 for g, p in zip(gold, pred) if g != p)))
timing = []
for i in range(8):
    del FLEP_CALLS_TIMING[:]
    start = clock()
    shazoo(*make_graph(3250))
    p.save_var('flep_{}.my'.format(i), FLEP_CALLS_TIMING)
    # print('done in {:.3f} sec'.format(clock() - start))
    timing.append(clock() - start)
print('avrg run: {:.3f}'.format(sum(timing)/len(timing)))


def run_once(size):
    cexp.fast_preferential_attachment(size, 1)
    adj = cexp.redensify.G
    ew = {e: 120*random.random() for e in cexp.redensify.EDGES_SIGN}
    ns = {n: random.random() > .5 for n in adj
          if len(adj[n]) == 1 and random.random() < .7}
    root = max(adj.items(), key=lambda x: len(x[1]))[0]
    flep(adj, ns, ew, root)

run_once(1000)
run_once(1000)
start = clock()
        Atms.append(tms)
        Arr.append(rrs)
        with open('static/cmp_{}.json'.format(qcity)) as infile:
            star = ap.ujson.load(infile)
        get_gold = lambda c, d: [_['dst'] for _ in star[c][d]
                                 if _['metric'] == 'emd']
        rq = [star.get(_[0], {}).get(_[1]) is not None and
              np.min(get_gold(*_)) < 1e5 and len(dsts[i]) > 0
              for i, _ in enumerate(ALL_Q)]
        rqs = list(itertools.compress(ALL_Q, rq))
        t_slow = np.hstack([t_slow,
                            np.array([t for t, q in zip(emd, AQ)
                                      if q[0] == qcity and q[1:] in rqs])])
        t_fast = np.hstack([t_fast,
                            np.array(list(itertools.compress(tms, rq)))])
        slow = np.hstack([slow,
                          np.array([np.min(get_gold(*q))
                                    for q in itertools.compress(ALL_Q, rq)])])
        fast = np.hstack([fast,
                          np.array([10 if len(_) == 0 else min(_)
                                    for _ in itertools.compress(dsts, rq)])])
    full_data.append((Adsts, Atms, Arr, t_fast, t_slow, fast, slow))
import persistent as p
p.save_var('approx_brute_relevance.my', full_data)
import sys
sys.exit()
del full_data[:]
full_data = []
n_step = 1
# for n_step in range(5):
for knn in [8, 25, 50, 80, 160]:
    Adsts, Atms, Arr = [], [], []
    t_fast, t_slow, fast, slow = (np.array([]), np.array([]), np.array([]),
                                  np.array([]))
    for qcity in QCITIES:
        ALL_Q = queries(qcity)
        dsts, tms, rrs = ap.test_all_queries(ALL_Q, qcity, n_steps=1, k=knn)
        Adsts.append(dsts)
        Atms.append(tms)
        with open('static/cmp_{}.json'.format(qcity)) as infile:
#! /usr/bin/python2
# vim: set fileencoding=utf-8
import scipy.io as sio
import VenueFeature as vf
import CommonMongo as cm
import persistent as p
DB, CLIENT = cm.connect_to_db('foursquare')
vf.DB = DB
vf.CLIENT = CLIENT
brands = ["mcdonald's", 'starbucks']
import cities as C
starbucks = list(vf.DB.venue.find({'name': {'$in': ['Starbucks Coffee',
                                                    'Starbucks']}},
                                  {'city': 1}))
macdo = list(vf.DB.venue.find({'name': "McDonald's"}, {'city': 1}))
for city in C.SHORT_KEY:
    vindex = set(list(sio.loadmat(city + '_fv')['i']))
    fromdb = set([_['_id'] for _ in macdo if _['city'] == city])
    res = list(fromdb.intersection(vindex))
    p.save_var('{}_{}.my'.format(city, brands[0]), res)
    print('saved {} {} in {}'.format(len(res), brands[0], city))
    fromdb = set([_['_id'] for _ in starbucks if _['city'] == city])
    res = list(fromdb.intersection(vindex))
    p.save_var('{}_{}.my'.format(city, brands[1]), res)
    print('saved {} {} in {}'.format(len(res), brands[1], city))
def consolidate(tags):
    d = {tag: persistent.load_var(u'disc/post_{}_{}'.format(tag, GRID_SIZE))
         for tag in tags}
    persistent.save_var(u'disc/all_{}'.format(GRID_SIZE), d)
def post_process(tag):
    top_loc = persistent.load_var(u'disc/top_{}_{}'.format(tag, GRID_SIZE))
    merged = merge_regions(top_loc)
    persistent.save_var(u'disc/post_{}_{}'.format(tag, GRID_SIZE), merged)
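# A hedged sketch of the surrounding pipeline: post_process merges and saves
# each tag's top locations, then consolidate gathers them into a single
# 'disc/all_<GRID_SIZE>' file. The tag list below is illustrative only.
some_tags = ['museum', 'park', 'beach']
for tag in some_tags:
    post_process(tag)
consolidate(some_tags)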
    s = clock()
    llr.fit(Xa[train_feat], ya[train])
    pred = llr.predict(Xa[test_feat])
    end = clock()
    append_res(lesko, s, end, pred, ya[test], frac)
    s = clock()
    olr.fit(Xa[train, 15:17], ya[train])
    pred = olr.predict(Xa[test, 15:17])
    end = clock()
    append_res(logreg, s, end, pred, ya[test], frac)
    s = clock()
    dt.fit(Xa[train, 15:17], ya[train])
    pred = dt.predict(Xa[test, 15:17])
    end = clock()
    append_res(dectree, s, end, pred, ya[test], frac)
res[0].append(lesko)
res[1].append(fixed)
res[2].append(simple_fixed)
res[3].append(tuned)
res[4].append(cmp_tuned)
res[5].append(logreg)
res[6].append(dectree)
res[7].append(left_fixed)
res[8].append(rev_fixed)
p.save_var('{}_{}_{}.my'.format(pref, start, part + 1), (alphas, res))
def batch_predict(tree_adj, training_signs, edge_weight):
    """Predict the signs of all nodes of a weighted tree that are not given
    as training.

    It works by computing all the border trees, computing the sign of their
    hinge nodes (at most once), extracting all hinge trees within, and
    predicting the signs of non revealed nodes by propagating hinge values.
    """
    # since shazoo uses the revealed signs as-is, it's ok to use the same name
    training_signs, l2_values, rta_signs = training_signs
    all_nodes_to_predict = set(tree_adj) - set(training_signs)
    logging.debug('batch_predict has %d nodes to predict',
                  len(all_nodes_to_predict))
    methods = ['l2cost', 'rta', 'shazoo']
    # fields are current_closest_hinge, current_sign,
    # current_dst_to_closest_hinge
    node_predictions = {m: defaultdict(lambda: (None, None, 2e9))
                        for m in methods}
    hinge_value = {m: {} for m in methods}
    total_iter = 0
    while all_nodes_to_predict:
        some_root_of_a_border_tree = next(iter(all_nodes_to_predict))
        hinge_nodes, border_tree_nodes = find_hinge_nodes(
            tree_adj, edge_weight, training_signs, some_root_of_a_border_tree,
            with_visited=True)
        unmarked = border_tree_nodes - hinge_nodes
        for u in hinge_nodes:
            if u in hinge_value['shazoo']:
                continue
            vals, _, status = flep(tree_adj, (training_signs, rta_signs),
                                   edge_weight, u)
            hinge_value['shazoo'][u] = sgn(vals[0])
            hinge_value['rta'][u] = sgn(vals[1])
            if not USE_SCIPY:
                continue
            border_tree = build_border_tree_from_mincut_run(status,
                                                            edge_weight)
            _, E, El, leaves_sign, _, _ = border_tree
            L = {u: l2_values[u] for u in leaves_sign}
            mapped_E, mapped_El_L, mapping = preprocess_edge_and_leaves(E, El,
                                                                        L)
            val = solve_by_zeroing_derivative(mapped_E, mapped_El_L, mapping,
                                              L, reorder=False)[0][u]
            hinge_value['l2cost'][u] = sgn(val)
        predicted_in_that_border_tree = set()
        inner_iter = 0
        # to avoid the same fork being picked again and again
        unmarked.add(some_root_of_a_border_tree)
        while unmarked:
            one_to_predict = next(iter(unmarked))
            hinge_tree = get_hinge_tree(one_to_predict, tree_adj, hinge_nodes)
            other_predicted = set()
            for h, h_val in iteritems(hinge_value['shazoo']):
                if h not in hinge_tree:
                    continue
                predicted = propagate_hinge(hinge_tree, h, h_val,
                                            node_predictions['shazoo'],
                                            edge_weight)
                for u in predicted:
                    prediction_info = node_predictions['shazoo'][u]
                    used_hinge = prediction_info[0]
                    node_predictions['rta'][u] = (
                        used_hinge, hinge_value['rta'][used_hinge],
                        prediction_info[2])
                    if not USE_SCIPY:
                        continue
                    node_predictions['l2cost'][u] = (
                        used_hinge, hinge_value['l2cost'][used_hinge],
                        prediction_info[2])
                other_predicted.update(predicted)
            predicted_in_that_border_tree.update(other_predicted)
            unmarked -= other_predicted
            inner_iter += 1
            if inner_iter > len(tree_adj):
                import time
                logging.critical('batch predict failed in the inner loop')
                persistent.save_var(
                    '__fail_{}.my'.format(int(time.time())),
                    (tree_adj, (training_signs, l2_values, rta_signs),
                     edge_weight))
                raise RuntimeError('batch predict failed in the inner loop')
        all_nodes_to_predict -= predicted_in_that_border_tree
        total_iter += 1
        if total_iter > len(tree_adj):
            import time
            logging.critical('batch predict failed in the outer loop')
            persistent.save_var(
                '__fail_{}.my'.format(int(time.time())),
                (tree_adj, (training_signs, l2_values, rta_signs),
                 edge_weight))
            raise RuntimeError('batch predict failed in the outer loop')
    logging.debug('batch_predict has actually predicted %d nodes',
                  len(node_predictions) - len(training_signs))
    return {m: {u: v[1] for u, v in iteritems(node_predictions[m])
                if u not in training_signs}
            for m in methods}
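# A hedged usage sketch for batch_predict, assuming it lives in a module
# (here called shazoo) together with its helpers (find_hinge_nodes, flep,
# propagate_hinge, ...). The path graph, the unit edge weights and the sign
# encoding of the revealed nodes below are guesses for illustration only.
import shazoo
tree_adj = {0: {1}, 1: {0, 2}, 2: {1, 3}, 3: {2, 4}, 4: {3}}
edge_weight = {(0, 1): 1.0, (1, 2): 1.0, (2, 3): 1.0, (3, 4): 1.0}
revealed = {0: 1, 4: -1}        # signs used by the shazoo estimator
l2_values = {0: 1.0, 4: -1.0}   # real values used by the l2cost estimator
rta_signs = {0: 1, 4: -1}       # rounded values used by the rta estimator
preds = shazoo.batch_predict(tree_adj, (revealed, l2_values, rta_signs),
                             edge_weight)
print(preds['shazoo'])  # predicted signs of the non revealed nodes 1, 2, 3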
    u, v = int(e.source()), int(e.target())
    if (u, v) in bfs_tree:
        bfsmap[e] = True
    else:
        bfsmap[e] = False
k.set_vertex_filter(None)
k.set_edge_filter(bfsmap)
print_diag('build tree {}, {} test edges'.format(root, len(test_edges)))
bfs_dst = shortest_distance(k, dense=False)
bfs_mat = np.zeros((n, n), dtype=np.uint8)
for v in k.vertices():
    bfs_mat[int(v), :] = bfs_dst[v].a.astype(np.uint8)
print_diag('computed pairwise distance')
bsum = 0
bsize = 0
esum = 0
for i, v in enumerate(lcc_nodes):
    graph_distance = dst_mat[v, lcc_nodes[i+1:]]
    tree_distance = bfs_mat[v, lcc_nodes[i+1:]]
    if v in test_graph:
        esum += bfs_mat[v, sorted(test_graph[v])].sum()
    ratio = (tree_distance/graph_distance)
    bsum += ratio.sum()
    bsize += ratio.shape[0]
path_stretch = bsum/bsize
edge_stretch = (esum/2)/len(test_edges)
print_diag('computed stats')
print(idx, root, path_stretch, edge_stretch)
p.save_var('{}_out_{}.my'.format(prefix, idx),
           (idx, root, path_stretch, edge_stretch))
    def save_new_lid(self):
        """Save these new venue ids because we need to build their profile."""
        region = 'world' if self.city is None else self.city
        id_ = str(hash(self.cids[0]))[:5]
        output = 'new_venue_id_{}_{}'.format(id_, region)
        p.save_var(output, set(self.new_venues))