Example #1
def GetBuildingsObjects(sd, b_n, o_n):
    # sd - random seed (integer), b_n - number of buildings to pick, o_n - number of fire stations to pick
    random.seed(sd)
    builds = sl.load_obj('buildings')
    objects = sl.load_obj('firestations')
    builds_choice = random.sample(builds, b_n)
    objects_choice = random.sample(objects, o_n)
    res_builds = [b['id'] for b in builds_choice]
    res_objects = [o['id'] for o in objects_choice]
    return res_builds, res_objects
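
The examples on this page rely on a small persistence helper imported as sl or saveload, whose implementation is not shown. A minimal sketch, assuming it is a thin pickle wrapper that stores objects under obj/<name>.pkl (the path pattern used in Example #10), could look like this:

import os
import pickle

def save_obj(obj, name):
    # Assumed helper: serialise an object to obj/<name>.pkl
    os.makedirs('obj', exist_ok=True)
    with open('obj/' + name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    # Assumed helper: load a previously pickled object from obj/<name>.pkl
    with open('obj/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)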
Example #2
def draw_tree(tree, name):
    fig = saveload.load_obj('fig_demo')
    fig.set_size_inches(100, 100, forward=True)
    node_coords = saveload.load_obj("coords")

    # Each tree element is an edge (a, b); draw it as a red segment
    for a, b in tree:
        a_coords = (float(node_coords[a][0]), float(node_coords[a][1]))
        b_coords = (float(node_coords[b][0]), float(node_coords[b][1]))
        plt.plot((a_coords[0], b_coords[0]), (a_coords[1], b_coords[1]), '-r')

    fig.savefig(name, dpi=100)
Example #3
def draw_clusters_with_centers(clu, cen, name):
    fig = saveload.load_obj('fig_demo')
    fig.set_size_inches(100, 100, forward=True)
    node_coords = saveload.load_obj("coords")

    for c in clu:
        for v in c:
            v_coords = (float(node_coords[v][0]), float(node_coords[v][1]))
            plt.scatter(v_coords[0], v_coords[1], linewidths=20, c="black")
    for v in cen:
        v_coords = (float(node_coords[v][0]), float(node_coords[v][1]))
        plt.scatter(v_coords[0], v_coords[1], linewidths=20, c="red")

    fig.savefig(name, dpi = 100)
Example #4
def GetInfo(obj_id):
    # Return the building, fire station or road whose id (or node id) matches obj_id.
    # If nothing matches, res is never assigned and an UnboundLocalError is raised;
    # main() below relies on that to report a wrong id.
    builds = sl.load_obj('buildings')
    firestations = sl.load_obj('firestations')
    roads = sl.load_obj('roads')
    for build in builds:
        if build['id'] == obj_id:
            res = build
    for firestation in firestations:
        if firestation['id'] == obj_id:
            res = firestation
    for road in roads:
        for nd in road['nodes']:
            if nd == obj_id:
                res = road
    return res
Example #5
def GetWeightsDist(Dist_list, f_nodes):
    # Scale the distance to each finish node by its stored weight,
    # falling back to a random factor if no weight is known.
    weights = saveload.load_obj('weights')
    for f_node in f_nodes:
        if f_node in weights:
            Dist_list[f_node] *= weights[f_node]
        else:
            Dist_list[f_node] *= random.random()
Example #6
def get_style(im_path='pasha_style.jpg'):

    img2vec = Img2Vec(model='alexnet')
    a = load_obj('vecs')
    names = load_obj('names')

    a = np.array(a)
    start = time()
    q = img2vec.get_vec(Image.open(im_path))
    score = np.sum(q * a, axis=1) / np.linalg.norm(a, axis=1)
    topk_idx = np.argsort(score)[::-1]
    for i in topk_idx:
        print('> %s\t%s' % (score[i], names[i]))
    end = time()
    print(end - start)
    return [names[i] for i in topk_idx]
Example #7
	def load(self):
		if self.check_previous():
			prev = sl.load_obj(self.name+'.gtr')
			attr = prev.__dict__
			for key in attr:
				setattr(self, key, attr[key])

		else:
			print('Error: load call failure, file not found.')
			exit()
Example #8
def main(args):
    G = sl.load_obj("adj_list_47")

    # Show fire stations
    if (args.show_fs):
        firestations = sl.load_obj("firestations")
        for station in firestations:
            print(station['id'], station['name'])

    # Get Info By ID
    if (args.info is not None):
        try:
            res = GetInfo(args.info)
            for i in res:
                print(i, res[i])

        except UnboundLocalError:
            print("Wrong id. Check it and try again.")
        except:
            print("Oops, something went wrong")

    # First half of the task.
    if (args.pick_fs_and_bds is not None):

        m = args.pick_fs_and_bds[0]
        n = args.pick_fs_and_bds[1]
        # Pick random m firestations and n buildings
        builds, stations = GetBuildingsObjects(args.pick_fs_and_bds[2], n, m)

        # Now, to analyse the distance values, we have to run DijkstraWithFinishNodes for all stations and all buildings

        res_builds = {}
        res_stations = {}
        mindist_from_builds = {}
        mindist_from_stations = {}

        if (args.no_recalc_values is None):
            for b in builds:
                res_builds[b] = graph.DijkstraWithFinishNodes(G, b, stations)
                dist, minfb = graph.GetMinDist(res_builds[b][0], stations)
                mindist_from_builds[b] = (dist, minfb)

                print("Для дома c id = ", b, " ближайшая пожарная станция - ",
                      GetInfo(minfb[0])['name'], " id = ", minfb)
                print(
                    "-----------------------------------------------------------------------------------------------------------"
                )

            print("\n")
            for st in stations:
                res_stations[st] = graph.DijkstraWithFinishNodes(G, st, builds)
                dist, minfs = graph.GetMinDist(res_stations[st][0], builds)
                mindist_from_stations[st] = (dist, minfs)

                print("Для пожарной станции ",
                      GetInfo(st)["name"], " id = ", st,
                      " ближайший дом c id = ", minfs)
                print(
                    "-----------------------------------------------------------------------------------------------------------"
                )

            sl.save_obj(res_builds, "res_builds")
            sl.save_obj(res_stations, "res_stations")
            sl.save_obj(mindist_from_stations, "mindist_from_stations")
            sl.save_obj(mindist_from_builds, "mindist_from_builds")

        else:
            res_builds = sl.load_obj("res_builds")
            res_stations = sl.load_obj("res_stations")
            mindist_from_builds = sl.load_obj("mindist_from_builds")
            mindist_from_stations = sl.load_obj("mindist_from_stations")

        # Find the minimum of the round-trip (there and back) distance
        if (args.both_sides is not None):
            for b in builds:
                min_dist_sum = float('inf')
                closest_station = None
                for st in stations:
                    if (min_dist_sum >
                            res_stations[st][0][b] + res_builds[b][0][st]):
                        min_dist_sum = res_stations[st][0][b] + res_builds[b][
                            0][st]
                        closest_station = st

                print(
                    f"For the building with id = {b} the nearest fire station (by round-trip sum) is {GetInfo(closest_station)['name']}"
                )

        #For which infrastructure object the sum of shortest distances from it to all buildings is minimal.
        if (args.sum_of_paths is not None):
            min_of_sum = float('inf')
            minel = None
            for st in stations:
                currsum = 0
                for s in res_stations[st][0]:
                    currsum += res_stations[st][0][s]

                if (currsum < min_of_sum):
                    min_of_sum = currsum
                    minel = st

            print(
                f"For the object {GetInfo(minel)['name']}, id = {minel} the sum of shortest distances from it to all buildings is minimal"
            )

        #For which infrastructure object the constructed shortest-path tree has the minimal weight
        if (args.sum_of_tree is not None):
            min_of_tree = float('inf')
            mintree = None
            for st in stations:
                tree = graph.GetTree(builds, res_stations[st][1])
                sumoftree = graph.GetSumOfTree(tree, G)
                if (sumoftree < min_of_tree):
                    min_of_tree = sumoftree
                    mintree = st

            print(
                f"The shortest-path tree has the minimal weight for {GetInfo(mintree)['name']}, id = {mintree}"
            )

        # Determine which object is located so that the distance between it and the farthest building is minimal
        if (args.mindist_fs is not None):
            mindist_global = float('inf')
            resfs = None
            for st in stations:
                # Find the most distant reachable building
                maxdist = 0
                for s in res_stations[st][0]:
                    if maxdist < res_stations[st][0][s] and res_stations[st][
                            0][s] != float('inf'):
                        maxdist = res_stations[st][0][s]
                if maxdist < mindist_global:
                    mindist_global = maxdist
                    resfs = st
            print(
                f"The object {GetInfo(resfs)['name']} with id = {resfs} is located so that the distance between it and the farthest building is minimal"
            )

        # For each building, determine the objects located no farther than X km away
        if (args.set_distance is not None):
            X = args.set_distance

            not_far_stations = {}
            for b in builds:
                not_far_stations[b] = []
                for st in stations:
                    if res_builds[b][0][st] <= X:
                        not_far_stations[b].append(st)
            for b in builds:
                if len(not_far_stations[b]) > 0:
                    print(
                        f"For building {b} the following stations are within {X} notional km of reachability (travelling from the building to the station): {not_far_stations[b]}"
                    )
                else:
                    print(
                        f"For building {b} no stations are within {X} notional km of reachability (travelling from the building to the station)"
                    )
            print(
                "-----------------------------------------------------------------------------------------------------------"
            )

            for b in builds:
                not_far_stations[b] = []
                for st in stations:
                    if res_stations[st][0][b] <= X:
                        not_far_stations[b].append(st)
            for b in builds:
                if len(not_far_stations[b]) > 0:
                    print(
                        f"For building {b} the following stations are within {X} notional km of reachability (travelling from the station to the building): {not_far_stations[b]}"
                    )
                else:
                    print(
                        f"For building {b} no stations are within {X} notional km of reachability (travelling from the station to the building)"
                    )
            print(
                "-----------------------------------------------------------------------------------------------------------"
            )

            for b in builds:
                not_far_stations[b] = []
                for st in stations:
                    if res_stations[st][0][b] + res_builds[b][0][st] <= X:
                        not_far_stations[b].append(st)
            for b in builds:
                if len(not_far_stations[b]) > 0:
                    print(
                        f"For building {b} the following stations are within {X} notional km of reachability (when a return trip is required): {not_far_stations[b]}"
                    )
                else:
                    print(
                        f"For building {b} no stations are within {X} notional km of reachability (when a return trip is required)"
                    )
    # Second part of the research work. Clusters --------------------------------------------------------------------------------------------
    if (args.data_for_clusters is not None):
        n = args.data_for_clusters[0]
        # Pick random n buildings and 1 firestation
        builds, station = GetBuildingsObjects(args.data_for_clusters[1], n, 1)

        # Let's build a tree
        station = station[0]
        dij = graph.DijkstraWithFinishNodes(G, station, builds)
        tree = graph.GetTree(builds, dij[1])

        # This is probably where the tree should be drawn

        # Compute the tree length
        treelen = graph.GetSumOfTree(
            tree, G
        )  # returns the sum of the tree; edges - from GetTree, adj_list from GetGraphList

        # Compute the sum of shortest distances
        that_sum = 0
        for el in dij[0]:
            if (dij[0][el] != float('inf')):
                that_sum += dij[0][el]

        print(f"The sum of shortest distances is {that_sum}")

        res_builds = {}
        res_stations = {}
        mindist_from_builds = {}
        mindist_from_stations = {}

        #The study for K = 2, 3, 4 goes here

        K = [2]  #3,4]

        for k in K:
            clu = clusters.Get_k_Clusters(builds, G, k)

            # Search for centroids
            print("Centroid search started")
            center = clusters.Find_Centers(clu, G)
            print("Centroids: ", center)

            # Search for shortest paths within the clusters
            pos = 0
            for c in clu:
                ccen = center[pos]  # the centroid of this cluster

                cdij = graph.DijkstraWithFinishNodes(G, ccen, c)
                ctree = graph.GetTree(c, cdij[1])

                # Compute the tree length
                ctreelen = graph.GetSumOfTree(ctree, G)
                print(f"For cluster {pos} the tree length is {ctreelen}")

                # Compute the sum of shortest distances
                csum = 0
                for el in cdij[0]:
                    if (cdij[0][el] != float('inf')):
                        csum += cdij[0][el]

                print(
                    f"The sum of shortest distances for cluster {pos} is {csum}"
                )
                pos += 1
    # Build the dendrogram
    if (args.dendrogram is not None):
        n = args.dendrogram[0]
        # Pick random n buildings and 1 firestation
        builds, station = GetBuildingsObjects(args.dendrogram[0], n, 1)

        plt.figure()
        dn = hierarchy.dendrogram(clusters.Get_Dendro_matr(builds, G))
        plt.savefig('foo.pdf')

        #if(args.no_recalc_values is None):
        #   print("here we go")
    if (args.get_2_3_5_clusters is not None):
        n = args.get_2_3_5_clusters[0]
        # Pick random n buildings and 1 firestation
        builds, station = GetBuildingsObjects(args.get_2_3_5_clusters[1], n, 1)
        print(len(builds))
        print(station)
        print('-' * 25)

        #res_5, res_3, res_2, dendromatr = clusters.Get_Clusters(builds, G)
        #The study for K = 2, 3, 5 goes here

        K = [2, 3, 5]

        for k in K:
            print(k)
            clu = None
            if (k == 2):
                clu = sl.load_obj("res_2")
            else:
                clu = clusters.Get_k_Clusters(builds, G, k)
                sl.save_obj(clu, "res_" + str(k))

            # Search for centroids
            print("Centroid search started")
            center = clusters.Find_Centers(clu, G)
            sl.save_obj(center, "center" + str(k))
            print("Centroids: ", center)

            # Shortest-path tree from the station to the centroids
            #fs_to_cen = sl.load_obj("fs_to_cen_"+str(k))
            fs_to_cen = graph.DijkstraWithFinishNodes(G, station[0], center)
            fs_to_cen_tree = graph.GetTree(center, fs_to_cen[1])
            #fs_to_cen_tree = sl.load_obj("fs_to_cen_tree_"+str(k))
            sl.save_obj(fs_to_cen, "fs_to_cen_" + str(k))
            sl.save_obj(fs_to_cen_tree, "fs_to_cen_tree_" + str(k))

            # Sum of shortest distances
            that_sum = 0
            for el in fs_to_cen[0]:
                if (fs_to_cen[0][el] != float('inf')):
                    that_sum += fs_to_cen[0][el]

            print(f"The sum of shortest distances is {that_sum}")
            # Search for shortest paths within the clusters
            pos = 0
            for c in clu:
                ccen = center[pos]  # the centroid of this cluster

                cdij = graph.DijkstraWithFinishNodes(G, ccen, c)
                sl.save_obj(cdij, "cdij" + str(k) + str(pos))
                ctree = graph.GetTree(c, cdij[1])
                sl.save_obj(ctree, "ctree" + str(k) + str(pos))

                # Compute the tree length
                ctreelen = graph.GetSumOfTree(ctree, G)

                print(f"For cluster {pos} the tree length is {ctreelen}")

                # Compute the sum of shortest distances
                #csum = 0
                #for el in cdij[0]:
                #   if(cdij[0][el]!=float('inf')):
                #      csum+= cdij[0][el]

                #print(f"The sum of shortest distances for cluster {pos} is {csum}")
                pos += 1
        #sl.save_obj(res_5,"res_5")
        #sl.save_obj(res_3,"res_3")
        #sl.save_obj(res_2,"res_2")

        #sl.save_obj(dendromatr, "dendromatr")

    if (args.get_2_3_5_clusters_load is not None):
        n = args.get_2_3_5_clusters_load[0]
        # Pick random n buildings and 1 firestation
        builds, station = GetBuildingsObjects(args.get_2_3_5_clusters_load[1], n, 1)
        print(len(builds))
        print(station)
        print('-' * 25)

        #res_5, res_3, res_2, dendromatr = clusters.Get_Clusters(builds, G)
        #The study for K = 2, 3, 5 goes here

        K = [2, 3, 5]

        for k in K:
            print(k)
            clu = None
            if (k == 2):
                clu = sl.load_obj("res_2")
            else:
                clu = clusters.Get_k_Clusters(builds, G, k)
                sl.save_obj(clu, "res_" + str(k))

            # Search for centroids
            print("Centroid search started")
            center = clusters.Find_Centers(clu, G)
            sl.save_obj(center, "center" + str(k))
            print("Centroids: ", center)

            # Shortest-path tree from the station to the centroids
            #fs_to_cen = sl.load_obj("fs_to_cen_"+str(k))
            fs_to_cen = graph.DijkstraWithFinishNodes(G, station[0], center)
            fs_to_cen_tree = graph.GetTree(center, fs_to_cen[1])
            #fs_to_cen_tree = sl.load_obj("fs_to_cen_tree_"+str(k))
            sl.save_obj(fs_to_cen, "fs_to_cen_" + str(k))
            sl.save_obj(fs_to_cen_tree, "fs_to_cen_tree_" + str(k))

            # Sum of shortest distances
            that_sum = 0
            for el in fs_to_cen[0]:
                if (fs_to_cen[0][el] != float('inf')):
                    that_sum += fs_to_cen[0][el]

            print(f"The sum of shortest distances is {that_sum}")
            # Search for shortest paths within the clusters
            pos = 0
            for c in clu:
                ccen = center[pos]  # the centroid of this cluster

                cdij = graph.DijkstraWithFinishNodes(G, ccen, c)
                sl.save_obj(cdij, "cdij" + str(k) + str(pos))
                ctree = graph.GetTree(c, cdij[1])
                sl.save_obj(ctree, "ctree" + str(k) + str(pos))

                # Compute the tree length
                ctreelen = graph.GetSumOfTree(ctree, G)

                print(f"For cluster {pos} the tree length is {ctreelen}")

                # Compute the sum of shortest distances
                #csum = 0
                #for el in cdij[0]:
                #   if(cdij[0][el]!=float('inf')):
                #      csum+= cdij[0][el]

                #print(f"The sum of shortest distances for cluster {pos} is {csum}")
                pos += 1
Example #9
def GetGraphListWithRead():
    graph_list = {}
    roads = saveload.load_obj('roads')
    coords = saveload.load_obj('coords')
    buildings = saveload.load_obj('buildings') + saveload.load_obj('firestations')
    print('phase1')
    for road in roads:
        oneway = False
        if 'oneway' in road.keys():
            if road['oneway'] == 'yes':
                oneway = True
        nodes = road['nodes']
        for node in nodes:
            if node not in graph_list.keys():
                graph_list[node] = []
        for i in range(len(nodes)):
            if (i < len(nodes) - 1):
                node1_coords = coords[nodes[i]]
                node2_coords = coords[nodes[i + 1]]
                # Edge weight: squared Euclidean distance between the node coordinates, scaled by 1e6
                distance = round(1000000*((float(node1_coords[0])-float(node2_coords[0]))**2 + (float(node1_coords[1])-float(node2_coords[1]))**2), 4)
                graph_list[nodes[i]].append((nodes[i+1],distance))
                if not oneway:
                    graph_list[nodes[i+1]].append((nodes[i],distance))
    print('phase2')
    road_list = graph_list.copy()
    for building in buildings:
        graph_list[building['id']] = []
        node1_coords = coords[building['id']]
        nearest_node = '-'
        min_dist = float('inf')
        for node in road_list:
            node2_coords = coords[node]
            distance = 1000000*((float(node1_coords[0])-float(node2_coords[0]))**2 + (float(node1_coords[1])-float(node2_coords[1]))**2)
            if (distance < min_dist):
                min_dist = distance
                nearest_node = node
        min_dist = round(min_dist, 4)
        graph_list[building['id']].append((nearest_node,min_dist))
        graph_list[nearest_node].append((building['id'], min_dist))

    build_nodes = [ building['id'] for building in buildings]
    print('phase3')
    # delete pass-through vertices (graph simplification)
    for v in road_list.keys():
        if len(graph_list[v]) <=2:
            connect = False
            for vert in graph_list[v]:
                if vert[0] in build_nodes:
                    connect = True
                    break
            if connect == False:
                # oneway
                if len(graph_list[v]) == 1:
                    vert_in = []
                    vert_out = [ vert[0] for vert in graph_list[v]]
                    for vert in graph_list.keys():
                        if v in [ v_out[0] for v_out in graph_list[vert]]:
                            vert_in.append(vert)
                    if len(vert_in) == len(vert_out) and set(vert_in) != set(vert_out):
                        l = [ v_out[0] for v_out in graph_list[vert_in[0]]]
                        index = l.index(v)
                        vert_del = graph_list[vert_in[0]][index]
                        node1_coords = coords[vert_in[0]]
                        node2_coords = coords[vert_out[0]]
                        distance = round(1000000*((float(node1_coords[0])-float(node2_coords[0]))**2 + (float(node1_coords[1])-float(node2_coords[1]))**2), 4)
                        graph_list[vert_in[0]].append((vert_out[0], distance))
                        while vert_del in graph_list[vert_in[0]]:
                            graph_list[vert_in[0]].remove(vert_del)
                        del graph_list[v]
                # twoways
                elif len(graph_list[v]) == 2:
                    vert_in = []
                    vert_out = [ vert[0] for vert in graph_list[v]]
                    for vert in graph_list.keys():
                        if v in [ v_out[0] for v_out in graph_list[vert]]:
                            vert_in.append(vert)
                    if len(vert_in) == len(vert_out) and set(vert_in) == set(vert_out):
                        v1 = vert_in[0]
                        v2 = vert_in[1]
                        l1 = [ v_out[0] for v_out in graph_list[v1]]
                        l2 = [ v_out[0] for v_out in graph_list[v2]]
                        index1 = l1.index(v)
                        index2 = l2.index(v)
                        vert_del1 = graph_list[v1][index1]
                        vert_del2 = graph_list[v2][index2]
                        node1_coords = coords[v1]
                        node2_coords = coords[v2]
                        distance = round(1000000*((float(node1_coords[0])-float(node2_coords[0]))**2 + (float(node1_coords[1])-float(node2_coords[1]))**2), 4)
                        graph_list[v1].append((v2, distance))
                        graph_list[v2].append((v1, distance))
                        while vert_del1 in graph_list[v1]:
                            graph_list[v1].remove(vert_del1)
                        while vert_del2 in graph_list[v2]:
                            graph_list[v2].remove(vert_del2)                        
                        del graph_list[v]
    print('graph is built')
    return graph_list
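
A hedged usage sketch (not part of the original function): the adjacency list built above is presumably cached with the same saveload helpers so that main() in Example #8 can load it again; the file name "adj_list_47" is an assumption taken from that call.

if __name__ == '__main__':
    # Build the road graph once and cache it for later runs
    G = GetGraphListWithRead()
    saveload.save_obj(G, 'adj_list_47')  # hypothetical cache name, mirroring sl.load_obj("adj_list_47") in Example #8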
Example #10
def relative_queryset(all_queries, fname, trange, qassoc=None, loc='GB', verbose=False):
	cmpext = '_5comp'

	print('Beginning relative queryset compilation...')
	
	PYTREND=None

	if not os.path.isdir('obj'):
		os.makedirs('obj')
	if os.path.exists('obj/'+fname+'.pkl'):
		print('Loading previous file...')
		global_dict = sl.load_obj(fname)
	else:
		global_dict = {}
		sl.save_obj(global_dict, fname)
	if type(all_queries)==str:
		query_elements = all_queries.split(', ')
	elif type(all_queries)==list:
		query_elements = all_queries
	else:
		print("Data type for query input must be 'list' or 'string'")
		exit()
	

	if verbose:
		print('Query elements: ', query_elements)


	"""
	global_dict is the main dictionary - all search terms are stored individually here, including
	their relative factor
	
	First just loop through the queries and search for each in pytrends
	"""
	for query in query_elements:
		if not query in global_dict:
			if PYTREND==None:
				PYTREND = TrendReq(hl='en-UK', tz=0)
			print('Requesting data for: ', query)
			global_dict[query] = {'data':relative_data([query], PYTREND, trange=trange, loc=loc), 'factor':.0}
			sl.save_obj(global_dict, fname)
			#print('Data for %s saved successfully.' %query)
			#Need to wait so as not to exceed google's rate limit
			#time.sleep(random.randint(1, 4))
		else:
			print('"%s" already stored in dictionary.' %query)


	"""
	del_queries - queries for which there is no data

	go through the global_dict and check if there is data
	"""
	del_queries = []
	
	for qu in global_dict:
		if global_dict[qu]['data']==None:
			del_queries.append(qu)


	"""
	global_dict2 is a dictionary in which 5 search terms are compared
	at once. By applying a 'reference term', each 5 are compared to each
	other 5. 

	"""

	
	#Remove queries which we do not want.
	query_set = [q for q in global_dict if q not in del_queries]

	query_comps = []
	iqueries = range(len(query_set))
	Nlow  = 1
	lq_val = 1e-7
	MaxLoops = 5

	icheck = 0

	#Try to load old versions of the 5 query comparison
	if os.path.exists('obj/'+fname+cmpext+'.pkl'):
		print('Loading previous for 5 comparison file...')
		global_dict2 = sl.load_obj(fname+cmpext)
		iqueries = find_lowfactors(global_dict, query_set, thresh=lq_val)
	else:
		global_dict2 = {}
		#sl.save_obj(global_dict, fname+cmpext)


	#query_set = copy.copy(query_elements)
	#ESTABLISH PLAN BEFORE CONTINUING... REWRITE!!!
	if not 'REF' in global_dict2:
		global_dict2['REF'] = ''
		sl.save_obj(global_dict2, fname+cmpext)

	ref_query = global_dict2['REF']
			


	"""
	At present, this is pretty rough - I simply set search terms which have '0' searches
	relative to the other terms to a small number (1e-7) for the subsequent pairwise 
	comparison (which is the one thats important to get right...)

	"""

	while len(iqueries)>Nlow and icheck<MaxLoops:

		print('Five query check, loop number %d/%d'%(icheck+1, MaxLoops))
		query_temp = []
		subiquery=0
		for iquery in iqueries:
			query_temp.append(query_set[iquery])
			#If there is not a reference query then we need to strip the first 5 queries
			#If there is a reference query already, can simply divide the queries into 4s
			if ref_query=='':
				groupsize = GROUPSIZE
			else:
				groupsize = GROUPSIZE-1
			if len(query_temp)%groupsize==0 or iquery==len(query_set)-1:
				if not ref_query == '':
					query_temp.append(ref_query)
				qstring = ", ".join(query_temp)

				if PYTREND==None:
					PYTREND = TrendReq(hl='en-UK', tz=0)

				if qstring not in global_dict2:
					print('Requesting data for: ', qstring)
					global_dict2[qstring] = {'data': relative_data(query_temp, PYTREND, trange=trange, loc=loc), 'factor':1.0}
					# Save straight away so a crash or rate limit does not lose the new data
					sl.save_obj(global_dict2, fname+cmpext)

				if ref_query == '':
					ref_query = get_reference(global_dict2[qstring]['data'])
					global_dict2['REF'] = ref_query

				for quer in query_temp:
					if max(global_dict2[qstring]['data'][ref_query])>1e-7:
						global_dict[quer]['factor'] = float(max(global_dict2[qstring]['data'][quer]))/float(max(global_dict2[qstring]['data'][ref_query]))
					else:
						global_dict[quer]['factor'] =lq_val

					
					global_dict[quer]['f_approx'] = True
						
				query_temp = []

		iqueries = find_lowfactors(global_dict, query_set, thresh=lq_val)
		
		if len(iqueries)>Nlow and icheck>=MaxLoops:
			print('Error: Number of sweeps for the 5 query comparison exceeded')
			print('Number of queries below limit: %d/%d'%(len(iqueries), len(global_dict)))
			exit()

		icheck+=1
		
	sl.save_obj(global_dict, fname)
	


	"""
	global_dict3 is where the pairwise comparison happens
	
	Problem: low search terms presently cannot be ordered for the
	pairwise comparison. Might fail to get correct scaling at the 
	low popularity end. Probably need to add a number of loops in
	which global_dict2 is refined for queries where factor==1e-7

	"""
	global_dict, global_dict3 = pairwise_queryset(global_dict, query_set, trange,fname, ptrend=PYTREND,loc=loc)
	
			

	
	return global_dict, global_dict2, global_dict3
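
The docstrings above describe global_dict as mapping each search term to its raw Trends series plus a relative 'factor'. A minimal post-processing sketch (hypothetical, not part of the original module) of how those factors could be applied to put every series on a common relative scale:

def rescale_queries(global_dict):
	# Hypothetical helper: multiply each query's 0-100 Trends series by its
	# relative 'factor' so that different queries become directly comparable.
	rescaled = {}
	for query, entry in global_dict.items():
		if entry.get('data') is None:
			continue  # queries with no Trends data (the del_queries above)
		rescaled[query] = [v * entry['factor'] for v in entry['data'][query]]
	return rescaled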
Example #11
def pairwise_queryset(global_dict, query_set, trange, fname, ptrend=None, loc='GB', tol=1e-1):
	MaxLoop = len(global_dict)
	ref_fact = 1.0
	allfacts_pw = np.ones(len(query_set))*ref_fact
	ipair =0
	notSorted=True
	cmp2ext = '_2comp'


	allquers = []
	allfacts = []
	
	print('Data comparison before pairwise sweep:')
	print('Query    Factor    Approx?')

	for quer in query_set:
		allfacts.append(global_dict[quer]['factor'])
		allquers.append(quer)

	allquers = sort_queries(allquers, allfacts)


	global_dict3 = {}

	if os.path.exists('obj/'+fname+cmp2ext+'.pkl'):
		print('Loading previous for 2 comparison file...')
		global_dict3 = sl.load_obj(fname+cmp2ext)

	
	
	allquers_prev = allquers
	iqueries = np.arange(len(allquers))
	iqsrt = iqueries
	iqsrt_prev = iqueries

	tmp_dict = {}
	for q in allquers:
		tmp_dict[q] = global_dict[q]['factor']

	srt_pw_dict =  OrderedDict(sorted(tmp_dict.items(), key=operator.itemgetter(1), reverse=True))
	srt_pw_list = []
	for key in srt_pw_dict:
		srt_pw_list.append(key)

	
	data_warning = {}
	while notSorted and ipair<MaxLoop:

		print('Pairwise sweep %d/%d'%(ipair+1, MaxLoop))
		
		srt_pw_list_prev = copy.copy(srt_pw_list)

		srt_pw_dict[srt_pw_list[0]] =1.

		#Loop through query indices, sorted into descending popularity
		for iiq in range(len(srt_pw_list)-1):
			
			if ptrend==None:
				ptrend = TrendReq(hl='en-UK', tz=0)

			quer = srt_pw_list[iiq]
			next_quer =  srt_pw_list[iiq+1]


			qstring = str(quer)+", "+str(next_quer)
			qtmp = [quer, next_quer]

			if not qstring in global_dict3:
				print('Requesting data for: ', qstring)
				
				global_dict3[qstring] = {'data': relative_data(qtmp, ptrend, trange=trange, loc=loc), 'factor':1.0}

				sl.save_obj(global_dict3, fname+cmp2ext)

			
			q1sum = float(sum(global_dict3[qstring]['data'][qtmp[0]]))
			q2sum = float(sum(global_dict3[qstring]['data'][qtmp[1]]))


			if q2sum/(q1sum+1e-10)>100. or q2sum/(q1sum+1e-10)<0.01:
				print('Error: pairwise comparison yielded a drop fraction outside the range [0.01, 100]')
				print('Comparison terms: {0} vs. {1}'.format(qtmp[0], qtmp[1]))
				print('\n**********\nData 1:', global_dict3[qstring]['data'][qtmp[0]])
				print('\n**********\nData 2:', global_dict3[qstring]['data'][qtmp[1]])
				exit()

			#If the next query is much more popular than the previous then swap. 
			if q2sum>1.1*q1sum:
				print('Swapping: %s with %s (ratio: %.2lf)'%(quer, next_quer, q2sum/q1sum))
				srt_pw_list[iiq], srt_pw_list[iiq+1] = srt_pw_list[iiq+1], srt_pw_list[iiq]
				q1sum, q2sum = q2sum, q1sum
				srt_pw_dict[srt_pw_list[iiq]] = 1.

				if ipair>120:
					plt.plot(global_dict3[qstring]['data']['Date'], global_dict3[qstring]['data'][quer])
					plt.plot(global_dict3[qstring]['data']['Date'], global_dict3[qstring]['data'][next_quer])
					plt.show()
			
			srt_pw_dict[srt_pw_list[iiq+1]] = q2sum/q1sum

			
			
			qtmp = []
	

		notSorted=False
		if srt_pw_list_prev!=srt_pw_list:
			notSorted=True
		
		ipair+=1

		if ipair >= MaxLoop:
			print('Warning: reached maximum number of sweeps for pairwise comparison without success')
			print('Using current data, but discrepancies between comparisons exist.')
			

	print('Pairwise sweep complete..')

	factor=1.0
	
	for item in srt_pw_list:
		factor*= srt_pw_dict[item]
		global_dict[item]['factor'] = factor
		global_dict[item]['f_approx'] = False
		print(item, global_dict[item]['factor'] )



	sl.save_obj(global_dict, fname)

	return global_dict, global_dict3	
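
The pairwise sweep above (and the variants in Examples #14 and #15) assumes a sort_queries() helper that is not listed here. A minimal sketch, under the assumption that it simply reorders its first argument by the matching factors in descending order (query names here, or an index array as in Example #15):

import numpy as np

def sort_queries(items, factors):
	# Assumed helper: return `items` reordered by the corresponding `factors`,
	# largest factor (most popular) first.
	order = np.argsort(np.asarray(factors, dtype=float))[::-1]
	return np.asarray(items)[order]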
Example #12
def draw_city_graph_rebrand(name):
    #node_coords = xmlparser.getNodesCoords().values()
    node_coords = saveload.load_obj("coords")


    #G = graph.GetGraphList()
    #saveload.save_obj(G, 'adj_list')
    G = saveload.load_obj(name)
    #node_coords = {'1':('1','1'),'2':('2','1'),'2.5':('2.5','2'),'3':('3','1'),'4':('4','0')}
    #G = {'1' : [('2','1'),('2.5','1')], '2.5': [('1','1'),('4','1'),('3','1')], '3':[('2.5','1'),('4','1')], '4':[('2.5','1'),('3','1')]}
    x = []
    y = []
    #x = [1,2,1,2.5,4,2.5,3,2.5,3,4]
    #y = [1,1,1,2,0,2,1,2,1,0]
    plt.ioff()

    buildings = saveload.load_obj("buildings")
    bid = [ building['id'] for building in buildings]

    firestations = saveload.load_obj("firestations")
    fid = [ firestation['id'] for firestation in firestations]
    colors = []

    fig = plt.gcf()
    fig.set_size_inches(50, 50, forward=True)


    pos = 0
    total = len(G.keys())

   # with open('lens.txt', "w") as f:
    #    for i in G.keys():
     #       f.write("len(G[i] = " + str(len(G[i])) + '\n')

    for i in G.keys():
        if len(G[i]) > 4:
            print("len(", i, ") > 4 and equal ", len(G[i]))

    for i in G.keys():
        i_coords = (float(node_coords[i][0]),float(node_coords[i][1]))

        print("step ", pos, "from", total)
        pos += 1
        print(len(G[i]))
        for j in G[i]:
            j_coords = (float(node_coords[j[0]][0]),float(node_coords[j[0]][1]))

            plt.scatter(float(i_coords[0]),float(i_coords[1]),linewidths=10,c = "blue")
            plt.scatter(float(j_coords[0]),float(j_coords[1]),linewidths=10,c = "blue")
            plt.plot((float(i_coords[0]), float(j_coords[0])), (float(i_coords[1]), float(j_coords[1])), '-k')


    #for i in node_coords:
     #   print(i)
      #  x.append(float(i[0]))
       # y.append(float(i[1]))

    #for i in range(len(x)):
     #   plt.scatter(x[i],y[i],linewidths=10,c = colors[i])

    #for i in range(0,(len(x)-1),2):
     #   plt.plot((x[i], x[i+1]), (y[i], y[i+1]), '-k')
    #i = 0

    #for node in G:
     #   (node_lat, node_lon) = coords[node]
      #  node_lat = float(node_lat)
       # node_lon = float(node_lon)
        #for adj_node in G[node]:
         #   (adj_node_lat,adj_node_lon) = coords[adj_node]
          #  adj_node_lat = float(adj_node_lat)
           # adj_node_lon = float(adj_node_lon)
            #plt.plot([node_lat,adj_node_lat], [node_lon,adj_node_lon], 'black')
        #i = i + 1
        #print(i)

    fig.savefig('classic_Voronezh.png', dpi=100)
    saveload.save_obj(fig, 'fig_demo')
Example #13
def redraw_current():
    fig = saveload.load_obj('fig_demo')
    fig.set_size_inches(100, 100, forward=True)
    fig.savefig('classic_Voronezh_2.png', dpi=100)
Example #14
def pairwise_queryset(global_dict,
                      query_set,
                      trange,
                      fname,
                      ptrend=None,
                      loc='GB',
                      tol=1e-1):
    MaxLoop = 75
    ref_fact = 1.0
    allfacts_pw = np.ones(len(query_set)) * ref_fact
    ipair = 0
    notSorted = True
    cmp2ext = '_2comp'

    allquers = []
    allfacts = []

    print('Data comparison before pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    allquers = sort_queries(allquers, allfacts)

    global_dict3 = {}

    if os.path.exists('obj/' + fname + cmp2ext + '.pkl'):
        print('Loading previous for 2 comparison file...')
        global_dict3 = sl.load_obj(fname + cmp2ext)

    allquers_prev = allquers
    iqueries = np.arange(len(allquers))
    iqsrt = iqueries
    iqsrt_prev = iqueries

    tmp_dict = {}
    iq = 0
    order = {}
    for q in allquers:
        tmp_dict[q] = global_dict[q]['factor']
        order[q] = iq
        iq += 1

    srt_pw_dict = OrderedDict(
        sorted(tmp_dict.items(), key=operator.itemgetter(1), reverse=True))

    data_warning = {}
    prev_order = None
    while notSorted and ipair < MaxLoop:

        print('Pairwise sweep %d/%d' % (ipair + 1, MaxLoop))

        srt_pw_dict_new = {}
        for key in srt_pw_dict:
            srt_pw_dict_new[key] = 1.
            break

        #Loop through query indices, sorted into descending popularity
        iiq = 0
        for quer in srt_pw_dict:
            if iiq < len(srt_pw_dict) - 1:
                if ptrend == None:
                    ptrend = TrendReq(hl='en-UK', tz=0)

                link_prev, link_next, key = srt_pw_dict._OrderedDict__map[quer]
                next_quer = link_next[2]

                qstring = str(quer) + ", " + str(next_quer)
                qtmp = [quer, next_quer]

                #print(qtmp, order[quer], order[next_quer])
                if not qstring in global_dict3:
                    print('Requesting data for: ', qstring, order[quer],
                          order[next_quer])

                    global_dict3[qstring] = {
                        'data': relative_data(qtmp,
                                              ptrend,
                                              trange=trange,
                                              loc=loc),
                        'factor': 1.0
                    }

                    sl.save_obj(global_dict3, fname + cmp2ext)
                """if 'Mountain ringlet' in qtmp:
					for key in global_dict3[qstring]['data']:
						print(key)
					
					plt.plot(global_dict3[qstring]['data']['Date'], global_dict3[qstring]['data']['Mountain ringlet'])
					plt.show()
				if 'Polyommatus icarus' in qtmp:
					for key in global_dict3[qstring]['data']:
						print(key)
					
					plt.plot(global_dict3[qstring]['data']['Date'], global_dict3[qstring]['data']['Polyommatus icarus'])
					plt.show()"""

                num = float(sum(global_dict3[qstring]['data'][qtmp[1]]))
                denom = float(sum(global_dict3[qstring]['data'][qtmp[0]]))

                if num == 0.0:
                    print(
                        'Warning: 0 reached for second value in drop factor...'
                    )
                    frac_drop = 100.0
                elif denom == 0.0:
                    print(
                        'Warning: 0 reached for initial value in drop factor...'
                    )
                    frac_drop = 0.01
                else:
                    frac_drop = num / denom

                srt_pw_dict_new[next_quer] = srt_pw_dict_new[quer] * frac_drop

                if frac_drop > 100. or frac_drop < 0.01:
                    print(
                        'Error: pairwise comparison yielded a drop fraction outside the range [0.01, 100]'
                    )
                    print('Comparison terms: {0} vs. {1}'.format(
                        qtmp[0], qtmp[1]))
                    print('\n**********\nData 1:',
                          global_dict3[qstring]['data'][qtmp[0]])
                    print('\n**********\nData 2:',
                          global_dict3[qstring]['data'][qtmp[1]])
                    exit()

                qtmp = []
            iiq += 1

        srt_pw_dict_prev = copy.copy(srt_pw_dict)
        srt_pw_dict = OrderedDict(
            sorted(srt_pw_dict_new.items(),
                   key=operator.itemgetter(1),
                   reverse=True))

        order = {}

        notSorted = False

        err_warning = {}

        iiq = 0
        num_out = 0
        big_err = 0.
        sum_err = 0.
        errs = []
        for key in srt_pw_dict:
            link_prev, link_next, key = srt_pw_dict._OrderedDict__map[key]
            next_quer = link_next[2]

            link_prev, link_next, key = srt_pw_dict_prev._OrderedDict__map[key]
            next_quer_prev = link_next[2]

            if next_quer != None:
                rat1 = srt_pw_dict[key] / srt_pw_dict[next_quer]
                rat2 = srt_pw_dict_prev[key] / srt_pw_dict_prev[next_quer]
            else:
                rat1 = 1.
                rat2 = 1.

            print('%d. %s :      %.2e    %.2e |   %.2e    %.2e ' %
                  (iiq, key, srt_pw_dict[key], srt_pw_dict_prev[key], rat1,
                   rat2))
            if prev_order != None:
                print(prev_order[key])
            err = abs((rat1 - rat2) / rat1)
            sum_err += err
            errs.append(err)
            if err > tol:
                if err > big_err:
                    big_err = err
                err_warning[key] = True
                notSorted = True
                num_out += 1
            else:
                err_warning[key] = False

            order[key] = iiq

            iiq += 1

        print('Number out:', num_out)
        print('Biggest error:', big_err)
        print('Mean error:', sum_err / float(len(srt_pw_dict)))

        print('Median error:', np.median(np.array(errs)))

        prev_order = copy.copy(order)

        ipair += 1

        if ipair >= MaxLoop:
            print(
                'Warning: reached maximum number of sweeps for pairwise comparison without success'
            )
            print(
                'Using current data, but discrepancies between comparisons exist.'
            )

    print('Pairwise sweep complete..')
    exit()
    #global_dict[qtmp[1]]['factor'] = global_dict[qtmp[0]]['factor']*float(max(global_dict3[qstring]['data'][qtmp[1]]))/float(max(global_dict3[qstring]['data'][qtmp[0]]))

    for iq in range(len(allquers)):
        global_dict[allquers[iq]]['factor'] = allfacts_pw[iq]
        global_dict[allquers[iq]]['f_approx'] = False

    print('Data comparison after pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        print(quer, global_dict[quer]['factor'], global_dict[quer]['f_approx'])
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    sl.save_obj(global_dict, fname)

    return global_dict, global_dict3
Example #15
def pairwise_queryset(global_dict,
                      query_set,
                      trange,
                      fname,
                      ptrend=None,
                      loc='GB',
                      tol=1e-2):
    MaxLoop = 4
    ref_fact = 1.0
    allfacts_pw = np.ones(len(query_set)) * ref_fact
    ipair = 0
    notSorted = True
    cmp2ext = '_2comp'

    allquers = []
    allfacts = []

    print('Data comparison before pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        print(quer, global_dict[quer]['factor'], global_dict[quer]['f_approx'])
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    allquers = sort_queries(allquers, allfacts)

    global_dict3 = {}

    if os.path.exists('obj/' + fname + cmp2ext + '.pkl'):
        print('Loading previous for 2 comparison file...')
        global_dict3 = sl.load_obj(fname + cmp2ext)

    allquers_prev = allquers
    iqueries = np.arange(len(allquers))
    iqsrt = iqueries
    iqsrt_prev = iqueries

    tmp_dict = {}
    for q in allquers:
        tmp_dict[q] = 1.0

    srt_pw_dict = OrderedDict(
        sorted(tmp_dict.items(), key=operator.itemgetter(1)))

    while notSorted and ipair < MaxLoop:

        print('Pairwise sweep %d/%d' % (ipair + 1, MaxLoop))
        allfacts_pw_prev = copy.copy(allfacts_pw)

        #allfacts_pw = np.ones(len(allfacts_pw))

        #Loop through query indices, sorted into descending popularity
        for iiq in iqueries:

            #If last search, use previous as the reference value
            if iiq < len(allquers) - 1:
                pqm = iiq
                pqp = iiq + 1
            else:
                pqm = iiq - 1
                pqp = iiq

            iq1 = iqsrt[pqm]
            iq2 = iqsrt[pqp]

            qstring = str(allquers[iq1]) + ", " + str(allquers[iq2])
            qtmp = [allquers[iq1], allquers[iq2]]

            print('Iq1:', iq1, 'Iq2:', iq2)
            print(
                np.where(iqsrt_prev == iq1)[0],
                np.where(iqsrt_prev == iq2)[0])

            print(allfacts_pw[iq1])
            print(allfacts_pw[iq2])

            if ptrend == None:
                ptrend = TrendReq(hl='en-UK', tz=0)

            if not qstring in global_dict3:
                #print('Requesting data for: ', qstring)

                global_dict3[qstring] = {
                    'data': relative_data(qtmp, ptrend, trange=trange,
                                          loc=loc),
                    'factor': 1.0
                }

                sl.save_obj(global_dict3, fname + cmp2ext)

            num = float(max(global_dict3[qstring]['data'][qtmp[1]]))
            denom = float(max(global_dict3[qstring]['data'][qtmp[0]]))

            if num == 0.0:
                print('Warning: 0 reached for second value in drop factor...')
                frac_drop = 100.0
            elif denom == 0.0:
                print('Warning: 0 reached for initial value in drop factor...')
                frac_drop = 0.01
            else:
                frac_drop = num / denom

            allfacts_pw[iq2] = allfacts_pw[iq1] * frac_drop

            if frac_drop > 100. or frac_drop < 0.01:
                print(
                    'Error: pairwise comparison yielded a drop fraction outside the range [0.01, 100]'
                )
                print('Comparison terms: {0} vs. {1}'.format(qtmp[0], qtmp[1]))
                print('\n**********\nData 1:',
                      global_dict3[qstring]['data'][qtmp[0]])
                print('\n**********\nData 2:',
                      global_dict3[qstring]['data'][qtmp[1]])
                exit()

            qtmp = []

        iqsrt_prev = copy.copy(iqsrt)
        iqsrt = sort_queries(copy.copy(iqueries), allfacts_pw)
        print(iqsrt)
        print(iqsrt_prev)

        print('\n***************')
        print('Query   -   Factor')
        notSorted = False

        iiq = 0
        for iq in iqsrt:
            print('%d. %s :      %.2e    %d' %
                  (iiq, allquers[iq], allfacts_pw[iq] / allfacts_pw_prev[iq],
                   iqsrt_prev[iq]))
            if abs((allfacts_pw[iq] - allfacts_pw_prev[iq]) /
                   allfacts_pw[iq]) > tol:
                notSorted = True

            iiq += 1

        ipair += 1

        if ipair >= MaxLoop:
            print(
                'Error: reached maximum number of sweeps for pairwise comparison without success'
            )
            exit()

    print('Pairwise sweep complete..')
    exit()
    #global_dict[qtmp[1]]['factor'] = global_dict[qtmp[0]]['factor']*float(max(global_dict3[qstring]['data'][qtmp[1]]))/float(max(global_dict3[qstring]['data'][qtmp[0]]))

    for iq in range(len(allquers)):
        global_dict[allquers[iq]]['factor'] = allfacts_pw[iq]
        global_dict[allquers[iq]]['f_approx'] = False

    print('Data comparison after pairwise sweep:')
    print('Query    Factor    Approx?')

    for quer in query_set:
        print(quer, global_dict[quer]['factor'], global_dict[quer]['f_approx'])
        allfacts.append(global_dict[quer]['factor'])
        allquers.append(quer)

    sl.save_obj(global_dict, fname)

    return global_dict, global_dict3