def getBoundaries(host, port, db_name): # Get maps coordinate dao = Dao(host, port) dao.connect(db_name) c_list = list(dao.query('globals', '')) c_dict = dict(c_list[0]) dao.close() return (float(c_dict['lat_max']),float(c_dict['lon_max'])),(float(c_dict['lat_min']),float(c_dict['lon_min']))
def getPlotsMap(host, port, db_name, collection): # Get maps coordinate dao = Dao(host, port) dao.connect(db_name) c_list = list(dao.query(collection, '')) c_dict = dict(c_list[0]) dao.close() # Select the map m = Basemap(projection='mill',llcrnrlat=int(c_dict['lat_min']),urcrnrlat=int(c_dict['lat_max']+1),llcrnrlon=int(c_dict['lon_min']),urcrnrlon=int(c_dict['lon_max']+1),resolution='i') m.drawcoastlines() m.drawcountries() m.drawstates() m.fillcontinents(color='#04BAE3',lake_color='#FFFFFF') m.drawmapboundary(fill_color='#FFFFFF') return m
def main(args): if len(args) == 1 or args[1] == '--h': print('Parameters : [ hostname, port, s ]') return 0 # Parameters for the db host = args[1] port = int(args[2]) # Parameters for the matrix s = int(args[3]) # Parameters for http requests max_waiting_time = 1 # 1s timeout for each request l_fails = [] #list containing the fails url db_name = 'db_geo_index' # geoindex collection collection_name = 'topics' # topic collection collection_topics_name = 'topics_trentino_test_approximated' max_loc, min_loc = getBoundaries(host, port, db_name) matrix = Matrix(min_loc, max_loc, s) matrix.toString() print('') # connect to geo dao dao = GeoDao(host, port) dao.connect(db_name, collection_topics_name) # =================================================================== # Get the plot map m = getPlotsMap(host, port, db_name, 'globals') # =================================================================== empty_cell_counter = 0 n_cells = 0 while matrix.hasNext(): # For the plotting cell_full = False locs = matrix.next() #actual position of the iterator current = matrix.current print('Current cell : '+ str(current), end = '\r') bl = [locs[0],locs[1]] tr = [locs[2],locs[3]] result = dao.getUrlsByBox(bl,tr) #do something with result l_url = [] l_res = list(result) if len(l_res) == 0: empty_cell_counter = empty_cell_counter + 1 elif len(l_res) > 0: #print(l_res) #print('') # compute the coordinates for the center of the cell cluster_lon = bl[1] + (tr[1] - bl[1]) / 2 cluster_lat = bl[0] + (tr[0] - bl[0]) / 2 # For plotting cell_full = True # =================================================================== # Get the plot map if cell_full == True: x,y = m(cluster_lon,cluster_lat) m.plot(x,y, 'ro') # =================================================================== n_cells = n_cells + 1 dao.close() print('') print('# cells : '+str(n_cells)) print('# empty : '+str(empty_cell_counter)) plt.title("Geo Plotting of the full cells") plt.show() return 0
def main(args): if len(args) == 1 or args[1] == '--h': print('Parameters : [ hostname, port, s ]') return 0 # Parameters for the db host = args[1] port = int(args[2]) # Parameters for the matrix s = int(args[3]) # Parameters for http requests max_waiting_time = 1 # 1s timeout for each request l_fails = [] #list containing the fails url db_name = 'db_geo_index' # geoindex collection collection_name = 'clicks' # topic collection collection_topics_name = 'topics_mini' max_loc, min_loc = getBoundaries(host, port, db_name) matrix = Matrix(min_loc, max_loc, s) matrix.toString() print('') # connect to geo dao dao = GeoDao(host, port) dao.connect(db_name, collection_name) # =================================================================== # Get the plot map m = getPlotsMap(host, port, db_name, 'globals') # =================================================================== empty_cell_counter = 0 n_cells = 0 while matrix.hasNext(): # For the plotting cell_full = False locs = matrix.next() #actual position of the iterator current = matrix.current print('Current cell : '+ str(current), end = '\r') bl = [locs[0],locs[1]] tr = [locs[2],locs[3]] result = dao.getUrlsByBox(bl,tr) #do something with result l_url = [] l_res = list(result) if len(l_res) == 0: empty_cell_counter = empty_cell_counter + 1 elif len(l_res) > 0: # compute the coordinates for the center of the cell cluster_lon = bl[1] + (tr[1] - bl[1]) / 2 cluster_lat = bl[0] + (tr[0] - bl[0]) / 2 # extract url and put it in a list for row in l_res: d_row = dict(row) urls = d_row['urls'] for url in urls: l_url.append(url) # Get corpuses from of all the url into a cell http_ret = http.get_corpuses(l_url, max_waiting_time, l_fails) corpuses = http_ret[0] l_fails = list(set(l_fails + http_ret[1])) # merges fails list # remove empty sublist corpuses = [x for x in corpuses if x != []] if len(corpuses) > 0: ''' # ONLY FOR TEST : save all the corpus ============= print('Saving corpuses on DB ...', end = '\r') corpuses_collection_name= 'corpuses_mini' d_corpuses = {} d_corpuses['loc'] = [cluster_lat,cluster_lon] d_corpuses['corpuses'] = corpuses dao.addOne(corpuses_collection_name, d_corpuses) # ================================================= # Make lda on the corpuses print('Doing LDA ...', end = '\r') l_topics = tmpLda(corpuses) # Save the topic list into the db print('Saving topics on DB ...', end = '\r') d_topics = {} d_topics['loc'] = [cluster_lat,cluster_lon] d_topics['topics'] = l_topics dao.addOne(collection_topics_name, d_topics) # For plotting cell_full = True ''' # =================================================================== # Get the plot map if cell_full == True: x,y = m(cluster_lon,cluster_lat) m.plot(x,y, 'ro') # =================================================================== n_cells = n_cells + 1 dao.close() print('') print('# cells : '+str(n_cells)) print('# empty : '+str(empty_cell_counter)) plt.title("Geo Plotting of the full cells") plt.show() return 0