def create_msk_location():
    """Create the Moscow location, its ignored tags and its grid of areas.

    Inserts one ``Location`` row named ``Moscow`` (with smaller
    ``height``/``north_width``/``south_width`` values when
    ``current_app.config['TESTING']`` is truthy), one ``IgnoreForLocation``
    row per ignored tag, and then ``SimpleArea`` rows of radius 500 laid
    out row by row from the north-west corner of the bounding box.
    """
    if current_app.config['TESTING']:
        # Reduced dimensions used by the test configuration.
        height, north_width, south_width = 3000, 3000, 3000
    else:
        height, north_width, south_width = 56132, 44181, 44756
    msk = Location.create(name=u'Moscow',
                          north=55.996804,
                          south=55.492144,
                          west=37.235253,
                          east=37.945527,
                          height=height,
                          north_width=north_width,
                          south_width=south_width,
                          timezone='Europe/Moscow')
    get_logger().info('Moscow location created')

    for tag in [
            u'moscow', u'москва', u'russia', u'россия', u'vscorussia',
            u'vscomoscow', u'vscomsk', u'msk', u'мск'
    ]:
        IgnoreForLocation.create(location=msk, tag=tag)

    radius = 500
    # Conversion factors supplied by the model; presumably degrees of
    # latitude/longitude per unit of ``radius`` -- confirm against Location.
    lat_km = msk.lat_km()
    long_km = msk.long_km()

    # Centres start one converted radius inside the north and west edges and
    # advance by two radii; a row or column is only created while the centre
    # plus one converted radius still lies within the south / east edge.
    latitude = msk.north - lat_km * radius
    row = 0
    while latitude - lat_km * radius >= msk.south:
        longitude = msk.west + long_km * radius
        column = 0
        while longitude + long_km * radius <= msk.east:
            SimpleArea.create(location=msk,
                              latitude=latitude,
                              longitude=longitude,
                              radius=radius,
                              row=row,
                              column=column)
            longitude += long_km * 2 * radius
            column += 1
        # ``column`` now equals the number of areas created in this row.
        get_logger().info(
            "Moscow areas created {0} for latitude {1}, row {2}".format(
                column, latitude, row))
        latitude -= lat_km * 2 * radius
        row += 1
def hello():
    """Log one greeting at every logging level, then print ``hello``."""
    greetings = (
        ('debug', "Hello debug"),
        ('info', "Hello info"),
        ('warning', "Hello warning"),
        ('error', "Hello error"),
        ('critical', "Hello critical"),
    )
    for level, message in greetings:
        # Look the logger up on every call, one lookup per message.
        getattr(get_logger(), level)(message)
    # A parenthesised single string prints the same text under the
    # Python 2 print statement and the print function.
    print("hello")
def clear_old_tags():
    """Delete every hashtag that has no ``HashtagFrequency`` rows left.

    Hashtags are left-outer-joined to ``HashtagFrequency`` and grouped per
    hashtag; those whose frequency count is zero are deleted one by one.
    The number of hashtags selected is logged before the deletion and a
    completion message is logged afterwards.
    """
    unused_tags = (Hashtag
                   .select()
                   .join(HashtagFrequency, JOIN_LEFT_OUTER)
                   .group_by(Hashtag)
                   .having(fn.Count(HashtagFrequency.id) == 0))
    count = unused_tags.count()
    get_logger().info("{0} old tags to remove".format(count))
    for hashtag in unused_tags:
        hashtag.delete_instance()
    # Logged after the loop, so it reports completion rather than intent.
    get_logger().info("Old tags removed")
def create_london_location():
    """Create the London location, its ignored tags and its grid of areas.

    Inserts one ``Location`` row named ``London`` (with smaller
    ``height``/``north_width``/``south_width`` values when
    ``current_app.config['TESTING']`` is truthy), one ``IgnoreForLocation``
    row per ignored tag, and then ``SimpleArea`` rows of radius 500 laid
    out row by row from the north-west corner of the bounding box.
    """
    if current_app.config['TESTING']:
        # Reduced dimensions used by the test configuration.
        height, north_width, south_width = 2000, 4000, 4500
    else:
        height, north_width, south_width = 51108, 59328, 59927
    london = Location.create(name=u'London',
                             north=51.709035,
                             south=51.249583,
                             west=-0.552444,
                             east=0.305863,
                             height=height,
                             north_width=north_width,
                             south_width=south_width,
                             timezone='Europe/London')
    get_logger().info('London location created')

    for tag in [u'london', u'uk']:
        IgnoreForLocation.create(location=london, tag=tag)

    radius = 500
    # Conversion factors supplied by the model; presumably degrees of
    # latitude/longitude per unit of ``radius`` -- confirm against Location.
    lat_km = london.lat_km()
    long_km = london.long_km()

    # Centres start one converted radius inside the north and west edges and
    # advance by two radii; a row or column is only created while the centre
    # plus one converted radius still lies within the south / east edge.
    latitude = london.north - lat_km * radius
    row = 0
    while latitude - lat_km * radius >= london.south:
        longitude = london.west + long_km * radius
        column = 0
        while longitude + long_km * radius <= london.east:
            SimpleArea.create(location=london,
                              latitude=latitude,
                              longitude=longitude,
                              radius=radius,
                              row=row,
                              column=column)
            longitude += long_km * 2 * radius
            column += 1
        # ``column`` now equals the number of areas created in this row.
        get_logger().debug(
            "London areas created {0} for latitude {1}, row {2}".format(
                column, latitude, row))
        latitude -= lat_km * 2 * radius
        row += 1
def clear_old_hours(location, min_time):
    """Remove hour records of ``location`` that end before ``min_time``.

    Selects the ``TagsOfAreaInHour`` rows belonging to the location's
    simple areas whose ``max_stamp`` is earlier than ``min_time``, deletes
    the ``HashtagFrequency`` rows that reference them, deletes the hour
    rows themselves, and logs how many hour rows matched before deletion.

    :param location: location whose ``simple_areas`` are cleaned up.
    :param min_time: cutoff compared against ``max_stamp``.
    """
    def expired_filter():
        # Built afresh for each query, so ``location.simple_areas`` is read
        # once per use.
        return (TagsOfAreaInHour.area << location.simple_areas,
                TagsOfAreaInHour.max_stamp < min_time)

    hours_to_clear = TagsOfAreaInHour.select().where(*expired_filter())
    count = hours_to_clear.count()

    # Frequencies pointing at the expired hours are removed before the
    # hours themselves.
    frequencies = HashtagFrequency.delete().where(
        HashtagFrequency.area_in_hour << hours_to_clear)
    frequencies.execute()

    expired_hours = TagsOfAreaInHour.delete().where(*expired_filter())
    expired_hours.execute()

    message = "Location {0}: {1} old hours to remove".format(
        location.name, count)
    get_logger().info(message)
def create_spb_location():
    """Create the Saint Petersburg location, ignored tags and area grid.

    Inserts one ``Location`` row named ``Saint Petersburg``, one
    ``IgnoreForLocation`` row per ignored tag, and then ``SimpleArea`` rows
    of radius 500 laid out row by row from the north-west corner of the
    bounding box.
    """
    spb = Location.create(name=u'Saint Petersburg',
                          north=60.091486,
                          south=59.744044,
                          west=30.089023,
                          east=30.562465,
                          height=38646,
                          north_width=26396,
                          south_width=26396,
                          timezone='Europe/Moscow')
    get_logger().info('Saint Petersburg location created')

    for tag in [
            u'spb', u'saintpetersburg', u'питер', u'санктпетербург',
            u'петербург', u'спб', u'vscospb', u'vscorussia', u'russia'
    ]:
        IgnoreForLocation.create(location=spb, tag=tag)

    radius = 500
    # Conversion factors supplied by the model; presumably degrees of
    # latitude/longitude per unit of ``radius`` -- confirm against Location.
    lat_km = spb.lat_km()
    long_km = spb.long_km()

    # Centres start one converted radius inside the north and west edges and
    # advance by two radii; a row or column is only created while the centre
    # plus one converted radius still lies within the south / east edge.
    latitude = spb.north - lat_km * radius
    row = 0
    while latitude - lat_km * radius >= spb.south:
        longitude = spb.west + long_km * radius
        column = 0
        while longitude + long_km * radius <= spb.east:
            SimpleArea.create(location=spb,
                              latitude=latitude,
                              longitude=longitude,
                              radius=radius,
                              row=row,
                              column=column)
            longitude += long_km * 2 * radius
            column += 1
        # ``column`` now equals the number of areas created in this row.
        get_logger().debug(
            "Saint Petersburg areas created {0} for latitude {1}, row {2}".
            format(column, latitude, row))
        latitude -= lat_km * 2 * radius
        row += 1
def summarize_tags(threads_count=100):
    """Summarize the tags of every simple area with worker threads.

    Every ``SimpleArea`` row is put on a queue, ``threads_count``
    ``TagsSummarizingThread`` workers are started on that queue, and the
    call returns once all of them have been joined.

    :param threads_count: number of worker threads to start.
    """
    get_logger().info('Summarizing starts')

    pending_areas = Queue.Queue()
    for simple_area in SimpleArea.select():
        pending_areas.put(simple_area)

    workers = []
    for _ in range(threads_count):
        worker = TagsSummarizingThread(pending_areas,
                                       current_app.config['COMMON_IGNORE'],
                                       get_logger())
        workers.append(worker)
        worker.start()

    # Block until every worker has finished.
    for worker in workers:
        worker.join()
def create_berlin_location():
    """Create the Berlin location, its ignored tags and its grid of areas.

    Inserts one ``Location`` row named ``Berlin``, one
    ``IgnoreForLocation`` row per ignored tag, and then ``SimpleArea`` rows
    of radius 500 laid out row by row from the north-west corner of the
    bounding box.
    """
    berlin = Location.create(name=u'Berlin',
                             north=52.677519,
                             south=52.337324,
                             west=13.086277,
                             east=13.763996,
                             height=37840,
                             north_width=45882,
                             south_width=45882,
                             timezone='Europe/Berlin')
    get_logger().info('Berlin location created')

    for tag in [u'berlin', u'vscoberlin', u'germany']:
        IgnoreForLocation.create(location=berlin, tag=tag)

    radius = 500
    # Conversion factors supplied by the model; presumably degrees of
    # latitude/longitude per unit of ``radius`` -- confirm against Location.
    lat_km = berlin.lat_km()
    long_km = berlin.long_km()

    # Centres start one converted radius inside the north and west edges and
    # advance by two radii; a row or column is only created while the centre
    # plus one converted radius still lies within the south / east edge.
    latitude = berlin.north - lat_km * radius
    row = 0
    while latitude - lat_km * radius >= berlin.south:
        longitude = berlin.west + long_km * radius
        column = 0
        while longitude + long_km * radius <= berlin.east:
            SimpleArea.create(location=berlin,
                              latitude=latitude,
                              longitude=longitude,
                              radius=radius,
                              row=row,
                              column=column)
            longitude += long_km * 2 * radius
            column += 1
        # ``column`` now equals the number of areas created in this row.
        get_logger().debug(
            "Berlin areas created {0} for latitude {1}, row {2}".format(
                column, latitude, row))
        latitude -= lat_km * 2 * radius
        row += 1
def create_tables():
    """Create every model table, logging each one as it is created.

    Tables are created in the fixed order listed below (presumably so that
    referenced tables exist before the tables that point at them).
    """
    creation_plan = (
        (Location, 'Location table created'),
        (AreaGroup, 'AreaGroup table created'),
        (IgnoreForLocation, 'IgnoreForLocation table created'),
        (Hashtag, 'Hashtag table created'),
        (SimpleArea, 'SimpleArea table created'),
        (TagsOfAreaInHour, 'TagsOfAreaInHour table created'),
        (HashtagFrequency, 'HashtagFrequency table created'),
    )
    for model, message in creation_plan:
        model.create_table()
        get_logger().info(message)
def drop_tables():
    """Drop every model table that currently exists, logging each drop.

    Tables are visited in the fixed order listed below; a table that does
    not exist is skipped without a log entry.
    """
    drop_plan = (
        (HashtagFrequency, 'HashtagFrequency table dropped'),
        (TagsOfAreaInHour, 'TagsOfAreaInHour table dropped'),
        (SimpleArea, 'SimpleArea table dropped'),
        (IgnoreForLocation, 'IgnoreForLocation table dropped'),
        (AreaGroup, 'AreaGroup table dropped'),
        (Location, 'Location table dropped'),
        (Hashtag, 'Hashtag table dropped'),
    )
    for model, message in drop_plan:
        if not model.table_exists():
            continue
        model.drop_table()
        get_logger().info(message)
def update_tags(request_threads_count, summarize_threads_count, memory):
    """Run one full tag update: fetch, clean up, summarize and group.

    Steps, in order:

    1. Start ``request_threads_count`` ``TagsUpdaterThread`` workers that
       share one queue and one lock.
    2. For every location: clear hour records older than ``memory``
       seconds, create new ``TagsOfAreaInHour`` rows for each elapsed
       ``TAGS_TIME_PERIOD`` window and each area, then queue every area
       that still has unprocessed hours.
    3. Wait for the queue to drain, stop and join the workers.
    4. Remove unused hashtags, summarize tags, and run ``TagsGrouper`` for
       every location.

    :param request_threads_count: number of ``TagsUpdaterThread`` workers.
    :param summarize_threads_count: passed to ``summarize_tags``.
    :param memory: number of seconds of history to keep; older hour
        records are cleared.
    """
    get_logger().info('Tags update starts')
    areas_queue = Queue.Queue()
    lock = threading.Lock()
    threads = []
    # Workers are started before any area is queued, so they can begin
    # consuming while the loop below is still producing.
    for i in range(request_threads_count):
        t = TagsUpdaterThread(areas_queue, lock, current_app.config['LOGINS'],
                              get_logger())
        threads.append(t)
        t.start()
    for location in Location.select():
        # Current GMT time with the tzinfo stripped, i.e. a naive datetime.
        now = datetime.datetime.now(tz=pytz.timezone('GMT')).replace(
            tzinfo=None)
        last_memory_time = now - datetime.timedelta(seconds=memory)
        small_delta = datetime.timedelta(
            seconds=current_app.config['TAGS_TIME_PERIOD'])
        clear_old_hours(location, last_memory_time)
        # NOTE(review): update_location_time is defined elsewhere;
        # presumably it refreshes ``location.updated`` -- confirm.
        update_location_time(location)
        # Fall back to the memory cutoff when the location has no
        # ``updated`` value yet.
        start_time = location.updated or last_memory_time
        get_logger().info("Add new hour-areas to {0}".format(location.name))
        # Add new hour-areas: one row per area for every complete
        # ``small_delta`` window between ``start_time`` and ``now``.
        count = 0
        cur_max_time = start_time
        while cur_max_time + small_delta <= now:
            for area in SimpleArea.select().where(
                    SimpleArea.location == location):
                tah = TagsOfAreaInHour.create(area=area,
                                              max_stamp=cur_max_time +
                                              small_delta,
                                              min_stamp=cur_max_time)
                count += 1
            cur_max_time += small_delta
        get_logger().info("{0} hour-areas added to {1}".format(
            count, location.name))
        update_location_time(location)
        # Queue every area that still has at least one unprocessed hour.
        for area in location.simple_areas:
            # ``== None`` is deliberate: it is a peewee field comparison
            # that becomes a NULL test in SQL, not a Python identity check.
            if TagsOfAreaInHour.select().where(
                    TagsOfAreaInHour.processed == None,
                    TagsOfAreaInHour.area == area).count() > 0:
                areas_queue.put(area)
    get_logger().info('Waiting for all threads')
    # Hope that putting is faster than processing.
    areas_queue.join()
    get_logger().info('Areas updated')
    # Signal every worker to stop first, then wait for each to finish.
    for t in threads:
        t.stop()
    get_logger().debug('Threads stopping')
    for t in threads:
        t.join()
    clear_old_tags()
    summarize_tags(summarize_threads_count)
    get_logger().info('Tags summarized')
    for location in Location.select():
        grouper = TagsGrouper(location.id)
        grouper.process()
        get_logger().info("Tags groupped for {0}".format(location.name))
    get_logger().info('Tags update is done')