def update_console():
    """Refresh the single-line status readout on stdout.

    Reads the module-level counters, assembles them into one status string
    and writes it followed by a carriage return (no newline), then flushes
    stdout. Nothing is returned and no counter is modified.
    """
    global _tweets_seen, _passed_filter, _possible_hits, _hits, _overflow
    global _buffer, _start_time, _cache_hits, _cache_size

    # Share of seen tweets that passed the filter, truncated to a whole
    # percent; stays 0 until at least one tweet has been seen.
    if _tweets_seen > 0:
        pass_ratio = float(_passed_filter) / _tweets_seen
        filter_pct = int(100 * pass_ratio)
    else:
        filter_pct = 0

    elapsed = time.time() - _start_time

    pieces = [
        'tweets seen: ', str(_tweets_seen),
        " passed filter: ", str(_passed_filter),
        " ({0}%)".format(filter_pct),
        " hits ", str(_possible_hits + _fetch_pool_size),
        '/', str(_cache_hits),
        " agrams: ", str(_hits),
        " cachesize: ", str(_cache_size),
        " buffer: ", str(_buffer),
        " runtime: ", anagramfunctions.format_seconds(elapsed),
    ]

    # Trailing '\r' returns the cursor to column 0 without a line feed.
    sys.stdout.write(''.join(pieces) + '\r')
    sys.stdout.flush()
def delete_short_entries(srcdb, cutoff=20, start=0):
    """Remove every key whose hash length is below ``cutoff`` from a gdbm file.

    The database at ``srcdb`` is opened read/write, all keys are walked
    with ``firstkey``/``nextkey``, and each key for which
    ``anagramfunctions.length_from_hash(key) < cutoff`` is marked. The
    marked keys are then deleted, the database is synced and closed, and a
    summary is printed. Progress is written to stdout throughout.

    Deletion runs in a ``finally`` clause, so keys marked before the scan
    is interrupted (by an exception or Ctrl-C) are still removed.

    Args:
        srcdb: path of the gdbm database to trim in place.
        cutoff: keys whose hash length is strictly below this are deleted.
        start: unused; kept so existing callers keep working.

    Returns:
        None. When the ``gdbm`` module cannot be imported a message is
        printed and the function returns without touching the database.
    """
    try:
        import gdbm
    except ImportError:
        # Nothing below can work without the module; previously execution
        # fell through and died with a NameError on ``gdbm.open``.
        # NOTE(review): on Python 3 this module is ``dbm.gnu`` -- confirm
        # which interpreter this file targets before adding a fallback.
        print('database manipulation requires gdbm')
        return

    print('trimming %s, cutoff %i' % (srcdb, cutoff))
    start_time = time.time()
    db = gdbm.open(srcdb, 'wf')
    seen = 0
    marked = 0
    deleted = 0
    todel = set()
    try:
        try:
            k = db.firstkey()
            while k is not None:
                seen += 1
                # Fetch the successor before anything else so traversal
                # never depends on a key that is about to be removed.
                nextk = db.nextkey(k)
                if anagramfunctions.length_from_hash(k) < cutoff:
                    # Only mark here; deleting during traversal can make
                    # gdbm skip keys, so removal is deferred.
                    todel.add(k)
                    marked += 1
                sys.stdout.write('seen/marked: %i/%i next: %s\t\t\t\t\r'
                                 % (seen, marked, nextk))
                sys.stdout.flush()
                k = nextk
        finally:
            print('\ndeleting %i entries' % marked)
            for key in todel:
                try:
                    del db[key]
                except KeyError:
                    print('key error for key %s' % key)
                else:
                    # Count only keys that were actually removed.
                    deleted += 1
                sys.stdout.write('deleted %i/%i\r' % (deleted, marked))
                sys.stdout.flush()
            db.sync()
    finally:
        # Always release the database handle, even if deletion failed.
        db.close()

    duration = time.time() - start_time
    print('\ndeleted %i of %i in %s'
          % (deleted, seen, anagramfunctions.format_seconds(duration)))
def delete_short_entries(srcdb, cutoff=20, start=0):
    """Remove every key whose hash length is below ``cutoff`` from a gdbm file.

    The database at ``srcdb`` is opened read/write, all keys are walked
    with ``firstkey``/``nextkey``, and each key for which
    ``anagramfunctions.length_from_hash(key) < cutoff`` is marked. The
    marked keys are then deleted, the database is synced and closed, and a
    summary is printed. Progress is written to stdout throughout.

    Deletion runs in a ``finally`` clause, so keys marked before the scan
    is interrupted (by an exception or Ctrl-C) are still removed.

    Args:
        srcdb: path of the gdbm database to trim in place.
        cutoff: keys whose hash length is strictly below this are deleted.
        start: unused; kept so existing callers keep working.

    Returns:
        None. When the ``gdbm`` module cannot be imported a message is
        printed and the function returns without touching the database.
    """
    try:
        import gdbm
    except ImportError:
        # Nothing below can work without the module; previously execution
        # fell through and died with a NameError on ``gdbm.open``.
        # NOTE(review): on Python 3 this module is ``dbm.gnu`` -- confirm
        # which interpreter this file targets before adding a fallback.
        print('database manipulation requires gdbm')
        return

    print('trimming %s, cutoff %i' % (srcdb, cutoff))
    start_time = time.time()
    db = gdbm.open(srcdb, 'wf')
    seen = 0
    marked = 0
    deleted = 0
    todel = set()
    try:
        try:
            k = db.firstkey()
            while k is not None:
                seen += 1
                # Fetch the successor before anything else so traversal
                # never depends on a key that is about to be removed.
                nextk = db.nextkey(k)
                if anagramfunctions.length_from_hash(k) < cutoff:
                    # Only mark here; deleting during traversal can make
                    # gdbm skip keys, so removal is deferred.
                    todel.add(k)
                    marked += 1
                sys.stdout.write('seen/marked: %i/%i next: %s\t\t\t\t\r'
                                 % (seen, marked, nextk))
                sys.stdout.flush()
                k = nextk
        finally:
            print('\ndeleting %i entries' % marked)
            for key in todel:
                try:
                    del db[key]
                except KeyError:
                    print('key error for key %s' % key)
                else:
                    # Count only keys that were actually removed.
                    deleted += 1
                sys.stdout.write('deleted %i/%i\r' % (deleted, marked))
                sys.stdout.flush()
            db.sync()
    finally:
        # Always release the database handle, even if deletion failed.
        db.close()

    duration = time.time() - start_time
    print('\ndeleted %i of %i in %s'
          % (deleted, seen, anagramfunctions.format_seconds(duration)))
def update_console():
    """Refresh the single-line status readout on stdout.

    Reads the module-level counters, assembles them into one status string
    and writes it followed by a carriage return (no newline), then flushes
    stdout. Nothing is returned and no counter is modified.
    """
    global _tweets_seen, _passed_filter, _possible_hits, _hits, _overflow
    global _buffer, _start_time, _cache_hits, _cache_size

    # Share of seen tweets that passed the filter, truncated to a whole
    # percent; stays 0 until at least one tweet has been seen.
    if _tweets_seen > 0:
        pass_ratio = float(_passed_filter) / _tweets_seen
        filter_pct = int(100 * pass_ratio)
    else:
        filter_pct = 0

    elapsed = time.time() - _start_time

    pieces = [
        'tweets seen: ', str(_tweets_seen),
        " passed filter: ", str(_passed_filter),
        " ({0}%)".format(filter_pct),
        " hits ", str(_possible_hits + _fetch_pool_size),
        '/', str(_cache_hits),
        " agrams: ", str(_hits),
        " cachesize: ", str(_cache_size),
        " buffer: ", str(_buffer),
        " runtime: ", anagramfunctions.format_seconds(elapsed),
    ]

    # Trailing '\r' returns the cursor to column 0 without a line feed.
    sys.stdout.write(''.join(pieces) + '\r')
    sys.stdout.flush()
def sleep(self, interval, debug=False):
    """Pause for a randomized period derived from ``interval``.

    The actual duration is ``interval * 0.5`` plus a random integer drawn
    from ``[0, interval)``, i.e. between 0.5x and 1.5x of ``interval``.
    While sleeping, a countdown is rewritten on one stdout line every
    ``sleep_chunk`` seconds.

    Args:
        interval: base duration in seconds. Zero or negative values result
            in no sleep instead of an error.
        debug: when true, do not sleep; return the chosen duration instead.

    Returns:
        The chosen duration in minutes when ``debug`` is true, otherwise
        None.
    """
    print('base interval is %d' % (interval / 60))
    # randrange(0, n) raises ValueError for n <= 0, so skip the jitter then.
    randfactor = random.randrange(0, interval) if interval > 0 else 0
    interval = interval * 0.5 + randfactor
    sleep_chunk = 10  # seconds
    print('sleeping for %d minutes' % (interval / 60))

    if debug:
        return interval / 60

    while interval > 0:
        sleep_status = ' %s remaining \r' % (
            anagramfunctions.format_seconds(interval))
        sys.stdout.write(sleep_status.rjust(35))
        sys.stdout.flush()
        # Sleep only what is left on the final pass so the total does not
        # overshoot the announced duration by up to a whole chunk.
        time.sleep(min(sleep_chunk, interval))
        interval -= sleep_chunk
    print('\n')