예제 #1
0
def update_console():
    """Redraw the single-line status display on stdout.

    Only reads module-level state (``_tweets_seen``, ``_passed_filter``,
    ``_possible_hits``, ``_fetch_pool_size``, ``_cache_hits``, ``_hits``,
    ``_cache_size``, ``_buffer`` and ``_start_time``); nothing is modified
    and nothing is returned.  The line ends with a carriage return so the
    next call overwrites it in place.
    """
    # Share of seen tweets that passed the filter, as a whole percentage.
    if _tweets_seen > 0:
        seen_percent = int(100 * (float(_passed_filter) / _tweets_seen))
    else:
        seen_percent = 0
    elapsed = time.time() - _start_time

    pieces = [
        'tweets seen: ', str(_tweets_seen),
        ' passed filter: ', str(_passed_filter),
        ' ({0}%)'.format(seen_percent),
        ' hits ', str(_possible_hits + _fetch_pool_size),
        '/', str(_cache_hits),
        ' agrams: ', str(_hits),
        ' cachesize: ', str(_cache_size),
        ' buffer: ', str(_buffer),
        ' runtime: ', anagramfunctions.format_seconds(elapsed),
    ]
    sys.stdout.write(''.join(pieces) + '\r')
    sys.stdout.flush()
예제 #2
0
def delete_short_entries(srcdb, cutoff=20, start=0):
    """Delete the entries of a gdbm database whose keys are "short".

    The database is walked once with ``firstkey``/``nextkey``.  Every key for
    which ``anagramfunctions.length_from_hash(key) < cutoff`` is collected,
    and the collected keys are deleted once the walk has finished.  If the
    walk is interrupted by an exception, the keys marked so far are still
    deleted before the exception propagates.

    Args:
        srcdb: Path of the gdbm database.  It is opened with flags ``'wf'``
            (existing database, read/write, fast mode), so it must exist.
        cutoff: Keys whose ``length_from_hash`` value is below this number
            are deleted.
        start: Unused; kept so existing callers keep working.

    Returns:
        None.  Progress and a final summary are written to stdout.  When the
        ``gdbm`` module is unavailable a message is printed and nothing else
        happens.
    """
    try:
        import gdbm
    except ImportError:
        print('database manipulation requires gdbm')
        # Without the module there is nothing we can do; continuing would
        # only fail on the first use of ``gdbm`` below.
        return

    print('trimming %s, cutoff %i' % (srcdb, cutoff))
    start_time = time.time()
    db = gdbm.open(srcdb, 'wf')
    seen = 0
    marked = 0
    deleted = 0
    todel = set()
    try:
        try:
            k = db.firstkey()
            while k is not None:
                seen += 1
                # Fetch the successor before deciding anything about ``k``.
                nextk = db.nextkey(k)
                if anagramfunctions.length_from_hash(k) < cutoff:
                    # Deletion is deferred until the walk is over
                    # (presumably because deleting during a
                    # firstkey/nextkey traversal can skip entries).
                    todel.add(k)
                    marked += 1
                sys.stdout.write('seen/marked: %i/%i next: %s\t\t\t\t\r' %
                                 (seen, marked, nextk))
                sys.stdout.flush()
                k = nextk
        finally:
            # Runs even when the walk was interrupted, so the keys marked so
            # far are still removed.
            print('\ndeleting %i entries' % marked)
            for key in todel:
                try:
                    del db[key]
                except KeyError:
                    print('key error for key %s' % key)
                else:
                    # Count only deletions that actually happened.
                    deleted += 1
                sys.stdout.write('deleted %i/%i\r' % (deleted, marked))
                sys.stdout.flush()

            # Fast mode does not synchronise writes, so flush explicitly.
            db.sync()
    finally:
        # Always release the database, even if deleting or syncing failed.
        db.close()
        duration = time.time() - start_time
        print('\ndeleted %i of %i in %s' %
              (deleted, seen, anagramfunctions.format_seconds(duration)))
예제 #3
0
def delete_short_entries(srcdb, cutoff=20, start=0):
    """Trim a gdbm database by removing entries with short keys.

    Keys are visited once via ``firstkey``/``nextkey``; those for which
    ``anagramfunctions.length_from_hash(key) < cutoff`` are remembered and
    deleted after the traversal ends.  Should the traversal raise, the keys
    remembered up to that point are still deleted before the exception
    continues to propagate.

    Args:
        srcdb: Path of the gdbm database, opened with flags ``'wf'``
            (existing database, read/write, fast mode).
        cutoff: Threshold compared against ``length_from_hash(key)``; keys
            below it are deleted.
        start: Unused; retained for backward compatibility.

    Returns:
        None.  Progress and a summary go to stdout.  If ``gdbm`` cannot be
        imported, a message is printed and the function returns immediately.
    """
    try:
        import gdbm
    except ImportError:
        print('database manipulation requires gdbm')
        # Bail out: every statement below needs the module.
        return

    print('trimming %s, cutoff %i' % (srcdb, cutoff))
    start_time = time.time()
    db = gdbm.open(srcdb, 'wf')
    seen = 0
    marked = 0
    deleted = 0
    todel = set()
    try:
        try:
            k = db.firstkey()
            while k is not None:
                seen += 1
                # Look up the successor first, then classify ``k``.
                nextk = db.nextkey(k)
                if anagramfunctions.length_from_hash(k) < cutoff:
                    # Only mark here; the actual delete happens after the
                    # traversal (presumably to avoid disturbing it).
                    todel.add(k)
                    marked += 1
                sys.stdout.write('seen/marked: %i/%i next: %s\t\t\t\t\r' %
                                 (seen, marked, nextk))
                sys.stdout.flush()
                k = nextk
        finally:
            # Executed on normal completion and on interruption alike.
            print('\ndeleting %i entries' % marked)
            for key in todel:
                try:
                    del db[key]
                except KeyError:
                    print('key error for key %s' % key)
                else:
                    # A failed delete must not inflate the reported total.
                    deleted += 1
                sys.stdout.write('deleted %i/%i\r' % (deleted, marked))
                sys.stdout.flush()

            # The database was opened in fast mode; write changes out now.
            db.sync()
    finally:
        # Close the handle no matter how the work above ended.
        db.close()
        duration = time.time() - start_time
        print('\ndeleted %i of %i in %s' %
              (deleted, seen, anagramfunctions.format_seconds(duration)))
예제 #4
0
def update_console():
    """Write the current run statistics to stdout as one refreshing line.

    The line is assembled from module-level values and terminated with a
    carriage return, so each call overwrites the previous output.  The
    function returns nothing and does not change any of the values it reads.
    """
    # These names are only read here; the declarations are informational.
    global _tweets_seen, _passed_filter, _possible_hits, _hits, _overflow
    global _buffer, _start_time, _cache_hits, _cache_size

    # Whole-number percentage of seen tweets that passed the filter.
    ratio = 0
    if _tweets_seen > 0:
        ratio = int(100 * (float(_passed_filter) / _tweets_seen))
    elapsed = time.time() - _start_time

    # Each segment is one labelled figure; segments are separated by a
    # single space in the final line.
    segments = (
        'tweets seen: ' + str(_tweets_seen),
        'passed filter: ' + str(_passed_filter),
        '({0}%)'.format(ratio),
        'hits ' + str(_possible_hits + _fetch_pool_size) + '/' +
        str(_cache_hits),
        'agrams: ' + str(_hits),
        'cachesize: ' + str(_cache_size),
        'buffer: ' + str(_buffer),
        'runtime: ' + anagramfunctions.format_seconds(elapsed),
    )
    sys.stdout.write(' '.join(segments) + '\r')
    sys.stdout.flush()
예제 #5
0
    def sleep(self, interval, debug=False):
        """Pause for a randomised period based on ``interval`` seconds.

        The duration is ``interval * 0.5`` plus a random whole number of
        seconds in ``[0, interval)``, i.e. between 50% and 150% of the base
        interval.  While sleeping, a countdown is rewritten in place on
        stdout once per ``sleep_chunk`` seconds.

        Args:
            interval: Base interval in seconds.  Zero or negative values
                result in no sleeping rather than an error.
            debug: When true, do not sleep; return the chosen duration.

        Returns:
            The randomised duration in minutes when ``debug`` is true,
            otherwise ``None``.
        """
        print('base interval is %d' % (interval / 60))

        # randrange() raises on an empty range (interval <= 0) and on
        # non-integer bounds, so clamp to a whole, positive number first.
        whole_seconds = int(interval)
        if whole_seconds > 0:
            randfactor = random.randrange(0, whole_seconds)
        else:
            randfactor = 0
        interval = interval * 0.5 + randfactor
        sleep_chunk = 10  # seconds

        print('sleeping for %d minutes' % (interval / 60))

        if not debug:
            while interval > 0:
                sleep_status = ' %s remaining \r' % (
                    anagramfunctions.format_seconds(interval))
                sys.stdout.write(sleep_status.rjust(35))
                sys.stdout.flush()
                # Do not overshoot the announced duration on the last chunk.
                time.sleep(min(sleep_chunk, interval))
                interval -= sleep_chunk

            print('\n')

        else:
            return interval / 60