def test_interactive():
   import cProfile
   #prof = cProfile.Profile()
   #prof.enable()
   u.logging_init('inter', verbose_=True)
   test_error_metrics()
   test_interactive_real()
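# For reference, a sketch of how the commented-out profiler lines above could
# be wired in. cProfile.Profile(), enable(), disable(), and print_stats() are
# standard library API; the sort key and wrapping both test calls are
# assumptions, not the original author's setup.
def test_interactive_profiled():
   import cProfile
   prof = cProfile.Profile()
   prof.enable()
   u.logging_init('inter', verbose_=True)
   test_error_metrics()
   test_interactive_real()
   prof.disable()
   prof.print_stats(sort='cumulative')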
# Copyright © Los Alamos National Security, LLC, and others.

from pprint import pprint
import time

from bsddb3 import db as bdb
import numpy as np

import u

outer_ct = 5
inner_ct = 50000

u.logging_init('bdb', verbose_=True)
l = u.l
u.memory_use_log()

db = bdb.DB()
#db.set_flags(bdb.DB_TXN_NOT_DURABLE)
db.set_cachesize(0, 32*1024*1024)
db.set_pagesize(64*1024)
db.open('/data6/foo.db', dbtype=bdb.DB_BTREE, flags=bdb.DB_CREATE)

start_out = time.time()
for j in range(outer_ct):
   start = time.time()
   for i in range(inner_ct):
      db.put(str(j * inner_ct + i).encode('UTF-8'),
             np.ones(720, dtype=np.int32))
   db.sync()
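# Read-back sketch (assumed, not part of the original benchmark). bsddb3
# returns values as raw bytes, so the stored array must be rebuilt with
# np.frombuffer(); the key b'0' is simply the first key written above.
value = db.get(b'0')
arr = np.frombuffer(value, dtype=np.int32)   # bytes -> int32 array
assert arr.shape == (720,) and arr[0] == 1
db.close()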
   dfs[freq].index = dfs[freq].index.to_period(freq)
   dfs[freq].rename(columns=lambda c: re.sub(r'\$norm$', '', c),
                    inplace=True)
   # 2. Clean up any NaNs. We interpolate anything in the middle and change
   # boundary NaNs to zero. Note that the boundaries are fairly well outside
   # the study period, so that should have minimal effect.
   dfs[freq].interpolate(method='linear', axis=0, inplace=True)
   dfs[freq].fillna(0, inplace=True)
   # 3. Trim the DataFrames to the study period. This doesn't have any
   # effect, since we trim to each test later, but it saves memory.
   (dfs[freq], _) = dfs[freq].align(eg, axis=0, join='inner')
   assert (dfs[freq].index.equals(eg.index))
   # 4. Build a DataFrame for each disease. This duplicates some vectors,
   # but not enough to be a worry.
   vs = dict()
   for (ob, ts) in sorted(g.truth.items()):
      freq = ts.index.freq.name
      dist = args.distance
      vs[ob] = dfs[freq].select(lambda c: relevant_p(ob, c, dist), axis=1)
      l.info(' %-15s %3d articles' % (ob + ':', len(vs[ob].columns)))
   return vs


### Bootstrap ###

if (__name__ == '__main__'):
   args = u.parse_args(ap)
   args.in_ = getattr(args, 'in')  # foo.in is a syntax error
   u.configure(args.config)
   u.logging_init('expmt')
   main()
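# Compatibility note: DataFrame.select() used in step 4 was deprecated in
# pandas 0.21 and removed in pandas 1.0. On modern pandas the same column
# selection can be written with a boolean mask via .loc; an equivalent
# sketch, not the original code:
#
#    cols = [c for c in dfs[freq].columns if relevant_p(ob, c, dist)]
#    vs[ob] = dfs[freq].loc[:, cols]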
# Copyright © Los Alamos National Security, LLC, and others.

import os
from pprint import pprint
import time
import sqlite3

import numpy as np

import u

outer_ct = 10
inner_ct = 100000

u.logging_init('sl', verbose_=True)
l = u.l
#u.memory_use_log()

conn = sqlite3.connect('/data6/foo.db')
db = conn.cursor()
db.execute('PRAGMA cache_size = -1048576')
db.execute('PRAGMA encoding = "UTF-8"')
db.execute('PRAGMA journal_mode = OFF')
db.execute('PRAGMA page_size = 65536')
db.execute('PRAGMA synchronous = OFF')
db.execute('CREATE TABLE ts (namespace TEXT, name TEXT, total INT, data TEXT)')
db.execute('CREATE INDEX ts_idx ON ts (namespace, name)')

start_out = time.time()
for j in range(outer_ct):
   start = time.time()
   # The loop body is truncated in the original file; the following mirrors
   # the bsddb3 benchmark above and is a reconstruction (the exact row
   # contents are an assumption).
   for i in range(inner_ct):
      db.execute('INSERT INTO ts VALUES (?, ?, ?, ?)',
                 ('ns', str(j * inner_ct + i), 720,
                  np.ones(720, dtype=np.int32).tobytes()))
   conn.commit()
   l.info('batch %d: %d inserts in %.3f s' % (j, inner_ct,
                                              time.time() - start))
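# Sanity check sketch (assumed, not in the original): count the rows back
# out using sqlite3's parameterless execute() and fetchone().
db.execute('SELECT count(*) FROM ts')
l.info('rows inserted: %d' % db.fetchone()[0])
conn.close()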
   clf_times = []
   for size in sizes:
      clf_times.append('%.5f' % time_it(clf, tweets[:size]))
   l.info(clf.__module__[-15:] + '\t' + '\t'.join(clf_times))

def read_tsvs(filenames):
   tweets = []
   ct = 0
   for filename in filenames:
      reader = tweet.Reader(filename)
      for tw in reader:
         tweets.append(tw.text)
         ct += 1
         if ct > args.test_size:
            return tweets
   return tweets


### Bootstrap ###

try:
   args = u.parse_args(ap)
   u.logging_init('clsbmk')
   if (__name__ == '__main__'):
      main()
except testable.Unittests_Only_Exception:
   testable.register('')
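# time_it() is not defined in this excerpt. A minimal sketch consistent with
# the call above (apply classifier clf to each tweet text, return elapsed
# seconds); the classify() entry point is an assumption:
#
#    def time_it(clf, texts):
#       start = time.time()
#       for text in texts:
#          clf.classify(text)
#       return time.time() - start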