Ejemplo n.º 1
0
Archivo: gmm.py Proyecto: aronwc/quac
def test_interactive():
   """Run the interactive test sequence with verbose logging.

   Calls u.logging_init('inter', verbose_=True), then runs
   test_error_metrics() followed by test_interactive_real(), in that order.
   """
   # To profile this run, enable cProfile before the calls below:
   #    import cProfile
   #    prof = cProfile.Profile()
   #    prof.enable()
   u.logging_init('inter', verbose_=True)
   test_error_metrics()
   test_interactive_real()
Ejemplo n.º 2
0
Archivo: gmm.py Proyecto: shikhach/quac
def test_interactive():
    """Run the interactive test sequence with verbose logging.

    Calls u.logging_init('inter', verbose_=True), then runs
    test_error_metrics() followed by test_interactive_real(), in that order.
    """
    # To profile this run, enable cProfile before the calls below:
    #     import cProfile
    #     prof = cProfile.Profile()
    #     prof.enable()
    u.logging_init('inter', verbose_=True)
    test_error_metrics()
    test_interactive_real()
Ejemplo n.º 3
0
# Copyright © Los Alamos National Security, LLC, and others.

from pprint import pprint
import time

from bsddb3 import db as bdb
import numpy as np

import u

# Benchmark dimensions: the outer loop runs outer_ct times and each pass
# issues inner_ct put() calls, i.e. outer_ct * inner_ct records in total.
outer_ct = 5
inner_ct = 50000

u.logging_init('bdb', verbose_=True)
l = u.l

u.memory_use_log()

# Configure the handle before opening it, then open (creating if necessary)
# a B-tree database file.
db = bdb.DB()
#db.set_flags(bdb.DB_TXN_NOT_DURABLE)
# Berkeley DB's set_cachesize takes (gbytes, bytes): 0 GB + 32 MiB here.
db.set_cachesize(0, 32*1024*1024)
# 64 KiB pages.
db.set_pagesize(64*1024)
db.open('/data6/foo.db', dbtype=bdb.DB_BTREE, flags=(bdb.DB_CREATE))

# start_out / start capture wall-clock times for the whole run and for each
# outer pass; they are not read within this part of the script.
start_out = time.time()
for j in range(outer_ct):
   start = time.time()
   for i in range(inner_ct):
      # Key: the running record number as UTF-8 encoded decimal text.
      # Value: a freshly allocated array of 720 int32 ones.
      db.put(str(j * inner_ct + i).encode('UTF-8'),
             np.ones(720, dtype=np.int32))
   # Flush to disk once per outer pass.
   db.sync()
Ejemplo n.º 4
0
      dfs[freq].index = dfs[freq].index.to_period(freq)
      dfs[freq].rename(columns=lambda c: re.sub(r'\$norm$', '', c), inplace=True)
      # 2. Clean up any NANs. We interpolate anything in the middle and change
      # boundary NANs to zero. Note that the boundaries are fairly well
      # outside the study period, so that should have minimal effect.
      dfs[freq].interpolate(method='linear', axis=0, inplace=True)
      dfs[freq].fillna(0, inplace=True)
      # 3. Trim the DataFrames to the study period. This doesn't have any effect,
      # since we trim to each test later, but it saves memory.
      (dfs[freq], _) = dfs[freq].align(eg, axis=0, join='inner')
      assert (dfs[freq].index.equals(eg.index))
   # 4. Build a DataFrame for each disease. This duplicates some vectors, but
   # not enough to be a worry.
   vs = dict()
   for (ob, ts) in sorted(g.truth.items()):
      freq = ts.index.freq.name
      dist = args.distance
      vs[ob] = dfs[freq].select(lambda c: relevant_p(ob, c, dist), axis=1)
      l.info('  %-15s %3d articles' % (ob + ':', len(vs[ob].columns)))
   return vs


### Bootstrap ###

# Script entry point: everything below runs only when the file is executed
# directly, not when it is imported.
if (__name__ == '__main__'):
   args = u.parse_args(ap)
   # 'in' is a Python keyword, so the parsed value is copied to args.in_ to
   # make it reachable with ordinary attribute syntax.
   args.in_ = getattr(args, 'in')  # foo.in is a syntax error
   # Configuration and logging are set up before main() is called.
   u.configure(args.config)
   u.logging_init('expmt')
   main()
Ejemplo n.º 5
0
# Copyright © Los Alamos National Security, LLC, and others.

from pprint import pprint
import time

from bsddb3 import db as bdb
import numpy as np

import u

# Benchmark dimensions: the outer loop runs outer_ct times and each pass
# issues inner_ct put() calls, i.e. outer_ct * inner_ct records in total.
outer_ct = 5
inner_ct = 50000

u.logging_init('bdb', verbose_=True)
l = u.l

u.memory_use_log()

# Configure the handle before opening it, then open (creating if necessary)
# a B-tree database file.
db = bdb.DB()
#db.set_flags(bdb.DB_TXN_NOT_DURABLE)
# Berkeley DB's set_cachesize takes (gbytes, bytes): 0 GB + 32 MiB here.
db.set_cachesize(0, 32 * 1024 * 1024)
# 64 KiB pages.
db.set_pagesize(64 * 1024)
db.open('/data6/foo.db', dbtype=bdb.DB_BTREE, flags=(bdb.DB_CREATE))

# start_out / start capture wall-clock times for the whole run and for each
# outer pass; they are not read within this part of the script.
start_out = time.time()
for j in range(outer_ct):
    start = time.time()
    for i in range(inner_ct):
        # Key: the running record number as UTF-8 encoded decimal text.
        # Value: a freshly allocated array of 720 int32 ones.
        db.put(
            str(j * inner_ct + i).encode('UTF-8'), np.ones(720,
                                                           dtype=np.int32))
Ejemplo n.º 6
0
# Copyright © Los Alamos National Security, LLC, and others.

import os
from pprint import pprint
import time

import sqlite3
import numpy as np

import u

# Loop counts for the benchmark that follows.
outer_ct = 10
inner_ct = 100000

u.logging_init('sl', verbose_=True)
l = u.l

#u.memory_use_log()

conn = sqlite3.connect('/data6/foo.db')
db = conn.cursor()

# Tuning pragmas first, then the schema. The statements are executed strictly
# in the order listed; the pragmas must precede the CREATE statements.
for _stmt in (
        'PRAGMA cache_size = -1048576',
        'PRAGMA encoding = "UTF-8"',
        'PRAGMA journal_mode = OFF',
        'PRAGMA page_size = 65536',
        'PRAGMA synchronous = OFF',
        'CREATE TABLE ts (namespace TEXT, name TEXT, total INT, data TEXT)',
        'CREATE INDEX ts_idx ON ts (namespace, name)',
):
    db.execute(_stmt)

start_out = time.time()
for j in range(outer_ct):
Ejemplo n.º 7
0
        clf_times = []
        for size in sizes:
            clf_times.append('%.5f' % time_it(clf, tweets[:size]))
        l.info(clf.__module__[-15:] + '\t' + '\t'.join(clf_times))


def read_tsvs(filenames):
    """Collect tweet texts from the given files, in order.

    Each file is opened with tweet.Reader and the .text of every item it
    yields is appended to the result. Reading stops as soon as the number of
    collected texts exceeds args.test_size; otherwise all files are read to
    the end.

    Returns the list of collected texts (empty if filenames is empty).
    """
    texts = []
    for path in filenames:
        for tw in tweet.Reader(path):
            texts.append(tw.text)
            # The list length is the running count of tweets read so far.
            if len(texts) > args.test_size:
                return texts
    return texts


### Bootstrap ###

# Argument parsing and logging setup run on import as well as on direct
# execution; main() runs only on direct execution. Everything, including
# main(), sits inside the try, so Unittests_Only_Exception raised anywhere in
# it is handled below.
try:
    args = u.parse_args(ap)
    u.logging_init('clsbmk')

    if (__name__ == '__main__'):
        main()

except testable.Unittests_Only_Exception:
    # NOTE(review): presumably raised when the module is loaded only to run
    # its unit tests -- confirm against u.parse_args / testable.
    testable.register('')
Ejemplo n.º 8
0
        dfs[freq].rename(columns=lambda c: re.sub(r'\$norm$', '', c),
                         inplace=True)
        # 2. Clean up any NANs. We interpolate anything in the middle and change
        # boundary NANs to zero. Note that the boundaries are fairly well
        # outside the study period, so that should have minimal effect.
        dfs[freq].interpolate(method='linear', axis=0, inplace=True)
        dfs[freq].fillna(0, inplace=True)
        # 3. Trim the DataFrames to the study period. This doesn't have any effect,
        # since we trim to each test later, but it saves memory.
        (dfs[freq], _) = dfs[freq].align(eg, axis=0, join='inner')
        assert (dfs[freq].index.equals(eg.index))
    # 4. Build a DataFrame for each disease. This duplicates some vectors, but
    # not enough to be a worry.
    vs = dict()
    for (ob, ts) in sorted(g.truth.items()):
        freq = ts.index.freq.name
        dist = args.distance
        vs[ob] = dfs[freq].select(lambda c: relevant_p(ob, c, dist), axis=1)
        l.info('  %-15s %3d articles' % (ob + ':', len(vs[ob].columns)))
    return vs


### Bootstrap ###

# Script entry point: everything below runs only when the file is executed
# directly, not when it is imported.
if (__name__ == '__main__'):
    args = u.parse_args(ap)
    # 'in' is a Python keyword, so the parsed value is copied to args.in_ to
    # make it reachable with ordinary attribute syntax.
    args.in_ = getattr(args, 'in')  # foo.in is a syntax error
    # Configuration and logging are set up before main() is called.
    u.configure(args.config)
    u.logging_init('expmt')
    main()
Ejemplo n.º 9
0
      clf_times = []
      for size in sizes:
         clf_times.append('%.5f' % time_it(clf, tweets[:size]))
      l.info(clf.__module__[-15:] + '\t' + '\t'.join(clf_times))


def read_tsvs(filenames):
   """Collect tweet texts from the given files, in order.

   Each file is opened with tweet.Reader and the .text of every item it
   yields is appended to the result. Reading stops as soon as the number of
   collected texts exceeds args.test_size; otherwise all files are read to
   the end.

   Returns the list of collected texts (empty if filenames is empty).
   """
   texts = []
   for path in filenames:
      for tw in tweet.Reader(path):
         texts.append(tw.text)
         # The list length is the running count of tweets read so far.
         if len(texts) > args.test_size:
            return texts
   return texts


### Bootstrap ###

# Argument parsing and logging setup run on import as well as on direct
# execution; main() runs only on direct execution. Everything, including
# main(), sits inside the try, so Unittests_Only_Exception raised anywhere in
# it is handled below.
try:
   args = u.parse_args(ap)
   u.logging_init('clsbmk')

   if (__name__ == '__main__'):
      main()

except testable.Unittests_Only_Exception:
   # NOTE(review): presumably raised when the module is loaded only to run
   # its unit tests -- confirm against u.parse_args / testable.
   testable.register('')
Ejemplo n.º 10
0
# Copyright © Los Alamos National Security, LLC, and others.

import os
from pprint import pprint
import time

import sqlite3
import numpy as np

import u

# Loop counts for the benchmark that follows.
outer_ct = 10
inner_ct = 100000

u.logging_init('sl', verbose_=True)
l = u.l

#u.memory_use_log()

conn = sqlite3.connect('/data6/foo.db')
db = conn.cursor()

# Tuning pragmas first, then the schema. The statements are executed strictly
# in the order listed; the pragmas must precede the CREATE statements.
for _stmt in (
        'PRAGMA cache_size = -1048576',
        'PRAGMA encoding = "UTF-8"',
        'PRAGMA journal_mode = OFF',
        'PRAGMA page_size = 65536',
        'PRAGMA synchronous = OFF',
        'CREATE TABLE ts (namespace TEXT, name TEXT, total INT, data TEXT)',
        'CREATE INDEX ts_idx ON ts (namespace, name)',
):
    db.execute(_stmt)

start_out = time.time()
for j in range(outer_ct):