예제 #1
0
                if j.is_dir():
                    yield (j, freq)

def acc(args):
    """Summarise the pickled frames stored in one directory.

    ``args`` is a ``(path, freq)`` pair. The last two components of
    ``path`` are parsed as integers (observation, prediction). Every
    ``*.pkl`` file directly under ``path`` is loaded with
    ``pd.read_pickle``, the pieces are concatenated column-wise, resampled
    at ``freq`` with a sum per bin, and the mean of those sums is taken.

    Returns ``(observation, prediction, summary)``.
    """
    (path, freq) = args
    (observation, prediction) = map(int, path.parts[-2:])

    msg = 'o: {0} p: {1}'.format(observation, prediction)
    logger.getlogger().info(msg)

    frames = []
    for pkl in path.glob('*.pkl'):
        frames.append(pd.read_pickle(str(pkl)))

    combined = pd.concat(frames, axis=1)
    summary = combined.resample(freq).sum().mean()

    return (observation, prediction, summary)

# Parse the command line using the 'characterisation-plot' options file.
args = cli.CommandLine(cli.optsfile('characterisation-plot')).args
top_level = Path(args.source)
target = Path(args.target)
# Create the target directory, including missing parents; an existing
# directory is not an error.
target.mkdir(parents=True, exist_ok=True)

# Use the single frequency 'D' when no frequencies were supplied.
# NOTE(review): presumably a pandas offset alias handed to resample() -- confirm.
freqs = args.freqs if args.freqs else [ 'D' ] # XXX defaults?

names = [ 'observation', 'prediction' ]
log = logger.getlogger(True)

for fq in freqs:
    log.info('collect {0}'.format(fq))

    with Pool(cpu_count() // 2, maxtasksperchild=1) as pool:
        f = pool.imap_unordered
        d = { tuple(i): j.values for (*i, j) in f(acc, mkargs(top_level, fq)) }
예제 #2
0
    # items = zip(range(oneday), [ [] ] * oneday)
    totals = OrderedDict()
    for i in range(oneday):
        totals[i] = [0]

    for i in df.index:
        key = cp.bucket(i)
        totals[key].append(df.ix[i])

    vals = [agg(x) for x in totals.values()]
    vals.append(nid)  # this is important

    return vals


# Parse the command line using the 'chgpt' options file.
cargs = cli.CommandLine(cli.optsfile('chgpt'))
args = cargs.args

# Ratio of constant.day to constant.minute, rounded.
# NOTE(review): presumably the number of minutes in a day -- confirm units.
oneday = round(constant.day / constant.minute)
window = nd.Window(args.window_obs, args.window_pred, args.window_trgt)

if args.resume:
    with open(args.resume, mode='rb') as fp:
        observations = pickle.load(fp)
    (measurements, nodes) = data.cleanse(observations)
else:
    db.genop(args.reporting)
    opts = [window, oneday, args.threshold, np.mean]
    with Pool() as pool:
        observations = pool.starmap(f, nd.nodegen(opts))
        observations = list(filter(None, observations))
예제 #3
0
from lib import db
from lib import cli
from configparser import ConfigParser

# Parse the command line using the "prediction" options file.
cargs = cli.CommandLine(cli.optsfile("prediction"))  # /etc/opts/prediction
args = cargs.args

config = ConfigParser()
config.read(args.config)  # --config

# Use the [database] section as keyword arguments when it exists.
dbinfo = config["database"] if "database" in config else None
# Unpacking None with `**` raises TypeError, so a missing section is passed
# as "no keyword arguments" instead.
# NOTE(review): assumes EstablishCredentials() falls back to its defaults
# when called without arguments -- confirm.
db.EstablishCredentials(**(dbinfo or {}))

# Config values are read as text; genop receives the option as an int.
db.genop(int(config["parameters"]["intra-reporting"]))
예제 #4
0
            if all([x in row for x in tbl]):
                self.data.append(row)


class Ireland(GetRemoteXML):
    """GetRemoteXML variant constructed from url, retries and timeout only."""
    def __init__(self, url, retries, timeout):
        # The two trailing base-class arguments are fixed to None.
        # NOTE(review): their meaning is defined by GetRemoteXML.__init__,
        # which is not visible here.
        super().__init__(url, retries, timeout, None, None)


# Maps the value of args.source to the class that handles that source.
processors = {
    'nyc': NYC,
    'mass': Massachusetts,
    'ie': Ireland,
}
# Parse the command line using the 'storage' options file.
cargs = cli.CommandLine(cli.optsfile('storage'))
args = cargs.args

# Raises KeyError when args.source is not one of the keys above.
handler = processors[args.source]
try:
    data = handler(args.url, args.retries, args.timeout)
    data.parse(args.table, args.root)
    data.to_file(args.output)
    # data.check(args.output)
except AttributeError as err:
    log = logger.getlogger()
    log.critical(err)
except AssertionError:
    (*_, tb) = sys.exc_info()
    (*_, tb_info) = map(list, traceback.extract_tb(tb))
예제 #5
0
def f(*args):
    """Build a node with its neighbours and run the influence analyses.

    ``args`` is a 3-tuple whose first element is ignored; the second is
    the node identifier and the third is forwarded unchanged to each
    analysis class. Returns a list holding the ``run()`` result of every
    analysis class, in order.
    """
    (_, node, cargs) = args
    log = Logger().log

    log.info('{0}: setup +'.format(node))
    with DatabaseConnection() as conn:
        # All Node objects are created while the connection is open.
        source = nd.Node(node, conn)
        neighbors = []
        for peer in source.neighbors:
            neighbors.append(nd.Node(peer, conn))
    log.info('{0}: setup -'.format(node))

    # Only WindowInfluence is enabled; the alternative set noted by the
    # original author was [ MinuteInfluence, WindowInfluence ].
    classes = (WindowInfluence,)

    outcomes = []
    for influence in classes:
        outcomes.append(influence(source, neighbors, cargs).run())
    return outcomes

# Run f over every argument tuple from nd.nodegen using a worker pool, then
# pickle the collected results to a temporary file.
with Pool() as pool:
    cargs = cli.CommandLine(cli.optsfile('main'))
    
    # starmap unpacks each tuple produced by nodegen into f(*args).
    results = pool.starmap(f, nd.nodegen(cargs.args))
    # delete=False keeps the file on disk after the with-block closes it.
    with NamedTemporaryFile(mode='wb', delete=False) as fp:
        pickle.dump(results, fp)
        msg = 'pickle: {0}'.format(fp.name)
        # The file name is reported through the error-level log.
        Logger().log.error(msg)

# with open('/tmp/tmpe2x8wi0d', mode='rb') as fp:
#     results = pickle.load(fp)
    
header = [
    'type',
    'source',
    'target',
    'pearson',
예제 #6
0
                        except ValueError:
                            break

            if all([ x in row for x in tbl ]):
                self.data.append(row)

class Ireland(GetRemoteXML):
    """GetRemoteXML variant constructed from url, retries and timeout only."""
    def __init__(self, url, retries, timeout):
        # The two trailing base-class arguments are fixed to None.
        # NOTE(review): their meaning is defined by GetRemoteXML.__init__,
        # which is not visible here.
        super().__init__(url, retries, timeout, None, None)
            
# Maps the value of args.source to the class that handles that source.
processors = {
    'nyc': NYC,
    'mass': Massachusetts,
    'ie': Ireland,
}
# Parse the command line using the 'storage' options file.
cargs = cli.CommandLine(cli.optsfile('storage'))
args = cargs.args

# Raises KeyError when args.source is not one of the keys above.
handler = processors[args.source]
try:
    data = handler(args.url, args.retries, args.timeout)
    data.parse(args.table, args.root)
    data.to_file(args.output)
    # data.check(args.output)
except AttributeError as err:
    log = logger.getlogger()
    log.critical(err)
except AssertionError:
    (*_, tb) = sys.exc_info()
    (*_, tb_info) = map(list, traceback.extract_tb(tb))
    
예제 #7
0
        mkplot(node, 'ols', res, cargs.output)
    except (LinAlgError, ValueError) as err:
        log.error('{0}: {1},{2}'.format(err, endog.shape, exog.shape))


def var_(*args):
    """Fit a VAR model to one cluster's readings and plot the result.

    ``args`` is a 3-tuple whose first element is ignored; the second is
    the cluster identifier and the third carries ``lags`` and ``output``.
    Nothing is fitted when the readings are empty after dropping missing
    values or when no lags were given. LinAlgError and ValueError raised
    while fitting or plotting are logged instead of propagated.
    Always returns None.
    """
    (_, nid, cargs) = args

    node = nd.Cluster(nid)
    log.info('var: {0}'.format(str(node)))
    endog = node.readings.dropna()

    # Guard clause: nothing to fit without data or without lags.
    if endog.empty or not cargs.lags:
        return

    maxlags = max(cargs.lags)
    try:
        fitted = vm.VAR(endog=endog).fit(maxlags=maxlags)
        mkplot(node, 'var', fitted, cargs.output, maxlags)
    except (LinAlgError, ValueError) as err:
        log.error(err)


# Fit = namedtuple('Fit', [ 'node', 'model', 'lags' ])
# Run each fitting routine over every argument tuple from nd.nodegen using
# a worker pool.
with Pool() as pool:
    cargs = cli.CommandLine(cli.optsfile('regression'))

    for i in [var_, ols_]:
        # `results` is currently unused; the code that pickled it is
        # commented out below.
        results = pool.starmap(i, nd.nodegen(cargs.args))
        # fname = os.path.join(cargs.args.output, i.__name__, '.pkl')
        # with open(fname, mode='wb') as fp:
        #     r = list(filter(None, results))
        #     pickle.dump(r, fp)
예제 #8
0
    
    # items = zip(range(oneday), [ [] ] * oneday)
    totals = OrderedDict()
    for i in range(oneday):
        totals[i] = [ 0 ]
        
    for i in df.index:
        key = cp.bucket(i)
        totals[key].append(df.ix[i])
        
    vals = [ agg(x) for x in totals.values() ]
    vals.append(nid) # this is important
        
    return vals

# Parse the command line using the 'chgpt' options file.
cargs = cli.CommandLine(cli.optsfile('chgpt'))
args = cargs.args

# Ratio of constant.day to constant.minute, rounded.
# NOTE(review): presumably the number of minutes in a day -- confirm units.
oneday = round(constant.day / constant.minute)
window = nd.Window(args.window_obs, args.window_pred, args.window_trgt)

if args.resume:
    with open(args.resume, mode='rb') as fp:
        observations = pickle.load(fp)
    (measurements, nodes) = data.cleanse(observations)
else:
    db.genop(args.reporting)
    opts = [ window, oneday, args.threshold, np.mean ]
    with Pool() as pool:
        observations = pool.starmap(f, nd.nodegen(opts))
        observations = list(filter(None, observations))
예제 #9
0
            N = k * len(groups) # between Group degrees of freedom

            msg = '{0:2d} {1:2d} '.format(i, len(groups))
            fmt = 'F({2}, {3}) = {0:6.3f}, p = {1:.3f} {4} '
            for j in (stats.f_oneway, stats.kruskal):
                (v, p) = j(*samples)
                msg += fmt.format(v, p, k - 1, N - k, issig(p))
            print(msg)

# Named plot settings.
# NOTE(review): the PlotArgs fields look like (figure size, font size) --
# confirm against the PlotArgs definition.
plotargs = {
    # keys must be valid --display options
    'presentation': PlotArgs((120, 20), 36),
    'paper': PlotArgs((7, 3), 10),
    }
            
# Parse the command line using the 'prediction-plot' options file.
user = cli.CommandLine(cli.optsfile('prediction-plot'))
# Normalise a missing/empty gfilter to a list so it can be concatenated
# with ['node'] below.
if not user.args.gfilter:
    user.args.gfilter = []

# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=None loads the same table with a default index.
raw = pd.read_csv(user.args.data, sep=';', index_col=None)
# Every requested grouping column must exist in the data.
assert all(x in raw.columns for x in user.args.gfilter)
# NaN never compares equal to anything (itself included), so the previous
# `!= np.nan` mask was True for every row and dropped nothing. notnull()
# keeps only the rows that actually have a confusion matrix.
raw = raw.loc[raw['confusion_matrix'].notnull()]

# Mean and standard error of the chosen metric per (gfilter..., node)
# group; unstack(0) moves the first grouping level into the columns.
grouped = raw.groupby(user.args.gfilter + ['node'])[user.args.metric]
df = grouped.agg([ np.mean, stats.sem ]).unstack(0)

if user.args.gfilter:
    args = {
        'kind': 'bar',
        'yerr': df['sem'],
        'ylim': (0, 1),
예제 #10
0
        values = model.predict(model.classify())
    except ValueError as v:
        log.error(v)

    return Results(keys, values)


#
# Setup
#

log = logger.getlogger(True)
log.info('phase 1')
log.info('db version: {0}'.format(db.mark()))

# Parse the command line using the 'prediction' options file.
cargs = cli.CommandLine(cli.optsfile('prediction'))  # /etc/opts/prediction

config = ConfigParser()
config.read(cargs.args.config)  # --config

params = config['parameters']
writer = ResultsWriter(config['output'].getboolean('print-header'))

# Establish the database credentials from the [database] section. When the
# section is absent, call with no keyword arguments: unpacking None with
# `**` raises TypeError.
# NOTE(review): assumes EstablishCredentials() falls back to its defaults
# when called without arguments -- confirm.
dbinfo = config['database'] if 'database' in config else None
db.EstablishCredentials(**(dbinfo or {}))

#
# Processing
#
예제 #11
0
#

import itertools
import configparser

from lib import db
from lib import cli
from lib import node
from tempfile import NamedTemporaryFile

# http://stackoverflow.com/a/5228294
def product(d):
    """Yield one dict for every combination of the values in *d*.

    Each value of *d* is an iterable of candidates. Every yielded dict has
    the keys of *d*, with one candidate picked per key, covering the
    Cartesian product of all candidates in ``itertools.product`` order.
    """
    combos = itertools.product(*d.values())
    yield from (dict(zip(d, picks)) for picks in combos)

# Parse the command line using the 'config' options file.
cargs = cli.CommandLine(cli.optsfile('config')) # /etc/opts/config
args = cargs.args
# Keyword arguments describing the files to create under args.output.
# NOTE(review): presumably passed to NamedTemporaryFile -- confirm at the
# call site.
tmpargs = {
    'mode': 'w',
    'delete': False,
    'dir': args.output,
    'prefix': '', # the default (None) is actually 'tmp'
    'suffix': '.ini',
}

#
# Options that can be simultaneous during a single run
#
machines = [
    # svm
    # bayes
예제 #12
0
    try:
        res = sm.OLS(endog=endog, exog=exog, missing='drop').fit()
        mkplot(node, 'ols', res, cargs.output)
    except (LinAlgError, ValueError) as err:
        log.error('{0}: {1},{2}'.format(err, endog.shape, exog.shape))

def var_(*args):
    """Fit a VAR model to one cluster's readings and plot the result.

    ``args`` is a 3-tuple whose first element is ignored; the second is
    the cluster identifier and the third carries ``lags`` and ``output``.
    Nothing is fitted when the readings are empty after dropping missing
    values or when no lags were given. LinAlgError and ValueError raised
    while fitting or plotting are logged instead of propagated.
    Always returns None.
    """
    (_, nid, cargs) = args

    node = nd.Cluster(nid)
    log.info('var: {0}'.format(str(node)))
    endog = node.readings.dropna()

    # Guard clause: nothing to fit without data or without lags.
    if endog.empty or not cargs.lags:
        return

    maxlags = max(cargs.lags)
    try:
        fitted = vm.VAR(endog=endog).fit(maxlags=maxlags)
        mkplot(node, 'var', fitted, cargs.output, maxlags)
    except (LinAlgError, ValueError) as err:
        log.error(err)

# Fit = namedtuple('Fit', [ 'node', 'model', 'lags' ])
# Run each fitting routine over every argument tuple from nd.nodegen using
# a worker pool.
with Pool() as pool:
    cargs = cli.CommandLine(cli.optsfile('regression'))

    for i in [ var_, ols_ ]:        
        # `results` is currently unused; the code that pickled it is
        # commented out below.
        results = pool.starmap(i, nd.nodegen(cargs.args))
        # fname = os.path.join(cargs.args.output, i.__name__, '.pkl')
        # with open(fname, mode='wb') as fp:
        #     r = list(filter(None, results))
        #     pickle.dump(r, fp)
예제 #13
0
import itertools
import configparser

from lib import db
from lib import cli
from lib import node
from tempfile import NamedTemporaryFile


# http://stackoverflow.com/a/5228294
def product(d):
    """Yield one dict for every combination of the values in *d*.

    Each value of *d* is an iterable of candidates. Every yielded dict has
    the keys of *d*, with one candidate picked per key, covering the
    Cartesian product of all candidates in ``itertools.product`` order.
    """
    combos = itertools.product(*d.values())
    yield from (dict(zip(d, picks)) for picks in combos)


# Parse the command line using the 'config' options file.
cargs = cli.CommandLine(cli.optsfile('config'))  # /etc/opts/config
args = cargs.args
# Keyword arguments describing the files to create under args.output.
# NOTE(review): presumably passed to NamedTemporaryFile -- confirm at the
# call site.
tmpargs = {
    'mode': 'w',
    'delete': False,
    'dir': args.output,
    'prefix': '',  # the default (None) is actually 'tmp'
    'suffix': '.ini',
}

#
# Options that can be simultaneous during a single run
#
machines = [
    # svm
    # bayes