Esempio n. 1
0
from cPickle import load

import pandas as pd

from gmm_with_weighting import gen_params
#-----------------------------------------------------------------------------
# Data IO
# Directory that holds the HDF5 stores and the pickled declarant lookup.
base = '/Volumes/HDD/Users/tom/DataStorage/Comext/yearly/'

# Input store (per-country frames) and the store for estimation results.
# Nothing in this section closes them; they stay open for the code below.
gmm = pd.HDFStore(base + 'gmm_store.h5')
gmm_results = pd.HDFStore(base + 'gmm_results.h5')

# A pickle is binary data: open it with 'rb' so that loading does not depend
# on the pickle protocol used or on platform text-mode newline translation.
with open(base + 'declarants_no_002_dict.pkl', 'rb') as declarants:
    country_code = load(declarants)

# Single country used for this timing experiment.
ctry = '002'
df = gmm.select('by_ctry_' + ctry)

# Keep the run short: take the first 1000 rows, drop rows with missing
# values, then group on the 'PRODUCT_NC' index level.
sub = df[:1000]
sub_p = sub.dropna().groupby(level='PRODUCT_NC')

#-----------------------------------------------------------------------------
# Python version
# One gen_params call per product group, every call started from x0=[2, 1];
# the results are keyed by the group name.
res = {
    name: gen_params(group, x0=[2, 1])
    for name, group in sub_p
}
# %timeit {name: gen_params(group, x0=[2, 1]) for name, group in sub_p}
# 1 loops, best of 3: 1.98 s per loop

#-----------------------------------------------------------------------------
# Cythonized
Esempio n. 2
0
    declarants = sorted(country_code.keys())
#-----------------------------------------------------------------------------
# Main loop.  Optimize to get params for each good, for each declarant.
    ctry = '001'
    try:
        df = gmm.select('ctry_' + ctry)
        df = df.dropna()
        df = df[~(df == np.inf)]
        by_product = df.groupby(level='good')
    except KeyError, AssertionError:
        with open('gmm_logging.txt', 'a') as f:
            f.write('Failed to open or group ctry: {}'.format(ctry))
        pass
    #---------------------------------------------------------------------
    # GMM Estimation.
    # Without Weighting
    # res = {name: gen_params(group, [2, 1], name, country=ctry, W=None)
    #        for name, group in by_product}
    #---------------------------------------------------------------------

    # Build a small test frame from five product groups, taken in whatever
    # order the groups mapping yields them.  Each item is (name, row labels);
    # only the labels are used to slice df.
    group_items = by_product.groups.iteritems()
    first_five = [df.ix[next(group_items)[1]] for _ in range(5)]

    # Stitch the five slices back together and regroup them by good.
    test = pd.concat(first_five)
    gr = test.groupby(level='good')

    # One gen_params call per good: start values [2, 1], no weighting matrix
    # (W=None), and the 'disp' option switched on.
    res = {
        name: gen_params(
            group,
            [2, 1],
            name,
            country=ctry,
            W=None,
            options={'disp': True},
        )
        for name, group in gr
    }