import us
import pandas as pd
import numpy as np
from cytoolz.itertoolz import unique
from cytoolz.functoolz import thread_last, identity
from cytoolz.curried import map, filter, curry
from survey_stats import pdutil
# import sys
# import traceback as tb

from survey_stats import log

# Module-level logger from the project's ``log`` helper, named after this module.
logger = log.getLogger(__name__)

# Integer FIPS codes for every entry of us.STATES_AND_TERRITORIES that has
# one; entries whose ``fips`` attribute is None are left out.
US_STATES_FIPS_INTS = [
    int(state.fips)
    for state in us.STATES_AND_TERRITORIES
    if state.fips is not None
]

def _fips_to_abbr(x):
    """Translate a FIPS site code into the matching ``us`` abbreviation.

    The code is coerced with ``int`` exactly once, and that integer is used
    both for the membership check and for building the zero-padded lookup
    key. Previously the raw value was formatted with ``'%.2d'``, so a
    string code such as ``'06'`` passed the ``int(x)`` membership check and
    then raised ``TypeError`` during formatting.

    Returns the ``abbr`` attribute of ``us.states.lookup(...)`` for codes
    present in ``US_STATES_FIPS_INTS`` and the string ``'NA'`` otherwise.
    """
    code = int(x)
    if code not in US_STATES_FIPS_INTS:
        return 'NA'
    # The lookup key is the two-digit, zero-padded form of the code.
    return us.states.lookup('%.2d' % code).abbr


# Maps a site-code scheme name to the callable that translates one raw code:
# 'fips' converts a FIPS code to an abbreviation, 'codes' passes it through.
SITECODE_TRANSLATORS = {
    'fips': _fips_to_abbr,
    'codes': identity,
}

# Column names grouped as the survey-design columns.
# NOTE(review): presumably the columns required to build a survey design
# (site, stratum, PSU, weight) -- confirm against the callers.
SVYDESIGN_COLS = ['sitecode', 'strata', 'psu', 'weight']


def convert_cat_codes(s, fmt):
Exemple #2
0
import click
import survey_stats
from survey_stats.dbi import DatabaseConfig, DatabaseType
import survey_stats.const as c
from survey_stats import log

# Module-level logger from the project's ``log`` helper, registered as 'cli'.
logger = log.getLogger('cli')


def resolve_db_args(db_host, db_port, db_type,
                    db_user, db_password, db_name, db_config):
    """Build the ``DatabaseConfig`` described by the database options.

    When ``db_config`` is truthy the configuration is loaded from that YAML
    file via ``DatabaseConfig.from_yaml`` and the individual ``db_*`` values
    are not consulted at all, so the file genuinely takes priority (an
    unusable ``db_type`` no longer raises when a config file is supplied).

    Otherwise a ``DatabaseConfig`` is assembled from the individual
    arguments, with ``db_type`` converted through ``DatabaseType`` once.

    Raises whatever ``DatabaseType(db_type)`` raises for an unknown type
    when no config file is given.
    """
    if db_config:
        return DatabaseConfig.from_yaml(db_config)
    return DatabaseConfig(host=db_host, port=db_port,
                          type=DatabaseType(db_type), user=db_user,
                          password=db_password, name=db_name)


database_params = [
    click.option('-c', '--cache-dir', type=c.CLICK_DIR_PATH,
                 default=c.DEFAULT_CACHE_DIR,
                 help='directory with data files default:cache'),
    click.option('-C', '--db-config', type=c.CLICK_FILE_PATH,
                 help='database config yaml, takes priority'),
    click.option('-H', '--db-host', type=click.STRING,
                 envvar='SVY_DBHOST', default='localhost',
                 help='hostname/ip for database'),
    click.option('-P', '--db-port', type=c.CLICK_TCP_PORT,
                 envvar='SVY_DBPORT', default=50000,
Exemple #3
0
 def __init__(self):
     """Attach a logger named ``'statsworker.' + __name__`` to the instance."""
     logger_name = 'statsworker.' + __name__
     self.logger = log.getLogger(logger_name)
Exemple #4
0
from rpy2 import robjects as ro
from survey_stats.helpr import svyciprop_xlogit, svybyci_xlogit, factor_summary
from survey_stats.helpr import filter_survey_var, rm_nan_survey_var, svyby_nodrop
from survey_stats.helpr import fix_lonely_psus
from survey_stats import pdutil as u
from survey_stats.const import DECIMALS
from survey_stats import log
import gc

# Handles to the R packages used by this module, loaded at import time.
# NOTE(review): no import of ``importr`` is visible in this excerpt --
# presumably rpy2's ``importr``; confirm it is imported above.
rbase = importr('base')
rstats = importr('stats')
rsvy = importr('survey')

# 'feather' is loaded with on_conflict='warn' rather than the default.
rfeather = importr('feather', on_conflict='warn')

# Logger requested from the project's ``log`` helper without a name.
logger = log.getLogger()


def dim_design(d):
    """Return the R ``dim`` of the 'variables' element of ``d``.

    The element is located by looking up 'variables' in ``d.names`` and
    indexing ``d`` with that position; the result of R's ``base::dim`` is
    converted with ``pandas2ri.ri2py`` before being returned.
    """
    variables_pos = d.names.index('variables')
    variables = d[variables_pos]
    r_dims = rbase.dim(variables)
    return pandas2ri.ri2py(r_dims)

def subset_survey(des, filt, qn=None):
    # filt is a dict with vars as keys and list of acceptable values as levels
    # example from R:
    #  subset(dclus1, sch.wide=="Yes" & comp.imp=="Yes"
    if not len(filt.keys()) > 0:
        # empty filter, return original design object
        return des
    filtered = rbase.Reduce(
        "&",
        [filter_survey_var(des, k, v) for k, v in filt.items()] +