import us import pandas as pd import numpy as np from cytoolz.itertoolz import unique from cytoolz.functoolz import thread_last, identity from cytoolz.curried import map, filter, curry from survey_stats import pdutil # import sys # import traceback as tb from survey_stats import log logger = log.getLogger(__name__) US_STATES_FIPS_INTS = thread_last(us.STATES_AND_TERRITORIES, map(lambda x: x.fips), filter(lambda x: x is not None), map(lambda x: int(x)), list) SITECODE_TRANSLATORS = { 'fips': lambda x: (us.states.lookup('%.2d' % x).abbr if int(x) in US_STATES_FIPS_INTS else 'NA'), 'codes': identity } SVYDESIGN_COLS = ['sitecode', 'strata', 'psu', 'weight'] def convert_cat_codes(s, fmt):
import click import survey_stats from survey_stats.dbi import DatabaseConfig, DatabaseType import survey_stats.const as c from survey_stats import log logger = log.getLogger('cli') def resolve_db_args(db_host, db_port, db_type, db_user, db_password, db_name, db_config): db_type = DatabaseType(db_type) dbc = DatabaseConfig(host=db_host, port=db_port, type=DatabaseType(db_type), user=db_user, password=db_password, name=db_name) if db_config: dbc = DatabaseConfig.from_yaml(db_config) return dbc database_params = [ click.option('-c', '--cache-dir', type=c.CLICK_DIR_PATH, default=c.DEFAULT_CACHE_DIR, help='directory with data files default:cache'), click.option('-C', '--db-config', type=c.CLICK_FILE_PATH, help='database config yaml, takes priority'), click.option('-H', '--db-host', type=click.STRING, envvar='SVY_DBHOST', default='localhost', help='hostname/ip for database'), click.option('-P', '--db-port', type=c.CLICK_TCP_PORT, envvar='SVY_DBPORT', default=50000,
def __init__(self):
    """Give the instance a logger named ``statsworker.<this module's name>``."""
    logger_name = 'statsworker.' + __name__
    self.logger = log.getLogger(logger_name)
from rpy2 import robjects as ro from survey_stats.helpr import svyciprop_xlogit, svybyci_xlogit, factor_summary from survey_stats.helpr import filter_survey_var, rm_nan_survey_var, svyby_nodrop from survey_stats.helpr import fix_lonely_psus from survey_stats import pdutil as u from survey_stats.const import DECIMALS from survey_stats import log import gc rbase = importr('base') rstats = importr('stats') rsvy = importr('survey') rfeather = importr('feather', on_conflict='warn') logger = log.getLogger() def dim_design(d): return pandas2ri.ri2py(rbase.dim(d[d.names.index('variables')])) def subset_survey(des, filt, qn=None): # filt is a dict with vars as keys and list of acceptable values as levels # example from R: # subset(dclus1, sch.wide=="Yes" & comp.imp=="Yes" if not len(filt.keys()) > 0: # empty filter, return original design object return des filtered = rbase.Reduce( "&", [filter_survey_var(des, k, v) for k, v in filt.items()] +