def estimate(dset,
             indvars,
             depvar_name='price',
             max_segment_size=15000,
             estimation_table='buildings',
             output_names=None,
             agents_groupby=[
                 'building_type_id',
             ]):
    """Estimate one log-linear hedonic (OLS) regression per segment.

    dset -- dataset object (project type) providing fetch() and store_coeff()
    indvars -- list of lists: one independent-variable-name list per segment
    depvar_name -- column regressed (log-transformed); rows with <= 0 dropped
    max_segment_size -- larger segments are randomly downsampled to this size
    estimation_table -- name of the table fetched from dset
    output_names -- 4-tuple of %-templates: (csv, title, coeff name, out var)
    agents_groupby -- columns defining the segments (list is only read, so the
        mutable default is harmless here)
    """
    output_csv, output_title, coeff_name, output_varname = output_names
    choosers = dset.fetch(estimation_table)

    segments = choosers.groupby(agents_groupby)
    num_segments = len(segments.size().index)
    if num_segments != len(indvars):
        print "ERROR: number of segments does not match number of sets of independent variable"

    # Map each segment key to its position in indvars.
    indvar_dict = dict(zip(segments.size().index.values, range(num_segments)))

    for name, segment in segments:
        ind_vars = indvars[indvar_dict[name]]
        name = str(name)
        # Only observations with a positive dependent variable can be logged.
        segment = segment[segment[depvar_name] > 0]
        if len(segment[depvar_name]) > max_segment_size:
            # Downsample without replacement so estimation stays tractable.
            segment = segment.ix[np.random.choice(segment.index,
                                                  max_segment_size,
                                                  replace=False)]
        depvar = segment[depvar_name].apply(np.log)
        est_data = pd.DataFrame(index=segment.index)
        # BUGFIX: iterate this segment's variable list (ind_vars), not the
        # full list-of-lists (indvars); ind_vars was computed above but unused.
        for varname in ind_vars:
            est_data[varname] = segment[varname]
        est_data = est_data.fillna(0)
        est_data = sm.add_constant(est_data, prepend=False)
        tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv % name, output_title % name, coeff_name % name
        print "Estimating hedonic for %s with %d observations" % (
            name, len(segment.index))

        model = sm.OLS(depvar, est_data)
        results = model.fit()
        print results.summary()
        print ' '
        # BUGFIX: pass the per-segment title (tmp_outtitle) instead of the raw
        # template (output_title), matching how tmp_outcsv is used; the
        # redundant recomputation of tmp_outcsv/tmp_outtitle was removed.
        misc.resultstocsv((results.rsquared, results.rsquared_adj),
                          est_data.columns,
                          zip(results.params, results.bse, results.tvalues),
                          tmp_outcsv,
                          hedonic=1,
                          tblname=tmp_outtitle)

        dset.store_coeff(tmp_coeffname, results.params.values,
                         results.params.index)
# Beispiel #2 ("Example #2" -- separator left over from scraping; commented
# out so the file parses)
# 0
def estimate(dset,config,year,show=True,variables=None):

  choosers = fetch_table(dset,config)
  if 'est_sample_size' in config: 
    choosers = choosers.ix[np.random.choice(choosers.index, config['est_sample_size'],replace=False)]
  output_csv, output_title, coeff_name, output_varname = config["output_names"]
 
  assert 'alternatives' in config
  alternatives = eval(config['alternatives'])
  alternatives = merge(dset,alternatives,config)

  t1 = time.time()

  segments = [(None,choosers)]
  if 'segment' in config:
    for varname in config['segment']:
      if varname not in choosers.columns:
        choosers[varname] = calcvar(choosers,config,dset,varname)
    segments = choosers.groupby(config['segment'])
  for name, segment in segments:

    name = str(name)
    if name is not None: tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv%name, output_title%name, coeff_name%name
    else: tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv, output_title, coeff_name

    assert "dep_var" in config
    depvar = config["dep_var"]
    global SAMPLE_SIZE
    SAMPLE_SIZE = config["alt_sample_size"] if "alt_sample_size" in config else SAMPLE_SIZE 
    sample, alternative_sample, est_params = interaction.mnl_interaction_dataset(
                                        segment,alternatives,SAMPLE_SIZE,chosenalts=segment[depvar])

    print "Estimating parameters for segment = %s, size = %d" % (name, len(segment.index)) 

    data = spec(alternative_sample,config,submodel=name)
    if show: print data.describe()
    data = data.as_matrix()
    
    fnames = config['ind_vars']
    fnames = config['ind_var_names'] if 'ind_var_names' in config else fnames

    fit, results = interaction.estimate(data,est_params,SAMPLE_SIZE)
    
    fnames = interaction.add_fnames(fnames,est_params)
    if show: print misc.resultstotable(fnames,results)
    misc.resultstocsv(fit,fnames,results,tmp_outcsv,tblname=tmp_outtitle)
    dset.store_coeff(tmp_coeffname,zip(*results)[0],fnames)

  print "Finished executing in %f seconds" % (time.time()-t1)
# Beispiel #3 ("Example #3" -- separator left over from scraping; commented
# out so the file parses)
# 0
def estimate(dset,config,year,show=True,variables=None):

  choosers = fetch_table(dset,config)
  if 'est_sample_size' in config: 
    choosers = choosers.ix[np.random.choice(choosers.index, config['est_sample_size'],replace=False)]
  output_csv, output_title, coeff_name, output_varname = config["output_names"]
 
  assert 'alternatives' in config
  alternatives = eval(config['alternatives'])
  alternatives = merge(dset,alternatives,config)

  t1 = time.time()

  segments = [(None,choosers)]
  if 'segment' in config:
    for varname in config['segment']:
      if varname not in choosers.columns:
        choosers[varname] = calcvar(choosers,config,dset,varname)
    segments = choosers.groupby(config['segment'])
  for name, segment in segments:
    name = str(name)
    if name is not None: tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv%name, output_title%name, coeff_name%name
    else: tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv, output_title, coeff_name

    assert "dep_var" in config
    depvar = config["dep_var"]
    global SAMPLE_SIZE
    SAMPLE_SIZE = config["alt_sample_size"] if "alt_sample_size" in config else SAMPLE_SIZE 
    sample, alternative_sample, est_params = interaction.mnl_interaction_dataset(
                                        segment,alternatives,SAMPLE_SIZE,chosenalts=segment[depvar])

    print "Estimating parameters for segment = %s, size = %d" % (name, len(segment.index)) 

    data = spec(alternative_sample,config,submodel=name)
    if show: print data.describe()
    data = data.as_matrix()
    
    fnames = config['ind_vars']
    fnames = config['ind_var_names'] if 'ind_var_names' in config else fnames

    fit, results = interaction.estimate(data,est_params,SAMPLE_SIZE)
    
    fnames = interaction.add_fnames(fnames,est_params)
    if show: print misc.resultstotable(fnames,results)
    misc.resultstocsv(fit,fnames,results,tmp_outcsv,tblname=tmp_outtitle)
    dset.store_coeff(tmp_coeffname,zip(*results)[0],fnames)

  print "Finished executing in %f seconds" % (time.time()-t1)
def estimate (dset,indvars,depvar = 'building_id',alternatives=None,SAMPLE_SIZE=100,max_segment_size = 1200,estimation_table = 'households_for_estimation',
              output_names=None,agents_groupby = ['income_3_tenure',]):
    ##HCLM ESTIMATION
    # dset -- dataset object (project type) providing fetch()/store_coeff();
    # indvars -- list of lists, one variable-name list per segment;
    # depvar -- column holding the chosen alternative id;
    # alternatives -- alternatives DataFrame sampled against each chooser;
    # output_names -- 4-tuple of %-templates (csv, title, coeff name, out var).
    output_csv, output_title, coeff_name, output_varname = output_names
    choosers = dset.fetch(estimation_table)

    segments = choosers.groupby(agents_groupby)
    num_segments = len(segments.size().index)
    if num_segments != len(indvars):
        print "ERROR: number of segments does not match number of sets of independent variable"
    # Map each segment key to its position in indvars.
    indvar_dict = dict(zip(segments.size().index.values,range(num_segments)))
    alts = alternatives
    for name, segment in segments:
        ind_vars = indvars[indvar_dict[name]]
        name = str(name)
        tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv%name, output_title%name, coeff_name%name
        if len(segment[depvar]) > max_segment_size: #reduce size of segment if too big so things don't bog down
            segment = segment.ix[np.random.choice(segment.index, max_segment_size,replace=False)]
        #,weight_var='residential_units')
        sample, alternative_sample, est_params = interaction.mnl_interaction_dataset(segment,alts,SAMPLE_SIZE,chosenalts=segment[depvar])
        ##Interaction variables
        # 'a_x_b' variables are built on the fly as the product of columns a, b.
        interaction_vars = [(var, var.split('_x_')) for var in ind_vars if '_x_' in var]
        for ivar in interaction_vars:
            alternative_sample[ivar[0]] = ((alternative_sample[ivar[1][0]])*alternative_sample[ivar[1][1]])

        print "Estimating parameters for segment = %s, size = %d" % (name, len(segment.index)) 
        if len(segment.index) > 50:
            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data.as_matrix()
            try:
                fit, results = interaction.estimate(data, est_params, SAMPLE_SIZE)
                fnames = interaction.add_fnames(ind_vars,est_params)
                print misc.resultstotable(fnames,results)
                misc.resultstocsv(fit,fnames,results,tmp_outcsv,tblname=tmp_outtitle)
                # Placeholder near-zero county fixed effects so downstream
                # simulation finds a coefficient for every county dummy.
                coefficients = zip(*results)[0]+(.0001,.0001,.0001,.0001,.0001,.0001,.0001,.0001,.0001,.0001,.0001,)
                varnames = fnames+['county8001','county8005','county8013','county8014','county8019','county8031','county8035','county8039','county8047','county8059','county8123']
                dset.store_coeff(tmp_coeffname,coefficients,varnames)
            # BUGFIX: narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; estimation failures report as before.
            except Exception:
                print 'SINGULAR MATRIX OR OTHER DATA/ESTIMATION PROBLEM'
        else:
            print 'SAMPLE SIZE TOO SMALL'
def estimate (dset,indvars,depvar_name = 'price',max_segment_size = 15000,estimation_table = 'buildings',output_names=None,agents_groupby = ['building_type_id',]):
    # Log-linear hedonic (OLS) per segment -- compact duplicate of the version
    # above, with the same fixes applied.
    # dset -- dataset object (project type); indvars -- list of lists, one
    # variable-name list per segment; output_names -- 4-tuple of %-templates.
    output_csv, output_title, coeff_name, output_varname = output_names
    choosers = dset.fetch(estimation_table)
    
    segments = choosers.groupby(agents_groupby)
    num_segments = len(segments.size().index)
    if num_segments != len(indvars):
        print "ERROR: number of segments does not match number of sets of independent variable"
        
    # Map each segment key to its position in indvars.
    indvar_dict = dict(zip(segments.size().index.values,range(num_segments)))
    
    for name, segment in segments:
        ind_vars = indvars[indvar_dict[name]]
        name = str(name)
        # Only observations with a positive dependent variable can be logged.
        segment = segment[segment[depvar_name]>0]
        if len(segment[depvar_name]) > max_segment_size:
            segment = segment.ix[np.random.choice(segment.index, max_segment_size,replace=False)]
        depvar = segment[depvar_name].apply(np.log)
        est_data = pd.DataFrame(index=segment.index)
        # BUGFIX: iterate this segment's variable list (ind_vars), not the
        # full list-of-lists (indvars); ind_vars was computed above but unused.
        for varname in ind_vars:
            est_data[varname] = segment[varname]
        est_data = est_data.fillna(0)
        est_data = sm.add_constant(est_data,prepend=False)
        tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv%name, output_title%name, coeff_name%name
        print "Estimating hedonic for %s with %d observations" % (name,len(segment.index))

        model = sm.OLS(depvar,est_data)
        results = model.fit()
        print results.summary()
        print ' '
        # BUGFIX: pass the per-segment title (tmp_outtitle) instead of the raw
        # template (output_title); the redundant recomputation of
        # tmp_outcsv/tmp_outtitle was removed.
        misc.resultstocsv((results.rsquared,results.rsquared_adj),est_data.columns,
                            zip(results.params,results.bse,results.tvalues),tmp_outcsv,hedonic=1,
                            tblname=tmp_outtitle)

        dset.store_coeff(tmp_coeffname,results.params.values,results.params.index)
# Beispiel #6 ("Example #6" -- separator left over from scraping; commented
# out so the file parses)
# 0
def estimate(dset,config,year=None,show=True,simulate=0,variables=None):

  t1 = time.time()
  
  buildings = fetch_table(dset,config,simulate)

  buildings = merge(dset,buildings,config)

  assert "output_names" in config
  output_csv, output_title, coeff_name, output_varname = config["output_names"]

  print "Finished specifying in %f seconds" % (time.time()-t1)
  t1 = time.time()

  simrents = []
  segments = [(None,buildings)]
  if 'segment' in config: segments = buildings.groupby(config['segment'])
  
  for name, segment in segments:
    
    est_data = spec(segment,config,submodel=name,dset=dset)
    if name is not None: tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv%name, output_title%name, coeff_name%name
    else: tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv, output_title, coeff_name

    if not simulate:

      assert "dep_var" in config
      depvar = segment[config["dep_var"]]
      if "dep_var_transform" in config: depvar = depvar.apply(eval(config['dep_var_transform']))
      
      if name: print "Estimating hedonic for %s with %d observations" % (name,len(segment.index))
      if show : print est_data.describe()

      model = sm.OLS(depvar,est_data)
      results = model.fit()
      if show: print results.summary()

      tmp_outcsv = output_csv if name is None else output_csv%name
      tmp_outtitle = output_title if name is None else output_title%name
      misc.resultstocsv((results.rsquared,results.rsquared_adj),est_data.columns,
                        zip(results.params,results.bse,results.tvalues),tmp_outcsv,hedonic=1,
                        tblname=output_title)

      dset.store_coeff(tmp_coeffname,results.params.values,results.params.index)

    else:

      print "Generating rents on %d buildings" % (est_data.shape[0])
    
      vec = dset.load_coeff(tmp_coeffname)
      vec = np.reshape(vec,(vec.size,1))
      rents = est_data.dot(vec).astype('f4')
      if "output_transform" in config: rents = rents.apply(eval(config['output_transform']))
   
      simrents.append(rents[rents.columns[0]])

  if simulate:
    simrents = pd.concat(simrents)
    dset.buildings[output_varname] = simrents.reindex(dset.buildings.index)
    dset.store_attr(output_varname,year,simrents)

  print "Finished executing in %f seconds" % (time.time()-t1)
# Beispiel #7 ("Example #7" -- separator left over from scraping; commented
# out so the file parses)
# 0
def lcmnl_estimate(cmdata,numclasses,csdata,numalts,chosen,maxiter=MAXITER,emtol=EMTOL,\
                     skipprep=False,csbeta=None,cmbeta=None,csfnames=None,cmfnames=None):

    loglik = -999999
    l_0 = None
    if csbeta is None:
        csbeta = [np.random.rand(csdata.shape[1]) for i in range(numclasses)]
    if csfnames is None:
        csfnames = ['cs%d' % i for i in range(csdata.shape[1])]
    if cmfnames is None:
        cmfnames = ['cm%d' % i for i in range(cmdata.shape[1])]
    if not skipprep:
        cmdata, cmfnames = prep_cm_data(cmdata, numclasses, cmfnames)
    if cmbeta is None: cmbeta = np.random.rand(cmdata.shape[1]) * 10.0 - 5.0
    results_d = {}

    for i in range(maxiter):
        print "Running iteration %d" % (i + 1)
        print time.ctime()

        # EXPECTATION
        def expectation(cmbeta, csbeta):
            print "Running class membership model"
            cmprobs = mnl.mnl_simulate(cmdata,
                                       cmbeta,
                                       numclasses,
                                       GPU=GPU,
                                       returnprobs=1)

            csprobs = []
            for cno in range(numclasses):
                tmp = mnl.mnl_simulate(csdata,
                                       csbeta[cno],
                                       numalts,
                                       GPU=GPU,
                                       returnprobs=1)
                tmp = np.sum(tmp * chosen, axis=1)  # keep only chosen probs
                csprobs.append(np.reshape(tmp, (-1, 1)))
            csprobs = np.concatenate(csprobs, axis=1)

            h = csprobs * cmprobs
            loglik = np.sum(np.log(np.sum(h, axis=1)))
            wts = h / np.reshape(np.sum(h, axis=1), (-1, 1))
            return loglik, wts

        oldloglik = loglik
        loglik, wts = expectation(cmbeta, csbeta)
        if l_0 is None: l_0 = loglik
        print "current cmbeta", cmbeta
        print "current csbeta", csbeta
        print "current loglik", loglik, i + 1, "\n\n"
        if abs(loglik - oldloglik) < emtol: break

        # MAXIMIZATION

        for cno in range(numclasses):
            print "Estimating class specific model for class %d" % (cno + 1)
            t1 = time.time()
            weights = np.reshape(wts[:, cno], (-1, 1))
            fit, results = mnl.mnl_estimate(csdata,
                                            chosen,
                                            numalts,
                                            GPU=GPU,
                                            weights=weights,
                                            beta=csbeta[cno])
            print "Finished in %fs" % (time.time() - t1)
            csbeta[cno] = zip(*results)[0]
            results_d['cs%d' % cno] = results

        print "Estimating class membership model"
        t1 = time.time()
        fit, results = mnl.mnl_estimate(cmdata,None,numclasses,GPU=GPU,weights=wts,lcgrad=True, \
                                                 beta=cmbeta,coeffrange=(-1000,1000))
        print "Finished in %fs" % (time.time() - t1)
        cmbeta = zip(*results)[0]
        results_d['cm'] = results

    l_1 = loglik
    l_0, foo = expectation(np.zeros(len(cmbeta)),
                           [np.zeros(len(a)) for a in csbeta])
    ll_ratio = 1 - (l_1 / l_0)

    print "Null Log-liklihood: %f" % l_0
    print "Log-liklihood at convergence: %f" % l_1
    print "Log-liklihood ratio: %f" % ll_ratio

    a = []
    fnames = []
    fnames += cmfnames
    a += results_d['cm']
    for i in range(numclasses):
        fnames += ['%s cls%d' % (s, i) for s in csfnames]
        a += results_d['cs%d' % i]

    print misc.resultstotable(fnames, a)
    fit = (l_0, l_1, ll_ratio)
    misc.resultstocsv(fit,
                      fnames,
                      a,
                      "lc-coeff.csv",
                      tblname="Latent Class Model Coefficients")

    return (l_0, l_1, ll_ratio), results_d
# Beispiel #8 ("Example #8" -- separator left over from scraping; commented
# out so the file parses)
# 0
def lcmnl_estimate(cmdata,numclasses,csdata,numalts,chosen,maxiter=MAXITER,emtol=EMTOL,\
                     skipprep=False,csbeta=None,cmbeta=None,csfnames=None,cmfnames=None):

  loglik = -999999
  l_0 = None
  if csbeta is None: csbeta = [np.random.rand(csdata.shape[1]) for i in range(numclasses)]
  if csfnames is None: csfnames = ['cs%d'%i for i in range(csdata.shape[1])]
  if cmfnames is None: cmfnames = ['cm%d'%i for i in range(cmdata.shape[1])]
  if not skipprep: cmdata,cmfnames = prep_cm_data(cmdata,numclasses,cmfnames)
  if cmbeta is None: cmbeta = np.random.rand(cmdata.shape[1])*10.0-5.0
  results_d = {}
  
  for i in range(maxiter):
    print "Running iteration %d" % (i+1)
    print time.ctime()

    # EXPECTATION
    def expectation(cmbeta,csbeta):
      print "Running class membership model"
      cmprobs = mnl.mnl_simulate(cmdata,cmbeta,numclasses,GPU=GPU,returnprobs=1)

      csprobs = []
      for cno in range(numclasses):
        tmp = mnl.mnl_simulate(csdata,csbeta[cno],numalts,GPU=GPU,returnprobs=1)
        tmp = np.sum(tmp*chosen,axis=1) # keep only chosen probs
        csprobs.append(np.reshape(tmp,(-1,1)))
      csprobs = np.concatenate(csprobs,axis=1)

      h = csprobs * cmprobs
      loglik = np.sum(np.log(np.sum(h,axis=1)))
      wts = h / np.reshape(np.sum(h,axis=1),(-1,1))
      return loglik, wts

    oldloglik = loglik
    loglik, wts = expectation(cmbeta,csbeta)
    if l_0 is None: l_0 = loglik
    print "current cmbeta", cmbeta
    print "current csbeta", csbeta
    print "current loglik", loglik, i+1, "\n\n"
    if abs(loglik-oldloglik) < emtol: break
   
    # MAXIMIZATION

    for cno in range(numclasses):
      print "Estimating class specific model for class %d" % (cno+1)
      t1 =  time.time()
      weights=np.reshape(wts[:,cno],(-1,1))
      fit, results  = mnl.mnl_estimate(csdata,chosen,numalts,GPU=GPU,weights=weights,beta=csbeta[cno])
      print "Finished in %fs" % (time.time()-t1)
      csbeta[cno] = zip(*results)[0]
      results_d['cs%d'%cno] = results
    
    print "Estimating class membership model"
    t1 =  time.time()
    fit, results = mnl.mnl_estimate(cmdata,None,numclasses,GPU=GPU,weights=wts,lcgrad=True, \
                                             beta=cmbeta,coeffrange=(-1000,1000))
    print "Finished in %fs" % (time.time()-t1)
    cmbeta = zip(*results)[0]
    results_d['cm'] = results 
 
  l_1 = loglik 
  l_0, foo = expectation(np.zeros(len(cmbeta)),[np.zeros(len(a)) for a in csbeta])
  ll_ratio = 1-(l_1/l_0)
  
  print "Null Log-liklihood: %f" % l_0
  print "Log-liklihood at convergence: %f" % l_1
  print "Log-liklihood ratio: %f" % ll_ratio

  a = []
  fnames = []
  fnames += cmfnames
  a += results_d['cm']
  for i in range(numclasses):
    fnames += ['%s cls%d'%(s,i) for s in csfnames]
    a += results_d['cs%d'%i]

  print misc.resultstotable(fnames,a)
  fit = (l_0,l_1,ll_ratio)
  misc.resultstocsv(fit,fnames,a,"lc-coeff.csv",tblname="Latent Class Model Coefficients")

  return (l_0,l_1,ll_ratio), results_d
def estimate(
    dset,
    indvars,
    depvar="building_id",
    alternatives=None,
    SAMPLE_SIZE=100,
    max_segment_size=1200,
    estimation_table="households_for_estimation",
    output_names=None,
    agents_groupby=["income_3_tenure"],
):
    ##HCLM ESTIMATION
    # Duplicate of the HCLM estimator above (black-formatted); same fix
    # applied. dset -- dataset object (project type); indvars -- list of
    # lists, one variable-name list per segment; output_names -- 4-tuple of
    # %-templates (csv, title, coeff name, output var).
    output_csv, output_title, coeff_name, output_varname = output_names
    choosers = dset.fetch(estimation_table)

    segments = choosers.groupby(agents_groupby)
    num_segments = len(segments.size().index)
    if num_segments != len(indvars):
        print "ERROR: number of segments does not match number of sets of independent variable"
    # Map each segment key to its position in indvars.
    indvar_dict = dict(zip(segments.size().index.values, range(num_segments)))
    alts = alternatives
    for name, segment in segments:
        ind_vars = indvars[indvar_dict[name]]
        name = str(name)
        tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv % name, output_title % name, coeff_name % name
        if len(segment[depvar]) > max_segment_size:  # reduce size of segment if too big so things don't bog down
            segment = segment.ix[np.random.choice(segment.index, max_segment_size, replace=False)]
        # ,weight_var='residential_units')
        sample, alternative_sample, est_params = interaction.mnl_interaction_dataset(
            segment, alts, SAMPLE_SIZE, chosenalts=segment[depvar]
        )
        ##Interaction variables
        # 'a_x_b' variables are built on the fly as the product of columns a, b.
        interaction_vars = [(var, var.split("_x_")) for var in ind_vars if "_x_" in var]
        for ivar in interaction_vars:
            alternative_sample[ivar[0]] = (alternative_sample[ivar[1][0]]) * alternative_sample[ivar[1][1]]

        print "Estimating parameters for segment = %s, size = %d" % (name, len(segment.index))
        if len(segment.index) > 50:
            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data.as_matrix()
            try:
                fit, results = interaction.estimate(data, est_params, SAMPLE_SIZE)
                fnames = interaction.add_fnames(ind_vars, est_params)
                print misc.resultstotable(fnames, results)
                misc.resultstocsv(fit, fnames, results, tmp_outcsv, tblname=tmp_outtitle)
                # Placeholder near-zero county fixed effects so downstream
                # simulation finds a coefficient for every county dummy.
                coefficients = zip(*results)[0] + (
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                    0.0001,
                )
                varnames = fnames + [
                    "county8001",
                    "county8005",
                    "county8013",
                    "county8014",
                    "county8019",
                    "county8031",
                    "county8035",
                    "county8039",
                    "county8047",
                    "county8059",
                    "county8123",
                ]
                dset.store_coeff(tmp_coeffname, coefficients, varnames)
            # BUGFIX: narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; failures are reported as before.
            except Exception:
                print "SINGULAR MATRIX OR OTHER DATA/ESTIMATION PROBLEM"
        else:
            print "SAMPLE SIZE TOO SMALL"
# Beispiel #10 ("Example #10" -- separator left over from scraping; commented
# out so the file parses)
# 0
def estimate(dset, config, year=None, show=True, simulate=0, variables=None):

    t1 = time.time()

    buildings = fetch_table(dset, config, simulate)

    buildings = merge(dset, buildings, config)

    assert "output_names" in config
    output_csv, output_title, coeff_name, output_varname = config[
        "output_names"]

    print "Finished specifying in %f seconds" % (time.time() - t1)
    t1 = time.time()

    simrents = []
    segments = [(None, buildings)]
    if 'segment' in config: segments = buildings.groupby(config['segment'])

    for name, segment in segments:

        est_data = spec(segment, config, submodel=name, dset=dset)
        if name is not None:
            tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv % name, output_title % name, coeff_name % name
        else:
            tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv, output_title, coeff_name

        if not simulate:

            assert "dep_var" in config
            depvar = segment[config["dep_var"]]
            if "dep_var_transform" in config:
                depvar = depvar.apply(eval(config['dep_var_transform']))

            if name:
                print "Estimating hedonic for %s with %d observations" % (
                    name, len(segment.index))
            if show: print est_data.describe()

            model = sm.OLS(depvar, est_data)
            results = model.fit()
            if show: print results.summary()

            tmp_outcsv = output_csv if name is None else output_csv % name
            tmp_outtitle = output_title if name is None else output_title % name
            misc.resultstocsv((results.rsquared, results.rsquared_adj),
                              est_data.columns,
                              zip(results.params, results.bse,
                                  results.tvalues),
                              tmp_outcsv,
                              hedonic=1,
                              tblname=output_title)

            dset.store_coeff(tmp_coeffname, results.params.values,
                             results.params.index)

        else:

            print "Generating rents on %d buildings" % (est_data.shape[0])

            vec = dset.load_coeff(tmp_coeffname)
            vec = np.reshape(vec, (vec.size, 1))
            rents = est_data.dot(vec).astype('f4')
            if "output_transform" in config:
                rents = rents.apply(eval(config['output_transform']))

            simrents.append(rents[rents.columns[0]])

    if simulate:
        simrents = pd.concat(simrents)
        dset.buildings[output_varname] = simrents.reindex(dset.buildings.index)
        dset.store_attr(output_varname, year, simrents)

    print "Finished executing in %f seconds" % (time.time() - t1)
# Beispiel #11 ("Example #11" -- separator left over from scraping; commented
# out so the file parses)
# 0
    # NOTE(review): orphaned fragment -- the enclosing function's `def` line
    # was lost in the scrape that produced this file. The names segment, alts,
    # SAMPLE_SIZE, depvar, ind_vars and the tmp_* outputs come from the
    # missing header/body above; presumably another per-segment MNL location
    # choice estimator like the ones earlier in this file -- TODO confirm.
    # Code below is left byte-identical.
    print segment.reset_index().building_id.describe()
    alts.index = alts.index.astype('int32')
    #sample, alternative_sample, est_params = interaction.mnl_interaction_dataset(segment,alts,SAMPLE_SIZE,chosenalts=segment[depvar],weight_var='non_residential_sqft')
    sample, alternative_sample, est_params = interaction.mnl_interaction_dataset(segment,alts,SAMPLE_SIZE,chosenalts=segment[depvar])
    #alternative_sample['paris_x_employees'] = (alternative_sample.in_paris*alternative_sample.employees)
    print "Estimating parameters for segment = %s, size = %d" % (name, len(segment.index)) 
    # Only estimate when the segment has enough observations to be meaningful.
    if len(segment.index) > 50:
        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        est_data = est_data.fillna(0)
        data = est_data.as_matrix()
        try:
            fit, results = interaction.estimate(data, est_params, SAMPLE_SIZE)
            #print fit
            #print results
            fnames = interaction.add_fnames(ind_vars,est_params)
            print misc.resultstotable(fnames,results)
            misc.resultstocsv(fit,fnames,results,tmp_outcsv,tblname=tmp_outtitle)
            dset.store_coeff(tmp_coeffname,zip(*results)[0],fnames)
        # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit.
        except:
            print 'SINGULAR MATRIX OR OTHER DATA/ESTIMATION PROBLEM'
    else:
        print 'SAMPLE SIZE TOO SMALL'
print dset.coeffs[('emp_location_6','coeffs')]
print dset.coeffs[('emp_location_6','coeffs')][0]
print dset.coeffs[('emp_location_6','coeffs')][1]
print dset.coeffs[('emp_location_6','coeffs')][2]
print dset.coeffs[('emp_location_6','coeffs')][3]
print dset.coeffs[('emp_location_6','coeffs')][4]
print dset.coeffs[('emp_location_6','coeffs')][5]