# NOTE: these experiment drivers assume the usual module-level imports
# (numpy as np, random, collections, copy, time, datetime, pickle) together with
# the project's helpers: generate_instance, generate_instance_general, preprocess,
# get_log_dict, compute_overlap_stats, compute_summary_stats, and the
# capAst_*/genAst_* algorithm implementations.

def run_real_ast_experiment(flag_savedata=True, genMethod='synthetic', nEst=20, nCand=80):

    # parameters required
    np.random.seed(1000)
    price_range = 1000  # highest possible price of a product
    eps = 1             # tolerance
    N = 1               # number of Monte Carlo iterations per dataset

    if genMethod == 'synthetic':
        real_data_list = [
            {'fname': 'freq_itemset_data/retail0p0001_240852_txns88162.csv', 'isCSV': True, 'min_ast_length': 3},
            {'fname': 'freq_itemset_data/foodmartFIM0p0001_233231_txns4141.csv', 'isCSV': True, 'min_ast_length': 4},
            {'fname': 'freq_itemset_data/chains0p00001_txns1112949.txt', 'isCSV': False, 'min_ast_length': 5},
            {'fname': 'freq_itemset_data/OnlineRetail0p000001_txns540455.txt', 'isCSV': False, 'min_ast_length': 3}]
    elif genMethod == 'tafeng':
        real_data_list = [
            {'fname': 'freq_itemset_data/tafeng_final_0p00001_txns119390.txt', 'isCSV': False, 'min_ast_length': 8}]

    algos = collections.OrderedDict({'Linear-Search': genAst_oracle,
                                     'Assort-LSH-G': genAst_AssortLSH,
                                     'Assort-Exact-G': genAst_AssortExact,
                                     'Assort-BZ-G': genAst_AssortBZ})
    benchmark = 'Linear-Search'
    loggs = get_log_dict(real_data_list, N, algos, price_range, eps)  # hack
    loggs['additional']['real_data_list'] = real_data_list

    badError = 0
    t1 = time.time()
    for i, real_data in enumerate(real_data_list):
        t0 = time.time()
        t = 0
        while t < N:
            print 'Iteration number is', str(t+1), 'of', N, ', for real ast data', real_data['fname']

            # generate the instance (prices, MNL parameters, feasible assortments)
            meta = {'eps': eps}
            if genMethod == 'synthetic':
                p, v, feasibles, C, prod = generate_instance_general(price_range, None, 'synthetic', t, lenFeas=None, real_data=real_data)
            else:
                p, v, feasibles, C, prod = generate_instance_general(price_range, None, 'tafeng', t, lenFeas=None, real_data=real_data)
            # loggs['additional']['C'][i,t] = C
            meta['feasibles'] = feasibles

            # preprocessing for the proposed algorithms
            if 'Assort-Exact-G' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_exact', feasibles=feasibles)
            if 'Assort-LSH-G' in algos:
                meta['db_LSH'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_LSH', nEst=nEst, nCand=nCand, feasibles=feasibles)
            if 'Assort-BZ-G' in algos:
                meta['db_BZ'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_BZ', nEst=nEst, nCand=nCand, feasibles=feasibles)

            # run the algorithms
            maxSetBenchmark = None
            for algoname in algos:
                print '\tExecuting', algoname
                loggs[algoname]['rev'][i, t], loggs[algoname]['maxSet'][(i, t)], loggs[algoname]['time'][i, t] = algos[algoname](prod, C, p, v, meta)
                print '\t\tTime taken is', loggs[algoname]['time'][i, t], 'sec.'
                if algoname == benchmark:
                    maxSetBenchmark = copy.deepcopy(loggs[algoname]['maxSet'][(i, t)])

            loggs, badError = compute_overlap_stats(benchmark, algos, loggs, i, t, badError, maxSetBenchmark, eps)
            t = t + 1

        print 'Experiments (', N, 'sims) for real ast data', real_data['fname'], 'are done.'
        print 'Cumulative time taken is', time.time() - t0, '\n'
        loggs = compute_summary_stats(algos, loggs, benchmark, i)

        # dump the logs incrementally after each dataset
        if flag_savedata:
            fname = './output/gen_loggs_real_ast_upto' + str(i) + '_nCand_' + str(nCand) + '_nEst_' + str(nEst) + '_' + datetime.datetime.now().strftime("%Y%m%d_%I%M%p") + '.pkl'
            with open(fname, 'wb') as f:
                pickle.dump(loggs, f)

    print '\nAll experiments done. Total time taken is', time.time() - t1, '\n\n'
    print "Summary:"
    for algoname in algos:
        print '\t', algoname, 'time_mean', loggs[algoname]['time_mean']
        print '\t', algoname, 'revPctErr_mean', loggs[algoname]['revPctErr_mean']

    return loggs
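# Illustrative usage (a minimal sketch, not part of the original experiments):
# how run_real_ast_experiment might be invoked from this module. It assumes the
# script runs from the repository root so that the freq_itemset_data/ files
# listed above and the ./output/ directory exist. The wrapper name
# example_real_ast_run and the argument values are hypothetical.
def example_real_ast_run():
    # run once per dataset without pickling intermediate results
    loggs = run_real_ast_experiment(flag_savedata=False, genMethod='synthetic',
                                    nEst=20, nCand=80)
    # per-algorithm summaries such as 'time_mean' and 'revPctErr_mean' are
    # filled in by compute_summary_stats and printed at the end of the run
    return loggs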
def run_prod_experiment_static_mnl(flag_capacitated=True, flag_savedata=True, genMethod='synthetic'):

    # parameters required
    random.seed(10)
    np.random.seed(1000)
    price_range = 1000  # highest possible price of a product
    eps = 0.1           # tolerance
    N = 50              # number of Monte Carlo iterations per product size

    if flag_capacitated:
        C = 100  # capacity of the assortment
        prodList = [100, 250, 500, 1000]
        algos = collections.OrderedDict({'Static-MNL': capAst_static_mnl, 'LP': capAst_LP})
        benchmark = 'LP'
        loggs = get_log_dict(prodList, N, algos, price_range, eps, C)
    else:
        prodList = [100, 200, 400, 800, 1600]
        algos = collections.OrderedDict({'Linear-Search': genAst_oracle,
                                         'Assort-Exact-G': genAst_AssortExact,
                                         'Assort-LSH-G': genAst_AssortLSH})
        benchmark = 'Linear-Search'
        loggs = get_log_dict(prodList, N, algos, price_range, eps)
        loggs['additional']['lenFeasibles'] = np.zeros(len(prodList))

    badError = 0
    t1 = time.time()
    for i, prod in enumerate(prodList):
        t0 = time.time()
        t = 0
        while t < N:
            print 'Iteration number is', str(t+1), 'of', N, ', for prod size', prod

            # generate the instance (prices and MNL parameters)
            meta = {'eps': eps}
            if flag_capacitated:
                if genMethod == 'tafeng':
                    with open("products_final.pkl", 'rb') as file_1:
                        product_choices = pickle.load(file_1)
                    choices = random.sample(product_choices, prod)
                    p, v = get_real_prices_parameters_by_product(choices)
                else:
                    p, v = generate_instance(price_range, prod, genMethod, t)
            else:
                p, v, feasibles, C, prod = generate_instance_general(price_range, prod, genMethod, t)
                loggs['additional']['C'][i, t] = C
                meta['feasibles'] = feasibles

            # preprocessing for the proposed algorithms
            if 'Assort-Exact' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'special_case_exact')
            if 'Assort-LSH' in algos:
                meta['db_LSH'], _, _ = preprocess(prod, C, p, 'special_case_LSH', nEst=20, nCand=80)  # hardcoded LSH parameters
            if 'Assort-Exact-G' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_exact', feasibles=feasibles)
            if 'Assort-LSH-G' in algos:
                meta['db_LSH'], _, _ = preprocess(prod, C, p, 'general_case_LSH', nEst=20, nCand=80, feasibles=feasibles)  # hardcoded LSH parameters

            # run the algorithms
            maxSetBenchmark = None
            for algoname in algos:
                print '\tExecuting', algoname
                loggs[algoname]['rev'][i, t], loggs[algoname]['maxSet'][(i, t)], loggs[algoname]['time'][i, t] = algos[algoname](prod, C, p, v, meta)
                print '\t\tTime taken is', loggs[algoname]['time'][i, t], 'sec.'
                if algoname == benchmark:
                    maxSetBenchmark = copy.deepcopy(loggs[algoname]['maxSet'][(i, t)])

            loggs, badError = compute_overlap_stats(benchmark, algos, loggs, i, t, badError, maxSetBenchmark, eps)
            t = t + 1

        print 'Experiments (', N, 'sims) for number of products', prod, 'are done.'
        print 'Cumulative time taken is', time.time() - t0, '\n'
        loggs = compute_summary_stats(algos, loggs, benchmark, i)
        if not flag_capacitated:
            loggs['additional']['lenFeasibles'][i] = len(feasibles)

        # dump the logs incrementally after each product size
        if flag_savedata:
            prefix = 'cap' if flag_capacitated else 'gen'
            fname = './output/' + prefix + '_loggs_' + genMethod + '_prod_' + str(prod) + '_' + datetime.datetime.now().strftime("%Y%m%d_%I%M%p") + '.pkl'
            with open(fname, 'wb') as f:
                pickle.dump(loggs, f)

    print '\nAll experiments done. Total time taken is', time.time() - t1, '\n\n'
    print "Summary:"
    for algoname in algos:
        print '\t', algoname, 'time_mean', loggs[algoname]['time_mean']
        print '\t', algoname, 'revPctErr_mean', loggs[algoname]['revPctErr_mean']

    return loggs
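# Illustrative usage (a minimal sketch, not part of the original experiments):
# the capacitated branch of run_prod_experiment_static_mnl benchmarks Static-MNL
# against the LP solution over prodList; the uncapacitated branch compares
# Linear-Search, Assort-Exact-G and Assort-LSH-G on general assortments. The
# 'tafeng' capacitated run additionally expects products_final.pkl in the
# working directory. The wrapper name example_static_mnl_run is hypothetical.
def example_static_mnl_run():
    # capacitated comparison on synthetic prices, without saving pickles
    loggs_cap = run_prod_experiment_static_mnl(flag_capacitated=True,
                                               flag_savedata=False,
                                               genMethod='synthetic')
    return loggs_cap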
def run_lenFeas_experiment(flag_savedata=True, genMethod='synthetic', nEst=20, nCand=80):

    # parameters required
    random.seed(10)
    np.random.seed(1000)
    price_range = 1000  # highest possible price of a product
    eps = 1             # tolerance
    N = 50              # number of Monte Carlo iterations per assortment count
    prod = 1000
    lenFeasibles = [100, 200, 400, 800, 1600, 3200, 6400, 12800, 25600, 51200]

    algos = collections.OrderedDict({'Linear-Search': genAst_oracle,
                                     'Assort-LSH-G': genAst_AssortLSH,
                                     'Assort-Exact-G': genAst_AssortExact,
                                     'Assort-BZ-G': genAst_AssortBZ})
    benchmark = 'Linear-Search'
    loggs = get_log_dict(lenFeasibles, N, algos, price_range, eps)  # hack
    loggs['additional']['lenFeasibles'] = lenFeasibles
    loggs['additional']['nEst'] = nEst
    loggs['additional']['nCand'] = nCand

    badError = 0
    t1 = time.time()
    for i, lenFeas in enumerate(lenFeasibles):
        t0 = time.time()
        t = 0
        while t < N:
            print 'Iteration number is', str(t+1), 'of', N, ', for no. of assortments', lenFeas

            # generate the instance (prices, MNL parameters, feasible assortments)
            meta = {'eps': eps}
            p, v, feasibles, C, prod = generate_instance_general(price_range, prod, genMethod, t, lenFeas=lenFeas)
            loggs['additional']['C'][i, t] = C
            meta['feasibles'] = feasibles

            # preprocessing for the proposed algorithms
            if 'Assort-Exact-G' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_exact', feasibles=feasibles)
            if 'Assort-LSH-G' in algos:
                meta['db_LSH'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_LSH', nEst=nEst, nCand=nCand, feasibles=feasibles)
            if 'Assort-BZ-G' in algos:
                meta['db_BZ'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_BZ', nEst=nEst, nCand=nCand, feasibles=feasibles)

            # run the algorithms
            maxSetBenchmark = None
            for algoname in algos:
                print '\tExecuting', algoname
                loggs[algoname]['rev'][i, t], loggs[algoname]['maxSet'][(i, t)], loggs[algoname]['time'][i, t] = algos[algoname](prod, C, p, v, meta)
                print '\t\tTime taken is', loggs[algoname]['time'][i, t], 'sec.'
                if algoname == benchmark:
                    maxSetBenchmark = copy.deepcopy(loggs[algoname]['maxSet'][(i, t)])

            loggs, badError = compute_overlap_stats(benchmark, algos, loggs, i, t, badError, maxSetBenchmark, eps)
            t = t + 1

        print 'Experiments (', N, 'sims) for number of feasibles', lenFeas, 'are done.'
        print 'Cumulative time taken is', time.time() - t0, '\n'
        loggs = compute_summary_stats(algos, loggs, benchmark, i)

        # dump the logs incrementally after each assortment-collection size
        if flag_savedata:
            fname = './output/gen_loggs_' + genMethod + '_lenF_' + str(lenFeas) + '_nCand_' + str(nCand) + '_nEst_' + str(nEst) + '_' + datetime.datetime.now().strftime("%Y%m%d_%I%M%p") + '.pkl'
            with open(fname, 'wb') as f:
                pickle.dump(loggs, f)

    print '\nAll experiments done. Total time taken is', time.time() - t1, '\n\n'
    print "Summary:"
    for algoname in algos:
        print '\t', algoname, 'time_mean', loggs[algoname]['time_mean']
        print '\t', algoname, 'revPctErr_mean', loggs[algoname]['revPctErr_mean']

    return loggs
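# Illustrative usage (a minimal sketch, not part of the original experiments):
# sweeping the LSH hyperparameters that run_lenFeas_experiment exposes. The
# (nEst, nCand) grid below is hypothetical; with flag_savedata=True each call
# pickles its own logs under ./output/ as each assortment count finishes.
def example_lenFeas_sweep():
    all_loggs = {}
    for n_est, n_cand in [(20, 80), (40, 160)]:  # hypothetical grid
        all_loggs[(n_est, n_cand)] = run_lenFeas_experiment(flag_savedata=True,
                                                            genMethod='synthetic',
                                                            nEst=n_est, nCand=n_cand)
    return all_loggs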