# NOTE: these experiment drivers assume the usual module-level imports
# (numpy as np, random, collections, copy, time, datetime, pickle) together with
# the project's helpers: generate_instance, generate_instance_general, preprocess,
# get_log_dict, compute_overlap_stats, compute_summary_stats, and the
# capAst_*/genAst_* algorithm implementations.

def run_real_ast_experiment(flag_savedata=True, genMethod='synthetic', nEst=20, nCand=80):

    # parameters required
    np.random.seed(1000)
    price_range = 1000  # highest possible price of a product
    eps = 1             # tolerance
    N = 1               # number of Monte Carlo iterations per dataset

    if genMethod == 'synthetic':
        real_data_list = [
            {'fname': 'freq_itemset_data/retail0p0001_240852_txns88162.csv', 'isCSV': True, 'min_ast_length': 3},
            {'fname': 'freq_itemset_data/foodmartFIM0p0001_233231_txns4141.csv', 'isCSV': True, 'min_ast_length': 4},
            {'fname': 'freq_itemset_data/chains0p00001_txns1112949.txt', 'isCSV': False, 'min_ast_length': 5},
            {'fname': 'freq_itemset_data/OnlineRetail0p000001_txns540455.txt', 'isCSV': False, 'min_ast_length': 3}]
    elif genMethod == 'tafeng':
        real_data_list = [
            {'fname': 'freq_itemset_data/tafeng_final_0p00001_txns119390.txt', 'isCSV': False, 'min_ast_length': 8}]

    algos = collections.OrderedDict({'Linear-Search': genAst_oracle,
                                     'Assort-LSH-G': genAst_AssortLSH,
                                     'Assort-Exact-G': genAst_AssortExact,
                                     'Assort-BZ-G': genAst_AssortBZ})
    benchmark = 'Linear-Search'
    loggs = get_log_dict(real_data_list, N, algos, price_range, eps)  # hack
    loggs['additional']['real_data_list'] = real_data_list

    badError = 0
    t1 = time.time()
    for i, real_data in enumerate(real_data_list):
        t0 = time.time()
        t = 0
        while t < N:
            print 'Iteration number is', str(t+1), 'of', N, ', for real ast data', real_data['fname']

            # generate the instance (prices, MNL parameters, feasible assortments)
            meta = {'eps': eps}
            if genMethod == 'synthetic':
                p, v, feasibles, C, prod = generate_instance_general(price_range, None, 'synthetic', t, lenFeas=None, real_data=real_data)
            else:
                p, v, feasibles, C, prod = generate_instance_general(price_range, None, 'tafeng', t, lenFeas=None, real_data=real_data)
            # loggs['additional']['C'][i,t] = C
            meta['feasibles'] = feasibles

            # preprocessing for the proposed algorithms
            if 'Assort-Exact-G' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_exact', feasibles=feasibles)
            if 'Assort-LSH-G' in algos:
                meta['db_LSH'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_LSH', nEst=nEst, nCand=nCand, feasibles=feasibles)
            if 'Assort-BZ-G' in algos:
                meta['db_BZ'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_BZ', nEst=nEst, nCand=nCand, feasibles=feasibles)

            # run the algorithms
            maxSetBenchmark = None
            for algoname in algos:
                print '\tExecuting', algoname
                loggs[algoname]['rev'][i, t], loggs[algoname]['maxSet'][(i, t)], loggs[algoname]['time'][i, t] = algos[algoname](prod, C, p, v, meta)
                print '\t\tTime taken is', loggs[algoname]['time'][i, t], 'sec.'
                if algoname == benchmark:
                    maxSetBenchmark = copy.deepcopy(loggs[algoname]['maxSet'][(i, t)])

            loggs, badError = compute_overlap_stats(benchmark, algos, loggs, i, t, badError, maxSetBenchmark, eps)
            t = t + 1

        print 'Experiments (', N, 'sims) for real ast data', real_data['fname'], 'are done.'
        print 'Cumulative time taken is', time.time() - t0, '\n'
        loggs = compute_summary_stats(algos, loggs, benchmark, i)

        # dump the logs incrementally after each dataset
        if flag_savedata:
            fname = './output/gen_loggs_real_ast_upto' + str(i) + '_nCand_' + str(nCand) + '_nEst_' + str(nEst) + '_' + datetime.datetime.now().strftime("%Y%m%d_%I%M%p") + '.pkl'
            with open(fname, 'wb') as f:
                pickle.dump(loggs, f)

    print '\nAll experiments done. Total time taken is', time.time() - t1, '\n\n'
    print "Summary:"
    for algoname in algos:
        print '\t', algoname, 'time_mean', loggs[algoname]['time_mean']
        print '\t', algoname, 'revPctErr_mean', loggs[algoname]['revPctErr_mean']

    return loggs
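# Illustrative usage (a minimal sketch, not part of the original experiments):
# how run_real_ast_experiment might be invoked from this module. It assumes the
# script runs from the repository root so that the freq_itemset_data/ files
# listed above and the ./output/ directory exist. The wrapper name
# example_real_ast_run and the argument values are hypothetical.
def example_real_ast_run():
    # run once per dataset without pickling intermediate results
    loggs = run_real_ast_experiment(flag_savedata=False, genMethod='synthetic',
                                    nEst=20, nCand=80)
    # per-algorithm summaries such as 'time_mean' and 'revPctErr_mean' are
    # filled in by compute_summary_stats and printed at the end of the run
    return loggs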
def run_prod_experiment_static_mnl(flag_capacitated=True, flag_savedata=True, genMethod='synthetic'):

    # parameters required
    random.seed(10)
    np.random.seed(1000)
    price_range = 1000  # highest possible price of a product
    eps = 0.1           # tolerance
    N = 50              # number of Monte Carlo iterations per product size

    if flag_capacitated:
        C = 100  # capacity of the assortment
        prodList = [100, 250, 500, 1000]
        algos = collections.OrderedDict({'Static-MNL': capAst_static_mnl, 'LP': capAst_LP})
        benchmark = 'LP'
        loggs = get_log_dict(prodList, N, algos, price_range, eps, C)
    else:
        prodList = [100, 200, 400, 800, 1600]
        algos = collections.OrderedDict({'Linear-Search': genAst_oracle,
                                         'Assort-Exact-G': genAst_AssortExact,
                                         'Assort-LSH-G': genAst_AssortLSH})
        benchmark = 'Linear-Search'
        loggs = get_log_dict(prodList, N, algos, price_range, eps)
        loggs['additional']['lenFeasibles'] = np.zeros(len(prodList))

    badError = 0
    t1 = time.time()
    for i, prod in enumerate(prodList):
        t0 = time.time()
        t = 0
        while t < N:
            print 'Iteration number is', str(t+1), 'of', N, ', for prod size', prod

            # generate the instance (prices and MNL parameters)
            meta = {'eps': eps}
            if flag_capacitated:
                if genMethod == 'tafeng':
                    with open("products_final.pkl", 'rb') as file_1:
                        product_choices = pickle.load(file_1)
                    choices = random.sample(product_choices, prod)
                    p, v = get_real_prices_parameters_by_product(choices)
                else:
                    p, v = generate_instance(price_range, prod, genMethod, t)
            else:
                p, v, feasibles, C, prod = generate_instance_general(price_range, prod, genMethod, t)
                loggs['additional']['C'][i, t] = C
                meta['feasibles'] = feasibles

            # preprocessing for the proposed algorithms
            if 'Assort-Exact' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'special_case_exact')
            if 'Assort-LSH' in algos:
                meta['db_LSH'], _, _ = preprocess(prod, C, p, 'special_case_LSH', nEst=20, nCand=80)  # hardcoded LSH parameters
            if 'Assort-Exact-G' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_exact', feasibles=feasibles)
            if 'Assort-LSH-G' in algos:
                meta['db_LSH'], _, _ = preprocess(prod, C, p, 'general_case_LSH', nEst=20, nCand=80, feasibles=feasibles)  # hardcoded LSH parameters

            # run the algorithms
            maxSetBenchmark = None
            for algoname in algos:
                print '\tExecuting', algoname
                loggs[algoname]['rev'][i, t], loggs[algoname]['maxSet'][(i, t)], loggs[algoname]['time'][i, t] = algos[algoname](prod, C, p, v, meta)
                print '\t\tTime taken is', loggs[algoname]['time'][i, t], 'sec.'
                if algoname == benchmark:
                    maxSetBenchmark = copy.deepcopy(loggs[algoname]['maxSet'][(i, t)])

            loggs, badError = compute_overlap_stats(benchmark, algos, loggs, i, t, badError, maxSetBenchmark, eps)
            t = t + 1

        print 'Experiments (', N, 'sims) for number of products', prod, 'are done.'
        print 'Cumulative time taken is', time.time() - t0, '\n'
        loggs = compute_summary_stats(algos, loggs, benchmark, i)
        if not flag_capacitated:
            loggs['additional']['lenFeasibles'][i] = len(feasibles)

        # dump the logs incrementally after each product size
        if flag_savedata:
            prefix = 'cap' if flag_capacitated else 'gen'
            fname = './output/' + prefix + '_loggs_' + genMethod + '_prod_' + str(prod) + '_' + datetime.datetime.now().strftime("%Y%m%d_%I%M%p") + '.pkl'
            with open(fname, 'wb') as f:
                pickle.dump(loggs, f)

    print '\nAll experiments done. Total time taken is', time.time() - t1, '\n\n'
    print "Summary:"
    for algoname in algos:
        print '\t', algoname, 'time_mean', loggs[algoname]['time_mean']
        print '\t', algoname, 'revPctErr_mean', loggs[algoname]['revPctErr_mean']

    return loggs
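# Illustrative usage (a minimal sketch, not part of the original experiments):
# the capacitated branch of run_prod_experiment_static_mnl benchmarks Static-MNL
# against the LP solution over prodList; the uncapacitated branch compares
# Linear-Search, Assort-Exact-G and Assort-LSH-G on general assortments. The
# 'tafeng' capacitated run additionally expects products_final.pkl in the
# working directory. The wrapper name example_static_mnl_run is hypothetical.
def example_static_mnl_run():
    # capacitated comparison on synthetic prices, without saving pickles
    loggs_cap = run_prod_experiment_static_mnl(flag_capacitated=True,
                                               flag_savedata=False,
                                               genMethod='synthetic')
    return loggs_cap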
def run_lenFeas_experiment(flag_savedata=True, genMethod='synthetic', nEst=20, nCand=80):

    # parameters required
    random.seed(10)
    np.random.seed(1000)
    price_range = 1000  # highest possible price of a product
    eps = 1             # tolerance
    N = 50              # number of Monte Carlo iterations per assortment count
    prod = 1000
    lenFeasibles = [100, 200, 400, 800, 1600, 3200, 6400, 12800, 25600, 51200]

    algos = collections.OrderedDict({'Linear-Search': genAst_oracle,
                                     'Assort-LSH-G': genAst_AssortLSH,
                                     'Assort-Exact-G': genAst_AssortExact,
                                     'Assort-BZ-G': genAst_AssortBZ})
    benchmark = 'Linear-Search'
    loggs = get_log_dict(lenFeasibles, N, algos, price_range, eps)  # hack
    loggs['additional']['lenFeasibles'] = lenFeasibles
    loggs['additional']['nEst'] = nEst
    loggs['additional']['nCand'] = nCand

    badError = 0
    t1 = time.time()
    for i, lenFeas in enumerate(lenFeasibles):
        t0 = time.time()
        t = 0
        while t < N:
            print 'Iteration number is', str(t+1), 'of', N, ', for no. of assortments', lenFeas

            # generate the instance (prices, MNL parameters, feasible assortments)
            meta = {'eps': eps}
            p, v, feasibles, C, prod = generate_instance_general(price_range, prod, genMethod, t, lenFeas=lenFeas)
            loggs['additional']['C'][i, t] = C
            meta['feasibles'] = feasibles

            # preprocessing for the proposed algorithms
            if 'Assort-Exact-G' in algos:
                meta['db_exact'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_exact', feasibles=feasibles)
            if 'Assort-LSH-G' in algos:
                meta['db_LSH'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_LSH', nEst=nEst, nCand=nCand, feasibles=feasibles)
            if 'Assort-BZ-G' in algos:
                meta['db_BZ'], _, meta['normConst'] = preprocess(prod, C, p, 'general_case_BZ', nEst=nEst, nCand=nCand, feasibles=feasibles)

            # run the algorithms
            maxSetBenchmark = None
            for algoname in algos:
                print '\tExecuting', algoname
                loggs[algoname]['rev'][i, t], loggs[algoname]['maxSet'][(i, t)], loggs[algoname]['time'][i, t] = algos[algoname](prod, C, p, v, meta)
                print '\t\tTime taken is', loggs[algoname]['time'][i, t], 'sec.'
                if algoname == benchmark:
                    maxSetBenchmark = copy.deepcopy(loggs[algoname]['maxSet'][(i, t)])

            loggs, badError = compute_overlap_stats(benchmark, algos, loggs, i, t, badError, maxSetBenchmark, eps)
            t = t + 1

        print 'Experiments (', N, 'sims) for number of feasibles', lenFeas, 'are done.'
        print 'Cumulative time taken is', time.time() - t0, '\n'
        loggs = compute_summary_stats(algos, loggs, benchmark, i)

        # dump the logs incrementally after each assortment-collection size
        if flag_savedata:
            fname = './output/gen_loggs_' + genMethod + '_lenF_' + str(lenFeas) + '_nCand_' + str(nCand) + '_nEst_' + str(nEst) + '_' + datetime.datetime.now().strftime("%Y%m%d_%I%M%p") + '.pkl'
            with open(fname, 'wb') as f:
                pickle.dump(loggs, f)

    print '\nAll experiments done. Total time taken is', time.time() - t1, '\n\n'
    print "Summary:"
    for algoname in algos:
        print '\t', algoname, 'time_mean', loggs[algoname]['time_mean']
        print '\t', algoname, 'revPctErr_mean', loggs[algoname]['revPctErr_mean']

    return loggs
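# Illustrative usage (a minimal sketch, not part of the original experiments):
# sweeping the LSH hyperparameters that run_lenFeas_experiment exposes. The
# (nEst, nCand) grid below is hypothetical; with flag_savedata=True each call
# pickles its own logs under ./output/ as each assortment count finishes.
def example_lenFeas_sweep():
    all_loggs = {}
    for n_est, n_cand in [(20, 80), (40, 160)]:  # hypothetical grid
        all_loggs[(n_est, n_cand)] = run_lenFeas_experiment(flag_savedata=True,
                                                            genMethod='synthetic',
                                                            nEst=n_est, nCand=n_cand)
    return all_loggs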