def main():
    data = edata.load("train.edata")
    testdata = edata.load("test.edata")
    ## data.removekcsq(3, 15)
    # testdata = edata.load("test.edata")
    #
    # model = complexModel(data, False)
    # print model.fit()
    # model.save("trying.cModel")
    ## print model.useTestset(testdata)
    #
    bl = baseline(data)
    bl.fit()
    print "baseline:", bl.useTestset(testdata)
    # --------------------------------------------------------------------------
    model = complexModel.load("trying.cModel")
    model.normalizeParameters()
    print "model:", model.useTestset(testdata)
    print np.average(model.ca), np.std(model.ca)
    print np.average(model.cr), np.std(model.cr)
    print np.average(model.cg), np.std(model.cg)
    print np.average(model.cb), np.std(model.cb)
    print np.average(model.st), np.std(model.st)
    print np.average(model.se), np.std(model.se)
def get_Index(question, story):
    real_question = question
    question_id = question["qid"]
    if question['type'] == 'Sch':
        text = story['sch']
    else:
        text = story["text"]
    question = question["text"]
    # print("QUESTION: ", question)
    rake = Rake()
    rake.extract_keywords_from_text(real_question["text"])  # this is question text
    # Code
    stopwords = set(nltk.corpus.stopwords.words("english"))
    # question_stem_list = chunk.lemmatize(nltk.pos_tag(nltk.word_tokenize(question)))
    # question_stem = "".join(t[0] + " " for t in question_stem_list)
    question_stem = question
    qbow = baseline.get_bow(baseline.get_sentences(question_stem)[0], stopwords)
    sentences = baseline.get_sentences(text)
    question = chunk.get_sentences(question)
    global noun_ids
    global verb_ids
    base_ans, index = baseline.baseline(qbow, sentences, stopwords, real_question["text"],
                                        rake.get_ranked_phrases(), story["sid"],
                                        noun_ids, verb_ids)
    return index
def baseShow(self, needClick=True):
    self.drawBox()
    if not needClick:
        while not self.success:
            myBase = baseline(n=self.n, boxView=self.boxView)
            for mouseLeft, mouseRight, pos in myBase.base():
                self.autoReact(mouseLeft, mouseRight, pos)
    while needClick:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                myBase = baseline(n=self.n, boxView=self.boxView)
                for mouseLeft, mouseRight, pos in myBase.base():
                    self.react(mouseLeft, mouseRight, pos)
        pygame.display.update()
def test_precision(self):
    df = pd.read_pickle('../data/final/df_final.pkl')
    data = d.split_data(df, True)
    data_train = data[0]
    data_test = data[1]
    data_val = data[2]
    b = base.baseline(df, False)
    als_result = als_precision(data_train, data_val, b)
    assert 1 == 1
def solve_baseline(self, stopwords, keyWord=None):
    # question is only one sentence so list has one item.
    qsent = bl.get_sentences(self.question)[0]
    # bag of words
    qbow = bl.get_bow(qsent, stopwords)
    sentences = bl.get_sentences(self.get_text())
    answer, i = bl.baseline(qbow, sentences, stopwords, keyWord)
    # print('answer: ', answer)
    key = set(['NN', 'NNP', 'JJ'])  # , 'JJ', 'VBN', 'VB'])
    ans = set([t[0] for t in answer if t[1] in key])
    ans.add('a')
    ans.add('the')
    self.answer = " ".join(w for w in ans)
    return self.answer
def validation(train, valid, mode='validation', param=0):
    import data_processing as dp
    dphelper = dp.data_processing()
    dense_train, sparse_train = dphelper.split(train)
    dense_valid, sparse_valid = dphelper.split(valid)

    import sgd_bias as sgd
    train_rss_dense, valid_rss_dense = sgd.sgd_bias(dense_train, dense_valid, 'validation')

    import baseline as bs
    train_rss_sparse, valid_rss_sparse = bs.baseline(sparse_train, sparse_valid, 'validation')

    return train_rss_dense + train_rss_sparse, valid_rss_dense + valid_rss_sparse
def best_sent(self, keyword=None):
    stopwords = set(nltk.corpus.stopwords.words("english"))
    # use sch whenever possible
    if 'sch' in self.qtype:
        qtype = 'sch'
    else:
        qtype = 'story'
    # find sentence with answer
    qsent = bl.get_sentences(self.question)[0]
    qbow = bl.get_bow(qsent, stopwords)
    # sents = bl.get_sentences(all_texts[qtype][self.text_name])
    sgraphs = Question.text_depgraphs[qtype][self.text_name]
    sents = [depgraph.graph2sent(g) for g in sgraphs]
    # print(len(sents))
    best_sent, index = bl.baseline(qbow, sents, stopwords, keyword)
    return best_sent, index, qtype
def prediction(train_valid, test, pred_filename):
    import data_processing as dp
    dphelper = dp.data_processing()
    dense_train, sparse_train = dphelper.split(train_valid)
    dense_test, sparse_test = dphelper.split(test)
    #######
    import sgd_bias as sgd
    y_hat_dense, train_rmse_dense = sgd.sgd_bias(dense_train, dense_test, 'prediction')

    import baseline as bs
    y_hat_sparse, train_rmse_sparse = bs.baseline(sparse_train, sparse_test, 'prediction')
    #######
    print 'dense subset train rmse: %.16f' % train_rmse_dense
    print 'sparse subset train rmse: %.16f' % train_rmse_sparse

    test = dphelper.merge(test, y_hat_dense, y_hat_sparse)
    util.write_predictions(test, pred_filename)
def solve_why(self):
    # return
    stopwords = set(nltk.corpus.stopwords.words("english"))
    # find sentence with answer
    qsent = bl.get_sentences(self.question)[0]
    # the question word is not significant, so remove it.
    qsent.pop(0)
    qbow = bl.get_bow(qsent, stopwords)
    sents = bl.get_sentences(all_texts[self.qtype[0]][self.text_name])
    best_sent, index = bl.baseline(qbow, sents, stopwords, 'because')
    self.answer = " ".join(t[0] for t in best_sent)
    qtype = self.qtype[0]
    ans = bl.select(best_sent, 'because', 30)
    if len(ans) < 4:
        # not very accurate; needs improvement.
        self.solve_baseline(stopwords, 'because')
        # self.answer += ' &'
    else:
        # accurate
        self.answer = " ".join(ans)
def solve_who(self):
    # return
    global all_texts
    stopwords = set(nltk.corpus.stopwords.words("english"))
    last = utils.last_word(self.question.rstrip(' !?.;\"n'))
    # if False:
    if 'sch' in self.qtype:
        qtype = 'sch'
    else:
        qtype = 'story'
    if last.lower() == 'about':
        self.answer = bl.find_most_common(
            all_texts[qtype][self.text_name], 'NN', 'JJ', 3, stopwords)
        self.answer += ' a'
    else:
        # return
        # find sentence with answer
        qsent = bl.get_sentences(self.question)[0]
        # the question word is not significant, so remove it.
        qsent.pop(0)
        qbow = bl.get_bow(qsent, stopwords)
        sents = bl.get_sentences(all_texts[qtype][self.text_name])
        best_sent, index = bl.baseline(qbow, sents, stopwords)
        self.answer = " ".join(t[0] for t in best_sent)
        # find answer in sentence
        sgraph = self.get_dgraph(qtype, index)
        words = depgraph.get_relatives(sgraph, 'nsubj', 1, 'det')
        words = set(words)
        # add some words depending on type of question
        w = next(iter(words))
        if len(words) == 1 and starts_with_vowel(w):
            words.add('an')
        elif qtype == 'sch':
            words.add('the')
        else:
            words.add('a')
        self.answer = ' '.join(words)
def process_baseline(oracle_csv):
    df = pd.ExcelFile(oracle_csv).parse('Sheet1')
    df_baseline_pinyin = pd.ExcelFile(
        os.path.join("..", "data", "proposal", "BaselineResponses.xlsx")).parse('Sheet1')
    # df = pd.read_csv(oracle_csv)
    names = df["English"]
    o1 = df["Pinyin_O1"]
    o2 = df["Pinyin_O2"]
    bp = df_baseline_pinyin["Baseline"]
    distance = 0
    diff_count = 0
    for name, name1, name2, pinyin in zip(names, o1, o2, bp):
        if name != name1 or name != name2:
            diff_count += 1
        baseline_guess = baseline.baseline(name)
        print(baseline_guess)
        dist_o1 = edit_distance.edit_distance_pinyin(pinyin, name1)
        print("Distance between", pinyin, "and", name1, ":", dist_o1)
        dist_o2 = edit_distance.edit_distance_pinyin(pinyin, name2)
        print("Distance between", pinyin, "and", name2, ":", dist_o2)
        distance += ((dist_o1 + dist_o2) / 2)
    # take the average over ALL names
    return (distance / len(names), diff_count, len(names))
def solve_depgraph(self):
    stopwords = set(nltk.corpus.stopwords.words("english"))
    base_ans = self.solve_baseline(stopwords)
    # find sentence with answer
    qsent = bl.get_sentences(self.question)[0]
    qbow = bl.get_bow(qsent, stopwords)
    sents = bl.get_sentences(all_texts[self.qtype[0]][self.text_name])
    best_sent, index = bl.baseline(qbow, sents, stopwords)
    self.answer = " ".join(t[0] for t in best_sent)
    qtype = self.qtype[0]
    # find answer in sentence
    sgraph = self.get_dgraph(qtype, index)
    qgraph = Question.q_depgraphs[self.qid]
    working_ans = depgraph.find_answer2(qgraph, sgraph)
    self.answer = working_ans
    baseans = set(w for w in base_ans.split())
    depans = set(w for w in working_ans.split())
    for entry in baseans:
        depans.add(entry)
    alist = [a for a in depans if a in baseans]
    self.answer = " ".join(alist)
    return self.answer
        return fromHell(b_min, b_max, b_pop, population)
        # return population

    iter_val = 0
    for i in xrange(1):
        fromHellval = search(iter_val)
        iter_val += 1
    return fromHellval


if __name__ == '__main__':
    num_can = 500
    num_gen = 100
    p_mut = 5
    p_cros = 1
    for i in [10, 20, 40]:
        for j in [2, 4, 6, 8]:
            for k in [1, 3, 5, 7]:
                print "GA for model DTLZ ", k, "with decisions = ", i, " objectives = ", j
                model = dtlz(i, j, k)
                base_min, base_max = baseline(model)
                base_pop = basePopulation(model, base_min, base_max)
                print "GA parameters:", " \n num_can: ", num_can, "\n num_gen: ", num_gen, "\n p_mut: ", p_mut,\
                    "\n p_cros: ", p_cros
                print "Divergence Value from Baseline:", ga(
                    model, base_min, base_max, base_pop, num_can, num_gen, p_mut, p_cros)
                print "-" * 120
    if args['spaces']:
        data['form'] = data['form'].str.split(' ')
        if not data['lemma'].isnull().any():
            data['lemma'] = data['lemma'].str.split(' ')
    else:
        # Note: r'\X' (extended grapheme clusters) is only supported by the
        # third-party `regex` module, not by the standard-library `re` module.
        data['form'] = [re.findall(r'\X', f) for f in data['form']]
        if not data['lemma'].isnull().any():
            data['lemma'] = [re.findall(r'\X', f) for f in data['lemma']]

    if args['cv']:
        index = np.random.randint(1, args['cv'] + 1, len(data))
        for k in range(1, max(index) + 1):
            print('** Start run', k)
            args['score'] = paradigms(data, index == k, **args)
            if not data['lemma'].isnull().any():
                args['baseline'] = baseline(data, index == k, **args)
            else:
                args['baseline'] = 0.0
            print(args)
    elif args['train']:
        index = np.array([np.random.random() > float(args['train'])
                          for i in range(len(data))], dtype=np.bool)
        args['score'] = paradigms(data, index, **args)
        if not data['lemma'].isnull().any():
            args['baseline'] = baseline(data, index, **args)
        else:
            args['baseline'] = 0.0
        print(args)
    else:
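# Minimal, self-contained sketch (assumption: the snippet above relies on the
# third-party `regex` package being imported as `re`, since the standard
# library `re` rejects r'\X'). `\X` matches one extended grapheme cluster, so
# combining marks stay attached to their base character instead of being
# split off by a naive character split.
import regex

word = "ame\u0301lie"  # "amélie" written with a combining acute accent
print(regex.findall(r'\X', word))  # ['a', 'm', 'é', 'l', 'i', 'e']
print(list(word))                  # naive split separates the accent from 'e'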
from baseline import baseline
from apriori import apriori
from data import load_grocery_dataset, load_unix_usage_dataset
from rules import generate_rules
import time
import os
import psutil

if __name__ == '__main__':  # compare strings with ==, not `is`
    print('==========DATASET: grocery==========')
    print('=============Baseline===============')
    print('====================================')
    tic = time.time()
    grocery = load_grocery_dataset()
    result = baseline(grocery, min_sup=0.01)
    generate_rules(result, min_conf=0.5)
    print('Baseline time cost {:.6f}'.format(time.time() - tic))
    print('Memory cost {}'.format(
        psutil.Process(os.getpid()).memory_info().rss))
    print('====================================')
    print()
    print('==========DATASET: grocery==========')
    print('==============Apriori===============')
    print('====================================')
    tic = time.time()
    grocery = load_grocery_dataset()
    result = apriori(grocery, min_sup=0.01)
    generate_rules(result, min_conf=0.5)
    print('Apriori time cost {:.6f}'.format(time.time() - tic))
    print('Memory cost {}'.format(
        psutil.Process(os.getpid()).memory_info().rss))
NUM_TEST_FORMULAS = 100

nn = NeuralNet()
nn.train()
testFormulas = FormulaSource()
testFormulas.gen_data(NUM_TEST_FORMULAS)

numCorrect = 0
numTotal = 0
nnC = 0
for f in testFormulas.data:
    t = TruthTable(Formula(f))
    oracle(t)
    oracleT = copy(t.table)
    baseline(t)
    baseT = copy(t.table)
    nn.solve_table(t)
    nnT = copy(t.table)
    for k in oracleT:
        numTotal += 1
        if oracleT[k] == baseT[k]:
            numCorrect += 1
        if oracleT[k] == nnT[k]:
            nnC += 1

print("Baseline: {}/{} correct".format(numCorrect, numTotal),
      "accuracy={}".format(numCorrect / numTotal))
print("NN: {}/{} correct".format(nnC, numTotal),
      "accuracy={}".format(nnC / numTotal))
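# Minimal, self-contained sketch of the scoring pattern used above: count how
# many truth-table entries a method fills in the same way as the oracle. The
# dictionaries here are hypothetical stand-ins for t.table, not taken from the
# project.
oracle_table = {('p', 'q'): True, ('p',): False, ('q',): True}
baseline_table = {('p', 'q'): True, ('p',): True, ('q',): True}

total = len(oracle_table)
correct = sum(1 for key, value in oracle_table.items() if baseline_table[key] == value)
print("Baseline: {}/{} correct, accuracy={}".format(correct, total, correct / total))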
def train(args): # Verify algorithm and config algo = args.algo if algo == "PPO": config = ppo_config elif algo == "A2C": config = a2c_config else: raise ValueError("args.algo must in [PPO, A2C]") config.num_envs = args.num_envs # Seed the environments and setup torch seed = args.seed torch.manual_seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed(seed) torch.set_num_threads(1) # Clean log directory log_dir = verify_log_dir(args.log_dir, algo) # Create vectorized environments num_envs = args.num_envs env_name = args.env_name # Prepare tensorboard file args.save_log = 'Pairtrding-{}'.format(time.strftime("%Y%m%d-%H%M%S")) generate_date = str(datetime.now().date()) writer = SummaryWriter(args.log_dir + '/runs/' + generate_date + '/' + args.save_log) # download stock price data from yahoo finance stocklist = [ '0700.hk', '2318.hk', '3988.hk', '0998.hk', '1398.hk', '3968.hk', '0981.hk', '0005.hk' ] # 腾讯,平安,中银,中信,工商,招商,中芯国际,汇丰 stocktickers = ' '.join(stocklist) data = yf.download(tickers=stocktickers, start="2010-01-01", end="2019-12-31") data = data['Close'] columnchange = [] for stock in data.columns: name = stock + 'change' columnchange.append(name) data[name] = data[stock] - data[stock].shift(1) CorrDict = {} for i in columnchange: for j in columnchange: if i != j and (i, j) not in CorrDict: CorrDict[(i, j)] = data[i].corr(data[j]) pair = list(max(CorrDict)) pair.append(pair[0][:7]) pair.append(pair[1][:7]) dataremain = data[pair] from sklearn import linear_model import numpy as np model = linear_model.LinearRegression() model.fit(dataremain[pair[0]][1:-250].to_numpy().reshape(-1, 1), y=dataremain[pair[1]][1:-250]) beta = model.coef_[0] dataremain['Spread'] = beta * data[pair[0]] - data[pair[1]] Spreadmean = dataremain['Spread'].mean() Spreadstd = dataremain['Spread'].std() dataremain['Z-score'] = (dataremain['Spread'] - Spreadmean) / Spreadstd envs = PairtradingEnv(stock1=dataremain[pair[2]][:-250], stock2=dataremain[pair[3]][:-250]) eval_envs = PairtradingEnv(stock1=dataremain[pair[2]][-250:], stock2=dataremain[pair[3]][-250:]) baseline_config = baselineConfig(mean=Spreadmean, std=Spreadstd, beta=beta) baseline_trainer = baseline(env=envs, config=baseline_config) baseline_eval = baseline(env=eval_envs, config=baseline_config) test = env_name == "CartPole-v0" frame_stack = args.input_length if not test else 1 # Setup trainer if algo == "PPO": trainer = PPOTrainer(envs, config, frame_stack, _test=test) else: trainer = A2CTrainer(envs, config, frame_stack, _test=test) # Create a placeholder tensor to help stack frames in 2nd dimension # That is turn the observation from shape [num_envs, 1, 84, 84] to # [num_envs, 4, 84, 84]. 
frame_stack_tensor = FrameStackTensor( num_envs, envs.observation_space.shape, frame_stack, config.device) # envs.observation_space.shape: 1,42,42 # Setup some stats helpers episode_rewards = np.zeros([num_envs, 1], dtype=np.float) total_episodes = total_steps = iteration = 0 reward_recorder = deque(maxlen=100) episode_length_recorder = deque(maxlen=100) episode_values = deque(maxlen=100) sample_timer = Timer() process_timer = Timer() update_timer = Timer() total_timer = Timer() progress = [] evaluate_stat = {} # Start training print("Start training!") while True: # Break when total_steps exceeds maximum value # ===== Sample Data ===== # episode_values = [] episode_rewards = np.zeros([num_envs, 1], dtype=np.float) for env_id in range(num_envs): obs = envs.reset() # obs.shape: 15,1,42,42 frame_stack_tensor.update(obs, env_id) trainer.rollouts.observations[0, env_id].copy_( frame_stack_tensor.get(env_id) ) #trainer.rollouts.observations.shape: torch.Size([201, 15, 4, 42, 42]) with sample_timer: for index in range(config.num_steps): # Get action # [TODO] Get the action # Hint: # 1. Remember to disable gradient computing # 2. trainer.rollouts is a storage containing all data # 3. What observation is needed for trainer.compute_action? with torch.no_grad(): values, actions_cash, action_log_prob_cash, actions_beta, action_log_prob_beta = trainer.compute_action( trainer.rollouts.observations[index, env_id]) act = baseline_trainer.compute_action( actions_cash.view(-1), actions_beta.view(-1)) cpu_actions = act # Step the environment # (Check step_envs function, you need to implement it) obs, reward, done, masks, total_episodes, \ total_steps, episode_rewards, episode_values = step_envs( cpu_actions, envs, env_id, episode_rewards, episode_values, frame_stack_tensor, reward_recorder, episode_length_recorder, total_steps, total_episodes, config.device, test) rewards = torch.from_numpy( np.array(reward).astype(np.float32)).view(-1).to( config.device) # Store samples trainer.rollouts.insert(frame_stack_tensor.get(env_id), actions_cash.view(-1), action_log_prob_cash.view(-1), actions_beta.view(-1), action_log_prob_beta.view(-1), values.view(-1), rewards, masks.view(-1), env_id) # ===== Process Samples ===== with process_timer: with torch.no_grad(): next_value = trainer.compute_values( trainer.rollouts.observations[-1]) trainer.rollouts.compute_returns(next_value, config.GAMMA) # ===== Update Policy ===== with update_timer: policy_loss, value_loss, dist_entropy, total_loss = \ trainer.update(trainer.rollouts) trainer.rollouts.after_update() # Add training statistics to tensorboard log file writer.add_scalar('train_policy_loss', policy_loss, iteration) writer.add_scalar('train_value_loss', value_loss, iteration) writer.add_scalar('train_dist_entropy', dist_entropy, iteration) writer.add_scalar('train_total_loss', total_loss, iteration) writer.add_scalar('train_episode_rewards', np.mean(episode_rewards), iteration) writer.add_scalar('train_episode_values', np.array(episode_values).mean(), iteration) # ===== Evaluate Current Policy ===== if iteration % config.eval_freq == 0: eval_timer = Timer() evaluate_rewards, evaluate_lengths, evaluate_values = evaluate( trainer, eval_envs, baseline_eval, frame_stack, 5) evaluate_stat = summary(evaluate_rewards, "episode_reward") if evaluate_lengths: evaluate_stat.update( summary(evaluate_lengths, "episode_length")) evaluate_stat.update( dict(win_rate=float( sum(np.array(evaluate_rewards) >= 0) / len(evaluate_rewards)), evaluate_time=eval_timer.now, 
evaluate_iteration=iteration, evaluate_values=float(np.array(evaluate_values).mean()))) # Add evaluation statistics to tensorboard log file writer.add_scalar('eval_episode_rewards', np.array(evaluate_rewards).mean(), iteration // config.eval_freq) writer.add_scalar('eval_episode_values', np.array(evaluate_values).mean(), iteration // config.eval_freq) # ===== Log information ===== if iteration % config.log_freq == 0: stats = dict( log_dir=log_dir, frame_per_second=int(total_steps / total_timer.now), training_episode_reward=summary(reward_recorder, "episode_reward"), training_episode_values=summary(episode_values, "episode_value"), training_episode_length=summary(episode_length_recorder, "episode_length"), evaluate_stats=evaluate_stat, learning_stats=dict(policy_loss=policy_loss, entropy=dist_entropy, value_loss=value_loss, total_loss=total_loss), total_steps=total_steps, total_episodes=total_episodes, time_stats=dict(sample_time=sample_timer.avg, process_time=process_timer.avg, update_time=update_timer.avg, total_time=total_timer.now, episode_time=sample_timer.avg + process_timer.avg + update_timer.avg), iteration=iteration) progress.append(stats) pretty_print({ "===== {} Training Iteration {} =====".format(algo, iteration): stats }) if iteration % config.save_freq == 0: trainer_path = trainer.save_w(log_dir, "iter{}".format(iteration)) progress_path = save_progress(log_dir, progress) print( "Saved trainer state at <{}>. Saved progress at <{}>.".format( trainer_path, progress_path)) if iteration >= args.max_steps: break iteration += 1 trainer.save_w(log_dir, "final") envs.close()
lumi_mask = lumi_json.contains(data_signal.run, data_signal.lumi)
data_signal = data_signal[lumi_mask]
lumi_mask = lumi_json.contains(data_background.run, data_background.lumi)
data_background = data_background[lumi_mask]

data_background['isE'] = 0
data_signal['isE'] = 1
data = pd.concat((data_background, data_signal))
data = data.sample(frac=1, random_state=42).reset_index(drop=True)  # shuffle entries
# used later on but better having it here for data integrity
# sameprob = data_background.shape[0]/float(data_signal.shape[0])
# data.loc[(data.isE == 1), 'weight'] = sameprob
data['cutbased'] = False
data['cutmatching'] = False
data['cutbdt'] = False
baseline(data)

mc_background['isE'] = 0
mc_signal['isE'] = 1
mc = pd.concat((mc_background, mc_signal))
mc = mc.sample(frac=1, random_state=42).reset_index(drop=True)  # shuffle entries
mc['cutbased'] = False
mc['cutmatching'] = False
mc['cutbdt'] = False
baseline(mc)

X_ = lambda x: [i for i, _, _ in x]
Y_ = lambda x: [i for _, i, _ in x]
Z_ = lambda x: [i for _, _, i in x]

for var, binning, xlegend in [('trk_pt', np.arange(1, 11, 1), 'ktf track pT'),
def main( order, procedure="cg", max_iters=1, superitems_horizontal=True, superitems_horizontal_type="two-width", superitems_max_vstacked=4, density_tol=0.5, filtering_two_dims=False, filtering_max_coverage_all=3, filtering_max_coverage_single=3, tlim=None, enable_solver_output=False, height_tol=0, cg_use_height_groups=True, cg_mr_warm_start=True, cg_max_iters=100, cg_max_stag_iters=20, cg_sp_mr=False, cg_sp_np_type="mip", cg_sp_p_type="cp", cg_return_only_last=False, ): """ External interface to all the implemented solutions to solve 3D-BPP """ assert max_iters > 0, "The number of maximum iteration must be > 0" assert procedure in ("mr", "bl", "cg"), "Unsupported procedure" logger.info(f"{procedure.upper()} procedure starting") # Create the final superitems pool and a copy of the order final_layer_pool = layers.LayerPool(superitems.SuperitemPool(), config.PALLET_DIMS) working_order = order.copy() # Iterate the specified number of times in order to reduce # the number of uncovered items at each iteration not_covered, all_singles_removed = [], [] for iter in range(max_iters): logger.info(f"{procedure.upper()} iteration {iter + 1}/{max_iters}") # Create the superitems pool and call the baseline procedure superitems_list, singles_removed = superitems.SuperitemPool.gen_superitems( order=working_order, pallet_dims=config.PALLET_DIMS, max_vstacked=superitems_max_vstacked, horizontal=superitems_horizontal, horizontal_type=superitems_horizontal_type, ) superitems_pool = superitems.SuperitemPool(superitems=superitems_list) all_singles_removed += singles_removed # Call the right packing procedure if procedure == "bl": layer_pool = baseline.baseline(superitems_pool, config.PALLET_DIMS, tlim=tlim) elif procedure == "mr": layer_pool = maxrects_warm_start(superitems_pool, height_tol=height_tol, density_tol=density_tol, add_single=False) elif procedure == "cg": layer_pool = cg( superitems_pool, height_tol=height_tol, density_tol=density_tol, use_height_groups=cg_use_height_groups, mr_warm_start=cg_mr_warm_start, max_iters=cg_max_iters, max_stag_iters=cg_max_stag_iters, tlim=tlim, sp_mr=cg_sp_mr, sp_np_type=cg_sp_np_type, sp_p_type=cg_sp_p_type, return_only_last=cg_return_only_last, enable_solver_output=enable_solver_output, ) # Filter layers based on the given parameters layer_pool = layer_pool.filter_layers( min_density=density_tol, two_dims=filtering_two_dims, max_coverage_all=filtering_max_coverage_all, max_coverage_single=filtering_max_coverage_single, ) # Add only the filtered layers final_layer_pool.extend(layer_pool) # Compute the number of uncovered Items prev_not_covered = len(not_covered) item_coverage = final_layer_pool.item_coverage() not_covered = [k for k, v in item_coverage.items() if not v] logger.info( f"Items not covered: {len(not_covered)}/{len(item_coverage)}") if len(not_covered) == prev_not_covered: logger.info( "Stop iterating, no improvement from the previous iteration") break # Compute a new order composed of only not covered items working_order = order.iloc[not_covered].copy() # Build a pool of bins from the layer pool and compact # all layers in each bin to avoid having "flying" products bin_pool = bins.BinPool(final_layer_pool, config.PALLET_DIMS, singles_removed=set(all_singles_removed)) return bins.CompactBinPool(bin_pool)
def run_models(train, valid):
    return (bs.baseline(train, valid),
            bsl1.baseline_l1(train, valid),
            bsl2.baseline_l2(train, valid),
            sgd.sgd_bias(train, valid),
            bsfreq.baseline_freq(train, valid, 'predict'))
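# Hypothetical usage sketch (variable names are illustrative, not from the
# source): run_models returns one result per model, in the order the calls
# appear above, so the tuple can be unpacked for a side-by-side comparison.
base_res, l1_res, l2_res, sgd_res, freq_res = run_models(train, valid)
results = {"baseline": base_res, "baseline_l1": l1_res, "baseline_l2": l2_res,
           "sgd_bias": sgd_res, "baseline_freq": freq_res}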
def train(cfg, args): detector = build_detection_model(cfg) #print(detector) detector.eval() device = torch.device(cfg.MODEL.DEVICE) detector.to(device) outdir = cfg.OUTPUT_DIR checkpointer = DetectronCheckpointer(cfg, detector, save_dir=outdir) ckpt = cfg.MODEL.WEIGHT _ = checkpointer.load(ckpt) # Initialize the network model = baseline() class_weights = [1, 1, 5, 5] # could be adjusted class_weights = torch.FloatTensor(class_weights).to(device) criterion = nn.CrossEntropyLoss(weight=class_weights) # Initialize optimizer optimizer = optim.Adam(model.parameters(), lr=float(args.initLR), weight_decay=0.001) # Initialize image batch # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, args.imHeight, args.imWidth)) imBatch = Variable(torch.FloatTensor(args.batch_size, 3, 736, 1280)) targetBatch = Variable(torch.LongTensor(args.batch_size, 1)) # Move network and batch to gpu imBatch = imBatch.cuda(device) targetBatch = targetBatch.cuda(device) model = model.cuda(device) # Initialize dataloader Dataset = BatchLoader(imageRoot=args.imageroot, gtRoot=args.gtroot, cropSize=(args.imWidth, args.imHeight)) dataloader = DataLoader(Dataset, batch_size=int(args.batch_size), num_workers=0, shuffle=True) lossArr = [] AccuracyArr = [] accuracy = 0 iteration = 0 for epoch in range(0, 10): trainingLog = open(outdir + ('trainingLog_{0}.txt'.format(epoch)), 'w') accuracy = 0 for i, dataBatch in enumerate(dataloader): iteration = i + 1 # Read data, under construction img_cpu = dataBatch['img'] # if args.batch_size == 1: # img_list = to_image_list(img_cpu[0,:,:], cfg.DATALOADER.SIZE_DIVISIBILITY) # else: # img_list = to_image_list(img_cpu, cfg.DATALOADER.SIZE_DIVISIBILITY) img_list = to_image_list(img_cpu[0, :, :], cfg.DATALOADER.SIZE_DIVISIBILITY) imBatch.data.copy_( img_list.tensors) # Tensor.shape(BatchSize, 3, Height, Width) target_cpu = dataBatch['target'] # print(target_cpu) targetBatch.data.copy_(target_cpu) # Train network RoIPool_module = detector.roi_heads.box.feature_extractor.pooler RoIPredictor = detector.roi_heads.box.predictor RoIProc = detector.roi_heads.box.post_processor Backbone = detector.backbone hook_roi = SimpleHook(RoIPool_module) hook_backbone = SimpleHook(Backbone) hook_pred = SimpleHook(RoIPredictor) hook_proc = SimpleHook(RoIProc) out_detector = detector(imBatch) features_roi = hook_roi.output.data features_backbone = hook_backbone.output[ 0].data # only use the bottom one # choose boxes with high scores thresh = 10 cls_logit = hook_pred.output[0].data cls_logit = torch.max(cls_logit, dim=1) ind = torch.ge(cls_logit[0], torch.FloatTensor([thresh]).to(device)) features_roi = features_roi[ind] optimizer.zero_grad() # pred = model(features_roi, features_backbone) pred = model(features_roi, features_backbone) # print('target:', targetBatch[0,:][0]) loss = criterion(pred, targetBatch[0, :]) action = pred.cpu().argmax().data.numpy() loss.backward() optimizer.step() if action == target_cpu.data.numpy()[0]: accuracy += 1 lossArr.append(loss.cpu().data.item()) AccuracyArr.append(accuracy / iteration) meanLoss = np.mean(np.array(lossArr)) if iteration % 100 == 0: print('prediction:', pred) print('predicted action:', action) print('ground truth:', target_cpu.data.numpy()[0]) print( 'Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f' % (epoch, iteration, lossArr[-1], meanLoss)) trainingLog.write( 'Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f \n' % (epoch, iteration, lossArr[-1], meanLoss)) print('Epoch %d Iteration %d: Accumulated Accuracy %.5f' % (epoch, iteration, 
AccuracyArr[-1])) trainingLog.write( 'Epoch %d Iteration %d: Accumulated Accuracy %.5f \n' % (epoch, iteration, AccuracyArr[-1])) if epoch in [4, 7] and iteration == 1: print('The learning rate is being decreased at Iteration %d', iteration) trainingLog.write( 'The learning rate is being decreased at Iteration %d \n' % iteration) for param_group in optimizer.param_groups: param_group['lr'] /= 10 if iteration == args.MaxIteration: torch.save(model.state_dict(), (outdir + 'netFinal_%d.pth' % (epoch + 1))) break if iteration >= args.MaxIteration: break if (epoch + 1) % 2 == 0: torch.save(model.state_dict(), (outdir + 'netFinal_%d.pth' % (epoch + 1)))
from data_utils import rt_shift_augmentation

font = {'weight': 'normal', 'size': 16}
color_seq = (44. / 256, 83. / 256, 169. / 256, 1.)
color_ref = (69. / 256, 209. / 256, 163. / 256, 1.)
color_bl = (237. / 256, 106. / 256, 90. / 256, 1.)

# Test data, please use git-lfs to download files below
test_data = pickle.load(open('./test_input.pkl', 'rb'))
# Pre-saved sequential PB-Net predictions on test input
pred = pickle.load(open('./test_preds.pkl', 'rb'))
# Pre-saved reference-based PB-Net predictions on test input
pred_ref = pickle.load(open('./test_preds_ref.pkl', 'rb'))

###########################################################################
pred_bl = [baseline(p) for p in test_data]

intg = False
y_trues = []
y_preds = []
y_preds_ref = []
y_preds_bl = []
for i, sample in enumerate(test_data):
    output = pred[i]
    output_ref = pred_ref[i]
    output_bl = pred_bl[i]
    y_trues.append(calculate_abundance(sample[1], sample))
    y_preds.append(calculate_abundance(output, sample, intg=intg))
    y_preds_ref.append(calculate_abundance(output_ref, sample, intg=intg))
    y_preds_bl.append(calculate_abundance(output_bl, sample, intg=intg))
y_trues = np.array(y_trues)
__author__ = 'Umberto'

from baseline import baseline
from unionOfClassifiers import runMethod2
from SVMScript import runMethod1

predictionsFromBaseline = baseline()
predictionsFromMethod2 = runMethod2()
predictionsFromMethod1 = runMethod1()

# flatten predictions
predictionsFromBaseline = [val for sublist in predictionsFromBaseline for val in sublist]
predictionsFromMethod2 = [val for sublist in predictionsFromMethod2 for val in sublist]
predictionsFromMethod1 = [val for sublist in predictionsFromMethod1 for val in sublist]

from scipy import stats

if len(predictionsFromBaseline) != len(predictionsFromMethod2):
    print('Error: predictions from method 2 have a different length!')
else:
    r1 = stats.ttest_ind(predictionsFromBaseline, predictionsFromMethod2)
    print(r1)
    r2 = stats.ttest_ind(predictionsFromBaseline, predictionsFromMethod2, equal_var=False)
    print(r2)
    # (8.6566243900008022, 8.3173929492649013e-18)
    # (8.6566243900008022, 1.2765689781551307e-17)

if len(predictionsFromBaseline) != len(predictionsFromMethod1):
    print('Error: predictions from method 1 have a different length!')
else:
def base(question, story): #Base real_question = question question_id = question["qid"] if question['type']=='Sch': text=story['sch'] else: text = story["text"] question = question["text"] #print("QUESTION: ", question) #Code stopwords = set(nltk.corpus.stopwords.words("english")) #question_stem_list = chunk.lemmatize(nltk.pos_tag(nltk.word_tokenize(question))) #question_stem = "".join(t[0] + " " for t in question_stem_list) question_stem = question qbow = baseline.get_bow(baseline.get_sentences(question_stem)[0], stopwords) sentences = baseline.get_sentences(text) question=chunk.get_sentences(question) rake = Rake() rake.extract_keywords_from_text(real_question["text"]) global noun_ids global verb_ids base_ans, index = baseline.baseline(qbow, sentences, stopwords,real_question["text"], rake.get_ranked_phrases(),story["sid"], noun_ids, verb_ids) newanswer ="".join(t[0]+" " for t in base_ans) saveans=newanswer chunker = nltk.RegexpParser(GRAMMAR) tempanswer=chunk.get_sentences(newanswer) atree=chunker.parse(tempanswer[0]) what_set = ["happened", "do"] #this should probably be changed in the future what_set = set(what_set) rake =Rake() rake.extract_keywords_from_text(real_question["text"]) if question[0][0][0].lower()=="who": pos_phrases = nltk.pos_tag(rake.get_ranked_phrases()) #print(pos_phrases) only_noun_pos_phrases = [noun for noun in pos_phrases if re.search(r"NN", noun[1])] only_noun_phrases = [] for i in only_noun_pos_phrases: only_noun_phrases.append(i[0]) np=chunk.find_nounphrase(atree) temp_ans="" if (np != []): counter = 0 while True: temp_ans = "" val = False for token in np[counter].leaves(): temp_ans=temp_ans+" "+token[0] for word in only_noun_phrases: if word in temp_ans: val = True if val: # if answer contains a word in only_noun_phrases if len(np)-1>counter: counter+=1 else: temp_ans = newanswer break else: break else: temp_ans = newanswer newanswer=temp_ans elif question[0][0][0].lower()=="what": #TODO will use dependency in the future as what questions are too hard to figure out wihtout knowing which words are dependent on others. if any(word in real_question["text"] for word in what_set): pp = chunk.find_verbphrase(atree) else: pp=chunk.find_nounphrase(atree) temp_ans="" #print([k.leaves() for k in pp]) if (pp != []): if len(pp)> 1: #fix later for token in pp[1].leaves(): temp_ans = temp_ans + " " + token[0] else: for token in pp[0].leaves(): temp_ans = temp_ans+" "+token[0] else: temp_ans = newanswer newanswer=temp_ans elif question[0][0][0].lower()=="where": pp=chunk.find_prepphrases(atree) temp_ans="" if (pp != []): for token in pp[0].leaves(): temp_ans=temp_ans+" "+token[0] else: temp_ans = newanswer newanswer=temp_ans elif question[0][0][0].lower()=="when": pp=chunk.find_times(atree) temp_ans="" if (pp != []): for token in pp[0].leaves(): temp_ans=temp_ans+" "+token[0] else: temp_ans = newanswer newanswer=temp_ans elif question[0][0][0].lower() == "why": pp=chunk.find_reasons(atree) temp_ans="" if (pp != []): for token in pp[0].leaves(): temp_ans=temp_ans+" "+token[0] else: temp_ans = newanswer newanswer=temp_ans if newanswer.replace(" ","") in PERSONAL_PRONOUN and question[0][0][0].lower()=="who": index=get_Index(question,story) i = index if i > 0: previous_sentence=sentences[index-i] for word,tag in previous_sentence: if tag == "NNP": newanswer=word #print("ANSWER ",newanswer) #print() saveans= re.sub(r'[^\w\s]','',saveans) return saveans
def test(cfg, args): # load detector # detector = build_detection_model(cfg) # detector.eval() device = torch.device(cfg.MODEL.DEVICE) # detector.to(device) outdir = cfg.OUTPUT_DIR # load network model = baseline(cfg) model.load_state_dict(torch.load(args.model_root)) # Initialize image batch # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, args.imHeight, args.imWidth)) # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, 736, 1280)) targetBatch = Variable(torch.LongTensor(args.batch_size, 1)) # Move network and batch to gpu # imBatch = imBatch.cuda(device) targetBatch = targetBatch.cuda(device) model = model.cuda(device) # Initialize dataloader Dataset = BatchLoader(imageRoot=args.imageroot, gtRoot=args.gtroot, cropSize=(args.imWidth, args.imHeight)) dataloader = DataLoader(Dataset, batch_size=args.batch_size, num_workers=0, shuffle=True) length = Dataset.__len__() AccuracyArr = [] accuracy = 0 # test SaveFilename = (outdir + 'TestingLog.txt') TestingLog = open(SaveFilename, 'w') print('Save to ', SaveFilename) for i, dataBatch in enumerate(dataloader): # Read data, under construction. now it is hard-code img_cpu = dataBatch['img'][0, :] N = img_cpu.shape[0] imBatch = Variable(torch.FloatTensor(N, 1024, 14, 14)) imBatch = imBatch.cuda(device) imBatch.data.copy_( img_cpu) # Tensor.shape(BatchSize, 3, Height, Width) target_cpu = dataBatch['target'] # print(target_cpu) targetBatch.data.copy_(target_cpu) # grap features from detector pred = model(imBatch) action = pred.cpu().argmax(dim=1).data.numpy() print('predicted action:', action) print('ground truth:', target_cpu.data.numpy()[0]) if action == target_cpu.data.numpy()[0]: accuracy += 1 AccuracyArr.append(accuracy / (i + 1)) print('Iteration %d / %d: Accumulated Accuracy %.5f' % (i + 1, length, AccuracyArr[-1])) TestingLog.write('Iteration %d / %d: Accumulated Accuracy %.5f \n' % (i + 1, length, AccuracyArr[-1]))
def run_model(train, valid, mode, param):
    return bs.baseline(train, valid, mode, param)
print 'algorithm:', alg
print 'set event length:', event_length
print 'set event overlap:', overlap

G = utils.generateGraph(n=num_nodes)
print 'number of nodes in the background network:', G.number_of_nodes()
print 'number of edges in the background network:', G.number_of_edges()

timestamps, active_truth = utils.generateIntervals(
    G, event_length=event_length, overlap=overlap)
print 'number of timestamps', len(timestamps)

if alg == 'baseline':
    Xstart, Xend = baseline.baseline(timestamps)
elif alg == 'inner':
    Xstart, Xend = inner_point.runInner(timestamps)
elif alg == 'budget':
    Xstart, Xend = budget.runBudget(timestamps)
else:
    print('no algorithm specified')
    exit()

print
print 'relative total length of solution =', utils.getCost(
    Xstart, Xend) / ((event_length - 1) * num_nodes)
print 'relative maximum length of solution =', utils.getMax(
    Xstart, Xend) / (event_length - 1)

p, r, f = utils.compareGT(Xstart, Xend, active_truth, timestamps)
def train_for_n_iters(train_dataset, test_dataset, model_params, lr_params, n_iters=5, train_steps=1000, test_every=10, pretrain_steps=250, print_loss=True, log_dir="logs/", model_name="ARL"): """ Trains the ARL model for n iterations, and averages the results. Args: train_dataset: Data iterator of the train set. test_dataset: Data iterator of the test set. model_params: A dictionary with model hyperparameters. lr_params: A dictionary with hyperparmaeters for optimizers. n_iters: How often to train the model with different seeds. train_steps: Number of training steps. test_every: How often to evaluate on test set. pretrain_steps: Number of pretrain steps (steps with no adversary). print_loss: Wheter to print the loss during training. log_dir: Directory where to save the tensorboard loggers. """ # Set the device on which to train. device = "cuda:0" if torch.cuda.is_available() else "cpu" model_params["device"] = device # Initiate metrics object. metrics = FairnessMetrics(n_iters, test_every) # Preparation of logging directories. experiment_dir = os.path.join( log_dir, datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")) checkpoint_dir = os.path.join(experiment_dir, "checkpoints") os.makedirs(experiment_dir, exist_ok=True) os.makedirs(checkpoint_dir, exist_ok=True) # Initialte TensorBoard loggers. summary_writer = SummaryWriter(experiment_dir) logger_learner = TensorBoardLogger(summary_writer, name="learner") logger_adv = TensorBoardLogger(summary_writer, name="adversary") logger_metrics = TensorBoardLogger(summary_writer, name="metrics") for i in range(n_iters): print(f"Training model {i + 1}/{n_iters}") seed_everything(42 + i) # Load the train dataset as a pytorch dataloader. train_loader = DataLoader(train_dataset, batch_size=model_params["batch_size"], shuffle=True) # Create the model. if model_name == "ARL": model = ARL(**model_params) elif model_name == "baseline": model = baseline(**model_params) else: print("Unknown model") # Transfer model to correct device. model = model.to(device) # Adagrad is the defeault optimizer. optimizer_learner = torch.optim.Adagrad(model.learner.parameters(), lr=lr_params["learner"]) if model_name == 'ARL': optimizer_adv = torch.optim.Adagrad(model.adversary.parameters(), lr=lr_params["adversary"]) elif model_name == 'baseline': optimizer_adv = None # Train the model with current seeds. if print_loss: print("Start training on device {}".format(device)) train_model( model, train_loader, test_dataset, train_steps, test_every, pretrain_steps, optimizer_learner, optimizer_adv, metrics, checkpoint_dir, logger_learner, logger_adv, logger_metrics, n_iters=i, print_loss=print_loss, device=device, ) # Average results and return metrics metrics.average_results() return metrics
train_set = load_data(args.data_dir + args.train_file, word2id, add_reversed=True, n=args.n_gram)
valid_set = load_data(args.data_dir + args.valid_file, word2id, add_reversed=True, n=args.n_gram)
test_set = load_data(args.data_dir + args.test_file, word2id, add_reversed=False, n=args.n_gram)
vocab_size = len(id2word)

train_batches = batch_iter(train_set, args.batch_size, shuffle=True, diff_len=False)
valid_batches = batch_iter(valid_set, args.batch_size * 10, shuffle=True, diff_len=False)
test_batches = batch_iter(test_set, args.test_size, shuffle=False, diff_len=False)


def set_cuda(var):
    if torch.cuda.is_available():
        return var.cuda()
    return var


model = baseline(vocab_size, args.emb_dim, args.hid_dim)

# Initialize with pre-trained word embeddings; otherwise train the embedding from scratch
if args.embed_file is not None:
    model.embedding.weight.data.copy_(torch.FloatTensor(word_embeddings))
    if not args.fine_tune:
        model.embedding.weight.requires_grad = False

model = set_cuda(model)
model.train()

if args.resume_model is not None:
    model.load_state_dict(torch.load(args.resume_model))

para = filter(lambda p: p.requires_grad, model.parameters())
opt = optim.Adagrad(para, lr=args.lr)
def test(cfg, args): # load detector detector = build_detection_model(cfg) detector.eval() device = torch.device(cfg.MODEL.DEVICE) detector.to(device) outdir = cfg.OUTPUT_DIR # load network model = baseline() model.load_state_dict(torch.load(args.model_root)) # Initialize image batch # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, args.imHeight, args.imWidth)) imBatch = Variable(torch.FloatTensor(args.batch_size, 3, 736, 1280)) targetBatch = Variable(torch.LongTensor(args.batch_size, 1)) # Move network and batch to gpu imBatch = imBatch.cuda(device) targetBatch = targetBatch.cuda(device) model = model.cuda(device) # Initialize dataloader Dataset = BatchLoader(imageRoot=args.imageroot, gtRoot=args.gtroot, cropSize=(args.imWidth, args.imHeight)) dataloader = DataLoader(Dataset, batch_size=args.batch_size, num_workers=0, shuffle=True) length = Dataset.__len__() AccuracyArr = [] accuracy = 0 # test SaveFilename = (outdir + 'TestingLog.txt') TestingLog = open(SaveFilename, 'w') print('Save to ', SaveFilename) for i, dataBatch in enumerate(dataloader): # Read data, under construction. now it is hard-code img_cpu = dataBatch['img'] img_list = to_image_list(img_cpu[0, :, :], cfg.DATALOADER.SIZE_DIVISIBILITY) imBatch.data.copy_( img_list.tensors) # Tensor.shape(BatchSize, 3, Height, Width) target_cpu = dataBatch['target'] # print(target_cpu) targetBatch.data.copy_(target_cpu) # grap features from detector RoIPool_module = detector.roi_heads.box.feature_extractor.pooler Backbone = detector.backbone hook_roi = SimpleHook(RoIPool_module) hook_backbone = SimpleHook(Backbone) out_detector = detector(imBatch) features_roi = hook_roi.output.data features_backbone = hook_backbone.output[ 0].data # only use the bottom one pred = model(features_roi, features_backbone) action = pred.cpu().argmax().data.numpy() print('predicted action:', action) print('ground truth:', target_cpu.data.numpy()[0]) if action == target_cpu.data.numpy()[0]: accuracy += 1 AccuracyArr.append(accuracy / (i + 1)) print('Iteration %d / %d: Accumulated Accuracy %.5f' % (i + 1, length, AccuracyArr[-1])) TestingLog.write('Iteration %d / %d: Accumulated Accuracy %.5f \n' % (i + 1, length, AccuracyArr[-1]))