Example No. 1
def main():
    data=edata.load("train.edata")
    testdata=edata.load("test.edata")
##    data.removekcsq(3,15)
#    testdata=edata.load("test.edata")
#    
#    model=complexModel(data, False)
#    print model.fit()
#    model.save("trying.cModel")
##    print model.useTestset(testdata)
#
    bl = baseline(data)
    bl.fit()
    print "baseline:", bl.useTestset(testdata)
    
#------------------------------------------------------------------------------
    
    model=complexModel.load("trying.cModel")
    model.normalizeParameters()
    
    print "model:", model.useTestset(testdata)
    print np.average(model.ca), np.std(model.ca)
    print np.average(model.cr), np.std(model.cr)
    print np.average(model.cg), np.std(model.cg)
    print np.average(model.cb), np.std(model.cb)
    print np.average(model.st), np.std(model.st)
    print np.average(model.se), np.std(model.se)
Example No. 2
def get_Index(question,story):
    real_question = question
    question_id = question["qid"]

    if question['type']=='Sch':
        text=story['sch']
    else:
        text = story["text"]
    question = question["text"]
    #print("QUESTION: ", question)
    rake = Rake()
    rake.extract_keywords_from_text(real_question["text"])#this is question text

    #Code
    stopwords = set(nltk.corpus.stopwords.words("english"))
    #question_stem_list = chunk.lemmatize(nltk.pos_tag(nltk.word_tokenize(question)))
    #question_stem = "".join(t[0] + " " for t in question_stem_list)
    question_stem = question
    qbow = baseline.get_bow(baseline.get_sentences(question_stem)[0], stopwords)
    sentences = baseline.get_sentences(text)
    question=chunk.get_sentences(question)
    global noun_ids
    global verb_ids
    base_ans, index = baseline.baseline(qbow, sentences, stopwords,real_question["text"], rake.get_ranked_phrases(),story["sid"], noun_ids, verb_ids)
    return index
Example No. 3
    def baseShow(self,needClick=True):
       
        self.drawBox()
        if not needClick:
            while not self.success:
                myBase = baseline(n=self.n, boxView=self.boxView)
                for mouseLeft, mouseRight, pos in myBase.base():
                    self.autoReact(mouseLeft, mouseRight, pos)

        while needClick:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    sys.exit()
                elif event.type == pygame.MOUSEBUTTONDOWN:      
                    myBase = baseline(n=self.n, boxView=self.boxView)
                    for mouseLeft, mouseRight, pos in myBase.base():
                        self.react(mouseLeft, mouseRight, pos)
                pygame.display.update()
Example No. 4
    def test_precision(self):
        df = pd.read_pickle('../data/final/df_final.pkl')
        data = d.split_data(df, True)

        data_train = data[0]
        data_test = data[1]
        data_val = data[2]

        b = base.baseline(df, False)

        als_result = als_precision(data_train, data_val, b)
        assert 1 == 1
Example No. 5
 def solve_baseline(self, stopwords, keyWord=None):
     # question is only one sentence so list has one item.
     qsent  = bl.get_sentences(self.question)[0]
     # bag of words
     qbow = bl.get_bow(qsent, stopwords)
     sentences = bl.get_sentences(self.get_text())
     answer,i = bl.baseline(qbow, sentences, stopwords, keyWord)
     #print('answer: ' , answer)
     key = set(['NN' , 'NNP', 'JJ'])#, 'JJ', 'VBN', 'VB'])
     ans = set([ t[0] for t in answer if t[1] in key] )
     ans.add('a')
     ans.add('the')
     self.answer = " ".join(w for w in ans)
     return self.answer
Example No. 6
def validation(train, valid, mode='validation', param=0):
    
    import data_processing as dp
    dphelper = dp.data_processing()
    dense_train, sparse_train = dphelper.split(train)
    dense_valid, sparse_valid = dphelper.split(valid)
    
    
    import sgd_bias as sgd
    train_rss_dense, valid_rss_dense = sgd.sgd_bias(dense_train, dense_valid, 'validation')
    
    import baseline as bs
    train_rss_sparse, valid_rss_sparse = bs.baseline(sparse_train, sparse_valid, 'validation')
  
    return train_rss_dense + train_rss_sparse, valid_rss_dense + valid_rss_sparse
    
    
    
Example No. 7
    def best_sent(self, keyword=None):
        stopwords = set(nltk.corpus.stopwords.words("english"))
        # use sch whenever possible
        if 'sch' in self.qtype:
            qtype = 'sch'
        else:
            qtype = 'story'
        # find sentence with answer
        qsent = bl.get_sentences(self.question)[0]
        qbow = bl.get_bow(qsent, stopwords)
        #sents = bl.get_sentences(all_texts[qtype][self.text_name])

        sgraphs = Question.text_depgraphs[qtype][self.text_name]

        sents = [ depgraph.graph2sent(g) for g in sgraphs ]

        #print(len(sents))
        best_sent,index = bl.baseline(qbow, sents, stopwords, keyword)
        return best_sent, index, qtype
Example No. 8
def prediction(train_valid, test, pred_filename):
    
    import data_processing as dp
    dphelper = dp.data_processing()
    dense_train, sparse_train = dphelper.split(train_valid)
    dense_test, sparse_test = dphelper.split(test)
        
    #######
    import sgd_bias as sgd
    y_hat_dense, train_rmse_dense = sgd.sgd_bias(dense_train, dense_test, 'prediction')
    
    import baseline as bs
    y_hat_sparse, train_rmse_sparse = bs.baseline(sparse_train, sparse_test, 'prediction')
    
    #######
    print 'dense subset train rmse: %.16f' % train_rmse_dense
    print 'sparse subset train rmse: %.16f' % train_rmse_sparse
    test = dphelper.merge(test, y_hat_dense, y_hat_sparse)
    util.write_predictions(test, pred_filename) 
Example No. 9
 def solve_why(self):
     #return
     stopwords = set(nltk.corpus.stopwords.words("english"))
     # find sentence with answer
     qsent = bl.get_sentences(self.question)[0]
     # the question word is not significant, so remove it.
     qsent.pop(0)
     qbow = bl.get_bow(qsent, stopwords)
     sents = bl.get_sentences(all_texts[self.qtype[0]][self.text_name])
     best_sent,index = bl.baseline(qbow, sents, stopwords, 'because')
     self.answer = " ".join(t[0] for t in best_sent)
     qtype = self.qtype[0]
     ans = bl.select(best_sent, 'because', 30)
     if len(ans) < 4:
         ''' not very accurate. needs improvement. '''
         self.solve_baseline(stopwords, 'because')
         #self.answer += ' &'
     else:
         # accurate
         self.answer = " ".join(ans)
Example No. 10
    def solve_who(self):
        #return
        global all_texts
        stopwords = set(nltk.corpus.stopwords.words("english"))
        last = utils.last_word(self.question.rstrip(' !?.;\"n'))
        #if False:
        if 'sch' in self.qtype:
            qtype = 'sch'
        else:
            qtype = 'story'
        if (last.lower() == 'about'):
            self.answer = bl.find_most_common(
                all_texts[qtype][self.text_name],'NN', 'JJ', 3, stopwords)
            self.answer += ' a'

        else:
            #return
            # find sentence with answer
            qsent = bl.get_sentences(self.question)[0]
            # the question word is not significant, so remove it.
            qsent.pop(0)
            qbow = bl.get_bow(qsent, stopwords)
            sents = bl.get_sentences(all_texts[qtype][self.text_name])
            best_sent,index = bl.baseline(qbow, sents, stopwords)
            self.answer = " ".join(t[0] for t in best_sent)

            # find answer in sentence
            sgraph = self.get_dgraph(qtype,index)
            words = depgraph.get_relatives(sgraph, 'nsubj', 1, 'det')
            words = set(words)
            # add some words depending on type of question
            w = next(iter(words))
            if(len(words) == 1 and starts_with_vowel(w)):
                words.add('an')
            elif qtype == 'sch':
                words.add('the')
            else:
                words.add('a')
            self.answer = ' '.join(words)
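A note on the helper used above: starts_with_vowel is defined elsewhere in this project and is not shown. A plausible, purely illustrative sketch of it, together with the a/an choice the snippet makes, could be:

def starts_with_vowel(word):
    # Hypothetical stand-in for the project's helper: checks the first letter only.
    return word[:1].lower() in 'aeiou'

def with_article(word):
    return ('an ' if starts_with_vowel(word) else 'a ') + word

# with_article('elephant') -> 'an elephant'; with_article('dog') -> 'a dog'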
Example No. 11
def process_baseline(oracle_csv):
    df = pd.ExcelFile(oracle_csv).parse('Sheet1')
    df_baseline_pinyin = pd.ExcelFile(os.path.join("..", "data", "proposal", "BaselineResponses.xlsx")).parse('Sheet1')
    #df = pd.read_csv(oracle_csv)
    names = df["English"]
    o1 = df["Pinyin_O1"]
    o2 = df["Pinyin_O2"]
    bp = df_baseline_pinyin["Baseline"]

    distance = 0
    diff_count = 0
    for name, name1, name2, pinyin in zip(names, o1, o2, bp):
        if name != name1 or name != name2:
            diff_count += 1
            baseline_guess = baseline.baseline(name)
            print(baseline_guess)
            dist_o1 = edit_distance.edit_distance_pinyin(pinyin, name1)
            print("Distance between", pinyin, "and", name1, ":", dist_o1)
            dist_o2 = edit_distance.edit_distance_pinyin(pinyin, name2)
            print("Distance between", pinyin, "and", name2, ":", dist_o2)
            distance += ((dist_o1 + dist_o2) / 2)

    # take the average over ALL names
    return (distance/len(names), diff_count, len(names))
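edit_distance.edit_distance_pinyin is project code not shown here; it presumably computes some edit distance adapted to pinyin. As a generic point of reference only, a standard character-level Levenshtein distance looks like this:

def levenshtein(a, b):
    # Classic dynamic-programming edit distance: insert/delete/substitute, cost 1 each.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        curr = [i]
        for j, cb in enumerate(b, start=1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]

# levenshtein("zhang", "zhan") -> 1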
Example No. 12
    def solve_depgraph(self):
        stopwords = set(nltk.corpus.stopwords.words("english"))
        base_ans = self.solve_baseline(stopwords)
        # find sentence with answer
        qsent = bl.get_sentences(self.question)[0]
        qbow = bl.get_bow(qsent, stopwords)
        sents = bl.get_sentences(all_texts[self.qtype[0]][self.text_name])
        best_sent,index = bl.baseline(qbow, sents, stopwords)
        self.answer = " ".join(t[0] for t in best_sent)
        qtype = self.qtype[0]

        # find answer in sentence
        sgraph = self.get_dgraph(qtype,index)
        qgraph = Question.q_depgraphs[self.qid]
        working_ans = depgraph.find_answer2(qgraph, sgraph)
        self.answer = working_ans

        baseans = set(w for w in base_ans.split())
        depans = set(w for w in working_ans.split())
        for entry in baseans:
            depans.add(entry)
        alist = [ a for a in depans if a in baseans]
        self.answer = " ".join(alist)
        return self.answer
Example No. 13
        return fromHell(b_min, b_max, b_pop, population)
        #return population

    iter_val = 0
    for i in xrange(1):
        fromHellval = search(iter_val)
        iter_val += 1
    return fromHellval


if __name__ == '__main__':
    num_can = 500
    num_gen = 100
    p_mut = 5
    p_cros = 1

    for i in [10, 20, 40]:
        for j in [2, 4, 6, 8]:
            for k in [1, 3, 5, 7]:
                print "GA for model DTLZ ", k, "with decisions = ", i, " objectives = ", j
                model = dtlz(i, j, k)
                base_min, base_max = baseline(model)
                base_pop = basePopulation(model, base_min, base_max)
                print "GA parameters:", " \n num_can: ", num_can, "\n num_gen: ", num_gen, "\n p_mut: ", p_mut,\
                "\n p_cros: ", p_cros

                print "Divergence Value from Baseline:", ga(
                    model, base_min, base_max, base_pop, num_can, num_gen,
                    p_mut, p_cros)
                print "-" * 120
Example No. 14
    if args['spaces']:
        data['form'] = data['form'].str.split(' ')
        if not data['lemma'].isnull().any():
            data['lemma'] = data['lemma'].str.split(' ')
    else:
        data['form'] = [re.findall(r'\X', f) for f in data['form']]
        if not data['lemma'].isnull().any():
            data['lemma'] = [re.findall(r'\X', f) for f in data['lemma']]

    if args['cv']:
        index = np.random.randint(1, args['cv'] + 1, len(data))
        for k in range(1, max(index) + 1):
            print('** Start run', k)
            args['score'] = paradigms(data, index == k, **args)
            if not data['lemma'].isnull().any():
                args['baseline'] = baseline(data, index == k, **args)
            else:
                args['baseline'] = 0.0
            print(args)
    elif args['train']:
        index = np.array([
            np.random.random() > float(args['train']) for i in range(len(data))
        ],
                         dtype=np.bool)
        args['score'] = paradigms(data, index, **args)
        if not data['lemma'].isnull().any():
            args['baseline'] = baseline(data, index, **args)
        else:
            args['baseline'] = 0.0
        print(args)
    else:
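The cross-validation branch above assigns each row a random fold label in 1..cv and holds out the rows where index == k; the train branch draws a Boolean mask instead. A tiny standalone sketch of the fold-assignment idea (numpy only, made-up sizes):

import numpy as np

n_rows, cv = 10, 3
index = np.random.randint(1, cv + 1, n_rows)   # fold label per row, in {1, ..., cv}
for k in range(1, cv + 1):
    heldout = index == k                       # Boolean mask of this fold's rows
    print('fold', k, 'holds out', heldout.sum(), 'of', n_rows, 'rows')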
Example No. 15
from baseline import baseline
from apriori import apriori
from data import load_grocery_dataset, load_unix_usage_dataset
from rules import generate_rules
import time
import os
import psutil

if __name__ == '__main__':
    print('==========DATASET: grocery==========')
    print('=============Baseline===============')
    print('====================================')
    tic = time.time()
    grocery = load_grocery_dataset()
    result = baseline(grocery, min_sup=0.01)
    generate_rules(result, min_conf=0.5)
    print('Baseline time cost {:.6f}'.format(time.time() - tic))
    print('Memory cost {}'.format(
        psutil.Process(os.getpid()).memory_info().rss))
    print('====================================')
    print()

    print('==========DATASET: grocery==========')
    print('==============Apriori===============')
    print('====================================')
    tic = time.time()
    grocery = load_grocery_dataset()
    result = apriori(grocery, min_sup=0.01)
    generate_rules(result, min_conf=0.5)
    print('Apriori time cost {:.6f}'.format(time.time() - tic))
    print('Memory cost {}'.format(
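The timing and memory bookkeeping around each run is identical; a small hypothetical helper (using only time and psutil, which the snippet already imports) could wrap any of the mining calls:

import os
import time
import psutil

def profile(fn, *args, **kwargs):
    # Run fn, then report wall-clock time and current resident memory.
    tic = time.time()
    result = fn(*args, **kwargs)
    print('Time cost {:.6f}'.format(time.time() - tic))
    print('Memory cost {}'.format(psutil.Process(os.getpid()).memory_info().rss))
    return result

# e.g. result = profile(baseline, grocery, min_sup=0.01)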
Example No. 16
NUM_TEST_FORMULAS = 100

nn = NeuralNet()
nn.train()
testFormulas = FormulaSource()
testFormulas.gen_data(NUM_TEST_FORMULAS)
numCorrect = 0
numTotal = 0
nnC = 0
for f in testFormulas.data:
    t = TruthTable(Formula(f))

    oracle(t)
    oracleT = copy(t.table)

    baseline(t)
    baseT = copy(t.table)

    nn.solve_table(t)
    nnT = copy(t.table)
    for k in oracleT:
        numTotal += 1
        if oracleT[k] == baseT[k]:
            numCorrect += 1
        if oracleT[k] == nnT[k]:
            nnC += 1

print("Baseline: {}/{} correct".format(numCorrect, numTotal),
      "accuracy={}".format(numCorrect / numTotal))
print("NN: {}/{} correct".format(nnC, numTotal),
      "accuracy={}".format(nnC / numTotal))
Example No. 17
def train(args):
    # Verify algorithm and config
    algo = args.algo
    if algo == "PPO":
        config = ppo_config
    elif algo == "A2C":
        config = a2c_config
    else:
        raise ValueError("args.algo must be in [PPO, A2C]")
    config.num_envs = args.num_envs

    # Seed the environments and setup torch
    seed = args.seed
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    torch.set_num_threads(1)

    # Clean log directory
    log_dir = verify_log_dir(args.log_dir, algo)

    # Create vectorized environments
    num_envs = args.num_envs
    env_name = args.env_name

    # Prepare tensorboard file
    args.save_log = 'Pairtrding-{}'.format(time.strftime("%Y%m%d-%H%M%S"))
    generate_date = str(datetime.now().date())

    writer = SummaryWriter(args.log_dir + '/runs/' + generate_date + '/' +
                           args.save_log)

    # download stock price data from yahoo finance
    stocklist = [
        '0700.hk', '2318.hk', '3988.hk', '0998.hk', '1398.hk', '3968.hk',
        '0981.hk', '0005.hk'
    ]
    # Tencent, Ping An, Bank of China, China CITIC Bank, ICBC, China Merchants Bank, SMIC, HSBC
    stocktickers = ' '.join(stocklist)

    data = yf.download(tickers=stocktickers,
                       start="2010-01-01",
                       end="2019-12-31")
    data = data['Close']
    columnchange = []
    for stock in data.columns:
        name = stock + 'change'
        columnchange.append(name)
        data[name] = data[stock] - data[stock].shift(1)

    CorrDict = {}
    for i in columnchange:
        for j in columnchange:
            if i != j and (i, j) not in CorrDict:
                CorrDict[(i, j)] = data[i].corr(data[j])
    pair = list(max(CorrDict))
    pair.append(pair[0][:7])
    pair.append(pair[1][:7])
    dataremain = data[pair]

    from sklearn import linear_model
    import numpy as np
    model = linear_model.LinearRegression()
    model.fit(dataremain[pair[0]][1:-250].to_numpy().reshape(-1, 1),
              y=dataremain[pair[1]][1:-250])
    beta = model.coef_[0]

    dataremain['Spread'] = beta * data[pair[0]] - data[pair[1]]
    Spreadmean = dataremain['Spread'].mean()
    Spreadstd = dataremain['Spread'].std()
    dataremain['Z-score'] = (dataremain['Spread'] - Spreadmean) / Spreadstd

    envs = PairtradingEnv(stock1=dataremain[pair[2]][:-250],
                          stock2=dataremain[pair[3]][:-250])
    eval_envs = PairtradingEnv(stock1=dataremain[pair[2]][-250:],
                               stock2=dataremain[pair[3]][-250:])

    baseline_config = baselineConfig(mean=Spreadmean, std=Spreadstd, beta=beta)
    baseline_trainer = baseline(env=envs, config=baseline_config)

    baseline_eval = baseline(env=eval_envs, config=baseline_config)

    test = env_name == "CartPole-v0"
    frame_stack = args.input_length if not test else 1

    # Setup trainer
    if algo == "PPO":
        trainer = PPOTrainer(envs, config, frame_stack, _test=test)
    else:
        trainer = A2CTrainer(envs, config, frame_stack, _test=test)

    # Create a placeholder tensor to help stack frames in 2nd dimension
    # That is, turn the observation from shape [num_envs, 1, 84, 84] to
    # [num_envs, 4, 84, 84].
    frame_stack_tensor = FrameStackTensor(
        num_envs, envs.observation_space.shape, frame_stack,
        config.device)  # envs.observation_space.shape: 1,42,42

    # Setup some stats helpers
    episode_rewards = np.zeros([num_envs, 1], dtype=np.float)
    total_episodes = total_steps = iteration = 0
    reward_recorder = deque(maxlen=100)
    episode_length_recorder = deque(maxlen=100)
    episode_values = deque(maxlen=100)
    sample_timer = Timer()
    process_timer = Timer()
    update_timer = Timer()
    total_timer = Timer()
    progress = []
    evaluate_stat = {}

    # Start training
    print("Start training!")
    while True:  # Break when total_steps exceeds maximum value
        # ===== Sample Data =====
        # episode_values = []
        episode_rewards = np.zeros([num_envs, 1], dtype=np.float)
        for env_id in range(num_envs):
            obs = envs.reset()  # obs.shape: 15,1,42,42
            frame_stack_tensor.update(obs, env_id)
            trainer.rollouts.observations[0, env_id].copy_(
                frame_stack_tensor.get(env_id)
            )  #trainer.rollouts.observations.shape: torch.Size([201, 15, 4, 42, 42])

            with sample_timer:
                for index in range(config.num_steps):
                    # Get action
                    # [TODO] Get the action
                    # Hint:
                    #   1. Remember to disable gradient computing
                    #   2. trainer.rollouts is a storage containing all data
                    #   3. What observation is needed for trainer.compute_action?
                    with torch.no_grad():
                        values, actions_cash, action_log_prob_cash, actions_beta, action_log_prob_beta = trainer.compute_action(
                            trainer.rollouts.observations[index, env_id])

                    act = baseline_trainer.compute_action(
                        actions_cash.view(-1), actions_beta.view(-1))

                    cpu_actions = act

                    # Step the environment
                    # (Check step_envs function, you need to implement it)
                    obs, reward, done, masks, total_episodes, \
                    total_steps, episode_rewards, episode_values = step_envs(
                        cpu_actions, envs, env_id, episode_rewards, episode_values, frame_stack_tensor,
                        reward_recorder, episode_length_recorder, total_steps,
                        total_episodes, config.device, test)

                    rewards = torch.from_numpy(
                        np.array(reward).astype(np.float32)).view(-1).to(
                            config.device)
                    # Store samples
                    trainer.rollouts.insert(frame_stack_tensor.get(env_id),
                                            actions_cash.view(-1),
                                            action_log_prob_cash.view(-1),
                                            actions_beta.view(-1),
                                            action_log_prob_beta.view(-1),
                                            values.view(-1), rewards,
                                            masks.view(-1), env_id)

        # ===== Process Samples =====
        with process_timer:
            with torch.no_grad():
                next_value = trainer.compute_values(
                    trainer.rollouts.observations[-1])
            trainer.rollouts.compute_returns(next_value, config.GAMMA)

        # ===== Update Policy =====
        with update_timer:
            policy_loss, value_loss, dist_entropy, total_loss = \
                trainer.update(trainer.rollouts)
            trainer.rollouts.after_update()

            # Add training statistics to tensorboard log file
            writer.add_scalar('train_policy_loss', policy_loss, iteration)
            writer.add_scalar('train_value_loss', value_loss, iteration)
            writer.add_scalar('train_dist_entropy', dist_entropy, iteration)
            writer.add_scalar('train_total_loss', total_loss, iteration)
            writer.add_scalar('train_episode_rewards',
                              np.mean(episode_rewards), iteration)
            writer.add_scalar('train_episode_values',
                              np.array(episode_values).mean(), iteration)

        # ===== Evaluate Current Policy =====
        if iteration % config.eval_freq == 0:
            eval_timer = Timer()
            evaluate_rewards, evaluate_lengths, evaluate_values = evaluate(
                trainer, eval_envs, baseline_eval, frame_stack, 5)
            evaluate_stat = summary(evaluate_rewards, "episode_reward")
            if evaluate_lengths:
                evaluate_stat.update(
                    summary(evaluate_lengths, "episode_length"))
            evaluate_stat.update(
                dict(win_rate=float(
                    sum(np.array(evaluate_rewards) >= 0) /
                    len(evaluate_rewards)),
                     evaluate_time=eval_timer.now,
                     evaluate_iteration=iteration,
                     evaluate_values=float(np.array(evaluate_values).mean())))

            # Add evaluation statistics to tensorboard log file
            writer.add_scalar('eval_episode_rewards',
                              np.array(evaluate_rewards).mean(),
                              iteration // config.eval_freq)
            writer.add_scalar('eval_episode_values',
                              np.array(evaluate_values).mean(),
                              iteration // config.eval_freq)

        # ===== Log information =====
        if iteration % config.log_freq == 0:
            stats = dict(
                log_dir=log_dir,
                frame_per_second=int(total_steps / total_timer.now),
                training_episode_reward=summary(reward_recorder,
                                                "episode_reward"),
                training_episode_values=summary(episode_values,
                                                "episode_value"),
                training_episode_length=summary(episode_length_recorder,
                                                "episode_length"),
                evaluate_stats=evaluate_stat,
                learning_stats=dict(policy_loss=policy_loss,
                                    entropy=dist_entropy,
                                    value_loss=value_loss,
                                    total_loss=total_loss),
                total_steps=total_steps,
                total_episodes=total_episodes,
                time_stats=dict(sample_time=sample_timer.avg,
                                process_time=process_timer.avg,
                                update_time=update_timer.avg,
                                total_time=total_timer.now,
                                episode_time=sample_timer.avg +
                                process_timer.avg + update_timer.avg),
                iteration=iteration)

            progress.append(stats)
            pretty_print({
                "===== {} Training Iteration {} =====".format(algo, iteration):
                stats
            })

        if iteration % config.save_freq == 0:
            trainer_path = trainer.save_w(log_dir, "iter{}".format(iteration))
            progress_path = save_progress(log_dir, progress)
            print(
                "Saved trainer state at <{}>. Saved progress at <{}>.".format(
                    trainer_path, progress_path))

        if iteration >= args.max_steps:
            break

        iteration += 1

    trainer.save_w(log_dir, "final")
    envs.close()
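The pair-selection block above picks the most correlated pair of price-change series, fits a hedge ratio with a linear regression, and z-scores the resulting spread. That spread computation can be exercised on its own with synthetic prices (nothing here beyond numpy, pandas and scikit-learn; the numbers are invented):

import numpy as np
import pandas as pd
from sklearn import linear_model

rng = np.random.RandomState(0)
p1 = pd.Series(100 + rng.randn(500).cumsum())           # synthetic price series 1
p2 = pd.Series(50 + 0.5 * p1.values + rng.randn(500))   # a roughly related series 2

# Hedge ratio (beta) from a simple linear regression of p2 on p1
reg = linear_model.LinearRegression()
reg.fit(p1.to_numpy().reshape(-1, 1), p2)
beta = reg.coef_[0]

# Spread and its z-score, as in the snippet above
spread = beta * p1 - p2
zscore = (spread - spread.mean()) / spread.std()
print(beta, zscore.tail())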
Example No. 18
lumi_mask = lumi_json.contains(data_signal.run, data_signal.lumi)
data_signal = data_signal[lumi_mask]
lumi_mask = lumi_json.contains(data_background.run, data_background.lumi)
data_background = data_background[lumi_mask]
data_background['isE'] = 0
data_signal['isE'] = 1
data = pd.concat((data_background, data_signal))
data = data.sample(frac=1,
                   random_state=42).reset_index(drop=True)  #shuffle entries
#used later on but better having it here for data integrity
#sameprob = data_background.shape[0]/float(data_signal.shape[0])
#data.loc[(data.isE == 1), 'weight'] = sameprob
data['cutbased'] = False
data['cutmatching'] = False
data['cutbdt'] = False
baseline(data)

mc_background['isE'] = 0
mc_signal['isE'] = 1
mc = pd.concat((mc_background, mc_signal))
mc = mc.sample(frac=1,
               random_state=42).reset_index(drop=True)  #shuffle entries
mc['cutbased'] = False
mc['cutmatching'] = False
mc['cutbdt'] = False
baseline(mc)
X_ = lambda x: [i for i, _, _ in x]
Y_ = lambda x: [i for _, i, _ in x]
Z_ = lambda x: [i for _, _, i in x]

for var, binning, xlegend in [('trk_pt', np.arange(1, 11, 1), 'ktf track pT'),
Example No. 19
    iter_val = 0
    for i in xrange(1):
        fromHellval = search(iter_val)
        iter_val += 1
    return fromHellval
        


if __name__ == '__main__':
    num_can = 500
    num_gen = 100
    p_mut = 5
    p_cros = 1

    
    for i in [10, 20, 40]:
        for j in [2, 4, 6, 8]:
            for k in [1, 3, 5, 7]:
                print "GA for model DTLZ ", k, "with decisions = ", i, " objectives = ", j
                model = dtlz(i, j, k) 
                base_min, base_max = baseline(model)
                base_pop = basePopulation(model, base_min, base_max ) 
                print "GA parameters:", " \n num_can: ", num_can, "\n num_gen: ", num_gen, "\n p_mut: ", p_mut,\
                "\n p_cros: ", p_cros

                print "Divergence Value from Baseline:", ga(model, base_min, base_max, base_pop, num_can, num_gen, p_mut, p_cros)
                print "-"*120
         
         
         
               
Example No. 20
def main(
    order,
    procedure="cg",
    max_iters=1,
    superitems_horizontal=True,
    superitems_horizontal_type="two-width",
    superitems_max_vstacked=4,
    density_tol=0.5,
    filtering_two_dims=False,
    filtering_max_coverage_all=3,
    filtering_max_coverage_single=3,
    tlim=None,
    enable_solver_output=False,
    height_tol=0,
    cg_use_height_groups=True,
    cg_mr_warm_start=True,
    cg_max_iters=100,
    cg_max_stag_iters=20,
    cg_sp_mr=False,
    cg_sp_np_type="mip",
    cg_sp_p_type="cp",
    cg_return_only_last=False,
):
    """
    External interface to all the implemented solutions to solve 3D-BPP
    """
    assert max_iters > 0, "The maximum number of iterations must be > 0"
    assert procedure in ("mr", "bl", "cg"), "Unsupported procedure"

    logger.info(f"{procedure.upper()} procedure starting")

    # Create the final superitems pool and a copy of the order
    final_layer_pool = layers.LayerPool(superitems.SuperitemPool(),
                                        config.PALLET_DIMS)
    working_order = order.copy()

    # Iterate the specified number of times in order to reduce
    # the number of uncovered items at each iteration
    not_covered, all_singles_removed = [], []
    for iter in range(max_iters):
        logger.info(f"{procedure.upper()} iteration {iter + 1}/{max_iters}")

        # Create the superitems pool and call the baseline procedure
        superitems_list, singles_removed = superitems.SuperitemPool.gen_superitems(
            order=working_order,
            pallet_dims=config.PALLET_DIMS,
            max_vstacked=superitems_max_vstacked,
            horizontal=superitems_horizontal,
            horizontal_type=superitems_horizontal_type,
        )
        superitems_pool = superitems.SuperitemPool(superitems=superitems_list)
        all_singles_removed += singles_removed

        # Call the right packing procedure
        if procedure == "bl":
            layer_pool = baseline.baseline(superitems_pool,
                                           config.PALLET_DIMS,
                                           tlim=tlim)
        elif procedure == "mr":
            layer_pool = maxrects_warm_start(superitems_pool,
                                             height_tol=height_tol,
                                             density_tol=density_tol,
                                             add_single=False)
        elif procedure == "cg":
            layer_pool = cg(
                superitems_pool,
                height_tol=height_tol,
                density_tol=density_tol,
                use_height_groups=cg_use_height_groups,
                mr_warm_start=cg_mr_warm_start,
                max_iters=cg_max_iters,
                max_stag_iters=cg_max_stag_iters,
                tlim=tlim,
                sp_mr=cg_sp_mr,
                sp_np_type=cg_sp_np_type,
                sp_p_type=cg_sp_p_type,
                return_only_last=cg_return_only_last,
                enable_solver_output=enable_solver_output,
            )

        # Filter layers based on the given parameters
        layer_pool = layer_pool.filter_layers(
            min_density=density_tol,
            two_dims=filtering_two_dims,
            max_coverage_all=filtering_max_coverage_all,
            max_coverage_single=filtering_max_coverage_single,
        )

        # Add only the filtered layers
        final_layer_pool.extend(layer_pool)

        # Compute the number of uncovered Items
        prev_not_covered = len(not_covered)
        item_coverage = final_layer_pool.item_coverage()
        not_covered = [k for k, v in item_coverage.items() if not v]
        logger.info(
            f"Items not covered: {len(not_covered)}/{len(item_coverage)}")
        if len(not_covered) == prev_not_covered:
            logger.info(
                "Stop iterating, no improvement from the previous iteration")
            break

        # Compute a new order composed of only not covered items
        working_order = order.iloc[not_covered].copy()

    # Build a pool of bins from the layer pool and compact
    # all layers in each bin to avoid having "flying" products
    bin_pool = bins.BinPool(final_layer_pool,
                            config.PALLET_DIMS,
                            singles_removed=set(all_singles_removed))
    return bins.CompactBinPool(bin_pool)
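Stripped of the packing details, the outer loop in main() re-runs the chosen procedure on whatever items are still uncovered and stops as soon as an iteration brings no improvement. A schematic of just that control flow, with pack standing in (hypothetically) for the bl/mr/cg procedures:

def iterate_until_covered(items, pack, max_iters=3):
    # pack(items) -> set of item ids it managed to cover (hypothetical callable).
    covered, not_covered = set(), set(items)
    for _ in range(max_iters):
        covered |= pack(sorted(not_covered))
        remaining = set(items) - covered
        if len(remaining) == len(not_covered):   # no improvement, stop early
            break
        not_covered = remaining
    return covered, not_covered

# iterate_until_covered(range(10), pack=lambda xs: set(xs[:4]))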
Example No. 21
def run_models(train, valid):
    return (bs.baseline(train, valid), 
            bsl1.baseline_l1(train, valid), 
            bsl2.baseline_l2(train, valid), 
            sgd.sgd_bias(train, valid),
            bsfreq.baseline_freq(train,valid,'predict'))
Example No. 22
def train(cfg, args):
    detector = build_detection_model(cfg)
    #print(detector)
    detector.eval()
    device = torch.device(cfg.MODEL.DEVICE)
    detector.to(device)
    outdir = cfg.OUTPUT_DIR

    checkpointer = DetectronCheckpointer(cfg, detector, save_dir=outdir)
    ckpt = cfg.MODEL.WEIGHT
    _ = checkpointer.load(ckpt)

    # Initialize the network
    model = baseline()
    class_weights = [1, 1, 5, 5]  # could be adjusted
    class_weights = torch.FloatTensor(class_weights).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Initialize optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=float(args.initLR),
                           weight_decay=0.001)

    # Initialize image batch
    # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, args.imHeight, args.imWidth))
    imBatch = Variable(torch.FloatTensor(args.batch_size, 3, 736, 1280))
    targetBatch = Variable(torch.LongTensor(args.batch_size, 1))

    # Move network and batch to gpu
    imBatch = imBatch.cuda(device)
    targetBatch = targetBatch.cuda(device)
    model = model.cuda(device)

    # Initialize dataloader
    Dataset = BatchLoader(imageRoot=args.imageroot,
                          gtRoot=args.gtroot,
                          cropSize=(args.imWidth, args.imHeight))
    dataloader = DataLoader(Dataset,
                            batch_size=int(args.batch_size),
                            num_workers=0,
                            shuffle=True)

    lossArr = []
    AccuracyArr = []
    accuracy = 0
    iteration = 0

    for epoch in range(0, 10):
        trainingLog = open(outdir + ('trainingLog_{0}.txt'.format(epoch)), 'w')
        accuracy = 0
        for i, dataBatch in enumerate(dataloader):
            iteration = i + 1

            # Read data, under construction
            img_cpu = dataBatch['img']
            # if args.batch_size == 1:
            #     img_list = to_image_list(img_cpu[0,:,:], cfg.DATALOADER.SIZE_DIVISIBILITY)
            # else:
            #     img_list = to_image_list(img_cpu, cfg.DATALOADER.SIZE_DIVISIBILITY)
            img_list = to_image_list(img_cpu[0, :, :],
                                     cfg.DATALOADER.SIZE_DIVISIBILITY)
            imBatch.data.copy_(
                img_list.tensors)  # Tensor.shape(BatchSize, 3, Height, Width)

            target_cpu = dataBatch['target']
            # print(target_cpu)
            targetBatch.data.copy_(target_cpu)

            # Train network
            RoIPool_module = detector.roi_heads.box.feature_extractor.pooler
            RoIPredictor = detector.roi_heads.box.predictor
            RoIProc = detector.roi_heads.box.post_processor
            Backbone = detector.backbone
            hook_roi = SimpleHook(RoIPool_module)
            hook_backbone = SimpleHook(Backbone)
            hook_pred = SimpleHook(RoIPredictor)
            hook_proc = SimpleHook(RoIProc)
            out_detector = detector(imBatch)
            features_roi = hook_roi.output.data
            features_backbone = hook_backbone.output[
                0].data  # only use the bottom one
            # choose boxes with high scores
            thresh = 10
            cls_logit = hook_pred.output[0].data
            cls_logit = torch.max(cls_logit, dim=1)
            ind = torch.ge(cls_logit[0],
                           torch.FloatTensor([thresh]).to(device))
            features_roi = features_roi[ind]
            optimizer.zero_grad()

            # pred = model(features_roi, features_backbone)
            pred = model(features_roi, features_backbone)

            # print('target:', targetBatch[0,:][0])
            loss = criterion(pred, targetBatch[0, :])
            action = pred.cpu().argmax().data.numpy()

            loss.backward()

            optimizer.step()
            if action == target_cpu.data.numpy()[0]:
                accuracy += 1

            lossArr.append(loss.cpu().data.item())
            AccuracyArr.append(accuracy / iteration)

            meanLoss = np.mean(np.array(lossArr))
            if iteration % 100 == 0:
                print('prediction:', pred)
                print('predicted action:', action)
                print('ground truth:', target_cpu.data.numpy()[0])
                print(
                    'Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f' %
                    (epoch, iteration, lossArr[-1], meanLoss))

                trainingLog.write(
                    'Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f \n'
                    % (epoch, iteration, lossArr[-1], meanLoss))

                print('Epoch %d Iteration %d: Accumulated Accuracy %.5f' %
                      (epoch, iteration, AccuracyArr[-1]))
                trainingLog.write(
                    'Epoch %d Iteration %d: Accumulated Accuracy %.5f \n' %
                    (epoch, iteration, AccuracyArr[-1]))

            if epoch in [4, 7] and iteration == 1:
                print('The learning rate is being decreased at Iteration %d' %
                      iteration)
                trainingLog.write(
                    'The learning rate is being decreased at Iteration %d \n' %
                    iteration)
                for param_group in optimizer.param_groups:
                    param_group['lr'] /= 10

            if iteration == args.MaxIteration:
                torch.save(model.state_dict(),
                           (outdir + 'netFinal_%d.pth' % (epoch + 1)))
                break

        if iteration >= args.MaxIteration:
            break

        if (epoch + 1) % 2 == 0:
            torch.save(model.state_dict(),
                       (outdir + 'netFinal_%d.pth' % (epoch + 1)))
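SimpleHook is not shown in this snippet; from the way it is used (hook = SimpleHook(module), then hook.output after the forward pass), it is presumably a thin wrapper around PyTorch's register_forward_hook. A sketch under that assumption:

import torch
import torch.nn as nn

class SimpleHook:
    # Capture a module's output on every forward pass (assumed behaviour of SimpleHook).
    def __init__(self, module):
        self.output = None
        self.handle = module.register_forward_hook(self._hook)

    def _hook(self, module, inputs, output):
        self.output = output

    def remove(self):
        self.handle.remove()

# e.g. capture the features of a small conv layer
layer = nn.Conv2d(3, 8, kernel_size=3, padding=1)
hook = SimpleHook(layer)
_ = layer(torch.randn(1, 3, 16, 16))
print(hook.output.shape)  # torch.Size([1, 8, 16, 16])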
Example No. 23
from data_utils import rt_shift_augmentation

font = {'weight': 'normal', 'size': 16}
color_seq = (44. / 256, 83. / 256, 169. / 256, 1.)
color_ref = (69. / 256, 209. / 256, 163. / 256, 1.)
color_bl = (237. / 256, 106. / 256, 90. / 256, 1.)

# Test data, please use git-lfs to download files below
test_data = pickle.load(open('./test_input.pkl', 'rb'))
# Pre-saved sequential PB-Net predictions on test input
pred = pickle.load(open('./test_preds.pkl', 'rb'))
# Pre-saved reference-based PB-Net predictions on test input
pred_ref = pickle.load(open('./test_preds_ref.pkl', 'rb'))

###########################################################################
pred_bl = [baseline(p) for p in test_data]

intg = False
y_trues = []
y_preds = []
y_preds_ref = []
y_preds_bl = []
for i, sample in enumerate(test_data):
    output = pred[i]
    output_ref = pred_ref[i]
    output_bl = pred_bl[i]
    y_trues.append(calculate_abundance(sample[1], sample))
    y_preds.append(calculate_abundance(output, sample, intg=intg))
    y_preds_ref.append(calculate_abundance(output_ref, sample, intg=intg))
    y_preds_bl.append(calculate_abundance(output_bl, sample, intg=intg))
y_trues = np.array(y_trues)
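What usually follows these loops is a direct agreement check between y_trues and each prediction array; a short sketch with synthetic numbers (the pickled test files are not reproduced here, and this comparison is not taken from the original script):

import numpy as np

y_trues = np.array([1.0, 2.5, 3.2, 4.8])
y_preds_bl = np.array([1.1, 2.3, 3.5, 4.6])

r = np.corrcoef(y_trues, y_preds_bl)[0, 1]               # Pearson correlation
mape = np.mean(np.abs(y_preds_bl - y_trues) / y_trues)   # mean absolute percentage error
print(r, mape)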
Example No. 24
__author__ = 'Umberto'

from baseline import baseline
from unionOfClassifiers import runMethod2
from SVMScript import runMethod1

predictionsFromBaseline = baseline()
predictionsFromMethod2 = runMethod2()
predictionsFromMethod1 = runMethod1()

#flatten predictions
predictionsFromBaseline = [val for sublist in predictionsFromBaseline for val in sublist]
predictionsFromMethod2 = [val for sublist in predictionsFromMethod2 for val in sublist]
predictionsFromMethod1 = [val for sublist in predictionsFromMethod1 for val in sublist]

from scipy import stats
if(len(predictionsFromBaseline) != len(predictionsFromMethod2)):
    print('Error predictions from method 2 have different lengths!')
else:

    r1 = stats.ttest_ind(predictionsFromBaseline, predictionsFromMethod2)
    print(r1)
    r2 = stats.ttest_ind(predictionsFromBaseline, predictionsFromMethod2, equal_var = False)
    print(r2)
#
# (8.6566243900008022, 8.3173929492649013e-18)
# (8.6566243900008022, 1.2765689781551307e-17)

if(len(predictionsFromBaseline) != len(predictionsFromMethod1)):
    print('Error predictions from method 1 have different lengths!')
else:
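scipy.stats.ttest_ind does the comparison above, first with the default pooled-variance test and then with equal_var=False (Welch's t-test). The same calls on made-up prediction vectors:

import numpy as np
from scipy import stats

a = np.random.normal(0.0, 1.0, 200)   # e.g. flattened baseline predictions
b = np.random.normal(0.3, 1.0, 200)   # e.g. flattened method predictions

print(stats.ttest_ind(a, b))                   # Student's t-test, pooled variance
print(stats.ttest_ind(a, b, equal_var=False))  # Welch's t-test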
Example No. 25
def base(question, story):
    #Base
    real_question = question
    question_id = question["qid"]

    if question['type']=='Sch':
        text=story['sch']
    else:
        text = story["text"]
    question = question["text"]
    #print("QUESTION: ", question)

    #Code
    stopwords = set(nltk.corpus.stopwords.words("english"))
    #question_stem_list = chunk.lemmatize(nltk.pos_tag(nltk.word_tokenize(question)))
    #question_stem = "".join(t[0] + " " for t in question_stem_list)
    question_stem = question
    qbow = baseline.get_bow(baseline.get_sentences(question_stem)[0], stopwords)
    sentences = baseline.get_sentences(text)
    question=chunk.get_sentences(question)
    rake = Rake()
    rake.extract_keywords_from_text(real_question["text"])
    global noun_ids
    global verb_ids
    base_ans, index = baseline.baseline(qbow, sentences, stopwords,real_question["text"], rake.get_ranked_phrases(),story["sid"], noun_ids, verb_ids)
    newanswer ="".join(t[0]+" " for t in base_ans)
    saveans=newanswer
    chunker = nltk.RegexpParser(GRAMMAR)
    tempanswer=chunk.get_sentences(newanswer)
    atree=chunker.parse(tempanswer[0])
    what_set = ["happened", "do"] #this should probably be changed in the future
    what_set = set(what_set)
    rake =Rake()
    rake.extract_keywords_from_text(real_question["text"])
    if question[0][0][0].lower()=="who":

        pos_phrases = nltk.pos_tag(rake.get_ranked_phrases())
        #print(pos_phrases)

        only_noun_pos_phrases = [noun for noun in pos_phrases if re.search(r"NN", noun[1])]
        only_noun_phrases = []
        for i in only_noun_pos_phrases:
            only_noun_phrases.append(i[0])

        np=chunk.find_nounphrase(atree)
        temp_ans=""
        if (np != []):

            counter = 0
            while True:
                temp_ans = ""
                val = False

                for token in np[counter].leaves():
                    temp_ans=temp_ans+" "+token[0]
                for word in only_noun_phrases:
                        if word in temp_ans:
                            val = True
                if val: # if answer contains a word in only_noun_phrases
                    if len(np)-1>counter:
                        counter+=1
                    else:
                        temp_ans = newanswer
                        break
                else:
                    break
        else:
            temp_ans = newanswer

        newanswer=temp_ans
        


    elif question[0][0][0].lower()=="what":
        #TODO will use dependency parsing in the future, as what questions are too hard to figure out without knowing which words are dependent on others.
        if any(word in real_question["text"] for word in what_set):
            pp = chunk.find_verbphrase(atree)
        else:
            pp=chunk.find_nounphrase(atree)
        temp_ans=""
        #print([k.leaves() for k in pp])
        if (pp != []):
            if len(pp)> 1: #fix later
                for token in pp[1].leaves():
                    temp_ans = temp_ans + " " + token[0]
            else:
                for token in pp[0].leaves():
                    temp_ans = temp_ans+" "+token[0]
        else:
            temp_ans = newanswer
        newanswer=temp_ans


    elif question[0][0][0].lower()=="where":
        pp=chunk.find_prepphrases(atree)
        temp_ans=""
        if (pp != []):
            for token in pp[0].leaves():
                temp_ans=temp_ans+" "+token[0]
        else:
            temp_ans = newanswer
        newanswer=temp_ans

    elif question[0][0][0].lower()=="when":
        pp=chunk.find_times(atree)
        temp_ans=""
        if (pp != []):
            for token in pp[0].leaves():
                temp_ans=temp_ans+" "+token[0]
        else:
            temp_ans = newanswer
        newanswer=temp_ans
    elif question[0][0][0].lower() == "why":
        pp=chunk.find_reasons(atree)
        temp_ans=""
        if (pp != []):
            for token in pp[0].leaves():
                temp_ans=temp_ans+" "+token[0]
        else:
            temp_ans = newanswer
        newanswer=temp_ans
 
    if newanswer.replace(" ","") in PERSONAL_PRONOUN and question[0][0][0].lower()=="who":
        index=get_Index(question,story)
        i = index
        if i > 0:
            previous_sentence=sentences[index-i]
            for word,tag in previous_sentence:
                if tag == "NNP":
                    newanswer=word

    #print("ANSWER ",newanswer)
    #print()
    saveans= re.sub(r'[^\w\s]','',saveans)
    return saveans
Example No. 26
def test(cfg, args):
    # load detector
    #    detector = build_detection_model(cfg)
    #    detector.eval()
    device = torch.device(cfg.MODEL.DEVICE)
    #    detector.to(device)
    outdir = cfg.OUTPUT_DIR

    # load network
    model = baseline(cfg)
    model.load_state_dict(torch.load(args.model_root))

    # Initialize image batch
    # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, args.imHeight, args.imWidth))
    #    imBatch = Variable(torch.FloatTensor(args.batch_size, 3, 736, 1280))
    targetBatch = Variable(torch.LongTensor(args.batch_size, 1))

    # Move network and batch to gpu
    #    imBatch = imBatch.cuda(device)
    targetBatch = targetBatch.cuda(device)
    model = model.cuda(device)

    # Initialize dataloader
    Dataset = BatchLoader(imageRoot=args.imageroot,
                          gtRoot=args.gtroot,
                          cropSize=(args.imWidth, args.imHeight))
    dataloader = DataLoader(Dataset,
                            batch_size=args.batch_size,
                            num_workers=0,
                            shuffle=True)
    length = Dataset.__len__()

    AccuracyArr = []
    accuracy = 0

    # test
    SaveFilename = (outdir + 'TestingLog.txt')
    TestingLog = open(SaveFilename, 'w')
    print('Save to ', SaveFilename)
    for i, dataBatch in enumerate(dataloader):
        # Read data, under construction; currently hard-coded
        img_cpu = dataBatch['img'][0, :]
        N = img_cpu.shape[0]
        imBatch = Variable(torch.FloatTensor(N, 1024, 14, 14))
        imBatch = imBatch.cuda(device)
        imBatch.data.copy_(
            img_cpu)  # Tensor.shape(BatchSize, 3, Height, Width)

        target_cpu = dataBatch['target']
        # print(target_cpu)
        targetBatch.data.copy_(target_cpu)

        # grab features from detector

        pred = model(imBatch)
        action = pred.cpu().argmax(dim=1).data.numpy()

        print('predicted action:', action)
        print('ground truth:', target_cpu.data.numpy()[0])

        if action == target_cpu.data.numpy()[0]:
            accuracy += 1

        AccuracyArr.append(accuracy / (i + 1))

        print('Iteration %d / %d: Accumulated Accuracy %.5f' %
              (i + 1, length, AccuracyArr[-1]))
        TestingLog.write('Iteration %d / %d: Accumulated Accuracy %.5f \n' %
                         (i + 1, length, AccuracyArr[-1]))
Example No. 27
def run_model(train, valid, mode, param):
    return bs.baseline(train, valid, mode, param)
Example No. 28
    print 'algorithm:', alg
    print 'set event length:', event_length
    print 'set event overlap:', overlap

    G = utils.generateGraph(n=num_nodes)
    print 'number of nodes in the background network:', G.number_of_nodes()
    print 'number of edges in the background network:', G.number_of_edges()

    timestamps, active_truth = utils.generateIntervals(
        G, event_length=event_length, overlap=overlap)

    print 'number of timestamps', len(timestamps)

    if alg == 'baseline':
        Xstart, Xend = baseline.baseline(timestamps)
    elif alg == 'inner':
        Xstart, Xend = inner_point.runInner(timestamps)
    elif alg == 'budget':
        Xstart, Xend = budget.runBudget(timestamps)
    else:
        print('no algorithm specified')
        exit()

    print

    print 'relative total length of solution =', utils.getCost(
        Xstart, Xend) / ((event_length - 1) * num_nodes)
    print 'relative maximum length of solution =', utils.getMax(
        Xstart, Xend) / (event_length - 1)
    p, r, f = utils.compareGT(Xstart, Xend, active_truth, timestamps)
Example No. 29
def train_for_n_iters(train_dataset,
                      test_dataset,
                      model_params,
                      lr_params,
                      n_iters=5,
                      train_steps=1000,
                      test_every=10,
                      pretrain_steps=250,
                      print_loss=True,
                      log_dir="logs/",
                      model_name="ARL"):
    """
    Trains the ARL model for n iterations, and averages the results. 

    Args:
        train_dataset: Data iterator of the train set.
        test_dataset: Data iterator of the test set. 
        model_params: A dictionary with model hyperparameters. 
        lr_params: A dictionary with hyperparameters for the optimizers.
        n_iters: How many times to train the model with different seeds.
        train_steps: Number of training steps. 
        test_every: How often to evaluate on test set. 
        pretrain_steps: Number of pretrain steps (steps with no adversary).
        print_loss: Whether to print the loss during training.
        log_dir: Directory where to save the tensorboard loggers. 
    """
    # Set the device on which to train.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model_params["device"] = device

    # Initiate metrics object.
    metrics = FairnessMetrics(n_iters, test_every)

    # Preparation of logging directories.
    experiment_dir = os.path.join(
        log_dir,
        datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    os.makedirs(experiment_dir, exist_ok=True)
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Initialize TensorBoard loggers.
    summary_writer = SummaryWriter(experiment_dir)
    logger_learner = TensorBoardLogger(summary_writer, name="learner")
    logger_adv = TensorBoardLogger(summary_writer, name="adversary")
    logger_metrics = TensorBoardLogger(summary_writer, name="metrics")

    for i in range(n_iters):
        print(f"Training model {i + 1}/{n_iters}")
        seed_everything(42 + i)

        # Load the train dataset as a pytorch dataloader.
        train_loader = DataLoader(train_dataset,
                                  batch_size=model_params["batch_size"],
                                  shuffle=True)

        # Create the model.
        if model_name == "ARL":
            model = ARL(**model_params)
        elif model_name == "baseline":
            model = baseline(**model_params)
        else:
            print("Unknown model")

        # Transfer model to correct device.
        model = model.to(device)

        # Adagrad is the default optimizer.
        optimizer_learner = torch.optim.Adagrad(model.learner.parameters(),
                                                lr=lr_params["learner"])
        if model_name == 'ARL':
            optimizer_adv = torch.optim.Adagrad(model.adversary.parameters(),
                                                lr=lr_params["adversary"])
        elif model_name == 'baseline':
            optimizer_adv = None

        # Train the model with current seeds.
        if print_loss:
            print("Start training on device {}".format(device))
        train_model(
            model,
            train_loader,
            test_dataset,
            train_steps,
            test_every,
            pretrain_steps,
            optimizer_learner,
            optimizer_adv,
            metrics,
            checkpoint_dir,
            logger_learner,
            logger_adv,
            logger_metrics,
            n_iters=i,
            print_loss=print_loss,
            device=device,
        )

    # Average results and return metrics
    metrics.average_results()
    return metrics
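seed_everything(42 + i) makes each of the n_iters runs reproducible; its body is not shown, but such helpers conventionally just seed every random number generator in use. A sketch under that assumption:

import random
import numpy as np
import torch

def seed_everything(seed):
    # Assumed behaviour: seed Python, NumPy and PyTorch (CPU and CUDA) RNGs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)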
Example No. 30
train_set = load_data(args.data_dir + args.train_file, word2id, add_reversed=True, n=args.n_gram)
valid_set = load_data(args.data_dir + args.valid_file, word2id, add_reversed=True, n=args.n_gram)
test_set = load_data(args.data_dir + args.test_file, word2id, add_reversed=False, n=args.n_gram)
vocab_size = len(id2word)


train_batches = batch_iter(train_set, args.batch_size, shuffle=True, diff_len = False)
valid_batches = batch_iter(valid_set, args.batch_size * 10, shuffle=True, diff_len = False)
test_batches = batch_iter(test_set, args.test_size, shuffle=False, diff_len = False)

def set_cuda(var):
    if torch.cuda.is_available():
        return var.cuda()
    return var

model = baseline(vocab_size, args.emb_dim, args.hid_dim)
# Initialize with pre-trained word embeddings. Otherwise, train the embedding from scratch
if args.embed_file is not None:
    model.embedding.weight.data.copy_(torch.FloatTensor(word_embeddings))

if not args.fine_tune:
    model.embedding.weight.requires_grad = False

model = set_cuda(model)
model.train()

if args.resume_model is not None:
    model.load_state_dict(torch.load(args.resume_model))

para = filter(lambda p: p.requires_grad, model.parameters())
opt = optim.Adagrad(para, lr=args.lr)
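The filter(lambda p: p.requires_grad, ...) line matters because the embedding weights may have been frozen a few lines earlier (fine_tune off), so the optimizer should only see the parameters that are actually trainable. A minimal, self-contained illustration of that freeze-and-filter pattern (sizes are made up):

import torch.nn as nn
import torch.optim as optim

emb = nn.Embedding(1000, 50)            # vocab_size=1000, emb_dim=50
emb.weight.requires_grad = False        # freeze pre-trained embeddings

head = nn.Linear(50, 2)                 # some trainable layer on top
para = [p for p in list(emb.parameters()) + list(head.parameters()) if p.requires_grad]
opt = optim.Adagrad(para, lr=0.01)      # only the head's parameters get updated
print(sum(p.numel() for p in para))     # 50*2 + 2 = 102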
Example No. 31
def test(cfg, args):
    # load detector
    detector = build_detection_model(cfg)
    detector.eval()
    device = torch.device(cfg.MODEL.DEVICE)
    detector.to(device)
    outdir = cfg.OUTPUT_DIR

    # load network
    model = baseline()
    model.load_state_dict(torch.load(args.model_root))

    # Initialize image batch
    # imBatch = Variable(torch.FloatTensor(args.batch_size, 3, args.imHeight, args.imWidth))
    imBatch = Variable(torch.FloatTensor(args.batch_size, 3, 736, 1280))
    targetBatch = Variable(torch.LongTensor(args.batch_size, 1))

    # Move network and batch to gpu
    imBatch = imBatch.cuda(device)
    targetBatch = targetBatch.cuda(device)
    model = model.cuda(device)

    # Initialize dataloader
    Dataset = BatchLoader(imageRoot=args.imageroot,
                          gtRoot=args.gtroot,
                          cropSize=(args.imWidth, args.imHeight))
    dataloader = DataLoader(Dataset,
                            batch_size=args.batch_size,
                            num_workers=0,
                            shuffle=True)
    length = Dataset.__len__()

    AccuracyArr = []
    accuracy = 0

    # test
    SaveFilename = (outdir + 'TestingLog.txt')
    TestingLog = open(SaveFilename, 'w')
    print('Save to ', SaveFilename)
    for i, dataBatch in enumerate(dataloader):
        # Read data, under construction; currently hard-coded
        img_cpu = dataBatch['img']
        img_list = to_image_list(img_cpu[0, :, :],
                                 cfg.DATALOADER.SIZE_DIVISIBILITY)
        imBatch.data.copy_(
            img_list.tensors)  # Tensor.shape(BatchSize, 3, Height, Width)

        target_cpu = dataBatch['target']
        # print(target_cpu)
        targetBatch.data.copy_(target_cpu)

        # grab features from detector
        RoIPool_module = detector.roi_heads.box.feature_extractor.pooler
        Backbone = detector.backbone
        hook_roi = SimpleHook(RoIPool_module)
        hook_backbone = SimpleHook(Backbone)
        out_detector = detector(imBatch)
        features_roi = hook_roi.output.data
        features_backbone = hook_backbone.output[
            0].data  # only use the bottom one

        pred = model(features_roi, features_backbone)
        action = pred.cpu().argmax().data.numpy()

        print('predicted action:', action)
        print('ground truth:', target_cpu.data.numpy()[0])

        if action == target_cpu.data.numpy()[0]:
            accuracy += 1

        AccuracyArr.append(accuracy / (i + 1))

        print('Iteration %d / %d: Accumulated Accuracy %.5f' %
              (i + 1, length, AccuracyArr[-1]))
        TestingLog.write('Iteration %d / %d: Accumulated Accuracy %.5f \n' %
                         (i + 1, length, AccuracyArr[-1]))