Example 1
def main():
    h = ''
    while (h != '-h'):
        h = input('enter -h for usage\n')
    with open('README.txt', 'r') as f:
        for lines in f:
            print(lines)
    x = int(input("Enter option number to see usage and -1 to exit: \n"))
    while x != -1:
        with open(str(x) + '.txt', 'r') as f:
            for lines in f:
                print(lines)
        x = int(input("Enter option number to see usage and -1 to exit: \n"))
    execute = evaluate.Evaluate()
    parser = parse.parse
    mydict = execute.modified_eval()
    while (True):
        try:
            prog = input('interactive@LispInter>>> ')
            retvalue = execute.fund(parser(prog), mydict)
            if retvalue is not None:
                print(PyToLisp(retvalue))
        except Exception as e:
            print('Not valid!', 'reason: ' + str(e))
Example 2
def Run(output, test, gold, rule, dicts):
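    # Run the full pipeline in order: train, analyze the test set, synthesize,
    # clean the raw output, then evaluate the parsed output against the gold file.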
    print("Running...")
    train.Train(output, rule, dicts)
    analyze.Analyze(output, test, output + '.fore', rule)
    synthesize.Synthesize(output, output + '.phon', output + '.back', rule)
    clean.Clean(output, output + '.raw', rule)
    evaluate.Evaluate(output, output + '.parsed', gold)
Example 3
 def fitness(self):
     popfitness = []
     #print(self.pop)
     #print("fitness")
     for i in self.pop:  #i = [abcdefg] individual
         #print("individual is %r"%i)
         j = i[:self.baseusingnum]  # j: the base stations used by this individual
         #print("all base will be used in the individual is %r"%j)
         parameter1 = []
         #parameter =  [basex,basey,baseh,x,y,h,fc,Tx,G,htb,hre,Noise,rsrpthre,sinrthre,coverthre,nbaseallx,nbaseally,ncost,ocost]
         #print("j is %r"%j)
         for k in range(3):
             parameter10 = []
             for l in j:
                 parameter10.append(self.parameter[k][l])
                 #print("l is %d"%l)
                 #print("self.parameter[k][l] is %r"%self.parameter[k][l])
                 #print("parameter10 is %r"%parameter10)
             parameter1.append(parameter10)
             #print("%d step parameter1 is should be basex or y or h of %d size: %r"%(k,self.baseusingnum,parameter1))
         #print("parameter1 %r is should be basex, basey and baseh of %d size"%(parameter1,self.baseusingnum))
         parameter2 = []
         parameter2.extend(parameter1)
         parameter2.extend(self.parameter[3:])
         popfitness.append(evaluate.Evaluate(parameter2).Evaluate_main())
     fmax = max(popfitness)
     #print('fmax = %d'%fmax)
     #print("popfitness is %r"%popfitness)
     popfitness1 = popfitness[:]
     popfitness2 = np.array(popfitness)
     popfitness2 = -popfitness2 + fmax + 5
     popfitness2 = list(popfitness2)
     #print("self.F is %r"%popfitness)
     return popfitness1, popfitness2  #fitness value of each individual in pop
Example 4
def total_report():
    import define
    import prepare
    import fselect
    import evaluate
    import improve

    # label = 'LocalizationNew_Tx'
    label = 'BQ_AQUARIS_E5_'

    # for i in range(7, 8):
    for i in ['20', '30', '40']:

        data_name = "./uploads/" + label + str(i) + ".csv"
        print(data_name)
        # data_name = "iris.csv"
        class_name = "class"
        definer = define.Define(data_path=data_name,
                                header=None,
                                response=class_name).pipeline()

        preparer = prepare.Prepare(definer).pipeline()
        selector = fselect.Select(definer).pipeline()
        evaluator = evaluate.Evaluate(definer, preparer, selector)
        improver = improve.Improve(evaluator).pipeline()

        improver.save_full_report('./market/' + label + str(i))
        improver.save_score_report('./market/' + label + str(i))
Example 5
def EvaluateChild1(self, i):
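    # Decode each Gray-coded variable of child i into its real value
    # (scaled by delta and offset by vmin), then evaluate the objective function.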
    j = 0
    while j < self._nv:
        v = Grey2Dec(self.Pop2[i], self.vindicesvar[j], self.vtam[j], self.vw)
        self.vv[j] = self.delta[j] * v + self.vmin[j]
        j += 1

    self.vfobj2[i] = evaluate.Evaluate(self._nv, self.vv)
Example 6
def report_improve(data_path, data_name, problem_type, optimizer, modelos):
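    # Chain the pipeline stages (define -> prepare -> select -> evaluate -> improve)
    # and return the resulting plot and report table.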
    definer = define.Define(data_path=data_path,data_name=data_name,problem_type=problem_type).pipeline()
    preparer = prepare.Prepare(definer).pipeline()
    selector = fselect.Select(definer).pipeline()
    evaluator = evaluate.Evaluate(definer, preparer, selector)
    improver = improve.Improve(evaluator, optimizer, modelos).pipeline()

    plot = improver.plot_models()
    table = improver.report
    dict_report = {'plot': plot, 'table': table}
    #dict_report = {'table': table}

    return dict_report
Example 7
    def Gene_main(self):
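        # Genetic-algorithm main loop: each generation applies cross(), mutation()
        # and popleft(), and records the best (lowest-fitness) individual seen so far.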
        minfit = len(self.parameter[3] * self.parameter[17])
        bestindividual = []
        for i in range(self.iterationtime):
            self.cross()
            self.mutation()
            self.popleft()
            #print("here1")
            #print (self.pop)
            print("the best fitness of the %d's generation is %f " %
                  (i, min(self.E)))
            print("the best individual of the %d's generation is" % i)
            print(self.pop[0][:self.baseusingnum])
            print("\n\n\n")
            if min(self.E) < minfit:
                minfit = min(self.E)
                bestindividual = self.pop[0]

        print("the best fitness is %f " % minfit)
        print("the best individual is")
        print(bestindividual[:self.baseusingnum])
        parameter1 = []
        #parameter =  [basex,basey,baseh,x,y,h,fc,Tx,G,htb,hre,Noise,rsrpthre,sinrthre,coverthre,nbaseallx,nbaseally,ncost,ocost]
        for k in range(3):
            parameter10 = []
            for l in bestindividual[:self.baseusingnum]:
                parameter10.append(self.parameter[k][l])
            parameter1.append(parameter10)
        print("parameter1 is %r" % parameter1)
        parameter2 = []
        parameter2.extend(parameter1)
        parameter2.extend(self.parameter[3:])
        cost = (evaluate.Evaluate(parameter2).cost())
        cover = (evaluate.Evaluate(parameter2).coverage())
        print("the cost of this individual is %r" % cost)
        print("the cover of this individual is %r" % cover)
        print("\n\n\n")
        return minfit, bestindividual[:self.baseusingnum], cost
Example 8
    def evaluate(self, test_labels):

        evaluate = ev.Evaluate(self.predict_labels, test_labels, self.rtl)
        hamming_loss = evaluate.hanmming_loss()

        one_error = evaluate.one_error()

        rank_loss = evaluate.rank_loss()

        coverage = evaluate.coverage()

        avg_precision = evaluate.avg_precison()

        return hamming_loss, one_error, rank_loss, coverage, avg_precision
Example 9
def EvaluatePopulation1(self):
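    # Decode every Gray-coded individual in Pop1 into its real-valued variables
    # and compute its objective value with evaluate.Evaluate.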
    i = 0
    while i < self._pop:
        j = 0
        while j < self._nv:
            v = Grey2Dec(self.Pop1[i], self.vindicesvar[j], self.vtam[j],
                         self.vw)

            self.vv[j] = self.delta[j] * v + self.vmin[j]
            j += 1

        self.vfobj1[i] = evaluate.Evaluate(self._nv, self.vv)
        i += 1

    # The best individual is initialized as the first individual
    self._best = self.vfobj1[0]
Example 10
 def StrategyEvaluation(self, daily_report):
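     # Sort the daily report chronologically, then compute the evaluation
     # metrics (001-012) with evaluate.Evaluate; returns False if the report is empty.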
     daily_report = daily_report.sort_values(by=["trade_date"], ascending=True).reset_index(drop=True)  # sort by trade date, earliest first
     if not daily_report.empty:
         self.evaluate = evaluate.Evaluate(daily_report=daily_report)
         average_daily_net_rise = self.evaluate.CalcAverageDailyNetRise()  # 001
         max_period_return, min_period_return = self.evaluate.CalcMaxMinPeriodReturn()  # 002
         go_up_probability = self.evaluate.CalcGoUpProbability()  # 003
         max_days_keep_up, max_days_keep_down = self.evaluate.CalcMaxDaysKeepUpOrDown()  # 004
         max_drawdown_value, max_drawdown_date, drawdown_start_date = self.evaluate.CalcMaxDrawdown()  # 005
         annual_return_rate, index_annual_return_rate = self.evaluate.CalcAnnualReturnRate()  # 006
         return_volatility = self.evaluate.CalcReturnVolatility()  # 007
         sharpe_ratio = self.evaluate.CalcSharpeRatio(annual_return_rate, return_volatility)  # 008
         beta_value = self.evaluate.CalcBetaValue()  # 009
         alpha_value = self.evaluate.CalcAlphaValue(annual_return_rate, index_annual_return_rate, beta_value)  # 010
         info_ratio = self.evaluate.CalcInfoRatio()  # 011
         print("平均每日净值涨幅:%f" % average_daily_net_rise)
         print("单周期最大涨幅:%f," % max_period_return,
               "单周期最大跌幅:%f" % min_period_return)
         print("上涨概率:%f" % go_up_probability)
         print("最大连续上涨天数:%f," % max_days_keep_up,
               "最大连续下跌天数:%f" % max_days_keep_down)
         print("最大回撤:%f," % max_drawdown_value,
               "最大回撤日期:%s," % max_drawdown_date.strftime("%Y-%m-%d"),
               "回撤开始日期:%s" % drawdown_start_date.strftime("%Y-%m-%d"))
         print("年化收益率:%f," % annual_return_rate,
               "参照指数年化收益率:%f" % index_annual_return_rate)
         print("收益波动率:%f" % return_volatility)
         print("夏普比率:%f" % sharpe_ratio)
         print("贝塔值:%f" % beta_value)
         print("阿尔法值:%f" % alpha_value)
         print("信息比率:%f" % info_ratio)
         self.evaluate.MakeNetValueCompare(self.rets_folder)  # 012
         return True
     else:
         return False
Example 11
def report_model(response, data_path, data_name, problem_type):
    definer = define.Define(data_path=data_path,data_name=data_name,problem_type=problem_type).pipeline()
    preparer = prepare.Prepare(definer).pipeline() # scaler
    selector = fselect.Select(definer).pipeline() # pca
    evaluator = evaluate.Evaluate(definer, preparer, selector).pipeline()

    plot = evaluator.plot_models()
    table = evaluator.report

    data_name = data_name.replace(".csv", "")
    plot_path = os.path.join(app.config['MARKET_DIR'], data_name, 'model')
    tools.path_exists(plot_path)

    plot_path_plot = os.path.join(plot_path, 'boxplot.html')
    evaluator.save_plot(plot_path_plot)
    plot_path_report = os.path.join(plot_path, 'report.csv')
    evaluator.save_report(plot_path_report)

    dict_report = {'plot': plot, 'table': table}
    return dict_report
Example 12
    # smaller value for "tight graphs" such as ukbench (i.e., nearly-duplicate),
    # larger for "loose graphs" such as corel (i.e., category classification)
    # okay to choose same values for search_range and kNN. very subtle difference
    search_region = 40
    kNN = 20
    retri_amount = 25

    # Load data (retrieval results for all images)
    img_name, result_idx, result_length = load_data(fn_result)

    # Generate reciprocal graphs for all images
    if re.search('voc', fn_result):
        feature_type = '.voc'
    elif re.search('hsv', fn_result):
        feature_type = '.hsv'
    elif re.search('gist', fn_result):
        feature_type = '.gist'
    else:
        feature_type = '.unknown'

    find_reciprocal_neighbors(img_name, result_idx, result_length,
                              fn_result_reranking, fn_folder_graph,
                              search_region, kNN, retri_amount, feature_type)

    # Evaluate the accuracy
    import evaluate
    print "Before building reciprocal kNN graphs:"
    evaluate.Evaluate(fn_label, fn_result, retri_amount - 2)
    print "After building reciprocal kNN graphs:"
    evaluate.Evaluate(fn_label, fn_result_reranking, retri_amount - 2)
Example 13
def main():
    args = parse_args()
    config = configparser.ConfigParser()
    """ARGS DETAIL"""
    config_file = args.config_file
    batch_size = args.batch
    n_epoch = args.epoch
    pretrain_epoch = args.pretrain_epoch
    gpu_id = args.gpu
    model_type = args.model
    pretrain_w2v = args.pretrain_w2v
    data_path = args.data_path
    load_model = args.load_model
    """DIR PREPARE"""
    config.read(config_file)
    vocab_size = int(config['Parameter']['vocab_size'])
    coefficient = float(config['Parameter']['coefficient'])
    shuffle_data = config['Parameter'].getboolean('shuffle')

    if pretrain_w2v:
        vocab_size = 'p' + str(vocab_size)

    if model_type == 'multi':
        if shuffle_data:
            base_dir = './pseudo_{}_{}_{}_c{}_shuffle/'.format(
                model_type, vocab_size, data_path[0], coefficient)
        else:
            base_dir = './pseudo_{}_{}_{}_c{}/'.format(model_type, vocab_size,
                                                       data_path[0],
                                                       coefficient)
    else:
        if shuffle_data:
            base_dir = './pseudo_{}_{}_{}_shuffle/'.format(
                model_type, vocab_size, data_path[0])
        else:
            base_dir = './pseudo_{}_{}_{}/'.format(model_type, vocab_size,
                                                   data_path[0])
    model_save_dir = base_dir

    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
        shutil.copyfile(config_file, base_dir + config_file)
    config_file = base_dir + config_file
    config.read(config_file)
    """PARAMATER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    class_size = int(config['Parameter']['class_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    weight_decay = float(config['Parameter']['weight_decay'])
    gradclip = float(config['Parameter']['gradclip'])
    vocab_size = int(config['Parameter']['vocab_size'])
    valid_num = int(config['Parameter']['valid_num'])
    shuffle_data = config['Parameter'].getboolean('shuffle')
    """LOGGER"""
    log_file = model_save_dir + 'log.txt'
    logger = dataset.prepare_logger(log_file)
    logger.info(args)  # log the command-line arguments
    logger.info('[Training start] logging to {}'.format(log_file))
    """DATASET"""
    train_src_file = config[data_path]['train_src_file']
    train_trg_file = config[data_path]['train_trg_file']
    valid_src_file = config[data_path]['valid_src_file']
    valid_trg_file = config[data_path]['valid_trg_file']
    test_src_file = config[data_path]['single_src_file']
    test_trg_file = config[data_path]['single_trg_file']
    src_w2v_file = config[data_path]['src_w2v_file']
    trg_w2v_file = config[data_path]['trg_w2v_file']

    train_data = dataset.load_label_corpus_file(train_src_file, train_trg_file)
    qa_data_sub_lit = dataset.split_valid_data(train_data, valid_num)
    valid_data = dataset.load_label_corpus_file(valid_src_file, valid_trg_file)
    test_data = dataset.load_label_corpus_file(test_src_file, test_trg_file)
    test_data_sub_lit = dataset.split_valid_data(test_data, valid_num)
    """VOCABULARY"""
    src_vocab, trg_vocab, sos, eos = dataset.prepare_vocab(
        base_dir, train_data, vocab_size, gpu_id)
    src_vocab_size = len(src_vocab.vocab)
    trg_vocab_size = len(trg_vocab.vocab)

    src_initialW, trg_initialW = None, None
    if pretrain_w2v:
        w2v = word2vec.Word2Vec()
        src_initialW, vector_size, src_match_word_count = w2v.make_initialW(
            src_vocab.vocab, src_w2v_file)
        trg_initialW, vector_size, trg_match_word_count = w2v.make_initialW(
            trg_vocab.vocab, trg_w2v_file)
        logger.info(
            'Initialize w2v embedding. Match: src {}/{}, trg {}/{}'.format(
                src_match_word_count, src_vocab_size, trg_match_word_count,
                trg_vocab_size))

    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(
        src_vocab_size, trg_vocab_size))

    evaluater = evaluate.Evaluate()
    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()

    cross_valid_result = []
    for ite in range(1, valid_num + 1):
        model_valid_dir = base_dir + 'valid{}/'.format(ite)
        if not os.path.exists(model_valid_dir):
            os.mkdir(model_valid_dir)

        qa_train_data, qa_dev_data, qa_test_data = dataset.separate_train_dev_test(
            qa_data_sub_lit, ite)
        train_data, dev_data, test_data = dataset.separate_train_dev_test(
            test_data_sub_lit, ite)
        test_data_id = [t['id'] for t in test_data]

        qa_iter = dataset.Iterator(qa_train_data,
                                   src_vocab,
                                   trg_vocab,
                                   batch_size,
                                   gpu_id,
                                   sort=True,
                                   shuffle=True)
        valid_iter = dataset.Iterator(valid_data,
                                      src_vocab,
                                      trg_vocab,
                                      batch_size,
                                      gpu_id,
                                      sort=False,
                                      shuffle=False)
        train_iter = dataset.Iterator(train_data,
                                      src_vocab,
                                      trg_vocab,
                                      batch_size,
                                      gpu_id,
                                      sort=True,
                                      shuffle=True)
        dev_iter = dataset.Iterator(dev_data,
                                    src_vocab,
                                    trg_vocab,
                                    batch_size,
                                    gpu_id,
                                    sort=False,
                                    shuffle=False)
        test_iter = dataset.Iterator(test_data,
                                     src_vocab,
                                     trg_vocab,
                                     batch_size,
                                     gpu_id,
                                     sort=False,
                                     shuffle=False)

        qa_size = len(qa_train_data)
        train_size = len(train_data)
        logger.info('V{} ## QA:{}, train:{}, dev:{} ,test:{}'.format(
            ite, qa_size, train_size, len(dev_data), len(test_data)))
        """MODEL"""
        if model_type == 'multi':
            model = model.Multi(src_vocab_size, trg_vocab_size, embed_size,
                                hidden_size, class_size, dropout_ratio,
                                coefficient, src_initialW, trg_initialW)
        elif model_type in ['label', 'pretrain']:
            model = model.Label(src_vocab_size, trg_vocab_size, embed_size,
                                hidden_size, class_size, dropout_ratio,
                                src_initialW, trg_initialW)
        else:
            model = model.EncoderDecoder(src_vocab_size, trg_vocab_size,
                                         embed_size, hidden_size,
                                         dropout_ratio, src_initialW,
                                         trg_initialW)

        if gpu_id >= 0:
            model.to_gpu()
        """OPTIMIZER"""
        optimizer = chainer.optimizers.Adam()
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(gradclip))
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        """PRETRAIN"""
        if model_type == 'pretrain' and load_model is None:
            logger.info('Pre-train start')
            pretrain_loss_dic = {}
            for epoch in range(1, pretrain_epoch + 1):
                train_loss = 0
                for i, batch in enumerate(train_iter.generate(), start=1):
                    try:
                        loss = model.pretrain(*batch)
                        train_loss += loss.data
                        optimizer.target.cleargrads()
                        loss.backward()
                        optimizer.update()

                    except Exception as e:
                        logger.info('P{} ## train iter: {}, {}'.format(
                            epoch, i, e))
                chainer.serializers.save_npz(
                    model_save_dir + 'p_model_epoch_{}.npz'.format(epoch),
                    model)
                """EVALUATE"""
                valid_loss = 0
                for batch in valid_iter.generate():
                    with chainer.no_backprop_mode(), chainer.using_config(
                            'train', False):
                        valid_loss += model.pretrain(*batch).data
                logger.info('P{} ## train loss: {}, val loss:{}'.format(
                    epoch, train_loss, valid_loss))
                pretrain_loss_dic[epoch] = valid_loss
            """MODEL SAVE & LOAD"""
            best_epoch = min(pretrain_loss_dic,
                             key=(lambda x: pretrain_loss_dic[x]))
            logger.info('best_epoch:{}, val loss: {}'.format(
                best_epoch, pretrain_loss_dic[best_epoch]))
            shutil.copyfile(
                model_save_dir + 'p_model_epoch_{}.npz'.format(best_epoch),
                model_save_dir + 'p_best_model.npz')
            logger.info('Pre-train finish')

        if load_model:
            logger.info('load model: {}'.format(load_model))
            chainer.serializers.load_npz(base_dir + load_model, model)
        """TRAIN"""
        epoch_info = {}
        for epoch in range(1, n_epoch + 1):
            train_loss = 0
            mix_train_iter = dataset.MixIterator(qa_iter,
                                                 train_iter,
                                                 seed=0,
                                                 shuffle=shuffle_data)
            for i, batch in enumerate(mix_train_iter.generate(), start=1):
                try:
                    loss = optimizer.target(*batch[0])
                    train_loss += loss.data
                    optimizer.target.cleargrads()
                    loss.backward()
                    optimizer.update()

                except Exception as e:
                    logger.info('V{} ## E{} ## train iter: {}, {}'.format(
                        ite, epoch, i, e))
            chainer.serializers.save_npz(
                model_valid_dir + 'model_epoch_{}.npz'.format(epoch), model)
            """DEV"""
            labels, alignments = [], []
            for i, batch in enumerate(dev_iter.generate(), start=1):
                try:
                    with chainer.no_backprop_mode(), chainer.using_config(
                            'train', False):
                        _, label, align = model.predict(batch[0], sos, eos)
                except Exception as e:
                    logger.info('V{} ## E{} ## dev iter: {}, {}'.format(
                        ite, epoch, i, e))

                if model_type == 'multi':
                    for l, a in zip(label, align):
                        labels.append(chainer.cuda.to_cpu(l))
                        alignments.append(chainer.cuda.to_cpu(a))
                elif model_type in ['label', 'pretrain']:
                    for l in label:
                        labels.append(chainer.cuda.to_cpu(l))
                else:
                    for a in align:
                        alignments.append(chainer.cuda.to_cpu(a))

            best_param_dic = evaluater.param_search(labels, alignments,
                                                    dev_data)
            param = max(best_param_dic,
                        key=lambda x: best_param_dic[x]['macro'])
            init, mix = evaluate.key_to_param(param)
            dev_score = round(best_param_dic[param]['macro'], 3)
            """TEST"""
            outputs, labels, alignments = [], [], []
            for i, batch in enumerate(test_iter.generate(), start=1):
                try:
                    with chainer.no_backprop_mode(), chainer.using_config(
                            'train', False):
                        output, label, align = model.predict(
                            batch[0], sos, eos)
                except Exception as e:
                    logger.info('V{} ## E{} ## test iter: {}, {}'.format(
                        ite, epoch, i, e))

                if model_type == 'multi':
                    for l, a in zip(label, align):
                        labels.append(chainer.cuda.to_cpu(l))
                        alignments.append(chainer.cuda.to_cpu(a))
                elif model_type in ['label', 'pretrain']:
                    for l in label:
                        labels.append(chainer.cuda.to_cpu(l))
                else:
                    for a in align:
                        alignments.append(chainer.cuda.to_cpu(a))

            rate, count, tf_lit, macro, micro = evaluater.eval_param(
                labels, alignments, test_data, init, mix)
            test_macro_score = round(macro, 3)
            test_micro_score = round(micro, 3)
            logger.info(
                'V{} ## E{} ## loss: {}, dev: {}, param: {}, micro: {}, macro: {}'
                .format(ite, epoch, train_loss, dev_score, param,
                        test_micro_score, test_macro_score))

            epoch_info[epoch] = {
                'id': test_data_id,
                'label': labels,
                'align': alignments,
                'hypo': outputs,
                'epoch': epoch,
                'dev_score': dev_score,
                'param': param,
                'rate': rate,
                'count': count,
                'tf': tf_lit,
                'macro': test_macro_score,
                'micro': test_micro_score
            }
            dataset.save_output(model_valid_dir, epoch_info[epoch])
        """MODEL SAVE"""
        best_epoch = max(epoch_info,
                         key=(lambda x: epoch_info[x]['dev_score']))
        cross_valid_result.append(epoch_info[best_epoch])
        logger.info(
            'V{} ## best_epoch: {}, dev: {}, micro: {}, macro: {}'.format(
                ite, best_epoch, epoch_info[best_epoch]['dev_score'],
                epoch_info[best_epoch]['micro'],
                epoch_info[best_epoch]['macro']))
        shutil.copyfile(
            model_valid_dir + 'model_epoch_{}.npz'.format(best_epoch),
            model_valid_dir + 'best_model.npz')

        logger.info('')

    ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
    ave_test_score = [0 for _ in range(len(cross_valid_result[0]['rate']))]
    id_total, label_total, align_total, tf_total = [], [], [], []

    for v, r in enumerate(cross_valid_result, start=1):
        ave_dev_score += r['dev_score']
        ave_macro_score += r['macro']
        ave_micro_score += r['micro']
        for i, rate in enumerate(r['rate']):
            ave_test_score[i] += rate
        logger.info('   {}: e{}, {}\tdev: {}, micro: {}, macro: {} {}'.format(
            v, r['epoch'], r['param'], r['dev_score'], r['micro'],
            dataset.float_to_str(r['rate']), r['macro']))

        id_total.extend(r['id'])
        label_total.extend(r['label'])
        align_total.extend(r['align'])
        tf_total.extend(r['tf'])
    ave_dev_score = round(ave_dev_score / valid_num, 3)
    ave_macro_score = round(ave_macro_score / valid_num, 3)
    ave_micro_score = round(ave_micro_score / valid_num, 3)
    ave_test_score = [
        ave_test_score[i] / valid_num for i in range(len(ave_test_score))
    ]
    logger.info('dev: {}, micro: {}, macro: {} {}'.format(
        ave_dev_score, ave_micro_score, dataset.float_to_str(ave_test_score),
        ave_macro_score))

    label, align, tf = dataset.sort_multi_list(id_total, label_total,
                                               align_total, tf_total)
    dataset.save_list(base_dir + 'label.txt', label)
    dataset.save_list(base_dir + 'align.txt', align)
    dataset.save_list(base_dir + 'tf.txt', tf)
Example 14
        #     bearings["llama"] = ...
        # Now you need to convert this to a horizontal bearing as an angle.
        # Use the camera matrix for this!

        # There are ways to get much better bearings.
        # Try and think of better solutions than just averaging.
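        # A minimal sketch (assuming a pinhole camera matrix K and the pixel
        # column u of a detection): the horizontal bearing in radians could be
        # computed as
        #     bearing = math.atan2(u - K[0][2], K[0][0])
        # i.e. atan2(u - cx, fx), where cx is the principal point and fx the
        # focal length in pixels.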

        for animal in bearings:
            bearing_dict = {
                "pose": self.pose.tolist(),
                "animal": animal,
                "bearing": bearings[animal]
            }
            bearing_line = json.dumps(bearing_dict)


if __name__ == "__main__":
    # Set up the network
    exp = evaluate.Evaluate()

    # Read in the images
    images_fname = "../system_output/images.txt"
    with open(images_fname, 'r') as images_file:
        posed_images = [PosedImage(line) for line in images_file]

    # Compute bearings and write to file
    bearings_fname = "../system_output/bearings.txt"
    with open(bearings_fname, 'w') as bearings_file:
        for posed_image in posed_images:
            posed_image.write_bearings(exp, bearings_file, "../system_output/")
Example 15
def main():
    """
    model1: label
    model2: encdec を指定する
    """
    args = parse_args()
    model_name1 = args.label_model
    model_dir1 = re.search(r'^(.*/)', model_name1).group(1)

    model_name2 = args.encdec_model
    model_dir2 = re.search(r'^(.*/)', model_name2).group(1)

    valid_type = args.valid

    # create the directory for saving results
    output_dir = model_dir1 + model_dir2
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # prepare the evaluation data
    config = configparser.ConfigParser()
    config_files = glob.glob(os.path.join(model_dir1, '*.ini'))
    config.read(config_files[0])
    valid_num = int(config['Parameter']['valid_num'])
    test_src_file = config['server']['single_src_file']
    test_trg_file = config['server']['single_trg_file']
    data = dataset.load_label_corpus_file(test_src_file, test_trg_file)
    data_sub_lit = dataset.split_valid_data(data, valid_num)

    evaluater = evaluate.Evaluate()

    result_dic = {}
    # case where the model files are split into valid* directories

    if valid_type == 'TT':
        """
        model1: validファイルあり
        model2: validファイルあり
        """
        model_file_num = len(
            glob.glob(os.path.join(model_dir1, 'valid1/model_epoch_*.npz')))

        label_dic = {}
        align_dic = {}
        for i in range(1, model_file_num + 1):
            label_dic[i] = []
            align_dic[i] = []
            for valid in [2, 3, 4, 5, 1]:
                label, _ = dataset.load_score_file(
                    model_dir1 + 'valid{}/model_epoch_{}'.format(valid, i))
                label_dic[i].append(label)
                _, align = dataset.load_score_file(
                    model_dir2 + 'valid{}/model_epoch_{}'.format(valid, i))
                align_dic[i].append(align)

        order = {1: [4, 5], 2: [5, 1], 3: [1, 2], 4: [2, 3], 5: [3, 4]}

        for i in tqdm(range(1, model_file_num + 1)):
            for j in range(1, model_file_num + 1):
                info = []
                for ite, v in order.items():
                    _, dev_data, test_data = dataset.separate_train_dev_test(
                        data_sub_lit, ite)
                    dev_label = label_dic[i][v[0] - 1]
                    test_label = label_dic[i][v[1] - 1]

                    dev_align = align_dic[j][v[0] - 1]
                    test_align = align_dic[j][v[1] - 1]

                    best_param_dic = evaluater.param_search(
                        dev_label, dev_align, dev_data)
                    param = max(best_param_dic,
                                key=lambda x: best_param_dic[x]['macro'])
                    init, mix = evaluate.key_to_param(param)
                    dev_score = round(best_param_dic[param]['macro'], 3)

                    rate, count, tf_lit, macro, micro = evaluater.eval_param(
                        test_label, test_align, test_data, init, mix)
                    test_macro_score = round(macro, 3)
                    test_micro_score = round(micro, 3)
                    info.append({
                        'dev_score': dev_score,
                        'param': param,
                        'macro': test_macro_score,
                        'micro': test_micro_score,
                        'tf': tf_lit
                    })

                ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
                param = []
                tf_lit = []

                for v, r in enumerate(info, start=1):
                    ave_dev_score += r['dev_score']
                    ave_macro_score += r['macro']
                    ave_micro_score += r['micro']
                    param.append(r['param'])
                    tf_lit.extend(r['tf'])

                ave_dev_score = round(ave_dev_score / valid_num, 3)
                ave_macro_score = round(ave_macro_score / valid_num, 3)
                ave_micro_score = round(ave_micro_score / valid_num, 3)

                key = 'label{}_enc{}'.format(i, j)
                result_dic[key] = {
                    'dev': ave_dev_score,
                    'micro': ave_micro_score,
                    'macro': ave_macro_score,
                    'param': ' '.join(param),
                    'tf': tf_lit
                }

        best_score = max(result_dic, key=lambda x: result_dic[x]['dev'])
        with open(output_dir + 'merge.txt', 'w') as f:
            [
                f.write('{}: {}\n'.format(k, v))
                for k, v in sorted(result_dic.items())
            ]
            f.write('best score\n{}: {}\n'.format(best_score,
                                                  result_dic[best_score]))
        with open(output_dir + 'tf.txt', 'w') as f:
            [f.write(r + '\n') for r in result_dic[best_score]['tf']]

    elif valid_type == 'FF':
        """
        model1: validファイルなし
        model2: validファイルなし
        """
        model_file_num = len(
            glob.glob(os.path.join(model_dir1, 'model_epoch_*.npz')))
        for i in tqdm(range(1, model_file_num + 1)):
            label, _ = dataset.load_score_file(model_dir1 +
                                               'model_epoch_{}'.format(i))
            label_sub_lit = dataset.split_valid_data(label, valid_num)
            for j in range(1, model_file_num + 1):
                _, align = dataset.load_score_file(model_dir2 +
                                                   'model_epoch_{}'.format(j))
                align_sub_lit = dataset.split_valid_data(align, valid_num)
                info = []
                for ite in range(1, valid_num + 1):
                    _, dev_data, test_data = dataset.separate_train_dev_test(
                        data_sub_lit, ite)
                    _, dev_label, test_label = dataset.separate_train_dev_test(
                        label_sub_lit, ite)
                    _, dev_align, test_align = dataset.separate_train_dev_test(
                        align_sub_lit, ite)

                    best_param_dic = evaluater.param_search(
                        dev_label, dev_align, dev_data)
                    param = max(best_param_dic,
                                key=lambda x: best_param_dic[x]['macro'])
                    init, mix = evaluate.key_to_param(param)
                    dev_score = round(best_param_dic[param]['macro'], 3)

                    rate, count, tf_lit, macro, micro = evaluater.eval_param(
                        test_label, test_align, test_data, init, mix)
                    test_macro_score = round(macro, 3)
                    test_micro_score = round(micro, 3)
                    info.append({
                        'dev_score': dev_score,
                        'param': param,
                        'macro': test_macro_score,
                        'micro': test_micro_score,
                        'tf': tf_lit
                    })

                ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
                param = []
                tf_lit = []

                for v, r in enumerate(info, start=1):
                    ave_dev_score += r['dev_score']
                    ave_macro_score += r['macro']
                    ave_micro_score += r['micro']
                    param.append(r['param'])
                    tf_lit.extend(r['tf'])

                ave_dev_score = round(ave_dev_score / valid_num, 3)
                ave_macro_score = round(ave_macro_score / valid_num, 3)
                ave_micro_score = round(ave_micro_score / valid_num, 3)

                key = 'label{}_enc{}'.format(i, j)
                result_dic[key] = {
                    'dev': ave_dev_score,
                    'micro': ave_micro_score,
                    'macro': ave_macro_score,
                    'param': ' '.join(param),
                    'tf': tf_lit
                }

        best_score = max(result_dic, key=lambda x: result_dic[x]['dev'])
        with open(output_dir + 'merge.txt', 'w') as f:
            [
                f.write('{}: {}\n'.format(k, v))
                for k, v in sorted(result_dic.items())
            ]
            f.write('best score\n{}: {}\n'.format(best_score,
                                                  result_dic[best_score]))
        with open(output_dir + 'tf.txt', 'w') as f:
            [f.write(r + '\n') for r in result_dic[best_score]['tf']]

    elif valid_type == 'TF':
        """
        model1: validファイルなし
        model2: validファイルなし
        """
        model_file_num = len(
            glob.glob(os.path.join(model_dir1, 'valid1/model_epoch_*.npz')))

        label_dic = {}
        for i in range(1, model_file_num + 1):
            label_dic[i] = []
            for valid in [2, 3, 4, 5, 1]:
                label, _ = dataset.load_score_file(
                    model_dir1 + 'valid{}/model_epoch_{}'.format(valid, i))
                label_dic[i].append(label)

        for j in range(1, model_file_num + 1):
            _, align = dataset.load_score_file(model_dir2 +
                                               'model_epoch_{}'.format(j))
            align_sub_lit = dataset.split_valid_data(align, valid_num)

        # Specifies which folds are used for validation and test in 5-fold cross-validation.
        # e.g. 1: [4, 5] means the first run uses fold 4 for validation and fold 5 for testing.
        order = {1: [4, 5], 2: [5, 1], 3: [1, 2], 4: [2, 3], 5: [3, 4]}

        for i in tqdm(range(1, model_file_num + 1)):
            for j in range(1, model_file_num + 1):
                info = []
                for ite, v in order.items():
                    _, dev_data, test_data = dataset.separate_train_dev_test(
                        data_sub_lit, ite)
                    dev_label = label_dic[i][v[0] - 1]
                    test_label = label_dic[i][v[1] - 1]

                    _, dev_align, test_align = dataset.separate_train_dev_test(
                        align_sub_lit, ite)

                    best_param_dic = evaluater.param_search(
                        dev_label, dev_align, dev_data)
                    param = max(best_param_dic,
                                key=lambda x: best_param_dic[x]['macro'])
                    init, mix = evaluate.key_to_param(param)
                    dev_score = round(best_param_dic[param]['macro'], 3)

                    rate, count, tf_lit, macro, micro = evaluater.eval_param(
                        test_label, test_align, test_data, init, mix)
                    test_macro_score = round(macro, 3)
                    test_micro_score = round(micro, 3)
                    info.append({
                        'dev_score': dev_score,
                        'param': param,
                        'macro': test_macro_score,
                        'micro': test_micro_score,
                        'tf': tf_lit
                    })

                ave_dev_score, ave_macro_score, ave_micro_score = 0, 0, 0
                param = []
                tf_lit = []

                for v, r in enumerate(info, start=1):
                    ave_dev_score += r['dev_score']
                    ave_macro_score += r['macro']
                    ave_micro_score += r['micro']
                    param.append(r['param'])
                    tf_lit.extend(r['tf'])

                ave_dev_score = round(ave_dev_score / valid_num, 3)
                ave_macro_score = round(ave_macro_score / valid_num, 3)
                ave_micro_score = round(ave_micro_score / valid_num, 3)

                key = 'label{}_enc{}'.format(i, j)
                result_dic[key] = {
                    'dev': ave_dev_score,
                    'micro': ave_micro_score,
                    'macro': ave_macro_score,
                    'param': ' '.join(param),
                    'tf': tf_lit
                }

        best_score = max(result_dic, key=lambda x: result_dic[x]['dev'])
        with open(output_dir + 'merge.txt', 'w') as f:
            [
                f.write('{}: {}\n'.format(k, v))
                for k, v in sorted(result_dic.items())
            ]
            f.write('best score\n{}: {}\n'.format(best_score,
                                                  result_dic[best_score]))
        with open(output_dir + 'tf.txt', 'w') as f:
            [f.write(r + '\n') for r in result_dic[best_score]['tf']]
Example 16
    src = FastTextEmb(hparams.data_dir, hparams.src_lang, hparams.vocab_size)
    src_dict, src_vec = src.load_embeddings()
    # Load Target Embeddings
    tgt = FastTextEmb(hparams.data_dir, hparams.tgt_lang, hparams.vocab_size)
    tgt_dict, tgt_vec = tgt.load_embeddings()

    # GAN instance
    train_model = WordTranslator(hparams, src_vec, tgt_vec, hparams.vocab_size)

    # Copy embeddings
    src_vec_eval = copy.deepcopy(src_vec)
    tgt_vec_eval = copy.deepcopy(tgt_vec)

    # Evaluator instance
    eval_model = evaluate.Evaluate(train_model.generator.W, src_vec_eval,
                                   tgt_vec_eval, src_dict, tgt_dict,
                                   hparams.src_lang, hparams.tgt_lang,
                                   hparams.eval_dir, hparams.vocab_size)

    # Tensorflow session
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        local_lr = hparams.lr
        for epoch in range(hparams.epochs):
            # Train the model
            train_model.run(sess, local_lr)
            # Evaluate using nearest neighbors measure
            eval_model.calc_nn_acc(sess)
            # Evaluate using CSLS similarity measure
            eval_model.run_csls_metrics(sess)
            # Drop learning rate
            local_lr = train_model.set_lr(local_lr, eval_model.drop_lr)
Example 17
def ctrl(x_tr, y_tr, x_te, x_va=None):
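    # Overview: train a BinaryRelevance(LinearSVC) base model, keep only labels
    # whose macro-F1 on a held-out split reaches the threshold (filter 1), pick
    # for each label the m most chi2-correlated surviving labels (filter 2), then
    # train one meta-classifier per correlated label (using that label as an
    # extra feature; the base prediction stands in for it at test time) and
    # combine the meta-classifiers by majority vote.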

    p = np.size(y_tr, 1)
    thr = 0.3
    m = 5 if p <= 20 else 7 if p <= 100 else 9

    n_tr = np.size(x_tr, 0)
    n_tr_va = int(n_tr / 5)

    x_tr = np.array(x_tr)
    y_tr = np.int32(y_tr)
    x_tr_va = x_tr[0:n_tr_va, :]
    y_tr_va = y_tr[0:n_tr_va, :]
    x_tr = x_tr[n_tr_va:, :]
    y_tr = y_tr[n_tr_va:, :]

    pred_base = pt.BinaryRelevance(svm.LinearSVC())
    pred_base.fit(x_tr, y_tr)
    y_tr_va_ = sparse.dok_matrix.toarray(pred_base.predict(x_tr_va))

    # filter 1
    ev = evaluate.Evaluate()
    f1 = np.zeros([p], dtype=float)
    for j in range(p):
        f1[j] = ev.eval_macro_f1(y_tr_va[:, j], y_tr_va_[:, j])

    yc = np.where(f1 >= thr, True, False)
    yc_index = np.array(list(range(p)))[yc]
    y_tr_c = y_tr[:, yc]

    # filter 2
    r = []
    for j in range(p):

        yc_j = np.where(yc_index == j, False, True)
        yc_index_j = yc_index[yc_j].tolist()
        y_tr_c_j = y_tr_c[:, yc_j]
        y_tr_j = y_tr[:, j]

        chi2, _ = fs.chi2(y_tr_c_j, y_tr_j)
        chi2 = chi2.tolist()

        r_j = []

        for k in range(m):
            if not chi2:
                break
            index = np.argmax(chi2)
            r_j.append(yc_index_j[index])
            yc_index_j.remove(yc_index_j[index])
            chi2.remove(chi2[index])
        r.append(r_j)

    # predict test and validation sets together
    n_te, n_va = 0, 0
    if x_va is not None:
        n_te = np.size(x_te, axis=0)
        n_va = np.size(x_va, axis=0)
        x_te = np.vstack((x_te, x_va))
    else:
        x_te = np.array(x_te)

    # get original prediction
    y_te_ori = sparse.dok_matrix.toarray(pred_base.predict(x_te))
    y_te_adv = np.zeros([np.size(x_te, 0), p])

    # train meta-classifiers and predict (to reduce storage usage)
    for j in range(p):

        pred_meta = [svm.LinearSVC() for _ in r[j]]

        # train meta-classifiers for label l_j
        for k, index in enumerate(r[j]):
            x = np.hstack([x_tr, y_tr[:, index][:, np.newaxis]])
            y = y_tr[:, j]
            pred_meta[k].fit(x, y)

        # predict phase
        votes = np.zeros([np.size(x_te, 0)])
        for k, index in enumerate(r[j]):
            x = np.hstack([x_te, y_te_ori[:, index][:, np.newaxis]])
            y = pred_meta[k].predict(x)
            votes += np.where(y == 1, 1, -1)
        y_te_adv[:, j] = np.where(votes > 0, 1, 0)

    y_te_ = y_te_adv
    # predict test and validation sets together
    if x_va is not None:
        y_va_ = y_te_[n_te:, :]
        y_te_ = y_te_[0:n_te, :]
        return y_te_, y_va_
    else:
        return y_te_
Example 18
def main():
    args = parse_args()
    model_dir = args.model_dir
    """LOAD CONFIG FILE"""
    config_files = glob.glob(os.path.join(model_dir, '*.ini'))
    assert len(config_files) == 1, 'Put only one config file in the directory'
    config_file = config_files[0]
    config = configparser.ConfigParser()
    config.read(config_file)
    """LOGGER"""
    logger = getLogger(__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s')

    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    sh.setFormatter(formatter)
    logger.addHandler(sh)

    log_file = model_dir + 'log.txt'
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    logger.info('[Test start] logging to {}'.format(log_file))
    """PARAMATER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    class_size = int(config['Parameter']['class_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    vocab_type = config['Parameter']['vocab_type']
    coefficient = float(config['Parameter']['coefficient'])
    """TEST DETAIL"""
    gpu_id = args.gpu
    batch_size = args.batch
    model_file = args.model
    """DATASET"""
    test_src_file = config['Dataset']['test_src_file']
    correct_txt_file = config['Dataset']['correct_txt_file']

    test_data_size = dataset.data_size(test_src_file)
    logger.info('test size: {0}'.format(test_data_size))
    if vocab_type == 'normal':
        src_vocab = dataset.VocabNormal()
        src_vocab.load(model_dir + 'src_vocab.normal.pkl')
        src_vocab.set_reverse_vocab()
        trg_vocab = dataset.VocabNormal()
        trg_vocab.load(model_dir + 'trg_vocab.normal.pkl')
        trg_vocab.set_reverse_vocab()

        sos = np.array([src_vocab.vocab['<s>']], dtype=np.int32)
        eos = np.array([src_vocab.vocab['</s>']], dtype=np.int32)

    elif vocab_type == 'subword':
        src_vocab = dataset.VocabSubword()
        src_vocab.load(model_dir + 'src_vocab.sub.model')
        trg_vocab = dataset.VocabSubword()
        trg_vocab.load(model_dir + 'trg_vocab.sub.model')

        sos = np.array([src_vocab.vocab.PieceToId('<s>')], dtype=np.int32)
        eos = np.array([src_vocab.vocab.PieceToId('</s>')], dtype=np.int32)

    src_vocab_size = len(src_vocab.vocab)
    trg_vocab_size = len(trg_vocab.vocab)
    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(
        src_vocab_size, trg_vocab_size))

    evaluater = evaluate.Evaluate(correct_txt_file)
    test_iter = dataset.Iterator(test_src_file,
                                 test_src_file,
                                 src_vocab,
                                 trg_vocab,
                                 batch_size,
                                 sort=False,
                                 shuffle=False,
                                 include_label=False)
    """MODEL"""
    model = Multi(src_vocab_size, trg_vocab_size, embed_size, hidden_size,
                  class_size, dropout_ratio, coefficient)
    chainer.serializers.load_npz(model_file, model)
    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    """TEST"""
    outputs = []
    labels = []
    for i, batch in enumerate(test_iter.generate(), start=1):
        batch = convert.convert(batch, gpu_id)
        output, label = model.predict(batch[0], sos, eos)
        for o, l in zip(output, label):
            outputs.append(trg_vocab.id2word(o))
            labels.append(l)
    rank_list = evaluater.rank(labels)
    single = evaluater.single(rank_list)
    multiple = evaluater.multiple(rank_list)
    logger.info('single: {} | {}'.format(single[0], single[1]))
    logger.info('multi : {} | {}'.format(multiple[0], multiple[1]))

    with open(model_file + '.hypo', 'w') as f:
        [f.write(o + '\n') for o in outputs]
    with open(model_file + '.attn', 'w') as f:
        [f.write('{}\n'.format(l)) for l in labels]
Example 19
x = [0] * 100
for i in range(10):
    for j in range(10):
        x[i * 10 + j] = j

y = [0] * 100
for i in range(10):
    for j in range(10):
        y[i * 10 + j] = i

basex = [3, 7, 7]
basey = [5, 3, 7]
baseh = 10
h = 1.7
fc = 2900
Tx = 38.2
G = 10
htb = 10
hre = 1.7
Noise = -110
rsrpthre = -88
sinrthre = 50
coverthre = 0.7
nbaseallx = [3]
nbaseally = [5]
ncost = 300
ocost = 100
parameter1 = [
    basex, basey, x, y, baseh, h, fc, Tx, G, htb, hre, Noise, rsrpthre,
    sinrthre, coverthre, nbaseallx, nbaseally, ncost, ocost
]
evaluator = evaluate.Evaluate(parameter=parameter1)
a = evaluator.Evaluate_main()
print(a)
Example 20
            build_cknn_graphs.BuildKNNGraphs(fn_result, fn_result_reranking,
                                             fn_label, kNN, retri_amount,
                                             search_region, lam))
    print 'Graph Build Step; Total Time is ', time.time() - T, 's'

    print '######################  Graph Fusion Step  ##########################'
    T = time.time()
    retrieval_length = 60000
    for i in range(retrieval_length):
        graph_list = []
        for j in range(num_ranks):
            graph_list.append(graph_lists[j][i])

        graph_list_copy = copy.deepcopy(graph_list)
        weight, graph = graphfusion.DEMFR(graph_list_copy, num_ranks, kNN,
                                          retri_amount)
        weights[graph[0]] = weight
        graphs[graph[0]] = graph
        vectexS.append(graph[0])
    print 'Graph Fusion Step; Total Time is ', time.time() - T, 's'

    print '######################  Re-Rank Step  ##########################'
    T = time.time()
    graphfusion.Expectation_Rank(vectexS, graphs, fn_fusion_result, kNN,
                                 retri_amount, weights)
    print 'Re-Rank Step; Each retrieval image time is ', (
        time.time() - T) / result_length * 1000 / retri_amount, 'ms'

    print "After graphs fusion:"
    evaluate.Evaluate(fn_label, fn_fusion_result)
Example 21
    for line in fd_stdin:
        line = line.rstrip()
        line = line.split()
        fn_graph = data_directory + line[0]

        if numpy.mod(count, num_ranks) != 0:
            graph_list.append(cPickle.load(open(fn_graph, 'rb')))
            count += 1
            continue
        else:
            graph_list.append(cPickle.load(open(fn_graph, 'rb')))
            count += 1

        graph_list_copy = copy.deepcopy(graph_list)
        selected_images = graphfusion.Fusion_Density_Subgraph(graph_list_copy, num_ranks, retri_amount)
        # Uncomment the next line to use PageRank-based method, but keep the above line as well. We need "selected_images[0]" to define the center of the graph
        #selected_images = graphfusion.Fusion_Graph_Laplacian(graph_list, num_ranks, retri_amount, selected_images[0])

        fd_stdin_fusion.write(line[0] + ' ')
        for img_id in selected_images:
            fd_stdin_fusion.write(str(img_id) + ' ')

        fd_stdin_fusion.write('\n')
        graph_list = []

    fd_stdin_fusion.close()

    import evaluate
    print "After fusing VOC and HSV graphs:"
    evaluate.Evaluate(fn_label, fn_fusion_result, retri_amount-2)
Example 22
def main():
    args = parse_args()
    model_dir = args.model_dir
    """LOAD CONFIG FILE"""
    config_files = glob.glob(os.path.join(model_dir, '*.ini'))
    assert len(config_files) == 1, 'Put only one config file in the directory'
    config_file = config_files[0]
    config = configparser.ConfigParser()
    config.read(config_file)
    """LOGGER"""
    logger = getLogger(__name__)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('[%(asctime)s] %(message)s')

    sh = logging.StreamHandler()
    sh.setLevel(logging.INFO)
    sh.setFormatter(formatter)
    logger.addHandler(sh)

    log_file = model_dir + 'log.txt'
    fh = logging.FileHandler(log_file)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    logger.info('[Training start] logging to {}'.format(log_file))
    """PARAMATER"""
    embed_size = int(config['Parameter']['embed_size'])
    hidden_size = int(config['Parameter']['hidden_size'])
    class_size = int(config['Parameter']['class_size'])
    dropout_ratio = float(config['Parameter']['dropout'])
    weight_decay = float(config['Parameter']['weight_decay'])
    gradclip = float(config['Parameter']['gradclip'])
    vocab_type = config['Parameter']['vocab_type']
    vocab_size = int(config['Parameter']['vocab_size'])
    coefficient = float(config['Parameter']['coefficient'])
    """TRINING DETAIL"""
    gpu_id = args.gpu
    n_epoch = args.epoch
    batch_size = args.batch
    interval = args.interval
    reg = False if args.type == 'l' or args.type == 's' else True
    """DATASET"""
    if args.type == 'l':
        section = 'Local'
    elif args.type == 'lr':
        section = 'Local_Reg'
    elif args.type == 's':
        section = 'Server'
    else:
        section = 'Server_Reg'
    train_src_file = config[section]['train_src_file']
    train_trg_file = config[section]['train_trg_file']
    valid_src_file = config[section]['valid_src_file']
    valid_trg_file = config[section]['valid_trg_file']
    test_src_file = config[section]['test_src_file']
    correct_txt_file = config[section]['correct_txt_file']

    train_data_size = dataset.data_size(train_src_file)
    valid_data_size = dataset.data_size(valid_src_file)
    logger.info('train size: {0}, valid size: {1}'.format(train_data_size, valid_data_size))

    if vocab_type == 'normal':
        src_vocab = dataset.VocabNormal(reg)
        trg_vocab = dataset.VocabNormal(reg)
        if os.path.isfile(model_dir + 'src_vocab.normal.pkl') and os.path.isfile(model_dir + 'trg_vocab.normal.pkl'):
            src_vocab.load(model_dir + 'src_vocab.normal.pkl')
            trg_vocab.load(model_dir + 'trg_vocab.normal.pkl')
        else:
            init_vocab = {'<pad>': 0, '<unk>': 1, '<s>': 2, '</s>': 3}
            src_vocab.build(train_src_file, True,  init_vocab, vocab_size)
            trg_vocab.build(train_trg_file, False, init_vocab, vocab_size)
            dataset.save_pickle(model_dir + 'src_vocab.normal.pkl', src_vocab.vocab)
            dataset.save_pickle(model_dir + 'trg_vocab.normal.pkl', trg_vocab.vocab)
        src_vocab.set_reverse_vocab()
        trg_vocab.set_reverse_vocab()

        sos = convert.convert_list(np.array([src_vocab.vocab['<s>']], dtype=np.int32), gpu_id)
        eos = convert.convert_list(np.array([src_vocab.vocab['</s>']], dtype=np.int32), gpu_id)

    elif vocab_type == 'subword':
        src_vocab = dataset.VocabSubword()
        trg_vocab = dataset.VocabSubword()
        if os.path.isfile(model_dir + 'src_vocab.sub.model') and os.path.isfile(model_dir + 'trg_vocab.sub.model'):
            src_vocab.load(model_dir + 'src_vocab.sub.model')
            trg_vocab.load(model_dir + 'trg_vocab.sub.model')
        else:
            src_vocab.build(train_src_file, model_dir + 'src_vocab.sub', vocab_size)
            trg_vocab.build(train_trg_file, model_dir + 'trg_vocab.sub', vocab_size)

        sos = convert.convert_list(np.array([src_vocab.vocab.PieceToId('<s>')], dtype=np.int32), gpu_id)
        eos = convert.convert_list(np.array([src_vocab.vocab.PieceToId('</s>')], dtype=np.int32), gpu_id)

    src_vocab_size = len(src_vocab.vocab)
    trg_vocab_size = len(trg_vocab.vocab)
    logger.info('src_vocab size: {}, trg_vocab size: {}'.format(src_vocab_size, trg_vocab_size))

    train_iter = dataset.Iterator(train_src_file, train_trg_file, src_vocab, trg_vocab, batch_size, sort=True, shuffle=True, reg=reg)
    # train_iter = dataset.Iterator(train_src_file, train_trg_file, src_vocab, trg_vocab, batch_size, sort=False, shuffle=False, reg=reg)
    valid_iter = dataset.Iterator(valid_src_file, valid_trg_file, src_vocab, trg_vocab, batch_size, sort=False, shuffle=False, reg=reg)
    evaluater = evaluate.Evaluate(correct_txt_file)
    test_iter = dataset.Iterator(test_src_file, test_src_file, src_vocab, trg_vocab, batch_size, sort=False, shuffle=False)
    """MODEL"""
    if reg:
        class_size = 1
        model = MultiReg(src_vocab_size, trg_vocab_size, embed_size, hidden_size, class_size, dropout_ratio, coefficient)
    else:
        model = Multi(src_vocab_size, trg_vocab_size, embed_size, hidden_size, class_size, dropout_ratio, coefficient)
    """OPTIMIZER"""
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(gradclip))
    optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))
    """GPU"""
    if gpu_id >= 0:
        logger.info('Use GPU')
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()
    """TRAIN"""
    sum_loss = 0
    loss_dic = {}
    result = []
    for epoch in range(1, n_epoch + 1):
        for i, batch in enumerate(train_iter.generate(), start=1):
            try:
                batch = convert.convert(batch, gpu_id)
                loss = optimizer.target(*batch)
                sum_loss += loss.data
                optimizer.target.cleargrads()
                loss.backward()
                optimizer.update()

                if i % interval == 0:
                    logger.info('E{} ## iteration:{}, loss:{}'.format(epoch, i, sum_loss))
                    sum_loss = 0

            except Exception as e:
                logger.info(traceback.format_exc())
                logger.info('iteration: {}'.format(i))
                for b in batch[0]:
                    for bb in b:
                        logger.info(src_vocab.id2word(bb))
        chainer.serializers.save_npz(model_dir + 'model_epoch_{}.npz'.format(epoch), model)

        """EVALUATE"""
        valid_loss = 0
        for batch in valid_iter.generate():
            batch = convert.convert(batch, gpu_id)
            with chainer.no_backprop_mode(), chainer.using_config('train', False):
                valid_loss += optimizer.target(*batch).data
        logger.info('E{} ## val loss:{}'.format(epoch, valid_loss))
        loss_dic[epoch] = valid_loss

        """TEST"""
        outputs = []
        labels = []
        for i, batch in enumerate(test_iter.generate(), start=1):
            batch = convert.convert(batch, gpu_id)
            with chainer.no_backprop_mode(), chainer.using_config('train', False):
                output, label = model.predict(batch[0], sos, eos)
            # for o, l in zip(output, label):
            #     o = chainer.cuda.to_cpu(o)
            #     outputs.append(trg_vocab.id2word(o))
            #     labels.append(l)
            for l in label:
                labels.append(l)
        rank_list = evaluater.rank(labels)
        s_rate, s_count = evaluater.single(rank_list)
        m_rate, m_count = evaluater.multiple(rank_list)
        logger.info('E{} ## s: {} | {}'.format(epoch, ' '.join(x for x in s_rate), ' '.join(x for x in s_count)))
        logger.info('E{} ## m: {} | {}'.format(epoch, ' '.join(x for x in m_rate), ' '.join(x for x in m_count)))

        # with open(model_dir + 'model_epoch_{}.hypo'.format(epoch), 'w')as f:
        #     [f.write(o + '\n') for o in outputs]
        with open(model_dir + 'model_epoch_{}.attn'.format(epoch), 'w') as f:
            [f.write('{}\n'.format(l)) for l in labels]

        result.append('{},{},{},{}'.format(epoch, valid_loss, s_rate[-1], m_rate[-1]))

    """MODEL SAVE"""
    best_epoch = min(loss_dic, key=(lambda x: loss_dic[x]))
    logger.info('best_epoch:{0}'.format(best_epoch))
    chainer.serializers.save_npz(model_dir + 'best_model.npz', model)

    with open(model_dir + 'result.csv', 'w') as f:
        f.write('epoch,valid_loss,single,multiple\n')
        [f.write(r + '\n') for r in result]
Example 23
 def eval(self):
     ev = evaluate.Evaluate(self.results['labels'])
     ev.score()
     print util.list_as_dec_str(ev.match_at_k)
     print util.list_as_dec_str(ev.mrr_at_k)