Ejemplo n.º 1
0
def pipeline_predict(src, config, trg=None):
    """Run translation inference for *src* using the settings in *config*.

    Parameters
    ----------
    src : raw source data, passed through to ``read_data_pipeline``.
    config : str
        Path to a JSON config file, parsed via ``read_config``.
    trg : unused.
        NOTE(review): the argument is immediately overwritten by the
        ``read_data_pipeline`` return value — confirm whether callers
        expect it to be honoured.
    """
    config = read_config(config)
    model_weights = os.path.join(config['data']['save_dir'],
                                 config['data']['preload_weights'])

    src, trg = read_data_pipeline(src, config)

    # The decoders only need the vocabulary mappings, not the full data dicts.
    src_test = {'word2id': src['word2id'], 'id2word': src['id2word']}
    trg_test = {'word2id': trg['word2id'], 'id2word': trg['id2word']}

    # Pick the decoding strategy from the config, then translate.
    if config['model']['decode'] == "beam_search":
        decoder = BeamSearchDecoder(config, model_weights, src_test, trg_test,
                                    config['model']['beam_size'])
    else:
        decoder = GreedyDecoder(config, model_weights, src_test, trg_test)
    decoder.translate()
Ejemplo n.º 2
0
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

# Parse the single required CLI option: the path to the JSON run config.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--config",
    help="path to json config",
    required=True
)
args = parser.parse_args()
config_file_path = args.config
config = read_config(config_file_path)
# Derive a run identifier from the hyper-parameters; it is used below as the
# log file name, so one log file per distinct hyper-parameter combination.
experiment_name = hyperparam_string(config)
save_dir = config['data']['save_dir']
load_dir = config['data']['load_dir']
# Log INFO and above to log/<experiment_name>; filemode='w' overwrites any
# log left by a previous run with the same hyper-parameters.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='log/%s' % (experiment_name),
    filemode='w'
)

# define a new Handler to log to console as well
console = logging.StreamHandler()
# optional, set the logging level
console.setLevel(logging.INFO)
# set a format which is the same for console use
Ejemplo n.º 3
0
    if config['model']['decode'] == "beam_search":
        decoder = BeamSearchDecoder(config, model_weights, src_test, trg_test,
                                    config['model']['beam_size'])
        decoder.translate()
    else:
        decoder = GreedyDecoder(config, model_weights, src_test, trg_test)
        decoder.translate()


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", help="path to json config", required=True)
    args = parser.parse_args()
    config = read_config(args.config)
    model_weights = os.path.join(config['data']['save_dir'],
                                 config['data']['preload_weights'])

    src, trg = read_nmt_data(src=config['data']['src'],
                             config=config,
                             trg=config['data']['trg'])

    src_test, trg_test = read_nmt_data(
        src=config['data']['test_src'],
        config=config,
        trg=None  #trg=config['data']['test_trg']
    )

    if trg_test is None:
        trg_test = {}
Ejemplo n.º 4
0
                    index = sentence_real.index('</s>')
                else:
                    index = len(sentence_real)

                ground_truths.append(['<s>'] + sentence_real[:index + 1])

        bleu_score = get_bleu(preds, ground_truths)
        print('BLEU score : %.5f ' % (bleu_score))


if __name__ == '__main__':

    model_config = '/home/sandeep/Research/nmt-pytorch/config_local_en_de_attention_wmt15.json'
    model_weights = '/home/sandeep/Models/torch_seq2seq/model_translation__src_en__trg_de__attention_attention__dim_1024__emb_dim_500__optimizer_adam__n_layers_src_2__n_layers_trg_1__bidir_True__epoch_6.model'

    config = read_config(model_config)

    src, trg = read_nmt_data(src=config['data']['src'],
                             config=config,
                             trg=config['data']['trg'])

    src_test, trg_test = read_nmt_data(src=config['data']['test_src'],
                                       config=config,
                                       trg=config['data']['test_trg'])

    src_test['word2id'] = src['word2id']
    src_test['id2word'] = src['id2word']

    trg_test['word2id'] = trg['word2id']
    trg_test['id2word'] = trg['id2word']
Ejemplo n.º 5
0
import sys, os.path
# Make the current working directory importable so the local data_utils /
# data_preprocess modules resolve when the script is run from elsewhere.
sys.path.insert(0, os.path.abspath("."))
#%%
import sys
import tensorflow as tf
import scipy.optimize as opt
import numpy as np
import data_utils
import data_preprocess
# Read run settings from the config found in the current directory; the
# values are unpacked positionally, so the order here must match what
# data_utils.read_config returns.
data_in_path, model_path, start_date, end_date, date_range, top_n, data_out_path = data_utils.read_config(
    ".")

# Load the crime dataset (the CSV file name translates to "burglary").
# NOTE(review): the path is hard-coded here — data_in_path from the config
# above is not used at this point; confirm that is intentional.
crime_data, class_count = data_utils.data_input("data/入室盗窃.csv")


def hit_grid_rate(y_, y):
    """
    :param y_: np.array,取topN后的格子列表序列
    :param y: np.array,实际案发格子的列表序列
    :return: 格子命中率
    """
    y_pred = y_.tolist()
    y_no_nan = list(map(lambda x: [] if np.isnan(x).all() else x, y))

    days = 0
    hit_rate = []
    for index, item in enumerate(y_no_nan):
        if (item == []):
            continue
        else:
            days = days + 1
Ejemplo n.º 6
0
import sys
# import tensorflow as tf
import scipy.optimize as opt
import data_utils

# Get the script parameters.
# The commented-out lines below show the legacy positional-argv mapping that
# the single config file (sys.argv[1]) now replaces:
# pd_id = sys.argv[1]
# data_in_path = sys.argv[2]
# model_path = sys.argv[3]
# start_date = sys.argv[4]
# end_date = sys.argv[5]
# date_range = int(sys.argv[6])
# top_n = sys.argv[7]
# data_out_path = sys.argv[8]
# Values are unpacked positionally, so the order must match what
# data_utils.read_config returns.
pd_id, data_in_path, model_path, start_date, end_date, date_range, top_n, data_out_path = data_utils.read_config(
    sys.argv[1])

# Load the data.
crime_data, class_count = data_utils.data_input(data_in_path)


def rank_opt_with_tf(class_, crime_, start_, end_, date_, pd_):
    # 逐个班次训练计算
    for i in range(1, class_ + 1):
        train_x_data = data_utils.data_train_x(start_, end_, date_,
                                               crime_.ix[:, [0, 1, 2 * i]])
        train_y_data = data_utils.data_train_y(start_, end_, date_,
                                               crime_.ix[:,
                                                         [0, 1, 2 * i]]).values
        test_x_data = data_utils.data_test_x(end_, date_,
                                             crime_.ix[:, [0, 1, 2 * i]])