예제 #1
0
파일: train.py 프로젝트: sxdkxgwan/corelm
                    "--tuning-algorithm",
                    dest="alg",
                    default='mert',
                    help="Tuning Algorithm (mert|pro|wpro)")
# Optional instance-weights file; per its help text it is meant for the
# "wpro" tuning algorithm.
parser.add_argument("-w",
                    "--instance-weights",
                    dest="instance_weights_path",
                    help="Instance weights for wpro algorithm")
# Boolean flag (store_true): tune with a predictable seed.
parser.add_argument("-s",
                    "--predictable-seed",
                    dest="pred_seed",
                    action='store_true',
                    help="Tune with predictable seed to avoid randomness")
args = parser.parse_args()

# NOTE(review): the dlm.reranker imports sit after this call rather than at
# the top of the file, presumably because the device has to be configured
# before they are loaded -- confirm before reordering.
U.set_theano_device(args.device)

from dlm.reranker import augmenter
from dlm.reranker import mosesIniReader as iniReader

# Use a membership test instead of dict.has_key(), which no longer exists in
# Python 3; `in` gives the same result on Python 2.
if 'MOSES_ROOT' in os.environ:
    moses_root = os.environ['MOSES_ROOT']
else:
    # NOTE(review): moses_root is left undefined on this path, so this relies
    # on L.error() stopping the script -- confirm.
    L.error("Set MOSES_ROOT variable to your moses root directory")

U.mkdir_p(args.out_dir)

# Features are parsed from the moses config file directly; the earlier
# approach of invoking the moses binary is kept below for reference.
#cmd = moses_root + '/bin/moses -show-weights -f ' + args.input_config + ' 2> /dev/null'
#features = U.capture(cmd).strip().split('\n')
features = iniReader.parseIni(args.input_config)
예제 #2
0
			f.write("\t"+str(val))
		f.write("\n")

def write_biases(f, biases):
	"""Write each element of ``biases`` to ``f`` on its own line.

	``f`` only needs a ``write`` method; every value is converted to its
	string form and followed by a newline.
	"""
	for value in biases:
		line = "%s\n" % (value,)
		f.write(line)

# Arguments for this script
parser = argparse.ArgumentParser()
# The file passed with -m is loaded below as a CoreLM model, so the help text
# names CoreLM (it previously said "NPLM", which described the wrong format).
parser.add_argument("-m", "--corelm-model", dest="corelm_model", required=True, help="The input CoreLM model file")
parser.add_argument("-v", "--vocab-file", dest="vocab_path", required=True, help="The input vocabulary")
parser.add_argument("-dir", "--directory", dest="out_dir", help="The output directory for log file, model, etc.")

args = parser.parse_args()

# NOTE(review): the MLP import is placed after the device is set, presumably
# because the device must be configured first -- confirm before reordering.
U.set_theano_device('cpu',1)
from dlm.models.mlp import MLP

# Without -dir, fall back to a directory named after U.curr_time().
if args.out_dir is None:
	args.out_dir = 'corelm_convert-' + U.curr_time()
U.mkdir_p(args.out_dir)

# Loading CoreLM model and creating classifier class
L.info("Loading CoreLM model")
classifier = MLP(model_path=args.corelm_model)
args_nn = classifier.args
params_nn = classifier.params
# The conversion only handles models with exactly 7 parameters
# (presumably embeddings plus weights/biases for three layers -- confirm).
U.xassert(len(params_nn)==7, "CoreLM model is not compatible with NPLM architecture. 2 hidden layers and an output linear layer is required.")

# Extract the values of the first two parameters.
embeddings = params_nn[0].get_value()
W1 = params_nn[1].get_value()
예제 #3
0
파일: train.py 프로젝트: nusnlp/corelm
# Command-line options of the script (required inputs first, then tuning knobs).
parser.add_argument("-i", "--input-nbest", dest="input_nbest", required=True, help="Input n-best file")
parser.add_argument("-v", "--vocab-file", dest="vocab_path", required=True, help="The vocabulary file that was used in training")
parser.add_argument("-m", "--model-file", dest="model_path", required=True, help="Input CoreLM model file")
parser.add_argument("-r", "--reference-files", dest="ref_paths", required=True, help="A comma-seperated list of reference files")
parser.add_argument("-c", "--config", dest="input_config", required=True, help="Input moses config (ini) file")
parser.add_argument("-o", "--output-dir", dest="out_dir", required=True, help="Output directory")
parser.add_argument("-d", "--device", dest="device", default="gpu", help="The computing device (cpu or gpu)")
parser.add_argument("-t", "--threads", dest="threads", default = 14, type=int, help="Number of MERT threads")
# Note: no type= is given, so init_value stays a string (default '0.05').
parser.add_argument("-iv", "--init-value", dest="init_value", default = '0.05', help="The initial value of the feature")
parser.add_argument("-n", "--no-aug", dest="no_aug", action='store_true', help="Augmentation will be skipped, if this flag is set")
parser.add_argument("-a", "--tuning-algorithm", dest="alg", default = 'mert', help="Tuning Algorithm (mert|pro|wpro)")
parser.add_argument("-w", "--instance-weights", dest="instance_weights_path", help="Instance weights for wpro algorithm")
parser.add_argument("-s", "--predictable-seed", dest="pred_seed", action='store_true', help="Tune with predictable seed to avoid randomness")
args = parser.parse_args()

# NOTE(review): the dlm.reranker imports sit after this call rather than at
# the top of the file, presumably because the device has to be configured
# before they are loaded -- confirm before reordering.
U.set_theano_device(args.device)

from dlm.reranker import augmenter
from dlm.reranker import mosesIniReader as iniReader

# Use a membership test instead of dict.has_key(), which no longer exists in
# Python 3; `in` gives the same result on Python 2.
if 'MOSES_ROOT' in os.environ:
	moses_root = os.environ['MOSES_ROOT']
else:
	# NOTE(review): moses_root is left undefined on this path, so this relies
	# on L.error() stopping the script -- confirm.
	L.error("Set MOSES_ROOT variable to your moses root directory")

U.mkdir_p(args.out_dir)

# Features are parsed from the moses config file directly; the earlier
# approach of invoking the moses binary is kept below for reference.
#cmd = moses_root + '/bin/moses -show-weights -f ' + args.input_config + ' 2> /dev/null'
#features = U.capture(cmd).strip().split('\n')
features = iniReader.parseIni(args.input_config)
예제 #4
0
import dlm.io.logging as L


def convert_type(param):
	"""Return ``param`` converted with ``np.float32``."""
	as_float32 = np.float32(param)
	return as_float32



# Command-line interface: the model file is mandatory, the output directory is optional.
parser = argparse.ArgumentParser()
parser.add_argument(
	"-m", "--nplm-model",
	dest="nplm_model",
	required=True,
	help="The input NPLM model file"
)
parser.add_argument(
	"-dir", "--directory",
	dest="out_dir",
	help="The output directory for log file, model, etc."
)
args = parser.parse_args()

# Configure the device ('cpu', 1) first; the MLP import deliberately follows it.
U.set_theano_device('cpu', 1)
from dlm.models.mlp import MLP

# No -dir given: use a directory named after U.curr_time(), then make sure it exists.
if args.out_dir is None:
	args.out_dir = 'nplm_convert-' + U.curr_time()
U.mkdir_p(args.out_dir)


# Reading the NPLM Model
# State initialised here; presumably filled in while the model file is parsed
# further down (the parsing code is outside this excerpt).
args_nn = argparse.Namespace()
model_dict = dict()
lines = []
# Section markers of the NPLM file, each starting with a literal backslash.
# Raw strings spell that backslash explicitly instead of relying on
# unrecognized escapes (e.g. "\c", "\h"), which newer Python versions warn
# about; the resulting values are unchanged.
req_attribs = [
	r'\config',
	r'\vocab',
	r'\input_vocab',
	r'\output_vocab',
	r'\input_embeddings',
	r'\hidden_weights 1',
	r'\hidden_biases 1',
	r'\hidden_weights 2',
	r'\hidden_biases 2',
	r'\output_weights',
	r'\output_biases',
	r'\end',
]
# Starts empty; presumably tracks the section currently being read -- confirm.
attrib = ''
예제 #5
0
파일: train.py 프로젝트: nusnlp/corelm
# Log to log.txt inside the output directory (absolute path).
L.set_file_path(os.path.abspath(args.out_dir) + "/log.txt")

# Record the full command line in the log.
L.info('Command: ' + ' '.join(sys.argv))

# Log the version only when U.curr_version() returns something truthy.
curr_version = U.curr_version()
if curr_version:
	L.info("Version: " + curr_version)

# An embeddings path is only accepted together with a vocabulary file.
if args.emb_path:
	U.xassert(
		args.vocab,
		'When --emb-path is used, vocab file must be given too (using --vocab).'
	)

# With the "nll" loss the number of noise samples is forced to zero.
if args.loss_function == "nll":
	args.num_noise_samples = 0

U.print_args(args)
U.set_theano_device(args.device, args.threads)

# These imports deliberately follow the device setup above.
import dlm.trainer
from dlm.io.mmapReader import MemMapReader
from dlm.models.mlp import MLP

# ------------------------------------------------------------------
# Dataset loading
# ------------------------------------------------------------------

trainset = MemMapReader(
	args.trainset,
	batch_size=args.batchsize,
	instance_weights_path=args.instance_weights_path
)
devset = MemMapReader(args.devset)
# The test set is optional: testset is None when args.testset is falsy.
testset = MemMapReader(args.testset) if args.testset else None
예제 #6
0
# Log the version only when U.curr_version() returns something truthy.
curr_version = U.curr_version()
if curr_version:
    L.info("Version: " + curr_version)

# An embeddings path is only accepted together with a vocabulary file.
if args.emb_path:
    U.xassert(args.vocab, 'When --emb-path is used, vocab file must be given too (using --vocab).')

# With the "nll" loss the number of noise samples is forced to zero.
if args.loss_function == "nll":
    args.num_noise_samples = 0

U.print_args(args)
U.set_theano_device(args.device, args.threads)

# These imports deliberately follow the device setup above; their relative
# order is kept as in the original.
import dlm.trainer
from dlm.io.mmapReader import MemMapReader
from dlm.io.featuresmmapReader import FeaturesMemMapReader
from dlm.models.mlp import MLP

#########################
## Loading datasets
#
if args.feature_emb_dim is None:
    trainset = FeaturesMemMapReader(
        args.trainset,
        batch_size=args.batchsize,
        instance_weights_path=args.instance_weights_path)