コード例 #1
0
ファイル: main.py プロジェクト: tarekmehrez/POSTagger
def train(results):
	"""Run the training pipeline described by the parsed options in *results*.

	Reads ``results.vocab``, ``results.labels``, ``results.train``,
	``results.step`` and ``results.iter``.  A falsy step falls back to 0.01
	and a falsy iteration count falls back to 10.

	Three artifacts are produced in order: meta data, training features and
	the model.  Each one is only computed when its file is missing under
	``model/``; otherwise the meta data and features are loaded with
	``read_obj`` and an existing model means there is nothing left to do.
	NOTE(review): the existence checks look at ``model/<name>`` while
	``read_obj``/``write_obj`` receive the bare name -- presumably those
	helpers resolve names inside ``model/``; confirm against their definitions.

	Returns None.
	"""
	vocab_path = results.vocab
	labels_path = results.labels
	train_path = results.train
	# Fall back to the defaults when the option was not supplied (or is falsy).
	step_size = results.step or 0.01
	n_iter = results.iter or 10

	# One option per line; the trailing empty entry keeps the final newline.
	option_lines = [
		'Started training with options:',
		'training file:	' + str(train_path),
		'step size: ' + str(step_size),
		'no. of iter: ' + str(n_iter),
		'vocab file:	' + str(vocab_path),
		'labels file:	' + str(labels_path),
		'',
	]
	logger.debug("\n".join(option_lines))

	# Stage 1: meta data built from the vocabulary and label files.
	if os.path.exists('model/meta_data'):
		logger.info("meta data file already exists ... loading")
		meta_data = read_obj('meta_data')
	else:
		meta_data = MetaData(vocab_path, labels_path).get_meta_data()
		logger.info("Writing meta data file")
		write_obj(meta_data, 'meta_data')

	# Stage 2: features extracted from the training file.
	if os.path.exists('model/train.feats'):
		logger.info("train.feats already exists ... loading.")
		train_feats = read_obj('train.feats')
	else:
		train_feats = extract_feats(meta_data, train_path)
		logger.info("Writing extracted feats for training files to train.feats")
		write_obj(train_feats, 'train.feats')

	# Stage 3: the model itself; an existing model short-circuits training.
	if os.path.exists('model/model'):
		logger.info('model already exists, nothing to do!')
		return

	perceptron = Perceptron(meta_data)
	perceptron.train(train_feats, step_size, n_iter)
	logger.info("Done Training, model is written in model file")
	write_obj(perceptron.get_theta(), 'model')