def parse_action(self, chat_id, text):
    """Route one incoming chat message to the matching handler.

    The user record for ``chat_id`` is fetched from ``self.users`` (a default
    record is built when none exists, and a missing ``'train'`` sub-record is
    filled in), the message is dispatched, and the record is written back
    with ``self.users.save``.

    Dispatch order:
      1. Text starting with ``/``: the lower-cased first word (without the
         slash) is looked up in ``self.comands``; unknown commands are
         ignored.
      2. ``user['train']['type'] != 0``: forwarded to ``train.do_train``.
      3. ``user['state'] == States.idle``: forwarded to ``self.add_word``.
      4. ``user['state'] == States.langs_asked``: forwarded to
         ``self.langs_ask``.

    Empty or missing text is not dispatched at all (it used to raise
    ``IndexError`` on ``text[0]``); the user record is still saved.

    :param chat_id: identifier used to look up the user record
    :param text: message text received from the chat
    """
    def new_train_state():
        # Single source for the default training sub-record. The key
        # spelling 'cadidacies' is a runtime dictionary key and is kept as-is.
        return {'type': 0, 'words': 0, 'correct': 0, 'cadidacies': []}

    self.logger.warning("%s - %s" % (chat_id, text))

    user = self.users.find_one({'chat_id': chat_id})
    if user is None:
        user = {'chat_id': chat_id,
                'state': States.idle,
                'words': [],
                'train': new_train_state()}
    elif 'train' not in user:
        user['train'] = new_train_state()

    if not text:
        # Nothing to dispatch; fall through to the save below.
        pass
    elif text.startswith('/'):
        # Command
        cmd = text[1:].lower().split(' ')[0]
        if cmd in self.comands:
            # Handlers are stored as plain callables, so self is passed explicitly.
            self.comands[cmd](self, user, text)
    elif user['train']['type'] != 0:
        train.do_train(user, text)
    elif user['state'] == States.idle:
        self.add_word(user, text)
    elif user['state'] == States.langs_asked:
        self.langs_ask(user, text)

    self.users.save(user)
def search(self, opt, train=False, test=False):
    """Run training and/or testing with the options given in ``opt``.

    ``opt`` is copied into an ``argparse.Namespace`` because ``do_train`` and
    ``do_test`` are called with a namespace object rather than a mapping.

    :param opt: option names and values to expose as namespace attributes
    :param train: when truthy, call ``do_train`` with the namespace
    :param test: when truthy, call ``do_test`` with the namespace
    """
    namespace = argparse.Namespace()
    # Writing into the instance dict turns every option into an attribute.
    namespace.__dict__.update(opt)

    if train:
        do_train(namespace)

    if test:
        do_test(namespace)
def train_crf(train_path, pattern_path, model_path):
    """
    Train a CRF model through pywapiti.

    :param train_path: value stored in the ``input`` option
    :param pattern_path: value stored in the ``pattern`` option
    :param model_path: value stored in the ``output`` option
    :return: None
    """
    opt = create_default_opt()

    # Overrides applied on top of the defaults, in this order.
    overrides = (
        ("input", train_path),
        ("pattern", pattern_path),
        ("output", model_path),
        ("maxiter", 10),
        ("nthread", 4),
    )
    for field, value in overrides:
        setattr(opt, field, value)

    reader = pywapiti.rdr_new(opt.maxent)
    crf_model = pywapiti.mdl_new(reader)
    crf_model.opt = opt
    do_train(crf_model)
def train_crf(train_path, pattern_path, model_path):
    """
    Configure a pywapiti model from the default options and train it.

    :param train_path: assigned to ``opt.input``
    :param pattern_path: assigned to ``opt.pattern``
    :param model_path: assigned to ``opt.output``
    :return: None
    """
    options = create_default_opt()

    # Paths supplied by the caller.
    options.input, options.pattern, options.output = (
        train_path, pattern_path, model_path)
    # Fixed training settings: iteration limit and thread count.
    options.maxiter, options.nthread = 10, 4

    model = pywapiti.mdl_new(pywapiti.rdr_new(options.maxent))
    model.opt = options
    do_train(model)
def remove(self, user, text):
    """Delete a word from the user's list and report it over Telegram.

    Two modes, selected by ``user['train']['type']``:

    * Training active (type != 0): every entry of ``user['words']`` equal to
      ``user['train']['word']`` is removed, one "Deleted" message is sent per
      removed entry, and the message is then forwarded to ``train.do_train``.
    * Otherwise: ``text`` is split on spaces and the second token, if any, is
      read as an index. A positive index is 1-based; a missing, zero or
      negative index selects the last word.

    An index that is not a number, or that does not address an existing word
    (including an empty word list), is reported to the user instead of
    raising.

    :param user: user record; reads 'chat_id', 'words' and 'train'
    :param text: the raw message text that triggered the removal
    """
    chat_id = user['chat_id']
    words = user['words']

    if user['train']['type'] != 0:
        target = user['train']['word']
        # Iterate over a snapshot: removing from the list being iterated
        # makes the loop skip the element that follows each removal.
        for w in list(words):
            if w == target:
                words.remove(w)
                str_out = "%s - %s" % (w['en'], w['ru'])
                telegram.send_message(chat_id, "Deleted:\n%s" % (str_out,))
        train.do_train(user, text)
        return

    tokens = text.split(" ")
    cnt = -1  # default: last word
    if len(tokens) > 1:
        try:
            cnt = int(tokens[1])
        except ValueError:
            telegram.send_message(chat_id,
                                  "Invalid word index: %s" % (tokens[1],))
            return
        # Positive input is 1-based; anything else means "last word".
        cnt = cnt - 1 if cnt > 0 else -1

    if not words or cnt >= len(words):
        telegram.send_message(chat_id, "No word with that index")
        return

    str_out = "%s - %s" % (words[cnt]['en'], words[cnt]['ru'])
    del words[cnt]
    telegram.send_message(chat_id,
                          "Word with index %s removed\n%s" % (cnt, str_out))
def main():
    """Run the stage(s) selected on the command line.

    ``--do_init`` runs ``init`` and returns immediately. Otherwise
    ``do_train`` and/or ``do_eval`` run, in that order, when their flags are
    set. All file and directory arguments are passed through
    ``os.path.normpath`` first.
    """
    args = get_args()
    norm = os.path.normpath

    # CUDA is used only when it is available and not disabled by the user.
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")

    if args.do_init:
        init(
            norm(args.train_file),
            norm(args.dev_file),
            norm(args.test_file),
            norm(args.word_dict_path),
            norm(args.tag_dict_path),
        )
        return

    if args.do_train:
        do_train(
            norm(args.train_file),
            norm(args.output_dir),
            norm(args.word_dict_path),
            norm(args.tag_dict_path),
            args.max_seq_len,
            args.embed_dim,
            args.hidden_dim,
            args.lr,
            args.batch_size,
            args.epochs,
            args.print_step,
            device,
        )

    if args.do_eval:
        do_eval(
            norm(args.test_file),
            norm(args.word_dict_path),
            norm(args.tag_dict_path),
            args.max_seq_len,
            args.embed_dim,
            args.hidden_dim,
            norm(args.output_dir),
            norm(args.eval_log_dir),
            device,
        )
import paddle
import paddle.fluid as fluid

from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model

from dgu.utils.configure import PDConfig

if __name__ == "__main__":

    # Load, finalize and echo the configuration read from the YAML file.
    args = PDConfig(yaml_file="./data/config/dgu.yaml")
    args.build()
    args.Print()

    # Stages run in this fixed order; each flag is read right before its
    # stage, so a stage only runs when its flag is set at that moment.
    stages = (
        ("do_train", do_train),
        ("do_predict", do_predict),
        ("do_eval", do_eval),
        ("do_save_inference_model", do_save_inference_model),
    )
    for flag_name, run_stage in stages:
        if getattr(args, flag_name):
            run_stage(args)

# vim: set ts=4 sw=4 sts=4 tw=100:
# Command-line options: the run mode (mandatory) and the number of epochs.
parser.add_argument('--mode', required=True, choices=['train', 'eval'])
parser.add_argument('--epochs', default=5, type=int)
args = parser.parse_args()
print('args:', args)

# Build the bus for the selected mode and the input reader on top of it.
bus = ng_bus.NeurogenBus(args.mode)
io = ng_input_alex_net.NeurogenIO(bus)
# NOTE: from here on the name `train` refers to the controller instance, not
# to whatever `train` was bound to before (presumably the imported module).
train = train.TrainController()

# Collect the variables exposed by the input reader and by the controller.
cli_vars = []
cli_vars += io.get_vars()
cli_vars += train.get_vars()

# Read the inputs, then fetch the train and test splits.
io.read_inputs()
X_train, y_train = io.get_train_xy()
X_test, y_test = io.get_test_xy()
print(X_train.shape)

train.do_compile()

# Sample predictions are printed both before and after training.
train.print_sample_predictions(X_test, y_test)
result_of_train = train.do_train(X_train, y_train, X_test, y_test, args.epochs, 'weights')
print(result_of_train.history)
# print(np.mean(result_of_train.history["val_acc"]))
train.print_sample_predictions(X_test, y_test)
def main(_):
    """Build or load the model, then run the action selected by ``FLAGS.mode``.

    Model setup: when ``<model_dir>/config.json`` exists the model is loaded
    from ``FLAGS.model_dir``; otherwise a fresh ``Transducer`` is created and
    its vocabulary and config are written there. Modes 'eval',
    'transcribe-file' and 'realtime' require an existing model.

    Modes:
      * 'export'          - save the model under ``<model_dir>/saved_model/<n>``
                            where ``n`` is one more than the highest existing
                            numeric version directory.
      * 'transcribe-file' - transcribe ``FLAGS.input`` and print the result.
      * 'realtime'        - transcribe microphone audio until interrupted.
      * 'eval'            - evaluate on the dev split and log loss/accuracy.
      * anything else     - train with Adam, checkpointing under
                            ``<model_dir>/checkpoints``.

    :param _: unused positional argument
    :raises Exception: when a mode that needs an existing model finds none
    :raises ValueError: when ``FLAGS.dataset_name`` is not supported
    """
    logging.info('Running with parameters:')
    logging.info(json.dumps(FLAGS.flag_values_dict(), indent=4))

    if os.path.exists(os.path.join(FLAGS.model_dir, 'config.json')):

        # Inference-only modes load the checkpoint with expect_partial=True.
        expect_partial = False
        if FLAGS.mode in ['transcribe-file', 'realtime']:
            expect_partial = True

        model = load_model(FLAGS.model_dir,
                           checkpoint=FLAGS.checkpoint,
                           expect_partial=expect_partial)

    else:

        if FLAGS.mode in ['eval', 'transcribe-file', 'realtime']:
            raise Exception('Model not found at path: {}'.format(
                FLAGS.model_dir))

        logging.info('Initializing model from scratch.')

        os.makedirs(FLAGS.model_dir, exist_ok=True)
        model_config_filepath = os.path.join(FLAGS.model_dir, 'config.json')

        vocab = vocabulary.init_vocab()
        vocabulary.save_vocab(vocab, os.path.join(FLAGS.model_dir, 'vocab'))

        model = Transducer(vocab=vocab,
                           encoder_layers=FLAGS.encoder_layers,
                           encoder_size=FLAGS.encoder_size,
                           pred_net_layers=FLAGS.pred_net_layers,
                           pred_net_size=FLAGS.pred_net_size,
                           joint_net_size=FLAGS.joint_net_size,
                           softmax_size=FLAGS.softmax_size)

        model.save_config(model_config_filepath)

        logging.info('Initialized model from scratch.')

    distribution_strategy = None

    if FLAGS.tpu is not None:

        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=FLAGS.tpu)
        distribution_strategy = tf.distribute.experimental.TPUStrategy(
            tpu_cluster_resolver=tpu_cluster_resolver)

    if FLAGS.mode == 'export':

        saved_model_dir = os.path.join(FLAGS.model_dir, 'saved_model')
        os.makedirs(saved_model_dir, exist_ok=True)

        # Only purely numeric entries are version directories; anything else
        # in the folder (stray files, temp dirs) must not break the export.
        all_versions = [int(ver) for ver in os.listdir(saved_model_dir)
                        if ver.isdigit()]

        version = max(all_versions, default=0) + 1

        export_path = os.path.join(saved_model_dir, str(version))
        os.makedirs(export_path)

        tf.saved_model.save(model, export_path, signatures={
            'serving_default': model.predict
        })

    elif FLAGS.mode == 'transcribe-file':

        transcription = transcribe_file(model, FLAGS.input)

        print('Input file: {}'.format(FLAGS.input))
        print('Transcription: {}'.format(transcription))

    elif FLAGS.mode == 'realtime':

        audio_buf = []
        last_result = None

        def stream_callback(in_data, frame_count, time_info, status):
            # Called by PyAudio for every captured chunk; queue it for audio_gen.
            audio_buf.append(in_data)
            return None, pyaudio.paContinue

        def audio_gen():
            # Yield captured chunks in arrival order. Each chunk is removed
            # from the queue once taken; reading audio_buf[0] without
            # removing it would yield the very first chunk forever.
            while True:
                if len(audio_buf) > 0:
                    audio_data = audio_buf.pop(0)
                    audio_arr = np.frombuffer(audio_data, dtype=np.float32)
                    yield audio_arr

        FORMAT = pyaudio.paFloat32
        CHANNELS = 1
        RATE = 16000
        CHUNK = 2048

        audio = pyaudio.PyAudio()
        stream = audio.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK,
                            stream_callback=stream_callback)

        stream.start_stream()

        try:
            outputs = transcribe_stream(model, audio_gen(), RATE)

            print('Transcribing live audio (press CTRL+C to stop)...')

            for (output, is_final) in outputs:
                if output != last_result and output != '' and not is_final:
                    print('Partial Result: {}'.format(output))
                    last_result = output
                if is_final:
                    print('# Final Result: {}'.format(output))
                    last_result = None
        finally:
            # Release the audio device even when interrupted with CTRL+C.
            stream.stop_stream()
            stream.close()
            audio.terminate()

    else:

        if FLAGS.dataset_name == 'common-voice':
            data_utils = utils.data.common_voice
        else:
            # Previously data_utils stayed unbound and the next line failed
            # with an unrelated-looking NameError.
            raise ValueError('Unsupported dataset: {}'.format(
                FLAGS.dataset_name))

        train_dataset, dev_dataset = data_utils.create_datasets(
            FLAGS.dataset_path, max_data=FLAGS.max_data)

        if dev_dataset is None:
            # No dedicated dev split: carve the first eval_size items off train.
            dev_dataset = train_dataset.take(FLAGS.eval_size)
            train_dataset = train_dataset.skip(FLAGS.eval_size)

        if FLAGS.mode == 'eval':

            logging.info('Begin evaluation...')

            loss, acc = do_eval(model, dev_dataset,
                                batch_size=FLAGS.batch_size,
                                shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                                distribution_strategy=distribution_strategy)

            logging.info('Evaluation complete: Loss {} Accuracy {}'.format(
                loss, acc))

        else:

            optimizer = tf.keras.optimizers.Adam(FLAGS.learning_rate)

            checkpoints_path = os.path.join(FLAGS.model_dir, 'checkpoints')
            os.makedirs(checkpoints_path, exist_ok=True)

            do_train(model, train_dataset, optimizer,
                     FLAGS.epochs, FLAGS.batch_size,
                     eval_dataset=dev_dataset,
                     steps_per_checkpoint=FLAGS.steps_per_checkpoint,
                     checkpoint_path=checkpoints_path,
                     steps_per_log=FLAGS.steps_per_log,
                     tb_log_dir=FLAGS.tb_log_dir,
                     keep_top_n=FLAGS.keep_top,
                     shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                     distribution_strategy=distribution_strategy)
# Training data is only read in 'train' mode.
if args.mode == 'train':
    train_data, val_data = io.read_train_data(args.sample_count)
    if not ng_config.use_generator_fit:
        # Shapes are logged only when the data is not fed through a generator.
        logging.info('X_train shape: %s', train_data[0].shape)
        logging.info('y_train shape: %s', train_data[1].shape)

# Both modes need a compiled model with any existing weights loaded.
train.do_compile()
train.try_load_weights(args.weights)

if args.mode == 'train':
    # Sample predictions are printed before and after training.
    train.print_sample_predictions(val_data)
    result_of_train = train.do_train(
        train_data,
        val_data,
        args.epochs,
        args.weights,
    )
    metrics_output = json.dumps({'history': result_of_train.history})
    logging.info('history: %s', metrics_output)
    if args.metrics_output:
        # Context manager so the file is flushed and closed right away
        # instead of whenever the handle happens to be garbage-collected.
        with open(args.metrics_output, 'w') as metrics_file:
            metrics_file.write(metrics_output)
    # print(np.mean(result_of_train.history["val_acc"]))
    train.print_sample_predictions(val_data)
elif args.mode == 'eval':
    # NOTE: these asserts are skipped when Python runs with -O.
    assert args.eval_data
    assert args.network_output
    import cli_util
    cli_util.do_eval(args, io, train)

# NOTE(review): this import is placed at module level so that K is available
# after either mode; confirm against the original nesting.
from tensorflow.keras import backend as K
def start_train(self, user, text):
    """Reset the user's training type to 0 and pass the message to the trainer.

    :param user: user record whose ``'train'`` sub-record is updated in place
    :param text: message text forwarded unchanged to ``train.do_train``
    """
    training_state = user['train']
    training_state['type'] = 0
    train.do_train(user, text)
def main(_):
    """Build or load the model, then run the action selected by ``FLAGS.mode``.

    Model setup: when ``<model_dir>/config.json`` exists the model is loaded
    from ``FLAGS.model_dir``; otherwise a fresh ``Transducer`` is created and
    its vocabulary and config are written there. Modes 'eval',
    'transcribe-file' and 'realtime' require an existing model.

    Modes:
      * 'export'          - convert ``model.predict`` to TFLite and write
                            ``<model_dir>/lite/model.tflite``.
      * 'transcribe-file' - transcribe ``FLAGS.input`` and print the result.
      * 'realtime'        - transcribe microphone audio until interrupted.
      * 'eval'            - evaluate on the dev split and log loss/accuracy.
      * anything else     - train with Adam, checkpointing under
                            ``<model_dir>/checkpoints``.

    :param _: unused positional argument
    :raises Exception: when a mode that needs an existing model finds none
    """
    logging.info('Running with parameters:')
    logging.info(json.dumps(FLAGS.flag_values_dict(), indent=4))

    if os.path.exists(os.path.join(FLAGS.model_dir, 'config.json')):

        # Modes that do not train load the checkpoint with expect_partial=True.
        expect_partial = False
        if FLAGS.mode in ['transcribe-file', 'realtime', 'export']:
            expect_partial = True

        model = load_model(FLAGS.model_dir,
                           checkpoint=FLAGS.checkpoint,
                           expect_partial=expect_partial)

    else:

        if FLAGS.mode in ['eval', 'transcribe-file', 'realtime']:
            raise Exception('Model not found at path: {}'.format(
                FLAGS.model_dir))

        logging.info('Initializing model from scratch.')

        os.makedirs(FLAGS.model_dir, exist_ok=True)
        model_config_filepath = os.path.join(FLAGS.model_dir, 'config.json')

        vocab = vocabulary.init_vocab()
        vocabulary.save_vocab(vocab, os.path.join(FLAGS.model_dir, 'vocab'))

        model = Transducer(vocab=vocab,
                           encoder_layers=FLAGS.encoder_layers,
                           encoder_size=FLAGS.encoder_size,
                           pred_net_layers=FLAGS.pred_net_layers,
                           joint_net_size=FLAGS.joint_net_size,
                           softmax_size=FLAGS.softmax_size)

        model.save_config(model_config_filepath)

        logging.info('Initialized model from scratch.')

    distribution_strategy = None

    if FLAGS.tpu is not None:

        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=FLAGS.tpu)
        distribution_strategy = tf.distribute.experimental.TPUStrategy(
            tpu_cluster_resolver=tpu_cluster_resolver)

    if FLAGS.mode == 'export':

        # Earlier SavedModel export, kept for reference:
        # saved_model_dir = os.path.join(FLAGS.model_dir, 'saved_model')
        # os.makedirs(saved_model_dir, exist_ok=True)
        #
        # all_versions = [int(ver) for ver in os.listdir(saved_model_dir)]
        #
        # if len(all_versions) > 0:
        #     version = max(all_versions) + 1
        # else:
        #     version = 1
        #
        # export_path = os.path.join(saved_model_dir, str(version))
        # os.makedirs(export_path)
        #
        # tf.saved_model.save(model, export_path, signatures={
        #     'serving_default': model.predict
        # })

        # print(model.predict(tf.zeros((1, 1024)), tf.constant([16000]), tf.constant(['hell']), tf.zeros((1, 2, 1, 2048))))

        tflite_dir = os.path.join(FLAGS.model_dir, 'lite')
        os.makedirs(tflite_dir, exist_ok=True)

        # Fix the input signature of model.predict for conversion.
        concrete_func = model.predict.get_concrete_function(
            audio=tf.TensorSpec([1, 1024], dtype=tf.float32),
            sr=tf.TensorSpec([1], dtype=tf.int32),
            pred_inp=tf.TensorSpec([1], dtype=tf.string),
            enc_state=tf.TensorSpec([1, 2, 1, model.encoder_size], dtype=tf.float32))

        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
                                               tf.lite.OpsSet.SELECT_TF_OPS]
        converter.experimental_new_converter = True
        converter.experimental_new_quantizer = True
        converter.allow_custom_ops = True

        # Full-integer quantization experiment, currently disabled:
        # def representative_dataset_gen():
        #     dataset, _ = load_datasets()
        #     for i in range(10):
        #         yield [next(dataset)]
        # converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # converter.representative_dataset = representative_dataset_gen
        # converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        # converter.inference_input_type = tf.uint8
        # converter.inference_output_type = tf.uint8

        tflite_quant_model = converter.convert()

        with open(os.path.join(tflite_dir, 'model.tflite'), 'wb') as f:
            f.write(tflite_quant_model)

        print('Exported model to TFLite.')

    elif FLAGS.mode == 'transcribe-file':

        transcription = transcribe_file(model, FLAGS.input)

        print('Input file: {}'.format(FLAGS.input))
        print('Transcription: {}'.format(transcription))

    elif FLAGS.mode == 'realtime':

        # Imported here so PyAudio is only needed for realtime mode.
        import pyaudio

        audio_buf = []
        last_result = None

        def stream_callback(in_data, frame_count, time_info, status):
            # Called by PyAudio for every captured chunk; queue it for audio_gen.
            audio_buf.append(in_data)
            return None, pyaudio.paContinue

        def audio_gen():
            # Yield captured chunks in arrival order. Each chunk is removed
            # from the queue once taken; reading audio_buf[0] without
            # removing it would yield the very first chunk forever.
            while True:
                if len(audio_buf) > 0:
                    audio_data = audio_buf.pop(0)
                    audio_arr = np.frombuffer(audio_data, dtype=np.float32)
                    yield audio_arr

        FORMAT = pyaudio.paFloat32
        CHANNELS = 1
        RATE = 16000
        CHUNK = 2048

        audio = pyaudio.PyAudio()
        stream = audio.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK,
                            stream_callback=stream_callback)

        stream.start_stream()

        try:
            outputs = transcribe_stream(model, audio_gen(), RATE)

            print('Transcribing live audio (press CTRL+C to stop)...')

            for (output, is_final) in outputs:
                if output != last_result and output != '' and not is_final:
                    print('Partial Result: {}'.format(output))
                    last_result = output
                if is_final:
                    print('# Final Result: {}'.format(output))
                    last_result = None
        finally:
            # Release the audio device even when interrupted with CTRL+C.
            stream.stop_stream()
            stream.close()
            audio.terminate()

    else:

        train_dataset, dev_dataset = load_datasets()

        if dev_dataset is None:
            # No dedicated dev split: carve the first eval_size items off train.
            dev_dataset = train_dataset.take(FLAGS.eval_size)
            train_dataset = train_dataset.skip(FLAGS.eval_size)

        if FLAGS.eval_size:
            # Cap the dev split at eval_size items.
            dev_dataset = dev_dataset.take(FLAGS.eval_size)

        if FLAGS.mode == 'eval':

            logging.info('Begin evaluation...')

            loss, acc = do_eval(model, dev_dataset,
                                batch_size=FLAGS.batch_size,
                                shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                                distribution_strategy=distribution_strategy)

            logging.info('Evaluation complete: Loss {} Accuracy {}'.format(
                loss, acc))

        else:

            optimizer = tf.keras.optimizers.Adam(FLAGS.learning_rate)

            checkpoints_path = os.path.join(FLAGS.model_dir, 'checkpoints')
            os.makedirs(checkpoints_path, exist_ok=True)

            do_train(model, train_dataset, optimizer,
                     FLAGS.epochs, FLAGS.batch_size,
                     eval_dataset=dev_dataset,
                     steps_per_checkpoint=FLAGS.steps_per_checkpoint,
                     checkpoint_path=checkpoints_path,
                     steps_per_log=FLAGS.steps_per_log,
                     tb_log_dir=FLAGS.tb_log_dir,
                     keep_top_n=FLAGS.keep_top,
                     shuffle_buffer_size=FLAGS.shuffle_buffer_size,
                     distribution_strategy=distribution_strategy)
def do_POST(self):
    """Handle an image upload for ``/get`` (classify) or ``/train`` (collect).

    Any other path is answered with 404. The multipart form must carry a
    ``file`` field holding a .jpg/.png/.jpeg/.gif upload of at most
    20,000,000 bytes; ``/train`` additionally needs a ``type`` field that
    indexes ``labels_index``. Requests failing these checks get 403.

    The whole request is validated *before* any status line is written, so
    exactly one status is sent per request.

    ``/get``: the upload is stored temporarily, cropped with
    ``crop.ssd_process``, and every crop is classified. The JSON body holds
    ``result`` (predicted class per crop) and ``probabilities`` (raw model
    output per crop).

    ``/train``: the upload is stored under ``train/<label>/``. When more than
    86400 seconds have passed since the configured ``last`` timestamp and
    enough files have accumulated, ``train.do_train()`` runs and the
    collected files are deleted.
    """
    def reject(status):
        # Send a bare error status and finish the response.
        self.send_response(status)
        self.end_headers()

    if self.path not in ("/get", "/train"):
        reject(404)
        return

    form = cgi.FieldStorage(fp=self.rfile,
                            headers=self.headers,
                            environ={
                                "REQUEST_METHOD": "POST",
                                "CONTENT_TYPE": self.headers["Content-Type"]
                            })

    # Indexing a FieldStorage raises for a missing field (KeyError) or a
    # non-form body (TypeError); it never returns None.
    try:
        field_item = form["file"]
    except (KeyError, TypeError):
        field_item = None

    filename = getattr(field_item, "filename", None)
    if not filename:
        reject(403)
        return

    # The name is client-controlled: keep only its final component so it
    # cannot point outside the upload/train directories.
    filename = os.path.basename(filename)
    fileValue = field_item.value

    print("[UPLOAD]", "file: ", filename, ", size:", len(fileValue), "bytes")

    allowed_suffixes = (".jpg", ".png", ".jpeg", ".gif")
    if len(fileValue) > 20000000 or not filename.endswith(allowed_suffixes):
        reject(403)
        return

    label = None
    if self.path == "/train":
        # Resolve the label up front so a bad "type" is rejected cleanly
        # instead of failing after the 200 status has gone out.
        try:
            label = labels_index[int(form["type"].value)]
        except (KeyError, IndexError, TypeError, ValueError, AttributeError):
            reject(403)
            return

    self.send_response(200)
    self.send_header("Access-Control-Allow-Origin", "*")
    self.send_header("Content-type", "application/json")
    self.end_headers()

    if self.path == "/get":
        upload_path = "upload/" + filename
        with open(upload_path, "wb") as f:
            f.write(fileValue)
        try:
            crop.ssd_process(img_path=upload_path, crop_path="crop", show=False)
        finally:
            # The temporary upload is removed even when cropping fails.
            os.remove(upload_path)

        # NOTE(review): crop_path is not defined in this method; presumably a
        # module-level name for the "crop" directory used above - confirm.
        files = glob.glob(crop_path + "/*")

        pred_result = []
        pred_probabilities = []

        # Load the model once per request instead of once per crop.
        model = load_model(
            "model/trash.h5",
            custom_objects={"leaky_relu": leaky_relu}) if files else None

        for crop_file in files:
            img = image.load_img(crop_file, target_size=(256, 256))
            image_tensor = image.img_to_array(img)
            image_tensor = np.expand_dims(image_tensor, axis=0)
            image_tensor /= 255.

            pred = model.predict(image_tensor)
            pred_class = np.argmax(pred, axis=1)

            # Convert to plain lists: numpy arrays are not JSON serializable.
            # "result" carries the predicted class, "probabilities" the raw
            # model output.
            pred_result.append(np.asarray(pred_class).tolist())
            pred_probabilities.append(np.asarray(pred).tolist())

            print("[PREDICT] Crop name:", crop_file)
            print("[PREDICT] Raw prediction data:", pred)
            print("[PREDICT] Raw prediction class data: ", pred_class)

        response = {
            "status": "success",
            "result": pred_result,
            "probabilities": pred_probabilities
        }

    else:  # "/train"
        with open("train/" + label + "/" + filename, "wb") as f:
            f.write(fileValue)
        response = {"status": "success"}

        # Retrain at most once per 86400 seconds, and only when the number of
        # collected files exceeds the configured "in" threshold.
        if int(time.time()) - conf.read_config()["last"] > 86400:
            train_path = list(pathlib.Path("./train").glob("*/*"))
            train_count = len(train_path)
            if train_count > int(conf.read_config()["in"]):
                conf.write_config("last", int(time.time()))
                conf.write_config("in", weight.get_new_n(train_count))
                train.do_train()
                # Collected samples are discarded once training has run.
                for item in train_path:
                    os.remove(item)

    responseBody = json.dumps(response)
    self.wfile.write(responseBody.encode("utf-8"))

    print()
import os
import sys
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from options.train_options import TrainOptions
from train import do_train

# Command-line parser for dataset preparation. It is only defined here: the
# parse_args() call that would use it is commented out at the bottom.
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('--path', type=str,
                    help='Path to datasets')
parser.add_argument('--dataset', type=str, default='ade20k',
                    help='which dataset to process')
parser.add_argument('--norm', type=str, default='norm',
                    help='normalization mode of dist map')

if __name__ == '__main__':
    # Training options are parsed by TrainOptions; norm_mode is then forced
    # to 'clade' before training starts.
    options = TrainOptions().parse()
    options.norm_mode = 'clade'
    do_train(options)
    # calculate_fid_given_paths
    # args = parser.parse_args()
    # make_dist_train_val_ade_datasets(args.path, args.norm)