# ---------------------------------------------------------------------------
# Run configuration, copied from the parsed command-line arguments.
# ---------------------------------------------------------------------------
MAX_EPOCH = args.MAX_EPOCH
BATCH_SIZE = args.BATCH_SIZE
MFCC_ROOT = args.MFCC_ROOT
TRAIN_LIST = args.TRAIN_LIST
VALID_LIST = args.VALID_LIST
SAVE_FILE = args.SAVE_FILE

# ---------------------------------------------------------------------------
# Model, objective, optimizer and batch generators.
# ---------------------------------------------------------------------------
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

model = models.Classifier(IN_SIZE, NUM_CLASS, HIDDEN_SIZE, NUM_STACK, DROPOUT)
model = model.to(device)

# Cross-entropy is the objective function.
loss_fun = nn.CrossEntropyLoss()
# Adam with its default hyper-parameters; other optimizers can be swapped in.
optimizer = torch.optim.Adam(model.parameters())

# One batch generator per data list (training and validation).
batch_train = utils.Batch_generator(MFCC_ROOT, TRAIN_LIST, BATCH_SIZE)
batch_valid = utils.Batch_generator(MFCC_ROOT, VALID_LIST, BATCH_SIZE)

# ---------------------------------------------------------------------------
# Log the settings of this run, one line per setting.
# ---------------------------------------------------------------------------
for _fmt, _val in (
    ('Batch_size: {}', BATCH_SIZE),
    ('Max epoch: {}', MAX_EPOCH),
    ('Max iteration: {}', MAX_ITERATION),
    ('Hidden size: {}', HIDDEN_SIZE),
    ('Num stack: {}', NUM_STACK),
    ('Use cmvn: {}', USE_CMVN),
):
    logging.info(_fmt.format(_val))

# ---------------------------------------------------------------------------
# Training state.
# ---------------------------------------------------------------------------
now_epoch = 1
# total_num: number of samples used so far; correct_num: number of correct
# predictions among them.
total_num = correct_num = 0
# NOTE(review): presumably collects accuracy values for plotting -- confirm
# against the code that fills it.
acc_plt = []
# Settings read from the [main] section of the configuration.
OUT_SIZE = int(conf.get('main', 'out_size'))

# Policy (actor) and critic network sizes.
P_HIDDEN_SIZE = int(conf.get('actor', 'hidden_size'))
P_NUM_LAYERS = int(conf.get('actor', 'num_layers'))
Q_HIDDEN_SIZE = int(conf.get('critic', 'hidden_size'))

# Remaining [main] settings.
BATCH_SIZE = int(conf.get('main', 'batch_size'))
SAMPLING_RATE = int(conf.get('main', 'sampling_rate'))
NUM_PARAL = int(conf.get('main', 'num_paral'))
AUDIO_SEGMENT = int(conf.get('main', 'audio_segment'))
FRAMERATE_HZ = int(conf.get('main', 'frameRate_Hz'))

### Condition Setting
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

policy = models.stacked_BLSTM(IN_SIZE, OUT_SIZE, P_HIDDEN_SIZE, P_NUM_LAYERS)
policy = policy.to(device)
loss_fun = nn.MSELoss()
p_optim = torch.optim.Adam(policy.parameters(), lr=1e-3)
train_loader = utils.Batch_generator('training', BATCH_SIZE)

for iteration in range(100):
    policy.train()
    start = time.time()

    # The duration is assumed to be a multiple of 10 ms. For example, an
    # input of 101 frames covers 1000 ms to 1009 ms; it is treated as
    # 1000 ms and the last frame is ignored.
    inputs, length, _ = next(train_loader)

    # Cut one random window of AUDIO_SEGMENT frames out of every utterance
    # in the batch.
    feats = np.zeros((BATCH_SIZE, AUDIO_SEGMENT, IN_SIZE))
    for i in range(BATCH_SIZE):
        # Number of valid start positions for a full-length window.
        n_starts = length[i] - AUDIO_SEGMENT + 1
        s_pos = np.random.randint(n_starts)
        e_pos = s_pos + AUDIO_SEGMENT
        feats[i] = inputs[i, s_pos:e_pos, :]

    # Hand the cropped batch on as a float32 array.
    inputs = feats.astype(np.float32)
# Load the model and feature settings from the configuration file.
conf.read("config.ini")
IN_SIZE = int(conf.get('main', 'in_size'))
OUT_SIZE = int(conf.get('main', 'out_size'))
P_HIDDEN_SIZE = int(conf.get('actor', 'hidden_size'))
P_NUM_LAYERS = int(conf.get('actor', 'num_layers'))
Q_HIDDEN_SIZE = int(conf.get('critic', 'hidden_size'))
SAMPLING_RATE = int(conf.get('main', 'sampling_rate'))
frameRate_Hz = int(conf.get('main', 'frameRate_Hz'))
FEAT_TYPE = conf.get('main', 'feat_type')

# Command-line arguments, as used below:
#   sys.argv[1]  identifier of the checkpoint to load ('exp/p<id>.model')
#   sys.argv[2]  first argument handed to utils.Batch_generator
#   sys.argv[3]  batch size (taken from the command line, not from the
#                'batch_size' entry of the configuration file)
#   sys.argv[4]  NUM_PARAL
BATCH_SIZE = int(sys.argv[3])
NUM_PARAL = int(sys.argv[4])

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# NOTE(review): the trailing 0 is presumably the dropout rate, disabled for
# evaluation -- confirm against models.stacked_BLSTM.
policy = models.stacked_BLSTM(
    IN_SIZE, OUT_SIZE, P_HIDDEN_SIZE, P_NUM_LAYERS, 0).to(device)
data_loader = utils.Batch_generator(sys.argv[2], BATCH_SIZE)

num = sys.argv[1]
# map_location remaps the stored tensors onto the device selected above, so
# a checkpoint written on a GPU machine still loads on a CPU-only host.
policy.load_state_dict(
    torch.load('exp/p' + num + '.model', map_location=device))

# Running statistics for the evaluation loop.
reward_mean = 0
total_frame = 0
e = 1
iteration = 0

with torch.no_grad():  # no gradients are needed for evaluation
    policy.eval()
    start = time.time()
    # The generator returns a counter `e` with every batch; the loop stops
    # once it reaches 2.
    # NOTE(review): presumably `e` is an epoch counter, i.e. one pass over
    # the data -- confirm against utils.Batch_generator.
    while e < 2:
        feats, length, e = next(data_loader)
        inputs = np.asarray(feats, dtype=np.float32)
        inputs = torch.from_numpy(inputs).to(device)
HIDDEN_SIZE = args.HIDDEN_SIZE NUM_STACK = args.NUM_STACK USE_CMVN = args.USE_CMVN BATCH_SIZE = args.BATCH_SIZE PARAM_FILE = args.PARAM_FILE EVAL_LIST = args.EVAL_LIST MFCC_ROOT = args.MFCC_ROOT # Build up model and batch generator device = 'cuda' if torch.cuda.is_available() else 'cpu' # check available gpu model = models.Classifier(IN_SIZE, NUM_CLASS, HIDDEN_SIZE, NUM_STACK, 0.0).to( device) # build model (same structure as trained model) model.load_state_dict( torch.load(PARAM_FILE)) # load parameters from trained model batch_test = utils.Batch_generator( MFCC_ROOT, EVAL_LIST, BATCH_SIZE) # data batch generator for evaluation data # Print out setting logging.info('Batch_size: {}'.format(BATCH_SIZE)) logging.info('Hidden size: {}'.format(HIDDEN_SIZE)) logging.info('Num stack: {}'.format(NUM_STACK)) logging.info('Use cmvn: {}'.format(USE_CMVN)) # Training part with torch.no_grad( ): # disable gradient calculation, reduce memory consumption model.eval() total_num = 0 # total num of test data correct_num = 0 # corrected prediction num while True: