Esempio n. 1
0
# Run configuration taken from the parsed command-line arguments.
MAX_EPOCH = args.MAX_EPOCH
BATCH_SIZE = args.BATCH_SIZE
MFCC_ROOT = args.MFCC_ROOT
TRAIN_LIST = args.TRAIN_LIST
VALID_LIST = args.VALID_LIST
SAVE_FILE = args.SAVE_FILE

# Model, objective, optimizer and data sources.
# Prefer the GPU when one is available, otherwise run on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = models.Classifier(
    IN_SIZE, NUM_CLASS, HIDDEN_SIZE, NUM_STACK, DROPOUT
).to(device)
# Cross-entropy is the training objective.
loss_fun = nn.CrossEntropyLoss()
# Adam with its default hyper-parameters; other optimizers can be swapped in.
optimizer = torch.optim.Adam(model.parameters())
# One batch generator per file list: training and validation.
batch_train = utils.Batch_generator(MFCC_ROOT, TRAIN_LIST, BATCH_SIZE)
batch_valid = utils.Batch_generator(MFCC_ROOT, VALID_LIST, BATCH_SIZE)

# Record the run settings in the log.
logging.info('Batch_size: {}'.format(BATCH_SIZE))
logging.info('Max epoch: {}'.format(MAX_EPOCH))
logging.info('Max iteration: {}'.format(MAX_ITERATION))
logging.info('Hidden size: {}'.format(HIDDEN_SIZE))
logging.info('Num stack: {}'.format(NUM_STACK))
logging.info('Use cmvn: {}'.format(USE_CMVN))

# Training state
now_epoch = 1  # epoch counter, starts at 1
# total_num: number of samples used so far
# correct_num: number of correct predictions so far
total_num = correct_num = 0
acc_plt = []  # presumably the accuracy history kept for plotting -- confirm
Esempio n. 2
0
# Hyper-parameters read from the parsed configuration (sections 'main',
# 'actor' and 'critic'); every value is converted to int.
OUT_SIZE = int(conf.get('main', 'out_size'))
P_HIDDEN_SIZE = int(conf.get('actor', 'hidden_size'))
P_NUM_LAYERS = int(conf.get('actor', 'num_layers'))
Q_HIDDEN_SIZE = int(conf.get('critic', 'hidden_size'))
BATCH_SIZE = int(conf.get('main', 'batch_size'))
SAMPLING_RATE = int(conf.get('main', 'sampling_rate'))
NUM_PARAL = int(conf.get('main', 'num_paral'))
AUDIO_SEGMENT = int(conf.get('main', 'audio_segment'))
FRAMERATE_HZ = int(conf.get('main', 'frameRate_Hz'))

# Training objects: device, policy network, loss, optimizer and data source.
# Prefer the GPU when one is available, otherwise run on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
policy = models.stacked_BLSTM(
    IN_SIZE, OUT_SIZE, P_HIDDEN_SIZE, P_NUM_LAYERS
).to(device)
loss_fun = nn.MSELoss()
# Adam over the policy parameters with an explicit learning rate of 1e-3.
p_optim = torch.optim.Adam(policy.parameters(), lr=1e-3)
train_loader = utils.Batch_generator('training', BATCH_SIZE)

for iteration in range(100):
    policy.train()
    start = time.time()

    # Author's stated assumption: every utterance lasts a whole multiple of
    # 10 ms. A 101-frame input (1000-1009 ms of audio) is therefore treated
    # as exactly 1000 ms and its last feature frame is ignored.
    inputs, length, _ = next(train_loader)

    # Crop one random window of AUDIO_SEGMENT frames out of each utterance.
    feats = np.zeros((BATCH_SIZE, AUDIO_SEGMENT, IN_SIZE))
    for i in range(BATCH_SIZE):
        # Start offsets 0 .. length[i] - AUDIO_SEGMENT (inclusive) keep the
        # window inside the utterance; randint's upper bound is exclusive.
        s_pos = np.random.randint(0, length[i] - AUDIO_SEGMENT + 1)
        feats[i] = inputs[i, s_pos:s_pos + AUDIO_SEGMENT, :]
    # Replace the raw batch by the cropped windows, stored as float32.
    inputs = feats.astype(np.float32)
Esempio n. 3
0
# Static hyper-parameters come from config.ini.
conf.read("config.ini")
IN_SIZE = int(conf.get('main', 'in_size'))
OUT_SIZE = int(conf.get('main', 'out_size'))
P_HIDDEN_SIZE = int(conf.get('actor', 'hidden_size'))
P_NUM_LAYERS = int(conf.get('actor', 'num_layers'))
Q_HIDDEN_SIZE = int(conf.get('critic', 'hidden_size'))
SAMPLING_RATE = int(conf.get('main', 'sampling_rate'))
frameRate_Hz = int(conf.get('main', 'frameRate_Hz'))
FEAT_TYPE = conf.get('main', 'feat_type')

# BATCH_SIZE is no longer read from the config file ('main', 'batch_size');
# it and NUM_PARAL are the third and fourth command-line arguments.
BATCH_SIZE = int(sys.argv[3])
NUM_PARAL = int(sys.argv[4])

# Prefer the GPU when one is available, otherwise run on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Network to evaluate; the trailing 0 is the fifth constructor argument
# (presumably the dropout rate -- confirm against models.stacked_BLSTM).
policy = models.stacked_BLSTM(
    IN_SIZE, OUT_SIZE, P_HIDDEN_SIZE, P_NUM_LAYERS, 0
).to(device)
# The second command-line argument is handed to the batch generator.
data_loader = utils.Batch_generator(sys.argv[2], BATCH_SIZE)

# The first command-line argument selects the checkpoint exp/p<num>.model.
num = sys.argv[1]
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint written on a GPU machine still loads on a CPU-only host
# (without it torch.load fails there while deserializing CUDA tensors).
policy.load_state_dict(
    torch.load('exp/p' + num + '.model', map_location=device))

# Accumulators and counters for the evaluation pass.
reward_mean, total_frame = 0, 0
# e is overwritten by the third value the generator returns; the loop below
# runs while it stays under 2 (presumably an epoch counter -- confirm).
e = 1
iteration = 0
# Gradient tracking is switched off for the whole pass.
with torch.no_grad():
    policy.eval()
    start = time.time()
    while e < 2:
        feats, length, e = next(data_loader)
        # Cast the batch to float32 and move it onto the selected device.
        inputs = torch.from_numpy(
            np.asarray(feats, dtype=np.float32)).to(device)
Esempio n. 4
0
# Evaluation settings taken from the parsed command-line arguments.
HIDDEN_SIZE = args.HIDDEN_SIZE
NUM_STACK = args.NUM_STACK
USE_CMVN = args.USE_CMVN
BATCH_SIZE = args.BATCH_SIZE
PARAM_FILE = args.PARAM_FILE
EVAL_LIST = args.EVAL_LIST
MFCC_ROOT = args.MFCC_ROOT

# Build up model and batch generator
# Prefer the GPU when one is available, otherwise run on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Same structure as the trained model; the last argument is 0.0
# (presumably the dropout rate -- confirm against models.Classifier).
model = models.Classifier(
    IN_SIZE, NUM_CLASS, HIDDEN_SIZE, NUM_STACK, 0.0
).to(device)
# Load the trained parameters. map_location remaps the stored tensors onto
# the selected device, so a checkpoint written on a GPU machine still loads
# on a CPU-only host instead of failing while deserializing CUDA tensors.
model.load_state_dict(torch.load(PARAM_FILE, map_location=device))
# Batch generator over the evaluation list.
batch_test = utils.Batch_generator(MFCC_ROOT, EVAL_LIST, BATCH_SIZE)

# Log the evaluation settings.
logging.info('Batch_size: {}'.format(BATCH_SIZE))
logging.info('Hidden size: {}'.format(HIDDEN_SIZE))
logging.info('Num stack: {}'.format(NUM_STACK))
logging.info('Use cmvn: {}'.format(USE_CMVN))

# Evaluation part (this is the test pass, not training)
with torch.no_grad(
):  # disable gradient calculation to reduce memory consumption
    model.eval()  # switch the model to evaluation mode
    total_num = 0  # total number of test samples seen so far
    correct_num = 0  # number of correct predictions so far
    while True:  # no loop condition: the exit must come from inside the body