def test():
    """Load a Tacotron2 checkpoint and run post-training static quantization.

    Loads the model on CPU in eval mode, inserts observers, calibrates on
    the training set, converts to a quantized model, and prints the
    resulting model size.
    """
    checkpoint = 'tacotron2-cn.pt'
    print('loading model: {}...'.format(checkpoint))
    model = Tacotron2(HParams())
    model.load_state_dict(torch.load(checkpoint))
    model = model.to('cpu')
    model.eval()  # quantization calibration must run in eval mode

    print(bcolors.HEADER + '\nPost-training static quantization' +
          bcolors.ENDC)
    num_calibration_batches = 10

    # Default qconfig selects the stock observers for weights/activations.
    model.qconfig = torch.quantization.default_qconfig
    print(model.qconfig)
    torch.quantization.prepare(model, inplace=True)

    # Calibrate first
    print('Post Training Quantization Prepare: Inserting Observers')

    # Calibrate with the training set
    print('Calibrate with the training set')
    evaluate(model, neval_batches=num_calibration_batches)
    print('Post Training Quantization: Calibration done')

    # Convert to quantized model
    torch.quantization.convert(model, inplace=True)
    print('Post Training Quantization: Convert done')

    print("Size of model after quantization")
    print_size_of_model(model)
Esempio n. 2
0
def load_glow_tts(config_path, checkpoint_path):
    """Build a Glow-TTS FlowGenerator on CPU and restore its checkpoint.

    Args:
        config_path: Path to a JSON file of hyper-parameters.
        checkpoint_path: Path to the model checkpoint to restore.

    Returns:
        The model in eval mode with the decoder's inverse weights cached.
    """
    # json.load parses the stream directly (was f.read() + json.loads).
    with open(config_path, "r") as f:
        config = json.load(f)

    hparams = HParams(**config)
    model = FlowGenerator(len(glow_tts_symbols),
                          out_channels=hparams.data.n_mel_channels,
                          **hparams.model).to("cpu")

    load_checkpoint(checkpoint_path, model)
    # Cache inverted weights so decoding does not recalculate Jacobians.
    model.decoder.store_inverse()
    _ = model.eval()

    return model
Esempio n. 3
0
def main():
    """Entry point: parse preprocessing CLI options and run the pipeline."""
    print('initializing preprocessing..')
    hparams = HParams()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--base_dir', default='')
    arg_parser.add_argument(
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    arg_parser.add_argument('--dataset', default='TIMIT')
    arg_parser.add_argument('--output', default='testing_TIMIT-noisy')
    arg_parser.add_argument('--n_jobs', type=int, default=cpu_count())
    cli_args = arg_parser.parse_args()

    run_preprocess(cli_args, hparams)
Esempio n. 4
0
    def __init__(self, nbatch=128, nsteps=64):
        """Build the byte-level (nvocab=256) mLSTM graph and expose closures.

        Args:
            nbatch: Mini-batch size used when chunking inputs.
            nsteps: Sequence length (in bytes) processed per graph run.

        Side effects:
            Rebinds the module-level globals ``hps`` and ``params`` and
            opens a TensorFlow session that lives for the object's lifetime.
        """
        global hps
        hps = HParams(
            load_path='model_params/params.jl',
            nhidden=4096,
            nembd=64,
            nsteps=nsteps,
            nbatch=nbatch,
            nstates=2,
            nvocab=256,
            out_wn=False,
            rnn_wn=True,
            rnn_type='mlstm',
            embd_wn=True,
        )
        global params
        # Saved arrays 2..5 are concatenated into a single weight matrix.
        params = [np.load('model/%d.npy' % i) for i in range(15)]
        params[2] = np.concatenate(params[2:6], axis=1)
        params[3:6] = []

        # X = byte ids, M = per-step mask, S = stacked recurrent state.
        X = tf.placeholder(tf.int32, [None, hps.nsteps])
        M = tf.placeholder(tf.float32, [None, hps.nsteps, 1])
        S = tf.placeholder(tf.float32, [hps.nstates, None, hps.nhidden])
        cells, states, logits = model(X, S, M, reuse=False)

        #gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        #sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess = tf.Session()
        tf.global_variables_initializer().run(session=sess)

        def seq_rep(xmb, mmb, smb):
            # Recurrent state after running one chunk.
            return sess.run(states, {X: xmb, M: mmb, S: smb})

        def seq_cells(xmb, mmb, smb):
            # Per-step cell activations for one chunk.
            return sess.run(cells, {X: xmb, M: mmb, S: smb})

        def transform(xs):
            # Returns per-string features (row 0 of the stacked state) in
            # the original input order.
            tstart = time.time()
            xs = [preprocess(x) for x in xs]
            lens = np.asarray([len(x) for x in xs])
            # Process shortest-first so finished strings can be dropped.
            sorted_idxs = np.argsort(lens)
            unsort_idxs = np.argsort(sorted_idxs)
            sorted_xs = [xs[i] for i in sorted_idxs]
            maxlen = np.max(lens)
            offset = 0
            n = len(xs)
            smb = np.zeros((2, n, hps.nhidden), dtype=np.float32)
            for step in range(0, ceil_round_step(maxlen, nsteps), nsteps):
                start = step
                end = step + nsteps
                xsubseq = [x[start:end] for x in sorted_xs]
                # Exhausted strings yield empty slices; drop them and shift
                # the write offset into smb accordingly.
                ndone = sum([x == b'' for x in xsubseq])
                offset += ndone
                xsubseq = xsubseq[ndone:]
                sorted_xs = sorted_xs[ndone:]
                nsubseq = len(xsubseq)
                xmb, mmb = batch_pad(xsubseq, nsubseq, nsteps)
                for batch in range(0, nsubseq, nbatch):
                    start = batch
                    end = batch + nbatch
                    batch_smb = seq_rep(xmb[start:end], mmb[start:end],
                                        smb[:, offset + start:offset + end, :])
                    smb[:, offset + start:offset + end, :] = batch_smb
            features = smb[0, unsort_idxs, :]
            #print('%0.3f seconds to transform %d examples' %
            #      (time.time() - tstart, n))
            return features

        def cell_transform(xs, indexes=None):
            # Per-step cell activations; *indexes* optionally restricts the
            # returned hidden units.
            Fs = []
            xs = [preprocess(x) for x in xs]
            for xmb in tqdm(iter_data(xs, size=hps.nbatch),
                            ncols=80,
                            leave=False,
                            total=len(xs) // hps.nbatch):
                smb = np.zeros((2, hps.nbatch, hps.nhidden))
                n = len(xmb)
                xmb, mmb = batch_pad(xmb, hps.nbatch, hps.nsteps)
                smb = sess.run(cells, {X: xmb, S: smb, M: mmb})
                smb = smb[:, :n, :]
                if indexes is not None:
                    smb = smb[:, :, indexes]
                Fs.append(smb)
            Fs = np.concatenate(Fs, axis=1).transpose(1, 0, 2)
            return Fs

        self.transform = transform
        self.cell_transform = cell_transform
Esempio n. 5
0
    def __init__(self, nbatch=128, nsteps=64):
        """Build the byte-level mLSTM graph; adds sequence generation.

        Args:
            nbatch: Mini-batch size used when chunking inputs.
            nsteps: Sequence length (in bytes) processed per graph run.

        Side effects:
            Rebinds the module-level globals ``hps`` and ``params`` and
            opens a TensorFlow session that lives for the object's lifetime.
        """
        global hps
        hps = HParams(
            load_path='model_params/params.jl',
            nhidden=4096,
            nembd=64,
            nsteps=nsteps,
            nbatch=nbatch,
            nstates=2,
            nvocab=256,
            out_wn=False,
            rnn_wn=True,
            rnn_type='mlstm',
            embd_wn=True,
        )
        global params
        # Saved arrays 2..5 are concatenated into a single weight matrix.
        params = [np.load('model/%d.npy'%i) for i in range(15)]
        params[2] = np.concatenate(params[2:6], axis=1)
        params[3:6] = []

        # X = byte ids, M = per-step mask, S = stacked recurrent state.
        X = tf.placeholder(tf.int32, [None, hps.nsteps])
        M = tf.placeholder(tf.float32, [None, hps.nsteps, 1])
        S = tf.placeholder(tf.float32, [hps.nstates, None, hps.nhidden])
        cells, states, logits = model(X, S, M, reuse=False)

        sess = tf.Session()
        tf.global_variables_initializer().run(session=sess)

        def seq_rep(xmb, mmb, smb):
            # Recurrent state after running one chunk.
            return sess.run(states, {X: xmb, M: mmb, S: smb})

        def seq_cells(xmb, mmb, smb):
            # Per-step cell activations for one chunk.
            return sess.run(cells, {X: xmb, M: mmb, S: smb})

        def transform(xs):
            # Returns per-string features (row 0 of the stacked state) in
            # the original input order.
            tstart = time.time()
            xs = [preprocess(x) for x in xs]
            lens = np.asarray([len(x) for x in xs])
            # Process shortest-first so finished strings can be dropped.
            sorted_idxs = np.argsort(lens)
            unsort_idxs = np.argsort(sorted_idxs)
            sorted_xs = [xs[i] for i in sorted_idxs]
            maxlen = np.max(lens)
            offset = 0
            n = len(xs)
            smb = np.zeros((2, n, hps.nhidden), dtype=np.float32)
            for step in range(0, ceil_round_step(maxlen, nsteps), nsteps):
                start = step
                end = step+nsteps
                xsubseq = [x[start:end] for x in sorted_xs]
                # Exhausted strings yield empty slices; drop them and shift
                # the write offset into smb accordingly.
                ndone = sum([x == b'' for x in xsubseq])
                offset += ndone
                xsubseq = xsubseq[ndone:]
                sorted_xs = sorted_xs[ndone:]
                nsubseq = len(xsubseq)
                xmb, mmb = batch_pad(xsubseq, nsubseq, nsteps)
                for batch in range(0, nsubseq, nbatch):
                    start = batch
                    end = batch+nbatch
                    batch_smb = seq_rep(
                        xmb[start:end], mmb[start:end],
                        smb[:, offset+start:offset+end, :])
                    smb[:, offset+start:offset+end, :] = batch_smb
            features = smb[0, unsort_idxs, :]
            print('%0.3f seconds to transform %d examples' %
                  (time.time() - tstart, n))
            return features

        def cell_transform(xs, indexes=None):
            # Per-step cell activations; *indexes* optionally restricts the
            # returned hidden units.
            Fs = []
            xs = [preprocess(x) for x in xs]
            for xmb in tqdm(
                    iter_data(xs, size=hps.nbatch), ncols=80, leave=False,
                    total=len(xs)//hps.nbatch):
                smb = np.zeros((2, hps.nbatch, hps.nhidden))
                n = len(xmb)
                xmb, mmb = batch_pad(xmb, hps.nbatch, hps.nsteps)
                smb = sess.run(cells, {X: xmb, S: smb, M: mmb})
                smb = smb[:, :n, :]
                if indexes is not None:
                    smb = smb[:, :, indexes]
                Fs.append(smb)
            Fs = np.concatenate(Fs, axis=1).transpose(1, 0, 2)
            return Fs

        def generate_sequence(x_start, override={}, sampling = 0, len_add = '.'):
            # Continue x_start character by character.
            # NOTE(review): mutable default ``override={}`` is only read
            # here, never mutated, so it is safe in practice.
            len_start = len(x_start)
            x = bytearray(preprocess(x_start))

            # len_add: str = stop after each listed char is seen once;
            # int = number of characters to append.
            string_terminate = isinstance(len_add, str)
            len_end = (-1 if string_terminate else (len_start + len_add))

            ndone = 0
            last_chr = chr(x[-1])
            smb = np.zeros((2, 1, hps.nhidden))

            while True if string_terminate else ndone <= len_end:
                xsubseq = x[ndone:ndone+nsteps]
                ndone += len(xsubseq)
                xmb, mmb = batch_pad([xsubseq], 1, nsteps)

                #Override salient neurons
                for neuron, value in override.items():
                    smb[:, :, neuron] = value

                if ndone <= len_start:
                    #Prime hidden state with full steps
                    smb = sess.run(states, {X: xmb, S: smb, M: mmb})
                else:
                    #Incrementally add characters
                    outs, smb = sess.run([logits, states], {X: xmb, S: smb, M: mmb})
                    out = outs[-1]

                    #Do uniform weighted sampling always or only after ' '
                    if (sampling == 1 and last_chr == ' ') or sampling == 2:
                        squashed = np.exp(out) / np.sum(np.exp(out), axis=0)
                        last_chr = chr(np.random.choice(len(squashed), p=squashed))
                    else:
                        last_chr = chr(np.argmax(out))

                x.append(ord(last_chr))

                if string_terminate and (last_chr in len_add):
                    len_add = len_add.replace(last_chr, "", 1)
                    if len(len_add) == 0:
                        break

            return(x.decode())

        self.transform = transform
        self.cell_transform = cell_transform
        self.generate_sequence = generate_sequence
Esempio n. 6
0
        with torch.no_grad():
            bias_audio = waveglow.infer(mel_input, sigma=0.0).float()
            bias_spec, _ = self.stft.transform(bias_audio)

        self.register_buffer('bias_spec', bias_spec[:, :, 0][:, :, None])

    def forward(self, audio, strength=0.1):
        """Subtract the stored bias spectrum from *audio* and resynthesize.

        Args:
            audio: Waveform tensor; moved to GPU and cast to float32.
            strength: Scale applied to the bias spectrum before subtraction.

        Returns:
            The denoised waveform reconstructed via the inverse STFT.
        """
        spec, angles = self.stft.transform(audio.cuda().float())
        denoised_spec = torch.clamp(spec - self.bias_spec * strength, 0.0)
        return self.stft.inverse(denoised_spec, angles)


if __name__ == '__main__':
    # Load the Tacotron2 acoustic model on CPU in eval mode.
    config = HParams()
    checkpoint = 'tacotron2-cn.pt'
    print('loading model: {}...'.format(checkpoint))
    model = Tacotron2(config)
    model.load_state_dict(torch.load(checkpoint))
    model = model.to('cpu')
    model.eval()

    # WaveGlow vocoder runs on GPU in half precision; the invertible 1x1
    # convolutions are converted back to float32.
    waveglow_path = 'waveglow_256channels.pt'
    waveglow = torch.load(waveglow_path)['model']
    waveglow.cuda().eval().half()
    for k in waveglow.convinv:
        k.float()
    denoiser = Denoiser(waveglow)

    # Input sentence to synthesize (Chinese).
    text = "相对论直接和间接的催生了量子力学的诞生 也为研究微观世界的高速运动确立了全新的数学模型"
Esempio n. 7
0
import time

import numpy as np
import torch
from tqdm import tqdm

import config
from models.models import Tacotron2
from utils import text_to_sequence, HParams

if __name__ == '__main__':
    # Load the Tacotron2 model on CPU.
    checkpoint = 'tacotron2-cn.pt'
    print('loading model: {}...'.format(checkpoint))
    model = Tacotron2(HParams())
    model.load_state_dict(torch.load(checkpoint))
    model = model.to('cpu')
    model.eval()

    # Validation metadata: one sample per line.
    filename = config.validation_files
    with open(filename, 'r') as file:
        lines = file.readlines()

    num_samples = len(lines)
    print('num_samples: ' + str(num_samples))

    elapsed = 0
    # Batches
    for line in tqdm(lines):
        # Metadata lines are pipe-separated; field 1 is the transcript.
        tokens = line.strip().split('|')
        text = tokens[1]
        sequence = np.array(text_to_sequence(text))[None, :]
Esempio n. 8
0
    def __init__(self, nbatch=128, nsteps=64):
        """Build the byte-level mLSTM graph with neuron-tracking transforms.

        Same setup as the other variants, but ``transform`` can also record
        per-step values of selected cell units (track_indices), uses 'post'
        padding, and handles a final partial chunk in a separate pass.
        """
        global hps
        hps = HParams(
            load_path='model_params/params.jl',
            nhidden=4096,
            nembd=64,
            nsteps=nsteps,
            nbatch=nbatch,
            nstates=2,
            nvocab=256,
            out_wn=False,
            rnn_wn=True,
            rnn_type='mlstm',
            embd_wn=True,
        )
        global params
        # Saved arrays 2..5 are concatenated into a single weight matrix.
        params = [np.load('model/%d.npy' % i) for i in range(15)]
        params[2] = np.concatenate(params[2:6], axis=1)
        params[3:6] = []

        #print("n steps is", hps.nsteps);

        # X = byte ids, M = per-step mask, S = stacked recurrent state.
        X = tf.placeholder(tf.int32, [None, hps.nsteps])
        M = tf.placeholder(tf.float32, [None, hps.nsteps, 1])
        S = tf.placeholder(tf.float32, [hps.nstates, None, hps.nhidden])
        cells, states, logits = model(X, S, M, reuse=False)

        sess = tf.Session()
        tf.global_variables_initializer().run(session=sess)

        def seq_rep(xmb, mmb, smb):
            # Recurrent state after running one chunk.
            return sess.run(states, {X: xmb, M: mmb, S: smb})

        def seq_cells(xmb, mmb, smb):
            # Per-step cell activations for one chunk.
            return sess.run(cells, {X: xmb, M: mmb, S: smb})

        def transform(xs, track_indices=[]):
            # NOTE(review): mutable default ``track_indices=[]`` is only
            # read here, never mutated, so it is safe in practice.
            tstart = time.time()
            xs = [preprocess(x) for x in xs]
            lens = np.asarray([len(x) for x in xs])
            #print lens
            # Process shortest-first so finished strings can be dropped.
            sorted_idxs = np.argsort(lens)
            unsort_idxs = np.argsort(sorted_idxs)
            sorted_xs = [xs[i] for i in sorted_idxs]
            maxlen = np.max(lens)
            offset = 0
            n = len(xs)
            smb = np.zeros((2, n, hps.nhidden), dtype=np.float32)
            track_indices_values = [[] for i in range(len(track_indices))]
            rounded_steps = ceil_round_step(maxlen, nsteps)
            #print "rounded_steps", rounded_steps
            #print "maxlen", maxlen
            for step in range(0, rounded_steps, nsteps):
                start = step
                end = step + nsteps
                #print "start is", start, "and end is", end
                xsubseq = [x[start:end] for x in sorted_xs]
                # Exhausted strings yield empty slices; drop them and shift
                # the write offset into smb accordingly.
                ndone = sum([x == b'' for x in xsubseq])
                offset += ndone
                xsubseq = xsubseq[ndone:]
                sorted_xs = sorted_xs[ndone:]
                nsubseq = len(xsubseq)
                #print "nsubseq is", nsubseq
                xmb, mmb = batch_pad(xsubseq, nsubseq, nsteps, 'post')
                #print "xmb is", xmb
                #print "iterating through each batch for step", step
                for batch in range(0, nsubseq, nbatch):
                    start = batch
                    end = batch + nbatch
                    #print "scanning from", start, "to", end
                    #print "xmb - ", xmb[start:end], xmb.shape

                    batch_smb = seq_rep(xmb[start:end], mmb[start:end],
                                        smb[:, offset + start:offset + end, :])
                    #print "batch_smb", batch_smb, batch_smb.shape, batch_smb[0][0][2388], batch_smb[1][0][2388]
                    #smb[:, offset+start:offset+end, :] = batch_smb
                    # Cells are fetched from the PRE-update state on purpose;
                    # smb is written only after both runs.
                    batch_cells = seq_cells(
                        xmb[start:end], mmb[start:end],
                        smb[:, offset + start:offset + end, :])
                    smb[:, offset + start:offset + end, :] = batch_smb
                    #print "batch_cells len", len(batch_cells)
                    #print batch_cells[len(batch_cells)-1][0][2388]
                    #print batch_smb[0][0][2388]
                    #smb[0, offset+start:offset+end, :] = batch_cells[nsteps-1]
                    if len(track_indices):
                        #print "tracking sentiment..", batch_smb.shape, batch_cells.shape
                        for i, index in enumerate(track_indices):
                            #print "sentiment neuron values -- ", batch_cells[:,0,index]
                            track_indices_values[i].append(batch_cells[:, 0,
                                                                       index])

            #print "rounded_steps after", rounded_steps
            #print "maxlen after", maxlen

            # Final partial chunk (maxlen not a multiple of nsteps).
            if rounded_steps < maxlen:
                start = rounded_steps
                end = maxlen
                #print "start is", start, "and end is", end
                xsubseq = [x[start:end] for x in sorted_xs]
                ndone = sum([x == b'' for x in xsubseq])
                offset += ndone
                xsubseq = xsubseq[ndone:]
                sorted_xs = sorted_xs[ndone:]
                nsubseq = len(xsubseq)
                #print "xsubseq is", xsubseq
                xmb, mmb = batch_pad(xsubseq, nsubseq, nsteps, 'post')
                #xmb, mmb = batch_pad(xsubseq, nsubseq, nsteps)
                #print "xmb is", xmb
                #print "mmb is", mmb
                #print "n subseq is", nsubseq
                #print "nbatch is", nbatch
                #print "iterating through each batch for step", step
                for batch in range(0, nsubseq, nbatch):
                    start = batch
                    end = batch + nbatch
                    #print "scanning from", start, "to", end
                    batch_smb = seq_rep(xmb[start:end], mmb[start:end],
                                        smb[:, offset + start:offset + end, :])
                    #print "batch_smb", batch_smb, batch_smb.shape, batch_smb[0][0][2388], batch_smb[1][0][2388]
                    #print "offset", offset, start, end
                    #smb[:, offset+start:offset+end, :] = batch_smb
                    batch_cells = seq_cells(
                        xmb[start:end], mmb[start:end],
                        smb[:, offset + start:offset + end, :])
                    smb[:, offset + start:offset + end, :] = batch_smb
                    #print "batch_cells", batch_cells.shape
                    #smb[0, offset+start:offset+end, :] = batch_cells[maxlen-rounded_steps-1]
                    #print "smb----", smb
                    if len(track_indices):
                        #print "tracking sentiment..", batch_smb.shape, batch_cells.shape
                        for i, index in enumerate(track_indices):
                            #print "sentiment neuron values -- ", batch_cells[:,0,index]
                            track_indices_values[i].append(batch_cells[:, 0,
                                                                       index])

                #print "done with batch iteration"
            #print "unsort_idxs", unsort_idxs
            #print smb.shape
            features = smb[0, unsort_idxs, :]
            print('%0.3f seconds to transform %d examples' %
                  (time.time() - tstart, n))
            return features, track_indices_values

        def cell_transform(xs, indexes=None):
            # Per-step cell activations; *indexes* optionally restricts the
            # returned hidden units.
            Fs = []
            xs = [preprocess(x) for x in xs]
            for xmb in tqdm(iter_data(xs, size=hps.nbatch),
                            ncols=80,
                            leave=False,
                            total=len(xs) // hps.nbatch):
                smb = np.zeros((2, hps.nbatch, hps.nhidden))
                n = len(xmb)
                xmb, mmb = batch_pad(xmb, hps.nbatch, hps.nsteps)
                smb = sess.run(cells, {X: xmb, S: smb, M: mmb})
                smb = smb[:, :n, :]
                if indexes is not None:
                    smb = smb[:, :, indexes]
                Fs.append(smb)
            Fs = np.concatenate(Fs, axis=1).transpose(1, 0, 2)
            return Fs

        self.transform = transform
        self.cell_transform = cell_transform

        def generate_sequence(x_start, override={}, sampling=0, len_add='.'):
            """Continue a given sequence.
            Args:
                x_start (string): The string to be continued.
                override (dict): Values of the hidden state to override
                  with keys of the dictionary as index.
                sampling (int): 0 greedy argmax, 2 weighted random from probability
                  distribution, 1 weighted but only once after each word.
                len_add (int, string, None):
                  If int, the number of characters to be added.
                  If string, returns after each contained character was seen once.
            Returns:
                The completed string including transformation and paddings from preprocessing.
            """

            len_start = len(x_start)
            x_preprocessed = preprocess(x_start)
            x = bytearray(x_preprocessed)

            string_terminate = isinstance(len_add, str)
            len_end = (-1 if string_terminate else (len_start + len_add))

            ndone = 0
            last_chr = chr(x[-1])
            smb = np.zeros((2, 1, hps.nhidden))

            while True if string_terminate else ndone <= len_end:
                xsubseq = x[ndone:ndone + nsteps]
                ndone += len(xsubseq)
                xmb, mmb = batch_pad([xsubseq], 1, nsteps)

                #Override salient neurons
                for neuron, value in override.items():
                    smb[:, :, neuron] = value

                if ndone <= len_start:
                    #Prime hidden state with full steps
                    smb = sess.run(states, {X: xmb, S: smb, M: mmb})
                else:
                    #Incrementally add characters
                    outs, smb = sess.run([logits, states], {
                        X: xmb,
                        S: smb,
                        M: mmb
                    })
                    out = outs[-1]

                    #Do uniform weighted sampling always or only after ' '
                    if (sampling == 1 and last_chr == ' ') or sampling == 2:
                        squashed = np.exp(out) / np.sum(np.exp(out), axis=0)
                        last_chr = chr(
                            np.random.choice(len(squashed), p=squashed))
                    else:
                        last_chr = chr(np.argmax(out))

                x.append(ord(last_chr))

                if string_terminate and (last_chr in len_add):
                    len_add = len_add.replace(last_chr, "", 1)
                    if len(len_add) == 0:
                        break

            return (x.decode())

        # NOTE(review): transform/cell_transform were already assigned above;
        # the repeat is redundant but harmless.
        self.transform = transform
        self.cell_transform = cell_transform
        self.generate_sequence = generate_sequence
Esempio n. 9
0
def main():
    """Evaluate a trained VAD model per noise/SNR condition.

    For every SNR x noise combination found in the metadata, computes AUC,
    F1, DCF, accuracy and precision, then prints per-condition and global
    metrics aggregated from the pooled confusion-matrix counts.
    """
    DEVICE = torch.device('cuda')
    CPU = torch.device('cpu')
    input_path = './testing_TIMIT_noisy/train.txt'
    print('input_path: {}'.format(input_path))
    path_checkpoint = "./checkpoint/STAM_weights_4_1.0.pth"  # checkpoint path
    print('loading checkpoint: {}'.format(path_checkpoint))
    with open(input_path, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]

    # Test conditions: every SNR prefix combined with every noise type.
    noises = ['airport', 'babble', 'car', 'exhibition', 'restaurant', 'street', 'subway', 'train']
    snrs = ['SNR(-5)_', 'SNR(00)_', 'SNR(05)_', 'SNR(10)_']
    # snrs = ['SNR(-10)_', 'SNR(-15)_']
    # snrs = ['SNR(-20)_', 'SNR(-25)_']
    aucs = []
    features = []
    F1 = []
    DCF = []
    ACC = []
    PRECISION = []
    for snr in snrs:
        for noise in noises:
            features.append(snr + noise)

    gc.collect()
    torch.cuda.empty_cache()
    hparams = HParams()
    model = VAD(hparams).to(DEVICE)
    checkpoint = torch.load(path_checkpoint)
    model.load_state_dict(checkpoint['model'])

    with torch.no_grad():
        model.eval()
        # Pooled confusion-matrix counts across all conditions.
        TN = 0
        TP = 0
        FP = 0
        FN = 0

        for feature in tqdm(features):
            labels = []
            vads = []
            count = 0
            # Restrict metadata to files belonging to this condition.
            test_meta = [meta for meta in metadata if feature in meta[0]]
            test_dataset = SpeechDataset(input_path, test_meta, hparams)
            test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)

            # NOTE(review): variables are named train_* but hold test data.
            for train_data, train_label in test_loader:
                count += 1
                train_data = train_data[0]
                train_label = train_label[0]
                train_data = torch.as_tensor(train_data, dtype=torch.float32).to(DEVICE)
                # Center frame of the bDNN context window is the target label.
                label = train_label[:, int(np.floor(int(2 * (hparams.w - 1) / hparams.u + 3) / 2))]
                midnet_output, postnet_output, alpha = model(train_data)
                _, vad = bdnn_prediction(F.sigmoid(postnet_output).cpu().detach().numpy(), w=hparams.w, u=hparams.u)

                vads = np.concatenate((vads, vad[:, 0]), axis=None)
                labels = np.concatenate((labels, label), axis=None)

            fpr, tpr, _ = metrics.roc_curve(labels, vads, pos_label=1)
            auc = metrics.auc(fpr, tpr)
            aucs.append(auc)
            # A = [[TN, FP], [FN, TP]] for this condition.
            A = confusion_matrix(np.int8(labels), np.int8(vads.round()))
            TN += A[0][0]
            FN += A[1][0]
            FP += A[0][1]
            TP += A[1][1]
            f1 = 2 * A[1][1] / (2 * A[1][1] + A[0][1] + A[1][0])
            # Detection cost: misses weighted 0.75, false alarms 0.25.
            dcf = (0.75 * A[1][0] + 0.25 * A[0][1]) / sum(sum(A))
            acc = (A[0][0] + A[1][1]) / sum(sum(A))
            precision = A[1][1] / (A[1][1] + A[0][1])
            ACC.append(acc)
            PRECISION.append(precision)
            F1.append(f1)
            DCF.append(dcf)
            print('[{}]: AUC: {:.2f}, F1-score: {:.2f}, DCF: {:.2f}, acc: {:.2f}, precision: {:.2f}'.format(
                feature, auc * 100, f1 * 100, dcf * 100, acc * 100, precision * 100))
            gc.collect()
            torch.cuda.empty_cache()

    for i in range(len(features)):
        print('[{}]: AUC: {:.2f}, F1-score: {:.2f}, DCF: {:.2f}, acc: {:.2f}, precision: {:.2f}'.format(
            features[i], aucs[i]*100, F1[i]*100, DCF[i]*100, ACC[i]*100, PRECISION[i]*100))

    print('Global AUC: {:.2f}, F1-score: {:.2f}, DCF: {:.2f}, acc: {:.2f}, precision: {:.2f}'.format(
            np.mean(aucs)*100, (2*TP / (2*TP+FN+FP))*100, ((0.75*FN+0.25*FP) / (TN+TP+FN+FP))*100,
        (TP+TN)/(TN+TP+FN+FP)*100, TP/(TP+FP)*100))
Esempio n. 10
0
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils import HParams
hparamas = HParams()  # module-level hyper-parameters; NOTE(review): "hparamas" looks like a typo for "hparams", kept as other code may reference it


class spec_conv(nn.Module):
    """Stacked 2-D convolutional front-end configured from ``hparamas``.

    Builds two parallel Conv2d stacks (conv_layers1 / conv_layers2) whose
    output channel count doubles at each layer, plus batch-norm and
    max-pool module lists populated elsewhere in the constructor.
    """
    def __init__(self, hparamas=None):
        super(spec_conv, self).__init__()
        self.layers = hparamas.layers  # 4
        self.filter_width = hparamas.filter_width
        self.conv_channels = hparamas.conv_channels
        self.conv_layers1 = nn.ModuleList()
        self.conv_layers2 = nn.ModuleList()
        self.batch_norm = nn.ModuleList()
        self.sigmoid = nn.Sigmoid()
        self.max_pool2d = nn.ModuleList()

        # Layer 0 maps the single input channel to conv_channels outputs;
        # (2**layer) doubles the width for subsequent layers.
        for layer in range(self.layers):
            if layer == 0:
                self.conv_layers1.append(
                    nn.Conv2d(in_channels=1,
                              out_channels=self.conv_channels * (2**layer),
                              kernel_size=self.filter_width,
                              padding=(1, 1)))
                self.conv_layers2.append(
                    nn.Conv2d(in_channels=1,
                              out_channels=self.conv_channels * (2**layer),
                              kernel_size=self.filter_width,
                              padding=(1, 1)))
Esempio n. 11
0
    def __init__(self, nbatch=128, nsteps=64):
        """Build the byte-level mLSTM graph (GPU memory-growth variant).

        File paths are resolved relative to the module-level
        ``project_path``, and ``transform`` assumes a fixed maximum input
        length of 260 bytes instead of measuring the longest input.
        """
        global hps
        hps = HParams(
            load_path=project_path + 'model_params/params.jl',
            nhidden=4096,
            nembd=64,
            nsteps=nsteps,
            nbatch=nbatch,
            nstates=2,
            nvocab=256,
            out_wn=False,
            rnn_wn=True,
            rnn_type='mlstm',
            embd_wn=True,
        )
        global params
        # Saved arrays 2..5 are concatenated into a single weight matrix.
        params = [
            np.load(project_path + 'model/%d.npy' % i) for i in range(15)
        ]
        params[2] = np.concatenate(params[2:6], axis=1)
        params[3:6] = []

        # X = byte ids, M = per-step mask, S = stacked recurrent state.
        X = tf.placeholder(tf.int32, [None, hps.nsteps])
        M = tf.placeholder(tf.float32, [None, hps.nsteps, 1])
        S = tf.placeholder(tf.float32, [hps.nstates, None, hps.nhidden])
        cells, states, logits = model(X, S, M, reuse=False)

        # Grow GPU memory on demand instead of pre-allocating all of it.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        tf.global_variables_initializer().run(session=sess)

        def seq_rep(xmb, mmb, smb):
            # Recurrent state after running one chunk.
            return sess.run(states, {X: xmb, M: mmb, S: smb})

        def transform(xs):
            # Returns per-string features (row 0 of the stacked state) in
            # the original input order.
            # tstart = time.time()
            xs = [preprocess(x) for x in xs]
            lens = np.asarray([len(x) for x in xs])
            # Process shortest-first so finished strings can be dropped.
            sorted_idxs = np.argsort(lens)
            unsort_idxs = np.argsort(sorted_idxs)
            sorted_xs = [xs[i] for i in sorted_idxs]
            maxlen = 260  # np.max(lens)
            offset = 0
            n = len(xs)
            smb = np.zeros((2, n, hps.nhidden), dtype=np.float32)
            for step in range(0, ceil_round_step(maxlen, nsteps), nsteps):
                sent_step_start = step
                sent_step_end = step + nsteps
                xsubseq = [x[sent_step_start:sent_step_end] for x in sorted_xs]
                # Exhausted strings yield empty slices; drop them and shift
                # the write offset into smb accordingly.
                ndone = sum([x == b'' for x in xsubseq])
                offset += ndone
                xsubseq = xsubseq[ndone:]
                sorted_xs = sorted_xs[ndone:]
                nsubseq = len(xsubseq)
                xmb, mmb = batch_pad(xsubseq, nsubseq, nsteps)
                # for i in range(0, nsubseq):
                #     smb[:, offset+i:offset+i+1, :] = seq_rep(
                #                                         xmb[i:i+1], mmb[i:i+1],
                #                                         smb[:, offset+i:offset+i+1, :])
                for batch in range(0, nsubseq, nbatch):
                    batch_st = batch
                    batch_end = batch + nbatch
                    batch_smb = seq_rep(
                        xmb[batch_st:batch_end], mmb[batch_st:batch_end],
                        smb[:, offset + batch_st:offset + batch_end, :])
                    smb[:, offset + batch_st:offset + batch_end, :] = batch_smb
            features = smb[0, unsort_idxs, :]
            # print('%0.3f seconds to transform %d examples' %
            #       (time.time() - tstart, n))
            return features

        self.transform = transform
0
            "epoch": e + 1,
            'step': start_step,
            'scheduler_lr': scheduler._rate,
            'scheduler_step': scheduler._step
        }
        if not os.path.isdir("./checkpoint"):
            os.mkdir("./checkpoint")
        torch.save(
            checkpoint, './checkpoint/weights_{}_acc_{:.2f}.pth'.format(
                e + 1, test_acc * 100))
    return train_losses, test_losses


if __name__ == '__main__':
    RESUME = True  # resume training from a saved checkpoint
    hparams = HParams()
    metadata_filename = './training_data/train.txt'
    # Hold out 5% of the metadata for validation; fixed seed for repeatability.
    metadata, training_idx, validation_idx = train_valid_split(
        metadata_filename, hparams, test_size=0.05, seed=0)

    train_dataset = SpeechDataset(metadata_filename,
                                  list(np.array(metadata)[training_idx]),
                                  hparams)
    test_dataset = SpeechDataset(metadata_filename,
                                 list(np.array(metadata)[validation_idx]),
                                 hparams)
    # DataLoader positional args: batch_size=1, shuffle=True.
    train_loader = DataLoader(train_dataset,
                              1,
                              True,
                              num_workers=4,
                              pin_memory=False)
def main():
    parser = argparse.ArgumentParser()

    # Required Parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the .tsv files (or other data files) for the task."
    )
    parser.add_argument("--model_type",
                        default=None,
                        type=str,
                        help="bert OR lstm",
                        required=True)
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output_result directory where the model predictions will be written."
    )
    parser.add_argument("--output_mode",
                        default="regression",
                        type=str,
                        help="classification or regression",
                        required=True)
    parser.add_argument("--domain",
                        default="celtrion",
                        type=str,
                        help="celtrion",
                        required=True)
    parser.add_argument("--target",
                        default="close",
                        type=str,
                        help="close, open, volume",
                        required=True)

    # Other Parameters
    parser.add_argument("--use_gpu",
                        help="use gpu=True or False",
                        default=True)
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--per_gpu_train_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for classifier.")
    parser.add_argument("--per_gpu_eval_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for classifier.")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval.")
    parser.add_argument("--window_size",
                        default=50,
                        type=int,
                        help="window size for lstm")

    args = parser.parse_args()

    # use GPU ?
    if not is_gpu_available():
        args.use_gpu = False

    # Model
    model_type = args.model_type
    output_mode = args.output_mode

    # data
    data_root = args.data_dir

    # output
    output_root = args.output_dir
    prepare_dir(output_root)

    fns = {
        'input': {
            'train': os.path.join(data_root, 'train.csv'),
            'test': os.path.join(data_root, 'test.csv')
        },
        'output': {
            # 'csv' : os.path.join() # 필요시에 ~~
        },
        'model': os.path.join(output_root, 'model.out')
    }

    # Train
    if args.do_train:
        hps = HParams(
            # domain -------------------------------------------
            domain=args.domain,
            target=args.target,

            # gpu setting ----------------------------------------
            use_gpu=args.use_gpu,

            # train settings ----------------------------------------
            learning_rate=args.learning_rate,
            num_train_epochs=args.num_train_epochs,
            per_gpu_train_batch_size=args.per_gpu_train_batch_size,
            window_size=args.window_size,

            # model settings ----------------------------------------
            model_type=model_type,
            output_mode=output_mode)

        hps.show()

        print("*********** Start Training ***********")
        run_train(fns['input']['train'], fns['model'], hps)

    if args.do_eval:
        print("*********** Start Evaluating ***********")

        batch_size = args.per_gpu_eval_batch_size

        run_eval(fns['input']['test'], fns['model'], batch_size)