Code example #1
 def build_model(self, wavenet_mel):
     hps = self.hps
     ns = self.hps.ns
     emb_size = self.hps.emb_size
     c = 80 if wavenet_mel else 513
     patch_classify_kernel = (3, 4) if wavenet_mel else (17, 4)
     self.Encoder = cc(Encoder(c_in=c, ns=ns, dp=hps.enc_dp))
     self.Decoder = cc(
         Decoder(c_out=c, ns=ns, c_a=hps.n_speakers, emb_size=emb_size))
     self.Generator = cc(
         Decoder(c_out=c, ns=ns, c_a=hps.n_speakers, emb_size=emb_size))
     self.SpeakerClassifier = cc(
         SpeakerClassifier(ns=ns, n_class=hps.n_speakers, dp=hps.dis_dp))
     self.PatchDiscriminator = cc(
         nn.DataParallel(
             PatchDiscriminator(
                 ns=ns,
                 n_class=hps.n_speakers,
                 classify_kernel_size=patch_classify_kernel)))
     betas = (0.5, 0.9)
     params = list(self.Encoder.parameters()) + list(
         self.Decoder.parameters())
     self.ae_opt = optim.Adam(params, lr=self.hps.lr, betas=betas)
     self.clf_opt = optim.Adam(self.SpeakerClassifier.parameters(),
                               lr=self.hps.lr,
                               betas=betas)
     self.gen_opt = optim.Adam(self.Generator.parameters(),
                               lr=self.hps.lr,
                               betas=betas)
     self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(),
                                 lr=self.hps.lr,
                                 betas=betas)
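
A note on the `cc` wrapper that nearly every PyTorch example on this page passes modules and tensors through: its definition is never shown, and it is not even one function (in examples #5 and #12, `cc` names clustering-coefficient and correlation helpers instead). For the single-argument PyTorch usage, a minimal sketch under the assumption that it only moves its argument onto the GPU when one is available:

import torch

def cc(net):
    # hypothetical reconstruction, inferred from the call sites above:
    # move a module or tensor to the GPU if available, else leave it on CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return net.to(device)

The two-argument form in examples #2, #3, and #9 (`cc(batch, self.no_cuda)`) presumably also converts raw Python lists to tensors and respects a `no_cuda` flag; that variant is not reconstructed here.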
Code example #2
    def sequential_generation(self,
                              seed_text,
                              batch_size=10,
                              max_len=15,
                              leed_out_len=15,
                              top_k=0,
                              temperature=None,
                              sample=True):
        """ Generate one word at a time, in L->R order """
        seed_len = len(seed_text) + 1  # +1 to account for CLS
        batch = self.get_init_text(seed_text, max_len, batch_size)

        for ii in range(max_len - seed_len):
            # inp = [sent[:seed_len + ii + leed_out_len] + [self.sep_id] for sent in batch]
            inp = cc(batch, self.no_cuda)
            inp_mask = [
                np.expand_dims(i != self.sep_id, -2).astype(np.int32)
                for i in inp
            ]
            out, break_probs = self.model(inp.long(),
                                          cc(inp_mask, self.no_cuda)[0])
            idxs = self.generate_step(out,
                                      gen_idx=seed_len + ii,
                                      top_k=top_k,
                                      temperature=temperature,
                                      sample=sample)
            for jj in range(batch_size):
                batch[jj][seed_len + ii] = idxs[jj]

        return self.untokenize_batch(batch)
Code example #3
    def parallel_generation(self,
                            seed_text,
                            batch_size=10,
                            max_len=15,
                            top_k=0,
                            temperature=None,
                            max_iter=300,
                            sample=True,
                            print_every=10,
                            verbose=True):
        """ Generate for all positions at each time step """
        seed_len = len(seed_text) + 1  # +1 to account for CLS
        batch = self.get_init_text(seed_text, max_len, batch_size)
        inp_mask = []

        for ii in range(max_iter):
            inp = cc(batch, self.no_cuda)
            inp_mask.append(
                np.expand_dims(inp != self.sep_id, -2).astype(np.int32))
            out, break_probs = self.model(inp.long(),
                                          cc(inp_mask, self.no_cuda)[0])
            for kk in range(max_len - seed_len):
                idxs = self.generate_step(out,
                                          gen_idx=seed_len + kk,
                                          top_k=top_k,
                                          temperature=temperature,
                                          sample=sample)
                for jj in range(batch_size):
                    batch[jj][seed_len + kk] = idxs[jj]

            if verbose and np.mod(ii + 1, print_every) == 0:
                print("iter", ii + 1, self.data_utils.id2sent(batch[0]))

        return self.untokenize_batch(batch)
Code example #4
 def build_model(self):
     hps = self.hps
     ns = self.hps.ns
     emb_size = self.hps.emb_size
     self.Encoder = cc(Encoder(ns=ns, dp=hps.enc_dp))
     self.Decoder = cc(Decoder(ns=ns, c_a=hps.n_speakers,
                               emb_size=emb_size))
     self.Generator = cc(
         Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size))
     self.SpeakerClassifier = cc(
         SpeakerClassifier(ns=ns, n_class=hps.n_speakers, dp=hps.dis_dp))
     self.PatchDiscriminator = cc(
         nn.DataParallel(PatchDiscriminator(ns=ns, n_class=hps.n_speakers)))
     betas = (0.5, 0.9)
     params = list(self.Encoder.parameters()) + list(
         self.Decoder.parameters())
     self.ae_opt = optim.Adam(params, lr=self.hps.lr, betas=betas)
     self.clf_opt = optim.Adam(self.SpeakerClassifier.parameters(),
                               lr=self.hps.lr,
                               betas=betas)
     self.gen_opt = optim.Adam(self.Generator.parameters(),
                               lr=self.hps.lr,
                               betas=betas)
     self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(),
                                 lr=self.hps.lr,
                                 betas=betas)
Code example #5
def graphEval(X, truth_spec, true_sp, true_cc, true_dd, true_bc):
    sp_emd_cur = []
    cc_emd_cur = []
    dd_emd_cur = []
    assorts_cur = []
    spec_l2_cur = []
    spec_l2_lin_cur = []
    bc_emd_cur = []

    for j in range(20):
        A = gnp(X)
        # nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array
        # is the drop-in replacement
        G = nx.from_numpy_array(A)
        print(nx.is_connected(G))
        if not nx.is_connected(G):
            # nx.connected_component_subgraphs was removed in NetworkX 2.4
            Gc = G.subgraph(max(nx.connected_components(G), key=len))
            print(len(Gc.nodes()))
        sp = utils.sp(A)
        cc = utils.cc(A)
        dd = utils.degree_sequence(A)
        spec_weight_l2 = l2_exp_weight(truth_spec, utils.spectrum(A))
        spec_weight_l2_lin = l2_lin_weight(truth_spec, utils.spectrum(A))
        bc = sorted(nx.betweenness_centrality(G).values())

        sp_emd_cur.append(utils.emd(sp, true_sp))
        cc_emd_cur.append(utils.emd(cc, true_cc))
        dd_emd_cur.append(utils.emd(dd, true_dd))
        assorts_cur.append(nx.degree_assortativity_coefficient(G))
        spec_l2_cur.append(spec_weight_l2)
        spec_l2_lin_cur.append(spec_weight_l2_lin)
        bc_emd_cur.append(utils.emd(bc, true_bc))

    return np.mean(sp_emd_cur), np.mean(cc_emd_cur), np.mean(
        dd_emd_cur), np.mean(assorts_cur), np.mean(spec_l2_cur), np.mean(
            bc_emd_cur), np.mean(spec_l2_lin_cur)
Code example #6
    def forward(self, enc_output, enc_len):
        enc_len = enc_len.cpu().numpy().tolist()
        for i, (layer, project_layer) in enumerate(
                zip(self.layers, self.project_layers)):
            total_length = enc_output.size(1)
            xs_pack = pack_padded_sequence(enc_output,
                                           enc_len,
                                           batch_first=True)
            layer.flatten_parameters()
            xs, (_, _) = layer(xs_pack)
            ys_pad, enc_len = pad_packed_sequence(xs,
                                                  batch_first=True,
                                                  total_length=total_length)
            enc_len = enc_len.numpy()

            downsub = self.downsample[i]
            if downsub > 1:
                # note: this actually doubles the time axis by halving the
                # feature dimension (the inverse of example #13's subsampling)
                ys_pad = ys_pad.contiguous().view(ys_pad.size(0),
                                                  ys_pad.size(1) * 2,
                                                  ys_pad.size(2) // 2)
                enc_len = [(length * 2) for length in enc_len]
            ys_pad = F.dropout(ys_pad, 0.1, training=self.training)
            projected = project_layer(ys_pad)
            enc_output = self.activation(projected)
        output_lens = cc(torch.from_numpy(np.array(enc_len, dtype=np.int64)))
        return enc_output, output_lens
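
Note the `total_length=total_length` argument to `pad_packed_sequence` above: it pins the padded output to the original time axis, which matters when a batch is split across replicas (for example under `nn.DataParallel`) and a replica's longest sequence is shorter than the global maximum. A self-contained illustration of the pack/pad round trip:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

x = torch.randn(2, 5, 3)   # batch x time x feat, sorted by decreasing length
lens = [5, 3]
packed = pack_padded_sequence(x, lens, batch_first=True)
y, out_lens = pad_packed_sequence(packed, batch_first=True, total_length=5)
print(y.shape, out_lens)   # torch.Size([2, 5, 3]) tensor([5, 3])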
Code example #7
    def __init__(self, output_dim, embedding_dim, hidden_dim, dropout_rate,
                 n_layers, bos, eos, pad, ls_weight, labeldist):
        super(LM, self).__init__()

        self.bos, self.eos, self.pad = bos, eos, pad
        self.embedding = torch.nn.Embedding(output_dim,
                                            embedding_dim,
                                            padding_idx=pad)
        self.LSTM = torch.nn.LSTM(embedding_dim,
                                  hidden_dim,
                                  num_layers=n_layers,
                                  batch_first=True,
                                  dropout=dropout_rate if n_layers > 1 else 0)

        # re-init
        weight_init(self.LSTM)

        self.output_layer = torch.nn.Linear(hidden_dim, output_dim)
        self.dropout_layer = torch.nn.Dropout(p=dropout_rate)

        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.dropout_rate = dropout_rate
        self.n_layers = n_layers

        # label smoothing hyperparameters
        self.ls_weight = ls_weight
        self.labeldist = labeldist
        if labeldist is not None:
            self.vlabeldist = cc(torch.from_numpy(np.array(labeldist, dtype=np.float32)))
Code example #8
    def __init__(self,
                 output_dim,
                 embedding_dim,
                 hidden_dim,
                 attention,
                 att_odim,
                 dropout_rate,
                 bos,
                 eos,
                 pad,
                 ls_weight=0,
                 labeldist=None):
        super(Decoder, self).__init__()
        self.bos, self.eos, self.pad = bos, eos, pad
        self.embedding = torch.nn.Embedding(output_dim,
                                            embedding_dim,
                                            padding_idx=pad)
        self.LSTMCell = torch.nn.LSTMCell(embedding_dim + att_odim, hidden_dim)
        self.output_layer = torch.nn.Linear(hidden_dim, output_dim)
        self.attention = attention

        self.hidden_dim = hidden_dim
        self.att_odim = att_odim
        self.dropout_rate = dropout_rate

        # label smoothing hyperparameters
        self.ls_weight = ls_weight
        self.labeldist = labeldist
        if labeldist is not None:
            self.vlabeldist = cc(
                torch.from_numpy(np.array(labeldist, dtype=np.float32)))
Code example #9
    def parallel_sequential_generation(self,
                                       seed_text,
                                       batch_size=10,
                                       max_len=15,
                                       top_k=0,
                                       temperature=None,
                                       max_iter=300,
                                       burnin=200,
                                       print_every=10,
                                       verbose=True):
        """ Generate for one random position at a timestep

        args:
            - burnin: during burn-in period, sample from full distribution; afterwards take argmax
        """
        seed_len = len(seed_text) + 1  # +1 to account for CLS
        batch = self.get_init_text(seed_text, max_len, batch_size)
        inp_mask = []

        for ii in range(max_iter):
            kk = np.random.randint(0, max_len - seed_len)
            for jj in range(batch_size):
                batch[jj][seed_len + kk] = self.mask_id
            inp = cc(batch, self.no_cuda)
            inp_mask.append(
                np.expand_dims(inp != self.sep_id, -2).astype(np.int32))
            out, break_probs = self.model(inp.long(),
                                          cc(inp_mask, self.no_cuda)[0])
            topk = top_k if (ii >= burnin) else 0
            idxs = self.generate_step(out,
                                      gen_idx=seed_len + kk,
                                      top_k=topk,
                                      temperature=temperature,
                                      sample=(ii < burnin))
            for jj in range(batch_size):
                batch[jj][seed_len + kk] = idxs[jj]

            if verbose and np.mod(ii + 1, print_every) == 0:
                for_print = self.data_utils.id2sent(batch[0]).split()
                for_print = for_print[:seed_len + kk + 1] + [
                    '(*)'
                ] + for_print[seed_len + kk + 1:]
                print("iter", ii + 1, " ".join(for_print))

        return self.untokenize_batch(batch)
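
All three generation strategies (examples #2, #3, and #9) defer to `self.generate_step`, which this page never shows. A sketch of what it plausibly does, written here as a free function and inferred purely from the call sites: `out` holds per-position logits, `gen_idx` is the position to fill, and `top_k`/`temperature`/`sample` control the sampling:

import torch

def generate_step(out, gen_idx, top_k=0, temperature=None, sample=True):
    # hypothetical reconstruction: choose the next token id at position
    # gen_idx from model logits shaped (batch, seq_len, vocab)
    logits = out[:, gen_idx]
    if temperature is not None:
        logits = logits / temperature
    if top_k > 0:
        top = torch.topk(logits, top_k, dim=-1)
        choice = torch.multinomial(torch.softmax(top.values, dim=-1), 1)
        return top.indices.gather(-1, choice).squeeze(-1)
    if sample:
        return torch.multinomial(torch.softmax(logits, dim=-1), 1).squeeze(-1)
    return torch.argmax(logits, dim=-1)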
Code example #10
 def mask_and_cal_sum(self, log_probs, ys, mask=None):
     if mask is None: 
         seq_len = [y.size(0) + 1 + 4 for y in ys]
         mask = cc(_seq_mask(seq_len=seq_len, max_len=log_probs.size(1)))
     else:
         seq_len = [y.size(0) for y in ys]
     # divide by total length
     loss = torch.sum(log_probs * mask) / sum(seq_len)
     return loss
Code example #11
 def mask_and_cal_loss(self, log_probs, ys, mask=None):
     # mask is batch x max_len
     # add 1 to EOS
     if mask is None:
         seq_len = [y.size(0) + 1 for y in ys]
         mask = cc(_seq_mask(seq_len=seq_len, max_len=log_probs.size(1)))
     else:
         seq_len = [y.size(0) for y in ys]
     # divide by total length
     loss = -torch.sum(log_probs * mask) / sum(seq_len)
     return loss
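
Both masking helpers (examples #10 and #11) depend on `_seq_mask`, which is not defined on this page. A sketch under the assumption that it returns a batch x max_len tensor holding 1.0 at valid positions and 0.0 at padding:

import torch

def _seq_mask(seq_len, max_len):
    # hypothetical reconstruction: 1.0 wherever position < sequence length
    lengths = torch.as_tensor(seq_len, dtype=torch.int64)   # (batch,)
    positions = torch.arange(max_len).unsqueeze(0)          # 1 x max_len
    return (positions < lengths.unsqueeze(1)).float()       # batch x max_len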
Code example #12
 def loss(self, x, training=True):
     with tf.name_scope('loss'):
         z = self._encode(x, training=training)
         x_h = self._decode(z, training=training)
         loss = dict()
         loss['pmse'] = p_mse(x, x_h)
         loss['corr'] = cc(x, x_h)
         #loss['diff'] = l1(x, x_h)
         tf.summary.scalar('pmse', loss['pmse'])
         tf.summary.scalar('corr', loss['corr'])
         #tf.summary.scalar('diff', loss['diff'])
     return loss
Code example #13
 def forward(self, xpad, ilens):
     first_out = None
     first_lens = None
     for i, (layer, project_layer) in enumerate(
             zip(self.layers, self.project_layers)):
         total_length = xpad.size(1)
         xs_pack = pack_padded_sequence(xpad, ilens, batch_first=True)
         layer.flatten_parameters()
         xs, (_, _) = layer(xs_pack)
         ys_pad, ilens = pad_packed_sequence(xs,
                                             batch_first=True,
                                             total_length=total_length)
         ys_pad = F.dropout(ys_pad,
                            self.dropout_rate,
                            training=self.training)
         ilens = ilens.numpy()
         sub = self.subsample[i]
         if sub > 1:
             # pad one frame if it's not able to divide into 2 equal length
             if ys_pad.size(1) % 2 == 1:
                 ys_pad = F.pad(ys_pad.transpose(1, 2), (0, 1),
                                mode='replicate').transpose(1, 2)
             # concat two frames
             ys_pad = ys_pad.contiguous().view(ys_pad.size(0),
                                               ys_pad.size(1) // 2,
                                               ys_pad.size(2) * 2)
             ilens = [(length + 1) // sub for length in ilens]
         projected = project_layer(ys_pad)
         xpad = torch.tanh(projected)
         xpad = F.dropout(xpad, self.dropout_rate, training=self.training)
         if i == 0:
             first_out = xpad
             first_lens = cc(
                 torch.from_numpy(np.array(ilens, dtype=np.int64)))
     ilens = cc(torch.from_numpy(np.array(ilens, dtype=np.int64)))
     return xpad, ilens, first_out, first_lens
Code example #14
    def forward(self, enc_pad, enc_len, dec_h, att_prev, scaling=2.0):
        '''
        enc_pad:(batch, enc_length, enc_dim)
        enc_len:(batch) of int
        dec_h:(batch, 1, dec_dim)
        att_prev:(batch, enc_length)
        '''
        batch_size = enc_pad.size(0)
        enc_h = self.mlp_enc(enc_pad)  # batch_size x enc_length x att_dim

        if dec_h is None:
            dec_h = enc_pad.new_zeros(batch_size, self.decoder_dim)
        else:
            dec_h = dec_h.view(batch_size, self.decoder_dim)

        # initialize attention weights to uniform
        if att_prev is None:
            att_prev = pad_list(
                [enc_pad.new(l).fill_(1.0 / l) for l in enc_len], 0)

        att_conv = self.loc_conv(
            att_prev.view(batch_size, 1, 1, enc_pad.size(1)))
        att_conv = att_conv.squeeze(2).transpose(1, 2)
        # att_conv: batch_size x channel x 1 x frame -> batch_size x frame x channel
        att_conv = self.mlp_att(
            att_conv
        )  # att_conv: batch_size x frame x channel -> batch_size x frame x att_dim

        dec_h_tiled = self.mlp_dec(dec_h).view(batch_size, 1, self.att_dim)
        att_state = torch.tanh(enc_h + dec_h_tiled + att_conv)
        e = self.gvec(att_state).squeeze(2)
        if enc_len is not None:
            mask = []
            for b in range(batch_size):
                mask.append([0] * enc_len[b] + [1] *
                            (enc_pad.size(1) - enc_len[b]))
            # use a bool mask; byte masks for masked_fill_ are deprecated
            mask = cc(torch.BoolTensor(mask))
            e = e.masked_fill_(mask, -1e15)
        attn = F.softmax(scaling * e, dim=1)
        w_expanded = attn.unsqueeze(1)  # w_expanded: batch_size x 1 x frame

        c = torch.bmm(w_expanded, enc_pad).squeeze(1)
        # batch x 1 x frame * batch x enc_length x enc_dim => batch x 1 x enc_dim
        c = self.mlp_o(c)  # batch x enc_dim
        return c, attn
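
`pad_list`, used above to initialize the attention weights to a uniform distribution (and again in example #22 to pad target sequences), is another helper the page omits. A minimal version, assuming ESPnet-style behavior of right-padding a list of variable-length tensors into one batch:

import torch

def pad_list(xs, pad_value=0.0):
    # hypothetical reconstruction: stack variable-length tensors, padding
    # the first (time) axis of each up to the longest in the list
    n_batch = len(xs)
    max_len = max(x.size(0) for x in xs)
    padded = xs[0].new_full((n_batch, max_len, *xs[0].size()[1:]), pad_value)
    for i, x in enumerate(xs):
        padded[i, :x.size(0)] = x
    return padded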
Code example #15
    def get_data(root_dir='/storage/feature/LibriSpeech/npy_files/train-clean-100/7402/90848',
                 text_index_path='/storage/feature/LibriSpeech/text_bpe/train-clean-100/7402/7402-90848.label.txt'):
        prefix = '7402-90848'
        datas = []
        for i in range(8):
            seg_id = str(i).zfill(4)
            filename = f'{prefix}-{seg_id}.npy'
            path = os.path.join(root_dir, filename)
            data = torch.from_numpy(np.load(path)).type(torch.FloatTensor)
            datas.append(data)
        datas.sort(key=lambda x: x.size(0), reverse=True)
        ilens = np.array([data.size(0) for data in datas], dtype=np.int64)
        datas = pad_sequence(datas, batch_first=True, padding_value=0)

        ys = []
        with open(text_index_path, 'r') as f:
            for line in f:
                utt_id, indexes = line.strip().split(',', maxsplit=1)
                indexes = cc(torch.Tensor([int(index) + 3 for index in indexes.split()]).type(torch.LongTensor))
                ys.append(indexes)
        return datas, ilens, ys[:8]
Code example #16
    def decode(self, n_samples=5, sample=False, max_dec_timesteps=500):
        logits, predictions = [], []
        dec_c, dec_z = None, None
        for t in range(max_dec_timesteps):
            if t == 0:
                bos = cc(torch.Tensor([self.bos for _ in range(n_samples)]).type(torch.LongTensor))
                emb = self.embedding(bos).unsqueeze(1)
            else:
                emb = self.embedding(predictions[-1]).unsqueeze(1)
            logit, dec_z, dec_c = self.forward_step(emb, dec_z, dec_c)
            logits.append(logit)
            if not sample:
                predictions.append(torch.argmax(logit, dim=-1))
            else:
                sampled_indices = Categorical(logits=logit).sample() 
                predictions.append(sampled_indices)

        logits = torch.stack(logits, dim=1)  # collected but not returned here
        predictions = torch.stack(predictions, dim=1)
        return predictions
Code example #17
    def build_model(self):
        self.Encoder = cc(Encoder())
        self.Decoder = [cc(Decoder()) for _ in range(4)]
        self.ACLayer = cc(ACLayer())
        self.Discriminator = cc(Discriminator())
        self.ASRLayer = cc(ASRLayer())
        self.SpeakerClassifier = cc(SpeakerClassifier())
        ac_betas = (0.5, 0.999)
        vae_betas = (0.9, 0.999)
        ac_lr = 0.00005
        vae_lr = 0.001
        dis_lr = 0.002
        cls_betas = (0.5, 0.999)
        asr_betas = (0.5, 0.999)
        cls_lr = 0.0002
        asr_lr = 0.00001

        self.list_decoder = []

        for i in range(4):
            self.list_decoder += list(self.Decoder[i].parameters())
        self.vae_params = list(self.Encoder.parameters()) + self.list_decoder
        self.ac_optimizer = optim.Adam(self.ACLayer.parameters(),
                                       lr=ac_lr,
                                       betas=ac_betas)
        self.vae_optimizer = optim.Adam(self.vae_params,
                                        lr=vae_lr,
                                        betas=vae_betas)
        self.dis_optimizer = optim.Adam(self.Discriminator.parameters(),
                                        lr=dis_lr,
                                        betas=ac_betas)

        self.asr_optimizer = optim.Adam(self.ASRLayer.parameters(),
                                        lr=asr_lr,
                                        betas=asr_betas)
        self.cls_optimizer = optim.Adam(self.SpeakerClassifier.parameters(),
                                        lr=cls_lr,
                                        betas=cls_betas)
Code example #18
    def build_model(self):
        hps = self.hps
        ns = self.hps.ns
        emb_size = self.hps.emb_size
        betas = (0.5, 0.9)

        #---stage one---#
        self.Encoder = cc(
            Encoder(ns=ns,
                    dp=hps.enc_dp,
                    emb_size=emb_size,
                    seg_len=hps.seg_len,
                    one_hot=self.one_hot,
                    binary_output=self.binary_output,
                    binary_ver=self.binary_ver))
        self.Decoder = cc(
            Decoder(ns=ns,
                    c_in=emb_size,
                    c_h=emb_size,
                    c_a=hps.n_speakers,
                    seg_len=hps.seg_len,
                    inp_emb=self.one_hot or self.binary_output))
        self.SpeakerClassifier = cc(
            SpeakerClassifier(
                ns=ns,
                c_in=emb_size if not self.binary_output else emb_size *
                emb_size,
                c_h=emb_size,
                n_class=hps.n_speakers,
                dp=hps.dis_dp,
                seg_len=hps.seg_len))

        #---stage one opts---#
        params = list(self.Encoder.parameters()) + \
            list(self.Decoder.parameters())
        self.ae_opt = optim.Adam(params, lr=self.hps.lr, betas=betas)
        self.clf_opt = optim.Adam(self.SpeakerClassifier.parameters(),
                                  lr=self.hps.lr,
                                  betas=betas)

        #---stage two---#
        self.Generator = cc(
            Decoder(ns=ns,
                    c_in=emb_size,
                    c_h=emb_size,
                    c_a=hps.n_speakers
                    if not self.targeted_G else hps.n_target_speakers))
        self.PatchDiscriminator = cc(
            nn.DataParallel(
                PatchDiscriminator(
                    ns=ns,
                    n_class=hps.n_speakers
                    if not self.targeted_G else hps.n_target_speakers,
                    seg_len=hps.seg_len)))

        #---stage two opts---#
        self.gen_opt = optim.Adam(self.Generator.parameters(),
                                  lr=self.hps.lr,
                                  betas=betas)
        self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(),
                                    lr=self.hps.lr,
                                    betas=betas)
Code example #19
    N = A_full.shape[0]

    truth_spec = utils.spectrum(A_full)

    step = 400
    k = maxIter // step  # integer division; range() below needs an int

    sp_emds = []
    cc_emds = []
    dd_emds = []
    assorts = []
    spectrum_weighted_distances = []
    bc_emds = []

    true_sp = utils.sp(A_full)
    true_cc = utils.cc(A_full)
    true_dd = utils.degree_sequence(A_full)

    # nx.from_numpy_matrix was removed in NetworkX 3.0
    G_true = nx.from_numpy_array(A_full)
    true_assort = nx.degree_assortativity_coefficient(G_true)

    # take the centrality values (not the node ids) to match graphEval above
    true_bc = sorted(nx.betweenness_centrality(G_true).values())

    true_assorts = []

    #initialize all params
    for i in range(start, k + 1):
        iterNum = i * step

        X = np.loadtxt(path + '/samples_{}.txt'.format(iterNum))
        X = genExpected_fromWalks(X, A_full.sum())
Code example #20
L = nx.normalized_laplacian_matrix(G).todense()
eig_vals, eig_vecs = linalg.eig(L)
# zip() returns an iterator in Python 3, and complex eigenvalues cannot be
# compared directly, so materialize the list and sort on the real part
eig_list = list(zip(eig_vals, np.transpose(eig_vecs)))
eig_list.sort(key=lambda x: x[0].real)

u = np.asarray([u_i.real for u_i in eig_list[-2][1]])[0][0]

truth = utils.compute_graph_statistics(np.asarray(A_matrix))
f = open('plots/truth.txt', "w")
f.write(str(truth))
f.close()

truth_spec = utils.specGap(A_full)
train_spec = utils.specGap(A_matrix)

truth_cc = utils.cc(A_full)

cc_emd_combo = []
cc_emd_reg = []
cc_emd_fmm = []
cc_emd_combo_std = []
cc_emd_reg_std = []
cc_emd_fmm_std = []
k = 11
for i in range(1, k):
    print(i)
    X_c = np.loadtxt(
        'plots/barbell_sameDensity/barbell_combo_mixed/trainingIteration_{}_expectedGraph.txt'
        .format(i * 100))
    X_f = np.loadtxt(
        'plots/barbell_sameDensity/barbell_fmm/trainingIteration_{}_expectedGraph.txt'
Code example #21
    print(f"Ordered top predicted tokens: {top_tokens}")
    print(f"Ordered top predicted values: {probs[sorted_indexes]}")


if __name__ == '__main__':
    args = parse()
    sent_gen = SentenceGenerator(args)
    data_utils = data_utils(args)

    sent = ["there is no [MASK] in our products now"]
    vecs = [data_utils.text2id(txt, 10) for txt in sent]
    # tok_sent = ['[CLS]']
    # tok_sent.extend(sent_gen.data_utils.tokenizer.tokenize(sent))
    # tok_sent.append('[SEP]')
    # inp = cc([sent_gen.data_utils.tokenizer.encode(sent, add_special_tokens=True)], args.no_cuda)
    inp = cc(vecs, args.no_cuda)
    mask_sent = np.expand_dims(inp != 0, -2).astype(np.int32)
    print(vecs)
    print(inp)
    print(mask_sent)

    predictions, break_probs = sent_gen.model.forward(
        inp.long(),
        cc(mask_sent, sent_gen.no_cuda)[0])
    sm = torch.nn.Softmax(dim=0)  # Used to convert logits to probs
    for pos in range(1, inp.shape[1]):
        if vecs[0][pos] != 0:
            # print(f"Prediction for word: {tok_sent[pos]}")
            print(
                f"Prediction for word: {data_utils.index2word[vecs[0][pos]]}")
            probs = sm(predictions[0, pos])
Code example #22
    def forward(self, enc_pad, enc_len, ys=None, tf_rate=1.0, max_dec_timesteps=500, 
            sample=False, smooth=False, scaling=1.0, label_smoothing=True):
        batch_size = enc_pad.size(0)
        if ys is not None:
            # prepare input and output sequences
            bos = ys[0].data.new([self.bos])
            eos = ys[0].data.new([self.eos])
            ys_in = [torch.cat([bos, y], dim=0) for y in ys]
            ys_out = [torch.cat([y, eos], dim=0) for y in ys]
            pad_ys_in = pad_list(ys_in, pad_value=self.eos)
            pad_ys_out = pad_list(ys_out, pad_value=self.eos)
            # get length info
            batch_size, olength = pad_ys_out.size(0), pad_ys_out.size(1)
            # map idx to embedding
            eys = self.embedding(pad_ys_in)

        # initialization
        dec_c = self.zero_state(enc_pad)
        dec_z = self.zero_state(enc_pad)
        c = self.zero_state(enc_pad, dim=self.att_odim)

        w = None
        logits, prediction, ws = [], [], []
        # reset the attention module
        self.attention.reset()

        # loop for each timestep
        olength = max_dec_timesteps if not ys else olength
        for t in range(olength):
            # supervised learning: using teacher forcing
            if ys is not None:
                # teacher forcing
                tf = True if np.random.random_sample() <= tf_rate else False
                emb = eys[:, t, :] if tf or t == 0 else self.embedding(prediction[-1])
            # else, label the data with greedy
            else:
                if t == 0:
                    bos = cc(torch.Tensor([self.bos for _ in range(batch_size)]).type(torch.LongTensor))
                    emb = self.embedding(bos)
                else:
                    # using argmax
                    if not smooth:
                        emb = self.embedding(prediction[-1])
                    # smooth approximation of embedding
                    else:
                        emb = F.softmax(logit * scaling, dim=-1) @ self.embedding.weight
            logit, dec_z, dec_c, c, w = \
                    self.forward_step(emb, dec_z, dec_c, c, w, enc_pad, enc_len)

            ws.append(w)
            logits.append(logit)
            if not sample:
                prediction.append(torch.argmax(logit, dim=-1))
            else:
                sampled_indices = Categorical(logits=logit).sample() 
                prediction.append(sampled_indices)

        logits = torch.stack(logits, dim=1)
        log_probs = F.log_softmax(logits, dim=2)
        prediction = torch.stack(prediction, dim=1)
        ws = torch.stack(ws, dim=1)

        if ys:
            ys_log_probs = torch.gather(log_probs, dim=2, index=pad_ys_out.unsqueeze(2)).squeeze(2)
        else:
            ys_log_probs = torch.gather(log_probs, dim=2, index=prediction.unsqueeze(2)).squeeze(2)

        # label smoothing
        if label_smoothing and self.ls_weight > 0 and self.training:
            loss_reg = torch.sum(log_probs * self.vlabeldist, dim=2)
            ys_log_probs = (1 - self.ls_weight) * ys_log_probs + self.ls_weight * loss_reg
        return logits, ys_log_probs, prediction, ws
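
The label-smoothing step at the end mixes the log-probability of the reference token with the expectation of the log-probabilities under the prior `labeldist` (example #27 states the same identity in its comment, q'(y|x) = (1-e)*q(y|x) + e*u(y)). A toy check of that computation with hypothetical shapes and a uniform prior:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
ls_weight = 0.1
log_probs = F.log_softmax(torch.randn(2, 4, 6), dim=2)  # batch x len x vocab
labeldist = torch.full((6,), 1.0 / 6)                   # uniform prior u(y)
ys = torch.randint(0, 6, (2, 4))                        # reference tokens

ys_log_probs = torch.gather(log_probs, 2, ys.unsqueeze(2)).squeeze(2)
loss_reg = torch.sum(log_probs * labeldist, dim=2)
smoothed = (1 - ls_weight) * ys_log_probs + ls_weight * loss_reg
print(smoothed.shape)  # torch.Size([2, 4])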
Code example #23
    def get_data(root_dir='/storage/feature/LibriSpeech/npy_files/train-clean-100/7402/90848',
                 text_index_path='/storage/feature/LibriSpeech/text_bpe/train-clean-100/7402/7402-90848.label.txt'):
        prefix = '7402-90848'
        datas = []
        for i in range(8):
            seg_id = str(i).zfill(4)
            filename = f'{prefix}-{seg_id}.npy'
            path = os.path.join(root_dir, filename)
            data = torch.from_numpy(np.load(path)).type(torch.FloatTensor)
            datas.append(data)
        datas.sort(key=lambda x: x.size(0), reverse=True)
        ilens = np.array([data.size(0) for data in datas], dtype=np.int64)
        datas = pad_sequence(datas, batch_first=True, padding_value=0)

        ys = []
        with open(text_index_path, 'r') as f:
            for line in f:
                utt_id, indexes = line.strip().split(',', maxsplit=1)
                indexes = cc(torch.Tensor([int(index) + 3 for index in indexes.split()]).type(torch.LongTensor))
                ys.append(indexes)
        return datas, ilens, ys[:8]
    data, ilens, ys = get_data()
    data = cc(data)
    model = cc(E2E(input_dim=40, enc_hidden_dim=800, enc_n_layers=3, 
        subsample=[1, 2, 1], dropout_rate=0.3, 
        dec_hidden_dim=1024, att_dim=512, conv_channels=10, 
        conv_kernel_size=201, att_odim=800, output_dim=500))
    log_probs, prediction, ws = model(data, ilens, ys)
    p_lens = [p.size() for p in prediction]
    t_lens = [t.size() for t in ys]

Code example #24
File: model.py Project: jjery2243542/token_discovery
        return c, w 

class Decoder(torch.nn.Module):
    def __init__(self, input_dim, embedding_dim, encoder_dim, att_dim, hidden_dim, output_dim):
        # index 0 is padding, index 1 is GO symbol
        super(Decoder, self).__init__()  # required before registering submodules
        self.input_layer = torch.nn.Linear(input_dim + 2, embedding_dim)
        self.rnn_cell = torch.nn.LSTMCell(embedding_dim + encoder_dim, hidden_dim)
        self.output_layer = torch.nn.Linear(hidden_dim, output_dim)
        self.attention = AttLoc(encoder_dim=encoder_dim, decoder_dim=hidden_dim,
                att_dim=att_dim, conv_channels=100, conv_kernel_size=10)

    def forward_step(self, token, last_hidden_state, encoder_state):
        # left unfinished in the source: the LSTMCell call was missing its
        # (input, (h, c)) arguments
        raise NotImplementedError

if __name__ == '__main__':
    data = cc(torch.randn(32, 321, 13))
    ilens = np.ones((32,), dtype=np.int64) * 121
    net = cc(Encoder(13, 320, 4, [1, 2, 2, 1], dropout_rate=0.3, output_dim=512))
    emb = cc(EmbeddingLayer(embedding_dim=512, n_latent=300))
    out, ilens = net(data, ilens)
    print(out.size())
    distr, out = emb(out)
    print(distr.size(), out.size())
    #att = cc(AttLoc(640, 320, 300, 100, 10))
    #att.reset()
    #dec = cc(Variable(torch.randn(32, 320)))
    #context, weights = att(output, dec, None)
    #print(context.size(), weights.size(), weights[0])
    #dec = cc(Variable(torch.randn(32, 320)))
    #context, weights = att(output, dec, weights)
    #print(context.size(), weights.size(), weights[0])
Code example #25
        self.conv_layer3 = nn.Sequential(
            nn.ConvTranspose2d(128,
                               64,
                               stride=2,
                               kernel_size=5,
                               padding=2,
                               output_padding=1), nn.ReLU(),
            nn.ConvTranspose2d(64,
                               out_channel,
                               stride=2,
                               kernel_size=5,
                               padding=2,
                               output_padding=1))

    def forward(self, x):
        out = self.conv_layer1(x)
        for layer in self.conv_layer2s:
            res = F.relu(layer(out))
            out = out + res
        out = self.conv_layer3(out)
        return out


if __name__ == '__main__':
    enc = cc(Encoder())
    dec = cc(Decoder())
    data = cc(torch.randn(16, 3, 128, 128))
    e = enc(data)
    d = dec(e)
    print(d.size())
Code example #26
    def build_model(self):
        hps = self.hps
        ns = self.hps.ns
        enc_mode = self.enc_mode
        seg_len = self.hps.seg_len
        enc_size = self.hps.enc_size
        emb_size = self.hps.emb_size
        betas = (0.5, 0.9)

        #---stage one---#
        self.Encoder = cc(
            Encoder(ns=ns,
                    dp=hps.enc_dp,
                    enc_size=enc_size,
                    seg_len=seg_len,
                    enc_mode=enc_mode))
        self.Decoder = cc(
            Decoder(ns=ns,
                    c_in=enc_size,
                    c_h=emb_size,
                    c_a=hps.n_speakers,
                    seg_len=seg_len))
        clf_c_in = enc_size * enc_size if enc_mode == 'binary' else (
            2 * enc_size if enc_mode == 'multilabel_binary' else enc_size)
        self.SpeakerClassifier = cc(
            SpeakerClassifier(ns=ns,
                              c_in=clf_c_in,
                              c_h=emb_size,
                              n_class=hps.n_speakers,
                              dp=hps.dis_dp,
                              seg_len=seg_len))

        #---stage one opts---#
        params = list(self.Encoder.parameters()) + list(
            self.Decoder.parameters())
        self.ae_opt = optim.Adam(params, lr=self.hps.lr, betas=betas)
        self.clf_opt = optim.Adam(self.SpeakerClassifier.parameters(),
                                  lr=self.hps.lr,
                                  betas=betas)

        #---stage two---#
        if self.g_mode == 'naive':
            self.Generator = cc(
                Decoder(ns=ns,
                        c_in=enc_size,
                        c_h=emb_size,
                        c_a=hps.n_speakers,
                        seg_len=seg_len))
        elif self.g_mode == 'targeted' or self.g_mode == 'targeted_residual':
            self.Generator = cc(
                Decoder(ns=ns,
                        c_in=enc_size,
                        c_h=emb_size,
                        c_a=hps.n_target_speakers,
                        seg_len=seg_len,
                        output_mask=(self.g_mode == 'targeted_residual')))
        elif self.g_mode == 'enhanced':
            self.Generator = cc(
                Enhanced_Generator(ns=ns,
                                   dp=hps.enc_dp,
                                   enc_size=1024,
                                   emb_size=1024,
                                   seg_len=seg_len,
                                   n_speakers=hps.n_speakers))
        elif self.g_mode == 'spectrogram':
            self.Generator = cc(
                Spectrogram_Patcher(ns=ns,
                                    c_in=513,
                                    c_h=emb_size,
                                    c_a=hps.n_target_speakers,
                                    seg_len=seg_len))
        elif self.g_mode == 'tacotron':
            self.Generator = cc(
                Tacotron(enc_size,
                         hps.n_target_speakers,
                         mel_dim=hp.n_mels,
                         linear_dim=int(hp.n_fft / 2) + 1))
            self.tacotron_input_lengths = torch.tensor(
                [self.hps.seg_len // 8 for _ in range(hps.batch_size)])
        else:
            raise NotImplementedError('Invalid Generator mode!')

        self.PatchDiscriminator = cc(
            nn.DataParallel(
                PatchDiscriminator(ns=ns,
                                   n_class=hps.n_speakers if self.g_mode == 'naive'
                                   else hps.n_target_speakers,
                                   seg_len=seg_len)))

        #---stage two opts---#
        self.gen_opt = optim.Adam(self.Generator.parameters(),
                                  lr=self.hps.lr,
                                  betas=betas)
        self.patch_opt = optim.Adam(self.PatchDiscriminator.parameters(),
                                    lr=self.hps.lr,
                                    betas=betas)

        #---target classifier---#
        self.TargetClassifier = cc(
            nn.DataParallel(TargetClassifier(ns=ns, n_class=3,
                                             seg_len=seg_len)))

        #---target classifier opts---#
        self.tclf_opt = optim.Adam(self.TargetClassifier.parameters(),
                                   lr=self.hps.lr,
                                   betas=betas)
Code example #27
    def forward(self,
                enc_output,
                enc_len,
                dec_input=None,
                tf_rate=1.0,
                max_dec_timesteps=500,
                sample=False):
        batch_size = enc_output.size(0)
        enc_len = enc_len.cpu().numpy().tolist()
        if dec_input is not None:
            pad_dec_input_in = dec_input[0]
            pad_dec_input_out = dec_input[1]
            # get length info
            batch_size, olength = pad_dec_input_out.size(
                0), pad_dec_input_out.size(1)
            # map idx to embedding
            dec_input_embedded = self.embedding(pad_dec_input_in)

        # initialization
        dec_c = self.zero_state(enc_output)
        dec_h = self.zero_state(enc_output)
        attn = None

        logits, prediction, attns = [], [], []
        # loop for each timestep
        olength = max_dec_timesteps if not dec_input else olength
        for t in range(olength):
            if dec_input is not None:
                # teacher forcing
                tf = True if np.random.random_sample() <= tf_rate else False
                if tf or t == 0:
                    emb = dec_input_embedded[:, t, :]
                else:
                    # fix: the original dropped the `emb =` assignment here
                    emb = self.embedding(prediction[-1])
            else:
                if t == 0:
                    bos = cc(
                        torch.Tensor([self.bos for _ in range(batch_size)
                                      ]).type(torch.LongTensor))
                    emb = self.embedding(bos)
                else:
                    emb = self.embedding(prediction[-1])

            logit, dec_h, dec_c, attn = \
                self.forward_step(emb, dec_h, dec_c, attn, enc_output, enc_len)

            attns.append(attn)
            logits.append(logit)
            if not sample:
                prediction.append(torch.argmax(logit, dim=-1))
            else:
                sampled_indices = Categorical(logits=logit).sample()
                prediction.append(sampled_indices)

        logits = torch.stack(logits, dim=1)  # batch x length x output_dim
        log_probs = F.log_softmax(logits, dim=2)
        prediction = torch.stack(prediction, dim=1)  # batch x length
        attns = torch.stack(attns, dim=1)  # batch x length x enc_len

        # get the log probs of the true label # batch x length
        if dec_input:
            dec_output_log_probs = torch.gather(
                log_probs, dim=2,
                index=pad_dec_input_out.unsqueeze(2)).squeeze(2)
        else:
            dec_output_log_probs = torch.gather(
                log_probs, dim=2, index=prediction.unsqueeze(2)).squeeze(2)

        # label smoothing : q'(y|x) = (1-e)*q(y|x) + e*u(y)
        if self.ls_weight > 0:
            loss_reg = torch.sum(log_probs * self.vlabeldist, dim=2)  # u(y)
            dec_output_log_probs = (
                1 - self.ls_weight
            ) * dec_output_log_probs + self.ls_weight * loss_reg

        return logits, dec_output_log_probs, prediction, attns
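
Finally, `self.zero_state`, which initializes the decoder state in examples #22 and #27, is also left out of the page. A sketch inferred from its two call forms, `zero_state(enc_output)` and `zero_state(enc_pad, dim=self.att_odim)`, assuming it allocates zeros matching the reference tensor's device and dtype:

import torch

def zero_state(ref, dim=512):
    # hypothetical reconstruction: a (batch, dim) zero tensor on the same
    # device/dtype as the reference encoder output; in the original this is
    # a method whose dim presumably defaults to the decoder's hidden size
    return ref.new_zeros(ref.size(0), dim)

h0 = zero_state(torch.randn(4, 100, 512), dim=1024)  # -> shape (4, 1024)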