Code Example #1
    def __init__(self, config, src_vocab_size, tgt_vocab_size, use_cuda, pretrain=None, score_fn=None):
        super(seq2seq, self).__init__()
        if pretrain is not None:
            # hint: from_pretrained freezes the embedding weights automatically
            src_embedding = nn.Embedding.from_pretrained(pretrain['src_emb'])
            tgt_embedding = nn.Embedding.from_pretrained(pretrain['tgt_emb'])

            ''' 
            def normal2(A):
                return A / np.sqrt(np.sum(A ** 2))

            # for i in range(len(pretrain['tgt_emb'])):
            #     pretrain['tgt_emb'][i] = normal2(pretrain['tgt_emb'][i])
            mat = np.zeros(45*45).reshape(45, 45)
            for i in range(45):
                for j in range(45):
                    _ = normal2(pretrain['tgt_emb'][i].numpy().copy())
                    __ = normal2(pretrain['tgt_emb'][j].numpy().copy())
                    mat[i][j] = _.dot(__)
            print(mat)
            print()
            '''
        else:
            src_embedding = None
            tgt_embedding = None
        self.encoder = models.rnn_encoder(config, src_vocab_size, embedding=src_embedding, tgt_embedding=tgt_embedding)
        if config.shared_vocab == False:
            self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=tgt_embedding, score_fn=score_fn)
        else:
            self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=self.encoder.embedding,
                                              score_fn=score_fn)
        self.use_cuda = use_cuda
        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        if config.score == 'margin':
            # print("using margin loss")
            self.criterion = models.margin_criterion(tgt_vocab_size, use_cuda, config)
        elif config.score == 'hybrid':
            self.criterion = {}
            self.criterion['softmax'] = models.criterion(tgt_vocab_size, use_cuda, config)
            self.criterion['margin'] = models.margin_criterion(tgt_vocab_size, use_cuda, config)
        elif config.score == 'softmax':
            self.criterion = models.criterion(tgt_vocab_size, use_cuda, config)
        else:
            print('no such score function')
            os.abort()
        self.log_softmax = nn.LogSoftmax(dim=1)
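The models.criterion factory called by almost every constructor on this page is never shown here, and its signature varies between examples (some pass a config or a loss name as a third argument). A minimal sketch of what such a helper might look like, assuming a padding-masked NLLLoss over the target vocabulary (the PAD index and the reduction are assumptions, not the project's actual code):

import torch
import torch.nn as nn

def criterion(tgt_vocab_size, use_cuda, padding_idx=0):
    # zero weight on the PAD column so padded positions do not contribute to the loss
    weight = torch.ones(tgt_vocab_size)
    weight[padding_idx] = 0
    crit = nn.NLLLoss(weight, reduction='sum')
    if use_cuda:
        crit = crit.cuda()
    return crit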
Code Example #2
 def __init__(self, opt):
     super(seq2seq, self).__init__()
     self.opt = opt
     self.encoder = models.rnn_encoder(opt)
     self.decoder = models.rnn_decoder(opt)
     self.criterion = models.criterion(opt.use_cuda)
     self.log_softmax = nn.LogSoftmax(dim=1)
Code Example #3
    def __init__(self,
                 config,
                 src_vocab,
                 tgt_vocab,
                 use_cuda,
                 bmodel,
                 pretrain=None,
                 score_fn=None):
        super(seq2seq, self).__init__()
        if pretrain is not None:
            self.slot_embedding = nn.Embedding.from_pretrained(
                pretrain['slot'], freeze=False)
        else:
            self.slot_embedding = None
        if bmodel is not None:
            self.src_embedding = bmodel
        self.dsep_id = src_vocab['-']
        self.ssep_id = src_vocab[',']
        self.vsep_id = src_vocab[';']
        src_vocab_size = len(src_vocab)
        tgt_vocab_size = len(tgt_vocab)

        self.encoder = models.rnn_encoder(
            config, )  # tgt_vocab_size,False, embedding=slot_embedding)

        self.decoder = models.rnn_decoder(config,
                                          src_vocab_size,
                                          tgt_vocab_size,
                                          slot_embedding=self.slot_embedding,
                                          value_embedding=self.slot_embedding,
                                          score_fn=score_fn)

        self.use_cuda = use_cuda
        self.config = config
        self.criterion = models.criterion(tgt_vocab_size, use_cuda)
Code Example #4
def validate(model, val_loader, timestamps):
    predictions = []
    total_loss = []
    running_loss = 0.0

    with torch.no_grad():
        for step, (data, target) in enumerate(val_loader):
            data = data.to(device)
            target = target.to(device)

            prediction = model(data.float())
            loss = torch.sqrt(
                models.criterion(prediction.squeeze(1), target.float()))

            print("Step [{:4d}/{}] -> Date: {}/{}, Target: {}, Prediction: {}".\
            format(step+1, len(val_loader), timestamps[step].month, timestamps[step].year,\
            target.item(), prediction.item()))

            predictions.append(float(prediction.item()))
            total_loss.append(loss.item())

            running_loss += loss.item()

    print(LINESPLIT)
    print("Average Validation Loss: {}".format(running_loss / len(total_loss)))

    return (predictions, total_loss)
Code Example #5
File: seq2seq.py Project: shincling/TDAAv2
    def __init__(self, config, input_emb_size, mix_speech_len, tgt_vocab_size, use_cuda, pretrain=None, score_fn=''):
        super(seq2seq, self).__init__()
        if pretrain is not None:
            src_embedding = pretrain['src_emb']
            tgt_embedding = pretrain['tgt_emb']
        else:
            src_embedding = None
            tgt_embedding = None
        self.encoder = models.rnn_encoder(config, input_emb_size, None, embedding=src_embedding)
        if config.shared_vocab == False:
            self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=tgt_embedding, score_fn=score_fn)
        else:
            self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=self.encoder.embedding,
                                              score_fn=score_fn)
        self.use_cuda = use_cuda
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        self.criterion = models.criterion(tgt_vocab_size, use_cuda, config.loss)
        self.loss_for_ss = nn.MSELoss()
        self.log_softmax = nn.LogSoftmax()
        self.wav_loss = models.WaveLoss(dBscale=1, nfft=config.FRAME_LENGTH, hop_size=config.FRAME_SHIFT)

        speech_fre = input_emb_size
        num_labels = tgt_vocab_size
        if config.use_tas:
            self.ss_model = models.ConvTasNet()
        else:
            self.ss_model = models.SS(config, speech_fre, mix_speech_len, num_labels)
Code Example #6
 def __init__(self, config, vocab, use_cuda, pretrain=None):
     super(bow2seq, self).__init__()
     self.vocab = vocab
     self.vocab_size = vocab.voc_size
     if pretrain is not None:
         self.embedding = pretrain['emb']
     else:
         self.embedding = nn.Embedding(self.vocab_size, config.emb_size)
     # self.encoder = models.GCN_Encoder(config, self.vocab_size, embedding=self.embedding)
     self.bert_encoder = models.bert.BERT(config.head_num,
                                          config.emb_size,
                                          config.dropout,
                                          config.emb_size,
                                          self.vocab_size,
                                          config.num_layers,
                                          config.max_sentence_len,
                                          word_emb=self.embedding)
     self.decoder = models.rnn_decoder(config,
                                       self.vocab_size,
                                       embedding=self.embedding)
     self.proj = nn.Linear(config.emb_size, config.decoder_hidden_size)
     self.state_wc = nn.Linear(
         config.decoder_hidden_size,
         config.decoder_hidden_size * config.num_layers)
     self.state_wh = nn.Linear(
         config.decoder_hidden_size,
         config.decoder_hidden_size * config.num_layers)
     self.tanh = nn.Tanh()
     self.config = config
     self.criterion = models.criterion(self.vocab_size, use_cuda)
     self.log_softmax = nn.LogSoftmax()
Code Example #7
 def __init__(self,
              config,
              src_vocab_size,
              tgt_vocab_size,
              use_cuda,
              pretrain=None,
              score_fn=None):
     super(seq2seq, self).__init__()
     if pretrain is not None:
         src_embedding = pretrain['src_emb']
         tgt_embedding = pretrain['tgt_emb']
     else:
         src_embedding = None
         tgt_embedding = None
     self.encoder = models.rnn_encoder(config,
                                       src_vocab_size,
                                       embedding=src_embedding)
     if config.shared_vocab == False:
         self.decoder = models.rnn_decoder(config,
                                           tgt_vocab_size,
                                           embedding=tgt_embedding,
                                           score_fn=score_fn)
     else:
         self.decoder = models.rnn_decoder(config,
                                           tgt_vocab_size,
                                           embedding=self.encoder.embedding,
                                           score_fn=score_fn)
     self.use_cuda = use_cuda
     self.src_vocab_size = src_vocab_size
     self.tgt_vocab_size = tgt_vocab_size
     self.config = config
     self.criterion = models.criterion(tgt_vocab_size, use_cuda)
     self.log_softmax = nn.LogSoftmax()
Code Example #8
 def __init__(self, config, vocab, use_cuda, pretrain=None):
     super(hierarchical_attention, self).__init__()
     self.vocab = vocab
     self.vocab_size = vocab.voc_size
     if pretrain is not None:
         self.embedding = pretrain['emb']
     else:
         self.embedding = nn.Embedding(self.vocab_size, config.emb_size)
     self.word_encoder = Encoder(config.emb_size, config.emb_size,
                                 config.num_layers, config.dropout,
                                 config.bidirec)
     self.word_attentive_pool = attentive_pooling(config.emb_size * 2)
     self.sentence_encoder = Encoder(config.emb_size * 2,
                                     config.emb_size * 2, config.num_layers,
                                     config.dropout, config.bidirec)
     self.sentence_attentive_pool = attentive_pooling(
         config.decoder_hidden_size)
     self.decoder = models.rnn_decoder(config,
                                       self.vocab_size,
                                       embedding=self.embedding)
     self.w_context = nn.Linear(config.decoder_hidden_size * 2,
                                config.decoder_hidden_size,
                                bias=False)
     self.state_wc = nn.Linear(
         config.decoder_hidden_size,
         config.decoder_hidden_size * config.num_layers)
     self.state_wh = nn.Linear(
         config.decoder_hidden_size,
         config.decoder_hidden_size * config.num_layers)
     self.tanh = nn.Tanh()
     self.config = config
     self.criterion = models.criterion(self.vocab_size, use_cuda)
     self.log_softmax = nn.LogSoftmax()
Code Example #9
File: ae.py Project: zhangsh950618/superAE
    def __init__(self,
                 config,
                 src_vocab_size,
                 tgt_vocab_size,
                 use_cuda,
                 w2v=None,
                 score_fn=None,
                 weight=0.0,
                 pretrain_updates=0,
                 extend_vocab_size=0,
                 device_ids=None):
        super(ae, self).__init__()
        if w2v is not None:
            src_embedding = w2v['src_emb']
            tgt_embedding = w2v['tgt_emb']
        else:
            src_embedding = None
            tgt_embedding = None

        self.encoder_s2s = models.gated_rnn_encoder(config,
                                                    src_vocab_size,
                                                    embedding=src_embedding)

        if config.shared_vocab == False:
            self.decoder = models.rnn_decoder(config,
                                              tgt_vocab_size,
                                              embedding=tgt_embedding,
                                              score_fn=score_fn)
        else:
            self.decoder = models.rnn_decoder(
                config,
                tgt_vocab_size,
                embedding=self.encoder_s2s.embedding,
                score_fn=score_fn)

        self.encoder_ae = models.rnn_encoder(config,
                                             src_vocab_size,
                                             embedding=self.decoder.embedding)

        self.use_cuda = use_cuda
        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        self.weight = weight
        self.pretrain_updates = pretrain_updates
        if 'emb' in score_fn:
            self.criterion = models.criterion_emb(config.hidden_size,
                                                  tgt_vocab_size, use_cuda)
        else:
            self.criterion = models.criterion(tgt_vocab_size, use_cuda)
        self.log_softmax = nn.LogSoftmax()
        if score_fn.startswith('dis'):
            self.discriminator = nn.Linear(
                config.num_layers * config.hidden_size * 2, 1)
            self.sigmoid = nn.Sigmoid()
        if score_fn.endswith('map'):
            self.h_map = nn.Linear(config.hidden_size, config.hidden_size)
            self.c_map = nn.Linear(config.hidden_size, config.hidden_size)
        self.score_fn = score_fn
Code Example #10
File: seq2seq.py Project: zhangsh950618/superAE
    def __init__(self,
                 config,
                 src_vocab_size,
                 tgt_vocab_size,
                 use_cuda,
                 w2v=None,
                 score_fn=None,
                 weight=0.0,
                 pretrain_updates=0,
                 extend_vocab_size=0,
                 device_ids=None):
        super(seq2seq, self).__init__()
        if w2v is not None:
            src_embedding = w2v['src_emb']
            tgt_embedding = w2v['tgt_emb']
        else:
            src_embedding = None
            tgt_embedding = None

        if 'copy' in score_fn:
            build_encoder = models.copy_rnn_encoder
            build_decoder = models.copy_rnn_decoder
        else:
            build_encoder = models.rnn_encoder
            build_decoder = models.rnn_decoder

        self.encoder = build_encoder(config,
                                     src_vocab_size,
                                     embedding=src_embedding)
        if config.shared_vocab == False:
            self.decoder = build_decoder(config,
                                         tgt_vocab_size,
                                         embedding=tgt_embedding,
                                         score_fn=score_fn,
                                         extend_vocab_size=extend_vocab_size)
        else:
            self.decoder = build_decoder(config,
                                         tgt_vocab_size,
                                         embedding=self.encoder.embedding,
                                         score_fn=score_fn,
                                         extend_vocab_size=extend_vocab_size)
        #if len(device_ids) > 0:
        #    self.encoder = nn.DataParallel(self.encoder, device_ids=device_ids, dim=1)
        #    self.decoder = nn.DataParallel(self.decoder, device_ids=device_ids, dim=1)
        self.use_cuda = use_cuda
        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        self.weight = weight
        if 'emb' in score_fn:
            self.criterion = models.criterion_emb(config.hidden_size,
                                                  tgt_vocab_size, use_cuda)
        elif 'copy' in score_fn:
            self.criterion = models.copy_criterion(use_cuda)
        else:
            self.criterion = models.criterion(tgt_vocab_size, use_cuda)
        self.log_softmax = nn.LogSoftmax()
Code Example #11
    def __init__(self,
                 config,
                 vocab,
                 use_cuda,
                 use_content=False,
                 pretrain=None):
        super(seq2seq, self).__init__()
        self.vocab = vocab
        self.vocab_size = vocab.voc_size
        if pretrain is not None:
            self.embedding = pretrain['emb']
        else:
            self.embedding = nn.Embedding(self.vocab_size, config.emb_size)
            if config.emb_file is not None:
                print(len(vocab._word2id))
                print('Loading embedding file: %s' % config.emb_file)
                embeddings = np.random.randn(self.vocab_size,
                                             config.emb_size) * 0.01
                pre_trained = 0
                i = 0
                #small = open('data/music/small_embedding.txt', 'w')
                count = 0

                #with open('data/music/small_embedding.txt', 'w') as small:
                for line in open(config.emb_file, 'r',
                                 encoding='utf-8').readlines():
                    count += 1
                    if count % 100000 == 0:
                        print(count)
                    sp = line.split()
                    if (len(sp) == config.emb_size + 1) and sp[0] in set(
                            vocab._id2word):
                        pre_trained += 1
                        embeddings[vocab._word2id[sp[0]]] = [
                            float(x) for x in sp[1:]
                        ]
                        #small.write(line)
                    else:
                        i += 1
                        #print(sp[0])
                #small.close()
                print("Number of len(sp)!=301     :", i)
                print('Pre-trained: %d (%.2f%%)' %
                      (pre_trained, pre_trained * 100.0 / len(vocab._word2id)))
                self.embedding.weight.data.copy_(torch.FloatTensor(embeddings))

        self.encoder = models.rnn_encoder(config,
                                          self.vocab_size,
                                          embedding=self.embedding)
        self.decoder = models.rnn_decoder(config,
                                          self.vocab_size,
                                          embedding=self.embedding)
        self.config = config
        self.use_content = use_content
        self.criterion = models.criterion(self.vocab_size, use_cuda)
        self.log_softmax = nn.LogSoftmax()
        self.tanh = nn.Tanh()
Code Example #12
File: seq2seq.py Project: shincling/TDAAv2
    def __init__(self,
                 config,
                 input_emb_size,
                 mix_speech_len,
                 tgt_vocab_size,
                 use_cuda,
                 pretrain=None,
                 score_fn=''):
        super(seq2seq, self).__init__()
        if pretrain is not None:
            src_embedding = pretrain['src_emb']
            tgt_embedding = pretrain['tgt_emb']
        else:
            src_embedding = None
            tgt_embedding = None
        # self.encoder = models.rnn_encoder(config, input_emb_size, None, embedding=src_embedding)
        if config.is_two_channel:
            self.encoder = models.TransEncoder(config, 2 * input_emb_size)
        else:
            self.encoder = models.TransEncoder(config, input_emb_size)
        self.decoder = models.TransDecoder(config,
                                           sos_id=0,
                                           eos_id=tgt_vocab_size - 1,
                                           n_tgt_vocab=tgt_vocab_size)
        # if config.shared_vocab == False:
        #     self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=tgt_embedding, score_fn=score_fn)
        # else:
        #     self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=self.encoder.embedding,
        #                                       score_fn=score_fn)
        self.use_cuda = use_cuda
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        self.criterion = models.criterion(tgt_vocab_size, use_cuda,
                                          config.loss)
        self.loss_for_ss = nn.MSELoss()
        self.log_softmax = nn.LogSoftmax()
        self.wav_loss = models.WaveLoss(dBscale=1,
                                        nfft=config.FRAME_LENGTH,
                                        hop_size=config.FRAME_SHIFT)

        speech_fre = input_emb_size
        num_labels = tgt_vocab_size
        if config.is_two_channel:
            self.separation_linear_real = nn.Linear(self.encoder.d_model,
                                                    2 * speech_fre)
            self.separation_linear_imag = nn.Linear(self.encoder.d_model,
                                                    2 * speech_fre)
        else:
            self.separation_linear = nn.Linear(self.encoder.d_model,
                                               2 * speech_fre)
        self.speech_fre = speech_fre
        self.dropout_layer = nn.Dropout(config.linear_dropout)
Code Example #13
 def __init__(self,
              config,
              vocab,
              use_cuda,
              use_copy,
              use_bert,
              word_level_model,
              graph_model,
              pretrain=None):
     super(graph2seq, self).__init__()
     self.word_level_model = word_level_model
     self.vocab = vocab
     self.vocab_size = vocab.voc_size
     if pretrain is not None:
         self.embedding = pretrain['emb']
     else:
         self.embedding = nn.Embedding(self.vocab_size, config.emb_size)
     # self.encoder = models.GCN_Encoder(config, self.vocab_size, embedding=self.embedding)
     self.use_copy = use_copy
     self.use_bert = use_bert
     if use_bert:
         self.bert_encoder = models.bert.BERT(
             config.head_num, config.decoder_hidden_size, config.dropout,
             config.decoder_hidden_size, self.vocab_size, config.num_layers,
             config.max_sentence_len)
     self.encoder = models.Memory_Network(config,
                                          self.vocab_size,
                                          word_level_model,
                                          graph_model,
                                          embedding=self.embedding)
     if use_copy:
         self.decoder = models.pointer_decoder(config,
                                               self.vocab_size,
                                               embedding=self.embedding)
     else:
         self.decoder = models.rnn_decoder(config,
                                           self.vocab_size,
                                           embedding=self.embedding,
                                           gru=False)
     self.state_wc = nn.Linear(
         config.decoder_hidden_size,
         config.decoder_hidden_size * config.num_layers)
     self.state_wh = nn.Linear(
         config.decoder_hidden_size,
         config.decoder_hidden_size * config.num_layers)
     self.tanh = nn.Tanh()
     self.config = config
     self.criterion = models.criterion(self.vocab_size, use_cuda)
     self.log_softmax = nn.LogSoftmax()
Code Example #14
def train(model, train_loader, optim, sch, num_epochs):
    avg_loss = []
    for epoch in range(num_epochs):
        if epoch == MAX_EPOCHS:
            print(LINESPLIT)
            print("Maximum allowed training epochs({}) reached".format(epoch))

        total_steps = len(train_loader)
        running_loss = 0.0

        for step, (feats, sns) in enumerate(train_loader):
            feats = feats.to(device)
            sns = sns.to(device)

            optim.zero_grad()

            outputs = model(feats.float())
            loss = torch.sqrt(
                models.criterion(outputs.squeeze(1), sns.float()) + eps)

            loss.backward()
            optim.step()

            running_loss += loss.item()

        avg_loss.append(running_loss / total_steps)

        sch.step(running_loss / total_steps)

        if (epoch + 1) % SAVE_FREQ == 0:
            torch.save(
                model.state_dict(),
                "{}_{}_{}.pth".format(modelfolder, model.__class__.__name__,
                                      epoch + 1))
            print(LINESPLIT)
            print("Model checkpoint saved as _{}_{}.pth".format(
                model.__class__.__name__, epoch + 1))
            print(LINESPLIT)

        if (epoch + 1) % PRINT_FREQ == 0:
            print("Epoch [{:4d}/{}] -> Loss: {:.4f}".format(
                epoch + 1, num_epochs, running_loss / total_steps))
            running_loss = 0.0

    return avg_loss
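A minimal usage sketch tying this train function to the validate function from Code Example #4 (the optimizer and scheduler choices are assumptions; sch.step() receiving a metric suggests ReduceLROnPlateau, and model, the data loaders, timestamps, device and the module-level constants are taken as already defined):

import torch

optim = torch.optim.Adam(model.parameters(), lr=1e-3)
sch = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, mode='min', patience=5)

avg_loss = train(model, train_loader, optim, sch, num_epochs=100)
predictions, val_losses = validate(model, val_loader, timestamps)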
Code Example #15
    def __init__(self,
                 config,
                 input_emb_size,
                 mix_speech_len,
                 tgt_vocab_size,
                 use_cuda,
                 pretrain=None,
                 score_fn=None):
        super(seq2seq, self).__init__()
        if pretrain is not None:
            src_embedding = pretrain['src_emb']
            tgt_embedding = pretrain['tgt_emb']
        else:
            src_embedding = None
            tgt_embedding = None
        self.encoder = models.rnn_encoder(config,
                                          input_emb_size,
                                          None,
                                          embedding=src_embedding)
        if config.shared_vocab == False:
            self.decoder = models.rnn_decoder(config,
                                              tgt_vocab_size,
                                              embedding=tgt_embedding,
                                              score_fn=score_fn)
        else:
            self.decoder = models.rnn_decoder(config,
                                              tgt_vocab_size,
                                              embedding=self.encoder.embedding,
                                              score_fn=score_fn)
        self.use_cuda = use_cuda
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        self.criterion = models.criterion(tgt_vocab_size, use_cuda)
        self.loss_for_ss = nn.MSELoss()
        self.log_softmax = nn.LogSoftmax()

        speech_fre = input_emb_size
        num_labels = tgt_vocab_size
        self.ss_model = models.SS(config, speech_fre, mix_speech_len,
                                  num_labels)
Code Example #16
 def __init__(self,
              config,
              vocab,
              use_cuda,
              use_content=False,
              pretrain=None):
     super(seq2seq, self).__init__()
     self.vocab = vocab
     self.vocab_size = vocab.voc_size
     if pretrain is not None:
         self.embedding = pretrain['emb']
     else:
         self.embedding = nn.Embedding(self.vocab_size, config.emb_size)
     self.encoder = models.rnn_encoder(config,
                                       self.vocab_size,
                                       embedding=self.embedding)
     self.decoder = models.rnn_decoder(config,
                                       self.vocab_size,
                                       embedding=self.embedding)
     self.config = config
     self.use_content = use_content
     self.criterion = models.criterion(self.vocab_size, use_cuda)
     self.log_softmax = nn.LogSoftmax()
     self.tanh = nn.Tanh()
Code Example #17
File: seq2seq.py Project: shincling/TDAAv2
    def __init__(self, config, input_emb_size, mix_speech_len, tgt_vocab_size, use_cuda, pretrain=None, score_fn=''):
        super(seq2seq_music, self).__init__()
        if pretrain is not None:
            src_embedding = pretrain['src_emb']
            tgt_embedding = pretrain['tgt_emb']
        else:
            src_embedding = None
            tgt_embedding = None
        self.use_cuda = use_cuda
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        self.criterion = models.criterion(tgt_vocab_size, use_cuda, config.loss)
        self.loss_for_ss = nn.MSELoss()
        self.log_softmax = nn.LogSoftmax()
        self.wav_loss = models.WaveLoss(dBscale=1, nfft=config.FRAME_LENGTH, hop_size=config.FRAME_SHIFT)

        speech_fre = input_emb_size
        num_labels = tgt_vocab_size
        if config.use_tas:
            if self.config.use_dprnn:
                self.ss_model = models.FaSNet_base(config)
                self.spk_lstm = nn.LSTMCell(self.ss_model.B + self.ss_model.N, self.ss_model.B)  # LSTM over the speakers' step.
            else:
                self.ss_model = models.ConvTasNet_music(config)
                if self.config.two_stage:
                    self.second_ss_model = models.ConvTasNet_2nd(config)
                    for p in self.encoder.parameters():
                        p.requires_grad = False
                    for p in self.decoder.parameters():
                        p.requires_grad = False
                    for p in self.ss_model.parameters():
                        p.requires_grad = False
                self.spk_lstm = nn.LSTMCell(self.ss_model.B + self.ss_model.N, self.ss_model.B)  # LSTM over the speakers' step.
        else:
            # self.ss_model = models.SS_att(config, speech_fre, mix_speech_len, num_labels)
            self.ss_model = models.SS(config, speech_fre, mix_speech_len, num_labels)
Code Example #18
    def __init__(self,
                 config,
                 input_emb_size,
                 mix_speech_len,
                 tgt_vocab_size,
                 use_cuda,
                 pretrain=None,
                 score_fn=''):
        super(seq2seq, self).__init__()
        if pretrain is not None:
            src_embedding = pretrain['src_emb']
            tgt_embedding = pretrain['tgt_emb']
        else:
            src_embedding = None
            tgt_embedding = None
        # self.encoder = models.rnn_encoder(config, input_emb_size, None, embedding=src_embedding)
        self.encoder = models.TransEncoder(config, input_emb_size)
        self.decoder = models.TransDecoder(config,
                                           sos_id=0,
                                           eos_id=tgt_vocab_size - 1,
                                           n_tgt_vocab=tgt_vocab_size)
        # if config.shared_vocab == False:
        #     self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=tgt_embedding, score_fn=score_fn)
        # else:
        #     self.decoder = models.rnn_decoder(config, tgt_vocab_size, embedding=self.encoder.embedding,
        #                                       score_fn=score_fn)
        self.use_cuda = use_cuda
        self.tgt_vocab_size = tgt_vocab_size
        self.config = config
        self.criterion = models.criterion(tgt_vocab_size, use_cuda,
                                          config.loss)
        self.loss_for_ss = nn.MSELoss()
        self.log_softmax = nn.LogSoftmax()
        self.wav_loss = models.WaveLoss(dBscale=1,
                                        nfft=config.FRAME_LENGTH,
                                        hop_size=config.FRAME_SHIFT)

        speech_fre = input_emb_size
        num_labels = tgt_vocab_size
        if config.use_tas:
            if self.config.use_dprnn:
                self.ss_model = models.FaSNet_base(config)
                if self.config.two_stage:
                    self.second_ss_model = models.FaSNet_base_2nd(config)
                    for p in self.encoder.parameters():
                        p.requires_grad = False
                    for p in self.decoder.parameters():
                        p.requires_grad = False
                    for p in self.ss_model.parameters():
                        p.requires_grad = False
            else:
                self.ss_model = models.ConvTasNet(config)
                if self.config.two_stage:
                    self.second_ss_model = models.ConvTasNet_2nd(config)
                    for p in self.encoder.parameters():
                        p.requires_grad = False
                    for p in self.decoder.parameters():
                        p.requires_grad = False
                    for p in self.ss_model.parameters():
                        p.requires_grad = False
        else:
            # self.ss_model = models.SS_att(config, speech_fre, mix_speech_len, num_labels)
            self.ss_model = models.SS(config, speech_fre, mix_speech_len,
                                      num_labels)
Code Example #19
def main(args):
    os.makedirs(args.checkpoint_dir, exist_ok=True)

    dataset, collate_fn, batch_frontend = build_dataset(args)

    model = build_model(args)

    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers,
                                              collate_fn=collate_fn,
                                              shuffle=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    loss_scale_consistency = args.gamma_regularization
    loss_scale_entropy = args.gamma_regularization

    start = time.time()
    iteration = 0
    for epoch in range(args.num_epochs):
        model.train()

        total_loss = 0

        for i, (frames_paths, frames, flow_paths,
                flow) in enumerate(data_loader):
            learning_rate = (args.learning_rate *
                             (iteration / args.warmup_steps)
                             if iteration < args.warmup_steps else
                             args.learning_rate) * (args.decay_rate**int(
                                 iteration / args.decay_steps))
            loss_scale_consistency = args.loss_scale_consistency * (
                args.gamma_regularization**int(iteration / args.decay_steps))
            loss_scale_entropy = args.loss_scale_entropy * (
                args.gamma_regularization**int(iteration / args.decay_steps))
            optimizer.param_groups[0]['lr'] = learning_rate

            flow = sample_frames_flow(flow)

            flow = batch_frontend(flow.to(args.device))

            recon_combined, recons, masks, slots, attn = model(flow)
            loss = models.criterion(
                recon_combined,
                masks,
                flow,
                loss_scale_reconstruction=args.loss_scale_reconstruction,
                loss_scale_consistency=loss_scale_consistency,
                loss_scale_entropy=loss_scale_entropy)
            loss_item = float(loss)
            total_loss += loss_item

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('Epoch:', epoch, '#', iteration, '|', i, '/',
                  len(data_loader), 'Loss:', loss_item)
            iteration += 1

        total_loss /= len(data_loader)

        print('Epoch:', epoch, 'Loss:', total_loss, 'Time:',
              datetime.timedelta(seconds=time.time() - start))

        if not epoch % args.checkpoint_epoch_interval:
            model_state_dict = model.module.state_dict() if isinstance(
                model, nn.DataParallel) else model.state_dict()
            torch.save(
                dict(model_state_dict=model_state_dict),
                os.path.join(args.checkpoint_dir,
                             args.checkpoint_pattern.format(epoch=epoch)))
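The inline learning-rate expression above combines linear warmup with stepwise exponential decay. A minimal sketch of the same schedule as a standalone helper (the function name is hypothetical):

def lr_at(iteration, base_lr, warmup_steps, decay_rate, decay_steps):
    # linear warmup to base_lr, then multiply by decay_rate every decay_steps iterations
    lr = base_lr * (iteration / warmup_steps) if iteration < warmup_steps else base_lr
    return lr * (decay_rate ** (iteration // decay_steps))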
Code Example #20
File: tests.py Project: Fattyzqrrrrr621/REC2020
from django.test import TestCase

import numpy as np

# get_dataset, Clustering and criterion are assumed to come from the project's own modules
dataset = get_dataset()
testdata = []
for item in dataset:
    if item[0] == (4, 2018, 2):
        for items in item:
            testdata.append(items)
testdata.remove((4, 2018, 2))

# Find the best number of clusters
maxk = 9  # maximum number of clusters to test
WSSarray = []  # WSS (error) for each cluster count 1, 2, ..., maxk; len(WSSarray) == maxk
for i in range(1, maxk + 1):
    cluster, belong = Clustering(testdata, i)
    WSS = criterion(testdata, cluster, belong)
    WSSarray.append(WSS)
# print("WSS values for each cluster count:", WSSarray)  # print the error array

WSSDelta = list(np.ones(maxk))  # successive drops in WSS; the cluster count at the largest drop is chosen as the suitable k
WSSDelta[0] = 0
maxDelta = -1
indexDelta = 0
for i in range(1, maxk):
    WSSDelta[i] = WSSarray[i - 1] - WSSarray[i]
    if WSSDelta[i] > maxDelta:
        maxDelta = WSSDelta[i]
        indexDelta = i + 1  # the preferred number of clusters
# print("Largest WSS drop:", maxDelta)  # print the maximum drop
# print("Array of WSS drops:", WSSDelta)  # print the drops array
#print(WSSDelta)
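The same elbow selection can be written more compactly. A minimal sketch, assuming WSSarray has been filled for k = 1..maxk as above:

deltas = -np.diff(WSSarray)          # drop in WSS when adding one more cluster
best_k = int(np.argmax(deltas)) + 2  # diff index j compares k = j + 1 against k = j + 2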