Ejemplo n.º 1
0
    def __init__(self):
        """Assemble the U-Net: four encoder stages, a bottleneck, four decoders."""
        super(UNet, self).__init__()

        # Contracting path — channel width doubles at every stage.
        # Registration order matches the original (enc1..enc4).
        for stage, (c_in, c_out) in enumerate(
                [(3, 128), (128, 256), (256, 512), (512, 1024)], start=1):
            setattr(self, f"enc{stage}", Encoder(c_in, c_out))

        # Bottleneck: two 3x3 convs widen to 2048, then a stride-2 transposed
        # conv upsamples and narrows back to 1024 channels.
        self.conv1 = nn.Conv2d(1024, 2048, 3, padding=1)
        self.conv2 = nn.Conv2d(2048, 2048, 3, padding=1)
        self.t_conv = nn.ConvTranspose2d(2048, 1024, 2, stride=2)

        # Expanding path — mirrors the encoders (dec4..dec2), then the final
        # decoder which carries the is_final flag.
        for stage, (c_in, c_out) in [(4, (2048, 1024)),
                                     (3, (1024, 512)),
                                     (2, (512, 256))]:
            setattr(self, f"dec{stage}", Decoder(c_in, c_out))
        self.dec1 = Decoder(256, 128, is_final=True)
Ejemplo n.º 2
0
def load():
    """Restore the trained seq2seq model and its dataset into module globals.

    Populates ``session``, ``graph``, ``model`` and ``data_result`` so that
    other entry points in this module can reuse them.
    """
    global session
    global graph
    global model
    global data_result

    data_result = DataResult(None, None)

    # Rehydrate the preprocessed dataset written by the training pipeline.
    with open(script_dir + '/../temp/processed_data.json', 'r') as output:
        data_result.loadJSON(json.load(output))

    graph = Graph()
    with graph.as_default():
        session = Session(graph=graph)
        with session.as_default():
            # Rebuild the encoder/decoder topology, then load trained weights.
            enc = Encoder(data_result.input_data)
            dec = Decoder(data_result.output_data, enc)
            restored = Model([enc.inputs, dec.inputs], dec.outputs)
            restored.compile(optimizer='rmsprop',
                             loss='categorical_crossentropy')
            restored.load_weights(
                os.path.dirname(__file__) + '/../model_weights.h5')

            model = restored
Ejemplo n.º 3
0
    def __init__(self, mc):

        # Former explicit signature, kept for reference (now packed into ``mc``):
        #  d_input: int,
        #  d_model: int,
        #  d_output: int,
        #  q: int,
        #  v: int,
        #  h: int,
        #  N: int,
        #  attention_size: int = None,
        #  dropout: float = 0.3,
        #  chunk_mode: str = 'chunk',
        #  pe: str = None,
        #  pe_period: int = 24):
        """Create transformer structure from Encoder and Decoder blocks.

        ``mc`` is a configuration object expected to expose: d_model, d_input,
        d_output, q, v, h, N, attention_size, dropout, chunk_mode, pe and
        pe_period.

        Raises NameError if ``mc.pe`` is neither a known positional-encoding
        name nor None.
        """
        super().__init__()

        self.mc = mc
        self._d_model = mc.d_model

        # N identical encoder blocks and N identical decoder blocks.
        self.layers_encoding = nn.ModuleList([
            Encoder(mc.d_model,
                    mc.q,
                    mc.v,
                    mc.h,
                    attention_size=mc.attention_size,
                    dropout=mc.dropout,
                    chunk_mode=mc.chunk_mode) for _ in range(mc.N)
        ])
        self.layers_decoding = nn.ModuleList([
            Decoder(mc.d_model,
                    mc.q,
                    mc.v,
                    mc.h,
                    attention_size=mc.attention_size,
                    dropout=mc.dropout,
                    chunk_mode=mc.chunk_mode) for _ in range(mc.N)
        ])

        # Input projection into model width, and output head to target width.
        self._embedding = nn.Linear(mc.d_input, mc.d_model)
        self._linear = nn.Linear(mc.d_model, mc.d_output)

        # Positional-encoding generators selectable by name.
        pe_functions = {
            'original': generate_original_PE,
            'regular': generate_regular_PE,
        }

        if mc.pe in pe_functions.keys():
            self._generate_PE = pe_functions[mc.pe]
            self._pe_period = mc.pe_period
        elif mc.pe is None:
            # No positional encoding; note _pe_period is deliberately unset here.
            self._generate_PE = None
        else:
            raise NameError(
                f'PE "{mc.pe}" not understood. Must be one of {", ".join(pe_functions.keys())} or None.'
            )

        self.name = 'transformer'
Ejemplo n.º 4
0
def main():
    """Train the Efficient-GAN triple (generator, discriminator, encoder)."""
    # Build the three networks with a shared latent size and randomise weights.
    G = Generator(z_dim=20)
    D = Discriminator(z_dim=20)
    E = Encoder(z_dim=20)
    for net in (G, D, E):
        net.apply(weights_init)

    # Dataset of 200 images, normalised with mean/std 0.5 per channel.
    train_img_list = make_datapath_list(num=200)
    mean = (0.5,)
    std = (0.5,)
    train_dataset = GAN_Img_Dataset(file_list=train_img_list,
                                    transform=ImageTransform(mean, std))

    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=64,
                                                   shuffle=True)

    # Run training; checkpoints are saved under the given model name.
    G_update, D_update, E_update = train_model(G, D, E,
                                               dataloader=train_dataloader,
                                               num_epochs=1500,
                                               save_model_name='Efficient_GAN')
    def __init__(self, data_result, model=None):
        """Build inference-time encoder/decoder models from a trained seq2seq.

        Parameters
        ----------
        data_result:
            Holds the input/output vocabularies (``input_data.chars`` /
            ``output_data.chars``) used to build the token index maps.
        model:
            Pre-built Keras model. When None, the topology is rebuilt from
            ``data_result`` and trained weights are loaded from disk.
        """
        self.data_result = data_result

        if model is None:  # fixed: identity check instead of `== None`
            temp_encoder = Encoder(self.data_result.input_data)
            temp_decoder = Decoder(self.data_result.output_data, temp_encoder)
            temp_model = Model([temp_encoder.inputs, temp_decoder.inputs],
                               temp_decoder.outputs)
            temp_model.compile(optimizer='rmsprop',
                               loss='categorical_crossentropy')
            temp_model.load_weights(
                os.path.dirname(__file__) + '/../model_weights.h5')
            self.model = temp_model
        else:
            self.model = model

        # char -> index lookup tables for both vocabularies.
        self.input_token_index = {
            char: i
            for i, char in enumerate(self.data_result.input_data.chars)
        }
        self.target_token_index = {
            char: i
            for i, char in enumerate(self.data_result.output_data.chars)
        }

        # Encoder inference model: input sequence -> final LSTM states.
        self.encoder_inputs = self.model.input[0]  # input_1
        self.encoder_outputs, state_h_enc, state_c_enc = self.model.layers[
            2].output  # lstm_1
        self.encoder_states = [state_h_enc, state_c_enc]
        self.encoder_model = Model(self.encoder_inputs, self.encoder_states)

        # Decoder inference model: one step given previous token + states.
        self.decoder_inputs = self.model.input[1]  # input_2
        self.decoder_state_input_h = Input(shape=(UNIT_SIZE, ), name='input_3')
        self.decoder_state_input_c = Input(shape=(UNIT_SIZE, ), name='input_4')
        self.decoder_states_inputs = [
            self.decoder_state_input_h, self.decoder_state_input_c
        ]
        self.decoder_lstm = self.model.layers[3]
        self.decoder_outputs, self.state_h_dec, self.state_c_dec = self.decoder_lstm(
            self.decoder_inputs, initial_state=self.decoder_states_inputs)
        self.decoder_states = [self.state_h_dec, self.state_c_dec]
        self.decoder_dense = self.model.layers[4]
        self.decoder_outputs = self.decoder_dense(self.decoder_outputs)
        self.decoder_model = Model(
            [self.decoder_inputs] + self.decoder_states_inputs,
            [self.decoder_outputs] + self.decoder_states)

        # Reverse lookups (index -> char) to decode predictions back to text.
        self.reverse_input_char_index = {
            i: char for char, i in self.input_token_index.items()
        }
        self.reverse_target_char_index = {
            i: char for char, i in self.target_token_index.items()
        }
Ejemplo n.º 6
0
    def __init__(self, args, device):
        """Autoencoder wrapper: builds encoder/decoder plus its own optimizer."""
        super().__init__()

        self.args = args
        self.device = device

        # Latent sizes come from the model_infos config dict.
        infos = args.model_infos
        self.model_infos = infos
        self.latent_dim = infos["encoder_dims"][-1]
        self.latent_sample_dim = infos["latent_dim"]

        self.encoder = Encoder(self.args)
        self.decoder = Decoder(self.args)

        # Created after the submodules so self.parameters() includes them.
        self.optimizer = optim.Adam(self.parameters(),
                                    lr=infos["learning_rate"])
        self.mse_loss = nn.MSELoss()
Ejemplo n.º 7
0
 def __init__(self,
              dictionary_size=100,
              embedding_dim=1100,
              rnn_hidden_size=600,
              rnn_num_layers=2,
              z_dim=1100):
     """Paraphrase seq2seq: shared embedding, latent encoder, GRU decoder.

     NOTE(review): whether embedding_dim must equal z_dim is an open question
     left by the original author — confirm before changing either default.
     """
     super(ParaphraseModel, self).__init__()

     # Token embedding; could be swapped for pretrained vectors (word2vec).
     self.embedding = nn.Embedding(dictionary_size, embedding_dim)

     self.encoder = Encoder(embedding_dim, rnn_hidden_size, rnn_num_layers,
                            z_dim)
     self.decoder = Decoder(embedding_dim, rnn_hidden_size, rnn_num_layers,
                            dictionary_size)

     # Cross-entropy loss; index -1 marks padding positions to ignore.
     self.cel = nn.CrossEntropyLoss(ignore_index=-1)

     self.dictionary_size = dictionary_size
     self.embedding_dim = embedding_dim
Ejemplo n.º 8
0
    # NOTE(review): fragment — the enclosing function's signature and the body
    # of the final `else:` branch are outside this view.
    print(f"Num of GPUs: {torch.cuda.device_count()}")

    # Report the active CUDA device and free cached memory before training.
    if device.type == 'cuda':
        print(f"GPU tagger: {torch.cuda.current_device()}")
        print(f"GPU model: {torch.cuda.get_device_name(0)}")
        torch.cuda.empty_cache()

    print(f'Working on {str(device).upper()}')

    '''Initializations'''
    # Initialize and load dataset
    train_loader, valid_loader = initialize_data(args)

    # Initialize models
    encoder = Encoder(num_nodes=args.num_nodes, node_size=args.inputNodeSize,
                      latent_node_size=args.latentNodeSize, num_hidden_node_layers=args.num_hiddenNodeLayers,
                      hidden_edge_size=args.hiddenEdgeSize, output_edge_size=args.outputEdgeSize, num_mps=args.num_mps,
                      dropout=args.dropout, alpha=args.alpha, intensity=args.intensity, batch_norm=args.batch_norm, device=device).to(device)

    decoder = Decoder(num_nodes=args.num_nodes, node_size=args.inputNodeSize,
                      latent_node_size=args.latentNodeSize, num_hidden_node_layers=args.num_hiddenNodeLayers,
                      hidden_edge_size=args.hiddenEdgeSize, output_edge_size=args.outputEdgeSize, num_mps=args.num_mps,
                      dropout=args.dropout, alpha=args.alpha, intensity=args.intensity, batch_norm=args.batch_norm, device=device).to(device)

    # Both on gpu
    # BUG(review): both operands below test `encoder` twice; the second was
    # almost certainly meant to be `decoder`, so the decoder's device is never
    # actually checked. Left unchanged because the surrounding branch is
    # truncated in this view.
    if (next(encoder.parameters()).is_cuda and next(encoder.parameters()).is_cuda):
        print('The models are initialized on GPU...')
    # One on cpu and the other on gpu
    elif (next(encoder.parameters()).is_cuda or next(encoder.parameters()).is_cuda):
        raise AssertionError("The encoder and decoder are not trained on the same device!")
    # Both on cpu
    else:
Ejemplo n.º 9
0
if (LIMIT_GPU_USAGE):
    # Pin the process to GPU 1 and install the memory-capped TF session.
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    ktf.set_session(get_session())
# Process the dataset
print('STARTING: loading_data')
data_result = DataResult(None, None)

# NOTE(review): other entry points in this project open
# script_dir + '/../temp/processed_data.json'; here the path concatenates
# './temp/...' — confirm which location is intended before changing.
with open(script_dir + './temp/processed_data.json', 'r') as output:
    json_data = json.load(output)
    data_result.loadJSON(json_data)
print('END: loading_data')
print('')

# Create the encoder
print('STARTING: create encoder')
encoder = Encoder(data_result.input_data)
print('END: create encoder')
print('')

# Create the decoder
print('STARTING: create decoder')
decoder = Decoder(data_result.output_data, encoder)
print('END: create decoder')  # fixed: previously printed 'STARTING' twice
print('')

# Create the model
print('STARTING: create model')
model = Model([encoder.inputs, decoder.inputs], decoder.outputs)
print('END: create model')
print('')
Ejemplo n.º 10
0
def main():
    """Train a speech-recognition Transformer on the hackathon dataset.

    Parses hyper-parameters, builds the encoder/decoder Transformer, then
    runs the train/evaluate loop, checkpointing the best loss/CER via nsml.

    Fixes applied: removed a duplicate '--dropout' option (argparse raises
    ArgumentError on conflicting option strings), removed a stray 's' token
    after `dropout=args.dropout,` (syntax error), and added the missing
    '--LFR_m' argument that the encoder construction reads.
    """

    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size', type=int, default=512, help='hidden size of model (default: 256)')
    parser.add_argument('--layer_size', type=int, default=3, help='number of layers of model (default: 3)')
    parser.add_argument('--dropout', type=float, default=0.2, help='dropout rate in training (default: 0.2)')
    parser.add_argument('--bidirectional', action='store_true', help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument('--use_attention', action='store_true', help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size in training (default: 32)')
    parser.add_argument('--workers', type=int, default=4, help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs', type=int, default=10, help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr', type=float, default=1e-04, help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing', type=float, default=0.5, help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len', type=int, default=80, help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda', action='store_true', default=False, help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)')
    parser.add_argument('--save_name', type=str, default='model', help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)
    parser.add_argument("--visdom", type=bool, default=False)

    # Encoder
    parser.add_argument('--d_input', default=80, type=int,
                    help='Dim of encoder input (before LFR)')
    # Added: Low Frame Rate stacking factor; 1 means no frame stacking, so the
    # default preserves the original input dimensionality.
    parser.add_argument('--LFR_m', default=1, type=int,
                        help='Low Frame Rate: number of frames to stack (default: 1)')
    parser.add_argument('--n_layers_enc', default=6, type=int,
                        help='Number of encoder stacks')
    parser.add_argument('--n_head', default=8, type=int,
                        help='Number of Multi Head Attention (MHA)')
    parser.add_argument('--d_k', default=64, type=int,
                        help='Dimension of key')
    parser.add_argument('--d_v', default=64, type=int,
                        help='Dimension of value')
    parser.add_argument('--d_model', default=512, type=int,
                        help='Dimension of model')
    parser.add_argument('--d_inner', default=2048, type=int,
                        help='Dimension of inner')
    # NOTE: a second '--dropout' (default 0.1) was defined here; duplicate
    # option strings make argparse raise ArgumentError at startup, so the
    # single definition above now serves the whole model.
    parser.add_argument('--pe_maxlen', default=5000, type=int,
                        help='Positional Encoding max len')
    # Decoder
    parser.add_argument('--d_word_vec', default=512, type=int,
                        help='Dim of decoder embedding')
    parser.add_argument('--n_layers_dec', default=6, type=int,
                        help='Number of decoder stacks')
    parser.add_argument('--tgt_emb_prj_weight_sharing', default=1, type=int,
                        help='share decoder embedding with decoder projection')

    args = parser.parse_args()

    # Character vocabulary and special tokens for the label sequences.
    char2index, index2char = label_loader.load_label('./data/hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    feature_size = 256 # log me sprctogram
    #feature_size = N_FFT / 2 + 1 # stft

    encoder = Encoder(args.d_input * args.LFR_m, args.n_layers_enc, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
    decoder = Decoder(SOS_token, EOS_token, len(char2index),
                      args.d_word_vec, args.n_layers_dec, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)

    model = Transformer(encoder, decoder)
    model.flatten_parameters()

    # Uniform init of all weights in [-0.08, 0.08].
    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
    criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=PAD_token).to(device)

    bind_model(model, optimizer)

    if args.pause == 1:
        nsml.paused(scope=locals())

    if args.mode != "train":
        return

    # Collect wav/label path pairs from the dataset listing.
    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"

            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data', wav_path))
            script_paths.append(os.path.join(DATASET_PATH, 'train_data', script_path))

    best_loss = 1e10
    best_cer = 1e10
    begin_epoch = 0

    # load all target scripts for reducing disk i/o
    target_path = os.path.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    train_batch_num, train_dataset_list, valid_dataset = split_dataset(args, wav_paths, script_paths, valid_ratio=0.05)

    logger.info('start')

    if args.visdom:
        train_visual = Visual(train_batch_num)
        eval_visual = Visual(1)

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):

        # Producer/consumer queues feed batches from background workers.
        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue, args.batch_size, args.workers)
        train_loader.start()

        if args.visdom:
            train_loss, train_cer = train(model, train_batch_num, train_queue, criterion, optimizer, device, train_begin, args.workers, 10, args.teacher_forcing, train_visual)
        else:
            train_loss, train_cer = train(model, train_batch_num, train_queue, criterion, optimizer, device, train_begin, args.workers, 10, args.teacher_forcing)

        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' % (epoch, train_loss, train_cer))

        train_loader.join()

        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue, args.batch_size, 0)
        valid_loader.start()

        if args.visdom:
            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device, eval_visual)
        else:
            eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue, criterion, device)

        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' % (epoch, eval_loss, eval_cer))

        valid_loader.join()

        nsml.report(False,
            step=epoch, train_epoch__loss=train_loss, train_epoch__cer=train_cer,
            eval__loss=eval_loss, eval__cer=eval_cer)

        # Checkpoint every epoch; additionally tag the best-loss / best-CER runs.
        best_loss_model = (eval_loss < best_loss)
        best_cer_model = (eval_cer < best_cer)
        nsml.save(args.save_name)

        if best_loss_model:
            nsml.save('best_loss')
            best_loss = eval_loss
        if best_cer_model:
            nsml.save('best_cer')
            best_cer = eval_cer
Ejemplo n.º 11
0
                        device=args.device,
                        is_train=True)
# Evaluation loader: reuses the *training* vocabularies so token ids match
# the model; no shuffling and no target file (is_train=False).
test_loader = DataLoader(train_inputs_vocab,
                         train_targets_vocab,
                         args.test_input_path,
                         None,
                         shuffle=False,
                         batch_size=args.batch_size,
                         device=args.device,
                         is_train=False)

###############################
# get models
###############################
# Seq2seq pair sized from the vocabulary word counts; both moved to the
# configured device up front.
encoder = Encoder(train_loader.train_inputs_vocab.word_counts,
                  args.encoder_embedded_size,
                  args.encoder_hidden_size).to(args.device)
decoder = Decoder(train_loader.train_targets_vocab.word_counts,
                  args.decoder_embedded_size, args.decoder_hidden_size,
                  train_loader.SOS_IDX, train_loader.EOS_IDX,
                  args.teacher_forcing_ratio, args.device).to(args.device)
seq2seq = Seq2Seq(encoder, decoder, args.device)

###############################
# get optimizer
###############################
optimizer = torch.optim.Adam(seq2seq.parameters(), lr=args.learning_rate)

###############################
# check directories exist
###############################
Ejemplo n.º 12
0
    def __init__(self,
                 enc_in,
                 input_length,
                 c_out,
                 d_model=512,
                 attention_layer_types=["Triangular"],
                 embedd_kernel_size=3,
                 forward_kernel_size=1,
                 value_kernel_size=1,
                 causal_kernel_size=3,
                 d_ff=None,
                 n_heads=8,
                 e_layers=3,
                 dropout=0.1,
                 norm="batch",
                 se_block=False,
                 activation='relu',
                 output_attention=True):
        """
        enc_in : number of channels fed to the encoder, i.e. the raw input
                 channel count (obtained from the dataloader)
        input_length: original sequence length; needed for the final
                 prediction layer (also from the dataloader)
        c_out : size of the final output layer; should equal the original
                 input length
        --------------------------- TODO add or concat? ---------------------
        d_model: encoding width per layer; stays constant because the
                 transformer's residual additions require matching dimensions
        attention_layer_types: list of the attention types to use
        n_heads: total number of attention heads (currently a multiple of 3)
        e_layers: number of encoder layers
        """
        super(TStransformer, self).__init__()

        # NOTE(review): mutable default ([]) for attention_layer_types and the
        # 'donwconv' attribute-name typo are left as-is: the attribute names
        # are part of the saved state_dict, renaming would break checkpoints.
        self.enc_in = enc_in
        self.d_model = d_model
        self.embedd_kernel_size = embedd_kernel_size
        self.dropout = dropout

        self.attention_layer_types = attention_layer_types
        self.n_heads = n_heads
        self.d_ff = d_ff
        self.activation = activation
        self.forward_kernel_size = forward_kernel_size
        self.value_kernel_size = value_kernel_size
        self.causal_kernel_size = causal_kernel_size
        self.norm = norm
        self.output_attention = output_attention
        self.se_block = se_block
        self.e_layers = e_layers

        self.input_length = input_length
        self.c_out = c_out
        # Encoding

        self.enc_embedding = DataEmbedding(
            c_in=enc_in,
            d_model=d_model,
            embedd_kernel_size=embedd_kernel_size,
            dropout=dropout).double()

        # Encoder
        self.encoder = Encoder([
            EncoderLayer(attention_layer_types=self.attention_layer_types,
                         d_model=self.d_model,
                         n_heads=self.n_heads,
                         d_ff=self.d_ff,
                         dropout=self.dropout,
                         activation=self.activation,
                         forward_kernel_size=self.forward_kernel_size,
                         value_kernel_size=self.value_kernel_size,
                         causal_kernel_size=self.causal_kernel_size,
                         output_attention=self.output_attention,
                         norm=self.norm,
                         se_block=self.se_block) for l in range(self.e_layers)
        ]).double()
        # Encoder output shape is (B, L, d_model)
        # self.decoder = ??????????????  TODO: no decoder yet — the conv head
        # below maps the encoder output directly to the prediction.

        self.donwconv1 = nn.Conv1d(
            in_channels=d_model, out_channels=int(d_model / 2),
            kernel_size=3).double()  # because the final output is 1 per time step
        self.activation1 = F.relu
        self.norm1 = nn.BatchNorm1d(int(d_model / 2))

        self.donwconv2 = nn.Conv1d(
            in_channels=int(d_model / 2), out_channels=1,
            kernel_size=3).double()  # because the final output is 1 per time step
        self.activation2 = F.relu
        self.norm2 = nn.BatchNorm1d(1)

        self.predict = nn.Linear(
            in_features=self.input_length,
            out_features=self.c_out).double()  # could be a large-kernel Conv1d instead
Ejemplo n.º 13
0
 def __init__(self, embedding_size, hidden_size, vocab_size, gru_layers):
     """Wire up the encoder-decoder pair sharing one embedding size."""
     super(EncoderDecoder, self).__init__()
     self.encoder = Encoder(embedding_size)
     self.decoder = Decoder(embedding_size, hidden_size, vocab_size, gru_layers)
Ejemplo n.º 14
0
    # NOTE(review): fragment — the enclosing function's `def` line is outside
    # this view. This tail strips DataParallel's 'module.' prefix from
    # checkpoint keys so weights load into a non-parallel model.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k
        if name.startswith('module.'):
            name = name[7:]  # remove 'module.' of dataparallel
        new_state_dict[name] = v
    return new_state_dict


#torch.manual_seed(44)
# Expose both GPUs to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

device = "cuda" if torch.cuda.is_available() else "cpu"
# Efficient-GAN triple: generator, discriminator, encoder with shared z_dim.
G = Generator(z_dim=20)
D = Discriminator(z_dim=20)
E = Encoder(z_dim=20)
'''-------load weights-------'''
# Restore the epoch-1500 checkpoints; fix_model_state_dict strips any
# DataParallel 'module.' prefix before loading.
G_load_weights = torch.load('./checkpoints/G_Efficient_GAN_1500.pth')
G.load_state_dict(fix_model_state_dict(G_load_weights))

D_load_weights = torch.load('./checkpoints/D_Efficient_GAN_1500.pth')
D.load_state_dict(fix_model_state_dict(D_load_weights))

E_load_weights = torch.load('./checkpoints/E_Efficient_GAN_1500.pth')
E.load_state_dict(fix_model_state_dict(E_load_weights))

G.to(device)
D.to(device)
E.to(device)
"""use GPU in parallel"""
if device == 'cuda':