예제 #1
0
    def __init__(self, bar_embedder, context_size, 
                 enc_lstm_size, dec_lstm_size, 
                 enc_use_meta=False, dec_use_meta=False, compile_now=False):
        """Build a bar sequence-to-sequence model.

        Embeds `context_size` previous bars, encodes them with an LSTM,
        then decodes a per-timestep softmax over the embedder's vocabulary.

        :param bar_embedder: embedder exposing `embedding_size` and `vocab_size`.
        :param context_size: number of previous-bar inputs.
        :param enc_lstm_size: units of the encoder LSTM.
        :param dec_lstm_size: units of the decoder LSTM.
        :param enc_use_meta: concatenate the meta vector onto each bar embedding.
        :param dec_use_meta: concatenate the meta vector onto the encoded state.
        :param compile_now: compile with default settings immediately.
        """
        self.n_voices = 6

        # one integer-sequence input per context bar
        prev_bars = [Input(shape=(None,), name="context_" + str(i)) 
                     for i in range(context_size)]

        if enc_use_meta or dec_use_meta:
            meta_cat = Input(shape=(None,), name="metaData")

        # embed each context bar
        embeddings = [bar_embedder(pb) for pb in prev_bars]
        embed_size = bar_embedder.embedding_size

        if enc_use_meta:
            # attach the meta vector to every bar embedding
            embeddings = [Concat([emb, meta_cat]) for emb in embeddings]
            embed_size = bar_embedder.embedding_size + self.n_voices

        embeddings_stacked = Lambda(lambda ls: K.stack(ls, axis=1), 
                                    output_shape=(context_size, 
                                                  embed_size)
                                    )(embeddings)

        # encode
        embeddings_processed = LSTM(enc_lstm_size)(embeddings_stacked)

        encoded_size = enc_lstm_size

        # decode
        if dec_use_meta:
            encoded_size += self.n_voices
            embeddings_processed = Concat([embeddings_processed, meta_cat])

        # BUG FIX: `repeated` was previously created only inside the
        # `if dec_use_meta:` branch, raising NameError whenever
        # dec_use_meta was False. It must be built unconditionally.
        repeated = Lambda(self._repeat, output_shape=(None, encoded_size))\
                                ([prev_bars[0], embeddings_processed])

        decoded = LSTM(dec_lstm_size, 
                       return_sequences=True, name='dec_lstm')(repeated)

        preds = TimeDistributed(Dense(bar_embedder.vocab_size, activation='softmax'), 
                               name='softmax_layer')(decoded)

        if enc_use_meta or dec_use_meta:
            super().__init__(inputs=[*prev_bars, meta_cat], outputs=preds)  
        else:
            super().__init__(inputs=prev_bars, outputs=preds)  

        self.params = [context_size, enc_lstm_size, dec_lstm_size,
                       enc_use_meta, dec_use_meta]

        self.use_meta = enc_use_meta or dec_use_meta

        if compile_now:
            self.compile_default()
예제 #2
0
    def __init__(self, melody_encoder,
                 rhythm_embed_size, dec_lstm_size, V,
                 enc_use_meta=False, dec_use_meta=False,
                 compile_now=False):
        """Melody decoder conditioned on encoded context melodies, a bar
        rhythm embedding, an encoded lead voice and optional meta data.

        :param melody_encoder: encoder exposing `m` (frame size) and `params`.
        :param rhythm_embed_size: length of the bar rhythm embedding input.
        :param dec_lstm_size: units of the decoder LSTM.
        :param V: output vocabulary size (per-frame softmax width).
        :param enc_use_meta: if set (with or without dec_use_meta), a
            "metaData" input of length `n_voices` is added to the model.
        :param dec_use_meta: additionally concatenate the meta vector into
            the decoder input.
        :param compile_now: compile with default settings immediately.
        """
        self.n_voices = 9
        self.use_meta = enc_use_meta or dec_use_meta
        m = melody_encoder.m

        prev_melodies = Input(shape=(None, m), name="contexts")
        bar_embedding = Input(shape=(rhythm_embed_size, ), name="bar_rhythm_embedded")
        if self.use_meta:
            meta_cat = Input(shape=(self.n_voices,), name="metaData")

        lead = Input(shape=(None, m), name="lead")
        lead_enc = self.get_lead_encoder(melody_encoder)

        # encode
        processed = melody_encoder(prev_melodies)
        processed_with_rhythms = Concat([processed, bar_embedding])

        # decode
        if dec_use_meta:
            processed_with_rhythms = Concat([processed_with_rhythms, meta_cat])

        lead_processed = lead_enc(lead)
        processed_with_lead = Concat([processed_with_rhythms, lead_processed])

        # repeat the merged context vector once per output frame
        proc_repeated = RepeatVector(m)(processed_with_lead)

        lstm_outputs = LSTM(dec_lstm_size, return_sequences=True)(proc_repeated)

        preds = TimeDistributed(Dense(V, activation="softmax"))(lstm_outputs)

        self.params = [melody_encoder.params, rhythm_embed_size,
                       dec_lstm_size, V, enc_use_meta, dec_use_meta]

        if self.use_meta:
            super().__init__(inputs=[prev_melodies, bar_embedding, 
                                     meta_cat, lead], 
                             outputs=preds, name=repr(self))
        else:
            super().__init__(inputs=[prev_melodies, bar_embedding, lead], 
                             outputs=preds, name=repr(self))

        self.encoder = melody_encoder

        if compile_now:
            self.compile_default()
예제 #3
0
    def __init__(self,
                 m,
                 V,
                 rhythm_embed_size,
                 conv_f,
                 conv_win_size,
                 enc_lstm_size,
                 dec_lstm_1_size,
                 dec_lstm_2_size,
                 meta_len,
                 compile_now=False):
        """Melody model: a 1-D conv + LSTM encoder over previous melodies
        (with the meta data projected to a voice softmax and tiled along
        time), followed by a two-layer LSTM decoder that emits one softmax
        over V per output frame."""
        self.n_voices = 6

        # model inputs
        melody_ctx = Input(shape=(None, m), name="contexts")
        rhythm_vec = Input(shape=(rhythm_embed_size, ),
                           name="bar_rhythm_embedded")
        meta_in = Input(shape=(meta_len, ), name="metaData")
        meta_voices = Dense(self.n_voices, activation="softmax")(meta_in)

        # encode: convolve the melody context, tile the meta vector to the
        # conv output's time length, then run one LSTM over the result
        conv_out = Conv1D(filters=conv_f,
                          kernel_size=conv_win_size)(melody_ctx)
        meta_tiled = Lambda(self._repeat,
                            output_shape=(None, self.n_voices))(
                                [conv_out, meta_voices])

        enc_input = Concat([conv_out, meta_tiled], axis=-1)
        encoded = LSTM(enc_lstm_size)(enc_input)

        # decode: append the rhythm embedding, repeat per frame, two LSTMs
        dec_seed = Concat([encoded, rhythm_vec])
        dec_input = RepeatVector(m)(dec_seed)

        dec_hidden = LSTM(dec_lstm_1_size,
                          return_sequences=True)(dec_input)
        dec_out = LSTM(dec_lstm_2_size,
                       return_sequences=True)(dec_hidden)

        preds = TimeDistributed(Dense(V, activation="softmax"))(dec_out)

        super().__init__(inputs=[melody_ctx, rhythm_vec, meta_in],
                         outputs=preds)

        self.params = [
            m, V, rhythm_embed_size, conv_f, conv_win_size, enc_lstm_size,
            dec_lstm_1_size, dec_lstm_2_size, meta_len
        ]

        if compile_now:
            self.compile_default()
예제 #4
0
    def __init__(self, melody_encoder, dense_size, V, compile_now=False):
        """Predict a size-V softmax from a root note, a bar of melody and a
        pre-embedded meta vector, via two dense layers."""
        m = melody_encoder.m

        self.n_voices = 10

        # model inputs
        root_in = Input(shape=(1, ), name="root_note")
        melody_in = Input(shape=(None, m), name="bar_melody")
        meta_in = Input(shape=(self.n_voices, ), name="meta_embedded")

        # encode each input, then merge all three representations
        merged = Concat([
            Dense(dense_size)(root_in),
            melody_encoder(melody_in),
            meta_in,
        ])

        hidden = Dense(dense_size)(merged)
        preds = Dense(V, activation="softmax")(hidden)

        super().__init__(inputs=[root_in, melody_in, meta_in],
                         outputs=preds)

        self.params = [dense_size, V]
        self.melody_encoder = melody_encoder

        if compile_now:
            self.compile_default()
예제 #5
0
    def __init__(self, rhythm_encoder, dec_lstm_size, V,  
                 enc_use_meta=False, dec_use_meta=False, compile_now=False):
        """Rhythm decoder built on a pre-constructed rhythm encoder.

        :param rhythm_encoder: encoder exposing `context_size`,
            `encoding_size`, `bar_embedder` and `params`.
        :param dec_lstm_size: units of the decoder LSTM.
        :param V: output vocabulary size (per-timestep softmax width).
        :param enc_use_meta: add a "metaData" input to the model.
        :param dec_use_meta: additionally concatenate meta and the embedded
            lead into the encoded state before decoding.
        :param compile_now: compile with default settings immediately.
        """
        self.n_voices = 8

        context_size = rhythm_encoder.context_size
        encoded_size = rhythm_encoder.encoding_size
        bar_embedder = rhythm_encoder.bar_embedder
        bar_embed_size = rhythm_encoder.bar_embedder.embedding_size

        # one integer-sequence input per context bar
        prev_bars = [Input(shape=(None,), name="context_" + str(i)) 
                     for i in range(context_size)]

        if enc_use_meta or dec_use_meta:
            meta_cat = Input(shape=(None,), name="metaData")

        lead = Input(shape=(None, ), name="lead")
        lead_embedded = bar_embedder(lead)

        # encode
        embeddings_processed = rhythm_encoder(prev_bars)

        # decode
        if dec_use_meta:
            encoded_size += self.n_voices
            encoded_size += bar_embed_size
            embeddings_processed = Concat([embeddings_processed, meta_cat, lead_embedded])

        repeated = Lambda(self._repeat, output_shape=(None, encoded_size))\
                                ([prev_bars[0], embeddings_processed])

        decoded = LSTM(dec_lstm_size, 
                       return_sequences=True, name='dec_lstm')(repeated)

        preds = TimeDistributed(Dense(V, activation='softmax'), 
                               name='softmax_layer')(decoded)

        self.params = [rhythm_encoder.params, dec_lstm_size, V,
                       enc_use_meta, dec_use_meta]

        self.use_meta = enc_use_meta or dec_use_meta

        if enc_use_meta or dec_use_meta:
            super().__init__(inputs=[*prev_bars, meta_cat, lead], outputs=preds,
                             name=repr(self))  
        else:
            # BUG FIX: the context Inputs were previously passed as a nested
            # list ([prev_bars, lead]); they must be unpacked so every Input
            # tensor is a direct model input (cf. the meta branch above).
            super().__init__(inputs=[*prev_bars, lead], outputs=preds,
                             name=repr(self))  

        if compile_now:
            self.compile_default()
예제 #6
0
def inception_3b(input_t, fc1, fc2_in, fc2_out, fc3_in, fc3_out, fc4_out):
    """Inception block: parallel 1x1, 1x1->3x3, 1x1->5x5 and pool->1x1
    branches over `input_t`, concatenated along the channel axis.

    The fc* arguments give the filter counts of the respective branches
    (the *_in values are the 1x1 bottleneck widths).
    """
    def conv(tensor, n_filters, size):
        # all convolutions in this block share padding and activation
        return Conv2D(n_filters, size, padding='same', activation='relu')(tensor)

    branch_1x1 = conv(input_t, fc1, (1, 1))

    branch_3x3 = conv(conv(input_t, fc2_in, (1, 1)), fc2_out, (3, 3))

    branch_5x5 = conv(conv(input_t, fc3_in, (1, 1)), fc3_out, (5, 5))

    pooled = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_t)
    branch_pool = conv(pooled, fc4_out, (1, 1))

    return Concat([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=-1)
예제 #7
0
    def __init__(self, bar_embedder, context_size, 
                 enc_lstm_size, dec_lstm_size, meta_len, compile_now=False):
        """Bar model with meta conditioning: each context bar embedding is
        concatenated with a softmax projection of the meta data, the stack
        is encoded by an LSTM, and a decoder LSTM emits per-timestep
        softmaxes over the embedder's vocabulary."""
        self.num_calls = 0
        self.n_voices = 6

        # one integer-sequence input per context bar, plus the meta input
        context_inputs = [Input(shape=(None,), name="context_" + str(i)) 
                          for i in range(context_size)]
        meta_input = Input(shape=(meta_len,), name="metaData")
        meta_softmax = Dense(self.n_voices, activation="softmax")(meta_input)

        # embed each bar and attach the meta vector, then stack along time
        per_bar = [Concat([bar_embedder(bar), meta_softmax])
                   for bar in context_inputs]
        stacked = Lambda(lambda ls: K.stack(ls, axis=1), 
                         output_shape=(context_size, 
                                       bar_embedder.embedding_size + self.n_voices)
                         )(per_bar)

        # encode
        encoded = LSTM(enc_lstm_size)(stacked)

        # decode (self._repeat is defined elsewhere; presumably tiles the
        # encoding to the first context bar's length — confirm upstream)
        tiled = Lambda(self._repeat, output_shape=(None, enc_lstm_size))\
                            ([context_inputs[0], encoded])

        decoder_out = LSTM(dec_lstm_size, 
                           return_sequences=True, name='dec_lstm')(tiled)

        pred = TimeDistributed(Dense(bar_embedder.vocab_size, activation='softmax'), 
                               name='softmax_layer')(decoder_out)

        super().__init__(inputs=[*context_inputs, meta_input], outputs=pred)  

        self.params = [context_size, enc_lstm_size, dec_lstm_size, meta_len]

        if compile_now:
            self.compile_default()
예제 #8
0
# NOTE(review): `x12`, `normal` and `dropout` are bound earlier in the file,
# outside this excerpt — `normal` appears to be the shared input tensor and
# `dropout` a global scaling factor; confirm their definitions upstream.

# Branch 1 (continuation from the lines above this excerpt).
x13 = Conv2D(32, 3, activation="relu", padding="same")(x12)
x1 = Dropout(0.15 * dropout)(x13)

# Branch 2: 5x5 convolutions with a (1, 2) pool after the first conv.
x21 = Conv2D(8, 5, activation="relu", padding="same")(normal)
xmp2 = MaxPooling2D(pool_size=(1, 2))(x21)
x22 = Conv2D(16, 5, activation="relu", padding="same")(xmp2)
x23 = Conv2D(32, 5, activation="relu", padding="same")(x22)
x2 = Dropout(0.15 * dropout)(x23)

# Branch 3: shrinking kernel sizes (5 -> 3 -> 1) after the same pooling.
x31 = Conv2D(8, 5, activation="relu", padding="same")(normal)
xmp3 = MaxPooling2D(pool_size=(1, 2))(x31)
x32 = Conv2D(16, 3, activation="relu", padding="same")(xmp3)
x33 = Conv2D(32, 1, activation="relu", padding="same")(x32)
x3 = Dropout(0.15 * dropout)(x33)

# Merge the three branches along axis 3 (channels-last).
inception = Concat([x1, x2, x3], axis=3)

# Trunk: conv / pool / dropout stages after the merge, with kernel sizes
# decreasing 6 -> 5 -> 4 -> 3.
x = Conv2D(32, 6, activation="relu", padding="same")(inception)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Dropout(0.3 * dropout)(x)

x = Conv2D(32, 5, activation="relu", padding="same")(x)
x = Dropout(0.3 * dropout)(x)

x = Conv2D(32, 4, activation="relu", padding="same")(x)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Dropout(0.3 * dropout)(x)

x = Conv2D(32, 3, activation="relu", padding="same")(x)
x = Dropout(0.3 * dropout)(x)