def create(self, imageHeight, imageWidth, num_classes, evalFLAG):
        graph = tf.Graph()
        with graph.as_default():

            num_hidden = 256

            training = not evalFLAG

            with tf.name_scope('Inputs'):
                inputs = tf.placeholder(tf.float32,
                                        [None, imageHeight, imageWidth, 1],
                                        name='inputs')
                if evalFLAG:
                    tf.summary.image('inputs', inputs, max_outputs=1)

            # seq_len should be fed the real (unpadded) widths of the images before they are padded to imageWidth
            seq_len = tf.placeholder(tf.int32, [None], name='seq_len')

            targets = tf.sparse_placeholder(tf.int32, name='targets')

            targets_len = tf.placeholder(tf.int32, name='targets_len')

            conv_keep_prob = 0.8
            lstm_keep_prob = 0.5
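            # tf.layers.dropout expects a drop probability, so rate = 1 - keep_prob below.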

            # Layer 1
            with tf.name_scope('Layer_Conv_1'):
                h_conv1 = CNN(x=inputs,
                              filters=16,
                              kernel_size=[3, 3],
                              strides=[1, 1],
                              name='conv1',
                              activation=tf.nn.leaky_relu,
                              evalFLAG=evalFLAG,
                              initializer=tf.contrib.layers.xavier_initializer(
                                  uniform=False))
                h_pool1, seq_len_1, imageHeight, imageWidth = max_pool(
                    h_conv1, [2, 2], seq_len, imageHeight, imageWidth,
                    evalFLAG)
                h_pool1 = tf.layers.dropout(h_pool1,
                                            rate=0.0,
                                            training=training)

            # Layer 2
            with tf.name_scope('Layer_Conv_2'):
                h_conv2 = CNN(x=h_pool1,
                              filters=32,
                              kernel_size=[3, 3],
                              strides=[1, 1],
                              name='conv2',
                              activation=tf.nn.leaky_relu,
                              evalFLAG=evalFLAG,
                              initializer=tf.contrib.layers.xavier_initializer(
                                  uniform=False))
                h_pool2, seq_len_2, imageHeight, imageWidth = max_pool(
                    h_conv2, [2, 2], seq_len_1, imageHeight, imageWidth,
                    evalFLAG)
                h_pool2 = tf.layers.dropout(h_pool2,
                                            rate=(1 - conv_keep_prob),
                                            training=training)

            # Layer 3
            with tf.name_scope('Layer_Conv_3'):
                h_conv3 = CNN(x=h_pool2,
                              filters=48,
                              kernel_size=[3, 3],
                              strides=[1, 1],
                              name='conv3',
                              activation=tf.nn.leaky_relu,
                              evalFLAG=evalFLAG,
                              initializer=tf.contrib.layers.xavier_initializer(
                                  uniform=False))
                h_pool3, seq_len_3, imageHeight, imageWidth = max_pool(
                    h_conv3, [2, 2], seq_len_2, imageHeight, imageWidth,
                    evalFLAG)
                h_pool3 = tf.layers.dropout(h_pool3,
                                            rate=(1 - conv_keep_prob),
                                            training=training)

            # Layer 4
            with tf.name_scope('Layer_Conv_4'):
                h_conv4 = CNN(x=h_pool3,
                              filters=64,
                              kernel_size=[3, 3],
                              strides=[1, 1],
                              name='conv4',
                              activation=tf.nn.leaky_relu,
                              evalFLAG=evalFLAG,
                              initializer=tf.contrib.layers.xavier_initializer(
                                  uniform=False))
                h_pool4, seq_len_4, imageHeight, imageWidth = max_pool(
                    h_conv4, [1, 1], seq_len_3, imageHeight, imageWidth,
                    evalFLAG)
                h_pool4 = tf.layers.dropout(h_pool4,
                                            rate=(1 - conv_keep_prob),
                                            training=training)

            # Layer 5
            with tf.name_scope('Layer_Conv_5'):
                h_conv5 = CNN(x=h_pool4,
                              filters=80,
                              kernel_size=[3, 3],
                              strides=[1, 1],
                              name='conv5',
                              activation=tf.nn.leaky_relu,
                              evalFLAG=evalFLAG,
                              initializer=tf.contrib.layers.xavier_initializer(
                                  uniform=False))
                h_pool5, seq_len_5, imageHeight, imageWidth = max_pool(
                    h_conv5, [1, 1], seq_len_4, imageHeight, imageWidth,
                    evalFLAG)
                h_pool5 = tf.layers.dropout(h_pool5,
                                            rate=(1 - lstm_keep_prob),
                                            training=training)

            with tf.name_scope('Reshaping_step'):
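                # Collapse the conv feature map [batch, H, W, 80] into a batch-major
                # sequence of W time steps with H*80 features each: [batch, W, H*80].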
                h_cw_concat = tf.transpose(h_pool5, (2, 0, 1, 3))
                h_cw_concat = tf.reshape(
                    h_cw_concat, (int(imageWidth), -1, int(imageHeight * 80)))
                h_cw_concat = tf.transpose(h_cw_concat, (1, 0, 2))

            with tf.name_scope('Layer_BLSTM_1'):

                h_bilstm1 = bidirectionalLSTM(h_cw_concat, num_hidden,
                                              seq_len_5, '1', evalFLAG)
                h_bilstm1 = tf.concat(h_bilstm1, 2)
                h_bilstm1 = tf.layers.dropout(h_bilstm1,
                                              rate=(1 - lstm_keep_prob),
                                              training=training)

            with tf.name_scope('Layer_BLSTM_2'):

                h_bilstm2 = bidirectionalLSTM(h_bilstm1, num_hidden, seq_len_5,
                                              '2', evalFLAG)
                h_bilstm2 = tf.concat(h_bilstm2, 2)
                h_bilstm2 = tf.layers.dropout(h_bilstm2,
                                              rate=(1 - lstm_keep_prob),
                                              training=training)

            with tf.name_scope('Layer_BLSTM_3'):

                h_bilstm3 = bidirectionalLSTM(h_bilstm2, num_hidden, seq_len_5,
                                              '3', evalFLAG)
                h_bilstm3 = tf.concat(h_bilstm3, 2)
                h_bilstm3 = tf.layers.dropout(h_bilstm3,
                                              rate=(1 - lstm_keep_prob),
                                              training=training)

            with tf.name_scope('Layer_BLSTM_4'):

                h_bilstm4 = bidirectionalLSTM(h_bilstm3, num_hidden, seq_len_5,
                                              '4', evalFLAG)
                h_bilstm4 = tf.concat(h_bilstm4, 2)
                h_bilstm4 = tf.layers.dropout(h_bilstm4,
                                              rate=(1 - lstm_keep_prob),
                                              training=training)

            with tf.name_scope('Layer_BLSTM_5'):

                h_bilstm5 = bidirectionalLSTM(h_bilstm4, num_hidden, seq_len_5,
                                              '5', evalFLAG)
                h_bilstm5 = tf.concat(h_bilstm5, 2)
                h_bilstm5 = tf.layers.dropout(h_bilstm5,
                                              rate=(1 - lstm_keep_prob),
                                              training=training)

            with tf.name_scope('Layer_Linear') as ns:
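                # One shared fully connected projection to num_classes applied at every
                # time step; the BLSTM output is flattened to [W*batch, 2*num_hidden] first.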
                outputs = tf.transpose(h_bilstm5, (1, 0, 2))
                outputs = tf.reshape(outputs, (-1, 2 * num_hidden))
                logits = FNN(outputs, num_classes, ns, None, evalFLAG)

            with tf.name_scope('Logits'):
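                # Back to the time-major shape [max_time, batch, num_classes] expected by tf.nn.ctc_loss.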
                logits = tf.reshape(logits, (int(imageWidth), -1, num_classes))

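            # CTC needs input sequences at least as long as their target sequences.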
            seq_len_5 = tf.maximum(seq_len_5, targets_len)

            n_batches = tf.placeholder(tf.float32, name='n_batches')
            previousCost = tf.placeholder(tf.float32, name='previous_cost')

            with tf.name_scope('CTC_Loss'):
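                # batch_cost is the mean CTC loss of this sub-batch; cost keeps a running
                # average over n_batches sub-batches, with previousCost carrying the sum so far.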
                loss = tf.nn.ctc_loss(targets,
                                      logits,
                                      seq_len_5,
                                      preprocess_collapse_repeated=False,
                                      ctc_merge_repeated=True)
                with tf.name_scope('total'):
                    batch_cost = tf.reduce_mean(loss)
                    cost = batch_cost / n_batches + previousCost

            tf.summary.scalar('CTC_loss', cost)

            with tf.name_scope('train'):
                train_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES,
                    scope='Layer_Linear') + tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES,
                        scope='BLSTM[12345]') + tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES,
                            scope='conv[12345]')
                print(train_vars)
                learning_rate = tf.placeholder(tf.float32,
                                               name='learning_rate')
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate).minimize(batch_cost)

            with tf.name_scope('predictions'):
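                # Beam-search CTC decoding; predictions[0] is the best path as a SparseTensor.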
                predictions, log_prob = tf.nn.ctc_beam_search_decoder(
                    logits, seq_len_5, merge_repeated=False)

            with tf.name_scope('CER'):
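                # EDnorm: length-normalized edit distance averaged over the batch (character error rate).
                # EDabs: raw edit operations divided by the total number of characters in the set.
                # Both accumulate across batches through the previous* placeholders.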
                with tf.name_scope('Mean_CER_per_word'):
                    previousEDnorm = tf.placeholder(tf.float32,
                                                    name='previousEDnorm')
                    EDnorm = tf.reduce_mean(
                        tf.edit_distance(
                            tf.cast(predictions[0], tf.int32),
                            targets,
                            normalize=True)) / n_batches + previousEDnorm

                    if evalFLAG:
                        tf.summary.scalar('EDnorm', EDnorm)

                with tf.name_scope('Absolute_CER_total_set'):
                    setTotalChars = tf.placeholder(tf.float32,
                                                   name='setTotalChars')
                    previousEDabs = tf.placeholder(tf.float32,
                                                   name='previousEDabs')
                    errors = tf.edit_distance(tf.cast(predictions[0],
                                                      tf.int32),
                                              targets,
                                              normalize=False)
                    EDabs = tf.reduce_sum(
                        errors) / setTotalChars + previousEDabs
                    if evalFLAG:
                        tf.summary.scalar('EDabs', EDabs)

            ED = [EDnorm, EDabs]

            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=5,
                                   keep_checkpoint_every_n_hours=24)

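            # Separate saver restricted to the BLSTM and conv variables, used to restore
            # pretrained weights when transferring them to another model.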
            transferred_vars = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES,
                scope="BLSTM[12345]") + tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope="conv")

            transferred_vars_dict = dict([(var.op.name, var)
                                          for var in transferred_vars])

            transfer_saver = tf.train.Saver(transferred_vars_dict)

            merged = tf.summary.merge_all()

            return (graph, [saver, transfer_saver], inputs, seq_len, targets,
                    targets_len, learning_rate, n_batches, setTotalChars,
                    previousEDabs, previousEDnorm, previousCost, optimizer,
                    batch_cost, cost, errors, ED, predictions, merged)


def main():
    print("hello")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    N_EPOCHS = 8
    WIN_SIZES = [3,4,5]
    BATCH_SIZE = 64
    EMB_DIM = 300
    OUT_DIM = 1
    L2_NORM_LIM = 3.0
    NUM_FIL = 100
    DROPOUT_PROB = 0.5
    V_STRATEGY = 'static'
    ALLOC_MEM = 4096

    if V_STRATEGY in ['rand', 'static', 'non-static']:
        NUM_CHA = 1
    else:
        NUM_CHA = 2

    # FILE paths
    W2V_PATH     = 'GoogleNews-vectors-negative300.bin'
    TRAIN_X_PATH = 'train_x.txt'
    TRAIN_Y_PATH = 'train_y.txt'
    VALID_X_PATH = 'valid_x.txt'
    VALID_Y_PATH = 'valid_y.txt'


    # Load pretrained embeddings
    # load_word2vec_format returns a KeyedVectors object, so it can be used directly
    # (no deprecated .wv indirection needed).
    pretrained_model = gensim.models.KeyedVectors.load_word2vec_format(W2V_PATH, binary=True)
    vocab = pretrained_model.vocab.keys()
    w2v = pretrained_model

    # Build dataset =======================================================================================================
    w2c = build_w2c(TRAIN_X_PATH, vocab=vocab)
    w2i, i2w = build_w2i(TRAIN_X_PATH, w2c, unk='unk')
    train_x, train_y = build_dataset(TRAIN_X_PATH, TRAIN_Y_PATH, w2i, unk='unk')
    valid_x, valid_y = build_dataset(VALID_X_PATH, VALID_Y_PATH, w2i, unk='unk')
    train_x, train_y = sort_data_by_length(train_x, train_y)
    valid_x, valid_y = sort_data_by_length(valid_x, valid_y)
    VOCAB_SIZE = len(w2i)
    print('VOCAB_SIZE:', VOCAB_SIZE)
    
    V_init = init_V(w2v, w2i)
    

    with open(os.path.join(RESULTS_DIR, 'w2i.dump'), 'wb') as f_w2i, \
         open(os.path.join(RESULTS_DIR, 'i2w.dump'), 'wb') as f_i2w:
        pickle.dump(w2i, f_w2i)
        pickle.dump(i2w, f_i2w)

    # Build model =================================================================================
 
    model = CNN(VOCAB_SIZE, EMB_DIM, NUM_FIL, WIN_SIZES, OUT_DIM,
                DROPOUT_PROB, len(w2i))


    # Train model ================================================================================
   
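    # Initialize the embedding table from the pretrained word2vec vectors and zero the
    # last row (index len(w2i) - 1).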
    pretrained_embeddings = torch.tensor(V_init)
    model.embedding.weight.data.copy_(pretrained_embeddings)
    model.embedding.weight.data[len(w2i)-1] = torch.zeros(EMB_DIM)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()
    model = model.to(device)    
    criterion = criterion.to(device)
    n_batches_train = len(train_x) // BATCH_SIZE
    n_batches_valid = len(valid_x) // BATCH_SIZE
    #print(len(train_x))
    
    best_valid_loss = float('inf')

    for j in range(N_EPOCHS):


        start_time = time.time()
        epoch_loss = 0
        epoch_acc = 0

        for i in range(n_batches_train):  # full batches only; a trailing partial batch is skipped
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE
            train_loss, train_acc = train(model, train_x[start:end], train_y[start:end],
                                          criterion, optimizer)
        
        for k in range(n_batches_valid):  # full batches only; a trailing partial batch is skipped
            start = k * BATCH_SIZE
            end = start + BATCH_SIZE
            valid_loss, valid_acc = evaluate(model, valid_x[start:end], valid_y[start:end],
                                             criterion, optimizer)

        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # Keep the best model seen so far on the validation set.
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), 'tut4-model.pt')

        print(f'Epoch: {j+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

    torch.save(model.state_dict(), 'training.pt')
    return model
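

if __name__ == '__main__':
    # Minimal entry point sketch so the script can also be run directly.
    main()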