def get_trained_model(c, train_sentences, train_labels):
    # Single-pass variant: expects data that has already been one-hot encoded
    # (the encoding is done once, outside the loop over C values, further below).
    model = LogisticRegression(penalty='l1', tol=0.0001, C=c, fit_intercept=True,
                               intercept_scaling=1, solver='liblinear',
                               max_iter=args.epochs, multi_class='ovr', verbose=1)
    model.fit(train_sentences, train_labels)
    return model
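

# Hedged illustration (not part of the original pipeline), assuming numpy is imported
# at the top of this script: with an L1 penalty the learned weight matrix is sparse,
# so a non_zero_indices list like the one threaded through these functions could be
# read off the trained model's coefficients.
def get_nonzero_feature_indices(model):
    # keep the columns whose coefficient is non-zero for at least one class
    return numpy.nonzero(numpy.any(model.coef_ != 0, axis=0))[0].tolist()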


def eval_routine(corpus, dictionary, model, non_zero_indices=None):
    print('one hot encoding...')
    eval_sentences, eval_labels = helper.batch_to_one_hot_encoded(
        corpus.data, dictionary, non_zero_indices=non_zero_indices)
    if not non_zero_indices:
        # the full-vocabulary encoding is large, so switch to a sparse CSR matrix
        print('Creating CSR sparse matrix...')
        eval_sentences = csr_matrix(eval_sentences)
    print('Testing...')
    acc = model.score(eval_sentences, eval_labels)
    print(' Accuracy: ', acc)
    return acc


def get_trained_model2(c, corpus, dictionary, non_zero_indices):
    model = LogisticRegression(penalty='l1', tol=0.0001, C=c, fit_intercept=True,
                               intercept_scaling=1, solver='liblinear',
                               max_iter=args.epochs, multi_class='ovr', verbose=0)
    train_batches = helper.batchify(corpus.data, args.batch_size)
    print('number of train batches = ', len(train_batches))

    num_batches = len(train_batches)
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' training batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        train_sentences1, train_labels = helper.batch_to_one_hot_encoded(
            train_batches[batch_no - 1],
            dictionary,
            non_zero_indices=non_zero_indices)
        # Note: LogisticRegression.fit() retrains from scratch on every call,
        # so this loop does not accumulate learning across batches.
        model.fit(train_sentences1, train_labels)
    return model
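

# Hedged sketch of a genuinely incremental alternative (not the original approach):
# since LogisticRegression.fit() cannot update an existing model, per-batch training
# could instead use SGDClassifier with partial_fit. The function name and the
# regularisation strength 'alpha' (not on the same scale as C) are assumptions.
from sklearn.linear_model import SGDClassifier

def get_trained_model_incremental(alpha, corpus, dictionary, non_zero_indices, classes):
    # 'classes' must list every label up front; partial_fit requires it on the first call
    model = SGDClassifier(loss='log_loss', penalty='l1', alpha=alpha)  # use loss='log' on older scikit-learn
    for batch in helper.batchify(corpus.data, args.batch_size):
        sentences, labels = helper.batch_to_one_hot_encoded(
            batch, dictionary, non_zero_indices=non_zero_indices)
        model.partial_fit(sentences, labels, classes=classes)
    return model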


def eval_routine(corpus, dictionary, model, non_zero_indices=None):
    # Batched variant; this definition shadows the single-pass eval_routine above.
    nexamples = len(corpus.data)
    dev_batches = helper.batchify(corpus.data, args.batch_size)
    print('number of dev batches = ', len(dev_batches))
    total_acc = 0.0
    correct = 0.0

    num_batches = len(dev_batches)
    for batch_no in range(1, num_batches + 1):
        if batch_no % 500 == 0:
            print(' validation batch: ', batch_no, ' of ', num_batches,
                  ' percentage: ', batch_no / num_batches)
        eval_sentences, eval_labels = helper.batch_to_one_hot_encoded(
            dev_batches[batch_no - 1],
            dictionary,
            non_zero_indices=non_zero_indices)
        acc = model.score(eval_sentences, eval_labels)
        correct += acc * len(eval_labels)
        total_acc += acc
        # if batch_no % 500 == 0: print(' for this minibatch score: ', acc, ' correct: ', acc * len(eval_labels), ' of ', len(eval_labels), ' total acc: ', total_acc, ' total correct: ', correct)
    print(' Correct: ', correct, ' acc: ', correct / nexamples,
          ' sanity check: ', total_acc / num_batches)
    return correct / nexamples
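
# Hedged usage sketch (dev_corpus is a hypothetical object analogous to train_corpus):
#   model = get_trained_model2(args.c, train_corpus, dictionary, non_zero_indices=[])
#   dev_acc = eval_routine(dev_corpus, dictionary, model)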


# save the model to disk
filename = args.task + '_L1_model.pcl'
best_acc = 0
i = 0
bc = -1  # best C value found so far


# cs = 0.02 and 0.2 for SST, for BCN and LSTM respectively (or the other way round)
cs = []  # for IMDB


# encode the full training set once, outside the loop over C values below
print('one hot encoding...')
train_sentences1, train_labels = helper.batch_to_one_hot_encoded(
    train_corpus.data, dictionary, non_zero_indices=[])
print('Training...')

# import pdb
# pdb.set_trace()

# for c in cs:
while i < 1:  # single pass with the C supplied on the command line
    # c = numpy.random.uniform(args.c - 5, args.c + 5)
    c = args.c  # previously hard-coded (e.g. 2.5) or sampled, e.g. numpy.random.uniform(0.7785, 0.7785)
    print('training model with c: ', c, ' in iter: ', i + 1)
    i += 1

    model = get_trained_model(c, train_sentences1, train_labels)
    # print("===" * 20, "\nC: ", c, "\n", "==" * 20)