    # tok_neg_test, tok_pos_test = parallel_run(parse_tokens, test_neg), parallel_run(parse_tokens, test_pos)
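    # parse_paragraph is assumed here to split one raw review into a list of
    # sentences (each a list of tokens); parallel_run maps it over the inputs,
    # presumably across worker processes.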
    test['paragraph_neg'], test['paragraph_pos'] = parallel_run(parse_paragraph, test_neg), parallel_run(parse_paragraph, test_pos)


    # -- parameters to tune and set
    WORDS_PER_SENTENCE = 50
    SENTENCES_PER_PARAGRAPH = 50
    PREPEND = False
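
    # normalize_sos (behavior assumed from its call sites below): pad or truncate
    # a sequence to a fixed length, e.g. normalize_sos([3, 7], 4) -> [3, 7, 0, 0],
    # or [0, 0, 3, 7] with prepend=True; an optional third argument overrides the
    # zero filler. Applied at both levels it yields a fixed
    # SENTENCES_PER_PARAGRAPH x WORDS_PER_SENTENCE grid of word indices per review.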
 
    log('normalizing training inputs...')

    log('  --> building local word vector representation')
    train_repr = normalize_sos(
        [
            normalize_sos(review, WORDS_PER_SENTENCE, prepend=PREPEND)
            for review in gb.get_indices(train['paragraph_pos'] + train['paragraph_neg'])
        ],
        SENTENCES_PER_PARAGRAPH, [0] * WORDS_PER_SENTENCE, PREPEND
    )

    train_text = np.array(train_repr)
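    # train_text should now be a (num_reviews, SENTENCES_PER_PARAGRAPH, WORDS_PER_SENTENCE)
    # array of word indices from the locally built vocabulary (gb)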

    log('  --> building global word vector representation')
    global_train_repr = normalize_sos(
        [
            normalize_sos(review, WORDS_PER_SENTENCE, prepend=PREPEND)
            for review in global_gb.get_indices(train['paragraph_pos'] + train['paragraph_neg'])
        ],
        SENTENCES_PER_PARAGRAPH, [0] * WORDS_PER_SENTENCE, PREPEND
    )
    train_reviews = train_reviews[:NUM_TRAIN_REVIEWS]
    train_labels.extend(dev_labels)
    train_labels = train_labels[:NUM_TRAIN_REVIEWS]

    test_reviews = test_reviews[:NUM_TEST_REVIEWS]
    test_labels = test_labels[:NUM_TEST_REVIEWS]

    log('Splitting train and test reviews into sentences')
    train_text_sentences = parallel_run(parse_paragraph, train_reviews)
    test_text_sentences = parallel_run(parse_paragraph, test_reviews)

    log('normalizing training inputs...')
    train_repr = normalize_sos(
        [
            normalize_sos(review, WORDS_PER_SENTENCE)
            for review in gb.get_indices(train_text_sentences)
        ],
        SENTENCES_PER_PARAGRAPH, [0] * WORDS_PER_SENTENCE
    )

    train_text = np.array(train_repr)

    log('normalizing testing inputs...')
    test_repr = normalize_sos(
        [
            normalize_sos(review, WORDS_PER_SENTENCE)
            for review in gb.get_indices(test_text_sentences)
        ],
        SENTENCES_PER_PARAGRAPH, [0] * WORDS_PER_SENTENCE
    )