import sys
import time
from operator import itemgetter
from pprint import pformat

import numpy as np
import theano
import theano.tensor as T
import lasagne

# Project-local names (Corpus, NaiveBayes, Perceptron, TextualFeatureExtractor,
# ImageFeatureExtractor, Lookup, build_network, prepare_data, generate_batches)
# are assumed to be importable from the surrounding package.


def main(pred_path, gold_path):
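    """Train a Naive Bayes classifier on the gold corpus and evaluate its
    predictions on the prediction corpus."""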

    pred_corpus = Corpus(pred_path)
    train_corpus = Corpus(gold_path)

    Nb = NaiveBayes(train_corpus, pred_corpus)
    train_start_timestamp = time.time()
    Nb.train(train_corpus)

    train_elapsed = time.time() - train_start_timestamp

    # Predict every tweet, printing a progress counter every 1000 tweets.
    for i, tweet in enumerate(pred_corpus):
        if i % 1000 == 0 and i != 0:
            print('\r%i' % i, file=sys.stderr, end='')
        tweet.prediction = Nb.predict(tweet)

    eval_results = pred_corpus.evaluate()

    print('\r' + pformat(eval_results) + '\n')
    print(' == Macro Avg F_Score: %.2f ==\n' %
          (eval_results['macro_avg_f_score']))
    print(' == Eval Accuracy:     %.2f ==\n' % (eval_results['accuracy']))
    print(' Time elapsed: Train: %.2f\n' % train_elapsed)
    print(' %s\n' % ('-' * 78))
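

# Alternative entry point: perceptron baseline over textual features.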
def main(train_path: str, eval_path: str, epochs: int) -> None:
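    """Train a perceptron on textual features for `epochs` epochs, printing
    train and eval metrics after every epoch."""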
    train_corpus = Corpus(train_path)
    eval_corpus = Corpus(eval_path)

    feature_extractor = TextualFeatureExtractor([train_corpus, eval_corpus],
                                                caching=True)

    classes = feature_extractor.classes
    p = Perceptron(classes)

    epoch = 0
    while epoch < epochs:
        epoch += 1
        train_start_timestamp = time.time()
        train_corpus.shuffle()
        for i, tweet in enumerate(train_corpus):
            feature_vector = feature_extractor.get_feature_vector(tweet)
            prediction = p.train(feature_vector, tweet.emotion)
            tweet.prediction = max(prediction.items(), key=itemgetter(1))[0]

            if i % 1000 == 0 and i != 0:
                print('\r%i' % i, file=sys.stderr, end='')

        train_elapsed = time.time() - train_start_timestamp
        train_results = train_corpus.evaluate()

        eval_start_timestamp = time.time()
        eval_corpus.shuffle()
        for i, tweet in enumerate(eval_corpus):
            feature_vector = feature_extractor.get_feature_vector(tweet)
            prediction = p.predict(feature_vector)
            tweet.prediction = max(prediction.items(), key=itemgetter(1))[0]

        eval_elapsed = time.time() - eval_start_timestamp
        eval_results = eval_corpus.evaluate()

        print('\rEpoch %i:' % epoch)
        print(pformat(eval_results) + '\n')
        print(' == Train Macro Avg F_Score: %.2f ==' %
              (train_results['macro_avg_f_score']))
        print(' == Eval  Macro Avg F_Score: %.2f ==' %
              (eval_results['macro_avg_f_score']))
        print(' == Train Accuracy: %.2f ==' % (train_results['accuracy']))
        print(' == Eval  Accuracy: %.2f ==\n' % (eval_results['accuracy']))
        print(' Time elapsed: Train: %.2f Eval %.2f' %
              (train_elapsed, eval_elapsed))
        print(' %s\n' % ('-' * 78))
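

# Alternative entry point: two-stage text + image perceptron pipeline.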
def main(image_path: str, image_index_path: str, train_path: str,
         eval_path: str, epochs: int) -> None:
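    """Two-stage pipeline: stage one trains a text perceptron and caches its
    per-tweet predictions; stage two trains an image perceptron that uses
    those predictions as additional features."""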
    text_train_corpus = Corpus(train_path)
    text_eval_corpus = Corpus(eval_path)

    text_feature_extractor = TextualFeatureExtractor(
        [text_train_corpus, text_eval_corpus], caching=True)

    classes = text_feature_extractor.classes
    text_classifier = Perceptron(classes)

    predictions = {}
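    # `predictions` caches each tweet's full text-classifier score dict; the
    # image stage below passes it to the ImageFeatureExtractor as extra input.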

    epoch = 0
    while epoch < epochs:
        epoch += 1
        train_start_timestamp = time.time()
        text_train_corpus.shuffle()
        for i, tweet in enumerate(text_train_corpus):
            feature_vector = text_feature_extractor.get_feature_vector(tweet)
            prediction = text_classifier.train(feature_vector, tweet.emotion)
            tweet.prediction = max(prediction.items(), key=itemgetter(1))[0]
            predictions[tweet.tweet_id] = prediction

            if i % 1000 == 0 and i != 0:
                print('\r%i' % i, file=sys.stderr, end='')

        train_elapsed = time.time() - train_start_timestamp
        train_results = text_train_corpus.evaluate()

        eval_start_timestamp = time.time()
        text_eval_corpus.shuffle()
        for i, tweet in enumerate(text_eval_corpus):
            feature_vector = text_feature_extractor.get_feature_vector(tweet)
            prediction = text_classifier.predict(feature_vector)
            tweet.prediction = max(prediction.items(), key=itemgetter(1))[0]
            predictions[tweet.tweet_id] = prediction

        eval_elapsed = time.time() - eval_start_timestamp
        eval_results = text_eval_corpus.evaluate()

        print('\rEpoch %i:' % epoch)
        print(pformat(eval_results) + '\n')
        print(' == Train Macro Avg F_Score: %.2f ==' %
              (train_results['macro_avg_f_score']))
        print(' == Eval  Macro Avg F_Score: %.2f ==' %
              (eval_results['macro_avg_f_score']))
        print(' == Train Accuracy:    %.2f ==' % (train_results['accuracy']))
        print(' == Eval  Accuracy:    %.2f ==\n' % (eval_results['accuracy']))
        print(' Time elapsed: Train: %.2f Eval %.2f' %
              (train_elapsed, eval_elapsed))
        print(' %s\n' % ('-' * 78))

    image_train_corpus = Corpus(train_path,
                                image_path,
                                image_index_path,
                                image_res=(12, 12))
    image_eval_corpus = Corpus(eval_path,
                               image_path,
                               image_index_path,
                               image_res=(12, 12))

    image_feature_extractor = ImageFeatureExtractor(
        [image_train_corpus, image_eval_corpus], [predictions], caching=True)

    classes = image_feature_extractor.classes
    image_classifier = Perceptron(classes)

    epoch = 0
    while epoch < epochs:
        epoch += 1
        train_start_timestamp = time.time()
        image_train_corpus.shuffle()
        for i, tweet in enumerate(image_train_corpus):
            feature_vector = image_feature_extractor.get_feature_vector(tweet)
            prediction = image_classifier.train(feature_vector, tweet.emotion)
            tweet.prediction = max(prediction.items(), key=itemgetter(1))[0]

            if i % 1000 == 0 and i != 0:
                print('\r%i' % i, file=sys.stderr, end='')

        train_elapsed = time.time() - train_start_timestamp
        train_results = image_train_corpus.evaluate()

        eval_start_timestamp = time.time()
        image_eval_corpus.shuffle()
        for i, tweet in enumerate(image_eval_corpus):
            feature_vector = image_feature_extractor.get_feature_vector(tweet)
            prediction = image_classifier.predict(feature_vector)
            tweet.prediction = max(prediction.items(), key=itemgetter(1))[0]

        eval_elapsed = time.time() - eval_start_timestamp
        eval_results = image_eval_corpus.evaluate()

        print('\rEpoch %i:' % epoch)
        print(pformat(eval_results) + '\n')
        print(' == Train Macro Avg F_Score: %.2f ==' %
              (train_results['macro_avg_f_score']))
        print(' == Eval  Macro Avg F_Score: %.2f ==' %
              (eval_results['macro_avg_f_score']))
        print(' == Train Accuracy: %.2f ==' % (train_results['accuracy']))
        print(' == Eval  Accuracy: %.2f ==\n' % (eval_results['accuracy']))
        print(' Time elapsed: Train: %.2f Eval %.2f' %
              (train_elapsed, eval_elapsed))
        print(' %s\n' % ('-' * 78))
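

# Alternative entry point: neural network classifier built with Lasagne/Theano.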
def main(train_path: str, eval_path: str, epochs: int) -> None:
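    """Train a neural text classifier (built by build_network) with Adam for
    `epochs` epochs, or until interrupted, reporting scores every epoch."""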

    train_corpus = Corpus(train_path)
    eval_corpus = Corpus(eval_path)

    word_lookup = Lookup()
    emotion_lookup = Lookup()
    # Use 0-based ids: starting at 1 would make the '__pad__' id assigned
    # below (len(word_lookup)) collide with the last word's id.
    i = 0
    j = 0
    for tweet in train_corpus:
        for word in tweet.content:
            if word not in word_lookup:
                word_lookup[word] = i
                i += 1
        tag = tweet.emotion
        if tag not in emotion_lookup:
            emotion_lookup[tag] = j
            j += 1

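    # Sentinel ids for sequence padding and out-of-vocabulary items, appended
    # after the real vocabulary so they take the highest ids.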
    word_lookup['__pad__'] = len(word_lookup)
    word_lookup['__unk__'] = len(word_lookup)

    emotion_lookup['__pad__'] = len(emotion_lookup)
    emotion_lookup['__unk__'] = len(emotion_lookup)

    reverse_emotion_lookup = {i: x for x, i in emotion_lookup.items()}

    # Symbolic inputs: a batch of word-id sequences and the gold emotion ids.
    x1 = T.imatrix('x1')
    y = T.ivector('y')

    network = build_network(text_input_var=x1,
                            number_of_words=len(word_lookup),
                            word_embedding_length=30,
                            num_target=len(emotion_lookup))

    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, y)
    loss = T.mean(loss)

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params)

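    # Compile a training function (applies the Adam updates) and an
    # inference-only function; allow_input_downcast lets wider numpy dtypes
    # feed the int32 symbolic inputs.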
    train = theano.function(inputs=[x1, y],
                            outputs=[prediction, loss],
                            updates=updates,
                            allow_input_downcast=True,
                            on_unused_input='warn')

    predict = theano.function(inputs=[x1],
                              outputs=[prediction],
                              allow_input_downcast=True,
                              on_unused_input='warn')

    epoch = 0

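    # Vectorise both corpora once up front; each epoch just re-batches the
    # same arrays.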
    xts, tis, es = prepare_data(train_corpus, word_lookup, emotion_lookup)
    test_xts, test_tis, test_es = prepare_data(eval_corpus, word_lookup,
                                               emotion_lookup)

    try:
        prev_error = -1
        while epoch < epochs:
            epoch += 1
            errors = []
            for texts, _, emotions in generate_batches(xts, tis, es,
                                                       word_lookup):
                ys, err = train(texts, emotions)
                errors.append(err)
            error = sum(errors) / len(errors)

            for texts, tweet_ids, _ in generate_batches(
                    xts, tis, es, word_lookup):
                ys = predict(texts)[0]
                for y, ti in zip(ys, tweet_ids):
                    pred = reverse_emotion_lookup[int(np.argmax(y))]
                    train_corpus.get_tweet(ti).prediction = pred

            for texts, tweet_ids, _ in generate_batches(
                    test_xts, test_tis, test_es, word_lookup):
                ys = predict(texts)[0]
                for y, ti in zip(ys, tweet_ids):
                    pred = reverse_emotion_lookup[int(np.argmax(y))]
                    eval_corpus.get_tweet(ti).prediction = pred

            train_results = train_corpus.evaluate()
            eval_results = eval_corpus.evaluate()

            if prev_error == -1:
                print('\rEpoch %i error: %.6f' % (epoch, error))
            else:
                err_delta = error - prev_error
                print('\rEpoch %i error: %.6f error_delta: %.6f' %
                      (epoch, error, err_delta))
            prev_error = error

            print(pformat(eval_results) + '\n')
            print(' == Train Macro Avg F_Score: %.2f ==' %
                  (train_results['macro_avg_f_score']))
            print(' == Eval  Macro Avg F_Score: %.2f ==' %
                  (eval_results['macro_avg_f_score']))
            print(' == Train Accuracy: %.2f ==' % (train_results['accuracy']))
            print(' == Eval  Accuracy: %.2f ==\n' % (eval_results['accuracy']))
            print(' %s\n' % ('-' * 78))

    except KeyboardInterrupt:
        pass
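

# Minimal CLI entry point (a sketch; the flag names are illustrative, not
# from the original scripts). Because all four experiments define `main`,
# the call below dispatches to the most recently defined one.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Tweet emotion classification')
    parser.add_argument('train_path', help='path to the training corpus')
    parser.add_argument('eval_path', help='path to the evaluation corpus')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of training epochs')
    args = parser.parse_args()
    main(args.train_path, args.eval_path, args.epochs)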