# Greedy forward feature selection: for each round, try adding every unused
# feature to the current selection, train an AdaBoost model on it, and score
# it on the validation articles.
# NOTE(review): `max_val_score` is never updated in the visible code, and the
# loop condition `len(chosen_features) < 1` stops after a single feature is
# chosen — both look like leftovers of a larger selection loop that was
# truncated in extraction; confirm against the original file.
max_val_score = 0

while len(chosen_features) < 1:
    best_val = 0.
    best_feature = None

    for feature in feature_list:
        # Skip features already part of the current selection.
        if feature in chosen_features: continue

        # Candidate selection = current features plus this one.
        sent_encoder.feature_names = chosen_features + [feature]
        # AdaBoost over class-balanced depth-1 decision stumps.
        model = SimplePQModel(sent_encoder=sent_encoder,
                              clf_type=AdaBoostClassifier,
                              clf_args={
                                  'n_estimators':
                                  100,
                                  'base_estimator':
                                  DecisionTreeClassifier(
                                      max_depth=1, class_weight="balanced")
                              })
        model.fit(train_articles)

        # Score the candidate on both splits (verbose=0 suppresses logging).
        val_accuracy = E.evaluate(model=model,
                                  articles=val_articles,
                                  verbose=0)
        test_accuracy = E.evaluate(model=model,
                                   articles=test_articles,
                                   verbose=0)

        # NOTE(review): the statement below is corrupted — the `.format(...)`
        # call breaks off after `100 * val_accuracy,` and runs into what looks
        # like a fragment of an unrelated character list (part of
        # `char_sets_options`?), ending in an unmatched `]`. Left byte-identical;
        # reconstruct from the original source before running this file.
        res_str = "{}\t{:.1f}\t{:.1f}".format(
            ', '.join(chosen_features + [feature]), 100 * val_accuracy,
	'๐Ÿ‡ธ', '๐ŸŒˆ', '๐Ÿ’™', '\u200e', 'รฅ', '๐Ÿ–ค', 'โ€', 'ฬ‚', '๐Ÿ’ฅ', '๐Ÿ˜‰', 'ั‰', 'ั‘', '๐Ÿคท', 'ั†', '๐Ÿ’–', '๐Ÿน', '๐ŸŠ', '{', '}', '๐Ÿค”', '๐Ÿ’‹', '\U0001f9e1', '\u200f', '๐ž', '๐Ÿ›‘', 'ยบ', '๐Ÿ‡น', 
	'โ˜•', 'ษช', '๐ŸŒŠ', '๐Ÿ’', '๐Ÿ‡บ', '๐Ÿค—', '๐Ÿ™', 'ร—', '๐Ÿ’š', 'ยฏ', '๐ŸŒบ', 'โ†’', 'โ™‚', '์ „', '์œจ', '๋ฆฌ', 'โœˆ', 'ล„', '๐Ÿ’ฆ', 'โ˜', '๐ŸŒŸ', 'สผ', 'โ€š', '๐Ÿ‘', '๐Ÿ˜', '๐Ÿคฆ', '๐ŸŽ‰', '๐ŸŽ„', '๐Ÿ’ช', '๐Ÿ‘„', 
	'๐Ÿ‘‘', 'โ™ฅ', 'ยน', 'ฤ', '๐Ÿ’', '๐–', '๐จ', '๐ฅ', '๐Ÿ‘Œ', '๐Ÿฅ‚', '๐Ÿ’…', 'โ˜', '๐Ÿ‘‹', '๐Ÿ˜ƒ', 'โšพ', '๐Ÿ˜…', '๐Ÿ›', '๐Ÿ‡ท', 'ฬ€', 'ัŽ', 'ั', 'โ–ช', 'แด›', 'ษด', 'แด€', 'แด‡', '๐Ÿ', 'โžก', '๐ŸŒ…', 'โ›ฑ', 'ล›', 
	'๐Ÿ‡ง', '๐Ÿ’•', 'โšก', '๐ŸŒŽ', '\\', 'ใƒ„', 'ล“', '๐Ÿ‘ฏ', '๐Ÿ˜ญ', '๐Ÿ’”', '๐Ÿ‘€', '๐Ÿ•', '๐ŸŽถ', '๐Ÿ™†', 'ลพ', '๐Ÿ˜Š', '๏ฟฝ', '๐Ÿ“', '๐Ÿƒ', '๐Ÿ’Ž', 'โ›ต', 'โ™ก', '๐Ÿ˜ณ', '\U0001f90d', '๐Ÿ˜', 'รน', '๐ŸŒธ', 'ยฌ', 'โ€‘', 
	'๐Ÿ‘Ž', '\U0001f9d8', 'โˆ’', '๐Ÿ‹', '๐ŸŽ€', '๐Ÿ‘ธ', '๐Ÿ˜†', '๐Ÿ’ธ', '๐Ÿ˜ช', '๐ŸŽ', '๐Ÿ‘ญ', '๐Ÿ˜‹', '๐Ÿ–•', '๐Ÿ˜‘', '๐Ÿˆ', '๐Ÿ‘œ', '๐Ÿ™€', '๐Ÿ˜ผ', '๐Ÿ˜ฝ', '๐Ÿ‘Š', 'ยด', '๐Ÿค“', '\U0001f929', '๐Ÿ˜Ž', '๐Ÿคก', '๐ŸŽ…', '๐Ÿ”ฑ', 
	'๐Ÿ’„', 'โšœ', '๐Ÿ‡ซ', '๐', '๐“', '๐š', '๐ฏ', '๐Œ', '๐ฆ', '๐ข', '๐ญ', 'ยพ', 'โ„“', 'ยจ', '๐Ÿ‘ฐ', '๐Ÿ˜', '๐Ÿ‹', '๐Ÿ˜', '๐Ÿ‘‡', '๐Ÿ˜š', '๐Ÿ’', '๐Ÿ—ณ', '๐Ÿ™', '๐Ÿ•Œ', 'ั„', '๐Ÿ‘—', '๐Ÿ›', 'ส€', 'สŸ', '๐Ÿ’Œ', '๐Ÿ›ฃ', 
	'๐Ÿš–', '๐Ÿ”', '๐Ÿฌ', '๐ŸŒ‰', '๐ŸŽญ', '๐Ÿ ', '๐ŸŒณ', '๐Ÿ“', 'ฤ…', '๐Ÿ…', '๐Ÿ˜ฉ', '๐Ÿ’€', '๐ŸŽƒ', '๐Ÿ‘ถ', '๏ฟผ', '๐Ÿฅž', '๏ฌ', '๐Ÿ”', '๐Ÿ•', 'ฬŠ', '\U0001f974', '๐Ÿ™ƒ', 'โœŠ', 'ฬƒ', '๐ŸŒš', '๐ŸŒป', '๐Ÿ˜ฌ', '๐Ÿ™‹', '๐Ÿ™‚', 
	'๐Ÿ‡ฎ', 'โ˜บ', '๐Ÿ„', '๐Ÿ’', '๐Ÿ', '๐Ÿค ', '๐Ÿ˜ข', 'โ—', 'โ™', '๐ŸŸ', '๏ฝ', '๏ฝ•', '๏ฝŽ', '๏ฝ’', '๏ฝ', '๏ฝ…', '๐ŸŽผ', 'ส”', 'โ‰ ', '๐Ÿ˜˜', '๐Ÿ˜ต', '๐Ÿ…', '๐Ÿ˜ฎ', '๐Ÿš', '๐Ÿคธ', 'โœฎ', '๐Ÿฆ', '๐Ÿ ', '๐ŸŒ', 'โƒฃ', '๐Ÿ›ถ', 
	'๐Ÿ˜ป', '\U0001f9d0', '๐Ÿ‡ญ', '๐Ÿšœ', 'รŸ', '๐Ÿ’ฐ', '\u2009', 'ยผ', '๐Ÿ’ก', '๐Ÿš', '๐Ÿ‡ฌ', '๐Ÿฃ', '๐Ÿ—บ', '\U0001f6f8', '๐ŸŒ‘', '๐Ÿค˜', '๐ŸŒ‹', '๐Ÿฅ', '๐Ÿง€', '๐Ÿณ', '๐Ÿฅ“', '๐Ÿท', '๐Ÿฅ‘', '๐Ÿค', '๐Ÿธ', '๐Ÿ˜ฐ',
	 'โ”€', '๐Ÿ˜œ', '๐ŸŒผ', '๐ŸŽ ', '๐Ÿ™ˆ', '๐ŸŽก', '๐Ÿพ', '๐Ÿ‘', '๐Ÿซ', 'ฤ‡', 'โ›ˆ', 'สบ', 'โ˜พ', 'หš', '\U0001f976', '๐Ÿ–', '\U0001f9da', '๐Ÿถ', '๐Ÿ', 'โš“', 'ลผ']


# Grid search over character-set options and quantile counts for the
# character-position encoder. Each configuration trains a class-balanced
# logistic-regression PQ model, is scored on the test articles, and the
# result line is printed and appended to the results file.
for char_sets in char_sets_options:
    for quantiles in [1, 2, 5, 10, 15]:
        encoder = TrueCharPositionEncoder(char_sets=char_sets, quantiles=quantiles)
        lr_args = {'class_weight': 'balanced', 'max_iter': 1000, 'solver': 'lbfgs'}
        pq_model = SimplePQModel(sent_encoder=encoder,
                                 clf_type=LogisticRegression,
                                 clf_args=lr_args)
        pq_model.fit(train_articles)

        # Score on the held-out test articles only (verbose=0: no progress output).
        accuracy = E.evaluate(model=pq_model, articles=test_articles, verbose=0)
        line = f"{encoder.name}\t{char_sets}\t{quantiles}\t{100 * accuracy:.2f}"
        print(line)
        # Flush after every line so partial results survive an interrupted run.
        results_file.write(line + "\n")
        results_file.flush()


'''
for quantiles in [2, 5, 10, 20]:
	from sklearn.ensemble import AdaBoostClassifier
	from sklearn.tree import DecisionTreeClassifier
	from models.sentence_encoders import HandcraftedEncoder



	#sent_encoder = HandcraftedEncoder()
	sent_encoder = HandcraftedEncoder(precomputed_embeddings=settings.PRECOMPUTED_HANDCRAFTED_EMBEDDINGS_FNAME)
	feature_list = ["Quote_count", "Sent_position", "R_difficult", "POS_PRP", "POS_VB", "A_concreteness"] #HandcraftedEncoder._all_features + "best"
	#feature = "best"


	for feature in feature_list:
		print(feature)
		sent_encoder.set_features(feature)
		model = SimplePQModel(sent_encoder=sent_encoder, clf_type=AdaBoostClassifier, clf_args={'n_estimators':100, 'base_estimator':DecisionTreeClassifier(max_depth=1, class_weight="balanced")})
		print("training {}...".format(feature))
		model.fit(train_articles)
		print("generating...")

		combined_samples[feature] = generate_samples(model, test_articles)



elif model_name == "ngrams":
	from models.sentence_encoders import NGramEncoder

	for mode, n in [('char', 2), ('word', 1)]:
		print(mode, n)
		sent_encoder = NGramEncoder(mode=mode, n=n, store_results=False, vocab_size=1000)
		print("preparing encoder...")