# =====
import functions

money: float = 0.0

while True:
    # Get user selection
    selection = functions.user_selection()
    if selection == "":
        continue
    # Take action to perform user selection
    elif selection == 3:
        functions.switch_off()
    elif selection == 4:
        functions.print_report(money)
    else:
        # Check sufficient ingredients to make the drink
        sufficient = functions.check_ingredients(selection)
        if not sufficient:
            print("Maintenance required")
        else:
            # Sufficient ingredients to make the drink; ask for payment
            paid, money = functions.get_payment(selection, money)
            if paid:
                ...  # remainder of the loop is cut off in this excerpt
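# --------------------------------------------------------------------------
# Sketch (assumption, not the original): the `functions` module used above is
# not shown in this excerpt. A stand-in consistent with the call sites could
# look like the following; menu numbers, prices, and prompts are hypothetical.
# --------------------------------------------------------------------------
MENU_PRICES = {1: 1.5, 2: 2.5}  # hypothetical drink-number -> price map


def user_selection():
    """Prompt for a menu number; return "" when the user enters nothing."""
    raw = input('Select a drink (1-2), 3 to switch off, 4 for a report: ').strip()
    return int(raw) if raw.isdigit() else ""


def check_ingredients(selection):
    """Return True when enough ingredients remain for the chosen drink."""
    return True  # placeholder: the real module would check resource levels


def get_payment(selection, money):
    """Process payment; return (paid, updated_money)."""
    price = MENU_PRICES.get(selection, 0.0)
    inserted = float(input(f'Insert ${price:.2f}: ') or 0)
    if inserted >= price:
        return True, money + price
    print('Insufficient payment; refunding', inserted)
    return False, money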
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline, make_union

training_data_path = build_data_path('augmented_train.csv')
df = pd.read_csv(training_data_path)

X = df['comment_text']
y = df[LABEL_COLS]
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33)

# Word-level TF-IDF features plus raw counts of known bad words
clf = OneVsRestClassifier(MultinomialNB())
tfidf = TfidfVectorizer(strip_accents='ascii', stop_words='english',
                        ngram_range=(1, 1), norm='l2')
bad_word_counter = CountVectorizer(vocabulary=BAD_WORDS)
# ETHNIC_SLURS = set(load_ethnic_slurs())
# slur_counter = CountVectorizer(vocabulary=ETHNIC_SLURS)
# union = make_union(tfidf, bad_word_counter, slur_counter)
union = make_union(tfidf, bad_word_counter)
pipeline = make_pipeline(union, clf)

optimizer = pipeline  # alias kept so the later hyper-parameter-search cells read the same
optimizer.fit(X_train, y_train)
y_predictions = optimizer.predict(X_valid)
# best_estimator_ = optimizer.best_estimator_
print_report(y_valid, y_predictions)
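# --------------------------------------------------------------------------
# Sketch (assumption): `print_report` is a project helper not shown in this
# excerpt. A minimal stand-in consistent with the call sites, built on
# sklearn's classification_report, could be:
# --------------------------------------------------------------------------
from sklearn.metrics import classification_report


def print_report(y_true, y_pred, data_type='VALIDATION'):
    """Print per-label precision/recall/F1 for multi-label predictions."""
    print(f'----- {data_type} REPORT -----')
    print(classification_report(y_true, y_pred,
                                target_names=LABEL_COLS, zero_division=0))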
from collections import defaultdict
from random import random

import numpy as np

y = df[LABEL_COLS]
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33)

# Each label's share of all positive annotations in the training split
label_counts = y_train.sum()
total = label_counts.sum()
label_freqs = defaultdict(lambda: 0)
for key in label_counts.index:
    label_freqs[key] = label_counts[key] / total


def predict(X_values):
    """Random baseline: flag each label with probability label_freqs[label]."""
    predictions = []
    for example in X_values:
        prediction = []
        for key in LABEL_COLS:
            rand_value = random()
            frequency = label_freqs[key]
            prediction.append(1 if rand_value < frequency else 0)
        predictions.append(prediction)
    return np.array(predictions)


random_predictions = predict(X_valid)
print('Baseline Data')
print_report(y_valid, random_predictions)
print()
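# Illustrative check (not in the original): each label is flagged with
# probability label_freqs[label], so over n validation rows the expected
# number of positives per label is roughly n * label_freqs[label].
n = len(X_valid)
for idx, key in enumerate(LABEL_COLS):
    print(f'{key}: expected ~{n * label_freqs[key]:.0f} positives, '
          f'got {random_predictions[:, idx].sum()}')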
from sklearn import metrics
from sklearn.utils.class_weight import compute_sample_weight

# NOTE: the construction of `optimizer` is cut off in this excerpt; only its
# trailing arguments survive:
#     ..., scoring='f1_weighted', verbose=3)
# Judging by the fit params and the `best_estimator_` reference below, it is a
# hyper-parameter search (e.g. GridSearchCV) over a pipeline that ends in a
# RandomForestClassifier.

fit_params = {
    # fit-time parameters are routed through a pipeline as
    # '<step name>__<parameter>'; the estimator keyword is `sample_weight`
    'randomforestclassifier__sample_weight':
        compute_sample_weight('balanced', y_train),
}
optimizer.fit(X_train, y_train, **fit_params)
y_predictions = optimizer.predict(X_valid)
print(y_predictions.shape, y_valid.shape)
# best_estimator_ = optimizer.best_estimator_
# AUC here is computed on hard 0/1 predictions rather than probabilities
print('Validation ROC AUC:', metrics.roc_auc_score(y_valid, y_predictions))

# Evaluate on the held-out test set; rows with a -1 label are unscored
# placeholders, so drop them before evaluation.
test_data = build_data_path('test.csv')
data_df = pd.read_csv(test_data)
test_labels = build_data_path('test_labels.csv')
label_df = pd.read_csv(test_labels)
test_df = data_df.set_index('id').join(label_df.set_index('id'))

CONDITIONS = [f'{label} != -1' for label in LABEL_COLS]
QUERY_STRING = ' & '.join(CONDITIONS)
test_df = test_df.query(QUERY_STRING)

X_test = test_df['comment_text']
y_test = test_df[LABEL_COLS]
y_predictions = optimizer.predict(X_test)
print_report(y_test, y_predictions, data_type='TESTING')

# The inspiration for this code was taken from
# https://github.com/SaltyQuetzals/Toxicroak
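# --------------------------------------------------------------------------
# Sketch (assumption): `build_data_path` is a project helper not shown in
# this excerpt. A minimal stand-in that resolves filenames against a data
# directory (the directory layout here is hypothetical) could be:
# --------------------------------------------------------------------------
import os

DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')


def build_data_path(filename):
    """Return the absolute path to a file inside DATA_DIR."""
    return os.path.join(DATA_DIR, filename)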