def train_for_classification():
    # Train one classifier per target column listed in list_vals, each from its own CSV.
    for each in list_vals:
        classification_df = pd.read_csv(input_dict[each])
        classification_df['u_id'] = classification_df.index
        # classification_df = process_df(classification_df)
        classification_df.rename(columns={"STATUS": "desc", each: "label"}, inplace=True)
        number_of_classes = len(classification_df['label'].unique())
        model_directory = os.path.join(root_dir, "classify_dict_" + each)
        metrics_json = os.path.join(root_dir, "accuracy_metrics_" + each + ".json")
        training_loader, testing_loader = load_datasets(
            classification_df, train_size=0.8, number_of_classes=number_of_classes)
        unique_ids, val_targets, val_outputs = start_epochs(
            training_loader, testing_loader, metrics_json, model_directory,
            epochs=5, number_of_classes=number_of_classes)
        # Stack ids, ground-truth labels, and predictions into one (N, 3) array.
        out_numpy = np.concatenate(
            (unique_ids.reshape(-1, 1), val_targets.reshape(-1, 1), val_outputs.reshape(-1, 1)),
            axis=1)
        predicted_df = pd.DataFrame(out_numpy, columns=['id', 'original', 'predicted'])
        predicted_df.to_csv(os.path.join(root_dir, "predicted_" + each + ".csv"),
                            index=False, header=True)
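
# Note: for len(unique()) above to match the classifier's output size, the renamed
# "label" column must hold contiguous integer class ids in [0, number_of_classes).
# A minimal, hypothetical encoder for that assumption is sketched below; it is NOT
# the original process_df, whose definition is not shown in this section.
def encode_labels(df: pd.DataFrame, column: str = "label") -> pd.DataFrame:
    # Map each distinct value in `column` to a contiguous integer code 0..N-1.
    df[column] = df[column].astype("category").cat.codes
    return df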
def train_classification():
    # Train a single classifier over the combined/final dataset.
    classification_df = pd.read_csv(final_data)
    classification_df = setup_data(classification_df)
    # Labels are assumed to be integer class ids, so max + 1 gives the class count.
    number_of_classes = max(classification_df['label'].unique()) + 1
    model_directory = os.path.join(root_dir, "classify_dict")
    metrics_json = os.path.join(root_dir, "accuracy_metrics.json")
    training_loader, testing_loader = load_datasets(
        classification_df, train_size=0.8, number_of_classes=number_of_classes)
    unique_ids, val_targets, val_outputs = start_epochs(
        training_loader, testing_loader, metrics_json, model_directory,
        epochs=20, number_of_classes=number_of_classes)
    # Stack ids, ground-truth labels, and predictions into one (N, 3) array.
    out_numpy = np.concatenate(
        (unique_ids.reshape(-1, 1), val_targets.reshape(-1, 1), val_outputs.reshape(-1, 1)),
        axis=1)
    predicted_df = pd.DataFrame(out_numpy, columns=['id', 'original', 'predicted'])
    predicted_df.to_csv(os.path.join(root_dir, "predicted.csv"), index=False, header=True)
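
# For context, a minimal sketch of how these two entry points might be wired up,
# assuming root_dir, input_dict, list_vals, and final_data are module-level
# configuration defined elsewhere in this script, and that the helpers
# load_datasets, start_epochs, and setup_data are likewise defined there.
if __name__ == "__main__":
    # One classifier per target column listed in list_vals ...
    train_for_classification()
    # ... and a single classifier over the combined/final dataset.
    train_classification()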