def main(argv):
    """Print sample documents whose words are drawn from several topics at once.

    Reads saved LDA inference results (gamma: per-document topic weights,
    Lambda: per-topic word weights). For every short document (< 100 words)
    whose gamma row gives at least `minWordsFromTopic` expected words to
    exactly `numTopics` distinct topics, prints the raw text, the top-10
    words of each contributing topic, and a LaTeX-colored version of the
    text in which confidently assigned words (phi > 0.9) are wrapped in
    \\textcolor{...}{...} commands. Stops after 11 samples.

    Args:
        argv: command-line arguments; argv[0] is the path to the saved
            results consumed by readResults() and getPhi().
    """
    inputPath = argv[0]
    gamma, Lambda = readResults(inputPath)
    tf, labels_1, labels_2, topic_texts, dictionary, data = em.loadData(
        'reutersdata', 'earn', 'grain', maxDocs=len(gamma), returnData=True)
    samples = 0
    minWordsFromTopic = 5  # minimum expected word count a topic must contribute
    numTopics = 4          # document must touch exactly this many topics
    colors = ['red', 'orange', 'green', 'blue']
    # Shift gamma so its minimum is zero before thresholding.
    gamma = gamma - np.min(gamma)
    for docIdx, row in enumerate(gamma):
        if np.sum(row >= minWordsFromTopic) == numTopics:
            topics = np.where(row >= minWordsFromTopic)
            # Only short documents keep the colored output readable.
            if len(data[docIdx].split()) < 100:
                print(data[docIdx])
                wd = em.wordCountToFlatArray(tf[docIdx, :])
                # (word position, topic) index pairs where phi is confident.
                phiAssigned = np.where(getPhi(inputPath, docIdx) > 0.9)
                print("")
                topicToColor = dict()
                for topicIdx, topic in enumerate(topics[0]):
                    dictionaryIdxs = Lambda[topic, :].argsort()[-10:]
                    print("Num words from this topic: %f" % row[topic])
                    for didx in dictionaryIdxs:
                        print(dictionary[didx])
                    print("")
                    topicToColor[topic] = colors[topicIdx]
                wordToColoredWord = dict()
                for wordIdx, topicIdx in zip(*phiAssigned):
                    if topicIdx in topicToColor:
                        wordToColoredWord[dictionary[
                            wd[wordIdx]]] = topicToColor[topicIdx]
                newText = data[docIdx]
                # FIX: dict.iteritems() is Python 2 only and raises
                # AttributeError on Python 3 (the file already uses print()
                # calls); use items() instead.
                for word, color in wordToColoredWord.items():
                    newText = re.sub(r'\s(%s)\s' % word,
                                     r' \\textcolor{%s}{\1} ' % color,
                                     newText, flags=re.I)
                print(newText)
                print("")
                samples += 1
                if samples > 10:
                    break
def loadDataSet():
    """Handle the dataset-loading request: save features, cross-validate,
    and render the precision page. Non-POST requests are rejected with "err".
    """
    if request.method != 'POST':
        return "err"
    # The feature source is currently hard-coded; an uploaded folder was
    # used previously:
    # f = request.files['folder']
    feature_source = "dataset"
    functions.save_features(feature_source)
    dataset = functions.loadData("datasets/dataset.csv")
    scores = functions.crossValidation(dataset)
    # Convert a [accuracy, ...] entry to an integer percentage.
    to_pct = lambda entry: int(entry[0] * 100)
    return render_template('precision.html',
                           nr=to_pct(scores[0]),
                           reg=to_pct(scores[2]),
                           tr=to_pct(scores[1]),
                           data=scores)
def savem():
    """Train the model kind named in the request form and persist it.

    Form fields:
        n:   name to save the trained model under.
        nom: model kind — "log" (logistic regression), "nr" (neural
             network); anything else falls back to decision trees.

    Returns:
        'v' as a plain-text confirmation for the caller.
    """
    nom = str(request.form["n"])
    model = str(request.form["nom"])
    dataset = functions.loadData("datasets/dataset.csv")
    # FIX: savemodel() was duplicated in every branch; train per kind,
    # then save exactly once.
    if model == "log":
        clasif = functions.trainingLogReg(dataset)
    elif model == "nr":
        clasif = functions.trainingNeuralNetwork(dataset)
    else:
        clasif = functions.trainingDecTrees(dataset)
    functions.savemodel(clasif, nom)
    return 'v'
def main(argv):
    """Plot SVM classification accuracy in the low-dimensional topic space
    (gamma) versus the high-dimensional term space, producing one figure
    per label set ('earn' and 'grain').

    Args:
        argv: command-line arguments; argv[0] is the path to the saved
            inference results read by readResults().
    """
    gamma = readResults(argv[0])
    tf, labels_1, labels_2, topic_texts, dictionary = em.loadData(
        'reutersdata', 'earn', 'grain', maxDocs=8000)
    # One accuracy figure per label set; DocumentClassification returns
    # (mean_low, sd_low, mean_high, sd_high), forwarded as-is to the plotter.
    for labels in (labels_1, labels_2):
        stats = DocumentClassification(gamma, tf, labels)
        plt.figure()
        plotSVMModelAccuracy(*stats)
    plt.show()
# Fraud-classification data preparation: load the dataset via the project's
# `functions` helper, one-hot encode a fixed feature subset, and split into
# train/test sets.
from imblearn.combine import SMOTETomek
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_recall_curve, auc, roc_auc_score, roc_curve, recall_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_curve
from sklearn import decomposition, tree
import functions

#Load dataset
data_set = functions.loadData()
print('Dataset is loaded')
#Select specific features from the entire dataset
selected_features = [
    "issuercountrycode", "txvariantcode", "EuroAmount", "currencycode",
    "shoppercountrycode", "shopperinteraction",
    "cardverificationcodesupplied", "cvcresponsecode", "accountcode"
]
new_data = data_set[selected_features]
#Create dummies dataset (one-hot encode the categorical columns)
# NOTE(review): `pd` (pandas) is used here but not imported in this chunk —
# presumably imported elsewhere in the file; verify.
new_data = pd.get_dummies(new_data)
#Split dataset into train and test
# NOTE(review): this statement is truncated in this chunk — the remaining
# train_test_split arguments are not visible here.
X_train, X_test, y_train, y_test = train_test_split(new_data, data_set['label'],
def main():
    """Interactive importer for match CSV exports.

    Usage: script.py <csv1> [<csv2> ...] <user>
    For each CSV path on the command line, the parsed match data is shown
    and the user is prompted to confirm it; confirmed matches are merged
    into the stored player/team records and saved via fnc.saveData().
    """
    user = sys.argv[-1]           # last argument is the user name
    for frame in sys.argv[1:-1]:  # Loop over all csv files
        os.system("cls")          # clear the console (Windows-only command)
        # FIX: the original print was corrupted by a censored string literal
        # ("User: "******"File Path: ...) and did not parse; reconstructed
        # as two separate status prints — TODO confirm intended format.
        print("User: " + str(user))
        print("File Path: " + str(frame))
        csv_frames = fnc.handlefileinput(frame)
        match_overview = csv_frames[0]
        match_performance = csv_frames[1]
        # sixth_pick_overview = csv_frames[2]
        player_round_data = csv_frames[3]
        # round_event_breakdown = csv_frames[4]
        all_players, all_teams = fnc.loadData()
        knowndata = fnc.getknowndata(all_players, all_teams)
        all_playernames = knowndata[0]
        all_teamnames = knowndata[1]
        used_gamemode = knowndata[2]    # unpacked for parity with getknowndata()
        used_match_info = knowndata[3]  # (unused below)
        # Re-prompt until the user confirms the parsed match data.
        input_correct = False
        user_input = []
        while not input_correct:
            user_input = fnc.handleuserinput(knowndata)
            print("\n\nMatchdata: " + str(user_input[0]) + " | Remarks:\t" +
                  str(user_input[1]) + "\n")
            map_string = ""
            for data in user_input[3]:
                map_string += "\t\t\t\t" + str(data) + "\n"
            op_string = ""
            for data in user_input[4]:
                op_string += "\t\t\t\t" + str(data) + "\n"
            print("Team 1: " + str(user_input[2]) +
                  "\n\tBanned:\n\t\tMaps:\n" + map_string +
                  "\n\t\tOps:\n" + op_string)
            map_string = ""
            for data in user_input[6]:
                map_string += "\t\t\t\t" + str(data) + "\n"
            op_string = ""
            for data in user_input[7]:
                op_string += "\t\t\t\t" + str(data) + "\n"
            print("Team 2: " + str(user_input[5]) +
                  "\n\tBanned:\n\t\tMaps:\n" + map_string +
                  "\n\t\tOps:\n" + op_string)
            confirm = input("Input correct?\t Press y to confirm...\n")
            if confirm == "y" or confirm == "Y":
                input_correct = True
        blue_team = user_input[2]
        orange_team = user_input[5]
        # Register teams that are not known yet.
        if not (blue_team in all_teamnames):
            print("Add new team...")
            all_teams.append(ds.Team(blue_team))
        if not (orange_team in all_teamnames):
            print("Add new team...")
            all_teams.append(ds.Team(orange_team))
        for team in all_teams:
            # NOTE(review): printed for every stored team, not only the two
            # participants — preserved from the original statement order.
            print("Add team round data...")
            if team.name in [blue_team, orange_team]:
                # Deliberate trick: the first participating team raises
                # NameError (roundoverview unbound), which routes into the
                # 3-argument addMatch() that creates roundoverview; the
                # second team then reuses it via the 4-argument call.
                try:
                    team.addMatch(match_overview, player_round_data,
                                  user_input, roundoverview)
                except NameError:
                    roundoverview = team.addMatch(match_overview,
                                                  player_round_data,
                                                  user_input)
        for player_name in match_performance.Player.values:
            if not (player_name in all_playernames):
                print("Add new player match data...")
                all_players.append(ds.Player(player_name))
                all_players[len(all_players) - 1].addMatch(
                    player_name, match_overview, match_performance,
                    roundoverview)
            else:
                for player in all_players:
                    if player.name == player_name:
                        print("Add player match data...")
                        player.addMatch(player_name, match_overview,
                                        match_performance, roundoverview)
        fnc.saveData(all_players, all_teams)
        if random.randint(1, 1001) > 999:
            # Easter egg (~0.2% chance): draw a fake "TROLOL" progress bar.
            toolbar_width = 100
            # setup toolbar
            sys.stdout.write("[%s]" % (" " * toolbar_width))
            sys.stdout.flush()
            sys.stdout.write("\b" *
                             (toolbar_width +
                              1))  # return to start of line, after '['
            for i in range(toolbar_width):
                time.sleep(1 / random.randint(25, 50))  # do real work here
                # update the bar
                if i == (toolbar_width / 4):
                    sys.stdout.write("TR")
                    sys.stdout.flush()
                elif (3 * toolbar_width / 4) > i > (toolbar_width / 4):
                    sys.stdout.write("OL")
                    sys.stdout.flush()
                else:
                    sys.stdout.write("-")
                    sys.stdout.flush()
            sys.stdout.write("]\n")
        print("File Path: " + str(frame))
    print("Finished")