def main(argv):
    """Render sample documents with their LDA topic assignments as colored LaTeX.

    Loads the gamma/Lambda results from ``argv[0]``, selects short documents
    (< 100 words) whose gamma row draws at least ``minWordsFromTopic`` expected
    words from exactly ``numTopics`` topics, prints the top dictionary words of
    each such topic, and re-prints the document with confidently-assigned words
    (phi > 0.9) wrapped in ``\\textcolor`` commands.

    Args:
        argv: command-line arguments; argv[0] is the results-file path.
    """
    inputPath = argv[0]

    gamma, Lambda = readResults(inputPath)

    tf, labels_1, labels_2, topic_texts, dictionary, data = em.loadData(
        'reutersdata', 'earn', 'grain', maxDocs=len(gamma), returnData=True)

    samples = 0
    minWordsFromTopic = 5
    numTopics = 4
    colors = ['red', 'orange', 'green', 'blue']
    # Shift gamma so its minimum is zero before thresholding against
    # minWordsFromTopic (removes the symmetric prior offset).
    gamma = gamma - np.min(gamma)
    for docIdx, row in enumerate(gamma):
        if np.sum(row >= minWordsFromTopic) == numTopics:
            topics = np.where(row >= minWordsFromTopic)
            if len(data[docIdx].split()) < 100:  # keep the examples short
                print(data[docIdx])
                wd = em.wordCountToFlatArray(tf[docIdx, :])

                # (word position, topic) index pairs assigned with prob > 0.9.
                phiAssigned = np.where(getPhi(inputPath, docIdx) > 0.9)
                print("")

                topicToColor = dict()
                for topicIdx, topic in enumerate(topics[0]):
                    # Ten highest-weight dictionary entries for this topic.
                    dictionaryIdxs = Lambda[topic, :].argsort()[-10:]
                    print("Num words from this topic: %f" % row[topic])
                    for didx in dictionaryIdxs:
                        print(dictionary[didx])
                    print("")
                    topicToColor[topic] = colors[topicIdx]

                wordToColoredWord = dict()
                for wordIdx, topicIdx in zip(*phiAssigned):
                    if topicIdx in topicToColor:
                        wordToColoredWord[dictionary[
                            wd[wordIdx]]] = topicToColor[topicIdx]

                newText = data[docIdx]
                # FIX: dict.iteritems() is Python 2 only and raises
                # AttributeError on Python 3 (which this file targets, given
                # its print() calls); use items() instead.
                # FIX: escape the word before embedding it in the pattern so
                # regex metacharacters in a token cannot corrupt the match.
                for word, color in wordToColoredWord.items():
                    newText = re.sub(r'\s(%s)\s' % re.escape(word),
                                     r' \\textcolor{%s}{\1} ' % color,
                                     newText,
                                     flags=re.I)

                print(newText)
                print("")
                samples += 1
                if samples > 10:
                    break
def loadDataSet():
    """Flask view: on POST, build features, cross-validate, render precision.

    Extracts features for the bundled "dataset" folder, runs cross-validation
    over the resulting CSV, and renders the scores (as integer percentages)
    into ``precision.html``. Any non-POST request gets the literal "err".
    """
    if request.method != 'POST':
        return "err"

    folder = "dataset"  # fixed source folder (upload field not used)
    functions.save_features(folder)
    dataset = functions.loadData("datasets/dataset.csv")
    vals = functions.crossValidation(dataset)
    return render_template('precision.html',
                           nr=int(vals[0][0] * 100),
                           reg=int(vals[2][0] * 100),
                           tr=int(vals[1][0] * 100),
                           data=vals)
def savem():
    """Flask view: train the requested model type and persist it.

    Reads the model name ("n") and model type ("nom") from the form, trains
    logistic regression, a neural network, or decision trees on the dataset
    CSV, saves the classifier under the given name, and returns 'v'.
    """
    nom = str(request.form["n"])
    model = str(request.form["nom"])

    dataset = functions.loadData("datasets/dataset.csv")
    # Select the trainer for the requested type; decision trees by default.
    if model == "log":
        clasif = functions.trainingLogReg(dataset)
    elif model == "nr":
        clasif = functions.trainingNeuralNetwork(dataset)
    else:
        clasif = functions.trainingDecTrees(dataset)
    functions.savemodel(clasif, nom)
    return 'v'
Example #4
0
def main(argv):
    """Plot SVM classification accuracy for both Reuters label sets.

    Loads gamma from the results file in ``argv[0]``, runs
    DocumentClassification against each label set in turn, and draws the
    low- vs high-dimensional accuracy curves in separate figures.
    """
    gamma = readResults(argv[0])

    tf, labels_1, labels_2, topic_texts, dictionary = em.loadData(
        'reutersdata', 'earn', 'grain', maxDocs=8000)

    # Identical pipeline for each label set: classify, then plot in a
    # fresh figure.
    for labels in (labels_1, labels_2):
        mean_low, sd_low, mean_high, sd_high = DocumentClassification(
            gamma, tf, labels)
        plt.figure()
        plotSVMModelAccuracy(mean_low, sd_low, mean_high, sd_high)

    plt.show()
Example #5
0
import pandas as pd
from imblearn.combine import SMOTETomek
from imblearn.under_sampling import RandomUnderSampler
from sklearn import decomposition, tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_recall_curve, auc, roc_auc_score, roc_curve, recall_score, classification_report
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

import functions

#Load dataset
data_set = functions.loadData()
print('Dataset is loaded')

#Select specific features from the entire dataset
# Transaction attributes kept for modeling; presumably fraud-detection
# features (country/currency/CVC fields) -- verify against the dataset schema.
selected_features = [
    "issuercountrycode", "txvariantcode", "EuroAmount", "currencycode",
    "shoppercountrycode", "shopperinteraction", "cardverificationcodesupplied",
    "cvcresponsecode", "accountcode"
]
new_data = data_set[selected_features]

#Create dummies dataset
# One-hot encodes the categorical columns selected above.
new_data = pd.get_dummies(new_data)
#Split dataset into train and test
# NOTE(review): this call is truncated at the excerpt boundary -- the
# remaining train_test_split arguments (and closing paren) are not visible.
X_train, X_test, y_train, y_test = train_test_split(new_data,
                                                    data_set['label'],
Example #6
0
def main():
    """Ingest match CSV exports and update the stored player/team records.

    Command line: every argument except the last is a CSV file path; the
    last argument is the user name. For each file: parse it via fnc
    helpers, gather operator-confirmed match metadata interactively, update
    team and player objects, persist them, and (with ~0.1% probability)
    draw a cosmetic fake progress bar.
    """
    user = sys.argv[-1]
    for frame in sys.argv[1:-1]:
        # Loop over all csv files
        os.system("cls")  # Windows-only console clear
        # NOTE(review): the "******" below is scrubbing/redaction residue and
        # is not valid Python -- the original string concatenation (likely
        # involving `user`) must be restored before this runs.
        print("User: "******"File Path: " + str(frame))

        # fnc.handlefileinput returns a list of dataframes; indices 2 and 4
        # are intentionally unused here (see commented names below).
        csv_frames = fnc.handlefileinput(frame)

        match_overview = csv_frames[0]
        match_performance = csv_frames[1]
        # sixth_pick_overview = csv_frames[2]
        player_round_data = csv_frames[3]
        # round_event_breakdown = csv_frames[4]

        # Load persisted state and derive lookup lists from it.
        all_players, all_teams = fnc.loadData()
        knowndata = fnc.getknowndata(all_players, all_teams)
        all_playernames = knowndata[0]
        all_teamnames = knowndata[1]
        used_gamemode = knowndata[2]
        used_match_info = knowndata[3]

        # Keep prompting until the operator confirms the echoed summary.
        input_correct = False
        user_input = []
        while not input_correct:
            user_input = fnc.handleuserinput(knowndata)
            print("\n\nMatchdata: " + str(user_input[0]) + " | Remarks:\t" + str(user_input[1]) + "\n")

            # user_input layout (as used below): [0] match data, [1] remarks,
            # [2] team 1 name, [3] team 1 map bans, [4] team 1 op bans,
            # [5] team 2 name, [6] team 2 map bans, [7] team 2 op bans.
            map_string = ""
            for data in user_input[3]:
                map_string += "\t\t\t\t" + str(data) + "\n"
            op_string = ""
            for data in user_input[4]:
                op_string += "\t\t\t\t" + str(data) + "\n"
            print(
                "Team 1: " + str(user_input[2]) + "\n\tBanned:\n\t\tMaps:\n" + map_string + "\n\t\tOps:\n" + op_string)
            map_string = ""
            for data in user_input[6]:
                map_string += "\t\t\t\t" + str(data) + "\n"
            op_string = ""
            for data in user_input[7]:
                op_string += "\t\t\t\t" + str(data) + "\n"
            print(
                "Team 2: " + str(user_input[5]) + "\n\tBanned:\n\t\tMaps:\n" + map_string + "\n\t\tOps:\n" + op_string)
            confirm = input("Input correct?\t Press y to confirm...\n")
            if confirm == "y" or confirm == "Y":
                input_correct = True

        blue_team = user_input[2]
        orange_team = user_input[5]

        # Register any team names not seen before.
        if not (blue_team in all_teamnames):
            print("Add new team...")
            all_teams.append(ds.Team(blue_team))
        if not (orange_team in all_teamnames):
            print("Add new team...")
            all_teams.append(ds.Team(orange_team))
        for team in all_teams:
            print("Add team round data...")
            if team.name in [blue_team, orange_team]:
                # NOTE(review): deliberate NameError trick -- for the first
                # matching team, `roundoverview` is unbound, so evaluating the
                # arguments raises NameError and the except branch calls
                # addMatch WITHOUT it, binding `roundoverview` from its return
                # value; the second team then takes the try path with it.
                # Fragile: any other NameError inside addMatch is masked.
                try:
                    team.addMatch(match_overview, player_round_data, user_input, roundoverview)
                except NameError:
                    roundoverview = team.addMatch(match_overview, player_round_data, user_input)

        # Add this match to every listed player, creating new Player records
        # for names not seen before.
        for player_name in match_performance.Player.values:
            if not (player_name in all_playernames):
                print("Add new player match data...")
                all_players.append(ds.Player(player_name))
                all_players[len(all_players) - 1].addMatch(player_name, match_overview, match_performance, roundoverview)
            else:
                for player in all_players:
                    if player.name == player_name:
                        print("Add player match data...")
                        player.addMatch(player_name, match_overview, match_performance, roundoverview)
        fnc.saveData(all_players, all_teams)

        # Easter egg: ~0.1% of runs draw a fake progress bar that spells
        # "TR...OL..." across its width.
        if random.randint(1, 1001) > 999:
            # this ends the progress bar
            toolbar_width = 100
            # setup toolbar
            sys.stdout.write("[%s]" % (" " * toolbar_width))
            sys.stdout.flush()
            sys.stdout.write("\b" * (toolbar_width + 1))  # return to start of line, after '['

            for i in range(toolbar_width):
                time.sleep(1 / random.randint(25, 50))  # do real work here
                # update the bar
                if i == (toolbar_width / 4):
                    sys.stdout.write("TR")
                    sys.stdout.flush()
                elif (3 * toolbar_width / 4) > i > (toolbar_width / 4):
                    sys.stdout.write("OL")
                    sys.stdout.flush()
                else:
                    sys.stdout.write("-")
                    sys.stdout.flush()
            sys.stdout.write("]\n")

        print("File Path: " + str(frame))
        print("Finished")