Ejemplo n.º 1
0
def mainClassification():
    """Cross-validate every classifier and dump the per-fold results to CSV.

    Each classifier index in ``range(classifiers.len_classify())`` is trained
    and evaluated on every fold.  Two CSV files are written, each holding one
    row per fold and one column per classifier: the confusion matrices and
    the accuracies.
    """
    folds = 10
    n_classifiers = classifiers.len_classify()

    data = preprocessing.readData()
    samples, targets = preprocessing.splitSamples(data)
    kfold = preprocessing.crossValidation(folds)

    # Flat, fold-major lists: entry (fold k, classifier j) lives at index
    # k * n_classifiers + j.
    flat_cm, flat_acc = [], []
    for train_idx, test_idx in kfold.split(samples, targets):
        X_train, y_train = samples[train_idx], targets[train_idx]
        X_test, y_test = samples[test_idx], targets[test_idx]

        for clf_index in range(n_classifiers):
            y_pred = classifiers.classify(X_train, y_train, X_test, clf_index)
            fold_cm, fold_acc = classifiers.mensureAcc(y_pred, y_test)
            flat_cm.append(fold_cm)
            flat_acc.append(fold_acc)

    # Reshape the flat lists into a folds x n_classifiers table.
    result_cm = [
        [flat_cm[fold * n_classifiers + clf] for clf in range(n_classifiers)]
        for fold in range(folds)
    ]
    result_acc = [
        [flat_acc[fold * n_classifiers + clf] for clf in range(n_classifiers)]
        for fold in range(folds)
    ]

    utils.writeCSV("all_classifiers_confusion-matrix.csv", result_cm)
    utils.writeCSV("all_classifiers_accuracy.csv", result_acc)
    print("Finish!!\n")
Ejemplo n.º 2
0
def mainEnsemble():
    """Evaluate bagging ensembles of several sizes with repeated k-fold CV.

    For every ensemble size in ``n_estimators`` a fresh repeated
    cross-validation is run and one CSV file ``bagging<size>.csv`` is
    written.  Each file holds one row per train/test split and one column
    per base classifier, containing the accuracy for that split.
    """
    n_estimators = [10, 15, 20]
    folds = 10
    repeats = 3
    n_classifiers = classifiers.len_classify()

    data = preprocessing.readData()
    samples, targets = preprocessing.splitSamples(data)

    for num in n_estimators:
        kfold = preprocessing.repeatCrossValidation(folds, repeats)

        # Start a fresh table for every ensemble size so results of a
        # previous size never leak into this file.  One row is appended per
        # split, so every split produced by the validator is recorded
        # (presumably folds * repeats of them — confirm against
        # preprocessing.repeatCrossValidation).
        result_acc = []
        for train, test in kfold.split(samples, targets):
            X_train, X_test = samples[train], samples[test]
            y_train, y_test = targets[train], targets[test]

            split_acc = []
            for i in range(n_classifiers):
                y_pred = ensemble.runBagging(X_train, y_train, X_test, i, num)
                # The confusion matrix is not needed here, only the accuracy.
                _, fold_acc = classifiers.mensureAcc(y_pred, y_test)
                split_acc.append(fold_acc)
            result_acc.append(split_acc)

        # Name the file after the current ensemble size, not the whole list,
        # so each size gets its own output file.
        file_name = "bagging" + str(num) + ".csv"
        utils.writeCSV(file_name, result_acc)
    print("Finish!!\n")
Ejemplo n.º 3
0
 def updateConfig(self, event):
     """
     Store the current virtual-screen (vs*) attributes in the first
     config entry and write that entry to the app config file.
     """
     logging.debug(event)
     # Config key -> name of the instance attribute holding its value.
     fieldMap = (
         ("fishScreenStartX", "vsX"),
         ("fishScreenStartY", "vsY"),
         ("fishScreenWidth", "vsWidth"),
         ("fishScreenHeight", "vsHeight"),
     )
     for configKey, attrName in fieldMap:
         self.appConfig[0][configKey] = getattr(self, attrName)
     writeCSV(constants.APP_CONFIG_FILE, self.appConfig[0])
     self.printVirtualScreenData()
     logging.debug("Config file updates successfully")
Ejemplo n.º 4
0
def updateConfig(event):
    """
    Store the current virtual-screen (vs*) globals in the first config
    entry and write that entry to the app config file.
    """
    global appConfig
    logging.debug(event)
    # Snapshot of the module-level virtual-screen values, keyed by the
    # name they are stored under in the config entry.
    currentSettings = {
        "fishScreenStartX": vsX,
        "fishScreenStartY": vsY,
        "fishScreenWidth": vsWidth,
        "fishScreenHeight": vsHeight,
    }
    for configKey, value in currentSettings.items():
        appConfig[0][configKey] = value
    writeCSV(APP_CONFIG_FILE, appConfig[0])
    printVirtualScreenData()
    logging.debug("Config file updates successfully")
Ejemplo n.º 5
0
def process_survay(text,typeID,file):
	"""Run the survey automation for a detected code unless it was already submitted.

	A code already present in CODE_CSV is only logged. Otherwise the code is
	split into chunks, handed to maskIP together with typeID, logged with the
	resulting coupon text and proxy, and recorded in CODE_CSV.

	Always returns True.
	"""
	already_submitted = utils.codeIsExists(text, CODE_CSV)
	if already_submitted:
		logger.info("detected code for {}: {} is already submitted".format(file, text))
	else:
		# split the code into chunks before automating the survey
		chunks = utils.divideString(text)
		coupon_text, proxy = maskIP(chunks, typeID)

		summary = "detected code for {}: {}, {} (proxy={})".format(file, text, coupon_text, proxy)
		logger.info(summary)
		utils.writeCSV(text, CODE_CSV)
	return True
Ejemplo n.º 6
0
def main():
    """Score hierarchical clusterings for 2..21 clusters and write them to CSV.

    For every cluster count the data is clustered with complete linkage and
    the ``scoreSil`` and ``scoreDB`` results are collected.  Three files are
    written: ``sil_all.csv``, ``db_all.csv`` and ``cr_all.csv``.
    """
    data = preprocessing.readData()
    sil_values = []
    db_values = []
    # NOTE(review): no score is ever appended here because the scoreCR call
    # needs ``targets``, which this function does not load; cr_all.csv is
    # therefore written from an empty list.
    cr_values = []

    for n_clusters in range(2, 22):
        # Hierarchical clustering
        hier = hierarchical(n_clusters=n_clusters, linkage='complete').fit(data)
        sil_values.append(scoreSil(data, hier.labels_))
        db_values.append(scoreDB(data, hier.labels_))

    # Write the lists that were actually filled above (the original code
    # referenced undefined names here and raised NameError).
    writeCSV("sil_all.csv", sil_values)
    writeCSV("db_all.csv", db_values)
    writeCSV("cr_all.csv", cr_values)
Ejemplo n.º 7
0
# Preprocessing pipeline: each step below takes `texts` and rebinds it to the
# transformed result, so the statement order matters.

# Generate phrasers for multiwords
log(INFO, 'Generating phrasers')
bigramPhraser, trigramPhraser = generatePhrasers(texts,
                                                 min_count=5,
                                                 threshold=50)

# Generate multiword n-grams
# NOTE(review): 'makeNGrgrams' looks like a misspelling of 'makeNGrams' --
# confirm the helper's actual name where it is defined.
log(INFO, "Generating multiword n-grams")
texts = makeNGrgrams(texts, bigramPhraser, trigramPhraser)

# Parse all multiword terms from wordnet
log(INFO, "Parsing wordnet multiterms into dictionary")
wordnetMultitermDict = readMultiterms()

# Match multiword terms from wordnet
log(INFO, "Matching wordnet multiterms")
texts = applyWordnetMultiterms(texts, wordnetMultitermDict)

# Lemmatize the texts and filter for allowed token tags
log(INFO, "Lemmatizing")
texts = lemmatize(texts, allowed_token_tags=['NNP', 'NNS', 'NN'])

# Write preprocessed texts to a file
# NOTE(review): the path literal contains no '{}' placeholder, so
# .format(model_name) leaves it unchanged and every run writes to the same
# file. Presumably the model name was meant to be part of the path -- confirm.
file_path = 'models/lemmatized.csv'.format(model_name)

# Each CSV row pairs an entry of `ids` with the matching processed text.
log(INFO, "Writing lemmatized abstracts to file {}".format(file_path))
writeCSV(file_path, list(zip(ids, texts)))

log(INFO, "Finished")
Ejemplo n.º 8
0
    def createSets(self, dats):
        """Export every favorite game of the single set ``self.setKey``.

        The export directory is emptied (or created).  Then, for each genre
        in ``self.favorites``, the bioses and the games are copied through
        the ``utils`` helpers and each game is recorded in ``<setKey>.csv``,
        in the gamelist and in the ``<setKey>.dat`` tree.

        Args:
            dats: mapping holding the header node under
                ``setKey + "Header"`` and, under ``setKey``, the per-game
                entries (each exposing ``.node``).
        """
        config = self.configuration
        exportDir = config['exportDir']
        setKey = self.setKey
        setDir = os.path.join(exportDir, setKey)

        self.logger.log('Creating or cleaning output directory ' + exportDir)
        if os.path.exists(exportDir):
            # Empty the directory but keep the directory itself.
            for entry in os.listdir(exportDir):
                fullPath = os.path.join(exportDir, entry)
                if os.path.isdir(fullPath):
                    shutil.rmtree(fullPath)
                else:
                    os.remove(fullPath)
        else:
            os.makedirs(exportDir)

        # Configuration flags are stored as the strings '1' / other.
        dryRun = config['dryRun'] == '1'
        useGenreSubFolder = config['genreSubFolders'] == '1'
        # Images are only handled when enabled AND an images path is set.
        scrapeImages = config['useImages'] == '1' and bool(config['images'])

        header = "Genre;Name (mame description);Rom name;Year;Manufacturer;Hardware;Comments;Notes\n"

        # The context manager guarantees the CSV is closed even when one of
        # the copy or write steps below raises.
        with open(os.path.join(exportDir, setKey + ".csv"), "w",
                  encoding="utf-8") as csvFile:
            csvFile.write(header)

            # init dat tree and gamelist
            root = etree.Element("datafile")
            root.append(dats[setKey + "Header"])
            os.makedirs(setDir)
            if scrapeImages:
                os.makedirs(os.path.join(setDir, 'downloaded_images'))
            setGamelist = gamelist.initWrite(setDir)

            for genre in self.favorites:
                self.logger.log("Handling genre " + genre)

                if useGenreSubFolder:
                    os.makedirs(os.path.join(setDir, genre))
                    if scrapeImages:
                        gamelist.writeGamelistFolder(setGamelist, genre,
                                                     genre + '.png')
                        utils.setImageCopy(
                            exportDir,
                            os.path.join(self.scriptDir, 'data', 'images'),
                            genre + '.png', setKey, dryRun)

                # copy bios in each subdirectory
                for bios in self.bioses:
                    setBios = os.path.join(config[setKey], bios + ".zip")
                    utils.setFileCopy(exportDir, setBios, genre, bios, setKey,
                                      useGenreSubFolder, dryRun)
                    if os.path.exists(setBios):
                        utils.writeGamelistHiddenEntry(setGamelist, bios,
                                                       genre,
                                                       useGenreSubFolder)

                for game in sorted(self.favorites[genre]):
                    setRom = os.path.join(config[setKey], game + ".zip")
                    setCHD = os.path.join(config[setKey], game)
                    image = config['imgNameFormat'].replace('{rom}', game)
                    # TODO aliases should be handled here
                    utils.setFileCopy(exportDir, setRom, genre, game, setKey,
                                      useGenreSubFolder, dryRun)
                    utils.setCHDCopy(exportDir, setCHD, genre, game, setKey,
                                     useGenreSubFolder, dryRun)
                    utils.writeCSV(csvFile, game, None, genre, dats[setKey],
                                   None, setKey)
                    utils.writeGamelistEntry(setGamelist, game, image,
                                             dats[setKey], genre,
                                             useGenreSubFolder, None, setKey,
                                             None)
                    # Only games known to the dat contribute a node.
                    if game in dats[setKey]:
                        root.append(dats[setKey][game].node)
                    if scrapeImages:
                        utils.setImageCopy(exportDir, config['images'], image,
                                           setKey, dryRun)

                    self.logger.log(setRom)

            # writing and closing everything
            etree.ElementTree(root).write(
                os.path.join(exportDir, setKey + ".dat"),
                xml_declaration=True,
                encoding="utf-8")
            gamelist.closeWrite(setGamelist)
Ejemplo n.º 9
0
    phase_vol = mainpath + "Phase_vol_tbl.txt"      # redundant
    trace_main = mainpath + "Trace_main_tbl.txt"
  
    inputpaths = (
        phase_main,
        phase_mass,
        phase_vol,
        solid_comp,
        system_main,
        trace_main,
        bulk_comp
    )
    inputnames = (
        'phase_main',
        'phase_mass',
        'phase_vol',
        'solid_comp',
        'system_main',
        'trace_main',
        'bulk_comp'
    )

    inputfiles = dict(zip(inputnames, inputpaths))
    
    Data = extractData(inputfiles)

    writeCSV(Data, outputpath)

    # Cleaning the directory
    moveTables(mainpath, outputpath)
Ejemplo n.º 10
0
    def __createSets(self, allTests, dats):
        """Build the export directory for every set in ``self.usingSystems``.

        Empties (or creates) ``configuration['exportDir']``, then for each
        genre in ``self.favorites`` scores every game against each set of
        ``self.setKeys[self.hardware]``, lets ``__keepSet`` decide which of
        the used sets keep the game, and for the kept sets copies the rom/CHD
        (and optionally the image) and records the game in that set's CSV,
        gamelist and dat tree.  All scores are also written to
        ``scoreSheet.csv``.

        Args:
            allTests: mapping indexed by game name; an entry is itself
                indexed by set key and its values expose ``.status``.
            dats: mapping indexed by set key (per-game entries exposing
                ``.node``) and by ``setKey + "Header"`` (header node).
        """

        self.logger.log('Creating or cleaning output directory ' +
                        self.configuration['exportDir'])
        if os.path.exists(self.configuration['exportDir']):
            # Directory exists: delete its content but keep the directory.
            for file in os.listdir(
                    os.path.join(self.configuration['exportDir'])):
                fullPath = os.path.join(self.configuration['exportDir'], file)
                shutil.rmtree(fullPath) if os.path.isdir(
                    fullPath) else os.remove(fullPath)
        else:
            os.makedirs(self.configuration['exportDir'])

        # Games selected by no set at all (reported at the end).
        notInAnySet = []
        # Set key -> games selected by that set only.
        # NOTE(review): filled below but not read in the visible code.
        onlyInOneSet = dict()
        # Configuration flags are stored as the string '1' when enabled.
        dryRun = True if self.configuration['dryRun'] == '1' else False
        useGenreSubFolder = True if self.configuration[
            'genreSubFolders'] == '1' else False
        keepNotTested = True if self.configuration[
            'keepNotTested'] == '1' else False
        keepLevel = int(self.configuration['keepLevel'])
        usePreferedSetForGenre = True if self.configuration[
            'usePreferedSetForGenre'] == '1' else False
        # Images are handled only when enabled AND an images path is set.
        scrapeImages = True if self.configuration[
            'useImages'] == '1' and self.configuration['images'] else False

        # Score sheet: one 'rom' column plus one '<setKey>Score' column per
        # set of the current hardware.
        scoreSheet = open(os.path.join(self.configuration['exportDir'],
                                       "scoreSheet.csv"),
                          "w",
                          encoding="utf-8")
        scoreSheet.write('rom;' + ';'.join(
            list(map(lambda key: key + 'Score', self.setKeys[self.hardware])))
                         + '\n')

        # Per-set outputs, all keyed by set key: open CSV file, gamelist
        # handle and root element of the dat tree.
        CSVs, gamelists, roots = dict(), dict(), dict()
        header = "Status;Genre;Name (mame description);Rom name;Year;Manufacturer;Hardware;Comments;Notes\n"
        for setKey in self.usingSystems:
            # init CSVS
            CSVs[setKey] = open(os.path.join(self.configuration['exportDir'],
                                             setKey + ".csv"),
                                "w",
                                encoding="utf-8")
            CSVs[setKey].write(header)
            # init gamelists
            roots[setKey] = etree.Element("datafile")
            roots[setKey].append(dats[setKey + "Header"])
            os.makedirs(os.path.join(self.configuration['exportDir'], setKey))
            os.makedirs(
                os.path.join(self.configuration['exportDir'], setKey,
                             'downloaded_images')) if scrapeImages else None
            gamelists[setKey] = gamelist.initWrite(
                os.path.join(self.configuration['exportDir'], setKey))

        # get bioses
        # The '[BIOSES]' entry is moved out of self.favorites so it is not
        # treated as a genre below (this mutates self.favorites).
        # NOTE(review): when the key is absent, self.bioses must already be
        # set elsewhere -- confirm.
        if '[BIOSES]' in self.favorites.keys():
            self.bioses = self.favorites['[BIOSES]']
            del self.favorites['[BIOSES]']

        for genre in self.favorites.keys():
            self.logger.log("Handling genre " + genre)

            if useGenreSubFolder:
                # One sub-folder per genre in every used set, with an
                # optional '<genre>.png' folder image.
                for setKey in self.usingSystems:
                    os.makedirs(
                        os.path.join(self.configuration['exportDir'], setKey,
                                     genre))
                    if scrapeImages:
                        gamelist.writeGamelistFolder(gamelists[setKey], genre,
                                                     genre + '.png')
                        utils.setImageCopy(
                            self.configuration['exportDir'],
                            os.path.join(self.scriptDir, 'data', 'images'),
                            genre + '.png', setKey, dryRun)

            # copy bios in each subdirectory
            for bios in self.bioses:
                for setKey in self.usingSystems:
                    setBios = os.path.join(self.configuration[setKey],
                                           bios + ".zip")
                    utils.setFileCopy(self.configuration['exportDir'], setBios,
                                      genre, bios, setKey, useGenreSubFolder,
                                      dryRun)
                    # Only bioses that exist in the source set get a hidden
                    # gamelist entry.
                    if os.path.exists(setBios):
                        utils.writeGamelistHiddenEntry(gamelists[setKey], bios,
                                                       genre,
                                                       useGenreSubFolder)

            for favs in sorted(self.favorites[genre]):
                # needed to handle multi names games
                # (a favorite may list several rom names separated by ';')
                if ';' in favs:
                    games = favs.split(';')
                else:
                    games = [favs]

                # Becomes True once any name of this favorite was selected
                # by a set; it stays True for the following names.
                multiGameFoundInSet = False
                for game in games:
                    # Human-readable trace for this game, logged at the end
                    # of the iteration.
                    audit = game + " -> "
                    scores = dict()
                    testForGame = allTests[game] if game in allTests else None

                    # Score the game for every set of the hardware; sets
                    # that are not in use get the score -2.
                    for setKey in self.setKeys[self.hardware]:
                        scores[setKey] = self.__computeScore(
                            setKey, self.configuration[setKey], game,
                            testForGame) if setKey in self.usingSystems else -2

                    audit = audit + " SCORES: " + \
                        " ".join(list(map(lambda key: str(scores[key]), self.setKeys[self.hardware]))) + " ,"
                    scoreSheet.write(game + ';' + ';'.join(
                        list(
                            map(lambda key: str(scores[key]), self.setKeys[
                                self.hardware]))) + '\n')

                    # Ask __keepSet for each used set in turn; the list of
                    # sets selected so far is passed along, so the order of
                    # self.usingSystems can influence the outcome.
                    selected = []
                    for setKey in self.usingSystems:
                        selected.append(setKey) if self.__keepSet(
                            keepNotTested, usePreferedSetForGenre,
                            self.configuration['exclusionType'], keepLevel,
                            scores, setKey, genre, selected) else None

                    audit = audit + " SELECTED: " + str(selected)

                    for setKey in self.usingSystems:
                        setRom = os.path.join(self.configuration[setKey],
                                              game + ".zip")
                        setCHD = os.path.join(self.configuration[setKey], game)
                        image = self.configuration['imgNameFormat'].replace(
                            '{rom}', game)
                        if setKey in selected:
                            multiGameFoundInSet = True
                            utils.setFileCopy(self.configuration['exportDir'],
                                              setRom, genre, game, setKey,
                                              useGenreSubFolder, dryRun)
                            utils.setCHDCopy(self.configuration['exportDir'],
                                             setCHD, genre, game, setKey,
                                             useGenreSubFolder, dryRun)
                            utils.writeCSV(CSVs[setKey], game, scores[setKey],
                                           genre, dats[setKey], testForGame,
                                           setKey)
                            # Games without a test entry for this set get a
                            # fixed placeholder status.
                            testStatus = self.getStatus(testForGame[setKey].status) \
                                if testForGame is not None and setKey in testForGame else 'UNTESTED & FRESHLY ADDED'
                            utils.writeGamelistEntry(gamelists[setKey], game,
                                                     image, dats[setKey],
                                                     genre, useGenreSubFolder,
                                                     testForGame, setKey,
                                                     testStatus)
                            # Only games present in the set's dat contribute
                            # a node to the exported dat tree.
                            roots[setKey].append(
                                dats[setKey]
                                [game].node) if game in dats[setKey] else None
                            if scrapeImages:
                                utils.setImageCopy(
                                    self.configuration['exportDir'],
                                    self.configuration['images'], image,
                                    setKey, dryRun)
                    # Works only if most recent game is first in line (raidendx;raidndx not the opposite)
                    if len(selected) == 0 and not multiGameFoundInSet:
                        notInAnySet.append(game)
                    elif len(selected) == 1:
                        if selected[0] not in onlyInOneSet:
                            onlyInOneSet[selected[0]] = []
                        onlyInOneSet[selected[0]].append(game)

                    self.logger.log("    " + audit)

        # writing and closing everything
        for setKey in self.usingSystems:
            treeSet = etree.ElementTree(roots[setKey])
            treeSet.write(os.path.join(self.configuration['exportDir'],
                                       setKey + ".dat"),
                          xml_declaration=True,
                          encoding="utf-8")
            CSVs[setKey].close()
            gamelist.closeWrite(gamelists[setKey])

        scoreSheet.close()

        # Final report: games that no set selected.
        self.logger.log("\n<------------------ RESULTS ------------------>")
        self.logger.log("NOT FOUND IN ANY SET : " + str(len(notInAnySet)),
                        self.logger.WARNING)
        self.logger.log(" ".join(notInAnySet), self.logger.WARNING)