def mainClassification():
    """Cross-validate every registered classifier and write the per-fold results.

    For each cross-validation split and each classifier index, the classifier
    is run through ``classifiers.classify`` and scored with
    ``classifiers.mensureAcc``. Two tables (one row per fold, one column per
    classifier) are then written: the confusion matrices and the accuracies.
    """
    fold_count = 10
    classifier_count = classifiers.len_classify()

    dataset = preprocessing.readData()
    samples, targets = preprocessing.splitSamples(dataset)
    splitter = preprocessing.crossValidation(fold_count)

    # Flat, fold-major logs: entry (fold * classifier_count + classifier).
    matrices, accuracies = [], []
    for train_idx, test_idx in splitter.split(samples, targets):
        X_train, X_test = samples[train_idx], samples[test_idx]
        y_train, y_test = targets[train_idx], targets[test_idx]
        for clf_index in range(classifier_count):
            predicted = classifiers.classify(X_train, y_train, X_test, clf_index)
            fold_matrix, fold_accuracy = classifiers.mensureAcc(predicted, y_test)
            matrices.append(fold_matrix)
            accuracies.append(fold_accuracy)

    def as_table(flat):
        # Explicit indexing (not slicing) so a short log still raises IndexError.
        return [
            [flat[fold * classifier_count + clf] for clf in range(classifier_count)]
            for fold in range(fold_count)
        ]

    result_cm = as_table(matrices)
    result_acc = as_table(accuracies)

    utils.writeCSV("all_classifiers_confusion-matrix.csv", result_cm)
    utils.writeCSV("all_classifiers_accuracy.csv", result_acc)
    print("Finish!!\n")
def mainEnsemble():
    """Evaluate bagging ensembles of several sizes with repeated cross-validation.

    For every ensemble size in ``n_estimators`` a fresh repeated
    cross-validation is run. Each split yields one row of accuracies (one
    column per base classifier), and the rows are written to
    ``bagging<size>.csv``.

    Fixes over the previous version:
    * the file name is built from the current size, not from the whole list
      (which produced ``bagging[10, 15, 20].csv`` every time);
    * accuracies are collected per ensemble size instead of accumulating in
      one list, so each file holds the scores of its own size;
    * every split gets a row, not just the first ``folds`` of them.
    """
    n_estimators = [10, 15, 20]
    folds = 10
    repeats = 3
    n_classifiers = classifiers.len_classify()

    data = preprocessing.readData()
    samples, targets = preprocessing.splitSamples(data)

    for num in n_estimators:
        kfold = preprocessing.repeatCrossValidation(folds, repeats)

        # One row per split, appended in the order the splitter yields them.
        result_acc = []
        for train, test in kfold.split(samples, targets):
            X_train, X_test = samples[train], samples[test]
            y_train, y_test = targets[train], targets[test]

            split_acc = []
            for i in range(n_classifiers):
                y_pred = ensemble.runBagging(X_train, y_train, X_test, i, num)
                # The confusion matrix is not exported for ensembles.
                _, fold_acc = classifiers.mensureAcc(y_pred, y_test)
                split_acc.append(fold_acc)
            result_acc.append(split_acc)

        file_name = "bagging" + str(num) + ".csv"
        utils.writeCSV(file_name, result_acc)

    print("Finish!!\n")
def updateConfig(self, event):
    """Store the current virtual-screen geometry in the config and save it.

    Copies ``vsX``, ``vsY``, ``vsWidth`` and ``vsHeight`` into the first
    config record, writes that record to ``constants.APP_CONFIG_FILE`` and
    prints the virtual-screen data. ``event`` is only logged.
    """
    logging.debug(event)

    record = self.appConfig[0]
    field_to_attribute = (
        ("fishScreenStartX", "vsX"),
        ("fishScreenStartY", "vsY"),
        ("fishScreenWidth", "vsWidth"),
        ("fishScreenHeight", "vsHeight"),
    )
    for field, attribute in field_to_attribute:
        record[field] = getattr(self, attribute)

    writeCSV(constants.APP_CONFIG_FILE, record)
    self.printVirtualScreenData()
    logging.debug("Config file updates successfully")
def updateConfig(event):
    """Store the module-level virtual-screen geometry in the config and save it.

    Copies ``vsX``, ``vsY``, ``vsWidth`` and ``vsHeight`` into the first
    record of the module-level ``appConfig``, writes that record to
    ``APP_CONFIG_FILE`` and prints the virtual-screen data. ``event`` is only
    logged.
    """
    logging.debug(event)

    # appConfig is mutated in place, never rebound, so no `global` is needed.
    record = appConfig[0]
    for field, value in (
        ("fishScreenStartX", vsX),
        ("fishScreenStartY", vsY),
        ("fishScreenWidth", vsWidth),
        ("fishScreenHeight", vsHeight),
    ):
        record[field] = value

    writeCSV(APP_CONFIG_FILE, record)
    printVirtualScreenData()
    logging.debug("Config file updates successfully")
def process_survay(text,typeID,file):
    """Submit a detected survey code unless it was already recorded.

    If ``text`` is already present in ``CODE_CSV`` this only logs that fact.
    Otherwise the code is split with ``utils.divideString``, handed to
    ``maskIP`` together with ``typeID``, logged, and appended to ``CODE_CSV``.
    ``file`` is used for log messages only.

    Returns True in both cases.
    """
    already_submitted = utils.codeIsExists(text, CODE_CSV)

    if not already_submitted:
        # Split the code into chunks before automating the survey.
        chunks = utils.divideString(text)
        coupon_text, proxy = maskIP(chunks, typeID)
        message = "detected code for {}: {}, {} (proxy={})".format(
            file, text, coupon_text, proxy)
        logger.info(message)
        utils.writeCSV(text, CODE_CSV)
    else:
        logger.info(
            "detected code for {}: {} is already submitted".format(file, text))

    return True
def main():
    """Score complete-linkage hierarchical clusterings for 2 to 21 clusters.

    For every cluster count the data is clustered, and the values returned by
    ``scoreSil`` and ``scoreDB`` for the resulting labels are collected and
    written to ``sil_all.csv`` and ``db_all.csv``.

    The previous version appended to ``*_ag1/2/3`` lists but wrote
    ``*_ag`` names that were never defined in the function, which raised
    NameError at the first ``writeCSV`` call. One list per score is used now.
    """
    data = preprocessing.readData()

    sil_values = []
    db_values = []
    # Nothing is appended here: the CR score needs target labels, which this
    # function does not load. The (empty) file is still written so the set of
    # output files is unchanged.
    # TODO: load targets and append scoreCR(targets, hier.labels_) in the loop.
    cr_values = []

    for n_clusters in range(2, 22):
        # Hierarchical clustering
        hier = hierarchical(n_clusters=n_clusters, linkage='complete').fit(data)
        sil_values.append(scoreSil(data, hier.labels_))
        db_values.append(scoreDB(data, hier.labels_))

    writeCSV("sil_all.csv", sil_values)
    writeCSV("db_all.csv", db_values)
    writeCSV("cr_all.csv", cr_values)
# Generate phrasers for multiwords log(INFO, 'Generating phrasers') bigramPhraser, trigramPhraser = generatePhrasers(texts, min_count=5, threshold=50) # Generate multiword n-grams log(INFO, "Generating multiword n-grams") texts = makeNGrgrams(texts, bigramPhraser, trigramPhraser) # Parse all multiword terms from wordnet log(INFO, "Parsing wordnet multiterms into dictionary") wordnetMultitermDict = readMultiterms() # Match multiword terms from wordnet log(INFO, "Matching wordnet multiterms") texts = applyWordnetMultiterms(texts, wordnetMultitermDict) # Lemmatize the texts and filter for allowed token tags log(INFO, "Lemmatizing") texts = lemmatize(texts, allowed_token_tags=['NNP', 'NNS', 'NN']) # Write preprocessed texts to a file file_path = 'models/lemmatized.csv'.format(model_name) log(INFO, "Writing lemmatized abstracts to file {}".format(file_path)) writeCSV(file_path, list(zip(ids, texts))) log(INFO, "Finished")
def createSets(self, dats):
    """Export the favorites of the single configured set (``self.setKey``).

    Steps, in order:
    1. Empty the export directory, or create it if it does not exist.
    2. Create ``<setKey>.csv`` and the ``<setKey>`` sub-directory (plus
       ``downloaded_images`` when image scraping is enabled) and open the
       gamelist for that directory.
    3. For every genre in ``self.favorites``: optionally create the genre
       folder, copy each bios of ``self.bioses``, then copy every game and
       record it in the CSV, the gamelist and the dat tree.
    4. Write ``<setKey>.dat``, close the CSV and close the gamelist.

    :param dats: mapping that provides ``dats[setKey + "Header"]`` (an element
        appended to the dat root) and ``dats[setKey]`` (looked up by game name;
        entries expose a ``node`` attribute).

    The CSV is now managed by a ``with`` block, so the handle is closed even
    if a copy or write step raises. The order of the closing steps on the
    normal path (dat written, CSV closed, gamelist closed) is unchanged.
    """
    exportDir = self.configuration['exportDir']
    setKey = self.setKey

    self.logger.log('Creating or cleaning output directory ' + exportDir)
    if os.path.exists(exportDir):
        for entry in os.listdir(exportDir):
            fullPath = os.path.join(exportDir, entry)
            if os.path.isdir(fullPath):
                shutil.rmtree(fullPath)
            else:
                os.remove(fullPath)
    else:
        os.makedirs(exportDir)

    # Configuration flags are stored as the strings '1' / anything else.
    dryRun = self.configuration['dryRun'] == '1'
    useGenreSubFolder = self.configuration['genreSubFolders'] == '1'
    # 'images' is only consulted when image usage is switched on.
    scrapeImages = (self.configuration['useImages'] == '1'
                    and bool(self.configuration['images']))

    header = "Genre;Name (mame description);Rom name;Year;Manufacturer;Hardware;Comments;Notes\n"
    setDir = os.path.join(exportDir, setKey)

    with open(os.path.join(exportDir, setKey + ".csv"), "w",
              encoding="utf-8") as csvFile:
        # init CSV
        csvFile.write(header)

        # init dat tree and gamelist
        root = etree.Element("datafile")
        root.append(dats[setKey + "Header"])
        os.makedirs(setDir)
        if scrapeImages:
            os.makedirs(os.path.join(setDir, 'downloaded_images'))
        setGamelist = gamelist.initWrite(setDir)

        for genre in self.favorites:
            self.logger.log("Handling genre " + genre)

            if useGenreSubFolder:
                os.makedirs(os.path.join(setDir, genre))
                if scrapeImages:
                    gamelist.writeGamelistFolder(setGamelist, genre,
                                                 genre + '.png')
                    utils.setImageCopy(
                        exportDir,
                        os.path.join(self.scriptDir, 'data', 'images'),
                        genre + '.png', setKey, dryRun)

            # copy bios in each subdirectory
            for bios in self.bioses:
                setBios = os.path.join(self.configuration[setKey],
                                       bios + ".zip")
                utils.setFileCopy(exportDir, setBios, genre, bios, setKey,
                                  useGenreSubFolder, dryRun)
                # Only bioses that exist in the source set get a gamelist entry.
                if os.path.exists(setBios):
                    utils.writeGamelistHiddenEntry(setGamelist, bios, genre,
                                                   useGenreSubFolder)

            for game in sorted(self.favorites[genre]):
                setDat = dats[setKey]
                setRom = os.path.join(self.configuration[setKey],
                                      game + ".zip")
                setCHD = os.path.join(self.configuration[setKey], game)
                image = self.configuration['imgNameFormat'].replace(
                    '{rom}', game)
                # TODO aliases should be handled here
                utils.setFileCopy(exportDir, setRom, genre, game, setKey,
                                  useGenreSubFolder, dryRun)
                utils.setCHDCopy(exportDir, setCHD, genre, game, setKey,
                                 useGenreSubFolder, dryRun)
                # No score and no test data are passed in this mode.
                utils.writeCSV(csvFile, game, None, genre, setDat, None,
                               setKey)
                utils.writeGamelistEntry(setGamelist, game, image, setDat,
                                         genre, useGenreSubFolder, None,
                                         setKey, None)
                if game in setDat:
                    root.append(setDat[game].node)
                if scrapeImages:
                    utils.setImageCopy(exportDir,
                                       self.configuration['images'], image,
                                       setKey, dryRun)
                self.logger.log(setRom)

        # writing and closing everything
        treeSet = etree.ElementTree(root)
        treeSet.write(os.path.join(exportDir, setKey + ".dat"),
                      xml_declaration=True, encoding="utf-8")

    gamelist.closeWrite(setGamelist)
phase_vol = mainpath + "Phase_vol_tbl.txt" # redundant trace_main = mainpath + "Trace_main_tbl.txt" inputpaths = ( phase_main, phase_mass, phase_vol, solid_comp, system_main, trace_main, bulk_comp ) inputnames = ( 'phase_main', 'phase_mass', 'phase_vol', 'solid_comp', 'system_main', 'trace_main', 'bulk_comp' ) inputfiles = dict(zip(inputnames, inputpaths)) Data = extractData(inputfiles) writeCSV(Data, outputpath) # Cleaning the directory moveTables(mainpath, outputpath)
def __createSets(self, allTests, dats):
    """Export the favorites for every set in ``self.usingSystems``.

    The export directory is emptied (or created), then for each set a CSV, a
    gamelist and a dat tree are initialised. Every favorite game is scored for
    each set of the current hardware, the sets to keep are chosen through
    ``self.__keepSet``, and the game is copied/recorded for those sets. A
    ``scoreSheet.csv`` with all scores is written alongside, and the games not
    selected for any set are logged at the end.

    :param allTests: mapping looked up by game name; a missing game is treated
        as having no test data (``None``).
    :param dats: mapping that provides ``dats[setKey + "Header"]`` and
        ``dats[setKey]`` (looked up by game name; entries expose ``node``).

    Side effect: when ``self.favorites`` has a ``'[BIOSES]'`` entry it is moved
    to ``self.bioses`` and deleted from ``self.favorites``.

    NOTE(review): ``scoreSheet`` and the per-set CSV files are opened without
    a context manager, so they stay open if any step in between raises.
    NOTE(review): ``onlyInOneSet`` is filled but never read in this method.
    """
    self.logger.log('Creating or cleaning output directory ' + self.configuration['exportDir'])
    # Start from an empty export directory: delete its content or create it.
    if os.path.exists(self.configuration['exportDir']):
        for file in os.listdir(
                os.path.join(self.configuration['exportDir'])):
            fullPath = os.path.join(self.configuration['exportDir'], file)
            shutil.rmtree(fullPath) if os.path.isdir(
                fullPath) else os.remove(fullPath)
    else:
        os.makedirs(self.configuration['exportDir'])

    notInAnySet = []
    onlyInOneSet = dict()
    # Boolean options are stored as the string '1' when enabled.
    dryRun = True if self.configuration['dryRun'] == '1' else False
    useGenreSubFolder = True if self.configuration[
        'genreSubFolders'] == '1' else False
    keepNotTested = True if self.configuration[
        'keepNotTested'] == '1' else False
    keepLevel = int(self.configuration['keepLevel'])
    usePreferedSetForGenre = True if self.configuration[
        'usePreferedSetForGenre'] == '1' else False
    # Images are only handled when enabled AND an images value is configured.
    scrapeImages = True if self.configuration[
        'useImages'] == '1' and self.configuration['images'] else False

    # Score sheet: one "<setKey>Score" column per set of the current hardware.
    scoreSheet = open(os.path.join(self.configuration['exportDir'], "scoreSheet.csv"), "w", encoding="utf-8")
    scoreSheet.write('rom;' + ';'.join(
        list(map(lambda key: key + 'Score', self.setKeys[self.hardware]))) + '\n')

    # Per-set outputs, all keyed by setKey.
    CSVs, gamelists, roots = dict(), dict(), dict()
    header = "Status;Genre;Name (mame description);Rom name;Year;Manufacturer;Hardware;Comments;Notes\n"
    for setKey in self.usingSystems:
        # init CSVS
        CSVs[setKey] = open(os.path.join(self.configuration['exportDir'], setKey + ".csv"), "w", encoding="utf-8")
        CSVs[setKey].write(header)
        # init gamelists
        roots[setKey] = etree.Element("datafile")
        roots[setKey].append(dats[setKey + "Header"])
        os.makedirs(os.path.join(self.configuration['exportDir'], setKey))
        os.makedirs(
            os.path.join(self.configuration['exportDir'], setKey,
                         'downloaded_images')) if scrapeImages else None
        gamelists[setKey] = gamelist.initWrite(
            os.path.join(self.configuration['exportDir'], setKey))

    # get bioses
    # self.bioses is read below regardless, so it must already be set when
    # the favorites contain no '[BIOSES]' entry.
    if '[BIOSES]' in self.favorites.keys():
        self.bioses = self.favorites['[BIOSES]']
        del self.favorites['[BIOSES]']

    for genre in self.favorites.keys():
        self.logger.log("Handling genre " + genre)

        if useGenreSubFolder:
            for setKey in self.usingSystems:
                os.makedirs(
                    os.path.join(self.configuration['exportDir'], setKey,
                                 genre))
                if scrapeImages:
                    gamelist.writeGamelistFolder(gamelists[setKey], genre,
                                                 genre + '.png')
                    utils.setImageCopy(
                        self.configuration['exportDir'],
                        os.path.join(self.scriptDir, 'data', 'images'),
                        genre + '.png', setKey, dryRun)

        # copy bios in each subdirectory
        for bios in self.bioses:
            for setKey in self.usingSystems:
                setBios = os.path.join(self.configuration[setKey],
                                       bios + ".zip")
                utils.setFileCopy(self.configuration['exportDir'], setBios,
                                  genre, bios, setKey, useGenreSubFolder,
                                  dryRun)
                # Only bioses that exist in the source set get a gamelist entry.
                if os.path.exists(setBios):
                    utils.writeGamelistHiddenEntry(gamelists[setKey], bios,
                                                   genre, useGenreSubFolder)

        for favs in sorted(self.favorites[genre]):
            # needed to handle multi names games
            if ';' in favs:
                games = favs.split(';')
            else:
                games = [favs]
            # Set once any of the alternative names has been exported.
            multiGameFoundInSet = False
            for game in games:
                audit = game + " -> "
                scores = dict()
                testForGame = allTests[game] if game in allTests else None

                # Sets of this hardware that are not in use get the score -2.
                for setKey in self.setKeys[self.hardware]:
                    scores[setKey] = self.__computeScore(
                        setKey, self.configuration[setKey], game,
                        testForGame) if setKey in self.usingSystems else -2

                audit = audit + " SCORES: " + \
                    " ".join(list(map(lambda key: str(scores[key]), self.setKeys[self.hardware]))) + " ,"
                scoreSheet.write(game + ';' + ';'.join(
                    list(
                        map(lambda key: str(scores[key]), self.setKeys[
                            self.hardware]))) + '\n')

                # __keepSet receives the sets selected so far, so the outcome
                # for one set can depend on the ones examined before it.
                selected = []
                for setKey in self.usingSystems:
                    selected.append(setKey) if self.__keepSet(
                        keepNotTested, usePreferedSetForGenre,
                        self.configuration['exclusionType'], keepLevel,
                        scores, setKey, genre, selected) else None

                audit = audit + " SELECTED: " + str(selected)

                for setKey in self.usingSystems:
                    setRom = os.path.join(self.configuration[setKey],
                                          game + ".zip")
                    setCHD = os.path.join(self.configuration[setKey], game)
                    image = self.configuration['imgNameFormat'].replace(
                        '{rom}', game)
                    if setKey in selected:
                        multiGameFoundInSet = True
                        utils.setFileCopy(self.configuration['exportDir'],
                                          setRom, genre, game, setKey,
                                          useGenreSubFolder, dryRun)
                        utils.setCHDCopy(self.configuration['exportDir'],
                                         setCHD, genre, game, setKey,
                                         useGenreSubFolder, dryRun)
                        utils.writeCSV(CSVs[setKey], game, scores[setKey],
                                       genre, dats[setKey], testForGame,
                                       setKey)
                        # Games without test data for this set get a fixed label.
                        testStatus = self.getStatus(testForGame[setKey].status) \
                            if testForGame is not None and setKey in testForGame else 'UNTESTED & FRESHLY ADDED'
                        utils.writeGamelistEntry(gamelists[setKey], game,
                                                 image, dats[setKey], genre,
                                                 useGenreSubFolder,
                                                 testForGame, setKey,
                                                 testStatus)
                        roots[setKey].append(
                            dats[setKey]
                            [game].node) if game in dats[setKey] else None
                        if scrapeImages:
                            utils.setImageCopy(
                                self.configuration['exportDir'],
                                self.configuration['images'], image, setKey,
                                dryRun)

                # Works only if most recent game is first in line (raidendx;raidndx not the opposite)
                if len(selected) == 0 and not multiGameFoundInSet:
                    notInAnySet.append(game)
                elif len(selected) == 1:
                    if selected[0] not in onlyInOneSet:
                        onlyInOneSet[selected[0]] = []
                    onlyInOneSet[selected[0]].append(game)

                self.logger.log(" " + audit)

    # writing and closing everything
    for setKey in self.usingSystems:
        treeSet = etree.ElementTree(roots[setKey])
        treeSet.write(os.path.join(self.configuration['exportDir'],
                                   setKey + ".dat"),
                      xml_declaration=True,
                      encoding="utf-8")
        CSVs[setKey].close()
        gamelist.closeWrite(gamelists[setKey])

    scoreSheet.close()

    self.logger.log("\n<------------------ RESULTS ------------------>")
    self.logger.log("NOT FOUND IN ANY SET : " + str(len(notInAnySet)),
                    self.logger.WARNING)
    self.logger.log(" ".join(notInAnySet), self.logger.WARNING)