# Generate Cotraining Set

# This script uses a set of volumes already classified and sorted by a model
# in order to generate additional training data for a new model.

import SonicScrewdriver as utils
from shutil import copyfile

indices, columns, metadata = utils.readtsv("/Volumes/TARDIS/work/cotrain/sortedcotrain.tsv")

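# Take the 200 volumes at the end of the sorted list; judging by the
# "top200" destination folders, these are the volumes the model ranked highest.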
toget = indices[-200:]

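# Convert the raw volume IDs into their filename-safe (pairtree-cleaned) form.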
toget = [utils.pairtreefile(x) for x in toget]

genredir = "/Volumes/TARDIS/work/cotrain/top200/genremaps/"
featuredir = "/Volumes/TARDIS/work/cotrain/top200/pagefeatures/"

for htid in toget:

	featuresource = "/Volumes/TARDIS/work/cotrain/pagefeatures/" + htid + ".pg.tsv"
	featuredestination = featuredir + htid + ".pg.tsv"
	copyfile(featuresource, featuredestination)

	genresource = "/Volumes/TARDIS/work/cotrain/predictions/" + htid + ".predict"
	genredestination = genredir + htid + ".map"
	with open(genresource, mode="r", encoding="utf-8") as f:
		filelines = f.readlines()

	with open(genredestination, mode="w", encoding="utf-8") as f:
		for line in filelines:
			line = line.rstrip()
			# The loop body is truncated in the source; writing each stripped
			# prediction line back out is a minimal stand-in.
			f.write(line + "\n")
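
# Check Feature/Slice Overlap

# This script compares the page-feature files in a local folder against the
# volume IDs listed in slice files, and prints the IDs that appear in both.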
import os
import SonicScrewdriver as utils

folder = "/Users/tunder/Dropbox/pagedata/thirdfeatures/pagefeatures/"
files = os.listdir(folder)

validfiles = set()
for filename in files:
    if not filename.startswith(".") and len(filename) > 7:
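        # Strip the ".pg.tsv" suffix (seven characters) to get the bare volume ID.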
        filename = filename[:-7]
        validfiles.add(filename)

otherfolder = "/Volumes/TARDIS/output/slices/"

slices = os.listdir(otherfolder)
slicefiles = set()

for aslice in slices:
    if aslice.startswith("."):
        continue
    with open(otherfolder + aslice, encoding="utf-8") as f:
        fl = f.readlines()
    for line in fl:
        line = line.rstrip()
        line = utils.pairtreefile(line)
        slicefiles.add(line)

print(slicefiles.intersection(validfiles))
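
# Build Active-Learning File List

# This script reads a list of volume IDs, writes a placeholder .arff-style
# index for them, and copies each volume's normalized text into the
# active-learning folder.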
import sys, os
import SonicScrewdriver as utils
from shutil import copyfile

with open("/Users/tunder/Dropbox/pagedata/activelearn/sourcefile.txt", mode="r", encoding="utf-8") as f:
	filelines = f.readlines()

files = list()
for line in filelines:
	files.append(line.rstrip())

with open("/Users/tunder/Dropbox/pagedata/activelearn/learn1.arff", mode="w", encoding="utf-8") as f:
	f.write("% List of files in associated folder.\n")
	f.write("% Does not really use arff format.\n")
	f.write("\n")
	f.write("@RELATION learn1\n\n")
	f.write("@ATTRIBUTE htid string\n")
	f.write("@ATTRIBUTE endpg numeric\n")
	f.write("@ATTRIBUTE startpgpart numeric\n")
	f.write("@ATTRIBUTE endpgpart numeric\n")
	f.write("@ATTRIBUTE probability numeric\n")
	f.write("\n")

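	# Write a placeholder row of zeroes for each volume.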
	for afile in files:
		outline = utils.pairtreefile(afile) + ",0,0,0,0,0\n"
		f.write(outline)
		sourcepath = "/Volumes/TARDIS/work/cotrain/texts/" + utils.pairtreefile(afile) + ".norm.txt"
		destination = "/Users/tunder/Dropbox/pagedata/activelearn/" + utils.pairtreefile(afile) + ".txt"
		copyfile(sourcepath, destination)

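# Count Words in a Metadata Sample

# This script reads a metadata table, counts tokens and words in each listed
# volume's text file, and appends the results to an output table.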
import sys, os
import SonicScrewdriver as utils

# Stand-in for the original count_words helper, which is not included in this
# snippet; it is assumed to return (tokencount, wordcount) for a text file.
def count_words(filepath):
    with open(filepath, encoding="utf-8") as f:
        tokens = f.read().split()
    wordcount = sum(1 for token in tokens if token.isalpha())
    return len(tokens), wordcount

outtable = list()
counter = 0

rows, columns, table = utils.readtsv('/Users/tunder/Dropbox/GenreProject/metadata/topicmodelingsample.tsv')

sourcedir = "/Volumes/TARDIS/work/moneytexts/"

for row in rows:
    filename = utils.pairtreefile(row) + ".fic.txt"
    filepath = os.path.join(sourcedir, filename)
    if os.path.isfile(filepath):
        tokencount, wordcount = count_words(filepath)
    else:
        print("Missing file: " + filepath)
        sys.exit(0)

    idcode = table["HTid"][row]
    date = str(utils.simple_date(row, table))
    author = table["author"][row]
    title = table["title"][row]
    newrow = [idcode, date, tokencount, wordcount, author, title]
    outtable.append(newrow)
    print(counter)
    counter += 1
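
# Build Per-Genre Metadata

# This script rebuilds a cleaned metadata row for each volume and then
# groups volumes by genre.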
import os
import SonicScrewdriver as utils

# Assumed to be defined earlier in the full script: columns_to_exclude,
# genrenames, and rootpath.

def make_row(htid, dirtyhtid, columns, table):
    # I'm not repeating these columns, because the first is not useful and the
    # second is not reliable.

    outrow = [htid]
    for column in columns[1:]:
        if column not in columns_to_exclude:
            outrow.append(table[column][dirtyhtid])

    return outrow

metadata_path = '/Volumes/TARDIS/work/metadata/MergedMonographs.tsv'
rows, columns, table = utils.readtsv(metadata_path)

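# Map each volume's cleaned (filename-safe) ID to its rebuilt metadata row.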
indextorows = dict()
for row in rows:
    cleanid = utils.pairtreefile(row)
    newrow = make_row(cleanid, row, columns, table)
    indextorows[cleanid] = newrow

for genreabbrev, genre in genrenames.items():

    print(genre)

    genrepath = os.path.join(rootpath, genre)

    volsinsubset = list()
    # Because there are some volumes in the metadata that weren't
    # included in the 95-percent subset. Those won't be present
    # as files, and shouldn't be carried forward to the next stage.
    metadataforgenre = dict()