def stanza_lem_model(
        model: ModelOutput = ModelOutput("stanza/lem/sv_suc_lemmatizer.pt")):
    """Download and unzip the Stanza lemmatizer model.

    The zip archive is fetched to a temporary model path, extracted, and
    then deleted again.

    Args:
        model: Not referenced in the body; presumably declares the file that
            is expected to exist once the archive has been extracted
            (confirm against the model-builder framework).
    """
    archive_url = "https://svn.spraakdata.gu.se/sb-arkiv/pub/stanza/lem_stanza.zip"

    # The local file name differs from the remote one; it is only a
    # temporary location, since the archive is removed after extraction.
    archive = Model("stanza/lem/synt_stanza_full.zip")
    archive.download(archive_url)
    archive.unzip()
    archive.remove()
def stanza_dep_model(
        model: ModelOutput = ModelOutput("stanza/dep/sv_talbanken_parser.pt"),
        pretrain: ModelOutput = ModelOutput(
            "stanza/dep/sv_talbanken.pretrain.pt")):
    """Download and unzip the Stanza dependency parsing model.

    The zip archive is fetched to a temporary model path, extracted, and
    then deleted again.

    Args:
        model: Not referenced in the body; presumably declares the parser
            file expected after extraction (confirm against the
            model-builder framework).
        pretrain: Not referenced in the body; presumably declares the
            pretrain file expected after extraction (same caveat).
    """
    archive_url = (
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/stanza/synt_stanza_full.zip"
    )

    archive = Model("stanza/dep/synt_stanza_full.zip")
    archive.download(archive_url)
    archive.unzip()
    # Only the extracted files are kept; the archive itself is discarded.
    archive.remove()
def stanza_pos_model(
        model: ModelOutput = ModelOutput(
            "stanza/pos/full_sv_talbanken_tagger.pt"),
        pretrain: ModelOutput = ModelOutput(
            "stanza/pos/full_sv_talbanken.pretrain.pt")):
    """Download and unzip the Stanza POS-tagging model.

    The zip archive is fetched to a temporary model path, extracted, and
    then deleted again.

    Args:
        model: Not referenced in the body; presumably declares the tagger
            file expected after extraction (confirm against the
            model-builder framework).
        pretrain: Not referenced in the body; presumably declares the
            pretrain file expected after extraction (same caveat).
    """
    archive_url = (
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/stanza/morph_stanza_full.zip"
    )

    # The local file name differs from the remote one; it is only a
    # temporary location, since the archive is removed after extraction.
    archive = Model("stanza/pos/synt_stanza_full.zip")
    archive.download(archive_url)
    archive.unzip()
    archive.remove()
def build_model(out: ModelOutput = ModelOutput("sensaldo/sensaldo.pickle")):
    """Download the SenSALDO lexicon and store it as a pickle.

    The zip archive is downloaded and extracted, the base lexicon text file
    is parsed with ``read_sensaldo`` and the result is pickled to ``out``.
    The archive and the extracted text files are deleted afterwards.

    Args:
        out: Destination that receives the pickled lexicon.
    """
    archive_url = (
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lexikon/sensaldo/sensaldo-v02.zip"
    )

    # Fetch and unpack the archive that contains sensaldo-base-v02.txt.
    archive = Model("sensaldo/sensaldo-v02.zip")
    archive.download(archive_url)
    archive.unzip()

    # Parse the base lexicon and persist the result as a pickle.
    base_lexicon_file = Model("sensaldo/sensaldo-base-v02.txt")
    out.write_pickle(read_sensaldo(base_lexicon_file))

    # Remove everything except the pickle: the archive, the base file, and
    # the full-form file (not read here; presumably extracted from the same
    # archive).
    archive.remove()
    base_lexicon_file.remove()
    fullform_file = Model("sensaldo/sensaldo-fullform-v02.txt")
    fullform_file.remove()
def build_model(out: ModelOutput = ModelOutput("geo/geo.pickle")):
    """Download the GeoNames dumps and build the pickled geo model.

    Both archives are downloaded and extracted in turn, the extracted text
    files are handed to ``pickle_model`` together with ``out``, and finally
    the archives and extracted text files are deleted.

    Args:
        out: Destination passed on to ``pickle_model``.
    """
    # (local archive path, download URL), processed in this order.
    downloads = (
        ("geo/cities1000.zip",
         "http://download.geonames.org/export/dump/cities1000.zip"),
        ("geo/alternateNames.zip",
         "http://download.geonames.org/export/dump/alternateNames.zip"),
    )

    archives = []
    for archive_path, url in downloads:
        archive = Model(archive_path)
        archive.download(url)
        archive.unzip()
        archives.append(archive)

    pickle_model(Model("geo/cities1000.txt"), Model("geo/alternateNames.txt"), out)

    # Clean up: archives first, then the extracted text files.
    # iso-languagecodes.txt is not read here; presumably it is extracted
    # from one of the archives.
    for archive in archives:
        archive.remove()
    for extracted_path in (
        "geo/iso-languagecodes.txt",
        "geo/cities1000.txt",
        "geo/alternateNames.txt",
    ):
        Model(extracted_path).remove()