def swefn_model(
        out: ModelOutput = ModelOutput("lexical_classes/swefn.pickle")):
    """Download and build SweFN model.

    Fetches ``swefn.xml``, parses it with ``read_swefn`` and writes the
    resulting lexicon to ``out`` as a pickle. The downloaded XML file is an
    intermediate artefact and is removed afterwards.

    Args:
        out: Destination for the pickled lexicon.
    """
    # Download swefn.xml and build swefn.pickle
    raw_file = Model("lexical_classes/swefn.xml")
    raw_file.download(
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lmf/swefn/swefn.xml")

    # The try block starts only after a successful download, so the cleanup
    # never runs for a file that was not fetched.
    try:
        lexicon = read_swefn(raw_file.path)
        out.write_pickle(lexicon)
    finally:
        # Clean up, even when parsing or pickling failed, so that no stale
        # download is left behind.
        raw_file.remove()
def build_model(out: ModelOutput = ModelOutput("sensaldo/sensaldo.pickle")):
    """Download and build SenSALDO model.

    Fetches and unpacks ``sensaldo-v02.zip``, reads
    ``sensaldo-base-v02.txt`` with ``read_sensaldo`` and writes the resulting
    lexicon to ``out`` as a pickle. The zip archive and the text files are
    intermediate artefacts and are removed afterwards.

    Args:
        out: Destination for the pickled lexicon.
    """
    # Download and extract sensaldo-base-v02.txt
    zip_model = Model("sensaldo/sensaldo-v02.zip")
    zip_model.download(
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lexikon/sensaldo/sensaldo-v02.zip"
    )
    zip_model.unzip()

    # The try block starts only after a successful unzip, so the cleanup
    # below only targets files that are expected to exist.
    try:
        tsv_model = Model("sensaldo/sensaldo-base-v02.txt")

        # Read sensaldo tsv dictionary and save as a pickle file.
        # Note that read_sensaldo is handed the Model object, not its path.
        lexicon = read_sensaldo(tsv_model)
        out.write_pickle(lexicon)
    finally:
        # Clean up, even when reading or pickling failed. The removal order
        # matches the original sequence: archive, base file, fullform file.
        zip_model.remove()
        Model("sensaldo/sensaldo-base-v02.txt").remove()
        # The fullform file is never read here; presumably it is extracted
        # from the same archive -- TODO confirm.
        Model("sensaldo/sensaldo-fullform-v02.txt").remove()
def blingbring_model(
        out: ModelOutput = ModelOutput("lexical_classes/blingbring.pickle")):
    """Download and build Blingbring model.

    Fetches the Roget hierarchy class map and ``blingbring.txt``, combines
    them with ``read_blingbring`` and writes the resulting lexicon to ``out``
    as a pickle. Both downloaded files are intermediate artefacts and are
    removed afterwards.

    Args:
        out: Destination for the pickled lexicon.
    """
    # Download roget hierarchy
    classmap = Model("lexical_classes/roget_hierarchy.xml")
    classmap.download(
        "https://github.com/spraakbanken/sparv-models/raw/master/lexical_classes/roget_hierarchy.xml"
    )

    # Each try block starts right after its download succeeded, so a file is
    # only removed once it has actually been fetched. The nesting keeps the
    # original removal order: raw file first, class map second.
    try:
        # Download blingbring.txt and build blingbring.pickle
        raw_file = Model("lexical_classes/blingbring.txt")
        raw_file.download(
            "https://svn.spraakdata.gu.se/sb-arkiv/pub/lexikon/bring/blingbring.txt"
        )
        try:
            lexicon = read_blingbring(raw_file.path, classmap.path)
            out.write_pickle(lexicon)
        finally:
            # Clean up the raw file even when parsing or pickling failed
            raw_file.remove()
    finally:
        # Clean up the class map even when the second download failed
        classmap.remove()
def build_diapivot(out: ModelOutput = ModelOutput("hist/diapivot.pickle")):
    """Download diapivot XML dictionary and save as a pickle file.

    The lexicon returned by ``read_xml`` is treated as a mapping from each
    entry to a mapping of ``saldo -> match`` pairs. Every pair is joined with
    ``PART_DELIM1`` and the joined strings are stored as a sorted list per
    entry. The downloaded XML file is an intermediate artefact and is removed
    afterwards.

    Args:
        out: Destination for the pickled lexicon.
    """
    # Download diapivot.xml
    xml_model = Model("hist/diapivot.xml")
    xml_model.download(
        "https://svn.spraakdata.gu.se/sb-arkiv/pub/lmf/diapivot/diapivot.xml")

    # The try block starts only after a successful download, so the cleanup
    # never runs for a file that was not fetched.
    try:
        # Create pickle file
        xml_lexicon = read_xml(xml_model.path)
        log.info("Saving cross lexicon in Pickle format")
        # Sorting makes the per-entry list order independent of the order in
        # which the inner mapping yields its items.
        picklex = {
            lem: sorted(
                PART_DELIM1.join([saldo, match])
                for saldo, match in xml_lexicon[lem].items()
            )
            for lem in xml_lexicon
        }
        out.write_pickle(picklex)
    finally:
        # Clean up, even when parsing or pickling failed, so that no stale
        # download is left behind.
        xml_model.remove()