def test_save_load():
    """Round-trip a mock corpus through ``save`` and ``Corpus.load``.

    The corpus is written to three temporary files (documents, dictionary
    and metadata), loaded back, and the documents, metadata and dictionary
    of the reloaded corpus are compared with the original ones.

    All three temporary files are removed afterwards, whether or not the
    assertions pass.
    """
    c, _docs = mock_corpus()
    fd, filename = tempfile.mkstemp()
    dict_fd, dict_filename = tempfile.mkstemp()
    metadata_fd, metadata_filename = tempfile.mkstemp()
    # mkstemp() hands back an *open* descriptor.  The metadata is passed to
    # save() by path only, so release its descriptor right away instead of
    # leaking it for the lifetime of the test process.
    os.close(metadata_fd)
    try:
        # The context managers close both file objects (and therefore the
        # underlying descriptors) even if save() raises.
        with os.fdopen(fd, 'wb') as f, os.fdopen(dict_fd, 'wb') as dict_f:
            c.save(documents_file=f,
                   dictionary_file=dict_f,
                   metadata_filename=metadata_filename)

        new_c = Corpus.load(
            documents_file=filename,
            dictionary_file=dict_filename,
            metadata_filename=metadata_filename)

        assert_equals(c.documents, new_c.documents)
        # Element-wise comparison, reduced with all().
        assert_true(all(c.metadata == new_c.metadata))
        assert_equals(c.dic, new_c.dic)
    finally:
        # Remove every temporary file, including the metadata file that was
        # previously left behind.  The existence check keeps one missing
        # file from preventing the removal of the others.
        for path in (filename, dict_filename, metadata_filename):
            if os.path.exists(path):
                os.remove(path)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

from corpora import Corpus


def _parse_cli():
    """Parse the three positional command-line arguments.

    Returns the argparse namespace with ``scala_file``, ``dictionary`` and
    ``corpus`` attributes.
    """
    cli = argparse.ArgumentParser(
        description=(
            "load files of scala file and convert it to a "
            "python corpus using a dictionary"
        ),
    )
    cli.add_argument(
        "scala_file",
        help="python pickle file, containing tokens and metadata",
    )
    for positional in ("dictionary", "corpus"):
        cli.add_argument(positional)
    return cli.parse_args()


if __name__ == "__main__":
    # Script entry point: load the input with the given dictionary, then
    # write the resulting corpus to the requested output path.
    options = _parse_cli()

    print("loading scala_file")
    loaded = Corpus.load(
        scala_file=options.scala_file,
        dictionary_file=options.dictionary,
    )

    print("writing corpus to file")
    loaded.save(documents_file=options.corpus)