コード例 #1
0
def test_save_load():
    """Check that a corpus survives a save / ``Corpus.load`` round trip.

    A mock corpus is saved with its documents and dictionary written to
    open binary file objects and its metadata written to a path.  It is
    then loaded back from the three paths, and the loaded documents,
    metadata and dictionary are compared with the originals.

    All three temporary files are removed afterwards, whether or not the
    assertions pass.
    """
    c, _docs = mock_corpus()
    fd, filename = tempfile.mkstemp()
    dict_fd, dict_filename = tempfile.mkstemp()
    metadata_fd, metadata_filename = tempfile.mkstemp()
    try:
        # save() receives the metadata location by name only, so the
        # descriptor returned by mkstemp() is never used; close it right
        # away instead of leaking it for the rest of the test run.
        os.close(metadata_fd)

        # The context managers close both file objects (and with them the
        # underlying descriptors) even if save() raises.
        with os.fdopen(fd, 'wb') as f, os.fdopen(dict_fd, 'wb') as dict_f:
            c.save(documents_file=f, dictionary_file=dict_f,
                   metadata_filename=metadata_filename)

        new_c = Corpus.load(
            documents_file=filename,
            dictionary_file=dict_filename,
            metadata_filename=metadata_filename)
        assert_equals(c.documents, new_c.documents)
        assert_true(all(c.metadata == new_c.metadata))
        assert_equals(c.dic, new_c.dic)
    finally:
        # mkstemp() leaves deletion to the caller: remove every file that
        # was created, including the metadata file.
        for path in (filename, dict_filename, metadata_filename):
            os.remove(path)
コード例 #2
0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
from corpora import Corpus


if __name__ == "__main__":
    # Command-line interface: three positional arguments, in this order:
    # scala_file, dictionary, corpus.
    cli = argparse.ArgumentParser(
        description="load files of scala file and convert it to a python corpus using a dictionary"
    )
    cli.add_argument(
        "scala_file",
        help="python pickle file, containing tokens and metadata",
    )
    # The remaining two positionals are declared without help text.
    for positional in ("dictionary", "corpus"):
        cli.add_argument(positional)
    options = cli.parse_args()

    # Load the corpus from the scala file and the dictionary file.
    print("loading scala_file")
    loaded = Corpus.load(
        scala_file=options.scala_file,
        dictionary_file=options.dictionary,
    )

    # Save the loaded corpus to the path given as the third argument.
    print("writing corpus to file")
    loaded.save(documents_file=options.corpus)