# dump the params
with open(join(base_dir, 'params.json'), 'w') as f:
    json.dump(args, f)

pprint(args)

    # N_relationships = len(relationships.relationships)
# middle column of each n-gram: the position whose word gets replaced
# (integer division made explicit with //)
replacement_column_index = args['sequence_length'] // 2

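# separate RNG streams (roles inferred from the names): `rng` for the model,
# `data_rng` for sampling training examples; `validation_rng` is seeded with
# seed + 1 so validation sampling stays independent of the training stream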
    rng = np.random.RandomState(args['random_seed'])
    data_rng = np.random.RandomState(args['random_seed'])
    validation_rng = np.random.RandomState(args['random_seed'] + 1)
    random.seed(args['random_seed'])


    # set up syntactic
ngram_reader = NgramReader(args['ngram_filename'],
                           vocab_size=args['ngram_vocab_size'],
                           train_proportion=args['train_proportion'],
                           test_proportion=args['test_proportion'])
    testing_block = ngram_reader.testing_block()
    print 'corpus contains %i ngrams' % (ngram_reader.number_of_ngrams)

    # set up semantic
    # num_semantic_training = int(relationships.N * 0.98)
    # semantic_training = relationships.data[:num_semantic_training]
    # semantic_testing = relationships.data[num_semantic_training:]

    relationship_path = join(base_dir, 'relationships.pkl.gz')
    vocabulary_path = join(base_dir, 'vocabulary.pkl.gz')
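# try to load a cached Relationships object; if the pickle is missing or
# unreadable, fall through to the except branch and rebuild it from scratch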
    try:
        with gzip.open(relationship_path) as f:
            relationships = cPickle.load(f)
        print 'loaded relationships from %s' % relationship_path
except Exception:
        # relationships = Relationships()
Example #2
        args['base_dir'] = base_dir
    else:
        model_loaded = False
        # dump the params
        with open(os.path.join(args['base_dir'], 'params.json'), 'w') as f:
            json.dump(args, f)

    pprint(args)

    replacement_column_index = args['sequence_length'] // 2

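    # NgramReader keeps the `vocab_size` most frequent words and splits the
    # corpus into train/test blocks by the given proportions (inferred from
    # the parameter names)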
    ngram_reader = NgramReader(args['ngram_filename'],
                               vocab_size=args['vocab_size'],
                               train_proportion=args['train_proportion'],
                               test_proportion=args['test_proportion'])
    testing_block = ngram_reader.testing_block()
    vocabulary = ngram_reader.word_array
    print 'corpus contains %i ngrams' % (ngram_reader.number_of_ngrams)

    rng = np.random.RandomState(args['random_seed'])
    data_rng = np.random.RandomState(args['random_seed'])
    validation_rng = np.random.RandomState(args['random_seed'] + 1)
    random.seed(args['random_seed'])

    if not args['dont_run_semantic']:
        print 'loading semantic similarities'
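        # WordSimilarity wraps a precomputed word-pair similarity table over
        # this vocabulary; the optional memmap_filename presumably memory-maps
        # it from disk rather than loading the whole matrix into RAM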
        word_similarity = semantic_module.WordSimilarity(
            vocabulary,
            args['word_similarity_file'],
            memmap_filename=args['word_similarity_memmap'])
        print 'computing terms with semantic distance'