def build_assoc_space(input_file, output_dir):
    """
    Build an AssocSpace from a file of weighted concept pairs and save it.

    `input_file` is read as UTF-8 text. Every line must consist of exactly
    three tab-separated fields -- left concept, right concept, value -- and
    the value must parse as a float; a line with any other shape raises
    ValueError. Pairs in which either concept is rejected by
    `concept_is_bad` are ignored.

    The remaining pairs are kept only when both concepts pass
    `concept_is_frequent_enough` (given the per-concept occurrence counts
    collected while loading) and the two concepts differ. Each sufficiently
    frequent concept additionally gets an entry of 1.0 linking it to itself
    and, when its negation is also frequent enough, an entry of -1.0 linking
    it to that negation.

    The resulting space is written to `output_dir` via `save_dir`.
    """
    print('loading')
    counts = defaultdict(int)
    triples = []

    # Open the input in a `with` block so the handle is closed as soon as
    # loading finishes, including when a malformed line raises part-way.
    with codecs.open(input_file, encoding='utf-8') as infile:
        for line in infile:
            left, right, value = line.strip().split('\t')
            if not concept_is_bad(left) and not concept_is_bad(right):
                value = float(value)
                triples.append((value, left, right))
                # Count every appearance of a concept, on either side.
                counts[left] += 1
                counts[right] += 1

    print('filtering entries')
    sparse = SparseEntryStorage()
    for (value, left, right) in triples:
        # Self-pairs are skipped here; the self-links are added explicitly
        # in the loop below with a fixed value of 1.
        if (
            concept_is_frequent_enough(left, counts)
            and concept_is_frequent_enough(right, counts)
            and left != right
        ):
            sparse.add_entry((value, left, right))
    # The raw triples are no longer needed; drop the reference before the
    # space is constructed.
    del triples

    # Add links from a concept to itself, and negative links to its opposite
    # if it's there.
    # NOTE(review): `counts` is a defaultdict that is being iterated here. If
    # concept_is_frequent_enough indexes it with a negation that was never
    # seen, a key would be inserted mid-iteration -- confirm the helper only
    # reads (e.g. via .get).
    for concept in counts:
        if concept_is_frequent_enough(concept, counts):
            sparse.add_entry((1., concept, concept))
            negation = negate_concept(concept)
            if concept_is_frequent_enough(negation, counts):
                sparse.add_entry((-1., concept, negation))

    print('making assoc space')
    # 150 is passed as the second positional argument -- presumably the
    # number of dimensions to keep; confirm against from_sparse_storage.
    space = AssocSpace.from_sparse_storage(sparse, 150, offset_weight=4e-5)

    print('saving')
    space.save_dir(output_dir)
def build_assoc_space(input_file, output_dir):
    """
    Build an AssocSpace from a file of weighted concept pairs and save it.

    `input_file` is read as UTF-8 text. Each line is split on tabs and its
    first three fields are taken as left concept, right concept and value;
    any further fields are ignored. A line with fewer than three fields, or
    whose value does not parse as a float, raises ValueError. Pairs in which
    either concept is rejected by `concept_is_bad` are ignored.

    The remaining pairs are kept only when both concepts pass
    `concept_is_frequent_enough` (given the per-concept occurrence counts
    collected while loading) and the two concepts differ. Each sufficiently
    frequent concept additionally gets an entry of 1.0 linking it to itself
    and, when its negation is also frequent enough, an entry of -1.0 linking
    it to that negation.

    The space is built with k=300 and offset_weight=1e-4 and written to
    `output_dir` via `save_dir`.
    """
    print('loading')
    counts = defaultdict(int)
    triples = []

    # Open the input in a `with` block so the handle is closed as soon as
    # loading finishes, including when a malformed line raises part-way.
    with codecs.open(input_file, encoding='utf-8') as infile:
        for line in infile:
            # Only the first three columns are used; extra columns are
            # tolerated and discarded.
            left, right, value = line.strip().split('\t')[:3]
            if not concept_is_bad(left) and not concept_is_bad(right):
                value = float(value)
                triples.append((value, left, right))
                # Count every appearance of a concept, on either side.
                counts[left] += 1
                counts[right] += 1

    print('filtering entries')
    sparse = SparseEntryStorage()
    for (value, left, right) in triples:
        # Self-pairs are skipped here; the self-links are added explicitly
        # in the loop below with a fixed value of 1.
        if (
            concept_is_frequent_enough(left, counts)
            and concept_is_frequent_enough(right, counts)
            and left != right
        ):
            sparse.add_entry((value, left, right))
    # The raw triples are no longer needed; drop the reference before the
    # space is constructed.
    del triples

    # Add links from a concept to itself, and negative links to its opposite
    # if it's there.
    # NOTE(review): `counts` is a defaultdict that is being iterated here. If
    # concept_is_frequent_enough indexes it with a negation that was never
    # seen, a key would be inserted mid-iteration -- confirm the helper only
    # reads (e.g. via .get).
    for concept in counts:
        if concept_is_frequent_enough(concept, counts):
            sparse.add_entry((1., concept, concept))
            negation = negate_concept(concept)
            if concept_is_frequent_enough(negation, counts):
                sparse.add_entry((-1., concept, negation))

    print('making assoc space')
    space = AssocSpace.from_sparse_storage(sparse, k=300, offset_weight=1e-4)

    print('saving')
    space.save_dir(output_dir)
def test_sparse_storage():
    # Basic checks of SparseEntryStorage: first while empty, then after
    # loading the shared ENTRIES fixture.
    storage = SparseEntryStorage()

    # Exporting from a storage object that holds nothing must not crash; it
    # should yield no labels and a 0x0 matrix.
    empty_matrix, empty_labels = storage.get_matrix_and_labels()
    eq_(len(empty_labels), 0)
    eq_(empty_matrix.shape, (0, 0))

    # Load the fixture entries and export again.
    storage.add_entries(ENTRIES)
    filled_matrix, filled_labels = storage.get_matrix_and_labels()

    # The labels are expected in exactly this order.
    expected_labels = 'apple red green celery orange banana yellow lemon'
    eq_(' '.join(filled_labels), expected_labels)

    # Spot-check individual cells, addressed as (row, column) -> value.
    expected_cells = (
        ((0, 1), 4),
        ((6, 5), 1),
        ((4, 2), 0),
    )
    for (row, col), expected_value in expected_cells:
        eq_(filled_matrix[row, col], expected_value)