Example #1
0
def create_trie():
    """Build a trie from the element-data keys and symbols, then cache it.

    Each key of the data loaded from "element_data.p" is split on single
    spaces and every distinct piece is inserted into a fresh ``TrieNode``,
    followed by every key of ``SYMBOL_MAPPING``. The finished trie is handed
    to ``cache`` with the path of "trie.p" under ``OUTPUT_DIR``.
    """
    element_data = load(os.path.join(OUTPUT_DIR, "element_data.p"))
    root = TrieNode()

    # Flatten all keys into their space-separated tokens; the set() below
    # drops duplicates so each token is inserted exactly once.
    tokens = [token for name in element_data for token in name.split(" ")]
    for token in set(tokens):
        root.insert(token)

    for symbol in SYMBOL_MAPPING.keys():
        root.insert(symbol)

    cache(root, os.path.join(OUTPUT_DIR, "trie.p"))
Example #2
0
                  for key, value in result.iteritems())
    return dict((" ".join(sorted(set(key.split(" ")))), value)
                for key, value in result.iteritems())


def parse_out_isotopes(data):
    """Group integer-keyed entries of each element under an "isotopes" key.

    For every top-level key of ``data`` that is also in ``ELEMENT_MAPPING``,
    each inner key whose string form parses as a whole number is popped from
    that element's dict. If any were found, they are stored back on the
    element under the ``"isotopes"`` key as a dict of the popped entries.

    ``data`` is modified in place and is also returned.
    """
    # items() rather than iteritems() so the function runs on Python 2 and 3;
    # only the inner dicts are mutated, never ``data`` itself.
    for key, value in data.items():
        if key not in ELEMENT_MAPPING:
            continue

        isotopes = {}
        # Snapshot the keys: entries are popped from ``value`` while walking.
        for k in list(value):
            try:
                text = str(k)
                # Explicit comparison instead of ``assert`` so the check is
                # not stripped when Python runs with -O.
                is_whole_number = int(text) == float(text)
            except (TypeError, ValueError):
                # Not an integer literal (UnicodeEncodeError from str() on
                # Python 2 is a ValueError too): leave the entry in place.
                continue
            if is_whole_number:
                isotopes[k] = value.pop(k)

        if isotopes:
            value["isotopes"] = isotopes
    return data


if __name__ == "__main__":
    # Script entry point: merge the two cached data sets, split out the
    # isotopes, build the look-up dict, show it, and cache it.
    ionization_path = os.path.join(OUTPUT_DIR, "atomic_ionization_output.p")
    weights_path = os.path.join(OUTPUT_DIR, "atomic_weight_compositions.p")
    result = merge_dicts(load(ionization_path), load(weights_path))

    look_up = get_the_big_dict(parse_out_isotopes(result))

    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement.
    print("\n".join(look_up.keys()))
    print(look_up["hydrogen"])

    cache(look_up, os.path.join(OUTPUT_DIR, "element_data.p"))
Example #3
0
    columns = len( word ) + 1
    currentRow = [ previousRow[0] + 1 ]

    # Build one row for the letter, with a column for each letter in the target
    # word, plus one for the empty string at column 0
    for column in xrange( 1, columns ):
        insertCost = currentRow[column - 1] + 1
        deleteCost = previousRow[column] + 1

        if word[column - 1] != letter:
            replaceCost = previousRow[ column - 1 ] + 1
        else:
            replaceCost = previousRow[ column - 1 ]

        currentRow.append( min( insertCost, deleteCost, replaceCost ) )

    # if the last entry in the row shows the optimal cost is no greater than
    # the maximum cost, and there is a word in this trie node, then add it.
    if currentRow[-1] <= maxCost and node.word != None:
        results[currentRow[-1]] = results.get(currentRow[-1], []) + [node.word]

    # if any entry in the row is no greater than the maximum cost, then
    # recursively search each branch of the trie
    if min( currentRow ) <= maxCost:
        for letter in node.children:
            searchRecursive( node.children[letter], letter, word, currentRow,
                results, maxCost )

# These two statements sit at module top level with no __main__ guard, so
# they run every time this module is executed or imported.
# NOTE(review): create_trie() is expected to (re)write "trie.p" under
# OUTPUT_DIR before it is read back here — confirm, and consider whether
# rebuilding on every import is intended.
create_trie()
# Read the cached trie back into the module-level name used by the search code.
trie = load(os.path.join(OUTPUT_DIR, "trie.p"))
Example #4
0
"""
Where the data lives: loaded into memory, because we don't like databases.
"""
import os

from blank.data_utils import load, OUTPUT_DIR

# Element data, read eagerly when this module is first imported and kept in
# memory as a module-level constant for the rest of the process.
# NOTE(review): the ".p" suffix suggests a pickle file — confirm against
# blank.data_utils.load.
ELEMENT_DATA = load(os.path.join(OUTPUT_DIR, "element_data.p"))