def create_trie():
    """Build the word trie and cache it as ``trie.p`` under ``OUTPUT_DIR``.

    The trie receives every distinct space-separated token found in the
    entries of ``element_data.p`` (presumably the keys of the cached element
    dictionary), followed by every key of ``SYMBOL_MAPPING``.
    """
    element_data = load(os.path.join(OUTPUT_DIR, "element_data.p"))

    # Flatten each entry into its space-separated tokens.
    tokens = [token for name in element_data for token in name.split(" ")]

    root = TrieNode()
    # De-duplicate before inserting so each token is added only once.
    for token in set(tokens):
        root.insert(token)
    for symbol in SYMBOL_MAPPING:
        root.insert(symbol)

    cache(root, os.path.join(OUTPUT_DIR, "trie.p"))
for key, value in result.iteritems()) return dict((" ".join(sorted(set(key.split(" ")))), value) for key, value in result.iteritems()) def parse_out_isotopes(data): for key, value in data.iteritems(): if key in ELEMENT_MAPPING: isotopes = [] for k in value.keys(): try: val = int(str(k)) assert val == float(str(k)) isotopes.append((k, value.pop(k))) except: pass if isotopes: data[key]["isotopes"] = dict(isotopes) return data if __name__ == "__main__": result = merge_dicts( load(os.path.join(OUTPUT_DIR, "atomic_ionization_output.p")), load(os.path.join(OUTPUT_DIR, "atomic_weight_compositions.p"))) look_up = get_the_big_dict(parse_out_isotopes(result)) print "\n".join(look_up.keys()) print look_up["hydrogen"] cache(look_up, os.path.join(OUTPUT_DIR, "element_data.p"))
columns = len( word ) + 1 currentRow = [ previousRow[0] + 1 ] # Build one row for the letter, with a column for each letter in the target # word, plus one for the empty string at column 0 for column in xrange( 1, columns ): insertCost = currentRow[column - 1] + 1 deleteCost = previousRow[column] + 1 if word[column - 1] != letter: replaceCost = previousRow[ column - 1 ] + 1 else: replaceCost = previousRow[ column - 1 ] currentRow.append( min( insertCost, deleteCost, replaceCost ) ) # if the last entry in the row indicates the optimal cost is less than the # maximum cost, and there is a word in this trie node, then add it. if currentRow[-1] <= maxCost and node.word != None: results[currentRow[-1]] = results.get(currentRow[-1], []) + [node.word] # if any entries in the row are less than the maximum cost, then # recursively search each branch of the trie if min( currentRow ) <= maxCost: for letter in node.children: searchRecursive( node.children[letter], letter, word, currentRow, results, maxCost ) create_trie() trie = load(os.path.join(OUTPUT_DIR, "trie.p"))
"""
In-memory home of the element data.

The contents of ``element_data.p`` (under ``OUTPUT_DIR``) are loaded once,
when this module is imported, and kept in memory rather than in a database.
"""
import os

from blank.data_utils import load, OUTPUT_DIR

# Loaded at import time; importing this module therefore reads the file.
ELEMENT_DATA = load(
    os.path.join(OUTPUT_DIR, "element_data.p")
)