def factor_categories():
    """Run the category-factoring pipeline and return its results.

    Loads ``categories.json`` from the current working directory, builds the
    category listing via ``s.make_category_listing``, loads all products, and
    then applies the ``s`` helpers in a fixed order: product counting and
    roll-up, pruning of three category ids, attribute tallying / roll-up /
    summarising, singleton elimination, Jaccard-based coalescing with
    ``JACCARD_THRESHHOLD``, and finally tree construction. A progress
    message is printed before each stage.

    Returns:
        tuple: ``(tree, cats)`` where ``tree`` is a deep copy of the result
        of ``s.make_tree(cats)`` after ``s.strip_down_tree`` has been applied
        to the copy, and ``cats`` is the processed category listing.
    """
    # A parenthesised single-argument print produces the same output as the
    # Python 2 print statement and is also valid Python 3.
    print('load cats')
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open('categories.json', 'r') as cats_file:
        cats = j.load(cats_file)

    print('make categories phonebook')
    aggregator = s.make_category_listing(cats)
    cats = aggregator['phonebook']

    print('load products')
    prods = s.read_all_products()

    print('fix product counts')
    s.count_products(cats, prods)
    s.roll_up_counts(cats)

    # Category ids to drop; per the progress message these are the food,
    # money and baby categories.
    print('prune food, money, and baby stuff')
    for category_id in ('28985', '33546', '34458'):
        s.prune_category(category_id, cats)

    print('tally attributes')
    cats = s.tally_attributes(cats, prods)

    print('roll up attributes')
    cats = s.roll_up_attributes(cats)

    print('summarize_attributes')
    cats = s.summarize_attributes(cats)

    print('eliminate singletons')
    cats = s.eliminate_singletons(cats)

    # Return value is ignored, as in the original call.
    # NOTE(review): presumably this updates cats in place - confirm.
    print('coalesce by jaccard')
    s.coalesce_by_jaccard(cats, JACCARD_THRESHHOLD)

    print('make tree')
    tree = s.make_tree(cats)

    # Strip a deep copy so the object returned by make_tree is not the one
    # passed to strip_down_tree.
    print('simplify tree')
    tree = c.deepcopy(tree)
    s.strip_down_tree(tree)

    return tree, cats
import codecs
import json as j
import re

import survey as s

# NOTE(review): this guard executes before the functions defined below it
# exist, so running the file as a script raises NameError at the
# extract_all_equivalence call. It should be moved below the function
# definitions (end of file).
if __name__ == "__main__":
    # Context manager closes the file handle deterministically.
    with open("categories_flat.json") as cats_file:
        cats = j.load(cats_file)
    prods = s.read_all_products()
    extract_all_equivalence(cats, prods)


def extract_all_equivalence(cats, prods, write_folder=""):
    """Call ``extract_equivalence`` once per franchise.

    For every franchise returned by ``s.get_franchises(cats)``, selects the
    entries of ``cats`` whose ``"franchise"`` value equals the franchise's
    ``"id"`` and passes them to ``extract_equivalence`` with the file name
    ``<first word of singularName>_eq_text.csv``.

    Args:
        cats: Mapping of category id to category record; each record is
            indexed by ``"franchise"``.
        prods: Products, passed through unchanged to ``extract_equivalence``.
        write_folder: Folder prefix for the output files. A trailing ``"/"``
            is appended when missing. An empty string is left empty so the
            file name is used as given; previously ``""`` was turned into
            ``"/"``, which pointed the output at the filesystem root.
    """
    # Normalise the folder once, outside the loop; skip the empty default so
    # it does not become "/".
    if write_folder and not write_folder.endswith("/"):
        write_folder += "/"

    for franch in s.get_franchises(cats):
        name = franch["singularName"].split()[0]
        fname = name + "_eq_text.csv"
        franch_id = franch["id"]
        # Keep only the categories that belong to this franchise.
        filt_cats = {
            cat_id: cat
            for cat_id, cat in cats.items()
            if cat["franchise"] == franch_id
        }
        extract_equivalence(filt_cats, prods, fname,
                            write_folder=write_folder,
                            headings_in_file=False)


# Build a list of rows that will be entries in the db
def extract_equivalence(cats, prods, fname, write_folder="", headings_in_file=False):
    # By default, we'll index equivalence classes
    USE_FIELD_TYPE = "equivalence_class"
    fh = codecs.open(write_folder + fname, "w", "utf-8")
    is_first = True