def read_term_file(filename):
    """Load term counts from ``filename`` and hand the frequent ones to ``write_file``.

    Every line of the file is decoded with ``cjson.decode``; each decoded
    value replaces the previous one, so only the object on the last line is
    actually used.  Terms whose count is strictly greater than 100 are kept,
    the smallest and largest kept counts are printed, and the kept
    ``[term, count]`` pairs are passed to ``write_file`` sorted by count in
    descending order.

    Args:
        filename: Path of the file to read.

    Raises:
        ValueError: If no term has a count above 100 (``min`` of an empty
            sequence).
    """
    term_dic = {}
    # The context manager closes the file even if decoding a line fails.
    with open(filename, "r") as f:
        for line in f:
            term_dic = cjson.decode(line)

    # Keep only the terms that occur more than 100 times.
    term_list = [
        [term, term_dic[term]]
        for term in term_dic
        if term_dic[term] > 100
    ]

    counts = [count for _, count in term_list]
    # Single-argument parenthesised print behaves the same as the statement form.
    print(min(counts))
    print(max(counts))

    # Most frequent terms first; list.sort is stable, like sorted().
    term_list.sort(key=lambda pair: pair[1], reverse=True)
    write_file(term_list)
def filter_emerging_term(filename):
    """Compute per-term distribution statistics and write them to disk.

    The nutrition vectors are read from ``filename`` with
    ``read_nutrition_vector``.  For every term that also appears in the
    term dictionary loaded by ``read_term_dic``, the values of its vector are
    fed into a ``stats.histogram``.  Terms whose kurtosis or skewness is
    ``None``, or whose values sum to 0.0, are skipped.  For the remaining
    terms the mean, standard deviation, and the kurtosis and skewness (each
    multiplied by the histogram maximum) are recorded.

    Two files are written under ``../term_nutrition/``:

    * ``term_kurtosis_skewness_dic_09.json`` -- the full statistics mapping.
    * ``term_kurtosis_skewness_09_sorted.txt`` -- a fixed-width table sorted
      by mean (descending) that omits terms containing a backslash.

    The number of recorded terms is printed after the JSON dump and again
    after the backslash terms have been removed from the mapping.

    Args:
        filename: Path passed to ``read_nutrition_vector``.
    """
    term_dic = read_term_dic("../term_dic/term_dic_09_reduced_filtered_truncated_without_0-9.json")
    term_nutrition_dic = read_nutrition_vector(filename)
    term_kurtosis_skewness_dic = {}

    for term in term_nutrition_dic:
        if term not in term_dic:
            continue
        vector = term_nutrition_dic[term]
        mystats = stats.histogram()
        for key in vector:
            mystats.add(vector[key])
        kurtosis = mystats.kurtosis()
        skewness = mystats.skewness()
        # Skip terms without usable statistics or with an all-zero vector.
        if kurtosis is None or skewness is None or mystats.sum() == 0.0:
            continue
        peak = mystats.max()
        term_kurtosis_skewness_dic[term] = {
            "mean": mystats.avg(),
            "standard_dev": mystats.std(),
            "kurtosis": kurtosis * peak,
            "skewness": skewness * peak,
        }

    # The JSON dump contains every recorded term, including those with a
    # backslash that are later left out of the text table.
    with open("../term_nutrition/term_kurtosis_skewness_dic_09.json", "w") as f:
        json.dump(term_kurtosis_skewness_dic, f)
    print(len(term_kurtosis_skewness_dic))

    def format_row(cells):
        # Each cell is followed by a space and padded to a 15-character column.
        return "".join("{} ".format(cell).ljust(15) for cell in cells) + "\n"

    stat_names = ("mean", "standard_dev", "kurtosis", "skewness")
    # sorted() materialises a list, so popping from the dict below is safe.
    ranked = sorted(
        term_kurtosis_skewness_dic.items(),
        key=lambda item: item[1]["mean"],
        reverse=True,
    )
    with open("../term_nutrition/term_kurtosis_skewness_09_sorted.txt", "w") as f:
        # NOTE(review): the first column is labelled "user" although it holds
        # terms; kept as-is because the header is part of the output format.
        f.write(format_row(("user",) + stat_names))
        for term, term_stats in ranked:
            if "\\" in term:
                # Only affects the count printed below; the JSON file has
                # already been written.
                term_kurtosis_skewness_dic.pop(term)
                continue
            f.write(format_row([term] + [term_stats[name] for name in stat_names]))
    print(len(term_kurtosis_skewness_dic))
def process_nutrition_term(filename):
    """Normalise every term's nutrition vector and dump the result as JSON.

    The vectors are read from ``filename`` with ``read_nutrition_vector``.
    Each value in a term's vector is divided by the sum of that vector (as
    reported by ``stats.histogram``); if the sum is 0.0, every value of the
    vector is set to 0.0 instead.  The updated mapping is written to
    ``../term_nutrition/term_nutrition_vector_09_2.json``.

    Args:
        filename: Path passed to ``read_nutrition_vector``.
    """
    term_nutrition_dic = read_nutrition_vector(filename)
    for term in term_nutrition_dic:
        vector = term_nutrition_dic[term]
        if not vector:
            # Nothing to normalise; also avoids asking an empty histogram
            # for its sum.
            continue
        mystats = stats.histogram()
        for key in vector:
            mystats.add(vector[key])
        # The total does not change while the vector is rewritten, so it is
        # computed once per term instead of once per key.
        total = mystats.sum()
        for key in vector:
            if total == 0.0:
                vector[key] = 0.0
            else:
                vector[key] = vector[key] / total
    # The context manager closes the file even if serialisation fails.
    with open("../term_nutrition/term_nutrition_vector_09_2.json", "w") as f:
        json.dump(term_nutrition_dic, f)