def ner_tag(sent):
    """Tag one sentence with the pickled or freshly trained Memm classifier.

    On every call the JSON corpus at ``json_file`` is read, passed through
    ``build_history`` / ``build_history_1`` and used to set up the feature
    functions. The classifier is then either trained on the first 7500
    history entries or loaded from ``pickle_file``, depending on the local
    ``TRAIN`` switch (currently hard-wired to "load").

    Args:
        sent: The sentence as a single string. It is tokenised by splitting
            on single space characters, so consecutive spaces yield empty
            tokens.

    Returns:
        The value returned by ``clf.tag`` for a batch holding this one
        tokenised sentence.
    """
    #----- REPLACE THESE PATHS FOR YOUR SYSTEM ---------------------
    json_file = r"C:/Users/admin/Documents/Studies/7th Sem/Natural Language Processing/SeeEvaluation/SEE/SEE/QueryGenerator/NER/all_data.json"
    pickle_file = r"C:/Users/admin/Documents/Studies/7th Sem/Natural Language Processing/SeeEvaluation/SEE/SEE/QueryGenerator/NER/all_data.p"
    # ----------------------------------------------------------------

    ner_client = NerClient("1PI11CS196", "g04")
    ret = ner_client.get_brand_product_bigrams_dict()
    supported_tags = ["Org", "OS", "Version", "Phone", "Other", "Price", "Family", "Size", "Feature"]

    # Read the corpus through a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(json_file) as json_fh:
        data = json.loads(json_fh.read())['root']

    # Only the history list and the sentences are needed below; the other
    # return values are deliberately ignored.
    (history_list, sents, _expected) = build_history(data, supported_tags)
    (_his1, wmap1) = build_history_1(data, supported_tags)

    func_obj = FeatureFunctions(wmap1, supported_tags, ret)
    clf = Memm(func_obj, pickle_file)
    func_obj.set_wmap(sents)

    # Set TRAIN to 1 to retrain instead of loading the pickled classifier.
    TRAIN = 0
    if TRAIN == 1:
        clf.train(history_list[:7500], reg_lambda = 0.02)
    else:
        clf.load_classifier()

    test_sents = [sent.split(" ")]
    return clf.tag(test_sents)
def main(sent):
    """Tag one sentence using the classifier stored in ``pickle_file``.

    The function never trains: it builds the feature functions with an
    empty word map and loads the pickled classifier.

    Args:
        sent: The sentence to tag. It is handed to ``clf.tag`` unchanged as
            the only element of a batch (presumably an already tokenised
            sequence -- confirm against callers).

    Returns:
        The first element of the result of ``clf.tag``, i.e. the entry for
        ``sent``.
    """
    pickle_file = r"/Users/vaishnavibharadwaj/Documents/7th sem/nlp/exam/day3/raks/all_data.p"
    history_file = r"/Users/vaishnavibharadwaj/Documents/7th sem/nlp/exam/day3/raks/history.p"

    ner_client = NerClient("1PI11CS026", "g07")
    ret = ner_client.get_brand_product_bigrams_dict()
    supported_tags = [
        "Org",
        "OS",
        "Version",
        "Phone",
        "Other",
        "Price",
        "Family",
        "Size",
        "Feature",
    ]

    # NOTE(review): the unpickled history is not used below. The load is
    # kept so a missing or corrupt history file still fails here as before;
    # the file is now closed deterministically.
    with open(history_file, "rb") as history_fh:
        (_history_list, _sents, _expected) = pickle.load(history_fh)

    wmap1 = []
    func_obj = FeatureFunctions(wmap1, supported_tags, ret)
    clf = Memm(func_obj, pickle_file)
    clf.load_classifier()

    test_sents = [sent]
    result = clf.tag(test_sents)
    return result[0]
data = json.loads(open(json_file).read())['root'] print "num stu = ", len(data) (history_list, sents, expected) = build_history(data, supported_tags) (his1, wmap1) = build_history_1(data, supported_tags) myhis = (history_list, sents, expected, ) pickle.dump(myhis, open(history_file, "wb")) #print history_list[:100] #raw_input("Enter to continue") ''' else: print 'getting data from file' (history_list, sents, expected) = pickle.load(open(history_file, "rb")) print 'got history data from file' ''' func_obj = FeatureFunctions(wmap1, supported_tags, ret) #FeatureFunctions(supported_tags) print "Number of features defined: ", len(func_obj.flist) clf = Memm(func_obj, pickle_file) func_obj.set_wmap(sents) print "After build_history" TRAIN = int(raw_input("Enter 1 for Train, 0 to use pickeled file: ")) if TRAIN == 1: clf.train(history_list[:7500], reg_lambda = 0.02) # 10000 else: clf.load_classifier() print "Model: ", clf.model, " tagset = ", clf.tag_set