def search_content_formula(mathML):
    """Retrieve and rank the formulas related to a MathML query.

    The query is decomposed by ``features_extraction`` into semantic,
    structural, constant and variable features.  The semantic features are
    turned into in-order and sorted index terms; the sorted terms drive
    ``formula_retrieval``, and the resulting candidates are scored by
    ``formulas_ranking`` using the values from ``compute_IDF_values``.

    NOTE(review): a second definition with the same name appears right after
    this one in the file; if both are at module level the later one is the
    binding callers get.

    Args:
        mathML: the query, passed unchanged to ``features_extraction``.

    Returns:
        The ``(results, num_of_results)`` pair produced by
        ``formulas_ranking``.
    """
    (sem_fea, struc_fea, cn_fea, var_fea) = features_extraction(mathML)

    # Semantic terms in their original order, and in sorted order.
    nested_ino_terms = ino_sem_terms(sem_fea)
    all_sort_terms = sort_sem_terms(sem_fea)

    # Candidate lookup uses the complete set of sorted terms.
    candidates = formula_retrieval(all_sort_terms)

    # Scoring works on the flattened in-order terms and on the last entry of
    # the sorted terms only.
    flat_ino_terms = list(chain.from_iterable(nested_ino_terms))
    last_sort_terms = all_sort_terms[len(all_sort_terms) - 1]

    total_formulas = Formula.objects.count()
    idf_values = compute_IDF_values(
        flat_ino_terms, last_sort_terms, struc_fea, candidates,
        total_formulas)

    results, num_of_results = formulas_ranking(
        flat_ino_terms, last_sort_terms, struc_fea, cn_fea, var_fea,
        candidates, idf_values, total_formulas)
    return results, num_of_results
def search_content_formula(mathML):
    """Search the formula collection for matches to a MathML query.

    Steps, in order: extract the four feature groups from the query, derive
    the in-order and sorted semantic terms, fetch related formulas with the
    sorted terms, compute IDF values, and rank the related formulas.

    NOTE(review): this appears to repeat an earlier definition of the same
    name in the file; being the later one, it is the definition that stays
    bound to the name.

    Args:
        mathML: the query, handed as-is to ``features_extraction``.

    Returns:
        ``(results, num_of_results)`` as returned by ``formulas_ranking``.
    """
    extracted = features_extraction(mathML)
    (semantic_fea, structure_fea, constant_fea, variable_fea) = extracted

    inorder_groups = ino_sem_terms(semantic_fea)   # semantic terms, in order
    sorted_groups = sort_sem_terms(semantic_fea)   # semantic terms, sorted

    related = formula_retrieval(sorted_groups)

    # Flatten the in-order groups into a single list of terms.
    inorder_terms = list(chain.from_iterable(inorder_groups))
    # Only the final entry of the sorted terms is used from here on.
    final_index = len(sorted_groups) - 1
    sorted_terms = sorted_groups[final_index]

    formula_count = Formula.objects.count()
    idf = compute_IDF_values(inorder_terms, sorted_terms, structure_fea,
                             related, formula_count)

    results, num_of_results = formulas_ranking(
        inorder_terms, sorted_terms, structure_fea, constant_fea,
        variable_fea, related, idf, formula_count)
    return results, num_of_results
def _ascii_to_mathml(query):
    """Convert an AsciiMath query string into MathML text."""
    converter = asciitomathml.asciitomathml.AsciiMathML()
    converter.parse_string(query)
    mathml = converter.to_xml_string()
    # Strip the namespace declaration and every '&' before the markup is
    # handed to features_extraction.
    mathml = mathml.replace(
        "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">", "<math>")
    return mathml.replace("&", "")


def _has_terms(terms):
    """Return True when *terms* contains at least one entry.

    Empty containers and None count as "no terms".  The literal string '[]'
    (presumably a serialised empty list -- confirm against the helpers that
    produce these values) is treated as empty as well.
    """
    return bool(terms) and terms != '[]'


def _normalised_terms(raw_terms, normalise):
    """Apply *normalise* to each raw term and keep the non-empty results."""
    if not _has_terms(raw_terms):
        return []
    return [term
            for term in (normalise(raw) for raw in raw_terms)
            if term != ""]


def extractFormulaFeature(query):
    """Build the binary feature vector for a formula query.

    Args:
        query: formula query string, parsed as AsciiMath.

    Returns:
        A list of 0/1 integers.  For each kind in the order semantic,
        structure, constant, variable, there is one entry per feature
        returned by ``readFeature(kind)``: 1 if that feature occurs among the
        query's normalised terms of that kind, 0 otherwise.
    """
    # Step 1: query -> MathML.
    mathML = _ascii_to_mathml(query)

    # Step 2: MathML -> raw features and sorted semantic index terms.
    (sem_features, struc_features, const_features, var_features) = \
        features_extraction(mathML)
    sorted_sem_terms = sort_sem_terms(sem_features)

    # Step 3: normalise every kind of term.
    # Only the first entry of the sorted semantic terms is used; each of its
    # items is a '$'-separated string of individual terms.
    semantic_raw = []
    if _has_terms(sorted_sem_terms):
        for joined in sorted_sem_terms[0]:
            semantic_raw.extend(joined.split('$'))

    present_by_kind = (
        ('semantic', _normalised_terms(semantic_raw, semantic_rep)),
        ('structure', _normalised_terms(struc_features, struct_rep)),
        ('constant', _normalised_terms(const_features, const_rep)),
        ('variable', _normalised_terms(var_features, var_rep)),
    )

    # Step 4: one 0/1 entry per known feature, kind by kind.
    vector = []
    for kind, present in present_by_kind:
        vector.extend(int(feature in present)
                      for feature in readFeature(kind))
    return vector