Beispiel #1
0
def search_content_formula(mathML):
    """Search for formulas matching a MathML query and rank them.

    The query features are extracted from ``mathML``, candidate formulas
    are fetched with ``formula_retrieval`` using the sorted semantic terms,
    IDF values are computed over those candidates, and the final ordering
    is delegated to ``formulas_ranking``.

    Returns the ``(results, num_of_results)`` pair from ``formulas_ranking``.
    """
    sem_fea, struc_fea, cn_fea, var_fea = features_extraction(mathML)

    ino_groups = ino_sem_terms(sem_fea)    # semantic terms in order
    sort_groups = sort_sem_terms(sem_fea)  # semantic terms in sorted order

    # Retrieval receives the complete sorted-term structure; the later
    # stages only get its last entry.
    candidates = formula_retrieval(sort_groups)

    flat_ino_terms = list(chain.from_iterable(ino_groups))
    last_sort_terms = sort_groups[-1]

    total = Formula.objects.count()

    idf = compute_IDF_values(flat_ino_terms, last_sort_terms, struc_fea,
                             candidates, total)

    ranked, ranked_count = formulas_ranking(flat_ino_terms, last_sort_terms,
                                            struc_fea, cn_fea, var_fea,
                                            candidates, idf, total)
    return ranked, ranked_count
def search_content_formula(mathML):
    """Return ranked formulas (and their count) for a MathML query.

    Pipeline: feature extraction -> semantic term generation -> candidate
    retrieval -> IDF computation -> ranking. The result is the
    ``(results, num_of_results)`` pair produced by ``formulas_ranking``.
    """
    extracted = features_extraction(mathML)
    (semantic_fea, structure_fea, constant_fea, variable_fea) = extracted

    # Semantic terms in their original order and in sorted order.
    inorder_terms = ino_sem_terms(semantic_fea)
    sorted_terms = sort_sem_terms(semantic_fea)

    # Candidates are looked up with the whole sorted-term structure.
    related = formula_retrieval(sorted_terms)

    # Flatten the in-order terms one level; keep only the last sorted entry.
    inorder_terms = list(chain.from_iterable(inorder_terms))
    sorted_terms = sorted_terms[-1]

    formula_count = Formula.objects.count()

    idf_values = compute_IDF_values(
        inorder_terms, sorted_terms, structure_fea, related, formula_count)

    results, num_of_results = formulas_ranking(
        inorder_terms, sorted_terms, structure_fea, constant_fea,
        variable_fea, related, idf_values, formula_count)

    return results, num_of_results
Beispiel #3
0
def _has_terms(terms):
    """Tell whether a term container actually holds terms.

    Empty containers, None and the literal string '[]' all count as
    "no terms".
    """
    return bool(terms) and terms != '[]'


def _collect_features(terms, rep):
    """Map every raw term through ``rep`` and keep the non-empty results."""
    if not _has_terms(terms):
        return []
    return [feature for feature in (rep(term) for term in terms)
            if feature != ""]


def _split_semantic_terms(sorted_terms):
    """Return the '$'-separated pieces of the first sorted-term group."""
    if not _has_terms(sorted_terms):
        return []
    return [piece
            for line in sorted_terms[0]
            for piece in line.split('$')]


def extractFormulaFeature(query):
    """Convert a formula query into a 0/1 feature vector.

    The query is parsed with AsciiMathML and rendered to MathML, the
    semantic, structure, constant and variable features are extracted and
    mapped through the ``*_rep`` helpers, and every feature listed by
    ``readFeature`` is marked 1 if it occurs in the query, 0 otherwise.

    Input: formula query (string handed to ``AsciiMathML.parse_string``).
    Output: query vector -- a list of 0/1 ints covering, in this order, the
    'semantic', 'structure', 'constant' and 'variable' feature lists.
    """
    # Step 1: query to MathML.
    math_obj = asciitomathml.asciitomathml.AsciiMathML()
    math_obj.parse_string(query)
    mathML = math_obj.to_xml_string()
    # Drop the namespace declaration and every '&'.
    # NOTE(review): presumably this keeps entity references from upsetting
    # features_extraction -- confirm.
    mathML = mathML.replace(
        "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">", "<math>")
    mathML = mathML.replace("&", "")

    # Step 2: MathML to the four raw feature groups and the index terms.
    (sem_features, struc_features,
     const_features, var_features) = features_extraction(mathML)

    # NOTE(review): the in-order terms are not used for the vector; the call
    # is kept in case ino_sem_terms touches sem_features before sorting --
    # confirm and remove.
    ino_sem_terms(sem_features)
    sorted_sem_terms = sort_sem_terms(sem_features)

    # NOTE(review): the 'formula' feature list is not used for the vector;
    # the call is kept in case readFeature has side effects -- confirm and
    # remove.
    readFeature('formula')

    # Step 3: map the raw terms to feature names, skipping empty mappings.
    # Only the first sorted-term group contributes semantic features.
    semantic = _collect_features(
        _split_semantic_terms(sorted_sem_terms), semantic_rep)
    structure = _collect_features(struc_features, struct_rep)
    constant = _collect_features(const_features, const_rep)
    variable = _collect_features(var_features, var_rep)

    # Step 4: build the presence vector, one slot per known feature.
    vector = []
    for category, present in (('semantic', semantic),
                              ('structure', structure),
                              ('constant', constant),
                              ('variable', variable)):
        vector.extend(int(name in present) for name in readFeature(category))
    return vector