def add_doc(self, doc_id = '', doc_class='', doc_terms=[], do_padding = False):
    """Append one document row to ``self.matrix``.

    Each term of ``doc_terms`` is first offered to the shared vocabulary
    via ``self.terms.unique_append`` and then looked up with
    ``self.terms.index``; the resulting position is passed to the row's
    ``insert_after_padding``.

    Args:
        doc_id: Identifier stored under the ``'id'`` key of the row.
        doc_class: Label stored under the ``'class'`` key of the row.
        doc_terms: Iterable of terms belonging to the document. It is only
            iterated here, never modified.
        do_padding: When true, ``self.do_padding()`` is called after the
            row has been appended.
    """
    row = SuperList()
    for token in doc_terms:
        # Register the term before asking for its position so the lookup
        # below can find it.
        self.terms.unique_append(token)
        position = self.terms.index(token)
        row.insert_after_padding(position)

    entry = {'id': doc_id, 'class': doc_class, 'terms': row}
    self.matrix.append(entry)

    if do_padding:
        self.do_padding()
def add_query(self, query_id='', query_class='n/a', query_terms=[]):
    """Append a query row to ``self.queries``, counting unseen terms.

    A fresh ``SuperList`` is padded with zeros to ``len(self.terms)``.
    For each query term, its position in ``self.terms`` is looked up and
    passed to ``insert_after_padding``. Terms for which that step fails
    are not added to the vocabulary; they are only counted.

    NOTE(review): another ``add_query`` definition is visible in this
    source. If both belong to the same class, the later one replaces this
    one -- confirm which variant is intended.

    Args:
        query_id: Identifier stored under the ``'id'`` key.
        query_class: Label stored under the ``'class'`` key.
        query_terms: Iterable of terms in the query. It is only iterated
            here, never modified.

    The appended dict has the keys ``'id'``, ``'class'``, ``'terms'`` and
    ``'new_terms_count'`` (number of terms that could not be placed).
    """
    my_query_terms = SuperList()
    my_query_terms.do_padding(new_len=len(self.terms), padding_data=0)
    new_terms_count = 0
    for term in query_terms:
        try:
            my_query_terms.insert_after_padding(self.terms.index(term))
        except Exception:
            # Term not obtained in training phase: count it instead of
            # failing. ``Exception`` (rather than a bare ``except``) keeps
            # this best-effort behaviour while letting KeyboardInterrupt
            # and SystemExit propagate.
            new_terms_count += 1
    self.queries.append({
        'id': query_id,
        'class': query_class,
        'terms': my_query_terms,
        'new_terms_count': new_terms_count,
    })
def add_query(self, query_id='', query_class='n/a', query_terms=[]):
    """Append a query row to ``self.queries``, ignoring unseen terms.

    A fresh ``SuperList`` is padded with zeros to ``len(self.terms)``.
    For each query term, its position in ``self.terms`` is looked up and
    passed to ``insert_after_padding``; terms for which that step fails
    are skipped. The row is then handed to ``self.add_vectors`` with
    ``log_tf_a=True`` before being stored.

    NOTE(review): another ``add_query`` definition is visible in this
    source. If both belong to the same class, this later one takes
    effect -- confirm which variant is intended.

    Args:
        query_id: Identifier stored under the ``'id'`` key.
        query_class: Label stored under the ``'class'`` key.
        query_terms: Iterable of terms in the query. It is only iterated
            here, never modified.

    The appended dict has the keys ``'id'``, ``'class'`` and ``'terms'``.
    """
    my_query_terms = SuperList()
    my_query_terms.do_padding(new_len=len(self.terms), padding_data=0)
    for term in query_terms:
        try:
            my_query_terms.insert_after_padding(self.terms.index(term))
        except Exception:
            # Term not obtained in training phase: deliberately ignored.
            # ``Exception`` (rather than a bare ``except``) keeps this
            # best-effort behaviour while letting KeyboardInterrupt and
            # SystemExit propagate.
            continue
    # Calling add_vectors to convert my_query_terms to log_tf values.
    # The return value is discarded, so this presumably relies on
    # add_vectors updating its ``a`` argument in place -- TODO confirm.
    self.add_vectors(a=my_query_terms, log_tf_a=True)
    self.queries.append({
        'id': query_id,
        'class': query_class,
        'terms': my_query_terms,
    })