def process_item(self, item, spider):
    """Derive the text question fields from their ``*_html`` counterparts.

    Each listed HTML field is passed through ``clean_tags`` and the result
    is stripped of surrounding whitespace before being stored on the item.

    NOTE(review): the signature matches a Scrapy item-pipeline hook --
    confirm against the enclosing class.

    Args:
        item: Mapping-like object holding ``question_content_html`` and
            ``question_analysis_html``; it is updated in place.
        spider: Unused by this method.

    Returns:
        The same ``item`` object, after the two text fields are set.
    """
    # question_answer and question_comment are not populated here; their
    # clean_tags conversions are currently disabled.
    html_to_text = (
        ('question_content_html', 'question_content'),
        ('question_analysis_html', 'question_analysis'),
    )
    for html_key, text_key in html_to_text:
        item[text_key] = clean_tags(item[html_key]).strip()
    return item
def extract_answer_info(type_str, answer_html, p):
    """Classify a question and collect its answer in text form.

    Args:
        type_str: Raw type description handed to ``question_type_classifier``.
        answer_html: Answer markup; passed to ``clean_tags`` and, when the
            answer is unique, to ``extract_answer``.
        p: Forwarded unchanged to ``extract_answer`` (meaning not visible
            from this function -- TODO confirm with the helper).

    Returns:
        A 4-tuple ``(question_type, is_unique, answer_text, unique_answer)``.
        ``unique_answer`` is an empty string when ``is_answer_unique``
        reports a falsy value for the question type.
    """
    kind = question_type_classifier(type_str)
    has_unique_answer = is_answer_unique(kind)
    plain_text = clean_tags(answer_html)

    # extract_answer is only called for question types with a unique answer.
    if has_unique_answer:
        single_answer = extract_answer(answer_html, p, plain_text)
    else:
        single_answer = u''

    return kind, has_unique_answer, plain_text, single_answer
def extract_answer_text(answer_html):
    """Return the result of running ``clean_tags`` over ``answer_html``."""
    answer_text = clean_tags(answer_html)
    return answer_text
def process_item(self, item, spider):
    """Populate the cleaned question fields on ``item`` and return it.

    ``question_content`` and ``question_analysis`` are set to the
    whitespace-stripped output of ``clean_tags`` applied to the matching
    ``*_html`` entries.

    NOTE(review): the signature matches a Scrapy item-pipeline hook --
    confirm against the enclosing class.

    Args:
        item: Mapping-like object providing ``question_content_html`` and
            ``question_analysis_html``; it is modified in place.
        spider: Unused by this method.

    Returns:
        The same ``item`` object that was passed in.
    """
    content_html = item['question_content_html']
    item['question_content'] = clean_tags(content_html).strip()

    # question_answer and question_comment are not populated here; their
    # clean_tags conversions are currently disabled.
    analysis_html = item['question_analysis_html']
    item['question_analysis'] = clean_tags(analysis_html).strip()

    return item