# print(topic_title,topic_desc,topic_author,str(topic_replies),str(topic_views),str(topic_lastpost),topic_lastpostby) except: pass # really don't care if it fails, likely a row of no interest print("Inserted " + str(topics_inserted) + " on page " + str(page)) posts_counted = 0 # if topic, process all posts, extracting fields for users and posts docs in mongodb all_posts = soup.body.find('div', attrs={'class': 'topic'}) if all_posts: while len( topic_stack ) > 0: # before processing posts, empty all topics on stack tc = topic_stack.pop() mon.manage_topic(tc[0], tc[1], tc[2], tc[3], tc[4], tc[5], tc[6], tc[7], tc[8], tc[9], tc[10]) topic_id = topicnum # simplify nomenclature and use topic_id for id (same as topicnum when parsing posts) posts_parent = all_posts.find('span', attrs={ 'class': 'main_topic_title' }).text.strip() # check if poll present poll = all_posts.find('div', attrs={'class': 'poll'}) if poll: poll_text_raw = poll.text # remove debug and ending info poll_text_short = poll_text_raw[ 0:poll_text_raw.find('Debug.dir( ipb.topic.poll )')] poll_text = re.sub(r'\n+', '\n', re.sub(r'\t+', '\t', poll_text_short))