if answerMan: dic['answerMan'] = answerMan.encode('utf8') if answerID: dic['answerID'] = answerID.encode('utf8') if answerContent: dic['answerContent'] = answerContent.encode('utf8') import reverseDay dic['spiderTime'] = reverseDay.splitTimeStr(startTime) except Exception, e: print e print 'eror at dic[]' try: if answerContent != 'N/A': if answerTime != 'N/A': dic['answerTime'] = dealTime.dealTime(answerTime, startTime.split('-')[0]) else: import reverseDay dic['answerTime'] = reverseDay.splitTimeStr(startTime) else: dic['answerTime'] = 'N/A' if askTime != 'N/A': dic['askTime'] = dealTime.dealTime(askTime, startTime.split('-')[0]) else: import reverseDay dic['askTime'] = reverseDay.splitTimeStr(startTime) except Exception, e: print 'error at time' print e print '数据录入mongo...'
dic['answerMan'] = answerMan.encode('utf8') if answerID: dic['answerID'] = answerID.encode('utf8') if answerContent: dic['answerContent'] = answerContent.encode('utf8') import reverseDay dic['spiderTime'] = reverseDay.splitTimeStr(startTime) except Exception, e: print e print 'eror at dic[]' try: if answerContent != 'N/A': if answerTime != 'N/A': dic['answerTime'] = dealTime.dealTime( answerTime, startTime.split('-')[0]) else: import reverseDay dic['answerTime'] = reverseDay.splitTimeStr(startTime) else: dic['answerTime'] = 'N/A' if askTime != 'N/A': dic['askTime'] = dealTime.dealTime(askTime, startTime.split('-')[0]) else: import reverseDay dic['askTime'] = reverseDay.splitTimeStr(startTime) except Exception, e: print 'error at time' print e
askMan = askContent = askTime = answerID = answerMan = answerContent = answerTime = 'N/A' try: ask = item.find('div', {'class': 'm_feed_detail m_qa_detail'}) if ask == None: ask = item.find('div', {'class': 'm_feed_detail'}) try: # 提问人 askMan = ask.find('div', {'class': 'm_feed_face'}).p.text.strip() # 提问内容 askContent = ask.find('div', {'class': 'm_feed_txt'}).contents[2].strip() # 上市公司代码 patt = re.compile(r"\((.*?)\)", re.I | re.X) answerID = patt.findall(ask.find('div', {'class': 'm_feed_txt'}).a.text.strip())[0] # 提问时间 askTime = ask.find('div', {'class': 'm_feed_from'}).span.text.strip(r'"()"') askTime = dealTime.dealTime(askTime, '2016') except Exception, e: print '提问出错' print e try: answer = item.find('div', {'class':'m_feed_detail m_qa'}) #上市公司 answerMan = answer.find('div', {'class':'m_feed_face'}).p.text.strip() #上市公司回答内容 answerContent = answer.find('div', {'class':'m_feed_txt'}).text.strip() #上市公司回答时间 answerTime = answer.find('div',{'class':'m_feed_from'}).span.text.strip(r'"()"') print askTime answerTime = dealTime.dealTime(answerTime, '2016') except Exception, e:
}).p.text.strip() # 提问内容 askContent = ask.find('div', { 'class': 'm_feed_txt' }).contents[2].strip() # 上市公司代码 patt = re.compile(r"\((.*?)\)", re.I | re.X) answerID = patt.findall( ask.find('div', { 'class': 'm_feed_txt' }).a.text.strip())[0] # 提问时间 askTime = ask.find('div', { 'class': 'm_feed_from' }).span.text.strip(r'"()"') askTime = dealTime.dealTime(askTime, '2016') except Exception, e: print '提问出错' print e try: answer = item.find('div', {'class': 'm_feed_detail m_qa'}) #上市公司 answerMan = answer.find('div', { 'class': 'm_feed_face' }).p.text.strip() #上市公司回答内容 answerContent = answer.find('div', { 'class': 'm_feed_txt' }).text.strip() #上市公司回答时间