예제 #1
0
                if answerMan:
                    dic['answerMan'] = answerMan.encode('utf8')
                if answerID:
                    dic['answerID'] = answerID.encode('utf8')
                if answerContent:
                    dic['answerContent'] = answerContent.encode('utf8')
                import reverseDay
                dic['spiderTime'] = reverseDay.splitTimeStr(startTime)
            except Exception, e:
                print e
                print 'eror at dic[]'

            # Fill dic['answerTime'] and dic['askTime'].
            # The string 'N/A' is treated as the "value missing" placeholder for
            # answerContent, answerTime and askTime.
            # NOTE(review): dealTime and reverseDay are project modules not shown
            # here; what their return values look like must be confirmed there.
            try:
                if answerContent != 'N/A':
                    if answerTime != 'N/A':
                        # Second argument is the text before the first '-' in
                        # startTime (presumably the year -- TODO confirm).
                        dic['answerTime'] = dealTime.dealTime(answerTime, startTime.split('-')[0])
                    else:
                        # An answer exists but has no time of its own: derive the
                        # value from startTime instead.
                        import reverseDay
                        dic['answerTime'] = reverseDay.splitTimeStr(startTime)
                else:
                    # No answer content, so the answer time stays the placeholder.
                    dic['answerTime'] = 'N/A'
                if askTime != 'N/A':
                    dic['askTime'] = dealTime.dealTime(askTime, startTime.split('-')[0])
                else:
                    # Missing question time: derive the value from startTime.
                    import reverseDay
                    dic['askTime'] = reverseDay.splitTimeStr(startTime)
            except Exception, e:
                # Best effort: any failure is printed and execution continues, so
                # one or both keys may be left unset in dic.
                print 'error at time'
                print e

            print '数据录入mongo...'
예제 #2
0
                    dic['answerMan'] = answerMan.encode('utf8')
                if answerID:
                    dic['answerID'] = answerID.encode('utf8')
                if answerContent:
                    dic['answerContent'] = answerContent.encode('utf8')
                import reverseDay
                dic['spiderTime'] = reverseDay.splitTimeStr(startTime)
            except Exception, e:
                print e
                print 'eror at dic[]'

            # Fill dic['answerTime'] and dic['askTime'].
            # The string 'N/A' is treated as the "value missing" placeholder for
            # answerContent, answerTime and askTime.
            # NOTE(review): dealTime and reverseDay are project modules not shown
            # here; what their return values look like must be confirmed there.
            try:
                if answerContent != 'N/A':
                    if answerTime != 'N/A':
                        # Second argument is the text before the first '-' in
                        # startTime (presumably the year -- TODO confirm).
                        dic['answerTime'] = dealTime.dealTime(
                            answerTime,
                            startTime.split('-')[0])
                    else:
                        # An answer exists but has no time of its own: derive the
                        # value from startTime instead.
                        import reverseDay
                        dic['answerTime'] = reverseDay.splitTimeStr(startTime)
                else:
                    # No answer content, so the answer time stays the placeholder.
                    dic['answerTime'] = 'N/A'
                if askTime != 'N/A':
                    dic['askTime'] = dealTime.dealTime(askTime,
                                                       startTime.split('-')[0])
                else:
                    # Missing question time: derive the value from startTime.
                    import reverseDay
                    dic['askTime'] = reverseDay.splitTimeStr(startTime)
            except Exception, e:
                # Best effort: any failure is printed and execution continues, so
                # one or both keys may be left unset in dic.
                print 'error at time'
                print e
예제 #3
0
        askMan = askContent = askTime = answerID = answerMan = answerContent = answerTime = 'N/A'
        try:
            ask = item.find('div', {'class': 'm_feed_detail m_qa_detail'})
            if ask == None:
                ask = item.find('div', {'class': 'm_feed_detail'})
            # Extract the question-side fields from the `ask` element: asker name,
            # question text, the code found in parentheses, and the question time.
            try:
                # Asker: text of the <p> inside the 'm_feed_face' div.
                askMan = ask.find('div', {'class': 'm_feed_face'}).p.text.strip()
                # Question content: third child node of the 'm_feed_txt' div.
                askContent = ask.find('div', {'class': 'm_feed_txt'}).contents[2].strip()
                # Listed-company code: text between the first pair of parentheses
                # in the link text of the 'm_feed_txt' div.
                patt = re.compile(r"\((.*?)\)", re.I | re.X)
                answerID = patt.findall(ask.find('div', {'class': 'm_feed_txt'}).a.text.strip())[0]
                # Question time: span text of the 'm_feed_from' div with any leading
                # or trailing quote / parenthesis characters stripped.
                askTime = ask.find('div', {'class': 'm_feed_from'}).span.text.strip(r'"()"')
                # NOTE(review): '2016' is hard-coded as the second argument; it is
                # presumably the year used to complete the timestamp -- confirm
                # against dealTime.dealTime.
                askTime = dealTime.dealTime(askTime, '2016')
            except Exception, e:
                # Best effort: the error is printed and parsing continues. Names not
                # yet assigned when the error occurred keep the values they had
                # before this block.
                print '提问出错'
                print e

            try:
                answer = item.find('div', {'class':'m_feed_detail m_qa'})
                #上市公司
                answerMan = answer.find('div', {'class':'m_feed_face'}).p.text.strip()
                #上市公司回答内容
                answerContent = answer.find('div', {'class':'m_feed_txt'}).text.strip()
                #上市公司回答时间
                answerTime = answer.find('div',{'class':'m_feed_from'}).span.text.strip(r'"()"')
                print askTime
                answerTime = dealTime.dealTime(answerTime, '2016')
            except Exception, e:
예제 #4
0
                }).p.text.strip()
                # 提问内容
                askContent = ask.find('div', {
                    'class': 'm_feed_txt'
                }).contents[2].strip()
                # 上市公司代码
                patt = re.compile(r"\((.*?)\)", re.I | re.X)
                answerID = patt.findall(
                    ask.find('div', {
                        'class': 'm_feed_txt'
                    }).a.text.strip())[0]
                # 提问时间
                askTime = ask.find('div', {
                    'class': 'm_feed_from'
                }).span.text.strip(r'"()"')
                askTime = dealTime.dealTime(askTime, '2016')
            except Exception, e:
                print '提问出错'
                print e

            try:
                answer = item.find('div', {'class': 'm_feed_detail m_qa'})
                #上市公司
                answerMan = answer.find('div', {
                    'class': 'm_feed_face'
                }).p.text.strip()
                #上市公司回答内容
                answerContent = answer.find('div', {
                    'class': 'm_feed_txt'
                }).text.strip()
                #上市公司回答时间