Ejemplo n.º 1
0
 def insertCrawlerRecord(mysql,kwargs):
     """Build a CrawlerRecord from ``kwargs`` and persist it via ``mysql.session``.

     Parameters:
         mysql:  object exposing a ``session`` attribute that provides
                 ``add()``, ``commit()`` and ``rollback()``.
         kwargs: a plain dict (passed positionally, not as ``**kwargs``) that
                 must contain the keys ``title``, ``abstract``,
                 ``crawler_url``, ``gmt_crawler``, ``gmt_created``,
                 ``sch_id`` and ``alumni_id``. A missing key raises KeyError.

     The three text fields are UTF-8 encoded and stripped of surrounding
     whitespace before being stored. ``url_key`` and ``title_key`` are
     ``BasicTool.md5`` digests of the stored URL and title respectively.

     After a successful commit, ``spider.crawlerTask.crawlerNum.add()`` is
     called (``spider`` is resolved from the enclosing module scope). A
     ``sqlalchemy.exc.IntegrityError`` raised by the insert is rolled back and
     logged instead of being propagated; any other exception propagates.
     """
     # Encode and strip each text field exactly once, then reuse the result
     # for both the stored column and its md5 key.
     title = kwargs['title'].encode('utf8').strip()
     abstract = kwargs['abstract'].encode('utf8').strip()
     crawler_url = kwargs['crawler_url'].encode('utf8').strip()

     record = CrawlerRecord()
     record.title = title
     record.abstract = abstract
     record.crawler_url = crawler_url
     record.gmt_crawler = kwargs['gmt_crawler']
     record.gmt_created = kwargs['gmt_created']
     # The original hashed the *un-encoded* URL via str.strip(), which raises
     # TypeError on Python 2 when the value is a unicode object. Hash the same
     # encoded value that is stored so the key always matches the column.
     record.url_key = BasicTool.md5(crawler_url)
     record.title_key = BasicTool.md5(title)
     record.sch_id = kwargs['sch_id']
     record.alumni_id = kwargs['alumni_id']

     session = mysql.session
     try:
         session.add(record)
         session.commit()
     except sqlalchemy.exc.IntegrityError as e:
         # Presumably a uniqueness violation (e.g. a record that was already
         # crawled) -- confirm against the table's constraints. Roll back so
         # the session stays usable for the next insert.
         session.rollback()
         CrawlerTool.logger.error(e)
     else:
         # Count only records that were actually committed.
         spider.crawlerTask.crawlerNum.add()
Ejemplo n.º 2
0
            abstract += sibling
        else:
            abstract += sibling.get_text()#re.search(r'<.*>(.*)</.*>',sibling.get_text())
        #print type(sibling)
        #print sibling.name
    abstract=abstract[:-5]
    print gmt_created
    print title
    print crawler_url
    print abstract
    gmt_crawler = datetime.datetime.now()#BasicTool.getCurrentTime()
    crawlerRecord=CrawlerRecord()
    crawlerRecord.title=title
    crawlerRecord.abstract=abstract
    crawlerRecord.crawler_url=crawler_url
    crawlerRecord.gmt_crawler=gmt_crawler
    crawlerRecord.gmt_created=gmt_created
    crawlerRecord.url_key=BasicTool.md5(crawler_url)
    crawlerRecord.sch_id=1
    crawlerRecord.alumni_id=1
    mysql=MySQL()
    mysql.session.add(crawlerRecord)
    mysql.session.commit()
    mysql.session.close()



# tt=(nt.strftime('%Y年%m月%d日 %H时%M分%S秒'))
# print tt
# timeArray = time.strptime(tt, "%Y年%m月%d日 %H时%M分%S秒")
# print timeArray