Ejemplo n.º 1
0
    def query(self, table, showcol, con_args):
        """Run a SELECT against ``table`` and return the fetched rows.

        Args:
            table: table name, interpolated into the statement as-is.
            showcol: sequence of column names to select; an empty (or None)
                value selects ``*``.
            con_args: either a dict of ``column: value`` pairs that are
                AND-ed as equality conditions, or a raw SQL fragment that is
                appended after ``WHERE 1=1`` (e.g. ``" AND id=3"``).

        Returns:
            ``self.cur.fetchall()`` when ``execute`` reports a truthy result,
            ``[]`` when it reports a falsy one, and ``None`` when a
            ``MySQLdb.Error`` was caught and printed.
        """
        columns = ','.join(showcol) if showcol else '*'

        params = []
        if isinstance(con_args, dict):
            where = 'WHERE 1=1'
            for column, value in con_args.items():
                # Values travel as driver parameters so they are quoted and
                # escaped by the driver instead of being pasted into the SQL.
                where += ' AND %s=%%s' % column
                params.append(value)
        else:
            where = 'WHERE 1=1' + (con_args or '')

        try:
            self.db.set_character_set('utf8')
            sql = "SELECT %s FROM %s %s" % (columns, table, where)
            try:
                # Pass args only when there are placeholders: a raw string
                # condition may legitimately contain '%' (e.g. LIKE patterns).
                if params:
                    result = self.cur.execute(sql, params)
                else:
                    result = self.cur.execute(sql)

                if not result:
                    return []
                return self.cur.fetchall()
            except MySQLdb.Error as e:
                # Roll back on error
                self.db.rollback()
                print("%s %s" % (BasicTool.getCurrentTime(), "数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
        except MySQLdb.Error as e:
            print("%s %s" % (BasicTool.getCurrentTime(), "数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
Ejemplo n.º 2
0
    def __init__(self):
        """Open the MySQL connection and create a cursor on it.

        Connects with the module-level IPADDRESS, USERNAME, PASSWORD and
        DBNAME values. A ``MySQLdb.Error`` during connect is printed rather
        than raised; in that case ``self.db`` and/or ``self.cur`` stay
        ``None``.
        """
        # Define both attributes up front so a failed connect leaves an
        # explicit None instead of a missing attribute.
        self.db = None
        self.cur = None
        try:
            self.db = MySQLdb.connect(IPADDRESS, USERNAME, PASSWORD, DBNAME)
            self.cur = self.db.cursor()
        except MySQLdb.Error as e:
            print("%s %s" % (BasicTool.getCurrentTime(), "连接数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
Ejemplo n.º 3
0
    def update(self, table, change_col, con_args):
        """Run an UPDATE on ``table`` and commit it.

        Args:
            table: table name, interpolated into the statement as-is.
            change_col: dict of ``column: new_value`` pairs for the SET clause.
            con_args: either a dict of ``column: value`` pairs that are
                AND-ed as equality conditions, or a raw SQL fragment that is
                appended after ``WHERE 1=1`` (e.g. ``" AND id=3"``).

        Returns:
            Whatever ``self.cur.execute`` returns on success, or ``None``
            when a ``MySQLdb.Error`` was caught and printed.
        """
        assignments = []
        params = []
        for column, value in change_col.items():
            # Values travel as driver parameters so the driver quotes them.
            assignments.append('%s=%%s' % column)
            params.append(value)

        if isinstance(con_args, dict):
            where = 'WHERE 1=1'
            for column, value in con_args.items():
                where += ' AND %s=%%s' % column
                params.append(value)
        else:
            extra = con_args or ''
            if params:
                # When args are passed the driver %-formats the statement,
                # so a literal '%' in the raw condition must be doubled.
                extra = extra.replace('%', '%%')
            where = 'WHERE 1=1' + extra

        try:
            self.db.set_character_set('utf8')
            sql = "UPDATE %s SET %s %s" % (table, ','.join(assignments), where)
            try:
                if params:
                    affected = self.cur.execute(sql, params)
                else:
                    affected = self.cur.execute(sql)
                # Commit on success, mirroring insert(); the error path
                # below already rolls back.
                self.db.commit()
                return affected
            except MySQLdb.Error as e:
                # Roll back on error
                self.db.rollback()
                print("%s %s" % (BasicTool.getCurrentTime(), "数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
        except MySQLdb.Error as e:
            print("%s %s" % (BasicTool.getCurrentTime(), "数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
Ejemplo n.º 4
0
    def delete(self, table, con_args):
        """Run a DELETE on ``table`` and commit it.

        Args:
            table: table name, interpolated into the statement as-is.
            con_args: either a dict of ``column: value`` pairs that are
                AND-ed as equality conditions, or a raw SQL fragment that is
                appended after ``WHERE 1=1`` (e.g. ``" AND id=3"``).

        Returns:
            Whatever ``self.cur.execute`` returns on success, or ``None``
            when a ``MySQLdb.Error`` was caught and printed.
        """
        params = []
        if isinstance(con_args, dict):
            condition = 'WHERE 1=1'
            for column, value in con_args.items():
                # Values travel as driver parameters so the driver quotes them.
                condition += ' AND %s=%%s' % column
                params.append(value)
        else:
            condition = 'WHERE 1=1' + (con_args or '')

        try:
            self.db.set_character_set('utf8')
            sql = "DELETE FROM %s %s" % (table, condition)
            try:
                # Pass args only when there are placeholders: a raw string
                # condition may legitimately contain '%'.
                if params:
                    affected = self.cur.execute(sql, params)
                else:
                    affected = self.cur.execute(sql)
                # Commit on success, mirroring insert(); the error path
                # below already rolls back.
                self.db.commit()
                return affected
            except MySQLdb.Error as e:
                # Roll back on error
                self.db.rollback()
                print("%s %s" % (BasicTool.getCurrentTime(), "数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
        except MySQLdb.Error as e:
            print("%s %s" % (BasicTool.getCurrentTime(), "数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
Ejemplo n.º 5
0
 def insertCrawlerRecord(mysql, kwargs):
     """Build a CrawlerRecord from ``kwargs`` and commit it through ``mysql.session``.

     Args:
         mysql: object whose ``session`` attribute provides ``add``,
             ``commit`` and ``rollback`` (presumably a SQLAlchemy session,
             given the exception type caught below).
         kwargs: mapping with the keys ``title``, ``abstract``,
             ``crawler_url``, ``gmt_crawler``, ``gmt_created``, ``sch_id``
             and ``alumni_id``. The three text values must support
             ``.encode('utf8')``.

     On success the ``spider.crawlerTask.crawlerNum`` counter is advanced.
     An ``IntegrityError`` from the commit is rolled back and logged instead
     of being raised.
     """
     crawlerRecord = CrawlerRecord()
     mysqlSession = mysql.session

     # Encode and strip each text field once, then reuse the result so the
     # stored value and the key derived from it always agree.
     title = kwargs['title'].encode('utf8').strip()
     abstract = kwargs['abstract'].encode('utf8').strip()
     crawler_url = kwargs['crawler_url'].encode('utf8').strip()

     crawlerRecord.title = title
     crawlerRecord.abstract = abstract
     crawlerRecord.crawler_url = crawler_url
     crawlerRecord.gmt_crawler = kwargs['gmt_crawler']
     crawlerRecord.gmt_created = kwargs['gmt_created']
     # Hash the same encoded URL that is stored; hashing the raw value
     # skipped the encoding step applied to every other text field.
     crawlerRecord.url_key = BasicTool.md5(crawler_url)
     crawlerRecord.title_key = BasicTool.md5(title)
     crawlerRecord.sch_id = kwargs['sch_id']
     crawlerRecord.alumni_id = kwargs['alumni_id']
     try:
         mysqlSession.add(crawlerRecord)
         mysqlSession.commit()
     except sqlalchemy.exc.IntegrityError as e:
         mysqlSession.rollback()
         CrawlerTool.logger.error(e)
     else:
         # Count the record only after the commit went through.
         spider.crawlerTask.crawlerNum.add()
Ejemplo n.º 6
0
 def insert(self, table, my_dict):
     """Insert one row into ``table`` and commit it.

     Args:
         table: table name, interpolated into the statement as-is.
         my_dict: dict of ``column: value`` pairs for the new row. Column
             names must be strings; values are sent as driver parameters.

     Returns:
         ``self.db.insert_id()`` when ``execute`` reports a truthy result,
         ``0`` when it reports a falsy one, and ``None`` when a
         ``MySQLdb.Error`` was caught and printed.
     """
     try:
         self.db.set_character_set('utf8')
         columns = list(my_dict.keys())
         params = [my_dict[name] for name in columns]
         # One placeholder per column: the driver quotes and escapes each
         # value, so embedded quotes and non-string values are handled.
         placeholders = ', '.join(['%s'] * len(columns))
         sql = "INSERT INTO %s (%s) VALUES (%s)" % (table, ', '.join(columns), placeholders)
         try:
             result = self.cur.execute(sql, params)
             insert_id = self.db.insert_id()
             self.db.commit()
             # Check whether the statement reported success
             return insert_id if result else 0
         except MySQLdb.Error as e:
             # Roll back on error
             self.db.rollback()
             # Primary key must be unique, so the row cannot be inserted
             if "key 'PRIMARY'" in e.args[1]:
                 print("%s %s" % (BasicTool.getCurrentTime(), "数据已存在,未插入数据"))
             else:
                 print("%s %s" % (BasicTool.getCurrentTime(), "插入数据失败,原因 %d: %s" % (e.args[0], e.args[1])))
     except MySQLdb.Error as e:
         print("%s %s" % (BasicTool.getCurrentTime(), "数据库错误,原因%d: %s" % (e.args[0], e.args[1])))
Ejemplo n.º 7
0
            abstract += sibling
        else:
            abstract += sibling.get_text()#re.search(r'<.*>(.*)</.*>',sibling.get_text())
        #print type(sibling)
        #print sibling.name
    abstract=abstract[:-5]
    print gmt_created
    print title
    print crawler_url
    print abstract
    gmt_crawler = datetime.datetime.now()#BasicTool.getCurrentTime()
    crawlerRecord=CrawlerRecord()
    crawlerRecord.title=title
    crawlerRecord.abstract=abstract
    crawlerRecord.crawler_url=crawler_url
    crawlerRecord.gmt_crawler=gmt_crawler
    crawlerRecord.gmt_created=gmt_created
    crawlerRecord.url_key=BasicTool.md5(crawler_url)
    crawlerRecord.sch_id=1
    crawlerRecord.alumni_id=1
    mysql=MySQL()
    mysql.session.add(crawlerRecord)
    mysql.session.commit()
    mysql.session.close()



# tt=(nt.strftime('%Y年%m月%d日 %H时%M分%S秒'))
# print tt
# timeArray = time.strptime(tt, "%Y年%m月%d日 %H时%M分%S秒")
# print timeArray
Ejemplo n.º 8
0
 def crawlerSogou(lst):
     """Create a ``Sogou`` object for every item of ``lst`` and call its ``parseHtml()``.

     Each item is passed to ``BasicTool.searchWord`` to obtain the ``word``
     argument; its ``graduate`` and ``alumni_id`` attributes supply
     ``sch_id`` and ``alumni_id``.
     """
     for entry in lst:
         Sogou(
             word=BasicTool.searchWord(entry),
             sch_id=entry.graduate,
             alumni_id=entry.alumni_id,
         ).parseHtml()
Ejemplo n.º 9
0
 def crawlerBaidu(lst):
     """Create a ``Baidu`` object for every item of ``lst`` and call its ``parseHtml()``.

     Each item is passed to ``BasicTool.searchWord`` to obtain the ``word``
     argument; its ``graduate`` and ``alumni_id`` attributes supply
     ``sch_id`` and ``alumni_id``.
     """
     for entry in lst:
         Baidu(
             word=BasicTool.searchWord(entry),
             sch_id=entry.graduate,
             alumni_id=entry.alumni_id,
         ).parseHtml()