Esempio n. 1
0
    def insert(self, d):
        """Store one record in the ``joke`` table of the ``joke`` database.

        A fresh ``MySQL`` handle is created for every call; the database is
        selected, ``d`` is inserted and the transaction is committed.

        :param d: row payload passed unchanged to ``MySQL.insert``
            (presumably a column -> value mapping; confirm against the
            wrapper).
        """
        database = table = 'joke'
        client = MySQL()
        client.selectDb(database)
        client.insert(table, d)
        client.commit()
def deleteT(db, date):
    """Flag recorded tables that are missing from the source as deleted.

    The tables returned by ``gethiveT`` (post-processed by ``getDBs``) are
    compared, by ``Data_Tbl_Phys_Nm``, with the rows returned by ``getCT``.
    Every recorded table without a counterpart gets ``Del_Dt`` set to
    ``date`` in ``data_tbl``.  When nothing has been recorded yet, the source
    tables are first inserted into ``data_tbl``.

    :param db: database name handed to ``gethiveT`` and ``getCT``.
    :param date: value written to ``Create_Dt`` / ``Del_Dt``.

    Exits the process with status 1 when an update statement fails.
    """
    tables1 = gethiveT(db)  # pass the database name
    tables2 = getCT(db)  # pass the database name
    conn = MySQL(config.washmeta)
    tables1 = getDBs(tables1)

    if len(tables2) == 0:
        # Nothing recorded yet: seed data_tbl with every source table.
        for table1 in tables1:
            table1['Create_Dt'] = date
            # Stored as text, the same way insertNewT stores it.
            table1['Data_Tbl_UUID'] = str(uuid.uuid1())
            logging.debug('table1:' + table1['Data_Tbl_Phys_Nm'])
            conn.insert("data_tbl", table1)

    # One pass over the source tables instead of a nested scan per record.
    live_names = set(t.get('Data_Tbl_Phys_Nm') for t in tables1)
    for table2 in tables2:
        name = table2.get('Data_Tbl_Phys_Nm')
        if name in live_names:
            continue
        # Log the record being flagged (the loop variable of the source
        # tables is stale here, or unbound when there are no source tables).
        logging.debug('table2:%s', name)
        try:
            # NOTE(review): values are interpolated straight into the SQL
            # text; switch to a parameterised call if the MySQL wrapper
            # offers one.
            conn.execute(
                "update data_tbl set Del_Dt='{}' where Data_Tbl_Phys_Nm='{}'"
                .format(date, name))
        except Exception:
            logging.error('删除表元数据失败,数据为:' + str(table2))
            print(traceback.format_exc())
            sys.exit(1)
    del conn
def insertCByT(db, tb):
    """Copy the column definitions of one table into ``data_fld``.

    The columns are read through the ``config.hivemeta`` connection by
    joining ``columns_v2``, ``sds``, ``tbls`` and ``dbs``; the result is
    passed through ``getTableID`` and each row is inserted through the
    ``config.washmeta`` connection.  A failed insert is logged and printed,
    after which the remaining columns are still processed.

    :param db: database name used in the ``dbs.name`` filter.
    :param tb: table record; ``Data_Tbl_Phys_Nm`` selects the table and
        ``Create_Dt`` is copied onto every inserted column.
    """
    hive_conn = MySQL(config.hivemeta)
    wash_conn = MySQL(config.washmeta)
    hivesql = """select 
'{}' AS Data_Tblid,
t1.COLUMN_NAME as Fld_Phys_Nm,
t1.COMMENT as Fld_Cn_Nm,
t1.TYPE_NAME as Fld_Data_Type,
t1.INTEGER_IDX as Fld_Ord
from columns_v2 t1
left join sds t2
on t1.cd_id = t2.cd_id
left join tbls t3
on t2.sd_id = t3.sd_id
left join dbs t4
on t3.db_id=t4.db_id
where t4.name='{}' and t3.tbl_name='{}'"""
    # The physical table name fills both the Data_Tblid placeholder and the
    # tbl_name filter; getTableID post-processes the rows.
    query = hivesql.format(tb.get('Data_Tbl_Phys_Nm'), db,
                           tb.get('Data_Tbl_Phys_Nm'))
    columns = getTableID(hive_conn.execute(query))
    for column in columns:
        column['Create_Dt'] = tb['Create_Dt']
        try:
            logging.debug("插入新增表字段:" + str(column['Fld_Phys_Nm']))
            wash_conn.insert('data_fld', column)
        except Exception:
            # Best effort: report the failure and carry on with the rest.
            logging.error("插入新增表字段失败:" + str(column['Fld_Phys_Nm']))
            print(traceback.format_exc())
    del hive_conn
    del wash_conn
Esempio n. 4
0
    def insert(self, d):
        """Store one record in ``similar_images`` of the ``images`` database.

        A fresh ``MySQL`` handle is created for every call; the database is
        selected, ``d`` is inserted and the transaction is committed.

        :param d: row payload passed unchanged to ``MySQL.insert``
            (presumably a column -> value mapping; confirm against the
            wrapper).
        """
        client = MySQL()
        client.selectDb('images')
        client.insert('similar_images', d)
        client.commit()
Esempio n. 5
0
def insert_data(data):
    """Insert ``data`` into ``urls_crawled`` unless its URL is already there.

    :param data: record to store; ``data['url']`` is used for the
        duplicate check and the whole mapping is passed to ``MySQL.insert``.
    :returns: ``200`` when the insert raised, otherwise ``None``.
    """
    n = MySQL()
    # NOTE(review): the URL is concatenated straight into the SQL text, so a
    # quote inside it breaks (or hijacks) the query.  Use a parameterised
    # query if the MySQL wrapper supports one.
    sql = "select id from urls_crawled where url='" + data['url'] + "';"
    result = n.query(sql)
    # The wrapper's query() result is compared with 0 to mean "no such row"
    # -- presumably a row count; confirm against the wrapper.
    if result == 0:
        try:
            n.insert('urls_crawled', data)
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit.  Callers still get the 200 failure marker.
            return 200
    n.commit()
def insertNewP(db, date):
    """Bring the ``dp`` partition table in line with the source partitions.

    Source partitions come from ``gethiveP`` (passed through
    ``getTableID``); recorded partitions come from ``getCP``.  A partition is
    identified by the pair (``Data_Tblid``, ``Dp_Path``).  Source partitions
    that are not recorded are inserted; recorded partitions that no longer
    exist in the source are deleted.

    :param db: database name handed to ``gethiveP`` and ``getCP``.
    :param date: accepted for signature parity with the sibling sync
        functions; not used here.

    Exits the process with status 1 when an insert or delete fails.
    """
    partitions1 = gethiveP(db)  # pass the database name
    partitions2 = getCP(db)  # pass the database name
    conn = MySQL(config.washmeta)
    partitions1 = getTableID(partitions1)

    def same_partition(left, right):
        # Two records describe the same partition when table id and path
        # both match.
        return (left.get('Data_Tblid') == right.get('Data_Tblid')
                and left.get('Dp_Path') == right.get('Dp_Path'))

    if len(partitions2) == 0:
        # First run: nothing recorded, so every source partition is new.
        for p1 in partitions1:
            logging.debug('partition:' + str(p1['Data_Tblid']) + ':' +
                          p1['Dp_Path'])
            try:
                conn.insert("dp", p1)
            except Exception:
                logging.error('第一次插入分区数据失败,数据为:' + str(p1))
                print(traceback.format_exc())
                sys.exit(1)
    else:
        # Insert the partitions that exist in the source only.
        for p1 in partitions1:
            if any(same_partition(p1, p2) for p2 in partitions2):
                continue
            logging.debug('插入分区partition:' + str(p1['Data_Tblid']) + ':' +
                          p1['Dp_Path'])
            try:
                conn.insert("dp", p1)
            except Exception:
                logging.error('插入分区数据失败,数据为:' + str(p1))
                print(traceback.format_exc())
                sys.exit(1)

        # Delete the recorded partitions that vanished from the source.
        for p2 in partitions2:
            if any(same_partition(p2, p1) for p1 in partitions1):
                continue
            logging.debug('删除分区partition:' + str(p2['Data_Tblid']) + ':' +
                          p2['Dp_Path'])
            try:
                # NOTE(review): values are interpolated into the SQL text;
                # prefer a parameterised call if the wrapper offers one.
                conn.execute(
                    "delete from dp where Data_Tblid='{}' and Dp_Path='{}'"
                    .format(p2.get('Data_Tblid'), p2.get('Dp_Path')))
            except Exception:
                # Report the partition whose delete failed (p2); the
                # source-side loop variable is unrelated at this point.
                logging.error('删除分区数据失败,数据为:' + str(p2))
                print(traceback.format_exc())
                sys.exit(1)

    del conn
Esempio n. 7
0
    def html_parser(self, html_source):
        """Extract one user profile from a page, print it and store it.

        The page is parsed with ``html.fromstring``; every profile field is
        read through an ``HtmlParser`` getter, echoed to stdout, written to
        ``t_user`` and finally the tree is handed to ``self.extract_urls``.

        :param html_source: markup of the profile page.
        """
        tree = html.fromstring(html_source)
        parser = HtmlParser()

        username = parser.get_username(tree)
        brief_info = parser.get_brief_info(tree)
        industry = parser.get_industry(tree)
        education = parser.get_education(tree)
        major = parser.get_major(tree)
        answer_count = parser.get_answer_count(tree)
        article_count = parser.get_article_count(tree)
        ask_question_count = parser.get_ask_question_count(tree)
        collection_count = parser.get_collection_count(tree)
        follower_count = parser.get_follower_count(tree)
        followed_count = parser.get_followed_count(tree)
        follow_live_count = parser.get_follow_live_count(tree)
        follow_topic_count = parser.get_follow_topic_count(tree)
        follow_column_count = parser.get_follow_column_count(tree)
        follow_question_count = parser.get_follow_question_count(tree)
        follow_collection_count = parser.get_follow_collection_count(tree)

        current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Console report: one "label:value" line per field between two
        # separator rows.
        separator = "*" * 60
        report = (
            ("用户名:%s\n", username),
            ("个人简介:%s\n", brief_info),
            ("所处行业:%s\n", industry),
            ("毕业学校:%s\n", education),
            ("主修专业:%s\n", major),
            ("回答数:%s\n", answer_count),
            ("文章数:%s\n", article_count),
            ("提问数:%s\n", ask_question_count),
            ("收藏数:%s\n", collection_count),
            ("被关注数:%s\n", follower_count),
            ("关注数:%s\n", followed_count),
            ("关注直播数:%s\n", follow_live_count),
            ("关注话题数:%s\n", follow_topic_count),
            ("关注专栏数:%s\n", follow_column_count),
            ("关注问题数:%s\n", follow_question_count),
            ("关注收藏夹数:%s\n", follow_collection_count),
            ("当前时间:%s\n", current_time),
        )
        print(separator)
        for template, value in report:
            print(template % value)
        print(separator)

        # Persist the profile.
        # NOTE(review): the statement is assembled by plain concatenation of
        # scraped text, so a quote in any field breaks (or injects into) the
        # SQL.  Move to a parameterised insert once the MySQL wrapper's API
        # is confirmed.
        columns = ("username, brief_info, industry, education, major, "
                   "answer_count, article_count, ask_question_count, "
                   "collection_count, follower_count, followed_count, "
                   "follow_live_count, follow_topic_count, "
                   "follow_column_count, follow_question_count, "
                   "follow_collection_count, gmt_create")
        sql = ("INSERT INTO t_user(" + columns + ") values('"
               + username + "','"
               + brief_info + "','"
               + industry + "','"
               + education + "', '"
               + major + "', '"
               + answer_count + "', '"
               + article_count + "', '"
               + ask_question_count + "', '"
               + collection_count + "', '"
               + follower_count + "', '"
               + followed_count + "', '"
               + follow_live_count + "', '"
               + follow_topic_count + "', '"
               + follow_column_count + "', '"
               + follow_question_count + "', '"
               + follow_collection_count + "', '"
               + current_time + "')")
        db = MySQL(DATABASE_CONFIG)
        db.insert(sql)

        # Queue the links found on this page.
        self.extract_urls(tree)
def updateC(db, date):
    """Refresh ``data_fld`` rows whose column definition changed.

    Source columns come from ``gethiveC`` (passed through ``getTableID``);
    recorded columns come from ``getCC``.  A source column counts as
    unchanged only if some recorded column matches it on table id, physical
    name, comment, data type and ordinal.  Every other source column is
    written back with an ``update`` keyed on table id and physical name.
    When nothing is recorded yet, all source columns are inserted instead.

    :param db: database name handed to ``gethiveC`` and ``getCC``.
    :param date: value written to ``Create_Dt`` (first load) or ``Upd_Dt``.

    Exits the process with status 1 when an insert or update fails.
    """
    columns1 = gethiveC(db)  # pass the database name
    columns2 = getCC(db)  # pass the database name
    conn = MySQL(config.washmeta)
    columns1 = getTableID(columns1)

    # Attributes that must all agree for a column to count as unchanged.
    tracked = ('Data_Tblid', 'Fld_Phys_Nm', 'Fld_Cn_Nm', 'Fld_Data_Type',
               'Fld_Ord')

    if len(columns2) == 0:
        # First run: record every source column.
        for c1 in columns1:
            c1['Create_Dt'] = date
            logging.debug('column:' + str(c1['Data_Tblid']) + ':' +
                          c1['Fld_Phys_Nm'])
            try:
                conn.insert("data_fld", c1)
            except Exception:
                logging.error('第一次插入字段数据失败,失败数据为:' + str(c1))
                print(traceback.format_exc())
                sys.exit(1)
    else:
        for c1 in columns1:
            unchanged = any(
                all(c1.get(key) == c2.get(key) for key in tracked)
                for c2 in columns2)
            if unchanged:
                continue
            c1['Upd_Dt'] = date
            logging.debug('column:' + str(c1['Data_Tblid']) + ':' +
                          c1['Fld_Phys_Nm'])
            try:
                # NOTE(review): values are interpolated into the SQL text;
                # prefer a parameterised call if the wrapper offers one.
                conn.execute(
                    "update data_fld set Fld_Cn_Nm='{}',Fld_Data_Type='{}',Fld_Ord='{}',Upd_Dt='{}' where Data_Tblid='{}' and Fld_Phys_Nm='{}'"
                    .format(c1['Fld_Cn_Nm'], c1['Fld_Data_Type'],
                            c1['Fld_Ord'], c1['Upd_Dt'], c1['Data_Tblid'],
                            c1['Fld_Phys_Nm']))
            except Exception:
                # This path updates a row; the message used to claim an
                # insert had failed (copied from insertNewC).
                logging.error('更新字段数据失败,数据为:' + str(c1))
                print(traceback.format_exc())
                sys.exit(1)
    del conn
def insertNewT(db, date):
    """Register source tables that are not recorded yet.

    Source tables come from ``gethiveT`` (passed through ``getDBs``);
    recorded tables come from ``getCT``.  On the first run (nothing
    recorded) every source table is inserted into ``data_tbl``.  Otherwise
    each source table whose ``Data_Tbl_Phys_Nm`` is not recorded is stamped
    with a creation date and UUID and its columns are inserted through
    ``insertCByT``.

    :param db: database name handed to ``gethiveT``, ``getCT`` and
        ``insertCByT``.
    :param date: value written to ``Create_Dt``.

    Exits the process with status 1 when an insert fails.
    """
    hive_tables = gethiveT(db)  # pass the database name
    recorded = getCT(db)  # pass the database name
    conn = MySQL(config.washmeta)
    hive_tables = getDBs(hive_tables)  # pass the database name

    def stamp(table):
        # Creation date plus a freshly generated, stringified UUID.
        table['Create_Dt'] = date
        table['Data_Tbl_UUID'] = str(uuid.uuid1())
        logging.debug('table1:' + table['Data_Tbl_Phys_Nm'])

    if len(recorded) == 0:
        for table in hive_tables:
            stamp(table)
            try:
                conn.insert("data_tbl", table)
            except Exception:
                logging.error('第一次插入表数据失败,插入数据是:' + str(table))
                print(traceback.format_exc())
                sys.exit(1)
    else:
        for table in hive_tables:
            already_known = any(
                table.get('Data_Tbl_Phys_Nm') == row.get('Data_Tbl_Phys_Nm')
                for row in recorded)
            if already_known:
                continue
            stamp(table)
            try:
                # NOTE(review): the data_tbl insert for the new table itself
                # (conn.insert("data_tbl", table)) is disabled in the
                # original; only its columns are written here.
                logging.debug("插入表字段")
                insertCByT(db, table)
            except Exception as e:
                logging.error('插入新增表失败,插入数据是:' + str(table))
                print(e)
                print(traceback.format_exc())
                sys.exit(1)
    del conn
Esempio n. 10
0
def insertNewC(db, date):
    """Insert source columns that are not recorded in ``data_fld`` yet.

    Source columns come from ``gethiveC`` (passed through ``getTableID``);
    recorded columns come from ``getCC``.  A column is considered recorded
    when table id and physical name both match.  On the first run (nothing
    recorded) every source column is inserted.

    :param db: database name handed to ``gethiveC`` and ``getCC``.
    :param date: value written to ``Create_Dt``.

    Exits the process with status 1 when an insert fails.
    """
    hive_columns = gethiveC(db)  # pass the database name
    recorded = getCC(db)  # pass the database name
    conn = MySQL(config.washmeta)
    hive_columns = getTableID(hive_columns)  # pass the database name

    def is_recorded(column):
        return any(
            column.get('Data_Tblid') == row.get('Data_Tblid')
            and column.get('Fld_Phys_Nm') == row.get('Fld_Phys_Nm')
            for row in recorded)

    if len(recorded) == 0:
        pending = hive_columns
        failure_prefix = '第一次插入字段数据失败,失败数据为:'
    else:
        # Lazy on purpose: each column is matched right before it is
        # handled, exactly like the nested-loop form.
        pending = (col for col in hive_columns if not is_recorded(col))
        failure_prefix = '插入新增字段数据失败,数据为:'

    for column in pending:
        column['Create_Dt'] = date
        logging.debug('column:' + str(column['Data_Tblid']) + ':' +
                      column['Fld_Phys_Nm'])
        try:
            conn.insert("data_fld", column)
        except Exception:
            logging.error(failure_prefix + str(column))
            print(traceback.format_exc())
            sys.exit(1)
    del conn
Esempio n. 11
0
def updateT(db, date):
    """Stamp ``Upd_Dt`` on recorded tables that ``compareP``/``compareC`` flag.

    Source tables come from ``gethiveT`` (passed through ``getDBs``);
    recorded tables come from ``getCT``.  For every pair with the same
    ``Data_Tbl_Phys_Nm`` the table's ``Upd_Dt`` is set to ``date`` when
    ``compareP`` or ``compareC`` returns a truthy value.  When nothing is
    recorded yet, all source tables are inserted into ``data_tbl`` instead.

    :param db: database name handed to the helper functions.
    :param date: value written to ``Create_Dt`` (first load) or ``Upd_Dt``.

    Exits the process with status 1 when an insert or update fails.
    """
    tables1 = gethiveT(db)  # pass the database name
    tables2 = getCT(db)  # pass the database name
    conn = MySQL(config.washmeta)
    tables1 = getDBs(tables1)

    if len(tables2) == 0:
        for table1 in tables1:
            table1['Create_Dt'] = date
            # Stored as text, the same way insertNewT stores it, rather than
            # handing a uuid.UUID object to the database wrapper.
            table1['Data_Tbl_UUID'] = str(uuid.uuid1())
            logging.debug('table1:' + table1['Data_Tbl_Phys_Nm'])
            try:
                conn.insert("data_tbl", table1)
            except Exception:
                logging.error('第一次插入表数据失败,插入数据是:' + str(table1))
                print(traceback.format_exc())
                sys.exit(1)

    for table1 in tables1:
        for table2 in tables2:
            if table1.get('Data_Tbl_Phys_Nm') != table2.get(
                    'Data_Tbl_Phys_Nm'):
                continue
            # compareC is only consulted when compareP reports nothing.
            if not (compareP(db, table1, table2)
                    or compareC(db, table1, table2)):
                continue
            logging.debug('对比表table1:' + table1['Data_Tbl_Phys_Nm'])
            try:
                # NOTE(review): values are interpolated into the SQL text;
                # prefer a parameterised call if the wrapper offers one.
                conn.execute(
                    "update data_tbl set Upd_Dt='{}' where Data_Tbl_Phys_Nm='{}'"
                    .format(date, table1.get('Data_Tbl_Phys_Nm')))
            except Exception:
                logging.error('更新表元数据失败,数据为:' + str(table1))
                print(traceback.format_exc())
                sys.exit(1)
    del conn
Esempio n. 12
0
        'charset':'utf8'}
# Module-level connection: used for the query right below and again for the
# bulk load in the ``finally`` clause of the entry point.
db = MySQL(dbconfig)
#sql1 = "select tried_num from import_status_record where cid = '%s' and curr_sql_file like '%s';"%(cLists[0],'%'+sqlDate+'%')
#execNum=db.query(sql1)
#sql = "select count(*) from available_CIDs;"
# Result of querying every row of available_CIDs; passed to WorkManager below.
sql2 = "select * from available_CIDs;"
cidsNum = db.query(sql2)


if __name__ == "__main__":
    # Run the import through a WorkManager built with (cidsNum, 4), wait for
    # it to finish and print the start/end timestamps.  Whatever happens, the
    # ``finally`` clause runs ./stat.sh and loads its output file into
    # import_status_record.
    try:
        #saveout = sys.stdout
        #saveerr = sys.stderr
        #f = open("log/import_record"+time.strftime("%m%d_%H%M%S"),"w")
        #sys.stdout = f
        #sys.stderr = f
        #os.system('./stat.sh ' + "config")
        start = time.strftime("%Y-%m-%d %H:%M:%S")
        work_manager = WorkManager(cidsNum,4)
        work_manager.wait_allcomplete()
        end = time.strftime("%Y-%m-%d %H:%M:%S")
        print "threads start times: ",start
        print "threads end times: ",end
    finally:
        print "###" * 30
        print "Statistics the import results and write results into mysql"
        # NOTE(review): the only visible assignment of execNum is the
        # commented-out query above, so this line raises NameError unless
        # execNum is defined earlier in the file -- confirm.
        os.system('./stat.sh ' + "dataList " + str(execNum+1))
        # NOTE(review): sqlData is not defined in the visible part of the
        # file; presumably the path of the file written by stat.sh -- verify.
        sql="load data infile '%s' into table import_status_record FIELDS TERMINATED BY ',';"%(sqlData)
        db.insert(sql)
        #f.close()
Esempio n. 13
0
class Spider(object):
    """Crawl Sogou WeChat article search results and store them via MySQL.

    ``main`` walks a fixed range of result pages for ``self.keyword``,
    follows every article link found on a page, extracts the article fields
    and inserts them into ``python_articles``.

    The class relies on a module-level global named ``proxy`` (read and
    reassigned in ``get_html``); it must exist before ``get_html`` is called.
    """

    # Captures everything after ``publish_time = `` up to the last double
    # quote on that line of the article's inline script.  Raw string so that
    # ``\s`` is a regex escape and not an invalid string escape.
    _PUBLISH_TIME_RE = re.compile(r'var.*?publish_time\s=\s(.*)"')

    def __init__(self):
        self.keyword = 'python'
        # NOTE(review): a captured session cookie is hard-coded here; it is
        # a credential and will expire -- consider loading it from
        # configuration.
        self.headers = {
            'Cookie':
            'ABTEST=8|1537491697|v1; IPLOC=CN3301; SUID=8A00CD733E18960A000000005BA442F1; JSESSIONID=aaaELbWTQ1L2wb49VYFvw; SUID=8A00CD733118960A000000005BA44328; weixinIndexVisited=1; SUV=00CA1DF373CD008A5BA44329E5072984; sct=1; ppinf=5|1537491766|1538701366|dHJ1c3Q6MToxfGNsaWVudGlkOjQ6MjAxN3x1bmlxbmFtZToyOllHfGNydDoxMDoxNTM3NDkxNzY2fHJlZm5pY2s6MjpZR3x1c2VyaWQ6NDQ6bzl0Mmx1UG1pVEc5UzdYai1uNS00dmZESjlaSUB3ZWl4aW4uc29odS5jb218; pprdig=SZP50z_ocFRwyaEzaFydV-HYv-7zERPayFcU4AKiczu0biMhxplP0vHK_c9YDQaC7wSpf6k1pi_KgkugvqfiXFx57nAVREJnCoD2sI6PPqu_RkhU8p_t8K_u0nBORzPL4t56QANrWGeOqqABFIR8--kajPxzjyOrns2gB7Mx1Gk; sgid=18-37096587-AVukQzbMnFYdwGh1fBNzkwE; PHPSESSID=n0hgnp1cq5hb3hde6ag44fo2d0; SUIR=64ED239EEEE89BB9C39C6C78EE77A936; ppmdig=1537498561000000839ca7e11ec83beadd0ca06d0eb18a07; SNUID=5AD01CA2D0D4A689DE848F35D1313467',
            'Host':
            'weixin.sogou.com',
            'Upgrade-Insecure-Requests':
            '1',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36'
        }
        self.base_url = 'http://weixin.sogou.com/weixin?'
        self.proxy_pool_url = 'http://127.0.0.1:5555/random'
        self.mysql = MySQL()
        # get_html gives up once its attempt counter reaches this value.
        self.max_count = 5
        self.session = requests.Session()

    def get_proxy(self):
        """Return a proxy address from the local proxy pool, or ``None``.

        ``None`` is returned for any non-200 answer and for a connection
        error.
        """
        try:
            response = requests.get(self.proxy_pool_url)
            if response.status_code == 200:
                return response.text
            return None
        except ConnectionError:
            return None

    def get_html(self, url, count=1):
        """Fetch ``url`` through the session, rotating proxies when blocked.

        :param url: page to download (redirects are not followed).
        :param count: attempt number; the call gives up once it reaches
            ``self.max_count``.
        :returns: the response body for a 200 answer, otherwise ``None``.

        A 302 answer or a connection error makes the method fetch a new
        proxy from the pool and retry with the attempt counter increased.
        The status check is applied to proxied and direct requests alike;
        previously a proxied response was returned whatever its status.
        """
        global proxy
        print('Crawling', url)
        print('Trying Count', count)
        if count >= self.max_count:
            print('Tried Too Many Counts')
            return None
        try:
            if proxy:
                # Route the request through the proxy taken from the pool.
                proxies = {
                    'http': 'http://' + proxy,
                    'https': 'https://' + proxy
                }
                response = self.session.get(url,
                                            allow_redirects=False,
                                            headers=self.headers,
                                            proxies=proxies)
            else:
                response = self.session.get(url,
                                            allow_redirects=False,
                                            headers=self.headers)
        except ConnectionError as e:
            print('Error Occurred', e.args)
            proxy = self.get_proxy()
            return self.get_html(url, count + 1)

        if response.status_code == 200:
            return response.text
        if response.status_code == 302:
            # The site redirected instead of answering: switch proxy.
            print('302')
            proxy = self.get_proxy()
            if proxy:
                print('Using Proxy', proxy)
                # Count the retry so a run of blocked proxies terminates.
                return self.get_html(url, count + 1)
            print('Get Proxy Failed')
        return None

    def get_index(self, keyword, page):
        """Download one search-result page for ``keyword``.

        :returns: the page markup, or ``None`` when ``get_html`` gave up.
        """
        data = {'query': keyword, 'type': 2, 'page': page}
        url = self.base_url + urlencode(data)
        return self.get_html(url)

    def parse_index(self, html):
        """Yield the article links of a result page, then pause 5-10 seconds.

        The pause happens after the last link has been consumed.
        """
        doc = pq(html)
        items = doc('.news-box .news-list li .txt-box h3 a').items()
        for item in items:
            yield item.attr('href')
        time.sleep(random.randint(5, 10))

    def get_detail(self, url):
        """Download an article page; ``None`` on non-200 or connection error."""
        try:
            response = requests.get(url)
            if response.status_code == 200:
                return response.text
            return None
        except ConnectionError:
            return None

    @staticmethod
    def _extract_publish_date(html):
        """Return the 10-character publish date found in ``html``, or ``None``.

        The date is cut out of the ``publish_time`` assignment of the page
        script: the opening quote is skipped and the next ten characters are
        returned.
        """
        match = Spider._PUBLISH_TIME_RE.search(html)
        if match is None:
            return None
        return match.group(1)[1:11]

    def parse_detail(self, html):
        """Extract the stored fields from an article page.

        :returns: dict with ``url_object_id`` (``get_md5`` of the markup),
            ``title``, ``content``, ``nickname``, ``wechat`` and, only when
            a publish time is present in the page, ``date``.
        """
        doc = pq(html)
        article = {
            'url_object_id': get_md5(html),
            'title': doc('.rich_media_title').text(),
            'content': doc('.rich_media_content').text(),
            'nickname': doc('#js_name').text(),
            'wechat':
            doc('#js_profile_qrcode > div > p:nth-child(3) > span').text()
        }
        date = self._extract_publish_date(html)
        if date is not None:
            article['date'] = date
        return article

    def main(self):
        """Crawl result pages 86 to 100 and store every article found."""
        self.session.headers.update(self.headers)
        for page in range(86, 101):
            html = self.get_index(self.keyword, page)
            if not html:
                continue
            for article_url in self.parse_index(html):
                article_html = self.get_detail(article_url)
                if article_html:
                    article_data = self.parse_detail(article_html)
                    print(article_data)
                    self.mysql.insert('python_articles', article_data)
Esempio n. 14
0
class Tables:
    """Create, drop, clean and fill the tables described in ``sqls.xml``.

    The SQL statements live in ``sqls.xml``, grouped under elements with the
    ids ``createSql``, ``dropSql``, ``cleanSql`` and ``insertSql``; inside a
    group each statement is an element whose id is the table name.
    """

    def __init__(self):
        """Load ``sqls.xml`` and open the database connection.

        :raises IOError: when ``sqls.xml`` cannot be read (the problem is
            logged first).
        """
        self._logger = Logger(__file__)
        try:
            # ``with`` closes the handle; it used to stay open.
            with open("sqls.xml", "r") as fsock:
                content = fsock.read()
        except IOError:
            self._logger.error("The file don't exist, Please double check!")
            # Without the statements nothing below can work; previously the
            # code carried on and failed on the unbound file handle.
            raise
        self.sqls = BeautifulSoup(content)
        # NOTE(review): connection settings, including the password, are
        # hard-coded; consider moving them to configuration.
        dbconfig = {'host':'127.0.0.1', 
                'port': 3306, 
                'user':'******', 
                'passwd':'123456', 
                'db':'scenic', 
                'charset':'utf8'}
        self.db = MySQL(dbconfig)

    def _findSql(self, group, name):
        """Return the statement with id ``name`` inside ``group``, or ``None``."""
        section = self.sqls.find(id=group)
        if section is None:
            return None
        item = section.find(id=name)
        if item is None:
            return None
        return item.string

    def initDB(self):
        """create all tables
        """
        createSqls = self.sqls.find(id="createSql")
        for item in createSqls.select("item"):
            sql = item.string
            self._logger.info("create the table "+item.attrs["id"])
            self.db.execute(sql)
        # The cursor has to be reopened after the batch; according to the
        # original author, skipping this raises error code 1024.
        self.db.reopenCursor()

    def createTable(self,name):
        """create a specified table

        A table without a ``createSql`` entry is reported through the logger
        instead of raising ``AttributeError``.
        """
        create = self._findSql("createSql", name)
        if create:
            self._logger.info(" create table "+name)
            self.db.execute(create)
        else:
            self._logger.error("error occured when create table "+name)

    def dropAll(self):
        """drop all the tables
        """
        dropSqls= self.sqls.find(id="dropSql")
        for item in dropSqls.select("item"):
            sql = item.string
            self._logger.info("drop the table "+item.attrs["id"])
            self.db.execute(sql)

    def dropTable(self,name):
        """drop specified table

        The statement is looked up by id, like in ``createTable``; the
        previous lookup searched by tag name and then executed an undefined
        variable.
        """
        drop = self._findSql("dropSql", name)
        if drop:
            self._logger.info("drop the table "+name)
            self.db.execute(drop)
        else:
            self._logger.warn("Don't have the table "+name)

    def cleanAll(self):
        """delete data from all the tables,but not drop tables
        """
        cleanSqls= self.sqls.find(id="cleanSql")
        for item in cleanSqls.select("item"):
            sql = item.string
            self._logger.info("clean the table "+item.attrs["id"])
            self.db.execute(sql)

    def cleanTable(self,name):
        """clean the data of specified table

        Not implemented yet: the call does nothing.
        """
        pass

    def insertTable(self,name,params):
        """insert values int to the specified table
        # Parameters:
        name: the name of the table
        params: the value insert into the tables. It can be tuple for inserting a row,or can be a list to insert serveral rows
        # Return:
        None. A table without an ``insertSql`` entry is reported through the
        logger instead of raising ``AttributeError``.
        """
        insert = self._findSql("insertSql", name)
        if insert:
            self._logger.info(" insert into table "+name)
            self.db.insert(insert,params)
        else:
            self._logger.error("did not find the table "+name+" when insert")

    def insertData(self,data):
        """It is the interface for outer calling
        # Parameters:
        data: a ``Scenic`` instance; its fields fill one ``scenery`` row and
        one ``sceneryImages`` row per image
        # Return:
        ``False`` when ``data`` is not a ``Scenic``; ``None`` otherwise
        """
        if not isinstance(data,Scenic):
            self._logger.error("the parameter is not the instance of Scenic")
            return False
        data.encode()
        typeNames = self.joint(data.types)
        seasons = self.joint(data.fits)
        sceneryParams = (data.id,data.name,data.province,data.city,data.area,data.level,data.quality,data.description,data.website,data.symbol,data.opentime,data.closetime,data.price,data.suggest,seasons,typeNames,data.longitude,data.latitude,data.precise,data.confidence)
        # One row per image, each with a fresh UUID.
        imageParams = [(data.id,str(uuid.uuid1()),item,data.name,data.name)
                       for item in data.images]
        self.insertTable("scenery",sceneryParams)
        # insert into database when only there are pictures,or it will occur error
        if imageParams:
            self.insertTable("sceneryImages",imageParams)

    def joint(self,data,split=","):
        """Joint list with split parameter,default is ,

        Anything that is not a ``list`` yields an empty string.
        """
        if isinstance(data,list):
            return split.join(data)
        return ""

    def initTables(self):
        """Initial basic tables including sceneryType,season
        """
        basic = SearchParams()
        # insert basic data into sceneryType table
        params = [(code,label,label) for label,code in basic.scenicType.items()]
        self.insertTable("sceneryType",params)
        # insert basic data into season table
        params = [(code,label) for label,code in basic.scenicFit.items()]
        self.insertTable("season",params)
Esempio n. 15
0
class Tables:
    """Create or Drop tables,delete data from tables
    """
    def __init__(self):
        """Load ``sqls.xml`` and open the database connection.

        :raises IOError: when ``sqls.xml`` cannot be read (the problem is
            logged first).
        """
        self._logger = Logger(__file__)
        try:
            # ``with`` closes the handle; it used to stay open.
            with open("sqls.xml", "r") as fsock:
                content = fsock.read()
        except IOError:
            self._logger.error("The file don't exist, Please double check!")
            # Without the statements nothing else can work; previously the
            # code carried on and failed on the unbound file handle.
            raise
        self.sqls = BeautifulSoup(content)
        # NOTE(review): connection settings, including the password, are
        # hard-coded; consider moving them to configuration.
        dbconfig = {
            'host': '127.0.0.1',
            'port': 3306,
            'user': '******',
            'passwd': '123456',
            'db': 'scenic',
            'charset': 'utf8'
        }
        self.db = MySQL(dbconfig)

    def initDB(self):
        """create all tables
        """
        createSqls = self.sqls.find(id="createSql")
        for item in createSqls.select("item"):
            sql = item.string
            self._logger.info("create the table " + item.attrs["id"])
            self.db.execute(sql)
        # must reopen the cursor, or it will raise exception with error code 1024. What a f*****g error
        self.db.reopenCursor()

    def createTable(self, name):
        """create a specified table
        """
        create = self.sqls.find(id="createSql").find(id=name).string
        if create:
            self._logger.info(" create table " + name)
            self.db.execute(create)
        else:
            self._logger.error("error occured when create table " + name)

    def dropAll(self):
        """drop all the tables
        """
        dropSqls = self.sqls.find(id="dropSql")
        for item in dropSqls.select("item"):
            sql = item.string
            self._logger.info("drop the table " + item.attrs["id"])
            self.db.execute(sql)

    def dropTable(self, name):
        """drop specified table
        """
        drop = self.sqls.find(id="dropSql").find(name)
        if drop:
            self._logger.info("drop the table " + name)
            self.db.execute(sql)
        else:
            self._logger.warn("Don't have the table " + name)

    def cleanAll(self):
        """delete data from all the tables,but not drop tables
        """
        cleanSqls = self.sqls.find(id="cleanSql")
        for item in cleanSqls.select("item"):
            sql = item.string
            self._logger.info("clean the table " + item.attrs["id"])
            self.db.execute(sql)

    def cleanTable(self, name):
        """clean the data of specified table
        """
        pass

    def insertTable(self, name, params):
        """insert values int to the specified table
        # Parameters:
        name: the name of the table
        params: the value insert into the tables. It can be tuple for inserting a row,or can be a list to insert serveral rows
        # Return:
        """
        insert = self.sqls.find(id="insertSql").find(id=name).string
        if insert:
            self._logger.info(" insert into table " + name)
            self.db.insert(insert, params)
        else:
            self._logger.error("did not find the table " + name +
                               " when insert")