예제 #1
0
def pusanUrl(name, url):
    """Collect the detail-region links listed on one sub-menu page.

    Creates a clsSubMenuUrls for the given name/url, fetches the page via
    savefilegethtml.getHtml and appends one clsDetailRegionUrls to its
    detailRegionList for every line that carries both '<li' and '<a'.

    name -- UTF-8 encoded menu name (decoded only for console output).
    url  -- address of the sub-menu page to scan.

    Returns the populated clsSubMenuUrls instance.
    """
    subMenuUrls = clsSubMenuUrls()
    subMenuUrls.name = name
    subMenuUrls.url = url

    print(subMenuUrls.name.decode('utf-8') + ' : ' + subMenuUrls.url)

    pageLines = savefilegethtml.getHtml(
        subMenuUrls.url,
        'class="container',
        '<!-- end .ot_tab_style1 -->',
        'onlinetourSubPagePusan.txt')

    for line in pageLines:
        # Only lines holding both a list item and an anchor are menu entries.
        if '<li' not in line or '<a' not in line:
            continue

        # A line containing '전체' is skipped unless the page URL contains
        # D50, D60 or D70.
        if '전체' in line:
            pageUrl = subMenuUrls.url
            if 'D50' not in pageUrl and 'D60' not in pageUrl and 'D70' not in pageUrl:
                continue

        region = clsDetailRegionUrls()
        region.name = tourUtil.getRemovedHtmlTag(line).strip()
        region.url = mainUrl + tourUtil.getTagAttr(line, 'a', 'href')
        subMenuUrls.detailRegionList.append(region)

        print(region.name.decode('utf-8') + ' : ' + region.url)

    return subMenuUrls
예제 #2
0
# Record the start time of this run in the exception log.
print >> exceptFile, "Start : %s" % time.ctime()

print menulist

# NOTE(review): the connect string contains "[email protected]", which looks
# like a scrubbed user/password@host value -- confirm the real DSN.
con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")

# Check that every entry made it into the menu correctly..
for level1 in menulist:
    for level2 in level1.tourkindgroup:
        for level3 in level2.regionUrlGroup:
            print 'Depart City : ' + level1.departCity + ', TourKind:' + level2.tourkind + ', Region : ' + level3.region + '(' + level3.url + ')'

            try:
                print >> exceptFile, level3.url
                # Fetch the region page; the two marker strings presumably
                # bound the slice that is returned -- confirm against
                # savefilegethtml.getHtml.
                regionHtml = savefilegethtml.getHtml(
                    level3.url, '<div class="leftArea">',
                    '</nav><!-- //lnb -->', 'tourbaksaRegionHtml.txt', '', '')

                for each_line in regionHtml:
                    # City links are <li class="..."> lines whose href has M1=.
                    if each_line.find('<li class="') > -1 and each_line.find(
                            'M1=') > -1:
                        #print each_line
                        cityClass = clsCityUrlGroup()
                        # City name: the text after the second '>' in the part
                        # of the line that precedes '</a>'.
                        cityClass.city = each_line.split('</a>')[0].split(
                            ">")[2]
                        # Link target: the single-quoted href value, made
                        # absolute with homepageUrl.
                        cityClass.url = homepageUrl + each_line.split(
                            "href='")[1].split("'")[0]

                        print 'Depart Url : ' + cityClass.url
                        try:
                            print >> exceptFile, cityClass.url
예제 #3
0
        self.url = ''
        self.code = ''
        self.productCode = ''
        self.airchk = ''
        self.city = ''

    def toString(self):
        """Return all product fields as one comma-separated 'label:value' string."""
        labelledFields = (
            ('name:', 'productname'),
            (',price:', 'price'),
            (',dDay:', 'dDay'),
            (',dTime:', 'dTime'),
            (',aDay:', 'aDay'),
            (',aTime:', 'aTime'),
            (',night:', 'night'),
            (',city:', 'city'),
            (',period:', 'period'),
            (',airCode:', 'airCode'),
            (',status:', 'status'),
            (',url:', 'url'),
            (',code:', 'code'),
            (',productCode:', 'productCode'),
            (',airchk:', 'airchk'),
        )
        text = ''
        for label, attrName in labelledFields:
            # Plain concatenation: a non-string field still raises TypeError.
            text = text + label + getattr(self, attrName)
        return text


# Fixed inputs for this probe: tour kind 'W' and an empty period.
tourkind = 'W'
period = ''
# Fetch the month view (sel_ym=201407) of one hard-coded product.
detailHtml = savefilegethtml.getHtml(
    'http://www.naeiltour.co.kr/jagiya/honeymoon/program_include.asp?good_cd=550201054&sel_ym=201407',
    '', '', 'naeiltourDetailHtml.txt')
# Collect the first quoted argument of every fn_goodDetail('...') call.
departDayList = list()
for detail_each_line in detailHtml:
    if detail_each_line.find("fn_goodDetail('") > -1:
        departDayList.append(
            detail_each_line.split("fn_goodDetail('")[1].split("'")[0])

# Look up the airline for each available departure date
productCls = clsProduct()

for dayInfo in departDayList:
    # NOTE(review): the URL is hard-coded (sel_day=20140712) and does not use
    # dayInfo, so each iteration requests the same URL -- confirm intent.
    productListUrl = 'http://www.naeiltour.co.kr/jagiya/honeymoon/program_include.asp?good_cd=550201054&sel_day=20140712'
    print 'ProductListUrl : ' + productListUrl
    productListHtml = savefilegethtml.getHtml(productListUrl, '', '',
                                              'naeiltourproductListHtml.txt')
예제 #4
0
        self.period = ''
        self.code = ''
        self.status = ''
        self.name = ''
        self.price = ''
        self.booked = ''
        self.url = ''

    def toString(self):
        """Return the package fields as one comma-separated 'label:value' string."""
        labelledFields = (
            ('Code:', 'code'),
            (',sDay:', 'sDay'),
            (',sTime:', 'sTime'),
            (',aDay:', 'aDay'),
            (',aTime:', 'aTime'),
            (',aCode:', 'aCode'),
            (',Period:', 'period'),
            (',status:', 'status'),
            (',name:', 'name'),
            (',price:', 'price'),
            (',booked:', 'booked'),
        )
        summary = ''
        for label, attrName in labelledFields:
            # Plain concatenation: a non-string field still raises TypeError.
            summary = summary + label + getattr(self, attrName)
        return summary


print '=============================================================================================================='
print 'PackageList Url : http://www.verygoodtour.com/Product/Package/PackageList?MenuCode=1010103&PageSize=200'
# Fetch the package list for MenuCode=1010103 (the same URL that is printed
# above); the page is also written to regionHtml.txt.
regionHtml = savefilegethtml.getHtml(
    'http://www.verygoodtour.com/Product/Package/PackageList?MenuCode=1010103&PageSize=200',
    '<div id="list_proviewM">', 'function BingPaging()', 'regionHtml.txt')
# Earlier urllib2-based fetch, kept commented out:
#regionHtml = urllib2.urlopen(menu.url).read()
#regionHtml = regionHtml[regionHtml.find('<div id="list_proviewM">'):regionHtml.find('function BingPaging()')]
#regionHtmlFile = open('regionHtml.txt', 'w')
#print >> regionHtmlFile, regionHtml
#regionHtmlFile.close()

#regionHtml = open('regionHtml.txt')
# Most recent master code seen; updated by 'img_ov_text2' lines below.
mastercode = ''

for each_line in regionHtml:
    if each_line.find('img_ov_text2') > -1:
        # URL part used to fetch the detail product list: the text between
        # "('" and "')" on this line.
        mastercode = each_line.split("('")[1].split("')")[0]
    elif each_line.find('class="title"') > -1:
        self.aCode = ''
        self.period = ''
        self.code = ''
        self.status = ''
        self.name = ''
        self.price = ''
        self.booked = ''
        self.url = ''
        
    def toString(self):
        """Return the package fields as one comma-separated 'label:value' string."""
        labelledFields = (
            ('Code:', 'code'),
            (',sDay:', 'sDay'),
            (',sTime:', 'sTime'),
            (',aDay:', 'aDay'),
            (',aTime:', 'aTime'),
            (',aCode:', 'aCode'),
            (',Period:', 'period'),
            (',status:', 'status'),
            (',name:', 'name'),
            (',price:', 'price'),
            (',booked:', 'booked'),
        )
        summary = ''
        for label, attrName in labelledFields:
            # Plain concatenation: a non-string field still raises TypeError.
            summary = summary + label + getattr(self, attrName)
        return summary


print '=============================================================================================================='
print 'PackageList Url : http://www.verygoodtour.com/Product/Package/PackageList?MenuCode=101092103&PageSize=200'
# Fetch the package list for MenuCode=101092103 (the same URL that is printed
# above); the page is also written to regionHtml.txt.
regionHtml = savefilegethtml.getHtml('http://www.verygoodtour.com/Product/Package/PackageList?MenuCode=101092103&PageSize=200', '<div id="list_proviewM">', 'function BingPaging()', 'regionHtml.txt')
# Earlier urllib2-based fetch, kept commented out:
#regionHtml = urllib2.urlopen(menu.url).read()
#regionHtml = regionHtml[regionHtml.find('<div id="list_proviewM">'):regionHtml.find('function BingPaging()')]
#regionHtmlFile = open('regionHtml.txt', 'w')
#print >> regionHtmlFile, regionHtml
#regionHtmlFile.close()

#regionHtml = open('regionHtml.txt')
# Most recent master code seen; updated by 'img_ov_text2' lines below.
mastercode = ''

# NOTE(review): the connect string contains "[email protected]", which looks
# like a scrubbed user/password@host value -- confirm the real DSN.
con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")
for each_line in regionHtml:
    if each_line.find('img_ov_text2') > -1:
        # URL part used to fetch the detail product list: the text between
        # "('" and "')" on this line.
        mastercode = each_line.split("('")[1].split("')")[0]
    elif each_line.find('class="title"') > -1:
예제 #6
0
        return 'No'


# Time variables..
tourAgency = 'vgtour'
# Target year and month come from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
#targetYear = '2014'
#targetMonth = '07'
# Timestamp (down to microseconds) used to name this run's exception log.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")

exceptFile = open('verygoodtourException' + scrappingStartTime + '.txt', 'w')
print >> exceptFile, "Start : %s" % time.ctime()

sitemapUrl = 'http://www.verygoodtour.com/Content/SiteMap.html'
sitemapHtml = savefilegethtml.getHtml(sitemapUrl, '', '', 'sitemapHtml.txt')
# Earlier urllib2-based fetch, kept commented out:
#sitemapHtml = urllib2.urlopen(sitemapUrl).read()
#sitemapHtmlFile = open('sitemapHtml.txt', 'w')
#print >> sitemapHtmlFile, sitemapHtml
#sitemapHtmlFile.close()
#sitemapHtml = open('sitemapHtml.txt')
#menulist = list()           # list of clsProduct objects holding the menu URLs
tourType = ''
departCity = ''
region = ''
depthIdx = 0
idx = 0
productList = list()  # products already seen are kept here so the same product is not fetched twice; only ones not in the list are fetched
productList.append('START')
# NOTE(review): the connect string contains "[email protected]", which looks
# like a scrubbed user/password@host value -- confirm the real DSN.
con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")
try:
        self.subMenuList = list()

# Time variables..
tourAgency = 'lottetour'
mainUrl = 'http://www.lottetour.com'
# Target year and month come from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
#targetYear = '2014'
#targetMonth = '07'
# Timestamp (down to microseconds) used to name this run's exception log.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")

exceptFileName = 'lottetourException' + scrappingStartTime + '.txt'
exceptFile = open(exceptFileName, 'w')
print >> exceptFile, "Start : %s" % time.ctime()

# NOTE(review): the lottetour page is stored under the file name
# 'onlinetourMainPage.txt' -- possibly a copy/paste leftover; confirm.
mainpageHtml = savefilegethtml.getHtml('http://www.lottetour.com/welcome', '<nav>', '</nav>', 'onlinetourMainPage.txt')


# Lookup table; every key maps to itself except 'free', which maps to 'fit'.
urlDict = dict()
urlDict['package'] = 'package'
urlDict['free'] = 'fit'
urlDict['honeymoon'] = 'honeymoon'
urlDict['golf'] = 'golf'
urlDict['cruise'] = 'cruise'

# Initial parser state and result holders for the code that follows.
startComment = False
firstOversea = True
subMenu = False
mainList = list()
clsMain = mainCls()
clsSubMenu = subMenuCls()
# Target year and month come from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
#targetYear = '2014'
#targetMonth = '07'
# Timestamp (down to microseconds) used to name this run's exception log.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
homepageUrl = 'http://www.naeiltour.co.kr'

exceptFile = open('naeiltourException' + scrappingStartTime + '.txt', 'w')
print >> exceptFile, "Start : %s" % time.ctime()

# Backpacking section start ========================================
print '@@@@@@@@@@@@@ backpack start @@@@@@@@@@@@@@@@@@@@'
print >> exceptFile, '@@@@@@@@@@@@@ backpack start @@@@@@@@@@@@@@@@@@@@'
backpackUrl = 'http://www.naeiltour.co.kr/backpack/eu_main.asp?area=40'
mainHtml = savefilegethtml.getHtml(backpackUrl, '<div id="left_mn">',
                                   '<div id="left_mn2">',
                                   'naeiltourbackpackHtml.txt')

# Becomes True at the first line containing '<!--'; nothing in the visible
# code resets it, so later list links are ignored.
comment = False
backpackMenuList = list()  # list of menus
country = ''
try:
    for each_line in mainHtml:
        if each_line.find('<!--') > -1:
            comment = True

        if comment == False and each_line.find('/backpack/list.asp?') > -1:
            backpackRegionClass = clsRegionUrl()
            backpackRegionClass.country = country
            # Link target: the double-quoted href value, made absolute.
            backpackRegionClass.url = homepageUrl + each_line.split(
                'href="')[1].split('"')[0]
예제 #9
0
                                ):  # 출발일정 눌렀을때 List가 펼쳐지는 경우랑, 페이지가 이동하는 경우 나눔..
                                    detailProductUrl = ''
                                    #if package.menuCode == 'A01':
                                    detailProductUrl = 'http://www.ybtour.co.kr/Goods/' + urlMap[
                                        package.
                                        menuCode] + '/inc_evList_ajax.asp?goodCD=' + detailProduct + '&startDT=' + targetYear + targetMonth

                                    #detailProductUrl = 'http://www.ybtour.co.kr/Goods/overseas/inc_evList_ajax.asp?goodCD=150201119&startDT=201408'

                                    #if detailProductUrl == 'http://www.ybtour.co.kr/Goods/Overseas/inc_evList_ajax.asp?goodCD=JAA2013113&startDT=201407':
                                    #print ''

                                    print 'Detail Product URL : ' + detailProductUrl
                                    print >> exceptFile, 'Detail Product URL : ', detailProductUrl
                                    detailProductList = savefilegethtml.getHtml(
                                        detailProductUrl, '', '',
                                        'ybtourTempFile.txt')

                                    try:
                                        # 2014. 06. 29. 여행상품명에서 국가, 도시코드 가져오는 부분으로 적용..
                                        #codeLists = codes.getCityCode(productNameList[codeIdx], sub2package.menuName, productCommentList[codeIdx], subpackage.menuName)
                                        codeLists = codes.getCityCode(
                                            productNameList[codeIdx],
                                            productCommentList[codeIdx])
                                        cityList = codeLists[0]
                                        nationList = codeLists[1]
                                        continentList = codeLists[2]
                                        siteList = codeList[
                                            3]  # 2014. 8. 3. site 추가

                                        if len(cityList) == 0 and len(
예제 #10
0
 detailProduct = pcode.split('s')[1]
 
 detailProductUrl = ''
 if not (package.menuCode == 'A03' or package.menuCode == 'A06'):       # 출발일정 눌렀을때 List가 펼쳐지는 경우랑, 페이지가 이동하는 경우 나눔..
     detailProductUrl = ''
     #if package.menuCode == 'A01':
     detailProductUrl = 'http://www.ybtour.co.kr/Goods/' + urlMap[package.menuCode] + '/inc_evList_ajax.asp?goodCD=' + detailProduct + '&startDT=' + targetYear + targetMonth
     
     #detailProductUrl = 'http://www.ybtour.co.kr/Goods/overseas/inc_evList_ajax.asp?goodCD=150201119&startDT=201408'
     
     #if detailProductUrl == 'http://www.ybtour.co.kr/Goods/Overseas/inc_evList_ajax.asp?goodCD=JAA2013113&startDT=201407':
         #print ''
     
     print 'Detail Product URL : ' + detailProductUrl
     print >> exceptFile, 'Detail Product URL : ', detailProductUrl
     detailProductList = savefilegethtml.getHtml(detailProductUrl, '', '', 'ybtourTempFile.txt')
     
     try:
         # 2014. 06. 29. 여행상품명에서 국가, 도시코드 가져오는 부분으로 적용..
         #codeLists = codes.getCityCode(productNameList[codeIdx], sub2package.menuName, productCommentList[codeIdx], subpackage.menuName)
         codeLists = codes.getCityCode(productNameList[codeIdx], productCommentList[codeIdx])
         cityList = codeLists[0]
         nationList = codeLists[1]
         continentList = codeLists[2]
         siteList = codeList[3]              # 2014. 8. 3. site 추가
         
         if len(cityList) == 0 and len(nationList) == 0 and len(continentList) == 0:
             codeList = codes.getCityCode(sub2package.menuName)
             cityList = codeList[0]
             nationList = codeList[1]
             continentList = codeList[2]
예제 #11
0
def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div):
    print 'Product Url : ', productCls.url
    print >> exceptFile, 'Product Url : ', productCls.url
    
     # 2014. 7. 23. 카테고리의 국가는 넣지 않기로 함...
    #codeList = codes.getCityCode(productCls.name.decode('utf-8'), detailUrl.name.decode('utf-8'), regionUrl.name.decode('utf-8'))
    codeList = codes.getCityCode(productCls.name.decode('utf-8'))
    cityList = codeList[0]
    nationList = codeList[1]
    continentList = codeList[2]
    siteList = codeList[3]              # 2014. 8. 3. site 추가
    
    if len(cityList) == 0 and len(nationList) == 0 and len(continentList) == 0:
        codeList = codes.getCityCode(detailUrl.name.decode('utf-8'))
        cityList = codeList[0]
        nationList = codeList[1]
        continentList = codeList[2]
        siteList = codeList[3]              # 2014. 8. 3. site 추가
    
    # Master 상품 입력
    query = tourQuery.getMasterMergeQuery(tourAgency, productCls.code, productCls.name.decode('utf-8'), menu.kind, dmst_div, '', '')
    #print query
    cursor = con.cursor()
    cursor.execute(query)
    con.commit()
    # Region Data 삭제
    codes.insertRegionData(tourAgency, productCls.code, cityList, nationList, continentList, siteList)
    
    detailProductHtml = savefilegethtml.getHtml(productCls.url, '', '', 'tour2000DetailHtml'+targetMonth+'.txt')
    pl10Idx = 0
    for detailProduct in detailProductHtml:
        try:
            if detailProduct.find('<span class="text_pink">') > -1 and detailProduct.find('<a href=') < 0:
                detailCls = clsProductDetail()
                numArray = tourUtil.getNumArray(detailProduct)
                if len(numArray) > 7:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = numArray[2] + numArray[3]
                    detailCls.aDay = targetYear + numArray[4] + numArray[5]
                    detailCls.aTime = numArray[6] + numArray[7]
                elif len(numArray) == 4:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = ''
                    detailCls.aDay = targetYear + numArray[2] + numArray[3]
                    detailCls.aTime = ''
            elif detailProduct.find('onError') > -1:
                detailCls.airCode = detailProduct[detailProduct.find('.gif') - 4:detailProduct.find('.gif') - 2]
            elif detailProduct.find('text_redB') > -1:
                numArray = tourUtil.getNumArray(tourUtil.getRemovedHtmlTag(detailProduct))
                for num in numArray:
                    detailCls.price += num
            elif detailProduct.find('</a></td>') > -1:
                if detailProduct.find('text_pink') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약가능')
                elif detailProduct.find('text_blau') > -1:
                    detailCls.status = codes.getStatus('tour2000', '출발가능')
                elif detailProduct.find('text_green') > -1:
                    detailCls.status = codes.getStatus('tour2000', '대기예약')
                elif detailProduct.find('text_grayLightSmall') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약마감')
                    
                detailCls.remainSeat = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip()
            elif detailProduct.find('<p class="pl10">') > -1:
                if pl10Idx == 0:
                    pl10Idx = 1
                    detailCls.productName = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip()
                    detailCls.url = mainUrl + tourUtil.getTagAttr(detailProduct, 'a', 'href')
                    detailCls.productSeq = detailProduct.split('ev_ym=')[1].split('&')[0] + detailProduct.split('ev_seq=')[1].split('&')[0]
                else:
                    pl10Idx = 0
                
                if detailCls.productName.find('부산출발') > -1:
                    departCity = 'PUS'
                else:
                    departCity = 'ICN'
                
                query = tourQuery.getDetailMergeQuery(tourAgency, productCls.code, detailCls.productSeq, detailCls.productName.decode('utf-8'), detailCls.dDay+detailCls.dTime, detailCls.aDay+detailCls.aTime, productCls.period, departCity, '', detailCls.airCode, detailCls.status, detailCls.url, detailCls.price, '0', '0', '0', '', productCls.night)
                #print >> exceptFile, query
                #print query
                cursor = con.cursor()
                cursor.execute(query)
                con.commit()
                #break
        except:
            print >> exceptFile, 'detail parcing Error : ', sys.exc_info()[0]
            pass
예제 #12
0
# Time variables..
tourAgency = 'tour2000'
# Target year and month come from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
#targetYear = '2014'
#targetMonth = '07'

# Timestamp (down to microseconds) used to name this run's exception log.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")

exceptFileName = 'tour2000Exception' + scrappingStartTime + '.txt'
exceptFile = open(exceptFileName, 'w')
print >> exceptFile, "Start : %s" % time.ctime()

mainUrl = 'http://www.tour2000.co.kr'

mainHtml = savefilegethtml.getHtml('http://www.tour2000.co.kr/index.asp', '<div class="navi_wholeMenu_box">', '<!-- navi_wholeMenu_wrapper// -->', 'tour2000mainHtml.txt')

startMainUrl = False
menuList = list()
MenuUrlCls = clsMenuUrls()
for each_line in mainHtml:
    # A 'text_pinkB14' line starts a new menu section; its tag-stripped text
    # is mapped to a tour kind.
    if each_line.find('text_pinkB14') > -1:
        MenuUrlCls = clsMenuUrls()
        MenuUrlCls.kind = codes.getTourKind(tourAgency, tourUtil.getRemovedHtmlTag(each_line).strip())
        startMainUrl = True
    
    # For now skip everything except overseas travel (package), honeymoon, golf and domestic (Jeju)
    if MenuUrlCls.kind == 'A' or MenuUrlCls.kind == 'F' or MenuUrlCls.kind == 'H' or MenuUrlCls.kind == 'No':
        continue    
    
    if startMainUrl and each_line.find('<li>') > -1:
예제 #13
0
def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div):
    print 'Product Url : ', productCls.url
    print >> exceptFile, 'Product Url : ', productCls.url

    # 2014. 7. 23. 카테고리의 국가는 넣지 않기로 함...
    #codeList = codes.getCityCode(productCls.name.decode('utf-8'), detailUrl.name.decode('utf-8'), regionUrl.name.decode('utf-8'))
    codeList = codes.getCityCode(productCls.name.decode('utf-8'))
    cityList = codeList[0]
    nationList = codeList[1]
    continentList = codeList[2]
    siteList = codeList[3]  # 2014. 8. 3. site 추가

    if len(cityList) == 0 and len(nationList) == 0 and len(continentList) == 0:
        codeList = codes.getCityCode(detailUrl.name.decode('utf-8'))
        cityList = codeList[0]
        nationList = codeList[1]
        continentList = codeList[2]
        siteList = codeList[3]  # 2014. 8. 3. site 추가

    # Master 상품 입력
    query = tourQuery.getMasterMergeQuery(tourAgency, productCls.code,
                                          productCls.name.decode('utf-8'),
                                          menu.kind, dmst_div, '', '')
    #print query
    cursor = con.cursor()
    cursor.execute(query)
    con.commit()
    # Region Data 삭제
    codes.insertRegionData(tourAgency, productCls.code, cityList, nationList,
                           continentList, siteList)

    detailProductHtml = savefilegethtml.getHtml(
        productCls.url, '', '', 'tour2000DetailHtml' + targetMonth + '.txt')
    pl10Idx = 0
    for detailProduct in detailProductHtml:
        try:
            if detailProduct.find(
                    '<span class="text_pink">') > -1 and detailProduct.find(
                        '<a href=') < 0:
                detailCls = clsProductDetail()
                numArray = tourUtil.getNumArray(detailProduct)
                if len(numArray) > 7:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = numArray[2] + numArray[3]
                    detailCls.aDay = targetYear + numArray[4] + numArray[5]
                    detailCls.aTime = numArray[6] + numArray[7]
                elif len(numArray) == 4:
                    detailCls.dDay = targetYear + numArray[0] + numArray[1]
                    detailCls.dTime = ''
                    detailCls.aDay = targetYear + numArray[2] + numArray[3]
                    detailCls.aTime = ''
            elif detailProduct.find('onError') > -1:
                detailCls.airCode = detailProduct[detailProduct.find('.gif') -
                                                  4:detailProduct.find('.gif'
                                                                       ) - 2]
            elif detailProduct.find('text_redB') > -1:
                numArray = tourUtil.getNumArray(
                    tourUtil.getRemovedHtmlTag(detailProduct))
                for num in numArray:
                    detailCls.price += num
            elif detailProduct.find('</a></td>') > -1:
                if detailProduct.find('text_pink') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약가능')
                elif detailProduct.find('text_blau') > -1:
                    detailCls.status = codes.getStatus('tour2000', '출발가능')
                elif detailProduct.find('text_green') > -1:
                    detailCls.status = codes.getStatus('tour2000', '대기예약')
                elif detailProduct.find('text_grayLightSmall') > -1:
                    detailCls.status = codes.getStatus('tour2000', '예약마감')

                detailCls.remainSeat = tourUtil.getRemovedHtmlTag(
                    detailProduct).replace("'", "").strip()
            elif detailProduct.find('<p class="pl10">') > -1:
                if pl10Idx == 0:
                    pl10Idx = 1
                    detailCls.productName = tourUtil.getRemovedHtmlTag(
                        detailProduct).replace("'", "").strip()
                    detailCls.url = mainUrl + tourUtil.getTagAttr(
                        detailProduct, 'a', 'href')
                    detailCls.productSeq = detailProduct.split(
                        'ev_ym=')[1].split('&')[0] + detailProduct.split(
                            'ev_seq=')[1].split('&')[0]
                else:
                    pl10Idx = 0

                if detailCls.productName.find('부산출발') > -1:
                    departCity = 'PUS'
                else:
                    departCity = 'ICN'

                query = tourQuery.getDetailMergeQuery(
                    tourAgency, productCls.code, detailCls.productSeq,
                    detailCls.productName.decode('utf-8'),
                    detailCls.dDay + detailCls.dTime,
                    detailCls.aDay + detailCls.aTime, productCls.period,
                    departCity, '', detailCls.airCode, detailCls.status,
                    detailCls.url, detailCls.price, '0', '0', '0', '',
                    productCls.night)
                #print >> exceptFile, query
                #print query
                cursor = con.cursor()
                cursor.execute(query)
                con.commit()
                #break
        except:
            print >> exceptFile, 'detail parcing Error : ', sys.exc_info()[0]
            pass
예제 #14
0
tourAgency = 'tour2000'
# Target year and month come from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
#targetYear = '2014'
#targetMonth = '07'

# Timestamp (down to microseconds) used to name this run's exception log.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")

exceptFileName = 'tour2000Exception' + scrappingStartTime + '.txt'
exceptFile = open(exceptFileName, 'w')
print >> exceptFile, "Start : %s" % time.ctime()

mainUrl = 'http://www.tour2000.co.kr'

mainHtml = savefilegethtml.getHtml('http://www.tour2000.co.kr/index.asp',
                                   '<div class="navi_wholeMenu_box">',
                                   '<!-- navi_wholeMenu_wrapper// -->',
                                   'tour2000mainHtml.txt')

startMainUrl = False
menuList = list()
MenuUrlCls = clsMenuUrls()
for each_line in mainHtml:
    # A 'text_pinkB14' line starts a new menu section; its tag-stripped text
    # is mapped to a tour kind.
    if each_line.find('text_pinkB14') > -1:
        MenuUrlCls = clsMenuUrls()
        MenuUrlCls.kind = codes.getTourKind(
            tourAgency,
            tourUtil.getRemovedHtmlTag(each_line).strip())
        startMainUrl = True

    # For now skip everything except overseas travel (package), honeymoon, golf and domestic (Jeju)
    if MenuUrlCls.kind == 'A' or MenuUrlCls.kind == 'F' or MenuUrlCls.kind == 'H' or MenuUrlCls.kind == 'No':
예제 #15
0
        self.period = ''
        self.airCode = ''
        self.status = ''
        self.url = ''
        self.code = ''
        self.productCode = ''
        self.airchk = ''
        self.city = ''
    
    def toString(self):
        """Return all product fields as one comma-separated 'label:value' string."""
        labelledFields = (
            ('name:', 'productname'),
            (',price:', 'price'),
            (',dDay:', 'dDay'),
            (',dTime:', 'dTime'),
            (',aDay:', 'aDay'),
            (',aTime:', 'aTime'),
            (',night:', 'night'),
            (',city:', 'city'),
            (',period:', 'period'),
            (',airCode:', 'airCode'),
            (',status:', 'status'),
            (',url:', 'url'),
            (',code:', 'code'),
            (',productCode:', 'productCode'),
            (',airchk:', 'airchk'),
        )
        text = ''
        for label, attrName in labelledFields:
            # Plain concatenation: a non-string field still raises TypeError.
            text = text + label + getattr(self, attrName)
        return text
# Fixed inputs for this probe: tour kind 'F' and an empty period.
tourkind = 'F'
period = ''
# Fetch the month view (sel_ym=201407) of one hard-coded product.
detailHtml = savefilegethtml.getHtml('http://www.naeiltour.co.kr/friday/program/program_include.asp?good_cd=2302009532&sel_ym=201407', '', '', 'naeiltourDetailHtml.txt')
# NOTE(review): the URL printed here (good_cd=23020145) differs from the one
# fetched above (good_cd=2302009532) -- confirm which is intended.
print 'http://www.naeiltour.co.kr/friday/program/program_include.asp?good_cd=23020145&sel_ym=201407'
# Collect the first quoted argument of every fn_goodDetail('...') call.
departDayList = list()
for detail_each_line in detailHtml:
    if detail_each_line.find("fn_goodDetail('") > -1:
        departDayList.append(detail_each_line.split("fn_goodDetail('")[1].split("'")[0])
        
# Look up the airline for each available departure date
productCls = clsProduct()

for dayInfo in departDayList:
    # NOTE(review): the URL is hard-coded (sel_day=20140708) and does not use
    # dayInfo, so each iteration requests the same URL -- confirm intent.
    productListUrl = 'http://www.naeiltour.co.kr/friday/program/program_include.asp?good_cd=2302009532&sel_day=20140708'
    print 'ProductListUrl : ' + productListUrl
    productListHtml = savefilegethtml.getHtml(productListUrl, '', '', 'naeiltourproductListHtml.txt')
    print 'ProductListUrl : ' + productListUrl
    for product in productListHtml:
예제 #16
0
# Time variables..
tourAgency = 'onlinetour'
mainUrl = 'http://www.onlinetour.co.kr/web/tour'
mainUrl2 = 'http://www.onlinetour.co.kr'
# Target year and month come from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
#targetYear = '2014'
#targetMonth = '07'
# Timestamp (down to microseconds) used to name this run's exception log.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")

exceptFileName = 'onlinetourException' + scrappingStartTime + '.txt'
exceptFile = open(exceptFileName, 'w')
print >> exceptFile, "Start : %s" % time.ctime()

mainpageHtml = savefilegethtml.getHtml('http://www.onlinetour.co.kr/web/home', '<li id="n_pack">', '<!--}} ot_navi-->', 'onlinetourMainPage.txt')

# Part that fills in the URLs...
mainMenuList = list()
mainMenuUrls = clsMenuUrls()
subMenuUrls = clsSubMenuUrls()
detailRegionUrls = clsDetailRegionUrls()
chkFree = False
chkDomestic = False
for menuList in mainpageHtml:
    try:
        #print menuList
        # An anchor line without '<li>' starts a new main-menu entry.
        if menuList.find('<a href=') > -1 and menuList.find('<li>') < 0:
            mainMenuUrls = clsMenuUrls()
            mainMenuUrls.name = tourUtil.getRemovedHtmlTag(menuList).strip()
            mainMenuUrls.url = tourUtil.getTagAttr(menuList, 'a', 'href')
예제 #17
0
        self.url = ''
        self.code = ''
        self.productCode = ''
        self.airchk = ''
        self.city = ''

    def toString(self):
        """Return all product fields as one comma-separated 'label:value' string."""
        labelledFields = (
            ('name:', 'productname'),
            (',price:', 'price'),
            (',dDay:', 'dDay'),
            (',dTime:', 'dTime'),
            (',aDay:', 'aDay'),
            (',aTime:', 'aTime'),
            (',night:', 'night'),
            (',city:', 'city'),
            (',period:', 'period'),
            (',airCode:', 'airCode'),
            (',status:', 'status'),
            (',url:', 'url'),
            (',code:', 'code'),
            (',productCode:', 'productCode'),
            (',airchk:', 'airchk'),
        )
        text = ''
        for label, attrName in labelledFields:
            # Plain concatenation: a non-string field still raises TypeError.
            text = text + label + getattr(self, attrName)
        return text


# Fixed inputs for this probe: tour kind 'F' and an empty period.
tourkind = 'F'
period = ''
# Fetch the month view (sel_ym=201407) of one hard-coded product.
detailHtml = savefilegethtml.getHtml(
    'http://www.naeiltour.co.kr/friday/program/program_include.asp?good_cd=2302009532&sel_ym=201407',
    '', '', 'naeiltourDetailHtml.txt')
# NOTE(review): the URL printed here (good_cd=23020145) differs from the one
# fetched above (good_cd=2302009532) -- confirm which is intended.
print 'http://www.naeiltour.co.kr/friday/program/program_include.asp?good_cd=23020145&sel_ym=201407'
# Collect the first quoted argument of every fn_goodDetail('...') call.
departDayList = list()
for detail_each_line in detailHtml:
    if detail_each_line.find("fn_goodDetail('") > -1:
        departDayList.append(
            detail_each_line.split("fn_goodDetail('")[1].split("'")[0])

# Look up the airline for each available departure date
productCls = clsProduct()

for dayInfo in departDayList:
    # NOTE(review): the URL is hard-coded (sel_day=20140708) and does not use
    # dayInfo, so each iteration requests the same URL -- confirm intent.
    productListUrl = 'http://www.naeiltour.co.kr/friday/program/program_include.asp?good_cd=2302009532&sel_day=20140708'
    print 'ProductListUrl : ' + productListUrl
    productListHtml = savefilegethtml.getHtml(productListUrl, '', '',
# Target year and month come from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
#targetYear = '2014'
#targetMonth = '07'
# Timestamp (down to microseconds) used to name this run's exception log.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
homepageUrl = 'http://www.naeiltour.co.kr'

exceptFile = open('naeiltourException' + scrappingStartTime + '.txt', 'w')
print >> exceptFile, "Start : %s" % time.ctime()

# Backpacking section start ========================================
print '@@@@@@@@@@@@@ backpack start @@@@@@@@@@@@@@@@@@@@'
print >> exceptFile, '@@@@@@@@@@@@@ backpack start @@@@@@@@@@@@@@@@@@@@'
backpackUrl = 'http://www.naeiltour.co.kr/backpack/eu_main.asp?area=40'
mainHtml = savefilegethtml.getHtml(backpackUrl, '<div id="left_mn">', '<div id="left_mn2">', 'naeiltourbackpackHtml.txt')

# Becomes True at the first line containing '<!--'; nothing in the visible
# code resets it, so later list links are ignored.
comment = False
backpackMenuList = list()       # list of menus
country = ''
try:
    for each_line in mainHtml:
        if each_line.find('<!--') > -1:
            comment = True
            
        if comment == False and each_line.find('/backpack/list.asp?') > -1:
            backpackRegionClass = clsRegionUrl()
            backpackRegionClass.country = country
            # Link target: the double-quoted href value, made absolute.
            backpackRegionClass.url = homepageUrl + each_line.split('href="')[1].split('"')[0]
            # Region name: the text between '">-' and the next '<', when the
            # line has that marker.
            if each_line.find('">-') > -1:
                backpackRegionClass.region = each_line.split('">-')[1].split('<')[0].strip()
def _naeiltourExecuteAndCommit(con, query):
    """Run *query* on a fresh cursor of *con*, commit, and always close the cursor."""
    cursor = con.cursor()
    try:
        cursor.execute(query)
        con.commit()
    finally:
        cursor.close()


def _naeiltourAirCodeFromGif(product):
    """Return the two characters that immediately precede '.gif' in *product*.

    When '.gif' is absent, ``find`` yields -1 and the slice becomes
    ``product[-3:-1]``; callers that care must check for '.gif' first.
    """
    gifPos = product.find('.gif')
    return product[gifPos - 2:gifPos]


def _naeiltourSetNightAndPeriod(productCls, product):
    """Fill ``productCls.night`` and ``productCls.period`` from one HTML line.

    The line is decoded as cp949 and split on the character u'박'.  The last
    number-like token before the first split point becomes ``night`` and the
    first token after it becomes ``period`` (both UTF-8 encoded).  If the line
    does not contain u'박', both are set to '0'.

    Raises IndexError when a token is missing; ``night`` is assigned before
    ``period`` is looked up, so it may already be updated when that happens.
    """
    splitText = product.decode('cp949').split(u'박')
    if len(splitText) > 1:
        # The character class accepts digits and a literal '^'.
        tokens = re.findall(u'[\\^0-9]+',
                            tourUtil.getRemovedHtmlTag(splitText[0]))
        productCls.night = tokens[-1].encode('utf-8')
        tokens = re.findall(u'[\\^0-9]+',
                            tourUtil.getRemovedHtmlTag(splitText[1]))
        productCls.period = tokens[0].encode('utf-8')
    else:
        productCls.night = '0'
        productCls.period = '0'


def _naeiltourMergeDetail(con, productcode, productCls, departCity, night):
    """Build the detail merge statement for *productCls* and execute it on *con*."""
    # NOTE(review): the statement text is assembled by savefilegethtml from
    # scraped values; confirm that helper escapes or binds them.
    query = savefilegethtml.getDetailMergeQueryTest1(
        'naeiltour', productcode, productCls.code, productCls.productname,
        '20' + productCls.dDay, '', productCls.period, departCity, '',
        productCls.airCode, productCls.status, productCls.url,
        productCls.price, '0', '0', '0', '', night)
    _naeiltourExecuteAndCommit(con, query)


def searchProduct(filename,
                  productcode,
                  productName,
                  period,
                  targetUrl,
                  listUrl,
                  productDetailUrl,
                  departCity,
                  tourkind,
                  dmst_div,
                  country='',
                  city='',
                  comment=''):
    """Scrape one naeiltour product and merge it into the database.

    Fetches *targetUrl*, collects every ``fn_goodDetail('...')`` argument as a
    departure day, merges one master row, then for each departure day fetches
    ``listUrl + productcode + '&sel_day=' + day`` and merges a detail row for
    every matching line of that page.

    Args:
        filename: open file object that log lines are written to.
        productcode: product code appended to *listUrl* / *productDetailUrl*.
        productName: product name stored on each detail row.
        period: when non-empty and *tourkind* is 'F', used as the period of
            every detail row instead of parsing it from the page.
        targetUrl: page listing the available departure days.
        listUrl: URL prefix of the per-day product list page.
        productDetailUrl: URL prefix stored as each detail row's link.
        departCity: departure city code; for *tourkind* 'D' it is replaced per
            line by 'PUS' or 'ICN' depending on the line's colour markup.
        tourkind: 'F', 'W', 'G' or 'D'; other values only produce the master row.
        dmst_div, country, city, comment: passed through to the master merge.

    Errors are logged to *filename* and never raised: a failure on one line or
    one day does not stop the remaining lines or days.
    """
    detailHtml = savefilegethtml.getHtml(targetUrl, '', '',
                                         'naeiltourDetailHtml.txt')
    print >> filename, 'TargetUrl : ', targetUrl
    departDayList = [
        line.split("fn_goodDetail('")[1].split("'")[0]
        for line in detailHtml
        if line.find("fn_goodDetail('") > -1
    ]

    # Bound before the try so that `finally` cannot hit an unbound name when
    # connect() itself fails.
    con = None
    # Part that looks up the airline for each available departure date
    try:
        # NOTE(review): credentials are embedded in this literal; consider
        # moving them to configuration.
        con = cx_Oracle.connect(
            "bigtour/[email protected]:1521/ora11g")

        codeList = codes.getCityCode(productName, city, comment, country)
        cityList = codeList[0]
        nationList = codeList[1]

        query = savefilegethtml.getMasterMergeQueryTest1(
            'naeiltour', productcode, '', country, city, productName, tourkind,
            dmst_div, comment, '', nationList, cityList)  # A : abroad
        _naeiltourExecuteAndCommit(con, query)

        # Holds the most recent fn_price(...) record; later lines of the same
        # page complete it before it is merged.
        productCls = clsProduct()

        for dayInfo in departDayList:
            try:
                productListUrl = listUrl + productcode + '&sel_day=' + dayInfo
                print('ProductListUrl : ' + productListUrl)
                productListHtml = savefilegethtml.getHtml(
                    productListUrl, '', '', 'naeiltourproductListHtml.txt')
                print >> filename, 'ProductListUrl : ' + productListUrl
                for product in productListHtml:
                    try:
                        if product.find("fn_price('") > -1:
                            # A new record starts: the quoted arguments of
                            # fn_price(...) sit at the odd split indexes.
                            productCls = clsProduct()
                            productSplit = product.split('fn_price')[1].split(
                                "'")
                            productCls.productCode = productSplit[1]
                            productCls.dDay = productSplit[3]
                            productCls.code = productSplit[5]
                            if tourkind == 'W' or tourkind == 'G':
                                productCls.airCode = _naeiltourAirCodeFromGif(
                                    product)
                            else:
                                # Korean airport name; a two-letter code is
                                # filled in later from the .gif file name.
                                productCls.airCode = productSplit[7]
                            productCls.price = productSplit[9].replace(',', '')
                            # blank: bookable, 03: closing soon, 05: closed
                            productCls.status = codes.getStatus(
                                'naeiltour', productSplit[11])
                            productCls.url = (productDetailUrl + productcode +
                                              '&sel_day=' + productCls.dDay)
                            productCls.productname = productName
                            productCls.dTime = ''
                            productCls.aDay = ''
                            productCls.aTime = ''

                        if period != '':
                            # Caller supplied the period: only 'F' is handled.
                            if (tourkind == 'F' and
                                    product.find('<td width="134">') > -1):
                                productCls.period = period
                                productCls.airCode = _naeiltourAirCodeFromGif(
                                    product)
                                _naeiltourMergeDetail(con, productcode,
                                                      productCls, departCity,
                                                      '')
                        elif tourkind == 'F' or tourkind == 'D':
                            if (product.find('<td class="FRIDAYSPACING" >') > -1
                                    and product.find('.gif') > -1):
                                productCls.airCode = _naeiltourAirCodeFromGif(
                                    product)

                            if product.find('idth="220">') > -1:
                                _naeiltourSetNightAndPeriod(productCls, product)
                                if tourkind == 'D':
                                    # 'D' rows derive the departure city from
                                    # the line's colour markup.
                                    if product.find('COLOR=BLUE>') > -1:
                                        rowDepartCity = 'PUS'
                                    else:
                                        rowDepartCity = 'ICN'
                                else:
                                    rowDepartCity = departCity
                                _naeiltourMergeDetail(con, productcode,
                                                      productCls,
                                                      rowDepartCity,
                                                      productCls.night)
                        elif tourkind == 'W' or tourkind == 'G':
                            if product.find('valign="middle"') > -1:
                                _naeiltourSetNightAndPeriod(productCls, product)
                                _naeiltourMergeDetail(con, productcode,
                                                      productCls, departCity,
                                                      productCls.night)

                    except cx_Oracle.DatabaseError as err1:
                        print >> filename, err1
                    except Exception:
                        # Exception (not a bare except) so Ctrl-C and
                        # SystemExit still stop the scraper.
                        print >> filename, "Depth3 Error:", sys.exc_info()[0]
            except Exception:
                print >> filename, "Depth2 Error:", sys.exc_info()[0]

    except Exception:
        print >> filename, "Depth1 Error:", sys.exc_info()[0]
    finally:
        if con is not None:
            con.close()
def _naeiltourExecuteAndCommit(con, query):
    """Run *query* on a fresh cursor of *con*, commit, and always close the cursor."""
    cursor = con.cursor()
    try:
        cursor.execute(query)
        con.commit()
    finally:
        cursor.close()


def _naeiltourAirCodeFromGif(product):
    """Return the two characters that immediately precede '.gif' in *product*.

    When '.gif' is absent, ``find`` yields -1 and the slice becomes
    ``product[-3:-1]``; callers that care must check for '.gif' first.
    """
    gifPos = product.find('.gif')
    return product[gifPos - 2:gifPos]


def _naeiltourSetNightAndPeriod(productCls, product):
    """Fill ``productCls.night`` and ``productCls.period`` from one HTML line.

    The line is decoded as cp949 and split on the character u'박'.  The last
    number-like token before the first split point becomes ``night`` and the
    first token after it becomes ``period`` (both UTF-8 encoded).  If the line
    does not contain u'박', both are set to '0'.

    Raises IndexError when a token is missing; ``night`` is assigned before
    ``period`` is looked up, so it may already be updated when that happens.
    """
    splitText = product.decode('cp949').split(u'박')
    if len(splitText) > 1:
        # The character class accepts digits and a literal '^'.
        tokens = re.findall(u'[\\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[0]))
        productCls.night = tokens[-1].encode('utf-8')
        tokens = re.findall(u'[\\^0-9]+', tourUtil.getRemovedHtmlTag(splitText[1]))
        productCls.period = tokens[0].encode('utf-8')
    else:
        productCls.night = '0'
        productCls.period = '0'


def _naeiltourMergeDetail(con, productcode, productCls, departCity, night):
    """Build the detail merge statement for *productCls* and execute it on *con*."""
    # NOTE(review): the statement text is assembled by savefilegethtml from
    # scraped values; confirm that helper escapes or binds them.
    query = savefilegethtml.getDetailMergeQueryTest1('naeiltour', productcode, productCls.code, productCls.productname, '20' + productCls.dDay, '', productCls.period, departCity, '', productCls.airCode, productCls.status, productCls.url, productCls.price, '0', '0', '0', '', night)
    _naeiltourExecuteAndCommit(con, query)


def searchProduct(filename, productcode, productName, period, targetUrl, listUrl, productDetailUrl, departCity, tourkind, dmst_div, country='', city='', comment=''):
    """Scrape one naeiltour product and merge it into the database.

    Fetches *targetUrl*, collects every ``fn_goodDetail('...')`` argument as a
    departure day, merges one master row, then for each departure day fetches
    ``listUrl + productcode + '&sel_day=' + day`` and merges a detail row for
    every matching line of that page.

    Args:
        filename: open file object that log lines are written to.
        productcode: product code appended to *listUrl* / *productDetailUrl*.
        productName: product name stored on each detail row.
        period: when non-empty and *tourkind* is 'F', used as the period of
            every detail row instead of parsing it from the page.
        targetUrl: page listing the available departure days.
        listUrl: URL prefix of the per-day product list page.
        productDetailUrl: URL prefix stored as each detail row's link.
        departCity: departure city code; for *tourkind* 'D' it is replaced per
            line by 'PUS' or 'ICN' depending on the line's colour markup.
        tourkind: 'F', 'W', 'G' or 'D'; other values only produce the master row.
        dmst_div, country, city, comment: passed through to the master merge.

    Errors are logged to *filename* and never raised: a failure on one line or
    one day does not stop the remaining lines or days.
    """
    detailHtml = savefilegethtml.getHtml(targetUrl, '', '', 'naeiltourDetailHtml.txt')
    print >> filename, 'TargetUrl : ', targetUrl
    departDayList = [line.split("fn_goodDetail('")[1].split("'")[0]
                     for line in detailHtml
                     if line.find("fn_goodDetail('") > -1]

    # Bound before the try so that `finally` cannot hit an unbound name when
    # connect() itself fails.
    con = None
    # Part that looks up the airline for each available departure date
    try:
        # NOTE(review): credentials are embedded in this literal; consider
        # moving them to configuration.
        con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")

        codeList = codes.getCityCode(productName, city, comment, country)
        cityList = codeList[0]
        nationList = codeList[1]

        query = savefilegethtml.getMasterMergeQueryTest1('naeiltour', productcode, '', country, city, productName, tourkind, dmst_div, comment, '', nationList, cityList)  # A : abroad
        _naeiltourExecuteAndCommit(con, query)

        # Holds the most recent fn_price(...) record; later lines of the same
        # page complete it before it is merged.
        productCls = clsProduct()

        for dayInfo in departDayList:
            try:
                productListUrl = listUrl + productcode + '&sel_day=' + dayInfo
                print('ProductListUrl : ' + productListUrl)
                productListHtml = savefilegethtml.getHtml(productListUrl, '', '', 'naeiltourproductListHtml.txt')
                print >> filename, 'ProductListUrl : ' + productListUrl
                for product in productListHtml:
                    try:
                        if product.find("fn_price('") > -1:
                            # A new record starts: the quoted arguments of
                            # fn_price(...) sit at the odd split indexes.
                            productCls = clsProduct()
                            productSplit = product.split('fn_price')[1].split("'")
                            productCls.productCode = productSplit[1]
                            productCls.dDay = productSplit[3]
                            productCls.code = productSplit[5]
                            if tourkind == 'W' or tourkind == 'G':
                                productCls.airCode = _naeiltourAirCodeFromGif(product)
                            else:
                                # Korean airport name; a two-letter code is
                                # filled in later from the .gif file name.
                                productCls.airCode = productSplit[7]
                            productCls.price = productSplit[9].replace(',', '')
                            # blank: bookable, 03: closing soon, 05: closed
                            productCls.status = codes.getStatus('naeiltour', productSplit[11])
                            productCls.url = productDetailUrl + productcode + '&sel_day=' + productCls.dDay
                            productCls.productname = productName
                            productCls.dTime = ''
                            productCls.aDay = ''
                            productCls.aTime = ''

                        if period != '':
                            # Caller supplied the period: only 'F' is handled.
                            if tourkind == 'F' and product.find('<td width="134">') > -1:
                                productCls.period = period
                                productCls.airCode = _naeiltourAirCodeFromGif(product)
                                _naeiltourMergeDetail(con, productcode, productCls, departCity, '')
                        elif tourkind == 'F' or tourkind == 'D':
                            if product.find('<td class="FRIDAYSPACING" >') > -1 and product.find('.gif') > -1:
                                productCls.airCode = _naeiltourAirCodeFromGif(product)

                            if product.find('idth="220">') > -1:
                                _naeiltourSetNightAndPeriod(productCls, product)
                                if tourkind == 'D':
                                    # 'D' rows derive the departure city from
                                    # the line's colour markup.
                                    if product.find('COLOR=BLUE>') > -1:
                                        rowDepartCity = 'PUS'
                                    else:
                                        rowDepartCity = 'ICN'
                                else:
                                    rowDepartCity = departCity
                                _naeiltourMergeDetail(con, productcode, productCls, rowDepartCity, productCls.night)
                        elif tourkind == 'W' or tourkind == 'G':
                            if product.find('valign="middle"') > -1:
                                _naeiltourSetNightAndPeriod(productCls, product)
                                _naeiltourMergeDetail(con, productcode, productCls, departCity, productCls.night)

                    except cx_Oracle.DatabaseError as err1:
                        print >> filename, err1
                    except Exception:
                        # Exception (not a bare except) so Ctrl-C and
                        # SystemExit still stop the scraper.
                        print >> filename, "Depth3 Error:", sys.exc_info()[0]
            except Exception:
                print >> filename, "Depth2 Error:", sys.exc_info()[0]

    except Exception:
        print >> filename, "Depth1 Error:", sys.exc_info()[0]
    finally:
        if con is not None:
            con.close()
# Log file for this run, named with the run's start timestamp; it is written
# to with `print >> exceptFile`.
exceptFile = open('tourbaksaException'+scrappingStartTime+'.txt', 'w')
print >> exceptFile, "Start : %s" % time.ctime()

# Dump the menu list to stdout for inspection.
print menulist

# Module-level Oracle connection.
# NOTE(review): credentials are embedded in this literal; consider moving them
# to configuration.
con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")

# 메뉴에 다 잘들어 갔나 확인..
for level1 in menulist:
    for level2 in level1.tourkindgroup:
        for level3 in level2.regionUrlGroup:
            print 'Depart City : ' + level1.departCity + ', TourKind:' + level2.tourkind + ', Region : ' + level3.region + '(' + level3.url + ')'
            
            try:
                print >> exceptFile, level3.url
                regionHtml = savefilegethtml.getHtml(level3.url, '<div class="leftArea">', '</nav><!-- //lnb -->', 'tourbaksaRegionHtml.txt', '', '')
                
                for each_line in regionHtml:
                    if each_line.find('<li class="') > -1 and each_line.find('M1=') > -1:
                        #print each_line
                        cityClass = clsCityUrlGroup()
                        cityClass.city = each_line.split('</a>')[0].split(">")[2]
                        cityClass.url = homepageUrl + each_line.split("href='")[1].split("'")[0]
                        
                        print 'Depart Url : ' + cityClass.url
                        try:
                            print >> exceptFile, cityClass.url
                            departListHtml = savefilegethtml.getHtml(cityClass.url, '<div class="list"  id="itemList" >', '', 'tourbaksaDepartListHtml.txt')
                            
                            try:
                                productList = clsProductList()
    else:
        return 'No'
    
# Time variables..
tourAgency = 'vgtour'
# Target year and month are taken from the first two command-line arguments.
targetYear = sys.argv[1]
targetMonth = sys.argv[2]
# Hard-coded values left here for manual runs:
#targetYear = '2014'
#targetMonth = '07'
# Timestamp (down to microseconds) used to make the log file name unique per run.
scrappingStartTime = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")

# Log file for this run; it is written to with `print >> exceptFile`.
exceptFile = open('verygoodtourException' + scrappingStartTime + '.txt', 'w')
print >> exceptFile, "Start : %s" % time.ctime()

# Fetch the site map page through savefilegethtml.getHtml (empty start/end
# markers, 'sitemapHtml.txt' as the file-name argument).
sitemapUrl = 'http://www.verygoodtour.com/Content/SiteMap.html'
sitemapHtml = savefilegethtml.getHtml(sitemapUrl, '', '', 'sitemapHtml.txt')
# Earlier manual download/save variant, kept for reference:
#sitemapHtml = urllib2.urlopen(sitemapUrl).read()
#sitemapHtmlFile = open('sitemapHtml.txt', 'w')
#print >> sitemapHtmlFile, sitemapHtml
#sitemapHtmlFile.close()
#sitemapHtml = open('sitemapHtml.txt')
#menulist = list()           # list of clsProduct objects holding the menu URLs
# Parsing state initialised before the scraping loop.
tourType = ''
departCity = ''
region = ''
depthIdx = 0
idx = 0
productList = list()        # products already seen, so the same product is not fetched twice; only ones not yet in the list are fetched
# NOTE(review): 'START' looks like a placeholder first entry - confirm how the loop uses it.
productList.append('START')
# Module-level Oracle connection.
# NOTE(review): credentials are embedded in this literal; consider moving them
# to configuration.
con = cx_Oracle.connect("bigtour/[email protected]:1521/ora11g")
try: