detailProductCls.url = homepageUrl + detailProduct.split("location.href='")[1].split("'")[0] if detailProduct.find('예약마감'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '예약마감') elif detailProduct.find('바로예약'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '바로예약') elif waitSeat and detailProduct.find('예약접수'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '대기예약') elif detailProduct.find('예약접수'.encode('cp949')) > -1: detailProductCls.status = codes.getStatus('tourbaksa', '예약접수') else: detailProductCls.status = codes.getStatus('tourbaksa', 'None') #detailProductCls.status = detailProduct.split('</button>')[0].split('>')[2] detailProductCls.detailCode = detailProduct.split('EV_YM=')[1].split('&')[0] + detailProduct.split('EV_SEQ=')[1].split('&')[0] #print detailProductCls.toString() query = tourQuery.getDetailMergeQuery(tourAgency, productList.productCode, detailProductCls.detailCode, productList.productname, detailProductCls.dDay, '', productList.period, level1.departCity, '', productList.airCode, detailProductCls.status, detailProductCls.url, detailProductCls.price, '0', '0', '0', '', productList.night) #print 'Query : ' + query cursor = con.cursor() cursor.execute(query) con.commit() #break except: print >> exceptFile, 'Detail Product Parcing Error', sys.exc_info()[0] pass except: print >> exceptFile, 'Detail Product URL Error', sys.exc_info()[0] pass
prd_fee = t['SPrice']['#text'] prd_status = codes.getStatus( 'modetour', t['SDetailState']['#text']) prd_code = t['SPriceNum'][ '#text'] flynum = t['SstartAir'] #period = t['SNight'] #기간이 아니라... 잠자는 횟수임.. 1박2일이면.. 1 airline = t['SAirName'] prd_url = 'http://www.modetour.com/Package/Itinerary.aspx?startLocation=' + sublist.startLocation + '&location=' + sublist.location + '&location1=' + sublist.location1 + '&theme=' + sublist.Theme + '&theme1=' + sublist.Theme1 + '&MLoc=' + sublist.MLoc + '&Pnum=' + prd_code #print 'product url:' + prd_url query = tourQuery.getDetailMergeQuery( tourAgency, productCode, prd_code, prd_nm, st_dt + st_time, arr_day + arr_time, tr_term, sublist.startLocation, '', air_cd, prd_status, prd_url, prd_fee, '0', '0', '0', '') #print >> exceptFile, query #print query cursor = con.cursor() cursor.execute(query) con.commit() normalCnt += 1 #break #print(t['SMeet']) except TypeError as typeerr:
clsDetail.dDay = detail['departureAir']['departureDate'].strip() clsDetail.dTime = detail['departureAir']['departureTime'].strip() clsDetail.aDay = detail['entryAir']['arrivalDate'].strip() clsDetail.aTime = detail['entryAir']['arrivalTime'].strip() clsDetail.url = mainUrl + '/evt/' + clsDetail.seq #http://www.lottetour.com/evt/A140721565?menu=558 if detail['meetPlace'].find('인천'.decode('utf-8')) > -1: clsDetail.departCity = 'ICN' elif detail['meetPlace'].find('김해'.decode('utf-8')) > -1: clsDetail.departCity = 'PUS' elif detail['meetPlace'].find('김포'.decode('utf-8')) > -1: clsDetail.departCity = 'GMP' else: clsDetail.departCity = 'ETC' query = tourQuery.getDetailMergeQuery(tourAgency, productCode, clsDetail.seq, clsDetail.name, clsDetail.dDay+clsDetail.dTime, clsDetail.aDay+clsDetail.aTime, clsDetail.period, clsDetail.departCity, '', clsDetail.airCode, clsDetail.status, clsDetail.url, clsDetail.price, '0', '0', '0', '', clsDetail.period) #print query cursor = con.cursor() cursor.execute(query) con.commit() #print detail['name'] except: print "Level3 : ", sys.exc_info()[0] print >> exceptFile, "Level3 : ", sys.exc_info()[0] pass #break except: print "Level2 : ", sys.exc_info()[0] print >> exceptFile, "Level2 : ", sys.exc_info()[0] pass
)[0] + detailProduct.split( 'EV_SEQ=' )[1].split('&')[0] #print detailProductCls.toString() query = tourQuery.getDetailMergeQuery( tourAgency, productList. productCode, detailProductCls. detailCode, productList. productname, detailProductCls. dDay, '', productList.period, level1.departCity, '', productList. airCode, detailProductCls. status, detailProductCls. url, detailProductCls. price, '0', '0', '0', '', productList.night) #print 'Query : ' + query cursor = con.cursor() cursor.execute(query) con.commit() #break
detailProductCls.price += num #print 'Price : ', detailProductCls.price elif detailInfo.find('class="reservation"') > -1: if detailInfo.find('예약마감') > -1: detailProductCls.status = codes.getStatus('onlinetour', 'Finish') elif detailInfo.find('예약가능') > -1: detailProductCls.status = codes.getStatus('onlinetour', 'Avail') elif detailInfo.find('출발가능') > -1: detailProductCls.status = codes.getStatus('onlinetour', 'Confirm') else: detailProductCls.status = 'Etc' #print 'status : ', detailProductCls.status #print >> exceptFile, 'status : ', detailProductCls.status query = tourQuery.getDetailMergeQuery(tourAgency, productCls.productCode, detailProductCls.proc_cd, detailProductCls.productName.decode('utf-8'), detailProductCls.dDay + detailProductCls.dTime, detailProductCls.aDay+detailProductCls.aTime, productCls.period, mainMenu.departCity, '', detailProductCls.airCode, detailProductCls.status, detailProductCls.url, detailProductCls.price, '0', '0', '0', '', productCls.night) #print query cursor = con.cursor() cursor.execute(query) con.commit() #break except: print >> exceptFile, "Level3 Error :", sys.exc_info()[0] pass #break except: print >> exceptFile, "Level2 Error :", sys.exc_info()[0] pass #break except:
clsProduct.period = parcer.strip()[:1] #print >> ybtourproductfile, 'Period:' + clsProduct.period elif parcer.find('<td class="blue">') > -1 and parcer.find('원') > -1: spliter = parcer.strip().split('>') clsProduct.price = spliter[1].split('원')[0].replace(',', '') #print >> ybtourproductfile, 'Price:' + clsProduct.price elif parcer.find('출발확정') > -1 or parcer.find('예약마감') > -1 or parcer.find('예약가능') > -1: spliter = parcer.strip().split('>') #print codes.getStatus('ybtour', spliter[1].split('<')[0]) clsProduct.status = codes.getStatus('ybtour', spliter[1].split('<')[0]) #print >> ybtourproductfile, 'Status:' + clsProduct.status elif parcer.strip() == '</tr>': flag = False # 2014. 06. 29. 여행상품명에서 국가, 도시코드 가져오는 부분으로 적용.. query = tourQuery.getDetailMergeQuery(tourAgency, detailProduct, clsProduct.detailcode, clsProduct.productName, targetYear+clsProduct.departDay+clsProduct.departTime, targetYear+clsProduct.arriveDay+clsProduct.arriveTime, clsProduct.period, departCity, '', clsProduct.airCode, clsProduct.status, clsProduct.url, clsProduct.price, '0', '0', '0', '') #query = savefilegethtml.getDetailMergeQuery('ybtour', detailProduct, clsProduct.detailcode, clsProduct.productName, targetYear+clsProduct.departDay+clsProduct.departTime, targetYear+clsProduct.arriveDay+clsProduct.arriveTime, clsProduct.period, departCity, '', clsProduct.airCode, clsProduct.status, clsProduct.url, clsProduct.price, '0', '0', '0', '') #print query con.commit() cursor = con.cursor() cursor.execute(query) con.commit() #break except: print "ML5 Parcing Error:", sys.exc_info()[0] print >> exceptFile, "ML5 Parcing Error:", sys.exc_info()[0] pass break except UnicodeEncodeError as err1:
) == '</tr>': flag = False # 2014. 06. 29. 여행상품명에서 국가, 도시코드 가져오는 부분으로 적용.. query = tourQuery.getDetailMergeQuery( tourAgency, detailProduct, clsProduct. detailcode, clsProduct. productName, targetYear + clsProduct. departDay + clsProduct. departTime, targetYear + clsProduct. arriveDay + clsProduct. arriveTime, clsProduct.period, departCity, '', clsProduct.airCode, clsProduct.status, clsProduct.url, clsProduct.price, '0', '0', '0', '') #query = savefilegethtml.getDetailMergeQuery('ybtour', detailProduct, clsProduct.detailcode, clsProduct.productName, targetYear+clsProduct.departDay+clsProduct.departTime, targetYear+clsProduct.arriveDay+clsProduct.arriveTime, clsProduct.period, departCity, '', clsProduct.airCode, clsProduct.status, clsProduct.url, clsProduct.price, '0', '0', '0', '') #print query con.commit() cursor = con.cursor()
def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div): print 'Product Url : ', productCls.url print >> exceptFile, 'Product Url : ', productCls.url # 2014. 7. 23. 카테고리의 국가는 넣지 않기로 함... #codeList = codes.getCityCode(productCls.name.decode('utf-8'), detailUrl.name.decode('utf-8'), regionUrl.name.decode('utf-8')) codeList = codes.getCityCode(productCls.name.decode('utf-8')) cityList = codeList[0] nationList = codeList[1] continentList = codeList[2] siteList = codeList[3] # 2014. 8. 3. site 추가 if len(cityList) == 0 and len(nationList) == 0 and len(continentList) == 0: codeList = codes.getCityCode(detailUrl.name.decode('utf-8')) cityList = codeList[0] nationList = codeList[1] continentList = codeList[2] siteList = codeList[3] # 2014. 8. 3. site 추가 # Master 상품 입력 query = tourQuery.getMasterMergeQuery(tourAgency, productCls.code, productCls.name.decode('utf-8'), menu.kind, dmst_div, '', '') #print query cursor = con.cursor() cursor.execute(query) con.commit() # Region Data 삭제 codes.insertRegionData(tourAgency, productCls.code, cityList, nationList, continentList, siteList) detailProductHtml = savefilegethtml.getHtml(productCls.url, '', '', 'tour2000DetailHtml'+targetMonth+'.txt') pl10Idx = 0 for detailProduct in detailProductHtml: try: if detailProduct.find('<span class="text_pink">') > -1 and detailProduct.find('<a href=') < 0: detailCls = clsProductDetail() numArray = tourUtil.getNumArray(detailProduct) if len(numArray) > 7: detailCls.dDay = targetYear + numArray[0] + numArray[1] detailCls.dTime = numArray[2] + numArray[3] detailCls.aDay = targetYear + numArray[4] + numArray[5] detailCls.aTime = numArray[6] + numArray[7] elif len(numArray) == 4: detailCls.dDay = targetYear + numArray[0] + numArray[1] detailCls.dTime = '' detailCls.aDay = targetYear + numArray[2] + numArray[3] detailCls.aTime = '' elif detailProduct.find('onError') > -1: detailCls.airCode = detailProduct[detailProduct.find('.gif') - 4:detailProduct.find('.gif') - 2] elif detailProduct.find('text_redB') > -1: numArray = tourUtil.getNumArray(tourUtil.getRemovedHtmlTag(detailProduct)) for num in numArray: detailCls.price += num elif detailProduct.find('</a></td>') > -1: if detailProduct.find('text_pink') > -1: detailCls.status = codes.getStatus('tour2000', '예약가능') elif detailProduct.find('text_blau') > -1: detailCls.status = codes.getStatus('tour2000', '출발가능') elif detailProduct.find('text_green') > -1: detailCls.status = codes.getStatus('tour2000', '대기예약') elif detailProduct.find('text_grayLightSmall') > -1: detailCls.status = codes.getStatus('tour2000', '예약마감') detailCls.remainSeat = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip() elif detailProduct.find('<p class="pl10">') > -1: if pl10Idx == 0: pl10Idx = 1 detailCls.productName = tourUtil.getRemovedHtmlTag(detailProduct).replace("'", "").strip() detailCls.url = mainUrl + tourUtil.getTagAttr(detailProduct, 'a', 'href') detailCls.productSeq = detailProduct.split('ev_ym=')[1].split('&')[0] + detailProduct.split('ev_seq=')[1].split('&')[0] else: pl10Idx = 0 if detailCls.productName.find('부산출발') > -1: departCity = 'PUS' else: departCity = 'ICN' query = tourQuery.getDetailMergeQuery(tourAgency, productCls.code, detailCls.productSeq, detailCls.productName.decode('utf-8'), detailCls.dDay+detailCls.dTime, detailCls.aDay+detailCls.aTime, productCls.period, departCity, '', detailCls.airCode, detailCls.status, detailCls.url, detailCls.price, '0', '0', '0', '', productCls.night) #print >> exceptFile, query #print query cursor = con.cursor() cursor.execute(query) con.commit() #break except: print >> exceptFile, 'detail parcing Error : ', sys.exc_info()[0] pass
detailClass.atime = aDate.split(')')[1].strip().replace(':', '') detailClass.acode = valueParcing(detailProduct, 'acode":"', '","aline') detailClass.aline = valueParcing(detailProduct, 'aline":"', '","tday') detailClass.tday = valueParcing(detailProduct, 'tday":"', '","grade') detailClass.grade = valueParcing(detailProduct, 'grade":"', '","gname') detailClass.gname = valueParcing(detailProduct, 'gname":"', '","pname').replace("'", "").decode('utf-8') detailClass.pname = valueParcing(detailProduct, 'pname":"', '","amt').replace("'", "").decode('utf-8') detailClass.amt = valueParcing(detailProduct, 'amt":"', '","lminute') detailClass.lminute = codes.getStatus('hanatour', valueParcing(detailProduct, 'lminute":"', '"}')) detailClass.url = 'http://www.hanatour.com/asp/booking/productPackage/pk-12000.asp?pkg_code=' + detailClass.pcode #print detailClass.toString() #print idx #idx += 1 # 2014. 6. 29. 정규식으로 이름에서 국가, 도시 코드 빼오도록.. query = tourQuery.getDetailMergeQuery(tourAgency, productClass.pkg_mst_code, detailClass.pcode, detailClass.pname, detailClass.dday+detailClass.dtime, detailClass.aday+detailClass.atime, detailClass.tday, departCity, '', detailClass.acode, detailClass.lminute, detailClass.url, detailClass.amt, '0', '0', '0', '') #query = savefilegethtml.getDetailMergeQuery('hanatour', productClass.pkg_mst_code, detailClass.pcode, detailClass.pname, detailClass.dday+detailClass.dtime, detailClass.aday+detailClass.atime, detailClass.tday, departCity, '', detailClass.acode, detailClass.lminute, detailClass.url, detailClass.amt, '0', '0', '0', '') #print >> exceptFile ,query cursor = con.cursor() cursor.execute(query) con.commit() #break except cx_Oracle.DatabaseError as dberr: print >> exceptFile, 'Depth 44 : ' + str(dberr) pass except: print >> exceptFile, 'Depth 4 : ' + str(sys.exc_info()[0]) pass except cx_Oracle.IntegrityError as dberr:
def insertData(productCls, detailUrl, regionUrl, tourAgency, kind, dmst_div): print 'Product Url : ', productCls.url print >> exceptFile, 'Product Url : ', productCls.url # 2014. 7. 23. 카테고리의 국가는 넣지 않기로 함... #codeList = codes.getCityCode(productCls.name.decode('utf-8'), detailUrl.name.decode('utf-8'), regionUrl.name.decode('utf-8')) codeList = codes.getCityCode(productCls.name.decode('utf-8')) cityList = codeList[0] nationList = codeList[1] continentList = codeList[2] siteList = codeList[3] # 2014. 8. 3. site 추가 if len(cityList) == 0 and len(nationList) == 0 and len(continentList) == 0: codeList = codes.getCityCode(detailUrl.name.decode('utf-8')) cityList = codeList[0] nationList = codeList[1] continentList = codeList[2] siteList = codeList[3] # 2014. 8. 3. site 추가 # Master 상품 입력 query = tourQuery.getMasterMergeQuery(tourAgency, productCls.code, productCls.name.decode('utf-8'), menu.kind, dmst_div, '', '') #print query cursor = con.cursor() cursor.execute(query) con.commit() # Region Data 삭제 codes.insertRegionData(tourAgency, productCls.code, cityList, nationList, continentList, siteList) detailProductHtml = savefilegethtml.getHtml( productCls.url, '', '', 'tour2000DetailHtml' + targetMonth + '.txt') pl10Idx = 0 for detailProduct in detailProductHtml: try: if detailProduct.find( '<span class="text_pink">') > -1 and detailProduct.find( '<a href=') < 0: detailCls = clsProductDetail() numArray = tourUtil.getNumArray(detailProduct) if len(numArray) > 7: detailCls.dDay = targetYear + numArray[0] + numArray[1] detailCls.dTime = numArray[2] + numArray[3] detailCls.aDay = targetYear + numArray[4] + numArray[5] detailCls.aTime = numArray[6] + numArray[7] elif len(numArray) == 4: detailCls.dDay = targetYear + numArray[0] + numArray[1] detailCls.dTime = '' detailCls.aDay = targetYear + numArray[2] + numArray[3] detailCls.aTime = '' elif detailProduct.find('onError') > -1: detailCls.airCode = detailProduct[detailProduct.find('.gif') - 4:detailProduct.find('.gif' ) - 2] elif detailProduct.find('text_redB') > -1: numArray = tourUtil.getNumArray( tourUtil.getRemovedHtmlTag(detailProduct)) for num in numArray: detailCls.price += num elif detailProduct.find('</a></td>') > -1: if detailProduct.find('text_pink') > -1: detailCls.status = codes.getStatus('tour2000', '예약가능') elif detailProduct.find('text_blau') > -1: detailCls.status = codes.getStatus('tour2000', '출발가능') elif detailProduct.find('text_green') > -1: detailCls.status = codes.getStatus('tour2000', '대기예약') elif detailProduct.find('text_grayLightSmall') > -1: detailCls.status = codes.getStatus('tour2000', '예약마감') detailCls.remainSeat = tourUtil.getRemovedHtmlTag( detailProduct).replace("'", "").strip() elif detailProduct.find('<p class="pl10">') > -1: if pl10Idx == 0: pl10Idx = 1 detailCls.productName = tourUtil.getRemovedHtmlTag( detailProduct).replace("'", "").strip() detailCls.url = mainUrl + tourUtil.getTagAttr( detailProduct, 'a', 'href') detailCls.productSeq = detailProduct.split( 'ev_ym=')[1].split('&')[0] + detailProduct.split( 'ev_seq=')[1].split('&')[0] else: pl10Idx = 0 if detailCls.productName.find('부산출발') > -1: departCity = 'PUS' else: departCity = 'ICN' query = tourQuery.getDetailMergeQuery( tourAgency, productCls.code, detailCls.productSeq, detailCls.productName.decode('utf-8'), detailCls.dDay + detailCls.dTime, detailCls.aDay + detailCls.aTime, productCls.period, departCity, '', detailCls.airCode, detailCls.status, detailCls.url, detailCls.price, '0', '0', '0', '', productCls.night) #print >> exceptFile, query #print query cursor = con.cursor() cursor.execute(query) con.commit() #break except: print >> exceptFile, 'detail parcing Error : ', sys.exc_info()[0] pass
air_cd = t['SAirCode'][:2] st_dt = t['SPriceDay']['#text'] st_time = t['SstartTime'].replace(':', '') arr_day = t['SArrivalDay']['#text'] arr_time = t['SArrivalTime'].replace(':', '') tr_term = t['SDay'] tr_div = themeCode prd_fee = t['SPrice']['#text'] prd_status = codes.getStatus('modetour', t['SDetailState']['#text']) prd_code = t['SPriceNum']['#text'] flynum = t['SstartAir'] #period = t['SNight'] #기간이 아니라... 잠자는 횟수임.. 1박2일이면.. 1 airline = t['SAirName'] prd_url = 'http://www.modetour.com/Package/Itinerary.aspx?startLocation='+sublist.startLocation+'&location='+sublist.location+'&location1='+sublist.location1+'&theme='+sublist.Theme+'&theme1='+sublist.Theme1+'&MLoc='+sublist.MLoc+'&Pnum='+prd_code #print 'product url:' + prd_url query = tourQuery.getDetailMergeQuery(tourAgency, productCode, prd_code, prd_nm, st_dt+st_time, arr_day+arr_time, tr_term, sublist.startLocation, '', air_cd, prd_status, prd_url, prd_fee, '0', '0', '0', '') #print >> exceptFile, query #print query cursor = con.cursor() cursor.execute(query) con.commit() normalCnt += 1 #break #print(t['SMeet']) except TypeError as typeerr: try: reg_div = anCode prd_nm = tree['ModeSangPum']['SangList']['SName']['#text'].replace("'", "")
valueParcing( detailProduct, 'lminute":"', '"}')) detailClass.url = 'http://www.hanatour.com/asp/booking/productPackage/pk-12000.asp?pkg_code=' + detailClass.pcode #print detailClass.toString() #print idx #idx += 1 # 2014. 6. 29. 정규식으로 이름에서 국가, 도시 코드 빼오도록.. query = tourQuery.getDetailMergeQuery( tourAgency, productClass.pkg_mst_code, detailClass.pcode, detailClass.pname, detailClass.dday + detailClass.dtime, detailClass.aday + detailClass.atime, detailClass.tday, departCity, '', detailClass.acode, detailClass.lminute, detailClass.url, detailClass.amt, '0', '0', '0', '') #query = savefilegethtml.getDetailMergeQuery('hanatour', productClass.pkg_mst_code, detailClass.pcode, detailClass.pname, detailClass.dday+detailClass.dtime, detailClass.aday+detailClass.atime, detailClass.tday, departCity, '', detailClass.acode, detailClass.lminute, detailClass.url, detailClass.amt, '0', '0', '0', '') #print >> exceptFile ,query cursor = con.cursor() cursor.execute(query) con.commit() #break except cx_Oracle.DatabaseError as dberr: print >> exceptFile, 'Depth 44 : ' + str(
#print productCls.period #print departCity #print productCls.aCode #print productCls.booked #print productCls.url #print productCls.price if productCls.code.strip() == '': continue query = tourQuery.getDetailMergeQuery( tourAgency, mastercode, productCls.code, productCls.name, productCls.sDay + productCls.sTime, productCls.aDay + productCls.aTime, productCls.period, departCity, '', productCls.aCode, productCls.booked, productCls.url, productCls.price, '0', '0', '0', '', productCls.night) #print query cursor = con.cursor() cursor.execute(query) con.commit() #break except: print 'data base error!!!' print >> exceptFile, "Parcing Error:", sys.exc_info( )[0]
#query... 등등 #print mastercode #print productCls.code #print productCls.name #print productCls.sDay+productCls.sTime #print productCls.aDay+productCls.aTime #print productCls.period #print departCity #print productCls.aCode #print productCls.booked #print productCls.url #print productCls.price if productCls.code.strip() == '': continue query = tourQuery.getDetailMergeQuery(tourAgency, mastercode, productCls.code, productCls.name, productCls.sDay+productCls.sTime, productCls.aDay+productCls.aTime, productCls.period, departCity, '', productCls.aCode, productCls.booked, productCls.url, productCls.price, '0', '0', '0', '', productCls.night) #print query cursor = con.cursor() cursor.execute(query) con.commit() #break except: print 'data base error!!!' print >> exceptFile, "Parcing Error:", sys.exc_info()[0] pass #break except: print >> exceptFile, "Parcing or URL Error:", sys.exc_info()[0] pass