コード例 #1
0
ファイル: elal_scrape.py プロジェクト: gabik/wizzscrape
# Module-level state for this scrape run.
new_year = 0
maxn = 31  # length of the date window, in days

# Command line: argv[1] -> DST, argv[2] -> window offset (arg_month),
# optional argv[3] == "debug" -> debug_flag.
arg_month = sys.argv[2]

# The window opens (maxn - 1) days after today for every unit of arg_month
# and closes maxn days after it opens.
# Disabled alternative with a fixed start: datetime.date(2015,8,1)
Start_orig = datetime.date.today() + datetime.timedelta(
    days=(maxn - 1) * int(arg_month))
Stop = Start_orig + datetime.timedelta(days=maxn)
scrape_time = datetime.datetime.today()
cleandone = 1

DST = sys.argv[1]
# True only when a third argument is present and equals "debug".
debug_flag = len(sys.argv) >= 4 and sys.argv[3] == "debug"

# get_currency is defined outside this excerpt; its result is kept for later use.
usd = get_currency("usd")

def get_proxy():
 s=requests.session()
 test_url='http://fly.elal.co.il/plnext/ELALonlinebooking/Override.action'
 test2_url='http://booking.elal.co.il/newBooking/urlDirector.do'
 good=False
 while good is False:
  cur_proxy = replace_proxy()
  print "Need Proxy... {0}".format(cur_proxy)
  try:
   test=s.get(test_url)
  except:
   test = dummy()
  #if 'Access Denied' not in test.text and test.status_code == 200 and 'Manual Runner' not in test.text:
  if 'Access Denied' not in test.text and 'Manual Runner' not in test.text:
コード例 #2
0
ファイル: elalC_scrape.py プロジェクト: gabik/wizzscrape
new_year = 0
maxn = 31
arg_month = sys.argv[2]
if arg_month > 11: exit
Start_orig = datetime.date.today()
#Start_orig = datetime.date(2015,8,1)
Start_orig += datetime.timedelta(days=(int(maxn) - 1) * int(arg_month))
Stop = Start_orig + datetime.timedelta(days=maxn)
scrape_time = datetime.datetime.today()
cleandone = 1

DST = sys.argv[1]
if len(sys.argv) >= 4:
    if sys.argv[3] == "debug": debug_flag = True

usd = get_currency("usd")

Start = Start_orig
flightsList = []
n = 0
print DST
print str(scrape_time)
print str(Start_orig), str(arg_month)
while Stop > Start:
    n += 1
    if debug_flag:
        print "Progress: " + str(n) + "/" + str(maxn)
    else:
        sys.stdout.write(" Progress: %d/%d   \r" % (n, maxn))
        sys.stdout.flush()
    Ret = Start + datetime.timedelta(days=2)
コード例 #3
0
import requests
import re
from HTMLParser import HTMLParser
import sys
import datetime
from general_scrape import find_all, clean_dup, strip_non_ascii, get_currency

# Look up the "eur" currency value once at module load through
# general_scrape.get_currency (presumably an exchange rate used for price
# conversion further down -- confirm against the rest of the file).
eur=get_currency("eur")

class getFlight(HTMLParser):
 def __init__(self, req_date):
  self.tmp_date=""
  self.tmp_price=""
  self.tmp_data=""
  self.tmp_time=""
  self.day=0
  self.endday=0
  self.price=0
  self.date=0
  self.time = 0
  self.data = []
  self._vals = {}
  self.header = 0
  self.direction = 0
  self.req_date=req_date
  HTMLParser.__init__(self)
 def handle_starttag(self, tag, attrs):
  if tag=="div":
   for a,b in attrs:
    if a=="class" and b=="OutboundDaySlider": self.direction=1
    if a=="class" and b=="ReturnDaySlider": self.direction=2