예제 #1
0
 def handle_data(self, data):
     """Store the text of the current element in the pending record.

     The fields that get filled depend on the state flags ``price``,
     ``date``, ``time`` and ``direction``, which are set outside this
     method.

     Args:
         data: Text content handed over by the parser.
     """
     if self.price == 2:
         # Keep only the part before the first '.'. ``partition`` leaves a
         # price without a decimal point intact, whereas slicing up to
         # ``find('.')`` (-1 when absent) would drop its last character.
         self._vals['price'] = strip_non_ascii(data).partition('.')[0]
         self.price = 0
     if self.date == 1:
         # Accumulate date text across calls, space-separated.
         self.tmp_date += data + " "
     if self.time == 1:
         # NOTE(review): assumes the first two whitespace-separated tokens
         # are the departure and arrival times -- confirm against the page.
         times = strip_non_ascii(data).split()
         self._vals['dep_time'] = times[0]
         self._vals['arr_time'] = times[1]
     if self.direction == 1:
         # Text starting with 'Tel-Aviv' selects 1, text ending with it
         # selects 2; the later check wins when both hold.
         if data.startswith('Tel-Aviv'):
             self.directionval = 1
         if data.endswith('Tel-Aviv'):
             self.directionval = 2
예제 #2
0
 def handle_data(self, data):
     """Store the text of the current element in the pending record.

     The fields that get filled depend on the state flags ``price``,
     ``date``, ``time`` and ``direction``, which are set outside this
     method.

     Args:
         data: Text content handed over by the parser.
     """
     if self.price == 2:
         # Keep only the part before the first '.'. ``partition`` leaves a
         # price without a decimal point intact, whereas slicing up to
         # ``find('.')`` (-1 when absent) would drop its last character.
         self._vals['price'] = strip_non_ascii(data).partition('.')[0]
         self.price = 0
     if self.date == 1:
         # Accumulate date text across calls, space-separated.
         self.tmp_date += data + " "
     if self.time == 1:
         # NOTE(review): assumes the first two whitespace-separated tokens
         # are the departure and arrival times -- confirm against the page.
         times = strip_non_ascii(data).split()
         self._vals['dep_time'] = times[0]
         self._vals['arr_time'] = times[1]
     if self.direction == 1:
         # Text starting with 'Tel-Aviv' selects 1, text ending with it
         # selects 2; the later check wins when both hold.
         if data.startswith('Tel-Aviv'):
             self.directionval = 1
         if data.endswith('Tel-Aviv'):
             self.directionval = 2
예제 #3
0
 def handle_endtag(self, tag):
     """React to a closing ``td`` or ``tr`` while ``self.header`` is 1.

     A closing ``td`` clears the ``direction`` and ``price`` flags. A
     closing ``tr`` turns the collected date, price and direction text into
     a record, appends it to ``self.data`` and resets the per-row state.

     Args:
         tag: Name of the tag being closed.
     """
     active = self.header == 1
     if active and tag == "td":
         self.direction = 0
         self.price = 0
     if active and tag == "tr":
         record = self._vals

         # ``dep_date`` / ``arr_date`` are split on 'T' into date and time.
         record['date'] = self.dep_date.split('T')[0]
         ymd = record['date'].split('-')
         record['year'] = ymd[0]
         record['month'] = ymd[1]
         record['day'] = ymd[2]
         dep_clock = self.dep_date.split('T')[1].split(":")
         record['dep_time'] = ":".join(dep_clock[0:2])
         arr_clock = self.arr_date.split('T')[1].split(":")
         record['arr_time'] = ":".join(arr_clock[0:2])

         # Every line after the first contributes one fare: the first token
         # minus its last two characters, plus one.
         # NOTE(review): presumably this drops the decimals and rounds up
         # -- confirm against the page markup.
         fares = []
         for line in self.tmp_price.split('\r\n')[1:]:
             amount = strip_non_ascii(line.split()[0])
             fares.append(int(amount[0:-2]) + 1)
         # Cheapest fare scaled by ``eur``, rounded to the nearest integer.
         record['price'] = int(min(fares) * eur + 0.5)

         # The first line of the direction text decides the direction; the
         # key stays unset when neither code is present.
         route = self.tmp_direction.strip().split('\r\n')[0]
         if "TLV" in route:
             record['direction'] = 1
         if "BCN" in route:
             record['direction'] = 2

         self.header = 0
         self.data.append(record)
         self._vals = {}
         self.tmp_price = self.tmp_direction = ""
         self.direction = self.price = 0
         self.dep_date = self.arr_date = ""
예제 #4
0
 def handle_endtag(self, tag):
     """React to a closing ``td`` or ``tr`` while ``self.header`` is 1.

     A closing ``td`` clears the ``direction`` and ``price`` flags. A
     closing ``tr`` turns the collected date, price and direction text into
     a record, appends it to ``self.data`` and resets the per-row state.

     Args:
         tag: Name of the tag being closed.
     """
     active = self.header == 1
     if active and tag == "td":
         self.direction = 0
         self.price = 0
     if active and tag == "tr":
         record = self._vals

         # ``dep_date`` / ``arr_date`` are split on 'T' into date and time.
         record['date'] = self.dep_date.split('T')[0]
         ymd = record['date'].split('-')
         record['year'] = ymd[0]
         record['month'] = ymd[1]
         record['day'] = ymd[2]
         dep_clock = self.dep_date.split('T')[1].split(":")
         record['dep_time'] = ":".join(dep_clock[0:2])
         arr_clock = self.arr_date.split('T')[1].split(":")
         record['arr_time'] = ":".join(arr_clock[0:2])

         # Every line after the first contributes one fare: the first token
         # minus its last two characters, plus one.
         # NOTE(review): presumably this drops the decimals and rounds up
         # -- confirm against the page markup.
         fares = []
         for line in self.tmp_price.split('\r\n')[1:]:
             amount = strip_non_ascii(line.split()[0])
             fares.append(int(amount[0:-2]) + 1)
         # Cheapest fare scaled by ``eur``, rounded to the nearest integer.
         record['price'] = int(min(fares) * eur + 0.5)

         # The first line of the direction text decides the direction; the
         # key stays unset when neither code is present.
         route = self.tmp_direction.strip().split('\r\n')[0]
         if "TLV" in route:
             record['direction'] = 1
         if "BCN" in route:
             record['direction'] = 2

         self.header = 0
         self.data.append(record)
         self._vals = {}
         self.tmp_price = self.tmp_direction = ""
         self.direction = self.price = 0
         self.dep_date = self.arr_date = ""
예제 #5
0
 def handle_endtag(self, tag):
     """Reset field flags on ``</span>`` and finish a record on ``</li>``.

     A closing ``span`` clears the ``price``, ``date`` and ``time`` flags.
     A closing ``li`` while ``self.day`` is 1 appends a record to
     ``self.data`` when price text was collected, and in every case resets
     ``_vals``, ``tmp_price`` and ``tmp_time``.

     Args:
         tag: Name of the tag being closed.
     """
     if tag == "span":
         self.price = self.date = self.time = 0
     if tag != "li" or self.day != 1:
         return

     if self.tmp_price != "":
         record = self._vals

         # ``tmp_date`` is read as "<weekday> <day> <month abbreviation>".
         date_fields = self.tmp_date.split()
         record['weekday'] = date_fields[0]
         record['day'] = date_fields[1]
         parsed_month = datetime.datetime.strptime(date_fields[2], "%b")
         record['month'] = parsed_month.strftime("%m")

         # Price scaled by ``eur`` and rounded; ``priceE`` keeps the
         # unscaled text.
         fare_text = strip_non_ascii(self.tmp_price)
         record['price'] = int(float(fare_text) * float(eur) + 0.5)
         record['priceE'] = fare_text
         record['direction'] = self.direction

         # Tokens 1 and 3 of the time text are departure and arrival.
         clock = self.tmp_time.split()
         record['dep_time'] = clock[1]
         record['arr_time'] = clock[3]

         # ``req_date`` is split on '-' with the month at index 1 and the
         # year at index 2. Shift the year when the listed month lies
         # across the December/January boundary from the requested month.
         requested = self.req_date.split("-")
         year = int(requested[2])
         requested_month = int(requested[1])
         listed_month = int(record['month'])
         if requested_month == 1 and listed_month == 12:
             year -= 1
         elif requested_month == 12 and listed_month == 1:
             year += 1
         record['year'] = str(year)
         self.data.append(record)

     self._vals = {}
     self.tmp_price = ""
     self.tmp_time = ""
예제 #6
0
 def handle_endtag(self, tag):
     """Update parser state when ``h3``, ``h2``, ``span`` or ``label`` closes.

     ``h3`` clears ``header`` and ``h2`` clears ``direction``. A ``span``
     closing while ``self.date`` is 1 fills the date fields of the pending
     record. A ``label`` closing with a non-empty pending record appends
     that record to ``self.data`` and resets the per-record state.

     Args:
         tag: Name of the tag being closed.
     """
     if tag == "h3":
         self.header = 0
     elif tag == "h2":
         self.direction = 0
     elif tag == "span" and self.date == 1:
         # ``req_date`` is split on '-' with the month at index 1 and the
         # year at index 2. Shift the year when the collected date lies
         # across the December/January boundary from the requested month.
         requested = self.req_date.split("-")
         year = int(requested[2])
         requested_month = int(requested[1])
         if requested_month == 1 and "Dec" in self.tmp_date:
             year -= 1
         elif requested_month == 12 and "Jan" in self.tmp_date:
             year += 1
         self._vals['year'] = str(year)

         # Cleaned date text: "<weekday> <day> <month abbreviation>".
         date_fields = strip_non_ascii(self.tmp_date).split()
         self._vals['weekday'] = date_fields[0]
         self._vals['day'] = date_fields[1]
         parsed_month = datetime.datetime.strptime(date_fields[2], "%b")
         self._vals['month'] = parsed_month.strftime("%m")
     elif tag == "label" and self._vals:
         finished = self._vals
         finished['direction'] = self.directionval
         self.data.append(finished)
         self.deep = 0
         self._vals = {}
         self.tmp_date = ""
예제 #7
0
 def handle_endtag(self, tag):
     """Update parser state when ``h3``, ``h2``, ``span`` or ``label`` closes.

     ``h3`` clears ``header`` and ``h2`` clears ``direction``. A ``span``
     closing while ``self.date`` is 1 fills the date fields of the pending
     record. A ``label`` closing with a non-empty pending record appends
     that record to ``self.data`` and resets the per-record state.

     Args:
         tag: Name of the tag being closed.
     """
     if tag == "h3":
         self.header = 0
     elif tag == "h2":
         self.direction = 0
     elif tag == "span" and self.date == 1:
         # ``req_date`` is split on '-' with the month at index 1 and the
         # year at index 2. Shift the year when the collected date lies
         # across the December/January boundary from the requested month.
         requested = self.req_date.split("-")
         year = int(requested[2])
         requested_month = int(requested[1])
         if requested_month == 1 and "Dec" in self.tmp_date:
             year -= 1
         elif requested_month == 12 and "Jan" in self.tmp_date:
             year += 1
         self._vals['year'] = str(year)

         # Cleaned date text: "<weekday> <day> <month abbreviation>".
         date_fields = strip_non_ascii(self.tmp_date).split()
         self._vals['weekday'] = date_fields[0]
         self._vals['day'] = date_fields[1]
         parsed_month = datetime.datetime.strptime(date_fields[2], "%b")
         self._vals['month'] = parsed_month.strftime("%m")
     elif tag == "label" and self._vals:
         finished = self._vals
         finished['direction'] = self.directionval
         self.data.append(finished)
         self.deep = 0
         self._vals = {}
         self.tmp_date = ""
예제 #8
0
  if debug_flag: 
   fd=codecs.open('/tmp/output_pages/'+sys.argv[0]+'_'+DST+'_'+str(Start)+'-'+str(Ret)+'.html', 'w', encoding='utf-8')
   fd.write(r2.text)
   fd.close()

  continue

 cur_date=Start.strftime("%d-%m-%Y")
 prP = getFlight(cur_date)
 prP.feed(r2.text)
 if debug_flag:
  print Start.strftime("%d/%m/%Y")
  print Ret.strftime("%d/%m/%Y")
  print len(list(find_all(r2.text, "marketColumn")))
  for s in find_all(r2.text, '<span class="price">'):
   print strip_non_ascii(r2.text[s+20:r2.text.find('<', s+20, s+30)])
  print len(prP.data)
  for t in prP.data : print t
  print r2
  print '-------'
 flightsList.extend(prP.data)
 Start=Ret 
print ""
#Out=[]
#Inc=[]
#for i in flightsList:
 #if i['direction'] == 4 : 
  #Out.append(i) 
 #else: 
  #Inc.append(i)
#Out=clean_dup(Out)
예제 #9
0
                             'w',
                             encoding='utf-8')
            fd.write(r2.text)
            fd.close()

        continue

    cur_date = Start.strftime("%d-%m-%Y")
    prP = getFlight(cur_date)
    prP.feed(r2.text)
    if debug_flag:
        print Start.strftime("%d/%m/%Y")
        print Ret.strftime("%d/%m/%Y")
        print len(list(find_all(r2.text, "marketColumn")))
        for s in find_all(r2.text, '<span class="price">'):
            print strip_non_ascii(
                r2.text[s + 20:r2.text.find('<', s + 20, s + 30)])
        print len(prP.data)
        for t in prP.data:
            print t
        print r2
        print '-------'
    flightsList.extend(prP.data)
    Start = Ret
print ""
#Out=[]
#Inc=[]
#for i in flightsList:
#if i['direction'] == 4 :
#Out.append(i)
#else:
#Inc.append(i)