def get_details(self, sub):
    """Return a placeholder Detail describing ``sub``.

    The detail copies ``sub.id`` and ``sub.label``, carries an empty
    ``infos`` string and a price of ``Decimal('0.0')``.
    """
    placeholder = Detail()
    fields = (
        ('id', sub.id),
        ('label', sub.label),
        ('infos', ''),
        ('price', Decimal('0.0')),
    )
    for attr, value in fields:
        setattr(placeholder, attr, value)
    return placeholder
def iter_details(self, sub):
    """Yield exactly one placeholder Detail for ``sub``.

    The detail mirrors the subscription's ``id`` and ``label``; ``infos``
    is empty and ``price`` is ``Decimal('0.0')``.
    """
    entry = Detail()
    entry.id, entry.label = sub.id, sub.label
    entry.infos = ''
    entry.price = Decimal('0.0')
    yield entry
def get_balance(self):
    """Return the balance as a Detail, or None if no balance line exists.

    The first item of ``self.get_calls()`` whose label contains
    "Votre solde" provides the price; the returned label is u"Balance".
    """
    for entry in self.get_calls():
        if "Votre solde" not in entry.label:
            continue
        balance = Detail()
        balance.price = entry.price
        balance.label = u"Balance"
        return balance
    # No balance line found.
    return None
def get_details(self, sub):
    """Build the single, empty-priced Detail associated with ``sub``.

    Returns a Detail whose ``id``/``label`` come from the subscription,
    with ``infos`` set to "" and ``price`` set to ``Decimal("0.0")``.
    """
    result = Detail()
    result.id, result.label = sub.id, sub.label
    result.infos, result.price = "", Decimal("0.0")
    return result
def iter_details(self, sub):
    """Generate one zero-priced Detail carrying ``sub.id`` and ``sub.label``."""
    item = Detail()
    for attr, value in (
        ("id", sub.id),
        ("label", sub.label),
        ("infos", ""),
        ("price", Decimal("0.0")),
    ):
        setattr(item, attr, value)
    yield item
def do_details(self, id):
    """
    details [ID]

    Get details of subscriptions.
    If no ID given, display all details of all backends.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # shown to the user as the command help -- confirm before rewording.
    id, backend_name = self.parse_id(id)
    if id:
        targets = [(id, backend_name)]
    else:
        targets = [(subscrib.id, subscrib.backend)
                   for subscrib in self.get_object_list('iter_subscription')]

    for sub_id, backend in targets:
        if backend is None:
            names = None
        else:
            names = (backend,)

        # XXX: should be generated by backend? -Flo
        # XXX: no, but you should do it in a specific formatter -romain
        # TODO: do it, and use exec_method here. Code is obsolete
        total = Detail()
        total.label = u"Sum"
        total.infos = u"Generated by boobill"
        total.price = Decimal("0.")

        # One formatted section per subscription: every detail, then the sum.
        self.start_format()
        for detail in self.do('get_details', sub_id, backends=names):
            self.format(detail)
            total.price = detail.price + total.price
        self.format(total)
def on_loaded(self):
    """Parse the call rows of the loaded document into ``self.calls``.

    Only ``<tr>`` elements whose ``class`` attribute is ``even`` or ``odd``
    are parsed.  For each of them a Detail is appended with:

    * ``label``: the stripped text of every ``td.long`` cell, each followed
      by a space, then the first ``td.price`` cell in parentheses;
    * ``price``: the second ``td.price`` cell, comma read as decimal point;
    * ``datetime``: parsed from the ``td.middle.nowrap`` cell, formatted as
      ``DD.MM.YYYY HH:MM[:SS]``.
    """
    self.calls = []
    for tr in self.document.xpath('//tr'):
        # attrib.get() skips rows without a class attribute; it replaces a
        # bare ``except:`` that also swallowed unrelated errors.
        if tr.attrib.get("class") not in ("even", "odd"):
            continue

        tddate = tr.find('td[@class="middle nowrap"]')
        tdprice = tr.xpath('td[@class="price"]')

        label = u''.join(unicode(td.text.strip()) + u' '
                         for td in tr.xpath('td[@class="long"]'))
        label += u'(' + unicode(tdprice[0].text.strip()) + u')'
        price = Decimal(tdprice[1].text.strip().replace(',', '.'))

        # Split the "date time" cell once instead of re-parsing it twice.
        stamp = tddate.text.strip().split(' ')
        # The date is day.month.year, hence the reversal for date(y, m, d).
        mydate = date(*reversed([int(x) for x in stamp[0].split(".")]))
        mytime = time(*[int(x) for x in stamp[1].split(":")])

        detail = Detail()
        detail.datetime = datetime.combine(mydate, mytime)
        detail.label = label
        detail.price = price
        self.calls.append(detail)
def get_calls(self):
    """Extract the call details from the PDF text, newest first.

    The text returned by ``self._parse_pdf()`` is split on "DEBIT"; each
    resulting page is cut at its footer and read as a flat list of non-empty
    lines consumed in groups of four (date/label, correspondent, duration,
    price).  ``modif`` tracks how far the real data has drifted from that
    regular stride because of rows with more or fewer lines.

    Returns the Detail objects sorted by ``_get_date`` in reverse order.
    """
    txt = self._parse_pdf()
    pages = txt.split("DEBIT")
    pages.pop(0)  # remove headers
    details = []
    for page in pages:
        page = page.split("RÉGLO MOBILE")[0].split("N.B. Prévoir")[0]  # remove footers
        lines = page.split("\n")
        lines = [x for x in lines if len(x) > 0]  # Remove empty lines
        # NOTE(review): the original comment said "five columns" but the
        # stride used everywhere below is 4 -- confirm.  The result is used
        # as a loop bound, so this presumably relies on Python 2 integer
        # division.
        numitems = (len(lines) + 1) / 4  # Each line has five columns
        lines.pop(0)  # remove the extra € symbol
        modif = 0  # accumulated offset between i * 4 and the real position
        i = 0
        while i < numitems:
            if modif != 0:
                # The drift changes how many items fit in the page.
                numitems = (len(lines) + 1 + modif) / 4
            base = i * 4 - modif
            dateop = base
            corres = base + 1
            duree = base + 2
            price = base + 3
            if "Changement vers le Forfait" in lines[base]:
                # This entry produces no Detail; it only shifts the offset
                # by one.
                modif += 1
                i += 1
                continue
            # Special case with 5 columns, the operation date is not in the first one
            if len(re.split("(\d+\/\d+\/\d+)", lines[dateop])) < 2:
                # Merge the dateless first line into the next one and shift
                # every column index by one.
                lines[base + 1] = lines[base] + " " + lines[base + 1]
                dateop = base + 1
                corres = base + 2
                duree = base + 3
                price = base + 4
                modif -= 1
            detail = Detail()
            # splits = [text before the date, the date, text after the date]
            splits = re.split("(\d+\/\d+\/\d+)", lines[dateop])
            mydate = date(*reversed([int(x) for x in splits[1].split("/")]))
            mytime = time(*[int(x) for x in splits[2].split(":")])
            detail.datetime = datetime.combine(mydate, mytime)
            # A lone dash stands for an empty column.
            if lines[corres] == "-":
                lines[corres] = ""
            if lines[duree] == "-":
                lines[duree] = ""
            detail.label = (
                unicode(splits[0], encoding="utf-8", errors="replace")
                + u" "
                + lines[corres]
                + u" "
                + lines[duree]
            )
            # Special case with only 3 columns, we insert a price
            if "Activation de votre ligne" in detail.label:
                lines.insert(price, "0")
            try:
                detail.price = Decimal(lines[price].replace(",", "."))
            except:
                # In some special cases, there are no price column. Try to detect it
                if "Inclus" not in lines[price]:
                    modif += 1
                detail.price = Decimal(0)
            details.append(detail)
            i += 1
    return sorted(details, key=_get_date, reverse=True)
def iter_details(self, sub):
    """Log the call, then yield one zero-priced Detail built from ``sub``."""
    self.logger.debug('call Browser.iter_details')
    stub = Detail()
    stub.id, stub.label = sub.id, sub.label
    stub.infos, stub.price = '', Decimal('0.0')
    yield stub
def get_balance(self, subscription):
    """Return the balance of a subscription as a Detail in euros.

    ``subscription`` may be a Subscription instance or any value accepted
    by ``self.get_subscription``.  The price is the subscription's
    ``_balance`` attribute.
    """
    if isinstance(subscription, Subscription):
        sub = subscription
    else:
        sub = self.get_subscription(subscription)

    result = Detail()
    result.price = sub._balance
    result.label = u"Balance"
    result.currency = Currency.CUR_EUR
    return result
def get_balance(self):
    """Return a "Balance" Detail read from the history page.

    Navigates to ``self.conso`` when not already on HistoryPage.  The price
    is taken from the first history entry whose label contains
    "Votre solde", or is NotAvailable when there is none.
    """
    if not self.is_on_page(HistoryPage):
        self.location(self.conso)

    balance = Detail()
    balance.label = u"Balance"
    for entry in self.get_history():
        if "Votre solde" in entry.label:
            balance.price = entry.price
            break
    else:
        # Loop finished without finding a balance line.
        balance.price = NotAvailable
    return balance
def iter_payment_details(self, sub):
    """Yield a Detail for each row of the payment table belonging to ``sub``.

    The position of the header/table is 0 when ``sub._id`` is all digits,
    otherwise the number left after removing ``AFFILIE`` from ``sub._id``.
    Nothing is yielded when the page has neither a header nor a
    ``DetailPaiement3`` table at that position, or when the header does not
    match ``... le <DD/MM/YYYY> pour un montant de ...``.

    Detail ids are ``<sub._id>.<YYYYMMDD>.<row number>``.  Rows without a
    date get empty ``infos`` and the date of the previous dated row.

    Raises ValueError if the non-digit ``sub._id`` has no numeric suffix.
    """
    def digits(text):
        # Keep only digits, commas and minus signs of an amount cell.
        return re.sub(r'[^\d,-]+', '', text)

    if sub._id.isdigit():
        idx = 0
    else:
        # The position must be an int: it is compared with lengths and used
        # as a list index.  It used to stay a string, which can do neither.
        idx = int(sub._id.replace('AFFILIE', ''))

    headers = self.document.xpath('//div[@class="centrepage"]/h2')
    # Compare the number of nodes with the position; the node list itself
    # used to be compared with idx, which is meaningless.
    if len(headers) <= idx and len(self.document.xpath('//table[@id="DetailPaiement3"]')) <= idx:
        return

    # NOTE(review): as before, a page with enough DetailPaiement3 tables but
    # too few headers still fails on the lookup below.
    m = re.match('.*le (.*) pour un montant de.*', headers[idx].text.strip())
    if not m:
        return

    id_date = datetime.strptime(m.group(1), '%d/%m/%Y').date()
    payment_id = sub._id + "." + id_date.strftime("%Y%m%d")
    rows = self.document.xpath('//table[@class="tableau"]')[idx].xpath('.//tr')

    line = 1
    last_date = None
    for tr in rows:
        tds = tr.xpath('.//td')
        if not tds:
            continue  # header rows have no <td>
        date_str = tds[0].text
        det = Detail()
        det.id = payment_id + "." + str(line)
        det.label = unicode(tds[1].text.strip())
        if not date_str:
            # Continuation row: reuse the date of the previous dated row.
            det.infos = u''
            det.datetime = last_date
        else:
            det.infos = (u'Payé ' + unicode(digits(tds[2].text)) +
                         u'€ / Base ' + unicode(digits(tds[3].text)) +
                         u'€ / Taux ' + unicode(digits(tds[4].text)) + '%')
            det.datetime = datetime.strptime(date_str, '%d/%m/%Y').date()
            last_date = det.datetime
        det.price = Decimal(digits(tds[5].text).replace(',', '.'))
        line += 1
        yield det
def on_loaded(self):
    """Fill ``self.details`` from the rows of the ``reportTable`` table.

    Rows containing a link in a cell (global categories) and rows whose
    class is "totalAmount" are skipped.  For the others, the first three
    cells give the label, the infos and the price (the text before the
    first space, comma read as decimal point).
    """
    self.details = []
    report = self.document.xpath('//table[@id="reportTable"]')[0]
    for row in report.xpath('tbody/tr'):
        entry = Detail()
        # Skip global category rows and the total row
        if row.find('td/a') is not None or row.attrib["class"] == "totalAmount":
            continue
        cells = row.xpath('td')
        entry.label = unicode(cells[0].text.strip())
        entry.infos = unicode(cells[1].text.strip())
        amount = cells[2].text.split(' ')[0]
        entry.price = Decimal(amount.replace(',', '.'))
        self.details.append(entry)
def iter_payments(self, sub):
    """Yield one Detail per payment row of the current browser page.

    Rows come from the first table whose ``summary`` contains
    "Informations sur mon".  Rows without cells or without an amount are
    skipped.  Each Detail id is ``<sub._id>.<YYYYMMDD>``.
    """
    clean = self.parser.tocleanstring
    payments = self.browser.page.document.xpath('//table[contains(@summary, "Informations sur mon")]')[0]
    for row in payments.xpath('.//tr'):
        cells = row.xpath('.//td')
        if not cells:
            continue
        paid_on = datetime.strptime(clean(cells[0]), "%d/%m/%Y").date()
        amount = clean(cells[1])
        if amount is None:
            continue
        payment = Detail()
        payment.id = sub._id + "." + paid_on.strftime("%Y%m%d")
        # Keep digits, commas and minus signs, then use a decimal point.
        payment.price = Decimal(re.sub('[^\d,-]+', '', amount).replace(',', '.'))
        payment.datetime = paid_on
        payment.label = unicode(clean(cells[2]))
        yield payment
def get_calls(self):
    """Yield a Detail for every row of the first ``table/tbody``.

    Cell 0 holds the date as ``day-monthname-year`` (the month name is
    looked up in ``self.months``), cell 1 the time, cells 2-5 the label
    parts and cell 6 the price.  A price that cannot be parsed counts as 0.
    """
    body = self.document.xpath('//table/tbody')[0]
    for row in body.xpath('tr'):
        cells = row.xpath('td')

        date_parts = cells[0].text_content().split('-')
        month_no = self.months.index(date_parts[1]) + 1
        day_part = date(int(date_parts[2]), month_no, int(date_parts[0]))
        time_part = time(*[int(chunk) for chunk in cells[1].text_content().split(":")])

        raw_price = re.sub(u'[^\d\-\.]', '', cells[6].text)

        call = Detail()
        call.datetime = datetime.combine(day_part, time_part)
        call.label = u"%s from %s to %s - %s" % tuple(cell.text for cell in cells[2:6])
        try:
            call.price = Decimal(raw_price)
        except InvalidOperation:
            # free calls
            call.price = Decimal(0)
        call.currency = Currency.CUR_EUR
        yield call
def iter_history(self):
    """Yield one Detail per linked row of the first ``cTableauTriable`` table.

    Rows without any link are skipped.  The lot name (first link, "(*)"
    removed) is used as id and label; lots named "SNL" are numbered in
    order of appearance.  Invoice links (with "(**)" removed) form the
    infos, each followed by a space.
    """
    tables = self.doc.xpath(
        '//table[contains(concat(" ", @class, " "), " cTableauTriable ")]')
    if not tables:
        return

    snl_count = 0
    for row in tables[0].xpath('.//tr'):
        links = row.xpath('.//a')
        if not links:
            continue
        day = row.xpath('.//td')[0].text.strip()
        lot = links[0].text.replace('(*)', '').strip()
        if lot == 'SNL':
            snl_count += 1
            lot += str(snl_count)

        invoices = ''
        for link in row.xpath('.//div[@class="cAlignGauche"]/a'):
            invoices += link.text.replace('(**)', '').strip() + ' '
        amount = row.xpath('.//div[@class="cAlignDroite"]')[0].text.strip()

        entry = Detail()
        entry.id = u'' + lot
        entry.label = u'' + lot
        entry.infos = u'' + invoices
        entry.datetime = datetime.strptime(day, "%d/%m/%Y").date()
        entry.price = Decimal(amount.replace(',', '.'))
        yield entry
def on_loaded(self):
    """Parse every table row of the document into ``self.calls``.

    Rows without ``<td>`` cells, rows whose first cell has no text and the
    header row (first cell "Date") are skipped.  The first cell holds
    ``DD/MM/YYYY <something> HH:MM[:SS]``; cells 1-3 are joined with spaces
    to make the label; the first four characters of cell 4 give the price,
    which falls back to 0 when it cannot be read.
    """
    self.calls = []
    for tr in self.document.xpath('//tr'):
        tds = tr.xpath('td')
        if not tds:
            # e.g. rows made only of <th>: indexing tds[0] would raise.
            continue
        stamp = tds[0].text
        if stamp is None or stamp == "Date":
            continue

        parts = stamp.split(' ')
        detail = Detail()
        # The date is day/month/year, hence the reversal for date(y, m, d).
        mydate = date(*reversed([int(x) for x in parts[0].split("/")]))
        # The time is the third space-separated token of the cell.
        mytime = time(*[int(x) for x in parts[2].split(":")])
        detail.datetime = datetime.combine(mydate, mytime)
        detail.label = u' '.join([unicode(td.text.strip()) for td in tds[1:4] if td.text is not None])
        try:
            detail.price = Decimal(tds[4].text[0:4].replace(',', '.'))
        except Exception:
            # Best effort: a missing or unparsable price counts as zero.
            # ``Exception`` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            detail.price = Decimal(0)
        self.calls.append(detail)
def _iter_divs(self, divs, num, inter=False):
    """Append one Detail per element of ``divs`` to ``self.details[num]``.

    The label is the text of the ``titre`` paragraph and the id is "-" plus
    the lower-cased second word of that label.  When ``inter`` is true the
    label gets u" (international)" appended and the id "-inter".
    """
    price_path = 'div[@class="horsForfait"]/p/span'
    for block in divs:
        item = Detail()
        item.label = CleanText('div[@class="titre"]/p')(block)
        item.id = "-" + item.label.split(' ')[1].lower()
        if inter:
            item.label += u" (international)"
            item.id += "-inter"
        item.infos = CleanText('div[@class="conso"]/p')(block)
        item.price = CleanDecimal(price_path, default=Decimal(0), replace_dots=True)(block)
        item.currency = Currency(price_path)(block)
        self.details[num].append(item)
def parse_voice(self, div, string, num, inter=False):
    """Return the voice-consumption Detail described by ``div``.

    ``string`` is a format string with two placeholders, filled with the
    text of the first two ``conso`` spans.  When ``inter`` is true the
    label gets " (international)" appended and the id "-inter".
    ``num`` is not used.
    """
    result = Detail()
    result.id = "-voice"
    result.label = unicode(div.find('div[@class="titre"]/p').text_content())
    if inter:
        result.label += " (international)"
        result.id += "-inter"
    result.price = convert_price(div)

    spans = div.xpath('div[@class="conso"]/p/span')
    first, second = spans[0].text, spans[1].text
    result.infos = unicode(string) % (first, second)
    return result
def _parse_voice(self, div, string, num, inter=False):
    """Return the voice-consumption Detail described by ``div``.

    ``string`` is a format string with two placeholders, filled with the
    text of the first and second ``span.actif`` found in the ``conso``
    block.  When ``inter`` is true the label gets " (international)"
    appended and the id "-inter".  ``num`` is not used.
    """
    conso = div.xpath('div[@class="conso"]')[0]
    price_path = 'div[@class="horsForfait"]/p/span'

    result = Detail()
    result.id = "-voice"
    result.label = CleanText('div[@class="titre"]/p')(div)
    if inter:
        result.label += " (international)"
        result.id += "-inter"
    result.price = CleanDecimal(price_path, default=Decimal(0), replace_dots=True)(div)
    result.currency = Currency(price_path)(div)

    first = CleanText('.//span[@class="actif"][1]')(conso)
    second = CleanText('.//span[@class="actif"][2]')(conso)
    result.infos = unicode(string) % (first, second)
    return result
def get_details(self):
    """Return the consumption summary of the PDF as a list of Detail.

    The part of the PDF text between the third "CONSOMMATION" chunk and
    "ACTIVITE DETAILLEE" is read as a flat list of non-empty lines, three
    per item: label, infos, price.
    """
    txt = self._parse_pdf()
    page = txt.split('CONSOMMATION')[2].split('ACTIVITE DETAILLEE')[0]
    lines = page.split('\n')
    lines = [x for x in lines if len(x) > 0]  # Remove empty lines
    # NOTE(review): the result feeds range(), so this presumably relies on
    # Python 2 integer division -- confirm before porting.
    numitems = (
        (len(lines) + 1) / 3
    ) - 1  # Each line has three columns, remove one element (pictures)
    lines.insert(len(lines) - 1, '')  # Add an empty column for "Prélèvement mensuel"
    lines.pop(0)  # drop the first remaining line
    details = []
    for i in range(numitems):
        # Positions of the three columns of item i in the flat list.
        nature = i * 3
        conso = nature + 1
        price = conso + 1
        detail = Detail()
        detail.label = unicode(lines[nature], encoding='utf-8')
        detail.infos = unicode(lines[conso], encoding='utf-8')
        try:
            detail.price = Decimal(lines[price].replace('€', ''))
        except:
            # Any failure to read the price (missing line, non-numeric
            # text) results in a zero price.
            detail.price = Decimal(0)
        details.append(detail)
    return details
def iter_payment_details(self, sub):
    """Yield a Detail for each row of the payment table belonging to ``sub``.

    The position of the header/table is 0 when ``sub._id`` is all digits,
    otherwise the number left after removing ``AFFILIE`` from ``sub._id``.
    Nothing is yielded when the page has neither an ``h3`` header nor a
    ``DetailPaiement3`` table at that position, or when the header does not
    match ``... le <DD/MM/YYYY> pour un montant de ...``.

    Detail ids are ``<sub._id>.<YYYYMMDD>.<row number>``.  Rows without a
    date get empty ``infos`` and the date of the previous dated row.

    Raises ValueError if the non-digit ``sub._id`` has no numeric suffix.
    """
    amount_junk = re.compile(r'[^\d,-]+')  # everything but digits, "," and "-"

    # The position has to be an integer: it is compared with node counts and
    # used to index node lists.  It used to stay a string for AFFILIE ids.
    idx = 0 if sub._id.isdigit() else int(sub._id.replace('AFFILIE', ''))

    headers = self.document.xpath('//div[@class="centrepage"]/h3')
    tables = self.document.xpath('//table[@id="DetailPaiement3"]')
    # Compare node counts with the position; the table list itself used to
    # be compared with idx, which is meaningless.
    if not (len(headers) > idx or len(tables) > idx):
        return

    # NOTE(review): as before, a page with a table but no matching header
    # still fails on the lookup below.
    m = re.match('.*le (.*) pour un montant de.*', headers[idx].text.strip())
    if m is None:
        return

    id_date = datetime.strptime(m.group(1), '%d/%m/%Y').date()
    prefix = sub._id + "." + id_date.strftime("%Y%m%d")

    last_date = None
    line = 1
    for tr in tables[idx].xpath('.//tr'):
        tds = tr.xpath('.//td')
        if len(tds) == 0:
            continue  # header rows have no <td>
        date_str = tds[0].text
        det = Detail()
        det.id = prefix + "." + str(line)
        det.label = unicode(tds[1].text.strip())
        if date_str is None or date_str == '':
            # Continuation row: reuse the date of the previous dated row.
            det.infos = u''
            det.datetime = last_date
        else:
            paid, base, rate = [unicode(amount_junk.sub('', td.text)) for td in tds[2:5]]
            det.infos = u'Payé ' + paid + u'€ / Base ' + base + u'€ / Taux ' + rate + '%'
            det.datetime = datetime.strptime(date_str, '%d/%m/%Y').date()
            last_date = det.datetime
        det.price = Decimal(amount_junk.sub('', tds[5].text).replace(',', '.'))
        line = line + 1
        yield det
def iter_history(self):
    """Yield the payment history found in the first ``cTableauTriable`` table.

    Yields nothing when the table is absent.  Only rows containing a link
    are used: the first link (minus "(*)") names the lot, "SNL" lots are
    suffixed with a running counter, and the invoice links (minus "(**)")
    are concatenated, each followed by a space, into ``infos``.
    """
    found = self.doc.xpath('//table[contains(concat(" ", @class, " "), " cTableauTriable ")]')
    if len(found) == 0:
        return

    counter = 0
    for tr in found[0].xpath('.//tr'):
        anchors = tr.xpath('.//a')
        if len(anchors) == 0:
            continue

        raw_date = tr.xpath('.//td')[0].text.strip()
        lot_name = anchors[0].text.replace('(*)', '').strip()
        if lot_name == 'SNL':
            counter = counter + 1
            lot_name = lot_name + str(counter)

        labels = ''
        for anchor in tr.xpath('.//div[@class="cAlignGauche"]/a'):
            labels = labels + anchor.text.replace('(**)', '').strip() + ' '
        total = tr.xpath('.//div[@class="cAlignDroite"]')[0].text.strip()

        item = Detail()
        item.id = u''+lot_name
        item.label = u''+lot_name
        item.infos = u''+labels
        item.datetime = datetime.strptime(raw_date, "%d/%m/%Y").date()
        item.price = Decimal(total.replace(',', '.'))
        yield item
def do_details(self, id):
    """
    details [ID]

    Get details of subscriptions.
    If no ID given, display all details of all backends.
    """
    # NOTE(review): the docstring above is left untouched; it is presumably
    # displayed as the command help -- confirm before rewording.
    l = []  # (subscription id, backend name) pairs to display
    id, backend_name = self.parse_id(id)

    if not id:
        # No ID given: use every subscription returned by the object list.
        for subscrib in self.get_object_list('iter_subscription'):
            l.append((subscrib.id, subscrib.backend))
    else:
        l.append((id, backend_name))

    for id, backend in l:
        # Restrict the call to one backend when it is known.
        names = (backend, ) if backend is not None else None
        # XXX: should be generated by backend? -Flo
        # XXX: no, but you should do it in a specific formatter -romain
        # TODO: do it, and use exec_method here. Code is obsolete
        mysum = Detail()
        mysum.label = u"Sum"
        mysum.infos = u"Generated by boobill"
        mysum.price = Decimal("0.")

        self.start_format()
        for detail in self.do('get_details', id, backends=names):
            self.format(detail)
            # Accumulate the total shown after the last detail.
            mysum.price = detail.price + mysum.price

        self.format(mysum)
def iter_divs(self, divs, num, inter=False):
    """Append one Detail per element of ``divs`` to ``self.details[num]``.

    The label is the text content of the ``titre`` paragraph and the id is
    "-" plus the lower-cased second word of that label.  When ``inter`` is
    true the label gets u" (international)" appended and the id "-inter".
    The price comes from ``convert_price(div)``.
    """
    for block in divs:
        title_node = block.find('div[@class="titre"]/p')

        item = Detail()
        item.label = unicode(title_node.text_content())
        item.id = "-" + item.label.split(' ')[1].lower()
        if inter:
            item.label += u" (international)"
            item.id += "-inter"

        conso_node = block.find('div[@class="conso"]/p')
        item.infos = unicode(conso_node.text_content().lstrip())
        item.price = convert_price(block)
        self.details[num].append(item)
def on_loaded(self):
    """Collect the calls listed in the document into ``self.calls``.

    A row is parsed only when its ``class`` attribute is ``even`` or
    ``odd``.  Its label is made of the ``td.long`` cells (each followed by
    a space) plus the first ``td.price`` cell in parentheses; its price is
    the second ``td.price`` cell with the comma read as decimal point; its
    datetime comes from the ``td.middle.nowrap`` cell, formatted as
    ``DD.MM.YYYY HH:MM[:SS]``.
    """
    self.calls = []
    wanted = ("even", "odd")
    for tr in self.document.xpath('//tr'):
        # Rows without a class attribute yield None here and are skipped.
        # This replaces a bare ``except:`` around the attribute lookup,
        # which also hid any unrelated error.
        if tr.attrib.get("class") not in wanted:
            continue

        label = u''
        tddate = tr.find('td[@class="middle nowrap"]')
        for td in tr.xpath('td[@class="long"]'):
            label += unicode(td.text.strip()) + u' '
        tdprice = tr.xpath('td[@class="price"]')
        label += u'(' + unicode(tdprice[0].text.strip()) + u')'
        price = Decimal(tdprice[1].text.strip().replace(',', '.'))

        # "date time": parse the cell text once and reuse both halves.
        day_text, time_text = tddate.text.strip().split(' ')[:2]
        # day.month.year -> date(year, month, day)
        mydate = date(*reversed([int(x) for x in day_text.split(".")]))
        mytime = time(*[int(x) for x in time_text.split(":")])

        detail = Detail()
        detail.datetime = datetime.combine(mydate, mytime)
        detail.label = label
        detail.price = price
        self.calls.append(detail)
def get_details(self):
    """Return the consumption summary of the PDF as a list of Detail.

    The text between the third "CONSOMMATION" chunk and "ACTIVITE
    DETAILLEE" is reduced to its non-empty lines, which are then read three
    at a time as label, infos and price.
    """
    txt = self._parse_pdf()
    page = txt.split('CONSOMMATION')[2].split('ACTIVITE DETAILLEE')[0]
    lines = [chunk for chunk in page.split('\n') if len(chunk) > 0]

    # Three columns per item, minus one element (pictures).
    numitems = ((len(lines) + 1) / 3) - 1
    # Add an empty column for "Prélèvement mensuel", then drop the first line.
    lines.insert(len(lines) - 1, '')
    del lines[0]

    details = []
    for index in range(numitems):
        start = index * 3
        item = Detail()
        item.label = unicode(lines[start], encoding='utf-8')
        item.infos = unicode(lines[start + 1], encoding='utf-8')
        try:
            item.price = Decimal(lines[start + 2].replace('€', ''))
        except:
            # Deliberately broad, as in the original: any unreadable price
            # counts as zero.
            item.price = Decimal(0)
        details.append(item)
    return details
def get_calls(self):
    """Extract the call details from the PDF text, newest first.

    The text returned by ``self._parse_pdf()`` is split on "DEBIT"; each
    resulting page is cut at its footer and read as a flat list of non-empty
    lines consumed in groups of four (date/label, correspondent, duration,
    price).  ``modif`` tracks how far the real data has drifted from that
    regular stride because of rows with more or fewer lines.

    Returns the Detail objects sorted by ``_get_date`` in reverse order.
    """
    txt = self._parse_pdf()
    pages = txt.split("DEBIT")
    pages.pop(0)  # remove headers
    details = []
    for page in pages:
        page = page.split('RÉGLO MOBILE')[0].split('N.B. Prévoir')[0]  # remove footers
        lines = page.split('\n')
        lines = [x for x in lines if len(x) > 0]  # Remove empty lines
        # NOTE(review): the original comment said "five columns" but the
        # stride used below is 4 -- confirm.  The value is a loop bound, so
        # this presumably relies on Python 2 integer division.
        numitems = (len(lines) + 1) / 4  # Each line has five columns
        lines.pop(0)  # remove the extra € symbol
        modif = 0  # accumulated offset between i * 4 and the real position
        i = 0
        while i < numitems:
            if modif != 0:
                # The drift changes how many items fit in the page.
                numitems = ((len(lines) + 1 + modif) / 4)
            base = i * 4 - modif
            dateop = base
            corres = base + 1
            duree = base + 2
            price = base + 3
            if "Changement vers le Forfait" in lines[base]:
                # This entry produces no Detail; it only shifts the offset
                # by one.
                modif += 1
                i += 1
                continue
            # Special case with 5 columns, the operation date is not in the first one
            if len(re.split("(\d+\/\d+\/\d+)", lines[dateop])) < 2:
                # Merge the dateless first line into the next one and shift
                # every column index by one.
                lines[base + 1] = lines[base] + " " + lines[base + 1]
                dateop = base + 1
                corres = base + 2
                duree = base + 3
                price = base + 4
                modif -= 1
            detail = Detail()
            # splits = [text before the date, the date, text after the date]
            splits = re.split("(\d+\/\d+\/\d+)", lines[dateop])
            mydate = date(*reversed([int(x) for x in splits[1].split("/")]))
            mytime = time(*[int(x) for x in splits[2].split(":")])
            detail.datetime = datetime.combine(mydate, mytime)
            # A lone dash stands for an empty column.
            if lines[corres] == '-':
                lines[corres] = ""
            if lines[duree] == '-':
                lines[duree] = ''
            detail.label = unicode(splits[0], encoding='utf-8', errors='replace') + u" " + lines[corres] + u" " + lines[duree]
            # Special case with only 3 columns, we insert a price
            if "Activation de votre ligne" in detail.label or u"Résiliation" in detail.label:
                lines.insert(price, '0')
            try:
                detail.price = Decimal(lines[price].replace(',', '.'))
            except:
                # In some special cases, there are no price column. Try to detect it
                if "Inclus" not in lines[price]:
                    modif += 1
                detail.price = Decimal(0)

            details.append(detail)
            i += 1
    return sorted(details, key=_get_date, reverse=True)
def get_balance(self, subscription):
    """Return the balance of a subscription as a Detail in EUR.

    ``subscription`` may be a Subscription instance or any value accepted
    by ``self.get_subscription``.  Both the id ("<id>-balance") and the
    label ("Balance <id>") embed the subscription id.
    """
    sub = subscription
    if not isinstance(sub, Subscription):
        sub = self.get_subscription(sub)

    result = Detail()
    result.id = "%s-balance" % sub.id
    result.price = sub._balance
    result.label = u"Balance %s" % sub.id
    result.currency = u'EUR'
    return result
def iter_divs(self, divs, num, inter=False):
    """Turn each element of ``divs`` into a Detail stored in ``self.details[num]``.

    Label: text content of the ``titre`` paragraph.  Id: "-" followed by
    the lower-cased second word of the label.  With ``inter`` set, the
    label and id get the u" (international)" and "-inter" suffixes.
    Infos: left-stripped text content of the ``conso`` paragraph.
    """
    suffix_label, suffix_id = u" (international)", "-inter"
    for element in divs:
        entry = Detail()
        entry.label = unicode(element.find('div[@class="titre"]/p').text_content())
        entry.id = "-" + entry.label.split(' ')[1].lower()
        if inter:
            entry.label = entry.label + suffix_label
            entry.id = entry.id + suffix_id
        conso_text = element.find('div[@class="conso"]/p').text_content()
        entry.infos = unicode(conso_text.lstrip())
        entry.price = convert_price(element)
        self.details[num].append(entry)
def get_calls(self):
    """Extract the call details from the PDF text, newest first.

    The text returned by ``self._parse_pdf()`` is split on "DEBIT"; each
    resulting page is cut at its footer and read as a flat list of non-empty
    lines consumed in groups of four (date/label, correspondent, duration,
    price).  ``modif`` tracks how far the real data has drifted from that
    regular stride.

    Returns the Detail objects sorted by ``_get_date`` in reverse order.
    """
    txt = self._parse_pdf()
    pages = txt.split("DEBIT")
    pages.pop(0)  # remove headers
    details = []
    for page in pages:
        page = page.split('RÉGLO MOBILE')[0].split('N.B. Prévoir')[0]  # remove footers
        lines = page.split('\n')
        lines = [x for x in lines if len(x) > 0]  # Remove empty lines
        # NOTE(review): the original comment said "five columns" but the
        # stride used below is 4 -- confirm.  The value is a loop bound, so
        # this presumably relies on Python 2 integer division.
        numitems = (len(lines) + 1) / 4  # Each line has five columns
        lines.pop(0)  # drop the first remaining line
        modif = 0  # accumulated offset between i * 4 and the real position
        i = 0
        while i < numitems:
            if modif != 0:
                # The drift changes how many items fit in the page.
                numitems = ((len(lines) + 1 + modif) / 4)
            nature = i * 4 - modif
            dateop = nature
            corres = nature + 1
            duree = corres + 1
            price = duree + 1
            if "Changement vers le Forfait" in lines[nature]:
                # This entry produces no Detail; it only shifts the offset
                # by one.
                modif += 1
                i += 1
                continue
            if len(re.split("(\d+\/\d+\/\d+)", lines[dateop])) < 2:
                # No date on the first line: merge it into the next one and
                # shift every column index by one.
                lines[nature + 1] = lines[nature] + " " + lines[nature + 1]
                dateop = nature + 1
                corres = dateop + 1
                duree = corres + 1
                price = duree + 1
                modif -= 1
            # The correspondent column is expected to start with three
            # digits or to be a dash; anything else shifts the offset.
            if not lines[corres][0:3].isdigit() and not lines[corres][0:3] == "-":
                modif += 1
            detail = Detail()
            # splits = [text before the date, the date, text after the date]
            splits = re.split("(\d+\/\d+\/\d+)", lines[dateop])
            mydate = date(*reversed([int(x) for x in splits[1].split("/")]))
            mytime = time(*[int(x) for x in splits[2].split(":")])
            detail.datetime = datetime.combine(mydate, mytime)
            # A lone dash stands for an empty column.
            if lines[corres] == '-':
                lines[corres] = ""
            if lines[duree] == '-':
                lines[duree] = ''
            detail.label = unicode(splits[0], encoding='utf-8', errors='replace') + u" " + lines[corres] + u" " + lines[duree]
            # Special case with only 3 columns, we insert a price
            if "Activation de votre ligne" in detail.label:
                lines.insert(price, '0')
            try:
                detail.price = Decimal(lines[price].replace(',', '.'))
            except:
                # Any unreadable price counts as zero.
                detail.price = Decimal(0)

            details.append(detail)
            i += 1
    return sorted(details, key=_get_date, reverse=True)
def iter_payment_details(self, sub):
    """Yield a Detail for each row of the payment table belonging to ``sub``.

    The position of the header/table is 0 when ``sub._id`` is all digits,
    otherwise the number left after removing ``AFFILIE`` from ``sub._id``.
    Nothing is yielded when the page has neither an ``h3`` header nor a
    ``DetailPaiement3`` table at that position, or when the header does not
    match ``... le <DD/MM/YYYY> pour un montant de ...``.

    Detail ids are ``<sub._id>.<YYYYMMDD>.<row number>``.  Rows without a
    date get empty ``infos`` and the date of the previous dated row.

    Raises ValueError if the non-digit ``sub._id`` has no numeric suffix.
    """
    def only_amount(text):
        # Strip everything except digits, commas and minus signs.
        return re.sub(r"[^\d,-]+", "", text)

    if sub._id.isdigit():
        idx = 0
    else:
        # Convert to int: the value is compared with node counts and used
        # as a list index (it used to be left as a string).
        idx = int(sub._id.replace("AFFILIE", ""))

    h3_nodes = self.document.xpath('//div[@class="centrepage"]/h3')
    payment_tables = self.document.xpath('//table[@id="DetailPaiement3"]')
    # Node counts, not the node list, are what must exceed the position.
    has_header = len(h3_nodes) > idx
    has_table = len(payment_tables) > idx
    if not (has_header or has_table):
        return

    # NOTE(review): as before, a page with a table but no matching header
    # still fails on the lookup below.
    id_str = h3_nodes[idx].text.strip()
    m = re.match(".*le (.*) pour un montant de.*", id_str)
    if not m:
        return

    id_date = datetime.strptime(m.group(1), "%d/%m/%Y").date()
    base_id = sub._id + "." + id_date.strftime("%Y%m%d")

    last_date = None
    line = 0
    for tr in payment_tables[idx].xpath(".//tr"):
        tds = tr.xpath(".//td")
        if not tds:
            continue  # header rows have no <td>
        line += 1
        date_str = tds[0].text

        det = Detail()
        det.id = base_id + "." + str(line)
        det.label = unicode(tds[1].text.strip())
        if date_str:
            det.infos = (
                u"Payé "
                + unicode(only_amount(tds[2].text))
                + u"€ / Base "
                + unicode(only_amount(tds[3].text))
                + u"€ / Taux "
                + unicode(only_amount(tds[4].text))
                + "%"
            )
            det.datetime = datetime.strptime(date_str, "%d/%m/%Y").date()
            last_date = det.datetime
        else:
            # Continuation row: reuse the date of the previous dated row.
            det.infos = u""
            det.datetime = last_date
        det.price = Decimal(only_amount(tds[5].text).replace(",", "."))
        yield det
def get_details(self):
    """Return the consumption summary of the PDF as a list of Detail.

    The text between the third "CONSOMMATION" chunk and "ACTIVITE
    DETAILLEE" is scanned line by line.  A line starting with an ASCII
    letter opens a new Detail; the following lines complete its infos or
    price.
    """
    txt = self._parse_pdf()
    page = txt.split('CONSOMMATION')[2].split('ACTIVITE DETAILLEE')[0]
    lines = page.split('\n')
    lines = [x for x in lines if len(x) > 0]  # Remove empty lines
    details = []
    detail = None
    lines.pop(-1)  # Line describing the pictures
    twolines = False  # True when the infos of the current item are on the next line
    for line in lines:
        if "Votre consommation" in line:
            # Keep only what follows the first ": ".
            line = line.split(": ", 1)[1]
        if twolines:
            twolines = False
            detail.infos = unicode(line, encoding='utf-8')
        elif re.match('[A-Za-z]', line[0]):
            # We have a new element, store the previous one
            if detail is not None:
                details.append(detail)
            detail = Detail()
            # Split at the first digit: [label, digit, rest] when there is one.
            split = re.split("(\d)", line, maxsplit=1)
            detail.price = Decimal(0)
            if len(split) > 2:
                detail.infos = unicode(split[1] + split[2], encoding='utf-8')
            else:
                # No digit on this line: the infos come on the next one.
                twolines = True
            if '€' in line:
                # The text after the label is a price.
                specialprice = split[1] + split[2]
                detail.price = Decimal(specialprice.replace('€', ''))
            detail.label = unicode(split[0], encoding='utf-8')
        elif '€' in line:
            detail.price = Decimal(line.replace('€', ''))
        else:
            detail.infos = unicode(line, encoding='utf-8')
    # The last item is still pending.  NOTE(review): this appends None when
    # no item was ever opened.
    details.append(detail)
    return details
def iter_history(self):
    """Yield one Detail per linked row of the first ``cTableauTriable`` table.

    Yields nothing when the document has no such table (an unguarded
    ``[0]`` used to raise IndexError in that case).  Rows without any link
    are skipped.  The text of the first link is used as id and label; the
    texts of the invoice links, each followed by a space, form the infos.
    """
    tables = self.document.xpath('//table[contains(concat(" ", @class, " "), " cTableauTriable ")]')
    if not tables:
        # No history table on this page: nothing to report.
        return

    for tr in tables[0].xpath('.//tr'):
        list_a = tr.xpath('.//a')
        if not list_a:
            continue
        date = tr.xpath('.//td')[0].text.strip()
        lot = list_a[0].text
        factures = tr.xpath('.//div[@class="cAlignGauche"]/a')
        factures_lbl = ''.join(a.text + ' ' for a in factures)
        montant = tr.xpath('.//div[@class="cAlignDroite"]')[0].text.strip()

        det = Detail()
        det.id = lot
        det.label = lot
        det.infos = factures_lbl
        det.datetime = datetime.strptime(date, "%d/%m/%Y").date()
        # Amounts use a comma as decimal separator.
        det.price = Decimal(montant.replace(',', '.'))
        yield det
def get_details(self):
    """Return the consumption summary of the PDF as a list of Detail.

    The text between the third "CONSOMMATION" chunk and "ACTIVITE
    DETAILLEE" is scanned line by line, after dropping its first and last
    non-empty lines.  A line starting with an ASCII letter opens a new
    Detail; the following lines complete its infos or price.
    """
    txt = self._parse_pdf()
    page = txt.split('CONSOMMATION')[2].split('ACTIVITE DETAILLEE')[0]
    lines = page.split('\n')
    lines = [x for x in lines if len(x) > 0]  # Remove empty lines
    details = []
    detail = None
    lines.pop(0)  # MENSUELLE
    lines.pop(-1)  # Line describing the pictures
    twolines = False  # True when the infos of the current item are on the next line
    for line in lines:
        if "Votre consommation" in line:
            # Keep only what follows the first ": ".
            line = line.split(": ", 1)[1]
        if twolines:
            twolines = False
            detail.infos = unicode(line, encoding='utf-8')
        elif re.match('[A-Za-z]', line[0]):
            # We have a new element, store the previous one
            if detail is not None:
                details.append(detail)
            detail = Detail()
            # Split at the first digit: [label, digit, rest] when there is one.
            split = re.split("(\d)", line, maxsplit=1)
            detail.price = Decimal(0)
            if len(split) > 2:
                detail.infos = unicode(split[1] + split[2], encoding='utf-8')
            else:
                # No digit on this line: the infos come on the next one.
                twolines = True
            if '€' in line:
                # The text after the label is a price.
                specialprice = split[1] + split[2]
                detail.price = Decimal(specialprice.replace('€', ''))
            detail.label = unicode(split[0], encoding='utf-8')
        elif '€' in line:
            detail.price = Decimal(line.replace('€', ''))
        else:
            detail.infos = unicode(line, encoding='utf-8')
    # The last item is still pending.  NOTE(review): this appends None when
    # no item was ever opened.
    details.append(detail)
    return details
def on_loaded(self):
    """Build ``self.calls`` from the table rows of the document.

    A row is ignored when it has no ``<td>`` cell, when its first cell has
    no text, or when that text is "Date" (header).  Otherwise the first
    cell is read as ``DD/MM/YYYY <something> HH:MM[:SS]``, cells 1-3 are
    joined with spaces to make the label, and the first four characters of
    cell 4 give the price (0 when it cannot be read).
    """
    self.calls = []
    for tr in self.document.xpath('//tr'):
        tds = tr.xpath('td')
        # Guard against rows without <td> (indexing them raised IndexError).
        if len(tds) == 0 or tds[0].text is None or tds[0].text == "Date":
            continue

        when = tds[0].text.split(' ')
        detail = Detail()
        # day/month/year -> date(year, month, day)
        mydate = date(*reversed(
            [int(x) for x in when[0].split("/")]))
        # The time is the third space-separated token of the cell.
        mytime = time(
            *[int(x) for x in when[2].split(":")])
        detail.datetime = datetime.combine(mydate, mytime)
        detail.label = u' '.join([
            unicode(td.text.strip()) for td in tds[1:4]
            if td.text is not None
        ])
        try:
            detail.price = Decimal(tds[4].text[0:4].replace(',', '.'))
        except Exception:
            # Best effort: a missing or unparsable price counts as zero.
            # ``Exception`` rather than a bare except, so KeyboardInterrupt
            # and SystemExit still propagate.
            detail.price = Decimal(0)
        self.calls.append(detail)
def get_calls(self):
    """Yield the calls listed in the first ``table/tbody`` as Detail objects.

    Per row: cell 0 is the date (``day-monthname-year``, month names looked
    up in ``self.months``), cell 1 the time, cells 2-5 feed the label and
    cell 6 the price.  Unparsable prices become 0; the currency is 'EUR'.
    """
    rows = self.document.xpath('//table/tbody')[0].xpath('tr')
    for tr in rows:
        cols = tr.xpath('td')

        pieces = cols[0].text_content().split('-')
        month_no = self.months.index(pieces[1]) + 1
        call_date = date(int(pieces[2]), month_no, int(pieces[0]))

        clock = cols[1].text_content()
        call_time = time(*[int(part) for part in clock.split(":")])

        # Keep digits, minus signs and dots only.
        amount = re.sub(u'[^\d\-\.]', '', cols[6].text)

        record = Detail()
        record.datetime = datetime.combine(call_date, call_time)
        label_parts = (cols[2].text, cols[3].text, cols[4].text, cols[5].text)
        record.label = u"%s from %s to %s - %s" % label_parts
        try:
            record.price = Decimal(amount)
        except InvalidOperation:
            record.price = Decimal(0)  # free calls
        record.currency = 'EUR'
        yield record
def iter_payment_details(self, sub):
    """Yield a Detail per row of the first ``infoPrestationsAssure`` table.

    Nothing is yielded unless the ``infoPrestationsAssure`` span text starts
    with "Pour <sub.subscriber>" and the ``centrepage`` header matches
    ``... le <DD/MM/YYYY> pour un montant de ...``.  Rows with five cells
    and rows with six cells are parsed with different column layouts.
    Detail ids are ``<sub._id>.<YYYYMMDD>.<row number>``.
    """
    if CleanText('//div[@class="infoPrestationsAssure"]/span')(self.doc).startswith('Pour %s' % sub.subscriber):
        id_str = self.doc.xpath('//div[@class="centrepage"]/h2')[0].text.strip()
        m = re.match('.*le (.*) pour un montant de.*', id_str)
        if m:
            id_str = m.group(1)
            id_date = datetime.strptime(id_str, '%d/%m/%Y').date()
            id = sub._id + "." + datetime.strftime(id_date, "%Y%m%d")
            table = self.doc.xpath('//div[@class="infoPrestationsAssure"]//table')[0].xpath('.//tr')
            line = 1
            last_date = None  # date of the latest dated row, reused by undated rows
            for tr in table:
                tds = tr.xpath('.//td')
                if len(tds) == 0:
                    continue
                det = Detail()
                # NOTE(review): a row whose cell count is neither 5 nor 6 is
                # still yielded, with none of the fields below set.

                # Five cells: date, label, days, amount, price.
                if len(tds) == 5:
                    date_str = tds[0].text
                    det.id = id + "." + str(line)
                    det.label = unicode(tds[1].text.strip())
                    # Empty cells count as '0'.
                    jours = tds[2].text
                    if jours is None:
                        jours = '0'
                    montant = tds[3].text
                    if montant is None:
                        montant = '0'
                    price = tds[4].text
                    if price is None:
                        price = '0'

                    if date_str is None or date_str == '':
                        det.infos = u''
                        det.datetime = last_date
                    else:
                        det.infos = date_str + u' (' + unicode(re.sub('[^\d,-]+', '', jours)) + u'j) * ' + unicode(re.sub('[^\d,-]+', '', montant)) + u'€'
                        # The date used is the fourth space-separated token
                        # of the cell text.
                        det.datetime = datetime.strptime(date_str.split(' ')[3], '%d/%m/%Y').date()
                        last_date = det.datetime
                    det.price = Decimal(re.sub('[^\d,-]+', '', price).replace(',', '.'))

                # Six cells: date, label, paid, base, rate, price.
                if len(tds) == 6:
                    date_str = tds[0].text
                    det.id = id + "." + str(line)
                    det.label = unicode(tds[1].text.strip())
                    # Empty cells count as '0'.
                    paye = tds[2].text
                    if paye is None:
                        paye = '0'
                    base = tds[3].text
                    if base is None:
                        base = '0'
                    taux = tds[4].text
                    if taux is None:
                        taux = '0'
                    price = tds[5].text
                    if price is None:
                        price = '0'

                    if date_str is None or date_str == '':
                        det.infos = u''
                        det.datetime = last_date
                    else:
                        det.infos = u'Payé ' + unicode(re.sub('[^\d,-]+', '', paye)) + u'€ / Base ' + unicode(re.sub('[^\d,-]+', '', base)) + u'€ / Taux ' + unicode(re.sub('[^\d,-]+', '', taux)) + '%'
                        det.datetime = datetime.strptime(date_str, '%d/%m/%Y').date()
                        last_date = det.datetime
                    det.price = Decimal(re.sub('[^\d,-]+', '', price).replace(',', '.'))
                line = line + 1
                yield det
def _parse_page(self, page):
    """Parse the text of a bill page into a list of Detail.

    Three sections are read in turn ('Abonnement', 'Consommation' and
    'Contributions et taxes liées à l'énergie'), each running from its
    title to the next 'Total '.  Lines starting with an ASCII letter open a
    new Detail (or extend the previous label); lines containing ' %' or
    ' €' provide the VAT infos and the price of the current Detail.
    """

    # Regexp
    footnote = re.compile(r'\([0-9]\) ')                # (f)
    ht = re.compile('HT par mois')
    base = re.compile('la base de')
    # NOTE(review): the original comment said YY/MM/DD; the order of the
    # three two-digit groups is not established here -- confirm.
    enddate = re.compile('\d\d\/\d\d\/\d\d')            # NN/NN/NN
    endwithdigit = re.compile('\d+$')                   # blah blah 42
    textwithcoma = re.compile('([a-z]|\d{4})\,')        # blah 2012, blah blah

    # Parsing
    details = []
    for title in ['Abonnement',
                  'Consommation',
                  'Contributions et taxes liées à l\'énergie']:
        section = page.split(title, 1)[1].split('Total ')[0]

        # When a line holds '(0)', a newline is missing.
        section = re.sub(footnote, '\n', section)

        lines = section.split('\n')
        lines = [x for x in lines if len(x) > 0]  # Remove empty lines
        detail = None

        for line in lines:
            if re.match('[A-Za-z]', line[0]):

                # Things we want to merge with the one just before
                if 'facturées' in line:
                    # Long lines are sometimes split, so we try to join them.
                    # That is the case for:
                    # 'Déduction du montant des consommations
                    # estimées facturées du 00/00/00 au 00/00/00'
                    detail.label = detail.label + u' ' + unicode(line, encoding='utf-8')

                # Things for which we want a new detail
                else:
                    # Entering here, we will instantiate a new detail.
                    # We had not done so before because of fragmented lines.
                    if detail is not None and detail.label is not NotAvailable:
                        # We have a new element, store the previous one
                        details.append(detail)
                    detail = Detail()
                    detail.price = Decimal(0)

                    # If the comma is not a decimal separator, then this is
                    # probably a long sentence: keep only what precedes it.
                    line = re.split(textwithcoma, line)[0]

                    # Things we want for sure
                    if re.findall(enddate, line):
                        # When a line has been badly split after a date,
                        # we want the label to end after the date, and maybe
                        # the second part to be the info
                        mydate = re.search(enddate, line).group(0)
                        mylist = line.rpartition(mydate)
                        label = mylist[0] + mylist[1]
                        detail.label = unicode(label, encoding='utf-8')
                    elif re.findall(endwithdigit, line):
                        # A stray number ends the line: the line should
                        # have been split before it
                        detail.label = unicode(re.split(endwithdigit, line)[0], encoding='utf-8')

                    # Things we don't want for sure
                    elif ')' in line and '(' not in line:
                        # The first part of the parenthesis should have been
                        # dropped before; avoid creating a new empty detail
                        detail.label = NotAvailable
                    elif re.match(base, line):
                        # This string should always come after a date,
                        # usually it will match one of the cases above.
                        # Sometimes it appears on a new line we don't need.
                        detail.label = NotAvailable
                    elif re.match(ht, line):
                        # '00,00 € HT par mois' may have been split after HT;
                        # we don't need the second line
                        detail.label = NotAvailable

                    # Things we probably want to keep
                    else:
                        # Well, maybe our line is correct, after all.
                        # Not much to do.
                        detail.label = unicode(line, encoding='utf-8')
                    detail.infos = NotAvailable
            elif ' %' in line:
                if isinstance(detail, Detail):
                    # Sometimes the vat is not on a new line:
                    # '00,00 00,0 %' instead of '00,0 %'
                    vat = line.split()[line.count(' ')-1].replace(',', '.')
                    detail.infos = unicode('TVA: ' + vat)
            elif ' €' in line:
                price = line.replace(',', '.')
                if isinstance(detail, Detail):
                    detail.price = Decimal(price.strip(' €'))
            elif re.match(enddate, line):
                # Line holding dates may have been mixed up
                label = detail.label.split(' au ')[0] + u' au ' + unicode(line, encoding='utf-8')
                detail.label = label
        if detail.label is not NotAvailable:
            # Do not append empty details to the list.
            # It seemed easier to create details anyway than dealing
            # with None objects
            details.append(detail)
    return details
def iter_payment_details(self, sub):
    """Yield one Detail per table row of the payment page in ``self.doc``.

    Nothing is yielded when the page heading does not match
    '... le <date> pour un montant de ...'.  Otherwise every beneficiary
    span is paired, by position, with the 'tableauPrestation' table of the
    same index, and each row of that table holding at least one cell
    produces a Detail:

    * 4 cells: label, days, amount and price (daily allowances layout);
    * 5 cells: label prefixed by the beneficiary, paid amount, base, rate
      and price;
    * any other count: the Detail is yielded without any field set here.

    A row without a date inherits the date of the last dated row of the
    same table.

    :param sub: subscription; only ``sub._id`` is read, to prefix the ids
    :raises IndexError: if there are fewer tables than beneficiary spans
    """
    heading = self.doc.xpath('//div[@class="entete container"]/h2')[0].text.strip()
    m = re.match('.*le (.*) pour un montant de.*', heading)
    if not m:
        return

    non_numeric = re.compile(r'[^\d,-]+')

    def numeric(text):
        # Keep only digits, commas and minus signs.
        return non_numeric.sub('', text)

    def or_zero(text):
        # A cell without text counts as zero.
        return '0' if text is None else text

    blocs_benes = self.doc.xpath('//span[contains(@id,"nomBeneficiaire")]')
    blocs_prestas = self.doc.xpath('//table[@id="tableauPrestation"]')
    for index, bloc_bene in enumerate(blocs_benes):
        bene = bloc_bene.text
        # Computed per beneficiary rather than before the loop, so that a
        # page without any beneficiary block never parses the date.
        id_date = datetime.strptime(m.group(1), '%d/%m/%Y').date()
        payment_id = sub._id + "." + datetime.strftime(id_date, "%Y%m%d")
        table = blocs_prestas[index].xpath('.//tr')
        row_number = 1
        last_date = None
        for tr in table:
            tds = tr.xpath('.//td')
            if not tds:
                continue

            det = Detail()

            # TO TEST: daily allowances -- no similar case could be tested
            # on the new version of the site.
            if len(tds) == 4:
                date_str = Regexp(pattern=r'.*<br/>(\d+/\d+/\d+)\).*').filter(tds[0].text)
                det.id = payment_id + "." + str(row_number)
                det.label = tds[0].xpath('.//span')[0].text.strip()
                jours = or_zero(tds[1].text)
                montant = or_zero(tds[2].text)
                price = or_zero(tds[3].text)

                if date_str is None or date_str == '':
                    det.infos = u''
                    det.datetime = last_date
                else:
                    det.infos = '%s (%sj) * %s€' % (date_str, numeric(jours), numeric(montant))
                    # NOTE(review): the pattern above captures a date
                    # without spaces; if Regexp returns only that group,
                    # split(' ')[3] raises IndexError -- confirm.
                    det.datetime = datetime.strptime(date_str.split(' ')[3], '%d/%m/%Y').date()
                    last_date = det.datetime
                det.price = Decimal(numeric(price).replace(',', '.'))
            elif len(tds) == 5:
                date_str = Regexp(pattern=r'\w*(\d{2})/(\d{2})/(\d{4}).*',
                                  template='\\1/\\2/\\3',
                                  default="").filter("".join(tds[0].itertext()))
                det.id = payment_id + "." + str(row_number)
                det.label = '%s - %s' % (bene, tds[0].xpath('.//span')[0].text.strip())
                paye = or_zero(tds[1].text)
                base = or_zero(tds[2].text)

                tdtaux = tds[3].xpath('.//span')[0].text
                taux = '0' if tdtaux is None else tdtaux.strip()

                tdprice = tds[4].xpath('.//span')[0].text
                price = '0' if tdprice is None else tdprice.strip()

                if date_str is None or date_str == '':
                    det.infos = u''
                    det.datetime = last_date
                else:
                    det.infos = u' Payé %s€ / Base %s€ / Taux %s%%' % (numeric(paye), numeric(base), numeric(taux))
                    det.datetime = datetime.strptime(date_str, '%d/%m/%Y').date()
                    last_date = det.datetime
                det.price = Decimal(numeric(price).replace(',', '.'))

            row_number += 1
            yield det
def iter_payment_details(self, sub):
    """Yield one Detail per table row of the payment page in ``self.doc``.

    Nothing is yielded when the page heading does not match
    '... le <date> pour un montant de ...'.  Otherwise every beneficiary
    span is paired, by position, with the 'tableauPrestation' table of the
    same index, and each row of that table holding at least one cell
    produces a Detail:

    * 4 cells: label, days, amount and price (daily allowances layout);
    * 5 cells: label prefixed by the beneficiary, paid amount, base, rate
      and price;
    * any other count: the Detail is yielded without any field set here.

    A row without a date inherits the date of the last dated row of the
    same table.

    :param sub: subscription; only ``sub._id`` is read, to prefix the ids
    :raises IndexError: if there are fewer tables than beneficiary spans
    """
    heading = self.doc.xpath(
        '//div[@class="entete container"]/h2')[0].text.strip()
    m = re.match('.*le (.*) pour un montant de.*', heading)
    if not m:
        return

    non_numeric = re.compile(r'[^\d,-]+')

    def numeric(text):
        # Keep only digits, commas and minus signs.
        return non_numeric.sub('', text)

    def or_zero(text):
        # A cell without text counts as zero.
        return '0' if text is None else text

    blocs_benes = self.doc.xpath(
        '//span[contains(@id,"nomBeneficiaire")]')
    blocs_prestas = self.doc.xpath('//table[@id="tableauPrestation"]')
    for index, bloc_bene in enumerate(blocs_benes):
        bene = bloc_bene.text
        # Computed per beneficiary rather than before the loop, so that a
        # page without any beneficiary block never parses the date.
        id_date = datetime.strptime(m.group(1), '%d/%m/%Y').date()
        payment_id = sub._id + "." + datetime.strftime(id_date, "%Y%m%d")
        table = blocs_prestas[index].xpath('.//tr')
        row_number = 1
        last_date = None
        for tr in table:
            tds = tr.xpath('.//td')
            if not tds:
                continue

            det = Detail()

            # TO TEST: daily allowances -- no similar case could be tested
            # on the new version of the site.
            if len(tds) == 4:
                date_str = Regexp(r'.*<br/>(\d+/\d+/\d+)\).*', '\\1')(tds[0].text)
                det.id = payment_id + "." + str(row_number)
                det.label = unicode(
                    tds[0].xpath('.//span')[0].text.strip())
                jours = or_zero(tds[1].text)
                montant = or_zero(tds[2].text)
                price = or_zero(tds[3].text)

                if date_str is None or date_str == '':
                    det.infos = u''
                    det.datetime = last_date
                else:
                    det.infos = (date_str
                                 + u' (' + unicode(numeric(jours))
                                 + u'j) * ' + unicode(numeric(montant))
                                 + u'€')
                    # NOTE(review): the pattern above captures a date
                    # without spaces; if Regexp returns only that group,
                    # split(' ')[3] raises IndexError -- confirm.
                    det.datetime = datetime.strptime(
                        date_str.split(' ')[3], '%d/%m/%Y').date()
                    last_date = det.datetime
                det.price = Decimal(numeric(price).replace(',', '.'))
            elif len(tds) == 5:
                date_str = Regexp(
                    pattern=r'\w*(\d{2})/(\d{2})/(\d{4}).*',
                    template='\\1/\\2/\\3',
                    default="").filter("".join(tds[0].itertext()))
                det.id = payment_id + "." + str(row_number)
                det.label = bene + u' - ' + unicode(
                    tds[0].xpath('.//span')[0].text.strip())
                paye = or_zero(tds[1].text)
                base = or_zero(tds[2].text)

                tdtaux = tds[3].xpath('.//span')[0].text
                taux = '0' if tdtaux is None else tdtaux.strip()

                tdprice = tds[4].xpath('.//span')[0].text
                price = '0' if tdprice is None else tdprice.strip()

                if date_str is None or date_str == '':
                    det.infos = u''
                    det.datetime = last_date
                else:
                    det.infos = (u' Payé ' + unicode(numeric(paye))
                                 + u'€ / Base ' + unicode(numeric(base))
                                 + u'€ / Taux ' + unicode(numeric(taux))
                                 + '%')
                    det.datetime = datetime.strptime(
                        date_str, '%d/%m/%Y').date()
                    last_date = det.datetime
                det.price = Decimal(numeric(price).replace(',', '.'))

            row_number += 1
            yield det
def iter_payment_details(self, sub):
    """Yield one Detail per table row of the payment page in ``self.doc``.

    Nothing is yielded unless the text of the 'infoPrestationsAssure' span
    starts with 'Pour <sub.subscriber>' and the page heading matches
    '... le <date> pour un montant de ...'.  Each row of the first table
    of the 'infoPrestationsAssure' block holding at least one cell then
    produces a Detail:

    * 5 cells: date, label, days, amount and price;
    * 6 cells: date, label, paid amount, base, rate and price;
    * any other count: the Detail is yielded without any field set here.

    A row without a date inherits the date of the last dated row.

    :param sub: subscription; ``sub.subscriber`` selects the page and
                ``sub._id`` prefixes the ids
    """
    owner = CleanText('//div[@class="infoPrestationsAssure"]/span')(self.doc)
    if not owner.startswith('Pour %s' % sub.subscriber):
        return

    heading = self.doc.xpath(
        '//div[@class="centrepage"]/h2')[0].text.strip()
    m = re.match('.*le (.*) pour un montant de.*', heading)
    if not m:
        return

    non_numeric = re.compile(r'[^\d,-]+')

    def numeric(text):
        # Keep only digits, commas and minus signs.
        return non_numeric.sub('', text)

    def or_zero(text):
        # A cell without text counts as zero.
        return '0' if text is None else text

    id_date = datetime.strptime(m.group(1), '%d/%m/%Y').date()
    payment_id = sub._id + "." + datetime.strftime(id_date, "%Y%m%d")

    table = self.doc.xpath(
        '//div[@class="infoPrestationsAssure"]//table')[0].xpath('.//tr')
    row_number = 1
    last_date = None
    for tr in table:
        tds = tr.xpath('.//td')
        if not tds:
            continue

        det = Detail()

        if len(tds) == 5:
            date_str = tds[0].text
            det.id = payment_id + "." + str(row_number)
            det.label = unicode(tds[1].text.strip())
            jours = or_zero(tds[2].text)
            montant = or_zero(tds[3].text)
            price = or_zero(tds[4].text)

            if date_str is None or date_str == '':
                det.infos = u''
                det.datetime = last_date
            else:
                det.infos = (date_str
                             + u' (' + unicode(numeric(jours))
                             + u'j) * ' + unicode(numeric(montant))
                             + u'€')
                # The date used is the fourth space-separated word of the
                # cell text.
                det.datetime = datetime.strptime(
                    date_str.split(' ')[3], '%d/%m/%Y').date()
                last_date = det.datetime
            det.price = Decimal(numeric(price).replace(',', '.'))
        elif len(tds) == 6:
            date_str = tds[0].text
            det.id = payment_id + "." + str(row_number)
            det.label = unicode(tds[1].text.strip())
            paye = or_zero(tds[2].text)
            base = or_zero(tds[3].text)
            taux = or_zero(tds[4].text)
            price = or_zero(tds[5].text)

            if date_str is None or date_str == '':
                det.infos = u''
                det.datetime = last_date
            else:
                det.infos = (u'Payé ' + unicode(numeric(paye))
                             + u'€ / Base ' + unicode(numeric(base))
                             + u'€ / Taux ' + unicode(numeric(taux))
                             + '%')
                det.datetime = datetime.strptime(
                    date_str, '%d/%m/%Y').date()
                last_date = det.datetime
            det.price = Decimal(numeric(price).replace(',', '.'))

        row_number += 1
        yield det