def output(self):
    """Return one local ``xlsx`` target per download URL.

    The URL collection is obtained from ``_get_download_urls()`` on first
    use and cached on the instance as ``_urls_to_download``. Each target is
    named ``<self.name>_<index>`` and placed in ``TMP_DIR``.
    """
    # Resolve the URL list only once; later calls reuse the cached value.
    if not hasattr(self, '_urls_to_download'):
        self._urls_to_download = self._get_download_urls()

    # Only the position of each URL matters for the file name.
    return [
        luigi.LocalTarget(build_fpath(TMP_DIR, f'{self.name}_{idx}', 'xlsx'))
        for idx, _url in enumerate(self._urls_to_download)
    ]
def run(self):
    """Run ``KgdTaxPaymentParser`` over the queued bids and record totals.

    The ``bins`` source file is fetched through ``self.input().get`` when it
    is missing locally. Bids are then popped one at a time (failed bids are
    retried before new ones are taken), the task status message and progress
    percentage are refreshed for each, and finally the total/parsed counters
    are appended as JSON to ``self.success_fpath``.

    Raises:
        SystemExit: when the parser reports ``KgdServerNotAvailableError``.
        Exception: any other error from processing is printed and re-raised.
    """
    def percent(total, parsed):
        """Return ``parsed`` as a rounded percentage of ``total``."""
        return round((parsed * 100) / total)

    bids_fpath = build_fpath(self.directory, self.name, 'bins')

    # Copy on local machine from ftp.
    if not exists(bids_fpath):
        self.input().get(bids_fpath)

    date_range = self.start_date, self.end_date
    parser = KgdTaxPaymentParser(self.name, bids_fpath,
                                 self.output()[2].path,
                                 self.parsed_fpath, self.notaxes_fpath,
                                 date_range, KGD_API_TOKEN,
                                 self.struct, self.timeout)

    # Until the last bid.
    # NOTE(review): the loop stops once ``parser.bids`` is empty even if
    # ``parser.failed_bids`` still holds entries - confirm this is intended.
    while parser.bids:
        # If we have failed bids, pull the oldest one to process it again.
        if parser.failed_bids:
            bid = parser.failed_bids.popleft()
            is_retry = True
        else:
            bid = parser.bids.popleft()
            is_retry = False

        # Refresh status bar.
        self.set_status_message(parser.status(bid, is_retry))

        try:
            parser.process_bin(bid)
            self.set_progress_percentage(
                percent(parser.source_bids_count, parser.parsed_bids_count))
        except KgdServerNotAvailableError:
            print(SERVER_IS_DOWN)
            # Raise SystemExit directly: the ``exit()`` helper is installed
            # by the ``site`` module for interactive use and is not meant
            # for program code.
            raise SystemExit
        except Exception as e:
            print(e)
            raise

    stata = dict(total=parser.source_bids_count,
                 parsed_count=parser.parsed_bids_count)
    append_file(self.success_fpath, json.dumps(stata))
def run(self):
    """Parse new uids and append their count to the success file.

    The ``uids`` source file is fetched through ``self.input().get`` when it
    is missing locally, read with ``read_lines`` and handed to
    ``gosreestr_parse_new_uids`` together with the output path, both
    timeouts and ``self.set_status`` as the callback.
    """
    uids_fpath = build_fpath(self.directory, self.name, 'uids')

    # Copy on local machine from ftp.
    if not exists(uids_fpath):
        self.input().get(uids_fpath)

    known_uids = read_lines(uids_fpath)
    target_path = self.output().path
    parse_options = dict(
        timeout=self.timeout,
        error_timeout=self.timeout_error,
        luigi_callback=self.set_status,
    )
    new_uids = gosreestr_parse_new_uids(target_path, known_uids,
                                        **parse_options)

    # NOTE(review): an int is passed here while the tax-payment task passes
    # a JSON string - confirm ``append_file`` accepts non-string values.
    append_file(self.success_fpath, len(new_uids))
def unzip_one_file(apath, name, fname=None):
    """Extract the single file of a zip archive and rename it to ``name``.

    The member is extracted next to the archive and then moved to the path
    produced by ``build_fpath(folder, name, ext)``, where ``ext`` is the
    member's own extension without the dot.

    Args:
        apath: path to the zip archive.
        name: base name handed to ``build_fpath`` for the resulting file.
        fname: optional member name; it replaces the listed member when its
            basename appears in the archive listing.

    Returns:
        Path of the extracted and renamed file.

    Raises:
        UnpackingError: if the archive listing is empty or has more than
            one file.
    """
    # NOTE(review): ``archo`` is never closed here - confirm whether
    # ``zipo_flist`` hands over ownership of an open archive handle.
    archo, flist = zipo_flist(apath)
    folder = os.path.abspath(os.path.dirname(apath))

    # An empty listing used to surface as a bare IndexError on ``flist[0]``.
    if not flist:
        raise UnpackingError('There are no files in Zip archive.')
    if len(flist) > 1:
        raise UnpackingError('There are more than one file in Zip archive.')

    member = flist[0]
    if fname and basename(fname) in flist:
        member = fname

    archo.extract(member, folder)
    extracted = join(folder, member)

    ext = os.path.splitext(member)[1].strip('.')
    new_apath = build_fpath(folder, name, ext)
    move(extracted, new_apath)

    return new_apath
def output(self):
    """Return the local ``csv`` target named after this task in ``TMP_DIR``."""
    csv_fpath = build_fpath(TMP_DIR, self.name, 'csv')
    return luigi.LocalTarget(csv_fpath)
def success_fpath(self):
    """Return the path built from ``directory``, ``name`` and ``success_fext``."""
    folder, stem, ext = self.directory, self.name, self.success_fext
    return build_fpath(folder, stem, ext)
def parsed_fpath(self):
    """Return the path built from ``directory``, ``name`` and ``parsed_fext``."""
    folder, stem, ext = self.directory, self.name, self.parsed_fext
    return build_fpath(folder, stem, ext)
def notaxes_fpath(self):
    """Return the path built from ``directory``, ``name`` and ``notaxes_fext``."""
    folder, stem, ext = self.directory, self.name, self.notaxes_fext
    return build_fpath(folder, stem, ext)