def _gen_SQL_list(SQL_gen_proc, owner, tblname, filepath,
                  block_size=RESTRICT.MAX_ALLOWED_PACKET):
    """Yield SQL INSERT statements for the CSV rows in *filepath*.

    Rows are batched so that each generated statement stays below
    *block_size* bytes. ``SQL_gen_proc(sqlformat, lines, owner, tblname)``
    builds the actual statement from one batch of DictReader rows.
    Yields nothing when *filepath* is not a path object.
    """
    if not File.is_path(filepath):
        return
    sqlformat = 'insert into {tblname} ({{columns}}) values {{values}};'.format(
        tblname=tblname)
    with open(str(filepath), 'r') as fo:
        reader = csv.DictReader(fo, delimiter=RESTRICT.DELIMITER)
        size, lines = len(sqlformat), []
        for ln in reader:
            # Malformed rows (extra fields collected under the None key,
            # or short rows padded with None values) used to crash the
            # length computation below with a TypeError; log them and
            # treat missing values as zero-length instead.
            if None in ln or any(v is None for v in ln.values()):
                logger(__name__).debug('malformed CSV row: {}'.format(ln))
            # +4 per value approximates quoting/punctuation overhead.
            row_len = sum((len(v) if v is not None else 0) + 4
                          for v in ln.values())
            if size + row_len >= block_size:
                # Current batch is full: emit it and start a new one
                # seeded with the row that did not fit.
                yield SQL_gen_proc(sqlformat, lines, owner, tblname)
                size, lines = len(sqlformat) + row_len, [ln]
            else:
                size += row_len
                lines.append(ln)
        if lines:
            yield SQL_gen_proc(sqlformat, lines, owner, tblname)
def create(path, mod_name):
    """Ensure *path* exists as a directory, creating it (and parents) if needed."""
    if not File.is_path(path):
        return
    if path.exists():
        logger(mod_name).info('found: "{}"'.format(str(path)))
        return
    logger(mod_name).info('creating folder \"{}\"'.format(str(path)))
    path.mkdir(parents=True, exist_ok=True)
def count_src(path):
    """Recursively count effective source lines under directory *path*.

    Blank lines, ``#`` comments, bare ``\"\"\"`` delimiter lines and
    debug-logging calls are excluded; ``__pycache__`` directories and
    files named ``logging.py`` are skipped entirely.
    """
    p = pathlib.Path(path).resolve()
    count = 0
    if File.is_path(p) and p.exists() and p.is_dir():
        # Recurse into sub-directories first.
        for sub in p.glob('*'):
            if sub.name != '__pycache__' and sub.is_dir():
                count += count_src(sub)
        for py in p.glob('*.py'):
            if py.name == 'logging.py':
                continue
            count1 = 0
            # BUG FIX: removed the redundant fo.close() that sat inside
            # the 'with' block (the context manager already closes fo).
            with open(str(py), 'r') as fo:
                for raw in fo:
                    line = raw.strip()
                    if (line == '' or line == '"""' or line.startswith('#')
                            or line.startswith(('logger(__name__).debug(',
                                                'logger(mod_name).debug('))):
                        continue
                    count1 += 1
            logger(__name__).debug('"{}": {}'.format(str(py), count1))
            count += count1
        logger(__name__).debug('"{}": {}'.format(str(p), count))
    return count
def approach(self, ifpath):
    # Attach this ETL agent to the input file *ifpath* and run one
    # extract/transform pass over its CSV rows.
    # NOTE(review): indentation reconstructed — the original source was
    # collapsed onto a single line; the nesting below is a best-effort
    # reading and should be confirmed against the original layout.
    if type(self.fin) is not _io.TextIOWrapper and\
       File.is_path(ifpath) and File.exists(ifpath):
        # Only (re)open when no text file is currently attached.
        logger(self.mod_name).info('approaching "{}"'.format(str(ifpath)))
        self.infile = ifpath
        if self.newline is None:
            self.fin = open(str(ifpath), 'r')
        else:
            self.fin = open(str(ifpath), 'r', newline=self.newline)
    if not self._is_init():
        # NOTE(review): the message says "skipping it" but no return or
        # raise follows, so processing continues on an uninitialized
        # agent — TODO confirm whether an early return is intended here.
        logger(self.mod_name).warning(
            'bad ETL agent: agent {} not initialized; skipping it'.format(
                str(self)))
    self.start_time = datetime.datetime.now()
    self._time_log(0)
    # Switch to progress-style logging while streaming rows.
    logging.switch_to_progress(self.mod_name)
    reader = csv.DictReader(self.fin, delimiter=RESTRICT.DELIMITER)
    i = -1  # stays -1 when the input has no data rows
    for i, line in enumerate(reader):
        self._push_recent_line(line)
        self._transform(line, linum=i + 1)
        self._data_rate(i)
    logging.switch_to_normal(self.mod_name)
    self.fin.close()
    self._report(i + 1)
    if i > -1:
        # Emit output only when at least one row was processed.
        self._output()
def remove(path, mod_name):
    """Recursively delete directory *path* and everything below it."""
    if not File.is_path(path):
        return
    for entry in path.glob('*'):
        if entry.is_dir():
            remove(entry, mod_name)
        else:
            File.remove(entry, mod_name)
    path.rmdir()
    logger(mod_name).info('remove: "{}"'.format(path))
def list_computed_config(cusid, tech, key, mod_name):
    # Render the computed configuration as YAML; path-like values are
    # checked for existence and rewritten as "<type> <value>" strings so
    # they serialize cleanly.
    # NOTE(review): indentation reconstructed — original was collapsed
    # onto one line; the rewrite of json_data[k] is read as applying to
    # path values only — TODO confirm against the original layout.
    json_data = get_computed_config(cusid, tech, mod_name)
    for k in json_data:
        if File.is_path(json_data[k]):
            if not File.exists(json_data[k]):
                logger(__name__).warning('bad path: "{}"'.format(
                    str(json_data[k])))
            json_data[k] = '{} {}'.format(type(json_data[k]), json_data[k])
    return JSON.to_yaml(json_data, key)
def _get_columns(zippath, filename, tmppath, CAT):
    # Extract *filename* from the zip archive at *zippath* into *tmppath*,
    # parse it as a latin-1, '\r\n'-terminated catalogue dump, and return
    # {(owner, table): {column: {'type','len','precision','scale'}}}.
    # NOTE(review): indentation reconstructed — the original source was
    # collapsed onto one line; nesting is a best-effort reading.
    if File.is_path(tmppath):
        zf = zipfile.ZipFile(str(zippath))
        zf.extract(filename, str(tmppath))
        zf.close()
        reCAT = _get_regex(CAT)  # row filter for the wanted CAT entries
        result = dict()
        with open(str(tmppath.joinpath(filename)), 'r', newline='\r\n',
                  encoding='latin-1') as fo:
            # First line: build a header-name -> column-index map.
            header = dict()
            for _, ln in enumerate(fo):
                line = ln.rstrip().split(RESTRICT.DELIMITER)
                # split() never returns an empty list, so this branch is
                # taken on the very first line and the loop breaks.
                if len(line) > 0:
                    for i, h in enumerate(line):
                        # round-trip through bytes to re-decode in the
                        # process default encoding
                        header[str(h.encode(), sys.getdefaultencoding())] = i
                    break
            # Remaining lines: keep only rows matching the CAT regex.
            for _, ln in enumerate([x for x in iter(fo) if reCAT.match(x)]):
                line = ln.rstrip().split(RESTRICT.DELIMITER)
                key_tbl = (str(line[header['OWNER']].encode(),
                               sys.getdefaultencoding()),
                           str(line[header['TABLE_NAME']].encode(),
                               sys.getdefaultencoding()))
                if key_tbl not in result:
                    result[key_tbl] = dict()
                columns = result[key_tbl]
                key_col = str(line[header['COLUMN_NAME']].encode(),
                              sys.getdefaultencoding())
                if key_col not in columns:  # first occurrence wins
                    columns[key_col] = dict([
                        ('type', str(line[header['DATA_TYPE']].encode(),
                                     sys.getdefaultencoding())),
                        ('len', str(line[header['DATA_LENGTH']].encode(),
                                    sys.getdefaultencoding())),
                        ('precision', str(line[header['DATA_PRECISION']].encode(),
                                          sys.getdefaultencoding())),
                        ('scale', str(line[header['DATA_SCALE']].encode(),
                                      sys.getdefaultencoding()))
                    ])
        # Remove the temporary extracted file before returning.
        tmppath.joinpath(filename).unlink()
        return result
def list_config(cusid, tech, key, mod_name):
    """Load the configuration file and render it as YAML, warning about
    configured paths that do not exist on disk."""
    path = wic.find_config_file_path()
    logger(__name__).info('show "{}"'.format(path))
    json_data = File.load_JSON(path, mod_name)
    for name in json_data:
        value = json_data[name]
        if File.is_path(value) and not File.exists(value):
            logger(__name__).warning('bad path: "{}"'.format(
                str(value)))
    return JSON.to_yaml(json_data, key)
def _get_csv_columns(filepath, encoding=None, newline=None,
                     delimiter=RESTRICT.DELIMITER):
    """Return the CSV header line of *filepath* as a comma-separated
    string of back-quoted column names; None when *filepath* is not a
    path object."""
    if not File.is_path(filepath):
        return None
    with open(str(filepath), 'r', encoding=encoding, newline=newline) as fo:
        header = re.match('^(.+)$', fo.readline()).group(1)
        terms = header.replace('\r', '').split(delimiter)
        return ', '.join('`{term}`'.format(term=t) for t in terms)
def get_ow_ta_columns(zippath, filename, tmppath, CAT):
    """Build a {(owner, table): {column: metadata}} mapping from the
    catalogue rows generated out of the given zip archive; None when
    *tmppath* is not a path object."""
    if not File.is_path(tmppath):
        return None
    result = dict()
    for dic in _gen_ow_ta_columns(zippath, filename, tmppath, CAT):
        owner_table = (_encode(dic['OWNER']), _encode(dic['TABLE_NAME']))
        columns = result.setdefault(owner_table, dict())
        columns[_encode(dic['COLUMN_NAME'])] = {
            'type': _encode(dic['DATA_TYPE']),
            'len': int(_encode(dic['DATA_LENGTH'])),
            'precision': _try_int(_encode(dic['DATA_PRECISION'])),
            'scale': _try_int(_encode(dic['DATA_SCALE'])),
            'order': int(_encode(dic['COLUMN_ID'])),
        }
    return result
def patch_columns(bad, path):
    """Patch column metadata in *bad* from the file at *path*.

    NOTE: still a stub — the file lines are generated but not consumed.

    BUG FIX: the guard used ``type(bad) is dict()``, which compares a
    type object against an *empty dict instance* and is therefore always
    False, making the whole body unreachable; use isinstance() instead.
    """
    if isinstance(bad, dict) and File.is_path(path):
        File.gen_file_lines(path, newline='\r\n', encoding='latin-1')
def build_columns(path):
    """Build column metadata from the file at *path* (stub: the
    generated file lines are not consumed yet)."""
    if not File.is_path(path):
        return
    good, bad = dict(), dict()
    File.gen_file_lines(path, newline='\r\n', encoding='latin-1')
def is_empty(path, mod_name):
    """Return True when directory *path* has no entries, False when it
    has at least one; None when *path* is not a path object.
    (mod_name is accepted for signature consistency but unused.)"""
    if File.is_path(path):
        # iterdir() never yields None, so a None sentinel detects
        # exhaustion without materializing the whole listing.
        return next(path.iterdir(), None) is None
def _extract_file(zippath, filename, path, mod_name):
    """Extract *filename* from the zip archive at *zippath* into *path*.

    BUG FIX: open the ZipFile as a context manager so the archive handle
    is released even when extract() raises — the explicit close() was
    skipped on error, leaking the file handle.
    """
    if File.is_path(zippath) and File.is_path(path):
        logger(mod_name).info('extract "{}"'.format(str(path.joinpath(filename))))
        with zipfile.ZipFile(str(zippath)) as zf:
            zf.extract(filename, str(path))
def _get_recent_zip(zippath, date):
    """Derive the sibling zip path for *date* from an existing zip whose
    name matches _re_zip1; None when derivation is not possible."""
    # Deliberately an exact type check (excludes datetime.datetime),
    # matching the original behavior.
    usable = (File.is_path(zippath) and File.exists(zippath)
              and type(date) is datetime.date)
    if not usable:
        return None
    m = _re_zip1.match(zippath.as_posix())
    if m is None:
        return None
    return zippath.parent.parent.joinpath(
        '{:%Y%m%d}/{}_{:%Y-%m-%d}.zip'.format(date, m.group(1), date))