def do_extract(self, resource_id, server_id=None, job_id=None, localdict=None, context=None):
    """Extract raw rows for an ETL resource and normalise them.

    Dispatches on the resource's ``etl_type``:

    * ``fs``  -- read a file: fixed-width text (``txt``), delimited
      (``csv``) or DBF (``dbf``).  The file comes either from the job's
      base64 ``input_file`` upload or from the server connection.
    * ``db``  -- run ``sql_begin`` / ``sql_query`` / ``sql_end`` through a
      DB cursor and fetch every row of the main query.
    * ``rpc`` -- read records from a remote model, or run the resource's
      ``rpc_python_code``.

    Extracted rows are then merged over ``row_default_value`` and their
    values normalised (``None`` -> ``False``, ``Decimal`` -> ``float``,
    bytes decoded with the query encoding).

    :param resource_id: id of the resource that describes the extraction.
    :param server_id: optional server id; when omitted it is taken from
        the job's ``extract_server_id``.
    :param job_id: optional job id; the job supplies the reference date,
        an optional uploaded input file and run-time options.
    :param localdict: optional substitution variables for templated names
        and queries; updated in place with the job date parts
        (``aaaa``/``mm``/``dd``/``aa``).
    :param context: optional context dict passed through to user code.
    :return: list of dicts, one per extracted row.
    """
    # Bug fix: the original signature used mutable defaults
    # (localdict={}, context={}) which are shared across calls and were
    # mutated below via localdict.update().
    if localdict is None:
        localdict = {}
    if context is None:
        context = {}

    # Bug fix: ``job`` is now always bound; the original raised NameError
    # on job['date'] whenever job_id was not given.
    job = {}
    if job_id:
        job = self.get_job(job_id)
        if not server_id:
            server_id = job['extract_server_id'] and job['extract_server_id'][0] or None

    conn = server_id and self.get_connection(server_id) or self.local
    server = server_id and self.get_server(server_id) or {
        'encoding': False,
        'etl_type': 'rpc',
    }
    resource = self.get_resource(resource_id, server_id=server_id)
    query_encoding = resource['encoding'] or server['encoding']

    # Expose the job's reference date to templated file names / queries.
    if job.get('date'):
        anho, mes, dia = job['date'].split('-')
        localdict.update({'aaaa': anho, 'mm': mes, 'dd': dia, 'aa': anho[2:]})

    rows = []
    if resource['etl_type'] == 'fs':

        def _decoded_upload():
            # File content uploaded on the job, base64 encoded.
            raw = base64.b64decode(str(job['input_file']))
            return StringIO(raw.decode(query_encoding or 'utf-8'))

        def _remote_file():
            # File fetched through the server connection.
            return conn.open(job.get('file_name') or resource['f_filename'],
                             localdict=localdict)

        def _slices(columns):
            # Fixed-width column specs -> slice objects.  txt_position is
            # 1-based; note the model field is spelled 'txt_lenght'.
            return [slice(c['txt_position'] - 1,
                          c['txt_position'] + c['txt_lenght'] - 1)
                    for c in columns]

        fl = StringIO()
        if job.get('type') == 'online':
            # Online jobs prefer the uploaded file over the remote one.
            if job_id and job['input_file']:
                fl = _decoded_upload()
            elif server['etl_type'] == 'fs':
                fl = _remote_file()
        else:
            # Scheduled jobs prefer the remote file over the uploaded one.
            if server['etl_type'] == 'fs':
                fl = _remote_file()
            elif job_id and job['input_file']:
                fl = _decoded_upload()

        cols = [c['field_name'] or c['name'] for c in resource['f_columns']]
        header = None
        header_cols = []
        if resource['f_header_id']:
            header = self.get_resource(resource['f_header_id'][0])
            header_cols = [c['field_name'] or c['name'] for c in header['f_columns']]
        footer = None
        footer_cols = []
        if resource['f_footer_id']:
            footer = self.get_resource(resource['f_footer_id'][0])
            footer_cols = [c['field_name'] or c['name'] for c in footer['f_columns']]

        if resource['f_type'] == 'txt':
            lines = list(fl)
            widths = _slices(resource['f_columns'])
            hf = {}  # header/footer values replicated onto every data row
            last = len(lines) - 1
            if header_cols and lines:
                hf.update({name: lines[0][w] for name, w
                           in zip(header_cols, _slices(header['f_columns']))})
            if footer_cols and lines:
                hf.update({name: lines[last][w] for name, w
                           in zip(footer_cols, _slices(footer['f_columns']))})
            for idx, line in enumerate(lines):
                if idx == 0 and header_cols:
                    continue  # the header line is not a data row
                if idx == last and footer_cols:
                    continue  # the footer line is not a data row
                # Bug fix: the loop index and the comprehension variable
                # are distinct names now (the original shadowed ``i``,
                # which leaks in Python 2 list comprehensions).
                row = {cols[j]: line[w] for j, w in enumerate(widths)}
                row.update(hf)
                rows.append(row)
        elif resource['f_type'] == 'csv':
            delimiter = resource['txt_separator'] or ','
            quotechar = resource['txt_quote'] or '"'
            hf = {}
            if header_cols:
                reader = csv.DictReader(fl, fieldnames=header_cols,
                                        delimiter=delimiter, quotechar=quotechar)
                for first in reader:
                    hf.update(first)  # only the first line is the header
                    break
            if footer_cols:
                # Continues from the current file position on purpose: the
                # last line is the footer wherever reading starts.
                reader = csv.DictReader(fl, fieldnames=footer_cols,
                                        delimiter=delimiter, quotechar=quotechar)
                footers = list(reader)
                hf.update(footers[-1])
            fl.seek(0)
            reader = csv.DictReader(fl, fieldnames=cols or None,
                                    delimiter=delimiter, quotechar=quotechar)
            rows = []
            for record in reader:
                record.update(hf)
                rows.append(record)
            if header_cols:
                rows = rows[1:]   # drop the header line
            if footer_cols:
                rows = rows[:-1]  # drop the footer line
        elif resource['f_type'] == 'dbf':
            rows = []
            if not cols:
                cols = list(fl.field_names)
            # Sandbox-style namespace handed to user code / domains.
            localdict = {
                'conn': conn,
                'context': context,
                'job': job_id and job or {},
                'table': fl,
            }
            if resource['dbf_python']:
                # SECURITY: executes code stored on the resource record;
                # only trusted users must be able to edit it.
                exec(resource['dbf_python_code'], localdict)
                self.to_log(job_id, server_id, resource_id, localdict.get('to_log'))
                rows = localdict.get('rows', [])
            elif resource['dbf_domain']:
                # SECURITY: eval of a stored expression -- trusted input only.
                for rec in fl.query(eval(resource['dbf_domain'], localdict)):
                    rows.append({c: rec[c] for c in cols})
            fl.close()
    elif resource['etl_type'] == 'db':
        cr = conn.cursor()
        if resource['sql_begin']:
            # Optional set-up statement run before the main query.
            # SECURITY: %-formatted, not parameterised; resources must only
            # be editable by trusted users.
            cr.execute(resource['sql_begin'] % localdict)
            if resource.get('sql_begin_delay'):
                # NOTE(review): '%H:%M.%S' (period before the seconds) is
                # kept from the original -- looks like a typo for '%H:%M:%S'.
                self.log(
                    'Query Begin is executing, time to waiting %d sec. (%s)'
                    % (resource['sql_begin_delay'],
                       time.strftime('%Y-%m-%d %H:%M.%S')),
                    server_id=server_id, resource_id=resource_id)
                time.sleep(resource['sql_begin_delay'])
        # SECURITY: %-formatted query, not parameterised.
        cr.execute(resource['sql_query'] % localdict)
        rows = cr.fetchall()
        row_description = cr.description
        if resource['sql_end']:
            # Optional tear-down statement.
            cr.execute(resource['sql_end'] % localdict)
        # Map positional result tuples to dicts keyed by column name.
        # DB-API drivers describe a column either as a tuple (name first)
        # or as an object with a .name attribute.  Bug fix: an explicit
        # conditional instead of and/or, which broke on falsy tuple names.
        rows = [
            {(col[0] if type(col) is tuple else col.name): record[i]
             for i, col in enumerate(row_description)}
            for record in rows
        ]
        cr.close()
        conn.close()
    elif resource['etl_type'] == 'rpc':
        # Cursor on the local database, handed to user code.  psycopg2 is
        # imported lazily so it is only required for rpc resources.
        _cr = importlib.import_module('psycopg2').connect(
            "dbname=%s" % self.local.database).cursor()
        localdict = {
            'conn': conn,
            'context': context,
            'job': job_id and job or {},
            'cr': _cr,
        }
        if resource['rpc_python']:
            # SECURITY: executes code stored on the resource record.
            exec(resource['rpc_python_code'], localdict)
            self.to_log(job_id, server_id, resource_id, localdict.get('to_log'))
            rows = localdict.get('rows', [])
        else:
            model_obj = conn.get_model(resource['rpc_model_name'])
            if (job.get('template_run_from') in ('single', 'multiple')
                    and job.get('model_name')):
                # Explicit record ids selected on the job.
                # SECURITY: eval of job-provided ids -- trusted input only.
                domain = [('id', 'in', eval(job.get('model_ids', '[]')))]
            else:
                # SECURITY: eval of a stored domain expression.
                domain = eval(resource['rpc_domain'], localdict)
            model_ids = model_obj.search(domain)
            rows = model_obj.read(
                model_ids, [f['field_name'] for f in resource['rpc_fields']])

    default_value = {}
    if resource['row_default_value']:
        # SECURITY: eval of a stored expression -- trusted input only.
        default_value = eval(resource['row_default_value'] % localdict)

    # Merge every row over the defaults and normalise its values:
    # None -> False (ORM convention), Decimal -> float, and raw bytes
    # (Python 2: str) decoded with the query encoding.
    py3 = sys.version_info[0] >= 3
    binary_type = bytes if py3 else str
    res = []
    for record in rows:
        normalised = default_value.copy()
        pairs = record.items() if py3 else record.iteritems()
        for key, value in pairs:
            if value is None:
                value = False
            if type(value) is decimal.Decimal:
                value = float(value)
            elif type(value) is binary_type and query_encoding:
                value = value.decode(query_encoding)
            normalised[key] = value
        res.append(normalised)
    return res