Example #1
0
    def do_extract(self,
                   resource_id,
                   server_id=None,
                   job_id=None,
                   localdict=None,
                   context=None):
        """Extract the rows of an ETL resource as a list of plain dicts.

        The resource's ``etl_type`` selects the source:

        * ``'fs'``  -- a file (uploaded on the job or opened through the
          server connection) parsed as fixed-width ``txt``, ``csv`` or
          ``dbf`` according to ``f_type``;
        * ``'db'``  -- the ``sql_begin`` / ``sql_query`` / ``sql_end``
          statements run on a cursor of the connection;
        * ``'rpc'`` -- either the resource's python code or a
          ``search`` + ``read`` on ``rpc_model_name``.

        :param resource_id: id passed to ``self.get_resource``.
        :param server_id: id of the server to connect to. When empty it is
            taken from the job's ``extract_server_id``; when still empty
            ``self.local`` is used as the connection.
        :param job_id: optional id passed to ``self.get_job``. Without a job
            no date placeholders are added and no uploaded file is read.
        :param localdict: placeholder values for ``%(name)s`` substitution.
            A dict supplied by the caller is updated in place with the
            ``aaaa`` / ``mm`` / ``dd`` / ``aa`` keys taken from the job date.
        :param context: value exposed as ``context`` to the resource's
            python code and domains.
        :return: list of dicts, one per extracted row, each seeded with the
            resource's ``row_default_value``. ``None`` values become
            ``False``, ``Decimal`` values become ``float`` and byte strings
            are decoded with the resource/server encoding when one is set.

        SECURITY: ``dbf_python_code``, ``rpc_python_code``, ``dbf_domain``,
        ``rpc_domain``, the job's ``model_ids`` and ``row_default_value`` are
        run through ``exec`` / ``eval``, and the SQL statements are built
        with ``%`` substitution. All of them must come from trusted
        configuration only.
        """

        def _names(columns):
            # Column key: explicit field name, falling back to the label.
            return [c['field_name'] or c['name'] for c in columns]

        def _slices(columns):
            # 1-based position + length -> 0-based slice of a text line.
            return [
                slice(c['txt_position'] - 1,
                      c['txt_position'] + c['txt_lenght'] - 1)
                for c in columns
            ]

        def _decode_upload(raw, encoding):
            # ``str()`` on a bytes payload would yield its "b'...'" repr on
            # Python 3 and corrupt the base64 data, so bytes go in as-is.
            if not isinstance(raw, bytes):
                raw = str(raw)
            return StringIO(
                base64.b64decode(raw).decode(encoding or 'utf-8'))

        # Fresh containers per call: the former ``{}`` defaults were shared
        # between calls while ``localdict`` is updated below.
        if localdict is None:
            localdict = {}
        if context is None:
            context = {}

        # ``job`` stays an empty dict when no job is given, so the job-less
        # path no longer fails on an unbound local.
        job = {}
        if job_id:
            job = self.get_job(job_id)
            if not server_id:
                extract_server = job['extract_server_id']
                server_id = (extract_server and extract_server[0]) or None

        conn = (self.get_connection(server_id)
                if server_id else None) or self.local
        server = (self.get_server(server_id) if server_id else None) or {
            'encoding': False,
            'etl_type': 'rpc'
        }
        resource = self.get_resource(resource_id, server_id=server_id)
        query_encoding = resource['encoding'] or server['encoding']

        job_date = job.get('date')
        if job_date:
            year, month, day = job_date.split('-')
            localdict.update({
                'aaaa': year,
                'mm': month,
                'dd': day,
                'aa': year[2:]
            })

        rows = []
        etl_type = resource['etl_type']
        if etl_type == 'fs':
            fl = StringIO()
            try:
                has_upload = bool(job_id and job.get('input_file'))
                server_is_fs = server['etl_type'] == 'fs'
                # Online jobs prefer the uploaded file, every other job
                # prefers the file reachable through the server.
                if job.get('type') == 'online':
                    use_upload = has_upload
                    use_server = not has_upload and server_is_fs
                else:
                    use_server = server_is_fs
                    use_upload = not server_is_fs and has_upload
                if use_upload:
                    fl = _decode_upload(job['input_file'], query_encoding)
                elif use_server:
                    fl = conn.open(
                        job.get('file_name') or resource['f_filename'],
                        localdict=localdict)

                cols = _names(resource['f_columns'])

                header_cols = []
                if resource['f_header_id']:
                    header = self.get_resource(resource['f_header_id'][0])
                    header_cols = _names(header['f_columns'])

                footer_cols = []
                if resource['f_footer_id']:
                    footer = self.get_resource(resource['f_footer_id'][0])
                    footer_cols = _names(footer['f_columns'])

                f_type = resource['f_type']
                if f_type == 'txt':
                    lines = list(fl)
                    widths = _slices(resource['f_columns'])
                    # Header/footer values are copied onto every data row.
                    hf = {}
                    last = len(lines) - 1
                    if header_cols and lines:
                        first_line = lines[0]
                        hf.update({
                            name: first_line[w]
                            for name, w in zip(header_cols,
                                               _slices(header['f_columns']))
                        })
                    if footer_cols and lines:
                        last_line = lines[last]
                        hf.update({
                            name: last_line[w]
                            for name, w in zip(footer_cols,
                                               _slices(footer['f_columns']))
                        })
                    for idx, line in enumerate(lines):
                        if idx == 0 and header_cols:
                            continue
                        if idx == last and footer_cols:
                            continue
                        row = {name: line[w]
                               for name, w in zip(cols, widths)}
                        row.update(hf)
                        rows.append(row)
                elif f_type == 'csv':
                    csv_opts = {
                        'delimiter': resource['txt_separator'] or ',',
                        'quotechar': resource['txt_quote'] or '"',
                    }
                    hf = {}
                    if header_cols:
                        reader = csv.DictReader(fl,
                                                fieldnames=header_cols,
                                                **csv_opts)
                        first_row = next(iter(reader), None)
                        if first_row is not None:
                            hf.update(first_row)
                    if footer_cols:
                        footers = list(
                            csv.DictReader(fl,
                                           fieldnames=footer_cols,
                                           **csv_opts))
                        # Nothing may be left after the header row.
                        if footers:
                            hf.update(footers[-1])

                    # Re-read the whole file with the data column layout.
                    fl.seek(0)
                    reader = csv.DictReader(fl,
                                            fieldnames=cols or None,
                                            **csv_opts)
                    rows = []
                    for record in reader:
                        record.update(hf)
                        rows.append(record)
                    if header_cols:
                        rows = rows[1:]
                    if footer_cols:
                        rows = rows[:-1]
                elif f_type == 'dbf':
                    if not cols:
                        cols = list(fl.field_names)
                    # NOTE: this namespace replaces the placeholder dict, so
                    # ``row_default_value`` below is formatted with it.
                    localdict = {
                        'conn': conn,
                        'context': context,
                        'job': job or {},
                        'table': fl
                    }
                    if resource['dbf_python']:
                        # SECURITY: executes code stored on the resource.
                        exec(resource['dbf_python_code'], localdict)
                        self.to_log(job_id, server_id, resource_id,
                                    localdict.get('to_log'))
                        rows = localdict.get('rows', [])
                    else:
                        # Without a domain ``recs`` used to be undefined.
                        # NOTE(review): assumes the table object is iterable
                        # over its records -- confirm against ``conn.open``.
                        recs = fl
                        if resource['dbf_domain']:
                            # SECURITY: evaluates an expression stored on
                            # the resource.
                            recs = fl.query(
                                eval(resource['dbf_domain'], localdict))
                        rows = [{c: rec[c] for c in cols} for rec in recs]
            finally:
                # Release the stream even when parsing fails.
                fl.close()
        elif etl_type == 'db':
            # SECURITY: the statements are built with ``%`` substitution, so
            # the templates and placeholder values must be trusted.
            try:
                cr = conn.cursor()
                try:
                    if resource['sql_begin']:
                        cr.execute(resource['sql_begin'] % localdict)
                        if resource.get('sql_begin_delay'):
                            self.log(
                                'Query Begin is executing, time to waiting %d sec. (%s)'
                                % (resource['sql_begin_delay'],
                                   time.strftime('%Y-%m-%d %H:%M.%S')),
                                server_id=server_id,
                                resource_id=resource_id)
                            time.sleep(resource['sql_begin_delay'])
                    cr.execute(resource['sql_query'] % localdict)
                    fetched = cr.fetchall()
                    # Captured before ``sql_end`` runs on the same cursor.
                    row_description = cr.description
                    if resource['sql_end']:
                        cr.execute(resource['sql_end'] % localdict)
                    # Description entries are plain tuples or objects
                    # exposing ``name``, depending on the driver.
                    names = [
                        col[0] if isinstance(col, tuple) else col.name
                        for col in row_description
                    ]
                    rows = [{name: record[i]
                             for i, name in enumerate(names)}
                            for record in fetched]
                finally:
                    cr.close()
            finally:
                # The connection is closed on failure as well as on success.
                conn.close()
        elif etl_type == 'rpc':
            pg_conn = importlib.import_module('psycopg2').connect(
                "dbname=%s" % self.local.database)
            try:
                _cr = pg_conn.cursor()
                try:
                    # NOTE: this namespace replaces the placeholder dict, so
                    # ``row_default_value`` below is formatted with it.
                    localdict = {
                        'conn': conn,
                        'context': context,
                        'job': job or {},
                        'cr': _cr
                    }
                    if resource['rpc_python']:
                        # SECURITY: executes code stored on the resource.
                        exec(resource['rpc_python_code'], localdict)
                        self.to_log(job_id, server_id, resource_id,
                                    localdict.get('to_log'))
                        rows = localdict.get('rows', [])
                    else:
                        model_obj = conn.get_model(resource['rpc_model_name'])
                        # SECURITY: both domains below go through ``eval``.
                        if job.get('template_run_from') in (
                                'single', 'multiple') and job.get('model_name'):
                            domain = [('id', 'in',
                                       eval(job.get('model_ids') or '[]'))]
                        else:
                            domain = eval(resource['rpc_domain'], localdict)
                        model_ids = model_obj.search(domain)
                        rows = model_obj.read(
                            model_ids,
                            [f['field_name'] for f in resource['rpc_fields']])
                finally:
                    _cr.close()
            finally:
                # The helper connection used to be left open.
                pg_conn.close()

        default_value = {}
        if resource['row_default_value']:
            # SECURITY: evaluates an expression stored on the resource.
            default_value = eval(resource['row_default_value'] % localdict)

        res = []
        for record in rows:
            values = default_value.copy()
            # ``bytes`` is ``str`` on Python 2, so one loop serves both
            # major versions without a version check.
            for key, value in record.items():
                if value is None:
                    value = False
                if isinstance(value, decimal.Decimal):
                    value = float(value)
                elif isinstance(value, bytes) and query_encoding:
                    value = value.decode(query_encoding)
                values[key] = value
            res.append(values)
        return res