Example #1
0
 def test_basic(self):
     """Apply a one-row patch file and check the new value is stored.

     Writes a patch whose single data row is ``->,two,2->22``, applies it
     to the ``sheet`` table of the workspace database with ``patchsql``,
     then reads the row named ``two`` back through ``catsql``'s JSON
     output and checks that exactly one row comes back with DIGIT == 22.
     """
     patch = self.workspace.filename('patch.diff')
     with open(patch, 'w') as fout:
         fout.write("@@,NAME,DIGIT\n"
                    "->,two,2->22\n")
     patchsql([self.workspace.number_db, '--table', 'sheet', '--patch', patch])
     catsql([self.workspace.number_db, "--json", self.workspace.output_file,
             '--NAME', 'two'])
     result = self.workspace.output_json()
     # assertEqual rather than the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(len(result['results']), 1)
     self.assertEqual(result['results'][0]['DIGIT'], 22)
Example #2
0
 def test_from_file_pair(self):
     """Apply the difference between two CSV files and check the result.

     Writes two CSV files that differ only in the DIGIT value of the row
     named ``two`` (2 in the first, 22 in the second), passes both to
     ``patchsql --follow`` against the ``sheet`` table, then reads the row
     back through ``catsql``'s JSON output and checks that exactly one row
     comes back with DIGIT == 22.
     """
     f1 = self.workspace.filename('f1.csv')
     f2 = self.workspace.filename('f2.csv')
     with open(f1, 'w') as fout:
         fout.write("NAME,DIGIT\n"
                    "two,2\n")
     with open(f2, 'w') as fout:
         fout.write("NAME,DIGIT\n"
                    "two,22\n")
     patchsql([self.workspace.number_db, '--table', 'sheet', '--follow', f1, f2,
               '--quiet'])
     catsql([self.workspace.number_db, "--json", self.workspace.output_file,
             '--NAME', 'two'])
     result = self.workspace.output_json()
     # assertEqual rather than the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(len(result['results']), 1)
     self.assertEqual(result['results'][0]['DIGIT'], 22)
Example #3
0
    def show(self):
        """Run the configured query and emit every resulting table.

        Output goes to ``sys.stdout`` by default, to the file named by
        ``self.args.output`` when given, or -- when ``self.args.edit`` is
        set -- to a temporary ``reference.csv`` that is then copied, opened
        in the user's editor and applied back through ``patchsql``.

        For each table returned by the query this method optionally
        replaces the rows with a single row of column type names
        (``self.args.types``), copies the rows into ``self.target_db``, and
        then writes JSON, CSV or a row count depending on the output flags.
        A bookmark describing the query is written when
        ``self.args.save_bookmark`` is set.

        On exit, a hint is printed to stderr if ``self.failure`` is set and
        the temporary edit directory (if any) is removed.
        """

        self.tables_so_far = []

        work = None
        output_filename = None
        self.output_file = sys.stdout
        work_file = None
        self.start_table(None, None)

        try:
            if self.args.edit:
                import tempfile

                # Edit mode: dump to a scratch CSV that later serves as the
                # reference side of the comparison handed to patchsql.
                work = tempfile.mkdtemp()
                output_filename = os.path.join(work, "reference.csv")
                work_file = open(output_filename, "wt")
                self.output_file = work_file
                self.output_in_csv = True
                self.args.safe_null = True
                self.args.save_bookmark = [os.path.join(work, "bookmark.json")]
            elif self.args.output:
                # NOTE(review): this handle is not closed anywhere in this
                # method -- confirm the caller (or interpreter exit) does it.
                self.output_file = open(self.args.output[0], "wt")

            viable_tables = []

            # Build the query step by step from the command-line options.
            q = self.database.query(columns=self.selected_columns)
            touchable = True
            if self.args.select_from:
                q = q.select_from(self.args.select_from[0])
                touchable = False
            if self.args.distinct:
                q = q.distinct()
            if self.row_filter is not None:
                q = q.where_sqls(self.row_filter)
            if self.args.value is not None:
                q = q.where_kv(self.context_filters)
            if self.values is not None:
                q = q.where_kv_with_expansion(self.values)
            if self.args.grep:
                for pattern in self.args.grep:
                    q = q.grep(pattern, case_sensitive=False)
            # NOTE(review): unlike the other builder calls, the return value
            # of order() is not reassigned; presumably it mutates q in
            # place -- confirm against the query class.
            if self.args.order:
                if "none" not in self.args.order:
                    q.order(self.ordering)
            elif touchable:
                q.order()
            if self.args.limit:
                q = q.limit(int(self.args.limit[0]))

            # Process tables in name order.
            ts = list(q)
            ts.sort(key=lambda t: t["table_name"])

            for t in ts:
                table_name = t["table_name"]
                table = t["table"]
                rows = t["rows"]

                self.header_shown = False
                self.start_table(table_name, table.columns.keys())
                viable_tables.append(table_name)

                if self.args.types:
                    # Replace the data with one row of SQL type names.
                    column_types = []
                    for name in self.columns:
                        if not self.ok_column(name):
                            continue
                        try:
                            column = table.c[name]
                            sql_name = str(column.type)  # make sure not nulltype
                        except CompileError:
                            sql_name = None
                        column_types.append(sql_name)
                    rows = [column_types]

                if self.target_db:
                    if table_name in self.target_db.tables_metadata.keys():
                        # clear previous results
                        self.target_db.tables_metadata[table_name].drop(self.target_db.engine)
                    # Mutable holder shared by the closures below.
                    target = {"table": None, "rows": []}

                    def fallback_type(example):
                        # bool is tested before int because bool is a
                        # subclass of int.
                        if isinstance(example, bool):
                            return types.Boolean
                        elif isinstance(example, int):
                            return types.Integer
                        elif isinstance(example, float):
                            return types.Float
                        elif isinstance(example, datetime):
                            return types.DateTime
                        return types.UnicodeText

                    def create_table(data):
                        # Create the destination table once; later calls
                        # are no-ops.
                        if target["table"] is not None:
                            return
                        columns = []
                        for name in self.columns:
                            if not self.ok_column(name):
                                continue
                            column = table.c[name]
                            sql_type = column.type
                            try:
                                self.target_db.engine.dialect.type_compiler.process(sql_type)
                            except CompileError:
                                # some types need to be approximated
                                sql_type = None
                            if sql_type is None or isinstance(sql_type, types.NullType):
                                example = data.get(name)
                                sql_type = fallback_type(example)
                            columns.append(Column(name, sql_type, primary_key=column.primary_key))
                        metadata = MetaData(bind=self.target_db.engine)
                        target["table"] = Table(table_name, metadata, *columns)
                        target["table"].create(self.target_db.engine)

                    def add_row(data):
                        # Buffer rows and insert in batches; a falsy `data`
                        # forces a flush of whatever is buffered.
                        if data:
                            target["rows"].append(data)
                        if len(target["rows"]) > 10000 or not data:
                            target["table"].insert().execute(target["rows"])
                            target["rows"] = []

                    def sqlited(data):
                        # Nested containers are stored as JSON text.
                        if isinstance(data, (dict, list)):
                            return json.dumps(data)
                        return data

                    for row in rows:
                        data = {
                            self.columns[c]: sqlited(cell)
                            for c, cell in enumerate(row)
                            if self.ok_column(self.columns[c])
                        }
                        create_table(data)
                        add_row(data)
                    # Ensure the table exists even when there were no rows,
                    # then flush the final partial batch.
                    create_table({})
                    add_row(None)

                if self.output_in_json or self.output_in_sqlite:
                    if not self.show_header_on_need():
                        continue
                    if self.output_in_json:
                        self.save_as_json(table, rows, self.output_in_json[0])
                elif not self.args.count:
                    # csv spec is that eol is \r\n; we ignore this for our purposes
                    # for good reasons that unfortunately there isn't space to describe
                    # here on the back of this envelope
                    csv_writer = csv.writer(self.output_file, lineterminator="\n")
                    if self.args.safe_null:
                        nullify = Nullify()
                        for row in rows:
                            if not self.show_header_on_need():
                                continue
                            csv_writer.writerow(
                                [
                                    nullify.encode_null(cell)
                                    for c, cell in enumerate(row)
                                    if self.ok_column(self.columns[c])
                                ]
                            )
                    else:
                        for row in rows:
                            if not self.show_header_on_need():
                                continue
                            csv_writer.writerow(
                                [cell for c, cell in enumerate(row) if self.ok_column(self.columns[c])]
                            )
                    del csv_writer
                else:
                    self.show_header_on_need()
                    # NOTE(review): when self.args.types is also set, rows is
                    # a plain list here and list.count() needs an argument --
                    # confirm whether that combination is meant to work.
                    ct = rows.count()
                    print("({} row{})".format(ct, "" if ct == 1 else "s"), file=self.output_file)

            if len(self.tables_so_far) == 0 and len(viable_tables) == 1:
                self.show_header_on_need()

            if self.args.save_bookmark:
                with open(self.args.save_bookmark[0], "w") as fout:
                    link = OrderedDict()
                    link["url"] = self.args.catsql_database_url
                    link["table"] = list(self.tables) if self.tables else None
                    link["column"] = self.selected_columns
                    link["distinct"] = self.args.distinct
                    link["context"] = self.context_filters
                    link["hidden_columns"] = sorted(self.context_columns)
                    link["sql"] = self.args.sql
                    fout.write(json.dumps(link, indent=2))

            if self.args.edit and not self.failure:
                # Close the reference dump before copying it for editing.
                work_file.close()
                work_file = None
                edit_filename = os.path.join(work, "variant.csv")
                copyfile(output_filename, edit_filename)
                # TABLE_EDITOR takes precedence over EDITOR; nano is the
                # last-resort default.
                editor = os.environ.get("TABLE_EDITOR", None)
                if not editor:
                    editor = os.environ.get("EDITOR", "nano")
                call([editor, edit_filename])
                patchsql(
                    [self.url, "--table"]
                    + self.tables_so_far
                    + ["--follow", output_filename, edit_filename, "--safe-null"]
                    + (["--quiet"] if self.args.quiet else [])
                )

        finally:
            if self.failure:
                print("ERROR: More than one table in csv/json output, consider adding:", file=sys.stderr)
                for name in self.tables_so_far:
                    print("  --table {}".format(name), file=sys.stderr)

            if work:
                if work_file:
                    # Best-effort close; catch Exception rather than using a
                    # bare except so KeyboardInterrupt/SystemExit propagate.
                    try:
                        work_file.close()
                    except Exception:
                        pass
                    work_file = None
                import shutil

                shutil.rmtree(work)
                work = None
Example #4
0
    def show(self):
        """Run the configured query and emit every resulting table.

        Output goes to ``sys.stdout`` by default, to the file named by
        ``self.args.output`` when given, or -- when ``self.args.edit`` is
        set -- to a temporary ``reference.csv`` that is then copied, opened
        in the user's editor and applied back through ``patchsql``.

        For each table returned by the query this method optionally
        replaces the rows with a single row of column type names
        (``self.args.types``), writes the rows to a new sheet of
        ``self.target_ss``, copies them into ``self.target_db``, and then
        writes JSON, CSV or a row count depending on the output flags.
        A bookmark describing the query is written when
        ``self.args.save_bookmark`` is set.

        On exit, a hint is printed to stderr if ``self.failure`` is set,
        ``self.target_ss`` (if any) is saved, and the temporary edit
        directory (if any) is removed.
        """

        self.tables_so_far = []

        work = None
        output_filename = None
        self.output_file = sys.stdout
        work_file = None
        self.start_table(None, None)

        try:
            if self.args.edit:
                import tempfile
                # Edit mode: dump to a scratch CSV that later serves as the
                # reference side of the comparison handed to patchsql.
                work = tempfile.mkdtemp()
                output_filename = os.path.join(work, 'reference.csv')
                work_file = open(output_filename, 'wt')
                self.output_file = work_file
                self.output_in_csv = True
                self.args.safe_null = True
                self.args.save_bookmark = [os.path.join(work, 'bookmark.json')]
            elif self.args.output:
                # NOTE(review): this handle is not closed anywhere in this
                # method -- confirm the caller (or interpreter exit) does it.
                self.output_file = open(self.args.output[0], 'wt')

            viable_tables = []

            # Build the query step by step from the command-line options.
            q = self.database.query(columns=self.selected_columns)
            touchable = True
            if self.args.select_from:
                q = q.select_from(self.args.select_from[0])
                touchable = False
            if self.args.distinct:
                q = q.distinct()
            if self.row_filter is not None:
                q = q.where_sqls(self.row_filter)
            if self.args.value is not None:
                q = q.where_kv(self.context_filters)
            if self.values is not None:
                q = q.where_kv_with_expansion(self.values)
            if self.args.grep:
                for pattern in self.args.grep:
                    q = q.grep(pattern, case_sensitive=False)
            # NOTE(review): unlike the other builder calls, the return value
            # of order() is not reassigned; presumably it mutates q in
            # place -- confirm against the query class.
            if self.args.order:
                if 'none' not in self.args.order:
                    q.order(self.ordering)
            elif touchable:
                q.order()
            if self.args.limit:
                q = q.limit(int(self.args.limit[0]))

            # Process tables in name order.
            ts = list(q)
            ts.sort(key=lambda t: t['table_name'])

            for t in ts:
                table_name = t['table_name']
                table = t['table']
                rows = t['rows']

                self.header_shown = False
                # Column names come from table.columns.keys() when the table
                # object has a `columns` attribute, otherwise from its `keys`
                # attribute (read, not called).
                keys = None
                if hasattr(table, 'columns'):
                    keys = table.columns.keys()
                else:
                    keys = table.keys
                self.start_table(table_name, keys)
                viable_tables.append(table_name)

                if self.args.types:
                    # Replace the data with one row of SQL type names.
                    column_types = []
                    for name in self.columns:
                        if not self.ok_column(name):
                            continue
                        try:
                            column = table.c[name]
                            sql_name = str(
                                column.type)  # make sure not nulltype
                        except CompileError:
                            sql_name = None
                        column_types.append(sql_name)
                    rows = [column_types]

                if self.target_ss:
                    # Spreadsheet output: one sheet per table, with a header
                    # row of the visible column names followed by the data.
                    ws = self.target_ss.create_sheet()
                    ws.title = table_name
                    ws.append(column for column in self.columns
                              if self.ok_column(column))
                    for row in rows:
                        ws.append(cell for c, cell in enumerate(row)
                                  if self.ok_column(self.columns[c]))
                    # Give the first row a bold "Header" named style.
                    s = openpyxl.styles.NamedStyle(
                        name="Header", font=openpyxl.styles.Font(bold=True))
                    for cell in next(ws.rows):
                        cell.style = s
                    # NOTE(review): column_dimensions is presumably keyed by
                    # column letter; confirm cell.column yields a letter (not
                    # an index) in the openpyxl version in use.
                    for column_cells in ws.columns:
                        ws.column_dimensions[
                            column_cells[0].column].auto_size = True

                if self.target_db:
                    if table_name in self.target_db.tables_metadata.keys():
                        # clear previous results
                        self.target_db.tables_metadata[table_name].drop(
                            self.target_db.engine)
                    # Mutable holder shared by the closures below.
                    target = {'table': None, 'rows': []}

                    def fallback_type(example):
                        # bool is tested before int because bool is a
                        # subclass of int.
                        if isinstance(example, bool):
                            return types.Boolean
                        elif isinstance(example, int):
                            return types.Integer
                        elif isinstance(example, float):
                            return types.Float
                        elif isinstance(example, datetime):
                            return types.DateTime
                        return types.UnicodeText

                    def create_table(data):
                        # Create the destination table once; later calls
                        # are no-ops.
                        if target['table'] is not None:
                            return
                        columns = []
                        for name in self.columns:
                            if not self.ok_column(name):
                                continue
                            column = table.c[name]
                            sql_type = column.type
                            try:
                                self.target_db.engine.dialect.type_compiler.process(
                                    sql_type)
                            except CompileError:
                                # some types need to be approximated
                                sql_type = None
                            if sql_type is None or isinstance(
                                    sql_type, types.NullType):
                                example = data.get(name)
                                sql_type = fallback_type(example)
                            columns.append(
                                Column(name,
                                       sql_type,
                                       primary_key=column.primary_key))
                        metadata = MetaData(bind=self.target_db.engine)
                        target['table'] = Table(table_name, metadata, *columns)
                        target['table'].create(self.target_db.engine)

                    def add_row(data):
                        # Buffer rows and insert in batches; a falsy `data`
                        # forces a flush of whatever is buffered.
                        if data:
                            target['rows'].append(data)
                        if len(target['rows']) > 10000 or not data:
                            target['table'].insert().execute(target['rows'])
                            target['rows'] = []

                    def sqlited(data):
                        # Nested containers are stored as JSON text.
                        if isinstance(data, dict) or isinstance(data, list):
                            return json.dumps(data)
                        return data

                    for row in rows:
                        data = dict((self.columns[c], sqlited(cell))
                                    for c, cell in enumerate(row)
                                    if self.ok_column(self.columns[c]))
                        create_table(data)
                        add_row(data)
                    # Ensure the table exists even when there were no rows,
                    # then flush the final partial batch.
                    create_table({})
                    add_row(None)

                if self.output_in_json or self.output_in_sqlite or self.output_in_excel:
                    if not self.show_header_on_need():
                        continue
                    if self.output_in_json:
                        self.save_as_json(table, rows, self.output_in_json[0])
                elif not self.args.count:
                    # csv spec is that eol is \r\n; we ignore this for our purposes
                    # for good reasons that unfortunately there isn't space to describe
                    # here on the back of this envelope
                    csv_writer = csv.writer(self.output_file,
                                            lineterminator='\n')
                    # The header check runs once per table, before any row is
                    # written; a falsy result skips the whole table.
                    if not self.show_header_on_need():
                        continue
                    if self.args.safe_null:
                        nullify = Nullify()
                        for row in rows:
                            csv_writer.writerow(
                                list(
                                    nullify.encode_null(cell)
                                    for c, cell in enumerate(row)
                                    if self.ok_column(self.columns[c])))
                    else:
                        for row in rows:
                            csv_writer.writerow(
                                list(cell for c, cell in enumerate(row)
                                     if self.ok_column(self.columns[c])))
                    del csv_writer
                else:
                    self.show_header_on_need()
                    # NOTE(review): when self.args.types is also set, rows is
                    # a plain list here and list.count() needs an argument --
                    # confirm whether that combination is meant to work.
                    ct = rows.count()
                    print("({} row{})".format(ct, '' if ct == 1 else 's'),
                          file=self.output_file)

            if len(self.tables_so_far) == 0 and len(viable_tables) == 1:
                self.show_header_on_need()

            if self.args.save_bookmark:
                with open(self.args.save_bookmark[0], 'w') as fout:
                    link = OrderedDict()
                    link['url'] = self.args.catsql_database_url
                    link['table'] = list(self.tables) if self.tables else None
                    link['column'] = self.selected_columns
                    link['distinct'] = self.args.distinct
                    link['context'] = self.context_filters
                    link['hidden_columns'] = sorted(self.context_columns)
                    link['sql'] = self.args.sql
                    fout.write(json.dumps(link, indent=2))

            if self.args.edit and not self.failure:
                # Close the reference dump before copying it for editing.
                work_file.close()
                work_file = None
                edit_filename = os.path.join(work, 'variant.csv')
                copyfile(output_filename, edit_filename)
                # TABLE_EDITOR takes precedence over EDITOR; nano is the
                # last-resort default.
                editor = os.environ.get('TABLE_EDITOR', None)
                if not editor:
                    editor = os.environ.get('EDITOR', 'nano')
                call([editor, edit_filename])
                # Hand the reference and edited files to patchsql, reusing
                # this object's database and forwarding --quiet / --schema
                # when they are set.
                patchsql([self.url, '--table'] + self.tables_so_far + [
                    '--follow', output_filename, edit_filename, '--safe-null'
                ] + (['--quiet'] if self.args.quiet else []) +
                         (['--schema', self.schema] if self.schema else []),
                         database=self.database)

        finally:
            if self.failure:
                print(
                    "ERROR: "
                    "More than one table in csv/json output, consider adding:",
                    file=sys.stderr)
                for name in self.tables_so_far:
                    print("  --table {}".format(name), file=sys.stderr)

            # The spreadsheet is saved here in the finally block, so this
            # runs whether or not the body above raised.
            if self.target_ss:
                self.target_ss.save(self.output_in_excel[0])
            if work:
                if work_file:
                    # Best-effort close so a failure here cannot prevent the
                    # temporary directory from being removed below.
                    try:
                        work_file.close()
                    except Exception:
                        pass
                    work_file = None
                import shutil
                shutil.rmtree(work)
                work = None