Ejemplo n.º 1
0
 def setUp(self):
     """Prepare the wrapper, column map and quals shared by the tests."""
     # Column catalogue handed to the FDW: two text columns (one of them
     # exercising double-underscore name handling), an array and two ints.
     col_specs = (
         ('f__o_o', 'text'),
         ('bar', 'int'),
         ('baz', 'text[]'),
         ('quux', 'int'),
     )
     self._columns = {
         name: ColumnDefinition(name, type_name=type_name)
         for name, type_name in col_specs
     }
     fdw_options = {'doc_type': 'foo_doc', 'index': 'our_index'}
     self._fdw = ESForeignDataWrapper(fdw_options, self._columns)
     self._quals = [Qual('f__o_o', '=', 'value'), Qual('bar', '>', 5)]
Ejemplo n.º 2
0
    def import_schema(self, schema, srv_options, options,
                      restriction_type, restricts):
        """Build one foreign-table definition per Faker provider.

        Every public attribute of each provider becomes a varchar column;
        private names and Faker plumbing helpers (random*, *ify,
        'generator') are skipped.
        """
        table_defs = []

        for provider in Faker().providers:
            schema_name, provider_name = self._destructure_class_name(provider)

            def _is_column(attr):
                # Exclude private names and Faker's internal helpers.
                if attr.startswith('_') or attr.startswith('random'):
                    return False
                return not attr.endswith('ify') and attr != 'generator'

            cols = [
                ColumnDefinition(column_name=attr, type_name="varchar")
                for attr in dir(provider) if _is_column(attr)
            ]

            table_defs.append(TableDefinition(
                table_name=provider_name,
                schema=schema,
                columns=cols,
                options=options))

        return table_defs
Ejemplo n.º 3
0
 def import_schema(self, schema, srv_options, options, restriction_type,
                   restricts):
     """Return mock table definitions for IMPORT FOREIGN SCHEMA.

     Three fixed table names are offered; the LIMIT TO / EXCEPT clause
     (restriction_type / restricts) filters them. Every resulting table
     carries options['nb_col'] (default 3) text columns named col0..colN,
     each with a fixed option1=value1 column option.
     """
     log_to_postgres(
         "IMPORT %s FROM srv %s OPTIONS %s RESTRICTION: %s %s" %
         (schema, srv_options, options, restriction_type, restricts))
     # Set literal instead of set([...]) (flake8-comprehensions C405).
     tables = {
         unicode_("imported_table_1"),
         unicode_("imported_table_2"),
         unicode_("imported_table_3")
     }
     if restriction_type == 'limit':
         tables = tables.intersection(set(restricts))
     elif restriction_type == 'except':
         tables = tables - set(restricts)
     rv = []
     # Loop-invariant: the column count does not depend on the table.
     nb_col = options.get('nb_col', 3)
     # sorted() accepts any iterable; the intermediate list() was
     # redundant (flake8-comprehensions C414).
     for tname in sorted(tables):
         table = TableDefinition(tname)
         for col in range(nb_col):
             table.columns.append(
                 ColumnDefinition("col%s" % col,
                                  type_name="text",
                                  options={"option1": "value1"}))
         rv.append(table)
     return rv
Ejemplo n.º 4
0
def _get_table_definition(response, fdw_options, table_name, table_options):
    """Introspect a CSV response and build a Multicorn TableDefinition."""
    # Per-table parameters (e.g. encoding, delimiter) override the
    # server-level introspection options.
    effective_options = copy(fdw_options)
    effective_options.update(table_options)

    csv_options, reader = make_csv_reader(
        response, CSVOptions.from_fdw_options(effective_options))
    sample = list(islice(reader, csv_options.schema_inference_rows))

    # Without a header row, prepend a row of empty names so inference
    # still sees a "header"; empty names are autogenerated below.
    if not csv_options.header:
        sample = [[""] * len(sample[0])] + sample

    # For nonexistent column names: replace with autogenerated ones
    # (can't have empty column names).
    sg_schema = generate_column_names(infer_sg_schema(sample, None, None))

    # Keep the options we were handed (e.g. the S3 object) and layer the
    # autodetected CSV options on top.
    merged_options = copy(table_options)
    merged_options.update(csv_options.to_table_options())

    # ColumnDefinition can carry type OIDs, typmods and other internal PG
    # details, but other FDWs get by with just the textual type name.
    columns = [
        ColumnDefinition(column_name=col.name, type_name=col.pg_type)
        for col in sg_schema
    ]
    return TableDefinition(
        table_name=table_name,
        schema=None,
        columns=columns,
        options=merged_options,
    )
Ejemplo n.º 5
0
 def _import_table(cls, db, table, options):
     """Build a TableDefinition for one remote table.

     Reads the column list from system.columns; columns whose type has no
     mapping are skipped with a warning rather than failing the import.
     """
     columns = []
     # The query is built with inline string literals (db.select shows no
     # parameter binding here), so escape single quotes to keep a quoted
     # name from breaking out of the literal.
     safe_db = db.db_name.replace("'", "''")
     safe_table = table.replace("'", "''")
     sql = "SELECT name, type FROM system.columns where database='%s' and table='%s'" % (safe_db, safe_table)
     for row in db.select(sql):
         try:
             columns.append(ColumnDefinition(row.name, type_name=_convert_column_type(row.type)))
         except KeyError:
             # _convert_column_type raises KeyError for unmapped types;
             # skip the column instead of aborting the whole import.
             cls._warn('Unsupported column type %s in table %s was skipped' % (row.type, table))
     merged_options = dict(options, table_name=table)
     return TableDefinition(table, columns=columns, options=merged_options)
Ejemplo n.º 6
0
    def import_schema(self, schema, srv_options, options, restriction_type,
                      restricts):
        """Implement IMPORT FOREIGN SCHEMA for a rosbag file.

        The bag path is built from the server- and table-level
        'rosbag_path' options plus *schema*. One table is produced per
        topic, plus an optional 'pointcloud_formats' metadata table, all
        honouring the LIMIT TO / EXCEPT restriction clause.
        """
        Bag = import_bag(srv_options)
        pcid_str = options.pop('pcid', srv_options.pop('pcid', 0))
        pcid = int(pcid_str)
        patch_column = options.pop('patch_column',
                                   srv_options.pop('patch_column', 'points'))
        patch_columns = options.pop('patch_columns', '*').strip()
        patch_columns = [
            col.strip() for col in patch_columns.split(',') if col.strip()
        ]
        filename = srv_options.pop('rosbag_path', "") + options.pop(
            'rosbag_path', "") + schema
        bag = Bag(filename, 'r')

        tablecols = []
        topics = bag.get_type_and_topic_info().topics
        # Assign each topic its own pcid, consecutive after the base pcid.
        pcid_for_topic = {k: pcid + 1 + i for i, k in enumerate(topics.keys())}
        pointcloud_formats = True
        # BUG FIX: the original used "restriction_type is 'limit'" —
        # identity comparison with a string literal only works by accident
        # of interning (and is a SyntaxWarning on modern CPython); compare
        # with == instead.
        if restriction_type == 'limit':
            topics = {k: v for k, v in topics.items() if k in restricts}
            pointcloud_formats = 'pointcloud_formats' in restricts
        elif restriction_type == 'except':
            topics = {k: v for k, v in topics.items() if k not in restricts}
            pointcloud_formats = 'pointcloud_formats' not in restricts

        tabledefs = []
        if pointcloud_formats:
            # Fixed layout of the pointcloud_formats metadata table.
            tablecols = [
                ColumnDefinition('pcid', type_name='integer'),
                ColumnDefinition('srid', type_name='integer'),
                ColumnDefinition('schema', type_name='text'),
                ColumnDefinition('format', type_name='text'),
                ColumnDefinition('rostype', type_name='text'),
                ColumnDefinition('columns', type_name='text[]'),
                ColumnDefinition('ply_header', type_name='text'),
            ]
            tableopts = {
                'metadata': 'true',
                'rosbag': schema,
                'pcid': pcid_str
            }
            tabledefs.append(
                TableDefinition("pointcloud_formats",
                                columns=tablecols,
                                options=tableopts))

        for topic, infos in topics.items():
            pcid = pcid_for_topic[topic]
            columns, _, _, _, _, _ = get_columns(bag, topic, infos, pcid,
                                                 patch_column, patch_columns)
            tablecols = [get_column_def(k, *v) for k, v in columns.items()]
            tableopts = {'topic': topic, 'rosbag': schema, 'pcid': str(pcid)}
            tabledefs.append(
                TableDefinition(topic, columns=tablecols, options=tableopts))
        return tabledefs
Ejemplo n.º 7
0
    def _format_table(properties, table):
        """
        Utility method. Given a dict and a TableDefinition object
        iterates through the dict and populates the table
        using the dict keys.

        Each key becomes a 'character varying' column of the table.

        :param dict properties: the dict holding the keys
          used to build the table
        :param TableDefinition table: the table to build
        """
        # Iterating a dict yields its keys directly; .keys() was redundant.
        for key in properties:
            table.columns.append(
                ColumnDefinition(column_name=key,
                                 type_name='character varying'
                                 ))
Ejemplo n.º 8
0
    def test_execute_column_name_translation(self, scan_mock,
                                             _elasticsearch_mock):
        """execute() must translate double-underscore column names into
        dotted/dashed Elasticsearch field names in the query, and map the
        returned field names back to the PostgreSQL column names."""
        col_defs = {
            'object__nested_field':
            ColumnDefinition('object__nested_field', type_name='text')
        }
        wrapper = ESForeignDataWrapper(
            {
                'doc_type': 'foo_doc',
                'index': 'our_index',
                'column_name_translation': 'true'
            }, col_defs)
        scan_mock.return_value = [
            {'fields': {'object.nested-field': ['value']}}
        ]

        result_rows = list(wrapper.execute(
            [Qual('object__nested_field', '=', 'value')],
            ['object__nested_field']))

        # The ES query must carry the translated (dotted/dashed) name.
        expected_query = {
            'fields': ['object.nested-field'],
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'term': {'object.nested-field': 'value'}
                            }]
                        }
                    }
                }
            }
        }
        scan_mock.assert_called_once_with(wrapper.esclient,
                                          query=expected_query,
                                          index='our_index',
                                          doc_type='foo_doc',
                                          size=wrapper._SCROLL_SIZE,
                                          scroll=wrapper._SCROLL_LENGTH)

        # ...while the emitted row uses the PostgreSQL column name again.
        self.assertEqual(result_rows, [{'object__nested_field': 'value'}])
Ejemplo n.º 9
0
def get_column_def(col, typ, array, typmod, fmt):
    """Build a ColumnDefinition for *col*, mapping a ROS/python scalar
    type name to its PostgreSQL type name.

    Unknown type names pass through unchanged; *array* (e.g. '[]') is
    appended and *typmod*, when truthy, becomes a type modifier.
    """
    # ROS scalar type -> PostgreSQL type name.
    py2sql = {
        'int8': 'int2',
        'uint8': 'int2',
        'int16': 'int2',
        'uint16': 'int4',
        'int32': 'int4',
        'uint32': 'int8',
        'float32': 'float4',
        'float64': 'float8',
        'int64': 'int8',
        'uint64': 'int8',
        'bool': 'bool',
        'string': 'text',
    }
    # Unmapped names (already-SQL types) fall through unchanged.
    sql_type = py2sql.get(typ, typ) + array
    if typmod:
        sql_type = "{}({})".format(sql_type, typmod)
    return ColumnDefinition(col, type_name=sql_type)
Ejemplo n.º 10
0
def import_schema(schema, srv_options, options, restriction_type, restricts):
    """Implement IMPORT FOREIGN SCHEMA for a Cassandra keyspace.

    Connects to the cluster described by srv_options (hosts, optional
    username/password), reads the metadata of the keyspace named *schema*,
    applies the LIMIT TO / EXCEPT restriction clause and returns one
    TableDefinition per selected table.

    Options understood:
      with_row_id -- 'True' (default) appends a text '__rowid__' column.
      mapping     -- ';'-separated 'cassandra_name=pg_name' pairs renaming
                     Cassandra tables to PostgreSQL foreign-table names.
    """
    if ISDEBUG:
        # NOTE(review): "requiested" is a typo in this log message.
        logger.log(
            u"import schema {0} requiested with options {1}; restriction type: {2}; restrictions: {3}".format(schema,
                                                                                                              options,
                                                                                                              restriction_type,
                                                                                                              restricts))
    # Default missing connection options (localhost:9042), warning once
    # for each default applied.
    if "hosts" not in srv_options:
        logger.log("The hosts parameter is needed, setting to localhost.", WARNING)
    hosts = srv_options.get("hosts", "localhost").split(",")
    if "port" not in srv_options:
        logger.log("The port parameter is needed, setting to 9042.", WARNING)
    port = srv_options.get("port", "9042")
    username = srv_options.get("username", None)
    password = srv_options.get("password", None)
    with_row_id = options.get('with_row_id', 'True') == 'True'
    # Parse 'mapping' into forward (cassandra -> pg) and backward
    # (pg -> cassandra) dictionaries; malformed entries are skipped.
    names_mapping = options.get('mapping', '').split(';')
    mapping_dict = {}
    mapping_dict_backward = {}
    for s in names_mapping:
        kp = s.split('=')
        if len(kp) != 2:
            continue
        key = kp[0].strip()
        value = kp[1].strip()
        mapping_dict[key] = value
        mapping_dict_backward[value] = key

    # NOTE(review): 'port' is parsed above but never passed to Cluster();
    # confirm whether Cluster(hosts, port=int(port)) was intended.
    cluster = Cluster(hosts)
    if (username is not None):
        cluster.auth_provider = PlainTextAuthProvider(username=username, password=password)
    # Cassandra connection init
    session = cluster.connect()
    keyspace = cluster.metadata.keyspaces[schema]
    cassandra_tables = []
    tables = keyspace.tables
    views = keyspace.views
    # Select the tables to import according to the restriction clause.
    if restriction_type is None:
        for t in tables:
            # NOTE(review): 't in tables' is always true while iterating
            # 'tables', so the views branch below is unreachable here —
            # presumably the loop was meant to cover views too; verify.
            if t in tables:
                cassandra_tables.append(tables[t])
            else:
                cassandra_tables.append(views[t])
    elif restriction_type == 'limit':
        for r in restricts:
            t_name = r
            # Restriction names are PostgreSQL-side; map them back to the
            # Cassandra name before the metadata lookup.
            if t_name in mapping_dict_backward:
                t_name = mapping_dict_backward[t_name]
            if t_name in tables:
                cassandra_tables.append(tables[t_name])
            else:
                cassandra_tables.append(views[t_name])
    elif restriction_type == 'except':
        for t in tables:
            if t not in restricts:
                # NOTE(review): unreachable views branch, as above.
                if t in tables:
                    cassandra_tables.append(tables[t])
                else:
                    cassandra_tables.append(views[t])
    pg_tables = []
    for c_table in cassandra_tables:
        if ISDEBUG:
            logger.log("Importing table {0}...".format(c_table.name))
        pg_table_name = c_table.name
        if pg_table_name in mapping_dict:
            if ISDEBUG:
                logger.log("Cassandra table name '{0}' maps to PostgreSQL table name '{1}'".format(pg_table_name,
                                                                                                   mapping_dict[
                                                                                                       pg_table_name]))
            pg_table_name = mapping_dict[pg_table_name]
        pg_table = TableDefinition(pg_table_name)
        # Options read back later when queries against the table execute.
        pg_table.options['keyspace'] = schema
        pg_table.options['columnfamily'] = c_table.name
        for c_column_name in c_table.columns:
            cql_type = c_table.columns[c_column_name].cql_type
            pg_type = types_mapper.get_pg_type(cql_type)
            if ISDEBUG:
                logger.log("Adding column {0} with PostgreSQL type {2} (CQL type {1})".format(c_column_name, cql_type,
                                                                                              pg_type))
            pg_table.columns.append(ColumnDefinition(c_column_name, type_name=pg_type))
        if with_row_id:
            # Synthetic row-identifier column appended after the real ones.
            pg_table.columns.append(ColumnDefinition('__rowid__', type_name='text'))
        pg_tables.append(pg_table)
        if ISDEBUG:
            logger.log("Table imported: {0}".format(c_table.name))
    session.shutdown()
    return pg_tables
Ejemplo n.º 11
0
    def import_schema(self, schema, srv_options, options, restriction_type,
                      restricts):
        """
        Called on an IMPORT FOREIGN SCHEMA command.

        Populates a PostgreSQL schema from the json output of the
        'barman diagnose' command. One foreign table is created per barman
        server, its columns taken from the fields of the first backup json
        object of that server. Two extra tables are added:
        'server_config' (server name, description and jsonb configuration)
        and 'server_status' (one column per field of the status json
        object, plus the server name).

        Every table carries a 'table_name' option, used later during
        SELECT execution to locate the right table for the query.
        """
        # Run barman diagnose remotely; bail out on any ssh/command error.
        errors, result = self._execute_barman_cmd("barman diagnose",
                                                  srv_options['barman_user'],
                                                  srv_options['barman_host'])
        if errors:
            return
        servers = json.loads(result)['servers']
        tables = []
        for server, values in servers.items():
            # Make the server name a valid table name: '-' and '.' -> '_'.
            server = server.replace('-', '_').replace('.', '_')
            log_to_postgres('schema %s table %s' % (schema, server), DEBUG)
            table = TableDefinition(table_name=server)
            table.options['schema'] = schema
            table.options['table_name'] = server
            # Columns come from the first backup entry only.
            for properties in values['backups'].values():
                self._format_table(properties, table)
                log_to_postgres('schema %s table %s' % (schema, server))
                break
            tables.append(table)

        # server_config: fixed three-column layout.
        table_config = TableDefinition(table_name='server_config')
        table_config.options['schema'] = schema
        table_config.options['table_name'] = 'server_config'
        for col_name, col_type in (('server', 'character varying'),
                                   ('description', 'character varying'),
                                   ('config', 'jsonb')):
            table_config.columns.append(
                ColumnDefinition(column_name=col_name, type_name=col_type))
        tables.append(table_config)

        # server_status: columns taken from the first server's status
        # json object, plus the server name.
        table_status = TableDefinition(table_name='server_status')
        table_status.options['schema'] = schema
        table_status.options['table_name'] = 'server_status'
        for values in servers.values():
            self._format_table(values['status'], table_status)
            break
        table_status.columns.append(
            ColumnDefinition(column_name='server',
                             type_name='character varying'))
        tables.append(table_status)
        return tables
Ejemplo n.º 12
0
    def import_schema(cls, schema, srv_options, options, restriction_type,
                      restricts):
        """Expose things, thing types and thing groups as three foreign
        tables, each named '<schema>_<suffix>' and tagged with a
        'table_type' option."""
        # (table suffix, table_type option, [(column name, pg type), ...])
        specs = [
            ('things', 'thing', [
                ('thing_name', 'text'),
                ('thing_type_name', 'text'),
                ('thing_arn', 'text'),
                ('thing_version', 'integer'),
                ('thing_groups', 'jsonb'),
                ('thing_attributes', 'jsonb'),
                ('thing_shadow_data', 'jsonb'),
            ]),
            ('thing_types', 'thing-type', [
                ('thing_type_name', 'text'),
                ('thing_type_arn', 'text'),
                ('thing_type_properties', 'jsonb'),
                ('thing_type_metadata', 'jsonb'),
            ]),
            ('thing_groups', 'thing-group', [
                ('thing_group_name', 'text'),
                ('thing_group_arn', 'text'),
            ]),
        ]
        return [
            TableDefinition('{}_{}'.format(schema, suffix),
                            schema=schema,
                            options={'table_type': table_type},
                            columns=[
                                ColumnDefinition(name, type_name=pg_type)
                                for name, pg_type in columns
                            ])
            for suffix, table_type, columns in specs
        ]
Ejemplo n.º 13
0
    def import_schema_bigquery_fdw(self,
                                   schema,
                                   srv_options,
                                   options,
                                   restriction_type,
                                   restricts=None):
        """
        Pulls in the remote schema.

        Queries INFORMATION_SCHEMA.COLUMNS of *schema*, applies the IMPORT
        FOREIGN SCHEMA restriction clause (LIMIT TO / EXCEPT), validates
        each table with _check_table and returns the TableDefinitions to
        create. Returns [] as soon as validation raises FDWImportError.
        """
        # PEP 8 (E731): named functions instead of lambda assignments.
        if restriction_type == 'limit':
            def only(t):
                return t in restricts
        elif restriction_type == 'except':
            def only(t):
                return t not in restricts
        else:
            only = None

        client = self.getClient()
        query = f'''
            SELECT table_schema, table_name, column_name, data_type
            FROM `{schema}.INFORMATION_SCHEMA.COLUMNS`
            ORDER BY ordinal_position;
        '''

        client.runQuery(query, (), self.dialect)

        # Group result rows into (schema, table) -> [(column, type), ...].
        # (The original also collected an unused 'schemas' set — removed.)
        tables = defaultdict(list)
        for row in client.readResult():
            if only and not only(row.table_name):
                # excluded by the restriction clause
                continue
            tables[row.table_schema, row.table_name].append(
                (row.column_name, row.data_type))

        to_insert = []
        for (_schema, table), columns in tables.items():
            if _schema.lower() != schema.lower():
                # wrong schema, we'll skip
                continue

            # Let's make sure the table is sane-ish with respect to
            # column names and counts.
            try:
                if not self._check_table(table, columns):
                    # for "skip" in fdw_colcount and "skip_table" in fdw_colnames
                    continue
            except FDWImportError:
                # for "error" cases in fdw_colnames and fdw_colcount
                return []

            # for non-error, trim, and trim_columns

            ftable = TableDefinition(table)
            ftable.options['schema'] = schema
            ftable.options['tablename'] = table

            for col, typ in columns:
                # Unmapped BigQuery types fall back to TEXT.
                typ = DEFAULT_MAPPINGS.get(typ, "TEXT")
                ftable.columns.append(ColumnDefinition(col, type_name=typ))

            to_insert.append(ftable)
            if self.verbose:
                log_to_postgres(
                    "fdw importing table `" + schema + "." + table + "`",
                    WARNING)

        return to_insert
Ejemplo n.º 14
0
    def import_schema(self, schema, srv_options, options, restriction_type,
                      restricts):
        """Support for IMPORT FOREIGN SCHEMA.

        Returns a single foreign table named after options['species'] with
        the fixed VCF column layout; *schema* is stored as the table's
        'basedir' option alongside the other options passed in.
        """
        # Fixed VCF column layout: (name, PostgreSQL type), in order.
        column_specs = [
            ("chrom", "text"),
            ("pos", "integer"),
            ("id", "text"),
            ("ref", "text"),
            ("alt", "text[]"),
            ("qual", "real"),
            ("heterozygosity", "real"),
            ("sample", "text"),
            ("species", "text"),
            ("info", "text"),
            ("depth", "integer"),
            ("genotype", "text"),
            ("filter", "text"),
            ("issnp", "boolean"),
            ("issv", "boolean"),
            ("isindel", "boolean"),
            ("ismonomorphic", "boolean"),
            ("isdeletion", "boolean"),
            ("issvprecise", "boolean"),
            ("istransition", "boolean"),
            ("source", "text"),
        ]
        ftable = TableDefinition(options['species'],
                                 schema=None,
                                 columns=[
                                     ColumnDefinition(name, type_name=tn)
                                     for name, tn in column_specs
                                 ])

        # BUG FIX: the original aliased the caller's dict
        # (ftable.options = options) and then added 'basedir' to it,
        # mutating the caller's options. Copy instead.
        ftable.options = dict(options, basedir=schema)

        return [ftable]