Ejemplo n.º 1
0
    def _connection_type(self):
        """
        Finds out if the migration can be done directly via the ``EXPORT`` in
        Vertica, or if data needs to be loaded via odbc.

        :return:
            A ``str`` stating 'direct' or 'odbc'.
        """
        details = connection_details(self._target)

        # NOTE(review): credentials are interpolated straight into the SQL
        # string — assumes they contain no quotes; CONNECT statements cannot
        # be parameterized.
        connect = ("CONNECT TO VERTICA {db} USER {user} "
                   "PASSWORD '{pwd}' ON '{host}',5433".format(
                       db=details['db'],
                       user=self._kwargs.get('target_user'),
                       host=self._kwargs.get('target_host'),
                       pwd=self._kwargs.get('target_pwd')))

        try:
            # Round-trip probe: open the connection from the source, create a
            # throwaway temp table on the target, EXPORT one row into it from
            # the source, and clean up. If every step succeeds, a direct
            # Vertica-to-Vertica EXPORT is possible.
            self._source.execute(connect)
            self._target.execute(
                'CREATE GLOBAL TEMPORARY TABLE tmp_connect (test VARCHAR(42)) '
                'ON COMMIT DELETE ROWS')
            self._source.execute(
                'EXPORT TO VERTICA {db}.tmp_connect AS SELECT * '
                'FROM v_catalog.dual'.format(db=details['db']))
            self._target.execute('DROP TABLE tmp_connect')

            return 'direct'
        except Exception:
            # Narrowed from a bare ``except:``, which would also swallow
            # KeyboardInterrupt/SystemExit. Any failure in the probe means
            # direct EXPORT is unavailable; fall back to odbc.
            return 'odbc'
Ejemplo n.º 2
0
    def _get_ddls(self, objects=None):
        """
        Query the source vertica to get the DDLs as a big string, using the
        ``EXPORT_OBJECTS`` SQL function.

        It happens that this function returns ``None`` from odbc. In that case
        vsql is used, and the ``source_pwd`` parameter becomes useful.

        :param objects:
            A ``list`` of object names to export; defaults to an empty list
            (export everything).

        :return:
            A ``str`` containing the DDLs.
        """
        # Mutable default argument ([]) replaced by the None sentinel to
        # avoid the shared-default pitfall; behavior is unchanged.
        if objects is None:
            objects = []

        logger.info('Getting DDLs...')
        export_sql = "SELECT EXPORT_OBJECTS('', '{0}', False)".format(
            ','.join(objects))

        # I often have a segfault when running this, so let's fallback
        # by default
        # from_db = self._source.execute(export_sql).fetchone()
        from_db = None
        if from_db is None:
            logger.warning('Exporting object is done via vsql')
            details = connection_details(self._source_con)

            err = 'Unknown error'
            try:
                pr = subprocess.Popen(
                    [
                        '/opt/vertica/bin/vsql',
                        '-U',
                        self._kwargs.get('source_user'),
                        '-h',
                        self._kwargs.get('source_host'),
                        '-d',
                        details['db'],
                        '-t',  # rows only
                        '-w',
                        self._kwargs.get('source_pwd'),
                        '-c',
                        export_sql
                    ],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)
                ddls, err = pr.communicate()
            # Popen/communicate raise OSError (e.g. vsql binary missing),
            # never CalledProcessError — the original handler could not fire.
            # CalledProcessError is kept for safety alongside OSError.
            except (OSError, CalledProcessError) as e:
                raise VerticaMigratorError("""
                    Could not use vsql to get ddls: {0}.
                    Output was: {1}
                    """.format(e, err))
        else:
            logger.info('From export_objects')
            ddls = from_db[0]

        return ddls
Ejemplo n.º 3
0
    def _get_ddls(self, objects=None):
        """
        Query the source vertica to get the DDLs as a big string, using the
        ``EXPORT_OBJECTS`` SQL function.

        It happens that this function returns ``None`` from odbc. In that case
        vsql is used, and the ``source_pwd`` parameter becomes useful.

        :param objects:
            A ``list`` of object names to export; defaults to an empty list
            (export everything).

        :return:
            A ``str`` containing the DDLs.
        """
        # Mutable default argument ([]) replaced by the None sentinel to
        # avoid the shared-default pitfall; behavior is unchanged.
        if objects is None:
            objects = []

        logger.info('Getting DDLs...')
        export_sql = "SELECT EXPORT_OBJECTS('', '{0}', False)".format(
            ','.join(objects))

        # I often have a segfault when running this, so let's fallback
        # by default
        # from_db = self._source.execute(export_sql).fetchone()
        from_db = None
        if from_db is None:
            logger.warning('Exporting object is done via vsql')
            details = connection_details(self._source_con)

            err = 'Unknown error'
            try:
                pr = subprocess.Popen([
                    '/opt/vertica/bin/vsql',
                    '-U', self._kwargs.get('source_user'),
                    '-h', self._kwargs.get('source_host'),
                    '-d', details['db'],
                    '-t',  # rows only
                    '-w',  self._kwargs.get('source_pwd'),
                    '-c',  export_sql
                ],  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                ddls, err = pr.communicate()
            # Popen/communicate raise OSError (e.g. vsql binary missing),
            # never CalledProcessError — the original handler could not fire.
            # CalledProcessError is kept for safety alongside OSError.
            except (OSError, CalledProcessError) as e:
                raise VerticaMigratorError("""
                    Could not use vsql to get ddls: {0}.
                    Output was: {1}
                    """.format(e, err))
        else:
            logger.info('From export_objects')
            ddls = from_db[0]

        return ddls
Ejemplo n.º 4
0
    def _connection_type(self):
        """
        Finds out if the migration can be done directly via the ``EXPORT`` in
        Vertica, or if data needs to be loaded via odbc.

        :return:
            A ``str`` stating 'direct' or 'odbc'.
        """
        details = connection_details(self._target)

        # NOTE(review): credentials are interpolated straight into the SQL
        # string — assumes they contain no quotes; CONNECT statements cannot
        # be parameterized.
        connect = (
            "CONNECT TO VERTICA {db} USER {user} "
            "PASSWORD '{pwd}' ON '{host}',5433".format(
                db=details['db'],
                user=self._kwargs.get('target_user'),
                host=self._kwargs.get('target_host'),
                pwd=self._kwargs.get('target_pwd')
            )
        )

        try:
            # Round-trip probe: create a temp table on the target and EXPORT
            # into it from the source; success means direct EXPORT works.
            self._source.execute(connect)
            self._target.execute(
                'CREATE GLOBAL TEMPORARY TABLE tmp_connect (test VARCHAR(42)) '
                'ON COMMIT DELETE ROWS'
            )
            # BUG FIX: exported to '{db}.connect_tst' while the table created
            # above is 'tmp_connect' — the mismatch made the probe always fail
            # and the method always return 'odbc'.
            self._source.execute(
                'EXPORT TO VERTICA {db}.tmp_connect AS SELECT * '
                'FROM v_catalog.dual'.format(
                    db=details['db']
                )
            )
            self._target.execute('DROP TABLE tmp_connect')

            return 'direct'
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are not swallowed; any probe failure means fall back to odbc.
            return 'odbc'
Ejemplo n.º 5
0
    def migrate_data(self, objects):
        """
        Migrate data table by table from the source to the target.

        Per-table ``pyodbc.ProgrammingError`` failures are recorded and the
        loop resumes after a pause; any other exception aborts the migration,
        logs all remaining tables, and is re-raised.

        :param objects:
            A ``list`` of objects to migrate.
        """
        logger.warning('Starting migrating data.')

        con_type = self._connection_type()
        logger.warning('Connection type: {t}'.format(t=con_type))

        # used if we are direct, cannot hurt otherwise, and save a lot of
        # queries if done now instead of inside _migrate_table
        target_details = connection_details(self._target)

        tables = self._get_table_list(self._source, objects)
        done_tbl = 0
        errors = []
        nbrows = 0

        try:
            while len(tables) > 0:
                table = tables.pop(0)
                tname = '{s}.{t}'.format(s=table[0], t=table[1])
                done_tbl += 1
                # was ``logging.info`` (root logger) — use the module-level
                # ``logger`` like everywhere else in this method
                logger.info('Exporting data of {t}'.format(t=tname))

                try:
                    nbrows = self._migrate_table(
                        con_type, tname, target_details)
                except pyodbc.ProgrammingError as e:
                    errors.append(tname)
                    logger.error('Something went wrong during data copy for '
                                 'table {t}. Waiting 2 minutes to resume'.format(
                                     t=tname))
                    logger.error("{c}: {t}".format(c=e.args[0], t=e.args[1]))
                    # wait a few minutes in case the cluster comes back to life
                    time.sleep(120)

                # typo fixed: 'exportes' -> 'exported'
                logger.info('{d} tables done ({r} exported), {td} todo'.format(
                    d=done_tbl, td=len(tables), r=nbrows))

        except Exception:
            # unexpected failure: record the current table plus everything
            # still pending, then re-raise for the caller (``e`` was bound
            # but unused in the original)
            logger.error('Something went very wrong during data copy '
                         'for table {t}.'.format(t=tname))
            errors.append(tname)
            for t in tables:
                errors.append('{s}.{t} '.format(s=t[0], t=t[1]))
            logger.error('Missing tables:')
            logger.error(' '.join(errors))
            # re-raise last exception
            raise

        wouldhavebeen = '' if self._commit else 'would have been with --commit'
        logger.warning('All data {0} exported.'.format(wouldhavebeen))

        if len(errors) > 0:
            logger.error('Missing tables:')
            logger.error(' '.join(errors))

        if con_type == 'direct':
            self._source.execute('DISCONNECT {db}'.format(
                db=target_details['db']))
Ejemplo n.º 6
0
    def migrate_data(self, objects):
        """
        Migrate data table by table from the source to the target.

        Per-table ``pyodbc.ProgrammingError`` failures are recorded and the
        loop resumes after a pause; any other exception aborts the migration,
        logs all remaining tables, and is re-raised.

        :param objects:
            A ``list`` of objects to migrate.
        """
        logger.warning('Starting migrating data.')

        con_type = self._connection_type()
        logger.warning('Connection type: {t}'.format(t=con_type))

        # used if we are direct, cannot hurt otherwise, and save a lot of
        # queries if done now instead of inside _migrate_table
        target_details = connection_details(self._target)

        tables = self._get_table_list(self._source, objects)
        done_tbl = 0
        errors = []
        nbrows = 0

        try:
            while len(tables) > 0:
                table = tables.pop(0)
                tname = '{s}.{t}'.format(s=table[0], t=table[1])
                done_tbl += 1
                # was ``logging.info`` (root logger) — use the module-level
                # ``logger`` like everywhere else in this method
                logger.info('Exporting data of {t}'.format(t=tname))

                try:
                    nbrows = self._migrate_table(
                        con_type, tname, target_details)
                except pyodbc.ProgrammingError as e:
                    errors.append(tname)
                    logger.error('Something went wrong during '
                                 'data copy for table {t}. Waiting 2 '
                                 'minutes to resume'.format(t=tname))
                    logger.error("{c}: {t}".format(c=e.args[0], t=e.args[1]))
                    # wait a few minutes in case the cluster comes back to life
                    time.sleep(120)

                # typo fixed: 'exportes' -> 'exported'
                logger.info('{d} tables done ({r} exported), {td} todo'.format(
                    d=done_tbl, td=len(tables), r=nbrows))

        except Exception:
            # unexpected failure: record the current table plus everything
            # still pending, then re-raise for the caller (``e`` was bound
            # but unused in the original)
            logger.error('Something went very wrong during data copy '
                         'for table {t}.'.format(t=tname))
            errors.append(tname)
            for t in tables:
                errors.append('{s}.{t} '.format(s=t[0], t=t[1]))
            logger.error('Missing tables:')
            logger.error(' '.join(errors))
            # re-raise last exception
            raise

        wouldhavebeen = '' if self._commit else 'would have been with --commit'
        logger.warning('All data {0} exported.'.format(wouldhavebeen))

        if len(errors) > 0:
            logger.error('Missing tables:')
            logger.error(' '.join(errors))

        if con_type == 'direct':
            self._source.execute('DISCONNECT {db}'.format(
                db=target_details['db']))