def _connection_type(self):
    """
    Find out whether migration can be done directly via Vertica's
    ``EXPORT TO VERTICA``, or whether data must be loaded via odbc.

    Probes the direct path end-to-end: CONNECT from the source to the
    target, create a temporary table on the target, export one trivial
    row into it, then drop it.

    :return: A ``str``: ``'direct'`` if the probe succeeded,
        ``'odbc'`` otherwise.
    """
    details = connection_details(self._target)
    connect = ("CONNECT TO VERTICA {db} USER {user} "
               "PASSWORD '{pwd}' ON '{host}',5433".format(
                   db=details['db'],
                   user=self._kwargs.get('target_user'),
                   host=self._kwargs.get('target_host'),
                   pwd=self._kwargs.get('target_pwd')))
    try:
        self._source.execute(connect)
        self._target.execute(
            'CREATE GLOBAL TEMPORARY TABLE tmp_connect (test VARCHAR(42)) '
            'ON COMMIT DELETE ROWS')
        self._source.execute(
            'EXPORT TO VERTICA {db}.tmp_connect AS SELECT * '
            'FROM v_catalog.dual'.format(db=details['db']))
        self._target.execute('DROP TABLE tmp_connect')
        return 'direct'
    except Exception:
        # Any failure (connect, create, export, drop) means the direct
        # path is unusable; fall back to odbc.  Was a bare ``except:``,
        # which would also have swallowed SystemExit/KeyboardInterrupt.
        return 'odbc'
def _get_ddls(self, objects=None):
    """
    Query the source vertica to get the DDLs as a big string, using
    the ``EXPORT_OBJECTS`` SQL function.

    It happens that this function returns ``None`` from odbc. In that
    case vsql is used, and the ``source_pwd`` parameter becomes useful.

    :param objects: A ``list`` of object names to export; defaults to
        all objects (empty selector).
    :return: A ``str`` containing the DDLs.
    :raises VerticaMigratorError: If the vsql fallback cannot be run.
    """
    logger.info('Getting DDLs...')
    # Avoid the mutable-default-argument pitfall (shared list across calls).
    if objects is None:
        objects = []
    export_sql = "SELECT EXPORT_OBJECTS('', '{0}', False)".format(
        ','.join(objects))

    # I often have a segfault when running this, so let's fallback
    # by default
    # from_db = self._source.execute(export_sql).fetchone()
    from_db = None

    if from_db is None:
        logger.warning('Exporting object is done via vsql')
        details = connection_details(self._source_con)
        err = 'Unknown error'
        try:
            pr = subprocess.Popen(
                [
                    '/opt/vertica/bin/vsql',
                    '-U', self._kwargs.get('source_user'),
                    '-h', self._kwargs.get('source_host'),
                    '-d', details['db'],
                    '-t',  # rows only
                    '-w', self._kwargs.get('source_pwd'),
                    '-c', export_sql
                ],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            ddls, err = pr.communicate()
        # Popen raises OSError when vsql is missing/not executable;
        # CalledProcessError is kept for callers using check-based helpers.
        except (OSError, CalledProcessError) as e:
            raise VerticaMigratorError("""
                Could not use vsql to get ddls: {0}.
                Output was: {1}
                """.format(e, err))
    else:
        logger.info('From export_objects')
        ddls = from_db[0]
    return ddls
def _get_ddls(self, objects=None):
    """
    Query the source vertica to get the DDLs as a big string, using
    the ``EXPORT_OBJECTS`` SQL function.

    It happens that this function returns ``None`` from odbc. In that
    case vsql is used, and the ``source_pwd`` parameter becomes useful.

    :param objects: A ``list`` of object names to export; defaults to
        all objects (empty selector).
    :return: A ``str`` containing the DDLs.
    :raises VerticaMigratorError: If the vsql fallback cannot be run.
    """
    logger.info('Getting DDLs...')
    # ``objects=[]`` as a default would be shared between calls; use
    # None as the sentinel instead.
    objects = [] if objects is None else objects
    export_sql = "SELECT EXPORT_OBJECTS('', '{0}', False)".format(
        ','.join(objects))

    # I often have a segfault when running this, so let's fallback
    # by default
    # from_db = self._source.execute(export_sql).fetchone()
    from_db = None

    if from_db is None:
        logger.warning('Exporting object is done via vsql')
        details = connection_details(self._source_con)
        err = 'Unknown error'
        try:
            pr = subprocess.Popen([
                '/opt/vertica/bin/vsql',
                '-U', self._kwargs.get('source_user'),
                '-h', self._kwargs.get('source_host'),
                '-d', details['db'],
                '-t',  # rows only
                '-w', self._kwargs.get('source_pwd'),
                '-c', export_sql
            ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            ddls, err = pr.communicate()
        # Popen itself raises OSError (not CalledProcessError) when the
        # vsql binary is absent; catch both so the fallback error is
        # actually reported through VerticaMigratorError.
        except (OSError, CalledProcessError) as e:
            raise VerticaMigratorError("""
                Could not use vsql to get ddls: {0}.
                Output was: {1}
                """.format(e, err))
    else:
        logger.info('From export_objects')
        ddls = from_db[0]
    return ddls
def _connection_type(self):
    """
    Find out whether migration can be done directly via Vertica's
    ``EXPORT TO VERTICA``, or whether data must be loaded via odbc.

    Probes the direct path: CONNECT from source to target, create a
    temporary table on the target, export one trivial row into it,
    then drop it.

    :return: A ``str``: ``'direct'`` if the probe succeeded,
        ``'odbc'`` otherwise.
    """
    details = connection_details(self._target)
    connect = (
        "CONNECT TO VERTICA {db} USER {user} "
        "PASSWORD '{pwd}' ON '{host}',5433".format(
            db=details['db'],
            user=self._kwargs.get('target_user'),
            host=self._kwargs.get('target_host'),
            pwd=self._kwargs.get('target_pwd')
        )
    )
    try:
        self._source.execute(connect)
        self._target.execute(
            'CREATE GLOBAL TEMPORARY TABLE tmp_connect (test VARCHAR(42)) '
            'ON COMMIT DELETE ROWS'
        )
        # BUG FIX: exported to ``connect_tst`` while the table created
        # above is ``tmp_connect`` — the probe could never succeed and
        # this method always returned 'odbc'.
        self._source.execute(
            'EXPORT TO VERTICA {db}.tmp_connect AS SELECT * '
            'FROM v_catalog.dual'.format(
                db=details['db']
            )
        )
        self._target.execute('DROP TABLE tmp_connect')
        return 'direct'
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit and
        # KeyboardInterrupt are not swallowed.
        return 'odbc'
def migrate_data(self, objects):
    """
    Migrate data, table by table, retrying after transient odbc errors
    and reporting any tables left unmigrated.

    :param objects: A ``list`` of objects to migrate.
    :raises Exception: Re-raises whatever non-odbc error aborted the
        copy, after logging the list of missing tables.
    """
    logger.warning('Starting migrating data.')
    con_type = self._connection_type()
    logger.warning('Connection type: {t}'.format(t=con_type))

    # used if we are direct, cannot hurt otherwise, and save a lot of
    # queries if done now instead of inside _migrate_table
    target_details = connection_details(self._target)

    tables = self._get_table_list(self._source, objects)
    done_tbl = 0
    errors = []
    nbrows = 0
    # Pre-bind so the outer except can log it even if we fail before
    # the first iteration assigns a real name.
    tname = '(none)'
    try:
        while len(tables) > 0:
            table = tables.pop(0)
            tname = '{s}.{t}'.format(s=table[0], t=table[1])
            done_tbl += 1
            # was ``logging.info`` — root logger instead of this
            # module's configured logger.
            logger.info('Exporting data of {t}'.format(t=tname))
            try:
                nbrows = self._migrate_table(
                    con_type, tname, target_details)
            except pyodbc.ProgrammingError as e:
                errors.append(tname)
                logger.error('Something went wrong during data copy for '
                             'table {t}. Waiting 2 minutes to resume'.format(
                                 t=tname))
                logger.error("{c}: {t}".format(c=e.args[0], t=e.args[1]))
                # wait a few minutes in case the cluster comes back to life
                time.sleep(120)
            logger.info('{d} tables done ({r} exported), {td} todo'.format(
                d=done_tbl, td=len(tables), r=nbrows))
    except Exception:
        logger.error('Something went very wrong during data copy '
                     'for table {t}.'.format(t=tname))
        errors.append(tname)
        for t in tables:
            errors.append('{s}.{t} '.format(s=t[0], t=t[1]))
        logger.error('Missing tables:')
        logger.error(' '.join(errors))
        # re-raise last exception
        raise

    wouldhavebeen = '' if self._commit else 'would have been with --commit'
    logger.warning('All data {0} exported.'.format(wouldhavebeen))
    if len(errors) > 0:
        logger.error('Missing tables:')
        logger.error(' '.join(errors))
    if con_type == 'direct':
        self._source.execute('DISCONNECT {db}'.format(
            db=target_details['db']))
def migrate_data(self, objects):
    """
    Migrate data, table by table, retrying after transient odbc errors
    and reporting any tables left unmigrated.

    :param objects: A ``list`` of objects to migrate.
    :raises Exception: Re-raises whatever non-odbc error aborted the
        copy, after logging the list of missing tables.
    """
    logger.warning('Starting migrating data.')
    con_type = self._connection_type()
    logger.warning('Connection type: {t}'.format(t=con_type))

    # used if we are direct, cannot hurt otherwise, and save a lot of
    # queries if done now instead of inside _migrate_table
    target_details = connection_details(self._target)

    tables = self._get_table_list(self._source, objects)
    done_tbl = 0
    errors = []
    nbrows = 0
    # Pre-bind so the outer except can log something meaningful even
    # if the failure happens before the first iteration.
    tname = '(none)'
    try:
        while len(tables) > 0:
            table = tables.pop(0)
            tname = '{s}.{t}'.format(s=table[0], t=table[1])
            done_tbl += 1
            # was ``logging.info`` — root logger instead of this
            # module's configured logger.
            logger.info('Exporting data of {t}'.format(t=tname))
            try:
                nbrows = self._migrate_table(
                    con_type, tname, target_details)
            except pyodbc.ProgrammingError as e:
                errors.append(tname)
                logger.error('Something went wrong during '
                             'data copy for table {t}. Waiting 2 '
                             'minutes to resume'.format(t=tname))
                logger.error("{c}: {t}".format(c=e.args[0],
                                               t=e.args[1]))
                # wait a few minutes in case the cluster comes back to life
                time.sleep(120)
            logger.info('{d} tables done ({r} exported), {td} todo'.format(
                d=done_tbl, td=len(tables), r=nbrows))
    except Exception:
        logger.error('Something went very wrong during data copy '
                     'for table {t}.'.format(t=tname))
        errors.append(tname)
        for t in tables:
            errors.append('{s}.{t} '.format(s=t[0], t=t[1]))
        logger.error('Missing tables:')
        logger.error(' '.join(errors))
        # re-raise last exception
        raise

    wouldhavebeen = '' if self._commit else 'would have been with --commit'
    logger.warning('All data {0} exported.'.format(wouldhavebeen))
    if len(errors) > 0:
        logger.error('Missing tables:')
        logger.error(' '.join(errors))
    if con_type == 'direct':
        self._source.execute('DISCONNECT {db}'.format(
            db=target_details['db']))