Example #1
0
def add_argument(log,vector,parser):
	"""Register source- and target-specific CLI options on *parser*.

	The copy vector (e.g. 'ora2csv') is split on the literal '2' into
	(source, target); each side selects which argparse options to add.

	:param log: logger (unused here, kept for a uniform call signature).
	:param vector: copy vector string of the form '<source>2<target>'.
	:param parser: argparse.ArgumentParser to populate.
	:raises CopyVectorError: when source or target is not a recognized kind.
	"""
	# From CSV file
	(source, target) = vector.split('2')
	src = source.upper()
	tgt = target.upper()
	# NOTE(fix): the original tests were `x in ('MYSQL')` etc. -- without a
	# trailing comma that parenthesized form is just a string, so `in`
	# performed a SUBSTRING test (e.g. 'SQL' or 'MY' matched 'MYSQL').
	# One-element tuples make these true membership tests.
	if src in ('MYSQL',) or tgt in ('MYSQL',):
		parser.add_argument('-z','--mysql_client_home', type=str,  help='Mysql client home.')

	if src in ('ORA',):
		parser.add_argument('-f','--from_db', type=str,  help='From database.')
		parser.add_argument('-c','--from_table', type=str,  help='From table.')
		parser.add_argument('-e','--nls_date_format', type=str, default='DD-Mon-YYYY HH:MI:SS AM', help='nls_date_format for spool.')
		parser.add_argument('-m','--nls_time_format', type=str, default='DD-Mon-RR HH.MI.SSXFF AM', help='nls_time_format for spool.')
	elif src in ('CSV',):
		parser.add_argument('-i','--input_file',type=str,  help='Input CSV file.')
		parser.add_argument('-k','--skip_rows',type=int, default=0, help='Header size. Number of rows to skip in input file.')
		parser.add_argument('-y','--sample_size_pct', type=int,  default=1, help='Sample size in %% to estimate number of lines in input CSV file.')
	elif src in ('SS',):
		parser.add_argument('-q','--query_sql_file',type=str,  help='Input file with query sql.')
		parser.add_argument('-j','--from_user',type=str,  help='SQL Server user')
		parser.add_argument('-x','--from_passwd',type=str,  help='SQL Server user password')
		parser.add_argument('-b','--from_db_name', type=str,  help='SQL Server database')
		parser.add_argument('-n','--from_db_server', type=str,  help='SQL Server instance name.')
		parser.add_argument('-i','--wait_limit_sec', type=str,  help='Seconds to wait for spool file to become available.')
	elif src in ('MYSQL',):
		parser.add_argument('-q','--query_sql_file',type=str,  help='Input file with query sql.')
		parser.add_argument('-j','--from_user',type=str,  help='Mysql user')
		parser.add_argument('-x','--from_passwd',type=str,  help='Mysql user password')
		parser.add_argument('-b','--from_db_name', type=str,  help='Mysql database')
		parser.add_argument('-n','--from_db_server', type=str,  help='Mysql instance name.')
	else:
		raise CopyVectorError(vector)

	if tgt in ('ORA',):
		parser.add_argument('-g','--to_db', type=str,  help='To database.')
		parser.add_argument('-a','--to_table', type=str,  help='To table.')
		# -e/-m are already registered when the source is ORA; adding them
		# again would raise an argparse conflict error.
		if src not in ('ORA',):
			parser.add_argument('-e','--nls_date_format', type=str, default='DD-Mon-YYYY HH:MI:SS AM', help='nls_date_format for spool.')
			parser.add_argument('-m','--nls_time_format', type=str, default='DD-Mon-RR HH.MI.SSXFF AM', help='nls_time_format for spool.')
	elif tgt in ('CSV',):
		parser.add_argument('-a','--to_file', type=str,  help='To file.')
	elif tgt in ('SS',):
		parser.add_argument('-u','--to_user',type=str,  help='SQL Server user')
		parser.add_argument('-p','--to_passwd',type=str,  help='SQL Server user password')
		parser.add_argument('-d','--to_db_name', type=str,  help='SQL Server database')
		parser.add_argument('-s','--to_db_server', type=str,  help='SQL Server instance name.')
		parser.add_argument('-a','--to_table', type=str,  help='To table.')
	elif tgt in ('MYSQL',):
		parser.add_argument('-u','--to_user',type=str,  help='Target db user')
		parser.add_argument('-p','--to_passwd',type=str,  help='Target db user password')
		parser.add_argument('-d','--to_db_name', type=str,  help='Target database')
		parser.add_argument('-s','--to_db_server', type=str,  help='Target db instance name.')
		parser.add_argument('-a','--to_table', type=str,  help='Target table.')
	else:
		raise CopyVectorError(vector)
Example #2
0
    def Prepare(self):
        """Resolve the copy vector into source and target pipelines.

        Splits ``self.args.copy_vector`` on ``self.conf._to`` into
        (source, target), looks the source spooler up in ``all_spoolers``,
        and wires one of ToCSV/ToJSON/ToDDL as the target.

        Sets: ``self.source``, ``self.target``, ``self.fromDb``, ``self.toDb``.
        Raises ``CopyVectorError`` for an unsupported target.
        """
        vector = self.args.copy_vector
        log = self.log
        args = self.args
        datadir = self.datadir
        (self.source, self.target) = vector.split(self.conf._to)
        (source, target) = (self.source, self.target)
        # Fixed: the assert message previously lacked the `% source`
        # formatting, so it always printed a literal '%s'.
        assert source.upper() in self.conf.dbs, \
            'Data source %s is not supported' % source

        import all_spoolers as spoolers
        from_ppl = spoolers.get_ppl(source.upper())
        assert from_ppl, 'Source pipeline is not set for "%s"' % source

        # One-element tuples matter: `x in ('CSV')` would be a substring
        # test against the string 'CSV', not a membership test.
        if target.upper() in ('CSV',):
            from pipeline.v101.to_csv import ToCSV as to_ppl
            self.toDb = to_ppl(self, log, args.to_file, datadir, self.conf)
        elif target.upper() in ('JSON',):
            from pipeline.v101.to_json import ToJSON as to_ppl
            self.toDb = to_ppl(self, log, args.to_file, datadir, self.conf)
        elif target.upper() in ('DDL',):
            from pipeline.v101.to_ddl import ToDDL as to_ppl
            self.toDb = to_ppl(self, log, args.to_file, datadir, self.conf)
        else:
            raise CopyVectorError(vector)
        self.fromDb = from_ppl(self,
                               log,
                               datadir,
                               self.conf,
                               db=source.upper())
Example #3
0
    def Prepare(self):
        """Wire a CSV source pipeline to a db loader for the target.

        Splits the copy vector on ``self.conf._to``; only a CSV source is
        supported here. The target loader is resolved via ``all_loaders``.

        Sets: ``self.fromDb``, ``self.toDb``.
        Raises ``CopyVectorError`` for a non-CSV source.
        """
        vector = self.args.copy_vector
        datadir = self.datadir
        log = self.log
        args = self.args
        (source, target) = vector.split(self.conf._to)

        # One-element tuple matters: `x in ('CSV')` would be a substring
        # test against the string 'CSV', not a membership test.
        if source.upper() in ('CSV',):
            self.no_sharded_load(source, target)
            from pipeline.v101.from_csv import FromCSV as from_ppl
            self.fromDb = from_ppl(log, args.input_files, args.skip_rows,
                                   datadir, self.conf)
        else:
            raise CopyVectorError(vector)

        import all_loaders as loaders
        to_ppl = loaders.get_ppl(target.upper())
        # Guard against an unknown target, as the sibling Prepare does.
        assert to_ppl, 'Target pipeline is not set for "%s"' % target
        self.toDb = to_ppl(log, datadir, self.conf, target.upper())
Example #4
0
    def Prepare(self):
        """Wire a file source (CSV/JSON/DDL) to a db loader for the target.

        Splits the copy vector on ``self.conf._to`` into (source, target),
        builds the matching From* pipeline, and resolves the target loader
        via ``all_loaders``.

        Sets: ``self.source``, ``self.target``, ``self.fromDb``, ``self.toDb``.
        Raises ``CopyVectorError`` for an unsupported source.
        """
        vector = self.args.copy_vector
        log = self.log
        datadir = self.datadir
        (self.source, self.target) = vector.split(self.conf._to)
        (source, target) = (self.source, self.target)

        # Fixed: the assert message previously lacked the `% source`
        # formatting, so it always printed a literal '%s'.
        assert source.upper() in self.conf.dbs, \
            'Data source %s is not supported' % source

        input_files = self.args.input_files
        # One-element tuples matter: `x in ('CSV')` would be a substring
        # test against the string 'CSV', not a membership test.
        if source.upper() in ('CSV',):
            from pipeline.v101.from_csv import FromCSV as from_ppl
            self.fromDb = from_ppl(self, log, input_files, 0, datadir,
                                   self.conf)
        elif source.upper() in ('JSON',):
            from pipeline.v101.from_json import FromJSON as from_ppl
            self.fromDb = from_ppl(self, log, input_files, datadir, self.conf)
        elif source.upper() in ('DDL',):
            from pipeline.v101.from_ddl import FromDDL as from_ppl
            # Fixed: `skip_rows` was an undefined name here (NameError at
            # runtime); pass 0 as the CSV branch does.
            # NOTE(review): confirm FromDDL's skip_rows argument semantics.
            self.fromDb = from_ppl(self, log, input_files, 0, datadir,
                                   self.conf)
        else:
            raise CopyVectorError(vector)

        import all_loaders as loaders
        to_ppl = loaders.get_ppl(target.upper())
        # Fixed: message previously interpolated `source` for a missing
        # TARGET pipeline.
        assert to_ppl, 'Target pipeline is not set for "%s"' % target
        self.toDb = to_ppl(self, log, datadir, self.conf, target.upper())
Example #5
0
    # Choose the ETL template from the (source, target) copy vector:
    #   file-kind source -> load_from_file
    #   file-kind target -> spool_to_file
    #   db -> db         -> spool_and_load
    if source.upper() in ff:
        # NOTE(fix): was `target.upper() in 'MYSQL'` -- a substring test
        # that also matched e.g. 'SQL'; the one-element tuple makes it a
        # true membership test.
        if target.upper() in ('MYSQL',):
            # only serial load
            if args.pool_size > 1 or args.num_of_shards > 1:
                log.warn('Forcing serial load in MySQL.')
                args.pool_size = 1
                args.num_of_shards = 1
        from template.v101.load_from_file import load_from_file as etl_tmpl
    elif target.upper() in ff:
        from template.v101.spool_to_file import spool_to_file as etl_tmpl
    elif target.upper() in dbs and source.upper() in dbs:
        from template.v101.spool_and_load import spool_and_load as etl_tmpl
    else:
        raise CopyVectorError(copy_vector)

    # Example invocation kept for reference:
    #   python datamule.py -w ora11g2csv -o 1 -r 1 -t "|" -c SCOTT.Timestamp_test_from -f SCOTT/tiger2@orcl -e "YYYY/MM/DD" -m "YYYY-MM-DD-HH24.MI.SS.FF" -O "YYYY-MM-DD-HH24:MI:SS.FF" -z "C:\app\alex_buz\product\11.2.0\dbhome_2\BIN"

    etl = etl_tmpl(log, datadir, conf)
    etl.print_copy_details()

    if 1:
        # Interactive confirmation before the copy starts (Python 2 raw_input).
        var = raw_input("Are you sure you want to proceed?(y/n): ")
Example #6
0
    def Prepare(self):
        """Resolve the copy vector into a source spooler and a CSV target.

        The source pipeline comes from ``all_spoolers``; ORA* sources are
        overridden with the dedicated ``FromOracle`` spooler. Only a CSV
        target is supported.

        Sets: ``self.fromDb``, ``self.toDb``.
        Raises ``CopyVectorError`` for an unsupported target.
        """
        vector = self.args.copy_vector
        log = self.log
        args = self.args
        datadir = self.datadir
        (source, target) = vector.split(self.conf._to)
        # Fixed: the assert message previously lacked the `% source`
        # formatting, so it always printed a literal '%s'.
        assert source.upper() in self.conf.dbs, \
            'Data source %s is not supported' % source

        import all_spoolers as spoolers
        from_ppl = spoolers.get_ppl(source.upper())
        assert from_ppl, 'Source pipeline is not set for "%s"' % source

        # ORA* sources use the dedicated Oracle spooler instead of the
        # registry result.
        if source.upper().startswith('ORA'):
            from pipeline.v101.from_oracle import FromOracle as from_ppl
        # Fixed: the pipeline was previously constructed twice (the first
        # instance was thrown away); construct the final class exactly once.
        # (A large dead `if 0:` block of per-db wiring was also removed.)
        self.fromDb = from_ppl(log, datadir, self.conf, db=source.upper())

        # One-element tuple matters: `x in ('CSV')` would be a substring
        # test against the string 'CSV', not a membership test.
        if target.upper() in ('CSV',):
            from pipeline.v101.to_csv import ToCSV as to_ppl
            self.toDb = to_ppl(log, args.to_file, datadir, self.conf)
        else:
            raise CopyVectorError(vector)