def add_argument(log, vector, parser):
    """Register source- and target-specific CLI options on *parser*.

    The copy vector (e.g. ``ora2csv``) is split on ``'2'`` into a source
    tag and a target tag; each tag selects a group of ``argparse``
    options to add.

    :param log:    logger (currently unused; kept for interface parity)
    :param vector: ``<source>2<target>`` copy-vector string
    :param parser: ``argparse.ArgumentParser`` to extend
    :raises CopyVectorError: when the source or target tag is unknown
    """
    # From CSV file
    (source, target) = vector.split('2')
    # BUG FIX: the original used ``in ('X')`` throughout.  ``('X')`` is a
    # plain string, so ``in`` performed a *substring* test ('OR' matched
    # 'ORA'; 'ORA11G' did not).  Single-element tuples restore the
    # intended exact-membership semantics.
    if source.upper() in ('MYSQL',) or target.upper() in ('MYSQL',):
        parser.add_argument('-z', '--mysql_client_home', type=str, help='Mysql client home.')

    # --- source-side options ---
    if source.upper() in ('ORA',):
        parser.add_argument('-f', '--from_db', type=str, help='From database.')
        parser.add_argument('-c', '--from_table', type=str, help='From table.')
        parser.add_argument('-e', '--nls_date_format', type=str, default='DD-Mon-YYYY HH:MI:SS AM', help='nls_date_format for spool.')
        parser.add_argument('-m', '--nls_time_format', type=str, default='DD-Mon-RR HH.MI.SSXFF AM', help='nls_time_format for spool.')
    elif source.upper() in ('CSV',):
        parser.add_argument('-i', '--input_file', type=str, help='Input CSV file.')
        parser.add_argument('-k', '--skip_rows', type=int, default=0, help='Header size. Number of rows to skip in input file.')
        parser.add_argument('-y', '--sample_size_pct', type=int, default=1, help='Sample size in %% to estimate number of lines in input CSV file.')
    elif source.upper() in ('SS',):
        parser.add_argument('-q', '--query_sql_file', type=str, help='Input file with query sql.')
        parser.add_argument('-j', '--from_user', type=str, help='SQL Server user')
        parser.add_argument('-x', '--from_passwd', type=str, help='SQL Server user password')
        parser.add_argument('-b', '--from_db_name', type=str, help='SQL Server database')
        parser.add_argument('-n', '--from_db_server', type=str, help='SQL Server instance name.')
        parser.add_argument('-i', '--wait_limit_sec', type=str, help='Seconds to wait for spool file to become available.')
    elif source.upper() in ('MYSQL',):
        parser.add_argument('-q', '--query_sql_file', type=str, help='Input file with query sql.')
        parser.add_argument('-j', '--from_user', type=str, help='Mysql user')
        parser.add_argument('-x', '--from_passwd', type=str, help='Mysql user password')
        parser.add_argument('-b', '--from_db_name', type=str, help='Mysql database')
        parser.add_argument('-n', '--from_db_server', type=str, help='Mysql instance name.')
    else:
        raise CopyVectorError(vector)

    # --- target-side options ---
    if target.upper() in ('ORA',):
        parser.add_argument('-g', '--to_db', type=str, help='To database.')
        parser.add_argument('-a', '--to_table', type=str, help='To table.')
        # Avoid duplicating the NLS options already added for an ORA source.
        if not source.upper() in ('ORA',):
            parser.add_argument('-e', '--nls_date_format', type=str, default='DD-Mon-YYYY HH:MI:SS AM', help='nls_date_format for spool.')
            parser.add_argument('-m', '--nls_time_format', type=str, default='DD-Mon-RR HH.MI.SSXFF AM', help='nls_time_format for spool.')
    elif target.upper() in ('CSV',):
        parser.add_argument('-a', '--to_file', type=str, help='To file.')
    elif target.upper() in ('SS',):
        parser.add_argument('-u', '--to_user', type=str, help='SQL Server user')
        parser.add_argument('-p', '--to_passwd', type=str, help='SQL Server user password')
        parser.add_argument('-d', '--to_db_name', type=str, help='SQL Server database')
        parser.add_argument('-s', '--to_db_server', type=str, help='SQL Server instance name.')
        parser.add_argument('-a', '--to_table', type=str, help='To table.')
    elif target.upper() in ('MYSQL',):
        parser.add_argument('-u', '--to_user', type=str, help='Target db user')
        parser.add_argument('-p', '--to_passwd', type=str, help='Target db user password')
        parser.add_argument('-d', '--to_db_name', type=str, help='Target database')
        parser.add_argument('-s', '--to_db_server', type=str, help='Target db instance name.')
        parser.add_argument('-a', '--to_table', type=str, help='Target table.')
    else:
        raise CopyVectorError(vector)
def Prepare(self):
    """Resolve pipelines for a ``<source>2<target>`` copy to a file target.

    The source-side spooler is looked up in the ``all_spoolers`` registry;
    the target-side writer (CSV/JSON/DDL) is chosen by explicit dispatch
    on the target tag.

    :raises AssertionError:  when the source is unsupported or has no
                             registered spooler
    :raises CopyVectorError: when the target tag is unknown
    """
    vector = self.args.copy_vector
    log = self.log
    args = self.args
    uconf = self.uargs  # kept for parity with the sibling Prepare() variants
    datadir = self.datadir
    (self.source, self.target) = vector.split(self.conf._to)
    (source, target) = (self.source, self.target)
    # BUG FIX: the message previously contained a bare '%s' that was
    # never interpolated with the source name.
    assert source.upper() in self.conf.dbs, 'Data source %s is not supported' % source
    import all_spoolers as spoolers
    from_ppl = spoolers.get_ppl(source.upper())
    assert from_ppl, 'Source pipeline is not set for "%s"' % source
    # BUG FIX: ``in ('CSV')`` was a substring test on a plain string;
    # single-element tuples give exact membership.
    if target.upper() in ('CSV',):
        from pipeline.v101.to_csv import ToCSV as to_ppl
        self.toDb = to_ppl(self, log, args.to_file, datadir, self.conf)
    elif target.upper() in ('JSON',):
        from pipeline.v101.to_json import ToJSON as to_ppl
        self.toDb = to_ppl(self, log, args.to_file, datadir, self.conf)
    elif target.upper() in ('DDL',):
        from pipeline.v101.to_ddl import ToDDL as to_ppl
        self.toDb = to_ppl(self, log, args.to_file, datadir, self.conf)
    else:
        raise CopyVectorError(vector)
    self.fromDb = from_ppl(self, log, datadir, self.conf, db=source.upper())
def Prepare(self):
    """Resolve pipelines for a CSV-sourced copy into a database target.

    Only CSV sources are supported here; the target-side loader is taken
    from the ``all_loaders`` registry.

    :raises CopyVectorError: when the source tag is not CSV
    """
    vector = self.args.copy_vector
    datadir = self.datadir
    log = self.log
    args = self.args
    #assert os.path.isfile(args.input_file), 'Missing input file %s' % args.input_file
    (source, target) = vector.split(self.conf._to)
    # BUG FIX: ``in ('CSV')`` was a substring test on a plain string;
    # a single-element tuple gives exact membership.
    if source.upper() in ('CSV',):
        self.no_sharded_load(source, target)
        from pipeline.v101.from_csv import FromCSV as from_ppl
        self.fromDb = from_ppl(log, args.input_files, args.skip_rows, datadir, self.conf)
    else:
        raise CopyVectorError(vector)
    import all_loaders as loaders
    to_ppl = loaders.get_ppl(target.upper())
    self.toDb = to_ppl(log, datadir, self.conf, target.upper())
def Prepare(self):
    """Resolve pipelines for a file-sourced (CSV/JSON/DDL) copy into a
    database target looked up in the ``all_loaders`` registry.

    :raises AssertionError:  when the source is unsupported or the target
                             has no registered loader
    :raises CopyVectorError: when the source tag is unknown
    """
    vector = self.args.copy_vector
    log = self.log
    args = self.args
    uconf = self.uargs  # kept for parity with the sibling Prepare() variants
    datadir = self.datadir
    (self.source, self.target) = vector.split(self.conf._to)
    (source, target) = (self.source, self.target)
    # BUG FIX: the message previously contained a bare '%s' that was
    # never interpolated with the source name.
    assert source.upper() in self.conf.dbs, 'Data source %s is not supported' % source
    input_files = self.args.input_files
    # BUG FIX: ``in ('CSV')`` was a substring test on a plain string;
    # single-element tuples give exact membership.
    if source.upper() in ('CSV',):
        from pipeline.v101.from_csv import FromCSV as from_ppl
        self.fromDb = from_ppl(self, log, input_files, 0, datadir, self.conf)
    elif source.upper() in ('JSON',):
        from pipeline.v101.from_json import FromJSON as from_ppl
        self.fromDb = from_ppl(self, log, input_files, datadir, self.conf)
    elif source.upper() in ('DDL',):
        from pipeline.v101.from_ddl import FromDDL as from_ppl
        # BUG FIX: ``skip_rows`` was an undefined name here (NameError on
        # every DDL-source run).  Take it from args, defaulting to 0 as
        # the CSV path does.
        self.fromDb = from_ppl(self, log, input_files, getattr(args, 'skip_rows', 0), datadir, self.conf)
    else:
        raise CopyVectorError(vector)
    import all_loaders as loaders
    to_ppl = loaders.get_ppl(target.upper())
    # BUG FIX: the message previously named the *source*; this check is
    # about the target-side loader.
    assert to_ppl, 'Target pipeline is not set for "%s"' % target
    self.toDb = to_ppl(self, log, datadir, self.conf, target.upper())
# NOTE(review): fragment of a larger routine — the enclosing ``def`` and the
# definitions of ``ff``, ``dbs``, ``args``, ``log``, ``conf``, ``datadir`` and
# ``copy_vector`` are outside this view; indentation below is reconstructed
# from the flattened source and should be confirmed against the original.
# Selects the ETL template by copy direction: file -> db load, db -> file
# spool, or db -> db spool-and-load.
if source.upper() in ff:
    # CAUTION: ``in 'MYSQL'`` is a substring test on a plain string, not
    # collection membership — e.g. 'MY' would also match.  Presumably an
    # exact match is intended; confirm before changing.
    if target.upper() in 'MYSQL':
        # only serial load
        # MySQL targets cannot take parallel/sharded loads; force both
        # knobs down to 1 and warn the operator.
        if args.pool_size > 1 or args.num_of_shards > 1:
            log.warn('Forcing serial load in MySQL.')
            args.pool_size = 1
            args.num_of_shards = 1
    from template.v101.load_from_file import load_from_file as etl_tmpl
    #from template.v101.load_from_csv import load_from_csv as etl_tmpl
elif target.upper() in ff:
    from template.v101.spool_to_file import spool_to_file as etl_tmpl
elif target.upper() in dbs and source.upper() in dbs:
    from template.v101.spool_and_load import spool_and_load as etl_tmpl
else:
    #log.error('Unsupported copy vector %s.' % copy_vector)
    raise CopyVectorError(copy_vector)
    #e(0)
#python datamule.py -w ora11g2csv -o 1 -r 1 -t "|" -c SCOTT.Timestamp_test_from -f SCOTT/tiger2@orcl -e "YYYY/MM/DD" -m "YYYY-MM-DD-HH24.MI.SS.FF" -O "YYYY-MM-DD-HH24:MI:SS.FF" -z "C:\app\alex_buz\product\11.2.0\dbhome_2\BIN"
etl = etl_tmpl(log, datadir, conf)
etl.print_copy_details()
#print etl
#e(0)
if 1:
    # Interactive confirmation before the copy runs (Python 2 ``raw_input``).
    var = raw_input("Are you sure you want to proceed?(y/n): ")
    #print 'you entered:'
    #pprint(var)
    #time.sleep(1)
def Prepare(self):
    """Resolve pipelines for a database-sourced copy into a CSV target.

    The source spooler comes from the ``all_spoolers`` registry, except
    for Oracle sources which use the dedicated ``FromOracle`` pipeline.

    :raises AssertionError:  when the source is unsupported or has no
                             registered spooler
    :raises CopyVectorError: when the target tag is not CSV
    """
    vector = self.args.copy_vector
    log = self.log
    args = self.args
    uconf = self.uargs  # kept for parity with the sibling Prepare() variants
    datadir = self.datadir
    (source, target) = vector.split(self.conf._to)
    # BUG FIX: the message previously contained a bare '%s' that was
    # never interpolated with the source name.
    assert source.upper() in self.conf.dbs, 'Data source %s is not supported' % source
    import all_spoolers as spoolers
    from_ppl = spoolers.get_ppl(source.upper())
    assert from_ppl, 'Source pipeline is not set for "%s"' % source
    # Oracle sources use the dedicated pipeline instead of the registry one.
    # (Previously the registry pipeline was constructed and then immediately
    # discarded on the ORA path; choose the class first, construct once.)
    if source.upper().startswith('ORA'):
        from pipeline.v101.from_oracle import FromOracle as from_ppl
    self.fromDb = from_ppl(log, datadir, self.conf, db=source.upper())
    # NOTE(review): a large ``if 0:`` block with per-DB constructor dispatch
    # (SQL Server, PostgreSQL, Sybase, TimesTen, DB2, Informix, MySQL,
    # SQLite) was removed here — it was unreachable and superseded by the
    # ``all_spoolers`` registry lookup above.
    # BUG FIX: ``in ('CSV')`` was a substring test on a plain string;
    # a single-element tuple gives exact membership.
    if target.upper() in ('CSV',):
        from pipeline.v101.to_csv import ToCSV as to_ppl
        self.toDb = to_ppl(log, args.to_file, datadir, self.conf)
    else:
        raise CopyVectorError(vector)