def test_query(self): args = [ '--query', 'select m.usda_id, avg(i.sepal_length) as mean_sepal_length from iris as i join irismeta as m on (i.species = m.species) group by m.species', 'examples/iris.csv', 'examples/irismeta.csv' ] output_file = six.StringIO() input_file = six.StringIO("a,b,c\n1,2,3\n") with stdin_as_string(input_file): utility = CSVSQL(args, output_file) utility.main() sql = output_file.getvalue() if six.PY2: self.assertTrue('usda_id,mean_sepal_length' in sql) self.assertTrue('IRSE,5.006' in sql) self.assertTrue('IRVE2,5.936' in sql) self.assertTrue('IRVI,6.588' in sql) else: self.assertTrue('usda_id,mean_sepal_length' in sql) self.assertTrue('IRSE,5.005' in sql) self.assertTrue('IRVE2,5.936' in sql) self.assertTrue('IRVI,6.587' in sql)
def test_table_argument(self): args = ['--table', 'foo', 'file1.csv', 'file2.csv'] output_file = StringIO() utility = CSVSQL(args, output_file) with stderr_as_stdout(): self.assertRaises(SystemExit, utility.main)
def create_db(cls): sys.stdin.close() utility = CSVSQL() utility.args.connection_string = "sqlite:///join.db" utility.args.insert = True utility.args.query = None utility.args.input_paths = ["examples/join_table.csv"] utility.main() utility.args.input_paths = ["examples/join_addr.csv"] utility.main()
def test_empty_with_query(self): input_file = six.StringIO() with stdin_as_string(input_file): output_file = six.StringIO() utility = CSVSQL(['--query', 'select 1'], output_file) utility.run() output_file.close() input_file.close()
def csvsql(self, csv_file): """ Load test data into the DB and return it as a string for comparison. """ args = ['--db', "sqlite:///" + self.db_file, '--table', 'foo', '--insert', csv_file] utility = CSVSQL(args) utility.main() return open(csv_file, 'r').read().strip()
def test_no_header_row(self): args = ['--table', 'foo', '--no-header-row', 'examples/no_header_row.csv'] output_file = StringIO() utility = CSVSQL(args, output_file) utility.main() sql = output_file.getvalue() self.assertTrue('CREATE TABLE foo' in sql) self.assertTrue('column1 INTEGER NOT NULL' in sql) self.assertTrue('column2 INTEGER NOT NULL' in sql) self.assertTrue('column3 INTEGER NOT NULL' in sql)
def test_stdin_and_filename(self): args = ['examples/dummy.csv'] output_file = six.StringIO() input_file = six.StringIO("a,b,c\n1,2,3\n") with stdin_as_string(input_file): utility = CSVSQL(args, output_file) utility.main() sql = output_file.getvalue() self.assertTrue('CREATE TABLE stdin' in sql) self.assertTrue('CREATE TABLE dummy' in sql)
def getHiveSQL(args): #args = ['--table', 'customer', '/home/ranjan/reb_repository/customer.csv'] output_file = six.StringIO() utility = CSVSQL(args, output_file) utility.main() sql = output_file.getvalue() dropsql = "DROP TABLE IF EXISTS %s" % (args[1]) createsql = sql[:-2] + " row format delimited fields terminated by ','" loadsql = "LOAD DATA LOCAL INPATH '/repository/%s.csv' OVERWRITE INTO TABLE %s" % ( args[1], args[1]) # print dropsql # print createsql # print loadsql extn_fields = createsql[createsql.index("(") + 1:createsql.index(") row")] # print extn_fields hdp_extn_sql = """ drop server if exists fdw_hadoop_srv cascade; create server fdw_hadoop_srv foreign data wrapper multicorn options( wrapper 'rebataur.HadoopFDW' ); drop foreign table if exists fdw_hdp_%s cascade; create foreign table fdw_hdp_%s ( %s )server fdw_hadoop_srv options( createsql '%s', loadsql '%s', dropsql '%s', selectsql '%s', docker_start 'docker run -d -v /home/ranjan/reb_repository:/repository -p 10000:10000 --name rebdoop rebdoop', docker_stop 'docker stop rebdoop', docker_remove 'docker rm rebdoop' ); """ % (args[1], args[1], extn_fields, createsql.replace( "'", "''"), loadsql.replace("'", "''"), dropsql.replace( "'", "''"), "select * from %s" % args[1]) return [loadsql, createsql, dropsql]
def csvsql(self, csv_file, db=None): """ Load test data into the DB and return it as a string for comparison. """ if not db: db = 'sqlite:///' + self.db_file args = ['--db', db, '--tables', 'foo', '--insert', csv_file] utility = CSVSQL(args) utility.run() with open(csv_file, 'r') as f: text = f.read() return text.strip()
def test_stdin(self): args = ['--table', 'foo'] output_file = six.StringIO() input_file = six.StringIO('a,b,c\n1,2,3\n') with stdin_as_string(input_file): utility = CSVSQL(args, output_file) utility.main() sql = output_file.getvalue() self.assertTrue('CREATE TABLE foo' in sql) self.assertTrue('a INTEGER NOT NULL' in sql) self.assertTrue('b INTEGER NOT NULL' in sql) self.assertTrue('c INTEGER NOT NULL' in sql)
def test_create_table(self): args = ['--table', 'foo', 'examples/testfixed_converted.csv'] output_file = six.StringIO() utility = CSVSQL(args, output_file) utility.main() sql = output_file.getvalue() self.assertTrue('CREATE TABLE foo' in sql) self.assertTrue('text VARCHAR(17) NOT NULL' in sql) self.assertTrue('date DATE' in sql) self.assertTrue('integer INTEGER' in sql) self.assertTrue('boolean BOOLEAN' in sql) self.assertTrue('float FLOAT' in sql) self.assertTrue('time TIME' in sql) self.assertTrue('datetime DATETIME' in sql)
def test_no_inference(self): args = ['--table', 'foo', '--no-inference', 'examples/testfixed_converted.csv'] output_file = StringIO() utility = CSVSQL(args, output_file) utility.main() sql = output_file.getvalue() self.assertTrue('CREATE TABLE foo' in sql) self.assertTrue('text VARCHAR(17) NOT NULL' in sql) self.assertTrue('date VARCHAR(10) NOT NULL' in sql) self.assertTrue('integer VARCHAR(3) NOT NULL' in sql) self.assertTrue('boolean VARCHAR(5) NOT NULL' in sql) self.assertTrue('float VARCHAR(11) NOT NULL' in sql) self.assertTrue('time VARCHAR(8) NOT NULL' in sql) self.assertTrue('datetime VARCHAR(19) NOT NULL' in sql)
def test_empty_with_query(self): with stdin_as_string(six.StringIO()): utility = CSVSQL(['--query', 'select 1'], six.StringIO()) utility.main()