Beispiel #1
0
def gen_csv_webhdfs(col_defs, num_rows, host, user, hdfs_path,
                    has_id_col=False, use_kerberos=False):
    '''Write a generated CSV file locally and upload it via WebHDFS.

    The file is created inside a temporary directory, named after the base
    name of ``hdfs_path``, and then handed to ``webhdfs.create_file`` while
    the temporary directory still exists.

    Args:
        col_defs: Column definitions; passed to ``parse_cols``.
        num_rows: Number of rows to write; converted with ``int()``.
        host: Forwarded to ``webhdfs.create_file``.
        user: Forwarded to ``webhdfs.create_file``.
        hdfs_path: Upload destination; its base name is the local file name.
        has_id_col: When true, the first column contains the zero-based row
            index instead of a value written by ``write_col``.
        use_kerberos: Forwarded to ``webhdfs.create_file``.
    '''
    columns = parse_cols(col_defs)
    with tempdir() as scratch_dir:
        local_path = os.path.join(scratch_dir, os.path.basename(hdfs_path))
        with open(local_path, 'w') as out:
            for row_index in range(int(num_rows)):
                for col_index, column in enumerate(columns):
                    is_first = col_index == 0
                    if not is_first:
                        out.write(',')
                    if is_first and has_id_col:
                        out.write(str(row_index))
                    else:
                        # NOTE(review): the trailing 1 is interpreted by
                        # write_col (defined elsewhere) - presumably a value
                        # count; confirm.
                        write_col(out, column, 1)
                out.write('\n')
        # Upload before leaving the block so the local file still exists.
        webhdfs.create_file(host, user, hdfs_path, local_path,
                            use_kerberos=use_kerberos)
Beispiel #2
0
    def test_xml_processing(self):
        '''A Python UDF fetches XML over HTTP and emits the active users.'''
        # The script downloads the document at the given URL and emits the
        # first/family name of every <user> whose "active" attribute is "1".
        create_script = udf.fixindent('''
                CREATE python SCALAR SCRIPT
                process_users(url VARCHAR(200))
                EMITS (firstname VARCHAR(100), lastname VARCHAR(100)) AS

                import urllib
                import lxml.etree as etree
                # import xml.etree.cElementTree as etree


                def run(ctx):
                    data = ''.join(urllib.urlopen(ctx.url).readlines())
                    tree = etree.XML(data)
                    for user in tree.findall('user/[@active="1"]'):
                        fn = user.findtext('first_name')
                        ln = user.findtext('family_name')
                        ctx.emit(fn, ln)
                ''')
        self.query(create_script)

        with tempdir() as docroot:
            xml_path = os.path.join(docroot, 'keepers.xml')
            with open(xml_path, 'w') as xml_file:
                xml_file.write(self.xml())

            # The query must run while the server is still serving docroot.
            with HTTPServer(docroot) as server:
                url = 'http://%s:%d/keepers.xml' % server.address
                rows = self.query('''
                        SELECT process_users('%s')
                        FROM DUAL
                        ORDER BY lastname
                        ''' % url)

        self.assertRowsEqual([('Joe', 'Hart'), ('Manuel', 'Neuer')], rows)
Beispiel #3
0
 def test_selftest(self):
     '''A file placed in the document root can be fetched via HTTPServer.'''
     with tempdir() as docroot:
         with open(os.path.join(docroot, 'foo.xml'), 'w') as xml_file:
             xml_file.write('<foo/>\n')
         with HTTPServer(docroot) as server:
             # server.address supplies the (host, port) pair for the URL.
             url = 'http://%s:%d/foo.xml' % server.address
             body = urllib.urlopen(url).read()
             self.assertIn('<foo/>', body)
Beispiel #4
0
 def test_server_is_chdir_safe(self):
     '''The working directory stays the same around and inside FTPServer.'''
     cwd = os.getcwd()
     with tempdir() as tmp:
         self.assertEqual(cwd, os.getcwd())
         # The server object itself is not needed, so it is not bound.
         with FTPServer(tmp):
             self.assertEqual(cwd, os.getcwd())
         self.assertEqual(cwd, os.getcwd())
     self.assertEqual(cwd, os.getcwd())
Beispiel #5
0
 def test_1(self):
     '''Content written to the document root is returned by HTTPServer.'''
     with tempdir() as docroot:
         with open(os.path.join(docroot, 'dummy'), 'w') as dummy_file:
             dummy_file.write('babelfish')
         with HTTPServer(docroot) as server:
             response = urllib.urlopen('http://%s:%d/dummy' % server.address)
             lines = response.readlines()
     # The downloaded lines are checked after the server has shut down.
     content = '\n'.join(lines)
     self.assertIn('babelfish', content)
Beispiel #6
0
 def test_server_is_chdir_safe(self):
     '''The working directory is restored once HTTPServer has exited.'''
     cwd = os.getcwd()
     with tempdir() as tmp:
         self.assertEqual(cwd, os.getcwd())
         with HTTPServer(tmp):
             # Known limitation: the current implementation chdirs into the
             # document root while serving (avoiding that would need
             # subprocesses), so the working directory is deliberately not
             # checked inside this block.
             pass
         self.assertEqual(cwd, os.getcwd())
     self.assertEqual(cwd, os.getcwd())
Beispiel #7
0
 def test_1(self):
     '''An FTP LIST via default login shows a file from the served root.'''
     with tempdir() as tmp:
         # Create an empty marker file to look for in the listing.
         with open(os.path.join(tmp, 'dummy'), 'w'):
             pass
         with FTPServer(tmp) as ftpd:
             ftp = ftplib.FTP()
             ftp.connect(*ftpd.address)
             ftp.login()
             data = []
             # retrlines() feeds each listing line to the callback; its own
             # return value is not needed here.
             ftp.retrlines('LIST', data.append)
             ftp.quit()
     self.assertIn('dummy', '\n'.join(data))
Beispiel #8
0
 def test_1(self):
     '''An authenticated FTP user can create a directory and list it.'''
     with tempdir() as tmp:
         # Create an empty marker file to look for in the listing.
         with open(os.path.join(tmp, 'dummy'), 'w'):
             pass
         auth = DummyAuthorizer()
         auth.add_user('user', 'passwd', tmp, perm='elradfmw')
         with FTPServer(tmp, authorizer=auth) as ftpd:
             ftp = ftplib.FTP()
             ftp.connect(*ftpd.address)
             ftp.login('user', 'passwd')
             ftp.mkd('some_dir')
             data = []
             # retrlines() feeds each listing line to the callback; its own
             # return value is not needed here.
             ftp.retrlines('LIST', data.append)
             ftp.quit()
     # Join the listing once and check both the file and the new directory.
     listing = '\n'.join(data)
     self.assertIn('dummy', listing)
     self.assertIn('some_dir', listing)
Beispiel #9
0
 def importHelper(self, csv, tableDefinition, exception):
     '''Check that importing ``csv`` over FTP fails and leaves T empty.

     Args:
         csv: Text written to ``data.csv`` in the served directory.
         tableDefinition: SQL statement executed before the import
             (presumably creates table T; verify against callers).
         exception: Regular expression the import error must match.
     '''
     with tempdir() as tmp:
         with open(os.path.join(tmp, 'data.csv'), 'w') as f:
             f.write(csv)
         with FTPServer(tmp) as ftpd:
             url = 'ftp://*****:*****@%s:%d' % ftpd.address
             self.query('''
                     create connection ftpconnection to '%s'
                     ''' % url)
             self.query(tableDefinition)
             with self.assertRaisesRegexp(Exception, exception):
                 self.query('''
                 import into t from csv at ftpConnection file 'data.csv';
                            ''')
             # The SELECT is executed for its effect on self.rowcount()
             # (presumably the row count of the last query - confirm); the
             # returned rows themselves are not needed.
             self.query('''SELECT * FROM T''')
             self.assertEqual(0, self.rowcount())
Beispiel #10
0
    def test_xml_processing(self):
        '''DWA-13842: an R UDF fetches XML over FTP and emits active users.'''
        # The script downloads the document at the given URL and emits the
        # first/family name of every user whose "active" attribute equals 1.
        create_script = udf.fixindent('''
                CREATE OR REPLACE R SCALAR SCRIPT
                process_users(url VARCHAR(200))
                EMITS (firstname VARCHAR(100), lastname VARCHAR(100)) AS
 
                require('RCurl')
                require('XML')
                run <- function(ctx) {
                    cont <- getURL(ctx$url)
                    tree <- xmlTreeParse(cont)
                    for (i in 1:length(tree$doc$children$users)) {
                        if (tree$doc$children$users[i]$user$attributes['active']==1) {
                                firstname <- tree$doc$children$users[i]$user$children$first_name$children$text$value;
                                familyname <- tree$doc$children$users[i]$user$children$family_name$children$text$value;
                                ctx$emit(firstname, familyname)
                        }
                    }
                }
                ''')
        self.query(create_script)

        with tempdir() as ftp_root:
            xml_path = os.path.join(ftp_root, 'keepers.xml')
            with open(xml_path, 'w') as xml_file:
                xml_file.write(self.xml())

            # The query must run while the server is still serving ftp_root.
            with FTPServer(ftp_root) as server:
                url = 'ftp://*****:*****@%s:%d/keepers.xml' % server.address
                rows = self.query('''
                        SELECT process_users('%s')
                        FROM DUAL
                        ORDER BY lastname
                        ''' % url)

        self.assertRowsEqual([('Joe', 'Hart'), ('Manuel', 'Neuer')], rows)