예제 #1
0
    def test_index(self):
        """Check record lookups through three kinds of index.

        The same generated dataset is read by three adapters in turn: one
        indexed explicitly with ``create_index()``, one given
        ``index_name='test.idx'`` before ``to_array()`` is called, and one
        opened with the same ``index_name`` afterwards.  Each adapter is
        probed at the same record positions.
        """
        if sys.platform == 'win32':
            # TODO: this test fails on Windows because of file lock problems
            return

        num_records = 100000
        field_types = {0:'u4',1:'u4',2:'u4',3:'u4',4:'u4'}
        probe_positions = (0, 10, 100, 1000, 10000, num_records - 1)

        def check_probes(indexed):
            # The assertions expect record i to hold the values 5*i .. 5*i+4.
            # Negative positions (adapter[-1]) are intentionally not probed.
            for position in probe_positions:
                expected = tuple([(position*5) + x for x in range(5)])
                self.assert_equality(indexed[position].item(), expected)

        data = StringIO()
        generate_dataset(data, IntIter(), ',', num_records)

        # test explicit index building
        adapter = iopro.text_adapter(data, delimiter=',', field_names=False, infer_types=False)
        adapter.set_field_types(dict(field_types))
        adapter.create_index()
        check_probes(adapter)

        # test implicitly creating disk index on the fly
        if os.path.exists('test.idx'):
            os.remove('test.idx')
        data.seek(0)
        adapter = iopro.text_adapter(data, delimiter=',', field_names=False, infer_types=False, index_name='test.idx')
        adapter.set_field_types(dict(field_types))
        adapter.to_array()
        check_probes(adapter)

        adapter.close()

        # test loading disk index
        data.seek(0)
        adapter2 = iopro.text_adapter(data, delimiter=',', field_names=False, infer_types=False, index_name='test.idx')
        adapter2.set_field_types(dict(field_types))
        check_probes(adapter2)

        # Close the adapter that loaded the index before deleting the file.
        adapter2.close()

        os.remove('test.idx')
예제 #2
0
    def test_missing_fill_values(self):
        """Check missing-value replacement with explicit fills, with
        inferred types and no explicit fills, and for a short record."""
        total = self.num_records

        def expected_row(index, first_fill, last_fill):
            # The test expects rows whose index is 0 or 1 modulo 4 to carry
            # the fill values in the first and last fields; every other
            # row is the plain run 5*index .. 5*index+4.
            start = index * 5
            if index % 4 in (0, 1):
                return (first_fill, start + 1, start + 2, start + 3, last_fill)
            return (start, start + 1, start + 2, start + 3, start + 4)

        source = StringIO()
        generate_dataset(source, MissingValuesIter(), ',', total)

        # Explicit field types, mixed index/name keys, explicit fill values.
        typed = iopro.text_adapter(source, delimiter=',', field_names=False, infer_types=False)
        typed.set_field_types({'f0':'u4', 1:'u4', 2:'u4', 3:'u4', 'f4':'u4'})
        typed.set_missing_values({0:['NA', 'NaN'], 'f4':['xx','inf']})
        typed.set_fill_values({0:99, 4:999})

        filled = typed[:]
        self.assert_equality(filled.size, total)
        for index in range(total):
            self.assert_equality(filled[index].item(), expected_row(index, 99, 999))

        # Inferred types and no fill values set: the test expects 0.
        source.seek(0)
        inferred = iopro.text_adapter(source, delimiter=',', field_names=False, infer_types=True)
        inferred.set_missing_values({0:['NA', 'NaN'], 4:['xx','inf']})

        defaulted = inferred[:]
        self.assert_equality(defaulted.size, total)
        for index in range(total):
            self.assert_equality(defaulted[index].item(), expected_row(index, 0, 0))

        # Test missing field
        short_record = iopro.text_adapter(StringIO('1,2,3\n4,5\n7,8,9'), field_names=False)
        short_record.field_types = {0:'O', 1:'O', 2:'O'}
        short_record.set_fill_values({0:np.nan, 1:np.nan, 2:np.nan})
        rows = short_record[:]

        # NumPy assert_array_equal no longer supports mixed O/nan types,
        # so the comparison is done on plain Python lists.
        wanted = [('1','2','3'),('4','5',np.nan),('7','8','9')]
        self.assert_equality(rows.tolist(), wanted)
예제 #3
0
    def test_quoted_whitespace(self):
        """Whitespace inside quoted fields is expected to be kept, for the
        default delimiter and for an explicit tab delimiter."""
        three_bytes = {0:'S3', 1:'S3', 2:'S3'}

        # Quoted fields with trailing spaces.
        spaced = iopro.text_adapter(StringIO('"1  ","2  ","3  "\n'), field_names=False)
        spaced.set_field_types(dict(three_bytes))
        want_spaced = np.array([('1  ', '2  ', '3  ')], dtype='S3,S3,S3')
        assert_array_equal(spaced[:], want_spaced)

        # Quoted fields containing the delimiter character (tab) itself.
        tabbed = iopro.text_adapter(StringIO('"\t1\t"\t"\t2\t"\t"\t3\t"\n'),
                                    field_names=False, delimiter='\t')
        tabbed.set_field_types(dict(three_bytes))
        want_tabbed = np.array([('\t1\t', '\t2\t', '\t3\t')], dtype='S3,S3,S3')
        assert_array_equal(tabbed[:], want_tabbed)
예제 #4
0
    def test_utf8_parsing(self):
        """Parse UTF-8 encoded byte input whose last field is a single-byte
        and then a multibyte character."""
        # test single byte character
        ascii_bytes = u'1,2,\u0033'.encode('utf_8')
        parsed = iopro.text_adapter(io.BytesIO(ascii_bytes), field_names=False)[:]
        wanted = np.array([('1', '2', '3')], dtype='u8,u8,u8')
        assert_array_equal(parsed, wanted)

        # test multibyte character
        multibyte = u'1,2,\u2092'.encode('utf_8')
        parsed = iopro.text_adapter(io.BytesIO(multibyte), field_names=False)[:]
        wanted = np.array([('1', '2', u'\u2092')], dtype='u8,u8,O')
        assert_array_equal(parsed, wanted)
예제 #5
0
    def test_comments(self):
        """Lines starting with '#' are expected to be skipped by default
        and to be parsed as data when ``comment=None``."""
        text = '1,2,3\n#4,5,6'

        # Default comment handling: only the first line is a record.
        rows = iopro.text_adapter(StringIO(text), field_names=False)[:]
        self.assert_equality(rows.size, 1)
        self.assert_equality(rows[0].item(), (1,2,3))

        # Comment handling disabled: both lines are records and the first
        # field is expected to come back as text.
        rows = iopro.text_adapter(StringIO(text), field_names=False, comment=None)[:]
        self.assert_equality(rows.size, 2)
        self.assert_equality(rows[0].item(), ('1',2,3))
        self.assert_equality(rows[1].item(), ('#4',5,6))
예제 #6
0
    def test_string_parsing(self):
        """Parse the same record as fixed-size byte strings from three
        kinds of in-memory source."""
        sources = (
            StringIO('1,2,3\n'),
            io.StringIO(u'1,2,3\n'),
            io.BytesIO(b'1,2,3\n'),
        )
        for source in sources:
            reader = iopro.text_adapter(source, field_names=False)
            reader.set_field_types({0:'S5', 1:'S5', 2:'S5'})
            wanted = np.array([('1', '2', '3')], dtype='S5,S5,S5')
            assert_array_equal(reader[:], wanted)
예제 #7
0
    def test_converters(self):
        """Attach per-field converter callables (by index and by name) and
        check every converted record."""
        total = self.num_records

        source = StringIO()
        generate_dataset(source, IntIter(), ',', total)
        adapter = iopro.text_adapter(source, delimiter=',', field_names=False)

        if sys.platform == 'win32' and tuple.__itemsize__ == 8:
            # TODO: there are problems below here on 64-bit Windows:
            # OverflowError: can't convert negative value to unsigned PY_LONG_LONG
            return

        def increment(text):
            return int(text) + 1

        def double(text):
            value = int(text)
            return value + value

        adapter.set_converter(0, increment)
        adapter.set_converter('f1', double)

        converted = adapter[:]
        self.assert_equality(converted.size, total)

        for index in range(total):
            start = index * 5
            # Field 0 is incremented, field 1 is doubled, the rest are raw.
            wanted = (start + 1, (start + 1) * 2, start + 2, start + 3, start + 4)
            self.assert_equality(converted[index].item(), wanted)
예제 #8
0
 def test_csv(self):
     """A blank line between two CSV records is expected to be skipped."""
     source = StringIO('1,2,3\n\n4,5,6')
     parsed = iopro.text_adapter(source, field_names=False)[:]
     wanted = np.array(
         [(1,2,3), (4,5,6)],
         dtype=[('f0','<u8'),('f1','<u8'),('f2','<u8')])
     assert_array_equal(parsed, wanted)
예제 #9
0
    def test_spaces_around_numeric_values(self):
        """Numeric fields padded with spaces are expected to parse to the
        same values as unpadded ones."""
        padded = ' 1 , -2 , 3.3 , -4.4 \n  5  ,  -6  ,  7.7 , -8.8 '
        reader = iopro.text_adapter(StringIO(padded), field_names=False)
        reader.set_field_types({0:'u4', 1:'i8', 2:'f4', 3:'f8'})
        parsed = reader[:]

        wanted = np.array([(1,-2,3.3,-4.4), (5,-6,7.7,-8.8)], dtype='u4,i8,f4,f8')
        assert_array_equal(parsed, wanted)
예제 #10
0
    def test_auto_type_inference(self):
        """Check inferred field dtypes for a full read and for a sequence
        of partial reads on a fresh adapter."""
        text = '0,1,2,3,4\n5.5,6,7,8,9\n10,11,12,13,14a\n15,16,xxx,18,19'

        # Full read: each field gets one dtype for the whole file.
        whole = iopro.text_adapter(StringIO(text), field_names=False, infer_types=True).to_array()
        full_read_types = (
            ('f0', 'float64'),
            ('f1', 'uint64'),
            ('f2', 'O'),
            ('f3', 'uint64'),
            ('f4', 'O'),
        )
        for field, type_name in full_read_types:
            self.assert_equality(whole.dtype.fields[field][0], np.dtype(type_name))

        # Partial reads, issued in this exact order on one adapter.
        partial = iopro.text_adapter(StringIO(text), field_names=False, infer_types=True)
        partial_reads = (
            (0, 'f0', 'uint64'),
            (slice(1, 3), 'f0', 'float64'),
            (3, 'f4', 'uint64'),
            (slice(None), 'f3', 'uint64'),
            (-1, 'f2', 'O'),
            (2, 'f4', 'O'),
        )
        for key, field, type_name in partial_reads:
            self.assert_equality(partial[key].dtype.fields[field][0], np.dtype(type_name))
예제 #11
0
    def test_header_footer(self):
        """Check the ``header`` argument, alone and combined with
        ``field_names=True``."""
        source = StringIO('0,1,2,3,4\n5,6,7,8,9\n10,11,12,13,14')

        def read_with(header, field_names):
            reader = iopro.text_adapter(source, header=header, field_names=field_names)
            reader.field_types = dict.fromkeys(range(5), 'u4')
            return reader[:]

        # One header line skipped: two records remain.
        assert_array_equal(
            read_with(1, False),
            np.array([(5,6,7,8,9), (10,11,12,13,14)], dtype='u4,u4,u4,u4,u4'))

        # Two header lines skipped: one record remains.
        source.seek(0)
        assert_array_equal(
            read_with(2, False),
            np.array([(10,11,12,13,14)], dtype='u4,u4,u4,u4,u4'))

        # One header line skipped, the next line supplies the field names.
        source.seek(0)
        assert_array_equal(
            read_with(1, True),
            np.array([(10,11,12,13,14)],
                dtype=[('5','u4'),('6','u4'),('7','u4'),('8','u4'),('9','u4')]))
예제 #12
0
    def test_no_whitespace_stripping(self):
        """Unquoted string fields are expected to keep leading and trailing
        spaces and tabs."""
        cases = (
            # (input text, string dtype, expected record)
            ('1  ,2  ,3  \n', 'S3', ('1  ', '2  ', '3  ')),
            ('  1,  2,  3\n', 'S3', ('  1', '  2', '  3')),
            ('  1  ,  2  ,  3  \n', 'S5', ('  1  ', '  2  ', '  3  ')),
            ('\t1\t,\t2\t,\t3\t\n', 'S3', ('\t1\t', '\t2\t', '\t3\t')),
        )
        for text, width, record in cases:
            reader = iopro.text_adapter(StringIO(text), field_names=False)
            reader.set_field_types({0: width, 1: width, 2: width})
            wanted = np.array([record], dtype=','.join([width] * 3))
            assert_array_equal(reader[:], wanted)
예제 #13
0
    def test_escapechar(self):
        """Check escape-character handling with the default backslash and
        with a custom ``escape`` character."""
        text_rows = [('a', 'b,b', 'c'), ('a', 'b,b,b', 'c')]
        cases = (
            # (input text, extra adapter keywords, expected rows, dtype)
            ('1,2\\2,3\n4,5\\5\\5,6', {}, [(1,22,3), (4,555,6)], 'u8,u8,u8'),
            ('\\1,2,3\n4,5,6\\', {}, [(1,2,3), (4,5,6)], 'u8,u8,u8'),
            ('a,b\\,b,c\na,b\\,b\\,b,c', {}, text_rows, 'O,O,O'),
            ('a,bx,b,c\na,bx,bx,b,c', {'escape': 'x'}, text_rows, 'O,O,O'),
        )
        for text, extra, rows, dtype in cases:
            parsed = iopro.text_adapter(StringIO(text), field_names=False, **extra)[:]
            assert_array_equal(parsed, np.array(rows, dtype=dtype))
예제 #14
0
 def test_float_conversion(self):
     """Parse plain, negative and exponent-notation numbers as 'f8'."""
     source = StringIO('10,1.333,-1.23,10.0E+2,999.9e-2')
     reader = iopro.text_adapter(source, field_names=False, infer_types=False)
     reader.set_field_types(dict.fromkeys(range(5), 'f8'))
     first = reader[0]
     # Compare field by field with a tolerance rather than as one tuple.
     for column, wanted in enumerate((10.0, 1.333, -1.23, 1000.0, 9.999)):
         self.assertAlmostEqual(first[0][column], wanted)
예제 #15
0
    def test_delimiter(self):
        """Check parsing with no delimiter argument for four separators,
        and ``delimiter=None`` for single-field data."""
        # No delimiter is passed; each input is expected to split into 1, 2, 3.
        for text in ('1,2,3\n', '1 2 3\n', '1\t2\t3\n', '1x2x3\n'):
            reader = iopro.text_adapter(StringIO(text), field_names=False)
            self.assert_equality(reader[0].item(), (1,2,3))

        # Test no delimiter in single field csv data
        single_field = StringIO('aaa\nbbb\nccc')
        parsed = iopro.text_adapter(single_field, field_names=False, delimiter=None)[:]
        wanted = np.array([('aaa',), ('bbb',), ('ccc',)], dtype=[('f0', 'O')])
        assert_array_equal(parsed, wanted)
예제 #16
0
    def test_field_names(self):
        """Check field names taken from the first line (with extra data
        fields and with duplicates) and from an explicit list."""
        # Test for ignoring of extra fields
        extra_fields = iopro.text_adapter(
            StringIO('f0,f1\n0,1,2\n3,4,5'), 'csv', delimiter=',', field_names=True)
        parsed = extra_fields.to_array()
        self.assert_equality(parsed.dtype.names, ('f0', 'f1'))
        for row, wanted in enumerate(((0,1), (3,4))):
            self.assert_equality(parsed[row].item(), wanted)

        # Test for duplicate field names
        duplicates = iopro.text_adapter(
            StringIO('f0,field,field\n0,1,2\n3,4,5'), 'csv', delimiter=',',
            field_names=True, infer_types=False)
        duplicates.set_field_types({0:'u4', 1:'u4', 2:'u4'})
        parsed = duplicates.to_array()
        self.assert_equality(parsed.dtype.names, ('f0', 'field', 'field1'))

        # Test for field names list
        named = iopro.text_adapter(
            StringIO('0,1,2\n3,4,5'), field_names=['a', 'b', 'c'], infer_types=False)
        named.field_types = {0:'u4', 1:'u4', 2:'u4'}
        parsed = named[:]
        self.assertTrue(parsed.dtype.names == ('a', 'b', 'c'))
        wanted = np.array([(0,1,2), (3,4,5)], dtype=[('a', 'u4'), ('b', 'u4'), ('c', 'u4')])
        assert_array_equal(parsed, wanted)
예제 #17
0
    def reduce(iter,out, params):
        """Download per-station yearly files and emit the stations that pass
        every check.

        For each ``(key, StatRange)`` pair in ``iter``, ``StatRange[0]`` is
        read as the candidate stations and ``StatRange[1]`` as the dates to
        fetch.  A station is dropped when its file cannot be parsed, when
        its ``TEMP`` column has fewer than 360 entries, or when that column
        contains 9999.9.  The surviving stations are passed to
        ``out.add(1, ...)`` as a set.

        ``params`` is not used.
        """
        import ftplib,os

        host = 'ftp.ncdc.noaa.gov'
        ftp = ftplib.FTP(host)
        ftp.login()

        gsod_path = 'pub/data/gsod/'
        dirpath = '/tmp/weather_files_coverage/'

        if not os.path.exists(dirpath):
            os.makedirs(dirpath)

        try:
            for key, StatRange in iter:
                stations = list(StatRange[0])
                ranges = StatRange[1]

                for date in ranges:
                    # Iterate over a snapshot: rejected stations are removed
                    # from `stations` inside the loop, and removing from the
                    # list being iterated would skip the following element.
                    for stat in list(stations):
                        file_name = stat+'-'+str(date)+'.op.gz'
                        local_path = dirpath+file_name
                        remote_path = gsod_path+str(date)+'/'+file_name

                        with open(local_path,'wb') as cache:
                            try:
                                ftp.retrbinary("RETR " + remote_path, cache.write, 8*1024)
                            except ftplib.all_errors:
                                # Reconnect and retry once.  Anything already
                                # written is discarded so the retry does not
                                # append to a partial download.
                                cache.seek(0)
                                cache.truncate()
                                ftp.close()
                                ftp = ftplib.FTP(host)
                                ftp.login()
                                ftp.retrbinary("RETR " + remote_path, cache.write, 8*1024)

                        try:
                            adapter = iopro.text_adapter(local_path,compression='gzip',parser='csv', field_names=True)
                            try:
                                avg_temp = adapter[:]['TEMP']
                            finally:
                                adapter.close()
                        except Exception:
                            # Unreadable file: the station is rejected.
                            stations.remove(stat)
                            continue

                        if len(avg_temp) < 360 or 9999.9 in avg_temp:
                            stations.remove(stat)
        finally:
            ftp.close()

        # NOTE(review): only the station list of the last key is emitted, and
        # an empty `iter` leaves `stations` undefined - confirm that the input
        # always carries exactly one key.
        out.add(1,set(stations))
예제 #18
0
    def test_generators(self):
        """Feed the adapter from a generator of CSV lines and check every
        parsed record."""
        total = self.num_records

        def int_generator(num_recs):
            # Yields one comma-separated line of five consecutive integers
            # per record, starting at 5 * record index.
            for rec in range(num_recs):
                yield ','.join(str(rec*5 + offset) for offset in range(5))

        parsed = iopro.text_adapter(int_generator(total), field_names=False)[:]

        self.assert_equality(parsed.size, total)
        for index in range(total):
            start = index * 5
            self.assert_equality(parsed[index].item(), tuple(range(start, start + 5)))
예제 #19
0
    def reduce(iter, out, params):
        """Download the files grouped under each date and emit the mean and
        standard deviation of their ``TEMP`` columns.

        ``iter`` is grouped with ``kvgroup``; for every ``date`` the listed
        remote files are fetched into ``/tmp/weather_files/<date>/``, parsed
        as gzip-compressed CSV, and their ``TEMP`` values pooled.  The pair
        ``(mean, std)`` is passed to ``out.add(date, ...)``.

        ``params`` is not used.
        """
        import numpy as np
        import ftplib,os
        import iopro,shutil
        from disco.util import kvgroup

        host = 'ftp.ncdc.noaa.gov'

        for date, WeatherDateStat in kvgroup(iter):
            # Single-argument print(...) prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(date)
            ftp = ftplib.FTP(host)
            ftp.login()

            avg_temp = []

            path = '/tmp/weather_files/'+str(date)+'/'

            if not os.path.exists(path):
                os.makedirs(path)

            try:
                for remote_file in WeatherDateStat:
                    local_path = path+remote_file.split('/')[-1]
                    with open(local_path,'wb') as cache:
                        try:
                            ftp.retrbinary("RETR " + remote_file, cache.write, 8*1024)
                        except ftplib.all_errors:
                            # Reconnect and retry once, discarding anything
                            # already written so the retry does not append
                            # to a partial download.
                            cache.seek(0)
                            cache.truncate()
                            ftp.close()
                            ftp = ftplib.FTP(host)
                            ftp.login()
                            ftp.retrbinary("RETR " + remote_file, cache.write, 8*1024)

                    adapter = iopro.text_adapter(local_path,compression='gzip',parser='csv', field_names=True)
                    try:
                        avg_temp.extend(adapter[:]['TEMP'])
                    finally:
                        adapter.close()
            finally:
                # One connection is opened per date; release it here.
                ftp.close()

            date_mean = np.mean(avg_temp)
            date_std = np.std(avg_temp)
            print('Date Mean Std:  %s %s %s' % (date, date_mean, date_std))
            out.add(date, (date_mean, date_std))
예제 #20
0
    def test_gzip_index(self):
        """Build an index over gzip-compressed data and probe records by position.

        A generated dataset is compressed into an in-memory buffer, read
        through an adapter with ``compression='gzip'``, indexed with
        ``create_index()``, and checked at a set of record positions.
        """
        num_records = 1000000

        data = StringIO()
        generate_dataset(data, IntIter(), ',', num_records)

        # The version checks below are disabled; the bytes-based branch is
        # always taken and the else branches are never executed.
        #if sys.version > '3':
        if True:
            dataz = io.BytesIO()
        else:
            dataz = StringIO()
        gzip_output = gzip.GzipFile(fileobj=dataz, mode='wb')
        #if sys.version > '3':
        if True:
            gzip_output.write(data.getvalue().encode('utf8'))
        else:
            gzip_output.write(data.getvalue())
        gzip_output.close()
        # Rewind the compressed buffer so the adapter reads it from the start.
        dataz.seek(0)

        # test explicit index building
        adapter = iopro.text_adapter(dataz, compression='gzip', delimiter=',', field_names=False, infer_types=False)
        adapter.set_field_types({0:'u4',1:'u4',2:'u4',3:'u4',4:'u4'})
        adapter.create_index()

        # Each assertion expects record i to hold the values 5*i .. 5*i+4.
        self.assert_equality(adapter[0].item(), tuple([(0*5) + x for x in range(5)]))
        self.assert_equality(adapter[10].item(), tuple([(10*5) + x for x in range(5)]))
        self.assert_equality(adapter[100].item(), tuple([(100*5) + x for x in range(5)]))
        self.assert_equality(adapter[1000].item(), tuple([(1000*5) + x for x in range(5)]))
        self.assert_equality(adapter[10000].item(), tuple([(10000*5) + x for x in range(5)]))
        self.assert_equality(adapter[100000].item(), tuple([(100000*5) + x for x in range(5)]))
        self.assert_equality(adapter[num_records - 1].item(), tuple([((num_records - 1)*5) + x for x in range(5)]))
        #self.assert_equality(adapter[-1].item(), tuple(expected_values))

        # test 'trouble' records that have caused crashes in the past
        self.assert_equality(adapter[290000].item(), tuple([(290000*5) + x for x in range(5)]))
        self.assert_equality(adapter[818000].item(), tuple([(818000*5) + x for x in range(5)]))

        # test implicitly creating disk index on the fly
        # JNB: not implemented yet
        '''adapter = iopro.text_adapter(dataz, compression='gzip', delimiter=',', field_names=False, infer_types=False, indexing=True, index_filename='test.idx')
예제 #21
0
    def test_json(self):
        """Check the JSON parser on single objects, arrays of objects,
        newline-separated objects, slicing, column order and field filters."""
        id_only_number = [('id', 'u8')]
        id_only_text = [('id', 'O')]
        id_and_name = [('id', 'u8'), ('name', 'O')]

        def load(text):
            return iopro.text_adapter(StringIO(text), parser='json')

        # Test json number
        assert_array_equal(load('{"id":123}')[:],
            np.array([(123,)], dtype=id_only_number))

        # Test json string
        assert_array_equal(load('{"id":"xxx"}')[:],
            np.array([('xxx',)], dtype=id_only_text))

        # Test multiple values
        assert_array_equal(load('{"id":123, "name":"xxx"}')[:],
            np.array([(123, 'xxx',)], dtype=id_and_name))

        # Test multiple records
        assert_array_equal(load('[{"id":123, "name":"xxx"}, {"id":456, "name":"yyy"}]')[:],
            np.array([(123, 'xxx',), (456, 'yyy')], dtype=id_and_name))

        # Test multiple objects separated by newlines
        assert_array_equal(load('{"id":123, "name":"xxx"}\n{"id":456, "name":"yyy"}')[:],
            np.array([(123, 'xxx',), (456, 'yyy')], dtype=id_and_name))

        # Test a single object followed by a trailing newline
        assert_array_equal(load('{"id":123, "name":"xxx"}\n')[:],
            np.array([(123, 'xxx',)], dtype=id_and_name))

        # JNB: broken; should be really be supporting the following json inputs?
        '''
        # Test subarrays
        data = StringIO('{"id":123, "names":["xxx","yyy","zzz"]}')
        adapter = iopro.text_adapter(data, parser='json')
        array = adapter[:]
        assert_array_equal(array, np.array([(123, 'xxx', 'yyy', 'zzz',)],
            dtype=[('f0', 'u8'), ('f1', 'O'), ('f2', 'O'), ('f3', 'O')]))

        # Test subobjects
        data = StringIO('{"id":123, "names":{"a":"xxx", "b":"yyy", "c":"zzz"}}')
        adapter = iopro.text_adapter(data, parser='json')
        array = adapter[:]
        assert_array_equal(array, np.array([(123, 'xxx', 'yyy', 'zzz',)],
            dtype=[('f0', 'u8'), ('f1', 'O'), ('f2', 'O'), ('f3', 'O')]))
        '''

        # Four newline-separated objects shared by the range and filter checks.
        four_objects = ('{"id": 1, "name": "www"}\n'
                        '{"id": 2, "name": "xxx"}\n'
                        '{"id": 3, "name": "yyy"}\n'
                        '{"id": 4, "name": "zzz"}')

        # Test ranges
        assert_array_equal(load(four_objects)[2:4],
            np.array([(3, 'yyy'), (4, 'zzz')], dtype=id_and_name))

        # Test column order
        assert_array_equal(load('{"xxx": 1, "aaa": 2}\n')[:],
            np.array([(1, 2)], dtype=[('xxx', 'u8'), ('aaa', 'u8')]))

        # Test field filter
        filtered = load(four_objects)
        filtered.field_filter = ['name']
        assert_array_equal(filtered[:],
            np.array([('www',), ('xxx',), ('yyy',), ('zzz',)], dtype=[('name', 'O')]))
예제 #22
0
    def test_fixed_width(self):
        """Check the fixed-width parser on generated data and on small
        inputs with blank lines, comment lines and field-name lines."""
        total = self.num_records

        source = StringIO()
        generate_dataset(source, FixedWidthIter(), '', total)
        wide = iopro.FixedWidthTextAdapter(source, [2,3,4,5,6], field_names=False, infer_types=False)
        wide.set_field_types({0:'u4',1:'u4',2:'u4',3:'u4',4:'u4'})

        parsed = wide[:]
        self.assert_equality(parsed.size, total)

        # Each field counts up from zero and wraps at its own power of ten.
        wrap_limits = (100, 1000, 10000, 100000, 1000000)
        for index in range(total):
            wanted = tuple(index % limit for limit in wrap_limits)
            self.assert_equality(parsed[index].item(), wanted)

        def parse_narrow(text, field_names):
            reader = iopro.text_adapter(StringIO(text), parser='fixed_width',
                field_widths=[2,2,2], field_names=field_names)
            return reader[:]

        def u8_records(rows, names):
            return np.array(rows, dtype=[(name, '<u8') for name in names])

        default_names = ('f0', 'f1', 'f2')
        both_rows = [(1,2,3), (4,5,6)]
        one_row = [(1,2,3)]

        # Test skipping blank lines
        assert_array_equal(parse_narrow(' 1 2 3\n\n 4 5 6', False),
            u8_records(both_rows, default_names))

        # Test comment lines
        assert_array_equal(parse_narrow('# 1 2 3\n 1 2 3\n# foo\n 4 5 6', False),
            u8_records(both_rows, default_names))

        # Test field names line
        assert_array_equal(parse_narrow(' a b c\n 1 2 3', True),
            u8_records(one_row, ('a', 'b', 'c')))

        # Test field names line as comment line
        assert_array_equal(parse_narrow('# a b c\n 1 2 3', True),
            u8_records(one_row, ('a', 'b', 'c')))

        # Test incomplete field names line
        assert_array_equal(parse_narrow(' a\n 1 2 3', True),
            u8_records(one_row, ('a', 'f1', 'f2')))
예제 #23
0
    def test_adapter_factory(self):
        """The factory returns a CSV adapter for "csv" and raises
        ``AdapterException`` for an unknown parser name."""
        source = StringIO("1,2,3")

        created = iopro.text_adapter(source, "csv", delimiter=',', field_names=False, infer_types=False)
        self.assertTrue(isinstance(created, iopro.CSVTextAdapter))

        with self.assertRaises(iopro.AdapterException):
            iopro.text_adapter(source, "foobar")
예제 #24
0
 def test_64bit_ints(self):
     """Round-trip values at the edges of the signed and unsigned 64-bit
     ranges."""
     largest_signed = (2**63)-1
     negated_signed = largest_signed*-1
     largest_unsigned = (2**64)-1
     limits = (largest_signed, negated_signed, largest_unsigned)

     source = StringIO(','.join([str(value) for value in limits]))
     reader = iopro.text_adapter(source, delimiter=',', field_names=False, infer_types=False)
     reader.set_field_types({0:'i8', 1:'i8', 2:'u8'})
     parsed = reader.to_array()
     self.assert_equality(parsed[0].item(), limits)
예제 #25
0
    def test_slicing(self):
        """Check integer, slice and field-name indexing, the field filter,
        and out-of-range errors."""
        total = self.num_records

        source = StringIO()
        generate_dataset(source, IntIter(), ',', total)
        adapter = iopro.text_adapter(source, field_names=False)
        adapter.set_field_types({0:'u4',1:'u4',2:'u4',3:'u4',4:'u4'})

        def check_rows(rows, count, first, step=5):
            # Row k of `rows` is expected to equal `first` with k * step
            # added to every element.
            wanted = list(first)
            for position in range(count):
                self.assert_equality(rows[position].item(), tuple(wanted))
                wanted = [value + step for value in wanted]

        # Single records: first and last.
        assert_array_equal(adapter[0], np.array([(0, 1, 2, 3, 4)], dtype='u4,u4,u4,u4,u4'))
        last_record = tuple([((total-1)*5)+x for x in range(5)])
        self.assert_equality(adapter[total-1].item(), last_record)

        # Negative single-record indexing is not exercised here.

        # Field selection by name followed by a record index.
        self.assert_equality(adapter['f0'][0].item(), (0,))
        self.assert_equality(adapter['f4'][1].item(), (9,))

        # Whole file.
        rows = adapter[:]
        self.assert_equality(rows.size, total)
        check_rows(rows, total, range(0, 5))

        # Everything but the last record.
        rows = adapter[:-1]
        self.assert_equality(rows.size, total-1)
        check_rows(rows, total-1, range(0, 5))

        # Bounded slice.
        rows = adapter[0:10]
        self.assert_equality(rows.size, 10)
        check_rows(rows, 10, range(0, 5))

        # Everything but the first record.
        rows = adapter[1:]
        self.assert_equality(rows.size, total-1)
        check_rows(rows, total-1, range(5, 10))

        # Stepped slice: every second record.
        rows = adapter[0:10:2]
        self.assert_equality(rows.size, 5)
        check_rows(rows, 5, range(0, 5), step=10)

        # Field subset given as a list of names.
        rows = adapter[['f0', 'f4']][:]
        self.assert_equality(rows.size, total)
        check_rows(rows, total, [0, 4])

        # Field subset given through the field filter (index and name mixed).
        adapter.field_filter = [0, 'f4']
        rows = adapter[:]
        self.assert_equality(rows.size, total)
        check_rows(rows, total, [0, 4])

        # Clearing the filter restores all five fields.
        adapter.field_filter = None
        rows = adapter[:]
        self.assert_equality(rows.size, total)
        check_rows(rows, total, [0, 1, 2, 3, 4])

        # One past the last record, as an index and as a slice bound.
        try:
            adapter[total]
        except iopro.AdapterIndexError:
            pass
        else:
            self.fail('AdaperIndexError not thrown')

        try:
            adapter[0:total+1]
        except iopro.AdapterIndexError:
            pass
        else:
            self.fail('AdaperIndexError not thrown')
예제 #26
0
import iopro
import sys
import pandas as pd

# 'station_complete.csv' is read without a header row (the adapter below is
# also told field_names=False for the same file).  pandas expects header=None
# for that; header=False is rejected with a TypeError by current pandas.
df = pd.read_csv('station_complete.csv', header=None)
df.columns = ['STDIN']

stations = pd.read_csv('ish-history.csv')
adapter = iopro.text_adapter('station_complete.csv',
                             parser='csv',
                             delimiter=' ',
                             field_names=False)
# First field of every record in the file.
usaf_list = list(adapter[:]['f0'])

# The membership test runs once per row of `stations`; a set makes each test
# O(1) instead of scanning the whole list.  `usaf_list` is kept for any later
# code that expects the list form.
usaf_ids = set(usaf_list)
station_clean = stations['USAF'].map(lambda x: x in usaf_ids)
cleaned = stations[station_clean]

# Coordinates of the stations that survived the filter.
lat = cleaned['LAT']
lon = cleaned['LON']

import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

from mpl_toolkits.basemap import Basemap
import numpy as np
import matplotlib.pyplot as plt

# Mercator-projection map spanning latitudes -80..80 and longitudes -180..180,
# with coastlines drawn and continents/lakes filled.
m = Basemap(
    projection='merc',
    llcrnrlat=-80,
    urcrnrlat=80,
    llcrnrlon=-180,
    urcrnrlon=180,
    lat_ts=20,
    resolution='c',
)
m.drawcoastlines()
m.fillcontinents(lake_color='aqua', color='coral')
예제 #27
0
 def test_num_records(self):
     """Check that ``num_records=2`` limits a full read to the first two rows."""
     source = StringIO('0,1\n2,3\n4,5\n6,7\n8,9\n10,11\n12,13\n14,15\n16,17\n18,19')
     expected = np.array([(0, 1), (2, 3)], dtype='u8,u8')

     reader = iopro.text_adapter(source, field_names=False, num_records=2)
     assert_array_equal(reader[:], expected)
예제 #28
0
        # For every remote station file: download it next to `path`, then keep
        # the station only if its temperature column passes the checks below.
        # `v` is unpacked from each pair but not used inside this loop.
        for stat1973, v in stationIDs:
            # The local copy is named after the last component of the remote path.
            cache = open(path+stat1973.split('/')[-1],'wb')
 
            try:
                # Stream the remote file into the cache in 8 KiB blocks.
                ftp.retrbinary("RETR " + stat1973, cache.write, 8*1024)
            except:
                # On any failure, open a new connection, log in with ftplib's
                # default (anonymous) credentials and retry the download once.
                # NOTE(review): the retry writes through the same open handle,
                # so bytes received before the failure are not discarded --
                # confirm this cannot leave a corrupted cache file.
                ftp = ftplib.FTP('ftp.ncdc.noaa.gov')
                # 'Successfully Connected...'
                ftp.login()
                ftp.retrbinary("RETR " + stat1973, cache.write, 8*1024)
             
            cache.close()
 
            # Only '.op.gz' files are parsed; other files (the tar file) are skipped.
            if stat1973.endswith('.op.gz'):
                adapter = iopro.text_adapter(cache.name,compression='gzip',parser='csv', field_names=True)
 
                # Read every record and take its TEMP column.
                avg_temp = adapter[:]['TEMP']
                
                # Reject files with fewer than 360 or more than 366 records
                # (presumably one record per day of the year -- confirm).
                if (len(avg_temp) < 360) or (len(avg_temp) > 366):
                    # print '\ttoo small or too big'
                    continue      
                # Reject files containing 9999.9 in TEMP (presumably the
                # missing-value marker of the data format -- confirm).
                if 9999.9 in avg_temp:
                    continue
                else:
                    # Record the first 12 characters of the cached file's base name.
                    stations.append(cache.name.split('/')[-1][:12])
                    # Store the station, which has good coverage for the year.
 
        # Hand the collected list of stations to `out` under key 1.
        out.add(1,stations)
        # Yield the list of stations.
 
예제 #29
0
 def test_stepping(self):
     """Check that slicing with a step of 2 and of 3 returns the right rows."""
     source = StringIO('0,1\n2,3\n4,5\n6,7\n8,9\n10,11\n12,13\n14,15\n16,17\n18,19')
     reader = iopro.text_adapter(source, field_names=False)

     # Expected rows keyed by the slice step that should produce them.
     cases = (
         (2, [(0,1), (4,5), (8,9), (12,13), (16,17)]),
         (3, [(0,1), (6,7), (12,13), (18,19)]),
     )
     for step, rows in cases:
         assert_array_equal(reader[::step], np.array(rows, dtype='u8,u8'))
예제 #30
0
    def test_regex(self):
        """Exercise the regex parser on generated and hand-written inputs.

        Covers: a full read of a generated integer dataset through
        ``RegexTextAdapter``, skipping of blank lines, comment lines, and the
        handling of a field-names line (plain, commented, incomplete, and one
        that does not match the record regex).
        """
        data = StringIO()
        generate_dataset(data, IntIter(), ',', self.num_records)
        adapter = iopro.RegexTextAdapter(data, '([0-9]*),([0-9]*),([0-9]*),([0-9]*),([0-9]*)\n', field_names=False, infer_types=False)
        adapter.set_field_types({0:'u4',1:'u4',2:'u4',3:'u4',4:'u4'})

        array = adapter[:]

        self.assert_equality(array.size, self.num_records)

        # Record i is expected to hold the five consecutive integers starting
        # at 5*i, so each expected row is the previous one shifted by 5.
        record = list(range(5))
        for i in range(self.num_records):
            self.assert_equality(array[i].item(), tuple(record))
            record = [x+5 for x in record]

        # Pattern shared by the small hand-written inputs below: three
        # single digits separated by single spaces.
        three_digits = '([0-9]) ([0-9]) ([0-9])'
        unnamed_dtype = [('f0','<u8'),('f1','<u8'),('f2','<u8')]
        named_dtype = [('a','<u8'),('b','<u8'),('c','<u8')]

        # Test skipping blank lines
        data = StringIO('1 2 3\n\n4 5 6')
        adapter = iopro.text_adapter(data, parser='regex',
            regex_string=three_digits, field_names=False)
        array = adapter[:]
        assert_array_equal(array, np.array([(1,2,3), (4,5,6)],
            dtype=unnamed_dtype))

        # Test comment lines
        data = StringIO('#1 2 3\n1 2 3\n# foo\n4 5 6')
        adapter = iopro.text_adapter(data, parser='regex',
            regex_string=three_digits, field_names=False)
        array = adapter[:]
        assert_array_equal(array, np.array([(1,2,3), (4,5,6)],
            dtype=unnamed_dtype))

        # Test field names line
        data = StringIO('a b c\n4 5 6')
        adapter = iopro.text_adapter(data, parser='regex',
            regex_string=three_digits, field_names=True)
        array = adapter[:]
        assert_array_equal(array, np.array([(4,5,6)],
            dtype=named_dtype))

        # Test field names line as comment line
        data = StringIO('#a b c\n4 5 6')
        adapter = iopro.text_adapter(data, parser='regex',
            regex_string=three_digits, field_names=True)
        array = adapter[:]
        assert_array_equal(array, np.array([(4,5,6)],
            dtype=named_dtype))

        # Test incomplete field names line: the missing third name is
        # expected to fall back to the default 'f2'.
        data = StringIO('a b\n4 5 6')
        adapter = iopro.text_adapter(data, parser='regex',
            regex_string=three_digits, field_names=True)
        array = adapter[:]
        assert_array_equal(array, np.array([(4,5,6)],
            dtype=[('a','<u8'),('b','<u8'),('f2','<u8')]))

        # Test field names line that doesn't match regex.
        # The pattern is a raw string so that ``\s`` reaches the regex engine
        # without relying on Python passing an invalid string escape through
        # (which warns on modern interpreters); the string value is unchanged.
        data = StringIO('a b c\n1 2  3 4  5 6')
        adapter = iopro.text_adapter(data, parser='regex',
            regex_string=r'([0-9\s]+)  ([0-9\s]+)  ([0-9\s]+)', field_names=True)
        array = adapter[:]
        assert_array_equal(array, np.array([('1 2', '3 4', '5 6')],
            dtype=[('a','O'),('b','O'),('c','O')]))
예제 #31
0
import os
import iopro

from blaze import Table, mean, std, params, select, open
from blaze.algo.select import select2

# Module-level adapter over the example file, used by test_simple() below.
# Type inference is switched off, so the four column types are set explicitly:
# fields 0-2 as 'u8' and field 3 as 'f8'.
adapter = iopro.text_adapter('noaa_gsod_example.op',
                             header=1,
                             infer_types=False,
                             field_names=['x', 'y', 'z', 'w'])
adapter.set_field_types(dict(enumerate(['u8', 'u8', 'u8', 'f8'])))


def test_simple():
    """Load the adapter's records into a blaze table and print two statistics.

    If ``./noaa_data`` already exists it is opened as a ctable; otherwise a new
    table stored at that location is created, filled with every record from the
    module-level ``adapter`` and committed.  The mean of column ``f3`` and the
    standard deviation of column ``f2`` are then printed.

    Note: ``open`` here is the name imported from ``blaze`` at the top of the
    module, not the builtin.
    """
    if os.path.exists('./noaa_data'):
        t = open('ctable://noaa_data')
    else:
        p = params(clevel=5, storage='./noaa_data')

        t = Table([], dshape='{f0: int, f1:int, f2:int, f3:float}', params=p)

        # TODO: chunkwise copy
        t.append(adapter[:])
        t.commit()

    # print is called with one pre-formatted argument so the statement parses
    # on Python 3 and still prints the same text under Python 2 (where a
    # parenthesised multi-argument print would print a tuple instead).
    rule = '--------------------------------------'
    print(rule)
    print('mean %s' % (mean(t, 'f3'),))
    print('std %s' % (std(t, 'f2'),))
    print(rule)
예제 #32
0
import iopro
import sys
import pandas as pd

# 'station_complete.csv' is read without a header row (the adapter below is
# also told field_names=False for the same file).  pandas expects header=None
# for that; header=False is rejected with a TypeError by current pandas.
df = pd.read_csv('station_complete.csv', header=None)
df.columns = ['STDIN']


stations = pd.read_csv('ish-history.csv')
adapter = iopro.text_adapter('station_complete.csv', parser='csv', delimiter=' ', field_names=False)
# First field of every record in the file.
usaf_list = list(adapter[:]['f0'])

# The membership test runs once per row of `stations`; a set makes each test
# O(1) instead of scanning the whole list.  `usaf_list` is kept for any later
# code that expects the list form.
usaf_ids = set(usaf_list)
station_clean = stations['USAF'].map(lambda x: x in usaf_ids)
cleaned = stations[station_clean]

# Coordinates of the stations that survived the filter.
lat = cleaned['LAT']
lon = cleaned['LON']


import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

from mpl_toolkits.basemap import Basemap
import numpy as np
import matplotlib.pyplot as plt

# Mercator-projection map spanning latitudes -80..80 and longitudes -180..180,
# with coastlines drawn and continents/lakes filled.
m = Basemap(
    projection='merc',
    llcrnrlat=-80,
    urcrnrlat=80,
    llcrnrlon=-180,
    urcrnrlon=180,
    lat_ts=20,
    resolution='c',
)
m.drawcoastlines()
m.fillcontinents(lake_color='aqua', color='coral')