Esempio n. 1
0
def ingest_tsv(filepath):
    """Parse a tab-separated file into row dicts and record its metadata.

    The first row is treated as the header. Every header cell is passed
    through ``normalize_column_name`` to produce the dict keys. Data rows
    whose cell count differs from the header are skipped (and counted in a
    warning). For each column a datatype of ``'boolean'``, ``'numeric'`` or
    ``'string'`` is inferred with ``is_boolean`` / ``is_numeric`` and the
    resulting header descriptions are stored with
    ``save_file_metadata(filepath, headers=...)``.

    Args:
        filepath: Path of the tab-separated file to read.

    Returns:
        A list with one dict per well-formed data row, mapping each
        normalized column key to the cell value read from the file.

    Raises:
        ValueError: If the file is empty or its header row has no columns.
    """
    log.info('ingesting %s as tsv file', filepath)
    save_file_metadata(filepath, status='parsing', filetype='tsv')

    # 'rU' is kept from the original: universal-newline mode on Python 2.
    with open(filepath, 'rU') as fid:
        reader = csv.reader(fid, delimiter='\t')

        # A default of None turns an empty file into the ValueError below
        # instead of letting StopIteration escape from the iterator.
        header = next(reader, None)
        if not header:
            raise ValueError('header row must contain at least one column')
        log.debug("%d columns: %s", len(header), ", ".join(header))

        keys = [normalize_column_name(h) for h in header]

        parsed = []
        skipped = 0
        for row in reader:
            if len(row) == len(keys):
                parsed.append(dict(zip(keys, row)))
            else:
                skipped += 1

    # Malformed rows are still dropped (best effort), but no longer silently.
    if skipped:
        log.warning('skipped %d rows of %s with an unexpected column count',
                    skipped, filepath)

    columns = [{'raw': raw, 'key': key} for raw, key in zip(header, keys)]

    for column in columns:
        values = [record[column['key']] for record in parsed]
        if not values:
            # all() is vacuously true for an empty list; with no data rows
            # there is no evidence for anything narrower than 'string'.
            column['datatype'] = 'string'
        elif all(is_boolean(v) for v in values):
            column['datatype'] = 'boolean'
        elif all(is_numeric(v) for v in values):
            column['datatype'] = 'numeric'
        else:
            column['datatype'] = 'string'

    save_file_metadata(filepath, headers=columns)

    return parsed
Esempio n. 2
0
 def test_neg_booelans(self):
     # Values util.is_boolean is expected to reject: the string "True"
     # and an arbitrary non-boolean string.
     for rejected in ("True", "abcd"):
         self.assertFalse(util.is_boolean(rejected))
Esempio n. 3
0
 def test_positive_types(self):
     # Each util predicate (by name) paired with a sample value it is
     # expected to accept.
     accepted = (
         ("is_string", "abcd"),
         ("is_boolean", True),
         ("is_integer", 1234),
         ("is_list", [1, 2, 3, 4]),
         ("is_dict", {"foo": "bar"}),
     )
     for predicate_name, sample in accepted:
         # Look the predicate up inside the loop so lookups and assertions
         # happen in the listed order, one predicate at a time.
         predicate = getattr(util, predicate_name)
         self.assertTrue(predicate(sample))