def ingest_tsv(filepath):
    """Parse a tab-separated file into row dicts and record column metadata.

    The first row is treated as the header. Each header cell is passed
    through ``normalize_column_name`` to obtain the key used in the row
    dicts. Data rows whose cell count differs from the header are skipped.
    Every column is then classified as 'boolean', 'numeric' or 'string'
    (using ``is_boolean`` / ``is_numeric`` on all of its values) and the
    annotated header list is handed to ``save_file_metadata``.

    Args:
        filepath: Path of the TSV file to read.

    Returns:
        A list of dicts, one per accepted data row, mapping column key to
        the raw cell string.

    Raises:
        ValueError: If the file is empty or its header row has no columns.
    """
    log.info('ingesting %s as tsv file', filepath)
    save_file_metadata(filepath, status='parsing', filetype='tsv')

    # NOTE: the 'U' (universal newlines) flag is a Python 2 idiom; it is
    # deprecated on Python 3 and rejected from 3.11 onwards. It is kept so
    # that behaviour on the interpreter this file targets is unchanged.
    with open(filepath, 'rU') as fid:
        reader = csv.reader(fid, delimiter='\t')

        # Builtin next() with a default: a completely empty file yields no
        # rows, which used to escape as a bare StopIteration. It now falls
        # through to the same ValueError as an empty header row.
        header = next(reader, [])
        log.debug("%d columns: %s", len(header), ", ".join(header))
        if not header:
            raise ValueError('header row must contain at least one column')

        keys = [normalize_column_name(h) for h in header]

        # Keep only rows that line up with the header; ragged rows are
        # dropped rather than padded or truncated.
        parsed = [
            dict(zip(keys, row))
            for row in reader
            if len(row) == len(keys)
        ]

    columns = [{'raw': raw, 'key': key} for raw, key in zip(header, keys)]
    for column in columns:
        values = [record[column['key']] for record in parsed]
        # NOTE: with zero data rows all() is vacuously true, so every
        # column is reported as 'boolean'.
        if all(is_boolean(value) for value in values):
            column['datatype'] = 'boolean'
        elif all(is_numeric(value) for value in values):
            column['datatype'] = 'numeric'
        else:
            column['datatype'] = 'string'

    save_file_metadata(filepath, headers=columns)
    return parsed
def test_neg_booelans(self):
    """util.is_boolean must reject strings, including the text "True"."""
    # The misspelled method name is kept so the test's identifier is unchanged.
    for not_a_bool in ("True", "abcd"):
        self.assertFalse(util.is_boolean(not_a_bool))
def test_positive_types(self):
    """Each util type predicate accepts a sample value of its own type."""
    expect_true = self.assertTrue

    expect_true(util.is_string("abcd"))
    expect_true(util.is_boolean(True))
    expect_true(util.is_integer(1234))
    expect_true(util.is_list([1, 2, 3, 4]))
    expect_true(util.is_dict({"foo": "bar"}))