def _dataconvert(args):
    """Convert the tabular file at ``args.inpath`` into ``args.outpath``.

    The input type is ``args.format`` when that is set, otherwise it is
    guessed from ``args.inpath``. The output type is always guessed from
    ``args.outpath``. An output path starting with ``_.`` writes to
    ``sys.stdout`` instead of opening a file.

    Attributes read from ``args``: ``format``, ``inpath``, ``outpath``,
    ``guess_types``, ``sheet``, ``encoding`` and ``records``. As a side
    effect ``args.guess_types`` is overwritten with ``True`` when the output
    type is ARFF.

    Raises:
        ValueError: if the input type or the output type is not supported.
            The output type is checked before the output file is opened, so
            an unsupported target never creates or truncates a file.
    """
    # What is the type of input file?
    if args.format:
        intype = args.format
    else:
        intype = guess_type(args.inpath)

    # What is the type of output file?
    outtype = guess_type(args.outpath)

    # If outtype is ARFF then we need to guess field-types.
    # Thus we overwrite the args.guess_types to True.
    if outtype == arff.MIMETYPE:
        args.guess_types = True

    if is_url_path(args.inpath):
        instream = urllib2.urlopen(args.inpath)
    else:
        instream = open(args.inpath)

    try:
        # TSV input is not enabled in this version:
        # tsv_types = ['tsv', 'text/tsv', 'text/tab-separated-values']
        if intype in ['text/csv', 'csv']:
            records, metadata = dcsv.parse(
                instream, guess_types=args.guess_types)
        elif intype in [
                'application/vnd.ms-excel',
                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                'xls',
        ]:
            import dataconverters.xls
            # NOTE(review): the short name 'xls' falls into the 'xlsx' branch
            # of this conditional - confirm that is intended.
            excel_type = 'xls' if intype == 'application/vnd.ms-excel' else 'xlsx'
            records, metadata = dataconverters.xls.parse(
                instream,
                excel_type=excel_type,
                sheet=args.sheet,
                guess_types=args.guess_types,
                encoding=args.encoding)
        else:
            raise ValueError(
                'No support for reading file type %s - support for csv or xls only at present'
                % intype)

        # Reject unsupported targets before touching the output path.
        if outtype not in ('text/csv', 'application/json', arff.MIMETYPE):
            raise ValueError('Only support writing to csv and json at present')

        if args.records:
            records = itertools.islice(records, int(args.records))

        to_stdout = args.outpath.startswith('_.')
        if to_stdout:
            outstream = sys.stdout
        else:
            outstream = open(args.outpath, 'w')

        try:
            if outtype == 'text/csv':
                dcsv.write(outstream, records, metadata)
            elif outtype == 'application/json':
                import dataconverters.jsondata as js
                js.write(outstream, records, metadata)
            else:
                # Only arff.MIMETYPE can reach here after the check above.
                arff.write(outstream, records, metadata)
        finally:
            # Close only what this function opened; never close sys.stdout.
            if not to_stdout:
                outstream.close()
    finally:
        # Closed after writing, since the records may still be read from
        # the input stream while they are written.
        instream.close()
def _dataconvert(args):
    """Convert the tabular file at ``args.inpath`` into ``args.outpath``.

    The input type is ``args.format`` when that is set, otherwise it is
    guessed from ``args.inpath``. The output type is always guessed from
    ``args.outpath``. CSV and TSV type names are both read through
    ``dcsv.parse``. An output path starting with ``_.`` writes to
    ``sys.stdout`` instead of opening a file.

    Attributes read from ``args``: ``format``, ``inpath``, ``outpath``,
    ``guess_types``, ``sheet``, ``encoding`` and ``records``. As a side
    effect ``args.guess_types`` is overwritten with ``True`` when the output
    type is ARFF.

    Raises:
        ValueError: if the input type or the output type is not supported.
            The output type is checked before the output file is opened, so
            an unsupported target never creates or truncates a file.
    """
    # What is the type of input file?
    if args.format:
        intype = args.format
    else:
        intype = guess_type(args.inpath)

    # What is the type of output file?
    outtype = guess_type(args.outpath)

    # If outtype is ARFF then we need to guess field-types.
    # Thus we overwrite the args.guess_types to True.
    if outtype == arff.MIMETYPE:
        args.guess_types = True

    if is_url_path(args.inpath):
        instream = urllib2.urlopen(args.inpath)
    else:
        instream = open(args.inpath)

    try:
        tsv_types = ['tsv', 'text/tsv', 'text/tab-separated-values']
        if intype in ['text/csv', 'csv'] + tsv_types:
            # NOTE(review): TSV input is handed to dcsv.parse with no explicit
            # delimiter - presumably the parser detects it; confirm.
            records, metadata = dcsv.parse(
                instream, guess_types=args.guess_types)
        elif intype in [
                'application/vnd.ms-excel',
                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                'xls',
        ]:
            import dataconverters.xls
            # NOTE(review): the short name 'xls' falls into the 'xlsx' branch
            # of this conditional - confirm that is intended.
            excel_type = 'xls' if intype == 'application/vnd.ms-excel' else 'xlsx'
            records, metadata = dataconverters.xls.parse(
                instream,
                excel_type=excel_type,
                sheet=args.sheet,
                guess_types=args.guess_types,
                encoding=args.encoding)
        else:
            raise ValueError(
                'No support for reading file type %s - support for csv or xls only at present'
                % intype)

        # Reject unsupported targets before touching the output path.
        if outtype not in ('text/csv', 'application/json', arff.MIMETYPE):
            raise ValueError('Only support writing to csv and json at present')

        if args.records:
            records = itertools.islice(records, int(args.records))

        to_stdout = args.outpath.startswith('_.')
        if to_stdout:
            outstream = sys.stdout
        else:
            outstream = open(args.outpath, 'w')

        try:
            if outtype == 'text/csv':
                dcsv.write(outstream, records, metadata)
            elif outtype == 'application/json':
                import dataconverters.jsondata as js
                js.write(outstream, records, metadata)
            else:
                # Only arff.MIMETYPE can reach here after the check above.
                arff.write(outstream, records, metadata)
        finally:
            # Close only what this function opened; never close sys.stdout.
            if not to_stdout:
                outstream.close()
    finally:
        # Closed after writing, since the records may still be read from
        # the input stream while they are written.
        instream.close()
def test_1(self): """Write three place/temperature records through arff.write (dataset_name "testdataset") into a StringIO and assert that the text read back equals desired_results.""" metadata = {"fields": [{"type": "Integer", "id": u"temperature"}, {"type": "String", "id": u"place"}]} records = [ {"place": u"Cairo", "temperature": 32}, {"place": u"Alexandria", "temperature": 22}, {"place": u"Aswan", "temperature": 42}, ] desired_results = """@RELATION testdataset\n @ATTRIBUTE temperature NUMERIC @ATTRIBUTE place STRING\n @DATA\n32,'Cairo' 22,'Alexandria' 42,'Aswan' """ out = StringIO() arff.write(out, records, metadata, dataset_name="testdataset") out.seek(0) result = out.read() assert_equal(result, desired_results)
def test_1(self): """Write three place/temperature records through arff.write (dataset_name 'testdataset') into a StringIO and assert that the text read back equals desired_results.""" metadata = { 'fields': [{ 'type': 'Integer', 'id': u'temperature' }, { 'type': 'String', 'id': u'place' }] } records = [ { 'place': u'Cairo', 'temperature': 32 }, { 'place': u'Alexandria', 'temperature': 22 }, { 'place': u'Aswan', 'temperature': 42 }, ] desired_results = """@RELATION testdataset\n @ATTRIBUTE temperature NUMERIC @ATTRIBUTE place STRING\n @DATA\n32,'Cairo' 22,'Alexandria' 42,'Aswan' """ out = StringIO() arff.write(out, records, metadata, dataset_name='testdataset') out.seek(0) result = out.read() assert_equal(result, desired_results)