def _dataconvert(args):
    """Convert the data at ``args.inpath`` and write it to ``args.outpath``.

    ``args`` is an argparse-style namespace. The attributes read here are
    ``inpath``, ``outpath``, ``format``, ``guess_types``, ``sheet``,
    ``encoding`` and ``records``.

    The input type is ``args.format`` when it is set, otherwise it is guessed
    from ``args.inpath``. The output type is always guessed from
    ``args.outpath``. An ``outpath`` starting with ``_.`` writes to stdout.
    When the output type is ARFF, ``args.guess_types`` is forced to True.

    Raises:
        ValueError: if the input or the output type is not supported.
    """
    # What is the type of input file?
    intype = args.format if args.format else guess_type(args.inpath)
    # What is the type of output file?
    outtype = guess_type(args.outpath)

    # If outtype is ARFF then we need to guess field-types.
    # Thus we overwrite the args.guess_types to True.
    if outtype == arff.MIMETYPE:
        args.guess_types = True

    if is_url_path(args.inpath):
        instream = urllib2.urlopen(args.inpath)
    else:
        instream = open(args.inpath)

    outstream = None
    try:
        if intype in ['text/csv', 'csv']:
            records, metadata = dcsv.parse(
                instream, guess_types=args.guess_types)
        elif intype in [
                'application/vnd.ms-excel',
                'application/vnd.openxmlformats-officedocument'
                '.spreadsheetml.sheet',
                'xls']:
            import dataconverters.xls
            # Both the legacy Excel MIME type and the short 'xls' format
            # name denote the old binary format; anything else accepted by
            # this branch is xlsx.
            if intype in ('application/vnd.ms-excel', 'xls'):
                excel_type = 'xls'
            else:
                excel_type = 'xlsx'
            records, metadata = dataconverters.xls.parse(
                instream,
                excel_type=excel_type,
                sheet=args.sheet,
                guess_types=args.guess_types,
                encoding=args.encoding)
        else:
            raise ValueError(
                'No support for reading file type %s - support for csv or '
                'xls only at present' % intype)

        # Reject unsupported output types *before* opening the output file,
        # so an existing file is not truncated for nothing.
        if outtype not in ('text/csv', 'application/json', arff.MIMETYPE):
            raise ValueError(
                'Only support writing to csv, json and arff at present')

        if args.outpath.startswith('_.'):
            outstream = sys.stdout
        else:
            outstream = open(args.outpath, 'w')

        if args.records:
            records = itertools.islice(records, int(args.records))

        if outtype == 'text/csv':
            dcsv.write(outstream, records, metadata)
        elif outtype == 'application/json':
            import dataconverters.jsondata as js
            js.write(outstream, records, metadata)
        else:
            # Only arff.MIMETYPE can reach this point (validated above).
            arff.write(outstream, records, metadata)
    finally:
        # Close what this function opened; never close the shared stdout.
        try:
            if outstream is not None and outstream is not sys.stdout:
                outstream.close()
        finally:
            instream.close()
def _dataconvert(args):
    """Convert the data at ``args.inpath`` and write it to ``args.outpath``.

    ``args`` is an argparse-style namespace. The attributes read here are
    ``inpath``, ``outpath``, ``format``, ``guess_types``, ``sheet``,
    ``encoding`` and ``records``.

    The input type is ``args.format`` when it is set, otherwise it is guessed
    from ``args.inpath``. The output type is always guessed from
    ``args.outpath``. An ``outpath`` starting with ``_.`` writes to stdout.
    When the output type is ARFF, ``args.guess_types`` is forced to True.

    Raises:
        ValueError: if the input or the output type is not supported.
    """
    # What is the type of input file?
    intype = args.format if args.format else guess_type(args.inpath)
    # What is the type of output file?
    outtype = guess_type(args.outpath)

    # If outtype is ARFF then we need to guess field-types.
    # Thus we overwrite the args.guess_types to True.
    if outtype == arff.MIMETYPE:
        args.guess_types = True

    if is_url_path(args.inpath):
        instream = urllib2.urlopen(args.inpath)
    else:
        instream = open(args.inpath)

    tsv_types = ['tsv', 'text/tsv', 'text/tab-separated-values']

    outstream = None
    try:
        if intype in ['text/csv', 'csv'] + tsv_types:
            # NOTE(review): TSV input goes through the same call as CSV with
            # no explicit delimiter -- confirm dcsv.parse detects tabs.
            records, metadata = dcsv.parse(
                instream, guess_types=args.guess_types)
        elif intype in [
                'application/vnd.ms-excel',
                'application/vnd.openxmlformats-officedocument'
                '.spreadsheetml.sheet',
                'xls']:
            import dataconverters.xls
            # Both the legacy Excel MIME type and the short 'xls' format
            # name denote the old binary format; anything else accepted by
            # this branch is xlsx.
            if intype in ('application/vnd.ms-excel', 'xls'):
                excel_type = 'xls'
            else:
                excel_type = 'xlsx'
            records, metadata = dataconverters.xls.parse(
                instream,
                excel_type=excel_type,
                sheet=args.sheet,
                guess_types=args.guess_types,
                encoding=args.encoding)
        else:
            raise ValueError(
                'No support for reading file type %s - support for csv or '
                'xls only at present' % intype)

        # Reject unsupported output types *before* opening the output file,
        # so an existing file is not truncated for nothing.
        if outtype not in ('text/csv', 'application/json', arff.MIMETYPE):
            raise ValueError(
                'Only support writing to csv, json and arff at present')

        if args.outpath.startswith('_.'):
            outstream = sys.stdout
        else:
            outstream = open(args.outpath, 'w')

        if args.records:
            records = itertools.islice(records, int(args.records))

        if outtype == 'text/csv':
            dcsv.write(outstream, records, metadata)
        elif outtype == 'application/json':
            import dataconverters.jsondata as js
            js.write(outstream, records, metadata)
        else:
            # Only arff.MIMETYPE can reach this point (validated above).
            arff.write(outstream, records, metadata)
    finally:
        # Close what this function opened; never close the shared stdout.
        try:
            if outstream is not None and outstream is not sys.stdout:
                outstream.close()
        finally:
            instream.close()
def test_csv_from_ressource():
    """Download a CSV file from a remote resource and write it out as JSON.

    The parsed records are written to ``some_json.json`` in the current
    working directory. Requires network access.
    """
    url = "https://ckannet-storage.commondatastorage.googleapis.com/2013-05-02T185247/Valeurs_ajoutees_par_branches_dactivites_aux_prix_constants_de_1999_en_milliards_FCFA).csv"
    instream = urllib.urlopen(url)
    try:
        records, metadata = dcsv.parse(instream, guess_types=True)
        # The context manager guarantees the JSON file is flushed and closed.
        with open("some_json.json", 'w') as outstream:
            js.write(outstream, records, metadata)
    finally:
        instream.close()
def main():
    """Command-line entry point: convert ``inpath`` to ``outpath``.

    Input and output types are both guessed from the respective paths.
    Input may be a local path or a URL; an ``outpath`` starting with ``_.``
    writes to stdout.

    Raises:
        ValueError: if the input or the output type is not supported.
    """
    parser = argparse.ArgumentParser(
        description='''Convert data between formats. Supported formats:

Input: csv, tsv, excel (xls, xlsx).
Output: csv, json

Examples
========

dataconvert https://github.com/okfn/dataconverters/raw/master/testdata/xls/simple.xls out.csv

Help
====
''',
        epilog='''Copyright Open Knowledge Foundation 2007-2013. Licensed under the MIT license.

Part of the DataConverters project: https://github.com/okfn/dataconverters''',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        'inpath', metavar='inpath', type=str,
        help='in file path or url')
    parser.add_argument(
        'outpath', metavar='outpath', type=str,
        help='out file path to write to (use underscore "_" as filename to '
             'indicate stdout e.g. _.csv or _.json)')
    parser.add_argument(
        '--no-guess-types', dest='guess_types', action='store_false',
        help='Disable type-guessing (where it is used e.g. with CSVs). '
             'Type guessing may significantly affect performance',
        default=True)
    parser.add_argument(
        '--sheet', metavar='NUM',
        help='Index of sheet in spreadsheet to convert (index starts at 1)',
        default=1)
    parser.add_argument(
        '--records', metavar='NUM',
        help='Only convert a maximum of NUM records')

    args = parser.parse_args()
    intype = guess_type(args.inpath)
    outtype = guess_type(args.outpath)

    if is_url_path(args.inpath):
        instream = urllib2.urlopen(args.inpath)
    else:
        instream = open(args.inpath)

    outstream = None
    try:
        if intype == 'text/csv':
            records, metadata = dcsv.parse(
                instream, guess_types=args.guess_types)
        elif intype in [
                'application/vnd.ms-excel',
                'application/vnd.openxmlformats-officedocument'
                '.spreadsheetml.sheet']:
            import dataconverters.xls
            if intype == 'application/vnd.ms-excel':
                excel_type = 'xls'
            else:
                excel_type = 'xlsx'
            records, metadata = dataconverters.xls.parse(
                instream,
                excel_type=excel_type,
                sheet=args.sheet,
                guess_types=args.guess_types)
        else:
            raise ValueError(
                'No support for reading file type %s - support for csv or '
                'xls only at present' % intype)

        # Fail loudly, and before the output file is opened (truncated),
        # instead of printing a message and exiting with status 0.
        if outtype not in ('text/csv', 'application/json'):
            raise ValueError(
                'Only support writing to csv and json at present')

        if args.outpath.startswith('_.'):
            outstream = sys.stdout
        else:
            outstream = open(args.outpath, 'w')

        if args.records:
            records = itertools.islice(records, int(args.records))

        if outtype == 'text/csv':
            dcsv.write(outstream, records, metadata)
        else:
            # Only 'application/json' can reach this point (validated above).
            import dataconverters.jsondata as js
            js.write(outstream, records, metadata)
    finally:
        # Close what this function opened; never close the shared stdout.
        try:
            if outstream is not None and outstream is not sys.stdout:
                outstream.close()
        finally:
            instream.close()
def main():
    """Command-line entry point: convert ``inpath`` to ``outpath``.

    Input and output types are both guessed from the respective paths.
    Input may be a local path or a URL; an ``outpath`` starting with ``_.``
    writes to stdout. All records are converted unless ``--records NUM`` is
    given.

    Raises:
        ValueError: if the input or the output type is not supported.
    """
    parser = argparse.ArgumentParser(
        description='''Convert data between formats. Supported formats:

Input: csv, tsv, excel (xls, xlsx).
Output: csv, json

Examples
========

dataconvert https://github.com/okfn/dataconverters/raw/master/testdata/xls/simple.xls out.csv

Help
====
''',
        epilog='''Copyright Open Knowledge Foundation 2007-2013. Licensed under the MIT license.

Part of the DataConverters project: https://github.com/okfn/dataconverters''',
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        'inpath', metavar='inpath', type=str,
        help='in file path or url')
    parser.add_argument(
        'outpath', metavar='outpath', type=str,
        help='out file path to write to (use underscore "_" as filename to '
             'indicate stdout e.g. _.csv or _.json)')
    parser.add_argument(
        '--no-guess-types', dest='guess_types', action='store_false',
        help='Disable type-guessing (where it is used e.g. with CSVs). '
             'Type guessing may significantly affect performance',
        default=True)
    parser.add_argument(
        '--sheet', metavar='NUM',
        help='Index of sheet in spreadsheet to convert (index starts at 1)',
        default=1)
    # No default: a default of 1 would silently truncate every conversion
    # to a single record. Omitting --records converts everything.
    parser.add_argument(
        '--records', metavar='NUM',
        help='Only convert a maximum of NUM records')

    args = parser.parse_args()
    intype = guess_type(args.inpath)
    outtype = guess_type(args.outpath)

    if is_url_path(args.inpath):
        instream = urllib2.urlopen(args.inpath)
    else:
        instream = open(args.inpath)

    outstream = None
    try:
        if intype == 'text/csv':
            records, metadata = dcsv.parse(
                instream, guess_types=args.guess_types)
        elif intype in [
                'application/vnd.ms-excel',
                'application/vnd.openxmlformats-officedocument'
                '.spreadsheetml.sheet']:
            import dataconverters.xls
            if intype == 'application/vnd.ms-excel':
                excel_type = 'xls'
            else:
                excel_type = 'xlsx'
            records, metadata = dataconverters.xls.parse(
                instream,
                excel_type=excel_type,
                sheet=args.sheet,
                guess_types=args.guess_types)
        else:
            raise ValueError(
                'No support for reading file type %s - support for csv or '
                'xls only at present' % intype)

        # Fail loudly, and before the output file is opened (truncated),
        # instead of printing a message and exiting with status 0.
        if outtype not in ('text/csv', 'application/json'):
            raise ValueError(
                'Only support writing to csv and json at present')

        if args.outpath.startswith('_.'):
            outstream = sys.stdout
        else:
            outstream = open(args.outpath, 'w')

        if args.records:
            records = itertools.islice(records, int(args.records))

        if outtype == 'text/csv':
            dcsv.write(outstream, records, metadata)
        else:
            # Only 'application/json' can reach this point (validated above).
            import dataconverters.jsondata as js
            js.write(outstream, records, metadata)
    finally:
        # Close what this function opened; never close the shared stdout.
        try:
            if outstream is not None and outstream is not sys.stdout:
                outstream.close()
        finally:
            instream.close()