def action(args):
    """Concatenate the input CSVs and, unless stacking, drop duplicate rows.

    Attributes read from ``args``:
        csv: iterable of inputs, each handed to ``utils.read_csv``.
        limit: forwarded to the reader as ``nrows``.
        no_header: when true the inputs are read with ``header=None``, the
            ``on`` entries are converted to integers, and no header row is
            written to the output.
        stack: when true the rows are only concatenated; nothing is removed.
        on: optional comma-separated list of columns that identify a
            duplicate; when empty, whole rows are compared.
        take_last: keep the last row of each duplicate set rather than the
            first.
        out: destination passed to ``DataFrame.to_csv``.

    NOTE(review): ``utils.read_csv`` is a project helper; it is presumably a
    thin wrapper around ``pandas.read_csv`` -- confirm.
    """
    # For debugging, lift pandas' display limits:
    # pandas.set_option('display.max_columns', None)
    # pandas.set_option('display.max_rows', None)
    dfs = []
    columns = None  # column order of the last input, reused when writing
    for csv in args.csv:
        df = utils.read_csv(
            csv,
            dtype=str,
            nrows=args.limit,
            comment='#',
            na_filter=False,
            header=None if args.no_header else 0)
        columns = df.columns
        dfs.append(df)
    df = pandas.concat(dfs, ignore_index=True)

    if not args.stack:
        if args.on:
            on = args.on.split(',')
            if args.no_header:
                # Headerless frames have integer column labels.  Build a real
                # list: on Python 3 ``map`` returns a one-shot iterator, which
                # groupby does not treat as a list of keys.
                on = [int(col) for col in on]
            grouped = df.groupby(by=on, sort=False)
            df = grouped.tail(1) if args.take_last else grouped.head(1)
        else:
            # ``take_last=`` was removed from pandas; ``keep=`` replaces it.
            df = df.drop_duplicates(
                keep='last' if args.take_last else 'first')

    # Suppress the header row for headerless input; otherwise pandas would
    # emit the synthetic integer labels (0,1,2,...) as a header line.
    df.to_csv(
        args.out,
        columns=columns,
        header=not args.no_header,
        index=False)
def action(args):
    """Merge the input CSVs pairwise, left to right, and write the result.

    Attributes read from ``args``:
        csv: iterable of inputs, each handed to ``utils.read_csv``.
        comment: forwarded to the reader as ``comment``.
        no_header: when true the inputs are read with ``header=None``, the
            ``on`` entries are converted to integers, and no header row is
            written.
        on: optional comma-separated list of join columns; when empty,
            ``on=None`` is passed to ``DataFrame.merge``.
        how: forwarded to ``DataFrame.merge`` as ``how``.
        out: destination passed to ``DataFrame.to_csv``.

    Raises:
        IndexError: if ``args.csv`` yields no inputs.
    """
    # For debugging, lift pandas' display limits:
    # pandas.set_option('display.max_columns', None)
    # pandas.set_option('display.max_rows', None)
    dfs = [
        utils.read_csv(
            csv,
            dtype=str,
            comment=args.comment,
            na_filter=False,
            header=None if args.no_header else 0)
        for csv in args.csv
    ]

    if args.on:
        on = args.on.split(',')
        if args.no_header:
            # Headerless frames have integer column labels.  Build a real
            # list: on Python 3 ``map`` returns an iterator, which merge
            # would treat as a single key and which would be exhausted
            # after the first merge anyway.
            on = [int(col) for col in on]
    else:
        on = None

    # Fold every remaining frame into the first one.
    df = dfs[0]
    for d in dfs[1:]:
        df = df.merge(d, how=args.how, on=on)
    df.to_csv(args.out, header=not args.no_header, index=False)
def action(args):
    """Concatenate the input CSVs and write them out, optionally with a new
    delimiter and/or replacement column names.

    Attributes read from ``args``: ``csv`` (inputs), ``limit`` (passed as
    ``nrows``), ``from_delimiter`` (input ``sep``), ``comment``,
    ``no_header`` (read with ``header=None``), ``header`` (comma-separated
    replacement column names), ``to_delimiter`` (output ``sep``) and ``out``
    (destination for ``DataFrame.to_csv``).
    """
    # Debugging aid -- lift pandas' display limits:
    # pandas.set_option('display.max_columns', None)
    # pandas.set_option('display.max_rows', None)
    header_row = None if args.no_header else 0
    frames = [
        utils.read_csv(
            path,
            dtype=str,
            nrows=args.limit,
            sep=args.from_delimiter,
            comment=args.comment,
            na_filter=False,
            header=header_row)
        for path in args.csv
    ]
    combined = pandas.concat(frames, ignore_index=True)

    # Replacement names, when given, overwrite whatever was read.
    if args.header:
        combined.columns = args.header.split(',')

    # A header row is written if names were supplied or the input had one.
    write_header = args.header or not args.no_header
    combined.to_csv(
        args.out,
        header=write_header,
        sep=args.to_delimiter,
        index=False)
def action(args):
    """Read every input CSV, stack the rows, and write a single CSV.

    The input delimiter (``args.from_delimiter``) and output delimiter
    (``args.to_delimiter``) are independent, and ``args.header`` -- a
    comma-separated string -- replaces the column names when set.  Other
    attributes read from ``args``: ``csv``, ``limit`` (passed as ``nrows``),
    ``comment``, ``no_header`` and ``out``.
    """
    # Debugging aid -- lift pandas' display limits:
    # pandas.set_option('display.max_columns', None)
    # pandas.set_option('display.max_rows', None)

    # Reader options shared by every input file.
    read_options = dict(
        dtype=str,
        nrows=args.limit,
        sep=args.from_delimiter,
        comment=args.comment,
        na_filter=False,
        header=None if args.no_header else 0,
    )
    tables = []
    for source in args.csv:
        tables.append(utils.read_csv(source, **read_options))
    stacked = pandas.concat(tables, ignore_index=True)

    new_names = args.header
    if new_names:
        stacked.columns = new_names.split(',')

    # Emit a header row when names were supplied or the input carried one.
    stacked.to_csv(
        args.out,
        header=new_names or not args.no_header,
        sep=args.to_delimiter,
        index=False,
    )
def action(args):
    """Join the input CSVs one after another and write the merged table.

    The first input is the starting table; each later input is merged into
    the running result with ``DataFrame.merge``.

    Attributes read from ``args``:
        csv: iterable of inputs, each handed to ``utils.read_csv``.
        comment: forwarded to the reader as ``comment``.
        no_header: when true the inputs are read with ``header=None``, the
            ``on`` entries are converted to integers, and the output has no
            header row.
        on: optional comma-separated join columns; ``None`` is passed to
            ``merge`` when it is empty.
        how: forwarded to ``merge`` as ``how``.
        out: destination passed to ``DataFrame.to_csv``.

    Raises:
        IndexError: if ``args.csv`` yields no inputs.
    """
    # For debugging, lift pandas' display limits:
    # pandas.set_option('display.max_columns', None)
    # pandas.set_option('display.max_rows', None)
    header_row = None if args.no_header else 0
    dfs = [
        utils.read_csv(
            csv,
            dtype=str,
            comment=args.comment,
            na_filter=False,
            header=header_row)
        for csv in args.csv
    ]

    on = None
    if args.on:
        on = args.on.split(',')
        if args.no_header:
            # Integer labels for headerless frames.  This must be a list:
            # Python 3's ``map`` gives a single-use iterator that merge
            # would not interpret as a list of column keys.
            on = [int(col) for col in on]

    df = dfs[0]
    for d in dfs[1:]:
        df = df.merge(d, how=args.how, on=on)
    df.to_csv(args.out, header=not args.no_header, index=False)