def test_commented_header(self):
    "Field names must still be picked up when the header line is commented out."
    # JNB: the expected dtype differs from the upstream test because iopro
    # defaults to object strings instead of fixed-length strings, and to
    # unsigned long ints instead of int.
    # Upstream dtype: [('gender', '|S1'), ('age', int), ('weight', float)]
    expected = np.array(
        [('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)],
        dtype=[('gender', 'O'), ('age', 'u8'), ('weight', 'f8')])

    # Case 1: the '#' is glued to the first name and should be removed
    # automatically.
    glued_header = StringIO("""
#gender age weight
M 21 72.100000
F 35 58.330000
M 33 21.99
""")
    parsed = iopro.genfromtxt(glued_header, names=True, dtype=None)
    assert_equal(parsed, expected)

    # Case 2: the '#' is a separate first element that has to be dropped.
    detached_header = StringIO("""
# gender age weight
M 21 72.100000
F 35 58.330000
M 33 21.99
""")
    parsed = iopro.genfromtxt(detached_header, names=True, dtype=None)
    assert_equal(parsed, expected)
def test_usecols_with_named_columns(self):
    "usecols may pick named columns either by position or by name."
    text = "1 2 3\n4 5 6"
    expected = np.array([(1, 3), (4, 6)],
                        dtype=[('a', float), ('c', float)])
    # The same two columns, first addressed by index, then by field name.
    for selection in ((0, -1), ('a', 'c')):
        result = iopro.genfromtxt(StringIO(text), usecols=selection,
                                  names="a, b, c")
        assert_equal(result, expected)
def test_usecols_as_css(self):
    "usecols may be given as a comma-separated string of field names."
    expected = np.array([(1, 3), (4, 6)],
                        dtype=[('a', float), ('c', float)])
    source = StringIO("1 2 3\n4 5 6")
    result = iopro.genfromtxt(source, names="a, b, c", usecols="a, c")
    assert_equal(result, expected)
def test_with_masked_column_uniform(self):
    "Masked middle column when every column shares one dtype."
    expected = ma.array([[1, 2, 3], [4, 5, 6]],
                        mask=[[0, 1, 0], [0, 1, 0]])
    stream = StringIO('1 2 3\n4 5 6\n')
    options = dict(dtype=None, missing_values='2,5', usemask=True)
    result = iopro.genfromtxt(stream, **options)
    assert_equal(result, expected)
def test_gft_using_generator(self):
    "genfromtxt accepts a generator that yields one text line per item."
    lines = ("%d" % i for i in range(10))
    result = iopro.genfromtxt(lines)
    assert_array_equal(result, np.arange(10))
def test_user_filling_values(self):
    "Test with missing and filling values"
    data = "N/A, 2, 3\n4, ,???"
    # Columns are addressed both by index (0 and 2) and by name ('b').
    kwargs = dict(delimiter=",",
                  dtype=int,
                  names="a,b,c",
                  missing_values={0: "N/A", 'b': " ", 2: "???"},
                  filling_values={0: 0, 'b': 0, 2: -999})

    # All three columns: every missing marker is replaced by its filler.
    test = iopro.genfromtxt(StringIO(data), **kwargs)
    ctrl = np.array([(0, 2, 3), (4, 0, -999)],
                    dtype=[(name, int) for name in "abc"])
    assert_equal(test, ctrl)

    # First and last column only.
    test = iopro.genfromtxt(StringIO(data), usecols=(0, -1), **kwargs)
    ctrl = np.array([(0, 3), (4, -999)],
                    dtype=[(name, int) for name in "ac"])
    assert_equal(test, ctrl)
def test_userconverters_with_explicit_dtype(self):
    "A user converter combined with an explicit (standard) dtype."
    expected = np.array([('2001-01-01', 1.)],
                        dtype=[('', '|S10'), ('', float)])
    row = StringIO('skip,skip,2001-01-01,1.0,skip')
    # Column 2 goes through the user converter; column 3 uses dtype=float.
    result = iopro.genfromtxt(row, delimiter=",", names=None, dtype=float,
                              usecols=(2, 3), converters={2: bytes})
    assert_equal(result, expected)
def test_tricky_converter_bug1666(self):
    "Corner case: a converter that drops the first character of a field."
    # Unconditional failure kept from the original; nothing below it runs.
    assert_equal(True, False)

    def drop_first_char(field):
        return float(field[1:])

    stream = StringIO('q1,2\nq3,4')
    expected = np.array([[1., 2.], [3., 4.]])
    result = iopro.genfromtxt(stream, delimiter=',',
                              converters={0: drop_first_char})
    assert_equal(result, expected)
def test_names_auto_completion(self):
    "A partial list of names is completed with default field names."
    expected = np.array([(1, 2, 3), (4, 5, 6)],
                        dtype=[('a', int), ('f1', float), ('f2', int)])
    text = "1 2 3\n 4 5 6"
    # Only the first name is supplied; the other two are expected as f1, f2.
    result = iopro.genfromtxt(StringIO(text),
                              dtype=(int, float, int), names="a")
    assert_equal(result, expected)
def test_missing_with_tabs(self):
    "Empty fields between tab delimiters come back masked."
    nan = np.nan
    expected_data = np.array([(1, 2, 3), (nan, 2, nan), (1, nan, 3)])
    expected_mask = np.array([(0, 0, 0), (1, 0, 1), (0, 1, 0)], dtype=bool)
    text = "1\t2\t3\n\t2\t\n1\t\t3"
    masked = iopro.genfromtxt(StringIO(text), delimiter="\t", usemask=True)
    assert_equal(masked.data, expected_data)
    assert_equal(masked.mask, expected_mask)
def test_with_masked_column_various(self):
    "Masked middle column when the columns have different dtypes."
    field_types = [('f0', bool), ('f1', bool), ('f2', int)]
    expected = ma.array([(1, 2, 3), (0, 5, 6)],
                        mask=[(0, 1, 0), (0, 1, 0)],
                        dtype=field_types)
    stream = StringIO('True 2 3\nFalse 5 6\n')
    result = iopro.genfromtxt(stream, dtype=None, missing_values='2,5',
                              usemask=True)
    assert_equal(result, expected)
def test_names_with_usecols_bug1636(self):
    "The header names kept must match the columns selected by usecols."
    text = "A,B,C,D,E\n0,1,2,3,4\n0,1,2,3,4\n0,1,2,3,4"
    expected_names = ("A", "C", "E")
    # (dtype, usecols) pairs: columns by index, columns by name, and
    # columns by name with a single scalar dtype.
    variants = [
        ((int, int, int), (0, 2, 4)),
        ((int, int, int), ("A", "C", "E")),
        (int, ("A", "C", "E")),
    ]
    for column_types, selection in variants:
        result = iopro.genfromtxt(StringIO(text), dtype=column_types,
                                  delimiter=",", usecols=selection,
                                  names=True)
        assert_equal(result.dtype.names, expected_names)
def test_skip_footer(self):
    "skip_header and skip_footer trim the leading and trailing lines."
    comment_lines = ["# %i" % i for i in range(1, 6)]
    good_rows = ["%i,%3.1f,%03s" % (i, i, i) for i in range(50)]
    # The 51st data row is malformed (two fields only); it sits inside the
    # ten footer lines that are skipped.
    text = "\n".join(comment_lines + ["A, B, C"] + good_rows + ["99,99"])
    result = iopro.genfromtxt(StringIO(text), delimiter=",", names=True,
                              skip_header=5, skip_footer=10)
    expected = np.array([("%f" % i,) * 3 for i in range(41)],
                        dtype=[(name, float) for name in "ABC"])
    assert_equal(result, expected)
def test_empty_file(self):
    "An empty input yields an empty array; the empty-input warning is silenced."
    # catch_warnings restores the warning filters on exit, also when the
    # assertion fails, replacing the manual __enter__/__exit__ pairing.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore",
                                message="genfromtxt: Empty input file:")
        data = StringIO()
        test = iopro.genfromtxt(data)
        assert_equal(test, np.array([]))
def test_skip_footer_with_invalid(self):
    "skip_footer combined with rows that have too few columns."
    # catch_warnings restores the warning filters on exit, also when an
    # assertion fails, replacing the manual __enter__/__exit__ pairing.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        basestr = '1 1\n2 2\n3 3\n4 4\n5 \n6 \n7 \n'
        # Footer too small to get rid of all invalid values
        assert_raises(ValueError, iopro.genfromtxt,
                      StringIO(basestr), skip_footer=1)
        # Same input, but invalid rows are dropped instead of raising.
        a = iopro.genfromtxt(StringIO(basestr),
                             skip_footer=1, invalid_raise=False)
        assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]))
        # Footer large enough to cover all three invalid rows.
        a = iopro.genfromtxt(StringIO(basestr), skip_footer=3)
        assert_equal(a, np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]))
        # Invalid rows scattered through the body of the input.
        basestr = '1 1\n2 \n3 3\n4 4\n5 \n6 6\n7 7\n'
        a = iopro.genfromtxt(StringIO(basestr),
                             skip_footer=1, invalid_raise=False)
        assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.], [6., 6.]]))
        a = iopro.genfromtxt(StringIO(basestr),
                             skip_footer=3, invalid_raise=False)
        assert_equal(a, np.array([[1., 1.], [3., 3.], [4., 4.]]))
def test_replace_space(self):
    "Header-name clean-up through 'replace_space' and 'deletechars'."
    text = "A.A, B (B), C:C\n1, 2, 3.14"
    column_types = (int, int, float)
    cases = [
        # Defaults: ' ' becomes '_' and non-alphanumeric chars are deleted.
        ({}, ("AA", "B_B", "CC")),
        # No replacement and no deletion.
        (dict(replace_space='', deletechars=''), ("A.A", "B (B)", "C:C")),
        # No deletion; spaces are still replaced by '_'.
        (dict(deletechars=''), ("A.A", "B_(B)", "C:C")),
    ]
    for extra_options, field_names in cases:
        result = iopro.genfromtxt(StringIO(text), delimiter=",", names=True,
                                  dtype=None, **extra_options)
        expected = np.array((1, 2, 3.14),
                            dtype=list(zip(field_names, column_types)))
        assert_equal(result, expected)
def test_dtype_with_object(self):
    "Test using an explicit dtype with an object"
    # Unconditional failure: everything below this line is unreachable.
    # NOTE(review): presumably a marker for a feature iopro does not
    # support yet -- confirm before re-enabling.
    assert_equal(True, False)
    # NOTE(review): `date` and `time` are imported but not used below; the
    # code calls `strptime` and `datetime`, which must come from elsewhere
    # in the file -- verify they are in scope.
    from datetime import date
    import time
    # Two ';'-separated records, one per line: an index and an ISO date.
    data = """ 1; 2001-01-01
               2; 2002-01-31 """
    ndtype = [('idx', int), ('code', np.object)]
    # Converter for column 1: strip whitespace, then parse as YYYY-MM-DD.
    func = lambda s: strptime(s.strip(), "%Y-%m-%d")
    converters = {1: func}
    test = iopro.genfromtxt(StringIO(data), delimiter=";", dtype=ndtype,
                            converters=converters)
    control = np.array([(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
                       dtype=ndtype)
    assert_equal(test, control)
    # Same fields wrapped in a nested dtype: a NotImplementedError from the
    # reader is turned into a test failure with an explicit message.
    ndtype = [('nest', [('idx', int), ('code', np.object)])]
    try:
        test = iopro.genfromtxt(StringIO(data), delimiter=";",
                                dtype=ndtype, converters=converters)
    except NotImplementedError:
        errmsg = "Nested dtype involving objects should be supported."
        raise AssertionError(errmsg)
def test_gft_using_filename(self):
    "Data can be loaded from a file name as well as from a file object."
    expected = np.arange(6).reshape((2, 3))
    separators = ('\n', '\r\n')
    if sys.version_info[0] < 3:
        # python 3k is known to fail for '\r', so it is only tried on 2.x
        separators += ('\r',)

    for newline in separators:
        text = newline.join(('0 1 2', '3 4 5'))
        # We can't use NamedTemporaryFile on windows, because we cannot
        # reopen the file.
        handle, path = mkstemp()
        try:
            os.write(handle, asbytes(text))
            assert_array_equal(iopro.genfromtxt(path), expected)
        finally:
            os.close(handle)
            os.unlink(path)
def test_iopro():
    """Parse text with iopro and copy the result into a heap-allocated block."""
    # this is kind of stupid right now because it does a copy,
    # but Tight IOPro integration will be a priority...
    heap = Heap()
    source = StringIO(','.join(letters))
    parsed = iopro.genfromtxt(source, dtype='c', delimiter=",")

    addr, block = allocate_numpy(heap, parsed.dtype, parsed.shape)
    block[:] = parsed[:]

    # The block must not own its buffer and must start at the returned
    # address.
    assert not block.flags['OWNDATA']
    assert block.ctypes.data == addr
    # A single arena is in use and the block fits strictly inside it.
    assert len(heap._arenas) == 1
    assert block.nbytes < heap._lengths[0]
    finalize(heap)
def test_usecols_with_integer(self):
    "usecols may be a single integer."
    source = StringIO("1 2 3\n4 5 6")
    first_column = iopro.genfromtxt(source, usecols=0)
    assert_equal(first_column, np.array([1., 4.]))
import iopro
import numpy as np

# Load two cached tables from the working directory.
# NOTE(review): the file-name suffixes (0p28125, 0p5) presumably encode a
# parameter value of 0.28125 and 0.5 -- confirm against the cache writer.
a_0p28125 = iopro.genfromtxt('cached_kde.ada.0p28125.csv')
a_0p50 = iopro.genfromtxt('cached_kde.ada.0p5.csv')
# Row and column counts of the second table.
N, D = a_0p50.shape

# ---------------------------------------------------------------------------
# Disabled experiments, kept for reference. Each one stitches columns from
# different tables together and saves the result as a "hack" file. They use
# names (a0, a5, a10, a_0p0) that are not defined in the code above.
# ---------------------------------------------------------------------------

#a = np.hstack((a0[:,0:2], a5[:,2:]))
#np.savetxt("cached_kde.ada.hack1.csv", a, delimiter=" ")

#a = np.hstack((a5[:,0:2], a0[:,2:]))
#np.savetxt("cached_kde.ada.hack2.csv", a, delimiter=" ")

#a = np.hstack((a5[:,0].reshape((N, 1)), a0[:,1].reshape((N,1)), a5[:,2:]))
#np.savetxt("cached_kde.ada.hack3.csv", a, delimiter=" ")

# !!
# mildly adapting c0?
#a = np.hstack((a_0p0[:,0].reshape((N, 1)),
#               a_0p50[:,1].reshape((N, 1)),
#               a_0p50[:,2].reshape((N, 1)),
#               a_0p50[:,3].reshape((N, 1)),
#               a_0p50[:,4].reshape((N, 1))))
#np.savetxt("cached_kde.ada.hack4.csv", a, delimiter=" ")

# over-adaptation = bad
#a = np.hstack((a5[:,0].reshape((N, 1)),
#               a5[:,1].reshape((N, 1)),
#               a10[:,2].reshape((N, 1)),
#               a5[:,3].reshape((N, 1)),
#               a5[:,4].reshape((N, 1))))
# NOTE(review): this help text looks copy-pasted from a bootstrap option and
# does not describe a float tolerance -- confirm the intended wording.
parser.add_argument('--zero_abs_tol', type=float, default=1e-15,
                    help='whether to bootstrap the data. ')
parser.add_argument('cache_dir', type=str,
                    help='directory holding the cache files. ')
parser.add_argument('n_components', type=int,
                    help='number of components. ')
parser.add_argument('alphas', type=str, nargs='*',
                    help='sensitivity parameters.')
args = parser.parse_args()

# Validate the alpha list before touching any file. nargs='*' accepts zero
# values, which would otherwise surface as an IndexError on alphas[0].
if not args.alphas:
    parser.error('at least one alpha must be given. ')
if len(args.alphas) > 1 and len(args.alphas) != args.n_components:
    raise RuntimeError('alphas must have either 1 or the same number of '
                       'arguments as what is specified in n_components. ')

# assemble cached data based on sensitivity list
# (parenthesised print works under both Python 2 and Python 3)
print('+ assembling cached records. ')
start = time.time()

# The cache for alphas[0] provides the base table, including column 0.
cache_fname = args.cache_dir + '/' + args.alphas[0] + args.cache_suffix
arr = genfromtxt(cache_fname)

if len(args.alphas) > 1:
    # Column i is overwritten with column i of the cache for alphas[i].
    for i in range(1, args.n_components):
        curr_cache_fname = (args.cache_dir + '/' + args.alphas[i] +
                            args.cache_suffix)
        curr_arr = genfromtxt(curr_cache_fname)
        arr[:, i] = curr_arr[:, i]

N, D = arr.shape
def plot_kde2d_summary(
        data2d_fname, kde2d_fname, kde1d_x_fname, kde1d_y_fname,
        title=None, xlabel=None, ylabel=None,
        draw_scatter=True, scatter_color='grey', scatter_marker='o',
        scatter_undersample=1.0, scatter_s=20, scatter_alpha=0.5,
        contour_nlevels=10, data1d_x_bins=100, data1d_y_bins=100,
        figsize=(12, 12), xlim=None, ylim=None):
    """Build a three-panel figure: a central 2D panel with an upper and a
    right-hand marginal panel.

    The central panel is drawn by ``plot_contour2d`` from ``kde2d_fname``
    and, when ``draw_scatter`` is true, overlaid by ``plot_data2d`` from
    ``data2d_fname`` using the ``scatter_*`` options. Each marginal panel
    combines ``plot_data1d`` on ``kde1d_x_fname`` / ``kde1d_y_fname`` with a
    histogram of column 0 / column 1 of ``data2d_fname``, weighted by
    column 2.

    ``xlim`` and ``ylim`` are applied to the central panel, whose resulting
    limits are then copied to the matching axis of each marginal panel.
    ``title``, when truthy, becomes the figure's super-title.

    Returns the tuple ``(figure, central axes, upper axes, right axes)``.
    """
    # Panel geometry as [left, bottom, width, height] rectangles.
    origin, span, gap = 0.1, 0.65, 0.02
    far_edge = origin + span + gap

    fig = plt.figure(1, figsize=figsize)
    center_ax = plt.axes([origin, origin, span, span])
    top_ax = plt.axes([origin, far_edge, span, 0.18])
    right_ax = plt.axes([far_edge, origin, 0.2, span])

    # Blank out the tick labels on the axis each marginal panel has in
    # common with the central panel.
    blank_labels = NullFormatter()
    top_ax.xaxis.set_major_formatter(blank_labels)
    right_ax.yaxis.set_major_formatter(blank_labels)

    # Central panel: contours first, then the optional scatter overlay.
    plot_contour2d(kde2d_fname, ax=center_ax, nlevels=contour_nlevels,
                   colorbar=False, xlabel=xlabel, ylabel=ylabel)
    if draw_scatter:
        plot_data2d(data2d_fname, ax=center_ax,
                    color=scatter_color, marker=scatter_marker,
                    undersample=scatter_undersample,
                    s=scatter_s, alpha=scatter_alpha)

    # Marginal curves.
    plot_data1d(kde1d_x_fname, ax=top_ax, xlabel=' ')
    plot_data1d(kde1d_y_fname, ax=right_ax, xlabel=' ',
                orientation='horizontal')

    # Marginal histograms of the raw data; the third column is the weight.
    samples = iopro.genfromtxt(data2d_fname)
    plot_data1d_histogram(samples[:, 0], weights=samples[:, 2],
                          bins=data1d_x_bins, normed=True,
                          ax=top_ax, xlabel=' ')
    plot_data1d_histogram(samples[:, 1], weights=samples[:, 2],
                          bins=data1d_y_bins, normed=True,
                          ax=right_ax, xlabel=' ',
                          orientation='horizontal')

    # Limits are set on the central panel and copied to the marginals.
    center_ax.set_xlim(xlim)
    center_ax.set_ylim(ylim)
    top_ax.set_xlim(center_ax.get_xlim())
    right_ax.set_ylim(center_ax.get_ylim())

    if title:
        fig.suptitle(title, fontsize=20)

    return fig, center_ax, top_ax, right_ax
# NOTE(review): `csv_file` and `y_pred` are defined by code outside this
# view; these lines are presumably the tail of a prediction-export routine.
# Write a header row, then one (1-based id, prediction) row per entry.
writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
writer.writerow(['ImageId', 'Label'])
for i, p in enumerate(y_pred):
    writer.writerow([i + 1, p])

#------------------------------------------
# MAIN ENTRY POINT
#------------------------------------------
if __name__ == "__main__":

    # Measure the time taken by the main operations
    start = time.time()

    # Phase 1: Training
    # Read the CSV with iopro.genfromtxt. Reference cited by the original
    # author (NumPy's genfromtxt):
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.genfromtxt.html
    import iopro
    # The first line of the file is skipped (skip_header=1).
    my_data = iopro.genfromtxt('Projects/MINST/train.csv', delimiter=',', skip_header=1)

    print('Reading time:', time.time() - start)
    # Restart the timer so the learning phase is measured on its own.
    start = time.time()

    # The network's first argument is the number of columns in a data row.
    ann = ANN(len(my_data[0]), 10, 10)
    ann.Learn(my_data)

    print('Learning time:', time.time() - start, '- size:', len(my_data))
if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('nbag', type=int, help='Number of trials to bag/fit.') parser.add_argument('sample_fname', type=str, help='Path to the data sample cached kde score.') parser.add_argument('output_fname', type=str, help='Path to store output.') parser.add_argument('--obj_scale', type=float, default=1e-8, help='Scale factor to apply to objective function.') args = parser.parse_args() # Read in cached KDE evalutions of the data sample to fit p_raw = genfromtxt(args.sample_fname) N, D = p_raw.shape # Open the file to write results fout = open(args.output_fname, 'w') for i in range(args.nbag): sys.stdout.write('Bag iteration {0}:\n\n'.format(i+1)) sys.stdout.flush() # Create a bagged sample. p = p_raw[np.random.choice(N, N)] # Specify the objective function and its derivatives def F(x=None, z=None):
import sys import math import numpy as np import cvxopt as cvx from iopro import genfromtxt #p = genfromtxt('cached_kde/cached_kde.ada.0p28125.csv') p = genfromtxt('cached_kde/cached_kde.ada.hack10.csv') N, D = p.shape I = np.random.choice(N, N) p = p[I] s = 1e-8 # Truth proportions: # 0.004597168626601867 # 0.009011011183443553 # 0.41817119150694954 # 0.07374540140202024 # 0.49265798277959727 def F(x=None, z=None): if x is None: return 0, cvx.matrix(0.2, (D, 1)) x_arr = np.array(x.trans()).reshape(-1) arg = np.dot(p, x_arr) if np.min(arg) <= 0.0:
def plot_kde2d_summary(
        data2d_fname, kde2d_fname, kde1d_x_fname, kde1d_y_fname,
        title=None, xlabel=None, ylabel=None,
        draw_scatter=True, scatter_color='grey', scatter_marker='o',
        scatter_undersample=1.0, scatter_s=20, scatter_alpha=0.5,
        contour_nlevels=10, data1d_x_bins=100, data1d_y_bins=100,
        figsize=(12, 12), xlim=None, ylim=None):
    """Build a three-panel figure: a central 2D panel with an upper and a
    right-hand marginal panel.

    The central panel is drawn by ``plot_contour2d`` from ``kde2d_fname``
    and, when ``draw_scatter`` is true, overlaid by ``plot_data2d`` from
    ``data2d_fname`` using the ``scatter_*`` options. Each marginal panel
    combines ``plot_data1d`` on ``kde1d_x_fname`` / ``kde1d_y_fname`` with a
    histogram of column 0 / column 1 of ``data2d_fname``, weighted by
    column 2.

    ``xlim`` and ``ylim`` are applied to the central panel, whose resulting
    limits are then copied to the matching axis of each marginal panel.
    ``title``, when truthy, becomes the figure's super-title.

    Returns the tuple ``(figure, central axes, upper axes, right axes)``.
    """
    # Panel geometry as [left, bottom, width, height] rectangles.
    origin, span, gap = 0.1, 0.65, 0.02
    far_edge = origin + span + gap

    fig = plt.figure(1, figsize=figsize)
    center_ax = plt.axes([origin, origin, span, span])
    top_ax = plt.axes([origin, far_edge, span, 0.18])
    right_ax = plt.axes([far_edge, origin, 0.2, span])

    # Blank out the tick labels on the axis each marginal panel has in
    # common with the central panel.
    blank_labels = NullFormatter()
    top_ax.xaxis.set_major_formatter(blank_labels)
    right_ax.yaxis.set_major_formatter(blank_labels)

    # Central panel: contours first, then the optional scatter overlay.
    plot_contour2d(kde2d_fname, ax=center_ax, nlevels=contour_nlevels,
                   colorbar=False, xlabel=xlabel, ylabel=ylabel)
    if draw_scatter:
        plot_data2d(data2d_fname, ax=center_ax,
                    color=scatter_color, marker=scatter_marker,
                    undersample=scatter_undersample,
                    s=scatter_s, alpha=scatter_alpha)

    # Marginal curves.
    plot_data1d(kde1d_x_fname, ax=top_ax, xlabel=' ')
    plot_data1d(kde1d_y_fname, ax=right_ax, xlabel=' ',
                orientation='horizontal')

    # Marginal histograms of the raw data; the third column is the weight.
    samples = iopro.genfromtxt(data2d_fname)
    plot_data1d_histogram(samples[:, 0], weights=samples[:, 2],
                          bins=data1d_x_bins, normed=True,
                          ax=top_ax, xlabel=' ')
    plot_data1d_histogram(samples[:, 1], weights=samples[:, 2],
                          bins=data1d_y_bins, normed=True,
                          ax=right_ax, xlabel=' ',
                          orientation='horizontal')

    # Limits are set on the central panel and copied to the marginals.
    center_ax.set_xlim(xlim)
    center_ax.set_ylim(ylim)
    top_ax.set_xlim(center_ax.get_xlim())
    right_ax.set_ylim(center_ax.get_ylim())

    if title:
        fig.suptitle(title, fontsize=20)

    return fig, center_ax, top_ax, right_ax
import sys
import math
import numpy as np
import cvxopt as cvx
from iopro import genfromtxt

# Load the cached table; the commented line is an alternative input file.
#p = genfromtxt('cached_kde/cached_kde.ada.0p28125.csv')
p = genfromtxt('cached_kde/cached_kde.ada.hack10.csv')
N, D = p.shape

# Resample the rows: N row indices drawn from range(N), then applied to p.
I = np.random.choice(N, N)
p = p[I]

# NOTE(review): `s` is not used in the visible code; presumably a scale
# factor consumed further down -- confirm.
s = 1e-8

# Truth proportions:
# 0.004597168626601867
# 0.009011011183443553
# 0.41817119150694954
# 0.07374540140202024
# 0.49265798277959727

def F(x=None, z=None):
    # NOTE(review): only the opening of this function is visible here; it
    # presumably continues past `return None` (z is unused so far), and is
    # presumably a cvxopt solver callback -- TODO confirm.

    # Called without x: return 0 and a (D, 1) matrix filled with 0.2.
    if x is None:
        return 0, cvx.matrix(0.2, (D, 1))

    # Flatten x to a 1-D array and take its product with the table p.
    x_arr = np.array(x.trans()).reshape(-1)
    arg = np.dot(p, x_arr)
    # Any non-positive entry makes the call return None.
    if np.min(arg) <= 0.0:
        return None
help='directory holding the cache files. ') parser.add_argument('n_components', type=int, help='number of components. ') parser.add_argument('alphas', type=str, nargs='*', help='sensitiviy parameters.') args = parser.parse_args() # assemble cached data based on sensitivity list print '+ assembling cached records. ' start = time.time() cache_fname = args.cache_dir + '/' + args.alphas[0] + args.cache_suffix arr = genfromtxt(cache_fname) if len(args.alphas) == 1: pass else: if len(args.alphas) != args.n_components: raise RuntimeError( 'alphas must have either 1 or the same number of ' 'arguments as what is specified in n_components. ') for i in range(1, args.n_components): curr_cache_fname = args.cache_dir + '/' + args.alphas[ i] + args.cache_suffix curr_arr = genfromtxt(curr_cache_fname) arr[:, i] = curr_arr[:, i]
def test_integer_delimiter(self):
    "An integer delimiter means fixed-width fields of that many characters."
    expected = np.array([[1, 2, 3], [4, 5, 67], [890, 123, 4]])
    text = " 1 2 3\n 4 5 67\n890123 4"
    result = iopro.genfromtxt(StringIO(text), delimiter=3, dtype=int)
    assert_equal(result, expected)
import argparse
# Command line: three positional arguments plus one optional scale factor.
parser = argparse.ArgumentParser()
parser.add_argument('nbag', type=int,
                    help='Number of trials to bag/fit.')
parser.add_argument('sample_fname', type=str,
                    help='Path to the data sample cached kde score.')
parser.add_argument('output_fname', type=str,
                    help='Path to store output.')
parser.add_argument('--obj_scale', type=float, default=1e-8,
                    help='Scale factor to apply to objective function.')
args = parser.parse_args()

# Read in cached KDE evaluations of the data sample to fit
p_raw = genfromtxt(args.sample_fname)
N, D = p_raw.shape

# Open the file to write results
# NOTE(review): `fout` is neither written to nor closed in the visible
# code; presumably that happens further down -- confirm.
fout = open(args.output_fname, 'w')

for i in range(args.nbag):

    # Progress message, flushed so it shows up immediately.
    sys.stdout.write('Bag iteration {0}:\n\n'.format(i + 1))
    sys.stdout.flush()

    # Create a bagged sample: N row indices drawn from range(N).
    p = p_raw[np.random.choice(N, N)]

    # Solve
    sol = perform_mle(p, args.obj_scale)