def test_make_recarray():
    """Test F.make_recarray from lists, arrays and recarrays.

    Covers: 2D list input with dtypes, 1D column/row vectors, renaming
    fields of an existing recarray, shape preservation, the
    ``drop_name_dim`` flag, the deprecation warning for its default, and
    rejection of dtypes for array input.
    """
    # From list / sequence: 2D case.
    data_2d = [(3, 4), (4, 6), (7, 9)]
    # Use builtin float / int: the np.float / np.int aliases were removed
    # in NumPy 1.24 and were always equivalent to the builtins.
    m = F.make_recarray(data_2d, 'wv', [float, int])
    assert_starr_equal(m, fromrecords(
        data_2d, dtype=[('w', float), ('v', int)]))
    # 1D vector, sequence and array
    for data_1d in (range(4), np.arange(4).astype(float)):
        # Column vector. For array case, drop name dim for shape match
        assert_starr_equal(
            F.make_recarray(data_1d, ['f1'], drop_name_dim=True),
            np.arange(4).astype([('f1', float)]))
        # Row vector. Drop name dim for shape match
        assert_starr_equal(
            F.make_recarray(data_1d, 'abcd', drop_name_dim=True),
            np.array(tuple(range(4)), dtype=[(c, float) for c in 'abcd']))
    # From another recarray, renaming fields
    m2 = F.make_recarray(m, 'xy')
    assert_starr_equal(m2, fromrecords(
        data_2d, dtype=[('x', float), ('y', int)]))
    # Recarrays don't change shape, trailing dimensions or no
    assert_starr_equal(F.make_recarray(m2, 'xy'), m2)
    m2_dash = np.reshape(m2, (3, 1, 1, 1))
    assert_starr_equal(F.make_recarray(m2_dash, 'xy'), m2_dash)
    # From an array, drop dim case
    arr = np.array(data_2d)
    assert_equal(arr.shape, (3, 2))
    assert_starr_equal(F.make_recarray(arr, 'xy', drop_name_dim=True),
                       fromrecords(data_2d, dtype=[('x', int), ('y', int)]))
    assert_starr_equal(
        F.make_recarray(arr.astype(float), 'xy', drop_name_dim=True),
        fromrecords(data_2d, dtype=[('x', float), ('y', float)]))
    assert_starr_equal(
        F.make_recarray(arr.reshape((3, 1, 2)), 'xy', drop_name_dim=True),
        fromrecords(data_2d, dtype=[('x', int), ('y', int)]).reshape((3, 1)))
    # Not drop dim case, trailing length 1 axis.
    assert_starr_equal(
        F.make_recarray(arr, 'xy', drop_name_dim=False),
        fromrecords(data_2d, dtype=[('x', int), ('y', int)]).reshape((3, 1)))
    assert_starr_equal(
        F.make_recarray(arr.reshape((3, 1, 2)), 'xy', drop_name_dim=False),
        fromrecords(data_2d, dtype=[('x', int), ('y', int)]).reshape(
            (3, 1, 1)))
    # False case is the default, with warning (for now)
    with catch_warnings(record=True) as warn_list:
        # Clear any pre-existing warnings cached in formula module, to make
        # sure warning is triggered.  See
        # nibabel.testing.clear_and_catch_warnings for detail.
        if hasattr(F, '__warningregistry__'):
            F.__warningregistry__.clear()
        simplefilter('always')
        assert_starr_equal(
            F.make_recarray(arr, 'xy'),
            fromrecords(data_2d, dtype=[('x', int), ('y', int)]).reshape(
                (3, 1)))
        assert_equal(warn_list[0].category, VisibleDeprecationWarning)
    # Can't pass dtypes to array version of function
    assert_raises(ValueError, F.make_recarray, arr, 'xy', [int, float])
def opt(products):
    """Run the MACD parameter optimisation over every product.

    For each product in *products* the single-product optimiser
    ``opt_single`` is run with a shared ``MACD`` model; the collected
    rows are turned into a record array, dumped to ``opt.csv`` in the
    configured output directory, and returned.

    Parameters
    ----------
    products : iterable
        Objects with a ``code`` attribute, assignable to
        ``model.product``.

    Returns
    -------
    numpy record array with fields code/nfast/nslow/nmacd/revenue.
    """
    t0 = time.time()
    macd = MACD()
    rows = []
    for product in products:
        logger.info('Start analysis of %s', product.code)
        macd.product = product
        rows.extend(opt_single(macd))
    # Stop the clock before the (cheap) post-processing, matching what
    # the timing message describes: per-combination compute cost.
    elapsed = time.time() - t0
    table = fromrecords(
        rows, names=['code', 'nfast', 'nslow', 'nmacd', 'revenue'])
    rec2csv(table, join(CONFIG['out_p'], 'opt.csv'))
    logger.info(('Total %s para combinations computed, '
                 'roughly %.3f seconds, %.3f ms per calc.'),
                len(table), elapsed, elapsed / len(table) * 1000)
    return table
def read_text(filename):
    """Read in .TXT ``filename`` and return a numpy record array.

    The file is space-delimited; the module-level ``names`` and ``types``
    drive the field conversion via ``csv_parse``.

    Parameters
    ----------
    filename : str
        Path to the .TXT file.

    Returns
    -------
    numpy record array with fields ``names``.
    """
    parser = csv_parse.csv_parse(names, types)
    data = []
    # Use a context manager so the file is closed deterministically
    # (the original left the handle to the garbage collector).
    with open(filename, "r") as fileobj:
        infile = csv.reader(fileobj, delimiter=" ", skipinitialspace=True)
        # skip three header lines, use the next one
        for _ in range(4):
            # next(infile) instead of infile.next(): csv reader objects
            # have no .next() method on Python 3.
            line = next(infile)
        try:
            while line:
                data.append(parser.convert2types(line))
                line = next(infile)
        except StopIteration:
            # we reached the end of the file
            pass
    result = rec.fromrecords(data, names=names,
                             formats=parser._formats, shape=len(data))
    if "1X" in filename:
        # 'x' and 'y' are flipped with respect to each other (handled
        # elsewhere), and one of them is also 'flipped' about the axis.
        result["x"] = -result["x"]
    return result
def load_array(data_file, col_name_row=3, header_rows=4, time_slice_col=0,
               time_slice='', record_sep='\t'):
    """
    Returns a records array (from numpy.core.records).

    data_file = text file with the data. One row per data record
    col_name_row = the row number that has the column names to be matched
        to col_name, with the row numbers starting at 1
    header_rows = the number of header rows before the data starts
    time_slice_col = the column number that has the time slice data
    time_slice = optional (start, end) pair of time-slice bounds; the
        default '' disables slicing (indexing it raises IndexError, which
        is caught below — deliberate exception-driven control flow)
    record_sep = the character(s) that separate the data items in the
        data record
    """
    # NOTE(review): the file handle is never closed; consider a `with`
    # block — verify no caller relies on the current behavior.
    data = open(data_file)
    data_list = []
    header_names = []
    for i in range(col_name_row - 1):
        # advance to row with column names
        data.readline()
    col_names = data.readline().split(record_sep)
    for name in col_names:
        # replace white space in name with '_' to avoid errors later
        name = re.sub(r'\s+', r'_', name)
        header_names.append(name)
    for i in range(header_rows - col_name_row):
        # advance to first row with data
        line = data.readline()
    # advance to start of desired data if it is specified
    try:
        if time_slice[0]:
            data_items = data.readline().split(record_sep)
            # NOTE(review): this compares the raw string field against
            # time_slice[0] lexicographically (no float conversion yet) —
            # confirm the time column format makes that ordering valid.
            while data_items[time_slice_col] < time_slice[0]:
                data_items = data.readline().split(record_sep)
    except IndexError:
        # time_slice == '' (no slicing requested): skip the fast-forward.
        pass
    while True:
        data_items = data.readline().split(record_sep)
        if data_items[0] == "":
            # EOF (readline returned '' so the first field is empty):
            # return everything accumulated so far.
            return npr.fromrecords(data_list, names=header_names)
        # check to see if time is past end of desired data if it is specified
        try:
            if time_slice[1]:
                try:
                    # Again a raw string comparison against the bound.
                    if data_items[time_slice_col] > time_slice[1]:
                        return npr.fromrecords(data_list, names=header_names)
                except IndexError:
                    # Short/ragged row: treat as end of data.
                    return npr.fromrecords(data_list, names=header_names)
        except IndexError:
            # time_slice has no end bound: keep reading to EOF.
            pass
        for n, item in enumerate(data_items):
            try:
                data_items[n] = float(item)
            except ValueError:
                # Non-numeric field: empty strings become 0.0, anything
                # else is kept as the original string.
                if data_items[n] == '':
                    data_items[n] = 0.0
        # Drop the last item — presumably the empty field produced by a
        # trailing separator/newline; TODO confirm against the data files.
        data_list.append(data_items[:-1])
records.fromarrays([[1]], formats=dict( a=1)) # E: Argument "formats" to "fromarrays" has incompatible type records.fromarrays([[1]], names=dict( a=1)) # E: Argument "names" to "fromarrays" has incompatible type records.fromarrays([[1]], titles=dict( a=1)) # E: Argument "titles" to "fromarrays" has incompatible type records.fromarrays( [[1]], aligned=1) # E: Argument "aligned" to "fromarrays" has incompatible type records.fromarrays( [[1]], byteorder=1 ) # E: Argument "byteorder" to "fromarrays" has incompatible type # Testing various incompatible args for fromrecords records.fromrecords( dict(a=1) ) # E: Argument 1 to "fromrecords" has incompatible type "Dict[str, int]" records.fromrecords(datetime( 1970, 1, 1)) # E: Argument 1 to "fromrecords" has incompatible type "datetime" records.fromrecords([(1, )], dtype=dict( a=1)) # E: Argument "dtype" to "fromrecords" has incompatible type records.fromrecords([(1, )], formats=dict( a=1)) # E: Argument "formats" to "fromrecords" has incompatible type records.fromrecords([(1, )], names=dict( a=1)) # E: Argument "names" to "fromrecords" has incompatible type records.fromrecords([(1, )], titles=dict( a=1)) # E: Argument "titles" to "fromrecords" has incompatible type records.fromrecords( [(1, )],
def clean_file_qso(dataFile):
    """Clean one QSO FITS catalogue and return the good object IDs.

    Filters the catalogue to rows with at least one positive ZBEST entry
    and all five PSFFLUX_CLEAN bands valid, then rejects any object whose
    per-epoch PSF magnitudes are inconsistent with its coadd magnitudes
    (coadd outside the per-epoch min/max range, or brighter than a
    per-band sanity limit).

    NOTE: Python 2 code (print statements, xrange).

    Parameters: dataFile — path to a FITS file readable by pyfits,
    extension 1 holding the catalogue.
    Returns: list of good object ID strings.
    """
    print "\t Beginning to clean file: " + dataFile
    data = pyfits.open(dataFile)[1].data
    indices = np.array(range(len(data)))
    searchIDs = list(set(data.objid))
    print "Number of objids: %i. Number of indices: %i" % (len(searchIDs),
                                                           len(indices))
    # "Good redshift": any of these ZBEST columns positive.
    # NOTE(review): columns 1, 9 and 10 are deliberately (?) skipped —
    # confirm against the ZBEST column definitions.
    speczgood = ((data.ZBEST[:, 0] > 0) | (data.ZBEST[:, 7] > 0) |
                 (data.ZBEST[:, 2] > 0) | (data.ZBEST[:, 3] > 0) |
                 (data.ZBEST[:, 4] > 0) | (data.ZBEST[:, 5] > 0) |
                 (data.ZBEST[:, 6] > 0) | (data.ZBEST[:, 8] > 0) |
                 (data.ZBEST[:, 11] > 0) | (data.ZBEST[:, 12] > 0) |
                 (data.ZBEST[:, 13] > 0))
    # "Good flux": all five bands differ from the -9999.0 sentinel.
    fluxgood = ((data.PSFFLUX_CLEAN[:, 0] != -9999.0) &
                (data.PSFFLUX_CLEAN[:, 1] != -9999.0) &
                (data.PSFFLUX_CLEAN[:, 2] != -9999.0) &
                (data.PSFFLUX_CLEAN[:, 3] != -9999.0) &
                (data.PSFFLUX_CLEAN[:, 4] != -9999.0))
    data2 = data[speczgood & fluxgood]
    indices = np.array(range(len(data2)))
    searchIDs = list(set(data2.objid))
    print "Number of objids with good zs: %i. Number of indices with good zs: %i" % (
        len(searchIDs), len(indices))
    bad_objids = []
    good_objids = []
    # NOTE(review): psfMagAll_cleaned and the psfMag_* accumulators are
    # filled (or declared) but never read in this function — possibly
    # vestigial; confirm before removing.
    psfMagAll_cleaned = []
    psfMag_u = []
    psfMag_g = []
    psfMag_r = []
    psfMag_i = []
    psfMag_z = []
    count = 0.  # NOTE(review): counter is incremented but never used.
    for sid in searchIDs:
        count += 1.
        bad_object = False
        # All epochs of this object.
        x = data2[data2.objid == sid]
        if DEBUG == True:
            print x
        psfMagList = []
        for i in xrange(len(x)):
            if i == 0:
                # Coadd magnitudes are per-object: read them once from
                # the first epoch; a ValueError (missing column/value)
                # yields NaN, which fails every comparison below and so
                # never flags the object as bad.
                try:
                    coadd_u = x.coadd_u[i]
                except ValueError:
                    coadd_u = np.nan
                try:
                    coadd_g = x.coadd_g[i]
                except ValueError:
                    coadd_g = np.nan
                try:
                    coadd_r = x.coadd_r[i]
                except ValueError:
                    coadd_r = np.nan
                try:
                    coadd_i = x.coadd_i[i]
                except ValueError:
                    coadd_i = np.nan
                try:
                    coadd_z = x.coadd_z[i]
                except ValueError:
                    coadd_z = np.nan
            psfMag_u.append(x.u[i])
            psfMag_g.append(x.g[i])
            psfMag_r.append(x.r[i])
            psfMag_i.append(x.i[i])
            psfMag_z.append(x.z[i])
            psfMag = np.array([x.u[i], x.g[i], x.r[i], x.i[i], x.z[i]])
            psfMagList.append(psfMag.tolist())
            # Rebuilt every epoch over the epochs seen so far; the checks
            # below therefore run on a growing prefix of the light curve.
            # NOTE(review): presumably only the final (full) pass matters
            # for correctness — confirm; hoisting would be an optimization.
            psfMag = rec.fromrecords(psfMagList, names=','.join(SDSS_FILTERS))
            # Flag the object if the coadd magnitude falls outside the
            # per-epoch range, or exceeds the per-band sanity limit.
            if (min(psfMag.u) > coadd_u) or (max(psfMag.u) < coadd_u) or (coadd_u > 27.0):
                if (x.objid[i] in bad_objids) == False:
                    bad_objids.append(x.objid[i])
                bad_object = True
            if (min(psfMag.g) > coadd_g) or (max(psfMag.g) < coadd_g) or (coadd_g > 25.0):
                if (x.objid[i] in bad_objids) == False:
                    bad_objids.append(x.objid[i])
                bad_object = True
            if (min(psfMag.r) > coadd_r) or (max(psfMag.r) < coadd_r) or (coadd_r > 23.0):
                if (x.objid[i] in bad_objids) == False:
                    bad_objids.append(x.objid[i])
                bad_object = True
            if (min(psfMag.i) > coadd_i) or (max(psfMag.i) < coadd_i) or (coadd_i > 22.0):
                if (x.objid[i] in bad_objids) == False:
                    bad_objids.append(x.objid[i])
                bad_object = True
            if (min(psfMag.z) > coadd_z) or (max(psfMag.z) < coadd_z) or (coadd_z > 22.0):
                if (x.objid[i] in bad_objids) == False:
                    bad_objids.append(x.objid[i])
                bad_object = True
        if bad_object == False:
            # Relies on Python 2's leaking of the loop variable `i`
            # (last epoch index) after the inner loop ends.
            good_objids.append(str(x.objid[i]))
    print "Number of good objects: %d. Number of bad objects: %d" % (
        len(good_objids), len(bad_objids))
    print "\t Done cleaning file."
    return good_objids
def load_array(data_file, col_name_row=3, header_rows=4, time_slice_col=0,
               time_slice='', record_sep='\t'):
    """
    Returns a records array (from numpy.core.records).

    NOTE(review): near-duplicate of the other load_array in this file —
    consider consolidating.

    data_file = text file with the data. One row per data record
    col_name_row = the row number that has the column names to be matched
        to col_name, with the row numbers starting at 1
    header_rows = the number of header rows before the data starts
    time_slice_col = the column number that has the time slice data
    time_slice = optional (start, end) time-slice bounds; the default ''
        disables slicing (indexing it raises IndexError, caught below)
    record_sep = the character(s) that separate the data items in the
        data record
    """
    # NOTE(review): the file handle is never closed — consider `with`.
    DATA = open(data_file)
    data_list = []
    header_names = []
    for i in range(col_name_row - 1):
        # advance to row with column names
        DATA.readline()
    col_names = DATA.readline().split(record_sep)
    for name in col_names:
        # replace white space in name with '_' to avoid errors later
        name = re.sub(r'\s+', r'_', name)
        header_names.append(name)
    for i in range(header_rows - col_name_row):
        # advance to first row with data
        line = DATA.readline()
    # advance to start of desired data if it is specified
    try:
        if time_slice[0]:
            DATA_items = None  # (placeholder comment removed)
            data_items = DATA.readline().split(record_sep)
            # NOTE(review): lexicographic string comparison — the fields
            # have not been converted to float yet; confirm the time
            # column format makes this ordering valid.
            while data_items[time_slice_col] < time_slice[0]:
                data_items = DATA.readline().split(record_sep)
    except IndexError:
        # time_slice == '' : no start bound, skip the fast-forward.
        pass
    while 1:
        data_items = DATA.readline().split(record_sep)
        if data_items[0] == "":
            # EOF: readline returned '', so the first field is empty.
            return NR.fromrecords(data_list, names=header_names)
        # check to see if time is past end of desired data if it is specified
        try:
            if time_slice[1]:
                try:
                    # Raw string comparison against the end bound.
                    if data_items[time_slice_col] > time_slice[1]:
                        return NR.fromrecords(data_list, names=header_names)
                except IndexError:
                    # Short/ragged row: treat as end of data.
                    return NR.fromrecords(data_list, names=header_names)
        except IndexError:
            # No end bound given: keep reading to EOF.
            pass
        for n, item in enumerate(data_items):
            try:
                data_items[n] = float(item)
            except ValueError:
                # Non-numeric field: empty strings become 0.0, others are
                # kept as the original string.
                if data_items[n] == '':
                    data_items[n] = 0.0
        # Drop the last item — presumably the empty field from a trailing
        # separator/newline; TODO confirm against the data files.
        data_list.append(data_items[:-1])