def test_fractionate(self):
    """Check two spot values in the output of fractionate().

    Loads the 'arch1.csv' and 'arch2.csv' fixtures and passes them to
    fractionate() with (10, 10), (5, 5) and the column names
    ['row', 'column'].
    """
    datasets = [csv2rec(fixture) for fixture in ('arch1.csv', 'arch2.csv')]
    fr = fractionate(datasets, (10, 10), (5, 5), ['row', 'column'])

    first, second = fr[0], fr[1]
    self.assertTrue(first['row'][3] == 5)
    self.assertTrue(second['column'][2] == 0)
def _make(self, output_file, basin_poly, ba_csv, fa_ncons_csv, area_csv, arid_thresh=0.03, use_thresh=0.012, **kwargs):
    """Compute the per-basin ``ncons / mean(ba)`` indicator and join it to a copy of basin_poly.

    The ``ba_csv`` table is read with its first row skipped; its first column
    supplies the basin ids and the remaining columns are averaged per row.
    The "ncons" and "f_area" vectors are re-ordered to those ids with
    ``gen_merge.arrange_vector_by_ids``.

    Basins for which both ``ncons / area < use_thresh`` and
    ``mean_ba / area < arid_thresh`` hold are masked: their score is
    overwritten with MINSCORE and the mask is passed to ``self.categorize``.

    Finally ``basin_poly`` is copied to ``output_file`` and the copy is
    extended with the computed fields, keyed on BASIN_ID_FIELD.
    Additional keyword arguments are accepted and ignored.
    """
    print("loading data")
    ba_table = np.genfromtxt(ba_csv, np.double, skip_header=1, delimiter=',')
    area_rec = mlab.csv2rec(area_csv)
    ncons_rec = mlab.csv2rec(fa_ncons_csv)

    basin_ids = ba_table[:, 0]
    mean_ba = np.mean(ba_table[:, 1:], 1)

    ncons = gen_merge.arrange_vector_by_ids(
        ncons_rec["ncons"], ncons_rec["basinid"], basin_ids).astype(np.double)
    area = gen_merge.arrange_vector_by_ids(
        area_rec["f_area"], area_rec["basinid"], basin_ids).astype(np.double)

    wri = ncons / mean_ba

    # Element-wise product of the two boolean tests acts as a logical AND.
    low_use = ncons / area < use_thresh
    arid = mean_ba / area < arid_thresh
    miscmask = low_use * arid

    wri_s = self.score(wri)
    wri_s[miscmask] = MINSCORE
    wri_cat = self.categorize(wri_s, miscmask)

    field = self.plot_field_name
    column_names = (BASIN_ID_FIELD, "BA", "FA_NCONS", field,
                    "%s_s" % field, "%s_cat" % field)
    joinarray = np.rec.fromarrays(
        (ba_table[:, 0], mean_ba, ncons, wri, wri_s, wri_cat),
        names=column_names)

    print("joining data")
    ap.CopyFeatures_management(basin_poly, output_file)
    ap.da.ExtendTable(output_file, BASIN_ID_FIELD, joinarray, BASIN_ID_FIELD)
def test_merge_formatted(self):
    """Check column sums of merge_formatted() applied to the two archival fixtures."""
    datasets = [csv2rec(fixture) for fixture in ('arch1.csv', 'arch2.csv')]
    merged = merge_formatted(datasets)

    # NOTE(review): the key 'rew' is what the test has always used; confirm it
    # is really a column of the fixtures and not a typo for another name.
    rew_total = sum(merged['rew'])
    column_total = sum(merged['column'])
    self.assertTrue(rew_total == 2)
    self.assertTrue(column_total == 12)
def test_sanity():
    """Single subject fitting of FIAC model.

    Generator test: for subject 3, run 1 ("event") and run 3 ("block") the
    design matrix is rebuilt from the experiment/initial CSV files under
    DATADIR and every column is matched, via ``matchcol``, against the
    reference design stored in ``FIACdesigns.designs``.  One check is
    yielded per column.
    """
    from nipy.modalities.fmri.fmristat.tests import FIACdesigns

    # Based on file
    # subj3_evt_fonc1.txt
    # subj3_bloc_fonc3.txt
    for subj, run, dtype in [(3, 1, "event"), (3, 3, "block")]:
        nvol = 191
        TR = 2.5
        Tstart = 1.25

        volume_times = np.arange(nvol) * TR + Tstart
        volume_times_rec = formula.make_recarray(volume_times, "t")

        path_dict = {"subj": subj, "run": run}
        # The design directory is chosen by probing for the block-design file.
        if exists(pjoin(DATADIR, "fiac_%(subj)02d", "block",
                        "initial_%(run)02d.csv") % path_dict):
            path_dict["design"] = "block"
        else:
            path_dict["design"] = "event"

        experiment = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s",
                                   "experiment_%(run)02d.csv") % path_dict)
        initial = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s",
                                "initial_%(run)02d.csv") % path_dict)

        X_exper, cons_exper = design.event_design(experiment,
                                                  volume_times_rec,
                                                  hrfs=delay.spectral)
        X_initial, _ = design.event_design(initial, volume_times_rec,
                                           hrfs=[hrf.glover])
        X, cons = design.stack_designs((X_exper, cons_exper),
                                       (X_initial, {}))

        # Reference design for this design type, parsed from its text form.
        Xf = np.loadtxt(StringIO(FIACdesigns.designs[dtype]))
        for i in range(X.shape[1]):
            yield nitest.assert_true, (matchcol(X[:, i], Xf.T)[1] > 0.999)
def extract_lai_fpar(above_par_dat, below_par_dat):
    """Pair each below-canopy PAR reading with above-canopy PAR and derive fAPAR.

    Parameters
    ----------
    above_par_dat, below_par_dat : CSV sources accepted by ``mlab.csv2rec``.
        Both need 'date' and 'par' columns; the below-canopy table also
        needs 'plot'.

    Returns
    -------
    Record array with the fields date, plot, lat, lon, above_par, below_par
    and fapar, one record per below-canopy reading.

    Notes
    -----
    Point coordinates are read from 'lonlat_threet.csv' in the working
    directory and the coordinate lists are repeated twice.
    """
    above_par_ra = mlab.csv2rec(above_par_dat)
    below_par_ra = mlab.csv2rec(below_par_dat)
    points_ra = mlab.csv2rec('lonlat_threet.csv')

    plot = below_par_ra['plot']
    date = below_par_ra['date']
    below_par = below_par_ra['par']
    lats = np.array(points_ra['latitude'].tolist() * 2)
    lons = np.array(points_ra['longitude'].tolist() * 2)

    above_dates = above_par_ra['date']
    above_series = above_par_ra['par']

    above_par = []
    fapar = []
    for below_value, timestamp in zip(below_par, date):
        par_idx = find_nearest_idx(above_dates, timestamp)

        # Mean over the nearest sample and its neighbours.  The window is
        # clipped to the series bounds: indexing idx-1 at idx == 0 used to
        # wrap round to the last sample, and idx+1 at the last sample raised
        # IndexError.
        window_start = max(par_idx - 1, 0)
        above_par.append(np.mean(above_series[window_start:par_idx + 2]))

        above_value = above_series[par_idx]
        if above_value < below_value:
            fapar.append(0)
        else:
            # float() keeps integer-typed columns from floor-dividing to 0.
            # NOTE(review): above_value == 0 still divides by zero here.
            fapar.append((above_value - below_value) / float(above_value))

    above_par = np.array(above_par)
    fapar = np.array(fapar)

    newra = np.column_stack((date, plot, lats, lons, above_par, below_par, fapar))
    new_ra = np.rec.fromarrays(newra.transpose(),
                               dtype=[('date', 'object'),
                                      ('plot', 'i'),
                                      ('lat', 'f'),
                                      ('lon', 'f'),
                                      ('above_par', 'f'),
                                      ('below_par', 'f'),
                                      ('fapar', 'f')])
    return new_ra
def test_transform_data():
    """
    Test the transformation of raw data into the arrays used for fitting a
    function.
    """
    from matplotlib import mlab

    # Real data first: a pre-loaded record array and the path to the same
    # file must end up as the same arrays.
    ortho_path = op.join(data_path, 'ortho.csv')
    ortho = mlab.csv2rec(ortho_path)
    # NOTE(review): 'para.csv' is loaded but never used below.
    para = mlab.csv2rec(op.join(data_path, 'para.csv'))

    from_records = sb.transform_data(ortho)
    from_path = sb.transform_data(ortho_path)
    x1, y1, n1 = from_records
    x2, y2, n2 = from_path
    npt.assert_equal(x1, x2)
    npt.assert_equal(y1, y2)

    # Then synthetic data with a known answer.
    trials = np.array([[0.1, 2], [0.1, 1],
                       [0.2, 2], [0.2, 2],
                       [0.3, 1], [0.3, 1]])
    my_data = pd.DataFrame(trials, columns=['contrast1', 'answer'])
    my_x, my_y, my_n = sb.transform_data(my_data)

    npt.assert_equal(my_x, np.array([0.1, 0.2, 0.3]))
    npt.assert_equal(my_y, np.array([0.5, 0, 1.0]))
    npt.assert_equal(my_n, np.array([2, 2, 2]))
def makediffs(models = _allmodels, verbose = False, kpp = True):
    """Write difference and percent-difference tables of pykpp against KPP output.

    For each model, '<model>/<model>.pykpp.dat' is compared with the KPP
    reference '<model>.dat'.  With ``kpp`` true the reference is read from
    the model directory; otherwise it is read from the directory of this
    module and the model must be listed in ``_modelconfigs``.

    Results are written to '<model>/<model>.diff.csv' and
    '<model>/<model>.pct.csv'.  Columns present only in the pykpp output
    (other than 't') are filled with NaN.  ``verbose`` is accepted but unused.
    """
    for model in models:
        model = os.path.splitext(os.path.basename(model))[0]

        if kpp:
            reference_dir = model
        else:
            if model not in _modelconfigs:
                raise IOError('If KPP is not properly installed, you cannot run tests on mechanisms other than cbm4, saprc99, and small_strato.')
            reference_dir = os.path.dirname(__file__)
        kppdat = csv2rec(os.path.join(reference_dir, model + '.dat'), delimiter = ' ')
        pykppdat = csv2rec(os.path.join(model, model + '.pykpp.dat'), delimiter = ',')

        diff = pykppdat.copy()
        pct = pykppdat.copy()

        kpp_names = set(kppdat.dtype.names)
        pykpp_names = set(pykppdat.dtype.names)
        shared = kpp_names.intersection(pykpp_names)
        pykpp_only = pykpp_names.difference(kpp_names)
        # 't' is handled separately below; raises KeyError if it is absent.
        pykpp_only.remove('t')

        for column in pykpp_only:
            diff[column] = np.nan
            pct[column] = np.nan

        for column in shared:
            reference = kppdat[column][:]
            diff[column] = pykppdat[column] - reference
            pct[column] = diff[column] / kppdat[column][:] * 100

        # The KPP 'time' column is scaled by 3600 and offset by the first
        # pykpp time before being compared with the pykpp 't' column.
        diff['t'] = pykppdat['t'] - (kppdat['time'] * 3600. + pykppdat['t'][0])
        pct['t'] = diff['t'] / (kppdat['time'] * 3600. + pykppdat['t'][0]) * 100

        for table, suffix in ((diff, '.diff.csv'), (pct, '.pct.csv')):
            rec2csv(table, os.path.join(model, model + suffix), delimiter = ',')
def get_experiment_initial(path_dict):
    """Get the record arrays for the experimental/initial designs.

    Parameters
    ----------
    path_dict : dict
        containing key 'rootdir', 'run', 'subj'

    Returns
    -------
    experiment, initial : Two record arrays.

    Raises
    ------
    IOError
        If the experiment CSV for the requested run does not exist.
    """
    # The .csv files are read into recarrays with fields
    #   experiment: ['time', 'sentence', 'speaker']
    #   initial:    ['time', 'initial']
    rootdir = path_dict['rootdir']

    # Format the file names before joining so that a '%' in rootdir cannot be
    # mistaken for a format directive.
    experiment_path = pjoin(rootdir, "experiment_%(run)02d.csv" % path_dict)
    initial_path = pjoin(rootdir, "initial_%(run)02d.csv" % path_dict)

    if not exists(experiment_path):
        # The run number used to be formatted from the 'subj' key.
        e = "can't find design for subject=%(subj)d,run=%(run)d" % path_dict
        raise IOError(e)

    experiment = csv2rec(experiment_path)
    initial = csv2rec(initial_path)
    return experiment, initial
def _make(self, output_file, basin_poly, ba_csv, withdrawal_csv, consumption_csv, area_csv, arid_thresh=0.03, use_thresh=0.012, **kwargs):
    """Compute the per-basin ``ut / mean(ba)`` indicator and join it to a copy of basin_poly.

    The ``ba_csv`` table is read with its first row skipped; its first column
    supplies the basin ids and the remaining columns are averaged per row.
    The "ut", "ct" and "f_area" vectors are re-ordered to those ids with
    ``gen_merge.arrange_vector_by_ids``.

    Basins for which both ``ut / area < use_thresh`` and
    ``mean_ba / area < arid_thresh`` hold are masked: their score is
    overwritten with MAXSCORE and the mask is passed to ``self.categorize``.

    Finally ``basin_poly`` is copied to ``output_file`` and the copy is
    extended with the computed fields, keyed on BASIN_ID_FIELD.
    Additional keyword arguments are accepted and ignored.
    """
    print("loading data")
    ba_table = np.genfromtxt(ba_csv, np.double, skip_header=1, delimiter=',')
    area_rec = mlab.csv2rec(area_csv)
    withdrawal_rec = mlab.csv2rec(withdrawal_csv)
    consumption_rec = mlab.csv2rec(consumption_csv)

    basin_ids = ba_table[:, 0]
    mean_ba = np.mean(ba_table[:, 1:], 1)

    def aligned(values, value_ids):
        # Re-order a vector to the basin-id order of the ba table.
        return gen_merge.arrange_vector_by_ids(values, value_ids, basin_ids).astype(np.double)

    ut = aligned(withdrawal_rec["ut"], withdrawal_rec["basinid"])
    uc = aligned(consumption_rec["ct"], consumption_rec["basinid"])
    area = aligned(area_rec["f_area"], area_rec["basinid"])

    bws = ut / mean_ba

    # Element-wise product of the two boolean tests acts as a logical AND.
    low_use = ut / area < use_thresh
    arid = mean_ba / area < arid_thresh
    miscmask = low_use * arid

    bws_s = self.score(bws)
    bws_s[miscmask] = MAXSCORE
    bws_cat = self.categorize(bws_s, miscmask)

    field = self.plot_field_name
    column_names = (BASIN_ID_FIELD, "BA", "WITHDRAWAL", "CONSUMPTION", field,
                    "%s_s" % field, "%s_cat" % field, "AREAM3")
    joinarray = np.rec.fromarrays(
        (ba_table[:, 0], mean_ba, ut, uc, bws, bws_s, bws_cat, area),
        names=column_names)

    print("joining data")
    ap.CopyFeatures_management(basin_poly, output_file)
    ap.da.ExtendTable(output_file, BASIN_ID_FIELD, joinarray, BASIN_ID_FIELD)
def rewrite_spec(subj, run, root = "/home/jtaylo/FIAC-HBM2009"):
    """
    Take a FIAC specification file and get two specifications
    (experiment, begin).

    This creates two new .csv files under DATADIR, one for the experimental
    conditions, the other for the "initial" confounding trials that are to
    be modelled out.

    For the block design, the "initial" trials are the first trials of each
    block (every sixth row, starting with the first). For the event designs,
    the "initial" trials are made up of just the first trial.

    Returns
    -------
    d, b : record array of experimental trials ('time', 'sentence',
        'speaker') and array of initial trials ('time', 'initial').
    """
    fields = {'root': root, 'subj': subj, 'run': run}

    # An event-design spec file, if present, decides the design type.
    if exists(pjoin("%(root)s", "fiac%(subj)d",
                    "subj%(subj)d_evt_fonc%(run)d.txt") % fields):
        designtype = 'evt'
    else:
        designtype = 'bloc'

    # Fix the format of the specification so it is
    # more in the form of a 2-way ANOVA
    eventdict = {1:'SSt_SSp', 2:'SSt_DSp', 3:'DSt_SSp', 4:'DSt_DSp'}

    s = StringIO()
    w = csv.writer(s)
    w.writerow(['time', 'sentence', 'speaker'])

    fields['design'] = designtype
    specfile = pjoin("%(root)s", "fiac%(subj)d",
                     "subj%(subj)d_%(design)s_fonc%(run)d.txt") % fields
    d = np.loadtxt(specfile)
    for row in d:
        w.writerow([row[0]] + eventdict[row[1]].split('_'))
    s.seek(0)
    d = csv2rec(s)

    # Now, take care of the 'begin' event
    # This is due to the FIAC design
    # Builtin float/int replace the np.float/np.int aliases, which newer
    # NumPy releases no longer provide; the resulting dtype is the same.
    initial_dtype = np.dtype([('time', float), ('initial', int)])
    if designtype == 'evt':
        b = np.array([(d[0]['time'], 1)], initial_dtype)
        d = d[1:]
    else:
        k = np.equal(np.arange(d.shape[0]) % 6, 0)
        b = np.array([(tt, 1) for tt in d[k]['time']], initial_dtype)
        d = d[~k]

    fields['design'] = {'bloc':'block', 'evt':'event'}[designtype]

    fname = pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s",
                  "experiment_%(run)02d.csv") % fields
    rec2csv(d, fname)
    # The file is read back but the result is not used.
    # NOTE(review): presumably a round-trip sanity check — confirm.
    csv2rec(fname)

    fname = pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s",
                  "initial_%(run)02d.csv") % fields
    rec2csv(b, fname)
    csv2rec(fname)

    return d, b
def test_format_dense(self):
    """Check the 'count' column produced by format_dense() for both fixtures."""
    datasets = [csv2rec(fixture) for fixture in ('arch1.csv', 'arch2.csv')]
    form = format_dense(datasets, 3, (4,4))

    leading_counts = form[0]['count'][:4]
    self.assertTrue(np.all(leading_counts == np.array([1,1,3,3])))

    all_counts = form[1]['count']
    self.assertTrue(np.all(all_counts == np.array([1,1,3,3,1,1,5,1])))
def append_rec(recs):
    """Fold the CSV files named by ``recs`` into one record array.

    Each element of ``recs`` supplies a path under its "file" key.  Starting
    from the first file, every field of each later file is joined in with
    ``mlab.recs_join`` on "sys_tick", using 0 for missing values.
    """
    merged = mlab.csv2rec(recs[0]["file"])
    for entry in recs[1:]:
        extra = mlab.csv2rec(entry["file"])
        for field_name in extra.dtype.fields:
            merged = mlab.recs_join("sys_tick", field_name, [merged, extra], missing=0)
    return merged
def test_add_data_fields(self):
    """Check that add_data_fields() attaches the requested per-dataset values."""
    datasets = [csv2rec(fixture) for fixture in ('arch1.csv', 'arch2.csv')]

    # One new field; the values are compared as strings.
    with_year = add_data_fields(datasets, {'year': (1998, 2002)})
    for table, expected in zip(with_year[:2], ('1998', '2002')):
        self.assertTrue(np.all(table['year'] == expected))

    # Two new fields at once.
    with_both = add_data_fields(datasets, {'year' : (1998, 2002), 'why': ('h', 'a')})
    self.assertTrue(np.all(with_both[0]['why'] == 'h'))
def plotGraphs():
    """Render the temperature chart for the date in ``gDateStr`` to a PNG.

    Reads './data/<date>_temperatures.csv', plots its second and third
    columns against the first, overlays the pump ON/OFF event files when
    they exist, saves the figure under /var/www/Prometheus/data/ and
    re-points the 'current_temperatures.png' symlink at it.
    """
    global gDateStr, gTimeStr

    print("Plotting...")
    print("temperatures")
    data_prefix = "./data/" + gDateStr
    r = mlab.csv2rec(data_prefix + "_temperatures.csv", delimiter=',')

    fig = Figure(figsize=(6,6))
    canvas = FigureCanvas(fig)
    ax = fig.add_subplot(111)
    ax.set_title('Temperatures '+gDateStr,fontsize=14)
    ax.set_xlabel('Time',fontsize=6)
    ax.set_ylabel('Temperature (C)',fontsize=6)
    ax.grid(True,linestyle='-',color='0.75')

    columns = r.dtype.names
    # Sanitize each of the two data columns once.
    r[columns[1]] = arrayops.sanitize(r[columns[1]])
    r[columns[2]] = arrayops.sanitize(r[columns[2]])

    # Generate the plot.
    ax.plot(r[columns[0]], r[columns[1]], color='tomato')
    ax.plot(r[columns[0]], r[columns[2]], color='green')

    # Overlay pump on/off times when the event files are present.
    pump_events = (("pump on", "_pumpON.csv", 'orange'),
                   ("pump off", "_pumpOFF.csv", 'blue'))
    for message, suffix, colour in pump_events:
        print(message)
        filename = data_prefix + suffix
        if os.path.exists(filename):
            r = mlab.csv2rec(filename, delimiter=',')
            ax.scatter(r[r.dtype.names[0]], r[r.dtype.names[1]], color=colour)

    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label.set_fontsize(6)

    ax.set_ylim(-5, 35)

    # Save the generated Plot to a PNG file.
    filename = "/var/www/Prometheus/data/"+gDateStr+"_temperatures.png"
    canvas.print_figure(filename,dpi=100)
    os.system('ln -sf '+filename+' /var/www/Prometheus/data/current_temperatures.png')
def main():
    """Join the BWS and WRI tables onto the GU table and write 'bin/test.csv'."""
    lhs = mlab.csv2rec("bin/global_GU_20121015.csv")

    inputlist = ["bin/global_BWS_20121015.csv","bin/global_WRI_20121015.csv"]
    rhslist = [mlab.csv2rec(path) for path in inputlist]

    # The basin ids of the first right-hand table are cast before joining.
    first = rhslist[0]
    first["basinid"] = first["basinid"].astype(np.long)

    keys = ("basinid","countryid","id")
    lhs = join_recs_on_keys(lhs,rhslist,keys)
    mlab.rec2csv(lhs,"bin/test.csv")
    print("complete")
def test_sanity():
    """Single subject fitting of FIAC model.

    For subject 3, run 1 ('event') and run 3 ('block') the design matrix is
    rebuilt from the experiment/initial CSV files under DATADIR and every
    column must correlate, positively or negatively, above 0.999 in absolute
    value with the stored fmristat design according to ``matchcol``.
    """
    from nipy.modalities.fmri import design, hrf
    import nipy.modalities.fmri.fmristat.hrf as fshrf
    from nipy.modalities.fmri.fmristat.tests import FIACdesigns
    from nipy.modalities.fmri.fmristat.tests.test_FIAC import matchcol
    from nipy.algorithms.statistics import formula
    from nose.tools import assert_true

    # Based on file
    # subj3_evt_fonc1.txt
    # subj3_bloc_fonc3.txt
    for subj, run, design_type in [(3, 1, 'event'), (3, 3, 'block')]:
        nvol = 191
        TR = 2.5
        Tstart = 1.25

        volume_times = np.arange(nvol)*TR + Tstart
        volume_times_rec = formula.make_recarray(volume_times, 't')

        path_dict = {'subj':subj, 'run':run}
        # The design directory is chosen by probing for the block-design file.
        if exists(pjoin(DATADIR, "fiac_%(subj)02d", "block",
                        "initial_%(run)02d.csv") % path_dict):
            path_dict['design'] = 'block'
        else:
            path_dict['design'] = 'event'

        experiment = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s",
                                   "experiment_%(run)02d.csv") % path_dict)
        initial = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s",
                                "initial_%(run)02d.csv") % path_dict)

        X_exper, cons_exper = design.event_design(experiment,
                                                  volume_times_rec,
                                                  hrfs=fshrf.spectral)
        X_initial, _ = design.event_design(initial, volume_times_rec,
                                           hrfs=[hrf.glover])
        X, cons = design.stack_designs((X_exper, cons_exper),
                                       (X_initial, {}))

        # Get original fmristat design
        Xf = FIACdesigns.fmristat[design_type]
        # Check our new design can be closely matched to the original
        for i in range(X.shape[1]):
            # Columns can be very well correlated negatively or positively
            assert_true(abs(matchcol(X[:,i], Xf)[1]) > 0.999)
def test_event_design():
    """Build event designs from the 'block' and 'event' entries of ``altdescr``.

    No assertions are made; the test passes when both designs can be built.
    """
    block = csv2rec(StringIO(altdescr["block"]))
    event = csv2rec(StringIO(altdescr["event"]))

    t = np.arange(191) * 2.5 + 1.25

    # Drop every sixth block row (starting with the first) and the first
    # event row.
    block_rows = np.arange(block.time.shape[0])
    event_rows = np.arange(event.time.shape[0])
    bkeep = np.not_equal(block_rows % 6, 0)
    ekeep = np.greater(event_rows, 0)

    # Even though there is a FIAC block experiment
    # the design is represented as an event design
    # with the same event repeated several times in a row...
    Xblock, cblock = design.event_design(block[bkeep], t, hrfs=delay.spectral)
    Xevent, cevent = design.event_design(event[ekeep], t, hrfs=delay.spectral)
def get_data(file_name):
    """Read the '#'-prefixed parameter header and the records of a data file.

    The first line of the file is skipped.  Each following line that starts
    with '#' is split at its first ':' into a key and a value; the value is
    stored as a float when it parses as one and as the raw string otherwise.
    The whole file is then parsed with ``csv2rec``.

    Returns
    -------
    p : dict of parameters (replaced by an empty list when ``csv2rec``
        raises ValueError)
    l : the first line after the parameter block ('' at end of file)
    data_rec : the parsed records, or an empty list when the file has no
        second line or ``csv2rec`` raised ValueError
    """
    p = {}  # This will hold the params
    data_rec = []

    # The handle is closed on every exit path, including the early return.
    with open(file_name, 'r') as file_read:
        file_read.readline()  # the first line is discarded
        l = file_read.readline()
        if l == '':
            return p, l, data_rec

        # startswith() is also safe on the empty string returned at EOF,
        # where indexing l[0] raised IndexError.
        while l.startswith('#'):
            colon = l.find(':')
            # Strip only the line terminator; slicing up to find('\n') cut
            # off the last character of a final line without a newline.
            raw_value = l[colon + 1:].rstrip('\n')
            try:
                p[l[1:colon - 1]] = float(raw_value)
            except ValueError:
                # Not all the parameters can be cast as float (the task and
                # the subject).
                # NOTE(review): this key is sliced from index 2 while the
                # float key above is sliced from index 1; kept as-is because
                # callers may rely on the existing keys.
                p[l[2:colon - 1]] = raw_value
            l = file_read.readline()

    try:
        data_rec = csv2rec(file_name)
    except ValueError:
        p = []

    return p, l, data_rec
def scatter_from_csv(self, filename, sand = 'sand', silt = 'silt', clay = 'clay', diameter = '', hue = '', tags = '', **kwargs):
    """Loads data from filename (expects csv format) and scatter-plots it.

    Needs one header row with at least the columns {sand, silt, clay}.
    Can also plot two more variables for each point; specify the header
    value for columns to be plotted as diameter, hue.
    Can also add a text tag offset from each point; specify the header
    value for those tags.
    Extra keyword arguments are passed on to the scatter call and override
    the size/colour arguments derived from diameter/hue.

    Note! text values (header entries, tag values) need to be quoted to be
    recognized as text.
    """
    # Close the handle as soon as the records are in memory, even if
    # parsing fails.
    with open(filename, 'rU') as fh:
        soilrec = csv2rec(fh)

    column_names = soilrec.dtype.names
    count = sum(1 for required in (sand, silt, clay) if required in column_names)
    if count < 3:
        # The file name used to sit inside the string literal, so the
        # message ended with the text "', filename" instead of the name.
        print("ERROR: need columns for sand, silt and clay identified in %s" % (filename,))

    locargs = {'s': None, 'c': None}
    for (col, key) in ((diameter, 's'), (hue, 'c')):
        col = col.lower()
        if col == '':
            # Optional column not requested: nothing to look up or report.
            continue
        if col in column_names:
            locargs[key] = soilrec.field(col)
        else:
            print("ERROR: did not find  %s in  %s" % (col, filename))
    for k in kwargs:
        locargs[k] = kwargs[k]

    # Triples are ordered (sand, clay, silt) for self._toCart.
    values = list(zip(soilrec.field(sand), soilrec.field(clay), soilrec.field(silt)))
    print(values)
    (xs, ys) = self._toCart(values)
    p.scatter(xs, ys, label='_', **locargs)

    if tags != '':
        tags = tags.lower()
        for (x, y, tag) in zip(xs, ys, soilrec.field(tags)):
            print("%s %s %s" % (x, y, tag))
            p.text(x + 1, y + 1, tag, fontsize=12)
def __init__(self, path):
    """Parse a profile file made of four consecutive species tables.

    Line 4 of the file holds two integers followed by the sigma values;
    line 5 holds two integers.  From line 6 onwards come four sections, each
    a title line followed by ``nspc`` rows of a species name and one value
    per sigma.  Each section is parsed into a record array stored in
    ``self.data`` under its lower-cased title.

    Raises
    ------
    IOError
        If a later section differs from the first in shape, dtype or
        species names.
    """
    with open(path) as handle:
        lines = handle.read().split('\n')

    header = lines[3].split()
    nlay, nspc = [int(token) for token in header[:2]]
    sigmas = [float(token) for token in header[2:]]
    nsigmas = len(sigmas)
    # Parsed only for validation: raises ValueError unless the line holds
    # exactly two integers.
    date, time = [int(token) for token in lines[4].split()]

    starts = [5 + i + i * nspc for i in range(4)]
    ends = [s + 1 + nspc for s in starts]
    keys = [lines[s].strip().lower() for s in starts]
    fieldnames = ('name',) + tuple(['s%f' % i for i in sigmas])

    # NOTE(review): the converter is keyed on 'names' while the field is
    # called 'name', so it presumably never fires; the names are stripped
    # with np.char.strip below instead.  Left unchanged.
    self.data = dict(
        (k, csv2rec(StringIO(u'\n'.join(lines[s+1:e])),
                    delimiter = ' ',
                    names = fieldnames,
                    converterd = dict(names = lambda x: str(x).strip())))
        for k, s, e in zip(keys, starts, ends))

    reference = self.data[keys[0]]
    self._profile_spcs = np.char.strip(reference['name'])
    data_type = reference.dtype
    data_shape = reference.shape

    # Explicit checks instead of assert, so they still run under ``-O``.
    # The shape is tested first (the second of two identical dtype asserts
    # was evidently meant to be this check), so that the element-wise name
    # comparison is only made on arrays of equal length.
    for k in keys[1:]:
        section = self.data[k]
        consistent = (section.shape == data_shape
                      and section.dtype == data_type
                      and (np.char.strip(section['name']) == self._profile_spcs).all())
        if not consistent:
            raise IOError('File is corrupt or inconsistent')

    self._prof_spc = ['NO2', 'NO', 'O3P', 'O3', 'NO3', 'N2O5', 'HNO3', 'O1D',
                      'HO', 'HONO', 'HO2', 'CO', 'HNO4', 'H2O2', 'SO2', 'SULF',
                      'MO2', 'HCHO', 'OP1', 'OP2', 'ONIT', 'KET', 'ACO3',
                      'PAN', 'PAA', 'ORA2', 'TPAN', 'ALD', 'ORA1', 'GLY',
                      'MGLY', 'CSL', 'MACR', 'MVK', 'ISOPROD', 'DCB', 'OL2',
                      'ISO', 'TERP', 'ETH', 'HC3', 'HC3', 'HC5', 'HC8', 'TOL',
                      'XYL', 'XYL', 'XYL', 'OLT', 'OLI', 'BENZENE', 'HG',
                      'HGIIGAS', 'CO2']
    self._prof_dict = dict([(k, []) for k in self._prof_spc])
def csv2sql(database, table, comments="#", delimiter=","):
    """
    Load the csv file named by the global ``csv_file`` into a new table of
    an SQLite database.

    Parameters
    ----------
    database : path of the SQLite database file (created if missing)
    table : name of the table to create
    comments, delimiter : passed through to ``csv2rec``

    The column types come from ``get_data_type`` applied to the values of
    the first data row.

    NOTE(review): table and column names are interpolated into the SQL text
    because identifiers cannot be bound as parameters; only pass trusted
    names.
    """
    global csv_file

    # create the database and cursor
    con = sqlite3.connect(database)
    try:
        cur = con.cursor()

        # load in the data
        data = csv2rec(csv_file, comments=comments, delimiter=delimiter)

        # make the table; joining the column definitions avoids the trailing
        # ", " before ")" that SQLite rejects as a syntax error
        column_defs = ", ".join(
            "{0} {1}".format(n, get_data_type(t))
            for n, t in zip(data.dtype.names, data[0]))
        cur.execute("CREATE TABLE {0} ({1})".format(table, column_defs))

        # insert the rows of data into the table, binding the values as
        # parameters instead of pasting str(row) into the statement;
        # tolist() converts numpy scalars to plain Python values
        placeholders = ", ".join("?" for _ in data.dtype.names)
        insert_command = "INSERT INTO {0} VALUES ({1})".format(table, placeholders)
        cur.executemany(insert_command,
                        (tuple(row.tolist()) for row in data))

        # Save (commit) the database
        con.commit()
    finally:
        # close the connection to the database, also on failure
        con.close()
def add_time_interpolated_from_csv(path, timekey, incr=600):
    """Load a CSV file and pass its columns to ``add_time_interpolated``.

    The columns are keyed by the names written in the file's header line
    (stripped of surrounding whitespace) instead of the names ``csv2rec``
    assigns.  The column named ``timekey`` is passed as ``time``; all others
    are passed as keyword arguments, together with ``incr``.
    """
    # Only the header line is needed, and the handle is closed right away.
    with open(path) as handle:
        header = handle.readline()
    names = [name.strip() for name in header.split(",")]

    data = csv2rec(path)
    datadict = dict((newkey, data[k])
                    for k, newkey in zip(data.dtype.names, names))
    time = datadict.pop(timekey)
    add_time_interpolated(time=time, incr=incr, **datadict)
def getData(self):
    """Fetch the historical quotes for ``self.name`` into ``self.data``, sorted.

    The date range is ``self.startdate`` to ``self.enddate``.

    NOTE(review): ``dataLoaded`` is set before the download is attempted, so
    it stays 1 even when the fetch or the parse raises.
    """
    self.dataLoaded = 1
    fh = finance.fetch_historical_yahoo(self.name, self.startdate, self.enddate)
    try:
        self.data = mlab.csv2rec(fh)
    finally:
        # Release the handle even when parsing fails.
        fh.close()
    self.data.sort()
def load_csv(self, fname):
    """Clean ``fname`` into a temporary CSV and load it into ``self.data``.

    The first two rows of the input are dropped, and so is any row whose
    number of fields differs from ``len(self.csv_columns)`` (these are
    reported on stdout).  The cleaned copy is parsed with ``mlab.csv2rec``
    using ``self.csv_columns`` as names, then deleted.
    """
    def clean_csv():
        # Copy the well-formed rows of fname to a temp file; return its path.
        print("Reading csv from file %s" % (fname))
        # Both files are closed, and the output flushed, before the cleaned
        # file is read back by the caller.
        with open(fname, 'rb') as source:
            reader = csv.reader(source)
            # NOTE(review): predictable name in a shared directory; consider
            # tempfile.mkstemp.
            cleaned_fname = "/tmp/lc-%s.csv" % (random.random())
            print("Cleaning csv file using python csv library, writing new file to %s" % (cleaned_fname))
            with open(cleaned_fname, 'wb') as target:
                writer = csv.writer(target)
                for i, row in enumerate(reader):
                    # skip first 2 rows
                    if i < 2:
                        continue
                    if len(self.csv_columns) == len(row):
                        writer.writerow(row)
                    else:
                        print('\tError row %d, line contents:"%s"' % (i, ", ".join(row)))
        return cleaned_fname

    cleaned_fname = clean_csv()

    converterd = {
        'interest_rate': fieldparsers.strip_non_numeric_and_parse,
        'loan_length': fieldparsers.strip_non_numeric_and_parse,
        'employment_length': fieldparsers.parse_employment_years,
        'debt_to_income_ratio': fieldparsers.strip_non_numeric_and_parse,
        'revolving_line_utilization': fieldparsers.strip_non_numeric_and_parse,
        'status': fieldparsers.parse_status,
    }

    print("Loading csv via mlab")
    try:
        # NOTE(review): skiprows=2 is applied on top of the two rows already
        # dropped by clean_csv — confirm that this is intended.
        self.data = mlab.csv2rec(cleaned_fname, skiprows=2,
                                 converterd=converterd,
                                 names=self.csv_columns)
    finally:
        # Remove the temporary file even when parsing fails.
        subprocess.call(["rm", "-rf", cleaned_fname])
    print("Done.")
def open_dense_data(filenames, direct, delim=','):
    '''
    Open a list of dense data files and return them as a list of rec arrays.

    Parameters
    ----------
    filenames : list
        A list of filenames
    direct : string
        Name of the directory, inside the 'archival' directory, that holds
        the files.  Must not contain a '/'.  Example 'ANBO_2010' or 'LBRI'
    delim : string
        The file delimiter; the default is ','

    Returns
    -------
    : list
        A list of rec arrays, in the order of `filenames`

    '''
    assert '/' not in direct, "%s should not contain a '/'" % (direct)

    # The 'archival' directory sits two levels above the working directory.
    archive_root = pd(pd(gcwd()))
    filedir = jp(archive_root, 'archival', direct)
    return [plt.csv2rec(jp(filedir, name), delimiter=delim)
            for name in filenames]
def replace_vals(filename, replace, delim=','):
    '''
    Replace the missing values in filename with a specified value.

    Parameters
    ----------
    filename : string
        Will be read into a rec array
    replace : tuple
        First object is the value to treat as missing when reading the file
        and second object is what to put in place of missing entries
    delim : string
        The file delimiter; the default is ','

    Returns
    -------
    : rec array
        The data with NaN, blank, -1 and None entries of every column
        overwritten by `replace[1]`
    '''
    data = csv2rec(filename, delimiter=delim, missing=replace[0])
    for nm in data.dtype.names:
        column = data[nm]

        try:
            # Missing float
            isNaN = np.isnan(column)
        except TypeError:
            # np.isnan does not support this column's type.
            isNaN = np.zeros(len(column), dtype=bool)

        # dtype=bool keeps these masks boolean for empty columns as well;
        # an empty list would otherwise become a float array, which
        # bitwise_or rejects.
        isBlank = np.array([it == '' for it in column], dtype=bool)
        isMinusOne = (column == -1)  # Missing int
        # Missing other
        isNone = np.array([it is None for it in column], dtype=bool)

        ind = np.bitwise_or(isNaN, isBlank)
        ind = np.bitwise_or(ind, isMinusOne)
        ind = np.bitwise_or(ind, isNone)
        data[nm][ind] = replace[1]
    return data
def data_from_csv2rec(infile):
    """Parse ``infile`` with csv2rec and return the resulting record array.

    Inspect ``.dtype`` on the result to see the column names and the data
    types that were assigned to them.
    """
    return csv2rec(infile)
def __init__(self, strip=None):
    """Load '../data/pearson_lee.csv', relative to this module's directory.

    The 'mother' and 'daughter' columns are exposed as ``self.M`` and
    ``self.D``; ``strip`` is stored unchanged on ``self.strip``.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(module_dir, '..', 'data', 'pearson_lee.csv')

    self.data = csv2rec(csv_path)
    self.M = self.data['mother']
    self.D = self.data['daughter']
    self.strip = strip
def test_csv2rec_roundtrip(self):
    """Write a mixed-type record array with rec2csv and read it back unchanged.

    The array holds int, date, datetime, float and string columns; the
    strings include a comma and a single quote.
    """
    one_day = datetime.timedelta(days=1)
    dates = [datetime.date(2007,12,16) + n * one_day for n in range(3)]
    stamps = [datetime.datetime(2007,12,16,22,29,34,924122) + n * one_day
              for n in range(3)]

    ra = numpy.rec.fromrecords([
        (123, dates[0], stamps[0], 1197346475.0137341, 'a,bc'),
        (456, dates[1], stamps[1], 123.456, 'd\'ef'),
        (789, dates[2], stamps[2], 0.000000001, 'ghi'),
    ], names='intdata,datedata,datetimedata,floatdata,stringdata')

    fh = StringIO.StringIO()
    mlab.rec2csv(ra, fh)
    fh.seek(0)
    ra2 = mlab.csv2rec(fh)
    fh.close()

    for name in ra.dtype.names:
        original, restored = ra[name], ra2[name]
        # should not fail with numpy 1.0.5
        self.failUnless(numpy.all(original == restored))
def shift(self):
    """
    Print the expected gain/loss of each party.

    For every state in 'senate_polls.csv' one seat is counted for the party
    with the largest value among columns 1-3 of the first row of
    ``self.polldat(state)``, and one for the party given by the fourth
    entry of ``self.candidates(state)``.  The difference between the two
    tallies is printed.
    """
    nincum = np.zeros(3)  # dem,gop,ind
    nnew = np.zeros(3)    # dem,gop,ind

    rec = mlab.csv2rec('senate_polls.csv')
    for state in np.unique(rec.state):
        poll_row = self.polldat(state)[0,1:4]
        candidates = self.candidates(state)
        iincum = np.where(self.partyarr == candidates[3])[0][0]
        ileader = np.argmax(poll_row)
        nnew[ileader] += 1
        nincum[iincum] += 1

    balance_change = nnew - nincum
    print('Expected Shift in Senate Party Balance')
    print(self.partyarr)
    print(balance_change)
def extract(data):
    """Return the names of the columns in ``data`` that are worth keeping.

    ``data`` is parsed with ``mlab.csv2rec``.  A column is kept when its
    standard deviation is greater than zero, or unconditionally when
    ``options["boring"]`` is set.  Columns for which this test raises
    TypeError are skipped.

    Returns
    -------
    list of column names
    """
    data_recs = mlab.csv2rec(data)
    rv = []
    for k in data_recs.dtype.fields:
        try:
            # note that datetime will raise
            s = data_recs[k].std()
            if s > 0 or options["boring"]:
                rv.append(k)
        except TypeError:
            # non number type, e.g. date
            pass
    # The collected names used to be discarded: the function ended without
    # returning them.
    return rv
def graficoTes():
    """Plot consumption ('mwh') against year ('ano') from the 'total.csv' file.

    Draws a black-dot scatter of all rows, then one coloured line per
    'municipio' value taken from the first 401 rows, and shows the figure
    with a legend.
    """
    with open('/home/lucas/PycharmProjects/openCsv/dados_consumo_todos/total.csv', encoding="ISO-8859-1") as fname:
        gender_degree_data = csv2rec(fname)
    anos = pd.DataFrame(gender_degree_data,
                        columns=['municipio', 'ibge', 'latitude', 'longitude', 'mwh', 'ano'])

    # Each 'mwh' value is passed through int(); the assignment targets the
    # row object handed out by iterrows().
    for _, row in anos.iterrows():
        row["mwh"] = int(row["mwh"])

    grouped = anos.groupby('municipio')
    print(grouped)

    by_year = anos.sort_values(by='ano', ascending=True)
    numeric = by_year[['mwh', 'ano']].astype(float)
    ax = numeric.plot(x='ano', y='mwh', style='k.')
    ax.set_xlim(1990, 2016)

    american = anos['municipio'] == "Agudo"
    select = anos[american]
    print(select)

    # Names from the rows at positions 0..400 (duplicates included).
    nomes = anos['municipio'].iloc[:401].tolist()

    colormap = plt.cm.gist_ncar  # nipy_spectral, Set1,Paired
    colors = [colormap(i) for i in np.linspace(0, 1, len(nomes))]
    labels = []

    plt.xlim([1990, 2015])
    plt.ylim([0, 10])
    for rank, colunm in enumerate(nomes):
        umframe = anos[anos['municipio'] == colunm]
        umframesort = umframe.sort_values(by='ano', ascending=True)
        umframesort = umframesort[['mwh', 'ano']].astype(int)
        plt.plot(umframesort['ano'], umframesort['mwh'], 'k', color=colors[rank])
        labels.append(colunm)

    plt.legend(labels, ncol=4, loc='upper center',
               bbox_to_anchor=[0.5, 1.1],
               columnspacing=1.0, labelspacing=0.0,
               handletextpad=0.0, handlelength=1.5,
               fancybox=True, shadow=True)
    plt.show()
def test():
    """Run ``selectp`` on the light curve in 'lc.dat', print the result and plot it."""
    light_curve = csv2rec("lc.dat", delimiter=" ", names=["t", "y", "dy"])
    times, values, errors = light_curve['t'], light_curve['y'], light_curve['dy']

    s = selectp(times, values, errors, 21.93784630, dynamic=False, verbose=True)
    s.select()
    print(s.rez)
    s.plot_best()
def tomdraw_KwithDate(rP,sd,ed):
    """Draw the K chart for the records of ``rP`` dated between ``sd`` and ``ed``.

    Keeps the rows whose ``time`` is later than ``sd`` and earlier than
    ``ed`` plus ``tt.one_Day_Delta``, sorts them, and draws them on the
    module-level figure.  The filtered records are stored in the global
    ``db_r``.
    """
    global db_r
    global rect_K
    global db_fig

    db_r = mlab.csv2rec(rP)

    after_start = db_r.time > tt.str2dateYmd(sd)
    db_r = db_r[np.where(after_start)]

    before_end = db_r.time < (tt.str2dateYmd(ed) + tt.one_Day_Delta)
    db_r = db_r[np.where(before_end)]

    db_r.sort()

    quote, dif = install_K_Data(db_r)
    k_h = getK_H(dif)
    initRect(k_h, 'K')
    draw_K(db_fig, rect_K, quote, 0.5, format_date)
def generate_charts_from_csv(csv_path, title=None):
    """Generate the deal amount and deal count plots from a CSV file.

    Only the last 30 records, after sorting, are plotted.  The output
    location passed to the plot functions is ``csv_path`` with the
    substring 'data.csv' removed.
    """
    # http://matplotlib.org/examples/api/date_index_formatter.html
    # The handle is closed once the records are in memory.
    with open(csv_path) as handle:
        r = mlab.csv2rec(handle)

    # todo fix path deduction
    path = csv_path.replace('data.csv', '')

    r.sort()
    r = r[-30:]  # get the last 30 values

    generate_deal_amount_plot(r, path, title=title)
    generate_deal_count_plot(r, path, title=title)
def __init__(self, path):
    """Parse a single-table profile file into dimensions and variables.

    Line 4 of the file holds two integers followed by the sigma values.
    Line 5 holds either a '%Y-%m-%d' date or two integers.  The ``nspc``
    lines from line 6 onwards each hold a species name and one value per
    sigma.

    Creates the dimensions 'sigma', 'sigma-mid' and
    'south_east_north_west', the variables 'sigma' and 'sigma-mid', and one
    variable per species row.

    Raises
    ------
    IOError
        If a section other than the first is inconsistent with it.
    """
    with open(path) as handle:
        lines = handle.read().split('\n')

    header = lines[3].split()
    # List comprehensions instead of map(), so the results are real lists
    # whether or not map() returns one.
    nlay, nspc = [int(token) for token in header[:2]]
    sigmas = [float(token) for token in header[2:]]
    nsigmas = len(sigmas)

    # Line 5 is parsed only to validate it.
    try:
        datetime.strptime(lines[4].strip(), '%Y-%m-%d')
    except ValueError:
        date, time = [int(token) for token in lines[4].split()]

    starts = [5]
    ends = [s + nspc for s in starts]
    keys = ['all']
    fieldnames = ('name', ) + tuple(['s%f' % i for i in sigmas])

    # NOTE(review): the converter is keyed on 'names' while the field is
    # called 'name', so it presumably never fires.  Left unchanged.
    data = dict(
        (k, csv2rec(StringIO('\n'.join(lines[s:e])),
                    delimiter=' ',
                    names=fieldnames,
                    converterd=dict(names=lambda x: str(x).strip())))
        for k, s, e in zip(keys, starts, ends))

    reference = data[keys[0]]
    profile_spcs = np.char.strip(reference['name'])
    data_type = reference.dtype
    data_shape = reference.shape

    self.createDimension('sigma', nsigmas)
    self.createDimension('sigma-mid', nsigmas - 1)
    self.createDimension('south_east_north_west', 4)
    self.createVariable('sigma', 'f', ('sigma', ),
                        values=np.array(sigmas), units='sigma')
    # Mid-level values are the means of adjacent sigma pairs.
    self.createVariable('sigma-mid', 'f', ('sigma-mid', ),
                        values=np.array(sigmas).repeat(2, 0)[1:-1].reshape(
                            -1, 2).mean(1),
                        units='sigma')
    self.VGLVLS = self.variables['sigma']
    self.VGTOP = 5000

    # Explicit checks instead of assert, so they still run under ``-O``.
    # The shape is tested first (the second of two identical dtype asserts
    # was evidently meant to be this check).  With a single key this loop
    # does not execute.
    for k in keys[1:]:
        section = data[k]
        consistent = (section.shape == data_shape
                      and section.dtype == data_type
                      and (np.char.strip(section['name']) == profile_spcs).all())
        if not consistent:
            raise IOError('File is corrupt or inconsistent')

    for a in data['all']:
        # Everything after the name, as a column vector.
        self.createVariable(a[0].strip(), 'f',
                            ('sigma-mid', 'south_east_north_west'),
                            units="None",
                            values=np.array([tuple(a)[1:]]).T,
                            long_name=a[0].ljust(16),
                            var_desc=a[0].ljust(16))
def _load_ben_data(fname="LC_246.dat"):
    """Load one of Ben's input files.

    Returns the 't', 'm' and 'merr' columns plus a name: the integer found
    between the first '_' and '.dat' in ``fname``, as a string.
    """
    from matplotlib.mlab import csv2rec

    id_begin = fname.find("_") + 1
    id_end = fname.find(".dat")
    name = str(int(fname[id_begin:id_end]))

    ## Get the photometry
    c = csv2rec(fname, delimiter=" ", names=["t","m","merr","rrl"])
    return c['t'], c['m'], c['merr'], name
def test_csv2rec_masks(self):
    """Make sure masked entries survive a csv2rec -> rec2csv -> csv2rec roundtrip."""
    # One record per line.  The rows are joined explicitly so that the
    # fixture cannot lose its line breaks to source re-formatting.
    csv_text = "\n".join([
        'date,age,weight,name',
        '2007-01-01,12,32.2,"jdh1"',
        '0000-00-00,0,23,"jdh2"',
        '2007-01-03,,32.5,"jdh3"',
        '2007-01-04,12,NaN,"jdh4"',
        '2007-01-05,-1,NULL,',
    ])
    # Per-column markers that csv2rec/rec2csv treat as missing.
    missingd = dict(date='0000-00-00', age='-1', weight='NULL')

    fh = StringIO.StringIO(csv_text)
    r1 = mlab.csv2rec(fh, missingd=missingd)

    fh = StringIO.StringIO()
    mlab.rec2csv(r1, fh, missingd=missingd)
    fh.seek(0)
    r2 = mlab.csv2rec(fh, missingd=missingd)

    self.failUnless( numpy.all( r2['date'].mask   == [0,1,0,0,0] ))
    self.failUnless( numpy.all( r2['age'].mask    == [0,0,1,0,1] ))
    self.failUnless( numpy.all( r2['weight'].mask == [0,0,0,0,1] ))
    self.failUnless( numpy.all( r2['name'].mask   == [0,0,0,0,1] ))
def readFile(filename):
    """Read a space-delimited table and append its rows to ``top_list``.

    The first column of each row identifies the scheme case; the remaining
    columns are workloads.  A header row is appended first, then one padded
    row per data record.  ``num_workloads`` is updated as a side effect.
    """
    global top_list
    global num_workloads

    data = csv2rec(filename, delimiter=' ')
    headers = data.dtype.names

    #-- num of workloads (cols)
    num_workloads = len(headers) - 1

    #-- prepare the headers
    #-- header_list: {"a", "b\nast", "c", "a", ...}
    if len(cfg_headers.strip()) == 0:
        #-- if 'headers' is empty in the cfg file, then read from data file
        labels = headers
    else:
        #-- if 'headers' is provided in the cfg file, then use them
        # NOTE(review): indexed from 1 like the data-file headers — confirm
        # the cfg list carries a leading entry for the first column.
        labels = cfg_headers.split(',')

    header_list = []
    for i in range(1, num_workloads + 1):
        for ind in range(NUM_SYMBOLS):
            if ind == cluster_label_location:
                header_list.append(symbols_list[ind] + "\n" + delim + labels[i])
            else:
                header_list.append(symbols_list[ind])
    top_list.append(header_list)

    #-- prepare the row data
    #-- pad '0's, according to the scheme
    for row in data:
        #-- row[0]: scheme_case
        row_list = []
        for i in range(1, num_workloads + 1):
            for ind in range(NUM_SCHEMES):
                if schemes_list[ind] not in row[0]:
                    continue
                #-- the value goes in slot `ind`; every other slot gets a 0
                row_list.extend([0] * ind)
                row_list.append(row[i])
                row_list.extend([0] * (NUM_SCHEMES - ind - 1))
        top_list.append(row_list)
def run():
    """Fit an exponential model to 'datas.csv', predict a 40x40 grid and print the elapsed time."""
    started = time.time()

    samples = mpl.csv2rec('datas.csv')
    g = ok.Grid(samples.x, samples.y, samples.v)

    model = g.fitSermivariogramModel('Exponential', nlag=20)

    x, y = g.regularBasicGrid(nx=40, ny=40)
    pg = g.predictedGrid(x, y, model)

    elapsed = time.time() - started
    print("Operation performed in %.2f seconds" % elapsed)
def read_dataset(path, labelcomb=True):
    """Read a tab-separated dataset with the columns id, labels and abstracts.

    Returns ``(X, Y)`` where ``X`` is the 'abstracts' column.  ``Y`` is the
    raw 'labels' column when ``labelcomb`` is true; otherwise it is a list
    in which each label string has been split on ','.
    """
    data = mlab.csv2rec(path, names=['id', 'labels', 'abstracts'], delimiter='\t')
    X = data['abstracts']
    raw_labels = data['labels']
    if not labelcomb:
        return X, [labels.split(',') for labels in raw_labels]
    return X, raw_labels
def get_quote_daily_matplotlib(ticker):
    """Return the sorted daily quote history for ``ticker`` from 2006-01-01 to today.

    The result is a numpy record array with fields: date, open, high, low,
    close, volume, adj_close.
    """
    startdate = datetime.date(2006, 1, 1)
    enddate = datetime.date.today()

    fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
    try:
        r = mlab.csv2rec(fh)
    finally:
        # Release the handle even when parsing fails.
        fh.close()
    r.sort()
    return r
def tomdraw_VA(rP):
    """Load the CSV at *rP* and draw its volume and amount series.

    The sorted records are stored in the global ``db_r`` and converted by
    ``install_VA_Data`` into a sequence whose items hold an index, a volume
    and an amount at positions 0, 1 and 2. After ``initRect`` is called for
    ``'V'`` and ``'A'``, the two series are drawn onto ``db_fig`` with
    ``drawFill`` (volumes in ``'b'``, amounts in ``'r'``).
    """
    global db_r
    global rect_V
    global rect_A
    global db_fig

    db_r = mlab.csv2rec(rP)
    db_r.sort()
    quote = install_VA_Data(db_r)

    indexes = [entry[0] for entry in quote]
    volumes = [entry[1] for entry in quote]
    amounts = [entry[2] for entry in quote]

    for panel in ('V', 'A'):
        initRect(0.8, panel)

    drawFill(db_fig, rect_V, indexes, volumes, format_date, 'b')
    drawFill(db_fig, rect_A, indexes, amounts, format_date, 'r')
def LoadRecordsUsingApi(name, sql):
    """Run *sql* against the ScraperWiki datastore *name* and return the rows.

    The query is sent to the sqlite datastore endpoint with ``format=csv``;
    the CSV response is read fully into memory and parsed with
    ``mlab.csv2rec``. The HTTP response object is always closed.
    """
    apiurl = "https://api.scraperwiki.com/api/1.0/datastore/sqlite"
    query = urllib.urlencode({"format": "csv", "name": name, "query": sql})

    fin = urllib.urlopen("%s?%s" % (apiurl, query))
    try:
        payload = fin.read()
    finally:
        fin.close()

    # Named ``buf`` rather than ``csv`` so the stdlib module name is not shadowed.
    buf = StringIO.StringIO()
    buf.write(payload)
    buf.seek(0)
    return mlab.csv2rec(buf)
def get_history_data(self, ticker, start, end):
    """Return sorted historical records for *ticker* between *start* and *end*.

    When *end* is today's date the records are additionally passed through
    ``self.get_realtime_data``.

    Returns
    -------
    The record array on success, or ``0`` if any step fails (callers rely
    on this sentinel, so it is kept).
    """
    try:
        fh = finance.fetch_historical_yahoo(ticker, start, end)
        try:
            # Expected fields: date, open, high, low, close, volume, adj_close.
            raw = mlab.csv2rec(fh)
        finally:
            fh.close()
        raw.sort()
        if end == datetime.date.today():
            raw = self.get_realtime_data(ticker, raw)
    except Exception:
        # Best-effort by design, but let KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed by a bare ``except``.
        return 0
    return raw
def test_recarray_csv_roundtrip():
    """Check that a float recarray survives ``rec2csv`` followed by ``csv2rec``."""
    # Builtin ``float`` is what the removed ``np.float`` alias pointed to.
    expected = np.recarray((99, ),
                           [('x', float), ('y', float), ('t', float)])
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)

    # The context manager closes the temp file even if the round trip raises.
    with tempfile.TemporaryFile(suffix='csv') as fd:
        mlab.rec2csv(expected, fd)
        fd.seek(0)
        actual = mlab.csv2rec(fd)

    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
def graficoScatter():
    """Show a scatter plot of ``mwh`` against ``ano`` from the consumption CSV.

    The file is read with ISO-8859-1 encoding and parsed by ``csv2rec``;
    rows are sorted by ``ano``, the two plotted columns are cast to float,
    and the x axis is limited to 1990-2016.
    """
    csv_path = '/home/lucas/PycharmProjects/openCsv/dados_consumo_todos/total.csv'
    # Parse eagerly so the file is closed before the blocking plt.show().
    with open(csv_path, encoding="ISO-8859-1") as handle:
        gender_degree_data = csv2rec(handle)

    anos = pd.DataFrame(
        gender_degree_data,
        columns=['municipio', 'ibge', 'latitude', 'longitude', 'mwh', 'ano'],
    )
    # No per-row int() cast through iterrows(): rows yielded by iterrows()
    # are copies, so assigning to them never changed the frame. The numeric
    # conversion that matters is the astype(float) below.
    anossort = anos.sort_values(by='ano', ascending=True)
    anostype = anossort[['mwh', 'ano']].astype(float)

    ax = anostype.plot(x='ano', y='mwh', style='k.')
    ax.set_xlim(1990, 2016)
    plt.show()
def test_recarray_csv_roundtrip():
    """Check that a sparse float recarray survives ``rec2csv`` then ``csv2rec``."""
    # Builtin ``float`` is what the removed ``np.float`` alias pointed to.
    expected = np.recarray((99, ),
                           [('x', float), ('y', float), ('t', float)])
    # np.recarray does not initialise its memory; arbitrary bit patterns
    # (including NaN) would make the comparison below unreliable, so zero
    # every field before setting the three marker values.
    for field in ('x', 'y', 't'):
        expected[field][:] = 0
    expected['x'][0] = 1
    expected['y'][1] = 2
    expected['t'][2] = 3

    # The context manager closes the temp file even if the round trip raises.
    with tempfile.TemporaryFile(suffix='csv') as fd:
        mlab.rec2csv(expected, fd)
        fd.seek(0)
        actual = mlab.csv2rec(fd)

    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
def extract_s2_mod_obs_508(point='high', sitenml='site.nml', bnd_indx=(3,7)):
    """Run the simulator for field 508 and pair two simulated bands with the S2 file.

    Parameters
    ----------
    point : str
        Suffix selecting the ``mni_state_field_508_<point>.csv`` and
        ``mni_s2_508_<point>.csv`` files.
    sitenml : str
        Passed to ``ss.Simulator`` as ``site_nml``.
    bnd_indx : pair of int
        Two band indices; each selects a column of the simulated
        reflectance array and, via the band-name list, a column of the S2
        record array.

    Returns
    -------
    tuple
        ``(sim.spectra.date_sat_ob, simulated band 0, simulated band 1,
        S2 column 0, S2 column 1)``.
    """
    field_dir = '/export/cloud/nceo/users/if910917/sentinel_data/field_data/munich/field_508/'
    state_name = '%smni_state_field_508_%s.csv' % (field_dir, point)
    s2_name = '%smni_s2_508_%s.csv' % (field_dir, point)

    # '%' marks comment lines in the S2 file.
    s2_arr = mlab.csv2rec(s2_name, comments='%')
    bnds = ['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8a', 'b8b', 'b9',
            'b10', 'b11', 'b12']

    sim = ss.Simulator(site_nml=sitenml)
    sim.get_land_state = sim.state_csv(state_name)
    sim.get_geom = sim.geom_csv(s2_name)
    sim.run_rt = sim.passive_optical
    sim.run()

    refl = np.array(sim.spectra.refl)
    first, second = bnd_indx[0], bnd_indx[1]
    return (sim.spectra.date_sat_ob,
            refl[:, first],
            refl[:, second],
            s2_arr[bnds[first]],
            s2_arr[bnds[second]])
def get_stock(ticker, startdate, enddate):
    """Return closing prices for *ticker*, skipping records with zero volume.

    The quotes are fetched with ``finance.fetch_historical_yahoo``, parsed
    with ``mlab.csv2rec`` and sorted. The handle is closed even when
    parsing fails. The record count before and after filtering is printed.

    Returns
    -------
    list
        ``close`` values (converted with ``.tolist()``) of the kept records.
    """
    fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
    try:
        # Expected fields: date, open, high, low, close, volume, adj_close.
        r = mlab.csv2rec(fh)
    finally:
        fh.close()
    r.sort()

    print('the length of data: %s' % len(r.close))

    # NOTE(review): the final record is never examined (the original loop
    # ran to len - 1). Kept as-is because callers may depend on the length;
    # confirm whether dropping the newest row is intentional.
    get_stock_data = [
        close.tolist()
        for close, volume in zip(r.close[:-1], r.volume[:-1])
        if volume != 0
    ]

    print('after removing the datas with zero volume, the length of data: %s'
          % len(get_stock_data))
    return get_stock_data
def get_dates_lat_lon(state_sm_dir, point_no, point_level='high'):
    """Return the soil-moisture dates and the lat/lon of one sampling point.

    Parameters
    ----------
    state_sm_dir : str
        Directory holding ``locations_utm_epsg-32632.csv`` and the
        ``*SM.csv`` files.
    point_no
        Value matched against the ``id`` column of the locations file.
    point_level : str
        Value matched against the ``esu`` column when picking coordinates.

    Returns
    -------
    (sm_dates, lat, lon)
        ``sm_dates`` is the ``date`` column of the 'med' file from its
        fourth entry onwards; ``lat``/``lon`` come from ``utm.to_latlon``
        called with zone ``32, 'U'``.

    The 'high' and 'low' files are located and parsed as well, although
    only the 'med' file contributes to the result.
    """
    sm_loc_dat = mlab.csv2rec(state_sm_dir + '/locations_utm_epsg-32632.csv')

    def pick(column, level):
        # Values of `column` for this point at the given ESU level.
        values = sm_loc_dat[column]
        return values[(sm_loc_dat['id'] == point_no) & (sm_loc_dat['esu'] == level)]

    easting = pick('point_x', point_level)
    northing = pick('point_y', point_level)
    file_heads = [pick('esu_sm', level)[0] for level in ('med', 'high', 'low')]

    lat, lon = utm.to_latlon(easting, northing, 32, 'U')

    sm_tables = []
    for head in file_heads:
        sm_csv = glob.glob(state_sm_dir + '/' + head + '*SM.csv')[0]
        sm_tables.append(mlab.csv2rec(sm_csv))

    sm_dates = sm_tables[0]['date'][3:]
    return sm_dates, lat, lon
def check_indicator(ticker):
    """Return the combined RSI/MACD check for the data in ``data/<ticker>.txt``.

    ``False`` is returned early when the mean of the last ten volumes is
    below 150000, the mean of the last ten adjusted closes is below 1,
    fewer than 200 prices are available, or the data file does not exist.
    Otherwise the result is ``check_rsi(...) and check_macd(...)``.
    """
    try:
        records = mlab.csv2rec('data/' + ticker + '.txt')
        records.sort()
        prices = records.adj_close
        volumes = records.volume

        # Screen out illiquid, very low-priced or short-history tickers.
        if (statistics.mean(volumes[-10:]) < 150000
                or statistics.mean(prices[-10:]) < 1
                or len(prices) < 200):
            return False

        # Moving averages are computed but not used in the decision below.
        ma20 = ti.moving_average(prices, 20, type='simple')
        ma200 = ti.moving_average(prices, 200, type='simple')

        rsi = ti.relative_strength(prices)
        emaSlow, emaFast, macd = ti.moving_average_convergence(
            prices, nslow=26, nfast=12)
        ema9 = ti.moving_average(macd, 9, type='exponential')

        rsi_ok = check_rsi(rsi)
        macd_ok = check_macd(macd, ema9)
        signal = rsi_ok and macd_ok
        if signal:
            pass  # hook for plot_graph.plot(records, ticker)
        return signal
    except FileNotFoundError:
        return False
def test_transform_data():
    """
    Check that ``sb.transform_data`` gives the same arrays for a record
    array and for the path it was read from, and the expected values for
    a small hand-built DataFrame.
    """
    from matplotlib import mlab

    # Real data: the same file, once pre-parsed and once given as a path.
    ortho_path = op.join(data_path, 'ortho.csv')
    ortho = mlab.csv2rec(ortho_path)
    para = mlab.csv2rec(op.join(data_path, 'para.csv'))

    x1, y1, n1 = sb.transform_data(ortho)
    x2, y2, n2 = sb.transform_data(ortho_path)
    npt.assert_equal(x1, x2)
    npt.assert_equal(y1, y2)

    # Synthetic data with a known answer.
    trials = np.array([[0.1, 2], [0.1, 1],
                       [0.2, 2], [0.2, 2],
                       [0.3, 1], [0.3, 1]])
    my_data = pd.DataFrame(trials, columns=['contrast1', 'answer'])
    my_x, my_y, my_n = sb.transform_data(my_data)

    npt.assert_equal(my_x, np.array([0.1, 0.2, 0.3]))
    npt.assert_equal(my_y, np.array([0.5, 0, 1.0]))
    npt.assert_equal(my_n, np.array([2, 2, 2]))
def __init__(self, path):
    """Populate the object's dimensions and variables from the text file at *path*.

    The fourth line of the file holds two integers followed by the sigma
    values; the second integer is the number of data lines that start at
    the sixth line. Each data line is a space-delimited record whose first
    field is a name and whose remaining fields are one value per sigma.

    Creates the ``sigma``, ``LAY`` and ``VAR`` dimensions, the ``sigma``
    and ``LAY`` variables, one ``('LAY',)`` variable per data line, and
    sets ``VGLVLS``, ``VGTOP``, ``NVARS`` and the ``VAR-LIST`` attribute.

    Raises
    ------
    IOError
        If additional record sets disagree with the first (no such sets
        are currently read, so this cannot trigger with ``keys == ['all']``).
    """
    # Read once and close the file instead of relying on garbage collection.
    with open(path) as handle:
        lines = handle.read().split('\n')

    header = lines[3].split()
    nlay, nspc = [int(_v) for _v in header[:2]]
    sigmas = [float(_v) for _v in header[2:]]
    nsigmas = len(sigmas)

    starts = [5]
    ends = [s + nspc for s in starts]
    keys = ['all']
    fieldnames = ('name',) + tuple(['s%f' % i for i in sigmas])
    # NOTE(review): the converter is registered under 'names' while the
    # field is called 'name' -- confirm it is actually applied.
    data = {
        k: csv2rec(StringIO('\n'.join(lines[s:e])), delimiter=' ',
                   names=fieldnames,
                   converterd=dict(names=lambda x: str(x).strip()))
        for k, s, e in zip(keys, starts, ends)
    }
    profile_spcs = np.char.strip(data[keys[0]]['name'])
    data_type = data[keys[0]].dtype

    self.createDimension('sigma', nsigmas)
    self.createDimension('LAY', nsigmas - 1)
    self.createVariable('sigma', 'f', ('sigma',),
                        values=np.array(sigmas), units='sigma')
    # Mean of each pair of neighbouring sigma values.
    self.createVariable(
        'LAY', 'f', ('LAY',),
        values=np.array(sigmas).repeat(2, 0)[1:-1].reshape(-1, 2).mean(1),
        units='sigma')
    self.VGLVLS = self.variables['sigma']
    self.VGTOP = 5000

    # Any record set beyond the first must match it in names and dtype.
    for k in keys[1:]:
        try:
            assert((np.char.strip(data[k]['name']) == profile_spcs).all())
            assert(data[k].dtype == data_type)
        except AssertionError:
            raise IOError('File is corrupt or inconsistent')

    varlist = []
    for a in data['all']:
        varkey = a[0].strip()
        self.createVariable(
            varkey, 'f', ('LAY',),
            units=_getunit(varkey),
            values=np.array([tuple(a)[1:]])[0].astype('f'),
            long_name=varkey.ljust(16),
            var_desc=varkey.ljust(16))
        varlist.append(varkey.ljust(16))

    self.NVARS = len(varlist)
    self.createDimension('VAR', self.NVARS)
    setattr(self, 'VAR-LIST', ''.join(varlist))
def test_recarray_csv_roundtrip(self):
    """Check that a float recarray survives ``rec2csv`` then ``csv2rec`` via ``self.fd``."""
    # Builtin ``float`` is what the removed ``np.float`` alias pointed to.
    expected = np.recarray((99, ),
                           [('x', float), ('y', float), ('t', float)])
    # initialising all values: uninitialised memory sometimes produces
    # floats that do not round-trip to string and back.
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)

    mlab.rec2csv(expected, self.fd)
    self.fd.seek(0)
    actual = mlab.csv2rec(self.fd)

    np.testing.assert_allclose(expected['x'], actual['x'])
    np.testing.assert_allclose(expected['y'], actual['y'])
    np.testing.assert_allclose(expected['t'], actual['t'])
def get_data_from_yahoo_api(ticker,
                            startdate=datetime.date(1900, 1, 1),
                            enddate=None):
    """Fetch historical quotes for *ticker* as a DataFrame plus the raw records.

    Parameters
    ----------
    ticker : symbol passed to ``finance.fetch_historical_yahoo``.
    startdate : datetime.date
        First day requested.
    enddate : datetime.date or None
        Last day requested; ``None`` (the default) means today, evaluated
        at call time rather than once at import time.

    Returns
    -------
    (df, r)
        ``df`` is sorted by ``date``, has ``volume`` cast to float and no
        ``adj_close`` column; ``r`` is the unmodified record array.
    """
    if enddate is None:
        enddate = datetime.date.today()

    fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
    try:
        r = mlab.csv2rec(fh)
    finally:
        fh.close()

    # ``DataFrame.sort`` was removed from pandas; ``sort_values`` replaces it.
    df = pd.DataFrame(data=r, columns=[
        'date', 'open', 'high', 'low', 'close', 'volume', 'adj_close'
    ]).sort_values('date')
    df['volume'] = df['volume'].astype(float)
    del df['adj_close']

    print('Available data: Ticker({ticker}) from {first_day} to {last_day}'.format(
        ticker=ticker, first_day=firstday(df), last_day=lastday(df)))
    return df, r
def test_recarray_csv_roundtrip():
    """Check that a float recarray survives ``rec2csv`` then ``csv2rec`` (text-mode file)."""
    # Builtin ``float`` is what the removed ``np.float`` alias pointed to.
    expected = np.recarray((99, ),
                           [('x', float), ('y', float), ('t', float)])
    # initialising all values: uninitialised memory sometimes produces floats
    # that do not round-trip to string and back.
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)

    # The context manager closes the temp file even if the round trip raises.
    with tempfile.TemporaryFile(suffix='csv', mode="w+") as fd:
        mlab.rec2csv(expected, fd)
        fd.seek(0)
        actual = mlab.csv2rec(fd)

    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
def recuperarDados(ticker, startdate=None, enddate=None):
    """Return quote records for *ticker*, from the local cache or freshly downloaded.

    With no dates given, the records are loaded from the ``.npy`` file
    named by ``dadosPathMask % ticker``. Otherwise the missing bound
    defaults to 2007-01-01 (start) or today (end), the quotes are
    downloaded, sorted, saved to that same cache path and returned.
    """
    if startdate is None and enddate is None:
        return np.load(dadosPathMask % (ticker) + ".npy")

    if startdate is None:
        startdate = datetime.date(2007, 1, 1)
    if enddate is None:
        enddate = datetime.date.today()

    with finance.fetch_historical_yahoo(ticker, startdate, enddate) as fh:
        records = mlab.csv2rec(fh)
        records.sort()
        # np.save adds the ".npy" suffix that the cached branch loads.
        np.save(dadosPathMask % (ticker), records)
    return records
def populate(csvFile, db, table, tablenames, delimiter="\t"):
    """(Re)create *table* in MySQL database *db* and fill it from *csvFile*.

    Parameters
    ----------
    csvFile : path or file object accepted by ``csv2rec``.
    db : str
        Database name (also used to connect).
    table : str
        Table to drop and recreate.
    tablenames : sequence of str
        Column names assigned to the CSV fields.
    delimiter : str
        Field delimiter of the CSV file.

    The table gets an auto-increment ``id`` column followed by one column
    per CSV field, typed via ``dtype2SQL``. Every value is passed through
    ``reform``, converted to ``str`` and stripped before insertion.
    """
    data = csv2rec(csvFile, delimiter=delimiter, names=tablenames)
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    conn = MySQLdb.connect(db=db, host="localhost", user="******", passwd="angi4rf")
    try:
        curs = conn.cursor(DictCursor)
        names = data.dtype.names
        cols = ", ".join(
            "`%s` %s" % (name, dtype2SQL(data.dtype[name].str))
            for name in names)

        # Identifiers cannot be bound as parameters, so the table name is
        # still interpolated; it must come from a trusted caller.
        curs.execute("DROP TABLE IF EXISTS %s.%s " % (db, table))
        tableQ = "CREATE TABLE %s (id INT NOT NULL AUTO_INCREMENT, %s, PRIMARY KEY (id)) ENGINE = MYISAM" % (table, cols)
        print(tableQ)
        curs.execute(tableQ)

        # Bind values as parameters so quotes or SQL fragments in the data
        # cannot break or alter the statement. The leading 0 lets
        # AUTO_INCREMENT assign the id.
        placeholders = ", ".join(["%s"] * (len(names) + 1))
        insertQ = "INSERT INTO %s VALUES (%s)" % (table, placeholders)
        for row in data:
            values = [0] + [str(reform(el)).strip() for el in row]
            curs.execute(insertQ, values)
    finally:
        conn.close()
def add_csv2nc(csv_name, nc_name):
    """Copy a fixed set of columns from a CSV file into same-named netCDF variables.

    Parameters
    ----------
    csv_name : path or file object accepted by ``ml.csv2rec``.
    nc_name : argument passed to ``open_netcdf``.

    Returns
    -------
    str
        ``'All updated'`` on success.

    Raises
    ------
    ValueError
        If a CSV column and its netCDF variable differ in length.
        Variables processed before the mismatch have already been written.
        The dataset is closed in every case.
    """
    nc_vars = [
        'air_temp', 'soil_temp', 'rg', 'co2_flux', 'qc_co2_flux', 'u_star',
        'wind_dir', 'foot_print'
    ]
    csv_data = ml.csv2rec(csv_name)
    nc_data = open_netcdf(nc_name)
    try:
        for var_title in nc_vars:
            nc_var = nc_data.variables[var_title]
            csv_var = csv_data[var_title]
            if len(csv_var) != len(nc_var):
                raise ValueError(
                    'Cannot project data of different shapes together')
            nc_var[:] = csv_var[:]
    finally:
        # Close on the error path too, so the dataset is never left open.
        nc_data.close()
    return 'All updated'