Example #1
 def test_fractionate(self):
     data1 = csv2rec('arch1.csv')
     data2 = csv2rec('arch2.csv')
     dl = [data1, data2]
     fr = fractionate(dl, (10, 10), (5, 5), ['row', 'column'])
     self.assertTrue(fr[0]['row'][3] == 5)
     self.assertTrue(fr[1]['column'][2] == 0)
Example #2
 def _make(self, output_file, basin_poly, ba_csv, fa_ncons_csv, area_csv, arid_thresh=0.03, use_thresh=0.012, **kwargs):
     print "loading data"
     ba = np.genfromtxt(ba_csv,np.double,skip_header=1,delimiter=',')
     area_arr = mlab.csv2rec(area_csv)
     nc_arr = mlab.csv2rec(fa_ncons_csv)
     
     ids = ba[:,0]
     
     mean_ba = np.mean(ba[:,1:],1)
     ncons = gen_merge.arrange_vector_by_ids(nc_arr["ncons"],nc_arr["basinid"],ids).astype(np.double)
     area = gen_merge.arrange_vector_by_ids(area_arr["f_area"],area_arr["basinid"],ids).astype(np.double)
     
     wri = ncons/mean_ba
     
     miscmask = (ncons/area<use_thresh)*(mean_ba/area<arid_thresh)
     wri_s = self.score(wri)
     wri_s[miscmask] = MINSCORE
     wri_cat = self.categorize(wri_s, miscmask)
     
     
     joinarray = np.rec.fromarrays(
         (ba[:,0], mean_ba, ncons, wri, wri_s, wri_cat),
         names=(BASIN_ID_FIELD, "BA", "FA_NCONS", self.plot_field_name,
                "%s_s" % self.plot_field_name,
                "%s_cat" % self.plot_field_name))
     
     print "joining data"
     ap.CopyFeatures_management(basin_poly,output_file)
     ap.da.ExtendTable(output_file,BASIN_ID_FIELD,joinarray,BASIN_ID_FIELD)
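The masking step above flags basins that are both low-use (ncons/area < use_thresh) and arid (mean_ba/area < arid_thresh) by combining two boolean arrays. A minimal sketch of the same logic with made-up numbers (for boolean arrays, & is equivalent to the * used above):

import numpy as np

MINSCORE = 0.0  # stand-in; the real constant comes from the surrounding module
use_thresh, arid_thresh = 0.012, 0.03
ncons = np.array([0.5, 0.001, 2.0])    # made-up net consumption per basin
mean_ba = np.array([10.0, 0.02, 4.0])  # made-up mean availability
area = np.array([100.0, 100.0, 100.0])

wri = ncons / mean_ba
miscmask = (ncons / area < use_thresh) & (mean_ba / area < arid_thresh)
wri_s = wri.copy()          # the real code applies self.score() first
wri_s[miscmask] = MINSCORE  # low-use, arid basins get the minimum score
print(wri_s)                # [0.05 0.   0.5 ]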
Example #3
 def test_merge_formatted(self):
     data1 = csv2rec('arch1.csv')
     data2 = csv2rec('arch2.csv')
     dl = [data1, data2]
     merged = merge_formatted(dl)
     self.assertTrue(sum(merged['rew']) == 2)
     self.assertTrue(sum(merged['column']) == 12)
Example #4
def test_sanity():
    from nipy.modalities.fmri.fmristat.tests import FIACdesigns

    """
    Single subject fitting of FIAC model
    """

    # Based on file
    # subj3_evt_fonc1.txt
    # subj3_bloc_fonc3.txt

    for subj, run, dtype in [(3, 1, "event"), (3, 3, "block")]:
        nvol = 191
        TR = 2.5
        Tstart = 1.25

        volume_times = np.arange(nvol) * TR + Tstart
        volume_times_rec = formula.make_recarray(volume_times, "t")

        path_dict = {"subj": subj, "run": run}
        if exists(pjoin(DATADIR, "fiac_%(subj)02d", "block", "initial_%(run)02d.csv") % path_dict):
            path_dict["design"] = "block"
        else:
            path_dict["design"] = "event"

        experiment = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "experiment_%(run)02d.csv") % path_dict)
        initial = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "initial_%(run)02d.csv") % path_dict)

        X_exper, cons_exper = design.event_design(experiment, volume_times_rec, hrfs=delay.spectral)
        X_initial, _ = design.event_design(initial, volume_times_rec, hrfs=[hrf.glover])
        X, cons = design.stack_designs((X_exper, cons_exper), (X_initial, {}))

        Xf = np.loadtxt(StringIO(FIACdesigns.designs[dtype]))
        for i in range(X.shape[1]):
            yield nitest.assert_true, (matchcol(X[:, i], Xf.T)[1] > 0.999)
Example #5
def extract_lai_fpar(above_par_dat, below_par_dat):
    above_par_ra = mlab.csv2rec(above_par_dat)
    below_par_ra = mlab.csv2rec(below_par_dat)
    points_ra = mlab.csv2rec('lonlat_threet.csv')
    plot = below_par_ra['plot']
    date = below_par_ra['date']
    below_par = below_par_ra['par']
    lats = np.array(points_ra['latitude'].tolist()*2)
    lons = np.array(points_ra['longitude'].tolist()*2)
    above_par = []
    fapar = []
    for time in enumerate(date):
        par_idx = find_nearest_idx(above_par_ra['date'], time[1])
        above_par.append(np.mean((above_par_ra['par'][par_idx-1], above_par_ra['par'][par_idx],
                                 above_par_ra['par'][par_idx+1])))
        if above_par_ra['par'][par_idx] < below_par[time[0]]:
            fapar.append(0)
        else:
            fapar.append((above_par_ra['par'][par_idx] - below_par[time[0]]) /
                     above_par_ra['par'][par_idx])
    above_par = np.array(above_par)
    fapar = np.array(fapar)
    newra = np.column_stack((date, plot, lats, lons, above_par, below_par, fapar))
    new_ra = np.core.records.fromarrays(newra.transpose(),
                                        dtype=[('date', 'object'),
                                               ('plot', 'i'), ('lat', 'f'),
                                               ('lon', 'f'), ('above_par', 'f'),
                                               ('below_par', 'f'), ('fapar', 'f')])
    return new_ra
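The per-sample loop above computes fAPAR as (above - below) / above, clamped to zero whenever below-canopy PAR exceeds above-canopy PAR. The same core step in vectorized form, with made-up PAR values:

import numpy as np

above = np.array([1200.0, 800.0, 50.0])  # made-up above-canopy PAR
below = np.array([300.0, 900.0, 10.0])   # made-up below-canopy PAR
fapar = np.clip((above - below) / above, 0.0, None)
print(fapar)  # [0.75 0.   0.8 ]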
Example #6
def test_transform_data():
    """ 
    Testing the transformation of the data from raw data to functions 
    used for fitting a function.
    
    """
    # We start with actual data. We test here just that reading the data in 
    # different ways ultimately generates the same arrays. 
    from matplotlib import mlab 
    ortho = mlab.csv2rec(op.join(data_path, 'ortho.csv'))
    para = mlab.csv2rec(op.join(data_path, 'para.csv'))
    x1, y1, n1 = sb.transform_data(ortho)
    x2, y2, n2 = sb.transform_data(op.join(data_path, 'ortho.csv'))
    npt.assert_equal(x1, x2)
    npt.assert_equal(y1, y2)
    # We can also be a bit more critical, by testing with data that we 
    # generate, and should produce a particular answer:
    my_data = pd.DataFrame(
        np.array([[0.1, 2], [0.1, 1], [0.2, 2], [0.2, 2], [0.3, 1], 
                  [0.3, 1]]),
        columns=['contrast1', 'answer'])
    my_x, my_y, my_n = sb.transform_data(my_data)
    npt.assert_equal(my_x, np.array([0.1, 0.2, 0.3]))
    npt.assert_equal(my_y, np.array([0.5, 0, 1.0]))
    npt.assert_equal(my_n, np.array([2, 2, 2]))
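For reference, the expected arrays can be reproduced with a plain pandas groupby. This is an assumed reading of what transform_data computes (proportion of answer == 1 and trial count per contrast level), not the project's actual implementation:

import numpy as np
import pandas as pd

df = pd.DataFrame({'contrast1': [0.1, 0.1, 0.2, 0.2, 0.3, 0.3],
                   'answer':    [2, 1, 2, 2, 1, 1]})
g = df.groupby('contrast1')['answer']
x = np.array(sorted(df['contrast1'].unique()))
y = g.apply(lambda a: np.mean(a == 1)).values  # proportion of answer == 1
n = g.size().values                            # trials per contrast
print(x, y, n)  # [0.1 0.2 0.3] [0.5 0.  1. ] [2 2 2]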
Example #7
def makediffs(models = _allmodels, verbose = False, kpp = True):
    for model in models:
        model = os.path.splitext(os.path.basename(model))[0]
        if kpp:
            kppdat = csv2rec(os.path.join(model, model + '.dat'), delimiter = ' ')
        else:
            if model not in _modelconfigs:
                raise IOError('If KPP is not properly installed, you cannot run tests on mechanisms other than cbm4, saprc99, and small_strato.')
            kppdat = csv2rec(os.path.join(os.path.dirname(__file__), model + '.dat'), delimiter = ' ')
        pykppdat = csv2rec(os.path.join(model, model + '.pykpp.dat'), delimiter = ',')
        diff = pykppdat.copy()
        pct = pykppdat.copy()
        keys = set(kppdat.dtype.names).intersection(pykppdat.dtype.names)
        notkeys = set(pykppdat.dtype.names).difference(kppdat.dtype.names)
        notkeys.remove('t')
        for k in notkeys:
            diff[k] = np.nan
            pct[k] = np.nan
    
        for k in keys:
            diff[k] = pykppdat[k] - kppdat[k][:]
            pct[k] = diff[k] / kppdat[k][:] * 100
        diff['t'] = pykppdat['t'] - (kppdat['time'] * 3600. + pykppdat['t'][0])
        pct['t'] = diff['t'] / (kppdat['time'] * 3600. + pykppdat['t'][0]) * 100
        
        rec2csv(diff, os.path.join(model, model + '.diff.csv'), delimiter = ',')
        rec2csv(pct, os.path.join(model, model + '.pct.csv'), delimiter = ',')
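csv2rec was deprecated in matplotlib 2.2 and removed in 3.1. If scripts like this need to run on a modern stack, np.genfromtxt gives a roughly equivalent field-addressable array; 'model.dat' below is a stand-in path:

import numpy as np

kppdat = np.genfromtxt('model.dat', delimiter=' ', names=True, dtype=None,
                       encoding=None)
print(kppdat.dtype.names)   # field names parsed from the header row
print(kppdat['time'][:5])   # columns are addressable by name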
Example #8
def get_experiment_initial(path_dict):
    """Get the record arrays for the experimental/initial designs.

    Parameters
    ----------
    path_dict : dict
        containing key 'rootdir', 'run', 'subj'

    Returns
    -------
    experiment, initial : Two record arrays.

    """
    # The following two lines read in the .csv files
    # and return recarrays, with fields
    # experiment: ['time', 'sentence', 'speaker']
    # initial: ['time', 'initial']

    rootdir = path_dict['rootdir']
    if not exists(pjoin(rootdir, "experiment_%(run)02d.csv") % path_dict):
        e = "can't find design for subject=%(subj)d,run=%(subj)d" % path_dict
        raise IOError(e)

    experiment = csv2rec(pjoin(rootdir, "experiment_%(run)02d.csv") % path_dict)
    initial = csv2rec(pjoin(rootdir, "initial_%(run)02d.csv") % path_dict)

    return experiment, initial
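A detail worth noting in the path handling above: the %(run)02d placeholder survives os.path.join, so the string interpolation can be applied once to the fully joined path. A minimal demonstration:

from os.path import join as pjoin

path_dict = {'run': 3}
print(pjoin('/data', 'experiment_%(run)02d.csv') % path_dict)
# /data/experiment_03.csv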
Example #9
 def _make(self, output_file, basin_poly, ba_csv, withdrawal_csv, consumption_csv, area_csv, arid_thresh=0.03, use_thresh=0.012, **kwargs):
     print "loading data"
     ba = np.genfromtxt(ba_csv,np.double,skip_header=1,delimiter=',')
     area_arr = mlab.csv2rec(area_csv)
     ut_arr = mlab.csv2rec(withdrawal_csv)
     ct_arr = mlab.csv2rec(consumption_csv)
     
     ids = ba[:,0]
     
     mean_ba = np.mean(ba[:,1:],1)
     ut = gen_merge.arrange_vector_by_ids(ut_arr["ut"],ut_arr["basinid"],ids).astype(np.double)
     uc = gen_merge.arrange_vector_by_ids(ct_arr["ct"],ct_arr["basinid"],ids).astype(np.double)
     area = gen_merge.arrange_vector_by_ids(area_arr["f_area"],area_arr["basinid"],ids).astype(np.double)
     bws = ut/mean_ba
     
     miscmask = (ut/area<use_thresh)*(mean_ba/area<arid_thresh)
     #miscmask2 = (ut/area[:,1]<use_thresh)*(mean_ba/area[:,1]<arid_thresh)*(bws<.8)
     bws_s = self.score(bws)
     bws_s[miscmask] = MAXSCORE
     bws_cat = self.categorize(bws_s, miscmask)
     
     joinarray = np.rec.fromarrays(
         (ba[:,0], mean_ba, ut, uc, bws, bws_s, bws_cat, area),
         names=(BASIN_ID_FIELD, "BA", "WITHDRAWAL", "CONSUMPTION",
                self.plot_field_name, "%s_s" % self.plot_field_name,
                "%s_cat" % self.plot_field_name, "AREAM3"))
     
     print "joining data"
     ap.CopyFeatures_management(basin_poly,output_file)
     ap.da.ExtendTable(output_file,BASIN_ID_FIELD,joinarray,BASIN_ID_FIELD)
Example #10
def rewrite_spec(subj, run, root = "/home/jtaylo/FIAC-HBM2009"):
    """
    Take a FIAC specification file and get two specifications
    (experiment, begin).

    This creates two new .csv files, one for the experimental
    conditions, the other for the "initial" confounding trials that
    are to be modelled out. 

    For the block design, the "initial" trials are the first
    trials of each block. For the event designs, the 
    "initial" trials are made up of just the first trial.

    """

    if exists(pjoin("%(root)s", "fiac%(subj)d", "subj%(subj)d_evt_fonc%(run)d.txt") % {'root':root, 'subj':subj, 'run':run}):
        designtype = 'evt'
    else:
        designtype = 'bloc'

    # Fix the format of the specification so it is
    # more in the form of a 2-way ANOVA

    eventdict = {1:'SSt_SSp', 2:'SSt_DSp', 3:'DSt_SSp', 4:'DSt_DSp'}
    s = StringIO()
    w = csv.writer(s)
    w.writerow(['time', 'sentence', 'speaker'])

    specfile = pjoin("%(root)s", "fiac%(subj)d", "subj%(subj)d_%(design)s_fonc%(run)d.txt") % {'root':root, 'subj':subj, 'run':run, 'design':designtype}
    d = np.loadtxt(specfile)
    for row in d:
        w.writerow([row[0]] + eventdict[row[1]].split('_'))
    s.seek(0)
    d = csv2rec(s)

    # Now, take care of the 'begin' event
    # This is due to the FIAC design

    if designtype == 'evt':
        b = np.array([(d[0]['time'], 1)], np.dtype([('time', np.float),
                                                    ('initial', np.int)]))
        d = d[1:]
    else:
        k = np.equal(np.arange(d.shape[0]) % 6, 0)
        b = np.array([(tt, 1) for tt in d[k]['time']], np.dtype([('time', np.float),
                                                                 ('initial', np.int)]))
        d = d[~k]

    designtype = {'bloc':'block', 'evt':'event'}[designtype]

    fname = pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "experiment_%(run)02d.csv") % {'root':root, 'subj':subj, 'run':run, 'design':designtype}
    rec2csv(d, fname)
    experiment = csv2rec(fname)

    fname = pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "initial_%(run)02d.csv") % {'root':root, 'subj':subj, 'run':run, 'design':designtype}
    rec2csv(b, fname)
    initial = csv2rec(fname)

    return d, b
Example #11
 def test_format_dense(self):
     data1 = csv2rec('arch1.csv')
     data2 = csv2rec('arch2.csv')
     dl = [data1, data2]
     form = format_dense(dl, 3, (4,4))
     self.assertTrue(np.all(form[0]['count'][:4] == np.array([1,1,3,3])))
     self.assertTrue(np.all(form[1]['count'] ==
                                            np.array([1,1,3,3,1,1,5,1])))
Example #12
def append_rec(recs):
    base = mlab.csv2rec(recs[0]["file"])

    for nw in recs[1:]:
        append = mlab.csv2rec(nw["file"])
        for k,v in append.dtype.fields.iteritems():
            base = mlab.recs_join("sys_tick",k,[base,append],missing=0)
    return base
Example #13
 def test_add_data_fields(self):
     data1 = csv2rec('arch1.csv')
     data2 = csv2rec('arch2.csv')
     dl = [data1, data2]
     alt_data = add_data_fields(dl, {'year': (1998, 2002)})
     self.assertTrue(np.all(alt_data[0]['year'] == '1998'))
     self.assertTrue(np.all(alt_data[1]['year'] == '2002'))
     alt_data = add_data_fields(dl, {'year' : (1998, 2002), 'why': ('h',
                                 'a')})
     self.assertTrue(np.all(alt_data[0]['why'] == 'h'))
Example #14
def plotGraphs():
  global gDateStr, gTimeStr
  
  print "Plotting..." 
  print "temperatures"
  filename = "./data/" + gDateStr + "_temperatures.csv";
  r = mlab.csv2rec(filename, delimiter=',')

  fig = Figure(figsize=(6,6))
  canvas = FigureCanvas(fig)

  ax = fig.add_subplot(111)
  ax.set_title('Temperatures '+gDateStr,fontsize=14)

  ax.set_xlabel('Time',fontsize=6)
  ax.set_ylabel('Temperature (C)',fontsize=6)

  ax.grid(True,linestyle='-',color='0.75')

  # run two sanitize passes over the data
  r[r.dtype.names[1]] = arrayops.sanitize( r[r.dtype.names[1]] )
  r[r.dtype.names[2]] = arrayops.sanitize( r[r.dtype.names[2]] )


  # Generate the plot.
  ax.plot(r[r.dtype.names[0]],r[r.dtype.names[1]],color='tomato');
  ax.plot(r[r.dtype.names[0]],r[r.dtype.names[2]],color='green');

  # plot pump on times
  print "pump on"
  filename = "./data/" + gDateStr + "_pumpON.csv";
  if os.path.exists(filename):
    r = mlab.csv2rec(filename, delimiter=',')
    ax.scatter(r[r.dtype.names[0]],r[r.dtype.names[1]],color='orange');

  # plot pump off times
  print "pump off"
  filename = "./data/" + gDateStr + "_pumpOFF.csv";
  if os.path.exists(filename):
    r = mlab.csv2rec(filename, delimiter=',')
    ax.scatter(r[r.dtype.names[0]],r[r.dtype.names[1]],color='blue');


  for tick in ax.xaxis.get_major_ticks():
    tick.label.set_fontsize(6)

  for tick in ax.yaxis.get_major_ticks():
    tick.label.set_fontsize(6)

  ax.set_ylim(-5, 35)

  # Save the generated Plot to a PNG file.
  filename = "/var/www/Prometheus/data/"+gDateStr+"_temperatures.png"
  canvas.print_figure(filename,dpi=100)
  os.system('ln -sf '+filename+' /var/www/Prometheus/data/current_temperatures.png')
Example #15
def main():
    inputlist = ["bin/global_BWS_20121015.csv","bin/global_WRI_20121015.csv"]
    lhs = mlab.csv2rec("bin/global_GU_20121015.csv")
    rhslist = []
    for x in inputlist:
        rhslist.append(mlab.csv2rec(x))
    
    rhslist[0]["basinid"] = rhslist[0]["basinid"].astype(np.long)
    keys = ("basinid","countryid","id")
    lhs = join_recs_on_keys(lhs,rhslist,keys)
    mlab.rec2csv(lhs,"bin/test.csv")
    print "complete"
Example #16
def test_sanity():
    from nipy.modalities.fmri import design, hrf
    import nipy.modalities.fmri.fmristat.hrf as fshrf
    from nipy.modalities.fmri.fmristat.tests import FIACdesigns
    from nipy.modalities.fmri.fmristat.tests.test_FIAC import matchcol
    from nipy.algorithms.statistics import formula
    from nose.tools import assert_true

    """
    Single subject fitting of FIAC model
    """

    # Based on file
    # subj3_evt_fonc1.txt
    # subj3_bloc_fonc3.txt

    for subj, run, design_type in [(3, 1, 'event'), (3, 3, 'block')]:
        nvol = 191
        TR = 2.5
        Tstart = 1.25

        volume_times = np.arange(nvol)*TR + Tstart
        volume_times_rec = formula.make_recarray(volume_times, 't')

        path_dict = {'subj':subj, 'run':run}
        if exists(pjoin(DATADIR, "fiac_%(subj)02d",
                        "block", "initial_%(run)02d.csv") % path_dict):
            path_dict['design'] = 'block'
        else:
            path_dict['design'] = 'event'

        experiment = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "experiment_%(run)02d.csv")
                             % path_dict)
        initial = csv2rec(pjoin(DATADIR, "fiac_%(subj)02d", "%(design)s", "initial_%(run)02d.csv")
                                % path_dict)

        X_exper, cons_exper = design.event_design(experiment,
                                                  volume_times_rec,
                                                  hrfs=fshrf.spectral)
        X_initial, _ = design.event_design(initial,
                                           volume_times_rec,
                                           hrfs=[hrf.glover])
        X, cons = design.stack_designs((X_exper, cons_exper), (X_initial, {}))

        # Get original fmristat design
        Xf = FIACdesigns.fmristat[design_type]
        # Check our new design can be closely matched to the original
        for i in range(X.shape[1]):
            # Columns can be very well correlated negatively or positively
            assert_true(abs(matchcol(X[:,i], Xf)[1]) > 0.999)
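matchcol is a nipy test helper; the assumed behaviour is to return, for a given design column, the best-matching column of the reference design together with its correlation. A rough self-contained sketch of such a check:

import numpy as np

def matchcol_sketch(col, cols):
    # absolute correlation of col against each candidate column
    rs = np.array([abs(np.corrcoef(col, c)[0, 1]) for c in cols])
    return rs.argmax(), rs.max()

X = np.random.randn(191, 3)
idx, r = matchcol_sketch(X[:, 0], X.T)
assert idx == 0 and r > 0.999  # a column matches itself perfectly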
Example #17
def test_event_design():
    block = csv2rec(StringIO(altdescr["block"]))
    event = csv2rec(StringIO(altdescr["event"]))
    t = np.arange(191) * 2.5 + 1.25

    bkeep = np.not_equal((np.arange(block.time.shape[0])) % 6, 0)
    ekeep = np.greater(np.arange(event.time.shape[0]), 0)

    # Even though there is a FIAC block experiment
    # the design is represented as an event design
    # with the same event repeated several times in a row...

    Xblock, cblock = design.event_design(block[bkeep], t, hrfs=delay.spectral)
    Xevent, cevent = design.event_design(event[ekeep], t, hrfs=delay.spectral)
Example #18
def get_data(file_name):
    file_read = file(file_name,'r')
    l = file_read.readline()
    p = {} #This will hold the params
    l = file_read.readline()
    data_rec = []
    
    if l=='':
        return p,l,data_rec

    while l[0]=='#':
        try:
            p[l[1:l.find(':')-1]]=float(l[l.find(':')+1:l.find('\n')]) 

        #Not all the parameters can be cast as float (the task and the
        #subject): 
        except:
            p[l[2:l.find(':')-1]]=l[l.find(':')+1:l.find('\n')]

        l = file_read.readline()

    try:
        data_rec = csv2rec(file_name)
    except ValueError:
        p = []
    
    return p,l,data_rec
Example #19
 def scatter_from_csv(self, filename, sand = 'sand', silt = 'silt', clay = 'clay', diameter = '', hue = '', tags = '', **kwargs):
     """Loads data from filename (expects csv format). Needs one header row with at least the columns {sand, silt, clay}. Can also plot two more variables for each point; specify the header value for columns to be plotted as diameter, hue. Can also add a text tag offset from each point; specify the header value for those tags.
     Note! text values (header entries, tag values ) need to be quoted to be recognized as text. """
     fh = file(filename, 'rU')
     soilrec = csv2rec(fh)
     count = 0
     if (sand in soilrec.dtype.names):
         count = count + 1
     if (silt in soilrec.dtype.names):
         count = count + 1
     if (clay in soilrec.dtype.names):
         count = count + 1
     if (count < 3):
         print "ERROR: need columns for sand, silt and clay identified in ', filename"
     locargs = {'s': None, 'c': None}
     for (col, key) in ((diameter, 's'), (hue, 'c')):
         col = col.lower()
         if (col != '') and (col in soilrec.dtype.names):
             locargs[key] = soilrec.field(col)
         else:
             print 'ERROR: did not find ', col, 'in ', filename
     for k in kwargs:
         locargs[k] = kwargs[k]
     values = zip(*[soilrec.field(sand), soilrec.field(clay), soilrec.field(silt)])
     print values
     (xs, ys) = self._toCart(values)
     p.scatter(xs, ys, label='_', **locargs)
     if (tags != ''):
         tags = tags.lower()
         for (x, y, tag) in zip(*[xs, ys, soilrec.field(tags)]):
             print x,
             print y,
             print tag
             p.text(x + 1, y + 1, tag, fontsize=12)
     fh.close()
Example #20
 def __init__(self, path):
     lines = open(path).read().split('\n')
     header = lines[3].split()
     nlay, nspc = list(map(int, header[:2]))
     sigmas = list(map(float, header[2:]))
     nsigmas = len(sigmas)
     date, time = list(map(int, lines[4].split()))
     starts =  [5 + i + i * nspc for i in range(4)]
     ends = [s + 1 + nspc for s in starts]
     keys = [lines[s].strip().lower() for s in starts]
     fieldnames = ('name',) + tuple(['s%f' % i for i in sigmas])
     self.data = dict(
         [(k, csv2rec(StringIO(u'\n'.join(lines[s+1:e])), delimiter=' ',
                      names=fieldnames,
                      converterd=dict(names=lambda x: str(x).strip())))
          for k, s, e in zip(keys, starts, ends)])
     self._profile_spcs = np.char.strip(self.data[keys[0]]['name'])
     data_type = self.data[keys[0]].dtype
     data_shape =  self.data[keys[0]].shape
     ks = keys[1:]
     for k in ks:
         try:
             assert((np.char.strip(self.data[k]['name']) == self._profile_spcs).all())
             assert(self.data[k].dtype == data_type)
             assert(self.data[k].dtype == data_type)
         except AssertionError:
             raise IOError('File is corrupt or inconsistent')
         
     self._prof_spc = ['NO2', 'NO', 'O3P', 'O3', 'NO3', 'N2O5', 'HNO3', 'O1D', 'HO', 'HONO', 'HO2', 'CO', 'HNO4', 'H2O2', 'SO2', 'SULF', 'MO2', 'HCHO', 'OP1', 'OP2', 'ONIT', 'KET', 'ACO3', 'PAN', 'PAA', 'ORA2', 'TPAN', 'ALD', 'ORA1', 'GLY', 'MGLY', 'CSL', 'MACR', 'MVK', 'ISOPROD', 'DCB', 'OL2', 'ISO', 'TERP', 'ETH', 'HC3', 'HC3', 'HC5', 'HC8', 'TOL', 'XYL', 'XYL', 'XYL', 'OLT', 'OLI', 'BENZENE', 'HG', 'HGIIGAS', 'CO2']
     self._prof_dict = dict([(k, []) for k in self._prof_spc])
Example #21
def csv2sql(database, table, comments="#", delimiter=","):
    """
    The main method that will take in the csv
    file and create a database and a table.
    """
    global csv_file

    # create the database and cursor
    con = sqlite3.connect(database)
    cur = con.cursor()

    # load in the data
    data = csv2rec(csv_file, comments=comments, delimiter=delimiter)

    # make the table
    create_table_command = "CREATE TABLE {0} (".format(table)
    for n, t in zip(data.dtype.names, data[0]):
        create_table_command += "{0} {1}, ".format(n, get_data_type(t))
    create_table_command += ")"
    cur.execute(create_table_command)

    # insert the rows of data into the table.
    for row in data:
        cur.execute(
            "INSERT INTO {0} VALUES {1}".format(table, str(row)))

    # Save (commit) the database
    con.commit()

    # close the connection to the database
    con.close()
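One caveat about the insert loop above: interpolating str(row) into the SQL leaves quoting to chance. A safer variant lets sqlite3 bind the values with ? placeholders:

import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE demo (name TEXT, value REAL)')
rows = [('a', 1.0), ('b', 2.5)]
con.executemany('INSERT INTO demo VALUES (?, ?)', rows)  # driver does the quoting
con.commit()
print(con.execute('SELECT * FROM demo').fetchall())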
Example #22
def add_time_interpolated_from_csv(path, timekey, incr=600):
    names = map(lambda x: x.strip(), file(path).read().split("\n")[0].split(","))

    data = csv2rec(path)
    datadict = dict([(newkey, data[k]) for k, newkey in zip(data.dtype.names, names)])
    time = datadict.pop(timekey)
    add_time_interpolated(time=time, incr=incr, **datadict)
Example #23
    def getData(self):
      self.dataLoaded = 1

      fh = finance.fetch_historical_yahoo(self.name, self.startdate, self.enddate)
  
      self.data = mlab.csv2rec(fh); fh.close()
      self.data.sort()
Example #24
    def load_csv(self, fname):
        def clean_csv():
            print "Reading csv from file %s" % (fname)
            reader = csv.reader(open(fname, 'rb'))
            cleaned_fname = "/tmp/lc-%s.csv" % (random.random())
            print "Cleaning csv file using python csv library, writing new file to %s" % (cleaned_fname)
            writer = csv.writer(open(cleaned_fname, 'wb'))
            for i, row in enumerate(reader):
                # skip first 2 rows
                if i < 2: continue
                if len(self.csv_columns) == len(row):
                    writer.writerow(row)
                else:
                    print "\tError row %d, line contents:\"%s\"" % (i, ", ".join(row))
            return cleaned_fname

        cleaned_fname = clean_csv()
        converterd = {'interest_rate': fieldparsers.strip_non_numeric_and_parse,
                      'loan_length': fieldparsers.strip_non_numeric_and_parse,
                      'employment_length': fieldparsers.parse_employment_years,
                      'debt_to_income_ratio': fieldparsers.strip_non_numeric_and_parse,
                      'revolving_line_utilization': fieldparsers.strip_non_numeric_and_parse,
                      'status': fieldparsers.parse_status
                      }
        print "Loading csv via mlab"
        self.data = mlab.csv2rec(cleaned_fname, skiprows=2, converterd=converterd, names=self.csv_columns)
        subprocess.call(["rm", "-rf", cleaned_fname])
        print "Done."
Example #25
def open_dense_data(filenames, direct, delim=','):
    '''
    This function takes in a list of dense data file names, opens
    them, and returns them as a list of rec arrays.

    Parameters
    ----------

    filenames : list 
        A list of filenames

    direct : string
        The directory within data/archival/ where the files are.
        Example 'ANBO_2010' or 'LBRI'

    delim : string
        The default file delimiter is ','

    Returns
    -------
    : list
        A list of rec arrays

    '''
    assert direct.find('/') == -1, "%s should not contain a '/'" % (direct)
    filedir = jp(pd(pd(gcwd())), 'archival', direct)
    datayears = []
    for name in filenames:
        data = plt.csv2rec(jp(filedir, name), delimiter=delim)
        datayears.append(data)
    return datayears
Example #26
def replace_vals(filename, replace, delim=','):
    '''
    Replace the values in filename with the value specified in replace

    Parameters
    ----------
    filename : string
        Will be read into a rec array

    replace : tuple
        First object is the value to replace and the second object is what
        to replace it with

    '''
    data = csv2rec(filename, delimiter=delim, missing=replace[0])
    for nm in data.dtype.names:
        try:
            # Missing float
            isNaN = (np.isnan(data[nm]))
        except:
            isNaN = np.zeros(len(data[nm]), dtype=bool)
        isBlank = np.array([it == '' for it in data[nm]])
        isMinusOne = (data[nm] == -1)# Missing int
        # Missing other
        isNone = np.array([i == None for i in data[nm]])
        ind = np.bitwise_or(isNaN, isBlank)
        ind = np.bitwise_or(ind, isMinusOne)
        ind = np.bitwise_or(ind, isNone)
        data[nm][ind] = replace[1]
    return data
Example #27
def data_from_csv2rec(infile):
    """Uses matplotlib.mlab csv2rec to parse data
    trys to cast fildes into correct data-type
    datrec.dtype to see datat-types and names
    """
    datrec = csv2rec(infile)
    return datrec 
Example #28
    def __init__(self, strip=None):
        absdir = os.path.dirname(os.path.abspath(__file__))
        self.data = csv2rec(os.path.join(absdir, '..', 'data', 'pearson_lee.csv'))
        self.M = self.data['mother']
        self.D = self.data['daughter']

        self.strip = strip
Example #29
 def test_csv2rec_roundtrip(self):
     delta = datetime.timedelta(days=1)
     date0 = datetime.date(2007,12,16)
     date1 = date0 + delta
     date2 = date1 + delta
     delta = datetime.timedelta(days=1)
     datetime0 = datetime.datetime(2007,12,16,22,29,34,924122)
     datetime1 = datetime0 + delta
     datetime2 = datetime1 + delta
     ra=numpy.rec.fromrecords([
             (123, date0, datetime0, 1197346475.0137341, 'a,bc'),
             (456, date1, datetime1, 123.456, 'd\'ef'),
             (789, date2, datetime2, 0.000000001, 'ghi'),
                         ],
         names='intdata,datedata,datetimedata,floatdata,stringdata')
     fh = StringIO.StringIO()
     mlab.rec2csv( ra, fh )
     fh.seek(0)
     if 0:
         print 'CSV contents:','-'*40
         print fh.read()
         print '-'*40
         fh.seek(0)
     ra2 = mlab.csv2rec(fh)
     fh.close()
     for name in ra.dtype.names:
         if 0:
             print name, repr(ra[name]), repr(ra2[name])
             dt = ra.dtype[name]
             print 'repr(dt.type)',repr(dt.type)
         self.failUnless( numpy.all(ra[name] == ra2[name]) ) # should not fail with numpy 1.0.5
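The same round-trip is usually written with pandas today; a minimal sketch covering the float and quoted-string cases exercised by the test above:

import io
import pandas as pd

df = pd.DataFrame({'intdata': [123, 456],
                   'floatdata': [1197346475.0137341, 123.456],
                   'stringdata': ['a,bc', "d'ef"]})  # commas/quotes get escaped
buf = io.StringIO()
df.to_csv(buf, index=False)
buf.seek(0)
df2 = pd.read_csv(buf)
assert df.equals(df2)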
Example #30
    def shift(self):
        """
        Print the expected gain/loss of each party.
        """
        nincum = np.zeros(3) #dem,gop,ind
        nnew = np.zeros(3) #dem,gop,ind

        rec = mlab.csv2rec('senate_polls.csv')
        states = np.unique(rec.state)
        
        for state in states:
            data = self.polldat(state)
            data = data[0,1:4]
            candidates = self.candidates(state)
            iincum =  (np.where(self.partyarr == candidates[3]))[0][0]
            ileader = np.argmax(data)

            nnew[ileader]  = nnew[ileader] +1 
            nincum[iincum] = nincum[iincum] + 1
            

        shift = nnew - nincum

        print 'Expected Shift in Senate Party Balance'
        print self.partyarr
        print shift
Example #31
    def extract(data):
        data_recs = mlab.csv2rec(data)
        rv = []

        for k, v in data_recs.dtype.fields.iteritems():
            try:  #note that datetime will raise
                s = data_recs[k].std()
                if s > 0 or options["boring"]:
                    rv.append(k)
            except TypeError, e:
                pass  # non-number type, e.g. date
        return rv
Example #32
def graficoTes():

    with open('/home/lucas/PycharmProjects/openCsv/dados_consumo_todos/total.csv', encoding="ISO-8859-1") as fname:
        gender_degree_data = csv2rec(fname)
    anos = pd.DataFrame(gender_degree_data, columns=['municipio', 'ibge', 'latitude', 'longitude', 'mwh', 'ano'])

    for index, row in anos.iterrows():
        # if(len(str(row["mwh"]))>6):
        # cells = str(row["mwh"])
        # for rank, c in cells:
        row["mwh"] = int(row["mwh"])
        # if(rank!=6):
        #   cells[rank] = '.'
        # cells[rank]=c

    grouped = anos.groupby('municipio')
    print(grouped)
    anossort = anos.sort_values(by='ano', ascending=True)
    anostype = anossort[['mwh', 'ano']].astype(float)
    ax = anostype.plot(x='ano', y='mwh', style='k.')
    ax.set_xlim(1990, 2016)

    american = anos['municipio'] == "Agudo"
    print(anos[american])
    select = anos[american]

    nomes = []
    for index, row in anos.iterrows():
        nomes.append(row["municipio"])
        if (index == 400):
            break

    colormap = plt.cm.gist_ncar  # nipy_spectral, Set1,Paired
    colors = [colormap(i) for i in np.linspace(0, 1, len(nomes))]
    labels = []

    plt.xlim([1990, 2015])
    plt.ylim([0, 10])
    for rank, colunm in enumerate(nomes):
        selecionados = anos['municipio'] == colunm
        umframe = anos[selecionados]
        umframesort = umframe.sort_values(by='ano', ascending=True)
        umframesort = umframesort[['mwh', 'ano']].astype(int)
        plt.plot(umframesort['ano'], umframesort['mwh'], 'k', color=colors[rank])
        labels.append(colunm)
        # ax = umframesort.plot(x='ano', y='mwh', style='.-')

    plt.legend(labels, ncol=4, loc='upper center',
               bbox_to_anchor=[0.5, 1.1],
               columnspacing=1.0, labelspacing=0.0,
               handletextpad=0.0, handlelength=1.5,
               fancybox=True, shadow=True)

    plt.show()
Example #33
def test():
    x = csv2rec("lc.dat", delimiter=" ", names=["t", "y", "dy"])
    s = selectp(x['t'],
                x['y'],
                x['dy'],
                21.93784630,
                dynamic=False,
                verbose=True)
    s.select()
    print s.rez
    s.plot_best()
Example #34
def tomdraw_KwithDate(rP,sd,ed):
    global db_r
    global rect_K
    global db_fig
    db_r=mlab.csv2rec(rP)
    db_r=db_r[np.where(db_r.time>tt.str2dateYmd(sd))]
    db_r=db_r[np.where(db_r.time<(tt.str2dateYmd(ed)+tt.one_Day_Delta))]
    db_r.sort()
    quote,dif=install_K_Data(db_r)
    k_h=getK_H(dif)
    initRect(k_h,'K')
    draw_K(db_fig,rect_K,quote,0.5,format_date)
Example #35
def generate_charts_from_csv(csv_path, title=None):
    # http://matplotlib.org/examples/api/date_index_formatter.html
    r = mlab.csv2rec(open(csv_path))

    # todo fix path deduction
    path = csv_path.replace('data.csv', '')

    r.sort()
    r = r[-30:]  # get the last 30 values

    generate_deal_amount_plot(r, path, title=title)
    generate_deal_count_plot(r, path, title=title)
Example #36
 def __init__(self, path):
     lines = open(path).read().split('\n')
     header = lines[3].split()
     nlay, nspc = map(int, header[:2])
     sigmas = map(float, header[2:])
     nsigmas = len(sigmas)
     try:
         dateo = datetime.strptime(lines[4].strip(), '%Y-%m-%d')
     except:
         date, time = map(int, lines[4].split())
     starts = [5]
     ends = [s + nspc for s in starts]
     keys = ['all']
     fieldnames = ('name', ) + tuple(['s%f' % i for i in sigmas])
     data = dict([(k,
                   csv2rec(StringIO('\n'.join(lines[s:e])),
                           delimiter=' ',
                           names=fieldnames,
                           converterd=dict(names=lambda x: str(x).strip())))
                  for k, s, e in zip(keys, starts, ends)])
     profile_spcs = np.char.strip(data[keys[0]]['name'])
     data_type = data[keys[0]].dtype
     data_shape = data[keys[0]].shape
     self.createDimension('sigma', nsigmas)
     self.createDimension('sigma-mid', nsigmas - 1)
     self.createDimension('south_east_north_west', 4)
     self.createVariable('sigma',
                         'f', ('sigma', ),
                         values=np.array(sigmas),
                         units='sigma')
     self.createVariable('sigma-mid',
                         'f', ('sigma-mid', ),
                         values=np.array(sigmas).repeat(2, 0)[1:-1].reshape(
                             -1, 2).mean(1),
                         units='sigma')
     self.VGLVLS = self.variables['sigma']
     self.VGTOP = 5000
     ks = keys[1:]
     for k in ks:
         try:
             assert ((np.char.strip(data[k]['name']) == profile_spcs).all())
             assert (data[k].dtype == data_type)
             assert (data[k].dtype == data_type)
         except AssertionError:
             raise IOError('File is corrupt or inconsistent')
     for a in data['all']:
         self.createVariable(a[0].strip(),
                             'f', ('sigma-mid', 'south_east_north_west'),
                             units="None",
                             values=np.array(
                                 map(lambda x: tuple(x)[1:], [a])).T,
                             long_name=a[0].ljust(16),
                             var_desc=a[0].ljust(16))
Example #37
def _load_ben_data(fname="LC_246.dat"):
    
    """loader for Ben's input files"""
    
    from matplotlib.mlab import csv2rec
    ## Get the photometry
    name = str(int(fname[fname.find("_")+1:fname.find(".dat")]))
    c = csv2rec(fname,delimiter=" ",names=["t","m","merr","rrl"])
    x0 = c['t']
    y  = c['m']
    dy = c['merr']
    return x0,y,dy, name
Example #38
    def test_csv2rec_masks(self):
        # Make sure masked entries survive roundtrip

        csv = """date,age,weight,name
2007-01-01,12,32.2,"jdh1"
0000-00-00,0,23,"jdh2"
2007-01-03,,32.5,"jdh3"
2007-01-04,12,NaN,"jdh4"
2007-01-05,-1,NULL,"""
        missingd = dict(date='0000-00-00', age='-1', weight='NULL')
        fh = StringIO.StringIO(csv)
        r1 = mlab.csv2rec(fh, missingd=missingd)
        fh = StringIO.StringIO()
        mlab.rec2csv(r1, fh, missingd=missingd)
        fh.seek(0)
        r2 = mlab.csv2rec(fh, missingd=missingd)

        self.failUnless( numpy.all( r2['date'].mask   == [0,1,0,0,0] ))
        self.failUnless( numpy.all( r2['age'].mask    == [0,0,1,0,1] ))
        self.failUnless( numpy.all( r2['weight'].mask == [0,0,0,0,1] ))
        self.failUnless( numpy.all( r2['name'].mask   == [0,0,0,0,1] ))
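numpy's genfromtxt supports the same idea natively: given per-column missing tokens and usemask=True it returns a masked array. A small sketch (behaviour assumed consistent across recent numpy versions):

import io
import numpy as np

csv = b"age,weight\n12,32.2\n-1,NULL\n"
r = np.genfromtxt(io.BytesIO(csv), delimiter=',', names=True,
                  missing_values={'age': '-1', 'weight': 'NULL'},
                  usemask=True)
print(r['age'].mask, r['weight'].mask)  # [False  True] [False  True]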
Example #39
def readFile(filename):
    global top_list
    global num_workloads
    data = csv2rec(filename, delimiter=' ')
    headers = data.dtype.names
    #print headers

    #-- num of workloads (cols)
    num_workloads = len(headers) - 1

    #-- prepare the headers
    #-- header_list: {"a", "b\nast", "c", "a", ...}
    if len(cfg_headers.strip()) == 0:
        #-- if 'headers' is empty in the cfg file, then read from data file
        header_list = []
        for i in xrange(1, num_workloads + 1, 1):
            #header_list.extend(["a","b\n"+headers[i],"c"])
            for ind in range(NUM_SYMBOLS):
                if (ind != cluster_label_location):
                    header_list.extend([symbols_list[ind]])
                else:
                    header_list.extend(
                        [symbols_list[ind] + "\n" + delim + headers[i]])
    else:
        #-- if 'headers' is provided in the cfg file, then use them
        cfg_headers_list = cfg_headers.split(',')
        header_list = []
        for i in xrange(1, num_workloads + 1, 1):
            for ind in range(NUM_SYMBOLS):
                if (ind != cluster_label_location):
                    header_list.extend([symbols_list[ind]])
                else:
                    header_list.extend([
                        symbols_list[ind] + "\n" + delim + cfg_headers_list[i]
                    ])
    top_list.append(header_list)

    #-- prepare the row data
    #-- pad '0's, according to the scheme
    for row in data:  #-- row[0]: scheme_case
        row_list = []
        for i in xrange(1, num_workloads + 1, 1):
            for ind in range(
                    NUM_SCHEMES):  #-- add '0's to non_scheme locations
                if schemes_list[ind] in row[0]:
                    if (ind > 0):
                        for bf in range(ind):
                            row_list.extend([0])
                    row_list.extend([row[i]])
                    if (ind + 1 == NUM_SCHEMES): break
                    for bf in range(ind + 1, NUM_SCHEMES):
                        row_list.extend([0])
        top_list.append(row_list)
Example #40
def run():
    t1 = time.time()
    a = mpl.csv2rec('datas.csv')
    g = ok.Grid(a.x, a.y, a.v)
    ##plotit(g.grid.x, g.grid.y, g.grid.v, "Initial grid")
    model = g.fitSermivariogramModel('Exponential', nlag=20)
    ##model.plot()
    x, y = g.regularBasicGrid(nx=40, ny=40)
    pg = g.predictedGrid(x, y, model)
    ##plotit(pg.grid.x, pg.grid.y, pg.grid.v, "Predicted grid")
    ##plotit(pg.grid.x, pg.grid.y, pg.grid.e, "Predicted Error grid")
    t2 = time.time()
    print("Operation performed in %.2f seconds" % (t2 - t1))
Example #41
def read_dataset(path, labelcomb=True):
    data = mlab.csv2rec(path,
                        names=['id', 'labels', 'abstracts'],
                        delimiter='\t')

    X = data['abstracts']

    if labelcomb:
        Y = data['labels']
    else:
        Y = [labels.split(',') for labels in data['labels']]

    return X, Y
Example #42
def get_quote_daily_matplotlib(ticker):

    startdate = datetime.date(2006, 1, 1)
    today = enddate = datetime.date.today()
    #ticker = 'SPY'

    fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
    # a numpy record array with fields: date, open, high, low, close, volume, adj_close

    r = mlab.csv2rec(fh)
    fh.close()
    r.sort()
    return r
Example #43
def tomdraw_VA(rP):
    global db_r
    global rect_V
    global rect_A
    global db_fig
    db_r=mlab.csv2rec(rP)
    db_r.sort()
    quote=install_VA_Data(db_r)
    indexes,volumes,amounts=[q[0] for q in quote],[q[1] for q in quote],[q[2] for q in quote]
    initRect(0.8,'V')
    initRect(0.8,'A')
    drawFill(db_fig,rect_V,indexes,volumes,format_date,'b')
    drawFill(db_fig,rect_A,indexes,amounts,format_date,'r')
Example #44
def LoadRecordsUsingApi(name, sql):
    apiurl = "https://api.scraperwiki.com/api/1.0/datastore/sqlite"
    fin = urllib.urlopen(
        "%s?%s" %
        (apiurl, urllib.urlencode({
            "format": "csv",
            "name": name,
            "query": sql
        })))
    csv = StringIO.StringIO()
    csv.write(fin.read())
    csv.seek(0)
    return mlab.csv2rec(csv)
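A Python 3 sketch of the same fetch-CSV-into-a-record-array idea, with example.com standing in for the ScraperWiki endpoint:

import io
import urllib.request
import numpy as np

with urllib.request.urlopen('https://example.com/data.csv') as fin:
    buf = io.StringIO(fin.read().decode('utf-8'))
data = np.genfromtxt(buf, delimiter=',', names=True, dtype=None, encoding=None)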
Example #45
 def get_history_data(self, ticker, start, end):
     try:
         fh = finance.fetch_historical_yahoo(ticker, start, end)
         # a numpy record array with fields: date, open, high, low, close, volume, adj_close)
         raw = mlab.csv2rec(fh)
         fh.close()
         raw.sort()
         # get today data
         if end == datetime.date.today():
             raw = self.get_realtime_data(ticker, raw)
     except:
         return 0
     return raw
Example #46
def test_recarray_csv_roundtrip():
    expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                    ('t', np.float)])
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)
    fd = tempfile.TemporaryFile(suffix='csv')
    mlab.rec2csv(expected, fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
Example #47
def graficoScatter():
    with open('/home/lucas/PycharmProjects/openCsv/dados_consumo_todos/total.csv', encoding="ISO-8859-1") as fname:
        gender_degree_data = csv2rec(fname)
    anos = pd.DataFrame(gender_degree_data, columns=['municipio', 'ibge', 'latitude', 'longitude', 'mwh', 'ano'])

    for index, row in anos.iterrows():
        row["mwh"] = int(row["mwh"])

    anossort = anos.sort_values(by='ano', ascending=True)
    anostype = anossort[['mwh', 'ano']].astype(float)
    ax =anostype.plot(x='ano', y='mwh',style='k.')
    ax.set_xlim(1990,2016)

    plt.show()
Example #48
def test_recarray_csv_roundtrip():
    expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                    ('t', np.float)])
    expected['x'][0] = 1
    expected['y'][1] = 2
    expected['t'][2] = 3
    fd = tempfile.TemporaryFile(suffix='csv')
    mlab.rec2csv(expected, fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
Example #49
def extract_s2_mod_obs_508(point='high', sitenml='site.nml', bnd_indx=(3,7)):
    field_dir = '/export/cloud/nceo/users/if910917/sentinel_data/field_data/munich/field_508/'
    state_name = field_dir+'mni_state_field_508_'+point+'.csv'
    s2_name = field_dir+'mni_s2_508_'+point+'.csv'
    s2_arr = mlab.csv2rec(s2_name, comments='%')
    bnds = ['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8a', 'b8b', 'b9', 'b10', 'b11', 'b12']
    sim = ss.Simulator(site_nml=sitenml)
    sim.get_land_state=sim.state_csv(state_name)
    sim.get_geom=sim.geom_csv(s2_name)
    sim.run_rt = sim.passive_optical
    sim.run()
    refl = np.array(sim.spectra.refl)
    return sim.spectra.date_sat_ob, refl[:,bnd_indx[0]], refl[:,bnd_indx[1]], s2_arr[bnds[bnd_indx[0]]], \
           s2_arr[bnds[bnd_indx[1]]]
Example #50
def get_stock(ticker, startdate, enddate):
    fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
    # a numpy record array with fields: (date, open, high, low, close, volume, adj_close)
    r = mlab.csv2rec(fh)
    fh.close()
    r.sort()
    print 'the length of data:', len(r.close)
    get_stock_data = []
    for i in xrange(0, len(r.close) - 1):
        if (r.volume[i] != 0):
            get_stock_data.append(r.close[i].tolist())
    print 'after removing rows with zero volume, the length of data:', len(
        get_stock_data)
    return get_stock_data
Example #51
def get_dates_lat_lon(state_sm_dir, point_no, point_level='high'):
    sm_loc_dat = mlab.csv2rec(state_sm_dir + '/locations_utm_epsg-32632.csv')
    easting = sm_loc_dat['point_x'][(sm_loc_dat['id'] == point_no)
                                    & (sm_loc_dat['esu'] == point_level)]
    northing = sm_loc_dat['point_y'][(sm_loc_dat['id'] == point_no)
                                     & (sm_loc_dat['esu'] == point_level)]
    file_head_1 = sm_loc_dat['esu_sm'][(sm_loc_dat['id'] == point_no)
                                       & (sm_loc_dat['esu'] == 'med')][0]
    file_head_2 = sm_loc_dat['esu_sm'][(sm_loc_dat['id'] == point_no)
                                       & (sm_loc_dat['esu'] == 'high')][0]
    file_head_3 = sm_loc_dat['esu_sm'][(sm_loc_dat['id'] == point_no)
                                       & (sm_loc_dat['esu'] == 'low')][0]
    lat, lon = utm.to_latlon(easting, northing, 32, 'U')
    sm_csv1 = glob.glob(state_sm_dir + '/' + file_head_1 + '*SM.csv')[0]
    sm_dat1 = mlab.csv2rec(sm_csv1)
    sm_csv2 = glob.glob(state_sm_dir + '/' + file_head_2 + '*SM.csv')[0]
    sm_dat2 = mlab.csv2rec(sm_csv2)
    sm_csv3 = glob.glob(state_sm_dir + '/' + file_head_3 + '*SM.csv')[0]
    sm_dat3 = mlab.csv2rec(sm_csv3)

    #sm_idx = [find_nearest(sm_dat['date'], dt.datetime.combine(x,dt.datetime.min.time()))[1] for x in dates]
    sm_dates = sm_dat1['date'][3:]
    return sm_dates, lat, lon
Example #52
def check_indicator(ticker):
    try:
        # [tickerData,ma20,ma200,rsi,macd,ema9]=generate_indicator(ticker)
        f = 'data/' + ticker + '.txt'

        tickerData = mlab.csv2rec(f)
        tickerData.sort()

        prices = tickerData.adj_close

        volumes = tickerData.volume

        # only proceed if it's not a penny stock, with high liquidity, with sufficient records
        if statistics.mean(volumes[-10:]) < 150000:
            return False
        if statistics.mean(prices[-10:]) < 1:
            return False
        if len(prices) < 200:
            return False

        # generate the MA
        ma20 = ti.moving_average(prices, 20, type='simple')
        ma200 = ti.moving_average(prices, 200, type='simple')

        # RSI
        rsi = ti.relative_strength(prices)

        # MACD
        emaSlow, emaFast, macd = ti.moving_average_convergence(prices,
                                                               nslow=26,
                                                               nfast=12)
        ema9 = ti.moving_average(macd, 9, type='exponential')

        # check RSI
        RSICheck = check_rsi(rsi)

        # check MACD
        MACDCheck = check_macd(macd, ema9)

        buySignal = RSICheck and MACDCheck

        if buySignal:
            pass
            # plot_graph.plot(tickerData,ticker)

        return buySignal
    except FileNotFoundError:
        # print('Error with ticker '+ticker)
        return False
Example #53
def test_transform_data():
    """ 
    Testing the transformation of the data from raw data to functions 
    used for fitting a function.
    
    """
    # We start with actual data. We test here just that reading the data in
    # different ways ultimately generates the same arrays.
    from matplotlib import mlab
    ortho = mlab.csv2rec(op.join(data_path, 'ortho.csv'))
    para = mlab.csv2rec(op.join(data_path, 'para.csv'))
    x1, y1, n1 = sb.transform_data(ortho)
    x2, y2, n2 = sb.transform_data(op.join(data_path, 'ortho.csv'))
    npt.assert_equal(x1, x2)
    npt.assert_equal(y1, y2)
    # We can also be a bit more critical, by testing with data that we
    # generate, and should produce a particular answer:
    my_data = pd.DataFrame(np.array([[0.1, 2], [0.1, 1], [0.2, 2], [0.2, 2],
                                     [0.3, 1], [0.3, 1]]),
                           columns=['contrast1', 'answer'])
    my_x, my_y, my_n = sb.transform_data(my_data)
    npt.assert_equal(my_x, np.array([0.1, 0.2, 0.3]))
    npt.assert_equal(my_y, np.array([0.5, 0, 1.0]))
    npt.assert_equal(my_n, np.array([2, 2, 2]))
Example #54
 def __init__(self, path):
     lines = open(path).read().split('\n')
     header = lines[3].split()
     nlay, nspc = [int(_v) for _v in header[:2]]
     sigmas = [float(_v) for _v in header[2:]]
     nsigmas = len(sigmas)
     # try:
     #     dateo = datetime.strptime(lines[4].strip(), '%Y-%m-%d')
     # except Exception:
     #     date, time = [int(_v) for _v in lines[4].split()]
     starts = [5]
     ends = [s + nspc for s in starts]
     keys = ['all']
     fieldnames = ('name',) + tuple(['s%f' % i for i in sigmas])
     data = dict(
         [(k, csv2rec(StringIO('\n'.join(lines[s:e])),
                      delimiter=' ', names=fieldnames,
                      converterd=dict(names=lambda x: str(x).strip())))
          for k, s, e in zip(keys, starts, ends)])
     profile_spcs = np.char.strip(data[keys[0]]['name'])
     data_type = data[keys[0]].dtype
     self.createDimension('sigma', nsigmas)
     self.createDimension('LAY', nsigmas - 1)
     self.createVariable('sigma', 'f', ('sigma',),
                         values=np.array(sigmas), units='sigma')
     self.createVariable('LAY', 'f', ('LAY',), values=np.array(
         sigmas).repeat(2, 0)[1:-1].reshape(-1, 2).mean(1), units='sigma')
     self.VGLVLS = self.variables['sigma']
     self.VGTOP = 5000
     ks = keys[1:]
     for k in ks:
         try:
             assert((np.char.strip(data[k]['name']) == profile_spcs).all())
             assert(data[k].dtype == data_type)
             assert(data[k].dtype == data_type)
         except AssertionError:
             raise IOError('File is corrupt or inconsistent')
     varlist = []
     for a in data['all']:
         varkey = a[0].strip()
         self.createVariable(
             varkey, 'f', ('LAY',), units=_getunit(varkey),
             values=np.array([tuple(a)[1:]])[0].astype('f'),
             long_name=varkey.ljust(16), var_desc=varkey.ljust(16))
         varlist.append(varkey.ljust(16))
     self.NVARS = len(varlist)
     self.createDimension('VAR', self.NVARS)
     setattr(self, 'VAR-LIST', ''.join(varlist))
Example #55
    def test_recarray_csv_roundtrip(self):
        expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                        ('t', np.float)])
        # initialising all values: uninitialised memory sometimes produces
        # floats that do not round-trip to string and back.
        expected['x'][:] = np.linspace(-1e9, -1, 99)
        expected['y'][:] = np.linspace(1, 1e9, 99)
        expected['t'][:] = np.linspace(0, 0.01, 99)

        mlab.rec2csv(expected, self.fd)
        self.fd.seek(0)
        actual = mlab.csv2rec(self.fd)

        np.testing.assert_allclose(expected['x'], actual['x'])
        np.testing.assert_allclose(expected['y'], actual['y'])
        np.testing.assert_allclose(expected['t'], actual['t'])
Example #56
def get_data_from_yahoo_api(ticker,
                            startdate=datetime.date(1900, 1, 1),
                            enddate=datetime.date.today()):
    fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
    r = mlab.csv2rec(fh)
    fh.close()
    df = pd.DataFrame(data=r,
                      columns=[
                          'date', 'open', 'high', 'low', 'close', 'volume',
                          'adj_close'
                      ]).sort('date')
    df['volume'] = df['volume'].astype(float)
    del df['adj_close']
    print 'Available data: Ticker({ticker}) from {first_day} to {last_day}'.format(
        ticker=ticker, first_day=firstday(df), last_day=lastday(df))
    return df, r
Example #57
def test_recarray_csv_roundtrip():
    expected = np.recarray((99, ), [('x', np.float), ('y', np.float),
                                    ('t', np.float)])
    # initialising all values: uninitialised memory sometimes produces floats
    # that do not round-trip to string and back.
    expected['x'][:] = np.linspace(-1e9, -1, 99)
    expected['y'][:] = np.linspace(1, 1e9, 99)
    expected['t'][:] = np.linspace(0, 0.01, 99)
    fd = tempfile.TemporaryFile(suffix='csv', mode="w+")
    mlab.rec2csv(expected, fd)
    fd.seek(0)
    actual = mlab.csv2rec(fd)
    fd.close()
    assert np.allclose(expected['x'], actual['x'])
    assert np.allclose(expected['y'], actual['y'])
    assert np.allclose(expected['t'], actual['t'])
Example #58
def recuperarDados(ticker, startdate=None, enddate=None):
    if startdate is None and enddate is None:
        records = np.load(dadosPathMask % (ticker) + ".npy")
    else:
        if startdate is None:
            startdate = datetime.date(2007, 1, 1)
        if enddate is None:
            enddate = datetime.date.today()

        with finance.fetch_historical_yahoo(ticker, startdate, enddate) as fh:
            records = mlab.csv2rec(fh)
        records.sort()

        np.save(dadosPathMask % (ticker), records)

    return records
Example #59
def populate(csvFile, db, table, tablenames, delimiter="\t"):
    data = csv2rec(csvFile, delimiter=delimiter, names=tablenames)

    conn = MySQLdb.connect(db=db, host="localhost", user="******", passwd="angi4rf")
    curs = conn.cursor(DictCursor)

    cols = ", ".join(["`%s` %s" %(data.dtype.names[i], dtype2SQL(data.dtype[i].str)) for i in range(len(data.dtype.names))])

    curs.execute("DROP TABLE IF EXISTS %s.%s " % (db, table))
    tableQ = "CREATE TABLE %s (id INT NOT NULL AUTO_INCREMENT, %s, PRIMARY KEY (id)) ENGINE = MYISAM" % (table, cols)
    print tableQ
    curs.execute(tableQ)
    for row in data:
        #row = tuple(row) + (" ".join(filterTokens(preprocess(row[-1]))),) ## stoplist/tokenize? -- delete!!!
        insertQ = "INSERT INTO %s VALUES %s" %(table, str(tuple([0]+[str(reform(el)).strip() for el in row])))
        curs.execute(insertQ)
    conn.close()
Example #60
def add_csv2nc(csv_name, nc_name):
    csv_data = ml.csv2rec(csv_name)
    nc_data = open_netcdf(nc_name)
    nc_vars = [
        'air_temp', 'soil_temp', 'rg', 'co2_flux', 'qc_co2_flux', 'u_star',
        'wind_dir', 'foot_print'
    ]
    for var_title in nc_vars:
        nc_var = nc_data.variables[var_title]
        csv_var = csv_data[var_title]
        if len(csv_var) != len(nc_var):
            raise ValueError(
                'Cannot project data of different shapes together')
        else:
            nc_var[:] = csv_var[:]
    nc_data.close()
    return 'All updated'