def use_cache(self, dir):
    ''' Use cached data from disk instead of querying mysql for the latest version '''
    try:
        self.prediction_matrix = pl.csv2rec(dir + 'prediction_matrix_' + self.cause + '_' + self.sex + '.csv')
        self.observation_matrix = pl.csv2rec(dir + 'observation_matrix_' + self.cause + '_' + self.sex + '.csv')
    except IOError:
        raise IOError('No cached data found.')
    self.data_rows = self.observation_matrix.shape[0]
    self.country_list = np.unique(self.prediction_matrix.country)
    self.region_list = np.unique(self.prediction_matrix.region)
    self.super_region_list = np.unique(self.prediction_matrix.super_region)
    self.age_list = np.unique(self.prediction_matrix.age)
    self.year_list = np.unique(self.prediction_matrix.year)
    self.covariate_dict = {'x0': 'constant'}
    for i in range(len(self.covariate_list)):
        self.covariate_dict['x' + str(i+1)] = self.covariate_list[i]
    if self.age_dummies == True:
        pre_ref = 1
        for i, j in enumerate(self.age_list):
            if j == self.age_ref:
                pre_ref = 0
            elif pre_ref == 1:
                self.covariate_dict['x' + str(len(self.covariate_list)+i+1)] = 'Age ' + str(j)
            else:
                self.covariate_dict['x' + str(len(self.covariate_list)+i)] = 'Age ' + str(j)
    self.training_split()
def evaluate_model(mod, comment='', data_fname='missing_noisy_data.csv', truth_fname='data.csv'): """ Run specified model on existing data (data.csv / missing_noisy_data.csv) and save results in dev_log.csv Existing models: %s """ % data_run_models if mod not in data_run_models.split(' '): raise TypeError, 'Unrecognized model "%s"; must be one of %s' % (mod, data_run_models) import model reload(model) print 'loading data' data = pl.csv2rec(data_fname) truth = pl.csv2rec(truth_fname) t0 = time.time() print 'generating model' mod_mc = eval('model.%s(data)' % mod) print 'fitting model with mcmc' mod_mc.sample(10000, 5000, 50, verbose=1) t1 = time.time() print 'summarizing results' import graphics reload(graphics) pl.figure(figsize=(22, 17), dpi=300) pl.clf() graphics.plot_all_predictions_over_time(data, mod_mc.predicted, more_data=truth) data_stats = mod_mc.data_predicted.stats() i_out = [i for i in range(len(data)) if pl.isnan(data.y[i])] rmse_abs_out = pl.rms_flat(truth.y[i_out] - data_stats['mean'][i_out]) rmse_rel_out = 100*pl.rms_flat(1. - data_stats['mean'][i_out]/truth.y[i_out]) i_in = [i for i in range(len(data)) if not pl.isnan(data.y[i])] rmse_abs_in = pl.rms_flat(truth.y[i_in] - data_stats['mean'][i_in]) rmse_rel_in = 100*pl.rms_flat(1. - data_stats['mean'][i_in]/truth.y[i_in]) param_stats = mod_mc.param_predicted.stats() coverage = 100*pl.sum((truth.y[i_out] >= param_stats['95% HPD interval'][i_out, 0]) & (truth.y[i_out] <= param_stats['95% HPD interval'][i_out, 1])) / float(len(i_out)) import md5 data_hash = md5.md5(data).hexdigest() results = [mod, t1-t0, rmse_abs_out, rmse_rel_out, rmse_abs_in, rmse_rel_in, coverage, len(data), len(pl.unique(data.region)), len(pl.unique(data.country)), len(pl.unique(data.year)), len(pl.unique(data.age)), data_hash, t0, comment] print '%s: time: %.0fs out-of-samp rmse abs=%.1f rel=%.0f in-samp rmse abs=%.1f rel=%.0f coverage=%.0f\ndata: %d rows; %d regions, %d countries %d years %d ages [data hash: %s]\n(run conducted at %f)\n%s' % tuple(results) pl.savefig('/home/j/Project/Models/space-time-smoothing/images/%s.png' % t0) # FIXME: don't hardcode path for saving images import csv f = open('dev_log.csv', 'a') f_csv = csv.writer(f) f_csv.writerow(results) f.close() return mod_mc
def get_cod_data_all_causes(iso3='USA', age_group='1_4', sex='F'):
    """ TODO: write doc string for this function"""
    print 'loading', iso3, age_group, sex

    import glob
    cause_list = []

    fpath = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Input Data/v02_prep_%s/%s+*+%s+%s.csv' % (iso3, iso3, age_group, sex)
    #fpath = '/home/j/Project/GBD/dalynator/data/cod_correct_input_pos/run_9_cause_*.csv'  # use Mike's validation data
    fnames = glob.glob(fpath)

    # initialize input distribution array
    N = 990  # TODO: get this from the data files
    T = 32   # TODO: get this from the data files
    J = len(fnames)
    F = pl.zeros((N, T, J))

    # fill input distribution array with data from files
    for j, fname in enumerate(sorted(fnames)):
        cause = fname.split('+')[1]  # TODO: make this less brittle and clearer
        #cause = str(j)  # use Mike's validation data causes
        print 'loading cause', cause

        F_j = pl.csv2rec(fname)
        for n in range(N):
            F[n, :, j] = F_j['ensemble_d%d' % (n + 1)] / F_j['envelope']
            #F[n, :, j] = F_j['d%d'%(n+1)]/F_j['envelope']  # use Mike's validation data

        assert not pl.any(pl.isnan(F)), '%s should have no missing values' % fname
        cause_list.append(cause)

    print 'loading complete'
    return F, cause_list
def make_model(lon,lat,t,input_data,covariate_keys,n,datatype, genaa,genab,genbb,gen00,gena0,genb0,gena1,genb1,gen01,gen11, pheab,phea,pheb, phe0,prom0,promab, aphea,aphe0, bpheb,bphe0, vivax_pos,vivax_neg, lo_age, up_age, cpus=1): """ This function is required by the generic MBG code. """ ra = csv2rec(input_data) # Step method granularity grainsize = 20 where_vivax = np.where(datatype=='vivax') from dufvax import disttol, ttol # Duffy needs to be modelled everywhere Duffy or Vivax is observed. # Vivax only needs to be modelled where Vivax is observed. # Complication: Vivax can have multiple co-located observations at different times, # all corresponding to the same Duffy observation. duffy_data_mesh, duffy_logp_mesh, duffy_fi, duffy_ui, duffy_ti = uniquify_tol(disttol,ttol,lon,lat) duffy_data_mesh = np.hstack((duffy_data_mesh, np.atleast_2d(t).T)) duffy_logp_mesh = np.hstack((duffy_logp_mesh, np.atleast_2d(t[duffy_ui]).T)) vivax_data_mesh, vivax_logp_mesh, vivax_fi, vivax_ui, vivax_ti = uniquify_tol(disttol,ttol,lon[where_vivax],lat[where_vivax],t[where_vivax]) # Create the mean & its evaluation at the data locations. init_OK = False # Probability of mutation in the promoter region, given that the other thing is a. p1 = pm.Uniform('p1', 0, .04, value=.01) covariate_key_dict = {'v': set(covariate_keys), 'b': ['africa'], '0':[]} ui_dict = {'v': vivax_ui, 'b': duffy_ui, '0': duffy_ui} logp_mesh_dict = {'b': duffy_logp_mesh, '0': duffy_logp_mesh, 'v': vivax_logp_mesh} temporal_dict = {'b': False, '0': False, 'v': True} init_OK = False while not init_OK: try: spatial_vars = zipmap(lambda k: covariance_submodel(k, ra, logp_mesh_dict[k], covariate_key_dict[k], ui_dict[k], temporal_dict[k]), ['b','0','v']) sp_sub = zipmap(lambda k: spatial_vars[k]['sp_sub'], ['b','0','v']) sp_sub_b, sp_sub_0, sp_sub_v = [sp_sub[k] for k in ['b','0','v']] V = zipmap(lambda k: spatial_vars[k]['V'], ['b','0','v']) V_b, V_0, V_v = [V[k] for k in ['b','0','v']] tau = zipmap(lambda k: 1./spatial_vars[k]['V'], ['b','0','v']) # Loop over data clusters, adding nugget and applying link function. f = zipmap(lambda k: spatial_vars[k]['sp_sub'].f_eval, ['b','0','v']) init_OK = True except pm.ZeroProbability, msg: print 'Trying again: %s'%msg init_OK = False gc.collect()
def combine_output(J, T, model, dir, reps, save=False):
    """
    Combine output on absolute error, relative error, csmf_accuracy, and coverage from
    multiple runs of validate_once. Either saves the output to the disk, or returns
    arrays for each.
    """
    cause = pl.zeros(J*T, dtype='f').view(pl.recarray)
    time = pl.zeros(J*T, dtype='f').view(pl.recarray)
    abs_err = pl.zeros(J*T, dtype='f').view(pl.recarray)
    rel_err = pl.zeros(J*T, dtype='f').view(pl.recarray)
    coverage = pl.zeros(J*T, dtype='f').view(pl.recarray)
    csmf_accuracy = pl.zeros(J*T, dtype='f').view(pl.recarray)

    for i in range(reps):
        metrics = pl.csv2rec('%s/metrics_%s_%i.csv' % (dir, model, i))
        cause = pl.vstack((cause, metrics.cause))
        time = pl.vstack((time, metrics.time))
        abs_err = pl.vstack((abs_err, metrics.abs_err))
        rel_err = pl.vstack((rel_err, metrics.rel_err))
        coverage = pl.vstack((coverage, metrics.coverage))
        csmf_accuracy = pl.vstack((csmf_accuracy, metrics.csmf_accuracy))

    # drop the empty first row used to initialize the stacks
    cause = cause[1:,]
    time = time[1:,]
    abs_err = abs_err[1:,]
    rel_err = rel_err[1:,]
    coverage = coverage[1:,]
    csmf_accuracy = csmf_accuracy[1:,]

    mean_abs_err = abs_err.mean(0)
    median_abs_err = pl.median(abs_err, 0)
    mean_rel_err = rel_err.mean(0)
    median_rel_err = pl.median(rel_err, 0)
    mean_csmf_accuracy = csmf_accuracy.mean(0)
    median_csmf_accuracy = pl.median(csmf_accuracy, 0)
    mean_coverage_bycause = coverage.mean(0)
    mean_coverage = coverage.reshape(reps, T, J).mean(0).mean(1)
    percent_total_coverage = (coverage.reshape(reps, T, J).sum(2)==3).mean(0)
    mean_coverage = pl.array([[i for j in range(J)] for i in mean_coverage]).ravel()
    percent_total_coverage = pl.array([[i for j in range(J)] for i in percent_total_coverage]).ravel()

    models = pl.array([[model for j in range(J)] for i in range(T)]).ravel()
    true_cf = metrics.true_cf
    true_std = metrics.true_std
    std_bias = metrics.std_bias

    all = pl.np.core.records.fromarrays(
        [models, cause[0], time[0], true_cf, true_std, std_bias,
         mean_abs_err, median_abs_err, mean_rel_err, median_rel_err,
         mean_csmf_accuracy, median_csmf_accuracy,
         mean_coverage_bycause, mean_coverage, percent_total_coverage],
        names=['model', 'cause', 'time', 'true_cf', 'true_std', 'std_bias',
               'mean_abs_err', 'median_abs_err', 'mean_rel_err', 'median_rel_err',
               'mean_csmf_accuracy', 'median_csmf_accuracy',
               'mean_coverage_bycause', 'mean_coverage', 'percent_total_coverage'])

    if save:
        pl.rec2csv(all, '%s/%s_summary.csv' % (dir, model))
    else:
        return all
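# Hedged usage sketch for combine_output (not from the original source): the model
# label, metrics directory, and the J/T/reps values below are illustrative assumptions.
# It expects files named metrics_<model>_<i>.csv for i in 0..reps-1 in that directory.
summary = combine_output(J=3, T=10, model='bad_model', dir='validation_results', reps=5, save=False)
print summary.dtype.names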
def fit_and_plot( mod, data_fname='irq_5q0.csv', image_fname='/home/j/Project/Models/space-time-smoothing/irq_test/5q0.%s.png', comment='', iter=40000): import model reload(model) data = pl.csv2rec(data_fname) # FIXME: this makes a big difference, but I don't understand why it would (could be prior on gp amp) data.x1 = (data.x1 - 1990.) / 10. # crude normalization of year data print 'generating model' mod_mc = eval('model.%s(data)' % mod) print 'fitting model with mcmc' mod_mc.sample(iter, iter / 2, iter / 2000, verbose=1) print 'summarizing results' import graphics reload(graphics) pl.figure() pl.clf() graphics.plot_prediction_over_time('IRQ', data, mod_mc.predicted, age=-1, cmap=pl.cm.RdYlBu, connected=False, jittered_posterior=False) graphics.plot_prediction_over_time('IRQ', data[:40], mod_mc.param_predicted, age=-1) #pl.plot(data.year, data.y, zorder=0, # linestyle='', marker='x', mew=3, color='r', ms=8, alpha=.5) pl.title('IRQ') pl.xlabel('Time (Years)') pl.ylabel('$\log(_5q_0)$') pl.axis([1945, 2030, -1.8, -.5]) pl.figtext(0, 1, '\n %s' % comment, va='top', ha='left') t1 = time.time() pl.savefig(image_fname % t1) try: for stoch in 'beta gamma sigma_f tau_f'.split(' '): print '%s =\n %s\n' % ( stoch, mean_w_ui(mod_mc.__getattribute__(stoch))) except AttributeError: pass return mod_mc
def brt_doublecheck(fname, brt_evaluator, brt_results):
    """ Computes the 'fit' element of a gbm.object and compares it with
    that stored in the gbm.object. """
    ures = unpack_gbm_object(brt_results, 'fit')
    data = csv2rec(os.path.join('anopheles-caches', fname))
    ddict = dict([(k, data[k]) for k in data.dtype.names[1:]])
    out = brt_evaluator(ddict)
    print np.abs(out - ures).max()
def load_stimuli_raw():
    folder = os.path.join(wd_dir, "stimuli")
    stim_file_re = r'lexique_nletters_4_block_(\d)\.txt$'
    stim_file_fil = re_filter(stim_file_re)
    stim_file_names = filter(stim_file_fil,
                             glob.glob(os.path.join(folder, "*")))
    stim_file_names = sorted(stim_file_names)
    return np.concatenate([pl.csv2rec(filename, delimiter="\t")
                           for filename in stim_file_names])
def knockout_uniformly_at_random(in_fname='noisy_data.csv', out_fname='missing_noisy_data.csv', pct=20.):
    """ replace data.csv y column with uniformly random missing entries

    Parameters
    ----------
    pct : float, percent to knockout
    """
    data = pl.csv2rec(in_fname)
    for i, row in enumerate(data):
        if pl.rand() < pct/100.:
            data[i].y = pl.nan
    pl.rec2csv(data, out_fname)
def get_data(concurrent): observations = csv2rec("kq1-2.csv", missing='NA') # Filter for historical or concurrent observations = observations[observations['concurrent']==concurrent] # Unique paper ID values unique_papers = set(observations['paper_id']) # Re-number papers paper_id = zeros(len(observations), int) for i,p in enumerate(unique_papers): paper_id[observations['paper_id']==p] = i # Unique grouped ID values unique_groups = set(observations['group_id']) # Re-number groups group_id = zeros(len(observations), int) for i,g in enumerate(unique_groups): group_id[observations['group_id']==g] = i # unique_units = set(observations['unit_id']) # unit_id = observations['unit_id']-1 # Index to individual-level data indiv = observations['n']==1 # Individual-level data obs_indiv = observations[indiv] # Summarized data obs_summ = observations[indiv-True] # Group IDs for individual data group_id_indiv = group_id[indiv] # Paper IDs for individual data paper_id_indiv = paper_id[indiv] # Unit IDs for individual data # unit_id_indiv = unit_id[indiv] # Unique paper ID's for individual studies unique_papers_indiv = set(paper_id_indiv) # Group IDs for summarized data group_id_summ = group_id[indiv-True] # Paper IDs for summarized data paper_id_summ = paper_id[indiv-True] # Unit IDs for summarized data # unit_id_summ = unit_id[indiv-True] # Unique paper IDs for group data unique_papers_summ = set(paper_id_summ) return locals()
def load_spikes_csv(spikefname, ch, unit):
    """
    Load the spike timestamps from a csv file.
    The file is expected to have three columns: channel, unit and timestamp
    (This is the default format that data is exported in from Plexon's offline sorter)

    Inputs:
      spikefname - name of csv file
      ch - channel number
      unit - unit number
    Outputs:
      array - pylab array of timestamps
    """
    sdata = pylab.csv2rec(spikefname, names=['channel', 'unit', 'timestamp'])
    idx = pylab.find((sdata['channel'] == ch) & (sdata['unit'] == unit))
    return sdata['timestamp'][idx]
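# Hedged usage sketch (not part of the original module): 'sorted_spikes.csv' is a
# hypothetical Plexon offline-sorter export with channel, unit, timestamp columns.
spike_times = load_spikes_csv('sorted_spikes.csv', ch=3, unit=1)
print '%d spikes loaded from channel 3, unit 1' % len(spike_times)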
def add_sampling_error(in_fname='data.csv', out_fname='noisy_data.csv', std=1.):
    """ add normally distributed noise to data.csv y column

    Parameters
    ----------
    std : float, or array of floats
      standard deviation of noise
    """
    data = pl.csv2rec(in_fname)
    if type(std) == float:
        std = std * pl.ones(len(data))
    for i, row in enumerate(data):
        data[i].y += std[i] * pl.randn(1)
        data[i].se += std[i]
    pl.rec2csv(data, out_fname)
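# Hedged usage sketch combining the two helpers above (file names are the defaults
# they already declare; the per-row standard deviations are illustrative): add
# heteroscedastic noise to data.csv, then knock out 20% of the noisy observations.
noise_sd = 0.5 + pl.rand(len(pl.csv2rec('data.csv')))
add_sampling_error('data.csv', 'noisy_data.csv', std=noise_sd)
knockout_uniformly_at_random('noisy_data.csv', 'missing_noisy_data.csv', pct=20.)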
def use_cache(self, dir): ''' Use cached data from disk instead of querying mysql for the latest version ''' try: self.prediction_matrix = pl.csv2rec(dir + 'prediction_matrix_' + self.cause + '_' + self.sex + '.csv') self.observation_matrix = pl.csv2rec(dir + 'observation_matrix_' + self.cause + '_' + self.sex + '.csv') except IOError: raise IOError('No cached data found.') if self.just_testing == True: self.country_list = np.array(['USA','RUS','CAN','UKR','IND','BGD','THA','GBR']) obs_keeper = np.zeros(self.observation_matrix.shape[0]) pred_keeper = np.zeros(self.prediction_matrix.shape[0]) for i in self.country_list: obs_keeper[np.where(self.observation_matrix.country==i)[0]] = 1 pred_keeper[np.where(self.prediction_matrix.country==i)[0]] = 1 self.observation_matrix = np.delete(self.observation_matrix, np.where(obs_keeper==0)[0], axis=0) self.prediction_matrix = np.delete(self.prediction_matrix, np.where(pred_keeper==0)[0], axis=0) else: self.country_list = np.unique(self.prediction_matrix.country) self.data_rows = self.observation_matrix.shape[0] self.region_list = np.unique(self.prediction_matrix.region) self.super_region_list = np.unique(self.prediction_matrix.super_region) self.age_list = np.unique(self.prediction_matrix.age) self.year_list = np.unique(self.prediction_matrix.year) self.covariate_dict = {'x0': 'constant'} for i in range(len(self.covariate_list)): self.covariate_dict['x' + str(i+1)] = self.covariate_list[i] if self.age_dummies == True: pre_ref = 1 for i,j in enumerate(self.age_list): if j == self.age_ref: pre_ref = 0 elif pre_ref == 1: self.covariate_dict['x' + str(len(self.covariate_list)+i+1)] = 'Age ' + str(j) else: self.covariate_dict['x' + str(len(self.covariate_list)+i)] = 'Age ' + str(j) self.training_split()
def make_subplot(file, title_):
    data = pylab.csv2rec(file, delimiter=' ', names=('re', 'sim', 'th'))

    figW = 5.5
    figH = 4.5
    fig = plt.figure(subplotpars=mpl.figure.SubplotParams(left=0.125, bottom=0.130))
    ax = fig.add_subplot(111)

    pl_sim = ax.loglog(data['re'], data['sim'], 'bo-')
    pl_th = ax.loglog(data['re'], data['th'], 'ro-')

    ax.legend((pl_sim, pl_th), ('simulation results', 'theoretical approximation'), 'best')
    ax.grid('on')
    ax.grid(which='minor')
    ax.set_xlim(data[0][0], data[-1][0])
    ax.set_ylabel('drag coefficient')
    ax.set_xlabel('Reynolds number')
    ax.set_title(title_)
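# Hedged usage sketch (not from the original script): 'drag_vs_re.txt' is a hypothetical
# space-delimited file whose three columns match the names ('re', 'sim', 'th') above.
make_subplot('drag_vs_re.txt', 'Drag coefficient vs. Reynolds number')
plt.show()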
def load_data(self, csv):
    '''
    load the data from csv
    ### Note: for now, only takes in data as a csv
        TODO: enable MySQL download
    '''
    # load the csv file
    self.data = pl.csv2rec(csv)

    # keep just the specified age and sex
    self.data = self.data[(self.data.sex == self.sex) & (self.data.age_group == self.age)]

    # remove any instances of population zero, which might blow things up due to having offsets of negative infinity
    self.data = self.data[self.data.pop > 0.]

    # report how many rows were loaded
    print '%g rows of data loaded.' % len(self.data)
def draw_eels(subplot,file_basename,smooth): """Generate a plot from EELS data saved by CSI. Currently highlights just Co, Mn and O... generic code to be added. subplot: Subplot object to draw in. file_basename: Filename (no suffix) to read from (adds .txt) smooth: Window size for Hanning type smoothing """ data = pl.csv2rec(file_basename+".txt", delimiter="\t", names=["eV","raw","ev2","ref","ev3","bg","ev4","sign"]) #eels_plot.set_title("EELS spectrum of %s" % (sys.argv[2])) subplot.set_ylabel("Counts / $A.U.$") subplot.set_xlabel("Electron energy loss / $\Delta eV$ ") subplot.set_ylim(data["raw"].min()*0.3,data["raw"].max()*1.15) subplot.plot(data["eV"],data["raw"],label="raw data",color="grey") smoothed = smooth(data["raw"],window_len=smooth,window="hanning") subplot.plot(data["eV"],smoothed,color="k",label="smoothed data") # find peaks peak_index = argrelextrema(smoothed,np.greater,order=30)[0].tolist()[1:] ranges = [(535,580), (630,670), (770,810)] # annotate each peak for peak in peak_index: if any(lower <= data["eV"][peak] <= upper for (lower, upper) in ranges): subplot.annotate('%.1f' % (data["eV"][peak]),style='italic',size=11, xy=(data["eV"][peak], data["raw"][peak]), xycoords="data", textcoords="offset points", xytext=(0, 25), horizontalalignment='left', verticalalignment='top', arrowprops=dict(facecolor='black',arrowstyle="->",shrinkB=7,connectionstyle="arc3")) # mark regions subplot.axvspan(535,580,color="b",alpha=0.2) # o subplot.annotate("O\n$K$-edge", xy=(554,eels_plot.get_ylim()[0]), xycoords="data", va="bottom", ha="center", size=8) subplot.axvspan(630,670,color="r",alpha=0.2) # mn subplot.annotate("Mn\n $L_{2,3}$-edge", xy=(647,eels_plot.get_ylim()[0]), xycoords="data", va="bottom", ha="center", size=8) subplot.axvspan(770,810,color="g",alpha=0.2) # Co subplot.annotate("Co\n$L_{2,3}$-edge", xy=(790,eels_plot.get_ylim()[0]), xycoords="data", va="bottom", ha="center",size=8) subplot.set_title("Integral EELS spectrum")
def trees_to_diagnostics(brt_evaluator, fname, species_name, n_pseudopresences, n_pseudoabsences, config_filename): """ Takes the BRT evaluator and sees how well it does at predicting the training dataset. """ from diagnostics import simple_assessments, roc, plot_roc_ din = csv2rec(os.path.join('anopheles-caches',fname)) found = din.found din = dict([(k,din[k]) for k in brt_evaluator.nice_tree_dict.iterkeys()]) probs = pm.flib.invlogit(brt_evaluator(din)) print 'Species %s: fraction %f correctly classified.'%(species_name, ((probs>.5)*found+(probs<.5)*(True-found)).sum()/float(len(probs))) result_dirname = get_result_dir(config_filename) resdict = {} for f in simple_assessments: resdict[f.__name__] = f(probs>.5, found) pstack = np.array([pm.rbernoulli(probs) for i in xrange(10000)]) fp, tp, AUC = roc(pstack, found) resdict['AUC'] = AUC fout=file(os.path.join(result_dirname,'simple-diagnostics.txt'),'w') fout.write('presences: %i\n'%(found.sum()-n_pseudopresences)) fout.write('pseudopresences: %i\n'%n_pseudopresences) fout.write('pseudoabsences: %i\n'%n_pseudoabsences) for k in resdict.iteritems(): fout.write('%s: %s\n'%k) import pylab as pl pl.clf() plot_roc_(fp,tp,AUC) pl.savefig(os.path.join(result_dirname,'roc.pdf')) r = np.rec.fromarrays([fp,tp],names='false,true') rec2csv(r,os.path.join(result_dirname,'roc.csv'))
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# FIXME: Need to extract urban, rural from asciis, Otherwise they'll be
# FIXME: NaN at some points.

import sys
from pylab import csv2rec, rec2csv
import numpy as np
import warnings

duffy_datafile, vivax_datafile = sys.argv[1:]
combined_datafile = duffy_datafile.split('.')[0] + '_+_' + vivax_datafile.split('.')[0] + '.csv'

duffy_data = csv2rec(duffy_datafile)
vivax_data = csv2rec(vivax_datafile)

n_duffy = len(duffy_data)
n_vivax = len(vivax_data)

duffy_nan = np.repeat(np.nan, n_duffy)
vivax_nan = np.repeat(np.nan, n_vivax)

tstart = vivax_data.yestart + (vivax_data.mostart - 1) / 12.
tend = vivax_data.yeend + (vivax_data.moend - 1) / 12.

weirdcols = ['lon', 'lat', 't', 'vivax_pos', 'vivax_neg', 'n', 'datatype']
vivaxcols = [
    'lo_age',
    'up_age',
    'urban',
### setup Python
# import necessary libraries
import pymc as mc
import numpy as np
import pylab as pl
import os

# setup directory info
project = 'USCOD'
proj_dir = 'D:/Projects/' + project + '/' if (os.environ['OS'] == 'Windows_NT') else '/shared/projects/' + project + '/'


### setup the data
# load in the csv
data = pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')

# keep just males aged 60-74 for now
data = data[(data.sex == 1) & (data.age_group == '60to74')]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0.]


### setup temporal indexing
# set year to start at 0
data = pl.rec_append_fields(
    rec=data,
    names='year0',
    arrs=np.array(data.year - np.min(data.year)))
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test): # load in the data all_data = csv2rec(infile, use_mrecords=False) for m in range(number_submodels): if all_data['spacetime_' + str(m+1)].dtype == 'float64': all_data = np.delete(all_data, np.where(np.isnan(all_data['spacetime_' + str(m+1)]))[0], axis=0) # find the list of years for which we need to predict year_list = np.unique(all_data.year) # find the list of country/age groups country_age = np.array([str(all_data.iso3[i]) + '_' + str(all_data.age_group[i]) for i in range(len(all_data))]) country_age_list = np.repeat(np.unique(country_age), len(year_list)) # make empty arrays in which to store the results draws = [np.empty(len(country_age_list), 'float') for i in range(number_submodels)] iso3 = np.empty(len(country_age_list), '|S3') age_group = np.empty(len(country_age_list), 'int') year = np.empty(len(country_age_list), 'int') # loop through country/age groups for ca in np.unique(country_age_list): print('GPRing ' + ca) # subset the data for this particular country/age ca_data = all_data[country_age==ca] # subset just the observed data if ca_data['lt_cf'].dtype != '|O8': ca_observed = ca_data[(np.isnan(ca_data['lt_cf'])==0) & (ca_data['test_' + test]==0)] if len(ca_observed) > 1: has_data = True else: has_data = False else: has_data = False # loop through each submodel for m in range(number_submodels): # skip models with no spacetime results if all_data['spacetime_' + str(m+1)].dtype != 'float64': draws[m][country_age_list==ca] = np.NaN continue # identify the dependent variable for this model dv = dv_list[m] # make a list of the spacetime predictions ca_prior = np.array([np.mean(ca_data['spacetime_' + str(m+1)][ca_data.year==y]) for y in year_list]) # find the amplitude for this country/age amplitude = np.mean(ca_data['spacetime_amplitude_' + str(m+1)]) # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR def mean_function(x) : return np.interp(x, year_list, ca_prior) # setup the covariance function M = gp.Mean(mean_function) C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2, amp=amplitude, scale=scale) # observe the data if there is any if has_data: gp.observe(M=M, C=C, obs_mesh=ca_observed.year, obs_V=ca_observed['spacetime_data_variance_' + str(m+1)], obs_vals=ca_observed[dv]) # save the data for this country/age into the results array iso3[country_age_list==ca] = ca[0:3] age_group[country_age_list==ca] = ca[4:] year[country_age_list==ca] = year_list.T draws[m][country_age_list==ca] = M(year_list) # save the results print('Saving GPR results') names = ['iso3','age_group','year'] results = np.core.records.fromarrays([iso3,age_group,year], names=names) for m in range(number_submodels): results = recfunctions.append_fields(results, 'gpr_' + str(m+1) + '_spacetime_mean', draws[m]) rec2csv(results, outfile)
def make_model(lon,lat,t,input_data,covariate_keys,n,datatype, genaa,genab,genbb,gen00,gena0,genb0,gena1,genb1,gen01,gen11, pheab,phea,pheb, phe0,prom0,promab, aphea,aphe0, bpheb,bphe0, vivax_pos,vivax_neg, lo_age, up_age, cpus=1): """ This function is required by the generic MBG code. """ ra = csv2rec(input_data) # Step method granularity grainsize = 20 where_vivax = np.where(datatype=='vivax') from dufvax import disttol, ttol # Duffy needs to be modelled everywhere Duffy or Vivax is observed. # Vivax only needs to be modelled where Vivax is observed. # Complication: Vivax can have multiple co-located observations at different times, # all corresponding to the same Duffy observation. print 'Uniquifying.' duffy_data_mesh, duffy_logp_mesh, duffy_fi, duffy_ui, duffy_ti = uniquify_tol(disttol,ttol,lon,lat) duffy_data_mesh = np.hstack((duffy_data_mesh, np.atleast_2d(t).T)) duffy_logp_mesh = np.hstack((duffy_logp_mesh, np.atleast_2d(t[duffy_ui]).T)) vivax_data_mesh, vivax_logp_mesh, vivax_fi, vivax_ui, vivax_ti = uniquify_tol(disttol,ttol,lon[where_vivax],lat[where_vivax],t[where_vivax]) print 'Done uniquifying.' duffy_data_locs = map(tuple,duffy_data_mesh[:,:2]) vivax_data_locs = map(tuple,vivax_data_mesh[:,:2]) full_vivax_ui = np.arange(len(lon))[where_vivax][vivax_ui] # Create the mean & its evaluation at the data locations. init_OK = False # Probability of mutation in the promoter region, given that the other thing is a. p1 = pm.Uniform('p1', 0, .04, value=.01) covariate_key_dict = {'v': set(covariate_keys), 'b': ['africa'], '0':[]} ui_dict = {'v': full_vivax_ui, 'b': duffy_ui, '0': duffy_ui} logp_mesh_dict = {'b': duffy_logp_mesh, '0': duffy_logp_mesh, 'v': vivax_logp_mesh} temporal_dict = {'b': False, '0': False, 'v': True} init_OK = False while not init_OK: try: spatial_vars = zipmap(lambda k: covariance_submodel(k, ra, logp_mesh_dict[k], covariate_key_dict[k], ui_dict[k], input_data, temporal_dict[k]), ['b','0','v']) tau = zipmap(lambda k: 1./spatial_vars[k]['V'], ['b','0','v']) # Loop over data clusters, adding nugget and applying link function. init_OK = True except pm.ZeroProbability, msg: print 'Trying again: %s'%msg init_OK = False gc.collect()
import pylab as pl
from mpl_toolkits import basemap
import numpy as np
import colors
import matplotlib
reload(colors)

data = pl.csv2rec('Fy_input_091126_+_qryPvPR_MBG_with_covariates.csv')
vivax_data = data[np.where(data.datatype == 'vivax')]
duffy_data = data[np.where(data.datatype != 'vivax')]

# b = basemap.Basemap(-19,5,52,20, resolution='i')
b = basemap.Basemap(-19, 5, 52, 40, resolution='i')

pl.close('all')
pl.figure(figsize=(12, 3))

colors.map_axis_format(pl.subplot(1, 2, 1))
b.drawcoastlines(color=colors.map_outline)
b.drawcountries(color=colors.map_outline)
b.plot(vivax_data.lon, vivax_data.lat, linestyle='None', marker='.',
       color=colors.vivax_point, markersize=4, alpha=.2, label='vivax')
colors.map_legend_format(pl.legend(loc=0))
print 'Warning: could not create data csv. Maybe it exists already?\n%s' % e

## fit the model
dir = dismod3.settings.JOB_WORKING_DIR % id

# rm %s/sfun_in.csv  # if you want to regenerate default mesh parameters
call_str = '/tmp/dismod4.abie/build/src/dismod4_csv /tmp/dismod4.abie/test/parameter.csv %s/measure_in.csv %s/sfun_in.csv' % (dir, dir)
subprocess.call(call_str, shell=True)

call_str = '/tmp/dismod4.abie/build/src/dismod4_csv /tmp/dismod4.abie/test/parameter.csv %s/measure_in.csv %s/sfun_in.csv %s/sfun_out.csv %s/measure_out.csv' % (dir, dir, dir, dir)
subprocess.call(call_str, shell=True)

# generate plots of results
print 'summarizing results'
measure_out = pl.csv2rec('%s/measure_out.csv' % dir)

r = 'asia_southeast'
for year in [1990]:
    for sex in ['male']:
        for dm3_type, dm4_type in [['remission', 'remission'],
                                   ['excess-mortality', 'excess'],
                                   ['incidence', 'incidence'],
                                   ['mrr', 'risk'],
                                   ['prevalence', 'prevalence'],
                                   ]:
            x = [0]
            y = [0]
            for age in age_mesh:
                x.append(age)
                y.append(measure_out.model[index_dict[(dm4_type, year, age)]])
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test, spacetime_iters, top_submodel): # load in the data all_data = csv2rec(infile, use_mrecords=False) for m in range(number_submodels): if all_data['spacetime_' + str(m + 1)].dtype == 'float64': all_data = np.delete( all_data, np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0], axis=0) # find the list of years for which we need to predict year_list = np.unique(all_data.year) # find the list of country/age groups country_age = np.array([ str(all_data.iso3[i]) + '_' + str(all_data.age_group[i]) for i in range(len(all_data)) ]) country_age_list = np.repeat(np.unique(country_age), len(year_list)) # make empty arrays in which to store the results total_iters = np.sum(spacetime_iters) draws = [ np.empty(len(country_age_list), 'float') for i in range(total_iters) ] if (top_submodel > 0): top_submodel_draws = [ np.empty(len(country_age_list), 'float') for i in range(100) ] iso3 = np.empty(len(country_age_list), '|S3') age_group = np.empty(len(country_age_list), 'int') year = np.empty(len(country_age_list), 'int') # loop through country/age groups for ca in np.unique(country_age_list): print('GPRing ' + ca) # subset the data for this particular country/age ca_data = all_data[country_age == ca] # subset just the observed data if ca_data['lt_cf'].dtype != '|O8': ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0) & (ca_data['test_' + test] == 0)] if len(ca_observed) > 1: has_data = True else: has_data = False else: has_data = False # keep track of how many iterations have been added for this model iter_counter = 0 # loop through each submodel for m in range(number_submodels): # identify the dependent variable for this model dv = dv_list[m] # continue making predictions if we actually need draws for this model if (spacetime_iters[m] > 0) or (m + 1 == top_submodel): # skip models with no spacetime results if all_data['spacetime_' + str(m + 1)].dtype != 'float64': for i in range(spacetime_iters[m]): draws[iter_counter][country_age_list == ca] = np.NaN iter_counter += 1 if (m + 1 == top_submodel): for i in range(100): top_submodel_draws[i][country_age_list == ca] = np.NaN continue # make a list of the spacetime predictions ca_prior = np.array([ np.mean(ca_data['spacetime_' + str(m + 1)][ca_data.year == y]) for y in year_list ]) # find the amplitude for this country/age amplitude = np.mean(ca_data['spacetime_amplitude_' + str(m + 1)]) # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR def mean_function(x): return np.interp(x, year_list, ca_prior) # setup the covariance function M = gp.Mean(mean_function) C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2, amp=amplitude, scale=scale) # observe the data if there is any if has_data: gp.observe(M=M, C=C, obs_mesh=ca_observed.year, obs_V=ca_observed['spacetime_data_variance_' + str(m + 1)], obs_vals=ca_observed[dv]) # draw realizations from the data realizations = [ gp.Realization(M, C) for i in range(spacetime_iters[m]) ] # save the data for this country/age into the results array iso3[country_age_list == ca] = ca[0:3] age_group[country_age_list == ca] = ca[4:] year[country_age_list == ca] = year_list.T for i in range(spacetime_iters[m]): try: draws[iter_counter][country_age_list == ca] = realizations[i](year_list) except: print('Failure in ' + ca) iter_counter += 1 # if it's the top submodel, do 100 additional draws if (m + 1 == top_submodel): realizations = [gp.Realization(M, C) for i in range(100)] for i in range(100): 
try: top_submodel_draws[i][country_age_list == ca] = realizations[i]( year_list) except: print('Failure in ' + ca) # save the results print('Saving GPR results') names = ['iso3', 'age_group', 'year'] results = np.core.records.fromarrays([iso3, age_group, year], names=names) for i in range(total_iters): results = recfunctions.append_fields(results, 'ensemble_d' + str(i + 1), draws[i]) if (top_submodel > 0): for i in range(100): results = recfunctions.append_fields(results, 'top_submodel_d' + str(i + 1), top_submodel_draws[i]) rec2csv(results, outfile)
                    help='The submodel_id for GPR')
args = parser.parse_args()

# region_name = args.region_name
location_id = args.location_id
ihme_loc_id = args.ihme_loc_id
version_id = args.version_id
submodel_id = args.submodel_id

version_dir = "FILEPATH"
input_dir = "FILEPATH"
output_dir = "FILEPATH"

# Get data
input_file = "FILEPATH"
data = pl.csv2rec(input_file, missing='NA')

# Create vectors of data
all_location_index = (data['ihme_loc_id'] == ihme_loc_id)
index = (data['ihme_loc_id'] == ihme_loc_id) & (data['data'] == 1)
region_name = pl.array(data['region_name'][all_location_index])[0]
print region_name
data_year = pl.array(data['year'][index])
data_mort = pl.array(data['logit_mort'][index])
data_var = pl.array(data['logit_var'][index])
data_category = pl.array(data['category'][index])

# Prior
index = (data['ihme_loc_id'] == ihme_loc_id)
prior_year = pl.array(data['year'][index])
prior_mort = gpr.logit(
# zeta = .7

# transformation for the GPR step, choose from: ('log10','ln','logit','logit10')
transform = 'logit'

## Set seed
np.random.RandomState(intSeed)

'''
Import data
'''

os.chdir('strPath')
data = pl.csv2rec('prediction_model_results_all_stages_%s_%i_%s_%s.txt' % (rr, ho, lam, zeta), missing='NA')

for ss in sexes:
    # training data
    index = (data['ihme_loc_id'] == cc) & (data['sex'] == ss) & (data['data'] == 1) & (data['include'] == 'TRUE')
    train_year = pl.array(data['year'][index])
    train_mort = pl.array(data['log_mort'][index])
    train_stderr = pl.array(data['log_stderr'][index])
    train_category = pl.array(data['category'][index])

    # testing data
    index = (data['ihme_loc_id'] == cc) & (data['sex'] == ss) & (data['data'] == 1) & (data['include'] == 'FALSE')
    test_year = pl.array(data['year'][index])
    test_mort = pl.array(data['log_mort'][index])
    test_stderr = pl.array(data['log_stderr'][index])
# save the csv file
import csv
fname = dismod3.settings.JOB_WORKING_DIR % id + '/data.csv'
try:
    f = open(fname, 'w')
    csv.writer(f).writerow(column_names)
    csv.DictWriter(f, column_names).writerows(data_list)
    f.close()
except IOError, e:
    print 'Warning: could not create data csv. Maybe it exists already?\n%s' % e

## fit the model
data = pl.csv2rec(fname)

print 'generating model'
from space_time_model import model
reload(model)  # for development, automatically reload in case model.py has changed
mod_mc = model.gp_re_a(data)

print 'fitting model with mcmc'
iter = 10000
#iter = 100  # for testing
mod_mc.sample(iter, iter/2, 1+iter/2000, verbose=1)

# generate plots of results
print 'summarizing results'
param_predicted_stats = mod_mc.param_predicted.stats()
# import necessary libraries
import pymc as mc
import numpy as np
import pylab as pl
import os
from scipy import sparse
from scipy.interpolate import splrep, splev

# setup directory info
project = "USCOD"
proj_dir = "D:/Projects/" + project + "/" if (os.environ["OS"] == "Windows_NT") else "/shared/projects/" + project + "/"


### setup the data
# load in the csv
data = pl.csv2rec(proj_dir + "data/model inputs/downsampled.csv")
print "Data loaded"

# keep just the specified age and sex
data = data[(data.sex == sex) & (data.age_group == age)]

# remove any instances of population zero, which might blow things up due to having offsets of negative infinity
data = data[data.pop > 0.0]


### setup temporal indexing
# set year to start at 0
data = pl.rec_append_fields(rec=data, names="year0", arrs=np.array(data.year - np.min(data.year)))

# set years to go from 0 to (num years - 1)
for i, y in enumerate(np.sort(np.unique(data.year0))):
def make_model( lon, lat, t, input_data, covariate_keys, pos, neg, lo_age=None, up_age=None, cpus=1, with_stukel=with_stukel, chunk=chunk, disttol=disttol, ttol=ttol, ): ra = csv2rec(input_data) if np.any(pos + neg == 0): where_zero = np.where(pos + neg == 0)[0] raise ValueError, "Pos+neg = 0 in the rows (starting from zero):\n %s" % where_zero C_time = [0.0] f_time = [0.0] M_time = [0.0] # ============================= # = Preprocess data, uniquify = # ============================= data_mesh = combine_st_inputs(lon, lat, t) if lo_age is None: lo_age = 2.0 * np.ones(data_mesh.shape[0]) if up_age is None: up_age = 10.0 * np.ones(data_mesh.shape[0]) # Find near spatiotemporal duplicates. ui = [] fi = [] ti = [] dx = np.empty(1) for i in xrange(data_mesh.shape[0]): match = False for j in xrange(len(ui)): pm.gp.geo_rad(dx, data_mesh[i, :2].reshape((1, 2)), data_mesh[ui[j], :2].reshape((1, 2))) dt = abs(t[ui[j]] - t[i]) if dx[0] < disttol and dt < ttol: match = True fi.append(j) ti[j].append(i) break if not match: fi.append(len(ui)) ui.append(i) ti.append([i]) ui = np.array(ui) ti = [np.array(tii) for tii in ti] fi = np.array(fi) logp_mesh = data_mesh[ui, :] # covariate_values_on_logp = dict([(k,covariate_values[k][ui]) for k in covariate_values.keys()]) # ===================== # = Create PyMC model = # ===================== init_OK = False while not init_OK: @pm.deterministic() def M(): return pm.gp.Mean(pm.gp.zero_fn) # Inverse-gamma prior on nugget variance V. tau = pm.Gamma("tau", alpha=3, beta=3 / 0.25, value=5) V = pm.Lambda("V", lambda tau=tau: 1.0 / tau) # V = pm.Exponential('V', .1, value=1.) vars_to_writeout = ["V", "m_const", "t_coef"] # Lock down parameters of Stukel's link function to obtain standard logit. # These can be freed by removing 'observed' flags, but mixing gets much worse. if with_stukel: a1 = pm.Uninformative("a1", 0.5) a2 = pm.Uninformative("a2", 0.8) else: a1 = pm.Uninformative("a1", 0, observed=True) a2 = pm.Uninformative("a2", 0, observed=True) inc = pm.CircVonMises("inc", 0, 0) # Use a uniform prior on sqrt ecc (sqrt ???). Using a uniform prior on ecc itself put too little # probability mass on appreciable levels of anisotropy. sqrt_ecc = pm.Uniform("sqrt_ecc", value=0.4, lower=0.0, upper=1.0) ecc = pm.Lambda("ecc", lambda s=sqrt_ecc: s ** 2) # Subjective skew-normal prior on amp (the partial sill, tau) in log-space. # Parameters are passed in in manual_MCMC_supervisor. log_amp = pm.SkewNormal("log_amp", value=amp_params["mu"], **amp_params) amp = pm.Lambda("amp", lambda log_amp=log_amp: np.exp(log_amp)) # Subjective skew-normal prior on scale (the range, phi_x) in log-space. log_scale = pm.SkewNormal("log_scale", value=-1, **scale_params) scale = pm.Lambda("scale", lambda log_scale=log_scale: np.exp(log_scale)) # Exponential prior on the temporal scale/range, phi_t. Standard one-over-x # doesn't work bc data aren't strong enough to prevent collapse to zero. scale_t = pm.Exponential("scale_t", 0.1, value=1.5) # Uniform prior on limiting correlation far in the future or past. t_lim_corr = pm.Uniform("t_lim_corr", 0, 1, value=0.5) # # Uniform prior on sinusoidal fraction in temporal variogram sin_frac = pm.Uniform("sin_frac", 0, 1, value=0.3) vars_to_writeout.extend(["inc", "ecc", "amp", "scale", "scale_t", "t_lim_corr", "sin_frac"]) # Create covariance and MV-normal F if model is spatial. try: # A constraint on the space-time covariance parameters that ensures temporal correlations are # always between -1 and 1. 
@pm.potential def st_constraint(sd=0.5, sf=sin_frac, tlc=t_lim_corr): if -sd >= 1.0 / (-sf * (1 - tlc) + tlc): return -np.Inf else: return 0.0 # A Deterministic valued as a Covariance object. Uses covariance my_st, defined above. @pm.deterministic def C( amp=amp, scale=scale, inc=inc, ecc=ecc, scale_t=scale_t, t_lim_corr=t_lim_corr, sin_frac=sin_frac, ra=ra ): eval_fun = CovarianceWithCovariates(my_st, input_data, covariate_keys, ui, fac=1.0e4, ra=ra) return pm.gp.FullRankCovariance( eval_fun, amp=amp, scale=scale, inc=inc, ecc=ecc, st=scale_t, sd=0.5, tlc=t_lim_corr, sf=sin_frac ) sp_sub = pm.gp.GPSubmodel("sp_sub", M, C, logp_mesh) init_OK = True except pm.ZeroProbability, msg: print "Trying again: %s" % msg init_OK = False gc.collect()
#cc = 'IND_43872'
#ss = 'male'

# transformation for the GPR step, choose from: ('log10','ln','logit','logit10')
transform = 'logit'

'''
Import data
'''

hitercount = 0
for iter in iters:
    for ss in sexes:
        if (huncert == int(1)):
            os.chdir('strPath')
            data = pl.csv2rec('prediction_model_results_all_stages%s.txt' % (iter), missing='NA')
        else:
            os.chdir('%s/strPath' % ('/home/j' if os.name == 'posix' else 'J:'))
            data = pl.csv2rec('prediction_model_results_all_stages.txt', missing='NA')

        # data
        index = (data['ihme_loc_id'] == cc) & (data['sex'] == ss) & (data['data'] == 1)
        data_year = pl.array(data['year'][index])
        data_mort = pl.array(data['log_mort'][index])
        data_stderr = pl.array(data['log_stderr'][index])
        data_category = pl.array(data['category'][index])
import numpy as np
from csv import reader
import pylab as pl

# =================
# = Emelda's data =
# =================
R = pl.csv2rec('datafiles/all_data.csv')
# R = pl.csv2rec('all_data.csv')

missing_fields = np.zeros(len(R))
for n in ['surv_int', 'pyor', 'cases', 'region', 'lat', 'lon']:
    missing_fields += R[n].mask
missing_fields = np.array(missing_fields, dtype=bool)

R_mis = R[np.where(missing_fields)]
R = R[np.where(1 - missing_fields)].data

R.pr /= 100.
R.mbg_pr /= 100.
R.mix_pr /= 100.
R.pfpr /= 100

R_af = R[np.where(R.region == 'Africa+')]
R_am = R[np.where(R.region == 'America')]
R_as = R[np.where(R.region == 'CSE Asia')]
R_am_as = R[np.where((R.region == 'America') + (R.region == 'CSE Asia'))]


def time_scaling(pcd, surv_int):
    out = np.ones(len(pcd))
    where_rescale = np.where((pcd != 'Y') * (surv_int > 7) + (surv_int < 7))
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, iters): # load in the data all_data = csv2rec(infile, use_mrecords=False) for m in range(number_submodels): all_data = np.delete( all_data, np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0], axis=0) # Investigate error thrown for HKG, MAC, and SGP... they don't have data, but don't know why this is breaking line 62 all_data = all_data[all_data['iso3'] != "HKG"] all_data = all_data[all_data['iso3'] != "MAC"] all_data = all_data[all_data['iso3'] != "SGP"] # find the list of years for which we need to predict year_list = np.unique(all_data.year) # find the list of country/age groups country_age = np.array([all_data.iso3[i] for i in range(len(all_data))]) country_age_list = np.repeat(np.unique(country_age), len(year_list)) # make empty arrays in which to store the results draws = [ np.empty(len(country_age_list), 'float') for i in range(iters * number_submodels * 2) ] iso3 = np.empty(len(country_age_list), '|S3') # age_group = np.empty(len(country_age_list), 'int') year = np.empty(len(country_age_list), 'int') # loop through country/age groups for ca in np.unique(country_age_list): print('GPRing ' + ca) # subset the data for this particular country/age ca_data = all_data[country_age == ca] # subset just the observed data if ca_data['lt_prev'].dtype != '|O8': ca_observed = ca_data[(np.isnan(ca_data['lt_prev']) == 0)] if len(ca_observed) > 1: has_data = True else: has_data = False else: has_data = False # loop through each submodel for m in range(number_submodels): # identify the dependent variable for this model dv = dv_list[m] # loop through spacetime/linear for x, t in enumerate(['spacetime']): # make a list of the spacetime predictions ca_prior = np.array([ np.mean(ca_data[t + '_' + str(m + 1)][ca_data.year == y]) for y in year_list ]) # find the amplitude for this country/age amplitude = np.mean(ca_data[t + '_amplitude_' + str(m + 1)]) # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR def mean_function(x): return np.interp(x, year_list, ca_prior) # setup the covariance function M = gp.Mean(mean_function) C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2, amp=amplitude, scale=scale) # observe the data if there is any if has_data: gp.observe(M=M, C=C, obs_mesh=ca_observed.year, obs_V=ca_observed[t + '_data_variance_' + str(m + 1)], obs_vals=ca_observed['lt_prev']) # draw realizations from the data realizations = [gp.Realization(M, C) for i in range(iters)] # save the data for this country/age into the results array iso3[country_age_list == ca] = ca[0:3] # age_group[country_age_list==ca] = ca[4:] year[country_age_list == ca] = year_list.T for i in range(iters): draws[((2 * m + x) * iters) + i][ country_age_list == ca] = realizations[i](year_list) # save the results print('Saving GPR results') names = ['iso3', 'age_group', 'year'] results = np.core.records.fromarrays([iso3, year], names=names) for m in range(number_submodels): for x, t in enumerate(['spacetime']): for i in range(iters): results = recfunctions.append_fields( results, 'gpr_' + str(m + 1) + '_' + t + '_d' + str(i + 1), draws[((2 * m + x) * iters) + i]) results = recfunctions.append_fields( results, 'gpr_' + str(m + 1) + '_' + t + '_mean', np.mean(draws[((2 * m + x) * iters):((2 * m + x + 1) * iters)], axis=0)) rec2csv(results, outfile)
import tables as tb
import numpy as np
import map_utils
from pylab import csv2rec, rec2csv
import os
import sys
from dufvax import covariate_names

# TODO: draw these straight from /Volumes/data

data_in = csv2rec(sys.argv[1])
covariate_path = sys.argv[2]

data_box = data_in

cols = dict([(key, data_box[key]) for key in data_box.dtype.names])
for k in ['urban', 'rural', 'africa']:
    cols.pop(k)


def mode(a):
    vals = list(set(a))
    counts = [(a == v).sum() for v in vals]
    return np.argmin(counts)


def nan_callback(lon_old, lat_old, data, lon_new, lat_new, order):
    lon_ind = np.argmin(np.abs(np.subtract.outer(lon_old, lon_new)), axis=0)
    lat_ind = np.argmin(np.abs(np.subtract.outer(lat_old, lat_new)), axis=0)
    out = lat_new * 0
    for i in xrange(len(lon_new)):
        lai, loi = lat_ind[i], lon_ind[i]
        if data.mask[lai, loi]:
            for d in xrange(10):
iters = range((hsim - 1) * 25 + 1, (hsim - 1) * 25 + 1 + 25) else: dr = int(1000) iters = range(1, 2) # transformation for the GPR step, choose from: ('log10','ln','logit','logit10') transform = 'logit' ''' Import data ''' hitercount = 0 for iter in iters: for ss in sexes: if (huncert == int(1)): os.chdir('filepath') data = pl.csv2rec('filepath' % (iter), missing='NA') else: os.chdir('filepath' % ('filepath' if os.name == 'posix' else 'filepath')) data = pl.csv2rec('filepath', missing='NA') # data index = (data['ihme_loc_id'] == cc) & (data['sex'] == ss) & (data['data'] == 1) data_year = pl.array(data['year'][index]) data_mort = pl.array(data['log_mort'][index]) data_stderr = pl.array(data['log_stderr'][index]) data_category = pl.array(data['category'][index]) # prior
os.chdir('FILEPATH')
data = pl.csv2rec('FILEPATH', missing='NA')

# data
index = (data['ihme_loc_id'] == cc)
data_year = pl.array(data['year'][index])
data_fert = pl.array(data['logit_bound_tfr'][index])
data_var = pl.array(data['logit_bound_var'][index])
data_category = pl.array(data['category'][index])

# prior
data = pl.csv2rec("FILEPATH")
index = (data['ihme_loc_id'] == cc)
prior_year = pl.array(data['year'][index])
prior_fert = pl.array(data['logit_bound_tfr_pred_smooth'][index])

# prediction years
predictionyears = pl.array(range(int(min(data['year'])), int(max(data['year'])) + 1)) + 0.5

mse = pl.array(data['mse'][index])
print(mse)
mse = float(mse[0])

'''
Fit model with best parameters
def import_as_recarray(self, csv):
    return pylab.csv2rec(csv)
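# Note: pylab.csv2rec / matplotlib.mlab.csv2rec was deprecated and later removed from
# matplotlib, so the csv2rec calls in these snippets only run against old matplotlib
# versions.  A minimal, hedged replacement sketch using numpy follows; it skips csv2rec
# extras such as date parsing, name munging, and the missing/converterd keywords, so it
# is not a drop-in substitute everywhere.
import numpy as np

def csv2rec_compat(fname, delimiter=','):
    # build a record array with column names taken from the header row
    return np.genfromtxt(fname, delimiter=delimiter, names=True, dtype=None).view(np.recarray)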
dr = int(1000)
iters = range(1, 2)

# transformation for the GPR step, choose from: ('log10','ln','logit','logit10')
transform = 'logit'

'''
Import data
'''

hitercount = 0
for iter in iters:
    for ss in sexes:
        if (hiv_uncert == int(1)):
            input_file = "FILEPATH"
        else:
            input_file = "FILEPATH"
        data = pl.csv2rec(input_file, missing='NA')

        # data
        index = (data['ihme_loc_id'] == cc) & (data['sex'] == ss) & (data['data'] == 1)
        data_year = pl.array(data['year'][index])
        data_mort = pl.array(data['log_mort'][index])
        data_stderr = pl.array(data['log_stderr'][index])
        data_category = pl.array(data['category'][index])

        # prior
        index = (data['ihme_loc_id'] == cc) & (data['sex'] == ss)
        prior_year = pl.array(data['year'][index])
        if (transform == 'log10'):
            prior_mort = pl.log(pl.array(data['pred2final'][index])) / pl.log(10)  # this is to convert the prior to log base-10 space
#!/usr/bin/python
""" Read csv station data and create an xml file from it """
from pylab import csv2rec
import xml.etree.ElementTree as ET

lines = ["Bakerloo", "Central", "Circle", "District", "HamAndCity", "Jubilee",
         "Metropolitan", "Northern", "Piccadilly", "Victoria", "WaterlooAndCity"]

for line in lines:
    stationdata = csv2rec(line + ".csv", delimiter=',', converterd={5: str})
    tree = ET.parse('Tube.xml')
    root = tree.getroot()
    for i in range(0, stationdata.size):
        newstation = ET.Element('station', {'name': stationdata[i][0]})
        root.append(newstation)
    tree.write(line + ".xml")
import pylab as pl

orig = pl.csv2rec('nature08230-s2.csv')

# reshape from one row per country (wide) to one row per country-year (long),
# dropping country-years with a missing HDI or TFR value
country = []
year = []
hdi = []
tfr = []
for row in orig:
    for y in range(1975, 2006):
        if pl.isnan(row['hdi%d' % y]) or pl.isnan(row['tfr%d' % y]):
            continue
        country.append(row['country'])
        year.append(y)
        hdi.append(row['hdi%d' % y])
        tfr.append(row['tfr%d' % y])

all = pl.np.core.rec.fromarrays([country, year, hdi, tfr],
                                names='country year hdi tfr'.split())

hdi = all.hdi[(all.year == 1975) | (all.year == 2005)]
tfr = all.tfr[(all.year == 1975) | (all.year == 2005)]

hdi2005 = all.hdi[all.year == 2005]
tfr2005 = all.tfr[all.year == 2005]
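# A hedged sketch (not part of the original script; the output file name is
# illustrative only) of how the reshaped arrays above might be visualized:
# HDI against TFR for the 1975 and 2005 cross-sections.
pl.figure()
pl.plot(all.hdi[all.year == 1975], all.tfr[all.year == 1975], 'b.', label='1975')
pl.plot(hdi2005, tfr2005, 'r.', label='2005')
pl.xlabel('Human Development Index')
pl.ylabel('Total Fertility Rate')
pl.legend()
pl.savefig('hdi_vs_tfr.png')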
os.chdir('FILEPATH')

'''
Get GPR settings
'''
rr = sys.argv[1]
cc = sys.argv[2]
rnum = sys.argv[3]
hivsims = int(sys.argv[4])

'''
Import data
'''
if hivsims == 1:
    os.chdir('FILEPATH')
    data = pl.csv2rec('gpr_5q0_input_' + rnum + '.txt', missing='NA')
else:
    os.chdir('FILEPATH')
    data = pl.csv2rec('gpr_5q0_input_GBD2015.txt', missing='NA')

# data
index = (data['ihme_loc_id'] == cc) & (data['data'] == 1)
data_year = pl.array(data['year'][index])
data_mort = pl.array(data['logit_mort'][index])
data_var = pl.array(data['logit_var'][index])
data_category = pl.array(data['category'][index])

# prior
index = (data['ihme_loc_id'] == cc)
prior_year = pl.array(data['year'][index])
prior_mort = gpr.logit(
if __name__ == '__main__':
    import time
    import pylab as pl
    import data

    data.age_range = pl.arange(0, 81, 20)
    data.time_range = pl.arange(1980, 2005, 5)
    data.regions = pl.randint(5, 15)

    time.sleep(pl.rand() * 5.)
    t0 = time.time()
    data.generate_fe('test_data/%s.csv' % t0)  # included just to get good test coverage
    data.generate_smooth_gp_re_a('test_data/%s.csv' % t0, country_variation=True)

    std = 5. * pl.rand(len(pl.csv2rec('test_data/%s.csv' % t0)))
    pct = 90.
    print data.age_range, data.time_range, data.regions, pl.mean(std), pct

    data.add_sampling_error('test_data/%s.csv' % t0,
                            'test_data/noisy_%s.csv' % t0,
                            std=std)
    data.knockout_uniformly_at_random('test_data/noisy_%s.csv' % t0,
                                      'test_data/missing_noisy_%s.csv' % t0,
                                      pct=pct)

    mod_mc = evaluate_model(
        'gp_re_a',
        'knockout pct=%d, model matches data, has laplace priors, sigma_e = Exp(1)' % pct,
        'test_data/missing_noisy_%s.csv' % t0,
        'test_data/%s.csv' % t0)
'''
Get GPR settings
'''
rr = sys.argv[1]
cc = sys.argv[2]
rnum = int(1)
hivsims = int(0)

'''
Import data
'''
os.chdir('PATH')
data = pl.csv2rec('gpr_input_file.csv', missing='NA')

# data
index = (data['ihme_loc_id'] == cc) & (data['data'] == 1)
data_year = pl.array(data['year_id'][index])
data_mort = pl.array(data['logit_q5_sexratio'][index])
data_var = pl.array(data['data_var'][index])
data_category = pl.array(data['category'][index])

# prior
index = (data['ihme_loc_id'] == cc)
prior_year = pl.array(data['year_id'][index])
prior_mort = data['pred_logitratio_s2'][index]

# prediction years
predictionyears = pl.array(range(int(min(data['year_id'])), int(max(data['year_id'])) + 1)) + 0.5
output_dir = "FILEPATH"
try:
    os.makedirs(output_dir)
except OSError:
    # output directory already exists
    pass

'''
Get GPR settings
'''
rnum = int(1)
hivsims = int(0)

'''
Import data
'''
input_file = "FILEPATH"
data = pl.csv2rec(input_file, missing='NA')

# data
index = (data['ihme_loc_id'] == cc) & (data['data'] == 1)
data_year = pl.array(data['year_id'][index])
data_mort = pl.array(data['logit_q5_sexratio'][index])
data_var = pl.array(data['data_var'][index])
data_category = pl.array(data['category'][index])

# prior
index = (data['ihme_loc_id'] == cc)
prior_year = pl.array(data['year_id'][index])
prior_mort = data['pred_logitratio_s2'][index]

# prediction years
predictionyears = pl.array(
def GFFthreshold(infn, outbed):
    """
    Thresholds the values in the GFF file *infn* and exports the results to
    the BED file *outbed*.
    """
    # 'nodate' (defined elsewhere in this module) keeps these columns from
    # being parsed as dates by csv2rec
    converterd = {'probe': nodate, 'a': nodate, 'b': nodate}

    logging.debug('reading GFF into record array')
    a = csv2rec(infn, delimiter='\t',
                names=('chr', 'prog', 'id', 'start', 'stop', 'ratio', 'a', 'b', 'probe'),
                converterd=converterd)

    logging.debug('sorting record array')
    a.sort(order=('chr', 'start'))

    fout = open(outbed, 'w')

    # enrichment threshold: mean ratio plus 2.5 standard deviations
    m = a.ratio.mean()
    std = a.ratio.std()
    thresh = m + 2.5 * std

    allregions = []
    region = []
    lastchr = a.chr[0]
    lastpos = None
    count = 0
    for data in a:
        if data.ratio < thresh:
            continue
        if lastpos is None:
            dist = 0
        else:
            dist = data.start - lastpos

        logging.debug('region is currently')
        for i in region:
            logging.debug('\t%s' % i)
        logging.debug('this data: %s' % data)
        logging.debug('dist from last: %s' % dist)

        if dist > 500 or data.chr != lastchr:
            logging.debug('\ndist > 500; checking region len')
            logging.debug('regionlen: %s' % len(region))
            for i in region:
                logging.debug('\t%s' % i)
            if len(region) < 4:
                logging.debug('region not long enough, erasing')
            else:
                logging.debug('region is long enough!!!!')
                logging.debug('region to be exported is')
                for i in region:
                    logging.debug('\t%s' % i)
                chr = region[0].chr
                start = region[0].start
                stop = region[-1].stop
                fout.write('%s\t%s\t%s\n' % (chr, start, stop))
                count += 1
            region = []

        lastpos = data.stop
        lastchr = data.chr
        logging.debug('adding %s to region' % data)
        region.append(data)

    if len(region) >= 4:
        logging.debug('last region will be exported')
        logging.debug('region to be exported is')
        for i in region:
            logging.debug('\t%s' % i)
        chr = region[0].chr
        start = region[0].start
        stop = region[-1].stop
        fout.write('%s\t%s\t%s\n' % (chr, start, stop))
        count += 1
    else:
        logging.debug('last region not long enough')

    fout.close()
    logging.debug('Number of enriched regions: %s' % count)
    logging.debug('using threshold: %s' % thresh)
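# A hedged usage sketch for GFFthreshold (the file names are placeholders, not
# from the original source): enable debug logging, then threshold a
# tab-delimited GFF of probe ratios into a BED file of enriched regions.
import logging
logging.basicConfig(level=logging.DEBUG)
GFFthreshold('chip_ratios.gff', 'enriched_regions.bed')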
import pymc as mc
import numpy as np
import pylab as pl
import os
from scipy import sparse
from scipy.interpolate import splrep, splev

# setup directory info
project = 'USCOD'
proj_dir = 'D:/Projects/' + project + '/' if (os.environ.get('OS') == 'Windows_NT') else '/shared/projects/' + project + '/'

### setup the data
# load in the csv
data = pl.csv2rec(proj_dir + 'data/model inputs/state_random_effects_input.csv')
print 'Data loaded'

# keep just the specified age and sex
data = data[(data.sex == sex) & (data.age_group == age)]

# remove any instances of population zero, which might blow things up due to
# having offsets of negative infinity
data = data[data.pop > 0.]

### setup temporal indexing
# set year to start at 0
data = pl.rec_append_fields(
    rec=data,
    names='year0',
        return (x1, delta)

    def calc_spines_pos(self, cursor_list, x1_list):
        """Calculate the spines position, returning the mid point of the
        interval from the two lists."""
        mid_points = []
        for i, el in enumerate(cursor_list):
            mid_point = cursor_list[i] + (x1_list[i] - cursor_list[i]) / 2
            mid_points.append(mid_point)
        return mid_points


if __name__ == "__main__":
    from scipy.optimize import leastsq

    data = pylab.csv2rec('spines_distribution_Wilson_1992.csv')
    pfh = FitHandler()
    pfh.plot_data(data)

    order = 17
    pfit = pfh.fit_and_plot(data, order)
    plt.title("Fitting the data")
    plt.legend()
    plt.savefig("Fitted_data.png")

    # Integrating
    pInteg = pfit.integ()
    plt.figure()
    pfh.plot_poly(pInteg)
def _Pressure(field, data):
    return (data.pf['Gamma'] - 1.0) * data['Density'] * data['InternalEnergy']

add_field('Pressure', function=_Pressure, units=r'\rm{dyne}/\rm{cm}^{2}')

### extract an ortho_ray (1D solution vector)
ray = pf.h.ortho_ray(0, [0.5, 0.5])

### define fields vector
fields = ('Density', 'x-velocity', 'InternalEnergy', 'Pressure')

### read exact solution
exact = pylab.csv2rec(exact_solution_filename, delimiter=' ',
                      names=('x', 'Density', 'x-velocity', 'Pressure', 'InternalEnergy'))

### calculate difference norm

# first interpolate the exact solution onto the ray
ray_exact = {'x': ray['x'],
             'Density': pylab.stineman_interp(ray['x'], exact['x'], exact['Density']),
             'x-velocity': pylab.stineman_interp(ray['x'], exact['x'], exact['x-velocity']),
             'Pressure': pylab.stineman_interp(ray['x'], exact['x'], exact['Pressure']),
             'InternalEnergy': pylab.stineman_interp(ray['x'], exact['x'], exact['InternalEnergy'])}

# now calculate the norm (first order, since we're dealing with
# conservation laws)
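# A hedged sketch (not part of the original script; it assumes the ray spans a
# uniform, unit-length domain so the cell width is 1/N) of the first-order
# difference norm referred to above: an L1 norm of simulated minus exact,
# computed per field.
dx = 1.0 / len(ray['x'])
norms = {}
for f in fields:
    norms[f] = (abs(ray[f] - ray_exact[f]) * dx).sum()
    print '%s L1 norm: %g' % (f, norms[f])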
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# FIXME: Need to extract urban, rural from asciis, otherwise they'll be
# FIXME: NaN at some points.
import sys
from pylab import csv2rec, rec2csv
import numpy as np
import warnings

duffy_datafile, vivax_datafile = sys.argv[1:]
combined_datafile = duffy_datafile.split(".")[0] + "_+_" + vivax_datafile.split(".")[0] + ".csv"

duffy_data = csv2rec(duffy_datafile)
vivax_data = csv2rec(vivax_datafile)
n_duffy = len(duffy_data)
n_vivax = len(vivax_data)
duffy_nan = np.repeat(np.nan, n_duffy)
vivax_nan = np.repeat(np.nan, n_vivax)

# convert start / end month-year pairs to fractional years
tstart = vivax_data.yestart + (vivax_data.mostart - 1) / 12.0
tend = vivax_data.yeend + (vivax_data.moend - 1) / 12.0

weirdcols = ["lon", "lat", "t", "vivax_pos", "vivax_neg", "n", "datatype"]
vivaxcols = ["lo_age", "up_age", "urban", "rural"]
duffycols = [
    "genaa",
    "genab",