def evaluate_model(mod, comment='', data_fname='missing_noisy_data.csv', truth_fname='data.csv'):
    """ Run specified model on existing data (data.csv / missing_noisy_data.csv)
    and save results in dev_log.csv

    Existing models: %s
    """ % data_run_models
    if mod not in data_run_models.split(' '):
        raise TypeError, 'Unrecognized model "%s"; must be one of %s' % (mod, data_run_models)

    import model
    reload(model)

    print 'loading data'
    data = pl.csv2rec(data_fname)
    truth = pl.csv2rec(truth_fname)

    t0 = time.time()
    print 'generating model'
    mod_mc = eval('model.%s(data)' % mod)

    print 'fitting model with mcmc'
    mod_mc.sample(10000, 5000, 50, verbose=1)
    t1 = time.time()

    print 'summarizing results'
    import graphics
    reload(graphics)
    pl.figure(figsize=(22, 17), dpi=300)
    pl.clf()
    graphics.plot_all_predictions_over_time(data, mod_mc.predicted, more_data=truth)

    data_stats = mod_mc.data_predicted.stats()
    i_out = [i for i in range(len(data)) if pl.isnan(data.y[i])]
    rmse_abs_out = pl.rms_flat(truth.y[i_out] - data_stats['mean'][i_out])
    rmse_rel_out = 100*pl.rms_flat(1. - data_stats['mean'][i_out]/truth.y[i_out])

    i_in = [i for i in range(len(data)) if not pl.isnan(data.y[i])]
    rmse_abs_in = pl.rms_flat(truth.y[i_in] - data_stats['mean'][i_in])
    rmse_rel_in = 100*pl.rms_flat(1. - data_stats['mean'][i_in]/truth.y[i_in])

    param_stats = mod_mc.param_predicted.stats()
    coverage = 100*pl.sum((truth.y[i_out] >= param_stats['95% HPD interval'][i_out, 0])
                          & (truth.y[i_out] <= param_stats['95% HPD interval'][i_out, 1])) / float(len(i_out))

    import md5
    data_hash = md5.md5(data).hexdigest()
    results = [mod, t1-t0,
               rmse_abs_out, rmse_rel_out, rmse_abs_in, rmse_rel_in, coverage,
               len(data), len(pl.unique(data.region)), len(pl.unique(data.country)),
               len(pl.unique(data.year)), len(pl.unique(data.age)), data_hash, t0, comment]
    print '%s: time: %.0fs out-of-samp rmse abs=%.1f rel=%.0f in-samp rmse abs=%.1f rel=%.0f coverage=%.0f\ndata: %d rows; %d regions, %d countries %d years %d ages [data hash: %s]\n(run conducted at %f)\n%s' % tuple(results)

    pl.savefig('/home/j/Project/Models/space-time-smoothing/images/%s.png' % t0)  # FIXME: don't hardcode path for saving images

    import csv
    f = open('dev_log.csv', 'a')
    f_csv = csv.writer(f)
    f_csv.writerow(results)
    f.close()

    return mod_mc
def probeData(settings): print "Probing data", settings.fileName samplesPerCode = int( round(settings.samplingFreq / (settings.codeFreqBasis / settings.codeLength))) samples = getSamples.int8(settings.fileName, 10 * samplesPerCode, settings.skipNumberOfBytes) #Initialize figure fig = pylab.figure() pylab.clf() #X axis timeScale = [x*(1/settings.samplingFreq) for x in \ range(0,int(round((5e-3 + 1/settings.samplingFreq)*settings.samplingFreq)))] #Time domain plot pylab.subplot(2, 2, 1) plot_max = int(round(samplesPerCode / 50)) pylab.plot([1000 * i for i in timeScale[0:plot_max]], samples[0:plot_max]) pylab.title('Time domain plot') pylab.xlabel('Time (ms)') pylab.ylabel('Amplitude') #Frequency domain plot (Pxx,freqs) = matplotlib.mlab.psd(x = samples-numpy.mean(samples),\ noverlap = 1024,\ NFFT = 2048,\ Fs = settings.samplingFreq/1e6) pylab.subplot(2, 2, 2) pylab.semilogy(freqs, Pxx) pylab.title('Frequency Domain Plot') pylab.xlabel('Frequency (MHz)') pylab.ylabel('Magnitude') #Histogram pylab.subplot(2, 2, 3) xticks = pylab.unique(samples) pylab.hist(samples, len(xticks)) axis = pylab.axis() pylab.axis([min(samples), max(samples), axis[2], axis[3]]) xticks = pylab.unique(pylab.round_(xticks)) pylab.xticks(xticks) pylab.title('Histogram') return fig
def scanbystate(vis, undo=False):
    mytb = taskinit.tbtool()
    mytb.open(vis, nomodify=False)
    scans = mytb.getcol('SCAN_NUMBER')
    states = mytb.getcol('STATE_ID')

    print 'Unique STATE_IDs = ', str(pl.unique(states))

    maxstate = states.max()

    if undo:
        d = 10**int(floor(log10(scans.min())))
        if d < 10:
            mytb.close()
            raise Exception, 'Apparently, nothing to undo'
        scans -= states
        scans /= d
        print 'New SCAN_NUMBER = (SCAN_NUMBER - STATE_ID) / ' + str(d)
    else:
        m = 10**int(floor(log10(states.max()) + 1.0))
        scans *= m
        scans += states
        print 'New SCAN_NUMBER = SCAN_NUMBER * ' + str(m) + ' + STATE_ID'

    mytb.putcol('SCAN_NUMBER', scans)
    mytb.close()
def addDataVectorAccessor(self, data_vector_accessor):
    self.__data_vectors_accessors__.append(data_vector_accessor)

    _sum = pl.sum(data_vector_accessor.signal)
    _min = pl.amin(data_vector_accessor.signal)
    _max = pl.amax(data_vector_accessor.signal)

    if self.__minimal_signal__ == None:
        self.__minimal_signal__ = _sum
        self.__minimal_data_vector_accessor__ = data_vector_accessor
        self.__min_signal__ = _min
        self.__max_signal__ = _max

    if _sum < self.__minimal_signal__:
        self.__minimal_data_vector_accessor__ = data_vector_accessor
        self.__minimal_signal__ = _sum

    if _min < self.__min_signal__:
        self.__min_signal__ = _min
    if _max > self.__max_signal__:
        self.__max_signal__ = _max

    # collects unique annotations (>0) as a set
    if not data_vector_accessor.annotation == None:
        unique_annotations = pl.unique(data_vector_accessor.annotation[
            pl.where(data_vector_accessor.annotation > 0)])
        if len(unique_annotations) > 0:
            # union of sets
            self.__unique_annotations__ |= set(unique_annotations)
def plotGroupSize(AllData):
    """ run permutations based on group size """
    means = []
    subjects = range(len(AllData[1]['correct']))
    for i in subjects[1:]:
        print(subjects[1:])
        current_means = []
        perms = py.unique(list(it.combinations(subjects, i)))
        for j in range(len(perms)):
            #print(len(perms[j]))
            current = groupPercentCorrect(AllData, subjects, perms[j])
            current_means.append(current)
        group_mean = np.mean(current_means)
        means.append(group_mean)
    #print(means)

    fig = py.figure()
    ax10 = fig.add_subplot(111)
    ax10.plot(subjects[1:], means, 'bo', alpha=1)
    ax10.plot(subjects[1:], means, 'b', linewidth=3, alpha=0.2)
    ax10.set_ylim(-0.2, 1.2)
    ax10.set_title('Group Size: Percent Correct')

    # check means of all members individually
    submeans = []
    for i in subjects:
        curmean = getIndMeans(AllData, subjects[i])
        submeans.append(curmean[0])
    print('Individual means: %.3f ' % np.mean(submeans))
def AllDataDist(AllData):
    subjects = range(len(AllData[1]['correct']))
    matrix = py.zeros([len(AllData.keys()), len(subjects)])
    kcount = -1
    for k in AllData.keys():
        kcount += 1
        icount = 0
        while icount < len(subjects):
            matrix[kcount][icount] = AllData[k]['correct'][icount]
            icount += 1

    meanmean = []
    for i in subjects[1:]:
        # create combination list
        #print(subjects[1:])
        perms = py.unique(list(it.combinations(subjects, i)))
        for h in range(len(perms)):
            # for each combination, get the mean correct
            means = []
            for k in range(len(matrix[:][1])):
                # for each question...
                current = []
                for j in perms[h]:
                    #print(perms[h])
                    # get the correct for that subject, append
                    current.append(matrix[k][j])
                # then take the mode
                #print(int(stats.mode(current)[0]))
                means.append(int(stats.mode(current)[0]))
            #print(means)
        # append mean for each group size
        meanmean.append(np.mean(means))

    allsum = sum(sum(matrix))
    m, n = py.shape(matrix)
    print('Total mean is %.3f / %.3f = %.3f ' % (allsum, m*n, (allsum/(m*n))))

    subjects = subjects[1::2]
    meanmean = meanmean[1::2]
    if len(subjects) > len(meanmean):
        subjects = subjects[1:]
    elif len(subjects) < len(meanmean):
        meanmean = meanmean[1:]

    fig = py.figure()
    ax14 = fig.add_subplot(111)
    ax14.plot(subjects, meanmean, 'bo', alpha=1)
    ax14.plot(subjects, meanmean, 'b', linewidth=3, alpha=0.2)
    ax14.set_ylim(-0.2, 1.2)
    ax14.set_title('Real Data Group Size: Percent Correct')
    ax14.set_xlabel('Group size')
    ax14.set_ylabel('% Correct')
    print(meanmean)
    return meanmean
def randDist(AllData):
    # generate a group-size histo based on random data
    subjects = range(15)
    matrix = genRandMatrix(AllData, 15)  #range(len(AllData[1]['correct']))
    meanmean = []
    for i in subjects[1:]:
        # create combination list
        #print(subjects[1:])
        perms = py.unique(list(it.combinations(subjects, i)))
        for h in range(len(perms)):
            # for each combination, get the mean correct
            means = []
            # print(len(matrix[:][1]))
            # change k for number of simulated questions
            k = 0
            while k < 20:
                #print(k)
                # for each question...
                current = []
                for j in perms[h]:
                    #print(perms[h])
                    # get the correct for that subject, append
                    current.append(matrix[k][j])
                # then take the mode
                #print(int(stats.mode(current)[0]))
                means.append(int(stats.mode(current)[0]))
                k += 1
            #print(means)
        # append mean for each group size
        meanmean.append(np.mean(means))

    allsum = sum(sum(matrix))
    m, n = py.shape(matrix)
    print('Total mean is %.3f / %.3f = %.3f ' % (allsum, m*n, (allsum/(m*n))))
    #print('subjects length %d , meanmean length %d '
    #      % (len(subjects), len(meanmean)))

    subjects = subjects[1::2]
    meanmean = meanmean[1::2]

    fig = py.figure()
    ax13 = fig.add_subplot(111)
    ax13.plot(subjects, meanmean, 'bo', alpha=1)
    ax13.plot(subjects, meanmean, 'b', linewidth=3, alpha=0.2)
    ax13.set_ylim(-0.2, 1.2)
    ax13.set_title('Random Group Size: Percent Correct')
    ax13.set_xlabel('Group size')
    ax13.set_ylabel('% Correct')
    print(meanmean)
    return meanmean
def pixSeedfillBinary(self, Imask, Iseed):
    Iseedfill = copy.deepcopy(Iseed)
    s = ones((3, 3))
    Ijmask, k = ndimage.label(Imask, s)
    Ijmask2 = Ijmask * Iseedfill
    A = list(unique(Ijmask2))
    A.remove(0)
    for i in range(0, len(A)):
        x, y = where(Ijmask == A[i])
        Iseedfill[x, y] = 1
    return Iseedfill
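# A minimal standalone sketch of the seed-fill idea used above, written as a hypothetical
# free function (not the original class method) so it runs on its own; it keeps only the
# 8-connected components of a binary mask that are touched by at least one seed pixel.
import numpy as np
from scipy import ndimage

def seedfill_binary(mask, seed):
    labels, _ = ndimage.label(mask, np.ones((3, 3)))  # label 8-connected components
    hit = np.unique(labels * seed)                    # component ids touched by a seed
    hit = hit[hit > 0]                                # drop the background label 0
    return np.isin(labels, hit).astype(mask.dtype)    # keep only the seeded components

mask = np.array([[1, 1, 0, 0],
                 [0, 1, 0, 1],
                 [0, 0, 0, 1]])
seed = np.zeros_like(mask)
seed[0, 0] = 1  # seed lies inside the first component only
print(seedfill_binary(mask, seed))  # -> only the component containing (0, 0) survives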
def plot_all_predictions_over_time(data, predicted, cmap=pl.cm.spectral, alpha=1., more_data=None):
    """ Plot the predicted values for a specific country
    as a function of time for each age

    Parameters
    ----------
    data : data rec
    predicted : pymc trace

    additional optional parameters, to be described
    """
    for a in pl.unique(data.age):
        print 'plotting for age %s' % a
        plot_all_predictions_over_time_for_age(data, predicted, cmap=cmap, alpha=alpha,
                                               more_data=more_data, age=a)
def prior_m_area(dm3, model_num, data_type):
    # create 'm_sub'/'m_region' from unique input_data['area']
    prior_in = empty_prior_in(pl.unique(dm3.input_data['area']).index)
    prior_in['name'] = pl.unique(dm3.input_data['area'])
    prior_in['mean'] = 0.
    prior_in['std'] = 1.
    prior_in['lower'] = '-inf'
    prior_in['upper'] = 'inf'

    # create hierarchy
    model = mu.load_new_model(model_num, 'all', data_type)
    superregion = set(model.hierarchy.neighbors('all'))
    region = set(pl.flatten([model.hierarchy.neighbors(sr)
                             for sr in model.hierarchy.neighbors('all')]))
    country = set(pl.flatten([[model.hierarchy.neighbors(r) for r in model.hierarchy.neighbors(sr)]
                              for sr in model.hierarchy.neighbors('all')]))

    # create data area levels
    for i in pl.unique(dm3.input_data['area']).index:
        if dm3.input_data.ix[i, 'area'] in country:
            prior_in.ix[i, 'type'] = 'm_sub'
        elif dm3.input_data.ix[i, 'area'] in region:
            prior_in.ix[i, 'type'] = 'm_region'
        elif dm3.input_data.ix[i, 'area'] in superregion:
            prior_in.ix[i, 'type'] = 'm_super'

    return prior_in
def setAnnotationsButtons(self, _annotation):
    empty = is_empty(_annotation) or pl.sum(_annotation) == 0
    self.set_title(empty)
    if empty:
        self.reset()
    else:
        unique = list(pl.unique(_annotation))
        if len(unique) == self.buttons_count:
            self.setEnabledAnnotations(ALL_ANNOTATIONS)
        else:
            self.setEnabledAnnotations(unique)
            self.setUncheckNotAnnotations(unique)
        if self.isAllUnchecked():
            self.__action_button__.setChecked(False)
            self.__action_button__.setEnabled(False)
def computePerformance(self, idx=None, round_prec=4):
    if idx == None:
        trials = self.trials
    else:
        trials = [trial for trial in self.trials if trial.target_index == idx]

    trial_types = sorted(pl.unique([round(trial.target_contrast, round_prec)
                                    for trial in trials]))
    scores = [[] for i in trial_types]
    for trial in trials:
        for i, trial_type in enumerate(trial_types):
            if round(trial.target_contrast, round_prec) == trial_type:
                scores[i].append(trial.score)

    ks = pl.array([sum(el) for el in scores])
    ns = pl.array([len(el) for el in scores])
    xs = trial_types
    ps = ks / pl.double(ns)

    return pl.array([xs, ks, ns])
def plot_each_country(axis_bounds=[.8, .99, 1.1, 3.]):
    years = range(1975, 2006)
    for i, c in enumerate(pl.unique(data.all.country)):
        pl.subplot(3, 4, i/12 + 1)
        pl.plot(data.all.hdi[data.all.country == c],
                data.all.tfr[data.all.country == c],
                linewidth=4, alpha=.8)
        pl.axis(axis_bounds)

    for r in range(3):
        for c in range(4):
            pl.subplot(3, 4, r*4 + c + 1)
            if r != 2:
                pl.xticks([])
            if c != 0:
                pl.yticks([])
    pl.subplots_adjust(.05, .05, .95, .95, 0, 0)
def epsilon_greedy_probability(self, state, action):
    q = self.get_q(state)
    if size(unique(q)) < self.env.get_num_actions():
        max_q = max(q)
        max_observations = 0
        for value in q:
            if value == max_q:
                max_observations += 1
        probabilities = zeros(size(q))
        for i in range(size(q)):
            if q[i] == max_q:
                probabilities[i] = ((1 - self.epsilon) / max_observations) + \
                                   (self.epsilon / self.env.get_num_actions())
            else:
                probabilities[i] = self.epsilon / self.env.get_num_actions()
        return probabilities[action]
    else:
        if action == argmax(q):
            return self.optimal_p
        else:
            return self.epsilon / self.env.get_num_actions()
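# A standalone sketch of the same epsilon-greedy probability rule as a hypothetical helper,
# independent of the class above: tied maximal Q-values share the greedy mass, and the
# exploration mass epsilon is spread uniformly over all actions.
import numpy as np

def epsilon_greedy_probs(q, epsilon):
    q = np.asarray(q, dtype=float)
    n = q.size
    greedy = (q == q.max())                           # possibly several tied maxima
    probs = np.full(n, epsilon / n)                   # uniform exploration mass
    probs[greedy] += (1.0 - epsilon) / greedy.sum()   # greedy mass split among the maxima
    return probs

print(epsilon_greedy_probs([0.1, 0.5, 0.5, 0.2], epsilon=0.2))  # entries sum to 1.0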
def astausgleich(ab2org, mn2org, rhoaorg):
    """shifts the branches of a dc sounding to generate a matching curve."""
    ab2 = P.asarray(ab2org)
    mn2 = P.asarray(mn2org)
    rhoa = P.asarray(rhoaorg)
    um = P.unique(mn2)
    for i in range(len(um) - 1):
        r0, r1 = [], []
        ac = P.intersect1d(ab2[mn2 == um[i]], ab2[mn2 == um[i + 1]])
        for a in ac:
            r0.append(rhoa[(ab2 == a) * (mn2 == um[i])][0])
            r1.append(rhoa[(ab2 == a) * (mn2 == um[i + 1])][0])
        if len(r0) > 0:
            fak = P.mean(P.array(r0) / P.array(r1))
            print(fak)
            if P.isfinite(fak) and fak > 0.:
                rhoa[mn2 == um[i + 1]] *= fak
    return rhoa  # formerly pg as vector
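# A small usage sketch of astausgleich with made-up values (hypothetical data, not from the
# original source): two MN/2 branches that overlap at AB/2 = 2 and 3, where the second
# branch sits at roughly half the apparent resistivity of the first and gets scaled up.
import pylab as P

ab2 = [1., 2., 3., 2., 3., 4.]             # AB/2 spacings, two branches
mn2 = [0.5, 0.5, 0.5, 1.0, 1.0, 1.0]       # MN/2 spacings identifying the branches
rhoa = [100., 110., 120., 55., 60., 65.]   # apparent resistivities, second branch offset

print(astausgleich(ab2, mn2, rhoa))
# the second branch is multiplied by ~2, giving [100. 110. 120. 110. 120. 130.]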
def plotPETH():
    binsize = 20  # bin size in ms
    binedges = arange(0, s.duration + binsize, binsize)
    peth = []
    for ipop in unique(s.cellpops):
        hist, binedges = histogram(
            s.allspiketimes[array([s.cellpops[int(i)] for i in s.allspikecells]) == ipop],
            binedges)
        peth.append(hist)
    figure()
    plot(array(peth).T)
    title('PETH (%d ms bins)' % binsize)
    xlabel('Time (ms)')
    ylabel('Spikes/bin')
    ylim(0, s.scale * binsize * 2)
    h = axes()
    h.set_xticks(range(0, len(binedges), len(binedges) / 10))
    h.set_xticklabels(binedges[0:-1:len(binedges) / 10].astype(int))
    legend(s.popnames)
def discreteRawPDF(data):
    """ Returns the raw (unbinned) PDF for the discrete data in 'data'. """
    pdf = dict()
    support = numpy.array(pylab.unique(data))
    support.sort()
    pSupport = numpy.zeros(len(support))
    for s in support:
        pdf[s] = 0.0
    for d in data:
        pdf[d] = pdf[d] + 1.0
    for j in range(len(support)):
        pSupport[j] = pdf[support[j]]
    pSupport = pSupport / sum(pSupport)
    return support, pSupport
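# A quick check of discreteRawPDF on a small integer sample, assuming the function above
# is in scope; the returned probabilities are the normalized counts of each unique value.
support, p = discreteRawPDF([1, 2, 2, 3, 3, 3])
print(support)  # [1 2 3]
print(p)        # approximately [0.167 0.333 0.5]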
def FourD():
    # collects data from file and plots
    L = 20
    mc = int(1e5)
    temps = [100, 240]
    spinconfigs = ["up", "random"]
    most_often = {}
    for spin in spinconfigs:
        pl.figure()
        for temp in temps:
            Enername = "Energyprob_L" + str(L) + "_mc" + str(mc) + "_T" + str(temp) + "_spin" + str(spin)
            energies, variance = pl.loadtxt('../data/4c/' + Enername + ".dat",
                                            usecols=(0, 1), unpack=True)
            pl.hist(energies, normed=0, bins=100, histtype="step", label="Temp=%s" % temp)
            hist, bins = pl.histogram(energies, bins=len(pl.unique(energies)))
            E = (bins[:-1])[pl.argmax(hist)] + 0.5 * (bins[1] - bins[0])
            most_often[spin + " " + str(temp)] = E, max(hist), variance[-1]
        pl.title("Energy occurrence histogram for spin %s" % spin)
        pl.xlabel("Occurring energies")
        pl.ylabel("Count of energy")
        pl.xlim([-820, -350])
        pl.legend(loc="best")
        pl.savefig("../figs/4d/probabilityhistogram_%s.png" % spin)

    for i, j in most_often.iteritems():
        print i, " energy:", j[0], "\n--- count:", j[1]
        print " Prob of state: %g " % (j[1] / 87000.)
        print " Variance: %g " % (j[2])
def main(filename, verbosity, plots=False, **kwargs):
    print('here!', filename)
    print('\n' + '-' * 40 + '\n')

    f = neutronParser(filename, verbose_level=1)
    f.parse()
    print()
    f.write()

    print('ADC Boards: ', pylab.unique(f.data['ADCBoard']))
    print('ADC Channels: ', pylab.unique(f.data['ADCChannel']))
    print('Detectors: ', pylab.unique(f.data['Detector']))
    for j in pylab.unique(f.data['ADCBoard']):
        print(j, pylab.unique((f.data['ADCChannel'])[f.data['ADCBoard'] == j]))

    if plots:
        pylab.figure()
        for j in range(10):
            pylab.plot(f.data['RawSamples'][j], label='{:d}'.format(j))
        pylab.legend()

        pylab.figure()
        for j in pylab.unique(f.data['Detector']):
            print(j)
            h = pylab.histogram((f.data['Energy'])[f.data['Detector'] == j],
                                range=[0., 4096.], bins=1000)
            bin_centers = 0.5 * (h[1][1:] + h[1][:-1])
            pylab.plot(bin_centers, h[0], label='D{:02d}'.format(j), drawstyle='steps-mid')
        pylab.xlim(0., 4096.)
        pylab.legend()
        pylab.show()
import pandas

X = pandas.read_csv('/home/j/Project/dismod/dismod_status/prod/dm-20084/posterior/dm-20084-prevalence-north_africa_middle_east-male-2005.csv', index_col=None)
Y = pandas.read_csv('/home/j/Project/dismod/dismod_status/prod/dm-19807/posterior/dm-19807-prevalence-north_africa_middle_east-male-2005.csv', index_col=None)

import pylab as pl

def weighted_age(df):
    return (df.filter(like='Draw').T*df['Population']/df['Population'].sum()).T.sum()

pl.figure()
for iso in list(pl.unique(X['Iso3'])):
    pl.plot(X[X['Iso3']==iso].filter(like='Draw').mean(1).__array__(), label=iso)
pl.semilogy([1], [1])

Z = X.groupby('Age').apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color='red', linewidth=3, alpha=.5, label='Inconsistent NA/ME')

pl.legend()
pl.axis([-5, 130, 1e-6, 2])

pl.figure()
for iso in list(pl.unique(Y['Iso3'])):
    pl.plot(Y[(Y['Iso3']==iso)&(Y['Rate type']=='prevalence')].filter(like='Draw').mean(1).__array__(), label=iso)
def tsysNormalize(vis, tsysTable='', newTsysTable='', scaleSpws=[], verbose=False): """ Generate Tsys entries for one field from other fields, using autocorr (linear!) or SQLD data to determine the change in Tsys. Inputs: vis the MS tsysTable: the tsys caltable (default = <vis>.tsys) newTsysTable: the new tsys caltable to create (default = <tsysTable>_normalized) """ # intents likely to imply different attenuations or tuning to science-like # scans that we are applying Tsys to. print("Entered") badIntents = [ 'CALIBRATE_POINTING', 'CALIBRATE_FOCUS', 'CALIBRATE_SIDEBAND_RATIO', 'CALIBRATE_ATMOSPHERE' ] if (tsysTable == ''): tsysTable = vis + '.tsys' if (not os.path.exists(tsysTable)): print("Cannot find Tsys table: ", tsysTable) return if (not os.path.exists(vis)): print("Cannot find measurement set: ", vis) return t = time.time() mytb = taskinit.tbtool() mymsmd = taskinit.msmdtool() mytb.open(tsysTable, nomodify=False) mymsmd.open(vis) print("tsysNormalize: initial setup took %.3f seconds" % (time.time() - t)) # For convenience squish the useful columns into unique lists t = time.time() tsysSpws = pb.unique(mytb.getcol("SPECTRAL_WINDOW_ID")) tsysScans = pb.unique(mytb.getcol("SCAN_NUMBER")) tsysTimes = pb.unique(mytb.getcol("TIME")) tsysFields = pb.unique(mytb.getcol("FIELD_ID")) tsysAntennas = pb.unique(mytb.getcol("ANTENNA1")) if type(scaleSpws) == str: scaleSpws = [int(i) for i in scaleSpws.split(',')] if len(scaleSpws) < len(tsysSpws): scaleSpws = [] for tsysSpw in tsysSpws: scaleSpws.append(scienceSpwForTsysSpw(mymsmd, tsysSpw)) print("Identified autocorrelation spws to use: ", scaleSpws) print("Tsys Spws (%d):" % len(tsysSpws), tsysSpws) print("Tsys Scans (%d):" % len(tsysScans), tsysScans) print("Tsys Times (%d):" % len(tsysTimes), tsysTimes) print("Tsys Fields (%d):" % len(tsysFields), tsysFields) print("Tsys Antennas (%d):" % len(tsysAntennas), tsysAntennas) # Gather the power levels to use in the normalization process refPowers = {} refScans = {} for f in tsysFields: scanFieldsTab = mytb.query('FIELD_ID==%d' % f) fieldTsysScans = pb.unique(scanFieldsTab.getcol("SCAN_NUMBER")) scanFieldsTab.close() fieldAllScans = mymsmd.scansforfield(f) fieldNonTsysScans = [ x for x in fieldAllScans if x not in fieldTsysScans ] fieldName = mymsmd.namesforfields(f)[0] if (len(fieldNonTsysScans) < 1): # Then there is no non-tsys scan for this field, e.g. which can happen in a mosaic where the Tsys scan has a different field ID, # but in this case the field name will have other scans with different field IDs, so revert to using field names. Using field # names might work from the outset, but I have not tried it. fieldAllScans = mymsmd.scansforfield(fieldName) fieldNonTsysScans = [ x for x in fieldAllScans if x not in fieldTsysScans ] if (len(fieldNonTsysScans) < 1): print( "****** This field (id=%d, name=%s) appears to have no non-Tsys-like-scans, and thus cannot be normalized." 
% (f, fieldName)) return -1 scienceLikeScans = [] for s in fieldNonTsysScans: intents = mymsmd.intentsforscan(s) good = True for i in intents: for b in badIntents: if i.startswith(b): good = False break if good: scienceLikeScans.append(s) powerRefScans = [] for s in fieldTsysScans: minDist = 9999999 refScan = -1 for r in scienceLikeScans: dist = abs(r - s) if dist < minDist: minDist = dist refScan = r powerRefScans.append(refScan) print("Field %d (%s) Tsys scans:" % (f, fieldname), fieldTsysScans, ", All scans:", fieldAllScans, ", Non-Tsys scans:", fieldNonTsysScans, ", Non-Tsys science-like scans:", scienceLikeScans) for i in range(len(fieldTsysScans)): print(" Tsys scan %3d power reference scan: %3d" % (fieldTsysScans[i], powerRefScans[i])) refScans[fieldTsysScans[i]] = powerRefScans[i] if verbose: print( "populating powers corresponding to each Tsys scan on field %d..." % (f)) for i in range(len(fieldTsysScans)): refPowers[fieldTsysScans[i]] = [] for spw in scaleSpws: if verbose: print("calling getPower(vis, %d, %d, 10.0, %s)" % (powerRefScans[i], spw, str(powerRefScans[i] < fieldTsysScans[i]))) p = getPower(vis, powerRefScans[i], spw, 10.0, powerRefScans[i] < fieldTsysScans[i], verbose=verbose) refPowers[fieldTsysScans[i]].append(p) if verbose: print("powers to use for Tsys scan %d:" % fieldTsysScans[i], refPowers[fieldTsysScans[i]]) if verbose: print(refPowers) print("tsysNormalize: summarising Tsys table took %.3f seconds" % (time.time() - t)) t = time.time() # Now copy the original Tsys caltable and update all the values in the new one. if (newTsysTable == ''): newTsysTable = tsysTable + '_normalized' if (os.path.exists(newTsysTable)): shutil.rmtree(newTsysTable) mytb.copy(newTsysTable) mytb.close() mytb.open(newTsysTable, nomodify=False) startRefPower = refPowers[tsysScans[0]] for i in range(1, len(tsysScans)): # need to adjust each successive Tsys refPower = refPowers[tsysScans[i]] for ispw in range(len(tsysSpws)): spw = tsysSpws[ispw] for ant in range(len(tsysAntennas)): tsysSubTab1 = mytb.query( "SCAN_NUMBER==%d AND SPECTRAL_WINDOW_ID==%d AND ANTENNA1==%d" % (tsysScans[i], tsysSpws[ispw], ant)) tsys1 = tsysSubTab1.getcell('FPARAM', 0) newTsys = tsysSubTab1.getcell('FPARAM', 0) for pol in range(len(tsys1)): for chan in range(len(tsys1[pol])): a = TsysAfterPowerChange( refPowers[tsysScans[i]][ispw][ant][pol], startRefPower[ispw][ant][pol], tsys1[pol][chan]) newTsys[pol][chan] = a print("Scan %2d spw %2d pol %d mean %.1f --> %.1f" % (tsysScans[i], spw, pol, np.mean( tsys1[pol]), np.mean(newTsys[pol]))) tsysSubTab1.putcell('FPARAM', 0, newTsys) tsysSubTab1.close() mymsmd.close() mytb.close()
def tsysTransfer(vis, scaleSpws='', tsysTable='', newTsysTable='', verbose=False, overwrite=True, printAntenna=0, printPol=0): """ Generate a new Tsys table where the entries for one field are propagated to other fields which do not have a measured Tsys, using autocorr (linear!) or SQLD data to determine the change in Tsys. Input: vis the MS scaleSpws the autocorr or SQLD SpWs to use for scaling (integer list or comma-delimited string, default is the channel-averaged science spws) tsysTable: if blank, then try vis+'.tsys' newTsysTable: if blank, then try vis+'.newtsys' printAntenna: print the before/after values for this antenna ID printPol: print the before/after values for this polarization (0 or 1) Returns: nothing """ # intents likely to imply different attenuations or tuning to science-like # scans that we are applying Tsys to. badIntents = [ 'CALIBRATE_POINTING', 'CALIBRATE_FOCUS', 'CALIBRATE_SIDEBAND_RATIO', 'CALIBRATE_ATMOSPHERE' ] if type(scaleSpws) == str: if (len(scaleSpws) > 0): scaleSpws = [int(i) for i in scaleSpws.split(',')] if (tsysTable == ''): tsysTable = vis + '.tsys' if not os.path.exists(tsysTable): tsysTables = glob.glob(os.path.join(vis, '*tsyscal.tbl')) if len(tsysTables) < 1: print("Could not find any tsys tables.") return tsysTable = tsysTables[0] if not os.path.exists(tsysTable): print("Could not find tsys table: %s" % (tsysTable)) return if (newTsysTable == ''): newTsysTable = vis + '.newtsys' if overwrite and os.path.exists(newTsysTable): print("Removing pre-existing newTsysTable: ", newTsysTable) rmtables(newTsysTable) if os.path.exists(newTsysTable): shutil.rmtree(newTsysTable) if (not os.path.exists(tsysTable)): print("Cannot find Tsys table: ", tsysTable) return if (not os.path.exists(vis)): print("Cannot find measurement set: ", vis) return t = time.time() mytb = taskinit.tbtool() mymsmd = taskinit.msmdtool() mytb.open(tsysTable, nomodify=False) mymsmd.open(vis) print("tsysTransfer: initial setup took %.3f seconds" % (time.time() - t)) # For convenience squish the useful columns into unique lists t = time.time() tsysSpws = pb.unique(mytb.getcol("SPECTRAL_WINDOW_ID")) tsysBasebands = getBasebands(mymsmd, tsysSpws) tsysScans = pb.unique(mytb.getcol("SCAN_NUMBER")) tsysTimes = pb.unique(mytb.getcol("TIME")) tsysFields = pb.unique(mytb.getcol("FIELD_ID")) tsysAntennas = pb.unique(mytb.getcol("ANTENNA1")) finalScan = np.max(mymsmd.scannumbers()) print("Tsys SpWs (%d):" % len(tsysSpws), tsysSpws) print("Tsys Basebands (%d):" % len(tsysSpws), tsysBasebands) print("Tsys Scans (%d):" % len(tsysScans), tsysScans) print("Tsys Times (%d):" % len(tsysTimes), tsysTimes) print("Tsys Fields (%d):" % len(tsysFields), tsysFields) print("Tsys Antennas (%d):" % len(tsysAntennas), tsysAntennas) if (len(scaleSpws) == 0): # number of scaleSpws should not exceed number of Tsys spws scaleSpws = np.unique(getChannelAveragedScienceSpws(vis, mymsmd=mymsmd)) scaleBasebands = getBasebands(mymsmd, scaleSpws) if scaleBasebands != tsysBasebands: print("re-ordering scaleSpws to match Tsys basebands") newScaleSpws = [] for baseband in tsysBasebands: newScaleSpws.append(scaleSpws[scaleBasebands.index(baseband)]) scaleSpws = newScaleSpws scaleBasebands = tsysBasebands[:] print("Getting power from spws: ", scaleSpws) tsysScanTimes = {} for s in tsysScans: st = mytb.query('SCAN_NUMBER==%d' % s) ts = st.getcol("TIME") st.close() tsysScanTimes[s] = sum(ts) / float(len(ts)) if verbose: print("Tsys scan %d assumed time: %.4f" % (s, tsysScanTimes[s])) refPowers = {} refScans = {} tsysScansOnField = 
{} for f in tsysFields: scanFieldsTab = mytb.query('FIELD_ID==%d' % f) fieldTsysScans = pb.unique(scanFieldsTab.getcol("SCAN_NUMBER")) scanFieldsTab.close() tsysScansOnField[f] = fieldTsysScans fieldAllScans = mymsmd.scansforfield(f) fieldName = mymsmd.namesforfields(f)[0] fieldNonTsysScans = [ x for x in fieldAllScans if x not in fieldTsysScans ] if (len(fieldNonTsysScans) < 1): # Then there is no non-tsys scan for this field, e.g. which can happen in a mosaic where the Tsys scan has a different field ID, # but in this case the field name will have other scans with different field IDs, so revert to using field names. Using field # names might work from the outset, but I have not tried it. fieldAllScans = mymsmd.scansforfield(fieldName) fieldNonTsysScans = [ x for x in fieldAllScans if x not in fieldTsysScans ] if (len(fieldNonTsysScans) < 1): print( "****** This field (id=%d, name=%s) appears to have no non-Tsys-like-scans, and thus cannot be normalized." % (f, fieldName)) return -1 print("Field %d (%s) Tsys scans:" % (f, fieldName), fieldTsysScans, ", All scans:", fieldAllScans, ", Non-Tsys scans:", fieldNonTsysScans) scienceLikeScans = [] for s in fieldNonTsysScans: intents = mymsmd.intentsforscan(s) good = True for i in intents: for b in badIntents: if i.startswith(b): good = False break if good: scienceLikeScans.append(s) powerRefScans = [] for s in fieldTsysScans: minDist = 9999999 refScan = -1 for r in scienceLikeScans: dist = abs(r - s) if dist < minDist: minDist = dist refScan = r powerRefScans.append(refScan) if verbose: print("Field %d (%s) Tsys scans:" % (f, fieldName), fieldTsysScans, ", All scans:", fieldAllScans, ", Non-Tsys scans:", fieldNonTsysScans, ", Non-Tsys science-like scans:", scienceLikeScans) for i in range(len(fieldTsysScans)): if verbose: print(" Tsys scan %3d power reference scan: %3d" % (fieldTsysScans[i], powerRefScans[i])) refScans[fieldTsysScans[i]] = powerRefScans[i] if verbose: print( "populating powers corresponding to each Tsys scan on field %d..." 
% (f)) for i in range(len(fieldTsysScans)): refPowers[fieldTsysScans[i]] = [] for spw in scaleSpws: if verbose: print("powerRefScans: ", powerRefScans) print("calling getPower(vis, %d, %d, 10.0, %s)" % (powerRefScans[i], spw, str(powerRefScans[i] < fieldTsysScans[i]))) p = getPower(vis, powerRefScans[i], spw, 10.0, powerRefScans[i] < fieldTsysScans[i], verbose=verbose) refPowers[fieldTsysScans[i]].append(p) #print "powers to use for Tsys scan %d:"%fieldTsysScans[i], refPowers[fieldTsysScans[i]] # print refPowers print("tsysTransfer: summarising Tsys table took %.3f seconds" % (time.time() - t)) t = time.time() mytb.copy(newTsysTable) mytb.close() # re-open original table as read-only mytb.open(tsysTable) mytbNew = taskinit.tbtool() mytbNew.open(newTsysTable, nomodify=False) print( "tsysTransfer: Copying Tsys table from '%s' to '%s' took %.3f seconds" % (tsysTable, newTsysTable, time.time() - t)) anyProcessingNeeded = False # Loop over each Tsys scan for i in range(len(tsysScans) - 1): tsysTime0 = tsysScanTimes[tsysScans[i]] tsysTime1 = tsysScanTimes[tsysScans[i + 1]] tsysTimeGap = tsysTime1 - tsysTime0 tsysFields0 = mymsmd.fieldsforscan(tsysScans[i]) # current Tsys scan tsysFields1 = mymsmd.fieldsforscan(tsysScans[i + 1]) # next Tsys scan # loop over all scans between the current Tsys scan and the next one startScan = tsysScans[i] + 1 stopScan = tsysScans[i + 1] # if finalScan > stopScan and i==len(tsysScans)-1: # print "There are more scans after the final Tsys scan, extending the range of scans accordingly." # stopScan = finalScan for scan in range(startScan, stopScan): if 'CALIBRATE_POINTING#ON_SOURCE' in mymsmd.intentsforscan(scan): continue processingNeeded = False fields = mymsmd.fieldsforscan(scan) times = mymsmd.timesforscan(scan) startTime = times[0] endTime = times[-1] print( "Processing scan %d with fields %s, between Tsys scan %d (fields %s) and %d (fields %s)" % (scan, str(fields[0]), tsysScans[i], str( tsysFields0[0]), tsysScans[i + 1], str(tsysFields1[0]))) print( " Scan %d starts %.3f sec after preceding Tsys, and ends %.3f sec before next Tsys" % (scan, startTime - tsysTime0, tsysTime1 - endTime)) # There are a few possible cases to deal with: # 1) this was a power reference scan for a Tsys scan, in which case only produce one extra Tsys, at the opposite end of the scan, or none if there are Tsys scans for the same field at both ends fieldMatchesPriorTsysField = fieldsMatch(fields, tsysFields0) fieldMatchesNextTsysField = fieldsMatch(fields, tsysFields1) priorScanIsTsys = scan == tsysScans[i] + 1 nextScanIsTsys = scan == tsysScans[i + 1] - 1 bracketingTsysFieldsMatch = fieldsMatch(tsysFields0, tsysFields1) scanIsNotRefScan = scan != refScans[ tsysScans[i]] and scan != refScans[tsysScans[i + 1]] if fieldMatchesPriorTsysField and fieldMatchesNextTsysField and priorScanIsTsys and nextScanIsTsys: print( " Nothing needed for scan %d as bracketed immediately by two Tsys scans of same field" % scan) # The most common case for wanting to do the transfer: science field bracketed by phase cal, or phase cal without Tsys immediately before/after elif bracketingTsysFieldsMatch and (not fieldMatchesPriorTsysField or scanIsNotRefScan): # The two Tsys scans that bracket this scan are taken on the same field; # and either this scan is not on the field of the prior Tsys scan, or # this scan is not a reference scan processingNeeded = True priorScanToUse = tsysScans[i] nextScanToUse = tsysScans[i + 1] elif (not bracketingTsysFieldsMatch and fields[0] in tsysScansOnField.keys()): 
candidateScans = np.array(tsysScansOnField[fields[0]]) if (scan < candidateScans[0] or scan > candidateScans[-1]): print( " The bracketing Tsys fields do not match, and there are not two scans to interpolate between." ) else: processingNeeded = True priorScanToUse = np.max( candidateScans[np.where(candidateScans < scan)]) nextScanToUse = np.min( candidateScans[np.where(candidateScans > scan)]) print( " The bracketing Tsys fields do not match, but there are two scans to interpolate between: %d and %d." % (priorScanToUse, nextScanToUse)) elif (not bracketingTsysFieldsMatch): # This section added by Todd for initial phase calibrator scans when Tsys taken on science target only. # Not sure what to do yet, though. print( " The bracketing Tsys fields do not match, and Tsys was never taken on this field. No processing will be done." ) if False: processingNeeded = True if i + 1 < len(tsysScans): print( " Extrapolating from subsequent Tsys scan: %d" % (tsysScans[i + 1])) priorScanToUse = tsysScans[i + 1] nextScanToUse = tsysScans[i + 1] else: print(" Extrapolating from prior Tsys scan: %d" % (tsysScans[i + 1])) priorScanToUse = tsysScans[i] nextScanToUse = tsysScans[i] else: print( " This scan arrangement is unexpected. No processing will be done." ) print(" bracketingTsysFieldsMatch = %s" % bracketingTsysFieldsMatch) print(" fieldMatchesPriorTsysField = %s" % fieldMatchesPriorTsysField) print(" fieldMatchesNextTsysField = %s" % fieldMatchesNextTsysField) print(" priorScanIsTsys = %s" % priorScanIsTsys) print(" nextScanIsTsys = %s" % nextScanIsTsys) print(" scanIsNotRefScan = %s" % scanIsNotRefScan) print(" %s in tsysScansOnField(%s) = %s" % (fields[0], tsysScansOnField.keys(), fields[0] in tsysScansOnField.keys())) if processingNeeded: anyProcessingNeeded = True print( " For scan %d will generate two Tsys entries for beginning and end of scan, interpolating reference from scans %d and %d" % (scan, priorScanToUse, nextScanToUse)) for ispw in range(len(scaleSpws)): spw = scaleSpws[ispw] startPower = getPower(vis, scan, spw, 10.0, False, verbose=verbose) endPower = getPower(vis, scan, spw, 10.0, True, verbose=verbose) for ant in range(len(tsysAntennas)): tsysSubTab0 = mytb.query( "SCAN_NUMBER==%d AND SPECTRAL_WINDOW_ID==%d AND ANTENNA1==%d" % (priorScanToUse, tsysSpws[ispw], ant)) tsysSubTab1 = mytb.query( "SCAN_NUMBER==%d AND SPECTRAL_WINDOW_ID==%d AND ANTENNA1==%d" % (nextScanToUse, tsysSpws[ispw], ant)) # sanity check for duplicate entries if tsysSubTab0.nrows() != 1 or tsysSubTab1.nrows( ) != 1: print( "WARNING!!! not one result row for (scan,ant,spw) query in Tsys table. Scan %d: %d rows, Scan %d: %d rows." 
% (priorScanToUse, tsysSubTab0.nrows(), nextScanToUse, tsysSubTab1.nrows())) tsys0 = tsysSubTab0.getcell('FPARAM', 0) tsys1 = tsysSubTab1.getcell('FPARAM', 0) tsysSubTab1.close() startTsys = copy.copy(tsys0) endTsys = copy.copy( tsys0 ) # just a placeholder, new values will be filled in below startRefPower = refPowers[priorScanToUse] endRefPower = refPowers[nextScanToUse] tsysTime0 = tsysScanTimes[priorScanToUse] tsysTime1 = tsysScanTimes[nextScanToUse] tsysTimeGap = tsysTime1 - tsysTime0 for pol in range(len(tsys0)): for chan in range(len(tsys0[pol])): startTsys0 = TsysAfterPowerChange( startRefPower[ispw][ant][pol], startPower[ant][0], tsys0[pol][chan]) startTsys1 = TsysAfterPowerChange( endRefPower[ispw][ant][pol], startPower[ant][0], tsys1[pol][chan]) endTsys0 = TsysAfterPowerChange( startRefPower[ispw][ant][pol], endPower[ant][0], tsys0[pol][chan]) endTsys1 = TsysAfterPowerChange( endRefPower[ispw][ant][pol], endPower[ant][0], tsys1[pol][chan]) if tsysTimeGap == 0: startTsys[pol][chan] = startTsys0 endTsys[pol][chan] = endTsys0 else: startTsys[pol][chan] = ( (startTime - tsysTime0) * startTsys1 + (tsysTime1 - startTime) * startTsys0) / tsysTimeGap endTsys[pol][chan] = ( (endTime - tsysTime0) * endTsys1 + (tsysTime1 - endTime) * endTsys0) / tsysTimeGap if chan == len( tsys0[pol] ) / 2 and ant == printAntenna and pol == printPol: print( " ispw=%d spw=%d ant=%d pol=%d chan=%d: TsysBefore: %.1f K, TsysScanStart: %.1f K (interp %.1f,%.1f), TsysScanEnd: %.1f K (interp %.1f,%.1f), TsysAfter: %.1f K" % (ispw, spw, ant, pol, chan, tsys0[pol][chan], startTsys[pol][chan], startTsys0, startTsys1, endTsys[pol][chan], endTsys0, endTsys1, tsys1[pol][chan])) for f in fields: nr = mytbNew.nrows() tsysSubTab0.copyrows(newTsysTable, nrow=1) if verbose: print("setting tsys at row %d" % nr) mytbNew.putcell('FPARAM', nr, startTsys) mytbNew.putcell('TIME', nr, startTime) mytbNew.putcell('FIELD_ID', nr, f) mytbNew.putcell('SCAN_NUMBER', nr, scan) nr = mytbNew.nrows() tsysSubTab0.copyrows(newTsysTable, nrow=1) if verbose: print("setting tsys at row %d" % nr) mytbNew.putcell('FPARAM', nr, endTsys) mytbNew.putcell('TIME', nr, endTime) mytbNew.putcell('FIELD_ID', nr, f) mytbNew.putcell('SCAN_NUMBER', nr, scan) tsysSubTab0.close() # end loop over fields (f) # end loop over antennas (ant) # end loop over spws (ispw) # end if processingNeeded # end loop over scans between tsysScans (scan) mytbNew.flush() # end loop over Tsys scans if not anyProcessingNeeded: print( "Because no processing was needed the new Tsys table is identical to the original." ) # TODO: These cleanups should be done also on an exception too print("Closing tables...") mytbNew.unlock() mytbNew.close() mytbNew.done() mymsmd.close() mytb.close() mytb.done()
dm.params['covariates']['Country_level']['LDI_id_Updated_7July2011']['rate']['value'] = 0

# clear any fit and priors
dm.clear_fit()
dm.clear_empirical_prior()
dismod3.neg_binom_model.covariate_hash = {}

# initialize model data
prev_data = [d for d in dm.data if d['data_type'] == 'prevalence data']
r = pl.array([dm.value_per_1(s) for s in prev_data])
min_rate_per_100 = '%d' % round(r.min()*100)
max_rate_per_100 = '%d' % round(r.max()*100)
median_rate_per_100 = '%d' % round(pl.median(r*100))

regions = pl.array([d['gbd_region'] for d in prev_data])
num_regions = len(pl.unique(regions))

import fit_world
#fit_world.fit_world(dm)
#dm.data = prev_data  # put data back in

import fit_posterior
region = 'north_america_high_income'
sex = 'female'
year = '2005'
fit_posterior.fit_posterior(dm, region, sex, year, map_only=faster_run_flag, store_results=False)
dm.data = prev_data  # put data back in

pl.figure(**book_graphics.quarter_page_params)

pl.subplot(1, 2, 1)
dismod3.plotting.plot_intervals(dm, [d for d in dm.data if dm.relevant_to(d, 'prevalence', 'all', 'all', 'all')],
from scipy.spatial import ConvexHull

# On the 2-Sphere, the Voronoi tesselation is equivalent to the convex hull projected on the sphere
# (Sugihara, Journal for Geometry and Graphics Volume 6 (2002), No. 1, 69-81.)
# I assume that the same is true in 4D.... [This has to be checked!]

R = 1.6180339887498949  # magic number by Straley for 120 particles

import sys
if sys.argv[1][-3:] == "npy":
    polar = pl.load(sys.argv[1])
else:
    polar = pl.loadtxt(sys.argv[1])

from spheretools import *
cartesian = convert(polar, R)

CHull = ConvexHull(cartesian)

with open("bonds.txt", 'w') as fw:
    for p in range(cartesian.shape[0]):
        # print p
        which_simplex, position = pl.where(CHull.simplices == p)
        # print which_simplex
        all_neighs = pl.unique(CHull.simplices[which_simplex].flatten())
        # print "all_neighs", all_neighs
        index_of_p = pl.where(all_neighs == p)
        # print "p is at", index_of_p
        neighs = pl.delete(all_neighs, index_of_p)
        # print "neighs after ", neighs
        fw.write(str(len(neighs)) + " " + " ".join(map(str, neighs)) + "\n")
def test_load_area():
    # find model unique areas
    model_areas = set(pl.unique(model2.input_data['area']))
    # check that only official areas are listed
    assert model_areas.issubset(areas) == 1
def calantsub(incaltable,outcaltable='', spw='',scan='', ant='',subant=''): """ Substitute cal solutions by antenna Input: incaltable Input caltable outcaltable Output caltable (if '', overwrite result on incaltable) spw Spectral Window selection (no channel selection permitted) scan Scan selection ant Antenna (indices) which need replaced solutions subant Antenna (indices) with which to replace those in ant This function provides a means to replace solutions by antenna, e.g., to substitute one antenna's Tsys spectra with another. The processing can be limited to specific spectral windows and/or scans. The spw and scan parameters should be specified in the standard MS selection manner (comma-separated integers in a string), except no channel selection is supported. The ant parameter specifies one or more antenna indices (comma-separated in a string) for which solutions are to be replaced. The subant parameter lists the antenna indices from which the substitute solutions are to be obtained. E.g., ant='3,5,7',subant='6,8,10' will cause the solutions from antenna id 6 to be copied to antenna id 5, id 8 to id 5 and id 10 to id 7. The number of antennas specified in ant and subant must match. """ import pylab as mypl # trap insufficient ant subant specifications if len(ant)==0 or len(subant)==0: raise Exception, "Must specify at least one ant and subant." antlist=ant.split(',') sublist=subant.split(',') # trap dumb cases nant=len(antlist) nsub=len(sublist) if nant!=nsub: raise Exception, "Must specify equal number of ant and subant." # local tb tool mytb=taskinit.tbtool() # parse selection selstr='' if len(spw)>0: selstr+='SPECTRAL_WINDOW_ID IN ['+spw+']' if len(scan)>0: selstr+=' && ' if len(scan)>0: selstr+='SCAN_NUMBER IN ['+scan+']' print "selstr = '"+selstr+"'" # verify selection (if any) selects non-zero rows if len(selstr)>0: mytb.open(incaltable) st=mytb.query(query=selstr) nselrows=st.nrows() st.close() mytb.close() if nselrows==0: raise Exception, 'Error: scan and/or spw selection selects no rows!' # manage the output table if outcaltable=='': outcaltable=incaltable print "No outcaltable specified; will overwrite incaltable." if outcaltable!=incaltable: os.system('cp -r '+incaltable+' '+outcaltable) # open the output table for adjustment mytb.open(outcaltable,nomodify=False) stsel=mytb if len(selstr)>0: stsel=mytb.query(query=selstr,name='selected') # cols to substitute: collist=['TIME','INTERVAL','PARAMERR','SNR','FLAG'] cols=mytb.colnames() if cols.count('CPARAM')>0: collist.append('CPARAM') else: collist.append('FPARAM') # scan list scans=mypl.unique(stsel.getcol('SCAN_NUMBER')) print 'Found scans = ',scans # do one scan at a time for scan in scans: st1=stsel.query(query='SCAN_NUMBER=='+str(scan),name='byscan') spws=mypl.unique(st1.getcol('SPECTRAL_WINDOW_ID')) print 'Scan '+str(scan)+' has spws='+str(spws) # do one spw at a time for ispw in spws: st2=st1.query(query='SPECTRAL_WINDOW_ID=='+str(ispw),name='byspw'); for ia in range(nant): stsub=st2.query(query='ANTENNA1=='+sublist[ia], name='subant') stant=st2.query(query='ANTENNA1=='+antlist[ia], name='ant') # go to next ant if nothing to do if stant.nrows()<1: continue print ' scan='+str(scan)+' spw='+str(ispw)+' ants: '+str(sublist[ia])+'->'+str(antlist[ia]) # trap (unlikely?) pathological case if stsub.nrows()!=stant.nrows(): raise Exception, "In spw "+str(ispw)+" antenna ids "+str(antlist[ia])+" and "+str(sublist[ia])+" have a different number of solutions." 
# substitute values for col in collist: stant.putcol(col,stsub.getcol(col)) stsub.close() stant.close() st2.close() st1.close() stsel.close() mytb.close()
def fixsyscaltimes(vis, newinterval=2.0):
    """
    Fix TIME,INTERVAL columns in MS SYSCAL subtable
    Input:
     vis          the MS containing the offending SYSCAL subtable
     newinterval  the interval to use in revised entries

    This function is intended to repair MS SYSCAL tables that suffer from
    multiple TIME values (over antennas) per Tsys measurement.  The gencal
    task (mode='tsys') expects all antennas to share the same TIME value
    for each Tsys measurement (and this is usually true).

    The function finds those measurements that have multiple TIMEs and
    replaces them with a common TIME value which takes the value
    mean(oldTIME-INTERVAL/2)+newinterval/2.  Usually (always?),
    oldTIME-INTERVAL/2 is constant over antennas and represents the
    physical timestamp of the Tsys measurment.

    If the function finds no pathological timestamps, it does not revise
    the table.
    """
    import pylab as mypl
    import math as mymath
    myqa = taskinit.qatool()
    mytb = taskinit.tbtool()
    mytb.open(vis+'/SYSCAL', nomodify=False)

    spws = mypl.unique(mytb.getcol("SPECTRAL_WINDOW_ID"))
    for ispw in spws:
        st = mytb.query('SPECTRAL_WINDOW_ID=='+str(ispw), name='byspw')
        times = st.getcol('TIME')
        interval = st.getcol('INTERVAL')
        timestamps = times - interval/2
        t0 = 86400.0*mymath.floor(timestamps[0]/86400.0)

        utimes = mypl.unique(times - t0)
        nT = len(utimes)
        utimestamps = mypl.unique(mypl.floor(timestamps) - t0)
        nTS = len(utimestamps)

        msg = 'In spw='+str(ispw)+' found '+str(nTS)+' Tsys measurements with '+str(nT)+' TIMEs...'
        if nT == nTS:
            msg += 'OK.'
            print msg
        else:
            msg += ' which is too many, so fixing it:'
            print msg

            for uts in utimestamps:
                mask = ((mypl.floor(timestamps)) - t0 == uts)
                uTIMEs = mypl.unique(times[mask])
                nTIMEs = len(uTIMEs)
                newtime = mypl.mean(times[mask] - interval[mask]/2) + newinterval/2
                msg = ' Found '+str(nTIMEs)+' TIMEs at timestamp='+str(myqa.time(str(newtime-newinterval/2)+'s', form='ymd')[0])
                if nTIMEs > 1:
                    msg += ':'
                    print msg
                    print '  TIMEs='+str([myqa.time(str(t)+'s', form='ymd')[0] for t in uTIMEs])+' --> '+str(myqa.time(str(newtime)+'s', form='ymd')[0])+' w/ INTERVAL='+str(newinterval)
                    times[mask] = newtime
                    interval[mask] = newinterval
                    st.putcol('TIME', times)
                    st.putcol('INTERVAL', interval)
                else:
                    msg += '...ok.'
                    print msg
        st.close()
    mytb.close()
print sys.argv[5]

# time process
start = time.time()

# assert that system arguments are correct
if len(sys.argv[5].split(' ')) != 1:
    assert len(sys.argv[5].split(' ')) == len(sys.argv[4].split(' ')), 'rate_type_list has the incorrect number of arguments--length must be 1 or match length of param_type_list'

# download data to j drive
os.system('/usr/local/epd-7.3-2/bin/python download_model.py %s' % (sys.argv[1]))

# load country list
country_list = pandas.read_csv('/snfs1/DATA/IHME_COUNTRY_CODES/IHME_COUNTRYCODES.CSV', index_col=None)
country_list = country_list[country_list.ix[:, 'ihme_indic_country'] == 1]
country_list = list(pl.unique(country_list['iso3']))
country_list.remove('BMU')
country_list.remove('HKG')
country_list.remove('MAC')
country_list.remove('PRI')

# launch on cluster
name_list = []
for country in country_list:  #['USA', 'GBR']:
    for sex in ['male', 'female']:
        name = country + str(sys.argv[3]) + sex
        name_list.append(name)
        os.system('/usr/local/bin/SGE/bin/lx24-amd64/qsub -cwd -N ' + name + ' dmco_fit_posterior.sh "%s" "%s" "%s" "%s" "%s" "%s" "%s"'
                  % (sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], country, sex))

pandas.DataFrame(name_list).to_csv('/home/j/Project/dismod/dismod_status/prod/dm-%s/posterior/stdout/name_list.csv' % (sys.argv[1]))
allcategory.append('complete')
allcategory.append('ddm_adjust')
allcategory.append('gb_adjust')
allcategory.append('sibs')
allcategory.append('no_adjust')
allcategory.append('dss')

for acat in allcategory:
    year_cat[acat] = []
    mort_cat[acat] = []
    var_cat[acat] = []

# category is a vector of data$category[data$ihme_loc_id == cc & data$data == 1]
# outside loop: all possible categories, inside loop: all observations
# year_cat, etc. become vectors of all years (etc) w/ specific category of data
for ucat in pl.unique(category):
    count = 0
    for cat in category:
        if cat == ucat:
            year_cat[ucat].append(year[count])
            var_cat[ucat].append(log10_var[count])
            if ((ihme_loc_id in ['DOM', 'PER', 'MAR', 'MDG']) & (cat in ["sibs", "ddm_adjust", "gb_adjust"])):
                mort_cat[ucat].append(log10_mort[count] - rnormal(mu=0., tau=.01**-2))
            else:
                mort_cat[ucat].append(log10_mort[count])
        count = count + 1

for acat in allcategory:
    for ucat in pl.unique(category):
        if acat == ucat:
            allyear = allyear + year_cat[ucat]
# reshape predicted labels to an image
img_pred = np.reshape(data_pred, (hypData.numRows, hypData.numCols))

# read labels into numpy array
mat_gt = scipy.io.loadmat('PaviaU_gt.mat')
img_gt = mat_gt['paviaU_gt']

class_names = ['asphalt', 'meadow', 'gravel', 'tree', 'painted metal',
               'bare soil', 'bitumen', 'brick', 'shadow']
cmap = pl.cm.jet

# save ground truth figure
pl.figure()
for entry in pl.unique(img_gt):
    colour = cmap(entry * 255 / (np.max(img_gt) - 0))
    pl.plot(0, 0, "-", c=colour, label=(['background'] + class_names)[entry])
pl.imshow(img_gt, cmap=cmap)
pl.legend(bbox_to_anchor=(2, 1))
pl.title('ground truth labels')
pl.savefig(os.path.join('results', 'test_classification_gt.png'))

# save predicted classes figure
pl.figure()
for entry in pl.unique(img_pred):
    colour = cmap(entry * 255 / (np.max(img_pred) - 0))
def get_unique_annotations(_annotations):
    if _annotations is not None:
        unique_annotations = pl.unique(_annotations)
        return unique_annotations[pl.where(unique_annotations > 0)]
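# A quick illustration of get_unique_annotations, assuming pylab is imported as pl
# as in the snippet above; zeros are treated as "no annotation" and dropped.
import pylab as pl

annotations = pl.array([0, 2, 2, 5, 0, 3])
print(get_unique_annotations(annotations))  # -> [2 3 5]
print(get_unique_annotations(None))         # -> None (implicit return)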
def plot_fits_pdf(disease, prior, year, param_type_list, filename=''): '''Plot country fits''' dir = '/home/j/Project/dismod/dismod_status/prod/' mortality = pandas.read_csv('/homes/peterhm/gbd/dmco_mortality.csv') world = load_new_model(disease) # create list of countries to report country_list = pandas.read_csv('/snfs1/DATA/IHME_COUNTRY_CODES/IHME_COUNTRYCODES.CSV', index_col=None) country_list = country_list[country_list.ix[:,'ihme_indic_country'] == 1] country_list = list(pl.unique(country_list['iso3'])) country_list.remove('BMU') country_list.remove('HKG') country_list.remove('MAC') country_list.remove('PRI') # create list of countries order by number of data points, then alphabetical country_ordered = [] for country in country_list: country_ordered.append((country,len(world.input_data[world.input_data['area']==country]),len(world.get_data('p')[world.get_data('p')['area']==country]))) dtype = [('ISO3','S10'),('pts',int),('p',int)] country_ordered = pl.array(country_ordered, dtype=dtype) country_ordered = list(pl.sort(country_ordered,order=['pts','p','ISO3'])) country_ordered.reverse() pp = PdfPages(dir + '/dm-%s/image/%s_w_prior_%s_%s.pdf'%(disease, prior, year, filename)) for c,country in enumerate(country_ordered): country = country[0] pl.figure(c, figsize=(len(param_type_list)*4,8)) for s,sex in enumerate(['male', 'female']): model = load_new_model(disease, country, sex) model.keep(start_year=year-2) model.keep(end_year=year+2) add_data(model, mortality, country, sex, year) for j,data_type in enumerate(param_type_list): pl.subplot(2,len(param_type_list),(j+1)+(s*len(param_type_list))) if (data_type == 'm_with') | (data_type == 'm_all'): dismod3.graphics.plot_data_bars(model.get_data('m_all'), color='grey', label='m_all') # get estimates else: #(data_type != 'm_with') | (data_type != 'm_all'): est = pandas.read_csv(dir+'dm-%s/posterior/dm-%s-%s-%s-%s-%s.csv' % (disease, disease, full_name[data_type], country, sex, year),index_col=None) est = est.filter(like='Draw') gbd_est = get_emp(prior, data_type, country, sex, year) find_fnrfx(model, prior, data_type, country, sex, year) ymax = 0. if max(est.mean(1)) > ymax: ymax = max(est.mean(1)) if max(gbd_est.mean(1)) > ymax: ymax = max(gbd_est.mean(1)) # plotting df = model.input_data if sex == 'male': #shift all so male is zero map_func = {'male': 0, 'total': -.5, 'female': -1} if sex == 'female': #shift all so female is zero map_func = {'male': 1, 'total': .5, 'female': 0} model.get_data(data_type)['value'] = model.get_data(data_type)['value'] * pl.exp(-model.parameters[data_type]['fixed_effects']['x_sex']['mu'] * df[df['data_type']==data_type]['sex'].map(map_func).mean()) dismod3.graphics.plot_data_bars(df[df['data_type']==data_type]) pl.plot(pl.array(est.mean(1)), 'k-', label='DM-CO') pl.plot(pl.array(gbd_est.mean(1)), 'r-', label='GBD2010') pl.plot(mc.utils.hpd(pl.array(gbd_est).T, .05), 'r:') pl.plot(mc.utils.hpd(pl.array(est).T, .05), 'k:') pl.axis([-5, 105, -ymax*.05, ymax*1.1]) pl.title(country +' '+ data_type +' '+ sex +' '+ str(year) ) if sex == 'male': pl.legend(loc=(.25,1.145)) pl.subplots_adjust(top=.83, bottom=.07) pp.savefig(c) pl.clf() pp.close()
for counter in counters:
    fieldnames = ['time']
    fieldnames.extend(authornames)
    fid = open('../gitstats/' + counter.replace(' ', '_') + '_by_author.dat', 'r')
    reader = csv.DictReader(fid, fieldnames=fieldnames, delimiter=' ')
    fields = []
    for row in reader:
        fields.append(list(map(np.int, row.values())))
    fid.close()
    fields = np.array(fields)
    fieldnames = list(row.keys())

    authorfields = {}
    for author in pylab.unique(authoralias.values()):
        af = []
        for i in range(1, np.size(fieldnames)):
            if fieldnames[i] in authoralias:
                if authoralias[fieldnames[i]] == author:
                    af.append(i)
        authorfields[author] = af
    print(authorfields)

    institutefields = {}
    for institute in pylab.unique(authorinstitute.values()):
        #print institute
        af = []
        for i in range(0, np.size(fieldnames)):
            if fieldnames[i] == 'time':
                continue
def build_model(vm, force_recomp=False): ''' Builds the model, if needed. Tries to reload if it can ''' logmsg('\n\nRequested: Build Model') if not force_recomp and not vm.isDirty: logmsg('The model is clean and is not forced to recompute') return True cm = vm.hs.cm # Delete old index and resample chips to index vm.delete_model() vm.sample_train_set() # Try to load the correct model if not force_recomp and vm.load_model(): logmsg('Loaded saved model from disk') return logmsg('Building the model. This may take some time.') # Could not load old model. Do full rebuild # ----- # STEP 1 - Loading logdbg('Step 1: Aggregate the model support (Load feature vectors) ---') tx2_cx = vm.get_train_cx() tx2_cid = vm.get_train_cid() assert len(tx2_cx) > 0, 'Training set cannot be np.empty' logdbg('Building model with %d sample chips' % (vm.num_train())) cm.load_features(tx2_cx) tx2_nfpts = cm.cx2_nfpts(tx2_cx) num_train_keypoints = sum(tx2_nfpts) # ----- # STEP 2 - Aggregating logdbg('Step 2: Build the model Words') isTFIDF = False if vm.hs.am.algo_prefs.model.quantizer == 'naive_bayes': logdbg('No Quantization. Aggregating all fdscriptors for nearest neighbor search.') vm.wx2_fdsc = np.empty((num_train_keypoints,128),dtype=np.uint8) _p = 0 for cx in tx2_cx: nfdsc = cm.cx2_nfpts(cx) vm.wx2_fdsc[_p:_p+nfdsc,:] = cm.cx2_fdsc[cx] _p += nfdsc ax2_wx = np.array(range(0,num_train_keypoints),dtype=np.uint32) if vm.hs.am.algo_prefs.model.quantizer == 'akmeans': raise NotImplementedError(':)') # ----- # STEP 3 - Inverted Indexing logdbg('Step 3: Point the parts of the model back to their source') vm.wx2_axs = np.empty(vm.wx2_fdsc.shape[0], dtype=object) for ax in xrange(0,num_train_keypoints): if vm.wx2_axs[ax] is None: vm.wx2_axs[ax] = [] wx = ax2_wx[ax] vm.wx2_axs[wx].append(ax) vm.ax2_cid = -np.ones(num_train_keypoints,dtype=np.int32) vm.ax2_fx = -np.ones(num_train_keypoints,dtype=np.int32) ax2_tx = -np.ones(num_train_keypoints,dtype=np.int32) curr_fx = 0; next_fx = 0 for tx in xrange(vm.num_train()): nfpts = tx2_nfpts[tx] next_fx = next_fx + nfpts ax_range = range(curr_fx,next_fx) ax2_tx[ax_range] = tx vm.ax2_cid[ax_range] = tx2_cid[tx] # Point to Inst vm.ax2_fx[ax_range] = range(nfpts) # Point to Kpts curr_fx = curr_fx + nfpts if isTFIDF: # Compute info for TF-IDF logdbg('Computing TF-IDF metadata') max_tx = len(tx2_cx) tx2_wtf_denom = np.float32(cm.cx2_nfpts(tx2_cx)) vm.wx2_maxtf = map(lambda ax_of_wx:\ max( np.float32(bincount(ax2_tx[ax_of_wx], minlength=max_tx)) / tx2_wtf_denom ), vm.wx2_axs) vm.wx2_idf = np.log2(map(lambda ax_of_wx:\ vm.num_train()/len(pylab.unique(ax2_tx[ax_of_wx])),\ vm.wx2_axs)+eps(1)) logdbg('Built Model using %d feature vectors. Preparing to index.' % len(vm.ax2_cid)) # ----- # STEP 4 - Indexing logdbg('Step 4: Building FLANN Index: over '+str(len(vm.wx2_fdsc))+' words') assert vm.flann is None, 'Flann already exists' vm.flann = FLANN() flann_param_dict = vm.hs.am.algo_prefs.model.indexer.to_dict() flann_params = vm.flann.build_index(vm.wx2_fdsc, **flann_param_dict) vm.isDirty = False vm.save_model() logmsg('The model was built.')
def loadData(folder, islands, dataFrom):
    #%% Load data from files
    if islands > 1:
        ind_gens_isl = []    # individuals data for islands
        ind_cands_isl = []
        ind_fits_isl = []
        ind_cs_isl = []
        stat_gens_isl = []   # statistics.csv for islands
        stat_worstfits_isl = []
        stat_bestfits_isl = []
        stat_avgfits_isl = []
        stat_stdfits_isl = []
        fits_sort_isl = []   # sorted data
        gens_sort_isl = []
        cands_sort_isl = []
        params_sort_isl = []

    for island in range(islands):
        ind_gens = []    # individuals data
        ind_cands = []
        ind_fits = []
        ind_cs = []
        eval_gens = []   # error files for each evaluation
        eval_cands = []
        eval_fits = []
        eval_params = []
        stat_gens = []   # statistics.csv
        stat_worstfits = []
        stat_bestfits = []
        stat_avgfits = []
        stat_stdfits = []

        if islands > 0:
            folderFinal = folder + "_island_" + str(island)
        else:
            folderFinal = folder

        with open('../data/%s/individuals.csv' % (folderFinal)) as f:  # read individuals.csv
            reader = csv.reader(f)
            for row in reader:
                ind_gens.append(int(row[0]))
                ind_cands.append(int(row[1]))
                ind_fits.append(float(row[2]))
                cs = [float(row[i].replace("[", "").replace("]", "")) for i in range(3, len(row))]
                ind_cs.append(cs)

        with open('../data/%s/statistics.csv' % (folderFinal)) as f:  # read statistics.csv
            reader = csv.reader(f)
            for row in reader:
                stat_gens.append(float(row[0]))
                stat_worstfits.append(float(row[2]))
                stat_bestfits.append(float(row[3]))
                stat_avgfits.append(float(row[4]))
                stat_stdfits.append(float(row[6]))

        # unique generation number (sometimes repeated due to rerunning in hpc)
        stat_gens, stat_gens_indices = unique(stat_gens, 1)
        # unique individuals
        stat_worstfits, stat_bestfits, stat_avgfits, stat_stdfits = zip(*[
            [stat_worstfits[i], stat_bestfits[i], stat_avgfits[i], stat_stdfits[i]]
            for i in stat_gens_indices])

        if dataFrom == 'fitness':
            for igen in range(max(ind_gens)):  # read error files from evaluations
                for ican in range(max(ind_cands)):
                    try:
                        f = open('../data/%s/gen_%d_cand_%d_error' % (folderFinal, igen, ican))
                        eval_fits.append(pickle.load(f))
                        f = open('../data/%s/gen_%d_cand_%d_params' % (folderFinal, igen, ican))
                        eval_params.append(pickle.load(f))
                        eval_gens.append(igen)
                        eval_cands.append(ican)
                    except:
                        pass
                        #eval_fits.append(0.15)
                        #eval_params.append([])

        # find x corresponding to smallest error from function evaluations
        if dataFrom == 'fitness':
            #fits_sort, fits_sort_indices, fits_sort_origind = unique(eval_fits, True, True)
            fits_sort_indices = sorted(range(len(eval_fits)), key=lambda k: eval_fits[k])
            fits_sort = [eval_fits[i] for i in fits_sort_indices]
            gens_sort = [eval_gens[i] for i in fits_sort_indices]
            cands_sort = [eval_cands[i] for i in fits_sort_indices]
            params_sort = [eval_params[i] for i in fits_sort_indices]
        # find x corresponding to smallest error from individuals file
        elif dataFrom == 'individuals':
            params_unique, unique_indices = uniqueList(ind_cs)  # unique individuals
            fits_unique = [ind_fits[i] for i in unique_indices]
            gens_unique = [ind_gens[i] for i in unique_indices]
            cands_unique = [ind_cands[i] for i in unique_indices]
            sort_indices = sorted(range(len(fits_unique)), key=lambda k: fits_unique[k])  # sort fits
            fits_sort = [fits_unique[i] for i in sort_indices]
            gens_sort = [gens_unique[i] for i in sort_indices]
            cands_sort = [cands_unique[i] for i in sort_indices]
            params_sort = [params_unique[i] for i in sort_indices]

        # if multiple islands, save data for each
        if islands > 1:
            ind_gens_isl.append(ind_gens)    # individuals data for islands
            ind_cands_isl.append(ind_cands)
            ind_fits_isl.append(ind_fits)
            ind_cs_isl.append(ind_cs)
            stat_gens_isl.append(stat_gens)  # statistics.csv for islands
            stat_worstfits_isl.append(stat_worstfits)
            stat_bestfits_isl.append(stat_bestfits)
            stat_avgfits_isl.append(stat_avgfits)
            stat_stdfits_isl.append(stat_stdfits)
            fits_sort_isl.append(fits_sort)  # sorted data
            gens_sort_isl.append(gens_sort)
            cands_sort_isl.append(cands_sort)
            params_sort_isl.append(params_sort)

    if islands > 1:
        return ind_gens_isl, ind_cands_isl, ind_fits_isl, ind_cs_isl, stat_gens_isl, \
            stat_worstfits_isl, stat_bestfits_isl, stat_avgfits_isl, stat_stdfits_isl, \
            fits_sort_isl, gens_sort_isl, cands_sort_isl, params_sort_isl
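# loadData calls uniqueList(), which is not defined in this excerpt.  A minimal
# implementation consistent with how it is used above (return the unique candidate
# parameter lists plus the index of each one's first occurrence) might look like
# this; treat it as an assumption, not the project's actual helper.
def uniqueList(seq):
    unique_items, unique_indices = [], []
    for i, item in enumerate(seq):
        if item not in unique_items:
            unique_items.append(item)
            unique_indices.append(i)
    return unique_items, unique_indices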
def test_load_datatype():
    data_types = list(pl.unique(model2.input_data['data_type']))
    assert data_types == [data_type]
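# The test above relies on module-level objects (model2, data_type, pl) that are not
# shown in this excerpt.  A hypothetical minimal setup under which the assertion
# holds could look like the following; the real model2 comes from the surrounding
# model-loading code, not from this stub.
import pylab as pl
import pandas

data_type = 'p'  # hypothetical data-type label

class _FakeModel(object):
    input_data = pandas.DataFrame({'data_type': [data_type, data_type]})

model2 = _FakeModel()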
thin = 10

# set font
book_graphics.set_font()

### @export 'data'
# TODO: migrate data into a csv, load with pandas
dm = dismod3.load_disease_model(15630)
dm.calc_effective_sample_size(dm.data)
some_data = ([d for d in dm.data
              if d['data_type'] == 'prevalence data'
              and d['sex'] == 'male'
              and 15 <= d['age_start'] < 20
              and d['age_end'] == 99
              and d['effective_sample_size'] > 1])
countries = pl.unique([s['region'] for s in some_data])
min_year = min([s['year_start'] for s in some_data])
max_year = max([s['year_end'] for s in some_data])
cy = ['%s-%d' % (s['region'], s['year_start']) for s in some_data]

n = pl.array([s['effective_sample_size'] for s in some_data])
r = pl.array([dm.value_per_1(s) for s in some_data])
s = pl.sqrt(r * (1 - r) / n)

### @export 'binomial-model'
pi = mc.Uniform('pi', lower=0, upper=1, value=.5)

@mc.potential
def obs(pi=pi):
def convert_MAPGPS_TEC(ms_name,mad_data_file,ref_time,ref_start,ref_end,plot_vla_tec,im_name): """ ## ============================================================================= ## ## This opens the MAPGPS Data table and selects a subset of TEC/DTEC values ## within a 15 deg square of the VLA. This then plots the zenith TEC/DTEC at the ## VLA site and makes the TEC map for use at the C++ level. We chose to deal ## with the MAPGPS data in this separate fashion because there are large ## 'gaps' in the data where no TEC/DTEC values exist. Consequently, we use the ## filled in CASA table to produce a TEC map and can not simply ## concatenate arrays. ## ## ============================================================================= ## ## Inputs: ## ms_name type = string Name of the measurement set for which to ## acquire TEC/DTEC data ## mad_data_file type = string Name of the MAPGPS TEC/DTEC data table ## ref_time type = float Reference time (s) for setting the ## coordinates, UT 0 on the first day ## plot_vla_tec type = boolean When True, this will open a plot of the ## interpolated TEC/DTEC at the VLA. ## im_name type = string Name of the output TEC Map optionally ## specified by the user ## ## Returns: ## Opens a plot showing the zenith TEC/DTEC at the VLA (if plot_vla_tec=True) ## and the name of the CASA image file containing the TEC map. ## ## ============================================================================= """ ## Only retrieve data in a 15x15 deg. patch centered (more or less) at the VLA tb.open(mad_data_file+'.tab') st0=tb.query('GDLAT>19 && GDLAT<49 && GLON>-122 && GLON<-92', ## If you want ALL the data to make a global map, use the line below: #st0=tb.query('GDLAT>-90. && GDLAT<90. && GLON>-180. && GLON<180', name='tecwindow') utimes=pylab.unique(st0.getcol('UT1_UNIX')) ulat=pylab.unique(st0.getcol('GDLAT')) ulong=pylab.unique(st0.getcol('GLON')) points_lat=len(ulat) points_long=len(ulong) num_maps=len(utimes) ## Initialize the array which will be used to make the image tec_array=pylab.zeros((2,points_long,points_lat,num_maps),dtype=float) minlat=min(ulat) minlong=min(ulong) print 'rows',len(utimes) itime=0 for t in utimes: st1=st0.query('UT1_UNIX=='+str(t),name='bytime') n=st1.nrows() if itime%100==0: print itime, n ilong=st1.getcol('GLON')-minlong ilat=st1.getcol('GDLAT')-minlat itec=st1.getcol('TEC') idtec=st1.getcol('DTEC') for i in range(n): tec_array[0,int(ilong[i]),int(ilat[i]),itime]=itec[i] tec_array[1,int(ilong[i]),int(ilat[i]),itime]=idtec[i] st1.close() ## Simply interpolate to cull as many zeros as possible ## (median of good neighbors, if at least four of them) thistec_array=tec_array[:,:,:,itime].copy() thisgood=thistec_array[0]>0.0 for i in range(1,points_long-1): for j in range(1,points_lat-1): if not thisgood[i,j]: mask=thisgood[(i-1):(i+2),(j-1):(j+2)] if pylab.sum(mask)>4: #print itime, i,j, pylab.sum(mask) tec_array[0,i,j,itime]=pylab.median(thistec_array[0,(i-1):(i+2),(j-1):(j+2)][mask]) tec_array[1,i,j,itime]=pylab.median(thistec_array[1,(i-1):(i+2),(j-1):(j+2)][mask]) itime+=1 st0.close() tb.close() ztec_value(-107.6184,34.0790,points_long,points_lat,minlong,minlat,1,\ 1,5,ref_start,ref_end,int(num_maps),tec_array,plot_vla_tec) ## ref_time + 150 accounts for the fact that the MAPGPS map starts at 00:02:30 UT, not 00:00:00 UT if im_name == '': prefix = ms_name else: prefix = im_name CASA_image = make_image(prefix,minlong,minlat,ref_time+150.0,1,1,5*60,tec_array[0],'MAPGPS',appendix = '.MAPGPS_TEC') return CASA_image
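# The gap-filling rule used in the loop above, stated on its own as a sketch (not
# part of the original): a zero cell is replaced by the median of its non-zero 3x3
# neighbours when more than four of those neighbours hold data.  `tec_map` is a
# hypothetical (points_long x points_lat) slice such as tec_array[0, :, :, itime].
import numpy as np

def fill_gaps_once(tec_map):
    filled = tec_map.copy()
    nlon, nlat = tec_map.shape
    for i in range(1, nlon - 1):
        for j in range(1, nlat - 1):
            if tec_map[i, j] <= 0.0:
                window = tec_map[i - 1:i + 2, j - 1:j + 2]
                mask = window > 0.0
                if mask.sum() > 4:
                    filled[i, j] = np.median(window[mask])
    return filled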
for counter in counters:
    fieldnames = ['time']
    fieldnames.extend(authornames)
    fid = open('../gitstats/' + counter.replace(' ', '_') + '_by_author.dat', 'rb')
    reader = csv.DictReader(fid, fieldnames=fieldnames, delimiter=' ')
    fields = []
    for row in reader:
        fields.append(map(numpy.int, row.values()))
    fid.close()
    fields = numpy.array(fields)
    fieldnames = row.keys()
    authorfields = {}
    for author in pylab.unique(authoralias.values()):
        #print author
        af = []
        for i in range(1, numpy.size(fieldnames)):
            if authoralias.has_key(fieldnames[i]):
                if authoralias[fieldnames[i]] == author:
                    af.append(i)
        authorfields[author] = af
    institutefields = {}
    for institute in pylab.unique(authorinstitute.values()):
        #print institute
        af = []
        for i in range(0, numpy.size(fieldnames)):
            if fieldnames[i] == 'time':
                continue
import pylab as pl
from scipy.spatial import ConvexHull

# On the 2-Sphere, the Voronoi tesselation is equivalent to the convex hull projected on the sphere
# (Sugihara, Journal for Geometry and Graphics Volume 6 (2002), No. 1, 69-81.)
# I assume that the same is true in 4D.... [This has to be checked!]

R = 1.6180339887498949  # magic number by straley for 120 particles

import sys
polar = pl.load(sys.argv[1])  # pl.load is the old pylab text loader (loadtxt in newer matplotlib/numpy)

from spheretools import *
cartesian = convert(polar, R)

CHull = ConvexHull(cartesian)

with open("bonds.txt", 'w') as fw:
    for p in range(cartesian.shape[0]):
        # print p
        which_simplex, position = pl.where(CHull.simplices == p)
        # print which_simplex
        all_neighs = pl.unique(CHull.simplices[which_simplex].flatten())
        # print "all_neighs", all_neighs
        index_of_p = pl.where(all_neighs == p)
        # print "p is at", index_of_p
        neighs = pl.delete(all_neighs, index_of_p)
        # print "neighs after ", neighs
        fw.write(str(len(neighs)) + " " + " ".join(map(str, neighs)) + "\n")
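# A small companion sketch (not in the original script): read bonds.txt back into a
# neighbour list, one list of particle indices per particle, matching the format
# written above (a count followed by the neighbour indices).
def read_bonds(fname="bonds.txt"):
    neighbours = []
    with open(fname) as fr:
        for line in fr:
            parts = line.split()
            n = int(parts[0])
            neighbours.append([int(x) for x in parts[1:1 + n]])
    return neighbours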
def interactive_initial_guess(comp_im, trace_im, comp_names): comp = pf.getdata(comp_im) trc = pf.getdata(trace_im) # reading in reference spectrum ref_lam = np.loadtxt('/Users/tomczak/pydir/vp_art/data/spectrum_'+comp_names[0]+'.dat')[:,0] ref_flux = np.zeros(len(ref_lam)) for comp0 in comp_names: compspecdat = np.loadtxt('/Users/tomczak/pydir/vp_art/data/spectrum_'+comp0+'.dat') ref_flux += compspecdat[:,1] ref_spec = np.array(zip(ref_lam, ref_flux)) # reading in reference spectrum's line list lines = [] for comp0 in comp_names: complinesdat = np.loadtxt('/Users/tomczak/pydir/vp_art/data/lines_'+comp0+'.dat')[:,0] lines += complinesdat.tolist() lines.sort() # extracting spectrum for the first fiber fibnums = pl.unique(trc) fibnums.sort() fibnums = fibnums[pl.find(fibnums > 0)] first_fiber_inds = pl.where(trc == fibnums[0]) first_fiber_spec = pl.zeros(max(first_fiber_inds[1]) + 1) for y,x in zip(first_fiber_inds[0], first_fiber_inds[1]): first_fiber_spec[x] += comp[y][x] xaxis = range(max(first_fiber_inds[1])+1) class compspec: def __init__(self, xaxis, flux, ref_spec, ref_lines): self.xaxis, self.flux = xaxis, flux self.ref_spec = ref_spec self.ref_lines = ref_lines self.soln_data = [] self.counter = 0 self.fig = pl.figure(figsize=(15, 7)) self.sp1 = self.fig.add_subplot(211) self.sp2 = self.fig.add_subplot(212) self.zooms = [] self.sp1.plot(self.xaxis, self.flux/max(self.flux)) self.sp1.set_ylim(-0.03, 1.2) self.sp1.set_title('ArcLamp Spectrum: x = identify line, i = skip line, z/a = zoom in/out, r = reset') self.sp1.set_xlabel('Pixel') self.sp2.plot(self.ref_spec[:,0], self.ref_spec[:,1]/max(self.ref_spec[:,1])) self.sp2.set_ylim(-0.03, 1.2) x0, x1, y0, y1 = self.sp2.axis() self.ytext = y0 + (y1-y0)*0.85 self.show_lines = [[self.sp2.axvline(self.ref_lines[0], color='r', lw=1, ls='--'), self.sp2.annotate(str(int(self.ref_lines[0]+0.5)), (self.ref_lines[0], self.ytext), rotation='vertical', size=11)]] self.sp2.set_title('Reference Spectrum') self.sp2.set_xlabel('Wavelength (Angstroms)') self.fig.subplots_adjust(hspace=0.6, right=0.95) self.fig.canvas.mpl_connect('key_press_event', self.key) pl.show() def key(self, event): ''' # x = identify line # i = skip to next line # z = zoom in # a = zoom out # r = reset ''' k = event.key x = event.xdata y = event.ydata sp = event.inaxes if k == 'x' and sp.get_subplotspec() == self.sp1.get_subplotspec(): self.sp1.axvline(x, color='r', lw=1) self.show_lines[self.counter][0].set_ls('-') self.counter += 1 if self.counter == len(self.ref_lines): self.sp1.text(0.35, 0.35, "\n Complete: close \n this window \n", transform=self.sp1.transAxes, size=22, bbox=dict(fc='w')) self.sp2.text(0.35, 0.35, "\n Complete: close \n this window \n", transform=self.sp2.transAxes, size=22, bbox=dict(fc='w')) pl.savetxt('initial_lambda_soln.dat', self.soln_data) else: self.soln_data.append([self.ref_lines[self.counter], x]) self.show_lines.append([self.sp2.axvline(self.ref_lines[self.counter], color='r', lw=1, ls='--'), self.sp2.annotate(str(int(self.ref_lines[self.counter]+0.5)), (self.ref_lines[self.counter], self.ytext), rotation='vertical', size=11)]) if k == 'i': self.show_lines[self.counter][0].set_lw(0) self.show_lines[self.counter][1].set_visible(False) self.counter += 1 if self.counter == len(self.ref_lines): self.sp1.text(0.35, 0.35, "\n Complete: close \n this window \n", transform=self.sp1.transAxes, size=22, bbox=dict(fc='w')) self.sp2.text(0.35, 0.35, "\n Complete: close \n this window \n", transform=self.sp2.transAxes, size=22, bbox=dict(fc='w')) 
pl.savetxt('initial_lambda_soln.dat', self.soln_data) else: self.show_lines.append([self.sp2.axvline(self.ref_lines[self.counter], color='r', lw=1, ls='--'), self.sp2.annotate(str(int(self.ref_lines[self.counter]+0.5)), (self.ref_lines[self.counter], self.ytext), rotation='vertical', size=11)]) if k == 'z': self.zooms.append([self.sp1.axis()[:2], self.sp2.axis()[:2]]) axis = sp.axis() dx = (axis[1] - axis[0])/10. sp.set_xlim(x-dx, x+dx) if k == 'a': if len(self.zooms): self.sp1.set_xlim(self.zooms[-1][0]) self.sp2.set_xlim(self.zooms[-1][1]) self.zooms.pop(-1) if k == 'r': self.fig.clf() self.soln_data = [] self.counter = 0 self.sp1 = self.fig.add_subplot(211) self.sp2 = self.fig.add_subplot(212) self.zooms = [] self.sp1.plot(self.xaxis, self.flux/max(self.flux)) self.sp1.set_ylim(-0.03, 1.2) self.sp1.set_title('ArcLamp Spectrum: x = identify line, i = skip line, z/a = zoom in/out, r = reset') self.sp1.set_xlabel('Pixel') self.sp2.plot(self.ref_spec[:,0], self.ref_spec[:,1]/max(self.ref_spec[:,1])) self.sp2.set_ylim(-0.03, 1.2) x0, x1, y0, y1 = self.sp2.axis() self.ytext = y0 + (y1-y0)*0.85 self.show_lines = [[self.sp2.axvline(self.ref_lines[0], color='r', lw=1, ls='--'), self.sp2.annotate(str(int(self.ref_lines[0]+0.5)), (self.ref_lines[0], self.ytext), rotation='vertical', size=11)]] self.sp2.set_title('Reference Spectrum') self.sp2.set_xlabel('Wavelength (Angstroms)') pl.draw() interactive_go = compspec(xaxis, first_fiber_spec, ref_spec, lines)
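# One possible follow-up, sketched here as an assumption rather than vp_art's actual
# pipeline: initial_lambda_soln.dat holds (wavelength, pixel) pairs, so a low-order
# polynomial fit gives a rough pixel-to-wavelength dispersion solution.
import numpy as np

soln = np.loadtxt('initial_lambda_soln.dat')   # columns: wavelength, pixel
lam, pix = soln[:, 0], soln[:, 1]
coeffs = np.polyfit(pix, lam, 3)               # hypothetical cubic dispersion fit
wavelength_of_pixel = np.poly1d(coeffs)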
)
Y = pandas.read_csv(
    "/home/j/Project/dismod/dismod_status/prod/dm-19807/posterior/dm-19807-prevalence-north_africa_middle_east-male-2005.csv",
    index_col=None,
)

import pylab as pl

def weighted_age(df):
    return (df.filter(like="Draw").T * df["Population"] / df["Population"].sum()).T.sum()

pl.figure()
for iso in list(pl.unique(X["Iso3"])):
    pl.plot(X[X["Iso3"] == iso].filter(like="Draw").mean(1).__array__(), label=iso)
pl.semilogy([1], [1])

Z = X.groupby("Age").apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color="red", linewidth=3, alpha=0.5,
        label="Inconsistent NA/ME")  # was a bare plot(); pl.plot matches the import above

pl.legend()
pl.axis([-5, 130, 1e-6, 2])

pl.figure()
for iso in list(pl.unique(Y["Iso3"])):
    pl.plot(Y[(Y["Iso3"] == iso) & (Y["Rate type"] == "prevalence")].filter(like="Draw").mean(1).__array__(), label=iso)
pl.semilogy([1], [1])
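# A tiny worked example (not from the original analysis) of the population-weighted
# collapse that weighted_age performs; the frame below is made up and reuses the
# pandas import and weighted_age defined above.
toy = pandas.DataFrame({'Draw_0': [0.10, 0.30],
                        'Draw_1': [0.20, 0.40],
                        'Population': [100., 300.]})
print(weighted_age(toy))   # Draw_0 -> 0.25, Draw_1 -> 0.35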
print SITE[i]
url = 'http://gisweb.wh.whoi.edu:8080/dods/whoi/emolt_sensor?emolt_sensor.TIME_LOCAL&emolt_sensor.SITE='
dataset = open_url(url + '"' + SITE[i] + '"')
var = dataset['emolt_sensor']
print 'hold on ... extracting your eMOLT mooring data'
year_month_day = list(var.TIME_LOCAL)
timelocal = []
for j in range(len(year_month_day)):
    timelocal.append(datetime.strptime(year_month_day[j], "%Y-%m-%d"))
index = range(len(timelocal))
index.sort(lambda x, y: cmp(timelocal[x], timelocal[y]))
timelocal = [timelocal[ii] for ii in index]
print 'now generating a datetime'
timepd = pd.DataFrame(range(len(timelocal)), index=timelocal)
timepd['Year'] = timepd.index.year
year = unique(timepd['Year'])
monthall = []
if len(year) >= minyear:
    for k in range(len(year)):
        timemonth = timepd.ix[timepd.index.year == year[k]]
        timemonth = timemonth.resample('m', how=['count'], kind='period')
        timemonth = timemonth.ix[timemonth[0, 'count'] > minhour * minday]
        month = unique(timemonth.index.month)
        print year[k], month
        # f.write(str(SITE[i])+','+str(year[k])+','+str(month)+'\n')
        monthall.append(month)
    common = []
    for jj in range(1, 13):
        num = 0
        for kk in range(len(monthall)):
            if jj in monthall[kk]:
temp = [temp[i] for i in index]
depth = [depth[i] for i in index]
salt = [salt[i] for i in index]

print 'Delimiting mooring data according to user-specified time'
part_t, part_time, part_salt, distinct_dep = [], [], [], []
start_time = input_time[0]
end_time = input_time[1]
print start_time, end_time
for i in range(len(temp)):
    if (start_time <= datet[i] <= end_time) & (dep[0] <= depth[i] <= dep[1]):
        part_t.append(temp[i])
        part_time.append(datet[i])
        part_salt.append(salt[i])
        distinct_dep.append(unique(depth))
obs_temp = part_t
obs_dt = part_time
obs_salt = part_salt
obs_dtindex = []
if intend_to == 'temp':
    for kk in range(len(obs_temp)):
        obs_temp[kk] = f2c(obs_temp[kk])  # converts to Celsius
        obs_dtindex.append(datetime.strptime(str(obs_dt[kk])[:19], '%Y-%m-%d %H:%M:%S'))
    obstso = pd.DataFrame(obs_temp, index=obs_dtindex)
else:
    for kk in range(len(obs_salt)):
        obs_dtindex.append(datetime.strptime(str(obs_dt[kk])[:19], '%Y-%m-%d %H:%M:%S'))
    obstso = pd.DataFrame(obs_salt, index=obs_dtindex)
print 'obs Dataframe is ready'
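# A possible next step, sketched as an assumption: put the irregular observations in
# obstso onto a regular grid, e.g. hourly means, using the same older pandas resample
# style seen above (newer pandas would use obstso.resample('H').mean()).
obstso_hourly = obstso.resample('H', how='mean')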
def plot_fits_pdf(disease, prior, year, param_type_list, filename=''): '''Plot country fits''' dir = '/home/j/Project/dismod/dismod_status/prod/' mortality = pandas.read_csv('/homes/peterhm/gbd/dmco_mortality.csv') world = load_new_model(disease) # create list of countries to report country_list = pandas.read_csv( '/snfs1/DATA/IHME_COUNTRY_CODES/IHME_COUNTRYCODES.CSV', index_col=None) country_list = country_list[country_list.ix[:, 'ihme_indic_country'] == 1] country_list = list(pl.unique(country_list['iso3'])) country_list.remove('BMU') country_list.remove('HKG') country_list.remove('MAC') country_list.remove('PRI') # create list of countries order by number of data points, then alphabetical country_ordered = [] for country in country_list: country_ordered.append( (country, len(world.input_data[world.input_data['area'] == country]), len(world.get_data('p')[world.get_data('p')['area'] == country]))) dtype = [('ISO3', 'S10'), ('pts', int), ('p', int)] country_ordered = pl.array(country_ordered, dtype=dtype) country_ordered = list(pl.sort(country_ordered, order=['pts', 'p', 'ISO3'])) country_ordered.reverse() pp = PdfPages(dir + '/dm-%s/image/%s_w_prior_%s_%s.pdf' % (disease, prior, year, filename)) for c, country in enumerate(country_ordered): country = country[0] pl.figure(c, figsize=(len(param_type_list) * 4, 8)) for s, sex in enumerate(['male', 'female']): model = load_new_model(disease, country, sex) model.keep(start_year=year - 2) model.keep(end_year=year + 2) add_data(model, mortality, country, sex, year) for j, data_type in enumerate(param_type_list): pl.subplot(2, len(param_type_list), (j + 1) + (s * len(param_type_list))) if (data_type == 'm_with') | (data_type == 'm_all'): dismod3.graphics.plot_data_bars(model.get_data('m_all'), color='grey', label='m_all') # get estimates else: #(data_type != 'm_with') | (data_type != 'm_all'): est = pandas.read_csv( dir + 'dm-%s/posterior/dm-%s-%s-%s-%s-%s.csv' % (disease, disease, full_name[data_type], country, sex, year), index_col=None) est = est.filter(like='Draw') gbd_est = get_emp(prior, data_type, country, sex, year) find_fnrfx(model, prior, data_type, country, sex, year) ymax = 0. if max(est.mean(1)) > ymax: ymax = max(est.mean(1)) if max(gbd_est.mean(1)) > ymax: ymax = max(gbd_est.mean(1)) # plotting df = model.input_data if sex == 'male': #shift all so male is zero map_func = {'male': 0, 'total': -.5, 'female': -1} if sex == 'female': #shift all so female is zero map_func = {'male': 1, 'total': .5, 'female': 0} model.get_data(data_type)['value'] = model.get_data( data_type)['value'] * pl.exp( -model.parameters[data_type]['fixed_effects'] ['x_sex']['mu'] * df[df['data_type'] == data_type] ['sex'].map(map_func).mean()) dismod3.graphics.plot_data_bars( df[df['data_type'] == data_type]) pl.plot(pl.array(est.mean(1)), 'k-', label='DM-CO') pl.plot(pl.array(gbd_est.mean(1)), 'r-', label='GBD2010') pl.plot(mc.utils.hpd(pl.array(gbd_est).T, .05), 'r:') pl.plot(mc.utils.hpd(pl.array(est).T, .05), 'k:') pl.axis([-5, 105, -ymax * .05, ymax * 1.1]) pl.title(country + ' ' + data_type + ' ' + sex + ' ' + str(year)) if sex == 'male': pl.legend(loc=(.25, 1.145)) pl.subplots_adjust(top=.83, bottom=.07) pp.savefig(c) pl.clf() pp.close()
# Calculate the time range, if not given.
delta_t = delta_t * CPU_CLOCK
if delta_t == 0:
    dt = toc_step - tic_step
    if dt > delta_t:
        delta_t = dt
print "Data range: ", delta_t / CPU_CLOCK, "ms"

# Once more doing the real gather and plots this time.
start_t = float(tic_step)
tics -= tic_step
tocs -= tic_step
end_t = (toc_step - start_t) / CPU_CLOCK

# Get all "task" names and assign colours.
TASKTYPES = pl.unique(funcs)
print TASKTYPES

# Set colours of task/subtype.
TASKCOLOURS = {}
ncolours = 0
for task in TASKTYPES:
    TASKCOLOURS[task] = colours[ncolours]
    ncolours = (ncolours + 1) % maxcolours

# For fiddling with colours...
if args.verbose:
    print "#Selected colours:"
    for task in sorted(TASKCOLOURS.keys()):
        print "# " + task + ": " + TASKCOLOURS[task]
    for task in sorted(SUBCOLOURS.keys()):