Esempio n. 1
0
def evaluate_model(mod, comment='', data_fname='missing_noisy_data.csv', truth_fname='data.csv',
                   image_dir='/home/j/Project/Models/space-time-smoothing/images'):
    """Fit one named model to the noisy data, score it against the truth, and log the result.

    Parameters
    ----------
    mod : str
        Name of a model constructor in the ``model`` module; must be one of the
        space-separated names in ``data_run_models``.
    comment : str
        Free text stored as the last column of the log row.
    data_fname : str
        CSV with the (partly missing) observations; rows whose ``y`` is NaN are
        treated as out-of-sample.
    truth_fname : str
        CSV with the complete values used for scoring.
    image_dir : str
        Directory that receives the prediction plot, saved as ``<start time>.png``.

    Returns
    -------
    The fitted model object returned by ``model.<mod>(data)`` after sampling.

    Raises
    ------
    TypeError
        If ``mod`` is not listed in ``data_run_models``.

    Side effects: saves a figure under ``image_dir`` and appends one row to
    ``dev_log.csv`` in the current directory.
    """
    import csv
    import hashlib
    import os

    if mod not in data_run_models.split(' '):
        raise TypeError('Unrecognized model "%s"; must be one of %s' % (mod, data_run_models))

    # re-import so that edits made to model.py during an interactive session are picked up
    import model
    reload(model)

    print('loading data')
    data = pl.csv2rec(data_fname)
    truth = pl.csv2rec(truth_fname)

    t0 = time.time()
    print('generating model')
    # mod was validated above, so a plain attribute lookup replaces eval() on a built string
    mod_mc = getattr(model, mod)(data)

    print('fitting model with mcmc')
    mod_mc.sample(10000, 5000, 50, verbose=1)
    t1 = time.time()

    print('summarizing results')

    import graphics
    reload(graphics)
    pl.figure(figsize=(22, 17), dpi=300)
    pl.clf()
    graphics.plot_all_predictions_over_time(data, mod_mc.predicted, more_data=truth)

    data_stats = mod_mc.data_predicted.stats()

    # out-of-sample rows are the ones whose observation is missing in the noisy data
    i_out = [i for i in range(len(data)) if pl.isnan(data.y[i])]
    rmse_abs_out = pl.rms_flat(truth.y[i_out] - data_stats['mean'][i_out])
    rmse_rel_out = 100*pl.rms_flat(1. - data_stats['mean'][i_out]/truth.y[i_out])

    i_in = [i for i in range(len(data)) if not pl.isnan(data.y[i])]
    rmse_abs_in = pl.rms_flat(truth.y[i_in] - data_stats['mean'][i_in])
    rmse_rel_in = 100*pl.rms_flat(1. - data_stats['mean'][i_in]/truth.y[i_in])

    # percentage of held-out truths that fall inside the 95% HPD interval
    param_stats = mod_mc.param_predicted.stats()
    hpd = param_stats['95% HPD interval']
    covered = (truth.y[i_out] >= hpd[i_out, 0]) & (truth.y[i_out] <= hpd[i_out, 1])
    coverage = 100*pl.sum(covered) / float(len(i_out))

    # fingerprint of the input data so that log rows can be matched to a data set
    data_hash = hashlib.md5(data).hexdigest()
    results = [mod, t1-t0, rmse_abs_out, rmse_rel_out, rmse_abs_in, rmse_rel_in, coverage,
               len(data), len(pl.unique(data.region)), len(pl.unique(data.country)), len(pl.unique(data.year)), len(pl.unique(data.age)), data_hash,
               t0, comment]
    print('%s: time: %.0fs out-of-samp rmse abs=%.1f rel=%.0f in-samp rmse abs=%.1f rel=%.0f coverage=%.0f\ndata: %d rows; %d regions, %d countries %d years %d ages [data hash: %s]\n(run conducted at %f)\n%s' % tuple(results))

    pl.savefig(os.path.join(image_dir, '%s.png' % t0))

    # the context manager closes the log even if writing the row fails
    with open('dev_log.csv', 'a') as f:
        csv.writer(f).writerow(results)

    return mod_mc
Esempio n. 2
0
def probeData(settings):
    """Plot a quick look at the start of a raw sample file.

    Reads ten code periods of int8 samples from ``settings.fileName`` and draws
    three panels: a short time-domain trace, a power spectral density, and a
    histogram of the sample values.

    Returns the figure that holds the three panels.
    """
    print("Probing data %s" % (settings.fileName,))

    code_period_rate = settings.codeFreqBasis / settings.codeLength
    samplesPerCode = int(round(settings.samplingFreq / code_period_rate))

    samples = getSamples.int8(settings.fileName,
                              10 * samplesPerCode,
                              settings.skipNumberOfBytes)

    fig = pylab.figure()
    pylab.clf()

    # time axis in seconds, one entry per sample over roughly 5 ms
    n_time_steps = int(round(
        (5e-3 + 1/settings.samplingFreq)*settings.samplingFreq))
    timeScale = [step*(1/settings.samplingFreq)
                 for step in range(0, n_time_steps)]

    # --- panel 1: time domain -------------------------------------------
    pylab.subplot(2, 2, 1)
    # floor division: this is what "/" did on the Python 2 interpreter this code targets
    plot_max = int(round(samplesPerCode // 50))
    time_ms = [1000 * t for t in timeScale[0:plot_max]]
    pylab.plot(time_ms, samples[0:plot_max])
    pylab.title('Time domain plot')
    pylab.xlabel('Time (ms)')
    pylab.ylabel('Amplitude')

    # --- panel 2: frequency domain (mean removed, frequency in MHz) -------
    centred = samples-numpy.mean(samples)
    (Pxx, freqs) = matplotlib.mlab.psd(x=centred,
                                       noverlap=1024,
                                       NFFT=2048,
                                       Fs=settings.samplingFreq/1e6)
    pylab.subplot(2, 2, 2)
    pylab.semilogy(freqs, Pxx)
    pylab.title('Frequency Domain Plot')
    pylab.xlabel('Frequency (MHz)')
    pylab.ylabel('Magnitude')

    # --- panel 3: histogram, one bin per distinct sample value ------------
    pylab.subplot(2, 2, 3)
    levels = pylab.unique(samples)
    pylab.hist(samples, len(levels))
    limits = pylab.axis()
    pylab.axis([min(samples), max(samples), limits[2], limits[3]])
    pylab.xticks(pylab.unique(pylab.round_(levels)))
    pylab.title('Histogram')

    return fig
Esempio n. 3
0
def scanbystate(vis,undo=False):
    """Fold STATE_ID into SCAN_NUMBER (or undo that) in the table ``vis``.

    Forward:  SCAN_NUMBER = SCAN_NUMBER * m + STATE_ID, where m is the smallest
    power of ten larger than the biggest STATE_ID.
    Undo:     SCAN_NUMBER = (SCAN_NUMBER - STATE_ID) / d, where d is the power
    of ten inferred from the smallest SCAN_NUMBER.

    Parameters
    ----------
    vis : table to open for writing (passed to ``tbtool().open``)
    undo : when true, reverse a previous forward call

    Raises
    ------
    Exception
        On undo, when the scan numbers are too small to have been encoded.

    The table is always closed, including when an error occurs.
    """
    mytb=taskinit.tbtool()

    mytb.open(vis,nomodify=False)
    try:
        scans=mytb.getcol('SCAN_NUMBER')
        states=mytb.getcol('STATE_ID')
        print('Unique STATE_IDs =  %s' % str(pl.unique(states)))

        if undo:
            minscan=scans.min()
            # log10 is undefined for values <= 0; such scans cannot have been encoded
            d=10**int(floor(log10(minscan))) if minscan>0 else 1
            if d<10:
                raise Exception('Apparently, nothing to undo')
            scans-=states
            # explicit floor division keeps the integer column integer
            scans//=d
            print('New SCAN_NUMBER = (SCAN_NUMBER - STATE_ID) / '+str(d))
        else:
            maxstate=states.max()
            # reserve at least one decimal digit even when every STATE_ID is 0
            m=10**int(floor(log10(maxstate)+1.0)) if maxstate>0 else 10
            scans*=m
            scans+=states
            print('New SCAN_NUMBER = SCAN_NUMBER * '+str(m)+' + STATE_ID')

        mytb.putcol('SCAN_NUMBER',scans)
    finally:
        mytb.close()
    def addDataVectorAccessor(self, data_vector_accessor):
        """Register an accessor and update the running signal statistics.

        Tracks, across all accessors added so far:
        * the accessor whose signal has the smallest sum, and that sum,
        * the smallest and largest single signal value,
        * the set of distinct positive annotation values.

        Parameters
        ----------
        data_vector_accessor : object with a ``signal`` array and an
            ``annotation`` array (or ``None`` when there is no annotation).
        """
        self.__data_vectors_accessors__.append(data_vector_accessor)

        _sum = pl.sum(data_vector_accessor.signal)
        _min = pl.amin(data_vector_accessor.signal)
        _max = pl.amax(data_vector_accessor.signal)

        # identity test: "== None" is not a reliable check for the unset state
        if self.__minimal_signal__ is None:
            # first accessor seeds every statistic
            self.__minimal_signal__ = _sum
            self.__minimal_data_vector_accessor__ = data_vector_accessor
            self.__min_signal__ = _min
            self.__max_signal__ = _max
        else:
            if _sum < self.__minimal_signal__:
                self.__minimal_data_vector_accessor__ = data_vector_accessor
                self.__minimal_signal__ = _sum
            if _min < self.__min_signal__:
                self.__min_signal__ = _min
            if _max > self.__max_signal__:
                self.__max_signal__ = _max

        # collect the distinct annotations (> 0); "annotation == None" on a
        # numpy array compares elementwise, so its truth value is ambiguous
        annotation = data_vector_accessor.annotation
        if annotation is not None:
            positive = annotation[pl.where(annotation > 0)]
            self.__unique_annotations__ |= set(pl.unique(positive))
Esempio n. 5
0
def scanbystate(vis, undo=False):
    """Fold STATE_ID into SCAN_NUMBER (or undo that) in the table ``vis``.

    Forward:  SCAN_NUMBER = SCAN_NUMBER * m + STATE_ID, where m is the smallest
    power of ten larger than the biggest STATE_ID.
    Undo:     SCAN_NUMBER = (SCAN_NUMBER - STATE_ID) / d, where d is the power
    of ten inferred from the smallest SCAN_NUMBER.

    Parameters
    ----------
    vis : table to open for writing (passed to ``tbtool().open``)
    undo : when true, reverse a previous forward call

    Raises
    ------
    Exception
        On undo, when the scan numbers are too small to have been encoded.

    The table is always closed, including when an error occurs.
    """
    mytb = taskinit.tbtool()

    mytb.open(vis, nomodify=False)
    try:
        scans = mytb.getcol('SCAN_NUMBER')
        states = mytb.getcol('STATE_ID')
        print('Unique STATE_IDs =  %s' % str(pl.unique(states)))

        if undo:
            minscan = scans.min()
            # log10 is undefined for values <= 0; such scans cannot have been encoded
            d = 10**int(floor(log10(minscan))) if minscan > 0 else 1
            if d < 10:
                raise Exception('Apparently, nothing to undo')
            scans -= states
            # explicit floor division keeps the integer column integer
            scans //= d
            print('New SCAN_NUMBER = (SCAN_NUMBER - STATE_ID) / ' + str(d))
        else:
            maxstate = states.max()
            # reserve at least one decimal digit even when every STATE_ID is 0
            m = 10**int(floor(log10(maxstate) + 1.0)) if maxstate > 0 else 10
            scans *= m
            scans += states
            print('New SCAN_NUMBER = SCAN_NUMBER * ' + str(m) + ' + STATE_ID')

        mytb.putcol('SCAN_NUMBER', scans)
    finally:
        mytb.close()
Esempio n. 6
0
def plotGroupSize(AllData):
  """
  Plot the mean group score as a function of group size.

  For every group size from 1 to (number of subjects - 1), scores every
  combination of subjects of that size with groupPercentCorrect and plots the
  average over the combinations. Also prints the mean of the individual
  subject means. Returns nothing.
  """
  means = []
  subjects = range(len(AllData[1]['correct']))

  for i in subjects[1:]:
    print(subjects[1:])
    # itertools.combinations already yields each combination exactly once, in
    # sorted order. Passing the list through unique() flattens it to single
    # subject ids, so the rows are kept as a 2-D array instead.
    perms = np.array(list(it.combinations(subjects, i)))

    current_means = [groupPercentCorrect(AllData, subjects, combo)
                     for combo in perms]
    means.append(np.mean(current_means))

  fig = py.figure()
  ax10 = fig.add_subplot(111)
  ax10.plot(subjects[1:], means, 'bo', alpha=1)
  ax10.plot(subjects[1:], means, 'b', linewidth=3, alpha=0.2)
  ax10.set_ylim(-0.2,1.2)
  ax10.set_title('Group Size: Percent Correct')

  # check means of all members individually
  submeans = [getIndMeans(AllData, subjects[i])[0] for i in subjects]

  print('Individual means: %.3f ' % np.mean(submeans))
Esempio n. 7
0
def AllDataDist(AllData):
  """
  Plot majority-vote accuracy against group size for the recorded answers.

  Builds a (questions x subjects) matrix from AllData[k]['correct'], then for
  each group size takes every combination of subjects, uses the mode of their
  answers as the group's answer to each question, and averages that score
  over all questions and all combinations.

  Prints the overall mean of the matrix and the plotted values.
  Returns the list of plotted mean scores.
  """
  subjects = range(len(AllData[1]['correct']))
  matrix = py.zeros([len(AllData.keys()),len(subjects)])

  # one row per key of AllData, one column per subject
  for row, key in enumerate(AllData.keys()):
    for col in subjects:
      matrix[row][col] = AllData[key]['correct'][col]

  # "matrix[:][1]" is row 1, whose length is the subject count; the loop over
  # questions needs the number of rows instead
  n_questions = matrix.shape[0]

  meanmean = []
  for i in subjects[1:]:
    # combinations() already yields unique combinations; unique() would
    # flatten them to single subject ids, so keep them as rows of an array
    perms = np.array(list(it.combinations(subjects, i)))

    combo_means = []
    for combo in perms:
      # group answer per question = mode of the members' answers
      votes = []
      for k in range(n_questions):
        current = [matrix[k][j] for j in combo]
        votes.append( int(stats.mode(current)[0]) )
      combo_means.append(np.mean(votes))

    # average over every combination of this size (previously only the last
    # combination survived the loop), matching what plotGroupSize does
    meanmean.append(np.mean(combo_means))

  allsum = sum(sum(matrix))
  m, n = py.shape(matrix)
  print('Total mean is %.3f / %.3f = %.3f '
        % ( allsum, m*n, (allsum/(m*n))))

  # NOTE(review): meanmean[0] is group size 1, so meanmean[1::2] holds sizes
  # 2, 4, ... while subjects[1::2] is 1, 3, ...; confirm the intended x values.
  subjects = subjects[1::2]
  meanmean = meanmean[1::2]
  if len(subjects) > len(meanmean):
    subjects=subjects[1:]
  elif len(subjects) < len(meanmean):
    meanmean = meanmean[1:]
  fig = py.figure()
  ax14 = fig.add_subplot(111)
  ax14.plot(subjects, meanmean, 'bo', alpha=1)
  ax14.plot(subjects, meanmean, 'b', linewidth=3, alpha=0.2)
  ax14.set_ylim(-0.2,1.2)
  ax14.set_title('Real Data Group Size: Percent Correct')
  ax14.set_xlabel('Group size')
  ax14.set_ylabel('% Correct')
  print(meanmean)
  return meanmean
Esempio n. 8
0
def probeData(settings):
  """Plot a quick look at the start of a raw sample file.

  Reads ten code periods of int8 samples from ``settings.fileName`` and draws
  three panels: a short time-domain trace, a power spectral density, and a
  histogram of the sample values.

  Returns the figure that holds the three panels.
  """
  print("Probing data %s" % (settings.fileName,))

  code_period_rate = settings.codeFreqBasis / settings.codeLength
  samplesPerCode = int(round(settings.samplingFreq / code_period_rate))

  samples = getSamples.int8(settings.fileName,
                            10*samplesPerCode,
                            settings.skipNumberOfBytes)

  fig = pylab.figure()
  pylab.clf()

  # time axis in seconds, one entry per sample over roughly 5 ms
  n_time_steps = int(round(
      (5e-3 + 1/settings.samplingFreq)*settings.samplingFreq))
  timeScale = [step*(1/settings.samplingFreq)
               for step in range(0, n_time_steps)]

  # --- panel 1: time domain ---------------------------------------------
  pylab.subplot(2,2,1)
  # floor division: this is what "/" did on the Python 2 interpreter this code targets
  plot_max = int(round(samplesPerCode // 50))
  time_ms = [1000*t for t in timeScale[0:plot_max]]
  pylab.plot(time_ms, samples[0:plot_max])
  pylab.title('Time domain plot')
  pylab.xlabel('Time (ms)')
  pylab.ylabel('Amplitude')

  # --- panel 2: frequency domain (mean removed, frequency in MHz) ---------
  centred = samples-numpy.mean(samples)
  (Pxx,freqs) = matplotlib.mlab.psd(x=centred,
                                    noverlap=1024,
                                    NFFT=2048,
                                    Fs=settings.samplingFreq/1e6)
  pylab.subplot(2,2,2)
  pylab.semilogy(freqs,Pxx)
  pylab.title('Frequency Domain Plot')
  pylab.xlabel('Frequency (MHz)')
  pylab.ylabel('Magnitude')

  # --- panel 3: histogram, one bin per distinct sample value --------------
  pylab.subplot(2,2,3)
  levels = pylab.unique(samples)
  pylab.hist(samples,len(levels))
  limits = pylab.axis()
  pylab.axis([min(samples),max(samples),limits[2],limits[3]])
  pylab.xticks(pylab.unique(pylab.round_(levels)))
  pylab.title('Histogram')

  return fig
Esempio n. 9
0
def randDist(AllData, n_subjects=15, n_questions=20):
  """
  Plot majority-vote accuracy against group size for randomly generated answers.

  Parameters
  ----------
  AllData : passed through to genRandMatrix
  n_subjects : number of simulated subjects (columns requested from
      genRandMatrix); defaults to the previously hard-coded 15
  n_questions : number of simulated questions (rows of the matrix) to score;
      defaults to the previously hard-coded 20

  For each group size, every combination of subjects is scored by taking the
  mode of the members' answers per question, and the score is averaged over
  all questions and all combinations.

  Prints the overall mean of the matrix and the plotted values.
  Returns the list of plotted mean scores.
  """
  subjects = range(n_subjects)
  matrix = genRandMatrix(AllData, n_subjects)

  meanmean = []
  for i in subjects[1:]:
    # combinations() already yields unique combinations; unique() would
    # flatten them to single subject ids, so keep them as rows of an array
    perms = np.array(list(it.combinations(subjects, i)))

    combo_means = []
    for combo in perms:
      # group answer per question = mode of the members' answers
      votes = []
      for k in range(n_questions):
        current = [matrix[k][j] for j in combo]
        votes.append( int(stats.mode(current)[0]) )
      combo_means.append(np.mean(votes))

    # average over every combination of this size (previously only the last
    # combination survived the loop)
    meanmean.append(np.mean(combo_means))

  allsum = sum(sum(matrix))
  m, n = py.shape(matrix)
  print('Total mean is %.3f / %.3f = %.3f '
        % ( allsum, m*n, (allsum/(m*n))))

  # NOTE(review): meanmean[0] is group size 1, so meanmean[1::2] holds sizes
  # 2, 4, ... while subjects[1::2] is 1, 3, ...; confirm the intended x values.
  subjects = subjects[1::2]
  meanmean = meanmean[1::2]
  fig = py.figure()
  ax13 = fig.add_subplot(111)
  ax13.plot(subjects, meanmean, 'bo', alpha=1)
  ax13.plot(subjects, meanmean, 'b', linewidth=3, alpha=0.2)
  ax13.set_ylim(-0.2,1.2)
  ax13.set_title('Random Group Size: Percent Correct')
  ax13.set_xlabel('Group size')
  ax13.set_ylabel('% Correct')
  print(meanmean)
  return meanmean
Esempio n. 10
0
 def pixSeedfillBinary(self, Imask, Iseed):
     """Grow the seed image to cover every mask component that contains a seed.

     Connected components of ``Imask`` are found with a full 3x3 structuring
     element. Every component that overlaps a non-zero pixel of ``Iseed`` is
     set to 1 in the result.

     Parameters
     ----------
     Imask : array whose non-zero pixels define the components
     Iseed : array of seed pixels (multiplied with the component labels, so it
         is expected to hold 0/1 values)

     Returns
     -------
     A copy of ``Iseed`` with the touched components filled; the inputs are
     not modified.
     """
     Iseedfill = copy.deepcopy(Iseed)
     s = ones((3, 3))
     Ijmask, k = ndimage.label(Imask, s)
     # labels of the components that contain at least one seed pixel
     seeded_labels = unique(Ijmask * Iseedfill)
     for label in seeded_labels:
         # 0 is the background; skipping it (instead of list.remove(0)) also
         # works when every pixel is both masked and seeded
         if label == 0:
             continue
         Iseedfill[Ijmask == label] = 1
     return Iseedfill
Esempio n. 11
0
def plot_all_predictions_over_time(data, predicted, cmap=pl.cm.spectral, alpha=1., more_data=None):
    """ Draw the predictions over time once for every distinct age in the data

    Parameters
    ----------
    data : data rec with an ``age`` column
    predicted : pymc trace
    cmap, alpha, more_data : forwarded unchanged to
        plot_all_predictions_over_time_for_age
    """
    forwarded = dict(cmap=cmap, alpha=alpha, more_data=more_data)
    for age in pl.unique(data.age):
        print('plotting for age %s' % age)
        plot_all_predictions_over_time_for_age(data, predicted, age=age, **forwarded)
def prior_m_area(dm3, model_num, data_type):
    """Build the prior table for the area effects found in ``dm3.input_data``.

    Every row gets mean 0, std 1 and bounds '-inf' / 'inf'. The 'type' column
    is set to 'm_sub', 'm_region' or 'm_super' depending on whether the area
    is found among the countries, regions or super-regions of the hierarchy
    of the model loaded with ``mu.load_new_model(model_num, 'all', data_type)``.

    Returns the filled table created by ``empty_prior_in``.
    """
    # create 'm_sub'/'m_region' from unique input_data['area']
    # NOTE(review): relies on pl.unique(...) returning an object with an
    # ``.index`` attribute (pandas-like), not a plain ndarray; confirm
    # against the pandas/numpy versions in use.
    prior_in = empty_prior_in(pl.unique(dm3.input_data['area']).index)
    prior_in['name'] = pl.unique(dm3.input_data['area'])
    prior_in['mean'] = 0.
    prior_in['std'] = 1.
    prior_in['lower'] = '-inf'
    prior_in['upper'] = 'inf'
    # create hierarchy
    model = mu.load_new_model(model_num, 'all', data_type)
    # neighbours of 'all' are taken as super-regions, their neighbours as
    # regions, and the neighbours of those as countries
    superregion = set(model.hierarchy.neighbors('all'))
    region = set(pl.flatten([model.hierarchy.neighbors(sr) for sr in model.hierarchy.neighbors('all')]))
    country = set(pl.flatten([[model.hierarchy.neighbors(r) for r in model.hierarchy.neighbors(sr)] for sr in model.hierarchy.neighbors('all')]))
    # create data area levels
    # the area is looked up in input_data by the same label i that addresses
    # the prior row; areas found at none of the three levels get no 'type'
    for i in pl.unique(dm3.input_data['area']).index:
        if dm3.input_data.ix[i,'area'] in country:
            prior_in.ix[i,'type'] = 'm_sub'
        elif dm3.input_data.ix[i,'area'] in region:
            prior_in.ix[i,'type'] = 'm_region'
        elif dm3.input_data.ix[i,'area'] in superregion:
            prior_in.ix[i,'type'] = 'm_super'
    return prior_in
Esempio n. 13
0
 def setAnnotationsButtons(self, _annotation):
     """Enable the annotation buttons that match the values present in ``_annotation``.

     With no annotation (empty, or summing to zero) the widget is reset.
     Otherwise either all buttons are enabled (when every button has a
     matching value) or only the present values are enabled and the others
     unchecked; the action button is switched off when nothing is checked.
     """
     nothing_annotated = is_empty(_annotation) or pl.sum(_annotation) == 0
     self.set_title(nothing_annotated)
     if nothing_annotated:
         self.reset()
         return

     present_values = list(pl.unique(_annotation))
     if len(present_values) != self.buttons_count:
         self.setEnabledAnnotations(present_values)
         self.setUncheckNotAnnotations(present_values)
     else:
         self.setEnabledAnnotations(ALL_ANNOTATIONS)

     if self.isAllUnchecked():
         self.__action_button__.setChecked(False)
         self.__action_button__.setEnabled(False)
Esempio n. 14
0
 def computePerformance(self,idx=None,round_prec=4):
     """Summarise the trial scores per (rounded) target contrast.

     Parameters
     ----------
     idx : when given, only trials whose ``target_index`` equals it are used
     round_prec : number of decimals used to group contrasts

     Returns
     -------
     Array with three rows: the sorted distinct contrasts, the summed score
     for each contrast, and the number of trials for each contrast.
     """
     if idx is None:
         trials = self.trials
     else:
         trials = [trial for trial in self.trials if trial.target_index == idx]

     # group the scores by rounded contrast in a single pass over the trials
     scores_by_contrast = {}
     for trial in trials:
         contrast = round(trial.target_contrast, round_prec)
         scores_by_contrast.setdefault(contrast, []).append(trial.score)

     xs = sorted(scores_by_contrast)
     ks = pl.array([sum(scores_by_contrast[x]) for x in xs])
     ns = pl.array([len(scores_by_contrast[x]) for x in xs])
     return pl.array([xs, ks, ns])
def plot_each_country(axis_bounds=(.8, .99, 1.1, 3.)):
    """Plot TFR against HDI for every country on a 3x4 grid of panels.

    Countries are assigned to panels in blocks of twelve (countries 0-11 go to
    panel 1, 12-23 to panel 2, ...). Tick labels are kept only on the bottom
    row and the left column.

    Parameters
    ----------
    axis_bounds : sequence (xmin, xmax, ymin, ymax) applied to every panel
        that receives a curve.
    """
    for i, country in enumerate(pl.unique(data.all.country)):
        # floor division: the panel number must be an integer on Python 2 and 3
        pl.subplot(3, 4, i // 12 + 1)
        rows = data.all.country == country
        pl.plot(data.all.hdi[rows],
                data.all.tfr[rows],
                linewidth=4, alpha=.8)
        pl.axis(list(axis_bounds))

    for r in range(3):
        for c in range(4):
            pl.subplot(3, 4, r*4+c+1)
            if r != 2:
                pl.xticks([])
            if c != 0:
                pl.yticks([])

    pl.subplots_adjust(.05, .05, .95, .95, 0, 0)
Esempio n. 16
0
 def epsilon_greedy_probability(self, state, action):
     """Return the probability of picking ``action`` in ``state`` under epsilon-greedy.

     When all action values are distinct, the greedy action gets
     ``self.optimal_p`` and every other action epsilon / (number of actions).
     When some values are tied, the greedy share (1 - epsilon) is split
     evenly among the actions whose value equals the maximum.
     """
     q_values = self.get_q(state)
     n_actions = self.env.get_num_actions()
     exploration_share = self.epsilon / n_actions

     # all values distinct: a single greedy action
     if not size(unique(q_values)) < n_actions:
         if action == argmax(q_values):
             return self.optimal_p
         return exploration_share

     # at least two values are equal: share the greedy mass among the maxima
     best_value = max(q_values)
     if q_values[action] != best_value:
         return exploration_share
     n_best = len([value for value in q_values if value == best_value])
     return ((1-self.epsilon) / n_best) + exploration_share
Esempio n. 17
0
def plot_each_country(axis_bounds=(.8, .99, 1.1, 3.)):
    """Plot TFR against HDI for every country on a 3x4 grid of panels.

    Countries are assigned to panels in blocks of twelve (countries 0-11 go to
    panel 1, 12-23 to panel 2, ...). Tick labels are kept only on the bottom
    row and the left column.

    Parameters
    ----------
    axis_bounds : sequence (xmin, xmax, ymin, ymax) applied to every panel
        that receives a curve.
    """
    for i, country in enumerate(pl.unique(data.all.country)):
        # floor division: the panel number must be an integer on Python 2 and 3
        pl.subplot(3, 4, i // 12 + 1)
        rows = data.all.country == country
        pl.plot(data.all.hdi[rows],
                data.all.tfr[rows],
                linewidth=4,
                alpha=.8)
        pl.axis(list(axis_bounds))

    for r in range(3):
        for c in range(4):
            pl.subplot(3, 4, r * 4 + c + 1)
            if r != 2:
                pl.xticks([])
            if c != 0:
                pl.yticks([])

    pl.subplots_adjust(.05, .05, .95, .95, 0, 0)
Esempio n. 18
0
def astausgleich(ab2org, mn2org, rhoaorg):
    """Shift the branches of a dc sounding so that they form one matching curve.

    The readings are grouped by their MN/2 value. Going through the branches
    in ascending MN/2, each following branch is multiplied by the mean ratio
    (previous branch / this branch) taken at the AB/2 values both share.
    Branches without shared AB/2 values, or with a non-finite or non-positive
    ratio, are left unchanged.

    Parameters
    ----------
    ab2org : sequence of AB/2 values
    mn2org : sequence of MN/2 values, same length
    rhoaorg : sequence of apparent resistivities, same length

    Returns
    -------
    New float array with the shifted resistivities; ``rhoaorg`` is not modified.
    """
    ab2 = P.asarray(ab2org)
    mn2 = P.asarray(mn2org)
    # work on a float copy: asarray() would alias an ndarray argument (and so
    # change the caller's data), and in-place scaling fails on integer arrays
    rhoa = P.array(rhoaorg, dtype=float)
    um = P.unique(mn2)
    for i in range(len(um) - 1):
        this_branch = mn2 == um[i]
        next_branch = mn2 == um[i + 1]
        r0, r1 = [], []
        # AB/2 values measured on both neighbouring branches
        ac = P.intersect1d(ab2[this_branch], ab2[next_branch])
        for a in ac:
            r0.append(rhoa[(ab2 == a) & this_branch][0])
            r1.append(rhoa[(ab2 == a) & next_branch][0])

        if len(r0) > 0:
            fak = P.mean(P.array(r0) / P.array(r1))
            print(fak)
            if P.isfinite(fak) and fak > 0.:
                rhoa[next_branch] *= fak

    return rhoa  # formerly pg as vector
Esempio n. 19
0
def plotPETH():
    """Plot a peri-event time histogram with one curve per cell population.

    Spike times in ``s.allspiketimes`` are binned in 20 ms bins for each
    distinct value of ``s.cellpops``; the curves are drawn in a new figure
    with about ten x ticks labelled with the bin start time in ms.
    """
    binsize = 20  # bin size in ms
    binedges = arange(0, s.duration + binsize, binsize)
    # population of the cell that fired each spike (same for every population
    # plotted, so it is computed once)
    spikepops = array([s.cellpops[int(i)] for i in s.allspikecells])
    peth = []
    for ipop in unique(s.cellpops):
        hist, binedges = histogram(s.allspiketimes[spikepops == ipop],
                                   binedges)
        peth.append(hist)
    figure()
    plot(array(peth).T)
    title('PETH (%d ms bins)' % binsize)
    xlabel('Time (ms)')
    ylabel('Spikes/bin')
    ylim(0, s.scale * binsize * 2)
    h = axes()
    # integer step (a float step breaks range() and slicing on Python 3) and
    # never zero, so short simulations still get ticks
    tickstep = max(1, len(binedges) // 10)
    # the x axis is the bin index 0 .. len(binedges)-2; stopping there also
    # keeps the number of ticks equal to the number of labels
    h.set_xticks(range(0, len(binedges) - 1, tickstep))
    h.set_xticklabels(binedges[0:-1:tickstep].astype(int))
    legend(s.popnames)
Esempio n. 20
0
def astausgleich(ab2org, mn2org, rhoaorg):
    """Shift the branches of a dc sounding so that they form one matching curve.

    The readings are grouped by their MN/2 value. Going through the branches
    in ascending MN/2, each following branch is multiplied by the mean ratio
    (previous branch / this branch) taken at the AB/2 values both share.
    Branches without shared AB/2 values, or with a non-finite or non-positive
    ratio, are left unchanged.

    Parameters
    ----------
    ab2org : sequence of AB/2 values
    mn2org : sequence of MN/2 values, same length
    rhoaorg : sequence of apparent resistivities, same length

    Returns
    -------
    New float array with the shifted resistivities; ``rhoaorg`` is not modified.
    """
    ab2 = P.asarray(ab2org)
    mn2 = P.asarray(mn2org)
    # work on a float copy: asarray() would alias an ndarray argument (and so
    # change the caller's data), and in-place scaling fails on integer arrays
    rhoa = P.array(rhoaorg, dtype=float)
    um = P.unique(mn2)
    for i in range(len(um) - 1):
        this_branch = mn2 == um[i]
        next_branch = mn2 == um[i + 1]
        r0, r1 = [], []
        # AB/2 values measured on both neighbouring branches
        ac = P.intersect1d(ab2[this_branch], ab2[next_branch])
        for a in ac:
            r0.append(rhoa[(ab2 == a) & this_branch][0])
            r1.append(rhoa[(ab2 == a) & next_branch][0])

        if len(r0) > 0:
            fak = P.mean(P.array(r0) / P.array(r1))
            print(fak)
            if P.isfinite(fak) and fak > 0.:
                rhoa[next_branch] *= fak

    return rhoa  # formerly pg as vector
Esempio n. 21
0
def plot_all_predictions_over_time(data,
                                   predicted,
                                   cmap=pl.cm.spectral,
                                   alpha=1.,
                                   more_data=None):
    """ Draw the predictions over time once for every distinct age in the data

    Parameters
    ----------
    data : data rec with an ``age`` column
    predicted : pymc trace
    cmap, alpha, more_data : forwarded unchanged to
        plot_all_predictions_over_time_for_age
    """
    forwarded = dict(cmap=cmap, alpha=alpha, more_data=more_data)
    for age in pl.unique(data.age):
        print('plotting for age %s' % age)
        plot_all_predictions_over_time_for_age(data,
                                               predicted,
                                               age=age,
                                               **forwarded)
Esempio n. 22
0
def discreteRawPDF(data):
    """
    Compute the empirical (unbinned) probability of each distinct value in 'data'.

    Returns a pair (support, pSupport): the sorted distinct values and, in the
    same order, the fraction of entries of 'data' equal to each of them.
    """
    # distinct values in ascending order together with how often each occurs
    support, occurrences = numpy.unique(data, return_counts=True)

    # normalise the counts so that they sum to one
    pSupport = occurrences.astype(float)
    pSupport = pSupport / pSupport.sum()

    return support, pSupport
Esempio n. 23
0
def FourD():
    """Plot energy histograms per spin configuration and report the most frequent energy.

    For each initial spin configuration ("up", "random") one figure is made
    with a step histogram of the energies for each temperature, read from
    ../data/4c/ and saved to ../figs/4d/. Afterwards the most frequent energy,
    its count, a probability estimate and the last variance value are printed
    for every (spin, temperature) pair.
    """
    L = 20
    mc = int(1e5)
    temps = [100, 240]
    spinconfigs = ["up", "random"]
    most_often = {}

    for spin in spinconfigs:
        pl.figure()

        for temp in temps:
            # column 0: energies, column 1: variance
            Enername = ("Energyprob_L" + str(L) + "_mc" + str(mc)
                        + "_T" + str(temp) + "_spin" + str(spin))
            datafile = '../data/4c/' + Enername + ".dat"
            energies, variance = pl.loadtxt(datafile, usecols=(0, 1), unpack=True)

            pl.hist(energies, normed=0, bins=100, histtype="step",
                    label="Temp=%s" % temp)

            # one bin per distinct energy; the fullest bin gives the most
            # frequent energy (reported at the bin centre)
            n_levels = len(pl.unique(energies))
            hist, bins = pl.histogram(energies, bins=n_levels)
            lower_edges = bins[:-1]
            bin_width = bins[1] - bins[0]
            E = lower_edges[pl.argmax(hist)] + 0.5 * bin_width
            most_often[spin + " " + str(temp)] = E, max(hist), variance[-1]

        pl.title("Energy occurrence histogram for spin %s" % spin)
        pl.xlabel("Occurring energies")
        pl.ylabel("Count of energy")
        pl.xlim([-820, -350])
        pl.legend(loc="best")
        pl.savefig("../figs/4d/probabilityhistogram_%s.png" % spin)

    for label, summary in most_often.items():
        energy, count, last_variance = summary
        print('%s %s %s %s %s' % (label, " energy:", energy,
                                  "\n---          count:", count))
        print("        Prob of state: %g " % (count / 87000.))
        print("             Variance: %g " % (last_variance))
Esempio n. 24
0
def main(filename, verbosity, plots=False, **kwargs):
    """Parse a neutron data file, write it out, and print a summary of its contents.

    Parameters
    ----------
    filename : path handed to ``neutronParser``
    verbosity : accepted but not used here
        NOTE(review): the parser is always created with verbose_level=1;
        confirm whether ``verbosity`` was meant to be passed instead.
    plots : when true, also show the first ten raw traces and one energy
        histogram per detector
    **kwargs : accepted and ignored
    """
    print('here!', filename)

    print('\n' + '-' * 40 + '\n')
    f = neutronParser(filename, verbose_level=1)
    f.parse()
    print()
    f.write()

    boards = pylab.unique(f.data['ADCBoard'])
    print('ADC Boards:   ', boards)
    print('ADC Channels: ', pylab.unique(f.data['ADCChannel']))
    print('Detectors:    ', pylab.unique(f.data['Detector']))
    # channels seen on each board
    for board in boards:
        on_board = f.data['ADCBoard'] == board
        print(board, pylab.unique((f.data['ADCChannel'])[on_board]))

    if not plots:
        return

    # first ten raw traces
    pylab.figure()
    for trace in range(10):
        pylab.plot(f.data['RawSamples'][trace], label='{:d}'.format(trace))
    pylab.legend()

    # energy spectrum per detector, drawn at the bin centres
    pylab.figure()
    for detector in pylab.unique(f.data['Detector']):
        print(detector)
        energies = (f.data['Energy'])[f.data['Detector'] == detector]
        counts, edges = pylab.histogram(energies,
                                        range=[0., 4096.],
                                        bins=1000)
        bin_centers = 0.5 * (edges[1:] + edges[:-1])
        pylab.plot(bin_centers,
                   counts,
                   label='D{:02d}'.format(detector),
                   drawstyle='steps-mid')
    pylab.xlim(0., 4096.)
    pylab.legend()
    pylab.show()
Esempio n. 25
0
import pandas

# Posterior file of run dm-20084 (prevalence, north_africa_middle_east, male, 2005);
# the default integer index is kept (index_col=None).
X = pandas.read_csv('/home/j/Project/dismod/dismod_status/prod/dm-20084/posterior/dm-20084-prevalence-north_africa_middle_east-male-2005.csv', index_col=None)

# Same region/sex/year from run dm-19807, loaded for comparison with X.
Y = pandas.read_csv('/home/j/Project/dismod/dismod_status/prod/dm-19807/posterior/dm-19807-prevalence-north_africa_middle_east-male-2005.csv', index_col=None)

import pylab as pl


def weighted_age(df):
    """Return the population-weighted sum of every 'Draw' column of ``df``.

    Each row's draws are multiplied by that row's share of the total
    'Population'; the result has one value per draw column.
    """
    draws = df.filter(like='Draw')
    population = df['Population']
    # transpose so that the population values line up with the rows of df
    scaled = draws.T * population / population.sum()
    return scaled.T.sum()


# Figure 1: per-country mean over the draws in X, plus the population-weighted
# regional curve.
pl.figure()
for iso in list(pl.unique(X['Iso3'])):
    pl.plot(X[X['Iso3']==iso].filter(like='Draw').mean(1).__array__(), label=iso)
# dummy point that switches the y axis to a log scale
pl.semilogy([1],[1])

Z = X.groupby('Age').apply(weighted_age)
# only "pylab as pl" is imported, so a bare plot() is a NameError
pl.plot(Z.mean(1).__array__(), color='red', linewidth=3, alpha=.5, label='Inconsistent NA/ME')

pl.legend()
pl.axis([-5,130,1e-6,2])



# Figure 2: per-country mean over the draws in Y, prevalence rows only.
pl.figure()
for iso in list(pl.unique(Y['Iso3'])):
    pl.plot(Y[(Y['Iso3']==iso)&(Y['Rate type']=='prevalence')].filter(like='Draw').mean(1).__array__(), label=iso)
Esempio n. 26
0
def tsysNormalize(vis,
                  tsysTable='',
                  newTsysTable='',
                  scaleSpws=[],
                  verbose=False):
    """
    Generate Tsys entries for one field from other fields, using autocorr
    (linear!) or SQLD data to determine the change in Tsys.
    Inputs:
     vis          the MS
     tsysTable:  the tsys caltable (default = <vis>.tsys)
     newTsysTable:  the new tsys caltable to create (default = <tsysTable>_normalized)
     scaleSpws:  spws whose power is used for scaling (integer list or
          comma-delimited string); when fewer are given than there are Tsys
          spws, they are looked up with scienceSpwForTsysSpw instead
     verbose:  print extra progress information
    Returns: None normally (also when the Tsys table or the MS is missing),
     -1 when a field has no non-Tsys scan to take a reference power from.
    Side effects: replaces any existing <newTsysTable> with a copy of
     <tsysTable> whose FPARAM cells for every Tsys scan after the first are
     rescaled relative to the first Tsys scan.
    """

    # intents likely to imply different attenuations or tuning to science-like
    # scans that we are applying Tsys to.
    print("Entered")
    badIntents = [
        'CALIBRATE_POINTING', 'CALIBRATE_FOCUS', 'CALIBRATE_SIDEBAND_RATIO',
        'CALIBRATE_ATMOSPHERE'
    ]
    if (tsysTable == ''):
        tsysTable = vis + '.tsys'
    if (not os.path.exists(tsysTable)):
        print("Cannot find Tsys table: ", tsysTable)
        return
    if (not os.path.exists(vis)):
        print("Cannot find measurement set: ", vis)
        return

    t = time.time()
    mytb = taskinit.tbtool()
    mymsmd = taskinit.msmdtool()
    mytb.open(tsysTable, nomodify=False)
    mymsmd.open(vis)
    print("tsysNormalize: initial setup took %.3f seconds" % (time.time() - t))

    # For convenience squish the useful columns into unique lists
    t = time.time()
    tsysSpws = pb.unique(mytb.getcol("SPECTRAL_WINDOW_ID"))
    tsysScans = pb.unique(mytb.getcol("SCAN_NUMBER"))
    tsysTimes = pb.unique(mytb.getcol("TIME"))
    tsysFields = pb.unique(mytb.getcol("FIELD_ID"))
    tsysAntennas = pb.unique(mytb.getcol("ANTENNA1"))
    if isinstance(scaleSpws, str):
        scaleSpws = [int(i) for i in scaleSpws.split(',')]
    if len(scaleSpws) < len(tsysSpws):
        # a fresh list is built here, so the shared default [] is never mutated
        scaleSpws = []
        for tsysSpw in tsysSpws:
            scaleSpws.append(scienceSpwForTsysSpw(mymsmd, tsysSpw))
        print("Identified autocorrelation spws to use: ", scaleSpws)
    print("Tsys Spws (%d):" % len(tsysSpws), tsysSpws)
    print("Tsys Scans (%d):" % len(tsysScans), tsysScans)
    print("Tsys Times (%d):" % len(tsysTimes), tsysTimes)
    print("Tsys Fields (%d):" % len(tsysFields), tsysFields)
    print("Tsys Antennas (%d):" % len(tsysAntennas), tsysAntennas)

    # Gather the power levels to use in the normalization process
    refPowers = {}
    refScans = {}
    for f in tsysFields:
        scanFieldsTab = mytb.query('FIELD_ID==%d' % f)
        fieldTsysScans = pb.unique(scanFieldsTab.getcol("SCAN_NUMBER"))
        scanFieldsTab.close()
        fieldAllScans = mymsmd.scansforfield(f)
        fieldNonTsysScans = [
            x for x in fieldAllScans if x not in fieldTsysScans
        ]
        fieldName = mymsmd.namesforfields(f)[0]
        if (len(fieldNonTsysScans) < 1):
            # Then there is no non-tsys scan for this field, e.g. which can happen in a mosaic where the Tsys scan has a different field ID,
            # but in this case the field name will have other scans with different field IDs, so revert to using field names.  Using field
            # names might work from the outset, but I have not tried it.
            fieldAllScans = mymsmd.scansforfield(fieldName)
            fieldNonTsysScans = [
                x for x in fieldAllScans if x not in fieldTsysScans
            ]
            if (len(fieldNonTsysScans) < 1):
                print(
                    "****** This field (id=%d, name=%s) appears to have no non-Tsys-like-scans, and thus cannot be normalized."
                    % (f, fieldName))
                # release both tools before bailing out
                mymsmd.close()
                mytb.close()
                return -1
        # keep only the scans none of whose intents starts with a bad intent
        scienceLikeScans = []
        for s in fieldNonTsysScans:
            intents = mymsmd.intentsforscan(s)
            good = True
            for i in intents:
                for b in badIntents:
                    if i.startswith(b):
                        good = False
                        break
            if good: scienceLikeScans.append(s)
        # for every Tsys scan pick the science-like scan with the nearest scan
        # number (-1 when there is none)
        powerRefScans = []
        for s in fieldTsysScans:
            minDist = 9999999
            refScan = -1
            for r in scienceLikeScans:
                dist = abs(r - s)
                if dist < minDist:
                    minDist = dist
                    refScan = r
            powerRefScans.append(refScan)
        # fieldName (capital N) is the variable assigned above
        print("Field %d (%s) Tsys scans:" % (f, fieldName), fieldTsysScans,
              ", All scans:", fieldAllScans, ", Non-Tsys scans:",
              fieldNonTsysScans, ", Non-Tsys science-like scans:",
              scienceLikeScans)
        for i in range(len(fieldTsysScans)):
            print("        Tsys scan %3d power reference scan: %3d" %
                  (fieldTsysScans[i], powerRefScans[i]))
            refScans[fieldTsysScans[i]] = powerRefScans[i]
        if verbose:
            print(
                "populating powers corresponding to each Tsys scan on field %d..."
                % (f))
        for i in range(len(fieldTsysScans)):
            refPowers[fieldTsysScans[i]] = []
            for spw in scaleSpws:
                if verbose:
                    print("calling getPower(vis, %d, %d, 10.0, %s)" %
                          (powerRefScans[i], spw,
                           str(powerRefScans[i] < fieldTsysScans[i])))
                p = getPower(vis,
                             powerRefScans[i],
                             spw,
                             10.0,
                             powerRefScans[i] < fieldTsysScans[i],
                             verbose=verbose)
                refPowers[fieldTsysScans[i]].append(p)
            if verbose:
                print("powers to use for Tsys scan %d:" % fieldTsysScans[i],
                      refPowers[fieldTsysScans[i]])
    if verbose: print(refPowers)

    print("tsysNormalize: summarising Tsys table took %.3f seconds" %
          (time.time() - t))
    t = time.time()

    # Now copy the original Tsys caltable and update all the values in the new one.
    if (newTsysTable == ''):
        newTsysTable = tsysTable + '_normalized'
    if (os.path.exists(newTsysTable)):
        shutil.rmtree(newTsysTable)
    mytb.copy(newTsysTable)
    mytb.close()
    mytb.open(newTsysTable, nomodify=False)
    # the first Tsys scan is the reference every later scan is scaled against
    startRefPower = refPowers[tsysScans[0]]
    for i in range(1, len(tsysScans)):
        # need to adjust each successive Tsys
        refPower = refPowers[tsysScans[i]]
        for ispw in range(len(tsysSpws)):
            spw = tsysSpws[ispw]
            # NOTE(review): the query uses the loop counter as ANTENNA1, not
            # tsysAntennas[ant]; confirm antenna ids are always 0..N-1.
            for ant in range(len(tsysAntennas)):
                tsysSubTab1 = mytb.query(
                    "SCAN_NUMBER==%d AND SPECTRAL_WINDOW_ID==%d AND ANTENNA1==%d"
                    % (tsysScans[i], tsysSpws[ispw], ant))
                tsys1 = tsysSubTab1.getcell('FPARAM', 0)
                newTsys = tsysSubTab1.getcell('FPARAM', 0)
                for pol in range(len(tsys1)):
                    for chan in range(len(tsys1[pol])):
                        a = TsysAfterPowerChange(
                            refPowers[tsysScans[i]][ispw][ant][pol],
                            startRefPower[ispw][ant][pol], tsys1[pol][chan])
                        newTsys[pol][chan] = a
                    print("Scan %2d spw %2d pol %d mean %.1f --> %.1f" %
                          (tsysScans[i], spw, pol, np.mean(
                              tsys1[pol]), np.mean(newTsys[pol])))
                tsysSubTab1.putcell('FPARAM', 0, newTsys)
                tsysSubTab1.close()
    mymsmd.close()
    mytb.close()
Esempio n. 27
0
def tsysTransfer(vis,
                 scaleSpws='',
                 tsysTable='',
                 newTsysTable='',
                 verbose=False,
                 overwrite=True,
                 printAntenna=0,
                 printPol=0):
    """
    Generate a new Tsys table where the entries for one field are propagated to
    other fields which do not have a measured Tsys, using autocorr
    (linear!) or SQLD data to determine the change in Tsys.
    Input:
     vis          the MS
     scaleSpws    the autocorr or SQLD SpWs to use for scaling (integer list or
          comma-delimited string, default is the channel-averaged science spws)
     tsysTable:   if blank, then try vis+'.tsys', falling back to the first
          '*tsyscal.tbl' found inside vis
     newTsysTable:   if blank, then use vis+'.newtsys'
     verbose:     print additional diagnostics while running
     overwrite:   if True, remove any pre-existing newTsysTable first
     printAntenna: print the before/after values for this antenna ID
     printPol: print the before/after values for this polarization (0 or 1)
    Returns: None on completion, and also when the Tsys table or the MS
     cannot be found (a message is printed); -1 if some Tsys field has no
     non-Tsys scan from which a power reference could be taken.
    """
    # intents likely to imply different attenuations or tuning to science-like
    # scans that we are applying Tsys to.
    badIntents = [
        'CALIBRATE_POINTING', 'CALIBRATE_FOCUS', 'CALIBRATE_SIDEBAND_RATIO',
        'CALIBRATE_ATMOSPHERE'
    ]
    if isinstance(scaleSpws, str):
        if (len(scaleSpws) > 0):
            scaleSpws = [int(i) for i in scaleSpws.split(',')]
    if (tsysTable == ''):
        tsysTable = vis + '.tsys'
        if not os.path.exists(tsysTable):
            tsysTables = glob.glob(os.path.join(vis, '*tsyscal.tbl'))
            if len(tsysTables) < 1:
                print("Could not find any tsys tables.")
                return
            tsysTable = tsysTables[0]
    if not os.path.exists(tsysTable):
        print("Could not find tsys table: %s" % (tsysTable))
        return
    if (newTsysTable == ''):
        newTsysTable = vis + '.newtsys'
    if overwrite and os.path.exists(newTsysTable):
        print("Removing pre-existing newTsysTable: ", newTsysTable)
        rmtables(newTsysTable)
        # fall back to a plain directory removal if rmtables left it behind
        if os.path.exists(newTsysTable):
            shutil.rmtree(newTsysTable)
    if (not os.path.exists(tsysTable)):
        print("Cannot find Tsys table: ", tsysTable)
        return
    if (not os.path.exists(vis)):
        print("Cannot find measurement set: ", vis)
        return

    t = time.time()
    mytb = taskinit.tbtool()
    mymsmd = taskinit.msmdtool()
    mytb.open(tsysTable, nomodify=False)
    mymsmd.open(vis)
    print("tsysTransfer: initial setup took %.3f seconds" % (time.time() - t))

    # For convenience squish the useful columns into unique lists
    t = time.time()
    tsysSpws = pb.unique(mytb.getcol("SPECTRAL_WINDOW_ID"))
    tsysBasebands = getBasebands(mymsmd, tsysSpws)
    tsysScans = pb.unique(mytb.getcol("SCAN_NUMBER"))
    tsysTimes = pb.unique(mytb.getcol("TIME"))
    tsysFields = pb.unique(mytb.getcol("FIELD_ID"))
    tsysAntennas = pb.unique(mytb.getcol("ANTENNA1"))
    print("Tsys SpWs (%d):" % len(tsysSpws), tsysSpws)
    print("Tsys Basebands (%d):" % len(tsysSpws), tsysBasebands)
    print("Tsys Scans (%d):" % len(tsysScans), tsysScans)
    print("Tsys Times (%d):" % len(tsysTimes), tsysTimes)
    print("Tsys Fields (%d):" % len(tsysFields), tsysFields)
    print("Tsys Antennas (%d):" % len(tsysAntennas), tsysAntennas)
    if (len(scaleSpws) == 0):
        # number of scaleSpws should not exceed number of Tsys spws
        scaleSpws = np.unique(getChannelAveragedScienceSpws(vis,
                                                            mymsmd=mymsmd))
        scaleBasebands = getBasebands(mymsmd, scaleSpws)
        if scaleBasebands != tsysBasebands:
            # scaleSpws[k] is later paired with tsysSpws[k], so both lists
            # must walk the basebands in the same order
            print("re-ordering scaleSpws to match Tsys basebands")
            newScaleSpws = []
            for baseband in tsysBasebands:
                newScaleSpws.append(scaleSpws[scaleBasebands.index(baseband)])
            scaleSpws = newScaleSpws
            scaleBasebands = tsysBasebands[:]
        print("Getting power from spws: ", scaleSpws)

    # Representative time of each Tsys scan: the mean of its TIME column
    tsysScanTimes = {}
    for s in tsysScans:
        st = mytb.query('SCAN_NUMBER==%d' % s)
        ts = st.getcol("TIME")
        st.close()
        tsysScanTimes[s] = sum(ts) / float(len(ts))
        if verbose:
            print("Tsys scan %d assumed time: %.4f" % (s, tsysScanTimes[s]))

    # refPowers[tsysScan] -> list (one entry per scaleSpw) of getPower results
    # refScans[tsysScan]  -> the scan used as power reference for that Tsys scan
    # tsysScansOnField[fieldId] -> the Tsys scans taken on that field
    refPowers = {}
    refScans = {}
    tsysScansOnField = {}
    for f in tsysFields:
        scanFieldsTab = mytb.query('FIELD_ID==%d' % f)
        fieldTsysScans = pb.unique(scanFieldsTab.getcol("SCAN_NUMBER"))
        scanFieldsTab.close()
        tsysScansOnField[f] = fieldTsysScans
        fieldAllScans = mymsmd.scansforfield(f)
        fieldName = mymsmd.namesforfields(f)[0]
        fieldNonTsysScans = [
            x for x in fieldAllScans if x not in fieldTsysScans
        ]
        if (len(fieldNonTsysScans) < 1):
            # Then there is no non-tsys scan for this field, e.g. which can happen in a mosaic where the Tsys scan has a different field ID,
            # but in this case the field name will have other scans with different field IDs, so revert to using field names.  Using field
            # names might work from the outset, but I have not tried it.
            fieldAllScans = mymsmd.scansforfield(fieldName)
            fieldNonTsysScans = [
                x for x in fieldAllScans if x not in fieldTsysScans
            ]
            if (len(fieldNonTsysScans) < 1):
                print(
                    "****** This field (id=%d, name=%s) appears to have no non-Tsys-like-scans, and thus cannot be normalized."
                    % (f, fieldName))
                # Release the tools before bailing out; the Tsys table was
                # opened for writing and would otherwise stay open.
                mymsmd.close()
                mytb.close()
                mytb.done()
                return -1
        print("Field %d (%s) Tsys scans:" % (f, fieldName), fieldTsysScans,
              ", All scans:", fieldAllScans, ", Non-Tsys scans:",
              fieldNonTsysScans)
        # Keep only the scans none of whose intents start with a "bad" intent
        scienceLikeScans = []
        for s in fieldNonTsysScans:
            intents = mymsmd.intentsforscan(s)
            good = True
            for i in intents:
                for b in badIntents:
                    if i.startswith(b):
                        good = False
                        break
            if good: scienceLikeScans.append(s)
        # For each Tsys scan pick the science-like scan nearest in scan number
        # (-1 if there is none)
        powerRefScans = []
        for s in fieldTsysScans:
            minDist = 9999999
            refScan = -1
            for r in scienceLikeScans:
                dist = abs(r - s)
                if dist < minDist:
                    minDist = dist
                    refScan = r
            powerRefScans.append(refScan)
        if verbose:
            print("Field %d (%s) Tsys scans:" % (f, fieldName), fieldTsysScans,
                  ", All scans:", fieldAllScans, ", Non-Tsys scans:",
                  fieldNonTsysScans, ", Non-Tsys science-like scans:",
                  scienceLikeScans)
        for i in range(len(fieldTsysScans)):
            if verbose:
                print("        Tsys scan %3d power reference scan: %3d" %
                      (fieldTsysScans[i], powerRefScans[i]))
            refScans[fieldTsysScans[i]] = powerRefScans[i]
        if verbose:
            print(
                "populating powers corresponding to each Tsys scan on field %d..."
                % (f))
        for i in range(len(fieldTsysScans)):
            refPowers[fieldTsysScans[i]] = []
            for spw in scaleSpws:
                if verbose:
                    print("powerRefScans: ", powerRefScans)
                    print("calling getPower(vis, %d, %d, 10.0, %s)" %
                          (powerRefScans[i], spw,
                           str(powerRefScans[i] < fieldTsysScans[i])))
                # The boolean argument is True when the reference scan
                # precedes the Tsys scan.
                p = getPower(vis,
                             powerRefScans[i],
                             spw,
                             10.0,
                             powerRefScans[i] < fieldTsysScans[i],
                             verbose=verbose)
                refPowers[fieldTsysScans[i]].append(p)

    print("tsysTransfer: summarising Tsys table took %.3f seconds" %
          (time.time() - t))

    t = time.time()
    mytb.copy(newTsysTable)
    mytb.close()
    # re-open original table as read-only
    mytb.open(tsysTable)
    mytbNew = taskinit.tbtool()
    mytbNew.open(newTsysTable, nomodify=False)
    print(
        "tsysTransfer: Copying Tsys table from '%s' to '%s' took %.3f seconds"
        % (tsysTable, newTsysTable, time.time() - t))
    anyProcessingNeeded = False

    # Loop over each pair of consecutive Tsys scans.
    # NOTE: scans after the final Tsys scan are not visited by this loop.
    for i in range(len(tsysScans) - 1):
        tsysTime0 = tsysScanTimes[tsysScans[i]]
        tsysTime1 = tsysScanTimes[tsysScans[i + 1]]
        tsysTimeGap = tsysTime1 - tsysTime0
        tsysFields0 = mymsmd.fieldsforscan(tsysScans[i])  # current Tsys scan
        tsysFields1 = mymsmd.fieldsforscan(tsysScans[i + 1])  # next Tsys scan
        # loop over all scans between the current Tsys scan and the next one
        startScan = tsysScans[i] + 1
        stopScan = tsysScans[i + 1]
        for scan in range(startScan, stopScan):
            if 'CALIBRATE_POINTING#ON_SOURCE' in mymsmd.intentsforscan(scan):
                continue
            processingNeeded = False
            fields = mymsmd.fieldsforscan(scan)
            times = mymsmd.timesforscan(scan)
            startTime = times[0]
            endTime = times[-1]
            print(
                "Processing scan %d with fields %s, between Tsys scan %d (fields %s) and %d (fields %s)"
                % (scan, str(fields[0]), tsysScans[i], str(tsysFields0[0]),
                   tsysScans[i + 1], str(tsysFields1[0])))
            print(
                "    Scan %d starts %.3f sec after preceding Tsys, and ends %.3f sec before next Tsys"
                % (scan, startTime - tsysTime0, tsysTime1 - endTime))
            # There are a few possible cases to deal with:
            # 1) this was a power reference scan for a Tsys scan, in which case only produce one extra Tsys, at the opposite end of the scan, or none if there are Tsys scans for the same field at both ends
            fieldMatchesPriorTsysField = fieldsMatch(fields, tsysFields0)
            fieldMatchesNextTsysField = fieldsMatch(fields, tsysFields1)
            priorScanIsTsys = scan == tsysScans[i] + 1
            nextScanIsTsys = scan == tsysScans[i + 1] - 1
            bracketingTsysFieldsMatch = fieldsMatch(tsysFields0, tsysFields1)
            scanIsNotRefScan = (scan != refScans[tsysScans[i]]
                                and scan != refScans[tsysScans[i + 1]])
            if fieldMatchesPriorTsysField and fieldMatchesNextTsysField and priorScanIsTsys and nextScanIsTsys:
                print(
                    "    Nothing needed for scan %d as bracketed immediately by two Tsys scans of same field"
                    % scan)
            # The most common case for wanting to do the transfer: science field bracketed by phase cal, or phase cal without Tsys immediately before/after
            elif bracketingTsysFieldsMatch and (not fieldMatchesPriorTsysField
                                                or scanIsNotRefScan):
                # The two Tsys scans that bracket this scan are taken on the same field;
                # and either this scan is not on the field of the prior Tsys scan, or
                # this scan is not a reference scan
                processingNeeded = True
                priorScanToUse = tsysScans[i]
                nextScanToUse = tsysScans[i + 1]
            elif (not bracketingTsysFieldsMatch
                  and fields[0] in tsysScansOnField.keys()):
                # Tsys was taken on this scan's own field at some point: use
                # the nearest such Tsys scans on either side, if both exist.
                candidateScans = np.array(tsysScansOnField[fields[0]])
                if (scan < candidateScans[0] or scan > candidateScans[-1]):
                    print(
                        "    The bracketing Tsys fields do not match, and there are not two scans to interpolate between."
                    )
                else:
                    processingNeeded = True
                    priorScanToUse = np.max(
                        candidateScans[np.where(candidateScans < scan)])
                    nextScanToUse = np.min(
                        candidateScans[np.where(candidateScans > scan)])
                    print(
                        "    The bracketing Tsys fields do not match, but there are two scans to interpolate between: %d and %d."
                        % (priorScanToUse, nextScanToUse))
            elif (not bracketingTsysFieldsMatch):
                # This section added by Todd for initial phase calibrator scans when Tsys taken on science target only.
                # Not sure what to do yet, though.  (Extrapolating from the
                # nearest Tsys scan was considered but is not enabled.)
                print(
                    "    The bracketing Tsys fields do not match, and Tsys was never taken on this field. No processing will be done."
                )
            else:
                print(
                    "    This scan arrangement is unexpected.  No processing will be done."
                )
                print("      bracketingTsysFieldsMatch = %s" %
                      bracketingTsysFieldsMatch)
                print("      fieldMatchesPriorTsysField = %s" %
                      fieldMatchesPriorTsysField)
                print("      fieldMatchesNextTsysField = %s" %
                      fieldMatchesNextTsysField)
                print("      priorScanIsTsys = %s" % priorScanIsTsys)
                print("      nextScanIsTsys = %s" % nextScanIsTsys)
                print("      scanIsNotRefScan = %s" % scanIsNotRefScan)
                print("      %s in tsysScansOnField(%s) = %s" %
                      (fields[0], tsysScansOnField.keys(), fields[0]
                       in tsysScansOnField.keys()))
            if processingNeeded:
                anyProcessingNeeded = True
                print(
                    "    For scan %d will generate two Tsys entries for beginning and end of scan, interpolating reference from scans %d and %d"
                    % (scan, priorScanToUse, nextScanToUse))
                for ispw in range(len(scaleSpws)):
                    spw = scaleSpws[ispw]
                    # power at the start (False) and at the end (True) of
                    # this scan
                    startPower = getPower(vis,
                                          scan,
                                          spw,
                                          10.0,
                                          False,
                                          verbose=verbose)
                    endPower = getPower(vis,
                                        scan,
                                        spw,
                                        10.0,
                                        True,
                                        verbose=verbose)
                    # NOTE(review): the loop index is used directly as the
                    # ANTENNA1 id, i.e. ids are assumed to be 0..N-1 -- confirm
                    for ant in range(len(tsysAntennas)):
                        tsysSubTab0 = mytb.query(
                            "SCAN_NUMBER==%d AND SPECTRAL_WINDOW_ID==%d AND ANTENNA1==%d"
                            % (priorScanToUse, tsysSpws[ispw], ant))
                        tsysSubTab1 = mytb.query(
                            "SCAN_NUMBER==%d AND SPECTRAL_WINDOW_ID==%d AND ANTENNA1==%d"
                            % (nextScanToUse, tsysSpws[ispw], ant))
                        # sanity check for duplicate entries
                        if (tsysSubTab0.nrows() != 1
                                or tsysSubTab1.nrows() != 1):
                            print(
                                "WARNING!!! not one result row for (scan,ant,spw) query in Tsys table. Scan %d: %d rows, Scan %d: %d rows."
                                % (priorScanToUse, tsysSubTab0.nrows(),
                                   nextScanToUse, tsysSubTab1.nrows()))
                        tsys0 = tsysSubTab0.getcell('FPARAM', 0)
                        tsys1 = tsysSubTab1.getcell('FPARAM', 0)
                        tsysSubTab1.close()
                        startTsys = copy.copy(tsys0)
                        # just a placeholder, new values will be filled in below
                        endTsys = copy.copy(tsys0)
                        startRefPower = refPowers[priorScanToUse]
                        endRefPower = refPowers[nextScanToUse]
                        tsysTime0 = tsysScanTimes[priorScanToUse]
                        tsysTime1 = tsysScanTimes[nextScanToUse]
                        tsysTimeGap = tsysTime1 - tsysTime0
                        for pol in range(len(tsys0)):
                            # integer division so the middle-channel test
                            # below can match for odd channel counts too
                            midChan = len(tsys0[pol]) // 2
                            for chan in range(len(tsys0[pol])):
                                # Four estimates: Tsys at scan start/end,
                                # each scaled from the prior (0) and from
                                # the next (1) Tsys measurement.
                                startTsys0 = TsysAfterPowerChange(
                                    startRefPower[ispw][ant][pol],
                                    startPower[ant][0], tsys0[pol][chan])
                                startTsys1 = TsysAfterPowerChange(
                                    endRefPower[ispw][ant][pol],
                                    startPower[ant][0], tsys1[pol][chan])
                                endTsys0 = TsysAfterPowerChange(
                                    startRefPower[ispw][ant][pol],
                                    endPower[ant][0], tsys0[pol][chan])
                                endTsys1 = TsysAfterPowerChange(
                                    endRefPower[ispw][ant][pol],
                                    endPower[ant][0], tsys1[pol][chan])
                                if tsysTimeGap == 0:
                                    # both references are the same instant:
                                    # nothing to interpolate between
                                    startTsys[pol][chan] = startTsys0
                                    endTsys[pol][chan] = endTsys0
                                else:
                                    # linear interpolation in time between
                                    # the two estimates
                                    startTsys[pol][chan] = (
                                        (startTime - tsysTime0) * startTsys1 +
                                        (tsysTime1 - startTime) *
                                        startTsys0) / tsysTimeGap
                                    endTsys[pol][chan] = (
                                        (endTime - tsysTime0) * endTsys1 +
                                        (tsysTime1 - endTime) *
                                        endTsys0) / tsysTimeGap
                                if (chan == midChan and ant == printAntenna
                                        and pol == printPol):
                                    print(
                                        "    ispw=%d spw=%d ant=%d pol=%d chan=%d: TsysBefore: %.1f K, TsysScanStart: %.1f K (interp %.1f,%.1f), TsysScanEnd: %.1f K (interp %.1f,%.1f), TsysAfter: %.1f K"
                                        %
                                        (ispw, spw, ant, pol, chan,
                                         tsys0[pol][chan],
                                         startTsys[pol][chan], startTsys0,
                                         startTsys1, endTsys[pol][chan],
                                         endTsys0, endTsys1, tsys1[pol][chan]))
                        # Append two rows per field (scan start and scan end),
                        # cloned from the prior Tsys row and then overwritten.
                        for f in fields:
                            nr = mytbNew.nrows()
                            tsysSubTab0.copyrows(newTsysTable, nrow=1)
                            if verbose:
                                print("setting tsys at row %d" % nr)
                            mytbNew.putcell('FPARAM', nr, startTsys)
                            mytbNew.putcell('TIME', nr, startTime)
                            mytbNew.putcell('FIELD_ID', nr, f)
                            mytbNew.putcell('SCAN_NUMBER', nr, scan)
                            nr = mytbNew.nrows()
                            tsysSubTab0.copyrows(newTsysTable, nrow=1)
                            if verbose:
                                print("setting tsys at row %d" % nr)
                            mytbNew.putcell('FPARAM', nr, endTsys)
                            mytbNew.putcell('TIME', nr, endTime)
                            mytbNew.putcell('FIELD_ID', nr, f)
                            mytbNew.putcell('SCAN_NUMBER', nr, scan)
                        # end loop over fields (f)
                        tsysSubTab0.close()
                    # end loop over antennas (ant)
                # end loop over spws (ispw)
            # end if processingNeeded
        # end loop over scans between tsysScans (scan)
        mytbNew.flush()
    # end loop over Tsys scans
    if not anyProcessingNeeded:
        print(
            "Because no processing was needed the new Tsys table is identical to the original."
        )
    # TODO: These cleanups should be done also on an exception too
    print("Closing tables...")
    mytbNew.unlock()
    mytbNew.close()
    mytbNew.done()
    mymsmd.close()
    mytb.close()
    mytb.done()
Esempio n. 28
0
# Set the 'value' entry of the country-level LDI covariate (rate) to 0.
# NOTE(review): presumably this switches the covariate off -- confirm.
dm.params['covariates']['Country_level']['LDI_id_Updated_7July2011']['rate']['value'] = 0


# clear any fit and priors
dm.clear_fit()
dm.clear_empirical_prior()
dismod3.neg_binom_model.covariate_hash = {}

# initialize model data
# keep only the prevalence rows and summarize them
prev_data = [d for d in dm.data if d['data_type'] == 'prevalence data']
r = pl.array([dm.value_per_1(s) for s in prev_data])
# min / max / median of the values scaled by 100, stored as integer strings
min_rate_per_100 = '%d' % round(r.min()*100)
max_rate_per_100 = '%d' % round(r.max()*100)
median_rate_per_100 = '%d' % round(pl.median(r*100))
# number of distinct GBD regions represented in the prevalence data
regions = pl.array([d['gbd_region'] for d in prev_data])
num_regions = len(pl.unique(regions))

# fit_world is imported but its fit is currently disabled (calls commented out)
import fit_world
#fit_world.fit_world(dm)
#dm.data = prev_data # put data back in

# fit the posterior for a single region / sex / year
import fit_posterior
region = 'north_america_high_income'
sex = 'female'
year='2005'
fit_posterior.fit_posterior(dm, region, sex, year, map_only=faster_run_flag, store_results=False)
dm.data = prev_data # put data back in

# start the figure; the plot goes in the left panel of a 1x2 grid
pl.figure(**book_graphics.quarter_page_params)
pl.subplot(1,2,1)
dismod3.plotting.plot_intervals(dm, [d for d in dm.data if dm.relevant_to(d, 'prevalence', 'all', 'all', 'all')],
Esempio n. 29
0
from scipy.spatial import ConvexHull

# On the 2-sphere the Voronoi tessellation is equivalent to the convex hull
# projected onto the sphere
# (Sugihara, Journal for Geometry and Graphics Volume 6 (2002), No. 1, 69-81.)
# The same is assumed to hold in 4D.... [This has to be checked!]

# magic number by Straley for 120 particles
R = 1.6180339887498949

import sys

# a ".npy" input is read as a binary array, anything else as a text table
read_coordinates = pl.load if sys.argv[1].endswith("npy") else pl.loadtxt
polar = read_coordinates(sys.argv[1])
from spheretools import *
cartesian = convert(polar, R)

CHull = ConvexHull(cartesian)

# One output line per point: "<neighbour count> <neighbour indices...>".
# Two points are neighbours when they share at least one hull simplex.
with open("bonds.txt", 'w') as fw:
    for p in range(cartesian.shape[0]):
        # rows of the simplex table in which p appears
        which_simplex, position = pl.where(CHull.simplices == p)
        # every vertex of those simplices, p itself included
        all_neighs = pl.unique(CHull.simplices[which_simplex].flatten())
        # drop p from its own neighbour list
        neighs = all_neighs[all_neighs != p]
        fw.write("%d %s\n" % (len(neighs), " ".join(str(q) for q in neighs)))
Esempio n. 30
0
def test_load_area():
    """Check that every area in the model input data is an official area."""
    # distinct areas that actually occur in the model's input data
    observed = set(pl.unique(model2.input_data['area']))
    # nothing may be left over once the official areas are taken away
    unofficial = observed.difference(areas)
    assert len(unofficial) == 0
Esempio n. 31
0
def _calantsub_copy_ant(st2, scan, ispw, antid, subid, collist):
    """Copy the columns in collist from antenna subid onto antenna antid
    within the (scan, spw) subtable st2; both temporary subtables are
    always closed, whether or not anything was copied."""
    stsub=st2.query(query='ANTENNA1=='+subid,
                    name='subant')
    stant=st2.query(query='ANTENNA1=='+antid,
                    name='ant')
    try:
        # nothing to do if the antenna to be replaced has no solutions here
        if stant.nrows()<1:
            return

        print(' scan='+str(scan)+' spw='+str(ispw)+' ants: '+str(subid)+'->'+str(antid))

        # trap (unlikely?) pathological case
        if stsub.nrows()!=stant.nrows():
            raise Exception("In spw "+str(ispw)+" antenna ids "+str(antid)+" and "+str(subid)+" have a different number of solutions.")

        # substitute values
        for col in collist:
            stant.putcol(col,stsub.getcol(col))
    finally:
        stsub.close()
        stant.close()


def calantsub(incaltable,outcaltable='',
              spw='',scan='',
              ant='',subant=''):

    """
    Substitute cal solutions by antenna
    Input:
     incaltable  Input caltable
     outcaltable Output caltable (if '', overwrite result on incaltable)
     spw         Spectral Window selection (no channel selection permitted)
     scan        Scan selection
     ant         Antenna (indices) which need replaced solutions
     subant      Antenna (indices) with which to replace those in ant

    This function provides a means to replace solutions by antenna,
    e.g., to substitute one antenna's Tsys spectra with another.

    The processing can be limited to specific spectral windows and/or
    scans.  The spw and scan parameters should be specified in the
    standard MS selection manner (comma-separated integers in a string),
    except no channel selection is supported.

    The ant parameter specifies one or more antenna indices
    (comma-separated in a string) for which solutions are to be
    replaced.  The subant parameter lists the antenna indices
    from which the substitute solutions are to be obtained. E.g.,
    ant='3,5,7',subant='6,8,10' will cause the solutions from
    antenna id 6 to be copied to antenna id 3, id 8 to id 5 and id 10
    to id 7.  The number of antennas specified in ant and subant
    must match.

    Raises Exception if ant or subant is empty, if they list a different
    number of antennas, if the spw/scan selection matches no rows, or if
    an antenna pair has a different number of solutions in some scan/spw.
    """

    # trap insufficient ant subant specifications
    if len(ant)==0 or len(subant)==0:
        raise Exception("Must specify at least one ant and subant.")

    antlist=ant.split(',')
    sublist=subant.split(',')

    # trap dumb cases
    nant=len(antlist)
    nsub=len(sublist)
    if nant!=nsub:
        raise Exception("Must specify equal number of ant and subant.")

    # imported only once the arguments are known to be usable
    import pylab as mypl

    # local tb tool
    mytb=taskinit.tbtool()

    # parse selection
    selstr=''
    if len(spw)>0:
        selstr+='SPECTRAL_WINDOW_ID IN ['+spw+']'
        if len(scan)>0:
            selstr+=' && '
    if len(scan)>0:
        selstr+='SCAN_NUMBER IN ['+scan+']'
        print("selstr = '"+selstr+"'")

    # verify selection (if any) selects non-zero rows
    if len(selstr)>0:
        mytb.open(incaltable)
        try:
            st=mytb.query(query=selstr)
            try:
                nselrows=st.nrows()
            finally:
                st.close()
        finally:
            mytb.close()
        if nselrows==0:
            raise Exception('Error: scan and/or spw selection selects no rows!')

    # manage the output table
    if outcaltable=='':
        outcaltable=incaltable
        print("No outcaltable specified; will overwrite incaltable.")
    if outcaltable!=incaltable:
        # NOTE(review): the names are pasted into a shell command unquoted,
        # so table paths containing spaces or shell metacharacters will break.
        os.system('cp -r '+incaltable+' '+outcaltable)

    # open the output table for adjustment
    mytb.open(outcaltable,nomodify=False)
    try:
        stsel=mytb
        if len(selstr)>0:
            stsel=mytb.query(query=selstr,name='selected')
        try:
            # cols to substitute: the parameter column is CPARAM when the
            # table has one, FPARAM otherwise
            collist=['TIME','INTERVAL','PARAMERR','SNR','FLAG']
            cols=mytb.colnames()
            if cols.count('CPARAM')>0:
                collist.append('CPARAM')
            else:
                collist.append('FPARAM')

            # scan list
            scans=mypl.unique(stsel.getcol('SCAN_NUMBER'))

            print('Found scans =  '+str(scans))

            # do one scan at a time
            for iscan in scans:
                st1=stsel.query(query='SCAN_NUMBER=='+str(iscan),name='byscan')
                try:
                    spws=mypl.unique(st1.getcol('SPECTRAL_WINDOW_ID'))

                    print('Scan '+str(iscan)+' has spws='+str(spws))

                    # do one spw at a time
                    for ispw in spws:
                        st2=st1.query(query='SPECTRAL_WINDOW_ID=='+str(ispw),name='byspw')
                        try:
                            for ia in range(nant):
                                _calantsub_copy_ant(st2,iscan,ispw,
                                                    antlist[ia],sublist[ia],
                                                    collist)
                        finally:
                            st2.close()
                finally:
                    st1.close()
        finally:
            stsel.close()
    finally:
        mytb.close()
Esempio n. 32
0
def fixsyscaltimes(vis,newinterval=2.0):
    """
    Fix TIME,INTERVAL columns in MS SYSCAL subtable
    Input:
     vis          the MS containing the offending SYSCAL subtable
     newinterval  the interval to use in revised entries

     This function is intended to repair MS SYSCAL tables that suffer from
     multiple TIME values (over antennas) per Tsys measurement.  The gencal
     task (mode='tsys') expects all antennas to share the same TIME value
     for each Tsys measurement (and this is usually true).  The function
     finds those measurements that have multiple TIMEs and replaces them
     with a common TIME value which takes the value
     mean(oldTIME-INTERVAL/2)+newinterval/2.
     Usually (always?), oldTIME-INTERVAL/2 is constant over antennas
     and represents the physical timestamp of the Tsys measurement.
     If the function finds no pathological timestamps, it does not
     revise the table.
    """

    import pylab as mypl
    import math as mymath
    myqa=taskinit.qatool()
    mytb=taskinit.tbtool()
    mytb.open(vis+'/SYSCAL',nomodify=False)

    try:
        spws=mypl.unique(mytb.getcol("SPECTRAL_WINDOW_ID"))

        for ispw in spws:
            st=mytb.query('SPECTRAL_WINDOW_ID=='+str(ispw),name='byspw')
            try:
                times=st.getcol('TIME')
                interval=st.getcol('INTERVAL')
                # start of each measurement
                timestamps=times-interval/2
                # whole-day offset (86400 s) subtracted before comparing
                t0=86400.0*mymath.floor(timestamps[0]/86400.0)

                utimes=mypl.unique(times-t0)
                nT=len(utimes)
                # measurements are grouped by whole-second start time
                utimestamps=mypl.unique(mypl.floor(timestamps)-t0)
                nTS=len(utimestamps)

                msg='In spw='+str(ispw)+' found '+str(nTS)+' Tsys measurements with '+str(nT)+' TIMEs...'
                if nT==nTS:
                    msg+='OK.'
                    print(msg)
                    continue

                msg+=' which is too many, so fixing it:'
                print(msg)

                modified=False
                for uts in utimestamps:
                    mask = ((mypl.floor(timestamps))-t0==uts)
                    uTIMEs=mypl.unique(times[mask])
                    nTIMEs=len(uTIMEs)
                    newtime = mypl.mean(times[mask]-interval[mask]/2) + newinterval/2
                    msg='  Found '+str(nTIMEs)+' TIMEs at timestamp='+str(myqa.time(str(newtime-newinterval/2)+'s',form='ymd')[0])
                    if nTIMEs>1:
                        msg+=':'
                        print(msg)
                        print('   TIMEs='+str([myqa.time(str(t)+'s',form='ymd')[0] for t in uTIMEs])+' --> '+str(myqa.time(str(newtime)+'s',form='ymd')[0])+' w/ INTERVAL='+str(newinterval))
                        times[mask]=newtime
                        interval[mask]=newinterval
                        modified=True
                    else:
                        msg+='...ok.'
                        print(msg)

                # write the revised columns back once per spw rather than
                # once per repaired timestamp
                if modified:
                    st.putcol('TIME',times)
                    st.putcol('INTERVAL',interval)
            finally:
                st.close()
    finally:
        mytb.close()
Esempio n. 33
0
# echo the rate-type list (5th command-line argument)
print(sys.argv[5])

# time process
start = time.time()

# assert that system arguments are correct: the rate-type list must hold
# either a single entry or one entry per parameter type (4th argument)
rate_type_list = sys.argv[5].split(' ')
if len(rate_type_list) != 1:
    assert len(rate_type_list) == len(sys.argv[4].split(' ')), 'rate_type_list has the incorrect number of arguments--length must be 1 or match length of param_type_list'

# download data to j drive
os.system('/usr/local/epd-7.3-2/bin/python download_model.py %s'%(sys.argv[1]))

# load country list
country_list = pandas.read_csv('/snfs1/DATA/IHME_COUNTRY_CODES/IHME_COUNTRYCODES.CSV', index_col=None)
# plain column selection instead of the removed DataFrame.ix indexer
country_list = country_list[country_list['ihme_indic_country'] == 1]
country_list = list(pl.unique(country_list['iso3']))
# these four codes are excluded from the run; list.remove raises ValueError
# if one of them is missing from the file
for excluded_iso3 in ('BMU', 'HKG', 'MAC', 'PRI'):
    country_list.remove(excluded_iso3)

# launch on cluster: one job per (country, sex)
name_list = []
for country in country_list: #['USA', 'GBR']:
    for sex in ['male', 'female']:
        name = country + str(sys.argv[3]) + sex
        name_list.append(name)
        # NOTE(review): command-line arguments are interpolated into a shell
        # command line; they are double-quoted but not escaped.
        os.system('/usr/local/bin/SGE/bin/lx24-amd64/qsub -cwd -N ' + name + ' dmco_fit_posterior.sh "%s" "%s" "%s" "%s" "%s" "%s" "%s"' %(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], country, sex))

# record the submitted job names
pandas.DataFrame(name_list).to_csv('/home/j/Project/dismod/dismod_status/prod/dm-%s/posterior/stdout/name_list.csv'%(sys.argv[1]))
Esempio n. 34
0
allcategory.extend([
    'complete',
    'ddm_adjust',
    'gb_adjust',
    'sibs',
    'no_adjust',
    'dss',
])

# start every known category with empty year / mortality / variance lists
for acat in allcategory:
    year_cat[acat] = []
    mort_cat[acat] = []
    var_cat[acat] = []

#category is a vector of data$category[data$ihme_loc_id == cc & data$data == 1]
#outside loop: all possible categories, inside loop: all observations
#year_cat, etc. become vectors of all years (etc) w/ specific category of data
for ucat in pl.unique(category):
    # count is the position of the current observation in category / year /
    # log10_var / log10_mort
    count = 0
    for cat in category:
        if cat == ucat:
            year_cat[ucat].append(year[count])
            var_cat[ucat].append(log10_var[count])
            if (ihme_loc_id in ['DOM', 'PER', 'MAR', 'MDG']) and (cat in ["sibs", "ddm_adjust", "gb_adjust"]):
                # for these locations and adjusted sources a random normal
                # draw (sd 0.01) is subtracted from the log10 mortality
                mort_cat[ucat].append(log10_mort[count] - rnormal(mu=0., tau=.01**-2))
            else:
                mort_cat[ucat].append(log10_mort[count])
        count = count + 1

# concatenate the per-category years following the order of allcategory
for acat in allcategory:
    for ucat in pl.unique(category):
        if acat == ucat:
            allyear = allyear + year_cat[ucat]
Esempio n. 35
0
    # reshape predicted labels to an image
    # (flat prediction vector -> numRows x numCols grid taken from hypData)
    img_pred = np.reshape(data_pred, (hypData.numRows, hypData.numCols))

    # read labels into numpy array
    mat_gt = scipy.io.loadmat('PaviaU_gt.mat')
    img_gt = mat_gt['paviaU_gt']

    # Names for labels 1..9; label 0 is shown as 'background' in the legends.
    class_names = [
        'asphalt', 'meadow', 'gravel', 'tree', 'painted metal', 'bare soil',
        'bitumen', 'brick', 'shadow'
    ]
    cmap = pl.cm.jet

    # save ground truth figure
    pl.figure()
    for entry in pl.unique(img_gt):
        # Scale the label onto the 0..255 colour range of the colormap.
        # NOTE(review): if img_gt is a small unsigned integer array,
        # `entry * 255` may wrap around, and under true division the result is
        # a float well above 1 -- confirm the intended colour lookup.
        colour = cmap(entry * 255 / (np.max(img_gt) - 0))
        # Dummy zero-length line whose only purpose is to create a legend entry.
        pl.plot(0,
                0,
                "-",
                c=colour,
                label=(['background'] + class_names)[entry])
    pl.imshow(img_gt, cmap=cmap)
    pl.legend(bbox_to_anchor=(2, 1))
    pl.title('ground truth labels')
    pl.savefig(os.path.join('results', 'test_classification_gt.png'))

    # save predicted classes figure
    pl.figure()
    for entry in pl.unique(img_pred):
        # same label -> colour mapping as above, scaled by the largest predicted label
        colour = cmap(entry * 255 / (np.max(img_pred) - 0))
Esempio n. 36
0
def get_unique_annotations(_annotations):
    """Return the distinct, strictly positive values in *_annotations*.

    The values come back as a sorted 1-D array with duplicates removed and
    every entry <= 0 dropped.  When *_annotations* is None, None is returned.
    """
    if _annotations is None:
        return None
    distinct = pl.unique(_annotations)
    keep = distinct > 0
    return distinct[keep]
Esempio n. 37
0
def plot_fits_pdf(disease, prior, year, param_type_list, filename=''):
    '''Plot country fits

    Writes one PDF page per country, each page holding a 2 x len(param_type_list)
    grid of panels (top row male, bottom row female).

    disease          identifier interpolated into the dm-<disease> directory names
    prior            label used in the output file name and passed to get_emp / find_fnrfx
    year             numeric year; model data are restricted to year-2 .. year+2
    param_type_list  sequence of data type names, one panel column each;
                     'm_with' and 'm_all' panels only show the m_all data bars
    filename         optional suffix for the output PDF name
    '''
    # NOTE: `dir` shadows the builtin and already ends with '/', so the PdfPages
    # path below contains a doubled slash.
    dir = '/home/j/Project/dismod/dismod_status/prod/'
    mortality = pandas.read_csv('/homes/peterhm/gbd/dmco_mortality.csv')
    
    world = load_new_model(disease)
    
    # create list of countries to report
    country_list = pandas.read_csv('/snfs1/DATA/IHME_COUNTRY_CODES/IHME_COUNTRYCODES.CSV', index_col=None)
    country_list = country_list[country_list.ix[:,'ihme_indic_country'] == 1]
    country_list = list(pl.unique(country_list['iso3']))
    country_list.remove('BMU')
    country_list.remove('HKG')
    country_list.remove('MAC')
    country_list.remove('PRI')
    
    # create list of countries order by number of data points, then alphabetical
    # Each tuple is (iso3, rows of all input data for the country, rows of 'p' data).
    country_ordered = []
    for country in country_list:
        country_ordered.append((country,len(world.input_data[world.input_data['area']==country]),len(world.get_data('p')[world.get_data('p')['area']==country])))
    
    dtype = [('ISO3','S10'),('pts',int),('p',int)]
    country_ordered = pl.array(country_ordered, dtype=dtype)
    # Ascending sort followed by reverse(): most data first; note that ties are
    # therefore in reverse alphabetical order of ISO3.
    country_ordered = list(pl.sort(country_ordered,order=['pts','p','ISO3']))
    country_ordered.reverse()
        
    pp = PdfPages(dir + '/dm-%s/image/%s_w_prior_%s_%s.pdf'%(disease, prior, year, filename))
    for c,country in enumerate(country_ordered):
        country = country[0]  # keep only the ISO3 field of the record
        pl.figure(c, figsize=(len(param_type_list)*4,8))
        for s,sex in enumerate(['male', 'female']):
            model = load_new_model(disease, country, sex)
            model.keep(start_year=year-2)
            model.keep(end_year=year+2)
            add_data(model, mortality, country, sex, year)
            for j,data_type in enumerate(param_type_list):
                # subplot index: columns follow param_type_list, row s selects the sex
                pl.subplot(2,len(param_type_list),(j+1)+(s*len(param_type_list)))
                if (data_type == 'm_with') | (data_type == 'm_all'): dismod3.graphics.plot_data_bars(model.get_data('m_all'), color='grey', label='m_all')
                # get estimates
                else: #(data_type != 'm_with') | (data_type != 'm_all'):
                    est = pandas.read_csv(dir+'dm-%s/posterior/dm-%s-%s-%s-%s-%s.csv' % (disease, disease, full_name[data_type], country, sex, year),index_col=None)
                    est = est.filter(like='Draw')  # keep only columns whose name contains 'Draw'
                    gbd_est = get_emp(prior, data_type, country, sex, year)
                    find_fnrfx(model, prior, data_type, country, sex, year)
                    
                    # y-axis limit: the larger of the two row-wise mean curves
                    ymax = 0.
                    if max(est.mean(1)) > ymax: ymax = max(est.mean(1))
                    if max(gbd_est.mean(1)) > ymax: ymax = max(gbd_est.mean(1))
                
                    # plotting
                    df = model.input_data
                    if sex == 'male': #shift all so male is zero
                        map_func = {'male': 0, 'total': -.5, 'female': -1}
                    if sex == 'female': #shift all so female is zero
                        map_func = {'male': 1, 'total': .5, 'female': 0}

                    # Rescale the data values by exp(-x_sex mu * mean sex offset).
                    # NOTE(review): this assigns into the object returned by
                    # get_data(); whether `df` (plotted next) sees the change
                    # depends on get_data returning a view -- confirm.
                    model.get_data(data_type)['value'] = model.get_data(data_type)['value'] * pl.exp(-model.parameters[data_type]['fixed_effects']['x_sex']['mu'] * df[df['data_type']==data_type]['sex'].map(map_func).mean())
                    dismod3.graphics.plot_data_bars(df[df['data_type']==data_type])
                    # solid lines: means over draws; dotted lines: hpd bounds at alpha=.05
                    pl.plot(pl.array(est.mean(1)), 'k-', label='DM-CO')
                    pl.plot(pl.array(gbd_est.mean(1)), 'r-', label='GBD2010')
                    pl.plot(mc.utils.hpd(pl.array(gbd_est).T, .05), 'r:')
                    pl.plot(mc.utils.hpd(pl.array(est).T, .05), 'k:')
                    pl.axis([-5, 105, -ymax*.05, ymax*1.1])
                pl.title(country +' '+ data_type +' '+ sex +' '+ str(year) )
                if sex == 'male': pl.legend(loc=(.25,1.145))
        pl.subplots_adjust(top=.83, bottom=.07)

        # NOTE(review): the figure is saved and cleared but never closed, so one
        # figure object per country stays alive until the process ends.
        pp.savefig(c)
        pl.clf()
    pp.close()
Esempio n. 38
0
for counter in counters:

    # Columns of the per-counter data file: a time stamp, then one per author.
    fieldnames = ['time']
    fieldnames.extend(authornames)
    data_path = ('../gitstats/' + counter.replace(' ', '_') +
                 '_by_author.dat')
    fields = []
    # `with` guarantees the file is closed even if a row fails to parse.
    with open(data_path, 'r') as fid:
        reader = csv.DictReader(fid, fieldnames=fieldnames, delimiter=' ')
        for row in reader:
            # builtin int: the np.int alias no longer exists in current NumPy
            fields.append(list(map(int, row.values())))
    fields = np.array(fields)
    # Re-read the column order from the last row so that it matches the order
    # of row.values() used above (requires at least one data row).
    fieldnames = list(row.keys())

    # Map each canonical author name to the column indices of its aliases.
    authorfields = {}
    # list(...) is needed: a dict view handed straight to unique() is treated
    # as a single object under Python 3 instead of a sequence of names.
    for author in pylab.unique(list(authoralias.values())):
        af = []
        for i in range(1, np.size(fieldnames)):
            if fieldnames[i] in authoralias:
                if authoralias[fieldnames[i]] == author:
                    af.append(i)
        authorfields[author] = af

    print(authorfields)
    institutefields = {}
    for institute in pylab.unique(list(authorinstitute.values())):
        #print institute
        af = []
        for i in range(0, np.size(fieldnames)):
            if fieldnames[i] == 'time':
                continue
Esempio n. 39
0
 def build_model(vm, force_recomp=False):
     ''' Builds the model, if needed. Tries to reload if it can

     vm            object holding the model state (used like `self`)
     force_recomp  when True, skip both the clean-model shortcut and the
                   attempt to load a saved model

     Returns True when the model is already clean; every other path
     (loaded from disk, or rebuilt) returns None.
     '''
     logmsg('\n\nRequested: Build Model')
     if not force_recomp and not vm.isDirty:
         logmsg('The model is clean and is not forced to recompute')
         return True
     cm = vm.hs.cm
     # Delete old index and resample chips to index
     vm.delete_model()
     vm.sample_train_set()
     # Try to load the correct model
     if not force_recomp and vm.load_model():
         logmsg('Loaded saved model from disk')
         return
     logmsg('Building the model. This may take some time.')
     # Could not load old model. Do full rebuild
     # -----
     # STEP 1 - Loading
     logdbg('Step 1: Aggregate the model support (Load feature vectors) ---')
     tx2_cx   = vm.get_train_cx()
     tx2_cid  = vm.get_train_cid()
     assert len(tx2_cx) > 0, 'Training set cannot be  np.empty'
     logdbg('Building model with %d sample chips' % (vm.num_train()))
     cm.load_features(tx2_cx)
     tx2_nfpts = cm.cx2_nfpts(tx2_cx)  # number of feature points per training chip
     num_train_keypoints = sum(tx2_nfpts)
     # -----
     # STEP 2 - Aggregating 
     logdbg('Step 2: Build the model Words')
     isTFIDF = False  # hard-coded, so the TF-IDF branch in step 3 never runs
     if vm.hs.am.algo_prefs.model.quantizer == 'naive_bayes':
         logdbg('No Quantization. Aggregating all fdscriptors for nearest neighbor search.')
         # Stack every chip's descriptors into one (num_train_keypoints, 128) uint8 array.
         vm.wx2_fdsc = np.empty((num_train_keypoints,128),dtype=np.uint8)
         _p = 0  # write offset into vm.wx2_fdsc
         for cx in tx2_cx:
             nfdsc = cm.cx2_nfpts(cx)
             vm.wx2_fdsc[_p:_p+nfdsc,:] = cm.cx2_fdsc[cx]
             _p += nfdsc
         # Without quantization every descriptor is its own word: identity mapping.
         ax2_wx = np.array(range(0,num_train_keypoints),dtype=np.uint32)
     if vm.hs.am.algo_prefs.model.quantizer == 'akmeans':
         raise NotImplementedError(':)')
     # NOTE(review): ax2_wx is only assigned in the 'naive_bayes' branch; any
     # other quantizer value that is not 'akmeans' reaches step 3 without it.
     # -----
     # STEP 3 - Inverted Indexing
     logdbg('Step 3: Point the parts of the model back to their source')
     vm.wx2_axs = np.empty(vm.wx2_fdsc.shape[0], dtype=object) 
     for ax in xrange(0,num_train_keypoints):
         # The slot initialised (index ax) and the slot appended to (index wx)
         # coincide only because ax2_wx is the identity mapping built above.
         if vm.wx2_axs[ax] is None:
             vm.wx2_axs[ax] = []
         wx = ax2_wx[ax]
         vm.wx2_axs[wx].append(ax)
     # Per-descriptor back-pointers, initialised to -1.
     vm.ax2_cid = -np.ones(num_train_keypoints,dtype=np.int32) 
     vm.ax2_fx  = -np.ones(num_train_keypoints,dtype=np.int32)
     ax2_tx     = -np.ones(num_train_keypoints,dtype=np.int32)
     curr_fx = 0; next_fx = 0
     for tx in xrange(vm.num_train()):
         # [curr_fx, next_fx) is the run of descriptor indices owned by chip tx
         nfpts    = tx2_nfpts[tx]
         next_fx  = next_fx + nfpts
         ax_range = range(curr_fx,next_fx)
         ax2_tx[ax_range] = tx
         vm.ax2_cid[ax_range] = tx2_cid[tx]    # Point to Inst
         vm.ax2_fx[ax_range]  = range(nfpts)   # Point to Kpts
         curr_fx = curr_fx + nfpts
     if isTFIDF: # Compute info for TF-IDF
         logdbg('Computing TF-IDF metadata')
         max_tx = len(tx2_cx)
         tx2_wtf_denom = np.float32(cm.cx2_nfpts(tx2_cx))
         vm.wx2_maxtf = map(lambda ax_of_wx:\
             max( np.float32(bincount(ax2_tx[ax_of_wx], minlength=max_tx)) / tx2_wtf_denom ), vm.wx2_axs)
         vm.wx2_idf = np.log2(map(lambda ax_of_wx:\
             vm.num_train()/len(pylab.unique(ax2_tx[ax_of_wx])),\
             vm.wx2_axs)+eps(1))
     logdbg('Built Model using %d feature vectors. Preparing to index.' % len(vm.ax2_cid))
     # -----
     # STEP 4 - Indexing
     logdbg('Step 4: Building FLANN Index: over '+str(len(vm.wx2_fdsc))+' words')
     assert vm.flann is None, 'Flann already exists'
     vm.flann = FLANN()
     flann_param_dict = vm.hs.am.algo_prefs.model.indexer.to_dict()
     # the returned parameters are not used further in this function
     flann_params = vm.flann.build_index(vm.wx2_fdsc, **flann_param_dict)
     vm.isDirty  = False
     vm.save_model()
     logmsg('The model was built.')
Esempio n. 40
0
def loadData(folder, islands, dataFrom):
    """Load optimisation results (individuals, statistics, fitness) from disk.

    folder    name of the run directory under ../data/
    islands   number of islands; data are read from '<folder>_island_<k>'
    dataFrom  'fitness' to rank candidates from the per-evaluation error
              files, 'individuals' to rank them from individuals.csv

    When islands > 1, returns 13 lists (one entry per island): individuals
    data (gens, cands, fits, cs), statistics (gens, worst, best, avg, std)
    and the fitness-sorted (fits, gens, cands, params).  No return statement
    for islands <= 1 is visible in this part of the function.
    """
    #%% Load data from files
    if islands > 1:
        ind_gens_isl = []  # individuals data for islands
        ind_cands_isl = []
        ind_fits_isl = []
        ind_cs_isl = []

        stat_gens_isl = []  # statistics.csv for islands
        stat_worstfits_isl = []
        stat_bestfits_isl = []
        stat_avgfits_isl = []
        stat_stdfits_isl = []

        fits_sort_isl = []  #sorted data
        gens_sort_isl = []
        cands_sort_isl = []
        params_sort_isl = []

    for island in range(islands):
        ind_gens = []  # individuals data
        ind_cands = []
        ind_fits = []
        ind_cs = []

        eval_gens = []  # error files for each evaluation
        eval_cands = []
        eval_fits = []
        eval_params = []

        stat_gens = []  # statistics.csv
        stat_worstfits = []
        stat_bestfits = []
        stat_avgfits = []
        stat_stdfits = []

        # NOTE(review): inside this loop islands is always >= 1, so the else
        # branch is never taken; elsewhere the multi-island test is `> 1`.
        if islands > 0:
            folderFinal = folder + "_island_" + str(island)
        else:
            folderFinal = folder

        # Row layout used below: generation, candidate, fitness, then the
        # candidate's parameter values (possibly wrapped in '[' ... ']').
        with open('../data/%s/individuals.csv' %
                  (folderFinal)) as f:  # read individuals.csv
            reader = csv.reader(f)
            for row in reader:
                ind_gens.append(int(row[0]))
                ind_cands.append(int(row[1]))
                ind_fits.append(float(row[2]))
                cs = [
                    float(row[i].replace("[", "").replace("]", ""))
                    for i in range(3, len(row))
                ]
                ind_cs.append(cs)

        # Columns read: 0 generation, 2 worst, 3 best, 4 average, 6 std;
        # columns 1 and 5 are not used.
        with open('../data/%s/statistics.csv' %
                  (folderFinal)) as f:  # read statistics.csv
            reader = csv.reader(f)
            for row in reader:
                stat_gens.append(float(row[0]))
                stat_worstfits.append(float(row[2]))
                stat_bestfits.append(float(row[3]))
                stat_avgfits.append(float(row[4]))
                stat_stdfits.append(float(row[6]))

        # unique generation number (sometimes repeated due to rerunning in hpc)
        # presumably numpy's unique with return_index=True (second positional
        # argument) -- TODO confirm which `unique` is imported
        stat_gens, stat_gens_indices = unique(stat_gens,
                                              1)  # unique individuals
        # keep only the statistics rows selected by those indices
        stat_worstfits, stat_bestfits, stat_avgfits, stat_stdfits = zip(*[[
            stat_worstfits[i], stat_bestfits[i], stat_avgfits[i],
            stat_stdfits[i]
        ] for i in stat_gens_indices])

        if dataFrom == 'fitness':
            # range(max(...)) stops one short of the largest generation /
            # candidate number -- NOTE(review): confirm this is intended.
            for igen in range(
                    max(ind_gens)):  # read error files from evaluations
                for ican in range(max(ind_cands)):
                    # Any failure (e.g. a missing file) silently skips this
                    # candidate; the opened files are not explicitly closed.
                    try:
                        f = open('../data/%s/gen_%d_cand_%d_error' %
                                 (folderFinal, igen, ican))
                        eval_fits.append(pickle.load(f))
                        f = open('../data/%s/gen_%d_cand_%d_params' %
                                 (folderFinal, igen, ican))
                        eval_params.append(pickle.load(f))
                        eval_gens.append(igen)
                        eval_cands.append(ican)
                    except:
                        pass
                    #eval_fits.append(0.15)
                    #eval_params.append([])

        # find x corresponding to smallest error from function evaluations
        if dataFrom == 'fitness':
            #fits_sort, fits_sort_indices, fits_sort_origind = unique(eval_fits, True, True)
            # argsort of the fitness values, smallest first
            fits_sort_indices = sorted(range(len(eval_fits)),
                                       key=lambda k: eval_fits[k])
            fits_sort = [eval_fits[i] for i in fits_sort_indices]
            gens_sort = [eval_gens[i] for i in fits_sort_indices]
            cands_sort = [eval_cands[i] for i in fits_sort_indices]
            params_sort = [eval_params[i] for i in fits_sort_indices]
        # find x corresponding to smallest error from individuals file
        elif dataFrom == 'individuals':
            params_unique, unique_indices = uniqueList(
                ind_cs)  # unique individuals
            fits_unique = [ind_fits[i] for i in unique_indices]
            gens_unique = [ind_gens[i] for i in unique_indices]
            cands_unique = [ind_cands[i] for i in unique_indices]

            sort_indices = sorted(range(len(fits_unique)),
                                  key=lambda k: fits_unique[k])  # sort fits
            fits_sort = [fits_unique[i] for i in sort_indices]
            gens_sort = [gens_unique[i] for i in sort_indices]
            cands_sort = [cands_unique[i] for i in sort_indices]
            params_sort = [params_unique[i] for i in sort_indices]
        # NOTE: for any other dataFrom value the *_sort names stay undefined,
        # and the appends below raise NameError when islands > 1.

        # if multiple islands, save data for each
        if islands > 1:
            ind_gens_isl.append(ind_gens)  # individuals data for islands
            ind_cands_isl.append(ind_cands)
            ind_fits_isl.append(ind_fits)
            ind_cs_isl.append(ind_cs)

            stat_gens_isl.append(stat_gens)  # statistics.csv for islands
            stat_worstfits_isl.append(stat_worstfits)
            stat_bestfits_isl.append(stat_bestfits)
            stat_avgfits_isl.append(stat_avgfits)
            stat_stdfits_isl.append(stat_stdfits)

            fits_sort_isl.append(fits_sort)  #sorted data
            gens_sort_isl.append(gens_sort)
            cands_sort_isl.append(cands_sort)
            params_sort_isl.append(params_sort)

    if islands > 1:
        return ind_gens_isl, ind_cands_isl, ind_fits_isl, ind_cs_isl, stat_gens_isl, \
            stat_worstfits_isl, stat_bestfits_isl, stat_avgfits_isl, stat_stdfits_isl, \
            fits_sort_isl, gens_sort_isl, cands_sort_isl, params_sort_isl
Esempio n. 41
0
def test_load_datatype():
    """Check that model2's input data holds exactly one data type: data_type."""
    type_column = model2.input_data['data_type']
    found_types = list(pl.unique(type_column))
    expected_types = [data_type]
    assert found_types == expected_types
Esempio n. 42
0
thin = 10  # not used in this part of the script; presumably an MCMC thinning interval

# set font
book_graphics.set_font()

### @export 'data'
# TODO: migrate data into a csv, load with pandas
dm = dismod3.load_disease_model(15630)
dm.calc_effective_sample_size(dm.data)
# Select male prevalence rows with age_start in [15, 20), age_end == 99 and
# an effective sample size above 1.
some_data = ([d for d in dm.data
              if d['data_type'] == 'prevalence data'
              and d['sex'] == 'male'
              and 15 <= d['age_start'] < 20
              and d['age_end'] == 99
              and d['effective_sample_size'] > 1])
# note: despite the name, this holds the unique 'region' values of the rows
countries = pl.unique([s['region'] for s in some_data])
# min()/max() raise ValueError if the filter above selected no rows
min_year = min([s['year_start'] for s in some_data])
max_year = max([s['year_end'] for s in some_data])
# one '<region>-<year_start>' label per selected row
cy = ['%s-%d'%(s['region'], s['year_start']) for s in some_data]

# n: effective sample size per row; r: the row's value as returned by dm.value_per_1
n = pl.array([s['effective_sample_size'] for s in some_data])
r = pl.array([dm.value_per_1(s) for s in some_data])

# binomial standard error of each observed rate, sqrt(r(1-r)/n)
s = pl.sqrt(r * (1-r) / n)


### @export 'binomial-model'
# uniform prior on [0, 1] for the single rate pi, started at .5
pi = mc.Uniform('pi', lower=0, upper=1, value=.5)

@mc.potential
def obs(pi=pi):
Esempio n. 43
0
def fixsyscaltimes(vis,newinterval=2.0):
    """
    Fix TIME,INTERVAL columns in MS SYSCAL subtable
    Input:
     vis          the MS containing the offending SYSCAL subtable
     newinterval  the interval to use in revised entries

     This function is intended to repair MS SYSCAL tables that suffer from
     multiple TIME values (over antennas) per Tsys measurement.  The gencal
     task (mode='tsys') expects all antennas to share the same TIME value
     for each Tsys measurement (and this is usually true).  The function
     finds those measurements that have multiple TIMEs and replaces them
     with a common TIME value which takes the value
     mean(oldTIME-INTERVAL/2)+newinterval/2.
     Usually (always?), oldTIME-INTERVAL/2 is constant over antennas
     and represents the physical timestamp of the Tsys measurement.
     If the function finds no pathological timestamps, it does not
     revise the table.

     The SYSCAL subtable is modified in place; nothing is returned.
    """

    import pylab as mypl
    import math as mymath
    myqa=taskinit.qatool()
    mytb=taskinit.tbtool()
    # open writable: the TIME and INTERVAL columns may be rewritten below
    mytb.open(vis+'/SYSCAL',nomodify=False)

    spws=mypl.unique(mytb.getcol("SPECTRAL_WINDOW_ID"))

    # each spectral window is checked (and repaired) independently
    for ispw in spws:
        st=mytb.query('SPECTRAL_WINDOW_ID=='+str(ispw),name='byspw')
        times=st.getcol('TIME')
        interval=st.getcol('INTERVAL')
        # start-of-measurement timestamps (TIME minus half the INTERVAL)
        timestamps=times-interval/2
        # start of the day (multiple of 86400 s) containing the first timestamp
        t0=86400.0*mymath.floor(timestamps[0]/86400.0)

        # Compare the number of distinct TIMEs with the number of distinct
        # whole-second timestamps; they agree when the table is healthy.
        utimes=mypl.unique(times-t0)
        nT=len(utimes)
        utimestamps=mypl.unique(mypl.floor(timestamps)-t0)
        nTS=len(utimestamps)
        
        msg='In spw='+str(ispw)+' found '+str(nTS)+' Tsys measurements with '+str(nT)+' TIMEs...'
        if nT==nTS:
            msg+='OK.'
            print msg

        else:
            msg+=' which is too many, so fixing it:'
            print msg 

            for uts in utimestamps:
                # rows whose whole-second timestamp equals this measurement's
                mask = ((mypl.floor(timestamps))-t0==uts)
                uTIMEs=mypl.unique(times[mask])
                nTIMEs=len(uTIMEs)
                # common replacement TIME: mean start timestamp + half the new interval
                newtime = mypl.mean(times[mask]-interval[mask]/2) + newinterval/2
                msg='  Found '+str(nTIMEs)+' TIMEs at timestamp='+str(myqa.time(str(newtime-newinterval/2)+'s',form='ymd')[0])
                if nTIMEs>1:
                    msg+=':'
                    print msg
                    print '   TIMEs='+str([myqa.time(str(t)+'s',form='ymd')[0] for t in uTIMEs])+' --> '+str(myqa.time(str(newtime)+'s',form='ymd')[0])+' w/ INTERVAL='+str(newinterval)
                    times[mask]=newtime
                    interval[mask]=newinterval
                    # the full columns are written back after each repaired measurement
                    st.putcol('TIME',times)
                    st.putcol('INTERVAL',interval)
                else:
                    msg+='...ok.'
                    print msg
        # close this spw's selection before moving on to the next one
        st.close()
    mytb.close()
Esempio n. 44
0
def evaluate_model(mod,
                   comment='',
                   data_fname='missing_noisy_data.csv',
                   truth_fname='data.csv'):
    """ Run specified model on existing data (data.csv / missing_noisy_data.csv) and save results in dev_log.csv
    Existing models: %s """ % data_run_models
    if mod not in data_run_models.split(' '):
        raise TypeError, 'Unrecognized model "%s"; must be one of %s' % (
            mod, data_run_models)

    import model
    reload(model)

    print 'loading data'
    data = pl.csv2rec(data_fname)
    truth = pl.csv2rec(truth_fname)

    t0 = time.time()
    print 'generating model'
    mod_mc = eval('model.%s(data)' % mod)

    print 'fitting model with mcmc'
    mod_mc.sample(10000, 5000, 50, verbose=1)
    t1 = time.time()

    print 'summarizing results'

    import graphics
    reload(graphics)
    pl.figure(figsize=(22, 17), dpi=300)
    pl.clf()
    graphics.plot_all_predictions_over_time(data,
                                            mod_mc.predicted,
                                            more_data=truth)

    data_stats = mod_mc.data_predicted.stats()
    i_out = [i for i in range(len(data)) if pl.isnan(data.y[i])]
    rmse_abs_out = pl.rms_flat(truth.y[i_out] - data_stats['mean'][i_out])
    rmse_rel_out = 100 * pl.rms_flat(1. - data_stats['mean'][i_out] /
                                     truth.y[i_out])

    i_in = [i for i in range(len(data)) if not pl.isnan(data.y[i])]
    rmse_abs_in = pl.rms_flat(truth.y[i_in] - data_stats['mean'][i_in])
    rmse_rel_in = 100 * pl.rms_flat(1. -
                                    data_stats['mean'][i_in] / truth.y[i_in])

    param_stats = mod_mc.param_predicted.stats()
    coverage = 100 * pl.sum(
        (truth.y[i_out] >= param_stats['95% HPD interval'][i_out, 0]) &
        (truth.y[i_out] <= param_stats['95% HPD interval'][i_out, 1])) / float(
            len(i_out))

    import md5
    data_hash = md5.md5(data).hexdigest()
    results = [
        mod, t1 - t0, rmse_abs_out, rmse_rel_out, rmse_abs_in, rmse_rel_in,
        coverage,
        len(data),
        len(pl.unique(data.region)),
        len(pl.unique(data.country)),
        len(pl.unique(data.year)),
        len(pl.unique(data.age)), data_hash, t0, comment
    ]
    print '%s: time: %.0fs out-of-samp rmse abs=%.1f rel=%.0f in-samp rmse abs=%.1f rel=%.0f coverage=%.0f\ndata: %d rows; %d regions, %d countries %d years %d ages [data hash: %s]\n(run conducted at %f)\n%s' % tuple(
        results)

    pl.savefig('/home/j/Project/Models/space-time-smoothing/images/%s.png' %
               t0)  # FIXME: don't hardcode path for saving images

    import csv
    f = open('dev_log.csv', 'a')
    f_csv = csv.writer(f)
    f_csv.writerow(results)
    f.close()

    return mod_mc
Esempio n. 45
0
def convert_MAPGPS_TEC(ms_name,mad_data_file,ref_time,ref_start,ref_end,plot_vla_tec,im_name):
    """
## =============================================================================
##
## This opens the MAPGPS Data table and selects a subset of TEC/DTEC values
## within about 15 deg of the VLA (a 30x30 deg patch). This then plots the
## zenith TEC/DTEC at the VLA site and makes the TEC map for use at the C++
## level. We chose to deal
## with the MAPGPS data in this separate fashion because there are large
## 'gaps' in the data where no TEC/DTEC values exist.  Consequently, we use the
## filled in CASA table to produce a TEC map and can not simply
## concatenate arrays.
##
## =============================================================================
##
## Inputs:
##    ms_name         type = string    Name of the measurement set for which to
##                                         acquire TEC/DTEC data
##    mad_data_file   type = string    Name of the MAPGPS TEC/DTEC data table
##    ref_time        type = float     Reference time (s) for setting the 
##                                         coordinates, UT 0 on the first day
##    ref_start, ref_end               Passed through unchanged to ztec_value
##                                         (presumably the time range to
##                                         plot -- TODO confirm)
##    plot_vla_tec    type = boolean   When True, this will open a plot of the 
##                                         interpolated TEC/DTEC at the VLA.
##    im_name       type = string    Name of the output TEC Map optionally
##                                       specified by the user
##
## Returns:
##    Opens a plot showing the zenith TEC/DTEC at the VLA (if plot_vla_tec=True)
##    and the name of the CASA image file containing the TEC map.
##
## =============================================================================
    """
    ## Only retrieve data in a 30x30 deg. patch centered (more or less) at the VLA
    tb.open(mad_data_file+'.tab')
    st0=tb.query('GDLAT>19 && GDLAT<49 && GLON>-122 && GLON<-92',
    ## If you want ALL the data to make a global map, use the line below:
    #st0=tb.query('GDLAT>-90. && GDLAT<90. && GLON>-180. && GLON<180',
                name='tecwindow')

    utimes=pylab.unique(st0.getcol('UT1_UNIX'))
    ulat=pylab.unique(st0.getcol('GDLAT'))
    ulong=pylab.unique(st0.getcol('GLON'))

    points_lat=len(ulat)
    points_long=len(ulong)
    num_maps=len(utimes)

    ## Initialize the array which will be used to make the image
    ## (axis 0: 0 = TEC, 1 = DTEC; then longitude, latitude, time)
    tec_array=pylab.zeros((2,points_long,points_lat,num_maps),dtype=float)

    minlat=min(ulat)
    minlong=min(ulong)

    print 'rows',len(utimes)

    itime=0
    for t in utimes:
        st1=st0.query('UT1_UNIX=='+str(t),name='bytime')
        n=st1.nrows()
        if itime%100==0:
            print itime, n
        ## Offsets from the minimum lon/lat are used directly as array indices.
        ## NOTE(review): this assumes a 1-degree grid with no missing lon/lat
        ## values; otherwise an index could exceed the array size -- confirm.
        ilong=st1.getcol('GLON')-minlong
        ilat=st1.getcol('GDLAT')-minlat
        itec=st1.getcol('TEC')
        idtec=st1.getcol('DTEC')
        for i in range(n):
            tec_array[0,int(ilong[i]),int(ilat[i]),itime]=itec[i]
            tec_array[1,int(ilong[i]),int(ilat[i]),itime]=idtec[i]
        st1.close()

        ## Simply interpolate to cull as many zeros as possible
        ## (median of good neighbors, if at least four of them)
        ## Work from a copy so that values filled in this pass do not feed
        ## into the medians of neighbouring cells.
        thistec_array=tec_array[:,:,:,itime].copy()
        thisgood=thistec_array[0]>0.0
        for i in range(1,points_long-1):
            for j in range(1,points_lat-1):
                if not thisgood[i,j]:
                    ## 3x3 neighbourhood of "good" flags around the empty cell
                    mask=thisgood[(i-1):(i+2),(j-1):(j+2)]
                    ## note: the test is strictly greater than 4, i.e. five or more good neighbours
                    if pylab.sum(mask)>4:
                        #print itime, i,j, pylab.sum(mask)
                        tec_array[0,i,j,itime]=pylab.median(thistec_array[0,(i-1):(i+2),(j-1):(j+2)][mask])
                        tec_array[1,i,j,itime]=pylab.median(thistec_array[1,(i-1):(i+2),(j-1):(j+2)][mask])
        itime+=1

    st0.close()
    tb.close()

    ## Zenith TEC at the VLA site (longitude -107.6184, latitude 34.0790)
    ztec_value(-107.6184,34.0790,points_long,points_lat,minlong,minlat,1,\
                1,5,ref_start,ref_end,int(num_maps),tec_array,plot_vla_tec)
    ## ref_time + 150 accounts for the fact that the MAPGPS map starts at 00:02:30 UT, not 00:00:00 UT
    ## Image name prefix: the user-supplied im_name, or the MS name when none is given
    if im_name == '':
        prefix = ms_name
    else:
        prefix = im_name
    CASA_image = make_image(prefix,minlong,minlat,ref_time+150.0,1,1,5*60,tec_array[0],'MAPGPS',appendix = '.MAPGPS_TEC')

    return CASA_image
Esempio n. 46
0
def calantsub(incaltable,outcaltable='',
              spw='',scan='',
              ant='',subant=''):

    """
    Substitute cal solutions by antenna
    Input:
     incaltable  Input caltable
     outcaltable Output caltable (if '', overwrite result on incaltable)
     spw         Spectral Window selection (no channel selection permitted)
     scan        Scan selection
     ant         Antenna (indices) which need replaced solutions
     subant      Antenna (indices) with which to replace those in ant

    This function provides a means to replace solutions by antenna,
    e.g., to substitute one antenna's Tsys spectra with another.

    The processing can be limited to specific spectral windows and/or
    scans.  The spw and scan parameters should be specified in the
    standard MS selection manner (comma-separated integers in a string),
    except no channel selection is supported.

    The ant parameter specifies one or more antenna indices 
    (comma-separated in a string) for which solutions are to be
    replaced.  The subant parameter lists the antenna indices
    from which the substitute solutions are to be obtained. E.g.,
    ant='3,5,7',subant='6,8,10' will cause the solutions from
    antenna id 6 to be copied to antenna id 3, id 8 to id 5 and id 10
    to id 7.  The number of antennas specified in ant and subant
    must match.

    Raises Exception when ant/subant are empty or of unequal length, when
    the spw/scan selection matches no rows, or when an antenna pair has a
    different number of solutions.
    
    """

    import pylab as mypl
    
    # trap insufficient ant subant specifications
    if len(ant)==0 or len(subant)==0:
        raise Exception, "Must specify at least one ant and subant."

    antlist=ant.split(',')
    sublist=subant.split(',')

    # trap dumb cases
    nant=len(antlist)
    nsub=len(sublist)
    if nant!=nsub:
        raise Exception, "Must specify equal number of ant and subant."

    # local tb tool
    mytb=taskinit.tbtool()

    # parse selection
    # Build a table query string from spw and/or scan, joined by '&&' when both are given.
    selstr=''
    if len(spw)>0:
        selstr+='SPECTRAL_WINDOW_ID IN ['+spw+']'
        if len(scan)>0:
            selstr+=' && '
    if len(scan)>0:
        selstr+='SCAN_NUMBER IN ['+scan+']'
        # note: the selection string is only echoed when a scan selection is given
        print "selstr = '"+selstr+"'"

    # verify selection (if any) selects non-zero rows
    if len(selstr)>0:
        mytb.open(incaltable)
        st=mytb.query(query=selstr)
        nselrows=st.nrows()
        st.close()
        mytb.close()
        if nselrows==0:
            raise Exception, 'Error: scan and/or spw selection selects no rows!'

    # manage the output table
    if outcaltable=='':
        outcaltable=incaltable
        print "No outcaltable specified; will overwrite incaltable."
    if outcaltable!=incaltable:
        # copy the whole table directory; the exit status is not checked
        os.system('cp -r '+incaltable+' '+outcaltable)

    # open the output table for adjustment
    mytb.open(outcaltable,nomodify=False)

    # Work on the selected rows if there is a selection, else on the whole table.
    stsel=mytb
    if len(selstr)>0:
        stsel=mytb.query(query=selstr,name='selected')

    # cols to substitute:
    # (plus whichever parameter column the table has: CPARAM or FPARAM)
    collist=['TIME','INTERVAL','PARAMERR','SNR','FLAG']
    cols=mytb.colnames()
    if cols.count('CPARAM')>0:
        collist.append('CPARAM')
    else:
        collist.append('FPARAM')

    # scan list
    scans=mypl.unique(stsel.getcol('SCAN_NUMBER'))

    print 'Found scans = ',scans

    # do one scan at a time
    # (the loop variable rebinds the `scan` parameter; selstr was already built from it)
    for scan in scans:
        st1=stsel.query(query='SCAN_NUMBER=='+str(scan),name='byscan')
        spws=mypl.unique(st1.getcol('SPECTRAL_WINDOW_ID'))

        print 'Scan '+str(scan)+' has spws='+str(spws)

        # do one spw at a time
        for ispw in spws:
            st2=st1.query(query='SPECTRAL_WINDOW_ID=='+str(ispw),name='byspw');

            for ia in range(nant):
                # rows of the donor antenna (subant) and of the antenna to be overwritten (ant)
                stsub=st2.query(query='ANTENNA1=='+sublist[ia],
                                name='subant')
                stant=st2.query(query='ANTENNA1=='+antlist[ia],
                                name='ant')


                # go to next ant if nothing to do
                # NOTE(review): this path skips the stsub/stant close() calls below
                if stant.nrows()<1:
                    continue

                print ' scan='+str(scan)+' spw='+str(ispw)+' ants: '+str(sublist[ia])+'->'+str(antlist[ia])

                # trap (unlikely?) pathological case
                if stsub.nrows()!=stant.nrows():
                    raise Exception, "In spw "+str(ispw)+" antenna ids "+str(antlist[ia])+" and "+str(sublist[ia])+" have a different number of solutions."

                # substitute values 
                for col in collist:
                    stant.putcol(col,stsub.getcol(col))

                stsub.close()
                stant.close()
            st2.close()
        st1.close()
    # when there was no selection, stsel is mytb itself, so mytb is closed twice here
    stsel.close()
    mytb.close()
Esempio n. 47
0
for counter in counters:

  # columns of the per-counter data file: a time stamp, then one per author
  fieldnames=['time']
  fieldnames.extend(authornames)
  fid=open('../gitstats/' + counter.replace(' ','_') + '_by_author.dat','rb')
  reader=csv.DictReader(fid,fieldnames=fieldnames,delimiter=' ')
  fields=[]
  for row in reader:
    # every cell is converted to an integer, in the row dict's own key order
    fields.append(map(numpy.int,row.values()))
  fid.close()
  fields=numpy.array(fields)
  # take the column order from the last row so that it matches row.values()
  # above; this needs at least one data row in the file
  fieldnames=row.keys()

  # map each canonical author name to the column indices of its aliases
  authorfields={}
  for author in pylab.unique(authoralias.values()):
    #print author
    af=[]
    for i in range(1,numpy.size(fieldnames)):
      if authoralias.has_key(fieldnames[i]):
        if authoralias[fieldnames[i]]==author:
          af.append(i)
    authorfields[author]=af

  # same grouping per institute (the rest of this loop is not visible here)
  institutefields={}
  for institute in pylab.unique(authorinstitute.values()):
    #print institute
    af=[]
    for i in range(0,numpy.size(fieldnames)):
      if fieldnames[i]=='time':
        continue
Esempio n. 48
0
import sys

import pylab as pl
from scipy.spatial import ConvexHull

# On the 2-Sphere, the Voronoi tesselation is equivalent to the convex hull
# projected on the sphere
# (Sugihara, Journal for Geometry and Graphics Volume 6 (2002), No. 1, 69-81.)
# The same is assumed to hold in 4D.... [This has to be checked!]

# magic number by straley for 120 particles
R = 1.6180339887498949

# Polar coordinates are read from the file named by the first command-line
# argument and converted by spheretools.convert using R.
polar = pl.load(sys.argv[1])
from spheretools import *
cartesian = convert(polar, R)

CHull = ConvexHull(cartesian)

# One output line per point: "<number of neighbours> <neighbour indices...>".
# The neighbours of a point are all other vertices of the hull simplices
# that contain it.
with open("bonds.txt", 'w') as fw:
    for p in range(cartesian.shape[0]):
        # rows of the simplex table in which point p appears
        rows = pl.where(CHull.simplices == p)[0]
        # every vertex of those simplices, sorted and without duplicates
        members = pl.unique(CHull.simplices[rows])
        # drop p itself
        neighs = members[members != p]
        fw.write("%d %s\n" % (len(neighs), " ".join(str(n) for n in neighs)))
Esempio n. 49
0
def interactive_initial_guess(comp_im, trace_im, comp_names,
                              data_dir='/Users/tomczak/pydir/vp_art/data/'):
    """Interactively match arc-lamp lines of the first fiber to reference lines.

    The spectrum of the first fiber (lowest positive value in the trace image)
    is extracted from the comparison image and shown above a reference
    spectrum.  The user steps through the reference line list with the
    keyboard; once every line has been identified or skipped, the collected
    (reference line, pixel) pairs are written to 'initial_lambda_soln.dat'
    in the current directory.

    Parameters
    ----------
    comp_im : path of the comparison image, read with ``pf.getdata``.
    trace_im : path of the trace image, read with ``pf.getdata``; pixels
        of a fiber carry that fiber's (positive) number.
    comp_names : sequence of names; for each name the files
        ``spectrum_<name>.dat`` and ``lines_<name>.dat`` are read from
        ``data_dir``.  The wavelength grid of the first name is used for all.
    data_dir : directory prefix (including the trailing separator) holding
        the reference files.  Defaults to the previously hard-coded location.

    Returns
    -------
    None.  The call blocks in ``pl.show()`` until the window is closed.
    """

    comp = pf.getdata(comp_im)
    trc = pf.getdata(trace_im)

    # reading in reference spectrum: fluxes of all components are summed on
    # the wavelength grid of the first component
    ref_lam = np.loadtxt(data_dir + 'spectrum_' + comp_names[0] + '.dat')[:, 0]
    ref_flux = np.zeros(len(ref_lam))
    for comp0 in comp_names:
        ref_flux += np.loadtxt(data_dir + 'spectrum_' + comp0 + '.dat')[:, 1]
    # column_stack yields the (N, 2) array on Python 2 and 3 alike, whereas
    # np.array(zip(...)) only does so when zip returns a list
    ref_spec = np.column_stack((ref_lam, ref_flux))

    # reading in reference spectrum's line list (first column of each file)
    lines = []
    for comp0 in comp_names:
        lines += np.loadtxt(data_dir + 'lines_' + comp0 + '.dat')[:, 0].tolist()
    lines.sort()

    # extracting spectrum for the first fiber: sum the comparison image over
    # all pixels labelled with the lowest positive fiber number, per column
    fibnums = pl.unique(trc)
    fibnums = fibnums[fibnums > 0]

    first_fiber_inds = pl.where(trc == fibnums[0])
    n_pix = max(first_fiber_inds[1]) + 1
    first_fiber_spec = pl.zeros(n_pix)
    for y, x in zip(first_fiber_inds[0], first_fiber_inds[1]):
        first_fiber_spec[x] += comp[y][x]

    xaxis = range(n_pix)

    class compspec:
        """Two-panel figure driving the keyboard line identification."""

        _DONE_TEXT = "\n  Complete: close  \n     this window \n"

        def __init__(self, xaxis, flux, ref_spec, ref_lines):
            self.xaxis, self.flux = xaxis, flux
            self.ref_spec = ref_spec
            self.ref_lines = ref_lines

            self.fig = pl.figure(figsize=(15, 7))
            self._start_over()

            self.fig.subplots_adjust(hspace=0.6, right=0.95)
            self.fig.canvas.mpl_connect('key_press_event', self.key)
            pl.show()

        def _start_over(self):
            """Reset the bookkeeping and (re)build both panels."""
            self.soln_data = []
            self.counter = 0
            self.zooms = []

            self.sp1 = self.fig.add_subplot(211)
            self.sp2 = self.fig.add_subplot(212)

            self.sp1.plot(self.xaxis, self.flux/max(self.flux))
            self.sp1.set_ylim(-0.03, 1.2)
            self.sp1.set_title('ArcLamp Spectrum: x = identify line,  i = skip line,  z/a = zoom in/out,  r = reset')
            self.sp1.set_xlabel('Pixel')

            self.sp2.plot(self.ref_spec[:,0], self.ref_spec[:,1]/max(self.ref_spec[:,1]))
            self.sp2.set_ylim(-0.03, 1.2)
            x0, x1, y0, y1 = self.sp2.axis()
            self.ytext = y0 + (y1-y0)*0.85
            self.show_lines = []
            self._mark_line(0)
            self.sp2.set_title('Reference Spectrum')
            self.sp2.set_xlabel('Wavelength (Angstroms)')

        def _mark_line(self, index):
            """Draw the dashed marker and label for reference line `index`."""
            lam = self.ref_lines[index]
            self.show_lines.append([self.sp2.axvline(lam, color='r', lw=1, ls='--'),
                                    self.sp2.annotate(str(int(lam+0.5)),
                                                      (lam, self.ytext),
                                                      rotation='vertical', size=11)])

        def _advance(self):
            """Move to the next reference line, or finish and save the pairs."""
            self.counter += 1
            if self.counter == len(self.ref_lines):
                for panel in (self.sp1, self.sp2):
                    panel.text(0.35, 0.35, self._DONE_TEXT,
                               transform=panel.transAxes, size=22, bbox=dict(fc='w'))
                pl.savetxt('initial_lambda_soln.dat', self.soln_data)
            else:
                self._mark_line(self.counter)

        def key(self, event):
            '''
            #  x = identify line
            #  i = skip to next line
            #  z = zoom in
            #  a = zoom out
            #  r = reset
            '''
            k = event.key
            x = event.xdata
            sp = event.inaxes   # None when the pointer is outside both panels
            # once every line is handled, x / i are ignored instead of
            # indexing past the end of the marker list
            pending = self.counter < len(self.ref_lines)

            if k == 'x':
                in_top = (sp is not None and
                          sp.get_subplotspec() == self.sp1.get_subplotspec())
                if pending and in_top:
                    self.sp1.axvline(x, color='r', lw=1)
                    self.show_lines[self.counter][0].set_ls('-')
                    # pair the pixel with the line currently highlighted,
                    # i.e. before the counter moves on
                    self.soln_data.append([self.ref_lines[self.counter], x])
                    self._advance()

            elif k == 'i':
                if pending:
                    self.show_lines[self.counter][0].set_lw(0)
                    self.show_lines[self.counter][1].set_visible(False)
                    self._advance()

            elif k == 'z':
                if sp is not None:
                    # remember both x-ranges so that 'a' can undo this zoom
                    self.zooms.append([self.sp1.axis()[:2], self.sp2.axis()[:2]])
                    axis = sp.axis()
                    dx = (axis[1] - axis[0])/10.
                    sp.set_xlim(x-dx, x+dx)

            elif k == 'a':
                if self.zooms:
                    lim1, lim2 = self.zooms.pop()
                    self.sp1.set_xlim(lim1)
                    self.sp2.set_xlim(lim2)

            elif k == 'r':
                self.fig.clf()
                self._start_over()

            pl.draw()

    interactive_go = compspec(xaxis, first_fiber_spec, ref_spec, lines)
Esempio n. 50
0
)

# Y: table read from the dm-19807 posterior CSV named below (prevalence,
# north_africa_middle_east, male, 2005 according to the file name); no
# column is used as the index.
Y = pandas.read_csv(
    "/home/j/Project/dismod/dismod_status/prod/dm-19807/posterior/dm-19807-prevalence-north_africa_middle_east-male-2005.csv",
    index_col=None,
)

import pylab as pl


def weighted_age(df):
    """Return the population-weighted mean of every "Draw" column of *df*.

    Each row's draw values are multiplied by that row's "Population",
    divided by the total population of *df*, and summed over the rows.
    The result is a Series indexed by the draw column names.
    """
    population = df["Population"]
    draws = df.filter(like="Draw")
    # same arithmetic order as (value * population) / total, row by row
    shares = draws.mul(population, axis=0).div(population.sum())
    return shares.sum()


# Figure 1: for every Iso3 code in X, plot the mean over the "Draw" columns
# of that country's rows, then overlay the population-weighted aggregate.
pl.figure()
for iso in list(pl.unique(X["Iso3"])):
    pl.plot(X[X["Iso3"] == iso].filter(like="Draw").mean(1).__array__(), label=iso)
# plotting a single point with semilogy switches the y axis to log scale
pl.semilogy([1], [1])

# weighted_age is applied per "Age" group; the mean over draws is plotted.
# This call goes through `pl` like every other plotting call here: a bare
# `plot` is only defined if something else star-imported pylab.
Z = X.groupby("Age").apply(weighted_age)
pl.plot(Z.mean(1).__array__(), color="red", linewidth=3, alpha=0.5, label="Inconsistent NA/ME")

pl.legend()
pl.axis([-5, 130, 1e-6, 2])


# Figure 2: the same per-country curves for Y, restricted to rows whose
# "Rate type" is "prevalence".
pl.figure()
for iso in list(pl.unique(Y["Iso3"])):
    pl.plot(Y[(Y["Iso3"] == iso) & (Y["Rate type"] == "prevalence")].filter(like="Draw").mean(1).__array__(), label=iso)

pl.semilogy([1], [1])
Esempio n. 51
0
    # Fetch the TIME_LOCAL values of emolt_sensor for site SITE[i]; the site
    # code is appended to the query URL wrapped in double quotes.
    print SITE[i]
    url='http://gisweb.wh.whoi.edu:8080/dods/whoi/emolt_sensor?emolt_sensor.TIME_LOCAL&emolt_sensor.SITE='
    dataset=open_url(url+'"'+SITE[i]+'"')
    var=dataset['emolt_sensor']
    print 'hold on  ... extracting your eMOLT mooring data'
    year_month_day = list(var.TIME_LOCAL)
    # parse every "YYYY-MM-DD" string into a datetime
    timelocal=[]
    for j in range(len(year_month_day)):
         timelocal.append(datetime.strptime(year_month_day[j],"%Y-%m-%d"))  
    # sort indirectly: order the positions by their timestamp (cmp-style sort),
    # then rebuild the list in that order
    index = range(len(timelocal))
    index.sort(lambda x, y:cmp(timelocal[x], timelocal[y]))
    timelocal = [timelocal[ii] for ii in index]
    print 'now generating a datetime'
    # frame indexed by the sorted timestamps, with the year as an extra column
    timepd=pd.DataFrame(range(len(timelocal)),index=timelocal)
    timepd['Year']=timepd.index.year
    year=unique(timepd['Year'])
    monthall=[]
    # only sites with at least `minyear` distinct years are analysed further
    if len(year)>=minyear:
        for k in range(len(year)):
            # records of year[k], counted per month; a month is kept only when
            # its record count exceeds minhour*minday
            timemonth=timepd.ix[timepd.index.year==year[k]]
            timemonth=timemonth.resample('m',how=['count'],kind='period')
            timemonth=timemonth.ix[timemonth[0,'count']>minhour*minday]
            month=unique(timemonth.index.month)
            print year[k],month
#            f.write(str(SITE[i])+','+str(year[k])+','+str(month)+'\n')
            # monthall holds, per year, the month numbers that passed the threshold
            monthall.append(month)
        common=[]
        # for each calendar month 1..12, scan the per-year month lists;
        # `num` is reset for every month
        for jj in range(1,13):
                num=0
                for kk in range(len(monthall)):
                    if jj in monthall[kk]:
Esempio n. 52
0
          # reorder temperature, depth and salinity with the same index permutation
          temp = [temp[i] for i in index]
          depth = [depth[i] for i in index]
          salt=[salt[i] for i in index]

        print 'Delimiting mooring data according to user-specified time'
        part_t,part_time,part_salt,distinct_dep= [],[],[],[]
        # input_time is read as a (start, end) pair
        start_time = input_time[0]
        end_time = input_time[1]
        print start_time, end_time
        # keep the records whose time lies in [start_time, end_time] and whose
        # depth lies in [dep[0], dep[1]] (both bounds inclusive)
        for i in range(len(temp)):
            if (start_time <= datet[i] <= end_time) & (dep[0]<=depth[i]<= dep[1]):
                part_t.append(temp[i])
                part_time.append(datet[i])
                part_salt.append(salt[i])
            
        # note: the unique depths are taken over all records, not only the selected ones
        distinct_dep.append(unique(depth))
        obs_temp=part_t
        obs_dt=part_time
        obs_salt=part_salt

        # Build the observation frame `obstso`, indexed by the timestamps parsed
        # from the first 19 characters of str(obs_dt[kk]); it holds temperature
        # when intend_to is 'temp' and salinity otherwise.
        obs_dtindex=[]
        if intend_to=='temp':
            for kk in range(len(obs_temp)):
                obs_temp[kk]=f2c(obs_temp[kk]) # converts to Celsius
                obs_dtindex.append(datetime.strptime(str(obs_dt[kk])[:19],'%Y-%m-%d %H:%M:%S'))
            obstso=pd.DataFrame(obs_temp,index=obs_dtindex)
        else:
            for kk in range(len(obs_salt)):
                obs_dtindex.append(datetime.strptime(str(obs_dt[kk])[:19],'%Y-%m-%d %H:%M:%S'))
            obstso=pd.DataFrame(obs_salt,index=obs_dtindex)
        print 'obs Dataframe is ready'
Esempio n. 53
0
def plot_fits_pdf(disease, prior, year, param_type_list, filename=''):
    '''Plot country fits, one PDF page per country.

    Each page has two rows of panels (male on top, female below) and one
    column per entry of `param_type_list`.  For 'm_with' / 'm_all' only the
    'm_all' data bars are drawn; for every other data type the input data
    are drawn together with the mean and HPD band of the posterior draws
    read from disk ('DM-CO') and of the estimates returned by `get_emp`
    ('GBD2010').

    Parameters:
      disease          model number used in the 'dm-<disease>' directory names
      prior            passed to `get_emp` / `find_fnrfx` and used in the
                       output file name
      year             estimation year; model data are kept within
                       year-2 .. year+2
      param_type_list  sequence of data type codes, one panel column each
      filename         suffix for the output file name

    The PDF is written to
    <prod dir>/dm-<disease>/image/<prior>_w_prior_<year>_<filename>.pdf.
    Every figure is closed once its page has been saved and the PDF is closed
    even if plotting fails part-way, so no figures or file handles are left
    open.
    '''
    base_dir = '/home/j/Project/dismod/dismod_status/prod/'
    mortality = pandas.read_csv('/homes/peterhm/gbd/dmco_mortality.csv')

    world = load_new_model(disease)

    # create list of countries to report: those flagged ihme_indic_country == 1,
    # minus four codes that are excluded explicitly
    country_list = pandas.read_csv(
        '/snfs1/DATA/IHME_COUNTRY_CODES/IHME_COUNTRYCODES.CSV', index_col=None)
    country_list = country_list[country_list['ihme_indic_country'] == 1]
    country_list = list(pl.unique(country_list['iso3']))
    for excluded in ('BMU', 'HKG', 'MAC', 'PRI'):
        country_list.remove(excluded)

    # create list of countries order by number of data points, then alphabetical
    country_ordered = []
    for country in country_list:
        country_ordered.append(
            (country,
             len(world.input_data[world.input_data['area'] == country]),
             len(world.get_data('p')[world.get_data('p')['area'] == country])))

    dtype = [('ISO3', 'S10'), ('pts', int), ('p', int)]
    country_ordered = pl.array(country_ordered, dtype=dtype)
    country_ordered = list(pl.sort(country_ordered, order=['pts', 'p',
                                                           'ISO3']))
    # ascending sort reversed: countries with the most data come first
    country_ordered.reverse()

    n_params = len(param_type_list)
    pp = PdfPages(base_dir + '/dm-%s/image/%s_w_prior_%s_%s.pdf' %
                  (disease, prior, year, filename))
    try:
        for c, country in enumerate(country_ordered):
            country = country[0]
            pl.figure(c, figsize=(n_params * 4, 8))
            for s, sex in enumerate(['male', 'female']):
                model = load_new_model(disease, country, sex)
                model.keep(start_year=year - 2)
                model.keep(end_year=year + 2)
                add_data(model, mortality, country, sex, year)
                for j, data_type in enumerate(param_type_list):
                    # row s (0 = male, 1 = female), column j
                    pl.subplot(2, n_params, (j + 1) + (s * n_params))
                    if data_type in ('m_with', 'm_all'):
                        dismod3.graphics.plot_data_bars(model.get_data('m_all'),
                                                        color='grey',
                                                        label='m_all')
                    else:
                        # get estimates
                        est = pandas.read_csv(
                            base_dir + 'dm-%s/posterior/dm-%s-%s-%s-%s-%s.csv' %
                            (disease, disease, full_name[data_type], country,
                             sex, year),
                            index_col=None)
                        est = est.filter(like='Draw')
                        gbd_est = get_emp(prior, data_type, country, sex, year)
                        find_fnrfx(model, prior, data_type, country, sex, year)

                        # upper y limit: the larger of the two mean curves' peaks
                        ymax = 0.
                        for curve in (est, gbd_est):
                            peak = max(curve.mean(1))
                            if peak > ymax:
                                ymax = peak

                        # plotting
                        df = model.input_data
                        if sex == 'male':  # shift all so male is zero
                            map_func = {'male': 0, 'total': -.5, 'female': -1}
                        else:  # shift all so female is zero
                            map_func = {'male': 1, 'total': .5, 'female': 0}

                        # scale the data values by exp(-mu * mean sex offset),
                        # using the x_sex fixed effect of this data type
                        mu = (model.parameters[data_type]['fixed_effects']
                              ['x_sex']['mu'])
                        offset = df[df['data_type'] == data_type]['sex'].map(
                            map_func).mean()
                        # NOTE(review): this assigns into the object returned
                        # by get_data, while the bars below are drawn from
                        # model.input_data -- confirm that get_data returns a
                        # view, otherwise the adjustment is not plotted
                        model.get_data(data_type)['value'] = model.get_data(
                            data_type)['value'] * pl.exp(-mu * offset)
                        dismod3.graphics.plot_data_bars(
                            df[df['data_type'] == data_type])
                        pl.plot(pl.array(est.mean(1)), 'k-', label='DM-CO')
                        pl.plot(pl.array(gbd_est.mean(1)), 'r-', label='GBD2010')
                        pl.plot(mc.utils.hpd(pl.array(gbd_est).T, .05), 'r:')
                        pl.plot(mc.utils.hpd(pl.array(est).T, .05), 'k:')
                        pl.axis([-5, 105, -ymax * .05, ymax * 1.1])
                    pl.title(country + ' ' + data_type + ' ' + sex + ' ' +
                             str(year))
                    if sex == 'male': pl.legend(loc=(.25, 1.145))
            pl.subplots_adjust(top=.83, bottom=.07)

            pp.savefig(c)
            # close rather than clear: otherwise one figure per country
            # stays alive until the interpreter exits
            pl.close(c)
    finally:
        pp.close()
Esempio n. 54
0
#  Calculate the time range, if not given.
#  delta_t is first scaled by CPU_CLOCK; a value of zero means no range was
#  supplied, in which case the span between tic_step and toc_step is used.
delta_t = delta_t * CPU_CLOCK
if delta_t == 0:
    dt = toc_step - tic_step
    if dt > delta_t:
        delta_t = dt
    #  Reported after dividing the CPU_CLOCK scaling back out.
    print "Data range: ", delta_t / CPU_CLOCK, "ms"

#  Once more doing the real gather and plots this time.
#  tics and tocs are shifted in place so that they are relative to tic_step.
start_t = float(tic_step)
tics -= tic_step
tocs -= tic_step
end_t = (toc_step - start_t) / CPU_CLOCK

#  Get all "task" names and assign colours.
TASKTYPES = pl.unique(funcs)
print TASKTYPES

#  Set colours of task/subtype.
#  Colours are handed out in order from `colours`, wrapping around after
#  maxcolours entries.
TASKCOLOURS = {}
ncolours = 0
for task in TASKTYPES:
    TASKCOLOURS[task] = colours[ncolours]
    ncolours = (ncolours + 1) % maxcolours

#  For fiddling with colours...
#  In verbose mode the chosen colours are listed in sorted task order.
if args.verbose:
    print "#Selected colours:"
    for task in sorted(TASKCOLOURS.keys()):
        print "# " + task + ": " + TASKCOLOURS[task]
    for task in sorted(SUBCOLOURS.keys()):