Example #1
    def __init__(self, depth=9):
        self.depth = depth

        train = (3, 32, 32)  # CIFAR-10 input shape: (channels, height, width)

        nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
        strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

        # Now construct the network
        layers = [Conv(**self.conv_params(3, 16))]
        layers.append(self.module_s1(nfms[0], True))

        for nfm, stride in zip(nfms[1:], strides):
            res_module = self.module_s1(nfm) if stride == 1 else self.module_s2(nfm)
            layers.append(res_module)
        layers.append(BatchNorm())
        layers.append(Activation(Rectlin()))
        layers.append(Pooling('all', op='avg'))
        layers.append(Affine(10, init=Kaiming(local=False),
                             batch_norm=True, activation=Softmax()))
        self.layers = layers
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyMulti())
        model.initialize(train, cost=cost)
        self.model = model
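A quick standalone illustration of how the `nfms`/`strides` comprehensions above expand; `depth = 2` is a hypothetical value chosen for brevity:

depth = 2
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
# -> [16, 16, 32, 32, 64, 64]: each of the three stages repeated `depth` times
strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
# -> [1, 2, 1, 2, 1]: stride 2 exactly where the feature-map count changes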
def get_burst_photons(d, ich=0, ph_sel=Ph_sel('all')):
    """Return a list of arrays of photon timestamps in each burst.

    Arguments:
        d (Data): Data() object
        ich (int): channel index
        ph_sel (Ph_sel): photon selection. It allows selecting timestamps
            from a specific photon selection, e.g. ph_sel=Ph_sel(Dex='Dem').
            See :mod:`fretbursts.ph_sel` for details.

    Returns:
        A list of arrays of photon timestamps (one array per burst).
    """
    bursts = d.mburst[ich]
    i_start, i_end = bursts.istart, bursts.istop

    ph_times = d.get_ph_times(ich)
    burst_slices = [slice(i1, i2 + 1) for i1, i2 in zip(i_start, i_end)]
    burst_photons = [ph_times[slice_i] for slice_i in burst_slices]

    if ph_sel != Ph_sel('all'):
        ph_times_mask = d.get_ph_mask(ich, ph_sel=ph_sel)
        photon_masks = [ph_times_mask[slice_i] for slice_i in burst_slices]
        burst_photons = [ph[mask] for ph, mask in zip(burst_photons,
                                                      photon_masks)]
    return burst_photons
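A minimal sketch of the burst-slicing logic above, using made-up timestamps and start/stop indices so it runs without a fretbursts Data object:

import numpy as np

ph_times = np.array([10, 20, 30, 40, 50, 60, 70])
i_start, i_end = np.array([0, 4]), np.array([2, 6])   # hypothetical burst boundaries
burst_slices = [slice(i1, i2 + 1) for i1, i2 in zip(i_start, i_end)]
burst_photons = [ph_times[s] for s in burst_slices]
# -> [array([10, 20, 30]), array([50, 60, 70])]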
def expr_as_list():

    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))

    # multiple rows and columns
    res = 2 - iris
    res = h2o.as_list(res, use_pandas=False)
    res = list(zip(*res))
    assert abs(float(res[0][4]) - -2.6) < 1e-10 and abs(float(res[1][5]) - -1.6) < 1e-10 and \
           abs(float(res[2][11]) - 0.5) < 1e-10, "incorrect values"

    # single column
    res = 2 - iris
    res = h2o.as_list(res[0], use_pandas=False)
    res = list(zip(*res))
    assert abs(float(res[0][4]) - -2.6) < 1e-10 and abs(float(res[0][18]) - -3.1) < 1e-10 and \
           abs(float(res[0][25]) - -2.8) < 1e-10, "incorrect values"

    # local data
    frm = h2o.as_list(h2o.H2OFrame([[1,2,3]]), use_pandas=False)
    assert float(frm[1][2]) == 3, "incorrect values"

    frm = h2o.as_list(h2o.H2OFrame([[1,2,3], [4,5,6]]), use_pandas=False)
    assert float(frm[2][1]) == 5, "incorrect values"
Example #4
def represent_table(table, indent=0, cell_wrap=str):
    """
    Render a table

    cell_wrap is a method to wrap the cell values in
    """

    if not table:
        return ''

    # calculate the width of each column
    table = [[str(cell).replace('|', r'\|')
              for cell in row]
             for row in table]

    lengths = [
        max(
            get_terminal_width(cell)
            for cell in column
        )
        for column in zip(*table)  # transpose
    ]

    return '\n'.join(
        ' ' * indent +
        '| %s |' % ' | '.join(cell_wrap(ljust(cell, length))
                              for cell, length in zip(row, lengths))
        for row in table
    )
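A usage sketch for represent_table(); get_terminal_width and ljust are not defined in this snippet, so plain len() and str.ljust stand-ins are assumed here:

def get_terminal_width(cell):
    return len(cell)          # stand-in: ignores wide/ANSI characters

def ljust(cell, width):
    return cell.ljust(width)  # stand-in for the padding helper used above

print(represent_table([['name', 'value'], ['alpha', '1'], ['beta', '23']], indent=2))
#   | name  | value |
#   | alpha | 1     |
#   | beta  | 23    |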
Example #5
    def bootstrap(self):
        """
        Return a new Alignment that is a bootstrap replicate of self
        """
        new_sites = sorted(sample_wr(self.get_sites()))
        seqs = list(zip(self.get_names(), (''.join(seq) for seq in zip(*new_sites))))
        return self.__class__(seqs)
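A standalone sketch of the same column-bootstrap idea on a hypothetical three-taxon alignment: resample alignment columns with replacement, then transpose back to per-taxon strings.

import random

names = ['t1', 't2', 't3']
seqs = ['ACGT', 'ACGA', 'TCGT']
sites = list(zip(*seqs))                                 # alignment columns
new_sites = sorted(random.choices(sites, k=len(sites)))  # sample with replacement
boot = list(zip(names, (''.join(col) for col in zip(*new_sites))))
print(boot)   # e.g. [('t1', 'AAGT'), ('t2', 'AAGA'), ('t3', 'TTGT')]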
Example #6
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins]
    merge = MergeMultistream(layers, merge="stack")
    assert(len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.get(), dW_exp)
    return
    def test_call(self):
        new_toy_snap = self.toy_modifier(self.toy_snapshot)
        assert_array_almost_equal(new_toy_snap.coordinates,
                                  self.toy_snapshot.coordinates)
        new_vel = new_toy_snap.velocities
        old_vel = self.toy_snapshot.velocities
        same_vel = [np.allclose(new_vel[i], old_vel[i]) 
                    for i in range(len(new_vel))]
        assert_equal(Counter(same_vel), Counter({True: 2, False: 1}))
        for new_v, old_v in zip(new_vel, old_vel):
            assert_almost_equal(sum([v**2 for v in new_v]),
                                sum([v**2 for v in old_v]))

        new_omm_snap = self.openmm_modifier(self.openmm_snap)
        n_atoms = len(self.openmm_snap.coordinates)
        assert_array_almost_equal(new_omm_snap.coordinates,
                                  self.openmm_snap.coordinates)
        new_vel = new_omm_snap.velocities
        old_vel = self.openmm_snap.velocities
        same_vel = [np.allclose(new_vel[i], old_vel[i]) 
                    for i in range(len(new_vel))]
        assert_equal(Counter(same_vel), Counter({True: n_atoms-1, False: 1}))
        u_vel_sq = (old_div(u.nanometers, u.picoseconds))**2
        for new_v, old_v in zip(new_vel, old_vel):
            assert_almost_equal(
                sum([(v**2).value_in_unit(u_vel_sq) for v in new_v]),
                sum([(v**2).value_in_unit(u_vel_sq) for v in old_v])
            )
Example #8
    def permuted_copy(self, partition=None):
        """ Return a copy of the collection with all alignment columns permuted
        """
        def take(n, iterable):
            return [next(iterable) for _ in range(n)]

        if partition is None:
            partition = Partition([1] * len(self))

        index_tuples = partition.get_membership()

        alignments = []
        for ix in index_tuples:
            concat = Concatenation(self, ix)
            sites = concat.alignment.get_sites()
            random.shuffle(sites)
            d = dict(zip(concat.alignment.get_names(), [iter(x) for x in zip(*sites)]))
            new_seqs = [[(k, ''.join(take(l, d[k]))) for k in d] for l in concat.lengths]

            for seqs, datatype, name in zip(new_seqs, concat.datatypes, concat.names):
                alignment = Alignment(seqs, datatype)
                alignment.name = name
                alignments.append(alignment)

        return self.__class__(records=sorted(alignments, key=lambda x: SORT_KEY(x.name)))
Example #9
def find_nearest_mcat(band, skypos, radius, maglimit=30.):
    """
    Given a sky position and a search radius, find the nearest MCAT source
        and return its position and magnitude in specified band.

    :param band: The band to use, either 'FUV' or 'NUV'.

    :type band: str

    :param skypos: Two element array of RA and Dec in decimal degrees.

    :type skypos: array

    :param radius: Search radius in decimal degrees.

    :type radius: float

    :param maglimit: The NUV faint limit to return MCAT sources for.

    :type maglimit: float
    """

    data = get_mags(band, skypos[0], skypos[1], radius, 30)
    if not data:
        return None

    separation = np.array([angularSeparation(skypos[0], skypos[1], a[0], a[1])
                           for a in zip(data['ra'], data['dec'])])
    minsep = np.where(separation == separation.min())

    return {'mag':data[band]['mag'][minsep][0],
            'skypos':np.array(list(zip(data['ra'],
                                       data['dec'])))[minsep][0].tolist(),
            'distance':min(separation)}
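A minimal sketch of the nearest-source selection above, with a small-angle flat-sky separation standing in for angularSeparation and made-up catalogue positions:

import numpy as np

ra = np.array([10.00, 10.10, 10.20])
dec = np.array([20.00, 20.05, 20.30])
skypos = (10.12, 20.04)
separation = np.sqrt(((ra - skypos[0]) * np.cos(np.radians(dec)))**2 + (dec - skypos[1])**2)
nearest = np.argmin(separation)
print(ra[nearest], dec[nearest], separation[nearest])   # -> 10.1 20.05 ~0.021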
Example #10
def _fill_gaps(frame_iter1, frame_iter2):
    """Fill missing rows in the corrected images with data from nearby times.

    Parameters
    ----------
    frame_iter1 : iterator of list of array
        The corrected frames (one list entry per channel).
    frame_iter2 : iterator of list of array
        The corrected frames (one list entry per channel).

    Yields
    ------
    list of array
        The corrected and filled frames.
    """
    first_obs = next(frame_iter1)
    for frame in frame_iter1:
        for frame_chan, fobs_chan in zip(frame, first_obs):
            fobs_chan[np.isnan(fobs_chan)] = frame_chan[np.isnan(fobs_chan)]
        if all(np.all(np.isfinite(chan)) for chan in first_obs):
            break
    most_recent = [x * np.nan for x in first_obs]
    for frame in frame_iter2:
        for fr_chan, mr_chan in zip(frame, most_recent):
            mr_chan[np.isfinite(fr_chan)] = fr_chan[np.isfinite(fr_chan)]
        yield [np.nan_to_num(mr_ch) + np.isnan(mr_ch) * fo_ch
               for mr_ch, fo_ch in zip(most_recent, first_obs)]
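A tiny usage sketch for _fill_gaps, feeding it two independent passes over the same synthetic single-channel movie so the NaN entry gets filled from a nearby frame:

import numpy as np

def make_frames():
    return [[np.array([[1.0, np.nan], [3.0, 4.0]])],
            [np.array([[np.nan, 2.0], [3.0, 4.0]])]]

filled = list(_fill_gaps(iter(make_frames()), iter(make_frames())))
print(filled[0][0])   # -> [[1. 2.] [3. 4.]]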
Example #11
    def _select_best_compound(cls, xs):
        """
        Selects the "best" combination of units based on the number of units
        in the compound, then the ones with the smallest number of SI units,
        then the ones with the lowest indices in the basis list
        """
        # Convert xs to numpy arrays
        xs = [numpy.asarray(list(x), dtype='int') for x in xs]
        # Find the number of units in each of the compounds
        lengths = [sum(abs(x)) for x in xs]
        min_length = min(lengths)
        min_length_xs = [x for x, l in zip(xs, lengths) if l == min_length]
        # If there are multiple compounds of equal length pick the compound
        # with the smallest number of base units
        if len(min_length_xs) == 1:
            min_x = min_length_xs[0]
        else:
            si_length_sums = [abs(x).dot(cls.si_lengths)
                              for x in min_length_xs]
            min_si_length_sum = min(si_length_sums)
            min_si_length_sums = [x for x, l in zip(min_length_xs,
                                                    si_length_sums)
                                  if l == min_si_length_sum]
            if len(min_si_length_sums) == 1:
                min_x = min_si_length_sums[0]
            else:
                index_sums = [nonzero(x)[0].sum() for x in min_si_length_sums]
                min_x = min_si_length_sums[argmin(index_sums)]
        return min_x
Example #12
def print_label_on_image(frame, top_labels):
    labels = [(label_index[index], "{0:.2f}".format(prob)) for (index, prob) in top_labels]

    font = cv2.FONT_HERSHEY_COMPLEX_SMALL
    rect_color = (0, 0, 0)
    text_color = (255, 255, 255)
    font_scale = 0.45
    thickness = 1
    start_pt = (10, 10)
    extra_space = (4, 10)

    label_offset = 0
    label_num = 0
    for label, prob in labels:
        if label_num > 0:
            font_scale = .3
        rect_pt = (start_pt[0], start_pt[1] + label_offset)
        text_size = cv2.getTextSize(label, font, font_scale, thickness)[0]
        prob_size = cv2.getTextSize(prob, font, font_scale, thickness)[0]
        prob_offset = (prob_size[0] + extra_space[0], 0)
        text_top = tuple(map(sum, list(zip(rect_pt, extra_space))))
        rect_ops_pt = tuple(map(sum, list(zip(text_top, text_size, extra_space, prob_offset))))
        text_bot = (text_top[0], rect_ops_pt[1] - extra_space[1])
        prob_bot = (text_top[0] + text_size[0] + extra_space[0], text_bot[1])
        cv2.rectangle(frame, rect_pt, rect_ops_pt, rect_color, thickness=cv2.cv.CV_FILLED)
        cv2.putText(frame, label, text_bot, font, font_scale, text_color, thickness)
        cv2.putText(frame, prob, prob_bot, font, font_scale, text_color, thickness)
        label_offset += rect_ops_pt[1] - rect_pt[1]
        label_num += 1

    return frame
    def testCartesianFromSpherical(self):
        nsamples = 10
        theta = self.rng.random_sample(nsamples) * np.pi - 0.5 * np.pi
        phi = self.rng.random_sample(nsamples) * 2.0 * np.pi

        points = []
        for ix in range(nsamples):
            vv = [np.cos(theta[ix]) * np.cos(phi[ix]),
                  np.cos(theta[ix]) * np.sin(phi[ix]),
                  np.sin(theta[ix])]

            points.append(vv)

        points = np.array(points)
        lon, lat = utils.sphericalFromCartesian(points)
        outPoints = utils.cartesianFromSpherical(lon, lat)

        for pp, oo in zip(points, outPoints):
            np.testing.assert_array_almost_equal(pp, oo, decimal=6)

        # test passing in arguments as floats
        for ix, (ll, bb) in enumerate(zip(lon, lat)):
            xyz = utils.cartesianFromSpherical(ll, bb)
            self.assertIsInstance(xyz[0], np.float)
            self.assertIsInstance(xyz[1], np.float)
            self.assertIsInstance(xyz[2], np.float)
            self.assertAlmostEqual(xyz[0], outPoints[ix][0], 12)
            self.assertAlmostEqual(xyz[1], outPoints[ix][1], 12)
            self.assertAlmostEqual(xyz[2], outPoints[ix][2], 12)

        # test _xyz_from_ra_dec <-> testCartesianFromSpherical
        np.testing.assert_array_equal(utils.cartesianFromSpherical(lon, lat),
                                      utils._xyz_from_ra_dec(lon, lat).transpose())
def assert_not_equal_array_array(list_a, list_b):
    exist_diff = False
    for (alpha, beta) in zip(list_a, list_b):
        for (elem_a, elem_b) in zip(alpha, beta):
            if elem_a != elem_b:
                exist_diff = True
    return exist_diff
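A quick check of the helper above on two small hypothetical 2-D lists; it returns True as soon as any corresponding element pair differs:

print(assert_not_equal_array_array([[1, 2], [3, 4]], [[1, 2], [3, 4]]))   # False (identical)
print(assert_not_equal_array_array([[1, 2], [3, 4]], [[1, 2], [3, 5]]))   # True (one element differs)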
    def _weighting(self, interpPoints, values):
        """
        interpPoints is a numpy array where interpolation is desired
        values are the model values.
        """
        result = np.zeros((interpPoints.size, np.size(values[0])), dtype=float)

        inRange = np.where((interpPoints['airmass'] <= np.max(self.dimDict['airmass'])) &
                           (interpPoints['airmass'] >= np.min(self.dimDict['airmass'])))
        usePoints = interpPoints[inRange]
        # Find the neighboring healpixels
        hpids, hweights = get_neighbours(self.nside, np.pi/2.-usePoints['altEclip'],
                                         usePoints['azEclipRelSun'])

        badhp = np.in1d(hpids.ravel(), self.dimDict['hpid'], invert=True).reshape(hpids.shape)
        hweights[badhp] = 0.

        norm = np.sum(hweights, axis=0)
        good = np.where(norm != 0.)[0]
        hweights[:, good] = hweights[:, good]/norm[good]

        amRightIndex, amLeftIndex, amRightW, amLeftW = self.indxAndWeights(usePoints['airmass'],
                                                                           self.dimDict['airmass'])

        nhpid = self.dimDict['hpid'].size
        # loop though the hweights and the airmass weights
        for hpid, hweight in zip(hpids, hweights):
            for amIndex, amW in zip([amRightIndex, amLeftIndex], [amRightW, amLeftW]):
                weight = hweight*amW
                result[inRange] += weight[:, np.newaxis]*values[amIndex*nhpid+hpid]

        return result
  def attack(train, x):
    kwargs = {}

    # randomly select parameters and their corresponding values
    kwargs['k'] = random.randint(1,20)
    if random.randint(0,1): kwargs['model_id'] = "my_model"
    if random.randint(0,1): kwargs['max_iterations'] = random.randint(1,1000)
    if random.randint(0,1): kwargs['standardize'] = [True, False][random.randint(0,1)]
    if random.randint(0,1):
      method = random.randint(0,3)
      if method == 3:
        s = [[random.uniform(train[c].mean()[0]-100,train[c].mean()[0]+100) for p in range(kwargs['k'])] for c in x]
        print("s: {0}".format(s))
        start = h2o.H2OFrame(list(zip(*s)))
        kwargs['user_points'] = start
      else:
        kwargs['init'] = ["Furthest","Random", "PlusPlus"][method]
    if random.randint(0,1): kwargs['seed'] = random.randint(1,10000)

    # display the parameters and their corresponding values
    print("-----------------------")
    print("x: {0}".format(x))
    for k, v in zip(list(kwargs.keys()), list(kwargs.values())):
      if k == 'user_points':
        print(k + ": ")
        start.show()
      else:
        print(k + ": {0}".format(v))


    H2OKMeansEstimator(**kwargs).train(x=x, training_frame=train)
    print("-----------------------")
Example #17
def plot(data,axes=None,alpha=.5,clabel=True,cbar=False,aspect='equal',**kw):
	"""Given output from post2d.data, plot the scalar as discrete or smooth plot.

	For raw discrete data, plot filled circles with radii of particles, colored by the scalar value.

	For smooth discrete data, plot image with optional contours and contour labels.

	For vector data (raw or smooth), plot quiver (vector field), with arrows colored by the magnitude.

	:param axes: matplotlib.axes\ instance where the figure will be plotted; if None, will be created from scratch.
	:param data: value returned by :yref:`yade.post2d.data`
	:param bool clabel: show contour labels (smooth mode only), or annotate cells with numbers inside (with perArea==2)
	:param bool cbar: show colorbar (equivalent to calling pylab.colorbar(mappable) on the returned mappable)

	:return: tuple of ``(axes,mappable)``; mappable can be used in further calls to pylab.colorbar.
	"""
	import pylab,math
	if not axes: axes=pylab.gca()
	if data['type']=='rawScalar':
		from matplotlib.patches import Circle
		import matplotlib.collections,numpy
		patches=[]
		for x,y,d,r in zip(data['x'],data['y'],data['val'],data['radii']):
			patches.append(Circle(xy=(x,y),radius=r))
		coll=matplotlib.collections.PatchCollection(patches,linewidths=0.,**kw)
		coll.set_array(numpy.array(data['val']))
		bb=coll.get_datalim(coll.get_transform())
		axes.add_collection(coll)
		axes.set_xlim(bb.xmin,bb.xmax); axes.set_ylim(bb.ymin,bb.ymax)
		if cbar: axes.get_figure().colorbar(coll)
		axes.grid(True); axes.set_aspect(aspect)
		return axes,coll
	elif data['type']=='smoothScalar':
		loHi=data['bbox']
		if data['perArea'] in (0,1):
			img=axes.imshow(data['val'],extent=(loHi[0][0],loHi[1][0],loHi[0][1],loHi[1][1]),origin='lower',aspect=aspect,**kw)
			ct=axes.contour(data['x'],data['y'],data['val'],colors='k',origin='lower',extend='both')
			if clabel: axes.clabel(ct,inline=1,fontsize=10)
		else:
			img=axes.imshow(data['val'],extent=(loHi[0][0],loHi[1][0],loHi[0][1],loHi[1][1]),origin='lower',aspect=aspect,interpolation='nearest',**kw)
			xStep=(data['x'][1]-data['x'][0]) if len(data['x'])>1 else 0
			for y,valLine in zip(data['y'],data['val']):
				for x,val in zip(data['x'],valLine): axes.text(x-.4*xStep,y,('-' if math.isnan(val) else '%5g'%val),size=4)
		axes.update_datalim(loHi)
		axes.set_xlim(loHi[0][0],loHi[1][0]); axes.set_ylim(loHi[0][1],loHi[1][1])
		if cbar: axes.get_figure().colorbar(img)
		axes.grid(True if data['perArea'] in (0,1) else False); axes.set_aspect(aspect)
		return axes,img
	elif data['type'] in ('rawVector','smoothVector'):
		import numpy
		loHi=data['bbox']
		valX,valY=numpy.array(data['valX']),numpy.array(data['valY']) # rawVector data are plain python lists
		scalars=numpy.sqrt(valX**2+valY**2)
		# numpy.sqrt computes element-wise sqrt
		quiv=axes.quiver(data['x'],data['y'],data['valX'],data['valY'],scalars,**kw)
		#axes.update_datalim(loHi)
		axes.set_xlim(loHi[0][0],loHi[1][0]); axes.set_ylim(loHi[0][1],loHi[1][1])
		if cbar: axes.get_figure().colorbar(quiv)
		axes.grid(True); axes.set_aspect(aspect)
		return axes,quiv
Example #18
    def __iter__(self):
        shape = self.shape
        granularity = self.granularity
        offset = self.offset

        def out(group):
            """Calculate a single iteration output"""
            return np.array(list(it.chain.from_iterable(
                (base + s for s in it.product(
                    *[range(o, o + x) for x, o in
                      zip(shape[(granularity[0] + 1):],
                          offset[(granularity[0] + 1):])]))
                for base in group)))

        if granularity[0] > 0 or granularity[1] == 1:
            def cycle():
                """Iterator that produces one period/period of the output."""
                base_iter = it.product(*[list(range(o, x + o)) for x, o in
                                         zip(shape[1:(granularity[0] + 1)],
                                             offset[1:(granularity[0] + 1)])])
                for group in zip(*[base_iter] * granularity[1]):
                    yield out(group)
            for positions in it.cycle(cycle()):
                yield positions
        else:
            base_iter = it.product(*[list(range(o, x + o)) for x, o in
                                     zip(shape[:(granularity[0] + 1)],
                                         offset[:(granularity[0] + 1)])])
            for group in zip(*[base_iter] * granularity[1]):
                yield out([b[1:] for b in group])
Example #19
    def cycle():
        """Iterator that produces one period (cycle) of the output."""
        base_iter = it.product(*[list(range(o, x + o)) for x, o in
                                 zip(shape[1:(granularity[0] + 1)],
                                     offset[1:(granularity[0] + 1)])])
        for group in zip(*[base_iter] * granularity[1]):
            yield out(group)
Example #20
def col_names_check():

  iris_wheader = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
  assert iris_wheader.col_names == ["sepal_len","sepal_wid","petal_len","petal_wid","class"], \
      "Expected {0} for column names but got {1}".format(["sepal_len","sepal_wid","petal_len","petal_wid","class"],
                                                         iris_wheader.col_names)

  iris = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv"))
  assert iris.col_names == ["C1","C2","C3","C4","C5"], "Expected {0} for column names but got " \
                                                         "{1}".format(["C1","C2","C3","C4","C5"], iris.col_names)

  df = h2o.H2OFrame.from_python(list(zip(*np.random.randn(100,4).tolist())), column_names=list("ABCD"), column_types=["enum"]*4)
  df.head()
  assert df.col_names == list("ABCD"), "Expected {} for column names but got {}".format(list("ABCD"), df.col_names)
  assert list(df.types.values()) == ["enum"]*4, "Expected {} for column types but got {}".format(["enum"]*4, df.types)

  df = h2o.H2OFrame(list(zip(*np.random.randn(100,4).tolist())))
  df.head()
  assert df.col_names == ["C1","C2","C3","C4"], "Expected {} for column names but got {}".format(["C1","C2","C3","C4"]
                                                                                                 , df.col_names)
  assert list(df.types.values()) == ["real"]*4, "Expected {} for column types but got {}".format(["real"]*4, df.types)

  df = h2o.H2OFrame({'B': ['a', 'a', 'b', 'NA', 'NA']})
  df.head()
  assert df.col_names == ["B"], "Expected {} for column names but got {}".format(["B"], df.col_names)

  df = h2o.H2OFrame.from_python({'B': ['a', 'a', 'b', 'NA', 'NA']}, column_names=["X"])
  df.head()
  assert df.col_names == ["X"], "Expected {} for column names but got {}".format(["X"], df.col_names)
Example #21
def create_network(stage_depth):
    if stage_depth in (18, 18):
        stages = (2, 2, 2, 2)
    elif stage_depth in (34, 50):
        stages = (3, 4, 6, 3)
    elif stage_depth in (68, 101):
        stages = (3, 4, 23, 3)
    elif stage_depth in (102, 152):
        stages = (3, 8, 36, 3)
    else:
        raise ValueError('Invalid stage_depth value: {}'.format(stage_depth))

    bottleneck = False
    if stage_depth in (50, 101, 152):
        bottleneck = True

    layers = [Conv(**conv_params(7, 64, strides=2)),
              Pooling(3, strides=2)]

    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths
    # of 64, 128, 256, 512
    nfms = list(itt.chain.from_iterable(
        [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
    strides = [-1] + [1 if cur == prev else 2 for cur,
                      prev in zip(nfms[1:], nfms[:-1])]

    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, bottleneck, stride))

    layers.append(Pooling('all', op='avg'))
    layers.append(Conv(**conv_params(1, 1000, relu=False)))
    layers.append(Activation(Softmax()))
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
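A standalone sketch (independent of neon) of the nfms/strides bookkeeping above, for a hypothetical 18-layer configuration with stages = (2, 2, 2, 2):

import itertools as itt

stages = (2, 2, 2, 2)
nfms = list(itt.chain.from_iterable(
    [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
# -> [64, 64, 128, 128, 256, 256, 512, 512]
strides = [-1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
# -> [-1, 1, 2, 1, 2, 1, 2, 1]: stride 2 whenever the feature-map depth doubles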
Example #22
    def bprop_mergebroadcast(self, ngLayer, layers, error_views,
                             error, deltas, out_shape, alpha, beta, alphas, betas):
        C, D, H, W, N = ngLayer.in_shape5D
        i = 0
        for l, e in zip(layers, error_views):
            ngLayer.tensors_temp[(i * 4):(i * 4 + 4)] = e.primitive[0:4]
            i += 1

        channel = c_longlong(ngLayer.channels.ctypes.data)
        tensors = c_longlong(ngLayer.tensors_temp.ctypes.data)
        prim = c_longlong(ngLayer.primitive.ctypes.data)
        self.mklEngine.Concat_b(tensors, ngLayer.layer_num, error.get_prim(), prim, channel,
                                ngLayer.initOK_b, N, H, W)

        ngLayer.initOK_b = 1

        i = 0
        for l, e in list(zip(layers, error_views)):
            e.primitive[0:4] = ngLayer.tensors_temp[(i * 4):(i * 4 + 4)]
            e.shape5D = l.layers[-1].outputs.shape5D
            err = l.bprop(e)
            ngLayer.tensors_temp[(i * 4):(i * 4 + 4)] = err.primitive[0:4]
            i += 1

        if deltas is None:
            return

        size = c_longlong(np.prod(ngLayer.in_shape5D))
        prim = c_longlong(ngLayer.sum_prim.ctypes.data)
        tensors = c_longlong(ngLayer.tensors_temp.ctypes.data)
        self.mklEngine.MklSumTensor(ngLayer.layer_num, tensors, size, deltas.get_prim(), prim)

        deltas.shape5D = ngLayer.in_shape5D
def MCStoPFI(xysky, za):

   arg=[mt.atan2(j,i)+mt.pi for i,j in zip(*xysky)]


   print("Scaling", file=sys.stderr)

   scale=ScalingFactor(xysky)

   #deviation
   # base
   #print >> sys.stderr , "Offset 1"
   #offx1,offy1=OffsetBase(xysky)

   # z-dependent
   print("Offset 2", file=sys.stderr)
   offx2,offy2=DeviationZenithAngle(xysky,za)

   xyf3c=[]
   #print zip(scale,arg,offx1,offy1,offx2,offy2)
   #for s,t,ox1,oy1,ox2,oy2 in zip(scale,arg,offx1,offy1,offx2,offy2):
   for s,t,ox2,oy2 in zip(scale,arg,offx2,offy2):
       x=s*mt.cos(t)+ox2
       y=s*mt.sin(t)+oy2
       #x=s*mt.cos(t)+ox1+ox2
       #y=s*mt.sin(t)+oy1+oy2
       #print x,y,x+y
       #xyf3c.append([x,y])
       #xyf3c.append([x,y,s,t,ox1,oy1,ox2,oy2])
       xyf3c.append([x,y,s,t,ox2,oy2])

   #print xyf3c
   return xyf3c
def iris_h2o_vs_sciKmeans():
  # Connect to a pre-existing cluster
    # connect to localhost:54321

  iris_h2o = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))
  iris_sci = np.genfromtxt(pyunit_utils.locate("smalldata/iris/iris.csv"), delimiter=',')
  iris_sci = iris_sci[:,0:4]

  s =[[4.9,3.0,1.4,0.2],
  [5.6,2.5,3.9,1.1],
  [6.5,3.0,5.2,2.0]]

  start = h2o.H2OFrame(s)

  h2o_km = h2o.kmeans(x=iris_h2o[0:4], k=3, user_points=start, standardize=False)

  sci_km = KMeans(n_clusters=3, init=np.asarray(s), n_init=1)
  sci_km.fit(iris_sci)

  # Log.info("Cluster centers from H2O:")
  print("Cluster centers from H2O:")
  h2o_centers = h2o_km.centers()
  print(h2o_centers)

  # Log.info("Cluster centers from scikit:")
  print("Cluster centers from scikit:")
  sci_centers = sci_km.cluster_centers_.tolist()

  print(sci_centers)

  for hcenter, scenter in zip(h2o_centers, sci_centers):
    for hpoint, spoint in zip(hcenter,scenter):
      assert abs(hpoint - spoint) < 1e-10, "expected centers to be the same"
def shuffling_large():
  print("Reading in Arcene training data for binomial modeling.")
  train_data = h2o.upload_file(path=pyunit_utils.locate("smalldata/arcene/shuffle_test_version/arcene.csv"))
  train_data_shuffled = h2o.upload_file(path=pyunit_utils.locate("smalldata/arcene/shuffle_test_version/arcene_shuffled.csv"))


  print("Create model on original Arcene dataset.")
  h2o_model = H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True, alpha=0.5)
  h2o_model.train(x=list(range(1000)), y=1000, training_frame=train_data)

  print("Create second model on original Arcene dataset.")
  h2o_model_2 = H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True, alpha=0.5)
  h2o_model_2.train(x=list(range(1000)), y=1000, training_frame=train_data)

  print("Create model on shuffled Arcene dataset.")
  h2o_model_s = H2OGeneralizedLinearEstimator(family="binomial", lambda_search=True, alpha=0.5)
  h2o_model_s.train(x=list(range(1000)), y=1000, training_frame=train_data_shuffled)

  print("Assert that number of predictors remaining and their respective coefficients are equal.")

  for x, y in zip(h2o_model._model_json['output']['coefficients_table'].cell_values,h2o_model_2.
          _model_json['output']['coefficients_table'].cell_values):
    assert (type(x[1]) == type(y[1])) and (type(x[2]) == type(y[2])), "coefficients should be the same type"
    if isinstance(x[1],float):
      assert abs(x[1] - y[1]) < 5e-10, "coefficients should be equal"
    if isinstance(x[2],float):
      assert abs(x[2] - y[2]) < 5e-10, "coefficients should be equal"

  for x, y in zip(h2o_model._model_json['output']['coefficients_table'].cell_values,h2o_model_s.
          _model_json['output']['coefficients_table'].cell_values):
    assert (type(x[1]) == type(y[1])) and (type(x[2]) == type(y[2])), "coefficients should be the same type"
    if isinstance(x[1],float):
      assert abs(x[1] - y[1]) < 5e-10, "coefficients should be equal"
    if isinstance(x[2],float):
      assert abs(x[2] - y[2]) < 5e-10, "coefficients should be equal"
    def test_storage(self):
        import os
        fname = data_filename("interface_set_storage_test.nc")
        if os.path.isfile(fname):
            os.remove(fname)
        template_traj = make_1d_traj([0.0])
        template = template_traj[0]
        storage_w = paths.Storage(fname, "w")
        storage_w.save(template_traj)
        storage_w.save(self.increasing_set)
        storage_w.sync_all()

        storage_r = paths.AnalysisStorage(fname)
        reloaded = storage_r.interfacesets[0]

        assert_items_equal(reloaded.lambdas, self.increasing_set.lambdas)
        assert_equal(reloaded.period_min, self.increasing_set.period_min)
        assert_equal(reloaded.period_max, self.increasing_set.period_max)
        for (truth, beauty) in zip(self.increasing_set, reloaded):
            assert_equal(truth, beauty)

        for (v, l) in zip(reloaded.volumes, reloaded.lambdas):
            assert_equal(reloaded.get_lambda(v), l)

        storage_r.close()
        storage_w.close()

        if os.path.isfile(fname):
            os.remove(fname)
Example #27
def local_maxima(array2d,user_peak,index=False,count=4,floor=0,bug=False):

    from operator import itemgetter, attrgetter
    
    if user_peak == 0:
        where = ((array2d >= np.roll(array2d,1,0)) &
                 (array2d >= np.roll(array2d,-1,0)) &
                 (array2d >= np.roll(array2d,0,1)) &
                 (array2d >= np.roll(array2d,0,-1)) &
                 (array2d >= old_div(array2d.max(),5.0)) &
                 (array2d > floor*np.ones(array2d.shape)) &
                 (array2d >= array2d.mean()))
    else: #some simpler filter if user indicated some modes
        where = array2d > floor

    #ignore the lesser local maxima, throw out anything with a ZERO
    if bug==True:    
        print(array2d,array2d[where.nonzero()],where.nonzero()[0])
    
    peaks = list(zip(where.nonzero()[0],where.nonzero()[1],array2d[where.nonzero()]))
    
    peaks = sorted(peaks,key=itemgetter(2),reverse=True)
   
    if len(peaks) > count and user_peak==0:
        peaks = peaks[0:count]
    
            
    keys = ['y_i','z_i','amp']
    
    peaks = [dict(list(zip(keys,peaks[x]))) for x in range(len(peaks))]
    
    return peaks
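A small sketch of the peak-packaging step at the end of local_maxima, using made-up coordinates and amplitudes:

ys, zs, amps = [2, 5], [3, 1], [7.0, 4.5]
peaks = sorted(zip(ys, zs, amps), key=lambda p: p[2], reverse=True)
keys = ['y_i', 'z_i', 'amp']
peaks = [dict(zip(keys, p)) for p in peaks]
# -> [{'y_i': 2, 'z_i': 3, 'amp': 7.0}, {'y_i': 5, 'z_i': 1, 'amp': 4.5}]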
def inception_bare(ref_module, kvals, name="i"):
    (p1, p2, p3) = kvals
    branch1 = [Conv(fshape(1, p1[0]), **common)] if p1[0] else []
    branch2 = [Conv(fshape(1, p2[0]), **common), Conv(fshape(3, p2[1]), **commonp1)]
    branch3 = [Pooling(op=p3[0], **pool3s1p1)] + (
        [Conv(fshape(1, p3[1]), **common)] if p3[1] else [])

    branch1 = Sequential(branch1)
    branch2 = Sequential(branch2)
    branch3 = Sequential(branch3)

    (branch1_ref, branch2_ref, branch3_ref) = ref_module[0].layers

    if p1[0]:
        for ll, lr in zip(branch1.layers, branch1_ref.layers):
            if ll.has_params:
                ll.set_params({'params': {'W': lr.W.get(), 'weight_bias': lr.weight_bias.get()}})

    for ll, lr in zip(branch2.layers, branch2_ref.layers):
        if ll.has_params:
            ll.set_params({'params': {'W': lr.W.get(), 'weight_bias': lr.weight_bias.get()}})

    if p3[1]:
        for ll, lr in zip(branch3.layers, branch3_ref.layers):
            if ll.has_params:
                ll.set_params({'params': {'W': lr.W.get(), 'weight_bias': lr.weight_bias.get()}})

    return (branch1.layers, branch2.layers, branch3.layers)
    def _filter_on_value(self, sounding_ids, data_values, filter_comparison=None, mapping_func=lambda x: x):
        if filter_comparison == None:
            print("Filter value not supplied returning all ids", file=sys.stderr)
            return sounding_ids

        if not isinstance(filter_comparison, types.FunctionType):
            def regexp_compare(val):
                return re.search(filter_comparison, str(val)) != None 
            comparison_func = regexp_compare
        else:
            comparison_func = filter_comparison

        ret_ids = []
        for obj_snd_ids, obj_values in zip(sounding_ids, data_values):
            obj_filtered_ids = []
            mapped_values = list(map(mapping_func, obj_values))
            for curr_id, curr_value in zip(obj_snd_ids, mapped_values):
                if hasattr(curr_value, "strip"):
                    curr_value = curr_value.strip()

                if comparison_func(curr_value):
                    obj_filtered_ids.append(curr_id)
            obj_filtered_ids.sort()
            ret_ids.append( tuple(obj_filtered_ids) )

        return tuple(ret_ids)
Example #30
def shape_divide(arr, scale, reduction='mean'):
    '''Scale down an array (shape N x M x ...) by the specified scale
       in each dimension (n x m x ...)
       Each dimension in arr must be divisible by its scale
       (throws an error otherwise)
       This reduces each sub-array (n x m x ...) to a single element
       according to the reduction parameter, which is one of:
        * mean (default): mean of each sub-array
        * median: median of each sub-array
        * first: the [0,0,0, ...] element of the sub-array
        * all: all the possible (N x M x ...) sub-arrays;
               returns an array of shape (n, m, ..., N, M, ...)
       This is a downsampling operation, similar to
       scipy.misc.imresize and scipy.ndimage.interpolate'''
    arr = np.asanyarray(arr)
    reduction_options = ['mean', 'median', 'first', 'all']
    assert reduction in reduction_options, \
        'reduction must be one of: ' + ' '.join(reduction_options)
    scale = coerce_to_target_length(scale, arr.ndim)
    assert all([sh % sc == 0 for sh, sc in zip(arr.shape,scale)]), \
        'all dimensions must be divisible by their respective scale!'
    new_shape = flatten([sh//sc, sc] for sh, sc in zip(arr.shape, scale))
    # group pixels into smaller sub-arrays that can then be modified by standard operations
    subarrays = _transpose_interleaved(arr.reshape(new_shape))
    flat_subarrays = subarrays.reshape([np.product(scale)] + new_shape[::2])
    return (np.mean(flat_subarrays, axis=0) if reduction == 'mean' else
            np.median(flat_subarrays, axis=0) if reduction == 'median' else
            flat_subarrays[0] if reduction == 'first' else
            subarrays if reduction == 'all' else
            None)
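The core downsampling trick in plain numpy, assuming a 4x4 array reduced by scale (2, 2): reshape so each 2x2 block gets its own axes, then average over them (shape_divide generalises this to arbitrary dimensions and reductions):

import numpy as np

arr = np.arange(16.0).reshape(4, 4)
small = arr.reshape(2, 2, 2, 2).mean(axis=(1, 3))
print(small)   # -> [[ 2.5  4.5]
               #     [10.5 12.5]]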
Example #31
    def loadSedsFromList(self, sedNameList, magNormList, \
                         internalAvList=None, galacticAvList=None, redshiftList=None):
        """
        Load the Seds specified by sedNameList, applying the specified normalization,
        extinction, and redshift.

        @param [in] sedNameList is a list of file names containing Seds

        @param [in] magNormList is a list of magnitude normalizations

        @param [in] internalAvList is an optional list of A(V) due to internal
        dust

        @param [in] galacticAvList is an optional list of A(V) due to
        Milky Way dust

        @param [in] redshiftList is an optional list of redshifts for the
        input Sed

        Seds are read in and stored to this object's internal list of Seds.

        Note: if you constructed this SedList object without internalAvList,
        you cannot load Seds with internalAvList now.  Likewise for galacticAvList
        and redshiftList.
        """

        if not self._initialized:
            if internalAvList is not None:
                self._internal_av_list = copy.deepcopy(list(internalAvList))
            else:
                self._internal_av_list = None

            if galacticAvList is not None:
                self._galactic_av_list = copy.deepcopy(list(galacticAvList))
            else:
                self._galactic_av_list = None

            if redshiftList is not None:
                self._redshift_list = copy.deepcopy(list(redshiftList))
            else:
                self._redshift_list = None

        else:
            if self._internal_av_list is None and internalAvList is not None:
                raise RuntimeError("This SedList does not contain internalAvList")
            elif self._internal_av_list is not None:
                if internalAvList is None:
                    self._internal_av_list += [None] * len(sedNameList)
                else:
                    self._internal_av_list += list(internalAvList)

            if self._galactic_av_list is None and galacticAvList is not None:
                raise RuntimeError("This SedList does not contain galacticAvList")
            elif self._galactic_av_list is not None:
                if galacticAvList is None:
                    self._galactic_av_list += [None] * len(sedNameList)
                else:
                    self._galactic_av_list += list(galacticAvList)

            if self._redshift_list is None and redshiftList is not None:
                raise RuntimeError("This SedList does not contain redshiftList")
            elif self._redshift_list is not None:
                if redshiftList is None:
                    self._redshift_list += [None] * len(sedNameList)
                else:
                    self._redshift_list += list(redshiftList)

        temp_sed_list = []
        for sedName, magNorm in zip(sedNameList, magNormList):
            sed = Sed()

            if sedName != "None":
                if self._spec_map is not None:
                    sed.readSED_flambda(os.path.join(self._file_dir, self._spec_map[sedName]))
                else:
                    sed.readSED_flambda(os.path.join(self._file_dir, sedName))

                if self._normalizing_bandpass is not None:
                    fNorm = sed.calcFluxNorm(magNorm, self._normalizing_bandpass)
                else:
                    fNorm = getImsimFluxNorm(sed, magNorm)

                sed.multiplyFluxNorm(fNorm)

            temp_sed_list.append(sed)


        if internalAvList is not None:
            self._av_int_wavelen, \
            self._a_int, \
            self._b_int = self.applyAv(temp_sed_list, internalAvList,
                                       self._av_int_wavelen, self._a_int, self._b_int)

        if redshiftList is not None:
            self.applyRedshift(temp_sed_list, redshiftList)

        if self._wavelen_match is not None:
            for sedObj in temp_sed_list:
                if sedObj.wavelen is not None:
                    sedObj.resampleSED(wavelen_match=self._wavelen_match)

        if galacticAvList is not None:
            self._av_gal_wavelen, \
            self._a_gal, \
            self._b_gal = self.applyAv(temp_sed_list, galacticAvList,
                                       self._av_gal_wavelen, self._a_gal, self._b_gal)

        self._sed_list += temp_sed_list

        self._initialized = True
Example #32
    def fwd_exit_cols(self):
        return [(1 - p)**(np.arange(r)[::-1])
                for r, p in zip(self.rs, self.ps)]
Example #33
    def bwd_enter_rows(self):
        return [
            stats.binom.pmf(np.arange(r)[::-1], r - 1, p)
            for r, p in zip(self.rs, self.ps)
        ]
Example #34
    def __iter__(self):
        for frame, displacement in zip(self._base, self.displacements):
            yield self._align(frame, displacement)
Example #35
    def __iter__(self):
        for frames in zip(*self._sequences):
            yield np.concatenate(frames, axis=3)
# %%
#
# def cart2pol(x, y):
#    rho = np.sqrt(x**2 + y**2)
#    phi = np.arctan2(y, x)
#    return(rho, phi)
#
# def pol2cart(rho, phi):
#    x = rho * np.cos(phi)
#    y = rho * np.sin(phi)
#    return(x, y)
#%% find center of mass
movie_shifts_x = np.zeros((T, ) + dim_r)
movie_shifts_y = np.zeros((T, ) + dim_r)

for r, idx_mat in zip(res_p, idfl):
    img_temp = np.zeros(np.prod(dim_r))
    img_temp[idx_mat] = 1
    img_temp = np.reshape(img_temp, dim_r, order='F')
    #    pl.imshow(img_temp)
    x1, x2 = np.round(scipy.ndimage.center_of_mass(img_temp)).astype(np.int)
    print((x1, x2))
    movie_shifts_x[:, x1, x2] = np.array(r[0][-1])[:, 0]
    movie_shifts_y[:, x1, x2] = np.array(r[0][-1])[:, 1]

#%%
pl.close()
mn = np.mean(m, 0)
pl.imshow(mn)

for imm_x, imm_y in zip(movie_shifts_x, movie_shifts_y):
traces, masks, triggers_out, amplitudes, ISI = load_data_from_stored_results(base_folder, thresh_CR=0.1,
                                                                             threshold_responsiveness=0.1, is_blob=True, time_CR_on=-.1, time_US_on=.05, thresh_MOV_iqr=1000,
                                                                             time_CS_on_MOV=-.25, time_US_on_MOV=0)
wheel_mat = traces['wheel_traces']
ftraces = traces['fluo_traces']
time_mat = traces['time_fluo']
time_e_mat = traces['time_eye']
time_w_mat = traces['time_wheel']
eye_mat = traces['eye_traces']
amplitudes_eyelid = amplitudes['amplitudes_eyelid']
amplitudes_fluo = amplitudes['amplitudes_fluo']
#%%
counter = 0
with np.load(glob(os.path.join(base_folder, '*-template_total.npz'))[0]) as ld:
    templs = ld['template_each']
    for mn1, A in zip(templs, masks['A_each']):

        pl.subplot(2, 3, counter + 1)
#            mn=np.median(templs,0)
        mn = mn1
        d1, d2 = np.shape(mn)
#            selem = disk(50)
#            mn=(mn1 - np.min(mn1))/(np.max(mn1)-np.min(mn1))
#            mn = rank.equalize(mn, selem=selem)
#            mn = exposure.equalize_hist(mn,nbins=1024)
#            os.path.split(fl)[-1]
#            pl.imshow(mn,cmap='gray')
#            pl.imshow(mn,cmap='gray',vmax=np.percentile(mn,99))

#            pl.imshow(mn,cmap='gray',vmax=np.percentile(mn,98))
        pl.imshow(A.mean(1).reshape((d1, d2), order='F'), alpha=1, cmap='hot')
    def _conn_to_dict(cls, conn):
        if isinstance(conn, dict):
            return conn
        if isinstance(conn, (tuple, list)):
            return dict(zip(cls._conn_params, conn))
        return {"port": conn}
Example #39
    def __str__(self):
        """Return ``str(self)``."""
        return ' x '.join('[{}, {}]'.format(xmin, xmax)
                          for xmin, xmax in zip(self.min_pt, self.max_pt))
Example #40
    def test_complete_with_registration_cancellation(self, runner):
        course_id = u"cmsc40300"
        course_name = u"Foobarmentals of Foobar II"

        admin_id = u"admin"
        instructor_ids = [u"instructor"]
        grader_ids = [u"grader"]
        student_ids = [u"student1", u"student2", u"student3", u"student4"]

        all_users = instructor_ids + grader_ids + student_ids

        admin, instructors, graders, students = self.create_clients(
            runner,
            admin_id,
            instructor_ids,
            grader_ids,
            student_ids,
            course_id,
            verbose=True)
        self.create_users(admin, all_users)

        self.create_course(admin, course_id, course_name)

        course = Course.get_by_course_id(course_id)
        self.assertIsNotNone(course)
        self.assertEqual(course.name, course_name)

        result = admin.run(
            "admin course set-attribute %s default_extensions 2" % (course_id))
        self.assertEqual(result.exit_code, 0)

        result = admin.run(
            "admin course set-attribute %s extension_policy per-student" %
            (course_id))
        self.assertEqual(result.exit_code, 0)

        self.add_users_to_course(admin, course_id, instructors, graders,
                                 students)

        deadline = get_datetime_now_utc() - timedelta(minutes=5)
        deadline = deadline.isoformat(sep=" ")

        result = instructors[0].run(
            "instructor assignment add",
            ["pa1", "Programming Assignment 1", deadline])
        self.assertEqual(result.exit_code, 0)

        result = instructors[0].run("instructor assignment set-attribute",
                                    ["pa1", "max_students", "2"])
        self.assertEqual(result.exit_code, 0)

        teams = [u"student1-student2", u"student3-student4"]

        students_team = [(students[0], students[1]),
                         (students[2], students[3])]

        self.register_team(students_team[0], teams[0], "pa1", course_id)
        self.register_team(students_team[1], teams[1], "pa1", course_id)

        _, _, team_commits = self.create_team_repos(admin, course_id,
                                                    teams[0:2],
                                                    students_team[0:2])

        # Team 0 cancels their registration, which they can do because they haven't submitted yet.
        result = students_team[0][0].run(
            "student assignment cancel-registration", ["pa1", "--yes"])
        self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS)

        # Team 0 tries to cancel their registration again, which doesn't work. There's nothing to cancel.
        result = students_team[0][0].run(
            "student assignment cancel-registration", ["pa1", "--yes"])
        self.assertEqual(result.exit_code, CHISUBMIT_FAIL)

        # Team 0 registers again
        result = students_team[0][0].run(
            "student assignment register",
            ["pa1", "--partner", students_team[0][1].user_id])
        self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS)

        # Team 1 submits.
        result = students_team[1][0].run("student assignment submit",
                                         ["pa1", "--yes"])
        self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS)

        # Team 1 tries to cancel their registration, which doesn't work. They have a submission.
        result = students_team[1][0].run(
            "student assignment cancel-registration", ["pa1", "--yes"])
        self.assertEqual(result.exit_code, CHISUBMIT_FAIL)

        # Team 1 cancels their submission
        result = students_team[1][0].run("student assignment cancel-submit",
                                         ["pa1", "--yes"])
        self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS)

        # Team 1 can now cancel their registration.
        result = students_team[1][0].run(
            "student assignment cancel-registration", ["pa1", "--yes"])
        self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS)

        for team, student_team in zip(teams, students_team):
            result = student_team[0].run("student team show", [team])
            self.assertEqual(result.exit_code, CHISUBMIT_SUCCESS)
Example #41
def evaluate_model_binary(model,
                          name,
                          data=None,
                          valid_d=None,
                          valid_l=None,
                          train_d=None,
                          train_l=None,
                          n_proc=2,
                          betaloss=False,
                          fudgeysoft=False):
    if not model.prediction_node.shape['f'] == 2:
        logger.warning(
            "Evaluate_model_binary is intended only for binary"
            "classification, this model has more or less outputs than 2")

    report_str = "T_nll,\tT_acc,\tT_ROCA,\tV_nll,\tV_acc,\tV_ROCA,\td_acc,\t" \
                 "d_ROCA,\tri0,\tr01,\tri2,\tri3,\trim\n"

    # Training Data ###########################################################
    if train_d is None:
        train_d = data.train_d
    if train_l is None:
        train_l = data.train_l
    train_preds = []
    train_gt = []
    for i, (d, l) in enumerate(zip(train_d[:4], train_l[:4])):
        if os.path.exists("2-" + name + "_train_%i_pred.h5" % i):
            pred = utils.h5load("2-" + name + "_train_%i_pred.h5" % i)
        else:
            pred = model.predict_dense(d, pad_raw=False)  # (f,z,x,y)
            utils.h5save(pred, "2-" + name + "_train_%i_pred.h5" % i)

        if betaloss:
            pred = pred[0]  # only mode
        else:
            pred = pred[1]  # only pred for class '1'

        l = l[0]  # throw away channel
        l, pred = image.center_cubes(l, pred, crop=True)
        train_preds.append(pred)
        train_gt.append(l)

    train_gt = [gt > 0.5
                for gt in train_gt]  # binarise possibly probabilistic GT

    train_acc, train_area, train_thresh = evaluate(train_gt, train_preds,
                                                   "1-" + name + "_train")

    gt_flat = np.concatenate(list(map(np.ravel, train_gt)))
    preds_flat = np.concatenate(list(map(np.ravel, train_preds)))
    if fudgeysoft:
        train_nll = binary_nll(rescale_fudge(preds_flat), gt_flat)
    else:
        train_nll = binary_nll(preds_flat, gt_flat)

    print("Train nll %.3f" % train_nll)
    report_str += "%.3f,\t%.3f,\t%.3f,\t" % (train_nll, train_acc, train_area)

    error_hist(gt_flat,
               preds_flat,
               "1-" + name + "_train",
               thresh=train_thresh)

    # Validation data #########################################################
    if data and len(data.valid_l) == 0:
        raise RuntimeError("No validation data!")

    if valid_d is None:
        valid_d = data.valid_d
    if valid_l is None:
        valid_l = data.valid_l

    valid_preds = []
    valid_gt = []
    for i, (d, l) in enumerate(zip(valid_d, valid_l)):
        if os.path.exists("2-" + name + "_valid_%i_pred.h5" % i):
            pred = utils.h5load("2-" + name + "_valid_%i_pred.h5" % i)
        else:
            pred = model.predict_dense(d, pad_raw=False)  # (f,z,x,y)
            utils.h5save(pred, "2-" + name + "_valid_%i_pred.h5" % i)

        if betaloss:
            pred = pred[0]  # only mode
        else:
            pred = pred[1]  # only pred for class '1'
        l = l[0]  # throw away channel
        l, pred = image.center_cubes(l, pred, crop=True)
        valid_preds.append(pred)
        valid_gt.append(l)

    valid_gt = [gt > 0.5
                for gt in valid_gt]  # binarise possibly probabilistic GT

    valid_acc, valid_area, valid_thresh = evaluate(valid_gt, valid_preds,
                                                   "1-" + name + "_valid")
    gt_flat = np.concatenate(list(map(np.ravel, valid_gt)))
    preds_flat = np.concatenate(list(map(np.ravel, valid_preds)))

    if fudgeysoft:
        valid_nll = binary_nll(rescale_fudge(preds_flat), gt_flat)
    else:
        valid_nll = binary_nll(preds_flat, gt_flat)

    print("Valid nll %.3f" % valid_nll)
    report_str += "%.3f,\t%.3f,\t%.3f,\t%.3f,\t%.3f,\t" % (
        valid_nll, valid_acc, valid_area, train_acc - valid_acc,
        train_area - valid_area)

    error_hist(gt_flat,
               preds_flat,
               "1-" + name + "_valid",
               thresh=valid_thresh)

    ris = []
    best_ris = []
    for i, (l, p) in enumerate(zip(valid_gt, valid_preds)):
        if betaloss or fudgeysoft:
            p = rescale_fudge(p)

        p_int = (p * 255).astype(np.uint8)
        ri, best_ri, seg = image.optimise_segmentation(l,
                                                       p_int,
                                                       "2-" + name +
                                                       "_valid_%i" % i,
                                                       n_proc=n_proc)
        best_ris.append(best_ri)
        ris.append(ri)

    ris.append(np.mean(ris))
    for ri in ris:
        report_str += "%.4f,\t" % (ri, )

    with open("0-%s-REPORT.txt" % (name, ), 'w') as f:
        f.write(report_str)
Example #42
        noisyCs.append(noisyC)
        num_frames.append(np.where(~np.isnan(noisyC.sum(0)))[0][-1] + 1)
        count += 1
        pl.subplot(1, 3, count)
        crd = cm.utils.visualization.plot_contours(A, Cn, thr=0.9, vmax=.75)
#        pl.xlim([200,400]);pl.ylim([200,400])
#        pl.subplot(2,3,2*count)
#        pl.imshow(A.sum(0).reshape(dims,))

#%%
pl.figure()
count = 0
idx_neuro = 10
neuron_groups = [[180], [183, 277], [183, 277, 709]]
for A, b, C, f, Cn, ftc, noisyC, nfr, ngrp in zip(As, bs, Cs, fs, Cns,
                                                  files_to_compare, noisyCs,
                                                  num_frames, neuron_groups):
    count += 1
    a = A.tocsc()[:, np.array(ngrp) - 1]
    pl.subplot(3, 3, count)
    #    pl.imshow(Cn,vmax = 0.7)
    crd = cm.utils.visualization.plot_contours(a,
                                               Cn,
                                               thr=0.9,
                                               vmax=.7,
                                               colors='r')

    pl.ylabel('Correlation Image')
    pl.xlim([200, 400])
    pl.ylim([200, 400])
    #    pl.colorbar()
Example #43
    def load(self):
        year = int(re.search(r'\d{4}', self.election_id).group())
        xlsfile = xlrd.open_workbook(self._xls_file_path)
        if 'primary' in self._xls_file_path:
            primary = True
            if year == 2004:
                party = None  # get party from individual sheets
            else:
                party = self._party_from_filepath(self._xls_file_path)
        else:
            primary = False
            party = None
        results = []

        sheets = self._get_sheets(xlsfile)
        for sheet in sheets:
            if year == 2004:
                if primary:
                    party = sheet.name.split()[1]
                candidates = self._build_candidates_2004(sheet, party)
            elif self.source == "20021126__wy__special__general__natrona__state_house__36__precinct.xls":
                candidates = self._build_candidates_2002_special(sheet)
            elif year < 2004:
                if primary:
                    if year == 2000:
                        party = self.source.split('__')[2].title()
                    else:
                        party = sheet.name.split()[1]
                if year == 2002:
                    candidates = self._build_candidates_2002(sheet, party)
                elif year == 2000:
                    candidates = self._build_candidates_2000(
                        sheet, party, primary)
            else:
                candidates = self._build_candidates(sheet, party)

            for i in range(sheet.nrows):
                # Remove empty cells.
                row = [r for r in sheet.row_values(i) if not r == '']
                # Skip non-target offices.
                if self._skip_row(row):
                    continue
                else:
                    precinct = str(row[0])
                    if self.source == '20021126__wy__special__general__natrona__state_house__36__precinct.xls':
                        votes = [v for v in row[1:] if not v == '']
                    elif len(candidates) == 1:
                        votes = [v for v in row[1:] if not v == '']
                    elif year == 2000 and primary is False:
                        precinct = row[0]
                        votes = [
                            v for v in row[2:len(candidates)]
                            if not v == precinct
                        ]
                    elif year < 2006:
                        votes = [
                            v for v in row[2:len(candidates)] if not v == ''
                        ]
                    else:
                        votes = [
                            v for v in row[1:len(candidates)] if not v == ''
                        ]
                    # Pair each (candidate, office, party) tuple with its vote
                    # count; this must run for every branch above, not only the
                    # last one, or grouped_results is stale or undefined.
                    grouped_results = list(zip(candidates, votes))
                    for (candidate, office,
                         candidate_party), votes in grouped_results:
                        if not votes == '-':
                            results.append(
                                self._prep_precinct_result(
                                    precinct, self.mapping['name'], candidate,
                                    office, candidate_party, votes))
            RawResult.objects.insert(results)
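For reference, a minimal self-contained sketch of the pairing that grouped_results performs above; the candidate tuples and vote counts are made up for illustration only.

# Illustrative only: hypothetical candidates and votes, not real election data.
candidates = [("Smith", "State House 36", "REP"),
              ("Jones", "State House 36", "DEM")]
votes = [412, 387]

grouped_results = list(zip(candidates, votes))
for (candidate, office, candidate_party), vote_count in grouped_results:
    # Each entry unpacks to the same shape the loader's inner loop expects.
    print(candidate, office, candidate_party, vote_count)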
Exemple #44
0
def genFields(names, types):
    """Zip column names and types into a list of (name, dtype) pairs."""
    return list(zip(names, types))
npix = hp.nside2npix(nside)
magMap = np.zeros((npix, sunAlts.size), dtype=float)
rmsMap = np.zeros((npix, sunAlts.size), dtype=float)

filterNames = ['R', 'G', 'B']
#filterNames = ['R']
#sunAlts = [sunAlts[5]]

for filterName in filterNames:

    dataPath = getPackageDir('SIMS_SKYBRIGHTNESS_DATA')
    dbAddress = 'sqlite:///'+os.path.join(dataPath, 'photometry', 'skydata.sqlite')

    names = ['mjd', 'ra', 'dec', 'alt', 'starMag', 'sky', 'filter']
    types = [float, float, float, float, float, float, '|S1']
    dtypes = list(zip(names, types))

    engine = sqla.create_engine(dbAddress)
    connection = engine.raw_connection()
    cursor = connection.cursor()

    for i, ack in enumerate(sunAlts):
        q = ('select dates.mjd, stars.ra, stars.dec, obs.alt, obs.starMag, '
             'obs.sky, obs.filter from obs,stars,dates '
             'where obs.starID = stars.ID and obs.dateID = dates.ID '
             'and obs.filter = "%s" and obs.dateID in '
             '(select ID from dates where sunAlt >= %f and sunAlt <= %f)'
             % (filterName, sunAlts[i] - altBin, sunAlts[i] + altBin))

        print('Executing:')
        print(q)
        print('%i of %i' % (i, np.size(sunAlts)))

        cursor.execute(q)
        data = cursor.fetchall()
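A sketch of the step that presumably follows: loading the fetched rows into a numpy structured array using the (name, type) pairs built by genFields above. The sample row is a stand-in, not real sky-brightness data.

# Sketch (assumption): convert cursor.fetchall() output into a structured array
# using the dtype list produced by genFields / list(zip(names, types)).
import numpy as np

names = ['mjd', 'ra', 'dec', 'alt', 'starMag', 'sky', 'filter']
types = [float, float, float, float, float, float, '|S1']
dtypes = list(zip(names, types))

rows = [(56900.1, 1.2, -0.5, 0.8, 5.6, 20.1, b'R')]  # stand-in for fetched rows
data = np.array(rows, dtype=dtypes)
print(data['starMag'])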
Exemple #46
0
def evaluate(gt, preds, save_name, thresh=None, n_proc=None):
    """
    Evaluate prediction w.r.t. GT
    Saves plot to file
    :param save_name:
    :param gt:
    :param preds: from 0.0 to 1.0
    :param thresh: if thresh is given (e.g. from tuning on validation set)
    some performance measures are shown at this threshold
    :return: perf, roc-area, threshs
    """
    n = 64
    threshs = np.linspace(0, 1, n)
    perf = np.zeros((7, threshs.size))
    print("Scanning for best probmap THRESHOLD")
    if n_proc and n_proc > 2:
        # Parallel scan over thresholds; gt and preds are broadcast via repeat().
        mp = Pool(n_proc)
        ret = mp.imap(eval_thresh, zip(threshs, repeat(gt), repeat(preds)))
    else:
        ret = list(map(eval_thresh, zip(threshs, repeat(gt), repeat(preds))))

    for i, r in enumerate(ret):
        perf[:, i] = r

    # Find thresh according to maximal accuracy
    thresh = find_nearest(threshs,
                          thresh) if thresh else threshs[perf[5, :].argmax()]

    area = roc_area(perf[0, :], perf[1, :])
    area2 = roc_area(perf[2, :], perf[3, :])

    plt.figure(figsize=(12, 9))

    plt.subplot(221)
    plt.plot(threshs, perf[6, :].T)
    plt.ylim(0, 1)
    f1_max = perf[6, np.where(threshs == thresh)]
    plt.vlines(thresh, 0, 1, color='gray')
    plt.title("F1=%.2f at %.4f" % (f1_max, thresh))
    plt.xlabel("Classifier Threshold")

    plt.subplot(222)
    plt.plot(threshs, perf[5, :].T)
    plt.ylim(0, 1)
    acc_max = perf[5, np.where(threshs == thresh)]
    plt.vlines(thresh, 0, 1, color='gray')
    plt.title("Accuracy max=%.2f at %.4f" % (acc_max, thresh))
    plt.xlabel("Classifier Threshold")

    plt.subplot(223)
    plt.plot(perf[3, :].T, perf[2, :].T)
    plt.ylim(0, 1)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision-Recall AUC=%.4f" % (area2, ))

    plt.subplot(224)
    plt.plot(perf[1, :].T, perf[0, :].T)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    title = "ROC AUC=%.4f" % (area, )
    plt.title(title)

    with FileLock('plotting'):
        plt.savefig(save_name + ".performance.png", bbox_inches='tight')

    return acc_max, area, thresh
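The threshold scan above fans a fixed (gt, preds) pair out over many thresholds with zip and itertools.repeat. A small stand-alone sketch of that pattern follows; toy_eval is a hypothetical stand-in for eval_thresh, which is defined elsewhere.

# Sketch of the zip/repeat fan-out used in evaluate(); toy_eval stands in for eval_thresh.
from itertools import repeat
from multiprocessing import Pool

import numpy as np

def toy_eval(args):
    thresh, gt, preds = args
    # Accuracy of thresholded predictions against binary ground truth.
    return float(np.mean((preds >= thresh).astype(int) == gt))

if __name__ == "__main__":
    gt = np.array([0, 1, 1, 0, 1])
    preds = np.array([0.1, 0.8, 0.6, 0.4, 0.9])
    threshs = np.linspace(0, 1, 5)
    with Pool(2) as pool:
        accs = list(pool.imap(toy_eval, zip(threshs, repeat(gt), repeat(preds))))
    print(list(zip(threshs, accs)))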
Exemple #47
0
 def add_input(self, accumulator, element):
     return [
         c.add_input(a, element)
         for c, a in zip(self._combiners, accumulator)
     ]
Exemple #48
0
def create_cnn(config_file, n_ch, param_file=None, mfp=False,
               axis_order='theano', constant_weights=False,
               imposed_input_size=None):
    raise RuntimeError("Dont use this, rebuild the graph and import the "
                       "weights using load_params_into_model")

    config = Config(config_file, None, None, use_existing_dir=True,
                    override_MFP_to_active=mfp,
                    imposed_input_size=imposed_input_size)

    if config.mode != 'img-img':
        raise NotImplementedError()

    if axis_order == 'theano':
        ps = config.patch_size
        ndim = len(ps)

        input_size = [None, ] * (2 + ndim)
        input_size[0] = config.batch_size
        if ndim == 3:
            tags = 'b,z,f,y,x'
            input_size[1] = config.patch_size[0]
            input_size[2] = n_ch
            input_size[3] = config.patch_size[1]
            input_size[4] = config.patch_size[2]
        elif ndim == 2:
            tags = 'b,f,x,y'
            input_size[1] = n_ch
            input_size[2] = config.patch_size[0]
            input_size[3] = config.patch_size[1]

        if param_file is None:
            param_file = config.paramfile
        params = pickleload(param_file)
        pool = params[-1]
        f_shapes = params[0]
        params = params[1:-1]  # come in order W0, b0, W1, b1,...

        neuromancer.node_basic.model_manager.newmodel('legacy')
        inp = neuromancer.Input(input_size, tags)
        conv = list(
            zip(config.nof_filters,  # doesn't have to be a list, does it?
                config.filters, config.pool, config.activation_func,
                config.pooling_mode, params[::2], params[1::2]))
        for i, (n, f, p, act, p_m, W, b) in enumerate(conv):
            W = [W, 'const'] if constant_weights else W
            b = [b, 'const'] if constant_weights else b
            inp = neuromancer.Conv(inp, n, f, p, mfp=mfp, activation_func=act,
                                   w=W, b=b)

        # last Layer
        W = [params[-2], 'const'] if constant_weights else params[-2]
        b = [params[-1], 'const'] if constant_weights else params[-1]
        out = neuromancer.Conv(inp, config.n_lab, (1,) * ndim, (1,) * ndim,
                               activation_func='lin', w=W, b=b)
        if mfp:
            out = neuromancer.FragmentsToDense(out)

        if config.target in ['affinity', 'malis']:
            probs = neuromancer.Softmax(out, n_class=2, n_indep=3,
                                        name='class_probabilities')
        else:
            probs = neuromancer.Softmax(out, n_class=config.n_lab,
                                        name='class_probabilities')


    elif axis_order == 'dnn':
        raise NotImplementedError()

    model = neuromancer.model_manager.getmodel('legacy')
    model.designate_nodes(input_node=inp, prediction_node=probs)

    return model
Exemple #49
0
 def compact(self, accumulator):
     return [c.compact(a) for c, a in zip(self._combiners, accumulator)]
Exemple #50
0
 def merge_accumulators(self, accumulators):
     sums, counts = zip(*accumulators)
     return sum(sums), sum(counts)
Exemple #51
0
 def test_lzip(self):
     lst = [builtins.range(10), builtins.range(10), builtins.range(10)]
     results = lzip(*lst),
     expecteds = list(builtins.zip(*lst)),
     lengths = 10,
     self.check_results(results, expecteds, lengths)
Exemple #52
0
 def extract_output(self, accumulator):
     return tuple([
         c.extract_output(a) for c, a in zip(self._combiners, accumulator)
     ])
    def execute(self, context=None):
        metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id)
        table = metastore.get_table(table_name=self.table)
        field_types = {col.name: col.type for col in table.sd.cols}

        exprs = {('', 'count'): 'COUNT(*)'}
        for col, col_type in list(field_types.items()):
            d = {}
            if self.assignment_func:
                d = self.assignment_func(col, col_type)
                if d is None:
                    d = self.get_default_exprs(col, col_type)
            else:
                d = self.get_default_exprs(col, col_type)
            exprs.update(d)
        exprs.update(self.extra_exprs)
        exprs = OrderedDict(exprs)
        exprs_str = ",\n        ".join(
            [v + " AS " + k[0] + '__' + k[1] for k, v in exprs.items()])

        where_clause = [
            "{0} = '{1}'".format(k, v) for k, v in self.partition.items()
        ]
        where_clause = " AND\n        ".join(where_clause)
        sql = """
        SELECT
            {exprs_str}
        FROM {self.table}
        WHERE
            {where_clause};
        """.format(**locals())

        hook = PrestoHook(presto_conn_id=self.presto_conn_id)
        self.log.info('Executing SQL check: %s', sql)
        row = hook.get_first(hql=sql)
        self.log.info("Record: %s", row)
        if not row:
            raise AirflowException("The query returned None")

        part_json = json.dumps(self.partition, sort_keys=True)

        self.log.info("Deleting rows from previous runs if they exist")
        mysql = MySqlHook(self.mysql_conn_id)
        sql = """
        SELECT 1 FROM hive_stats
        WHERE
            table_name='{self.table}' AND
            partition_repr='{part_json}' AND
            dttm='{self.dttm}'
        LIMIT 1;
        """.format(**locals())
        if mysql.get_records(sql):
            sql = """
            DELETE FROM hive_stats
            WHERE
                table_name='{self.table}' AND
                partition_repr='{part_json}' AND
                dttm='{self.dttm}';
            """.format(**locals())
            mysql.run(sql)

        self.log.info("Pivoting and loading cells into the Airflow db")
        rows = [(self.ds, self.dttm, self.table, part_json) +
                (r[0][0], r[0][1], r[1]) for r in zip(exprs, row)]
        mysql.insert_rows(table='hive_stats',
                          rows=rows,
                          target_fields=[
                              'ds',
                              'dttm',
                              'table_name',
                              'partition_repr',
                              'col',
                              'metric',
                              'value',
                          ])
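The pivot at the end pairs each (col, metric) key of the ordered exprs mapping with the matching value in the single wide result row. A hedged sketch with hypothetical column names:

# Sketch (hypothetical columns): how zip(exprs, row) pivots one wide row into
# (col, metric, value) triples for the hive_stats table.
from collections import OrderedDict

exprs = OrderedDict([
    (('', 'count'), 'COUNT(*)'),
    (('amount', 'sum'), 'SUM(amount)'),
    (('amount', 'avg'), 'AVG(amount)'),
])
row = (1000, 52340.5, 52.34)  # values come back in the same order as the SELECT list

pivoted = [(col, metric, value) for (col, metric), value in zip(exprs, row)]
print(pivoted)  # [('', 'count', 1000), ('amount', 'sum', 52340.5), ('amount', 'avg', 52.34)]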
Exemple #54
0
 def merge_accumulators(self, accumulators):
     return [
         c.merge_accumulators(a)
         for c, a in zip(self._combiners, zip(*accumulators))
     ]
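Exemples #47, #49, #52 and #54 above are, presumably, the methods of a Beam-style combiner that runs several sub-combiners over the same input and keeps one accumulator per sub-combiner. A self-contained plain-Python sketch of how those methods fit together; SumCombiner and MaxCombiner are hypothetical stand-ins, not Beam classes.

# Plain-Python sketch of the per-element combiner pattern shown in the
# add_input / compact / merge_accumulators / extract_output snippets above.
class SumCombiner:
    def create_accumulator(self): return 0
    def add_input(self, acc, x): return acc + x
    def merge_accumulators(self, accs): return sum(accs)
    def compact(self, acc): return acc
    def extract_output(self, acc): return acc

class MaxCombiner:
    def create_accumulator(self): return float('-inf')
    def add_input(self, acc, x): return max(acc, x)
    def merge_accumulators(self, accs): return max(accs)
    def compact(self, acc): return acc
    def extract_output(self, acc): return acc

class TupleCombiner:
    """Applies several sub-combiners to the same input stream, keeping one
    accumulator per sub-combiner (mirrors the methods shown above)."""
    def __init__(self, *combiners):
        self._combiners = combiners
    def create_accumulator(self):
        return [c.create_accumulator() for c in self._combiners]
    def add_input(self, accumulator, element):
        return [c.add_input(a, element)
                for c, a in zip(self._combiners, accumulator)]
    def merge_accumulators(self, accumulators):
        return [c.merge_accumulators(a)
                for c, a in zip(self._combiners, zip(*accumulators))]
    def compact(self, accumulator):
        return [c.compact(a) for c, a in zip(self._combiners, accumulator)]
    def extract_output(self, accumulator):
        return tuple(c.extract_output(a)
                     for c, a in zip(self._combiners, accumulator))

tc = TupleCombiner(SumCombiner(), MaxCombiner())
acc = tc.create_accumulator()
for x in [1, 5, 2, 9, 3]:
    acc = tc.add_input(acc, x)
print(tc.extract_output(acc))  # (20, 9)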
def test_csv_table():
    # Maybe not truly a unit test, but here because it doesn't do
    # network IO to synapse
    data = [["1", "1", "John Coltrane",  1926, 8.65, False],
            ["2", "1", "Miles Davis",    1926, 9.87, False],
            ["3", "1", "Bill Evans",     1929, 7.65, False],
            ["4", "1", "Paul Chambers",  1935, 5.14, False],
            ["5", "1", "Jimmy Cobb",     1929, 5.78, True],
            ["6", "1", "Scott LaFaro",   1936, 4.21, False],
            ["7", "1", "Sonny Rollins",  1930, 8.99, True],
            ["8", "1", "Kenny Burrel",   1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        df = table.asDataFrame()
        assert list(df['Name']) == [row[2] for row in data]
        assert list(df['Born']) == [row[3] for row in data]
        assert list(df['Living']) == [row[5] for row in data]
        assert list(df.index) == ['%s_%s' % tuple(row[0:2]) for row in data]
        assert df.shape == (8, 4)

    except Exception:
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
        raise
def calculate_feature_matrix(features,
                             entityset=None,
                             cutoff_time=None,
                             instance_ids=None,
                             entities=None,
                             relationships=None,
                             cutoff_time_in_index=False,
                             training_window=None,
                             approximate=None,
                             save_progress=None,
                             verbose=False,
                             chunk_size=None,
                             n_jobs=1,
                             dask_kwargs=None):
    """Calculates a matrix for a given set of instance ids and calculation times.

    Args:
        features (list[:class:`.FeatureBase`]): Feature definitions to be calculated.

        entityset (EntitySet): An already initialized entityset. Required if `entities` and `relationships`
            not provided

        cutoff_time (pd.DataFrame or Datetime): Specifies at which time to calculate
            the features for each instance. The resulting feature matrix will use data
            up to and including the cutoff_time. Can either be a DataFrame with
            'instance_id' and 'time' columns, DataFrame with the name of the
            index variable in the target entity and a time column, or a single
            value to calculate for all instances. If the dataframe has more than two columns, any additional
            columns will be added to the resulting feature matrix.

        instance_ids (list): List of instances to calculate features on. Only
            used if cutoff_time is a single datetime.

        entities (dict[str -> tuple(pd.DataFrame, str, str)]): dictionary of
            entities. Entries take the format
            {entity id: (dataframe, id column, (time_column))}.

        relationships (list[(str, str, str, str)]): list of relationships
            between entities. List items are a tuple with the format
            (parent entity id, parent variable, child entity id, child variable).

        cutoff_time_in_index (bool): If True, return a DataFrame with a MultiIndex
            where the second index is the cutoff time (first is instance id).
            DataFrame will be sorted by (time, instance_id).

        training_window (Timedelta or str, optional):
            Window defining how much time before the cutoff time data
            can be used when calculating features. If ``None``, all data before cutoff time is used.
            Defaults to ``None``.

        approximate (Timedelta or str): Frequency to group instances with similar
            cutoff times by for features with costly calculations. For example,
            if bucket is 24 hours, all instances with cutoff times on the same
            day will use the same calculation for expensive features.

        verbose (bool, optional): Print progress info. The time granularity is
            per chunk.

        chunk_size (int or float or None or "cutoff time"): Number of rows of
            output feature matrix to calculate at time. If passed an integer
            greater than 0, will try to use that many rows per chunk. If passed
            a float value between 0 and 1 sets the chunk size to that
            percentage of all instances. If passed the string "cutoff time",
            rows are split per cutoff time.

        n_jobs (int, optional): number of parallel processes to use when
            calculating feature matrix

        dask_kwargs (dict, optional): Dictionary of keyword arguments to be
            passed when creating the dask client and scheduler. Even if n_jobs
            is not set, using `dask_kwargs` will enable multiprocessing.
            Main parameters:

            cluster (str or dask.distributed.LocalCluster):
                cluster or address of cluster to send tasks to. If unspecified,
                a cluster will be created.
            diagnostics port (int):
                port number to use for web dashboard.  If left unspecified, web
                interface will not be enabled.

            Valid keyword arguments for LocalCluster will also be accepted.

        save_progress (str, optional): path to save intermediate computational results.
    """
    assert (isinstance(features, list) and features != [] and
            all([isinstance(feature, FeatureBase) for feature in features])), \
        "features must be a non-empty list of features"

    # handle loading entityset
    from featuretools.entityset.entityset import EntitySet
    if not isinstance(entityset, EntitySet):
        if entities is not None and relationships is not None:
            entityset = EntitySet("entityset", entities, relationships)

    target_entity = entityset[features[0].entity.id]
    pass_columns = []

    if not isinstance(cutoff_time, pd.DataFrame):
        if isinstance(cutoff_time, list):
            raise TypeError("cutoff_time must be a single value or DataFrame")

        if cutoff_time is None:
            if entityset.time_type == NumericTimeIndex:
                cutoff_time = np.inf
            else:
                cutoff_time = datetime.now()

        if instance_ids is None:
            index_var = target_entity.index
            df = target_entity._handle_time(target_entity.df,
                                            time_last=cutoff_time,
                                            training_window=training_window)
            instance_ids = df[index_var].tolist()

        cutoff_time = [cutoff_time] * len(instance_ids)
        map_args = [(id, time) for id, time in zip(instance_ids, cutoff_time)]
        cutoff_time = pd.DataFrame(map_args, columns=['instance_id', 'time'])

    cutoff_time = cutoff_time.reset_index(drop=True)
    # handle how columns are named in cutoff_time
    # maybe add _check_time_dtype helper function
    if "instance_id" not in cutoff_time.columns:
        if target_entity.index not in cutoff_time.columns:
            raise AttributeError(
                'Name of the index variable in the target entity'
                ' or "instance_id" must be present in cutoff_time')
        # rename to instance_id
        cutoff_time.rename(columns={target_entity.index: "instance_id"},
                           inplace=True)

    if "time" not in cutoff_time.columns:
        # take the first column that isn't instance_id and assume it is time
        not_instance_id = [
            c for c in cutoff_time.columns if c != "instance_id"
        ]
        cutoff_time.rename(columns={not_instance_id[0]: "time"}, inplace=True)
    # Check that cutoff_time time type matches entityset time type
    if entityset.time_type == NumericTimeIndex:
        if cutoff_time['time'].dtype.name not in PandasTypes._pandas_numerics:
            raise TypeError("cutoff_time times must be numeric: try casting "
                            "via pd.to_numeric(cutoff_time['time'])")
    elif entityset.time_type == DatetimeTimeIndex:
        if cutoff_time['time'].dtype.name not in PandasTypes._pandas_datetimes:
            raise TypeError(
                "cutoff_time times must be datetime type: try casting via pd.to_datetime(cutoff_time['time'])"
            )
    assert (cutoff_time[['instance_id', 'time']].duplicated().sum() == 0), \
        "Duplicated rows in cutoff time dataframe."
    pass_columns = [column_name for column_name in cutoff_time.columns[2:]]

    if _check_time_type(cutoff_time['time'].iloc[0]) is None:
        raise ValueError("cutoff_time time values must be datetime or numeric")

    feature_set = FeatureSet(features)

    # make sure dtype of instance_id in cutoff time
    # is same as column it references
    target_entity = features[0].entity
    dtype = entityset[target_entity.id].df[target_entity.index].dtype
    cutoff_time["instance_id"] = cutoff_time["instance_id"].astype(dtype)

    # Get features to approximate
    if approximate is not None:
        _, all_approx_feature_set = gather_approximate_features(feature_set)
    else:
        all_approx_feature_set = None

    # Check if there are any non-approximated aggregation features
    no_unapproximated_aggs = True
    for feature in features:
        if isinstance(feature, AggregationFeature):
            # do not need to check if feature is in to_approximate since
            # only base features of direct features can be in to_approximate
            no_unapproximated_aggs = False
            break

        deps = feature.get_dependencies(deep=True,
                                        ignored=all_approx_feature_set)
        for dependency in deps:
            if isinstance(dependency, AggregationFeature):
                no_unapproximated_aggs = False
                break

    cutoff_df_time_var = 'time'
    target_time = '_original_time'
    num_per_chunk = calc_num_per_chunk(chunk_size, cutoff_time.shape)

    if approximate is not None:
        # If there are approximated aggs, bin times
        binned_cutoff_time = bin_cutoff_times(cutoff_time.copy(), approximate)

        # Think about collisions: what if original time is a feature
        binned_cutoff_time[target_time] = cutoff_time[cutoff_df_time_var]

        cutoff_time_to_pass = binned_cutoff_time

    else:
        cutoff_time_to_pass = cutoff_time

    if num_per_chunk == "cutoff time":
        iterator = cutoff_time_to_pass.groupby(cutoff_df_time_var)
    else:
        iterator = get_next_chunk(cutoff_time=cutoff_time_to_pass,
                                  time_variable=cutoff_df_time_var,
                                  num_per_chunk=num_per_chunk)

    chunks = []
    if num_per_chunk == "cutoff time":
        for _, group in iterator:
            chunks.append(group)
    else:
        for chunk in iterator:
            chunks.append(chunk)

    if n_jobs != 1 or dask_kwargs is not None:
        feature_matrix = parallel_calculate_chunks(
            chunks=chunks,
            feature_set=feature_set,
            approximate=approximate,
            training_window=training_window,
            verbose=verbose,
            save_progress=save_progress,
            entityset=entityset,
            n_jobs=n_jobs,
            no_unapproximated_aggs=no_unapproximated_aggs,
            cutoff_df_time_var=cutoff_df_time_var,
            target_time=target_time,
            pass_columns=pass_columns,
            dask_kwargs=dask_kwargs or {})
    else:
        feature_matrix = linear_calculate_chunks(
            chunks=chunks,
            feature_set=feature_set,
            approximate=approximate,
            training_window=training_window,
            verbose=verbose,
            save_progress=save_progress,
            entityset=entityset,
            no_unapproximated_aggs=no_unapproximated_aggs,
            cutoff_df_time_var=cutoff_df_time_var,
            target_time=target_time,
            pass_columns=pass_columns)

    feature_matrix = pd.concat(feature_matrix)

    feature_matrix.sort_index(level='time', kind='mergesort', inplace=True)
    if not cutoff_time_in_index:
        feature_matrix.reset_index(level='time', drop=True, inplace=True)

    if save_progress and os.path.exists(os.path.join(save_progress, 'temp')):
        shutil.rmtree(os.path.join(save_progress, 'temp'))

    return feature_matrix
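A minimal usage sketch of the API documented above, assuming an older (pre-1.0) Featuretools release with the Entity/EntitySet interface used here and its bundled mock-customer demo data; the cutoff date is arbitrary.

# Sketch only: assumes pre-1.0 featuretools (Entity/EntitySet API, ft.dfs target_entity).
import featuretools as ft
import pandas as pd

es = ft.demo.load_mock_customer(return_entityset=True)

# Define features without computing them, then compute at a single cutoff time.
features = ft.dfs(entityset=es, target_entity="customers", features_only=True)
fm = ft.calculate_feature_matrix(features,
                                 entityset=es,
                                 cutoff_time=pd.Timestamp("2014-01-04"),
                                 verbose=True)
print(fm.head())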
                test_fps = [
                    np_fps_act[i] for i in test_list[:num_test_actives]
                ]
                test_fps += [
                    np_fps_dcy[i] for i in test_list[num_test_actives:]
                ]
                test_mols = [[actives[i][0], 1]
                             for i in test_list[:num_test_actives]]
                test_mols += [[decoys[i][0], 0]
                              for i in test_list[num_test_actives:]]

                # rank based on probability
                single_score = ml.predict_proba(test_fps)
                # store: [probability, internal ID, active/inactive]
                single_score = [[s[1], m[0], m[1]]
                                for s, m in zip(single_score, test_mols)]
                single_score.sort(reverse=True)
                scores["lr_" + fp_build].append(single_score)

            # write scores to file
            if do_append:
                outfile = gzip.open(
                    outpath + "/list_" + dataset + "_" + str(target) +
                    ".pkl.gz",
                    "ab+",
                )  # binary format
            else:
                outfile = gzip.open(
                    outpath + "/list_" + dataset + "_" + str(target) +
                    ".pkl.gz",
                    "wb+",
Exemple #58
0
def runSlices(opsimName,
              metadata,
              simdata,
              fields,
              bins,
              args,
              opsDb,
              verbose=False):
    # Set up the movie slicer.
    movieslicer = setupMovieSlicer(simdata, bins)
    # Set up formatting for output suffix.
    sliceformat = '%s0%dd' % ('%', int(np.log10(len(movieslicer))) + 1)
    # Get the telescope latitude info.
    lat_tele = Site(name='LSST').latitude_rad
    # Run through the movie slicer slicePoints and generate plots at each point.
    for i, ms in enumerate(movieslicer):
        t = time.time()
        slicenumber = sliceformat % (i)
        if verbose:
            print(slicenumber)
        # Set up metrics.
        if args.movieStepsize != 0:
            tstep = args.movieStepsize
        else:
            tstep = ms['slicePoint']['binRight'] - bins[i]
            if tstep > 1:
                tstep = 40. / 24. / 60. / 60.
        # Add simple view of time to plot label.
        times_from_start = ms['slicePoint']['binRight'] - (int(bins[0]) +
                                                           0.16 - 0.5)
        # Opsim years are 365 days (not 365.25)
        years = int(times_from_start / 365)
        days = times_from_start - years * 365
        plotlabel = 'Year %d Day %.4f' % (years, days)
        # Set up metrics.
        metricList, plotDictList = setupMetrics(
            opsimName,
            metadata,
            plotlabel=plotlabel,
            t0=ms['slicePoint']['binRight'],
            tStep=tstep,
            years=years,
            verbose=verbose)
        # Identify the subset of simdata in the movieslicer 'data slice'
        simdatasubset = simdata[ms['idxs']]
        # Set up opsim slicer on subset of simdata provided by movieslicer
        opslicer = slicers.OpsimFieldSlicer()
        # Set up metricBundles to combine metrics, plotdicts and slicer.
        bundles = []
        sqlconstraint = ''
        for metric, plotDict in zip(metricList, plotDictList):
            bundles.append(
                metricBundles.MetricBundle(metric,
                                           opslicer,
                                           constraint=sqlconstraint,
                                           metadata=metadata,
                                           runName=opsimName,
                                           plotDict=plotDict))
        # Remove (default) stackers from bundles, because we've already run them above on the original data.
        for mb in bundles:
            mb.stackerList = []
        bundledict = metricBundles.makeBundlesDictFromList(bundles)
        # Set up metricBundleGroup to handle metrics calculation + plotting
        bg = metricBundles.MetricBundleGroup(bundledict,
                                             opsDb,
                                             outDir=args.outDir,
                                             resultsDb=None,
                                             saveEarly=False)
        # 'Hack' bundleGroup to just go ahead and run the metrics, without querying the database.
        simData = simdatasubset
        bg.fieldData = fields
        bg.setCurrent(sqlconstraint)
        bg.runCurrent(constraint=sqlconstraint, simData=simData)
        # Plot data each metric, for this slice of the movie, adding slicenumber as a suffix for output plots.
        # Plotting here, rather than automatically via sliceMetric method because we're going to rotate the sky,
        #  and add extra legend info and figure text (for FilterColors metric).
        ph = plots.PlotHandler(outDir=args.outDir,
                               figformat='png',
                               dpi=72,
                               thumbnail=False,
                               savefig=False)
        obsnow = np.where(simdatasubset['observationStartMJD'] ==
                          simdatasubset['observationStartMJD'].max())[0]
        raCen = np.radians(
            np.mean(simdatasubset[obsnow]['observationStartLST']))
        # Calculate horizon location.
        horizonlon, horizonlat = addHorizon(lat_telescope=lat_tele)
        # Create the plot for each metric and save it (after some additional manipulation).
        for mb in bundles:
            ph.setMetricBundles([mb])
            fignum = ph.plot(plotFunc=plots.BaseSkyMap(),
                             plotDicts={'raCen': raCen})
            fig = plt.figure(fignum)
            ax = plt.gca()
            # Add horizon and zenith.
            plt.plot(horizonlon, horizonlat, 'k.', alpha=0.3, markersize=1.8)
            plt.plot(0, lat_tele, 'k+')
            # For the FilterColors metric, add some extra items.
            if mb.metric.name == 'FilterColors':
                # Add the time stamp info (plotlabel) with a fancybox.
                plt.figtext(0.75,
                            0.9,
                            '%s' % (plotlabel),
                            bbox=dict(boxstyle='Round, pad=0.7',
                                      fc='w',
                                      ec='k',
                                      alpha=0.5))
                # Add a legend for the filters.
                filterstacker = stackers.FilterColorStacker()
                for i, f in enumerate(['u', 'g', 'r', 'i', 'z', 'y']):
                    plt.figtext(0.92,
                                0.55 - i * 0.035,
                                f,
                                color=filterstacker.filter_rgb_map[f])
                # Add a moon.
                moonRA = np.radians(np.mean(simdatasubset[obsnow]['moonRA']))
                lon = -(moonRA - raCen - np.pi) % (np.pi * 2) - np.pi
                moonDec = np.radians(np.mean(simdatasubset[obsnow]['moonDec']))
                # Note that moonphase is 0-100 (translate to 0-1). 0=new.
                moonPhase = np.mean(simdatasubset[obsnow]['moonPhase']) / 100.
                alpha = np.max([moonPhase, 0.15])
                circle = Circle((lon, moonDec),
                                radius=0.05,
                                color='k',
                                alpha=alpha)
                ax.add_patch(circle)
                # Add some explanatory text.
                ecliptic = Line2D([], [], color='r', label="Ecliptic plane")
                galaxy = Line2D([], [], color='b', label="Galactic plane")
                horizon = Line2D([], [],
                                 color='k',
                                 alpha=0.3,
                                 label="20 deg elevation limit")
                moon = Line2D([], [],
                              color='k',
                              linestyle='',
                              marker='o',
                              markersize=8,
                              alpha=alpha,
                              label="\nMoon (Dark=Full)\n         (Light=New)")
                zenith = Line2D([], [],
                                color='k',
                                linestyle='',
                                marker='+',
                                markersize=5,
                                label="Zenith")
                plt.legend(
                    handles=[horizon, zenith, galaxy, ecliptic, moon],
                    loc=[0.1, -0.35],
                    ncol=3,
                    frameon=False,
                    title=
                    'Aitoff plot showing HA/Dec of simulated survey pointings',
                    numpoints=1,
                    fontsize='small')
            # Save figure.
            plt.savefig(os.path.join(
                args.outDir,
                mb.metric.name + '_' + slicenumber + '_SkyMap.png'),
                        format='png',
                        dpi=72)
            plt.close('all')
            dt, t = dtime(t)
            if verbose:
                print('Ran and plotted slice %s of movieslicer in %f s' %
                      (slicenumber, dt))
    def run(self, sensorRefList, calibType):
        """Process a calibration frame.

        @param sensorRefList: list of sensor-level butler data references
        @param calibType: type of calibration product to build (e.g. 'flat' or 'dark')
        @return pipe_base Struct containing these fields:
        - masterFrameList: amp exposures of the master calibration products
        referenceAmps = sensorRefList[0].subItems(level="channel")
        masterExpList = []
        dataIdList = []
        expmeta = None
        for amp in referenceAmps:
            if amp.dataId['snap'] == 1:
                continue
            self.log.info("Amp: Processing %s", amp.dataId)
            print("dataid %s" % (amp.dataId))
            butler = amp.butlerSubset.butler
            ampMIList = []
            for sRef in sensorRefList:
                self.log.info("Sensor: Processing %s", sRef.dataId)
                ampSnapMIList = []
                # Round-trip through repr/eval to get a mutable copy of the dataId.
                dataId = eval(amp.dataId.__repr__())
                dataId['visit'] = sRef.dataId['visit']
                for snap in (0, 1):
                    dataId['snap'] = snap
                    ampExposure = sRef.butlerSubset.butler.get('raw', dataId)
                    if expmeta is None:
                        expmeta = ampExposure.getMetadata()
                        expfilter = ampExposure.getFilter()
                        expcalib = ampExposure.getCalib()
                    ampDetector = ampExposure.getDetector()

                    ampExposure = self.convertIntToFloat(ampExposure)
                    ampExpDataView = ampExposure.Factory(
                        ampExposure, ampDetector.getDiskDataSec())

                    self.saturationDetection(ampExposure, ampDetector)

                    self.overscanCorrection(ampExposure, ampDetector)
                    if calibType in ('flat', 'dark'):
                        self.biasCorrection(ampExpDataView, amp)

                    if False:
                        self.darkCorrection(ampExpDataView, amp)

                    self.updateVariance(ampExpDataView, ampDetector)
                    ampSnapMIList.append(ampExpDataView.getMaskedImage())
                ampMIList.append(self.combineMIList(ampSnapMIList))
            masterFrame = self.combineMIList(ampMIList)
            # Fix saturation too???
            self.fixDefectsAndSat(masterFrame, ampDetector)
            exp = afwImage.ExposureF(masterFrame)
            self.copyMetadata(exp, expmeta, calibType)
            exp.setDetector(ampDetector)
            exp.setWcs(None)
            exp.setCalib(expcalib)
            if calibType == 'flat':
                exp.setFilter(expfilter)
            if self.config.doWrite and calibType != 'flat':
                print("writing file %s" % dataId)
                butler.put(exp, calibType, dataId=amp.dataId)
            masterExpList.append(exp)
            dataIdList.append(amp.dataId)
        if self.config.doWrite and calibType == 'flat':
            self.normChipAmps(masterExpList)
            for exp, dataId in zip(masterExpList, dataIdList):
                print("writing flat file %s" % dataId)
                butler.put(exp, calibType, dataId)
        return pipeBase.Struct(masterFrameList=masterExpList, )
Exemple #60
0
def train_rcnn(network, dataset, image_set, root_path, dataset_path,
               frequent, kvstore, work_load_list, no_flip, no_shuffle, resume,
               ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
               train_shared, lr, lr_step, proposal):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # set up config
    config.TRAIN.BATCH_IMAGES = 2
    config.TRAIN.BATCH_ROIS = 128
    if proposal == 'ss':
        config.TRAIN.BG_THRESH_LO = 0.1  # reproduce Fast R-CNN

    # load symbol
    sym = eval('get_' + network + '_rcnn')(num_classes=config.NUM_CLASSES)

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in image_set.split('+')]
    roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path,
                                  proposal=proposal, append_gt=True, flip=not no_flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)
    means, stds = add_bbox_regression_targets(roidb)

    # load training data
    train_data = ROIIter(roidb, batch_size=input_batch_size, shuffle=not no_shuffle,
                         ctx=ctx, work_load_list=work_load_list, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(list(zip(sym.list_arguments(), arg_shape)))
    out_shape_dict = dict(list(zip(sym.list_outputs(), out_shape)))
    aux_shape_dict = dict(list(zip(sym.list_auxiliary_states(), aux_shape)))
    print('output shape')
    pprint.pprint(out_shape_dict)

    # load and initialize params
    if resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # prepare training
    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    if train_shared:
        fixed_param_prefix = config.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = config.FIXED_PARAMS
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (old_div(1.0, batch_size)),
                        'clip_gradient': 5}

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
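For clarity, a small stand-alone sketch of the learning-rate schedule arithmetic above: the epochs listed in lr_step are converted to iteration counts, and the base rate is pre-decayed for any steps that fall before begin_epoch. The numbers are toy values; no MXNet is required to run it.

# Toy numbers illustrating the lr schedule computation in train_rcnn.
base_lr = 0.001
lr_factor = 0.1
lr_step = '7,10'
begin_epoch = 0
num_images = 5000          # stand-in for len(roidb)
batch_size = 2             # stand-in for len(ctx)

lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
lr_iters = [int(epoch * num_images / batch_size) for epoch in lr_epoch_diff]
print(lr, lr_iters)  # 0.001 [17500, 25000]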