def indicate(self, M, trans, i):
    ''' indicate which M belongs to population i given transition parameter '''
    ts = np.insert(np.insert(trans, self.n_pop - 1, np.inf), 0, -np.inf)
    ind = (M >= ts[i]) & (M < ts[i + 1])
    return ind
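# Minimal standalone sketch of the thresholding pattern above (the values are
# made up, not from the original model): padding the transition points with
# -inf/+inf turns n_pop-1 cut points into n_pop half-open bins, and the mask
# picks out the members of bin i.
import numpy as np

trans = np.array([0.5, 1.5])                              # assumed cut points for n_pop = 3
ts = np.insert(np.insert(trans, 2, np.inf), 0, -np.inf)   # [-inf, 0.5, 1.5, inf]
M = np.array([0.1, 0.7, 2.0])
print((M >= ts[1]) & (M < ts[2]))                         # [False  True False]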
def produce_smoothed_images(get_component, replace_component, bins, output_path, paths):
    start_img = io.imread(paths[0])
    start_cdf = get_cdf(get_component(start_img), bins)
    end_img = io.imread(paths[-1])
    end_cdf = get_cdf(get_component(end_img), bins)
    delta_cdf = end_cdf - start_cdf

    for i, path in enumerate(paths[1:-1]):
        percentage = i / len(paths[1:-1])
        target_cdf = start_cdf + (delta_cdf * percentage)

        img = io.imread(path)
        values = get_component(img)
        cdf = get_cdf(values, bins)

        # In order to match the length of "bins" for the interpolation below
        # we prepend a 0
        target_cdf = numpy.insert(target_cdf, 0, 0)
        cdf = numpy.insert(cdf, 0, 0)

        matched = match(values, cdf, target_cdf, bins)
        matched = matched.reshape(values.shape)

        img = replace_component(img, matched)

        result_path = os.path.join(output_path, os.path.basename(path))
        io.imsave(result_path, img)
        print('Done with', result_path)
def _get_radius_neighbors(self, query, max_depth, bin_queries, radius):
    """Finds radius neighbors from the candidates obtained.

    Their distances from query are smaller than radius.
    Returns radius neighbors and distances.
    """
    ratio_within_radius = 1
    threshold = 1 - self.radius_cutoff_ratio
    total_candidates = np.array([], dtype=int)
    total_neighbors = np.array([], dtype=int)
    total_distances = np.array([], dtype=float)

    while max_depth > self.min_hash_match and ratio_within_radius > threshold:
        left_mask = self._left_mask[max_depth]
        right_mask = self._right_mask[max_depth]
        candidates = []
        for i in range(self.n_estimators):
            start, stop = _find_matching_indices(self.trees_[i], bin_queries[i],
                                                 left_mask, right_mask)
            candidates.extend(self.original_indices_[i][start:stop].tolist())
        candidates = np.setdiff1d(candidates, total_candidates)
        total_candidates = np.append(total_candidates, candidates)
        ranks, distances = self._compute_distances(query, candidates)
        m = np.searchsorted(distances, radius, side="right")
        positions = np.searchsorted(total_distances, distances[:m])
        total_neighbors = np.insert(total_neighbors, positions, candidates[ranks[:m]])
        total_distances = np.insert(total_distances, positions, distances[:m])
        ratio_within_radius = total_neighbors.shape[0] / float(total_candidates.shape[0])
        max_depth = max_depth - 1
    return total_neighbors, total_distances
def correct_missing_doms(self, scalerarray, no_channels):
    """
    Backup method in case the geometry is not given. Very back-of-the-envelope.
    Not used at the moment.

    Corrects an artifact of storing variable-length arrays in a table.
    Changes to the SNDAQ geometry removed certain DOMs from the snall data
    array, so this puts them back into the array at the right locations.
    We need to remove the last 7 or 8 dummy entries produced when reading
    the data from file and insert zeros at the appropriate places in the array.

    :param scalerarray: Scaler array with the missing DOMs shifted to the end
    :param no_channels: Number of active channels assumed for the file
    :returns: Scaler array with the correct location mapping
    """
    if no_channels == 5153:
        return np.insert(scalerarray[:-7],
                         [45, 403, 1308, 1925, 2278, 3594, 4061], 0)
    elif no_channels == 5152:
        return np.insert(scalerarray[:-8],
                         [45, 403, 1308, 1925, 2278, 3594, 4061, 5069], 0)
    else:
        raise RuntimeError("No. of channels (= %d) is not supported" % no_channels)
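# Illustrative sketch of the call pattern above (toy data, not real DOM geometry):
# when np.insert is given a list of positions, each fill value is placed before
# that index of the *original* array, which re-opens the slots for missing DOMs.
import numpy as np

arr = np.arange(10)
print(np.insert(arr, [2, 5, 7], 0))   # [0 1 0 2 3 4 0 5 6 0 7 8 9]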
def cells(self, cells, grid):
    from lxml import etree as ET

    if len(cells) == 1:
        meshio_type = list(cells.keys())[0]
        num_cells = len(cells[meshio_type])
        xdmf_type = meshio_to_xdmf_type[meshio_type][0]
        topo = ET.SubElement(
            grid,
            "Topology",
            TopologyType=xdmf_type,
            NumberOfElements=str(num_cells),
        )
        dt, prec = numpy_to_xdmf_dtype[cells[meshio_type].dtype.name]
        dim = "{} {}".format(*cells[meshio_type].shape)
        data_item = ET.SubElement(
            topo,
            "DataItem",
            DataType=dt,
            Dimensions=dim,
            Format=self.data_format,
            Precision=prec,
        )
        data_item.text = self.numpy_to_xml_string(cells[meshio_type])
    elif len(cells) > 1:
        total_num_cells = sum(c.shape[0] for c in cells.values())
        topo = ET.SubElement(
            grid,
            "Topology",
            TopologyType="Mixed",
            NumberOfElements=str(total_num_cells),
        )
        total_num_cell_items = sum(numpy.prod(c.shape) for c in cells.values())
        dim = total_num_cell_items + total_num_cells
        # Lines translate to Polylines, and one needs to specify the exact
        # number of nodes. Hence, prepend 2.
        if "line" in cells:
            cells["line"] = numpy.insert(cells["line"], 0, 2, axis=1)
            dim += len(cells["line"])
        dim = str(dim)
        cd = numpy.concatenate(
            [
                # prepend column with xdmf type index
                numpy.insert(value, 0, meshio_type_to_xdmf_index[key], axis=1).flatten()
                for key, value in cells.items()
            ]
        )
        dt, prec = numpy_to_xdmf_dtype[cd.dtype.name]
        data_item = ET.SubElement(
            topo,
            "DataItem",
            DataType=dt,
            Dimensions=dim,
            Format=self.data_format,
            Precision=prec,
        )
        data_item.text = self.numpy_to_xml_string(cd)
    return
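# Standalone sketch of the column-prepend used for the mixed-topology branch:
# np.insert with axis=1 at index 0 adds a constant leading column per cell row
# (42 is an arbitrary placeholder here, not a real XDMF type index).
import numpy as np

conn = np.array([[0, 1, 2], [2, 3, 0]])   # two triangle cells
print(np.insert(conn, 0, 42, axis=1))     # [[42  0  1  2] [42  2  3  0]]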
def insert_initial(val, blankAvg, InducedAvg, LOQ):
    Initial = initial_pt(val, blankAvg, LOQ)
    masked_vals = mask_apply(InducedAvg, LOQ, blankAvg)
    Initial_ACF = Initial / (10E6)
    sample_ACF = np.insert(masked_vals[0], 0, Initial_ACF)
    sample_time = np.insert(masked_vals[1], 0, 0)
    return (sample_ACF, sample_time)
def execEnd(self, eventIdx):
    # execute an end-breaking or depolymerization event.
    oligoEndBreak = self.ald['end'][eventIdx // 2]
    leftRight = eventIdx % 2 * 2 - 1
    lr = -(leftRight + 1) // 2
    unitMoving = oligoEndBreak.ends[lr]
    oligo_vanish, form_oligo, self.event_code = oligoEndBreak.end_break(leftRight, self.units)
    if form_oligo:  # not empty
        mono = form_oligo['monomer']
        if mono:  # monomer + monomer (mergeOligo)
            idx = np.where([x in [mono, unitMoving] for x in self.monomers])[0]
            self.monomers = np.delete(self.monomers, idx)
            self.oligos = np.insert(self.oligos, 0, form_oligo['oligo'])
        else:  # monomer + multimer (mergeOligo)
            idx = np.where([unitMoving is x for x in self.monomers])[0]
            self.monomers = np.delete(self.monomers, idx)
    else:  # empty, add the end to monomers
        self.monomers = np.insert(self.monomers, 0, unitMoving)
        unitMoving.energize()
    if oligo_vanish:
        idx = np.where([oligoEndBreak is x for x in self.oligos])[0]
        self.oligos = np.delete(self.oligos, idx)
        idx = np.where([unitMoving is not x for x in oligoEndBreak.subunits])[0]
        nonmoving_unit = oligoEndBreak.subunits[idx[0]]
        self.monomers = np.insert(self.monomers, 0, nonmoving_unit)
        nonmoving_unit.energize()
def load_from_classifier(classifier):
    forest0 = load_forest_from_classifier(classifier, 'forest0.npz')
    hist0 = load_ndarray(classifier, 'hist0.npy')
    prior = np.true_divide(hist0[0].sum(axis=0), hist0[0].sum())
    hist0 = np.insert(normalize(hist0), 0, 0, axis=0)
    forest1 = load_forest_from_classifier(classifier, 'forest1.npz')
    hist1 = load_ndarray(classifier, 'hist1.npy')
    hist1 = np.insert(normalize(hist1), 0, 0, axis=0)
    svmmodels = []
    try:
        training_bosts = normalize(load_ndarray(classifier, 'bosts.npy')).T
        NLABELS = hist0.shape[2]
        for i in range(1, NLABELS):
            model = classifier.read('svmmodel%d' % i)
            tmp = tempfile.NamedTemporaryFile()
            tmp.write(model)
            tmp.flush()
            svmmodels.append(load_model(tmp.name))
            tmp.close()
    except KeyError:
        training_bosts = None
    return forest0, hist0, forest1, hist1, training_bosts, svmmodels, prior
def value_counts(self, dropna=True):
    """
    Returns a Series containing counts of unique values.

    Parameters
    ----------
    dropna : boolean, default True
        Don't include counts of NaN, even if NaN is in sp_values.

    Returns
    -------
    counts : Series
    """
    keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
    fcounts = self.sp_index.ngaps
    if fcounts > 0:
        if self._null_fill_value and dropna:
            pass
        else:
            if self._null_fill_value:
                mask = pd.isnull(keys)
            else:
                mask = keys == self.fill_value

            if mask.any():
                counts[mask] += fcounts
            else:
                keys = np.insert(keys, 0, self.fill_value)
                counts = np.insert(counts, 0, fcounts)

    if not isinstance(keys, pd.Index):
        keys = pd.Index(keys)
    result = pd.Series(counts, index=keys)
    return result
def _prepend_image(self, document, im, rtn_length, column_dtypes, column_set, columns):
    image = im[IMAGE]
    first_dt = im[IMAGE_TIME]
    if not first_dt.tzinfo:
        first_dt = first_dt.replace(tzinfo=mktz('UTC'))
    document[INDEX] = np.insert(document[INDEX], 0, np.uint64(datetime_to_ms(first_dt)))
    for field in image:
        if field == INDEX:
            continue
        if columns and field not in columns:
            continue
        if field not in document or document[field] is None:
            col_dtype = np.dtype(str if isinstance(image[field], string_types) else 'f8')
            document[field] = self._empty(rtn_length, dtype=col_dtype)
            column_dtypes[field] = col_dtype
            column_set.add(field)
        val = image[field]
        document[field] = np.insert(document[field], 0, document[field].dtype.type(val))
    # Now insert rows for fields in document that are not in the image
    for field in set(document).difference(set(image)):
        if field == INDEX:
            continue
        logger.debug("Field %s is missing from image!" % field)
        if document[field] is not None:
            val = np.nan
            document[field] = np.insert(document[field], 0, document[field].dtype.type(val))
    return document
def calculate(self):
    ephem_location = ephem.Observer()
    ephem_location.lat = self.location.latitude.to(u.rad) / u.rad
    ephem_location.lon = self.location.longitude.to(u.rad) / u.rad
    ephem_location.elevation = self.location.height / u.meter
    ephem_location.date = ephem.Date(self.time.datetime)

    if self.data is None:
        self.alt = Latitude([], unit=u.deg)
        self.az = Longitude([], unit=u.deg)
        self.names = Column([], dtype=np.str)
        self.vmag = Column([])
    else:
        ra = Longitude(self.data["ra"], u.h)
        dec = Latitude(self.data["dec"], u.deg)
        c = SkyCoord(ra, dec, frame='icrs')
        altaz = c.transform_to(AltAz(obstime=self.time, location=self.location))
        self.alt = altaz.alt
        self.az = altaz.az
        self.names = self.data['name']
        self.vmag = self.data['mag']

    for ephemeris in self.ephemerides:
        ephemeris.compute(ephem_location)
        self.vmag = np.insert(self.vmag, [0], ephemeris.mag)
        self.alt = np.insert(self.alt, [0], (ephemeris.alt.znorm * u.rad).to(u.deg))
        self.az = np.insert(self.az, [0], (ephemeris.az * u.rad).to(u.deg))
        self.names = np.insert(self.names, [0], ephemeris.name)
    return self.names, self.vmag, self.alt, self.az
def calcEarthParams(layerThickness, layerResistivity):
    """"""
    nLayers = len(layerResistivity["min"])  # or 'max'
    thicknessParam = np.empty((nLayers,))
    resistivityParam = np.empty((nLayers,))

    # Iterate through the layers, applying the p formula to both
    # thickness and resistivity
    for i in range(nLayers):
        # Generate a random number to control where in the range of
        # possible values the true value of p could lie. This precedes the
        # MC iteration, so take one p value with a grain of salt, but many
        # with a salt shaker
        randomNumber = np.random.random_sample()

        if i < (nLayers - 1):  # Skip last depth (infinite)
            thicknessP = (layerThickness["max"][i] -
                          layerThickness["min"][i]) * randomNumber + layerThickness["min"][i]
            thicknessParam = np.insert(thicknessParam, i, thicknessP)
            del thicknessP

        resistivityP = (layerResistivity["max"][i] -
                        layerResistivity["min"][i]) * randomNumber + layerResistivity["min"][i]
        resistivityParam = np.insert(resistivityParam, i, resistivityP)
        del resistivityP

    return (thicknessParam[:nLayers - 1], resistivityParam[:nLayers])
def trainNN(self, imagesTrainSet, labelsTrainSet, etha):
    self.reset_weights()
    trainingSetSize = labelsTrainSet.shape[0]
    j = 0
    while j < 30:
        i = 0
        # print("Round: " + str(j + 1))
        while i < trainingSetSize:
            x = imagesTrainSet[i].ravel()  # Convert 28x28 pixel image into a (784,) vector
            x = np.array([0 if val == 0 else 1 for val in x])
            x_a = np.insert(x, 0, values=1, axis=0)  # Augmented Feature vector

            net_hidd = np.dot(self.w1, x_a)
            y = self.signum(net_hidd)
            y_a = np.insert(y, 0, values=1, axis=0)  # Augmented Feature vector

            net_out = np.dot(self.w2, y_a)
            z = self.signum(net_out)

            lab = np.array([1 if k == self.labels[i] else 0 for k in range(10)])
            J = z - lab
            J = np.sum(0.5 * J * J)
            if J < 1 and self.enableWeightDecay:
                break

            out_sensitivity = (lab - z) * self.signum_prime(net_out)
            net_hidd_prime = self.signum_prime(net_hidd)
            hid_sensitivity = np.dot(self.w2.T, out_sensitivity) * np.insert(net_hidd_prime, 0, 1)

            grad_hidd_out = etha * np.outer(out_sensitivity, y_a.T)
            grad_in_hidd = etha * np.outer(hid_sensitivity[1:], x_a.T)

            self.update_weights_bias(grad_in_hidd, grad_hidd_out)
            i += 1
        j += 1
    return self.w1, self.w2
def fit(self, X, sample_weight=None, **kwargs):
    # Checks
    X = check_array(X)

    if sample_weight is not None and len(sample_weight) != len(X):
        raise ValueError

    # Compute histogram and edges
    h, e = np.histogramdd(X, bins=self.bins, range=self.range,
                          weights=sample_weight, normed=True)

    # Add empty bins for out of bound samples
    for j in range(X.shape[1]):
        h = np.insert(h, 0, 0., axis=j)
        h = np.insert(h, h.shape[j], 0., axis=j)
        e[j] = np.insert(e[j], 0, -np.inf)
        e[j] = np.insert(e[j], len(e[j]), np.inf)

    if X.shape[1] == 1 and self.interpolation:
        inputs = e[0][2:-1] - (e[0][2] - e[0][1]) / 2.
        inputs[0] = e[0][1]
        inputs[-1] = e[0][-2]
        outputs = h[1:-1]
        self.interpolation_ = interp1d(inputs, outputs,
                                       kind=self.interpolation,
                                       bounds_error=False, fill_value=0.)

    self.histogram_ = h
    self.edges_ = e
    self.ndim_ = X.shape[1]
    return self
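# Minimal sketch (synthetic data) of the padding above: an extra zero bin on
# each side plus -inf/+inf edges lets out-of-range queries fall into an empty
# bin instead of indexing past the histogram.
import numpy as np

h, e = np.histogram(np.random.rand(100), bins=5, density=True)
h = np.insert(h, 0, 0.)
h = np.insert(h, len(h), 0.)
e = np.insert(e, 0, -np.inf)
e = np.insert(e, len(e), np.inf)
print(h[np.searchsorted(e, 10.0) - 1])   # 0.0 for a query far outside the data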
def forwardPropPredict(nn_params, input_layer_size, hidden_layer_size, num_labels, X):
    length1 = (input_layer_size + 1) * hidden_layer_size
    nn1 = nn_params[:length1]
    T1 = nn1.reshape((hidden_layer_size, input_layer_size + 1))
    nn2 = nn_params[length1:]
    T2 = nn2.reshape((num_labels, 1 + hidden_layer_size))

    m = X.shape[0]  # number of training examples, useful for calculations
    max_pred = 0
    predictions = []

    # for each training example
    train_ex = -1  # training example number we're on (ie. which row of input matrix)
    for x in X:
        train_ex += 1
        # forward propagation
        a1 = x
        a1 = np.insert(a1, 0, 1, axis=0)
        z2 = np.dot(T1, a1)
        a2 = sigmoid(z2)
        a2 = np.insert(a2, 0, 1, axis=0)
        z3 = np.dot(T2, a2)
        a3 = sigmoid(z3)
        predictions.append(int(np.argmax(a3)))
    return predictions
def _summarize_simulations(self, lmda, sim_vector, date_index, h, past_values):
    """ Summarizes a simulation vector and a mean vector of predictions

    Parameters
    ----------
    lmda : np.array
        Past volatility values for the model

    sim_vector : np.array
        N simulation predictions for h-step ahead forecasts

    date_index : pd.DateIndex or np.array
        Dates for the simulations

    h : int
        How many steps ahead are forecast

    past_values : int
        How many past observations to include in the forecast plot

    intervals : Boolean
        Would you like to show prediction intervals for the forecast?
    """
    mean_values = np.append(lmda, np.array([np.mean(i) for i in sim_vector]))
    error_bars = []
    for pre in range(5, 100, 5):
        error_bars.append(np.insert([np.percentile(i, pre) for i in sim_vector], 0, mean_values[-h-1]))
    forecasted_values = np.insert([np.mean(i) for i in sim_vector], 0, mean_values[-h-1])
    plot_values = mean_values[-h-past_values:]
    plot_index = date_index[-h-past_values:]
    return error_bars, forecasted_values, plot_values, plot_index
def next(self):
    totim, dt, kper, kstp, swrstp, success = self.read_header()
    if success == False:
        # print 'SWR_Stage.next() object reached end of file'
        return 0.0, 0.0, 0, 0, 0, False, self.null_record
    else:
        if self.type > 0:
            #r = numpy.zeros((self.items+1))
            r = numpy.zeros((self.items + 2))
            for rec in range(0, self.nrecord):
                #nlay = self.read_integer()
                nlay = self.reachlayers[rec]
                for lay in range(0, nlay):
                    this_lay = self.read_integer()
                    this_items = self.read_items()
                    this_r = numpy.insert(this_items, [0], this_lay)
                    this_r = numpy.insert(this_r, [0], rec + 1)
                    #print totim,this_lay,numpy.shape(r),numpy.shape(this_r)
                    r = numpy.vstack((r, this_r))
            r = numpy.delete(r, 0, axis=0)
            return totim, dt, kper, kstp, swrstp, True, r
        else:
            r = self.read_record()
            # print 'SWR data read for time step ',kstp,',stress period \
            # ',kper,'and swr step ',swrstp
            return totim, dt, kper, kstp, swrstp, True, r
def insert(self, obj, values):
    """
    Insert values before the given indices in the column and return
    a new `~astropy.table.Column` object.

    Parameters
    ----------
    obj : int, slice or sequence of ints
        Object that defines the index or indices before which ``values`` is inserted.
    values : array_like
        Value(s) to insert.  If the type of ``values`` is different
        from that of quantity, ``values`` is converted to the matching type.
        ``values`` should be shaped so that it can be broadcast appropriately

    Returns
    -------
    out : `~astropy.table.Column`
        A copy of column with ``values`` and ``mask`` inserted.  Note that the
        insertion does not occur in-place: a new column is returned.
    """
    if self.dtype.kind == 'O':
        # Even if values is array-like (e.g. [1,2,3]), insert as a single
        # object.  Numpy.insert instead inserts each element in an array-like
        # input individually.
        data = np.insert(self, obj, None, axis=0)
        data[obj] = values
    else:
        # Explicitly convert to dtype of this column.  Needed because numpy 1.7
        # enforces safe casting by default, so .  This isn't the case for 1.6 or 1.8+.
        values = np.asarray(values, dtype=self.dtype)
        data = np.insert(self, obj, values, axis=0)
    out = data.view(self.__class__)
    out.__array_finalize__(self)
    return out
def cdf_vals_from_data(data, numbins=None, maxbins=None):
    # make sure data is a numpy array
    data = numpy.array(data)

    # by default, use numbins equal to number of distinct values
    # TODO: shouldn't this be one per possible x val?
    if numbins == None:
        numbins = numpy.unique(data).size
    if maxbins != None and numbins > maxbins:
        numbins = maxbins

    # bin the data and count fraction of points in each bin (for PDF)
    rel_bin_counts, min_bin_x, bin_size, _ =\
        stats.relfreq(data, numbins, (data.min(), data.max()))

    # bin the data and count each bin (cumulatively) (for CDF)
    cum_bin_counts, min_bin_x, bin_size, _ =\
        stats.cumfreq(data, numbins, (data.min(), data.max()))

    # normalize bin counts so rightmost count is 1
    cum_bin_counts /= cum_bin_counts.max()

    # make array of x-vals (lower end of each bin)
    x_vals = numpy.linspace(min_bin_x, min_bin_x + bin_size*numbins, numbins)

    # CDF always starts at y=0
    cum_bin_counts = numpy.insert(cum_bin_counts, 0, 0)  # y = 0
    cdf_x_vals = numpy.insert(x_vals, 0, x_vals[0])  # x = min x

    return cum_bin_counts, cdf_x_vals, rel_bin_counts, x_vals
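# Tiny sketch of the last two inserts above (toy values): duplicating the first
# x value and prepending y=0 pins the plotted CDF so it starts at zero.
import numpy

x_vals = numpy.array([1.0, 2.0, 3.0])
cum_bin_counts = numpy.array([0.4, 0.8, 1.0])
print(numpy.insert(cum_bin_counts, 0, 0))   # [0.  0.4 0.8 1. ]
print(numpy.insert(x_vals, 0, x_vals[0]))   # [1. 1. 2. 3.]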
def insert(array, obj, values):
    """Insert values along the given axis before the given indices.

    Parameters:
    -----------
    arr : array_like
        Input array.
    obj : int, slice or sequence of ints
        Object that defines the index or indices before which values is inserted.
    values : array_like
        Values to insert into arr. If the type of values is different from that of arr,
        values is converted to the type of arr.
    axis : int, optional
        Axis along which to insert values. If axis is None then arr is flattened first.

    Returns:
    --------
    out : ndarray
        A copy of arr with values inserted. Note that insert does not occur in-place:
        a new array is returned. If axis is None, out is a flattened array.
    """
    if isphysicalquantity(array):
        return np.insert(array.value, obj, values.value) * q[array.unit]
    else:
        return np.insert(array, obj, values)
def loess_query(x_query, X, y, alpha):
    if not isinstance(x_query, np.ndarray):
        x_query = np.array(x_query)
    elif isinstance(x_query, np.matrix):
        x_query = x_query.A
    if not isinstance(X, np.matrix):
        raise TypeError('X must be of type np.matrix')
    if isinstance(y, np.ndarray):
        y = np.mat(y).T
    if alpha <= 0 or alpha > 1:
        raise ValueError('ALPHA must be between 0 and 1')

    # inserting constant ones into X and X_QUERY for intercept term
    X = np.insert(X, obj=0, values=1, axis=1)
    x_query = np.insert(x_query, obj=0, values=1)

    # computing weights matrix using a tricube weight function
    W = weights_matrix(x_query, X, alpha)

    # computing theta from closed form solution to locally weighted linreg
    theta = (X.T * W * X).I * X.T * W * y

    # returning prediction
    return np.matrix.dot(theta.A.T, x_query)
def transform(self, pos=(0, 0), angle=0, scale=1):
    '''In-plane transformation function. Update the 3D transform based on the 2D changes'''
    center = self.shape * self.spacing / 2. + (self.shape + 1) % 2 * self.spacing / 2.
    inv = self.xfm.transform.homogeneous_inverse
    wpos = self.handle.center.representation.world_position
    wpos -= center
    if not isinstance(scale, (tuple, list, np.ndarray)):
        scale = [scale, scale]
    if self.axis == 1:
        trans = np.insert(pos[:2][::-1], self.axis, 0)
        wpos = np.insert(wpos[:2][::-1], self.axis, self.ipw_3d.ipw.slice_position)
        #angle = -angle
    else:
        trans = np.insert(pos[:2], self.axis, 0)
        wpos = np.insert(wpos[:2], self.axis, self.ipw_3d.ipw.slice_position)
    scale = np.insert(scale, self.axis, 1)

    self.parent._undolist.append(self.xfm.transform.matrix.to_array())

    self.xfm.transform.post_multiply()
    self.xfm.transform.translate(-wpos)
    self.xfm.transform.rotate_wxyz(np.degrees(angle), *self.ipw_3d.ipw.normal)
    self.xfm.transform.scale(scale)
    self.xfm.transform.translate(wpos)
    self.xfm.transform.translate(trans)
    self.xfm.transform.pre_multiply()

    self.xfm.widget.set_transform(self.xfm.filter.transform)
    self.xfm.update_pipeline()
    self.parent.update_slabs()

    np.save("/tmp/last_xfm.npy", self.parent.get_xfm())
def eta2direct(self, x):
    """eta2direct(x)

    Args:
        - x (``array-like``): a chromosome encoding an MGA trajectory in the eta encoding

    Returns:
        ``numpy.array``: a chromosome encoding the MGA trajectory using the direct encoding

    Raises:
        - ValueError: when the tof_encoding is not 'eta'
    """
    if self.tof_encoding != 'eta':
        raise ValueError(
            "cannot call this method if the tof_encoding is not 'eta'")

    # decision vector is [t0, n1, n2, n3, ... ]
    n = len(x) - 1
    dt = self.tof
    T = [0] * n
    T[0] = dt * x[1]
    for i in range(1, len(T)):
        T[i] = (dt - sum(T[:i])) * x[i + 1]
    np.insert(T, 0, [0])
    return T
def hawkesfeat(timeseries, args):
    '''
    Generate hawkes feature: positive rate/negative rate
    args['params']: 1X8 ndarray containing the params of the hawkes process
    '''
    # Assign parameters
    params = args['params'] if 'params' in args.keys() else np.array([0.2, 0.2, 0.2, 0.7, 0.7, 0.2, 1.0, 1.0])
    # Utilize the rate calculation function in the hawkes simulator
    sim = simulator(theta=params)
    sim.sethistory(timeseries)
    rate = sim.historydata[:, 2] / sim.historydata[:, 3]
    rate = np.insert(rate, 0, params[0] / params[1]).reshape(-1, 1)
    time = np.insert(sim.historydata[:, 0], 0, 0.0).reshape(-1, 1)
    time = np.cumsum(time, axis=0)
    value = np.hstack((time, rate))
    value = value.astype(object, copy=False)
    value[:, 0] = Vsecond2delta(value[:, 0])
    anchor = timeseries.values[0]
    anchor[1] = 0.0
    value = value + anchor
    rateseries = pd.DataFrame(value, columns=['time', 'quantity'])
    rateseries.index = rateseries['time']
    rateseries = rateseries.reindex(timeseries.index, method='ffill')
    return rateseries
def balance_workload(nproc, popsize, *index, **kwds):
    """divide popsize elements on 'nproc' chunks

    nproc: int number of nodes
    popsize: int number of jobs
    index: int rank of node(s) to calculate for (using slice notation)
    skip: int rank of node upon which to not calculate (i.e. the master)

    returns (begin, end) index vectors
    """
    _skip = False
    skip = kwds.get('skip', None)
    if skip is not None and skip < nproc:
        nproc = nproc - 1
        _skip = True
    count = np.round(popsize/nproc)
    counts = count * np.ones(nproc, dtype=np.int)
    diff = popsize - count*nproc
    counts[:diff] += 1
    begin = np.concatenate(([0], np.cumsum(counts)[:-1]))
    #return counts, index #XXX: (#jobs, begin index) for all elements
    if _skip:
        if skip == nproc:  # remember: nproc has been reduced
            begin = np.append(begin, begin[-1]+counts[-1])
            counts = np.append(counts, 0)
        else:
            begin = np.insert(begin, skip, begin[skip])
            counts = np.insert(counts, skip, 0)
    if not index:
        return begin, begin+counts #XXX: (begin, end) index for all elements
    #if len(index) > 1:
    #    return lookup((begin, begin+counts), *index) # index a slice
    return lookup((begin, begin+counts), *index) # index a single element
def chans(self, invert=False):
    """ Method to convert the bit mask into a string of channel
        ranges in CASA format. e.g. [3,10],[25,50] => "3~10;25~50"

        Parameters
        ----------
        None

        Returns
        -------
        string containing the formatted channel ranges
    """
    output = ""
    if invert:
        basechan = np.append(1 - self._chans, 0)
        shiftchan = np.insert(1 - self._chans, 0, 0)
    else:
        basechan = np.append(self._chans, 0)
        shiftchan = np.insert(self._chans, 0, 0)
    diff = basechan - shiftchan
    st = np.where(diff == 1)[0]
    en = np.where(diff == -1)[0]
    first = True
    for seg in zip(st, en):
        if not first:
            output += ";"
        else:
            first = False
        output += str(seg[0] + self._startchan) + "~" + str(seg[1] - 1 + self._startchan)
    return output
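# Standalone sketch of the append/insert shift trick above (toy mask):
# differencing a mask against a copy shifted by one exposes run starts (+1)
# and run ends (-1).
import numpy as np

chans = np.array([0, 1, 1, 1, 0, 1, 1, 0])
base = np.append(chans, 0)
shift = np.insert(chans, 0, 0)
diff = base - shift
print(np.where(diff == 1)[0], np.where(diff == -1)[0])   # starts [1 5], ends [4 7]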
def data_concatenate(list_data_neuro):
    """
    Tool function for blk_align_to_evt, makes sure the blocks contain the same number of signals

    :param list_data_neuro: a list of data_neuro
    :return:                concatenated data_neuro
    """
    data_neuro_all = {}
    for i, data_neuro in enumerate(list_data_neuro):
        if i == 0:  # if the first block, copy it
            data_neuro_all = data_neuro
        else:  # for next incoming blocks
            if len(data_neuro['ts']) == len(data_neuro_all['ts']):
                # check if ts length matches, otherwise raise error
                # check if signals match, if not match, fill the missing signal with all zeros
                if not np.array_equal(data_neuro['signal_info'], data_neuro_all['signal_info']):
                    for indx_signal_new, signal_new in enumerate(data_neuro['signal_info']):
                        # if emerging signal
                        if signal_new not in data_neuro_all['signal_info']:
                            data_neuro_all['signal_info'] = np.insert(data_neuro_all['signal_info'], indx_signal_new, signal_new)
                            data_neuro_all['data'] = np.insert(data_neuro_all['data'], indx_signal_new, 0.0, axis=2)
                    for indx_signal_old, signal_old in enumerate(data_neuro_all['signal_info']):
                        # if missing signal
                        if signal_old not in data_neuro['signal_info']:
                            data_neuro['signal_info'] = np.insert(data_neuro['signal_info'], indx_signal_old, signal_old)
                            data_neuro['data'] = np.insert(data_neuro['data'], indx_signal_old, 0.0, axis=2)
                # concatenate
                data_neuro_all['data'] = np.concatenate((data_neuro_all['data'], data_neuro['data']), axis=0)
            else:
                print('function data_concatenate can not work with data of different "ts" length')
                warnings.warn('function data_concatenate can not work with data of different "ts" length')
    return data_neuro_all
def get_affine_inliers_RANSAC(num_m, xy1_m, xy2_m,
                              acd1_m, acd2_m, xy_thresh_sqrd, sigma_thresh_sqrd=None):
    '''Computes initial inliers by iteratively computing affine transformations
    between matched keypoints'''
    aff_inliers = []
    # Enumerate All Hypothesis (Match transformations)
    for mx in xrange(num_m):
        xy1 = xy1_m[:, mx].reshape(2, 1)  # XY Positions
        xy2 = xy2_m[:, mx].reshape(2, 1)
        A1 = matrix(insert(acd1_m[:, mx], [1.], 0.)).reshape(2, 2)
        A2 = matrix(insert(acd2_m[:, mx], [1.], 0.)).reshape(2, 2)
        # Compute Affine Transform
        # from img1 to img2 = (E2\E1)
        Aff = linalg.inv(A2).dot(A1)
        #
        # Transform XY-Positions
        xy1_mAt = xy2 + Aff.dot((xy1_m - xy1))
        xy_err_sqrd = sum(power(xy1_mAt - xy2_m, 2), 0)
        _inliers = find(xy_err_sqrd < xy_thresh_sqrd)
        #
        # Transform Ellipse Geometry (solved on paper)
        if not sigma_thresh_sqrd is None:
            scale1_mAt = (acd1_m[0]*Aff[0, 0]) *\
                         (acd1_m[1]*Aff[1, 0] + acd1_m[2]*Aff[1, 1])
            scale2_m = acd2_m[0] * acd2_m[2]
            scale_err = np.abs(scale1_mAt - scale2_m)
            _inliers_scale = find(scale_err < sigma_thresh_sqrd)
            _inliers = np.bitwise_and(_inliers, _inliers_scale)
        # If this hypothesis transformation is better than the ones we have
        # previously seen then set it as the best
        if len(_inliers) > len(aff_inliers):
            aff_inliers = _inliers
            #bst_xy_err = xy_err_sqrd
    return aff_inliers
def set_params(self):
    r"""
    Internally, scipy.signal works with systems of the form

    .. math::

        ar_{poly}(L) X_t = ma_{poly}(L) \epsilon_t

    where L is the lag operator.

    To match this, we set

    .. math::

        ar_{poly} = (1, -\phi_1, -\phi_2, ..., -\phi_p)

        ma_{poly} = (1, \theta_1, \theta_2, ..., \theta_q)

    In addition, ar_poly must be at least as long as ma_poly.
    This can be achieved by padding it out with zeros when required.
    """
    # === set up ma_poly === #
    ma_poly = np.asarray(self._theta)
    self.ma_poly = np.insert(ma_poly, 0, 1)  # The array (1, theta)

    # === set up ar_poly === #
    if np.isscalar(self._phi):
        ar_poly = np.array(-self._phi)
    else:
        ar_poly = -np.asarray(self._phi)
    self.ar_poly = np.insert(ar_poly, 0, 1)  # The array (1, -phi)

    # === pad ar_poly with zeros if required === #
    if len(self.ar_poly) < len(self.ma_poly):
        temp = np.zeros(len(self.ma_poly) - len(self.ar_poly))
        self.ar_poly = np.hstack((self.ar_poly, temp))
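# Quick sketch (hypothetical phi/theta values) of the polynomial layout that
# set_params builds for scipy.signal: the AR lags enter with a flipped sign and
# the AR side is padded with zeros when it is shorter than the MA side.
import numpy as np

phi = np.array([0.5])                # assumed AR coefficients
theta = np.array([0.4, 0.3])         # assumed MA coefficients
ar_poly = np.insert(-phi, 0, 1)      # (1, -0.5)
ma_poly = np.insert(theta, 0, 1)     # (1, 0.4, 0.3)
if len(ar_poly) < len(ma_poly):
    ar_poly = np.hstack((ar_poly, np.zeros(len(ma_poly) - len(ar_poly))))
print(ar_poly, ma_poly)              # [ 1.  -0.5  0. ] [1.  0.4 0.3]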
def polyadd(p1, p2):
    s1 = np.size(p1)
    s2 = np.size(p2)
    length = max(s1, s2)
    p1 = np.insert(p1, np.zeros(length - s1 >= 0 and length - s1 or 0, dtype=int), 0)
    p2 = np.insert(p2, np.zeros(length - s2 >= 0 and length - s2 or 0, dtype=int), 0)
    return p1 + p2
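# Usage sketch for polyadd above (assumes numpy imported as np, as elsewhere in
# this file): coefficient arrays are left-padded with zeros to a common length
# before adding, so polynomials of different degree line up.
import numpy as np

print(polyadd(np.array([1, 2, 3]), np.array([4, 5])))   # [1 6 8], i.e. x**2 + 6x + 8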
np.sort(arr, order=['grad_year', 'gpa']))

# NUMPY FUNCTIONS FOR APPENDING ARRAYS BY INSERTING, DELETING, JOINING,
# SPLITTING, CHANGING SHAPE AND CONVERTING ARRAY TO A DIFFERENT TYPE
my_array = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.int64)
my_2d_array = my_array
new_array = np.append(my_array, [7, 8, 9, 10])
new_2darray = np.append(my_2d_array, [[7], [8]], axis=1)  # you switch around the shape of the array

# Print `my_2d_array`
print(my_2d_array)

# print the array returned by np.insert (the original my_array is unchanged)
print("Array after inserting: \n", np.insert(my_array, 1, 5))

# print the array returned by np.delete (the original my_array is unchanged)
print("Array after deleting an element: \n", np.delete(my_array, [1]))

# Select elements at (1,0), (0,1), (1,2) and (0,0)
print("Elements at (1,0), (0,1), (1,2) and (0,0) of my_2d_array :\n",
      my_2d_array[[1, 0, 1, 0], [0, 1, 2, 0]])

# Select a subset of the rows and columns
print("Subset of my_2d_array : \n",
      my_2d_array[[1, 0, 1, 0]][:, [0, 1, 2, 0]])

# ARRAY INDEXING
index_array = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
def update(self, map, state, PLAN, world_view): # this flag is set for one time step to signal a redraw in the viewer # planner_flag = 1 # return simple waypoint path # planner_flag = 2 # return dubins waypoint path # planner_flag = 3 # plan path through city using straight-line RRT # planner_flag = 4 # plan path through city using dubins RRT planner_flag = 5 # plan path through city using modified dubins RRT if planner_flag == 1: self.waypoints.type = 'fillet' self.waypoints.num_waypoints = 4 Va = 25 self.waypoints.ned[:, 0:self.waypoints.num_waypoints] \ = np.array([[0, 0, -100], [1000, 0, -100], [0, 1000, -100], [1000, 1000, -100]]).T self.waypoints.airspeed[:, 0:self.waypoints.num_waypoints] \ = np.array([[Va, Va, Va, Va]]) elif planner_flag == 2: self.waypoints.type = 'dubins' self.waypoints.num_waypoints = 4 Va = 25 self.waypoints.ned[:, 0:self.waypoints.num_waypoints] \ = np.array([[0, 0, -100], [1000, 0, -100], [0, 1000, -100], [1000, 1000, -100]]).T self.waypoints.airspeed[:, 0:self.waypoints.num_waypoints] \ = np.array([[Va, Va, Va, Va]]) self.waypoints.course[:, 0:self.waypoints.num_waypoints] \ = np.array([[np.radians(0), np.radians(45), np.radians(45), np.radians(-135)]]) elif planner_flag == 3: self.waypoints.type = 'fillet' self.waypoints.num_waypoints = 0 Va = 25 primaryWaypoints = np.array([[0., 0., -100.], [2000., 0., -100.], [0., 1200., -100.], [3000., 3000., -100.]]).T primaryWaypointsAirspeed = np.array([[Va, Va, Va, Va]]) for i in range(0, np.size(primaryWaypoints,1)): # current configuration vector format: N, E, D, Va if i == 0 and np.sqrt((state.pn - primaryWaypoints[0,0])**2 + (state.pe - primaryWaypoints[1,0])**2) > 150: wpp_start = np.array([state.pn, state.pe, primaryWaypoints[0,0], state.Va]) self.waypoints.ned[:, self.waypoints.num_waypoints] = wpp_start[0:3] self.waypoints.airspeed[:, self.waypoints.num_waypoints] = wpp_start.item(3) self.waypoints.num_waypoints += 1 elif i == 0: self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, 0], primaryWaypoints[1, 0], primaryWaypoints[2, 0]]) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(0) self.waypoints.num_waypoints += 1 continue else: wpp_start = np.array([primaryWaypoints[0,i-1], primaryWaypoints[1,i-1], primaryWaypoints[2,i-1], primaryWaypointsAirspeed.item(i-1)]) wpp_end = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i], primaryWaypointsAirspeed.item(i)]) waypoints = self.rrt.planPath(wpp_start, wpp_end, map) numNew = waypoints.num_waypoints-1 numOld = self.waypoints.num_waypoints if numNew >1: self.waypoints.ned[:, numOld:numOld + numNew] = waypoints.ned[:, 1:numNew+1] self.waypoints.airspeed[:,numOld:numOld + numNew] = wpp_end.item(3) * np.ones((1, numNew)) else: self.waypoints.ned[:, numOld] = waypoints.ned[:, 1] self.waypoints.airspeed[:,numOld] = wpp_end.item(3) self.waypoints.num_waypoints = numNew + numOld elif planner_flag == 4: self.waypoints.type = 'dubins' self.waypoints.num_waypoints = 0 Va = 25 numberWaypoints = 4 primaryWaypoints = np.array([[0., 0., -100.], [2000., 0., -100.], [0., 1200., -100.], [3000., 3000., -100.]]).T primaryWaypointsAirspeed = np.array([[Va, Va, Va, Va]]) primaryCourseAngles = np.array([[np.radians(0), np.radians(45), np.radians(45), np.radians(-135)]]) # numberWaypoints = 2 # primaryWaypoints = np.array([[0., 0., -100.], # [1000., 0., -100.]]).T # primaryWaypointsAirspeed = np.array([[Va, Va]]) # primaryCourseAngles = np.array([[np.radians(0), # np.radians(45)]]) for i 
in range(0, numberWaypoints): # current configuration vector format: N, E, D, Va if i == 0 and np.sqrt((state.pn - primaryWaypoints[0,0])**2 + (state.pe - primaryWaypoints[1,0])**2) > PLAN.R_min: wpp_start = np.array([state.pn, state.pe, -state.h, state.chi, state.Va]) self.waypoints.ned[:, self.waypoints.num_waypoints] = wpp_start[0:3] self.waypoints.course[:, self.waypoints.num_waypoints] = wpp_start.item(3) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = wpp_start.item(4) self.waypoints.num_waypoints += 1 elif i == 0: self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, 0], primaryWaypoints[1, 0], primaryWaypoints[2, 0]]) self.waypoints.course[:, self.waypoints.num_waypoints] = primaryCourseAngles.item(0) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(0) self.waypoints.num_waypoints += 1 continue else: wpp_start = np.array([primaryWaypoints[0, i - 1], primaryWaypoints[1, i - 1], primaryWaypoints[2, i - 1], primaryCourseAngles.item(i-1), primaryWaypointsAirspeed.item(i-1)]) wpp_end = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i], primaryCourseAngles.item(i), primaryWaypointsAirspeed.item(i)]) waypoints = self.rrtDubins.planPath(wpp_start, wpp_end, PLAN.R_min, map) numNew = waypoints.num_waypoints-1 numOld = self.waypoints.num_waypoints if numNew > 1: self.waypoints.ned[:, numOld:numOld + numNew] = waypoints.ned[:, 1:numNew+1] self.waypoints.course[:,numOld:numOld + numNew] = waypoints.course[:,1:numNew+1] self.waypoints.airspeed[:, numOld:(numOld + numNew)] = wpp_end.item(4) * np.ones((1, numNew)) else: self.waypoints.ned[:, numOld] = waypoints.ned[:, 1] self.waypoints.course[:, numOld] = waypoints.course[:, 1] self.waypoints.airspeed[:, numOld] = wpp_end.item(4) * np.ones((1, numNew)) self.waypoints.num_waypoints = numNew + numOld elif planner_flag == 5: # self.waypoints.type = ['dubins','dubins','dubins','dubins'] self.waypoints.type = ['straight_line'] # self.waypoints.type = 'dubins' self.waypoints.num_waypoints = 0 Va = 25 numberWaypoints = 4 primaryWaypoints = np.array([[0., 0., -100.], [2000., 0., -100.], [0., 1500., -100.], [3200., 3200., -100.]]).T world_view.drawWaypointPoints(primaryWaypoints) primaryWaypointsAirspeed = np.array([[Va, Va, Va, Va]]) primaryCourseAngles = np.array([[np.radians(0), np.radians(45), np.radians(45), np.radians(-135)]]) # Make new points before the real waypoints. In line with chi from previous waypoint pointing. #At least radius open from collision?? Or just check collision? 
j = 0 prevChi = 0 while j < numberWaypoints-1: chi = np.arctan2((primaryWaypoints[1,j+1] - primaryWaypoints[1,j]), (primaryWaypoints[0,j+1] - primaryWaypoints[0,j])) primaryCourseAngles[:,j] = prevChi+.01 distBetween = PLAN.R_min*2 newWay = primaryWaypoints[:,j+1] - distBetween * np.array([np.cos(chi), np.sin(chi), 0.]).T primaryWaypoints = np.insert(primaryWaypoints, j+1, newWay, 1) primaryWaypointsAirspeed = np.insert(primaryWaypointsAirspeed, j + 1, Va, 1) primaryCourseAngles = np.insert(primaryCourseAngles, j + 1, chi, 1) numberWaypoints += 1 prevChi = chi j += 2 #Make sure far enough apart for i in range(0, numberWaypoints): # current configuration vector format: N, E, D, Va if i == 0 and np.sqrt((state.pn - primaryWaypoints[0,0])**2 + (state.pe - primaryWaypoints[1,0])**2) > PLAN.R_min: even = True wpp_start = np.array([state.pn, state.pe, -state.h, state.chi, state.Va]) self.waypoints.ned[:, self.waypoints.num_waypoints] = wpp_start[0:3] self.waypoints.course[:, self.waypoints.num_waypoints] = wpp_start.item(3) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = wpp_start.item(4) self.waypoints.num_waypoints += 1 elif i == 0: even = True self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, 0], primaryWaypoints[1, 0], primaryWaypoints[2, 0]]) self.waypoints.course[:, self.waypoints.num_waypoints] = primaryCourseAngles.item(0) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(0) self.waypoints.num_waypoints += 1 continue else: if even: even = False else: even = True wpp_start = np.array([primaryWaypoints[0, i - 1], primaryWaypoints[1, i - 1], primaryWaypoints[2, i - 1], primaryCourseAngles.item(i-1), primaryWaypointsAirspeed.item(i-1)]) wpp_end = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i], primaryCourseAngles.item(i), primaryWaypointsAirspeed.item(i)]) if even and i != 0: self.waypoints.ned[:, self.waypoints.num_waypoints] = np.array([primaryWaypoints[0, i], primaryWaypoints[1, i], primaryWaypoints[2, i]]) self.waypoints.course[:, self.waypoints.num_waypoints] = primaryCourseAngles.item(i) self.waypoints.airspeed[:, self.waypoints.num_waypoints] = primaryWaypointsAirspeed.item(i) self.waypoints.type.append('straight_line') self.waypoints.num_waypoints += 1 continue waypoints = self.rrtDubinsProj.planPath(wpp_start, wpp_end, PLAN.R_min, map) numNew = waypoints.num_waypoints-1 numOld = self.waypoints.num_waypoints if numNew > 1: self.waypoints.ned[:, numOld:numOld + numNew] = waypoints.ned[:, 1:numNew+1] self.waypoints.course[:,numOld:numOld + numNew] = waypoints.course[:,1:numNew+1] self.waypoints.airspeed[:, numOld:(numOld + numNew)] = wpp_end.item(4) * np.ones((1, numNew)) for newI in range(0, numNew-1): self.waypoints.type.append('dubins') else: self.waypoints.ned[:, numOld] = waypoints.ned[:, 1] self.waypoints.course[:, numOld] = waypoints.course[:, 1] self.waypoints.airspeed[:, numOld] = wpp_end.item(4) * np.ones((1, numNew)) if even: self.waypoints.type.append('straight_line') else: self.waypoints.type.append('dubins') self.waypoints.num_waypoints = numNew + numOld else: print("Error in Path Planner: Undefined planner type.") return self.waypoints
def calculate_ptdf(branches, buses, index_set_branch, index_set_bus, reference_bus, base_point=BasePointType.FLATSTART, sparse_index_set_branch=None, mapping_bus_to_idx=None): """ Calculates the sensitivity of the voltage angle to real power injections Parameters ---------- branches: dict{} The dictionary of branches for the test case buses: dict{} The dictionary of buses for the test case index_set_branch: list The list of keys for branches for the test case index_set_bus: list The list of keys for buses for the test case reference_bus: key value The reference bus key value base_point: egret.model_library_defn.BasePointType The base-point type for calculating the PTDF matrix sparse_index_set_branch: list The list of keys for branches needed to compute a sparse PTDF matrix If this is None, a dense PTDF matrix is returned mapping_bus_to_idx: dict A map from bus names to indices for matrix construction. If None, will be inferred from index_set_bus. """ _len_bus = len(index_set_bus) if mapping_bus_to_idx is None: mapping_bus_to_idx = { bus_n: i for i, bus_n in enumerate(index_set_bus) } _len_branch = len(index_set_branch) _ref_bus_idx = mapping_bus_to_idx[reference_bus] ## check if the network is connected graph = construct_connection_graph(branches, mapping_bus_to_idx) connected = check_network_connection(graph, index_set_bus) J = _calculate_J11(branches, buses, index_set_branch, index_set_bus, mapping_bus_to_idx, base_point, approximation_type=ApproximationType.PTDF) A = calculate_adjacency_matrix_transpose(branches, index_set_branch, index_set_bus, mapping_bus_to_idx) M = A @ J if sparse_index_set_branch is None or len( sparse_index_set_branch) == _len_branch: ## the resulting matrix after inversion will be fairly dense, ## the scipy documenation recommends using dense for the inversion ## as well ref_bus_mask = np.ones(_len_bus, dtype=bool) ref_bus_mask[_ref_bus_idx] = False # M is now (A^T B_d A) with # row and column of reference # bus removed J0 = M[ref_bus_mask, :][:, ref_bus_mask] # (B_d A) with reference bus column removed B_dA = J[:, ref_bus_mask].A if connected: try: PTDF = np.linalg.solve(J0.T.A, B_dA.T).T except np.linalg.LinAlgError: logger.warning( "Matrix not invertible. Calculating pseudo-inverse instead." ) SENSI = np.linalg.pinv(J0.A, rcond=1e-7) PTDF = np.matmul(B_dA, SENSI) else: logger.warning( "Using pseudo-inverse method as network is disconnected") SENSI = np.linalg.pinv(J0.A, rcond=1e-7) PTDF = np.matmul(B_dA, SENSI) # insert 0 column for reference bus PTDF = np.insert(PTDF, _ref_bus_idx, np.zeros(_len_branch), axis=1) elif len(sparse_index_set_branch) < _len_branch: ref_bus_row = sp.coo_matrix(([1], ([0], [_ref_bus_idx])), shape=(1, _len_bus)) ref_bus_col = sp.coo_matrix(([1], ([_ref_bus_idx], [0])), shape=(_len_bus, 1)) J0 = sp.bmat([[M, ref_bus_col], [ref_bus_row, 0]], format='coo') B = np.array([], dtype=np.int64).reshape(_len_bus + 1, 0) _sparse_mapping_branch = { i: branch_n for i, branch_n in enumerate(index_set_branch) if branch_n in sparse_index_set_branch } ## TODO: Maybe just keep the sparse PTDFs as a dict of ndarrays? ## Right now the return type depends on the options ## passed in for idx, branch_name in _sparse_mapping_branch.items(): b = np.zeros((_len_branch, 1)) b[idx] = 1 _tmp = J.transpose() @ b _tmp = np.vstack([_tmp, 0]) B = np.concatenate((B, _tmp), axis=1) row_idx = list(_sparse_mapping_branch.keys()) PTDF = sp.lil_matrix((_len_branch, _len_bus)) _ptdf = sp.linalg.spsolve(J0.transpose().tocsr(), B).T PTDF[row_idx] = _ptdf[:, :-1] return PTDF
def print_condition_number(df):
    X = np.insert(np.array(df.values), 0, 1, axis=1)
    xpx = np.matmul(np.transpose(X), X)
    eigvals = [np.real(eig) for eig in np.linalg.eigvals(xpx)]
    print('Condition Number:' + str(abs(max(eigvals) / min(eigvals))))
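# Hypothetical usage of print_condition_number above (made-up dataframe): the
# ratio of extreme eigenvalues of X'X, with the bias column inserted, flags
# collinearity; here column 'b' is nearly 2*'a', so the printed number is large.
import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [2.1, 3.9, 6.2, 8.1]})
print_condition_number(df)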
def test_insert_2():
    x = np.array([1, 2, 3])
    y = np.insert(x, 0, 4)
    z = nw.insert(x, 0, 4)
    assert_almost_equal(y, z)
def test_pose(opt):
    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    ##### init #####
    input_uint8 = tf.placeholder(tf.uint8,
                                 [opt.batch_size, opt.img_height, opt.img_width, opt.seq_length * 3],
                                 name='raw_input')
    tgt_image = input_uint8[:, :, :, :3]
    src_image_stack = input_uint8[:, :, :, 3:]

    model = GeoNetModel(opt, tgt_image, src_image_stack, None)
    fetches = {"pose": model.pred_poses}

    saver = tf.train.Saver([var for var in tf.model_variables()])

    ##### load test frames #####
    seq_dir = os.path.join(opt.dataset_dir, 'sequences', '%.2d' % opt.pose_test_seq)
    img_dir = os.path.join(seq_dir, 'image_2')
    N = len(glob(img_dir + '/*.png'))
    test_frames = ['%.2d %.6d' % (opt.pose_test_seq, n) for n in range(N)]

    ##### load time file #####
    with open(opt.dataset_dir + 'sequences/%.2d/times.txt' % opt.pose_test_seq, 'r') as f:
        times = f.readlines()
    times = np.array([float(s[:-1]) for s in times])

    ##### Go! #####
    max_src_offset = (opt.seq_length - 1) // 2
    with tf.Session() as sess:
        saver.restore(sess, opt.init_ckpt_file)

        for tgt_idx in range(max_src_offset, N - max_src_offset, opt.batch_size):
            if (tgt_idx - max_src_offset) % 100 == 0:
                print('Progress: %d/%d' % (tgt_idx - max_src_offset, N))

            inputs = np.zeros((opt.batch_size, opt.img_height, opt.img_width, 3 * opt.seq_length),
                              dtype=np.uint8)

            for b in range(opt.batch_size):
                idx = tgt_idx + b
                if idx >= N - max_src_offset:
                    break
                image_seq = load_image_sequence(opt.dataset_dir,
                                                test_frames,
                                                idx,
                                                opt.seq_length,
                                                opt.img_height,
                                                opt.img_width)
                inputs[b] = image_seq

            pred = sess.run(fetches, feed_dict={input_uint8: inputs})
            pred_poses = pred['pose']
            # Insert the target pose [0, 0, 0, 0, 0, 0]
            pred_poses = np.insert(pred_poses, max_src_offset, np.zeros((1, 6)), axis=1)

            for b in range(opt.batch_size):
                idx = tgt_idx + b
                if idx >= N - max_src_offset:
                    break
                pred_pose = pred_poses[b]
                curr_times = times[idx - max_src_offset:idx + max_src_offset + 1]
                out_file = opt.output_dir + '%.6d.txt' % (idx - max_src_offset)
                dump_pose_seq_TUM(out_file, pred_pose, curr_times)
def insert_oxide_thickness(path_train_data, labels):
    train_data = np.loadtxt(path_train_data, skiprows=1)
    labels = np.insert(labels, 1, train_data[:, 1])
    return labels
def check_lightcurve_time(light_curve, exposure_time, frame_time):
    """Check to be sure the provided lightcurve is long enough to cover the
    supplied total exposure time. If not, lengthen at the beginning or end
    such that it does. Times will only be added to the beginning if the
    first time entry in the lightcurve is > 0. Lightcurves where the initial
    time entry is < 0 will have all times < 0 chopped. This will allow the
    user to simulate lightcurves where the exposure starts somewhere in the
    middle of the lightcurve.

    Parameters
    ----------
    light_curve : dict
        Dictionary of lightcurve. "fluxes" and "times" keys contain arrays
        of those values

    exposure_time : float
        Total exposure time for the full exposure being simulated (in seconds)

    frame_time : float
        Exposure time of a single frame of the observation

    Returns
    -------
    light_curve : dict
        Potentially modified with added or removed elements
    """
    times = copy.deepcopy(light_curve["times"].value)
    fluxes = copy.deepcopy(light_curve["fluxes"].value)
    time_units = light_curve["times"].unit
    flux_units = light_curve["fluxes"].unit
    adjusted = False

    # Remove elements where time < 0.
    if np.min(times) < 0.:
        positive_times = times >= 0.
        times = times[positive_times]
        fluxes = fluxes[positive_times]
        adjusted = True

    # If the times begin at values significantly > 0,
    # then add entries to bring the start back to time = 0
    if np.min(times) > 0.:
        print(("Lightcurve time values do not start at zero. Prepending an entry with time=0 "
               "and flux = 1."))
        times = np.insert(times, 0, 0.)
        fluxes = np.insert(fluxes, 0, 1.)
        adjusted = True

    # If the ending time is less than the exposure's total
    # observation time, then add entries with flux=1
    if np.max(times) < exposure_time:
        print(("Lightcurve time values extend only to {} seconds. This is not long enough "
               "to cover the entire exposure time of {} seconds. Extending to cover the full "
               "exposure time with flux = 1.".format(np.max(times), exposure_time)))
        times = np.append(times, exposure_time + 5 * frame_time)
        fluxes = np.append(fluxes, 1.)
        adjusted = True

    if adjusted:
        light_curve["times"] = times * time_units
        light_curve["fluxes"] = fluxes * flux_units
    return light_curve
assert n_stations == full_data.shape[0]
print('n_stations: {}, n_days: {}'.format(n_stations, n_days))

neighbour = 5

leap_years = np.zeros_like(years).astype(np.bool)
for i, in_year in enumerate(np.split(valid_days, len(years))):
    leap_years[i] = in_year.sum() == 366

w_months = np.tile(np.repeat(np.arange(12), 31), years[-1] - years[0] + 1)[valid_days]

w_days = np.tile(np.arange(365), years[-1] - years[0] + 1)
for i, leap in enumerate(leap_years):
    if leap:
        w_days = np.insert(w_days, ((i + 1) * 365), 365)
w_days_sin = np.sin(w_days / 367 * np.pi)
w_days_cos = -np.cos(w_days / 367 * np.pi * 2) / 2 + 0.5

if dense:
    EXP = 'future'
    datas_temp_reg, keep_reg, gReg = clean_nodes(full_data, [1, 3], lon, lat, figs=True, rad=False)
    from GHCN_preprocessing import dataset_reg
    training, validation = dataset_reg(datas_temp_reg, lon[keep_reg], lat[keep_reg], alt[keep_reg],
def linear_normal(X, Y):
    X = np.insert(X, 0, 1, 1)
    # return parameters as numpy array
    return np.linalg.pinv(X.transpose() @ X) @ X.transpose() @ Y
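# Tiny usage sketch (synthetic data): inserting a column of ones at position 0
# adds the intercept term, so the returned vector is [bias, slope] for this 1-D fit.
import numpy as np

X = np.array([[0.0], [1.0], [2.0], [3.0]])
Y = np.array([1.0, 3.0, 5.0, 7.0])            # y = 2x + 1
Xb = np.insert(X, 0, 1, 1)                    # prepend bias column
print(np.linalg.pinv(Xb.T @ Xb) @ Xb.T @ Y)   # approximately [1. 2.]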
def CriticalT(T, E):
    Epoly = numpy.poly1d(numpy.polyfit(T, E, T.size // 3))
    dEpoly = numpy.diff(Epoly(T))
    dEpoly = numpy.insert(dEpoly, 0, 0)
    return T[numpy.argmin(dEpoly)]
def generateNlist(d_list, gray_list, lamb):
    # Conversion from gray values to refractive index
    maximum = 67  # np.amax(gray_list) #71
    minimum = 40  # np.amin(gray_list) #40
    n_list = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    n1 = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    n2 = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    nmelanin = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    n_list_converted = np.zeros((lamb.size, gray_list.size), dtype=np.complex_)
    #for k in 1/lamb:
    BR1 = 9464.8
    AR1 = 1.515  # 1.5145
    BR2 = 20700  # B Coefficient in Cauchy's equation (real)
    AR2 = 1.648  # Calculates Cauchy's A from given values of B and n at 600nm
    BI2 = 210    # B coefficient in exponential equation (imaginary part) #210
    AI2 = 0.5    # A coefficient in exponential equation (im part) #0.56 #3.0

    for i in range(len(lamb)):
        k = 1 / lamb[i]
        #n1 = AR1 + BR1*(k**2) + 0*1j+ AI1 + BI1*(k**2)
        n1[i, :] = AR1 + BR1 * (k**2)
        nmelanin[i, :] = AR2 + BR2 * (k**2) + (AI2 * np.exp(-1 / (BI2 * k))) * 1j
        qpigm = 0.70  # 0.85
        n2[i, :] = n1[i, :] * (1 - qpigm) + nmelanin[i, :] * qpigm
        conversion = (n2 - n1) / (maximum - minimum)
        gray_list_trans = np.zeros((1, gray_list.size))
        gray_list_trans[:, 0:] = gray_list[0:]
        for j in range(len(gray_list)):
            n_list_converted[i, j] = n2[i, j] - conversion[i, j] * (gray_list_trans[0, j] - minimum)
        wavel = 1 / k
        n1i = n1.imag
        n2i = n2.imag

##    if DEBUG:
##        plt.figure(3)
##        plt.plot(wavel, n1, marker='o', ms = 10, alpha=1, color='b', label='Chitin layer')
##        plt.plot(wavel, n2, marker='o', ms = 10, alpha=1, color='k', label='Melanin layer')
##        plt.xlabel('Wavelength (/nm)')
##        plt.ylabel('Refractive index')
##        plt.title('Dispersion relations, imaginary part. Blue = Chitin layer. Black = Melanin layer')

    # Adds bottom infinite layer at the end of the n list (same as last n calculated)
    n_last = []
    n_last = n_list_converted[:, -1]
    n_lists = np.column_stack((n_list_converted, n_last))

    # Adds air layer at the top of the n list
    air = 1.00029
    n_air = np.full((lamb.size, 1), air)
    n_list = np.insert(n_lists, 0, air, axis=1)
    return n_list
def get_arc_lengths(self, waypoints):
    d = np.diff(waypoints, axis=0)
    consecutive_diff = np.sqrt(np.sum(np.power(d, 2), axis=1))
    dists_cum = np.cumsum(consecutive_diff)
    dists_cum = np.insert(dists_cum, 0, 0.0)
    return dists_cum
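# Standalone sketch (toy waypoints): prepending 0.0 makes the cumulative arc
# length line up with the waypoints themselves (distance 0 at the first point).
import numpy as np

waypoints = np.array([[0.0, 0.0], [3.0, 4.0], [3.0, 8.0]])
d = np.diff(waypoints, axis=0)
dists = np.insert(np.cumsum(np.sqrt((d ** 2).sum(axis=1))), 0, 0.0)
print(dists)   # [0. 5. 9.]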
def main(d): # d is a dictionary containing the auto-encoder design specifications and training phase specifications # RESET DEFAULT GRAPH print('resetting default graph...', flush=True) tf.reset_default_graph() # FINISH CONFIGURATION print('finishing configuration...', flush=True) # specify noise distribution if d['noise_distribution'] == 'truncnorm': noise_distribution = tf.truncated_normal elif d['noise_distribution'] == 'uniform': noise_distribution = tf.random_uniform # specify distribution of initial weights if d['initialization_distribution'] == 'truncnorm': initialization_distribution = tf.truncated_normal # specify activation function if d['activation_function'] == 'tanh': activation_function = {'tf':tf.tanh, 'np':sdae_apply_functions.tanh} elif d['activation_function'] == 'relu': activation_function = {'tf':tf.nn.relu, 'np':sdae_apply_functions.relu} elif d['activation_function'] == 'elu': activation_function = {'tf':tf.nn.elu, 'np':sdae_apply_functions.elu} elif d['activation_function'] == 'sigmoid': activation_function = {'tf':tf.sigmoid, 'np':sdae_apply_functions.sigmoid} # load data partitions = ['train', 'valid', 'test'] dataset = {} for partition in partitions: dataset[partition] = datasetIO.load_datamatrix('{0}/{1}.pickle'.format(d['input_path'], partition)) d['{0}_examples'.format(partition)] = dataset[partition].shape[0] # create output directory if not os.path.exists(d['output_path']): os.makedirs(d['output_path']) # initialize model architecture (number of layers and dimension of each layer) d['current_dimensions'] = d['all_dimensions'][:d['current_hidden_layer']+1] # dimensions of model up to current depth # specify embedding function for current training phase # we want the option of skipping the embedding activation function to apply only to the full model if not d['apply_activation_to_embedding'] and d['current_dimensions'] == d['all_dimensions']: d['current_apply_activation_to_embedding'] = False else: d['current_apply_activation_to_embedding'] = True # initialize assignments of training examples to mini-batches and number of training steps for stochastic gradient descent d['batch_size'] = d['batch_fraction']*d['train_examples'] batch_ids = create_batch_ids(d['train_examples'], d['batch_size']) d['batches'] = np.unique(batch_ids).size d['steps'] = d['current_epochs']*d['batches'] # specify path to weights from previous training run d['previous_variables_path'] = '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['previous_hidden_layer'], d['previous_finetuning_run']) d['fix_or_init'] = 'fix' if d['current_finetuning_run'] == 0 else 'init' # fix for pretraining, init for finetuning # specify rows and columns of figure showing data reconstructions d['reconstruction_rows'] = int(np.round(np.sqrt(np.min([100, d['valid_examples']])/2))) d['reconstruction_cols'] = 2*d['reconstruction_rows'] # print some design information print('input path: {0}'.format(d['input_path']), flush=True) print('output path: {0}'.format(d['output_path']), flush=True) print('previous variables path: {0}'.format(d['previous_variables_path']), flush=True) print('previous variables fix or init: {0}'.format(d['fix_or_init']), flush=True) # SAVE CURRENT DESIGN print('saving current design...', flush=True) with open('{0}/design_layer{1!s}_finetuning{2!s}.json'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), mode='wt', encoding='utf-8', errors='surrogateescape') as fw: json.dump(d, fw, indent=2) # DEFINE REPORTING VARIABLES print('defining 
reporting variables...', flush=True) reporting_steps = sdae_design_functions.create_reporting_steps(d['steps'], d['firstcheckpoint'], d['maxstepspercheckpoint']) valid_losses = np.zeros(reporting_steps.size, dtype='float32') train_losses = np.zeros(reporting_steps.size, dtype='float32') valid_noisy_losses = np.zeros(reporting_steps.size, dtype='float32') train_noisy_losses = np.zeros(reporting_steps.size, dtype='float32') print('reporting steps:', reporting_steps, flush=True) # DEFINE COMPUTATIONAL GRAPH # define placeholders for input data, use None to allow feeding different numbers of examples print('defining placeholders...', flush=True) training = tf.placeholder(tf.bool, []) noise_stdv = tf.placeholder(tf.float32, []) noise_prob = tf.placeholder(tf.float32, []) training_and_validation_data_initializer = tf.placeholder(tf.float32, [dataset['train'].shape[0]+dataset['valid'].shape[0], dataset['train'].shape[1]]) selection_mask = tf.placeholder(tf.bool, [dataset['train'].shape[0]+dataset['valid'].shape[0]]) # define variables # W contains the weights, bencode contains the biases for encoding, and bdecode contains the biases for decoding print('defining variables...', flush=True) training_and_validation_data = tf.Variable(training_and_validation_data_initializer, trainable=False, collections=[]) if os.path.exists(d['previous_variables_path']): # update variables (if continuing from a previous training run) print('loading previous variables...', flush=True) global_step, W, bencode, bdecode = update_variables(d['current_dimensions'], initialization_distribution, d['initialization_sigma'], d['previous_variables_path'], d['fix_or_init'], d['include_global_step']) elif (d['current_hidden_layer'] == 1 and d['current_finetuning_run'] == 0) or d['skip_layerwise_training']: # create variables global_step, W, bencode, bdecode = create_variables(d['current_dimensions'], initialization_distribution, d['initialization_sigma']) else: raise ValueError('could not find previous variables') # define model # h contains the activations from input layer to bottleneck layer # hhat contains the activations from bottleneck layer to output layer # xhat is a reference to the output layer (i.e. 
the reconstruction) print('defining model...', flush=True) x = tf.boolean_mask(training_and_validation_data, selection_mask) if d['noise_distribution'] == 'truncnorm': noise = noise_distribution(tf.shape(x), stddev=noise_stdv) else: noise = noise_distribution(tf.shape(x), minval=0, maxval=noise_stdv) noise_mask = tf.to_float(tf.random_uniform(tf.shape(x)) <= noise_prob) xnoisy = apply_noise(x, noise, noise_mask, d['noise_operation']) if d['activation_function'] == 'sigmoid' and d['apply_activation_to_output']: h, hhat, xhat = create_autoencoder(xnoisy, activation_function['tf'], False, d['current_apply_activation_to_embedding'], d['use_batchnorm'], training, W, bencode, bdecode) else: h, hhat, xhat = create_autoencoder(xnoisy, activation_function['tf'], d['apply_activation_to_output'], d['current_apply_activation_to_embedding'], d['use_batchnorm'], training, W, bencode, bdecode) # define loss print('defining loss...', flush=True) if d['activation_function'] == 'sigmoid' and d['apply_activation_to_output']: loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=xhat)) else: loss = tf.reduce_mean(tf.squared_difference(x, xhat)) # squared error loss # define optimizer and training function print('defining optimizer and training function...', flush=True) optimizer = tf.train.AdamOptimizer(learning_rate=d['learning_rate'], epsilon=d['epsilon'], beta1=d['beta1'], beta2=d['beta2']) train_ops = optimizer.minimize(loss, global_step=global_step) # define update ops and add to train ops (if using batch norm) if d['use_batchnorm']: update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) train_ops = [train_ops, update_ops] # collect batch norm variables if d['use_batchnorm']: bn_gammas = tf.global_variables(scope='batch_normalization.{0,2}/gamma:0') print(bn_gammas, flush=True) bn_betas = tf.global_variables(scope='batch_normalization.{0,2}/beta:0') bn_moving_means = tf.global_variables(scope='batch_normalization.{0,2}/moving_mean:0') bn_moving_variances = tf.global_variables(scope='batch_normalization.{0,2}/moving_variance:0') # define bottleneck layer preactivation # bottleneck_preactivation = tf.matmul(h[-2], W[-1]) + bencode[-1] # INITIALIZE TENSORFLOW SESSION print('initializing tensorflow session...', flush=True) init = tf.global_variables_initializer() session_config = configure_session(d['processor'], d['gpu_memory_fraction']) with tf.Session(config=session_config) as sess: sess.run(init) # TRAINING print('training...', flush=True) sess.run(training_and_validation_data.initializer, feed_dict={training_and_validation_data_initializer: np.append(dataset['train'].matrix, dataset['valid'].matrix, 0)}) validation_id = -1 batch_and_validation_ids = np.full(dataset['train'].shape[0]+dataset['valid'].shape[0], validation_id, dtype=batch_ids.dtype) is_train = np.append(np.ones(dataset['train'].shape[0], dtype='bool'), np.zeros(dataset['valid'].shape[0], dtype='bool')) is_valid = ~is_train training_step = 0 i = 0 overfitting_score = 0 stopearly = False starttime = time.time() with open('{0}/log_layer{1!s}_finetuning{2!s}.txt'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), mode='wt', buffering=1) as fl: fl.write('\t'.join(['step', 'train_loss', 'valid_loss', 'train_noisy_loss', 'valid_noisy_loss', 'time']) + '\n') for epoch in range(d['current_epochs']): if stopearly: break # randomize assignment of training examples to batches np.random.shuffle(batch_ids) batch_and_validation_ids[is_train] = batch_ids for batch in range(d['batches']): 
training_step += 1 # select mini-batch selected = batch_and_validation_ids == batch # update weights sess.run(train_ops, feed_dict={training:True, selection_mask:selected, noise_prob:d['noise_probability'], noise_stdv:d['noise_sigma']}) # record training and validation errors if training_step == reporting_steps[i]: train_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_train, noise_prob:0, noise_stdv:0}) train_noisy_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_train, noise_prob:d['noise_probability'], noise_stdv:d['noise_sigma']}) valid_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_valid, noise_prob:0, noise_stdv:0}) valid_noisy_losses[i] = sess.run(loss, feed_dict={training:False, selection_mask:is_valid, noise_prob:d['noise_probability'], noise_stdv:d['noise_sigma']}) print('step:{0:1.6g}, train loss:{1:1.3g}, valid loss:{2:1.3g}, train noisy loss:{3:1.3g},valid noisy loss:{4:1.3g}, time:{5:1.6g}'.format(reporting_steps[i], train_losses[i], valid_losses[i], train_noisy_losses[i], valid_noisy_losses[i], time.time() - starttime), flush=True) fl.write('\t'.join(['{0:1.6g}'.format(x) for x in [reporting_steps[i], train_losses[i], valid_losses[i], train_noisy_losses[i], valid_noisy_losses[i], time.time() - starttime]]) + '\n') # save current weights, reconstructions, and projections if training_step >= d['startsavingstep'] or training_step == reporting_steps[-1]: with open('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], training_step), 'wb') as fw: pickle.dump((sess.run(global_step), sess.run(W), sess.run(bencode), sess.run(bdecode)), fw) if d['use_batchnorm']: with open('{0}/intermediate_batchnorm_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], training_step), 'wb') as fw: pickle.dump((sess.run(bn_gammas), sess.run(bn_betas), sess.run(bn_moving_means), sess.run(bn_moving_variances)), fw) # stop early if overfitting if valid_losses[i] >= 1.01*(np.insert(valid_losses[:i], 0, np.inf).min()): overfitting_score += 1 else: overfitting_score = 0 if overfitting_score == d['overfitting_score_max']: stopearly = True print('stopping early!', flush=True) break i += 1 # end tensorflow session print('closing tensorflow session...', flush=True) # ROLL BACK IF OVERFITTING if stopearly: print('rolling back...', flush=True) reporting_steps = reporting_steps[:i+1] train_losses = train_losses[:i+1] valid_losses = valid_losses[:i+1] train_noisy_losses = train_noisy_losses[:i+1] valid_noisy_losses = valid_noisy_losses[:i+1] # selected_step = max([reporting_steps[i-d['overfitting_score_max']], d['startsavingstep']]) else: print('completed all training steps...', flush=True) # selected_step = reporting_steps[-1] selected_step = min([max([reporting_steps[np.argmin(valid_losses)], d['startsavingstep']]), reporting_steps[-1]]) print('selected step:{0}...'.format(selected_step), flush=True) # SAVE RESULTS print('saving results...', flush=True) with open('{0}/optimization_path_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'wb') as fw: pickle.dump({'reporting_steps':reporting_steps, 'valid_losses':valid_losses, 'train_losses':train_losses, 'valid_noisy_losses':valid_noisy_losses, 'train_noisy_losses':train_noisy_losses}, fw) if d['current_dimensions'] == d['all_dimensions'] and (not 
d['use_finetuning'] or d['current_finetuning_run'] > 0): shutil.copyfile('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) if d['use_batchnorm']: shutil.copyfile('{0}/intermediate_batchnorm_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/batchnorm_variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) else: shutil.move('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) if d['use_batchnorm']: shutil.move('{0}/intermediate_batchnorm_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/batchnorm_variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'])) with open('{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'rb') as fr: W, Be, Bd = pickle.load(fr)[1:] # global_step, W, bencode, bdecode if d['use_batchnorm']: with open('{0}/batchnorm_variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'rb') as fr: batchnorm_variables = pickle.load(fr) # gammas, betas, moving_means, moving_variances batchnorm_encode_variables, batchnorm_decode_variables = sdae_apply_functions.align_batchnorm_variables(batchnorm_variables, d['current_apply_activation_to_embedding'], d['apply_activation_to_output']) recon = {} embed = {} error = {} embed_preactivation = {} for partition in partitions: if d['use_batchnorm']: recon[partition], embed[partition], error[partition] = sdae_apply_functions.encode_and_decode(dataset[partition], W, Be, Bd, activation_function['np'], d['current_apply_activation_to_embedding'], d['apply_activation_to_output'], return_embedding=True, return_reconstruction_error=True, bn_encode_variables=batchnorm_encode_variables, bn_decode_variables=batchnorm_decode_variables) embed_preactivation[partition] = sdae_apply_functions.encode(dataset[partition], W, Be, activation_function['np'], apply_activation_to_embedding=False, bn_variables=batchnorm_encode_variables) else: recon[partition], embed[partition], error[partition] = sdae_apply_functions.encode_and_decode(dataset[partition], W, Be, Bd, activation_function['np'], d['current_apply_activation_to_embedding'], d['apply_activation_to_output'], return_embedding=True, return_reconstruction_error=True) embed_preactivation[partition] = sdae_apply_functions.encode(dataset[partition], W, Be, activation_function['np'], apply_activation_to_embedding=False) print('{0} reconstruction error: {1:1.3g}'.format(partition, error[partition]), flush=True) if d['current_dimensions'] == d['all_dimensions'] and (not d['use_finetuning'] or d['current_finetuning_run'] > 0): datasetIO.save_datamatrix('{0}/{1}_embedding_layer{2!s}_finetuning{3!s}.pickle'.format(d['output_path'], 
partition, d['current_hidden_layer'], d['current_finetuning_run']), embed[partition]) datasetIO.save_datamatrix('{0}/{1}_embedding_layer{2!s}_finetuning{3!s}.txt.gz'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed[partition]) if d['current_apply_activation_to_embedding']: datasetIO.save_datamatrix('{0}/{1}_embedding_preactivation_layer{2!s}_finetuning{3!s}.pickle'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed_preactivation[partition]) datasetIO.save_datamatrix('{0}/{1}_embedding_preactivation_layer{2!s}_finetuning{3!s}.txt.gz'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed_preactivation[partition]) # PLOT LOSS print('plotting loss...', flush=True) fg, ax = plt.subplots(1, 1, figsize=(3.25,2.25)) ax.set_position([0.55/3.25, 0.45/2.25, 2.6/3.25, 1.7/2.25]) ax.semilogx(reporting_steps, train_losses, ':r', linewidth=1, label='train') ax.semilogx(reporting_steps, valid_losses, '-g', linewidth=1, label='valid') ax.semilogx(reporting_steps, train_noisy_losses, '--b', linewidth=1, label='train,noisy') ax.semilogx(reporting_steps, valid_noisy_losses, '-.k', linewidth=1, label='valid,noisy') ax.legend(loc='best', fontsize=8) ax.set_ylabel('loss', fontsize=8) ax.set_xlabel('steps (selected step:{0!s})'.format(selected_step), fontsize=8) ax.set_xlim(reporting_steps[0]-1, reporting_steps[-1]+1) # ax.set_ylim(0, 1) ax.tick_params(axis='both', which='major', left=True, right=True, bottom=True, top=False, labelleft=True, labelright=False, labelbottom=True, labeltop=False, labelsize=8) fg.savefig('{0}/optimization_path_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600) plt.close() # PLOT RECONSTRUCTIONS print('plotting reconstructions...', flush=True) num_recons = min([d['reconstruction_rows']*d['reconstruction_cols'], dataset['valid'].shape[0]]) x_valid = dataset['valid'].matrix[:num_recons,:] xr_valid = recon['valid'].matrix[:num_recons,:] if x_valid.shape[1] > 1000: x_valid = x_valid[:,:1000] xr_valid = xr_valid[:,:1000] lb = np.append(x_valid, xr_valid, 1).min(1) ub = np.append(x_valid, xr_valid, 1).max(1) if d['apply_activation_to_output']: if d['activation_function'] == 'sigmoid': lb[:] = -0.05 ub[:] = 1.05 elif d['activation_function'] == 'tanh': lb[:] = -1.05 ub[:] = 1.05 fg, axs = plt.subplots(d['reconstruction_rows'], d['reconstruction_cols'], figsize=(6.5,3.25)) for i, ax in enumerate(axs.reshape(-1)): if i < num_recons: ax.plot(x_valid[i,:], xr_valid[i,:], 'ok', markersize=0.5, markeredgewidth=0, alpha=0.1) ax.set_ylim(lb[i], ub[i]) ax.set_xlim(lb[i], ub[i]) ax.tick_params(axis='both', which='major', left=False, right=False, bottom=False, top=False, labelleft=False, labelright=False, labelbottom=False, labeltop=False, pad=4) ax.set_frame_on(False) ax.axvline(lb[i], linewidth=1, color='k') ax.axvline(ub[i], linewidth=1, color='k') ax.axhline(lb[i], linewidth=1, color='k') ax.axhline(ub[i], linewidth=1, color='k') else: fg.delaxes(ax) fg.savefig('{0}/reconstructions_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=1200) plt.close() # PLOT 2D EMBEDDING if d['current_dimensions'][-1] == 2 and (not d['use_finetuning'] or d['current_finetuning_run'] > 0): print('plotting 2d embedding...', flush=True) fg, ax = plt.subplots(1, 1, figsize=(6.5,6.5)) 
ax.set_position([0.15/6.5, 0.15/6.5, 6.2/6.5, 6.2/6.5]) ax.plot(embed['train'].matrix[:,0], embed['train'].matrix[:,1], 'ok', markersize=2, markeredgewidth=0, alpha=0.5, zorder=0) ax.plot(embed['valid'].matrix[:,0], embed['valid'].matrix[:,1], 'or', markersize=2, markeredgewidth=0, alpha=1.0, zorder=1) ax.tick_params(axis='both', which='major', bottom=False, top=False, labelbottom=False, labeltop=False, left=False, right=False, labelleft=False, labelright=False, pad=4) ax.set_frame_on(False) fg.savefig('{0}/embedding_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600) plt.close() if d['current_apply_activation_to_embedding']: fg, ax = plt.subplots(1, 1, figsize=(6.5,6.5)) ax.set_position([0.15/6.5, 0.15/6.5, 6.2/6.5, 6.2/6.5]) ax.plot(embed_preactivation['train'].matrix[:,0], embed_preactivation['train'].matrix[:,1], 'ok', markersize=2, markeredgewidth=0, alpha=0.5, zorder=0) ax.plot(embed_preactivation['valid'].matrix[:,0], embed_preactivation['valid'].matrix[:,1], 'or', markersize=2, markeredgewidth=0, alpha=1.0, zorder=1) ax.tick_params(axis='both', which='major', bottom=False, top=False, labelbottom=False, labeltop=False, left=False, right=False, labelleft=False, labelright=False, pad=4) ax.set_frame_on(False) fg.savefig('{0}/embedding_preactivation_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600) plt.close() print('done training phase.', flush=True) return d['current_hidden_layer'], d['current_finetuning_run'], d['current_epochs']
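# A minimal sketch of the early-stopping rule used in the training loop above:
# prepending np.inf with np.insert keeps the running best validation loss well
# defined at the first reporting step. The helper name, the toy loss curve and
# the max_score default are illustrative; the 1.01 tolerance mirrors the check above.
import numpy as np

def should_stop_early(valid_losses, i, overfitting_score, max_score=3, tolerance=1.01):
    # best validation loss seen before step i (inf if there is no history yet)
    best_so_far = np.insert(valid_losses[:i], 0, np.inf).min()
    if valid_losses[i] >= tolerance * best_so_far:
        overfitting_score += 1
    else:
        overfitting_score = 0
    return overfitting_score == max_score, overfitting_score

losses = np.array([1.0, 0.8, 0.7, 0.72, 0.75, 0.78, 0.80], dtype='float32')
score = 0
for i in range(losses.size):
    stop, score = should_stop_early(losses, i, score)
    if stop:
        print('stopping early at reporting step', i)
        break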
def specpolfinalstokes(infilelist,logfile='salt.log',debug=False, \ HW_Cal_override=False,Linear_PolZeropoint_override=False,PAZeropoint_override=False): """Combine the raw stokes and apply the polarimetric calibrations Parameters ---------- infilelist: list List of filenames that include an extracted spectrum logfile: str Name of file for logging """ """ _l: line in calibration file _i: index in file list _j: rawstokes = waveplate position pair index (enumeration within config, including repeats) _J: cycle number idx (0,1,..) for each rawstokes _k: combstokes = waveplate position pair index (enumeration within config, repeats combined) _K: pair = waveplate position pair index (enumeration within obs) _p: pair = waveplate position pair # (eg 0,1,2,3 = 0 4 1 5 2 6 3 7 for LINEAR-HI, sorted in h0 order) _s: normalized linear stokes for zeropoint correction (0,1) = (q,u) _S: unnormalized raw stokes within waveplate position pair: (eg 0,1 = I,Q) _F: unnormalized final stokes (eg 0,1,2 = I,Q,U) """ calhistorylist = ["PolCal Model: 20170429",] patternlist = open(datadir+'wppaterns.txt','r').readlines() patternpairs = dict(); patternstokes = dict(); patterndict = dict() for p in patternlist: if p.split()[0] == '#': continue patterndict[p.split()[0]]=np.array(p.split()[3:]).astype(int).reshape((-1,2)) patternpairs[p.split()[0]]=(len(p.split())-3)/2 patternstokes[p.split()[0]]=int(p.split()[1]) if len(glob.glob('specpol*.log')): logfile=glob.glob('specpol*.log')[0] with logging(logfile, debug) as log: log.message('specpolfinalstokes version: 20171226', with_header=False) # organize data using names. # allrawlist = infileidx,object,config,wvplt,cycle for each infile. obsdict=obslog(infilelist) files = len(infilelist) allrawlist = [] for i in range(files): object,config,wvplt,cycle = os.path.basename(infilelist[i]).rsplit('.',1)[0].rsplit('_',3) if (config[0]!='c')|(wvplt[0]!='h')|(not cycle.isdigit()): log.message('File '+infilelist[i]+' is not a raw stokes file.' 
, with_header=False) continue allrawlist.append([i,object,config,wvplt,cycle]) configlist = sorted(list(set(ele[2] for ele in allrawlist))) # unique configs # input correct HWCal and TelZeropoint calibration files dateobs = obsdict['DATE-OBS'][0].replace('-','') HWCalibrationfile = datedfile(datadir+"RSSpol_HW_Calibration_yyyymmdd_vnn.txt",dateobs) hwav_l,heff_l,hpa_l = np.loadtxt(HWCalibrationfile,dtype=float,unpack=True,usecols=(0,1,2),ndmin=2) TelZeropointfile = datedfile(datadir+"RSSpol_Linear_TelZeropoint_yyyymmdd_vnn.txt",dateobs) twav_l,tq0_l,tu0_l,err_l = np.loadtxt(TelZeropointfile,dtype=float,unpack=True,ndmin=2) # input PAZeropoint file and get correct entry dpadatever,dpa = datedline(datadir+"RSSpol_Linear_PAZeropoint.txt",dateobs).split() dpa = float(dpa) # prepare calibration keyword documentation pacaltype = "Equatorial" if HW_Cal_override: Linear_PolZeropoint_override=True PAZeropoint_override=True pacaltype = "Instrumental" calhistorylist.append("HWCal: Uncalibrated") elif Linear_PolZeropoint_override: PAZeropoint_override=True calhistorylist.extend(["HWCal: "+os.path.basename(HWCalibrationfile),"PolZeropoint: Null"]) elif PAZeropoint_override: calhistorylist.extend(["HWCal: "+os.path.basename(HWCalibrationfile), \ "PolZeropoint: "+os.path.basename(TelZeropointfile), "PAZeropoint: Null"]) else: calhistorylist.extend(["HWCal: "+os.path.basename(HWCalibrationfile), \ "PolZeropoint: "+os.path.basename(TelZeropointfile), \ "PAZeropoint: RSSpol_Linear_PAZeropoint.txt "+str(dpadatever)+" "+str(dpa)]) log.message(' PA type: '+pacaltype, with_header=False) if len(calhistorylist): log.message(' '+'\n '.join(calhistorylist), with_header=False) chifence_d = 2.2*np.array([6.43,4.08,3.31,2.91,2.65,2.49,2.35,2.25]) # *q3 for upper outer fence outlier for each dof # do one config at a time. # rawlist = infileidx,object,config,wvplt,cycle for each infile *in this config*. # rawlist is sorted with cycle varying fastest # rawstokes = len(rawlist). j is idx in rawlist. for conf in configlist: log.message("\nConfiguration: %s" % conf, with_header=False) rawlist = [entry for entry in allrawlist if entry[2]==conf] for col in (4,3,1,2): rawlist = sorted(rawlist,key=operator.itemgetter(col)) rawstokes = len(rawlist) # rawlist is sorted with cycle varying fastest wav0 = pyfits.getheader(infilelist[rawlist[0][0]],'SCI')['CRVAL1'] dwav = pyfits.getheader(infilelist[rawlist[0][0]],'SCI')['CDELT1'] wavs = pyfits.getheader(infilelist[rawlist[0][0]],'SCI')['NAXIS1'] wav_w = wav0 + dwav*np.arange(wavs) # interpolate HW, telZeropoint calibration wavelength dependence for this config okcal_w = np.ones(wavs).astype(bool) if not HW_Cal_override: heff_w = interp1d(hwav_l,heff_l,kind='cubic',bounds_error=False)(wav_w) hpar_w = -interp1d(hwav_l,hpa_l,kind='cubic',bounds_error=False)(wav_w) okcal_w &= ~np.isnan(heff_w) hpar_w[~okcal_w] = 0. if not Linear_PolZeropoint_override: tel0_sw = interp1d(twav_l,np.array([tq0_l,tu0_l]),kind='cubic',bounds_error=False)(wav_w) okcal_w &= ~np.isnan(tel0_sw[0]) tel0_sw /= 100. 
# table is in % # get spectrograph calibration file, spectrograph coordinates grating = pyfits.getheader(infilelist[rawlist[0][0]])['GRATING'] grang = pyfits.getheader(infilelist[rawlist[0][0]])['GR-ANGLE'] artic = pyfits.getheader(infilelist[rawlist[0][0]])['AR-ANGLE'] SpecZeropointfile = datedfile(datadir+ "RSSpol_Linear_SpecZeropoint_"+grating+"_yyyymmdd_vnn.txt",dateobs) if len(SpecZeropointfile): calhistorylist.append(SpecZeropointfile) # get all rawstokes data # comblist = last rawlistidx,object,config,wvplt,cycles,wppat # one entry for each set of cycles that needs to be combined (i.e, one for each wvplt) stokes_jSw = np.zeros((rawstokes,2,wavs)) var_jSw = np.zeros_like(stokes_jSw) covar_jSw = np.zeros_like(stokes_jSw) bpm_jSw = np.zeros_like(stokes_jSw).astype(int) comblist = [] for j in range(rawstokes): i,object,config,wvplt,cycle = rawlist[j] if j==0: cycles = 1 lampid = pyfits.getheader(infilelist[i],0)['LAMPID'].strip().upper() telpa = float(pyfits.getheader(infilelist[i],0)['TELPA']) if lampid != "NONE": pacaltype ="Instrumental" if pacaltype == "Equatorial": eqpar_w = hpar_w + dpa + (telpa % 180) # if object,config,wvplt changes, start a new comblist entry else: if rawlist[j-1][1:4] != rawlist[j][1:4]: cycles = 1 else: cycles += 1 wppat = pyfits.getheader(infilelist[i])['WPPATERN'].upper() stokes_jSw[j] = pyfits.open(infilelist[i])['SCI'].data.reshape((2,-1)) var_jSw[j] = pyfits.open(infilelist[i])['VAR'].data.reshape((2,-1)) covar_jSw[j] = pyfits.open(infilelist[i])['COV'].data.reshape((2,-1)) bpm_jSw[j] = pyfits.open(infilelist[i])['BPM'].data.reshape((2,-1)) # apply telescope zeropoint calibration, q rotated to raw coordinates if not Linear_PolZeropoint_override: trkrho = pyfits.getheader(infilelist[i])['TRKRHO'] dpatelraw_w = -(22.5*float(wvplt[1]) + hpar_w + trkrho + dpa) rawtel0_sw = \ specpolrotate(tel0_sw,0,0,dpatelraw_w,normalized=True)[0] rawtel0_sw[:,okcal_w] *= heff_w[okcal_w] stokes_jSw[j,1,okcal_w] -= stokes_jSw[j,0,okcal_w]*rawtel0_sw[0,okcal_w] if cycles==1: comblist.append((j,object,config,wvplt,1,wppat)) else: comblist[-1] = (j,object,config,wvplt,cycles,wppat) # combine multiple cycles as necessary. Absolute stokes is on a per cycle basis. 
# polarimetric combination on normalized stokes basis # to avoid coupling mean syserr into polarimetric spectral features combstokess = len(comblist) stokes_kSw = np.zeros((combstokess,2,wavs)) var_kSw = np.zeros_like(stokes_kSw) covar_kSw = np.zeros_like(stokes_kSw) cycles_kw = np.zeros((combstokess,wavs)).astype(int) chi2cycle_kw = np.zeros((combstokess,wavs)) badcyclechi_kw = np.zeros((combstokess,wavs),dtype=bool) havecyclechi_k = np.zeros(combstokess,dtype=bool) # obslist = first comblist idx,object,config,wppat,pairs # k = idx in comblist obslist = [] jlistk = [] # list of rawstokes idx for each comblist entry Jlistk = [] # list of cycle number for each comblist entry obsobject = '' obsconfig = '' chi2cycle_j = np.zeros(rawstokes) syserrcycle_j = np.zeros(rawstokes) iscull_jw = np.zeros((rawstokes,wavs),dtype=bool) stokes_kSw = np.zeros((combstokess,2,wavs)) var_kSw = np.zeros_like(stokes_kSw) nstokes_kw = np.zeros((combstokess,wavs)) nvar_kw = np.zeros_like(nstokes_kw) ncovar_kw = np.zeros_like(nstokes_kw) chi2cyclenet_k = np.zeros(combstokess) syserrcyclenet_k = np.zeros(combstokess) for k in range(combstokess): j,object,config,wvplt,cycles,wppat = comblist[k] jlistk.append(range(j-cycles+1,j+1)) Jlistk.append([int(rawlist[jj][4])-1 for jj in range(j-cycles+1,j+1)]) # J = cycle-1, counting from 0 nstokes_Jw = np.zeros((cycles,wavs)) nvar_Jw = np.zeros((cycles,wavs)) ncovar_Jw = np.zeros((cycles,wavs)) bpm_Jw = np.zeros((cycles,wavs)) ok_Jw = np.zeros((cycles,wavs),dtype=bool) for J,j in enumerate(jlistk[k]): bpm_Jw[J] = bpm_jSw[j,0] ok_Jw[J] = (bpm_Jw[J] ==0) nstokes_Jw[J][ok_Jw[J]] = stokes_jSw[j,1][ok_Jw[J]]/stokes_jSw[j,0][ok_Jw[J]] nvar_Jw[J][ok_Jw[J]] = var_jSw[j,1][ok_Jw[J]]/(stokes_jSw[j,0][ok_Jw[J]])**2 ncovar_Jw[J][ok_Jw[J]] = covar_jSw[j,1][ok_Jw[J]]/(stokes_jSw[j,0][ok_Jw[J]])**2 # Culling: for multiple cycles, compare each cycle with every other cycle (dof=1). 
# bad wavelengths flagged for P < .02% (1/2000): chisq > 13.8 (chi2.isf(q=.0002,df=1)) # for cycles>2, vote to cull specific pair/wavelength, otherwise cull wavelength cycles_kw[k] = (1-bpm_Jw).sum(axis=0).astype(int) okchi_w = (cycles_kw[k] > 1) chi2lim = 13.8 havecyclechi_k[k] = okchi_w.any() if cycles > 1: ok_Jw[J] = okchi_w & (bpm_Jw[J] ==0) chi2cycle_JJw = np.zeros((cycles,cycles,wavs)) badcyclechi_JJw = np.zeros((cycles,cycles,wavs)) ok_JJw = ok_Jw[:,None,:] & ok_Jw[None,:,:] nstokes_JJw = nstokes_Jw[:,None] - nstokes_Jw[None,:] nvar_JJw = nvar_Jw[:,None] + nvar_Jw[None,:] chi2cycle_JJw[ok_JJw] = nstokes_JJw[ok_JJw]**2/nvar_JJw[ok_JJw] triuidx = np.triu_indices(cycles,1) # _i enumeration of cycle differences chi2cycle_iw = chi2cycle_JJw[triuidx] badcyclechi_w = (chi2cycle_iw > chi2lim).any(axis=(0)) badcyclechiall_w = (badcyclechi_w & (ok_JJw[triuidx].reshape((-1,wavs)).sum(axis=0)<3)) badcyclechicull_w = (badcyclechi_w & np.logical_not(badcyclechiall_w)) wavcull_W = np.where(badcyclechicull_w)[0] # cycles>2, cull by voting if wavcull_W.shape[0]: for W,w in enumerate(wavcull_W): J_I = np.array(triuidx).T[np.argsort(chi2cycle_iw[:,w])].flatten() _,idx = np.unique(J_I,return_index=True) Jcull = J_I[np.sort(idx)][-1] jcull = jlistk[k][Jcull] iscull_jw[jcull,w] = True # for reporting bpm_jSw[jcull,:,w] = 1 else: for j in jlistk[k]: iscull_jw[j] = badcyclechiall_w # for reporting bpm_jSw[j][:,badcyclechiall_w] = 1 for J,j in enumerate(jlistk[k]): bpm_Jw[J] = bpm_jSw[j,0] if debug: obsname = object+"_"+config ok_Jw = okchi_w[None,:] & (bpm_Jw ==0) np.savetxt(obsname+"_nstokes_Jw_"+str(k)+".txt",np.vstack((wav_w,ok_Jw.astype(int), \ nstokes_Jw,nvar_Jw)).T, fmt="%8.2f "+cycles*"%3i "+cycles*"%10.6f "+cycles*"%10.12f ") np.savetxt(obsname+"_chi2cycle_iw_"+str(k)+".txt",np.vstack((wav_w,okchi_w.astype(int), \ chi2cycle_iw.reshape((-1,wavs)),badcyclechi_w,ok_JJw[triuidx].reshape((-1,wavs)).sum(axis=0))).T, \ fmt="%8.2f %3i "+chi2cycle_iw.shape[0]*"%10.7f "+" %2i %2i") np.savetxt(obsname+"_Jcull_kw_"+str(k)+".txt",np.vstack((wav_w,okchi_w.astype(int), \ iscull_jw[jlistk[k]].astype(int).reshape((-1,wavs)))).T, fmt="%8.2f %3i "+cycles*" %3i") if ((object != obsobject) | (config != obsconfig)): obslist.append([k,object,config,wppat,1]) obsobject = object; obsconfig = config else: obslist[-1][4] +=1 # Now combine cycles, using normalized stokes to minimize systematic errors # first normalize cycle members J at wavelengths where all cycles have data: cycles_kw[k] = (1-bpm_Jw).sum(axis=0).astype(int) ok_w = (cycles_kw[k] > 0) okall_w = (cycles_kw[k] == cycles) normint_J = np.array(stokes_jSw[jlistk[k],0][:,okall_w].sum(axis=1)) normint_J /= np.mean(normint_J) stokes_JSw = stokes_jSw[jlistk[k]]/normint_J[:,None,None] var_JSw = var_jSw[jlistk[k]]/normint_J[:,None,None]**2 covar_JSw = covar_jSw[jlistk[k]]/normint_J[:,None,None]**2 for J in range(cycles): okJ_w = ok_w & (bpm_Jw[J] ==0) # average the intensity stokes_kSw[k,0,okJ_w] += stokes_JSw[J,0,okJ_w]/cycles_kw[k][okJ_w] var_kSw[k,0,okJ_w] += var_JSw[J,0,okJ_w]/cycles_kw[k][okJ_w]**2 covar_kSw[k,0,okJ_w] += covar_JSw[J,0,okJ_w]/cycles_kw[k][okJ_w]**2 # now the normalized stokes nstokes_kw[k][okJ_w] += (stokes_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w])/cycles_kw[k][okJ_w] nvar_kw[k][okJ_w] += (var_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w]**2)/cycles_kw[k][okJ_w]**2 ncovar_kw[k][okJ_w] += (covar_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w]**2)/cycles_kw[k][okJ_w]**2 stokes_kSw[k,1] = nstokes_kw[k]*stokes_kSw[k,0] var_kSw[k,1] = nvar_kw[k]*stokes_kSw[k,0]**2 
covar_kSw[k,1] = ncovar_kw[k]*stokes_kSw[k,0]**2 if debug: obsname = object+"_"+config np.savetxt(obsname+"_stokes_kSw_"+str(k)+".txt",np.vstack((wav_w,ok_w.astype(int), \ stokes_kSw[k])).T, fmt="%8.2f %3i "+2*"%12.3f ") # compute mean chisq for each pair having multiple cycles if cycles > 1: nstokeserr_Jw = np.zeros((cycles,wavs)) nerr_Jw = np.zeros((cycles,wavs)) for J in range(cycles): okJ_w = ok_w & (bpm_Jw[J] ==0) nstokes_Jw[J][okJ_w] = stokes_JSw[J,1][okJ_w]/stokes_JSw[J,0][okJ_w] nvar_Jw[J][okJ_w] = var_JSw[J,1][okJ_w]/(stokes_JSw[J,0][okJ_w])**2 nstokeserr_Jw[J] = (nstokes_Jw[J] - nstokes_kw[k]) nvar_w = nvar_Jw[J] - nvar_kw[k] okall_w &= (nvar_w > 0.) nerr_Jw[J,okall_w] = np.sqrt(nvar_w[okall_w]) nstokessyserr_J = np.average(nstokeserr_Jw[:,okall_w],weights=1./nerr_Jw[:,okall_w],axis=1) nstokeserr_Jw -= nstokessyserr_J[:,None] for J,j in enumerate(jlistk[k]): loc,scale = norm.fit(nstokeserr_Jw[J,okall_w]/nerr_Jw[J,okall_w]) chi2cycle_j[j] = scale**2 syserrcycle_j[j] = nstokessyserr_J[J] chi2cyclenet_k[k] = chi2cycle_j[jlistk[k]].mean() syserrcyclenet_k[k] = np.sqrt((syserrcycle_j[jlistk[k]]**2).sum())/len(jlistk[k]) if debug: obsname = object+"_"+config chisqanalysis(obsname,nstokeserr_Jw,nerr_Jw,okall_w) # for each obs combine raw stokes, apply efficiency and PA calibration as appropriate for pattern, and save obss = len(obslist) for obs in range(obss): k0,object,config,wppat,pairs = obslist[obs] patpairs = patternpairs[wppat] klist = range(k0,k0+pairs) # entries in comblist for this obs obsname = object+"_"+config wplist = [comblist[k][3][1:] for k in klist] patwplist = sorted((patpairs*"%1s%1s " % tuple(patterndict[wppat].flatten())).split()) plist = [patwplist.index(wplist[P]) for P in range(pairs)] k_p = np.zeros(patpairs,dtype=int) k_p[plist] = klist # idx in klist for each pair idx cycles_p = np.zeros_like(k_p) cycles_p[plist] = np.array([comblist[k][4] for k in klist]) # number of cycles in comb cycles_pw = np.zeros((patpairs,wavs),dtype=int) cycles_pw[plist] = cycles_kw[klist] # of ok cycles for each wavelength havecyclechi_p = np.zeros(patpairs,dtype=bool) havecyclechi_p[plist] = havecyclechi_k[klist] havelinhichi_p = np.zeros(patpairs,dtype=bool) # name result to document hw cycles included kplist = list(k_p) if cycles_p.max()==cycles_p.min(): kplist = [klist[0],] for p in range(len(kplist)): obsname += "_" j0 = comblist[k_p[p]][0] - cycles_p[p] + 1 for j in range(j0,j0+cycles_p[p]): obsname+=rawlist[j][4][-1] log.message("\n Observation: %s Date: %s" % (obsname,dateobs), with_header=False) finstokes = patternstokes[wppat] if pairs != patpairs: if (pairs<2): log.message((' Only %1i pair, skipping observation' % pairs), with_header=False) continue elif ((max(plist) < 2) | (min(plist) > 1)): log.message(' Pattern not usable, skipping observation', with_header=False) continue stokes_Fw = np.zeros((finstokes,wavs)) var_Fw = np.zeros_like(stokes_Fw) covar_Fw = np.zeros_like(stokes_Fw) # normalize pairs in obs at wavelengths _W where all pair/cycles have data: okall_w = okcal_w & (cycles_pw[plist] == cycles_p[plist,None]).all(axis=0) normint_K = stokes_kSw[klist,0][:,okall_w].sum(axis=1) normint_K /= np.mean(normint_K) stokes_kSw[klist] /= normint_K[:,None,None] var_kSw[klist] /= normint_K[:,None,None]**2 covar_kSw[klist] /= normint_K[:,None,None]**2 # first, the intensity stokes_Fw[0] = stokes_kSw[klist,0].sum(axis=0)/pairs var_Fw[0] = var_kSw[klist,0].sum(axis=0)/pairs**2 covar_Fw[0] = covar_kSw[klist,0].sum(axis=0)/pairs**2 # now, the polarization stokes if 
wppat.count('LINEAR'): var_Fw = np.vstack((var_Fw,np.zeros(wavs))) # add QU covariance if (wppat=='LINEAR'): # wavelengths with both pairs having good, calibratable data in at least one cycle ok_w = okcal_w & (cycles_pw[plist] > 0).all(axis=0) bpm_Fw = np.repeat((np.logical_not(ok_w))[None,:],finstokes,axis=0) stokes_Fw[1:,ok_w] = stokes_kSw[klist,1][:,ok_w]*(stokes_Fw[0,ok_w]/stokes_kSw[klist,0][:,ok_w]) var_Fw[1:3,ok_w] = var_kSw[klist,1][:,ok_w]*(stokes_Fw[0,ok_w]/stokes_kSw[klist,0][:,ok_w])**2 covar_Fw[1:,ok_w] = covar_kSw[klist,1][:,ok_w]*(stokes_Fw[0,ok_w]/stokes_kSw[klist,0][:,ok_w])**2 if debug: np.savetxt(obsname+"_stokes.txt",np.vstack((wav_w,ok_w.astype(int),stokes_Fw)).T, \ fmt="%8.2f "+"%2i "+3*" %10.6f") np.savetxt(obsname+"_var.txt",np.vstack((wav_w,ok_w.astype(int),var_Fw)).T, \ fmt="%8.2f "+"%2i "+4*"%14.9f ") np.savetxt(obsname+"_covar.txt",np.vstack((wav_w,ok_w.astype(int),covar_Fw)).T, \ fmt="%8.2f "+"%2i "+3*"%14.9f ") elif wppat=='LINEAR-HI': # for Linear-Hi, must go to normalized stokes in order for the pair combination to cancel systematic errors # each pair p at each wavelength w is linear combination of pairs, including primary p and secondary sec_p # linhi chisq is from comparison of primary and secondary # evaluate wavelengths with at least both pairs 0,2 or 1,3 having good, calibratable data in at least one cycle: ok_pw = okcal_w[None,:] & (cycles_pw > 0) ok_w = (ok_pw[0] & ok_pw[2]) | (ok_pw[1] & ok_pw[3]) bpm_Fw = np.repeat((np.logical_not(ok_w))[None,:],finstokes,axis=0) stokespri_pw = np.zeros((patpairs,wavs)) varpri_pw = np.zeros_like(stokespri_pw) covarpri_pw = np.zeros_like(stokespri_pw) stokespri_pw[plist] = nstokes_kw[klist] varpri_pw[plist] = nvar_kw[klist] covarpri_pw[plist] = ncovar_kw[klist] haveraw_pw = (cycles_pw > 0) pricof_ppw = np.identity(patpairs)[:,:,None]*haveraw_pw[None,:,:] qq = 1./np.sqrt(2.) 
seccofb_pp = np.array([[ 0,1, 0,-1],[1, 0,1, 0],[ 0,1, 0,1],[-1, 0,1, 0]])*qq # both secs avail seccof1_pp = np.array([[qq,1,-qq, 0],[1,qq,0, qq],[-qq,1,qq,0],[-1, qq,0,qq]])*qq # only 1st sec seccof2_pp = np.array([[qq,0, qq,-1],[0,qq,1,-qq],[ qq,0,qq,1],[ 0,-qq,1,qq]])*qq # only 2nd sec seclist_p = np.array([[1,3],[0,2],[1,3],[0,2]]) havesecb_pw = haveraw_pw[seclist_p].all(axis=1) onlysec1_pw = (np.logical_not(havesecb_pw) & haveraw_pw[seclist_p][:,0] & havesecb_pw[seclist_p][:,1]) onlysec2_pw = (np.logical_not(havesecb_pw) & haveraw_pw[seclist_p][:,1] & havesecb_pw[seclist_p][:,0]) seccof_ppw = seccofb_pp[:,:,None]*havesecb_pw[:,None,:] + \ seccof1_pp[:,:,None]*onlysec1_pw[:,None,:] + \ seccof2_pp[:,:,None]*onlysec2_pw[:,None,:] stokessec_pw = (seccof_ppw*stokespri_pw[:,None,:]).sum(axis=0) varsec_pw = (seccof_ppw**2*varpri_pw[:,None,:]).sum(axis=0) covarsec_pw = (seccof_ppw**2*covarpri_pw[:,None,:]).sum(axis=0) havesec_pw = (havesecb_pw | onlysec1_pw | onlysec2_pw) prisec_pw = (haveraw_pw & havesec_pw) onlypri_pw = (haveraw_pw & np.logical_not(havesec_pw)) onlysec_pw = (np.logical_not(haveraw_pw) & havesec_pw) cof_ppw = onlypri_pw[:,None,:]*pricof_ppw + onlysec_pw[:,None,:]*seccof_ppw + \ 0.5*prisec_pw[:,None,:]*(pricof_ppw+seccof_ppw) # now do the combination stokes_pw = (cof_ppw*stokespri_pw[None,:,:]).sum(axis=1) var_pw = (cof_ppw**2*varpri_pw[None,:,:]).sum(axis=1) covar_pw = (cof_ppw**2*covarpri_pw[None,:,:]).sum(axis=1) covarprisec_pw = 0.5*varpri_pw*np.logical_or(onlysec1_pw,onlysec2_pw) covarqu_w = (cof_ppw[0]*cof_ppw[2]*varpri_pw).sum(axis=0) # cull wavelengths based on chisq between primary and secondary chi2linhi_pw = np.zeros((patpairs,wavs)) badlinhichi_w = np.zeros(wavs) havelinhichi_p = prisec_pw.any(axis=1) linhichis = havelinhichi_p.sum() chi2linhi_pw[prisec_pw] = ((stokespri_pw[prisec_pw] - stokessec_pw[prisec_pw])**2 / \ (varpri_pw[prisec_pw] + varsec_pw[prisec_pw] - 2.*covarprisec_pw[prisec_pw])) q3_p = np.percentile(chi2linhi_pw[:,okall_w].reshape((4,-1)),75,axis=1) badlinhichi_w[ok_w] = ((chi2linhi_pw[:,ok_w] > (chifence_d[2]*q3_p)[:,None])).any(axis=0) ok_w &= np.logical_not(badlinhichi_w) okall_w &= np.logical_not(badlinhichi_w) chi2linhi_p = np.zeros(patpairs) chi2linhi_p[havelinhichi_p] = (chi2linhi_pw[havelinhichi_p][:,ok_w]).sum(axis=1)/ \ (prisec_pw[havelinhichi_p][:,ok_w]).sum(axis=1) syserrlinhi_pw = np.zeros((patpairs,wavs)) varlinhi_pw = np.zeros((patpairs,wavs)) syserrlinhi_p = np.zeros(patpairs) syserrlinhi_pw[prisec_pw] = (stokespri_pw[prisec_pw] - stokessec_pw[prisec_pw]) varlinhi_pw[prisec_pw] = varpri_pw[prisec_pw] + varsec_pw[prisec_pw] - 2.*covarprisec_pw[prisec_pw] syserrlinhi_p[havelinhichi_p] = np.average(syserrlinhi_pw[havelinhichi_p][:,okall_w], \ weights=1./np.sqrt(varlinhi_pw[havelinhichi_p][:,okall_w]),axis=1) if debug: np.savetxt(obsname+"_have_pw.txt",np.vstack((wav_w,ok_pw.astype(int),haveraw_pw,havesecb_pw, \ onlysec1_pw,onlysec2_pw,havesec_pw,prisec_pw,onlypri_pw,onlysec_pw)).T, \ fmt="%8.2f "+9*"%2i %2i %2i %2i ") np.savetxt(obsname+"_seccof_ppw.txt",np.vstack((wav_w,ok_pw.astype(int),seccof_ppw.reshape((16,-1)))).T, \ fmt="%8.2f "+4*"%2i "+16*" %6.3f") np.savetxt(obsname+"_cof_ppw.txt",np.vstack((wav_w,ok_pw.astype(int),cof_ppw.reshape((16,-1)))).T, \ fmt="%8.2f "+4*"%2i "+16*" %6.3f") np.savetxt(obsname+"_stokes.txt",np.vstack((wav_w,ok_pw.astype(int),stokespri_pw,stokes_pw)).T, \ fmt="%8.2f "+4*"%2i "+8*" %10.6f") np.savetxt(obsname+"_var.txt",np.vstack((wav_w,ok_pw.astype(int),varpri_pw,var_pw)).T, \ fmt="%8.2f "+4*"%2i 
"+8*"%14.9f ") np.savetxt(obsname+"_covar.txt",np.vstack((wav_w,ok_pw.astype(int),covarpri_pw,covar_pw)).T, \ fmt="%8.2f "+4*"%2i "+8*"%14.9f ") np.savetxt(obsname+"_chi2linhi_pw.txt",np.vstack((wav_w,stokes_Fw[0],ok_pw.astype(int), \ chi2linhi_pw)).T, fmt="%8.2f %10.0f "+4*"%2i "+4*"%10.4f ") stokes_Fw[1:] = stokes_pw[[0,2]]*stokes_Fw[0] var_Fw[1:3] = var_pw[[0,2]]*stokes_Fw[0]**2 var_Fw[3] = covarqu_w*stokes_Fw[0]**2 covar_Fw[1:] = covar_pw[[0,2]]*stokes_Fw[0]**2 bpm_Fw = ((bpm_Fw==1) | np.logical_not(ok_w)).astype(int) # document chisq results, combine flagoffs, compute mean chisq for observation, combine with final bpm if (havecyclechi_p.any() | havelinhichi_p.any()): chi2cyclenet = 0. syserrcyclenet = 0. chi2linhinet = 0. syserrlinhinet = 0. if havecyclechi_p.any(): log.message(("\n"+14*" "+"{:^"+str(5*patpairs)+"}{:^"+str(8*patpairs)+"}{:^"+str(6*patpairs)+"}")\ .format("culled","sys %err","mean chisq"), with_header=False) log.message((9*" "+"HW "+patpairs*" %4s"+patpairs*" %7s"+patpairs*" %5s") \ % tuple(3*patwplist),with_header=False) jlist = sum([jlistk[k] for k in klist],[]) Jlist = list(set(sum([Jlistk[k] for k in klist],[]))) Jmax = max(Jlist) ok_pJ = np.zeros((patpairs,Jmax+1),dtype=bool) for p in plist: ok_pJ[p][Jlistk[k_p[p]]] = True syserrcycle_pJ = np.zeros((patpairs,Jmax+1)) syserrcycle_pJ[ok_pJ] = syserrcycle_j[jlist] syserrcyclenet_p = np.zeros(patpairs) syserrcyclenet_p[plist] = syserrcyclenet_k[klist] syserrcyclenet = np.sqrt((syserrcyclenet_p**2).sum()/patpairs) chi2cycle_pJ = np.zeros((patpairs,Jmax+1)) chi2cycle_pJ[ok_pJ] = chi2cycle_j[jlist] chi2cyclenet_p = np.zeros(patpairs) chi2cyclenet_p[plist] = chi2cyclenet_k[klist] chi2cyclenet = chi2cyclenet_p.sum()/patpairs culls_pJ = np.zeros((patpairs,Jmax+1),dtype=int) culls_pJ[ok_pJ] = iscull_jw[jlist].sum(axis=1) if cycles_p.max() > 2: for J in set(Jlist): log.message(((" cycle %2i: "+patpairs*"%4i "+patpairs*"%7.3f "+patpairs*"%5.2f ") % \ ((J+1,)+tuple(culls_pJ[:,J])+tuple(100.*syserrcycle_pJ[:,J])+tuple(chi2cycle_pJ[:,J]))), \ with_header=False) netculls_p = [iscull_jw[jlistk[k_p[p]]].all(axis=0).sum() for p in range(patpairs)] log.message((" net : "+patpairs*"%4i "+patpairs*"%7.3f "+patpairs*"%5.2f ") % \ (tuple(netculls_p)+tuple(100*syserrcyclenet_p)+tuple(chi2cyclenet_p)), with_header=False) if (havelinhichi_p.any()): log.message(("\n"+14*" "+"{:^"+str(5*patpairs)+"}{:^"+str(8*patpairs)+"}{:^"+str(6*patpairs)+"}")\ .format("culled","sys %err","mean chisq"), with_header=False) log.message((9*" "+"HW "+(4*patpairs/2)*" "+" all"+(4*patpairs/2)*" "+patpairs*" %7s"+patpairs*" %5s") \ % tuple(2*patwplist),with_header=False) chicount = int(badlinhichi_w.sum()) chi2linhinet = chi2linhi_p.sum()/(havelinhichi_p.sum()) syserrlinhinet = np.sqrt((syserrlinhi_p**2).sum()/(havelinhichi_p.sum())) log.message((" Linhi: "+(2*patpairs)*" "+"%3i "+(2*patpairs)*" "+patpairs*"%7.3f "+patpairs*"%5.2f ") % \ ((chicount,)+tuple(100.*syserrlinhi_p)+tuple(chi2linhi_p)), with_header=False) chi2qudof = (chi2cyclenet+chi2linhinet)/(int(chi2cyclenet>0)+int(chi2linhinet>0)) syserr = np.sqrt((syserrcyclenet**2+syserrlinhinet**2)/ \ (int(syserrcyclenet>0)+int(syserrlinhinet>0))) log.message(("\n Estimated sys %%error: %5.3f%% Mean Chisq: %6.2f") % \ (100.*syserr,chi2qudof), with_header=False) if not HW_Cal_override: # apply hw efficiency, equatorial PA rotation calibration stokes_Fw[1:,ok_w] /= heff_w[ok_w] var_Fw[1:,ok_w] /= heff_w[ok_w]**2 covar_Fw[1:,ok_w] /= heff_w[ok_w]**2 stokes_Fw,var_Fw,covar_Fw = 
specpolrotate(stokes_Fw,var_Fw,covar_Fw,eqpar_w) # save final stokes fits file for this observation. Strain out nans. infile = infilelist[rawlist[comblist[k][0]][0]] hduout = pyfits.open(infile) hduout['SCI'].data = np.nan_to_num(stokes_Fw.reshape((3,1,-1))) hduout['SCI'].header['CTYPE3'] = 'I,Q,U' hduout['VAR'].data = np.nan_to_num(var_Fw.reshape((4,1,-1))) hduout['VAR'].header['CTYPE3'] = 'I,Q,U,QU' hduout['COV'].data = np.nan_to_num(covar_Fw.reshape((3,1,-1))) hduout['COV'].header['CTYPE3'] = 'I,Q,U,QU' hduout['BPM'].data = bpm_Fw.astype('uint8').reshape((3,1,-1)) hduout['BPM'].header['CTYPE3'] = 'I,Q,U' hduout[0].header['WPPATERN'] = wppat hduout[0].header['PATYPE'] = pacaltype if len(calhistorylist): for line in calhistorylist: hduout[0].header.add_history(line) if (havecyclechi_p.any() | havelinhichi_p.any()): hduout[0].header['SYSERR'] = (100.*syserr,'estimated % systematic error') outfile = obsname+'_stokes.fits' hduout.writeto(outfile,overwrite=True,output_verify='warn') log.message('\n '+outfile+' Stokes I,Q,U', with_header=False) # apply flux calibration, if available fluxcal_w = specpolflux(outfile,logfile=logfile) if fluxcal_w.shape[0]>0: stokes_Fw *= fluxcal_w var_Fw *= fluxcal_w**2 covar_Fw *= fluxcal_w**2 # calculate, print means (stokes averaged in unnorm space) avstokes_f, avvar_f, avwav = spv.avstokes(stokes_Fw,var_Fw[:-1],covar_Fw,wav_w) avstokes_F = np.insert(avstokes_f,0,1.) avvar_F = np.insert(avvar_f,0,1.) spv.printstokes(avstokes_F,avvar_F,avwav,tcenter=np.pi/2.,textfile='tmp.log') log.message(open('tmp.log').read(), with_header=False) os.remove('tmp.log') # elif wppat.count('CIRCULAR'): TBS # elif wppat=='ALL-STOKES': TBS # end of obs loop # end of config loop return
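# A small sketch of the reporting step at the end of the routine above: the
# wavelength-averaged normalized Stokes parameters get the identity intensity
# prepended with np.insert before printing. The (q, u) values and variances
# below are hypothetical and the spv.printstokes helper is not reproduced;
# the degree of polarization and position angle follow the standard formulas.
import numpy as np

avstokes_f = np.array([0.012, -0.004])    # averaged (Q/I, U/I), made up
avvar_f    = np.array([1.5e-6, 1.6e-6])   # their variances, made up

avstokes_F = np.insert(avstokes_f, 0, 1.)  # -> (I, Q/I, U/I) with I = 1
avvar_F    = np.insert(avvar_f, 0, 1.)

p  = np.sqrt(avstokes_F[1]**2 + avstokes_F[2]**2)                  # linear pol. degree
pa = 0.5 * np.degrees(np.arctan2(avstokes_F[2], avstokes_F[1])) % 180.  # position angle
print('P = {:.3%}  PA = {:.1f} deg'.format(p, pa))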
def translate(self, xs, max_length=100): print("Now translating") batch = len(xs) print("batch", batch) with chainer.no_backprop_mode(), chainer.using_config('train', False): wxs = [ np.array([source_word_ids.get(w, UNK) for w in x], dtype=np.int32) for x in xs ] wx_len = [len(wx) for wx in wxs] wx_section = np.cumsum(wx_len[:-1]) valid_wx_section = np.insert(wx_section, 0, 0) cxs = [ np.array( [source_char_ids.get(c, UNK) for c in list("".join(x))], dtype=np.int32) for x in xs ] wexs = sequence_embed(self.embed_xw, wxs) cexs = sequence_embed(self.embed_xc, cxs) wexs_f = wexs wexs_b = [wex[::-1] for wex in wexs] cexs_f = cexs cexs_b = [cex[::-1] for cex in cexs] _, hfw = self.encoder_fw(None, wexs_f) h1, hbw = self.encoder_bw(None, wexs_b) _, hfc = self.encoder_fc(None, cexs_f) h2, hbc = self.encoder_bc(None, cexs_b) hbw = [F.get_item(h, range(len(h))[::-1]) for h in hbw] hbc = [F.get_item(h, range(len(h))[::-1]) for h in hbc] htw = list(map(lambda x, y: F.concat([x, y], axis=1), hfw, hbw)) htc = list(map(lambda x, y: F.concat([x, y], axis=1), hfc, hbc)) ht = list(map(lambda x, y: F.concat([x, y], axis=0), htw, htc)) ys = self.xp.full(batch, EOS, 'i') result = [] h = F.concat([h1, h2], axis=2) for i in range(max_length): eys = self.embed_y(ys) eys = chainer.functions.split_axis(eys, batch, 0) h_list, h_bar_list, c_s_list, z_s_list = self.decoder( h, ht, eys) cys = chainer.functions.concat(h_list, axis=0) wy = self.W(cys) ys = self.xp.argmax(wy.data, axis=1).astype('i') result.append(ys) h = F.transpose_sequence(h_list)[-1] h = F.reshape(h, (self.n_layers, h.shape[0], h.shape[1])) result = cuda.to_cpu(self.xp.stack(result).T) # Remove EOS taggs outs = [] for y in result: inds = np.argwhere(y == EOS) if len(inds) > 0: y = y[:inds[0, 0]] outs.append(y) return outs
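# The section bookkeeping at the top of translate() relies on the cumsum plus
# np.insert(..., 0, 0) idiom to turn per-sentence lengths into split points and
# segment starts. A self-contained sketch; `flat` stands in for the concatenated
# embeddings and the sentence lengths are made up.
import numpy as np

wx_len = [5, 3, 7]                              # lengths of three tokenised sentences
wx_section = np.cumsum(wx_len[:-1])             # split points: [5 8]
valid_wx_section = np.insert(wx_section, 0, 0)  # segment starts: [0 5 8]

flat = np.arange(sum(wx_len))
segments = np.split(flat, wx_section)           # recover the per-sentence pieces
print(valid_wx_section, [len(s) for s in segments])   # [0 5 8] [5, 3, 7]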
def LocalSearch(pi4,x,y): num=random.choice(pi4) pj=pp[num,:] here=np.where(pi4==num) pi5=np.delete(pi4,here) ss2=np.zeros(x-1, dtype=int) d2=np.zeros((x-1,y), dtype=int) f2=np.zeros((x-1,y), dtype=int) d2,ss2=setuptime(pi4,x-1,y) f2=tailtime(pi4,x-1,y) minor=100000 for hero in range(x): cmax=np.zeros((y), dtype=int) d3=np.zeros(y, dtype=int) for j in range(y): if hero==0: if j==0: if (ss2[hero]+pj[j])>kk[j+1,0,r[hero]]: d3[j]=ss2[hero]+pj[j] else: d3[j]=kk[j+1,0,r[hero]] elif j < y-1: if d3[j-1]+pj[j]>kk[j+1,0,r[hero]]: d3[j]=d3[j-1]+pj[j] else: d3[j]=kk[j+1,0,r[hero]] else: d3[j]=d3[j-1]+pj[j] elif hero<x-1: if j==0: if (ss2[hero]+pj[j])>(d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]): d3[j]=ss2[hero]+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] elif j < y-1: if d3[j-1]+pj[j]>d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]: d3[j]=d3[j-1]+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] else: d3[j]=d3[j-1]+pj[j] else: ss20=d2[hero-1,0]+kk[0,r[hero-1],r[hero]] if j==0: if (ss20+pj[j])>(d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]): d3[j]=ss20+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] elif j < y-1: if d3[j-1]+pj[j]>d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]]: d3[j]=d3[j-1]+pj[j] else: d3[j]=d2[hero-1,j+1]+kk[j+1,r[hero-1],r[hero]] else: d3[j]=d3[j-1]+pj[j] if hero<x-1: cmax[j]=d3[j]+f2[hero,j]+kk[j,r[hero],r[hero+1]] else: cmax[j]=d3[j] print(d3) pi5=np.insert(pi5,hero,num) print(pi5,cmax) print(setuptime1(pi6,x,y),tailtime(pi6,x,y),pi6,x,y) return pi5
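# LocalSearch() above evaluates re-inserting a removed job at every position of
# the remaining sequence. A generic sketch of that insertion move with np.delete
# and np.insert; the setup-time/tail-time makespan bookkeeping is not reproduced,
# so makespan() here is a stand-in objective and the job array is made up.
import numpy as np

def insertion_move_best(pi, makespan, rng=np.random.default_rng()):
    num = rng.choice(pi)                             # job to relocate
    rest = np.delete(pi, np.where(pi == num)[0])     # sequence without it
    best_seq, best_val = None, np.inf
    for pos in range(len(rest) + 1):                 # try every insertion slot
        cand = np.insert(rest, pos, num)
        val = makespan(cand)
        if val < best_val:
            best_seq, best_val = cand, val
    return best_seq, best_val

jobs = np.array([3, 1, 4, 2, 0])
toy_makespan = lambda seq: float(np.sum(seq * np.arange(1, len(seq) + 1)))
print(insertion_move_best(jobs, toy_makespan))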
def insert(self, timestamp, datum):
    # keep data_ts sorted: locate the insertion point once, then place the
    # timestamp and its datum at the same index in both containers
    insert_idx = np.searchsorted(self.data_ts, timestamp)
    self.data_ts = np.insert(self.data_ts, insert_idx, timestamp)
    self.data.insert(insert_idx, datum)
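# Usage sketch of the searchsorted + insert idiom above: a timestamp array kept
# sorted while a parallel Python list of payloads stays aligned with it. The
# timestamps and payloads are made up.
import numpy as np

data_ts, data = np.array([], dtype=float), []
for timestamp, datum in [(2.0, 'b'), (1.0, 'a'), (3.0, 'c')]:
    insert_idx = np.searchsorted(data_ts, timestamp)   # position that keeps order
    data_ts = np.insert(data_ts, insert_idx, timestamp)
    data.insert(insert_idx, datum)
print(data_ts, data)   # [1. 2. 3.] ['a', 'b', 'c']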
def get_ensemble_correction(ifile, star_names, star_array, eclat, eclon): """ Function that takes all input stars for a sector and uses them to find a detrending function using ensemble photometry for a star 'star_names[ifile]', where ifile is the index for the star in the star_array and star_names list. Parameters: ifile (int): index for the relevant star in the star_names list (and consequently also the star_array, eclat and eclon lists). star_names (ndarray): Labels of all the names of the stars in a given sector. star_array (ndarray): An array of class instances holding metadata on each star. (i.e. flux, time, mean flux, std of flux). eclat (ndarray): Ecliptic latitude for all stars. eclon (ndarray): Ecliptic longitude for all stars. Returns: pp (scipy.interpolate.PchipInterpolator): Interpolation function for the ensemble photometry trend for your given star. """ dist = np.zeros([2, len(star_names)]) dist[0] = range(len(star_names)) dist[1] = np.sqrt((eclat - eclat[ifile])**2 + (eclon - eclon[ifile])**2) #artificially increase distance to the star itself, so when we sort by distance it ends up last dist = np.transpose(dist) #dist[ifile][1] = 10*np.pi dist[ifile][1] = 10000.0 #sort by distance sort_dist = np.sort(dist, 0) #set up initial search radius to build ensemble so that 20 stars are included search_radius = sort_dist[19][1] #20 works well for 20s cadence...more for longer? #set up start/end times for stellar time series time_start = np.amin(star_array[ifile].time) time_end = np.max(star_array[ifile].time) #set minimum range parameter...this is log10 photometric range, and stars more variable than this will be #excluded from the ensemble min_range = -2.0 min_range0 = min_range flag = 1 #start loop to build ensemble while True: #num_star is number of stars in ensemble num_star = 0 #full_time,flux,weight are time,flux,weight points in the ensemble full_time = np.array([]) full_flux = np.array([]) full_flag = np.array([]) full_weight = np.array([]) tflux = np.array([]) comp_list = np.array([]) #loop through all other stars to build ensemble #exclude stars outside search radius, flagged, too active (either relatively or absolutely) #excluding stars with negative flux is only required because the synthetic data have some flawed #light curves that are <0. Probably can remove this with real data. # #Put the selection conditions into a boolean array for all stars simultaneously # sel = (dist[:,1] < search_radius) & (np.log10(drange) < min_range) & (drange < 10*drange[ifile]) for test_star in range(len(star_names[:])): if (dist[test_star][1] < search_radius and np.log10(star_array[test_star].drange) < min_range and star_array[test_star].drange < 10 * star_array[ifile].drange): num_star += 1 #calculate relative flux for star to be added to ensemble test0 = star_array[test_star].time test1 = star_array[test_star].flux test1 = test1 / star_array[test_star].fmean #calculate weight for star to be added to the ensemble. weight is whitened stdev relative to mean flux weight = np.ones_like(test1) weight = weight * star_array[test_star].fmean / star_array[ test_star].fstd #add time, flux, weight to ensemble light curve. 
flux is weighted flux full_time = np.append(full_time, test0) full_flux = np.append(full_flux, np.multiply(test1, weight)) full_weight = np.append(full_weight, weight) #tflux is total unweighted flux tflux = np.append(tflux, test1) comp_list = np.append(comp_list, test_star) #set up time array with 0.5-day resolution which spans the time range of the time series #then histogram the data based on that array gx = np.arange(time_start, time_end, 0.5) n = np.histogram(full_time, gx) n = np.asarray(n[0]) n2 = np.histogram(star_array[ifile].time, gx) n2 = np.asarray(n2[0]) #if the least-populated bin has less than 2000 points, increase the size of the ensemble by first #increasing the level of acceptable variability until it exceeds the variability of the star. Once that happens, #increase the search radius and reset acceptable variability back to initial value. If the search radius exceeds #a limiting value (pi/4 at this point), accept that we can't do any better. #if np.min(n[0])<400: #print np.min(n[n2>0]) if np.min(n[n2 > 0]) < 1000: #print min_range min_range = min_range + 0.3 if min_range > np.log10(np.max(star_array[ifile].drange)): #if (search_radius < 0.5): if (search_radius < 100): #search_radius = search_radius+0.1 search_radius = search_radius + 10 else: search_radius = search_radius * 1.1 min_range = min_range0 #if search_radius > np.pi/4: if search_radius > 400: break else: break #clean up ensemble points by removing NaNs full_time = full_time[~np.isnan(full_flux)] full_weight = full_weight[~np.isnan(full_flux)] full_flux = full_flux[~np.isnan(full_flux)] tflux = tflux[~np.isnan(full_flux)] #sort ensemble into time order idx = np.argsort(full_time) full_time = full_time[idx] full_flux = full_flux[idx] full_weight = full_weight[idx] #temporary copies of ensemble components full_time0 = full_time full_flux0 = full_flux full_weight0 = full_weight #set up temporary files temp_time = full_time temp_flux = full_flux temp_weight = full_weight #simplify by discarding ensemble points outside the temporal range of the stellar time series temp_time = full_time[(full_time > time_start) & (full_time < time_end)] temp_flux = full_flux[(full_time > time_start) & (full_time < time_end)] temp_weight = full_weight[(full_time > time_start) & (full_time < time_end)] full_time = temp_time full_flux = temp_flux full_weight = temp_weight #identify locations where there is a break in the time series. If there is at least one break, identify #segments and label ensemble points by segment; bidx2 is the label. 
If there are no breaks, then identify #only one segment and label accordingly break_locs = np.where(np.diff(full_time) > 0.1) if np.size(break_locs) > 0: if (break_locs[0][-1] < np.size(full_time)): break_locs = np.append(break_locs, np.size(full_time) - 1) break_locs = np.insert(break_locs, 0, 0) cts, bin_edges = np.histogram(full_time, full_time[break_locs]) bidx2 = np.digitize(full_time, full_time[break_locs]) num_segs = np.size(break_locs) - 1 else: cts, bin_edges = np.histogram( full_time, np.squeeze(np.append(full_time[0], full_time[-1]))) bidx2 = np.digitize( full_time, np.squeeze(np.append(full_time[0], full_time[-1] + 1))) num_segs = 1 break_locs = np.append(0, np.size(full_time) - 1) #pp will be components of spline fit to ensemble for each segment pp_ensemble = [] #set up influx, inweight,intime as flux/weight/time of ensemble segment-by-segment for iseg in range(num_segs): influx = full_flux[bidx2 - 1 == iseg] inweight = full_weight[bidx2 - 1 == iseg] intime = full_time[bidx2 - 1 == iseg] intime0 = intime influx0 = influx #initialize bin size in days. We will fit the ensemble with splines bin_size = 2.0 for ib in range(7): gx = np.arange(time_start - .5 * bin_size, time_end + bin_size, bin_size) # bidx = np.digitize(full_time,gx) bidx = np.digitize(temp_time, gx) bidx = bidx - 1 # n, bin_edges = np.histogram(full_time,gx) #bin data n, bin_edges = np.histogram(temp_time, gx) #bin data #if there are too few points in the least-populated bin after the first couple of iterations, break out #and stop decreasing the size of the bins ttflux = [] ttweight = [] ttime = [] #bin by bin build temporary arrays for weight, time, flux for ix in range(len(n)): ttweight = np.append(ttweight, np.nanmean(temp_weight[bidx == ix])) ttime = np.append(ttime, np.nanmean(temp_time[bidx == ix])) ttflux = np.append( ttflux, np.nanmedian( np.divide(temp_flux[bidx == ix], temp_weight[bidx == ix]))) ottime = ttime #keep track of originals since we will modify the tt arrays otflux = ttflux #clean up any NaNs ttime = np.asarray(ttime) ttflux = np.asarray(ttflux) w1 = ttime[~np.isnan(ttflux)] w2 = ttflux[~np.isnan(ttflux)] # pp = scipy.interpolate.splrep(w1,w2,k=3) #interpolate a spline across the bins counter = len(ttime) while counter > 0: pp = scipy.interpolate.pchip(w1, w2) diff1 = np.divide(temp_flux, temp_weight) - pp(temp_time) sdiff = 4 * np.nanstd(diff1) counter = len(diff1[np.abs(diff1) > sdiff]) temp_time = temp_time[np.abs(diff1) < sdiff] temp_flux = temp_flux[np.abs(diff1) < sdiff] temp_weight = temp_weight[np.abs(diff1) < sdiff] pp = scipy.interpolate.pchip(w1, w2) break_locs = np.where(np.diff(star_array[ifile].time) > 0.1) #find places where there is a break in time break_locs = np.array(break_locs) if break_locs.size > 0: #set up boundaries to correspond with breaks break_locs = np.array(break_locs) + 1 break_locs.astype(int) if (np.max(break_locs) < len(star_array[ifile].time)): break_locs = np.append(break_locs, len(star_array[ifile].time) - 1) digit_bounds = star_array[ifile].time digit_bounds = np.array(digit_bounds) digit_bounds = digit_bounds[break_locs] if digit_bounds[0] > np.min(full_time): digit_bounds = np.append( np.min(full_time) - 1e-5, digit_bounds) if digit_bounds[-1] < np.max(full_time): digit_bounds = np.append(digit_bounds, np.max(full_time) + 1e-5) if digit_bounds[0] > np.min(star_array[ifile].time): digit_bounds = np.append( np.min(star_array[ifile].time) - 1e-5, digit_bounds) if digit_bounds[-1] < np.max(star_array[ifile].time): digit_bounds = np.append(digit_bounds, 
np.max(star_array[ifile].time) + 1e-5) bincts, edges = np.histogram(star_array[ifile].time, digit_bounds) bidx = np.digitize(star_array[ifile].time, digit_bounds) #binning for star bidx = bidx - 1 bincts2, edges = np.histogram(full_time, full_time[break_locs]) bidx2 = np.digitize(full_time, full_time[break_locs]) #binning for ensemble bidx2 = bidx2 - 1 num_segs = len(break_locs) else: bincts, edges = np.histogram( star_array[ifile].time, [star_array[ifile].time[0], star_array[ifile].time[-1]]) bidx = np.digitize( star_array[ifile].time, [star_array[ifile].time[0], star_array[ifile].time[-1] ]) #binning for star bidx = bidx - 1 bincts2, edges = np.histogram(full_time, [full_time[0], full_time[-1]]) bidx2 = np.digitize( full_time, [full_time[0], full_time[-1]]) #binning for ensemble bidx2 = bidx2 - 1 num_segs = 1 tscale = [] for iseg in range(num_segs): influx = np.array(star_array[ifile].flux) intime = np.array(star_array[ifile].time) influx = influx[bidx == iseg] intime = intime[bidx == iseg] # fun = lambda x: np.sum(np.square(np.divide(influx,np.median(influx))-x*scipy.interpolate.splev(intime,pp))) fun = lambda x: np.sum( np.square( np.divide(influx, np.median(influx)) - x * pp(intime))) tscale = np.append(tscale, sciopt.fminbound( fun, 0.9, 1.5)) #this is a last fix to scaling, not currently used tbidx = deepcopy(bidx) bin_size = bin_size / 2 return pp
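# The trend fit above median-bins the ensemble light curve, interpolates a PCHIP
# through the bins, and iteratively rejects raw points more than 4 sigma from the
# trend. A simplified, self-contained sketch under those assumptions: weights and
# per-segment handling are omitted, and the toy light curve is made up.
import numpy as np
from scipy.interpolate import pchip

def binned_pchip_with_clipping(t, f, bin_size=2.0, nsigma=4.0):
    edges = np.arange(t.min() - 0.5 * bin_size, t.max() + bin_size, bin_size)
    idx = np.digitize(t, edges) - 1
    bt = np.array([t[idx == i].mean() for i in range(len(edges) - 1) if (idx == i).any()])
    bf = np.array([np.median(f[idx == i]) for i in range(len(edges) - 1) if (idx == i).any()])
    pp = pchip(bt, bf)                       # trend through the bin medians
    while True:
        resid = f - pp(t)
        keep = np.abs(resid) < nsigma * np.nanstd(resid)
        if keep.all():
            return pp
        t, f = t[keep], f[keep]              # drop outliers, recompute residual stats

rng = np.random.default_rng(0)
t = np.sort(rng.uniform(0., 20., 400))
f = 1.0 + 0.02 * np.sin(t / 3.) + rng.normal(0., 1e-3, t.size)
f[::97] += 0.1                               # a few outliers to be clipped
print(binned_pchip_with_clipping(t, f)(10.0))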
def load(self): if self.loaded: return with open(self.filepath, 'rb') as f: message = Decoder().process(f.read()) queryer = DataQuerent(NodePathParser()) self._lons = [] self._lats = [] self._wind = [] self._pres = [] for subset in range(52): # lat try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_LAT)).all_values() except IndexError: raw_lats = np.empty(41) raw_lats[:] = np.nan else: raw_lats = np.array(values[0][3], dtype='float')[:, 0] raw_lats = np.insert(raw_lats, 0, values[0][1]) self._lats.append(raw_lats) # lon try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_LON)).all_values() except IndexError: raw_lons = np.empty(41) raw_lons[:] = np.nan else: raw_lons = np.array(values[0][3], dtype='float')[:, 0] raw_lons = np.insert(raw_lons, 0, values[0][1]) raw_lons[raw_lons < 0] = raw_lons[raw_lons < 0] + 360 self._lons.append(raw_lons) # wind try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_WIND)).all_values(flat=True) except IndexError: raw_wind = np.empty(41) raw_wind[:] = np.nan else: raw_wind = np.array(values[0], dtype='float') * 1.94 # to kt self._wind.append(raw_wind) # pres try: values = queryer.query( message, '@[{}] > {}'.format(subset, self.CODE_PRES)).all_values(flat=True) except IndexError: raw_pres = np.empty(41) raw_pres[:] = np.nan else: raw_pres = np.array(values[0], dtype='float') / 100 # to hPa self._pres.append(raw_pres) self.invalid_indices = [] self.invalid_majors = [] self._lats = self.compact_mean(self._lats) self._lons = self.compact_mean(self._lons) self._wind = self.compact_mean(self._wind) self._pres = self.compact_mean(self._pres) invalid_index = min(self.invalid_indices) invalid_major = min(self.invalid_majors) print(invalid_index, invalid_major) self.cut_major(self._lats, invalid_major) self.cut_major(self._lons, invalid_major) self.cut_major(self._wind, invalid_major) self.cut_major(self._pres, invalid_major) self._lats[-1, invalid_index:] = np.nan self._lons[-1, invalid_index:] = np.nan self._wind[-1, invalid_index:] = np.nan self._pres[-1, invalid_index:] = np.nan self._maxwind = np.nanmax(self._wind, axis=1) self._minpres = np.nanmin(self._pres, axis=1) #print(self._maxwind) #print(self._minpres) self.loaded = True
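# The loader above prepends the decoded analysis position to each forecast track
# with np.insert and wraps negative longitudes into the 0..360 range. A minimal
# sketch of those two steps with made-up positions.
import numpy as np

analysis_lat, analysis_lon = 18.5, -150.2
forecast_lats = np.array([18.9, 19.4, 20.1])
forecast_lons = np.array([-151.0, -151.8, -152.5])

lats = np.insert(forecast_lats, 0, analysis_lat)   # index 0 is the initial position
lons = np.insert(forecast_lons, 0, analysis_lon)
lons[lons < 0] += 360                              # wrap to 0..360 as the loader does

print(lats)   # [18.5 18.9 19.4 20.1]
print(lons)   # [209.8 209.  208.2 207.5]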
def fill(pre_el, next_el, position, sample_space, output):
    # insert `sample_space` rows into `output` starting at row `position`;
    # each inserted row is roughly the fraction (j+1)/(sample_space+1) of the
    # summed endpoints pre_el + next_el (integer division)
    for j in range(sample_space):
        sample = (pre_el + next_el) // (sample_space + 1) * (j + 1)
        output = np.insert(output, position + j, sample.reshape(2), axis=0)
    return output
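# Usage sketch for fill(): inserting three intermediate rows between two 2-D
# points of an output array. With pre_el at the origin the inserted rows step
# evenly towards next_el; the point values below are made up.
import numpy as np

pre_el  = np.array([[0, 0]])
next_el = np.array([[8, 4]])
output  = np.vstack([pre_el, next_el])

filled = fill(pre_el, next_el, position=1, sample_space=3, output=output)
print(filled)   # rows: [0 0], [2 1], [4 2], [6 3], [8 4]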
def compute_FI_and_GL(X, y, indices_to_target, target_weights, is_multi_label=True, path_to_keras_model=None): """ compute FL and GL for the given inputs """ ## Now, start localisation !!! ## from sklearn.preprocessing import Normalizer from collections.abc import Iterable norm_scaler = Normalizer(norm="l1") total_cands = {} FIs = None grad_scndcr = None #t0 = time.time() ## slice inputs target_X = X[indices_to_target] target_y = y[indices_to_target] # get loss func loss_func = model_util.get_loss_func(is_multi_label=is_multi_label) model = None for idx_to_tl, vs in target_weights.items(): t1 = time.time() t_w, lname = vs model = load_model(path_to_keras_model, compile=False) if idx_to_tl == 0: # meaning the model doesn't specify the input layer explicitly prev_output = target_X else: prev_output = model.layers[idx_to_tl - 1].output layer_config = model.layers[idx_to_tl].get_config() if model_util.is_FC(lname): from_front = [] if idx_to_tl == 0 or idx_to_tl - 1 == 0: prev_output = target_X else: t_model = Model(inputs=model.input, outputs=model.layers[idx_to_tl - 1].output) prev_output = t_model.predict(target_X) if len(prev_output.shape) == 3: prev_output = prev_output.reshape(prev_output.shape[0], prev_output.shape[-1]) for idx in tqdm(range(t_w.shape[-1])): assert int( prev_output.shape[-1]) == t_w.shape[0], "{} vs {}".format( int(prev_output.shape[-1]), t_w.shape[0]) output = np.multiply(prev_output, t_w[:, idx]) # -> shape = prev_output.shape output = np.abs(output) output = norm_scaler.fit_transform(output) output = np.mean(output, axis=0) from_front.append(output) from_front = np.asarray(from_front) from_front = from_front.T from_behind = compute_gradient_to_output(path_to_keras_model, idx_to_tl, target_X) #print ("shape", from_front.shape, from_behind.shape) FIs = from_front * from_behind ############ FI end ######### # Gradient grad_scndcr = compute_gradient_to_loss(path_to_keras_model, idx_to_tl, target_X, target_y, loss_func=loss_func) # G end elif model_util.is_C2D(lname): is_channel_first = layer_config['data_format'] == 'channels_first' if idx_to_tl == 0 or idx_to_tl - 1 == 0: prev_output_v = target_X else: t_model = Model(inputs=model.input, outputs=model.layers[idx_to_tl - 1].output) prev_output_v = t_model.predict(target_X) tr_prev_output_v = np.moveaxis( prev_output_v, [1, 2, 3], [3, 1, 2]) if is_channel_first else prev_output_v kernel_shape = t_w.shape[:2] strides = layer_config['strides'] padding_type = layer_config['padding'] if padding_type == 'valid': paddings = [0, 0] else: if padding_type == 'same': #P = ((S-1)*W-S+F)/2 true_ws_shape = [t_w.shape[0], t_w.shape[-1]] # Channel_in, Channel_out paddings = [ int(((strides[i] - 1) * true_ws_shape[i] - strides[i] + kernel_shape[i]) / 2) for i in range(2) ] elif not isinstance(padding_type, str) and isinstance( padding_type, Iterable): # explicit paddings given paddings = list(padding_type) if len(paddings) == 1: paddings = [paddings[0], paddings[0]] else: print( "padding type: {} not supported".format(padding_type)) paddings = [0, 0] assert False # add padding if is_channel_first: paddings_per_axis = [[0, 0], [0, 0], [paddings[0], paddings[0]], [paddings[1], paddings[1]]] else: paddings_per_axis = [[0, 0], [paddings[0], paddings[0]], [paddings[1], paddings[1]], [0, 0]] tr_prev_output_v = np.pad(tr_prev_output_v, paddings_per_axis, mode='constant', constant_values=0) # zero-padding if is_channel_first: num_kernels = int(prev_output.shape[1]) # Channel_in else: # channels_last assert layer_config[ 'data_format'] == 
'channels_last', layer_config[ 'data_format'] num_kernels = int(prev_output.shape[-1]) # Channel_in assert num_kernels == t_w.shape[2], "{} vs {}".format( num_kernels, t_w.shape[2]) #print ("t_w***", t_w.shape) # H x W if is_channel_first: # the last two (front two are # of inputs and # of kernels (Channel_in)) input_shape = [int(v) for v in prev_output.shape[2:]] else: input_shape = [int(v) for v in prev_output.shape[1:-1]] # (W1−F+2P)/S+1, W1 = input volumne , F = kernel, P = padding n_mv_0 = int((input_shape[0] - kernel_shape[0] + 2 * paddings[0]) / strides[0] + 1) # H_out n_mv_1 = int((input_shape[1] - kernel_shape[1] + 2 * paddings[1]) / strides[1] + 1) # W_out n_output_channel = t_w.shape[-1] # Channel_out from_front = [] # move axis for easier computation for idx_ol in tqdm(range(n_output_channel)): # t_w.shape[-1] for i in range(n_mv_0): # H for j in range(n_mv_1): # W curr_prev_output_slice = tr_prev_output_v[:, i * strides[0]: i * strides[0] + kernel_shape[ 0], :, :] curr_prev_output_slice = curr_prev_output_slice[:, :, j * strides[ 1]:j * strides[1] + kernel_shape[1], :] output = curr_prev_output_slice * t_w[:, :, :, idx_ol] sum_output = np.sum(np.abs(output)) output = output / sum_output sum_output = np.nan_to_num(output, posinf=0.) output = np.mean(output, axis=0) from_front.append(output) from_front = np.asarray(from_front) #from_front.shape: [Channel_out * n_mv_0 * n_mv_1, F1, F2, Channel_in] if is_channel_first: from_front = from_front.reshape( (n_output_channel, n_mv_0, n_mv_1, kernel_shape[0], kernel_shape[1], int(prev_output.shape[1]))) else: # channels_last from_front = from_front.reshape( (n_mv_0, n_mv_1, n_output_channel, kernel_shape[0], kernel_shape[1], int(prev_output.shape[-1]))) # [F1,F2,Channel_in, Channel_out, n_mv_0, n_mv_1] # or [F1,F2,Channel_in, n_mv_0, n_mv_1,Channel_out] from_front = np.moveaxis(from_front, [0, 1, 2], [3, 4, 5]) # [Channel_out, H_out(n_mv_0), W_out(n_mv_1)] from_behind = compute_gradient_to_output(path_to_keras_model, idx_to_tl, target_X, by_batch=True) #t1 = time.time() # [F1,F2,Channel_in, Channel_out, n_mv_0, n_mv_1] (channels_firs) # or [F1,F2,Channel_in,n_mv_0, n_mv_1,Channel_out] (channels_last) FIs = from_front * from_behind #t2 = time.time() #print ('Time for multiplying front and behind results: {}'.format(t2 - t1)) #FIs = np.mean(np.mean(FIs, axis = -1), axis = -1) # [F1, F2, Channel_in, Channel_out] if is_channel_first: FIs = np.sum(np.sum(FIs, axis=-1), axis=-1) # [F1, F2, Channel_in, Channel_out] else: FIs = np.sum(np.sum(FIs, axis=-2), axis=-2) # [F1, F2, Channel_in, Channel_out] #t3 = time.time() #print ('Time for computing mean for FIs: {}'.format(t3 - t2)) ## Gradient # will be [F1, F2, Channel_in, Channel_out] grad_scndcr = compute_gradient_to_loss(path_to_keras_model, idx_to_tl, target_X, target_y, by_batch=True, loss_func=loss_func) elif model_util.is_LSTM(lname): # from scipy.special import expit as sigmoid num_weights = 2 assert len(t_w) == num_weights, t_w # t_w_kernel: # (input_feature_size, 4 * num_units). 
t_w_recurr_kernel: (num_units, 4 * num_units) t_w_kernel, t_w_recurr_kernel = t_w # get the previous output, which will be the input of the lstm if model_util.is_Input(type(model.layers[idx_to_tl - 1]).__name__): prev_output = target_X else: # shape = (batch_size, time_steps, input_feature_size) t_model = Model(inputs=model.input, outputs=model.layers[idx_to_tl - 1].output) prev_output = t_model.predict(target_X) assert len(prev_output.shape) == 3, prev_output.shape num_features = prev_output.shape[ -1] # the dimension of features that will be processed by the model num_units = t_w_recurr_kernel.shape[0] assert t_w_kernel.shape[ 0] == num_features, "{} (kernel) vs {} (input)".format( t_w_kernel.shape[0], num_features) # hidden state and cell state sequences computation # generate a temporary model that only contains the target lstm layer # but with the modification to return sequences of hidden and cell states temp_lstm_layer_inst = lstm_layer.LSTM_Layer( model.layers[idx_to_tl]) hstates_sequence, cell_states_sequence = temp_lstm_layer_inst.gen_lstm_layer_from_another( prev_output) init_hstates, init_cell_states = lstm_layer.LSTM_Layer.get_initial_state( model.layers[idx_to_tl]) if init_hstates is None: init_hstates = np.zeros((len(target_X), num_units)) if init_cell_states is None: # shape = (batch_size, num_units) init_cell_states = np.zeros((len(target_X), num_units)) # shape = (batch_size, time_steps + 1, num_units) hstates_sequence = np.insert(hstates_sequence, 0, init_hstates, axis=1) # shape = (batch_size, time_steps + 1, num_units) cell_states_sequence = np.insert(cell_states_sequence, 0, init_cell_states, axis=1) bias = model.layers[idx_to_tl].get_weights()[ -1] # shape = (4 * num_units,) indices_to_each_gates = np.array_split(np.arange(num_units * 4), 4) ## prepare all the intermediate outputs and the variables that will be used later idx_to_input_gate = 0 idx_to_forget_gate = 1 idx_to_cand_gate = 2 idx_to_output_gate = 3 # for kenerl, weight shape = (input_feature_size, num_units) # and for recurrent, (num_units, num_units), bias (num_units) # and the shape of all the intermedidate outpu is "(batch_size, time_step, num_units)" # input t_w_kernel_I = t_w_kernel[:, indices_to_each_gates[idx_to_input_gate]] t_w_recurr_kernel_I = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_input_gate]] bias_I = bias[indices_to_each_gates[idx_to_input_gate]] I = sigmoid( np.dot(prev_output, t_w_kernel_I) + np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_I) + bias_I) # forget t_w_kernel_F = t_w_kernel[:, indices_to_each_gates[idx_to_forget_gate]] t_w_recurr_kernel_F = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_forget_gate]] bias_F = bias[indices_to_each_gates[idx_to_forget_gate]] F = sigmoid( np.dot(prev_output, t_w_kernel_F) + np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_F) + bias_F) # cand t_w_kernel_C = t_w_kernel[:, indices_to_each_gates[idx_to_cand_gate]] t_w_recurr_kernel_C = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_cand_gate]] bias_C = bias[indices_to_each_gates[idx_to_cand_gate]] C = np.tanh( np.dot(prev_output, t_w_kernel_C) + np.dot(hstates_sequence[:, :-1, :], t_w_recurr_kernel_C) + bias_C) # output t_w_kernel_O = t_w_kernel[:, indices_to_each_gates[idx_to_output_gate]] t_w_recurr_kernel_O = t_w_recurr_kernel[:, indices_to_each_gates[ idx_to_output_gate]] bias_O = bias[indices_to_each_gates[idx_to_output_gate]] # shape = (batch_size, time_steps, num_units) O = sigmoid( np.dot(prev_output, t_w_kernel_O) + np.dot(hstates_sequence[:, :-1, :], 
t_w_recurr_kernel_O) + bias_O) # set arguments to compute forward impact for the neural weights from these four gates t_w_kernels = { 'input': t_w_kernel_I, 'forget': t_w_kernel_F, 'cand': t_w_kernel_C, 'output': t_w_kernel_O } t_w_recurr_kernels = { 'input': t_w_recurr_kernel_I, 'forget': t_w_recurr_kernel_F, 'cand': t_w_recurr_kernel_C, 'output': t_w_recurr_kernel_O } consts = {} consts['input'] = get_constants('input', F, I, C, O, cell_states_sequence) consts['forget'] = get_constants('forget', F, I, C, O, cell_states_sequence) consts['cand'] = get_constants('cand', F, I, C, O, cell_states_sequence) consts['output'] = get_constants('output', F, I, C, O, cell_states_sequence) # from_front's shape = (num_units, (num_features + num_units) * 4) # gate_orders = ['input', 'forget', 'cand', 'output'] from_front, gate_orders = lstm_local_front_FI_for_target_all( prev_output, hstates_sequence[:, :-1, :], num_units, t_w_kernels, t_w_recurr_kernels, consts) from_front = from_front.T # ((num_features + num_units) * 4, num_units) N_k_rk_w = int(from_front.shape[0] / 4) assert N_k_rk_w == num_features + num_units, "{} vs {}".format( N_k_rk_w, num_features + num_units) ## from behind from_behind = compute_gradient_to_output( path_to_keras_model, idx_to_tl, target_X, by_batch=True) # shape = (num_units,) #t1 = time.time() # shape = (N_k_rk_w, num_units) FIs_combined = from_front * from_behind #print ("Shape", from_behind.shape, FIs_combined.shape) #t2 = time.time() #print ('Time for multiplying front and behind results: {}'.format(t2 - t1)) # reshaping FIs_kernel = np.zeros( t_w_kernel.shape ) # t_w_kernel's shape (num_features, num_units * 4) FIs_recurr_kernel = np.zeros( t_w_recurr_kernel.shape ) # t_w_recurr_kernel's shape (num_units, num_units * 4) # from (4 * N_k_rk_w, num_units) to 4 * (N_k_rk_w, num_units) for i, FI_p_gate in enumerate( np.array_split(FIs_combined, 4, axis=0)): # FI_p_gate's shape = (N_k_rk_w, num_units) # -> will divided into (num_features, num_units) & (num_units, num_units) # local indices that will split FI_p_gate (shape = (N_k_rk_w, num_units)) # since we append the weights in order of a kernel weight and a recurrent kernel weight indices_to_features = np.arange(num_features) indices_to_units = np.arange(num_units) + num_features #FIs_kernel[indices_to_features + (i * N_k_rk_w)] # = FI_p_gate[indices_to_features] # shape = (num_features, num_units) #FIs_recurr_kernel[indices_to_units + (i * N_k_rk_w)] # = FI_p_gate[indices_to_units] # shape = (num_units, num_units) FIs_kernel[:, i * num_units:(i + 1) * num_units] = FI_p_gate[ indices_to_features] # shape = (num_features, num_units) FIs_recurr_kernel[:, i * num_units:( i + 1) * num_units] = FI_p_gate[ indices_to_units] # shape = (num_units, num_units) #t3 =time.time() FIs = [FIs_kernel, FIs_recurr_kernel ] # [(num_features, num_units*4), (num_units, num_units*4)] #print ('Time for formatting: {}'.format(t3 - t2)) ## Gradient grad_scndcr = compute_gradient_to_loss(path_to_keras_model, idx_to_tl, target_X, target_y, by_batch=True, loss_func=loss_func) else: print("Currenlty not supported: {}. (shoulde be filtered before)". 
format(lname)) import sys sys.exit() #t2 = time.time() #print ("Time for computing cost for the {} layer: {}".format(idx_to_tl, t2 - t1)) if not model_util.is_LSTM(target_weights[idx_to_tl] [1]): # only one weight variable to process pairs = np.asarray([grad_scndcr.flatten(), FIs.flatten()]).T total_cands[idx_to_tl] = {'shape': FIs.shape, 'costs': pairs} else: # currently, all of them go into here total_cands[idx_to_tl] = {'shape': [], 'costs': []} pairs = [] for _FIs, _grad_scndcr in zip(FIs, grad_scndcr): pairs = np.asarray([_grad_scndcr.flatten(), _FIs.flatten()]).T total_cands[idx_to_tl]['shape'].append(_FIs.shape) total_cands[idx_to_tl]['costs'].append(pairs) #t3 = time.time() #print ("Time for computing total costs: {}".format(t3 - t0)) return total_cands
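A self-contained sketch of the dense-layer forward-impact term computed above, with random arrays standing in for the previous layer's activations and for compute_gradient_to_output; only the shapes and the abs / L1-normalise / batch-mean steps mirror the code above.

import numpy as np
from sklearn.preprocessing import Normalizer

rng = np.random.default_rng(0)
prev_output = rng.normal(size=(8, 4))      # activations feeding the layer (batch, n_in)
t_w = rng.normal(size=(4, 3))              # kernel of the layer under analysis (n_in, n_out)
norm_scaler = Normalizer(norm="l1")

from_front = []
for idx in range(t_w.shape[-1]):           # one column per output neuron
    contrib = np.abs(prev_output * t_w[:, idx])   # |activation * weight|
    contrib = norm_scaler.fit_transform(contrib)  # L1-normalise per sample
    from_front.append(contrib.mean(axis=0))       # average over the batch
from_front = np.asarray(from_front).T      # shape (n_in, n_out)

grad_to_output = rng.normal(size=(3,))     # placeholder for the gradient of the layer output
FIs = from_front * grad_to_output          # forward impact, same shape as t_w
print(FIs.shape)                           # (4, 3)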
Alpha = 0.0001  # learning rate
Epsilon = 0.000001
nFeatures = 0  # assumed initial value; the loop below increments it once per comma
fileReader = open(sys.argv[1], 'r')
lines = fileReader.readlines()  # fileReader is now at EOF
for c in lines[0]:  # count the commas in the first line, so any CSV dimensionality works
    if c == ',':
        nFeatures = nFeatures + 1
Theta = np.array([np.ones(nFeatures + 1)])
print("# features", nFeatures)
y = np.array([[i.split(',')[nFeatures][:-1]] for i in lines], dtype=float)
X = np.array([k.split(',')[0:nFeatures] for k in lines], dtype=float)
X = np.insert(X, 0, 1, axis=1)  # insert a column of 1's so we can use the bias term
print("theta shape:", Theta.shape)
print("y shape:", y.shape)
print("X shape:", X.shape)
''' for scaling features, if needed
f = 1
for f in range(nFeatures):
    # scale the data by some policy; changing it may improve your model's performance
    maxFeature = np.max(X[:, f], 0)
    if maxFeature < 1.0:
        continue
    else:
        X[:, f] = X[:, f] / maxFeature  # scale (i.e. normalize) the data (because of the distance between mean values of the features)
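A hedged sketch of the batch gradient-descent loop this setup presumably feeds; the update rule and the Epsilon stopping test are assumptions, not taken from the original script.

import numpy as np

def gradient_descent(X, y, Theta, alpha, epsilon, max_iters=100000):
    # Assumed training loop: X is (m, n+1) with a bias column, Theta is (1, n+1).
    m = X.shape[0]
    for _ in range(max_iters):
        error = X @ Theta.T - y            # (m, 1) residuals
        gradient = (X.T @ error).T / m     # same shape as Theta
        new_Theta = Theta - alpha * gradient
        if np.max(np.abs(new_Theta - Theta)) < epsilon:
            return new_Theta               # assumed convergence criterion
        Theta = new_Theta
    return Theta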
    #print probabilities
    #print ' '
    max_ = np.argmax(probabilities)
    #print max_
    if max_ == 0:
        predicitions.append(10)
    else:
        predicitions.append(max_)
    return predicitions

def evaluate(y, predicitions):
    correct = [1 if (a == b) else 0 for a, b in zip(y, predicitions)]
    return (1.0 * sum(correct)) / len(y) * 100.0

data = loadmat('/mnt/407242D87242D1F8/study/anaconda/OneVsAll_logistic_Regression/data/ex4data1.mat')
X = data['X']
y = data['y']
X = np.insert(X, 0, values=np.ones(5000), axis=1)
zero = [1 if (item == 10) else 0 for item in data['y']]
X = np.array(X).reshape([5000, 401])
y = np.array(y).reshape([5000, 1])
beta = 1
no_label = 10
theta_all = oneVSall(X, y, beta, no_label)
predicitions = predict(X, theta_all)
print 'Training evaluation', evaluate(y, predicitions)
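A sketch of the prediction step assumed above, where row 0 of theta_all is taken to correspond to label 10 (the digit zero in the ex4data1 convention); theta_all's layout is an assumption.

import numpy as np

def predict_sketch(X, theta_all):
    # theta_all assumed shape (10, 401): one parameter row per class.
    probabilities = 1.0 / (1.0 + np.exp(-X @ theta_all.T))  # sigmoid scores, (m, 10)
    best = np.argmax(probabilities, axis=1)
    return np.where(best == 0, 10, best)  # map index 0 back to label 10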
def read_rof(filename):
    """
    Reads ROF binary file (generated by Rigol power supply) and returns its data
    in numpy.ndarray format and file info (header) in dict format

    :param filename: ROF binary file name
    :return: data, head
    :rtype: numpy.ndarray, dict
    """
    with open(filename, "rb") as fid:
        data = list()

        # read header info
        head = dict()
        head["filetype"] = fid.read(3).decode(ENCODING)
        fid.read(1)  # unused last byte of file type value
        head["model"] = model_dict[fid.read(1)]
        fid.read(1)  # unused byte
        head["data_info_len"] = struct.unpack('1h', fid.read(2))[0]
        head["data_len"] = struct.unpack('1i', fid.read(4))[0]
        head["head_crc"] = fid.read(2)
        head["data_crc"] = fid.read(2)
        head["period"] = struct.unpack('1i', fid.read(4))[0]
        head["points"] = struct.unpack('1i', fid.read(4))[0]
        head["oldest_data_subscript"] = fid.read(4)

        # number of values == points * number_of_channels * 2
        data_values = head["points"] * ch_num[head["model"]] * 2  # 2 columns (voltage, current)
        data_bytes = data_values * DATA_BYTES
        raw_data = fid.read(data_bytes)
        data = np.ndarray(shape=(head["points"], ch_num[head["model"]] * 2),
                          dtype=DATA_FORMAT, buffer=raw_data)

        # convert to float
        data = data.astype(np.float32)

        # convert to volts and amperes
        data = data * VOLTS_AMPERES_COEFF

        # get time column
        x_data = np.array([val * head["period"] for val in range(head["points"])],
                          dtype=data.dtype, order="F")

        # add time column
        data = np.insert(data, 0, x_data, axis=1)

        # print("================================")
        # print("\n".join(str(line) for line in data))
        #
        # for idx, line in enumerate(data):
        #     if idx == 30:
        #         break
        #     print(CSV_SEPARATOR.join(str(val) for val in line))

    return data, head
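A minimal illustration of the time-column step: np.insert with axis=1 prepends a column, so the (points, channels * 2) array gains a leading time axis. The data and period below are fabricated.

import numpy as np

data = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])  # fake voltage/current pairs
period = 0.5                                           # fake sampling period
x_data = np.arange(data.shape[0], dtype=data.dtype) * period
data = np.insert(data, 0, x_data, axis=1)
print(data)  # first column is now 0.0, 0.5, 1.0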
def saveHist(histAll, histObs, histRec, bin_edges, xtitle, fname, filters=['u_', 'g_', 'r_', 'i_', 'z_', 'y_', 'all']): c1 = '#5687A6' #Dali Blue (Andrew's AAS Poster) c2 = '#A62B1F' #Dai Red c3 = '#BF8A26' #Dali Beige fig, ax1 = plt.subplots( figsize=(8, 6), sharex=True) #can change to include cdf with ax1, ax2 histAll = np.insert(histAll, 0, 0) histObs = np.insert(histObs, 0, 0) for f in filters: histRec[f] = np.insert(histRec[f], 0, 0) #PDF ax1.step(bin_edges, histAll / np.sum(histAll), color=c1) ax1.step(bin_edges, histObs / np.sum(histObs), color=c2) for f in filters: lw = 1 if (f == 'all'): lw = 0.5 ax1.step(bin_edges, histRec[f] / np.sum(histRec[f]), color=c3, linewidth=lw) ax1.set_ylabel('PDF') ax1.set_yscale('log') ax1.set_title('Open Clusters - Baseline (crowding)', fontsize=16) ax1.set_xlabel(xtitle) #CDF #cdfAll = [] #cdfObs = [] #cdfRec = dict() #for f in filters: # cdfRec[f] = [] # for i in range(len(histAll)): # cdfAll.append(np.sum(histAll[:i])/np.sum(histAll)) # for i in range(len(histObs)): # cdfObs.append(np.sum(histObs[:i])/np.sum(histObs)) # for f in filters: # for i in range(len(histRec[f])): # cdfRec[f].append(np.sum(histRec[f][:i])/np.sum(histRec[f])) #ax2.step(bin_edges, cdfAll, color=c1) #ax2.step(bin_edges, cdfObs, color=c2) #for f in filters: # lw = 1 # if (f == 'all'): # lw = 0.5 # ax2.step(bin_edges, cdfRec[f], color=c3, linewidth=lw) #ax2.set_ylabel('CDF') #ax2.set_xlabel(xtitle) fig.subplots_adjust(hspace=0) fig.savefig(os.path.join('.', 'plots', fname + '.pdf'), format='pdf', bbox_inches='tight') #write to a text file with open(os.path.join('.', 'eblsst_files', fname + '.csv'), 'w') as fl: outline = 'binEdges,histAll,histObs' for f in filters: outline += ',' + f + 'histRec' outline += '\n' fl.write(outline) for i in range(len(bin_edges)): outline = str(bin_edges[i]) + ',' + str(histAll[i]) + ',' + str( histObs[i]) for f in filters: outline += ',' + str(histRec[f][i]) outline += '\n' fl.write(outline)
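Why the histograms above get a leading zero, in isolation: np.histogram returns one fewer count than bin edges, and ax1.step is called with the full bin_edges array, so a 0 is prepended to align the lengths. The data here are synthetic.

import numpy as np

values = np.random.default_rng(1).normal(size=1000)
hist, bin_edges = np.histogram(values, bins=20)   # hist has len(bin_edges) - 1 entries
hist = np.insert(hist, 0, 0)                      # prepend a zero so lengths match
assert len(hist) == len(bin_edges)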
def predict(self, X):
    X = np.insert(X, 0, 1, axis=1)
    return self.sigmoid(np.dot(X, self.w))
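A minimal, hypothetical context for the method above; LogisticRegressionSketch and its weights are made up, and only the bias-column insertion mirrors the original.

import numpy as np

class LogisticRegressionSketch:
    def __init__(self, w):
        self.w = w  # assumed shape (n_features + 1,), bias weight first

    def sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1)  # prepend the bias column
        return self.sigmoid(np.dot(X, self.w))

model = LogisticRegressionSketch(w=np.array([0.1, 0.5, -0.3]))
print(model.predict(np.array([[1.0, 2.0], [3.0, 4.0]])))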
def fusion_images(multispectral, panchromatic, save_image=False, savepath=None, timeCondition=True): end = 0 start = 0 #Verifica que ambas imagenes cumplan con las condiciones if multispectral.shape[2] == 3: print('The Multispectral image has '+str(multispectral.shape[2])+' channels and size of '+str(multispectral.shape[0])+'x'+str(multispectral.shape[1])) else: sys.exit('The first image is not multispectral') if len(panchromatic.shape) == 2: print(' The Panchromatic image has a size of '+str(panchromatic.shape[0])+'x'+str(panchromatic.shape[1])) else: sys.exit('The second image is not panchromatic') size_rgb = multispectral.shape # Definición del tamaño del bloque BLOCK_SIZE = 32 # Convierte a float32 y separa las bandas RGB de la multispectral m_host = multispectral.astype(np.float32) r_host = m_host[:,:,0].astype(np.float32) g_host = m_host[:,:,1].astype(np.float32) b_host = m_host[:,:,2].astype(np.float32) size_rgb = multispectral.shape # Convierte la pancromatica a float32 panchromatic_host = panchromatic.astype(np.float32) # Inicial el time_calculated de ejecucion start=time.time() # Se pasan los array en el host al device r_gpu = gpuarray.to_gpu(r_host) g_gpu = gpuarray.to_gpu(g_host) b_gpu = gpuarray.to_gpu(b_host) p_gpu = gpuarray.to_gpu(panchromatic_host) # Se calcula la media de cada una de las bandas y se forma un arreglo con estos valores, todo esto en GPU mean_r_gpu = misc.mean(r_gpu) mean_g_gpu = misc.mean(g_gpu) mean_b_gpu = misc.mean(b_gpu) # Se obtiene el numero de bandas n_bands = size_rgb[2] # Se aparta memoria en GPU r_gpu_subs = gpuarray.zeros_like(r_gpu,np.float32) g_gpu_subs = gpuarray.zeros_like(g_gpu,np.float32) b_gpu_subs = gpuarray.zeros_like(b_gpu,np.float32) # Se realiza la resta de su respectiva media a cada uno de los pixeles de cada banda, substract( r_gpu, mean_r_gpu.get(), r_gpu_subs) substract( g_gpu, mean_g_gpu.get(), g_gpu_subs) substract( b_gpu, mean_b_gpu.get(), b_gpu_subs) # Se divide cada una de las bandas después de ser restada su media, en un conjunto de submatrices cuadradas del tamaño del bloque r_subs_split = split(r_gpu_subs.get(),BLOCK_SIZE,BLOCK_SIZE) g_subs_split = split(g_gpu_subs.get(),BLOCK_SIZE,BLOCK_SIZE) b_subs_split = split(b_gpu_subs.get(),BLOCK_SIZE,BLOCK_SIZE) #Se obtiene la matrix de varianza y covarianza mat_var_cov = varianza_cov(r_subs_split,g_subs_split,b_subs_split) # Coeficiente para diaganalizar ortogonalmente coefficient = 1.0/((size_rgb[0]*size_rgb[1])-1) # Matriz diagonalizada ortogonalmente ortogonal_matrix = mat_var_cov*coefficient # Se calcula la traza de las sucesivas potencias de la matriz ortogonal inicial polynomial_trace = successive_powers(ortogonal_matrix) # Se calculan los coeficientes del polinomio caracteristico characteristic_polynomial = polynomial_coefficients(polynomial_trace, ortogonal_matrix) # Se obtienen las raices del polinomio caracteristico characteristic_polynomial_roots = np.roots(np.insert(characteristic_polynomial,0,1)) # Los vectores propios aparecen en la diagonal de la matriz eigenvalues_mat eigenvalues_mat = np.diag(characteristic_polynomial_roots) # Vectores propios para cada valor propio eigenvectors_mat = -1*ortogonal_matrix[1:n_bands,0] # Se calcular los vectores propios normalizados # Cada vector propio es una columna de la matriz mat_ortogonal_base mat_ortogonal_base, q_matrix = eigenvectors_norm(eigenvalues_mat, ortogonal_matrix, eigenvectors_mat) q_matrix_list = q_matrix.tolist() q_matrix_cpu = np.array(q_matrix_list).astype(np.float32) w1 = q_matrix_cpu[0,:] w2 = 
(-1)*q_matrix_cpu[1,:] w3 = q_matrix_cpu[2,:] eigenvectors = np.array((w1,w2,w3)) # Se calcula la inversa de los vectores propios inv_eigenvectors = la.inv(eigenvectors) inv_list = inv_eigenvectors.tolist() inv_eigenvector_cpu = np.array(inv_list).astype(np.float32) # Se realiza la división de las bandas en submatrices del tamaño del bloque r_subs_split_cp = split(r_host,BLOCK_SIZE,BLOCK_SIZE) g_subs_split_cp = split(g_host,BLOCK_SIZE,BLOCK_SIZE) b_subs_split_cp = split(b_host,BLOCK_SIZE,BLOCK_SIZE) # Se calculan los componentes principales con las bandas originales y los vectores propios pc_1,pc_2,pc_3 = componentes_principales_original(r_subs_split_cp,g_subs_split_cp,b_subs_split_cp,q_matrix_cpu,r_host.shape[0], BLOCK_SIZE) # Se realiza la división en submatrices de la pancromática, el componente principal 2 y 3, del tamaño del bloque, p_subs_split_nb = split(panchromatic_host,BLOCK_SIZE,BLOCK_SIZE) pc_2_subs_split_nb = split(pc_2,BLOCK_SIZE,BLOCK_SIZE) pc_3_subs_split_nb = split(pc_3,BLOCK_SIZE,BLOCK_SIZE) # Se calculan los componentes con la pancromatica, componentes principales originales 2 y 3, y la inversa de los vectores propios nb1,nb2,nb3 = componentes_principales_panchromartic(p_subs_split_nb,pc_2_subs_split_nb,pc_3_subs_split_nb,inv_eigenvector_cpu,r_host.shape[0], BLOCK_SIZE) nb11 = nb1.astype(np.float32) nb22 = nb2.astype(np.float32) nb33 = nb3.astype(np.float32) nb11_gpu = gpuarray.to_gpu(nb11) nb22_gpu = gpuarray.to_gpu(nb22) nb33_gpu = gpuarray.to_gpu(nb33) # Se separa espacio en memoria para las matrices resultado de realizar el ajuste nb111_gpu = gpuarray.empty_like(nb11_gpu) nb222_gpu = gpuarray.empty_like(nb22_gpu) nb333_gpu = gpuarray.empty_like(nb33_gpu) # Se realiza un ajuste cuando los valores de cada pixel es menor a 0, en GPU negative_adjustment(nb11_gpu,nb111_gpu) negative_adjustment(nb22_gpu,nb222_gpu) negative_adjustment(nb33_gpu,nb333_gpu) nb111_cpu = nb111_gpu.get().astype(np.uint8) nb222_cpu = nb222_gpu.get().astype(np.uint8) nb333_cpu = nb333_gpu.get().astype(np.uint8) end = time.time() fusioned_image=np.stack((nb111_cpu,nb222_cpu,nb333_cpu),axis=2); if(save_image): # Guarda la imagen resultando de acuerdo al tercer parametro establecido en la linea de ejecución del script if(savepath != None): t = skimage.io.imsave(savepath+'/pcagpu_image.tif',fusioned_image, plugin='tifffile') else: t = skimage.io.imsave('pcagpu_image.tif',fusioned_image, plugin='tifffile') #time_calculated de ejecución para la transformada de Brovey en GPU time_calculated = (end-start) if(timeCondition): return {"image": fusioned_image, "time" : time_calculated} else: return fusioned_image
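A CPU-only numpy sketch of the PCA-substitution idea the GPU pipeline above implements: project the bands onto their principal components, replace the first component with the panchromatic band, and project back. The inputs are synthetic, and the eigen-decomposition here uses numpy directly rather than the characteristic-polynomial route above.

import numpy as np

rng = np.random.default_rng(0)
rgb = rng.random((64, 64, 3)).astype(np.float32)   # stand-in multispectral image
pan = rng.random((64, 64)).astype(np.float32)      # stand-in panchromatic image

flat = rgb.reshape(-1, 3)
mean = flat.mean(axis=0)
cov = np.cov((flat - mean).T)                      # 3x3 variance-covariance matrix
eigvals, eigvecs = np.linalg.eigh(cov)
order = np.argsort(eigvals)[::-1]                  # principal components, largest first
eigvecs = eigvecs[:, order]

pcs = (flat - mean) @ eigvecs                      # forward transform
pcs[:, 0] = pan.reshape(-1)                        # substitute PC1 with the panchromatic band
fused = (pcs @ eigvecs.T + mean).reshape(rgb.shape)
fused = np.clip(fused, 0, None)                    # mirror the negative-value adjustment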
def isCheck(self,showDiagonal=False): #Returns "white" if white is in check, "black" if black is in check #Otherwise returns Empty list #First check if white king is in check checks = [] diagonalCheckGivers = [3,5]#Bishop,queen verticalCheckGivers = [4,5]#Rook, queen try: whiteKing = zip(*np.where(self.board == 6))[0] except: self.showBoard() whiteKing = zip(*np.where(self.board == 6))[0] ###print whiteKing kingColumn = self.board[:,whiteKing[1]] kingColumn = kingColumn[kingColumn != 0] kingRow = self.board[whiteKing[0]] kingRow = kingRow[kingRow != 0] kingRightDiagonal = np.array([]) row = whiteKing[0] column = whiteKing[1] while row >= 0 and column >= 0: row -= 1 column -= 1 row += 1 column += 1 while row < 8 and column < 8: if self.board[row][column] != 0: kingRightDiagonal = np.append(kingRightDiagonal, self.board[row][column]) row += 1 column += 1 kingLeftDiagonal = np.array([]) row = whiteKing[0] column = whiteKing[1] while row >= 0 and column < 8: row -= 1 column += 1 row += 1 column -= 1 while row < 8 and column >= 0: if self.board[row][column] != 0: kingLeftDiagonal = np.append(kingLeftDiagonal, self.board[row][column]) row += 1 column -= 1 ###print kingColumn ###print kingRow ###print kingRightDiagonal ###print kingLeftDiagonal for row in [kingColumn,kingRow]: row = np.insert(row,0,0) row = np.append(row,0) kingIndex = np.where(row == 6)[0][0] if showDiagonal: print row in_front = row[kingIndex + 1] if -1*in_front in verticalCheckGivers: ##print("check from front") checks.append("white") in_back = row[kingIndex - 1] if -1*in_back in verticalCheckGivers: checks.append("white") for row in [kingLeftDiagonal,kingRightDiagonal]: row = np.insert(row,0,0) row = np.append(row,0) if showDiagonal: print row kingIndex = np.where(row == 6)[0][0] in_front = row[kingIndex + 1] if -1*in_front in diagonalCheckGivers: checks.append("white") in_back = row[kingIndex - 1] if -1*in_back in diagonalCheckGivers: checks.append("white") if whiteKing[0] < 7: if 0 < whiteKing[1] < 7: if self.board[whiteKing[0] + 1][whiteKing[1] + 1] == -1 or self.board[whiteKing[0] + 1][whiteKing[1] - 1] == -1: checks.append("white") elif whiteKing[1] == 0: if self.board[whiteKing[0] + 1][whiteKing[1] + 1] == -1: checks.append("white") else: if self.board[whiteKing[0] + 1][whiteKing[1] - 1] == -1: checks.append("white") #Knights for l in (-2,-1,1,2): for m in (-2,-1,1,2): if abs(l) != abs(m) and 0 <= whiteKing[0] + l <= 7 and 0 <= whiteKing[1] + m <= 7: if self.board[whiteKing[0] + l][whiteKing[1] + m] == -2: checks.append("white") #Kings for l in (-1,0,1): for m in (-1,0,1): if 0 <= whiteKing[0] + l <= 7 and 0 <= whiteKing[1] + m <= 7: if self.board[whiteKing[0] + l][whiteKing[1] + m] == -6: checks.append("white") try: blackKing = zip(*np.where(self.board == -6))[0] except: self.showBoard() blackKing = zip(*np.where(self.board == -6))[0] ###print blackKing kingColumn = self.board[:,blackKing[1]] kingColumn = kingColumn[kingColumn != 0] kingRow = self.board[blackKing[0]] kingRow = kingRow[kingRow != 0] kingRightDiagonal = np.array([]) row = blackKing[0] column = blackKing[1] while row >= 0 and column >= 0: row -= 1 column -= 1 row += 1 column += 1 while row < 8 and column < 8: if self.board[row][column] != 0: kingRightDiagonal = np.append(kingRightDiagonal, self.board[row][column]) row += 1 column += 1 kingLeftDiagonal = np.array([]) row = blackKing[0] column = blackKing[1] while row >= 0 and column < 8: row -= 1 column += 1 row += 1 column -= 1 while row < 8 and column >= 0: if self.board[row][column] != 0: 
kingLeftDiagonal = np.append(kingLeftDiagonal, self.board[row][column]) row += 1 column -= 1 ###print kingColumn ###print kingRow ###print kingRightDiagonal ###print kingLeftDiagonal for row in [kingColumn,kingRow]: row = np.insert(row,0,0) row = np.append(row,0) kingIndex = np.where(row == -6)[0][0] if showDiagonal: print row in_front = row[kingIndex + 1] if in_front in verticalCheckGivers: checks.append("black") in_back = row[kingIndex - 1] if in_back in verticalCheckGivers: checks.append("black") for row in [kingLeftDiagonal,kingRightDiagonal]: row = np.insert(row,0,0) row = np.append(row,0) kingIndex = np.where(row == -6)[0][0] if showDiagonal: print row in_front = row[kingIndex + 1] if in_front in diagonalCheckGivers: checks.append("black") in_back = row[kingIndex - 1] if in_back in diagonalCheckGivers: checks.append("black") if blackKing[0] > 0: if 0 < blackKing[1] < 7: if self.board[blackKing[0] - 1][blackKing[1] + 1] == 1 or self.board[blackKing[0] - 1][blackKing[1] - 1] == 1: checks.append("black") elif blackKing[1] == 0: if self.board[blackKing[0] - 1][blackKing[1] + 1] == 1: checks.append("black") else: if self.board[blackKing[0] - 1][blackKing[1] - 1] == 1: checks.append("black") #Knights for l in (-2,-1,1,2): for m in (-2,-1,1,2): if abs(l) != abs(m) and 0 <= blackKing[0] + l <= 7 and 0 <= blackKing[1] + m <= 7: if self.board[blackKing[0] + l][blackKing[1] + m] == 2: checks.append("black") #Kings for l in (-1,0,1): for m in (-1,0,1): if 0 <= blackKing[0] + l <= 7 and 0 <= blackKing[1] + m <= 7: if self.board[blackKing[0] + l][blackKing[1] + m] == 6: checks.append("black") ##print checks return checks
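The sentinel-padding trick used above, shown in isolation: a zero on both ends of the squeezed row means row[kingIndex + 1] and row[kingIndex - 1] never index out of bounds, even for a king on the board edge. The piece values below are made up.

import numpy as np

row = np.array([6, -5])          # white king with a black queen next to it
row = np.insert(row, 0, 0)       # sentinel in front
row = np.append(row, 0)          # sentinel behind
king_index = np.where(row == 6)[0][0]
print(row[king_index - 1], row[king_index + 1])   # 0 and -5, no IndexError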
def generate_diag_and_features(dataset, path_dataset=""): path_dataset = "./data/" + dataset + "/" if not len( path_dataset) else path_dataset filepath = path_dataset + dataset + ".conf" dataset_type, filt_parameters, thresh, perslay_parameters, optim_parameters = load_config( filepath=filepath) if "REDDIT" in dataset: print( "Unfortunately, REDDIT data are not available yet for memory issues.\n" ) print("Moreover, the link we used to download the data,") print("http://www.mit.edu/~pinary/kdd/datasets.tar.gz") print("is down at the commit time (May 23rd).") print( "We will update this repository when we figure out a workaround.") return # if "REDDIT" in dataset: # _prepreprocess_reddit(dataset) if os.path.isfile(path_dataset + dataset + ".hdf5"): os.remove(path_dataset + dataset + ".hdf5") diag_file = h5py.File(path_dataset + dataset + ".hdf5") list_filtrations = filt_parameters["names"] [ diag_file.create_group(str(filtration)) for filtration in filt_parameters["names"] ] list_hks_times = np.unique( [filtration.split("_")[1] for filtration in list_filtrations]) if dataset_type == "graph": # preprocessing pad_size = 1 for graph_name in os.listdir(path_dataset + "mat/"): A = np.array(loadmat(path_dataset + "mat/" + graph_name)["A"], dtype=np.float32) pad_size = np.max((A.shape[0], pad_size)) features = pd.DataFrame( index=range(len(os.listdir(path_dataset + "mat/"))), columns=["label"] + ["eval" + str(i) for i in range(pad_size)] + [ name + "-percent" + str(i) for name, i in itertools.product( [f for f in list_hks_times if "hks" in f], 10 * np.arange(11)) ]) for idx, graph_name in enumerate((os.listdir(path_dataset + "mat/"))): name = graph_name.split("_") gid = int(name[name.index("gid") + 1]) - 1 A = np.array(loadmat(path_dataset + "mat/" + graph_name)["A"], dtype=np.float32) num_vertices = A.shape[0] label = int(name[name.index("lb") + 1]) L = csgraph.laplacian(A, normed=True) egvals, egvectors = eigh(L) basesimplex = get_base_simplex(A) eigenvectors = np.zeros([num_vertices, pad_size]) eigenvals = np.zeros(pad_size) eigenvals[:min(pad_size, num_vertices)] = np.flipud( egvals)[:min(pad_size, num_vertices)] eigenvectors[:, :min(pad_size, num_vertices)] = np.fliplr( egvectors)[:, :min(pad_size, num_vertices)] graph_features = [] graph_features.append(eigenvals) for fhks in list_hks_times: hks_time = float(fhks.split("-")[0]) # persistence filtration_val = hks_signature(egvectors, egvals, time=hks_time) dgmOrd0, dgmExt0, dgmRel1, dgmExt1 = apply_graph_extended_persistence( A, filtration_val, basesimplex) diag_file["Ord0_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmOrd0) diag_file["Ext0_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmExt0) diag_file["Rel1_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmRel1) diag_file["Ext1_" + str(hks_time) + "-hks"].create_dataset( name=str(gid), data=dgmExt1) # features graph_features.append( np.percentile( hks_signature(eigenvectors, eigenvals, time=hks_time), 10 * np.arange(11))) features.loc[gid] = np.insert(np.concatenate(graph_features), 0, label) features['label'] = features['label'].astype(int) elif dataset_type == "orbit": def _gen_orbit(num_pts_per_orbit, param): X = np.zeros([num_pts_per_orbit, 2]) xcur, ycur = np.random.rand(), np.random.rand() for idx in range(num_pts_per_orbit): xcur = (xcur + param * ycur * (1. - ycur)) % 1 ycur = (ycur + param * xcur * (1. 
- xcur)) % 1 X[idx, :] = [xcur, ycur] return X labs = [] count = 0 num_diag_per_param = 1000 if "5K" in dataset else 20000 for lab, r in enumerate([2.5, 3.5, 4.0, 4.1, 4.3]): print("Generating", num_diag_per_param, "orbits and diagrams for r = ", r, "...") for dg in range(num_diag_per_param): X = _gen_orbit(num_pts_per_orbit=1000, param=r) alpha_complex = gd.AlphaComplex(points=X) simplex_tree = alpha_complex.create_simplex_tree( max_alpha_square=1e50) simplex_tree.persistence() diag_file["Alpha0"].create_dataset( name=str(count), data=np.array( simplex_tree.persistence_intervals_in_dimension(0))) diag_file["Alpha1"].create_dataset( name=str(count), data=np.array( simplex_tree.persistence_intervals_in_dimension(1))) orbit_label = {"label": lab, "pcid": count} labs.append(orbit_label) count += 1 labels = pd.DataFrame(labs) labels.set_index("pcid") features = labels[["label"]] features.to_csv(path_dataset + dataset + ".csv") return diag_file.close()
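How each feature row above is assembled, in isolation: the per-graph feature blocks are concatenated and the integer label is prepended with np.insert; the shapes below are toy values, not the real eigenvalue and HKS-percentile dimensions.

import numpy as np

label = 1
graph_features = [np.zeros(4), np.arange(11, dtype=float)]   # e.g. eigenvalues, percentiles
row = np.insert(np.concatenate(graph_features), 0, label)
print(row.shape)   # (16,): 1 label followed by 15 feature values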