Example #1
    def _decision_function(self, X, labels):
        # Initialize the score array
        scores = np.zeros([X.shape[0], ])

        small_indices = np.where(
            np.isin(labels, self.small_cluster_labels_))[0]
        large_indices = np.where(
            np.isin(labels, self.large_cluster_labels_))[0]

        if small_indices.shape[0] != 0:
            # Calculate the outlier factor for the samples in small clusters
            dist_to_large_center = cdist(X[small_indices, :],
                                         self._large_cluster_centers)

            scores[small_indices] = np.min(dist_to_large_center, axis=1)

        if large_indices.shape[0] != 0:
            # Calculate the outlier factor for the samples in large clusters
            large_centers = self.cluster_centers_[labels[large_indices]]

            scores[large_indices] = pairwise_distances_no_broadcast(
                X[large_indices, :], large_centers)

        if self.use_weights:
            # Weights are calculated as the number of elements in the cluster
            scores = scores * self.cluster_sizes_[labels]

        return scores.ravel()
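The small-cluster branch above amounts to "distance to the nearest large-cluster center". A minimal standalone sketch of that idea, with made-up arrays rather than the class attributes used above:

import numpy as np
from scipy.spatial.distance import cdist

X_small = np.random.rand(5, 3)          # hypothetical samples assigned to small clusters
large_centers = np.random.rand(2, 3)    # hypothetical large-cluster centers

# outlier factor = distance to the nearest large-cluster center
scores = cdist(X_small, large_centers).min(axis=1)
print(scores.shape)  # (5,)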
Example #2
    def test_isin(self):

        bv = self.bv
        test_bv = BlockVector(2)
        a = np.array([1.1, 3.3])
        b = np.array([5.5, 7.7])
        test_bv[0] = a
        test_bv[1] = b

        res = pn.isin(bv, test_bv)
        for bid, blk in enumerate(bv):
            self.assertEqual(blk.size, res[bid].size)
            res_flat = np.isin(blk, test_bv[bid])
            self.assertTrue(np.allclose(res[bid], res_flat))

        c = np.concatenate([a, b])
        res = pn.isin(bv, c)
        for bid, blk in enumerate(bv):
            self.assertEqual(blk.size, res[bid].size)
            res_flat = np.isin(blk, c)
            self.assertTrue(np.allclose(res[bid], res_flat))

        res = pn.isin(bv, test_bv, invert=True)
        for bid, blk in enumerate(bv):
            self.assertEqual(blk.size, res[bid].size)
            res_flat = np.isin(blk, test_bv[bid], invert=True)
            self.assertTrue(np.allclose(res[bid], res_flat))

        c = np.concatenate([a, b])
        res = pn.isin(bv, c, invert=True)
        for bid, blk in enumerate(bv):
            self.assertEqual(blk.size, res[bid].size)
            res_flat = np.isin(blk, c, invert=True)
            self.assertTrue(np.allclose(res[bid], res_flat))
Example #3
def run_single(path, min_flow_area, max_gradient):
    logger.info("Analyzing scenario at {}".format(path))

    gr = GridH5ResultAdmin(os.path.join(path, GRIDADMIN_NAME),
                           os.path.join(path, RESULTS_NAME))


    lines2d2d_valid, lines1d2d_active, lines1d2d_valid = filter_lines(
        gr,
        min_flow_area=min_flow_area,
        max_gradient=max_gradient,
    )

    groups = group_nodes(lines2d2d_valid.line)

    cell_data = gr.cells.subset('2D_ALL').only("id", "cell_coords").data

    overlast_ids, plas_ids, modelfout_ids = classify_nodes(
        node_id_2d=cell_data['id'],
        groups=groups,
        lines1d2d_active=lines1d2d_active,
        lines1d2d_valid=lines1d2d_valid,
    )

    cell_data['case'] = np.full(cell_data['id'].size, '', dtype='S10')
    cell_data['case'][np.isin(cell_data['id'], plas_ids)] = 'plas'
    cell_data['case'][np.isin(cell_data['id'], overlast_ids)] = 'overlast'
    cell_data['case'][np.isin(cell_data['id'], modelfout_ids)] = 'modelfout'
    return cell_data, gr.epsg_code
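The three case assignments at the end are the same np.isin masking pattern; a minimal sketch with made-up ids (the real ids come from classify_nodes):

import numpy as np

ids = np.array([10, 11, 12, 13])        # hypothetical cell ids
overlast_ids = np.array([11, 13])       # hypothetical classification result

case = np.full(ids.size, '', dtype='S10')
case[np.isin(ids, overlast_ids)] = 'overlast'
print(case)  # [b'' b'overlast' b'' b'overlast']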
Example #4
    def __init__(self, skim_dict, orig_zones, dest_zones, transpose=False):

        omx_shape = skim_dict.skim_info['omx_shape']
        logger.info("init AccessibilitySkims with %d dest zones %d orig zones omx_shape %s" %
                    (len(dest_zones), len(orig_zones), omx_shape, ))

        assert len(orig_zones) <= len(dest_zones)
        assert np.isin(orig_zones, dest_zones).all()
        assert len(np.unique(orig_zones)) == len(orig_zones)
        assert len(np.unique(dest_zones)) == len(dest_zones)

        self.skim_dict = skim_dict
        self.transpose = transpose

        if omx_shape[0] == len(orig_zones):
            # no slicing required
            self.slice_map = None
        else:
            # 2-d boolean slicing in numpy is a bit tricky
            # data = data[orig_map, dest_map]          # <- WRONG!
            # data = data[orig_map, :][:, dest_map]    # <- RIGHT
            # data = data[np.ix_(orig_map, dest_map)]  # <- ALSO RIGHT

            skim_index = list(range(omx_shape[0]))
            orig_map = np.isin(skim_index, skim_dict.offset_mapper.map(orig_zones))
            dest_map = np.isin(skim_index, skim_dict.offset_mapper.map(dest_zones))

            if not dest_map.all():
                # not using the whole skim matrix
                logger.info("%s skim zones not in dest_map: %s" %
                            ((~dest_map).sum(), np.ix_(~dest_map)))

            self.slice_map = np.ix_(orig_map, dest_map)
Example #5
 def _apply_BCs(self):
     r"""
     Applies all the boundary conditions that have been specified, by
     adding values to the *A* and *b* matrices.
     """
     if 'pore.bc_rate' in self.keys():
         # Update b
         ind = np.isfinite(self['pore.bc_rate'])
         self.b[ind] = self['pore.bc_rate'][ind]
     if 'pore.bc_value' in self.keys():
         f = np.abs(self.A.diagonal()).mean()
         # Update b (impose bc values)
         ind = np.isfinite(self['pore.bc_value'])
         self.b[ind] = self['pore.bc_value'][ind] * f
         # Update b (subtract quantities from b to keep A symmetric)
         x_BC = np.zeros(self.b.shape)
         x_BC[ind] = self['pore.bc_value'][ind]
         self.b[~ind] -= (self.A.tocsr() * x_BC)[~ind]
         # Update A
         P_bc = self.toindices(ind)
         indrow = np.isin(self.A.row, P_bc)
         indcol = np.isin(self.A.col, P_bc)
         self.A.data[indrow] = 0  # Remove entries from A for all BC rows
         self.A.data[indcol] = 0  # Remove entries from A for all BC cols
         datadiag = self.A.diagonal()  # Add diagonal entries back into A
         datadiag[P_bc] = np.ones_like(P_bc, dtype=np.float64) * f
         self.A.setdiag(datadiag)
         self.A.eliminate_zeros()  # Remove 0 entries
Example #6
 def _get_cat_and_ncat(self, X):
     if self.category_name_is_set_ is False:
         raise NeedToSetCategoriesException()
     cat_X = X[np.isin(self.corpus_.get_category_names_by_row(),
                       [self.category_name] + self.neutral_category_names), :]
     ncat_X = X[np.isin(self.corpus_.get_category_names_by_row(),
                        self.not_category_names + self.neutral_category_names), :]
     if len(self.neutral_category_names) > 0:
         neut_X = X[np.isin(self.corpus_.get_category_names_by_row(), self.neutral_category_names), :]
         cat_X = vstack([cat_X, neut_X])
         ncat_X = vstack([ncat_X, neut_X])
     return cat_X, ncat_X
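The row selection by category membership used above, in a minimal self-contained form (the labels and matrix are invented for illustration):

import numpy as np

labels = np.array(['pos', 'neg', 'neutral', 'pos'])
X = np.arange(8).reshape(4, 2)

cat_rows = X[np.isin(labels, ['pos', 'neutral']), :]
print(cat_rows)  # rows 0, 2 and 3 of X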
Example #7
  def add_single_detected_image_info(self, image_id, detections_dict):
    """Adds detections for a single image to be used for evaluation.

    Args:
      image_id: A unique string/integer identifier for the image.
      detections_dict: A dictionary containing -
        standard_fields.DetectionResultFields.detection_boxes: A numpy array of
          structures with shape [N, 1], representing N tuples, each tuple
          containing the same number of named bounding boxes.
          Each box is of the format [y_min, x_min, y_max, x_max] (as an example
          see datatype vrd_box_data_type, single_box_data_type above).
        standard_fields.DetectionResultFields.detection_scores: float32 numpy
          array of shape [N] containing detection scores for the boxes.
        standard_fields.DetectionResultFields.detection_classes: A numpy array
          of structures shape [N, 1], representing the class labels of the
          corresponding bounding boxes and possibly additional classes (see
          datatype label_data_type above).
    """
    if image_id not in self._image_ids:
      logging.warn('No groundtruth for the image with id %s.', image_id)
      # Since for the correct work of the evaluator it is assumed that groundtruth
      # is inserted first, we make sure to break the code if it is not the case.
      self._image_ids.update([image_id])
      self._negative_labels[image_id] = np.array([])
      self._evaluatable_labels[image_id] = np.array([])

    num_detections = detections_dict[
        standard_fields.DetectionResultFields.detection_boxes].shape[0]
    detection_class_tuples = detections_dict[
        standard_fields.DetectionResultFields.detection_classes]
    detection_box_tuples = detections_dict[
        standard_fields.DetectionResultFields.detection_boxes]
    negative_selector = np.zeros(num_detections, dtype=bool)
    selector = np.ones(num_detections, dtype=bool)
    # Only check boxable labels
    for field in detection_box_tuples.dtype.fields:
      # Verify if one of the labels is negative (this is sure FP)
      negative_selector |= np.isin(detection_class_tuples[field],
                                   self._negative_labels[image_id])
      # Verify if all labels are verified
      selector &= np.isin(detection_class_tuples[field],
                          self._evaluatable_labels[image_id])
    selector |= negative_selector
    self._evaluation.add_single_detected_image_info(
        image_key=image_id,
        detected_box_tuples=self._process_detection_boxes(
            detection_box_tuples[selector]),
        detected_scores=detections_dict[
            standard_fields.DetectionResultFields.detection_scores][selector],
        detected_class_tuples=detection_class_tuples[selector])
Example #8
def isin(element, test_elements, assume_unique=False, invert=False):

    if isinstance(element, BlockVector) and isinstance(test_elements, BlockVector):
        assert not element.has_none, 'Operation not allowed with None blocks. Specify all blocks in BlockVector'
        assert not test_elements.has_none, 'Operation not allowed with None blocks. Specify all blocks in BlockVector'
        assert element.nblocks == test_elements.nblocks, 'Operation on BlockVectors need the same number of blocks on each operand'
        res = BlockVector(element.nblocks)
        for i in range(element.nblocks):
            res[i] = isin(element[i],
                          test_elements[i],
                          assume_unique=assume_unique,
                          invert=invert)
        return res

    elif isinstance(element, BlockVector) and isinstance(test_elements, np.ndarray):

        assert not element.has_none, 'Operation not allowed with None blocks. Specify all blocks in BlockVector'
        res = BlockVector(element.nblocks)
        for i in range(element.nblocks):
            res[i] = isin(element[i],
                          test_elements,
                          assume_unique=assume_unique,
                          invert=invert)
        return res

    elif isinstance(element, np.ndarray) and isinstance(test_elements, np.ndarray):

        return np.isin(element,
                       test_elements,
                       assume_unique=assume_unique,
                       invert=invert)

    else:
        raise NotImplementedError()
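The final ndarray/ndarray branch simply defers to numpy. For reference, np.isin returns an element-wise membership mask, and invert=True negates it; a small self-contained sketch, independent of BlockVector:

import numpy as np

a = np.array([1.1, 3.3, 5.5, 9.9])
test = np.array([5.5, 7.7, 1.1])

print(np.isin(a, test))               # [ True False  True False]
print(np.isin(a, test, invert=True))  # [False  True False  True]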
Example #9
def postprocess_clustered_data(which_cluster_each_point_is_in, points_to_be_clustered, min_cluster_size):
    clusters_to_keep, which_cluster_each_point_is_in_to_keep, points_to_be_clustered_to_keep = discard_noise(which_cluster_each_point_is_in, points_to_be_clustered)
    cluster_indices, cluster_sizes = np.unique(which_cluster_each_point_is_in_to_keep, return_counts=True)
    logging.info("Cluster sizes: {}".format(cluster_sizes))
    big_enough_cluster_indices = cluster_indices[cluster_sizes >= min_cluster_size]
    is_in_big_enough_cluster = np.isin(which_cluster_each_point_is_in_to_keep, big_enough_cluster_indices)
    return len(big_enough_cluster_indices), reindex_clusters(big_enough_cluster_indices, which_cluster_each_point_is_in_to_keep[is_in_big_enough_cluster]), points_to_be_clustered_to_keep[is_in_big_enough_cluster]
Example #10
def test_in_transit():
    t = np.linspace(-20, 20, 1000)
    m_planet = np.array([0.3, 0.5])
    m_star = 1.45
    r_star = 1.5
    orbit = KeplerianOrbit(
        m_star=m_star,
        r_star=r_star,
        t0=np.array([0.5, 17.4]),
        period=np.array([10.0, 5.3]),
        ecc=np.array([0.1, 0.8]),
        omega=np.array([0.5, 1.3]),
        m_planet=m_planet,
    )

    r_pl = np.array([0.1, 0.03])
    coords = theano.function([], orbit.get_relative_position(t))()
    r2 = coords[0]**2 + coords[1]**2
    inds = theano.function([], orbit.in_transit(t, r=r_pl))()

    m = np.isin(np.arange(len(t)), inds)
    in_ = r2[inds] <= ((r_star + r_pl)**2)[None, :]
    in_ &= coords[2][inds] > 0
    assert np.all(np.any(in_, axis=1))

    out = r2[~m] > ((r_star + r_pl)**2)[None, :]
    out |= coords[2][~m] <= 0
    assert np.all(out)
Example #11
 def check_probe(self):
     if not np.isin(self.chans, self.probe.chans).all():
         raise ValueError("Data chans are not a subset of probe chans. Wrong probe "
                          "specified in .json file?\n"
                          "Data chans:\n%s\n"
                          "Probe %r chans:\n%s"
                          % (self.chans, self.probename, self.probe.chans))
Example #12
    def tour_available(self, window_row_ids, tdds):
        """
        test whether time window allows tour with specific tdd alt's time window

        Parameters
        ----------
        window_row_ids : pandas Series
            series of window_row_ids indexed by tour_id
        tdds : pandas series
            series of tdd_alt ids, index irrelevant

        Returns
        -------
        available : pandas Series of bool
            with same index as window_row_ids.index (presumably tour_id, but we don't care)
        """

        assert len(window_row_ids) == len(tdds)

        # numpy array with one tdd_footprints_df row for tdds
        tour_footprints = self.tdd_footprints[tdds.values.astype(int)]

        # numpy array with one windows row for each person
        windows = self.slice_windows_by_row_id(window_row_ids)

        # t0 = tracing.print_elapsed_time("slice_windows_by_row_id", t0, debug=True)

        x = tour_footprints + (windows << I_BIT_SHIFT)

        available = ~np.isin(x, COLLISION_LIST).any(axis=1)
        available = pd.Series(available, index=window_row_ids.index)

        return available
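The availability test is a row-wise "contains no forbidden value" check. The same pattern with illustrative numbers; COLLISION_LIST and the bit-shifted window encoding belong to the original class:

import numpy as np

x = np.array([[1, 2, 3],
              [4, 5, 6]])
forbidden = [5, 9]

available = ~np.isin(x, forbidden).any(axis=1)
print(available)  # [ True False]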
Example #13
    def test_return_values(self):
        out = draw(self.cdf)
        ok_(out in range(self.n))

        size = 10
        out = draw(self.cdf, size)
        ok_(np.isin(out, range(self.n)).all())
Example #14
def lofar2image(all_data, all_trgt,
                index_info, window_size, stride,
                run_indices_info,
                filepath=None,
                dtype=np.float64):
    fold_runs = np.concatenate([np.extract([np.isin(run, index_info).all() for run in cls_runs], cls_runs)
                                for cls_runs in run_indices_info.runs.values()])
    pruned_indexes = np.concatenate([range(run[0], run[-1] - window_size, stride) for run in fold_runs])

    data_shape = (pruned_indexes.shape[0],
                  window_size,
                  all_data.shape[1],
                  1)
    if filepath is not None:
        image_data = np.memmap(filename=filepath, shape=data_shape, mode='w+', dtype=dtype)
    else:
        image_data = np.zeros(shape=data_shape, dtype=dtype)

    trgt_image = np.zeros(shape=data_shape[0])

    for image_index, spectre_index in enumerate(pruned_indexes):
        new_data = all_data[spectre_index:spectre_index + window_size, :]
        new_data = np.array(new_data.reshape(new_data.shape[0], new_data.shape[1], 1), np.float64)
        image_data[image_index] = new_data
        trgt_image[image_index] = all_trgt[spectre_index]
    print('trgt')
    print(np.unique(trgt_image))
    return [image_data, trgt_image]
Example #15
    def window_periods_in_states(self, window_row_ids, periods, states):
        """
        Return boolean array indicating whether specified window periods are in list of states.

        Internal DRY method to implement previous_tour_ends and previous_tour_begins

        Parameters
        ----------
        window_row_ids : pandas Series int
            series of window_row_ids indexed by tour_id
        periods : pandas series int
            series of tdd_alt ids, index irrelevant (one period per window_row_id)
        states : list of int
            presumably (e.g. I_EMPTY, I_START...)

        Returns
        -------
        pandas Series boolean
            indexed by window_row_ids.index
        """

        assert len(window_row_ids) == len(periods)

        window = self.slice_windows_by_row_id_and_period(window_row_ids, periods)

        return pd.Series(np.isin(window, states), window_row_ids.index)
Example #16
    def set_gl_hl_mask(self, artist, hit_id = None,
                       cmask = 0.0, amask = 0.65):
        #
        #  logic is
        #     if artist_id is found within radius from (x, y)
        #     and
        #     if it is the closest artist in the area of checking
        #     then return True

        if self._gl_id_data is None: return False
        if self._gl_mask_artist is None: return False

        # do not do this when the hittest_map is updating; this is when
        # mouse dragging is going on
        if not get_glcanvas()._hittest_map_update: return
        x0, y0, id_dict, im, imd, im2 = self._gl_id_data
              
        arr = self._gl_mask_artist.get_array()

        for k in id_dict.keys():
            if (id_dict[k]() == artist):
               if hit_id is not None:
                   if len(hit_id) > 0:
                       mask = np.isin(imd, hit_id)
                       m = np.logical_and(im == k, mask)
                   else:
                       m = (im == k)                       
               else:
                   m = (im == k)
               
               c = self.figure.canvas.hl_color                           
               arr[:,:,:3][m] = np.array(c, copy=False)
               arr[:,:,3][m] = amask               
               break
Example #17
    def check_criterion(self, compiled_record, trial_record, **kwargs):
        trial_number = np.asarray(compiled_record['trial_number'])
        current_step = np.asarray(compiled_record['current_step'])
        correct = np.asarray(compiled_record['correct'])
        protocol_name = np.asarray(compiled_record['protocol_name'])
        protocol_ver = np.asarray(compiled_record['protocol_version_number'])

        # filter out trial_numbers for current protocol_name and protocol_ver
        current_step = current_step[np.bitwise_and(protocol_name==protocol_name[-1],protocol_ver==protocol_ver[-1])]
        trial_number = trial_number[np.bitwise_and(protocol_name==protocol_name[-1],protocol_ver==protocol_ver[-1])]
        correct = correct[np.bitwise_and(protocol_name==protocol_name[-1],protocol_ver==protocol_ver[-1])]

        if self.num_trials_mode == 'consecutive':
            jumps = np.where(np.diff(trial_number) != 1)  # jumps in trial number
            if jumps[0].size == 0:
                which_trials = trial_number
            else:
                which_trials = trial_number[jumps[0][-1]:]  # from the last jump
        else:
            which_trials = trial_number

        if np.size(which_trials) < self.num_trials:
            graduate = False  # don't graduate if the number of trials is less than the number required
        else:
            which_trials = which_trials[-self.num_trials:]
            trial_filter = np.isin(trial_number, which_trials)
            correct = correct[trial_filter]
            perf = np.sum(correct)/np.size(correct)
            if perf > self.pct_correct:
                graduate = True
            else:
                graduate = False

        return graduate
Example #18
def euc_dist(a, origins=0, cell_size=1):
    """Calculate the euclidean distance and/or allocation

    Parameters:
    -----------
    a : array
        numpy float or integer array
    origins : number, list or tuple
        The locations to calculate distance for.  Anything that is not a mask
        is an origin. If a single number is provided, a `mask` will be created
        using it.  A list/tuple of values can be used for multiple value
        masking.
    cell_size : float, int
        The cell size of the raster.  What does each cell represent on the
        ground.  1.0 is assumed
    """
    from scipy import ndimage as nd
    #
    cell_size = abs(cell_size)
    if cell_size == 0:
        cell_size = 1
    msk = (~np.isin(a, origins)).astype('int')
    dist = nd.distance_transform_edt(msk,
                                     sampling=cell_size,
                                     return_distances=True)
    return dist
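A minimal usage sketch for the function above, assuming scipy is installed: cells equal to the origin value get distance 0, and every other cell gets its euclidean distance (in cell units) to the nearest origin.

import numpy as np

a = np.array([[0, 1, 1],
              [1, 1, 1],
              [1, 1, 0]])
print(euc_dist(a, origins=0, cell_size=1))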
Example #19
def append_column(xray_evtfile, grp_fitsfile):
	hdu_xray = fits.open(xray_evtfile)
	xray_mod_pulse_number = hdu_xray['EVENTS'].data['MOD_PULSE_NUMBER']
	num_of_xrays = len(xray_mod_pulse_number)
	print(xray_mod_pulse_number.dtype)

	hdu_grp = fits.open(grp_fitsfile)
	grp_mod_pulse_number = hdu_grp['GRP'].data['NSEQpulse']
	num_of_grps = len(grp_mod_pulse_number)
	print(grp_mod_pulse_number.dtype)

	sys.stdout.write('Number of X-rays: %d\n' % num_of_xrays)
	sys.stdout.write('Number of GRPs: %d\n' % num_of_grps)

	xray_flag_isin_grp = np.isin(xray_mod_pulse_number,grp_mod_pulse_number)
	num_of_xrays_in_grp = np.sum(xray_flag_isin_grp==True)
	sys.stdout.write('Number of X-rays within GRPs: %d\n' % num_of_xrays_in_grp)

	new_columns = fits.ColDefs([
		fits.Column(name='MPGRP5.5',format='L',array=xray_flag_isin_grp)
		])

	hdu_xray_events_columns = hdu_xray['EVENTS'].columns

	xray_evtfile_grp = '%s_grp.evt' % os.path.splitext(xray_evtfile)[0]
	cmd = 'rm -f %s' % xray_evtfile_grp
	print(cmd);os.system(cmd)

	hdu_primary = fits.PrimaryHDU()
	hdu_events = fits.BinTableHDU.from_columns(hdu_xray_events_columns+new_columns,name='EVENTS')
	hdulist = fits.HDUList([hdu_primary,hdu_events])
	hdulist.writeto(xray_evtfile_grp)
Example #20
 def add_component(self, c):
     """Initialize a new model component and prepare to save its optimized outputs. 
     The component name should be consistent across all order models. 
     
     Note that if a component name was initialized in the models for 1+ orders but 
     was not included in all order models, its RV values/uncertainties will be set 
     to NaNs and all other properties set to 0 for the excluded order(s).
     
     Parameters
     ----------
     c : a wobble.Model.Component object
     """
     if np.isin(c.name, self.component_names):
         print("Results: A component of name {0} has already been added.".format(c.name))
         return
     self.component_names.append(c.name)
     basename = c.name+'_'
     setattr(self, basename+'rvs', np.empty((self.R,self.N)) + np.nan)
     setattr(self, basename+'ivars_rvs', np.empty((self.R,self.N)) + np.nan)
     setattr(self, basename+'template_xs', [0 for r in range(self.R)])
     setattr(self, basename+'template_ys', [0 for r in range(self.R)])
     setattr(self, basename+'template_ivars', [0 for r in range(self.R)])
     if c.K > 0:
         setattr(self, basename+'basis_vectors', [0 for r in range(self.R)])
         setattr(self, basename+'basis_weights', [0 for r in range(self.R)])
     setattr(self, basename+'ys_predicted', [0 for r in range(self.R)])
     attrs = COMPONENT_NP_ATTRS
     if c.K > 0:
         attrs = np.append(attrs, OPT_COMPONENT_NP_ATTRS)
     for attr in attrs:
         setattr(self, basename+attr, [0 for r in range(self.R)])
Example #21
 def volume_to_level(self, node, waterlevel):
     if node.current_vol > 0:
         # maxelev = node.parent.elev
         maxelev = self.dem.max()
         if node.elev:
             minelev = node.elev
         else:
             # TODO: This bound could be a lot better
             minelev = np.nanmin(self.dem)
         target_vol = node.current_vol
         elev = optimize.bisect(self.compute_vol, minelev, maxelev,
                             args=(node, target_vol))
         if node.name:
             mask = self.ws[node.level] == node.name
         else:
             leaves = []
             self.enumerate_leaves(node, level=node.level, stack=leaves)
             mask = np.isin(self.ws[node.level], leaves)
             boundary = list(chain.from_iterable([self.b[node.level].setdefault(pair, [])
                                                  for pair in combinations(leaves, 2)]))
             mask.flat[boundary] = True
         mask = np.flatnonzero(mask & (self.dem < elev))
         waterlevel.flat[mask] = elev
     else:
         if node.l:
             self.volume_to_level(node.l, waterlevel)
         if node.r:
             self.volume_to_level(node.r, waterlevel)
Example #22
    def flagStats_single(self, fname):
        '''counter of all the primary and secondary flags

        '''
        import pandas as pd
        df = Dataset(fname, 'r')
        arr = [pd.Series({'time size': df['time'].size})]
        for vrbl in df.variables:
            if '_flagPrimary' in vrbl:
                dict = {}
                v = vrbl.split('_')[0]
                flagP = vrbl
                flagS = v+'_flagSecondary'
                pArr = df[flagP][:]
                for p in [1,2,3,4,9]:
                    # print flagP, p,':', df[flagP][:].tolist().count(p)
                    dict[flagP+'.'+str(p)] = df[flagP][:].tolist().count(p)
                for s in [1,2,3]:
                    # print flagS, s, ':', df[flagS][:].tolist().count(s)
                    pAtsArr = df[flagP][np.isin(df[flagS][:],s)]
                    # print flagS, s, '(3):', pAtsArr.tolist().count(3)
                    # print flagS, s, '(4):', pAtsArr.tolist().count(4)
                    dict[flagS+'.'+str(s)+'.3']=  pAtsArr.tolist().count(3)
                    dict[flagS+'.'+str(s)+'.4']=  pAtsArr.tolist().count(4)
                arr.append(pd.Series(dict))
        df.close()
        return pd.concat(arr)
Example #23
 def mask_array_idx(self):
     if self._gl_array_idx is not None:
          array_idx = np.abs(self._gl_array_idx)
          mask = np.isin(array_idx, self._gl_hit_array_id)
          array_idx[mask] *= -1
          self._gl_array_idx  = array_idx                       
     self._update_a = True
     self.axes.figobj._bmp_update = False # ugly...!?
Example #24
 def crossover(self, parent, pop):
     if np.random.rand() < self.cross_rate:
         i_ = np.random.randint(0, self.pop_size, size=1)                        # select another individual from pop
         cross_points = np.random.randint(0, 2, self.DNA_size).astype(bool)   # choose crossover points
         keep_city = parent[~cross_points]                                       # find the city number
         swap_city = pop[i_, np.isin(pop[i_].ravel(), keep_city, invert=True)]
         parent[:] = np.concatenate((keep_city, swap_city))
     return parent
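The np.isin(..., invert=True) step keeps the child a valid permutation: cities kept from one parent are excluded when the remainder is pulled from the other. A self-contained sketch of the same idea, outside the class:

import numpy as np

rng = np.random.default_rng(0)
parent = rng.permutation(8)
other = rng.permutation(8)

cross_points = rng.integers(0, 2, parent.size).astype(bool)
keep = parent[~cross_points]                          # cities kept from this parent
swap = other[np.isin(other, keep, invert=True)]       # remaining cities, in the other's order
child = np.concatenate((keep, swap))

assert np.array_equal(np.sort(child), np.arange(8))   # still a permutation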
Example #25
    def test_payoff_values(self):
        possible_values = [0, 1]
        for payoff_array in self.g.payoff_arrays:
            ok_(np.isin(payoff_array, possible_values).all())

        max_num_dominated_subsets = \
            sum([comb(i, self.k, exact=True) for i in range(self.n)])
        ok_(self.g.payoff_arrays[0].sum() <= max_num_dominated_subsets)
        ok_((self.g.payoff_arrays[1].sum(axis=1) == self.k).all())
Example #26
 def _convert_frame_index(self, pts):
     """
     Calculate frame index by current_container_index
     """
     # If the current container is 0, the pts is 128 (second frame)
     # cont_frame_idx -> 1
     # cont_mask: Return T only if the container=current container -> [T, T, T, F, F, F]
     # frame_mask: Return T only if the self.videoset.lookup.container_frame_idx=cont_frame_idx
     # -> [F, T(the second frame of the first container), F,
     #     F, T(the second frame of the second container), F, F]
     # videoset_idx: Return index which T in cont_mask and frame_mask
     cont_frame_idx = self.pts_to_idx(pts)
     cont_mask = np.isin(
         self.videoset.lookup.container_idx, self.current_container_index
     )
     frame_mask = np.isin(self.videoset.lookup.container_frame_idx, cont_frame_idx)
     videoset_idx = np.flatnonzero(cont_mask & frame_mask)[0]
     return videoset_idx
Example #27
    def distribute_cards(self):
        deck = np.arange(5,57) #52 cards starting at 5 so that later code will make arrays starting at 2
        mycards = np.array([self.card1, self.card2])
        mask = np.isin(deck, mycards, invert=True)
        deck = deck[mask]  # deletes my cards out of deck
        mask = np.isin(deck, self.tableCards, invert=True)
        deck = deck[mask]  # deletes set tableCards out of deck
        mycards = np.array([self.card1, self.card2])
        mycards = np.append(mycards, self.tableCards) # [My first card, My second card, TableCards]

        #shuffles the deck
        alldecks = np.tile(deck, reps=(self.iterations, 1)) # creates copies of decks  = number of games
        temp_random = np.random.random(alldecks.shape) # create random number arrays that are the size of "alldecks"
        idx = np.argsort(temp_random, axis=-1) #creates ranking of random numbers Highest to Lowest
        shuffled = alldecks[np.arange(alldecks.shape[0])[:, None], idx] #organizes deck according to random ranking

        cards_at_end_of_game = 7 # each player will have 7 cards in their array
        shuffled_cards_to_append = cards_at_end_of_game - len(self.tableCards) # 7 total cards minus ones set to be on the table

        #makes opponent's cards for all games lookup table - later to be turned into numpy array
        cards_player = {}
        for i in range(0, self.player_amount-1):
            startingOppsCard= shuffled_cards_to_append + (i*2)
            endingOppsCard = startingOppsCard+2
            cards_player[i] = shuffled[:,startingOppsCard:endingOppsCard] #first bit of random cards are set aside for random draws
            np.delete(shuffled,shuffled[:,startingOppsCard:endingOppsCard])
            if len(self.tableCards) != 0:
                cards_player[i] = np.insert(cards_player[i] , 2 , self.tableCards[:,None],axis = 1) # puts set tablecards into players 7 cards

        cards_combined_mine = np.append(np.tile(mycards, reps=(self.iterations, 1)), shuffled, axis=1)[:,
                              0:cards_at_end_of_game] #repeats my cards iterations of time to match opponents random iteration arrays
        cards_combined_array = [cards_combined_mine] # create cards list that starts with my cards

        #append players 7 cards to yours
        for i in range(0, self.player_amount-1):
            cards_combined_array.append(np.append(cards_player[i], shuffled, axis=1)[:, 0:cards_at_end_of_game])

        cards_combined = np.stack(cards_combined_array, axis=-1)  # stack over last axis (=axis 3)
        cards = np.ceil(cards_combined / 4)
        suits = cards_combined % 4 * 1
        self.decks = np.stack((cards, suits), axis=2)  # [iterations, 7, card or suits, player_index]
        self.cards = self.decks[:, :, 0, :]  # [iterations, 7, player_index]
        self.suits = self.decks[:, :, 1, :]  # [iterations, 7, player_index]
        self.cards_sorted = np.sort(self.cards, axis=1)[:, ::-1, :]
Example #28
 def setSelectedIndex(self, ll):
     array_idx = self.getvar('array_idx')
     if array_idx is None: return
     array_idx = array_idx.copy()
     mask = np.isin(array_idx, np.array(ll, copy=False))
     array_idx[mask] *= -1
     for a in self._artists:                  
         a._gl_hit_array_id = ll                  
         a._gl_array_idx  = array_idx                       
         a._update_a = True
Example #29
  def add_single_detected_image_info(self, image_id, detections_dict):
    """Adds detections for a single image to be used for evaluation.

    Args:
      image_id: A unique string/integer identifier for the image.
      detections_dict: A dictionary containing -
        standard_fields.DetectionResultFields.detection_boxes: A numpy array of
          structures with shape [N, 1], representing N tuples, each tuple
          containing the same number of named bounding boxes.
          Each box is of the format [y_min, x_min, y_max, x_max] (as an example
          see datatype vrd_box_data_type, single_box_data_type above).
        standard_fields.DetectionResultFields.detection_scores: float32 numpy
          array of shape [N] containing detection scores for the boxes.
        standard_fields.DetectionResultFields.detection_classes: A numpy array
          of structures shape [N, 1], representing the class labels of the
          corresponding bounding boxes and possibly additional classes (see
          datatype label_data_type above).
    """
    num_detections = detections_dict[
        standard_fields.DetectionResultFields.detection_boxes].shape[0]
    detection_class_tuples = detections_dict[
        standard_fields.DetectionResultFields.detection_classes]
    detection_box_tuples = detections_dict[
        standard_fields.DetectionResultFields.detection_boxes]
    selector = np.ones(num_detections, dtype=bool)

    # Only check boxable labels
    for field in detection_box_tuples.dtype.fields:
      # Verify if one of the labels is negative (this is sure FP)
      selector |= np.isin(detection_class_tuples[field],
                          self._negative_labels[image_id])
      # Verify if all labels are verified
      selector |= np.isin(detection_class_tuples[field],
                          self._evaluatable_labels[image_id])

    self._evaluation.add_single_detected_image_info(
        image_key=image_id,
        detected_box_tuples=self._process_detection_boxes(
            detection_box_tuples[selector]),
        detected_scores=detections_dict[
            standard_fields.DetectionResultFields.detection_scores][selector],
        detected_class_tuples=detection_class_tuples[selector])
Example #30
def main(squad_h5_path, weight_path):
    char_label = read_label(squad_h5_path)
    char_emb = read_char_emb(weight_path)

    show_char = list(string.ascii_letters + string.digits)
    show_id = np.where(np.isin(char_label, show_char))[0]

    show_emb = char_emb[show_id]
    show_label = char_label[show_id]

    hyp.plot(show_emb, '.', labels=show_label, n_clusters=10, reduce='TSNE', align='hyper')
Example #31
def crop_cells(sample_id,
               image_dir,
               save_dir,
               min_nuc_size=100,
               cropsize=32,
               threshold=0.5):
    channel_names = ["w" + str(i) for i in range(1, 7)]
    channels = {}
    for cn in channel_names:
        channels[cn] = np.array(
            Image.open(image_dir + sample_id + "_" + cn + ".png"))

    nuclei = channels["w1"]
    (width, height) = nuclei.shape

    smoothed_nuclei = gaussian(nuclei, sigma=1.0)
    val = threshold_otsu(smoothed_nuclei)

    binary_nuclei = smoothed_nuclei > val
    binary_nuclei = remove_small_holes(binary_nuclei, min_size=100)

    labeled_nuclei = label(binary_nuclei)
    labeled_nuclei = clear_border(labeled_nuclei)
    labeled_nuclei = remove_small_objects(labeled_nuclei,
                                          min_size=min_nuc_size)

    unique, counts = np.unique(labeled_nuclei, return_counts=True)
    background_index = np.argmax(counts)
    counts = counts[unique != background_index]
    unique = unique[unique != background_index]

    cells_use = unique > -1
    for i in range(unique.size):
        mask = (labeled_nuclei == unique[i])

        y, x = center_of_mass(mask)
        x = int(x)
        y = int(y)

        c1 = y - cropsize // 2
        c2 = y + cropsize // 2
        c3 = x - cropsize // 2
        c4 = x + cropsize // 2

        if c1 < 0 or c2 >= height or c3 < 0 or c4 >= width:
            cells_use[i] = False

    unique = unique[cells_use]
    counts = counts[cells_use]

    mask = np.isin(labeled_nuclei,
                   unique[counts <= np.quantile(counts, threshold)])
    mask = 255 * mask.astype("uint8")
    Image.fromarray(mask)

    cells_use = counts <= np.quantile(counts, threshold)
    unique = unique[cells_use]
    counts = counts[cells_use]

    for i in range(unique.size):
        mask = (labeled_nuclei == unique[i])

        y, x = center_of_mass(mask)
        x = int(x)
        y = int(y)

        c1 = y - cropsize // 2
        c2 = y + cropsize // 2
        c3 = x - cropsize // 2
        c4 = x + cropsize // 2

        for cn, img in channels.items():
            cropped = img[c1:c2, c3:c4]
            image_name = save_dir + sample_id + "_cid" + str(
                i) + "_" + cn + "_cx" + str(x) + "_cy" + str(y) + ".png"

            #create path if it doesn't exist
            if not os.path.exists(os.path.dirname(image_name)):
                try:
                    os.makedirs(os.path.dirname(image_name))
                except OSError as exc:  # Guard against race condition
                    if exc.errno != errno.EEXIST:
                        raise

            Image.fromarray(cropped).save(image_name)
    return
Example #32
def PlottingSingleFramecv2(
    cap,
    crop,
    coords,
    Dataframe,
    bodyparts2plot,
    tmpfolder,
    index,
    dotsize,
    pcutoff,
    alphavalue,
    colors,
    strwidth=4,
    savelabeled=True,
):
    """ Label frame and save under imagename / cap is not already cropped. """
    from skimage import io

    imagename1 = os.path.join(tmpfolder,
                              "img" + str(index).zfill(strwidth) + ".png")
    imagename2 = os.path.join(
        tmpfolder, "img" + str(index).zfill(strwidth) + "labeled.png")

    if not os.path.isfile(
            os.path.join(tmpfolder,
                         "img" + str(index).zfill(strwidth) + ".png")):
        plt.axis("off")
        cap.set_to_frame(index)
        frame = cap.read_frame()
        if frame is None:
            print("Frame could not be read.")
            return
        image = img_as_ubyte(frame)
        if crop:
            image = image[int(coords[2]):int(coords[3]),
                          int(coords[0]):int(coords[1]), :]

        io.imsave(imagename1, image)

        if savelabeled:
            if np.ndim(image) > 2:
                h, w, nc = np.shape(image)
            else:
                h, w = np.shape(image)

            bpts = Dataframe.columns.get_level_values("bodyparts")
            all_bpts = bpts.values[::3]
            df_x, df_y, df_likelihood = Dataframe.values.reshape(
                (Dataframe.shape[0], -1, 3)).T
            bplist = bpts.unique().to_list()
            if Dataframe.columns.nlevels == 3:
                map2bp = list(range(len(all_bpts)))
            else:
                map2bp = [bplist.index(bp) for bp in all_bpts]
            keep = np.flatnonzero(np.isin(all_bpts, bodyparts2plot))

            plt.figure(frameon=False, figsize=(w * 1.0 / 100, h * 1.0 / 100))
            plt.subplots_adjust(left=0,
                                bottom=0,
                                right=1,
                                top=1,
                                wspace=0,
                                hspace=0)
            plt.imshow(image)
            for i, ind in enumerate(keep):
                if df_likelihood[ind, index] > pcutoff:
                    plt.scatter(
                        df_x[ind, index],
                        df_y[ind, index],
                        s=dotsize**2,
                        color=colors(map2bp[i]),
                        alpha=alphavalue,
                    )
            plt.xlim(0, w)
            plt.ylim(0, h)
            plt.axis("off")
            plt.subplots_adjust(left=0,
                                bottom=0,
                                right=1,
                                top=1,
                                wspace=0,
                                hspace=0)
            plt.gca().invert_yaxis()
            plt.savefig(imagename2)
            plt.close("all")
Example #33
xmmcoord = SkyCoord(xmm['RAJ2000'], xmm['DEJ2000'])
idx, d2d, _ = match_coordinates_sky(xmmcoord, bestmfcoord)
mask = d2d <= 5 * u.arcsec  #make sure match is within 5 arcsec (like in topcat)
idx = idx[mask]
xmmmf = bestmf[idx]

## match with chandra
print('Matching Chandra')
chan = Table.read('UDS_catalogues/chandra_catalogue.fits')
chan['RA'].unit = u.deg
chan['Dec'].unit = u.deg
chancoord = SkyCoord(chan['RA'], chan['Dec'])
idx, d2d, _ = match_coordinates_sky(chancoord, bestmfcoord)
mask = d2d <= 1 * u.arcsec  #make sure match is within 1 arcsec (like in topcat)
idx = idx[mask]
chanmf = bestmf[idx]

# combine chandra and xmm
print('Joining xray table')
xraymf = vstack([chanmf, xmmmf])
#%%
# boolean whether a source is seen in x-rays
xray = np.isin(bestmf['NUMBER_05B'], xraymf['NUMBER_05B'])
xraycol = Column(xray, 'X-ray')
bestmf.add_column(xraycol)

#%% Save the tables
semcom.write('mag_flux_tables/mag_flux_table_extra_clean_no06.fits')
bestmf.write('mag_flux_tables/mag_flux_table_best_extra_clean_no06.fits')
starsmf.write('mag_flux_tables/stars_mag_flux_table_extra_clean_no06.fits')
xraymf.write('mag_flux_tables/xray_mag_flux_table_best_extra_clean_no06.fits')
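The xray flag above is a plain id cross-match; the same np.isin pattern with made-up ids:

import numpy as np

all_ids = np.array([101, 102, 103, 104])    # hypothetical source ids
xray_ids = np.array([102, 104])             # hypothetical ids detected in X-rays

is_xray = np.isin(all_ids, xray_ids)
print(is_xray)  # [False  True False  True]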
Example #34
def test_multibuf_stack():
    size = 5
    bufsize = 9
    stack_num = 4
    cached_num = 3
    env = MyTestEnv(size)
    # test if CachedReplayBuffer can handle stack_num + ignore_obs_next
    buf4 = CachedReplayBuffer(
        ReplayBuffer(bufsize, stack_num=stack_num, ignore_obs_next=True),
        cached_num, size)
    # test if CachedReplayBuffer can handle corner case:
    # buffer + stack_num + ignore_obs_next + sample_avail
    buf5 = CachedReplayBuffer(
        ReplayBuffer(bufsize,
                     stack_num=stack_num,
                     ignore_obs_next=True,
                     sample_avail=True), cached_num, size)
    obs = env.reset(1)
    for i in range(18):
        obs_next, rew, done, info = env.step(1)
        obs_list = np.array([obs + size * i for i in range(cached_num)])
        act_list = [1] * cached_num
        rew_list = [rew] * cached_num
        done_list = [done] * cached_num
        obs_next_list = -obs_list
        info_list = [info] * cached_num
        batch = Batch(obs=obs_list,
                      act=act_list,
                      rew=rew_list,
                      done=done_list,
                      obs_next=obs_next_list,
                      info=info_list)
        buf5.add(batch)
        buf4.add(batch)
        assert np.all(buf4.obs == buf5.obs)
        assert np.all(buf4.done == buf5.done)
        obs = obs_next
        if done:
            obs = env.reset(1)
    # check the `add` order is correct
    assert np.allclose(
        buf4.obs.reshape(-1),
        [
            12,
            13,
            14,
            4,
            6,
            7,
            8,
            9,
            11,  # main_buffer
            1,
            2,
            3,
            4,
            0,  # cached_buffer[0]
            6,
            7,
            8,
            9,
            0,  # cached_buffer[1]
            11,
            12,
            13,
            14,
            0,  # cached_buffer[2]
        ]), buf4.obs
    assert np.allclose(
        buf4.done,
        [
            0,
            0,
            1,
            1,
            0,
            0,
            0,
            1,
            0,  # main_buffer
            0,
            0,
            0,
            1,
            0,  # cached_buffer[0]
            0,
            0,
            0,
            1,
            0,  # cached_buffer[1]
            0,
            0,
            0,
            1,
            0,  # cached_buffer[2]
        ]), buf4.done
    assert np.allclose(buf4.unfinished_index(), [10, 15, 20])
    indice = sorted(buf4.sample_index(0))
    assert np.allclose(indice, list(range(bufsize)) + [9, 10, 14, 15, 19, 20])
    assert np.allclose(buf4[indice].obs[..., 0], [
        [11, 11, 11, 12],
        [11, 11, 12, 13],
        [11, 12, 13, 14],
        [4, 4, 4, 4],
        [6, 6, 6, 6],
        [6, 6, 6, 7],
        [6, 6, 7, 8],
        [6, 7, 8, 9],
        [11, 11, 11, 11],
        [1, 1, 1, 1],
        [1, 1, 1, 2],
        [6, 6, 6, 6],
        [6, 6, 6, 7],
        [11, 11, 11, 11],
        [11, 11, 11, 12],
    ])
    assert np.allclose(buf4[indice].obs_next[..., 0], [
        [11, 11, 12, 13],
        [11, 12, 13, 14],
        [11, 12, 13, 14],
        [4, 4, 4, 4],
        [6, 6, 6, 7],
        [6, 6, 7, 8],
        [6, 7, 8, 9],
        [6, 7, 8, 9],
        [11, 11, 11, 12],
        [1, 1, 1, 2],
        [1, 1, 1, 2],
        [6, 6, 6, 7],
        [6, 6, 6, 7],
        [11, 11, 11, 12],
        [11, 11, 11, 12],
    ])
    indice = buf5.sample_index(0)
    assert np.allclose(sorted(indice), [2, 7])
    assert np.all(np.isin(buf5.sample_index(100), indice))
    # manually change the stack num
    buf5.stack_num = 2
    for buf in buf5.buffers:
        buf.stack_num = 2
    indice = buf5.sample_index(0)
    assert np.allclose(sorted(indice), [0, 1, 2, 5, 6, 7, 10, 15, 20])
    batch, _ = buf5.sample(0)
    # test Atari with CachedReplayBuffer, save_only_last_obs + ignore_obs_next
    buf6 = CachedReplayBuffer(
        ReplayBuffer(bufsize,
                     stack_num=stack_num,
                     save_only_last_obs=True,
                     ignore_obs_next=True), cached_num, size)
    obs = np.random.rand(size, 4, 84, 84)
    buf6.add(Batch(obs=[obs[2], obs[0]],
                   act=[1, 1],
                   rew=[0, 0],
                   done=[0, 1],
                   obs_next=[obs[3], obs[1]]),
             buffer_ids=[1, 2])
    assert buf6.obs.shape == (buf6.maxsize, 84, 84)
    assert np.allclose(buf6.obs[0], obs[0, -1])
    assert np.allclose(buf6.obs[14], obs[2, -1])
    assert np.allclose(buf6.obs[19], obs[0, -1])
    assert buf6[0].obs.shape == (4, 84, 84)
Example #35
def contain(arr, target):
    newarr = np.asarray(arr)
    return np.isin(target, newarr)
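Note that contain reverses the argument order relative to np.isin: it asks whether target occurs in arr. A brief usage sketch:

print(contain([1, 2, 3], 2))        # True
print(contain([1, 2, 3], [2, 5]))   # [ True False]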
Example #36
def run(raster, cut_off):
    """The floodfill agorithm as described in the paper
    and implemented in Nogueira et al..

    :param raster: the raster data
    :type raster: numpy.ndarray
    :param cut_off: the cut-off value in days –
        this value is a temporal threshold of when to
        consider two fires distinct events.
    :type cut_off: int
    :return: a tuple of length two, containing two numpy.ndarrays
        with the same dimensions as the input 'raster'.
        The first item are the fire patches as connected components
        where each event has a unique id.
        The second item are the burn dates.
    :rtype: tuple
    """

    # get coordinates of burned pixels
    y_coor, x_coor = numpy.where(raster > 0)
    n_pixels = len(y_coor)
    logging.info(f"Found {n_pixels} candidate pixels.")

    # iterate through the pixels
    burn_date_raster = numpy.zeros_like(raster, dtype=numpy.int16)
    fire_id_raster = numpy.zeros_like(raster, dtype=numpy.uint16)
    fire_counter = 1
    for y, x in zip(y_coor, x_coor):

        pixel_date = raster[y, x]

        # get pixel neighbors
        neighbor_mask = _get_neighbors(x, y, raster)
        neighbor_ids = fire_id_raster[neighbor_mask]
        neighbor_dates = burn_date_raster[neighbor_mask]

        # meta data of pixel neighbors
        current_fire = numpy.abs(neighbor_dates - pixel_date) <= cut_off
        old_fire_present = neighbor_ids.sum() > 0
        n_current_fires = current_fire.sum()
        current_ids = numpy.unique(neighbor_ids[current_fire])
        n_current_ids = len(current_ids)

        # evaluate pixel
        if not old_fire_present or (old_fire_present and n_current_fires == 0):
            # new fire, because there are no previous
            # or only old fires registered in this neighborhood
            if fire_id_raster[y, x] > 0:
                # an old fire was already here
                logging.info("An old fire was already present"
                             f"at pixel ({y},{x}).")
                continue
            # we actually found a completely new fire
            fire_id_raster[y, x] = fire_counter
            burn_date_raster[y, x] = pixel_date
            fire_counter += 1
            continue

        if old_fire_present and n_current_fires > 0:
            if n_current_fires == 1 or \
                    (n_current_fires > 1 and n_current_ids == 1):
                fire_id_raster[y, x] = current_ids[0]
                burn_date_raster[y, x] = pixel_date
                continue
            if n_current_fires > 1 and n_current_ids > 1:
                # find all pixels with these ids
                which = numpy.isin(fire_id_raster, current_ids)
                fire_id_raster[which] = current_ids.min()  # assign lowest id
                fire_id_raster[y, x] = current_ids.min()
                burn_date_raster[y, x] = pixel_date

    return fire_id_raster, burn_date_raster
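The id-merge step near the end (numpy.isin followed by assigning the minimum id) can be sketched in isolation with a tiny made-up raster:

import numpy

fire_id = numpy.array([[0, 2, 2],
                       [3, 3, 0],
                       [0, 0, 1]])
current_ids = numpy.array([2, 3])   # two ids found to belong to the same event

fire_id[numpy.isin(fire_id, current_ids)] = current_ids.min()
print(fire_id)   # ids 2 and 3 are merged into 2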
Example #37
gameDataHomeDF = gameDataHomeDF.rename(columns={'home_team' : 'posteam', 'home_margin' : 'margin'})
gameDataAwayDF = gameDataDF.drop(columns=['home_team','home_score','away_score','home_margin'])
gameDataAwayDF = gameDataAwayDF.rename(columns={'away_team' : 'posteam', 'away_margin' : 'margin'})
gameDataDFFinal = pd.concat([gameDataHomeDF,gameDataAwayDF],ignore_index=True).sort_values(by=['game_id'])

## add a game number to the game data to use instead of week ##
gameDataDFFinal['game_number'] = gameDataDFFinal.groupby(['posteam','season']).cumcount() + 1
gameDataDFFinal = gameDataDFFinal.drop(columns=['state_of_game','game_url','type'])
## add game data to pbp data ##
pbpFinalDF = pd.merge(pbpDF2,gameDataDFFinal,on=['posteam','game_id'])
pbpDF2 = pbpFinalDF

## create EPAs to replicate @moo12152's work ##
pbpDF2['epa_baseline'] = pbpDF2['epa']
pbpDF2['epa_baseline_sum'] = pbpDF2['epa']
pbpDF2['epa_offense_defense_only'] = pbpDF2['epa'] * numpy.where((numpy.isin(pbpDF2['play_type'],st_plays)),0,1)
pbpDF2['epa_pass_only'] = pbpDF2['epa'] * numpy.where((numpy.isin(pbpDF2['play_type'],st_plays)),0,1) * numpy.where((pbpDF2['play_type'] == 'Run'),0,1)
pbpDF2['epa_rush_only'] = pbpDF2['epa'] * numpy.where((numpy.isin(pbpDF2['play_type'],st_plays)),0,1) * numpy.where((pbpDF2['play_type'] == 'Run'),1,0)

## calculate game totals ##
aggregationDict = {
    'epa_baseline' : 'mean',
    'epa_baseline_sum' : 'sum',
    'epa_offense_defense_only' : 'mean',
    'epa_pass_only' : 'mean',
    'epa_rush_only' : 'mean',
    'margin' : 'max',
}

gameDF = pbpDF2.groupby(['posteam','defteam','season','game_id','game_number']).agg(aggregationDict).reset_index()
gameDF = gameDF.sort_values(by=['posteam','game_id'])
Example #38
tidx = np.random.choice(range(n_tones), n_targets, replace=False)
tcount = 0
end = 0
p = 0
for n in range(n_tones):
    start = end
    end = start + 3
    if n > 0:
        p = None
    while p is None:
        shuffle(pattern)
        if pattern[0] != seq[start - 1]:
            p = 0
    seq[start:end] = pattern
    if np.isin(n, tidx):
        seq[end] = 4
        end = end + 1

###################### Prepare psychopy task #################################

####preload stimuli:
sounds = [
    sound.Sound('{}/{}.wav'.format(stim_dir, int(s))) for s in np.unique(seq)
]

#### Prepare relevant keys:

#keyNext = 'space' # key to advance

Example #39
    def validate_conventions(self, ds: loompy.LoomConnection) -> bool:
        """
		Validate the LoomConnection object against the attribute name/dtype conventions.

		Args:
			ds:			LoomConnection object
		
		Returns:
			True if the file conforms to the conventions, else False
		
		Remarks:
			Upon return, the instance attributes 'self.errors' and 'self.warnings' contain
			lists of errors and warnings.
		"""
        (n_genes, n_cells) = ds.shape

        self._warn("Description" in ds.attrs,
                   "Optional global attribute 'Description' is missing")
        self._warn("Journal" in ds.attrs,
                   "Optional global attribute 'Journal' is missing")
        self._warn("Authors" in ds.attrs,
                   "Optional global attribute 'Authors' is missing")
        self._warn("Title" in ds.attrs,
                   "Optional global attribute 'Title' is missing")
        self._warn("Year" in ds.attrs,
                   "Optional global attribute 'Year' is missing")
        self._warn("CreationDate" in ds.attrs,
                   "Optional global attribute 'CreationDate' is missing")

        if self._check("ClusterID" in ds.ca,
                       "Column attribute 'ClusterID' is missing"):
            self._check(np.issubdtype(ds.ca.ClusterID.dtype, np.int_),
                        "Column attribute 'ClusterID' must be integer dtype")
            self._check(
                len(np.unique(ds.ca.ClusterID)) == np.max(ds.ca.ClusterID)
                and np.min(ds.ca.ClusterID) == 0,
                "Column attribute 'ClusterID' must be integers 0, 1, 2, ... with no missing values"
            )
            self._check(
                ds.ca.ClusterID.shape == (n_cells, ),
                f"Column attribute 'ClusterID' must be 1-dimensional array of {n_cells} elements"
            )

        if "ClusterName" in ds.ca:
            self._check(
                ds.ca.ClusterName.dtype == object
                and np.issubdtype(ds.ca.ClusterName[0].dtype, np.str_),
                "Column attribute 'ClusterName' must be an array of strings")
            self._check(
                ds.ca.ClusterName.shape == (n_cells, ),
                f"Column attribute 'ClusterName' must be 1-dimensional array of {n_cells} elements"
            )
            one_to_one = True
            for cid in np.unique(ds.ca.ClusterID):
                if len(np.unique(
                        ds.ca.ClusterName[ds.ca.ClusterID == cid])) != 1:
                    one_to_one = False
                    break
            for cn in np.unique(ds.ca.ClusterName):
                if len(np.unique(
                        ds.ca.ClusterID[ds.ca.ClusterName == cn])) != 1:
                    one_to_one = False
                    break
            if not one_to_one:
                self._check(False,
                            "ClusterName must correspond 1:1 with ClusterID")
        else:
            self.warnings.append(
                "Optional column attribute 'ClusterName' is missing")

        if self._check("CellID" in ds.ca,
                       "Column attribute 'CellID' is missing"):
            self._check(
                ds.ca.CellID.dtype == object
                and np.issubdtype(ds.ca.CellID[0].dtype, np.str_),
                f"Column attribute 'CellID' must be an array of strings, not '{ds.ca.CellID[0].dtype}'"
            )
            self._check(
                ds.ca.CellID.shape == (n_cells, ),
                f"Column attribute 'CellID' must be 1-dimensional array of {n_cells} elements"
            )
            self._check(
                len(np.unique(ds.ca.CellID)) == n_cells,
                "Column attribute 'CellID' cannot contain duplicate values")

        if "Valid" in ds.ca:
            self._check(
                np.issubdtype(ds.ca.Valid.dtype, np.int_),
                f"Column attribute 'Valid' must be integer dtype, not '{ds.ca.Valid.dtype}'"
            )
            valids = np.unique(ds.ca.Valid)
            self._check(
                np.all(np.isin(ds.ca.Valid, [0, 1])),
                "Column attribute 'Valid' must be integers 0 or 1 only")
            self._check(
                ds.ca.Valid.shape == (n_cells, ),
                f"Column attribute 'Valid' must be 1-dimensional array of {n_cells} elements"
            )
        else:
            self.warnings.append(
                "Optional column attribute 'Valid' is missing")

        if "Outliers" in ds.ca:
            self._check(
                np.issubdtype(ds.ca.Outliers.dtype, np.int_),
                f"Column attribute 'Outliers' must be integer dtype, not '{ds.ca.Outliers.dtype}'"
            )
            self._check(
                np.all(np.isin(ds.ca.Outliers, [0, 1])),
                "Column attribute 'Outliers' must be integers 0 or 1 only")
            self._check(
                ds.ca.Outliers.shape == (n_cells, ),
                f"Column attribute 'Outliers' must be 1-dimensional array of {n_cells} elements"
            )
        else:
            self.warnings.append(
                "Optional column attribute 'Outliers' is missing")

        if self._check("Accession" in ds.ra,
                       "Row attribute 'Accession' is missing"):
            self._check(
                ds.ra.Accession.dtype == object
                and np.issubdtype(ds.ra.Accession[0].dtype, np.str_),
                f"Row attribute 'Accession' must be an array of strings, not '{ds.ra.Accession[0].dtype}'"
            )
            self._check(
                ds.ra.Accession.shape == (n_genes, ),
                f"Row attribute 'Accession' must be 1-dimensional array of {n_genes} elements"
            )
            self._check(
                len(np.unique(ds.ra.Accession)) == n_genes,
                "Row attribute 'Accession' cannot contain duplicate values")

        if self._check("Gene" in ds.ra, "Row attribute 'Gene' is missing"):
            self._check(
                ds.ra.Gene.dtype == object
                and np.issubdtype(ds.ra.Gene[0].dtype, np.str_),
                f"Row attribute 'Gene' must be an array of strings, not '{ds.ra.Gene[0].dtype}'"
            )
            self._check(
                ds.ra.Gene.shape == (n_genes, ),
                f"Row attribute 'Gene' must be 1-dimensional array of {n_genes} elements"
            )

        if "Valid" in ds.ra:
            self._check(
                np.issubdtype(ds.ra.Valid.dtype, np.int_),
                f"Row attribute 'Valid' must be integer dtype, not '{ds.ra.Valid.dtype}'"
            )
            valids = np.unique(ds.ra.Valid)
            self._check(np.all(np.isin(ds.ra.Valid, [0, 1])),
                        "Row attribute 'Valid' must be integers 0 or 1 only")
            self._check(
                ds.ra.Valid.shape == (n_genes, ),
                f"Row attribute 'Valid' must be 1-dimensional array of {n_genes} elements"
            )
        else:
            self.warnings.append("Optional row attribute 'Valid' is missing")

        if "Selected" in ds.ra:
            self._check(
                np.issubdtype(ds.ra.Selected.dtype, np.int_),
                f"Row attribute 'Selected' must be integer dtype, not '{ds.ra.Selected.dtype}'"
            )
            valids = np.unique(ds.ra.Selected)
            self._check(
                np.all(np.isin(ds.ra.Selected, [0, 1])),
                "Row attribute 'Selected' must be integers 0 or 1 only")
            self._check(
                ds.ra.Selected.shape == (n_genes, ),
                f"Row attribute 'Selected' must be 1-dimensional array of {n_genes} elements"
            )
        else:
            self.warnings.append(
                "Optional row attribute 'Selected' is missing")

        return len(self.errors) == 0
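
For reference, a minimal sketch of the 0/1 flag check used repeatedly above, with hypothetical arrays standing in for the Loom attributes:

import numpy as np

# Hypothetical flag columns standing in for ds.ca.Valid / ds.ca.Outliers.
valid = np.array([1, 0, 1, 1, 0])
bad = np.array([1, 0, 2, 1, 0])

print(np.all(np.isin(valid, [0, 1])))  # True  -> passes the check
print(np.all(np.isin(bad, [0, 1])))    # False -> reported as an error above
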
Beispiel #40
0
def sxmsk(scifile,
          infile,
          out='tsex',
          nrem=1,
          verb=True,
          retfull=False,
          center=None,
          getsky=False,
          **kwargs):
    '''
        SExtractor pass to mask objects that can affect the fit.
        scifile is the input FITS file; you can give a full path.
        infile is the input config file used to run SExtractor.
        out is the output name; you can give a full path.
            .cat will be appended to the output catalogue
            .fits will be appended to the output segmentation image
        nrem controls which objects are masked:
            0 : all objects are masked
            1 : all but the central object are masked
            2 : all but the central and overlapping objects are masked
            3 : no objects are masked
        verb prints the SExtractor output.
        retfull returns the mask before it is binarized.
        getsky additionally returns a dict with exptime, sky and skystd
            read from the catalogue header.
        Any other parameters can be passed to the SExtractor call via kwargs.
        center is given as py, px.
    '''
    tcall = 'sex -c {0} {1} -CATALOG_NAME {2}.cat -CHECKIMAGE_TYPE SEGMENTATION -CHECKIMAGE_NAME {2}.fits'.format(
        infile, scifile, out)
    for key in kwargs:
        tcall = tcall + ' -{0} {1}'.format(key, kwargs[key])

    p = Popen(tcall.split(), stdout=PIPE, stderr=PIPE)
    p.wait()
    if verb:
        for l in p.stderr.readlines():
            print(l[:-1])
    mskfit = pyfits.open("{0}.fits".format(out))
    try:
        mskfit[0].data += 1
    except:
        mskfit[0].data = mskfit[1].data
        mskfit[0].data += 1
    amsk = np.ones(mskfit[0].data.shape)
    amsk[mskfit[0].data != 1] = 0
    sexcat = pyfits.open("{0}.cat".format(out))[2].data
    if center is None:
        center = [mskfit[0].data.shape[0] // 2, mskfit[0].data.shape[1] // 2]
    idx = mskfit[0].data[center[0], center[1]]
    if idx == 1:
        if verb:
            print('Something wrong here, no object at the center!')
        return np.ones(mskfit[0].data.shape), []
    tmsk = np.zeros(mskfit[0].data.shape).astype(int)
    tmsk[mskfit[0].data == idx] = 1
    a = [[0, 1, 0], [1, 1, 1], [0, 1, 0]]
    tmsk = np.round(fftconvolve(tmsk, a, mode='same'), 1).astype(int)
    tmsk[tmsk > 0] = 1
    t = np.unique(mskfit[0].data * tmsk)
    t = t[~np.isin(t, [0, 1, idx], assume_unique=True)]
    others = np.unique(mskfit[0].data)
    others = others[~np.isin(others, [1, idx] + list(t), assume_unique=True)]

    torem = {0: [], 1: [idx], 2: t, 3: others}
    txt = {0: '', 1: 'central', 2: 'overlapping', 3: 'other'}
    models = []
    for i in range(nrem + 1):
        a = np.isin(mskfit[0].data, torem[i])
        amsk[a] = mskfit[0].data[a]
        jidxs = np.array(torem[i]) - 2
        #This is still slow, but I do not know how else to build this list of dictionaries,
        #and I need it because of how the models are defined outside. Not ideal, but not as bad as before.
        models.extend([{
            0:
            'sersic',
            1:
            '{0} {1} 1 1'.format(sexcat['X_IMAGE'][jidx],
                                 sexcat['Y_IMAGE'][jidx]),
            3:
            '{0} 1'.format(sexcat['MAG_AUTO'][jidx]),
            4:
            '{0} 1'.format(sexcat['KRON_RADIUS'][jidx] *
                           sexcat['B_IMAGE'][jidx]),
            5:
            '4 1',
            9:
            '{0} 1'.format(sexcat['ELONGATION'][jidx]**-1),
            10:
            '{0} 1'.format(sexcat['THETA_IMAGE'][jidx] - 90),
            'Z':
            0,
            'mskidx':
            jidx + 2,
            'origin':
            txt[i]
        } for jidx in jidxs])
    if getsky:
        sexcat = pyfits.open("{0}.cat".format(out))[1].data[0][0]
        skyo = {}
        for l in sexcat:
            if 'EXPTIME' in l:
                t = l[10:].split()[0]
                if '\'' in t: t = t.replace('\'', '')
                skyo['exptime'] = float(t)
            if 'SEXBKGND' in l:
                t = l[10:].split()[0]
                if '\'' in t: t = t.replace('\'', '')
                skyo['sky'] = float(t)
            if 'SEXBKDEV' in l:
                t = l[10:].split()[0]
                if '\'' in t: t = t.replace('\'', '')
                skyo['skystd'] = float(t)

    if retfull:
        if getsky: return amsk, models, skyo
        return amsk, models
    amsk[amsk != 0] = 1
    if getsky: return amsk, models, skyo
    return amsk, models
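
A minimal sketch of the label-masking pattern sxmsk relies on, using a hypothetical 3x3 segmentation image rather than SExtractor output:

import numpy as np

# Hypothetical segmentation image: 0 = background, 1..3 = object labels.
seg = np.array([[0, 1, 1],
                [0, 2, 3],
                [0, 0, 3]])

keep = [1, 3]                   # labels that should stay unmasked
mask = np.isin(seg, keep)       # True where the pixel belongs to a kept object
binary = np.where(mask, 1, 0)   # same idea as the final amsk binarization above
print(binary)
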
    def __getitem__(self, idx):
        """
        Args: idx (int): Index in list to load image
        """
        assert self.rgb_lst[idx].split('.')[0] == self.gt_lst[idx].split(
            '.')[0]
        img_name = os.path.join(self.image_dir, self.rgb_lst[idx])
        gt_name = os.path.join(self.gt_dir, self.gt_lst[idx])
        with open(img_name, 'rb') as f:
            image = (Image.open(f).convert('RGB'))
        with open(gt_name, 'rb') as f:
            gt = (Image.open(f).convert('P'))
        idx = int(self.rgb_lst[idx].split('.')[0]) - 1
        params = self.params[idx]["poly_params"]
        line_lst = self.line_file[idx]["lines"]

        w, h = image.size
        image, gt = F.crop(image, h - 640, 0, 640,
                           w), F.crop(gt, h - 640, 0, 640, w)
        image = F.resize(image,
                         size=(self.resize, 2 * self.resize),
                         interpolation=Image.BILINEAR)
        gt = F.resize(gt,
                      size=(self.resize, 2 * self.resize),
                      interpolation=Image.NEAREST)
        gt = np.asarray(gt).copy()
        idx3 = np.isin(gt, 3)
        idx4 = np.isin(gt, 4)
        gt[idx3] = 0
        gt[idx4] = 0
        # params = [params[0], params[1]]
        hflip_input = np.random.uniform(0.0, 1.0) > 0.5 and self.flip_on
        if idx not in self.valid_idx and hflip_input:
            image, gt = F.hflip(image), F.hflip(gt)
            line_lst = mirror_list(line_lst)

            idx1 = np.isin(gt, 1)
            idx2 = np.isin(gt, 2)
            gt[idx1] = 2
            gt[idx2] = 1
            params = [params[1], params[0], params[3], params[2]]
            params = np.array(params)
            params = -params
            params[:, -1] = 1 + params[:, -1]

        gt = Image.fromarray(gt)
        params = torch.from_numpy(np.array(params)).float()
        image, gt = self.totensor(image).float(), (self.totensor(gt) *
                                                   255).long()

        y_val = gt.nonzero()[0, 1]
        horizon = torch.zeros(gt.size(1))
        horizon[0:y_val] = 1
        line_lst = np.array(line_lst[3:7])
        line_lst = torch.from_numpy(np.array(line_lst + 1))
        line_lst = line_lst.long()
        horizon = horizon.float()

        if idx in self.valid_idx:
            index = self.valid_idx.index(idx)
            return image, gt, params, idx, line_lst, horizon, index
        return image, gt, params, idx, line_lst, horizon
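
A small sketch of the label swap performed after a horizontal flip above, on a hypothetical lane map:

import numpy as np

# Hypothetical ground truth: 1 = left ego lane, 2 = right ego lane.
gt = np.array([[0, 1, 2],
               [1, 1, 2],
               [0, 2, 2]])

# Compute both masks before assigning, so the swap cannot clobber itself.
idx1 = np.isin(gt, 1)
idx2 = np.isin(gt, 2)
gt[idx1] = 2
gt[idx2] = 1
print(gt)
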
Beispiel #42
0
#Keep only unique channel COMIDs
#chandat,udx = np.unique(chandat[:,0],return_index=True)
#chandat = chandat[udx,:]
#termcode = termcode[udx]
#lOAD flow network (from-to list)
flow=np.loadtxt(f_lines,delimiter=',',skiprows=1,usecols=(0,1))

#keep COMIDS of everything within place requested by user
#Constraints:
#TERMCODE/location
#channel width
#channel slope
#tidal: currently ignored because the binary value is not sensitive to freshwater tidal regions suitable for habitat
comids=chandat[(np.char.find(termcode,place)==0) & (np.greater_equal(chandat[:,3],width)) & (np.less_equal(chandat[:,5],slope)),0]
#keep only flowlines/features that match the comids list selected by constraints
IDs = IDs[np.isin(dams_outs,comids)]
type = type[np.isin(dams_outs,comids)]
locations = locations[np.isin(dams_outs,comids),:]
termcode = termcode[np.isin(chandat[:,0],comids)]
flow = flow[np.isin(flow[:,0],comids)+np.isin(flow[:,1],comids)>0,:]
dams_outs = dams_outs[np.isin(dams_outs,comids)]
chandat = chandat[np.isin(chandat[:,0],comids)]

#temp maxslp to calculate maximum slope to flowline...
#maxslp = chandat[:,2]
#print chandat[:,0].size
#print maxslp.size
#optional input args: array of decision user can provide for each dam
#if len(sys.argv)>5:
#    f_decision = sys.argv[5]
#    decision = np.genfromtxt(f_decision,delimiter=',',dtype='str')
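
A minimal sketch of the membership filtering used above, with a hypothetical COMID selection and from-to table:

import numpy as np

# Hypothetical selected COMIDs and a from-to flow table.
comids = np.array([10, 30])
flow = np.array([[10, 20],
                 [30, 10],
                 [40, 50]])

# Keep rows where either endpoint is in the selection; writing the test with |
# is equivalent to summing the two boolean arrays and checking > 0 as above.
keep = np.isin(flow[:, 0], comids) | np.isin(flow[:, 1], comids)
print(flow[keep])
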
Beispiel #43
0
def score(index):
    return sum((boards[index][np.logical_not(np.isin(
        boards[index], called_nrs))]).flatten()) * called_nrs[-1]
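
The same scoring rule on a hypothetical 3x3 board, for illustration:

import numpy as np

# Hypothetical bingo board and the numbers called so far.
board = np.array([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])
called_nrs = np.array([2, 5, 9])

# Sum of the unmarked numbers times the last called number, as in score() above.
unmarked = board[np.logical_not(np.isin(board, called_nrs))]
print(unmarked.sum() * called_nrs[-1])  # (1+3+4+6+7+8) * 9 = 261
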
Beispiel #44
0
    def grouped_indices_inverted(self):
        mask = np.isin(self.grouped_indices, self.selection)
        return np.flatnonzero(mask)
Beispiel #45
0
    def perform_graph_cut(self,
                          pairwise_term_scale=-np.infty,
                          scale_parameter=1.0,
                          normalize_uncertainties=True):
        """
        Segments every image using graph-cut. The graph built has nodes with energies based on GMM matching, and edges
        based on euclidean distance between neighbouring superpixels' feature vectors. The resulting cosegmentation is
        stored in images_cosegmented. This function also calculates for each superpixel an uncertainty score based on
        the difference in energy between the optimal assignment and opposite and stores
        these scores in images_superpixels_uncertainties_graph_cut.

        Note: Requires compute_gmm

        parameters:
            pairwise_term_scale (float): Used to scale the pairwise term in relation to the unary term.
            scale_parameter (float): Used to adjust the strength of the response in the pairwise term value
                depending on distance.
        """
        # perform graph-cut for every image
        for img in self.images:
            # Create a graph of N nodes with an estimate of 5 edges per node
            num_nodes = len(self.images_superpixels[img])
            graph = maxflow.Graph[float](num_nodes, num_nodes * 5)

            # Add the nodes
            nodes = graph.add_nodes(num_nodes)

            # If no scale is given initialize it as -infinity and set it to the largest unary term energy
            if pairwise_term_scale == -np.infty:
                compute_scale = True
            else:
                compute_scale = False

            energies_fg = np.zeros(len(self.images_superpixels[img]))
            energies_bg = np.zeros(len(self.images_superpixels[img]))
            edges = [dict() for i in self.images_superpixels[img]]

            # Initialize match terms: energy of assigning node to foreground or background
            for sp, fv in enumerate(
                    self.images_superpixels_feature_vector[img]):
                # set energy based on weighted log probability
                energies_fg[sp] = self.gmm_foreground.score_samples([fv])[0]
                energies_bg[sp] = self.gmm_background.score_samples([fv])[0]
                graph.add_tedge(nodes[sp], energies_fg[sp], energies_bg[sp])
                # Set pairwise_term_scale to largest energy
                if compute_scale:
                    if pairwise_term_scale < abs(energies_fg[sp]):
                        pairwise_term_scale = abs(energies_fg[sp])
                    if pairwise_term_scale < abs(energies_bg[sp]):
                        pairwise_term_scale = abs(energies_bg[sp])

            # Initialize smoothness terms: energy between neighbors
            for sp in self.images_superpixels[img]:
                fv = self.images_superpixels_feature_vector[img][sp]
                for nbr in self.images_superpixels_neighbours[img][sp]:
                    # Create two edges between superpixel and its neighbor with cost based on
                    # euclidean distance between their feature vectors
                    fv_neighbor = self.images_superpixels_feature_vector[img][
                        nbr]
                    edges[sp][nbr] = pairwise_term_scale * (np.e**(
                        -scale_parameter * abs(euclidean(fv, fv_neighbor))))
                    edges[nbr][sp] = pairwise_term_scale * (np.e**(
                        -scale_parameter * abs(euclidean(fv_neighbor, fv))))
                    graph.add_edge(nodes[sp], nodes[nbr], edges[sp][nbr],
                                   edges[nbr][sp])

            graph.maxflow()

            graph_cut = graph.get_grid_segments(nodes)

            # Initialize uncertainties array
            self.images_superpixels_uncertainties_graph_cut[img] = [
                0 for sp in self.images_superpixels[img]
            ]
            maximum_uncertainty = 0

            # Compute uncertainties of the graph-cut as the difference in energy between the assignments
            for sp in self.images_superpixels[img]:
                energy_difference = abs(energies_fg[sp] - energies_bg[sp])
                for nbr in self.images_superpixels_neighbours[img][sp]:
                    if graph_cut[sp] != graph_cut[nbr]:
                        energy_difference += abs(edges[sp][nbr]) + abs(
                            edges[nbr][sp])
                if energy_difference > maximum_uncertainty:
                    maximum_uncertainty = energy_difference
                self.images_superpixels_uncertainties_graph_cut[img][
                    sp] = energy_difference

            if normalize_uncertainties and maximum_uncertainty > 0:
                self.images_superpixels_uncertainties_graph_cut[img] = [
                    x / maximum_uncertainty for x in
                    self.images_superpixels_uncertainties_graph_cut[img]
                ]

            # Get a bool mask of the pixels for a given selection of superpixel IDs
            self.images_cosegmented[img] = np.where(
                np.isin(self.images_segmented[img], np.nonzero(graph_cut)), 0,
                1)
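
A minimal sketch of the final masking step above, with a hypothetical superpixel label image and cut vector:

import numpy as np

# Hypothetical superpixel labels and one boolean per superpixel ID from the cut.
segments = np.array([[0, 0, 1],
                     [2, 2, 1],
                     [2, 3, 3]])
graph_cut = np.array([False, True, False, True])

# Superpixels on the cut side of the graph get 0, the rest get 1.
coseg = np.where(np.isin(segments, np.nonzero(graph_cut)), 0, 1)
print(coseg)
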
Beispiel #46
0
def focus_local_group(circ, locations):
    """Constructs an AttentionCircuit with an group of neighboring local operations in the focus.

  A transformation on such a group of local (i.e. single-qubit) operations is
  only permitted iff the operations in the focus are neighboring, i.e. not
  separated by other operations, which is equivalent to the condition whether
  between the first and the last operation in the group, there is no other
  (multi-qubit) operation which acts on the same qubit. If this is not the case,
  an OperationsNotAlignedError is raised.

  Args:
      circ: the full circuit.
      locations: the indices of the operations to be in the focus.

  Returns:
      an AttentionCircuit with the selected (local) operations in the focus.

  Raises:
      TypeError: if circ is not a Circuit, or if locations is not a sequence of
          int.
      ValueError: if locations is empty or contains duplicate elements, or if
          the focus does not consist of local operations which all act on the
          same qubit.
      IndexError: if one of the locations is out of bounds.
      OperationsNotAlignedError: if the locations do not correspond to a valid
          local group.
  """
    if not isinstance(circ, circuit.Circuit):
        raise TypeError('circ is not a Circuit (found type: %s)' %
                        type(circ).__name__)

    length = len(circ)
    locations = np.array([
        _check_and_convert_to_non_negative_circuit_index(
            location, length, 'location') for location in locations
    ])

    num_locations = locations.size

    if num_locations == 0:
        raise ValueError('locations must not be empty')

    locations = np.unique(locations)  # sorts and makes elements unique
    if locations.size != num_locations:
        raise ValueError('locations contains duplicate elements')

    # extract the focus from the input circuit
    focus = circ.subcircuit(locations)

    if not all(operation.get_num_qubits() == 1 for operation in focus):
        raise ValueError('focus contains non-local operations')

    active_qubit = focus.operation(0).get_qubits()
    if not all(operation.get_qubits() == active_qubit
               for operation in focus[1:]):
        raise ValueError('operations in the focus act on different qubits')

    # every operation between the first and last operation of the focus is put
    # into between as long as this operation does not belong to the focus itself
    between = np.arange(locations[0] + 1, locations[-1])
    between = between[~np.isin(between, locations)]
    between = circ.subcircuit(between)

    # check whether the operations in the focus are aligned
    # For a local group, this is equivalent to that no operation in "between"
    # affects on the qubit that all operations in the focus act on, i.e. every
    # operation in the focus has to commute trivially with every operation in
    # "between".
    if all(
            op1.commutes_trivially_with(op2)
            for op1, op2 in itertools.product(focus, between)):
        return AttentionCircuit(
            focus.get_operation_sequence(),
            TransformationContext(circ[:locations[0]], between,
                                  circ[locations[-1] + 1:]),
            locations=locations)
    else:
        raise OperationsNotAlignedError
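
The "between" construction above, shown on hypothetical location indices:

import numpy as np

# Hypothetical focus locations inside a longer circuit.
locations = np.array([2, 5])

# Everything strictly between the first and last focus location,
# minus the focus itself (here nothing to remove, so [3 4] remains).
between = np.arange(locations[0] + 1, locations[-1])
between = between[~np.isin(between, locations)]
print(between)  # [3 4]
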
Beispiel #47
0
    def build_occupancy_maps(self, human_states):
        """

        :param human_states:
        :return: tensor of shape (# human - 1, self.cell_num ** 2)
        """
        occupancy_maps = []
        for human in human_states:
            other_humans = np.concatenate([
                np.array([(other_human.px, other_human.py, other_human.vx,
                           other_human.vy)])
                for other_human in human_states if other_human != human
            ],
                                          axis=0)
            other_px = other_humans[:, 0] - human.px
            other_py = other_humans[:, 1] - human.py
            # new x-axis is in the direction of human's velocity
            human_velocity_angle = np.arctan2(human.vy, human.vx)
            other_human_orientation = np.arctan2(other_py, other_px)
            rotation = other_human_orientation - human_velocity_angle
            distance = np.linalg.norm([other_px, other_py], axis=0)
            other_px = np.cos(rotation) * distance
            other_py = np.sin(rotation) * distance

            # compute indices of humans in the grid
            other_x_index = np.floor(other_px / self.cell_size +
                                     self.cell_num / 2)
            other_y_index = np.floor(other_py / self.cell_size +
                                     self.cell_num / 2)
            other_x_index[other_x_index < 0] = float('-inf')
            other_x_index[other_x_index >= self.cell_num] = float('-inf')
            other_y_index[other_y_index < 0] = float('-inf')
            other_y_index[other_y_index >= self.cell_num] = float('-inf')
            grid_indices = self.cell_num * other_y_index + other_x_index
            occupancy_map = np.isin(range(self.cell_num**2), grid_indices)
            if self.om_channel_size == 1:
                occupancy_maps.append([occupancy_map.astype(int)])
            else:
                # calculate relative velocity for other agents
                other_human_velocity_angles = np.arctan2(
                    other_humans[:, 3], other_humans[:, 2])
                rotation = other_human_velocity_angles - human_velocity_angle
                speed = np.linalg.norm(other_humans[:, 2:4], axis=1)
                other_vx = np.cos(rotation) * speed
                other_vy = np.sin(rotation) * speed
                dm = [
                    list()
                    for _ in range(self.cell_num**2 * self.om_channel_size)
                ]
                for i, index in np.ndenumerate(grid_indices):
                    if index in range(self.cell_num**2):
                        if self.om_channel_size == 2:
                            dm[2 * int(index)].append(other_vx[i])
                            dm[2 * int(index) + 1].append(other_vy[i])
                        elif self.om_channel_size == 3:
                            dm[2 * int(index)].append(1)
                            dm[2 * int(index) + 1].append(other_vx[i])
                            dm[2 * int(index) + 2].append(other_vy[i])
                        else:
                            raise NotImplementedError
                for i, cell in enumerate(dm):
                    dm[i] = sum(dm[i]) / len(dm[i]) if len(dm[i]) != 0 else 0
                occupancy_maps.append([dm])

        return torch.from_numpy(np.concatenate(occupancy_maps, axis=0)).float()
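
A reduced sketch of the occupancy-map construction, with a hypothetical 3x3 grid and precomputed cell indices:

import numpy as np

# Hypothetical grid resolution and the cells occupied by other agents.
cell_num = 3
grid_indices = np.array([0, 4, 4, 7])   # duplicates simply stay True once

occupancy_map = np.isin(range(cell_num ** 2), grid_indices)
print(occupancy_map.astype(int).reshape(cell_num, cell_num))
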
#if save == True:
#    subfolder = input('subfolder? (leave blank if none, include / at end) ')
#    filepath = 'plots/new_catalogue/JK_lightcurve_comp/dcf/grid/'+subfolder+''

### Get monthly_numbers as this contains all possible months ###
month_info = fits.open('Images/Convolving_Images/monthly_numbers.fits')[1].data #get month count data
full_months = month_info['Month'] #extract month names

### Create J and K month arrays so can match where the data should go ###
kmonths = ['sep05','oct05','nov05','dec05', 'jan06', #'dec06', 
          'jan07', 'aug07', 'sep07', 'oct07', 'sep08', 'oct08', 'nov08', 
          'jul09', 'aug09', 'sep09', 'oct09', 'nov09', 'dec09', 'jan10', 
          'feb10', 'aug10', 'sep10', 'oct10', 'nov10', 'dec10', 'jan11', #'feb11', 
          'aug11', 'sep11', 'oct11', 'nov11', 'dec11', 'jan12', 'feb12', 
          'jul12', 'aug12', 'sep12', 'oct12', 'nov12']
kmask = np.isin(full_months, kmonths)

jmonths = ['sep05', 'oct05', 'nov05', 'dec05', 'jan06', 'oct06', 'nov06',
          'dec06', 'aug07', 'sep07', 'oct07', 'oct08', 'nov08', 'aug09',
          'sep09', 'oct09', 'nov09', 'dec09', 'aug10', 'sep10', 'oct10',
          'nov10', 'dec10', 'jan11', 'aug11', 'sep11', 'oct11', 'nov11',
          'dec11', 'jan12', 'jul12', 'aug12', 'sep12', 'oct12', 'nov12']
jmask = np.isin(full_months, jmonths)

testmonths = ['nov05', 'nov06', 'sep07', 'oct08', 'oct09', 'oct10', 'nov11', 'sep12']
testmask = np.isin(full_months, testmonths)

### set up month tick details ###
month_info = fits.open('Images/Convolving_Images/monthly_numbers.fits')[1].data #get month count data
full_months = month_info['Month'] #extract month names
tick_inds = np.load('Images/Convolving_Images/tick_inds_K.npy') #load tick locations
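
The month-mask idea above in miniature, with hypothetical month lists:

import numpy as np

# Hypothetical full month list and the subset observed in one band.
full_months = np.array(['sep05', 'oct05', 'nov05', 'dec05', 'jan06'])
kmonths = ['sep05', 'nov05', 'jan06']

kmask = np.isin(full_months, kmonths)
print(full_months[kmask])   # months for which K-band data exist
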
Beispiel #49
0
    print(sim)
    slopes, concentrations, mvirs = [], [], []

    # get z=0 properties (to limit computation)
    subs_0 = pd.read_pickle('derived_props/' + sim)

    scale_list = list_of_scales('elvis', sim)
    scale_list = scale_list[scale_list > 1 / (1 + 0.4)]
    for a_start in scale_list[:1]:
        # grab z=a_start properties
        # print("Computing for "+str(a_start))
        subs_z_full = get_halos_at_scale_elvis(sim, a_start)
        nhosts = 2 if '&' in sim else 1
        haloIDs = list(subs_z_full.index.values[0:nhosts])
        subs_z, halos_z = subs_z_full.drop(haloIDs), subs_z_full.loc[haloIDs]
        subs_z = subs_z[np.isin(subs_z.index, subs_0.index)]
        assert len(subs_z) == len(subs_0)
        subs_z['hostID'] = subs_0['hostID']

        # only include subhalos of main halo at z=a_start
        subs_z = subs_z[subs_z.pID == halos_z.loc[subs_z['hostID']].zID.values]

        # center, convert to spherical
        subs_z = center_on_hosts(hosts=halos_z, subs=subs_z)
        subs_z.x, subs_z.y, subs_z.z = subs_z.x * Mpc2km, subs_z.y * Mpc2km, subs_z.z * Mpc2km
        subs_z = compute_spherical_hostcentric_sameunits(df=subs_z)
        subs_z.x, subs_z.y, subs_z.z = subs_z.x * km2kpc, subs_z.y * km2kpc, subs_z.z * km2kpc
        subs_z.r = subs_z.r * km2kpc
        assert (subs_z.r < halos_z.loc[subs_z['hostID']].Rvir.values).all()

        # set up for finding new accretion time if necessary
from sklearn.ensemble import GradientBoostingRegressor
from scipy.stats import spearmanr
from joblib import dump, load
import re


metadata = pd.read_feather("../../metadata/lake_metadata_full.feather")
metadata.set_index('site_id', inplace=True)
glm_all_f = pd.read_csv("../../results/glm_transfer/RMSE_transfer_glm_pball.csv")
train_lakes = [re.search('nhdhr_(.*)', x).group(1) for x in np.unique(glm_all_f['target_id'].values)]
train_lakes_wp = np.unique(glm_all_f['target_id'].values) #with prefix

ids = pd.read_csv('../../metadata/pball_site_ids.csv', header=None)
ids = ids[0].values
n_lakes = len(train_lakes)
test_lakes = ids[~np.isin(ids, train_lakes)]
assert len(test_lakes) == 305

output_to_file = True

save_file_path = "../../results/pgmtl_results_ens_9source.csv"

#########################################################################################
#paste features found in "pbmtl_feature_selection.py" here
feats = ['n_obs_sp', 'n_obs_su', 'dif_max_depth', 'dif_surface_area',
       'dif_glm_strat_perc', 'perc_dif_max_depth', 'perc_dif_surface_area',
       'perc_dif_sqrt_surface_area']

#paste "k" value found in findEnsembleK.py here
k = 9
###################################################################################
def calcular_incentivos_y_costos_TMK(OutPut, ipc, tipincent, piva, pica, pgmf, mesanual):
    # start_time = time.time()
    # Offer type IDs
    tiposDeOferta = [1, 5, 6, 8, 12, 13]

    # determine how many years have elapsed
    OutPut['yipc'] = np.where(
        np.isin(OutPut['Id_T.Oferta'], tiposDeOferta),  # If the offer type is hall or extended warranty
        (OutPut['TEMP_numeromes'] - 1) / 12,
        0
    )
    OutPut['yipc'] = np.floor(OutPut['yipc'])
    OutPut['yipc'] = OutPut['yipc'].fillna(0)

    OutPut['incentp'] = np.where(
        np.isin(OutPut['Id_T.Oferta'], tiposDeOferta),  # If the offer type is hall or extended warranty,
        np.where(
            OutPut['Id_T.Prima'] == 1,  # Check the premium type; if it is monthly:
            np.where(
                OutPut['¿Aplica IPC?'] == 'Si',  # Adjust the average premium value by the CPI (IPC)
                OutPut['% incentivo'] * (OutPut['Vlr. Prima Prom'] * ((1 + ipc) ** OutPut['yipc'])) * OutPut['nuevos'],  # Incentive paid
                OutPut['% incentivo'] * OutPut['Vlr. Prima Prom'] * OutPut['nuevos']  # Incentive paid
            ),
            np.where(
                OutPut['Id_T.Prima'] == 2,  # If the premium is annual
                np.where(
                    OutPut['¿Aplica IPC?'] == 'Si',  # Adjust the average premium value by the CPI (IPC)
                    OutPut['% incentivo'] * ((OutPut['Vlr. Prima Prom'] * ((1 + ipc) ** OutPut['yipc'])) / 12) * OutPut['nuevos'],
                    OutPut['% incentivo'] * (OutPut['Vlr. Prima Prom'] / 12) * OutPut['nuevos']
                ),
                # If it is a single (one-time) premium
                np.where(
                    OutPut['¿Aplica IPC?'] == 'Si',  # Adjust the average premium value by the CPI (IPC)
                    OutPut['% incentivo'] * ((OutPut['Vlr. Prima Prom'] * ((1 + ipc) ** OutPut['yipc'])) / OutPut['Duración']) * OutPut['nuevos'],
                    OutPut['% incentivo'] * (OutPut['Vlr. Prima Prom'] / OutPut['Duración']) * OutPut['nuevos']
                ),
            )
        ),
        0
    )
    OutPut['incentp'] = OutPut['incentp'].fillna(0)

    OutPut['ipctmk'] = np.where(
        np.isin(OutPut['Id_T.Oferta'], tiposDeOferta) == False,
        (OutPut['TEMP_numeromes'] + mesanual - 2) / 12,
        0
    )
    OutPut['ipctmk'] = np.floor(OutPut['ipctmk'])
    OutPut['ipctmk'] = OutPut['ipctmk'].fillna(0)

    OutPut['tmkCost'] = np.where(
        np.isin(OutPut['Id_T.Oferta'], tiposDeOferta) == False,
        (OutPut['C/U. Venta TMKT'] * ((1 + ipc) ** OutPut['ipctmk']) * OutPut['nuevos']),
        0
    )
    OutPut['tmkCost'] = OutPut['tmkCost'].fillna(0)

    # Amortized incentives
    # OutPut['TEMP_upr-1'] = np.where( (OutPut['TEMP_numeromes'] <= 12), 0, OutPut.groupby(['TEMP_key_numeromeses'])['upr'].sum()['(' + (OutPut['Id_Tool']).astype(str)  + ', ' + (OutPut['TEMP_numeromes']-1).astype(str) + ')'] )
    OutPut['TEMP_upr-1'] = OutPut['upr'].shift(1)
    # Only applies to new business ('Nuevo')
    OutPut['incent'] = np.where(
        OutPut['Tipo Proyección'] == 'Nuevo',
        np.where(
            OutPut['TEMP_numeromes'] == 1,
            OutPut['incentp'] - (((OutPut['upr'] + OutPut['gwpn']) / OutPut['Duración']) * OutPut['% incentivo']),
            OutPut['incentp'] - (((OutPut['upr'] + OutPut['gwpn']) / OutPut['Duración']) * OutPut['% incentivo']) + (((OutPut['TEMP_upr-1'] + OutPut['gwpn']) / OutPut['Duración']) * OutPut['% incentivo'])
        ),
        0
    )
    OutPut['incent'] = OutPut['incent'].fillna(0)

    # VAT on incentives and on telemarketing (TMKT) costs
    OutPut['vatincent'] = np.where(
        np.isin(OutPut['Id_T.Oferta'], tiposDeOferta),  # If the offer type is hall or extended warranty
        np.where(
            tipincent == "Pagados",
            OutPut['% VAT'] * OutPut['% Com Non'] * OutPut['incentp'] * piva,  # VAT on incentives paid
            OutPut['% VAT'] * OutPut['% Com Non'] * OutPut['incent'] * piva  # VAT on amortized incentives
        ),
        0
    )
    OutPut['vatincent'] = OutPut['vatincent'].fillna(0)

    OutPut['vatmk'] = np.where(
        np.isin(OutPut['Id_T.Oferta'], tiposDeOferta) == False,
        OutPut['% VAT'] * OutPut['% Com Non'] * OutPut['tmkCost'] * piva,
        0
    )
    OutPut['vatmk'] = OutPut['vatmk'].fillna(0)

    # Compute ICA and 4x1000 (GMF) taxes
    OutPut['ica'] = np.where(
        (OutPut['gwp'] * pica) > 0,
        OutPut['gwp'] * pica,
        0
    )
    OutPut['ica'] = OutPut['ica'].fillna(0)

    OutPut['gmf'] = np.where(
        (OutPut['gwp'] * pgmf) > 0,
        OutPut['gwp'] * pgmf,
        0
    )
    OutPut['gmf'] = OutPut['gmf'].fillna(0)

    #print("\n\n calcular_incentivos_y_costos_TMK \n--- %s seconds ---" % (time.time() - start_time))
    return OutPut
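
A minimal sketch of the np.where/np.isin pattern used throughout the function above, on a hypothetical offer table:

import numpy as np
import pandas as pd

# Hypothetical offer table; only selected offer types receive an incentive.
df = pd.DataFrame({'Id_T.Oferta': [1, 3, 5, 9],
                   'prima': [100.0, 200.0, 300.0, 400.0]})
tiposDeOferta = [1, 5, 6, 8, 12, 13]

df['incentivo'] = np.where(np.isin(df['Id_T.Oferta'], tiposDeOferta),
                           0.1 * df['prima'],   # hypothetical 10% incentive
                           0.0)
print(df)
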
Beispiel #52
0
            print('Status Tree not present.  Returning Error.')
            print('\nError in %s' % inspect.stack()[0][3])
            print(e)
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            sys.exit(1)
        filename = createFile(
            reader
        )  #Creates an analysis file if one does not exist.  Returns filename to load file.
        if filename is not None:
            with h5py.File(filename, 'a') as file:
                eventids = file['eventids'][...]
                dsets = list(file.keys())  #Existing datasets

                if not numpy.isin('time_delays', dsets):
                    print(
                        'time delays not present to perform impulsivity in run %i'
                        % run)
                    file.close()
                else:

                    if not numpy.isin('impulsivity', dsets):
                        file.create_group('impulsivity')
                    else:
                        print('impulsivity group already exists in file %s' %
                              filename)

                    time_delays_dsets = list(file['time_delays'].keys())
                    impulsivity_dsets = list(file['impulsivity'].keys())
Beispiel #53
0
def loaders(dataset,
            path,
            batch_size,
            num_workers,
            transform_train,
            transform_test,
            use_validation=True,
            val_size=5000,
            split_classes=None,
            shuffle_train=True):

    regression_problem = False
    try:
        ds = getattr(torchvision.datasets, dataset)
    except:
        if dataset == 'toy_regression':
            ds = regression_data.generate_toy_problem
            regression_problem = True
        if dataset == 'boston':
            ds = regression_data.generate_boston
            regression_problem = True

    path = os.path.join(path, dataset.lower())
    train_set = ds(root=path,
                   train=True,
                   download=True,
                   transform=transform_train)

    if use_validation:
        print("Using train (" + str(len(train_set.train_data) - val_size) +
              ") + validation (" + str(val_size) + ")")
        train_set.train_data = train_set.train_data[:-val_size]
        train_set.train_labels = train_set.train_labels[:-val_size]

        test_set = ds(root=path,
                      train=True,
                      download=True,
                      transform=transform_test)
        test_set.train = False
        test_set.test_data = test_set.train_data[-val_size:]
        test_set.test_labels = test_set.train_labels[-val_size:]
        delattr(test_set, 'train_data')
        delattr(test_set, 'train_labels')
    else:
        print('You are going to run models on the test set. Are you sure?')
        test_set = ds(root=path,
                      train=False,
                      download=True,
                      transform=transform_test)

    if split_classes is not None:
        assert dataset == 'CIFAR10'
        assert split_classes in {0, 1}

        print('Using classes:', end='')
        print(c10_classes[split_classes])
        train_mask = np.isin(train_set.train_labels,
                             c10_classes[split_classes])
        train_set.train_data = train_set.train_data[train_mask, :]
        train_set.train_labels = np.array(train_set.train_labels)[train_mask]
        train_set.train_labels = np.where(
            train_set.train_labels[:, None] == c10_classes[split_classes][
                None, :])[1].tolist()
        print('Train: %d/%d' %
              (train_set.train_data.shape[0], train_mask.size))

        test_mask = np.isin(test_set.test_labels, c10_classes[split_classes])
        test_set.test_data = test_set.test_data[test_mask, :]
        test_set.test_labels = np.array(test_set.test_labels)[test_mask]
        test_set.test_labels = np.where(
            test_set.test_labels[:, None] == c10_classes[split_classes][
                None, :])[1].tolist()
        print('Test: %d/%d' % (test_set.test_data.shape[0], test_mask.size))

    num_classes = max(train_set.train_labels) + 1
    if regression_problem:
        num_classes = 0

    return \
        {
            'train': torch.utils.data.DataLoader(
                train_set,
                batch_size=batch_size,
                shuffle=True and shuffle_train,
                num_workers=num_workers,
                pin_memory=True
            ),
            'test': torch.utils.data.DataLoader(
                test_set,
                batch_size=batch_size,
                shuffle=False,
                num_workers=num_workers,
                pin_memory=True
            ),
        }, \
        num_classes
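
The class-split and relabelling trick from loaders(), sketched on hypothetical label arrays:

import numpy as np

# Hypothetical labels and the class subset to keep (standing in for c10_classes).
labels = np.array([0, 3, 5, 3, 9, 0])
split = np.array([0, 3, 9])

mask = np.isin(labels, split)
kept = labels[mask]
# Remap the kept labels to 0..len(split)-1, as in loaders() above.
remapped = np.where(kept[:, None] == split[None, :])[1]
print(kept, remapped)   # [0 3 3 9 0] [0 1 1 2 0]
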
Beispiel #54
0
def vectorize(da, var_ds, indice_name, slice_mode=None):
    ds = xr.decode_cf(da)

    time_value = ds.time.values
    year = ds.groupby(ds['time.year']).groups
    yearA = np.array([*year])

    i = 0
    season = ["MAM", "JJA", "SON"]
    centroid_time = []
    time_bnds = []
    time_start = datetime.datetime.now()

    if slice_mode in season:

        seasonA = ds.groupby(ds['time.season']).groups[slice_mode]
        for season_year in yearA:
            s_y = np.array(year[season_year])
            mask = np.isin(s_y, seasonA)
            interval_season = time_value[s_y[mask]]

            if i == 0:
                dataA = np.zeros(
                    (len(yearA), len(interval_season), len(ds.coords['lat']),
                     len(ds.coords['lon'])))
                time2compute = np.arange(len(interval_season))

            centroid_time.append(
                ds.time.sel(time=slice(interval_season[0],
                                       interval_season[-1])).mean())
            #time_num = cftime.date2num(time_interval, da.time.units, calendar=da.time.calendar)
            #centroid_time[i] = np.mean(time_num, da.time.units, calendar=da.time.calendar)[0]
            time_bnds.append([interval_season[0], interval_season[-1]])
            #data[str(centroid_time[i])] = ds[indice_name].sel(time=slice(interval_season[0], interval_season[-1])).values
            dataA[i, :] = ds[indice_name].sel(
                time=slice(interval_season[0], interval_season[-1])).values
            i += 1

    elif slice_mode == 'month':

        month_iteration = 0
        list_month = []
        time2compute = np.arange(31)
        for year_i in yearA:

            year_interval = [year[year_i][0], year[year_i][-1]]
            year_boundary = da.time.values[year_interval]
            da_subset = da.sel(time=slice(year_boundary[0], year_boundary[1]))
            ds_year = xr.decode_cf(da_subset)
            monthA = ds_year.groupby(ds_year['time.month']).groups

            for month in monthA:

                dat = np.zeros(
                    (31, len(ds.coords['lat']), len(ds.coords['lon'])))
                dat[:] = np.nan
                dat[np.arange(len(monthA[month])), :] = ds_year[
                    indice_name].values[monthA[month], :]
                list_month.append(dat)

                time_bnds.append([
                    da_subset.time.values[monthA[month]][0],
                    da_subset.time.values[monthA[month]][-1]
                ])
                centroid_time.append(np.mean(time_bnds[month_iteration]))

                month_iteration += 1

        dataA = np.asarray(list_month)

    lon = ds.lon.values
    lat = ds.lat.values

    #data = xr.Dataset({indice_name: (['time','time2compute','lat','lon'],dataA), 'time_bnds': (['time','bnds'], time_bnds)},coords={'time': centroid_time,'time2compute': time2compute,'lon': lon,'lat': lat})

    data = xr.Dataset(
        {
            indice_name: (['time', 'time2compute', 'lat', 'lon'], dataA),
            'time_bnds': (['time', 'bnds'], time_bnds)
        },
        coords={
            'time': centroid_time,
            'time2compute': time2compute,
            'lon': lon,
            'lat': lat
        })
    time_end = datetime.datetime.now()
    print('delta_time: ' + str(time_end - time_start))
    return data
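
A small sketch of the season selection above, using hypothetical time-step indices instead of a decoded dataset:

import numpy as np

# Hypothetical time-step indices for one year and for the requested season.
year_steps = np.array([100, 101, 102, 103, 104, 105])
season_steps = np.array([102, 103, 104, 200, 201])

mask = np.isin(year_steps, season_steps)
print(year_steps[mask])   # the season steps that fall inside this year
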
Beispiel #55
0
File: dssm.py Project: Hins/dssm
        tf.global_variables_initializer().run()
        train_writer = tf.summary.FileWriter(
            cfg.summaries_dir + cfg.train_summary_writer_path, sess.graph)
        test_writer = tf.summary.FileWriter(
            cfg.summaries_dir + cfg.test_summary_writer_path, sess.graph)

        # load previous model to predict
        if os.path.exists(cfg.dssm_model_path + ".meta") == True:
            dssm_model = tf.train.import_meta_graph(cfg.dssm_model_path +
                                                    '.meta')
            dssm_model.restore(sess, cfg.dssm_model_path)
            for epoch_step in range(cfg.epoch_size):
                epoch_accuracy = 0.0
                for iter in range(cfg.iteration):
                    test_idx = iter % (sample_size / cfg.batch_size)
                    if np.isin(test_idx, train_index_list) == False:
                        real_prob = dssm_model.predict(test_idx)
                        print(real_prob.shape)
            sys.exit()

        # use the bigger one as iteration
        trainable = False
        for epoch_step in range(cfg.epoch_size):
            epoch_loss = 0.0
            for iter in range(iteration):
                train_idx = iter % (sample_size / cfg.batch_size)
                # if np.isin(train_idx, train_index_list) == True:
                if trainable == True:
                    tf.get_variable_scope().reuse_variables()
                trainable = True
                if iter % 100 == 0:
Beispiel #56
0
PS = np.zeros(D.ntot)
CCGP = []  #np.zeros((D.ntot, 100))
out_corr = []
d = np.zeros((n_compute, D.ntot))
pos_conds = []
for i, pos in tqdm(enumerate(D)):
    pos_conds.append(pos)
    # print('Dichotomy %d...'%i)
    # parallelism
    PS[i] = D.parallelism(z[idx, :], cond, clf)

    # CCGP
    cntxt = D.get_uncorrelated(100)
    out_corr.append(
        np.array([[(2 * np.isin(p, c) - 1).mean() for c in cntxt]
                  for p in this_task.positives]))

    CCGP.append(D.CCGP(z[idx, :], cond, gclf, cntxt, twosided=True))

    # shattering
    d[:, i] = D.coloring(cond)

# dclf.fit(z[idx_trn,:], d[np.isin(idx, idx_trn),:], tol=1e-5, max_iter=5000)
dclf.fit(z[idx, :], d, tol=1e-5)

# z = inputs.numpy()
z = rep(inputs.float()).detach().numpy()
# z = this_exp.test_data[0].detach().numpy()
# z = linreg.predict(this_exp.test_data[0])@W1.T
idx = np.random.choice(z.shape[0], n_compute, replace=False)
def kfold_nonnegative_regression(target_rdm,
                                 model_rdms,
                                 regression_type='linear',
                                 n_splits=10,
                                 n_repeats=None,
                                 random_state=None):
    '''Non-negative least squares linear regression on RDMs with k-fold cross-validation.
    Parameters
    ----------
    target_rdm: your brain data RDM (n_samples x n_samples)
    model_rdms: your model layer RDMs (n_samples x n_samples x n_layers)
    n_splits: how many cross_validated folds
    n_repeats: how many times to perform k-fold splits
    random_state: used if you want to use a particular set of random splits
    Returns
    -------
    r : correlation between predicted and actual RDM
    coefficients : the coefficients across k-fold splits
    intercepts : the intercepts across k-fold splits
    '''
    n_items = target_rdm.shape[0]

    predicted_rdm = np.zeros(target_rdm.shape)
    predicted_sum = np.zeros(target_rdm.shape)
    predicted_count = np.zeros(target_rdm.shape)

    coefficients = []
    intercepts = []
    i, j = np.triu_indices(target_rdm.shape[0], k=1)
    if n_repeats is None:
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    if n_repeats is not None:
        kf = RepeatedKFold(n_splits=n_splits,
                           n_repeats=n_repeats,
                           random_state=random_state)

    for train_indices, test_indices in kf.split(list(range(n_items))):

        # indices for training and test cells of matrix
        test_idx = (np.isin(i, test_indices) | np.isin(j, test_indices))
        train_idx = ~test_idx

        # target data (excluding test_indices)
        y_train = target_rdm[i[train_idx], j[train_idx]]

        # model data (excluding test_indices)
        X_train = model_rdms[i[train_idx], j[train_idx], :]

        # test data (test_indices)
        X_test = model_rdms[i[test_idx], j[test_idx], :]

        # fit the regression model
        if regression_type == 'linear':
            regression = LinearRegression(fit_intercept=True, positive=True)
            regression.fit(X_train, y_train)
        if regression_type == 'elastic_net':
            regression = ElasticNet(alpha=1.0, l1_ratio=0, positive=True)
            regression.fit(X_train, y_train)

        # predict the held out cells
        # note that for a k-fold procedure, some cells are predicted more than once
        # so we keep a sum and count, and later will average (sum/count) these predictions
        predicted_sum[i[test_idx], j[test_idx]] += regression.predict(X_test)
        predicted_count[i[test_idx], j[test_idx]] += 1

        # save the regression coefficients
        coefficients.append(regression.coef_)
        intercepts.append(regression.intercept_)

    predicted_rdm = predicted_sum / predicted_count
    coefficients = np.stack(coefficients)
    intercepts = np.stack(intercepts)

    # make sure each cell received one value
    cell_counts = predicted_count[np.triu_indices(target_rdm.shape[0], k=1)]
    assert cell_counts.min(
    ) >= 1, "A cell of the predicted matrix contains less than one value."

    # compute correlation between target and predicted upper triangle
    target = target_rdm[np.triu_indices(target_rdm.shape[0], k=1)]
    predicted = predicted_rdm[np.triu_indices(predicted_rdm.shape[0], k=1)]

    r = pearsonr(target, predicted)[0]

    return r, coefficients, intercepts
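
A minimal sketch of the item-wise hold-out above, on a hypothetical 5-item RDM index set:

import numpy as np

# Hypothetical RDM with 5 items; hold out every cell that touches items 1 or 3.
n_items = 5
i, j = np.triu_indices(n_items, k=1)
test_items = [1, 3]

test_idx = np.isin(i, test_items) | np.isin(j, test_items)
train_idx = ~test_idx
print(list(zip(i[train_idx], j[train_idx])))   # only cells between the remaining items
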
Beispiel #58
0
def generateGeotiffs(inDir, outDir, BRDF_Dir, exportOriginalDNB=False):

    # ----------------------------------------------------------------------------------------------------------

    #BRDF_Dir='/home/leonidas/Documents/phd/projects/Black_Marble/data/BRDF_vnp43ma4v001_h19v05'
    # inDir='/home/leonidas/Documents/phd/projects/Black_Marble/data/BlackMarble_VNP46A1'
    # outDir='/home/leonidas/Documents/phd/projects/MedianShift/data/geotiffs_tmp'
    # exportOriginalDNB=True

    resolution = 500

    # ----------------------------------------------------------------------------------------------------------
    xres = 527
    yres = 645

    xmin = 369503
    ymin = 4019222
    xmax = 633003
    ymax = 4341722

    greek_def = geometry.AreaDefinition(
        'ggrs', 'Greek Grid', 'ggrs', {
            'x_0': '500000',
            'y_0': '0',
            'lat_0': '0',
            'k': '0.9996',
            'lon_0': '24',
            'proj': 'tmerc',
            'ellps': 'GRS80',
            'units': 'm'
        }, xres, yres, [xmin, ymin, xmax, ymax])

    #os.chdir(inDir)
    if not os.path.exists(outDir): os.makedirs(outDir)
    #import re
    pattern = 'VNP46A1.A2018*.*h5'
    hdf_files = glob.glob(os.path.join(inDir, pattern))
    #hdf_files = [os.path.join(inDir,f) for f in os.listdir(inDir) if re.search(r'(^VNP46A1.A201[7-9](15[2-9]|16[0-9]|17[0-9]|18[0-9]|19[0-9]|20[0-9]|21[0-9]|22[0-9]|23[0-9]|24[0-3]).*\.h5$)', f)]
    hdf_files.sort()

    for hdf in hdf_files:
        #hdf=hdf_files[0]
        inFile = os.path.basename(hdf)
        print(inFile)

        jd = julian_days(inFile.split(".")[1])
        julian_day = jd['julian_day']

        BRDF_files = glob.glob("{}/VNP43MA4.{}.*.h5".format(
            BRDF_Dir, julian_day))
        if len(
                BRDF_files
        ) == 0:  # if no matching BRDF file is found, move on to the next VNP46A1
            continue
        BRDF_file = BRDF_files[0]
        outName = inFile.rsplit(
            '.', 1)[0]  # Parse out the file extension, keep file name

        tiff_files = glob.glob(os.path.join(outDir,
                                            "{}.*.tif".format(outName)))
        #print(tiff_files)
        #print(outDir)
        if len(tiff_files) > 0:
            print("Geotiff already exist: {}".format(outName))
            continue

        ymd = datetime.datetime.strptime(inFile.split(".")[1][1:8],
                                         '%Y%j').date().strftime('%Y%m%d')

        brdf = BRDF_data(BRDF_file, greek_def)

        with h5py.File(hdf, 'r') as hdf:
            #hdf=  h5py.File(hdf, 'r')
            try:

                fileMetadata = hdf['HDFEOS']['GRIDS']['VNP_Grid_DNB'].attrs
                lrcLon = fileMetadata['EastBoundingCoord'][0]
                ulcLon = fileMetadata['WestBoundingCoord'][0]
                ulcLat = fileMetadata['NorthBoundingCoord'][0]
                lrcLat = fileMetadata['SouthBoundingCoord'][0]

                # ================== DNB ===============================
                #At-sensor DNB radiance, nW·cm-2·sr-1 (16-bit unsigned integer )
                scale_factor = 0.1  #DNB scale factor
                #DNB_fillvalue=65535; no need to mask with the fill value because, when exporting the geotiff, band.SetNoDataValue(fill_value) turns pixels with 65535 into NoData
                DNB = hdf['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/'][
                    'DNB_At_Sensor_Radiance_500m'][...] * scale_factor

                # ================== LZA ===============================
                LZ_fillvalue = -32768
                LZ_scale_factor = 0.01
                Lunar_Zenith = hdf['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/'][
                    'Lunar_Zenith'][...] * LZ_scale_factor
                LZ_mask = np.isin(Lunar_Zenith, LZ_fillvalue)

                # ================== QF_DNB ===============================
                #QF_DNB_fillvalue=65535
                QF_DNB = hdf['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/'][
                    'QF_DNB'][...]
                mask_QF_DNB = np.isin(
                    QF_DNB, np.array([1, 2, 4, 8, 16, 256, 512, 1024, 2048]))

                # ================== QF_Cloud_Mask ===============================
                QF_Cloud_Mask = hdf['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/'][
                    'QF_Cloud_Mask'][...]
                #DNB_mask = np.isin(DNB, DNB_fillvalue)

                Cloud_Detection_Results_Confidence_Indicator = (QF_Cloud_Mask
                                                                & 192) >> 6
                mask_Cloud_Detection_Results_Confidence_Indicator = np.logical_not(
                    np.isin(Cloud_Detection_Results_Confidence_Indicator,
                            np.array([0])))

                Shadow = (QF_Cloud_Mask & 256) >> 8
                mask_Shadow = np.isin(Shadow, np.array([1]))

                Snow_Ice_Surface = (QF_Cloud_Mask & 1024) >> 10
                mask_Snow_Ice_Surface = np.isin(Snow_Ice_Surface,
                                                np.array([1]))

                Land_Water_Background = (QF_Cloud_Mask & 14) >> 1
                mask_Land_Water_Background = np.isin(
                    Land_Water_Background,
                    np.array([2, 3]))  # Inland and Sea Water

                # ================== UTC_time ===============================
                UTC_time_fillvalue = -999.9
                UTC_time = hdf['HDFEOS/GRIDS/VNP_Grid_DNB/Data Fields/'][
                    'UTC_Time'][...]
                UTC_time_mask = np.isin(UTC_time, UTC_time_fillvalue)
                UTC_time_elvidge = np.copy(UTC_time)

                # ================== Combine all Masks ===============================
                Mask = np.logical_or.reduce(
                    (LZ_mask, UTC_time_mask, mask_Land_Water_Background,
                     mask_Cloud_Detection_Results_Confidence_Indicator,
                     mask_Shadow, mask_Snow_Ice_Surface, mask_QF_DNB))

                # ================== apply fill_value to masked pixels ================
                fill_value = 65535
                DNB[Mask] = fill_value
                UTC_time[Mask] = UTC_time_fillvalue
                Lunar_Zenith[Mask] = LZ_fillvalue

                DNB = ma.masked_array(DNB, mask=(Mask))
                UTC_time = ma.masked_array(UTC_time, mask=(Mask))
                Lunar_Zenith = ma.masked_array(Lunar_Zenith, mask=(Mask))

                # ================== Export to Geotiffs ===============================
                #export DNB as geotiff

                XDim, YDim, = 2400, 2400
                #https://pyresample.readthedocs.io/en/latest/geo_def.html
                wgs84_def = geometry.AreaDefinition(
                    'WGS 84"', 'WGS_1984', 'WGS 84',
                    "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs", XDim,
                    YDim, [ulcLon, lrcLat, lrcLon, ulcLat])

                DNB_2100 = reproject(DNB, wgs84_def, greek_def)
                DNB_2100_ma = ma.masked_array(
                    DNB_2100.image_data, mask=(DNB_2100.image_data == 65535))
                Lunar_Zenith_2100 = reproject(Lunar_Zenith, wgs84_def,
                                              greek_def)
                UTC_time_2100 = reproject(UTC_time, wgs84_def,
                                          greek_def).image_data
                UTC_time_elvidge_2100 = reproject(UTC_time_elvidge, wgs84_def,
                                                  greek_def)
                UTC_time_2100_filtered = UTC_time_2100.compressed(
                )  #UTC_time_2100[UTC_time_2100>=0] #avoid fill value -999.9 #compressed: Return all the non-masked data as a 1-D array.

                print("Unique hours Values:{}".format(
                    np.unique(UTC_time_2100_filtered.astype('int'))))

                # if np.unique(UTC_time_2100_filtered.astype('int')).size==1: #if the data do not span two nights

                #     mt_time_format = modeUTC_time(UTC_time_2100_filtered, ymd)

                #     DNB_corrected = correctDNB(mt_time_format,DNB_2100_ma, Lunar_Zenith_2100.image_data, brdf )
                #     #export DNB
                #     mydate="{}-{}".format(jd['year'],jd['julian_day_str'])
                #     geotiff(outDir,
                #             "{}.DNB_{}_#{}#".format(outName,mt_time_format, mydate),
                #             resolution,
                #             fill_value,
                #             DNB_2100,
                #             np.ma.filled(DNB_corrected, fill_value=fill_value))
                #     geotiff(outDir,
                #             "{}.DNB_{}_#{}#_time".format(outName,mt_time_format, mydate),
                #             resolution,
                #             fill_value,
                #             DNB_2100,
                #             UTC_time_elvidge_2100.image_data)
                #     if exportOriginalDNB:
                #         geotiff(outDir,
                #             "{}.original_DNB_{}_#{}#".format(outName,mt_time_format, mydate),
                #             resolution,
                #             fill_value,
                #             DNB_2100,
                #             np.ma.filled(DNB_2100_ma, fill_value=fill_value))

                # else: # if the data span two nights
                for i in np.unique(UTC_time_2100_filtered.astype('int')):
                    hour_limit = 20
                    if i > hour_limit:  #current date
                        #apply filter only for >20
                        mydate = "{}-{}".format(jd['year'],
                                                jd['julian_day_str'])
                        mt_time_format = modeUTC_time(
                            UTC_time_2100_filtered[
                                UTC_time_2100_filtered > hour_limit], ymd)
                        DNB_2100_ma_by_time = ma.masked_array(
                            DNB_2100_ma,
                            mask=(UTC_time_2100.astype('int') < hour_limit))
                        UTC_time_elvidge_2100_ma_by_time = ma.masked_array(
                            UTC_time_elvidge_2100.image_data,
                            mask=(
                                UTC_time_elvidge_2100.image_data.astype('int')
                                < hour_limit))

                    else:  # previous date
                        # keep only pixels acquired before hour_limit
                        mt_time_format = modeUTC_time(
                            UTC_time_2100_filtered[
                                UTC_time_2100_filtered < hour_limit], ymd)
                        DNB_2100_ma_by_time = ma.masked_array(
                            DNB_2100_ma,
                            mask=(UTC_time_2100.astype('int') > hour_limit))
                        UTC_time_elvidge_2100_ma_by_time = ma.masked_array(
                            UTC_time_elvidge_2100.image_data,
                            mask=(
                                UTC_time_elvidge_2100.image_data.astype('int')
                                > hour_limit))
                        mydate = "{}-{}".format(jd['previous_day_year'],
                                                jd['previous_julian_day_str'])

                    DNB_corrected = correctDNB(mt_time_format,
                                               DNB_2100_ma_by_time,
                                               Lunar_Zenith_2100.image_data,
                                               brdf)

                    geotiff(
                        outDir,
                        "{}.DNB_{}_#{}#".format(outName, mt_time_format,
                                                mydate), resolution,
                        fill_value, DNB_2100,
                        np.ma.filled(DNB_corrected, fill_value=fill_value))
                    geotiff(
                        outDir, "{}.DNB_{}_#{}#_time".format(
                            outName, mt_time_format,
                            "{}-{}".format(jd['year'], jd['julian_day_str'])),
                        resolution, fill_value, DNB_2100,
                        np.ma.filled(UTC_time_elvidge_2100_ma_by_time,
                                     fill_value=fill_value))
                    if exportOriginalDNB:
                        geotiff(
                            outDir, "{}.original_DNB_{}_#{}#".format(
                                outName, mt_time_format, mydate), resolution,
                            fill_value, DNB_2100,
                            np.ma.filled(DNB_2100_ma_by_time,
                                         fill_value=fill_value))

            except Exception as e:
                print(e)
                print("An exception occurred. File: {}".format(str(inFile)))
                with open('errors.txt', 'a') as f:
                    f.write("{}\n".format(str(inFile)))  # "\n" becomes os.linesep in text mode
            finally:
                print("Done")
    def from_qm9_pretrained(root, dataset, target):
        if spk is None:
            raise ImportError(
                '`SchNet.from_qm9_pretrained` requires `schnetpack`.')

        assert target >= 0 and target <= 12

        units = [1] * 12
        units[0] = ase.units.Debye
        units[1] = ase.units.Bohr**3
        units[5] = ase.units.Bohr**2

        root = osp.expanduser(osp.normpath(root))
        makedirs(root)
        folder = 'trained_schnet_models'
        if not osp.exists(osp.join(root, folder)):
            path = download_url(SchNet.url, root)
            extract_zip(path, root)
            os.unlink(path)

        name = f'qm9_{qm9_target_dict[target]}'
        path = osp.join(root, 'trained_schnet_models', name, 'split.npz')

        split = np.load(path)
        train_idx = split['train_idx']
        val_idx = split['val_idx']
        test_idx = split['test_idx']

        # Filter the splits to only contain characterized molecules.
        idx = dataset.data.idx
        assoc = idx.new_empty(idx.max().item() + 1)
        assoc[idx] = torch.arange(idx.size(0))
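        # `assoc[i]` now holds the position of original QM9 index `i` inside the
        # filtered dataset; entries for uncharacterized molecules stay undefined
        # and are excluded below via np.isin.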

        train_idx = assoc[train_idx[np.isin(train_idx, idx)]]
        val_idx = assoc[val_idx[np.isin(val_idx, idx)]]
        test_idx = assoc[test_idx[np.isin(test_idx, idx)]]

        path = osp.join(root, 'trained_schnet_models', name, 'best_model')

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            state = torch.load(path, map_location='cpu')

        net = SchNet(hidden_channels=128, num_filters=128, num_interactions=6,
                     num_gaussians=50, cutoff=10.0,
                     atomref=dataset.atomref(target))
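        # These hyper-parameters mirror the architecture of the pretrained
        # schnetpack QM9 models, so the weights copied below line up with the
        # layer shapes.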

        net.embedding.weight = state.representation.embedding.weight

        for int1, int2 in zip(state.representation.interactions,
                              net.interactions):
            int2.mlp[0].weight = int1.filter_network[0].weight
            int2.mlp[0].bias = int1.filter_network[0].bias
            int2.mlp[2].weight = int1.filter_network[1].weight
            int2.mlp[2].bias = int1.filter_network[1].bias
            int2.lin.weight = int1.dense.weight
            int2.lin.bias = int1.dense.bias

            int2.conv.lin1.weight = int1.cfconv.in2f.weight
            int2.conv.lin2.weight = int1.cfconv.f2out.weight
            int2.conv.lin2.bias = int1.cfconv.f2out.bias

        net.lin1.weight = state.output_modules[0].out_net[1].out_net[0].weight
        net.lin1.bias = state.output_modules[0].out_net[1].out_net[0].bias
        net.lin2.weight = state.output_modules[0].out_net[1].out_net[1].weight
        net.lin2.bias = state.output_modules[0].out_net[1].out_net[1].bias

        mean = state.output_modules[0].atom_pool.average
        net.readout = 'mean' if mean is True else 'add'

        dipole = state.output_modules[0].__class__.__name__ == 'DipoleMoment'
        net.dipole = dipole

        net.mean = state.output_modules[0].standardize.mean.item()
        net.std = state.output_modules[0].standardize.stddev.item()

        if state.output_modules[0].atomref is not None:
            net.atomref.weight = state.output_modules[0].atomref.weight
        else:
            net.atomref = None

        net.scale = 1. / units[target]

        return net, (dataset[train_idx], dataset[val_idx], dataset[test_idx])
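
The split handling above follows a common remapping pattern: keep only the pretrained split indices that still exist in the filtered dataset, then translate them from raw QM9 numbering to positions in the filtered dataset through a lookup tensor. A self-contained sketch of that pattern with made-up indices (not the actual QM9 splits):

import numpy as np
import torch

# Raw indices of the molecules that survived filtering (hypothetical values).
idx = torch.tensor([0, 2, 3, 7])

# Lookup table: raw index -> position in the filtered dataset.
assoc = idx.new_empty(idx.max().item() + 1)
assoc[idx] = torch.arange(idx.size(0))

# A pretrained split given in raw numbering; index 5 was filtered out.
train_idx = np.array([2, 5, 7])

# Drop indices that no longer exist, then remap the survivors.
train_idx = assoc[train_idx[np.isin(train_idx, idx)]]
print(train_idx)  # tensor([1, 3])
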
Beispiel #60
0
def sieve(mesh, area=None):
    """
    A mesh can consist of multiple separate subdomains on a single structure.
    This function removes every subdomain whose area is equal to or smaller
    than the provided area. The default behaviour is to remove all subdomains
    except the largest one.
    """
    # select the nodes to remove based on multipolygon areas
    multipolygon = geom_to_multipolygon(mesh)
    areas = [polygon.area for polygon in multipolygon]
    if area is None:
        remove = np.where(areas < np.max(areas))[0].tolist()
    else:
        remove = list()
        for idx, patch_area in enumerate(areas):
            if patch_area <= area:
                remove.append(idx)

    # If a removed subdomain's boundary path surrounds a node, that node needs
    # to be removed as well.
    vert2_mask = np.full((mesh.vert2['coord'].shape[0], ), False)
    for idx in remove:
        path = Path(multipolygon[idx].exterior.coords, closed=True)
        vert2_mask = vert2_mask | path.contains_points(mesh.vert2['coord'])

    # Also select any connected nodes; these are missed by
    # path.contains_points() because they lie on the path edges.
    _node_neighbors = vertices_around_vertex(mesh)
    _idxs = np.where(vert2_mask)[0]
    for _idx in _idxs:
        vert2_mask[list(_node_neighbors[_idx])] = True

    # There might also be some dangling triangles without neighbours, which
    # are likewise missed by path.contains_points().
    for idx, neighbors in _node_neighbors.items():
        if len(neighbors) <= 2:
            vert2_mask[idx] = True

    # Mask out elements containing the unwanted nodes.
    tria3_mask = np.any(vert2_mask[mesh.tria3['index']], axis=1)

    # Renumber indexes.
    # Removing isolated nodes does not require eliminating triangles from the
    # element table, so the number of triangle entries stays constant; only
    # the tria3 indexes have to be renumbered to match the new node numbering.
    # For every removed node index, processed from the bottom of the index
    # table upwards, all higher triangle indexes are decremented by one.
    used_indexes = np.unique(mesh.tria3['index'])
    node_indexes = np.arange(mesh.vert2['coord'].shape[0])
    tria3_idxs = np.where(~np.isin(node_indexes, used_indexes))[0]
    tria3_IDtag = mesh.tria3['IDtag'].take(np.where(~tria3_mask)[0])
    tria3_index = mesh.tria3['index'][~tria3_mask, :].flatten()
    for idx in reversed(tria3_idxs):
        tria3_index[np.where(tria3_index >= idx)] -= 1
    tria3_index = tria3_index.reshape((tria3_IDtag.shape[0], 3))
    vert2_idxs = np.where(np.isin(node_indexes, used_indexes))[0]

    # update vert2
    mesh.vert2 = mesh.vert2.take(vert2_idxs, axis=0)

    # update value
    if len(mesh.value) > 0:
        mesh.value = mesh.value.take(vert2_idxs)

    # update tria3
    mesh.tria3 = np.array([(tuple(indices), tria3_IDtag[i])
                           for i, indices in enumerate(tria3_index)],
                          dtype=jigsaw_msh_t.TRIA3_t)
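
The reversed decrement loop above renumbers the element table one removed node at a time. The same mapping can be built in a single vectorized step: construct an old-to-new lookup from the kept node indexes and apply it to the whole triangle table. A small sketch on made-up data (separate from the sieve() internals above):

import numpy as np

# Hypothetical table of 6 nodes and 2 triangles; nodes 1 and 4 are unused.
tria3_index = np.array([[0, 2, 3], [2, 3, 5]])
used = np.unique(tria3_index)              # kept node indexes: [0, 2, 3, 5]
n_nodes = 6

# Old-to-new lookup: each kept node gets its rank among the kept nodes.
old_to_new = np.full(n_nodes, -1)
old_to_new[used] = np.arange(used.size)

renumbered = old_to_new[tria3_index]
print(renumbered)  # [[0 1 2] [1 2 3]]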