Example #1
    def read_file(self, fn=None, header=0, g_inx=5, d_inx=6):
        ''' This method reads a csv file.

        PARAMETERS:

            fn: string - The name of the file to be read with path

            header: int - Number of header rows. 0 if no header, default.

        '''

        if fn is None:

            wr.warning('Filename could not be found.')

        else:
            if header == 0:

                self.dat = pd.read_csv(fn, header=None)

            else:

                self.dat = pd.read_csv(fn, skiprows=range(header), header=None)

        # Select rows global_list and diffuse_list
        self.global_list = self.dat.iloc[:, g_inx].values
        self.diffuse_list = self.dat.iloc[:, d_inx].values
Example #2
 def _unpack_args(self, num):
     warnings.warning('Deprecated and will be removed', DeprecationWarning)
     nargs = self.args
     if len(nargs) != 1:
         raise TypeError('{} directive expected exactly {} argument(s), '
                         'got {}'.format(self.name, num, nargs))
     return self.args
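Note that the standard library's warnings module exposes warnings.warn(), not warnings.warning(), so the call above would fail with AttributeError at runtime. Below is a minimal corrected sketch of the same deprecation check; the helper name and argument handling are illustrative, not from the original project.

import warnings

def _unpack_args_fixed(args, name, num):
    # Hypothetical corrected helper: warnings.warn() is the stdlib call.
    warnings.warn('Deprecated and will be removed', DeprecationWarning, stacklevel=2)
    if len(args) != num:
        raise TypeError('{} directive expected exactly {} argument(s), '
                        'got {}'.format(name, num, len(args)))
    return args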
Example #3
def feat_from_raw(raw):  # see features.py
    sampwidth = 2
    nchannels = 1
    nframes = len(raw) / sampwidth
    out = struct.unpack_from("%dh" % nframes * nchannels, raw)
    sig = np.reshape(np.array(out), (-1, nchannels)).squeeze()
    sig = sig.astype(np.float32)
    shp = sig.shape
    # wav should contain a single channel
    assert len(shp) == 1 or (len(shp) == 2 and shp[1] == 1)
    sig *= (2**(15 - sampwidth))

    with warnings.catch_warnings() as w:
        # ignore warnings resulting from empty signals parts
        warnings.filterwarnings('ignore',
                                message='divide by zero encountered in log',
                                category=RuntimeWarning,
                                module='sidekit')
        _, loge, _, mspec = mfcc(sig.astype(np.float32), get_mspec=True)

    # Management of short duration segments
    difflen = 0
    if len(loge) < 68:
        difflen = 68 - len(loge)
        warnings.warning(
            "media %s duration is short. Robust results require length of at least 720 milliseconds"
            % wavname)
        mspec = np.concatenate((mspec, np.ones((difflen, 24)) * np.min(mspec)))
        #loge = np.concatenate((loge, np.ones(difflen) * np.min(mspec)))

    return mspec, loge, difflen
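The struct.unpack_from() call above converts raw 16-bit PCM bytes into integer samples before reshaping. A shorter equivalent sketch using numpy.frombuffer, assuming little-endian mono samples two bytes wide as in the snippet:

import numpy as np

def pcm16_to_float32(raw):
    # Interpret the byte buffer as signed 16-bit little-endian samples,
    # then promote to float32, mirroring the struct-based conversion above.
    return np.frombuffer(raw, dtype='<i2').astype(np.float32)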
Example #4
    def append(self, tensor, check_unique=False):
        """
        Insert a new tensor at the end of the index.
        Be advised that this operation is linear on index size ($O(n)$).

        Parameters
        ----------
        tensor: numpy.ndarray or list
            A vector to insert into index.

        check_unique (optional, default: False): bool
            Defines whether the append method should verify the existence
            of a very similar tensor already in the current index; in other
            words, it checks the value for uniqueness. Be advised that this
            check adds overhead to the append process.
        """
        if sum(tensor) == 0.:
            raise NullTensorError

        if self._is_new_index:

            index_it = True

            if check_unique and len(self) > 1:

                self.tree.build(self.size << intmul >> self.trees)

                result = self.item(
                    self.index(tensor),
                    top=1,
                    distances=True
                )

                if result[1][0] <= .05:
                    warning(
                        'Tensor being indexed already exists in '
                        'the database and the check for duplicates '
                        'is on. Refusing to store this tensor again.'
                    )

                    index_it = False

                self.tree.unbuild()

            if index_it:
                self.tree.add_item(len(self), tensor)

        else:

            with Index(self.size, volatile=True, trees=self.trees) as tmp_idx:
                for value in self.values():
                    tmp_idx.append(value, check_unique)

                tmp_idx.append(tensor, check_unique)

                _temp_file = tmp_idx.path

            move(_temp_file, self.path)

            self.refresh()
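A hypothetical usage sketch of the append() method above; the Index constructor arguments shown are illustrative only.

# idx = Index(size=128, trees=10)
# idx.append(np.random.rand(128))                     # plain O(n) insert
# idx.append(np.random.rand(128), check_unique=True)  # slower: also checks for near-duplicates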
Example #5
    def set_block(self, row, col, value):
        assert row >= 0 and col >= 0, 'Indices must be positive'

        assert row < self.bshape[0] and col < self.bshape[
            1], 'Indices out of range'

        if value is None:
            self._blocks[row, col] = None
            self._block_mask[row, col] = False
        else:
            if isinstance(value, BaseBlockMatrix):
                assert_block_structure(value)
            elif isinstance(value, np.ndarray):
                if value.ndim != 2:
                    msg = 'blocks need to be sparse matrices or BlockMatrices'
                    raise ValueError(msg)
                msg = 'blocks need to be sparse matrices or BlockMatrices; a numpy array was given; copying the numpy array to a coo_matrix'
                logger.warning(msg)
                warnings.warning(msg)
                value = coo_matrix(value)
            else:
                assert isspmatrix(
                    value
                ), 'blocks need to be sparse matrices or BlockMatrices'

            nrows, ncols = value.shape
            self.set_row_size(row, nrows)
            self.set_col_size(col, ncols)
            self._blocks[row, col] = value
            self._block_mask[row, col] = True
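The warning branch above converts dense numpy blocks to sparse storage. A minimal illustration of that conversion, assuming scipy is available:

import numpy as np
from scipy.sparse import coo_matrix

dense = np.eye(3)            # a dense 3x3 block
block = coo_matrix(dense)    # stored as a sparse COO matrix, as set_block() does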
Example #6
	def fixed_get(self, key):
		import os
		import errno
		import warnings
		from webassets.cache import make_md5

		if not os.path.exists(self.directory):
			error_logger.warning("Cache directory {} doesn't exist, not going "
			                     "to attempt to read cache file".format(self.directory))
			return None

		try:
			hash = make_md5(self.V, key)
		except IOError as e:
			if e.errno != errno.ENOENT:
				raise
			return None

		filename = os.path.join(self.directory, '%s' % hash)
		try:
			f = io.open(filename, 'rb')
		except IOError as e:
			if e.errno != errno.ENOENT:
				error_logger.exception("Got an exception while trying to open webasset file {}".format(filename))
			return None
		try:
			result = f.read()
		finally:
			f.close()

		unpickled = webassets.cache.safe_unpickle(result)
		if unpickled is None:
			warnings.warning('Ignoring corrupted cache file %s' % filename)
		return unpickled
Example #7
    def get_prepped_inputs(self, chunking, array_shuffle_seed=None, **kwargs):  # pylint: disable=too-many-locals
        sup = super(BaseClassSubsamplingSteppedInputsProvider, self)
        inputs = sup.get_prepped_data_label(chunking, **kwargs)

        # shuffle seeds for order of steps, and shuffling keeps
        if array_shuffle_seed is None:
            kseed, seed = None, None
        elif isinstance(array_shuffle_seed, (int, np.int_)):
            nr.seed(array_shuffle_seed)
            kseed, seed = nr.randint(41184535, size=2)

        keeps = self.keeping_decision(inputs, keep_seed=kseed, **kwargs)
        if keeps.shape[0] < 1:
            warning("Sub-sampling has resulted in zero-length selection, "
                    "ratios used: {} from given {}".format(
                        self.ratios, self._user_ratios))
            for _ in range(self.steps_per_chunk):
                # zero-length, but we honor the steps per chunk
                yield [i[keeps, ...] for i in inputs]

        starts, ends, aseed = self.se_for_chunksteps_maybeshuffled(
            len(keeps), shuffle_seed=seed, **kwargs)

        keeps = self.maybe_shuffle_array(keeps, aseed)
        for s, e in zip(starts, ends):
            yield [i[keeps[s:e], ...] for i in inputs]
Example #8
    def se_for_chunksteps_maybeshuffled(self,
                                        len_input,
                                        shuffle_seed=None,
                                        **kwargs):  # pylint: disable=unused-argument
        # don't want to kill the training man
        if len_input < self.steps_per_chunk:
            warning("chunk size is smaller than steps_per_chunk: "
                    "{} v/s {}, will use the smaller value".format(
                        len_input, self.steps_per_chunk))
            spc = len_input
        else:
            spc = self.steps_per_chunk

        nsteps = len_input // spc

        starts = tuple(range(0, nsteps * spc, nsteps))
        ends = starts[1:] + (len_input, )

        # NOTE: I know that we can get pre-shuffled data from sup.get_prepped_inputs,
        # but ... This is to test an API, because ... there will be other implementations
        # that cannot shuffle the data as is ... like the ones that use striding tricks

        # shuffle seeds for order of steps, and shuffling data
        if shuffle_seed is None:
            oseed, aseed = None, None
        elif isinstance(shuffle_seed, (int, np.int_)):
            nr.seed(shuffle_seed)
            oseed, aseed = nr.randint(41184535, size=2)

        starts = self.maybe_shuffle_array(np.array(starts), oseed)
        ends = self.maybe_shuffle_array(np.array(ends), oseed)

        return starts, ends, aseed
Example #9
def strip_html(html, include_metatags=True):
    try:
        html = recursively_decode_html_entities(html)
    except:
        e = sys.exc_info()[0]

        logging.warning(
            'Exception during recursively_decode_html_entities: %s', e)

    try:
        soup = bs4.BeautifulSoup(html, 'lxml')
    except:
        warnings.warning('lxml not found; unable to strip HTML.')

        return None

    # Remove javascript.
    [s.extract() for s in soup('script')]

    # Remove css.
    [s.extract() for s in soup('style')]

    content = []

    # First, extract meta tags.
    if include_metatags:
        content.extend(
            meta['content'] for meta in soup('meta')
            if 'content' in meta)

    # Add text content from the page.
    content.append(soup.get_text(' ', strip=True))

    return ' '.join(content)
Example #10
def _maybe_promote_st(dtype):
    """
        Modified version of _maybe_promote found from xarray. This allows for ability to provide null values to
        ints and 'S1' datatype
    """
    # print('boobga')
    # N.B. these casting rules should match pandas
    if np.issubdtype(dtype, float):
        fill_value = np.nan
    elif np.issubdtype(dtype, int):
        # dtype = int
        fill_value = 0
    elif np.issubdtype(dtype, complex):
        fill_value = np.nan + np.nan * 1j
    elif np.issubdtype(dtype, np.datetime64):
        fill_value = np.datetime64('NaT')
    elif np.issubdtype(dtype, np.timedelta64):
        fill_value = np.timedelta64('NaT')
    elif np.issubdtype(dtype, 'S'):
        fill_value = ''  # fill with empty strings
    else:
        warnings.warning('CHECK THIS DATATYPE: ' + str(dtype))
        dtype = object
        fill_value = np.nan
    return np.dtype(dtype), fill_value
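On recent NumPy versions, passing builtin types such as float or int to np.issubdtype is deprecated; a sketch of the equivalent checks with abstract dtypes (the concrete dtypes shown are only examples):

import numpy as np

np.issubdtype(np.dtype('float64'), np.floating)   # True
np.issubdtype(np.dtype('int32'), np.integer)      # True
np.issubdtype(np.dtype('S1'), np.bytes_)          # True for 'S' string dtypes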
Example #11
def strip_html(html, include_metatags=True):
    assert isinstance(html, str)

    try:
        html = recursively_decode_html_entities(html)
    except:
        logging.warning(
            'Exception during recursively_decode_html_entities: %s',
            sys.exc_info()[:2])

    try:
        soup = bs4.BeautifulSoup(html, 'lxml')
    except:
        warnings.warning('lxml not found; unable to strip HTML.')

        return None

    # Remove javascript.
    [s.extract() for s in soup('script')]

    # Remove css.
    [s.extract() for s in soup('style')]

    content = []

    # First, extract meta tags.
    if include_metatags:
        content.extend(meta['content'] for meta in soup('meta')
                       if 'content' in meta)

    # Add text content from the page.
    content.append(soup.get_text(' ', strip=True))

    return ' '.join(content)
Example #12
def init_downsampling():

    global _ALREADY_APPLIED
    if _ALREADY_APPLIED:
        warnings.warning("Filtering to verify robustness should be applied "
                         "only once (init_downsampling was called multiple "
                         "times). This call has no effect.")
        return

    _ALREADY_APPLIED = True
    _draw_allowed_municipalities()
    _apply_downsampling_hooks()

    file_spec_part = "_robustness_s_%d_p_%f" % (ROBUSTNESS_SEED,
                                                ROBUSTNESS_PERCENTAGE)
    valid_in_num = "0123456789eE-abcdefghijklmnopqrstuvwxyz"
    file_spec_part = "".join(
        [k if k in valid_in_num else "_" for k in file_spec_part])
    add_output_dir_postfix(file_spec_part)

    if is_quiet():
        print("Downsampled to %d municipalities" %
              len(_ALLOWED_MUNICIPALITIES))
    else:
        print(
            "Allowed municipalities (%d): %s" %
            (len(_ALLOWED_MUNICIPALITIES), ",".join(_ALLOWED_MUNICIPALITIES)))
Example #13
        def _reassociate(node, parent):
            if isinstance(node, (Symbol, Div)):
                return

            elif isinstance(node, Par):
                _reassociate(node.child, node)

            elif isinstance(node, (Sum, Sub, FunCall)):
                for n in node.children:
                    _reassociate(n, node)

            elif isinstance(node, Prod):
                children = explore_operator(node)
                # Reassociate symbols
                symbols = [n for n, p in children if isinstance(n, Symbol)]
                # Capture the other children and recur on them
                other_nodes = [(n, p) for n, p in children if not isinstance(n, Symbol)]
                for n, p in other_nodes:
                    _reassociate(n, p)
                # Create the reassociated product and modify the original AST
                children = zip(*other_nodes)[0] if other_nodes else ()
                children += tuple(sorted(symbols, key=reorder))
                reassociated_node = ast_make_expr(Prod, children, balance=False)
                parent.children[parent.children.index(node)] = reassociated_node

            else:
                warning('Unexpected node of type %s while reassociating', typ(node))
Example #14
def test_oidc_config_fields(app, client):
    """
    Test that the configuration response at least contains the required fields.
    For fields which are recommended but not required, issue a warning.
    """
    response = client.get("/.well-known/openid-configuration")
    assert response.status_code == 200, response.data

    # Check for required fields.
    required_fields = [
        "issuer",
        "authorization_endpoint",
        "token_endpoint",
        "jwks_uri",
        "response_types_supported",
        "subject_types_supported",
        "id_token_signing_alg_values_supported",
    ]
    for field in required_fields:
        assert field in response.json

    # For recommended fields, warn if not contained in the response.
    recommended_fields = [
        "userinfo_endpoint",
        "registration_endpoint",
        "scopes_supported",
        "claims_supported",
    ]

    for field in recommended_fields:
        if field not in response.json:
            warnings.warning(
                "OIDC configuration response missing recommended field: " +
                field)
Example #15
File: cuba.py Project: ninoc/pyigm
    def zinterp_jnu(self, zval, use_nearest=False):
        """Interpolate the Jnu grid at a given redshift

        Parameters
        ----------
        zval : float
          Redshift
        use_nearest : bool, optional
          Use nearest redshift instead??
        """
        # Do not interpolate beyond limits
        minz = np.min(self.z)
        maxz = np.max(self.z)
        if zval < minz:
            warn.warning('Input z was lower than z grid')
            print('Using z={:g}'.format(minz))
            return self.Jnu[:, 0].flatten()
        if zval > maxz:
            warn.warning('Input z was larger than z grid')
            print('Using z={:g}'.format(maxz))
            return self.Jnu[:, -1].flatten()

        # Find nearest? 
        if use_nearest:
            idx = np.argmin(np.abs(self.z-zval))
            return self.Jnu[:, idx].flatten()

        # Interpolate
        nval = self.energy.shape[0]
        jnu = np.zeros(nval)
        for ii in range(nval):
            jnu[ii] = interp1d(self.z, self.Jnu[ii, ])(zval)
        return jnu * self.Jnu.unit
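The per-energy loop at the end can be collapsed into one vectorized call, since scipy's interp1d accepts an axis argument; a sketch assuming Jnu is an astropy Quantity of shape (n_energy, n_z):

from scipy.interpolate import interp1d

# jnu = interp1d(self.z, self.Jnu.value, axis=1)(zval) * self.Jnu.unit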
Example #16
    def zinterp_jnu(self, zval, use_nearest=False):
        """Interpolate the Jnu grid at a given redshift

        Parameters
        ----------
        zval : float
          Redshift
        use_nearest : bool, optional
          Use nearest redshift instead??
        """
        # Do not interpolate beyond limits
        minz = np.min(self.z)
        maxz = np.max(self.z)
        if zval < minz:
            warn.warning('Input z was lower than z grid')
            print('Using z={:g}'.format(minz))
            return self.Jnu[:, 0].flatten()
        if zval > maxz:
            warn.warning('Input z was larger than z grid')
            print('Using z={:g}'.format(maxz))
            return self.Jnu[:, -1].flatten()

        # Find nearest?
        if use_nearest:
            idx = np.argmin(np.abs(self.z - zval))
            return self.Jnu[:, idx].flatten()

        # Interpolate
        nval = self.energy.shape[0]
        jnu = np.zeros(nval)
        for ii in range(nval):
            jnu[ii] = interp1d(self.z, self.Jnu[ii, ])(zval)
        return jnu * self.Jnu.unit
Example #17
def compute_fiducial(wcslist, bounding_box=None, domain=None):
    """
    For a celestial footprint this is the center.
    For a spectral footprint, it is the beginning of the range.

    This function assumes all WCSs have the same output coordinate frame.
    """
    if domain is not None:
        warnings.warning(
            "'domain' was deprecated in 0.8 and will be removed from next"
            "version. Use 'bounding_box' instead.")
    axes_types = wcslist[0].output_frame.axes_type
    spatial_axes = np.array(axes_types) == 'SPATIAL'
    spectral_axes = np.array(axes_types) == 'SPECTRAL'
    footprints = np.hstack(
        [w.footprint(bounding_box=bounding_box).T for w in wcslist])
    spatial_footprint = footprints[spatial_axes]
    spectral_footprint = footprints[spectral_axes]

    fiducial = np.empty(len(axes_types))
    if (spatial_footprint).any():
        lon, lat = spatial_footprint
        lon, lat = np.deg2rad(lon), np.deg2rad(lat)
        x_mean = np.mean(np.cos(lat) * np.cos(lon))
        y_mean = np.mean(np.cos(lat) * np.sin(lon))
        z_mean = np.mean(np.sin(lat))
        lon_fiducial = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
        lat_fiducial = np.rad2deg(
            np.arctan2(z_mean, np.sqrt(x_mean**2 + y_mean**2)))
        fiducial[spatial_axes] = lon_fiducial, lat_fiducial
    if (spectral_footprint).any():
        fiducial[spectral_axes] = spectral_footprint.min()
    return fiducial
Example #18
def compute_fiducial(wcslist, bounding_box=None, domain=None):
    """
    For a celestial footprint this is the center.
    For a spectral footprint, it is the beginning of the range.

    This function assumes all WCSs have the same output coordinate frame.
    """
    if domain is not None:
        warnings.warning("'domain' was deprecated in 0.8 and will be removed from next"
                         "version. Use 'bounding_box' instead.")
    axes_types = wcslist[0].output_frame.axes_type
    spatial_axes = np.array(axes_types) == 'SPATIAL'
    spectral_axes = np.array(axes_types) == 'SPECTRAL'
    footprints = np.hstack([w.footprint(bounding_box=bounding_box).T for w in wcslist])
    spatial_footprint = footprints[spatial_axes]
    spectral_footprint = footprints[spectral_axes]

    fiducial = np.empty(len(axes_types))
    if (spatial_footprint).any():
        lon, lat = spatial_footprint
        lon, lat = np.deg2rad(lon), np.deg2rad(lat)
        x_mean = np.mean(np.cos(lat) * np.cos(lon))
        y_mean = np.mean(np.cos(lat) * np.sin(lon))
        z_mean = np.mean(np.sin(lat))
        lon_fiducial = np.rad2deg(np.arctan2(y_mean, x_mean)) % 360.0
        lat_fiducial = np.rad2deg(np.arctan2(z_mean, np.sqrt(x_mean ** 2 +
            y_mean ** 2)))
        fiducial[spatial_axes] = lon_fiducial, lat_fiducial
    if (spectral_footprint).any():
        fiducial[spectral_axes] = spectral_footprint.min()
    return fiducial
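The spatial branch above computes the mean direction on the unit sphere (averaging unit vectors, then converting back to lon/lat). A self-contained sketch of just that step:

import numpy as np

def spherical_centroid(lon_deg, lat_deg):
    # Average unit vectors on the sphere and convert back to degrees,
    # mirroring the lon_fiducial/lat_fiducial computation above.
    lon, lat = np.deg2rad(lon_deg), np.deg2rad(lat_deg)
    x = np.mean(np.cos(lat) * np.cos(lon))
    y = np.mean(np.cos(lat) * np.sin(lon))
    z = np.mean(np.sin(lat))
    return np.rad2deg(np.arctan2(y, x)) % 360.0, np.rad2deg(np.arctan2(z, np.hypot(x, y)))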
Example #19
    def __init__(self, n_workers=None, threads_per_worker=None, processes=True,
                 loop=None, start=True, ip=None, scheduler_port=0,
                 silence_logs=logging.CRITICAL, diagnostics_port=8787,
                 services={}, worker_services={}, nanny=None, **worker_kwargs):
        if nanny is not None:
            warnings.warning("nanny has been deprecated, used processes=")
            processes = nanny
        self.status = None
        self.processes = processes
        self.silence_logs = silence_logs
        if silence_logs:
            for l in ['distributed.scheduler',
                      'distributed.worker',
                      'distributed.core',
                      'distributed.nanny']:
                logging.getLogger(l).setLevel(silence_logs)
        if n_workers is None and threads_per_worker is None:
            if processes:
                n_workers = _ncores
                threads_per_worker = 1
            else:
                n_workers = 1
                threads_per_worker = _ncores
        if n_workers is None and threads_per_worker is not None:
            n_workers = max(1, _ncores // threads_per_worker)
        if n_workers and threads_per_worker is None:
            # Overcommit threads per worker, rather than undercommit
            threads_per_worker = max(1, int(math.ceil(_ncores / n_workers)))

        self.loop = loop or IOLoop()
        if start and not self.loop._running:
            self._thread = Thread(target=self.loop.start)
            self._thread.daemon = True
            self._thread.start()
            while not self.loop._running:
                sleep(0.001)

        if diagnostics_port is not None:
            try:
                from distributed.bokeh.scheduler import BokehScheduler
            except ImportError:
                logger.debug("To start diagnostics web server please install Bokeh")
            else:
                services[('bokeh', diagnostics_port)] = BokehScheduler

        self.scheduler = Scheduler(loop=self.loop,
                                   services=services)
        self.scheduler_port = scheduler_port

        self.workers = []
        self.n_workers = n_workers
        self.threads_per_worker = threads_per_worker
        self.worker_services = worker_services
        self.worker_kwargs = worker_kwargs

        if start:
            sync(self.loop, self._start, ip)

        clusters_to_close.add(self)
Example #20
 def memory(self):
     """Return function's memory attribute"""
     try:
         return self.function.memory
     except:
         warnings.warning(
             f'Function of {self.name} (self.function.name) has no memory attribute'
         )
Example #21
 def __setitem__(self, name, value):
     v = self.__check(value, name)
     if v is not None:
         if name in self.__data:
             warning(
                 f"Overwriting field \"{name}\" which is already present in entry"
             )
         self.__data[name] = v
Example #22
 def __getField(self, bibItem, field, required=False):
     if field in bibItem:
         return field
     if field in self.fieldAliases:
         for alias in self.fieldAliases[field]:
             if alias in bibItem:
                 return alias
     if required:
         warning(f"Could not find required field: {field}")
Example #23
def plot_3d_field_line(magnetic_field,
                       xpos,
                       zpos,
                       yperiod,
                       cycles=20,
                       y_res=50):
    """Make a 3D plot of field lines

    Inputs
    ------
    magnetic_field - Magnetic field object

    xpos             Starting X location. Can be scalar or list/array
    zpos             Starting Z location. Can be scalar or list/array
    
    yperiod          Length of period in y domain

    cycles         - Number of times to go round in y [20]
    y_res          - Number of points in y in each cycle [50]
    """

    if not plotting_available:
        warnings.warning("matplotlib not available, unable to plot")
        return

    yperiod = float(yperiod)

    # Go round toroidally cycles times
    phivals_hires = np.linspace(0,
                                cycles * yperiod,
                                num=y_res * cycles,
                                endpoint=False)

    xpos = np.asfarray(xpos)
    zpos = np.asfarray(zpos)

    field_tracer = fieldtracer.FieldTracer(magnetic_field)
    result_hires = field_tracer.follow_field_lines(xpos, zpos, phivals_hires)

    # Get phivals_hires into [0,yperiod]
    phivals_hires_mod = np.remainder(phivals_hires, yperiod)
    # There are cycles sets of field lines y_res points long each
    # and we also need to transpose for reasons
    phivals_hires_mod = phivals_hires_mod.reshape((cycles, y_res)).T
    # Same for the result, but only swap first and second indices
    result_hires_mod = result_hires.reshape(
        (cycles, y_res, 2)).transpose(1, 0, 2)

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    for n in range(cycles):
        ax.plot(result_hires_mod[:, n, 0], result_hires_mod[:, n, 1],
                phivals_hires_mod[:, n])

    plt.show()

    return fig, ax
Example #24
    def extract(self, **kwargs):
        """Return a dictionary of hoistable subexpressions."""
        if not self._check_loops(self.expr_info.loops):
            warning("Loop nest unsuitable for generalized licm. Skipping.")
            return

        symbols = visit(self.expr_info.outermost_parent)['symbols_dep']
        symbols = dict((s, [l.dim for l in dep]) for s, dep in symbols.items())
        return self._extract(self.stmt.rvalue, symbols, **kwargs)
Example #25
 def golist_to_collapsed_gene_list(self, go_list):
     gene_set = set()
     for go_term in go_list:
         if go_term in self.go_to_gene_dict:
             gene_set.update(self.go_to_gene_dict[go_term])
         else:
             warnings.warning("Warning: GO Term " + go_term +
                              " not found in the GO-to-gene dictionary.")
     return list(gene_set)
Example #26
 def __init__(self, size=CACHE_LINE_SIZE - HEADER_SIZE, **kwargs):
     sz = size + self.HEADER_SIZE
     if sz & (sz - 1) or sz % CACHE_LINE_SIZE:
         warnings.warning(
             "Size of state counter should be multiple of {} or smaller"
             "power of two sans header size ({}), perfect size is {}".
             format(CACHE_LINE_SIZE, self.HEADER_SIZE,
                    CACHE_LINE_SIZE - self.HEADER_SIZE))
     self.size = size
     super().__init__(**kwargs)
Example #27
def get_name_from_target(target):
    import warnings, inspect
    previous_frame = inspect.currentframe().f_back
    (filename, line_number, function_name, lines,
     index) = inspect.getframeinfo(previous_frame)
    stack = inspect.stack()[1]
    warnings.warning(
        f"deprecated, use 'get_attribute_string(ast_object)' instead. Called from {stack[3]} ( {stack[1]}: {stack[2]})",
        DeprecationWarning)
    return get_attribute_string(target)
Example #28
 def _send_ping(self, interval, event):
     while not event.wait(interval):
         self.last_ping_tm = time.time()
         if self.sock:
             try:
                 tradeStr = {"uri": "ping"}
                 params = json.dumps(tradeStr)
                 self.sock.ping(payload=params)
             except Exception as ex:
                 warnings.warning("send_ping routine terminated: {}".format(ex))
                 break
Example #29
def recall(y_true, y_pred, argsort_kind='quicksort'):
    """Computes the Recall values w.r.t. descending `y_pred` values.

    Parameters
    ----------
    y_true: array, shape = [n_samples]
        True values, interpreted as strictly positive or not
        (i.e. converted to binary).
        Could be in {-1, +1} or {0, 1} or {False, True}.

    y_pred: array, shape = [n_samples]
        Predicted values.

    argsort_kind: str
        Sorting algorithm.

    Returns
    -------
    rec: array, shape = [n_samples]
        Recall array.
    """

    # -- basic checks and conversion
    assert len(y_true) == len(y_pred)
    assert np.isfinite(y_true).all()
    assert np.isfinite(y_pred).all()

    y_true = np.array(y_true, dtype=DTYPE)
    assert y_true.ndim == 1

    y_pred = np.array(y_pred, dtype=DTYPE)
    assert y_pred.ndim == 1

    n_uniques = np.unique(y_pred)
    if n_uniques.size == 1:
        raise ValueError('Rank of predicted values is ill-defined'
                         ' because all elements are equal')
    elif n_uniques.size < y_pred.size:
        warning('some predicted elements have exactly the same value.'
                ' output will most probably depend on the sorting'
                ' method used. Here "%s"' % argsort_kind)

    # -- actual computation
    idx = (-y_pred).argsort(kind=argsort_kind)

    tp = (y_true[idx] > 0).cumsum(dtype=DTYPE)

    y_true_n_pos = (y_true > 0).sum(dtype=DTYPE)
    if y_true_n_pos == 0:
        rec = np.zeros(tp.shape, dtype=DTYPE)
    else:
        rec = tp / y_true_n_pos

    return rec
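A tiny worked example of the recall curve computed above, on hypothetical inputs:

# y_true = [1, 0, 1, 0], y_pred = [0.9, 0.8, 0.3, 0.1]
# sorted by descending score, cumulative true positives = [1, 1, 2, 2]
# total positives = 2, so rec = [0.5, 0.5, 1.0, 1.0]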
Example #30
    def execute_async(self, key, command, queue=None):
        if queue is not None:
            warnings.warning(
                'DaskExecutor does not support queues. All tasks will be run '
                'in the same cluster')

        def airflow_run():
            return subprocess.check_call(command, shell=True)

        future = self.client.submit(airflow_run, pure=False)
        self.futures[future] = key
Example #31
def recall(y_true, y_pred, argsort_kind='quicksort'):
    """Computes the Recall values w.r.t. descending `y_pred` values.

    Parameters
    ----------
    y_true: array, shape = [n_samples]
        True values, interpreted as strictly positive or not
        (i.e. converted to binary).
        Could be in {-1, +1} or {0, 1} or {False, True}.

    y_pred: array, shape = [n_samples]
        Predicted values.

    argsort_kind: str
        Sorting algorithm.

    Returns
    -------
    rec: array, shape = [n_samples]
        Recall array.
    """

    # -- basic checks and conversion
    assert len(y_true) == len(y_pred)
    assert np.isfinite(y_true).all()
    assert np.isfinite(y_pred).all()

    y_true = np.array(y_true, dtype=DTYPE)
    assert y_true.ndim == 1

    y_pred = np.array(y_pred, dtype=DTYPE)
    assert y_pred.ndim == 1

    n_uniques = np.unique(y_pred)
    if n_uniques.size == 1:
        raise ValueError('Rank of predicted values is ill-defined'
                         ' because all elements are equal')
    elif n_uniques.size < y_pred.size:
        warning('some predicted elements have exactly the same value.'
                ' output will most probably depend on the sorting'
                ' method used. Here "%s"' % argsort_kind)

    # -- actual computation
    idx = (-y_pred).argsort(kind=argsort_kind)

    tp = (y_true[idx] > 0).cumsum(dtype=DTYPE)

    y_true_n_pos = (y_true > 0).sum(dtype=DTYPE)
    if y_true_n_pos == 0:
        rec = np.zeros(tp.shape, dtype=DTYPE)
    else:
        rec = tp / y_true_n_pos

    return rec
Example #32
def addto_hdr(paramname, value):
    if header_params[paramname] == 'd':
        return prep_double(paramname, value)
    elif header_params[paramname] == 'i':
        return prep_int(paramname, value)
    elif header_params[paramname] == 'str':
        return prep_string(paramname) + prep_string(value)
    elif header_params[paramname] == 'flag':
        return prep_string(paramname)
    else:
        warnings.warning("key '%s' is unknown!" % paramname)
    return hdr
Example #33
    def parseFile(self, bibFile):
        """ parseFile(bibFile)

            Parses the given ``*.bib`` file for entries and loads it in underlying data.

            :param bibFile: The :class:`file` instance to be parsed.
        """
        OUTSIDE = 0
        ENTRY_TYPE = 1
        COMMENT = 2

        self.__line = 0
        self.__column = 0

        mode = OUTSIDE

        c = '\n'
        while True:
            if c == '\n':
                self.__line += 1
                self.__column = 0
            c = bibFile.read(1)
            self.__column += 1
            if not c:
                self.__line = 0
                self.__column = 0
                return
            if mode == OUTSIDE:
                if not c.strip():
                    continue
                elif c == '@':
                    mode = ENTRY_TYPE
                    entryType = ''
                elif c == '%':
                    prevMode = mode
                    mode = COMMENT
                else:
                    warning(f"Omitted unexpected charater: \"{c}\"")
            elif mode == COMMENT:
                if c == '\n':
                    mode = prevMode
            elif mode == ENTRY_TYPE:
                if c == '{':
                    try:
                        self.__iadd__(self.parseEntry(bibFile, entryType.strip().lower()))
                    except (ValueError):
                        warning(f"Encountered unsupported bibTeX entry: {entryType.strip().lower()}")
                    mode = OUTSIDE
                elif c == '%':
                    prevMode = mode
                    mode = COMMENT
                else:
                    entryType += c
Example #34
def addto_hdr(paramname, value):
    if header_params[paramname] == 'd':
        return prep_double(paramname, value)
    elif header_params[paramname] == 'i':
        return prep_int(paramname, value)
    elif header_params[paramname] == 'str':
        return prep_string(paramname) + prep_string(value)
    elif header_params[paramname] == 'flag':
        return prep_string(paramname)
    else:
        warnings.warning("key '%s' is unknown!" % paramname)
    return hdr
Example #35
 def new_func(*args, **kwargs):
     warnings.simplefilter('always',
                           DeprecationWarning)  # turn off filter
     warnings.warning(
         'Use {0} instead of {1}, {1} will be removed in the future.'.
         format(new_name, func.__name__),
         category=DeprecationWarning,
         stacklevel=2,
     )
     warnings.simplefilter('default',
                           DeprecationWarning)  # reset filter
     return func(*args, **kwargs)
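A sketch of how the wrapper above is typically wired into a full deprecation decorator, assuming warnings.warn is the intended call; the decorator name is illustrative:

import functools
import warnings

def deprecated_in_favor_of(new_name):
    # Hypothetical decorator built around the wrapper pattern above.
    def wrap(func):
        @functools.wraps(func)
        def new_func(*args, **kwargs):
            warnings.simplefilter('always', DeprecationWarning)   # turn off filter
            warnings.warn(
                'Use {0} instead of {1}, {1} will be removed in the future.'.format(
                    new_name, func.__name__),
                category=DeprecationWarning,
                stacklevel=2,
            )
            warnings.simplefilter('default', DeprecationWarning)  # reset filter
            return func(*args, **kwargs)
        return new_func
    return wrap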
Example #36
def isfullhouse(cards):
    cardnums = [card[:-1] for card in cards]
    c = Counter(cardnums)
    #print("C is infullhouse",c)
    highest = c.most_common(2)[0]
    try:
        second = c.most_common(2)[1]
    except:
        warnings.warning("Got Five of a Kind or Something")
    if (highest[1] == 3 and second[1] == 2):
        return True
    else:
        return False
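An alternative sketch of the same full-house test that avoids the undefined-name risk in the except branch above; the helper name is hypothetical:

from collections import Counter

def is_full_house(cards):
    # A full house has rank counts of exactly three-of-a-kind plus a pair.
    counts = sorted(Counter(card[:-1] for card in cards).values())
    return counts == [2, 3]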
Example #37
    def set_fields(self, ra=None, dec=None, **kwargs):
        """
        """
        kwargs["width"] = kwargs.get("width", self.width)
        kwargs["height"] = kwargs.get("height", self.height)

        self._side_properties["fields"] = SurveyFieldBins(ra, dec, **kwargs)

        if self.cadence is not None and np.any(np.isnan(
                self.cadence['field'])):
            warnings.warning(
                "cadence was already set, field pointing will be updated")
            self._update_field_radec()
Example #38
def plot_streamlines(grid, magnetic_field, y_slice=0, width=None, **kwargs):
    """Plot streamlines of the magnetic field in the poloidal plane

    Parameters
    ----------
    grid : :py:obj:`zoidberg.grid.Grid`
        Grid generated by Zoidberg
    magnetic_field : :py:obj:`zoidberg.field.MagneticField`
        Zoidberg magnetic field object
    y_slice : int, optional
        y-index to plot streamlines at
    width : float, optional
        If not None, line widths are proportional to the magnitude of
        the `magnetic_field` times `width`

    Returns
    -------
    fig, ax
        The matplotlib figure and axis used

    """

    if not plotting_available:
        warnings.warning("matplotlib not available, unable to plot")
        return

    fig, ax = plt.subplots(1, 1)
    full_slice = np.s_[:, y_slice, :]

    if width is not None:
        # Get the B field magnitude in the poloidal plane
        bxz_mag = np.sqrt(magnetic_field.b_mag**2 - magnetic_field.by**2)
        linewidth = width * (bxz_mag[full_slice] / bxz_mag.max()).T
    else:
        linewidth = 1

    ax.streamplot(grid.xarray,
                  grid.zarray,
                  magnetic_field.bx[full_slice].T,
                  magnetic_field.bz[full_slice].T,
                  linewidth=linewidth,
                  **kwargs)

    ax.set_xlabel("Radius [m]", fontsize=20)
    ax.set_ylabel("Height [m]", fontsize=20)
    ax.tick_params(axis='both', labelsize=15)

    plt.show()

    return fig, ax
Example #39
def check_arguments(args):
    '''
    Checks that at least one of the train or test modes was requested.
    :param args.train
    :param args.test
    '''
    parser = argparse.ArgumentParser()
    if args.cluster:
        warnings.warning('Cluster module is still not fully functional')
    if not (args.train or args.test):
        parser.error('No action requested, add --train or --test')
    if (args.test) and not (args.test_input):
        parser.error("If testing, must specify test data, use -t/--test_input\
                     <<DATAFILE>>")
Example #40
    def _recoil(self):
        """Increase the stack size if the kernel arrays exceed the stack limit
        threshold (at the C level)."""

        # Assume the size of a C type double is 8 bytes
        c_double_size = 8
        # Assume the stack size is 1.7 MB (2 MB is usually the limit)
        stack_size = 1.7*1024*1024

        decls = [d for d in self.decls.values() if d.sym.rank]
        size = sum([reduce(operator.mul, d.sym.rank) for d in decls])

        if size * c_double_size > stack_size:
            # Increase the stack size if the kernel's stack size seems to outreach
            # the space available
            try:
                resource.setrlimit(resource.RLIMIT_STACK, (resource.RLIM_INFINITY,
                                                           resource.RLIM_INFINITY))
            except resource.error:
                warning("Stack may blow up, and could not increase its size.")
Example #41
    def expand(self, mode='standard', **kwargs):
        """Expand expressions over other expressions based on different heuristics.
        In the simplest example one can have: ::

            (X[i] + Y[j])*F + ...

        which could be transformed into: ::

            (X[i]*F + Y[j]*F) + ...

        When creating the expanded object, if the expanding term had already been
        hoisted, then the expansion itself is also lifted. For example, if: ::

            Y[j] = f(...)
            (X[i]*Y[j])*F + ...

        and we assume it has been decided (see below) the expansion should occur
        along the loop dimension ``j``, the transformation generates: ::

            Y[j] = f(...)*F
            (X[i]*Y[j]) + ...

        One may want to expand expressions for several reasons, which include

        * Exposing factorization opportunities;
        * Exposing high-level (linear algebra) operations (e.g., matrix multiplies)
        * Relieving register pressure; when, for example, ``(X[i]*Y[j])`` is
          computed in a loop L' different than the loop L'' in which ``Y[j]``
          is evaluated, and ``cost(L') > cost(L'')``;

        :param mode: multiple expansion strategies are possible, each exposing
            different, "hidden" opportunities for later code motion.

            * mode == 'standard': this heuristics consists of expanding along the
                loop dimension appearing the most in different (i.e., unique).
                This aims at making factorization more effective.
            * mode == 'all': expand when symbols depend on at least one of the
                expression's dimensions
            * mode == 'domain': expand when symbols depending on the expressions's
                domain are encountered.
            * mode == 'outdomain': expand when symbols independent of the
                expression's domain are encountered.
        """

        if mode == 'standard':
            retval = FindInstances.default_retval()
            symbols = FindInstances(Symbol).visit(self.stmt.rvalue, ret=retval)[Symbol]
            # The heuristics privileges domain dimensions
            dims = self.expr_info.out_domain_dims
            if not dims or self.expr_info.dimension >= 2:
                dims = self.expr_info.domain_dims
            # Get the dimension occurring most often
            occurrences = [tuple(r for r in s.rank if r in dims) for s in symbols]
            occurrences = [i for i in occurrences if i]
            if not occurrences:
                return self
            # Finally, establish the expansion dimension
            dimension = Counter(occurrences).most_common(1)[0][0]
            should_expand = lambda n: set(dimension).issubset(set(n.rank))
        elif mode in ['all', 'domain', 'outdomain']:
            info = visit(self.expr_info.outermost_loop, info_items=['symbols_dep'])
            symbols = defaultdict(set)
            for s, dep in info['symbols_dep'].items():
                symbols[s.symbol] |= {l.dim for l in dep}
            if mode == 'all':
                should_expand = lambda n: symbols.get(n.symbol) and \
                    any(r in self.expr_info.dims for r in symbols[n.symbol])
            elif mode == 'domain':
                should_expand = lambda n: symbols.get(n.symbol) and \
                    any(r in self.expr_info.domain_dims for r in symbols[n.symbol])
            elif mode == 'outdomain':
                should_expand = lambda n: symbols.get(n.symbol) and \
                    not symbols[n.symbol].issubset(set(self.expr_info.domain_dims))
        else:
            warning('Unknown expansion strategy. Skipping.')
            return

        # Perform the expansion
        self.expr_expander.expand(should_expand, kwargs.get('not_aggregate'))

        # Update known declarations
        self.decls.update(self.expr_expander.expanded_decls)
        return self
Example #42
 def startfit(self):
     warnings.warning("Method renamed to startFit",
                      DeprecationWarning)
     self.startFit()
Example #43
def cv_timeresolved(spiketrain, win=None, start=None, stop=None, step=None):
    """
    Evaluate the empirical coefficient of variation (CV) of the inter-spike
    intervals (ISIs) of one spike train (or a list of spike trains).
    By default computes the CV over the full time span of the data. However,
    it can compute the CV time-resolved as well.

    Given the vector v containing the observed ISIs of one spike train in
    the time window [t0, t1], the CV in [t0, t1] is defined as
                    CV := std(v)/mean(v).
    The CV of a list of spike trains is computed collecting the ISIs of all
    spike trains.

    The CV represents a measure of irregularity in the spiking activity. For
    For a time-stationary Poisson process, the theoretical CV is 1.

    Arguments
    ---------
    spiketrain : SpikeTrain or list of SpikeTrains
        a neo.SpikeTrain object (or a list of), for which to compute the CV
    win : Quantity (optional)
        the length of the time windows over which to compute the CV.
        If None, the CV is computed over the largest window possible;
        otherwise, the window slides along time (see argument 'step')
        Default: None
    start : Quantity, optional
        initial time for the computation of the CV. If None, the largest
        t_start among those of the input spike trains in `spiketrain` is used
        Default: None
    stop : Quantity, optional
        last time for the computation of the CV.
        If None, the smallest t_stop among those of the input spike trains
        in `spiketrain` is used
        Default: None
    step : Quantity, optional
        Time shift between two consecutive sliding windows.
        If None, successive windows are adjacent
        Default: None

    Returns
    -------
    values : array
        Array of CV values computed over consecutive time windows
    windows : array
        Array of shape (n, 2) of time windows over which the CV has been
        computed
    """

    # Convert spiketrain to a list if it is a SpikeTrain
    if type(spiketrain) == neo.core.SpikeTrain:
        spiketrain = [spiketrain]

    max_tstart = min([t.t_start for t in spiketrain])
    min_tstop = max([t.t_stop for t in spiketrain])

    if not (all([max_tstart == t.t_start for t in spiketrain]) and
                all([min_tstop == t.t_stop for t in spiketrain])):
        warnings.warning('spike trains have different t_start or t_stop'
                         ' values. CV computed for inner values only')

    t_start = max_tstart if start is None else start
    t_stop = min_tstop if stop is None else stop
    wlen = t_stop - t_start if win is None else win
    wstep = wlen if step is None else step

    # Convert all time quantities in dimensionless (_dl) units (meant in s)
    start_dl = float(t_start.simplified.base)
    stop_dl = float(t_stop.simplified.base)
    wlen_dl = float(wlen.simplified.base)
    step_dl = float(wstep.simplified.base)

    # Define the centers of the sliding windows where the CV must be computed
    cv_times = numpy.arange(wlen_dl / 2. + start_dl,
                            stop_dl - wlen_dl / 2. + step_dl / 2, step_dl)

    # Define the nx2 array of time windows within which to compute the CV
    windows = pq.s * numpy.array([numpy.max([cv_times - wlen_dl / 2.,
                                             start_dl * numpy.ones(
                                                 len(cv_times))], axis=0),
                                  numpy.min([cv_times +
                                             wlen_dl / 2.,
                                             stop_dl * numpy.ones(
                                                 len(cv_times))], axis=0)]).T

    # Compute the CV in each window defined above
    cv_values = numpy.zeros(len(cv_times))  # Initialize CV values to 0
    for i, w in enumerate(windows):
        x_sliced = [t.time_slice(w[0], w[1]) for t in spiketrain]
        cv_values[i] = cv(x_sliced)

    return cv_values, windows
Example #44
    def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10,
                  constant=True, verbose=True, missing='drop', **fit_kwargs):
        '''
        '''
        # Fit a normal linear model to the data

        if constant:
            x_const = sm.add_constant(self.x)
        else:
            x_const = self.x

        if self.weights is None:
            model = sm.OLS(self.y, x_const, missing=missing)
        else:
            model = sm.WLS(self.y, x_const, weights=self.weights,
                           missing=missing)
        init_lm = model.fit(**fit_kwargs)

        if verbose:
            print(init_lm.summary())

        epsil = epsil_0

        # Before entering the loop, make sure the initial epsilon exceeds the tolerance
        if epsil_0 < tol:
            warnings.warning('Initial epsilon is smaller than tolerance. \
                             The tolerance should be set smaller.')
            return init_lm

        # Sum of residuals
        dev_0 = np.sum(init_lm.resid**2.)

        # Catch cases where a break isn't necessary
        self.break_fail_flag = False

        # Count
        it = 0

        # Now loop through and minimize the residuals by changing where the
        # breaking point is.
        while np.abs(epsil) > tol:
            U = (self.x - self.brk) * (self.x > self.brk)
            V = deriv_max(self.x, self.brk)

            X_all = np.vstack([self.x, U, V]).T
            if constant:
                X_all = sm.add_constant(X_all)

            if self.weights is None:
                model = sm.OLS(self.y, X_all, missing=missing)
            else:
                model = sm.WLS(self.y, X_all, weights=self.weights,
                               missing=missing)
            fit = model.fit()

            beta = fit.params[2]  # Get coef
            gamma = fit.params[3]  # Get coef

            # Adjust the break point
            new_brk = copy(self.brk)
            new_brk += (h_step * gamma) / beta

            # If the new break point is outside of the allowed range, reset
            # the step size to half of the original, then try stepping again
            h_it = 0
            if not (self.x > new_brk).any() or (self.x > new_brk).all():
                while True:
                    # Remove step taken
                    new_brk -= (h_step * gamma) / beta
                    # Now half the step and try again.
                    h_step /= 2.0
                    new_brk += (h_step * gamma) / beta
                    h_it += 1
                    if (self.x > new_brk).any() and not (self.x > new_brk).all():
                        self.brk = new_brk
                        break
                    if h_it >= 5:
                        self.break_fail_flag = True
                        it = iter_max + 1
                        warnings.warn("Cannot find good step-size, assuming\
                                       break not needed")
                        break
            else:
                self.brk = new_brk

            dev_1 = np.sum(fit.resid**2.)

            epsil = (dev_1 - dev_0) / (dev_0 + 1e-3)

            dev_0 = dev_1

            if verbose:
                print("Iteration: %s/%s" % (it + 1, iter_max))
                print(fit.summary())
                print("Break Point: " + str(self.brk))
                print("Epsilon: " + str(epsil))

            it += 1

            if it > iter_max:
                warnings.warn("Max iterations reached. \
                               Result may not be minimized.")
                break

        # Is the initial model without a break better?
        if self.break_fail_flag or np.sum(init_lm.resid**2) <= np.sum(fit.resid**2):
            # If the initial fit was better, the segmented fit failed.
            self.break_fail_flag = True

            self.brk = self.x.max()

            X_all = sm.add_constant(self.x)
        else:
            # With the break point hopefully found, do a final good fit
            U = (self.x - self.brk) * (self.x > self.brk)
            V = deriv_max(self.x, self.brk)

            X_all = np.vstack([self.x, U, V]).T
            X_all = sm.add_constant(X_all)

        if self.weights is None:
            model = sm.OLS(self.y, X_all, missing=missing)
        else:
            model = sm.WLS(self.y, X_all, weights=self.weights,
                           missing=missing)

        self.fit = model.fit()
        self._params = self.fit.params
        self._errs = self.fit.bse

        if not self.break_fail_flag:
            self.brk_err = brk_errs(self.params, fit.cov_params())
        else:
            self.brk_err = 0.0

        self.get_slopes()
Example #45
def locfdr(zz, bre = 120, df = 7, pct = 0., pct0 = 1./4, nulltype = 1, type = 0, plot = 1, mult = None, mlests = None, 
		main = ' ', sw = 0, verbose = True, showplot = True, saveplot = False, saveroot = 'locfdr', saveext = 'pdf', savestamp = False):
	"""Computes local false discovery rates.

	This is Abhinav Nellore's Python implementation of the R function locfdr() v1.1.7, originally written by Bradley Efron, 
	Brit B. Turnbull, and Balasubramanian Narasimhan; and later enhanced by Alyssa Frazee, Leonardo Collado-Torres, and Jeffrey Leek 
	(see https://github.com/alyssafrazee/derfinder/blob/master/R/locfdrFit.R ). It is licensed under the GNU GPL v2.
	See COPYING for more information.
	
	The port is relatively faithful. Variable names are almost precisely the same; if the original variable name contained a period, that
	period is replaced by an underscore here. (So 'Cov2.out' in the R is 'Cov2_out' in the Python.)
	To access returned values:
	(in R)        --- results = locfdr(...)
					  results$fdr
			          results$z.2
	(in Python)   --- results = locfdr(...)
					  results['fdr']
					  results['z_2']
	Some returned values are pandas Series and DataFrames. An introduction to pandas data structures is available at
	http://pandas.pydata.org/pandas-docs/dev/dsintro.html .

	A nearly complete description of arguments and returned values may be found at 
	http://cran.r-project.org/web/packages/locfdr/vignettes/locfdr-example.pdf .

	Additional arguments in this version:
		 verbose: (True or False) --- If True, outputs warnings.
	     showplot: (True or False) --- If True, displays plot. Ignored if plot = 0.
	     saveplot: (True or False) --- If True, saves plot according to constraints specified by saveroot, saveext, and savestamp.
	     							   Ignored if plot = 0.
	     saveroot: (Any string that constitutes a valid filename.) --- Specifies prefix of file to save. Ignored if saveplot = False.
	     saveext: (Most valid image file extensions work here. Try 'png', 'pdf', 'ps', 'eps', or 'svg'.) --- Selects file format and extension.
	     	Ignored if saveplot = False.
	     savestamp: (True or False) --- If True, date/timestamp is appended to filename prefix; this helps prevent overwriting old saves.
	     	Ignored if saveplot = False.

	 Additional returned values in this version:
		yt: Heights of pink histogram bars that appear on the plots (i.e., heights of alt. density's histogram).
		x: Locations of pink histogram bars that appear on the plots (locations of alt. density's histogram).
		mlest_lo AND mlest_hi: If the function outputs a warning message that reads "please rerun with mlest parameters = ...",
			these parameters are contained in mlest_lo and mlest_hi .
		needsfix: 1 if a rerun warning is output; otherwise 0.
		nulldens: y-values of estimated null distribution density.
		fulldens: y-values of estimated full (mixture) density."""
	call = it.stack()
	zz = np.array(zz)
	mlest_lo = None
	mlest_hi = None
	yt = None
	x = None
	needsfix = 0
	try:
		brelength = len(bre)
		lo = min(bre)
		up = max(bre)
		bre = brelength
	except TypeError:
		try:
			len(pct)
			lo = pct[0]
			up = pct[1]
			# the following line is present to mimic how R handles [if (pct > 0)] (see code below) when pct is an array
			pct = pct[0]
		except TypeError:
			if pct == 0:
				lo = min(zz)
				up = max(zz)
			elif pct < 0:
				med = np.median(zz)
				lo = med + (1 - pct) * (min(zz) - med)
				up = med + (1 - pct) * (max(zz) - med)
			elif pct > 0:
				lo = np.percentile(zz, pct * 100)
				up = np.percentile(zz, (1 - pct) * 100)
	zzz = np.array([max(min(el, up), lo) for el in zz])
	breaks = np.linspace(lo, up, bre)
	x = (breaks[1:] + breaks[0:-1]) / 2.
	y = np.histogram(zzz, bins = len(breaks) - 1)[0]
	yall = y
	K = len(y)
	N = len(zz)
	if pct > 0:
		y[0] = min(y[0], 1.)
		y[K-1] = min(y[K-1], 1)
	if not type:
		basismatrix = rf.ns(x, df)
		X = np.ones((basismatrix.shape[0], basismatrix.shape[1]+1), dtype=np.float64)
		X[:, 1:] = basismatrix
		f = glm("y ~ basismatrix", data = dict(y=np.matrix(y).transpose(), basismatrix=basismatrix), 
				family=families.Poisson()).fit().fittedvalues
	else:
		basismatrix = rf.poly(x, df)
		X = np.ones((basismatrix.shape[0], basismatrix.shape[1]+1), dtype=np.float64)
		X[:, 1:] = basismatrix
		f = glm("y ~ basismatrix", data = dict(y=np.matrix(y).transpose(), basismatrix=basismatrix), 
			family=families.Poisson()).fit().fittedvalues
	fulldens = f
	l = np.log(f)
	Fl = f.cumsum()
	Fr = f[::-1].cumsum()
	D = ((y - f) / np.sqrt((f + 1)))
	D = sum(np.power(D[1:(K-1)], 2)) / (K - 2 - df)
	if D > 1.5:
		wa.warn("f(z) misfit = " + str(round(D,1)) + ". Rerun with larger df.")
	if nulltype == 3:
		fp0 = pd.DataFrame(np.zeros((6,4)).fill(np.nan), index=['thest', 'theSD', 'mlest', 'mleSD', 'cmest', 'cmeSD'], 
			columns=['delta', 'sigleft', 'p0', 'sigright'])
	else:
		fp0 = pd.DataFrame(np.zeros((6,3)).fill(np.nan), index=['thest', 'theSD', 'mlest', 'mleSD', 'cmest', 'cmeSD'], 
			columns=['delta', 'sigma', 'p0'])
	fp0.loc['thest'][0:2] = np.array([0,1])
	fp0.loc['theSD'][0:2] = 0
	imax = l.argmax()
	xmax = x[imax]
	try:
		len(pct)
		pctlo = pct0[0]
		pctup = pct0[1]
	except TypeError:
		pctup = 1 - pct0
		pctlo = pct0
	lo0 = np.percentile(zz, pctlo*100)
	hi0 = np.percentile(zz, pctup*100)
	nx = len(x)
	i0 = np.array([i for i, el in enumerate(x) if el > lo0 and el < hi0])
	x0 = np.array([el for el in x if el > lo0 and el < hi0])
	y0 = np.array([el for i,el in enumerate(l) if x[i] > lo0 and x[i] < hi0])
	xsubtract = x0 - xmax
	X00 = np.zeros((2, len(xsubtract)))
	if nulltype == 3:
		X00[0, :] = np.power(xsubtract, 2)
		X00[1, :] = [max(el, 0)*max(el, 0) for el in xsubtract]
	else:
		X00[0, :] = xsubtract
		X00[1, :] = np.power(xsubtract, 2)
	X00 = X00.transpose()
	co = glm("y0 ~ X00", data = dict(y0=y0, X00=X00)).fit().params
	# these errors may not be necessary
	if nulltype == 3 and ((pd.isnull(co[1]) or pd.isnull(co[2])) or (co[1] >= 0 or co[1] + co[2] >= 0)):
			raise EstimationError('CM estimation failed. Rerun with nulltype = 1 or 2.')
	elif pd.isnull(co[2]) or co[2] >= 0:
		if nulltype == 2:
			raise EstimationError('CM estimation failed. Rerun with nulltype = 1.')
		elif nulltype != 3:
			xsubtract2 = x - xmax
			X0 = np.ones((3, len(xsubtract2)))
			X0[1, :] = xsubtract2
			X0[2, :] = np.power(xsubtract2, 2)
			X0 = X0.transpose()
			wa.warn('CM estimation failed; middle of histogram nonnormal')
	else:
		xsubtract2 = x - xmax
		X0 = np.ones((3, len(xsubtract2)))
		if nulltype == 3:
			X0[1, :] = np.power(xsubtract2, 2)
			X0[2, :] = [max(el, 0)*max(el, 0) for el in xsubtract2]
			sigs = np.array([1/np.sqrt(-2*co[1]), 1/np.sqrt(-2*(co[1]+co[2]))])
			fp0.loc['cmest'][0] = xmax
			fp0.loc['cmest'][1] = sigs[0]
			fp0.loc['cmest'][3] = sigs[1]
		else:
			X0[1, :] = xsubtract2
			X0[2, :] = np.power(xsubtract2, 2)
			xmaxx = -co[1] / (2 * co[2]) + xmax
			sighat = 1 / np.sqrt(-2 * co[2])
			fp0.loc['cmest'][[0,1]] = [xmaxx, sighat]
		X0 = X0.transpose()
		l0 = np.array((X0 * np.matrix(co).transpose()).transpose())[0]
		f0 = np.exp(l0)
		p0 = sum(f0) / float(sum(f))
		f0 = f0 / p0
		fp0.loc['cmest'][2] = p0
	b = 4.3 * np.exp(-0.26 * np.log10(N))
	if mlests is None:
		med = np.median(zz)
		sc = (np.percentile(zz, 75) - np.percentile(zz, 25)) / (2 * stats.norm.ppf(.75))
		mlests = lf.locmle(zz, xlim = np.array([med, b * sc]))
		if N > 5e05:
			if verbose:
				wa.warn('length(zz) > 500,000: an interval wider than the optimal one was used for maximum likelihood estimation. To use the optimal interval, rerun with mlests = [' + str(mlests[0]) + ', ' + str(b * mlests[1]) + '].')
			mlest_lo = mlests[0]
			mlest_hi = b * mlests[1]
			needsfix = 1
			mlests = lf.locmle(zz, xlim = [med, sc])
	if not pd.isnull(mlests[0]):
		if N > 5e05:
			b = 1
		if nulltype == 1:
			Cov_in = {'x' : x, 'X' : X, 'f' : f, 'sw' : sw}
			ml_out = lf.locmle(zz, xlim = [mlests[0], b * mlests[1]], d = mlests[0], s = mlests[1], Cov_in = Cov_in)
			mlests = ml_out['mle']
		else:
			mlests = lf.locmle(zz, xlim = [mlests[0], b * mlests[1]], d = mlests[0], s = mlests[1])
		fp0.loc['mlest'][0:3] = mlests[0:3]
		fp0.loc['mleSD'][0:3] = mlests[3:6]
	if (not (pd.isnull(fp0.loc['mlest'][0]) or pd.isnull(fp0.loc['mlest'][1]) or pd.isnull(fp0.loc['cmest'][0]) or pd.isnull(fp0.loc['cmest'][1]))) and nulltype > 1:
		if abs(fp0.loc['cmest'][0] - mlests[0]) > 0.05 or abs(np.log(fp0.loc['cmest'][1] / mlests[1])) > 0.05:
			wa.warn('Discrepancy between central matching and maximum likelihood estimates. Consider rerunning with nulltype = 1.')
	if pd.isnull(mlests[0]):
		if nulltype == 1:
			if pd.isnull(fp0.loc['cmest'][1]):
				raise EstimationError('CM and ML estimation failed; middle of histogram is nonnormal.')
			else:
				raise EstimationError('ML estimation failed. Rerun with nulltype = 2.')
		else:
			wa.warn('ML estimation failed.')
	if nulltype < 2:
		xmaxx = mlests[0]
		xmax = mlests[0]
		delhat = mlests[0]
		sighat = mlests[1]
		p0 = mlests[2]
		f0 = np.array([stats.norm.pdf(el, delhat, sighat) for el in x])
		f0 = (sum(f) * f0) / sum(f0)
	fdr = np.array([min(el, 1) for el in (p0 * (f0 / f))])
	f00 = np.exp(-np.power(x, 2) / 2)
	f00 = (f00 * sum(f)) / sum(f00)
	p0theo = sum(f[i0]) / sum(f00[i0])
	fp0.loc['thest'][2] = p0theo
	fdr0 = np.array([min(el, 1) for el in ((p0theo * f00) / f)])
	f0p = p0 * f0
	if nulltype == 0:
		f0p = p0theo * f00
	F0l = f0p.cumsum()
	F0r = f0p[::-1].cumsum()
	Fdrl = F0l / Fl
	Fdrr = (F0r / Fr)[::-1]
	Int = (1 - fdr) * f * (fdr < 0.9)
	if np.any([x[i] <= xmax and fdr[i] == 1 for i in xrange(len(fdr))]):
		xxlo = min([el for i,el in enumerate(x) if el <= xmax and fdr[i] == 1])
	else:
		xxlo = xmax
	if np.any([x[i] >= xmax and fdr[i] == 1 for i in xrange(len(fdr))]):
		xxhi = max([el for i,el in enumerate(x) if el >= xmax and fdr[i] == 1])
	else:
		xxhi = xmax
	indextest = [i for i,el in enumerate(x) if el >= xxlo and el <= xxhi]
	if len(indextest) > 0:
		fdr[indextest] = 1
	indextest = [i for i,el in enumerate(x) if el <= xmax and fdr0[i] == 1]
	if len(indextest) > 0:
		xxlo = min(x[indextest])
	else:
		xxlo = xmax
	indextest = [i for i,el in enumerate(x) if el >= xmax and fdr0[i] == 1]
	if len(indextest) > 0:
		xxhi = max(x[indextest])
	else:
		xxhi = xmax
	indextest = [i for i,el in enumerate(x) if el >= xxlo and el <= xxhi]
	if len(indextest) > 0:
		fdr0[indextest] = 1
	if nulltype == 1:
		indextest = [i for i,el in enumerate(x) if el >= mlests[0] - mlests[1] and el <= mlests[0] + mlests[1]]
		fdr[indextest] = 1
		fdr0[indextest] = 1
	p1 = sum((1 - fdr) * f) / N
	p1theo = sum((1 - fdr0) * f) / N
	fall = f + (yall - y)
	Efdr = sum((1 - fdr) * fdr * fall) / sum((1 - fdr) * fall)
	Efdrtheo = sum((1 - fdr0) * fdr0 * fall) / sum((1 - fdr0) * fall)
	iup = [i for i,el in enumerate(x) if el >= xmax]
	ido = [i for i,el in enumerate(x) if el <= xmax]
	Eleft = sum((1 - fdr[ido]) * fdr[ido] * fall[ido]) / sum((1 - fdr[ido]) * fall[ido])
	Eleft0 = sum((1 - fdr0[ido]) * fdr0[ido] * fall[ido])/sum((1 - fdr0[ido]) * fall[ido])
	Eright = sum((1 - fdr[iup]) * fdr[iup] * fall[iup])/sum((1 - fdr[iup]) * fall[iup])
	Eright0 = sum((1 - fdr0[iup]) * fdr0[iup] * fall[iup])/sum((1 - fdr0[iup]) * fall[iup])
	Efdr = np.array([Efdr, Eleft, Eright, Efdrtheo, Eleft0, Eright0])
	for i,el in enumerate(Efdr):
		if pd.isnull(el):
			Efdr[i] = 1
	Efdr = pd.Series(Efdr, index=['Efdr', 'Eleft', 'Eright', 'Efdrtheo', 'Eleft0', 'Eright0'])
	if nulltype == 0:
		f1 = (1 - fdr0) * fall
	else:
		f1 = (1 - fdr) * fall
	if mult is not None:
		try:
			mul = np.ones(len(mult) + 1)
			mul[1:] = mult
		except TypeError:
			mul = np.array([1, mult])
		EE = np.zeros(len(mul))
		for m in xrange(len(EE)):
			xe = np.sqrt(mul[m]) * x
			f1e = rf.approx(xe, f1, x, rule = 2, ties = 'mean')
			f1e = (f1e * sum(f1)) / sum(f1e)
			f0e = f0
			p0e = p0
			if nulltype == 0:
				f0e = f00
				p0e = p0theo
			fdre = (p0e * f0e) / (p0e * f0e + f1e)
			EE[m] = sum(f1e * fdre) / sum(f1e)
		EE = EE / EE[0]
		EE = pd.Series(EE, index=mult)
	Cov2_out = lf.loccov2(X, X0, i0, f, fp0.loc['cmest'], N)
	Cov0_out = lf.loccov2(X, np.ones((len(x), 1)), i0, f, fp0.loc['thest'], N)
	if sw == 3:
		if nulltype == 0:
			Ilfdr = Cov0_out['Ilfdr']
		elif nulltype == 1:
			Ilfdr = ml_out['Ilfdr']
		elif nulltype == 2:
			Ilfdr = Cov2_out['Ilfdr']
		else:
			raise InputError('When sw = 3, nulltype must be 0, 1, or 2.')
		return Ilfdr
	if nulltype == 0:
		Cov = Cov0_out['Cov']
	elif nulltype == 1:
		Cov = ml_out['Cov_lfdr']
	else:
		Cov = Cov2_out['Cov']
	lfdrse = np.sqrt(np.diag(Cov))
	fp0.loc['cmeSD'][0:3] = Cov2_out.loc['stdev'][[1,2,0]]
	if nulltype == 3:
		fp0.loc['cmeSD'][3] = fp0['cmeSD'][1]
	fp0.loc['theSD'][2] = Cov0_out['stdev'][0]
	if sw == 2:
		if nulltype == 0:
			pds = fp0.loc['thest'][[2, 0, 1]]
			stdev = fp0.loc['theSD'][[2, 0, 1]]
			pds_ = Cov0_out['pds_'].transpose()
		elif nulltype == 1:
			pds = fp0.loc['mlest'][[2, 0, 1]]
			stdev = fp0.loc['mleSD'][[2, 0, 1]]
			pds_ = ml_out['pds_'].transpose()
		elif nulltype == 2:
			pds = fp0.loc['cmest'][[2, 0, 1]]
			stdev = fp0.loc['cmeSD'][[2, 0, 1]]
			pds_ = Cov2_out['pds_'].transpose()
		else:
			raise InputError('When sw = 2, nulltype must equal 0, 1, or 2.')
		pds_ = pd.DataFrame(pds_, columns=['p0', 'delhat', 'sighat'])
		pds = pd.Series(pds, index=['p0', 'delhat', 'sighat'])
		stdev = pd.Series(stdev, index=['sdp0', 'sddelhat', 'sdsighat'])
		return pd.Series({'pds': pds, 'x': x, 'f': f, 'pds_' : pds_, 'stdev' : stdev})
	p1 = np.arange(0.01, 1, 0.01)
	cdf1 = np.zeros((2,99))
	cdf1[0, :] = p1
	if nulltype == 0:
		fd = fdr0
	else:
		fd = fdr
	for i in xrange(99):
		cdf1[1, i] = sum([el for j,el in enumerate(f1) if fd[j] <= p1[i]])
	cdf1[1, :] = cdf1[1, :] / cdf1[1, -1]
	cdf1 = cdf1.transpose()
	if nulltype != 0:
		mat = pd.DataFrame(np.vstack((x, fdr, Fdrl, Fdrr, f, f0, f00, fdr0, yall, lfdrse, f1)), 
			index=['x', 'fdr', 'Fdrleft', 'Fdrright', 'f', 'f0', 'f0theo', 'fdrtheo', 'counts', 'lfdrse', 'p1f1'])
	else:
		mat = pd.DataFrame(np.vstack((x, fdr, Fdrl, Fdrr, f, f0, f00, fdr0, yall, lfdrse, f1)), 
			index=['x', 'fdr', 'Fdrltheo', 'Fdrrtheo', 'f', 'f0', 'f0theo', 'fdrtheo', 'counts', 'lfdrsetheo', 'p1f1'])
	z_2 = np.array([np.nan, np.nan])
	m = sorted([(i, el) for i, el in enumerate(fd)], key=lambda nn: nn[1])[-1][0]
	if fd[-1] < 0.2:
		z_2[1] = rf.approx(fd[m:], x[m:], 0.2, ties = 'mean')
	if fd[0] < 0.2:
		z_2[0] = rf.approx(fd[0:m], x[0:m], 0.2, ties = 'mean')
	if nulltype == 0:
		nulldens = p0theo * f00
	else:
		nulldens = p0 * f0
	yt = np.array([max(el, 0) for el in (yall * (1 - fd))])
	# construct plots
	if plot > 0:
		try:
			import matplotlib.pyplot as plt
			import matplotlib.patches as patches
			import matplotlib.path as path
		except ImportError:
			print 'matplotlib is required for plotting, but it was not found. Rerun with plot = 0 to turn off plots.'
			print 'locfdr-python was tested on matplotlib 1.3.0.'
			raise
		fig = plt.figure(figsize=(14, 8))
		if plot == 4:
			histplot = fig.add_subplot(131)
			fdrFdrplot = fig.add_subplot(132)
			f1cdfplot = fig.add_subplot(133)
		elif plot == 2 or plot == 3:
			histplot = fig.add_subplot(121)
			if plot == 2:
				fdrFdrplot = fig.add_subplot(122)
			else:
				f1cdfplot = fig.add_subplot(122)
		elif plot == 1:
			histplot = fig.add_subplot(111)
		# construct histogram
		leftplt = breaks[:-1]
		rightplt = breaks[1:]
		bottomplt = np.zeros(len(leftplt))
		topplt = bottomplt + y
		XYplt = np.array([[leftplt,leftplt,rightplt,rightplt], [bottomplt,topplt,topplt,bottomplt]]).transpose()
		barpath = path.Path.make_compound_path_from_polys(XYplt)
		patch = patches.PathPatch(barpath, facecolor='white', edgecolor='#302f2f')
		histplot.add_patch(patch)
		histplot.set_xlim(leftplt[0], rightplt[-1])
		histplot.set_ylim(-1.5, (topplt.max()+1.5) * 0.1 + topplt.max())
		histplot.set_title(main)
		for k in xrange(K):
			histplot.plot([x[k], x[k]], [0, yt[k]], color='#e31d76', linewidth = 2)
		if nulltype == 3:
			histplot.set_xlabel('delta = ' + str(round(xmax, 3)) + ', sigleft = ' + str(round(sigs[0], 3))  
				+ ', sigright = ' + str(round(sigs[1], 3)) + ', p0 = ' + str(round(fp0.loc['cmest'][2], 3)))
		if nulltype == 1 or nulltype == 2:
			histplot.set_xlabel('MLE: delta = ' + str(round(mlests[0], 3)) + ', sigma = ' + str(round(mlests[1], 3))  
				+ ', p0 = ' + str(round(mlests[2], 3)) + '\nCME: delta = ' + str(round(fp0.loc['cmest'][0], 3)) 
				+  ', sigma = ' + str(round(fp0.loc['cmest'][1], 3)) + ', p0 = ' + str(round(fp0.loc['cmest'][2], 3)))
		histplot.set_ylabel('Frequency')
		histplot.plot(x, f, color='#3bbf53', linewidth = 3)
		if nulltype == 0:
			histplot.plot(x, p0theo * f00, linewidth = 3, linestyle = 'dashed', color = 'blue')
		else:
			histplot.plot(x, p0 * f0, linewidth = 3, linestyle = 'dashed', color = 'blue')
		if not pd.isnull(z_2[1]): 
			histplot.plot([z_2[1]], [-0.5], marker = '^', markersize = 16, markeredgecolor = 'red', markeredgewidth = 1.3, color = 'yellow')
		if not pd.isnull(z_2[0]): 
			histplot.plot([z_2[0]], [-0.5], marker = '^', markersize = 16, markeredgecolor = 'red', markeredgewidth = 1.3, color = 'yellow')
		if nulltype == 1 or nulltype == 2:
			Ef = Efdr[0]
		elif nulltype == 0: 
			Ef = Efdr[3]
		# construct fdr + Fdr plot
		if plot == 2 or plot == 4:
			if nulltype == 0:
				fdd = fdr0
			else:
				fdd = fdr
			fdrFdrplot.plot(x, fdd, linewidth = 3, color = 'black')
			fdrFdrplot.plot(x, Fdrl, linewidth = 3, color = 'red', linestyle = 'dashed')
			fdrFdrplot.plot(x, Fdrr, linewidth = 3, color = 'green', linestyle = 'dashed')
			fdrFdrplot.set_ylim(-0.05, 1.1)
			fdrFdrplot.set_title('fdr (solid); Fdr\'s (dashed)')
			fdrFdrplot.set_xlabel('Efdr = ' + str(round(Ef, 3)))
			fdrFdrplot.set_ylabel('fdd (black), Fdrl (red), and Fdrr (green)')
			fdrFdrplot.plot([0, 0], [0, 1], linestyle = 'dotted', color = 'red')
			fdrFdrplot.axhline(linestyle = 'dotted', color = 'red')
		# construct plot of f1 cdf of estimated fdr curve
		if plot == 3 or plot == 4:
			if sum([1 for el in cdf1[:, 1] if pd.isnull(el)]) == cdf1.shape[0]:
				wa.warn('cdf1 is not available.')
			else:
				f1cdfplot.plot(cdf1[:, 0], cdf1[:, 1], linewidth = 3, color = 'black')
				f1cdfplot.set_xlabel('fdr level\nEfdr = ' + str(round(Ef, 3)))
				f1cdfplot.set_ylabel('f1 proportion < fdr level')
				f1cdfplot.set_title('f1 cdf of estimated fdr')
				f1cdfplot.set_ylim(0, 1)
				f1cdfplot.plot([0.2, 0.2], [0, cdf1[19, 1]], color = 'blue', linestyle = 'dashed')
				f1cdfplot.plot([0, 0.2], [cdf1[19, 1], cdf1[19, 1]], color = 'blue', linestyle = 'dashed')
				f1cdfplot.text(0.05, cdf1[19, 1], str(round(cdf1[19, 1], 2)))
		if saveplot:
			if savestamp:
				import time, datetime
				plt.savefig(saveroot + '_' + '-'.join(str(el) for el in list(tuple(datetime.datetime.now().timetuple())[:6])) + '.' + saveext)
			else:
				plt.savefig(saveroot + '.' + saveext)
		if showplot:
			plt.show()
	if nulltype == 0:
		ffdr = rf.approx(x, fdr0, zz, rule = 2, ties = 'ordered')
	else:
		ffdr = rf.approx(x, fdr, zz, rule = 2, ties = 'ordered')
	if mult is not None:
		return {'fdr' : ffdr, 'fp0' : fp0, 'Efdr' : Efdr, 'cdf1' : cdf1, 'mat' : mat, 'z_2' : z_2, 'yt' : yt, 'call' : call, 'x' : x, 'mlest_lo' : mlest_lo, 'mlest_hi' : mlest_hi, 'needsfix' : needsfix, 'nulldens' : nulldens, 'fulldens' : fulldens, 'mult' : EE}
	return {'fdr' : ffdr, 'fp0' : fp0, 'Efdr' : Efdr, 'cdf1' : cdf1, 'mat' : mat, 'z_2' : z_2, 'yt' : yt, 'call' : call, 'x' : x, 'mlest_lo' : mlest_lo, 'mlest_hi' : mlest_hi, 'needsfix' : needsfix, 'nulldens' : nulldens, 'fulldens' : fulldens}
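A minimal usage sketch for the function above. The call name `locfdr` and its default arguments are assumptions here (only the body is shown above); `zz` stands in for a 1-D array of z-values, and the keys used below (`fdr`, `needsfix`, `mlest_lo`, `mlest_hi`) come from the returned dictionary documented in the docstring:

    import numpy as np

    zz = np.random.normal(size=5000)   # toy z-values
    res = locfdr(zz, plot=0)           # result dictionary, plotting disabled
    lfdr = res['fdr']                  # per-observation local false discovery rates
    if res['needsfix']:
        # rerun with the suggested maximum-likelihood estimation interval
        res = locfdr(zz, mlests=[res['mlest_lo'], res['mlest_hi']], plot=0)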
Ejemplo n.º 46
0
 def _get_segment(cls, user, date):
     if in_transaction():
         warnings.warning('Inside a transaction: may cause performance issues.',
                       RuntimeWarning, stacklevel=3)
     return cls.get_segment(user, date)
Ejemplo n.º 47
0
 def setdata(self, x, y, sigmay=None, xmin=None, xmax=None):
     warnings.warning("Method renamed to setData",
                      DeprecationWarning)
     self.setData(x, y, sigmay, xmin, xmax)
Ejemplo n.º 48
0
    def __init__(self, loader, groups=None, filename=C.DEFAULT_HOST_LIST):
        if groups is None:
            groups = dict()

        self.names = os.listdir(filename)
        self.names.sort()
        self.directory = filename
        self.parsers = []
        self.hosts = {}
        self.groups = groups

        self._loader = loader

        for i in self.names:

            # Skip files that end with certain extensions or characters
            if any(i.endswith(ext) for ext in C.DEFAULT_INVENTORY_IGNORE):
                continue
            # Skip hidden files
            if i.startswith('.') and not i.startswith('./'):
                continue
            # These are things inside of an inventory basedir
            if i in ("host_vars", "group_vars", "vars_plugins"):
                continue
            fullpath = os.path.join(self.directory, i)
            if os.path.isdir(fullpath):
                parser = InventoryDirectory(loader=loader, groups=groups, filename=fullpath)
            else:
                parser = get_file_parser(fullpath, self.groups, loader)
                if parser is None:
                    #FIXME: needs to use display
                    import warnings
                    warnings.warning("Could not find parser for %s, skipping" % fullpath)
                    continue

            self.parsers.append(parser)

            # retrieve all groups and hosts form the parser and add them to
            # self, don't look at group lists yet, to avoid
            # recursion trouble, but just make sure all objects exist in self
            newgroups = parser.groups.values()
            for group in newgroups:
                for host in group.hosts:
                    self._add_host(host)
            for group in newgroups:
                self._add_group(group)

            # now check the objects lists so they contain only objects from
            # self; membership data in groups is already fine (except all &
            # ungrouped, see later), but might still reference objects not in self
            for group in self.groups.values():
                # iterate on a copy of the lists, as those lists get changed in
                # the loop
                # list with group's child group objects:
                for child in group.child_groups[:]:
                    if child != self.groups[child.name]:
                        group.child_groups.remove(child)
                        group.child_groups.append(self.groups[child.name])
                # list with group's parent group objects:
                for parent in group.parent_groups[:]:
                    if parent != self.groups[parent.name]:
                        group.parent_groups.remove(parent)
                        group.parent_groups.append(self.groups[parent.name])
                # list with group's host objects:
                for host in group.hosts[:]:
                    if host != self.hosts[host.name]:
                        group.hosts.remove(host)
                        group.hosts.append(self.hosts[host.name])
                    # also check here that the group that contains host, is
                    # also contained in the host's group list
                    if group not in self.hosts[host.name].groups:
                        self.hosts[host.name].groups.append(group)

        # extra checks on special groups all and ungrouped
        # remove hosts from 'ungrouped' if they became member of other groups
        if 'ungrouped' in self.groups:
            ungrouped = self.groups['ungrouped']
            # loop on a copy of ungrouped hosts, as we want to change that list
            for host in frozenset(ungrouped.hosts):
                if len(host.groups) > 1:
                    host.groups.remove(ungrouped)
                    ungrouped.hosts.remove(host)

        # remove hosts from 'all' if they became member of other groups
        # all should only contain direct children, not grandchildren
        # direct children should have depth == 1
        if 'all' in self.groups:
            allgroup = self.groups['all' ]
            # loop on a copy of all's  child groups, as we want to change that list
            for group in allgroup.child_groups[:]:
                # groups might once have been added to all, and later be added
                # to another group: we need to remove the link with all then
                if len(group.parent_groups) > 1 and allgroup in group.parent_groups:
                    # real children of all have just 1 parent, all
                    # this one has more, so not a direct child of all anymore
                    group.parent_groups.remove(allgroup)
                    allgroup.child_groups.remove(group)
                elif allgroup not in group.parent_groups:
                    # this group was once added to all, but doesn't list it as
                    # a parent any more; the info in the group is the correct
                    # info
                    allgroup.child_groups.remove(group)
Ejemplo n.º 49
0
def svg_shapes_to_df(svg_source, xpath='//svg:path | //svg:polygon',
                     namespaces=INKSCAPE_NSMAP):
    '''
    Construct a data frame with one row per vertex for all shapes in
    :data:`svg_source`.

    Arguments
    ---------
    svg_source : str or file-like
        A file path, URI, or file-like object.
    xpath : str, optional
        XPath path expression to select shape nodes.

        By default, all ``svg:path`` and ``svg:polygon`` elements are selected.
    namespaces : dict, optional
        Key/value mapping of XML namespaces.

    Returns
    -------
    pandas.DataFrame
        Frame with one row per vertex for all shapes in :data:`svg_source`,
        with the following columns:
         - ``vertex_i``: The index of the vertex within the corresponding
           shape.
         - ``x``: The x-coordinate of the vertex.
         - ``y``: The y-coordinate of the vertex.
         - other: attributes of the SVG shape element (e.g., ``id``, ``fill``,
            etc.)
    '''
    from lxml import etree

    e_root = etree.parse(svg_source)
    frames = []
    attribs_set = set()

    # Get list of attributes that are set in any of the shapes (not including
    # the `svg:path` `"d"` attribute or the `svg:polygon` `"points"`
    # attribute).
    #
    # This, for example, collects attributes such as:
    #
    #  - `fill`, `stroke` (as part of `"style"` attribute)
    #  - `"transform"`: matrix, scale, etc.
    for shape_i in e_root.xpath(xpath, namespaces=namespaces):
        attribs_set.update(shape_i.attrib.keys())

    for k in ('d', 'points'):
        if k in attribs_set:
            attribs_set.remove(k)

    attribs = list(sorted(attribs_set))

    # Always add 'id' attribute as first attribute.
    if 'id' in attribs:
        attribs.remove('id')
    attribs.insert(0, 'id')

    for shape_i in e_root.xpath(xpath, namespaces=namespaces):
        # Gather shape attributes from SVG element.
        base_fields = [shape_i.attrib.get(k, None) for k in attribs]

        if shape_i.tag == '{http://www.w3.org/2000/svg}path':
            # Decode `svg:path` vertices from [`"d"`][1] attribute.
            #
            # [1]: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/d
            points_i = [base_fields + [i] +
                        map(float, [m.group(v) for v in 'xy'])
                        for i, m in enumerate(cre_path_command
                                              .finditer(shape_i.attrib['d']))]
        elif shape_i.tag == '{http://www.w3.org/2000/svg}polygon':
            # Decode `svg:polygon` vertices from [`"points"`][2] attribute.
            #
            # [2]: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/points
            points_i = [base_fields + [i] + map(float, v.split(','))
                        for i, v in enumerate(shape_i.attrib['points']
                                              .strip().split(' '))]
        else:
            warnings.warning('Unsupported shape tag type: %s' % shape_i.tag)
            continue
        frames.extend(points_i)
    if not frames:
        # There were no shapes found, so set `frames` list to `None` to allow
        # an empty data frame to be created.
        frames = None
    return pd.DataFrame(frames, columns=attribs + ['vertex_i', 'x', 'y'])
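A brief usage sketch for the routine above, assuming it is importable from its defining module; the file name 'shapes.svg' is hypothetical:

    df = svg_shapes_to_df('shapes.svg')
    # one row per vertex; group by the shape 'id' to get per-shape centroids
    print(df.groupby('id')[['x', 'y']].mean())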
Ejemplo n.º 50
0
    def factorize(self, mode='standard', **kwargs):
        """Factorize terms in the expression. For example: ::

            A[i]*B[j] + A[i]*C[j]

        becomes ::

            A[i]*(B[j] + C[j]).

        :param mode: multiple factorization strategies are possible, each exposing
                     different, "hidden" opportunities for code motion.

            * mode == 'standard': factorize symbols along the dimension that appears
                most often in the expression.
            * mode == 'all': factorize symbols depending on at least one of the
                expression's dimensions.
            * mode == 'domain': factorize symbols depending on the expression's domain.
            * mode == 'outdomain': factorize symbols independent of the expression's
                domain.
            * mode == 'constants': factorize symbols independent of any loops enclosing
                the expression.
        """

        if mode == 'standard':
            retval = FindInstances.default_retval()
            symbols = FindInstances(Symbol).visit(self.stmt.rvalue, ret=retval)[Symbol]
            # The heuristics privileges domain dimensions
            dims = self.expr_info.out_domain_dims
            if not dims or self.expr_info.dimension >= 2:
                dims = self.expr_info.domain_dims
            # Get the dimension occurring most often
            occurrences = [tuple(r for r in s.rank if r in dims) for s in symbols]
            occurrences = [i for i in occurrences if i]
            if not occurrences:
                return self
            # Finally, establish the factorization dimension
            dimension = Counter(occurrences).most_common(1)[0][0]
            should_factorize = lambda n: set(dimension).issubset(set(n.rank))
        elif mode in ['all', 'domain', 'outdomain', 'constants']:
            info = visit(self.expr_info.outermost_loop, info_items=['symbols_dep'])
            symbols = defaultdict(set)
            for s, dep in info['symbols_dep'].items():
                symbols[s.symbol] |= {l.dim for l in dep}
            if mode == 'all':
                should_factorize = lambda n: symbols.get(n.symbol) and \
                    any(r in self.expr_info.dims for r in symbols[n.symbol])
            elif mode == 'domain':
                should_factorize = lambda n: symbols.get(n.symbol) and \
                    any(r in self.expr_info.domain_dims for r in symbols[n.symbol])
            elif mode == 'outdomain':
                should_factorize = lambda n: symbols.get(n.symbol) and \
                    not symbols[n.symbol].issubset(set(self.expr_info.domain_dims))
            elif mode == 'constants':
                should_factorize = lambda n: not symbols.get(n.symbol)
        else:
            warning('Unknown factorization strategy. Skipping.')
            return

        # Perform the factorization
        self.expr_factorizer.factorize(should_factorize)
        return self
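For illustration only (this is not the COFFEE API shown above), the algebraic rewrite described in the docstring can be reproduced with sympy's collect, assuming sympy is installed:

    import sympy as sp

    A_i, B_j, C_j = sp.symbols('A_i B_j C_j')
    expr = A_i * B_j + A_i * C_j
    print(sp.collect(expr, A_i))   # -> A_i*(B_j + C_j)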
Ejemplo n.º 51
0
    def fit_model(self, tol=1e-3, iter_max=100, h_step=2.0, epsil_0=10,
                  constant=True, verbose=True):
        '''
        '''
        # Fit a normal linear model to the data
        if constant:
            x_const = sm.add_constant(self.x)
            model = sm.OLS(self.y, x_const)
        else:
            model = sm.OLS(self.y, self.x)
        init_lm = model.fit()

        if verbose:
            print init_lm.summary()

        epsil = epsil_0

        # Before we get into the loop, make sure that this was a bad fit
        if epsil_0 < tol:
            warnings.warning('Initial epsilon is smaller than tolerance. \
                             The tolerance should be set smaller.')
            return init_lm

        # Sum of residuals
        dev_0 = np.sum(init_lm.resid**2.)

        # Count
        it = 0
        h_it = 0

        # Now loop through and minimize the residuals by changing where the
        # breaking point is.
        while np.abs(epsil) > tol:
            U = (self.x - self.brk) * (self.x > self.brk)
            V = deriv_max(self.x, self.brk)

            X_all = np.vstack([self.x, U, V]).T
            if constant:
                X_all = sm.add_constant(X_all)

            model = sm.OLS(self.y, X_all)
            fit = model.fit()

            beta = fit.params[2]  # Get coef
            gamma = fit.params[3]  # Get coef

            # Adjust the break point
            new_brk = copy(self.brk)
            new_brk += (h_step * gamma) / beta

            # If the new break point is outside of the allowed range, reset
            # the step size to half of the original, then try stepping again
            if not (self.x > new_brk).any():
                while True:
                    h_step /= 2.0
                    new_brk += (h_step * gamma) / beta
                    h_it += 1
                    if (self.x > new_brk).any():
                        self.brk = new_brk
                        break
                    if h_it >= 5:
                        raise ValueError("Cannot find suitable step size. \
                                          Check number of breaks.")
            else:
                self.brk = new_brk

            dev_1 = np.sum(fit.resid**2.)

            epsil = (dev_1 - dev_0) / (dev_0 + 1e-3)

            dev_0 = dev_1

            if verbose:
                print "Iteration: %s/%s" % (it+1, iter_max)
                print fit.summary()
                print "Break Point: " + str(self.brk)
                print "Epsilon: " + str(epsil)

            it += 1

            if it > iter_max:
                warnings.warning("Max iterations reached. \
                                 Result may not be minimized.")
                break

        # With the break point hopefully found, do a final good fit
        U = (self.x - self.brk) * (self.x > self.brk)
        V = deriv_max(self.x, self.brk)

        X_all = np.vstack([self.x, U, V]).T
        X_all = sm.add_constant(X_all)

        model = sm.OLS(self.y, X_all)
        self.fit = model.fit()
        self._params = self.fit.params
        cov_matrix = self.fit.cov_params()
        self._errs = np.asarray([np.sqrt(cov_matrix[i, i])
                                 for i in range(cov_matrix.shape[0])])

        self.brk_err = brk_errs(self.fit.params, self.fit.cov_params())

        self.get_slopes()

        return self
Ejemplo n.º 52
0
def wcs_from_footprints(dmodels, refmodel=None, transform=None, bounding_box=None, domain=None):
    """
    Create a WCS from a list of input data models.

    A fiducial point in the output coordinate frame is created from  the
    footprints of all WCS objects. For a spatial frame this is the center
    of the union of the footprints. For a spectral frame the fiducial is in
    the beginning of the footprint range.
    If ``refmodel`` is None, the first WCS object in the list is considered
    a reference. The output coordinate frame and projection (for celestial frames)
    is taken from ``refmodel``.
    If ``transform`` is not supplied, a compound transform is created using
    CDELTs and PC.
    If ``bounding_box`` is not supplied, the bounding_box of the new WCS is computed
    from bounding_box of all input WCSs.

    Parameters
    ----------
    dmodels : list of `~jwst.datamodels.DataModel`
        A list of data models.
    refmodel : `~jwst.datamodels.DataModel`, optional
        This model's WCS is used as a reference. The output coordinate frame,
        the projection and a scaling and rotation transform are created from it.
        If not supplied, the first model in the list is used as ``refmodel``.
    transform : `~astropy.modeling.core.Model`, optional
        A transform, passed to :meth:`~gwcs.wcstools.wcs_from_fiducial`
        If not supplied Scaling | Rotation is computed from ``refmodel``.
    bounding_box : tuple, optional
        Bounding_box of the new WCS.
        If not supplied it is computed from the bounding_box of all inputs.
    """
    if domain is not None:
        warnings.warning("'domain' was deprecated in 0.8 and will be removed from the next "
                         "version. Use 'bounding_box' instead.")
        bb = _domain_to_bounding_box(domain)
    else:
        bb = bounding_box
    wcslist = [im.meta.wcs for im in dmodels]
    if not isiterable(wcslist):
        raise ValueError("Expected 'wcslist' to be an iterable of WCS objects.")
    if not all([isinstance(w, WCS) for w in wcslist]):
        raise TypeError("All items in wcslist are to be instances of gwcs.WCS.")
    if refmodel is None:
        refmodel = dmodels[0]
    else:
        if not isinstance(refmodel, DataModel):
            raise TypeError("Expected refmodel to be an instance of DataModel.")

    fiducial = compute_fiducial(wcslist, bb)

    prj = astmodels.Pix2Sky_TAN()

    if transform is None:
        transform = []
        wcsinfo = pointing.wcsinfo_from_model(refmodel)
        sky_axes, spec, other = gwutils.get_axes(wcsinfo)
        rotation = astmodels.AffineTransformation2D(wcsinfo['PC'])
        transform.append(rotation)
        if sky_axes:
            cdelt1, cdelt2 = wcsinfo['CDELT'][sky_axes]
            scale = np.sqrt(np.abs(cdelt1 * cdelt2))
            scales = astmodels.Scale(scale) & astmodels.Scale(scale)
            transform.append(scales)

        if transform:
            transform = functools.reduce(lambda x, y: x | y, transform)

    out_frame = refmodel.meta.wcs.output_frame
    wnew = wcs_from_fiducial(fiducial, coordinate_frame=out_frame,
                             projection=prj, transform=transform)

    footprints = [w.footprint().T for w in wcslist]
    domain_bounds = np.hstack([wnew.backward_transform(*f) for f in footprints])
    for axs in domain_bounds:
        axs -= axs.min()
    bounding_box = []
    for axis in out_frame.axes_order:
        axis_min, axis_max = domain_bounds[axis].min(), domain_bounds[axis].max()
        bounding_box.append((axis_min, axis_max))
    bounding_box = tuple(bounding_box)
    ax1, ax2 = np.array(bounding_box)[sky_axes]
    offset1 = (ax1[1] - ax1[0]) / 2
    offset2 = (ax2[1] - ax2[0]) / 2
    offsets = astmodels.Shift(-offset1) & astmodels.Shift(-offset2)

    wnew.insert_transform('detector', offsets, after=True)
    wnew.bounding_box = bounding_box
    return wnew
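A hedged usage sketch, assuming the jwst package is installed and the (hypothetical) file names point at calibrated exposures with valid WCS metadata:

    from jwst import datamodels

    dmodels = [datamodels.open(f) for f in ('exp1_cal.fits', 'exp2_cal.fits')]
    wnew = wcs_from_footprints(dmodels, refmodel=dmodels[0])
    print(wnew.bounding_box)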
Ejemplo n.º 53
0
 def warn(self):
     see_above = self
     # the next line is what the user will see after the error is printed
     warning (see_above, SymPyDeprecationWarning)
Ejemplo n.º 54
0
def deprecation(message):
    warnings.warning("<" + message + "> is deprecated!", DeprecationWarning, stacklevel=2)
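A minimal working sketch of the same pattern, using the standard-library call warnings.warn; note that DeprecationWarning is hidden by default unless a filter enables it:

    import warnings

    def deprecation(message):
        # stacklevel=2 attributes the warning to the caller of the deprecated API
        warnings.warn("<" + message + "> is deprecated!", DeprecationWarning, stacklevel=2)

    warnings.simplefilter('always', DeprecationWarning)
    deprecation('old_function()')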
Ejemplo n.º 55
0
def average_precision(y_true, y_pred, integration='trapz',
                      argsort_kind='quicksort'):
    """Computes the Average Precision (AP) from the recall and precision
    arrays. Different 'integration' methods can be used.

    Parameters
    ----------
    y_true: array, shape = [n_samples]
        True values, interpreted as strictly positive or not
        (i.e. converted to binary).
        Could be in {-1, +1} or {0, 1} or {False, True}.

    y_pred: array, shape = [n_samples]
        Predicted values.

    integration: str, optional
        Type of 'integration' method used to compute the average precision:
            'trapz': trapezoidal rule (default)
            'voc2010': see http://goo.gl/glxdO and http://goo.gl/ueXzr
            'voc2007': see http://goo.gl/E1YyY

    argsort_kind: str
        Sorting algorithm.

    Returns
    -------
    ap: float
        Average Precision

    Note
    ----
    'voc2007' method is here only for legacy purposes. We do not recommend
    its use since even simple trivial cases like a perfect match between
    true values and predicted values do not lead to an average precision of 1.
    """

    # -- basic checks and conversion
    assert len(y_true) == len(y_pred)
    assert np.isfinite(y_true).all()
    assert np.isfinite(y_pred).all()
    assert integration in ['trapz', 'voc2010', 'voc2007']

    y_true = np.array(y_true, dtype=DTYPE)
    assert y_true.ndim == 1

    y_pred = np.array(y_pred, dtype=DTYPE)
    assert y_pred.ndim == 1

    n_uniques = np.unique(y_pred)
    if n_uniques.size == 1:
        raise ValueError('Rank of predicted values is ill-defined'
                         ' because all elements are equal')
    elif n_uniques.size < y_pred.size:
        warning('some predicted elements have exactly the same value.'
                ' output will most probably depend on the sorting'
                ' method used. Here "%s"' % argsort_kind, UserWarning)

    # -- actual computation
    rec = recall(y_true, y_pred, argsort_kind=argsort_kind)
    prec = precision(y_true, y_pred, argsort_kind=argsort_kind)

    if integration == 'trapz':
        if rec[0] != 0.:
            rec = np.concatenate(([0.], rec))
            prec = np.concatenate(([prec[0]], prec))
        ap = trapz(prec, rec)

    elif integration == 'voc2010':
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))
        rng = np.arange(len(mpre) - 1)[::-1]
        for i in rng:
            mpre[i] = max(mpre[i], mpre[i + 1])
        sel = np.nonzero(mrec[1:] != mrec[0:-1])[0] + 1
        ap = ((mrec[sel] - mrec[sel - 1]) * mpre[sel]).sum()
        if np.isnan(ap):
            ap = 0.

    elif integration == 'voc2007':
        ap = 0.
        rng = np.arange(0, 1.1, .1)
        for th in rng:
            p = prec[rec >= th]
            if len(p) > 0:
                ap += p.max() / rng.size

    return ap
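A toy call, assuming the module's recall/precision helpers and DTYPE are importable alongside the function above; the labels and scores below are made up:

    import numpy as np

    y_true = np.array([1, 0, 1, 1, 0, 0])
    y_pred = np.array([0.9, 0.8, 0.7, 0.4, 0.3, 0.1])
    print(average_precision(y_true, y_pred, integration='trapz'))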
Ejemplo n.º 56
0
    def licm(self, **kwargs):
        """Perform generalized loop-invariant code motion."""
        if not self._check_loops(self.expr_info.loops):
            warning("Loop nest unsuitable for generalized licm. Skipping.")
            return

        symbols = visit(self.header, info_items=['symbols_dep'])['symbols_dep']
        symbols = dict((s, [l.dim for l in dep]) for s, dep in symbols.items())

        extracted = True
        expr_dims_loops = self.expr_info.loops_from_dims
        expr_outermost_loop = self.expr_info.outermost_loop
        inv_dep = {}
        while extracted:
            extracted = self._extract(self.stmt.rvalue, symbols, **kwargs)
            for dep, subexprs in extracted.items():
                # -1) Remove identical subexpressions
                subexprs = uniquify(subexprs)

                # 0) Determine the loop nest level where invariant expressions
                # should be hoisted. The goal is to hoist them as far as possible
                # in the loop nest, while minimising temporary storage.
                # We distinguish six hoisting cases:
                if len(dep) == 0:
                    # As scalar (/wrap_loop=None/), outside of the loop nest;
                    place = self.header
                    wrap_loop = ()
                    next_loop = expr_outermost_loop
                elif len(dep) == 1 and is_perfect_loop(expr_outermost_loop):
                    # As scalar, outside of the loop nest;
                    place = self.header
                    wrap_loop = (expr_dims_loops[dep[0]],)
                    next_loop = expr_outermost_loop
                elif len(dep) == 1 and len(expr_dims_loops) > 1:
                    # As scalar, within the loop imposing the dependency
                    place = expr_dims_loops[dep[0]].children[0]
                    wrap_loop = ()
                    next_loop = od_find_next(expr_dims_loops, dep[0])
                elif len(dep) == 1:
                    # As scalar, right before the expression (which is enclosed
                    # in just a single loop, we can claim at this point)
                    place = expr_dims_loops[dep[0]].children[0]
                    wrap_loop = ()
                    next_loop = place.children[place.children.index(self.stmt)]
                elif set(dep).issuperset(set(self.expr_info.domain_dims)) and \
                        not any([self.expr_graph.is_written(e) for e in subexprs]):
                    # As n-dimensional vector, where /n == len(dep)/, outside of
                    # the loop nest
                    place = self.header
                    wrap_loop = tuple(expr_dims_loops.values())
                    next_loop = expr_outermost_loop
                else:
                    # As vector, within the outermost loop imposing the dependency
                    place = expr_dims_loops[dep[0]].children[0]
                    wrap_loop = tuple(expr_dims_loops[dep[i]] for i in range(1, len(dep)))
                    next_loop = od_find_next(expr_dims_loops, dep[0])

                # 1) Create the new invariant temporary symbols
                loop_size = tuple([l.size for l in wrap_loop])
                loop_dim = tuple([l.dim for l in wrap_loop])
                inv_syms = [Symbol(self._hoisted_sym % {
                    'loop_dep': '_'.join(dep).upper() if dep else 'CONST',
                    'expr_id': self.expr_id,
                    'round': self.counter,
                    'i': i
                }, loop_size) for i in range(len(subexprs))]
                inv_decls = [Decl(self.expr_info.type, s) for s in inv_syms]
                inv_syms = [Symbol(s.symbol, loop_dim) for s in inv_syms]

                # 2) Keep track of new declarations for later easy access
                for d in inv_decls:
                    d.scope = LOCAL
                    self.decls[d.sym.symbol] = d

                # 3) Replace invariant subtrees with the proper temporary
                to_replace = dict(zip(subexprs, inv_syms))
                n_replaced = ast_replace(self.stmt.rvalue, to_replace)

                # 4) Update symbol dependencies
                for s, e in zip(inv_syms, subexprs):
                    self.expr_graph.add_dependency(s, e)
                    if n_replaced[str(s)] > 1:
                        self.expr_graph.add_dependency(s, s)
                    symbols[s] = dep

                # 5) Create the body containing invariant statements
                subexprs = [dcopy(e) for e in subexprs]
                inv_stmts = [Assign(s, e) for s, e in zip(dcopy(inv_syms), subexprs)]

                # 6) Track necessary information for AST construction
                inv_info = (loop_dim, place, next_loop, wrap_loop)
                if inv_info not in inv_dep:
                    inv_dep[inv_info] = (inv_decls, inv_stmts)
                else:
                    inv_dep[inv_info][0].extend(inv_decls)
                    inv_dep[inv_info][1].extend(inv_stmts)

        for inv_info, (inv_decls, inv_stmts) in sorted(inv_dep.items()):
            loop_dim, place, next_loop, wrap_loop = inv_info
            # Create the hoisted code
            if wrap_loop:
                outer_wrap_loop = ast_make_for(inv_stmts, wrap_loop[-1])
                for l in reversed(wrap_loop[:-1]):
                    outer_wrap_loop = ast_make_for([outer_wrap_loop], l)
                code = inv_decls + [outer_wrap_loop]
                wrap_loop = outer_wrap_loop
            else:
                code = inv_decls + inv_stmts
                wrap_loop = None
            # Insert the new nodes at the right level in the loop nest
            ofs = place.children.index(next_loop)
            place.children[ofs:ofs] = code + [FlatBlock("\n")]
            # Track hoisted symbols
            for i, j in zip(inv_stmts, inv_decls):
                self.hoisted[j.sym.symbol] = (i, j, wrap_loop, place)

        # Finally, make sure symbols are unique in the AST
        self.stmt.rvalue = dcopy(self.stmt.rvalue)
Ejemplo n.º 57
0
def train_model(args, seed, proxy, pred):
    trained_model = Training()
    if args.train:
        '''
        The program is essentially run in one of two mutually exclusive modes
        (training or test)
        :param train if True, being parsing and training model file
        '''
        verbose_print(args.verbose, "Training model")
        if args.datain:
            warnings.warning("WARNING: The pickle datatype is inherently\
                             insecure. A quick question: do you trust the\
                             source of your model? Pickle files can contain\
                             corrupt code and executable commands.\
                             They can take over your computer and install\
                             malicious code on your computer or server. Use\
                             caution! Your best bet is to train your own\
                             models and run those! Use --datain at your own\
                             risk")
            continue_program = raw_input("Press [Y/y] if you want to continue")
            if continue_program in ['Y', 'y']:
                trained_model = existing_training_model(args, seed)
            else:
                exit()
        else:
            distance = False
            training_data = False
            verbose_print(args.verbose, "Reading training set")
            (user, experimental, chemofeatures, fingerprint) = check_features(args)
            if (args.distance is True) or (args.cluster is True) or (args.impute is True):
                '''These functions all require a distance matrix, which is best
                collected using the fingerprint data'''
                fingerprint = True
            training = rt.Read(args.input, pred, user=user, id_name=_id,
                               weights=args.weight)
            '''This block of code generally works on feature collection and
            parsing, including the removal of fully redundant features. The
            difference between remove_static=True and False is whether or not
            to get rid of fully redundant features. Since the distance matrix
            is the same, regardless, it is run using original data'''
            training_data = add_pubchem_features(training, args, user=user,
                                                 proxy=proxy,
                                                 fingerprint=fingerprint,
                                                 experimental=experimental,
                                                 chemofeatures=chemofeatures,
                                                 id_name=_id, chunks=_chunks)
            if (args.cluster is True) or (args.distance is True) or (args.impute is True):
                verbose_print(args.verbose, "Creating distance matrix")
                '''Collect distance matrix using the original dataset'''
                distance = collect_distance_matrix(training_data)
            '''Extract features from the user and PubChem data'''
            verbose_print(args.verbose, "Extracting features")
            training_data = extract_features(training_data, args, user=user,
                                             fingerprint=fingerprint,
                                             experimental=experimental,
                                             chemofeatures=chemofeatures,
                                             remove_static=True)
            '''Discretize the y-values for the the classification process.
            If no split value is provided then the default for the program
            is to break the value at the median
            '''
            if training_data.compound:
                train = bt.Process(training_data, split_value=args.split_value,
                                   verbose=args.verbose)
                if args.impute is True:
                    train.impute_values(distance=distance,
                                        verbose=args.verbose)
                if args.selection is True:
                    train.feature_selection(verbose=args.verbose,
                                            seed=args.random)
                '''If dataout parameter is set, it prints to pickle a file
                containing the features that were extracted. In later runs
                this can be specified as the data input using the datain
                parameter
                '''
                if args.dataout:
                    features_file = args.dataout + ".features"
                    with open(features_file, 'wb') as fid:
                        pickle.dump(train, fid)
                '''This is where the model is actually trained in the tm module'''
                model = tm.Train(train)
                model.train_model()
                trained_model.model = model
                '''If dataout parameter is set, it prints to pickle a file
                containing the RF model. In later runs this can be specified
                as the data input using the datain parameter
                '''
                if args.dataout:
                    model_file = args.dataout + ".model"
                    with open(model_file, 'wb') as fid:
                        pickle.dump(model, fid)
                if args.cv:
                    report_model_validation(model, args)
                if args.cluster:
                    cluster = cl.Clustering(training_data.compound, seed=args.random)
                    cluster.cluster_training(model)
                    trained_model.cluster = cluster
                    if args.dataout:
                        cluster_file = args.dataout + ".cluster"
                        with open(cluster_file, 'wb') as fid:
                            pickle.dump(cluster, fid)
    else:
        trained_model = False
    return trained_model
Ejemplo n.º 58
0
        def _generate_cpu_code(self, kernel, **kwargs):
            """Generate kernel code according to the various optimization options."""

            rewrite = kwargs.get('rewrite')
            vectorize = kwargs.get('vectorize')
            v_type, v_param = vectorize if vectorize else (None, None)
            align_pad = kwargs.get('align_pad')
            split = kwargs.get('split')
            toblas = kwargs.get('blas')
            unroll = kwargs.get('unroll')
            precompute = kwargs.get('precompute')
            dead_ops_elimination = kwargs.get('dead_ops_elimination')

            info = visit(kernel)
            decls = info['decls']
            # Structure up expressions and related metadata
            nests = defaultdict(OrderedDict)
            for stmt, expr_info in info['exprs'].items():
                parent, nest, domain = expr_info
                if not nest:
                    continue
                metaexpr = MetaExpr(check_type(stmt, decls), parent, nest, domain)
                nests[nest[0]].update({stmt: metaexpr})
            loop_opts = [CPULoopOptimizer(loop, header, decls, exprs)
                         for (loop, header), exprs in nests.items()]

            # Combining certain optimizations is meaningless/forbidden.
            if unroll and toblas:
                raise RuntimeError("BLAS forbidden with unrolling")
            if dead_ops_elimination and split:
                raise RuntimeError("Split forbidden with zero-valued blocks avoidance")
            if dead_ops_elimination and toblas:
                raise RuntimeError("BLAS forbidden with zero-valued blocks avoidance")
            if dead_ops_elimination and v_type and v_type != VectStrategy.AUTO:
                raise RuntimeError("SIMDization forbidden with zero-valued blocks avoidance")
            if unroll and v_type and v_type != VectStrategy.AUTO:
                raise RuntimeError("SIMDization forbidden with unrolling")
            if rewrite == 'auto' and len(info['exprs']) > 1:
                warning("Rewrite mode=auto forbidden with multiple expressions")
                warning("Switching to rewrite mode=2")
                rewrite = 2

            ### Optimization pipeline ###
            for loop_opt in loop_opts:
                # 0) Expression Rewriting
                if rewrite:
                    loop_opt.rewrite(rewrite)

                # 1) Dead-operations elimination
                if dead_ops_elimination:
                    loop_opt.eliminate_zeros()

                # 2) Splitting
                if split:
                    loop_opt.split(split)

                # 3) Precomputation
                if precompute:
                    loop_opt.precompute(precompute)

                # 4) Unroll/Unroll-and-jam
                if unroll:
                    loop_opt.unroll(dict(unroll))

                # 5) Vectorization
                if initialized and flatten(loop_opt.expr_domain_loops):
                    vect = LoopVectorizer(loop_opt)
                    if align_pad and not toblas:
                        # Padding and data alignment
                        vect.pad_and_align()
                    if v_type and v_type != VectStrategy.AUTO:
                        if isa['inst_set'] == 'SSE':
                            raise RuntimeError("SSE vectorization not supported")
                        # Specialize vectorization for the memory access pattern
                        # of the expression
                        vect.specialize(v_type, v_param)

                # 6) Conversion into blas calls
                if toblas:
                    self.blas = loop_opt.blas(toblas)

            # Ensure kernel is always marked static inline
            # Remove either or both of static and inline (so that we get the order right)
            kernel.pred = [q for q in kernel.pred if q not in ['static', 'inline']]
            kernel.pred.insert(0, 'inline')
            kernel.pred.insert(0, 'static')

            return loop_opts
def ff_timeresolved(x, win=None, start=None, stop=None, step=None):
    """
    Evaluates the empirical Fano Factor (FF) of the spike counts of
    a list of spike trains.
    By default computes the FF over the full time span of the data.
    However, it can compute the FF time-resolved as well.

    Given the vector v containing the observed spike counts (one per
    spike train) in the time window [t0, t1], the FF in [t0, t1] is:

                        FF := var(v)/mean(v).

    The FF is usually computed for spike trains representing the activity
    of the same neuron over different trials. The higher the FF, the larger
    the cross-trial non-stationarity.
    For a time-stationary Poisson process, the theoretical FF is 1.

    Parameters
    ----------
    x : list of SpikeTrain
        a list of spike trains for which to compute the FF of spike counts.
    win : Quantity or None (optional)
        Length of each time window over which to compute the FF.
        If None, the FF is computed over the largest window possible;
        otherwise, the window slides along time (see parameter step).
        Default: None
    start : Quantity or None (optional)
        starting time for the computation of the FF. If None, the largest
        t_start among those of the spike trains in x is used.
        Default: None
    stop : Quantity or None (optional)
        ending time for the computation of the FF. If None, the smallest
        t_stop among those of the spike trains in x is used.
        Default: None
    step : Quantity or None (optional)
        time shift between two consecutive sliding windows. If None,
        successive windows are adjacent.
        Default: None

    Returns
    -------
    values: array
        array of FF values computed over consecutive time windows
    windows: array of shape (..., 2)
        array of time windows over which the  FF has been computed

    """

    # Compute max(t_start) and min(t_stop) and check consistency
    max_tstart = max([t.t_start for t in x])
    min_tstop = min([t.t_stop for t in x])

    if not (all([max_tstart == t.t_start for t in x]) and
                all([min_tstop == t.t_stop for t in x])):
        warnings.warning('spike trains have different t_start or t_stop'
                         ' values. FF computed for inner values only')

    # Set start, stop, window length and step for the default cases
    t_start = max_tstart if start is None else start
    t_stop = min_tstop if stop is None else stop
    wlen = t_stop - t_start if win is None else win
    wstep = wlen if step is None else step

    # Convert all time quantities in dimensionless (_dl) units (meant in s)
    start_dl = float(t_start.simplified.base)
    stop_dl = float(t_stop.simplified.base)
    wlen_dl = float(wlen.simplified.base)
    step_dl = float(wstep.simplified.base)

    # Define the centers of the sliding windows where the FF must be computed
    ff_times = numpy.arange(wlen_dl / 2. + start_dl,
                            stop_dl - wlen_dl / 2. + step_dl / 2, step_dl)

    # Define the windows within which the FF must be computed (as Nx2 array)
    windows = pq.s * numpy.array([numpy.max([ff_times - wlen_dl / 2.,
                                             start_dl * numpy.ones(
                                                 len(ff_times))], axis=0),
                                  numpy.min([ff_times
                                             + wlen_dl / 2.,
                                             stop_dl * numpy.ones(
                                                 len(ff_times))], axis=0)]).T
    windows = windows.rescale(x[0].units)

    # Compute the FF in each window define above
    ff_values = numpy.zeros(len(ff_times))
    for i, w in enumerate(windows):
        x_sliced = [t.time_slice(w[0], w[1]) for t in x]
        ff_values[i] = fanofactor(x_sliced)

    return ff_values, windows
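The FF definition from the docstring can also be checked directly on per-trial spike counts with plain numpy; the counts below are made up:

    import numpy as np

    counts = np.array([12, 9, 15, 11, 14])   # one spike count per trial
    ff = counts.var() / counts.mean()        # FF := var(v)/mean(v)
    print(ff)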