Example #1
def assert_almost_equal_inf(x, y, decimal=6, msg=None):
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)
    assert_equal(np.isposinf(x), np.isposinf(y))
    assert_equal(np.isneginf(x), np.isneginf(y))
    assert_equal(np.isnan(x), np.isnan(y))
    assert_almost_equal(x[np.isfinite(x)], y[np.isfinite(y)], decimal=decimal)
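A minimal usage sketch (not part of the original test module), assuming the helper above and numpy.testing are importable in the same module:

import numpy as np
from numpy.testing import assert_equal, assert_almost_equal

a = np.array([1.0, np.inf, -np.inf, np.nan, 2.0000001])
b = np.array([1.0, np.inf, -np.inf, np.nan, 2.0000002])
# Passes: the infinities and NaNs line up, and the finite entries agree
# to the requested number of decimals.
assert_almost_equal_inf(a, b, decimal=6)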
Example #2
def compute_frequencydomaincoef(x, data_length_sec, sampling_frequency, nfreq_bands, win_length_sec, stride_sec):
    n_channels = x.shape[1]
    n_timesteps = (data_length_sec - win_length_sec) // stride_sec + 1  # integer division: used as an array dimension
    n_fbins = nfreq_bands
    xfreq = np.zeros((n_timesteps, 136))
    x2 = np.zeros((n_channels, n_fbins, n_timesteps))
    for i in range(n_channels):
        xc = np.zeros((n_fbins, n_timesteps))
        for frame_num, w in enumerate(range(0, data_length_sec - win_length_sec + 1, stride_sec)):
            xw = x[w * sampling_frequency: (w + win_length_sec) * sampling_frequency, i]  # current window
            fft = np.log10(np.absolute(np.fft.rfft(xw)))
            fft_freq = np.fft.rfftfreq(n=xw.shape[-1], d=1.0 / sampling_frequency)  # FFT sample frequencies
            xc[:nfreq_bands, frame_num] = group_into_bands(fft, fft_freq, nfreq_bands)
        x2[i, :, :] = xc
    for j in range(n_timesteps):

        x2[:, :, j][np.isneginf(x2[:, :, j])] = 0
        scaled = preprocessing.scale(x2[:, :, j], axis=0)

        matrix = CorrelationMatrix().apply(scaled)
        matrix[np.isneginf(matrix)] = 0
        matrix[np.isnan(matrix)] = 0

        eigenvalue = Eigenvalues().apply(matrix)

        freqdomaincor = upper_right_triangle(matrix)
        xfreq[j, :] = np.concatenate((freqdomaincor, eigenvalue))
    xfreq[np.isneginf(xfreq)] = 0
    xfreq[np.isnan(xfreq)] = 0
    return xfreq
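compute_frequencydomaincoef relies on helpers that are not shown here (group_into_bands, CorrelationMatrix, Eigenvalues, upper_right_triangle, and sklearn's preprocessing). Purely to make the windowed-FFT step concrete, a hypothetical sketch of what group_into_bands might look like (the band layout is an assumption, not the original project's definition):

import numpy as np

def group_into_bands(fft, fft_freq, nfreq_bands):
    # Hypothetical stand-in: average the log-magnitude spectrum inside
    # nfreq_bands equally spaced frequency bands, skipping the DC bin.
    band_edges = np.linspace(fft_freq[1], fft_freq[-1], nfreq_bands + 1)
    features = np.zeros(nfreq_bands)
    for k in range(nfreq_bands):
        in_band = (fft_freq >= band_edges[k]) & (fft_freq <= band_edges[k + 1])
        if in_band.any():
            features[k] = fft[in_band].mean()
    return features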
Example #3
def min_sum_diffs(filename, args):
    """Sum of the differences (in dB) between responses and a reference response.
        
    Args:
        filename (str): Name of output file
        args (dict): 'refresp' key with path & filename of reference response; 'outputs' key with a list of names (IDs) of outputs (rxs) from input file
        
    Returns:
        diffdB (float): Sum of the differences (in dB) between responses and a reference response
    """

    # Load (from gprMax output file) the reference response
    f = h5py.File(args['refresp'], 'r')
    tmp = f['/rxs/rx1/']
    fieldname = list(tmp.keys())[0]
    refresp = np.array(tmp[fieldname])

    # Load (from gprMax output file) the response
    f = h5py.File(filename, 'r')
    nrx = f.attrs['nrx']
    
    diffdB = 0
    outputs = 0
    for rx in range(1, nrx + 1):
        output = f['/rxs/rx' + str(rx) + '/']
        if output.attrs['Name'] in args['outputs']:
            outputname = list(output.keys())[0]
            modelresp = np.array(output[outputname])
            # Calculate sum of differences
            tmp = 20 * np.log10(np.abs(modelresp - refresp) / np.amax(np.abs(refresp)))
            tmp = np.abs(np.sum(tmp[~np.isneginf(tmp)])) / len(tmp[~np.isneginf(tmp)])
            diffdB += tmp
            outputs += 1

    return diffdB / outputs
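The dB-difference metric itself can be checked without gprMax output files; a small self-contained sketch on synthetic responses (the signals below are made up for illustration):

import numpy as np

refresp = np.sin(np.linspace(0.0, 2.0 * np.pi, 200))
modelresp = refresp + 0.01 * np.cos(np.linspace(0.0, 2.0 * np.pi, 200))

# Same computation as inside min_sum_diffs: -inf entries (log10 of a zero
# difference) are excluded before averaging.
tmp = 20 * np.log10(np.abs(modelresp - refresp) / np.amax(np.abs(refresp)))
finite = tmp[~np.isneginf(tmp)]
diffdB = np.abs(np.sum(finite)) / len(finite)
print(diffdB)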
Example #4
def non_matches(arr, val):
    '''
    Given a ndarray and an arbitrary 
    value, including np.nan, np.inf, etc.,
    return an ndarray that contains 
    only elements that are *not* equal 
    to val.  
    
    :param arr: n-dimensional numpy array
    :type arr: numpy.ndarray
    :param val: value, including special values such as numpy.nan, numpy.inf, or -numpy.inf
    :type val: ANY.
    '''
    
    # Special value?
    if np.isfinite(val):
        # No, just a normal value:
        return arr[arr != val]
    # Special value, such as numpy.nan or an infinity.
    # Build a True/False mask of the entries that are
    # *not* equal to val. Note that np.isposinf and
    # np.isneginf must be checked individually: np.isinf
    # is True for both and would make those branches
    # unreachable.
    elif np.isnan(val):
        cond = np.logical_not(np.isnan(arr))
    elif np.isposinf(val):
        cond = np.logical_not(np.isposinf(arr))
    else:
        # np.isneginf(val) is True here:
        cond = np.logical_not(np.isneginf(arr))
        
    # Use the True/False ndarray as a mask
    # over arr:
    return arr[cond]
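A quick usage sketch of non_matches on a 1-D array containing ordinary values, NaN and both infinities:

import numpy as np

arr = np.array([1.0, np.nan, -np.inf, 3.0, np.inf])
print(non_matches(arr, 3.0))      # drops the 3.0, keeps 1.0, nan, -inf, inf
print(non_matches(arr, np.nan))   # drops only the NaN
print(non_matches(arr, -np.inf))  # drops only the -inf, not the +inf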
Example #5
def imagesDiffer(imageArr1, imageArr2, skipMaskArr=None, rtol=1.0e-05, atol=1e-08):
    """Compare the pixels of two image arrays; return True if close, False otherwise
    
    Inputs:
    - image1: first image to compare
    - image2: second image to compare
    - skipMaskArr: pixels to ignore; nonzero values are skipped
    - rtol: relative tolerance (see below)
    - atol: absolute tolerance (see below)
    
    rtol and atol are positive, typically very small numbers.
    The relative difference (rtol * abs(b)) and the absolute difference "atol" are added together
    to compare against the absolute difference between "a" and "b".
    
    Return a string describing the error if the images differ significantly, an empty string otherwise
    """
    retStrs = []
    if skipMaskArr is not None:
        maskedArr1 = numpy.ma.array(imageArr1, copy=False, mask = skipMaskArr)
        maskedArr2 = numpy.ma.array(imageArr2, copy=False, mask = skipMaskArr)
        filledArr1 = maskedArr1.filled(0.0)
        filledArr2 = maskedArr2.filled(0.0)
    else:
        filledArr1 = imageArr1
        filledArr2 = imageArr2

    nan1 = numpy.isnan(filledArr1)
    nan2 = numpy.isnan(filledArr2)
    if numpy.any(nan1 != nan2):
        retStrs.append("NaNs differ")

    posinf1 = numpy.isposinf(filledArr1)
    posinf2 = numpy.isposinf(filledArr2)
    if numpy.any(posinf1 != posinf2):
        retStrs.append("+infs differ")

    neginf1 = numpy.isneginf(filledArr1)
    neginf2 = numpy.isneginf(filledArr2)
    if numpy.any(neginf1 != neginf2):
        retStrs.append("-infs differ")

    # compare values that should be comparable (are neither infinite, nan nor masked)
    valSkipMaskArr = nan1 | nan2 | posinf1 | posinf2 | neginf1 | neginf2
    if skipMaskArr is not None:
        valSkipMaskArr |= skipMaskArr
    valMaskedArr1 = numpy.ma.array(imageArr1, copy=False, mask = valSkipMaskArr)
    valMaskedArr2 = numpy.ma.array(imageArr2, copy=False, mask = valSkipMaskArr)
    valFilledArr1 = valMaskedArr1.filled(0.0)
    valFilledArr2 = valMaskedArr2.filled(0.0)
    
    if not numpy.allclose(valFilledArr1, valFilledArr2, rtol=rtol, atol=atol):
        errArr = numpy.abs(valFilledArr1 - valFilledArr2)
        maxErr = errArr.max()
        maxPosInd = numpy.where(errArr==maxErr)
        maxPosTuple = (maxPosInd[1][0], maxPosInd[0][0])
        errStr = "maxDiff=%s at position %s; value=%s vs. %s" % \
            (maxErr, maxPosTuple, valFilledArr1[maxPosInd][0], valFilledArr2[maxPosInd][0])
        retStrs.insert(0, errStr)
    return "; ".join(retStrs)
Example #6
def figS4(data_dir=mydir, figname = 'FigS4', saveAs = 'eps'):
    models = ['lognorm', 'mete', 'zipf']
    fig = plt.figure()
    count = 0
    gs = gridspec.GridSpec(4, 4)
    #gs.update(wspace=0.1, hspace=0.1)
    for i in range(0, 4, 2):
        for j in range(0, 4, 2):
            if count < 2:
                ax = plt.subplot(gs[i:i+2, j:j+2], adjustable='box-forced')
                count += 1
            else:
                ax = plt.subplot(gs[i:i+2, 1:3], adjustable='box-forced')
            if i == 0 and j == 0:
                NSR2 = importData.import_NSR2_data(data_dir + \
                'data/NSR2/Stratified/lognorm_pln_NSR2_stratify.txt')
                ax.set_title("Lognormal", fontsize = 18)
                ll = np.asarray(list(((NSR2["ll"]))))
                ll = ll[np.isneginf(ll) == False]
                print 'Lognorm: mean = ' + str(np.mean(ll)) + ' std = ' + str(np.std(ll))
            elif i == 0 and j == 2:
                NSR2 = importData.import_NSR2_data(data_dir + \
                'data/NSR2/Stratified/zipf_mle_NSR2_stratify.txt')
                ax.set_title("Zipf", fontsize = 18)
                ll = np.asarray(list(((NSR2["ll"]))))
                ll = ll[np.isneginf(ll) == False]
                print 'Zipf: mean = ' + str(np.mean(ll)) + ' std = ' + str(np.std(ll))
            elif i == 2 and j == 0:
                NSR2 = importData.import_NSR2_data(data_dir + \
                'data/NSR2/Stratified/mete_NSR2_stratify.txt')
                ax.set_title("Log-series", fontsize = 18)
                ll = np.asarray(list(((NSR2["ll"]))))
                ll = ll[np.isneginf(ll) == False]
                print 'Log-series: mean = ' + str(np.mean(ll)) + ' std = ' + str(np.std(ll))
            else:
                continue

            ax.set( adjustable='box-forced')
            KDE = mo.CV_KDE(ll)
            #ax.hist(ll, 30, fc='gray', histtype='stepfilled', alpha=0.5, normed=True)
            ax.plot(KDE[0], KDE[1], linewidth=3, alpha=0.8 , color = 'blue')
            ax.yaxis.set_major_formatter(mticker.FormatStrFormatter('%.0E'))
            ax.xaxis.set_major_formatter(mticker.FormatStrFormatter('%.0E'))

            ax.set_xlim([min(KDE[0]), 0])
            plt.xticks(fontsize = 7)
            plt.yticks(fontsize = 7)
            ax.set_xlabel('Log-likelihood', fontsize = 16)
            ax.set_ylabel('Probability density', fontsize = 14)
            plt.setp(ax.get_xticklabels()[::2], visible=False)
            plt.setp(ax.get_yticklabels()[::2], visible=False)

    fig_name = str(mydir + 'figures/' + figname + '_RGB.' + saveAs)
    fig.subplots_adjust(left=0.1, bottom = 0.1,hspace=0.1)
    fig.tight_layout()#pad=1.2, w_pad=0.8, h_pad=0.8
    #fig.text(0.50, 0.017, 'Log-likelihood', ha='center', va='center', fontsize=15)
    #fig.text(0.04, 0.5, 'Probability', ha='center', va='center', rotation='vertical', fontsize=20)
    plt.savefig(fig_name, dpi=600, format = saveAs)
    plt.close()
Example #7
 def test_infinity_neg(self):
     x = -numpy.inf
     y = self.sendAndReceive(x)
     self.assertEqual(y, x)
     self.assert_(numpy.isneginf(x))
     self.assert_(numpy.isneginf(y))
     self.assertEqual(numpy.array(x).shape, y.shape)
     self.assertEqual(numpy.array(x).dtype, y.dtype)
Example #8
def _transform_data(pdata, levels, data_transform):
    """
    Return [pdata,plotlev,plotlab,extend,trans_base_list];
    if data_transform == False, trans_base_list = None.

    Notes:
    ------
    pdata: data used for contourf plotting.
    plotlev: the levels used in contourf plotting.
    extend: the value for the parameter extend in contourf.
    trans_base_list: cf. mathex.plot_array_transg
    """
    if levels is None:
        ftuple = (pdata, None, None, "neither")
        if data_transform == True:
            raise Warning("It is strange that levels is None while data_transform is True")
    else:
        if data_transform == True:
            # make the data transform before plotting.
            pdata_trans, plotlev, plotlab, trans_base_list = mathex.plot_array_transg(pdata, levels, copy=True)
            if np.isneginf(plotlab[0]) and np.isposinf(plotlab[-1]):
                ftuple = (pdata_trans, plotlev[1:-1], plotlab, "both")
            elif np.isneginf(plotlab[0]) or np.isposinf(plotlab[-1]):
                raise ValueError(
                    """only one extreme set as infinitive, please
                    set both as infinitive if arrow colorbar is wanted."""
                )
            else:
                ftuple = (pdata_trans, plotlev, plotlab, "neither")
        # data_transform==False
        else:
            plotlev = pb.iteflat(levels)
            plotlab = pb.iteflat(levels)
            if np.isneginf(plotlab[0]) and np.isposinf(plotlab[-1]):
                # here the levels would be like [np.NINF,1,2,3,np.PINF]
                # in following contourf, all values <1 and all values>3 will be
                # automatically plotted in the color of two arrows.
                # easy to see in this example:
                # a=np.tile(np.arange(10),10).reshape(10,10);
                # fig,ax=g.Create_1Axes();
                # cs=ax.contourf(a,levels=np.arange(2,7),extend='both');
                # plt.colorbar(cs)
                ftuple = (pdata, plotlev[1:-1], plotlab, "both")
            elif np.isneginf(plotlab[0]) or np.isposinf(plotlab[-1]):
                raise ValueError(
                    """only one extreme set as infinitive, please
                    set both as infinitive if arrow colorbar is wanted."""
                )
            else:
                ftuple = (pdata, plotlev, plotlab, "neither")
    datalist = list(ftuple)

    if data_transform == True:
        datalist.append(trans_base_list)
    else:
        datalist.append(None)
    return datalist
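The inline comment above refers to a small contourf demonstration; a runnable version using plain matplotlib (the original project's g.Create_1Axes helper is replaced by plt.subplots here):

import numpy as np
import matplotlib.pyplot as plt

# Values below levels[0] and above levels[-1] are painted with the two
# colorbar arrows when extend='both'.
a = np.tile(np.arange(10), 10).reshape(10, 10)
fig, ax = plt.subplots()
cs = ax.contourf(a, levels=np.arange(2, 7), extend='both')
fig.colorbar(cs)
plt.show()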
Example #9
def _generate_colorbar_ticks_label(
    data_transform=False, colorbarlabel=None, trans_base_list=None, forcelabel=None, plotlev=None, plotlab=None
):
    """
    Return (colorbar_ticks,colorbar_labels)
    """
    # data_transform==True and levels!=None
    if data_transform == True:
        if colorbarlabel != None:
            colorbarlabel = pb.iteflat(colorbarlabel)
            transformed_colorbarlabel_ticks, x, y, trans_base_list = mathex.plot_array_transg(
                colorbarlabel, trans_base_list, copy=True
            )

        # Note: the if/else blocks are organized in the 1st tier by checking whether the two
        # ends are -inf/inf, and in the 2nd tier by checking whether colorbarlabel is None
        if np.isneginf(plotlab[0]) and np.isposinf(plotlab[-1]):
            if colorbarlabel != None:
                ftuple = (transformed_colorbarlabel_ticks, colorbarlabel)
            else:
                ftuple = (plotlev, plotlab[1:-1])
        elif np.isneginf(plotlab[0]) or np.isposinf(plotlab[-1]):
            raise ValueError("It's strange to set only side as infitive")
        else:
            if colorbarlabel != None:
                ftuple = (transformed_colorbarlabel_ticks, colorbarlabel)
            else:
                ftuple = (plotlev, plotlab)

    # data_transform==False
    else:
        if np.isneginf(plotlab[0]) and np.isposinf(plotlab[-1]):
            # if colorbarlabel is forced, then ticks and ticklabels will be forced.
            if colorbarlabel != None:
                ftuple = (colorbarlabel, colorbarlabel)
            # This by default will be done, it's maintained here only for clarity.
            else:
                ftuple = (plotlab[1:-1], plotlab[1:-1])
        elif np.isneginf(plotlab[0]) or np.isposinf(plotlab[-1]):
            raise ValueError("It's strange to set only side as infitive")
        else:
            if colorbarlabel != None:
                ftuple = (colorbarlabel, colorbarlabel)
            else:
                ftuple = (plotlab, plotlab)

    ftuple = list(ftuple)
    if forcelabel != None:
        if len(forcelabel) != len(ftuple[1]):
            raise ValueError(
                """the length of the forcelabel and the
                length of labeled ticks is not equal!"""
            )
        else:
            ftuple[1] = forcelabel

    return ftuple
Example #10
def _diagnose(self):

    # Update log.
    self.logger.debug("diagnose: data: shape: " + str(self.data.shape))
    self.logger.debug("diagnose: data: dtype: " + str(self.data.dtype))
    self.logger.debug("diagnose: data: size: %.2fMB", self.data.nbytes * 9.53674e-7)
    self.logger.debug("diagnose: data: nans: " + str(np.sum(np.isnan(self.data))))
    self.logger.debug("diagnose: data: -inf: " + str(np.sum(np.isneginf(self.data))))
    self.logger.debug("diagnose: data: +inf: " + str(np.sum(np.isposinf(self.data))))
    self.logger.debug("diagnose: data: positives: " + str(np.sum(self.data > 0)))
    self.logger.debug("diagnose: data: negatives: " + str(np.sum(self.data < 0)))
    self.logger.debug("diagnose: data: mean: " + str(np.mean(self.data)))
    self.logger.debug("diagnose: data: min: " + str(np.min(self.data)))
    self.logger.debug("diagnose: data: max: " + str(np.max(self.data)))

    self.logger.debug("diagnose: data_white: shape: " + str(self.data_white.shape))
    self.logger.debug("diagnose: data_white: dtype: " + str(self.data_white.dtype))
    self.logger.debug("diagnose: data_white: size: %.2fMB", self.data_white.nbytes * 9.53674e-7)
    self.logger.debug("diagnose: data_white: nans: " + str(np.sum(np.isnan(self.data_white))))
    self.logger.debug("diagnose: data_white: -inf: " + str(np.sum(np.isneginf(self.data_white))))
    self.logger.debug("diagnose: data_white: +inf: " + str(np.sum(np.isposinf(self.data_white))))
    self.logger.debug("diagnose: data_white: positives: " + str(np.sum(self.data_white > 0)))
    self.logger.debug("diagnose: data_white: negatives: " + str(np.sum(self.data_white < 0)))
    self.logger.debug("diagnose: data_white: mean: " + str(np.mean(self.data_white)))
    self.logger.debug("diagnose: data_white: min: " + str(np.min(self.data_white)))
    self.logger.debug("diagnose: data_white: max: " + str(np.max(self.data_white)))

    self.logger.debug("diagnose: data_dark: shape: " + str(self.data_dark.shape))
    self.logger.debug("diagnose: data_dark: dtype: " + str(self.data_dark.dtype))
    self.logger.debug("diagnose: data_dark: size: %.2fMB", self.data_dark.nbytes * 9.53674e-7)
    self.logger.debug("diagnose: data_dark: nans: " + str(np.sum(np.isnan(self.data_dark))))
    self.logger.debug("diagnose: data_dark: -inf: " + str(np.sum(np.isneginf(self.data_dark))))
    self.logger.debug("diagnose: data_dark: +inf: " + str(np.sum(np.isposinf(self.data_dark))))
    self.logger.debug("diagnose: data_dark: positives: " + str(np.sum(self.data_dark > 0)))
    self.logger.debug("diagnose: data_dark: negatives: " + str(np.sum(self.data_dark < 0)))
    self.logger.debug("diagnose: data_dark: mean: " + str(np.mean(self.data_dark)))
    self.logger.debug("diagnose: data_dark: min: " + str(np.min(self.data_dark)))
    self.logger.debug("diagnose: data_dark: max: " + str(np.max(self.data_dark)))

    self.logger.debug("diagnose: theta: shape: " + str(self.theta.shape))
    self.logger.debug("diagnose: theta: dtype: " + str(self.theta.dtype))
    self.logger.debug("diagnose: theta: size: %.2fMB", self.theta.nbytes * 9.53674e-7)
    self.logger.debug("diagnose: theta: nans: " + str(np.sum(np.isnan(self.theta))))
    self.logger.debug("diagnose: theta: -inf: " + str(np.sum(np.isneginf(self.theta))))
    self.logger.debug("diagnose: theta: +inf: " + str(np.sum(np.isposinf(self.theta))))
    self.logger.debug("diagnose: theta: positives: " + str(np.sum(self.theta > 0)))
    self.logger.debug("diagnose: theta: negatives: " + str(np.sum(self.theta < 0)))
    self.logger.debug("diagnose: theta: mean: " + str(np.mean(self.theta)))
    self.logger.debug("diagnose: theta: min: " + str(np.min(self.theta)))
    self.logger.debug("diagnose: theta: max: " + str(np.max(self.theta)))

    self.logger.info("diagnose [ok]")
Example #11
    def test_neginf(self):
        arr = np.empty(100)
        arr[:] = -np.inf
        for np_func, acml_func in self.vector_funcs:
            np_out = np_func(arr)
            acml_out = acml_func(arr)

            equal_nan = np.isnan(np_out) == np.isnan(acml_out)
            equal_posinf = np.isposinf(np_out) == np.isposinf(acml_out)
            equal_neginf = np.isneginf(np_out) == np.isneginf(acml_out)
            self.assertTrue( np.alltrue(equal_nan), msg="NaN-test failed for %s" % acml_func)
            self.assertTrue( np.alltrue(equal_posinf), msg="posinf-test failed for %s" % acml_func)
            self.assertTrue( np.alltrue(equal_neginf), msg="neginf-test failed for %s" % acml_func)
Example #12
def encode_fill_value(v, dtype):
    # early out
    if v is None:
        return v
    if dtype.kind == 'f':
        if np.isnan(v):
            return 'NaN'
        elif np.isposinf(v):
            return 'Infinity'
        elif np.isneginf(v):
            return '-Infinity'
        else:
            return float(v)
    elif dtype.kind in 'ui':
        return int(v)
    elif dtype.kind == 'b':
        return bool(v)
    elif dtype.kind in 'SV':
        v = base64.standard_b64encode(v)
        if not PY2:  # pragma: py2 no cover
            v = str(v, 'ascii')
        return v
    elif dtype.kind == 'U':
        return v
    elif dtype.kind in 'mM':
        return int(v.view('u8'))
    else:
        return v
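A usage sketch for encode_fill_value; the PY2 flag and the base64 import are module-level in the original code, so they are stubbed here as assumptions:

import base64
import numpy as np

PY2 = False  # assumption for this sketch; the original derives it from sys.version_info

print(encode_fill_value(np.float64(-np.inf), np.dtype('f8')))  # '-Infinity'
print(encode_fill_value(np.float64('nan'), np.dtype('f8')))    # 'NaN'
print(encode_fill_value(42, np.dtype('i4')))                   # 42
print(encode_fill_value(b'abc', np.dtype('S3')))               # 'YWJj'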
Example #13
def set_logp_to_neg_inf(X, logp, bounds):
    """Set `logp` to negative infinity when `X` is outside the allowed bounds.

    # Arguments
        X: tensorflow.Tensor
            The variable to apply the bounds to
        logp: tensorflow.Tensor
            The log probability corresponding to `X`
        bounds: list of `Region` objects
            The regions corresponding to the allowed regions of `X`

    # Returns
        logp: tensorflow.Tensor
            The newly bounded log probability
    """
    conditions = []
    for l, u in bounds:
        lower_is_neg_inf = not isinstance(l, tf.Tensor) and np.isneginf(l)
        upper_is_pos_inf = not isinstance(u, tf.Tensor) and np.isposinf(u)

        if not lower_is_neg_inf and upper_is_pos_inf:
            conditions.append(tf.greater(X, l))
        elif lower_is_neg_inf and not upper_is_pos_inf:
            conditions.append(tf.less(X, u))
        elif not (lower_is_neg_inf or upper_is_pos_inf):
            conditions.append(tf.logical_and(tf.greater(X, l), tf.less(X, u)))

    if len(conditions) > 0:
        is_inside_bounds = conditions[0]
        for condition in conditions[1:]:
            is_inside_bounds = tf.logical_or(is_inside_bounds, condition)

        logp = tf.select(is_inside_bounds, logp, tf.fill(tf.shape(X), config.dtype(-np.inf)))

    return logp
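The same bounding idea, sketched as a NumPy analogue (this is not the project's TensorFlow code; it only illustrates how the finite/infinite bound checks combine):

import numpy as np

def set_logp_to_neg_inf_np(X, logp, bounds):
    # Set logp to -inf wherever X lies outside every allowed (lower, upper)
    # region in `bounds`; infinite ends simply drop the corresponding check.
    if not bounds:
        return logp
    inside = np.zeros(np.shape(X), dtype=bool)
    for lower, upper in bounds:
        cond = np.ones(np.shape(X), dtype=bool)
        if not np.isneginf(lower):
            cond &= X > lower
        if not np.isposinf(upper):
            cond &= X < upper
        inside |= cond
    return np.where(inside, logp, -np.inf)

# set_logp_to_neg_inf_np(np.array([-2.0, 0.5, 3.0]),
#                        np.array([-1.3, -0.2, -4.1]),
#                        [(0.0, 1.0)])  ->  [-inf, -0.2, -inf]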
Example #14
    def calculate(self, g, level_number_density, lines_lower_level_index,
        lines_upper_level_index, metastability, lines):
        n_lower = level_number_density.values.take(lines_lower_level_index,
            axis=0, mode='raise')
        n_upper = level_number_density.values.take(lines_upper_level_index,
            axis=0, mode='raise')
        g_lower = self.get_g_lower(g, lines_lower_level_index)
        g_upper = self.get_g_upper(g, lines_upper_level_index)
        meta_stable_upper = self.get_metastable_upper(metastability,
                                                      lines_upper_level_index)

        stimulated_emission_factor = ne.evaluate('1 - ((g_lower * n_upper) / '
                                                 '(g_upper * n_lower))')
        stimulated_emission_factor[n_lower == 0.0] = 0.0
        stimulated_emission_factor[np.isneginf(stimulated_emission_factor)]\
            = 0.0
        stimulated_emission_factor[meta_stable_upper &
                                   (stimulated_emission_factor < 0)] = 0.0
        if self.nlte_species:
            nlte_lines_mask = \
                np.zeros(stimulated_emission_factor.shape[0]).astype(bool)
            for species in self.nlte_species:
                nlte_lines_mask |= (lines.atomic_number == species[0]) & \
                                   (lines.ion_number == species[1])
            stimulated_emission_factor[(stimulated_emission_factor < 0) &
                nlte_lines_mask[np.newaxis].T] = 0.0
        return stimulated_emission_factor
Example #15
    def _update_parameters(self):
        """
        Update parameters of the acquisition required to evaluate the function. In particular:
            * Sample representer points repr_points
            * Compute their log values repr_points_log
            * Compute belief locations logP
        """
        self.repr_points, self.repr_points_log = self.sampler.get_samples(self.num_repr_points, self.proposal_function, self.burn_in_steps)

        if np.any(np.isnan(self.repr_points_log)) or np.any(np.isposinf(self.repr_points_log)):
            raise RuntimeError("Sampler generated representer points with invalid log values: {}".format(self.repr_points_log))

        # Removing representer points that have 0 probability of being the minimum (corresponding to log probability being minus infinity)
        idx_to_remove = np.where(np.isneginf(self.repr_points_log))[0]
        if len(idx_to_remove) > 0:
            idx = list(set(range(self.num_repr_points)) - set(idx_to_remove))
            self.repr_points = self.repr_points[idx, :]
            self.repr_points_log = self.repr_points_log[idx]

        # We predict with the noise as we need to make sure that var is indeed positive definite.
        mu, _ = self.model.predict(self.repr_points)
        # we need a vector
        mu = np.ndarray.flatten(mu)
        var = self.model.predict_covariance(self.repr_points)
        
        self.logP, self.dlogPdMu, self.dlogPdSigma, self.dlogPdMudMu = epmgp.joint_min(mu, var, with_derivatives=True)
        # add a second dimension to the array
        self.logP = np.reshape(self.logP, (self.logP.shape[0], 1))
Example #16
    def convert_to_log(self, img):
        log_img = np.ones(img.shape, np.float32)
        with np.errstate(divide='ignore'):
            log_img = np.log(img, log_img)
            log_img[np.isneginf(log_img)] = 0

        return np.nan_to_num(log_img)
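The same log-conversion trick can be checked stand-alone (a sketch, independent of the class the method above belongs to):

import numpy as np

img = np.array([[0.0, 1.0], [np.e, 10.0]], dtype=np.float32)
log_img = np.ones(img.shape, np.float32)
with np.errstate(divide='ignore'):
    log_img = np.log(img, log_img)
    log_img[np.isneginf(log_img)] = 0   # the zero pixel produced -inf
print(np.nan_to_num(log_img))           # approx. [[0. 0.] [1. 2.3025851]]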
Example #17
	def prior_probabilities ( self, theta ):
		""" The method that calculates the prior (log) probabilities. This is based on the prior distributions given in prior_distributions, and assumes independence, so we just add them up. 
		"""
		p = numpy.array([ numpy.log ( getattr ( self, self.parameters[i]).pdf ( theta[i])) for i in xrange(len(self.parameters)) ]).sum()
		if numpy.isneginf(p):
			p = numpy.log(1.0E-300)
		return p
Example #18
    def Draw(self, args=None):
        """Draw the various functions"""

        if not args or "SAME" not in args:
            # make a 'blank' function to occupy the complete range of x values:
            lower_lim = min([lim[0] for lim in self.functions_dict.keys()])
            if np.isneginf(lower_lim):
                lower_lim = -999
            upper_lim = max([lim[1] for lim in self.functions_dict.keys()])
            if np.isposinf(upper_lim):
                upper_lim = 999
            blank = ROOT.TF1("blank" + str(np.random.randint(0, 10000)), "1.5", lower_lim, upper_lim)
            blank.Draw()
            max_value = max([func.GetMaximum(lim[0], lim[1])
                             for lim, func in self.functions_dict.iteritems()]) * 1.1
            blank.SetMaximum(max_value)
            min_value = min([func.GetMinimum(lim[0], lim[1])
                             for lim, func in self.functions_dict.iteritems()]) * 0.9
            blank.SetMinimum(min_value)
            ROOT.SetOwnership(blank, False)  # NEED THIS SO IT ACTUALLY GETS DRAWN. SERIOUSLY, WTF?!
            blank.SetLineColor(ROOT.kWhite)

        # now draw the rest of the functions
        args = "" if not args else args
        for func in self.functions_dict.values():
            func.Draw("SAME" + args)
Example #19
def gradient_desc_ridge(X, Y, W, alpha, lambd, num_iter=1000, conv_tol=0.01, check_interval=500):
    c = float("inf")
    log("Learn Rate", alpha)
    for i in range(num_iter):
        #
        # delta =  2/N SIGMA[(XW - Y)*x] + 2 * \lambd * W
        diff = predict(X, W) - Y
        delta = np.sum(np.multiply(X, diff), axis=0)  # sum top to bottom for each attribute
        delta = delta * 2.0 / len(Y)
        delta = np.array([delta]).transpose()  # restore vector shape of (n_attr x 1)
        delta = delta + (2 * lambd * W)  # Vectors addition

        W = W - alpha * delta

        if i % check_interval == 0:
            predY = predict(X, W)
            newcost = MSECost(predY, Y)

            log("#%d, cost = %.8g" % (i, newcost))
            if np.isnan(newcost) or np.isinf(newcost) or np.isneginf(newcost):
                raise Exception("ERROR: number overflow, please adjust learning rate")
            diff = abs(newcost - c)
            c = newcost
            if diff < conv_tol:
                log("Converged with tolerance %f " % conv_tol)
                break
        if not quiet and i % (check_interval * 10) == 0:
            print(W.flatten())
    return W
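gradient_desc_ridge uses helpers (predict, MSECost, log) and a module-level quiet flag that are not shown; the stand-ins below are assumptions about their behaviour, followed by a small smoke test on synthetic data:

import numpy as np

quiet = True

def log(*args):
    if not quiet:
        print(*args)

def predict(X, W):
    # Linear prediction: X is (n_samples x n_attr), W is (n_attr x 1).
    return np.dot(X, W)

def MSECost(predY, Y):
    return float(np.mean(np.square(predY - Y)))

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
Y = X.dot(np.array([[1.0], [2.0], [-1.0]])) + 0.01 * rng.randn(50, 1)
W = gradient_desc_ridge(X, Y, np.zeros((3, 1)), alpha=0.01, lambd=0.1, num_iter=2000)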
Example #20
    def calculate(self, g, level_number_density, lines_lower_level_index,
        lines_upper_level_index, metastability, lines):
        n_lower = level_number_density.values.take(lines_lower_level_index,
            axis=0, mode='raise')
        n_upper = level_number_density.values.take(lines_upper_level_index,
            axis=0, mode='raise')
        g_lower = self.get_g_lower(g, lines_lower_level_index)
        g_upper = self.get_g_upper(g, lines_upper_level_index)
        meta_stable_upper = self.get_metastable_upper(metastability,
                                                      lines_upper_level_index)

        stimulated_emission_factor = ne.evaluate('1 - ((g_lower * n_upper) / '
                                                 '(g_upper * n_lower))')
        stimulated_emission_factor[n_lower == 0.0] = 0.0
        stimulated_emission_factor[np.isneginf(stimulated_emission_factor)]\
            = 0.0
        stimulated_emission_factor[meta_stable_upper &
                                   (stimulated_emission_factor < 0)] = 0.0
        if self.nlte_species:
            nlte_lines_mask = lines.reset_index().apply(
                    lambda row:
                    (row.atomic_number, row.ion_number) in self.nlte_species,
                    axis=1
            ).values
            stimulated_emission_factor[(stimulated_emission_factor < 0) &
                nlte_lines_mask[np.newaxis].T] = 0.0
        return stimulated_emission_factor
Example #21
def takeRatio(num,den):
    toReturn = num.copy()
    toReturn['data'] = np.log(num['data']/den['data'])
    whereBad = np.isnan(toReturn['data']) | np.isinf(toReturn['data']) | np.isneginf(toReturn['data'])
    toReturn['data'][whereBad] = 0.0
    
    return toReturn
Example #22
def traverse_data(datum, is_numpy=is_numpy, use_numpy=True):
    """recursively dig until a flat list is found
    if numpy is available convert the flat list to a numpy array
    and send off to transform_array() to handle nan, inf, -inf
    otherwise iterate through items in array converting non-json items

    Args:
        datum (list) : a list of values or lists
        is_numpy: True if numpy is present (see imports)
        use_numpy: toggle numpy as a dependency for testing purposes
    """
    is_numpy = is_numpy and use_numpy
    if is_numpy and not any(isinstance(el, (list, tuple)) for el in datum):
        return transform_array(np.asarray(datum))
    datum_copy = []
    for item in datum:
        if isinstance(item, (list, tuple)):
            datum_copy.append(traverse_data(item))
        elif isinstance(item, float):
            if np.isnan(item):
                item = 'NaN'
            elif np.isposinf(item):
                item = 'Infinity'
            elif np.isneginf(item):
                item = '-Infinity'
            datum_copy.append(item)
        else:
            datum_copy.append(item)
    return datum_copy
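A usage sketch for traverse_data; is_numpy is passed explicitly and numpy is imported because the per-item branch above calls np.isnan/np.isposinf/np.isneginf either way:

import numpy as np

data = [1.0, float('nan'), float('inf'), float('-inf'), 2.5]
print(traverse_data(data, is_numpy=False, use_numpy=False))
# [1.0, 'NaN', 'Infinity', '-Infinity', 2.5]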
Example #23
 def likelihood_function ( self, theta ):
     """For example! This function ought to be overridden by the user, and maybe extended with whatever extra parameters you need to get hold of your observations, or model driver parameters.
     This function method calculates the likelihood function for a vector M{\theta}. Usually, you have a model you run with these parameters as inputs (+ some driver data), and some observations that go with the output of the forward model output. These two sets of values are combined in some sort of cost function/likelihood function. A common criterion is to assume that the model is able to perfectly replicate the observations (given a proper parametrisation). The only mismatch between model output and observations is then given by the uncertainty with which the measurement is performed, and we can encode this as a zero-mean Normal distribution. The variance of this distribution is then related to the observational error. If different measurements are used, a multivariate normal is useful, and correlation between observations can also be included, if needs be.
     """
     means = numpy.matrix([-3.0, 2.8])
     means = numpy.matrix([-5.0, 5])
     sigma1 = 1.0
     sigma2 = 2#0.5
     rho = -0.5#-0.1
     covar = numpy.matrix([[sigma1*sigma1,rho*sigma1*sigma2],[rho*sigma1*sigma2,sigma2*sigma2]])
     inv_covar = numpy.linalg.inv ( covar ) # numpy.matrix([[  5.26315789,   9.47368421],\
                     #[  9.47368421,  21.05263158]])
     det_covar = numpy.linalg.det( covar ) #0.047499999999999987
     N = means.shape[0]
     X = numpy.matrix(means- theta)
     #X = theta
     #p = full_gauss_den(X, means, covar, True)
     #This is just lazy... Using libraries to invert a 2x2 matrix & calc. its determinant....
     #Also, the log calculations could be done more efficiently and stored, but...
     p = pow(1.0/(2*numpy.pi), N/2.)
     p = p / numpy.sqrt ( numpy.linalg.det (covar))
     #p = 0.73025296137109341 # Precalc'ed
     #p = p *    numpy.exp (-0.5*X*inv_covar*X.transpose())
     a = X*inv_covar*X.T
     p = p*numpy.exp(-0.5*a)
     #pdb.set_trace()
     p = numpy.log(p)
     if numpy.isneginf(p):
         p = numpy.log(1.0E-300)
     return p
Example #24
def map_to_range(v, oi, oa, ni, na):
    # numpy.isposinf must be checked here rather than numpy.isinf, which is
    # also True for -inf and would make the isneginf branch unreachable.
    if numpy.isposinf(v):
        return na
    elif numpy.isneginf(v):
        return ni
    else:
        return (((v - oi) * (na - ni)) / (oa - oi)) + ni
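A quick usage sketch of map_to_range, remapping [0, 10] onto [0, 1] with the infinities clamped to the ends of the new range:

import numpy

print(map_to_range(5.0, 0.0, 10.0, 0.0, 1.0))         # 0.5
print(map_to_range(numpy.inf, 0.0, 10.0, 0.0, 1.0))   # 1.0
print(map_to_range(-numpy.inf, 0.0, 10.0, 0.0, 1.0))  # 0.0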
Example #25
    def optimize_A(self, A):
        """Find optimal transformation matrix A by minimization.

        Parameters
        ----------
        A : ndarray
        The transformation matrix A.

        Returns
        -------
        A : ndarray
            The transformation matrix.
        """
        flat_map, square_map = get_maps(A)
        alpha = to_flat(1.0 * A, flat_map)

        obj = lambda x: -1 * self.objective_function(x, self.T, self.right_eigenvectors, square_map, self.populations)
        self.obj = obj
        self.alpha = alpha.copy()

        logger.info("Initial value of objective function: f = %f", obj(alpha))

        alpha = scipy.optimize.anneal(obj, alpha, lower=0.0, maxiter=1, schedule="boltzmann", dwell=1000, feps=1E-3, boltzmann=2.0, T0=1.0)[0]

        alpha = scipy.optimize.fmin(obj, alpha, full_output=True, xtol=1E-4, ftol=1E-4, maxfun=5000, maxiter=100000)[0]

        logger.info("Final value: f = %f" % (obj(alpha)))

        if np.isneginf(obj(alpha)):
            raise(ValueError("Error: minimization has not located a feasible point."))

        A = to_square(alpha, square_map)

        return A
Example #26
 def filt_butter(data, samp_freq, butter_freq, axis=-1):
     '''
     Filter data with a 2nd order butterworth filter.
     
     Parameters
     ==========
       data: ndarray
       samp_freq: sampling period (s)
       butter_freq: [cutoff_low, cutoff_high] (Hz), can be infinite
       axis (optional): axis along which to filter, default = -1
     Returns
     =======
       filtNs: filtered version of data
     '''
     order = 2
     ny = 0.5 / samp_freq # Nyquist frequency
     cof = butter_freq / ny # normalized cutoff freq
     if np.isneginf(cof[0]) and np.isfinite(cof[1]):
         # lowpass
         cof1 = cof[1]
         b, a = scipy.signal.butter(order, cof1, btype='low')
         filtNs = scipy.signal.filtfilt(b, a, data, axis=axis)
     elif np.isfinite(cof[0]) and np.isinf(cof[1]):
         # highpass
         cof1 = cof[0]
         b, a = scipy.signal.butter(order, cof1, btype='high')
         filtNs = scipy.signal.filtfilt(b, a, data, axis=axis)
     elif np.isfinite(cof[0]) and np.isfinite(cof[1]):
         # bandpass
         b, a = scipy.signal.butter(order, cof, btype='band')
         filtNs = scipy.signal.filtfilt(b, a, data, axis=axis)
     else:
         raise Exception('filt_butter called with bad cutoff frequency')
     filtNs /= samp_freq # normalize to rate
     return filtNs
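A usage sketch for filt_butter on a synthetic 100 Hz signal (assuming the function above plus numpy and scipy.signal are importable):

import numpy as np
import scipy.signal

samp_freq = 0.01                     # sampling period in seconds (100 Hz rate)
t = np.arange(0, 10, samp_freq)
data = np.sin(2 * np.pi * 1.0 * t) + 0.3 * np.random.randn(t.size)

# A -inf low cutoff selects the pure low-pass branch; two finite cutoffs
# select the band-pass branch.
lowpassed = filt_butter(data, samp_freq, np.array([-np.inf, 5.0]))
bandpassed = filt_butter(data, samp_freq, np.array([0.5, 5.0]))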
Example #27
def merciless_print(i, node, fn):
    """Debugging theano. Prints inputs and outputs at every point.
    In case NaN, Inf or -Inf is detected, fires up the pdb debugger."""
    print ''
    print '-------------------------------------------------------'
    print 'Node %s' % str(i)
    theano.printing.debugprint(node)
    print 'Inputs : %s' % [input for input in fn.inputs]
    print 'Outputs: %s' % [output for output in fn.outputs]
    print 'Node:'
    for output in fn.outputs:
        try:
            if numpy.isnan(output[0]).any():
                print '*** NaN detected ***'
                theano.printing.debugprint(node)
                print 'Inputs : %s' % [input[0] for input in fn.inputs]
                print 'Outputs: %s' % [output[0] for output in fn.outputs]
                pdb.set_trace()
                raise ValueError('Found NaN in computation!')
            if numpy.isposinf(output[0]).any() or numpy.isneginf(output[0]).any():
                print '*** Inf detected ***'
                theano.printing.debugprint(node)
                print 'Inputs : %s' % [input[0] for input in fn.inputs]
                print 'Outputs: %s' % [output[0] for output in fn.outputs]
                pdb.set_trace()
                raise ValueError('Found Inf in computation!')
        except TypeError:
            logging.debug('Couldn\'t check node for NaN/Inf: {0}'.format(node))
Example #28
def traverse_data(obj, is_numpy=is_numpy, use_numpy=True):
    """ Recursively traverse an object until a flat list is found.

    If NumPy is available, the flat list is converted to a numpy array
    and passed to transform_array() to handle ``nan``, ``inf``, and
    ``-inf``.

    Otherwise, iterate through all items, converting non-JSON-serializable
    floats (``nan``, ``inf``, ``-inf``) to strings.

    Args:
        obj (list) : a list of values or lists
        is_numpy (bool, optional): Whether NumPy is available
            (default: True if NumPy is importable)
        use_numpy (bool, optional): Toggle NumPy as a dependency for testing.
            This argument is only useful for testing (default: True)
    """
    is_numpy = is_numpy and use_numpy
    if is_numpy and all(isinstance(el, np.ndarray) for el in obj):
        return [transform_array(el) for el in obj]
    obj_copy = []
    for item in obj:
        if isinstance(item, (list, tuple)):
            obj_copy.append(traverse_data(item))
        elif isinstance(item, float):
            if np.isnan(item):
                item = 'NaN'
            elif np.isposinf(item):
                item = 'Infinity'
            elif np.isneginf(item):
                item = '-Infinity'
            obj_copy.append(item)
        else:
            obj_copy.append(item)
    return obj_copy
Example #29
def msr2k(rvnames, rvs, trunclb, truncub, G):
    # robustnes
    klb = trunclb[0]; kub=truncub[0];
    # reliability
    corr = np.eye(len(rvnames))
    probdata = ProbData(names=rvnames, rvs=rvs, corr=corr, nataf=False)
    analysisopt = AnalysisOpt(gradflag='DDM', recordu=False, recordx=False,
            flagsens=False, verbose=False)
    # limit state 1
    def gf1(x, param=None):
        m, C, Sre, Na = x
        K = C*(Sre**m)*(G**m)*(np.pi**(m/2.))*Na
        return K-kub
    def dgdq1(x, param=None):
        m, C, Sre, Na = x
        Srem = Sre**m; Gm = G**m; pim2 = np.pi**(m/2.)
        dgdm = C*np.log(Sre)*Srem*Gm*pim2*Na+C*Srem*np.log(G)*Gm*pim2*Na+\
               C*Srem*Gm*np.log(np.pi)*pim2*0.5*Na
        dgdC = Srem*Gm*pim2*Na
        dgdSre = C*m*(Sre**(m-1.))*Gm*pim2*Na
        dgdNa = C*Srem*Gm*pim2
        return [dgdm, dgdC, dgdSre, dgdNa]
    gfunc1 = Gfunc(gf1, dgdq1)
    formBeta1 = CompReliab(probdata, gfunc1, analysisopt)

    # limit state 2
    def gf2(x, param=None):
        m, C, Sre, Na = x
        K = C*(Sre**m)*(G**m)*(np.pi**(m/2))*Na
        return klb-K
    def dgdq2(x, param=None):
        m, C, Sre, Na = x
        Srem = Sre**m; Gm = G**m; pim2 = np.pi**(m/2)
        dgdm = C*np.log(Sre)*Srem*Gm*pim2*Na+C*Srem*np.log(G)*Gm*pim2*Na+\
               C*Srem*Gm*np.log(np.pi)*pim2*0.5*Na
        dgdC = Srem*Gm*pim2*Na
        dgdSre = C*m*(Sre**(m-1.))*Gm*pim2*Na
        dgdNa = C*Srem*Gm*pim2
        return [-dgdm, -dgdC, -dgdSre, -dgdNa]
    gfunc2 = Gfunc(gf2, dgdq2)
    formBeta2 = CompReliab(probdata, gfunc2, analysisopt)

    # system reliability
    try:
        if np.isneginf(klb):
            formresults = formBeta1.form_result()
            pf = formresults.pf1
        elif np.isposinf(kub):
            formresults = formBeta2.form_result()
            pf = formresults.pf1
        else:
            sysBeta = SysReliab([formBeta1, formBeta2], [2])
            sysformres = sysBeta.mvn_msr(sysBeta.syscorr)
            pf = sysformres.pf
        # formresults = formBeta2.form_result()
        # pf = formresults.pf1
    except np.linalg.LinAlgError:
        pf = 0.
    return pf
Example #30
    def to_standard_form(self,):
        """
        Return an instance of StandardLP by factoring this problem.
        """
        A = self.A.tocsc(copy=True)
        b = self.b.copy()
        c = self.c.copy()
        r = self.r.copy()
        l = self.l.copy()
        u = self.u.copy()
        f = self.f

        # abort if lower bound equals -Infinity
        if np.isneginf(self.l).any():
            raise ValueError('Lower bounds (l) contains -inf.')


        # shift lower bounds to zero (x <- x-l) so that new problem
        #  has the following form
        #
        #     optimize c^Tx + c^Tl
        #
        #     s.t. b-Al <= Ax <= b-Al+r
        #             0 <=  x <= u-l

        # indices where u is not +inf
        ind = np.where(np.isposinf(u)==False)[0]
        u[ind] -= l[ind]

        b = b - A.dot(l)
        f += np.dot(c,l)

        # Convert equality constraints to a pair of inequalities
        A = vstack([A,A]) # Double A matrix

        b = np.r_[b,b]
        b[:self.m] *= -1
        b[self.m:] += r

        # add upper bounds
        nubs = len(ind)
        # one row per bounded variable, with a 1 in that variable's column
        Aubs = coo_matrix((np.ones(nubs), (np.arange(nubs), ind)),
                          shape=(nubs, A.shape[1]))
        b = np.r_[b,u[ind]]
        A = vstack([A,Aubs])

        #  Now lp has the following form,
        #
        #  maximize c^Tx + c^Tl
        #
        #  s.t. -Ax <= -b
        #        Ax <=  b+r-l
        #         x <=  u-l
        #         x >=  0

        assert A.shape[0] == b.shape[0]

        lp = StandardLP(A,b,c,f=f)

        return lp
Example #31
def replace_neginf(array):
    temp = array
    minval = (array[np.where(np.isfinite(array))[0]]).min()
    temp[np.where(np.isneginf(temp))[0]] = minval - 1e-300
    return temp
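A usage sketch for replace_neginf; note that the input array is modified in place as well, since temp is the same object as array:

import numpy as np

arr = np.array([-np.inf, -3.5, 0.0, 2.0])
print(replace_neginf(arr))   # [-3.5 -3.5  0.   2. ]  (-inf pulled just below the finite minimum)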
Example #32
def _scobit_utility_transform(systematic_utilities,
                              alt_IDs,
                              rows_to_alts,
                              shape_params,
                              intercept_params,
                              intercept_ref_pos=None,
                              *args,
                              **kwargs):
    """
    Parameters
    ----------
    systematic_utilities : 1D ndarray.
        All elements should be ints, floats, or longs. Should contain the
        systematic utilities of each observation per available alternative.
        Note that this vector is formed by the dot product of the design matrix
        with the vector of utility coefficients.
    alt_IDs : 1D ndarray.
        All elements should be ints. There should be one row per observation per
        available alternative for the given observation. Elements denote the
        alternative corresponding to the given row of the design matrix.
    rows_to_alts : 2D scipy sparse matrix.
        There should be one row per observation per available alternative and
        one column per possible alternative. This matrix maps the rows of the
        design matrix to the possible alternatives for this dataset. All
        elements should be zeros or ones.
    shape_params : None or 1D ndarray.
        If an array, each element should be an int, float, or long. There
        should be one value per shape parameter of the model being used.
    intercept_params : None or 1D ndarray.
        If an array, each element should be an int, float, or long. If J is the
        total number of possible alternatives for the dataset being modeled,
        there should be J-1 elements in the array.
    intercept_ref_pos : int, or None, optional.
        Specifies the index of the alternative, in the ordered array of unique
        alternatives, that is not having its intercept parameter estimated (in
        order to ensure identifiability). Should only be None if
        `intercept_params` is None.

    Returns
    -------
    transformations : 2D ndarray.
        Should have shape `(systematic_utilities.shape[0], 1)`. The returned
        array contains the transformed utility values for this model. All
        elements should be ints, floats, or longs.
    """
    # Figure out what indices are to be filled in
    if intercept_ref_pos is not None and intercept_params is not None:
        needed_idxs = list(range(intercept_params.shape[0] + 1))
        needed_idxs.remove(intercept_ref_pos)

        if len(intercept_params.shape) > 1 and intercept_params.shape[1] > 1:
            # Get an array of zeros with shape
            # (num_possible_alternatives, num_parameter_samples)
            all_intercepts = np.zeros(
                (rows_to_alts.shape[1], intercept_params.shape[1]))
            # For alternatives having their intercept estimated, replace the
            # zeros with the current value of the estimated intercepts
            all_intercepts[needed_idxs, :] = intercept_params
        else:
            # Get an array of zeros with shape (num_possible_alternatives,)
            all_intercepts = np.zeros(rows_to_alts.shape[1])
            # For alternatives having their intercept estimated, replace the
            # zeros with the current value of the estimated intercepts
            all_intercepts[needed_idxs] = intercept_params
    else:
        # Create a full set of intercept parameters including the intercept
        # constrained to zero
        all_intercepts = np.zeros(rows_to_alts.shape[1])

    # Figure out what intercept values correspond to each row of the
    # systematic utilities
    long_intercepts = rows_to_alts.dot(all_intercepts)

    # Convert the shape parameters back into their 'natural parametrization'
    natural_shapes = np.exp(shape_params)
    natural_shapes[np.isposinf(natural_shapes)] = max_comp_value
    # Figure out what shape values correspond to each row of the
    # systematic utilities
    long_natural_shapes = rows_to_alts.dot(natural_shapes)

    # Calculate the data dependent part of the transformation
    # Also, along the way, guard against numeric underflow or overflow
    exp_neg_v = np.exp(-1 * systematic_utilities)
    exp_neg_v[np.isposinf(exp_neg_v)] = max_comp_value

    powered_term = np.power(1 + exp_neg_v, long_natural_shapes)
    powered_term[np.isposinf(powered_term)] = max_comp_value

    term_2 = np.log(powered_term - 1)
    # Guard against overflow
    too_big_idx = np.isposinf(powered_term)
    term_2[too_big_idx] = (-1 * long_natural_shapes[too_big_idx] *
                           systematic_utilities[too_big_idx])

    transformations = long_intercepts - term_2
    # Guard against overflow
    transformations[np.isposinf(transformations)] = max_comp_value
    transformations[np.isneginf(transformations)] = -1 * max_comp_value

    # Be sure to return a 2D array since other functions will be expecting that
    if len(transformations.shape) == 1:
        transformations = transformations[:, np.newaxis]

    return transformations
Example #33
    def __getitem__(self, idx):
        """Generate one batch of data"""

        # Initialization
        X = np.empty([
            self.batch_size * 4, self.dim[0], self.dim[1], self.dim[2],
            self.dim[3]
        ])

        Y = np.empty([self.batch_size * 4, self.num_out])

        batch = self.list_IDs[idx * self.batch_size:(idx + 1) *
                              self.batch_size]

        # Generate data
        c = 0
        for i, ID in enumerate(batch):

            # Load input and output

            raw_vol_in = np.array(
                sio.loadmat(self.in_folder[ID]).get("bModes"))
            raw_vol_in[np.isneginf(raw_vol_in)] = -151
            raw_vol_in = np.nan_to_num(raw_vol_in)

            tmp_vol_out = np.array(
                sio.loadmat(self.out_folder[ID]).get('regVars'))
            tmp_vol_out[tmp_vol_out < 1e-6] = 0
            print(tmp_vol_out[0])
            #            tmp_vol_out = np.nan_to_num(tmp_vol_out)

            tmp_vol_in = np.empty([self.dim[0], self.dim[1], self.dim[2]])

            for j in range(
                    self.dim[2]):  # Extract input image in dim(128,128,99)

                tmp_vol_in[:, :, j] = raw_vol_in[:, j * self.dim[0]:self.dim[
                    1] + j * self.dim[1]]  # selects all rows and shifts with

            # Call the data augmentation function
            Vols = AugTrain_reg(tmp_vol_in, tmp_vol_out, self.num_out,
                                self.minmax)

            #            X_aug = Vols[0]
            #            Y_aug = Vols[1]

            X[c, ] = Vols[0][0]  # original and augmented images in X
            X[c + 1, ] = Vols[0][1]
            X[c + 2, ] = Vols[0][2]
            X[c + 3, ] = Vols[0][3]

            Y[c, ] = Vols[1][0]
            Y[c + 1, ] = Vols[1][1]
            Y[c + 2, ] = Vols[1][2]
            Y[c + 3, ] = Vols[1][3]
            c = c + 4
            #print(Y)

        print("Shape in  datagentrain: " + str(type(X[1][1][1][45][0])))
        X = np.moveaxis(X, -2, 1)
        print(X.shape)
        print(Y.shape)
        return X, Y
Example #34
  def testKernelResultsUsingTruncatedDistribution(self):
    def log_prob(x):
      return tf.where(
          x >= 0.,
          -x - x**2,  # Non-constant gradient.
          tf.fill(x.shape, tf.cast(-np.inf, x.dtype)))
    # This log_prob has the property that it is likely to attract
    # the flow toward, and below, zero...but for x <=0,
    # log_prob(x) = -inf, which should result in rejection, as well
    # as a non-finite log_prob.  Thus, this distribution gives us an opportunity
    # to test out the kernel results ability to correctly capture rejections due
    # to finite AND non-finite reasons.
    # Why use a non-constant gradient?  This ensures the leapfrog integrator
    # will not be exact.

    num_results = 1000
    # Large step size, will give rejections due to integration error in addition
    # to rejection due to going into a region of log_prob = -inf.
    step_size = 0.2
    num_leapfrog_steps = 5
    num_chains = 2

    # Start multiple independent chains.
    initial_state = tf.convert_to_tensor([0.1] * num_chains)

    states, kernel_results = tfp.mcmc.sample_chain(
        num_results=num_results,
        current_state=initial_state,
        kernel=tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=log_prob,
            step_size=step_size,
            num_leapfrog_steps=num_leapfrog_steps,
            seed=_set_seed(42)),
        parallel_iterations=1)

    states_, kernel_results_ = self.evaluate([states, kernel_results])
    pstates_ = kernel_results_.proposed_state

    neg_inf_mask = np.isneginf(
        kernel_results_.proposed_results.target_log_prob)

    # First:  Test that the mathematical properties of the above log prob
    # function in conjunction with HMC show up as expected in kernel_results_.

    # We better have log_prob = -inf some of the time.
    self.assertLess(0, neg_inf_mask.sum())
    # We better have some rejections due to something other than -inf.
    self.assertLess(neg_inf_mask.sum(), (~kernel_results_.is_accepted).sum())
    # We better have accepted a decent amount, even near end of the chain.
    self.assertLess(
        0.1, kernel_results_.is_accepted[int(0.9 * num_results):].mean())
    # We better not have any NaNs in states or log_prob.
    # We may have some NaN in grads, which involve multiplication/addition due
    # to gradient rules.  This is the known "NaN grad issue with tf.where."
    self.assertAllEqual(
        np.zeros_like(states_),
        np.isnan(kernel_results_.proposed_results.target_log_prob))
    self.assertAllEqual(
        np.zeros_like(states_),
        np.isnan(states_))
    # We better not have any +inf in states, grads, or log_prob.
    self.assertAllEqual(
        np.zeros_like(states_),
        np.isposinf(kernel_results_.proposed_results.target_log_prob))
    self.assertAllEqual(
        np.zeros_like(states_),
        np.isposinf(
            kernel_results_.proposed_results.grads_target_log_prob[0]))
    self.assertAllEqual(np.zeros_like(states_),
                        np.isposinf(states_))

    # Second:  Test that kernel_results is congruent with itself and
    # acceptance/rejection of states.

    # Proposed state is negative iff proposed target log prob is -inf.
    np.testing.assert_array_less(pstates_[neg_inf_mask], 0.)
    np.testing.assert_array_less(0., pstates_[~neg_inf_mask])

    # Acceptance probs are zero whenever proposed state is negative.
    acceptance_probs = np.exp(np.minimum(
        kernel_results_.log_accept_ratio, 0.))
    self.assertAllEqual(
        np.zeros_like(pstates_[neg_inf_mask]),
        acceptance_probs[neg_inf_mask])

    # The move is accepted ==> state = proposed state.
    self.assertAllEqual(
        states_[kernel_results_.is_accepted],
        pstates_[kernel_results_.is_accepted],
    )
    # The move was rejected <==> state[t] == state[t - 1].
    for t in range(1, num_results):
      for i in range(num_chains):
        if kernel_results_.is_accepted[t, i]:
          self.assertNotEqual(states_[t, i], states_[t - 1, i])
        else:
          self.assertEqual(states_[t, i], states_[t - 1, i])
Example #35
def _score_text(input_file,
                vocabulary,
                scorer,
                output_file,
                log_base=None,
                subword_marking=None,
                word_level=False):
    """Reads text from ``input_file``, computes perplexity using
    ``scorer``, and writes to ``output_file``.

    :type input_file: file object
    :param input_file: a file that contains the input sentences in SRILM n-best
                       format

    :type vocabulary: Vocabulary
    :param vocabulary: vocabulary that provides mapping between words and word
                       IDs

    :type scorer: TextScorer
    :param scorer: a text scorer for rescoring the input sentences

    :type output_file: file object
    :param output_file: a file where to write the output n-best list in SRILM
                        format

    :type log_base: int
    :param log_base: if set to other than None, convert log probabilities to
                     this base

    :type subword_marking: str
    :param subword_marking: if other than None, vocabulary is subwords;
        "word-boundary" indicates <w> token separates words, "prefix-affix"
        indicates subwords are prefixed/affixed with +

    :type word_level: bool
    :param word_level: if set to True, also writes word-level statistics
    """

    scoring_iter = \
        ScoringBatchIterator(input_file,
                             vocabulary,
                             batch_size=16,
                             max_sequence_length=None,
                             map_oos_to_unk=False)
    log_scale = 1.0 if log_base is None else numpy.log(log_base)

    total_logprob = 0.0
    num_sentences = 0
    num_tokens = 0
    num_words = 0
    num_probs = 0
    num_unks = 0
    num_zeroprobs = 0
    for word_ids, words, mask in scoring_iter:
        class_ids, membership_probs = vocabulary.get_class_memberships(
            word_ids)
        logprobs = scorer.score_batch(word_ids, class_ids, membership_probs,
                                      mask)
        for seq_index, seq_logprobs in enumerate(logprobs):
            seq_word_ids = word_ids[:, seq_index]
            seq_mask = mask[:, seq_index]
            seq_word_ids = seq_word_ids[seq_mask == 1]
            seq_words = words[seq_index]
            merged_words, merged_logprobs = _merge_subwords(
                seq_words, seq_logprobs, subword_marking)

            # total logprob of this sequence
            seq_logprob = sum(lp for lp in merged_logprobs
                              if (lp is not None) and (not numpy.isneginf(lp)))
            # total logprob of all sequences
            total_logprob += seq_logprob
            # number of tokens, which may be subwords, including <unk>'s
            num_tokens += len(seq_word_ids)
            # number of words, including <s>'s and <unk>'s
            num_words += len(merged_words)
            # number of word probabilities computed (may not include <unk>'s)
            num_seq_probs = sum((lp is not None) and (not numpy.isneginf(lp))
                                for lp in merged_logprobs)
            num_probs += num_seq_probs
            # number of unks and zeroprobs (just for reporting)
            num_unks += sum(lp is None for lp in merged_logprobs)
            num_zeroprobs += sum((lp is not None) and numpy.isneginf(lp)
                                 for lp in merged_logprobs)
            # number of sequences
            num_sentences += 1

            if word_level:
                output_file.write("# Sentence {0}\n".format(num_sentences))
                _write_word_scores(vocabulary, merged_words, merged_logprobs,
                                   output_file, log_scale)
                output_file.write("Sentence perplexity: {0}\n\n".format(
                    numpy.exp(-seq_logprob / num_seq_probs)))

    output_file.write("Number of sentences: {0}\n".format(num_sentences))
    output_file.write("Number of words: {0}\n".format(num_words))
    output_file.write("Number of tokens: {0}\n".format(num_tokens))
    output_file.write(
        "Number of predicted probabilities: {0}\n".format(num_probs))
    output_file.write("Number of excluded (OOV) words: {0}\n".format(num_unks))
    output_file.write(
        "Number of zero probabilities: {0}\n".format(num_zeroprobs))
    if num_words > 0:
        cross_entropy = -total_logprob / num_probs
        perplexity = numpy.exp(cross_entropy)
        output_file.write(
            "Cross entropy (base e): {0}\n".format(cross_entropy))
        if log_base is not None:
            cross_entropy /= log_scale
            output_file.write("Cross entropy (base {1}): {0}\n".format(
                cross_entropy, log_base))
        output_file.write("Perplexity: {0}\n".format(perplexity))
Example #36
0
def find_cdf_limits(q,
                    f,
                    a,
                    b,
                    args=(),
                    exponent=1.1,
                    maxiter=100,
                    return_iterations=False):
    """find arguments xl, xu of cdf f such that f(xl)<=q & f(xu)>=1-q"""
    # f is assumed to be a monoton. incr. function from (a,b) to [0, 1]

    # x has various ranges
    # y has range [0, 1]
    # g maps from y to x
    # g_inv maps from x to y

    # map from [0, 1] to the actual domain of f
    if np.isneginf(a) and np.isposinf(b):
        gs = [lambda y: np.log(y / (1. - y)), lambda y: np.log((1. - y) / y)]
    elif np.isneginf(a):
        gs = [lambda y: (y - 1.) / y + b, lambda y: y / (1. - y) + b]
    elif np.isposinf(b):
        gs = [lambda y: y / (1. - y) + a, lambda y: (1. - y) / y + a]
    else:
        gs = [lambda y: y * (b - a) + a, lambda y: (1. - y) * (b - a) + a]

    # limit_type 0/1 is lower/upper limit
    for limit_type in range(2):
        g = gs[limit_type]
        for i_n, n in enumerate(range(1, maxiter)):
            y = 2**(-n**exponent)
            if i_n == 0:
                fval = np.array(f(g(y), *args))
                limit = np.array(g(y) * np.ones_like(fval))
                if limit_type == 0:
                    bad = np.array((fval > q))
                else:
                    bad = np.array((fval < 1 - q))
            else:
                sh = [np.array(_)[bad] for _ in args]
                fval[bad] = f(g(y), *sh)
                limit[bad] = g(y)
                if limit_type == 0:
                    bad[bad] = (fval[bad] > q)
                else:
                    bad[bad] = (fval[bad] < 1 - q)

            nbad = np.sum(bad)
            if nbad == 0 or y == 0:
                break

        if limit_type == 0:
            lower_limit = limit
            n_lower_limit = n
            if nbad > 0:
                warnings.warn(
                    'Maximum number of iterations ({}) exceeded '
                    'while determining lower limit.'.format(maxiter),
                    AccuracyWarning)
        else:
            upper_limit = limit
            n_upper_limit = n
            if nbad > 0:
                warnings.warn(
                    'Maximum number of iterations ({}) exceeded '
                    'while determining upper limit.'.format(maxiter),
                    AccuracyWarning)

    if return_iterations:
        return lower_limit, upper_limit, n_lower_limit, n_upper_limit
    else:
        return lower_limit, upper_limit
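A hypothetical usage sketch for find_cdf_limits: bracketing the extreme quantiles of a standard normal, whose support is (-inf, inf). The scipy import is an assumption made only for this example; the function itself needs nothing beyond numpy.

import numpy as np
from scipy.stats import norm  # assumed available for the example only

xl, xu = find_cdf_limits(1e-6, norm.cdf, -np.inf, np.inf)
# xl and xu bracket the 1e-6 and 1 - 1e-6 quantiles of the standard normal
assert norm.cdf(xl) <= 1e-6 and norm.cdf(xu) >= 1 - 1e-6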
Example #37
0
    def false_map_borders_cir(self):
        """
        Creates map of FP/FNs overlaid on CIR image with cloud borders
        """
        plt.ioff()
        for img in self.img_list:
            img_path = data_path / 'images' / img
            stack_path = img_path / 'stack' / 'stack.tif'
            plot_path = data_path / self.batch / 'plots' / img
            band_combo_dir = data_path / 'band_combos'

            try:
                plot_path.mkdir(parents=True)
            except FileExistsError:
                pass

            with rasterio.open(str(stack_path), 'r') as ds:
                data = ds.read()
                data = data.transpose(
                    (1, -1, 0)
                )  # rasterio.read returns (bands, rows, cols); transpose to (rows, cols, bands)
                data[data == -999999] = np.nan
                data[np.isneginf(data)] = np.nan

            # Get flooded image (remove perm water)
            flood_index = data.shape[2] - 1
            perm_index = data.shape[2] - 2
            indices = np.where((data[:, :, flood_index] == 1)
                               & (data[:, :, perm_index] == 1))
            rows, cols = indices
            true_flood = data[:, :, flood_index]
            true_flood[rows, cols] = 0
            # Now convert to a gray color image
            true_flood_rgb = np.zeros(
                (true_flood.shape[0], true_flood.shape[1], 4), 'uint8')
            true_flood_rgb[:, :, 0] = true_flood * 174
            true_flood_rgb[:, :, 1] = true_flood * 236
            true_flood_rgb[:, :, 2] = true_flood * 238
            true_flood_rgb[:, :, 3] = true_flood * 255
            # Make non-flood pixels transparent
            indices = np.where((true_flood_rgb[:, :, 0] == 0)
                               & (true_flood_rgb[:, :, 1] == 0)
                               & (true_flood_rgb[:, :, 2] == 0)
                               & (true_flood_rgb[:, :, 3] == 0))
            true_flood_rgb[indices] = 0
            true_flood_rgb = Image.fromarray(true_flood_rgb, mode='RGBA')

            for pctl in self.pctls:
                # Get CIR image
                cir_file = band_combo_dir / '{}'.format(img + '_cir_img' +
                                                        '.png')
                cir_img = Image.open(cir_file)

                # Get FP/FN image
                comparison_img_file = plot_path / '{}'.format('false_map' +
                                                              str(pctl) +
                                                              '.png')
                flood_overlay = Image.open(comparison_img_file)
                flood_overlay_arr = np.array(flood_overlay)
                indices = np.where((flood_overlay_arr[:, :, 0] == 0)
                                   & (flood_overlay_arr[:, :, 1] == 0)
                                   & (flood_overlay_arr[:, :, 2] == 0)
                                   & (flood_overlay_arr[:, :, 3] == 255))
                flood_overlay_arr[indices] = 0
                # Change red to lime green
                red_indices = np.where((flood_overlay_arr[:, :, 0] == 255)
                                       & (flood_overlay_arr[:, :, 1] == 0)
                                       & (flood_overlay_arr[:, :, 2] == 0)
                                       & (flood_overlay_arr[:, :, 3] == 255))
                flood_overlay_arr[red_indices] = [0, 255, 64, 255]
                flood_overlay = Image.fromarray(flood_overlay_arr, mode='RGBA')

                # Create cloud border image
                clouds_dir = data_path / 'clouds'
                clouds = np.load(clouds_dir /
                                 '{0}'.format(img + '_clouds.npy'))
                clouds[np.isnan(data[:, :, 0])] = np.nan
                cloudmask = np.less(clouds,
                                    np.nanpercentile(clouds, pctl),
                                    where=~np.isnan(clouds))

                from scipy.ndimage import binary_dilation, binary_erosion
                cloudmask_binary = cloudmask.astype('int')
                cloudmask_border = binary_dilation(cloudmask_binary,
                                                   iterations=3)
                cloudmask_border = (cloudmask_border - cloudmask_binary)
                # Convert border to yellow
                border = np.zeros(
                    (cloudmask_border.shape[0], cloudmask_border.shape[1], 4),
                    'uint8')
                border[:, :, 0] = cloudmask_border * 255
                border[:, :, 1] = cloudmask_border * 255
                border[:, :, 2] = cloudmask_border * 0
                border[:, :, 3] = cloudmask_border * 255
                # Make non-border pixels transparent
                indices = np.where((border[:, :, 0] == 0)
                                   & (border[:, :, 1] == 0)
                                   & (border[:, :, 2] == 0)
                                   & (border[:, :, 3] == 0))
                border[indices] = 0
                border_rgb = Image.fromarray(border, mode='RGBA')

                # Plot all layers together
                cir_img.paste(true_flood_rgb, (0, 0), true_flood_rgb)
                cir_img.paste(flood_overlay, (0, 0), flood_overlay)
                cir_img.paste(border_rgb, (0, 0), border_rgb)
                cir_img.save(
                    plot_path /
                    '{}'.format('false_map_border_cir' + str(pctl) + '.png'),
                    dpi=(300, 300))
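The raster sanitising at the top of the method boils down to replacing the nodata sentinel and -inf with NaN; a minimal sketch with made-up values:

import numpy as np

data = np.array([[1.5, -999999.0], [-np.inf, 2.0]])
data[data == -999999] = np.nan       # nodata sentinel
data[np.isneginf(data)] = np.nan     # negative infinities
# data now has NaN wherever the raster had nodata or -inf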
Example #38
0
def numeric_summary(tensor):
    """Get a text summary of a numeric tensor.

  This summary is only available for numeric (int*, float*, complex*) and
  Boolean tensors.

  Args:
    tensor: (`numpy.ndarray`) the tensor value object to be summarized.

  Returns:
    The summary text as a `RichTextLines` object. If the type of `tensor` is not
    numeric or Boolean, a single-line `RichTextLines` object containing a
    warning message will reflect that.
  """
    def _counts_summary(counts, skip_zeros=True, total_count=None):
        """Format values as a two-row table."""
        if skip_zeros:
            counts = [(count_key, count_val) for count_key, count_val in counts
                      if count_val]
        max_common_len = 0
        for count_key, count_val in counts:
            count_val_str = str(count_val)
            common_len = max(len(count_key) + 1, len(count_val_str) + 1)
            max_common_len = max(common_len, max_common_len)

        key_line = debugger_cli_common.RichLine("|")
        val_line = debugger_cli_common.RichLine("|")
        for count_key, count_val in counts:
            count_val_str = str(count_val)
            key_line += _pad_string_to_length(count_key, max_common_len)
            val_line += _pad_string_to_length(count_val_str, max_common_len)
        key_line += " |"
        val_line += " |"

        if total_count is not None:
            total_key_str = "total"
            total_val_str = str(total_count)
            max_common_len = max(len(total_key_str) + 1, len(total_val_str))
            total_key_str = _pad_string_to_length(total_key_str,
                                                  max_common_len)
            total_val_str = _pad_string_to_length(total_val_str,
                                                  max_common_len)
            key_line += total_key_str + " |"
            val_line += total_val_str + " |"

        return debugger_cli_common.rich_text_lines_from_rich_line_list(
            [key_line, val_line])

    if not isinstance(tensor, np.ndarray) or not np.size(tensor):
        return debugger_cli_common.RichTextLines(
            ["No numeric summary available due to empty tensor."])
    elif (np.issubdtype(tensor.dtype, np.floating)
          or np.issubdtype(tensor.dtype, np.complexfloating)
          or np.issubdtype(tensor.dtype, np.integer)):
        counts = [("nan", np.sum(np.isnan(tensor))),
                  ("-inf", np.sum(np.isneginf(tensor))),
                  ("-",
                   np.sum(
                       np.logical_and(tensor < 0.0,
                                      np.logical_not(np.isneginf(tensor))))),
                  ("0", np.sum(tensor == 0.0)),
                  ("+",
                   np.sum(
                       np.logical_and(tensor > 0.0,
                                      np.logical_not(np.isposinf(tensor))))),
                  ("+inf", np.sum(np.isposinf(tensor)))]
        output = _counts_summary(counts, total_count=np.size(tensor))

        valid_array = tensor[np.logical_not(
            np.logical_or(np.isinf(tensor), np.isnan(tensor)))]
        if np.size(valid_array):
            stats = [("min", np.min(valid_array)),
                     ("max", np.max(valid_array)),
                     ("mean", np.mean(valid_array)),
                     ("std", np.std(valid_array))]
            output.extend(_counts_summary(stats, skip_zeros=False))
        return output
    elif tensor.dtype == np.bool_:
        counts = [
            ("False", np.sum(tensor == 0)),
            ("True", np.sum(tensor > 0)),
        ]
        return _counts_summary(counts, total_count=np.size(tensor))
    else:
        return debugger_cli_common.RichTextLines([
            "No numeric summary available due to tensor dtype: %s." %
            tensor.dtype
        ])
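The six count categories built by numeric_summary can be reproduced directly on a small array; a self-contained sketch:

import numpy as np

t = np.array([np.nan, -np.inf, -2.0, 0.0, 3.0, np.inf])
counts = {
    "nan": int(np.sum(np.isnan(t))),
    "-inf": int(np.sum(np.isneginf(t))),
    "-": int(np.sum((t < 0.0) & ~np.isneginf(t))),
    "0": int(np.sum(t == 0.0)),
    "+": int(np.sum((t > 0.0) & ~np.isposinf(t))),
    "+inf": int(np.sum(np.isposinf(t))),
}
# counts == {'nan': 1, '-inf': 1, '-': 1, '0': 1, '+': 1, '+inf': 1}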
Example #39
0
def mvstdnormcdf(lower, upper, corrcoef, **kwds):
    '''standardized multivariate normal cumulative distribution function

    This is a wrapper for scipy.stats.kde.mvn.mvndst which calculates
    a rectangular integral over a standardized multivariate normal
    distribution.

    This function assumes standardized scale, that is the variance in each dimension
    is one, but correlation can be arbitrary, covariance = correlation matrix

    Parameters
    ----------
    lower, upper : array_like, 1d
       lower and upper integration limits with length equal to the number
       of dimensions of the multivariate normal distribution. It can contain
       -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
       specifies correlation matrix in one of three ways, see notes
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This
             parameter can be used to limit the time. A sensible
             strategy is to start with `maxpts` = 1000*N, and then
             increase `maxpts` if ERROR is too large.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral


    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways
    If the multivariate normal is two-dimensional then only the
    correlation coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either
    as a one-dimensional array of the upper triangular correlation
    coefficients stacked by rows, or as full square correlation matrix

    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization

    Examples
    --------

    >>> print mvstdnormcdf([-np.inf,-np.inf], [0.0,np.inf], 0.5)
    0.5
    >>> corr = [[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]
    >>> print mvstdnormcdf([-np.inf,-np.inf,-100.0], [0.0,0.0,0.0], corr, abseps=1e-6)
    0.166666399198
    >>> print mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0],corr, abseps=1e-8)
    something wrong completion with ERROR > EPS and MAXPTS function values used;
                        increase MAXPTS to decrease ERROR; 1.048330348e-006
    0.166666546218
    >>> print mvstdnormcdf([-np.inf,-np.inf,-100.0],[0.0,0.0,0.0], corr,
                            maxpts=100000, abseps=1e-8)
    0.166666588293

    '''
    n = len(lower)
    #don't know if converting to array is necessary,
    #but it makes ndim check possible
    lower = np.array(lower)
    upper = np.array(upper)
    corrcoef = np.array(corrcoef)

    correl = np.zeros(n * (n - 1) // 2)  #dtype necessary?

    if (lower.ndim != 1) or (upper.ndim != 1):
        raise ValueError('can handle only 1D bounds')
    if len(upper) != n:
        raise ValueError('bounds have different lengths')
    if n == 2 and corrcoef.size == 1:
        correl = corrcoef
        #print 'case scalar rho', n
    elif corrcoef.ndim == 1 and len(corrcoef) == n * (n - 1) / 2.0:
        #print 'case flat corr', corrcoeff.shape
        correl = corrcoef
    elif corrcoef.shape == (n, n):
        #print 'case square corr',  correl.shape
        correl = corrcoef[np.tril_indices(n, -1)]


#        for ii in range(n):
#            for jj in range(ii):
#                correl[ jj + ((ii-2)*(ii-1))/2] = corrcoef[ii,jj]
    else:
        raise ValueError('corrcoef has incorrect dimension')

    if 'maxpts' not in kwds:
        if n > 2:
            kwds['maxpts'] = 10000 * n

    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = 2.0 * np.ones(n)

    np.putmask(infin, lowinf, 0)  # infin.putmask(0,lowinf)
    np.putmask(infin, uppinf, 1)  #infin.putmask(1,uppinf)
    #this has to be last
    np.putmask(infin, lowinf * uppinf, -1)

    ##    #remove infs
    ##    np.putmask(lower,lowinf,-100)# infin.putmask(0,lowinf)
    ##    np.putmask(upper,uppinf,100) #infin.putmask(1,uppinf)

    #print lower,',',upper,',',infin,',',correl
    #print correl.shape
    #print kwds.items()
    error, cdfvalue, inform = scipy.stats.kde.mvn.mvndst(
        lower, upper, infin, correl, **kwds)
    if inform:
        print('something wrong', informcode[inform], error)
    return cdfvalue
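The infin codes passed to mvndst are derived purely from which limits are infinite; a short sketch of the mapping, written with boolean indexing instead of np.putmask (behaviourally equivalent for this purpose):

import numpy as np

lower = np.array([-np.inf, -1.0, -np.inf, 0.0])
upper = np.array([np.inf, 1.0, 0.0, np.inf])

lowinf = np.isneginf(lower)
uppinf = np.isposinf(upper)
infin = 2 * np.ones(len(lower), dtype=int)  # 2: both limits finite
infin[lowinf] = 0                           # 0: lower limit is -inf
infin[uppinf] = 1                           # 1: upper limit is +inf
infin[lowinf & uppinf] = -1                 # -1: both infinite (must come last)
# infin == array([-1,  2,  0,  1])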
Example #40
0
def RDP_depend_pate_gaussian(params, alpha):
    """
    Return the data-dependent RDP of GNMAX (proposed in PATE2)
    Bounds RDP from above of GNMax given an upper bound on q (Theorem 6).

    Args:
      logq: Natural logarithm of the probability of a non-argmax outcome.
      sigma: Standard deviation of Gaussian noise.
      orders: An array_like list of Renyi orders.

    Returns:
      Upper bound on RPD for all orders. A scalar if orders is a scalar.

    Raises:
      ValueError: If the input is malformed.
    """
    logq = params['logq']
    sigma = params['sigma']

    if alpha == 1:
        p = np.exp(logq)
        w = (2 * p - 1) * (logq - _log1mexp(logq))
        return w
    if logq > 0 or sigma < 0 or np.any(alpha < 1):  # not defined for alpha=1
        raise ValueError("Inputs are malformed.")

    if np.isneginf(logq):  # If the mechanism's output is fixed, it has 0-DP.
        print('isneginf', logq)
        if np.isscalar(alpha):
            return 0.
        else:
            return np.full_like(alpha, 0., dtype=float)

    variance = sigma**2

    # Use two different higher orders: mu_hi1 and mu_hi2 computed according to
    # Proposition 10.
    mu_hi2 = math.sqrt(variance * -logq)
    mu_hi1 = mu_hi2 + 1

    orders_vec = np.atleast_1d(alpha)

    ret = orders_vec / variance  # baseline: data-independent bound

    # Filter out entries where data-dependent bound does not apply.
    mask = np.logical_and(mu_hi1 > orders_vec, mu_hi2 > 1)

    rdp_hi1 = mu_hi1 / variance
    rdp_hi2 = mu_hi2 / variance

    log_a2 = (mu_hi2 - 1) * rdp_hi2

    # Make sure q is in the increasing wrt q range and A is positive.
    if (np.any(mask) and logq <= log_a2 - mu_hi2 *
        (math.log(1 + 1 / (mu_hi1 - 1)) + math.log(1 + 1 / (mu_hi2 - 1)))
            and -logq > rdp_hi2):
        # Use log1p(x) = log(1 + x) to avoid catastrophic cancellations when x ~ 0.
        log1q = _log1mexp(logq)  # log1q = log(1-q)
        log_a = (alpha - 1) * (log1q - _log1mexp(
            (logq + rdp_hi2) * (1 - 1 / mu_hi2)))
        log_b = (alpha - 1) * (rdp_hi1 - logq / (mu_hi1 - 1))

        # Use logaddexp(x, y) = log(e^x + e^y) to avoid overflow for large x, y.
        log_s1 = utils.stable_logsumexp_two(log1q + log_a, logq + log_b)
        log_s = np.logaddexp(log1q + log_a, logq + log_b)
        ret[mask] = np.minimum(ret, log_s / (alpha - 1))[mask]
    # print('alpha ={} mask {}'.format(alpha,ret))
    if ret[mask] < 0:
        print('negative ret', ret)
        print('log_s1 ={} log_s = {}'.format(log_s1, log_s))
        print('alpha = {} mu_hi1 ={}'.format(alpha, mu_hi1))
        print('log1q = {} log_a = {} log_b={} log_s = {}'.format(
            log1q, log_a, log_b, log_s))
        ret[mask] = 1. / (sigma**2) * alpha
        # print('replace ret with', ret)
    assert np.all(ret >= 0)

    if np.isscalar(alpha):
        return ret.item()
    else:
        return ret
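_log1mexp is used above but not shown. A common numerically stable definition of log(1 - exp(x)) for x < 0 is sketched below; this is an assumption about the helper, not necessarily the authors' exact implementation:

import math

def log1mexp(x):
    # assumed helper: stable log(1 - exp(x)) for x < 0
    if x > -math.log(2.0):
        return math.log(-math.expm1(x))   # x near 0: expm1 avoids cancellation
    return math.log1p(-math.exp(x))       # x very negative: log1p avoids cancellation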
Example #41
0
 lambda x: x.dot(np.eye(x.shape[-1])),
 lambda x: da.tensordot(x, np.ones(x.shape[:2]), axes=[(0, 1), (0, 1)]),
 lambda x: x.sum(axis=0),
 lambda x: x.max(axis=0),
 lambda x: x.sum(axis=(1, 2)),
 lambda x: x.astype(np.complex128),
 lambda x: x.map_blocks(lambda x: x * 2),
 lambda x: x.map_overlap(lambda x: x * 2, depth=0, trim=True, boundary="none"),
 lambda x: x.map_overlap(lambda x: x * 2, depth=0, trim=False, boundary="none"),
 lambda x: x.round(1),
 lambda x: x.reshape((x.shape[0] * x.shape[1], x.shape[2])),
 lambda x: abs(x),
 lambda x: x > 0.5,
 lambda x: x.rechunk((4, 4, 4)),
 lambda x: x.rechunk((2, 2, 1)),
 lambda x: np.isneginf(x),
 lambda x: np.isposinf(x),
 pytest.param(
     lambda x: np.zeros_like(x),
     marks=pytest.mark.xfail(
         SPARSE_VERSION < parse_version("0.13.0"),
         reason="https://github.com/pydata/xarray/issues/5654",
     ),
 ),
 pytest.param(
     lambda x: np.ones_like(x),
     marks=pytest.mark.xfail(
         SPARSE_VERSION < parse_version("0.13.0"),
         reason="https://github.com/pydata/xarray/issues/5654",
     ),
 ),
Example #42
0
    def GetDatasetsProto(self, datasets, features=None):
        """Generates the feature stats proto from dictionaries of feature values.

    Args:
      datasets: An array of dictionaries, one per dataset, each one containing:
          - 'entries': The dictionary of features in the dataset from the parsed
            examples.
          - 'size': The number of examples parsed for the dataset.
          - 'name': The name of the dataset.
      features: A list of strings that is a whitelist of feature names to create
          feature statistics for. If set to None then all features in the
            dataset
          are analyzed. Defaults to None.

    Returns:
      The feature statistics proto for the provided datasets.
    """
        features_seen = set()
        whitelist_features = set(features) if features else None
        all_datasets = self.datasets_proto()

        # TODO(jwexler): Add ability to generate weighted feature stats
        # if there is a specified weight feature in the dataset.

        # Initialize each dataset
        for dataset in datasets:
            all_datasets.datasets.add(name=dataset['name'],
                                      num_examples=dataset['size'])
        # This outer loop ensures that for each feature seen in any of the provided
        # datasets, we check the feature once against all datasets.
        for outer_dataset in datasets:
            for key, value in outer_dataset['entries'].items():
                # If we have a feature whitelist and this feature is not in the
                # whitelist then do not process it.
                # If we have processed this feature already, no need to do it again.
                if ((whitelist_features and key not in whitelist_features)
                        or key in features_seen):
                    continue
                features_seen.add(key)
                # Default to type int if no type is found, so that the fact that all
                # values are missing from this feature can be displayed.
                feature_type = value[
                    'type'] if 'type' in value else self.fs_proto.INT
                # Process the found feature for each dataset.
                for j, dataset in enumerate(datasets):
                    feat = all_datasets.datasets[j].features.add(
                        type=feature_type, name=key)
                    value = dataset['entries'].get(key)
                    has_data = value is not None and (
                        value['vals'].size != 0 if isinstance(
                            value['vals'], np.ndarray) else value['vals'])
                    commonstats = None
                    # For numeric features, calculate numeric statistics.
                    if feat.type in (self.fs_proto.INT, self.fs_proto.FLOAT):
                        featstats = feat.num_stats
                        commonstats = featstats.common_stats
                        if has_data:
                            nums = value['vals']
                            featstats.std_dev = np.std(nums).item()
                            featstats.mean = np.mean(nums).item()
                            featstats.min = np.min(nums).item()
                            featstats.max = np.max(nums).item()
                            featstats.median = np.median(nums).item()
                            featstats.num_zeros = len(nums) - np.count_nonzero(
                                nums)

                            nums = np.array(nums)
                            num_nan = len(nums[np.isnan(nums)])
                            num_posinf = len(nums[np.isposinf(nums)])
                            num_neginf = len(nums[np.isneginf(nums)])

                            # Remove all non-finite (including NaN) values from the numeric
                            # values in order to calculate histogram buckets/counts. The
                            # inf values will be added back to the first and last buckets.
                            nums = nums[np.isfinite(nums)]
                            counts, buckets = np.histogram(nums)
                            hist = featstats.histograms.add()
                            hist.type = self.histogram_proto.STANDARD
                            hist.num_nan = num_nan
                            for bucket_count in range(len(counts)):
                                bucket = hist.buckets.add(
                                    low_value=buckets[bucket_count],
                                    high_value=buckets[bucket_count + 1],
                                    sample_count=counts[bucket_count].item())
                                # Add any negative or positive infinities to the first and last
                                # buckets in the histogram.
                                if bucket_count == 0 and num_neginf > 0:
                                    bucket.low_value = float('-inf')
                                    bucket.sample_count += num_neginf
                                elif bucket_count == len(
                                        counts) - 1 and num_posinf > 0:
                                    bucket.high_value = float('inf')
                                    bucket.sample_count += num_posinf
                            if not hist.buckets:
                                if num_neginf:
                                    hist.buckets.add(low_value=float('-inf'),
                                                     high_value=float('-inf'),
                                                     sample_count=num_neginf)
                                if num_posinf:
                                    hist.buckets.add(low_value=float('inf'),
                                                     high_value=float('inf'),
                                                     sample_count=num_posinf)

                            self._PopulateQuantilesHistogram(
                                featstats.histograms.add(), nums.tolist())
                    elif feat.type == self.fs_proto.STRING:
                        featstats = feat.string_stats
                        commonstats = featstats.common_stats
                        if has_data:
                            strs = value['vals']
                            featstats.avg_length = np.mean(
                                np.vectorize(len)(strs))
                            vals, counts = np.unique(strs, return_counts=True)
                            featstats.unique = len(vals)
                            sorted_vals = sorted(zip(counts, vals),
                                                 reverse=True)
                            for val_index, val in enumerate(sorted_vals):
                                if val[1].dtype.type is np.str_:
                                    printable_val = val[1]
                                else:
                                    try:
                                        printable_val = val[1].decode(
                                            'UTF-8', 'strict')
                                    except UnicodeDecodeError:
                                        printable_val = '__BYTES_VALUE__'
                                bucket = featstats.rank_histogram.buckets.add(
                                    low_rank=val_index,
                                    high_rank=val_index,
                                    sample_count=val[0].item(),
                                    label=printable_val)
                                if val_index < 2:
                                    featstats.top_values.add(
                                        value=bucket.label,
                                        frequency=bucket.sample_count)
                    # Add the common stats regardless of the feature type.
                    if has_data:
                        commonstats.num_missing = value['missing']
                        commonstats.num_non_missing = (
                            all_datasets.datasets[j].num_examples -
                            featstats.common_stats.num_missing)
                        commonstats.min_num_values = np.min(value['counts']).item()
                        commonstats.max_num_values = np.max(value['counts']).item()
                        commonstats.avg_num_values = np.mean(value['counts']).item()
                        if 'feat_lens' in value and value['feat_lens']:
                            self._PopulateQuantilesHistogram(
                                commonstats.feature_list_length_histogram,
                                value['feat_lens'])
                        self._PopulateQuantilesHistogram(
                            commonstats.num_values_histogram, value['counts'])
                    else:
                        commonstats.num_non_missing = 0
                        commonstats.num_missing = all_datasets.datasets[
                            j].num_examples

        return all_datasets
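The histogram handling above drops infinities before calling np.histogram and folds their counts back into the edge buckets; a compact sketch of that step in isolation:

import numpy as np

nums = np.array([-np.inf, 1.0, 2.0, 3.0, np.inf, np.inf])
num_neginf = int(np.sum(np.isneginf(nums)))
num_posinf = int(np.sum(np.isposinf(nums)))

finite = nums[np.isfinite(nums)]
counts, edges = np.histogram(finite)
counts = counts.astype(float)
counts[0] += num_neginf    # fold -inf into the first bucket
counts[-1] += num_posinf   # fold +inf into the last bucket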
Example #43
0
    def eval_node_probs(self):
        """Update probability density estimates.
        """
        if (self.mimic_speed == False):
            # Create mutual info matrix
            mutual_info = np.zeros([self.length, self.length])
            for i in range(self.length - 1):
                for j in range(i + 1, self.length):
                    mutual_info[i, j] = -1 * mutual_info_score(
                        self.keep_sample[:, i], self.keep_sample[:, j])

        elif (self.mimic_speed == True):
            # Set ignore error to ignore dividing by zero
            np.seterr(divide='ignore', invalid='ignore')

            # get length of the sample which survived from mimic iteration
            len_sample_kept = self.keep_sample.shape[0]
            # get the length of the bit sequence / problem size
            len_prob = self.keep_sample.shape[1]

            # Expand the matrices to so each row corresponds to a row by row combination of the list of samples
            permuted_rows = np.repeat(self.keep_sample, self.length).reshape(
                len_sample_kept, len_prob * len_prob)
            duplicated_rows = np.hstack(([self.keep_sample] * len_prob))

            # Compute the mutual information matrix in bulk
            # This is done by iterating through the list of possible feature values ((max_val-1)^2).
            # For example, a binary string would go through 00 01 10 11, for a total of 4 iterations.

            # First initialize the mutual info matrix.
            mutual_info_vectorized = np.zeros([self.length * self.length])
            # Pre-compute the clusters U and V which gets computed multiple times in the inner loop.
            cluster_U = {}
            cluster_V = {}
            cluster_U_sum = {}
            cluster_V_sum = {}
            for i in range(0, self.max_val):
                cluster_U[i] = (duplicated_rows == i)
                cluster_V[i] = (permuted_rows == i)
                cluster_U_sum[i] = np.sum(duplicated_rows == i, axis=0)
                cluster_V_sum[i] = np.sum(permuted_rows == i, axis=0)

            # Compute the mutual information for all sample to sample combination
            # Done for each feature combination i & j ((max_val-1)^2)
            for i in range(0, self.max_val):
                for j in range(0, self.max_val):
                    # |U_i AND V_j|/N Length of cluster matching for feature pair i j over sample length N
                    # This is the first term in the MI computation
                    MI_first_term = np.sum(cluster_U[i] * cluster_V[j], axis=0)
                    MI_first_term = np.divide(MI_first_term, len_sample_kept)

                    # compute the second term of the MI matrix
                    # Length |U_i||V_j|, for the particular feature pair
                    UV_length = (cluster_U_sum[i] * cluster_V_sum[j])
                    MI_second_term = np.log(MI_first_term) - np.log(
                        UV_length) + np.log(len_sample_kept)
                    # remove the nans and negative infinity, there shouldn't be any
                    MI_second_term[np.isnan(MI_second_term)] = 0
                    MI_second_term[np.isneginf(MI_second_term)] = 0

                    # Combine the first and second term
                    # Add the whole MI matrix for the feature to the previously computed values
                    mutual_info_vectorized = mutual_info_vectorized + MI_first_term * MI_second_term

            # Need to multiply by negative to get the mutual information, and reshape (Full Matrix)
            mutual_info_full = -mutual_info_vectorized.reshape(
                self.length, self.length)
            # Only get the upper triangle matrix above the identity row.
            mutual_info = np.triu(mutual_info_full, k=1)
            # Possible enhancements, currently we are doing double the computation required.
            # Pre set the matrix so the computation is only done for rows that are needed. To do for the future.

        # Find minimum spanning tree of mutual info matrix
        mst = minimum_spanning_tree(csr_matrix(mutual_info))

        # Convert minimum spanning tree to depth first tree with node 0 as root
        dft = depth_first_tree(csr_matrix(mst.toarray()), 0, directed=False)
        dft = np.round(dft.toarray(), 10)

        # Determine parent of each node
        parent = np.argmin(dft[:, 1:], axis=0)

        # Get probs
        probs = np.zeros([self.length, self.max_val, self.max_val])

        probs[0, :] = np.histogram(self.keep_sample[:, 0],
                                   np.arange(self.max_val + 1),
                                   density=True)[0]

        for i in range(1, self.length):
            for j in range(self.max_val):
                subset = self.keep_sample[np.where(
                    self.keep_sample[:, parent[i - 1]] == j)[0]]

                if not len(subset):
                    probs[i, j] = 1 / self.max_val
                else:
                    probs[i, j] = np.histogram(subset[:, i],
                                               np.arange(self.max_val + 1),
                                               density=True)[0]

        # Update probs and parent
        self.node_probs = probs
        self.parent_nodes = parent
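A toy illustration of the -inf/NaN handling in the vectorised mutual-information term: empty clusters produce log(0) = -inf (or NaN from -inf minus -inf), and those entries are zeroed so the corresponding p * log(...) terms contribute nothing. The array values are made up:

import numpy as np

np.seterr(divide='ignore', invalid='ignore')
p_uv = np.array([0.0, 0.0, 0.5])        # |U_i AND V_j| / N
uv_len = np.array([0.0, 4.0, 4.0])      # |U_i| * |V_j|
n = 4.0
second = np.log(p_uv) - np.log(uv_len) + np.log(n)   # [nan, -inf, log(0.5)]
second[np.isnan(second)] = 0
second[np.isneginf(second)] = 0
mi_terms = p_uv * second                # zeroed entries drop out of the sum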
Example #44
0
def mvstdnormcdf(lower, upper, corrcoef,maxpts = None, **kwds):
    '''standardized multivariate normal cumulative distribution function

    This is a wrapper for scipy.stats.kde.mvn.mvndst which calculates
    a rectangular integral over a standardized multivariate normal
    distribution.
    
    This function assumes standardized scale, that is the variance in each dimension
    is one, but correlation can be arbitrary, covariance = correlation matrix

    Parameters
    ----------
    lower, upper : array_like, 1d
       lower and upper integration limits with length equal to the number
       of dimensions of the multivariate normal distribution. It can contain
       -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
       specifies correlation matrix in one of three ways, see notes
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This 
             parameter can be used to limit the time. A sensible 
             strategy is to start with `maxpts` = 1000*N, and then
             increase `maxpts` if ERROR is too large.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral


    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways
    If the multivariate normal is two-dimensional then only the
    correlation coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either
    as a one-dimensional array of the upper triangular correlation
    coefficients stacked by rows, or as full square correlation matrix

    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization

    Examples
    --------

    >>> print mvstdnormcdf([-np.inf,-np.inf], [0.0,np.inf], 0.5)
    0.5
    >>> corr = [[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]    
    >>> assert Matrix(0.166666399198) == mvstdnormcdf(
    ...    [-np.inf,-np.inf,-100.0], 
    ...    [0.0,0.0,0.0], 
    ...    corr, abseps=2e-6
    ... )
    
    >>> 
    >>> assert Matrix(0.166666588293) == mvstdnormcdf(
    ...     [-np.inf,-np.inf,-100.0],
    ...     [    0.0,    0.0,   0.0],
    ...     corr, abseps=1e-8)                                                  #doctest: +IGNORE_EXCEPTION_DETAIL                                                 
    Traceback (most recent call last):                                         
    ...
    MvnDstError: completion with ERROR > EPS and MAXPTS function values used;
                 increase MAXPTS to decrease ERROR, ERROR = 1.8253048422e-07   
    
    >>> assert Matrix(0.166666588293) == mvstdnormcdf(
    ...    [-np.inf,-np.inf,-100.0],
    ...    [0.0,0.0,0.0],
    ...    corr,maxpts=1000000, abseps=1e-8
    ... )
    
    
    '''
    n = len(lower)
    #don't know if converting to array is necessary,
    #but it makes ndim check possible
    lower = np.array(lower)
    upper = np.array(upper)
    corrcoef = np.array(corrcoef)
    
    correl = np.zeros(n*(n-1)//2)  #dtype necessary?
    
    if (lower.ndim != 1) or (upper.ndim != 1):
        raise ValueError('can handle only 1D bounds')
    if len(upper) != n:
        raise ValueError('bounds have different lengths')
    if n==2 and corrcoef.size==1:
        correl = corrcoef
        #print 'case scalar rho', n
    elif corrcoef.ndim == 1 and len(corrcoef) == n*(n-1)/2.0:
        #print 'case flat corr', corrcoeff.shape
        correl = corrcoef
    elif corrcoef.shape == (n,n):
        correl = corrcoef[np.tri(n,n,-1,dtype=bool)]
    else:
        raise ValueError('corrcoef has incorrect dimension')

    if maxpts is None:
        maxpts = 10000*n


    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = 2.0*np.ones(n)
    
    infin[lowinf] = 0
    infin[uppinf] = 1
    infin[lowinf & uppinf] = -1


    error, cdfvalue, inform = mvndst(lower,upper,infin,correl,maxpts,**kwds)
    
    if inform:
        raise MvnDstError(inform, error)
        
    return cdfvalue
Example #45
0
def makeDailyChannelOffsetSignal( ):

    from functions.TAfunctions import SMA, MoveMax, jumpTheChannelTest
    import functions.allstats
    # 'import *' is not allowed inside a function in Python 3; import the name
    # used below explicitly (adjust if other names from this module are needed)
    from functions.UpdateSymbols_inHDF5 import loadQuotes_fromHDF
    from functions.GetParams import GetParams

    file4path = os.path.join( os.getcwd(), "pyTAAAweb_DailyChannelOffsetSignal_status.params" )
    figure4path = os.path.join( os.getcwd(), "pyTAAA_web", "PyTAAA_DailyChannelOffsetSignalV.png" )

    symbol_directory = os.path.join( os.getcwd(), "symbols" )
    symbol_file = "Naz100_Symbols.txt"
    symbols_file = os.path.join( symbol_directory, symbol_file )

    adjClose, symbols, datearray, _, _ = loadQuotes_fromHDF( symbols_file )

    ###
    ### get last date already processed
    ###
    _dates = []
    avgPctChannel = []
    numAboveBelowChannel = []
    try:
        with open( file4path, "r" ) as f:
            # get number of lines in file
            lines = f.read().split("\n")
            numlines = len (lines)
            for i in range(numlines):
                statusline = lines[i]
                statusline_list = statusline.split(" ")
                statusline_list = list(filter(None, statusline_list))
                if len( statusline_list ) == 3:
                    _dates.append( datetime.datetime.strptime( statusline_list[0], '%Y-%m-%d') )
                    avgPctChannel.append( float(statusline_list[1].split('%')[0])/100. )
                    numAboveBelowChannel.append( float(statusline_list[2]) )
    except:
        print(" Error: unable to read updates from pyTAAAweb_DailyChannelOffsetSignal_status.params")
        print("")
    #print "_dates = ", _dates
    last_date = _dates[-1].date()
    print("   ...inside makeDailyChannelOffsetSignal... last_date = ", last_date)

    # parameters for signal
    params = GetParams()
    minperiod = params['minperiod']
    maxperiod = params['maxperiod']
    incperiod = params['incperiod']
    numdaysinfit = params['numdaysinfit']
    offset = params['offset']

    print "minperiod,maxperiod,incperiod,numdaysinfit,offset = ", minperiod,maxperiod,incperiod,numdaysinfit,offset

    # process for each date
    print "\n  ... inside makeDailyChannelOffsetSignal ..."
    dailyChannelOffsetSignal = np.zeros( adjClose.shape[1], 'float' )
    dailyCountDowntrendChannelOffsetSignal = np.zeros( adjClose.shape[1], 'float' )
    #for idate in range(numdaysinfit+incperiod,adjClose.shape[1])
    for idate in range(adjClose.shape[1]):
        if datearray[idate] >= last_date :
            #if datearray[idate] > datetime.date(1992,1,1) :
            #if datearray[idate] > datetime.date(1992,1,1) :
            if idate%10 == 0:
                print "   ...idate, datearray[idate] = ", idate, datearray[idate]
            # process all symbols
            numberDowntrendSymbols = 0
            dailyChannelPct = []
            ##print "     ... symbols = ", symbols
            floatChannelGainsLosses = []
            floatStdevsAboveChannel = []
            for i, symbol in enumerate(symbols):
                #print "     ... symbol = ", symbol
                quotes = adjClose[i,idate-numdaysinfit-offset-1:idate].copy()

                channelGainLoss, numStdDevs, pctChannel = \
                                                recentTrendAndStdDevs( \
                                                quotes, \
                                                datearray,\
                                                minperiod=minperiod,\
                                                maxperiod=maxperiod,\
                                                incperiod=incperiod,\
                                                numdaysinfit=numdaysinfit,\
                                                offset=offset)


                floatChannelGainsLosses.append(channelGainLoss)
                floatStdevsAboveChannel.append(numStdDevs)

            floatChannelGainsLosses = np.array(floatChannelGainsLosses)
            floatChannelGainsLosses[np.isinf(floatChannelGainsLosses)] = -999.
            floatChannelGainsLosses[np.isneginf(floatChannelGainsLosses)] = -999.
            floatChannelGainsLosses[np.isnan(floatChannelGainsLosses)] = -999.
            floatChannelGainsLosses = floatChannelGainsLosses[floatChannelGainsLosses != -999.]
            floatStdevsAboveChannel = np.array(floatStdevsAboveChannel)
            floatStdevsAboveChannel[np.isinf(floatStdevsAboveChannel)] = -999.
            floatStdevsAboveChannel[np.isneginf(floatStdevsAboveChannel)] = -999.
            floatStdevsAboveChannel[np.isnan(floatStdevsAboveChannel)] = -999.
            floatStdevsAboveChannel = floatStdevsAboveChannel[floatStdevsAboveChannel != -999.]
            ##print "floatChannelGainsLosses.shape = ", floatChannelGainsLosses.shape
            trimmeanGains = np.mean(floatChannelGainsLosses[np.logical_and(\
                                    floatChannelGainsLosses>np.percentile(floatChannelGainsLosses,5),\
                                    floatChannelGainsLosses<np.percentile(floatChannelGainsLosses,95)\
                                    )])
            trimmeanStdevsAboveChannel = np.mean(floatStdevsAboveChannel[np.logical_and(\
                                    floatStdevsAboveChannel>np.percentile(floatStdevsAboveChannel,5),\
                                    floatStdevsAboveChannel<np.percentile(floatStdevsAboveChannel,95)\
                                    )])

            #print "idate= ",idate,str(datearray[idate])
            textmessage2 = ''
            with open( file4path, "a" ) as ff:
                textmessage2 = "\n"+str(datearray[idate])+"  "+\
                              format(trimmeanGains,"8.2%")+"  "+\
                              format(trimmeanStdevsAboveChannel,"7.1f")
                ff.write(textmessage2)
                print "textmessage2 = ", textmessage2
            #print "idate= ",idate, str(datearray[idate])


    ##########################################
    # make plot
    ##########################################

    ###
    ### make a combined plot
    ### 1. get percent of uptrending stocks
    ###
    _dates = []
    avgPctChannel = []
    numAboveBelowChannel = []
    try:
        with open( file4path, "r" ) as f:
            # get number of lines in file
            lines = f.read().split("\n")
            numlines = len (lines)
            for i in range(numlines):
                statusline = lines[i]
                statusline_list = statusline.split(" ")
                statusline_list = list(filter(None, statusline_list))
                if len( statusline_list ) == 3:
                    _dates.append( datetime.datetime.strptime( statusline_list[0], '%Y-%m-%d') )
                    avgPctChannel.append( float(statusline_list[1].split('%')[0])/100. )
                    numAboveBelowChannel.append( float(statusline_list[2]) )

    except:
        print " Error: unable to read updates from pyTAAAweb_numberUptrendingStocks_status.params"
        print ""

    _dates = np.array(_dates)
    avgPctChannel = np.array(avgPctChannel)
    numAboveBelowChannel = np.array(numAboveBelowChannel)
    print " avgPctChannel min, mean, max = ", avgPctChannel.min(),avgPctChannel.mean(),avgPctChannel.max()
    print "\n\n numAboveBelowChannel = ", numAboveBelowChannel
    print " numAboveBelowChannel min, mean, max = ", numAboveBelowChannel.min(),numAboveBelowChannel.mean(),numAboveBelowChannel.max()
    plt.figure(4,figsize=(9,7))
    plt.clf()
    plt.grid(True)
    numDaysToPlot = 252*3
    plt.plot( _dates[-numDaysToPlot:], np.clip(avgPctChannel[-numDaysToPlot:]*100.,-200.,200.), 'r-', lw=.1)
    plt.plot( _dates[-numDaysToPlot:], numAboveBelowChannel[-numDaysToPlot:], 'b-', lw=.25)
    plt.title("pyTAAA History Plot\nChannel Offset Signal")
    plt.savefig(figure4path)
    figure4path = 'PyTAAA_DailyChannelOffsetSignalV2.png'  # re-set to name without full path
    figure4_htmlText = "\n<br><h3>Channel Offset Signal</h3>\n"
    figure4_htmlText = figure4_htmlText + "\nPlot shows up/down trending in last few days compared to trend for stocks in Nasdaq 100.\n"
    figure4_htmlText = figure4_htmlText + '''<br><img src="'''+figure4path+'''" alt="PyTAAA by DonaldPG" width="850" height="500"><br>\n'''

    ###
    ### make a combined plot
    ### 2. make plot showing trend below B&H and trade-system Value
    ###
    file3path = os.path.join( os.getcwd(), "pyTAAAweb_backtestPortfolioValue.params" )
    backtestDate = []
    backtestBHvalue = []
    backtestSystemvalue = []
    try:
        with open( file3path, "r" ) as f:
            # get number of lines in file
            lines = f.read().split("\n")
            numlines = len (lines)
            for i in range(numlines):
                try:
                    statusline = lines[i]
                    statusline_list = statusline.split(" ")
                    if len( statusline_list ) == 5:
                        backtestDate.append( datetime.datetime.strptime( statusline_list[0], '%Y-%m-%d') )
                        backtestBHvalue.append( float(statusline_list[2]) )
                        backtestSystemvalue.append( float(statusline_list[4]) )
                except:
                    break
    except:
        print " Error: unable to read updates from pyTAAAweb_backtestPortfolioValue.params"
        print ""

    figure5path = os.path.join( os.getcwd(), "pyTAAA_web", "PyTAAA_backtestWithOffsetChannelSignal.png" )
    plt.figure(5,figsize=(9,7))
    plt.clf()
    subplotsize = gridspec.GridSpec(2,1,height_ratios=[5,3])
    plt.subplot(subplotsize[0])
    plt.grid(True)
    plt.yscale('log')
    plotmax = 1.e10
    plt.ylim([1000,max(10000,plotmax)])
    numDaysToPlot = 252*10
    numDaysToPlot = len( backtestBHvalue )
    plt.plot( backtestDate[-numDaysToPlot:], backtestBHvalue[-numDaysToPlot:], 'r-', lw=1.25, label='Buy & Hold')
    plt.plot( backtestDate[-numDaysToPlot:], backtestSystemvalue[-numDaysToPlot:], 'k-', lw=1.25, label='Trading System')
    plt.legend(loc=2,prop={'size':9})
    plt.title("pyTAAA History Plot\n Portfolio Value")
    plt.text( backtestDate[-numDaysToPlot+50], 2500, "Backtest updated "+datetime.datetime.now().strftime("%A, %d. %B %Y %I:%M%p"), fontsize=7.5 )
    plt.subplot(subplotsize[1])
    plt.grid(True)
    plt.ylim(-100, 100)
    plt.plot( _dates[-numDaysToPlot:], np.clip(avgPctChannel[-numDaysToPlot:]*100.,-200.,200.), 'r-', lw=.1, label='avg Pct offset channel')
    plt.plot( _dates[-numDaysToPlot:], numAboveBelowChannel[-numDaysToPlot:], 'b-', lw=.25, label='number above/below offset channel')
    plt.legend(loc=3,prop={'size':6})
    plt.savefig(figure5path)
    figure5path = 'PyTAAA_backtestWithOffsetChannelSignal.png'  # re-set to name without full path
    figure5_htmlText = "\n<br><h3>Daily backtest with offset Channel trend signal</h3>\n"
    figure5_htmlText = figure5_htmlText + "\nCombined backtest with offset Channel trend signal.\n"
    figure5_htmlText = figure5_htmlText + '''<br><img src="'''+figure5path+'''" alt="PyTAAA by DonaldPG" width="850" height="500"><br>\n'''

    return figure4_htmlText, figure5_htmlText
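The per-date aggregation above amounts to dropping non-finite channel statistics (the -999 sentinel filtering) and taking a 5th-95th percentile trimmed mean; an equivalent condensed sketch with made-up values:

import numpy as np

vals = np.array([0.02, -0.01, np.nan, -np.inf, 0.03, 5.0, -4.0, 0.01])
vals = vals[np.isfinite(vals)]                      # drops nan, -inf, +inf
lo, hi = np.percentile(vals, 5), np.percentile(vals, 95)
trimmed_mean = np.mean(vals[(vals > lo) & (vals < hi)])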
Example #46
0
def _output_vectors_text(input_file,
                         vocabulary,
                         scorer,
                         output_file,
                         log_base=None):
    """Reads text from ``input_file``, computes perplexity using
    ``scorer``, and writes to ``output_file``.

    :type input_file: file object
    :param input_file: a file that contains the input sentences in SRILM n-best
                       format

    :type vocabulary: Vocabulary
    :param vocabulary: vocabulary that provides mapping between words and word
                       IDs

    :type scorer: TextScorer
    :param scorer: a text scorer for rescoring the input sentences

    :type output_file: file object
    :param output_file: a file where to write the output n-best list in SRILM
                        format

    :type log_base: int
    :param log_base: if set to other than None, convert log probabilities to
                     this base
    """

    scoring_iter = \
        ScoringBatchIterator(input_file,
                             vocabulary,
                             batch_size=16,
                             max_sequence_length=None,
                             map_oos_to_unk=False)
    log_scale = 1.0 if log_base is None else numpy.log(log_base)

    total_logprob = 0.0
    num_sentences = 0
    num_tokens = 0
    num_words = 0
    num_probs = 0
    num_unks = 0
    num_zeroprobs = 0
    all_word_ids = numpy.arange(vocabulary.num_words())
    all_class_ids, membership_probs = vocabulary.get_class_memberships(
        all_word_ids)
    for word_ids, words, mask in scoring_iter:
        class_ids, _ = vocabulary.get_class_memberships(word_ids)

        membership_probs_output_vec = numpy.tile(
            membership_probs, (word_ids.shape[0], word_ids.shape[1], 1))
        logprobs = scorer.score_batch_output(word_ids, class_ids,
                                             all_class_ids,
                                             membership_probs_output_vec, mask)
        for seq_index, seq_logprobs in enumerate(logprobs):
            seq_word_ids = word_ids[:, seq_index]
            seq_mask = mask[:, seq_index]
            seq_word_ids = seq_word_ids[seq_mask == 1]
            seq_words = words[seq_index]
            #TODO: Rename the variables properly to remove the hack below
            merged_words, merged_logprobs = seq_words, seq_logprobs

            # total logprob of this sequence
            seq_logprob = sum(
                lp[seq_word_ids[idx + 1]]
                for idx, lp in enumerate(merged_logprobs)
                if (lp[seq_word_ids[idx + 1]] is not None) and (
                    not numpy.isneginf(lp[seq_word_ids[idx + 1]])))
            # total logprob of all sequences
            total_logprob += seq_logprob
            # number of tokens, which may be subwords, including <unk>'s
            num_tokens += len(seq_word_ids)
            # number of words, including <s>'s and <unk>'s
            num_words += len(merged_words)
            # number of word probabilities computed (may not include <unk>'s)
            num_seq_probs = sum((lp[seq_word_ids[idx + 1]] is not None) and (
                not numpy.isneginf(lp[seq_word_ids[idx + 1]]))
                                for idx, lp in enumerate(merged_logprobs))
            num_probs += num_seq_probs
            # number of unks and zeroprobs (just for reporting)
            num_unks += sum(lp[seq_word_ids[idx + 1]] is None
                            for idx, lp in enumerate(merged_logprobs))
            num_zeroprobs += sum((lp[seq_word_ids[idx + 1]] is not None)
                                 and numpy.isneginf(lp[seq_word_ids[idx + 1]])
                                 for idx, lp in enumerate(merged_logprobs))
            # number of sequences
            num_sentences += 1

            output_file.write("# Sentence {0}\n".format(num_sentences))
            _write_output_vectors(vocabulary, merged_words, merged_logprobs,
                                  output_file, log_scale)
            output_file.write("Sentence perplexity: {0}\n\n".format(
                numpy.exp(-seq_logprob / num_seq_probs)))

    output_file.write("Number of sentences: {0}\n".format(num_sentences))
    output_file.write("Number of words: {0}\n".format(num_words))
    output_file.write("Number of tokens: {0}\n".format(num_tokens))
    output_file.write(
        "Number of predicted probabilities: {0}\n".format(num_probs))
    output_file.write("Number of excluded (OOV) words: {0}\n".format(num_unks))
    output_file.write(
        "Number of zero probabilities: {0}\n".format(num_zeroprobs))
    if num_words > 0:
        cross_entropy = -total_logprob / num_probs
        perplexity = numpy.exp(cross_entropy)
        output_file.write(
            "Cross entropy (base e): {0}\n".format(cross_entropy))
        if log_base is not None:
            cross_entropy /= log_scale
            output_file.write("Cross entropy (base {1}): {0}\n".format(
                cross_entropy, log_base))
        output_file.write("Perplexity: {0}\n".format(perplexity))
Example #47
0
def check_all_log_values_are_valid(feature_vector: List[float],
                                   value_vector: List[float]) -> bool:
    return not np.any(
        np.isneginf(np.concatenate((feature_vector, value_vector))))
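
# Hedged usage sketch for the helper above; the vectors are hypothetical.
import numpy as np
feature_vector = list(np.log([0.5, 1.0, 2.0]))
value_vector = list(np.log([0.1, 3.0]))
assert check_all_log_values_are_valid(feature_vector, value_vector)        # all logs finite
assert not check_all_log_values_are_valid([float('-inf')], value_vector)   # a -inf entry fails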
Example #48
0
def island_abm(rho=0.01,
               alpha=1.5,
               phi=0.4,
               pi=0.4,
               eps=0.1,
               lambda_param=1,
               T=100,
               N=50,
               _RNG_SEED=0):
    """ Islands growth model
    Parameters
    ----------
    rho :
    alpha :
    phi : float, required
    eps :
    lambda_param: (Default = 1)
    T : int, required
    The number of periods for the simulation
    N : int, optional (Default = 50)
    Number of firms
    _RNG_SEED : int, optional (Default = 0)
    Random number seen
    Output
    ------
    GDP : array, length = [,T]
    Simulated GPD
    """
    # Set random number seed
    np.random.seed(_RNG_SEED)

    T_2 = int(T / 2)

    GDP = np.zeros((T, 1))

    # Distributions
    # Precompute random binomial draws
    xy = np.random.binomial(1, pi, (T, T))
    xy[T_2, T_2] = 1

    # Containers
    s = np.zeros((T, T))
    A = np.ones((N, 6))

    # Initializations
    A[:, 1] = T_2
    A[:, 2] = T_2
    m = np.zeros((T, T))
    m[T_2, T_2] = N
    dest = np.zeros((N, 2))

    """ Begin ABM Code """
    for t in range(T):
        w = np.zeros((N, N))
        signal = np.zeros((N, N))

        for i in range(N):
            for j in range(N):
                if i != j:
                    if A[j, 0] == 1:
                        w[i, j] = np.exp(-rho * (np.abs(A[j, 1] - A[i, 1]) + \
                                                 np.abs(A[j, 2] - A[i, 2])))

                        if np.random.rand() < w[i, j]:
                            signal[i, j] = s[int(A[j, 1]), int(A[j, 2])]

            if A[i, 0] == 1:
                A[i, 4] = s[int(A[i, 1]), int(A[i, 2])] * \
                          m[int(A[i, 1]), int(A[i, 2])] ** alpha
                A[i, 3] = s[int(A[i, 1]), int(A[i, 2])]

            if A[i, 0] == 3:
                A[i, 4] = 0
                rnd = np.random.rand()
                if rnd <= 0.25:
                    A[i, 1] += 1
                else:
                    if rnd <= 0.5:
                        A[i, 1] -= 1
                    else:
                        if rnd <= 0.75:
                            A[i, 2] += 1
                        else:
                            A[i, 2] -= 1

                if xy[int(A[i, 1]), int(A[i, 2])] == 1:
                    A[i, 0] = 1
                    m[int(A[i, 1]), int(A[i, 2])] += 1
                    if m[int(A[i, 1]), int(A[i, 2])] == 1:
                        s[int(A[i, 1]), int(A[i, 2])] = \
                            (1 + int(np.random.poisson(lambda_param))) * \
                            (A[i, 1] + A[i, 2]) + phi * A[i, 5] + np.random.randn()

            if (A[i, 0] == 1) and (np.random.rand() <= eps):
                A[i, 0] = 3
                A[i, 5] = A[i, 4]
                m[int(A[i, 1]), int(A[i, 2])] -= 1

            if t > T / 100:
                if A[i, 0] == 2:
                    A[i, 4] = 0
                    if dest[i, 0] != A[i, 1]:
                        if dest[i, 0] > A[i, 1]:
                            A[i, 1] += 1
                        else:
                            A[i, 1] -= 1
                    else:
                        if dest[i, 1] != A[i, 2]:
                            if dest[i, 1] > A[i, 2]:
                                A[i, 2] += 1
                            else:
                                A[i, 2] -= 1
                    if (dest[i, 0] == A[i, 1]) and (dest[i, 1] == A[i, 2]):
                        A[i, 0] = 1
                        m[int(dest[i, 0]), int(dest[i, 1])] += 1
                if A[i, 0] == 1:
                    best_sig = np.max(signal[i, :])
                    if best_sig > s[int(A[i, 1]), int(A[i, 2])]:
                        A[i, 0] = 2
                        A[i, 5] = A[i, 4]
                        m[int(A[i, 1]), int(A[i, 2])] -= 1
                        index = np.where(signal[i, :] == best_sig)[0]
                        if index.shape[0] > 1:
                            ind = int(index[int(np.random.uniform(0, len(index)))])
                        else:
                            ind = int(index)
                        dest[i, 0] = A[ind, 1]
                        dest[i, 1] = A[ind, 2]

        GDP[t, 0] = np.sum(A[:, 4])

    #JH fix around the divide-by-zero error suppressed in the original code
    np.seterr(divide='ignore')
    log_GDP = np.log(GDP)
    np.seterr(divide='warn')
    log_GDP[np.isneginf(log_GDP)] = 0

    return log_GDP
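
# Hedged usage sketch: run the model with its default parameterization and an
# arbitrary seed, then inspect the returned log-GDP series.
log_gdp = island_abm(T=100, N=50, _RNG_SEED=42)
print(log_gdp.shape)     # (100, 1)
print(log_gdp[-1, 0])    # log-GDP in the final period (0 wherever GDP was zero)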
Example #49
0
    def fit(self, X):
        """Estimate model parameters with the expectation-maximization
        algorithm.

        An initialization step is performed before entering the EM
        algorithm. If you want to avoid this step, set the keyword
        argument init_params to the empty string '' when creating the
        GMM object. Likewise, if you would like just to do an
        initialization, set n_iter=0.

        Parameters
        ----------
        X : array_like, shape (n, n_features)
            List of n_features-dimensional data points.  Each row
            corresponds to a single data point.
        """
        ## initialization step
        X = np.asarray(X, dtype=np.float64)
        if X.ndim == 1:
            X = X[:, np.newaxis]
        if X.shape[0] < self.n_components:
            raise ValueError(
                'GMM estimation with %s components, but got only %s samples' %
                (self.n_components, X.shape[0]))

        max_log_prob = -np.infty

        for _ in range(self.n_init):
            if 'm' in self.init_params or not hasattr(self, 'means_'):
                self.means_ = cluster.KMeans(
                    n_clusters=self.n_components,
                    random_state=self.random_state).fit(X).cluster_centers_

            if 'w' in self.init_params or not hasattr(self, 'weights_'):
                self.weights_ = np.tile(1.0 / self.n_components,
                                        self.n_components)

            if 'c' in self.init_params or not hasattr(self, 'covars_'):
                cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
                if not cv.shape:
                    cv.shape = (1, 1)
                self.covars_ = \
                    distribute_covar_matrix_to_match_covariance_type(
                        cv, self.covariance_type, self.n_components)

            # EM algorithms
            log_likelihood = []
            # reset self.converged_ to False
            self.converged_ = False
            for i in range(self.n_iter):
                # Expectation step
                curr_log_likelihood, responsibilities = self.score_samples(X)
                log_likelihood.append(curr_log_likelihood.sum())

                # Check for convergence.
                if i > 0 and abs(log_likelihood[-1] - log_likelihood[-2]) < \
                        self.thresh:
                    self.converged_ = True
                    break

                # Maximization step
                self._do_mstep(X, responsibilities, self.params,
                               self.min_covar)

            # if the results are better, keep it
            if self.n_iter:
                if log_likelihood[-1] > max_log_prob:
                    max_log_prob = log_likelihood[-1]
                    best_params = {'weights': self.weights_,
                                   'means': self.means_,
                                   'covars': self.covars_}
        # check the existence of an init param that was not subject to
        # likelihood computation issue.
        if np.isneginf(max_log_prob) and self.n_iter:
            raise RuntimeError(
                "EM algorithm was never able to compute a valid likelihood " +
                "given initial parameters. Try different init parameters " +
                "(or increasing n_init) or check for degenerate data.")
        # self.n_iter == 0 occurs when using GMM within HMM
        if self.n_iter:
            self.covars_ = best_params['covars']
            self.means_ = best_params['means']
            self.weights_ = best_params['weights']
        return self
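
# The isneginf guard above is an instance of a generic best-of-restarts pattern;
# a minimal standalone sketch follows. run_once is a hypothetical callable that
# performs one EM run and returns (log-likelihood, parameters).
import numpy as np

def best_of_restarts(run_once, n_init):
    best_ll, best_params = -np.inf, None
    for _ in range(n_init):
        ll, params = run_once()
        if ll > best_ll:
            best_ll, best_params = ll, params
    if np.isneginf(best_ll):
        # no restart ever produced a finite likelihood
        raise RuntimeError("EM never computed a valid likelihood; try other init parameters")
    return best_params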
Example #50
0
    def get_format_func(self, elem, **options):
        missing_opt = self.check_options(**options)
        if missing_opt:
            raise Exception("Missing options: {}".format(missing_opt))

        floatmode = options['floatmode']
        precision = None if floatmode == 'unique' else options['precision']
        suppress_small = options['suppress_small']
        sign = options['sign']
        infstr = options['infstr']
        nanstr = options['nanstr']
        exp_format = False
        pad_left, pad_right = 0, 0

        # only the finite values are used to compute the number of digits
        finite = umath.isfinite(elem)
        finite_vals = elem[finite]
        nonfinite_vals = elem[~finite]

        # choose exponential mode based on the non-zero finite values:
        abs_non_zero = umath.absolute(finite_vals[finite_vals != 0])
        if len(abs_non_zero) != 0:
            max_val = np.max(abs_non_zero)
            min_val = np.min(abs_non_zero)
            with np.errstate(over='ignore'):  # division can overflow
                if max_val >= 1.e8 or (not suppress_small and
                                       (min_val < 0.0001
                                        or max_val / min_val > 1000.)):
                    exp_format = True

        # do a first pass of printing all the numbers, to determine sizes
        if len(finite_vals) == 0:
            trim, exp_size, unique = '.', -1, True
        elif exp_format:
            trim, unique = '.', True
            if floatmode == 'fixed':
                trim, unique = 'k', False
            strs = (format_float_scientific(x,
                                            precision=precision,
                                            unique=unique,
                                            trim=trim,
                                            sign=sign == '+')
                    for x in finite_vals)
            frac_strs, _, exp_strs = zip(*(s.partition('e') for s in strs))
            int_part, frac_part = zip(*(s.split('.') for s in frac_strs))
            exp_size = max(len(s) for s in exp_strs) - 1

            trim = 'k'
            precision = max(len(s) for s in frac_part)

            # this should be only 1 or 2. Can be calculated from sign.
            pad_left = max(len(s) for s in int_part)
            # pad_right is only needed for nan length calculation
            pad_right = exp_size + 2 + precision

            unique = False
        else:
            trim, unique = '.', True
            if floatmode == 'fixed':
                trim, unique = 'k', False
            strs = (format_float_positional(x,
                                            precision=precision,
                                            fractional=True,
                                            unique=unique,
                                            trim=trim,
                                            sign=sign == '+')
                    for x in finite_vals)
            int_part, frac_part = zip(*(s.split('.') for s in strs))
            pad_left = max(len(s) for s in int_part)
            pad_right = max(len(s) for s in frac_part)
            exp_size = -1

            if floatmode in ['fixed', 'maxprec_equal']:
                precision = pad_right
                unique = False
                trim = 'k'
            else:
                unique = True
                trim = '.'

        # account for sign = ' ' by adding one to pad_left
        if sign == ' ' and not any(np.signbit(finite_vals)):
            pad_left += 1

        # account for nan and inf in pad_left
        if len(nonfinite_vals) != 0:
            nanlen, inflen = 0, 0
            if np.any(umath.isinf(nonfinite_vals)):
                neginf = sign != '-' or np.any(np.isneginf(nonfinite_vals))
                inflen = len(infstr) + neginf
            if np.any(umath.isnan(elem)):
                nanlen = len(nanstr)
            offset = pad_right + 1  # +1 for decimal pt
            pad_left = max(nanlen - offset, inflen - offset, pad_left)

        def print_nonfinite(x):
            with errstate(invalid='ignore'):
                if umath.isnan(x):
                    ret = ('+' if sign == '+' else '') + nanstr
                else:  # isinf
                    infsgn = '-' if x < 0 else '+' if sign == '+' else ''
                    ret = infsgn + infstr
                return ' ' * (pad_left + pad_right + 1 - len(ret)) + ret

        if exp_format:

            def print_finite(x):
                return format_float_scientific(x,
                                               precision=precision,
                                               unique=unique,
                                               trim=trim,
                                               sign=sign == '+',
                                               pad_left=pad_left,
                                               exp_digits=exp_size)
        else:

            def print_finite(x):
                return format_float_positional(x,
                                               precision=precision,
                                               unique=unique,
                                               fractional=True,
                                               trim=trim,
                                               sign=sign == '+',
                                               pad_left=pad_left,
                                               pad_right=pad_right)

        def fmt(x):
            if umath.isfinite(x):
                return print_finite(x)
            else:
                return print_nonfinite(x)

        return fmt
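
# Hedged sketch of the NumPy helpers this formatter builds on
# (numpy.format_float_positional / numpy.format_float_scientific); the values
# and options shown are illustrative, mirroring a few of the arguments used above.
import numpy as np
print(np.format_float_positional(0.1, precision=4, unique=False, trim='k'))   # fixed count of fractional digits
print(np.format_float_scientific(1234.5, precision=2, exp_digits=3))          # scientific form with a padded exponent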
Example #51
0
    def _fit(self, X, w=None, y=None, do_prediction=False):
        """Estimate model parameters with the EM algorithm.

        An initialization step is performed before entering the
        expectation-maximization (EM) algorithm. If you want to avoid
        this step, set the keyword argument init_params to the empty
        string '' when creating the GMM object. Likewise, if you would
        like just to do an initialization, set n_iter=0.

        Parameters
        ----------
        X : array_like, shape (n, n_features)
            List of n_features-dimensional data points.  Each row
            corresponds to a single data point.
        w : array-like, shape = [n_samples] (optional)
            Sample weights

        Returns
        -------
        responsibilities : array, shape (n_samples, n_components)
            Posterior probabilities of each mixture component for each
            observation.
        """

        # initialization step
        X = check_array(X,
                        dtype=np.float64,
                        ensure_min_samples=2,
                        estimator=self)
        if X.shape[0] < self.n_components:
            raise ValueError(
                'GMM estimation with %s components, but got only %s samples' %
                (self.n_components, X.shape[0]))

        max_log_prob = -np.infty

        if self.verbose > 0:
            print('Expectation-maximization algorithm started.')

        for init in range(self.n_init):
            if self.verbose > 0:
                print('Initialization ' + str(init + 1))
                start_init_time = time()

            if 'm' in self.init_params or not hasattr(self, 'means_'):
                self.means_ = cluster.KMeans(
                    n_clusters=self.n_components,
                    random_state=self.random_state).fit(X).cluster_centers_
                if self.verbose > 1:
                    print('\tMeans have been initialized.')

            if 'w' in self.init_params or not hasattr(self, 'weights_'):
                self.weights_ = np.tile(1.0 / self.n_components,
                                        self.n_components)
                if self.verbose > 1:
                    print('\tWeights have been initialized.')

            if 'c' in self.init_params or not hasattr(self, 'covars_'):
                cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
                if not cv.shape:
                    cv.shape = (1, 1)
                self.covars_ = \
                    distribute_covar_matrix_to_match_covariance_type(
                        cv, self.covariance_type, self.n_components)
                if self.verbose > 1:
                    print('\tCovariance matrices have been initialized.')

            # EM algorithms
            current_log_likelihood = None
            # reset self.converged_ to False
            self.converged_ = False

            # this line should be removed when 'thresh' is removed in v0.18
            tol = (self.tol if self.thresh is None else self.thresh /
                   float(X.shape[0]))

            for i in range(self.n_iter):
                if self.verbose > 0:
                    print('\tEM iteration ' + str(i + 1))
                    start_iter_time = time()
                prev_log_likelihood = current_log_likelihood
                # Expectation step
                log_likelihoods, responsibilities = self.score_samples(X, w)
                current_log_likelihood = log_likelihoods.mean()

                # Check for convergence.
                # (should compare to self.tol when deprecated 'thresh' is
                # removed in v0.18)
                if prev_log_likelihood is not None:
                    change = abs(current_log_likelihood - prev_log_likelihood)
                    if self.verbose > 1:
                        print('\t\tChange: ' + str(change))
                    if change < tol:
                        self.converged_ = True
                        if self.verbose > 0:
                            print('\t\tEM algorithm converged.')
                        break

                # Maximization step
                self._do_mstep(X, w, responsibilities, self.params,
                               self.min_covar)
                if self.verbose > 1:
                    print('\t\tEM iteration ' + str(i + 1) +
                          ' took {0:.5f}s'.format(time() - start_iter_time))

            # if the results are better, keep it
            if self.n_iter:
                if current_log_likelihood > max_log_prob:
                    max_log_prob = current_log_likelihood
                    best_params = {
                        'weights': self.weights_,
                        'means': self.means_,
                        'covars': self.covars_
                    }
                    if self.verbose > 1:
                        print('\tBetter parameters were found.')

            if self.verbose > 1:
                print('\tInitialization ' + str(init + 1) +
                      ' took {0:.5f}s'.format(time() - start_init_time))

        # check the existence of an init param that was not subject to
        # likelihood computation issue.
        if np.isneginf(max_log_prob) and self.n_iter:
            raise RuntimeError(
                "EM algorithm was never able to compute a valid likelihood " +
                "given initial parameters. Try different init parameters " +
                "(or increasing n_init) or check for degenerate data.")

        if self.n_iter:
            self.covars_ = best_params['covars']
            self.means_ = best_params['means']
            self.weights_ = best_params['weights']
        else:  # self.n_iter == 0 occurs when using GMM within HMM
            # Need to make sure that there are responsibilities to output
            # Output zeros because it was just a quick initialization
            responsibilities = np.zeros((X.shape[0], self.n_components))

        return responsibilities
Example #52
0
def estimate_deltas(
    G,
    intervened_node: str,
    n_timesteps: int,
    start_year: int,
    start_month: int,
    country: Optional[str] = "South Sudan",
    state: Optional[str] = None,
):
    """ Utility function that estimates Rate of Change (deltas) for the
    intervened node per timestep. This will use the units that the CAG
    was parameterized with. WARNING: The state and country should be same as what was
    passed to G.parameterize() or else you could get mismatched data.

    Deltas are estimated by percent change between each time step. (i.e,
    (current - next)/current). Heuristics are in place to handle NAN and INF
    values. If changed from 0 to 0 (NAN case), then delta = 0. If increasing
    from 0 (+INF case), then delta = positive absolute mean of all finite
    deltas. If decreasing from 0 (-INF case), then delta = negative absolute
    mean of all finite deltas.

    See function get_true_values to see how the data is aggregated to fill in
    values for missing time points which calculating the deltas.

    Args:
        G: A completely parameterized and quantified CAG with indicators,
        estimated transition matrx, and indicator values.

        intervened_node: A string of the full name of the node in which we
        are intervening on.

        n_timesteps: Number of time steps.

        start_year: The starting year (e.g, 2012).

        start_month: The starting month (1-12).

    Returns:
        1D numpy array of deltas.
    """

    intervener_indicator = list(
        G.nodes(data=True)[intervened_node]["indicators"].keys())[0]

    query_base = " ".join([
        f"select * from indicator",
        f"where `Variable` like '{intervener_indicator}'",
    ])

    query_parts = {"base": query_base}

    if country is not None:
        check_q = query_parts["base"] + f"and `Country` is '{country}'"
        check_r = list(engine.execute(check_q))
        if check_r == []:
            warnings.warn(
                f"Selected Country not found for {intervener_indicator}! Using default settings (South Sudan)"
            )
            query_parts["country"] = f"and `Country` is 'South Sudan'"
        else:
            query_parts["country"] = f"and `Country` is '{country}'"
    if state is not None:
        check_q = query_parts["base"] + f"and `State` is '{state}'"
        check_r = list(engine.execute(check_q))
        if check_r == []:
            warnings.warn(
                f"Selected State not found for {intervener_indicator}! Using default settings (Aggregration over all States)"
            )
            query_parts["state"] = ""
        else:
            query_parts["state"] = f"and `State` is '{state}'"

    unit = list(
        G.nodes(data=True)[intervened_node]["indicators"].values())[0].unit

    int_vals = np.zeros(n_timesteps + 1)
    int_vals[0] = list(
        G.nodes(data=True)[intervened_node]["indicators"].values())[0].mean
    year = start_year
    month = start_month
    for j in range(1, n_timesteps + 1):
        query_parts["year"] = f"and `Year` is '{year}'"
        query_parts["month"] = f"and `Month` is '{month}'"

        query = " ".join(query_parts.values())
        results = list(engine.execute(query))

        if results != []:
            int_vals[j] = np.mean(
                [float(r["Value"]) for r in results if r["Unit"] == unit])

            if month == 12:
                year = year + 1
                month = 1
            else:
                month = month + 1
            continue

        query_parts["month"] = ""
        query = " ".join(query_parts.values())
        results = list(engine.execute(query))

        if results != []:
            int_vals[j] = np.mean(
                [float(r["Value"]) for r in results if r["Unit"] == unit])

            if month == 12:
                year = year + 1
                month = 1
            else:
                month = month + 1
            continue

        query_parts["year"] = ""
        query = " ".join(query_parts.values())
        results = list(engine.execute(query))

        if results != []:
            int_vals[j] = np.mean(
                [float(r["Value"]) for r in results if r["Unit"] == unit])

            if month == 12:
                year = year + 1
                month = 1
            else:
                month = month + 1
            continue

    per_ch = np.roll(int_vals, -1) - int_vals

    per_ch = per_ch / int_vals

    per_mean = np.abs(np.mean(per_ch[np.isfinite(per_ch)]))

    per_ch[np.isnan(per_ch)] = 0
    per_ch[np.isposinf(per_ch)] = per_mean
    per_ch[np.isneginf(per_ch)] = -per_mean

    return np.delete(per_ch, -1)
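
# Hedged sketch of the same delta heuristic on a toy series; the divide warnings
# at zero values are expected, exactly as in the function above.
import numpy as np
vals = np.array([2.0, 0.0, 0.0, -4.0, 4.0, 2.0])    # hypothetical indicator values
deltas = (np.roll(vals, -1) - vals) / vals          # (next - current) / current
finite_mean = np.abs(np.mean(deltas[np.isfinite(deltas)]))
deltas[np.isnan(deltas)] = 0                        # 0 -> 0
deltas[np.isposinf(deltas)] = finite_mean           # increasing from 0
deltas[np.isneginf(deltas)] = -finite_mean          # decreasing from 0
deltas = np.delete(deltas, -1)                      # drop the wrap-around element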
Example #53
0
    def create_discretised_variables(network, data, node_names, bin_count=4, infinite_extremes=True,
                                     decimal_places=4, mode='EqualFrequencies',
                                     zero_crossing=True, defined_bins: List[Tuple[float, float]] = None):
        node_names = [str(name) for name in node_names]
        if defined_bins is None:
            options = bayesServerDiscovery().DiscretizationOptions()
            options.setInfiniteExtremes(infinite_extremes)
            options.setSuggestedBinCount(bin_count)

            # reads data from either a Pandas dataframe or dask, so will support out of memory and in-memory.
            data_reader_cmd = bayesianpy.data.DaskDataset(data[node_names]).create_data_reader_command().create()

            if mode == 'EqualFrequencies':
                ef = bayesServerDiscovery().EqualFrequencies()
            elif mode == 'EqualIntervals':
                ef = bayesServerDiscovery().EqualIntervals()
            else:
                raise ValueError("mode not recognised")

            columns = jp.java.util.Arrays.asList(
                [bayesServerDiscovery().DiscretizationColumn(name) for name in node_names])
            column_intervals = ef.discretize(data_reader_cmd, columns,
                                             bayesServerDiscovery().DiscretizationAlgoOptions())

            for i, interval in enumerate(column_intervals):

                intervals = list(interval.getIntervals().toArray())
                if zero_crossing:
                    end_point_value = 0.5

                    zero = bayesServer().Interval(jp.java.lang.Double(jp.java.lang.Double.NEGATIVE_INFINITY),
                                                  jp.java.lang.Double(end_point_value),
                                                  bayesServer().IntervalEndPoint.CLOSED,
                                                  bayesServer().IntervalEndPoint.OPEN)

                    if 0.5 < intervals[0].getMaximum().floatValue():
                        # if the interval starts and ends at end_point_value then remove it
                        if intervals[0].getMaximum() == end_point_value:
                            intervals.pop(0)
                        else:
                            intervals[0].setMinimum(jp.java.lang.Double(0.5))
                            intervals[0].setMinimumEndPoint(bayesServer().IntervalEndPoint.CLOSED)

                        intervals = [zero] + intervals

                v = bayesServer().Variable(node_names[i], bayesServer().VariableValueType.DISCRETE)
                v.setStateValueType(bayesServer().StateValueType.DOUBLE_INTERVAL)
                n = bayesServer().Node(v)
                for interval in intervals:
                    v.getStates().add(
                          bayesServer().State("{}".format(Builder._create_interval_name(interval, decimal_places)),
                                        interval))

                network.getNodes().add(n)
                yield n

        else:
            for node in node_names:
                intervals = []
                for bin in defined_bins:
                    minEndPoint = bayesServer().IntervalEndPoint.CLOSED
                    maxEndPoint = bayesServer().IntervalEndPoint.OPEN

                    if np.isneginf(float(bin[0])):
                        a = jp.java.lang.Double(jp.java.lang.Double.NEGATIVE_INFINITY)
                    else:
                        a = jp.java.lang.Double(bin[0])

                    if np.isposinf(float(bin[1])):
                        b = jp.java.lang.Double(jp.java.lang.Double.POSITIVE_INFINITY)
                    else:
                        b = jp.java.lang.Double(bin[1])

                    intervals.append(
                        bayesServer().Interval(a, b, minEndPoint,
                                               maxEndPoint))

                v = bayesServer().Variable(node, bayesServer().VariableValueType.DISCRETE)
                v.setStateValueType(bayesServer().StateValueType.DOUBLE_INTERVAL)
                n = bayesServer().Node(v)
                for interval in intervals:
                    v.getStates().add(
                        bayesServer().State("{}".format(Builder._create_interval_name(interval, decimal_places)),
                                            interval))

                network.getNodes().add(n)
                yield n
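
# Hedged sketch of the shape expected for the defined_bins argument above:
# (low, high) tuples, where infinite endpoints mark open-ended extreme bins.
import numpy as np
defined_bins = [(float('-inf'), 0.0), (0.0, 10.0), (10.0, float('inf'))]
print([(np.isneginf(lo), np.isposinf(hi)) for lo, hi in defined_bins])
# -> [(True, False), (False, False), (False, True)]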
Example #54
0
 def calculate_log_score(self, pssm):
     pssm = np.log2(pssm) * 2
     np.place(pssm, np.isneginf(pssm), -20)
     return pssm
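
# Hedged sketch of the same transform on a toy probability matrix; np.place
# overwrites the -inf entries that log2(0) produces (with a divide warning).
import numpy as np
pssm = np.array([[0.5, 0.25],
                 [0.0, 0.25]])
scores = np.log2(pssm) * 2
np.place(scores, np.isneginf(scores), -20)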
Example #55
0
def display_rotated_image_and_wfc3_image(combined_image1, flist, wfc3_image, target_font_size, ff = -8.1, log = True, save_filename = 'rotated_img', cmap1 = 'jet', clim1 = None, clim2 = None, save = False, ax1_title = None, ax2_title = None):
    '''
    ########################################################################################################################
    #This function displays the rotated multi-slit image next to the WFC3 image
    #Inputs:
    #    combined_image1: the multi-slit image array
    #    flist: file list passed through to mark_boundaries
    #    wfc3_image: the file name of the WFC3 image
    #    target_font_size: font size passed through to mark_boundaries
    #    ff: fudge factor used for additional rotation in rotate_image; default = -8.1
    #    log: display the log of the image; default = True
    #    save_filename: save images to this filename; default = 'rotated_img'
    #    cmap1: Color map to use; default = None - use default matplotlib colorbar
    #    clim1: lower contrast limit; default = None - use default matplotlib clim
    #    clim2: upper contrast limit; default = None - use default matplotlib clim
    #    save: switch to enable user to save the file (to save_filename); default = False
    #Output:
    #    the two images are displayed; if the save keyword is set the figure is also saved to save_filename + '.pdf'
    #Calls to:
    #    rotate_image
    #Called from:
    #    create_image
    ########################################################################################################################
    '''
    wfc3_img  = pyfits.getdata(wfc3_image, 0)
    rot_img = rotate_image(combined_image1, 64.0072, 166.002207094, fudge_factor = ff)
    fig = pylab.figure(figsize = [30, 20])
    ax1 = fig.add_subplot(1,2,1)
    ax2 = fig.add_subplot(1,2,2)
    #new_colormap = make_custom_colormap()
    if not cmap1: cmap1 = 'jet'
    #pdb.set_trace()
    new_colormap = getattr(matplotlib.cm, cmap1)
    new_colormap1 = getattr(matplotlib.cm, 'jet')
    norm1 = colors.Normalize(vmin = np.min(np.log10(wfc3_img)[np.isfinite(np.log10(wfc3_img))]) + 0.01, vmax = np.max(np.log10(wfc3_img)[np.isfinite(np.log10(wfc3_img))]))
    new_colormap.set_under('white')
    #pdb.set_trace()
    ax2.imshow(np.log10(wfc3_img), interpolation = 'nearest', cmap = new_colormap1, norm = norm1)

    if log:
        rot_img_log = np.log10(rot_img)
        nan_indx = np.isnan(rot_img_log)
        inf_indx = np.isinf(rot_img_log)
        neg_inf_indx = np.isneginf(rot_img_log)
        rot_img_log[nan_indx] = 0
        rot_img_log[inf_indx] = 0
        rot_img_log[neg_inf_indx] = 0
        rot_img_log[nan_indx] = np.min(rot_img_log) - 1
        rot_img_log[inf_indx] = np.min(rot_img_log) - 1
        rot_img_log[neg_inf_indx] = np.min(rot_img_log) - 1
        #norm2 = colors.Normalize(vmin = np.min(rot_img_log[np.isfinite(rot_img_log)]) + 0.01, vmax = np.max(rot_img_log[np.isfinite(rot_img_log)]))
        norm2 = colors.Normalize(vmin = 0, vmax = np.max(rot_img_log[np.isfinite(rot_img_log)]))
        cax = ax1.imshow(rot_img_log, interpolation = 'nearest', cmap = new_colormap, norm = norm2)
    else:
        norm2 = colors.Normalize(vmin = np.min(rot_img) + 0.01, vmax = np.max(rot_img))
        cax = ax1.imshow(rot_img, interpolation = 'nearest')
    ax2.set_xlim(1300, 2000)
    ax2.set_ylim(1500, 2100)
    ax1.set_xlim(-100, 1100)
    ax1.set_ylim(-20, 490)
    fig.colorbar(cax)
    #if cmap1:
    #    cax.set_cmap(cmap1)
    #pdb.set_trace()
    if clim1:
        cax.set_clim(clim1, clim2)
    ax1 = mark_boundaries(flist, slit_size, combined_image1, rot_img, ax1, 64.0072, 166.002207094, fudge_factor = ff, target_font_size = target_font_size)
    if ax1_title:
        ax1.set_title(ax1_title)
    if ax2_title:
        ax2.set_title(ax2_title)
    pdb.set_trace()
    if save:
        pylab.savefig(save_filename+'.pdf')
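
# Hedged sketch of the log-image sanitization used above: zero the non-finite
# pixels first so min() is well defined, then push them just below the finite
# minimum so a colormap's 'under' color can flag them.
import numpy as np
img = np.array([[10.0, 0.0],
                [100.0, np.nan]])
log_img = np.log10(img)            # produces -inf and nan entries (with warnings)
bad = ~np.isfinite(log_img)
log_img[bad] = 0
log_img[bad] = log_img.min() - 1   # strictly below every finite pixel value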
Example #56
0
 def __log(self, x):
     log = np.log(x)
     log[np.isneginf(log)] = -1e6
     return log
Example #57
0
    def fit(self, X):
        """
        Run the EM algorithm to specified convergence.

        Parameters
        ----------
        X : array_like, shape (n,) + d
            List of data points assumed that the dimensions are such that
            `np.prod(X.shape[1:])==n_features`
        """
        random_state = check_random_state(self.random_state)
        X = np.asarray(X, dtype=self.binary_type)
        if X.ndim == 1:
            X = X[:, np.newaxis]

        data_shape = X.shape[1:]
        # flatten data to just be binary vectors
        data_length = np.prod(data_shape)
        if len(data_shape) > 1:
            X = X.reshape(X.shape[0], data_length)

        if X.shape[0] < self.n_components:
            raise ValueError(
                'BernoulliMM estimation with %s components, but got only %s samples'
                % (self.n_components, X.shape[0]))

        inv_X = 1 - X
        max_log_prob = -np.infty

        # if debug_plot:
        #     plw = ag.plot.PlottingWindow(subplots=(1, self.num_mix), figsize=(self.num_mix*3, 3))

        for cur_init in range(self.n_init):
            if self.verbose:
                print("Current parameter initialization: {0}".format(cur_init))

            if 'm' in self.init_params or not hasattr(self, 'means_'):
                if self.verbose:
                    print("Initializing means")
                indices = np.arange(X.shape[0])
                random_state.shuffle(indices)
                self.means_ = np.array(
                    tuple(
                        np.clip(X[indices[i::self.n_components]].mean(0),
                                self.min_prob, 1 - self.min_prob)
                        for i in range(self.n_components)))

                self.log_odds_, self.log_inv_mean_sums_ = _compute_log_odds_inv_means_sums(
                    self.means_)

            if 'w' in self.init_params or not hasattr(self, 'weights_'):
                if self.verbose:
                    print("Initializing weights")

                self.weights_ = np.tile(1.0 / self.n_components,
                                        self.n_components)

            log_likelihood = []
            self.iterations = 0
            self.converged_ = False
            for i in range(self.n_iter):
                # Expectation Step
                curr_log_likelihood, responsibilities = self.eval(X)
                log_likelihood.append(curr_log_likelihood.sum())
                if self.verbose:
                    print("Iteration {0}: loglikelihood {1}".format(
                        i, log_likelihood[-1]))

                # check for convergence
                if i > 0 and abs(log_likelihood[-1] - log_likelihood[-2])/abs(log_likelihood[-2]) < \
                   self.thresh:
                    self.converged_ = True
                    break

                # ag.info("Iteration {0}: loglikelihood {1}".format(self.iterations, loglikelihood))
                # maximization step
                self._do_mstep(X, responsibilities, self.params, self.min_prob)

            if self.n_iter:
                if log_likelihood[-1] > max_log_prob:
                    if self.verbose:
                        print("updated best params for {0}".format(
                            self.score(X).sum()))
                    max_log_prob = log_likelihood[-1]
                    best_params = {
                        'weights': self.weights_,
                        'means': self.means_
                    }

        # check the existence of an init param that was not subject to
        # likelihood computation issue.
        if np.isneginf(max_log_prob) and self.n_iter:
            raise RuntimeError(
                "EM algorithm was never able to compute a valid likelihood " +
                "given initial parameters. Try different init parameters " +
                "(or increasing n_init) or check for degenerate data.")

        if len(data_shape) > 1:
            X = X.reshape(*((X.shape[0], ) + data_shape))

        if self.n_iter:
            self.means_ = best_params['means']
            self.log_odds_, self.log_inv_mean_sums_ = _compute_log_odds_inv_means_sums(
                self.means_)
            self.weights_ = best_params['weights']

        return self
def unbounded_bivariate_normal_integral(rho, xl, yl):
  """Computes the unbounded bivariate normal integral.
  
  Computes the probability that ``X>=xl and Y>=yl`` where X and Y are jointly
  Gaussian random variables, with mean ``[0., 0.]`` and covariance matrix
  ``[[1., rho], [rho, 1.]]``.
  
  Note: to compute the probability that ``X < xl and Y < yl``, use
  ``unbounded_bivariate_normal_integral(rho, -xl, -yl)``. 

  Inputs:
      :rho: Correlation coefficient of the bivariate normal random variable
      :xl, yl: Lower bounds of the integral
  
  Ported from a Matlab implementation by Alan Genz which, in turn, is based on
  the method described by
      Drezner, Z and G.O. Wesolowsky, (1989),
      On the computation of the bivariate normal integral,
      Journal of Statist. Comput. Simul. 35, pp. 101-107,
  
  Copyright statement of Alan Genz's version:
  ***************
  Copyright (C) 2013, Alan Genz,  All rights reserved.               

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided the following conditions are met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in 
      the documentation and/or other materials provided with the 
      distribution.
    - The contributor name(s) may not be used to endorse or promote 
      products derived from this software without specific prior 
      written permission.
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 
  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 
  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 
  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."""
  
  rho = max(-1., min(1., rho))

  if np.isposinf(xl) or np.isposinf(yl):
    return 0.
  elif np.isneginf(xl):
    return 1. if np.isneginf(yl) else _cdf(-yl)
  elif np.isneginf(yl):
    return _cdf(-xl)
  elif rho == 0:
    return _cdf(-xl)*_cdf(-yl)
  
  tp = 2.*np.pi
  h, k = xl, yl
  hk = h*k
  bvn = 0.
  
  if np.abs(rho) < 0.3:
    # Gauss Legendre points and weights, n =  6
    w = np.array([0.1713244923791705, 0.3607615730481384, 0.4679139345726904])
    x = np.array([0.9324695142031522, 0.6612093864662647, 0.2386191860831970])
  elif np.abs(rho) < 0.75:
    # Gauss Legendre points and weights, n = 12
    w = np.array([0.04717533638651177, 0.1069393259953183, 0.1600783285433464,
                  0.2031674267230659, 0.2334925365383547, 0.2491470458134029])
    x = np.array([0.9815606342467191, 0.9041172563704750, 0.7699026741943050,
                  0.5873179542866171, 0.3678314989981802, 0.1252334085114692])
  else:
    # Gauss Legendre points and weights, n = 20
    w = np.array([.01761400713915212, .04060142980038694, .06267204833410906,
                  .08327674157670475, 0.1019301198172404, 0.1181945319615184,
                  0.1316886384491766, 0.1420961093183821, 0.1491729864726037,
                  0.1527533871307259])
    x = np.array([0.9931285991850949, 0.9639719272779138, 0.9122344282513259,
                  0.8391169718222188, 0.7463319064601508, 0.6360536807265150,
                  0.5108670019508271, 0.3737060887154196, 0.2277858511416451,
                  0.07652652113349733])
  
  w = np.tile(w, 2)
  x = np.concatenate([1.-x, 1.+x])
  
  if np.abs(rho) < 0.925:
    hs = .5 * (h*h + k*k)
    asr = .5*np.arcsin(rho)
    sn = np.sin(asr*x)
    bvn = np.dot(w, np.exp((sn*hk-hs)/(1.-sn**2)))
    bvn = bvn*asr/tp + _cdf(-h)*_cdf(-k) 
  else:
    if rho < 0.:
      k = -k
      hk = -hk
    if np.abs(rho) < 1.:
      ass = 1.-rho**2
      a = np.sqrt(ass)
      bs = (h-k)**2
      asr = -.5*(bs/ass + hk)
      c = (4.-hk)/8.
      d = (12.-hk)/80. 
      if asr > -100.:
        bvn = a*np.exp(asr)*(1.-c*(bs-ass)*(1.-d*bs)/3. + c*d*ass**2)
      if hk  > -100.:
        b = np.sqrt(bs)
        sp = np.sqrt(tp)*_cdf(-b/a)
        bvn = bvn - np.exp(-.5*hk)*sp*b*(1. - c*bs*(1.-d*bs)/3.)
      a = .5*a
      xs = (a*x)**2
      asr = -.5*(bs/xs + hk)
      inds = [i for i, asr_elt in enumerate(asr) if asr_elt>-100.]
      xs = xs[inds]
      sp = 1. + c*xs*(1.+5.*d*xs)
      rs = np.sqrt(1.-xs)
      ep = np.exp(-.5*hk*xs / (1.+rs)**2)/rs
      bvn = (a*np.dot(np.exp(asr[inds])*(sp-ep), w[inds]) - bvn)/tp
    if rho > 0:
      bvn +=  _cdf(-max(h, k)) 
    elif h >= k:
      bvn = -bvn
    else:
      if h < 0.:
        L = _cdf(k)-_cdf(h)
      else:
        L = _cdf(-h)-_cdf(-k)
      bvn =  L - bvn
  
  return max(0., min(1., bvn))
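
# Hedged sanity check, assuming _cdf is the standard normal CDF (as the
# zero-mean, unit-variance setup in the docstring implies); scipy is used only
# to provide the reference value.
from scipy.stats import norm
p = unbounded_bivariate_normal_integral(0.0, 0.5, -0.3)      # rho = 0 factorizes
expected = (1.0 - norm.cdf(0.5)) * (1.0 - norm.cdf(-0.3))
assert abs(p - expected) < 1e-6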
Example #59
0
    df.set_index('date', inplace=True)
    print(' Data found')

    # Save raw data
    outfile_raw = station_name.replace(
        ' ', '_') + '_raw_flow_data_' + sd.replace('-', '') + '_' + ed.replace(
            '-', '') + '.csv'
    df.to_csv(os.path.join(outfolder_raw, outfile_raw))
    print(' Raw data saved to ' + outfile_raw)

    # Calculate variables (logflow1, logflow2, logflow3)
    df_vars = pd.DataFrame(index=df.index)
    for i in range(0, 3):  # logflow1 - logflow3
        col = 'logflow' + str(i + 1)
        df_vars[col] = round(
            np.log10(df['flow'].shift(i + 1, freq='D').astype(float)), 5)
        # Replace -inf from log10(0) with the log of a small nominal flow
        df_vars.loc[np.isneginf(df_vars[col]), col] = round(np.log10(0.005), 5)

    # Save file to directory
    outfile = station_name.replace(' ', '_') + '_Flow_Variables_' + sd.replace(
        '-', '') + '_' + ed.replace('-', '') + '.csv'
    df_vars.to_csv(os.path.join(outfolder, outfile))
    print('  Flow variables calculated and saved to ' + outfile)

    # Summary of data
    missing = len(pd.date_range(start=sd, end=ed, freq='D')) - len(df_vars)

    sum_dict = {
        'ID': station_no,
        'Start Date': str(df_vars.index[0].date()),
        'End Date': str(df_vars.index[-1].date()),
        'Missing Days': missing