Code Example #1
def format_1cs_df(input_df,
                  group_col_list,
                  time_col_list,
                  value_col_list=list(),
                  field_separator='\x1f',
                  record_separator='\x1e'):
    if len(value_col_list) > 0:
        record_separator_columns = [
            group_col_list[-1], time_col_list[-1], value_col_list[-1]
        ]
    else:
        record_separator_columns = [group_col_list[-1], time_col_list[-1]]

    df = input_df.astype(str)
    df_col_list = df.columns.tolist()
    new_line_char = '\n'
    structure_df = _DataFrame(_np.tile(_np.array(df.columns), len(
        df.index)).reshape(len(df.index), -1),
                              index=df.index,
                              columns=df.columns) + '='
    key_value_df = structure_df.add(df)
    for index, col in enumerate(df_col_list):

        if index == len(df_col_list) - 1:
            key_value_df[col] = key_value_df[col].apply(
                lambda row_value: str(row_value) + new_line_char)
        elif col in record_separator_columns:
            key_value_df[col] = key_value_df[col].apply(
                lambda row_value: str(row_value) + record_separator)
        else:
            key_value_df[col] = key_value_df[col].apply(
                lambda row_value: str(row_value) + field_separator)

    return key_value_df
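A minimal usage sketch, assuming the same underscore aliases as the snippet (`_DataFrame` for pandas.DataFrame, `_np` for numpy) and that format_1cs_df is importable from its module; the column names and values are invented for illustration:

from pandas import DataFrame as _DataFrame

sample = _DataFrame({'host': ['a', 'b'],
                     'dc': ['west', 'east'],
                     'ts': [1577836800, 1577836860],
                     'cpu': [0.5, 0.7]})

formatted = format_1cs_df(sample,
                          group_col_list=['host', 'dc'],
                          time_col_list=['ts'],
                          value_col_list=['cpu'])
# Every cell becomes "column=value" plus a trailing separator; the first row is
# ['host=a\x1f', 'dc=west\x1e', 'ts=1577836800\x1e', 'cpu=0.5\n'].
print(formatted.iloc[0].tolist())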
Code Example #2
def ReadFilesToDF(files, delim=None):
    """
	ReadFilesToDF is a helper routine that takes the nominal dataframes (can be singletons)
	labels data index according to any found date index , or monotonically increasing sequence
	"""
    allR = []
    for indexa, elem in enumerate(files):
        try:
            c0 = _read_csv(elem,
                           error_bad_lines=False,
                           header=None,
                           delimiter=delim,
                           engine='python')
        except _EmptyDataError as err:
            c0 = _DataFrame()
            c0[0] = [0]
        try:
            c0['date'] = _parse(elem, fuzzy=True)
        except ValueError as err:
            dateFound = FindDate(elem)
            if len(dateFound) == 0:
                c0['date'] = indexa
            else:
                c0['date'] = dateFound[0]
        allR.append(c0)
    merged = _concat(allR, ignore_index=True)
    merged.index = merged['date']
    return allR, merged
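A short usage sketch, assuming `_read_csv`/`_concat`/`_DataFrame` are pandas aliases, `_parse` is dateutil.parser.parse, and `FindDate` is the module's own helper; the file names are hypothetical. (As an aside, `error_bad_lines` was removed in pandas 2.0; `on_bad_lines='skip'` is the modern equivalent.)

# Each hypothetical CSV is read into its own DataFrame; the merged frame is
# indexed by the date parsed fuzzily from each file name.
files = ['metrics_2020-01-01.csv', 'metrics_2020-01-02.csv']
frames, merged = ReadFilesToDF(files, delim=',')
print(len(frames), merged.index.unique())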
Code Example #3
File: geometry.py Project: anaavu/rcview-pytools
def _to_SpatialDataFrame(self, spatial_reference=None):
    """Return an arcgis spatially-enabled data frame.

    Arguments:
    spatial_reference  Either None (the default), a spatial reference integer
                       code, or a definition dictionary (for example
                       {'wkid': 3857}). If None, the spatial reference will be
                       extracted from the GeoDataFrame if it is defined using an
                       EPSG code.
    """
    if not spatial_reference:
        crs = self.crs
        if crs and 'init' in crs:
            if isinstance(crs, dict):
                crs = crs['init']
            if 'epsg' in crs:
                m = _re.search(r'epsg:(\d+)', crs)
                if m:
                    spatial_reference = int(m.groups()[0])

    if not spatial_reference:
        spatial_reference = 4326
        _warnings.simplefilter('always', UserWarning)
        _warnings.warn(
            'Unable to extract a spatial reference, assuming latitude/longitude (wkid 4326).'
        )

    sdf = _DataFrame(data=self.drop('geometry', axis=1))
    sdf['SHAPE'] = self.geometry.apply(_as_arcgis,
                                       spatial_reference=spatial_reference)

    return sdf
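A hedged usage sketch that calls the function directly with a GeoDataFrame as `self` (rcview-pytools presumably binds it as a GeoDataFrame method elsewhere); the shapefile path is hypothetical, geopandas and arcgis must be installed, and the crs-sniffing branch above assumes the older geopandas convention where `.crs` is a dict or string containing an 'epsg:' code, so the sketch passes spatial_reference explicitly:

import geopandas as gpd

gdf = gpd.read_file('parcels.shp')        # hypothetical input file
# Passing spatial_reference explicitly skips the crs inspection above.
sdf = _to_SpatialDataFrame(gdf, spatial_reference={'wkid': 4326})
print(sdf['SHAPE'].head())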
Code Example #4
def PacketAnalysis(df):
    # Only keep non-zero entries
    df = df.dropna()
    df = df[(df.iloc[:,1:].T != 0).any()]

    dfValue = df.iloc[:,1]

    dfStart = df[dfValue > 0]['time']
    dfEnd = df[dfValue < 0]['time'].reset_index(drop=True)

    startList = []
    endList = []

    for burstStart in dfStart:
        laterEnds = dfEnd[(dfEnd > burstStart)]

        if laterEnds.shape[0] > 0:
            burstEnd = laterEnds.sort_values().iloc[0]
        else:
            burstEnd = None

        startList.append(burstStart)
        endList.append(burstEnd)

    return _DataFrame({'start':startList, 'end':endList}, 
        columns=['start', 'end'])
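A small usage sketch; the frame below mimics the output of PacketDetect (Code Example #8): a 'time' column plus a signal column whose +1/-1 entries mark burst starts and ends (the values are invented):

from pandas import DataFrame as _DataFrame

edges = _DataFrame({'time':   [0.0, 0.1, 0.2, 0.3, 0.4],
                    'signal': [0,   1,   0,  -1,   0]})

bursts = PacketAnalysis(edges)
# One row per detected burst: start=0.1, end=0.3
print(bursts)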
Code Example #5
    def dataframe(self):
        """Pandas DataFrame representation of this path."""
        if self._dataframe is None:  # avoid truth-testing a DataFrame, which raises ValueError
            if _DataFrame:
                self._dataframe = _DataFrame(self._results)
            else:
                logger.debug("'pandas' package not installed")
        return self._dataframe
Code Example #6
def get_historical_price(fromSymbol, toSymbol=_CURR, frequency='d', limit=2000):
    '''
    :param frequency: 'd', 'h' or 'm'
    :param limit: number of bars to return (the API maximum appears to be 2000)
    :return: the last `limit` bars of fromSymbol, priced in toSymbol
    '''
    url = f"{_URL_CRYPTOCOMPARE}/{FREQUENCY[frequency]}?fsym={fromSymbol}&tsym={toSymbol}&limit={limit}"  #&e={exchange}"
    rawdata = _requests.get(url).content.decode()
    parseddata = _loads(rawdata)
    try:
        df = _DataFrame(parseddata['Data'])
        if not df.empty:
            df = df.set_index('time')
            df.index = _to_datetime(df.index, unit='s')
    except Exception as inst:
        print(f"{type(inst)}: {inst}")    # the exception instance
        df = _DataFrame()
    return df
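A usage sketch, assuming `_CURR`, `FREQUENCY`, and `_URL_CRYPTOCOMPARE` are defined elsewhere in the module (for instance `_CURR = 'USD'` and `FREQUENCY` mapping 'd'/'h'/'m' to CryptoCompare's histoday/histohour/histominute endpoints):

btc = get_historical_price('BTC', 'USD', frequency='d', limit=30)
if not btc.empty:
    # CryptoCompare's history endpoints typically return open/high/low/close and
    # volume columns, indexed here by the bar timestamp.
    print(btc[['open', 'high', 'low', 'close']].tail())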
Code Example #7
def ReadData(paths, delim=None):
    """
	ReadData requires list of wild card paths, read in each set and store in master data frame
	"""
    merged = _DataFrame()
    for indexa, elem in enumerate(paths):
        files = sorted(_glob(elem))
        allR, m1 = ReadFilesToDF(files, delim=delim)
        merged[indexa] = m1[0]
    return merged
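A call sketch, assuming `_glob` is glob.glob as the alias suggests; the wildcard paths are hypothetical:

merged = ReadData(['logs/node1/*.csv', 'logs/node2/*.csv'], delim=',')
# One column per wildcard path, each holding column 0 of that path's merged frame,
# aligned on the 'date' index produced by ReadFilesToDF.
print(merged.head())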
Code Example #8
def PacketDetect(df, time, thresh=0, windowSize=1):
    dt = time[1] - time[0]

    df = LowPassFilter(df, dt, windowSize=windowSize)    

    # Filter out non-peaks
    df[(df < thresh)] = 0
    df[(df != 0)] = 1
    
    df = df.diff()
    
    df = _DataFrame(df)
    df.insert(0, 'time', time)
    
    return df
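A usage sketch, assuming the module's LowPassFilter helper is available and returns a smoothed pandas Series; the synthetic signal is invented for illustration:

import numpy as _np
from pandas import Series as _Series

time = _np.arange(0.0, 1.0, 0.01)
signal = _Series((_np.sin(2 * _np.pi * 5 * time) > 0.8).astype(float))  # synthetic bursts

edges = PacketDetect(signal, time, thresh=0.5, windowSize=3)
# 'time' column plus +1/-1 entries marking where bursts switch on and off;
# this is the kind of frame PacketAnalysis (Code Example #4) expects.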
Code Example #9
def EvaluateSets(p1, p2, cols=None):
    if cols is None:
        cols = p1.columns

    df = _DataFrame(index=cols,
                    columns=['Euclid', 'Supremum', 'MSE', 'RMSE', 'NRMSE'])

    for col in cols:
        df.loc[col, 'Euclid'] = Euclid(p1[col], p2[col])
        df.loc[col, 'Supremum'] = Supremum(p1[col], p2[col])
        df.loc[col, 'MSE'] = MSE(p1[col], p2[col])
        df.loc[col, 'RMSE'] = RMSE(p1[col], p2[col])
        df.loc[col, 'NRMSE'] = NRMSE(p1[col], p2[col])

    return df
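A usage sketch, assuming the error-metric helpers (Euclid, Supremum, MSE, RMSE, NRMSE) are defined alongside EvaluateSets and that p1 and p2 are like-shaped DataFrames; the values are synthetic:

from pandas import DataFrame as _DataFrame

p1 = _DataFrame({'x': [1.0, 2.0, 3.0], 'y': [0.5, 0.4, 0.3]})
p2 = _DataFrame({'x': [1.1, 1.9, 3.2], 'y': [0.6, 0.4, 0.2]})

scores = EvaluateSets(p1, p2)          # one row per column, one column per metric
print(scores.loc['x', 'RMSE'])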
Code Example #10
File: xps.py Project: ddkn/xps
    def get_data(self, return_dict=False):
        data_dict = {
            'peak_id'           : [self.__peak_id],
            'binding_energy'    : [self.__be],
            'kinetic_energy'    : [self.get_kinetic_energy()],
            'transmission_func' : [self.get_transmission()],
            'sf_wagner'         : [self.__sf_wagner],
            'sf_machine'        : [self.get_sf_machine()],
            'peak'              : [self.__peak_area],
            'peak_corrected'    : [self.get_peak_correction()],
        }

        if hasattr(self, '_XPSPeak__matrix_component'):
            data_dict['matrix_component'] = [self.__matrix_component]

        if return_dict:
            return data_dict
        return _DataFrame(data=data_dict)
Code Example #11
def _from_records(vals, keys, typs=None, idxs=None):
    """ create a table from a series of records
    """
    if numpy.size(vals):
        assert numpy.ndim(vals) in (1, 2)
        vals = (numpy.array(vals) if numpy.ndim(vals) == 2 else numpy.reshape(
            vals, (-1, 1)))
    else:
        vals = numpy.reshape([], (0, len(keys)))
    nrows, ncols = numpy.shape(vals)

    idxs = (_RangeIndex(stop=nrows, name=IDX_KEY)
            if idxs is None else _Int64Index(idxs, name=IDX_KEY))
    typs = (object, ) * ncols if typs is None else typs
    typs = tuple(map(dt_, typs))
    assert len(keys) == len(typs) == ncols and len(idxs) == nrows
    assert IDX_KEY not in keys

    cols = numpy.transpose(vals)
    data = _OrderedDict((key, _Series(data=col, dtype=typ, index=idxs))
                        for col, key, typ in zip(cols, keys, typs))

    return _DataFrame(data=data, index=idxs)
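A hedged call sketch, assuming `IDX_KEY` is the module's index-column name and `dt_` normalizes the dtype specifiers, as the assertions above imply; the record values are invented:

vals = [['H', 1.008], ['He', 4.003]]
tbl = _from_records(vals, keys=('symbol', 'mass'), typs=(str, float))
# A two-row DataFrame with a RangeIndex named IDX_KEY and columns 'symbol' and 'mass'.
print(tbl)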
Code Example #12
def _matricize_ybus(item):

    raw_n_ord = _lower(_odr.Circuit.YNodeOrder())
    mtx = _matricize(item)
    return _DataFrame(data=mtx, index=raw_n_ord, columns=raw_n_ord)
Code Example #13
def openTSDB_data_processor(metric_names=None,
                            query_string=None,
                            query_offset=20,
                            tolerance=0.0):

    logger = _getLogger(__name__)
    logger.info('read failure tolerance is %s' % tolerance)
    logger.info('number of metric names: %d' % (len(metric_names)))
    metrics = list(
        set([
            'm=none:' + elem['metric']
            for elem in _itertools.chain.from_iterable(metric_names)
        ]))
    logger.info('number of metrics as set: %d' % (len(metrics)))
    response_df_list = [(_requests.get('%s%s' % (query_string, "&".join(
        metrics[queryIndex:queryIndex + query_offset]))).json())
                        for queryIndex in range(0, len(metrics), query_offset)]

    flattened_list = list(_itertools.chain(*response_df_list))
    logger.info('number of elements in response list: %d' %
                (len(flattened_list)))
    if len(metrics) > 0:
        logger.info(str(metrics[:1]))
    super_dict = {'metric': [], 'dps': [], 'tags': [], 'aggregateTags': []}
    num_defective_records = 0
    num_correct_records = 0
    for ite in flattened_list:
        if ite == 'error' or type(ite) is not dict:
            num_defective_records += 1
        else:
            merge_dicts(super_dict, ite)
            num_correct_records += 1

    max_defective_records = num_defective_records * query_offset
    total_num_queries = max_defective_records + num_correct_records
    logger.info('number of read request errors are: %s' %
                num_defective_records)
    logger.info('max number of failed reads are: %s' % max_defective_records)
    logger.info('number of total queries sent are %s' % total_num_queries)
    if total_num_queries > 0:
        read_error_perc = max_defective_records / total_num_queries
    else:
        #in state where metrics were sent to tsdb for query, but no time series were returned.
        logger.warning('total_num_queries is zero; no input data found in tsdb')
        raise ValueError("NoRecordsFound")

    if read_error_perc > tolerance:
        raise ValueError(
            'Stopping execution as read errors exceeded tolerance for num metrics queried that do not exist in OTSDB. Read error percent is %s and tolerance is %s'
            % (read_error_perc, tolerance))

    causal_data_frame = _DataFrame.from_dict(super_dict)

    new_df = causal_data_frame.drop(
        ['dps', 'aggregateTags', 'tags'],
        axis=1).assign(**_DataFrame(causal_data_frame.dps.values.tolist()))
    transposed_df = new_df.T
    cleaned_causal_df = transposed_df.rename(columns=dict(
        zip(transposed_df.columns.tolist(), transposed_df.iloc[0].astype(
            str)))).drop(transposed_df.index[0])
    numeric_causal_df = cleaned_causal_df.apply(_to_numeric, errors='ignore')
    return numeric_causal_df
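A call sketch, assuming a reachable OpenTSDB /api/query endpoint and that merge_dicts is the module's own helper; the host, time window, and metric names are hypothetical:

metric_names = [[{'metric': 'sys.cpu.user'}, {'metric': 'sys.mem.free'}]]
query_string = 'http://tsdb.example.com:4242/api/query?start=1h-ago&'

causal_df = openTSDB_data_processor(metric_names=metric_names,
                                    query_string=query_string,
                                    query_offset=20,
                                    tolerance=0.1)
# One column per returned metric, one row per timestamp taken from the 'dps' payloads.
print(causal_df.head())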
Code Example #14
def processInputData(inputa,
                     fieldSeparator=None,
                     recordSeparator=None,
                     otsdbExcludeCharRegex=r'[^a-zA-Z\d\-_%s/]+',
                     otsdbExcludeCharReplacer='_'):
    logger = _getLogger(__name__)
    logger.info("in processInputData")

    output = {}
    if fieldSeparator is None or recordSeparator is None:
        return output
    engine = 'c'
    if len(recordSeparator) > 1:
        engine = 'python'
    lines = inputa.strip().split('\n')

    partsAll = [elem.strip().split(recordSeparator) for elem in lines]
    subfields = []
    for selector in range(3):
        subfields.append(
            [elem[selector].split(fieldSeparator) for elem in partsAll])

    for i in range(len(subfields[0])):

        clean_row_part(subfields[0][i],
                       otsdbExcludeCharRegex,
                       otsdbExcludeCharReplacer=otsdbExcludeCharReplacer,
                       excludeChars='')
        clean_row_part(subfields[1][i],
                       otsdbExcludeCharRegex,
                       otsdbExcludeCharReplacer=otsdbExcludeCharReplacer,
                       excludeChars='')
        clean_row_part(subfields[2][i],
                       otsdbExcludeCharRegex,
                       otsdbExcludeCharReplacer=otsdbExcludeCharReplacer,
                       excludeChars='.')

    columns = [elem.split('=')[0] for elem in subfields[0][0]]
    times = [elem.split('=')[0] for elem in subfields[1][0]]
    values = [elem.split('=')[0] for elem in subfields[2][0]]
    dfAll = []
    for currentFieldIndex, currentField in enumerate(subfields):
        d1 = []
        logger.info(currentFieldIndex)
        for indexa, elema in enumerate(currentField):
            try:
                d1.append(
                    {elem.split('=')[0]: elem.split('=')[1]
                     for elem in elema})
            except Exception as e:
                logger.info(e)
                logger.debug(elema)
                logger.debug((d1[0].keys()))
                raise
        dfAll.append(_DataFrame(d1))
    dfMaster = _concat(dfAll, axis=1)

    logger.info('before timestamp normalization')
    timestamp = normalizeTimeStamps(dfMaster, time_columns=times)
    dfMaster['timestamp'] = timestamp
    if logger.isEnabledFor(_DEBUG):
        logger.debug("type debug:%s" % (type(timestamp[0])))
        logger.debug("%s" % (timestamp[0]))
        logger.debug("type debug:%s" % (type(dfMaster['timestamp'][0])))
        logger.debug("%s" % (dfMaster['timestamp'][0]))
    output = {}
    output['df'] = dfMaster
    output['time_columns'] = times
    output['value_columns'] = values
    output['group_columns'] = columns
    logger.info("times=%s, values=%s, columns=%s" % (times, values, columns))
    logger.info("leave processInputData")
    return output
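A sketch of feeding processInputData two records in the '\x1f'/'\x1e' key=value layout that format_1cs_df (Code Example #1) appears to emit; clean_row_part and normalizeTimeStamps are the module's own helpers and are assumed to be importable alongside it:

inputa = ('host=a\x1fdc=west\x1e' 'ts=1577836800\x1e' 'cpu=0.5\n'
          'host=b\x1fdc=east\x1e' 'ts=1577836860\x1e' 'cpu=0.7\n')

result = processInputData(inputa,
                          fieldSeparator='\x1f',
                          recordSeparator='\x1e')
# result['df'] holds the parsed rows plus a normalized 'timestamp' column; the
# group/time/value column names are recovered from the keys themselves.
print(result['group_columns'], result['time_columns'], result['value_columns'])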
Code Example #15
        self.semimin = self.semimaj * (1 - self.flatten)  # b
        self.semiminsq = self.semimin * self.semimin  # b**2
        self.ecc1sq = self.flatten * (2 - self.flatten)
        self.ecc2sq = self.ecc1sq / (1 - self.ecc1sq)


#         self.ecc1       = sqrt(self.ecc1sq)
#         self.n          = float(self.f / (2 - self.f))
#         self.n2         = self.n ** 2
#         self.meanradius = (2 * self.semimaj + self.semimin)/3

# Geodetic Reference System 1980
# www.epsg-registry.org/export.htm?gml=urn:ogc:def:ellipsoid:EPSG::7019
GRS80 = Ellipsoid(6378137, 298.257222101)

# World Geodetic System 1984
# www.epsg-registry.org/export.htm?gml=urn:ogc:def:ellipsoid:EPSG::7030
WGS84 = Ellipsoid(6378137, 298.257223563)

# Australian National Spheroid
# www.epsg-registry.org/export.htm?gml=urn:ogc:def:ellipsoid:EPSG::7003
ANS = Ellipsoid(6378160, 298.25)

# International (Hayford) 1924
# www.epsg-registry.org/export.htm?gml=urn:ogc:def:ellipsoid:EPSG::7022
INTL24 = Ellipsoid(6378388, 297)

SISRE_COEF_DF = _DataFrame(data=[[0.99, 0.98, 0.98, 0.98, 0.98],
                                 [127, 54, 49, 45, 61]],
                           columns=['C_IGSO', 'C', 'G', 'R', 'E'],
                           index=['alpha', 'beta'])
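A small lookup sketch using the constants above; the Ellipsoid attribute names follow the fragment, and the SISRE_COEF_DF columns appear to be RINEX constellation codes (G=GPS, R=GLONASS, E=Galileo, C=BeiDou):

print(WGS84.ecc1sq)                      # first eccentricity squared of WGS84
print(SISRE_COEF_DF.loc['alpha', 'G'])   # 0.98, the alpha coefficient for the 'G' column
print(SISRE_COEF_DF.loc['beta', 'E'])    # 61, the beta coefficient for the 'E' column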