Beispiel #1
0
 def __init__(self, num=False, plot=True):
     """Expected (uniform) distribution for the last-two-digits test."""
     # Each of the 100 two-digit combinations gets the same probability.
     expected_probs = array([1 / 99.] * 100)
     DataFrame.__init__(self, {'Expected': expected_probs,
                               'Last_2_Dig': _lt_(num=num)})
     self.set_index('Last_2_Dig', inplace=True)
     if plot:
         plot_expected(self, -2)
Beispiel #2
0
 def __init__(self, *args, **kwargs):
     """
     Initialize exactly like a plain DataFrame.

     Initializing with a SensitiveFrame as input without specifying metadata
     will reset metadata to default. Metadata will be removed if it does not
     correspond to a SensitiveFrame column.
     """
     DataFrame.__init__(self, *args, **kwargs)
Beispiel #3
0
    def __init__(self, state_fips, year=2014, read_from='api', key=None, variables=None, filepath=None,
                 impute_emp=True):
        """Load County Business Patterns data from the Census API or a CSV.

        Parameters
        ----------
        state_fips : str
            FIPS code of the state whose counties are fetched.
        year : int
            Data year used to build the API URL.
        read_from : str
            'api' to query the Census API, 'csv' to read a local file.
        key : str, optional
            Census API key; required when ``read_from == 'api'``.
        variables : list of str, optional
            Variables to request; defaults to a standard CBP variable set.
        filepath : str, optional
            Path to the CSV file; required when ``read_from == 'csv'``.
        impute_emp : bool
            Replace flagged employment values via ``emp_imputation``.
        """
        # Keep geographic identifiers as strings so leading zeros survive.
        dtypes = {
            'NAICS2012': str,
            'county': str,
            'state': str
        }

        if read_from == 'api':

            if not key:
                raise ValueError('If reading from Census API, must provide API key in the key parameter')

            baseurl = 'http://api.census.gov/data/{}/cbp'.format(year)

            if variables:
                cbp_vars = ','.join(variables)
            else:
                cbp_vars = 'EMP,EMP_F,ESTAB,NAICS2012,NAICS2012_TTL,GEO_TTL'

            # Build the query from explicit names instead of
            # format(**locals()), which silently depends on every local
            # variable in scope.
            url = '{}?get={}&for=county:*&in=state:{}&key={}'.format(
                baseurl, cbp_vars, state_fips, key)

            try:
                req = requests.get(url)
                req.raise_for_status()
            except requests.exceptions.HTTPError as err:
                # Python 3 print function; the original `print err` was
                # Python 2 syntax and does not parse under Python 3.
                print(err, file=sys.stderr)
                sys.exit(1)

            results = pd.read_json(req.text, orient='index', dtype=dtypes).T
            results.columns = results.iloc[0]
            results.drop(results.index[0], inplace=True)

            # Numeric columns arrive as strings; cast them explicitly.
            cbp_dtypes = {
                'EMP': int,
                'ESTAB': int
            }

            for variable, vartype in cbp_dtypes.items():
                results[variable] = results[variable].astype(vartype)

            data = results

            if impute_emp:
                # EMP_F is a flag column; flagged rows get the value looked
                # up in emp_imputation, others keep their reported EMP.
                data['EMP'] = data.apply(lambda x: emp_imputation[x.EMP_F] if x.EMP_F else x.EMP, axis=1)

            data.drop('EMP_F', axis=1, inplace=True)

        elif read_from == 'csv':

            if not filepath:
                raise ValueError('If reading from CSV file, must provide path to file in filepath parameter')

            data = pd.read_csv(filepath_or_buffer=filepath, dtype=dtypes)

        else:
            raise ValueError('Valid options for the read_from parameter are "api" and "csv".')

        DataFrame.__init__(self, data=data)
Beispiel #4
0
 def __init__(self, data=None, index=None, columns=None, dtype=None,
              copy=False, sampling_rate=1.0):
     """Construct dataframe-like object.

     All standard arguments are forwarded to ``DataFrame.__init__``.
     If ``sampling_rate`` is not None, the index is replaced by evenly
     spaced sample times 0, sampling_rate, 2*sampling_rate, ...
     """
     DataFrame.__init__(self, data=data, index=index, columns=columns,
                        dtype=dtype, copy=copy)
     if sampling_rate is not None:
         # arange(0, len*rate, rate) with a float step can produce one
         # element too many due to rounding; scaling an integer range
         # guarantees exactly len(self) index values.
         self.index = np.arange(len(self)) * sampling_rate
Beispiel #5
0
 def __init__(self, x, y, d=0.025, verbose=False, xml=None, **kwargs):
     """Build a node table (lat/lon per node id) from an OSM XML extract."""
     if not xml:
         # No XML supplied: download the extract around (x, y) with size d.
         xml = osm_xml_download(x, y, d, verbose)
     root = etree.fromstring(xml.encode('utf-8'))
     rows = []
     for node in root.findall('node'):
         coords = {k: v for k, v in node.attrib.items() if k in ['lat', 'lon']}
         rows.append(Series(coords, name=node.attrib['id']))
     DataFrame.__init__(self, rows, dtype=float, **kwargs)
     self.street_setter(root.findall('way'))
     self.index = self.index.astype(int)
Beispiel #6
0
    def __init__(self, plot=True):
        """Benford expected proportions for the second digit (0-9)."""
        first_two = arange(10, 100)
        # Probability of each two-digit prefix: log10(1 + 1/n).
        probs = log10(1 + (1. / first_two))
        # Second digit of 10..99 cycles through 0..9 nine times.
        second_digits = array(list(range(10)) * 9)

        pairs = DataFrame({'Expected': probs, 'Sec_Dig': second_digits})

        # Summing over the first digit yields the marginal distribution
        # of the second digit.
        DataFrame.__init__(self, pairs.groupby('Sec_Dig').sum())

        if plot:
            plot_expected(self, 22)
Beispiel #7
0
    def __init__(self, digs, plot=True):
        """Benford expected distribution of the first `digs` digits."""
        _check_digs_(digs)
        dig_name = f'First_{digs}_Dig'
        # All possible leading-digit combinations, e.g. 10..99 for digs=2.
        leading = arange(10 ** (digs - 1), 10 ** digs)
        expected = log10(1 + (1. / leading))

        DataFrame.__init__(self, {'Expected': expected}, index=leading)
        self.index.names = [dig_name]

        if plot:
            plot_expected(self, digs)
Beispiel #8
0
    def __init__(self, data=None, index=None, columns=_col_names, dtype=None,
                 copy=False, tick_id=None, unit_amount=None):
        """Tick data frame identified by ``tick_id``.

        Raises
        ------
        ValueError
            If ``tick_id`` is not provided.
        """
        # PEP 8: compare to None with `is`, not `==` (== can be hijacked
        # by objects defining __eq__).
        if tick_id is None:
            raise ValueError("tick_id must be set")
        if columns is None:
            # Caller explicitly passed columns=None: fall back to defaults.
            columns = _col_names
        DataFrame.__init__(self, data, index, columns, dtype, copy)
        self.tick_id = tick_id
        self.sort(inplace=True)
        self.unit_amount = unit_amount
        self.fix_split()
Beispiel #9
0
    def __init__(self, plot=True, save_plot=None, save_plot_kwargs=None):
        """Expected second-digit distribution, optionally plotted."""
        expected, second_digits = _gen_second_digits_()

        DataFrame.__init__(self, {'Expected': expected,
                                  'Sec_Dig': second_digits})
        self.set_index("Sec_Dig", inplace=True)

        if not plot:
            return
        plot_expected(self, 22,
                      save_plot=save_plot,
                      save_plot_kwargs=save_plot_kwargs)
Beispiel #10
0
    def __init__(self, digs, plot=True, save_plot=None, save_plot_kwargs=None):
        """Expected first-`digs`-digits distribution, optionally plotted."""
        _check_digs_(digs)
        expected, digits = _gen_first_digits_(digs)

        DataFrame.__init__(self, {'Expected': expected}, index=digits)
        self.index.names = [f'First_{digs}_Dig']

        if not plot:
            return
        plot_expected(self, digs,
                      save_plot=save_plot,
                      save_plot_kwargs=save_plot_kwargs)
Beispiel #11
0
 def __init__(self,
              num=False,
              plot=True,
              save_plot=None,
              save_plot_kwargs=None):
     """Expected last-two-digits distribution, optionally plotted."""
     expected, last_two = _gen_last_two_digits_(num=num)
     DataFrame.__init__(self, {'Expected': expected, 'Last_2_Dig': last_two})
     self.set_index('Last_2_Dig', inplace=True)
     if not plot:
         return
     plot_expected(self, -2,
                   save_plot=save_plot,
                   save_plot_kwargs=save_plot_kwargs)
Beispiel #12
0
    def __init__(self,
                 data=None,
                 index=None,
                 columns=None,
                 dtype=None,
                 copy: bool = False,
                 start_date: datetime = START_DATE,
                 end_date: datetime = None,
                 include_holding: bool = False,
                 include_finance: bool = False,
                 include_managed: bool = False,
                 include_suspended: bool = False):
        """Company data frame restricted to [start_date, end_date].

        When ``data`` is None the latest company data is downloaded and
        filtered according to the ``include_*`` switches; otherwise the
        given data is used as-is. Benchmarks and factors are always
        downloaded and clipped to the same date window.
        """
        end_date = end_date or datetime.today()

        if data is None:
            data, self.benchmarks, self.factors = download_latest_data(
                download_company_data=True)

            # Drop company categories unless explicitly requested.
            if not include_holding:
                data = data.loc[~data[HOLDING], :]
            if not include_finance:
                data = data.loc[data[FN_GUIDE_SECTOR] != '금융', :]
            if not include_managed:
                data = data.loc[~data[IS_MANAGED], :]
            if not include_suspended:
                data = data.loc[~data[IS_SUSPENDED], :]

            in_window = (start_date <= data[DATE]) & (data[DATE] <= end_date)
            data = data.loc[in_window, :]
        else:
            _, self.benchmarks, self.factors = download_latest_data(
                download_company_data=False)

        bench_in_window = ((start_date <= self.benchmarks[DATE]) &
                           (self.benchmarks[DATE] <= end_date))
        self.benchmarks = self.benchmarks.loc[bench_in_window, :]

        factors_in_window = ((start_date <= self.factors.index) &
                             (self.factors.index <= end_date))
        self.factors = self.factors.loc[factors_in_window, :]

        DataFrame.__init__(self=self,
                           data=data,
                           index=index,
                           columns=columns,
                           dtype=dtype,
                           copy=copy)
Beispiel #13
0
 def __init__(self,
              data=None,
              index=None,
              columns=None,
              dtype=None,
              copy=False,
              sampling_rate=1.0):
     """Construct dataframe-like object.

     All standard arguments are forwarded to ``DataFrame.__init__``.
     If ``sampling_rate`` is not None, the index is replaced by evenly
     spaced sample times 0, sampling_rate, 2*sampling_rate, ...
     """
     DataFrame.__init__(self,
                        data=data,
                        index=index,
                        columns=columns,
                        dtype=dtype,
                        copy=copy)
     if sampling_rate is not None:
         # arange(0, len*rate, rate) with a float step can produce one
         # element too many due to rounding; scaling an integer range
         # guarantees exactly len(self) index values.
         self.index = np.arange(len(self)) * sampling_rate
Beispiel #14
0
 def __init__(self,
              data=None,
              index=None,
              columns=_col_names,
              dtype=None,
              copy=False,
              tick_id=None,
              unit_amount=None):
     """Tick data frame identified by ``tick_id``.

     Raises
     ------
     ValueError
         If ``tick_id`` is not provided.
     """
     # PEP 8: compare to None with `is`, not `==` (== can be hijacked
     # by objects defining __eq__).
     if tick_id is None:
         raise ValueError("tick_id must be set")
     if columns is None:
         # Caller explicitly passed columns=None: fall back to defaults.
         columns = _col_names
     DataFrame.__init__(self, data, index, columns, dtype, copy)
     self.tick_id = tick_id
     self.sort(inplace=True)
     self.unit_amount = unit_amount
     self.fix_split()
Beispiel #15
0
    def __init__(self, data=None, date_column=None, *args, **kwargs) -> None:
        """Wrap ``data`` as a stock data frame.

        If ``date_column`` is given, that column is parsed as datetimes
        and promoted to the index.
        """
        DataFrame.__init__(self, data, *args, **kwargs)

        # MultiIndex columns are explicitly rejected.
        if self.columns.nlevels > 1:
            # For now, I admit,
            # there are a lot of works to support MultiIndex dataframes
            raise ValueError(
                'stock-pandas does not support dataframes with MultiIndex columns'  # noqa:E501
            )

        # Start with fresh metas unless the input already carries them.
        if not isinstance(data, StockDataFrame):
            init_stock_metas(self)
        else:
            copy_stock_metas(data, self)

        if date_column:
            self[date_column] = to_datetime(self[date_column])
            self.set_index(date_column, inplace=True)
Beispiel #16
0
 def __init__(self, r2labmap, columns=None):
     """Grid frame indexed by node id, with 'x', 'y' plus extra columns.

     Parameters
     ----------
     r2labmap : map object
         Carries the coordinate system; provides ``indexes()`` and
         ``iterate_nodes()``.
     columns : dict, optional
         Extra column name -> initial value pairs.
     """
     # for weird reasons related to pandas implementation,
     # we can't seem to store r2labmap (or columns) on self:
     # it triggers infinite recursion (go figure...)
     # turns out we don't absolutely need this apparently,
     # so let's proceed this way for now
     if columns is None:
         columns = dict()
     all_columns = ['x', 'y'] + list(columns.keys())
     DataFrame.__init__(self, index=r2labmap.indexes(), columns=all_columns)
     for node_id, (gridx, gridy) in r2labmap.iterate_nodes():
         # Single .loc[row, col] assignment: the original chained form
         # self.loc[node_id]['x'] = ... writes through an intermediate
         # object and can silently fail to update the frame (pandas
         # chained-assignment pitfall).
         self.loc[node_id, 'x'] = gridx
         self.loc[node_id, 'y'] = gridy
         for column, value in columns.items():
             self.loc[node_id, column] = value
Beispiel #17
0
    def __init__(self, data=None, index=None, columns=None, dtype=None, copy: bool = False,
                 start_date: str = START_DATE, end_date: str = None,
                 include_holding: bool = False, include_finance: bool = False,
                 include_managed: bool = False, include_suspended: bool = False):
        """Company data frame restricted to [start_date, end_date].

        Dates are ISO strings (YYYY-MM-DD) and are validated up front.
        When ``data`` is None the latest company data is downloaded and
        filtered according to the ``include_*`` switches.

        Raises
        ------
        ValueError
            If ``start_date`` or ``end_date`` is not YYYY-MM-DD.
        """

        def _check_date(value, name):
            # Shared validation for both date parameters; chains the
            # original parse error for easier debugging.
            try:
                datetime.strptime(value, '%Y-%m-%d')
            except ValueError as err:
                raise ValueError(
                    "Incorrect data format, {} should be YYYY-MM-DD".format(name)) from err

        _check_date(start_date, 'start_date')

        if not end_date:
            end_date = datetime.today().strftime('%Y-%m-%d')

        _check_date(end_date, 'end_date')

        if data is None:
            data, self.benchmarks, self.factors = download_latest_data(download_company_data=True)

            # Drop company categories unless explicitly requested.
            if not include_holding:
                data = data.loc[~data[HOLDING], :]

            if not include_finance:
                data = data.loc[data[FN_GUIDE_SECTOR] != '금융', :]

            if not include_managed:
                data = data.loc[~data[IS_MANAGED], :]

            if not include_suspended:
                data = data.loc[~data[IS_SUSPENDED], :]

            # Lexicographic comparison is valid for zero-padded ISO dates.
            data = data.loc[(start_date <= data[DATE]) & (data[DATE] <= end_date), :]
        else:
            _, self.benchmarks, self.factors = download_latest_data(download_company_data=False)

        DataFrame.__init__(self=self, data=data, index=index, columns=columns, dtype=dtype, copy=copy)
    def __init__(self, data=None, index=None, columns=None, dtype=None,
                 copy=False, categories=None):
        """
        A DataFrame that also records which columns are categorical.

        Parameters
        ----------
        categories : list of columns
            Column names whose values are categorical.
        """
        DataFrame.__init__(self, data=data, index=index, columns=columns,
                           dtype=dtype, copy=copy)
        self.separator = '_'
        self._categories = categories if categories else []
Beispiel #19
0
    def __init__(self, *args, **kwargs):
        """Construct a FoamFrame.

        Two modes, selected by the ``folder`` keyword:
        * ``folder`` is None -> the remaining args/kwargs are forwarded
          unchanged to ``DataFrame.__init__`` (plain pandas construction).
        * ``folder`` is set -> the OpenFOAM results in that folder are
          imported via ``import_foam_folder`` and wrapped in this frame.
        """

        # FoamFrame-specific options are read first ...
        skip = kwargs.get('skiplines', 1)
        times = kwargs.get('readtime', slice(0, None))
        name = kwargs.get('name', 'None')
        symb = kwargs.get('symb', 'o')
        files = kwargs.get('search_files', None)
        properties = kwargs.get('properties', None)  # NOTE(review): read but never used below — confirm
        lines = kwargs.get('maxlines', 0)
        search = kwargs.get('search_pattern', FPNUMBER)
        folder = kwargs.get('folder', None)
        plot_properties = kwargs.get('plot_properties', PlotProperties())
        show_func = kwargs.get('show_func', None)
        validate = kwargs.get('validate', True)
        preHooks = kwargs.get('preHooks', None)
        exclude = kwargs.get('exclude', [" "])  # FIXME
        times_stride = kwargs.get('times_stride', 1)  # NOTE(review): unused below — confirm
        times_range = kwargs.get('times_range', "all") # FIXME implement strides
        times_slice = times_range

        # ... then stripped, so only genuine pandas keywords remain.
        keys = ['skiplines',
                'readtime',
                'preHooks',
                'name',
                'symb',
                'search_files',
                'properties',
                'maxlines',
                'search_pattern',
                'folder',
                'plot_properties',
                'show_func',
                'exclude',
                'times_stride',
                'times_range',
                ]

        # NOTE(review): 'validate' is read above but missing from this list,
        # so passing it with folder=None leaks it into DataFrame.__init__ —
        # confirm this is intended.
        for k in keys:
            if k in kwargs:
                kwargs.pop(k)

        # TODO explain what happens here
        if folder is None:
            # super(FoamFrame, self).__init__(*args, **kwargs)
            DataFrame.__init__(self, *args, **kwargs)
        else:
            # Run caller-supplied hooks before the (possibly slow) import.
            if preHooks:
                for hook in preHooks:
                    hook.execute()
            if (folder in case_data_base) and Database:
                print("re-importing", end=" ")
            else:
                print("importing", end=" ")
            print(name + ": ", end="")
            origins, data = import_foam_folder(
                path=folder,
                search=search,
                files=files,
                skiplines=skip,
                maxlines=lines,
                skiptimes=times,
                exclude=exclude,
                times_slice=times_slice
                )
            try:
                DataFrame.__init__(self, data)
            except Exception as e:
                # NOTE(review): swallows construction failures and continues
                # with a half-initialised frame — confirm this is intended.
                print(e)
            self.properties = Props(
                origins,
                name,
                plot_properties,
                folder,
                symb,
                show_func)
            if validate and Database:
                self.validate_origins(folder, origins)
            # register to database
            if Database:
                case_data_base.sync()
Beispiel #20
0
 def __init__(self, *args, **kwargs):
     """Delegate construction directly to the DataFrame constructor."""
     DataFrame.__init__(self, *args, **kwargs)
Beispiel #21
0
    def __init__(self, *args, **kwargs):
        """Construct a FoamFrame.

        If the ``folder`` keyword is None, the remaining arguments are
        forwarded unchanged to ``DataFrame.__init__``; otherwise the
        OpenFOAM results under ``folder`` are imported via
        ``import_foam_folder`` and wrapped in this frame.
        """

        # FoamFrame-specific options are read first ...
        skip = kwargs.get('skiplines', 1)
        times = kwargs.get('readtime', slice(0, None))
        name = kwargs.get('name', 'None')
        symb = kwargs.get('symb', 'o')
        files = kwargs.get('search_files', None)
        properties = kwargs.get('properties', None)  # NOTE(review): read but never used below — confirm
        lines = kwargs.get('maxlines', 0)
        search = kwargs.get('search_pattern', FPNUMBER)
        folder = kwargs.get('folder', None)
        plot_properties = kwargs.get('plot_properties', PlotProperties())
        show_func = kwargs.get('show_func', None)
        validate = kwargs.get('validate', True)
        preHooks = kwargs.get('preHooks', None)
        exclude = kwargs.get('exclude', [" "])  # FIXME
        times_stride = kwargs.get('times_stride', 1)  # NOTE(review): unused below — confirm
        times_range = kwargs.get('times_range',
                                 "all")  # FIXME implement strides
        times_slice = times_range

        # ... then stripped, so only genuine pandas keywords remain.
        keys = [
            'skiplines',
            'readtime',
            'preHooks',
            'name',
            'symb',
            'search_files',
            'properties',
            'maxlines',
            'search_pattern',
            'folder',
            'plot_properties',
            'show_func',
            'exclude',
            'times_stride',
            'times_range',
        ]

        # NOTE(review): 'validate' is read above but missing from this list,
        # so passing it with folder=None leaks it into DataFrame.__init__ —
        # confirm this is intended.
        for k in keys:
            if k in kwargs:
                kwargs.pop(k)

        # TODO explain what happens here
        if folder is None:
            # super(FoamFrame, self).__init__(*args, **kwargs)
            DataFrame.__init__(self, *args, **kwargs)
        else:
            # Run caller-supplied hooks before the (possibly slow) import.
            if preHooks:
                for hook in preHooks:
                    hook.execute()
            if (folder in case_data_base) and Database:
                print("re-importing", end=" ")
            else:
                print("importing", end=" ")
            print(name + ": ", end="")
            origins, data = import_foam_folder(path=folder,
                                               search=search,
                                               files=files,
                                               skiplines=skip,
                                               maxlines=lines,
                                               skiptimes=times,
                                               exclude=exclude,
                                               times_slice=times_slice)
            try:
                DataFrame.__init__(self, data)
            except Exception as e:
                # NOTE(review): swallows construction failures and continues
                # with a half-initialised frame — confirm this is intended.
                print(e)
            self.properties = Props(origins, name, plot_properties, folder,
                                    symb, show_func)
            if validate and Database:
                self.validate_origins(folder, origins)
            # register to database
            if Database:
                case_data_base.sync()
 def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False):
     """Forward the standard constructor arguments to DataFrame."""
     DataFrame.__init__(self=self, data=data, index=index,
                        columns=columns, dtype=dtype, copy=copy)
    def __init__(self, sourcefilename, sourceDataFrame, teamname, teamno):
        """Team box-score frame built from a play-by-play DataFrame.

        Parameters
        ----------
        sourcefilename : str
            Encodes the team codes at fixed positions (chars 9:12 / 12:15).
        sourceDataFrame : DataFrame
            Play-by-play events, including a 'team' column.
        teamname : str
            This team's name as it appears in the 'team' column.
        teamno : int
            1 or 2; determines home/away flag and opponent extraction.
        """
        DataFrame.__init__(self, sourceDataFrame)
        self.SN = sourcefilename[0:15]
        self.data = sourceDataFrame
        self.teamname = teamname
        self.teamno = teamno
        self.home = 0 if self.teamno == 1 else 1
        self.oppteamname = sourcefilename[9:12] if self.teamno == 2 else sourcefilename[12:15]
        self.oppteamno = 2 if self.teamno == 1 else 1

        # Shooting / rebounding / misc totals computed from this team's rows.
        selector = sourceDataFrame[sourceDataFrame["team"] == teamname]
        self.FGM = calculate_FGM(selector)
        self.FGA = calculate_FGA(selector)
        self.FGP = float(self.FGM) / self.FGA if self.FGA != 0 else 0
        self.B3M = calculate_3M(selector)
        self.B3A = calculate_3A(selector)
        self.B3P = float(self.B3M) / self.B3A if self.B3A != 0 else 0
        self.FTM = calculate_FTM(selector)
        self.FTA = calculate_FTA(selector)
        self.FTP = float(self.FTM) / self.FTA if self.FTA != 0 else 0
        self.P = int(score_calculate(selector))
        self.ORB = calculate_ORB(selector)
        self.DRB = calculate_DRB(selector)
        self.TRB = self.ORB + self.DRB
        self.TOV = calculate_TOV(selector)
        self.TOVP = (
            self.TOV / (self.FGA + self.FTA * 0.44 + self.TOV) if (self.FGA + self.FTA * 0.44 + self.TOV) != 0 else 0
        )
        self.AST = calculate_AST(selector)
        self.F = calculate_F(selector)

        # Produce rows that aid the calculation of time intervals.
        _set_dummies(self, self.teamname)

        combination_starter = self._combinaiton_reference()
        self.totalcombi = combination_starter.etype.count()

        # Assign each row its combination number. All lookups below are
        # label-based, so .loc is a drop-in replacement for the removed
        # pandas .ix indexer.
        counter = 0
        combinations = combination_starter.index
        for identifier in combinations:
            counter += 1
            self.data.loc[identifier:, "combination_number"] = counter

        # Time remaining of each row; overtime (period > 4) runs in
        # 5-minute periods, regulation in 12-minute periods.
        self["time remaining"] = 0
        end_period = self.data.tail(1).period.item()
        self.data.loc[(self.data.period <= 4), "time remaining"] = (end_period - 4) * 5 * 60
        if end_period > 4:
            self.data.loc[(self.data.period > 4), "time remaining"] = (end_period - self.data.period) * 5 * 60
        self.data.loc[self.data.period <= 4, "time remaining"] += (4 - self.data.period) * 12 * 60
        self.data.loc[:, "time"] = pd.to_datetime(self.data.time, format="%M:%S")
        self.data.loc[:, "time remaining"] = (
            self.data.loc[:, "time remaining"]
            + pd.DatetimeIndex(self.data.time).minute * 60
            + pd.DatetimeIndex(self.data.time).second
        )

        self.MP = self.data.head(1).loc[:, "time remaining"].item() / 60

        # Rates that need the opponent's rows (rebound percentages, etc.).
        oppselector = sourceDataFrame[sourceDataFrame["team"] == self.oppteamname]
        self.ORBP = float(self.ORB) / (self.ORB + calculate_DRB(oppselector))
        self.DRBP = float(self.DRB) / (self.DRB + calculate_ORB(oppselector))
        self.TRBP = float(self.TRB) / (self.TRB + calculate_DRB(oppselector) + calculate_ORB(oppselector))
        self.PTD = int(self.P - score_calculate(oppselector))
        self.STL = calculate_STL(oppselector)
        self.BLK = calculate_BLK(oppselector)
        self.interval = self.data.head(1).loc[:, "time remaining"].item()
        nameindex = detect_team(self.teamno)
        self.players = pd.Series(self.data[nameindex].values.ravel()).unique()
Beispiel #24
0
    def __init__(self, *args, **kwargs):
        """Construct a FoamFrame (Python 3 port of the Python 2 original).

        If the ``folder`` keyword is None, the remaining arguments are
        forwarded unchanged to ``DataFrame.__init__``; otherwise the
        OpenFOAM results under ``folder`` are imported via
        ``io.import_foam_folder`` and wrapped in this frame.
        """
        # FoamFrame-specific options are read first ...
        skip = kwargs.get('skiplines', 1)
        times = kwargs.get('skiptimes', 1)
        name = kwargs.get('name', 'None')
        symb = kwargs.get('symb', 'o')
        files = kwargs.get('search_files', None)
        lines = kwargs.get('maxlines', 0)
        search = kwargs.get('search_pattern', io.FPNUMBER)
        folder = kwargs.get('folder', None)
        plot_properties = kwargs.get('plot_properties', PlotProperties())
        show_func = kwargs.get('show_func', None)
        validate = kwargs.get('validate', True)
        preHooks = kwargs.get('preHooks', None)

        # ... then stripped, so only genuine pandas keywords remain.
        keys = [
            'skiplines',
            'skiptimes',
            'preHooks',
            'name',
            'symb',
            'search_files',
            'properties',
            'maxlines',
            'search_pattern',
            'folder',
            'plot_properties',
            'show_func']

        # pop(k, None) replaces the original bare try/except swallow.
        for k in keys:
            kwargs.pop(k, None)

        # TODO explain what happens here
        if folder is None:
            # super(FoamFrame, self).__init__(*args, **kwargs)
            DataFrame.__init__(self, *args, **kwargs)
        else:
            # Run caller-supplied hooks before the (possibly slow) import.
            if preHooks:
                for hook in preHooks:
                    hook.execute()
            # dict.has_key() was removed in Python 3; `in` is equivalent.
            if (folder in case_data_base) and Database:
                print("re-importing", end=" ")
            else:
                print("importing", end=" ")
            print(name + ": ", end="")
            origins, data = io.import_foam_folder(
                path=folder,
                search=search,
                files=files,
                skiplines=skip,
                maxlines=lines,
                skiptimes=times,
                )
            DataFrame.__init__(self, data)
            self.properties = Props(
                origins,
                name,
                plot_properties,
                folder,
                # FIXME fix it for read logs
                data.index.levels[0],
                symb,
                show_func)
            if validate and Database:
                self.validate_origins(folder, origins)
            # register to database
            if Database:
                case_data_base.sync()
 def __init__(self, df):
     """Wrap df, caching a column-name -> position map and raw values."""
     DataFrame.__init__(self, df)
     positions = np.arange(len(self.columns))
     self.colIndxDict = dict(zip(self.columns.values, positions))
     print('col dict', self.colIndxDict)
     self.rawData = df.values
Beispiel #26
0
    def __init__(
            self,
            data=None,
            date_col: Optional[str] = None,
            to_datetime_kwargs: Optional[dict] = None,
            time_frame: TimeFrameArg = None,
            cumulators: Optional[Cumulators] = None,
            source: Optional['MetaDataFrame'] = None,
            *args,
            **kwargs) -> None:
        """
        Creates a stock data frame

        Args:
            data (ndarray, Iterable, dict, DataFrame, StockDataFrame): data
            date_col (:obj:`str`, optional): If set, then the column named `date_col` will convert and set as the DateTimeIndex of the data frame
            to_datetime_kwargs (dict, optional): the keyworded arguments to be passed to `pandas.to_datetime()`. It only takes effect if `date_col` is specified. Defaults to an empty dict.
            time_frame (str, TimeFrame): defines the time frame of the stock
            source (:obj:`StockDataFrame`, optional): the source to copy meta data from if the source is a StockDataFrame. Defaults to `data`
            *args: other pandas.DataFrame arguments
            **kwargs: other pandas.DataFrame keyworded arguments
        """
        # A mutable default argument ({}) would be shared across every
        # call; use None as the sentinel and build a fresh dict instead.
        if to_datetime_kwargs is None:
            to_datetime_kwargs = {}

        DataFrame.__init__(self, data, *args, **kwargs)

        if self.columns.nlevels > 1:
            # For now, I admit,
            # there are a lot of works to support MultiIndex dataframes
            raise ValueError(
                'stock-pandas does not support dataframes with MultiIndex columns'
            )

        if source is None:
            source = data

        is_meta_frame = isinstance(source, MetaDataFrame)

        if is_meta_frame:
            copy_stock_metas(source, self, data is not None)
        else:
            init_stock_metas(self)

        if (not is_meta_frame and date_col is None and time_frame is None):
            # Cases
            # 1.
            # StockDataFrame(dataframe)
            # 2.
            # created by self._constructor(new_data).__finalize__(self)
            # we will update cumulator data in __finalize__
            return

        # Cases
        # 1.
        # StockDataFrame(stockdataframe)
        # 2.
        # StockDataFrame(dataframe, date_col='time')

        self._cumulator.update(self,
                               source,
                               date_col=date_col,
                               to_datetime_kwargs=to_datetime_kwargs,
                               time_frame=time_frame,
                               cumulators=cumulators)
 def __init__(self, filename):
     """Load the baseball CSV named `filename` and remember its path."""
     DataFrame.__init__(self, data=BaseballDataFrame._create_df(filename))
     self.filepath = f'{getcwd()}/baseball-data/core/{filename}'
Beispiel #28
0
 def __init__(self, data, *args, **kwargs):
     """Pass everything straight through to the DataFrame constructor."""
     DataFrame.__init__(self, data, *args, **kwargs)