Code Example #1
File: test_feather.py Project: sunchao/arrow
    def test_timestamp_with_nulls(self):
        df = pd.DataFrame({'test': [pd.datetime(2016, 1, 1),
                                    None,
                                    pd.datetime(2016, 1, 3)]})
        df['with_tz'] = df.test.dt.tz_localize('utc')

        self._check_pandas_roundtrip(df, null_counts=[1, 1])
Code Example #2
File: live.py Project: nebw/bb_live
    def analyse_age_distribution(self, unique, counts):
        urllib.request.urlretrieve(
            'https://www.dropbox.com/s/ze3chu5mvetjwv2/TagsControl2016.xlsx?dl=1',
            'TagsControl2016.xlsx')

        age_data = pd.read_excel('TagsControl2016.xlsx')
        age_data.drop('Unnamed: 0', axis=1, inplace=True)

        age_data.Date = pd.to_datetime(age_data.Date)

        parity_indices = age_data.index[(age_data.Date >= pd.datetime(2016, 7, 25)) &
                                        (age_data.Date != pd.datetime(2016, 7, 26))]

        age_data.loc[parity_indices, 'From'] += 2048
        age_data.loc[parity_indices, 'To'] += 2048

        age_data['Age'] = [dt.days for dt in (pd.datetime.now() - age_data.Date)]

        age_by_idx = {}
        for index, row in age_data.iterrows():
            if row.From.is_integer() and row.To.is_integer():
                for idx in range(int(row.From), int(row.To)):
                    age_by_idx[idx] = row.Age

        ages = [age_by_idx[u] for u, c in zip(unique, counts) if u in age_by_idx.keys()]

        self.plot_age_distribution(ages)
Code Example #3
File: _files.py Project: rstoneback/pysat
    def _attach_files(self, files_info):
        """Attaches info returned by instrument list_files routine to
        Instrument object.
        """

        if not files_info.empty:
            if (len(files_info.index.unique()) != len(files_info)):
                estr = 'WARNING! Duplicate datetimes in provided file '
                estr = '{:s}information.\nKeeping one of each '.format(estr)
                estr = '{:s}of the duplicates, dropping the rest.'.format(estr)
                print(estr)
                print(files_info.index.get_duplicates())

                idx = np.unique(files_info.index, return_index=True)
                files_info = files_info.ix[idx[1]]
                #raise ValueError('List of files must have unique datetimes.')

            self.files = files_info.sort_index()
            date = files_info.index[0]
            self.start_date = pds.datetime(date.year, date.month, date.day)
            date = files_info.index[-1]
            self.stop_date = pds.datetime(date.year, date.month, date.day)
        else:
            self.start_date = None
            self.stop_date = None
            # convert to object type
            # necessary if Series is empty, enables == checks with strings
            self.files = files_info.astype(np.dtype('O'))
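The duplicate handling above relies on Index.get_duplicates() and the .ix indexer, both of which have since been removed from pandas. A minimal sketch of the same keep-one-of-each logic with current pandas, assuming a files_info Series indexed by datetimes, might look like:

import pandas as pd

files_info = pd.Series(['a.dat', 'b.dat', 'c.dat'],
                       index=pd.to_datetime(['2009-01-01', '2009-01-01', '2009-01-02']))

dupes = files_info.index[files_info.index.duplicated()]
if len(dupes) > 0:
    print('WARNING! Duplicate datetimes:', list(dupes))
    # keep the first occurrence of each duplicated datetime, drop the rest
    files_info = files_info[~files_info.index.duplicated(keep='first')]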
Code Example #4
File: factory.py Project: akkineniramesh/zipline
def create_test_df_source(sim_params=None, bars='daily'):
    if bars == 'daily':
        freq = pd.datetools.BDay()
    elif bars == 'minute':
        freq = pd.datetools.Minute()
    else:
        raise ValueError('%s bars not understood.' % freq)

    if sim_params:
        index = sim_params.trading_days
    else:
        if trading.environment is None:
            trading.environment = trading.TradingEnvironment()

        start = pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

        days = trading.environment.days_in_range(start, end)

        if bars == 'daily':
            index = days
        if bars == 'minute':
            index = pd.DatetimeIndex([], freq=freq)

            for day in days:
                day_index = trading.environment.market_minutes_for_day(day)
                index = index.append(day_index)

    x = np.arange(1, len(index) + 1)

    df = pd.DataFrame(x, index=index, columns=[0])

    return DataFrameSource(df), df
Code Example #5
File: factory.py Project: akkineniramesh/zipline
def create_test_panel_ohlc_source(sim_params=None):
    start = sim_params.first_open \
        if sim_params else pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)

    end = sim_params.last_close \
        if sim_params else pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

    if trading.environment is None:
        trading.environment = trading.TradingEnvironment()

    index = trading.environment.days_in_range(start, end)
    price = np.arange(0, len(index)) + 100
    high = price * 1.05
    low = price * 0.95
    open_ = price + .1 * (price % 2 - .5)
    volume = np.ones(len(index)) * 1000
    arbitrary = np.ones(len(index))

    df = pd.DataFrame({'price': price,
                       'high': high,
                       'low': low,
                       'open': open_,
                       'volume': volume,
                       'arbitrary': arbitrary},
                      index=index)
    panel = pd.Panel.from_dict({0: df})

    return DataPanelSource(panel), panel
Code Example #6
    def test_divide_df_single_column(self):
        x = pd.DataFrame(dict(a=[2.0, 7.0, -7.0, -7.00, 3.5]),
                         pd.date_range(pd.datetime(2015, 1, 1), periods=5))
        y = pd.DataFrame(dict(b=[2.0, 3.5, 2.0, -3.5, -3.5]),
                         pd.date_range(pd.datetime(2015, 1, 1), periods=5))
        ans = list(divide_df_single_column(x, y).iloc[:, 0])
        self.assertEqual(ans, [1., 2., -3.5, 2., -1.])

        x = pd.DataFrame(dict(a=[2.0, np.nan, -7.0, np.nan, 3.5]),
                         pd.date_range(pd.datetime(2015, 1, 1), periods=5))
        y = pd.DataFrame(dict(b=[2.0, 3.5, np.nan, np.nan, -3.5]),
                         pd.date_range(pd.datetime(2015, 1, 2), periods=5))
        
        ans = list(divide_df_single_column(x, y).iloc[:, 0])
        

        self.assertTrue(np.isnan(ans[0]))
        self.assertTrue(np.isnan(ans[1]))
        self.assertTrue(np.isnan(ans[3]))

        self.assertEqual(ans[2], -2.0)

        ans = list(divide_df_single_column(
            x, y, ffill=(True, False)).iloc[:, 0])
        self.assertEqual(ans[1], 1.0)

        ans = list(divide_df_single_column(
            x, y, ffill=(False, True)).iloc[:, 0])
        self.assertEqual(ans[4], 1.0)

        ans = list(divide_df_single_column(
            x, y, ffill=(True, True)).iloc[:, 0])
        self.assertEqual(list(ans)[1:], [1., -2., -2.0, 1., -1.])
Code Example #7
File: test_sources.py Project: ChinaQuants/zipline
    def test_yahoo_bars_to_panel_source(self):
        env = TradingEnvironment()
        finder = AssetFinder(env.engine)
        stocks = ['AAPL', 'GE']
        env.write_data(equities_identifiers=stocks)
        start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
        data = factory.load_bars_from_yahoo(stocks=stocks,
                                            indexes={},
                                            start=start,
                                            end=end)
        check_fields = ['sid', 'open', 'high', 'low', 'close',
                        'volume', 'price']

        copy_panel = data.copy()
        sids = finder.map_identifier_index_to_sids(
            data.items, data.major_axis[0]
        )
        copy_panel.items = sids
        source = DataPanelSource(copy_panel)
        for event in source:
            for check_field in check_fields:
                self.assertIn(check_field, event)
            self.assertTrue(isinstance(event['volume'], (integer_types)))
            self.assertTrue(event['sid'] in sids)
Code Example #8
File: factory.py Project: DKnight1900/zipline
def create_test_panel_source(sim_params=None, source_type=None):
    start = sim_params.first_open \
        if sim_params else pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)

    end = sim_params.last_close \
        if sim_params else pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)

    if trading.environment is None:
        trading.environment = trading.TradingEnvironment()

    index = trading.environment.days_in_range(start, end)

    price = np.arange(0, len(index))
    volume = np.ones(len(index)) * 1000

    arbitrary = np.ones(len(index))

    df = pd.DataFrame({'price': price,
                       'volume': volume,
                       'arbitrary': arbitrary},
                      index=index)
    if source_type:
        source_types = np.full(len(index), source_type)
        df['type'] = source_types

    panel = pd.Panel.from_dict({0: df})

    return DataPanelSource(panel), panel
Code Example #9
File: _test_calendar.py Project: SGMAP-AGD/Tools
 def test_interval(self):
     year_interval = IntervalPeriod('a_year', '01/01/2013', '02/01/2014')
     self._generic_test(year_interval, 366)
     test_interval_hours = year_interval.build(four_years_of_hours)
     selected_list = four_years_of_hours[test_interval_hours].tolist()
     self.assertEqual(selected_list[0], pd.datetime(2013,1,1,0))
     self.assertEqual(selected_list[-1], pd.datetime(2014,1,1,23))
Code Example #10
def pull_date_dicts(y, m, d, step_size, n_loops):
    '''
    Save dictionaries of pittsburgh inspection pdf text for given date range
    
    INPUT:  y, m, d = ints, date to start loop
            step_size = number of days to include in each sub_file
            n_loops = number of files to create
    OUTPUT: pdf_main = dict, pdf text from all dates.
    
            intermediate dicts from each loop are pickled
            pdf_main is also pickled
    
    The date range is broken into chunks by step_size and n_loops, to ensure that data
    is incrementally saved (in case of connection failure or some other terminal error).
    '''
    start = pd.datetime(y, m, d)
    delta = pd.Timedelta(1, 'd')
    
    pdf_main = {}
    for i in xrange(n_loops):
        print '[%02d] START: %s' % (i, start.strftime("%Y%m%d"))
        pdfs = get_pdf_text(start, step_size)
        save_pdf_text(pdfs, '../data/pitt/pitt_%s.pkl' % start.strftime("%Y%m%d"))
        pdf_main = merge_two_dicts(pdf_main, pdfs)
        start += step_size * delta
    
    started = pd.datetime(y, m, d)
    ended = start - delta
    save_pdf_text(pdf_main, '../data/pitt/pitt_FULL_%s_to_%s.pkl' % (started.strftime("%Y%m%d"), ended.strftime("%Y%m%d")))
    
    return pdf_main
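The docstring above describes how the overall date range is split into n_loops chunks of step_size days each, so progress is saved incrementally. A small sketch of how the chunk boundaries advance under those assumptions (step_size in days, loop counter i):

import pandas as pd

# the snippet's pd.datetime(2016, 1, 1) is the stdlib datetime; pd.Timestamp behaves the same here
start = pd.Timestamp(2016, 1, 1)
step_size, n_loops = 7, 3
delta = pd.Timedelta(1, 'd')

for i in range(n_loops):
    chunk_start = start + i * step_size * delta
    chunk_end = chunk_start + (step_size - 1) * delta
    print('[%02d] %s -> %s' % (i, chunk_start.strftime('%Y%m%d'), chunk_end.strftime('%Y%m%d')))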
Code Example #11
    def test_get_trades_from_positions(self):
        positions = pd.DataFrame([np.nan, 2, 3, np.nan, 2, 3, 3.1, 4,
                                  3, 5, 7], pd.date_range(start=pd.datetime(2015, 1, 1), periods=11))
        price = pd.DataFrame([100, 103, np.nan, 106, 110, 105, np.nan, 106,
                              120, np.nan, 142], pd.date_range(start=pd.datetime(2015, 1, 1), periods=11))
        #trades=get_trades_from_positions(price, positions, delayfill, roundpositions, None, None, None, None)
        trades = get_trades_from_positions(
            price, positions, True, True, None, None, None, None)

        self.assertEqual(list(trades.trades), [
                         2.0, 1.0, -1.0, 1.0, 1.0, -1.0, 2.0, 2.0])
        self.assertEqual(list(trades.fill_price)[
                         :-1], [106.0, 106.0, 105.0, 106.0, 120.0, 142.0, 142.0])

        trades = get_trades_from_positions(
            price, positions, False, True, None, None, None, None)

        self.assertEqual(list(trades.trades), [
                         2.0, 1.0, -1.0, 1.0, 1.0, -1.0, 2.0, 2.0])
        self.assertEqual(list(trades.fill_price), [
                         103.0, 106.0, 110.0, 105.0, 106.0, 120.0, 142.0, 142.0])

        trades = get_trades_from_positions(
            price, positions, True, False, None, None, None, None)

        self.assertEqual(list(trades.trades), [
                         2.0, 1.0, -1.0, 1.0, 0.1, 0.9, -1.0, 2.0, 2.0])
        self.assertEqual(list(trades.fill_price)[
                         :-1], [106.0, 106.0, 105.0, 106.0, 106.0, 120.0, 120.0, 142.0, 142.0])
Code Example #12
def get_clean_violation_data():
    '''Main function for getting and cleaning violation data'''
    ##Get data
    violations = get_complaint_data()
    
    ##Clean and filter data
    violations = violations[['BoroID', 'Block', 'Lot', 'Class', 'ApprovedDate']]
    violations = violations[~(violations.isnull().any(axis=1))]
    violations = violations[violations.BoroID.isin(range(1,6))]
    violations = violations[violations.Class.isin(['A','B','C'])]
    
    violations.ApprovedDate = pd.to_datetime(violations.ApprovedDate)
    start = pd.datetime(2010,4,1)
    end = pd.datetime(2015,3,31)
    allowed_date_range_violation_approval = pd.date_range(start, end, freq='D')
    violations = violations[(violations['ApprovedDate'].isin(allowed_date_range_violation_approval))]
    violations['BBL'] = map(make_BBL, violations['BoroID'], violations['Block'], violations['Lot'])
    violations = violations.drop(['BoroID','Block','Lot'],axis=1)

    ## Group by BBL and class to construct final dataframe with index=BBL and total number of violations, by class, from 04/01/2010 to 03/31/2015 as features.
    
    grouped_by_BBL = violations.groupby(['BBL','Class']).size().reset_index() 
    grouped_by_BBL.columns = ['BBL','Class','Count']
    grouped_by_BBL = grouped_by_BBL.pivot('BBL','Class','Count')
    grouped_by_BBL = grouped_by_BBL.fillna(0)
    
    return grouped_by_BBL
Code Example #13
    def test_pandl(self):
        fx = pd.DataFrame([2.0] * 10, dt_range1)
        price = pd.DataFrame(
            [100, 103, 105, 106, 110, 105, 104.5, np.nan, 120, np.nan,
             142], dt_range2)

        trades = pd.concat([
            pd.DataFrame(
                dict(
                    trades=[2, 1, -1, np.nan, 1],
                    fill_price=[102.9, 105.5, 106.5, np.nan, 106.]),
                pd.date_range(start=pd.datetime(2015, 1, 2), periods=5)),
            pd.DataFrame(
                dict(trades=[-1, 1, -1], fill_price=[107, 119, 132]),
                pd.date_range(start=pd.datetime(2015, 1, 8), periods=3))
        ])

        ans = pandl(price, trades, marktomarket=True, fx=fx)
        np.testing.assert_almost_equal(ans.pandl_base[1:], [
            0.0, 10.4, 6., 14., -16., -9., 15., 48., 78., 40.
        ])

        ans2 = pandl(price, trades, marktomarket=False, fx=fx)

        np.testing.assert_almost_equal(ans2.pandl_base[1:],
                                       [10.4, 6., 0., -2., 6., 48., 78.])
Code Example #14
File: test_misc.py Project: changhiskhan/pandas
    def test_datetime_name_accessors(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day-name and month_name should
            # return the english attributes
            expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                             'Friday', 'Saturday', 'Sunday']
            expected_months = ['January', 'February', 'March', 'April', 'May',
                               'June', 'July', 'August', 'September',
                               'October', 'November', 'December']
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = pd.date_range(freq='D', start=datetime(1998, 1, 1),
                            periods=365)
        english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                        'Friday', 'Saturday', 'Sunday']
        for day, name, eng_name in zip(range(4, 11),
                                       expected_days,
                                       english_days):
            name = name.capitalize()
            assert dti.weekday_name[day] == eng_name
            assert dti.day_name(locale=time_locale)[day] == name
            ts = Timestamp(datetime(2016, 4, day))
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                assert ts.weekday_name == eng_name
            assert ts.day_name(locale=time_locale) == name
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(pd.NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = pd.date_range(freq='M', start='2012', end='2013')
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes
        # https://github.com/pandas-dev/pandas/issues/22342
        if not compat.PY2:
            result = result.str.normalize("NFD")
            expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for date, expected in zip(dti, expected_months):
            result = date.month_name(locale=time_locale)
            expected = expected.capitalize()

            if not compat.PY2:
                result = unicodedata.normalize("NFD", result)
                expected = unicodedata.normalize("NFD", result)

            assert result == expected
        dti = dti.append(DatetimeIndex([pd.NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])
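For reference, the day_name/month_name accessors exercised by this test are plain pandas API; a minimal usage sketch with the locale left at the default (English names):

import pandas as pd

dti = pd.date_range(start='2016-04-04', periods=7, freq='D')
print(dti.day_name())                      # Monday ... Sunday
print(pd.Timestamp('2016-04-04').day_name())

mti = pd.date_range(start='2012-01-31', periods=12, freq='M')
print(mti.month_name())                    # January ... December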
Code Example #15
File: tsData.py Project: kantmp/CAmodule
def readRange(fileh,opt,st,dt,tname,key=False):
    # key=False: drop records before 9:30
    # merge and return the data
    # inputs are not validated for now
    start_date=pd.to_datetime(st)
    end_date=pd.to_datetime(dt)
    opt_name='OP'+str(opt)
    dtt=pd.date_range(start=start_date,end=end_date)
    #get Par    
    GG=[]
    for ik in dtt:
        tmp=fetchPartition(fileh,opt_name,ik)
        if tmp:
            GG.append(tmp)
    #get df, clean it, concat it
    DD=pd.DataFrame()
    for jk in GG:
        data=fetchTable(jk,tname)
        df=pd.DataFrame.from_records(data,index=data['timestamp'].\
        astype('datetime64[ns]'),exclude=['timestamp'])
        #filter
        y=df.index[1].year
        m=df.index[1].month
        d=df.index[1].day
        t_filter=((df.index>pd.datetime(y,m,d,9,30))\
        &(df.index<=pd.datetime(y,m,d,11,30))) | (df.index>pd.datetime(y,m,d,13,0))
        DD=pd.concat([DD,df[t_filter]])
    
    return DD
Code Example #16
File: data_loader.py Project: yjxiao/nyc-turnstile
    def __init__(self):
        # format of links to the txt files
        self.url_base = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_{0}.txt"
        # first day of data
        self.begining_of_time = datetime(2010, 5, 1)
        # date when format of data changed
        self.new_era = datetime(2014, 10, 18)
        self.today = datetime.today()

        # prepare station df for old format data
        self.data_dir = "static/data/"
        station_df_path = os.path.join(self.data_dir, "station.pkl")
        if os.path.isfile(station_df_path):
            with open(station_df_path) as f:
                self.station_df = pickle.load(f)
        else:
            self.station_df = pd.read_excel(
                "http://web.mta.info/developers/resources/nyct/turnstile/Remote-Booth-Station.xls"
            )
            self.station_df.columns = [
                "UNIT", "C/A", "STATION", "LINENAME", "DIVISION"
            ]
            # save to data directory
            if not os.path.exists(self.data_dir):
                os.makedirs(self.data_dir)
            with open(station_df_path, "wb") as f:
                pickle.dump(self.station_df, f)
Code Example #17
File: test_io.py Project: kaklise/pecos
def test_write_metrics1():
    filename = abspath(join(testdir, 'test_write_metrics1.csv'))
    if isfile(filename):
        os.remove(filename)
        
    metrics = pd.DataFrame({'metric1' : pd.Series([1.], index=[pd.datetime(2016,1,1)])})
    pecos.io.write_metrics(filename, metrics)
    assert_true(isfile(filename))
    
    from_file1 = pd.read_csv(filename)
    assert_equals(from_file1.shape, (1,2))
    
    # append another date
    metrics = pd.DataFrame({'metric1' : pd.Series([2.], index=[pd.datetime(2016,1,2)])})
    pecos.io.write_metrics(filename, metrics)
    
    from_file2 = pd.read_csv(filename)
    assert_equals(from_file2.shape, (2,2))
    
    # append another metric
    metrics = pd.DataFrame({'metric2' : pd.Series([3.], index=[pd.datetime(2016,1,2)])})
    pecos.io.write_metrics(filename, metrics)
    
    from_file3= pd.read_csv(filename)
    assert_equals(from_file3.shape, (2,3))
Code Example #18
File: factory.py Project: BroadBeard/zipline
def create_test_df_source(sim_params=None, bars='daily'):
    if bars == 'daily':
        freq = pd.datetools.BDay()
    elif bars == 'minute':
        freq = pd.datetools.Minute()
    else:
        raise ValueError('%s bars not understood.' % freq)

    if sim_params:
        index = sim_params.trading_days
    else:
        start = pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)
        index = pd.DatetimeIndex(
            start=start,
            end=end,
            freq=freq
        )
        if bars == 'minute':
            new_index = []
            for i in index:
                market_open = i.replace(hour=14,
                                        minute=31)
                market_close = i.replace(hour=21,
                                         minute=0)

                if i >= market_open and i <= market_close:
                    new_index.append(i)
            index = new_index
    x = np.arange(1, len(index) + 1)

    df = pd.DataFrame(x, index=index, columns=[0])

    return DataFrameSource(df), df
Code Example #19
def main():
    stock_name_list = ['IBM','AAPL','C']
    start = pd.datetime(2011,11,21)
    end = pd.datetime(2012,3,21)
    stock_class_list = {stock: Stock(stock,start,end) for stock in stock_name_list}
    pricecols = {stock:stock_class.closeprice for stock,stock_class in stock_class_list.iteritems()}
    closed_price_df = pd.DataFrame(pricecols)
    print closed_price_df.head()
Code Example #20
File: test_transforms.py Project: snth/zipline
    def setUp(self):
        setup_logger(self)
        start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
        end = pd.datetime(1994, 1, 1, 0, 0, 0, 0, pytz.utc)

        self.data = factory.load_from_yahoo(stocks=['AAPL'],
                                            indexes={},
                                            start=start, end=end)
Code Example #21
File: test_orbits.py Project: rstoneback/pysat
 def test_single_orbit_call_orbit_starts_0_UT_using_next(self):
     self.testInst.load(2009,1)
     self.testInst.orbits.next()
     ans = (self.testInst.data.index[0] == pds.datetime(2009,1,1))
     ans2 = (self.testInst.data.index[-1] == (pds.datetime(2009,1,1,1,36,59) ))
     # print (ans,ans2)
     # print (self.testInst.data.index[0], self.testInst.data.index[-1])
     assert ans & ans2
Code Example #22
File: data.py Project: TechSurfer1/pysystemtrade
    def _get_default_series(self):
        """
        What we return if currency rates match
        """
        DEFAULT_DATES = pd.date_range(start=pd.datetime(1970, 1, 1), end=pd.datetime(2050, 1, 1))
        DEFAULT_RATE_SERIES = pd.DataFrame(dict(fx=[1.0] * len(DEFAULT_DATES)), index=DEFAULT_DATES)

        return DEFAULT_RATE_SERIES
Code Example #23
File: test_orbits.py Project: rstoneback/pysat
 def test_single_orbit_call_by_1_index(self):
     self.testInst.load(2009,1)
     self.testInst.orbits[1]
     ans = (self.testInst.data.index[0] == pds.datetime(2009,1,1,1,37))
     ans2 = (self.testInst.data.index[-1] == (pds.datetime(2009,1,1,3,13,59) ))
     # print (ans,ans2)
     # print (self.testInst.data.index[0], self.testInst.data.index[-1])
     assert ans & ans2
Code Example #24
File: test_orbits.py Project: rstoneback/pysat
 def test_single_orbit_call_orbit_starts_off_0_UT_using_next(self):
     from dateutil.relativedelta import relativedelta as relativedelta
     self.testInst.load(2008,366)
     self.testInst.orbits.next()
     # print self.testInst.data.index[0], pds.datetime(2008,12,30, 23, 45), self.testInst.data.index[-1], (pds.datetime(2008,12,30, 23, 45)+relativedelta(hours=1, minutes=36, seconds=59) )
     ans = (self.testInst.data.index[0] == pds.datetime(2008,12,30, 23, 45))
     ans2 = (self.testInst.data.index[-1] == (pds.datetime(2008,12,30, 23, 45)+relativedelta(hours=1, minutes=36, seconds=59) ))
     assert ans & ans2
Code Example #25
File: factory.py Project: andycwang/zipline
def create_test_df_source():
    start = pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
    end = pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)
    index = pd.DatetimeIndex(start=start, end=end, freq=pd.datetools.day)
    x = np.arange(2., len(index) * 2 + 2).reshape((-1, 2))

    df = pd.DataFrame(x, index=index, columns=[0, 1])

    return DataFrameSource(df), df
Code Example #26
File: resourcesModel.py Project: pkravik/kaggle
def getOutcomes(allData):
    
    outcomes = ['fully_funded','great_chat','is_exciting','at_least_1_teacher_referred_donor','at_least_1_green_donation','three_or_more_non_teacher_referred_donors','one_non_teacher_referred_donor_giving_100_plus','donation_from_thoughtful_donor']
    data = allData[outcomes]
    train = data[(allData['date_posted']>=pd.datetime(2011,7,1)) & (allData['date_posted']<pd.datetime(2013,7,1))]
    cv = data[(allData['date_posted']>=pd.datetime(2013,7,1)) & (allData['date_posted']<pd.datetime(2013,10,1))]
    predict = data[(allData['date_posted']<pd.datetime(2014,1,1)) & (allData['date_posted']>=pd.datetime(2012,1,1))]
    test = data[(allData['date_posted']>=pd.datetime(2013,10,1)) & (allData['date_posted']<pd.datetime(2014,1,1))]
    
    return train, cv, predict, test
Code Example #27
File: Time.py Project: b1g3ar5/Finance
def friBeforeLastTues(yy,mm):
    first = p.datetime(yy,mm,1)
    lastDay = p.Timestamp(first).days_in_month
    lastDate = p.datetime(yy, mm, lastDay)
    dow = lastDate.isoweekday()
    if dow<2:
        inc = 7
    else:
        inc = 0
    return lastDay - 2 - dow - inc
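A quick usage sketch for the helper above (assuming pandas is imported as p, as the p.datetime/p.Timestamp calls suggest); note the return value is a day-of-month number, not a date object:

# e.g. for March 2021 the last Tuesday is the 30th, so the Friday before it is the 26th
print(friBeforeLastTues(2021, 3))  # -> 26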
Code Example #28
File: test_construction.py Project: jakevdp/pandas
    def test_constructor_coverage(self):
        rng = date_range('1/1/2000', periods=10.5)
        exp = date_range('1/1/2000', periods=10)
        tm.assert_index_equal(rng, exp)

        msg = 'periods must be a number, got foo'
        with pytest.raises(TypeError, match=msg):
            date_range(start='1/1/2000', periods='foo', freq='D')

        with pytest.raises(ValueError):
            with tm.assert_produces_warning(FutureWarning):
                DatetimeIndex(start='1/1/2000', end='1/10/2000')

        with pytest.raises(TypeError):
            DatetimeIndex('1/1/2000')

        # generator expression
        gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
        result = DatetimeIndex(gen)
        expected = DatetimeIndex([datetime(2000, 1, 1) + timedelta(i)
                                  for i in range(10)])
        tm.assert_index_equal(result, expected)

        # NumPy string array
        strings = np.array(['2000-01-01', '2000-01-02', '2000-01-03'])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype('O'))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # string with NaT
        strings = np.array(['2000-01-01', '2000-01-02', 'NaT'])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype('O'))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # non-conforming
        msg = ("Inferred frequency None from passed values does not conform"
               " to passed frequency D")
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'], freq='D')

        msg = ("Of the four parameters: start, end, periods, and freq, exactly"
               " three must be specified")
        with pytest.raises(ValueError, match=msg):
            date_range(start='2011-01-01', freq='b')
        with pytest.raises(ValueError, match=msg):
            date_range(end='2011-01-01', freq='B')
        with pytest.raises(ValueError, match=msg):
            date_range(periods=10, freq='D')
Code Example #29
File: plot_summary_stat.py Project: twdb/sonde
def _calculate_historical_statistics(sonde_file, parameter, averaging,
                                    recent_years=3):
    sonde_data = _read_sonde_data(sonde_file)
    sonde_param_data = sonde_data[parameter]
    sonde_param_data[sonde_param_data < -900] = np.nan

    sonde_param_data[np.logical_and(sonde_param_data.index.month==2,
                                    sonde_param_data.index.day==29)] = np.nan
    sonde_param_data.dropna(inplace=True)


    final_year = sonde_param_data.index.year[-1]
    first_year = sonde_param_data.first_valid_index().year
    year_str = '(%s - %s)' % (first_year, final_year-1)

    historical_enddate = pd.datetime(final_year - 1, 12, 31, 23, 59)
    historical_data = sonde_param_data.ix[:historical_enddate]
    if averaging == 'monthly':
        grouped_monthly_data = historical_data.groupby(lambda d: d.month)
        hist_stat = grouped_monthly_data.describe()
        hist_stat = pd.DataFrame({
        'min':  grouped_monthly_data.min(),
        'mean': grouped_monthly_data.mean(),
        'max': grouped_monthly_data.max()})
        for year_ago in np.arange(recent_years):
            start_date = pd.datetime(final_year - year_ago, 1, 1)
            end_date = pd.datetime(final_year - year_ago, 12, 31, 23, 59)
            monthly_mean = _calculate_mean(sonde_param_data.ix[start_date:end_date], 'M')
            hist_stat[str(start_date.year)] = pd.DataFrame(monthly_mean.values,
                index=monthly_mean.index.month)
    else:
        grouped_daily_data = historical_data.groupby(lambda d:
            (d.month, d.day))
        hist_stat = pd.DataFrame({
            'min':  grouped_daily_data.min(),
            'mean': grouped_daily_data.mean(),
            'max': grouped_daily_data.max()})
        try: 
            hist_stat.index = np.arange(1,366)   #requiring historical data to have a minimum of one record for each day of year.
        except ValueError:
            raise ValueError("The merged data file %s doesn't have the minimum required record length of five years." % sonde_file)
        
        for year_ago in np.arange(recent_years):
            start_date = pd.datetime(final_year - year_ago, 1, 1)
            end_date = pd.datetime(final_year - year_ago, 12, 31, 23, 59)
            daily_mean = _calculate_mean(sonde_param_data.ix[start_date:end_date], 'D')
            daily_mean.index = daily_mean.index.dayofyear
            hist_stat[str(start_date.year)] = pd.DataFrame(daily_mean.values,
                index=daily_mean.index)

    hist_stat.year_range = year_str
    hist_stat.final_year = final_year
    hist_stat.first_year = first_year

    return hist_stat
Code Example #30
File: test_construction.py Project: jakevdp/pandas
    def test_construction_outofbounds(self):
        # GH 13663
        dates = [datetime(3000, 1, 1), datetime(4000, 1, 1),
                 datetime(5000, 1, 1), datetime(6000, 1, 1)]
        exp = Index(dates, dtype=object)
        # coerces to object
        tm.assert_index_equal(Index(dates), exp)

        with pytest.raises(OutOfBoundsDatetime):
            # can't create DatetimeIndex
            DatetimeIndex(dates)
Code Example #31
import plotly.graph_objs as go
import pandas as pd
# assumed imports for the dash components used below (not shown in the original snippet)
import dash_core_components as dcc
import dash_html_components as html
from app import app
terrorism = pd.read_csv('apps/data/terrorism.csv',
                        encoding='latin-1',
                        low_memory=False,
                        usecols=[
                            'iyear', 'imonth', 'iday', 'country_txt', 'city',
                            'longitude', 'latitude', 'nkill', 'nwound',
                            'summary', 'target1', 'gname'
                        ])

terrorism = terrorism[terrorism['imonth'] != 0]
terrorism['day_clean'] = [15 if x == 0 else x for x in terrorism['iday']]
terrorism['date'] = [
    pd.datetime(y, m, d) for y, m, d in zip(
        terrorism['iyear'], terrorism['imonth'], terrorism['day_clean'])
]
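# A vectorized alternative to the list comprehension above (a sketch, not part of the
# original app): pandas can assemble dates directly from year/month/day columns.
# terrorism['date'] = pd.to_datetime(
#     terrorism[['iyear', 'imonth', 'day_clean']]
#     .rename(columns={'iyear': 'year', 'imonth': 'month', 'day_clean': 'day'}))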

from app import app

layout = html.Div([
    html.Br(),
    html.H3('Global Terrorism Database: 1970 - 2016'),
    html.A('Explore Cities', href='/country'),
    dcc.Graph(id='map_world', config={'displayModeBar': False}),
    html.Div([
        dcc.RangeSlider(
            id='years',
            min=1970,
            max=2016,
Code Example #32
'''
Created on 3 Dec 2015

@author: rob
'''
import unittest

import pandas as pd
import numpy as np

from pysystemtrade.syscore.accounting import pandl, get_positions_from_forecasts, get_trades_from_positions

dt_range1 = pd.date_range(start=pd.datetime(2014, 12, 30), periods=10)
dt_range2 = pd.date_range(start=pd.datetime(2015, 1, 1), periods=11)


class Test(unittest.TestCase):
    def test_get_positions_from_forecasts(self):
        fx = pd.DataFrame([2.0] * 10, dt_range1)
        price = pd.DataFrame(
            [100, 103, 105, 106, 110, 105, np.nan, 106, 120, np.nan, 142],
            dt_range2)
        forecast = pd.DataFrame([
            np.nan, np.nan, np.nan, np.nan, 10.0, 10.0, 15.0, 15.0, 5.0, 0.0,
            -5.0
        ], dt_range2)
        value_of_price_point = 150.0

        daily_return_volatility = None
        position = get_positions_from_forecasts(price,
                                                daily_return_volatility,
Code Example #33
"""
First some constants
"""

CALENDAR_DAYS_IN_YEAR = 365.25

BUSINESS_DAYS_IN_YEAR = 256.0
ROOT_BDAYS_INYEAR = BUSINESS_DAYS_IN_YEAR**.5

WEEKS_IN_YEAR = CALENDAR_DAYS_IN_YEAR / 7.0
ROOT_WEEKS_IN_YEAR = WEEKS_IN_YEAR**.5

MONTHS_IN_YEAR = 12.0
ROOT_MONTHS_IN_YEAR = MONTHS_IN_YEAR**.5

ARBITRARY_START = pd.datetime(1900, 1, 1)

HOURS_PER_DAY = 24
MINUTES_PER_HOUR = 60
SECONDS_PER_HOUR = 60

SECONDS_IN_YEAR = CALENDAR_DAYS_IN_YEAR * HOURS_PER_DAY * MINUTES_PER_HOUR * SECONDS_PER_HOUR
UNIXTIME_CONVERTER = 1e9

UNIXTIME_IN_YEAR = UNIXTIME_CONVERTER * SECONDS_IN_YEAR

MONTH_LIST = ["F", "G", "H", "J", "K", "M", "N", "Q", "U", "V", "X", "Z"]


def month_from_contract_letter(contract_letter):
    """
Code Example #34
def process_data(base_path):
    import pandas as pd

    # processed_dataset = {}
    # validation == 1000 samples
    # train === 5000 samples
    # test === 1000 samples
    # convert to number of actions per week
    # edit out the badge outcome variables

    print("Processing raw data")

    output_fname = os.path.join(base_path, 'so_data.pkl')

    labels = ['train', 'valid', 'test']

    input_fname = os.path.join(csv_path, 'so_badges.csv')
    data = pd.read_csv(input_fname)
    data.Date = pd.to_datetime(data.Date)
    data['week'] = (data.Date - pd.datetime(year=2017, month=1, day=1)).dt.days

    data = data.groupby(['DummyUserId', 'week']).agg('sum').reset_index()
    badge_ixs = data[data.Electorate > 0]
    max_week = data.week.max()
    badge_ixs = badge_ixs[badge_ixs.week > 45]
    badge_ixs = badge_ixs[badge_ixs.week < max_week - 46]
    badge_ixs = badge_ixs.DummyUserId

    print(len(badge_ixs.unique()))

    indexes = badge_ixs.unique()
    train = np.random.choice(indexes, size=4000, replace=False)
    indexes = indexes[~np.in1d(indexes, train)]
    validate = np.random.choice(indexes, size=1000, replace=False)
    indexes = indexes[~np.in1d(indexes, validate)]
    test = np.random.choice(indexes, size=1000, replace=False)

    # data.set_index('DummyUserId', inplace=True)
    processed_dataset = {}

    for s, dset in enumerate([train, validate, test]):

        split = labels[s]
        processed_dataset[split] = {}

        sub_data = data[data.DummyUserId.isin(dset)]
        n_seqs = len(dset)

        processed_dataset[split]['sequence_lengths'] = torch.zeros(
            n_seqs, dtype=torch.long)
        processed_dataset[split]['sequences'] = []
        processed_dataset[split]['outcomes'] = []
        idx = 0

        for u_id, seqs in sub_data.groupby('DummyUserId'):
            seqs = seqs.sort_values('week')

            out = {}
            for b in BADGES:
                idxs = np.where(seqs[b] == 1)[0]
                if len(idxs) > 0:
                    out[b] = torch.tensor(idxs, dtype=torch.long)

            civic_duty = out['Electorate']
            days = 90

            action_vec = seqs[ACTIONS].values[civic_duty -
                                              days // 2:civic_duty +
                                              days // 2, :]
            out['Electorate'] = torch.tensor([days // 2], dtype=torch.long)

            processed_dataset[split]['sequence_lengths'][idx] = days
            processed_sequence = torch.tensor(action_vec, dtype=torch.long)
            processed_dataset[split]['sequences'].append(processed_sequence)

            processed_dataset[split]['outcomes'].append(out)
            idx += 1

    pickle.dump(processed_dataset, open(output_fname, "wb"),
                pickle.HIGHEST_PROTOCOL)
    print("dumped processed data to %s" % output_fname)
Code Example #35
def main(fobs, fcable, case_name, ring, term):

    # _________________________ CABLE ___________________________
    cable = nc.Dataset(fcable, 'r')
    Time = nc.num2date(cable.variables['time'][:],
                       cable.variables['time'].units)

    Rainf = pd.DataFrame(cable.variables['Rainf'][:, 0, 0], columns=['Rainf'])
    Rainf = Rainf * 1800.
    Rainf['dates'] = Time
    Rainf = Rainf.set_index('dates')
    Rainf = Rainf.resample("D").agg('sum')
    Rainf.index = Rainf.index - pd.datetime(2011, 12, 31)
    Rainf.index = Rainf.index.days

    var = pd.DataFrame(cable.variables[term][:, 0, 0], columns=['var'])
    #var = pd.DataFrame(cable.variables['Rnet'][:,0,0]-cable.variables['Qg'][:,0,0],columns=['var'])
    var['dates'] = Time
    var = var.set_index('dates')
    var = var.resample("D").agg('mean')
    var.index = var.index - pd.datetime(2011, 12, 31)
    var.index = var.index.days

    Tair = pd.DataFrame(cable.variables['Tair'][:, 0, 0] - 273.15,
                        columns=['Tair'])
    Tair['dates'] = Time
    Tair = Tair.set_index('dates')
    Tair = Tair.resample("D").agg('max')
    Tair.index = Tair.index - pd.datetime(2011, 12, 31)
    Tair.index = Tair.index.days

    # exclude rainday and the after two days of rain
    day = np.zeros((len(var)), dtype=bool)

    for i in np.arange(0, len(var)):
        if (Tair.values[i] >= 35. and Rainf.values[i] == 0.):
            day[i] = True

    event = 0
    con_max = 0
    i = 0
    while i < len(var) - 2:
        if np.all([day[i:i + 3]]):
            event += 1
            i += 3
            con = 3
            while day[i]:
                con += 1
                i += 1
        else:
            con = 0
            i += 1
        if con > con_max:
            con_max = con

    print(event)
    print(con_max)

    v = np.zeros((event, con_max))
    lct = np.zeros((event, con_max))
    v[:, :] = np.nan
    for con in np.arange(1, con_max + 1):
        lct[:, con - 1] = con

    i = 0
    j = 0
    while i < len(var) - 2:
        if (np.all([day[i:i + 3]])):
            print(Tair.index[i])
            print(var['var'].values[i])
            v[j, 0] = var['var'].values[i]
            v[j, 1] = var['var'].values[i + 1]
            v[j, 2] = var['var'].values[i + 2]
            i = i + 3
            cont_day = 3
            while day[i]:
                v[j, cont_day] = var['var'].values[i]
                i += 1
                cont_day += 1
            j += 1
        else:
            i += 1
    print(v)
    print(lct)
    #return np.ravel(v),np.ravel(lct);
    return v, lct
Code Example #36
          "ZEEL":"Media & Entertainment", 
          "HINDALCO":"Metals & Mining", "VEDL":"Metals & Mining", "JSWSTEEL":"Metals & Mining", "TATASTEEL":"Metals & Mining", "COALINDIA":"Metals & Mining",
          "CIPLA":"Pharma", "DRREDDY":"Pharma", "SUNPHARMA":"Pharma",
          "ADANIPORTS":"Shipping","MUNDRAPORT":"Shipping",
          "BHARTIARTL":"Telecom"
   
         }

df["SECTORS"] = df["Symbol"].map(sectors)
df


# In[4]:


df_2017 = df[(df.index >= pd.datetime(2017, 1, 1)) & (df.index <= pd.datetime(2017, 12, 31))]

cmp_17 = df_2017.groupby(["Symbol"])
companies_17 = cmp_17.resample('MS').mean()
companies_17


# In[5]:


df_2018 = df[(df.index >= pd.datetime(2018, 1, 1)) & (df.index <= pd.datetime(2018, 12, 31))]

cmp_18 = df_2018.groupby(["Symbol"])
companies_18 = cmp_18.resample('MS').mean()
companies_18
Code Example #37
def transform_editing_data_to_file_folder_structure(path_to_csv_actions,
                                                    path_to_csv_badges,
                                                    path_to_data_dir):
    '''
    Expecting data in the PIVOTED format from the Stack Overflow query editor. 
    Here the csv file has an index of userIds, and the columns are the date from 
    start to end. The values are the counts of edits that that user performed on that
    day. There is a separate file for the userId.
    '''
    import tqdm

    data_actions = pd.read_csv(path_to_csv_actions)
    badge_achievements = pd.read_csv(path_to_csv_badges)

    data_actions = data_actions[data_actions.UserId.isin(
        badge_achievements.UserId)]
    badge_achievements = badge_achievements[badge_achievements.UserId.isin(
        data_actions.UserId)]

    start_date = pd.datetime(year=2009, month=1, day=1)

    badge_achievements.Date = pd.to_datetime(badge_achievements.Date)
    badge_achievements['day'] = (badge_achievements.Date - start_date).dt.days

    user_ids = badge_achievements.UserId.unique()
    size_data = len(user_ids)

    np.random.seed(11)

    train = np.random.choice(user_ids,
                             size=int(np.floor(0.6 * size_data)),
                             replace=False)
    user_ids = user_ids[~np.in1d(user_ids, train)]
    validate = np.random.choice(user_ids,
                                size=int(np.floor(0.2 * size_data)),
                                replace=False)
    user_ids = user_ids[~np.in1d(user_ids, validate)]
    test = np.random.choice(user_ids,
                            size=int(np.floor(0.2 * size_data)),
                            replace=False)

    data_actions.set_index('UserId', inplace=True)
    badge_achievements.set_index('UserId', inplace=True)

    num_days = (badge_achievements.Date.max() - start_date).days

    for dset in [train, validate, test]:
        for user in tqdm.tqdm(dset):

            trajectory = data_actions.loc[user]
            trajectory = trajectory.reset_index()
            trajectory['index'] = pd.to_datetime(trajectory['index'])
            trajectory['day'] = (trajectory['index'] - start_date).dt.days
            trajectory.rename(columns={
                'index': 'date',
                user: 'num_actions'  # the scraped snippet showed '******' here; restored from the 'num_actions' reference below
            },
                              inplace=True)
            trajectory.sort_values('day', inplace=True)
            trajectory.set_index('day', inplace=True)
            trajectory = trajectory.reindex(range(num_days + 1), fill_value=0)

            action_trajectory = torch.tensor(trajectory[['num_actions'
                                                         ]].values,
                                             dtype=torch.long)
            torch.save(action_trajectory,
                       '{}/user_{}.pt'.format(path_to_data_dir, user))

    with open('{}/badge_achievements.json'.format(path_to_data_dir), 'w') as f:
        badge_dict = badge_achievements['day'].to_dict()
        badge_dict = {
            k: {
                'strunk_white': [int(v)]
            }
            for k, v in badge_dict.items()
        }
        json.dump(badge_dict, f)

    with open('{}/data_indexes.json'.format(path_to_data_dir), 'w') as f:
        obj = {}
        obj['train'] = [int(u) for u in train]
        obj['test'] = [int(u) for u in test]
        obj['validate'] = [int(u) for u in validate]
        json.dump(obj, f)
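The docstring above describes the expected pivoted layout: one row per UserId, one column per date, and per-day edit counts as values. A tiny synthetic illustration of that shape (user ids and counts invented purely for illustration):

import pandas as pd

data_actions = pd.DataFrame(
    {'2009-01-01': [0, 2], '2009-01-02': [1, 0], '2009-01-03': [3, 1]},
    index=pd.Index([101, 202], name='UserId'))
print(data_actions)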
Code Example #38
def read_data(vids, data_path, t_division):

    this_data_file_name = t_division + 'data.hdf5'
    this_division_file_name = t_division + 'division.hdf5'
    data_path_file_names = os.listdir('../')
    if this_data_file_name in data_path_file_names and this_division_file_name in data_path_file_names:
        print('reading previously saved data')
        sum_hourly_viewers = pd.read_hdf('../' + this_division_file_name)
        in_file = h5py.File('../' + this_data_file_name, 'r')
        account_data = np.copy(in_file['account_data'])
        org_data = np.copy(in_file['org_data'])
        vids_out = np.copy(in_file['vids_out'])
        in_file.close()
    else:
        account_data = []
        org_data = []
        vids_out = []

        for vid_ind, vid in enumerate(vids):

            file_name = data_path + vid + '.06-17.08-02.hdf5'
            df = pd.read_hdf(file_name)
            if len(df) == 0:
                print(file_name, end='')
            else:
                df = df.append(
                    pd.DataFrame(
                        {
                            'first_start_time': pd.datetime(2016, 6, 16),
                            'account_id': '',
                            'org_id': ''
                        }, [-1]))
                df = df.append(
                    pd.DataFrame(
                        {
                            'first_start_time': pd.datetime(2016, 8, 3),
                            'account_id': '',
                            'org_id': ''
                        }, [len(df.account_id)]))

                df_reind = df.copy()
                df_reind = df_reind.set_index(['first_start_time'])

                year_month_day_hour = pd.to_datetime(
                    2016 * 1000000 + df_reind.index.month * 10000 +
                    df_reind.index.day * 100 + df_reind.index.hour,
                    format='%Y%m%d%H')
                df_reind['year_month_day_hour'] = year_month_day_hour
                num_current_viewers = df_reind.groupby(
                    'year_month_day_hour').account_id.nunique()
                num_current_orgs = df_reind.groupby(
                    'year_month_day_hour').org_id.nunique()

                sum_hourly_viewers = num_current_viewers.resample(
                    t_division).sum()
                sum_hourly_viewers = sum_hourly_viewers.fillna(0)
                sum_hourly_viewers = sum_hourly_viewers[
                    '2016-06-18 00:00:00':'2016-07-27 0:00:00']

                sum_hourly_orgs = num_current_orgs.resample(t_division).sum()
                sum_hourly_orgs = sum_hourly_orgs.fillna(0)
                sum_hourly_orgs = sum_hourly_orgs[
                    '2016-06-18 00:00:00':'2016-07-27 0:00:00']

                shu_array = sum_hourly_viewers.values.astype('float')
                sho_array = sum_hourly_orgs.values.astype('float')

                account_data.append(shu_array.tolist())
                org_data.append(sho_array.tolist())
                vids_out.append(vid)
                print(str(vid_ind) + ' ', end='')

        account_data = np.array(account_data)
        org_data = np.array(org_data)
        vids_out = np.array(vids_out)

        print('\nsaving data to ' + this_division_file_name + ' ' +
              this_data_file_name)
        sum_hourly_viewers.to_hdf('../' + this_division_file_name, 'w')
        out_file = h5py.File('../' + this_data_file_name, 'w')
        out_file.create_dataset('account_data', data=account_data)
        out_file.create_dataset('org_data', data=org_data)
        out_file.create_dataset('vids_out', data=vids_out)
        out_file.flush()
        out_file.close()

    t = sum_hourly_viewers.index
    return vids_out, t, account_data, org_data
Code Example #39
    def test_datetimeindex_constructor_misc(self):
        arr = ['1/1/2005', '1/2/2005', 'Jn 3, 2005', '2005-01-04']
        pytest.raises(Exception, DatetimeIndex, arr)

        arr = ['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04']
        idx1 = DatetimeIndex(arr)

        arr = [datetime(2005, 1, 1), '1/2/2005', '1/3/2005', '2005-01-04']
        idx2 = DatetimeIndex(arr)

        arr = [lib.Timestamp(datetime(2005, 1, 1)), '1/2/2005', '1/3/2005',
               '2005-01-04']
        idx3 = DatetimeIndex(arr)

        arr = np.array(['1/1/2005', '1/2/2005', '1/3/2005',
                        '2005-01-04'], dtype='O')
        idx4 = DatetimeIndex(arr)

        arr = to_datetime(['1/1/2005', '1/2/2005', '1/3/2005', '2005-01-04'])
        idx5 = DatetimeIndex(arr)

        arr = to_datetime(['1/1/2005', '1/2/2005', 'Jan 3, 2005', '2005-01-04'
                           ])
        idx6 = DatetimeIndex(arr)

        idx7 = DatetimeIndex(['12/05/2007', '25/01/2008'], dayfirst=True)
        idx8 = DatetimeIndex(['2007/05/12', '2008/01/25'], dayfirst=False,
                             yearfirst=True)
        tm.assert_index_equal(idx7, idx8)

        for other in [idx2, idx3, idx4, idx5, idx6]:
            assert (idx1.values == other.values).all()

        sdate = datetime(1999, 12, 25)
        edate = datetime(2000, 1, 1)
        idx = DatetimeIndex(start=sdate, freq='1B', periods=20)
        assert len(idx) == 20
        assert idx[0] == sdate + 0 * offsets.BDay()
        assert idx.freq == 'B'

        idx = DatetimeIndex(end=edate, freq=('D', 5), periods=20)
        assert len(idx) == 20
        assert idx[-1] == edate
        assert idx.freq == '5D'

        idx1 = DatetimeIndex(start=sdate, end=edate, freq='W-SUN')
        idx2 = DatetimeIndex(start=sdate, end=edate,
                             freq=offsets.Week(weekday=6))
        assert len(idx1) == len(idx2)
        assert idx1.offset == idx2.offset

        idx1 = DatetimeIndex(start=sdate, end=edate, freq='QS')
        idx2 = DatetimeIndex(start=sdate, end=edate,
                             freq=offsets.QuarterBegin(startingMonth=1))
        assert len(idx1) == len(idx2)
        assert idx1.offset == idx2.offset

        idx1 = DatetimeIndex(start=sdate, end=edate, freq='BQ')
        idx2 = DatetimeIndex(start=sdate, end=edate,
                             freq=offsets.BQuarterEnd(startingMonth=12))
        assert len(idx1) == len(idx2)
        assert idx1.offset == idx2.offset
Code Example #40
        price_regressor = False
        #time_series.columns = ['ds', 'y', 'weekends', 'snap', 'floor']
        time_series.columns = ['ds', 'y', 'weekends', 'snap']

    #time_max = np.max(time_series['y']) * 1.1
    #time_series['cap'] = time_max

    time_series['cum7'] = cum7[i, (start_date - 1):-28]
    # time_series['cum14'] = cum14[i, (start_date-1):-28]
    # time_series['cum28'] = cum28[i, (start_date-1):-28]
    # time_series['cum56'] = cum56[i, (start_date-1):-28]
    time_series['cum_max'] = cum_max[i, (start_date - 1):-28]
    time_series['cum_zero'] = cum_zero[i, (start_date - 1):-28]

    end_train = len(time_series) - 28
    time_series.loc[:, 'ds'] = pd.datetime(2011, 1, 29) + pd.to_timedelta(
        time_series['ds'] - 1, unit='d')

    m = Prophet(uncertainty_samples=0,
                holidays=holidays,
                changepoint_prior_scale=0.9,
                holidays_prior_scale=0.05,
                yearly_seasonality=5)  #growth='logistic')

    # m.add_country_holidays(country_name='US')

    if price_regressor == True:
        m.add_regressor('price')

    m.add_regressor('weekends')
    m.add_regressor('snap')
Code Example #41
 def __init__(self, ascategory=True, t0=pd.datetime(2000, 1, 1)):
     self.ascategory = ascategory
     self.t0 = t0
Code Example #42
# Compute the values for the RFM model
# groupby() returns a one-dimensional Series whose index is the groupby key

# Recency: aggregate order dates by customer id and take the most recent (maximum)
recency_data=sales_data['ORDERDATE'].groupby(sales_data.index).max()

# Frequency: aggregate orders by customer id and count them
frequency_values=sales_data['ORDERID'].groupby(sales_data.index).count()

# Monetary: sum the order amounts per customer id
monetary_value=sales_data['AMOUNTINFO'].groupby(sales_data.index).sum()
print(type(monetary_value))

# Compute the RFM scores

deadline_data=pd.datetime(2017, 1, 1) # cutoff date used to compute the time interval, i.e. the model's R value

r_interval=(deadline_data - recency_data).dt.days # for a Series, Series.dt.days gives the number of days of each element

# cut picks equal-width bins from the value range itself, while qcut picks bins from the frequency of the values
# so with cut not every label may appear: e.g. with only the values 1 and 10, bins=5 and labels=[1,2,3,4,5], only labels 1 and 5 occur

r_score=pd.cut(x=r_interval,bins=5,labels=[5,4,3,2,1]) # a smaller interval (more recent) is better; labels are assigned to bins in ascending value order, so the largest values get the last label
f_score=pd.cut(x=frequency_values,bins=5,labels=[1,2,3,4,5])
m_score=pd.cut(x=monetary_value,bins=5,labels=[1,2,3,4,5])


# Combine the RFM values into a DataFrame
rfm_list=[r_score,f_score,m_score] # build a list of the three score Series
rfm_col_names=['r_score','f_score','m_score']
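The cut-versus-qcut distinction called out in the comments above is easy to see on a toy series; a small sketch with synthetic values (not the sales data used in this snippet):

import pandas as pd

values = pd.Series([1, 1, 2, 2, 10])

# equal-width bins over the value range: most labels stay empty
print(pd.cut(values, bins=5, labels=[1, 2, 3, 4, 5]).value_counts())

# quantile-based bins: the bin edges follow the data distribution instead
print(pd.qcut(values, q=2).value_counts())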
Code Example #43
    parse_dates=["date"],
    skiprows=range(1, 66458909)  # 2016-01-01
)

df_test = pd.read_csv(
    "../Data/test.csv",
    usecols=[0, 1, 2, 3, 4],
    dtype={
        'onpromotion': bool
    },
    parse_dates=["date"]  # , date_parser=parser
).set_index(['store_nbr', 'item_nbr', 'date'])

items = pd.read_csv("../Data/items.csv", ).set_index("item_nbr")

df_2017 = df_train.loc[df_train.date >= pd.datetime(2017, 1, 1)]
del df_train

promo_2017_train = df_2017.set_index(["store_nbr", "item_nbr", "date"
                                      ])[["onpromotion"
                                          ]].unstack(level=-1).fillna(False)
promo_2017_train.columns = promo_2017_train.columns.get_level_values(1)
promo_2017_test = df_test[["onpromotion"]].unstack(level=-1).fillna(False)
promo_2017_test.columns = promo_2017_test.columns.get_level_values(1)
promo_2017_test = promo_2017_test.reindex(promo_2017_train.index).fillna(False)
promo_2017 = pd.concat([promo_2017_train, promo_2017_test], axis=1)
del promo_2017_test, promo_2017_train

df_2017 = df_2017.set_index(["store_nbr", "item_nbr", "date"
                             ])[["unit_sales"]].unstack(level=-1).fillna(0)
df_2017.columns = df_2017.columns.get_level_values(1)
Code Example #44
def download_volatility(config):
    """Downloads volatility data from OMI website."""

    url = 'https://realized.oxford-man.ox.ac.uk/images/oxfordmanrealizedvolatilityindices.zip'

    data_folder = config.data_folder
    csv_path = os.path.join(data_folder,
                            'oxfordmanrealizedvolatilityindices.csv')
    zip_path = os.path.join(data_folder,
                            'oxfordmanrealizedvolatilityindices.zip')

    download_and_unzip(url, zip_path, csv_path, data_folder)

    print('Unzip complete. Adding extra inputs')

    df = pd.read_csv(csv_path, index_col=0)  # no explicit index

    # Adds additional date/day fields
    idx = [str(s).split('+')[0]
           for s in df.index]  # ignore timezones, we don't need them
    dates = pd.to_datetime(idx)
    df['date'] = dates
    df['days_from_start'] = (dates - pd.datetime(2000, 1, 3)).days
    df['day_of_week'] = dates.dayofweek
    df['day_of_month'] = dates.day
    df['week_of_year'] = dates.weekofyear
    df['month'] = dates.month
    df['year'] = dates.year
    df['categorical_id'] = df['Symbol'].copy()

    # Processes log volatility
    vol = df['rv5_ss'].copy()
    vol.loc[vol == 0.] = np.nan
    df['log_vol'] = np.log(vol)

    # Adds static information
    symbol_region_mapping = {
        '.AEX': 'EMEA',
        '.AORD': 'APAC',
        '.BFX': 'EMEA',
        '.BSESN': 'APAC',
        '.BVLG': 'EMEA',
        '.BVSP': 'AMER',
        '.DJI': 'AMER',
        '.FCHI': 'EMEA',
        '.FTMIB': 'EMEA',
        '.FTSE': 'EMEA',
        '.GDAXI': 'EMEA',
        '.GSPTSE': 'AMER',
        '.HSI': 'APAC',
        '.IBEX': 'EMEA',
        '.IXIC': 'AMER',
        '.KS11': 'APAC',
        '.KSE': 'APAC',
        '.MXX': 'AMER',
        '.N225': 'APAC ',
        '.NSEI': 'APAC',
        '.OMXC20': 'EMEA',
        '.OMXHPI': 'EMEA',
        '.OMXSPI': 'EMEA',
        '.OSEAX': 'EMEA',
        '.RUT': 'EMEA',
        '.SMSI': 'EMEA',
        '.SPX': 'AMER',
        '.SSEC': 'APAC',
        '.SSMI': 'EMEA',
        '.STI': 'APAC',
        '.STOXX50E': 'EMEA'
    }

    df['Region'] = df['Symbol'].apply(lambda k: symbol_region_mapping[k])

    # Performs final processing
    output_df_list = []
    for grp in df.groupby('Symbol'):
        sliced = grp[1].copy()
        sliced.sort_values('days_from_start', inplace=True)
        # Impute log volatility values
        sliced['log_vol'].fillna(method='ffill', inplace=True)
        sliced.dropna()
        output_df_list.append(sliced)

    df = pd.concat(output_df_list, axis=0)

    output_file = config.data_csv_path
    print('Completed formatting, saving to {}'.format(output_file))
    df.to_csv(output_file)

    print('Done.')
Code Example #45
#TODO: Using the same data as before, instead of a two dimensional histogram, break it up as above


#text and annotation
plt.style.use('seaborn-whitegrid')
births = pd.read_csv(path + 'births.csv')
quartiles = np.percentile(births['births'], [25, 50, 75])
mu, sig = quartiles[1], 0.74 * (quartiles[2] - quartiles[0])
births = births.query('(births > @mu - 5 * @sig) & (births < @mu + 5 * @sig)')
births['day'] = births['day'].astype(int)
births.index = pd.to_datetime(10000 * births.year +
                              100 * births.month +
                              births.day, format='%Y%m%d')
births_by_date = births.pivot_table('births',
                                    [births.index.month, births.index.day])
births_by_date.index = [pd.datetime(2012, month, day)
                        for (month, day) in births_by_date.index]
fig, ax = plt.subplots(figsize=(12, 4))
births_by_date.plot(ax=ax);

fig, ax = plt.subplots(figsize=(12, 4))
births_by_date.plot(ax=ax)
# Add labels to the plot
style = dict(size=10, color='gray')
ax.text('2012-1-1', 3950, "New Year's Day", **style)
ax.text('2012-7-4', 4250, "Independence Day", ha='center', **style)
ax.text('2012-9-4', 4850, "Labor Day", ha='center', **style)
ax.text('2012-10-31', 4600, "Halloween", ha='right', **style)
ax.text('2012-11-25', 4450, "Thanksgiving", ha='center', **style)
ax.text('2012-12-25', 3850, "Christmas ", ha='right', **style)
# Label the axes
ax.set(title='USA births by day of year (1969-1988)',
コード例 #46
ax.set_title('Show only input flows')
plt.show()

# ***** 4. example ***************************************************
# Create a plot to show the balance around a bus.
# Order and colors are customisable.

inorder = [(('pv', 'electricity'), 'flow'),
           (('wind', 'electricity'), 'flow'),
           (('storage', 'electricity'), 'flow'),
           (('pp_gas', 'electricity'), 'flow')]

fig = plt.figure(figsize=(10, 5))
electricity_seq = views.node(results, 'electricity')['sequences']
plot_slice = oev.plot.slice_df(electricity_seq,
                               date_from=pd.datetime(2012, 2, 15))
my_plot = oev.plot.io_plot('electricity', plot_slice, cdict=cdict,
                           inorder=inorder, ax=fig.add_subplot(1, 1, 1),
                           smooth=False)
ax = shape_legend('electricity', **my_plot)
oev.plot.set_datetime_ticks(ax, plot_slice.index, tick_distance=48,
                            date_format='%d-%m-%H', offset=12)

ax.set_ylabel('Power in MW')
ax.set_xlabel('2012')
ax.set_title("Electricity bus, non-smoothed representation")

# ***** 5. example ***************************************************
# Create a plot to show the balance around a bus.
# Make a smooth plot even though it is not scientifically correct.
コード例 #47
ファイル: ML2.py プロジェクト: 17alperyildirim/KocPython2021
def hours_of_daylight(date, axis=23.44, latitude=47.61):
    """Compute the hours of daylight for the given date"""
    days = (date - pd.datetime(2000, 12, 21)).days
    m = (1. - np.tan(np.radians(latitude)) * np.tan(np.radians(axis) * np.cos(days * 2 * np.pi / 365.25)))
    return 24. * np.degrees(np.arccos(1 - np.clip(m, 0, 2))) / 180.
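
A usage sketch for the function above (dates chosen arbitrarily); the default latitude of 47.61°N corresponds roughly to Seattle.

import numpy as np
import pandas as pd

for d in [pd.Timestamp("2015-06-21"), pd.Timestamp("2015-12-21")]:
    # day length should be near its yearly extremes around the solstices
    print(d.date(), round(hours_of_daylight(d), 2))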
コード例 #48
# determine unique values in a column
users.occupation.nunique()      # count the number of unique values
users.occupation.unique()       # return the unique values

# replace all instances of a value in a column (must match entire value)
ufo.State.replace('Fl', 'FL', inplace=True)

# string methods are accessed via 'str'
ufo.State.str.upper()                               # converts to uppercase
ufo.Colors_Reported.str.contains('RED', na=False)  # checks for a substring

# convert a string to the datetime format
ufo['Time'] = pd.to_datetime(ufo.Time)
ufo.Time.dt.hour                        # datetime format exposes convenient attributes
(ufo.Time.max() - ufo.Time.min()).days  # also allows you to do datetime "math"
ufo[ufo.Time > pd.datetime(2014, 1, 1)] # boolean filtering with datetime format

# setting and then removing an index
ufo.set_index('Time', inplace=True)
ufo.reset_index(inplace=True)

# sort a column by its index
ufo.State.value_counts().sort_index()

# change the data type of a column
drinks['beer'] = drinks.beer.astype('float')

# change the data type of a column when reading in a file
pd.read_csv('drinks.csv', dtype={'beer_servings':float})

# create dummy variables for 'continent' and exclude first dummy column
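
# one hypothetical way to finish the line above (a sketch, not necessarily the original code)
pd.get_dummies(drinks.continent, prefix='cont').iloc[:, 1:]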
コード例 #49
from hydroDL.master import basins
from hydroDL.app import waterQuality
from hydroDL import kPath, utils
from hydroDL.model import trainTS
from hydroDL.post import axplot, figplot
from hydroDL.data import usgs, gageII, gridMET, ntn, transform
import torch
import os
import json
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import statsmodels.api as sm

startDate = pd.datetime(1979, 1, 1)
endDate = pd.datetime(2020, 1, 1)
sn = 1
codeLst = usgs.newC

dirSel = os.path.join(kPath.dirData, 'USGS', 'inventory', 'siteSel')
with open(os.path.join(dirSel, 'dictRB_Y30N5.json')) as f:
    dictSite = json.load(f)
siteNoLst = dictSite['comb']
t0 = time.time()

dirRoot = os.path.join(kPath.dirWQ, 'modelStat', 'WRTDS-W')
dirOut = os.path.join(dirRoot, 'B10')
for folder in [dirRoot, dirOut]:
    if not os.path.exists(folder):
        os.mkdir(folder)
コード例 #50
ファイル: Ass3.py プロジェクト: zzkulala/Project-C
    'Long Weighted Return Tc', 'Short Weighted Return Tc'
]]  # Remove useless columns
dfRaw2 = pd.read_csv('dbo_famafrench.txt', delimiter='\t')
dfRaw3 = dfRaw2.loc[:, ['d', 'mktrf', 'smb', 'hml', 'rf', 'umd']]
# De-normalized table
#dfRaw1['Row Labels'].dt.date
dfRaw1['Row Labels'] = pd.to_datetime(dfRaw1['Row Labels'])
dfRaw1.set_index('Row Labels', inplace=True)
dfRaw3['d'] = dfRaw3['d'].str.split(expand=True)
dfRaw3['d'] = pd.to_datetime(dfRaw3['d'])
dfRaw3.set_index('d', inplace=True)

result = dfRaw1.join(dfRaw3, how='inner')

start = pd.datetime(2001, 1, 1)
end = pd.datetime(2004, 12, 31)
q2Data = result[(result.index >= start) & (result.index <= end)]


##Q2 (a)
def ComputeAnnualStat(portfolioType, startYear, yearLength):
    annualReturn = []
    for i in range(yearLength):
        start = pd.datetime(startYear + i, 1, 1)
        end = pd.datetime(startYear + i, 12, 31)
        annualData = result[(result.index >= start) & (result.index <= end)]
        annualReturn.append(annualData[portfolioType].sum())

    meanAnlReturn = np.average(annualReturn)
    anlVolatility = np.std(annualReturn)
    anlSharpe = meanAnlReturn / anlVolatility
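
The yearly loop in ComputeAnnualStat can also be expressed with resample; a sketch on invented daily returns (not the Fama-French data itself):

import numpy as np
import pandas as pd

idx = pd.date_range("2001-01-01", "2004-12-31", freq="D")
daily = pd.Series(np.random.default_rng(0).normal(0.0005, 0.01, len(idx)), index=idx)

annual = daily.resample("A").sum()       # one summed return per calendar year
sharpe = annual.mean() / annual.std()    # same mean/volatility ratio as above
print(annual.round(3).tolist(), round(sharpe, 2))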
コード例 #51
ファイル: TimeSeries.py プロジェクト: njxjtu/LearnPython
import pandas as pd
import numpy as np
import scipy as sp

print(pd.datetime.now())  # current date and time
print(pd.Timestamp('2017-03-01'))  # timestamp
print(pd.Timestamp(1587687255, unit='s'))
print(pd.date_range("11:00", "13:30", freq="30min").time)
print(pd.to_datetime(pd.Series(['Jul 31, 2009', '2010-01-10', None])))
print(pd.to_datetime(['2005/11/23', '2010.12.31', None]))
print(pd.date_range('1/1/2011', periods=5))
print(pd.date_range('1/1/2011', periods=5, freq='M'))
start = pd.datetime(2011, 1, 1)
end = pd.datetime(2011, 1, 5)
print(pd.date_range(start, end))

print(pd.Timedelta('2 days 2 hours 15 minutes 30 seconds'))
print(pd.Timedelta(6, unit='h'))
print(pd.Timedelta(days=2))

s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D'))
td = pd.Series([pd.Timedelta(days=i) for i in range(3)])
df = pd.DataFrame(dict(A=s, B=td))

print(df)
df['C'] = df['A'] + df['B']
print(df)
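
# follow-on sketch: subtracting the datetime columns gives the Timedeltas back,
# and the .dt accessor exposes the usual calendar attributes
df['D'] = df['C'] - df['A']
print(df['D'])
print(df['C'].dt.dayofweek)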
コード例 #52
import netCDF4 as nc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def main(fobs_Esoil, fobs_vwc, fcable, case_name, ring, layer, ep_type):

    est_esoil = pd.read_csv(fobs_Esoil,
                            usecols=['Ring', 'Date', 'wuTP', 'EfloorPred'])
    est_esoil['Date'] = pd.to_datetime(est_esoil['Date'],
                                       format="%d/%m/%Y",
                                       infer_datetime_format=False)
    est_esoil['Date'] = est_esoil['Date'] - pd.datetime(2011, 12, 31)
    est_esoil['Date'] = est_esoil['Date'].dt.days
    est_esoil = est_esoil.sort_values(by=['Date'])
    # divide neo into groups
    if ring == 'amb':
        subset = est_esoil[(est_esoil['Ring'].isin(['R2', 'R3', 'R6']))
                           & (est_esoil.Date > 366)]
    elif ring == 'ele':
        subset = est_esoil[(est_esoil['Ring'].isin(['R1', 'R4', 'R5']))
                           & (est_esoil.Date > 366)]
    else:
        subset = est_esoil[(est_esoil['Ring'].isin([ring]))
                           & (est_esoil.Date > 366)]

    subset = subset.groupby(by=["Date"]).mean()
    #subset['wuTP']   = subset['wuTP'].replace(NA, float('nan'))
    subset['wuTP'] = subset['wuTP'].clip(lower=0.)
    subset['wuTP'] = subset['wuTP'].replace(0., float('nan'))
    #subset['EfloorPred'] = subset['EfloorPred'].replace(NA, float('nan'))
    subset['EfloorPred'] = subset['EfloorPred'].clip(lower=0.)
    subset['EfloorPred'] = subset['EfloorPred'].replace(0., float('nan'))
    #subset = subset.xs('swc.tdr', axis=1, drop_level=True)
    #print(subset)

    # tdr at 30cm depth
    tdr_30 = pd.read_csv(fobs_vwc, usecols=['Ring', 'Date', 'vwcMean'])
    tdr_30['Date'] = pd.to_datetime(tdr_30['Date'],
                                    format="%d/%m/%Y",
                                    infer_datetime_format=False)
    tdr_30['Date'] = tdr_30['Date'] - pd.datetime(2011, 12, 31)
    tdr_30['Date'] = tdr_30['Date'].dt.days
    tdr_30 = tdr_30.sort_values(by=['Date'])
    # divide neo into groups
    if ring == 'amb':
        subset1 = tdr_30[(tdr_30['Ring'].isin(['R2', 'R3', 'R6']))
                         & (tdr_30.Date > 366)]
    elif ring == 'ele':
        subset1 = tdr_30[(tdr_30['Ring'].isin(['R1', 'R4', 'R5']))
                         & (tdr_30.Date > 366)]
    else:
        subset1 = tdr_30[(tdr_30['Ring'].isin([ring])) & (tdr_30.Date > 366)]

    subset1 = subset1.groupby(by=["Date"]).mean()
    subset1['vwcMean'] = subset1['vwcMean'].clip(lower=0.)
    subset1['vwcMean'] = subset1['vwcMean'].replace(0., float('nan'))
    #subset1['vwcMean']   = subset1['wuTP'].replace('NA', float('nan'))
    #print(subset1)

    # _________________________ CABLE ___________________________
    cable = nc.Dataset(fcable, 'r')
    Time = nc.num2date(cable.variables['time'][:],
                       cable.variables['time'].units)
    SoilMoist = pd.DataFrame(cable.variables['SoilMoist'][:, 0, 0, 0],
                             columns=['SoilMoist'])

    if layer == "6":
        SoilMoist['SoilMoist'] = ( cable.variables['SoilMoist'][:,0,0,0]*0.022 \
                                 + cable.variables['SoilMoist'][:,1,0,0]*0.058 \
                                 + cable.variables['SoilMoist'][:,2,0,0]*0.154 \
                                 + cable.variables['SoilMoist'][:,3,0,0]*(0.5-0.022-0.058-0.154) )/0.5
    elif layer == "13":
        SoilMoist['SoilMoist'] = ( cable.variables['SoilMoist'][:,0,0,0]*0.02 \
                                 + cable.variables['SoilMoist'][:,1,0,0]*0.05 \
                                 + cable.variables['SoilMoist'][:,2,0,0]*0.06 \
                                 + cable.variables['SoilMoist'][:,3,0,0]*0.13 \
                                 + cable.variables['SoilMoist'][:,3,0,0]*(0.5-0.02-0.05-0.06-0.13) )/0.5
    elif layer == "31uni":
        SoilMoist['SoilMoist'] = ( cable.variables['SoilMoist'][:,0,0,0]*0.15 \
                                 + cable.variables['SoilMoist'][:,1,0,0]*0.15 \
                                 + cable.variables['SoilMoist'][:,2,0,0]*0.15 \
                                 + cable.variables['SoilMoist'][:,3,0,0]*0.05 )/0.5
    elif layer == "31exp":
        SoilMoist['SoilMoist'] = ( cable.variables['SoilMoist'][:,0,0,0]*0.020440 \
                                 + cable.variables['SoilMoist'][:,1,0,0]*0.001759 \
                                 + cable.variables['SoilMoist'][:,2,0,0]*0.003957 \
                                 + cable.variables['SoilMoist'][:,3,0,0]*0.007035 \
                                 + cable.variables['SoilMoist'][:,4,0,0]*0.010993 \
                                 + cable.variables['SoilMoist'][:,5,0,0]*0.015829 \
                                 + cable.variables['SoilMoist'][:,6,0,0]*0.021546 \
                                 + cable.variables['SoilMoist'][:,7,0,0]*0.028141 \
                                 + cable.variables['SoilMoist'][:,8,0,0]*0.035616 \
                                 + cable.variables['SoilMoist'][:,9,0,0]*0.043971 \
                                 + cable.variables['SoilMoist'][:,10,0,0]*0.053205 \
                                 + cable.variables['SoilMoist'][:,11,0,0]*0.063318 \
                                 + cable.variables['SoilMoist'][:,12,0,0]*0.074311 \
                                 + cable.variables['SoilMoist'][:,13,0,0]*0.086183 \
                                 + cable.variables['SoilMoist'][:,14,0,0]*(0.5-0.466304))/0.5
    elif layer == "31para":
        SoilMoist['SoilMoist'] = ( cable.variables['SoilMoist'][:,0,0,0]*0.020440 \
                                 + cable.variables['SoilMoist'][:,1,0,0]*0.001759 \
                                 + cable.variables['SoilMoist'][:,2,0,0]*0.003957 \
                                 + cable.variables['SoilMoist'][:,3,0,0]*0.007035 \
                                 + cable.variables['SoilMoist'][:,4,0,0]*0.010993 \
                                 + cable.variables['SoilMoist'][:,5,0,0]*0.015829 \
                                 + cable.variables['SoilMoist'][:,6,0,0]*(0.5-0.420714))/0.5

    SoilMoist['dates'] = Time
    SoilMoist = SoilMoist.set_index('dates')
    SoilMoist = SoilMoist.resample("D").agg('mean')
    SoilMoist.index = SoilMoist.index - pd.datetime(2011, 12, 31)
    SoilMoist.index = SoilMoist.index.days
    SoilMoist = SoilMoist.sort_values(by=['dates'])

    ESoil = pd.DataFrame(cable.variables['ESoil'][:, 0, 0], columns=['ESoil'])
    ESoil = ESoil * 1800.
    ESoil['dates'] = Time
    ESoil = ESoil.set_index('dates')
    ESoil = ESoil.resample("D").agg('sum')
    ESoil.index = ESoil.index - pd.datetime(2011, 12, 31)
    ESoil.index = ESoil.index.days
    #print(ESoil)

    if ep_type == 'PotEvap':
        Ep = pd.DataFrame(cable.variables['PotEvap'][:, 0, 0], columns=['Ep'])
        Ep = Ep * 1800.
        Ep['dates'] = Time
        Ep = Ep.set_index('dates')
        Ep = Ep.resample("D").agg('sum')
        Ep.index = Ep.index - pd.datetime(2011, 12, 31)
        Ep.index = Ep.index.days
        #print(Ep)
    elif ep_type == 'Rnet-G':
        Ep = pd.DataFrame(cable.variables['Rnet'][:, 0, 0] -
                          cable.variables['Qg'][:, 0, 0],
                          columns=['Ep'])
        Ep['dates'] = Time
        Ep = Ep.set_index('dates')
        Ep = Ep.resample("D").agg('mean')
        Ep.index = Ep.index - pd.datetime(2011, 12, 31)
        Ep.index = Ep.index.days
        print(Ep * 86400 / 2454000)

    Rainf = pd.DataFrame(cable.variables['Rainf'][:, 0, 0], columns=['Rainf'])
    Rainf = Rainf * 1800.
    Rainf['dates'] = Time
    Rainf = Rainf.set_index('dates')
    Rainf = Rainf.resample("D").agg('sum')
    Rainf.index = Rainf.index - pd.datetime(2011, 12, 31)
    Rainf.index = Rainf.index.days

    rain = Rainf['Rainf'].loc[np.all(
        [Rainf.index.isin(subset.index),
         Rainf.index.isin(subset1.index)],
        axis=0)]
    esoil = ESoil['ESoil'].loc[np.all(
        [ESoil.index.isin(subset.index),
         ESoil.index.isin(subset1.index)],
        axis=0)]
    ep = Ep['Ep'].loc[np.all(
        [Ep.index.isin(subset.index),
         Ep.index.isin(subset1.index)], axis=0)]
    soilmoist = SoilMoist['SoilMoist'].loc[np.all([
        SoilMoist.index.isin(subset.index),
        SoilMoist.index.isin(subset1.index)
    ],
                                                  axis=0)]

    wuTP = subset['wuTP'].loc[np.all(
        [subset.index.isin(subset1.index),
         subset.index.isin(SoilMoist.index)],
        axis=0)]
    EfloorPred = subset['EfloorPred'].loc[np.all(
        [subset.index.isin(subset1.index),
         subset.index.isin(SoilMoist.index)],
        axis=0)]
    vwcMean = subset1['vwcMean'].loc[subset1.index.isin(subset.index)]
    #.loc[np.all([subset1.index.isin(subset.index), subset.index.isin(SoilMoist.index)],axis=0)]

    # exclude tdr soilmoisture < 0 or tdr esoil < 0
    mask = np.any([np.isnan(wuTP), np.isnan(vwcMean)], axis=0)
    print(mask)
    rain = rain[mask == False]
    esoil = esoil[mask == False]
    ep = ep[mask == False]
    soilmoist = soilmoist[mask == False]
    wuTP = wuTP[mask == False]
    EfloorPred = EfloorPred[mask == False]
    vwcMean = vwcMean[mask == False]
    print("any(rain>0.)")
    print(np.any(rain > 0.))

    # exclude rainday and the after two days of rain
    mask = np.ones((len(rain)), dtype=bool)
    #print(rain)
    if rain.values[0] > 0.:
        mask[0] = False
    if rain.values[0] > 0. or rain.values[1] > 0.:
        mask[1] = False
    for i in np.arange(2, len(rain)):
        if (rain.values[i] > 0. or rain.values[i - 1] > 0.
                or rain.values[i - 2] > 0.):
            mask[i] = False
    rain = rain[mask == True]
    esoil = esoil[mask == True]
    ep = ep[mask == True]
    soilmoist = soilmoist[mask == True]
    wuTP = wuTP[mask == True]
    EfloorPred = EfloorPred[mask == True]
    vwcMean = vwcMean[mask == True]
    print("any(rain>0.)")
    print(np.any(rain > 0.))

    # exclude the days Rnet < 0.
    ep = ep.clip(lower=0.)
    ep = ep.replace(0., float('nan'))
    mask = np.isnan(ep)

    esoil = esoil[mask == False]
    ep = ep[mask == False]
    soilmoist = soilmoist[mask == False]
    wuTP = wuTP[mask == False]
    EfloorPred = EfloorPred[mask == False]
    vwcMean = vwcMean[mask == False]

    if ep_type == 'PotEvap':
        rate = esoil / ep
        rate_tdr = wuTP / ep  #wuTP/ep
    elif ep_type == 'Rnet-G':
        rate = esoil / (ep * 86400 / 2454000)
        rate_tdr = wuTP / (ep * 86400 / 2454000)

    print("-------------------------------------------------")
    print(np.any(esoil < 0.))
    print(np.any(ep < 0.))
    print(np.any(soilmoist < 0.))
    print(np.any(wuTP < 0.))
    print(np.any(vwcMean < 0.))

    print(esoil)
    print(ep)
    print(soilmoist)
    print(wuTP)
    print(vwcMean)
    print(rate)
    print(rate_tdr)
    print("-------------------------------------------------")

    # ____________________ Plot obs _______________________
    fig = plt.figure(figsize=[15, 10])
    fig.subplots_adjust(hspace=0.1)
    fig.subplots_adjust(wspace=0.05)
    plt.rcParams['text.usetex'] = False
    plt.rcParams['font.family'] = "sans-serif"
    plt.rcParams['font.sans-serif'] = "Helvetica"
    plt.rcParams['axes.labelsize'] = 14
    plt.rcParams['font.size'] = 14
    plt.rcParams['legend.fontsize'] = 10
    plt.rcParams['xtick.labelsize'] = 14
    plt.rcParams['ytick.labelsize'] = 14

    almost_black = '#262626'
    # change the tick colors also to the almost black
    plt.rcParams['ytick.color'] = almost_black
    plt.rcParams['xtick.color'] = almost_black

    # change the text colors also to the almost black
    plt.rcParams['text.color'] = almost_black

    # Change the default axis colors from black to a slightly lighter black,
    # and a little thinner (0.5 instead of 1)
    plt.rcParams['axes.edgecolor'] = almost_black
    plt.rcParams['axes.labelcolor'] = almost_black

    ax1 = fig.add_subplot(111)

    ax1.scatter(soilmoist, rate, s=2, marker='o', c='orange')
    ax1.scatter(vwcMean, rate_tdr, s=2, marker='o', c='green')
    ax1.set_xlim(0., 0.4)
    ax1.set_ylim(0., 1.)

    fig.savefig("EucFACE_Esoil_E0_theta_Gimeno-tdr_%s_%s_%s.png" \
                    % (ep_type, case_name, ring), bbox_inches='tight', pad_inches=0.1)
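
The resample-to-daily and index-to-day-number steps repeated above for SoilMoist, ESoil, Ep and Rainf, shown in isolation on invented half-hourly data:

import numpy as np
import pandas as pd

times = pd.date_range("2013-01-01", periods=96, freq="30min")    # two days, half-hourly
flux = pd.DataFrame({"ESoil": np.full(96, 0.01)}, index=times)

daily = flux.resample("D").agg("sum")                             # daily totals
daily.index = (daily.index - pd.Timestamp("2011-12-31")).days     # days since 2011-12-31
print(daily)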
コード例 #53
    def remove_outlier(self):
        idx = pd.datetime(2017, 4, 15, 23)
        self.data.drop(index=idx, inplace=True)
        self.feat.drop(index=idx, inplace=True)
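
The same drop-by-timestamp idea outside the class, on an invented hourly frame:

import pandas as pd

df = pd.DataFrame({"load": range(5)},
                  index=pd.date_range("2017-04-15 21:00", periods=5, freq="H"))
df = df.drop(index=pd.Timestamp("2017-04-15 23:00"))  # remove the 23:00 record
print(df.index)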
コード例 #54
ファイル: market_flow.py プロジェクト: theMLtrader/AlphaPy
def main(args=None):
    r"""MarketFlow Main Program

    Notes
    -----
    (1) Initialize logging.
    (2) Parse the command line arguments.
    (3) Get the market configuration.
    (4) Get the model configuration.
    (5) Create the model object.
    (6) Call the main MarketFlow pipeline.

    Raises
    ------
    ValueError
        Training date must be before prediction date.

    """

    # Logging

    logging.basicConfig(format="[%(asctime)s] %(levelname)s\t%(message)s",
                        filename="market_flow.log",
                        filemode='a',
                        level=logging.DEBUG,
                        datefmt='%m/%d/%y %H:%M:%S')
    formatter = logging.Formatter("[%(asctime)s] %(levelname)s\t%(message)s",
                                  datefmt='%m/%d/%y %H:%M:%S')
    console = logging.StreamHandler()
    console.setFormatter(formatter)
    console.setLevel(logging.INFO)
    logging.getLogger().addHandler(console)

    # Start the pipeline

    logger.info('*' * 80)
    logger.info("MarketFlow Start")
    logger.info('*' * 80)

    # Argument Parsing

    parser = argparse.ArgumentParser(description="MarketFlow Parser")
    parser.add_argument('--pdate',
                        dest='predict_date',
                        help="prediction date is in the format: YYYY-MM-DD",
                        required=False,
                        type=valid_date)
    parser.add_argument('--tdate',
                        dest='train_date',
                        help="training date is in the format: YYYY-MM-DD",
                        required=False,
                        type=valid_date)
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument('--predict', dest='predict_mode', action='store_true')
    group.add_argument('--train', dest='predict_mode', action='store_false')
    parser.set_defaults(predict_mode=False)
    args = parser.parse_args()

    # Set train and predict dates

    if args.train_date:
        train_date = args.train_date
    else:
        train_date = pd.datetime(1900, 1, 1).strftime("%Y-%m-%d")

    if args.predict_date:
        predict_date = args.predict_date
    else:
        predict_date = datetime.date.today().strftime("%Y-%m-%d")

    # Verify that the dates are in sequence.

    if train_date >= predict_date:
        raise ValueError("Training date must be before prediction date")
    else:
        logger.info("Training Date: %s", train_date)
        logger.info("Prediction Date: %s", predict_date)

    # Read stock configuration file
    market_specs = get_market_config()

    # Read model configuration file

    model_specs = get_model_config()
    model_specs['predict_mode'] = args.predict_mode
    model_specs['predict_date'] = predict_date
    model_specs['train_date'] = train_date

    # Create directories if necessary

    output_dirs = [
        'config', 'data', 'input', 'model', 'output', 'plots', 'systems'
    ]
    for od in output_dirs:
        output_dir = SSEP.join([model_specs['directory'], od])
        if not os.path.exists(output_dir):
            logger.info("Creating directory %s", output_dir)
            os.makedirs(output_dir)

    # Create a model from the arguments

    logger.info("Creating Model")
    model = Model(model_specs)

    # Start the pipeline
    model = market_pipeline(model, market_specs)

    # Complete the pipeline

    logger.info('*' * 80)
    logger.info("MarketFlow End")
    logger.info('*' * 80)
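
valid_date is referenced as the argparse type above but is not shown in this excerpt; a hypothetical helper of that shape (a sketch, not necessarily AlphaPy's implementation) could look like:

import argparse
import datetime

def valid_date(s):
    """Hypothetical argparse 'type' helper: accept YYYY-MM-DD, else raise a parser error."""
    try:
        return datetime.datetime.strptime(s, "%Y-%m-%d").strftime("%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError("not a valid date: {!r}".format(s))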
コード例 #55
    print('idx_lon: ', idx_lon, 'NetCDF lon: ', val_lon, 'given lon: ', target_lon)
    print('norm: ', least_norm)
    print('\n')

    return idx_lat, idx_lon, val_lat, val_lon


if __name__ == "__main__":
    # Target lat/lon (CHANGE THIS)
    target_lat = -41.26101779
    target_lon = 148.166736

    # Generate filenames and read it to formulate dataframes (CHANGE THIS)
    path_to_file = '//home//thorweather//gfs_files//nc//'

    start_date = pd.datetime(2015, 1, 15)  #YYYY,month,day
    end_date = pd.datetime(2017, 3, 25)  #YYYY,month,day
    utc_datetime_range = pd.date_range(start=start_date,
                                       end=end_date,
                                       freq='6H')
    start_of_loop = 1

    for date_time in utc_datetime_range:

        # Generate filename (We can do it from reading netcdf directly)
        str_year = str(date_time.year)
        str_month = str('%02d' % (date_time.month))
        str_day = str('%02d' % (date_time.day))
        str_fcst_hour = str('%02d' % (date_time.hour))
        netcdf_filename = 'gfs.0p25.' + str_year + str_month + str_day + str_fcst_hour + '.f000.grib2.abrie233580.nc'
        file_path = path_to_file + netcdf_filename
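
        # sketch of an equivalent construction: strftime handles the zero padding
        # that the four str('%02d' % ...) lines above do by hand
        netcdf_filename_alt = ('gfs.0p25.' + date_time.strftime('%Y%m%d%H') +
                               '.f000.grib2.abrie233580.nc')
        assert netcdf_filename_alt == netcdf_filename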
コード例 #56
ファイル: test_datetime.py プロジェクト: mikedeltalima/pandas
    def test_comparisons_nat(self):

        fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0])
        fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0])

        didx1 = pd.DatetimeIndex([
            '2014-01-01', pd.NaT, '2014-03-01', pd.NaT, '2014-05-01',
            '2014-07-01'
        ])
        didx2 = pd.DatetimeIndex([
            '2014-02-01', '2014-03-01', pd.NaT, pd.NaT, '2014-06-01',
            '2014-07-01'
        ])
        darr = np.array([
            np_datetime64_compat('2014-02-01 00:00Z'),
            np_datetime64_compat('2014-03-01 00:00Z'),
            np_datetime64_compat('nat'),
            np.datetime64('nat'),
            np_datetime64_compat('2014-06-01 00:00Z'),
            np_datetime64_compat('2014-07-01 00:00Z')
        ])

        if _np_version_under1p8:
            # cannot test array because np.datetime64('nat') returns today's date
            cases = [(fidx1, fidx2), (didx1, didx2)]
        else:
            cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)]

        # Check pd.NaT is handled the same as np.nan
        with tm.assert_produces_warning(None):
            for idx1, idx2 in cases:

                result = idx1 < idx2
                expected = np.array([True, False, False, False, True, False])
                tm.assert_numpy_array_equal(result, expected)

                result = idx2 > idx1
                expected = np.array([True, False, False, False, True, False])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 <= idx2
                expected = np.array([True, False, False, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx2 >= idx1
                expected = np.array([True, False, False, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 == idx2
                expected = np.array([False, False, False, False, False, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 != idx2
                expected = np.array([True, True, True, True, True, False])
                tm.assert_numpy_array_equal(result, expected)

        with tm.assert_produces_warning(None):
            for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]:
                result = idx1 < val
                expected = np.array([False, False, False, False, False, False])
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 > val
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 <= val
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 >= val
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 == val
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 != val
                expected = np.array([True, True, True, True, True, True])
                tm.assert_numpy_array_equal(result, expected)

        # Check pd.NaT is handled the same as np.nan
        with tm.assert_produces_warning(None):
            for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]:
                result = idx1 < val
                expected = np.array([True, False, False, False, False, False])
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 > val
                expected = np.array([False, False, False, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 <= val
                expected = np.array([True, False, True, False, False, False])
                tm.assert_numpy_array_equal(result, expected)
                result = idx1 >= val
                expected = np.array([False, False, True, False, True, True])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 == val
                expected = np.array([False, False, True, False, False, False])
                tm.assert_numpy_array_equal(result, expected)

                result = idx1 != val
                expected = np.array([True, True, False, True, True, True])
                tm.assert_numpy_array_equal(result, expected)
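
Outside the test harness, the NaT behaviour asserted above can be seen directly on a small index (values arbitrary):

import pandas as pd

idx = pd.DatetimeIndex(['2014-01-01', pd.NaT, '2014-03-01'])
print(idx < pd.Timestamp('2014-02-01'))  # [ True False False] -- NaT compares False
print(idx == pd.NaT)                     # also all False, like comparisons with np.nan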
コード例 #57
import pysat
import pandas as pds
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt

# dates for demo
ssnDays = 67
startDate = pds.datetime(2009, 12, 21) - pds.DateOffset(days=ssnDays)
stopDate = pds.datetime(2009, 12, 21) + pds.DateOffset(days=ssnDays)


# define functions to customize data for application
def geo2mag(incoord):
    """geographic coordinate to magnetic coordinate (coarse):

    Parameters
    ----------
    incoord : numpy.array of shape (2,*)
        array([[glat0,glat1,glat2,...],[glon0,glon1,glon2,...]]),
        where glat, glon are geographic latitude and longitude
        (or if you have only one point it is [[glat,glon]]).

    Warnings
    --------
    Calculation of geomagnetic coordinates is approximate.
    Coordinates are for a geomagnetic dipole, not the full field.
    Location of geomagnetic dipole set for 2010.
    
    Returns
    -------    
コード例 #58
ファイル: test_misc.py プロジェクト: wakamori/pandas
    def test_datetimeindex_accessors(self):

        dti_naive = DatetimeIndex(freq='D',
                                  start=datetime(1998, 1, 1),
                                  periods=365)
        # GH 13303
        dti_tz = DatetimeIndex(freq='D',
                               start=datetime(1998, 1, 1),
                               periods=365,
                               tz='US/Eastern')
        for dti in [dti_naive, dti_tz]:

            self.assertEqual(dti.year[0], 1998)
            self.assertEqual(dti.month[0], 1)
            self.assertEqual(dti.day[0], 1)
            self.assertEqual(dti.hour[0], 0)
            self.assertEqual(dti.minute[0], 0)
            self.assertEqual(dti.second[0], 0)
            self.assertEqual(dti.microsecond[0], 0)
            self.assertEqual(dti.dayofweek[0], 3)

            self.assertEqual(dti.dayofyear[0], 1)
            self.assertEqual(dti.dayofyear[120], 121)

            self.assertEqual(dti.weekofyear[0], 1)
            self.assertEqual(dti.weekofyear[120], 18)

            self.assertEqual(dti.quarter[0], 1)
            self.assertEqual(dti.quarter[120], 2)

            self.assertEqual(dti.days_in_month[0], 31)
            self.assertEqual(dti.days_in_month[90], 30)

            self.assertEqual(dti.is_month_start[0], True)
            self.assertEqual(dti.is_month_start[1], False)
            self.assertEqual(dti.is_month_start[31], True)
            self.assertEqual(dti.is_quarter_start[0], True)
            self.assertEqual(dti.is_quarter_start[90], True)
            self.assertEqual(dti.is_year_start[0], True)
            self.assertEqual(dti.is_year_start[364], False)
            self.assertEqual(dti.is_month_end[0], False)
            self.assertEqual(dti.is_month_end[30], True)
            self.assertEqual(dti.is_month_end[31], False)
            self.assertEqual(dti.is_month_end[364], True)
            self.assertEqual(dti.is_quarter_end[0], False)
            self.assertEqual(dti.is_quarter_end[30], False)
            self.assertEqual(dti.is_quarter_end[89], True)
            self.assertEqual(dti.is_quarter_end[364], True)
            self.assertEqual(dti.is_year_end[0], False)
            self.assertEqual(dti.is_year_end[364], True)

            # GH 11128
            self.assertEqual(dti.weekday_name[4], u'Monday')
            self.assertEqual(dti.weekday_name[5], u'Tuesday')
            self.assertEqual(dti.weekday_name[6], u'Wednesday')
            self.assertEqual(dti.weekday_name[7], u'Thursday')
            self.assertEqual(dti.weekday_name[8], u'Friday')
            self.assertEqual(dti.weekday_name[9], u'Saturday')
            self.assertEqual(dti.weekday_name[10], u'Sunday')

            self.assertEqual(Timestamp('2016-04-04').weekday_name, u'Monday')
            self.assertEqual(Timestamp('2016-04-05').weekday_name, u'Tuesday')
            self.assertEqual(
                Timestamp('2016-04-06').weekday_name, u'Wednesday')
            self.assertEqual(Timestamp('2016-04-07').weekday_name, u'Thursday')
            self.assertEqual(Timestamp('2016-04-08').weekday_name, u'Friday')
            self.assertEqual(Timestamp('2016-04-09').weekday_name, u'Saturday')
            self.assertEqual(Timestamp('2016-04-10').weekday_name, u'Sunday')

            self.assertEqual(len(dti.year), 365)
            self.assertEqual(len(dti.month), 365)
            self.assertEqual(len(dti.day), 365)
            self.assertEqual(len(dti.hour), 365)
            self.assertEqual(len(dti.minute), 365)
            self.assertEqual(len(dti.second), 365)
            self.assertEqual(len(dti.microsecond), 365)
            self.assertEqual(len(dti.dayofweek), 365)
            self.assertEqual(len(dti.dayofyear), 365)
            self.assertEqual(len(dti.weekofyear), 365)
            self.assertEqual(len(dti.quarter), 365)
            self.assertEqual(len(dti.is_month_start), 365)
            self.assertEqual(len(dti.is_month_end), 365)
            self.assertEqual(len(dti.is_quarter_start), 365)
            self.assertEqual(len(dti.is_quarter_end), 365)
            self.assertEqual(len(dti.is_year_start), 365)
            self.assertEqual(len(dti.is_year_end), 365)
            self.assertEqual(len(dti.weekday_name), 365)

            dti.name = 'name'

            # non boolean accessors -> return Index
            for accessor in [
                    'year', 'month', 'day', 'hour', 'minute', 'second',
                    'microsecond', 'nanosecond', 'dayofweek', 'dayofyear',
                    'weekofyear', 'quarter', 'weekday_name'
            ]:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, Index)
                assert res.name == 'name'

            # boolean accessors -> return array
            for accessor in [
                    'is_month_start', 'is_month_end', 'is_quarter_start',
                    'is_quarter_end', 'is_year_start', 'is_year_end'
            ]:
                res = getattr(dti, accessor)
                assert len(res) == 365
                assert isinstance(res, np.ndarray)

            # test boolean indexing
            res = dti[dti.is_quarter_start]
            exp = dti[[0, 90, 181, 273]]
            tm.assert_index_equal(res, exp)
            res = dti[dti.is_leap_year]
            exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name')
            tm.assert_index_equal(res, exp)

        dti = DatetimeIndex(freq='BQ-FEB',
                            start=datetime(1998, 1, 1),
                            periods=4)

        self.assertEqual(sum(dti.is_quarter_start), 0)
        self.assertEqual(sum(dti.is_quarter_end), 4)
        self.assertEqual(sum(dti.is_year_start), 0)
        self.assertEqual(sum(dti.is_year_end), 1)

        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
        # CBD requires np >= 1.7
        bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
        self.assertRaises(ValueError, lambda: dti.is_month_start)

        dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])

        self.assertEqual(dti.is_month_start[0], 1)

        tests = [(Timestamp('2013-06-01', freq='M').is_month_start, 1),
                 (Timestamp('2013-06-01', freq='BM').is_month_start, 0),
                 (Timestamp('2013-06-03', freq='M').is_month_start, 0),
                 (Timestamp('2013-06-03', freq='BM').is_month_start, 1),
                 (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1),
                 (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1),
                 (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1),
                 (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1),
                 (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1),
                 (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1),
                 (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1),
                 (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0),
                 (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0),
                 (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1),
                 (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1),
                 (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1),
                 (Timestamp('2013-06-30', freq='BQ').is_month_end, 0),
                 (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0),
                 (Timestamp('2013-06-30', freq='BQ').is_year_end, 0),
                 (Timestamp('2013-06-28', freq='BQ').is_month_end, 1),
                 (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1),
                 (Timestamp('2013-06-28', freq='BQ').is_year_end, 0),
                 (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0),
                 (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0),
                 (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0),
                 (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1),
                 (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1),
                 (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1),
                 (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1),
                 (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1),
                 (Timestamp('2012-02-01').days_in_month, 29),
                 (Timestamp('2013-02-01').days_in_month, 28)]

        for ts, value in tests:
            self.assertEqual(ts, value)
コード例 #59
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
from scipy.stats.stats import pearsonr
from global_functions import read_datatxt
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.rcParams['xtick.labelsize'] = 13
plt.rcParams['ytick.labelsize'] = 13
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['axes.titlesize'] = 16

# Rn:
mdnRnA = np.loadtxt('../../../mdnRnA.txt', delimiter=',')
neuron = [64, 32]
startday = pd.datetime(2013, 7, 1)

## Plot:
CNN_loss = np.loadtxt('./CNN_Loss_Rn_{}_{}.txt'.format(neuron[0], neuron[1]),
                      delimiter=',')
train_loss = CNN_loss[0]
test_loss = CNN_loss[1]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 6))
plt.plot(train_loss)
plt.plot(test_loss)
plt.ylabel('loss', fontsize=14)
plt.xlabel('epoch', fontsize=14)
plt.legend(['train', 'test'], loc='upper right')
plt.tight_layout()
plt.savefig('./CNN_Loss_Rn_{}_{}.pdf'.format(neuron[0], neuron[1]))
コード例 #60
import pandas as pd


def el_to_dt(cell):
    yr = int(cell / 10000) + 1900
    mth = cell - int(cell / 10000) * 10000
    mth = int(mth / 100)
    day = cell - int(cell / 100) * 100
    return pd.datetime(yr, mth, day).date()
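
# worked example (input value invented): 980704 -> year 98 + 1900, month 07, day 04
print(el_to_dt(980704))  # 1998-07-04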