Example #1
    def test_empty_with_nrows_chunksize(self):
        # see gh-9535
        expected = DataFrame([], columns=['foo', 'bar'])
        result = self.read_csv(StringIO('foo,bar\n'), nrows=10)
        tm.assert_frame_equal(result, expected)

        result = next(iter(self.read_csv(
            StringIO('foo,bar\n'), chunksize=10)))
        tm.assert_frame_equal(result, expected)

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = self.read_csv(StringIO('foo,bar\n'),
                                   nrows=10, as_recarray=True)
            result = DataFrame(result[2], columns=result[1],
                               index=result[0])
            tm.assert_frame_equal(DataFrame.from_records(
                result), expected, check_index_type=False)

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = next(iter(self.read_csv(StringIO('foo,bar\n'),
                                             chunksize=10, as_recarray=True)))
            result = DataFrame(result[2], columns=result[1], index=result[0])
            tm.assert_frame_equal(DataFrame.from_records(result), expected,
                                  check_index_type=False)
Example #2
def convert_to_dataframe(array):

    def get_nonscalar_columns(array):
        first_row = array[0]
        bad_cols = np.array([x.ndim != 0 for x in first_row])
        col_names = np.array(array.dtype.names)
        bad_names = col_names[bad_cols]
        if bad_names.size > 0:
            warnings.warn("Ignored the following non-scalar branches: {bad_names}"
                          .format(bad_names=", ".join(bad_names)), UserWarning)
        return list(bad_names)

    nonscalar_columns = get_nonscalar_columns(array)
    indices = list(filter(lambda x: x.startswith('__index__') and x not in nonscalar_columns, array.dtype.names))
    if len(indices) == 0:
        df = DataFrame.from_records(array, exclude=nonscalar_columns)
    elif len(indices) == 1:
        # We store the index under the __index__* branch, where
        # * is the name of the index
        df = DataFrame.from_records(array, index=indices[0], exclude=nonscalar_columns)
        index_name = indices[0][len('__index__'):]
        if not index_name:
            # None means the index has no name
            index_name = None
        df.index.name = index_name
    else:
        raise ValueError("More than one index found in file")
    return df
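A minimal usage sketch for the converter above; the structured array and its ``__index__run`` field are hypothetical, chosen to exercise the index-recovery branch (assuming the module-level numpy/pandas/warnings imports the function relies on).

import numpy as np

arr = np.zeros(3, dtype=[('__index__run', 'i8'), ('x', 'f8')])
arr['__index__run'] = [10, 20, 30]
arr['x'] = [1.0, 2.0, 3.0]

df = convert_to_dataframe(arr)
assert df.index.name == 'run'
assert list(df.columns) == ['x']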
Example #3
    def get_pf_items(self):
        """Returns a tuple of 4 elements which can be used for further processing with
          ``pyfolio``

          returns, positions, transactions, gross_leverage

        Because the objects are meant to be used as direct input to ``pyfolio``
        this method makes a local import of ``pandas`` to convert the internal
        *backtrader* results to *pandas DataFrames* which is the expected input
        by, for example, ``pyfolio.create_full_tear_sheet``

        The method will break if ``pandas`` is not installed
        """
        # keep import local to avoid disturbing installations with no pandas
        import pandas
        from pandas import DataFrame as DF

        #
        # Returns
        cols = ["index", "return"]
        returns = DF.from_records(iteritems(self.rets["returns"]), index=cols[0], columns=cols)
        returns.index = pandas.to_datetime(returns.index)
        returns.index = returns.index.tz_localize("UTC")
        rets = returns["return"]
        #
        # Positions
        pss = self.rets["positions"]
        ps = [[k] + v for k, v in iteritems(pss)]
        cols = ps.pop(0)  # headers are in the first entry
        positions = DF.from_records(ps, index=cols[0], columns=cols)
        positions.index = pandas.to_datetime(positions.index)
        positions.index = positions.index.tz_localize("UTC")

        #
        # Transactions
        txss = self.rets["transactions"]
        txs = list()
        # The transactions have a common key (date) and can potentially
        # happen for several assets. The dictionary has a single key and a
        # list of lists. Each sublist contains the fields of a transaction.
        # Hence the double loop to undo the list indirection.
        for k, v in iteritems(txss):
            for v2 in v:
                txs.append([k] + v2)

        cols = txs.pop(0)  # headers are in the first entry
        transactions = DF.from_records(txs, index=cols[0], columns=cols)
        transactions.index = pandas.to_datetime(transactions.index)
        transactions.index = transactions.index.tz_localize("UTC")

        # Gross Leverage
        cols = ["index", "gross_lev"]
        gross_lev = DF.from_records(iteritems(self.rets["gross_lev"]), index=cols[0], columns=cols)

        gross_lev.index = pandas.to_datetime(gross_lev.index)
        gross_lev.index = gross_lev.index.tz_localize("UTC")
        glev = gross_lev["gross_lev"]

        # Return all together
        return rets, positions, transactions, glev
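A hedged usage sketch, assuming a finished backtrader run with the PyFolio analyzer attached under the (illustrative) name 'pyfolio'; older pyfolio releases also accepted the gross leverage series directly.

import backtrader as bt
import pyfolio as pf

cerebro = bt.Cerebro()
cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')
# ... add data feeds and a strategy here ...
strat = cerebro.run()[0]
returns, positions, transactions, gross_lev = strat.analyzers.pyfolio.get_pf_items()
pf.create_full_tear_sheet(returns, positions=positions, transactions=transactions)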
Example #4
def main(args):
    if len(args) != 3:
        usage()
    
    in_scores_filename = args[0]
    out_scores_filename = get_out_filename(in_scores_filename)
    
    if os.path.exists(out_scores_filename):
        print >> sys.stderr, 'Error: output file "{0}" already exists'.format(out_scores_filename)
        usage()
    
    in_texts_filename = args[1]
    out_texts_filename = get_out_filename(in_texts_filename)
    
    if os.path.exists(out_texts_filename):
        print >> sys.stderr, 'Error: output file "{0}" already exists'.format(out_texts_filename)
        usage()
    
    n_pages = int(args[2])
    
    if n_pages > 1000:
        print 'Note: num. pages capped at 1000'
        n_pages = 1000
    
    from_date = find_from_date(in_scores_filename, in_texts_filename)
    
    scores, texts = get_questions(n_pages, from_date)
    
    scores_df = DataFrame.from_records(scores)
    scores_df = scores_df.set_index('question_id')
    scores_df.to_csv(out_scores_filename, encoding='UTF-8')
    
    texts_df = DataFrame.from_records(texts)
    texts_df = texts_df.set_index('question_id', verify_integrity = True)
    texts_df.to_csv(out_texts_filename, encoding='UTF-8')
Example #5
    def test_fromRecords_toRecords(self):
        # structured array
        K = 10

        recs = np.zeros(K, dtype='O,O,f8,f8')
        recs['f0'] = list(range(K // 2)) * 2
        recs['f1'] = np.arange(K) / (K // 2)
        recs['f2'] = np.arange(K) * 2
        recs['f3'] = np.arange(K)

        lp = LongPanel.fromRecords(recs, 'f0', 'f1')
        self.assertEqual(len(lp.items), 2)

        lp = LongPanel.fromRecords(recs, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)

        torecs = lp.toRecords()
        self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

        # DataFrame
        df = DataFrame.from_records(recs)
        lp = LongPanel.fromRecords(df, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)

        # dict of arrays
        series = DataFrame.from_records(recs)._series
        lp = LongPanel.fromRecords(series, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)
        self.assert_('f2' in series)

        self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)),
                          0, 1)
Example #6
    def test_fromRecords_toRecords(self):
        # structured array
        K = 10

        recs = np.zeros(K, dtype="O,O,f8,f8")
        recs["f0"] = range(K // 2) * 2
        recs["f1"] = np.arange(K) / (K // 2)
        recs["f2"] = np.arange(K) * 2
        recs["f3"] = np.arange(K)

        lp = LongPanel.fromRecords(recs, "f0", "f1")
        self.assertEqual(len(lp.items), 2)

        lp = LongPanel.fromRecords(recs, "f0", "f1", exclude=["f2"])
        self.assertEqual(len(lp.items), 1)

        torecs = lp.toRecords()
        self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

        # DataFrame
        df = DataFrame.from_records(recs)
        lp = LongPanel.fromRecords(df, "f0", "f1", exclude=["f2"])
        self.assertEqual(len(lp.items), 1)

        # dict of arrays
        series = DataFrame.from_records(recs)._series
        lp = LongPanel.fromRecords(series, "f0", "f1", exclude=["f2"])
        self.assertEqual(len(lp.items), 1)
        self.assert_("f2" in series)

        self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)), 0, 1)
Example #7
def get_data_with_countries(year_of_color=1990, stat_code='WNTI_%', palette=None):  # nopep8
    if not palette:
        palette = WATER_COLOR_RANGE

    # Get the countries data frame
    countries = Country.objects.exclude(boundary='')
    countries = countries.filter(region__in=[1, 2, 3, 6, 7])  # Africa only
    countries = countries.values('name', 'boundary', 'id')
    countries_df = DataFrame.from_records(countries)
    countries_df['xs'], countries_df['ys'] = build_coords_lists(countries_df['boundary'])  # nopep8

    # Get the stats for access to water
    stats = StatValue.objects.filter(description__code=stat_code)
    stats = stats.values('value', 'year', 'country_id')
    stats_df = DataFrame.from_records(stats, coerce_float=True)

    # Pivot it before merging
    pivot_df = stats_df.pivot(columns='year', index='country_id', values='value')  # nopep8
    pivot_df['id'] = pivot_df.index

    # Merge the countries and stats together
    merged_df = merge(countries_df, pivot_df, how='left')
    merged_df = merged_df.fillna(value=-99)

    # Color it
    colored_df = update_active_data(merged_df, year_of_color, palette)

    # Downstream code expects string column names (the pivoted year columns are ints)
    colored_df.columns = colored_df.columns.astype('str')
    return colored_df
Example #8
 def test_multiindex_dtype(self):
     df1 = DataFrame.from_records(
         {'a': [1, 2], 'b': [2.1, 1.5],
          'c': ['l1', 'l2']}, index=['a', 'b'])
     df2 = DataFrame.from_records(
         {'a': [1.0, 2.0], 'b': [2.1, 1.5],
          'c': ['l1', 'l2']}, index=['a', 'b'])
     self._assert_not_equal(df1, df2, check_index_type=True)
Example #9
def stepfunction(dblstart, dblend, dbllaunch, dbltimenow, dblinterval=1/12):
    case1 = dbltimenow[(dbltimenow + dblinterval) <= dbllaunch]
    case2 = dbltimenow[dbltimenow >= dbllaunch]
    step_function_case1 = DataFrame.from_records(np.zeros([1, case1.shape[0]]),
                                                 columns=case1)
    step_function_case2 = DataFrame.from_records(np.zeros([1, case2.shape[0]]),
                                                 columns=case2)
    step_function_case1[:] = dblstart * dblinterval
    step_function_case2[:] = dblend * dblinterval
    step_function = step_function_case1.add(step_function_case2, fill_value=0)
    return step_function
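A small usage sketch with a hypothetical monthly time grid expressed in years (assuming the module imports numpy and DataFrame): columns before launch carry the start-rate amount, columns at or after launch the end-rate amount.

import numpy as np

t = np.arange(0.0, 2.0, 1 / 12)  # hypothetical monthly grid, in years
sf = stepfunction(dblstart=120.0, dblend=60.0, dbllaunch=1.0, dbltimenow=t)
print(sf.shape)  # one row; the columns are the time points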
Example #10
def convert_to_dataframe(array):
    """
    Creates a DataFrame from a structured array.
    Currently, this creates a copy of the data.
    """
    if 'index' in array.dtype.names:
        df = DataFrame.from_records(array, index='index')
    else:
        df = DataFrame.from_records(array)
    return df
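Usage sketch for this simpler variant, with a hypothetical structured array that carries an explicit 'index' field.

import numpy as np

arr = np.zeros(2, dtype=[('index', 'i8'), ('value', 'f8')])
arr['index'] = [5, 6]
df = convert_to_dataframe(arr)
assert list(df.index) == [5, 6]
assert list(df.columns) == ['value']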
Example #11
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    # as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    ptesting.assert_frame_equal(dta.reset_index(), DataFrame.from_records(dta2))
Example #12
 def get_history_data(self, code, year, season):
     """
     新浪历史复权数据接口
     """
     res = self.session.get(url=URL_HISTORY_DATA(code, year, season))
     if res.status_code == 200:
         pattern_data = r'<div align="center">([\d\.]+)</div>'
         data = re.findall(pattern_data, res.text)
         records = util.slice_list(step=7, data_list=data)
         print(records)
         df = DataFrame.from_records(
             records,
             columns=[
                 'open',
                 'high',
                 'close',
                 'low',
                 'volume',
                 'amount',
                 'restoration_factor'
             ]
         )
         pattern_date = r'date=([\d]{4}-[\d]{2}-[\d]{2})'
         date = re.findall(pattern_date, res.text)
         df["date"] = date
         return df
     else:
         self.logger.debug("Status Code: {}".format(res.status_code))
         return False
Example #13
def test_datetime_roundtrip():
    dta = np.array([(1, datetime(2010, 1, 1), 2),
                    (2, datetime(2010, 2, 1), 3),
                    (4, datetime(2010, 3, 1), 5)],
                    dtype=[('var1', float), ('var2', object), ('var3', float)])
    buf = BytesIO()

    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta, {"var2" : "tm"})

    writer.write_file()
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf)

    assert_equal(dta, dta2)

    dta = DataFrame.from_records(dta)
    buf = BytesIO()

    with pytest.warns(FutureWarning):
        writer = StataWriter(buf, dta, {"var2" : "tm"})

    writer.write_file()
    buf.seek(0)

    with pytest.warns(FutureWarning):
        dta2 = genfromdta(buf, pandas=True)

    ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
Example #14
 def generate_dataframe(self, symbols=None, date_index = None):
     """
     Generate a dataframe consisting of the currency prices (specified by symbols)
     from the start to end date
     """
     
     # Set defaults if necessary
     if symbols is None:
         symbols = Currency.objects.all().values_list('symbol')
     try:
         assert date_index is not None
         assert len(date_index) > 0
     except (AssertionError, TypeError):
         start_date = date(2005, 1, 1)
         end_date = date.today()
         date_index = date_range(start_date, end_date)
     
     currency_price_data = CurrencyPrices.objects.filter(currency__symbol__in=symbols, date__in=date_index.tolist()).values_list('date', 'currency__symbol', 'ask_price')
     try:
         # Generate numpy array from queryset data
         forex_data_array = np.core.records.fromrecords(currency_price_data, names=['date', 'symbol', 'ask_price'])
     except IndexError:
         # If there is no data, generate an empty array
         forex_data_array = np.core.records.fromrecords([(date(1900,1,1) ,"",0)], names=['date', 'symbol', 'ask_price'])
     df = DataFrame.from_records(forex_data_array, index='date')
     
     # Create pivot table
     df['date'] = df.index
     df = df.pivot(index='date', columns='symbol', values='ask_price')
     
     return df
Example #15
def _status_table(bot: Bot, update: Update) -> None:
    """
    Handler for /status table.
    Returns the current TradeThread status in table format
    :param bot: telegram bot
    :param update: message update
    :return: None
    """
    # Fetch open trades
    trades = Trade.query.filter(Trade.is_open.is_(True)).all()
    if get_state() != State.RUNNING:
        send_msg('*Status:* `trader is not running`', bot=bot)
    elif not trades:
        send_msg('*Status:* `no active order`', bot=bot)
    else:
        trades_list = []
        for trade in trades:
            # calculate profit and send message to user
            current_rate = exchange.get_ticker(trade.pair, False)['bid']
            trades_list.append([
                trade.id,
                trade.pair,
                shorten_date(arrow.get(trade.open_date).humanize(only_distance=True)),
                '{:.2f}%'.format(100 * trade.calc_profit_percent(current_rate))
            ])

        columns = ['ID', 'Pair', 'Since', 'Profit']
        df_statuses = DataFrame.from_records(trades_list, columns=columns)
        df_statuses = df_statuses.set_index(columns[0])

        message = tabulate(df_statuses, headers='keys', tablefmt='simple')
        message = "<pre>{}</pre>".format(message)

        send_msg(message, parse_mode=ParseMode.HTML)
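The from_records/tabulate rendering at the end works standalone; a minimal sketch with made-up trade rows:

from pandas import DataFrame
from tabulate import tabulate

trades_list = [
    [1, 'ETH/BTC', '2 hours ago', '1.25%'],   # hypothetical values
    [2, 'LTC/BTC', 'a day ago', '-0.40%'],
]
columns = ['ID', 'Pair', 'Since', 'Profit']
df_statuses = DataFrame.from_records(trades_list, columns=columns).set_index(columns[0])
print(tabulate(df_statuses, headers='keys', tablefmt='simple'))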
Example #16
 def create_adjustment_reader(cls, tempdir):
     dbpath = tempdir.getpath('adjustments.sqlite')
     writer = SQLiteAdjustmentWriter(dbpath, cls.env.trading_days,
                                     MockDailyBarSpotReader())
     splits = DataFrame.from_records([
         {
             'effective_date': str_to_seconds('2014-06-09'),
             'ratio': (1 / 7.0),
             'sid': cls.AAPL,
         }
     ])
     mergers = DataFrame(
         {
             # Hackery to make the dtypes correct on an empty frame.
             'effective_date': array([], dtype=int),
             'ratio': array([], dtype=float),
             'sid': array([], dtype=int),
         },
         index=DatetimeIndex([]),
         columns=['effective_date', 'ratio', 'sid'],
     )
     dividends = DataFrame({
         'sid': array([], dtype=uint32),
         'amount': array([], dtype=float64),
         'record_date': array([], dtype='datetime64[ns]'),
         'ex_date': array([], dtype='datetime64[ns]'),
         'declared_date': array([], dtype='datetime64[ns]'),
         'pay_date': array([], dtype='datetime64[ns]'),
     })
     writer.write(splits, mergers, dividends)
     return SQLiteAdjustmentReader(dbpath)
Example #17
 def to_dataframe(self, flatten=False):
     from pandas import DataFrame
     if flatten:
         records, columns = self.itertriples(), ['path', 'name', 'id']
     else:
         records, columns = self.iteritems(), ['path', 'members']
     return DataFrame.from_records(records, columns=columns)
Example #18
    def get_xueqiu_stocks(self,
                          stockTypeList=['sha', 'shb', 'sza', 'szb'],
                          columns=CON.CONST_XUEQIU_QUOTE_ORDER_COLUMN):
        stock_xueqiu = None
        for stockType in stockTypeList:
            print("Fetching from Xueqiu: {}".format(C.EX_NAME[stockType]))
            page = 1
            while True:
                response = self.session.get(
                    CON.URL_XUEQIU_QUOTE_ORDER(page, columns, stockType),
                    headers=CON.HEADERS_XUEQIU,
                ).json()
                df = DataFrame.from_records(response["data"],
                                            columns=response["column"])
                if stock_xueqiu is None:
                    stock_xueqiu = df
                else:
                    stock_xueqiu = stock_xueqiu.append(df)
                if df.size == 0:
                    break
                page += 1

        self.stock_xueqiu = stock_xueqiu
        return stock_xueqiu
Example #19
    def get_kline(self, symbol, period='1day', fqType='normal', begin=None, end=None, dataframe=True):
        if end is None:
            end = util.time_now()
        if isinstance(begin, str):
            begin = util.date_to_timestamp(begin)
        if isinstance(end, str):
            end = util.date_to_timestamp(end)
        response = None
        try:
            response = self.session.get(
                URL_XUEQIU_KLINE(symbol=symbol, period=period, fqType=fqType, begin=begin, end=end),
                headers=HEADERS_XUEQIU,
                timeout=3,
            )
            kline = response.json()
            time.sleep(0.5)
        except Exception as e:
            self.logger.warning("{}".format(e))
            if response is not None:
                self.logger.info(response.text)
            time.sleep(3)
            return None

        if kline["success"] == 'true':
            if dataframe:
                if kline["chartlist"] is not None:
                    df = DataFrame.from_records(kline["chartlist"])
                    df["time"] = pandas.to_datetime(df["time"])
                    df["time"] += timedelta(hours=8)
                    df["symbol"] = symbol
                    return df
                else:
                    return DataFrame()
            else:
                return kline["chartlist"]
        else:
            return None
Example #20
    def dataframe(self, table, limit=None, offset=None):
        """
        create a pandas dataframe from a table or query

        Parameters
        ----------

        table : table
            a table in this database or a query

        limit: integer
            an integer limit on the query

        offset: integer
            an offset for the query
        """
        from pandas import DataFrame

        if isinstance(table, basestring):
            table = getattr(self, table)
        records = table._table.select()
        if limit is not None:
            records = records.limit(limit)
        if offset is not None:
            records = records.offset(offset)
        records = list(records.execute())
        cols = [c.name for c in table._table.columns]
        return DataFrame.from_records(records, columns=cols)
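A standalone analog of the final conversion step, with hypothetical rows standing in for the executed select:

from pandas import DataFrame

records = [(1, 'alice'), (2, 'bob')]   # e.g. list(select.execute())
cols = ['id', 'name']                  # e.g. [c.name for c in table.columns]
print(DataFrame.from_records(records, columns=cols))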
Example #21
def webuse(data, baseurl='http://www.stata-press.com/data/r11/', as_df=True):
    """
    Parameters
    ----------
    data : str
        Name of dataset to fetch.
    baseurl : str
        The base URL to the stata datasets.
    as_df : bool
        If True, returns a `pandas.DataFrame`

    Returns
    -------
    dta : DataFrame or record array
        A `pandas.DataFrame` if `as_df` is True, otherwise a record array
        containing the Stata dataset.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has a trailing forward slash. No error checking is
    done on the response URLs.
    """
    # lazy imports
    from statsmodels.iolib import genfromdta

    url = urljoin(baseurl, data+'.dta')
    dta = urlopen(url)
    dta = StringIO(dta.read())  # make it truly file-like
    if as_df:  # could make this faster if we don't process dta twice?
        return DataFrame.from_records(genfromdta(dta))
    else:
        return genfromdta(dta)
Example #22
def platform_expression(accession, require_age=True, require_gender=False, limit=None):
    genes = fetch_genes(9606)
    query = """
    SELECT sample.id, sample.age, sample.gender, expression.data 
    FROM expression 
    INNER JOIN sample 
    ON expression.sample_id=sample.id 
    INNER JOIN platform
    ON sample.platform_id=platform.id
    WHERE platform.accession=%s"""
    if require_age:
        query += "\nAND sample.age IS NOT NULL"
    if require_gender:
        query += "\nAND sample.gender IS NOT NULL"
    if limit:
        query += "\tLIMIT " + str(limit)
    c.execute(query, (accession,))
    samples, age, gender, expression = zip(*c)
    X = DataFrame.from_records(list(expression), 
                               index=samples, columns=genes.index)
    X.index.name = "Sample ID"
    X.columns.name = "Gene ID"
    P = DataFrame({"age": age, "gender": gender}, index=samples)
    P.index.name = "Sample"
    return X, P
Example #23
def tissue_expression_training_set(taxon_id=9606, limit=200):
    c.execute("""
    SELECT sample_term.sample_id, expression.data, 
        sample_term.term_id, sample_term.probability
    FROM sample_term
    INNER JOIN term
    ON term.id=sample_term.term_id
    INNER JOIN ontology
    ON ontology.id=term.ontology_id
    INNER JOIN sample
    ON sample.id=sample_term.sample_id
    INNER JOIN expression
    ON expression.sample_id=sample.id
    INNER JOIN platform
    ON sample.platform_id=platform.id
    INNER JOIN taxon
    ON platform.taxon_id=taxon.id
    WHERE ontology.namespace='BTO'
    AND sample_term.probability=1
    AND taxon.id=%s
    ORDER BY random()
    LIMIT %s""", (taxon_id, limit))
    samples, data, tissues, values = zip(*c)
    T = coo_to_df(zip(samples, tissues, values))
    T.index.name = "Sample ID"
    T.columns.name = "Term ID"
    c.execute("""SELECT id FROM gene WHERE gene.taxon_id=%s ORDER BY id""", 
              (taxon_id,))
    X = DataFrame.from_records(list(data),
                               index=samples, columns=[e[0] for e in c])
    return X,T
Example #24
def adjust(y, divs):
    """Return fully adjusted OHLCs data base on dividends

    Paramaters:
    y: numpy
    divs: numpy of dividends

    Return:
    DataFrame objects
    """
    index = DatetimeIndex([datetime.datetime.fromtimestamp(v) for v in y['time']])
    y = DataFrame.from_records(y, index=index, exclude=['time'])
    y['adjclose'] = y['close']

    for div in divs:
        d = Dividend(div)
        d.adjust(y)

    factor = y['adjclose'] / y['close']
    frame = y.copy()
    frame['open'] = frame['open'] * factor
    frame['high'] = frame['high'] * factor
    frame['low'] = frame['low'] * factor
    frame['close'] = frame['close'] * factor
    frame['volume'] = frame['volume'] * (1 / factor)
    return frame
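A sketch of the input shape adjust() expects, inferred from the fields it touches ('time' as a Unix timestamp plus OHLCV) and using a single made-up row; with no dividends the adjustment factor is 1.0 throughout. It assumes the datetime/DatetimeIndex/DataFrame imports the function relies on.

import numpy as np

dtype = [('time', 'i8'), ('open', 'f8'), ('high', 'f8'),
         ('low', 'f8'), ('close', 'f8'), ('volume', 'f8')]
y = np.array([(1216915200, 24.89, 25.45, 24.71, 25.0, 486284.0)], dtype=dtype)
frame = adjust(y, divs=[])
assert (frame['adjclose'] == frame['close']).all()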
Example #25
    def test_adjust_purchase(self):
        ohlcs = np.array([
                (1216915200, 24.889999389648438, 25.450000762939453,
                 24.709999084472656, 25.0, 486284.0, 1216462208.0)
                ], dtype=Day.DTYPE)

        dividends = np.array([
                (1058313600, 0.0, 0.0, 0.0, 0.11999999731779099),
                (1084233600, 0.20000000298023224, 0.0, 0.0, 0.09200000017881393),
                (1119225600, 0.5, 0.0, 0.0, 0.10999999940395355),
                (1140739200, 0.08589000254869461, 0.0, 0.0, 0.0),
                (1150416000, 0.0, 0.0, 0.0, 0.07999999821186066),
                (1158796800, 0.0, 0.0, 0.0, 0.18000000715255737),
                (1183507200, 0.0, 0.0, 0.0, 0.11999999731779099),
                (1217203200, 0.0, 0.0, 0.0, 0.2800000011920929),
                (1246579200, 0.30000001192092896, 0.0, 0.0, 0.10000000149011612),
                (1268611200, 0.0, 0.12999999523162842, 8.850000381469727, 0.0),
                (1277942400, 0.0, 0.0, 0.0, 0.20999999344348907),
                (1307664000, 0.0, 0.0, 0.0, 0.28999999165534973)                
                ], dtype=self.dtype)

        index = np.array([datetime.datetime.fromtimestamp(v) for v in ohlcs['time']],
                         dtype=object)
        y = DataFrame.from_records(ohlcs, index=index, exclude=['time'])
        y['adjclose'] = y['close']

        for div in dividends:
            d = Dividend(div)
            d.adjust(y)

        adjclose = y.xs(datetime.datetime(2008, 7, 25))['adjclose']
        self.assertTrue(self.floatEqual(adjclose, 17.28))
Example #26
    def digest_npbinary(self, file_name, **kwargs):
        try:
            from numpy import load
        except Exception:
            raise ImportError('Numpy is missing!')

        return DataFrame.from_records(load(file_name))
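What this reader consumes is a .npy file holding a structured array; a round-trip sketch with a hypothetical file name:

import numpy as np
from pandas import DataFrame

arr = np.array([(1, 2.5), (2, 3.5)], dtype=[('id', 'i8'), ('score', 'f8')])
np.save('records.npy', arr)

df = DataFrame.from_records(np.load('records.npy'))
assert list(df.columns) == ['id', 'score']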
Example #27
 def make_equity_info(cls):
     cls.equity_info = ret = DataFrame.from_records([
         {
             'sid': 1,
             'symbol': 'A',
             'start_date': cls.dates[10],
             'end_date': cls.dates[13],
             'exchange': 'TEST',
         },
         {
             'sid': 2,
             'symbol': 'B',
             'start_date': cls.dates[11],
             'end_date': cls.dates[14],
             'exchange': 'TEST',
         },
         {
             'sid': 3,
             'symbol': 'C',
             'start_date': cls.dates[12],
             'end_date': cls.dates[15],
             'exchange': 'TEST',
         },
     ])
     return ret
Example #28
 def generate_dataframe(self, start_date=None, end_date=None):
     """
     """
     first_series_point = CurrencyPrices.objects.filter(currency=self)[0]
     last_series_point = CurrencyPrices.objects.filter(currency=self).reverse()[0]
     if start_date is None:
         start_date = first_series_point.date
     else:
         start_date = max(first_series_point.date, start_date)
     # Start a few days early so the first pct_change() won't be null
     temp_start_date = start_date - timedelta(days=3)

     if end_date is None:
         end_date = last_series_point.date
     else:
         end_date = min(last_series_point.date, end_date)
         
     currency_date = CurrencyPrices.objects.filter(currency=self,
                                                   date__gte=temp_start_date,
                                                   date__lte=end_date).values_list('date', 'ask_price', 'bid_price')
     currency_data_array = np.core.records.fromrecords(currency_date, names=['DATE', "ASK", "BID"])
     df = DataFrame.from_records(currency_data_array, index='DATE')  
     df = df.astype(float)
     df['MID'] = (df['ASK'] + df['BID']) / 2.0
     df['CHANGE'] = df['MID'].pct_change()
     
     required_dates = date_range(start_date,end_date)
     df = df.reindex(required_dates)
     
     return df
Example #29
def getAdjClosePrices(tickers, startdate, enddate):
    """ returns a ready to use pandas DataFrame and a Series with the startDate
    """
    Session = orm.sessionmaker(bind=db.GetEngine())        
    session = Session()
    conn = db.GetEngine().connect()
    # Query
    conn.execute("""CREATE TEMP TABLE Tickers (Cd Text)""")
    conn.execute("""INSERT INTO Tickers VALUES(?)""", zip(tickers))
    
    result = conn.execute("""SELECT ts.Cd, Date, AdjClose
                      FROM TimeSeries ts
                      INNER JOIN Tickers t ON ts.Cd = t.Cd
                      WHERE ts.Date >= ? AND ts.Date <= ?""", (startdate, enddate))
    rows = result.fetchall()

    # Create a pandas DataFrame
    pricesRaw = DataFrame.from_records(rows, columns=['Cd', 'Date', 'AdjClose'])
    # Convert Date strings into datetime so pandas can do time series stuff
    pricesRaw.Date = pd.to_datetime(pricesRaw.Date)
    seriesbegin = pricesRaw[['Cd','Date']].groupby('Cd').min()
    # Pivot DataFrame
    prices = pricesRaw.pivot(index='Date', columns='Cd', values='AdjClose')

    # Close DB and Cursor
    conn.close()
    return prices, seriesbegin
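The pivot step works standalone; a sketch with hypothetical price rows:

import pandas as pd
from pandas import DataFrame

rows = [('AAPL', '2020-01-02', 74.33), ('MSFT', '2020-01-02', 158.62)]
pricesRaw = DataFrame.from_records(rows, columns=['Cd', 'Date', 'AdjClose'])
pricesRaw.Date = pd.to_datetime(pricesRaw.Date)
prices = pricesRaw.pivot(index='Date', columns='Cd', values='AdjClose')
print(prices)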
Example #30
    def init_class_fixtures(cls):
        super(ClosesAndVolumes, cls).init_class_fixtures()
        cls.first_asset_start = min(cls.equity_info.start_date)
        cls.last_asset_end = max(cls.equity_info.end_date)
        cls.assets = cls.asset_finder.retrieve_all(cls.asset_finder.sids)

        cls.trading_day = cls.trading_calendar.day

        # Add a split for 'A' on its second date.
        cls.split_asset = cls.assets[0]
        cls.split_date = cls.split_asset.start_date + cls.trading_day
        cls.split_ratio = 0.5
        cls.adjustments = DataFrame.from_records([
            {
                'sid': cls.split_asset.sid,
                'value': cls.split_ratio,
                'kind': MULTIPLY,
                'start_date': Timestamp('NaT'),
                'end_date': cls.split_date,
                'apply_date': cls.split_date,
            }
        ])

        cls.default_sim_params = SimulationParameters(
            start_session=cls.first_asset_start,
            end_session=cls.last_asset_end,
            trading_calendar=cls.trading_calendar,
            emission_rate='daily',
            data_frequency='daily',
        )