Example #1
0
 def __call__(self, path, fields, **kwargs):
     """Write daily bars for all order book ids into a freshly created HDF5 file.

     The file at ``path`` is opened in ``'w'`` mode, so existing content is
     discarded. Prices are requested from rqdatac in batches of 300 ids,
     and every id present in a batch result is stored as one dataset named
     after that id.

     Args:
         path: Location of the HDF5 file to create.
         fields: Passed through as ``fields`` to ``rqdatac.get_price``.
         **kwargs: Forwarded unchanged to ``h5py``'s ``create_dataset``.

     Yields:
         int: The number of ids in each batch, emitted once that batch has
         been handled (also for batches that returned no data).
     """
     batch_size = 300
     with h5py.File(path, 'w') as store:
         offset = 0
         while True:
             batch = self._order_book_ids[offset:offset + batch_size]
             prices = rqdatac.get_price(
                 batch,
                 START_DATE,
                 datetime.date.today(),
                 '1d',
                 adjust_type='none',
                 fields=fields,
                 expect_df=True,
             )
             if prices is not None and not prices.empty:
                 prices.reset_index(inplace=True)
                 # Swap the 'date' column for its integer 'datetime' form.
                 prices['datetime'] = [
                     convert_date_to_int(day) for day in prices.pop('date')
                 ]
                 prices.set_index(['order_book_id', 'datetime'], inplace=True)
                 prices.sort_index(inplace=True)
                 # The first index level lists the ids found in this result.
                 for book_id in prices.index.levels[0]:
                     records = prices.loc[book_id].to_records()
                     store.create_dataset(book_id, data=records, **kwargs)
             offset += batch_size
             yield len(batch)
             if offset >= len(self._order_book_ids):
                 break
Example #2
0
 def __call__(self, path, fields, **kwargs):
     """Bring the daily-bar HDF5 file at ``path`` up to date.

     The file is first probed read-only. If it cannot be opened, or
     ``self.h5_has_valid_fields`` rejects it for ``fields``, the whole file
     is regenerated through ``GenerateDayBarTask``. Otherwise each order
     book id is extended in place with the bars from the trading day after
     its last stored row up to ``END_DATE``.

     Args:
         path: Location of the HDF5 file to update.
         fields: Passed through as ``fields`` to ``rqdatac.get_price`` and
             used to select the columns that are stored.
         **kwargs: Forwarded unchanged to ``h5py``'s ``create_dataset``.

     Yields:
         int: Whatever ``GenerateDayBarTask`` yields when the file is
         rebuilt; otherwise ``1`` after each order book id is handled.
     """
     # A missing or unreadable file makes the read-only open raise OSError.
     # Treat that like an invalid layout and rebuild, instead of failing on
     # the first run when no file exists yet.
     try:
         probe = h5py.File(path, 'r')
     except OSError:
         need_recreate_h5 = True
     else:
         with probe:
             need_recreate_h5 = not self.h5_has_valid_fields(probe, fields)

     if need_recreate_h5:
         yield from GenerateDayBarTask(self._order_book_ids)(path, fields,
                                                             **kwargs)
         return

     with h5py.File(path, 'a') as h5:
         for order_book_id in self._order_book_ids:
             if order_book_id in h5:
                 try:
                     # NOTE(review): the // 1000000 presumably strips a
                     # trailing HHMMSS part from the stored integer; confirm
                     # against convert_date_to_int.
                     start_date = rqdatac.get_next_trading_date(
                         int(h5[order_book_id]['datetime'][-1] // 1000000))
                 except ValueError:
                     # Unusable dataset: drop it and fetch from the start.
                     h5.pop(order_book_id)
                     start_date = START_DATE
             else:
                 start_date = START_DATE
             df = rqdatac.get_price(order_book_id,
                                    start_date,
                                    END_DATE,
                                    '1d',
                                    adjust_type='none',
                                    fields=fields,
                                    expect_df=True)
             if not (df is None or df.empty):
                 # Keep only the requested columns: for futures ids such as
                 # SC888 the result also carries a 'dominant_id' column.
                 df = df[fields]
                 df = df.loc[order_book_id]
                 df.reset_index(inplace=True)
                 df['datetime'] = [
                     convert_date_to_int(d) for d in df['date']
                 ]
                 del df['date']
                 df.set_index('datetime', inplace=True)
                 if order_book_id in h5:
                     # Rebuild the dataset as old rows followed by new rows,
                     # cast to the dtype of the existing dataset.
                     data = np.array(
                         [
                             tuple(row) for row in chain(
                                 h5[order_book_id][:], df.to_records())
                         ],
                         dtype=h5[order_book_id].dtype)
                     del h5[order_book_id]
                     h5.create_dataset(order_book_id, data=data, **kwargs)
                 else:
                     h5.create_dataset(order_book_id,
                                       data=df.to_records(),
                                       **kwargs)
             yield 1
Example #3
0
    def __call__(self, path, fields, **kwargs):
        """Append newly available daily bars to the HDF5 file at ``path``.

        The file is opened in ``'a'`` mode. For each order book id, bars are
        requested from the trading day after the last stored ``datetime``
        (or from ``START_DATE`` when the id has no usable dataset) up to
        ``END_DATE``, and the dataset named after the id is rewritten as the
        old rows followed by the new ones.

        Args:
            path: Location of the HDF5 file to update.
            fields: Passed through as ``fields`` to ``rqdatac.get_price``.
            **kwargs: Forwarded unchanged to ``h5py``'s ``create_dataset``.

        Yields:
            int: ``1`` after each order book id has been handled, whether or
            not any new bars were written.
        """
        with h5py.File(path, 'a') as store:
            for order_book_id in self._order_book_ids:
                # Default to a full fetch; overridden below when the stored
                # dataset tells us where to resume.
                start_date = START_DATE
                if order_book_id in store:
                    try:
                        last_stamp = store[order_book_id]['datetime'][-1]
                        start_date = rqdatac.get_next_trading_date(
                            int(last_stamp // 1000000))
                    except ValueError:
                        # Unusable dataset: drop it, start_date stays at
                        # START_DATE.
                        store.pop(order_book_id)

                prices = rqdatac.get_price(
                    order_book_id,
                    start_date,
                    END_DATE,
                    '1d',
                    adjust_type='none',
                    fields=fields,
                    expect_df=True,
                )
                if prices is not None and not prices.empty:
                    bars = prices.loc[order_book_id]
                    bars.reset_index(inplace=True)
                    # Swap the 'date' column for its integer 'datetime' form.
                    bars['datetime'] = [
                        convert_date_to_int(day) for day in bars.pop('date')
                    ]
                    bars.set_index('datetime', inplace=True)
                    fresh_records = bars.to_records()

                    if order_book_id in store:
                        existing = store[order_book_id]
                        # Old rows first, then the new ones, cast to the
                        # dtype of the dataset being replaced.
                        merged = np.array(
                            [
                                tuple(row)
                                for row in chain(existing[:], fresh_records)
                            ],
                            dtype=existing.dtype)
                        del store[order_book_id]
                        store.create_dataset(order_book_id,
                                             data=merged,
                                             **kwargs)
                    else:
                        store.create_dataset(order_book_id,
                                             data=fresh_records,
                                             **kwargs)
                yield 1