Example no. 1
 def open(self, name):
     if not path.exists(self.importpath):
         raise FileNotFoundError(path.join(self.dataroot, 'imported'))
     filename = path.join(self.importpath, name)
     if not path.exists(filename):
         raise FileNotFoundError(filename)
     self.store = HDFStore(filename)
Example no. 2
    def test_walk_groups(self):
        with tm.ensure_clean('walk_groups.hdf') as filename:
            store = HDFStore(filename, 'w')

            dfs = {
                'df1': pd.DataFrame([1, 2, 3]),
                'df2': pd.DataFrame([4, 5, 6]),
                'df3': pd.DataFrame([6, 7, 8]),
                'df4': pd.DataFrame([9, 10, 11]),
            }

            store.put('/first_group/df1', dfs['df1'])
            store.put('/first_group/df2', dfs['df2'])
            store.put('/second_group/df3', dfs['df3'])
            store.put('/second_group/third_group/df4', dfs['df4'])

            expect = {
                '/': ({'first_group', 'second_group'}, set()),
                '/first_group': (set(), {'df1', 'df2'}),
                '/second_group': ({'third_group'}, {'df3'}),
                '/second_group/third_group': (set(), {'df4'}),
            }

            # walk_groups() is expected to yield (path, subgroup names, frame names) per group
            for path, groups, frames in store.walk_groups():
                self.assertIn(path, expect)
                expect_groups, expect_frames = expect[path]

                self.assertEqual(expect_groups, set(groups))
                self.assertEqual(expect_frames, set(frames))
                for frame in frames:
                    frame_path = '/'.join([path, frame])
                    df = store.get(frame_path)
                    self.assertTrue(df.equals(dfs[frame]))
Example no. 3
def hdf():
    df = ts.get_hist_data('000875')
    # df.to_hdf('c:/day/store.h5', 'table')

    store = HDFStore('c:/day/store.h5')
    store['000875'] = df
    store.close()
Example no. 4
def quantitative_analysis(df_name, df_seq_col, df_quant_col, func=lambda x: x):
    print "Quantitative analysis of ", df_name

    store = HDFStore('_data_/ProteinDataStore.h5')
    summary = store['DataBases_Summary']
    df = store[df_name]
    df = df[[df_seq_col, df_quant_col]]
    renamed_col = '_'.join(df_quant_col.split(' '))
    print "Filling column ", renamed_col
    summary[renamed_col] = ['.'] * len(summary)
    print "Current summary shape: ", summary.shape

    seq_list = map(lambda x: re.sub(r'[^A-Z]', '', x), df[df_seq_col].values)
    for i in zip(seq_list, df[df_quant_col].values):
        query = np.where(summary['GlyGly Probabilities'] == i[0])[0]
        if len(query) != 0:
            index = query[0]
        else:
            print "Omitted data: ", i
            continue

        if not np.isnan(i[1]):
            try:
                tmp = func(i[1])
                summary.loc[index, renamed_col] = tmp
            except Exception as e:
                print(i)
                print(e)
        else:
            summary.loc[index, renamed_col] = '.'

    store['DataBases_Summary'] = summary
    store.close()
Example no. 5
 def to_hdf5(self, fname, complevel=9, complib='bzip2'):
     if os.path.exists(fname):
         logger.warning('Overwriting %s with current history', fname)
     history_store = HDFStore(fname, mode='w', complevel=complevel, complib=complib)
     for attribute in self._store_attributes:
         history_store[attribute] = getattr(self, attribute)
     history_store.close()
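A minimal read-back sketch for a store written this way; the file name 'history.h5' is illustrative, and the compression settings are transparent on read:

from pandas import HDFStore

with HDFStore('history.h5', mode='r') as store:
    for key in store.keys():          # keys come back as '/'-prefixed paths
        print(key, type(store[key]))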
Example no. 6
def update_exchanges():
    """
    Updates data for exchanges such as NYSE

    """

    ####### LOAD DATE RANGES AND SYMBOLS
    start_date = Config(CFG).get('Exchange Data Start Date', 'default_start_date')
    end_date = datetime.datetime.now().strftime('%Y-%m-%d')
    symbols = [Config(CFG).get('Symbol List', 'list')]
    ####### BACKUP and UPDATE DB
    filename = Config(CFG).get("DB Locations", 'exchange_data')
    backup = Config(CFG).get("DB Locations", 'exchange_data_backup')
    file_update_backup(filename, backup)
    ####### START HDF5 INSTANCE
    operator = HDFStore(filename)

    for symbol in symbols:

        ####### PULL YAHOO FINANCE DATA
        data = get_daily_history(symbol, start_date, end_date)
        ####### PULL ADVANCES/DECLINES DATA
        data = data.merge(update_unicorn(symbol), left_index=True, right_index=True, how='outer')
        ####### SAVE DATA TO HDF5
        operator[symbol] = data

    operator.close()
Example no. 7
    def save(self, filename, force=False):
        """Save the trials and samples arrays from the current importer
        instance to a dataset inside a lzf compressed hdf5 file for later use.

        Args:
            param1: (string): filename, will be stored in self.importpath

        Optional Args:
            force: (boolean) Wether or not to overwrite an existing file
                             (default: False)
        """
        try:
            mkdir(self.importpath)
        except FileExistsError:
            pass

        filename = path.join(self.importpath, filename)
        if path.exists(filename):
            if force:
                unlink(filename)
            else:
                raise FileExistsError('Import file "' + filename +
                                      '" already exists.')

        self.__sort()
        self.store = HDFStore(filename, complib='lzo')
        self.store['samples'] = self.ds.samples
        self.store['targets'] = self.ds.targets
        self.store.close()
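A hedged usage sketch for the method above; `TrialImporter` is a stand-in name for the class that defines save(), not something taken from the source:

importer = TrialImporter()               # hypothetical class exposing save()
importer.save('session.h5')              # raises FileExistsError on a second call
importer.save('session.h5', force=True)  # unlinks the old file and writes anew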
Example no. 8
def drop_with_low_probability(storename, df_name, loc_probability_colname, threshold=0.95):
    print('Filtering by low probability in', df_name)
    store = HDFStore(storename)
    df = store[df_name]
    if loc_probability_colname is not None:
        df = df[df[loc_probability_colname] >= threshold]
    store[df_name] = df
    store.close()
Example no. 9
def reindex_summary():
    store = HDFStore('_data_/ProteinDataStore.h5')
    data_summary = store['DataBases_Summary']
    range_index = list(range(len(data_summary)))
    print("Reindexing...")
    data_summary = data_summary.set_index([range_index])
    store['DataBases_Summary'] = data_summary
    store.close()
Example no. 10
 def test_legacy_read(self):
     pth = curpath()
     store = HDFStore(os.path.join(pth, 'legacy.h5'), 'r')
     store['a']
     store['b']
     store['c']
     store['d']
     store.close()
Example no. 11
 def _check_roundtrip(self, obj, comparator):
     store = HDFStore(self.scratchpath, 'w')
     try:
         store['obj'] = obj
         retrieved = store['obj']
         comparator(retrieved, obj)
     finally:
         store.close()
         os.remove(self.scratchpath)
Example no. 12
def colorful_dump_summary_to_excel(output_filename, range_label='L1:U36229'):
    # < -2         blue
    # -2 to -1     coral
    # -1 to  1     yellow
    #  1 to  2     orange
    # >  2         red
    store = HDFStore('_data_/ProteinDataStore.h5')
    data_summary = store['DataBases_Summary']
    writer = ExcelWriter(output_filename + '.xlsx', engine='xlsxwriter')
    data_summary.to_excel(writer, 'DataBases_Summary', index=True)

    workbook = writer.book
    worksheet = writer.sheets['DataBases_Summary']

    # using palette http://www.colourlovers.com/palette/3687876/
    blue = workbook.add_format({'bg_color': '#69D2E7', 'font_color': '#000000'})
    coral = workbook.add_format({'bg_color': '#A7DBD8', 'font_color': '#000000'})
    yellow = workbook.add_format({'bg_color': '#EAE319', 'font_color': '#000000'})
    orange = workbook.add_format({'bg_color': '#FA6900', 'font_color': '#000000'})
    red = workbook.add_format({'bg_color': '#E2434B', 'font_color': '#000000'})
    # empty = workbook.add_format({'bg_color': '#FFFFFF', 'font_color': '#000000'})
    #
    # worksheet.conditional_format(range_label, {'type': 'text',
    #                                            'criteria': 'begins with',
    #                                            'value': '.',
    #                                            'format': empty})

    worksheet.conditional_format(range_label, {'type': 'cell',
                                               'criteria': '<',
                                               'value': -2,
                                               'format': blue})

    worksheet.conditional_format(range_label, {'type': 'cell',
                                               'criteria': 'between',
                                               'minimum': -2,
                                               'maximum': -1,
                                               'format': coral})

    worksheet.conditional_format(range_label, {'type': 'cell',
                                               'criteria': 'between',
                                               'minimum': -1,
                                               'maximum': 1,
                                               'format': yellow})

    worksheet.conditional_format(range_label, {'type': 'cell',
                                               'criteria': 'between',
                                               'minimum': 1,
                                               'maximum': 2,
                                               'format': orange})

    worksheet.conditional_format(range_label, {'type': 'cell',
                                               'criteria': '>',
                                               'value': 2,
                                               'format': red})
    writer.save()
    store.close()
Example no. 13
def parse_one_and_save(input_file, output_store_name):
    sheet_name = 'All sites'
    skip_rows = [0]
    store = HDFStore(output_store_name)
    df = pd.ExcelFile(input_file).parse(sheetname=sheet_name,
                                        skiprows=skip_rows)
    name = (input_file.split('/')[1]).split('.')[0]
    print "Parsing ", name
    store[name] = df
    store.close()
Example no. 14
 def _check_roundtrip_table(self, obj, comparator):
     store = HDFStore(self.scratchpath, 'w')
     try:
         store.put('obj', obj, table=True)
         retrieved = store['obj']
         sorted_obj = _test_sort(obj)
         comparator(retrieved, sorted_obj)
     finally:
         store.close()
         os.remove(self.scratchpath)
Example no. 15
 def load(self, format='csv'):
     savefile = self.__savefile()
     if format == "csv":
         self.frame = self.frame.from_csv(savefile + ".csv")
     elif format == "hdf":
         store = HDFStore(savefile + ".hdf")
         try:
             self.frame = store['data']
         finally:
             store.close()
Example no. 16
def parse_list_and_save(list_of_files, output_store_name):
    sheet_name = 'All sites'
    skip_rows = [0]
    store = HDFStore(output_store_name)
    for _file_ in list_of_files:
        df = pd.ExcelFile(_file_).parse(sheetname=sheet_name,
                                        skiprows=skip_rows)
        name = (_file_.split('/')[2]).split('.')[0]
        print "Parsing ", name
        store[name] = df
    store.close()
Example no. 17
 def test_store_index_name(self):
     df = tm.makeDataFrame()
     df.index.name = 'foo'
     try:
         store = HDFStore(self.scratchpath)
         store['frame'] = df
         recons = store['frame']
         assert (recons.index.name == 'foo')
     finally:
         store.close()
         os.remove(self.scratchpath)
Example no. 18
def load_exchange_data(symbol):
    """
    Returns data for a specific exchange

    """
    filename = Config(CFG).get("DB Locations", 'exchange_data')
    operator = HDFStore(filename)
    data = operator[symbol]
    operator.close()

    return data
Example no. 19
    def test_legacy_table_write(self):
        # legacy table types
        pth = curpath()
        df = tm.makeDataFrame()
        wp = tm.makePanel()

        store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'a')

        self.assertRaises(Exception, store.append, 'df1', df)
        self.assertRaises(Exception, store.append, 'wp1', wp)

        store.close()
Example no. 20
 def test_fixed_offset_tz(self):
     rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00')
     frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
     try:
         store = HDFStore(self.scratchpath)
         store['frame'] = frame
         recons = store['frame']
         self.assertTrue(recons.index.equals(rng))
         self.assertEqual(rng.tz, recons.index.tz)
     finally:
         store.close()
         os.remove(self.scratchpath)
Example no. 21
    def test_store_series_name(self):
        df = tm.makeDataFrame()
        series = df['A']

        try:
            store = HDFStore(self.scratchpath)
            store['series'] = series
            recons = store['series']
            assert (recons.name == 'A')
        finally:
            store.close()
            os.remove(self.scratchpath)
Example no. 22
 def test_timezones(self):
     rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
     frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
     try:
         store = HDFStore(self.scratchpath)
         store['frame'] = frame
         recons = store['frame']
         self.assertTrue(recons.index.equals(rng))
         self.assertEqual(rng.tz, recons.index.tz)
     finally:
         store.close()
         os.remove(self.scratchpath)
Example no. 23
def test_context(setup_path):
    with tm.ensure_clean(setup_path) as path:
        try:
            with HDFStore(path) as tbl:
                raise ValueError("blah")
        except ValueError:
            pass
    with tm.ensure_clean(setup_path) as path:
        with HDFStore(path) as tbl:
            tbl["a"] = tm.makeDataFrame()
            assert len(tbl) == 1
            assert type(tbl["a"]) == DataFrame
Example no. 24
    def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
        options = {}
        if compression:
            options['complib'] = _default_compressor

        store = HDFStore(self.scratchpath, 'w', **options)
        try:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
        finally:
            store.close()
            os.remove(self.scratchpath)
Example no. 25
    def hdfWrite(self, path, excode, symbol, indata, kind1, kind2, kind3):
        # kind1: 'Rawdata', 'Stitch', or 'Indicator'
        # kind2: '00' or '01'
        # kind3: '1d', '60m', '30m', '15m', '5m', or '1m'
        # write raw data for a frequency: kind1='Rawdata', kind2=None, kind3='1d'
        # write a stitch rule:            kind1='Stitch', kind2='00', kind3=None
        # write stitched data:            kind1='Stitch', kind2='00', kind3='1d'
        # write an indicator:             kind1='Indicator', kind2='Indicator_name', kind3='params'
        store = HDFStore(path, mode='a')
        if kind1 == EXT_Rawdata:
            key = '/'.join([kind1, excode, symbol, kind3])
        elif kind1 == EXT_Stitch:
            if kind3 is None:
                key = '/'.join([kind1, excode, symbol, EXT_Rule, kind2])
            else:
                key = '/'.join([kind1, excode, symbol, EXT_Period, kind3, kind2])
        elif kind1 == EXT_Indicator:
            key = '/'.join([kind1, excode, symbol, kind2])
        else:
            print("kind not supported")
            return

        if kind1 == EXT_Indicator:
            f = h5py.File(path, 'a')
            try:
                store[key]
            except KeyError:  # create the node when the key does not exist yet
                store[key] = indata
                f[key].attrs['Params'] = kind3
            else:
                if f[key].attrs['Params'] == kind3:  # merge when Params match
                    adddata = indata[~indata.index.isin(store[key].index)]
                    store.append(key, adddata)
                else:  # overwrite when Params differ
                    store[key] = indata
                    f[key].attrs['Params'] = kind3
            f.close()
            store.close()
        else:
            try:
                store[key]
            except KeyError:
                store[key] = indata
            else:
                adddata = indata[~indata.index.isin(store[key].index)]
                if kind2 in [EXT_Series_00, EXT_Series_01]:
                    # rescale incoming adjustment factors so the series chains
                    # continuously with the last stored factor
                    adddata[EXT_Out_AdjFactor] = (
                        adddata[EXT_Out_AdjFactor]
                        * store[key][EXT_Out_AdjFactor].iloc[-1]
                        / adddata[EXT_Out_AdjFactor].iloc[0])
                store.append(key, adddata)
            store.close()
Example no. 26
 def hload(self, fname):
     from pandas.io.pytables import HDFStore
     store = HDFStore(fname, mode='r')
     self.clear()
     read = []
     for k in store.keys():
         if re.match('^_MISSING', k):
             self._missing = list(store.get(k).to_dict().values())
             continue
         name = re.sub('^/', '', k)
         self[name] = store[k]
         read.append(name)
     store.close()
Example no. 27
 def hsave(self, fname):
     from pandas.io.pytables import HDFStore
     LOGGER.debug('Saving HDF in %s\n%s', fname, self.report())
     store = HDFStore(fname, mode='w')
     for k, v in self.items():
         if re.match('^__', k):
             continue
         if isinstance(v, np.ndarray):
             v = Series(v)
         LOGGER.debug('Saving HDF for %s', k)
         store.put(k, v)
     if self._missing:
         store['_MISSING'] = Series(self._missing)
     store.close()
Example no. 28
    def _check_roundtrip_table(self, obj, comparator, compression=False):
        options = {}
        if compression:
            options['complib'] = _default_compressor

        store = HDFStore(self.scratchpath, 'w', **options)
        try:
            store.put('obj', obj, table=True)
            retrieved = store['obj']
            sorted_obj = _test_sort(obj)
            comparator(retrieved, sorted_obj)
        finally:
            store.close()
            os.remove(self.scratchpath)
Example no. 29
def load_historical_data(start=datetime(2010, 1, 1), end=datetime.today(), filename='stock_data.h5'):
    store = HDFStore(filename)

    with open('companylist.csv', newline='') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in spamreader:
            print(row[0])
            try:
                stock_info = web.DataReader(row[0], "yahoo", start, end)
                store[row[0]] = stock_info
            except Exception:
                print("Error on", row[0])

    store.close()
Example no. 30
 def test_legacy_table_read(self):
     # legacy table types
     pth = curpath()
     store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
     store.select('df1')
     store.select('df2')
     store.select('wp1')
     store.close()
Example no. 31
 def test_wide_table_dups(self):
     wp = tm.makePanel()
     try:
         store = HDFStore(self.scratchpath)
         store._quiet = True
         store.put('panel', wp, table=True)
         store.put('panel', wp, table=True, append=True)
         recons = store['panel']
         tm.assert_panel_equal(recons, wp)
     finally:
         store.close()
         os.remove(self.scratchpath)
Example no. 32
def pandas_roundtrip(filename, dma1, dma2):
    from pandas.io.pytables import HDFStore
    store = HDFStore(filename)
    store['dma1'] = dma1
    store['dma2'] = dma2
    dma1 = store['dma1']
    dma2 = store['dma2']
    store.close()
    return dma1, dma2
Example no. 33
def read_archive(hdf_path, items=['train_x', 'valid_x', 'test_x', 'train_y', 'valid_y', 'test_y']):
	'''
	Convenience function for retrieving data from an HDF archive.

	Args:
		hdf_path (str):
			full path of the file the data is stored in

		items (list, optional):
			items to be retrieved
			default: ['train_x', 'valid_x', 'test_x', 'train_y', 'valid_y', 'test_y']
	'''
	hdf = HDFStore(hdf_path)
	# materialize before closing; a lazy map would try to read from a closed store
	output = [hdf[x] for x in items]
	hdf.close()
	return output
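A hedged usage sketch; 'features.h5' is an illustrative file name, and the unpacking order mirrors the default `items` list:

train_x, valid_x, test_x, train_y, valid_y, test_y = read_archive('features.h5')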
Example no. 34
def dump_summary_to_excel(output_filename):
    # Save to XLSX
    store = HDFStore('_data_/ProteinDataStore.h5')
    data_summary = store['DataBases_Summary']
    writer = ExcelWriter(output_filename + '.xlsx', engine='xlsxwriter')
    data_summary.to_excel(writer, 'DataBases_Summary', index=True)
    writer.save()
    store.close()
Example no. 35
def maybe_load():
  data_scheme = {'df_lines': 
                    ['line_ID', 
                      '''select line_ID, house_ref, match_ref, TS_ref, line_value, line_increment, snapshot_time, is_it_starting, RTV_ref, time_increment 
                          from Lines where RTV_ref in (1, 2, 3)'''],
                  'df_match_results': 
                    ['MR_ID', 
                      '''select MR_ID, RTV_ref, match_ref, actual_value, text_result
                          from Match_results where RTV_ref in (1, 2, 3)'''],                
                }

  loaded_data = {}

  from pandas.io.pytables import HDFStore

  with HDFStore('BRefDB.h5') as store:    
    for df_name in data_scheme: 
      if df_name in store:
        loaded_data[df_name] = store[df_name]
      else:
        import pymssql
        with pymssql.connect('.\\SQLEXPRESS', 'BB_miner', 'BB_3817_miner', "BRefDB") as conn:
          loaded_data[df_name] = pd.read_sql(data_scheme[df_name][1], conn, index_col=data_scheme[df_name][0])
  
  return loaded_data 
Example no. 36
def save_to_store(loaded_data):
  from pandas.io.pytables import HDFStore

  with HDFStore('BRefDB.h5') as store:    
    for df_name in loaded_data:
      store[df_name] = loaded_data[df_name]
      print(df_name + ' saved')
Example no. 37
def test_hdfstore_iteritems_deprecated(setup_path):
    with ensure_clean_path(setup_path) as path:
        df = DataFrame({"a": [1]})
        with HDFStore(path, mode="w") as hdf:
            hdf.put("table", df)
            with tm.assert_produces_warning(FutureWarning):
                next(hdf.iteritems())
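For comparison, the non-deprecated spelling iterates the same (key, group) pairs through HDFStore.items(); a minimal sketch reusing the `path` from the test above:

with HDFStore(path, mode="r") as hdf:
    for key, group in hdf.items():    # same pairs, no FutureWarning
        print(key, group)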
Example no. 38
def make_summary(newcols):
    """

    :param newcols: column names in the main summary table
    :return: none
    """
    print "Making summary..."

    # open store end read base dataframe
    store = HDFStore('_data_/ProteinDataStore.h5')
    df1 = store['Mol_Cell_Proteomics_2011_Epub_2011_September1Supp2']

    # clean sequences
    LEN = len(df1)
    positions = [0] * LEN
    real_glygly = [0] * LEN
    clean_glygly = [0] * LEN
    for i in np.arange(LEN):
        positions[i] = df1['Position'].values[i]
        real_glygly[i] = clear_sequence(df1['GlyGly (K) Probabilities'].values[i])
        clean_glygly[i] = re.sub(r'[^A-Z]', '', real_glygly[i])

    # align with SwissProt Human and Rodents using blastp
    blastpID_HUMAN, blastpID_RODENTS = fetch_indentity_from_local_batch(clean_glygly)

    del df1
    print "Length test", len(positions) == len(real_glygly) == len(clean_glygly) == len(blastpID_HUMAN) == len(
        blastpID_RODENTS)

    # convert to pandas series
    clean_glygly = pd.Series(clean_glygly)
    blastpID_HUMAN = pd.Series(blastpID_HUMAN)
    blastpID_RODENTS = pd.Series(blastpID_RODENTS)

    # Create empty dataframe
    data_summary = pd.DataFrame(columns=newcols)

    # Combine everything required in dataframe
    data_summary['Position'] = positions
    data_summary['GlyGly (K) Probabilities'] = real_glygly
    data_summary['GlyGly Probabilities'] = clean_glygly
    data_summary['SP_ID_BLASTP_HUMAN'] = blastpID_HUMAN
    data_summary['SP_ID_BLASTP_RODENTS'] = blastpID_RODENTS

    # Save to HDF store
    store['DataBases_Summary'] = data_summary
    store.close()
Example no. 39
 def hdfRead(self,
             path,
             excode,
             symbol,
             kind1,
             kind2,
             kind3,
             startdate=EXT_Start,
             enddate=EXT_End,
             is_stitch=True):
     # kind1: 'Rawdata', 'Stitch', or 'Indicator'
     # kind2: '00' or '01'
     # kind3: '1d', '60m', '30m', '15m', '5m', or '1m'
     # read raw data for a frequency: kind1='Rawdata', kind2=None, kind3='1d'
     # read a stitch rule:            kind1='Stitch', kind2='00', kind3=None
     # read stitched data:            kind1='Stitch', kind2='00', kind3='1d'
     # read an indicator:             kind1='Indicator', kind2='Indicator_name', kind3=None
     store = HDFStore(path, mode='r')
     if kind1 == EXT_Rawdata:
         key = '/'.join([kind1, excode, symbol, kind3])
     elif kind1 == EXT_Stitch:
         if kind3 is None:
             key = '/'.join([kind1, excode, symbol, EXT_Rule, kind2])
         else:
             key = '/'.join([kind1, excode, symbol, EXT_Period, kind3, kind2])
     elif kind1 == EXT_Indicator:
         key = '/'.join([kind1, excode, symbol, kind2])
     else:
         print("kind not supported")
         return
     # select rows whose first index level falls within [startdate, enddate]
     mask = ((store[key].index.get_level_values(0) >= pd.to_datetime(startdate))
             & (store[key].index.get_level_values(0) <= pd.to_datetime(enddate)))
     data = store[key].loc[mask, :]
     if kind1 == EXT_Stitch and is_stitch and kind3 is not None:
         data[EXT_Bar_Open] = data[EXT_AdjFactor] * data[EXT_Bar_Open]
         data[EXT_Bar_High] = data[EXT_AdjFactor] * data[EXT_Bar_High]
         data[EXT_Bar_Low] = data[EXT_AdjFactor] * data[EXT_Bar_Low]
         data[EXT_Bar_Close] = data[EXT_AdjFactor] * data[EXT_Bar_Close]
     store.close()
     if kind1 == EXT_Indicator:
         f = h5py.File(path, 'r')
         params = f[key].attrs['Params']
         f.close()
         return data, params
     return data
Example no. 40
 def _exists(self) -> bool:
     path = self._get_load_path()
     if Path(path).is_file():
         with HDFStore(Path(path), mode="r") as hdfstore:
             key_with_slash = (self._key if self._key.startswith("/") else
                               "/" + self._key)
             if key_with_slash in hdfstore.keys():
                 return True
     return False
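A standalone sketch of the same key-membership check; the function name is illustrative:

from pathlib import Path
from pandas import HDFStore

def hdf_key_exists(path: str, key: str) -> bool:
    # normalize to the '/'-prefixed form returned by HDFStore.keys()
    key = key if key.startswith("/") else "/" + key
    if not Path(path).is_file():
        return False
    with HDFStore(path, mode="r") as store:
        return key in store.keys()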
Example no. 41
def analyze_existence(storename_to_append, gly_gly_seq_colname):
    print "Analyzing occurence in ", storename_to_append

    store = HDFStore('_data_/ProteinDataStore.h5')
    data_summary = store['DataBases_Summary']
    tmp_store_sequences = store[storename_to_append][gly_gly_seq_colname].values
    tmp_store_sequences = list(map(clear_sequence, tmp_store_sequences))

    # Make binary vector which represents existence
    # of the sequence in storename_to_append dataset
    existence_index = data_summary['GlyGly (K) Probabilities'].isin(tmp_store_sequences).values
    existence_index = np.asarray(existence_index, dtype=int)

    # Create new column in summary table
    data_summary[storename_to_append] = existence_index
    print(np.sum(data_summary[storename_to_append]))

    # Save to HDF store
    store['DataBases_Summary'] = data_summary
    store.close()
Example no. 42
    def test_legacy_table_read(self):
        # legacy table types
        pth = curpath()
        store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
        store.select('df1')
        store.select('df2')
        store.select('wp1')

        # force the frame
        store.select('df2', typ='legacy_frame')

        # old version (this still throws an exception though)
        import warnings
        warnings.filterwarnings('ignore', category=IncompatibilityWarning)
        self.assertRaises(Exception, store.select, 'wp1', Term('minor_axis', '=', 'B'))
        warnings.filterwarnings('always', category=IncompatibilityWarning)

        store.close()