Example #1
def prep_data(data, balance=True):
	'''
	prepares a machine learning dataframe from mitdb HDFStore object

	Args:
		data (HDFStore): mitdb HDFStore data
		balance (bool, opt): balance arrythmia/not arrythmia classes

	Returns:
		DataFrame
	'''
	records = filter(lambda x: re.search('record', x), data.keys())
	records = [data[key] for key in records]
	
	data = DataFrame()
	for record in records:
		if record.arrythmia.sum() > 1:
			data = pd.concat([data, conform_data(record)])

	data.reset_index(drop=True, inplace=True)
	
	if balance:
		mask = data.y == 1
		size = data[mask].shape[0]
		index = np.random.choice(data[~mask].index, size)
		index = np.concatenate([index, data[mask].index])
		data = data.loc[index]
		data.reset_index(drop=True, inplace=True)
		
	return data
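The balancing branch above is plain majority-class downsampling: draw as many negative rows as there are positive ones, keep all positives, and reset the index. A minimal standalone sketch of that idea on made-up data (the column name y and the toy values are assumptions, not part of the original project):

import numpy as np
import pandas as pd

df = pd.DataFrame({'x': range(10),
                   'y': [1, 1, 0, 0, 0, 0, 0, 0, 0, 0]})

# keep every positive row plus an equally sized random draw of negatives
mask = df.y == 1
size = df[mask].shape[0]
index = np.random.choice(df[~mask].index, size)
index = np.concatenate([index, df[mask].index])
balanced = df.loc[index].reset_index(drop=True)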
Example #2
    def test_join_multi_to_multi(self, join_type):
        # GH 20475
        leftindex = MultiIndex.from_product([list('abc'), list('xy'), [1, 2]],
                                            names=['abc', 'xy', 'num'])
        left = DataFrame({'v1': range(12)}, index=leftindex)

        rightindex = MultiIndex.from_product([list('abc'), list('xy')],
                                             names=['abc', 'xy'])
        right = DataFrame({'v2': [100 * i for i in range(1, 7)]},
                          index=rightindex)

        result = left.join(right, on=['abc', 'xy'], how=join_type)
        expected = (left.reset_index()
                        .merge(right.reset_index(),
                               on=['abc', 'xy'], how=join_type)
                        .set_index(['abc', 'xy', 'num'])
                    )
        assert_frame_equal(expected, result)

        msg = (r'len\(left_on\) must equal the number of levels in the index'
               ' of "right"')
        with pytest.raises(ValueError, match=msg):
            left.join(right, on='xy', how=join_type)

        with pytest.raises(ValueError, match=msg):
            right.join(left, on=['abc', 'xy'], how=join_type)
Example #3
def get_dataframe_option1(stock_list, date, zs_amplifier = 1):
    dframe = DataFrame()
    date = format_date(date, "%Y-%m-%d")
    stock_list = list(set(stock_list))
    # stock_list = ['601800', '600528']
    dframe_list = []
    for stock in stock_list:
        if stock == u'ZS000001':    # SSE Composite Index
            tmp_frame = get_minly_frame(stock, date, id_type=0)
            zs_amplifier = 1
        else:
            tmp_frame = get_minly_frame(stock, date)
        tmp_frame = tmp_frame[['bartime', 'closeprice']]
        yesterday = get_lastN_date(date, 1)
        yeframe = get_mysqlData([stock],[yesterday])
        if len(yeframe) > 0:
            pre_close = yeframe.loc[0,'CLOSE_PRICE']
        else:
            pre_close = 10000
        # compute the percentage change; the magnitude can be scaled up via zs_amplifier
        tmp_frame['closeprice'] = zs_amplifier * normalize_frame(tmp_frame['closeprice'], pre_close)
        tmp_frame.columns = ['barTime', stock]
        tmp_frame.set_index('barTime', inplace=True)
        dframe_list.append(tmp_frame)
    dframe = pd.concat(dframe_list, axis=1)

    dframe.reset_index(inplace=True)
    return dframe
Example #4
def clicksDataframe(clicks_data):
    clicks_dataframe = DataFrame(clicks_data, columns=['date', 'cardName', 'position', 'totalClicks', 'uniqueClicks'])
    clicks_dataframe = clicks_dataframe.apply(to_numeric, errors='ignore')
    clicks_dataframe.drop('date', axis=1, inplace=True)
    clicks_dataframe = clicks_dataframe.groupby(['cardName','position']).sum().sort_values(by='uniqueClicks',ascending=0)
    clicks_dataframe.reset_index(inplace=True)

    return clicks_dataframe
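A hypothetical call to the helper above, assuming DataFrame and to_numeric are imported from pandas as the snippet expects (the click rows are invented):

rows = [['2020-01-01', 'intro-card', 1, 10, 7],
        ['2020-01-02', 'intro-card', 1, 4, 3],
        ['2020-01-01', 'promo-card', 2, 5, 5]]
clicks = clicksDataframe(rows)
# one row per (cardName, position) with summed totals, sorted by uniqueClicks;
# reset_index() turns the group keys back into ordinary columns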
Example #5
 def _count_by_entity(data, var, entity, bornes):
     ''' Counts, within each 'entity', the number of 'var' values that lie between the 'bornes' (bounds) '''
     id = 'id' + entity
     qui = 'qui' + entity
     data.index = data[id]
     cond = (bornes[0] <= data[var]) & (data[var] <= bornes[1]) & (data[qui] > 1)
     col = DataFrame(data.loc[cond, :].groupby(id).size(), index = data.index).fillna(0)
     col = col.reset_index()
     return col
Example #6
def nearestNeighborsSetup(filename, stateList):
  df_specimens = formatChecker(filename)
  print('Getting the weather stations')
  with open('input/acis_station_ID.pickle') as f:
      weatherStationsMetaData = cPickle.load(f)

  # weatherStationsMetaData = weatherStations(stateList)
  # weatherStationsMetaData = read_csv('weatherStation/acis_station_ID.csv')
  df_stations = DataFrame.from_dict(weatherStationsMetaData, orient='index', dtype=None)
  '''Loads the lat/long coordinates of the specimens and weather stations into numpy arrays.
  NearestNeighborsResults() will return the number of K (nearest stations) with the index value.
  Then the index will be replaced by the UID to match the ACIS data server.'''
  #Number of points
  np1 = np.array(df_specimens['longitude']).size
  np2 = np.array(df_stations['longitude']).size

  #Search radius
  r = .25

  #Number of nearest stations returned
  k = 10

  d1 = np.empty((np1, 2))
  d2 = np.empty((np2, 2))
  d1[:, 0] = np.array(df_specimens['latitude'])
  d1[:, 1] = np.array(df_specimens['longitude'])

  d2[:, 0] = np.array(df_stations['latitude'])
  d2[:, 1] = np.array(df_stations['longitude'])
 
  result, distance = nearestNeighborsResults(d1.copy(), d2.copy(), r, k)
  columnindex = []
  closestStationList = [nearestNeighborsColumnString(x) for x in range(k)]
  for f in closestStationList: columnindex.append(f())
  #temp variable for 0-N array
  t1 = np.arange(np2)
  #temp variable for 'uid' ID
  t2 = np.array(df_stations['uid'])
  df_results = DataFrame(result, columns=columnindex)
  #Creates a Pandas DataFrame
  uid_index = DataFrame({'0_closest_weather_station':  t1,
    'uid': t2})

  for index, column_name in enumerate(columnindex):
    temp = uid_index.rename(columns={'0_closest_weather_station': column_name, 'uid': column_name + "s"})
    df_results = df_results.reset_index().merge(temp, how='left', on=column_name, sort=False).sort_values('index')
    
    if index != 0:
      del df_results['level_0']

    del df_results[column_name]

  del df_results['index']
  df_results = df_results.reset_index()
  return concat([df_specimens, df_results], axis=1), distance, weatherStationsMetaData
Example #7
class ResetIndex:

    params = [None, 'US/Eastern']
    param_names = ['tz']

    def setup(self, tz):
        idx = date_range(start='1/1/2000', periods=1000, freq='H', tz=tz)
        self.df = DataFrame(np.random.randn(1000, 2), index=idx)

    def time_reset_datetimeindex(self, tz):
        self.df.reset_index()
Example #8
def append_village_areas(divname):
    im_vil = pd.read_csv('../data/%s_village_images.csv' % divname.lower())
    shape_helper = ShapeHelper('../data/shapefiles/fixed_village_shapefiles/%s/%s.shp' % (divname.lower(), divname.lower()),
                               lat_offset, lon_offset)
    areas = shape_helper.get_shape_areas('village')
    areas_df = DataFrame(areas, index=['area'])
    areas_df = areas_df.transpose()
    areas_df.reset_index(inplace=True)
    areas_df.rename(columns={'index': 'village'}, inplace=True)
    im_vil_areas = pd.merge(im_vil, areas_df, how='left')
    im_vil_areas.set_index('image', inplace=True)
    im_vil_areas.to_csv('../data/%s_village_areas_images.csv' % divname.lower())
Example #9
def homePageToSubjectPageDataframe(data):
    subject_dataframe = DataFrame(data,columns=['date','page_title','views','uniqueViews'])
    subject_dataframe = subject_dataframe.apply(to_numeric, errors='ignore')
    subject_dataframe.drop('date', axis=1, inplace=True)
    subject_dataframe = subject_dataframe.groupby(['page_title']).sum().sort_values(by='uniqueViews',ascending=0)
    subject_dataframe.reset_index(inplace=True)
    subject_dataframe['subject'] = subject_dataframe['page_title'].apply(lambda title: strip_edx_page_title(title))
    subject_dataframe['totalViews'] = subject_dataframe['uniqueViews'].sum()
    subject_dataframe['Pct'] = (subject_dataframe['uniqueViews'] / subject_dataframe['totalViews'])
    subject_dataframe = subject_dataframe[(subject_dataframe['Pct']>0.0001)]

    return subject_dataframe[['subject','uniqueViews','Pct']]
Example #10
def sql2pandas(db_url, table_name, locriterion=None):
    """connects to database at db_url and converts psiturk datatable table_name
       to a pandas df.  Only includes trials that meet all criterion functions
       given in locriterion (default takes all trials)"""
    from sqlalchemy import MetaData, Table, create_engine
    from json import loads
    from pandas import DataFrame, concat

    data_column_name = 'datastring'
    # boilerplate sqlalchemy setup
    engine = create_engine(db_url)
    metadata = MetaData()
    metadata.bind = engine
    table = Table(table_name, metadata, autoload=True)
    # make a query and loop through
    s = table.select()
    tablerows = s.execute()

    # convert sql rows to lodicts, each containing a subject's full experiment
    # fields from orig datatable that you want attached to every trial
    expFields = ['uniqueid', 'assignmentid', 'workerid', 'hitid', 'status']
    expData = []
    for row in tablerows:
        try:
            subExpData = loads(row[data_column_name])
            for field in expFields:
                subExpData[field] = row[field]
            expData.append(subExpData)
        except:
            continue

    # turn from nested list to flat list of trials
    minidicts = []
    for subExpData in expData:
        for trial in subExpData['data']:
            trialdata = trial['trialdata']
            for field in expFields:
                trialdata[field] = subExpData[field]

            # check if trial valid if any criterion were passed
            includeThisTrial = True
            if locriterion:
                includeThisTrial = meetsCriterion(trialdata, locriterion)

            if includeThisTrial:
                minidicts.append(trialdata)

    # convert minidicts into dataframe!
    df = DataFrame(minidicts)
    # get rid of residue from minidfs
    df.reset_index(drop=True, inplace=True)
    return df
Example #11
class InfoTable(DataFrameWidget):
    def __init__(self, samples=None):
        self.initVars()
        super(InfoTable, self).__init__(self.table)

    def initVars(self):
        """Initialises variables."""
        self.columns = ["Plate ID", "Plate Name", "Plate Kea", "Well"]
        self.table = DataFrame(columns=self.columns)

    ########################################################################
    def update(self):
        plateID = self.table["Plate ID"]
        plateName = self.table["Plate Name"]
        plateKea = self.table["Plate Kea"]
        well = self.table["Well"]
        self.table = self.table.drop(labels=["Plate ID", "Plate Name", "Plate Kea", "Well"], axis=1)
        self.table.insert(0, "Plate ID", plateID)
        self.table.insert(1, "Plate Name", plateName)
        self.table.insert(2, "Plate Kea", plateKea)
        self.table.insert(3, "Well", well)
        self.setDataFrame(self.table)

    def append(self, appendage):
        self.table = self.table.append(appendage, ignore_index=True)
        self.update()

    def editPlates(self, edits):
        self.table = self.table.set_index("Plate ID")
        edits = edits.set_index("ID")
        self.table.update(edits)
        self.table = self.table.reset_index()

    def importPlateData(self, plateData, key):
        plateData = plateData.set_index(key)
        self.table = self.table.set_index(key)
        self.table.update(plateData)
        self.table = self.table.reset_index()

    def importSampleData(self, sampleData, tableKey, importKey):
        sampleData[tableKey] = sampleData[importKey]
        sampleData = sampleData.set_index(tableKey)
        self.table = self.table.set_index(tableKey)
        self.table = self.table.join(sampleData, rsuffix="_new")
        self.table = self.table.reset_index()

    def getKeaSexTestingData(self):
        table = self.table[["Plate ID", "Well", "Sample ID", "Plant Alt Names"]]
        table = table.set_index(["Plate ID", "Well"])
        table.rename(columns={"Plant Alt Names": "Plant AltName"}, inplace=True)
        return table
Example #12
    def _fill(self, df, year = None):
        """
        Takes the age/sex profile (per capita transfers) found in df
        and fills the year 'year', or all empty years if year is None.
        This is a private method.
        Parameters
        ----------
        
        df : DataFrame
             a dataframe containing the profiles
        
        year : int, default None
               if None fill all the years else only the given year
        
        """        
        if not isinstance(df, DataFrame): 
            df = DataFrame(df)

        for col_name in df.columns:
            if col_name not in self._types:
                self.new_type(col_name)
                typ = col_name
                tmp = df[typ]
                tmp = tmp.unstack(level="year")
                tmp = tmp.dropna(axis=1, how="all")
                self._types_years[typ] = tmp.columns
                
            else:
                raise Exception("column already exists")
        
        if year is None:
            df_insert = df.reset_index(level='year', drop=True)
            years = sorted(self.index_sets['year'])
            list_df = [df_insert] * len(years)
            df_tot = concat(list_df, keys = years, names =['year'])
            df_tot = df_tot.reorder_levels(['age','sex','year'], axis=0)
            
        else:
            yr = year
            df_tot = None
            df_insert = df.reset_index()
            df_insert['year'] = yr
            if df_tot is None:
                df_tot = df_insert
            else:
                df_tot.append(df_insert, ignore_index=True)
                df_tot = df_tot.set_index(['age','sex','year'])
        
#         print df_tot
#         print len(df_tot)
        self.update(df_tot)
Example #13
    def _decode_solutions(self, solutions):
        decoded_solutions = DataFrame(columns=["targets", "fitness"])
        index = 0
        for solution in solutions:
            combinations = self._decoder(solution.candidate, flat=True, decompose=True)
            for targets in combinations:
                if len(targets) > 0:
                    decoded_solutions.loc[index] = [tuple(targets), solution.fitness]
                    index += 1

        decoded_solutions.drop_duplicates(inplace=True, subset="targets")
        decoded_solutions.reset_index(inplace=True)

        return decoded_solutions
Example #14
def get_cpu_sw_map(dfds, cap_time_usec, task_re):
    df_list = []
    dfsw_list = []
    for dfd in dfds:
        df = filter_df_core(dfd.df, task_re, True)
        # at this point we have a set of df that look like this:
        #         task_name  duration
        # 0     ASA.1.vcpu0      7954
        # 1     ASA.1.vcpu0      5475
        # 2     ASA.1.vcpu0      4151
        if df.empty:
            continue
        gb = df.groupby("task_name", as_index=False)

        # sum all duration for each task
        df = gb.aggregate(np.sum)
        if dfd.multiplier > 1.0:
            df["duration"] = (df["duration"] * dfd.multiplier).astype(int)
        df["percent"] = ((df["duration"] * 100 * 10) // cap_time_usec) / 10
        if len(dfds) > 1:
            df["task_name"] = df["task_name"] + "." + dfd.short_name
        df_list.append(df)

        # count number of rows with same task and cpu
        dfsw = DataFrame(gb.size())
        dfsw.reset_index(inplace=True)
        dfsw.rename(columns={0: "count"}, inplace=True)

        if dfd.multiplier > 1.0:
            dfsw["count"] = (dfsw["count"] * dfd.multiplier).astype(int)
        else:
            dfsw["count"] = dfsw["count"].astype(int)
        dfsw_list.append(dfsw)

    if not df_list:
        return None

    df = pandas.concat(df_list)
    df = df.drop("duration", axis=1)
    dfsw = pandas.concat(dfsw_list)
    df = pandas.merge(df, dfsw, on="task_name")
    # Result:
    #             task_name  percent  count
    # 0  ASA.01.vcpu0.1x218     72.0  1998
    # 1  ASA.01.vcpu0.2x208     61.8  2128
    # 2  ASA.02.vcpu0.2x208     58.9  2177

    # transform this into a dict where the key is the task_name and the value
    # is a list [percent, count]
    return df.set_index("task_name").T.to_dict("list")
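The last line is what builds the task_name -> [percent, count] mapping: set_index puts task_name on the index, the transpose makes each task a column, and to_dict("list") emits one list per column. A minimal sketch with made-up numbers:

import pandas as pd

df = pd.DataFrame({"task_name": ["ASA.01.vcpu0", "ASA.02.vcpu0"],
                   "percent": [72.0, 58.9],
                   "count": [1998, 2177]})
df.set_index("task_name").T.to_dict("list")
# {'ASA.01.vcpu0': [72.0, 1998.0], 'ASA.02.vcpu0': [58.9, 2177.0]}
# (the transpose upcasts the integer counts to float)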
Example #15
File: trade.py Project: iswdp/trade
def build_data(symbol_list, n = 15, flag = 1, blag = 10):
    train = DataFrame()
    test = DataFrame()
    for i in symbol_list:
        print(i)

        try:
            path = '45-165caps/' + i + '.csv'
            data = pd.read_csv(path)
            forward = forward_lag(data, i, flag)
            back = back_lag(data, i, blag)
            today_back = prediction_back_lag(data, i, blag)
            combined = combine_lags(forward, back)
            combined = combined.loc[combined['Forward Lag  1'] < .2, :].reset_index()
            del combined['index']

            #Train------------------------------------------------------------------
            random_sample = []
            for j in range(n):
                random_sample.append(random.randint(0,(len(combined) - 1)))
            data_slice = combined.iloc[random_sample, :].reset_index()
            if len(train) == 0:
                train = data_slice
            else:
                train = pd.concat([train, data_slice], axis = 0)

            #Test-------------------------------------------------------------------
            data_slice = DataFrame(today_back.iloc[len(today_back) - 1, :]).T

            if len(test) == 0:
                test = data_slice
            else:
                test = pd.concat([test, data_slice], axis = 0)
        except Exception:
            print('\tSkipped')
            pass

    train = train.reset_index()
    del train['level_0']
    del train['index']

    test = test.reset_index()  
    del test['level_0']
    del test['index']

    combined.to_csv('combined1.csv', sep = ',', index = False)
    today_back.to_csv('today_back1.csv', sep = ',', index = False)

    return train, test
Example #16
    def _standardize_index(
            self, df_in: pd.DataFrame, symbol: str=None, datatype: str=None,
            barsize: str=None, tz: str=None):
        """Normalize input DataFrame index to MarketDataBlock standard.
        """
        # Add or standardize index names in the input.
        if isinstance(df_in.index, pd.MultiIndex):
            df_in.reset_index(inplace=True)

        # Rename ambiguous column names.
        df_in.columns = [
            col_rename.get(col.strip().lower(), col.strip().lower())
            for col in df_in.columns]

        # Insert Symbol, DataType, Barsize columns from arguments if not
        # found in the input dataframe.
        for col in MarketDataBlock.data_index:
            if col not in df_in.columns:
                if locals().get(col.lower(), None) is None:
                    raise KeyError(
                        'No {0} argument and no {0} column in the DataFrame.'
                        .format(col))
                df_in.insert(0, col, locals()[col.lower()])

        # Convert datetime strings to pandas DatetimeIndex
        df_in['TickerTime'] = pd.DatetimeIndex(
            df_in['TickerTime'].apply(pd.Timestamp))

        # Standardize BarSize strings
        df_in['BarSize'] = df_in['BarSize'].map(timedur_standardize)

        # Set index to class-defined MultiIndex
        df_in.set_index(MarketDataBlock.data_index, inplace=True)

        # Set time zone so all DatetimeIndex are tz-aware
        df_in_tz = df_in.index.levels[self.__class__.dtlevel].tz
        if df_in_tz is None or isinstance(df_in_tz, timezone) or \
           isinstance(df_in_tz, pytz._FixedOffset):
            # Input df has naive time index, or tzinfo is not pytz.timezone()
            if tz is None:
                raise ValueError(
                    'Argument tz=None, and TickerTime.tzinfo is None (naive), '
                    'datetime.timezone, or pytz._FixedOffset.')
            if df_in_tz is None:
                df_in = df_in.tz_localize(tz, level=self.__class__.dtlevel)
            else:
                df_in = df_in.tz_convert(tz, level=self.__class__.dtlevel)

        return df_in
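The time-zone handling at the end relies on tz_localize/tz_convert accepting a level argument, so only the datetime level of the MultiIndex is touched. A small sketch under assumed names (the Symbol/TickerTime levels and the ticker are illustrative, not the class's real data):

import pandas as pd

idx = pd.MultiIndex.from_product(
    [['FAKE'], pd.date_range('2021-01-04 09:30', periods=3, freq='min')],
    names=['Symbol', 'TickerTime'])
df = pd.DataFrame({'close': [1.0, 2.0, 3.0]}, index=idx)

# naive datetimes: localize just the 'TickerTime' level
df = df.tz_localize('America/New_York', level='TickerTime')
# for an already tz-aware level, tz_convert(..., level='TickerTime') applies instead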
Example #17
def test_dti_reset_index_round_trip():
    dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D')
    d1 = DataFrame({'v': np.random.rand(len(dti))}, index=dti)
    d2 = d1.reset_index()
    assert d2.dtypes[0] == np.dtype('M8[ns]')
    d3 = d2.set_index('index')
    assert_frame_equal(d1, d3, check_names=False)

    # #2329
    stamp = datetime(2012, 11, 22)
    df = DataFrame([[stamp, 12.1]], columns=['Date', 'Value'])
    df = df.set_index('Date')

    assert df.index[0] == stamp
    assert df.reset_index()['Date'][0] == stamp
Example #18
 def test_frame_reset_index(self):
     dr = date_range('2012-06-02', periods=10, tz='US/Eastern')
     df = DataFrame(np.random.randn(len(dr)), dr)
     roundtripped = df.reset_index().set_index('index')
     xp = df.index.tz
     rs = roundtripped.index.tz
     self.assertEqual(xp, rs)
Example #19
    def test_drop_multiindex_not_lexsorted(self):
        # GH 11640

        # define the lexsorted version
        lexsorted_mi = MultiIndex.from_tuples(
            [('a', ''), ('b1', 'c1'), ('b2', 'c2')], names=['b', 'c'])
        lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi)
        self.assertTrue(lexsorted_df.columns.is_lexsorted())

        # define the non-lexsorted version
        not_lexsorted_df = DataFrame(columns=['a', 'b', 'c', 'd'],
                                     data=[[1, 'b1', 'c1', 3],
                                           [1, 'b2', 'c2', 4]])
        not_lexsorted_df = not_lexsorted_df.pivot_table(
            index='a', columns=['b', 'c'], values='d')
        not_lexsorted_df = not_lexsorted_df.reset_index()
        self.assertFalse(not_lexsorted_df.columns.is_lexsorted())

        # compare the results
        tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)

        expected = lexsorted_df.drop('a', axis=1)
        with tm.assert_produces_warning(PerformanceWarning):
            result = not_lexsorted_df.drop('a', axis=1)

        tm.assert_frame_equal(result, expected)
Example #20
 def test_frame_reset_index(self):
     dr = date_range("2012-06-02", periods=10, tz=self.tzstr("US/Eastern"))
     df = DataFrame(np.random.randn(len(dr)), dr)
     roundtripped = df.reset_index().set_index("index")
     xp = df.index.tz
     rs = roundtripped.index.tz
     self.assertEqual(xp, rs)
Example #21
 def test_delevel_infer_dtype(self):
     tuples = list(cart_product(["foo", "bar"], [10, 20], [1.0, 1.1]))
     index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
     df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index)
     deleveled = df.reset_index()
     self.assert_(com.is_integer_dtype(deleveled["prm1"]))
     self.assert_(com.is_float_dtype(deleveled["prm2"]))
Example #22
    def test_infer_objects(self):
        # GH 11221
        df = DataFrame({'a': ['a', 1, 2, 3],
                        'b': ['b', 2.0, 3.0, 4.1],
                        'c': ['c', datetime(2016, 1, 1),
                              datetime(2016, 1, 2),
                              datetime(2016, 1, 3)],
                        'd': [1, 2, 3, 'd']},
                       columns=['a', 'b', 'c', 'd'])
        df = df.iloc[1:].infer_objects()

        assert df['a'].dtype == 'int64'
        assert df['b'].dtype == 'float64'
        assert df['c'].dtype == 'M8[ns]'
        assert df['d'].dtype == 'object'

        expected = DataFrame({'a': [1, 2, 3],
                              'b': [2.0, 3.0, 4.1],
                              'c': [datetime(2016, 1, 1),
                                    datetime(2016, 1, 2),
                                    datetime(2016, 1, 3)],
                              'd': [2, 3, 'd']},
                             columns=['a', 'b', 'c', 'd'])
        # reconstruct frame to verify inference is same
        tm.assert_frame_equal(df.reset_index(drop=True), expected)
Example #23
def process_matebook_data(directory, paramlist, storage_location):
    vidname = parse_screen_filename(directory)
    for filename in find_files(directory, 'track.tsv'):
        vidpath, flyID = parse_filename(filename)
        tag = vidname + "_" + flyID
        if not os.path.exists(storage_location + '/' + tag + '_arena.pickle'):
            fi = pd.read_table(filename, sep='\t', header = [0,1], skiprows=[2,3])
            tempdf = DataFrame(index = fi.index)
            if fi['Unnamed: 8_level_0', 'isMissegmented'].mean() >= 0.2:
                print "arena dropped for poor quality: ", tag
                continue
            elif fi['Unnamed: 8_level_0', 'isMissegmented'].mean() == 0.0:
                print "arena dropped because quality = 1: ", tag
                continue
            elif len(set(fi['Unnamed: 3_level_0', 'courtship'])) <=1:
                print "arena dropped because courtship = nan: ", tag
                continue
            else:
                for j in paramlist:
                    tempdf[j[1]] = fi[j[0],j[1]]
                    if 'movedAbs_u' in j:
                        tempdf[j[1]] = tempdf[j[1]] * FPS
            tempdf['Time'] = tempdf.index/FPS
            time_ID = vidpath.split('_',1)[-1].split('.',1)[0]
            tempdf = merge_jvision_data(tempdf.reset_index(), time_ID)
            tempdf.to_pickle(storage_location + '/'+ tag + '_arena.pickle')
            print ".....", tag, " processed to pickling."
    return 
Example #24
 def test_dti_reset_index_round_trip(self):
     dti = DatetimeIndex(start="1/1/2001", end="6/1/2001", freq="D")
     d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti)
     d2 = d1.reset_index()
     self.assert_(d2.dtypes[0] == np.datetime64)
     d3 = d2.set_index("index")
     assert_frame_equal(d1, d3)
Example #25
 def test_dti_reset_index_round_trip(self):
     dti = DatetimeIndex(start='1/1/2001', end='6/1/2001', freq='D')
     d1 = DataFrame({'v' : np.random.rand(len(dti))}, index=dti)
     d2 = d1.reset_index()
     self.assert_(d2.dtypes[0] == np.datetime64)
     d3 = d2.set_index('index')
     assert_frame_equal(d1, d3)
Example #26
 def test_frame_reset_index(self, tz):
     dr = date_range('2012-06-02', periods=10, tz=tz)
     df = DataFrame(np.random.randn(len(dr)), dr)
     roundtripped = df.reset_index().set_index('index')
     xp = df.index.tz
     rs = roundtripped.index.tz
     assert xp == rs
Example #27
def get_travel_times(df):
    df = df[df['section'] != 0]
    g = df['time'].groupby([df['veh_id'], df['section']])
    res = DataFrame([g.max() - g.min(), g.min()]).T
    res.columns = ['tt', 'time']
    res = res.reset_index()
    return res
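A toy run of the helper above (column names follow the snippet; the trip data is invented):

from pandas import DataFrame

trips = DataFrame({'veh_id':  [1, 1, 1, 2, 2],
                   'section': [0, 1, 2, 1, 1],
                   'time':    [0, 3, 9, 4, 11]})
get_travel_times(trips)
# section 0 rows are dropped; each remaining (veh_id, section) pair gets
# tt = max(time) - min(time) and time = min(time), and reset_index() turns
# the group keys back into columns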
Example #28
    def test_set_reset_index(self):

        df = DataFrame({'A': range(10)})
        s = pd.cut(df.A, 5)
        df['B'] = s
        df = df.set_index('B')

        df = df.reset_index()
Example #29
def trim_index_df(df: pd.DataFrame, index_names_to_keep: list, inplace=False):
    '''Drops all indexes except for specified index names.'''
    
    indexes_to_drop = list(df.index.names)
    try:
        indexes_to_drop.remove(index_names_to_keep)
    except ValueError:
        try:
            for idxn in index_names_to_keep:
                indexes_to_drop.remove(idxn)
        except ValueError:
            pass
    
    if inplace:
        df.reset_index(level=indexes_to_drop, drop=True, inplace=True)
    else:
        return df.reset_index(level=indexes_to_drop, drop=True)
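An illustrative call, with made-up index level names (grp/num/tag):

import pandas as pd

idx = pd.MultiIndex.from_product([['a', 'b'], [1, 2], ['x', 'y']],
                                 names=['grp', 'num', 'tag'])
df = pd.DataFrame({'v': range(8)}, index=idx)

# drop every index level except 'grp'
trimmed = trim_index_df(df, ['grp'])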
Example #30
def seriesPosPrc_fitTimeFrame(dfList, PosPrc, ts, PosDirList):
	dataList = []
	for i in range(len(dfList)):
		tf = DataFrame(index = ts)
		df = dfList[i]
		tf[PosPrc] = df[PosPrc]
		tf = tf.fillna(-99999)
		tf.reset_index(inplace = True)
		tf['PosDir'] = PosDirList[i]
		for j in range(len(tf)):
			if tf.loc[j, PosPrc] == -99999:
				if j == 0: tf.loc[j, PosPrc] = 0
				elif tf.loc[j-1, 'PosDir'] != 0:
					tf.loc[j, PosPrc] = tf.loc[j-1, PosPrc]
				else:
					tf.loc[j, PosPrc] = 0
		dataList.append(np.asarray(tf[PosPrc]))
	return dataList