コード例 #1
0
ファイル: tests_pandas.py プロジェクト: CrazyPython/tqdm
def test_pandas_groupby_apply():
    """Test ``pandas.DataFrame.groupby(...).progress_apply``.

    Registers a ``leave=False`` bar for each of two frames, runs a grouped
    ``progress_apply`` on both and checks that the text of a completed bar
    never reaches the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if a finished ``100%`` bar is found in the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Only a missing optional dependency should skip the test; a bare
        # ``except`` would also hide KeyboardInterrupt and genuine bugs.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True))
        dfs.groupby(['a']).progress_apply(lambda x: None)

        # getvalue() returns the full buffer whatever the stream position is
        output = our_file.getvalue()

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        nexres = '100%|##########|'
        if nexres in output:
            raise AssertionError("\nDid not expect:\n{0}\nIn:{1}\n".format(
                nexres, output))
コード例 #2
0
def run_bootstrap_per_ASV(input_file):
    """Apply ``process_row`` to every row of an ASV CSV and save the joined table.

    Reads ``input_file`` with pandas, runs ``process_row`` row-wise behind a
    progress bar, left-merges the result onto the input on ``['ID', 'Site']``
    and writes the merged frame to a CSV whose name embeds the module-level
    ``number_of_experiments``.
    """
    print(f'Reading in ASV file - {input_file} and running with {number_of_experiments} simulations')
    asv_table = pd.read_csv(input_file)

    # progress_apply only exists once a bar has been registered with pandas
    tqdm_pandas(tqdm())
    per_row = asv_table.progress_apply(process_row, axis=1)

    merged = asv_table.merge(per_row, how='left', on=['ID', 'Site'])
    output_path = f'./data/ASV_cultivation_bootstrapped_numbers_max_viability_{number_of_experiments}_simulations.joined.csv'
    merged.to_csv(output_path, index=False)
コード例 #3
0
def test_pandas_groupby_apply():
    """Test ``pandas.DataFrame.groupby(...).progress_apply``.

    Runs a grouped ``progress_apply`` on an integer-labelled frame and on a
    frame with columns ``a``/``b``/``c``, each with its own ``leave=False``
    bar, and checks that no completed bar shows up in the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if a finished ``100%`` bar is found in the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Skip only when an optional dependency is missing; anything else
        # (including KeyboardInterrupt) must propagate.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True))
        dfs.groupby(['a']).progress_apply(lambda x: None)

        # read the whole buffer once instead of seek(0)/read() pairs
        output = our_file.getvalue()

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        nexres = '100%|##########|'
        if nexres in output:
            raise AssertionError("\nDid not expect:\n{0}\nIn:{1}\n".format(
                nexres, output))
コード例 #4
0
def loadandbuildindex2():
    """Load the song data, preprocess it, build the indexes and embeddings.

    Steps, in order:

    1. Load artists and songs via ``load_data()`` and keep the first 1000
       song rows.
    2. Preprocess the ``Lyric`` column, then the ``SName`` column, with
       ``ClassPreprocessed``; the result is written to
       ``preprocessed_data.csv`` after each pass.
    3. Print one random before/after sample as a sanity check (best effort).
    4. Inner-join songs with artists (``ALink`` == ``Link``) and pass the
       merged frame to ``createDataInvertIndex`` and ``build_index2``.
    5. Call ``load_embedding_WV()`` and set the module-level ``default`` to
       the mean of the values of ``dictionary_fastWV``.

    Side effects: writes ``preprocessed_data.csv``, prints progress
    information, and rebinds the global ``default``.
    """
    global default
    nums = 1000

    df_artists, df_en_songs = load_data()
    df_en_songs = df_en_songs.iloc[:nums, :]

    # -- after loading we create the corpus documents for the search system -- #
    # -- preprocess the data -- #
    tqdm_pandas(tqdm())

    clspreproccessed = ClassPreprocessed(text_column='Lyric',
                                         DRAREFREQ_words=True)
    df_new_songs = clspreproccessed.preprocessed(df_en_songs)
    # -- save the intermediate result -- #
    df_new_songs.to_csv('preprocessed_data.csv')

    print(df_new_songs['Lyric'][0])
    # NOTE(review): FREQWORDS / RAREWORDS are module-level names, presumably
    # filled in by the preprocessing pass above -- confirm.
    print('Most common words:\n')
    print(FREQWORDS)
    print('Most rare words:\n')
    print(RAREWORDS)

    # -- the song titles are preprocessed as well -- #
    clspreproccessed2 = ClassPreprocessed(text_column='SName',
                                          DRAREFREQ_words=False,
                                          stopwords=False,
                                          lemmatize=False)
    df_new_songs = clspreproccessed2.preprocessed(df_new_songs, verbose=1)
    df_new_songs.to_csv('preprocessed_data.csv')
    try:
        index = np.random.choice(df_new_songs.index, size=1)[0]
        print('before: \n')
        print(df_en_songs.loc[index, ['Lyric', 'SName']].values)
        print('after: \n')
        print(df_new_songs.loc[index, ['Lyric', 'SName']].values)
    except Exception:
        # The sample print is best effort only, but a bare ``except`` would
        # also swallow KeyboardInterrupt / SystemExit.
        print('Choice works bad!')
    print('all Done!')
    # -- create the inverted index -- #
    # -- We also have the Artist, Popularity and Genre of each song -- #
    # -- We can use them to return more relevant results -- #
    # -- We should use the artist name, Genre and Popularity -- #
    # -- merge df_artists and the songs frame -- #
    merge = pd.merge(df_new_songs,
                     df_artists,
                     how='inner',
                     left_on='ALink',
                     right_on='Link')
    # -- We also have the Genre and Artist_name columns -- #
    # -- We should make use of them -- #
    # -- Group the songs by genre and by artist -- #
    # -- Also build something like a database holding the list of songs for each individual singer -- #
    createDataInvertIndex(merge)
    build_index2(merge, new_columns=True)
    # -- Fill our word-mapping (embedding) dictionary -- #
    load_embedding_WV()
    # -- Default vector: the mean of all vectors in the dictionary -- #
    default = sum(dictionary_fastWV.values()) / len(dictionary_fastWV)
コード例 #5
0
    def flatten_images_directory(self):
        """Copy each row's ``origin_path`` file to its ``destination_path``.

        Registers a tqdm bar with pandas, logs the start of the operation,
        calls ``self.build_all_images_dir()`` and then walks
        ``self.ref_dataset`` row by row behind a progress bar.
        """
        def _copy_row(row):
            # one file copy per dataset row
            return copyfile(row['origin_path'], row['destination_path'])

        tqdm_pandas(tqdm())
        logger.INFO("Flattening images data")
        self.build_all_images_dir()

        self.ref_dataset.progress_apply(_copy_row, axis=1)
コード例 #6
0
def calculate_featureset3(dataframe):
    """Add the Word Mover's Distance columns ``wmd`` and ``norm_wmd``.

    Both columns are computed row-wise from ``question1`` / ``question2``
    behind a progress bar. The frame is modified in place and also returned.
    """
    def _wmd(row):
        return calc_wordmoversdist(row['question1'], row['question2'])

    def _norm_wmd(row):
        return calc_norm_wordmover(row['question1'], row['question2'])

    # register a bar so that DataFrame.progress_apply is available
    tqdm_pandas(tqdm())
    dataframe['wmd'] = dataframe.progress_apply(_wmd, axis=1)
    dataframe['norm_wmd'] = dataframe.progress_apply(_norm_wmd, axis=1)
    return dataframe
コード例 #7
0
def estimate_viability_range_for_underrepresented_taxa():
    """Run ``process_viability`` on every row with negative ``deviance``.

    Reads the joined bootstrap CSV for the current module-level
    ``number_of_experiments``, keeps the rows whose ``deviance`` is below
    zero, applies ``process_viability`` row-wise behind a progress bar and
    writes the result to a second CSV.
    """
    print('*** Calculating maximum viability to explain observed number of wells ***')

    df = pd.read_csv(f'./data/ASV_cultivation_bootstrapped_numbers_max_viability_{number_of_experiments}_simulations.joined.csv')
    is_lower_than_expected = df.deviance < 0
    lower_than_expected_df = df[is_lower_than_expected]

    tqdm_pandas(tqdm())
    viability = lower_than_expected_df.progress_apply(process_viability, axis=1)

    viability.to_csv(f'./data/estimate_viability_range_for_underrepresented_taxa_{number_of_experiments}_simulations.csv', index=False)
コード例 #8
0
def test_pandas_apply_args_deprecation():
    """Check the warning text written for
    `pandas.Dataframe(Series).progress_apply(func, *args)`"""
    try:
        from tqdm import tqdm_pandas
    except ImportError as err:
        skip(str(err))

    expected_fragments = (
        "TqdmDeprecationWarning", "not supported",
        "keyword arguments instead")

    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        frame = pd.DataFrame(randint(0, 50, (500, 3)))
        # the extra positional argument `1` is what shall cause the warning
        frame.progress_apply(lambda x: None, 1)
        # every fragment of the deprecation message must have been written
        captured = our_file.getvalue()
        assert all(fragment in captured for fragment in expected_fragments)
コード例 #9
0
def test_pandas_apply_args_deprecation():
    """Check the warning text written for
    `pandas.Dataframe(Series).progress_apply(func, *args)`"""
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        raise SkipTest

    expected_fragments = (
        "TqdmDeprecationWarning", "not supported",
        "keyword arguments instead")

    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        frame = pd.DataFrame(randint(0, 50, (500, 3)))
        # the extra positional argument `1` is what shall cause the warning
        frame.progress_apply(lambda x: None, 1)
        # every fragment of the deprecation message must have been written
        captured = our_file.getvalue()
        assert all(fragment in captured for fragment in expected_fragments)
コード例 #10
0
def test_pandas_apply():
    """Test ``pandas.DataFrame.progress_apply``.

    Applies a no-op function to a 1000x6 frame with a ``leave=True`` bar and
    checks that the captured output contains ``/6``.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if ``/6`` is missing from the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Skip only for a missing optional dependency; a bare ``except``
        # would also hide KeyboardInterrupt and unrelated failures.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.progress_apply(lambda x: None)

        # getvalue() returns the full buffer whatever the stream position is
        output = our_file.getvalue()

        if '/6' not in output:
            raise AssertionError("\nExpected:\n{0}\nIn:{1}\n".format(
                '/6', output))
コード例 #11
0
ファイル: tests_pandas.py プロジェクト: damaainan/tqdm
def test_pandas():
    """Test grouped ``progress_apply`` with a ``leave=False`` bar.

    Groups a 1000x6 random frame by column 0, applies a no-op function and
    checks that the completed-bar text is absent from the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if the finished bar is found in the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Skip only for a missing optional dependency.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        # An explicit check replaces ``try: assert ... except:``: it still
        # runs under ``python -O`` and does not mask unrelated errors.
        if '100%|##########| 101/101' in our_file.getvalue():
            raise AssertionError('Did not expect:\n\t100%|##########| 101/101')
コード例 #12
0
def test_pandas_leave():
    """Test pandas with ``leave=True``.

    Groups a 1000x6 random frame by column 0, applies a no-op function and
    checks that the completed bar is present in the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if the finished bar is missing from the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Skip only for a missing optional dependency; a bare ``except``
        # would also hide KeyboardInterrupt and unrelated failures.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        # getvalue() returns the full buffer whatever the stream position is
        output = our_file.getvalue()

        exres = '100%|##########| 101/101'
        if exres not in output:
            raise AssertionError("\nExpected:\n{0}\nIn:{1}\n".format(
                exres, output))
コード例 #13
0
ファイル: tests_pandas.py プロジェクト: CrazyPython/tqdm
def test_pandas_leave():
    """Test pandas with ``leave=True``.

    Runs a grouped no-op ``progress_apply`` on a 1000x6 random frame and
    expects the completed bar to remain in the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if the finished bar is missing from the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Only a missing optional dependency justifies skipping.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        # read the whole buffer once instead of seek(0)/read() pairs
        output = our_file.getvalue()

        exres = '100%|##########| 101/101'
        if exres not in output:
            raise AssertionError("\nExpected:\n{0}\nIn:{1}\n".format(
                exres, output))
コード例 #14
0
def test_pandas():
    """Test grouped ``progress_apply`` with a ``leave=False`` bar.

    Runs a grouped no-op ``progress_apply`` on a 1000x6 random frame and
    expects the completed-bar text to be absent from the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if the finished bar is found in the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Only a missing optional dependency justifies skipping.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        # don't expect final output since no `leave` and
        # high dynamic `miniters`
        # Checked explicitly rather than via ``try: assert ... except:`` so
        # the check survives ``python -O`` and cannot mask other errors.
        if '100%|##########| 101/101' in our_file.getvalue():
            raise AssertionError('Did not expect:\n\t100%|##########| 101/101')
コード例 #15
0
def test_pandas_deprecation():
    """Check the deprecation notice for both legacy `tqdm_pandas` call forms"""
    try:
        from tqdm import tqdm_pandas
    except ImportError as err:
        skip(str(err))

    # Legacy form 1: an already constructed bar instance is passed in.
    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        frame = pd.DataFrame(randint(0, 50, (500, 3)))
        frame.groupby(0).progress_apply(lambda x: None)
        # Check deprecation message
        captured = our_file.getvalue()
        assert "TqdmDeprecationWarning" in captured
        assert "instead of `tqdm_pandas(tqdm(...))`" in captured

    # Legacy form 2: the bar class is passed together with its kwargs.
    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm, file=our_file, leave=False, ascii=True, ncols=20)
        frame = pd.DataFrame(randint(0, 50, (500, 3)))
        frame.groupby(0).progress_apply(lambda x: None)
        # Check deprecation message
        captured = our_file.getvalue()
        assert "TqdmDeprecationWarning" in captured
        assert "instead of `tqdm_pandas(tqdm, ...)`" in captured
コード例 #16
0
ファイル: tests_pandas.py プロジェクト: damaainan/tqdm
def test_pandas_leave():
    """Test grouped ``progress_apply`` with a ``leave=True`` bar.

    Runs a grouped no-op ``progress_apply`` on a 1000x6 random frame and
    expects the completed bar to remain in the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if the finished bar is missing from the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Only a missing optional dependency justifies skipping.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        output = our_file.getvalue()

        # Checked explicitly rather than via ``try: assert ... except:`` so
        # the check survives ``python -O`` and cannot mask other errors.
        if '100%|##########| 101/101' not in output:
            raise AssertionError('\n'.join(('Expected:',
                                            '100%|##########| 101/101', 'Got:',
                                            output)))
コード例 #17
0
def test_pandas_leave():
    """Test grouped ``progress_apply`` with a ``leave=True`` bar.

    Groups a 1000x6 random frame by column 0, applies a no-op function and
    checks that the completed bar is present in the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if the finished bar is missing from the output.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Skip only for a missing optional dependency; a bare ``except``
        # would also hide KeyboardInterrupt and unrelated failures.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 100, (1000, 6)))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.groupby(0).progress_apply(lambda x: None)

        output = our_file.getvalue()

        # An explicit check still runs under ``python -O`` (which strips
        # ``assert``) and does not swallow unrelated exceptions.
        if '100%|##########| 101/101' not in output:
            raise AssertionError('\n'.join(
                ('Expected:', '100%|##########| 101/101', 'Got:', output)))
コード例 #18
0
def test_pandas_apply():
    """Test ``pandas.DataFrame[.series].progress_apply``.

    Runs ``progress_apply`` once on a whole frame and once on a single
    column, each with its own ``leave=True`` bar, and expects ``100%`` to
    appear at least twice in the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if fewer than two ``100%`` markers are found.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Only a missing optional dependency justifies skipping.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        dfs = pd.DataFrame(randint(0, 50, (500, 3)), columns=list('abc'))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.progress_apply(lambda x: None)
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        dfs.a.progress_apply(lambda x: None)

        # getvalue() returns the full buffer whatever the stream position is
        output = our_file.getvalue()

        if output.count('100%') < 2:
            raise AssertionError("\nExpected:\n{0}\nIn:{1}\n".format(
                '100% at least twice', output))
コード例 #19
0
ファイル: tests_pandas.py プロジェクト: CrazyPython/tqdm
def test_pandas_apply():
    """Test ``pandas.DataFrame[.series].progress_apply``.

    Applies a no-op function to a frame and then to its column ``a``, each
    behind a ``leave=True`` bar, and expects at least two ``100%`` markers in
    the captured output.

    Raises:
        SkipTest: if numpy, pandas or ``tqdm_pandas`` cannot be imported.
        AssertionError: if fewer than two ``100%`` markers are found.
    """
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        # Skip only for a missing optional dependency; a bare ``except``
        # would also hide KeyboardInterrupt and unrelated failures.
        raise SkipTest

    with closing(StringIO()) as our_file:
        df = pd.DataFrame(randint(0, 50, (500, 3)))
        dfs = pd.DataFrame(randint(0, 50, (500, 3)),
                           columns=list('abc'))
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        df.progress_apply(lambda x: None)
        tqdm_pandas(tqdm(file=our_file, leave=True, ascii=True))
        dfs.a.progress_apply(lambda x: None)

        # read the whole buffer once instead of seek(0)/read() pairs
        output = our_file.getvalue()

        if output.count('100%') < 2:
            raise AssertionError("\nExpected:\n{0}\nIn:{1}\n".format(
                '100% at least twice', output))
コード例 #20
0
def test_pandas_deprecation():
    """Check the deprecation notice for both legacy `tqdm_pandas` call forms"""
    try:
        from numpy.random import randint
        from tqdm import tqdm_pandas
        import pandas as pd
    except ImportError:
        raise SkipTest

    # Legacy form 1: an already constructed bar instance is passed in.
    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm(file=our_file, leave=False, ascii=True, ncols=20))
        frame = pd.DataFrame(randint(0, 50, (500, 3)))
        frame.groupby(0).progress_apply(lambda x: None)
        # Check deprecation message
        captured = our_file.getvalue()
        assert "TqdmDeprecationWarning" in captured
        assert "instead of `tqdm_pandas(tqdm(...))`" in captured

    # Legacy form 2: the bar class is passed together with its kwargs.
    with closing(StringIO()) as our_file:
        tqdm_pandas(tqdm, file=our_file, leave=False, ascii=True, ncols=20)
        frame = pd.DataFrame(randint(0, 50, (500, 3)))
        frame.groupby(0).progress_apply(lambda x: None)
        # Check deprecation message
        captured = our_file.getvalue()
        assert "TqdmDeprecationWarning" in captured
        assert "instead of `tqdm_pandas(tqdm, ...)`" in captured
コード例 #21
0
# Scale both time columns down by 1000 and truncate to integers
# (presumably milliseconds -> seconds; confirm against the data source).
# ``map(int, ...)`` only produces a list on Python 2; on Python 3 it is a
# lazy iterator that cannot be stored as a column, so cast vectorised.
start_close_time['start_time'] = (
    start_close_time['start_time'] / 1000).astype('int64')
start_close_time['close_time'] = (
    start_close_time['close_time'] / 1000).astype('int64')

# Map every distinct app name to its position in the sorted unique array.
unique_app_name = np.unique(start_close_time['app_name'])
dict_label = {name: label for label, name in enumerate(unique_app_name)}
import time
# Replace each app name by its label, stored as a string so the labels can
# be joined into a text document later.
start_close_time['app_name'] = start_close_time['app_name'].apply(
    lambda row: str(dict_label[row]))

# The time columns are not used beyond this point.
del start_close_time['start_time'], start_close_time['close_time']

from tqdm import tqdm, tqdm_pandas
# Register a bar so that ``progress_apply`` becomes available on pandas objects.
tqdm_pandas(tqdm())


def dealed_row(row):
    """Return the entries of ``row['app_name']`` joined by single spaces."""
    return ' '.join(row['app_name'])


# One row per id: the labels of that id's apps joined into a single string.
data_feature = (
    start_close_time
    .groupby('id')
    .progress_apply(dealed_row)
    .reset_index()
)
# Attach the per-id string to the main table; the key is dropped afterwards.
data_feature = data_all.merge(data_feature, on='id', how='left')
del data_feature['id']

# Column 0 is the unnamed result of the groupby/apply above.
count_vec = CountVectorizer()
count_csr_basic = count_vec.fit_transform(data_feature[0])
tfidf_vec = TfidfVectorizer()
コード例 #22
0
# Directory prefix of the test audio; the trailing slash matters because the
# path is concatenated with '*.wav' for globbing further down.
test_path = 'D:/data_science/kaggle_sound_classification/audio_test/'

#TQDM build
def tqdm_pandas(t):
    """Install ``Series.progress_apply`` driven by the progress bar ``t``.

    After this call every pandas ``Series`` gains a ``progress_apply(func,
    *args, **kwargs)`` method that behaves like ``Series.apply`` but advances
    ``t`` by one for each call of ``func``.

    Args:
        t: progress-bar object; its ``total`` attribute is assigned and its
            ``update(1)`` and ``close()`` methods are called.
    """
    from pandas import Series

    def inner(series, func, *args, **kwargs):
        t.total = series.size

        # Distinct parameter names: the original wrapper shadowed the outer
        # ``args`` / ``kwargs`` that are forwarded to ``Series.apply``.
        def wrapper(*call_args, **call_kwargs):
            t.update(1)
            return func(*call_args, **call_kwargs)

        try:
            return series.apply(wrapper, *args, **kwargs)
        finally:
            # Close the bar even when ``func`` raises, so a failed apply
            # does not leave a dangling bar behind.
            t.close()

    Series.progress_apply = inner

tqdm_pandas(tqdm_notebook())
tqdm.pandas(desc="my bar!")

#Feature engineering
SAMPLE_RATE = 22050
from scipy.stats import skew
print(os.listdir(os.getcwd()))    
tqdm.pandas
train_files = os.listdir(train_path)
test_files = os.listdir(test_path)
train_files = glob(train_path + '*.wav')
test_files = glob(test_path + '*.wav')
SAMPLE_RATE = 22050
def get_feature(fname):
    # NOTE(review): the visible part of this function stops after the load
    # call below; the rest of the body is not available here.
    # Load the file at `fname` with librosa's 'kaiser_fast' resampler; the
    # second element of the returned pair is discarded.
    b,_ = librosa.load(fname, res_type = 'kaiser_fast')
コード例 #23
0
import pandas as pd
from src.tools.tokenizer import tokenize, is_chinese, combine
import os
from os.path import join
from start_experiment import MANIFEST_FOLDER
from tqdm import tqdm, tqdm_pandas
from collections import Counter
import sentencepiece as spm
from start_experiment import MANIFEST_FOLDER
from tqdm import tqdm
# Register the tqdm class with pandas.
tqdm_pandas(tqdm)

# Corpus text file paths.
CORPUS = 'data/corpus.txt'
ENG_CORPUS = 'data/eng_corpus.txt'

# Manifest CSVs for the Chinese corpus, resolved against MANIFEST_FOLDER.
TRAIN_MANIFEST_LIST_FORCORPUS_CH = [
    join(MANIFEST_FOLDER, manifest_name)
    for manifest_name in (
        'aidatatang_200zh.csv', 'AISHELL-2.csv', 'c_500.csv', 'ce_200.csv',
        'data_aishell.csv', 'magic_data_train.csv', 'magic_data_dev.csv',
        'magic_data_test.csv', 'stcmds.csv', 'prime.csv',
    )
]

# Manifest CSVs for the English corpus, resolved against MANIFEST_FOLDER.
TRAIN_MANIFEST_LIST_FORCORPUS_EN = [
    join(MANIFEST_FOLDER, manifest_name)
    for manifest_name in ('libri_100.csv', 'libri_360.csv', 'libri_500.csv')
]

# Single development manifest.
DEV_MANIFEST_LIST_FORCORPUS = [join(MANIFEST_FOLDER, 'ce_20_dev.csv')]
コード例 #24
0
# Fuzzy token-set / token-sort ratios for the test set, computed row-wise on
# the dask frame `test_dd` and materialised with the multiprocessing getter.
test_df['fuzz_token_set_ratio'] = test_dd.apply(
    lambda x: fuzz.token_set_ratio(str(x['question1']), str(x['question2'])),
    axis=1,
    meta=('a', np.dtype('int64'))).compute(get=dask.multiprocessing.get)
test_df['fuzz_token_sort_ratio'] = test_dd.apply(
    lambda x: fuzz.token_sort_ratio(str(x['question1']), str(x['question2'])),
    axis=1,
    meta=('a', np.dtype('int64'))).compute(get=dask.multiprocessing.get)
# Elapsed seconds since `start_time` (set outside the visible part).
print((time.time() - start_time))
# The dask frame is no longer needed.
del test_dd
###############################################################################

# Word2vec vectors for the Word Mover's Distance features.
# NOTE(review): `wmd` presumably reads the module-level `model` -- confirm.
model = gensim.models.KeyedVectors.load_word2vec_format(
    '../../input/GoogleNews-vectors-negative300.bin.gz', binary=True)

# A fresh bar is registered before every apply so that each one gets its own
# description and total.
tqdm_pandas(tqdm(desc="Train wmd:", total=len(train_df)))
train_df['wmd'] = train_df.progress_apply(
    lambda x: wmd(x['question1'], x['question2']), axis=1)
tqdm_pandas(tqdm(desc="Test wmd:", total=len(test_df)))
test_df['wmd'] = test_df.progress_apply(
    lambda x: wmd(x['question1'], x['question2']), axis=1)
# Drop the reference to the first model before the second copy is loaded.
del model

# Same vectors again; init_sims(replace=True) is applied to this copy.
# NOTE(review): `norm_wmd` presumably reads the module-level `norm_model` -- confirm.
norm_model = gensim.models.KeyedVectors.load_word2vec_format(
    '../../input/GoogleNews-vectors-negative300.bin.gz', binary=True)
norm_model.init_sims(replace=True)
tqdm_pandas(tqdm(desc="Train norm wmd:", total=len(train_df)))
train_df['norm_wmd'] = train_df.progress_apply(
    lambda x: norm_wmd(x['question1'], x['question2']), axis=1)
tqdm_pandas(tqdm(desc="Test norm wmd:", total=len(test_df)))
test_df['norm_wmd'] = test_df.progress_apply(
コード例 #25
0
import pandas as pd
import numpy as np
from tqdm import tqdm, tqdm_pandas

# Demo frame: 100000 rows x 6 columns of random integers in [0, 100)
df = pd.DataFrame(np.random.randint(0, 100, size=(100000, 6)))

# Build a new `tqdm` instance and register it with `pandas`
# (tqdm_gui, optional kwargs, etc. work as well)
tqdm_pandas(tqdm())

# From here on `progress_apply` can stand in for `apply`
df.progress_apply(lambda col: col ** 2)
# the same works after a groupby:
# df.groupby(0).progress_apply(lambda x: x**2)


""" Source code for `tqdm_pandas` (really simple!) """
# def tqdm_pandas(t):
#   from pandas.core.frame import DataFrame
#   def inner(df, func, *args, **kwargs):
#       t.total = groups.size // len(groups)
#       def wrapper(*args, **kwargs):
#           t.update(1)
#           return func(*args, **kwargs)
#       result = df.apply(wrapper, *args, **kwargs)
#       t.close()
#       return result
#   DataFrame.progress_apply = inner
コード例 #26
0
ファイル: ratings.py プロジェクト: jotinha/diy-rex
import numpy as np
import pandas
import tqdm
from typing import Callable

from diyrex.cache import cacher

# Register tqdm with pandas at import time; the groupby(...).progress_apply
# call in compute_implicit_ratings_func depends on this.
tqdm.tqdm_pandas(tqdm.tqdm)  # progress bars for groupby


def compute_implicit_ratings_func(signals: pandas.DataFrame,
                                  agg_func: Callable):
    """Aggregate signals per (user, item) and scale by the largest value.

    ``agg_func`` is applied to each ``(user, item)`` group behind a progress
    bar; the resulting series is divided by its maximum and returned as a
    frame with the columns ``user``, ``item`` and ``rating``.
    """
    per_pair = signals.groupby(['user', 'item'])
    raw = per_pair.progress_apply(agg_func)

    # normalize
    scaled = raw / raw.max()

    rating_frame = scaled.to_frame(name='rating')
    return rating_frame.reset_index()


def compute_implicit_ratings_1(signals):
    "every (user, item) pair that has any interaction is rated 1"
    def _any_interaction(group):
        return 1

    return compute_implicit_ratings_func(signals, _any_interaction)


def compute_implicit_ratings_2(signals: pandas.DataFrame):
    "rating is the count of distinct calendar days with interactions"
    def _distinct_days(group):
        return group['date'].dt.date.nunique()

    return compute_implicit_ratings_func(signals, _distinct_days)

コード例 #27
0
import numpy as np
import pandas as pd
import tqdm
import pickle

# Create one bar instance and register it with pandas; progress_map is used
# on a Series further down.
progress_bar = tqdm.tqdm()
tqdm.tqdm_pandas(progress_bar)
# load data
from gym_splendor_code.envs.mechanics.abstract_observation import DeterministicObservation
from nn_models.utils.vectorizer import Vectorizer

# NOTE(review): read_pickle executes whatever the pickle contains -- only
# load files from a trusted source. The path is machine-specific.
raw_data_small = pd.read_pickle(
    '/home/tomasz/ML_Research/splendor/gym_open_ai-splendor/training_data/half_merged.pi'
)
# Vectorizer instance; it is not used in the visible part of the script.
vectorizer = Vectorizer()


def obs_to_state(obs: DeterministicObservation):
    """Return the state rebuilt by ``obs.recreate_state()``."""
    state = obs.recreate_state()
    return state


# Rebuild a state from every stored observation (with a progress bar).
series_of_states = raw_data_small['observation'].progress_map(obs_to_state)

X_list = series_of_states.tolist()
n = len(X_list)
# Largest multiple of 5 that does not exceed n; surplus rows are dropped.
m = n - n % 5
X_list = X_list[:m]
# Targets as an (m, 1) column vector.
Y = np.array(raw_data_small['value'].tolist()[:m]).reshape(m, 1)

# Row numbers whose target has absolute value 1.
indices = [row for row, target in enumerate(Y) if abs(target) == 1]