Example #1
0
    def test_answerPrototype6(self):
        """Check the prototype for AnswerType.SIX (keyed dataset plus ``when``).

        The OHLCV frame has its index reset into an ``'index'`` column, which
        is used as ``dataset_key``.  The expected prototype has one row per
        key in all-but-the-last row of the frame, holding ``when`` and a NaN
        for the target column.
        """
        frame = cfdg.ohlcv().reset_index()

        # The last 'index' value is cast with ``astype(datetime)`` and divided
        # by 1e9 before being turned into a datetime.
        last_key = frame[-1:]['index'].values[0]
        exp = datetime.utcfromtimestamp(last_key.astype(datetime) / 1000000000)

        spec_kwargs = dict(
            title='',
            type=CompetitionType.PREDICT,
            expiration=datetime.now() + timedelta(minutes=1),
            prize=1.0,
            dataset=frame.iloc[:-1],
            metric=CompetitionMetric.ABSDIFF,
            targets=frame.columns[-1],
            answer=frame.iloc[-1:],
            dataset_key='index',
            when=exp,
        )
        competition = CompetitionSpec(**spec_kwargs)

        target = frame.columns[-1]
        df = answerPrototype(competition)[['when', target]]

        # Expected frame: one blank row per key, indexed by the key values.
        keys = frame['index'].iloc[:-1]
        blank_rows = [{'when': exp, target: np.nan} for _ in keys]
        ans = pd.DataFrame(blank_rows, index=list(keys))[['when', target]]

        print(df)
        print(ans)
        assert df.equals(ans)
Example #2
0
    def test_answerPrototype62(self):
        """Check the AnswerType.SIX prototype for a URL-specified JSON dataset.

        The competition is keyed by ``id`` with two target columns.  The
        expected prototype has one row per id in the fetched dataset, holding
        ``when`` and NaN for both targets.
        """
        # NOTE(review): the dataset is given as a URL, so this test presumably
        # needs network access -- confirm before running offline.
        exp = datetime.now() + timedelta(minutes=2)
        spec_kwargs = dict(
            title='',
            type=CompetitionType.PREDICT,
            when=exp,
            prize=1.0,
            dataset='https://feeds.citibikenyc.com/stations/stations.json',
            dataset_type=DatasetFormat.JSON,
            dataset_key='id',
            dataset_kwargs={'record_column': 'stationBeanList'},
            metric=CompetitionMetric.ABSDIFF,
            targets=['availableBikes', 'availableDocks'],
            expiration=exp,
        )
        competition = CompetitionSpec(**spec_kwargs)

        column_order = ['when', 'availableBikes', 'availableDocks']
        dataset = fetchDataset(competition)
        df = answerPrototype(competition)[column_order]

        # Expected frame: one blank row per station id, indexed by that id.
        station_ids = list(dataset.id)
        blank_rows = [
            {'when': exp, 'availableBikes': np.nan, 'availableDocks': np.nan}
            for _ in station_ids
        ]
        ans = pd.DataFrame(blank_rows, index=station_ids)[column_order]

        print(df)
        print(ans)
        assert df.equals(ans)
Example #3
0
def answerPredict1(competitionSpec, *args, **kwargs):
    """Fill the answer prototype with per-column linear-regression forecasts.

    For every prototype column, a ``LinearRegression`` is fitted on the
    dataset column's index (cast to int) against its values, and the
    prediction for ``competitionSpec.when`` is written into the prototype at
    row ``when``.

    Args:
        competitionSpec: competition spec; its ``when`` and ``dataset_key``
            attributes are read, and it is passed to ``fetchDataset`` and
            ``answerPrototype``.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.

    Returns:
        The populated prototype, or ``None`` when ``when`` is falsy or a
        ``dataset_key`` is set.
    """
    from crowdsource.types.utils import answerPrototype, fetchDataset
    from sklearn.linear_model import LinearRegression

    def _epoch_seconds(moment):
        # Prefer datetime.timestamp(); otherwise rebuild it from the timetuple.
        if hasattr(moment, 'timestamp'):
            return moment.timestamp()
        return float(
            time.mktime(moment.timetuple()) + moment.microsecond / 1000000.0)

    data = fetchDataset(competitionSpec)
    ans = answerPrototype(competitionSpec, data)
    when = competitionSpec.when

    # Only the "when given, no dataset key" case is handled here.
    if not (when and not competitionSpec.dataset_key):
        return None

    for col in ans.columns:
        # NOTE(review): dataset_key is falsy past the guard above, so this
        # skip only triggers if a column label equals that falsy value.
        if col == competitionSpec.dataset_key:
            continue

        series = data[col]
        # NOTE(review): the index is cast with astype(int) while the query
        # point below is epoch seconds -- confirm both use the same unit.
        features = series.index.astype(int).values.reshape(len(series.index), 1)
        observed = series.values.reshape(len(series), 1)

        model = LinearRegression()
        model.fit(features, observed)

        ans.loc[when, col] = model.predict([[_epoch_seconds(when)]])

    return ans
Example #4
0
def answerPredictCitibike(competitionSpec, *args, **kwargs):
    """Fill the answer prototype with current dataset values plus random noise.

    For each prototype row label and each column other than ``"when"``, the
    value is taken from the first dataset row whose ``dataset_key`` column
    equals the label, and a random integer from ``randint(0, 10)`` is added.

    Args:
        competitionSpec: competition spec; its ``when`` and ``dataset_key``
            attributes are read, and it is passed to ``fetchDataset`` and
            ``answerPrototype``.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.

    Returns:
        The populated prototype, or ``None`` unless both ``when`` and
        ``dataset_key`` are set.
    """
    from random import randint

    from crowdsource.types.utils import answerPrototype, fetchDataset

    dataset = fetchDataset(competitionSpec)
    answer = answerPrototype(competitionSpec, dataset)
    when = competitionSpec.when

    # This handler needs both a prediction time and a key column.
    if not (when and competitionSpec.dataset_key):
        return None

    for label in answer.index:
        for col in answer.columns:
            if col != "when":
                jitter = randint(0, 10)
                key_matches = dataset[competitionSpec.dataset_key] == label
                current = dataset[key_matches][col].iloc[0]
                answer.loc[label, col] = current + jitter

    return answer
Example #5
0
    def test_answerPrototype62(self):
        """Check the AnswerType.SIX prototype for a URL-specified JSON dataset.

        The competition is keyed by ``id`` with two target columns.  The
        expected prototype has one row per id in the fetched dataset, holding
        ``when`` and NaN for both targets.
        """
        # NOTE(review): the dataset is given as a URL, so this test presumably
        # needs network access -- confirm before running offline.
        exp = datetime.now() + timedelta(minutes=2)
        spec_kwargs = dict(
            title="",
            type=CompetitionType.PREDICT,
            when=exp,
            prize=1.0,
            dataset="https://feeds.citibikenyc.com/stations/stations.json",
            dataset_type=DatasetFormat.JSON,
            dataset_key="id",
            dataset_kwargs={"record_column": "stationBeanList"},
            metric=CompetitionMetric.ABSDIFF,
            targets=["availableBikes", "availableDocks"],
            expiration=exp,
        )
        competition = CompetitionSpec(**spec_kwargs)

        column_order = ["when", "availableBikes", "availableDocks"]
        dataset = fetchDataset(competition)
        df = answerPrototype(competition)[column_order]

        # Expected frame: one blank row per station id, indexed by that id.
        station_ids = list(dataset.id)
        blank_rows = [
            {"when": exp, "availableBikes": np.nan, "availableDocks": np.nan}
            for _ in station_ids
        ]
        ans = pd.DataFrame(blank_rows, index=station_ids)[column_order]

        print(df)
        print(ans)
        assert df.equals(ans)
Example #6
0
def foo5(competitionSpec, *args, **kwargs):
    """Return a zero-filled answer prototype for string-specified datasets.

    Args:
        competitionSpec: competition spec; ``dataset`` must be a string for
            anything to be produced.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.

    Returns:
        The prototype from ``answerPrototype`` with missing values replaced
        by 0, or ``None`` when ``competitionSpec.dataset`` is not a string.
    """
    # Non-string datasets are not handled by this function.
    if not isinstance(competitionSpec.dataset, six.string_types):
        return None

    fetched = fetchDataset(competitionSpec)
    return answerPrototype(competitionSpec, fetched).fillna(0)
Example #7
0
 def test_answerPrototype5(self):
     """Check the prototype for AnswerType.FIVE.

     No ``when`` and no ``dataset_key`` are given; ``targets`` maps ``0`` to
     ``['Price']``.  The expected prototype is a single default-indexed row
     with a NaN ``Price``.
     """
     exp = datetime.now() + timedelta(minutes=2)
     spec_kwargs = dict(
         title='',
         type=CompetitionType.PREDICT,
         expiration=exp,
         prize=1.0,
         dataset='http://bonds.paine.nyc',
         dataset_type=DatasetFormat.JSON,
         metric=CompetitionMetric.ABSDIFF,
         targets={0: ['Price']},
     )
     competition = CompetitionSpec(**spec_kwargs)

     df = answerPrototype(competition)

     blank_rows = [{'Price': np.nan}]
     ans = pd.DataFrame(blank_rows)

     print(df)
     print(ans)
     assert df.equals(ans)
Example #8
0
    def test_answerPrototype1(self):
        """Check the prototype for AnswerType.ONE (classification).

        The expected prototype has one default-indexed row per row of the
        fetched dataset, with a single NaN ``class`` column.
        """
        generated = make_classification()
        features = pd.DataFrame(generated[0])
        labels = pd.DataFrame(generated[1])

        spec_kwargs = dict(
            title='',
            type=CompetitionType.CLASSIFY,
            expiration=datetime.now() + timedelta(minutes=1),
            prize=1.0,
            num_classes=2,
            dataset=features,
            metric=CompetitionMetric.LOGLOSS,
            answer=labels,
        )
        competition = CompetitionSpec(**spec_kwargs)

        dataset = fetchDataset(competition)
        df = answerPrototype(competition)

        blank_rows = [{'class': np.nan} for _ in dataset.index]
        ans = pd.DataFrame(blank_rows)

        print(df)
        print(ans)
        assert df.equals(ans)
Example #9
0
def answerPredictCorporateBonds(competitionSpec, *args, **kwargs):
    """Fill the answer prototype with dataset values plus Gaussian noise.

    For each prototype cell outside the ``dataset_key`` column, the value at
    the same row label and column of the dataset is taken and a draw from
    ``normalvariate(0, 5.0)`` is added.

    Args:
        competitionSpec: competition spec; its ``when`` and ``dataset_key``
            attributes are read, and it is passed to ``fetchDataset`` and
            ``answerPrototype``.
        *args: accepted and ignored.
        **kwargs: accepted and ignored.

    Returns:
        The populated prototype, or ``None`` when ``when`` is set or
        ``dataset_key`` is missing.
    """
    from random import normalvariate
    from crowdsource.types.utils import answerPrototype, fetchDataset

    dataset = fetchDataset(competitionSpec)
    answer = answerPrototype(competitionSpec, dataset)

    # Handled case: no prediction time, but a key column is present.
    applicable = (not competitionSpec.when) and competitionSpec.dataset_key
    if not applicable:
        return None

    for row_label in answer.index:
        for col in answer.columns:
            if col != competitionSpec.dataset_key:
                noise = normalvariate(0, 5.0)
                answer.loc[row_label, col] = dataset.loc[row_label, col] + noise

    return answer
Example #10
0
    def test_answerPrototype9(self):
        """Check the prototype for AnswerType.NINE.

        No ``when`` and no ``dataset_key`` are given, and the target is the
        last column of the OHLCV frame.  The expected prototype is a single
        default-indexed row with a NaN in the target column.
        """
        # AnswerType.NINE
        dataset = cfdg.ohlcv()
        competition = CompetitionSpec(
            title="",
            type=CompetitionType.PREDICT,
            expiration=datetime.now() + timedelta(minutes=1),
            prize=1.0,
            dataset=dataset.iloc[:-1],
            metric=CompetitionMetric.ABSDIFF,
            targets=dataset.columns[-1],
            answer=dataset.iloc[-1:],
        )

        df = answerPrototype(competition)
        # A per-row expected frame used to be built here and was immediately
        # overwritten; only the single-row frame was ever compared.
        ans = pd.DataFrame([{dataset.columns[-1]: np.nan}])
        print(df)
        print(ans)
        assert df.equals(ans)
Example #11
0
    def test_answerPrototype8(self):
        """Check the prototype for AnswerType.EIGHT (``when`` without a key).

        ``when`` is derived from the last index value of the OHLCV frame.  The
        expected prototype is a single row indexed by ``when`` with a NaN in
        the target column.
        """
        frame = cfdg.ohlcv()

        # The last index value is cast with ``astype(datetime)`` and divided
        # by 1e9 before being turned into a datetime.
        last_stamp = frame[-1:].index.values[0]
        exp = datetime.utcfromtimestamp(last_stamp.astype(datetime) / 1000000000)

        spec_kwargs = dict(
            title='',
            type=CompetitionType.PREDICT,
            expiration=datetime.now() + timedelta(minutes=1),
            prize=1.0,
            dataset=frame.iloc[:-1],
            metric=CompetitionMetric.ABSDIFF,
            targets=frame.columns[-1],
            answer=frame.iloc[-1:],
            when=exp,
        )
        competition = CompetitionSpec(**spec_kwargs)

        df = answerPrototype(competition)

        blank_row = {frame.columns[-1]: np.nan}
        ans = pd.DataFrame([blank_row], index=[exp])

        print(df)
        print(ans)
        assert df.equals(ans)
Example #12
0
    def test_answerPrototype7(self):
        """Check the prototype for AnswerType.SEVEN (keyed, no ``when``).

        The OHLCV frame has its index reset into an ``'index'`` column used as
        ``dataset_key``.  The expected prototype has one NaN target per key in
        all-but-the-last row, indexed by those key values.
        """
        frame = cfdg.ohlcv().reset_index()

        spec_kwargs = dict(
            title='',
            type=CompetitionType.PREDICT,
            expiration=datetime.now() + timedelta(minutes=1),
            prize=1.0,
            dataset=frame.iloc[:-1],
            metric=CompetitionMetric.ABSDIFF,
            targets=frame.columns[-1],
            dataset_key='index',
            answer=frame.iloc[-1:],
        )
        competition = CompetitionSpec(**spec_kwargs)

        df = answerPrototype(competition)

        target = frame.columns[-1]
        blank_rows = [{target: np.nan} for _ in frame['index'].iloc[:-1]]
        # The key Series itself is passed as the index, as in the original.
        ans = pd.DataFrame(blank_rows, index=frame['index'].iloc[:-1])

        print(df)
        print(ans)
        assert df.equals(ans)
Example #13
0
 def test_answerPrototype2(self):
     """Check the prototype for AnswerType.TWO.

     The competition has ``when``, is keyed by ``Name`` and targets ``Price``
     for ``'ABC Corp'``.  The expected prototype is one row indexed by
     ``'ABC Corp'`` holding ``when`` and a NaN ``Price``.
     """
     exp = datetime.now() + timedelta(minutes=2)
     spec_kwargs = dict(
         title='',
         type=CompetitionType.PREDICT,
         expiration=exp,
         when=exp,
         prize=1.0,
         dataset='http://bonds.paine.nyc',
         dataset_type=DatasetFormat.JSON,
         metric=CompetitionMetric.ABSDIFF,
         dataset_key='Name',
         targets={'ABC Corp': ['Price']},
     )
     competition = CompetitionSpec(**spec_kwargs)

     # Both frames are projected to the same column order before comparing.
     column_order = ['Price', 'when']
     df = answerPrototype(competition)[column_order]

     blank_rows = [{'when': exp, 'Price': np.nan}]
     ans = pd.DataFrame(blank_rows, index=['ABC Corp'])[column_order]

     print(df)
     print(ans)
     assert df.equals(ans)
Example #14
0
 def test_answerPrototype3(self):
     """Check the prototype for AnswerType.THREE.

     The competition is keyed by ``Name`` with no ``when`` and targets
     ``Price`` for ``'ABC Corp'``.  The expected prototype has one row per
     dataset row named ``'ABC Corp'``, keeping those rows' index labels, with
     the name filled in and ``Price`` set to NaN.
     """
     spec_kwargs = dict(
         title='',
         type=CompetitionType.PREDICT,
         expiration=datetime.now() + timedelta(minutes=1),
         prize=1.0,
         dataset='http://bonds.paine.nyc',
         dataset_type=DatasetFormat.JSON,
         metric=CompetitionMetric.ABSDIFF,
         dataset_key='Name',
         targets={'ABC Corp': ['Price']},
     )
     competition = CompetitionSpec(**spec_kwargs)

     dataset = fetchDataset(competition)
     df = answerPrototype(competition, dataset)

     # Index labels of the dataset rows that belong to the targeted name.
     is_target = dataset['Name'] == 'ABC Corp'
     index = dataset[is_target].index
     blank_rows = [{'Name': 'ABC Corp', 'Price': np.nan} for _ in index]
     ans = pd.DataFrame(blank_rows, index=index)

     print(df)
     print(ans)
     assert df.equals(ans)