def test_answerPrototype6(self):
    # AnswerType.SIX: dataset keyed by its 'index' column together with an
    # explicit ``when``. The prototype is expected to hold one row per key,
    # each carrying the ``when`` stamp and a NaN placeholder for the target.
    frame = cfdg.ohlcv().reset_index()
    target = frame.columns[-1]
    wanted = ['when', target]

    # Raw value of the last 'index' entry, scaled down by 1e9 before being
    # handed to utcfromtimestamp (presumably nanoseconds -> seconds; confirm
    # against what cfdg.ohlcv() produces).
    last_stamp = frame[-1:]['index'].values[0].astype(datetime)
    exp = datetime.utcfromtimestamp(last_stamp / 1000000000)

    competition = CompetitionSpec(
        title='',
        type=CompetitionType.PREDICT,
        expiration=datetime.now() + timedelta(minutes=1),
        prize=1.0,
        dataset=frame.iloc[:-1],
        metric=CompetitionMetric.ABSDIFF,
        targets=target,
        answer=frame.iloc[-1:],
        dataset_key='index',
        when=exp,
    )
    df = answerPrototype(competition)[wanted]

    # Expected frame: indexed by every key except the held-out last one.
    keys = frame['index'].iloc[:-1]
    rows = [{'when': exp, target: np.nan} for _ in keys]
    ans = pd.DataFrame(rows, index=list(keys))[wanted]

    print(df)
    print(ans)
    assert df.equals(ans)
def test_answerPrototype62(self):
    # AnswerType.SIX: remote JSON dataset keyed by 'id' with two targets and
    # an explicit ``when``. The prototype is expected to hold one row per
    # station id, with ``when`` filled in and NaN for both targets.
    exp = datetime.now() + timedelta(minutes=2)
    columns = ['when', 'availableBikes', 'availableDocks']

    spec_kwargs = {
        'title': '',
        'type': CompetitionType.PREDICT,
        'when': exp,
        'prize': 1.0,
        'dataset': 'https://feeds.citibikenyc.com/stations/stations.json',
        'dataset_type': DatasetFormat.JSON,
        'dataset_key': 'id',
        'dataset_kwargs': {'record_column': 'stationBeanList'},
        'metric': CompetitionMetric.ABSDIFF,
        'targets': ['availableBikes', 'availableDocks'],
        'expiration': exp,
    }
    competition = CompetitionSpec(**spec_kwargs)

    dataset = fetchDataset(competition)
    df = answerPrototype(competition)[columns]

    # One placeholder row per id found in the fetched dataset.
    station_ids = list(dataset.id)
    placeholder_rows = [
        {'when': exp, 'availableBikes': np.nan, 'availableDocks': np.nan}
        for _ in dataset.id
    ]
    ans = pd.DataFrame(placeholder_rows, index=station_ids)[columns]

    print(df)
    print(ans)
    assert df.equals(ans)
def answerPredict1(competitionSpec, *args, **kwargs):
    """Fill an answer prototype with per-column linear-regression predictions.

    For each answer column a ``LinearRegression`` is fitted using the fetched
    dataset's index (cast to int) as the single feature and the column's
    values as the target. The model is then evaluated at
    ``competitionSpec.when`` and the result stored at ``ans.loc[when, col]``.

    Args:
        competitionSpec: object exposing ``when`` and ``dataset_key``; it is
            also handed to ``fetchDataset`` and ``answerPrototype``.
        *args: unused.
        **kwargs: unused.

    Returns:
        The populated answer frame, or ``None`` when ``when`` is falsy or a
        ``dataset_key`` is set.
    """
    from crowdsource.types.utils import answerPrototype, fetchDataset
    from sklearn import linear_model

    def _as_epoch_seconds(moment):
        # Use datetime.timestamp() when it exists; otherwise rebuild the
        # value from the time tuple plus the microsecond remainder.
        if hasattr(moment, 'timestamp'):
            return moment.timestamp()
        return float(
            time.mktime(moment.timetuple()) + moment.microsecond / 1000000.0)

    data = fetchDataset(competitionSpec)
    ans = answerPrototype(competitionSpec, data)
    when = competitionSpec.when

    # TS prediction: only handled when a target time is given and the
    # dataset is not keyed.
    if not when:
        return
    if competitionSpec.dataset_key:
        return

    for column in ans.columns:
        if column == competitionSpec.dataset_key:
            continue
        series = data[column]
        # NOTE(review): the feature is the index cast to int while the query
        # point below is in epoch seconds; if the index is a DatetimeIndex
        # the units may not match -- confirm.
        features = series.index.astype(int).values.reshape(
            len(series.index), 1)
        observed = series.values.reshape(len(series), 1)
        model = linear_model.LinearRegression()
        model.fit(features, observed)
        ans.loc[when, column] = model.predict([[_as_epoch_seconds(when)]])
    return ans
def answerPredictCitibike(competitionSpec, *args, **kwargs):
    """Fill an answer prototype with current dataset values plus random noise.

    For every answer row ``i`` and every column other than ``"when"``, the
    value is taken from the first dataset row whose ``dataset_key`` column
    equals ``i`` and offset by ``randint(0, 10)``.

    Args:
        competitionSpec: object exposing ``when`` and ``dataset_key``; it is
            also handed to ``fetchDataset`` and ``answerPrototype``.
        *args: unused.
        **kwargs: unused.

    Returns:
        The populated answer frame, or ``None`` unless both ``when`` and
        ``dataset_key`` are set.
    """
    from random import randint
    from crowdsource.types.utils import answerPrototype, fetchDataset

    dataset = fetchDataset(competitionSpec)
    answer = answerPrototype(competitionSpec, dataset)
    when = competitionSpec.when

    # TS prediction: requires both a target time and a key column.
    if not (when and competitionSpec.dataset_key):
        return

    for row_key in answer.index:
        for column in answer.columns:
            if column == "when":
                continue
            jitter = randint(0, 10)
            matching = dataset[dataset[competitionSpec.dataset_key] == row_key]
            current = matching[column].iloc[0]
            answer.loc[row_key, column] = current + jitter
    return answer
def test_answerPrototype62(self):
    # AnswerType.SIX: JSON feed keyed by "id", two target columns and an
    # explicit ``when``. The expected prototype has one row per id with
    # ``when`` set and both targets left as NaN.
    exp = datetime.now() + timedelta(minutes=2)
    selected = ["when", "availableBikes", "availableDocks"]

    feed_url = "https://feeds.citibikenyc.com/stations/stations.json"
    competition = CompetitionSpec(
        title="",
        type=CompetitionType.PREDICT,
        when=exp,
        prize=1.0,
        dataset=feed_url,
        dataset_type=DatasetFormat.JSON,
        dataset_key="id",
        dataset_kwargs={"record_column": "stationBeanList"},
        metric=CompetitionMetric.ABSDIFF,
        targets=["availableBikes", "availableDocks"],
        expiration=exp,
    )

    dataset = fetchDataset(competition)
    df = answerPrototype(competition)[selected]

    # Build the expectation from the ids present in the fetched dataset.
    expected_rows = []
    for _ in dataset.id:
        expected_rows.append(
            {"when": exp, "availableBikes": np.nan, "availableDocks": np.nan}
        )
    ans = pd.DataFrame(expected_rows, index=list(dataset.id))[selected]

    print(df)
    print(ans)
    assert df.equals(ans)
def foo5(competitionSpec, *args, **kwargs):
    """Return a zero-filled answer prototype for string-specified datasets.

    Args:
        competitionSpec: object whose ``dataset`` attribute is inspected; it
            is also handed to ``fetchDataset`` and ``answerPrototype``.
        *args: unused.
        **kwargs: unused.

    Returns:
        ``answerPrototype(...).fillna(0)`` when ``competitionSpec.dataset`` is
        a string type, otherwise ``None``.
    """
    # Only string datasets are handled; anything else yields None.
    if not isinstance(competitionSpec.dataset, six.string_types):
        return None

    fetched = fetchDataset(competitionSpec)
    return answerPrototype(competitionSpec, fetched).fillna(0)
def test_answerPrototype5(self):
    # AnswerType.FIVE: remote JSON dataset, no key and no ``when``; targets
    # map row 0 to the 'Price' column. The expected prototype is a single
    # default-indexed row holding NaN for 'Price'.
    exp = datetime.now() + timedelta(minutes=2)
    spec_kwargs = dict(
        title='',
        type=CompetitionType.PREDICT,
        expiration=exp,
        prize=1.0,
        dataset='http://bonds.paine.nyc',
        dataset_type=DatasetFormat.JSON,
        metric=CompetitionMetric.ABSDIFF,
        targets={0: ['Price']},
    )
    competition = CompetitionSpec(**spec_kwargs)

    df = answerPrototype(competition)
    expected_rows = [{'Price': np.nan}]
    ans = pd.DataFrame(expected_rows)

    print(df)
    print(ans)
    assert df.equals(ans)
def test_answerPrototype1(self):
    # AnswerType.ONE: classification competition built from
    # make_classification() output. The expected prototype has one NaN
    # 'class' entry per row of the fetched dataset.
    generated = make_classification()
    features = pd.DataFrame(generated[0])
    labels = pd.DataFrame(generated[1])

    competition = CompetitionSpec(
        title='',
        type=CompetitionType.CLASSIFY,
        expiration=datetime.now() + timedelta(minutes=1),
        prize=1.0,
        num_classes=2,
        dataset=features,
        metric=CompetitionMetric.LOGLOSS,
        answer=labels,
    )

    fetched = fetchDataset(competition)
    df = answerPrototype(competition)

    expected_rows = [{'class': np.nan} for _ in fetched.index]
    ans = pd.DataFrame(expected_rows)

    print(df)
    print(ans)
    assert df.equals(ans)
def answerPredictCorporateBonds(competitionSpec, *args, **kwargs):
    """Fill an answer prototype with dataset values plus Gaussian noise.

    Every answer cell outside the ``dataset_key`` column is set to the
    dataset value at the same row label and column, offset by a draw from
    ``normalvariate(0, 5.0)``.

    Args:
        competitionSpec: object exposing ``when`` and ``dataset_key``; it is
            also handed to ``fetchDataset`` and ``answerPrototype``.
        *args: unused.
        **kwargs: unused.

    Returns:
        The populated answer frame, or ``None`` when ``when`` is set or no
        ``dataset_key`` is given.
    """
    from random import normalvariate
    from crowdsource.types.utils import answerPrototype, fetchDataset

    dataset = fetchDataset(competitionSpec)
    answer = answerPrototype(competitionSpec, dataset)

    # next val prediction: only for keyed datasets without a target time.
    if competitionSpec.when:
        return
    if not competitionSpec.dataset_key:
        return

    for row_label in answer.index:
        for column in answer.columns:
            if column == competitionSpec.dataset_key:
                continue
            noise = normalvariate(0, 5.0)
            base_value = dataset.loc[row_label, column]
            answer.loc[row_label, column] = base_value + noise
    return answer
def test_answerPrototype9(self):
    # AnswerType.NINE: in-memory dataset with neither ``dataset_key`` nor
    # ``when``. The expected prototype is a single default-indexed row with
    # NaN for the target (last) column.
    dataset = cfdg.ohlcv()
    target = dataset.columns[-1]
    competition = CompetitionSpec(
        title="",
        type=CompetitionType.PREDICT,
        expiration=datetime.now() + timedelta(minutes=1),
        prize=1.0,
        dataset=dataset.iloc[:-1],
        metric=CompetitionMetric.ABSDIFF,
        targets=target,
        answer=dataset.iloc[-1:],
    )
    df = answerPrototype(competition)
    # A per-row expectation frame used to be built here and was immediately
    # overwritten; only the single-row expectation is ever compared.
    ans = pd.DataFrame([{target: np.nan}])
    print(df)
    print(ans)
    assert df.equals(ans)
def test_answerPrototype8(self):
    # AnswerType.EIGHT: un-keyed dataset with an explicit ``when``. The
    # expected prototype is a single row, indexed by ``when``, holding NaN
    # for the target (last) column.
    frame = cfdg.ohlcv()
    target = frame.columns[-1]

    # Raw value of the last index entry, scaled down by 1e9 before being
    # handed to utcfromtimestamp (presumably nanoseconds -> seconds; confirm
    # against what cfdg.ohlcv() produces).
    last_stamp = frame[-1:].index.values[0].astype(datetime)
    exp = datetime.utcfromtimestamp(last_stamp / 1000000000)

    competition = CompetitionSpec(
        title='',
        type=CompetitionType.PREDICT,
        expiration=datetime.now() + timedelta(minutes=1),
        prize=1.0,
        dataset=frame.iloc[:-1],
        metric=CompetitionMetric.ABSDIFF,
        targets=target,
        answer=frame.iloc[-1:],
        when=exp,
    )
    df = answerPrototype(competition)

    expected_rows = [{target: np.nan}]
    ans = pd.DataFrame(expected_rows, index=[exp])

    print(df)
    print(ans)
    assert df.equals(ans)
def test_answerPrototype7(self):
    # AnswerType.SEVEN: dataset keyed by its 'index' column, no ``when``.
    # The expected prototype has one NaN target entry per key, indexed by
    # the key values (all but the held-out last row).
    frame = cfdg.ohlcv().reset_index()
    target = frame.columns[-1]

    competition = CompetitionSpec(
        title='',
        type=CompetitionType.PREDICT,
        expiration=datetime.now() + timedelta(minutes=1),
        prize=1.0,
        dataset=frame.iloc[:-1],
        metric=CompetitionMetric.ABSDIFF,
        targets=target,
        dataset_key='index',
        answer=frame.iloc[-1:],
    )
    df = answerPrototype(competition)

    expected_rows = [{target: np.nan} for _ in frame['index'].iloc[:-1]]
    ans = pd.DataFrame(expected_rows, index=frame['index'].iloc[:-1])

    print(df)
    print(ans)
    assert df.equals(ans)
def test_answerPrototype2(self):
    # AnswerType.TWO: remote JSON dataset keyed by 'Name', an explicit
    # ``when`` and a targets mapping for a single key. The expected
    # prototype is one row labelled 'ABC Corp' with NaN 'Price' and the
    # ``when`` stamp.
    exp = datetime.now() + timedelta(minutes=2)
    column_order = ['Price', 'when']

    spec_kwargs = dict(
        title='',
        type=CompetitionType.PREDICT,
        expiration=exp,
        when=exp,
        prize=1.0,
        dataset='http://bonds.paine.nyc',
        dataset_type=DatasetFormat.JSON,
        metric=CompetitionMetric.ABSDIFF,
        dataset_key='Name',
        targets={'ABC Corp': ['Price']},
    )
    competition = CompetitionSpec(**spec_kwargs)
    df = answerPrototype(competition)[column_order]

    expected = pd.DataFrame(
        [{'when': exp, 'Price': np.nan}],
        index=['ABC Corp'],
    )
    ans = expected[column_order]

    print(df)
    print(ans)
    assert df.equals(ans)
def test_answerPrototype3(self):
    # AnswerType.THREE: remote JSON dataset keyed by 'Name', a targets
    # mapping for a single key and no ``when``. The expected prototype has
    # one row per dataset row named 'ABC Corp', keeping the dataset's index
    # labels, with the name filled in and NaN for 'Price'.
    competition = CompetitionSpec(
        title='',
        type=CompetitionType.PREDICT,
        expiration=datetime.now() + timedelta(minutes=1),
        prize=1.0,
        dataset='http://bonds.paine.nyc',
        dataset_type=DatasetFormat.JSON,
        metric=CompetitionMetric.ABSDIFF,
        dataset_key='Name',
        targets={'ABC Corp': ['Price']},
    )
    dataset = fetchDataset(competition)
    df = answerPrototype(competition, dataset)

    # Index labels of the dataset rows that belong to the targeted key.
    is_target = dataset['Name'] == 'ABC Corp'
    index = dataset[is_target].index
    expected_rows = [{'Name': 'ABC Corp', 'Price': np.nan} for _ in index]
    ans = pd.DataFrame(expected_rows, index=index)

    print(df)
    print(ans)
    assert df.equals(ans)