def create(identifier: str, value: str, description: str) -> None:
    configuration: ConfigurationEntity = ConfigurationEntity()
    Utils.set_attributes(configuration, identifier=identifier, value=value, description=description)
    DAO.persist(configuration)
def update(portfolio: Tuple[Optional[str], ...]) -> None:
    for ticker in portfolio:
        stock: StockEntity = StockEntity()
        Utils.set_attributes(stock, ticker=ticker, isin=StockBO.isin(ticker))
        DAO.persist(stock)
def init_sqlite3_db_connection(init_sqlite3_db):
    ### SET-UP ###
    dao_handler = DAO(path_to_db=init_sqlite3_db._path_to_db)
    yield dao_handler
    ### TEAR-DOWN ###
    dao_handler.destroy()
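# Hedged usage sketch: assuming the generator above is registered as a pytest
# fixture (e.g. decorated with @pytest.fixture), a test can consume the yielded
# DAO handler directly. The test name and assertion below are illustrative only.
def test_dao_handler_is_created(init_sqlite3_db_connection):
    assert init_sqlite3_db_connection is not None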
def create(summation: Decimal, funds: str, attempt: AttemptDTO) -> None:
    evaluation: EvaluationEntity = EvaluationEntity()
    evaluation.timestamp = Utils.now()
    evaluation.sum = str(summation)
    evaluation.funds = funds
    Utils.set_attributes(evaluation,
                         amount_buy=str(attempt.amount_buy),
                         distance_buy=str(attempt.distance_buy),
                         delta_buy=str(attempt.delta_buy),
                         amount_sell=str(attempt.amount_sell),
                         distance_sell=str(attempt.distance_sell),
                         delta_sell=str(attempt.delta_sell))
    DAO.persist(evaluation)
def create_from_file(content: str) -> None:
    rows = json.loads(content)
    for row in rows:
        intraday: IntradayEntity = IntradayEntity()
        Utils.set_attributes(intraday,
                             date=datetime.fromisoformat(row['date']),
                             open=Decimal(row['open']),
                             high=Decimal(row['high']),
                             low=Decimal(row['low']),
                             close=Decimal(row['close']),
                             volume=Decimal(row['volume']),
                             ticker=row['ticker'])
        DAO.persist(intraday)
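# Illustrative input for create_from_file: a JSON array whose objects carry the
# fields read above ('date', 'open', 'high', 'low', 'close', 'volume', 'ticker').
# The concrete values below are made up for demonstration only.
example_content = '''[
    {"date": "2022-01-03T16:00:00", "open": "177.83", "high": "182.88",
     "low": "177.71", "close": "182.01", "volume": "104487900", "ticker": "AAPL"}
]'''
# create_from_file(example_content)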
def create_ticker(ticker: str) -> None:
    try:
        time_series = TimeSeries(key=os.environ.get('ALPHA_VANTAGE'), output_format='pandas')
        frame, meta_data = time_series.get_intraday(symbol=ticker.replace('.', '-'), outputsize='full')
        frame = frame.reset_index()
        for index, row in frame.iterrows():
            intraday = IntradayDAO.init(row, ticker, meta_data['6. Time Zone'])
            DAO.persist(intraday)
    except ValueError as e:
        logging.exception(e)
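# Usage sketch: the Alpha Vantage API key is read from the ALPHA_VANTAGE
# environment variable, so it must be set before calling create_ticker.
# The key below is a placeholder, and the ticker merely illustrates that dots
# are mapped to dashes for the API symbol.
# os.environ['ALPHA_VANTAGE'] = 'your-api-key'
# create_ticker('BRK.B')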
def _get_dao_handler():
    # Read a temporary file called "tmp.txt" which contains the path to the test SQLite database
    with open('tmp.txt', 'r') as input_file:
        path_to_tmp_db_directory = input_file.readline().rstrip('\n')
    # Create a connection to the test SQLite database
    dao_handler = DAO(path_to_db=path_to_tmp_db_directory)
    return dao_handler
def submission_by_id(id):
    data_eval = get_data_eval(id)
    print(data_eval)

    abs_target = data_eval["abs"]
    cols_type = data_eval["cols_type"]
    feat_selection = data_eval["feat_selection"]
    new_features = data_eval["new_features"]
    norm = data_eval["norm"]
    inputation = data_eval["inputation"]
    model_name = data_eval["model_name"]

    if feat_selection == "select_by_corr_thresh":
        feat_selection = select_by_corr_thresh

    dao = DAO(new_features=new_features)

    train = process_data(dao=dao, dataset="train", cols_type=cols_type, norm=norm,
                         inputation=inputation, new_features=new_features,
                         feat_selection=feat_selection, max_na_count_columns=1.0)
    test = process_data(dao=dao, dataset="test", cols_type=cols_type, norm=norm,
                        inputation=inputation, new_features=new_features,
                        feat_selection=None, max_na_count_columns=1.0)

    use_cols = train.columns.tolist()
    use_cols.remove(TARGET)
    test = test[use_cols]

    # Release the DAO before training to free memory
    dao = None
    gc.collect()

    model = pick_model(model_name)
    ev = Evaluator(model=model)

    print("READY!!!!")
    print("train", train.shape)
    print(train.head())
    print()
    print("test", test.shape)
    print(test.head())

    pred = ev.run(train, test, abs_target=abs_target)
    pred = pd.Series(pred)
    print("Predictions length:", len(pred))
    print(pred.head())

    test_ids = test.index.tolist()
    make_submission_file(id, pred, test_ids, data_eval)
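# Illustrative shape of the dictionary returned by get_data_eval(id), inferred
# from the keys read above; the concrete values are assumptions drawn from the
# experiment grid elsewhere in this repo, not the project's actual defaults.
example_data_eval = {
    "abs": False,
    "cols_type": "numeric",
    "feat_selection": "select_by_corr_thresh",
    "new_features": ["knn-longitude-latitude"],
    "norm": True,
    "inputation": "column_mean",
    "model_name": "H2OGradientBoosting",
}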
def submission(model, norm, feat_selection, inputation, new_features, subm_name):
    dao = DAO(new_features=new_features)

    if norm:
        train = dao.get_normalized_data(dataset="train", inputation=inputation, max_na_count_columns=0.05)
        test = dao.get_normalized_data(dataset="test", inputation=inputation, max_na_count_columns=1)
        print(len(test))
    else:
        train = dao.get_data(cols_type="numeric", dataset="train", max_na_count_columns=0.05)
        test = dao.get_data(cols_type="numeric", dataset="test", max_na_count_columns=0.05)

    test_ids = test.index.tolist()

    if feat_selection is None:
        feat_selection_name = ""
    else:
        feat_selection_name = feat_selection.__name__
        columns = feat_selection(train)
        train_columns = columns + [TARGET]
        train = train[train_columns]
        test = test[columns]

    ev = Evaluator(model=model)
    pred = ev.run(train, test, abs_target=False)
    pred = pd.Series(pred).round(10)

    # Kaggle-style submission: the same prediction is repeated for every target month
    subm = pd.DataFrame()
    subm["ParcelId"] = test_ids
    subm["201610"] = pred
    subm["201611"] = pred
    subm["201612"] = pred
    subm["201710"] = pred
    subm["201711"] = pred
    subm["201712"] = pred

    subm_path = PathManager().get_submission_dir() + subm_name + ".csv"
    subm.to_csv(subm_path, index=False)

    # Store the submission metadata alongside the CSV
    subm_metadata = PathManager().get_submission_dir() + subm_name + ".json"
    with open(subm_metadata, 'w') as file:
        submission_dict = {}
        submission_dict["submission_name"] = subm_name
        submission_dict["norm"] = norm
        submission_dict["feat_selection"] = feat_selection_name
        submission_dict["model"] = model.get_model_name()
        submission_dict["inputation"] = inputation
        submission_dict["score"] = ""
        json.dump(submission_dict, file)
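# Hedged usage sketch: the parameter values come from the experiment grid used
# elsewhere in this repo (models, imputation modes, engineered features); the
# submission name is a made-up example.
# submission(model=SKLearnLinearRegression(),
#            norm=True,
#            feat_selection=select_by_corr_thresh,
#            inputation="column_mean",
#            new_features=["knn-longitude-latitude"],
#            subm_name="linear_regression_corr_thresh")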
def update(identifier: str, value: Decimal) -> None:
    configuration = ConfigurationDAO.read_filter_by_identifier(identifier)
    configuration.value = value
    DAO.commit()
class H2ODeepLearning(H2OMlBase):

    def __init__(self, epochs=4):
        self.model = h2o.estimators.H2ODeepLearningEstimator(variable_importances=True, epochs=epochs)
        self.model_name = "H2ODeepLearning"
        H2OMlBase.__init__(self)


class H2ODeepWater(H2OMlBase):

    def __init__(self):
        self.model = h2o.estimators.H2ODeepWaterEstimator()
        self.model_name = "H2ODeepWater"
        H2OMlBase.__init__(self)


if __name__ == "__main__":
    model = H2OGradientBoosting()

    dao = DAO(train_file_name="train_complete_2016.csv")
    df_train = dao.get_normalized_data(max_na_count_columns=0.5)
    df_train = df_train.dropna()

    model.train(df_train, "logerror")
    pred = model.predict(df_train)
    print(pred)

    r2 = model.r2()
    print(r2)
import pandas as pd
from sklearn import preprocessing

from src.dao.dao import DAO

dao = DAO()
numeric_df = dao.get_data(cols_type="numeric", max_na_count_columns=0.05)

# Remove rows containing NA values
numeric_df = numeric_df.dropna()
print(numeric_df.head()[["regionidcity", "calculatedbathnbr", "fullbathcnt"]])

# Scale every numeric column to the [0, 1] range
x = numeric_df.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)

df = pd.DataFrame(x_scaled)
df_norm = pd.DataFrame(df)
df_norm.columns = numeric_df.columns
print(df_norm.head()[["regionidcity", "calculatedbathnbr", "fullbathcnt"]])
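# Note (assumption about intent): pd.DataFrame(x_scaled) discards the original
# row index, so the scaled frame can no longer be joined back to numeric_df by
# its index. A behavior-equivalent sketch that keeps both index and columns:
# df_norm = pd.DataFrame(x_scaled, index=numeric_df.index, columns=numeric_df.columns)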
    good_cols.remove("logerror")

    picked_cols = []
    for index, row in use_df_corr.loc[good_cols][good_cols].iterrows():
        # print(index)
        use_row = row[row.index != index]
        high_correlateds = use_row[use_row > corr_threshold].index.tolist()

        for high_correlated in high_correlateds:
            if high_correlated in good_cols and high_correlated not in picked_cols:
                good_cols.remove(high_correlated)

        picked_cols.append(index)

    return good_cols


if __name__ == "__main__":
    new_features_list = listdir(PathManager().get_new_features_dir())
    new_features_list = [[new_features.replace(".csv", "")] for new_features in new_features_list]
    print("new_features_list:", new_features_list)

    dao = DAO(train_file_name="train_complete_2016.csv", new_features=["knn-longitude-latitude"])
    df = dao.get_normalized_data(max_na_count_columns=0.05)
    df = df.dropna()

    print(select_by_corr_thresh(df))
    print(df.columns.tolist())
    # good_cols: ['longitude--latitude', 'bedroomcnt', 'structuretaxvaluedollarcnt', 'yearbuilt']
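# Self-contained toy illustration of the greedy pruning above (not project code):
# for each kept column, any other column whose pairwise correlation exceeds the
# threshold is dropped, so only one representative of each correlated group survives.
# import pandas as pd
# toy = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 4, 6, 8], "c": [4, 1, 3, 2]})
# print(toy.corr())  # "a" and "b" are perfectly correlated, so one of them would be pruned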
        return self.results


if __name__ == "__main__":
    cont = 0
    for abs_target in [False, True]:
        for cols_type in ["all", "numeric"]:
            for feat_selection in [select_by_corr_thresh, None]:
                for new_features in [[], ["knn-longitude-latitude"], ["knn-longitude-latitude-signal"],
                                     ["knn-longitude-latitude", "knn-longitude-latitude-signal"]]:
                    dao = DAO(train_file_name="train_complete_2016.csv", new_features=new_features)
                    for norm in [True, False]:
                        for inputation in ["column_mean", "fill_0"]:
                            for model in [H2OGradientBoosting(), H2ODeepLearning(),
                                          SKLearnLinearRegression(), SKLearnHuberRegressor()]:
                                print("\n\n\n")
                                cont += 1
                                print("essay:", cont)
                                df = process_data(dao=dao, dataset="train",