Example #1
def create(identifier: str, value: str, description: str) -> None:
    configuration: ConfigurationEntity = ConfigurationEntity()
    Utils.set_attributes(configuration,
                         identifier=identifier,
                         value=value,
                         description=description)
    DAO.persist(configuration)
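A note on the helper used throughout these snippets: Utils.set_attributes itself is not shown. A minimal sketch, assuming it simply forwards keyword arguments to setattr (an assumption, not the confirmed implementation):

# Hypothetical sketch of Utils.set_attributes; the real helper may differ.
def set_attributes(entity, **kwargs):
    # Copy each keyword argument onto the entity as an attribute.
    for name, value in kwargs.items():
        setattr(entity, name, value)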
Example #2
def update(portfolio: Tuple[Optional[str]]) -> None:
    for ticker in portfolio:
        stock: StockEntity = StockEntity()
        Utils.set_attributes(stock,
                             ticker=ticker,
                             isin=StockBO.isin(ticker))
        DAO.persist(stock)
Example #3
# The yield/tear-down structure implies a pytest fixture; the decorator below
# is assumed, as it is not visible in the original snippet.
@pytest.fixture
def init_sqlite3_db_connection(init_sqlite3_db):
    ### SET-UP ###

    dao_handler = DAO(path_to_db=init_sqlite3_db._path_to_db)

    yield dao_handler

    ### TEAR-DOWN ###
    dao_handler.destroy()
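A hedged usage sketch for the fixture above; the test name and assertion are hypothetical:

# Hypothetical test consuming the fixture; pytest injects the yielded DAO handler
# and runs the tear-down (dao_handler.destroy()) after the test finishes.
def test_dao_handler_is_created(init_sqlite3_db_connection):
    assert init_sqlite3_db_connection is not None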
Example #4
def create(summation: Decimal, funds: str, attempt: AttemptDTO) -> None:
    evaluation: EvaluationEntity = EvaluationEntity()
    evaluation.timestamp = Utils.now()
    evaluation.sum = str(summation)
    evaluation.funds = funds
    Utils.set_attributes(evaluation,
                         amount_buy=str(attempt.amount_buy),
                         distance_buy=str(attempt.distance_buy),
                         delta_buy=str(attempt.delta_buy),
                         amount_sell=str(attempt.amount_sell),
                         distance_sell=str(attempt.distance_sell),
                         delta_sell=str(attempt.delta_sell))
    DAO.persist(evaluation)
Example #5
def create_from_file(content: str) -> None:
    rows = json.loads(content)
    for row in rows:
        intraday: IntradayEntity = IntradayEntity()
        Utils.set_attributes(intraday,
                             date=datetime.fromisoformat(row['date']),
                             open=Decimal(row['open']),
                             high=Decimal(row['high']),
                             low=Decimal(row['low']),
                             close=Decimal(row['close']),
                             volume=Decimal(row['volume']),
                             ticker=row['ticker'])
        DAO.persist(intraday)
Example #6
def create_ticker(ticker: str) -> None:
    try:
        time_series = TimeSeries(key=os.environ.get('ALPHA_VANTAGE'),
                                 output_format='pandas')
        frame, meta_data = time_series.get_intraday(symbol=ticker.replace('.', '-'),
                                                    outputsize='full')
        frame = frame.reset_index()
        for index, row in frame.iterrows():
            intraday = IntradayDAO.init(row, ticker, meta_data['6. Time Zone'])
            DAO.persist(intraday)
    except ValueError as e:
        logging.exception(e)
Example #7
def _get_dao_handler():
    # Read a temporary file called "tmp.txt" which contains the path to the test SQLite database
    with open('tmp.txt', 'r') as input_file:
        path_to_tmp_db_directory = input_file.readline().rstrip('\n')

    # Create a connection to the test SQLite database
    dao_handler = DAO(path_to_db=path_to_tmp_db_directory)

    return dao_handler
Example #8
def submission_by_id(id):
    data_eval = get_data_eval(id)
    print(data_eval)

    abs_target = data_eval["abs"]
    cols_type = data_eval["cols_type"]
    feat_selection = data_eval["feat_selection"]
    new_features = data_eval["new_features"]
    norm = data_eval["norm"]
    inputation = data_eval["inputation"]
    model_name = data_eval["model_name"]


    if feat_selection == "select_by_corr_thresh":
        feat_selection = select_by_corr_thresh


    dao = DAO(new_features=new_features)

    train = process_data(dao=dao, dataset="train", cols_type=cols_type, norm=norm, inputation=inputation,
                         new_features=new_features, feat_selection=feat_selection, max_na_count_columns=1.0)

    test = process_data(dao=dao, dataset="test", cols_type=cols_type, norm=norm, inputation=inputation,
                        new_features=new_features, feat_selection=None, max_na_count_columns=1.0)

    use_cols = train.columns.tolist()
    use_cols.remove(TARGET)
    test = test[use_cols]

    dao = None
    gc.collect()

    model = pick_model(model_name)
    ev = Evaluator(model=model)
    print("READY!!!!")
    print("train", train.shape)
    print(train.head())
    print()
    print("test", test.shape)
    print(test.head())

    pred = ev.run(train, test, abs_target=abs_target)
    pred = pd.Series(pred)

    print("Predictions length:", len(pred))
    print(pred.head())

    test_ids = test.index.tolist()

    make_submission_file(id, pred, test_ids, data_eval)
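pick_model is not shown in this snippet. Assuming it maps the stored model_name string to one of the model classes used elsewhere in these examples (the mapping below is an assumption), a minimal sketch:

# Hypothetical sketch of pick_model; the class names are taken from Examples #11
# and #14 and would have to be imported from their own modules.
def pick_model(model_name):
    models = {
        "H2OGradientBoosting": H2OGradientBoosting,
        "H2ODeepLearning": H2ODeepLearning,
        "SKLearnLinearRegression": SKLearnLinearRegression,
        "SKLearnHuberRegressor": SKLearnHuberRegressor,
    }
    # Instantiate the requested model class.
    return models[model_name]()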
Example #9
def submission(model, norm, feat_selection, inputation, new_features, subm_name):
    dao = DAO(new_features=new_features)

    if norm:
        train = dao.get_normalized_data(dataset="train", inputation=inputation, max_na_count_columns=0.05)
        test = dao.get_normalized_data(dataset="test", inputation=inputation, max_na_count_columns=1)
        print(len(test))
    else:
        train = dao.get_data(cols_type="numeric", dataset="train", max_na_count_columns=0.05)
        test = dao.get_data(cols_type="numeric", dataset="test", max_na_count_columns=0.05)

    test_ids = test.index.tolist()


    if feat_selection is None:
        feat_selection_name = ""
    else:
        feat_selection_name = feat_selection.__name__
        columns = feat_selection(train)
        train_columns = columns + [TARGET]
        train = train[train_columns]
        test = test[columns]


    ev = Evaluator(model=model)
    pred = ev.run(train, test, abs_target=False)

    pred = pd.Series(pred).round(10)
    subm = pd.DataFrame()
    subm["ParcelId"] = test_ids
    subm["201610"] = pred
    subm["201611"] = pred
    subm["201612"] = pred
    subm["201710"] = pred
    subm["201711"] = pred
    subm["201712"] = pred

    subm_path = PathManager().get_submission_dir() + subm_name + ".csv"
    subm.to_csv(subm_path, index=False)

    subm_metadata = PathManager().get_submission_dir() + subm_name + ".json"
    with open(subm_metadata, 'w') as file:
        submission_dict = {}
        submission_dict["submission_name"] = subm_name
        submission_dict["norm"] = norm
        submission_dict["feat_selection"] = feat_selection_name
        submission_dict["model"] = model.get_model_name()
        submission_dict["inputation"] = inputation
        submission_dict["score"] = ""

        json.dump(submission_dict, file)
Example #10
def update(identifier: str, value: Decimal) -> None:
    configuration = ConfigurationDAO.read_filter_by_identifier(identifier)
    configuration.value = value
    DAO.commit()
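Examples #1 to #6 persist a freshly built entity with DAO.persist, while this snippet only mutates an already-loaded entity and calls DAO.commit. A minimal sketch of that pair, assuming the DAO wraps a SQLAlchemy session (an assumption, not confirmed by these snippets):

# Hypothetical sketch of the DAO persist/commit pair, assuming a SQLAlchemy
# session is configured elsewhere; the real DAO class is not shown here.
class DAO:
    session = None  # e.g. a scoped SQLAlchemy session, set up at start-up

    @staticmethod
    def persist(entity) -> None:
        # Stage a new entity and write it to the database.
        DAO.session.add(entity)
        DAO.session.commit()

    @staticmethod
    def commit() -> None:
        # Flush changes made to entities that were loaded earlier.
        DAO.session.commit()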
Example #11

class H2ODeepLearning(H2OMlBase):
    def __init__(self, epochs=4):
        self.model = h2o.estimators.H2ODeepLearningEstimator(
            variable_importances=True, epochs=epochs)
        self.model_name = "H2ODeepLearning"
        H2OMlBase.__init__(self)


class H2ODeepWater(H2OMlBase):
    def __init__(self):
        self.model = h2o.estimators.H2ODeepWaterEstimator()
        self.model_name = "H2ODeepWater"
        H2OMlBase.__init__(self)


if __name__ == "__main__":
    model = H2OGradientBoosting()

    dao = DAO(train_file_name="train_complete_2016.csv")
    df_train = dao.get_normalized_data(max_na_count_columns=0.5)
    df_train = df_train.dropna()
    model.train(df_train, "logerror")

    pred = model.predict(df_train)
    print(pred)

    r2 = model.r2()
    print(r2)
Example #12
import pandas as pd
from sklearn import preprocessing

from src.dao.dao import DAO

dao = DAO()
numeric_df = dao.get_data(cols_type="numeric", max_na_count_columns=0.05)

# remove NA rows
numeric_df = numeric_df.dropna()

print(numeric_df.head()[["regionidcity", "calculatedbathnbr", "fullbathcnt"]])

x = numeric_df.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
df_norm = pd.DataFrame(x_scaled)
df_norm.columns = numeric_df.columns

print(df_norm.head()[["regionidcity", "calculatedbathnbr", "fullbathcnt"]])
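One caveat in the snippet above: pd.DataFrame(x_scaled) drops the original row index, so the normalized frame can no longer be joined back to the raw data by index. A small variant that keeps index and columns in one step, under the same assumptions as the snippet:

# Variant of the normalization step that preserves index and column labels.
df_norm = pd.DataFrame(min_max_scaler.fit_transform(numeric_df.values),
                       index=numeric_df.index,
                       columns=numeric_df.columns)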

Example #13
    good_cols.remove("logerror")
    picked_cols = []

    for index, row in use_df_corr.loc[good_cols][good_cols].iterrows():
        # print(index)
        use_row = row[row.index != index]
        high_correlateds = use_row[use_row > corr_threshold].index.tolist()
        for high_correlated in high_correlateds:
            if high_correlated in good_cols and high_correlated not in picked_cols:
                good_cols.remove(high_correlated)

        picked_cols.append(index)

    return good_cols


if __name__ == "__main__":
    new_features_list = listdir(PathManager().get_new_features_dir())
    new_features_list = [[new_features.replace(".csv", "")]
                         for new_features in new_features_list]
    print("new_features_list:", new_features_list)

    dao = DAO(train_file_name="train_complete_2016.csv",
              new_features=["knn-longitude-latitude"])
    df = dao.get_normalized_data(max_na_count_columns=0.05)
    df = df.dropna()

    print(select_by_corr_thresh(df))
    print(df.columns.tolist())

#good_cols: ['longitude--latitude', 'bedroomcnt', 'structuretaxvaluedollarcnt', 'yearbuilt']
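The top of select_by_corr_thresh is cut off above; only its final loop is visible. Judging from that tail, it builds an absolute correlation matrix over the candidate columns and drops one column from every highly correlated pair. A self-contained sketch of that idea, with hypothetical names and threshold where the original is not visible:

# Hypothetical reconstruction; only the final loop matches the original snippet,
# the correlation-matrix set-up and the 0.8 threshold are assumptions.
def select_by_corr_thresh(df, corr_threshold=0.8):
    use_df_corr = df.corr().abs()
    good_cols = use_df_corr.columns.tolist()
    good_cols.remove("logerror")  # never select the target itself
    picked_cols = []
    for index, row in use_df_corr.loc[good_cols][good_cols].iterrows():
        use_row = row[row.index != index]
        high_correlateds = use_row[use_row > corr_threshold].index.tolist()
        for high_correlated in high_correlateds:
            if high_correlated in good_cols and high_correlated not in picked_cols:
                good_cols.remove(high_correlated)
        picked_cols.append(index)
    return good_cols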
Example #14
        return self.results


if __name__ == "__main__":

    cont = 0
    for abs_target in [False, True]:
        for cols_type in ["all", "numeric"]:
            for feat_selection in [select_by_corr_thresh, None]:
                for new_features in [[], ["knn-longitude-latitude"],
                                     ["knn-longitude-latitude-signal"],
                                     [
                                         "knn-longitude-latitude",
                                         "knn-longitude-latitude-signal"
                                     ]]:
                    dao = DAO(train_file_name="train_complete_2016.csv",
                              new_features=new_features)
                    for norm in [True, False]:
                        for inputation in ["column_mean", "fill_0"]:
                            for model in [
                                    H2OGradientBoosting(),
                                    H2ODeepLearning(),
                                    SKLearnLinearRegression(),
                                    SKLearnHuberRegressor()
                            ]:
                                print("\n\n\n")
                                cont += 1
                                print("essay:", cont)

                                df = process_data(
                                    dao=dao,
                                    dataset="train",