Example No. 1
def main():
    # params = Params(target_depth=7000)

    py_curve = np.array(get_py_curve(np.arange(-3000, 3001, 10)))
    logger.debug('py_curve:\n%s', py_curve)

    pass
def get_ml_veg_ind_results():
    remote_sensing_data = pd.read_sql_table(
        'ml_veg_ind_results',
        engine,
        index_col='id',
        # read_sql_table expects a list of column names for parse_dates; a bare True is effectively ignored.
        parse_dates=True,
    )
    logger.debug("remote_sensing_data.shape: %s", remote_sensing_data.shape)
    return remote_sensing_data
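These examples read from PostGIS through a module-level engine that is never shown; a minimal sketch of how it could be created, assuming the connection details come from settings.POSTGIS (the URL below is a placeholder, not the project's real configuration):

from sqlalchemy import create_engine

# Hypothetical setup; the real credentials live in settings.POSTGIS.
engine = create_engine('postgresql://user:password@localhost:5432/fields_db')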
Example No. 3
def db_session():
    session: orm.Session = Session()

    try:
        yield session
    except Exception as exc:
        logger.debug("exc: %s", exc)
        session.rollback()
        raise
    else:
        session.commit()
    finally:
        # Close the session whether the block committed or raised.
        session.close()
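The example does not show how the db_session generator is consumed; a minimal usage sketch, assuming it is wrapped with contextlib.contextmanager (it could equally be a pytest fixture or a FastAPI dependency). SomeModel is a placeholder ORM class, not part of the original code:

from contextlib import contextmanager

db_session_ctx = contextmanager(db_session)

with db_session_ctx() as session:
    # Work done here is committed on success and rolled back if an exception escapes.
    session.add(SomeModel(name='example'))  # SomeModel is a placeholder model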
Example No. 4
def get_loaders(batch_size):
    raw_X = np.load(data_folder['X.npy'])
    raw_y = np.load(data_folder['y.npy'])
    scaler = MinMaxScaler()
    scaler.fit(raw_X.reshape((-1, 4)))
    X = scaler.transform(raw_X.reshape((-1, 4))).reshape((-1, 4, 4))
    # The targets are the same four features, so the scaler fitted on X is reused for y.
    y = scaler.transform(raw_y.reshape((-1, 4))).reshape((-1, 4))
    
    logger.debug("X.shape: %s", X.shape)
    logger.debug("y.shape: %s", y.shape)
    x_tensor = torch.from_numpy(X).float()
    y_tensor = torch.from_numpy(y).float()
    whole_dataset = TheDataset(x_tensor, y_tensor)
    
    train_data, test_data = train_test_split(whole_dataset)
    logger.debug("len(train_data): %s", len(train_data))
    logger.debug("len(test_data): %s", len(test_data))
    
    # Data loader
    train_loader = DataLoader(dataset=train_data,
                              batch_size=batch_size,
                              shuffle=True, )
    
    test_loader = DataLoader(dataset=test_data,
                             batch_size=batch_size,
                             shuffle=False)
    return train_loader, test_loader
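A minimal sketch of how the loaders might be consumed; the model, loss, and optimizer below are placeholders and not part of the original example, which only establishes that each batch is a (batch, 4, 4) input with a (batch, 4) target:

train_loader, test_loader = get_loaders(batch_size=32)

# Placeholder model: flatten the 4x4 window and regress the next 4 values.
model = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(4 * 4, 4))
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for x_batch, y_batch in train_loader:   # x_batch: (batch, 4, 4), y_batch: (batch, 4)
    optimizer.zero_grad()
    loss = criterion(model(x_batch), y_batch)
    loss.backward()
    optimizer.step()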
def get_fields_data():
    fields_data = gpd.read_postgis('select * from geometry_yield_by_year;',
                                   engine,
                                   geom_col='geometry')
    logger.debug("fields_data:\n%s", fields_data.head(20).to_string())
    fields_data = fields_data[(fields_data.year >= 2016)
                              & (fields_data.year <= 2019)].copy()
    # Keep only spring wheat (soft and durum); the values are stored in Russian.
    # An equivalent Series.isin form is sketched after this example.
    fields_data = fields_data[
        (fields_data.cult == 'Пшеница мягкая яровая (Triticum aestivum L)') |
        (fields_data.cult == 'Пшеница твердая яровая (Triticum durum Desf)')
    ].copy()
    # Keep only the Kostanay, North Kazakhstan and Akmola regions.
    fields_data: pd.DataFrame = fields_data[
        (fields_data.region == 'Костанайская область') |
        (fields_data.region == 'Северо-Казахстанская область') |
        (fields_data.region == 'Акмолинская область')].copy()
    # describe(fields_data)
    fields_data.dropna(subset=['yield'], inplace=True)
    logger.debug("fields_data:\n%s", fields_data)
    return fields_data
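The chained equality masks above can be written more compactly with Series.isin; an equivalent sketch using the same crop and region values:

wheat_cults = [
    'Пшеница мягкая яровая (Triticum aestivum L)',
    'Пшеница твердая яровая (Triticum durum Desf)',
]
target_regions = [
    'Костанайская область',
    'Северо-Казахстанская область',
    'Акмолинская область',
]
fields_data = fields_data[
    fields_data.cult.isin(wheat_cults)
    & fields_data.region.isin(target_regions)
].copy()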
Example No. 6
def get_grib_data():
    grib_folder = weather_data_folder['untarred']
    target_grib_filepaths = grib_folder.glob_search('*f000*')
    store = pd.HDFStore(weather_data_hdf_path)
    logger.debug("len(target_grib_filepaths): %s", len(target_grib_filepaths))
    dfs = list()
    for i, target_grib_filepath in enumerate(target_grib_filepaths):
        if target_grib_filepath.endswith('.idx'):
            continue
        if i % 100 == 0:
            logger.debug("i: %s", i)
        # read_grib_file: see the sketch after this example.
        df = read_grib_file(target_grib_filepath)
        key = 'data_' + get_md5(get_name(target_grib_filepath))
        store[key] = df
        dfs.append(df)
        if len(dfs) == 10:
            # Early exit after ten frames (debug limit); close the store before returning.
            store.close()
            return dfs

    store.close()
    return dfs
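read_grib_file is not defined in these examples; a minimal sketch of what it presumably does, based on the xr.open_dataset(..., engine='cfgrib') scratch code in Example No. 9 (the exact column handling is an assumption):

import xarray as xr

def read_grib_file(grib_filepath):
    # Assumed implementation: open one GRIB file with cfgrib and flatten it
    # into a long-format DataFrame (time/latitude/longitude plus data variables).
    ds = xr.open_dataset(str(grib_filepath), engine='cfgrib')
    return ds.to_dataframe().reset_index()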
def describe(df, max_unique_vals=20):
    """Log a frame's shape and per-column cardinality, listing the values of low-cardinality columns."""
    logger.debug("df.shape: %s", df.shape)
    for column in df.columns:
        unique_vals = df[column].unique()
        logger.debug("%s: %s", column, len(unique_vals))
        if len(unique_vals) < max_unique_vals:
            for unique_val in unique_vals:
                logger.debug("unique_val: %s", unique_val)
def get_target_fields():
    target_fields = gpd.read_postgis('select * from target_fields;',
                                     engine,
                                     geom_col='geometry')
    logger.debug("target_fields:\n%s", target_fields)
    # Drop fields whose caption marks them as fallow ('пар' means fallow).
    target_fields = target_fields[target_fields.caption.map(
        lambda caption: caption is None or ('пар' not in caption))]
    # logger.debug("target_fields.head(20).to_string():\n%s", target_fields.head(20).to_string())
    target_fields.drop(columns=['class_number', 'flregion'], inplace=True)
    logger.debug("target_fields.head(20).to_string():\n%s",
                 target_fields.head(20).to_string())
    logger.debug("target_fields.shape: %s", target_fields.shape)
    # describe(target_fields)  # same per-column summary as the describe() helper above
    return target_fields
Example No. 9
def main():
    weather_data_store = pd.HDFStore(weather_data_hdf_path)
    combined_df = pd.DataFrame()
    structured_data = defaultdict(list)
    df_keys = [
        'sdwe',
        'fldcp',
        'r2',
        'sp',
        'SUNSD',
        'soilw_0_soilw_1',
        'gust',
        'hindex',
        'orog',
    ]
    for i, key in enumerate(weather_data_store.keys()[:200]):
        df: pd.DataFrame = weather_data_store[key]
        # logger.debug("df:\n%s", df)
        df.time = pd.DatetimeIndex(
            df.time.map(lambda str_time: parse(str_time)))
        df.set_index(['time'], inplace=True)
        # logger.debug("df:\n%s", df)
        # logger.debug("df.index: %s", df.index)

        # return
        # df_key = "_".join(df.columns.drop(['latitude', 'longitude']).tolist())
        # delim = '_QWE_'
        df_key = '_'.join(df.columns.tolist())
        # df_keys.add(df_key)
        structured_data[df_key].append(df)
        # if len(structured_data.get(df_key, [])) == 2:
        #     hm_dfs = structured_data[df_key]
        #     for hm_df in hm_dfs:
        #         logger.debug("hm_df:\n%s", hm_df)

        # df_merged = reduce(lambda left, right: pd.merge(left, right, on=['latitude', 'longitude', 'time'],
        #                                                 how='outer'),
        #                    structured_data[df_key])
        # df_merged = pd.concat(hm_dfs)
        # logger.debug("df_merged:\n%s", df_merged.head(20).to_string())
        # logger.debug("df_merged.shape: %s", df_merged.shape)
        # return
        # return
        # logger.debug("df:\n%s", df.head().to_string())
        # if combined_df.empty:
        #     combined_df = df
        # else:
        #     combined_df = pd.merge(combined_df, df, how='outer', on=['latitude', 'longitude'],
        #                            left_index=True, right_index=True)
        # # logger.debug("df:\n%s", df)
        # if len(gusts) % 100 == 0:
        #     logger.info("i: %s", i)
        # logger.debug("combined_df:\n%s", combined_df.head(200).to_string())
        # logger.debug("combined_df.shape: %s", combined_df.shape)
    # for df_key in df_keys:
    #     logger.debug("df_key: %s", df_key)
    # return
    themed_dfs = list()
    for df_key, dfs in structured_data.items():
        # df_merged = reduce(lambda left, right: pd.merge(left, right, on=['latitude', 'longitude'],
        #                                                 how='outer', left_index=True, right_index=True), dfs)
        themed_df = pd.concat(dfs)
        # logger.debug("themed_df:\n%s", themed_df.head(100).to_string())

        # return
        themed_dfs.append(themed_df)

    combined_df = pd.concat(themed_dfs)
    logger.debug("combined_df:\n%s", combined_df)
    for column in combined_df.columns:
        logger.debug("column: %s", column)
    logger.debug("combined_df.shape: %s", combined_df.shape)
    logger.debug("combined_df.head(200).to_string():\n%s",
                 combined_df.head(200).to_string())
    logger.debug("combined_df.info():\n%s", combined_df.info())
    sns.heatmap(combined_df.isnull(), cbar=False)
    plt.show()
    return

    # Everything below this return is unreachable scratch code from an earlier
    # iteration that inspected a single GRIB file directly with xarray/cfgrib.
    ds = xr.open_dataset(grib_filename, engine='cfgrib')
    # logger.debug("ds:\n%s", ds)
    logger.debug("ds.variables:\n%s", sorted(ds.variables))
    data = Box()
    for var in sorted(ds.variables):
        logger.debug("var: %s", var)
        var_data = ds.variables[var]
        data[var] = var_data.values
        logger.debug("var_data.values.shape: %s", var_data.values.shape)
        # logger.debug("var_data:\n%s", var_data)
    return
    df = pd.DataFrame(data)
    logger.debug("df:\n%s", df)

    # with GribFile(grib_filename) as grib:
    #     logger.debug("len(grib): %s", len(grib))
    #     for msg in grib:
    #         logger.debug("msg: %s", msg)
    #         return

    # f = open('myfields.grib', 'rb')
    pass
Example No. 10
def main():
    logger.debug("settings.POSTGIS:\n%s", settings.POSTGIS)
    pass
def main():
    X = list()
    y = list()

    for field_key in fields_veg_data_store.keys():
        # field_key = 'field_000115c74eeb8e1a44e5a4b1a606b62f'
        logger.debug("field_key: %s", field_key)
        field_df: pd.DataFrame = fields_veg_data_store[field_key].drop(
            columns=['id', 'divided_cadastre_user_id', 'results_dir', 'field'])

        field_df.sort_values('actual_date', inplace=True)
        # Convert actual_date to datetimes and index on it so the frame can be resampled monthly.
        field_df.actual_date = pd.DatetimeIndex(field_df.actual_date)
        field_df.set_index('actual_date', inplace=True)
        # Keep only the '*_mean' vegetation-index columns.
        field_df = field_df[[c for c in field_df.columns if 'mean' in c]]

        # field_id = field_df['divided_cadastre_user_id'].iloc[0]
        monthly_data: pd.DataFrame = field_df.resample('M').mean()
        # logger.debug("field_df:\n%s", field_df.to_string())
        # logger.debug("monthly_data:\n%s", monthly_data.to_string())
        # logger.debug("monthly_data.shape: %s", monthly_data.shape)
        # return

        for year in [2016, 2017, 2018, 2019]:
            # try:
            # logger.info("year: %s", year)
            annual_data: pd.DataFrame = monthly_data.loc[
                f'{date(year, 4, 1)}':f'{date(year, 11, 1)}'].copy()
            # logger.debug("annual_data:\n%s", annual_data)
            if annual_data.dropna().shape[0] < 6:
                # logger.debug("annual_data.dropna().shape[0]: %s", annual_data.dropna().shape[0])
                continue
            sequence = annual_data.dropna().values
            # logger.debug("sequence: %s", sequence)
            # split_sequence: see the sketch after this example.
            X_i, y_i = split_sequence(sequence, 4)
            X.append(X_i)
            y.append(y_i)

    logger.debug("len(y): %s", len(y))
    X = np.concatenate(X)
    y = np.concatenate(y)
    np.save(data_folder['X.npy'], X)
    np.save(data_folder['y.npy'], y)

    return

    # Everything below this return is unreachable scratch code from an earlier run
    # that split the raw remote-sensing table into per-field HDF groups.
    df = veg_inds_store['remote_sensing_data'] = pd.read_sql_table(
        'remote_sensing_data_updated', engine)
    df['field'] = df.results_dir.map(lambda rd: rd.split('/')[-1])
    veg_inds_store['remote_sensing_data'] = df

    # df: pd.DataFrame = veg_inds_store['remote_sensing_data']
    logger.debug("df:\n%s", df.head(20).to_string())
    logger.debug("df.shape: %s", df.shape)
    for field, group in df.groupby('field'):
        logger.debug("field: %s", field)
        fields_veg_data_store[f'field_{field}'] = group

    veg_inds_store.close()
    fields_veg_data_store.close()

    # temporal_range = group.shape[0]
    # logger.debug("temporal_range: %s", temporal_range)
    # temporal_ranges.append(temporal_range)
    # plt.hist(temporal_ranges, bins=100, range=(0, 150))
    # plt.show()

    # df['field'] = df.results_dir.map(lambda rd: rd.split('/')[-1])
    # # df = df.apply(process_remote_sensing_fields, axis=1)
    # logger.debug("df:\n%s", df.head(20).to_string())
    # veg_inds_store['remote_sensing_data'] = df
    # for column in df.columns:
    #     logger.debug("column: %s", column)

    # df.drop(columns=['ndsi', 'temperature_dir', 'is_layer_created'], inplace=True)
    # for index_name in ['ndvi', 'ndmi', 'clgreen', 'gndvi']:
    #     logger.debug("df.shape: %s", df.shape)
    #     df["_".join([index_name, 'min'])] = None
    #     df["_".join([index_name, 'mean'])] = None
    #     df["_".join([index_name, 'max'])] = None
    # df = df.apply(process_remote_sensing_item, axis=1)
    # df.drop(columns=['ndvi', 'ndmi', 'clgreen', 'gndvi'], inplace=True)
    # veg_inds_store['remote_sensing_data_updated'] = df

    pass
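split_sequence is called above but not defined anywhere in these examples; a minimal sketch of the conventional sliding-window splitter it presumably implements (a window of n_steps rows as input, the following row as the target) — treat this as an assumption:

def split_sequence(sequence, n_steps):
    # Assumed helper: slide an n_steps-long window over the rows and use the
    # row immediately after the window as the prediction target.
    X, y = [], []
    for start in range(len(sequence) - n_steps):
        X.append(sequence[start:start + n_steps])
        y.append(sequence[start + n_steps])
    return np.array(X), np.array(y)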