def ingest_to_db(InRaster, run_id, *, model_name, start, included_months, total_months, params, basin):
    """Ingest a multi-band PIHM raster (one band per month) into the DB.

    Writes a Metadata row, one Parameters row per entry in ``params``, and
    then one bulk Output insert per monthly band of ``InRaster``.

    NOTE(review): ``total_months`` is accepted but never used in this body —
    confirm whether that is intentional.
    """
    # Add metadata object to DB
    meta = Metadata(run_id=run_id, model=model_name, raw_output_link=
            f"https://model-service.worldmodelers.com/results/PIHM_results/{run_id}.tif",
            run_label=f"{model_name} run for {basin} Basin.",
            point_resolution_meters=200)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():
        # 'basin' is the only non-numeric parameter; everything else is a float
        if pp == 'basin':
            p_type = 'string'
        else:
            p_type = 'float'
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type=p_type)
        db_session.add(param)
    db_session.commit()

    # iterate over the bands that should be included (1 per month)
    # NOTE(review): range(1, included_months + 2) yields included_months + 1
    # iterations — looks like an off-by-one unless one extra trailing month is
    # deliberate; confirm against the raster's band count.
    for month in range(1, included_months + 2):
        date_ = start + relativedelta(months=month-1)
        date_str = date_.strftime("%m/%d/%Y")
        print(f"Processing {model_name} {date_str}")
        # Convert Raster to GeoPandas
        # NOTE(review): ``m`` is not a parameter of this function — this relies
        # on a module-level ``m`` (model config dict); sibling ingest_to_db
        # variants take it explicitly. Confirm it is defined at module scope.
        feature_name = m['outputs'][0]['name']
        feature_description = m['outputs'][0]['description']
        gdf = raster2gpd(InRaster, feature_name, band=month)

        print(f"Performing spatial merge")
        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Set run fields: datetime, run_id, model
        gdf['datetime'] = date_
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        # Drop the merge artifacts before the bulk insert; guarded because the
        # same columns may already have been removed.
        if 'geometry' in gdf:
            del(gdf['geometry'])
            del(gdf['index_right'])

        # perform bulk insert of entire geopandas DF
        print(f"Ingesting {date_str} of {model_name} for basin {basin} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
def ingest2db(df_, fsc, params, run_id, model_name):
    """Persist one model run — metadata, parameters, and output rows — to the DB.

    ``df_`` is mutated in place: run bookkeeping columns are appended before
    the rows are bulk-inserted as Output records.
    """
    # Record run-level metadata first so output rows can reference the run.
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
        run_label=f"{model_name} run for {params['shocked_region']} region.",
        point_resolution_meters=100000)
    db_session.add(meta)
    db_session.commit()

    # Every supplied parameter is stored as a string-typed record.
    for name, value in params.items():
        db_session.add(Parameters(run_id=run_id,
                                  model=model_name,
                                  parameter_name=name,
                                  parameter_value=value,
                                  parameter_type="string"))
    db_session.commit()

    # Tag each output row with run/feature columns, then bulk insert.
    feature = fsc['outputs'][0]
    df_['datetime'] = datetime(year=2018, month=1, day=1)
    df_['run_id'] = run_id
    df_['model'] = model_name
    df_['feature_name'] = feature['name']
    df_['feature_description'] = feature['description']
    df_['feature_value'] = df_[feature['name']].apply(int)
    db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
    db_session.commit()
def ingest_to_db(InRaster, run_id, *, model_name, params, m):
    """Ingest a single-band commodity raster into the DB.

    The band is looked up from the module-level ``bands`` table by commodity;
    a Metadata row, one Parameters row per config entry, and a bulk Output
    insert are written.
    """
    # Run-level metadata goes in first.
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=1000000)
    db_session.add(meta)
    db_session.commit()

    # Persist run parameters; year/month are integers, the rest strings.
    print("Storing parameters...")
    for name, value in params.items():
        p_type = 'integer' if name in ('year', 'month') else 'string'
        db_session.add(Parameters(run_id=run_id,
                                  model=model_name,
                                  parameter_name=name,
                                  parameter_value=value,
                                  parameter_type=p_type))
    db_session.commit()

    # Convert the commodity's raster band to point features.
    band = bands[params['commodity']]
    feature = m['outputs'][0]
    feature_name = feature['name']
    feature_description = feature['description']
    gdf = raster2gpd(InRaster, feature_name, band=band)
    print(f"GDF size is {gdf.shape[0]} before rounding lat/lon")
    gdf = gdf.drop_duplicates()
    print(f"GDF size is {gdf.shape[0]} after rounding lat/lon")

    print(f"Performing spatial merge")
    # Spatial join against GADM admin-2 shapes to attach admin areas.
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Run bookkeeping columns for every output row.
    gdf['datetime'] = datetime(year=params['year'], month=params['month'], day=1)
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    gdf['feature_description'] = feature_description
    # Drop merge artifacts before the insert.
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    # Bulk insert the whole frame as Output records.
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
def batch_create(cls, user_ids):
    """Bulk-insert one User row per id in ``user_ids``.

    Uses a single bulk mapping insert, which is faster than creating and
    adding each User object individually.
    """
    rows = [{"id": uid} for uid in user_ids]
    db_session.bulk_insert_mappings(User, rows)
    db_session.commit()
def ingest_json(class_table):
    """Load test fixture rows for ``class_table`` from JSON into the database.

    Arguments:
        class_table {class} -- sqlalchemy model (DeclarativeMeta); its
            ``__tablename__`` selects the fixture file under tests/data.
    """
    fixture = f"tests/data/{class_table.__tablename__}.json"
    with open(fixture) as json_file:
        records = json.load(json_file)
    # One bulk insert of all fixture rows, then a single commit.
    db_session.bulk_insert_mappings(class_table, [dict(r) for r in records])
    db_session.commit()
def ingest_to_db(InRaster, run_id, *, model_name, m):
    """Ingest an annual multi-band raster (band 1 = 2009 … band 11 = 2019).

    Writes a Metadata row, then one bulk Output insert per year/band.
    """
    # Run-level metadata first.
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=480)
    db_session.add(meta)
    db_session.commit()

    # Feature identity is the same for every band.
    feature_name = m['outputs'][0]['name']
    feature_description = m['outputs'][0]['description']

    # One band per year: band 1 corresponds to 2009.
    for band, year in enumerate(range(2009, 2020), start=1):
        print(f"Processing {model_name} for year {year}")
        # Convert the year's band to point features; 0.0 marks nodata.
        gdf = raster2gpd(InRaster, feature_name, band=band, nodataval=np.float64(0.0))

        print(f"Performing spatial merge")
        # Spatial join against GADM admin-2 shapes to attach admin areas.
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Run bookkeeping columns.
        gdf['datetime'] = datetime(year=year, month=1, day=1)
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        # Drop merge artifacts before the insert.
        if 'geometry' in gdf:
            del gdf['geometry']
            del gdf['index_right']

        # Bulk insert the year's frame as Output records.
        print(f"Ingesting {year} of {model_name} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
def ingest2db(self):
    """Ingest this run's GeoTIFF output into the database.

    Loads GADM admin-2 shapes, stores run metadata and parameters, converts
    the result tiff to points, spatially joins admin areas, and bulk-inserts
    the rows as Output records.
    """
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.features[self._type]['run_description'],
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    point_resolution_meters=5000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Store every config entry except run_id as a typed parameter row.
    logging.info("Storing parameters...")
    for name, value in self.model_config.items():
        if name == 'run_id':
            continue
        if name == 'bbox':
            kind, value = 'array', json.dumps(value)
        elif name == 'dekad':
            kind, value = 'integer', int(value)
        elif name == 'year':
            kind = 'integer'
        else:
            kind = 'string'
        db_session.add(Parameters(run_id=self.run_id,
                                  model=self.name,
                                  parameter_name=name,
                                  parameter_value=value,
                                  parameter_type=kind))
    db_session.commit()

    # Convert the tiff into point features.
    logging.info("Processing tiff...")
    InRaster = f"{self.result_path}/{self.result_name}.tiff"
    feature_name = self.features[self._type]['feature_name']
    feature_description = self.features[self._type]['feature_description']
    gdf = raster2gpd(InRaster, feature_name)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model.
    # Convert dekad of year to day of year: a dekad is a 10 day period so
    # dekad 25 ends on the 250th day; dekad 01 (days 1-10) yields Jan 1.
    day_offset = (int(self.dekad) * 10) - 11
    gdf['datetime'] = datetime(self.year, 1, 1) + timedelta(day_offset)
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    # Drop merge artifacts before the insert.
    del gdf['geometry']
    del gdf['index_right']

    # Bulk insert the whole frame as Output records.
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
# iterate over the 4 bands for band, years in bands.items(): print(f"Processing {model_name} band {band}") # Convert Raster to GeoPandas InRaster = f"data/{atlas_lookup[model_name]['tif']}" feature_name = atlas_lookup[model_name]['feature_name'] feature_description = atlas_lookup[model_name][ 'feature_description'] gdf = raster2gpd(InRaster, feature_name, band=band) print(f"Performing spatial merge") # Spatial merge on GADM to obtain admin areas gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects') # Iterate over years for each band to ensure that there is continous # annual data for year in years: # Set run fields: datetime, run_id, model gdf['datetime'] = datetime(year=year, month=1, day=1) gdf['run_id'] = run_id gdf['model'] = model_config['name'] gdf['feature_description'] = feature_description if 'geometry' in gdf: del (gdf['geometry']) del (gdf['index_right']) # perform bulk insert of entire geopandas DF print(f"Ingesting {year} of {model_name} to database\n") db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records")) db_session.commit()
def ingest2db(self):
    """Ingest DSSAT pp.csv results into the database.

    Loads GADM admin-2 shapes, stores run metadata and parameters, normalizes
    the points CSV (geometry, harvest date, production), spatially joins admin
    areas, and bulk-inserts one Output frame per feature.

    Fixes vs. previous revision: removed a duplicated "Storing metadata..."
    log call, replaced the `if ...: pass / else:` parameter guard with
    `continue`, and copied the per-feature column slice before mutating it
    (avoids pandas SettingWithCopyWarning / silent chained assignment).
    """
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{self.config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    logging.info("Storing metadata...")
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.descriptions['management_practice'][self.model_config['management_practice']],
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    # 5 arc minutes (~10km)
                    point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB (everything except the run_id itself).
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            continue
        param = Parameters(run_id=self.run_id,
                           model=self.name,
                           parameter_name=param_name,
                           parameter_value=param_val,
                           parameter_type=self.descriptions['parameters'][param_name])
        db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it
    logging.info("Processing points...")
    if self.model_config["management_practice"] == "combined":
        # combined CSV
        path = f"{self.result_path}/out/eth_docker/test/pp.csv"
    else:
        # individual management practices
        m = self.model_config["management_practice"]
        path = f"{self.result_path}/out/eth_docker/test/{m}/pp_{m}.csv"

    df = pd.read_csv(path, index_col=False)
    df['latitude'] = df['LATITUDE']
    df['longitude'] = df['LONGITUDE']
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    # HDAT encodes harvest date as YYYYDDD (year + day of year).
    df['year'] = df['HDAT'].apply(lambda x: int(str(x)[:4]))
    df['days'] = df['HDAT'].apply(lambda x: int(str(x)[4:]))
    df['datetime'] = df.apply(lambda x: datetime(x.year, 1, 1) + timedelta(x.days - 1), axis=1)
    df['run_id'] = self.run_id
    df['model'] = self.name
    df['Production'] = df['HWAH'] * df['HARVEST_AREA']

    # for combined runs only we need to convert the run name to an encoded
    # float so that it can go into the database
    if 'RUN_NAME' in df:
        df['management_practice'] = df['RUN_NAME'].apply(lambda x: self.descriptions['encoding'][x])

    gdf = gpd.GeoDataFrame(df)
    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = ['run_id', 'model', 'latitude', 'longitude',
                 'datetime', 'admin1', 'admin2', 'state', 'country']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature, then
    # upload the GDF per feature so rows are added for each feature.
    for feature_name, feature_description in self.descriptions['features'].items():
        # 'management_practice' only exists as a column in "combined" runs.
        if (feature_name == 'management_practice'
                and self.model_config["management_practice"] != "combined"):
            continue
        # .copy() so the column assignments below hit a real frame, not a view
        gdf_ = gdf[base_cols + [feature_name]].copy()
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # perform bulk insert of entire geopandas DF
        logging.info(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
def ingest2db(year, df, filename):
    """Ingest one year of flood-index model output into the database.

    Args:
        year: model year; used for the run id and the stored 'year' parameter.
        df: DataFrame of points with latitude/longitude/datetime and one
            column per entry in the module-level ``features`` mapping.
        filename: stem of the raw NetCDF output, interpolated into the
            raw_output_link.

    Fixes vs. previous revision: the raw_output_link contained the literal
    text "(unknown)" instead of interpolating ``filename`` (which was
    otherwise unused); a duplicated "Storing metadata..." print was removed;
    the per-feature column slice is copied before mutation (avoids pandas
    SettingWithCopyWarning).
    """
    model_name = "flood_index_model"
    run_id = gen_run_id(year)
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    print("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    print("Storing metadata...")
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        run_description=f"{model_name} run for {year}",
        # TODO(review): confirm ``filename`` excludes the .nc extension.
        raw_output_link=f'https://s3.amazonaws.com/world-modelers/flood_index_model/{filename}.nc',
        # 0.1 degrees (~10km)
        point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB: only the model year.
    print("Storing parameters...")
    param = Parameters(run_id=run_id,
                       model=model_name,
                       parameter_name="year",
                       parameter_value=year,
                       parameter_type="integer")
    db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it
    print("Processing points...")
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    df['run_id'] = run_id
    df['model'] = model_name
    gdf = gpd.GeoDataFrame(df)
    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = ['run_id', 'model', 'latitude', 'longitude',
                 'datetime', 'admin1', 'admin2', 'state', 'country']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature, then
    # upload the GDF per feature so rows are added for each feature.
    for feature_name, feature_description in features.items():
        # .copy() so the column assignments below hit a real frame, not a view
        gdf_ = gdf[base_cols + [feature_name]].copy()
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # perform bulk insert of entire geopandas DF
        print(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
def ingest2db(self):
    """Ingest multi_twist scenario timeseries output into the database.

    Stores run metadata and parameters, pivots the output CSV so each column
    is a feature and each row a month, then bulk-inserts one Output frame per
    feature present in the scenario's output.

    Fixes vs. previous revision: removed a duplicated "Storing metadata..."
    log call, replaced the `if ...: pass / else:` parameter guard with
    `continue`, and copied the per-feature column slice before mutating it
    (avoids pandas SettingWithCopyWarning).
    """
    init_db()

    # Add metadata object to DB
    desc = f"{self.name} run for {self.scenario_type} scenario with start year {self.start_year} and end year {self.end_year}"
    if self.scenario_type == 'production_failure_scenario':
        desc += f". Shock severity was set to {self.shock_severity} and the shocked region was {self.shocked_region}."
    logging.info("Storing metadata...")
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_label=f"{self.name}: {self.scenario_type}",
                    run_description=desc,
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}')
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB (everything except the run_id itself).
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            continue
        param = Parameters(
            run_id=self.run_id,
            model=self.name,
            parameter_name=param_name,
            parameter_value=param_val,
            parameter_type=self.descriptions['parameters'][param_name])
        db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it: transpose so the date row becomes a
    # column, promote the first row to headers.
    logging.info("Processing timeseries...")
    df = pd.read_csv(self.output)
    df = df.transpose().reset_index()
    df = df.rename(columns=dict(zip(list(df.columns), list(df.iloc[0]))))[1:]
    df = df.rename(columns={'Unnamed: 0': 'Date'})
    # Date is formatted "MM.YYYY".
    df['datetime'] = df.Date.apply(lambda x: datetime(
        year=int(x.split('.')[1]), month=int(x.split('.')[0]), day=1))
    del df['Date']
    df['run_id'] = self.run_id
    df['model'] = self.name

    base_cols = ['run_id', 'model', 'datetime']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    for feature_name, feature_description in self.descriptions['features'].items():
        # some multi_twist outputs will not be present depending on the
        # scenario type, so first check
        if feature_name not in df:
            continue
        logging.info(f"Storing point data output for {feature_name}...")
        # .copy() so the column assignments below hit a real frame, not a view
        df_ = df[base_cols + [feature_name]].copy()
        df_['feature_name'] = feature_name
        df_['feature_description'] = feature_description.split('.')[0]
        df_['feature_value'] = df_[feature_name]
        df_ = df_[base_cols + feature_cols]

        # perform bulk insert of entire DF
        db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
        db_session.commit()
def ingest2db(self):
    """Ingest this run's GeoTIFF output into the database.

    Loads GADM admin-2 shapes, stores run metadata and parameters, converts
    the output raster band to points, spatially joins admin areas, and
    bulk-inserts the rows as Output records.

    Fixes vs. previous revision: replaced the `if ...: pass / else:` guard
    with `continue` and collapsed the redundant year/month parameter-type
    branches into one membership test.
    """
    init_db()

    # Load Admin2 shape from GADM and keep only the columns we join on.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label and run_description
    meta = Metadata(run_id=self.run_id,
                    model=self.name,
                    run_description=self.run_description,
                    raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                    point_resolution_meters=1000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB (everything except the run_id itself).
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config['config'].items():
        if param_name == 'run_id':
            continue
        # year and month are numeric; everything else is stored as a string
        param_type = 'integer' if param_name in ('year', 'month') else 'string'
        param = Parameters(run_id=self.run_id,
                           model=self.name,
                           parameter_name=param_name,
                           parameter_value=param_val,
                           parameter_type=param_type)
        db_session.add(param)
    db_session.commit()

    # Process tiff file into point data.
    logging.info("Processing tiff...")
    InRaster = f"{self.install_path}/output/{self.key}"
    feature_name = self.feature_name
    feature_description = self.feature_description
    gdf = raster2gpd(InRaster, feature_name, band=self.band)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model
    gdf['datetime'] = self.start_time
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    # Drop merge artifacts before the insert.
    del gdf['geometry']
    del gdf['index_right']

    # perform bulk insert of entire geopandas DF
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()