def create_tables():
    from models import Image, Imageuri, ImageFragment, Metadata, Annotation
    Image.create_table()
    Imageuri.create_table()
    ImageFragment.create_table()
    Metadata.create_table()
    Annotation.create_table()
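# A minimal sketch of how the peewee models above might be declared and
# bound before create_tables() is called. The field names here are
# assumptions for illustration; only the model names come from the source.
from peewee import SqliteDatabase, Model, CharField

db = SqliteDatabase("images.db")  # hypothetical database file

class BaseModel(Model):
    class Meta:
        database = db

class Image(BaseModel):
    md5 = CharField(unique=True)  # assumed field

# With the models bound, peewee can also create all tables in one call:
# db.create_tables([Image, Imageuri, ImageFragment, Metadata, Annotation])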
def update_tags(self):
    self.TagTree.DeleteAllItems()
    root = self.TagTree.RootItem
    root = self.TagTree.AppendItem(root, "root")
    for item in Metadata.select(Metadata.field).distinct().order_by(Metadata.field):
        field = self.TagTree.AppendItem(root, item.field)
        results = Metadata.select(Metadata.value).distinct().order_by(Metadata.value)
        results = results.where(Metadata.field == item.field)
        for record in results:
            self.TagTree.AppendItem(field, record.value)
        self.TagTree.Expand(field)
def handle_T_key(self):
    # Get the File objects corresponding to the selected thumbnails.
    files = File.select().where(
        File.md5 << [item.Text for item in self.get_selected_thumbs()])

    # Determine the existing tags for these files.
    old_tags = Metadata.filter(Metadata.file << files,
                               Metadata.field.not_in(['import-time']))
    old_tags = sorted(set('%s:"%s"' % (t.field, t.value) for t in old_tags))

    dialog = wx.TextEntryDialog(None, "Tags:", "Modify Tags",
                                value=", ".join(old_tags))
    if dialog.ShowModal() == wx.ID_OK:
        # Determine the new tags for these files.
        new_tags = dialog.GetValue()
        new_tags = [t.strip() for t in new_tags.split(",")]

        # Add any tags that are new.
        for token in set(new_tags) - set(old_tags):
            # Determine the actual field and value.
            if ':' in token:
                field, value = token.split(':', 1)
            else:
                field, value = 'tag', token
            value = value.strip('"')
            # Create records for all selected files.
            for file in files:
                try:
                    Metadata(file=file, field=field, value=value).save()
                except IntegrityError:
                    pass

        # Remove any tags that were removed.
        for token in set(old_tags) - set(new_tags):
            # Determine the actual field and value.
            if ':' in token:
                field, value = token.split(':', 1)
            else:
                field, value = 'tag', token
            value = value.strip('"')
            # Delete the records for all selected files.
            Metadata.delete().where(Metadata.file << files,
                                    Metadata.field == field,
                                    Metadata.value == value).execute()

    # Repaint the tag list.
    self.update_tags()
def ingest_to_db(InRaster, run_id, *, model_name, start, included_months,
                 total_months, params, basin):
    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/PIHM_results/{run_id}.tif",
        run_label=f"{model_name} run for {basin} Basin.",
        point_resolution_meters=200)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():
        if pp == 'basin':
            p_type = 'string'
        else:
            p_type = 'float'
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type=p_type)
        db_session.add(param)
        db_session.commit()

    # Iterate over the bands that should be included (one per month).
    for month in range(1, included_months + 2):
        date_ = start + relativedelta(months=month - 1)
        date_str = date_.strftime("%m/%d/%Y")
        print(f"Processing {model_name} {date_str}")

        # Convert raster band to a GeoPandas dataframe of points.
        feature_name = m['outputs'][0]['name']
        feature_description = m['outputs'][0]['description']
        gdf = raster2gpd(InRaster, feature_name, band=month)

        print("Performing spatial merge")
        # Spatial merge on GADM to obtain admin areas.
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Set run fields: datetime, run_id, model.
        gdf['datetime'] = date_
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        if 'geometry' in gdf:
            del gdf['geometry']
            del gdf['index_right']

        # Perform bulk insert of the entire GeoPandas dataframe.
        print(f"Ingesting {date_str} of {model_name} for basin {basin} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
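# raster2gpd is used throughout these ingest functions but is not defined
# in this section. A minimal sketch of what such a helper might look like,
# assuming rasterio is available; the exact signature and nodata handling
# in the real codebase may differ.
import numpy as np
import geopandas as gpd
import rasterio
from shapely.geometry import Point

def raster2gpd(InRaster, feature_name, band=1, nodataval=-9999):
    """Convert one band of a raster into a GeoDataFrame of point features."""
    with rasterio.open(InRaster) as src:
        data = src.read(band)
        rows, cols = np.where(data != nodataval)  # keep valid cells only
        xs, ys = rasterio.transform.xy(src.transform, rows, cols)
    return gpd.GeoDataFrame({
        'longitude': xs,
        'latitude': ys,
        'feature_name': feature_name,
        'feature_value': data[rows, cols],
        'geometry': [Point(x, y) for x, y in zip(xs, ys)],
    })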
def ingest2db(df_, fsc, params, run_id, model_name):
    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
        run_label=f"{model_name} run for {params['shocked_region']} region.",
        point_resolution_meters=100000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for pp, vv in params.items():
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type="string")
        db_session.add(param)
        db_session.commit()

    # Ingest outputs to DB
    feature_name = fsc['outputs'][0]['name']
    feature_description = fsc['outputs'][0]['description']
    df_['datetime'] = datetime(year=2018, month=1, day=1)
    df_['run_id'] = run_id
    df_['model'] = model_name
    df_['feature_name'] = feature_name
    df_['feature_description'] = feature_description
    df_['feature_value'] = df_[feature_name].apply(lambda x: int(x))
    db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
    db_session.commit()
def ingest_to_db(InRaster, run_id, *, model_name, params, m):
    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=1000000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():
        if pp in ('year', 'month'):
            p_type = 'integer'
        else:
            p_type = 'string'
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type=p_type)
        db_session.add(param)
        db_session.commit()

    band = bands[params['commodity']]

    # Convert raster band to a GeoPandas dataframe of points.
    feature_name = m['outputs'][0]['name']
    feature_description = m['outputs'][0]['description']
    gdf = raster2gpd(InRaster, feature_name, band=band)

    print(f"GDF size is {gdf.shape[0]} before deduplication")
    gdf = gdf.drop_duplicates()
    print(f"GDF size is {gdf.shape[0]} after deduplication")

    print("Performing spatial merge")
    # Spatial merge on GADM to obtain admin areas.
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model.
    gdf['datetime'] = datetime(year=params['year'], month=params['month'], day=1)
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    gdf['feature_description'] = feature_description
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    # Perform bulk insert of the entire GeoPandas dataframe.
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
def get_metadata(page):
    # Find the metadata table at the top of the page.
    table = page.find("table", class_="infobox")
    if table is None:
        raise Exception("Metadata not found on wiki.")
    metadata = Metadata(table)
    return metadata
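# A minimal usage sketch, assuming `page` is a BeautifulSoup document of a
# Wikipedia article (the wiki module used elsewhere in this section suggests
# as much); the URL is a placeholder.
import requests
from bs4 import BeautifulSoup

html = requests.get("https://en.wikipedia.org/wiki/Some_Album").text
page = BeautifulSoup(html, "html.parser")
metadata = get_metadata(page)  # raises if the article has no infobox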
def process_dssat(df, params, dssat, model_name, file):
    """
    Primary function for processing DSSAT
    """
    run_id, model_config, run_obj = gen_run(model_name, params, file)

    # Generate temp CSV and push it to S3.
    df.to_csv("tmp.csv", index=False)
    time.sleep(1)
    try:
        s3_bucket.upload_file("tmp.csv",
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file("tmp.csv",
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            pass

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{file}",
        run_label=df.RUN_NAME.iloc[0],
        point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for p_name, p_value in params.items():
        if p_name == 'rainfall':
            p_value = float(p_value)
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=p_name,
                           parameter_value=p_value,
                           parameter_type=param_types[p_name])
        db_session.add(param)
        db_session.commit()

    gdf = gpd.GeoDataFrame(df)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    return gdf, run_id
def save_graph():
    # Angular sends JSON data by default.
    title = request.json.get("title")
    settings = request.json.get("settings")
    username = request.cookies.get("username")

    new_graph = Graph(settings)
    db.session.add(new_graph)
    db.session.commit()

    new_meta = Metadata(title, new_graph.id, username)
    new_meta.generate_hash()
    db.session.add(new_meta)
    db.session.commit()

    return jsonify({
        "url": new_graph.meta.short_url,
        "result": "Success",
    })
def add_metadata(self, fragment, nasp):
    """
    Adds metadata to a given fragment with an optional namespace attribute.
    """
    try:
        imgmeta = Metadata.get(imgfrag=fragment, namespace=nasp)
        # Bump the version of the existing metadata record. Note that
        # peewee's where() takes expressions, not keyword arguments.
        up_query = Metadata.update(version=imgmeta.version + 1).where(
            Metadata.imgfrag == fragment, Metadata.namespace == nasp)
        up_query.execute()
        return imgmeta
    except Metadata.DoesNotExist:
        imgmeta = Metadata.create(imgfrag=fragment, version=1, namespace=nasp)
        return imgmeta
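# The read-then-update above can race with concurrent writers. peewee can
# express the increment atomically in SQL instead; a sketch, assuming the
# same Metadata model and the fragment/nasp values from the function:
(Metadata
 .update(version=Metadata.version + 1)
 .where(Metadata.imgfrag == fragment, Metadata.namespace == nasp)
 .execute())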
def process(df, params, m, model_name, file):
    """
    Primary function for processing DSSAT
    """
    run_id, model_config, run_obj = gen_run(model_name, params, file)
    try:
        s3_bucket.upload_file(file,
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file(file,
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            pass

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{file}",
        run_label=f"Run for {model_name}",
        point_resolution_meters=m.get("point_resolution_meters", 1000))
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for p_name, p_value in params.items():
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=p_name,
                           parameter_value=p_value,
                           parameter_type=get_type(p_value))
        db_session.add(param)
        db_session.commit()

    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    gdf = gpd.GeoDataFrame(df)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    return gdf, run_id
def ingest_to_db(InRaster, run_id, *, model_name, m):
    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=480)
    db_session.add(meta)
    db_session.commit()

    # Iterate over the bands that should be included (one per year) to
    # ensure that there is continuous annual data.
    for year in range(2009, 2020):
        band = year - 2008
        print(f"Processing {model_name} for year {year}")

        # Convert raster band to a GeoPandas dataframe of points.
        feature_name = m['outputs'][0]['name']
        feature_description = m['outputs'][0]['description']
        gdf = raster2gpd(InRaster, feature_name, band=band,
                         nodataval=np.float64(0.0))

        print("Performing spatial merge")
        # Spatial merge on GADM to obtain admin areas.
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Set run fields: datetime, run_id, model.
        gdf['datetime'] = datetime(year=year, month=1, day=1)
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        if 'geometry' in gdf:
            del gdf['geometry']
            del gdf['index_right']

        # Perform bulk insert of the entire GeoPandas dataframe.
        print(f"Ingesting {year} of {model_name} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
def image_detail(imageid):
    """
    View that loads the metadata of a requested image (if there is any)
    and passes it to the template.
    """
    try:
        image = Image.get(id=imageid)
        imgfrag = ImageFragment.get(image=image, x=0, y=0, visible=True)
        imgmeta = Metadata.get(imgfrag=imgfrag,
                               namespace="http://www.w3.org/ns/ma-ont#")
        annos = Annotation.get(imgmeta=imgmeta)
        return render_template('image_detail.html', img=image, imgmeta=imgmeta)
    except Image.DoesNotExist:
        flash('The image ID was not found. Please choose another image.',
              'error')
        return render_template('images.html')
    except ImageFragment.DoesNotExist:
        return render_template('image_detail.html', img=image)
def test_create_metadata_and_verify_filters():
    def pk():
        n = 0
        step = 1
        while True:
            yield n
            n = n + step

    meta_data = []  # contains metadata
    n = 1000
    primary_key = pk()
    for _ in range(n):
        random_title = ''.join(random.choices(string.ascii_uppercase, k=10))
        meta_data.append(Metadata(next(primary_key), title=random_title))

    f = Filter(meta_data, ['kind'], ['book'])
    assert len(meta_data) > len(f.tofilter)

    g = Filter(meta_data, ['kind', 'category'], ['book', 'textbook'])
    assert len(meta_data) > len(g.tofilter)
    assert len(f.tofilter) > len(g.tofilter)
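# The hand-rolled pk() generator above is equivalent to itertools.count()
# from the standard library; a sketch:
import itertools

primary_key = itertools.count()  # yields 0, 1, 2, ...
assert [next(primary_key) for _ in range(3)] == [0, 1, 2]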
def ImportJsonOnMenuSelection(self, event):
    openFileDialog = wx.FileDialog(self, "Open Metadata file", "", "",
                                   "JSON files (*.json)|*.json",
                                   wx.FD_OPEN | wx.FD_FILE_MUST_EXIST)
    if openFileDialog.ShowModal() == wx.ID_CANCEL:
        return  # the user changed their mind

    # Proceed loading the file chosen by the user.
    data = None
    with open(openFileDialog.GetPath(), 'r') as f:
        data = json.load(f)

    for md5, metadata in data.items():
        try:
            file_rec = File.get(File.md5 == md5)
        except Exception as e:
            logger.error("Unable to get file for md5:%s", md5)
            logger.exception(e)
            continue  # Don't do more work with this file

        for field, values in metadata.items():
            if field == 'import-time':
                # Skip this field rather than aborting the whole import.
                continue
            for value in values:
                try:
                    Metadata(file=file_rec, field=field, value=value).save()
                except peewee.IntegrityError:
                    logger.info("Duplicate metadata ignored (%s, %s, %s)",
                                md5, field, value)
                except Exception as e:
                    logger.error("Unable to save metadata (%s, %s, %s)",
                                 md5, field, value)
                    logger.exception(e)

    self.update_metadata()
    self.update_tags()
def load_data(conn, input_filepath):
    """
    Load data from input file to postgres
    """
    Base.metadata.drop_all(conn.engine)
    Base.metadata.create_all(conn.engine)

    data = pd.read_csv(input_filepath,
                       dtype={
                           'sha': str,
                           'pmcid': str,
                           'pubmed_id': str,
                           'doi': str,
                           'Microsoft Academic Paper ID': str,
                           'WHO #Covidence': str
                       })
    data.columns = [
        'cord_uid', 'sha', 'source_x', 'title', 'doi', 'pmcid', 'pubmed_id',
        'license', 'abstract', 'publish_time', 'authors', 'journal',
        'microsoft_academic_paper_id', 'who_covidence', 'has_pdf_parse',
        'has_pmc_xml_parse', 'full_text_file', 'url'
    ]

    # Missing CSV values come through as NaN rather than None, so use
    # pd.notna() to decide which key to fall back on.
    data['paper_key'] = data.apply(
        lambda row: row.sha.strip() if pd.notna(row.sha) else row.pmcid.strip(),
        axis=1)
    data = data.drop_duplicates(subset=['paper_key'])

    # Round-trip through JSON to coerce NaN values to None for the ORM.
    data = data.to_json(orient='records')
    data = json.loads(data)
    for rec in data:
        rec['paper_key'] = rec['sha'] if rec['sha'] is not None else rec['pmcid']
        if rec['paper_key'] is not None:
            conn.session.add(Metadata(**rec))
def get_metadata(self, imgfrag):
    # peewee's where() takes expressions, not keyword arguments.
    return Metadata.select().where(Metadata.imgfrag == imgfrag)
def test_create_no_primary_key_metadata():
    with pytest.raises(TypeError):
        m = Metadata(title='Some title')
def process_herbage(herbage, scen, scenarios, grange):
    """
    Primary function for processing grange
    """
    # Subset for the correct scenario.
    herbage = herbage[herbage['scenario'] == scen]
    herbage['geometry'] = herbage.apply(
        lambda x: Point(x.longitude, x.latitude), axis=1)

    # Obtain scenario parameters.
    params = scenarios[scenarios['scenario'] == scen].iloc[0].to_dict()
    params = format_params(params)

    run_id, model_config, run_obj = gen_run(model_name, params)

    # Generate temp CSV and push it to S3.
    herbage.to_csv("tmp_g.csv", index=False)
    time.sleep(1)
    try:
        s3_bucket.upload_file("tmp_g.csv",
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file("tmp_g.csv",
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            pass

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
        run_label=herbage.description.iloc[0],
        point_resolution_meters=25000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB; ensure that no null parameters are stored.
    for param_spec in grange['parameters']:
        if not pd.isna(params[param_spec['name']]):
            if param_spec['metadata']['type'] == 'ChoiceParameter':
                p_type = 'string'
            elif param_spec['name'] in ('fertilizer', 'sowing_window_shift'):
                p_type = 'int'
            else:
                p_type = 'float'
            p_value = params[param_spec['name']]
            param = Parameters(run_id=run_id,
                               model=model_name,
                               parameter_name=param_spec['name'],
                               parameter_value=p_value,
                               parameter_type=p_type)
            db_session.add(param)
            db_session.commit()

    gdf = gpd.GeoDataFrame(herbage)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del gdf['geometry']
        del gdf['index_right']

    return gdf, run_id
def test_create_empty_metadata():
    with pytest.raises(TypeError):
        m = Metadata()
def test_create_no_title_metadata():
    with pytest.raises(TypeError):
        m = Metadata(10)
    2: [2014, 2013, 2012, 2011],
    3: [2010, 2009, 2008, 2007],
    4: [2006, 2005, 2004, 2003]
}

for model_name in models:
    model_config = {"config": {"format": 'tif'}, "name": model_name}
    run_id = sha256(json.dumps(model_config).encode('utf-8')).hexdigest()

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=f"https://model-service.worldmodelers.com/result_file/{run_id}.tif",
        run_label=model_name.replace('_', ' ').title(),
        point_resolution_meters=2000)
    db_session.add(meta)
    db_session.commit()

    # Iterate over the 4 bands.
    for band, years in bands.items():
        print(f"Processing {model_name} band {band}")

        # Convert raster band to a GeoPandas dataframe of points.
        InRaster = f"data/{atlas_lookup[model_name]['tif']}"
        feature_name = atlas_lookup[model_name]['feature_name']
        feature_description = atlas_lookup[model_name]['feature_description']
        gdf = raster2gpd(InRaster, feature_name, band=band)
def test_create_metadata():
    m = Metadata(random.randint(1, 10), 'Some title')
    assert isinstance(m, Metadata)
files = [i for i in os.listdir('Africa_1km_Population/') if '.tif' in i]

# Specify possible parameters.
years = [2000, 2005, 2010, 2015, 2020]

for year in years:
    params = {'year': year}
    print(params)

    run_name = f"AFR_PPP_{year}_adj_v2.tif"
    run_id, model_config = gen_run(year)

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_config['name'],
        raw_output_link=f"https://world-modelers.s3.amazonaws.com/results/world_population_africa/{run_name}",
        run_label="World Population Africa",
        point_resolution_meters=1000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for name, val in params.items():
        param = Parameters(run_id=run_id,
                           model=model_config['name'],
                           parameter_name=name,
                           parameter_value=val,
                           parameter_type='string')
        db_session.add(param)
        db_session.commit()
def ingest2db(self):
    init_db()

    # Load Admin2 shape from GADM.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{self.config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB.
    # TODO: add run_label and run_description
    logging.info("Storing metadata...")
    meta = Metadata(
        run_id=self.run_id,
        model=self.name,
        run_description=self.descriptions['management_practice'][
            self.model_config['management_practice']],
        raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
        # 5 arc minutes (~10km)
        point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB.
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            continue
        param = Parameters(
            run_id=self.run_id,
            model=self.name,
            parameter_name=param_name,
            parameter_value=param_val,
            parameter_type=self.descriptions['parameters'][param_name])
        db_session.add(param)
        db_session.commit()

    # Process CSV and normalize it.
    logging.info("Processing points...")

    # Get result file path.
    if self.model_config["management_practice"] == "combined":
        # Combined CSV.
        path = f"{self.result_path}/out/eth_docker/test/pp.csv"
    else:
        # Individual management practices.
        m = self.model_config["management_practice"]
        path = f"{self.result_path}/out/eth_docker/test/{m}/pp_{m}.csv"

    df = pd.read_csv(path, index_col=False)
    df['latitude'] = df['LATITUDE']
    df['longitude'] = df['LONGITUDE']
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    df['year'] = df['HDAT'].apply(lambda x: int(str(x)[:4]))
    df['days'] = df['HDAT'].apply(lambda x: int(str(x)[4:]))
    df['datetime'] = df.apply(
        lambda x: datetime(x.year, 1, 1) + timedelta(x.days - 1), axis=1)
    df['run_id'] = self.run_id
    df['model'] = self.name
    df['Production'] = df['HWAH'] * df['HARVEST_AREA']

    # For combined runs only, we need to convert the run name to an encoded
    # float so that it can go into the database.
    if 'RUN_NAME' in df:
        df['management_practice'] = df['RUN_NAME'].apply(
            lambda x: self.descriptions['encoding'][x])

    gdf = gpd.GeoDataFrame(df)
    # Spatial merge on GADM to obtain admin areas.
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = ['run_id', 'model', 'latitude', 'longitude',
                 'datetime', 'admin1', 'admin2', 'state', 'country']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature, then
    # upload the GDF per feature to ensure that rows are added for each
    # feature.
    for feature_name, feature_description in self.descriptions['features'].items():
        # Specific handling for the "combined" file: the management_practice
        # feature only applies to combined runs, so skip it otherwise.
        if feature_name == 'management_practice':
            if self.model_config["management_practice"] != "combined":
                continue
        cols_to_select = base_cols + [feature_name]
        gdf_ = gdf[cols_to_select]  # generate new interim GDF
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # Perform bulk insert of the entire GeoPandas dataframe.
        logging.info(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
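# bulk_insert_mappings loads the whole dataframe into a single batch; for
# very large outputs it can be worth chunking the inserts. A minimal sketch,
# assuming the same db_session and Output mapping; the chunk size is an
# arbitrary illustration.
def bulk_insert_chunked(session, mapper, records, chunk_size=50000):
    """Insert a list of row dicts in fixed-size chunks, committing each."""
    for i in range(0, len(records), chunk_size):
        session.bulk_insert_mappings(mapper, records[i:i + chunk_size])
        session.commit()

# usage: bulk_insert_chunked(db_session, Output, gdf_.to_dict(orient="records"))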
def UNTAGGED():
    tagged_files = Metadata.select(Metadata.file).where(
        Metadata.field != 'import-time')
    return set(File.select().where(File.id.not_in(tagged_files)))
def to_json(f):
    d = defaultdict(set)
    for m in Metadata.select().where(Metadata.file == f):
        d[m.field].add(m.value)
    return d
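# Despite its name, to_json() returns a dict of sets, which json.dumps()
# cannot serialize directly; a usage sketch converting the sets to sorted
# lists first (f is assumed to be a File record):
import json

d = to_json(f)
print(json.dumps({field: sorted(values) for field, values in d.items()}))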
def main():
    if len(sys.argv) < 3:
        print("usage: album-dl yt_url wiki_url")
        exit()

    pool = ThreadPool(processes=1)
    if not os.path.exists("/tmp/album-dl"):
        os.makedirs("/tmp/album-dl")

    yt_url = sys.argv[1]
    # yt_url = input("Enter youtube url:\n")
    # yt_url = "https://www.youtube.com/watch?v=DHd51Y7dhW0&list=PLONR6CCwpAARTIZ69LUgLW1cdEyM4Rn5e"
    song_downloader = pool.apply_async(ytdl.song_titles, (yt_url, ))
    print("Downloading youtube playlist metadata...")

    wiki_url = sys.argv[2]
    # wiki_url = input("Enter wikipedia url:\n")
    # wiki_url = "https://en.wikipedia.org/wiki/When_Dream_and_Day_Unite"
    try:
        wiki_page = wiki.capture_page(wiki_url)
        metadata = wiki.get_metadata(wiki_page)
        wiki.download_art(wiki_page)
        track_tables = wiki.get_track_tables(wiki_page)
        table_indicies, track_renumber = select_tables(track_tables)
        tracks = wiki.get_tracks(track_tables, track_renumber, table_indicies)

        print()
        print(metadata)
        for track in tracks:
            print(track)

        while not confirm():
            print("What do you want to change?")
            print("[artist|album|year|genre|track ##]")
            k = input(": ").lower()
            if k in metadata.keys():
                newval = input("New value for {}: ".format(k))
                metadata[k] = newval
            elif k and k.split()[0] == "track":
                num = k.split()[1]
                for track in tracks:
                    if num == track.num:
                        newval = input(
                            "New title for track {}: ".format(track.num))
                        track.title = newval
            else:
                print("Not a valid field.")
            print()
            print(metadata)
            for track in tracks:
                print(track)

        print("Downloading youtube playlist metadata...")
        last_msg = ""
        current, total = ytdl.msg_status
        while not song_downloader.ready():
            if ytdl.msg_status != last_msg:
                last_msg = ytdl.msg_status
                current, total = ytdl.msg_status
        yt_song_titles = song_downloader.get()

        mapping, unmatched = match.mapTitlesToFiles(tracks, yt_song_titles)
        print_mapping(mapping, unmatched)
        while not confirm():
            if unmatched:
                song = unmatched.pop(0)
                print()
                for track in mapping:
                    print(track)
                print("What does {} match with?".format(song.title))
                i = input("> ")
                success = False
                for track in mapping:
                    if track.num == i:
                        mapping[track] = song
                        print_mapping(mapping, unmatched)
                        success = True
                        break
                if not success:
                    print("Not a valid track number")
            print_mapping(mapping, unmatched)

        ytdl.download_songs(yt_url)
        new_names = tag.tag_songs(tracks, metadata, mapping)

        path = "/home/rutrum/music/{}/{}".format(metadata["artist"],
                                                 metadata["album"])
        os.makedirs(path, exist_ok=True)
        for name in new_names:
            old_path = "/tmp/album-dl/{}.mp3".format(name["old"])
            new_path = "{}/{}.mp3".format(path, name["new"])
            shutil.move(old_path, new_path)  # can move across file systems
            # os.rename(old_path, new_path)  # cannot
    except Exception as ex:
        cprint(ex.args[0], "red", attrs=["bold"], file=sys.stderr)
        raise ex
def ingest2db(self):
    init_db()

    # Load Admin2 shape from GADM.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB.
    # TODO: add run_label and run_description
    meta = Metadata(
        run_id=self.run_id,
        model=self.name,
        run_description=self.run_description,
        raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
        point_resolution_meters=1000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB.
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config['config'].items():
        if param_name == 'run_id':
            continue
        if param_name in ('year', 'month'):
            param_type = 'integer'
        else:
            param_type = 'string'
        param = Parameters(run_id=self.run_id,
                           model=self.name,
                           parameter_name=param_name,
                           parameter_value=param_val,
                           parameter_type=param_type)
        db_session.add(param)
        db_session.commit()

    # Process tiff file into point data.
    logging.info("Processing tiff...")
    InRaster = f"{self.install_path}/output/{self.key}"
    feature_name = self.feature_name
    feature_description = self.feature_description
    gdf = raster2gpd(InRaster, feature_name, band=self.band)

    # Spatial merge on GADM to obtain admin areas.
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model.
    gdf['datetime'] = self.start_time
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    del gdf['geometry']
    del gdf['index_right']

    # Perform bulk insert of the entire GeoPandas dataframe.
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
def ingest2db(year, df, filename):
    model_name = "flood_index_model"
    run_id = gen_run_id(year)
    init_db()

    # Load Admin2 shape from GADM.
    print("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB.
    # TODO: add run_label and run_description
    print("Storing metadata...")
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        run_description=f"{model_name} run for {year}",
        raw_output_link=f'https://s3.amazonaws.com/world-modelers/flood_index_model/{filename}.nc',
        # 0.1 degrees (~10km)
        point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB.
    print("Storing parameters...")
    param = Parameters(run_id=run_id,
                       model=model_name,
                       parameter_name="year",
                       parameter_value=year,
                       parameter_type="integer")
    db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it.
    print("Processing points...")
    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    df['run_id'] = run_id
    df['model'] = model_name

    gdf = gpd.GeoDataFrame(df)
    # Spatial merge on GADM to obtain admin areas.
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = [
        'run_id', 'model', 'latitude', 'longitude', 'datetime', 'admin1',
        'admin2', 'state', 'country'
    ]
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature, then
    # upload the GDF per feature to ensure that rows are added for each
    # feature.
    for feature_name, feature_description in features.items():
        cols_to_select = base_cols + [feature_name]
        gdf_ = gdf[cols_to_select]  # generate new interim GDF
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # Perform bulk insert of the entire GeoPandas dataframe.
        print(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
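# The per-feature loop above is essentially a wide-to-long reshape; pandas
# can express the same thing with melt(). A sketch, assuming every feature
# column exists in gdf and that `features` is the same name -> description
# dict used in the loop:
import pandas as pd

long_df = pd.melt(gdf,
                  id_vars=base_cols,
                  value_vars=list(features.keys()),
                  var_name='feature_name',
                  value_name='feature_value')
long_df['feature_description'] = long_df['feature_name'].map(features)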
def ingest2db(self):
    init_db()

    # Add metadata object to DB.
    desc = (f"{self.name} run for {self.scenario_type} scenario with start "
            f"year {self.start_year} and end year {self.end_year}")
    if self.scenario_type == 'production_failure_scenario':
        desc += (f". Shock severity was set to {self.shock_severity} and "
                 f"the shocked region was {self.shocked_region}.")
    logging.info("Storing metadata...")
    meta = Metadata(
        run_id=self.run_id,
        model=self.name,
        run_label=f"{self.name}: {self.scenario_type}",
        run_description=desc,
        raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}')
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB.
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name == 'run_id':
            continue
        param = Parameters(
            run_id=self.run_id,
            model=self.name,
            parameter_name=param_name,
            parameter_value=param_val,
            parameter_type=self.descriptions['parameters'][param_name])
        db_session.add(param)
        db_session.commit()

    # Process CSV and normalize it.
    logging.info("Processing timeseries...")
    df = pd.read_csv(self.output)
    df = df.transpose().reset_index()
    df = df.rename(columns=dict(zip(list(df.columns), list(df.iloc[0]))))[1:]
    df = df.rename(columns={'Unnamed: 0': 'Date'})
    df['datetime'] = df.Date.apply(lambda x: datetime(
        year=int(x.split('.')[1]), month=int(x.split('.')[0]), day=1))
    del df['Date']
    df['run_id'] = self.run_id
    df['model'] = self.name

    base_cols = ['run_id', 'model', 'datetime']
    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    for feature_name, feature_description in self.descriptions['features'].items():
        # Some multi_twist outputs will not be present depending on the
        # scenario type, so check first.
        if feature_name in df:
            logging.info(f"Storing point data output for {feature_name}...")
            cols_to_select = base_cols + [feature_name]
            df_ = df[cols_to_select]  # generate new interim DF
            df_['feature_name'] = feature_name
            df_['feature_description'] = feature_description.split('.')[0]
            df_['feature_value'] = df_[feature_name]
            df_ = df_[base_cols + feature_cols]

            # Perform bulk insert of the entire dataframe.
            db_session.bulk_insert_mappings(Output,
                                            df_.to_dict(orient="records"))
            db_session.commit()
def ingest2db(self):
    init_db()

    # Load Admin2 shape from GADM.
    logging.info("Loading GADM shapes...")
    admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB.
    # TODO: add run_label and run_description
    meta = Metadata(
        run_id=self.run_id,
        model=self.name,
        run_description=self.features[self._type]['run_description'],
        raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
        point_resolution_meters=5000)
    logging.info("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB.
    logging.info("Storing parameters...")
    for param_name, param_val in self.model_config.items():
        if param_name != 'run_id':
            if param_name == 'year':
                param_type = 'integer'
            elif param_name == 'bbox':
                param_type = 'array'
                param_val = json.dumps(param_val)
            elif param_name == 'dekad':
                param_type = 'integer'
                param_val = int(param_val)
            else:
                param_type = 'string'
            param = Parameters(run_id=self.run_id,
                               model=self.name,
                               parameter_name=param_name,
                               parameter_value=param_val,
                               parameter_type=param_type)
            db_session.add(param)
            db_session.commit()

    # Process tiff file into point data.
    logging.info("Processing tiff...")
    InRaster = f"{self.result_path}/{self.result_name}.tiff"
    feature_name = self.features[self._type]['feature_name']
    feature_description = self.features[self._type]['feature_description']
    gdf = raster2gpd(InRaster, feature_name)

    # Spatial merge on GADM to obtain admin areas.
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model.
    # First convert dekad of year to day of year. A dekad is a 10-day
    # period, so dekad 01 covers days 1 through 10 and should map to Jan 1.
    gdf['datetime'] = datetime(self.year, 1, 1) + timedelta(
        days=(int(self.dekad) - 1) * 10)
    gdf['run_id'] = self.run_id
    gdf['model'] = self.name
    gdf['feature_description'] = feature_description
    del gdf['geometry']
    del gdf['index_right']

    # Perform bulk insert of the entire GeoPandas dataframe.
    logging.info("Storing point data output...")
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
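# A quick worked check of the dekad-to-date mapping above, using the
# definition from the comment (dekad 01 covers days 1-10); the year is an
# arbitrary example:
from datetime import datetime, timedelta

year = 2020
for dekad in (1, 2, 25):
    d = datetime(year, 1, 1) + timedelta(days=(dekad - 1) * 10)
    print(dekad, d.strftime("%Y-%m-%d"))
# dekad 1 -> 2020-01-01, dekad 2 -> 2020-01-11, dekad 25 -> 2020-08-28
# (day 241, the first day of the dekad that ends on day 250)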
params = {
    'crop': crop,
    'irrigation': irrig,
    'nitrogen': nit,
    'stat': stat
}
print(params)

run_name = gen_global(crop, irrig, nit, stat)
run_id, model_config = gen_run(crop, irrig, nit, stat)

# Add metadata object to DB
meta = Metadata(
    run_id=run_id,
    model=model_config['name'],
    raw_output_link=f"https://world-modelers.s3.amazonaws.com/results/yield_anomalies_model/{run_name}",
    run_label="LPJmL Yield Anomalies",
    point_resolution_meters=52000)
db_session.add(meta)
db_session.commit()

# Add parameters to DB
for name, val in params.items():
    param = Parameters(run_id=run_id,
                       model=model_config['name'],
                       parameter_name=name,
                       parameter_value=val,
                       parameter_type='string')
    db_session.add(param)
    db_session.commit()