Code example #1
File: app.py Project: annebaui/srimato
def create_tables():
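    # Models are imported inside the function (a common way to avoid circular
    # imports); one database table is created per peewee model.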
    from models import Image, Imageuri, ImageFragment, Metadata, Annotation
    Image.create_table()
    Imageuri.create_table()
    ImageFragment.create_table()
    Metadata.create_table()
    Annotation.create_table()
Code example #2
File: __init__.py Project: mklauber/Curator
    def update_tags(self):
        # Rebuild the tag tree: one branch per distinct metadata field,
        # with the field's distinct values as its children.
        self.TagTree.DeleteAllItems()
        root = self.TagTree.RootItem
        root = self.TagTree.AppendItem(root, "root")
        for item in Metadata.select(Metadata.field).distinct().order_by(Metadata.field):
            field = self.TagTree.AppendItem(root, item.field)
            results = Metadata.select(Metadata.value).distinct().order_by(Metadata.value)
            results = results.where(Metadata.field == item.field)
            for record in results:
                self.TagTree.AppendItem(field, record.value)
            self.TagTree.Expand(field)
Code example #3
File: __init__.py Project: mklauber/Curator
    def handle_T_key(self):
        # Get the File objects corresponding to the selected thumbnails
        files = File.select().where(
            File.md5 << [item.Text for item in self.get_selected_thumbs()])

        # Determine the existing tags for these files.
        old_tags = Metadata.filter(Metadata.file << files,
                                   Metadata.field.not_in(['import-time']))
        old_tags = sorted(
            list(set(['%s:"%s"' % (t.field, t.value) for t in old_tags])))

        dialog = wx.TextEntryDialog(None,
                                    "Tags:",
                                    "Modifiy Tags",
                                    value=", ".join(old_tags))
        if dialog.ShowModal() == wx.ID_OK:

            # Determine the new tags for these files.
            new_tags = dialog.GetValue()
            new_tags = [t.strip() for t in new_tags.split(",")]

            # Add any new tags that have been added.
            for token in set(new_tags) - set(old_tags):
                # Determine the actual field and value
                if ':' in token:
                    field, value = token.split(':', 1)
                else:
                    field, value = 'tag', token
                value = value.strip('"')
                # Create records for all selected files.
                for file in files:
                    try:
                        Metadata(file=file, field=field, value=value).save()
                    except IntegrityError:
                        pass

            # Remove any tags that were deleted in the dialog.
            for token in set(old_tags) - set(new_tags):
                # Determine the actual field and value
                if ':' in token:
                    field, value = token.split(':', 1)
                else:
                    field, value = 'tag', token
                # Delete the matching records for all selected files.
                value = value.strip('"')
                Metadata.delete().where(Metadata.file << files,
                                        Metadata.field == field,
                                        Metadata.value == value).execute()
        # Repaint the tag list.
        self.update_tags()
Code example #4
def ingest_to_db(InRaster, run_id, *,
                model_name, start, included_months, total_months,
                params, basin):

    # Add metadata object to DB
    meta = Metadata(run_id=run_id, 
                    model=model_name,
                    raw_output_link= f"https://model-service.worldmodelers.com/results/PIHM_results/{run_id}.tif",
                    run_label=f"{model_name} run for {basin} Basin.",
                    point_resolution_meters=200)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():

        if pp == 'basin':
            p_type = 'string'
        else:
            p_type = 'float'
            
        param = Parameters(run_id=run_id,
                          model=model_name,
                          parameter_name=pp,
                          parameter_value=vv,
                          parameter_type=p_type
                          )
        db_session.add(param)
        db_session.commit()        
        
    # iterate over the bands that should be included (1 per month)
    for month in range(1, included_months + 2):
        date_ = start + relativedelta(months=month-1)
        date_str = date_.strftime("%m/%d/%Y")        
        print(f"Processing {model_name} {date_str}")
        # Convert Raster to GeoPandas
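        # NOTE: "m" (the model metadata dict) is not defined in this snippet;
        # in the source project it is presumably a module-level global.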
        feature_name = m['outputs'][0]['name']
        feature_description = m['outputs'][0]['description']
        gdf = raster2gpd(InRaster,feature_name,band=month)

        print(f"Performing spatial merge")
        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
        
        # Set run fields: datetime, run_id, model
        gdf['datetime'] = date_
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        if 'geometry' in gdf:
            del(gdf['geometry'])
            del(gdf['index_right'])

        # perform bulk insert of entire geopandas DF
        print(f"Ingesting {date_str} of {model_name} for basin {basin} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()    
Code example #5
def ingest2db(df_, fsc, params, run_id, model_name):

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=
        f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
        run_label=f"{model_name} run for {params['shocked_region']} region.",
        point_resolution_meters=100000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for pp, vv in params.items():
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type="string")
        db_session.add(param)
        db_session.commit()

    # Ingest outputs to DB
    feature_name = fsc['outputs'][0]['name']
    feature_description = fsc['outputs'][0]['description']
    df_['datetime'] = datetime(year=2018, month=1, day=1)
    df_['run_id'] = run_id
    df_['model'] = model_name
    df_['feature_name'] = feature_name
    df_['feature_description'] = feature_description
    df_['feature_value'] = df_[feature_name].apply(lambda x: int(x))

    db_session.bulk_insert_mappings(Output, df_.to_dict(orient="records"))
    db_session.commit()
Code example #6
def ingest_to_db(InRaster, run_id, *, model_name, params, m):

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=
        f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=1000000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    for pp, vv in params.items():
        if pp == 'year' or pp == 'month':
            p_type = 'integer'
        else:
            p_type = 'string'

        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=pp,
                           parameter_value=vv,
                           parameter_type=p_type)
        db_session.add(param)
        db_session.commit()

    band = bands[params['commodity']]
    # Convert Raster to GeoPandas
    feature_name = m['outputs'][0]['name']
    feature_description = m['outputs'][0]['description']
    gdf = raster2gpd(InRaster, feature_name, band=band)

    print(f"GDF size is {gdf.shape[0]} before rounding lat/lon")
    gdf = gdf.drop_duplicates()
    print(f"GDF size is {gdf.shape[0]} after rounding lat/lon")

    print(f"Performing spatial merge")
    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    # Set run fields: datetime, run_id, model
    gdf['datetime'] = datetime(year=params['year'],
                               month=params['month'],
                               day=1)
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    gdf['feature_description'] = feature_description
    if 'geometry' in gdf:
        del (gdf['geometry'])
        del (gdf['index_right'])

    # perform bulk insert of entire geopandas DF
    db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
    db_session.commit()
Code example #7
def get_metadata(page):

    # Find the metadata table at top of page
    table = page.find("table", class_="infobox")
    if table is None:
        raise Exception("Metadata not found on wiki.")

    metadata = Metadata(table)
    return metadata
Code example #8
def process_dssat(df, params, dssat, model_name, file):
    """
    Primary function for processing DSSAT
    """

    run_id, model_config, run_obj = gen_run(model_name, params, file)

    # generate temp CSV and push it to S3
    df.to_csv("tmp.csv", index=False)
    time.sleep(1)
    try:
        s3_bucket.upload_file("tmp.csv",
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file("tmp.csv",
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            # the retry also failed; continue without the raw CSV upload
            pass

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=
        f"https://model-service.worldmodelers.com/results/{model_name}_results/{file}",
        run_label=df.RUN_NAME.iloc[0],
        point_resolution_meters=10000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for p_name, p_value in params.items():
        if p_name == 'rainfall':
            p_value = float(p_value)
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=p_name,
                           parameter_value=p_value,
                           parameter_type=param_types[p_name])
        db_session.add(param)
        db_session.commit()

    gdf = gpd.GeoDataFrame(df)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del (gdf['geometry'])
        del (gdf['index_right'])

    return gdf, run_id
Code example #9
def save_graph():
    # Angular sends json data by default
    title = request.json.get("title")
    settings = request.json.get("settings")
    username = request.cookies.get("username")

    new_graph = Graph(settings)
    db.session.add(new_graph)
    db.session.commit()

    new_meta = Metadata(title, new_graph.id, username)
    new_meta.generate_hash()
    db.session.add(new_meta)
    db.session.commit()

    return jsonify({
        "url": new_graph.meta.short_url,
        "result": "Success",
    })
Code example #10
File: imagecontroller.py Project: annebaui/srimato
    def add_metadata(self, fragment, nasp):
        """ Adds metadata to a given fragment
            with an optional namespace attribute
        """
        try:
            # Bump the version of the existing metadata record.
            imgmeta = Metadata.get(imgfrag=fragment, namespace=nasp)
            up_query = Metadata.update(version=imgmeta.version + 1).where(
                (Metadata.imgfrag == fragment) & (Metadata.namespace == nasp))
            up_query.execute()
            return imgmeta
        except Metadata.DoesNotExist:
            # No record yet: create the first version (create() already persists it).
            return Metadata.create(imgfrag=fragment, version=1, namespace=nasp)
Code example #11
def process(df, params, m, model_name, file):
    """
    Primary function for processing DSSAT
    """

    run_id, model_config, run_obj = gen_run(model_name, params, file)

    try:
        s3_bucket.upload_file(file,
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file(file,
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            # the retry also failed; continue without the raw file upload
            pass

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=
        f"https://model-service.worldmodelers.com/results/{model_name}_results/{file}",
        run_label=f"Run for {model_name}",
        point_resolution_meters=m.get("point_resolution_meters", 1000))
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for p_name, p_value in params.items():
        param = Parameters(run_id=run_id,
                           model=model_name,
                           parameter_name=p_name,
                           parameter_value=p_value,
                           parameter_type=get_type(p_value))
        db_session.add(param)
        db_session.commit()

    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
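    # Spatially join the points against GADM level-2 boundaries ("admin2" is
    # presumably a module-level GeoDataFrame in the source project).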
    gdf = gpd.GeoDataFrame(df)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del (gdf['geometry'])
        del (gdf['index_right'])

    return gdf, run_id
Code example #12
def ingest_to_db(InRaster, run_id, *, model_name, m):

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=
        f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.tif",
        run_label=f"{model_name} run.",
        point_resolution_meters=480)
    db_session.add(meta)
    db_session.commit()

    # iterate over the bands that should be included (1 per month)
    for year in range(2009, 2020):
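        # Raster band 1 holds 2009; each subsequent band is the following year.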
        band = year - 2008
        print(f"Processing {model_name} for year {year}")
        # Convert Raster to GeoPandas
        feature_name = m['outputs'][0]['name']
        feature_description = m['outputs'][0]['description']
        gdf = raster2gpd(InRaster,
                         feature_name,
                         band=band,
                         nodataval=np.float64(0.0))

        print(f"Performing spatial merge")
        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Set run fields: datetime, run_id, model
        gdf['datetime'] = datetime(year=year, month=1, day=1)
        gdf['run_id'] = run_id
        gdf['model'] = model_name
        gdf['feature_description'] = feature_description
        if 'geometry' in gdf:
            del (gdf['geometry'])
            del (gdf['index_right'])

        # perform bulk insert of entire geopandas DF
        print(f"Ingesting {year} of {model_name} to database\n")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
Code example #13
File: views.py Project: annebaui/srimato
def image_detail(imageid):

    """
        Function that is called when a specific image is requested.
        The metadata of the requested image is loaded (if there is any)
        and handed to the template.
    """

    try: 
        image = Image.get(id=imageid)
        imgfrag = ImageFragment.get(image=image, x=0, y=0, visible=True)
        imgmeta = Metadata.get(imgfrag=imgfrag, namespace="http://www.w3.org/ns/ma-ont#")
        annos = Annotation.get(imgmeta=imgmeta)
        return render_template('image_detail.html', img=image, imgmeta=imgmeta)
    except Imageuri.DoesNotExist:
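        # German: "The image ID was not found. Please choose another image."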
        flash('Die Bildnummer wurde nicht gefunden. Bitte waehle ein anderes Bild.', 'error')
        return render_template('images.html')
    except ImageFragment.DoesNotExist:
        return render_template('image_detail.html', img=image)
Code example #14
File: tests.py Project: lvaliente/test-project
def test_create_metadata_and_verify_filters():
    def pk():
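        # Infinite generator yielding 0, 1, 2, ... to serve as primary keys.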
        n = 0
        step = 1
        while True:
            yield n
            n = n + step

    meta_data = []  # contains metadata

    n = 1000
    primary_key = pk()
    for _ in range(n):
        random_title = ''.join(random.choices(string.ascii_uppercase, k=10))
        meta_data.append(Metadata(next(primary_key), title=random_title))

    f = Filter(meta_data, ['kind'], ['book'])
    assert len(meta_data) > len(f.tofilter)

    g = Filter(meta_data, ['kind', 'category'], ['book', 'textbook'])

    assert len(meta_data) > len(g.tofilter)
    assert len(f.tofilter) > len(g.tofilter)
Code example #15
def ImportJsonOnMenuSelection(self, event):
    openFileDialog = wx.FileDialog(self, "Open Metadata file", "", "",
                                   "JSON files (*.json)|*.json",
                                   wx.FD_OPEN | wx.FD_FILE_MUST_EXIST)

    if openFileDialog.ShowModal() == wx.ID_CANCEL:
        return  # the user changed idea...

    # proceed loading the file chosen by the user
    # this can be done with e.g. wxPython input streams:
    data = None
    with open(openFileDialog.GetPath(), 'r') as f:
        data = json.load(f)

    for md5, metadata in data.items():
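        # The imported JSON maps each file's md5 hash to a {field: [values]} dict.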
        try:
            f = File.get(File.md5 == md5)
        except Exception as e:
            logger.error("Unable to get file for md5:%s", md5)
            logger.exception(e)
            continue  # Don't do more work with this file
        for field, values in metadata.items():
            if field == 'import-time':
                continue  # skip the bookkeeping field instead of aborting the import
            for value in values:
                try:
                    Metadata(file=f, field=field, value=value).save()
                except peewee.IntegrityError as e:
                    logger.info("Duplicate metadata ignored (%s, %s, %s)", md5,
                                field, value)
                except Exception as e:
                    logger.error("Unable to save metadata (%s, %s, %s)", md5,
                                 field, value)
                    logger.exception(e)
    self.update_metadata()
    self.update_tags()
Code example #16
def load_data(conn, input_filepath):
    """
    Load data from input file to postgres
    """
    Base.metadata.drop_all(conn.engine)
    Base.metadata.create_all(conn.engine)

    data = pd.read_csv(input_filepath,
                       dtype={
                           'sha': str,
                           'pmcid': str,
                           'pubmed_id': str,
                           'doi': str,
                           'Microsoft Academic Paper ID': str,
                           'WHO #Covidence': str
                       })

    data.columns = [
        'cord_uid', 'sha', 'source_x', 'title', 'doi', 'pmcid', 'pubmed_id',
        'license', 'abstract', 'publish_time', 'authors', 'journal',
        'microsoft_academic_paper_id', 'who_covidence', 'has_pdf_parse',
        'has_pmc_xml_parse', 'full_text_file', 'url'
    ]

    data['paper_key'] = data.apply(
        lambda row: row.sha.strip()
        if row.sha is not None else row.pmcid.strip(),
        axis=1)
    data = data.drop_duplicates(subset=['paper_key'])
    data = data.to_json(orient='records')
    data = json.loads(data)
    for rec in data:
        rec['paper_key'] = rec['sha'] if rec['sha'] is not None else rec[
            'pmcid']
        if rec['paper_key'] is not None:
            conn.session.add(Metadata(**rec))
Code example #17
File: imagecontroller.py Project: annebaui/srimato
    def get_metadata(self, imgfrag):
        # peewee's .where() takes expressions rather than keyword arguments
        return Metadata.select().where(Metadata.imgfrag == imgfrag)
Code example #18
File: tests.py Project: lvaliente/test-project
def test_create_no_primary_key_metadata():
    with pytest.raises(TypeError):
        m = Metadata(title='Some title')
Code example #19
def process_herbage(herbage, scen, scenarios, grange):
    """
    Primary function for processing grange
    """

    # subset for the correct scenario
    herbage = herbage[herbage['scenario'] == scen]

    herbage['geometry'] = herbage.apply(
        lambda x: Point(x.longitude, x.latitude), axis=1)

    # obtain scenario parameters
    params = scenarios[scenarios['scenario'] == scen].iloc[0].to_dict()
    params = format_params(params)

    run_id, model_config, run_obj = gen_run(model_name, params)

    # generate temp CSV and push it to S3
    herbage.to_csv("tmp_g.csv", index=False)
    time.sleep(1)
    try:
        s3_bucket.upload_file("tmp_g.csv",
                              run_obj['key'],
                              ExtraArgs={'ACL': 'public-read'})
    except Exception as e:
        print(e)
        print("Retrying file upload...")
        try:
            s3_bucket.upload_file("tmp_g.csv",
                                  run_obj['key'],
                                  ExtraArgs={'ACL': 'public-read'})
        except Exception:
            # the retry also failed; continue without the raw CSV upload
            pass

    # Add metadata object to DB
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        raw_output_link=
        f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv",
        run_label=herbage.description.iloc[0],
        point_resolution_meters=25000)
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    for param in grange['parameters']:
        # ensure that no null parameters are stored
        if not pd.isna(params[param['name']]):
            if param['metadata']['type'] == 'ChoiceParameter':
                p_type = 'string'
            elif param['name'] in ('fertilizer', 'sowing_window_shift'):
                p_type = 'int'
            else:
                p_type = 'float'
            p_value = params[param['name']]

            param = Parameters(run_id=run_id,
                               model=model_name,
                               parameter_name=param['name'],
                               parameter_value=p_value,
                               parameter_type=p_type)
            db_session.add(param)
            db_session.commit()

    gdf = gpd.GeoDataFrame(herbage)
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
    gdf['run_id'] = run_id
    gdf['model'] = model_name
    if 'geometry' in gdf:
        del (gdf['geometry'])
        del (gdf['index_right'])

    return gdf, run_id
Code example #20
File: tests.py Project: lvaliente/test-project
def test_create_empty_metadata():
    with pytest.raises(TypeError):
        m = Metadata()
Code example #21
File: tests.py Project: lvaliente/test-project
def test_create_no_title_metadata():
    with pytest.raises(TypeError):
        m = Metadata(10)
Code example #22
        2: [2014, 2013, 2012, 2011],
        3: [2010, 2009, 2008, 2007],
        4: [2006, 2005, 2004, 2003]
    }

    for model_name in models:

        model_config = {"config": {"format": 'tif'}, "name": model_name}

        run_id = sha256(json.dumps(model_config).encode('utf-8')).hexdigest()

        # Add metadata object to DB
        meta = Metadata(
            run_id=run_id,
            model=model_name,
            raw_output_link=
            f"https://model-service.worldmodelers.com/result_file/{run_id}.tif",
            run_label=model_name.replace('_', ' ').title(),
            point_resolution_meters=2000)
        db_session.add(meta)
        db_session.commit()

        # iterate over the 4 bands
        for band, years in bands.items():
            print(f"Processing {model_name} band {band}")
            # Convert Raster to GeoPandas
            InRaster = f"data/{atlas_lookup[model_name]['tif']}"
            feature_name = atlas_lookup[model_name]['feature_name']
            feature_description = atlas_lookup[model_name][
                'feature_description']
            gdf = raster2gpd(InRaster, feature_name, band=band)
Code example #23
File: tests.py Project: lvaliente/test-project
def test_create_metadata():
    m = Metadata(random.randint(1, 10), 'Some title')
    assert isinstance(m, Metadata)
Code example #24
    files = [i for i in os.listdir('Africa_1km_Population/') if '.tif' in i]

    # Specify possible parameters
    years = [2000, 2005, 2010, 2015, 2020]

    for year in years:
                    
        params = {'year': year}
        print(params)
        run_name = f"AFR_PPP_{year}_adj_v2.tif"
        run_id, model_config = gen_run(year)
                    
        # Add metadata object to DB
        meta = Metadata(run_id=run_id,
                        model=model_config['name'],
                        raw_output_link=f"https://world-modelers.s3.amazonaws.com/results/world_population_africa/{run_name}",
                        run_label="World Population Africa",
                        point_resolution_meters=1000)
        db_session.add(meta)
        db_session.commit()
                    
        # Add parameters to DB
        for name, val in params.items():                
            param = Parameters(run_id=run_id,
                               model=model_config['name'],
                               parameter_name=name,
                               parameter_value=val,
                               parameter_type='string')
            db_session.add(param)
            db_session.commit()
                    
Code example #25
    def ingest2db(self):
        init_db()

        # Load Admin2 shape from GADM
        logging.info("Loading GADM shapes...")
        admin2 = gpd.read_file(f"{self.config['GADM']['GADM_PATH']}/gadm36_2.shp")
        admin2['country'] = admin2['NAME_0']
        admin2['state'] = admin2['NAME_1']
        admin2['admin1'] = admin2['NAME_1']
        admin2['admin2'] = admin2['NAME_2']
        admin2 = admin2[['geometry','country','state','admin1','admin2']]

        # Add metadata object to DB
        # TODO: add run_label
        logging.info("Storing metadata...")
        meta = Metadata(run_id=self.run_id, 
                        model=self.name,
                        run_description=self.descriptions['management_practice'][self.model_config['management_practice']],
                        raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                        # 5 arc minutes (~10km)
                        point_resolution_meters=10000) 
        logging.info("Storing metadata...")
        db_session.add(meta)
        db_session.commit()

        # Add parameters to DB
        logging.info("Storing parameters...")
        for param_name, param_val in self.model_config.items():
            if param_name != 'run_id':
                param = Parameters(run_id=self.run_id,
                                   model=self.name,
                                   parameter_name=param_name,
                                   parameter_value=param_val,
                                   parameter_type=self.descriptions['parameters'][param_name])
                db_session.add(param)
                db_session.commit()

        # Process CSV and normalize it
        logging.info("Processing points...")

        # get result file path
        if self.model_config["management_practice"] == "combined":
            # combined CSV
            path = f"{self.result_path}/out/eth_docker/test/pp.csv"
        else:
            # individual management practices
            m = self.model_config["management_practice"]
            path = f"{self.result_path}/out/eth_docker/test/{m}/pp_{m}.csv"

        df = pd.read_csv(path, index_col=False)
        df['latitude'] = df['LATITUDE']
        df['longitude'] = df['LONGITUDE']
        df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
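        # HDAT is encoded as YYYYDDD: a 4-digit year followed by the day of year.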
        df['year'] = df['HDAT'].apply(lambda x: int(str(x)[:4]))
        df['days'] = df['HDAT'].apply(lambda x: int(str(x)[4:]))
        df['datetime'] = df.apply(lambda x: datetime(x.year, 1, 1) + timedelta(x.days - 1), axis=1)
        df['run_id'] = self.run_id
        df['model'] = self.name
        df['Production'] = df['HWAH'] * df['HARVEST_AREA']

        # for combined runs only we need to convert the run name to an encoded 
        # float so that it can go into the database
        if 'RUN_NAME' in df:
            df['management_practice'] = df['RUN_NAME'].apply(lambda x: self.descriptions['encoding'][x])

        gdf = gpd.GeoDataFrame(df)

        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        base_cols = ['run_id','model','latitude','longitude',
                     'datetime','admin1','admin2','state',
                     'country']

        feature_cols = ['feature_name','feature_description','feature_value']

        # Need to iterate over features to generate one GDF per feature
        # then upload the GDF per feature to ensure that rows are added for each
        # feature
        for feature_name, feature_description in self.descriptions['features'].items():
            # specific handling for "combined" file
            if feature_name == 'management_practice':
                if self.model_config["management_practice"] != "combined":
                    # if not a combined file, then just move onto the next 
                    # in the for loop and do nothing for this feature_name
                    continue
            cols_to_select = base_cols + [feature_name]
            gdf_ = gdf[cols_to_select] # generate new interim GDF
            gdf_['feature_name'] = feature_name
            gdf_['feature_description'] = feature_description
            gdf_['feature_value'] = gdf_[feature_name]
            gdf_ = gdf_[base_cols + feature_cols]

            # perform bulk insert of entire geopandas DF
            logging.info(f"Storing point data output for {feature_name}...")
            db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
            db_session.commit()            
Code example #26
def UNTAGGED():
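    # Files whose only metadata (if any) is the automatic import-time stamp,
    # i.e. files the user has not tagged yet.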
    tagged_files = Metadata.select(Metadata.file).where(Metadata.field != 'import-time')
    return set(File.select().where(File.id.not_in(tagged_files)))
Code example #27
File: __init__.py Project: mklauber/Curator
def to_json(f):
    # Group the file's metadata values by field, e.g. {"tag": {"red", "blue"}}.
    d = defaultdict(set)
    for m in Metadata.select().where(Metadata.file == f):
        d[m.field].add(m.value)
    return d
Code example #28
File: main.py Project: rutrum/album-dl
def main():

    if len(sys.argv) < 3:
        print("usage: album-dl yt_url wiki_url")
        exit()

    pool = ThreadPool(processes=1)
    if not os.path.exists("/tmp/album-dl"):
        os.makedirs("/tmp/album-dl")

    yt_url = sys.argv[1]
    #yt_url = input("Enter youtube url:\n")
    # yt_url = "https://www.youtube.com/watch?v=DHd51Y7dhW0&list=PLONR6CCwpAARTIZ69LUgLW1cdEyM4Rn5e"
    song_downloader = pool.apply_async(ytdl.song_titles, (yt_url, ))
    print("Downloading youtube playlist metadata...")

    wiki_url = sys.argv[2]
    # wiki_url = input("Enter wikipedia url:\n")
    # wiki_url = "https://en.wikipedia.org/wiki/When_Dream_and_Day_Unite"

    try:
        wiki_page = wiki.capture_page(wiki_url)

        metadata = wiki.get_metadata(wiki_page)

        wiki.download_art(wiki_page)

        track_tables = wiki.get_track_tables(wiki_page)

        table_indicies, track_renumber = select_tables(track_tables)
        tracks = wiki.get_tracks(track_tables, track_renumber, table_indicies)

        print()
        print(metadata)
        for track in tracks:
            print(track)

        while not confirm():
            print("What do you want to change?")
            print("[artist|album|year|genre|track ##]")
            k = input(": ").lower()
            if k in Metadata.keys():
                newval = input("New value for {}: ".format(k))
                metadata[k] = newval
            elif k and k.split()[0] == "track":
                num = k.split()[1]
                for track in tracks:
                    if num == track.num:
                        newval = input("New title for track {}: ".format(
                            track.num))
                        track.title = newval
            else:
                print("Not a valid field.")

            print()
            print(metadata)
            for track in tracks:
                print(track)

        print("Downloading youtube playlist metadata...")
        last_msg = ""
        current, total = ytdl.msg_status
        while not song_downloader.ready():
            if ytdl.msg_status != last_msg:
                last_msg = ytdl.msg_status
                current, total = last_msg

        yt_song_titles = song_downloader.get()

        mapping, unmatched = match.mapTitlesToFiles(tracks, yt_song_titles)

        print_mapping(mapping, unmatched)

        while not confirm():
            if unmatched:
                song = unmatched.pop(0)
                print()
                for track in mapping:
                    print(track)
                print("What does {} match with?".format(song.title))
                i = input("> ")
                success = False
                for track in mapping:
                    if track.num == i:
                        mapping[track] = song
                        print_mapping(mapping, unmatched)
                        success = True
                        break

                if not success:
                    print("Not a valid track number")
                    print_mapping(mapping, unmatched)

        ytdl.download_songs(yt_url)

        new_names = tag.tag_songs(tracks, metadata, mapping)

        path = "/home/rutrum/music/{}/{}".format(metadata["artist"],
                                                 metadata["album"])
        os.makedirs(path, exist_ok=True)

        for name in new_names:
            old_path = "/tmp/album-dl/{}.mp3".format(name["old"])
            new_path = "{}/{}.mp3".format(path, name["new"])
            shutil.move(old_path, new_path)  # can move across file systems
            #os.rename(old_path, new_path)  # cannot

    except Exception as ex:
        cprint(ex.args[0], "red", attrs=["bold"], file=sys.stderr)
        raise ex
Code example #29
    def ingest2db(self):
        init_db()

        # Load Admin2 shape from GADM
        logging.info("Loading GADM shapes...")
        admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
        admin2['country'] = admin2['NAME_0']
        admin2['state'] = admin2['NAME_1']
        admin2['admin1'] = admin2['NAME_1']
        admin2['admin2'] = admin2['NAME_2']
        admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

        # Add metadata object to DB
        # TODO: add run_label
        meta = Metadata(run_id=self.run_id,
                        model=self.name,
                        run_description=self.run_description,
                        raw_output_link=
                        f'https://s3.amazonaws.com/world-modelers/{self.key}',
                        point_resolution_meters=1000)
        logging.info("Storing metadata...")
        db_session.add(meta)
        db_session.commit()

        # Add parameters to DB
        logging.info("Storing parameters...")
        for param_name, param_val in self.model_config['config'].items():
            if param_name != 'run_id':
                if param_name in ('year', 'month'):
                    param_type = 'integer'
                else:
                    param_type = 'string'

                param = Parameters(run_id=self.run_id,
                                   model=self.name,
                                   parameter_name=param_name,
                                   parameter_value=param_val,
                                   parameter_type=param_type)
                db_session.add(param)
                db_session.commit()

        # Process tiff file into point data
        logging.info("Processing tiff...")
        InRaster = f"{self.install_path}/output/{self.key}"
        feature_name = self.feature_name
        feature_description = self.feature_description
        gdf = raster2gpd(InRaster, feature_name, band=self.band)

        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

        # Set run fields: datetime, run_id, model
        gdf['datetime'] = self.start_time
        gdf['run_id'] = self.run_id
        gdf['model'] = self.name
        gdf['feature_description'] = feature_description
        del (gdf['geometry'])
        del (gdf['index_right'])

        # perform bulk insert of entire geopandas DF
        logging.info("Storing point data output...")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
Code example #30
def ingest2db(year, df, filename):
    model_name = "flood_index_model"
    run_id = gen_run_id(year)
    init_db()

    # Load Admin2 shape from GADM
    print("Loading GADM shapes...")
    admin2 = gpd.read_file(f"{config['GADM']['GADM_PATH']}/gadm36_2.shp")
    admin2['country'] = admin2['NAME_0']
    admin2['state'] = admin2['NAME_1']
    admin2['admin1'] = admin2['NAME_1']
    admin2['admin2'] = admin2['NAME_2']
    admin2 = admin2[['geometry', 'country', 'state', 'admin1', 'admin2']]

    # Add metadata object to DB
    # TODO: add run_label
    print("Storing metadata...")
    meta = Metadata(
        run_id=run_id,
        model=model_name,
        run_description=f"{model_name} run for {year}",
        raw_output_link=
        f'https://s3.amazonaws.com/world-modelers/flood_index_model/{filename}.nc',
        # 0.1 degrees (~10km)
        point_resolution_meters=10000)
    print("Storing metadata...")
    db_session.add(meta)
    db_session.commit()

    # Add parameters to DB
    print("Storing parameters...")
    param = Parameters(run_id=run_id,
                       model=model_name,
                       parameter_name="year",
                       parameter_value=year,
                       parameter_type="integer")
    db_session.add(param)
    db_session.commit()

    # Process CSV and normalize it
    print("Processing points...")

    df['geometry'] = df.apply(lambda x: Point(x.longitude, x.latitude), axis=1)
    df['run_id'] = run_id
    df['model'] = model_name

    gdf = gpd.GeoDataFrame(df)

    # Spatial merge on GADM to obtain admin areas
    gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')

    base_cols = [
        'run_id', 'model', 'latitude', 'longitude', 'datetime', 'admin1',
        'admin2', 'state', 'country'
    ]

    feature_cols = ['feature_name', 'feature_description', 'feature_value']

    # Need to iterate over features to generate one GDF per feature
    # then upload the GDF per feature to ensure that rows are added for each
    # feature
    for feature_name, feature_description in features.items():
        cols_to_select = base_cols + [feature_name]
        gdf_ = gdf[cols_to_select]  # generate new interim GDF
        gdf_['feature_name'] = feature_name
        gdf_['feature_description'] = feature_description
        gdf_['feature_value'] = gdf_[feature_name]
        gdf_ = gdf_[base_cols + feature_cols]

        # perform bulk insert of entire geopandas DF
        print(f"Storing point data output for {feature_name}...")
        db_session.bulk_insert_mappings(Output, gdf_.to_dict(orient="records"))
        db_session.commit()
Code example #31
    def ingest2db(self):
        init_db()

        # Add metadata object to DB
        desc = f"{self.name} run for {self.scenario_type} scenario with start year {self.start_year} and end year {self.end_year}"
        if self.scenario_type == 'production_failure_scenario':
            desc += f". Shock severity was set to {self.shock_severity} and the shocked region was {self.shocked_region}."

        logging.info("Storing metadata...")
        meta = Metadata(run_id=self.run_id,
                        model=self.name,
                        run_label=f"{self.name}: {self.scenario_type}",
                        run_description=desc,
                        raw_output_link=
                        f'https://s3.amazonaws.com/world-modelers/{self.key}')
        logging.info("Storing metadata...")
        db_session.add(meta)
        db_session.commit()

        # Add parameters to DB
        logging.info("Storing parameters...")
        for param_name, param_val in self.model_config.items():
            if param_name == 'run_id':
                pass
            else:
                param = Parameters(
                    run_id=self.run_id,
                    model=self.name,
                    parameter_name=param_name,
                    parameter_value=param_val,
                    parameter_type=self.descriptions['parameters'][param_name])
                db_session.add(param)
                db_session.commit()

        # Process CSV and normalize it
        logging.info("Processing timeseries...")
        df = pd.read_csv(self.output)
        df = df.transpose().reset_index()
        df = df.rename(
            columns=dict(zip(list(df.columns), list(df.iloc[0]))))[1:]
        df = df.rename(columns={'Unnamed: 0': 'Date'})
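        # "Date" values look like "M.YYYY": month before the dot, year after it.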
        df['datetime'] = df.Date.apply(lambda x: datetime(
            year=int(x.split('.')[1]), month=int(x.split('.')[0]), day=1))
        del (df['Date'])
        df['run_id'] = self.run_id
        df['model'] = self.name

        base_cols = ['run_id', 'model', 'datetime']
        feature_cols = ['feature_name', 'feature_description', 'feature_value']

        for feature_name, feature_description in self.descriptions[
                'features'].items():
            # some multi_twist outputs will not be present depending on the scenario type
            # so first check
            if feature_name in df:
                logging.info(
                    f"Storing point data output for {feature_name}...")
                cols_to_select = base_cols + [feature_name]
                df_ = df[cols_to_select]  # generate new interim DF
                df_['feature_name'] = feature_name
                df_['feature_description'] = feature_description.split('.')[0]
                df_['feature_value'] = df_[feature_name]
                df_ = df_[base_cols + feature_cols]

                # perform bulk insert of the entire feature DF
                db_session.bulk_insert_mappings(Output,
                                                df_.to_dict(orient="records"))
                db_session.commit()
Code example #32
    def ingest2db(self):
        init_db()

        # Load Admin2 shape from GADM
        logging.info("Loading GADM shapes...")
        admin2 = gpd.read_file(f'{self.gadm}/gadm36_2.shp')
        admin2['country'] = admin2['NAME_0']
        admin2['state'] = admin2['NAME_1']
        admin2['admin1'] = admin2['NAME_1']
        admin2['admin2'] = admin2['NAME_2']
        admin2 = admin2[['geometry','country','state','admin1','admin2']]

        # Add metadata object to DB
        # TODO: add run_label
        meta = Metadata(run_id=self.run_id, 
                        model=self.name,
                        run_description=self.features[self._type]['run_description'],
                        raw_output_link=f'https://s3.amazonaws.com/world-modelers/{self.key}',
                        point_resolution_meters=5000)
        logging.info("Storing metadata...")
        db_session.add(meta)
        db_session.commit()

        # Add parameters to DB
        logging.info("Storing parameters...")
        for param_name, param_val in self.model_config.items():   
            if param_name != 'run_id':             
                if param_name == 'year':
                    param_type = 'integer'
                elif param_name == 'bbox':
                    param_type = 'array'
                    param_val = json.dumps(param_val)
                elif param_name == 'dekad':
                    param_type = 'integer'
                    param_val = int(param_val)
                else:
                    param_type = 'string'

                param = Parameters(run_id=self.run_id,
                                  model=self.name,
                                  parameter_name=param_name,
                                  parameter_value=param_val,
                                  parameter_type=param_type)
                db_session.add(param)
                db_session.commit()

        # Process tiff file into point data
        logging.info("Processing tiff...")
        InRaster = f"{self.result_path}/{self.result_name}.tiff"
        feature_name = self.features[self._type]['feature_name']
        feature_description = self.features[self._type]['feature_description']
        gdf = raster2gpd(InRaster, feature_name)
        
        # Spatial merge on GADM to obtain admin areas
        gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects')
        
        # Set run fields: datetime, run_id, model
        # first convert dekad of year to a date
        # note: a dekad is a 10-day period, so dekad 25 ends on day 250 of the
        # year, and dekad 01 (days 1-10) should map to Jan 1
        gdf['datetime'] = datetime(self.year, 1, 1) + timedelta((int(self.dekad) * 10) - 11)
        gdf['run_id'] = self.run_id
        gdf['model'] = self.name
        gdf['feature_description'] = feature_description
        del(gdf['geometry'])
        del(gdf['index_right'])

        # perform bulk insert of entire geopandas DF
        logging.info("Storing point data output...")
        db_session.bulk_insert_mappings(Output, gdf.to_dict(orient="records"))
        db_session.commit()
Code example #33
                    params = {
                        'crop': crop,
                        'irrigation': irrig,
                        'nitrogen': nit,
                        'stat': stat
                    }
                    print(params)
                    run_name = gen_global(crop, irrig, nit, stat)
                    run_id, model_config = gen_run(crop, irrig, nit, stat)

                    # Add metadata object to DB
                    meta = Metadata(
                        run_id=run_id,
                        model=model_config['name'],
                        raw_output_link=
                        f"https://world-modelers.s3.amazonaws.com/results/yield_anomalies_model/{run_name}",
                        run_label="LPJmL Yield Anomalies",
                        point_resolution_meters=52000)
                    db_session.add(meta)
                    db_session.commit()

                    # Add parameters to DB
                    for name, val in params.items():
                        param = Parameters(run_id=run_id,
                                           model=model_config['name'],
                                           parameter_name=name,
                                           parameter_value=val,
                                           parameter_type='string')
                        db_session.add(param)
                        db_session.commit()