Beispiel #1
0
    # "solids":{
    #     "push_new_data":{
    #         "config":{
    #             "commit":"Metadata",
    #             "branch":"dev"
    #         }
    #     }
    # }
}

################   SOLIDS   ##################


@dg.solid(
    input_defs=[
        dg.InputDefinition("cumulus", root_manager_key="cumulus_root"),
        dg.InputDefinition("wikidata", root_manager_key="wikidata_root"),
        dg.InputDefinition("portals", root_manager_key="portals_root"),
        dg.InputDefinition("camera", root_manager_key="camera_root"),
        dg.InputDefinition("images", root_manager_key="images_root"),
    ],
    output_defs=[dg.OutputDefinition(dagster_type=pd.DataFrame)],
)
def create_metadata(
    context,
    cumulus: main_dataframe_types,
    wikidata: main_dataframe_types,
    portals: main_dataframe_types,
    camera: gpd.GeoDataFrame,
    images: main_dataframe_types,
):
Beispiel #2
0
class Image:

    """
    Wrap the path of an original image and expose names derived from it.

    Attributes:
        path: the path exactly as it was handed to the constructor.
        tif: the last component of the path (file name with its extension).
        id: the file name cut off at its first "." character.
        jpg: ``id`` with ".jpg" appended.
    """

    def __init__(self, path):
        self.path = path

        # Resolve the file name once and derive every attribute from it.
        filename = os.path.basename(path)
        # Everything before the FIRST dot, so "a.b.tif" yields "a".
        stem = filename.partition(".")[0]

        self.jpg = stem + ".jpg"
        self.tif = filename
        self.id = stem


@dg.solid(
    config_schema=dg.StringSource,
    input_defs=[dg.InputDefinition("metadata", root_manager_key="metadata_root")],
    output_defs=[dg.OutputDefinition(dagster_type=dict)],
)
def file_picker(context, metadata: dp.DataFrame):
    """
    Walks directory tree and glob relevant files.
    Returns dictionary with geolocated, backlog
    and files for review
    """
    source = context.solid_config
    metadata["Source ID"] = metadata["Source ID"].str.upper()
    has_kml = list(metadata.loc[metadata["Latitude"].notna(), "Source ID"])
    catalog = list(metadata["Source ID"])
    image_list = [
        Image(os.path.join(root, name))
        for root, dirs, files in os.walk(source)
Beispiel #3
0
import dagster as dg
import subprocess


@dg.solid(config_schema={
    "commit": dg.Field(dg.String),
    "branch": dg.Field(dg.String)
},
          input_defs=[dg.InputDefinition("commit", dagster_type=dg.Nothing)])
def push_new_data(context):
    """
    Push data to Git submodule
    and commit changes to main
    repository
    """

    commit = context.solid_config["commit"]
    branch = context.solid_config["branch"]

    submodule_push = [
        "pwd",
        "git checkout main",
        "git add .",
        f"git commit -a -m ':card_file_box: Update {commit} data'",
        "git push",
    ]

    for command in submodule_push:
        git_cli_sub = subprocess.Popen(
            command,
            shell=True,
Beispiel #4
0
from bokeh.transform import cumsum
from bokeh.plotting import output_file, show, figure
from bokeh.models import (
    CheckboxButtonGroup,
    CustomJS,
    LinearColorMapper,
    Row,
    HoverTool,
    Span,
    Title,
)


@dg.solid(
    input_defs=[
        dg.InputDefinition("metadata", root_manager_key="metadata_root"),
        dg.InputDefinition("camera", root_manager_key="camera_root"),
        dg.InputDefinition("cumulus", root_manager_key="cumulus_root"),
    ],
    output_defs=[dg.OutputDefinition(dagster_type=dp.DataFrame)],
)
def load_metadata(
    _,
    metadata: dp.DataFrame,
    camera: main_dataframe_types,
    cumulus: main_dataframe_types,
):
    """
    Merge relevant dataframes to access objects
    status and properties
    """
Beispiel #5
0
            collections = {}
            collections["data"] = collection
            collections["key"] = collection_path
            collections["type"] = "json"
            data.append(collections)

            print("Collection updated: {0}".format(collection_name))
    else:
        pass

    return data


@dg.solid(
    input_defs=[
        dg.InputDefinition("metadata", root_manager_key="metadata_root"),
        dg.InputDefinition("mapping", root_manager_key="mapping_root"),
    ],
    output_defs=[dg.DynamicOutputDefinition(dict)],
)
def get_items(context, metadata, mapping):
    ims = ((metadata["Latitude"].notna())
           & (metadata["Source URL"].notna())
           & (metadata["Media URL"].notna())
           & (metadata["First Year"].notna())
           & (metadata["Last Year"].notna()))
    jstor = metadata["Source"] != "Instituto Moreira Salles"
    metadata = metadata.loc[ims | jstor]
    metadata.fillna("", inplace=True)
    metadata.set_index("Source ID", inplace=True)
    mapping.set_index("Label:en", inplace=True)
Beispiel #6
0
import dagster as dg
import dagster_pandas as dp
from dagster_pandas.data_frame import DataFrame
from numpy.core.numeric import True_
import pandas as pd
from pandas._libs.tslibs import NaT
from tests.dataframe_types import *
from tests.objects_types import *


# solids cumulus
@dg.solid(
    input_defs=[dg.InputDefinition("root", root_manager_key="cumulus_root")],
    output_defs=[dg.OutputDefinition(dagster_type=dp.DataFrame)],
)
def xml_to_df(context, root):
    """
    Build Pandas DataFrame from XML file
    """
    # Find the uids

    uids = {}
    for thing in root[0][0]:
        uids[thing.attrib["uid"]] = thing[0].text

    table = {}
    for field in uids.values():
        table[field] = []

    outDict = {"table": table, "uids": uids}