Example #1
0
def write_metadata(context, metadata: dp.DataFrame, to_tag):
    """
    Write available metadata, including GPS tags,
    to high-res JPEGs

    :param context: execution context; ``context.solid_config`` is passed to
        ``ExifTool`` as ``executable_`` (presumably the path to the exiftool
        binary -- confirm against the solid's config schema)
    :param metadata: DataFrame with a "Source ID" column plus the "Date",
        "Creator", "Title", "Description (Portuguese)", "Type", "Depicts",
        "Latitude" and "Longitude" columns read below
    :param to_tag: iterable of file paths; entries not ending in ".jpg" are
        skipped
    :return: ``to_tag``, unchanged
    :raises KeyError: if a JPEG's base name (upper-cased) has no matching
        "Source ID" row in ``metadata``
    """
    # Normalise a copy so the caller's DataFrame is not modified as a side
    # effect of tagging.
    lookup = metadata.fillna(value="")
    lookup["Source ID"] = lookup["Source ID"].str.upper()
    lookup = lookup.set_index("Source ID")

    # One ExifTool process serves every file instead of one process per file.
    with ExifTool(executable_=context.solid_config) as et:
        for item in tqdm(to_tag, "Embedding metadata in files..."):
            if not item.endswith(".jpg"):
                continue

            # The identifier is the file name up to its first dot.
            name = os.path.split(item)[1].split(".")[0]
            record = lookup.loc[name.upper()]

            # "Depicts" holds "||"-separated keywords; drop empty entries so a
            # blank cell does not produce an empty keyword.
            keywords = [kw for kw in record["Depicts"].split("||") if kw]

            # Altitude and image direction values are not written yet; only
            # their reference tags are set.
            params = [
                "-IPTC:Source=Instituto Moreira Salles/IMS",
                "-IPTC:CopyrightNotice=This image is in the Public Domain.",
                "-IPTC:City=Rio de Janeiro",
                "-IPTC:Province-State=RJ",
                "-IPTC:Country-PrimaryLocationName=Brasil",
                "-GPSLatitudeRef=S",
                "-GPSLongitudeRef=W",
                "-GPSAltitudeRef=0",
                "-GPSImgDirectionRef=T",
                f"-IPTC:DateCreated={record['Date']}",
                f"-IPTC:By-line={record['Creator']}",
                f"-IPTC:ObjectName={name}",
                f"-IPTC:Headline={record['Title']}",
                f"-IPTC:Caption-Abstract={record['Description (Portuguese)']}",
                f"-IPTC:ObjectTypeReference={record['Type']}",
                f"-GPSLatitude={record['Latitude']}",
                f"-GPSLongitude={record['Longitude']}",
            ]
            # IPTC:Keywords is a list-type tag: each keyword needs its own
            # assignment. Interpolating the Python list would write its repr
            # (e.g. "['a', 'b']") as a single keyword.
            params.extend(f"-IPTC:Keywords={kw}" for kw in keywords)

            # Send every tag in a single command: the file is rewritten once,
            # and the repeated Keywords assignments accumulate instead of each
            # replacing the previous one.
            encoded = [param.encode(encoding="utf-8") for param in params]
            et.execute(*encoded, item.encode(encoding="utf-8"))

    to_upload = to_tag
    return to_upload
Example #2
0
def transform_table_desc_df(context, table_desc: DataFrame) -> DataFrame:
    """
    Strip comment rows and the helper ``ignore`` column from a table description.

    The frame is modified in place and the same object is returned.

    :param context: execution context (not used here)
    :param table_desc: pandas DataFrame describing the Postgres database table;
        must contain an ``ignore`` column
    :return: ``table_desc`` with missing values replaced by ``''``, every row
        whose ``ignore`` value contains ``#`` removed, and the ``ignore``
        column dropped
    :rtype: pandas.DataFrame
    """
    # Blank out missing cells first so the string test below sees text only.
    table_desc.fillna('', inplace=True)

    # A '#' anywhere in the ``ignore`` cell marks the row as a comment line.
    is_comment = table_desc['ignore'].str.contains('#')
    comment_labels = table_desc.loc[is_comment].index

    # Remove the comment rows and the marker column in a single pass.
    table_desc.drop(index=comment_labels, columns='ignore', inplace=True)

    return table_desc
Example #3
0
# Creator display name -> Wikidata QID.
_CREATOR_QIDS = {
    "Augusto Malta": "Q16495239",
    "Anônimo": "Q4233718",
    "Marc Ferrez": "Q3180571",
    "Georges Leuzinger": "Q5877879",
    "José dos Santos Affonso": "Q63993961",
    "N. Viggiani": "Q65619909",
    "Archanjo Sobrinho": "Q64009665",
    "F. Basto": "Q55089601",
    "J. Faria de Azevedo": "Q97570600",
    "S. H. Holland": "Q65619918",
    "Augusto Monteiro": "Q65619921",
    "Jorge Kfuri": "Q63166336",
    "Camillo Vedani": "Q63109123",
    "Fritz Büsch": "Q63109492",
    "Armando Pittigliani": "Q19607834",
    "Braz": "Q97487621",
    "Stahl & Wahnschaffe": "Q63109157",
    "Gomes Junior": "Q86942676",
    "A. Ruelle": "Q97570551",
    "Guilherme Santos": "Q55088608",
    "Albert Frisch": "Q21288396",
    "José Baptista Barreira Vianna": "Q63166517",
    "Alfredo Krausz": "Q63166405",
    "Therezio Mascarenhas": "Q97570728",
    "Torres": "Q65619905",
    "Theodor Preising": "Q63109140",
    "Augusto Stahl": "Q4821327",
    "Luiz Musso": "Q89538832",
    "Carlos Bippus": "Q63109147",
    "Thiele": "Q64825643",
    "Revert Henrique Klumb": "Q3791061",
    "Juan Gutierrez": "Q10312614",
    "F. Manzière": "Q65619915",
    "Antonio Luiz Ferreira": "Q97570558",
    "Etienne Farnier": "Q97570575",
    "José Francisco Corrêa": "Q10309433",
    "Chapelin": "Q97570376",
    "J. Teixeira": "Q89642578",
    "F. Garcia": "Q97570588",
    "A. de Barros Lobo": "Q97570363",
    "Bloch": "Q61041099",
}

# Columns whose values are emitted wrapped in double quotes.
_QUOTED_PROPS = frozenset({"Lpt-br", "Dpt-br", "Den", "P217", "P7835"})
# Columns that do not get the S248/S813 reference suffix.
_UNREFERENCED_PROPS = frozenset({"Lpt-br", "Dpt-br", "Den"})


def _quote(value):
    """Return *value* wrapped in double quotes."""
    return '"{0}"'.format(value)


def _row_statements(subject, row, reference):
    """Build the statements for one row, each addressed to *subject*."""
    statements = []
    for key, value in row.items():
        # "qid" identifies the item itself; it is not a property to write.
        if key == "qid" or not value:
            continue
        rendered = _quote(value) if key in _QUOTED_PROPS else str(value)
        statement = "|".join(
            [subject, str(key).replace("P31_a", "P31"), rendered]
        )
        if key == "P217":
            statement += "|P195|Q71989864"
        if key == "P195":
            # Quote the raw P217 value. The row is never rewritten, so the
            # qualifier is quoted exactly once whatever the column order.
            statement += "|P217|" + _quote(row["P217"])
        if key not in _UNREFERENCED_PROPS:
            statement += reference
        statements.append(statement)
    return statements


def _df_to_quickstatements(df):
    """Render *df* as "||"-joined command strings for new and existing items."""
    # One retrieval stamp (date precision, midnight) shared by every statement.
    reference = "|S248|Q64995339|S813|+{0}Z/11".format(
        dt.now().strftime("%Y-%m-%dT00:00:00")
    )
    create_items = []
    edit_items = []
    for _, series in df.iterrows():
        row = dict(series)
        if row["qid"]:
            statements = _row_statements(str(row["qid"]), row, reference)
            # A row carrying nothing but its qid contributes no commands.
            if statements:
                edit_items.append("||".join(statements))
        else:
            statements = ["CREATE"] + _row_statements("LAST", row, reference)
            create_items.append("||".join(statements))
    return {"create": "||".join(create_items), "edit": "||".join(edit_items)}


def organise_creator(_, quickstate: dp.DataFrame):
    """
    Map creator names to Wikidata QIDs and export QuickStatements files.

    Rows with an empty "qid" become ``CREATE``/``LAST`` commands written to
    ``data/output/quickstatements_create.txt``; rows with a "qid" become
    statements on that item written to
    ``data/output/quickstatements_edit.txt``.

    :param _: unused (execution context)
    :param quickstate: DataFrame with at least the "qid", "P170" and
        "date_accuracy" columns; every other column is emitted as a property
        keyed by its column name. The input frame is not modified.
    :return: the processed frame ("P170" replaced by QIDs, or "" for unknown
        names; "date_accuracy" dropped; missing values filled with "")
        indexed by "qid"
    """
    # The f-string makes the lookup key text, so non-string cells (e.g. NaN)
    # simply miss and map to "".
    creator_qids = quickstate["P170"].apply(
        lambda name: _CREATOR_QIDS.get(f"{name}", "")
    )
    prepared = (
        quickstate.assign(P170=creator_qids)
        .drop(columns="date_accuracy")
        .fillna("")
    )
    # NOTE(review): kept from the original; this attribute is set on the
    # intermediate frame and is presumably not carried over to the frame
    # returned by set_index below -- confirm whether any caller needs it.
    prepared.name = "import_wikidata"

    # Render once and reuse for both output files.
    statements = _df_to_quickstatements(prepared)

    # Explicit UTF-8: labels and descriptions contain non-ASCII characters.
    with open(
        "data/output/quickstatements_create.txt", "w", encoding="utf-8"
    ) as f:
        f.write(statements["create"])

    with open(
        "data/output/quickstatements_edit.txt", "w", encoding="utf-8"
    ) as f:
        f.write(statements["edit"])

    return prepared.set_index("qid")