def test_remove_stale_individuals__removed_amt(setup):
    """Removing stale individuals shrinks the collection by the flagged count."""
    before = list(f.get_collection("individuals").stream())
    flagged = list(d.query_individuals("removed", "==", True).stream())
    rollover.remove_stale_individuals(2012)
    after = list(f.get_collection("individuals").stream())
    assert len(after) == len(before) - len(flagged), f.docs2str(after)
def rollover_individuals(source_phenoyear, target_phenoyear, individual=None):
    """
    Copy individuals to a new phenoyear, removing all fields that are
    specific for the phenoyear.

    :param source_phenoyear: the phenoyear to copy individuals from
    :param target_phenoyear: the phenoyear to create the copies in
    :param individual: if set, restrict the rollover to documents whose
        "individual" field equals this value
    """
    log.info("Rollover individuals of %i to %i", source_phenoyear,
             target_phenoyear)
    new_individuals = []
    query = query_individuals("year", "==",
                              source_phenoyear).where("source", "==", "globe")
    if individual:
        query = query.where("individual", "==", individual)
    for individual_doc in query.stream():
        # fix: use a distinct local name instead of shadowing the
        # `individual` parameter with the loop value
        data = individual_doc.to_dict()
        data["id"] = "%i_%s" % (target_phenoyear, data["individual"])
        data["year"] = target_phenoyear
        # drop fields that only make sense within the source phenoyear
        for key in [
                "last_phenophase", "last_observation_date", "created",
                "modified"
        ]:
            data.pop(key, None)
        new_individuals.append(data)
        log.debug("rolling over individual %s", data)
    log.info("Creating %i new individuals in %i", len(new_individuals),
             target_phenoyear)
    write_individuals(new_individuals, "id")
def test_rollover_individuals__documents(setup):
    """Rolled-over documents are flagged and year-specific fields are stripped."""
    rollover.rollover_individuals(2012, 2013)
    for doc in d.query_individuals("year", "==", 2013).stream():
        data = doc.to_dict()
        assert data["rolled"], data
        assert "last_phenophase" not in data, data
        assert "last_observation_date" not in data, data
def test_rollover_individuals__roll_amt(setup):
    """Rollover adds exactly one new document per individual flagged as rolled."""
    before = list(f.get_collection("individuals").stream())
    rolled = list(d.query_individuals("rolled", "==", True).stream())
    rollover.rollover_individuals(2012, 2013)
    after = list(f.get_collection("individuals").stream())
    assert len(after) == len(before) + len(rolled)
def test_delete_all_individuals():
    """Deleting a user's individuals removes exactly that user's documents."""
    # fix: the fourth write previously reused the id "u2_i1", silently
    # overwriting the third document instead of creating a second u2 one;
    # user values are set to match the assertions below ("u1" deleted,
    # "u2" retained)
    d.write_individual("u1_i1", {"user": "u1"})
    d.write_individual("u1_i2", {"user": "u1"})
    d.write_individual("u2_i1", {"user": "u2"})
    d.write_individual("u2_i2", {"user": "u2"})
    e2e.delete_user_individuals("u1")
    for individual in d.query_individuals("user", "==", "u1").stream():
        assert False, individual
    for individual in f.get_collection("individuals").stream():
        assert individual.to_dict()["user"] == "u2"
def remove_stale_individuals(year: int):
    """
    Remove all individuals in Firestore that have no observations for all
    sources (globe and meteoswiss) for the given phenoyear year.

    :param year: the phenoyear
    """
    log.info("Remove stale individuals for %i", year)
    # collect ids first and delete afterwards to avoid stream timeouts
    stale_ids = [
        doc.id for doc in query_individuals("year", "==", year).stream()
        if not has_observations(doc.to_dict())
    ]
    for stale_id in stale_ids:
        log.debug("Remove individual %s", stale_id)
        delete_individual(stale_id)
    log.info("Removed %i stale individuals for %i", len(stale_ids), year)
def test_rollover_individuals__keys(setup):
    """Rolled-over documents get ids of the form '<target_year>_<individual>'."""
    rollover.rollover_individuals(2012, 2013)
    for doc in d.query_individuals("year", "==", 2013).stream():
        expected_id = "2013_" + doc.to_dict()["individual"]
        assert doc.id == expected_id
def test_remove_stale_individuals__documents(setup):
    """After removal, no surviving 2012 document carries the removed flag."""
    rollover.remove_stale_individuals(2012)
    for doc in d.query_individuals("year", "==", 2012).stream():
        data = doc.to_dict()
        assert not data["removed"], data
def test_rollover_individuals__single_individual(setup):
    """Restricting the rollover to one individual copies only that one."""
    rollover.rollover_individuals(2012, 2020, "3")
    for doc in d.query_individuals("year", "==", 2020).stream():
        data = doc.to_dict()
        assert doc.id == "2020_" + data["individual"]
        assert data["individual"] == "3"
def _export_row(o, i):
    """Build one meteoswiss CSV export row from an observation dict `o`
    and its matching individual dict `i`. Raises KeyError on missing fields
    (handled by the caller)."""
    return {
        "OWNER": o["user"],
        "MEAS_OBJ_ID": o["individual"],
        "PLACENAME": i["name"],
        "MEAS_YEAR": o["year"],
        "MEAS_SPEC_ID": o["species"],
        "MEAS_PPH_1": o["phenophase"],
        "MEAS_ID": "",
        "MEAS_DATE": o["date"].strftime("%d.%m.%Y"),
        "MEAS_ALTGRP": "",
        "MEAS_INCR": "",
        "CREATED": o["created"].strftime("%d.%m.%Y %H:%M:%S"),
        "MODIFIED": o["modified"].strftime("%d.%m.%Y %H:%M:%S")
                    if o["modified"] else "",
        "GEOPOS": "%s,%s" % (i["geopos"]["lat"], i["geopos"]["lng"]),
        "ALTITUDE": i["altitude"],
        "DESCRIPTION": i["description"],
        "EXPOSITION": i["exposition"],
        "GRADIENT": i["gradient"],
        "SHADE": i["shade"],
        "WATERING": i["watering"],
        "LESS100": i["less100"],
        "HABITAT": i["habitat"],
        "FOREST": i["forest"],
        "SPEC_ID": i["species"],
        "ID": "",
        "PARENT_ID": "",
        "MEAS_PPH_2": "",
        "NAME_DE": d.get_phenophase(o["species"], o["phenophase"])["de"],
        "NAME_FR": "",
        "NAME_EN": "",
        "NAME_IT": "",
        "FUNCTION": "MS_DATE",
        "IN_SEQUENCE": "",
        "MODIFIED_1": "",
        "TENANT": "GLOBE_CH",
        "FIRSTNAME": "",
        "LASTNAME": "",
        "ORGANISATION": "",
        "MODIFIED_2": "",
        "SPEC_SET_TENANT": d.get_species(o["species"])["de"],
        "SPEC_SET_DE": "",
        "SPEC_SET_FR": "",
        "SPEC_SET_EN": "",
        "SPEC_SET_IT": "",
    }


def process(year: int = None):
    """
    Export all globe observations of a phenoyear as a meteoswiss-format CSV
    and upload it to public storage.

    :param year: the phenoyear to export; defaults to the current phenoyear
    """
    if not year:
        year = d.get_phenoyear()
    observations = [
        doc.to_dict()
        for doc in query_observation("year", "==", year).where(
            "source", "==", "globe").stream()
    ]
    # map individual key -> individual document for row lookups
    individuals_map = {}
    for individual_doc in (query_individuals("year", "==", year).where(
            "source", "==", "globe").stream()):
        individual_dict = individual_doc.to_dict()
        individuals_map[individual_dict["individual"]] = individual_dict
    results = []
    for o in observations:  # pylint: disable=invalid-name
        try:
            results.append(_export_row(o, individuals_map[o["individual"]]))
        except Exception:  # pylint: disable=broad-except
            # best-effort export: skip malformed/unmatched observations
            log.error("Error processing observation, skipping %s",
                      o,
                      exc_info=True)
    # fix: results[0].keys() raised IndexError when nothing was exported
    if not results:
        log.warning("No observations to export for %i, skipping upload", year)
        return
    with io.StringIO() as csv_string:
        dict_writer = csv.DictWriter(csv_string,
                                     results[0].keys(),
                                     delimiter=";")
        dict_writer.writeheader()
        dict_writer.writerows(results)
        storage.upload_string(
            None,
            "public/meteoswiss/export_%i.csv" % year,
            csv_string.getvalue(),
            content_type="text/csv",
        )