Beispiel #1
0
def import_promises():
    """Rebuild every Promise record from the promises CSV.

    All existing Promise rows are deleted, then ``settings.PROMISES_CSV`` is
    read row by row:

    * rows with a missing (or literal ``"nan"``) ``gss_code`` are skipped;
    * rows whose ``gss_code`` matches no Council are skipped and reported on
      stderr;
    * rows with a ``source_url`` create a Promise with ``has_promise=True``;
    * rows without a ``source_url`` create a ``has_promise=False`` record
      only when their scope is "No promise".
    """
    # just refresh everything
    Promise.objects.all().delete()

    # Matches a target that starts with a four digit year. Compiled once
    # here rather than once per row.
    leading_year = re.compile(r"^(\d{4}).*$")

    df = pd.read_csv(settings.PROMISES_CSV)
    for _, row in df.iterrows():
        scope = row["scope"]
        gss_code = row["gss_code"]
        if pd.isnull(gss_code) or gss_code == "nan":
            continue

        try:
            council = Council.objects.get(gss_code=gss_code)
        except Council.DoesNotExist:
            print(
                "Could not find council to import promise: %s" % row["council"],
                file=sys.stderr,
            )
            continue

        if not pd.isnull(row["source_url"]):

            if scope == "Council operations":
                scope = "Council only"

            # needs to be a string for the regexp to work
            target = str(char_from_text(row["target"]))

            # some of the entries in the sheet are not years or have slight
            # clarifications, so only keep the leading year. Taking the year
            # from the match (instead of checking the length of the text)
            # stops four character non-numeric values being stored as a year.
            year_match = leading_year.match(target)
            target_year = year_match.group(1) if year_match else None

            Promise.objects.create(
                council=council,
                scope=PlanDocument.scope_code(scope),
                source=char_from_text(row["source_url"]),
                source_name=char_from_text(row["source_name"]),
                target_year=target_year,
                text=char_from_text(row["wording"]),
                notes=char_from_text(row["notes"]),
                has_promise=True,
            )

        elif scope == "No promise":
            Promise.objects.create(
                council=council, scope=PlanDocument.scope_code(scope), has_promise=False
            )
Beispiel #2
0
    def import_questions(self):
        """Create or update PlanQuestion records from the questions CSV.

        Reads ``self.QUESTIONS_CSV``. For each row the question code is
        normalised, its section code is derived from the part before
        ``_q<digit>``, and the matching PlanSection for ``self.YEAR`` is
        looked up; rows with no matching section are reported and skipped.
        Non-header questions get their maximum score from the comma
        separated ``Scores`` column and a parent code derived from the
        question code.

        Raises:
            ValueError: if a non-header row has no usable numeric score.
        """
        df = pd.read_csv(self.QUESTIONS_CSV)
        # pandas thinks this is a float which is unhelpful
        df["Scores"] = df["Scores"].astype(str)

        for _, row in df.iterrows():
            q_type = "Other"
            if row["Options"] in ("HEADER", "CHECKBOX"):
                q_type = row["Options"]

            code = self.normalise_section_code(row["question_id"])
            section = re.sub(r"(.*)_q[0-9].*", r"\1", code)
            try:
                plan_section = PlanSection.objects.get(code=section,
                                                       year=self.YEAR)
            except PlanSection.DoesNotExist:
                print("no section found for q {}, section {}".format(
                    code, section))
                continue

            question, _ = PlanQuestion.objects.get_or_create(
                code=code, section=plan_section)
            max_score = 0
            if q_type != "HEADER":
                raw_scores = char_from_text(row["Scores"]).split(",")
                # Compare the scores as numbers: taking max() of the raw
                # strings is lexicographic, so "9" would beat "10".
                # Empty fragments (e.g. from "1,,2") are ignored.
                max_score = max(
                    float(score) for score in raw_scores if score.strip())
                parent = re.sub(r"([^q]*q[0-9]*).*", r"\1", code)
                question.parent = parent
            question.max_score = int(max_score)
            question.text = row["Question description"]
            question.question_type = q_type
            question.save()
Beispiel #3
0
def import_declarations():
    """Reload all EmergencyDeclaration records from the declarations CSV.

    Nothing is touched when ``check_sheet_ok("motion_url")`` is falsy.
    Otherwise every existing declaration is deleted and a new one is created
    for each row of ``settings.DECLARATIONS_CSV`` that:

    * has a ``made_declaration`` value starting with "Y",
    * has a usable ``gss_code`` that matches a Council, and
    * has a ``motion_url``.

    Rows naming an unknown council are reported on stderr.
    """
    # if we have any bad data then do not update
    sheet_is_usable = check_sheet_ok("motion_url")
    if not sheet_is_usable:
        print("failed to get good data, not updating")
        return

    # just refresh everything
    EmergencyDeclaration.objects.all().delete()

    sheet = pd.read_csv(settings.DECLARATIONS_CSV)
    for _, entry in sheet.iterrows():
        # only rows whose answer starts with Y are declarations
        answer = char_from_text(entry["made_declaration"])
        if not answer.startswith("Y"):
            continue

        code = entry["gss_code"]
        if pd.isnull(code) or code == "nan":
            continue

        try:
            authority = Council.objects.get(gss_code=code)
        except Council.DoesNotExist:
            print(
                "Could not find council to import declaration: %s" % entry["council"],
                file=sys.stderr,
            )
            continue

        # a declaration without a motion link is not recorded
        if pd.isnull(entry["motion_url"]):
            continue

        EmergencyDeclaration.objects.create(
            council=authority,
            date_declared=date_from_text(entry["date_made"]),
            source_url=char_from_text(entry["motion_url"]),
        )
Beispiel #4
0
    def get_plan_defaults_from_row(self, row):
        """Translate one CSV row into a dict of PlanDocument field values.

        Args:
            row: a mapping-like row providing the ``time_period``, ``type``,
                ``scope``, ``status``, ``well_presented``,
                ``baseline_analysis``, ``notes``, ``file_type``, ``charset``,
                ``text``, ``date_retrieved``, ``title_checked`` and ``title``
                columns.

        Returns:
            dict: field name -> value. ``title`` is the row's title only when
            ``title_checked`` is "y" (case-insensitive); otherwise it is "".
        """
        first_year, last_year = PlanDocument.start_and_end_year_from_time_period(
            row["time_period"])

        # coded and boolean columns
        plan_fields = {
            "document_type": PlanDocument.document_type_code(row["type"]),
            "scope": PlanDocument.scope_code(row["scope"]),
            "status": PlanDocument.status_code(row["status"]),
        }
        for column in ("well_presented", "baseline_analysis"):
            plan_fields[column] = boolean_from_text(row[column])

        # free text columns that map straight onto same-named fields
        for column in ("notes", "file_type", "charset", "text"):
            plan_fields[column] = char_from_text(row[column])

        plan_fields["start_year"] = first_year
        plan_fields["end_year"] = last_year
        plan_fields["date_last_found"] = date_from_text(row["date_retrieved"])

        # only trust the title once it has been marked as checked
        title_confirmed = char_from_text(row["title_checked"]).lower() == "y"
        plan_fields["title"] = (
            char_from_text(row["title"]) if title_confirmed else "")

        return plan_fields
Beispiel #5
0
    def get_changes(self):
        """Work out and report what importing the processed CSV would change.

        Reads ``settings.PROCESSED_CSV`` and compares it with the database
        without modifying any rows. Results are stored on the instance:

        * ``plans_to_process``: row index -> "new_council", "add" or "update"
        * ``plans_to_delete``: gss_code -> set of plan URLs that are in the
          database but not in the sheet for that council
        * ``councils_in_sheet`` / ``councils_with_plan_in_sheet``: sets of
          gss codes (the latter also includes councils due to be removed)
        * ``start_council_plan_count``, ``start_document_count`` and
          ``start_plan_count``: counts taken before any change

        A summary of the pending changes is emitted via ``print_change``.
        """
        self.plans_to_process = {}
        df = pd.read_csv(settings.PROCESSED_CSV)
        council_add_count = 0
        plan_add_count = 0
        plan_update_count = 0
        plans_to_import = {}
        councils_in_sheet = set()
        councils_with_plan_in_sheet = set()

        self.start_council_plan_count = (Council.objects.annotate(
            num_plans=Count("plandocument")).filter(
                Q(plandocument__document_type=PlanDocument.ACTION_PLAN)
                | Q(plandocument__document_type=PlanDocument.CLIMATE_STRATEGY),
                num_plans__gt=0,
            ).count())
        self.start_document_count = PlanDocument.objects.count()
        self.start_plan_count = PlanDocument.objects.filter(
            Q(document_type=PlanDocument.ACTION_PLAN)
            | Q(document_type=PlanDocument.CLIMATE_STRATEGY)).count()

        for index, row in df.iterrows():
            gss_code = char_from_text(row["gss_code"])
            councils_in_sheet.add(gss_code)

            council_exists = Council.objects.filter(gss_code=gss_code).exists()

            if not council_exists:
                council_add_count += 1
                self.print_change("adding new council: %s",
                                  row["council"],
                                  verbosity=2)
                if not pd.isnull(row["url"]):
                    self.plans_to_process[index] = "new_council"
                    councils_with_plan_in_sheet.add(gss_code)
                    plan_add_count += 1
                    self.print_change("adding new plan for %s",
                                      row["council"],
                                      verbosity=2)
            elif not pd.isnull(row["url"]):
                councils_with_plan_in_sheet.add(gss_code)
                council = Council.objects.get(gss_code=gss_code)
                plan_exists = PlanDocument.objects.filter(
                    council=council, url=row["url"]).exists()

                # remember every URL the sheet lists for this council so that
                # plans missing from the sheet can be found afterwards
                plans_to_import.setdefault(gss_code, set()).add(row["url"])
                if not plan_exists:
                    self.plans_to_process[index] = "add"
                    plan_add_count += 1
                    self.print_change("adding new plan for %s",
                                      row["council"],
                                      verbosity=2)
                else:
                    plan = PlanDocument.objects.get(council=council,
                                                    url=row["url"])
                    # one differing field is enough to need an update, so
                    # stop comparing at the first difference
                    has_diff = any(
                        getattr(plan, key) != value for key, value in
                        self.get_plan_defaults_from_row(row).items())

                    if has_diff:
                        self.plans_to_process[index] = "update"
                        plan_update_count += 1
                        self.print_change("updating plan for %s",
                                          row["council"],
                                          verbosity=2)

        plans_to_delete = {}
        plans_to_delete_count = 0
        for council_code, sheet_urls in plans_to_import.items():
            council = Council.objects.get(gss_code=council_code)
            stale_plans = PlanDocument.objects.filter(
                council=council).exclude(url__in=sheet_urls)
            for plan in stale_plans:
                plans_to_delete_count += 1
                plans_to_delete.setdefault(council_code, set()).add(plan.url)
                self.print_change("deleting plan for %s",
                                  council.name,
                                  verbosity=2)

        # if a council isn't in the sheet we should remove it entirely from the database
        councils_to_remove = Council.objects.exclude(
            gss_code__in=councils_in_sheet)
        councils_to_remove_count = councils_to_remove.count()

        # if a council is going to be removed completely then exclude it from the
        # list of councils where we're going to remove the plans
        councils_with_plan_in_sheet.update(
            council.gss_code for council in councils_to_remove)

        # if a council is in the sheet but no longer has a plan we should remove all
        # their plans
        plans_from_removed_councils = PlanDocument.objects.exclude(
            council__gss_code__in=councils_with_plan_in_sheet)
        # The summary line below talks about councils, so count the distinct
        # councils owning these plans rather than the plans themselves.
        councils_losing_all_plans_count = len(
            set(plans_from_removed_councils.values_list("council_id",
                                                        flat=True)))

        if self.verbosity >= 2:
            for council in councils_to_remove.all():
                self.print_change("%s will be completely removed" %
                                  council.name)

            # one plan per council, used only to name the council
            plans_by_council = plans_from_removed_councils.distinct(
                "council__gss_code")
            for plan in plans_by_council:
                self.print_change("%s will have all plans removed" %
                                  plan.council.name)

        self.plans_to_delete = plans_to_delete
        self.councils_in_sheet = councils_in_sheet
        self.councils_with_plan_in_sheet = councils_with_plan_in_sheet

        if council_add_count > 0:
            self.print_change(
                "%d council%s will be added",
                council_add_count,
                pluralize(council_add_count),
            )
        if plan_add_count > 0:
            self.print_change("%d plan%s will be added", plan_add_count,
                              pluralize(plan_add_count))
        if plan_update_count > 0:
            self.print_change(
                "%d plan%s will be updated",
                plan_update_count,
                pluralize(plan_update_count),
            )
        if plans_to_delete_count > 0:
            self.print_change(
                "%d plan%s will be deleted",
                plans_to_delete_count,
                pluralize(plans_to_delete_count),
            )
        if councils_losing_all_plans_count > 0:
            self.print_change(
                "%d council%s will have all plans removed",
                councils_losing_all_plans_count,
                pluralize(councils_losing_all_plans_count),
            )
        if councils_to_remove_count > 0:
            council_list = [council.name for council in councils_to_remove]
            self.print_change(
                "%d council%s will be completely removed: [ %s ]",
                councils_to_remove_count,
                pluralize(councils_to_remove_count),
                ", ".join(council_list),
            )
Beispiel #6
0
    def update_database(self):
        """Apply the processed CSV to the database.

        For every row of ``settings.PROCESSED_CSV`` the Council is fetched or
        created, its changeable fields are synced with the sheet (saving only
        when something differs), and, for rows listed in
        ``self.plans_to_process``, the PlanDocument is created or updated
        from the file at ``plan_path``.

        Afterwards plans and councils that are no longer in the sheet are
        deleted, using ``self.councils_with_plan_in_sheet``,
        ``self.plans_to_delete`` and ``self.councils_in_sheet``; the code here
        only reads those attributes. The ``end_council_plan_count``,
        ``end_document_count`` and ``end_plan_count`` attributes are then set.
        """
        df = pd.read_csv(settings.PROCESSED_CSV)
        for index, row in df.iterrows():
            council_url = char_from_text(row["website_url"])
            twitter_url = char_from_text(row["twitter_url"])
            twitter_name = char_from_text(row["twitter_name"])
            region = char_from_text(row["region"])
            county = char_from_text(row["county"])
            council, created = Council.objects.get_or_create(
                authority_code=char_from_text(row["authority_code"]),
                country=Council.country_code(row["country"]),
                defaults={
                    "authority_type": char_from_text(row["authority_type"]),
                    "name": row["council"],
                    "slug": PlanDocument.council_slug(row["council"]),
                    "gss_code": char_from_text(row["gss_code"]),
                    "whatdotheyknow_id": integer_from_text(row["wdtk_id"]),
                    "mapit_area_code": char_from_text(row["mapit_area_code"]),
                    "website_url": council_url,
                    "twitter_url": twitter_url,
                    "twitter_name": twitter_name,
                    "county": county,
                    "region": region,
                },
            )

            # check the council things that might change
            changed = False

            authority_type = char_from_text(row["authority_type"])
            if authority_type != council.authority_type:
                council.authority_type = authority_type
                changed = True

            if row["council"] != council.name:
                council.name = row["council"]
                council.slug = PlanDocument.council_slug(row["council"])
                changed = True

            gss_code = char_from_text(row["gss_code"])
            if gss_code != council.gss_code:
                council.gss_code = gss_code
                changed = True

            # an empty website in the sheet never clears a stored one
            if council_url != "" and council.website_url != council_url:
                council.website_url = council_url
                changed = True

            # Only flag a change when the twitter details really differ.
            # Testing `council.twitter_name != ""` as well would force a save
            # on every run for any council that has a twitter name, without
            # altering the stored values.
            if (council.twitter_name != twitter_name
                    or council.twitter_url != twitter_url):
                council.twitter_url = twitter_url
                council.twitter_name = twitter_name
                changed = True

            if council.region != region:
                council.region = region
                changed = True

            if council.county != county:
                council.county = county
                changed = True

            if changed:
                council.save()

            if not pd.isnull(row["url"]) and index in self.plans_to_process:
                # the context manager makes sure the plan file is closed once
                # the document has been stored, even if saving fails
                with open(row["plan_path"], "rb") as document_file:
                    defaults = {"file": File(document_file)}
                    defaults.update(self.get_plan_defaults_from_row(row))

                    plan_document, created = PlanDocument.objects.update_or_create(
                        url=row["url"],
                        url_hash=PlanDocument.make_url_hash(row["url"]),
                        council=council,
                        defaults=defaults,
                    )
                    if created:
                        plan_document.date_first_found = date_from_text(
                            row["date_retrieved"])
                        plan_document.save()

        # drop every plan belonging to a council with no plan in the sheet
        PlanDocument.objects.exclude(
            council__gss_code__in=self.councils_with_plan_in_sheet).delete()

        # drop individual plans that are no longer listed for their council
        for council_code, stale_urls in self.plans_to_delete.items():
            council = Council.objects.get(gss_code=council_code)
            PlanDocument.objects.filter(council=council,
                                        url__in=stale_urls).delete()

        Council.objects.exclude(gss_code__in=self.councils_in_sheet).delete()

        self.end_council_plan_count = (Council.objects.annotate(
            num_plans=Count("plandocument")).filter(
                Q(plandocument__document_type=PlanDocument.ACTION_PLAN)
                | Q(plandocument__document_type=PlanDocument.CLIMATE_STRATEGY),
                num_plans__gt=0,
            ).count())
        self.end_document_count = PlanDocument.objects.count()
        self.end_plan_count = PlanDocument.objects.filter(
            Q(document_type=PlanDocument.ACTION_PLAN)
            | Q(document_type=PlanDocument.CLIMATE_STRATEGY)).count()