Example #1
0
def process_row(row):
    """Upsert the Building and the Elevator described by one source row.

    The building is looked up by ``elbi`` (column ``LICNO``) and the elevator
    by ``decal`` (column ``SUB_NO``); every other mapped column is passed as
    ``defaults`` to ``obj_update_or_create``.  Nothing is returned.
    """
    # (model field, source column) pairs, in the order they are read.
    building_columns = (
        ('name_1', 'BNAME1'),
        ('name_2', 'BNAME2'),
        ('address_1', 'BADDR1'),
        ('address_2', 'BADDR2'),
        ('city', 'BCITY'),
        ('zip_code', 'BZIP'),
        ('county', 'BCOUNTY'),
        ('owner', 'ONAME1'),
        ('contact', 'CNAME1'),
    )
    elevator_columns = (
        ('tdlr_id', 'IDNO'),
        ('last_inspection', 'DT_CRT_INS'),
        ('next_inspection', 'DT_EXPIRY'),
        ('last_5year', 'ELV_5YEAR'),
        ('equipment_type', 'EQUIPMENT_TYPE'),
        ('drive_type', 'DRIVE_TYPE'),
        ('floors', 'FLOORS'),
        ('year_installed', 'YR_INSTALL'),
    )

    building_fields = {field: row[column] for field, column in building_columns}
    building, _building_created = obj_update_or_create(
        Building, elbi=row['LICNO'], defaults=building_fields)

    elevator_fields = {field: row[column] for field, column in elevator_columns}
    # The elevator points at the building that was just created/updated.
    elevator_fields['building'] = building
    _elevator, _elevator_created = obj_update_or_create(
        Elevator, decal=row['SUB_NO'], defaults=elevator_fields)
Example #2
0
    def handle(self, csv, *args, **options):
        """Import receipts from the windows-1252 encoded CSV file at path ``csv``.

        For every data row a location is fetched through ``Location_get`` and a
        ``Receipt`` keyed on (``tabc_permit``, ``date``) is created or updated.
        """
        assert os.path.isfile(csv)

        with open(csv, "r", encoding="windows-1252") as csv_file:
            # First pass: count the lines so the progress bar has a total.
            line_total = 0
            for _line in csv_file:
                line_total += 1
            csv_file.seek(0)

            # One line is subtracted for the header row DictReader consumes.
            records = csv_lib.DictReader(csv_file)
            for record in tqdm(records, total=line_total - 1):
                # NOTE(review): import_data_from_api unpacks Location_get(...)
                # into (location, created); here the whole return value is
                # stored -- confirm which shape Location_get returns.
                location = Location_get(
                    street_address=record["Location Address"],
                    city=record["Location City"],
                    state=record["Location State"],
                    zip=record["Location Zip"],
                    name=record["Location Name"],
                )

                permit_number = record["TABC Permit Number"]
                obligation_end = date_fmt(record["Obligation End Date"])
                receipt_fields = {
                    "taxpayer_name": record["Taxpayer Name"],
                    "tax_number": record["Taxpayer Number"],
                    "liquor": record["Liquor Receipts"],
                    "wine": record["Wine Receipts"],
                    "beer": record["Beer Receipts"],
                    "cover": record["Cover Charge Receipts"],
                    "total": record["Total Receipts"],
                    "location_name": record["Location Name"],
                    "location_number": record["Location Number"],
                    "county_code": record["Location County"],
                    "location": location,
                }
                _receipt, _was_created = obj_update_or_create(
                    Receipt,
                    tabc_permit=permit_number,
                    date=obligation_end,
                    defaults=receipt_fields,
                )
Example #3
0
def import_data_from_api(data: List[dict]) -> int:
    """Create or update one ``Receipt`` per API row.

    Each row's location is resolved through ``Location_get`` (newly created
    locations are printed), then the receipt keyed on (``tabc_permit``,
    ``date``) is upserted.

    Returns the number of receipts that were newly created.
    """
    new_receipts = 0
    for record in data:
        location, location_created = Location_get(
            street_address=record["taxpayer_address"],
            city=record["location_city"],
            state=record["location_state"],
            zip=record["location_zip"],
            name=record["location_name"],
        )
        if location_created:
            print(f"Created Location: {location.pk} - {location}")

        permit_number = record["tabc_permit_number"]
        # Keep only the part of the timestamp before the "T" separator.
        obligation_end = record["obligation_end_date_yyyymmdd"].split("T")[0]
        receipt_fields = {
            "taxpayer_name": record["taxpayer_name"],
            "tax_number": record["taxpayer_number"],
            "liquor": record["liquor_receipts"],
            "wine": record["wine_receipts"],
            "beer": record["beer_receipts"],
            "cover": record["cover_charge_receipts"],
            "total": record["total_receipts"],
            "location_name": record["location_name"],
            "location_number": record["location_number"],
            "county_code": record["location_county"],
            "location": location,
        }
        _receipt, receipt_created = obj_update_or_create(
            Receipt,
            tabc_permit=permit_number,
            date=obligation_end,
            defaults=receipt_fields,
        )
        if receipt_created:
            new_receipts += 1
    return new_receipts
Example #4
0
def pull_vpcs(region=None):
    """Mirror every VPC boto reports for ``region`` into the ``VPC`` model.

    When ``region`` is omitted, the ``Region`` with ``id=1`` is used (as
    defined by our initial data).  Each VPC is created or updated by its id.
    """
    if region is None:
        region = Region.objects.get(id=1)  # as defined by our initial data

    region_code = region.code
    credentials = {
        'aws_access_key_id': settings.AWS_ACCESS_KEY_ID,
        'aws_secret_access_key': settings.AWS_SECRET_ACCESS_KEY,
    }
    connection = boto.vpc.connect_to_region(region_code, **credentials)

    for vpc in connection.get_all_vpcs():
        vpc_fields = {
            'region': region,
            'name': vpc.tags.get('Name'),
            'cidr': vpc.cidr_block,
            'state': vpc.state,
            'tags': vpc.tags,
        }
        obj_update_or_create(VPC, id=vpc.id, defaults=vpc_fields)
    def test_workflow(self):
        """Walk through create, no-op update and real update, checking query counts."""
        lookup = {"text": "hi"}

        # Scenario 1: the object does not exist yet.
        # Expected queries: SELECT, BEGIN, INSERT.
        with self.assertNumQueries(3):
            foo, created = obj_update_or_create(
                FooModel, defaults={"slug": "leopard"}, **lookup)
        self.assertTrue(created)
        self.assertEqual(foo.text, "hi")
        self.assertEqual(foo.slug, "leopard")

        # Scenario 2: same data again, so nothing needs to be written.
        # Expected queries: SELECT only.
        with self.assertNumQueries(1):
            foo, created = obj_update_or_create(
                FooModel, defaults={"slug": "leopard"}, **lookup)
        self.assertFalse(created)
        self.assertEqual(foo.text, "hi")
        self.assertEqual(foo.slug, "leopard")

        # Scenario 3: changed data for the existing object.
        # Expected queries: SELECT, BEGIN (only if Django<2.2), then the write
        # of the changed fields (the row already exists, so presumably an
        # UPDATE).  Django no longer always starts a transaction when a single
        # query is being performed
        # https://docs.djangoproject.com/en/2.2/releases/2.2/
        if django.VERSION < (2, 2):
            expected_queries = 3
        else:
            expected_queries = 2
        changed_fields = {
            "slug": "lemon",
            "decimal": "0.01"
        }
        with self.assertNumQueries(expected_queries):
            foo, created = obj_update_or_create(
                FooModel, defaults=changed_fields, **lookup)
        self.assertFalse(created)
        self.assertEqual(foo.text, "hi")
        self.assertEqual(foo.slug, "lemon")
        self.assertEqual(foo.decimal, "0.01")
Example #6
0
def pull_security_groups(region=None):
    """Mirror boto's security groups for ``region`` and rebuild their rules.

    When ``region`` is omitted, the ``Region`` with ``id=1`` is used (as
    defined by our initial data).  Groups are upserted first; their ingress
    and egress rules are replaced in a second pass.
    """
    if region is None:
        region = Region.objects.get(id=1)  # as defined by our initial data

    region_code = region.code
    credentials = {
        'aws_access_key_id': settings.AWS_ACCESS_KEY_ID,
        'aws_secret_access_key': settings.AWS_SECRET_ACCESS_KEY,
    }
    connection = boto.ec2.connect_to_region(region_code, **credentials)

    groups_by_id = {}
    boto_groups = connection.get_all_security_groups()

    # Pass 1: make sure every group exists locally and remember it by id.
    for boto_group in boto_groups:
        group_fields = {
            'description': boto_group.description,
            'name': boto_group.name,
            'region': region,
            'tags': boto_group.tags,
            'vpc_id': boto_group.vpc_id,
        }
        local_group, _created = obj_update_or_create(
            SecurityGroup,
            id=boto_group.id,
            defaults=group_fields,
        )
        groups_by_id[boto_group.id] = local_group

    def replace_rules(manager, boto_rules):
        # Wipe the relation, then re-add what boto reports, one rule at a time.
        manager.clear()  # TODO don't do unnecessary SQL
        for boto_rule in boto_rules:
            manager.add(*grant_to_rules(boto_rule, groups_by_id))

    # Pass 2: done separately in case a rule grants a security group we
    # did not know about yet during pass 1.
    for boto_group in boto_groups:
        local_group = groups_by_id[boto_group.id]
        replace_rules(local_group.rules, boto_group.rules)
        replace_rules(local_group.rules_egress, boto_group.rules_egress)
Example #7
0
def pull_ec2(region=None):
    """Mirror the EC2 instances boto reports for ``region`` into ``Instance``.

    When ``region`` is omitted, the ``Region`` with ``id=1`` is used (as
    defined by our initial data).  Each instance's security-group relation is
    cleared and rebuilt from boto's group list.
    """
    if region is None:
        region = Region.objects.get(id=1)  # as defined by our initial data

    region_code = region.code
    credentials = {
        'aws_access_key_id': settings.AWS_ACCESS_KEY_ID,
        'aws_secret_access_key': settings.AWS_SECRET_ACCESS_KEY,
    }
    connection = boto.ec2.connect_to_region(region_code, **credentials)

    for boto_instance in connection.get_only_instances():
        instance_fields = Instance.data_from_boto_ec2(boto_instance, region=region)
        # The id is the lookup key, so it must not also appear in the defaults.
        lookup_id = instance_fields.pop('id')
        instance, _created = obj_update_or_create(
            Instance,
            id=lookup_id,
            defaults=instance_fields,
        )

        # TODO don't do unnecessary SQL
        memberships = instance.security_groups
        memberships.clear()
        for boto_group in boto_instance.groups:
            local_group, _group_created = SecurityGroup.objects.get_or_create(
                id=boto_group.id, region=region)
            instance.security_groups.add(local_group)
        logger.debug(instance)
Example #8
0
def _save_page(meeting_data, doc_data, bandc: BandC) -> bool:
    """
    Save one page worth of data, updating BandC, creating Meetings, and Documents.

    Parameters
    ----------
    meeting_data
        Rows with "date" and "title" keys, one per meeting.
    doc_data
        Rows with "date", "title", "type" and "url" keys, one per document.
        Each "date" must also appear in ``meeting_data``.
    bandc
        The BandC the meetings are attached to; its ``latest_meeting`` is
        advanced when this page contains a newer meeting.

    Returns
    -------
        True if there's another page to process (always False for now)

    Raises
    ------
    KeyError
        If a document row's "date" has no matching meeting row.
    """
    logger.info("save_page %s", bandc)

    if not meeting_data:
        return False

    # Populate meetings
    meetings = {}
    newest_date = None
    newest_meeting = None
    for row in meeting_data:
        meeting, created = obj_update_or_create(
            Meeting,
            bandc=bandc,
            date=row["date"],
            defaults={"title": row["title"]})

        scrape_logger.log_meeting(meeting, created)
        meetings[row["date"]] = {
            "meeting": meeting,
            # URLs already stored for this meeting; whatever is left in this
            # set after the document pass was not seen on the page.
            "docs": set(meeting.documents.values_list("url", flat=True)),
        }
        # Track the newest meeting on the page regardless of row order, so
        # the latest-meeting check below does not depend on which row
        # happens to come last.
        if newest_date is None or newest_date < row["date"]:
            newest_date, newest_meeting = row["date"], meeting

    if newest_meeting is not None and (
        not bandc.latest_meeting or bandc.latest_meeting.date < newest_date
    ):
        bandc.latest_meeting = newest_meeting
        bandc.save()

    # Populate documents
    for row in doc_data:
        defaults = dict(title=row["title"], type=row["type"])
        if "/edims/document.cfm" in row["url"]:
            # The edims id is whatever follows the last "=" in the URL.
            defaults["edims_id"] = row["url"].rsplit("=", 1)[-1]
        page_meeting = meetings[row["date"]]
        doc, created = Document.objects.get_or_create(
            url=row["url"],
            meeting=page_meeting["meeting"],
            defaults=defaults,
        )
        scrape_logger.log_document(doc, created)
        if not created:
            # Still on the page, so not stale; discard() is a no-op when the
            # URL was not in the set.
            page_meeting["docs"].discard(row["url"])
        if doc.scrape_status == "toscrape":
            doc.refresh()

    # Look for stale documents
    stale_documents: List[str] = []
    for page_meeting in meetings.values():
        stale_documents.extend(page_meeting["docs"])

    # Deal with stale documents
    if stale_documents:
        print("These docs are stale:", stale_documents)
        Document.objects.filter(url__in=stale_documents).update(active=False)

    return False  # TODO