def process_row(row):
    """Upsert one Building and its Elevator from a single raw CSV row.

    The row's license number (``LICNO``) identifies the building; the
    decal number (``SUB_NO``) identifies the elevator, which is linked
    back to the building it was created/updated under.
    """
    building_fields = {
        "name_1": row["BNAME1"],
        "name_2": row["BNAME2"],
        "address_1": row["BADDR1"],
        "address_2": row["BADDR2"],
        "city": row["BCITY"],
        "zip_code": row["BZIP"],
        "county": row["BCOUNTY"],
        "owner": row["ONAME1"],
        "contact": row["CNAME1"],
    }
    building, _ = obj_update_or_create(
        Building,
        elbi=row["LICNO"],
        defaults=building_fields,
    )
    elevator_fields = {
        "tdlr_id": row["IDNO"],
        "last_inspection": row["DT_CRT_INS"],
        "next_inspection": row["DT_EXPIRY"],
        "last_5year": row["ELV_5YEAR"],
        "equipment_type": row["EQUIPMENT_TYPE"],
        "drive_type": row["DRIVE_TYPE"],
        "floors": row["FLOORS"],
        "year_installed": row["YR_INSTALL"],
        "building": building,
    }
    elevator, _ = obj_update_or_create(
        Elevator,
        decal=row["SUB_NO"],
        defaults=elevator_fields,
    )
def handle(self, csv, *args, **options):
    """Import mixed-beverage receipt rows from a windows-1252 CSV file.

    For every data row, look up (or create) the ``Location`` and then
    upsert the ``Receipt`` keyed on permit number + obligation end date.

    Parameters
    ----------
    csv : str
        Path to the CSV file to import.

    Raises
    ------
    FileNotFoundError
        If ``csv`` does not point at an existing file.  (Previously an
        ``assert``, which is silently stripped under ``python -O``.)
    """
    if not os.path.isfile(csv):
        raise FileNotFoundError(csv)
    with open(csv, "r", encoding="windows-1252") as fh:
        # Pre-count lines (minus the header) so tqdm can render a
        # progress bar.  NOTE(review): this is line-based, so multiline
        # quoted CSV fields would make the total slightly high — it is
        # only cosmetic for the progress display.
        row_count = sum(1 for _ in fh) - 1
        fh.seek(0)
        reader = csv_lib.DictReader(fh)
        for row in tqdm(reader, total=row_count):
            location = Location_get(
                street_address=row["Location Address"],
                city=row["Location City"],
                state=row["Location State"],
                zip=row["Location Zip"],
                name=row["Location Name"],
            )
            receipt, created = obj_update_or_create(
                Receipt,
                tabc_permit=row["TABC Permit Number"],
                date=date_fmt(row["Obligation End Date"]),
                defaults=dict(
                    taxpayer_name=row["Taxpayer Name"],
                    tax_number=row["Taxpayer Number"],
                    liquor=row["Liquor Receipts"],
                    wine=row["Wine Receipts"],
                    beer=row["Beer Receipts"],
                    cover=row["Cover Charge Receipts"],
                    total=row["Total Receipts"],
                    location_name=row["Location Name"],
                    location_number=row["Location Number"],
                    county_code=row["Location County"],
                    location=location,
                ),
            )
def import_data_from_api(data: List[dict]) -> int:
    """Upsert Receipts (and their Locations) from API rows.

    Returns the number of *newly created* Receipt records.

    NOTE(review): here ``Location_get`` is unpacked as a 2-tuple, but the
    sibling CSV importer uses its return value directly — confirm which
    calling convention is correct.
    """
    created_count = 0
    for row in data:
        location, location_created = Location_get(
            street_address=row["taxpayer_address"],
            city=row["location_city"],
            state=row["location_state"],
            zip=row["location_zip"],
            name=row["location_name"],
        )
        if location_created:
            print(f"Created Location: {location.pk} - {location}")
        # The API date looks like an ISO timestamp; keep only the date part.
        obligation_date = row["obligation_end_date_yyyymmdd"].partition("T")[0]
        receipt_defaults = dict(
            taxpayer_name=row["taxpayer_name"],
            tax_number=row["taxpayer_number"],
            liquor=row["liquor_receipts"],
            wine=row["wine_receipts"],
            beer=row["beer_receipts"],
            cover=row["cover_charge_receipts"],
            total=row["total_receipts"],
            location_name=row["location_name"],
            location_number=row["location_number"],
            county_code=row["location_county"],
            location=location,
        )
        receipt, receipt_created = obj_update_or_create(
            Receipt,
            tabc_permit=row["tabc_permit_number"],
            date=obligation_date,
            defaults=receipt_defaults,
        )
        if receipt_created:
            created_count += 1
    return created_count
def pull_vpcs(region=None):
    """Sync VPC records for *region* from AWS into the local database.

    Falls back to the Region with pk=1 (from our initial data fixture)
    when no region is given.
    """
    if region is None:
        region = Region.objects.get(id=1)  # as defined by our initial data
    conn = boto.vpc.connect_to_region(
        region.code,
        aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
    )
    for vpc in conn.get_all_vpcs():
        obj_update_or_create(
            VPC,
            id=vpc.id,
            defaults={
                "region": region,
                "name": vpc.tags.get('Name'),
                "cidr": vpc.cidr_block,
                "state": vpc.state,
                "tags": vpc.tags,
            },
        )
def test_workflow(self):
    """Exercise create → no-op update → real update, asserting query counts."""
    # Creating a row costs a SELECT (miss), BEGIN, and an INSERT.
    with self.assertNumQueries(3):
        obj, created = obj_update_or_create(
            FooModel, text="hi", defaults={"slug": "leopard"})
    self.assertTrue(created)
    self.assertEqual(obj.text, "hi")
    self.assertEqual(obj.slug, "leopard")
    # A no-change update should only need the SELECT.
    with self.assertNumQueries(1):
        obj, created = obj_update_or_create(
            FooModel, text="hi", defaults={"slug": "leopard"})
    self.assertFalse(created)
    self.assertEqual(obj.text, "hi")
    self.assertEqual(obj.slug, "leopard")
    # A real update: SELECT + INSERT, plus a BEGIN on older Django.
    # Since 2.2 Django no longer always starts a transaction for a
    # single query: https://docs.djangoproject.com/en/2.2/releases/2.2/
    expected = 3 if django.VERSION < (2, 2) else 2
    with self.assertNumQueries(expected):
        obj, created = obj_update_or_create(
            FooModel, text="hi",
            defaults={"slug": "lemon", "decimal": "0.01"})
    self.assertFalse(created)
    self.assertEqual(obj.text, "hi")
    self.assertEqual(obj.slug, "lemon")
    self.assertEqual(obj.decimal, "0.01")
def pull_security_groups(region=None):
    """Sync security groups (and their rules) for *region* from AWS.

    Two passes: first upsert every group so they all exist locally, then
    wire up ingress/egress rules — a rule may grant access to a group
    that appears later in the listing.
    """
    if region is None:
        region = Region.objects.get(id=1)  # as defined by our initial data
    conn = boto.ec2.connect_to_region(
        region.code,
        aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
    )
    boto_groups = conn.get_all_security_groups()
    # Pass 1: upsert groups, remembering each model instance by AWS id.
    sg_cache = {}
    for boto_group in boto_groups:
        sg, _ = obj_update_or_create(
            SecurityGroup,
            id=boto_group.id,
            defaults={
                'description': boto_group.description,
                'name': boto_group.name,
                'region': region,
                'tags': boto_group.tags,
                'vpc_id': boto_group.vpc_id,
            },
        )
        sg_cache[boto_group.id] = sg
    # Pass 2: rebuild rule relations now that every group is cached.
    for boto_group in boto_groups:
        sg = sg_cache[boto_group.id]
        sg.rules.clear()  # TODO don't do unnecessary SQL
        for grant in boto_group.rules:
            sg.rules.add(*grant_to_rules(grant, sg_cache))
        sg.rules_egress.clear()  # TODO don't do unnecessary SQL
        for grant in boto_group.rules_egress:
            sg.rules_egress.add(*grant_to_rules(grant, sg_cache))
def pull_ec2(region=None):
    """Sync EC2 instances for *region* from AWS, relinking security groups."""
    if region is None:
        region = Region.objects.get(id=1)  # as defined by our initial data
    conn = boto.ec2.connect_to_region(
        region.code,
        aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
    )
    for boto_instance in conn.get_only_instances():
        fields = Instance.data_from_boto_ec2(boto_instance, region=region)
        # The id is the lookup key, not part of the updatable defaults.
        instance, _ = obj_update_or_create(
            Instance,
            id=fields.pop('id'),
            defaults=fields,
        )
        # Rebuild the M2M from scratch.  TODO don't do unnecessary SQL
        instance.security_groups.clear()
        for boto_group in boto_instance.groups:
            sg, _ = SecurityGroup.objects.get_or_create(
                id=boto_group.id, region=region)
            instance.security_groups.add(sg)
        logger.debug(instance)
def _save_page(meeting_data, doc_data, bandc: BandC) -> bool:
    """
    Save one page worth of data, updating BandC, creating Meetings, and
    Documents.

    Parameters
    ----------
    meeting_data : list of dict
        Scraped meeting rows; each needs "date" and "title".
    doc_data : list of dict
        Scraped document rows; each needs "url", "date", "title", "type".
    bandc : BandC
        The board/commission these meetings belong to; its
        ``latest_meeting`` pointer is advanced as needed.

    Returns
    -------
    True if there's another page to process (always False for now)
    """
    logger.info("save_page %s", bandc)
    if not meeting_data:
        return False
    # Populate meetings.  For each date, remember the meeting and the set
    # of document URLs we already know about, so we can detect stale docs.
    meetings = {}
    for row in meeting_data:
        meeting, created = obj_update_or_create(
            Meeting,
            bandc=bandc,
            date=row["date"],
            defaults={"title": row["title"]})
        scrape_logger.log_meeting(meeting, created)
        meetings[row["date"]] = {
            "meeting": meeting,
            "docs": set(meeting.documents.values_list("url", flat=True)),
        }
        if not bandc.latest_meeting or bandc.latest_meeting.date < row["date"]:
            bandc.latest_meeting = meeting
            bandc.save()
    # Populate documents
    for row in doc_data:
        defaults = dict(title=row["title"], type=row["type"])
        if "/edims/document.cfm" in row["url"]:
            # EDIMS urls embed the document id after the last "=".
            defaults["edims_id"] = row["url"].rsplit("=", 2)[-1]
        doc, created = Document.objects.get_or_create(
            url=row["url"],
            meeting=meetings[row["date"]]["meeting"],
            defaults=defaults,
        )
        scrape_logger.log_document(doc, created)
        if not created:
            # A pre-existing doc is not stale; discard() is a no-op when
            # the url isn't in the set (replaces try/remove/except).
            meetings[row["date"]]["docs"].discard(row["url"])
        if doc.scrape_status == "toscrape":
            doc.refresh()
    # Look for stale documents: URLs we knew about that this page no
    # longer lists.
    stale_documents: List[str] = []
    for meeting in meetings.values():
        stale_documents.extend(meeting["docs"])
    # Deal with stale documents
    if stale_documents:
        print("These docs are stale:", stale_documents)
        Document.objects.filter(url__in=stale_documents).update(active=False)
    return False  # TODO