def import_org(self):
    """Import a single organization from the export CSV.

    Reads the first data row of organizations.csv; if it carries a uuid the
    row is merged into that existing Organization, otherwise a new one is
    created.  Writes an old-id -> new-uuid row to organizations_map.csv.

    Returns a tuple of (organization, created).
    """
    self.stdout.write("Begin Organization Import {}".format(timezone.now()))
    in_path = f"{self.bucket_path}organizations.csv"
    out_path = f"{self.bucket_path}organizations_map.csv"
    with smart_open(in_path, "r") as infile, smart_open(out_path, "w") as outfile:
        row_reader = csv.reader(infile)
        map_writer = csv.writer(outfile)
        next(row_reader)  # skip the header row
        row = next(row_reader)
        existing_uuid = row[8]
        created = not existing_uuid
        if created:
            self.stdout.write(f"Creating {row[1]}")
            org = Organization.objects.create(
                name=row[1],
                slug=row[2],
                individual=False,
                private=row[9] == "t",
                verified_journalist=True,
                created_at=parse(row[3]).replace(tzinfo=pytz.UTC),
                updated_at=parse(row[4]).replace(tzinfo=pytz.UTC),
            )
        else:
            # merge into the already-existing organization
            org = Organization.objects.get(uuid=existing_uuid)
            org.verified_journalist = True
            org.save()
            self.stdout.write(f"Merging {row[1]} into {org.name}")
        map_writer.writerow([row[0], org.uuid])
        self.stdout.write("End Organization Import {}".format(timezone.now()))
        return org, created
def import_orgs(self):
    """Bulk-import organizations from the squarelet export CSV.

    Creates an Organization per row, attaches receipt emails, and records
    the uuid -> pk mapping in the module-level ``org_uuid2pk`` dict for
    later membership imports.
    """
    print("Begin Organization Import {}".format(timezone.now()))
    # resolve plan slugs once up front instead of querying per row
    plans_by_slug = {plan.slug: plan for plan in Plan.objects.all()}
    csv_path = f"s3://{BUCKET}/squarelet_export/orgs.csv"
    with smart_open(csv_path, "r") as infile:
        rows = csv.reader(infile)
        next(rows)  # skip the header row
        for index, row in enumerate(rows):
            if index % 1000 == 0:
                # progress heartbeat every 1000 rows
                print("Org {} - {}".format(index, timezone.now()))
            plan = plans_by_slug[row[3]]
            organization = Organization.objects.create(
                uuid=row[0],
                name=row[1],
                slug=row[2],
                plan=plan,
                next_plan=plan,
                individual=row[4] == "True",
                private=row[5] == "True",
                customer_id=row[6] or None,
                subscription_id=row[7] or None,
                payment_failed=row[8] == "True",
                update_on=row[9],
                max_users=int(row[10]),
                avatar=row[12],
            )
            organization.set_receipt_emails(
                email for email in row[11].split(",") if email
            )
            org_uuid2pk[row[0]] = organization.pk
    print("End Organization Import {}".format(timezone.now()))
def _import_email(self, form):
    """Import the results asynchronously.

    Uploads the submitted CSV to a unique S3 path, kicks off the
    ``mass_import`` celery task, and tells the user the results will be
    emailed.  The S3 key embeds today's date plus an md5 of
    (timestamp, SECRET_KEY, user pk) so concurrent uploads cannot collide.
    """
    today = date.today()
    # NOTE(review): {y:4d} space-pads rather than zero-pads; harmless for
    # 4-digit years but {y:04d} would be the conventional spelling
    file_path = (
        "s3://{bucket}/agency_mass_import/{y:4d}/{m:02d}/{d:02d}/{md5}/"
        "import.csv".format(
            bucket=settings.AWS_STORAGE_BUCKET_NAME,
            y=today.year,
            m=today.month,
            d=today.day,
            md5=md5("{}{}{}".format(
                int(time()), settings.SECRET_KEY,
                self.request.user.pk).encode("utf8")).hexdigest(),
        ))
    # stream the uploaded file to S3 chunk by chunk
    with smart_open(file_path, "wb") as file_:
        for chunk in self.request.FILES["csv"].chunks():
            file_.write(chunk)
    mass_import.delay(
        self.request.user.pk,
        file_path,
        form.cleaned_data.get("match_or_import") == "match",
        form.cleaned_data.get("dry_run"),
    )
    messages.success(
        self.request,
        "Importing agencies, results will be emailed to you when completed",
    )
    return self.render_to_response(self.get_context_data())
def export_members(self):
    """Export memberships

    Writes one row per Membership to squarelet_export/members.csv on S3,
    recording which user belongs to which organization.  The is_admin
    column is derived from whether the user owns the organization.
    (Python 2 module — uses print statements.)
    """
    print 'Begin Membership Export - {}'.format(timezone.now())
    key = self.bucket.new_key('squarelet_export/members.csv')
    with smart_open(key, 'wb') as out_file:
        writer = csv.writer(out_file)
        # header row
        writer.writerow([
            'user_uuid',
            'org_uuid',
            'user_username',
            'org_name',
            'is_admin',
        ])
        total = Membership.objects.count()
        # select_related avoids an extra query per row for profile/owner
        for i, member in enumerate(
            Membership.objects.select_related(
                'user__profile', 'organization__owner'
            )
        ):
            if i % 1000 == 0:
                # progress heartbeat every 1000 rows
                print 'Member {} / {} - {}'.format(i, total, timezone.now())
            writer.writerow([
                member.user.profile.uuid,
                member.organization.uuid,
                member.user.username,
                member.organization.name,
                # admin flag is inferred from organization ownership
                member.organization.owner == member.user,
            ])
    print 'End Membership Export - {}'.format(timezone.now())
def run(self):
    """Task entry point

    Streams the generated file to S3 via smart_open using multipart
    upload, then marks the object public and sends a notification.
    """
    with smart_open(
        self.key, 'wb', s3_min_part_size=settings.AWS_S3_MIN_PART_SIZE
    ) as out_file:
        self.generate_file(out_file)
    # ACL is applied after the upload context closes (object fully written)
    self.key.set_acl('public-read')
    self.send_notification()
def export_orgs(self): """Export organizations""" # pylint: disable=protected-access print 'Begin Organization Export - {}'.format(timezone.now()) key = self.bucket.new_key('squarelet_export/orgs.csv') with smart_open(key, 'wb') as out_file: writer = csv.writer(out_file) writer.writerow([ 'uuid', 'name', 'slug', 'plan', 'individual', 'private', 'customer_id', 'subscription_id', 'payment_failed', 'date_update', 'max_users', 'receipt_emails', 'avatar_url', ]) total = Organization.objects.count() customer_ids = set() for i, org in enumerate( Organization.objects.select_related('owner__profile', 'plan') .prefetch_related( 'owner__receipt_emails', 'owner__organization_set' ) ): if i % 1000 == 0: print 'Organization {} / {} - {}'.format( i, total, timezone.now() ) if ( len(org.owner.organization_set.all()) <= 1 or not org.individual ) and not org.owner.profile.customer_id in customer_ids: customer_id = org.owner.profile.customer_id customer_ids.add(customer_id) else: customer_id = '' writer.writerow([ org.uuid, org.name, org.slug, org.plan.slug, org.individual, org.private, customer_id, org.stripe_id, org.owner.profile.payment_failed, org.date_update, org.max_users, ','.join(r.email for r in org.owner.receipt_emails.all()), org.owner.profile.avatar.name if org.individual else '', ]) print 'End Organization Export - {}'.format(timezone.now())
def run(self):
    """Generate the output file on S3, set its ACL, and send a notification.

    The file is streamed through smart_open with multipart upload; the ACL
    is applied via boto3 after the upload context has closed.
    """
    with smart_open(
        self.key, self.mode, s3_min_part_size=settings.AWS_S3_MIN_PART_SIZE
    ) as out_file:
        self.generate_file(out_file)
    # apply the configured ACL to the freshly written object
    object_acl = boto3.resource("s3").ObjectAcl(self.bucket, self.file_key)
    object_acl.put(ACL=settings.AWS_DEFAULT_ACL)
    self.send_notification()
def import_laws(file_name):
    """Import laws from a spreadsheet.

    Reads the CSV at ``file_name`` in the media bucket; for each row,
    updates or creates the Law for the named (non-local) jurisdiction and
    rebuilds its key-date years from the semicolon-delimited key_dates
    column ("reason year;reason year;...").
    """
    # pylint: disable=too-many-locals
    key = f"s3://{settings.AWS_MEDIA_BUCKET_NAME}/{file_name}"
    with smart_open(key) as law_file:
        law_reader = csv.reader(law_file)
        for (
            jurisdiction_name,
            citation,
            url,
            name,
            shortname,
            key_dates,
            days,
            use_business_days,
            waiver,
            has_appeal,
            requires_proxy,
            fee_schedule,
            trade_secrets,
            penalties,
            cover_judicial,
            cover_legislative,
            cover_executive,
        ) in law_reader:
            # BUG FIX: .filter() returns a QuerySet, which Django rejects as
            # the value of an exact lookup in update_or_create; we need the
            # single matching Jurisdiction instance, so use .get()
            jurisdiction = Jurisdiction.objects.exclude(level="l").get(
                name=jurisdiction_name)
            law, _ = Law.objects.update_or_create(
                jurisdiction=jurisdiction,
                defaults={
                    "citation": citation,
                    "url": url,
                    "name": name,
                    "shortname": shortname,
                    "days": int(days) if days else None,
                    "use_business_days": use_business_days == "TRUE",
                    "waiver": waiver,
                    "has_appeal": has_appeal == "TRUE",
                    "requires_proxy": requires_proxy == "TRUE",
                    "fee_schedule": fee_schedule == "TRUE",
                    "trade_secrets": trade_secrets == "TRUE",
                    "penalties": penalties == "TRUE",
                    "cover_judicial": cover_judicial == "TRUE",
                    "cover_legislative": cover_legislative == "TRUE",
                    "cover_executive": cover_executive == "TRUE",
                },
            )
            if key_dates:
                # replace all existing key-date years with the new set
                law.years.all().delete()
                # renamed from `date` to avoid shadowing datetime.date
                for key_date in key_dates.split(";"):
                    reason, year = key_date.split()
                    law.years.create(reason=reason, year=year)
def generate_file(self, out_file):
    """Run the agency match or import and write the results as CSV.

    Reads the uploaded CSV from self.file_path; in match mode only matches
    are reported, otherwise a (possibly dry-run) import is performed.  The
    output columns come from self.match_fields / self.import_fields.
    """
    out_writer = csv.writer(out_file)
    with smart_open(self.file_path, "r") as in_file:
        importer = Importer(CSVReader(in_file))
        if self.match:
            columns = self.match_fields
            results = importer.match()
        else:
            columns = self.import_fields
            results = importer.import_(dry=self.dry)
        out_writer.writerow(columns)
        for result in results:
            # missing keys render as empty cells
            out_writer.writerow(result.get(column, "") for column in columns)
def export_users(self):
    """Export users

    Writes one row per User to squarelet_export/users.csv on S3, joining
    in profile fields (uuid, full name, email flags, avatar, autologin).
    The source column is hard-coded to 'muckrock'.
    (Python 2 module — uses print statements.)
    """
    print 'Begin User Export - {}'.format(timezone.now())
    key = self.bucket.new_key('squarelet_export/users.csv')
    with smart_open(key, 'wb') as out_file:
        writer = csv.writer(out_file)
        # header row
        writer.writerow([
            'uuid',
            'username',
            'email',
            'password',
            'full_name',
            'is_staff',
            'is_active',
            'is_superuser',
            'email_confirmed',
            'email_failed',
            'is_agency',
            'avatar_url',
            'use_autologin',
            'source',
        ])
        total = User.objects.count()
        # select/prefetch the related rows the export touches
        for i, user in enumerate(
            User.objects.select_related('profile')
            .prefetch_related('receipt_emails')
        ):
            if i % 1000 == 0:
                # progress heartbeat every 1000 rows
                print 'User {} / {} - {}'.format(i, total, timezone.now())
            writer.writerow([
                user.profile.uuid,
                user.username,
                user.email,
                # hashed password, exported as-is
                user.password,
                user.profile.full_name,
                user.is_staff,
                user.is_active,
                user.is_superuser,
                user.profile.email_confirmed,
                user.profile.email_failed,
                # agency users are those linked to an agency profile
                user.profile.agency is not None,
                user.profile.avatar.name if user.profile.avatar else '',
                user.profile.use_autologin,
                'muckrock',
            ])
    print 'End User Export - {}'.format(timezone.now())
def inner():
    """Inner function for caching.

    Fetches the month-to-date pageview count from the Google Analytics
    Reporting API and returns it as a comma-grouped string, or "Error" if
    the request or response parsing fails.
    """
    today = date.today()
    first_of_month = today.replace(day=1)
    # the GA service-account keyfile is stored on s3
    keyfile_uri = f"s3://{settings.AWS_STORAGE_BUCKET_NAME}/google/analytics_key.json"
    with smart_open(keyfile_uri) as key_file:
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(
            json.loads(key_file.read()),
            ["https://www.googleapis.com/auth/analytics.readonly"],
        )
    report_request = {
        "reportRequests": [{
            "viewId": settings.VIEW_ID,
            "dateRanges": [{
                "startDate": first_of_month.isoformat(),
                "endDate": today.isoformat(),
            }],
            "metrics": [{"expression": "ga:pageviews"}],
        }]
    }
    try:
        analytics = build(
            "analyticsreporting",
            "v4",
            credentials=credentials,
            cache_discovery=False,
        )
        response = analytics.reports().batchGet(body=report_request).execute()
    except HttpError:
        return "Error"
    try:
        # google really buries the useful data in the response
        # remove format if we want to go back to a comparison
        pageviews = (
            response["reports"][0]["data"]["rows"][0]["metrics"][0]["values"][0]
        )
        return "{:,}".format(int(pageviews))
    except KeyError:
        return "Error"
def export_date_joined(self):
    """Export date joined data

    Writes a (uuid, date_joined) row per User to
    squarelet_export/date_joined.csv on S3, keyed by the profile uuid so
    the importer can backdate accounts.
    (Python 2 module — uses print statements.)
    """
    print 'Begin Date Joined Export - {}'.format(timezone.now())
    key = self.bucket.new_key('squarelet_export/date_joined.csv')
    with smart_open(key, 'wb') as out_file:
        writer = csv.writer(out_file)
        # header row
        writer.writerow([
            'uuid',
            'date_joined',
        ])
        total = User.objects.count()
        for i, user in enumerate(User.objects.select_related('profile')):
            if i % 1000 == 0:
                # progress heartbeat every 1000 rows
                print 'User {} / {} - {}'.format(i, total, timezone.now())
            writer.writerow([
                user.profile.uuid,
                user.date_joined.isoformat(),
            ])
    print 'End Date Joined Export - {}'.format(timezone.now())
def import_date_joined(self):
    """Backdate imported users' created_at from the date_joined export.

    For each (uuid, date_joined) row, looks the user up by individual
    organization uuid and moves created_at earlier if the exported date
    precedes it; timestamps are never moved forward.
    """
    print("Begin Date Joined Import {}".format(timezone.now()))
    csv_path = f"s3://{BUCKET}/squarelet_export/date_joined.csv"
    with smart_open(csv_path, "r") as infile:
        rows = csv.reader(infile)
        next(rows)  # skip the header row
        for index, row in enumerate(rows):
            if index % 1000 == 0:
                # progress heartbeat every 1000 rows
                print("User {} - {}".format(index, timezone.now()))
            try:
                user_obj = User.objects.get(
                    individual_organization_id=UUID(row[0])
                )
            except User.DoesNotExist:
                print(f"ERROR: User {row[0]} does not exist")
                continue
            joined = parse(row[1])
            # only move the timestamp backwards, never forwards
            if user_obj.created_at > joined:
                user_obj.created_at = joined
                user_obj.save()
    print("End User Import {}".format(timezone.now()))
def import_members(self):
    """Create Membership rows from the squarelet members export.

    Rows referencing users that were skipped during the user import are
    skipped here as well; pks are resolved through the module-level
    ``user_uuid2pk`` / ``org_uuid2pk`` maps.
    """
    print("Begin Member Import {}".format(timezone.now()))
    with smart_open(f"s3://{BUCKET}/squarelet_export/members.csv", "r") as infile:
        rows = csv.reader(infile)
        next(rows)  # skip the header row
        for index, row in enumerate(rows):
            if index % 1000 == 0:
                # progress heartbeat every 1000 rows
                print("Member {} - {}".format(index, timezone.now()))
            # skip users we skipped above
            user_exists = User.objects.filter(
                individual_organization_id=row[0]
            ).exists()
            if not user_exists:
                print("[Member] Skipping a missing user: {}".format(row[0]))
                continue
            Membership.objects.create(
                user_id=user_uuid2pk[row[0]],
                organization_id=org_uuid2pk[row[1]],
                admin=row[4] == "True",
            )
    print("End Member Import {}".format(timezone.now()))
def import_users(self):
    """Import users from the squarelet export CSV.

    Skips rows whose (non-blank) email already belongs to an existing
    user, creates the rest, records uuid -> pk in ``user_uuid2pk``, and
    registers a primary EmailAddress for users that have an email.
    """
    print("Begin User Import {}".format(timezone.now()))
    with smart_open(f"s3://{BUCKET}/squarelet_export/users.csv", "r") as infile:
        rows = csv.reader(infile)
        next(rows)  # skip the header row
        for index, row in enumerate(rows):
            if index % 1000 == 0:
                # progress heartbeat every 1000 rows
                print("User {} - {}".format(index, timezone.now()))
            # skip non unique emails - all emails should be unique before
            # the official migration - but dont skip blank emails
            if row[2] and User.objects.filter(email=row[2]).exists():
                print("[User] Skipping a duplicate email: {}".format(row[2]))
                continue
            new_user = User.objects.create(
                individual_organization_id=UUID(row[0]),
                username=row[1],
                email=row[2] or None,
                password=row[3],
                name=row[4],
                is_staff=row[5] == "True",
                is_active=row[6] == "True",
                is_superuser=row[7] == "True",
                email_failed=row[9] == "True",
                is_agency=row[10] == "True",
                avatar=row[11],
                use_autologin=row[12] == "True",
                source=row[13],
            )
            user_uuid2pk[row[0]] = new_user.pk
            if new_user.email:
                EmailAddress.objects.create(
                    user=new_user,
                    email=new_user.email,
                    primary=True,
                    verified=row[8] == "True",
                )
    print("End User Import {}".format(timezone.now()))
def import_users(self, organization):
    """Import documentcloud users, merging them onto existing accounts.

    Users are matched to existing accounts by email (case-insensitive);
    unmatched users are created with an unusable imported password hash.
    Active users (role not disabled/freelancer) are added to
    *organization*, their individual organization is marked as a verified
    journalist, and an old-id -> new-uuid row is written to users_map.csv.

    Fix: removed redundant ``f`` prefixes from constant strings (F541).
    """
    print("Begin User Import {}".format(timezone.now()))
    with smart_open(f"{self.bucket_path}users.csv", "r") as infile, smart_open(
            f"{self.bucket_path}users_map.csv", "w") as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        next(reader)  # discard headers
        for user in reader:
            # role codes: 0 disabled, 1 admin, 3 reviewer, 4 freelancer
            # 3 is reviewer - should not have been exported
            assert user[10] != "3", f"Found a rogue reviewer, {user[0]}"
            email = (EmailAddress.objects.filter(
                email__iexact=user[3]).select_related("user").first())
            if email:
                created = False
                self.stdout.write(f"Found existing user: {user[3]}")
                user_obj = email.user
            else:
                created = True
                self.stdout.write(f"Creating new user: {user[3]}")
                user_obj = User.objects.create_user(
                    username=UserWriteSerializer.unique_username(
                        user[1] + user[2]),
                    email=user[3],
                    name=f"{user[1]} {user[2]}",
                    is_staff=False,
                    is_active=True,
                    is_superuser=False,
                    email_failed=False,
                    is_agency=False,
                    use_autologin=True,
                    source="documentcloud",
                    created_at=parse(user[5]).replace(tzinfo=pytz.UTC),
                    updated_at=parse(user[6]).replace(tzinfo=pytz.UTC),
                )
                # the "******" prefix marks the imported hash as unusable
                # until the user resets their password
                user_obj.password = "******" + user[4]
                user_obj.save()
                EmailAddress.objects.create(
                    user=user_obj, email=user[3], verified=True, primary=True)
            if user[10] not in ("0", "4"):
                # 0 is disabled - do not add to organization
                # 4 is freelancer - do not add to organization
                if not created and organization.has_member(user_obj):
                    self.stdout.write("Already a member")
                else:
                    self.stdout.write("Adding to organization")
                    Membership.objects.create(
                        user=user_obj,
                        organization=organization,
                        # 1 is admin
                        admin=user[10] == "1",
                    )
            else:
                self.stdout.write(
                    "Not adding disabled/freelancer to organization")
            user_obj.individual_organization.verified_journalist = True
            user_obj.individual_organization.save()
            writer.writerow([
                user[0],
                user_obj.uuid,
                user_obj.username,
                user_obj.individual_organization.slug,
                created,
            ])
def test_file_smart_open_can_read_multistream_bz2(self):
    """smart_open should decompress every stream of a multi-stream bz2 file."""
    stream_count = 5
    path = self.create_temp_bz2(streams=stream_count)
    with smart_open_lib.smart_open(path) as handle:
        self.assertEqual(handle.read(), self.TEXT * stream_count)
    self.cleanup_temp_bz2(path)
def import_schools(file_name): """Import schools from spreadsheet""" # pylint: disable=too-many-locals s3_path = f"s3://{settings.AWS_MEDIA_BUCKET_NAME}/{file_name}" school_district = AgencyType.objects.get(name="School District") with smart_open(s3_path) as tmp_file: reader = csv.reader(tmp_file) for row in reader: print("~~~") print(row[DISTRICT]) try: parent = Jurisdiction.objects.get(abbrev=row[STATE]) county = Jurisdiction.objects.get(name="%s County" % row[COUNTY], parent=parent, level="l") except ( Jurisdiction.DoesNotExist, Jurisdiction.MultipleObjectsReturned, ) as exc: print("****") print("Jurisdiction error") print(row) print(exc) print("****") else: agency, created = Agency.objects.get_or_create( name=row[DISTRICT], slug=slugify(row[DISTRICT]), jurisdiction=county, status="approved", defaults=dict( contact_first_name=row[FIRST_NAME], contact_last_name=row[LAST_NAME], ), ) if not created: print("agency already existed") print(agency.pk) agency.types.add(school_district) address, _ = Address.objects.get_or_create( address="{name}\n{street}\n{city}, {state} {zip}".format( name=row[DISTRICT], street=row[MAIL_STREET], city=row[MAIL_CITY], state=row[MAIL_STATE], zip=row[MAIL_ZIP], )) AgencyAddress.objects.get_or_create(agency=agency, address=address, request_type="primary") number = row[PHONE] if number: if row[EXT]: number += " x%s" % row[EXT] phone, _ = PhoneNumber.objects.get_or_create(number=number, type="phone") AgencyPhone.objects.get_or_create(agency=agency, phone=phone) if row[EMAIL]: email = EmailAddress.objects.fetch(row[EMAIL]) AgencyEmail.objects.get_or_create( agency=agency, email=email, request_type="primary", email_type="to", )