def setup(self):
    # NOTE: 'w+b' (binary) mode works with the csv module on Python 2 only;
    # on Python 3, csv.writer expects a text-mode file opened with newline=''.
    self.xform_writer = csv.writer(open(XFORM_FILENAME, 'w+b'))
    self.xform_writer.writerow(XFORM_HEADER)
    self.case_writer = csv.writer(open(CASE_FILE_NAME, 'w+b'))
    self.case_writer.writerow(CASE_HEADER)
    self.forms_accessor = FormAccessors(self.domain)
    self.case_accessors = CaseAccessors(self.domain)

def email_enterprise_report(domain, slug, couch_user):
    account = BillingAccount.get_account_by_domain(domain)
    report = EnterpriseReport.create(slug, account.id, couch_user)

    # Generate file
    csv_file = io.StringIO()
    writer = csv.writer(csv_file)
    writer.writerow(report.headers)
    writer.writerows(report.rows)

    # Store file in redis
    hash_id = uuid.uuid4().hex
    redis = get_redis_client()
    redis.set(hash_id, csv_file.getvalue())
    redis.expire(hash_id, 60 * 60 * 24)
    csv_file.close()

    # Send email
    url = absolute_reverse("enterprise_dashboard_download", args=[domain, report.slug, str(hash_id)])
    link = "<a href='{}'>{}</a>".format(url, url)
    subject = _("Enterprise Dashboard: {}").format(report.title)
    body = "The enterprise report you requested for the account {} is ready.<br>" \
           "You can download the data at the following link: {}<br><br>" \
           "Please remember that this link will only be active for 24 hours.".format(account.name, link)
    send_html_email_async(subject, couch_user.username, body)

def export_as_csv(modeladmin, request, queryset):
    """
    Generic csv export admin action.
    based on http://djangosnippets.org/snippets/1697/
    """
    # NOTE (assumption): `fields`, `exclude`, and `header` are not defined in this
    # function; in the djangosnippets recipe this action is built by an enclosing
    # factory function that supplies them via closure.
    opts = modeladmin.model._meta
    field_names = set(field.name for field in opts.fields)
    if fields:
        fieldset = set(fields)
        field_names = field_names & fieldset
    elif exclude:
        excludeset = set(exclude)
        field_names = field_names - excludeset

    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename=%s.csv' % six.text_type(opts).replace('.', '_')

    writer = csv.writer(response)
    if header:
        writer.writerow(list(field_names))
    for obj in queryset:
        writer.writerow([safe_utf8(getattr(obj, field)) for field in field_names])
    return response

def handle(self, ids_file, **options):
    with open(ids_file, encoding='utf-8') as f:
        doc_ids = [line.strip() for line in f]
    total_doc_ids = len(doc_ids)
    doc_ids = set(doc_ids)

    print("{} total doc ids, {} unique".format(total_doc_ids, len(doc_ids)))

    db = XFormInstance.get_db()  # Both forms and cases are in here
    with IterDB(db) as iter_db:
        for doc in iter_docs(db, with_progress_bar(doc_ids)):
            iter_db.save(doc)

    print("{} docs saved".format(len(iter_db.saved_ids)))
    print("{} docs errored".format(len(iter_db.error_ids)))
    not_found = len(doc_ids) - len(iter_db.saved_ids) - len(iter_db.error_ids)
    print("{} docs not found".format(not_found))

    filename = '{}_{}.csv'.format(ids_file.split('/')[-1],
                                  datetime.datetime.now().isoformat())
    with open(filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['doc_id', 'status'])
        for doc_id in doc_ids:
            if doc_id in iter_db.saved_ids:
                status = "saved"
            elif doc_id in iter_db.error_ids:
                status = "errored"
            else:
                status = "not_found"
            writer.writerow([doc_id, status])
    print("Saved results to {}".format(filename))

def handle(self, file_path, *args, **options):
    domain = 'icds-cas'
    data_source_id = StaticDataSourceConfiguration.get_doc_id(domain, PERSON_TABLE_ID)
    config = StaticDataSourceConfiguration.by_id(data_source_id)
    adapter = get_indicator_adapter(config)
    session_helper = connection_manager.get_session_helper(adapter.engine_id)
    person_table_name = get_table_name(domain, PERSON_TABLE_ID)
    awc_location_table_name = get_table_name(domain, AWC_LOCATION_TABLE_ID)
    session = session_helper.Session

    with open(
        os.path.join(os.path.dirname(__file__), 'sql_scripts', 'nos_of_deaths.sql'),
        encoding='utf-8'
    ) as f:
        sql_script = f.read()

    rows = session.execute(
        sql_script % {
            'person_table_name': person_table_name,
            'awc_location_table_name': awc_location_table_name
        }
    )

    with open(file_path, 'w', encoding='utf-8') as file_object:
        writer = csv.writer(file_object)
        writer.writerow([
            'State',
            'District',
            'AWC',
            'Month',
            'Deaths',
        ])
        writer.writerows(rows)

def handle(self, child_file, **options):
    relevant_districts = SQLLocation.objects.filter(
        domain='icds-cas',
        location_id__in=['d982a6fb4cca0824fbde59db18d2d422',
                         '0ffe4a1f110ffc17bb9b749abdfd697c'])
    owners = SQLLocation.objects.get_queryset_descendants(relevant_districts, include_self=True)
    owner_name_mapping = {loc.location_id: loc.name for loc in owners}
    hh_cases = self._get_closed_hh_cases(list(owner_name_mapping))
    with open(child_file, 'w', encoding='utf-8') as child_csv:
        child_writer = csv.writer(child_csv)
        child_writer.writerow(CSV_HEADERS)
        for cases in chunked(with_progress_bar(hh_cases, hh_cases.count), 500):
            household_ids = []
            hh_map = {}
            for hh in cases:
                hh_map[hh['case_id']] = (hh['name'].encode('utf-8'),
                                         hh.get('closed_on', '').encode('utf-8'))
                household_ids.append(hh['case_id'])
            child_cases = self._get_child_cases(household_ids)
            ids = set(household_ids)
            for child in child_cases.hits:
                parent_index = filter(
                    lambda index: index['referenced_id'] in ids and index['identifier'] == 'parent',
                    child['indices']
                )[0]
                if parent_index:
                    hh_id = parent_index['referenced_id']
                    row = [child.get(prop, '').encode('utf-8') for prop in CHILD_PROPERTIES]
                    row.append(owner_name_mapping.get(child.get('owner_id', ''), '').encode('utf-8'))
                    hh_info = (hh_id, hh_map[hh_id][0], hh_map[hh_id][1])
                    row.extend(hh_info)
                    child_writer.writerow(row)

def _trim_csv_columns(path, dest, cols_to_keep):
    with open(path, 'rb') as source:
        rdr = csv.reader(source)
        with open(dest, "wb") as result:
            wtr = csv.writer(result)
            for r in rdr:
                wtr.writerow([r[i] for i in cols_to_keep])

def handle(self, domain, file_path, *args, **options):
    data_source_id = StaticDataSourceConfiguration.get_doc_id(domain, PERSON_TABLE_ID)
    config = StaticDataSourceConfiguration.by_id(data_source_id)
    adapter = get_indicator_adapter(config)
    session_helper = connection_manager.get_session_helper(adapter.engine_id)
    person_table_name = get_table_name(domain, PERSON_TABLE_ID)
    awc_location_table_name = get_table_name(domain, AWC_LOCATION_TABLE_ID)
    session = session_helper.Session

    with open(os.path.join(os.path.dirname(__file__), 'sql_scripts', 'select_non_aadhar.sql'),
              encoding='utf-8') as f:
        sql_script = f.read()

    rows = session.execute(
        sql_script % {
            'person_table_name': person_table_name,
            'awc_location_table_name': awc_location_table_name
        })

    with open(file_path, 'wb') as file_object:
        writer = csv.writer(file_object)
        writer.writerow([
            'Name of Beneficiary',
            'Date of Birth',
            'AWC',
            'Block',
            'District',
            'State'
        ])
        writer.writerows(rows)

def handle(self, filename, **options):
    domain = options["domain"]
    user = options["user"]
    display_superuser = options["display_superuser"]

    dimagi_username = ""
    if not display_superuser:
        dimagi_username = "******"

    if not domain and not user:
        raise CommandError("Please provide one of 'domain' or 'user'")

    if domain:
        domain_object = Domain.get_by_name(domain)
        if not domain_object:
            raise CommandError("Domain not found")

    users, super_users = get_users_to_export(user, domain)

    with open(filename, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['Date', 'User', 'Domain', 'IP Address', 'Request Path'])
        for user in users:
            write_log_events(
                writer, user, domain,
                start_date=options['start'],
                end_date=options['end']
            )
        for user in super_users:
            write_log_events(
                writer, user, domain,
                override_user=dimagi_username,
                start_date=options['start'],
                end_date=options['end']
            )

def _store_case_ids_with_unexpected_phone_number(self):
    if self.case_ids_with_unexpected_phone_number:
        filename = 'unexpected_phone_numbers_with_91_part_%s_%s.csv' % (
            self.db_alias, datetime.utcnow())
        with open(filename, 'w+b') as output:
            writer = csv.writer(output)
            for case_id in self.case_ids_with_unexpected_phone_number:
                writer.writerow([case_id])

def handle(self, filename, **options):
    with open(filename, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(['domain', 'section', 'type', 'report'])
        for domain in Domain.get_all():
            for report_config in domain.dynamic_reports:
                for report in report_config.reports:
                    writer.writerow([domain.name, report_config.section_title,
                                     report.report, report.name])

def write_row(self, row):
    buffer = io.StringIO()
    csvwriter = csv.writer(buffer, csv.excel)
    csvwriter.writerow([
        col.decode('utf-8') if isinstance(col, six.binary_type) else col
        for col in row
    ])
    self._file.write(buffer.getvalue().encode('utf-8'))

def write_row(self, row):
    buffer = io.StringIO()
    csvwriter = csv.writer(buffer, csv.excel)
    csvwriter.writerow([
        col.decode('utf-8') if isinstance(col, bytes) else col
        for col in row
    ])
    self._file.write(buffer.getvalue().encode('utf-8'))

def begin(self):
    self.output = (open(self.timing_file, "w", encoding='utf-8')
                   if self.timing_file else sys.__stdout__)
    if not self.pretty_output:
        self.csv = csv.writer(self.output)
        self.csv.writerow(["event", "name", "elapsed time", "start time"])
    self.event_start = time.time()
    global PLUGIN_INSTANCE
    PLUGIN_INSTANCE = self

def export_all(request):
    auditEvents = AccessAudit.view("auditcare/by_date_access_events",
                                   descending=True, include_docs=True).all()
    response = HttpResponse()
    response['Content-Disposition'] = 'attachment; filename="AuditAll.csv"'
    writer = csv.writer(response)
    writer.writerow(['User', 'Access Type', 'Date'])
    for a in auditEvents:
        writer.writerow([a.user, a.access_type, a.event_date])
    return response

def _mass_email_attachment(name, rows):
    csv_file = io.StringIO()
    writer = csv.writer(csv_file)
    writer.writerow(['Email', 'Error'])
    writer.writerows(rows)
    attachment = {
        'title': "mass_email_{}.csv".format(name),
        'mimetype': 'text/csv',
        'file_obj': csv_file,
    }
    return attachment

def test_can_write_to_StringIO(self):
    with io.StringIO(newline='') as csv_stream:
        csv_writer = csv.writer(csv_stream, delimiter=',')
        csv_writer.writerows([
            ['ä', 'b', 'c'],
            [],
            [1, None, 3]
        ])
        self.assertEqual('\xe4,b,c\r\n\r\n1,,3\r\n', csv_stream.getvalue())

def output(usernames, path):
    with open(path, 'w') as out:
        writer = csv.writer(out)
        writer.writerow(["Username", "Location", "State"])
        for username in usernames:
            user = CommCareUser.get_by_username(username)
            loc = user.sql_location
            loc_name = loc.name if loc else ''
            state = loc.get_ancestor_of_type('state') if loc else None
            state_name = state.name if state else ''
            writer.writerow([username, loc_name, state_name])

def dump_csv(query, path):
    path = os.path.expanduser(path)
    print("dumping to CSV: {}".format(path))
    with open(path, "w", encoding="utf-8") as output:
        csvfile = csv.writer(output)
        csvfile.writerow(["domain", "couch_id", "phonenumber"])
        for phone in query:
            csvfile.writerow([
                phone.domain,
                phone.couch_id,
                phone.phone_number,
            ])

def get_user_data_from_csv(self):
    filepath = self.options.file
    if not os.path.isfile(filepath):
        raise Exception("Can't retrieve user data. %s does not exist" % filepath)
    user_data_list = []
    wrong_rows = []
    with io.open(filepath, encoding=self.options.encoding, newline='') as csv_file:
        self.log("Reading file %s ......" % filepath, True)
        csv_reader = csv.reader(csv_file, delimiter=',')
        for i, row in enumerate(csv_reader):
            if i == 0:
                fieldnames = row
            else:
                if len(fieldnames) != len(row):
                    self.log(
                        "Line %s of the CSV has incomplete data %s fields instead of %s"
                        % (i, len(row), len(fieldnames)))
                    wrong_rows.append(row)
                else:
                    user_data = {'data': {}, 'custom_data': {}}
                    for j, value in enumerate(row):
                        fieldname = fieldnames[j]
                        if 'custom_attribute_' in fieldname:
                            custom_attr_name = fieldname.replace('custom_attribute_', '')
                            user_data['custom_data'][custom_attr_name] = value
                        else:
                            user_data['data'][fieldname] = value
                    user_data_list.append(user_data)

    if wrong_rows:
        original_filename = os.path.basename(filepath)
        wrong_rows_filename = 'invalid_%s' % original_filename
        wrong_rows_filepath = filepath.replace(original_filename, wrong_rows_filename)
        wrong_rows_file = open(wrong_rows_filepath, 'w')
        writer = csv.writer(wrong_rows_file, dialect=csv.excel)
        writer.writerow(fieldnames)
        for row in wrong_rows:
            writer.writerow(row)
        wrong_rows_file.close()
        self.log(
            "Found %s wrong rows, will be skipped from the process so they are exported to %s"
            % (len(wrong_rows), wrong_rows_filepath), True)
    return user_data_list

def handle(self, domain, repeater_id, *args, **options):
    self.domain = domain
    self.repeater_id = repeater_id
    repeater = Repeater.get(repeater_id)
    print("Looking up repeat records for '{}'".format(repeater.friendly_name))

    redundant_records = []
    records_by_payload_id = defaultdict(list)
    records = iter_repeat_records_by_domain(domain, repeater_id=repeater_id, state=RECORD_CANCELLED_STATE)
    total_records = 0
    for record in records:
        total_records += 1
        most_recent_success = self.most_recent_success.get(record.payload_id)
        if most_recent_success and record.last_checked < most_recent_success:
            # another record with this payload has succeeded after this record failed
            redundant_records.append(record)
        else:
            records_by_payload_id[record.payload_id].append(record)

    unique_payloads = len(records_by_payload_id)
    redundant_payloads = len(redundant_records)
    print("There are {total} total cancelled records, {redundant} with payloads which "
          "have since succeeded, and {unique} unsent unique payload ids."
          .format(total=total_records, redundant=redundant_payloads, unique=unique_payloads))
    print("Delete {} duplicate records?".format(total_records - unique_payloads))
    if not input("(y/n)") == 'y':
        print("Aborting")
        return

    redundant_log = self.delete_already_successful_records(redundant_records)
    duplicates_log = self.resolve_duplicates(records_by_payload_id)

    filename = "cancelled_{}_records-{}.csv".format(
        repeater.__class__.__name__, datetime.datetime.utcnow().isoformat())
    print("Writing log of changes to {}".format(filename))
    with open(filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(('RepeatRecord ID', 'Payload ID', 'Failure Reason', 'Deleted?', 'Reason'))
        writer.writerows(redundant_log)
        writer.writerows(duplicates_log)

def _update_cases(self, case_ids_with_invalid_phone_number):
    exceptions_raised = 0
    with open('invalid_phone_numbers_with_91_part_%s_updated.csv' % self.db_alias,
              'w+b') as output:
        writer = csv.writer(output)
        writer.writerow(['Case Id'])
        case_ids_to_update_chunk = list(chunked(case_ids_with_invalid_phone_number, 100))
        for chunk in with_progress_bar(case_ids_to_update_chunk):
            case_ids_list = self._reassured_case_ids_to_update(chunk)
            [writer.writerow([case_id]) for case_id in case_ids_list]
            exceptions_raised = self._submit_update_form(case_ids_list, exceptions_raised)

def _read_and_write_csv():
    has_temp_file = False
    try:
        with io.open(_TEST_CSV_PATH, 'w', encoding='utf-8', newline='') as csv_file:
            has_temp_file = True
            csv_writer = csv.writer(csv_file)
            for _ in range(_TEST_ROW_COUNT):
                csv_writer.writerow(_TEST_ROW)
        with io.open(_TEST_CSV_PATH, 'r', encoding='utf-8', newline='') as csv_file:
            csv_reader = csv.reader(csv_file)
            for _ in csv_reader:
                pass
    finally:
        if has_temp_file:
            os.remove(_TEST_CSV_PATH)

def download_changes(db, limit, target):
    """
    Download changes to a target file-like object
    """
    writer = csv.writer(target, dialect=csv.excel)
    # headings
    keys = [
        'domain',
        'doc_type',
        'date',
        'id',
        'rev',
    ]
    writer.writerow(keys)
    for row in get_recent_changes(db, limit):
        writer.writerow([row[k] for k in keys])

def _track_periodic_data_on_kiss(submit_json):
    """
    Transform periodic data into a format that is kissmetric submission friendly,
    then call identify
    csv format: Identity (email), timestamp (epoch), Prop:<Property name>, etc...
    :param submit_json: Example Json below, this function assumes
    [
        {
            email: <>,
            properties: [
                {
                    property: <>,
                    value: <>
                }, (can have more than one)
            ]
        }
    ]
    :return: none
    """
    periodic_data_list = json.loads(submit_json)

    headers = [
        'Identity',
        'Timestamp',
    ] + ['Prop:{}'.format(prop['property']) for prop in periodic_data_list[0]['properties']]

    filename = 'periodic_data.{}.csv'.format(date.today().strftime('%Y%m%d'))
    with open(filename, 'w') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(headers)
        for webuser in periodic_data_list:
            row = [webuser['email'], int(time.time())] + \
                  [prop['value'] for prop in webuser['properties']]
            csvwriter.writerow(row)

    if settings.S3_ACCESS_KEY and settings.S3_SECRET_KEY and settings.ANALYTICS_IDS.get('KISSMETRICS_KEY', None):
        s3_connection = tinys3.Connection(settings.S3_ACCESS_KEY, settings.S3_SECRET_KEY, tls=True)
        f = open(filename, 'rb')
        s3_connection.upload(filename, f, 'kiss-uploads')

    os.remove(filename)

def make_row_writer(output_file, write_csv):
    # NOTE: this is a generator that yields a row-writing callable; it is
    # presumably used as a context manager (e.g. via @contextmanager) in the
    # original source -- that decorator is not part of this snippet.
    def make_row_widths_writer(rows, output_file):
        widths = [len(text(item)) for item in rows[0]]
        for row in rows[1:]:
            for i, item in enumerate(row):
                length = len(text(item))
                if length > widths[i]:
                    widths[i] = length
        template = " ".join("{%s:%s%s}" % (i, (">" if i else "<"), w)
                            for i, w in enumerate(widths))

        def write(row):
            print(template.format(*row), file=output_file)
        return write

    if output_file != sys.stdout:
        output_file = open(output_file, "w", encoding='utf-8')

    pending = []
    if write_csv:
        writer = csv.writer(output_file, dialect="excel")
        write = writer.writerow
    else:
        def write(row):
            if row:
                if len(row) == 1 and not pending:
                    print(row[0], file=output_file)
                else:
                    pending.append(row)
            else:
                if pending:
                    write = make_row_widths_writer(pending, output_file)
                    for row in pending:
                        write(row)
                    del pending[:]
                print("", file=output_file)

    try:
        yield write
    finally:
        if pending:
            write([])
        assert not pending, pending
        if output_file != sys.stdout:
            output_file.close()

def handle(self, month_year, file_path, **options):
    month_year_parsed = dateutil.parser.parse('1-' + month_year)
    start_date = month_year_parsed.replace(day=1)
    end_date = start_date + relativedelta(day=1, months=+1, microseconds=-1)

    with open(file_path, 'wb') as file_object:
        writer = csv.writer(file_object)
        writer.writerow([
            'domain name',
            'user id',
            'total number of forms submitted in a month',
            'used case management',
            'multiple form types'
        ])
        for domain in with_progress_bar(Domain.get_all(include_docs=False)):
            domain_name = domain['key']
            user_ids = CommCareUser.ids_by_domain(domain=domain_name)
            for users in chunked(user_ids, 100):
                forms = get_forms_for_users(domain_name, users, start_date, end_date)
                user_dict = defaultdict(list)
                for form in forms:
                    user_id = form['form']['meta']['userID']
                    user_dict[user_id].append(form)
                for user_id, forms in six.iteritems(user_dict):
                    has_two_forms_submitted = False
                    has_case = False
                    unique_forms = set()
                    for form in forms:
                        if has_case and has_two_forms_submitted:
                            break
                        if not has_case and form.get('form', {}).get('case'):
                            has_case = True
                        if not has_two_forms_submitted:
                            xmlns = form.get('form', {}).get('@xmlns')
                            if xmlns:
                                unique_forms.add(xmlns)
                                if len(unique_forms) >= 2:
                                    has_two_forms_submitted = True
                    writer.writerow([
                        domain_name,
                        user_id,
                        len(forms),
                        has_case,
                        has_two_forms_submitted
                    ])

def _send_data_validation_email(csv_columns, month, bad_data):
    # intentionally using length here because the query will need to evaluate anyway to send the CSV file
    if all(len(v) == 0 for _, v in six.iteritems(bad_data)):
        return

    bad_wasting_awcs = bad_data.get('bad_wasting_awcs', [])
    bad_stunting_awcs = bad_data.get('bad_stunting_awcs', [])
    bad_underweight_awcs = bad_data.get('bad_underweight_awcs', [])
    bad_lbw_awcs = bad_data.get('bad_lbw_awcs', [])

    csv_file = io.StringIO()
    writer = csv.writer(csv_file)
    writer.writerow(('type', ) + csv_columns)
    _icds_add_awcs_to_file(writer, 'wasting', bad_wasting_awcs)
    _icds_add_awcs_to_file(writer, 'stunting', bad_stunting_awcs)
    _icds_add_awcs_to_file(writer, 'underweight', bad_underweight_awcs)
    _icds_add_awcs_to_file(writer, 'low_birth_weight', bad_lbw_awcs)

    email_content = """
    Incorrect wasting AWCs: {bad_wasting_awcs}
    Incorrect stunting AWCs: {bad_stunting_awcs}
    Incorrect underweight AWCs: {bad_underweight_awcs}
    Incorrect low birth weight AWCs: {bad_lbw_awcs}

    Please see attached file for more details
    """.format(
        bad_wasting_awcs=len(bad_wasting_awcs),
        bad_stunting_awcs=len(bad_stunting_awcs),
        bad_underweight_awcs=len(bad_underweight_awcs),
        bad_lbw_awcs=len(bad_lbw_awcs),
    )

    filename = month.strftime('validation_results_%s.csv' % SERVER_DATE_FORMAT)
    send_HTML_email(
        '[{}] - ICDS Dashboard Validation Results'.format(settings.SERVER_ENVIRONMENT),
        DASHBOARD_TEAM_EMAILS, email_content,
        file_attachments=[{'file_obj': csv_file, 'title': filename, 'mimetype': 'text/csv'}],
    )

def _track_periodic_data_on_kiss(submit_json):
    """
    Transform periodic data into a format that is kissmetric submission friendly,
    then call identify
    csv format: Identity (email), timestamp (epoch), Prop:<Property name>, etc...
    :param submit_json: Example Json below, this function assumes
    [
        {
            email: <>,
            properties: [
                {
                    property: <>,
                    value: <>
                }, (can have more than one)
            ]
        }
    ]
    :return: none
    """
    periodic_data_list = json.loads(submit_json)

    headers = [
        'Identity',
        'Timestamp',
    ] + ['Prop:{}'.format(prop['property']) for prop in periodic_data_list[0]['properties']]

    filename = 'periodic_data.{}.csv'.format(date.today().strftime('%Y%m%d'))
    with open(filename, 'wb') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(headers)
        for webuser in periodic_data_list:
            row = [webuser['email'], int(time.time())] + \
                  [prop['value'] for prop in webuser['properties']]
            csvwriter.writerow(row)

    if settings.S3_ACCESS_KEY and settings.S3_SECRET_KEY and settings.ANALYTICS_IDS.get('KISSMETRICS_KEY', None):
        s3_connection = tinys3.Connection(settings.S3_ACCESS_KEY, settings.S3_SECRET_KEY, tls=True)
        f = open(filename, 'rb')
        s3_connection.upload(filename, f, 'kiss-uploads')

    os.remove(filename)

def _write_file(self, slug):
    report = EnterpriseReport.create(slug, self.account_id, self.couch_user)
    row_count = 0

    csv_file = io.StringIO()
    writer = csv.writer(csv_file)
    writer.writerow(report.headers)
    rows = report.rows
    row_count = len(rows)
    writer.writerows(rows)

    print('Wrote {} lines of {}'.format(row_count, slug))

    attachment = {
        'title': report.filename,
        'mimetype': 'text/csv',
        'file_obj': csv_file,
    }
    return (attachment, row_count)

def handle(self, infile, outfile, *args, **options):
    self.case_accessor = CaseAccessors('icds-cas')
    with open(infile, 'r', encoding='utf-8') as old, open(outfile, 'w', encoding='utf-8') as new:
        reader = csv.reader(old)
        writer = csv.writer(new)
        headers = next(reader)
        writer.writerow(headers)
        for row in reader:
            case_id = row[4]
            hh_id = row[10]
            if hh_id:
                person, hh = self.case_accessor.get_cases([case_id, hh_id], ordered=True)
            else:
                person = self.case_accessor.get_case(case_id)
                hh = None
            if hh:
                row[18] = hh.get_case_property('name')
                row[19] = hh.get_case_property('hh_num')
            row[20] = person.get_case_property('name')
            writer.writerow(row)

def write_table(self, table):
    if self.archive is None:
        raise Exception('Attempt to write to a closed CsvWriter')

    def _encode_row(row):
        return [
            val.encode('utf-8') if isinstance(val, bytes) else val
            for val in row
        ]

    tempfile = io.StringIO()
    writer = csv.writer(tempfile, dialect=csv.excel)
    writer.writerow(_encode_row(table.headings))
    for row in table.rows:
        writer.writerow(_encode_row(row))

    # TODO: make this a polite zip and put everything in a subfolder with the same basename
    # as the zipfile
    self.archive.writestr('%s.csv' % self.zip_safe_name(table.name),
                          tempfile.getvalue().encode('utf-8'))

def _gir_csv_response(month, year):
    query_month = "{year}-{month}-01".format(year=year, month=month)
    prev_month_year, prev_month = add_months(year, month, -1)
    prev_month_string = "{year}-{month}-01".format(year=prev_month_year, month=prev_month)
    two_ago_year, two_ago_month = add_months(year, month, -2)
    two_ago_string = "{year}-{month}-01".format(year=two_ago_year, month=two_ago_month)
    if not GIRRow.objects.filter(month=query_month).exists():
        return HttpResponse('Sorry, that month is not yet available')
    queryset = GIRRow.objects.filter(
        month__in=[query_month, prev_month_string, two_ago_string]
    ).order_by('-month')
    domain_months = defaultdict(list)
    for item in queryset:
        domain_months[item.domain_name].append(item)
    field_names = GIR_FIELDS
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename=gir.csv'
    writer = csv.writer(response)
    writer.writerow(list(field_names))
    for months in domain_months.values():
        writer.writerow(months[0].export_row(months[1:]))
    return response

def write_table(self, table):
    if self.archive is None:
        raise Exception('Attempt to write to a closed CsvWriter')

    def _encode_row(row):
        return [
            val.encode('utf-8') if isinstance(val, bytes) else val
            for val in row
        ]

    tempfile = io.StringIO()
    writer = csv.writer(tempfile, dialect=csv.excel)
    writer.writerow(_encode_row(table['headings']))
    for row in table['rows']:
        writer.writerow(_encode_row(row))

    # TODO: make this a polite zip and put everything in a subfolder with the same basename
    # as the zipfile
    self.archive.writestr('%s.csv' % self.zip_safe_name(table['name']),
                          tempfile.getvalue().encode('utf-8'))

def handle(self, filename, *args, **kwargs):
    # Doesn't work since this queries from Couch
    # Todo: Migrate to SQL
    raise CommandError("This doesn't work since the synclogs are now migrated to SQL")

    database = SyncLog.get_db()
    all_sync_log_ids = [
        row['id'] for row in
        database.view('phone/sync_logs_by_user', reduce=False, include_docs=False)
    ]
    total_count = len(all_sync_log_ids)

    headers = [
        'date',
        'user',
        'cases',
        'dependent cases',
        'total cases',
        'initial',
        'duration',
        'duration per case (ms/case)',
    ]
    with open(filename, 'wb') as f:
        writer = csv.writer(f, dialect=csv.excel)
        writer.writerow(headers)
        for i, sync_log_dict in enumerate(iter_docs(database, all_sync_log_ids, 500)):
            duration = sync_log_dict.get('duration')
            cases = len(sync_log_dict.get('cases_on_phone', []))
            dependent_cases = len(sync_log_dict.get('dependent_cases_on_phone', []))
            total_cases = cases + dependent_cases
            if duration and total_cases:
                average_time = float(duration) * 1000 / float(total_cases)
                writer.writerow([
                    (sync_log_dict.get('date') or '1980-01-01')[:10],  # strip times off of the dates
                    sync_log_dict.get('user_id'),
                    cases,
                    dependent_cases,
                    cases + dependent_cases,
                    bool(sync_log_dict.get('previous_log_id')),
                    duration,
                    '{0:.2f}'.format(average_time)
                ])
            if i % 500 == 0:
                print('processed {}/{} logs'.format(i, total_count))

def handle(self, **kwargs):
    domains_by_module = defaultdict(list)
    for domain, module in settings.DOMAIN_MODULE_MAP.items():
        domains_by_module[module].append(domain)

    with open("custom-modules.csv", "w") as f:
        writer = csv.writer(f)
        writer.writerow([
            'module',
            'path',
            'domains',
            'domains exist',
            'plans',
            'in DOMAIN_MODULE_MAP',
            'likely removable',
        ])

        visited_paths = set()
        for module, domains in domains_by_module.items():
            try:
                path = import_module(module).__path__[0]
            except ImportError:
                path = "PATH NOT FOUND"
            visited_paths.add(path)
            writer.writerow(self.log_module_info(module, path, domains, in_module_map=True))

        for app_config in apps.get_app_configs():
            if (app_config.path.startswith(settings.FILEPATH + "/custom")
                    and app_config.path not in visited_paths):
                # Just check and see if the label corresponds to a domain
                writer.writerow(self.log_module_info(
                    app_config.label, app_config.path, [app_config.label], in_module_map=False))

def export_as_csv(modeladmin, request, queryset):
    """
    Generic csv export admin action.
    based on http://djangosnippets.org/snippets/1697/
    """
    opts = modeladmin.model._meta
    field_names = set(field.name for field in opts.fields)
    if fields:
        fieldset = set(fields)
        field_names = field_names & fieldset
    elif exclude:
        excludeset = set(exclude)
        field_names = field_names - excludeset

    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename=%s.csv' % six.text_type(opts).replace('.', '_')

    writer = csv.writer(response)
    if header:
        writer.writerow(list(field_names))
    for obj in queryset:
        writer.writerow([getattr(obj, field) for field in field_names])
    return response

def send_prepaid_credits_export():
    if settings.ENTERPRISE_MODE:
        return

    headers = [
        'Account Name', 'Project Space', 'Edition', 'Start Date', 'End Date',
        '# General Credits', '# Product Credits', '# User Credits', '# SMS Credits',
        'Last Date Modified'
    ]

    body = []
    for subscription in Subscription.visible_objects.filter(
        service_type=SubscriptionType.PRODUCT,
    ).order_by('subscriber__domain', 'id'):
        general_credit_lines = CreditLine.get_credits_by_subscription_and_features(subscription)
        product_credit_lines = CreditLine.get_credits_by_subscription_and_features(subscription, is_product=True)
        user_credit_lines = CreditLine.get_credits_by_subscription_and_features(
            subscription, feature_type=FeatureType.USER)
        sms_credit_lines = CreditLine.get_credits_by_subscription_and_features(
            subscription, feature_type=FeatureType.SMS)
        all_credit_lines = general_credit_lines | product_credit_lines | user_credit_lines | sms_credit_lines

        body.append([
            subscription.account.name, subscription.subscriber.domain, subscription.plan_version.plan.edition,
            subscription.date_start, subscription.date_end,
            sum(credit_line.balance for credit_line in general_credit_lines),
            sum(credit_line.balance for credit_line in product_credit_lines),
            sum(credit_line.balance for credit_line in user_credit_lines),
            sum(credit_line.balance for credit_line in sms_credit_lines),
            max(
                credit_line.last_modified for credit_line in all_credit_lines
            ).strftime(SERVER_DATETIME_FORMAT_NO_SEC) if all_credit_lines else 'N/A',
        ])

    for account in BillingAccount.objects.order_by('name', 'id'):
        general_credit_lines = CreditLine.get_credits_for_account(account)
        product_credit_lines = CreditLine.get_credits_for_account(account, is_product=True)
        user_credit_lines = CreditLine.get_credits_for_account(account, feature_type=FeatureType.USER)
        sms_credit_lines = CreditLine.get_credits_for_account(account, feature_type=FeatureType.SMS)
        all_credit_lines = general_credit_lines | product_credit_lines | user_credit_lines | sms_credit_lines

        body.append([
            account.name, '', '', '', '',
            sum(credit_line.balance for credit_line in general_credit_lines),
            sum(credit_line.balance for credit_line in product_credit_lines),
            sum(credit_line.balance for credit_line in user_credit_lines),
            sum(credit_line.balance for credit_line in sms_credit_lines),
            max(
                credit_line.last_modified for credit_line in all_credit_lines
            ).strftime(SERVER_DATETIME_FORMAT_NO_SEC) if all_credit_lines else 'N/A',
        ])

    file_obj = io.StringIO()
    writer = csv.writer(file_obj)
    writer.writerow(headers)
    for row in body:
        writer.writerow([
            val if isinstance(val, six.text_type) else bytes(val)
            for val in row
        ])

    date_string = datetime.datetime.utcnow().strftime(SERVER_DATE_FORMAT)
    filename = 'prepaid-credits-export_%s_%s.csv' % (settings.SERVER_ENVIRONMENT, date_string)
    send_HTML_email(
        '[%s] Prepaid Credits Export - %s' % (settings.SERVER_ENVIRONMENT, date_string),
        settings.ACCOUNTS_EMAIL,
        'See attached file.',
        file_attachments=[{'file_obj': file_obj, 'title': filename, 'mimetype': 'text/csv'}],
    )

def handle(self, domain, repeater_id, *args, **options):
    sleep_time = options.get('sleep_time')
    include_regexps = options.get('include_regexps')
    exclude_regexps = options.get('exclude_regexps')
    verbose = options.get('verbose')
    action = options.get('action')
    success_message = options.get('success_message')
    response_status = options.get('response_status')

    repeater = Repeater.get(repeater_id)
    print("Looking up repeat records for '{}'".format(repeater.friendly_name))

    def meets_filter(record):
        if exclude_regexps:  # Match none of the exclude expressions
            if record.failure_reason:
                if any(re.search(exclude_regex, record.failure_reason)
                       for exclude_regex in exclude_regexps):
                    return False
        if include_regexps:  # Match any of the include expressions
            if not record.failure_reason:
                return False
            return any(re.search(include_regex, record.failure_reason)
                       for include_regex in include_regexps)
        return True  # No filter applied

    records = list(filter(
        meets_filter,
        iter_repeat_records_by_domain(domain, repeater_id=repeater_id, state=RECORD_CANCELLED_STATE)
    ))

    if verbose:
        for record in records:
            print(record.payload_id, record.failure_reason)

    total_records = len(records)
    print("Found {} matching records. {} them?".format(total_records, action))
    if not input("(y/n)") == 'y':
        print("Aborting")
        return

    filename = "{}_{}_records-{}.csv".format(
        action,
        repeater.__class__.__name__,
        datetime.datetime.utcnow().strftime('%Y-%m-%d_%H.%M.%S'))
    with open(filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(('record_id', 'payload_id', 'state', 'message'))
        for i, record in enumerate(records):
            try:
                if action == 'retrigger':
                    if record.next_check is None:
                        record.next_check = datetime.datetime.utcnow()
                    record.fire(force_send=True)
                elif action == 'succeed':
                    self._succeed_record(record, success_message, response_status)
            except Exception as e:
                print("{}/{}: {} {}".format(i + 1, total_records, 'EXCEPTION', repr(e)))
                writer.writerow((record._id, record.payload_id, record.state, repr(e)))
            else:
                print("{}/{}: {}, {}".format(i + 1, total_records, record.state, record.attempts[-1].message))
                writer.writerow((record._id, record.payload_id, record.state, record.attempts[-1].message))
            if sleep_time:
                time.sleep(float(sleep_time))

    print("Wrote log of changes to {}".format(filename))

def handle(self, **options):
    domain = options['domain']
    debug = options['debug']
    cleanup = options['cleanup']

    domain_query = CaseES().domain(domain)
    valid_case_ids = set(domain_query.get_ids())
    referenced_case_ids = {
        index['referenced_id']
        for hit in domain_query.source('indices.referenced_id').run().hits
        for index in hit['indices']
    }

    invalid_referenced_ids = referenced_case_ids - valid_case_ids

    if len(invalid_referenced_ids) > ES_MAX_CLAUSE_COUNT:
        print("there's a lot of invalid ids here. ES queries may not handle this well")

    cases_with_invalid_references = (
        domain_query
        .term('indices.referenced_id', invalid_referenced_ids)
        .source(['_id', 'type', 'indices', 'owner_id', 'opened_by', 'xform_ids'])
        .run().hits
    )

    with open(options['filename'], 'w', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        headers = [
            'case id',
            'case type',
            'creating form id',
            'referenced id',
            'referenced_type',
            'index relationship',
            'index identifier',
            'owner id',
            'owner name',
            'opened by id',
            'opened by name',
        ]
        if debug:
            headers.append('app version')
        writer.writerow(headers)

        for case in cases_with_invalid_references:
            for index in case['indices']:
                if index['referenced_id'] in invalid_referenced_ids:
                    form_id = case['xform_ids'][0]
                    row = [
                        case['_id'],
                        case['type'],
                        form_id,
                        index['referenced_id'],
                        index['referenced_type'],
                        index['relationship'],
                        index['identifier'],
                        case['owner_id'],
                        cached_owner_id_to_display(case['owner_id']),
                        case['opened_by'],
                        cached_owner_id_to_display(case['opened_by']),
                    ]
                    if debug:
                        form = FormAccessors(domain=domain).get_form(form_id)
                        app_version_info = get_app_version_info(
                            domain,
                            form.build_id,
                            form.form_data['@version'],
                            form.metadata,
                        )
                        row.append(app_version_info.build_version)
                    writer.writerow(row)

    if cleanup:
        missing = set()
        deleted = set()
        exists = set()
        for invalid_id in invalid_referenced_ids:
            try:
                case = CaseAccessors(domain).get_case(invalid_id)
            except CaseNotFound:
                missing.add(invalid_id)
            else:
                if case.is_deleted:
                    deleted.add(case)
                else:
                    exists.add(case)

        for case_to_resync in exists:
            # if the case actually exists resync it to fix the es search
            resave_case(domain, case_to_resync, send_post_save_signal=False)
        if exists:
            print('resynced {} cases that were actually not deleted'.format(len(exists)))

        for case in deleted:
            # delete the deleted case's entire network in one go
            call_command('delete_related_cases', domain, case.case_id)

        for case in cases_with_invalid_references:
            for index in case['indices']:
                if index['referenced_id'] in missing:
                    # this is just an invalid reference. no recourse but to delete the case itself
                    call_command('delete_related_cases', domain, case['_id'])