def find_all_matching_officers(min_percentage=0.75):
    notable_officers = set()
    officer_fingerprints = get_all_officer_fingerprints()
    sys.stderr.write("\nFinding matches...\n")
    writer = DictWriter(sys.stdout, [
        'Full Name (from persons)',
        'officer_id (from npo_officers)'
    ])
    writer.writeheader()
    for i, data in enumerate(gdocs_persons()):
        fingerprint = make_fingerprint(data['Full Name'])
        matching_ids = find_matching_officers(
            fingerprint,
            officer_fingerprints,
            excluded_ids=notable_officers,
            min_percentage=min_percentage
        )
        for officer_id in matching_ids:
            writer.writerow({
                'Full Name (from persons)': data['Full Name'],
                'officer_id (from npo_officers)': officer_id,
            })
        notable_officers.update(matching_ids)
        sys.stderr.write("\r%d" % (i + 1))
        sys.stderr.flush()
    sys.stderr.write("\nDone\n")
def export_aliases(project, fh):
    """ Dump a list of all entity names to a CSV file. The table will
    contain the active name of each entity, and one of the other existing
    names as an alias. """
    writer = DictWriter(fh, ['entity_id', 'alias', 'canonical'])
    writer.writeheader()
    alias = aliased(EntityProperty)
    canonical = aliased(EntityProperty)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(canonical)
    q = q.filter(Entity.project_id == project.id)
    q = q.filter(alias.entity_id != None)
    q = q.filter(alias.name == 'name')
    q = q.filter(canonical.name == 'name')
    q = q.filter(canonical.active == True)
    q = q.add_columns(canonical.value_string.label('canonical'))
    for row in q.all():
        # if row.alias == row.canonical:
        #     continue
        writer.writerow({
            'entity_id': str(row.entity_id),
            'alias': row.alias,
            'canonical': row.canonical
        })
def studentcsv():
    f = StringIO()
    writer = DictWriter(f, STUDENT_FIELDS)
    writer.writerow(dict((x, x) for x in STUDENT_FIELDS))
    for student in models.Student.query.all():
        if student.project_id:
            # record stays None if the project data was incomplete.
            record = None
            try:
                project = student.project
                record = {
                    STUDENT_FIELDS[0]: student.project_id,
                    STUDENT_FIELDS[1]: student.project.category.name,
                    STUDENT_FIELDS[2]: fx(student.firstname),
                    STUDENT_FIELDS[3]: fx(student.lastname),
                    STUDENT_FIELDS[4]: student.grade,
                    STUDENT_FIELDS[5]: student.gender,
                    STUDENT_FIELDS[6]: student.school.name,
                    STUDENT_FIELDS[7]: str(project.individual),
                    STUDENT_FIELDS[8]: str(project.forms.first().vafa or
                                           project.forms.first().vafb),
                    STUDENT_FIELDS[9]: str(project.forms.first().hsf),
                    STUDENT_FIELDS[10]: str(project.forms.first().phbaf)
                }
            except AttributeError as error:
                app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' %
                                 (student.id, student.project.id,
                                  student.firstname, student.lastname,
                                  vars(student.project), error))
            if record:
                try:
                    writer.writerow(record)
                except UnicodeEncodeError:
                    app.logger.error("Unicode Error:\n%s" % record)
    return f.getvalue()
def handle(self, *args, **options):
    if len(args) != 2:
        raise CommandError(
            'usage: python manage.py dump_results '
            '<template_file_path> '
            '<results_csv_file_path>'
        )

    # Get paths from args, and normalize them to absolute paths:
    template_file_path, results_csv_file_path = map(os.path.abspath, args)

    try:
        template = HitTemplate.objects.get(name=template_file_path)
    except ObjectDoesNotExist:
        sys.exit('There is no matching <template_file_path>.')

    completed_hits = template.hit_set.filter(completed=True)
    if not completed_hits.exists():
        sys.exit('There are no completed HITs.')

    fieldnames, rows = results_data(completed_hits)
    with open(results_csv_file_path, 'wb') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
def write(self, file_name, row):
    with self.lock:
        if file_name not in self.fhs:
            self.fhs[file_name] = open(make_path(file_name), 'wb')
            dw = DictWriter(self.fhs[file_name], row.keys())
            self.writers[file_name] = dw
            dw.writeheader()
        self.writers[file_name].writerow(row)
def export_aliases(project, path):
    """ Dump a list of all entity names to a CSV file. The table will
    contain the active name of each entity, and one of the other existing
    names as an alias. """
    with open(path, 'w') as fh:
        writer = DictWriter(fh, ['entity_id', 'alias', 'canonical', 'schemata'])
        writer.writeheader()
        q = Entity.all().filter_by(same_as=None)
        q = q.filter(Entity.project == project)
        for i, entity in enumerate(q):
            export_entity(entity, writer)
            if i % 100 == 0:
                log.info("Dumped %s entity names...", i)
def load_data(self, options):
    save_data = self.settings.get("__save_data__", False)
    if save_data:
        options['full_record'] = True
        try:
            os.makedirs("./saved_data")
            LOG.info("Saving data to %s.", os.path.abspath("./saved_data"))
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir("./saved_data"):
                pass
            else:
                raise

    if self.settings['protocol_version'] == '2':
        if self.settings['group_dn']:
            users = self.query_group(options)
        else:
            users = self.query_objects(options)
    else:
        if self.settings['group_dn']:
            users = self.query_group_paged(options)
        else:
            users = self.query_objects_paged(options)

    if save_data:
        data = []
        keys = set()
        for user in users:
            # Note: Not all user dicts contain all the fields. So, need to
            # loop over all the users to make sure we don't miss any fields.
            keys.update(user.keys())
            data.append(user)

        used_keys = set(self.ldap_query_fields)
        unused_keys = set(keys) - used_keys
        if unused_keys:
            keys = sorted(used_keys) + ['unmapped ->'] + sorted(unused_keys)
        else:
            keys = sorted(used_keys)

        with open('./saved_data/ldap.csv', 'w') as save_file:
            writer = DictUnicodeWriter(save_file, keys)
            writer.writeheader()
            writer.writerows(data)
        users = data

    for user in users:
        yield user
def write_csv(self, outputdir, timestamp, items=None):
    path = os.path.join(outputdir,
                        self.filename('csv', timestamp, **self.filter_kwargs))
    if items is None:
        items = self.get_items()
    with open(path, 'w') as csvfile:
        writer = DictWriter(csvfile, self.get_fields())
        writer.writeheader()
        for row in items:
            writer.writerow(row)
    return self
def tocsv():
    f = StringIO()
    writer = DictWriter(f, CSV_FIELDS)
    writer.writerow(dict((x, x) for x in CSV_FIELDS))
    districts = models.District.query.order_by('name').all()
    for district in districts:
        schools = district.schools.order_by('name').all()
        for school in schools:
            students = school.students.join(models.Project).order_by('title')
            students = students.filter(models.Student.team_leader == True).all()
            for student in students:
                record = None
                try:
                    record = {
                        CSV_FIELDS[0]: student.project.id,
                        CSV_FIELDS[1]: fx("%s %s" % (student.firstname,
                                                     student.lastname)),
                        CSV_FIELDS[4]: fx(student.project.title),
                        CSV_FIELDS[5]: fx(student.project.category.name),
                        CSV_FIELDS[6]: fx(student.project.division),
                        CSV_FIELDS[7]: student.school.name,
                        CSV_FIELDS[8]: student.school.district.name,
                        CSV_FIELDS[9]: fx("%s %s" % (student.sponsor.firstname,
                                                     student.sponsor.lastname)),
                        CSV_FIELDS[10]: student.project.forms_submitted,
                        CSV_FIELDS[11]: student.project.notes,
                    }
                    # Add up to two non-leader team members (fields 2 and 3).
                    team = student.project.student
                    team = team.filter(models.Student.team_leader == False).limit(2)
                    team = team.all()
                    i = 2
                    for member in team:
                        record[CSV_FIELDS[i]] = fx("%s %s" % (member.firstname,
                                                              member.lastname))
                        i += 1
                except AttributeError as error:
                    app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' %
                                     (student.id, student.project.id,
                                      student.firstname, student.lastname,
                                      vars(student.project), error))
                if record is None:
                    continue
                try:
                    writer.writerow(record)
                except UnicodeEncodeError:
                    app.logger.error("Unicode Error:\n%s" % record)
    return f.getvalue()
def writetocsv(xmlfile=None, month=None, year=None, outfile='../data/data.csv'):
    if xmlfile is None:
        raise ValueError("No XML file passed")
    if month is None:
        raise ValueError("No month passed")
    if year is None:
        raise ValueError("No year passed")
    xmldata = etree.parse(xmlfile)
    csvwriter = None
    csvfile = open(outfile, 'a')
    for incident in xmldata.iter('DATA'):
        data = {'month': month, 'year': year}
        for field in incident.iterchildren():
            data[field.tag] = field.text
        # Create the writer lazily so the header matches the first record.
        if not csvwriter:
            csvwriter = DictWriter(csvfile, fieldnames=data.keys())
            csvwriter.writeheader()
        csvwriter.writerow(data)
    csvfile.close()
def edr_export(self, request):
    data = []
    for rec_id in request.POST.getlist("iswear"):
        meta_id = request.POST.get("company_%s_id" % rec_id)
        res = EDRPOU.get(id=meta_id)
        if res:
            rec = res.to_dict()
            if isinstance(rec.get("founders"), list):
                rec["founders"] = ";;;".join(rec["founders"])
            data.append(rec)

    if not data:
        # "Нічого експортувати" = "Nothing to export"
        self.message_user(request, "Нічого експортувати")
        return redirect(reverse("admin:edr_search"))

    fp = StringIO()
    w = DictWriter(fp, fieldnames=data[0].keys())
    w.writeheader()
    w.writerows(data)
    payload = fp.getvalue()
    fp.close()

    response = HttpResponse(payload, content_type="text/csv")
    response["Content-Disposition"] = \
        "attachment; filename=edr_{:%Y%m%d_%H%M}.csv".format(
            datetime.datetime.now())
    response["Content-Length"] = len(response.content)
    return response
def export_aliases(project, fh):
    """ Dump a list of all entity names to a CSV file. The table will
    contain the active name of each entity, and one of the other existing
    names as an alias. """
    writer = DictWriter(fh, ['entity_id', 'schema', 'alias', 'canonical'])
    writer.writeheader()
    alias = aliased(Property)
    canonical = aliased(Property)
    schema = aliased(Schema)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(schema)
    q = q.join(canonical)
    q = q.filter(Entity.project_id == project.id)
    q = q.filter(alias.entity_id != None)  # noqa
    q = q.filter(alias.name == 'name')
    q = q.filter(canonical.name == 'name')
    q = q.filter(canonical.active == True)  # noqa
    q = q.add_columns(canonical.value_string.label('canonical'))
    q = q.add_columns(schema.name.label('schema'))
    for row in q.all():
        writer.writerow({
            'entity_id': str(row.entity_id),
            'schema': row.schema,
            'alias': row.alias,
            'canonical': row.canonical
        })
def main():
    prs = argparse.ArgumentParser()
    prs.add_argument('--count', type=int, default=100)
    prs.add_argument('file', type=file)
    args = prs.parse_args()
    count = args.count
    assert count > 0
    path = os.path.abspath(args.file.name)
    root, ext = os.path.splitext(path)
    new_path = '%s_trimmed_%s%s' % (root, count, ext)
    reader = DictReader(open(path))
    new_entries = []
    for i in range(count):
        new_entries.append(next(reader))
    with open(new_path, 'w') as new_file:
        writer = DictWriter(new_file, reader.unicode_fieldnames)
        writer.writeheader()
        writer.writerows(new_entries)
    print open(new_path).read()
def from_files():
    basedir = "/Users/rikhoekstra/surfdrive/Shared/Documents/NIOD2017/International_MIgration"
    toread = [fl for fl in os.listdir(basedir)]
    result = []
    for fl in toread:
        infl = open(os.path.join(basedir, fl), 'rU')
        txt = infl.read()
        recs = txt.split("\n\n")[1:]
        for r in recs:
            rec = r.split('\n')
            res = {}
            for l in rec:
                # Keep only the author/title/year/journal fields.
                item = l.split(' - ')
                if len(item) > 1 and item[0].strip() in ['AU', 'TI', 'PY', 'JO']:
                    res[item[0].strip()] = item[1].strip()
            result.append(res)
    flout = open('wileyrecs.csv', 'w')
    w = DictWriter(flout, ['AU', 'TI', 'PY', 'JO'])
    w.writeheader()
    w.writerows(result)
    flout.close()
    print('written: ', flout.name)
    return result
def render_non_html(encoding, querySet):
    '''Renders non-HTML formats and returns an appropriate HttpResponse'''
    if encoding == 'csv':
        vals = querySet.values()
        response = HttpResponse(mimetype='text/csv')
        response['Content-Disposition'] = \
            'attachment; filename=genesets%d.csv' % len(vals)
        csvW = DictWriter(response, GenesetFieldNames)
        # Write the header row by mapping each field name to itself.
        fieldsDict = {}
        for k in GenesetFieldNames:
            fieldsDict[k] = k
        csvW.writerow(fieldsDict)
        csvW.writerows(vals)
    elif encoding == 'xml':
        response = HttpResponse(mimetype='text/xml')
        response['Content-Disposition'] = \
            'attachment; filename=genesets%d.xml' % len(querySet)
        serializers.serialize("xml", querySet, stream=response)
    elif encoding == "json":
        response = HttpResponse(mimetype='application/json')
        response['Content-Disposition'] = \
            'attachment; filename=genesets%d.js' % len(querySet)
        serializers.serialize("json", querySet, stream=response)
    return response
def make_csv():
    data_files = os.listdir("scraped_data")
    fieldnames_by_locality = get_all_variables_by_locality()
    for zone_type in ["city", "department", "epci", "region"]:
        print "Make %s csv..." % zone_type
        locality_data_files = [data_file for data_file in data_files
                               if zone_type in data_file]
        variables_mapping = {
            "name": u"nom",
            "year": u"année",
            "zone_type": u"type de zone administrative",
            "population": u"population",
            "insee_code": u"cog (code officiel géographique)",
            "url": u"url",
        }
        fieldnames = ["year", "zone_type", "name", "population", "insee_code",
                      "url"] + sorted(fieldnames_by_locality[zone_type].keys())
        variables_mapping.update(fieldnames_by_locality[zone_type])
        if zone_type == "epci":
            fieldnames.append("siren")
        with open(os.path.join("nosdonnees", zone_type + "_all.csv"), "w") as output:
            csv_output = DictWriter(output, fieldnames=fieldnames,
                                    encoding="utf-8")
            # The first row holds the human-readable (French) column labels.
            csv_output.writerow(variables_mapping)
            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join("scraped_data", locality_data_file),
                                 encoding="utf-8") as input:
                    for line in input:
                        data = json.loads(line, encoding="utf-8")["data"]
                        csv_output.writerow(data)
def mappings_export(file, decided):
    """Export mappings to a CSV file."""
    writer = DictWriter(file, fieldnames=['left', 'right', 'judgement'])
    writer.writeheader()
    for mapping in Mapping.find_by_decision(decided):
        writer.writerow({
            'left': mapping.left_uid,
            'right': mapping.right_uid,
            'judgement': mapping.judgement
        })
def export_csv_table(archive, model, name):
    file_path = os.path.join(_make_export_path(), '%s.csv' % name)
    log.info("Exporting CSV to %s...", file_path)
    writer = None
    with open(file_path, 'w') as fh:
        for obj in session.query(model):
            row = obj.to_row()
            # Create the writer lazily, once the first row reveals the columns.
            if writer is None:
                writer = DictWriter(fh, row.keys())
                writer.writeheader()
            writer.writerow(row)
    url = archive.upload_file(file_path, mime_type='text/csv')
    if url is not None:
        os.unlink(file_path)
class SplitCSVPipeline(object):
    def open_spider(self, spider):
        self.links_file = open('links.csv', 'wb')
        self.results_file = open('results.csv', 'wb')
        self.links_writer = DictWriter(self.links_file,
                                       ['source', 'destination'])
        self.results_writer = DictWriter(self.results_file,
                                         ['url', 'status', 'next'])
        self.links_writer.writeheader()
        self.results_writer.writeheader()

    def close_spider(self, spider):
        self.results_file.close()
        self.links_file.close()

    def process_item(self, item, spider):
        if isinstance(item, Link):
            self.links_writer.writerow(item)
        if isinstance(item, Result):
            self.results_writer.writerow(item)
        return item
def stream():
    buffer_ = StringIO()
    writer = DictWriter(buffer_, header_dict.keys(), delimiter=",",
                        quoting=csv.QUOTE_MINIMAL)

    # Write header row
    data = read_and_flush(writer, buffer_, header_dict)
    yield data

    count = 0
    # Write CSV rows
    for row in scanResponse:
        count += 1
        rows_data = {
            key: text_type(value)
            for key, value in row['_source'].items()
            if key in header_dict.keys()
        }
        data = read_and_flush(writer, buffer_, rows_data)
        yield data
def convert_file(file_path):
    result_path = file_path.replace('/wdvc16', '/processed_wdvc16').replace('.xml', '.csv')
    print 'writing to %s...' % result_path
    xml_pages = stream_pages(file_path)
    fieldnames = [u'revision_id', u'revisions_in_group', u'revision_comment',
                  u'revision_timestamp', u'page_id', u'page_group', u'page_ns',
                  u'page_title', u'anonimous_ip', u'user_id', u'username']
    with open(result_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for xml_page in tqdm(xml_pages):
            pages = parse_page(xml_page)
            for page in pages:
                writer.writerow(page)
def report(self):
    trello_members, google_members, board_members = self.auditor.get_members()
    all_members = set()
    all_members.update(trello_members)
    all_members.update(google_members)
    for members in board_members.values():
        all_members.update(members)
    with open(self.filename, 'wb+') as fp:
        csv = DictWriter(fp, ['name'] + list(all_members))
        csv.writeheader()
        board_members['google'] = google_members
        board_members['trello'] = trello_members
        for board, members in board_members.items():
            # One row per board: True/False membership flag per member.
            row = {member: (member in members) for member in all_members}
            row['name'] = board
            csv.writerow(row)
def parse_file(xml_file):
    print 'converting %s to csv' % xml_file
    # csv file name
    new_file_path = xml_file.replace('wdvc16', 'converted_wdvc16').replace('.xml', '.csv')
    print 'writing to %s' % new_file_path
    # page by page generator of the xml file
    xml_file_by_pages = page_stream_generator(xml_file)
    # columns
    columns = [u'page_title', u'page_ns', u'page_id', u'revision_id',
               u'revision_timestamp', u'revision_comment', u'revision_model',
               u'revision_format', u'revision_count', u'username', u'user_id',
               u'ip_address']
    with open(new_file_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        for xml_page in xml_file_by_pages:
            revisions_in_page = parse_page(xml_page)
            for page in revisions_in_page:
                writer.writerow(page)
            page('#parent-fieldname-description').text(),
            page('#parent-fieldname-text').text(),
            page('a.email').text(),
            page('#parent-fieldname-contactPhone').text(),
            page('a.email').attr('href').replace('mailto:', ''),
            page('#parent-fieldname-eventUrl').attr('href') or ''
        ))),
        ('meta_submitter_email', '*****@*****.**'),
    )))

with open("events.p", "wb") as dumpfile:
    pickle.dump(events, dumpfile)

# Write output
with open('output.csv', 'w') as csvfile:
    writer = DictWriter(csvfile, fieldnames=events[0].keys())
    writer.writeheader()
    for event in events:
        writer.writerow(event)

# Submit events
for event in events:
    util.submit_event(
        email=event['meta_submitter_email'],
        title=event['title'],
        description=event['content_description'],
        location=event['location'],
        start_date=event['start'].split('T')[0],
        start_time=event['start'].split('T')[1][:5],
        end_time=event['end'].split('T')[1][:5],
        base_url=base_url
def write_csv(fieldnames, rows, path):
    with open(path, 'wb') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        for row in rows:
            writer.writerow(row)
from unicodecsv import DictWriter

res = []
for p2c in Person2Company.objects.filter(
        to_company_id=63).prefetch_related("from_person"):
    for d in Declaration.objects.filter(
            nacp_declaration=True,
            person=p2c.from_person,
            confirmed="a").order_by("year"):
        res.append({
            "name": p2c.from_person.full_name,
            "year": d.year,
            "id": d.declaration_id.replace("nacp_", "", 1)
        })

with open("/tmp/mp_decls.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=res[0].keys())
    w.writerows(res)
#         print(h.HITId)
#         currhits[h.HITId] = h
#         print('{}: {}'.format(len(currhits), currhits))
#     # get_all_hits iterates through all your current HITs, grabbing 100 at a time
#     # best to break as soon as you get all the HITIds in your group
#     if len(currhits) == len(hitids):
#         break
currhits = {h.HITId: h for h in mtc.get_all_hits() if h.HITId in hitids}
print('{} Current HITs: {}'.format(len(currhits), sorted(currhits.keys())))

process_assignments(assignments, all_results, currhits)
outkeys.extend(list(sorted(answer_keys)))

# Structure of hits
# foo.Amount foo.Expiration foo.IntegerValue foo.QualificationTypeId
# foo.AssignmentDurationInSeconds foo.FormattedPrice foo.Keywords foo.RequesterAnnotation
# foo.AutoApprovalDelayInSeconds foo.HIT foo.LocaleValue foo.RequiredToPreview
# foo.Comparator foo.HITGroupId foo.MaxAssignments foo.Reward
# foo.Country foo.HITId foo.NumberOfAssignmentsAvailable foo.Title
# foo.CreationTime foo.HITReviewStatus foo.NumberOfAssignmentsCompleted
# foo.CurrencyCode foo.HITStatus foo.NumberOfAssignmentsPending foo.expired
# foo.Description foo.HITTypeId foo.QualificationRequirement

with open(args.resultsfile, 'w') as outfile:
    dw = DictWriter(outfile, fieldnames=outkeys, delimiter='\t')
    dw.writeheader()
    for row in all_results:
        dw.writerow(row)
        log.update(tarefa)
        list_tarefa_logs.append(log)
    return list_tarefa_logs


if __name__ == '__main__':
    print("Exporting tarefas to CSV...")
    with open(TAREFAS_DIR + '/../csv/data_exported.csv', 'wb') as fout:
        csv_writer = DictWriter(fout, fieldnames=[
            'id_tarefa', 'num_tarefa', 'titulo_tarefa', 'tipo_tarefa',
            'data_cadastro_tarefa', 'sistema_tarefa', 'data_inicio_tarefa',
            'subsistema_tarefa', 'data_deadline_tarefa', 'aberta_por_tarefa',
            'localizacao_analista_tarefa', 'situacao_tarefa',
            'horas_trabalhadas_tarefa', 'gerente_relacionamento_tarefa',
            'num_prioridade_tarefa', 'andamento_tarefa', 'prioridade_tarefa',
            'dados_build_log', 'data_cadastro_log', 'atividade_log',
            'situacao_log', 'andamento_log', 'horas_trabalhadas_log',
            'aberto_por_log', 'revisao_svn_log'
        ])
        csv_writer.writeheader()
        for tarefa_filename in os.listdir(TAREFAS_DIR):
            id_tarefa = re.findall(r'(\d+)', tarefa_filename)[0]
            tarefa_filepath = TAREFAS_DIR + '/' + tarefa_filename
            with io.open(tarefa_filepath, 'r', encoding='utf-8') as fin:
                list_tarefa_logs = __to_list_tarefa_logs(fin.read())
                for tarefa_log in list_tarefa_logs:
                    tarefa_log.update({'id_tarefa': id_tarefa})
                    csv_writer.writerow(tarefa_log)
def convert_pickled_pages_to_csv_dataset(in_file, out_pages_fpath,
                                         out_transitions_fpath,
                                         delete_in_file=True):
    pages_columns = set()
    transitions_columns = set()
    # First pass: collect the full set of columns across all pickled pages.
    with open(in_file, 'r') as inf:
        while True:
            try:
                page = pickle.load(inf)
                pages_columns.update(page.as_dict().viewkeys())
                for trans in page.transitions:
                    transitions_columns.update(trans.as_dict({
                        "FROM_LABEL__%s" % l: 1 for l in page.labels
                    }).viewkeys())
            except EOFError:
                break

    with open(out_pages_fpath, 'w') as pages_f, \
            open(out_transitions_fpath, 'w') as trans_f:
        pages_writer = DictWriter(pages_f, sorted(pages_columns),
                                  encoding='utf8')
        pages_writer.writeheader()
        trans_writer = DictWriter(trans_f, sorted(transitions_columns),
                                  encoding='utf8')
        trans_writer.writeheader()
        # Second pass: write the rows.
        with open(in_file, 'r') as inf:
            while True:
                try:
                    page = pickle.load(inf)
                    pages_writer.writerow(page.as_dict())
                    for trans in page.transitions:
                        trans_writer.writerow(trans.as_dict({
                            "FROM_LABEL__%s" % l: 1 for l in page.labels
                        }))
                except EOFError:
                    break

    if delete_in_file:
        os.remove(in_file)
        print(
            u"Too many companies returned for record '%s'"
            % company["company_name"]
        )
        return True

    return {
        "id": company_db.id,
        "code": company_db.edrpou,
        "name_uk": company_db.name_uk,
        "name_en": company_db.name_en,
    }


fp = open("beneficiary.csv", "w")
w = DictWriter(fp, fieldnames=[
    "company_name", "legalForm", "country", "en_name", "location",
    "en_address", "phone", "address", "mail", "company_code", "owner"
])
w.writeheader()

for d in Declaration.objects.filter(nacp_declaration=True,
                                    confirmed="a").nocache().iterator():
    data = d.source["nacp_orig"]
    if isinstance(data.get("step_9"), dict):
        for cash_rec in data["step_9"].values():
            if not isinstance(cash_rec, dict):
                continue
            if (cash_rec.get("country", "1") or "1") == "1":
                continue
            rec_to_export = {
                "company_name": cash_rec.get("name"),
                "legalForm": cash_rec.get("legalForm"),
        itemtype,
    'title': g.value(book, DC.title),
    'date': g.value(book, DC.date) or '',  # not all have dates
    'tags': ', '.join(tags),
    '# tags': len(tags),
    '# tags ending in Y': len([t for t in tags if t.endswith('Y')])
})

items = sorted(items, key=lambda k: k['# tags ending in Y'], reverse=True)

# generate csv file name based on input file
filebase, ext = os.path.splitext(os.path.basename(args.filename))
csv_filename = '%s.csv' % filebase
with open(csv_filename, 'w') as csvfile:
    # write byte-order-mark for utf-8 opening in Excel
    csvfile.write(codecs.BOM_UTF8)
    fieldnames = ['identifier', 'type', 'title', 'date', '# tags',
                  '# tags ending in Y', 'tags']
    writer = DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for item in items:
        writer.writerow(item)
from unicodecsv import DictWriter
from core.models import Company2Country

# Ukrainian names for: Great Britain, British Virgin Islands, Gibraltar,
# Bermuda, Cayman Islands, Guernsey, Jersey, Isle of Man.
countries_list = [
    "Велика Британія", "Британські Віргінські острови", "Гібралтар",
    "Бермудські острови", "Кайманові острови", "Гернсі", "Джерсі",
    "Острів Мен"
]

with open("/tmp/uk_companies.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=[
        "country", "company_name", "company_code", "company_url",
        "founders", "managers", "rest", "sanctions"
    ])
    w.writeheader()

    for c2c in Company2Country.objects.filter(
            to_country__name_uk__in=countries_list).select_related(
            "to_country"):
        related = c2c.from_company.all_related_persons

        def joiner(persons):
            return u"\n".join([
                u"{}, https://pep.org.ua{}".format(p.full_name,
                                                   p.get_absolute_url())
                for p in persons
            ])

        w.writerow({
            "country": c2c.to_country.name_uk,
from django.db import models
from unicodecsv import DictWriter
from core.models import Company

fieldnames = ["id", "url", "name", "head_is_pep"]

with open("/tmp/orphaned_companies.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=fieldnames)
    w.writeheader()

    # Companies with no related persons flagged as PEPs.
    for c in Company.objects.annotate(cnt=models.Count(
            "from_persons__from_person",
            distinct=True,
            filter=models.Q(from_persons__from_person__is_pep=True))).filter(
                cnt=0):
        w.writerow({
            "id": c.id,
            "url": "https://pep.org.ua" + c.get_absolute_url(),
            "name": c.name_uk,
            "head_is_pep": c.state_company
        })
from core.models import Person2Company, Company2Company
from unicodecsv import DictWriter
from django.utils.translation import activate
from django.conf import settings
from collections import Counter
from tqdm import tqdm

activate(settings.LANGUAGE_CODE)

with open("/tmp/positions.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=["person", "relation", "company", "url"])
    w.writeheader()

    for p2c in tqdm(Person2Company.objects.all().select_related(
            "from_person", "to_company").nocache().iterator()):
        w.writerow({
            "person": p2c.from_person.full_name,
            "relation": p2c.relationship_type,
            "company": p2c.to_company.name,
            "url": "https://pep.org.ua{}".format(
                p2c.from_person.get_absolute_url()),
        })

with open("/tmp/relations.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=[
        "company1", "relation", "back_relation", "company2",
def dump_csv(table, name):
    with open(name, 'w') as fh:
        writer = DictWriter(fh, fieldnames=table.columns)
        writer.writeheader()
        for row in table:
            writer.writerow(row)
        if not isinstance(s2, dict):
            continue
        if s2.get("previous_firstname") or s2.get(
                "previous_lastname") or s2.get("previous_middlename"):
            changes.append({
                "person": d.person_id,
                "first_name": s2.get("firstname", ""),
                "patronymic": s2.get("middlename", ""),
                "last_name": s2.get("lastname", ""),
                "prev_first_name": s2.get("previous_firstname", ""),
                "prev_patronymic": s2.get("previous_middlename", ""),
                "prev_last_name": s2.get("previous_lastname", ""),
            })

    if step_1.get("previous_firstname") or step_1.get(
            "previous_lastname") or step_1.get("previous_middlename"):
        changes.append({
            "person": d.person_id,
            "first_name": d.first_name,
            "patronymic": d.patronymic,
            "last_name": d.last_name,
            "prev_first_name": step_1.get("previous_firstname", ""),
            "prev_patronymic": step_1.get("previous_middlename", ""),
            "prev_last_name": step_1.get("previous_lastname", ""),
        })

with open("/tmp/changed_names.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=changes[0].keys())
    w.writeheader()
    w.writerows(changes)