def export_aliases(project, fh):
    """
    Dump a list of all entity names to a CSV file.

    The table will contain the active name of each entity, and one of
    the other existing names as an alias.

    :param project: project whose entities are exported (filters by id).
    :param fh: open, writable file-like object the CSV is written to.
    """
    writer = DictWriter(fh, ['entity_id', 'alias', 'canonical'])
    writer.writeheader()
    # Self-join EntityProperty twice: one side yields candidate alias
    # names, the other the currently active ("canonical") name.
    alias = aliased(EntityProperty)
    canonical = aliased(EntityProperty)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(canonical)
    q = q.filter(Entity.project_id==project.id)
    q = q.filter(alias.entity_id!=None)
    q = q.filter(alias.name=='name')
    q = q.filter(canonical.name=='name')
    # Only the active property counts as the canonical name.
    q = q.filter(canonical.active==True)
    q = q.add_columns(canonical.value_string.label('canonical'))
    for row in q.all():
        # NOTE(review): rows where alias == canonical are currently kept;
        # the disabled filter below would have skipped them — confirm
        # whether that is intentional.
        #if row.alias == row.canonical:
        #    continue
        writer.writerow({
            'entity_id': str(row.entity_id),
            'alias': row.alias,
            'canonical': row.canonical
        })
def export_aliases(project, fh):
    """
    Dump a list of all entity names to a CSV file.

    The table will contain the active name of each entity, and one of
    the other existing names as an alias.

    :param project: project whose entities are exported (filters by id).
    :param fh: open, writable file-like object the CSV is written to.
    """
    writer = DictWriter(fh, ['entity_id', 'schema', 'alias', 'canonical'])
    writer.writeheader()
    # Self-join Property twice (candidate alias vs. the active canonical
    # name); also join Schema so each row reports the entity's schema.
    alias = aliased(Property)
    canonical = aliased(Property)
    schema = aliased(Schema)
    q = db.session.query(alias.value_string.label('alias'), alias.entity_id)
    q = q.join(Entity)
    q = q.join(schema)
    q = q.join(canonical)
    q = q.filter(Entity.project_id == project.id)
    q = q.filter(alias.entity_id != None)  # noqa
    q = q.filter(alias.name == 'name')
    q = q.filter(canonical.name == 'name')
    # Only the active property counts as the canonical name.
    q = q.filter(canonical.active == True)  # noqa
    q = q.add_columns(canonical.value_string.label('canonical'))
    q = q.add_columns(schema.name.label('schema'))
    for row in q.all():
        writer.writerow({
            'entity_id': str(row.entity_id),
            'schema': row.schema,
            'alias': row.alias,
            'canonical': row.canonical
        })
def studentcsv():
    """Build a CSV string with one row per student that has a project.

    The first row repeats the field names as a header. Students whose
    related objects are incomplete (AttributeError while assembling the
    row) are logged and skipped instead of aborting the export.

    :returns: the full CSV text as a string.
    """
    f = StringIO()
    writer = DictWriter(f, STUDENT_FIELDS)
    writer.writerow(dict((x, x) for x in STUDENT_FIELDS))
    for student in models.Student.query.all():
        if not student.project_id:
            continue
        # BUG FIX: `record` used to survive across iterations, so a student
        # whose row failed mid-assembly could silently re-write the
        # previous student's row (or raise NameError on the first failure).
        record = None
        try:
            project = student.project
            record = {STUDENT_FIELDS[0]: student.project_id,
                      STUDENT_FIELDS[1]: student.project.category.name,
                      STUDENT_FIELDS[2]: fx(student.firstname),
                      STUDENT_FIELDS[3]: fx(student.lastname),
                      STUDENT_FIELDS[4]: student.grade,
                      STUDENT_FIELDS[5]: student.gender,
                      STUDENT_FIELDS[6]: student.school.name,
                      STUDENT_FIELDS[7]: str(project.individual),
                      STUDENT_FIELDS[8]: str(project.forms.first().vafa or project.forms.first().vafb),
                      STUDENT_FIELDS[9]: str(project.forms.first().hsf),
                      STUDENT_FIELDS[10]: str(project.forms.first().phbaf)}
        except AttributeError as error:
            # NOTE(review): the 'ProjID'/'ID' labels appear swapped versus
            # the values logged (student.id vs project.id) — confirm.
            app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' %
                             (student.id, student.project.id,
                              student.firstname, student.lastname,
                              vars(student.project), error))
        if record:
            try:
                writer.writerow(record)
            except UnicodeEncodeError:
                app.logger.error("Unicode Error:\n%s" % record)
    return f.getvalue()
def find_all_matching_officers(min_percentage=0.75):
    """Stream officer matches for every gdocs person to stdout as CSV.

    Progress is reported on stderr; officers already matched are excluded
    from subsequent lookups.
    """
    matched_so_far = set()
    fingerprints = get_all_officer_fingerprints()
    sys.stderr.write("\nFinding matches...\n")
    columns = ['Full Name (from persons)', 'officer_id (from npo_officers)']
    writer = DictWriter(sys.stdout, columns)
    writer.writeheader()
    for index, person in enumerate(gdocs_persons()):
        hits = find_matching_officers(
            make_fingerprint(person['Full Name']),
            fingerprints,
            excluded_ids=matched_so_far,
            min_percentage=min_percentage
        )
        for officer_id in hits:
            writer.writerow({columns[0]: person['Full Name'],
                             columns[1]: officer_id})
        # Exclude these officers from all later persons' lookups.
        matched_so_far.update(hits)
        sys.stderr.write("\r%d" % (index + 1))
        sys.stderr.flush()
    sys.stderr.write("\nDone\n")
def render_non_html(encoding, querySet):
    '''Renders non-html formats and returns an appropriate HttpResponse

    :param encoding: one of 'csv', 'xml' or 'json'; anything else raises
        ValueError (previously this fell through and crashed with an
        UnboundLocalError on `response`).
    :param querySet: queryset of genesets to serialize.
    '''
    if encoding == 'csv':
        vals = querySet.values()
        response = HttpResponse(mimetype='text/csv')
        response['Content-Disposition'] = \
            'attachment; filename=genesets%d.csv' % len(vals)
        csvW = DictWriter(response, GenesetFieldNames)
        # writeheader() is equivalent to the old manual field->field dict.
        csvW.writeheader()
        csvW.writerows(vals)
    elif encoding == 'xml':
        response = HttpResponse(mimetype='text/xml')
        response['Content-Disposition'] = \
            'attachment; filename=genesets%d.xml' % len(querySet)
        serializers.serialize("xml", querySet, stream=response)
    elif encoding == "json":
        response = HttpResponse(mimetype='application/json')
        response['Content-Disposition'] = \
            'attachment; filename=genesets%d.js' % len(querySet)
        serializers.serialize("json", querySet, stream=response)
    else:
        # BUG FIX: unknown encodings used to raise UnboundLocalError.
        raise ValueError('unsupported encoding: %r' % encoding)
    return response
def handle(self, *args, **options):
    """Management-command entry point: dump completed HIT results to CSV.

    Expects exactly two positional args: the template file path (used to
    look up the HitTemplate by name) and the output CSV path.
    """
    if len(args) != 2:
        raise CommandError(
            'usage: python manage.py dump_results '
            '<template_file_path> '
            '<results_csv_file_path>'
        )
    # Normalize both paths to absolute form.
    template_file_path = os.path.abspath(args[0])
    results_csv_file_path = os.path.abspath(args[1])
    try:
        template = HitTemplate.objects.get(name=template_file_path)
    except ObjectDoesNotExist:
        sys.exit('There is no matching <template_file_path>.')
    completed_hits = template.hit_set.filter(completed=True)
    if not completed_hits.exists():
        sys.exit('There are no completed HITs.')
    fieldnames, rows = results_data(completed_hits)
    with open(results_csv_file_path, 'wb') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        writer.writerows(rows)
def find_all_matching_officers(min_percentage=0.75):
    """Match every gdocs person against officer fingerprints, CSV to stdout.

    Already-matched officer ids are excluded from later lookups; a running
    person counter is printed to stderr.
    """
    seen = set()
    prints = get_all_officer_fingerprints()
    sys.stderr.write("\nFinding matches...\n")
    out = DictWriter(sys.stdout, [
        'Full Name (from persons)',
        'officer_id (from npo_officers)'
    ])
    out.writeheader()
    count = 0
    for data in gdocs_persons():
        matches = find_matching_officers(
            make_fingerprint(data['Full Name']),
            prints,
            excluded_ids=seen,
            min_percentage=min_percentage)
        out.writerows(
            {'Full Name (from persons)': data['Full Name'],
             'officer_id (from npo_officers)': m}
            for m in matches)
        seen.update(matches)
        count += 1
        sys.stderr.write("\r%d" % count)
        sys.stderr.flush()
    sys.stderr.write("\nDone\n")
def mappings_export(file, decided):
    """Export mappings to a CSV file."""
    columns = ['left', 'right', 'judgement']
    writer = DictWriter(file, fieldnames=columns)
    writer.writeheader()
    # One row per mapping matching the requested decision state.
    writer.writerows(
        {'left': m.left_uid, 'right': m.right_uid, 'judgement': m.judgement}
        for m in Mapping.find_by_decision(decided)
    )
def write_csv(self, outputdir, timestamp, items=None):
    """Write this export's items to a CSV file under *outputdir*.

    The file name comes from self.filename(); when *items* is None the
    rows are fetched via self.get_items(). Returns self for chaining.
    """
    target = self.filename('csv', timestamp, **self.filter_kwargs)
    path = os.path.join(outputdir, target)
    rows = self.get_items() if items is None else items
    with open(path, 'w') as csvfile:
        writer = DictWriter(csvfile, self.get_fields())
        writer.writeheader()
        writer.writerows(rows)
    return self
def write_csv(self, outputdir, timestamp, items=None):
    """Dump this export's rows to a timestamped CSV in *outputdir*.

    Falls back to self.get_items() when no explicit *items* are given;
    returns self so calls can be chained.
    """
    name = self.filename('csv', timestamp, **self.filter_kwargs)
    path = os.path.join(outputdir, name)
    if items is None:
        items = self.get_items()
    csvfile = open(path, 'w')
    try:
        writer = DictWriter(csvfile, self.get_fields())
        writer.writeheader()
        for record in items:
            writer.writerow(record)
    finally:
        csvfile.close()
    return self
def export_csv_table(archive, model, name):
    """Export all rows of *model* to '<name>.csv', then upload the file.

    The local file is removed once the upload reports a URL.
    """
    file_path = os.path.join(_make_export_path(), '%s.csv' % name)
    log.info("Exporting CSV to %s...", file_path)
    with open(file_path, 'w') as fh:
        writer = None
        for obj in session.query(model):
            row = obj.to_row()
            if writer is None:
                # Lazy init: the header comes from the first row's keys,
                # so an empty table produces an empty file.
                writer = DictWriter(fh, row.keys())
                writer.writeheader()
            writer.writerow(row)
    url = archive.upload_file(file_path, mime_type='text/csv')
    if url is not None:
        os.unlink(file_path)
def tocsv():
    """Render team-leader projects, grouped by district and school, as CSV.

    One row per team leader; up to two non-leader teammates fill the
    teammate columns. Rows that fail to assemble (AttributeError on
    missing related objects) are logged and skipped.

    :returns: the full CSV text as a string.
    """
    f = StringIO()
    writer = DictWriter(f, CSV_FIELDS)
    writer.writerow(dict((x, x) for x in CSV_FIELDS))
    districts = models.District.query.order_by('name').all()
    for district in districts:
        schools = district.schools.order_by('name').all()
        for school in schools:
            students = school.students.join(models.Project).order_by('title')
            students = students.filter(models.Student.team_leader==True).all()
            for student in students:
                # BUG FIX: `record` used to leak across iterations, so a
                # failed row could re-write the previous student's row.
                record = None
                try:
                    record = {CSV_FIELDS[0]: student.project.id,
                              CSV_FIELDS[1]: fx("%s %s"%(student.firstname, student.lastname)),
                              CSV_FIELDS[4]: fx(student.project.title),
                              CSV_FIELDS[5]: fx(student.project.category.name),
                              CSV_FIELDS[6]: fx(student.project.division),
                              CSV_FIELDS[7]: student.school.name,
                              CSV_FIELDS[8]: student.school.district.name,
                              CSV_FIELDS[9]: fx("%s %s"%(student.sponsor.firstname, student.sponsor.lastname)),
                              CSV_FIELDS[10]: student.project.forms_submitted,
                              CSV_FIELDS[11]: student.project.notes,
                              }
                    # Up to two non-leader teammates fill columns 2 and 3.
                    team = student.project.student
                    team = team.filter(models.Student.team_leader==False).limit(2)
                    i = 2
                    # BUG FIX: the inner loop used to rebind `student`,
                    # shadowing the outer loop variable.
                    for teammate in team.all():
                        record[CSV_FIELDS[i]] = fx("%s %s"%(teammate.firstname, teammate.lastname))
                        i += 1
                except AttributeError as error:
                    app.logger.error('ProjID:%s - ID:%s - %s %s\n%s\n%s' %
                                     (student.id, student.project.id,
                                      student.firstname, student.lastname,
                                      vars(student.project), error))
                if record is None:
                    continue
                try:
                    writer.writerow(record)
                except UnicodeEncodeError:
                    app.logger.error("Unicode Error:\n%s" % record)
    return f.getvalue()
def convert_file(file_path): result_path = file_path.replace('/wdvc16', '/processed_wdvc16').replace('.xml', '.csv') print 'writing to %s...' % result_path xml_pages = stream_pages(file_path) fieldnames = [u'revision_id', u'revisions_in_group', u'revision_comment', u'revision_timestamp', u'page_id', u'page_group', u'page_ns', u'page_title', u'anonimous_ip', u'user_id', u'username'] with open(result_path, 'w') as csv_file: writer = DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for xml_page in tqdm(xml_pages): pages = parse_page(xml_page) for page in pages: writer.writerow(page)
def report(self):
    """Write a membership matrix (one row per board/source) to self.filename.

    Columns are the union of all known members; each cell is True when the
    member belongs to that board. 'google' and 'trello' are added as
    pseudo-boards.
    """
    trello_members, google_members, board_members = self.auditor.get_members(
    )
    # Union of members across both services and every board.
    everyone = set(trello_members) | set(google_members)
    for members in board_members.values():
        everyone.update(members)
    with open(self.filename, 'wb+') as fp:
        writer = DictWriter(fp, ['name'] + list(everyone))
        writer.writeheader()
        board_members['google'] = google_members
        board_members['trello'] = trello_members
        for board, members in board_members.items():
            row = {member: (member in members) for member in everyone}
            row['name'] = board
            writer.writerow(row)
def writetocsv(xmlfile=None, month=None, year=None, outfile='../data/data.csv'):
    """Append incident rows from *xmlfile* to *outfile* as CSV.

    Each <DATA> element becomes one row; a month/year column is added to
    every row. The header is derived lazily from the first incident's
    fields.

    :raises Exception: when xmlfile, month or year is missing.
    """
    # NOTE: modernized from py2-only `raise Exception, "..."` syntax.
    if xmlfile is None:
        raise Exception("No XML file passed")
    if month is None:
        raise Exception("No month passed")
    if year is None:
        raise Exception("No year passed")
    xmldata = etree.parse(xmlfile)
    csvwriter = None
    # BUG FIX: the hard-coded '../data/data.csv' was opened before,
    # silently ignoring the *outfile* argument; also close via `with`.
    with open(outfile, 'a') as csvfile:
        for incident in xmldata.iter('DATA'):
            data = {'month': month, 'year': year}
            for field in incident.iterchildren():
                data[field.tag] = field.text
            if csvwriter is None:
                # Lazy init: header fields come from the first incident.
                csvwriter = DictWriter(csvfile, fieldnames=data.keys())
                csvwriter.writeheader()
            csvwriter.writerow(data)
def make_csv():
    # Build one aggregate CSV per zone type from the per-locality
    # JSON-lines files produced by the scraper.
    data_files = os.listdir('scraped_data')
    fieldnames_by_locality = get_all_variables_by_locality()
    for zone_type in ['epci']:
        print "Make %s csv..." % zone_type
        locality_data_files = [ data_file for data_file in data_files if zone_type in data_file ]
        # French display labels for the fixed columns; extended below with
        # the scraped variable labels for this zone type.
        variables_mapping = {
            'name': u'nom',
            'year': u'année',
            'zone_type': u'type de zone administrative',
            'population': u'population',
            'insee_code': u'cog (code officiel géographique)',
            'url': u'url'
        }
        fieldnames = ['year', 'zone_type', 'name', 'population', 'insee_code', 'url'] \
            + sorted(fieldnames_by_locality[zone_type].keys())
        variables_mapping.update(fieldnames_by_locality[zone_type])
        if zone_type == 'epci':
            # EPCI groupings additionally carry a SIREN identifier.
            fieldnames.append('siren')
        with open(os.path.join('nosdonnees', zone_type + '_all.csv'), 'w') as output:
            # unicodecsv writer (note the encoding kwarg); the first row is
            # the human-readable label mapping, not the raw field names.
            csv_output = DictWriter(output, fieldnames=fieldnames, encoding='utf-8')
            csv_output.writerow(variables_mapping)
            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join('scraped_data', locality_data_file), encoding='utf-8') as input:
                    # One JSON object per line; only its 'data' payload is
                    # written out.
                    for line in input:
                        data = json.loads(line, encoding='utf-8')['data']
                        csv_output.writerow(data)
def convert_pickled_pages_to_csv_dataset(in_file, out_pages_fpath, out_transitions_fpath, delete_in_file = True):
    # Two-pass conversion of a pickle stream of page objects into two CSVs:
    # pass 1 collects the union of column names from every page (and each
    # page's transitions), pass 2 re-reads the stream and writes the rows
    # under stable, sorted headers. The input may optionally be deleted.
    pages_columns = set()
    transitions_columns = set()
    with open(in_file, 'r') as inf:
        while True:
            try:
                page = pickle.load(inf)
                pages_columns.update(page.as_dict().viewkeys())
                for trans in page.transitions:
                    # Transition rows are augmented with one FROM_LABEL__*
                    # indicator column per label of the source page.
                    transitions_columns.update(trans.as_dict({ "FROM_LABEL__%s" % l : 1 for l in page.labels }).viewkeys())
            except EOFError:
                # pickle.load signals end-of-stream with EOFError.
                break
    with open(out_pages_fpath, 'w') as pages_f, \
            open(out_transitions_fpath, 'w') as trans_f:
        # unicodecsv writers (note the encoding kwarg).
        pages_writer = DictWriter(pages_f, sorted(pages_columns), encoding = 'utf8')
        pages_writer.writeheader()
        trans_writer = DictWriter(trans_f, sorted(transitions_columns), encoding = 'utf8')
        trans_writer.writeheader()
        # Second pass over the same pickle stream to emit the rows.
        with open(in_file, 'r') as inf:
            while True:
                try:
                    page = pickle.load(inf)
                    pages_writer.writerow(page.as_dict())
                    for trans in page.transitions:
                        trans_writer.writerow(trans.as_dict({ "FROM_LABEL__%s" % l : 1 for l in page.labels }))
                except EOFError:
                    break
    if delete_in_file:
        os.remove(in_file)
def parse_file(xml_file): print 'converting %s to csv' % xml_file # csv file name new_file_path = xml_file.replace('wdvc16', 'converted_wdvc16').replace('.xml', '.csv') print 'writing to %s' % new_file_path # page by page generator of the xml file xml_file_by_pages = page_stream_generator(xml_file) # columns columns = [u'page_title', u'page_ns', u'page_id', u'revision_id', u'revision_timestamp', u'revision_comment', u'revision_model', u'revision_format', u'revision_count', u'username', u'user_id', u'ip_address'] with open(new_file_path, 'w') as csv_file: writer = DictWriter(csv_file, fieldnames=columns) writer.writeheader() for xml_page in xml_file_by_pages: revisions_in_page = parse_page(xml_page) for page in revisions_in_page: writer.writerow(page)
def make_csv():
    # Build one aggregate CSV per zone type (city/department/epci/region)
    # from the per-locality JSON-lines files produced by the scraper.
    data_files = os.listdir("scraped_data")
    fieldnames_by_locality = get_all_variables_by_locality()
    for zone_type in ["city", "department", "epci", "region"]:
        print "Make %s csv..." % zone_type
        locality_data_files = [data_file for data_file in data_files if zone_type in data_file]
        # French display labels for the fixed columns; extended below with
        # the scraped variable labels for this zone type.
        variables_mapping = {
            "name": u"nom",
            "year": u"année",
            "zone_type": u"type de zone administrative",
            "population": u"population",
            "insee_code": u"cog (code officiel géographique)",
            "url": u"url",
        }
        fieldnames = ["year", "zone_type", "name", "population", "insee_code", "url"] + sorted(
            fieldnames_by_locality[zone_type].keys()
        )
        variables_mapping.update(fieldnames_by_locality[zone_type])
        if zone_type == "epci":
            # EPCI groupings additionally carry a SIREN identifier.
            fieldnames.append("siren")
        with open(os.path.join("nosdonnees", zone_type + "_all.csv"), "w") as output:
            # unicodecsv writer (note the encoding kwarg); the first row is
            # the human-readable label mapping, not the raw field names.
            csv_output = DictWriter(output, fieldnames=fieldnames, encoding="utf-8")
            csv_output.writerow(variables_mapping)
            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join("scraped_data", locality_data_file), encoding="utf-8") as input:
                    # One JSON object per line; only the 'data' payload is
                    # written out.
                    for line in input:
                        data = json.loads(line, encoding="utf-8")["data"]
                        csv_output.writerow(data)
class SplitCSVPipeline(object):
    """Scrapy pipeline that fans items out into links.csv / results.csv."""

    def open_spider(self, spider):
        # One file + writer pair per item kind; headers go out immediately.
        self.links_file = open('links.csv', 'wb')
        self.links_writer = DictWriter(self.links_file, ['source', 'destination'])
        self.links_writer.writeheader()
        self.results_file = open('results.csv', 'wb')
        self.results_writer = DictWriter(self.results_file, ['url', 'status', 'next'])
        self.results_writer.writeheader()

    def close_spider(self, spider):
        self.results_file.close()
        self.links_file.close()

    def process_item(self, item, spider):
        # Deliberately not elif: an item matching both checks is routed to
        # both files, as in the original.
        if isinstance(item, Link):
            self.links_writer.writerow(item)
        if isinstance(item, Result):
            self.results_writer.writerow(item)
        return item
def make_csv():
    # Build one aggregate CSV per zone type (city/epci/department/region)
    # from the per-locality JSON-lines files produced by the scraper.
    data_files = os.listdir('scraped_data')
    fieldnames_by_locality = get_all_variables_by_locality()
    for zone_type in ['city', 'epci', 'department', 'region']:
        print "Make %s csv..." % zone_type
        locality_data_files = [data_file for data_file in data_files if zone_type in data_file]
        # French display labels for the fixed columns; extended below with
        # the scraped variable labels for this zone type.
        variables_mapping = {
            'name': u'nom',
            'year': u'année',
            'zone_type': u'type de zone administrative',
            'population': u'population',
            'insee_code': u'cog (code officiel géographique)',
            'url': u'url'
        }
        fieldnames = ['year', 'zone_type', 'name', 'population', 'insee_code', 'url'] \
            + sorted(fieldnames_by_locality[zone_type].keys())
        variables_mapping.update(fieldnames_by_locality[zone_type])
        if zone_type == 'epci':
            # EPCI groupings additionally carry a SIREN identifier.
            fieldnames.append('siren')
        with open(os.path.join('nosdonnees', zone_type + '_all.csv'), 'w') as output:
            # unicodecsv writer (note the encoding kwarg); the first row is
            # the human-readable label mapping, not the raw field names.
            csv_output = DictWriter(output, fieldnames=fieldnames, encoding='utf-8')
            csv_output.writerow(variables_mapping)
            for locality_data_file in locality_data_files:
                with codecs.open(os.path.join('scraped_data', locality_data_file), encoding='utf-8') as input:
                    # One JSON object per line; only the 'data' payload is
                    # written out.
                    for line in input:
                        data = json.loads(line, encoding='utf-8')['data']
                        csv_output.writerow(data)
# Pass 1: companies directly co-owned by the state — for every PEP company
# flagged state_company, emit the SMIDA owner records matching its EDRPOU.
# NOTE(review): `fieldnames`, `smida_owner_records` and
# `smida_indirect_records` are defined earlier in the script (not visible
# here) — presumably parsed from a SMIDA dump; confirm.
with open("/tmp/coowned_by_state.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=["pep_company_name", "pep_company_link"] + fieldnames)
    w.writeheader()
    coowned_by_state = 0
    for c in Company.objects.filter(state_company=True).nocache().iterator():
        # SMIDA records are keyed without leading zeros.
        edrpou = c.edrpou.lstrip("0")
        if edrpou and edrpou in smida_owner_records:
            coowned_by_state += 1
            for l in smida_owner_records[edrpou]:
                rec = l.copy()
                rec["pep_company_name"] = c.name_uk
                rec["pep_company_link"] = u"{}{}".format(settings.SITE_URL, c.get_absolute_url())
                # Queue the owner's own EDRPOU for the indirect pass below.
                smida_indirect_records[l["EDRPOU"].strip().lstrip("0")].append(rec)
                w.writerow(rec)
print("Coowned by state: {}".format(coowned_by_state))
# Pass 2: companies owned indirectly, via the owners collected above.
with open("/tmp/coowned_indirectly_by_state.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=["pep_company_name", "pep_company_link"] + fieldnames)
    w.writeheader()
    coowned_indirectly_by_state = 0
    for edrpou in smida_indirect_records:
        coowned_indirectly_by_state += 1
        for l in smida_indirect_records[edrpou]:
            w.writerow(l)
print("Coowned indirectly by state: {}".format(coowned_indirectly_by_state))
# NOTE(review): fragment of a loop over Person2Company rows (`p2c`) with a
# DictWriter `w` created earlier; the original indentation was lost and is
# reconstructed here — confirm against the source script. `from_declaration`
# for the else-branch is presumably set before this fragment.
if p2c.declarations:
    # Ownership years come from the declarations that confirm it.
    from_declaration = True
    years = set(
        Declaration.objects.filter(
            pk__in=p2c.declarations).values_list("year", flat=True))
else:
    # Fall back to the relation's own start/finish/confirmation dates.
    years = map(
        lambda x: x.year,
        filter(None, [
            p2c.date_established, p2c.date_finished, p2c.date_confirmed
        ]))
if years:
    # Expand to the full inclusive year range, as unicode strings (py2).
    years = map(unicode, range(min(years), max(years) + 1))
w.writerow({
    "pep": p2c.from_person.full_name,
    "url": "https://pep.org.ua{}".format(p2c.from_person.get_absolute_url()),
    "company_name": unicode(p2c.to_company),
    "edrpou": p2c.to_company.edrpou,
    "years": ", ".join(sorted(years)),
    "from_declaration": from_declaration,
    # "бенеф" (beneficiary) in the relationship marks an owner; anything
    # else is treated as a founder.
    "person_type": "owner" if u"бенеф" in p2c.relationship_type_uk.lower() else "founder"
})
itemtype, 'title': g.value(book, DC.title), 'date': g.value(book, DC.date) or '', # not all have dates 'tags': ', '.join(tags), '# tags': len(tags), '# tags ending in Y': len([t for t in tags if t.endswith('Y')]) }) items = sorted(items, key=lambda k: k['# tags ending in Y'], reverse=True) # generate csv file name based on input file filebase, ext = os.path.splitext(os.path.basename(args.filename)) csv_filename = '%s.csv' % filebase with open(csv_filename, 'w') as csvfile: # write byte-order-mark for utf-8 opening in csvfile.write(codecs.BOM_UTF8) fieldnames = [ 'identifier', 'type', 'title', 'date', '# tags', '# tags ending in Y', 'tags' ] writer = DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for item in items: writer.writerow(item)
# NOTE(review): fragment — `w` (DictWriter) and `countries_list` are
# defined earlier in the script, outside this view.
w.writeheader()
for c2c in Company2Country.objects.filter(
        to_country__name_uk__in=countries_list).select_related(
        "to_country"):
    related = c2c.from_company.all_related_persons

    def joiner(persons):
        # One "<full name>, <profile url>" entry per line.
        return u"\n".join([
            u"{}, https://pep.org.ua{}".format(p.full_name,
                                               p.get_absolute_url())
            for p in persons
        ])

    w.writerow({
        "country": c2c.to_country.name_uk,
        "company_name": c2c.from_company.name_uk,
        "company_code": c2c.from_company.edrpou,
        "company_url":
        "https://pep.org.ua{}".format(c2c.from_company.get_absolute_url()),
        "founders": joiner(related["founders"]),
        "managers": joiner(related["managers"]),
        "rest": joiner(related["rest"]),
        "sanctions": joiner(related["sanctions"]),
    })
from tqdm import tqdm activate(settings.LANGUAGE_CODE) with open("/tmp/positions.csv", "w") as fp: w = DictWriter(fp, fieldnames=["person", "relation", "company", "url"]) w.writeheader() for p2c in tqdm(Person2Company.objects.all().select_related( "from_person", "to_company").nocache().iterator()): w.writerow({ "person": p2c.from_person.full_name, "relation": p2c.relationship_type, "company": p2c.to_company.name, "url": "https://pep.org.ua{}".format(p2c.from_person.get_absolute_url()), }) with open("/tmp/relations.csv", "w") as fp: w = DictWriter(fp, fieldnames=[ "company1", "relation", "back_relation", "company2", "url" ]) w.writeheader() for c2c in tqdm(Company2Company.objects.all().select_related(
# NOTE(review): fragment of a loop over Person2Person links (`p2p`);
# `dates_1`/`dates_2` look like [from, to] tenure pairs for the two persons
# at `company`, and `personnel` maps person id -> Person2Company. The
# original indentation was lost; this reconstruction assumes the writerow
# AND the delete both run only when the tenures do NOT overlap — confirm.
# The first assignment is the tail of a missing-value check cut off above.
dates_1[0] = date(1991, 1, 1)
if dates_2[0] is None:
    # Open-ended start defaults to 1991-01-01.
    dates_2[0] = date(1991, 1, 1)
if dates_1[1] is None:
    # Open-ended finish defaults to today.
    dates_1[1] = date.today()
if dates_2[1] is None:
    dates_2[1] = date.today()
# Standard interval-intersection test on the normalised ranges.
overlap = (dates_1[0] <= dates_2[1] and dates_2[0] <= dates_1[1])
if not overlap:
    w.writerow({
        "company": company,
        "person1": p2p.from_person,
        "person2": p2p.to_person,
        "p1_to_p2": p2p.from_relationship_type,
        "p1_to_c": personnel[p2p.from_person_id].relationship_type_uk,
        "p2_to_c": personnel[p2p.to_person_id].relationship_type_uk,
        "p1_from": dates_1[0],
        "p1_to": dates_1[1],
        "p2_from": dates_2[0],
        "p2_to": dates_2[1],
    })
    p2p.delete()
# NOTE(review): fragment — `w` (DictWriter), `countries` (code -> name map)
# and `_search_db` are defined earlier in the script, outside this view.
# Scans confirmed NACP declarations for step_9 (corporate rights) entries
# in foreign companies and exports those not already present in the DB.
for d in Declaration.objects.filter(nacp_declaration=True,
                                    confirmed="a").nocache().iterator():
    data = d.source["nacp_orig"]
    if isinstance(data.get("step_9"), dict):
        for cash_rec in data["step_9"].values():
            if not isinstance(cash_rec, dict):
                continue
            # Country code "1" is the domestic default (also used when the
            # field is missing or empty) — skip those holdings.
            if (cash_rec.get("country", "1") or "1") == "1":
                continue
            rec_to_export = {
                "company_name": cash_rec.get("name"),
                "legalForm": cash_rec.get("legalForm"),
                "country": countries[cash_rec.get("country", "1") or "1"],
                "en_name": cash_rec.get("en_name"),
                "location": cash_rec.get("location"),
                "en_address": cash_rec.get("en_address"),
                "phone": cash_rec.get("phone"),
                "address": cash_rec.get("address"),
                "mail": cash_rec.get("mail"),
                "company_code": cash_rec.get("beneficial_owner_company_code"),
                # "1" marks the declarant themselves; anything else is a
                # family member.
                "owner": "DECLARANT" if cash_rec.get("person") == "1" else "FAMILY"
            }
            if _search_db(rec_to_export) is None:
                w.writerow(rec_to_export)
            else:
                print(u"Company {} found in db, skippings".format(
                    rec_to_export["company_name"])
                )
def write_csv(fieldnames, rows, path):
    """Write *rows* (dicts) to *path* as CSV with the given header."""
    with open(path, 'wb') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()
        writer.writerows(rows)
# print(h.HITId) # currhits[h.HITId] = h # print('{}: {}'.format(len(currhits), currhits)) # # get_all_hits iterates through all your current HITs, grabbing 100 at a time # # best to break as soon as you get all the HITIds in your group # if len(currhits) == len(hitids): # break currhits = {h.HITId: h for h in mtc.get_all_hits() if h.HITId in hitids} print('{} Current HITs: {}'.format(len(currhits), sorted(currhits.keys()))) process_assignments(assignments, all_results, currhits) outkeys.extend(list(sorted(answer_keys))) # Structure of hits # foo.Amount foo.Expiration foo.IntegerValue foo.QualificationTypeId # foo.AssignmentDurationInSeconds foo.FormattedPrice foo.Keywords foo.RequesterAnnotation # foo.AutoApprovalDelayInSeconds foo.HIT foo.LocaleValue foo.RequiredToPreview # foo.Comparator foo.HITGroupId foo.MaxAssignments foo.Reward # foo.Country foo.HITId foo.NumberOfAssignmentsAvailable foo.Title # foo.CreationTime foo.HITReviewStatus foo.NumberOfAssignmentsCompleted # foo.CurrencyCode foo.HITStatus foo.NumberOfAssignmentsPending foo.expired # foo.Description foo.HITTypeId foo.QualificationRequirement with open(args.resultsfile, 'w') as outfile: dw = DictWriter(outfile, fieldnames=outkeys, delimiter='\t') dw.writeheader() for row in all_results: dw.writerow(row)
"companyName": companyName, "industries": industries, "totalViews": totalViews, "url": url } totalJobs.append(item) except: pass if __name__ == '__main__': """ Provide the location and companyName """ location = "New York" companyName = "Airbnb" getjobs(companyName, location) print "Total jobs got ", len(totalJobs) file = open('finalData.csv', 'wb') fields = [ 'jobTitle', 'companyName', 'location', 'postedTime', 'totalViews', 'jobDescription', 'industries', 'employmentType', 'experience', 'employmentType', 'jobFunctions', 'url' ] csvfile = DictWriter(file, fieldnames=fields, quoting=QUOTE_ALL, encoding="utf-8") csvfile.writeheader() for i in totalJobs: csvfile.writerow(i) file.close()
if __name__ == '__main__':
    # Flatten every per-task ("tarefa") file under TAREFAS_DIR into one
    # CSV, one row per log entry, tagged with the task id parsed from the
    # file name.
    print("Exporting tarefas to CSV...")
    with open(TAREFAS_DIR + '/../csv/data_exported.csv', 'wb') as fout:
        csv_writer = DictWriter(
            fout,
            fieldnames=[
                'id_tarefa', 'num_tarefa', 'titulo_tarefa', 'tipo_tarefa',
                'data_cadastro_tarefa', 'sistema_tarefa',
                'data_inicio_tarefa', 'subsistema_tarefa',
                'data_deadline_tarefa', 'aberta_por_tarefa',
                'localizacao_analista_tarefa', 'situacao_tarefa',
                'horas_trabalhadas_tarefa', 'gerente_relacionamento_tarefa',
                'num_prioridade_tarefa', 'andamento_tarefa',
                'prioridade_tarefa', 'dados_build_log', 'data_cadastro_log',
                'atividade_log', 'situacao_log', 'andamento_log',
                'horas_trabalhadas_log', 'aberto_por_log', 'revisao_svn_log'
            ])
        csv_writer.writeheader()
        for tarefa_filename in os.listdir(TAREFAS_DIR):
            # The task id is the first run of digits in the file name.
            id_tarefa = re.findall(r'(\d+)', tarefa_filename)[0]
            tarefa_filepath = TAREFAS_DIR + '/' + tarefa_filename
            with io.open(tarefa_filepath, 'r', encoding='utf-8') as fin:
                list_tarefa_logs = __to_list_tarefa_logs(fin.read())
                for tarefa_log in list_tarefa_logs:
                    # Every log row carries its parent task id.
                    tarefa_log.update({'id_tarefa': id_tarefa})
                    csv_writer.writerow(tarefa_log)
    print("Done!")
page('#parent-fieldname-contactPhone').text(), page('a.email').attr('href').replace('mailto:', ''), page('#parent-fieldname-eventUrl').attr('href') or '' ))), ('meta_submitter_email', '*****@*****.**'), ))) with open("events.p", "wb") as dumpfile: pickle.dump(events, dumpfile) # Write output with open('output.csv', 'w') as csvfile: writer = DictWriter(csvfile, fieldnames=events[0].keys()) writer.writeheader() for event in events: writer.writerow(event) # Submit events for event in events: util.submit_event( email=event['meta_submitter_email'], title=event['title'], description=event['content_description'], location=event['location'], start_date=event['start'].split('T')[0], start_time=event['start'].split('T')[1][:5], end_time=event['end'].split('T')[1][:5], base_url=base_url ) # Publish events & close tickets
from django.db import models
from unicodecsv import DictWriter

from core.models import Company

# Export companies that have no related PEP person ("orphaned") so they
# can be reviewed manually.
fieldnames = ["id", "url", "name", "head_is_pep"]
with open("/tmp/orphaned_companies.csv", "w") as fp:
    w = DictWriter(fp, fieldnames=fieldnames)
    w.writeheader()
    # Count distinct related persons flagged is_pep and keep only the
    # companies where that count is zero.
    for c in Company.objects.annotate(cnt=models.Count(
            "from_persons__from_person",
            distinct=True,
            filter=models.Q(from_persons__from_person__is_pep=True))).filter(
            cnt=0):
        w.writerow({
            "id": c.id,
            "url": "https://pep.org.ua/" + c.get_absolute_url(),
            "name": c.name_uk,
            # NOTE(review): the column is named head_is_pep but the value
            # written is state_company — looks like a mislabel; confirm.
            "head_is_pep": c.state_company
        })
from core.models import Person2Person
from tqdm import tqdm
from unicodecsv import DictWriter

# Export every person-to-person relation, with its display-form relation
# types and proof titles, to a CSV for review.
fp = open("/tmp/p2p.csv", "w")
w = DictWriter(fp,
               fieldnames=["person1", "person1_is", "person2", "person2_is",
                           "proofs"])
qs = Person2Person.objects.select_related("from_person", "to_person").nocache()
w.writeheader()
for p2p in tqdm(qs.iterator(), total=qs.count()):
    w.writerow({
        "person1": p2p.from_person,
        "person2": p2p.to_person,
        "person1_is": p2p.get_from_relationship_type_display(),
        "person2_is": p2p.get_to_relationship_type_display(),
        # One proof title per line within the cell.
        "proofs": "\n".join([p.proof_title for p in p2p.proofs.all()])
    })
# NOTE(review): `fp` is never closed — consider a with-block upstream.
def dump_csv(table, name):
    """Dump every row of *table* to the CSV file *name*.

    The header comes from table.columns; iterating *table* must yield
    dict-like rows.
    """
    with open(name, 'w') as out:
        writer = DictWriter(out, fieldnames=table.columns)
        writer.writeheader()
        writer.writerows(table)