def save_job_results(geocoder, job_id):
    """ Download and save to S3 results for completed jobs. """
    logging.info('Saving results for %s to S3' % job_id)
    finished_folder = 'geocode_finished_jobs'
    pending_folder = 'geocode_pending_jobs'
    connection = boto.connect_s3()
    bucket = connection.get_bucket(GEO_BUCKET)
    # The pending key's *contents* hold the user-facing name for the job;
    # that name becomes the finished key's name.
    old_key = bucket.get_key('%s/%s' % (pending_folder, job_id))
    new_name = old_key.get_contents_as_string()
    new_key = Key(bucket)
    new_key.key = '%s/%s' % (finished_folder, new_name)
    results = geocoder.get_job_results(job_id)
    # Serialize results to CSV in memory before uploading.
    # NOTE(review): results[0] raises IndexError if the job produced no
    # rows -- presumably completed jobs always have at least one; verify.
    result_string = StringIO.StringIO()
    writer = DictWriter(result_string, fieldnames=results[0].keys())
    writer.writeheader()
    writer.writerows(results)
    result_string.seek(0)
    # Propagate the notification email (if any) and notify the requester.
    email_address = old_key.get_metadata('email')
    if email_address:
        new_key.set_metadata('email', email_address)
        send_email_notification(
            email_address,
            geocoder.get_job_statuses(job_id=job_id),
            new_name,
            'finished')
    new_key.set_contents_from_string(result_string.getvalue())
    new_key.make_public()
    # Remove the pending marker only after the finished key exists.
    old_key.delete()
def write_csv(output_file, address_dicts):
    """Write geocoded address dicts to ``output_file`` as CSV.

    Fieldnames are taken from the first dict, so ``address_dicts`` must be
    non-empty and all dicts should share the same keys.
    """
    # Context manager guarantees the file is closed even if writing fails
    # (the original leaked the handle on any exception before close()).
    with open(output_file, 'wb') as geocoded_file:
        writer = DictWriter(geocoded_file, fieldnames=address_dicts[0].keys(),
                            dialect='excel', lineterminator='\n')
        writer.writeheader()
        writer.writerows(address_dicts)
def test_01_importTab(self):
    # First check the collections; importTab creates them if none exist.
    #client.drop_database(DB_INFO['DB_NAME'])
    SD.importTab()
    SD.copy_table()
    self.assertIn('FACT_ATTR', db.collection_names())
    self.assertIn('FACT_SERVICE', db.collection_names())
    self.assertIn('FACT_ATTR_SET', db.collection_names())
    self.assertIn('FACT_SCENE', db.collection_names())
    # If they exist, check ids: rows with new ids are inserted, existing
    # ids are updated.
    L = list()
    table = 'FACT_ATTR'
    filepath = os.path.sep.join([os.path.abspath(Const.DATA_IN), table])
    with open(filepath, 'r') as f:
        dReader = DictReader(f)
        L = [i for i in dReader]
    # Mutate the last row (update case) and append a brand-new row
    # (insert case), then rewrite the source file.
    L[-1]['attr_set_id'] = 1
    L[-1]['value'] = 'rampage'
    L.append({'_id': 4, 'name': 'attr4', 'type_id': 6, 'value': 'test', 'attr_set_id': 2})
    with open(filepath, 'w') as f:
        titles = L[-1].keys()
        dwriter = DictWriter(f, titles)
        header = dict(zip(titles, titles))
        dwriter.writerow(header)
        dwriter.writerows(L)
    SD.importTab()
    # Also exercises querying while we're at it.
    match = {'_id': {'$in': [3, 4]}}
    rs = list(db[table].find(match))
    self.assertEqual(len(rs), 2)
    self.assertEqual(rs[-2]['attr_set_id'], 1)
    self.assertEqual(rs[-2]['value'], 'rampage')
def convert_to_standard():
    """Convert url_data.csv rating columns into the standard
    topic/url/index/ratings/total layout written to converted_data.csv."""
    m_reader = DictReader(open('url_data.csv'))
    m_writer = DictWriter(open('converted_data.csv', 'wb'),
                          ['topic', 'url', 'index', 'ratings', 'total'])
    li = []
    index = 1
    for row in m_reader:
        newrow = {'topic': row['Topic'], 'url': row['URL'], 'index': index}
        # Histogram of Likert ratings 1-5 plus a count of raters.
        ratings = [0, 0, 0, 0, 0]
        total = 0
        if row['Likert Rating - Microsoft']:
            ratings[int(row['Likert Rating - Microsoft']) - 1] += 1
            total += 1
        if row['Likert Rating - EPFL']:
            # A zero EPFL rating marks the row as unusable; skip it entirely.
            if int(row['Likert Rating - EPFL']) == 0:
                continue
            ratings[int(row['Likert Rating - EPFL']) - 1] += 1
            total += 1
        newrow['ratings'] = ','.join([str(r) for r in ratings])
        newrow['total'] = total
        index += 1
        li.append(newrow)
        print newrow
        # print row['Topic']+' '+row['Query'] + ' ' + row['URL'] + ' ' + row['Likert Rating - Microsoft'] + ' ' + row['Likert Rating - EPFL']
    # NOTE(review): no writeheader() call -- the output file is headerless.
    m_writer.writerows(li)
def run(args):
    """Split a "field=value"-encoded graph CSV into one CSV per entity type,
    named "<prefix>_<entity type>.csv"."""
    opts = parse_args(args)
    opts.prefix = opts.prefix or opts.graph.split('.', 1)[0]
    sheets = {}          # entity type -> list of row dicts
    sheet_headers = {}   # entity type -> set of column names seen
    try:
        with file(opts.graph) as csvfile:
            for row in reader(csvfile):
                # Each column is encoded as "field=value".
                fv = dict(column.split('=', 1) for column in row)
                entity_type = fv.pop('Entity Type')
                headers = fv.keys()
                if entity_type not in sheets:
                    sheets[entity_type] = [fv]
                    sheet_headers[entity_type] = set(headers)
                    continue
                else:
                    sheets[entity_type].append(fv)
                    if len(headers) > len(sheet_headers[entity_type]):
                        # NOTE(review): set.union() returns a NEW set and does
                        # not mutate in place -- this line is a no-op, so extra
                        # columns never reach the header; .update(headers) was
                        # almost certainly intended.
                        sheet_headers[entity_type].union(headers)
        for entity_type in sheets:
            with open('%s_%s.csv' % (opts.prefix, entity_type), 'wb') as csvfile:
                csv = DictWriter(csvfile, sheet_headers[entity_type])
                csv.writeheader()
                csv.writerows(sheets[entity_type])
    except IOError, e:
        print 'csv2sheets: %s' % e
        exit(-1)
def main(infile, outfile):
    """Read ``infile`` as CSV, run process() over every row, and write the
    transformed rows to ``outfile`` (header from the first result's keys)."""
    with open(infile) as source, open(outfile, "w") as sink:
        processed = [process(record) for record in DictReader(source)]
        out = DictWriter(sink, fieldnames=processed[0].keys())
        out.writeheader()
        out.writerows(processed)
def write_file(self, event_list, output_dir, idx=None, **kwargs): from csv import DictWriter, excel_tab # flatten cols = [] for ev in event_list: cols += [k for k in ev.keys() if not k in cols] # make sure there's a fidl dir to write to od = os.path.join(output_dir, 'txt') ensure_dir(od) # save f_name = 'events' if idx != None: f_name += '_' + str(idx) f_name += '.txt' out_path = os.path.join(od, f_name) cols.sort() with open(out_path, 'wb') as out_file: dw = DictWriter(out_file, cols, restval='', extrasaction='raise', dialect=excel_tab) dw.writer.writerow(cols) dw.writerows(event_list) return os.path.abspath(out_path)
def saveStockCsv():
    """Dump the module-level ``dicts`` records to ``csvFile`` as CSV,
    with the header taken from the first record's keys."""
    global dicts
    # Context manager guarantees the file is closed even on error
    # (the original leaked the handle if writing raised).
    with open(csvFile, "w") as saveFile:
        writer = DictWriter(saveFile, dicts[0].keys(), lineterminator='\n')
        writer.writeheader()
        writer.writerows(dicts)
def job_result_csv(job_id):
    """Return the finished celery task's result for ``job_id`` as a CSV
    response, or a JSON status/error payload otherwise."""
    db_session = db.get_session()
    db_job = db_session.query(PersistentJob).get(job_id)
    if not db_job:
        return json_error('no task exists with id: {0}'.format(job_id))
    celery_task = Job.task.AsyncResult(db_job.result_key)
    if celery_task.ready():
        task_result = celery_task.get()
        csv_io = StringIO()
        if task_result:
            # if task_result is not empty find header in first row
            fieldnames = ['user_id'] + sorted(task_result.values()[0].keys())
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)
        task_rows = []
        # fold user_id into dict so we can use DictWriter to escape things
        # NOTE(review): mutates the row dicts inside task_result in place;
        # assumes task_result is a dict of user_id -> row dict.
        for user_id, row in task_result.iteritems():
            row['user_id'] = user_id
            task_rows.append(row)
        writer.writeheader()
        writer.writerows(task_rows)
        app.logger.debug('celery task is ready! returning actual result:\n%s', csv_io.getvalue())
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)
def create_access_ports_csv(auth, device_list_with_target_vlan, target_vlan):
    """Collect access interfaces whose PVID matches ``target_vlan`` from each
    device and write them to access_interfaces_vlan_<vlan>.csv."""
    access_interfaces_list = []
    for device in device_list_with_target_vlan:
        accessinterfaces = get_device_access_interfaces(auth.creds, auth.url,
                                                        devid=device['id'])
        try:
            for interface in accessinterfaces:
                if (interface['pvid'] == str(target_vlan)):
                    imcint = IMCInterface(device['ip'], interface['ifIndex'],
                                          auth.creds, auth.url)
                    interface_info = {'sysname': imcint.sysname,
                                      'sysdesc': imcint.sysdescription,
                                      'syslocation': imcint.location,
                                      'intname': imcint.name,
                                      'intdescription': imcint.description,
                                      'status': imcint.status,
                                      'lastchange': imcint.lastchange,
                                      'inttype': 'access',
                                      'pvid': interface['pvid']}
                    access_interfaces_list.append(interface_info)
        except Exception:
            # Best-effort: skip devices that are absent or have incomplete
            # data in the database.  Narrowed from a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass
    # Use the widest row's keys as the CSV header in case some rows carry
    # extra fields.
    keys = access_interfaces_list[0].keys()
    for i in access_interfaces_list:
        if len(i) != len(access_interfaces_list[0].keys()):
            keys = access_interfaces_list[access_interfaces_list.index(i)].keys()
    with open('access_interfaces_vlan_' + str(target_vlan) + '.csv', 'w',
              newline='') as csvfile:
        writer = DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        writer.writerows(access_interfaces_list)
def output_results(poi_result_set, screen=True, outfile=None):
    """
    Outputs unified DBSCAN results to screen or csv file. The screen only
    shows major data elements. The CSV file has the complete dictionary
    (i.e., base dictionay plus ZOA attributes for each POI)
    """
    assert not isinstance(poi_result_set, basestring), 'POI result set is not list or tuple'
    if screen:
        print "\nZOAs by POI"
        print "="*80,
        for poi in poi_result_set:
            print "\nLocation:\t%s" % poi[s.NAME_KEY]
            print "Address:\t%s" % poi[s.ADDR_KEY]
            print "Neighborhood:\t%s" % poi[s.NBHD_KEY]
            print "Coordinates:\t%.4f, %.4f" % (poi[s.LAT_KEY], poi[s.LNG_KEY])
            print "ZOA ID:\t\t%d" % poi[s.ZOA_KEY]
    if outfile:
        # NOTE(review): ``name`` is undefined here -- if this assert ever
        # fires, building its message raises NameError; ``outfile`` was
        # probably intended.
        assert isinstance (outfile, str), "Outfile name is not a string: %r" % name
        # Ensure a .csv extension.
        if outfile[-4:] != '.csv':
            outfile += '.csv'
        with open(outfile, 'wb') as f:
            target = DictWriter(f, poi_result_set[0].keys())
            target.writeheader()
            target.writerows(poi_result_set)
        print "\nWrote output to %s.\n" % outfile
def test_behavior_strategy(b: Behavior, s: Strategy, size=20):
    """Benchmark strategy ``s`` against behavior ``b`` over a fixed number of
    trials, print summary statistics, and append the averaged fault count to
    benchmarks.csv (kept sorted by behavior then strategy)."""
    TRIALS = 10**2
    dynamic = False
    start = time()
    faults = [
        MemoryManager(s, size, dynamic).handle_string(generate_list(b))
        for _ in range(TRIALS)
    ]
    end = time()
    print('Average time: ', (end - start)/TRIALS)
    print('Minimum no. page faults: ', min(faults))
    print('Maximum no. page faults: ', max(faults))
    avg = sum(faults)/len(faults)
    print('Average no. page faults: ', avg)
    # Load the existing benchmark records, append ours, and rewrite sorted.
    with open('benchmarks.csv', 'r') as record_file:
        entries = list(DictReader(record_file))
    entry_fields = ['Behavior', 'Strategy', 'Res. Set Size', 'Faults']
    entries.append({'Behavior': b.name, 'Strategy': s.name,
                    'Res. Set Size': size, 'Faults': int(avg)})
    entries.sort(key=itemgetter('Behavior', 'Strategy'))
    with open('benchmarks.csv', 'w', newline='') as record_file:
        writer = DictWriter(record_file, entry_fields)
        writer.writeheader()
        writer.writerows(entries)
def dump_csv(pages, options):
    """Dump in CSV format.

    ``pages`` is an iterable of (field, value) tuples. It's assumed that
    the same fields are used in each tuple.
    """
    from itertools import chain
    from csv import DictWriter
    from sys import stdout
    pages = iter(pages)
    try:
        first_row = pages.next()
    except StopIteration:
        # Nothing to dump.
        return
    # Column order comes from the first row's field names.
    fields = [item[0] for item in first_row]
    # Re-attach the consumed first row, then convert each row to a dict.
    rows = chain((first_row,), pages)
    dicts = (dict(page) for page in rows)
    dicts = (process_page(row) for row in dicts)
    def validate_row_length(row_dict):
        # Guard against rows gaining/losing fields relative to the header.
        if len(row_dict) != len(fields):
            raise DataValidationError(
                'Inconsistent number of fields in row {0}.\n'
                'Fields: {1}'.format(row_dict, fields))
        return row_dict
    dicts = (validate_row_length(row) for row in dicts)
    writer = DictWriter(stdout, fields, dialect='excel-tab')
    # Header row: map each field name to itself (pre-writeheader() idiom).
    writer.writerow(dict((v, v) for v in fields))
    writer.writerows(dicts)
def write_csv(filename, data, header):
    """Write ``data`` (an iterable of dicts) to ``filename`` as a
    semicolon-delimited CSV with ``header`` defining the column order."""
    from csv import DictWriter
    with open(filename, 'wb') as handle:
        out = DictWriter(handle, fieldnames=header, delimiter=';')
        out.writeheader()
        out.writerows(data)
    return
def write_rows(self, entries):
    """Write rows in the csv file.

    ``entries`` is an iterable of dicts keyed by FIELDNAMES; the file at
    ``self.output_filename`` is overwritten, header included.
    """
    # newline='' is required by the csv module so the writer controls line
    # endings itself (prevents blank lines between rows on Windows).
    with open(self.output_filename, 'w', newline='') as output_file:
        writer = DictWriter(output_file, FIELDNAMES)
        writer.writeheader()
        writer.writerows(entries)
def run(out_file, input_dirs):
    """Collect per-run .log files from ``input_dirs`` and write ``out_file``
    (raw rows) plus an "<out_file>-aggregate" companion of grouped rows."""
    assert input_dirs
    files = []
    for input_dir in input_dirs:
        for filename in listdir(input_dir):
            if filename.endswith(".log"):
                files.append(join(input_dir, filename))
    assert files
    # Sort so groupby below sees equal keys adjacently.
    raw_data = sorted((get_data(filename) for filename in files), key=data_key)
    # Group while ignoring the id component of the key, then aggregate
    # each group's dependent variables.
    aggregated_data = (
        indy_vars + aggregate_data(group)
        for (_key, indy_vars), group
        in groupby(raw_data, partial(data_key, include_id=False))
    )
    with open(out_file, "w") as f:
        out = DictWriter(f, independent_vars + dependent_vars)
        out.writeheader()
        out.writerows(raw_data)
        del out
    # e.g. results.csv -> results-aggregate.csv
    with open("-aggregate".join(splitext(out_file)), "w") as f:
        out = writer(f)
        out.writerow(independent_vars[:-1] + ("count",) + dependent_vars)
        out.writerows(aggregated_data)
def write_output():
    """Dump the module-level ``urls_used_with_pageviews`` rows to the
    statuses/jumbled-urls/pageviews CSV using ``fields`` as the header."""
    with open(
            "urls-used-for-local-transactions-with-statuses-and-jumbled-urls-and-pageviews.csv",
            "w",
            encoding="utf8",
            # newline='' lets the csv writer manage line endings itself,
            # as the csv module documentation requires.
            newline=""
    ) as output:
        writer = DictWriter(output, fields)
        writer.writeheader()
        writer.writerows(urls_used_with_pageviews)
def report_result_csv(result_key):
    """Return the finished report task's results as a CSV response (one row
    per user plus optional SUM/AVG/STD aggregate rows), or a JSON status
    payload while the task is still running."""
    celery_task, pj = get_celery_task(result_key)
    if not celery_task:
        return json_error('no task exists with id: {0}'.format(result_key))
    if celery_task.ready():
        task_result = get_celery_task_result(celery_task, pj)
        csv_io = StringIO()
        if task_result:
            columns = []
            # Column set comes from whichever aggregation is present,
            # preferring individual results.
            if Aggregation.IND in task_result:
                columns = task_result[Aggregation.IND][0].values()[0].keys()
            elif Aggregation.SUM in task_result:
                columns = task_result[Aggregation.SUM].keys()
            elif Aggregation.AVG in task_result:
                columns = task_result[Aggregation.AVG].keys()
            elif Aggregation.STD in task_result:
                columns = task_result[Aggregation.STD].keys()
            # if task_result is not empty find header in first row
            fieldnames = ['user_id'] + columns
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)
        # collect rows to output in CSV
        task_rows = []
        # Individual Results
        if Aggregation.IND in task_result:
            # fold user_id into dict so we can use DictWriter to escape things
            for user_id, row in task_result[Aggregation.IND][0].iteritems():
                task_row = row.copy()
                task_row['user_id'] = user_id
                task_rows.append(task_row)
        # Aggregate Results -- the user_id column doubles as the row label.
        if Aggregation.SUM in task_result:
            task_row = task_result[Aggregation.SUM].copy()
            task_row['user_id'] = Aggregation.SUM
            task_rows.append(task_row)
        if Aggregation.AVG in task_result:
            task_row = task_result[Aggregation.AVG].copy()
            task_row['user_id'] = Aggregation.AVG
            task_rows.append(task_row)
        if Aggregation.STD in task_result:
            task_row = task_result[Aggregation.STD].copy()
            task_row['user_id'] = Aggregation.STD
            task_rows.append(task_row)
        writer.writeheader()
        writer.writerows(task_rows)
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)
def get_csv_object(data_rows, schema, include_header=False):
    """Serialize ``data_rows`` into an in-memory CSV file object.

    ``schema`` is a sequence of {'name': <column>} mappings whose order
    defines the column order; a header row is emitted only when
    ``include_header`` is true.
    """
    columns = [entry['name'] for entry in schema]
    buffer = StringIO()
    csv_writer = DictWriter(buffer, fieldnames=columns)
    if include_header:
        csv_writer.writeheader()
    csv_writer.writerows(data_rows)
    return buffer
def main(start_spd, end_spd, steps, direction):
    """ Performs a motor deadband test, prints the result, and saves the data to a csv. """
    read_frq = 300  # Frequency of reads
    settle_period = 2  # Specify time (s) to let motor settle before assessing deadband
    moving_threshold = 10  # Number of ticks/second to consider the motor to have left the deadband
    comms = PIC_USB(0x0005)
    read_period = 1/float(read_frq)
    motor_speed = start_spd
    motor_dir = direction
    data = []
    # NOTE(review): time.clock() was removed in Python 3.8; this code
    # predates that (time.perf_counter() is the modern equivalent).
    prev_read_quad_time = time.clock()
    prev_quad_counter = comms.get_quad_info()["counter"]
    speeds = range(start_spd, end_spd, steps)
    for spd in speeds:
        set_start = time.clock()
        print("\tSetting motor speed to {}.".format(spd))
        comms.command_dcmotor(spd, motor_dir)
        motor_speed = spd
        # Sample the quadrature counter at read_frq for the settle period.
        while time.clock() - set_start < settle_period:
            this_read = {}
            read_start = time.clock()
            quad_info = comms.get_quad_info()
            this_read["time"] = read_start
            this_read["motor_speed_cmd"] = motor_speed
            this_read["motor_dir_cmd"] = motor_dir
            this_read["quad_counter"] = quad_info["counter"]
            this_read["quad_overflow"] = quad_info["overflow"]
            # Finite-difference speed estimate in ticks/second.
            this_read["quad_calc_speed"] = abs(quad_info["counter"] - prev_quad_counter) / (read_start - prev_read_quad_time)
            data.append(this_read)
            prev_read_quad_time = read_start
            prev_quad_counter = quad_info["counter"]
            # Busy-wait to hold the sampling rate at read_frq.
            while time.clock() - read_start < read_period:
                pass
        # After motor has "settled": estimate velocity and stop stepping once
        # the motor is judged to be moving (end of the deadband).
        settle_check_time = time.clock()
        quad_info = comms.get_quad_info()
        settle_vel = abs(quad_info["counter"] - prev_quad_counter) / (settle_check_time - prev_read_quad_time)
        if settle_vel > moving_threshold:
            print("\tDeadband end identified at speed {}.".format(spd))
            break
    print("\tTest Concluded. Writing data...")
    filename = "deadband_{0}-{1}_steps{2}_{3}".format(start_spd, end_spd, steps, direction)
    headers = ["time", "motor_speed_cmd", "motor_dir_cmd", "quad_counter", "quad_overflow", "quad_calc_speed"]
    with open("data/{}.csv".format(filename), 'wb') as out_data:
        writer = DictWriter(out_data, fieldnames=headers)
        writer.writeheader()
        writer.writerows(data)
    print("\tData saved to \"data/{}.csv\"".format(filename))
    print("ENDING DEADBAND TEST")
def toCSV(self, fields, data):
    """Render ``data`` (an iterable of dicts) as CSV text using the
    configured delimiter; a header row is included when enabled."""
    dialect = csv.excel()
    dialect.delimiter = self.getDelimiter()
    out = StringIO()
    csv_writer = DictWriter(out, fieldnames=fields, dialect=dialect)
    if self.getShowHeader():
        # Header maps each field name onto itself.
        csv_writer.writerow(dict(zip(fields, fields)))
    csv_writer.writerows(data)
    return out.getvalue()
def add_snapshot():
    """Copy alchemy_data.csv to data1.csv, adding a per-row ``snapshot``
    column that points at the static snapshot image for the row's index."""
    m_reader = DictReader(open('alchemy_data.csv'))
    li = []
    for row in m_reader:
        newrow = row
        newrow['snapshot'] = '/static/snapshots/l/url'+row['index']+'.jpg'
        li.append(newrow)
    m_writer = DictWriter(open('data1.csv', 'wb'),
                          ['topic', 'url', 'index', 'ratings', 'total', 'snapshot'])
    # NOTE(review): no writeheader() call -- data1.csv is written headerless.
    m_writer.writerows(li)
def writeToFile():
    """Write the module-level ``content`` track list to musiclibrary.csv,
    ordered by artist, then album, then track number."""
    # A single multi-key sort is equivalent to the original chain of
    # stable single-key sorts (trackNumber, then album, then artist).
    ordered = sorted(content, key=itemgetter('artist', 'album', 'trackNumber'))
    with open('musiclibrary.csv', 'w') as outfile:
        # outfile.write(u'\ufeff'.encode('utf-8'))
        writer = DictWriter(outfile, ('artist', 'album', 'trackNumber', 'name'))
        writer.writeheader()
        writer.writerows(ordered)
def writelog(self, f):
    '''Write log to f - f can be a filename or a file opened for writing'''
    header, log = self.loglines()
    try:
        dw = DictWriter(f, header)
    except TypeError:
        # f was a filename rather than a file object (csv rejects objects
        # without a write method) -- open it for writing instead.
        dw = DictWriter(open(f, 'w'), header)
    # Header row via the underlying writer (pre-writeheader() idiom).
    dw.writer.writerow(header)
    dw.writerows(log)
def main(start_vel, end_vel):
    """ Performs a step response test and saves the data to a csv. """
    test_len = 10  # Length of test in seconds.
    read_frq = 100  # Frequency of reads
    step_time = 1  # Time (in sec) at which to perform step.
    comms = PIC_USB(0x0005)
    read_period = 1/float(read_frq)
    STEPPED = False
    # Velocities are (speed, direction) pairs.
    motor_speed = start_vel[0]
    motor_dir = start_vel[1]
    print("STARTING STEP RESPONSE TEST")
    data = []
    print("\tStarting at speed {}, dir {}".format(motor_speed, motor_dir))
    comms.command_dcmotor(motor_speed, motor_dir)
    # NOTE(review): time.clock() was removed in Python 3.8; this code
    # predates that (time.perf_counter() is the modern equivalent).
    test_start_time = time.clock()
    prev_read_quad_time = test_start_time
    prev_quad_counter = comms.get_quad_info()["counter"]
    while time.clock() - test_start_time < test_len:
        read_start = time.clock()
        this_read = {}
        # Apply the velocity step exactly once after step_time elapses.
        # NOTE(review): compares the absolute clock value to step_time rather
        # than elapsed time since test start -- appears to rely on the clock
        # starting near zero; verify against the original rig.
        if read_start > step_time and not STEPPED:
            motor_speed = end_vel[0]
            motor_dir = end_vel[1]
            print("\tStepping to speed {}, dir {}".format(motor_speed, motor_dir))
            comms.command_dcmotor(motor_speed, motor_dir)
            STEPPED = True
        read_quad_time = time.clock()
        quad_info = comms.get_quad_info()
        this_read["time"] = read_quad_time
        this_read["motor_speed_cmd"] = motor_speed
        this_read["motor_dir_cmd"] = motor_dir
        this_read["quad_counter"] = quad_info["counter"]
        this_read["quad_overflow"] = quad_info["overflow"]
        # Finite-difference speed estimate in ticks/second.
        this_read["quad_calc_speed"] = abs(quad_info["counter"] - prev_quad_counter) / (read_quad_time - prev_read_quad_time)
        data.append(this_read)
        prev_read_quad_time = read_quad_time
        prev_quad_counter = quad_info["counter"]
        # Busy-wait to hold the sampling rate at read_frq.
        while time.clock() - read_start < read_period:
            pass
    print("\tTest Concluded. Writing data...")
    filename = "step_response_{0[0]}-{0[1]}_to_{1[0]}-{1[1]}".format(start_vel, end_vel)
    headers = ["time", "motor_speed_cmd", "motor_dir_cmd", "quad_counter", "quad_overflow", "quad_calc_speed"]
    with open("data/{}.csv".format(filename), 'wb') as out_data:
        writer = DictWriter(out_data, fieldnames=headers)
        writer.writeheader()
        writer.writerows(data)
    print("\tData saved to \"data/{}.csv\"".format(filename))
    print("ENDING STEP RESPONSE TEST")
def print_results(results):
    """Print benchmark ``results`` (objects exposing to_dict()) to stdout
    as a CSV table of concurrency/timing/failure columns."""
    dict_results = [r.to_dict() for r in results]
    fieldnames = ['concurrency', 'total_mean', 'total_median', 'total_max', 'percentage_failed']
    f = StringIO()
    # extrasaction='ignore' silently drops dict keys not in fieldnames.
    writer = DictWriter(f, fieldnames, restval='', extrasaction='ignore')
    writer.writeheader()
    writer.writerows(dict_results)
    f.seek(0)
    print '\n'
    print f.read()
def list_of_dicts_to_csv(table_id, list_of_dicts):
    """Serialize ``list_of_dicts`` into an in-memory CSV using the table's
    column order and return the StringIO object (no header row is written).

    Unicode values are encoded to UTF-8 before writing.
    """
    csv = StringIO.StringIO()
    cols = table_cols(table_id)
    logging.debug("Creating CSV using cols %s" % ','.join(cols))
    w = DictWriter(csv, cols)
    # NOTE(review): mutates the caller's dicts in place while encoding.
    for dict in list_of_dicts:
        for key, value in dict.iteritems():
            if isinstance(value, unicode):
                dict[key] = value.encode('utf8')
    w.writerows(list_of_dicts)
    logging.debug("Created CSV %s" % csv.getvalue())
    return csv
def _convert_csv(self, notes):
    """Write ``notes`` as CSV rows (in self.fieldnames order) either to
    self.simple_filename or, when self.stdout is set, to stdout."""
    if self.stdout:
        simple_file = StringIO()
    else:
        simple_file = open(self.simple_filename, 'w')
    writer = DictWriter(simple_file, self.fieldnames)
    # NOTE(review): no writeheader() call -- the output is headerless;
    # confirm that is intentional.
    writer.writerows(notes)
    if self.stdout:
        simple_file.seek(0)
        # XXX: this is only for the StringIO right now
        sys.stdout.write(simple_file.getvalue())
    simple_file.close()
def write_out(stats_list):
    """
    appends stats rows to the output CSV (no header is written here)
    :param stats_list: list of dicts
    :return:
    """
    with open(output_filename, "a") as output_file:
        csv_out = DictWriter(
            output_file,
            fieldnames=output_fieldnames,
            delimiter=",",
            extrasaction="ignore",
            lineterminator="\n",
        )
        csv_out.writerows(stats_list)
    print("Wrote {} lines to {}".format(len(stats_list), output_filename))
def export_data(fn, messages): log.name("exporter").debug("Exporting data") ## fieldnames keys_ = SMS_PROPERTIES + TEXT_PROPERTIES + SINGLE_ENTITIES_ELEM + COMPLEX_ENTITIES_ELEM + ["tvb"] + EMOTICONS.keys() writer = DictWriter(open(fn, "w"), fieldnames=keys_, delimiter=",", quotechar='"', quoting=QUOTE_ALL) writer.writeheader() log.name("exporter").debug("Preparing and saving data") writer.writerows([prepare_message_data(m) for m in messages]) log.name("exporter").debug("Data exported and saved into {file_name}", file_name=fn)
def rewrite_roomers(self):
    """Persist every roomer to data/roomers.csv (semicolon-delimited);
    truncate the file when there are no roomers at all."""
    if len(self.roomers):
        # Serialize each roomer; the first row's keys become the header.
        people = [roomer.nnn() for roomer in self.roomers]
        names = [field for field in people[0]]
        with open('.\\data\\roomers.csv', 'w') as file:
            writer = DictWriter(file, fieldnames=names, delimiter=';')
            writer.writeheader()
            writer.writerows(people)
    else:
        # Opening in 'w' mode truncates any stale contents.
        with open('.\\data\\roomers.csv', 'w') as file:
            pass
def to_csv(self): """ Write the CSVOutput to a csv file """ if self.data_type == 'pandas': self.records.to_csv(self.output) elif self.data_type == 'numpy': np.savetxt(self.output, self.records, delimiter=",") elif isinstance(self.records[0], dict): with open(self.output, 'w') as output: wrtr = DictWriter(output, fieldnames=self.records[0].keys()) wrtr.writeheader() wrtr.writerows(self.records) else: with open(self.output, 'w') as output: wrtr = writer(output) wrtr.writerows(self.records) return self.output
def handle(self, *args, **options):
    """Export the AMI catalog (name, region, backend_id) for every Image
    to the CSV file named by the first positional argument."""
    if not args:
        raise CommandError('AMI catalog filename is not specified.')
    with open(args[0], 'w') as csvfile:
        writer = DictWriter(csvfile, fieldnames=('name', 'region', 'backend_id'))
        writer.writeheader()
        writer.writerows(
            {
                'name': image.name,
                'region': image.region.name,
                'backend_id': image.backend_id,
            }
            for image in models.Image.objects.all()
        )
def main():
    """Build per-user search histories from the stream/sinfo TSVs, then emit
    user count features (user_cnt_*.csv) and per-ad count features
    (user_aid_cnt_*.csv), each externally sorted by SearchID."""
    train_iter = next_row(read_tsv("data/stream_%s.tsv"%args.sz))
    test_iter = iter([])
    sinfo_iter = read_tsv("data/sinfo_%s.tsv"%args.sz)
    # Columns dropped from each ad row before numeric conversion.
    del_keys_set = ["HistCTR", "SearchID", "ObjectType"]
    for t, (data_type, rows, sinfo) in enumerate(data(train_iter=train_iter, test_iter=test_iter, sinfo_iter=sinfo_iter)):
        uid = int(sinfo["UserID"])
        date_str = sinfo["SearchDate"]
        ts = convert_ts(datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.0"))
        # Keep only contextual ads (ObjectType == 3).
        rows = filter(lambda x : int(x["ObjectType"]) == 3, rows)
        for row in rows:
            for key in del_keys_set:
                del row[key]
            # Remaining columns become ints; empty strings become 0.
            for key in row:
                row[key] = int(row[key]) if row[key] != "" else 0
        item = (
            ts,
            int(sinfo["SearchID"]),
            tuple([(row["AdID"], row["IsClick"], row["Position"]) for row in rows]),
        )
        uid_sid[uid].append(item)
    print "uid_sid: %s"%len(uid_sid)
    # Chronological order per user (items sort by leading timestamp).
    for uid in uid_sid:
        uid_sid[uid].sort()
    print "start user_cnt."
    file_name = "data/user_cnt_%s.csv"%args.sz
    with open(file_name, "w") as f:
        writer = DictWriter(f, fieldnames=["SearchID", "t_cnt", "bf_cnt", "af_cnt", "bf_3h_cnt", "af_3h_cnt", "bf_clk_cnt", "bag2", "bag1"])
        writer.writeheader()
        for uid in uid_sid:
            all_se = uid_sid[uid]
            writer.writerows(get_rows(all_se))
    # External numeric sort on the first (SearchID) column.
    os.system('sort -t"," -k1 -g -S 2G %s -o %s_sorted'%(file_name, file_name))
    print "start user_aid_cnt."
    file_name = "data/user_aid_cnt_%s.csv"%args.sz
    with open(file_name, "w") as f:
        writer = DictWriter(f, fieldnames=["SearchID", "AdID", "clk_cnt", "show_cnt", "t_show_cnt", "pos_clk_cnt", "pos_show_cnt"])
        writer.writeheader()
        for uid in uid_sid:
            all_se = uid_sid[uid]
            writer.writerows(get_aid_rows(uid, all_se))
    os.system('sort -t"," -k1 -g -S 2G %s -o %s_sorted'%(file_name, file_name))
class CsvDumper: """This class provides convenience to CSV dumping via Context Manager.""" def __init__(self, filename, fieldnames, logger=None): #: filename for the output file where the data will be dumped self.filename = filename #: list of fieldnames that the CSV file will follow self.fieldnames = fieldnames #: file handle for writing to the output self.open_file = open(self.filename, 'a') #: :class:`DictWriter <DictWriter>` to handle writing dicts self.dict_writer = DictWriter(self.open_file, self.fieldnames, dialect='unix', quoting=QUOTE_NONNUMERIC) #: :class:`Logger <Logger>` self.logger = logger or logging.getLogger(__name__) def __enter__(self): self._write_csv_headers() return self def __exit__(self, *args): self.open_file.close() def _write_csv_headers(self): """Writes the CSV data headers into the file if it's still empty.""" if self.open_file.tell() == 0: self.dict_writer.writeheader() def dump(self, list_of_dicts): """Dumps the list of dicts into the CSV file for this instance. :param list_of_dicts: As it says. """ if len(list_of_dicts) == 0: return self.dict_writer.writerows(list_of_dicts) self.logger.info("Data collected and dumped: {}".format(self.filename))
def export_to_csv(self, sql):
    """Run ``sql`` and return the full result set as CSV bytes, with the
    column names from the cursor description as the header row."""
    output = StringIO()
    cursor = self.get_cursor()
    with self.log_query(sql):
        cursor.execute(sql)
    data = cursor.fetchall()
    # NOTE(review): writerows(data) requires the cursor to return mapping
    # rows (e.g. a dict cursor) -- verify the cursor configuration.
    writer = DictWriter(
        output,
        fieldnames=[column[0] for column in cursor.description],
        lineterminator="\n",
    )
    writer.writeheader()
    writer.writerows(data)
    result = output.getvalue().encode()
    output.close(
    )  # StringIO doesn't support context manager protocol on Python 2
    return result
def export_to_CSV(self): """! Exports the generated contact list to a CSV file. """ try: # print("Attempting to save to", self.file_name+".csv") with open(self.file_name+".csv", 'w', encoding='utf8', newline='') as output_file: fc = DictWriter(output_file, fieldnames=self.contact_list[0].keys(),) fc.writeheader() fc.writerows(self.contact_list) except IOError: messagebox.showerror("Error", "Unable to access "+ self.file_name + ".csv. Please close the file if it is open.") print("Unable to access", self.file_name+".csv.", "Please close the file if it is open.") return 1 print("Contact list has been saved.") return 0
def bulkload_lod(self, lod, keys, tablename, tmp_prefix='/tmp/'):
    """Dump ``lod`` (list of dicts) to a temporary headerless CSV and
    bulk-load it into ``tablename``; returns the temp file's path."""
    # Timestamped temp file name, e.g. /tmp/1633036800.csv
    filename = tmp_prefix + datetime.now().strftime("%s") + '.csv'
    register_dialect('own', 'excel', delimiter=',', lineterminator='\n', quoting=QUOTE_NONNUMERIC)
    with open(filename, 'w', encoding='utf-8') as f:
        wr = DictWriter(f, keys, dialect='own')
        wr.writerows(lod)
    # HACK: rewrite the file with all '""' pairs stripped so quoted empty
    # strings load as empty/NULL rather than a literal pair of quotes.
    # NOTE(review): this also mangles escaped double quotes inside data
    # values ('""' is the CSV escape for '"') -- verify the data can never
    # contain quotes.
    with open(filename, 'rt', encoding='utf-8') as f:
        lines = f.readlines()
    sublines = [sub('""', '', i) for i in lines]
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(sublines)
    self.bulkload(tablename, filename)
    return filename
def assign_ids(f): """Function that does the bulk of the processing. Definitely too long and needs to be split out to smaller functions, oh well. Outputs the matched data to a staging folder Keyword Arguments: f -- name of the file to process """ test_file_path = os.path.join(Dirs.TEST_DIR, f) staging_file_path = os.path.join(Dirs.STAGING_DIR, f) with open(test_file_path, 'r', encoding='utf-16') as r, \ open(staging_file_path, 'w', encoding='utf-16') as w: reader = DictReader(r, dialect='excel-tab') rows = list(reader) fields = reader.fieldnames # ocdid_report is not included sometimes, and additional fields are # occassionally added. if 'ocdid_report' not in fields: fields.append('ocdid_report') writer = DictWriter(w, fieldnames=fields, dialect='excel-tab') writer.writeheader() ocdid_vals = {} unmatched = {} matched = [] for row in list(rows): row['OCDID'] = row['OCDID'].lower() ocdid = row['OCDID'] if ocdid == '': message = '{} / {} ({}) has no OCDID.' print(message.format(row['Person UUID'], row['Electoral District'], row['State'])) rows.remove(row) matched.append(row) matched.sort(key=lambda x: x['Person UUID']) try: writer.writerows(matched) except ValueError: print(matched[0]) print([match for match in matched if None in list(match)][0]) raise
def search_twitter(keyword, max_pages, twitter_config, csv_directory, **kwargs):
    """Searches Twitter for tweets with the passsed in keyword and stores the
    the result in the data folder locally as a csv file.

    Arguments:
        keyword {string} -- keyword to be used to search for relevant tweets
        max_pages {int} -- maximum number of result pages to fetch
        twitter_config {string} -- path to an ini file with API credentials
        csv_directory {string} -- directory where the CSV is written
    """
    config = ConfigParser()
    config.read(twitter_config)
    auth = OAuthHandler(config.get('twitter', 'consumer_key'),
                        config.get('twitter', 'consumer_secret'))
    auth.set_access_token(config.get('twitter', 'access_token'),
                          config.get('twitter', 'access_token_secret'))
    api = API(auth)
    all_tweets = []
    page_num = 0
    # use the {{ ds }} = execution date variable passed in as part of context
    # provided by Airflow through Jinja macros
    execution_date = kwargs.get('ds')
    since_date = datetime.strptime(execution_date, '%Y-%m-%d').date() - timedelta(hours=1)
    # BUG FIX: the original appended the date window to an undefined name
    # ``query`` (NameError at runtime); the window belongs on ``keyword``,
    # which is what the Cursor below actually searches with.
    keyword += ' since:{} until:{}'.format(since_date.strftime('%Y-%m-%d'),
                                           kwargs.get('ds'))
    print('Searching Twitter with: %s' % keyword)
    for page in Cursor(api.search, q=keyword, monitor_rate_limit=True,
                       wait_on_rate_limit=True).pages():
        all_tweets.extend([extract_tweet_data(t, keyword, ) for t in page])
        page_num += 1
        if page_num > max_pages:
            break
    # If it is an empty result, stop here
    if not len(all_tweets):
        return
    if not os.path.exists(csv_directory):
        os.makedirs(csv_directory)
    filename = '{}/{}_{}.csv'.format(csv_directory, keyword,
                                     datetime.now().strftime('%m%d%Y%H%M%S'))
    with open(filename, 'w') as raw_file:
        raw_writer = DictWriter(raw_file, fieldnames=all_tweets[0].keys())
        raw_writer.writeheader()
        raw_writer.writerows(all_tweets)
def scrapAdress(self, response):
    """Scrape bitcoin addresses from the page, yield one item per address,
    and persist the accumulated results to adresses.json and sample.csv."""
    # Number of addresses to keep per page (-a option, defaulting to 2).
    if self.a:
        nbadressesbypage = self.a
    else:
        nbadressesbypage = 2
    service = response.meta['service']
    page = response.meta['page']
    # Match legacy (P2PKH/P2SH) base58 bitcoin addresses inside link hrefs.
    for adresse in np.array(
            response.xpath("//td/a/@href").re(
                r'[13][a-km-zA-HJ-NP-Z1-9]{25,34}')
            [0:int(nbadressesbypage)]):
        # (The original dict literal repeated the 'service' key; the
        # duplicate was dropped -- the yielded value is unchanged.)
        yield {
            'service': service,
            'adresse': adresse,
            'page': page
        }
        self.adressDictionnary['adresses'].append({
            'service': service,
            adresse: service,
            'page': page
        })
        self.adressTab.append({
            'service': service,
            'adresse': adresse,
            'page': page
        })
    # Persist the full dictionary as JSON.
    jsonObject = json.dumps(self.adressDictionnary)
    with open("adresses.json", "w") as f:
        f.write(jsonObject)
    # BUG FIX: the original passed a JSON *string* to writerows() (iterating
    # characters) and indexed the dict with [0] (KeyError); write the flat
    # list of address rows collected in self.adressTab instead.
    if self.adressTab:
        with open("sample.csv", "w") as the_file:
            writer = DictWriter(the_file, self.adressTab[0].keys())
            writer.writeheader()
            writer.writerows(self.adressTab)
def export_sections_to_csv(filename_or_object, sections):
    """Write ``sections`` to CSV.

    Accepts either a filename (opened and closed here) or an already-open
    file object (left open for the caller to manage).
    """
    if isinstance(filename_or_object, str):
        f = open(filename_or_object, 'w', newline='')
        opened = True
    else:
        f = filename_or_object
        opened = False
    # The original's ``except Exception: raise`` was a no-op and has been
    # removed; try/finally alone guarantees cleanup.
    try:
        writer = DictWriter(f, ['section'] + FIELDS)
        writer.writeheader()
        writer.writerows(chain.from_iterable(map(_section2rows, sections)))
    finally:
        # Only close handles this function opened itself.
        if opened:
            f.close()
def save_delivery(delivery, available):
    """Update the availability flag for ``delivery`` in data/delivery.csv.

    Returns True when the file was rewritten, False when no deliveries
    exist at all; an unknown delivery still rewrites the file unchanged.
    """
    deliveries = get_deliveries()
    if not deliveries:
        return False
    # Flip the flag on the first matching delivery only.
    for index, entry in enumerate(deliveries):
        if delivery == entry.get('delivery'):
            deliveries[index]['available'] = available
            break
    csv_path = join(app.root_path, "data", "delivery.csv")
    with open(csv_path, "w", newline="") as csv_file:
        writer = DictWriter(csv_file, ['delivery', 'available'])
        writer.writeheader()
        writer.writerows(deliveries)
    return True
def main():
    """Split a (filename, lineno, score) CSV into per-file
    "<name>.scores.raw" files inside the target directory."""
    parser = OptionParser(usage='%prog [options] infile targetname')
    parser.add_option('-f', '--force', action='store_true', default=False, help='overwrite files in current target directory, if exist')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.error('wrong number of positional arguments')
    infile = args[0]
    targetname = args[1]
    if not(exists(infile)):
        print >>sys.stderr, 'File %s not found.' % infile
        sys.exit(1)
    if not(exists(targetname)) or not(isdir(targetname)):
        print >>sys.stderr, 'Target directory %s not found.' % targetname
        sys.exit(1)
    # reading from file
    instream = open(infile)
    infieldnames = ['filename', 'lineno', 'score']
    reader = DictReader(instream, fieldnames=infieldnames)
    entries = list(reader)
    instream.close()
    # sorting by filename
    ftable = {}
    for e in entries:
        insertEntry(ftable, e)
    # putting this information into separate files in directory tree
    outfieldnames = ['lineno', 'score']
    for k in ftable.keys():
        outfile = join(targetname, ".".join([k, 'scores', 'raw']))
        if exists(outfile) and not(options.force):
            print >>sys.stderr, 'File %s already exists.' % outfile
            print >>sys.stderr, 'Use --force to overwrite.'
            sys.exit(1)
        outstream = open(outfile, 'w')
        # extrasaction='ignore' drops the 'filename' column from each entry;
        # no header row is written.
        writer = DictWriter(outstream, outfieldnames, extrasaction='ignore', lineterminator='\n')
        writer.writerows(ftable[k])
        outstream.close()
def write_derived_products(papers, base_folder=BASE_FOLDER, issn=ISSN,
                           ignore_book_reviews=False):
    """Write the paper CSV plus title/author/institution frequency CSVs
    for *issn* into *base_folder*.

    :param papers: list of paper dicts (columns taken from KEYS_TO_KEEP)
    :param base_folder: directory receiving all output files
    :param issn: journal ISSN used to name every output file
    :param ignore_book_reviews: forwarded to the frequency helpers
    :raises ValueError: when issn is None or empty
    """
    if issn is None or issn == "":
        raise ValueError("ISSN is not defined - can't write out files")

    # Every output file is named after the ISSN inside base_folder.
    OUTPUT_FILE = os.path.join(base_folder, "{}.csv".format(issn))
    TITLE_FREQUENCY_FILE = os.path.join(
        base_folder, "{}_title_frequency.csv".format(issn))
    AUTHOR_FREQUENCY_FILE = os.path.join(
        base_folder, "{}_author_frequency.csv".format(issn))
    # NOTE: the "insitution" typo is kept so existing consumers of the
    # generated filename keep working.
    INSTITUTION_FREQUENCY_FILE = os.path.join(
        base_folder, "{}_insitution_frequency.csv".format(issn))

    title_frequency_info = frequency_titles(
        papers, ignore_book_reviews=ignore_book_reviews)
    author_frequency_info = frequency_authors(papers)
    institution_frequency_info = frequency_institutions(
        papers, ignore_book_reviews=ignore_book_reviews)

    print("Writing Paper Info")
    # Text mode with newline='' -- the csv module writes str, so the old
    # 'wb' mode raised TypeError on Python 3.
    with open(OUTPUT_FILE, 'w', newline='') as output_file_handle:
        csv_writer = DictWriter(output_file_handle, fieldnames=KEYS_TO_KEEP)
        csv_writer.writeheader()
        csv_writer.writerows(papers)

    write_frequencies([{
        "name": "Title",
        "path": TITLE_FREQUENCY_FILE,
        "data": title_frequency_info
    }, {
        "name": "Author",
        "path": AUTHOR_FREQUENCY_FILE,
        "data": author_frequency_info
    }, {
        "name": "Institution",
        "path": INSTITUTION_FREQUENCY_FILE,
        "data": institution_frequency_info
    }])
def write_input_spreadsheet(data_units, outfile):
    """Dump *data_units* to *outfile* as CSV, adding a '_golden' column
    plus one '<chunk>_gold' answer column per 'chunk_NN' header.

    Returns 0 on success.
    """
    # Union of all keys across the units forms the base header set.
    headers = {key for unit in data_units for key in unit.keys()}
    # Specific field for test (gold) units
    headers.add('_golden')
    headers = list(headers)
    # One gold-answer column for every chunk column.
    chunk_re = re.compile('chunk_[0-9]{2}$')
    headers += [header + '_gold' for header in headers
                if chunk_re.search(header)]
    headers.sort()
    logger.debug('CSV headers: %s' % headers)
    writer = DictWriter(outfile, headers)
    writer.writeheader()
    writer.writerows(data_units)
    return 0
def main():
    """The main function: collect identifiers from every AST JSON file
    given on the command line and emit them as CSV on stdout."""
    args = parse_args()
    found = set()
    for path in args.json:
        with open(path, 'r') as inf:
            for ast in parse_ast(inf):
                found |= walk_ast_rec(ast)
    rows = [{'identifier': iden, 'type': iden_type}
            for iden, iden_type in found]
    csv_writer = CsvDictWriter(sys.stdout, fieldnames=('identifier', 'type'))
    csv_writer.writeheader()
    csv_writer.writerows(rows)
def main() -> None:
    """Interactively export an account's favorites to a CSV file.

    Logs in, switches the site to favorites-sorted minimal display mode,
    then walks the favorite pages from last to first so the CSV ends up
    in oldest-to-newest order.
    """
    session = HTMLSession()
    # NOTE(review): the next line is garbled -- the '******' runs are not
    # valid Python and look like redacted credential prompts.  It presumably
    # read the username and password via input() and then called
    # inline_set(session, "fs", "f"); restore from the original source
    # before running.
    login(session, input("username: "******"password: "******"fs", "f")  # sort by favorites
    inline_set(session, "dm", "m")  # minimal display mode
    pagecount = scrape_pagecount(session)
    with open(input("output file: "), mode="w") as ofile:
        writer = DictWriter(ofile, fieldnames=CSV_FIELDNAMES, dialect=CSV_DIALECT)
        writer.writeheader()
        # Pages (and each page's favorites) are iterated in reverse.
        for page in range(pagecount)[::-1]:
            favorites = scrape_favorites(session, page)[::-1]
            writer.writerows(map(lambda fav: fav._asdict(), favorites))
            print(f"[page {page}] scraped {len(favorites)} favorites.")
            # Rate-limit between page fetches.
            sleep(SECONDS_PER_PAGE)
    print("all exported. >:3c")
def _check_point_to_csv(self, send_segment, now):
    """Flush the parsed data for *send_segment* to a gzipped CSV in the
    "input" directory, where the pusher will find, archive, and send it."""
    # The input directory may not have been created yet.
    create_dirs(self.input_dir)
    rows = self.log_node.parsed_data.pop(send_segment, [])
    if not rows:
        return
    stamps = (send_segment.strftime(self.file_fmt),
              now.strftime(self.file_fmt))
    out_path = join(self.input_dir, '{}_{}.csv.gz'.format(*stamps))
    with gz_open(out_path, 'wt') as gz_file:
        csv_writer = DictWriter(gz_file, CSV_HEADER)
        csv_writer.writeheader()
        csv_writer.writerows(map(self._format_item, rows))
def write_csv_file(interval_data, filename):
    """Writes the period configuration structure into a csv file

    Parameters
    ----------
    interval_data: list
        A list of period dicts, each with 'name' and 'interval' keys
    filename: str
        The name of the file to produce
    """
    # 'w' (not the old 'w+') -- the file is only written, never read here.
    with open(filename, 'w', newline='') as csvfile:
        headers = ['name', 'interval']
        writer = DictWriter(csvfile, headers)
        writer.writeheader()
        # Intervals may be arbitrary structures, so serialise them as JSON.
        writer.writerows({
            'name': i['name'],
            'interval': json.dumps(i['interval'])
        } for i in interval_data)
def save_library():
    """Export the in-memory library to '<library_filename>.csv', sorted
    by artist, then album, then track number.

    Exits with an error message if the file cannot be written.
    """
    # One stable sort on a composite key is equivalent to the original
    # three chained single-key sorts (track, then album, then artist).
    temp = sorted(content, key=itemgetter('artist', 'album', 'trackNumber'))
    # library_filename = raw_input('save file as (do not include .csv or any extension!)\n: ')
    try:
        # newline='' prevents doubled line endings on Windows; the old
        # trailing comma after print() was a Python 2 artifact that built
        # a throwaway tuple.
        with open(library_filename + '.csv', 'w', newline='') as outfile:
            print("exporting library to CSV format")
            writer = DictWriter(
                outfile,
                ('artist', 'album', 'trackNumber', 'name', 'playCount'))
            writer.writeheader()
            writer.writerows(temp)
        print('\'' + str(library_filename) + str('.csv') +
              '\' saved in current directory')
    except IOError:
        sys.exit("invalid filename!")
def append_csv(csv_filename, data):
    """Append *data* (a dict) as a row to an existing CSV file.

    If *data* introduces keys missing from the file's header, the whole
    file is rewritten with the extra columns appended in sorted order;
    otherwise the row is simply appended in place.
    """
    tmp_name = None
    with open(csv_filename, 'a+', newline='') as csv_file:
        # 'a+' positions the stream at EOF on open; rewind so DictReader
        # can actually read the header row (otherwise fieldnames is None).
        csv_file.seek(0)
        reader = DictReader(csv_file)
        new_keys = set(data.keys()).difference(reader.fieldnames)
        if not new_keys:
            # Same schema: append one row at the end.
            csv_file.seek(0, os.SEEK_END)
            writer = DictWriter(csv_file, reader.fieldnames,
                                lineterminator='\n')
            writer.writerow(data)
        else:
            # New columns: rewrite everything through a temp file.
            reader.fieldnames.extend(sorted(new_keys))
            # Text mode is required -- csv writes str, and the default
            # NamedTemporaryFile mode is binary ('w+b').
            with NamedTemporaryFile('w', dir='.', delete=False,
                                    newline='') as csv_tmpfile:
                writer = DictWriter(csv_tmpfile, reader.fieldnames,
                                    lineterminator='\n')
                writer.writeheader()
                writer.writerows(row for row in reader)
                writer.writerow(data)
                tmp_name = csv_tmpfile.name
    if tmp_name:
        # Replace only after the source handle is closed (required on
        # Windows, where moving onto an open file fails).
        shutil.move(tmp_name, csv_filename)
def save_receipts(self, dest_dir, csv_filename):
    """Save a receipt file for every commute email into *dest_dir* and
    write a summary CSV (*csv_filename*) of all trips there."""
    rows = []
    for email in self.commute_emails:
        when = email.get_trip_time()
        date_str = when.strftime('%Y%m%d')
        time_str = when.time().strftime('%H%M')
        receipt_name = "{trip_date}_{trip_time}hrs_{provider}".format(
            trip_date=date_str,
            trip_time=time_str,
            provider=email.get_provider())
        email.save_receipt(file_name=receipt_name, save_path=dest_dir)
        src_addr, dest_addr = email.get_source_dest_address()
        rows.append({
            'trip_date': date_str,
            'trip_time': "%shrs" % time_str,
            'start_addr': src_addr,
            'end_addr': dest_addr,
            'fare': email.get_fare(),
        })
    with open(os.path.join(dest_dir, csv_filename), 'w') as report:
        writer = DictWriter(report, fieldnames=rows[0].keys())
        writer.writeheader()
        writer.writerows(rows)
def delete(self, phone_number):
    """Remove every entry whose PhoneNumber equals *phone_number* and
    rewrite the CSV file; report when nothing matched.

    The original popped items out of self.phone while iterating over it,
    which skips the element following each removal; rebuilding the list
    fixes that (the per-item debug prints are dropped as well).
    """
    kept = [entry for entry in self.phone
            if entry["PhoneNumber"] != phone_number]
    if len(kept) == len(self.phone):
        print("Entered phone number does not exists.")
    else:
        # Mutate in place so other references to the list stay valid.
        self.phone[:] = kept
        with open(filename, "w", newline="") as handler:
            writer = DictWriter(
                handler,
                fieldnames=["Name", "PhoneNumber", "Email Address"])
            writer.writeheader()
            writer.writerows(self.phone)
        print("Phone Number deleted successfully")
def write_csv_result(self):
    """Append all buffered results to the report CSV (writing a header
    only when the file is new), then clear the buffer."""
    report_path = self.out_dir / self.report_file
    need_header = not report_path.exists()
    # Storage values may be single rows or lists of rows -- flatten.
    rows = []
    for entry in self.storage.values():
        if isinstance(entry, list):
            rows.extend(entry)
        else:
            rows.append(entry)
    with open(str(report_path), 'a+', newline='') as report:
        writer = DictWriter(report, fieldnames=self.fields)
        if need_header:
            writer.writeheader()
        writer.writerows(rows)
    self._reset_storage()
def to_csv(list_of_dicts: List[dict], file_path: str):
    """
    Write the given list of dicts to a csv at the given file path.

    The dictionaries should have a uniform structure, i.e. they should be
    parsable into the rows of the csv, with the keys equivalent to column
    names.

    :param list_of_dicts: the list to write to a file
    :type list_of_dicts: list
    :param file_path: the file path at which to create the file
    :type file_path: str
    """
    keys = list_of_dicts[0].keys()
    # newline='' stops the text layer from translating (doubling) the csv
    # module's line terminators on Windows.
    with open(file_path, 'w', encoding='utf-8', newline='') as output_file:
        dict_writer = DictWriter(output_file, keys, lineterminator='\n')
        dict_writer.writeheader()
        dict_writer.writerows(list_of_dicts)
def get_product_details(self, response):
    """Page through the Algolia index behind indiehackers.com/products and
    append every product hit to self.raw_data_file as CSV.

    Runs inside Scrapy's engine (each yielded download is resolved by the
    crawler), fetching 16 hits per page until an empty page is returned.
    """
    import os  # local import: only needed for the header-existence check
    keys = [
        "name", "tagline", "revenue", "revenue_type", "numFollowers",
        "websiteUrl", "description", "city", "country", "twitterHandle"
    ]
    for i in range(1000):  # upper bound; we break on the first empty page
        try:
            request = scrapy.Request(
                "https://n86t1r3owz-3.algolianet.com/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(3.35.1)%3B%20Browser%20(lite)%3B%20JS%20Helper%202.21.1&x-algolia-application-id=N86T1R3OWZ&x-algolia-api-key=5140dac5e87f47346abbda1a34ee70c3",
                method='post',
                body=
                "{\"requests\":[{\"indexName\":\"products\",\"params\":\"query=&hitsPerPage=16&page="
                + str(i) +
                "&restrictSearchableAttributes=&facets=%5B%5D&tagFilters=\"}]}",
                headers={
                    "User-Agent":
                    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
                    "Accept": "application/json",
                    "Accept-Language": "en-US,en;q=0.5",
                    "content-type": "application/x-www-form-urlencoded",
                    "Referer":
                    "https://www.indiehackers.com/products?sorting=recently-updated"
                })
            response = yield self.crawler.engine.download(request, self)
            json_data = json.loads(response.text)
            results = json_data["results"]
            hits = results[0]["hits"]
            if not hits:
                break
            for product in hits:
                if ('stripe-verified-revenue' in product["_tags"]):
                    product["revenue_type"] = 'stripe-verified revenue'
                else:
                    product["revenue_type"] = 'self-reported revenue'
            # Write the header row only once (file absent or empty) --
            # previously it was rewritten on every page of the append-mode
            # file, leaving repeated header rows in the output.
            write_header = (not os.path.exists(self.raw_data_file)
                            or os.path.getsize(self.raw_data_file) == 0)
            with open(self.raw_data_file, 'a') as outfile:
                writer = DictWriter(outfile, keys, extrasaction='ignore')
                if write_header:
                    writer.writeheader()
                writer.writerows(hits)
        except Exception as e:
            # Best-effort scraper: log and continue with the next page.
            print("exception is", e)
def configure(self, config_dict, registry_config_str):
    """
    Set the Interface attributes from the configurations provided by the
    Platform Driver, and create the "device" if it doesn't already exist

    :param config_dict: Dictionary of configuration values passed from the
        Platform Driver
    :param registry_config_str: String representation of the registry
        configuration passed from the Platform Driver
    """
    # Where the CSV "device" lives on disk.
    self.csv_path = config_dict.get("csv_path", "csv_device.csv")
    # Seed the CSV "device" with the global defaults when it is missing,
    # so there is always something to test against.
    if not os.path.isfile(self.csv_path):
        _log.info("Creating csv 'device'")
        with open(self.csv_path, "w+") as csv_device:
            device_writer = DictWriter(csv_device, fieldnames=CSV_FIELDNAMES)
            device_writer.writeheader()
            device_writer.writerows(CSV_DEFAULT)
    # Build our registers from the registry configuration.
    self.parse_config(registry_config_str)
def run_set_output_check() -> None:
    """Fetch the latest "all" JSON, extract last month's energy value per
    fuel tech, append the result to the history records and push the CSV
    to S3."""
    response = requests.get(V3_ALL_URL)
    statset = load_statset(response.json())
    if not statset:
        raise Exception("Failed to get v3 all data")

    check_date = get_last_month()
    logger.info("Checking {}".format(check_date))

    check_result = {
        "run_dt": str(datetime.now()),
        "check_network": "NEM",
        "check_date": str(check_date),
    }

    # Only series whose id ends in "energy" contribute a fuel-tech value.
    for series in statset.data:
        if not (series.id and series.id.endswith("energy")):
            continue
        value = series.history.get_date(check_date)
        if series.fuel_tech and value:
            check_result[series.fuel_tech] = value

    records = _get_current_records()
    logger.info("Have {} current records".format(len(records)))
    records.append(check_result)

    # Render to CSV in memory, then upload.
    buffer = io.StringIO()
    csv_writer = DictWriter(buffer, fieldnames=CSV_OUTPUT_COLUMNS)
    csv_writer.writeheader()
    csv_writer.writerows(records)

    write_to_s3(buffer.getvalue(), OUTPUT_THE_CHECK_CSV_PATH, "text/csv")
def _dicts_to_tsv(data_dicts, first_fields, descriptions_dict): ''' >>> data_dicts = [ ... # explicit subtitle ... {'title': 'Star Wars', 'subtitle': 'A New Hope', 'date': '1977'}, ... # empty subtitle ... {'title': 'The Empire Strikes Back', 'subtitle': '', 'date': '1980'}, ... # N/A subtitle ... {'title': 'Return of the Jedi', 'date': '1983'} ... ] >>> descriptions_dict = { ... 'title': 'main title', ... 'date': 'date released', ... 'extra': 'should be ignored' ... } >>> lines = _dicts_to_tsv(data_dicts, ['title'], descriptions_dict).split('\\r\\n') >>> for line in lines: ... print('| ' + ' | '.join(line.split('\\t')) + ' |') | title | date | subtitle | | #main title | date released | | | Star Wars | 1977 | A New Hope | | The Empire Strikes Back | 1980 | | | Return of the Jedi | 1983 | N/A | | | ''' # wrap in default dicts that return 'n/a' body_fields = sorted(set().union(*[d.keys() for d in data_dicts]) - set(first_fields)) for dd in data_dicts: for field in body_fields: if field not in dd: dd[field] = 'N/A' output = StringIO() writer = DictWriter(output, first_fields + body_fields, delimiter='\t', extrasaction='ignore') writer.writeheader() writer.writerows([descriptions_dict] + data_dicts) tsv = output.getvalue() tsv_lines = tsv.split('\n') tsv_lines[1] = '#' + tsv_lines[1] return '\n'.join(tsv_lines)